XXXX introduce drv_sectohz
--- old/usr/src/uts/sun4u/cpu/us3_common.c
+++ new/usr/src/uts/sun4u/cpu/us3_common.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/systm.h>
28 28 #include <sys/ddi.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/archsystm.h>
31 31 #include <sys/vmsystm.h>
32 32 #include <sys/machparam.h>
33 33 #include <sys/machsystm.h>
34 34 #include <sys/machthread.h>
35 35 #include <sys/cpu.h>
36 36 #include <sys/cmp.h>
37 37 #include <sys/elf_SPARC.h>
38 38 #include <vm/vm_dep.h>
39 39 #include <vm/hat_sfmmu.h>
40 40 #include <vm/seg_kpm.h>
41 41 #include <sys/cpuvar.h>
42 42 #include <sys/cheetahregs.h>
43 43 #include <sys/us3_module.h>
44 44 #include <sys/async.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/debug.h>
47 47 #include <sys/dditypes.h>
48 48 #include <sys/prom_debug.h>
49 49 #include <sys/prom_plat.h>
50 50 #include <sys/cpu_module.h>
51 51 #include <sys/sysmacros.h>
52 52 #include <sys/intreg.h>
53 53 #include <sys/clock.h>
54 54 #include <sys/platform_module.h>
55 55 #include <sys/machtrap.h>
56 56 #include <sys/ontrap.h>
57 57 #include <sys/panic.h>
58 58 #include <sys/memlist.h>
59 59 #include <sys/bootconf.h>
60 60 #include <sys/ivintr.h>
61 61 #include <sys/atomic.h>
62 62 #include <sys/taskq.h>
63 63 #include <sys/note.h>
64 64 #include <sys/ndifm.h>
65 65 #include <sys/ddifm.h>
66 66 #include <sys/fm/protocol.h>
67 67 #include <sys/fm/util.h>
68 68 #include <sys/fm/cpu/UltraSPARC-III.h>
69 69 #include <sys/fpras_impl.h>
70 70 #include <sys/dtrace.h>
71 71 #include <sys/watchpoint.h>
72 72 #include <sys/plat_ecc_unum.h>
73 73 #include <sys/cyclic.h>
74 74 #include <sys/errorq.h>
75 75 #include <sys/errclassify.h>
76 76 #include <sys/pghw.h>
77 77 #include <sys/clock_impl.h>
78 78
79 79 #ifdef CHEETAHPLUS_ERRATUM_25
80 80 #include <sys/xc_impl.h>
81 81 #endif /* CHEETAHPLUS_ERRATUM_25 */
82 82
83 83 ch_cpu_logout_t clop_before_flush;
84 84 ch_cpu_logout_t clop_after_flush;
85 85 uint_t flush_retries_done = 0;
86 86 /*
87 87 * Note that 'Cheetah PRM' refers to:
88 88 * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
89 89 */
90 90
91 91 /*
92 92 * Per CPU pointers to physical address of TL>0 logout data areas.
93 93 * These pointers have to be in the kernel nucleus to avoid MMU
94 94 * misses.
95 95 */
96 96 uint64_t ch_err_tl1_paddrs[NCPU];
97 97
98 98 /*
99 99 * One statically allocated structure to use during startup/DR
100 100 * to prevent unnecessary panics.
101 101 */
102 102 ch_err_tl1_data_t ch_err_tl1_data;
103 103
104 104 /*
105 105 * Per CPU pending error at TL>0, used by level15 softint handler
106 106 */
107 107 uchar_t ch_err_tl1_pending[NCPU];
108 108
109 109 /*
110 110 * For deferred CE re-enable after trap.
111 111 */
112 112 taskq_t *ch_check_ce_tq;
113 113
114 114 /*
115 115 * Internal functions.
116 116 */
117 117 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
118 118 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
119 119 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
120 120 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
121 121 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
122 122 uint64_t t_afsr_bit);
123 123 static int clear_ecc(struct async_flt *ecc);
124 124 #if defined(CPU_IMP_ECACHE_ASSOC)
125 125 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
126 126 #endif
127 127 int cpu_ecache_set_size(struct cpu *cp);
128 128 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
129 129 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
130 130 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
131 131 int cpu_ectag_pa_to_subblk_state(int cachesize,
132 132 uint64_t subaddr, uint64_t tag);
133 133 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
134 134 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
135 135 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
136 136 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
137 137 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
138 138 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
139 139 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
140 140 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
141 141 static void cpu_scrubphys(struct async_flt *aflt);
142 142 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
143 143 int *, int *);
144 144 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
145 145 static void cpu_ereport_init(struct async_flt *aflt);
146 146 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
147 147 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
148 148 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
149 149 uint64_t nceen, ch_cpu_logout_t *clop);
150 150 static int cpu_ce_delayed_ec_logout(uint64_t);
151 151 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
152 152 static int cpu_error_is_ecache_data(int, uint64_t);
153 153 static void cpu_fmri_cpu_set(nvlist_t *, int);
154 154 static int cpu_error_to_resource_type(struct async_flt *aflt);
155 155
156 156 #ifdef CHEETAHPLUS_ERRATUM_25
157 157 static int mondo_recover_proc(uint16_t, int);
158 158 static void cheetah_nudge_init(void);
159 159 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
160 160 cyc_time_t *when);
161 161 static void cheetah_nudge_buddy(void);
162 162 #endif /* CHEETAHPLUS_ERRATUM_25 */
163 163
164 164 #if defined(CPU_IMP_L1_CACHE_PARITY)
165 165 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
166 166 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
167 167 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
168 168 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
169 169 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
170 170 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
171 171 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
172 172 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
173 173 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
174 174 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
175 175 #endif /* CPU_IMP_L1_CACHE_PARITY */
176 176
177 177 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
178 178 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
179 179 int *segsp, int *banksp, int *mcidp);
180 180
181 181 /*
182 182 * This table is used to determine which bit(s) is(are) bad when an ECC
183 183  * error occurs. The array is indexed by a 9-bit syndrome. The entries
184 184 * of this array have the following semantics:
185 185 *
186 186 * 00-127 The number of the bad bit, when only one bit is bad.
187 187 * 128 ECC bit C0 is bad.
188 188 * 129 ECC bit C1 is bad.
189 189 * 130 ECC bit C2 is bad.
190 190 * 131 ECC bit C3 is bad.
191 191 * 132 ECC bit C4 is bad.
192 192 * 133 ECC bit C5 is bad.
193 193 * 134 ECC bit C6 is bad.
194 194 * 135 ECC bit C7 is bad.
195 195 * 136 ECC bit C8 is bad.
196 196 * 137-143 reserved for Mtag Data and ECC.
197 197 * 144(M2) Two bits are bad within a nibble.
198 198 * 145(M3) Three bits are bad within a nibble.
199 199  * 146(M4) Four bits are bad within a nibble.
200 200 * 147(M) Multiple bits (5 or more) are bad.
201 201 * 148 NO bits are bad.
202 202 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
203 203 */
204 204
205 205 #define C0 128
206 206 #define C1 129
207 207 #define C2 130
208 208 #define C3 131
209 209 #define C4 132
210 210 #define C5 133
211 211 #define C6 134
212 212 #define C7 135
213 213 #define C8 136
214 214 #define MT0 137 /* Mtag Data bit 0 */
215 215 #define MT1 138
216 216 #define MT2 139
217 217 #define MTC0 140 /* Mtag Check bit 0 */
218 218 #define MTC1 141
219 219 #define MTC2 142
220 220 #define MTC3 143
221 221 #define M2 144
222 222 #define M3 145
223 223 #define M4 146
224 224 #define M 147
225 225 #define NA 148
226 226 #if defined(JALAPENO) || defined(SERRANO)
227 227 #define S003 149 /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
228 228 #define S003MEM 150 /* Syndrome 0x003 => likely from WDU/WBP */
229 229 #define SLAST S003MEM /* last special syndrome */
230 230 #else /* JALAPENO || SERRANO */
231 231 #define S003 149 /* Syndrome 0x003 => likely from EDU:ST */
232 232 #define S071 150 /* Syndrome 0x071 => likely from WDU/CPU */
233 233 #define S11C 151 /* Syndrome 0x11c => likely from BERR/DBERR */
234 234 #define SLAST S11C /* last special syndrome */
235 235 #endif /* JALAPENO || SERRANO */
236 236 #if defined(JALAPENO) || defined(SERRANO)
237 237 #define BPAR0 152 /* syndromes 152 through 167 for bus parity */
238 238 #define BPAR15 167
239 239 #endif /* JALAPENO || SERRANO */
240 240
241 241 static uint8_t ecc_syndrome_tab[] =
242 242 {
243 243 NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M,
244 244 C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16,
245 245 C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10,
246 246 M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M,
247 247 C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6,
248 248 M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4,
249 249 M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4,
250 250 #if defined(JALAPENO) || defined(SERRANO)
251 251 116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
252 252 #else /* JALAPENO || SERRANO */
253 253 116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
254 254 #endif /* JALAPENO || SERRANO */
255 255 C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5,
256 256 M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M,
257 257 M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2,
258 258 103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3,
259 259 M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M,
260 260 102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3,
261 261 98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M,
262 262 M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M,
263 263 C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4,
264 264 #if defined(JALAPENO) || defined(SERRANO)
265 265 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M,
266 266 #else /* JALAPENO || SERRANO */
267 267 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M,
268 268 #endif /* JALAPENO || SERRANO */
269 269 M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2,
270 270 94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M,
271 271 M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4,
272 272 89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3,
273 273 86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3,
274 274 M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2,
275 275 M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4,
276 276 77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M,
277 277 74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3,
278 278 M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M,
279 279 80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3,
280 280 M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M,
281 281 M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M,
282 282 111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M
283 283 };
284 284
285 285 #define ESYND_TBL_SIZE (sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
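
For readers of the table above, here is a minimal sketch (not part of this file or of this change) of how a 9-bit E$ data syndrome would be classified through ecc_syndrome_tab; the helper name synd_classify is hypothetical and uses only the C0..NA codes defined earlier:

/*
 * Illustrative sketch only: classify a 9-bit E$ data syndrome using
 * ecc_syndrome_tab and the codes defined above.
 */
static const char *
synd_classify(ushort_t synd)
{
	uint8_t code;

	if (synd >= ESYND_TBL_SIZE)
		return ("invalid syndrome");
	code = ecc_syndrome_tab[synd];
	if (code < C0)
		return ("single data bit in error");	/* bit number == code */
	if (code <= C8)
		return ("single ECC check bit in error");
	if (code == M2 || code == M3 || code == M4 || code == M)
		return ("multiple bits in error");
	if (code == NA)
		return ("no bits in error");
	return ("special syndrome (see S003..SLAST above)");
}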
286 286
287 287 #if !(defined(JALAPENO) || defined(SERRANO))
288 288 /*
289 289 * This table is used to determine which bit(s) is(are) bad when a Mtag
290 290  * error occurs. The array is indexed by a 4-bit ECC syndrome. The entries
291 291 * of this array have the following semantics:
292 292 *
293 293 * -1 Invalid mtag syndrome.
294 294 * 137 Mtag Data 0 is bad.
295 295 * 138 Mtag Data 1 is bad.
296 296 * 139 Mtag Data 2 is bad.
297 297 * 140 Mtag ECC 0 is bad.
298 298 * 141 Mtag ECC 1 is bad.
299 299 * 142 Mtag ECC 2 is bad.
300 300 * 143 Mtag ECC 3 is bad.
301 301  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
302 302 */
303 303 short mtag_syndrome_tab[] =
304 304 {
305 305 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2
306 306 };
307 307
308 308 #define MSYND_TBL_SIZE (sizeof (mtag_syndrome_tab) / sizeof (short))
309 309
310 310 #else /* !(JALAPENO || SERRANO) */
311 311
312 312 #define BSYND_TBL_SIZE 16
313 313
314 314 #endif /* !(JALAPENO || SERRANO) */
315 315
316 316 /*
317 317 * Virtual Address bit flag in the data cache. This is actually bit 2 in the
318 318 * dcache data tag.
319 319 */
320 320 #define VA13 INT64_C(0x0000000000000002)
321 321
322 322 /*
323 323 * Types returned from cpu_error_to_resource_type()
324 324 */
325 325 #define ERRTYPE_UNKNOWN 0
326 326 #define ERRTYPE_CPU 1
327 327 #define ERRTYPE_MEMORY 2
328 328 #define ERRTYPE_ECACHE_DATA 3
329 329
330 330 /*
331 331 * CE initial classification and subsequent action lookup table
332 332 */
333 333 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
334 334 static int ce_disp_inited;
335 335
336 336 /*
337 337 * Set to disable leaky and partner check for memory correctables
338 338 */
339 339 int ce_xdiag_off;
340 340
341 341 /*
342 342 * The following are not incremented atomically so are indicative only
343 343 */
344 344 static int ce_xdiag_drops;
345 345 static int ce_xdiag_lkydrops;
346 346 static int ce_xdiag_ptnrdrops;
347 347 static int ce_xdiag_bad;
348 348
349 349 /*
350 350 * CE leaky check callback structure
351 351 */
352 352 typedef struct {
353 353 struct async_flt *lkycb_aflt;
354 354 errorq_t *lkycb_eqp;
355 355 errorq_elem_t *lkycb_eqep;
356 356 } ce_lkychk_cb_t;
357 357
358 358 /*
359 359 * defines for various ecache_flush_flag's
360 360 */
361 361 #define ECACHE_FLUSH_LINE 1
362 362 #define ECACHE_FLUSH_ALL 2
363 363
364 364 /*
365 365 * STICK sync
366 366 */
367 367 #define STICK_ITERATION 10
368 368 #define MAX_TSKEW 1
369 369 #define EV_A_START 0
370 370 #define EV_A_END 1
371 371 #define EV_B_START 2
372 372 #define EV_B_END 3
373 373 #define EVENTS 4
374 374
375 375 static int64_t stick_iter = STICK_ITERATION;
376 376 static int64_t stick_tsk = MAX_TSKEW;
377 377
378 378 typedef enum {
379 379 EVENT_NULL = 0,
380 380 SLAVE_START,
381 381 SLAVE_CONT,
382 382 MASTER_START
383 383 } event_cmd_t;
384 384
385 385 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
386 386 static int64_t timestamp[EVENTS];
387 387 static volatile int slave_done;
388 388
389 389 #ifdef DEBUG
390 390 #define DSYNC_ATTEMPTS 64
391 391 typedef struct {
392 392 int64_t skew_val[DSYNC_ATTEMPTS];
393 393 } ss_t;
394 394
395 395 ss_t stick_sync_stats[NCPU];
396 396 #endif /* DEBUG */
397 397
398 398 uint_t cpu_impl_dual_pgsz = 0;
399 399 #if defined(CPU_IMP_DUAL_PAGESIZE)
400 400 uint_t disable_dual_pgsz = 0;
401 401 #endif /* CPU_IMP_DUAL_PAGESIZE */
402 402
403 403 /*
404 404 * Save the cache bootup state for use when internal
405 405 * caches are to be re-enabled after an error occurs.
406 406 */
407 407 uint64_t cache_boot_state;
408 408
409 409 /*
410 410 * PA[22:0] represent Displacement in Safari configuration space.
411 411 */
412 412 uint_t root_phys_addr_lo_mask = 0x7fffffu;
413 413
414 414 bus_config_eclk_t bus_config_eclk[] = {
415 415 #if defined(JALAPENO) || defined(SERRANO)
416 416 {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
417 417 {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
418 418 {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
419 419 #else /* JALAPENO || SERRANO */
420 420 {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
421 421 {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
422 422 {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
423 423 #endif /* JALAPENO || SERRANO */
424 424 {0, 0}
425 425 };
426 426
427 427 /*
428 428 * Interval for deferred CEEN reenable
429 429 */
430 430 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
431 431
432 432 /*
433 433 * set in /etc/system to control logging of user BERR/TO's
434 434 */
435 435 int cpu_berr_to_verbose = 0;
436 436
437 437 /*
438 438 * set to 0 in /etc/system to defer CEEN reenable for all CEs
439 439 */
440 440 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
441 441 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
442 442
443 443 /*
444 444 * Set of all offline cpus
445 445 */
446 446 cpuset_t cpu_offline_set;
447 447
448 448 static void cpu_delayed_check_ce_errors(void *);
449 449 static void cpu_check_ce_errors(void *);
450 450 void cpu_error_ecache_flush(ch_async_flt_t *);
451 451 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
452 452 static void cpu_log_and_clear_ce(ch_async_flt_t *);
453 453 void cpu_ce_detected(ch_cpu_errors_t *, int);
454 454
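
Since this webrev's subject is the introduction of drv_sectohz(), the deferred CE re-enable path declared above is presumably where cpu_ceen_delay_secs gets converted from seconds to clock ticks. A hedged sketch of what that conversion looks like (the call site and the drv_sectohz() signature are assumptions, not part of the hunk shown):

/*
 * Illustrative only: schedule the deferred CEEN re-enable
 * cpu_ceen_delay_secs seconds out, letting the new drv_sectohz()
 * convert seconds to ticks (rather than a drv_usectohz() of microseconds).
 */
static void
example_schedule_ce_check(void *arg)
{
	(void) timeout(cpu_delayed_check_ce_errors, arg,
	    drv_sectohz(cpu_ceen_delay_secs));
}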
455 455 /*
456 456 * CE Leaky check timeout in microseconds. This is chosen to be twice the
457 457  * memory refresh interval of current DIMMs (64ms). After the initial fix, that
458 458 * gives at least one full refresh cycle in which the cell can leak
459 459 * (whereafter further refreshes simply reinforce any incorrect bit value).
460 460 */
461 461 clock_t cpu_ce_lkychk_timeout_usec = 128000;
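
The default above follows directly from the comment, assuming a 64 ms DIMM refresh interval:

/* 2 x 64 ms = 128 ms = 128000 microseconds, hence the 128000 default */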
462 462
463 463 /*
464 464 * CE partner check partner caching period in seconds
465 465 */
466 466 int cpu_ce_ptnr_cachetime_sec = 60;
467 467
468 468 /*
469 469 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
470 470 */
471 471 #define CH_SET_TRAP(ttentry, ttlabel) \
472 472 bcopy((const void *)&ttlabel, &ttentry, 32); \
473 473 flush_instr_mem((caddr_t)&ttentry, 32);
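
As a quick check on the constant used in the macro above, assuming 4-byte SPARC instructions:

/* 8 trap-table instructions x 4 bytes per instruction = 32 bytes copied and flushed */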
474 474
475 475 static int min_ecache_size;
476 476 static uint_t priv_hcl_1;
477 477 static uint_t priv_hcl_2;
478 478 static uint_t priv_hcl_4;
479 479 static uint_t priv_hcl_8;
480 480
481 481 void
482 482 cpu_setup(void)
483 483 {
484 484 extern int at_flags;
485 485 extern int cpc_has_overflow_intr;
486 486
487 487 /*
488 488 * Setup chip-specific trap handlers.
489 489 */
490 490 cpu_init_trap();
491 491
492 492 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
493 493
494 494 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
495 495
496 496 /*
497 497 * save the cache bootup state.
498 498 */
499 499 cache_boot_state = get_dcu() & DCU_CACHE;
500 500
501 501 /*
502 502 * Due to the number of entries in the fully-associative tlb
503 503 * this may have to be tuned lower than in spitfire.
504 504 */
505 505 pp_slots = MIN(8, MAXPP_SLOTS);
506 506
507 507 /*
508 508 * Block stores do not invalidate all pages of the d$, pagecopy
509 509  * et al. need virtual translations with virtual coloring taken
510 510 * into consideration. prefetch/ldd will pollute the d$ on the
511 511 * load side.
512 512 */
513 513 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
514 514
515 515 if (use_page_coloring) {
516 516 do_pg_coloring = 1;
517 517 }
518 518
519 519 isa_list =
520 520 "sparcv9+vis2 sparcv9+vis sparcv9 "
521 521 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 522 "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 523
524 524 /*
525 525 * On Panther-based machines, this should
526 526 * also include AV_SPARC_POPC too
527 527 */
528 528 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 529
530 530 /*
531 531 * On cheetah, there's no hole in the virtual address space
532 532 */
533 533 hole_start = hole_end = 0;
534 534
535 535 /*
536 536 * The kpm mapping window.
537 537 * kpm_size:
538 538 * The size of a single kpm range.
539 539 * The overall size will be: kpm_size * vac_colors.
540 540 * kpm_vbase:
541 541 * The virtual start address of the kpm range within the kernel
542 542 * virtual address space. kpm_vbase has to be kpm_size aligned.
543 543 */
544 544 kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 545 kpm_size_shift = 43;
546 546 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 547 kpm_smallpages = 1;
548 548
549 549 /*
550 550 * The traptrace code uses either %tick or %stick for
551 551 * timestamping. We have %stick so we can use it.
552 552 */
553 553 traptrace_use_stick = 1;
554 554
555 555 /*
556 556 * Cheetah has a performance counter overflow interrupt
557 557 */
558 558 cpc_has_overflow_intr = 1;
559 559
560 560 #if defined(CPU_IMP_DUAL_PAGESIZE)
561 561 /*
562 562 * Use Cheetah+ and later dual page size support.
563 563 */
564 564 if (!disable_dual_pgsz) {
565 565 cpu_impl_dual_pgsz = 1;
566 566 }
567 567 #endif /* CPU_IMP_DUAL_PAGESIZE */
568 568
569 569 /*
570 570 * Declare that this architecture/cpu combination does fpRAS.
571 571 */
572 572 fpras_implemented = 1;
573 573
574 574 /*
575 575 * Setup CE lookup table
576 576 */
577 577 CE_INITDISPTBL_POPULATE(ce_disp_table);
578 578 ce_disp_inited = 1;
579 579 }
580 580
581 581 /*
582 582 * Called by setcpudelay
583 583 */
584 584 void
585 585 cpu_init_tick_freq(void)
586 586 {
587 587 /*
588 588 * For UltraSPARC III and beyond we want to use the
589 589 * system clock rate as the basis for low level timing,
590 590  * due to support of mixed speed CPUs and power management.
591 591 */
592 592 if (system_clock_freq == 0)
593 593 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
594 594
595 595 sys_tick_freq = system_clock_freq;
596 596 }
597 597
598 598 #ifdef CHEETAHPLUS_ERRATUM_25
599 599 /*
600 600 * Tunables
601 601 */
602 602 int cheetah_bpe_off = 0;
603 603 int cheetah_sendmondo_recover = 1;
604 604 int cheetah_sendmondo_fullscan = 0;
605 605 int cheetah_sendmondo_recover_delay = 5;
606 606
607 607 #define CHEETAH_LIVELOCK_MIN_DELAY 1
608 608
609 609 /*
610 610 * Recovery Statistics
611 611 */
612 612 typedef struct cheetah_livelock_entry {
613 613 int cpuid; /* fallen cpu */
614 614 int buddy; /* cpu that ran recovery */
615 615 clock_t lbolt; /* when recovery started */
616 616 hrtime_t recovery_time; /* time spent in recovery */
617 617 } cheetah_livelock_entry_t;
618 618
619 619 #define CHEETAH_LIVELOCK_NENTRY 32
620 620
621 621 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
622 622 int cheetah_livelock_entry_nxt;
623 623
624 624 #define CHEETAH_LIVELOCK_ENTRY_NEXT(statp) { \
625 625 statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt; \
626 626 if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) { \
627 627 cheetah_livelock_entry_nxt = 0; \
628 628 } \
629 629 }
630 630
631 631 #define CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val) statp->item = val
632 632
633 633 struct {
634 634 hrtime_t hrt; /* maximum recovery time */
635 635 int recovery; /* recovered */
636 636 int full_claimed; /* maximum pages claimed in full recovery */
637 637 int proc_entry; /* attempted to claim TSB */
638 638 int proc_tsb_scan; /* tsb scanned */
639 639 int proc_tsb_partscan; /* tsb partially scanned */
640 640 int proc_tsb_fullscan; /* whole tsb scanned */
641 641 int proc_claimed; /* maximum pages claimed in tsb scan */
642 642 int proc_user; /* user thread */
643 643 int proc_kernel; /* kernel thread */
644 644 int proc_onflt; /* bad stack */
645 645 int proc_cpu; /* null cpu */
646 646 int proc_thread; /* null thread */
647 647 int proc_proc; /* null proc */
648 648 int proc_as; /* null as */
649 649 int proc_hat; /* null hat */
650 650 int proc_hat_inval; /* hat contents don't make sense */
651 651 int proc_hat_busy; /* hat is changing TSBs */
652 652 int proc_tsb_reloc; /* TSB skipped because being relocated */
653 653 int proc_cnum_bad; /* cnum out of range */
654 654 int proc_cnum; /* last cnum processed */
655 655 tte_t proc_tte; /* last tte processed */
656 656 } cheetah_livelock_stat;
657 657
658 658 #define CHEETAH_LIVELOCK_STAT(item) cheetah_livelock_stat.item++
659 659
660 660 #define CHEETAH_LIVELOCK_STATSET(item, value) \
661 661 cheetah_livelock_stat.item = value
662 662
663 663 #define CHEETAH_LIVELOCK_MAXSTAT(item, value) { \
664 664 if (value > cheetah_livelock_stat.item) \
665 665 cheetah_livelock_stat.item = value; \
666 666 }
667 667
668 668 /*
669 669 * Attempt to recover a cpu by claiming every cache line as saved
670 670 * in the TSB that the non-responsive cpu is using. Since we can't
671 671 * grab any adaptive lock, this is at best an attempt to do so. Because
672 672 * we don't grab any locks, we must operate under the protection of
673 673 * on_fault().
674 674 *
675 675 * Return 1 if cpuid could be recovered, 0 if failed.
676 676 */
677 677 int
678 678 mondo_recover_proc(uint16_t cpuid, int bn)
679 679 {
680 680 label_t ljb;
681 681 cpu_t *cp;
682 682 kthread_t *t;
683 683 proc_t *p;
684 684 struct as *as;
685 685 struct hat *hat;
686 686 uint_t cnum;
687 687 struct tsb_info *tsbinfop;
688 688 struct tsbe *tsbep;
689 689 caddr_t tsbp;
690 690 caddr_t end_tsbp;
691 691 uint64_t paddr;
692 692 uint64_t idsr;
693 693 u_longlong_t pahi, palo;
694 694 int pages_claimed = 0;
695 695 tte_t tsbe_tte;
696 696 int tried_kernel_tsb = 0;
697 697 mmu_ctx_t *mmu_ctxp;
698 698
699 699 CHEETAH_LIVELOCK_STAT(proc_entry);
700 700
701 701 if (on_fault(&ljb)) {
702 702 CHEETAH_LIVELOCK_STAT(proc_onflt);
703 703 goto badstruct;
704 704 }
705 705
706 706 if ((cp = cpu[cpuid]) == NULL) {
707 707 CHEETAH_LIVELOCK_STAT(proc_cpu);
708 708 goto badstruct;
709 709 }
710 710
711 711 if ((t = cp->cpu_thread) == NULL) {
712 712 CHEETAH_LIVELOCK_STAT(proc_thread);
713 713 goto badstruct;
714 714 }
715 715
716 716 if ((p = ttoproc(t)) == NULL) {
717 717 CHEETAH_LIVELOCK_STAT(proc_proc);
718 718 goto badstruct;
719 719 }
720 720
721 721 if ((as = p->p_as) == NULL) {
722 722 CHEETAH_LIVELOCK_STAT(proc_as);
723 723 goto badstruct;
724 724 }
725 725
726 726 if ((hat = as->a_hat) == NULL) {
727 727 CHEETAH_LIVELOCK_STAT(proc_hat);
728 728 goto badstruct;
729 729 }
730 730
731 731 if (hat != ksfmmup) {
732 732 CHEETAH_LIVELOCK_STAT(proc_user);
733 733 if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
734 734 CHEETAH_LIVELOCK_STAT(proc_hat_busy);
735 735 goto badstruct;
736 736 }
737 737 tsbinfop = hat->sfmmu_tsb;
738 738 if (tsbinfop == NULL) {
739 739 CHEETAH_LIVELOCK_STAT(proc_hat_inval);
740 740 goto badstruct;
741 741 }
742 742 tsbp = tsbinfop->tsb_va;
743 743 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
744 744 } else {
745 745 CHEETAH_LIVELOCK_STAT(proc_kernel);
746 746 tsbinfop = NULL;
747 747 tsbp = ktsb_base;
748 748 end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
749 749 }
750 750
751 751 /* Verify as */
752 752 if (hat->sfmmu_as != as) {
753 753 CHEETAH_LIVELOCK_STAT(proc_hat_inval);
754 754 goto badstruct;
755 755 }
756 756
757 757 mmu_ctxp = CPU_MMU_CTXP(cp);
758 758 ASSERT(mmu_ctxp);
759 759 cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
760 760 CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
761 761
762 762 if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
763 763 (cnum >= mmu_ctxp->mmu_nctxs)) {
764 764 CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
765 765 goto badstruct;
766 766 }
767 767
768 768 do {
769 769 CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
770 770
771 771 /*
772 772 * Skip TSBs being relocated. This is important because
773 773 * we want to avoid the following deadlock scenario:
774 774 *
775 775 * 1) when we came in we set ourselves to "in recover" state.
776 776 * 2) when we try to touch TSB being relocated the mapping
777 777 * will be in the suspended state so we'll spin waiting
778 778 * for it to be unlocked.
779 779 * 3) when the CPU that holds the TSB mapping locked tries to
780 780 * unlock it it will send a xtrap which will fail to xcall
781 781 * us or the CPU we're trying to recover, and will in turn
782 782 * enter the mondo code.
783 783 * 4) since we are still spinning on the locked mapping
784 784 * no further progress will be made and the system will
785 785 * inevitably hard hang.
786 786 *
787 787 * A TSB not being relocated can't begin being relocated
788 788 * while we're accessing it because we check
789 789 * sendmondo_in_recover before relocating TSBs.
790 790 */
791 791 if (hat != ksfmmup &&
792 792 (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
793 793 CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
794 794 goto next_tsbinfo;
795 795 }
796 796
797 797 for (tsbep = (struct tsbe *)tsbp;
798 798 tsbep < (struct tsbe *)end_tsbp; tsbep++) {
799 799 tsbe_tte = tsbep->tte_data;
800 800
801 801 if (tsbe_tte.tte_val == 0) {
802 802 /*
803 803 * Invalid tte
804 804 */
805 805 continue;
806 806 }
807 807 if (tsbe_tte.tte_se) {
808 808 /*
809 809 * Don't want device registers
810 810 */
811 811 continue;
812 812 }
813 813 if (tsbe_tte.tte_cp == 0) {
814 814 /*
815 815 * Must be cached in E$
816 816 */
817 817 continue;
818 818 }
819 819 if (tsbep->tte_tag.tag_invalid != 0) {
820 820 /*
821 821  * Invalid tag, ignore this entry.
822 822 */
823 823 continue;
824 824 }
825 825 CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
826 826 idsr = getidsr();
827 827 if ((idsr & (IDSR_NACK_BIT(bn) |
828 828 IDSR_BUSY_BIT(bn))) == 0) {
829 829 CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
830 830 goto done;
831 831 }
832 832 pahi = tsbe_tte.tte_pahi;
833 833 palo = tsbe_tte.tte_palo;
834 834 paddr = (uint64_t)((pahi << 32) |
835 835 (palo << MMU_PAGESHIFT));
836 836 claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
837 837 CH_ECACHE_SUBBLK_SIZE);
838 838 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
839 839 shipit(cpuid, bn);
840 840 }
841 841 pages_claimed++;
842 842 }
843 843 next_tsbinfo:
844 844 if (tsbinfop != NULL)
845 845 tsbinfop = tsbinfop->tsb_next;
846 846 if (tsbinfop != NULL) {
847 847 tsbp = tsbinfop->tsb_va;
848 848 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
849 849 } else if (tsbp == ktsb_base) {
850 850 tried_kernel_tsb = 1;
851 851 } else if (!tried_kernel_tsb) {
852 852 tsbp = ktsb_base;
853 853 end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
854 854 hat = ksfmmup;
855 855 tsbinfop = NULL;
856 856 }
857 857 } while (tsbinfop != NULL ||
858 858 ((tsbp == ktsb_base) && !tried_kernel_tsb));
859 859
860 860 CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
861 861 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
862 862 no_fault();
863 863 idsr = getidsr();
864 864 if ((idsr & (IDSR_NACK_BIT(bn) |
865 865 IDSR_BUSY_BIT(bn))) == 0) {
866 866 return (1);
867 867 } else {
868 868 return (0);
869 869 }
870 870
871 871 done:
872 872 no_fault();
873 873 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
874 874 return (1);
875 875
876 876 badstruct:
877 877 no_fault();
878 878 return (0);
879 879 }
880 880
881 881 /*
882 882 * Attempt to claim ownership, temporarily, of every cache line that a
883 883 * non-responsive cpu might be using. This might kick that cpu out of
884 884 * this state.
885 885 *
886 886 * The return value indicates to the caller if we have exhausted all recovery
887 887 * techniques. If 1 is returned, it is useless to call this function again
888 888 * even for a different target CPU.
889 889 */
890 890 int
891 891 mondo_recover(uint16_t cpuid, int bn)
892 892 {
893 893 struct memseg *seg;
894 894 uint64_t begin_pa, end_pa, cur_pa;
895 895 hrtime_t begin_hrt, end_hrt;
896 896 int retval = 0;
897 897 int pages_claimed = 0;
898 898 cheetah_livelock_entry_t *histp;
899 899 uint64_t idsr;
900 900
901 901 if (atomic_cas_32(&sendmondo_in_recover, 0, 1) != 0) {
902 902 /*
903 903 * Wait while recovery takes place
904 904 */
905 905 while (sendmondo_in_recover) {
906 906 drv_usecwait(1);
907 907 }
908 908 /*
909 909 * Assume we didn't claim the whole memory. If
910 910 * the target of this caller is not recovered,
911 911 * it will come back.
912 912 */
913 913 return (retval);
914 914 }
915 915
916 916 CHEETAH_LIVELOCK_ENTRY_NEXT(histp);
917 917 CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, LBOLT_WAITFREE);
918 918 CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
919 919 CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
920 920
921 921 begin_hrt = gethrtime_waitfree();
922 922 /*
923 923 * First try to claim the lines in the TSB the target
924 924 * may have been using.
925 925 */
926 926 if (mondo_recover_proc(cpuid, bn) == 1) {
927 927 /*
928 928 * Didn't claim the whole memory
929 929 */
930 930 goto done;
931 931 }
932 932
933 933 /*
934 934 * We tried using the TSB. The target is still
935 935 * not recovered. Check if complete memory scan is
936 936 * enabled.
937 937 */
938 938 if (cheetah_sendmondo_fullscan == 0) {
939 939 /*
940 940 * Full memory scan is disabled.
941 941 */
942 942 retval = 1;
943 943 goto done;
944 944 }
945 945
946 946 /*
947 947 * Try claiming the whole memory.
948 948 */
949 949 for (seg = memsegs; seg; seg = seg->next) {
950 950 begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
951 951 end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
952 952 for (cur_pa = begin_pa; cur_pa < end_pa;
953 953 cur_pa += MMU_PAGESIZE) {
954 954 idsr = getidsr();
955 955 if ((idsr & (IDSR_NACK_BIT(bn) |
956 956 IDSR_BUSY_BIT(bn))) == 0) {
957 957 /*
958 958 * Didn't claim all memory
959 959 */
960 960 goto done;
961 961 }
962 962 claimlines(cur_pa, MMU_PAGESIZE,
963 963 CH_ECACHE_SUBBLK_SIZE);
964 964 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
965 965 shipit(cpuid, bn);
966 966 }
967 967 pages_claimed++;
968 968 }
969 969 }
970 970
971 971 /*
972 972 * We did all we could.
973 973 */
974 974 retval = 1;
975 975
976 976 done:
977 977 /*
978 978 * Update statistics
979 979 */
980 980 end_hrt = gethrtime_waitfree();
981 981 CHEETAH_LIVELOCK_STAT(recovery);
982 982 CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
983 983 CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
984 984 CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
985 985 (end_hrt - begin_hrt));
986 986
987 987 while (atomic_cas_32(&sendmondo_in_recover, 1, 0) != 1)
988 988 ;
989 989
990 990 return (retval);
991 991 }
992 992
993 993 /*
994 994 * This is called by the cyclic framework when this CPU becomes online
995 995 */
996 996 /*ARGSUSED*/
997 997 static void
998 998 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
999 999 {
1000 1000
1001 1001 hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1002 1002 hdlr->cyh_level = CY_LOW_LEVEL;
1003 1003 hdlr->cyh_arg = NULL;
1004 1004
1005 1005 /*
1006 1006 * Stagger the start time
1007 1007 */
1008 1008 when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1009 1009 if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1010 1010 cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1011 1011 }
1012 1012 when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1013 1013 }
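
A worked example of the staggering above (the NCPU value is illustrative):

/*
 * With NCPU == 64 and cheetah_sendmondo_recover_delay == 5, CPU 10 first
 * fires at 10 * (NANOSEC / 64) ~= 156 ms after coming online and then
 * every 5 seconds thereafter.
 */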
1014 1014
1015 1015 /*
1016 1016 * Create a low level cyclic to send a xtrap to the next cpu online.
1017 1017 * However, there's no need to have this running on a uniprocessor system.
1018 1018 */
1019 1019 static void
1020 1020 cheetah_nudge_init(void)
1021 1021 {
1022 1022 cyc_omni_handler_t hdlr;
1023 1023
1024 1024 if (max_ncpus == 1) {
1025 1025 return;
1026 1026 }
1027 1027
1028 1028 hdlr.cyo_online = cheetah_nudge_onln;
1029 1029 hdlr.cyo_offline = NULL;
1030 1030 hdlr.cyo_arg = NULL;
1031 1031
1032 1032 mutex_enter(&cpu_lock);
1033 1033 (void) cyclic_add_omni(&hdlr);
1034 1034 mutex_exit(&cpu_lock);
1035 1035 }
1036 1036
1037 1037 /*
1038 1038 * Cyclic handler to wake up buddy
1039 1039 */
1040 1040 void
1041 1041 cheetah_nudge_buddy(void)
1042 1042 {
1043 1043 /*
1044 1044 * Disable kernel preemption to protect the cpu list
1045 1045 */
1046 1046 kpreempt_disable();
1047 1047 if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1048 1048 xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1049 1049 0, 0);
1050 1050 }
1051 1051 kpreempt_enable();
1052 1052 }
1053 1053
1054 1054 #endif /* CHEETAHPLUS_ERRATUM_25 */
1055 1055
1056 1056 #ifdef SEND_MONDO_STATS
1057 1057 uint32_t x_one_stimes[64];
1058 1058 uint32_t x_one_ltimes[16];
1059 1059 uint32_t x_set_stimes[64];
1060 1060 uint32_t x_set_ltimes[16];
1061 1061 uint32_t x_set_cpus[NCPU];
1062 1062 uint32_t x_nack_stimes[64];
1063 1063 #endif
1064 1064
1065 1065 /*
1066 1066 * Note: A version of this function is used by the debugger via the KDI,
1067 1067 * and must be kept in sync with this version. Any changes made to this
1068 1068  * function to support new chips or to accommodate errata must also be included
1069 1069 * in the KDI-specific version. See us3_kdi.c.
1070 1070 */
1071 1071 void
1072 1072 send_one_mondo(int cpuid)
1073 1073 {
1074 1074 int busy, nack;
1075 1075 uint64_t idsr, starttick, endtick, tick, lasttick;
1076 1076 uint64_t busymask;
1077 1077 #ifdef CHEETAHPLUS_ERRATUM_25
1078 1078 int recovered = 0;
1079 1079 #endif
1080 1080
1081 1081 CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1082 1082 starttick = lasttick = gettick();
1083 1083 shipit(cpuid, 0);
1084 1084 endtick = starttick + xc_tick_limit;
1085 1085 busy = nack = 0;
1086 1086 #if defined(JALAPENO) || defined(SERRANO)
1087 1087 /*
1088 1088 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1089 1089 * will be used for dispatching interrupt. For now, assume
1090 1090 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1091 1091 * issues with respect to BUSY/NACK pair usage.
1092 1092 */
1093 1093 busymask = IDSR_BUSY_BIT(cpuid);
1094 1094 #else /* JALAPENO || SERRANO */
1095 1095 busymask = IDSR_BUSY;
1096 1096 #endif /* JALAPENO || SERRANO */
1097 1097 for (;;) {
1098 1098 idsr = getidsr();
1099 1099 if (idsr == 0)
1100 1100 break;
1101 1101
1102 1102 tick = gettick();
1103 1103 /*
1104 1104 * If there is a big jump between the current tick
1105 1105 * count and lasttick, we have probably hit a break
1106 1106 * point. Adjust endtick accordingly to avoid panic.
1107 1107 */
1108 1108 if (tick > (lasttick + xc_tick_jump_limit))
1109 1109 endtick += (tick - lasttick);
1110 1110 lasttick = tick;
1111 1111 if (tick > endtick) {
1112 1112 if (panic_quiesce)
1113 1113 return;
1114 1114 #ifdef CHEETAHPLUS_ERRATUM_25
1115 1115 if (cheetah_sendmondo_recover && recovered == 0) {
1116 1116 if (mondo_recover(cpuid, 0)) {
1117 1117 /*
1118 1118 * We claimed the whole memory or
1119 1119 * full scan is disabled.
1120 1120 */
1121 1121 recovered++;
1122 1122 }
1123 1123 tick = gettick();
1124 1124 endtick = tick + xc_tick_limit;
1125 1125 lasttick = tick;
1126 1126 /*
1127 1127 * Recheck idsr
1128 1128 */
1129 1129 continue;
1130 1130 } else
1131 1131 #endif /* CHEETAHPLUS_ERRATUM_25 */
1132 1132 {
1133 1133 cmn_err(CE_PANIC, "send mondo timeout "
1134 1134 "(target 0x%x) [%d NACK %d BUSY]",
1135 1135 cpuid, nack, busy);
1136 1136 }
1137 1137 }
1138 1138
1139 1139 if (idsr & busymask) {
1140 1140 busy++;
1141 1141 continue;
1142 1142 }
1143 1143 drv_usecwait(1);
1144 1144 shipit(cpuid, 0);
1145 1145 nack++;
1146 1146 busy = 0;
1147 1147 }
1148 1148 #ifdef SEND_MONDO_STATS
1149 1149 {
1150 1150 int n = gettick() - starttick;
1151 1151 if (n < 8192)
1152 1152 x_one_stimes[n >> 7]++;
1153 1153 else
1154 1154 x_one_ltimes[(n >> 13) & 0xf]++;
1155 1155 }
1156 1156 #endif
1157 1157 }
1158 1158
1159 1159 void
1160 1160 syncfpu(void)
1161 1161 {
1162 1162 }
1163 1163
1164 1164 /*
1165 1165 * Return processor specific async error structure
1166 1166 * size used.
1167 1167 */
1168 1168 int
1169 1169 cpu_aflt_size(void)
1170 1170 {
1171 1171 return (sizeof (ch_async_flt_t));
1172 1172 }
1173 1173
1174 1174 /*
1175 1175 * Tunable to disable the checking of other cpu logout areas during panic for
1176 1176 * potential syndrome 71 generating errors.
1177 1177 */
1178 1178 int enable_check_other_cpus_logout = 1;
1179 1179
1180 1180 /*
1181 1181 * Check other cpus logout area for potential synd 71 generating
1182 1182 * errors.
1183 1183 */
1184 1184 static void
1185 1185 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1186 1186 ch_cpu_logout_t *clop)
1187 1187 {
1188 1188 struct async_flt *aflt;
1189 1189 ch_async_flt_t ch_flt;
1190 1190 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1191 1191
1192 1192 if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1193 1193 return;
1194 1194 }
1195 1195
1196 1196 bzero(&ch_flt, sizeof (ch_async_flt_t));
1197 1197
1198 1198 t_afar = clop->clo_data.chd_afar;
1199 1199 t_afsr = clop->clo_data.chd_afsr;
1200 1200 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1201 1201 #if defined(SERRANO)
1202 1202 ch_flt.afar2 = clop->clo_data.chd_afar2;
1203 1203 #endif /* SERRANO */
1204 1204
1205 1205 /*
1206 1206 * In order to simplify code, we maintain this afsr_errs
1207 1207 * variable which holds the aggregate of AFSR and AFSR_EXT
1208 1208 * sticky bits.
1209 1209 */
1210 1210 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1211 1211 (t_afsr & C_AFSR_ALL_ERRS);
1212 1212
1213 1213 /* Setup the async fault structure */
1214 1214 aflt = (struct async_flt *)&ch_flt;
1215 1215 aflt->flt_id = gethrtime_waitfree();
1216 1216 ch_flt.afsr_ext = t_afsr_ext;
1217 1217 ch_flt.afsr_errs = t_afsr_errs;
1218 1218 aflt->flt_stat = t_afsr;
1219 1219 aflt->flt_addr = t_afar;
1220 1220 aflt->flt_bus_id = cpuid;
1221 1221 aflt->flt_inst = cpuid;
1222 1222 aflt->flt_pc = tpc;
1223 1223 aflt->flt_prot = AFLT_PROT_NONE;
1224 1224 aflt->flt_class = CPU_FAULT;
1225 1225 aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1226 1226 aflt->flt_tl = tl;
1227 1227 aflt->flt_status = ecc_type;
1228 1228 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1229 1229
1230 1230 /*
1231 1231 * Queue events on the async event queue, one event per error bit.
1232 1232 * If no events are queued, queue an event to complain.
1233 1233 */
1234 1234 if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1235 1235 ch_flt.flt_type = CPU_INV_AFSR;
1236 1236 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1237 1237 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1238 1238 aflt->flt_panic);
1239 1239 }
1240 1240
1241 1241 /*
1242 1242 * Zero out + invalidate CPU logout.
1243 1243 */
1244 1244 bzero(clop, sizeof (ch_cpu_logout_t));
1245 1245 clop->clo_data.chd_afar = LOGOUT_INVALID;
1246 1246 }
1247 1247
1248 1248 /*
1249 1249 * Check the logout areas of all other cpus for unlogged errors.
1250 1250 */
1251 1251 static void
1252 1252 cpu_check_other_cpus_logout(void)
1253 1253 {
1254 1254 int i, j;
1255 1255 processorid_t myid;
1256 1256 struct cpu *cp;
1257 1257 ch_err_tl1_data_t *cl1p;
1258 1258
1259 1259 myid = CPU->cpu_id;
1260 1260 for (i = 0; i < NCPU; i++) {
1261 1261 cp = cpu[i];
1262 1262
1263 1263 if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1264 1264 (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1265 1265 continue;
1266 1266 }
1267 1267
1268 1268 /*
1269 1269 * Check each of the tl>0 logout areas
1270 1270 */
1271 1271 cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1272 1272 for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1273 1273 if (cl1p->ch_err_tl1_flags == 0)
1274 1274 continue;
1275 1275
1276 1276 cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1277 1277 1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1278 1278 }
1279 1279
1280 1280 /*
1281 1281 * Check each of the remaining logout areas
1282 1282 */
1283 1283 cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1284 1284 CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1285 1285 cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1286 1286 CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1287 1287 cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1288 1288 CPU_PRIVATE_PTR(cp, chpr_async_logout));
1289 1289 }
1290 1290 }
1291 1291
1292 1292 /*
1293 1293 * The fast_ecc_err handler transfers control here for UCU, UCC events.
1294 1294 * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1295 1295 * flush the error that caused the UCU/UCC, then again here at the end to
1296 1296 * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1297 1297 * the probability of getting a TL>1 Fast ECC trap when we're fielding
1298 1298 * another Fast ECC trap.
1299 1299 *
1300 1300 * Cheetah+ also handles: TSCE: No additional processing required.
1301 1301 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1302 1302 *
1303 1303 * Note that the p_clo_flags input is only valid in cases where the
1304 1304 * cpu_private struct is not yet initialized (since that is the only
1305 1305 * time that information cannot be obtained from the logout struct.)
1306 1306 */
1307 1307 /*ARGSUSED*/
1308 1308 void
1309 1309 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1310 1310 {
1311 1311 ch_cpu_logout_t *clop;
1312 1312 uint64_t ceen, nceen;
1313 1313
1314 1314 /*
1315 1315 * Get the CPU log out info. If we can't find our CPU private
1316 1316  * pointer, then we will have to make do without any detailed
1317 1317 * logout information.
1318 1318 */
1319 1319 if (CPU_PRIVATE(CPU) == NULL) {
1320 1320 clop = NULL;
1321 1321 ceen = p_clo_flags & EN_REG_CEEN;
1322 1322 nceen = p_clo_flags & EN_REG_NCEEN;
1323 1323 } else {
1324 1324 clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1325 1325 ceen = clop->clo_flags & EN_REG_CEEN;
1326 1326 nceen = clop->clo_flags & EN_REG_NCEEN;
1327 1327 }
1328 1328
1329 1329 cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1330 1330 (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1331 1331 }
1332 1332
1333 1333 /*
1334 1334 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1335 1335 * ECC at TL>0. Need to supply either a error register pointer or a
1336 1336 * cpu logout structure pointer.
1337 1337 */
1338 1338 static void
1339 1339 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1340 1340 uint64_t nceen, ch_cpu_logout_t *clop)
1341 1341 {
1342 1342 struct async_flt *aflt;
1343 1343 ch_async_flt_t ch_flt;
1344 1344 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1345 1345 char pr_reason[MAX_REASON_STRING];
1346 1346 ch_cpu_errors_t cpu_error_regs;
1347 1347
1348 1348 bzero(&ch_flt, sizeof (ch_async_flt_t));
1349 1349 /*
1350 1350  * If no cpu logout data, then we will have to make do without
1351 1351 * any detailed logout information.
1352 1352 */
1353 1353 if (clop == NULL) {
1354 1354 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1355 1355 get_cpu_error_state(&cpu_error_regs);
1356 1356 set_cpu_error_state(&cpu_error_regs);
1357 1357 t_afar = cpu_error_regs.afar;
1358 1358 t_afsr = cpu_error_regs.afsr;
1359 1359 t_afsr_ext = cpu_error_regs.afsr_ext;
1360 1360 #if defined(SERRANO)
1361 1361 ch_flt.afar2 = cpu_error_regs.afar2;
1362 1362 #endif /* SERRANO */
1363 1363 } else {
1364 1364 t_afar = clop->clo_data.chd_afar;
1365 1365 t_afsr = clop->clo_data.chd_afsr;
1366 1366 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1367 1367 #if defined(SERRANO)
1368 1368 ch_flt.afar2 = clop->clo_data.chd_afar2;
1369 1369 #endif /* SERRANO */
1370 1370 }
1371 1371
1372 1372 /*
1373 1373 * In order to simplify code, we maintain this afsr_errs
1374 1374 * variable which holds the aggregate of AFSR and AFSR_EXT
1375 1375 * sticky bits.
1376 1376 */
1377 1377 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1378 1378 (t_afsr & C_AFSR_ALL_ERRS);
1379 1379 pr_reason[0] = '\0';
1380 1380
1381 1381 /* Setup the async fault structure */
1382 1382 aflt = (struct async_flt *)&ch_flt;
1383 1383 aflt->flt_id = gethrtime_waitfree();
1384 1384 ch_flt.afsr_ext = t_afsr_ext;
1385 1385 ch_flt.afsr_errs = t_afsr_errs;
1386 1386 aflt->flt_stat = t_afsr;
1387 1387 aflt->flt_addr = t_afar;
1388 1388 aflt->flt_bus_id = getprocessorid();
1389 1389 aflt->flt_inst = CPU->cpu_id;
1390 1390 aflt->flt_pc = tpc;
1391 1391 aflt->flt_prot = AFLT_PROT_NONE;
1392 1392 aflt->flt_class = CPU_FAULT;
1393 1393 aflt->flt_priv = priv;
1394 1394 aflt->flt_tl = tl;
1395 1395 aflt->flt_status = ECC_F_TRAP;
1396 1396 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1397 1397
1398 1398 /*
1399 1399 * XXXX - Phenomenal hack to get around Solaris not getting all the
1400 1400 * cmn_err messages out to the console. The situation is a UCU (in
1401 1401 * priv mode) which causes a WDU which causes a UE (on the retry).
1402 1402 * The messages for the UCU and WDU are enqueued and then pulled off
1403 1403 * the async queue via softint and syslogd starts to process them
1404 1404 * but doesn't get them to the console. The UE causes a panic, but
1405 1405 * since the UCU/WDU messages are already in transit, those aren't
1406 1406 * on the async queue. The hack is to check if we have a matching
1407 1407 * WDU event for the UCU, and if it matches, we're more than likely
1408 1408 * going to panic with a UE, unless we're under protection. So, we
1409 1409 * check to see if we got a matching WDU event and if we're under
1410 1410 * protection.
1411 1411 *
1412 1412 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1413 1413 * looks like this:
1414 1414 * UCU->WDU->UE
1415 1415 * For Panther, it could look like either of these:
1416 1416 * UCU---->WDU->L3_WDU->UE
1417 1417 * L3_UCU->WDU->L3_WDU->UE
1418 1418 */
1419 1419 if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1420 1420 aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1421 1421 curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1422 1422 get_cpu_error_state(&cpu_error_regs);
1423 1423 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1424 1424 aflt->flt_panic |=
1425 1425 ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1426 1426 (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1427 1427 (cpu_error_regs.afar == t_afar));
1428 1428 aflt->flt_panic |= ((clop == NULL) &&
1429 1429 (t_afsr_errs & C_AFSR_WDU) &&
1430 1430 (t_afsr_errs & C_AFSR_L3_WDU));
1431 1431 } else {
1432 1432 aflt->flt_panic |=
1433 1433 ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1434 1434 (cpu_error_regs.afar == t_afar));
1435 1435 aflt->flt_panic |= ((clop == NULL) &&
1436 1436 (t_afsr_errs & C_AFSR_WDU));
1437 1437 }
1438 1438 }
1439 1439
1440 1440 /*
1441 1441 * Queue events on the async event queue, one event per error bit.
1442 1442 * If no events are queued or no Fast ECC events are on in the AFSR,
1443 1443 * queue an event to complain.
1444 1444 */
1445 1445 if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1446 1446 ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1447 1447 ch_flt.flt_type = CPU_INV_AFSR;
1448 1448 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1449 1449 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1450 1450 aflt->flt_panic);
1451 1451 }
1452 1452
1453 1453 /*
1454 1454 * Zero out + invalidate CPU logout.
1455 1455 */
1456 1456 if (clop) {
1457 1457 bzero(clop, sizeof (ch_cpu_logout_t));
1458 1458 clop->clo_data.chd_afar = LOGOUT_INVALID;
1459 1459 }
1460 1460
1461 1461 /*
1462 1462 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1463 1463 * or disrupting errors have happened. We do this because if a
1464 1464 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1465 1465 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that
1466 1466 * CEEN works differently on Cheetah than on Spitfire. Also, we enable
1467 1467 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1468 1468 * deferred or disrupting error happening between checking the AFSR and
1469 1469 * enabling NCEEN/CEEN.
1470 1470 *
1471 1471  * Note: CEEN and NCEEN are only reenabled if they were on when the
1472 1472  * trap was taken.
1473 1473 */
1474 1474 set_error_enable(get_error_enable() | (nceen | ceen));
1475 1475 if (clear_errors(&ch_flt)) {
1476 1476 aflt->flt_panic |= ((ch_flt.afsr_errs &
1477 1477 (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1478 1478 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1479 1479 NULL);
1480 1480 }
1481 1481
1482 1482 /*
1483 1483 * Panic here if aflt->flt_panic has been set. Enqueued errors will
1484 1484 * be logged as part of the panic flow.
1485 1485 */
1486 1486 if (aflt->flt_panic)
1487 1487 fm_panic("%sError(s)", pr_reason);
1488 1488
1489 1489 /*
1490 1490 * Flushing the Ecache here gets the part of the trap handler that
1491 1491 * is run at TL=1 out of the Ecache.
1492 1492 */
1493 1493 cpu_flush_ecache();
1494 1494 }
1495 1495
1496 1496 /*
1497 1497 * This is called via sys_trap from pil15_interrupt code if the
1498 1498 * corresponding entry in ch_err_tl1_pending is set. Checks the
1499 1499 * various ch_err_tl1_data structures for valid entries based on the bit
1500 1500 * settings in the ch_err_tl1_flags entry of the structure.
1501 1501 */
1502 1502 /*ARGSUSED*/
1503 1503 void
1504 1504 cpu_tl1_error(struct regs *rp, int panic)
1505 1505 {
1506 1506 ch_err_tl1_data_t *cl1p, cl1;
1507 1507 int i, ncl1ps;
1508 1508 uint64_t me_flags;
1509 1509 uint64_t ceen, nceen;
1510 1510
1511 1511 if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1512 1512 cl1p = &ch_err_tl1_data;
1513 1513 ncl1ps = 1;
1514 1514 } else if (CPU_PRIVATE(CPU) != NULL) {
1515 1515 cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1516 1516 ncl1ps = CH_ERR_TL1_TLMAX;
1517 1517 } else {
1518 1518 ncl1ps = 0;
1519 1519 }
1520 1520
1521 1521 for (i = 0; i < ncl1ps; i++, cl1p++) {
1522 1522 if (cl1p->ch_err_tl1_flags == 0)
1523 1523 continue;
1524 1524
1525 1525 /*
1526 1526 * Grab a copy of the logout data and invalidate
1527 1527 * the logout area.
1528 1528 */
1529 1529 cl1 = *cl1p;
1530 1530 bzero(cl1p, sizeof (ch_err_tl1_data_t));
1531 1531 cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1532 1532 me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1533 1533
1534 1534 /*
1535 1535 * Log "first error" in ch_err_tl1_data.
1536 1536 */
1537 1537 if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1538 1538 ceen = get_error_enable() & EN_REG_CEEN;
1539 1539 nceen = get_error_enable() & EN_REG_NCEEN;
1540 1540 cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1541 1541 1, ceen, nceen, &cl1.ch_err_tl1_logout);
1542 1542 }
1543 1543 #if defined(CPU_IMP_L1_CACHE_PARITY)
1544 1544 if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1545 1545 cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1546 1546 (caddr_t)cl1.ch_err_tl1_tpc);
1547 1547 }
1548 1548 #endif /* CPU_IMP_L1_CACHE_PARITY */
1549 1549
1550 1550 /*
1551 1551 * Log "multiple events" in ch_err_tl1_data. Note that
1552 1552 * we don't read and clear the AFSR/AFAR in the TL>0 code
1553 1553 * if the structure is busy, we just do the cache flushing
1554 1554 * we have to do and then do the retry. So the AFSR/AFAR
1555 1555 * at this point *should* have some relevant info. If there
1556 1556 * are no valid errors in the AFSR, we'll assume they've
1557 1557 * already been picked up and logged. For I$/D$ parity,
1558 1558 * we just log an event with an "Unknown" (NULL) TPC.
1559 1559 */
1560 1560 if (me_flags & CH_ERR_FECC) {
1561 1561 ch_cpu_errors_t cpu_error_regs;
1562 1562 uint64_t t_afsr_errs;
1563 1563
1564 1564 /*
1565 1565 * Get the error registers and see if there's
1566 1566 * a pending error. If not, don't bother
1567 1567 * generating an "Invalid AFSR" error event.
1568 1568 */
1569 1569 get_cpu_error_state(&cpu_error_regs);
1570 1570 t_afsr_errs = (cpu_error_regs.afsr_ext &
1571 1571 C_AFSR_EXT_ALL_ERRS) |
1572 1572 (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1573 1573 if (t_afsr_errs != 0) {
1574 1574 ceen = get_error_enable() & EN_REG_CEEN;
1575 1575 nceen = get_error_enable() & EN_REG_NCEEN;
1576 1576 cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1577 1577 1, ceen, nceen, NULL);
1578 1578 }
1579 1579 }
1580 1580 #if defined(CPU_IMP_L1_CACHE_PARITY)
1581 1581 if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1582 1582 cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1583 1583 }
1584 1584 #endif /* CPU_IMP_L1_CACHE_PARITY */
1585 1585 }
1586 1586 }
1587 1587
1588 1588 /*
1589 1589 * Called from Fast ECC TL>0 handler in case of fatal error.
1590 1590 * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1591 1591 * but if we don't, we'll panic with something reasonable.
1592 1592 */
1593 1593 /*ARGSUSED*/
1594 1594 void
1595 1595 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1596 1596 {
1597 1597 cpu_tl1_error(rp, 1);
1598 1598 /*
1599 1599 * Should never return, but just in case.
1600 1600 */
1601 1601 fm_panic("Unsurvivable ECC Error at TL>0");
1602 1602 }
1603 1603
1604 1604 /*
1605 1605 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1606 1606 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1607 1607 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1608 1608 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1609 1609 *
1610 1610 * Cheetah+ also handles (No additional processing required):
1611 1611 * DUE, DTO, DBERR (NCEEN controlled)
1612 1612 * THCE (CEEN and ET_ECC_en controlled)
1613 1613 * TUE (ET_ECC_en controlled)
1614 1614 *
1615 1615 * Panther further adds:
1616 1616 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
1617 1617 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
1618 1618 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled)
1619 1619 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled)
1620 1620 * THCE (CEEN and L2_tag_ECC_en controlled)
1621 1621 * L3_THCE (CEEN and ET_ECC_en controlled)
1622 1622 *
1623 1623 * Note that the p_clo_flags input is only valid in cases where the
1624 1624 * cpu_private struct is not yet initialized (since that is the only
1625 1625 * time that information cannot be obtained from the logout struct.)
1626 1626 */
1627 1627 /*ARGSUSED*/
1628 1628 void
1629 1629 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1630 1630 {
1631 1631 struct async_flt *aflt;
1632 1632 ch_async_flt_t ch_flt;
1633 1633 char pr_reason[MAX_REASON_STRING];
1634 1634 ch_cpu_logout_t *clop;
1635 1635 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1636 1636 ch_cpu_errors_t cpu_error_regs;
1637 1637
1638 1638 bzero(&ch_flt, sizeof (ch_async_flt_t));
1639 1639 /*
1640 1640 * Get the CPU log out info. If we can't find our CPU private
1641 1641 * pointer, then we will have to make do without any detailed
1642 1642 * logout information.
1643 1643 */
1644 1644 if (CPU_PRIVATE(CPU) == NULL) {
1645 1645 clop = NULL;
1646 1646 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1647 1647 get_cpu_error_state(&cpu_error_regs);
1648 1648 set_cpu_error_state(&cpu_error_regs);
1649 1649 t_afar = cpu_error_regs.afar;
1650 1650 t_afsr = cpu_error_regs.afsr;
1651 1651 t_afsr_ext = cpu_error_regs.afsr_ext;
1652 1652 #if defined(SERRANO)
1653 1653 ch_flt.afar2 = cpu_error_regs.afar2;
1654 1654 #endif /* SERRANO */
1655 1655 } else {
1656 1656 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1657 1657 t_afar = clop->clo_data.chd_afar;
1658 1658 t_afsr = clop->clo_data.chd_afsr;
1659 1659 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1660 1660 #if defined(SERRANO)
1661 1661 ch_flt.afar2 = clop->clo_data.chd_afar2;
1662 1662 #endif /* SERRANO */
1663 1663 }
1664 1664
1665 1665 /*
1666 1666 * In order to simplify code, we maintain this afsr_errs
1667 1667 * variable which holds the aggregate of AFSR and AFSR_EXT
1668 1668 * sticky bits.
1669 1669 */
1670 1670 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1671 1671 (t_afsr & C_AFSR_ALL_ERRS);
1672 1672
1673 1673 pr_reason[0] = '\0';
1674 1674 /* Setup the async fault structure */
1675 1675 aflt = (struct async_flt *)&ch_flt;
1676 1676 ch_flt.afsr_ext = t_afsr_ext;
1677 1677 ch_flt.afsr_errs = t_afsr_errs;
1678 1678 aflt->flt_stat = t_afsr;
1679 1679 aflt->flt_addr = t_afar;
1680 1680 aflt->flt_pc = (caddr_t)rp->r_pc;
1681 1681 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1682 1682 aflt->flt_tl = 0;
1683 1683 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1684 1684
1685 1685 /*
1686 1686 * If this trap is a result of one of the errors not masked
1687 1687 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1688 1688 * indicate that a timeout is to be set later.
1689 1689 */
1690 1690 if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1691 1691 !aflt->flt_panic)
1692 1692 ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1693 1693 else
1694 1694 ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1695 1695
1696 1696 /*
1697 1697 * log the CE and clean up
1698 1698 */
1699 1699 cpu_log_and_clear_ce(&ch_flt);
1700 1700
1701 1701 /*
1702 1702 * We re-enable CEEN (if required) and check if any disrupting errors
1703 1703 * have happened. We do this because if a disrupting error had occurred
1704 1704 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1705 1705 * Note that CEEN works differently on Cheetah than on Spitfire. Also,
1706 1706 * we enable CEEN *before* checking the AFSR to avoid the small window
1707 1707 * of an error happening between checking the AFSR and enabling CEEN.
1708 1708 */
1709 1709 if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1710 1710 set_error_enable(get_error_enable() | EN_REG_CEEN);
1711 1711 if (clear_errors(&ch_flt)) {
1712 1712 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1713 1713 NULL);
1714 1714 }
1715 1715
1716 1716 /*
1717 1717 * Panic here if aflt->flt_panic has been set. Enqueued errors will
1718 1718 * be logged as part of the panic flow.
1719 1719 */
1720 1720 if (aflt->flt_panic)
1721 1721 fm_panic("%sError(s)", pr_reason);
1722 1722 }
1723 1723
1724 1724 /*
1725 1725 * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1726 1726 * L3_EDU:BLD, TO, and BERR events.
1727 1727 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1728 1728 *
1729 1729 * Cheetah+: No additional errors handled.
1730 1730 *
1731 1731 * Note that the p_clo_flags input is only valid in cases where the
1732 1732 * cpu_private struct is not yet initialized (since that is the only
1733 1733 * time that information cannot be obtained from the logout struct.)
1734 1734 */
1735 1735 /*ARGSUSED*/
1736 1736 void
1737 1737 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1738 1738 {
1739 1739 ushort_t ttype, tl;
1740 1740 ch_async_flt_t ch_flt;
1741 1741 struct async_flt *aflt;
1742 1742 int trampolined = 0;
1743 1743 char pr_reason[MAX_REASON_STRING];
1744 1744 ch_cpu_logout_t *clop;
1745 1745 uint64_t ceen, clo_flags;
1746 1746 uint64_t log_afsr;
1747 1747 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1748 1748 ch_cpu_errors_t cpu_error_regs;
1749 1749 int expected = DDI_FM_ERR_UNEXPECTED;
1750 1750 ddi_acc_hdl_t *hp;
1751 1751
1752 1752 /*
1753 1753 * We need to look at p_flag to determine if the thread detected an
1754 1754 * error while dumping core. We can't grab p_lock here, but it's ok
1755 1755 * because we just need a consistent snapshot and we know that everyone
1756 1756 * else will store a consistent set of bits while holding p_lock. We
1757 1757 * don't have to worry about a race because SDOCORE is set once prior
1758 1758 * to doing i/o from the process's address space and is never cleared.
1759 1759 */
1760 1760 uint_t pflag = ttoproc(curthread)->p_flag;
1761 1761
1762 1762 bzero(&ch_flt, sizeof (ch_async_flt_t));
1763 1763 /*
1764 1764 * Get the CPU log out info. If we can't find our CPU private
1765 1765 * pointer then we will have to make do without any detailed
1766 1766 * logout information.
1767 1767 */
1768 1768 if (CPU_PRIVATE(CPU) == NULL) {
1769 1769 clop = NULL;
1770 1770 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1771 1771 get_cpu_error_state(&cpu_error_regs);
1772 1772 set_cpu_error_state(&cpu_error_regs);
1773 1773 t_afar = cpu_error_regs.afar;
1774 1774 t_afsr = cpu_error_regs.afsr;
1775 1775 t_afsr_ext = cpu_error_regs.afsr_ext;
1776 1776 #if defined(SERRANO)
1777 1777 ch_flt.afar2 = cpu_error_regs.afar2;
1778 1778 #endif /* SERRANO */
1779 1779 clo_flags = p_clo_flags;
1780 1780 } else {
1781 1781 clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1782 1782 t_afar = clop->clo_data.chd_afar;
1783 1783 t_afsr = clop->clo_data.chd_afsr;
1784 1784 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1785 1785 #if defined(SERRANO)
1786 1786 ch_flt.afar2 = clop->clo_data.chd_afar2;
1787 1787 #endif /* SERRANO */
1788 1788 clo_flags = clop->clo_flags;
1789 1789 }
1790 1790
1791 1791 /*
1792 1792 * In order to simplify code, we maintain this afsr_errs
1793 1793 * variable which holds the aggregate of AFSR and AFSR_EXT
1794 1794 * sticky bits.
1795 1795 */
1796 1796 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1797 1797 (t_afsr & C_AFSR_ALL_ERRS);
1798 1798 pr_reason[0] = '\0';
1799 1799
1800 1800 /*
1801 1801 * Grab information encoded into our clo_flags field.
1802 1802 */
1803 1803 ceen = clo_flags & EN_REG_CEEN;
1804 1804 tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1805 1805 ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1806 1806
1807 1807 /*
1808 1808 * handle the specific error
1809 1809 */
1810 1810 aflt = (struct async_flt *)&ch_flt;
1811 1811 aflt->flt_id = gethrtime_waitfree();
1812 1812 aflt->flt_bus_id = getprocessorid();
1813 1813 aflt->flt_inst = CPU->cpu_id;
1814 1814 ch_flt.afsr_ext = t_afsr_ext;
1815 1815 ch_flt.afsr_errs = t_afsr_errs;
1816 1816 aflt->flt_stat = t_afsr;
1817 1817 aflt->flt_addr = t_afar;
1818 1818 aflt->flt_pc = (caddr_t)rp->r_pc;
1819 1819 aflt->flt_prot = AFLT_PROT_NONE;
1820 1820 aflt->flt_class = CPU_FAULT;
1821 1821 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1822 1822 aflt->flt_tl = (uchar_t)tl;
1823 1823 aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1824 1824 C_AFSR_PANIC(t_afsr_errs));
1825 1825 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1826 1826 aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1827 1827
1828 1828 /*
1829 1829 * If the trap occurred in privileged mode at TL=0, we need to check to
1830 1830 * see if we were executing in the kernel under on_trap() or t_lofault
1831 1831 * protection. If so, modify the saved registers so that we return
1832 1832 * from the trap to the appropriate trampoline routine.
1833 1833 */
1834 1834 if (aflt->flt_priv && tl == 0) {
1835 1835 if (curthread->t_ontrap != NULL) {
1836 1836 on_trap_data_t *otp = curthread->t_ontrap;
1837 1837
1838 1838 if (otp->ot_prot & OT_DATA_EC) {
1839 1839 aflt->flt_prot = AFLT_PROT_EC;
1840 1840 otp->ot_trap |= OT_DATA_EC;
1841 1841 rp->r_pc = otp->ot_trampoline;
1842 1842 rp->r_npc = rp->r_pc + 4;
1843 1843 trampolined = 1;
1844 1844 }
1845 1845
1846 1846 if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1847 1847 (otp->ot_prot & OT_DATA_ACCESS)) {
1848 1848 aflt->flt_prot = AFLT_PROT_ACCESS;
1849 1849 otp->ot_trap |= OT_DATA_ACCESS;
1850 1850 rp->r_pc = otp->ot_trampoline;
1851 1851 rp->r_npc = rp->r_pc + 4;
1852 1852 trampolined = 1;
1853 1853 /*
1854 1854 * for peeks and caut_gets, errors are expected
1855 1855 */
1856 1856 hp = (ddi_acc_hdl_t *)otp->ot_handle;
1857 1857 if (!hp)
1858 1858 expected = DDI_FM_ERR_PEEK;
1859 1859 else if (hp->ah_acc.devacc_attr_access ==
1860 1860 DDI_CAUTIOUS_ACC)
1861 1861 expected = DDI_FM_ERR_EXPECTED;
1862 1862 }
1863 1863
1864 1864 } else if (curthread->t_lofault) {
1865 1865 aflt->flt_prot = AFLT_PROT_COPY;
1866 1866 rp->r_g1 = EFAULT;
1867 1867 rp->r_pc = curthread->t_lofault;
1868 1868 rp->r_npc = rp->r_pc + 4;
1869 1869 trampolined = 1;
1870 1870 }
1871 1871 }
1872 1872
1873 1873 /*
1874 1874 * If we're in user mode or we're doing a protected copy, we either
1875 1875 * want the ASTON code below to send a signal to the user process
1876 1876 * or we want to panic if aft_panic is set.
1877 1877 *
1878 1878 * If we're in privileged mode and we're not doing a copy, then we
1879 1879 * need to check if we've trampolined. If we haven't trampolined,
1880 1880 * we should panic.
1881 1881 */
1882 1882 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1883 1883 if (t_afsr_errs &
1884 1884 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1885 1885 ~(C_AFSR_BERR | C_AFSR_TO)))
1886 1886 aflt->flt_panic |= aft_panic;
1887 1887 } else if (!trampolined) {
1888 1888 aflt->flt_panic = 1;
1889 1889 }
1890 1890
1891 1891 /*
1892 1892 * If we've trampolined due to a privileged TO or BERR, or if an
1893 1893 * unprivileged TO or BERR occurred, we don't want to enqueue an
1894 1894 * event for that TO or BERR. Queue all other events (if any) besides
1895 1895 * the TO/BERR. Since we may not be enqueueing any events, we need to
1896 1896 * ignore the number of events queued. If we haven't trampolined due
1897 1897 * to a TO or BERR, just enqueue events normally.
1898 1898 */
1899 1899 log_afsr = t_afsr_errs;
1900 1900 if (trampolined) {
1901 1901 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1902 1902 } else if (!aflt->flt_priv) {
1903 1903 /*
1904 1904 * User mode, suppress messages if
1905 1905 * cpu_berr_to_verbose is not set.
1906 1906 */
1907 1907 if (!cpu_berr_to_verbose)
1908 1908 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1909 1909 }
1910 1910
1911 1911 /*
1912 1912 * Log any errors that occurred
1913 1913 */
1914 1914 if (((log_afsr &
1915 1915 ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1916 1916 cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1917 1917 (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1918 1918 ch_flt.flt_type = CPU_INV_AFSR;
1919 1919 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1920 1920 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1921 1921 aflt->flt_panic);
1922 1922 }
1923 1923
1924 1924 /*
1925 1925 * Zero out + invalidate CPU logout.
1926 1926 */
1927 1927 if (clop) {
1928 1928 bzero(clop, sizeof (ch_cpu_logout_t));
1929 1929 clop->clo_data.chd_afar = LOGOUT_INVALID;
1930 1930 }
1931 1931
1932 1932 #if defined(JALAPENO) || defined(SERRANO)
1933 1933 /*
1934 1934 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1935 1935 * IO errors that may have resulted in this trap.
1936 1936 */
1937 1937 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1938 1938 cpu_run_bus_error_handlers(aflt, expected);
1939 1939 }
1940 1940
1941 1941 /*
1942 1942 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1943 1943 * line from the Ecache. We also need to query the bus nexus for
1944 1944 * fatal errors. Attempts to do diagnostic read on caches may
1945 1945 * introduce more errors (especially when the module is bad).
1946 1946 */
1947 1947 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1948 1948 /*
1949 1949 * Ask our bus nexus friends if they have any fatal errors. If
1950 1950 * so, they will log appropriate error messages.
1951 1951 */
1952 1952 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1953 1953 aflt->flt_panic = 1;
1954 1954
1955 1955 /*
1956 1956 * We got a UE or RUE and are panicking, save the fault PA in
1957 1957 * a known location so that the platform specific panic code
1958 1958 * can check for copyback errors.
1959 1959 */
1960 1960 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1961 1961 panic_aflt = *aflt;
1962 1962 }
1963 1963 }
1964 1964
1965 1965 /*
1966 1966 * Flush Ecache line or entire Ecache
1967 1967 */
1968 1968 if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1969 1969 cpu_error_ecache_flush(&ch_flt);
1970 1970 #else /* JALAPENO || SERRANO */
1971 1971 /*
1972 1972 * UE/BERR/TO: Call our bus nexus friends to check for
1973 1973 * IO errors that may have resulted in this trap.
1974 1974 */
1975 1975 if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1976 1976 cpu_run_bus_error_handlers(aflt, expected);
1977 1977 }
1978 1978
1979 1979 /*
1980 1980 * UE: If the UE is in memory, we need to flush the bad
1981 1981 * line from the Ecache. We also need to query the bus nexus for
1982 1982 * fatal errors. Attempts to do diagnostic read on caches may
1983 1983 * introduce more errors (especially when the module is bad).
1984 1984 */
1985 1985 if (t_afsr & C_AFSR_UE) {
1986 1986 /*
1987 1987 * Ask our legacy bus nexus friends if they have any fatal
1988 1988 * errors. If so, they will log appropriate error messages.
1989 1989 */
1990 1990 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1991 1991 aflt->flt_panic = 1;
1992 1992
1993 1993 /*
1994 1994 * We got a UE and are panicking, save the fault PA in a known
1995 1995 * location so that the platform specific panic code can check
1996 1996 * for copyback errors.
1997 1997 */
1998 1998 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1999 1999 panic_aflt = *aflt;
2000 2000 }
2001 2001 }
2002 2002
2003 2003 /*
2004 2004 * Flush Ecache line or entire Ecache
2005 2005 */
2006 2006 if (t_afsr_errs &
2007 2007 (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2008 2008 cpu_error_ecache_flush(&ch_flt);
2009 2009 #endif /* JALAPENO || SERRANO */
2010 2010
2011 2011 /*
2012 2012 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2013 2013 * or disrupting errors have happened. We do this because if a
2014 2014 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2015 2015 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that
2016 2016 * CEEN works differently on Cheetah than on Spitfire. Also, we enable
2017 2017 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2018 2018 * deferred or disrupting error happening between checking the AFSR and
2019 2019 * enabling NCEEN/CEEN.
2020 2020 *
2021 2021 * Note: CEEN reenabled only if it was on when trap taken.
2022 2022 */
2023 2023 set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2024 2024 if (clear_errors(&ch_flt)) {
2025 2025 /*
2026 2026 * Check for secondary errors, and avoid panicking if we
2027 2027 * have them
2028 2028 */
2029 2029 if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2030 2030 t_afar) == 0) {
2031 2031 aflt->flt_panic |= ((ch_flt.afsr_errs &
2032 2032 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2033 2033 }
2034 2034 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2035 2035 NULL);
2036 2036 }
2037 2037
2038 2038 /*
2039 2039 * Panic here if aflt->flt_panic has been set. Enqueued errors will
2040 2040 * be logged as part of the panic flow.
2041 2041 */
2042 2042 if (aflt->flt_panic)
2043 2043 fm_panic("%sError(s)", pr_reason);
2044 2044
2045 2045 /*
2046 2046 * If we queued an error and we are going to return from the trap and
2047 2047 * the error was in user mode or inside of a copy routine, set AST flag
2048 2048 * so the queue will be drained before returning to user mode. The
2049 2049 * AST processing will also act on our failure policy.
2050 2050 */
2051 2051 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2052 2052 int pcb_flag = 0;
2053 2053
2054 2054 if (t_afsr_errs &
2055 2055 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
2056 2056 ~(C_AFSR_BERR | C_AFSR_TO)))
2057 2057 pcb_flag |= ASYNC_HWERR;
2058 2058
2059 2059 if (t_afsr & C_AFSR_BERR)
2060 2060 pcb_flag |= ASYNC_BERR;
2061 2061
2062 2062 if (t_afsr & C_AFSR_TO)
2063 2063 pcb_flag |= ASYNC_BTO;
2064 2064
2065 2065 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2066 2066 aston(curthread);
2067 2067 }
2068 2068 }
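/*
 * [Editorial sketch -- not part of this changeset.]  The trampoline
 * handling in cpu_deferred_error() above only works because kernel
 * consumers bracket risky accesses with on_trap() protection, as is
 * done elsewhere in this file.  A caller might look roughly like the
 * following; everything except on_trap()/no_trap(), lddphys() and the
 * OT_DATA_ACCESS flag is an illustrative name, not an existing symbol.
 */
static int
example_protected_peek(uint64_t pa, uint64_t *valp)
{
        on_trap_data_t otd;

        if (!on_trap(&otd, OT_DATA_ACCESS)) {
                /*
                 * A deferred TO/BERR taken on this load causes
                 * cpu_deferred_error() to redirect r_pc to the on_trap()
                 * trampoline, which lands us in the else branch below.
                 */
                *valp = lddphys(pa);
                no_trap();
                return (0);
        }
        no_trap();
        return (-1);
}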
2069 2069
2070 2070 #if defined(CPU_IMP_L1_CACHE_PARITY)
2071 2071 /*
2072 2072 * Handling of data and instruction parity errors (traps 0x71, 0x72).
2073 2073 *
2074 2074 * For Panther, P$ data parity errors during floating point load hits
2075 2075 * are also detected (reported as TT 0x71) and handled by this trap
2076 2076 * handler.
2077 2077 *
2078 2078 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2079 2079 * is available.
2080 2080 */
2081 2081 /*ARGSUSED*/
2082 2082 void
2083 2083 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2084 2084 {
2085 2085 ch_async_flt_t ch_flt;
2086 2086 struct async_flt *aflt;
2087 2087 uchar_t tl = ((flags & CH_ERR_TL) != 0);
2088 2088 uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2089 2089 uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2090 2090 char *error_class;
2091 2091 int index, way, word;
2092 2092 ch_dc_data_t tmp_dcp;
2093 2093 int dc_set_size = dcache_size / CH_DCACHE_NWAY;
2094 2094 uint64_t parity_bits, pbits;
2095 2095 /* The parity bit array corresponds to the result of summing two bits */
2096 2096 static int parity_bits_popc[] = { 0, 1, 1, 0 };
2097 2097
2098 2098 /*
2099 2099 * Log the error.
2100 2100 * For icache parity errors the fault address is the trap PC.
2101 2101 * For dcache/pcache parity errors the instruction would have to
2102 2102 * be decoded to determine the address and that isn't possible
2103 2103 * at high PIL.
2104 2104 */
2105 2105 bzero(&ch_flt, sizeof (ch_async_flt_t));
2106 2106 aflt = (struct async_flt *)&ch_flt;
2107 2107 aflt->flt_id = gethrtime_waitfree();
2108 2108 aflt->flt_bus_id = getprocessorid();
2109 2109 aflt->flt_inst = CPU->cpu_id;
2110 2110 aflt->flt_pc = tpc;
2111 2111 aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2112 2112 aflt->flt_prot = AFLT_PROT_NONE;
2113 2113 aflt->flt_class = CPU_FAULT;
2114 2114 aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0;
2115 2115 aflt->flt_tl = tl;
2116 2116 aflt->flt_panic = panic;
2117 2117 aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2118 2118 ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2119 2119
2120 2120 if (iparity) {
2121 2121 cpu_icache_parity_info(&ch_flt);
2122 2122 if (ch_flt.parity_data.ipe.cpl_off != -1)
2123 2123 error_class = FM_EREPORT_CPU_USIII_IDSPE;
2124 2124 else if (ch_flt.parity_data.ipe.cpl_way != -1)
2125 2125 error_class = FM_EREPORT_CPU_USIII_ITSPE;
2126 2126 else
2127 2127 error_class = FM_EREPORT_CPU_USIII_IPE;
2128 2128 aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2129 2129 } else {
2130 2130 cpu_dcache_parity_info(&ch_flt);
2131 2131 if (ch_flt.parity_data.dpe.cpl_off != -1) {
2132 2132 /*
2133 2133 * A true ddspe error can only occur in way 0, so if we are
2134 2134 * at TL 0 on a Jalapeno processor and the error was reported
2135 2135 * for a nonzero way, re-verify the data parity first.
2136 2136 */
2137 2137 way = ch_flt.parity_data.dpe.cpl_way;
2138 2138 if ((tl == 0) && (way != 0) &&
2139 2139 IS_JALAPENO(cpunodes[CPU->cpu_id].implementation)) {
2140 2140 for (index = 0; index < dc_set_size;
2141 2141 index += dcache_linesize) {
2142 2142 get_dcache_dtag(index + way *
2143 2143 dc_set_size,
2144 2144 (uint64_t *)&tmp_dcp);
2145 2145 /*
2146 2146 * Check data array for even parity.
2147 2147 * The 8 parity bits are grouped into
2148 2148 * 4 pairs each of which covers a 64-bit
2149 2149 * word. The endianness is reversed
2150 2150 * -- the low-order parity bits cover
2151 2151 * the high-order data words.
2152 2152 */
2153 2153 parity_bits = tmp_dcp.dc_utag >> 8;
2154 2154 for (word = 0; word < 4; word++) {
2155 2155 pbits = (parity_bits >>
2156 2156 (6 - word * 2)) & 3;
2157 2157 if (((popc64(
2158 2158 tmp_dcp.dc_data[word]) +
2159 2159 parity_bits_popc[pbits]) &
2160 2160 1) && (tmp_dcp.dc_tag &
2161 2161 VA13)) {
2162 2162 /* cleanup */
2163 2163 correct_dcache_parity(
2164 2164 dcache_size,
2165 2165 dcache_linesize);
2166 2166 if (cache_boot_state &
2167 2167 DCU_DC) {
2168 2168 flush_dcache();
2169 2169 }
2170 2170
2171 2171 set_dcu(get_dcu() |
2172 2172 cache_boot_state);
2173 2173 return;
2174 2174 }
2175 2175 }
2176 2176 }
2177 2177 } /* (tl == 0) && (way != 0) && IS_JALAPENO */
2178 2178 error_class = FM_EREPORT_CPU_USIII_DDSPE;
2179 2179 } else if (ch_flt.parity_data.dpe.cpl_way != -1)
2180 2180 error_class = FM_EREPORT_CPU_USIII_DTSPE;
2181 2181 else
2182 2182 error_class = FM_EREPORT_CPU_USIII_DPE;
2183 2183 aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2184 2184 /*
2185 2185 * For panther we also need to check the P$ for parity errors.
2186 2186 */
2187 2187 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2188 2188 cpu_pcache_parity_info(&ch_flt);
2189 2189 if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2190 2190 error_class = FM_EREPORT_CPU_USIII_PDSPE;
2191 2191 aflt->flt_payload =
2192 2192 FM_EREPORT_PAYLOAD_PCACHE_PE;
2193 2193 }
2194 2194 }
2195 2195 }
2196 2196
2197 2197 cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2198 2198 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2199 2199
2200 2200 if (iparity) {
2201 2201 /*
2202 2202 * Invalidate entire I$.
2203 2203 * This is required due to the use of diagnostic ASI
2204 2204 * accesses that may result in a loss of I$ coherency.
2205 2205 */
2206 2206 if (cache_boot_state & DCU_IC) {
2207 2207 flush_icache();
2208 2208 }
2209 2209 /*
2210 2210 * According to section P.3.1 of the Panther PRM, we
2211 2211 * need to do a little more for recovery on those
2212 2212 * CPUs after encountering an I$ parity error.
2213 2213 */
2214 2214 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2215 2215 flush_ipb();
2216 2216 correct_dcache_parity(dcache_size,
2217 2217 dcache_linesize);
2218 2218 flush_pcache();
2219 2219 }
2220 2220 } else {
2221 2221 /*
2222 2222 * Since the valid bit is ignored when checking parity the
2223 2223 * D$ data and tag must also be corrected. Set D$ data bits
2224 2224 * to zero and set utag to 0, 1, 2, 3.
2225 2225 */
2226 2226 correct_dcache_parity(dcache_size, dcache_linesize);
2227 2227
2228 2228 /*
2229 2229 * According to section P.3.3 of the Panther PRM, we
2230 2230 * need to do a little more for recovery on those
2231 2231 * CPUs after encountering a D$ or P$ parity error.
2232 2232 *
2233 2233 * As far as clearing P$ parity errors, it is enough to
2234 2234 * simply invalidate all entries in the P$ since P$ parity
2235 2235 * error traps are only generated for floating point load
2236 2236 * hits.
2237 2237 */
2238 2238 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2239 2239 flush_icache();
2240 2240 flush_ipb();
2241 2241 flush_pcache();
2242 2242 }
2243 2243 }
2244 2244
2245 2245 /*
2246 2246 * Invalidate entire D$ if it was enabled.
2247 2247 * This is done to avoid stale data in the D$ which might
2248 2248 * occur with the D$ disabled and the trap handler doing
2249 2249 * stores affecting lines already in the D$.
2250 2250 */
2251 2251 if (cache_boot_state & DCU_DC) {
2252 2252 flush_dcache();
2253 2253 }
2254 2254
2255 2255 /*
2256 2256 * Restore caches to their bootup state.
2257 2257 */
2258 2258 set_dcu(get_dcu() | cache_boot_state);
2259 2259
2260 2260 /*
2261 2261 * Panic here if aflt->flt_panic has been set. Enqueued errors will
2262 2262 * be logged as part of the panic flow.
2263 2263 */
2264 2264 if (aflt->flt_panic)
2265 2265 fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2266 2266
2267 2267 /*
2268 2268 * If this error occurred at TL>0 then flush the E$ here to reduce
2269 2269 * the chance of getting an unrecoverable Fast ECC error. This
2270 2270 * flush will evict the part of the parity trap handler that is run
2271 2271 * at TL>1.
2272 2272 */
2273 2273 if (tl) {
2274 2274 cpu_flush_ecache();
2275 2275 }
2276 2276 }
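/*
 * [Editorial sketch -- not part of this changeset.]  The D$ data parity
 * walk in cpu_parity_error() above can be restated in isolation: the
 * bits above the low byte of the diagnostic utag word hold four 2-bit
 * parity groups, one per 64-bit data word, in reverse word order, and
 * even parity holds when the popcount of the data word plus the parity
 * of its 2-bit group is even.  The helper name below is illustrative.
 */
static int
example_dc_word_parity_bad(const ch_dc_data_t *dcp, int word)
{
        /* Parity (popcount mod 2) of a 2-bit value, as in the code above */
        static const int parity_bits_popc[] = { 0, 1, 1, 0 };
        uint64_t parity_bits = dcp->dc_utag >> 8;
        uint64_t pbits = (parity_bits >> (6 - word * 2)) & 3;

        /* Nonzero means the combined popcount is odd, i.e. a parity error */
        return ((int)((popc64(dcp->dc_data[word]) +
            parity_bits_popc[pbits]) & 1));
}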
2277 2277
2278 2278 /*
2279 2279 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2280 2280 * to indicate which portions of the captured data should be in the ereport.
2281 2281 */
2282 2282 void
2283 2283 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2284 2284 {
2285 2285 int way = ch_flt->parity_data.ipe.cpl_way;
2286 2286 int offset = ch_flt->parity_data.ipe.cpl_off;
2287 2287 int tag_index;
2288 2288 struct async_flt *aflt = (struct async_flt *)ch_flt;
2289 2289
2290 2290
2291 2291 if ((offset != -1) || (way != -1)) {
2292 2292 /*
2293 2293 * Parity error in I$ tag or data
2294 2294 */
2295 2295 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2296 2296 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2297 2297 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2298 2298 PN_ICIDX_TO_WAY(tag_index);
2299 2299 else
2300 2300 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2301 2301 CH_ICIDX_TO_WAY(tag_index);
2302 2302 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2303 2303 IC_LOGFLAG_MAGIC;
2304 2304 } else {
2305 2305 /*
2306 2306 * Parity error was not identified.
2307 2307 * Log tags and data for all ways.
2308 2308 */
2309 2309 for (way = 0; way < CH_ICACHE_NWAY; way++) {
2310 2310 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2311 2311 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2312 2312 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2313 2313 PN_ICIDX_TO_WAY(tag_index);
2314 2314 else
2315 2315 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2316 2316 CH_ICIDX_TO_WAY(tag_index);
2317 2317 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2318 2318 IC_LOGFLAG_MAGIC;
2319 2319 }
2320 2320 }
2321 2321 }
2322 2322
2323 2323 /*
2324 2324 * On a D$ parity error, mark the appropriate entries in the ch_async_flt_t
2325 2325 * to indicate which portions of the captured data should be in the ereport.
2326 2326 */
2327 2327 void
2328 2328 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2329 2329 {
2330 2330 int way = ch_flt->parity_data.dpe.cpl_way;
2331 2331 int offset = ch_flt->parity_data.dpe.cpl_off;
2332 2332 int tag_index;
2333 2333
2334 2334 if (offset != -1) {
2335 2335 /*
2336 2336 * Parity error in D$ or P$ data array.
2337 2337 *
2338 2338 * First check to see whether the parity error is in D$ or P$
2339 2339 * since P$ data parity errors are reported in Panther using
2340 2340 * the same trap.
2341 2341 */
2342 2342 if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2343 2343 tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2344 2344 ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2345 2345 CH_PCIDX_TO_WAY(tag_index);
2346 2346 ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2347 2347 PC_LOGFLAG_MAGIC;
2348 2348 } else {
2349 2349 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2350 2350 ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2351 2351 CH_DCIDX_TO_WAY(tag_index);
2352 2352 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2353 2353 DC_LOGFLAG_MAGIC;
2354 2354 }
2355 2355 } else if (way != -1) {
2356 2356 /*
2357 2357 * Parity error in D$ tag.
2358 2358 */
2359 2359 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2360 2360 ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2361 2361 CH_DCIDX_TO_WAY(tag_index);
2362 2362 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2363 2363 DC_LOGFLAG_MAGIC;
2364 2364 }
2365 2365 }
2366 2366 #endif /* CPU_IMP_L1_CACHE_PARITY */
2367 2367
2368 2368 /*
2369 2369 * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2370 2370 * post-process CPU events that are dequeued. As such, it can be invoked
2371 2371 * from softint context, from AST processing in the trap() flow, or from the
2372 2372 * panic flow. We decode the CPU-specific data, and take appropriate actions.
2373 2373 * Historically this entry point was used to log the actual cmn_err(9F) text;
2374 2374 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2375 2375 * With FMA this function now also returns a flag which indicates to the
2376 2376 * caller whether the ereport should be posted (1) or suppressed (0).
2377 2377 */
2378 2378 static int
2379 2379 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2380 2380 {
2381 2381 ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2382 2382 struct async_flt *aflt = (struct async_flt *)flt;
2383 2383 uint64_t errors;
2384 2384 extern void memscrub_induced_error(void);
2385 2385
2386 2386 switch (ch_flt->flt_type) {
2387 2387 case CPU_INV_AFSR:
2388 2388 /*
2389 2389 * If it is a disrupting trap and the AFSR is zero, then
2390 2390 * the event has probably already been noted. Do not post
2391 2391 * an ereport.
2392 2392 */
2393 2393 if ((aflt->flt_status & ECC_C_TRAP) &&
2394 2394 (!(aflt->flt_stat & C_AFSR_MASK)))
2395 2395 return (0);
2396 2396 else
2397 2397 return (1);
2398 2398 case CPU_TO:
2399 2399 case CPU_BERR:
2400 2400 case CPU_FATAL:
2401 2401 case CPU_FPUERR:
2402 2402 return (1);
2403 2403
2404 2404 case CPU_UE_ECACHE_RETIRE:
2405 2405 cpu_log_err(aflt);
2406 2406 cpu_page_retire(ch_flt);
2407 2407 return (1);
2408 2408
2409 2409 /*
2410 2410 * Cases where we may want to suppress logging or perform
2411 2411 * extended diagnostics.
2412 2412 */
2413 2413 case CPU_CE:
2414 2414 case CPU_EMC:
2415 2415 /*
2416 2416 * We want to skip logging and further classification
2417 2417 * only if ALL the following conditions are true:
2418 2418 *
2419 2419 * 1. There is only one error
2420 2420 * 2. That error is a correctable memory error
2421 2421 * 3. The error is caused by the memory scrubber (in
2422 2422 * which case the error will have occurred under
2423 2423 * on_trap protection)
2424 2424 * 4. The error is on a retired page
2425 2425 *
2426 2426 * Note: AFLT_PROT_EC is used in places other than the memory
2427 2427 * scrubber. However, none of those errors should occur
2428 2428 * on a retired page.
2429 2429 */
2430 2430 if ((ch_flt->afsr_errs &
2431 2431 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2432 2432 aflt->flt_prot == AFLT_PROT_EC) {
2433 2433
2434 2434 if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2435 2435 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2436 2436
2437 2437 /*
2438 2438 * Since we're skipping logging, we'll need
2439 2439 * to schedule the re-enabling of CEEN
2440 2440 */
2441 2441 (void) timeout(cpu_delayed_check_ce_errors,
2442 2442 (void *)(uintptr_t)aflt->flt_inst,
2443 - drv_usectohz((clock_t)cpu_ceen_delay_secs
2444 - * MICROSEC));
2443 + drv_sectohz((clock_t)cpu_ceen_delay_secs));
2445 2444 }
2446 2445
2447 2446 /*
2448 2447 * Inform memscrubber - scrubbing induced
2449 2448 * CE on a retired page.
2450 2449 */
2451 2450 memscrub_induced_error();
2452 2451 return (0);
2453 2452 }
2454 2453 }
2455 2454
2456 2455 /*
2457 2456 * Perform/schedule further classification actions, but
2458 2457 * only if the page is healthy (we don't want bad
2459 2458 * pages inducing too much diagnostic activity). If we could
2460 2459 * not find a page pointer then we also skip this. If
2461 2460 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2462 2461 * to copy and recirculate the event (for further diagnostics)
2463 2462 * and we should not proceed to log it here.
2464 2463 *
2465 2464 * This must be the last step here before the cpu_log_err()
2466 2465 * below - if an event recirculates cpu_ce_log_err() will
2467 2466 * not call the current function but just proceed directly
2468 2467 * to cpu_ereport_post after the cpu_log_err() avoided below.
2469 2468 *
2470 2469 * Note: Check cpu_impl_async_log_err if changing this
2471 2470 */
2472 2471 if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2473 2472 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2474 2473 CE_XDIAG_SKIP_NOPP);
2475 2474 } else {
2476 2475 if (errors != PR_OK) {
2477 2476 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2478 2477 CE_XDIAG_SKIP_PAGEDET);
2479 2478 } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2480 2479 offsetof(ch_async_flt_t, cmn_asyncflt))) {
2481 2480 return (0);
2482 2481 }
2483 2482 }
2484 2483 /*FALLTHRU*/
2485 2484
2486 2485 /*
2487 2486 * Cases where we just want to report the error and continue.
2488 2487 */
2489 2488 case CPU_CE_ECACHE:
2490 2489 case CPU_UE_ECACHE:
2491 2490 case CPU_IV:
2492 2491 case CPU_ORPH:
2493 2492 cpu_log_err(aflt);
2494 2493 return (1);
2495 2494
2496 2495 /*
2497 2496 * Cases where we want to fall through to handle panicking.
2498 2497 */
2499 2498 case CPU_UE:
2500 2499 /*
2501 2500 * We want to skip logging in the same conditions as the
2502 2501 * CE case. In addition, we want to make sure we're not
2503 2502 * panicking.
2504 2503 */
2505 2504 if (!panicstr && (ch_flt->afsr_errs &
2506 2505 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2507 2506 aflt->flt_prot == AFLT_PROT_EC) {
2508 2507 if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2509 2508 /* Zero the address to clear the error */
2510 2509 softcall(ecc_page_zero, (void *)aflt->flt_addr);
2511 2510 /*
2512 2511 * Inform memscrubber - scrubbing induced
2513 2512 * UE on a retired page.
2514 2513 */
2515 2514 memscrub_induced_error();
2516 2515 return (0);
2517 2516 }
2518 2517 }
2519 2518 cpu_log_err(aflt);
2520 2519 break;
2521 2520
2522 2521 default:
2523 2522 /*
2524 2523 * If the us3_common.c code doesn't know the flt_type, it may
2525 2524 * be an implementation-specific code. Call into the impldep
2526 2525 * backend to find out what to do: if it tells us to continue,
2527 2526 * break and handle as if falling through from a UE; if not,
2528 2527 * the impldep backend has handled the error and we're done.
2529 2528 */
2530 2529 switch (cpu_impl_async_log_err(flt, eqep)) {
2531 2530 case CH_ASYNC_LOG_DONE:
2532 2531 return (1);
2533 2532 case CH_ASYNC_LOG_RECIRC:
2534 2533 return (0);
2535 2534 case CH_ASYNC_LOG_CONTINUE:
2536 2535 break; /* continue on to handle UE-like error */
2537 2536 default:
2538 2537 cmn_err(CE_WARN, "discarding error 0x%p with "
2539 2538 "invalid fault type (0x%x)",
2540 2539 (void *)aflt, ch_flt->flt_type);
2541 2540 return (0);
2542 2541 }
2543 2542 }
2544 2543
2545 2544 /* ... fall through from the UE case */
2546 2545
2547 2546 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2548 2547 if (!panicstr) {
2549 2548 cpu_page_retire(ch_flt);
2550 2549 } else {
2551 2550 /*
2552 2551 * Clear UEs on panic so that we don't
2553 2552 * get haunted by them during panic or
2554 2553 * after reboot
2555 2554 */
2556 2555 cpu_clearphys(aflt);
2557 2556 (void) clear_errors(NULL);
2558 2557 }
2559 2558 }
2560 2559
2561 2560 return (1);
2562 2561 }
2563 2562
2564 2563 /*
2565 2564 * Retire the bad page that may contain the flushed error.
2566 2565 */
2567 2566 void
2568 2567 cpu_page_retire(ch_async_flt_t *ch_flt)
2569 2568 {
2570 2569 struct async_flt *aflt = (struct async_flt *)ch_flt;
2571 2570 (void) page_retire(aflt->flt_addr, PR_UE);
2572 2571 }
2573 2572
2574 2573 /*
2575 2574 * Return true if the error specified in the AFSR indicates
2576 2575 * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2577 2576 * for Panther, none for Jalapeno/Serrano).
2578 2577 */
2579 2578 /* ARGSUSED */
2580 2579 static int
2581 2580 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2582 2581 {
2583 2582 #if defined(JALAPENO) || defined(SERRANO)
2584 2583 return (0);
2585 2584 #elif defined(CHEETAH_PLUS)
2586 2585 if (IS_PANTHER(cpunodes[cpuid].implementation))
2587 2586 return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2588 2587 return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2589 2588 #else /* CHEETAH_PLUS */
2590 2589 return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2591 2590 #endif
2592 2591 }
2593 2592
2594 2593 /*
2595 2594 * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2596 2595 * generic event post-processing for correctable and uncorrectable memory,
2597 2596 * E$, and MTag errors. Historically this entry point was used to log bits of
2598 2597 * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2599 2598 * converted into an ereport. In addition, it transmits the error to any
2600 2599 * platform-specific service-processor FRU logging routines, if available.
2601 2600 */
2602 2601 void
2603 2602 cpu_log_err(struct async_flt *aflt)
2604 2603 {
2605 2604 char unum[UNUM_NAMLEN];
2606 2605 int synd_status, synd_code, afar_status;
2607 2606 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2608 2607
2609 2608 if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2610 2609 aflt->flt_status |= ECC_ECACHE;
2611 2610 else
2612 2611 aflt->flt_status &= ~ECC_ECACHE;
2613 2612 /*
2614 2613 * Determine syndrome status.
2615 2614 */
2616 2615 synd_status = afsr_to_synd_status(aflt->flt_inst,
2617 2616 ch_flt->afsr_errs, ch_flt->flt_bit);
2618 2617
2619 2618 /*
2620 2619 * Determine afar status.
2621 2620 */
2622 2621 if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2623 2622 afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2624 2623 ch_flt->flt_bit);
2625 2624 else
2626 2625 afar_status = AFLT_STAT_INVALID;
2627 2626
2628 2627 synd_code = synd_to_synd_code(synd_status,
2629 2628 aflt->flt_synd, ch_flt->flt_bit);
2630 2629
2631 2630 /*
2632 2631 * If afar status is not invalid do a unum lookup.
2633 2632 */
2634 2633 if (afar_status != AFLT_STAT_INVALID) {
2635 2634 (void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2636 2635 } else {
2637 2636 unum[0] = '\0';
2638 2637 }
2639 2638
2640 2639 /*
2641 2640 * Do not send the fruid message (plat_ecc_error_data_t)
2642 2641 * to the SC if it can handle the enhanced error information
2643 2642 * (plat_ecc_error2_data_t) or when the tunable
2644 2643 * ecc_log_fruid_enable is set to 0.
2645 2644 */
2646 2645
2647 2646 if (&plat_ecc_capability_sc_get &&
2648 2647 plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2649 2648 if (&plat_log_fruid_error)
2650 2649 plat_log_fruid_error(synd_code, aflt, unum,
2651 2650 ch_flt->flt_bit);
2652 2651 }
2653 2652
2654 2653 if (aflt->flt_func != NULL)
2655 2654 aflt->flt_func(aflt, unum);
2656 2655
2657 2656 if (afar_status != AFLT_STAT_INVALID)
2658 2657 cpu_log_diag_info(ch_flt);
2659 2658
2660 2659 /*
2661 2660 * If we have a CEEN error, we do not reenable CEEN until after
2662 2661 * we exit the trap handler. Otherwise, another error may
2663 2662 * occur causing the handler to be entered recursively.
2664 2663 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2665 2664 * to try and ensure that the CPU makes progress in the face
2666 2665 * of a CE storm.
2667 2666 */
2668 2667 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2669 2668 (void) timeout(cpu_delayed_check_ce_errors,
2670 2669 (void *)(uintptr_t)aflt->flt_inst,
2671 - drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2670 + drv_sectohz((clock_t)cpu_ceen_delay_secs));
2672 2671 }
2673 2672 }
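/*
 * [Editorial note -- not part of this changeset.]  The hunk above and
 * the matching one in cpu_async_log_err() replace
 * drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC) with the new
 * drv_sectohz(cpu_ceen_delay_secs).  Assuming drv_sectohz(9F) simply
 * converts seconds to clock ticks, both forms request the same timeout;
 * a minimal sketch of such a routine (illustrative only, not
 * necessarily the body this change introduces) would be:
 */
clock_t
example_sectohz(clock_t sec)
{
        /* SEC_TO_TICK() from <sys/time.h> expands to (sec * hz). */
        return (SEC_TO_TICK(sec));
}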
2674 2673
2675 2674 /*
2676 2675 * Invoked by error_init() early in startup and therefore before
2677 2676 * startup_errorq() is called to drain any error Q -
2678 2677 *
2679 2678 * startup()
2680 2679 * startup_end()
2681 2680 * error_init()
2682 2681 * cpu_error_init()
2683 2682 * errorq_init()
2684 2683 * errorq_drain()
2685 2684 * start_other_cpus()
2686 2685 *
2687 2686 * The purpose of this routine is to create error-related taskqs. Taskqs
2688 2687 * are used for this purpose because cpu_lock can't be grabbed from interrupt
2689 2688 * context.
2690 2689 */
2691 2690 void
2692 2691 cpu_error_init(int items)
2693 2692 {
2694 2693 /*
2695 2694 * Create taskq(s) to reenable CE
2696 2695 */
2697 2696 ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2698 2697 items, items, TASKQ_PREPOPULATE);
2699 2698 }
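/*
 * [Editorial sketch -- not part of this changeset.]  The timeouts armed
 * with drv_sectohz() above fire in callout (soft interrupt) context,
 * where cpu_lock cannot be taken, which is why cpu_error_init() creates
 * ch_check_ce_tq.  The delayed-check callback is therefore expected to
 * bounce the real work onto that taskq, roughly as follows; the two
 * example_* names are illustrative, and the actual callback and worker
 * live elsewhere in this file.
 */
static void example_check_ce_worker(void *);

static void
example_delayed_check_ce(void *arg)
{
        /* Hand the CEEN re-enable work to the taskq built above. */
        if (taskq_dispatch(ch_check_ce_tq, example_check_ce_worker, arg,
            TQ_NOSLEEP) == 0) {
                /* Taskq is full; try again after another delay. */
                (void) timeout(example_delayed_check_ce, arg,
                    drv_sectohz((clock_t)cpu_ceen_delay_secs));
        }
}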
2700 2699
2701 2700 void
2702 2701 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2703 2702 {
2704 2703 char unum[UNUM_NAMLEN];
2705 2704 int len;
2706 2705
2707 2706 switch (aflt->flt_class) {
2708 2707 case CPU_FAULT:
2709 2708 cpu_ereport_init(aflt);
2710 2709 if (cpu_async_log_err(aflt, eqep))
2711 2710 cpu_ereport_post(aflt);
2712 2711 break;
2713 2712
2714 2713 case BUS_FAULT:
2715 2714 if (aflt->flt_func != NULL) {
2716 2715 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2717 2716 unum, UNUM_NAMLEN, &len);
2718 2717 aflt->flt_func(aflt, unum);
2719 2718 }
2720 2719 break;
2721 2720
2722 2721 case RECIRC_CPU_FAULT:
2723 2722 aflt->flt_class = CPU_FAULT;
2724 2723 cpu_log_err(aflt);
2725 2724 cpu_ereport_post(aflt);
2726 2725 break;
2727 2726
2728 2727 case RECIRC_BUS_FAULT:
2729 2728 ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2730 2729 /*FALLTHRU*/
2731 2730 default:
2732 2731 cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2733 2732 "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2734 2733 return;
2735 2734 }
2736 2735 }
2737 2736
2738 2737 /*
2739 2738 * Scrub and classify a CE. This function must not modify the
2740 2739 * fault structure passed to it but instead should return the classification
2741 2740 * information.
2742 2741 */
2743 2742
2744 2743 static uchar_t
2745 2744 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2746 2745 {
2747 2746 uchar_t disp = CE_XDIAG_EXTALG;
2748 2747 on_trap_data_t otd;
2749 2748 uint64_t orig_err;
2750 2749 ch_cpu_logout_t *clop;
2751 2750
2752 2751 /*
2753 2752 * Clear CEEN. CPU CE TL > 0 trap handling will already have done
2754 2753 * this, but our other callers have not. Disable preemption to
2755 2754 * avoid CPU migration so that we restore CEEN on the correct
2756 2755 * cpu later.
2757 2756 *
2758 2757 * CEEN is cleared so that further CEs that our instruction and
2759 2758 * data footprint induce do not cause us to either creep down
2760 2759 * the kernel stack to the point of overflow, or do so much CE
2761 2760 * notification as to make little real forward progress.
2762 2761 *
2763 2762 * NCEEN must not be cleared. However it is possible that
2764 2763 * our accesses to the flt_addr may provoke a bus error or timeout
2765 2764 * if the offending address has just been unconfigured as part of
2766 2765 * a DR action. So we must operate under on_trap protection.
2767 2766 */
2768 2767 kpreempt_disable();
2769 2768 orig_err = get_error_enable();
2770 2769 if (orig_err & EN_REG_CEEN)
2771 2770 set_error_enable(orig_err & ~EN_REG_CEEN);
2772 2771
2773 2772 /*
2774 2773 * Our classification algorithm includes the line state before
2775 2774 * the scrub; we'd like this captured after the detection and
2776 2775 * before the algorithm below - the earlier the better.
2777 2776 *
2778 2777 * If we've come from a cpu CE trap then this info already exists
2779 2778 * in the cpu logout area.
2780 2779 *
2781 2780 * For a CE detected by memscrub for which there was no trap
2782 2781 * (running with CEEN off) cpu_log_and_clear_ce has called
2783 2782 * cpu_ce_delayed_ec_logout to capture some cache data, and
2784 2783 * marked the fault structure as incomplete as a flag to later
2785 2784 * logging code.
2786 2785 *
2787 2786 * If called directly from an IO detected CE there has been
2788 2787 * no line data capture. In this case we logout to the cpu logout
2789 2788 * area - that's appropriate since it's the cpu cache data we need
2790 2789 * for classification. We thus borrow the cpu logout area for a
2791 2790 * short time, and cpu_ce_delayed_ec_logout will mark it as busy
2792 2791 * during this time (we will invalidate it again below).
2793 2792 *
2794 2793 * If called from the partner check xcall handler then this cpu
2795 2794 * (the partner) has not necessarily experienced a CE at this
2796 2795 * address. But we want to capture line state before its scrub
2797 2796 * attempt since we use that in our classification.
2798 2797 */
2799 2798 if (logout_tried == B_FALSE) {
2800 2799 if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2801 2800 disp |= CE_XDIAG_NOLOGOUT;
2802 2801 }
2803 2802
2804 2803 /*
2805 2804 * Scrub memory, then check AFSR for errors. The AFAR we scrub may
2806 2805 * no longer be valid (if DR'd since the initial event) so we
2807 2806 * perform this scrub under on_trap protection. If this access is
2808 2807 * ok then further accesses below will also be ok - DR cannot
2809 2808 * proceed while this thread is active (preemption is disabled);
2810 2809 * to be safe we'll nonetheless use on_trap again below.
2811 2810 */
2812 2811 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2813 2812 cpu_scrubphys(ecc);
2814 2813 } else {
2815 2814 no_trap();
2816 2815 if (orig_err & EN_REG_CEEN)
2817 2816 set_error_enable(orig_err);
2818 2817 kpreempt_enable();
2819 2818 return (disp);
2820 2819 }
2821 2820 no_trap();
2822 2821
2823 2822 /*
2824 2823 * Did the casx read of the scrub log a CE that matches the AFAR?
2825 2824 * Note that it's quite possible that the read sourced the data from
2826 2825 * another cpu.
2827 2826 */
2828 2827 if (clear_ecc(ecc))
2829 2828 disp |= CE_XDIAG_CE1;
2830 2829
2831 2830 /*
2832 2831 * Read the data again. This time the read is very likely to
2833 2832 * come from memory since the scrub induced a writeback to memory.
2834 2833 */
2835 2834 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2836 2835 (void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2837 2836 } else {
2838 2837 no_trap();
2839 2838 if (orig_err & EN_REG_CEEN)
2840 2839 set_error_enable(orig_err);
2841 2840 kpreempt_enable();
2842 2841 return (disp);
2843 2842 }
2844 2843 no_trap();
2845 2844
2846 2845 /* Did that read induce a CE that matches the AFAR? */
2847 2846 if (clear_ecc(ecc))
2848 2847 disp |= CE_XDIAG_CE2;
2849 2848
2850 2849 /*
2851 2850 * Look at the logout information and record whether we found the
2852 2851 * line in l2/l3 cache. For Panther we are interested in whether
2853 2852 * we found it in either cache (it won't reside in both but
2854 2853 * it is possible to read it that way given the moving target).
2855 2854 */
2856 2855 clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2857 2856 if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2858 2857 clop->clo_data.chd_afar != LOGOUT_INVALID) {
2859 2858 int hit, level;
2860 2859 int state;
2861 2860 int totalsize;
2862 2861 ch_ec_data_t *ecp;
2863 2862
2864 2863 /*
2865 2864 * If hit is nonzero then a match was found and hit will
2866 2865 * be one greater than the index which hit. For Panther we
2867 2866 * also need to pay attention to level to see which of l2$ or
2868 2867 * l3$ it hit in.
2869 2868 */
2870 2869 hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2871 2870 0, &level);
2872 2871
2873 2872 if (hit) {
2874 2873 --hit;
2875 2874 disp |= CE_XDIAG_AFARMATCH;
2876 2875
2877 2876 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2878 2877 if (level == 2)
2879 2878 ecp = &clop->clo_data.chd_l2_data[hit];
2880 2879 else
2881 2880 ecp = &clop->clo_data.chd_ec_data[hit];
2882 2881 } else {
2883 2882 ASSERT(level == 2);
2884 2883 ecp = &clop->clo_data.chd_ec_data[hit];
2885 2884 }
2886 2885 totalsize = cpunodes[CPU->cpu_id].ecache_size;
2887 2886 state = cpu_ectag_pa_to_subblk_state(totalsize,
2888 2887 ecc->flt_addr, ecp->ec_tag);
2889 2888
2890 2889 /*
2891 2890 * Cheetah variants use different state encodings -
2892 2891 * the CH_ECSTATE_* defines vary depending on the
2893 2892 * module we're compiled for. Translate into our
2894 2893 * one true version. Conflate Owner-Shared state
2895 2894 * of SSM mode with Owner as victimisation of such
2896 2895 * lines may cause a writeback.
2897 2896 */
2898 2897 switch (state) {
2899 2898 case CH_ECSTATE_MOD:
2900 2899 disp |= EC_STATE_M;
2901 2900 break;
2902 2901
2903 2902 case CH_ECSTATE_OWN:
2904 2903 case CH_ECSTATE_OWS:
2905 2904 disp |= EC_STATE_O;
2906 2905 break;
2907 2906
2908 2907 case CH_ECSTATE_EXL:
2909 2908 disp |= EC_STATE_E;
2910 2909 break;
2911 2910
2912 2911 case CH_ECSTATE_SHR:
2913 2912 disp |= EC_STATE_S;
2914 2913 break;
2915 2914
2916 2915 default:
2917 2916 disp |= EC_STATE_I;
2918 2917 break;
2919 2918 }
2920 2919 }
2921 2920
2922 2921 /*
2923 2922 * If we initiated the delayed logout then we are responsible
2924 2923 * for invalidating the logout area.
2925 2924 */
2926 2925 if (logout_tried == B_FALSE) {
2927 2926 bzero(clop, sizeof (ch_cpu_logout_t));
2928 2927 clop->clo_data.chd_afar = LOGOUT_INVALID;
2929 2928 }
2930 2929 }
2931 2930
2932 2931 /*
2933 2932 * Re-enable CEEN if we turned it off.
2934 2933 */
2935 2934 if (orig_err & EN_REG_CEEN)
2936 2935 set_error_enable(orig_err);
2937 2936 kpreempt_enable();
2938 2937
2939 2938 return (disp);
2940 2939 }
2941 2940
2942 2941 /*
2943 2942 * Scrub a correctable memory error and collect data for classification
2944 2943 * of CE type. This function is called in the detection path, ie tl0 handling
2945 2944 * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2946 2945 */
2947 2946 void
2948 2947 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2949 2948 {
2950 2949 /*
2951 2950 * Cheetah CE classification does not set any bits in flt_status.
2952 2951 * Instead we will record classification datapoints in flt_disp.
2953 2952 */
2954 2953 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2955 2954
2956 2955 /*
2957 2956 * To check whether the error detected by IO is persistent, sticky
2958 2957 * or intermittent; this is determined by clear_ecc().
2959 2958 */
2960 2959 if (ecc->flt_status & ECC_IOBUS)
2961 2960 ecc->flt_stat = C_AFSR_MEMORY;
2962 2961
2963 2962 /*
2964 2963 * Record information from this first part of the algorithm in
2965 2964 * flt_disp.
2966 2965 */
2967 2966 ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2968 2967 }
2969 2968
2970 2969 /*
2971 2970 * Select a partner to perform a further CE classification check from.
2972 2971 * Must be called with kernel preemption disabled (to stop the cpu list
2973 2972 * from changing). The detecting cpu we are partnering has cpuid
2974 2973 * aflt->flt_inst; we might not be running on the detecting cpu.
2975 2974 *
2976 2975 * Restrict choice to active cpus in the same cpu partition as ourselves in
2977 2976 * an effort to stop bad cpus in one partition causing other partitions to
2978 2977 * perform excessive diagnostic activity. In practice, since the errorq
2979 2978 * drain usually runs from a softint and that is a global mechanism,
2980 2979 * this isolation is only partial. Return NULL if we fail to find a
2981 2980 * suitable partner.
2982 2981 *
2983 2982 * We prefer a partner that is in a different latency group to ourselves as
2984 2983 * we will share fewer datapaths. If such a partner is unavailable then
2985 2984 * choose one in the same lgroup but prefer a different chip and only allow
2986 2985 * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and
2987 2986 * flags includes PTNR_SELFOK then permit selection of the original detector.
2988 2987 *
2989 2988 * We keep a cache of the last partner selected for a cpu, and we'll try to
2990 2989 * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2991 2990 * have passed since that selection was made. This provides the benefit
2992 2991 * of the point-of-view of different partners over time but without
2993 2992 * requiring frequent cpu list traversals.
2994 2993 */
2995 2994
2996 2995 #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */
2997 2996 #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */
2998 2997
2999 2998 static cpu_t *
3000 2999 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
3001 3000 {
3002 3001 cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
3003 3002 hrtime_t lasttime, thistime;
3004 3003
3005 3004 ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
3006 3005
3007 3006 dtcr = cpu[aflt->flt_inst];
3008 3007
3009 3008 /*
3010 3009 * Short-circuit for the following cases:
3011 3010 * . the dtcr is not flagged active
3012 3011 * . there is just one cpu present
3013 3012 * . the detector has disappeared
3014 3013 * . we were given a bad flt_inst cpuid; this should not happen
3015 3014 * (eg PCI code now fills flt_inst) but if it does it is no
3016 3015 * reason to panic.
3017 3016 * . there is just one cpu left online in the cpu partition
3018 3017 *
3019 3018 * If we return NULL after this point then we do not update the
3020 3019 * chpr_ceptnr_seltime which will cause us to perform a full lookup
3021 3020 * again next time; this is the case where the only other cpu online
3022 3021 * in the detector's partition is on the same chip as the detector
3023 3022 * and since CEEN re-enable is throttled even that case should not
3024 3023 * hurt performance.
3025 3024 */
3026 3025 if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
3027 3026 return (NULL);
3028 3027 }
3029 3028 if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
3030 3029 if (flags & PTNR_SELFOK) {
3031 3030 *typep = CE_XDIAG_PTNR_SELF;
3032 3031 return (dtcr);
3033 3032 } else {
3034 3033 return (NULL);
3035 3034 }
3036 3035 }
3037 3036
3038 3037 thistime = gethrtime();
3039 3038 lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
3040 3039
3041 3040 /*
3042 3041 * Select a starting point.
3043 3042 */
3044 3043 if (!lasttime) {
3045 3044 /*
3046 3045 * We've never selected a partner for this detector before.
3047 3046 * Start the scan at the next online cpu in the same cpu
3048 3047 * partition.
3049 3048 */
3050 3049 sp = dtcr->cpu_next_part;
3051 3050 } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
3052 3051 /*
3053 3052 * Our last selection has not aged yet. If this partner:
3054 3053 * . is still a valid cpu,
3055 3054 * . is still in the same partition as the detector
3056 3055 * . is still marked active
3057 3056 * . satisfies the 'flags' argument criteria
3058 3057 * then select it again without updating the timestamp.
3059 3058 */
3060 3059 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3061 3060 if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3062 3061 !cpu_flagged_active(sp->cpu_flags) ||
3063 3062 (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3064 3063 (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3065 3064 !(flags & PTNR_SIBLINGOK))) {
3066 3065 sp = dtcr->cpu_next_part;
3067 3066 } else {
3068 3067 if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3069 3068 *typep = CE_XDIAG_PTNR_REMOTE;
3070 3069 } else if (sp == dtcr) {
3071 3070 *typep = CE_XDIAG_PTNR_SELF;
3072 3071 } else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3073 3072 *typep = CE_XDIAG_PTNR_SIBLING;
3074 3073 } else {
3075 3074 *typep = CE_XDIAG_PTNR_LOCAL;
3076 3075 }
3077 3076 return (sp);
3078 3077 }
3079 3078 } else {
3080 3079 /*
3081 3080 * Our last selection has aged. If it is nonetheless still a
3082 3081 * valid cpu then start the scan at the next cpu in the
3083 3082 * partition after our last partner. If the last selection
3084 3083 * is no longer a valid cpu then go with our default. In
3085 3084 * this way we slowly cycle through possible partners to
3086 3085 * obtain multiple viewpoints over time.
3087 3086 */
3088 3087 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3089 3088 if (sp == NULL) {
3090 3089 sp = dtcr->cpu_next_part;
3091 3090 } else {
3092 3091 sp = sp->cpu_next_part; /* may be dtcr */
3093 3092 if (sp->cpu_part != dtcr->cpu_part)
3094 3093 sp = dtcr;
3095 3094 }
3096 3095 }
3097 3096
3098 3097 /*
3099 3098 * We have a proposed starting point for our search, but if this
3100 3099 * cpu is offline then its cpu_next_part will point to itself
3101 3100 * so we can't use that to iterate over cpus in this partition in
3102 3101 * the loop below. We still want to avoid iterating over cpus not
3103 3102 * in our partition, so in the case that our starting point is offline
3104 3103 * we will repoint it to be the detector itself; and if the detector
3105 3104 * happens to be offline we'll return NULL from the following loop.
3106 3105 */
3107 3106 if (!cpu_flagged_active(sp->cpu_flags)) {
3108 3107 sp = dtcr;
3109 3108 }
3110 3109
3111 3110 ptnr = sp;
3112 3111 locptnr = NULL;
3113 3112 sibptnr = NULL;
3114 3113 do {
3115 3114 if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3116 3115 continue;
3117 3116 if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3118 3117 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3119 3118 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3120 3119 *typep = CE_XDIAG_PTNR_REMOTE;
3121 3120 return (ptnr);
3122 3121 }
3123 3122 if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3124 3123 if (sibptnr == NULL)
3125 3124 sibptnr = ptnr;
3126 3125 continue;
3127 3126 }
3128 3127 if (locptnr == NULL)
3129 3128 locptnr = ptnr;
3130 3129 } while ((ptnr = ptnr->cpu_next_part) != sp);
3131 3130
3132 3131 /*
3133 3132 * A foreign partner has already been returned if one was available.
3134 3133 *
3135 3134 * If locptnr is not NULL it is a cpu in the same lgroup as the
3136 3135 * detector, is active, and is not a sibling of the detector.
3137 3136 *
3138 3137 * If sibptnr is not NULL it is a sibling of the detector, and is
3139 3138 * active.
3140 3139 *
3141 3140 * If we have to resort to using the detector itself we have already
3142 3141 * checked that it is active.
3143 3142 */
3144 3143 if (locptnr) {
3145 3144 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3146 3145 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3147 3146 *typep = CE_XDIAG_PTNR_LOCAL;
3148 3147 return (locptnr);
3149 3148 } else if (sibptnr && flags & PTNR_SIBLINGOK) {
3150 3149 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3151 3150 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3152 3151 *typep = CE_XDIAG_PTNR_SIBLING;
3153 3152 return (sibptnr);
3154 3153 } else if (flags & PTNR_SELFOK) {
3155 3154 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3156 3155 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3157 3156 *typep = CE_XDIAG_PTNR_SELF;
3158 3157 return (dtcr);
3159 3158 }
3160 3159
3161 3160 return (NULL);
3162 3161 }
3163 3162
3164 3163 /*
3165 3164 * Cross call handler that is requested to run on the designated partner of
3166 3165 * a cpu that experienced a possibly sticky or possibly persistent CE.
3167 3166 */
3168 3167 static void
3169 3168 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3170 3169 {
3171 3170 *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3172 3171 }
3173 3172
3174 3173 /*
3175 3174 * The associated errorqs are never destroyed so we do not need to deal with
3176 3175 * them disappearing before this timeout fires. If the affected memory
3177 3176 * has been DR'd out since the original event, the scrub algorithm will catch
3178 3177 * any errors and return null disposition info. If the original detecting
3179 3178 * cpu has been DR'd out then ereport detector info will not be able to
3180 3179 * look up CPU type; with a small timeout this is unlikely.
3181 3180 */
3182 3181 static void
3183 3182 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3184 3183 {
3185 3184 struct async_flt *aflt = cbarg->lkycb_aflt;
3186 3185 uchar_t disp;
3187 3186 cpu_t *cp;
3188 3187 int ptnrtype;
3189 3188
3190 3189 kpreempt_disable();
3191 3190 if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3192 3191 &ptnrtype)) {
3193 3192 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3194 3193 (uint64_t)&disp);
3195 3194 CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3196 3195 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3197 3196 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3198 3197 } else {
3199 3198 ce_xdiag_lkydrops++;
3200 3199 if (ncpus > 1)
3201 3200 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3202 3201 CE_XDIAG_SKIP_NOPTNR);
3203 3202 }
3204 3203 kpreempt_enable();
3205 3204
3206 3205 errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3207 3206 kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3208 3207 }
3209 3208
3210 3209 /*
3211 3210 * Called from errorq drain code when processing a CE error, both from
3212 3211 * CPU and PCI drain functions. Decide what further classification actions,
3213 3212 * if any, we will perform. Perform immediate actions now, and schedule
3214 3213 * delayed actions as required. Note that we are no longer necessarily running
3215 3214 * on the detecting cpu, and that the async_flt structure will not persist on
3216 3215 * return from this function.
3217 3216 *
3218 3217 * Calls to this function should aim to be self-throttling in some way. With
3219 3218 * the delayed re-enable of CEEN the absolute rate of calls should not
3220 3219 * be excessive. Callers should also avoid performing in-depth classification
3221 3220 * for events in pages that are already known to be suspect.
3222 3221 *
3223 3222 * We return nonzero to indicate that the event has been copied and
3224 3223 * recirculated for further testing. The caller should not log the event
3225 3224 * in this case - it will be logged when further test results are available.
3226 3225 *
3227 3226 * Our possible contexts are that of errorq_drain: below lock level or from
3228 3227 * panic context. We can assume that the cpu we are running on is online.
3229 3228 */
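As a rough illustration of the contract described above, here is a minimal sketch of how a drain-side caller might honor the nonzero return value; my_ce_drain and its surrounding plumbing are hypothetical, and only ce_scrub_xdiag_recirc() and its argument types come from this file.

static void
my_ce_drain(errorq_t *eqp, errorq_elem_t *eqep, struct async_flt *aflt,
    size_t afltoffset)
{
	/*
	 * If the event was duplicated and recirculated for delayed
	 * testing, do not log it now; it will be logged when the
	 * further test results are drained.
	 */
	if (ce_scrub_xdiag_recirc(aflt, eqp, eqep, afltoffset) != 0)
		return;

	/* otherwise log and dispose of the event as usual */
}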
3230 3229
3231 3230
3232 3231 #ifdef DEBUG
3233 3232 static int ce_xdiag_forceaction;
3234 3233 #endif
3235 3234
3236 3235 int
3237 3236 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3238 3237 errorq_elem_t *eqep, size_t afltoffset)
3239 3238 {
3240 3239 ce_dispact_t dispact, action;
3241 3240 cpu_t *cp;
3242 3241 uchar_t dtcrinfo, disp;
3243 3242 int ptnrtype;
3244 3243
3245 3244 if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3246 3245 ce_xdiag_drops++;
3247 3246 return (0);
3248 3247 } else if (!aflt->flt_in_memory) {
3249 3248 ce_xdiag_drops++;
3250 3249 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3251 3250 return (0);
3252 3251 }
3253 3252
3254 3253 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3255 3254
3256 3255 /*
3257 3256 * Some correctable events are not scrubbed/classified, such as those
3258 3257 * noticed at the tail of cpu_deferred_error. So if there is no
3259 3258 * initial detector classification, go no further.
3260 3259 */
3261 3260 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3262 3261 ce_xdiag_drops++;
3263 3262 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3264 3263 return (0);
3265 3264 }
3266 3265
3267 3266 dispact = CE_DISPACT(ce_disp_table,
3268 3267 CE_XDIAG_AFARMATCHED(dtcrinfo),
3269 3268 CE_XDIAG_STATE(dtcrinfo),
3270 3269 CE_XDIAG_CE1SEEN(dtcrinfo),
3271 3270 CE_XDIAG_CE2SEEN(dtcrinfo));
3272 3271
3273 3272
3274 3273 action = CE_ACT(dispact); /* bad lookup caught below */
3275 3274 #ifdef DEBUG
3276 3275 if (ce_xdiag_forceaction != 0)
3277 3276 action = ce_xdiag_forceaction;
3278 3277 #endif
3279 3278
3280 3279 switch (action) {
3281 3280 case CE_ACT_LKYCHK: {
3282 3281 caddr_t ndata;
3283 3282 errorq_elem_t *neqep;
3284 3283 struct async_flt *ecc;
3285 3284 ce_lkychk_cb_t *cbargp;
3286 3285
3287 3286 if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3288 3287 ce_xdiag_lkydrops++;
3289 3288 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3290 3289 CE_XDIAG_SKIP_DUPFAIL);
3291 3290 break;
3292 3291 }
3293 3292 ecc = (struct async_flt *)(ndata + afltoffset);
3294 3293
3295 3294 ASSERT(ecc->flt_class == CPU_FAULT ||
3296 3295 ecc->flt_class == BUS_FAULT);
3297 3296 ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3298 3297 RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3299 3298
3300 3299 cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3301 3300 cbargp->lkycb_aflt = ecc;
3302 3301 cbargp->lkycb_eqp = eqp;
3303 3302 cbargp->lkycb_eqep = neqep;
3304 3303
3305 3304 (void) timeout((void (*)(void *))ce_lkychk_cb,
3306 3305 (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3307 3306 return (1);
3308 3307 }
3309 3308
3310 3309 case CE_ACT_PTNRCHK:
3311 3310 kpreempt_disable(); /* stop cpu list changing */
3312 3311 if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3313 3312 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3314 3313 (uint64_t)aflt, (uint64_t)&disp);
3315 3314 CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3316 3315 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3317 3316 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3318 3317 } else if (ncpus > 1) {
3319 3318 ce_xdiag_ptnrdrops++;
3320 3319 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3321 3320 CE_XDIAG_SKIP_NOPTNR);
3322 3321 } else {
3323 3322 ce_xdiag_ptnrdrops++;
3324 3323 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3325 3324 CE_XDIAG_SKIP_UNIPROC);
3326 3325 }
3327 3326 kpreempt_enable();
3328 3327 break;
3329 3328
3330 3329 case CE_ACT_DONE:
3331 3330 break;
3332 3331
3333 3332 case CE_ACT(CE_DISP_BAD):
3334 3333 default:
3335 3334 #ifdef DEBUG
3336 3335 cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3337 3336 #endif
3338 3337 ce_xdiag_bad++;
3339 3338 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3340 3339 break;
3341 3340 }
3342 3341
3343 3342 return (0);
3344 3343 }
3345 3344
3346 3345 /*
3347 3346 * We route all errors through a single switch statement.
3348 3347 */
3349 3348 void
3350 3349 cpu_ue_log_err(struct async_flt *aflt)
3351 3350 {
3352 3351 switch (aflt->flt_class) {
3353 3352 case CPU_FAULT:
3354 3353 cpu_ereport_init(aflt);
3355 3354 if (cpu_async_log_err(aflt, NULL))
3356 3355 cpu_ereport_post(aflt);
3357 3356 break;
3358 3357
3359 3358 case BUS_FAULT:
3360 3359 bus_async_log_err(aflt);
3361 3360 break;
3362 3361
3363 3362 default:
3364 3363 cmn_err(CE_WARN, "discarding async error %p with invalid "
3365 3364 "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3366 3365 return;
3367 3366 }
3368 3367 }
3369 3368
3370 3369 /*
3371 3370 * Routine for panic hook callback from panic_idle().
3372 3371 */
3373 3372 void
3374 3373 cpu_async_panic_callb(void)
3375 3374 {
3376 3375 ch_async_flt_t ch_flt;
3377 3376 struct async_flt *aflt;
3378 3377 ch_cpu_errors_t cpu_error_regs;
3379 3378 uint64_t afsr_errs;
3380 3379
3381 3380 get_cpu_error_state(&cpu_error_regs);
3382 3381
3383 3382 afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3384 3383 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3385 3384
3386 3385 if (afsr_errs) {
3387 3386
3388 3387 bzero(&ch_flt, sizeof (ch_async_flt_t));
3389 3388 aflt = (struct async_flt *)&ch_flt;
3390 3389 aflt->flt_id = gethrtime_waitfree();
3391 3390 aflt->flt_bus_id = getprocessorid();
3392 3391 aflt->flt_inst = CPU->cpu_id;
3393 3392 aflt->flt_stat = cpu_error_regs.afsr;
3394 3393 aflt->flt_addr = cpu_error_regs.afar;
3395 3394 aflt->flt_prot = AFLT_PROT_NONE;
3396 3395 aflt->flt_class = CPU_FAULT;
3397 3396 aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3398 3397 aflt->flt_panic = 1;
3399 3398 ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3400 3399 ch_flt.afsr_errs = afsr_errs;
3401 3400 #if defined(SERRANO)
3402 3401 ch_flt.afar2 = cpu_error_regs.afar2;
3403 3402 #endif /* SERRANO */
3404 3403 (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3405 3404 }
3406 3405 }
3407 3406
3408 3407 /*
3409 3408 * Routine to convert a syndrome into a syndrome code.
3410 3409 */
3411 3410 static int
3412 3411 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3413 3412 {
3414 3413 if (synd_status == AFLT_STAT_INVALID)
3415 3414 return (-1);
3416 3415
3417 3416 /*
3418 3417 * Use the syndrome to index the appropriate syndrome table,
3419 3418 * to get the code indicating which bit(s) is(are) bad.
3420 3419 */
3421 3420 if (afsr_bit &
3422 3421 (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3423 3422 if (afsr_bit & C_AFSR_MSYND_ERRS) {
3424 3423 #if defined(JALAPENO) || defined(SERRANO)
3425 3424 if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3426 3425 return (-1);
3427 3426 else
3428 3427 return (BPAR0 + synd);
3429 3428 #else /* JALAPENO || SERRANO */
3430 3429 if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3431 3430 return (-1);
3432 3431 else
3433 3432 return (mtag_syndrome_tab[synd]);
3434 3433 #endif /* JALAPENO || SERRANO */
3435 3434 } else {
3436 3435 if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3437 3436 return (-1);
3438 3437 else
3439 3438 return (ecc_syndrome_tab[synd]);
3440 3439 }
3441 3440 } else {
3442 3441 return (-1);
3443 3442 }
3444 3443 }
3445 3444
3446 3445 int
3447 3446 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3448 3447 {
3449 3448 if (&plat_get_mem_sid)
3450 3449 return (plat_get_mem_sid(unum, buf, buflen, lenp));
3451 3450 else
3452 3451 return (ENOTSUP);
3453 3452 }
3454 3453
3455 3454 int
3456 3455 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3457 3456 {
3458 3457 if (&plat_get_mem_offset)
3459 3458 return (plat_get_mem_offset(flt_addr, offp));
3460 3459 else
3461 3460 return (ENOTSUP);
3462 3461 }
3463 3462
3464 3463 int
3465 3464 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3466 3465 {
3467 3466 if (&plat_get_mem_addr)
3468 3467 return (plat_get_mem_addr(unum, sid, offset, addrp));
3469 3468 else
3470 3469 return (ENOTSUP);
3471 3470 }
3472 3471
3473 3472 /*
3474 3473 * Routine to return a string identifying the physical name
3475 3474 * associated with a memory/cache error.
3476 3475 */
3477 3476 int
3478 3477 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3479 3478 uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3480 3479 ushort_t flt_status, char *buf, int buflen, int *lenp)
3481 3480 {
3482 3481 int synd_code;
3483 3482 int ret;
3484 3483
3485 3484 /*
3486 3485 * An AFSR of -1 defaults to a memory syndrome.
3487 3486 */
3488 3487 if (flt_stat == (uint64_t)-1)
3489 3488 flt_stat = C_AFSR_CE;
3490 3489
3491 3490 synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3492 3491
3493 3492 /*
3494 3493 * Syndrome code must be either a single-bit error code
3495 3494 * (0...143) or -1 for unum lookup.
3496 3495 */
3497 3496 if (synd_code < 0 || synd_code >= M2)
3498 3497 synd_code = -1;
3499 3498 if (&plat_get_mem_unum) {
3500 3499 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3501 3500 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3502 3501 buf[0] = '\0';
3503 3502 *lenp = 0;
3504 3503 }
3505 3504
3506 3505 return (ret);
3507 3506 }
3508 3507
3509 3508 return (ENOTSUP);
3510 3509 }
3511 3510
3512 3511 /*
3513 3512 * Wrapper for cpu_get_mem_unum() routine that takes an
3514 3513 * async_flt struct rather than explicit arguments.
3515 3514 */
3516 3515 int
3517 3516 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3518 3517 char *buf, int buflen, int *lenp)
3519 3518 {
3520 3519 /*
3521 3520 * If we come through here for an IO bus error, aflt->flt_stat will
3522 3521 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3523 3522 * so it will interpret this as a memory error.
3524 3523 */
3525 3524 return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3526 3525 (aflt->flt_class == BUS_FAULT) ?
3527 3526 (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3528 3527 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3529 3528 aflt->flt_status, buf, buflen, lenp));
3530 3529 }
3531 3530
3532 3531 /*
3533 3532 * Return the unum string for the given synd_code and async_flt into
3534 3533 * buf, which must be of size UNUM_NAMLEN.
3535 3534 */
3536 3535 static int
3537 3536 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3538 3537 {
3539 3538 int ret, len;
3540 3539
3541 3540 /*
3542 3541 * Syndrome code must be either a single-bit error code
3543 3542 * (0...143) or -1 for unum lookup.
3544 3543 */
3545 3544 if (synd_code < 0 || synd_code >= M2)
3546 3545 synd_code = -1;
3547 3546 if (&plat_get_mem_unum) {
3548 3547 if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3549 3548 aflt->flt_bus_id, aflt->flt_in_memory,
3550 3549 aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3551 3550 buf[0] = '\0';
3552 3551 }
3553 3552 return (ret);
3554 3553 }
3555 3554
3556 3555 buf[0] = '\0';
3557 3556 return (ENOTSUP);
3558 3557 }
3559 3558
3560 3559 /*
3561 3560 * This routine is a more generic interface to cpu_get_mem_unum()
3562 3561 * that may be used by other modules (e.g. the 'mm' driver, through
3563 3562 * the 'MEM_NAME' ioctl, which is used by fmd to resolve unums
3564 3563 * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3565 3564 */
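A hedged sketch of how such an outside consumer might call this interface, passing the -1 sentinels for the syndrome and AFSR that are handled below; my_resolve_unum and its arguments are hypothetical names used for illustration only.

static int
my_resolve_unum(uint64_t pa, char *buf, int buflen, int *lenp)
{
	uint64_t afsr = (uint64_t)-1;	/* -1 defaults to a memory syndrome */

	/* a synd of -1 requests a plain address-to-unum lookup */
	return (cpu_get_mem_name((uint64_t)-1, &afsr, pa, buf, buflen, lenp));
}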
3566 3565 int
3567 3566 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3568 3567 char *buf, int buflen, int *lenp)
3569 3568 {
3570 3569 int synd_status, flt_in_memory, ret;
3571 3570 ushort_t flt_status = 0;
3572 3571 char unum[UNUM_NAMLEN];
3573 3572 uint64_t t_afsr_errs;
3574 3573
3575 3574 /*
3576 3575 * Check for an invalid address.
3577 3576 */
3578 3577 if (afar == (uint64_t)-1)
3579 3578 return (ENXIO);
3580 3579
3581 3580 if (synd == (uint64_t)-1)
3582 3581 synd_status = AFLT_STAT_INVALID;
3583 3582 else
3584 3583 synd_status = AFLT_STAT_VALID;
3585 3584
3586 3585 flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3587 3586 pf_is_memory(afar >> MMU_PAGESHIFT);
3588 3587
3589 3588 /*
3590 3589 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3591 3590 */
3592 3591 if (*afsr == (uint64_t)-1)
3593 3592 t_afsr_errs = C_AFSR_CE;
3594 3593 else {
3595 3594 t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3596 3595 #if defined(CHEETAH_PLUS)
3597 3596 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3598 3597 t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3599 3598 #endif /* CHEETAH_PLUS */
3600 3599 }
3601 3600
3602 3601 /*
3603 3602 * Turn on ECC_ECACHE if error type is E$ Data.
3604 3603 */
3605 3604 if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3606 3605 flt_status |= ECC_ECACHE;
3607 3606
3608 3607 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3609 3608 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3610 3609 if (ret != 0)
3611 3610 return (ret);
3612 3611
3613 3612 if (*lenp >= buflen)
3614 3613 return (ENAMETOOLONG);
3615 3614
3616 3615 (void) strncpy(buf, unum, buflen);
3617 3616
3618 3617 return (0);
3619 3618 }
3620 3619
3621 3620 /*
3622 3621 * Routine to return memory information associated
3623 3622 * with a physical address and syndrome.
3624 3623 */
3625 3624 int
3626 3625 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3627 3626 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3628 3627 int *segsp, int *banksp, int *mcidp)
3629 3628 {
3630 3629 int synd_status, synd_code;
3631 3630
3632 3631 if (afar == (uint64_t)-1)
3633 3632 return (ENXIO);
3634 3633
3635 3634 if (synd == (uint64_t)-1)
3636 3635 synd_status = AFLT_STAT_INVALID;
3637 3636 else
3638 3637 synd_status = AFLT_STAT_VALID;
3639 3638
3640 3639 synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3641 3640
3642 3641 if (p2get_mem_info != NULL)
3643 3642 return ((p2get_mem_info)(synd_code, afar,
3644 3643 mem_sizep, seg_sizep, bank_sizep,
3645 3644 segsp, banksp, mcidp));
3646 3645 else
3647 3646 return (ENOTSUP);
3648 3647 }
3649 3648
3650 3649 /*
3651 3650 * Routine to return a string identifying the physical
3652 3651 * name associated with a cpuid.
3653 3652 */
3654 3653 int
3655 3654 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3656 3655 {
3657 3656 int ret;
3658 3657 char unum[UNUM_NAMLEN];
3659 3658
3660 3659 if (&plat_get_cpu_unum) {
3661 3660 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3662 3661 != 0)
3663 3662 return (ret);
3664 3663 } else {
3665 3664 return (ENOTSUP);
3666 3665 }
3667 3666
3668 3667 if (*lenp >= buflen)
3669 3668 return (ENAMETOOLONG);
3670 3669
3671 3670 (void) strncpy(buf, unum, buflen);
3672 3671
3673 3672 return (0);
3674 3673 }
3675 3674
3676 3675 /*
3677 3676 * This routine exports the name buffer size.
3678 3677 */
3679 3678 size_t
3680 3679 cpu_get_name_bufsize()
3681 3680 {
3682 3681 return (UNUM_NAMLEN);
3683 3682 }
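A minimal sketch, assuming a hypothetical caller, of the intended pairing between cpu_get_name_bufsize() and cpu_get_cpu_unum(); my_print_cpu_unum is not part of this change.

static void
my_print_cpu_unum(int cpuid)
{
	size_t sz = cpu_get_name_bufsize();
	char *buf = kmem_alloc(sz, KM_SLEEP);
	int len;

	if (cpu_get_cpu_unum(cpuid, buf, (int)sz, &len) == 0)
		cmn_err(CE_NOTE, "cpu %d is %s", cpuid, buf);

	kmem_free(buf, sz);
}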
3684 3683
3685 3684 /*
3686 3685 * Historical function, apparently not used.
3687 3686 */
3688 3687 /* ARGSUSED */
3689 3688 void
3690 3689 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3691 3690 {}
3692 3691
3693 3692 /*
3694 3693 * Historical function only called for SBus errors in debugging.
3695 3694 */
3696 3695 /*ARGSUSED*/
3697 3696 void
3698 3697 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3699 3698 {}
3700 3699
3701 3700 /*
3702 3701 * Clear the AFSR sticky bits. The routine returns a non-zero value if
3703 3702 * any of the AFSR's sticky errors are detected. If a non-null pointer to
3704 3703 * an async fault structure argument is passed in, the captured error state
3705 3704 * (AFSR, AFAR) info will be returned in the structure.
3706 3705 */
3707 3706 int
3708 3707 clear_errors(ch_async_flt_t *ch_flt)
3709 3708 {
3710 3709 struct async_flt *aflt = (struct async_flt *)ch_flt;
3711 3710 ch_cpu_errors_t cpu_error_regs;
3712 3711
3713 3712 get_cpu_error_state(&cpu_error_regs);
3714 3713
3715 3714 if (ch_flt != NULL) {
3716 3715 aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3717 3716 aflt->flt_addr = cpu_error_regs.afar;
3718 3717 ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3719 3718 ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3720 3719 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3721 3720 #if defined(SERRANO)
3722 3721 ch_flt->afar2 = cpu_error_regs.afar2;
3723 3722 #endif /* SERRANO */
3724 3723 }
3725 3724
3726 3725 set_cpu_error_state(&cpu_error_regs);
3727 3726
3728 3727 return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3729 3728 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3730 3729 }
3731 3730
3732 3731 /*
3733 3732 * Clear any AFSR error bits, and check for persistence.
3734 3733 *
3735 3734 * It would be desirable to also insist that syndrome match. PCI handling
3736 3735 * has already filled flt_synd. For errors trapped by CPU we only fill
3737 3736 * flt_synd when we queue the event, so we do not have a valid flt_synd
3738 3737 * during initial classification (it is valid if we're called as part of
3739 3738 * subsequent low-pil additional classification attempts). We could try
3740 3739 * to determine which syndrome to use: we know we're only called for
3741 3740 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3742 3741 * would be esynd/none and esynd/msynd, respectively. If that is
3743 3742 * implemented then what do we do in the case that we do experience an
3744 3743 * error on the same afar but with different syndrome? At the very least
3745 3744 * we should count such occurrences. Anyway, for now, we'll leave it as
3746 3745 * it has been for ages.
3747 3746 */
3748 3747 static int
3749 3748 clear_ecc(struct async_flt *aflt)
3750 3749 {
3751 3750 ch_cpu_errors_t cpu_error_regs;
3752 3751
3753 3752 /*
3754 3753 * Snapshot the AFSR and AFAR and clear any errors
3755 3754 */
3756 3755 get_cpu_error_state(&cpu_error_regs);
3757 3756 set_cpu_error_state(&cpu_error_regs);
3758 3757
3759 3758 /*
3760 3759 * If any of the same memory access error bits are still on and
3761 3760 * the AFAR matches, return that the error is persistent.
3762 3761 */
3763 3762 return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3764 3763 cpu_error_regs.afar == aflt->flt_addr);
3765 3764 }
3766 3765
3767 3766 /*
3768 3767 * Turn off all cpu error detection, normally only used for panics.
3769 3768 */
3770 3769 void
3771 3770 cpu_disable_errors(void)
3772 3771 {
3773 3772 xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3774 3773
3775 3774 /*
3776 3775 * With error detection now turned off, check the other cpus
3777 3776 * logout areas for any unlogged errors.
3778 3777 */
3779 3778 if (enable_check_other_cpus_logout) {
3780 3779 cpu_check_other_cpus_logout();
3781 3780 /*
3782 3781 * Make a second pass over the logout areas, in case
3783 3782 * there is a failing CPU in an error-trap loop which
3784 3783 * will write to the logout area once it is emptied.
3785 3784 */
3786 3785 cpu_check_other_cpus_logout();
3787 3786 }
3788 3787 }
3789 3788
3790 3789 /*
3791 3790 * Enable errors.
3792 3791 */
3793 3792 void
3794 3793 cpu_enable_errors(void)
3795 3794 {
3796 3795 xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3797 3796 }
3798 3797
3799 3798 /*
3800 3799 * Flush the entire ecache using displacement flush by reading through a
3801 3800 * physical address range twice as large as the Ecache.
3802 3801 */
3803 3802 void
3804 3803 cpu_flush_ecache(void)
3805 3804 {
3806 3805 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3807 3806 cpunodes[CPU->cpu_id].ecache_linesize);
3808 3807 }
3809 3808
3810 3809 /*
3811 3810 * Return CPU E$ set size - E$ size divided by the associativity.
3812 3811 * We use this function in places where the CPU_PRIVATE ptr may not be
3813 3812 * initialized yet. Note that for send_mondo and in the Ecache scrubber,
3814 3813 * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set
3815 3814 * up before the kernel switches from OBP's to the kernel's trap table, so
3816 3815 * we don't have to worry about cpunodes being uninitialized.
3817 3816 */
3818 3817 int
3819 3818 cpu_ecache_set_size(struct cpu *cp)
3820 3819 {
3821 3820 if (CPU_PRIVATE(cp))
3822 3821 return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3823 3822
3824 3823 return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3825 3824 }
3826 3825
3827 3826 /*
3828 3827 * Flush Ecache line.
3829 3828 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3830 3829 * Uses normal displacement flush for Cheetah.
3831 3830 */
3832 3831 static void
3833 3832 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3834 3833 {
3835 3834 struct async_flt *aflt = (struct async_flt *)ch_flt;
3836 3835 int ec_set_size = cpu_ecache_set_size(CPU);
3837 3836
3838 3837 ecache_flush_line(aflt->flt_addr, ec_set_size);
3839 3838 }
3840 3839
3841 3840 /*
3842 3841 * Scrub physical address.
3843 3842 * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3844 3843 * Ecache or direct-mapped Ecache.
3845 3844 */
3846 3845 static void
3847 3846 cpu_scrubphys(struct async_flt *aflt)
3848 3847 {
3849 3848 int ec_set_size = cpu_ecache_set_size(CPU);
3850 3849
3851 3850 scrubphys(aflt->flt_addr, ec_set_size);
3852 3851 }
3853 3852
3854 3853 /*
3855 3854 * Clear physical address.
3856 3855 * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3857 3856 * Ecache or direct-mapped Ecache.
3858 3857 */
3859 3858 void
3860 3859 cpu_clearphys(struct async_flt *aflt)
3861 3860 {
3862 3861 int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3863 3862 int ec_set_size = cpu_ecache_set_size(CPU);
3864 3863
3865 3864
3866 3865 clearphys(aflt->flt_addr, ec_set_size, lsize);
3867 3866 }
3868 3867
3869 3868 #if defined(CPU_IMP_ECACHE_ASSOC)
3870 3869 /*
3871 3870 * Check for a matching valid line in all the sets.
3872 3871 * If found, return set# + 1. Otherwise return 0.
3873 3872 */
3874 3873 static int
3875 3874 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3876 3875 {
3877 3876 struct async_flt *aflt = (struct async_flt *)ch_flt;
3878 3877 int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3879 3878 int ec_set_size = cpu_ecache_set_size(CPU);
3880 3879 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3881 3880 int nway = cpu_ecache_nway();
3882 3881 int i;
3883 3882
3884 3883 for (i = 0; i < nway; i++, ecp++) {
3885 3884 if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3886 3885 (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3887 3886 cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3888 3887 return (i+1);
3889 3888 }
3890 3889 return (0);
3891 3890 }
3892 3891 #endif /* CPU_IMP_ECACHE_ASSOC */
3893 3892
3894 3893 /*
3895 3894 * Check whether a line in the given logout info matches the specified
3896 3895 * fault address. If reqval is set then the line must not be Invalid.
3897 3896 * Returns 0 on failure; on success (way + 1) is returned and *level is
3898 3897 * set to 2 for l2$ or 3 for l3$.
3899 3898 */
3900 3899 static int
3901 3900 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3902 3901 {
3903 3902 ch_diag_data_t *cdp = data;
3904 3903 ch_ec_data_t *ecp;
3905 3904 int totalsize, ec_set_size;
3906 3905 int i, ways;
3907 3906 int match = 0;
3908 3907 int tagvalid;
3909 3908 uint64_t addr, tagpa;
3910 3909 int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3911 3910
3912 3911 /*
3913 3912 * Check the l2$ logout data
3914 3913 */
3915 3914 if (ispanther) {
3916 3915 ecp = &cdp->chd_l2_data[0];
3917 3916 ec_set_size = PN_L2_SET_SIZE;
3918 3917 ways = PN_L2_NWAYS;
3919 3918 } else {
3920 3919 ecp = &cdp->chd_ec_data[0];
3921 3920 ec_set_size = cpu_ecache_set_size(CPU);
3922 3921 ways = cpu_ecache_nway();
3923 3922 totalsize = cpunodes[CPU->cpu_id].ecache_size;
3924 3923 }
3925 3924 /* remove low order PA bits from fault address not used in PA tag */
3926 3925 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3927 3926 for (i = 0; i < ways; i++, ecp++) {
3928 3927 if (ispanther) {
3929 3928 tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3930 3929 tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3931 3930 } else {
3932 3931 tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3933 3932 tagvalid = !cpu_ectag_line_invalid(totalsize,
3934 3933 ecp->ec_tag);
3935 3934 }
3936 3935 if (tagpa == addr && (!reqval || tagvalid)) {
3937 3936 match = i + 1;
3938 3937 *level = 2;
3939 3938 break;
3940 3939 }
3941 3940 }
3942 3941
3943 3942 if (match || !ispanther)
3944 3943 return (match);
3945 3944
3946 3945 /* For Panther we also check the l3$ */
3947 3946 ecp = &cdp->chd_ec_data[0];
3948 3947 ec_set_size = PN_L3_SET_SIZE;
3949 3948 ways = PN_L3_NWAYS;
3950 3949 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3951 3950
3952 3951 for (i = 0; i < ways; i++, ecp++) {
3953 3952 if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3954 3953 !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3955 3954 match = i + 1;
3956 3955 *level = 3;
3957 3956 break;
3958 3957 }
3959 3958 }
3960 3959
3961 3960 return (match);
3962 3961 }
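A short sketch of decoding the (way + 1)/level return convention documented above; my_show_match, faddr and cdp are hypothetical, with cdp assumed to point at captured logout data.

static void
my_show_match(uint64_t faddr, ch_diag_data_t *cdp)
{
	int level, way;

	if ((way = cpu_matching_ecache_line(faddr, cdp, 1, &level)) == 0)
		return;			/* no matching line */

	way--;				/* the routine returns (way + 1) */
	cmn_err(CE_NOTE, "addr 0x%llx matched l%d$ way %d",
	    (u_longlong_t)faddr, level, way);
}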
3963 3962
3964 3963 #if defined(CPU_IMP_L1_CACHE_PARITY)
3965 3964 /*
3966 3965 * Record information related to the source of a Dcache Parity Error.
3967 3966 */
3968 3967 static void
3969 3968 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3970 3969 {
3971 3970 int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3972 3971 int index;
3973 3972
3974 3973 /*
3975 3974 * Since instruction decode cannot be done at high PIL,
3976 3975 * just examine the entire Dcache to locate the error.
3977 3976 */
3978 3977 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3979 3978 ch_flt->parity_data.dpe.cpl_way = -1;
3980 3979 ch_flt->parity_data.dpe.cpl_off = -1;
3981 3980 }
3982 3981 for (index = 0; index < dc_set_size; index += dcache_linesize)
3983 3982 cpu_dcache_parity_check(ch_flt, index);
3984 3983 }
3985 3984
3986 3985 /*
3987 3986 * Check all ways of the Dcache at a specified index for good parity.
3988 3987 */
3989 3988 static void
3990 3989 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3991 3990 {
3992 3991 int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3993 3992 uint64_t parity_bits, pbits, data_word;
3994 3993 static int parity_bits_popc[] = { 0, 1, 1, 0 };
3995 3994 int way, word, data_byte;
3996 3995 ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3997 3996 ch_dc_data_t tmp_dcp;
3998 3997
3999 3998 for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
4000 3999 /*
4001 4000 * Perform diagnostic read.
4002 4001 */
4003 4002 get_dcache_dtag(index + way * dc_set_size,
4004 4003 (uint64_t *)&tmp_dcp);
4005 4004
4006 4005 /*
4007 4006 * Check tag for even parity.
4008 4007 * Sum of 1 bits (including parity bit) should be even.
4009 4008 */
4010 4009 if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
4011 4010 /*
4012 4011 * If this is the first error, log detailed information
4013 4012 * about it and check the snoop tag. Otherwise just
4014 4013 * record the fact that we found another error.
4015 4014 */
4016 4015 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4017 4016 ch_flt->parity_data.dpe.cpl_way = way;
4018 4017 ch_flt->parity_data.dpe.cpl_cache =
4019 4018 CPU_DC_PARITY;
4020 4019 ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
4021 4020
4022 4021 if (popc64(tmp_dcp.dc_sntag &
4023 4022 CHP_DCSNTAG_PARMASK) & 1) {
4024 4023 ch_flt->parity_data.dpe.cpl_tag |=
4025 4024 CHP_DC_SNTAG;
4026 4025 ch_flt->parity_data.dpe.cpl_lcnt++;
4027 4026 }
4028 4027
4029 4028 bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
4030 4029 }
4031 4030
4032 4031 ch_flt->parity_data.dpe.cpl_lcnt++;
4033 4032 }
4034 4033
4035 4034 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4036 4035 /*
4037 4036 * Panther has more parity bits than the other
4038 4037 * processors for covering dcache data and so each
4039 4038 * byte of data in each word has its own parity bit.
4040 4039 */
4041 4040 parity_bits = tmp_dcp.dc_pn_data_parity;
4042 4041 for (word = 0; word < 4; word++) {
4043 4042 data_word = tmp_dcp.dc_data[word];
4044 4043 pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
4045 4044 for (data_byte = 0; data_byte < 8;
4046 4045 data_byte++) {
4047 4046 if (((popc64(data_word &
4048 4047 PN_DC_DATA_PARITY_MASK)) & 1) ^
4049 4048 (pbits & 1)) {
4050 4049 cpu_record_dc_data_parity(
4051 4050 ch_flt, dcp, &tmp_dcp, way,
4052 4051 word);
4053 4052 }
4054 4053 pbits >>= 1;
4055 4054 data_word >>= 8;
4056 4055 }
4057 4056 parity_bits >>= 8;
4058 4057 }
4059 4058 } else {
4060 4059 /*
4061 4060 * Check data array for even parity.
4062 4061 * The 8 parity bits are grouped into 4 pairs each
4063 4062 * of which covers a 64-bit word. The endianness is
4064 4063 * reversed -- the low-order parity bits cover the
4065 4064 * high-order data words.
4066 4065 */
4067 4066 parity_bits = tmp_dcp.dc_utag >> 8;
4068 4067 for (word = 0; word < 4; word++) {
4069 4068 pbits = (parity_bits >> (6 - word * 2)) & 3;
4070 4069 if ((popc64(tmp_dcp.dc_data[word]) +
4071 4070 parity_bits_popc[pbits]) & 1) {
4072 4071 cpu_record_dc_data_parity(ch_flt, dcp,
4073 4072 &tmp_dcp, way, word);
4074 4073 }
4075 4074 }
4076 4075 }
4077 4076 }
4078 4077 }
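To make the reversed parity-pair layout in the non-Panther branch above concrete, here is a stand-alone sketch of the same check: the 8 parity bits sit in dc_utag[15:8], word w (0..3) is covered by the pair at bits (7 - 2*w):(6 - 2*w) of that field, and the word is good when its data bits plus that pair contain an even number of ones. dc_word_parity_bad is a hypothetical helper, and __builtin_popcountll merely stands in for the kernel's popc64().

static int
dc_word_parity_bad(uint64_t dc_utag, const uint64_t dc_data[4], int word)
{
	unsigned int parity_bits = (dc_utag >> 8) & 0xff;
	unsigned int pbits = (parity_bits >> (6 - word * 2)) & 3;

	/* data bits plus the pair's two parity bits must sum to even */
	return ((__builtin_popcountll(dc_data[word]) +
	    __builtin_popcountll((unsigned long long)pbits)) & 1);
}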
4079 4078
4080 4079 static void
4081 4080 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4082 4081 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4083 4082 {
4084 4083 /*
4085 4084 * If this is the first error, log detailed information about it.
4086 4085 * Otherwise just record the fact that we found another error.
4087 4086 */
4088 4087 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4089 4088 ch_flt->parity_data.dpe.cpl_way = way;
4090 4089 ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4091 4090 ch_flt->parity_data.dpe.cpl_off = word * 8;
4092 4091 bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4093 4092 }
4094 4093 ch_flt->parity_data.dpe.cpl_lcnt++;
4095 4094 }
4096 4095
4097 4096 /*
4098 4097 * Record information related to the source of an Icache Parity Error.
4099 4098 *
4100 4099 * Called with the Icache disabled so any diagnostic accesses are safe.
4101 4100 */
4102 4101 static void
4103 4102 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4104 4103 {
4105 4104 int ic_set_size;
4106 4105 int ic_linesize;
4107 4106 int index;
4108 4107
4109 4108 if (CPU_PRIVATE(CPU)) {
4110 4109 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4111 4110 CH_ICACHE_NWAY;
4112 4111 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4113 4112 } else {
4114 4113 ic_set_size = icache_size / CH_ICACHE_NWAY;
4115 4114 ic_linesize = icache_linesize;
4116 4115 }
4117 4116
4118 4117 ch_flt->parity_data.ipe.cpl_way = -1;
4119 4118 ch_flt->parity_data.ipe.cpl_off = -1;
4120 4119
4121 4120 for (index = 0; index < ic_set_size; index += ic_linesize)
4122 4121 cpu_icache_parity_check(ch_flt, index);
4123 4122 }
4124 4123
4125 4124 /*
4126 4125 * Check all ways of the Icache at a specified index for good parity.
4127 4126 */
4128 4127 static void
4129 4128 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4130 4129 {
4131 4130 uint64_t parmask, pn_inst_parity;
4132 4131 int ic_set_size;
4133 4132 int ic_linesize;
4134 4133 int flt_index, way, instr, num_instr;
4135 4134 struct async_flt *aflt = (struct async_flt *)ch_flt;
4136 4135 ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4137 4136 ch_ic_data_t tmp_icp;
4138 4137
4139 4138 if (CPU_PRIVATE(CPU)) {
4140 4139 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4141 4140 CH_ICACHE_NWAY;
4142 4141 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4143 4142 } else {
4144 4143 ic_set_size = icache_size / CH_ICACHE_NWAY;
4145 4144 ic_linesize = icache_linesize;
4146 4145 }
4147 4146
4148 4147 /*
4149 4148 * Panther has twice as many instructions per icache line and the
4150 4149 * instruction parity bit is in a different location.
4151 4150 */
4152 4151 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4153 4152 num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4154 4153 pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4155 4154 } else {
4156 4155 num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4157 4156 pn_inst_parity = 0;
4158 4157 }
4159 4158
4160 4159 /*
4161 4160 * Index at which we expect to find the parity error.
4162 4161 */
4163 4162 flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4164 4163
4165 4164 for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4166 4165 /*
4167 4166 * Diagnostic reads expect address argument in ASI format.
4168 4167 */
4169 4168 get_icache_dtag(2 * (index + way * ic_set_size),
4170 4169 (uint64_t *)&tmp_icp);
4171 4170
4172 4171 /*
4173 4172 * If this is the index in which we expect to find the
4174 4173 * error, log detailed information about each of the ways.
4175 4174 * This information will be displayed later if we can't
4176 4175 * determine the exact way in which the error is located.
4177 4176 */
4178 4177 if (flt_index == index)
4179 4178 bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4180 4179
4181 4180 /*
4182 4181 * Check tag for even parity.
4183 4182 * Sum of 1 bits (including parity bit) should be even.
4184 4183 */
4185 4184 if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4186 4185 /*
4187 4186 * If this way is the one in which we expected
4188 4187 * to find the error, record the way and check the
4189 4188 * snoop tag. Otherwise just record the fact we
4190 4189 * found another error.
4191 4190 */
4192 4191 if (flt_index == index) {
4193 4192 ch_flt->parity_data.ipe.cpl_way = way;
4194 4193 ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4195 4194
4196 4195 if (popc64(tmp_icp.ic_sntag &
4197 4196 CHP_ICSNTAG_PARMASK) & 1) {
4198 4197 ch_flt->parity_data.ipe.cpl_tag |=
4199 4198 CHP_IC_SNTAG;
4200 4199 ch_flt->parity_data.ipe.cpl_lcnt++;
4201 4200 }
4202 4201
4203 4202 }
4204 4203 ch_flt->parity_data.ipe.cpl_lcnt++;
4205 4204 continue;
4206 4205 }
4207 4206
4208 4207 /*
4209 4208 * Check instruction data for even parity.
4210 4209 * Bits participating in parity differ for PC-relative
4211 4210 * versus non-PC-relative instructions.
4212 4211 */
4213 4212 for (instr = 0; instr < num_instr; instr++) {
4214 4213 parmask = (tmp_icp.ic_data[instr] &
4215 4214 CH_ICDATA_PRED_ISPCREL) ?
4216 4215 (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4217 4216 (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4218 4217 if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4219 4218 /*
4220 4219 * If this way is the one in which we expected
4221 4220 * to find the error, record the way and offset.
4222 4221 * Otherwise just log the fact we found another
4223 4222 * error.
4224 4223 */
4225 4224 if (flt_index == index) {
4226 4225 ch_flt->parity_data.ipe.cpl_way = way;
4227 4226 ch_flt->parity_data.ipe.cpl_off =
4228 4227 instr * 4;
4229 4228 }
4230 4229 ch_flt->parity_data.ipe.cpl_lcnt++;
4231 4230 continue;
4232 4231 }
4233 4232 }
4234 4233 }
4235 4234 }
4236 4235
4237 4236 /*
4238 4237 * Record information related to the source of a Pcache Parity Error.
4239 4238 */
4240 4239 static void
4241 4240 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4242 4241 {
4243 4242 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4244 4243 int index;
4245 4244
4246 4245 /*
4247 4246 * Since instruction decode cannot be done at high PIL, just
4248 4247 * examine the entire Pcache to check for any parity errors.
4249 4248 */
4250 4249 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4251 4250 ch_flt->parity_data.dpe.cpl_way = -1;
4252 4251 ch_flt->parity_data.dpe.cpl_off = -1;
4253 4252 }
4254 4253 for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4255 4254 cpu_pcache_parity_check(ch_flt, index);
4256 4255 }
4257 4256
4258 4257 /*
4259 4258 * Check all ways of the Pcache at a specified index for good parity.
4260 4259 */
4261 4260 static void
4262 4261 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4263 4262 {
4264 4263 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4265 4264 int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4266 4265 int way, word, pbit, parity_bits;
4267 4266 ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4268 4267 ch_pc_data_t tmp_pcp;
4269 4268
4270 4269 for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4271 4270 /*
4272 4271 * Perform diagnostic read.
4273 4272 */
4274 4273 get_pcache_dtag(index + way * pc_set_size,
4275 4274 (uint64_t *)&tmp_pcp);
4276 4275 /*
4277 4276 * Check data array for odd parity. There are 8 parity
4278 4277 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4279 4278 * of those bits covers exactly 8 bytes of the data
4280 4279 * array:
4281 4280 *
4282 4281 * parity bit P$ data bytes covered
4283 4282 * ---------- ---------------------
4284 4283 * 50 63:56
4285 4284 * 51 55:48
4286 4285 * 52 47:40
4287 4286 * 53 39:32
4288 4287 * 54 31:24
4289 4288 * 55 23:16
4290 4289 * 56 15:8
4291 4290 * 57 7:0
4292 4291 */
4293 4292 parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4294 4293 for (word = 0; word < pc_data_words; word++) {
4295 4294 pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4296 4295 if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4297 4296 /*
4298 4297 * If this is the first error, log detailed
4299 4298 * information about it. Otherwise just record
4300 4299 * the fact that we found another error.
4301 4300 */
4302 4301 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4303 4302 ch_flt->parity_data.dpe.cpl_way = way;
4304 4303 ch_flt->parity_data.dpe.cpl_cache =
4305 4304 CPU_PC_PARITY;
4306 4305 ch_flt->parity_data.dpe.cpl_off =
4307 4306 word * sizeof (uint64_t);
4308 4307 bcopy(&tmp_pcp, pcp,
4309 4308 sizeof (ch_pc_data_t));
4310 4309 }
4311 4310 ch_flt->parity_data.dpe.cpl_lcnt++;
4312 4311 }
4313 4312 }
4314 4313 }
4315 4314 }
4316 4315
4317 4316
4318 4317 /*
4319 4318 * Add L1 Data cache data to the ereport payload.
4320 4319 */
4321 4320 static void
4322 4321 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4323 4322 {
4324 4323 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4325 4324 ch_dc_data_t *dcp;
4326 4325 ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4327 4326 uint_t nelem;
4328 4327 int i, ways_to_check, ways_logged = 0;
4329 4328
4330 4329 /*
4331 4330 * If this is a D$ fault then there may be multiple
4332 4331 * ways captured in the ch_parity_log_t structure.
4333 4332 * Otherwise, there will be at most one way captured
4334 4333 * in the ch_diag_data_t struct.
4335 4334 * Check each way to see if it should be encoded.
4336 4335 */
4337 4336 if (ch_flt->flt_type == CPU_DC_PARITY)
4338 4337 ways_to_check = CH_DCACHE_NWAY;
4339 4338 else
4340 4339 ways_to_check = 1;
4341 4340 for (i = 0; i < ways_to_check; i++) {
4342 4341 if (ch_flt->flt_type == CPU_DC_PARITY)
4343 4342 dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4344 4343 else
4345 4344 dcp = &ch_flt->flt_diag_data.chd_dc_data;
4346 4345 if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4347 4346 bcopy(dcp, &dcdata[ways_logged],
4348 4347 sizeof (ch_dc_data_t));
4349 4348 ways_logged++;
4350 4349 }
4351 4350 }
4352 4351
4353 4352 /*
4354 4353 * Add the dcache data to the payload.
4355 4354 */
4356 4355 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4357 4356 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4358 4357 if (ways_logged != 0) {
4359 4358 nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4360 4359 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4361 4360 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4362 4361 }
4363 4362 }
4364 4363
4365 4364 /*
4366 4365 * Add L1 Instruction cache data to the ereport payload.
4367 4366 */
4368 4367 static void
4369 4368 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4370 4369 {
4371 4370 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4372 4371 ch_ic_data_t *icp;
4373 4372 ch_ic_data_t icdata[CH_ICACHE_NWAY];
4374 4373 uint_t nelem;
4375 4374 int i, ways_to_check, ways_logged = 0;
4376 4375
4377 4376 /*
4378 4377 * If this is an I$ fault then there may be multiple
4379 4378 * ways captured in the ch_parity_log_t structure.
4380 4379 * Otherwise, there will be at most one way captured
4381 4380 * in the ch_diag_data_t struct.
4382 4381 * Check each way to see if it should be encoded.
4383 4382 */
4384 4383 if (ch_flt->flt_type == CPU_IC_PARITY)
4385 4384 ways_to_check = CH_ICACHE_NWAY;
4386 4385 else
4387 4386 ways_to_check = 1;
4388 4387 for (i = 0; i < ways_to_check; i++) {
4389 4388 if (ch_flt->flt_type == CPU_IC_PARITY)
4390 4389 icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4391 4390 else
4392 4391 icp = &ch_flt->flt_diag_data.chd_ic_data;
4393 4392 if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4394 4393 bcopy(icp, &icdata[ways_logged],
4395 4394 sizeof (ch_ic_data_t));
4396 4395 ways_logged++;
4397 4396 }
4398 4397 }
4399 4398
4400 4399 /*
4401 4400 * Add the icache data to the payload.
4402 4401 */
4403 4402 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4404 4403 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4405 4404 if (ways_logged != 0) {
4406 4405 nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4407 4406 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4408 4407 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4409 4408 }
4410 4409 }
4411 4410
4412 4411 #endif /* CPU_IMP_L1_CACHE_PARITY */
4413 4412
4414 4413 /*
4415 4414 * Add ecache data to payload.
4416 4415 */
4417 4416 static void
4418 4417 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4419 4418 {
4420 4419 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4421 4420 ch_ec_data_t *ecp;
4422 4421 ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4423 4422 uint_t nelem;
4424 4423 int i, ways_logged = 0;
4425 4424
4426 4425 /*
4427 4426 * Check each way to see if it should be encoded
4428 4427 * and concatenate it into a temporary buffer.
4429 4428 */
4430 4429 for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4431 4430 ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4432 4431 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4433 4432 bcopy(ecp, &ecdata[ways_logged],
4434 4433 sizeof (ch_ec_data_t));
4435 4434 ways_logged++;
4436 4435 }
4437 4436 }
4438 4437
4439 4438 /*
4440 4439 * Panther CPUs have an additional level of cache and so
4441 4440 * what we just collected was the L3 (ecache) and not the
4442 4441 * L2 cache.
4443 4442 */
4444 4443 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4445 4444 /*
4446 4445 * Add the L3 (ecache) data to the payload.
4447 4446 */
4448 4447 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4449 4448 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4450 4449 if (ways_logged != 0) {
4451 4450 nelem = sizeof (ch_ec_data_t) /
4452 4451 sizeof (uint64_t) * ways_logged;
4453 4452 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4454 4453 DATA_TYPE_UINT64_ARRAY, nelem,
4455 4454 (uint64_t *)ecdata, NULL);
4456 4455 }
4457 4456
4458 4457 /*
4459 4458 * Now collect the L2 cache.
4460 4459 */
4461 4460 ways_logged = 0;
4462 4461 for (i = 0; i < PN_L2_NWAYS; i++) {
4463 4462 ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4464 4463 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4465 4464 bcopy(ecp, &ecdata[ways_logged],
4466 4465 sizeof (ch_ec_data_t));
4467 4466 ways_logged++;
4468 4467 }
4469 4468 }
4470 4469 }
4471 4470
4472 4471 /*
4473 4472 * Add the L2 cache data to the payload.
4474 4473 */
4475 4474 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4476 4475 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4477 4476 if (ways_logged != 0) {
4478 4477 nelem = sizeof (ch_ec_data_t) /
4479 4478 sizeof (uint64_t) * ways_logged;
4480 4479 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4481 4480 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL);
4482 4481 }
4483 4482 }
4484 4483
4485 4484 /*
4486 4485 * Initialize cpu scheme for specified cpu.
4487 4486 */
4488 4487 static void
4489 4488 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4490 4489 {
4491 4490 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4492 4491 uint8_t mask;
4493 4492
4494 4493 mask = cpunodes[cpuid].version;
4495 4494 (void) snprintf(sbuf, sizeof (sbuf), "%llX",
4496 4495 (u_longlong_t)cpunodes[cpuid].device_id);
4497 4496 (void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4498 4497 cpuid, &mask, (const char *)sbuf);
4499 4498 }
4500 4499
4501 4500 /*
4502 4501 * Returns ereport resource type.
4503 4502 */
4504 4503 static int
4505 4504 cpu_error_to_resource_type(struct async_flt *aflt)
4506 4505 {
4507 4506 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4508 4507
4509 4508 switch (ch_flt->flt_type) {
4510 4509
4511 4510 case CPU_CE_ECACHE:
4512 4511 case CPU_UE_ECACHE:
4513 4512 case CPU_UE_ECACHE_RETIRE:
4514 4513 case CPU_ORPH:
4515 4514 /*
4516 4515 * If AFSR error bit indicates L2$ Data for Cheetah,
4517 4516 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4518 4517 * E$ Data type; otherwise, return CPU type.
4519 4518 */
4520 4519 if (cpu_error_is_ecache_data(aflt->flt_inst,
4521 4520 ch_flt->flt_bit))
4522 4521 return (ERRTYPE_ECACHE_DATA);
4523 4522 return (ERRTYPE_CPU);
4524 4523
4525 4524 case CPU_CE:
4526 4525 case CPU_UE:
4527 4526 case CPU_EMC:
4528 4527 case CPU_DUE:
4529 4528 case CPU_RCE:
4530 4529 case CPU_RUE:
4531 4530 case CPU_FRC:
4532 4531 case CPU_FRU:
4533 4532 return (ERRTYPE_MEMORY);
4534 4533
4535 4534 case CPU_IC_PARITY:
4536 4535 case CPU_DC_PARITY:
4537 4536 case CPU_FPUERR:
4538 4537 case CPU_PC_PARITY:
4539 4538 case CPU_ITLB_PARITY:
4540 4539 case CPU_DTLB_PARITY:
4541 4540 return (ERRTYPE_CPU);
4542 4541 }
4543 4542 return (ERRTYPE_UNKNOWN);
4544 4543 }
4545 4544
4546 4545 /*
4547 4546 * Encode the data saved in the ch_async_flt_t struct into
4548 4547 * the FM ereport payload.
4549 4548 */
4550 4549 static void
4551 4550 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4552 4551 nvlist_t *resource, int *afar_status, int *synd_status)
4553 4552 {
4554 4553 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4555 4554 *synd_status = AFLT_STAT_INVALID;
4556 4555 *afar_status = AFLT_STAT_INVALID;
4557 4556
4558 4557 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4559 4558 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4560 4559 DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4561 4560 }
4562 4561
4563 4562 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4564 4563 IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4565 4564 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4566 4565 DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4567 4566 }
4568 4567
4569 4568 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4570 4569 *afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4571 4570 ch_flt->flt_bit);
4572 4571 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4573 4572 DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4574 4573 }
4575 4574
4576 4575 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4577 4576 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4578 4577 DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4579 4578 }
4580 4579
4581 4580 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4582 4581 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4583 4582 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4584 4583 }
4585 4584
4586 4585 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4587 4586 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4588 4587 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4589 4588 }
4590 4589
4591 4590 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4592 4591 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4593 4592 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4594 4593 }
4595 4594
4596 4595 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4597 4596 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4598 4597 DATA_TYPE_BOOLEAN_VALUE,
4599 4598 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4600 4599 }
4601 4600
4602 4601 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4603 4602 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4604 4603 DATA_TYPE_BOOLEAN_VALUE,
4605 4604 (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4606 4605 }
4607 4606
4608 4607 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4609 4608 *synd_status = afsr_to_synd_status(aflt->flt_inst,
4610 4609 ch_flt->afsr_errs, ch_flt->flt_bit);
4611 4610 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4612 4611 DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4613 4612 }
4614 4613
4615 4614 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4616 4615 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4617 4616 DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4618 4617 }
4619 4618
4620 4619 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4621 4620 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4622 4621 DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4623 4622 }
4624 4623
4625 4624 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4626 4625 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4627 4626 DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4628 4627 }
4629 4628
4630 4629 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4631 4630 cpu_payload_add_ecache(aflt, payload);
4632 4631
4633 4632 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4634 4633 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4635 4634 DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4636 4635 }
4637 4636
4638 4637 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4639 4638 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4640 4639 DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4641 4640 }
4642 4641
4643 4642 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4644 4643 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4645 4644 DATA_TYPE_UINT32_ARRAY, 16,
4646 4645 (uint32_t *)&ch_flt->flt_fpdata, NULL);
4647 4646 }
4648 4647
4649 4648 #if defined(CPU_IMP_L1_CACHE_PARITY)
4650 4649 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4651 4650 cpu_payload_add_dcache(aflt, payload);
4652 4651 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4653 4652 cpu_payload_add_icache(aflt, payload);
4654 4653 #endif /* CPU_IMP_L1_CACHE_PARITY */
4655 4654
4656 4655 #if defined(CHEETAH_PLUS)
4657 4656 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4658 4657 cpu_payload_add_pcache(aflt, payload);
4659 4658 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4660 4659 cpu_payload_add_tlb(aflt, payload);
4661 4660 #endif /* CHEETAH_PLUS */
4662 4661 /*
4663 4662 * Create the FMRI that goes into the payload
4664 4663 * and contains the unum info if necessary.
4665 4664 */
4666 4665 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4667 4666 char unum[UNUM_NAMLEN] = "";
4668 4667 char sid[DIMM_SERIAL_ID_LEN] = "";
4669 4668 int len, ret, rtype, synd_code;
4670 4669 uint64_t offset = (uint64_t)-1;
4671 4670
4672 4671 rtype = cpu_error_to_resource_type(aflt);
4673 4672 switch (rtype) {
4674 4673
4675 4674 case ERRTYPE_MEMORY:
4676 4675 case ERRTYPE_ECACHE_DATA:
4677 4676
4678 4677 /*
4679 4678 * Memory errors, do unum lookup
4680 4679 */
4681 4680 if (*afar_status == AFLT_STAT_INVALID)
4682 4681 break;
4683 4682
4684 4683 if (rtype == ERRTYPE_ECACHE_DATA)
4685 4684 aflt->flt_status |= ECC_ECACHE;
4686 4685 else
4687 4686 aflt->flt_status &= ~ECC_ECACHE;
4688 4687
4689 4688 synd_code = synd_to_synd_code(*synd_status,
4690 4689 aflt->flt_synd, ch_flt->flt_bit);
4691 4690
4692 4691 if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4693 4692 break;
4694 4693
4695 4694 ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4696 4695 &len);
4697 4696
4698 4697 if (ret == 0) {
4699 4698 (void) cpu_get_mem_offset(aflt->flt_addr,
4700 4699 &offset);
4701 4700 }
4702 4701
4703 4702 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4704 4703 NULL, unum, (ret == 0) ? sid : NULL, offset);
4705 4704 fm_payload_set(payload,
4706 4705 FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4707 4706 DATA_TYPE_NVLIST, resource, NULL);
4708 4707 break;
4709 4708
4710 4709 case ERRTYPE_CPU:
4711 4710 /*
4712 4711 * On-board processor array error, add cpu resource.
4713 4712 */
4714 4713 cpu_fmri_cpu_set(resource, aflt->flt_inst);
4715 4714 fm_payload_set(payload,
4716 4715 FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4717 4716 DATA_TYPE_NVLIST, resource, NULL);
4718 4717 break;
4719 4718 }
4720 4719 }
4721 4720 }
4722 4721
4723 4722 /*
4724 4723 * Initialize the way info if necessary.
4725 4724 */
4726 4725 void
4727 4726 cpu_ereport_init(struct async_flt *aflt)
4728 4727 {
4729 4728 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4730 4729 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4731 4730 ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4732 4731 int i;
4733 4732
4734 4733 /*
4735 4734 * Initialize the info in the CPU logout structure.
4736 4735 * The I$/D$ way information is not initialized here
4737 4736 * since it is captured in the logout assembly code.
4738 4737 */
4739 4738 for (i = 0; i < CHD_EC_DATA_SETS; i++)
4740 4739 (ecp + i)->ec_way = i;
4741 4740
4742 4741 for (i = 0; i < PN_L2_NWAYS; i++)
4743 4742 (l2p + i)->ec_way = i;
4744 4743 }
4745 4744
4746 4745 /*
4747 4746 * Returns whether fault address is valid for this error bit and
4748 4747 * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4749 4748 */
4750 4749 int
4751 4750 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4752 4751 {
4753 4752 struct async_flt *aflt = (struct async_flt *)ch_flt;
4754 4753
4755 4754 return ((t_afsr_bit & C_AFSR_MEMORY) &&
4756 4755 afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4757 4756 AFLT_STAT_VALID &&
4758 4757 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4759 4758 }
4760 4759
4761 4760 /*
4762 4761 * Returns whether fault address is valid based on the error bit for the
4763 4762 * one event being queued and whether the address is "in memory".
4764 4763 */
4765 4764 static int
4766 4765 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4767 4766 {
4768 4767 struct async_flt *aflt = (struct async_flt *)ch_flt;
4769 4768 int afar_status;
4770 4769 uint64_t afsr_errs, afsr_ow, *ow_bits;
4771 4770
4772 4771 if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4773 4772 !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4774 4773 return (0);
4775 4774
4776 4775 afsr_errs = ch_flt->afsr_errs;
4777 4776 afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4778 4777
4779 4778 switch (afar_status) {
4780 4779 case AFLT_STAT_VALID:
4781 4780 return (1);
4782 4781
4783 4782 case AFLT_STAT_AMBIGUOUS:
4784 4783 /*
4785 4784 * Status is ambiguous since another error bit (or bits)
4786 4785 		 * of equal priority to the specified bit is on in the afsr,
4787 4786 		 * so check those bits. Return 1 only if the bits that are on
4788 4787 		 * in the same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4789 4788 * Otherwise not all the equal priority bits are for memory
4790 4789 * errors, so return 0.
4791 4790 */
4792 4791 ow_bits = afar_overwrite;
4793 4792 while ((afsr_ow = *ow_bits++) != 0) {
4794 4793 /*
4795 4794 * Get other bits that are on in t_afsr_bit's priority
4796 4795 * class to check for Memory Error bits only.
4797 4796 */
4798 4797 if (afsr_ow & t_afsr_bit) {
4799 4798 if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4800 4799 return (0);
4801 4800 else
4802 4801 return (1);
4803 4802 }
4804 4803 }
4805 4804 /*FALLTHRU*/
4806 4805
4807 4806 default:
4808 4807 return (0);
4809 4808 }
4810 4809 }
4811 4810
4812 4811 static void
4813 4812 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4814 4813 {
4815 4814 struct async_flt *aflt = (struct async_flt *)ch_flt;
4816 4815 ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4817 4816 ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4818 4817 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4819 4818 #if defined(CPU_IMP_ECACHE_ASSOC)
4820 4819 int i, nway;
4821 4820 #endif /* CPU_IMP_ECACHE_ASSOC */
4822 4821
4823 4822 /*
4824 4823 	 * Check if the captured CPU logout was valid.
4825 4824 */
4826 4825 if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4827 4826 ch_flt->flt_data_incomplete)
4828 4827 return;
4829 4828
4830 4829 #if defined(CPU_IMP_ECACHE_ASSOC)
4831 4830 nway = cpu_ecache_nway();
4832 4831 i = cpu_ecache_line_valid(ch_flt);
4833 4832 if (i == 0 || i > nway) {
4834 4833 for (i = 0; i < nway; i++)
4835 4834 ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4836 4835 } else
4837 4836 ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4838 4837 #else /* CPU_IMP_ECACHE_ASSOC */
4839 4838 ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4840 4839 #endif /* CPU_IMP_ECACHE_ASSOC */
4841 4840
4842 4841 #if defined(CHEETAH_PLUS)
4843 4842 pn_cpu_log_diag_l2_info(ch_flt);
4844 4843 #endif /* CHEETAH_PLUS */
4845 4844
4846 4845 if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4847 4846 dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4848 4847 dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4849 4848 }
4850 4849
4851 4850 if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4852 4851 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4853 4852 icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4854 4853 else
4855 4854 icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4856 4855 icp->ic_logflag = IC_LOGFLAG_MAGIC;
4857 4856 }
4858 4857 }
4859 4858
4860 4859 /*
4861 4860 * Cheetah ECC calculation.
4862 4861 *
4863 4862 * We only need to do the calculation on the data bits and can ignore check
4864 4863 * bit and Mtag bit terms in the calculation.
4865 4864 */
4866 4865 static uint64_t ch_ecc_table[9][2] = {
4867 4866 /*
4868 4867 * low order 64-bits high-order 64-bits
4869 4868 */
4870 4869 { 0x46bffffeccd1177f, 0x488800022100014c },
4871 4870 { 0x42fccc81331ff77f, 0x14424f1010249184 },
4872 4871 { 0x8898827c222f1ffe, 0x22c1222808184aaf },
4873 4872 { 0xf7632203e131ccf1, 0xe1241121848292b8 },
4874 4873 { 0x7f5511421b113809, 0x901c88d84288aafe },
4875 4874 { 0x1d49412184882487, 0x8f338c87c044c6ef },
4876 4875 { 0xf552181014448344, 0x7ff8f4443e411911 },
4877 4876 { 0x2189240808f24228, 0xfeeff8cc81333f42 },
4878 4877 { 0x3280008440001112, 0xfee88b337ffffd62 },
4879 4878 };
4880 4879
4881 4880 /*
4882 4881  * 64-bit population count, using the well-known popcount trick.
4883 4882 * We could use the UltraSPARC V9 POPC instruction, but some
4884 4883 * CPUs including Cheetahplus and Jaguar do not support that
4885 4884 * instruction.
4886 4885 */
4887 4886 int
4888 4887 popc64(uint64_t val)
4889 4888 {
4890 4889 int cnt;
4891 4890
4892 4891 for (cnt = 0; val != 0; val &= val - 1)
4893 4892 cnt++;
4894 4893 return (cnt);
4895 4894 }
4896 4895
4897 4896 /*
4898 4897 * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4899 4898 * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4900 4899 * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4901 4900 * instead of doing all the xor's.
4902 4901 */
4903 4902 uint32_t
4904 4903 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4905 4904 {
4906 4905 int bitno, s;
4907 4906 int synd = 0;
4908 4907
4909 4908 for (bitno = 0; bitno < 9; bitno++) {
4910 4909 s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4911 4910 popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4912 4911 synd |= (s << bitno);
4913 4912 }
4914 4913 return (synd);
4915 4914
4916 4915 }
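As a quick sanity check of the parity-via-popcount identity described above, the following stand-alone sketch (user-space only, with a hypothetical data word; not part of this change) shows that the least significant bit of a population count equals the XOR-reduction of the same bits, which is why us3_gen_ecc() can take popc64() & 1 instead of xor'ing each masked data bit:

	#include <stdio.h>
	#include <stdint.h>

	/* XOR-reduce all 64 bits of w: 1 iff an odd number of bits are set. */
	static int
	xor_reduce(uint64_t w)
	{
		int bit = 0;

		for (; w != 0; w >>= 1)
			bit ^= (int)(w & 1);
		return (bit);
	}

	/* Same clear-lowest-set-bit loop as popc64() above. */
	static int
	popcount(uint64_t w)
	{
		int cnt = 0;

		for (; w != 0; w &= w - 1)
			cnt++;
		return (cnt);
	}

	int
	main(void)
	{
		/* Hypothetical data word masked by the first ECC table entry. */
		uint64_t masked = 0x0123456789abcdefULL & 0x46bffffeccd1177fULL;

		printf("popcount LSB = %d, xor-reduce = %d\n",
		    popcount(masked) & 1, xor_reduce(masked));
		return (0);
	}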
4917 4916
4918 4917 /*
4919 4918 * Queue one event based on ecc_type_to_info entry. If the event has an AFT1
4920 4919 * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4921 4920 * the UE event queue. Otherwise it is dispatched to the CE event queue.
4922 4921 */
4923 4922 static void
4924 4923 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4925 4924 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4926 4925 {
4927 4926 struct async_flt *aflt = (struct async_flt *)ch_flt;
4928 4927
4929 4928 if (reason &&
4930 4929 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4931 4930 (void) strcat(reason, eccp->ec_reason);
4932 4931 }
4933 4932
4934 4933 ch_flt->flt_bit = eccp->ec_afsr_bit;
4935 4934 ch_flt->flt_type = eccp->ec_flt_type;
4936 4935 if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4937 4936 ch_flt->flt_diag_data = *cdp;
4938 4937 else
4939 4938 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4940 4939 aflt->flt_in_memory =
4941 4940 cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4942 4941
4943 4942 if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4944 4943 aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4945 4944 else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4946 4945 aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4947 4946 else
4948 4947 aflt->flt_synd = 0;
4949 4948
4950 4949 aflt->flt_payload = eccp->ec_err_payload;
4951 4950
4952 4951 if (aflt->flt_panic || (eccp->ec_afsr_bit &
4953 4952 (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4954 4953 cpu_errorq_dispatch(eccp->ec_err_class,
4955 4954 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4956 4955 aflt->flt_panic);
4957 4956 else
4958 4957 cpu_errorq_dispatch(eccp->ec_err_class,
4959 4958 (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4960 4959 aflt->flt_panic);
4961 4960 }
4962 4961
4963 4962 /*
4964 4963  * Queue events on the async event queue, one event per error bit. First we
4965 4964 * queue the events that we "expect" for the given trap, then we queue events
4966 4965 * that we may not expect. Return number of events queued.
4967 4966 */
4968 4967 int
4969 4968 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4970 4969 ch_cpu_logout_t *clop)
4971 4970 {
4972 4971 struct async_flt *aflt = (struct async_flt *)ch_flt;
4973 4972 ecc_type_to_info_t *eccp;
4974 4973 int nevents = 0;
4975 4974 uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4976 4975 #if defined(CHEETAH_PLUS)
4977 4976 uint64_t orig_t_afsr_errs;
4978 4977 #endif
4979 4978 uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4980 4979 uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4981 4980 ch_diag_data_t *cdp = NULL;
4982 4981
4983 4982 t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4984 4983
4985 4984 #if defined(CHEETAH_PLUS)
4986 4985 orig_t_afsr_errs = t_afsr_errs;
4987 4986
4988 4987 /*
4989 4988 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4990 4989 */
4991 4990 if (clop != NULL) {
4992 4991 /*
4993 4992 * Set the AFSR and AFAR fields to the shadow registers. The
4994 4993 * flt_addr and flt_stat fields will be reset to the primaries
4995 4994 * below, but the sdw_addr and sdw_stat will stay as the
4996 4995 * secondaries.
4997 4996 */
4998 4997 cdp = &clop->clo_sdw_data;
4999 4998 aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
5000 4999 aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
5001 5000 ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
5002 5001 ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5003 5002 (cdp->chd_afsr & C_AFSR_ALL_ERRS);
5004 5003
5005 5004 /*
5006 5005 * If the primary and shadow AFSR differ, tag the shadow as
5007 5006 * the first fault.
5008 5007 */
5009 5008 if ((primary_afar != cdp->chd_afar) ||
5010 5009 (primary_afsr_errs != ch_flt->afsr_errs)) {
5011 5010 aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
5012 5011 }
5013 5012
5014 5013 /*
5015 5014 * Check AFSR bits as well as AFSR_EXT bits in order of
5016 5015 * the AFAR overwrite priority. Our stored AFSR_EXT value
5017 5016 * is expected to be zero for those CPUs which do not have
5018 5017 * an AFSR_EXT register.
5019 5018 */
5020 5019 for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
5021 5020 if ((eccp->ec_afsr_bit &
5022 5021 (ch_flt->afsr_errs & t_afsr_errs)) &&
5023 5022 ((eccp->ec_flags & aflt->flt_status) != 0)) {
5024 5023 cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5025 5024 cdp = NULL;
5026 5025 t_afsr_errs &= ~eccp->ec_afsr_bit;
5027 5026 nevents++;
5028 5027 }
5029 5028 }
5030 5029
5031 5030 /*
5032 5031 		 * If the ME bit is on in the primary AFSR, turn on again all
5033 5032 		 * the error bits that may set the ME bit, to make sure we see
5034 5033 		 * the ME AFSR error logs.
5035 5034 */
5036 5035 if ((primary_afsr & C_AFSR_ME) != 0)
5037 5036 t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
5038 5037 }
5039 5038 #endif /* CHEETAH_PLUS */
5040 5039
5041 5040 if (clop != NULL)
5042 5041 cdp = &clop->clo_data;
5043 5042
5044 5043 /*
5045 5044 * Queue expected errors, error bit and fault type must match
5046 5045 * in the ecc_type_to_info table.
5047 5046 */
5048 5047 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5049 5048 eccp++) {
5050 5049 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
5051 5050 (eccp->ec_flags & aflt->flt_status) != 0) {
5052 5051 #if defined(SERRANO)
5053 5052 /*
5054 5053 * For FRC/FRU errors on Serrano the afar2 captures
5055 5054 * the address and the associated data is
5056 5055 * in the shadow logout area.
5057 5056 */
5058 5057 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
5059 5058 if (clop != NULL)
5060 5059 cdp = &clop->clo_sdw_data;
5061 5060 aflt->flt_addr = ch_flt->afar2;
5062 5061 } else {
5063 5062 if (clop != NULL)
5064 5063 cdp = &clop->clo_data;
5065 5064 aflt->flt_addr = primary_afar;
5066 5065 }
5067 5066 #else /* SERRANO */
5068 5067 aflt->flt_addr = primary_afar;
5069 5068 #endif /* SERRANO */
5070 5069 aflt->flt_stat = primary_afsr;
5071 5070 ch_flt->afsr_ext = primary_afsr_ext;
5072 5071 ch_flt->afsr_errs = primary_afsr_errs;
5073 5072 cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5074 5073 cdp = NULL;
5075 5074 t_afsr_errs &= ~eccp->ec_afsr_bit;
5076 5075 nevents++;
5077 5076 }
5078 5077 }
5079 5078
5080 5079 /*
5081 5080 	 * Queue unexpected errors; only the error bit needs to match.
5082 5081 */
5083 5082 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5084 5083 eccp++) {
5085 5084 if (eccp->ec_afsr_bit & t_afsr_errs) {
5086 5085 #if defined(SERRANO)
5087 5086 /*
5088 5087 * For FRC/FRU errors on Serrano the afar2 captures
5089 5088 * the address and the associated data is
5090 5089 * in the shadow logout area.
5091 5090 */
5092 5091 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
5093 5092 if (clop != NULL)
5094 5093 cdp = &clop->clo_sdw_data;
5095 5094 aflt->flt_addr = ch_flt->afar2;
5096 5095 } else {
5097 5096 if (clop != NULL)
5098 5097 cdp = &clop->clo_data;
5099 5098 aflt->flt_addr = primary_afar;
5100 5099 }
5101 5100 #else /* SERRANO */
5102 5101 aflt->flt_addr = primary_afar;
5103 5102 #endif /* SERRANO */
5104 5103 aflt->flt_stat = primary_afsr;
5105 5104 ch_flt->afsr_ext = primary_afsr_ext;
5106 5105 ch_flt->afsr_errs = primary_afsr_errs;
5107 5106 cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5108 5107 cdp = NULL;
5109 5108 t_afsr_errs &= ~eccp->ec_afsr_bit;
5110 5109 nevents++;
5111 5110 }
5112 5111 }
5113 5112 return (nevents);
5114 5113 }
5115 5114
5116 5115 /*
5117 5116 * Return trap type number.
5118 5117 */
5119 5118 uint8_t
5120 5119 flt_to_trap_type(struct async_flt *aflt)
5121 5120 {
5122 5121 if (aflt->flt_status & ECC_I_TRAP)
5123 5122 return (TRAP_TYPE_ECC_I);
5124 5123 if (aflt->flt_status & ECC_D_TRAP)
5125 5124 return (TRAP_TYPE_ECC_D);
5126 5125 if (aflt->flt_status & ECC_F_TRAP)
5127 5126 return (TRAP_TYPE_ECC_F);
5128 5127 if (aflt->flt_status & ECC_C_TRAP)
5129 5128 return (TRAP_TYPE_ECC_C);
5130 5129 if (aflt->flt_status & ECC_DP_TRAP)
5131 5130 return (TRAP_TYPE_ECC_DP);
5132 5131 if (aflt->flt_status & ECC_IP_TRAP)
5133 5132 return (TRAP_TYPE_ECC_IP);
5134 5133 if (aflt->flt_status & ECC_ITLB_TRAP)
5135 5134 return (TRAP_TYPE_ECC_ITLB);
5136 5135 if (aflt->flt_status & ECC_DTLB_TRAP)
5137 5136 return (TRAP_TYPE_ECC_DTLB);
5138 5137 return (TRAP_TYPE_UNKNOWN);
5139 5138 }
5140 5139
5141 5140 /*
5142 5141 * Decide an error type based on detector and leaky/partner tests.
5143 5142 * The following array is used for quick translation - it must
5144 5143 * stay in sync with ce_dispact_t.
5145 5144 */
5146 5145
5147 5146 static char *cetypes[] = {
5148 5147 CE_DISP_DESC_U,
5149 5148 CE_DISP_DESC_I,
5150 5149 CE_DISP_DESC_PP,
5151 5150 CE_DISP_DESC_P,
5152 5151 CE_DISP_DESC_L,
5153 5152 CE_DISP_DESC_PS,
5154 5153 CE_DISP_DESC_S
5155 5154 };
5156 5155
5157 5156 char *
5158 5157 flt_to_error_type(struct async_flt *aflt)
5159 5158 {
5160 5159 ce_dispact_t dispact, disp;
5161 5160 uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5162 5161
5163 5162 /*
5164 5163 * The memory payload bundle is shared by some events that do
5165 5164 * not perform any classification. For those flt_disp will be
5166 5165 * 0 and we will return "unknown".
5167 5166 */
5168 5167 if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5169 5168 return (cetypes[CE_DISP_UNKNOWN]);
5170 5169
5171 5170 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5172 5171
5173 5172 /*
5174 5173 * It is also possible that no scrub/classification was performed
5175 5174 	 * by the detector, for instance where a disrupting error was logged
5176 5175 * in the AFSR while CEEN was off in cpu_deferred_error.
5177 5176 */
5178 5177 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5179 5178 return (cetypes[CE_DISP_UNKNOWN]);
5180 5179
5181 5180 /*
5182 5181 * Lookup type in initial classification/action table
5183 5182 */
5184 5183 dispact = CE_DISPACT(ce_disp_table,
5185 5184 CE_XDIAG_AFARMATCHED(dtcrinfo),
5186 5185 CE_XDIAG_STATE(dtcrinfo),
5187 5186 CE_XDIAG_CE1SEEN(dtcrinfo),
5188 5187 CE_XDIAG_CE2SEEN(dtcrinfo));
5189 5188
5190 5189 /*
5191 5190 * A bad lookup is not something to panic production systems for.
5192 5191 */
5193 5192 ASSERT(dispact != CE_DISP_BAD);
5194 5193 if (dispact == CE_DISP_BAD)
5195 5194 return (cetypes[CE_DISP_UNKNOWN]);
5196 5195
5197 5196 disp = CE_DISP(dispact);
5198 5197
5199 5198 switch (disp) {
5200 5199 case CE_DISP_UNKNOWN:
5201 5200 case CE_DISP_INTERMITTENT:
5202 5201 break;
5203 5202
5204 5203 case CE_DISP_POSS_PERS:
5205 5204 /*
5206 5205 * "Possible persistent" errors to which we have applied a valid
5207 5206 * leaky test can be separated into "persistent" or "leaky".
5208 5207 */
5209 5208 lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5210 5209 if (CE_XDIAG_TESTVALID(lkyinfo)) {
5211 5210 if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5212 5211 CE_XDIAG_CE2SEEN(lkyinfo))
5213 5212 disp = CE_DISP_LEAKY;
5214 5213 else
5215 5214 disp = CE_DISP_PERS;
5216 5215 }
5217 5216 break;
5218 5217
5219 5218 case CE_DISP_POSS_STICKY:
5220 5219 /*
5221 5220 * Promote "possible sticky" results that have been
5222 5221 * confirmed by a partner test to "sticky". Unconfirmed
5223 5222 * "possible sticky" events are left at that status - we do not
5224 5223 * guess at any bad reader/writer etc status here.
5225 5224 */
5226 5225 ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5227 5226 if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5228 5227 CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5229 5228 disp = CE_DISP_STICKY;
5230 5229
5231 5230 /*
5232 5231 * Promote "possible sticky" results on a uniprocessor
5233 5232 * to "sticky"
5234 5233 */
5235 5234 if (disp == CE_DISP_POSS_STICKY &&
5236 5235 CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5237 5236 disp = CE_DISP_STICKY;
5238 5237 break;
5239 5238
5240 5239 default:
5241 5240 disp = CE_DISP_UNKNOWN;
5242 5241 break;
5243 5242 }
5244 5243
5245 5244 return (cetypes[disp]);
5246 5245 }
5247 5246
5248 5247 /*
5249 5248 * Given the entire afsr, the specific bit to check and a prioritized list of
5250 5249 * error bits, determine the validity of the various overwrite priority
5251 5250  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which has
5252 5251 * different overwrite priorities.
5253 5252 *
5254 5253 * Given a specific afsr error bit and the entire afsr, there are three cases:
5255 5254 * INVALID: The specified bit is lower overwrite priority than some other
5256 5255 * error bit which is on in the afsr (or IVU/IVC).
5257 5256 * VALID: The specified bit is higher priority than all other error bits
5258 5257 * which are on in the afsr.
5259 5258 * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5260 5259 * bit is on in the afsr.
5261 5260 */
5262 5261 int
5263 5262 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5264 5263 {
5265 5264 uint64_t afsr_ow;
5266 5265
5267 5266 while ((afsr_ow = *ow_bits++) != 0) {
5268 5267 /*
5269 5268 * If bit is in the priority class, check to see if another
5270 5269 * bit in the same class is on => ambiguous. Otherwise,
5271 5270 * the value is valid. If the bit is not on at this priority
5272 5271 * class, but a higher priority bit is on, then the value is
5273 5272 * invalid.
5274 5273 */
5275 5274 if (afsr_ow & afsr_bit) {
5276 5275 /*
5277 5276 * If equal pri bit is on, ambiguous.
5278 5277 */
5279 5278 if (afsr & (afsr_ow & ~afsr_bit))
5280 5279 return (AFLT_STAT_AMBIGUOUS);
5281 5280 return (AFLT_STAT_VALID);
5282 5281 } else if (afsr & afsr_ow)
5283 5282 break;
5284 5283 }
5285 5284
5286 5285 /*
5287 5286 * We didn't find a match or a higher priority bit was on. Not
5288 5287 * finding a match handles the case of invalid AFAR for IVC, IVU.
5289 5288 */
5290 5289 return (AFLT_STAT_INVALID);
5291 5290 }
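To illustrate the three return cases, here is a stand-alone sketch of the same priority walk over a made-up two-class overwrite list; the masks and values below are hypothetical, chosen only to trigger each case (the real lists are afar_overwrite, esynd_overwrite and msynd_overwrite):

	#include <stdio.h>
	#include <stdint.h>

	#define	STAT_INVALID	0
	#define	STAT_VALID	1
	#define	STAT_AMBIGUOUS	2

	/* Same walk as afsr_to_overw_status(), over a hypothetical list. */
	static int
	overw_status(uint64_t afsr, uint64_t afsr_bit, const uint64_t *ow_bits)
	{
		uint64_t afsr_ow;

		while ((afsr_ow = *ow_bits++) != 0) {
			if (afsr_ow & afsr_bit) {
				if (afsr & (afsr_ow & ~afsr_bit))
					return (STAT_AMBIGUOUS);
				return (STAT_VALID);
			} else if (afsr & afsr_ow)
				break;
		}
		return (STAT_INVALID);
	}

	int
	main(void)
	{
		/* Hypothetical masks: bits 0-1 high priority, bits 2-3 low. */
		uint64_t ow[] = { 0x3, 0xc, 0 };

		printf("%d\n", overw_status(0x4, 0x4, ow)); /* 1: VALID */
		printf("%d\n", overw_status(0xc, 0x4, ow)); /* 2: AMBIGUOUS */
		printf("%d\n", overw_status(0x5, 0x4, ow)); /* 0: INVALID */
		return (0);
	}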
5292 5291
5293 5292 static int
5294 5293 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5295 5294 {
5296 5295 #if defined(SERRANO)
5297 5296 if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5298 5297 return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5299 5298 else
5300 5299 #endif /* SERRANO */
5301 5300 return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5302 5301 }
5303 5302
5304 5303 static int
5305 5304 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5306 5305 {
5307 5306 return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5308 5307 }
5309 5308
5310 5309 static int
5311 5310 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5312 5311 {
5313 5312 return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5314 5313 }
5315 5314
5316 5315 static int
5317 5316 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5318 5317 {
5319 5318 #ifdef lint
5320 5319 cpuid = cpuid;
5321 5320 #endif
5322 5321 #if defined(CHEETAH_PLUS)
5323 5322 /*
5324 5323 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5325 5324 * policy for Cheetah+ and separate for Panther CPUs.
5326 5325 */
5327 5326 if (afsr_bit & C_AFSR_MSYND_ERRS) {
5328 5327 if (IS_PANTHER(cpunodes[cpuid].implementation))
5329 5328 return (afsr_to_msynd_status(afsr, afsr_bit));
5330 5329 else
5331 5330 return (afsr_to_esynd_status(afsr, afsr_bit));
5332 5331 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5333 5332 if (IS_PANTHER(cpunodes[cpuid].implementation))
5334 5333 return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5335 5334 else
5336 5335 return (afsr_to_esynd_status(afsr, afsr_bit));
5337 5336 #else /* CHEETAH_PLUS */
5338 5337 if (afsr_bit & C_AFSR_MSYND_ERRS) {
5339 5338 return (afsr_to_msynd_status(afsr, afsr_bit));
5340 5339 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5341 5340 return (afsr_to_esynd_status(afsr, afsr_bit));
5342 5341 #endif /* CHEETAH_PLUS */
5343 5342 } else {
5344 5343 return (AFLT_STAT_INVALID);
5345 5344 }
5346 5345 }
5347 5346
5348 5347 /*
5349 5348 * Slave CPU stick synchronization.
5350 5349 */
5351 5350 void
5352 5351 sticksync_slave(void)
5353 5352 {
5354 5353 int i;
5355 5354 int tries = 0;
5356 5355 int64_t tskew;
5357 5356 int64_t av_tskew;
5358 5357
5359 5358 kpreempt_disable();
5360 5359 /* wait for the master side */
5361 5360 while (stick_sync_cmd != SLAVE_START)
5362 5361 ;
5363 5362 /*
5364 5363 * Synchronization should only take a few tries at most. But in the
5365 5364 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5366 5365 	 * without its stick synchronized wouldn't be a good citizen.
5367 5366 */
5368 5367 while (slave_done == 0) {
5369 5368 /*
5370 5369 * Time skew calculation.
5371 5370 */
5372 5371 av_tskew = tskew = 0;
5373 5372
5374 5373 for (i = 0; i < stick_iter; i++) {
5375 5374 /* make location hot */
5376 5375 timestamp[EV_A_START] = 0;
5377 5376 			stick_timestamp(&timestamp[EV_A_START]);
5378 5377
5379 5378 /* tell the master we're ready */
5380 5379 stick_sync_cmd = MASTER_START;
5381 5380
5382 5381 /* and wait */
5383 5382 while (stick_sync_cmd != SLAVE_CONT)
5384 5383 ;
5385 5384 /* Event B end */
5386 5385 			stick_timestamp(&timestamp[EV_B_END]);
5387 5386
5388 5387 /* calculate time skew */
5389 5388 tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5390 5389 - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5391 5390 / 2;
5392 5391
5393 5392 /* keep running count */
5394 5393 av_tskew += tskew;
5395 5394 } /* for */
5396 5395
5397 5396 /*
5398 5397 * Adjust stick for time skew if not within the max allowed;
5399 5398 * otherwise we're all done.
5400 5399 */
5401 5400 if (stick_iter != 0)
5402 5401 av_tskew = av_tskew/stick_iter;
5403 5402 if (ABS(av_tskew) > stick_tsk) {
5404 5403 /*
5405 5404 * If the skew is 1 (the slave's STICK register
5406 5405 * is 1 STICK ahead of the master's), stick_adj
5407 5406 * could fail to adjust the slave's STICK register
5408 5407 * if the STICK read on the slave happens to
5409 5408 * align with the increment of the STICK.
5410 5409 * Therefore, we increment the skew to 2.
5411 5410 */
5412 5411 if (av_tskew == 1)
5413 5412 av_tskew++;
5414 5413 stick_adj(-av_tskew);
5415 5414 } else
5416 5415 slave_done = 1;
5417 5416 #ifdef DEBUG
5418 5417 if (tries < DSYNC_ATTEMPTS)
5419 5418 stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5420 5419 av_tskew;
5421 5420 ++tries;
5422 5421 #endif /* DEBUG */
5423 5422 #ifdef lint
5424 5423 tries = tries;
5425 5424 #endif
5426 5425
5427 5426 } /* while */
5428 5427
5429 5428 /* allow the master to finish */
5430 5429 stick_sync_cmd = EVENT_NULL;
5431 5430 kpreempt_enable();
5432 5431 }
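The division by 2 in the skew calculation assumes the signalling latency from slave to master (Event A) and from master to slave (Event B) is roughly symmetric; under that assumption the latency terms cancel and only the clock offset remains. A stand-alone sketch with made-up timestamps (slave clock 100 ticks ahead, 40-tick latency each way) recovers the injected skew:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* Hypothetical STICK values; slave clock = master clock + 100. */
		int64_t ev_a_start = 1000100;	/* slave, at real time 1000000 */
		int64_t ev_a_end   = 1000040;	/* master, 40 ticks later */
		int64_t ev_b_start = 1000050;	/* master */
		int64_t ev_b_end   = 1000190;	/* slave, 40 ticks + 100 skew */

		int64_t tskew = ((ev_b_end - ev_b_start) -
		    (ev_a_end - ev_a_start)) / 2;

		/* Prints 100; sticksync_slave() then calls stick_adj(-tskew). */
		printf("computed skew = %lld ticks\n", (long long)tskew);
		return (0);
	}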
5433 5432
5434 5433 /*
5435 5434 * Master CPU side of stick synchronization.
5436 5435 * - timestamp end of Event A
5437 5436 * - timestamp beginning of Event B
5438 5437 */
5439 5438 void
5440 5439 sticksync_master(void)
5441 5440 {
5442 5441 int i;
5443 5442
5444 5443 kpreempt_disable();
5445 5444 /* tell the slave we've started */
5446 5445 slave_done = 0;
5447 5446 stick_sync_cmd = SLAVE_START;
5448 5447
5449 5448 while (slave_done == 0) {
5450 5449 for (i = 0; i < stick_iter; i++) {
5451 5450 /* wait for the slave */
5452 5451 while (stick_sync_cmd != MASTER_START)
5453 5452 ;
5454 5453 /* Event A end */
5455 5454 			stick_timestamp(&timestamp[EV_A_END]);
5456 5455
5457 5456 /* make location hot */
5458 5457 timestamp[EV_B_START] = 0;
5459 5458 			stick_timestamp(&timestamp[EV_B_START]);
5460 5459
5461 5460 /* tell the slave to continue */
5462 5461 stick_sync_cmd = SLAVE_CONT;
5463 5462 } /* for */
5464 5463
5465 5464 /* wait while slave calculates time skew */
5466 5465 while (stick_sync_cmd == SLAVE_CONT)
5467 5466 ;
5468 5467 } /* while */
5469 5468 kpreempt_enable();
5470 5469 }
5471 5470
5472 5471 /*
5473 5472  * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need
5474 5473  * the Spitfire hack of xcall'ing all the cpus to check for them. Also,
5475 5474 * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
5476 5475 * panic idle.
5477 5476 */
5478 5477 /*ARGSUSED*/
5479 5478 void
5480 5479 cpu_check_allcpus(struct async_flt *aflt)
5481 5480 {}
5482 5481
5483 5482 struct kmem_cache *ch_private_cache;
5484 5483
5485 5484 /*
5486 5485  * Cpu private uninitialization. Uninitialize the Ecache scrubber and
5487 5486 * deallocate the scrubber data structures and cpu_private data structure.
5488 5487 */
5489 5488 void
5490 5489 cpu_uninit_private(struct cpu *cp)
5491 5490 {
5492 5491 cheetah_private_t *chprp = CPU_PRIVATE(cp);
5493 5492
5494 5493 ASSERT(chprp);
5495 5494 cpu_uninit_ecache_scrub_dr(cp);
5496 5495 CPU_PRIVATE(cp) = NULL;
5497 5496 ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5498 5497 kmem_cache_free(ch_private_cache, chprp);
5499 5498 cmp_delete_cpu(cp->cpu_id);
5500 5499
5501 5500 }
5502 5501
5503 5502 /*
5504 5503 * Cheetah Cache Scrubbing
5505 5504 *
5506 5505 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5507 5506 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5508 5507 * protected by either parity or ECC.
5509 5508 *
5510 5509 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5511 5510  * cache per second). Due to the specifics of how the I$ control
5512 5511 * logic works with respect to the ASI used to scrub I$ lines, the entire
5513 5512 * I$ is scanned at once.
5514 5513 */
5515 5514
5516 5515 /*
5517 5516 * Tuneables to enable and disable the scrubbing of the caches, and to tune
5518 5517 * scrubbing behavior. These may be changed via /etc/system or using mdb
5519 5518 * on a running system.
5520 5519 */
5521 5520 int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */
5522 5521
5523 5522 /*
5524 5523 * The following are the PIL levels that the softints/cross traps will fire at.
5525 5524 */
5526 5525 uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */
5527 5526 uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */
5528 5527 uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */
5529 5528
5530 5529 #if defined(JALAPENO)
5531 5530
5532 5531 /*
5533 5532 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5534 5533 * on Jalapeno.
5535 5534 */
5536 5535 int ecache_scrub_enable = 0;
5537 5536
5538 5537 #else /* JALAPENO */
5539 5538
5540 5539 /*
5541 5540 * With all other cpu types, E$ scrubbing is on by default
5542 5541 */
5543 5542 int ecache_scrub_enable = 1;
5544 5543
5545 5544 #endif /* JALAPENO */
5546 5545
5547 5546
5548 5547 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5549 5548
5550 5549 /*
5551 5550 * The I$ scrubber tends to cause latency problems for real-time SW, so it
5552 5551 * is disabled by default on non-Cheetah systems
5553 5552 */
5554 5553 int icache_scrub_enable = 0;
5555 5554
5556 5555 /*
5557 5556 * Tuneables specifying the scrub calls per second and the scan rate
5558 5557 * for each cache
5559 5558 *
5560 5559 * The cyclic times are set during boot based on the following values.
5561 5560 * Changing these values in mdb after this time will have no effect. If
5562 5561 * a different value is desired, it must be set in /etc/system before a
5563 5562 * reboot.
5564 5563 */
5565 5564 int ecache_calls_a_sec = 1;
5566 5565 int dcache_calls_a_sec = 2;
5567 5566 int icache_calls_a_sec = 2;
5568 5567
5569 5568 int ecache_scan_rate_idle = 1;
5570 5569 int ecache_scan_rate_busy = 1;
5571 5570 int dcache_scan_rate_idle = 1;
5572 5571 int dcache_scan_rate_busy = 1;
5573 5572 int icache_scan_rate_idle = 1;
5574 5573 int icache_scan_rate_busy = 1;
5575 5574
5576 5575 #else /* CHEETAH_PLUS || JALAPENO || SERRANO */
5577 5576
5578 5577 int icache_scrub_enable = 1; /* I$ scrubbing is on by default */
5579 5578
5580 5579 int ecache_calls_a_sec = 100;	/* E$ scrub calls per second */
5581 5580 int dcache_calls_a_sec = 100;	/* D$ scrub calls per second */
5582 5581 int icache_calls_a_sec = 100;	/* I$ scrub calls per second */
5583 5582
5584 5583 int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */
5585 5584 int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */
5586 5585 int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */
5587 5586 int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */
5588 5587 int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */
5589 5588 int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */
5590 5589
5591 5590 #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */
5592 5591
5593 5592 /*
5594 5593 * In order to scrub on offline cpus, a cross trap is sent. The handler will
5595 5594 * increment the outstanding request counter and schedule a softint to run
5596 5595 * the scrubber.
5597 5596 */
5598 5597 extern xcfunc_t cache_scrubreq_tl1;
5599 5598
5600 5599 /*
5601 5600 * These are the softint functions for each cache scrubber
5602 5601 */
5603 5602 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5604 5603 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5605 5604 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5606 5605
5607 5606 /*
5608 5607 * The cache scrub info table contains cache specific information
5609 5608 * and allows for some of the scrub code to be table driven, reducing
5610 5609  * duplication of similar per-cache code.
5611 5610 *
5612 5611 * This table keeps a copy of the value in the calls per second variable
5613 5612 * (?cache_calls_a_sec). This makes it much more difficult for someone
5614 5613 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5615 5614 * mdb in a misguided attempt to disable the scrubber).
5616 5615 */
5617 5616 struct scrub_info {
5618 5617 int *csi_enable; /* scrubber enable flag */
5619 5618 int csi_freq; /* scrubber calls per second */
5620 5619 int csi_index; /* index to chsm_outstanding[] */
5621 5620 uint64_t csi_inum; /* scrubber interrupt number */
5622 5621 cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */
5623 5622 cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */
5624 5623 char csi_name[3]; /* cache name for this scrub entry */
5625 5624 } cache_scrub_info[] = {
5626 5625 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5627 5626 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5628 5627 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5629 5628 };
5630 5629
5631 5630 /*
5632 5631 * If scrubbing is enabled, increment the outstanding request counter. If it
5633 5632 * is 1 (meaning there were no previous requests outstanding), call
5634 5633 * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5635 5634 * a self trap.
5636 5635 */
5637 5636 static void
5638 5637 do_scrub(struct scrub_info *csi)
5639 5638 {
5640 5639 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5641 5640 int index = csi->csi_index;
5642 5641 uint32_t *outstanding = &csmp->chsm_outstanding[index];
5643 5642
5644 5643 if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5645 5644 if (atomic_inc_32_nv(outstanding) == 1) {
5646 5645 xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5647 5646 csi->csi_inum, 0);
5648 5647 }
5649 5648 }
5650 5649 }
5651 5650
5652 5651 /*
5653 5652 * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5654 5653 * cross-trap the offline cpus.
5655 5654 */
5656 5655 static void
5657 5656 do_scrub_offline(struct scrub_info *csi)
5658 5657 {
5659 5658 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5660 5659
5661 5660 if (CPUSET_ISNULL(cpu_offline_set)) {
5662 5661 /*
5663 5662 * No offline cpus - nothing to do
5664 5663 */
5665 5664 return;
5666 5665 }
5667 5666
5668 5667 if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5669 5668 xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5670 5669 csi->csi_index);
5671 5670 }
5672 5671 }
5673 5672
5674 5673 /*
5675 5674 * This is the initial setup for the scrubber cyclics - it sets the
5676 5675 * interrupt level, frequency, and function to call.
5677 5676 */
5678 5677 /*ARGSUSED*/
5679 5678 static void
5680 5679 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5681 5680 cyc_time_t *when)
5682 5681 {
5683 5682 struct scrub_info *csi = (struct scrub_info *)arg;
5684 5683
5685 5684 ASSERT(csi != NULL);
5686 5685 hdlr->cyh_func = (cyc_func_t)do_scrub;
5687 5686 hdlr->cyh_level = CY_LOW_LEVEL;
5688 5687 hdlr->cyh_arg = arg;
5689 5688
5690 5689 when->cyt_when = 0; /* Start immediately */
5691 5690 when->cyt_interval = NANOSEC / csi->csi_freq;
5692 5691 }
5693 5692
5694 5693 /*
5695 5694 * Initialization for cache scrubbing.
5696 5695 * This routine is called AFTER all cpus have had cpu_init_private called
5697 5696 * to initialize their private data areas.
5698 5697 */
5699 5698 void
5700 5699 cpu_init_cache_scrub(void)
5701 5700 {
5702 5701 int i;
5703 5702 struct scrub_info *csi;
5704 5703 cyc_omni_handler_t omni_hdlr;
5705 5704 cyc_handler_t offline_hdlr;
5706 5705 cyc_time_t when;
5707 5706
5708 5707 /*
5709 5708 * save away the maximum number of lines for the D$
5710 5709 */
5711 5710 dcache_nlines = dcache_size / dcache_linesize;
5712 5711
5713 5712 /*
5714 5713 * register the softints for the cache scrubbing
5715 5714 */
5716 5715 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5717 5716 add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5718 5717 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5719 5718 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5720 5719
5721 5720 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5722 5721 add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5723 5722 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5724 5723 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5725 5724
5726 5725 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5727 5726 add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5728 5727 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5729 5728 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5730 5729
5731 5730 /*
5732 5731 * start the scrubbing for all the caches
5733 5732 */
5734 5733 mutex_enter(&cpu_lock);
5735 5734 for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5736 5735
5737 5736 csi = &cache_scrub_info[i];
5738 5737
5739 5738 if (!(*csi->csi_enable))
5740 5739 continue;
5741 5740
5742 5741 /*
5743 5742 * force the following to be true:
5744 5743 * 1 <= calls_a_sec <= hz
5745 5744 */
5746 5745 if (csi->csi_freq > hz) {
5747 5746 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5748 5747 "(%d); resetting to hz (%d)", csi->csi_name,
5749 5748 csi->csi_freq, hz);
5750 5749 csi->csi_freq = hz;
5751 5750 } else if (csi->csi_freq < 1) {
5752 5751 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5753 5752 "(%d); resetting to 1", csi->csi_name,
5754 5753 csi->csi_freq);
5755 5754 csi->csi_freq = 1;
5756 5755 }
5757 5756
5758 5757 omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5759 5758 omni_hdlr.cyo_offline = NULL;
5760 5759 omni_hdlr.cyo_arg = (void *)csi;
5761 5760
5762 5761 offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5763 5762 offline_hdlr.cyh_arg = (void *)csi;
5764 5763 offline_hdlr.cyh_level = CY_LOW_LEVEL;
5765 5764
5766 5765 when.cyt_when = 0; /* Start immediately */
5767 5766 when.cyt_interval = NANOSEC / csi->csi_freq;
5768 5767
5769 5768 csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5770 5769 csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5771 5770 }
5772 5771 register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5773 5772 mutex_exit(&cpu_lock);
5774 5773 }
5775 5774
5776 5775 /*
5777 5776 * Indicate that the specified cpu is idle.
5778 5777 */
5779 5778 void
5780 5779 cpu_idle_ecache_scrub(struct cpu *cp)
5781 5780 {
5782 5781 if (CPU_PRIVATE(cp) != NULL) {
5783 5782 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5784 5783 csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5785 5784 }
5786 5785 }
5787 5786
5788 5787 /*
5789 5788 * Indicate that the specified cpu is busy.
5790 5789 */
5791 5790 void
5792 5791 cpu_busy_ecache_scrub(struct cpu *cp)
5793 5792 {
5794 5793 if (CPU_PRIVATE(cp) != NULL) {
5795 5794 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5796 5795 csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5797 5796 }
5798 5797 }
5799 5798
5800 5799 /*
5801 5800 * Initialization for cache scrubbing for the specified cpu.
5802 5801 */
5803 5802 void
5804 5803 cpu_init_ecache_scrub_dr(struct cpu *cp)
5805 5804 {
5806 5805 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5807 5806 int cpuid = cp->cpu_id;
5808 5807
5809 5808 /* initialize the number of lines in the caches */
5810 5809 csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5811 5810 cpunodes[cpuid].ecache_linesize;
5812 5811 csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5813 5812 CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5814 5813
5815 5814 /*
5816 5815 * do_scrub() and do_scrub_offline() check both the global
5817 5816 * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers
5818 5817 * check this value before scrubbing. Currently, we use it to
5819 5818 * disable the E$ scrubber on multi-core cpus or while running at
5820 5819 * slowed speed. For now, just turn everything on and allow
5821 5820 * cpu_init_private() to change it if necessary.
5822 5821 */
5823 5822 csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5824 5823 csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5825 5824 csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5826 5825
5827 5826 cpu_busy_ecache_scrub(cp);
5828 5827 }
5829 5828
5830 5829 /*
5831 5830 * Un-initialization for cache scrubbing for the specified cpu.
5832 5831 */
5833 5832 static void
5834 5833 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5835 5834 {
5836 5835 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5837 5836
5838 5837 /*
5839 5838 * un-initialize bookkeeping for cache scrubbing
5840 5839 */
5841 5840 bzero(csmp, sizeof (ch_scrub_misc_t));
5842 5841
5843 5842 cpu_idle_ecache_scrub(cp);
5844 5843 }
5845 5844
5846 5845 /*
5847 5846 * Called periodically on each CPU to scrub the D$.
5848 5847 */
5849 5848 static void
5850 5849 scrub_dcache(int how_many)
5851 5850 {
5852 5851 int i;
5853 5852 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5854 5853 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5855 5854
5856 5855 /*
5857 5856 * scrub the desired number of lines
5858 5857 */
5859 5858 for (i = 0; i < how_many; i++) {
5860 5859 /*
5861 5860 * scrub a D$ line
5862 5861 */
5863 5862 dcache_inval_line(index);
5864 5863
5865 5864 /*
5866 5865 * calculate the next D$ line to scrub, assumes
5867 5866 * that dcache_nlines is a power of 2
5868 5867 */
5869 5868 index = (index + 1) & (dcache_nlines - 1);
5870 5869 }
5871 5870
5872 5871 /*
5873 5872 * set the scrub index for the next visit
5874 5873 */
5875 5874 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5876 5875 }
5877 5876
5878 5877 /*
5879 5878 * Handler for D$ scrub inum softint. Call scrub_dcache until
5880 5879 * we decrement the outstanding request count to zero.
5881 5880 */
5882 5881 /*ARGSUSED*/
5883 5882 static uint_t
5884 5883 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5885 5884 {
5886 5885 int i;
5887 5886 int how_many;
5888 5887 int outstanding;
5889 5888 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5890 5889 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5891 5890 struct scrub_info *csi = (struct scrub_info *)arg1;
5892 5891 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5893 5892 dcache_scan_rate_idle : dcache_scan_rate_busy;
5894 5893
5895 5894 /*
5896 5895 * The scan rates are expressed in units of tenths of a
5897 5896 * percent. A scan rate of 1000 (100%) means the whole
5898 5897 * cache is scanned every second.
5899 5898 */
5900 5899 how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5901 5900
5902 5901 do {
5903 5902 outstanding = *countp;
5904 5903 for (i = 0; i < outstanding; i++) {
5905 5904 scrub_dcache(how_many);
5906 5905 }
5907 5906 } while (atomic_add_32_nv(countp, -outstanding));
5908 5907
5909 5908 return (DDI_INTR_CLAIMED);
5910 5909 }
5911 5910
5912 5911 /*
5913 5912 * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5914 5913 * by invalidating lines. Due to the characteristics of the ASI which
5915 5914 * is used to invalidate an I$ line, the entire I$ must be invalidated
5916 5915 * vs. an individual I$ line.
5917 5916 */
5918 5917 static void
5919 5918 scrub_icache(int how_many)
5920 5919 {
5921 5920 int i;
5922 5921 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5923 5922 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5924 5923 int icache_nlines = csmp->chsm_icache_nlines;
5925 5924
5926 5925 /*
5927 5926 * scrub the desired number of lines
5928 5927 */
5929 5928 for (i = 0; i < how_many; i++) {
5930 5929 /*
5931 5930 * since the entire I$ must be scrubbed at once,
5932 5931 * wait until the index wraps to zero to invalidate
5933 5932 * the entire I$
5934 5933 */
5935 5934 if (index == 0) {
5936 5935 icache_inval_all();
5937 5936 }
5938 5937
5939 5938 /*
5940 5939 * calculate the next I$ line to scrub, assumes
5941 5940 * that chsm_icache_nlines is a power of 2
5942 5941 */
5943 5942 index = (index + 1) & (icache_nlines - 1);
5944 5943 }
5945 5944
5946 5945 /*
5947 5946 * set the scrub index for the next visit
5948 5947 */
5949 5948 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5950 5949 }
5951 5950
5952 5951 /*
5953 5952 * Handler for I$ scrub inum softint. Call scrub_icache until
5954 5953 * we decrement the outstanding request count to zero.
5955 5954 */
5956 5955 /*ARGSUSED*/
5957 5956 static uint_t
5958 5957 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5959 5958 {
5960 5959 int i;
5961 5960 int how_many;
5962 5961 int outstanding;
5963 5962 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5964 5963 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5965 5964 struct scrub_info *csi = (struct scrub_info *)arg1;
5966 5965 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5967 5966 icache_scan_rate_idle : icache_scan_rate_busy;
5968 5967 int icache_nlines = csmp->chsm_icache_nlines;
5969 5968
5970 5969 /*
5971 5970 * The scan rates are expressed in units of tenths of a
5972 5971 * percent. A scan rate of 1000 (100%) means the whole
5973 5972 * cache is scanned every second.
5974 5973 */
5975 5974 how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5976 5975
5977 5976 do {
5978 5977 outstanding = *countp;
5979 5978 for (i = 0; i < outstanding; i++) {
5980 5979 scrub_icache(how_many);
5981 5980 }
5982 5981 } while (atomic_add_32_nv(countp, -outstanding));
5983 5982
5984 5983 return (DDI_INTR_CLAIMED);
5985 5984 }
5986 5985
5987 5986 /*
5988 5987 * Called periodically on each CPU to scrub the E$.
5989 5988 */
5990 5989 static void
5991 5990 scrub_ecache(int how_many)
5992 5991 {
5993 5992 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5994 5993 int i;
5995 5994 int cpuid = CPU->cpu_id;
5996 5995 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5997 5996 int nlines = csmp->chsm_ecache_nlines;
5998 5997 int linesize = cpunodes[cpuid].ecache_linesize;
5999 5998 int ec_set_size = cpu_ecache_set_size(CPU);
6000 5999
6001 6000 /*
6002 6001 * scrub the desired number of lines
6003 6002 */
6004 6003 for (i = 0; i < how_many; i++) {
6005 6004 /*
6006 6005 * scrub the E$ line
6007 6006 */
6008 6007 ecache_flush_line(ecache_flushaddr + (index * linesize),
6009 6008 ec_set_size);
6010 6009
6011 6010 /*
6012 6011 * calculate the next E$ line to scrub based on twice
6013 6012 * the number of E$ lines (to displace lines containing
6014 6013 * flush area data), assumes that the number of lines
6015 6014 * is a power of 2
6016 6015 */
6017 6016 index = (index + 1) & ((nlines << 1) - 1);
6018 6017 }
6019 6018
6020 6019 /*
6021 6020 * set the ecache scrub index for the next visit
6022 6021 */
6023 6022 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
6024 6023 }
6025 6024
6026 6025 /*
6027 6026 * Handler for E$ scrub inum softint. Call the E$ scrubber until
6028 6027 * we decrement the outstanding request count to zero.
6029 6028 *
6030 6029 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
6031 6030 * become negative after the atomic_add_32_nv(). This is not a problem, as
6032 6031 * the next trip around the loop won't scrub anything, and the next add will
6033 6032 * reset the count back to zero.
6034 6033 */
6035 6034 /*ARGSUSED*/
6036 6035 static uint_t
6037 6036 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
6038 6037 {
6039 6038 int i;
6040 6039 int how_many;
6041 6040 int outstanding;
6042 6041 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
6043 6042 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
6044 6043 struct scrub_info *csi = (struct scrub_info *)arg1;
6045 6044 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
6046 6045 ecache_scan_rate_idle : ecache_scan_rate_busy;
6047 6046 int ecache_nlines = csmp->chsm_ecache_nlines;
6048 6047
6049 6048 /*
6050 6049 * The scan rates are expressed in units of tenths of a
6051 6050 * percent. A scan rate of 1000 (100%) means the whole
6052 6051 * cache is scanned every second.
6053 6052 */
6054 6053 how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
6055 6054
6056 6055 do {
6057 6056 outstanding = *countp;
6058 6057 for (i = 0; i < outstanding; i++) {
6059 6058 scrub_ecache(how_many);
6060 6059 }
6061 6060 } while (atomic_add_32_nv(countp, -outstanding));
6062 6061
6063 6062 return (DDI_INTR_CLAIMED);
6064 6063 }
6065 6064
6066 6065 /*
6067 6066  * Timeout function to reenable CEEN
6068 6067 */
6069 6068 static void
6070 6069 cpu_delayed_check_ce_errors(void *arg)
6071 6070 {
6072 6071 if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6073 6072 TQ_NOSLEEP)) {
6074 6073 (void) timeout(cpu_delayed_check_ce_errors, arg,
6075 - drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6074 + drv_sectohz((clock_t)cpu_ceen_delay_secs));
6076 6075 }
6077 6076 }
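The drv_sectohz() routine itself is introduced in the DDI code elsewhere in this change; the call sites here assume it behaves the same as the drv_usectohz() expression it replaces. A minimal sketch of that assumed definition (not the actual one added by this webrev):

	/*
	 * Sketch only -- assumed semantics: convert seconds to
	 * microseconds, then to clock ticks via drv_usectohz().
	 */
	#define	drv_sectohz(sec)	(drv_usectohz((clock_t)(sec) * MICROSEC))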
6078 6077
6079 6078 /*
6080 6079 * CE Deferred Re-enable after trap.
6081 6080 *
6082 6081 * When the CPU gets a disrupting trap for any of the errors
6083 6082 * controlled by the CEEN bit, CEEN is disabled in the trap handler
6084 6083 * immediately. To eliminate the possibility of multiple CEs causing
6085 6084 * recursive stack overflow in the trap handler, we cannot
6086 6085 * reenable CEEN while still running in the trap handler. Instead,
6087 6086 * after a CE is logged on a CPU, we schedule a timeout function,
6088 6087 * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6089 6088 * seconds. This function will check whether any further CEs
6090 6089 * have occurred on that CPU, and if none have, will reenable CEEN.
6091 6090 *
6092 6091 * If further CEs have occurred while CEEN is disabled, another
6093 6092 * timeout will be scheduled. This is to ensure that the CPU can
6094 6093 * make progress in the face of CE 'storms', and that it does not
6095 6094 * spend all its time logging CE errors.
6096 6095 */
6097 6096 static void
6098 6097 cpu_check_ce_errors(void *arg)
6099 6098 {
6100 6099 int cpuid = (int)(uintptr_t)arg;
6101 6100 cpu_t *cp;
6102 6101
6103 6102 /*
6104 6103 * We acquire cpu_lock.
6105 6104 */
6106 6105 ASSERT(curthread->t_pil == 0);
6107 6106
6108 6107 /*
6109 6108 * verify that the cpu is still around, DR
6110 6109 * could have got there first ...
6111 6110 */
6112 6111 mutex_enter(&cpu_lock);
6113 6112 cp = cpu_get(cpuid);
6114 6113 if (cp == NULL) {
6115 6114 mutex_exit(&cpu_lock);
6116 6115 return;
6117 6116 }
6118 6117 /*
6119 6118 * make sure we don't migrate across CPUs
6120 6119 * while checking our CE status.
6121 6120 */
6122 6121 kpreempt_disable();
6123 6122
6124 6123 /*
6125 6124 * If we are running on the CPU that got the
6126 6125 * CE, we can do the checks directly.
6127 6126 */
6128 6127 if (cp->cpu_id == CPU->cpu_id) {
6129 6128 mutex_exit(&cpu_lock);
6130 6129 cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6131 6130 kpreempt_enable();
6132 6131 return;
6133 6132 }
6134 6133 kpreempt_enable();
6135 6134
6136 6135 /*
6137 6136 * send an x-call to get the CPU that originally
6138 6137 * got the CE to do the necessary checks. If we can't
6139 6138 * send the x-call, reschedule the timeout, otherwise we
6140 6139 * lose CEEN forever on that CPU.
6141 6140 */
6142 6141 if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6143 6142 xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6144 6143 TIMEOUT_CEEN_CHECK, 0);
6145 6144 mutex_exit(&cpu_lock);
6146 6145 } else {
6147 6146 /*
6148 6147 * When the CPU is not accepting xcalls, or
6149 6148 * the processor is offlined, we don't want to
6150 6149 * incur the extra overhead of trying to schedule the
6151 6150 * CE timeout indefinitely. However, we don't want to lose
6152 6151 * CE checking forever.
6153 6152 *
6154 6153 * Keep rescheduling the timeout, accepting the additional
6155 6154 * overhead as the cost of correctness in the case where we get
6156 6155 * a CE, disable CEEN, offline the CPU during the
6157 6156 		 * timeout interval, and then online it at some
6158 6157 * point in the future. This is unlikely given the short
6159 6158 * cpu_ceen_delay_secs.
6160 6159 */
6161 6160 mutex_exit(&cpu_lock);
6162 6161 (void) timeout(cpu_delayed_check_ce_errors,
6163 6162 (void *)(uintptr_t)cp->cpu_id,
6164 - drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6163 + drv_sectohz((clock_t)cpu_ceen_delay_secs));
6165 6164 }
6166 6165 }
6167 6166
6168 6167 /*
6169 6168 * This routine will check whether CEs have occurred while
6170 6169 * CEEN is disabled. Any CEs detected will be logged and, if
6171 6170 * possible, scrubbed.
6172 6171 *
6173 6172 * The memscrubber will also use this routine to clear any errors
6174 6173 * caused by its scrubbing with CEEN disabled.
6175 6174 *
6176 6175 * flag == SCRUBBER_CEEN_CHECK
6177 6176 * called from memscrubber, just check/scrub, no reset
6178 6177 * paddr physical addr. for start of scrub pages
6179 6178 * vaddr virtual addr. for scrub area
6180 6179 * psz page size of area to be scrubbed
6181 6180 *
6182 6181 * flag == TIMEOUT_CEEN_CHECK
6183 6182 * timeout function has triggered, reset timeout or CEEN
6184 6183 *
6185 6184 * Note: We must not migrate cpus during this function. This can be
6186 6185 * achieved by one of:
6187 6186 * - invoking as target of an x-call in which case we're at XCALL_PIL
6188 6187 * The flag value must be first xcall argument.
6189 6188 * - disabling kernel preemption. This should be done for very short
6190 6189 * periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6191 6190 * scrub an extended area with cpu_check_block. The call for
6192 6191 * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6193 6192 * brief for this case.
6194 6193 * - binding to a cpu, eg with thread_affinity_set(). This is used
6195 6194 * in the SCRUBBER_CEEN_CHECK case, but is not practical for
6196 6195 * the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6197 6196 */
6198 6197 void
6199 6198 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6200 6199 {
6201 6200 ch_cpu_errors_t cpu_error_regs;
6202 6201 uint64_t ec_err_enable;
6203 6202 uint64_t page_offset;
6204 6203
6205 6204 /* Read AFSR */
6206 6205 get_cpu_error_state(&cpu_error_regs);
6207 6206
6208 6207 /*
6209 6208 * If no CEEN errors have occurred during the timeout
6210 6209 * interval, it is safe to re-enable CEEN and exit.
6211 6210 */
6212 6211 if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6213 6212 (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6214 6213 if (flag == TIMEOUT_CEEN_CHECK &&
6215 6214 !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6216 6215 set_error_enable(ec_err_enable | EN_REG_CEEN);
6217 6216 return;
6218 6217 }
6219 6218
6220 6219 /*
6221 6220 * Ensure that CEEN was not reenabled (maybe by DR) before
6222 6221 * we log/clear the error.
6223 6222 */
6224 6223 if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6225 6224 set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6226 6225
6227 6226 /*
6228 6227 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6229 6228 * timeout will be rescheduled when the error is logged.
6230 6229 */
6231 6230 if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6232 6231 (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6233 6232 cpu_ce_detected(&cpu_error_regs,
6234 6233 CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6235 6234 else
6236 6235 cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6237 6236
6238 6237 /*
6239 6238 * If the memory scrubber runs while CEEN is
6240 6239 * disabled, (or if CEEN is disabled during the
6241 6240 * scrub as a result of a CE being triggered by
6242 6241 * it), the range being scrubbed will not be
6243 6242 * completely cleaned. If there are multiple CEs
6244 6243 * in the range at most two of these will be dealt
6245 6244 * with, (one by the trap handler and one by the
6246 6245 * timeout). It is also possible that none are dealt
6247 6246 * with, (CEEN disabled and another CE occurs before
6248 6247 * the timeout triggers). So to ensure that the
6249 6248 * memory is actually scrubbed, we have to access each
6250 6249 * memory location in the range and then check whether
6251 6250 * that access causes a CE.
6252 6251 */
6253 6252 if (flag == SCRUBBER_CEEN_CHECK && va) {
6254 6253 if ((cpu_error_regs.afar >= pa) &&
6255 6254 (cpu_error_regs.afar < (pa + psz))) {
6256 6255 /*
6257 6256 * Force a load from physical memory for each
6258 6257 * 64-byte block, then check AFSR to determine
6259 6258 * whether this access caused an error.
6260 6259 *
6261 6260 * This is a slow way to do a scrub, but as it will
6262 6261 * only be invoked when the memory scrubber actually
6263 6262 * triggered a CE, it should not happen too
6264 6263 * frequently.
6265 6264 *
6266 6265 * cut down what we need to check as the scrubber
6267 6266 	 * has verified up to AFAR, so get its offset
6268 6267 * into the page and start there.
6269 6268 */
6270 6269 page_offset = (uint64_t)(cpu_error_regs.afar &
6271 6270 (psz - 1));
6272 6271 va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6273 6272 psz -= (uint_t)(P2ALIGN(page_offset, 64));
6274 6273 cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6275 6274 psz);
6276 6275 }
6277 6276 }
6278 6277
6279 6278 /*
6280 6279 * Reset error enable if this CE is not masked.
6281 6280 */
6282 6281 if ((flag == TIMEOUT_CEEN_CHECK) &&
6283 6282 (cpu_error_regs.afsr & cpu_ce_not_deferred))
6284 6283 set_error_enable(ec_err_enable | EN_REG_CEEN);
6285 6284
6286 6285 }
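
Reviewer's note: the SCRUBBER_CEEN_CHECK offset arithmetic above is easy to misread, so here is a standalone, user-level sketch of the same computation with made-up numbers. P2ALIGN() is reproduced with its usual <sys/sysmacros.h> meaning, and the page base, page size, and AFAR are hypothetical values chosen only to show how the restart point and remaining byte count fall out.

/*
 * Standalone illustration of the restart-offset math in cpu_check_ce().
 * All addresses and sizes are invented for the example.
 */
#include <stdio.h>
#include <stdint.h>

#define	P2ALIGN(x, align)	((x) & -(align))	/* as in sys/sysmacros.h */

int
main(void)
{
	uint64_t pa = 0x10000000;	/* hypothetical start of scrub page */
	uint32_t psz = 8192;		/* hypothetical scrub page size */
	uint64_t afar = pa + 0x8c8;	/* hypothetical CE address in page */

	/* Offset of the CE within the page, as in the code above. */
	uint64_t page_offset = afar & (psz - 1);

	/* Round down to a 64-byte block boundary and trim the length. */
	uint64_t start = P2ALIGN(page_offset, 64);
	uint32_t remaining = psz - (uint32_t)start;

	/* Prints: restart at offset 0x8c0, 5952 bytes remain */
	(void) printf("restart at offset 0x%llx, %u bytes remain\n",
	    (unsigned long long)start, remaining);
	return (0);
}
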
6287 6286
6288 6287 /*
6289 6288 * Attempt a cpu logout for an error that we did not trap for, such
6290 6289 * as a CE noticed with CEEN off. It is assumed that we are still running
6291 6290 * on the cpu that took the error and that we cannot migrate. Returns
6292 6291  * 1 on success, otherwise 0.
6293 6292 */
6294 6293 static int
6295 6294 cpu_ce_delayed_ec_logout(uint64_t afar)
6296 6295 {
6297 6296 ch_cpu_logout_t *clop;
6298 6297
6299 6298 if (CPU_PRIVATE(CPU) == NULL)
6300 6299 return (0);
6301 6300
6302 6301 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6303 6302 if (atomic_cas_64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6304 6303 LOGOUT_INVALID)
6305 6304 return (0);
6306 6305
6307 6306 cpu_delayed_logout(afar, clop);
6308 6307 return (1);
6309 6308 }
6310 6309
6311 6310 /*
6312 6311 * We got an error while CEEN was disabled. We
6313 6312 * need to clean up after it and log whatever
6314 6313 * information we have on the CE.
6315 6314 */
6316 6315 void
6317 6316 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6318 6317 {
6319 6318 ch_async_flt_t ch_flt;
6320 6319 struct async_flt *aflt;
6321 6320 char pr_reason[MAX_REASON_STRING];
6322 6321
6323 6322 bzero(&ch_flt, sizeof (ch_async_flt_t));
6324 6323 ch_flt.flt_trapped_ce = flag;
6325 6324 aflt = (struct async_flt *)&ch_flt;
6326 6325 aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6327 6326 ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6328 6327 ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6329 6328 (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6330 6329 aflt->flt_addr = cpu_error_regs->afar;
6331 6330 #if defined(SERRANO)
6332 6331 ch_flt.afar2 = cpu_error_regs->afar2;
6333 6332 #endif /* SERRANO */
6334 6333 aflt->flt_pc = NULL;
6335 6334 aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6336 6335 aflt->flt_tl = 0;
6337 6336 aflt->flt_panic = 0;
6338 6337 cpu_log_and_clear_ce(&ch_flt);
6339 6338
6340 6339 /*
6341 6340 * check if we caused any errors during cleanup
6342 6341 */
6343 6342 if (clear_errors(&ch_flt)) {
6344 6343 pr_reason[0] = '\0';
6345 6344 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6346 6345 NULL);
6347 6346 }
6348 6347 }
6349 6348
6350 6349 /*
6351 6350 * Log/clear CEEN-controlled disrupting errors
6352 6351 */
6353 6352 static void
6354 6353 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6355 6354 {
6356 6355 struct async_flt *aflt;
6357 6356 uint64_t afsr, afsr_errs;
6358 6357 ch_cpu_logout_t *clop;
6359 6358 char pr_reason[MAX_REASON_STRING];
6360 6359 on_trap_data_t *otp = curthread->t_ontrap;
6361 6360
6362 6361 aflt = (struct async_flt *)ch_flt;
6363 6362 afsr = aflt->flt_stat;
6364 6363 afsr_errs = ch_flt->afsr_errs;
6365 6364 aflt->flt_id = gethrtime_waitfree();
6366 6365 aflt->flt_bus_id = getprocessorid();
6367 6366 aflt->flt_inst = CPU->cpu_id;
6368 6367 aflt->flt_prot = AFLT_PROT_NONE;
6369 6368 aflt->flt_class = CPU_FAULT;
6370 6369 aflt->flt_status = ECC_C_TRAP;
6371 6370
6372 6371 pr_reason[0] = '\0';
6373 6372 /*
6374 6373 * Get the CPU log out info for Disrupting Trap.
6375 6374 */
6376 6375 if (CPU_PRIVATE(CPU) == NULL) {
6377 6376 clop = NULL;
6378 6377 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6379 6378 } else {
6380 6379 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6381 6380 }
6382 6381
6383 6382 if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6384 6383 ch_cpu_errors_t cpu_error_regs;
6385 6384
6386 6385 get_cpu_error_state(&cpu_error_regs);
6387 6386 (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6388 6387 clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6389 6388 clop->clo_data.chd_afar = cpu_error_regs.afar;
6390 6389 clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6391 6390 clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6392 6391 clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6393 6392 clop->clo_sdw_data.chd_afsr_ext =
6394 6393 cpu_error_regs.shadow_afsr_ext;
6395 6394 #if defined(SERRANO)
6396 6395 clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6397 6396 #endif /* SERRANO */
6398 6397 ch_flt->flt_data_incomplete = 1;
6399 6398
6400 6399 /*
6401 6400 * The logging/clear code expects AFSR/AFAR to be cleared.
6402 6401 * The trap handler does it for CEEN enabled errors
6403 6402 * so we need to do it here.
6404 6403 */
6405 6404 set_cpu_error_state(&cpu_error_regs);
6406 6405 }
6407 6406
6408 6407 #if defined(JALAPENO) || defined(SERRANO)
6409 6408 /*
6410 6409 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6411 6410 	 * For Serrano, even though we do have the AFAR, we still do the
6412 6411 * scrub on the RCE side since that's where the error type can
6413 6412 * be properly classified as intermittent, persistent, etc.
6414 6413 *
6415 6414 * CE/RCE: If error is in memory and AFAR is valid, scrub the memory.
6416 6415 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6417 6416 * the flt_status bits.
6418 6417 */
6419 6418 if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6420 6419 (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6421 6420 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6422 6421 cpu_ce_scrub_mem_err(aflt, B_TRUE);
6423 6422 }
6424 6423 #else /* JALAPENO || SERRANO */
6425 6424 /*
6426 6425 * CE/EMC: If error is in memory and AFAR is valid, scrub the memory.
6427 6426 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6428 6427 * the flt_status bits.
6429 6428 */
6430 6429 if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6431 6430 if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6432 6431 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6433 6432 cpu_ce_scrub_mem_err(aflt, B_TRUE);
6434 6433 }
6435 6434 }
6436 6435
6437 6436 #endif /* JALAPENO || SERRANO */
6438 6437
6439 6438 /*
6440 6439 * Update flt_prot if this error occurred under on_trap protection.
6441 6440 */
6442 6441 if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6443 6442 aflt->flt_prot = AFLT_PROT_EC;
6444 6443
6445 6444 /*
6446 6445 * Queue events on the async event queue, one event per error bit.
6447 6446 */
6448 6447 if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6449 6448 (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6450 6449 ch_flt->flt_type = CPU_INV_AFSR;
6451 6450 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6452 6451 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6453 6452 aflt->flt_panic);
6454 6453 }
6455 6454
6456 6455 /*
6457 6456 * Zero out + invalidate CPU logout.
6458 6457 */
6459 6458 if (clop) {
6460 6459 bzero(clop, sizeof (ch_cpu_logout_t));
6461 6460 clop->clo_data.chd_afar = LOGOUT_INVALID;
6462 6461 }
6463 6462
6464 6463 /*
6465 6464 * If either a CPC, WDC or EDC error has occurred while CEEN
6466 6465 * was disabled, we need to flush either the entire
6467 6466 * E$ or an E$ line.
6468 6467 */
6469 6468 #if defined(JALAPENO) || defined(SERRANO)
6470 6469 if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6471 6470 #else /* JALAPENO || SERRANO */
6472 6471 if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6473 6472 C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6474 6473 #endif /* JALAPENO || SERRANO */
6475 6474 cpu_error_ecache_flush(ch_flt);
6476 6475
6477 6476 }
6478 6477
6479 6478 /*
6480 6479 * depending on the error type, we determine whether we
6481 6480 * need to flush the entire ecache or just a line.
6482 6481 */
6483 6482 static int
6484 6483 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6485 6484 {
6486 6485 struct async_flt *aflt;
6487 6486 uint64_t afsr;
6488 6487 uint64_t afsr_errs = ch_flt->afsr_errs;
6489 6488
6490 6489 aflt = (struct async_flt *)ch_flt;
6491 6490 afsr = aflt->flt_stat;
6492 6491
6493 6492 /*
6494 6493 * If we got multiple errors, no point in trying
6495 6494 * the individual cases, just flush the whole cache
6496 6495 */
6497 6496 if (afsr & C_AFSR_ME) {
6498 6497 return (ECACHE_FLUSH_ALL);
6499 6498 }
6500 6499
6501 6500 /*
6502 6501 * If either a CPC, WDC or EDC error has occurred while CEEN
6503 6502 * was disabled, we need to flush entire E$. We can't just
6504 6503 * flush the cache line affected as the ME bit
6505 6504 * is not set when multiple correctable errors of the same
6506 6505 * type occur, so we might have multiple CPC or EDC errors,
6507 6506 * with only the first recorded.
6508 6507 */
6509 6508 #if defined(JALAPENO) || defined(SERRANO)
6510 6509 if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6511 6510 #else /* JALAPENO || SERRANO */
6512 6511 if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6513 6512 C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6514 6513 #endif /* JALAPENO || SERRANO */
6515 6514 return (ECACHE_FLUSH_ALL);
6516 6515 }
6517 6516
6518 6517 #if defined(JALAPENO) || defined(SERRANO)
6519 6518 /*
6520 6519 * If only UE or RUE is set, flush the Ecache line, otherwise
6521 6520 * flush the entire Ecache.
6522 6521 */
6523 6522 if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6524 6523 if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6525 6524 (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6526 6525 return (ECACHE_FLUSH_LINE);
6527 6526 } else {
6528 6527 return (ECACHE_FLUSH_ALL);
6529 6528 }
6530 6529 }
6531 6530 #else /* JALAPENO || SERRANO */
6532 6531 /*
6533 6532 * If UE only is set, flush the Ecache line, otherwise
6534 6533 * flush the entire Ecache.
6535 6534 */
6536 6535 if (afsr_errs & C_AFSR_UE) {
6537 6536 if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6538 6537 C_AFSR_UE) {
6539 6538 return (ECACHE_FLUSH_LINE);
6540 6539 } else {
6541 6540 return (ECACHE_FLUSH_ALL);
6542 6541 }
6543 6542 }
6544 6543 #endif /* JALAPENO || SERRANO */
6545 6544
6546 6545 /*
6547 6546 * EDU: If EDU only is set, flush the ecache line, otherwise
6548 6547 * flush the entire Ecache.
6549 6548 */
6550 6549 if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6551 6550 if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6552 6551 ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6553 6552 return (ECACHE_FLUSH_LINE);
6554 6553 } else {
6555 6554 return (ECACHE_FLUSH_ALL);
6556 6555 }
6557 6556 }
6558 6557
6559 6558 /*
6560 6559 * BERR: If BERR only is set, flush the Ecache line, otherwise
6561 6560 * flush the entire Ecache.
6562 6561 */
6563 6562 if (afsr_errs & C_AFSR_BERR) {
6564 6563 if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6565 6564 return (ECACHE_FLUSH_LINE);
6566 6565 } else {
6567 6566 return (ECACHE_FLUSH_ALL);
6568 6567 }
6569 6568 }
6570 6569
6571 6570 return (0);
6572 6571 }
6573 6572
6574 6573 void
6575 6574 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6576 6575 {
6577 6576 int ecache_flush_flag =
6578 6577 cpu_error_ecache_flush_required(ch_flt);
6579 6578
6580 6579 /*
6581 6580 * Flush Ecache line or entire Ecache based on above checks.
6582 6581 */
6583 6582 if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6584 6583 cpu_flush_ecache();
6585 6584 else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6586 6585 cpu_flush_ecache_line(ch_flt);
6587 6586 }
6588 6587
6589 6588 }
6590 6589
6591 6590 /*
6592 6591 * Extract the PA portion from the E$ tag.
6593 6592 */
6594 6593 uint64_t
6595 6594 cpu_ectag_to_pa(int setsize, uint64_t tag)
6596 6595 {
6597 6596 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6598 6597 return (JG_ECTAG_TO_PA(setsize, tag));
6599 6598 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6600 6599 return (PN_L3TAG_TO_PA(tag));
6601 6600 else
6602 6601 return (CH_ECTAG_TO_PA(setsize, tag));
6603 6602 }
6604 6603
6605 6604 /*
6606 6605 * Convert the E$ tag PA into an E$ subblock index.
6607 6606 */
6608 6607 int
6609 6608 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6610 6609 {
6611 6610 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6612 6611 return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6613 6612 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6614 6613 /* Panther has only one subblock per line */
6615 6614 return (0);
6616 6615 else
6617 6616 return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6618 6617 }
6619 6618
6620 6619 /*
6621 6620 * All subblocks in an E$ line must be invalid for
6622 6621 * the line to be invalid.
6623 6622 */
6624 6623 int
6625 6624 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6626 6625 {
6627 6626 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6628 6627 return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6629 6628 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6630 6629 return (PN_L3_LINE_INVALID(tag));
6631 6630 else
6632 6631 return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6633 6632 }
6634 6633
6635 6634 /*
6636 6635 * Extract state bits for a subblock given the tag. Note that for Panther
6637 6636 * this works on both l2 and l3 tags.
6638 6637 */
6639 6638 int
6640 6639 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6641 6640 {
6642 6641 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6643 6642 return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6644 6643 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6645 6644 return (tag & CH_ECSTATE_MASK);
6646 6645 else
6647 6646 return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6648 6647 }
6649 6648
6650 6649 /*
6651 6650 * Cpu specific initialization.
6652 6651 */
6653 6652 void
6654 6653 cpu_mp_init(void)
6655 6654 {
6656 6655 #ifdef CHEETAHPLUS_ERRATUM_25
6657 6656 if (cheetah_sendmondo_recover) {
6658 6657 cheetah_nudge_init();
6659 6658 }
6660 6659 #endif
6661 6660 }
6662 6661
6663 6662 void
6664 6663 cpu_ereport_post(struct async_flt *aflt)
6665 6664 {
6666 6665 char *cpu_type, buf[FM_MAX_CLASS];
6667 6666 nv_alloc_t *nva = NULL;
6668 6667 nvlist_t *ereport, *detector, *resource;
6669 6668 errorq_elem_t *eqep;
6670 6669 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6671 6670 char unum[UNUM_NAMLEN];
6672 6671 int synd_code;
6673 6672 uint8_t msg_type;
6674 6673 plat_ecc_ch_async_flt_t plat_ecc_ch_flt;
6675 6674
6676 6675 if (aflt->flt_panic || panicstr) {
6677 6676 eqep = errorq_reserve(ereport_errorq);
6678 6677 if (eqep == NULL)
6679 6678 return;
6680 6679 ereport = errorq_elem_nvl(ereport_errorq, eqep);
6681 6680 nva = errorq_elem_nva(ereport_errorq, eqep);
6682 6681 } else {
6683 6682 ereport = fm_nvlist_create(nva);
6684 6683 }
6685 6684
6686 6685 /*
6687 6686 * Create the scheme "cpu" FMRI.
6688 6687 */
6689 6688 detector = fm_nvlist_create(nva);
6690 6689 resource = fm_nvlist_create(nva);
6691 6690 switch (cpunodes[aflt->flt_inst].implementation) {
6692 6691 case CHEETAH_IMPL:
6693 6692 cpu_type = FM_EREPORT_CPU_USIII;
6694 6693 break;
6695 6694 case CHEETAH_PLUS_IMPL:
6696 6695 cpu_type = FM_EREPORT_CPU_USIIIplus;
6697 6696 break;
6698 6697 case JALAPENO_IMPL:
6699 6698 cpu_type = FM_EREPORT_CPU_USIIIi;
6700 6699 break;
6701 6700 case SERRANO_IMPL:
6702 6701 cpu_type = FM_EREPORT_CPU_USIIIiplus;
6703 6702 break;
6704 6703 case JAGUAR_IMPL:
6705 6704 cpu_type = FM_EREPORT_CPU_USIV;
6706 6705 break;
6707 6706 case PANTHER_IMPL:
6708 6707 cpu_type = FM_EREPORT_CPU_USIVplus;
6709 6708 break;
6710 6709 default:
6711 6710 cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6712 6711 break;
6713 6712 }
6714 6713
6715 6714 cpu_fmri_cpu_set(detector, aflt->flt_inst);
6716 6715
6717 6716 /*
6718 6717 * Encode all the common data into the ereport.
6719 6718 */
6720 6719 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6721 6720 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6722 6721
6723 6722 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6724 6723 fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6725 6724 detector, NULL);
6726 6725
6727 6726 /*
6728 6727 * Encode the error specific data that was saved in
6729 6728 * the async_flt structure into the ereport.
6730 6729 */
6731 6730 cpu_payload_add_aflt(aflt, ereport, resource,
6732 6731 &plat_ecc_ch_flt.ecaf_afar_status,
6733 6732 &plat_ecc_ch_flt.ecaf_synd_status);
6734 6733
6735 6734 if (aflt->flt_panic || panicstr) {
6736 6735 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6737 6736 } else {
6738 6737 (void) fm_ereport_post(ereport, EVCH_TRYHARD);
6739 6738 fm_nvlist_destroy(ereport, FM_NVA_FREE);
6740 6739 fm_nvlist_destroy(detector, FM_NVA_FREE);
6741 6740 fm_nvlist_destroy(resource, FM_NVA_FREE);
6742 6741 }
6743 6742 /*
6744 6743 * Send the enhanced error information (plat_ecc_error2_data_t)
6745 6744 	 * to the SC only if it can process it.
6746 6745 */
6747 6746
6748 6747 if (&plat_ecc_capability_sc_get &&
6749 6748 plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6750 6749 msg_type = cpu_flt_bit_to_plat_error(aflt);
6751 6750 if (msg_type != PLAT_ECC_ERROR2_NONE) {
6752 6751 /*
6753 6752 * If afar status is not invalid do a unum lookup.
6754 6753 */
6755 6754 if (plat_ecc_ch_flt.ecaf_afar_status !=
6756 6755 AFLT_STAT_INVALID) {
6757 6756 synd_code = synd_to_synd_code(
6758 6757 plat_ecc_ch_flt.ecaf_synd_status,
6759 6758 aflt->flt_synd, ch_flt->flt_bit);
6760 6759 (void) cpu_get_mem_unum_synd(synd_code,
6761 6760 aflt, unum);
6762 6761 } else {
6763 6762 unum[0] = '\0';
6764 6763 }
6765 6764 plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6766 6765 plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6767 6766 plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6768 6767 plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6769 6768 ch_flt->flt_sdw_afsr_ext;
6770 6769
6771 6770 if (&plat_log_fruid_error2)
6772 6771 plat_log_fruid_error2(msg_type, unum, aflt,
6773 6772 &plat_ecc_ch_flt);
6774 6773 }
6775 6774 }
6776 6775 }
6777 6776
6778 6777 void
6779 6778 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6780 6779 {
6781 6780 int status;
6782 6781 ddi_fm_error_t de;
6783 6782
6784 6783 bzero(&de, sizeof (ddi_fm_error_t));
6785 6784
6786 6785 de.fme_version = DDI_FME_VERSION;
6787 6786 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6788 6787 FM_ENA_FMT1);
6789 6788 de.fme_flag = expected;
6790 6789 de.fme_bus_specific = (void *)aflt->flt_addr;
6791 6790 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6792 6791 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6793 6792 aflt->flt_panic = 1;
6794 6793 }
6795 6794
6796 6795 void
6797 6796 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6798 6797 errorq_t *eqp, uint_t flag)
6799 6798 {
6800 6799 struct async_flt *aflt = (struct async_flt *)payload;
6801 6800
6802 6801 aflt->flt_erpt_class = error_class;
6803 6802 errorq_dispatch(eqp, payload, payload_sz, flag);
6804 6803 }
6805 6804
6806 6805 /*
6807 6806 * This routine may be called by the IO module, but does not do
6808 6807 * anything in this cpu module. The SERD algorithm is handled by
6809 6808  * the cpumem-diagnosis engine instead.
6810 6809 */
6811 6810 /*ARGSUSED*/
6812 6811 void
6813 6812 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
6814 6813 {}
6815 6814
6816 6815 void
6817 6816 adjust_hw_copy_limits(int ecache_size)
6818 6817 {
6819 6818 /*
6820 6819 * Set hw copy limits.
6821 6820 *
6822 6821 * /etc/system will be parsed later and can override one or more
6823 6822 * of these settings.
6824 6823 *
6825 6824 * At this time, ecache size seems only mildly relevant.
6826 6825 * We seem to run into issues with the d-cache and stalls
6827 6826 * we see on misses.
6828 6827 *
6829 6828 * Cycle measurement indicates that 2 byte aligned copies fare
6830 6829 * little better than doing things with VIS at around 512 bytes.
6831 6830 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6832 6831 * aligned is faster whenever the source and destination data
6833 6832 	 * are in cache and the total size is less than 2 Kbytes. The 2K
6834 6833 * limit seems to be driven by the 2K write cache.
6835 6834 * When more than 2K of copies are done in non-VIS mode, stores
6836 6835 * backup in the write cache. In VIS mode, the write cache is
6837 6836 	 * back up in the write cache. In VIS mode, the write cache is
6838 6837 * boundaries.
6839 6838 *
6840 6839 * In addition, in non-VIS mode, there is no prefetching, so
6841 6840 * for larger copies, the advantage of prefetching to avoid even
6842 6841 * occasional cache misses is enough to justify using the VIS code.
6843 6842 *
6844 6843 * During testing, it was discovered that netbench ran 3% slower
6845 6844 * when hw_copy_limit_8 was 2K or larger. Apparently for server
6846 6845 * applications, data is only used once (copied to the output
6847 6846 * buffer, then copied by the network device off the system). Using
6848 6847 * the VIS copy saves more L2 cache state. Network copies are
6849 6848 * around 1.3K to 1.5K in size for historical reasons.
6850 6849 *
6851 6850 * Therefore, a limit of 1K bytes will be used for the 8 byte
6852 6851 * aligned copy even for large caches and 8 MB ecache. The
6853 6852 * infrastructure to allow different limits for different sized
6854 6853 * caches is kept to allow further tuning in later releases.
6855 6854 */
6856 6855
6857 6856 if (min_ecache_size == 0 && use_hw_bcopy) {
6858 6857 /*
6859 6858 * First time through - should be before /etc/system
6860 6859 * is read.
6861 6860 * Could skip the checks for zero but this lets us
6862 6861 * preserve any debugger rewrites.
6863 6862 */
6864 6863 if (hw_copy_limit_1 == 0) {
6865 6864 hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6866 6865 priv_hcl_1 = hw_copy_limit_1;
6867 6866 }
6868 6867 if (hw_copy_limit_2 == 0) {
6869 6868 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6870 6869 priv_hcl_2 = hw_copy_limit_2;
6871 6870 }
6872 6871 if (hw_copy_limit_4 == 0) {
6873 6872 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6874 6873 priv_hcl_4 = hw_copy_limit_4;
6875 6874 }
6876 6875 if (hw_copy_limit_8 == 0) {
6877 6876 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6878 6877 priv_hcl_8 = hw_copy_limit_8;
6879 6878 }
6880 6879 min_ecache_size = ecache_size;
6881 6880 } else {
6882 6881 /*
6883 6882 * MP initialization. Called *after* /etc/system has
6884 6883 * been parsed. One CPU has already been initialized.
6885 6884 * Need to cater for /etc/system having scragged one
6886 6885 * of our values.
6887 6886 */
6888 6887 if (ecache_size == min_ecache_size) {
6889 6888 /*
6890 6889 * Same size ecache. We do nothing unless we
6891 6890 * have a pessimistic ecache setting. In that
6892 6891 * case we become more optimistic (if the cache is
6893 6892 * large enough).
6894 6893 */
6895 6894 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6896 6895 /*
6897 6896 * Need to adjust hw_copy_limit* from our
6898 6897 * pessimistic uniprocessor value to a more
6899 6898 * optimistic UP value *iff* it hasn't been
6900 6899 * reset.
6901 6900 */
6902 6901 if ((ecache_size > 1048576) &&
6903 6902 (priv_hcl_8 == hw_copy_limit_8)) {
6904 6903 if (ecache_size <= 2097152)
6905 6904 hw_copy_limit_8 = 4 *
6906 6905 VIS_COPY_THRESHOLD;
6907 6906 else if (ecache_size <= 4194304)
6908 6907 hw_copy_limit_8 = 4 *
6909 6908 VIS_COPY_THRESHOLD;
6910 6909 else
6911 6910 hw_copy_limit_8 = 4 *
6912 6911 VIS_COPY_THRESHOLD;
6913 6912 priv_hcl_8 = hw_copy_limit_8;
6914 6913 }
6915 6914 }
6916 6915 } else if (ecache_size < min_ecache_size) {
6917 6916 /*
6918 6917 * A different ecache size. Can this even happen?
6919 6918 */
6920 6919 if (priv_hcl_8 == hw_copy_limit_8) {
6921 6920 /*
6922 6921 * The previous value that we set
6923 6922 * is unchanged (i.e., it hasn't been
6924 6923 * scragged by /etc/system). Rewrite it.
6925 6924 */
6926 6925 if (ecache_size <= 1048576)
6927 6926 hw_copy_limit_8 = 8 *
6928 6927 VIS_COPY_THRESHOLD;
6929 6928 else if (ecache_size <= 2097152)
6930 6929 hw_copy_limit_8 = 8 *
6931 6930 VIS_COPY_THRESHOLD;
6932 6931 else if (ecache_size <= 4194304)
6933 6932 hw_copy_limit_8 = 8 *
6934 6933 VIS_COPY_THRESHOLD;
6935 6934 else
6936 6935 hw_copy_limit_8 = 10 *
6937 6936 VIS_COPY_THRESHOLD;
6938 6937 priv_hcl_8 = hw_copy_limit_8;
6939 6938 min_ecache_size = ecache_size;
6940 6939 }
6941 6940 }
6942 6941 }
6943 6942 }
6944 6943
6945 6944 /*
6946 6945 * Called from illegal instruction trap handler to see if we can attribute
6947 6946  * the trap to an fpras check.
6948 6947 */
6949 6948 int
6950 6949 fpras_chktrap(struct regs *rp)
6951 6950 {
6952 6951 int op;
6953 6952 struct fpras_chkfngrp *cgp;
6954 6953 uintptr_t tpc = (uintptr_t)rp->r_pc;
6955 6954
6956 6955 if (fpras_chkfngrps == NULL)
6957 6956 return (0);
6958 6957
6959 6958 cgp = &fpras_chkfngrps[CPU->cpu_id];
6960 6959 for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6961 6960 if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6962 6961 tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6963 6962 break;
6964 6963 }
6965 6964 if (op == FPRAS_NCOPYOPS)
6966 6965 return (0);
6967 6966
6968 6967 /*
6969 6968 * This is an fpRAS failure caught through an illegal
6970 6969 * instruction - trampoline.
6971 6970 */
6972 6971 rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6973 6972 rp->r_npc = rp->r_pc + 4;
6974 6973 return (1);
6975 6974 }
6976 6975
6977 6976 /*
6978 6977 * fpras_failure is called when a fpras check detects a bad calculation
6979 6978 * result or an illegal instruction trap is attributed to an fpras
6980 6979 * check. In all cases we are still bound to CPU.
6981 6980 */
6982 6981 int
6983 6982 fpras_failure(int op, int how)
6984 6983 {
6985 6984 int use_hw_bcopy_orig, use_hw_bzero_orig;
6986 6985 uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6987 6986 ch_async_flt_t ch_flt;
6988 6987 struct async_flt *aflt = (struct async_flt *)&ch_flt;
6989 6988 struct fpras_chkfn *sfp, *cfp;
6990 6989 uint32_t *sip, *cip;
6991 6990 int i;
6992 6991
6993 6992 /*
6994 6993 * We're running on a sick CPU. Avoid further FPU use at least for
6995 6994 * the time in which we dispatch an ereport and (if applicable) panic.
6996 6995 */
6997 6996 use_hw_bcopy_orig = use_hw_bcopy;
6998 6997 use_hw_bzero_orig = use_hw_bzero;
6999 6998 hcl1_orig = hw_copy_limit_1;
7000 6999 hcl2_orig = hw_copy_limit_2;
7001 7000 hcl4_orig = hw_copy_limit_4;
7002 7001 hcl8_orig = hw_copy_limit_8;
7003 7002 use_hw_bcopy = use_hw_bzero = 0;
7004 7003 hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
7005 7004 hw_copy_limit_8 = 0;
7006 7005
7007 7006 bzero(&ch_flt, sizeof (ch_async_flt_t));
7008 7007 aflt->flt_id = gethrtime_waitfree();
7009 7008 aflt->flt_class = CPU_FAULT;
7010 7009 aflt->flt_inst = CPU->cpu_id;
7011 7010 aflt->flt_status = (how << 8) | op;
7012 7011 aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
7013 7012 ch_flt.flt_type = CPU_FPUERR;
7014 7013
7015 7014 /*
7016 7015 * We must panic if the copy operation had no lofault protection -
7017 7016 * ie, don't panic for copyin, copyout, kcopy and bcopy called
7018 7017 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
7019 7018 */
7020 7019 aflt->flt_panic = (curthread->t_lofault == NULL);
7021 7020
7022 7021 /*
7023 7022 * XOR the source instruction block with the copied instruction
7024 7023 * block - this will show us which bit(s) are corrupted.
7025 7024 */
7026 7025 sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
7027 7026 cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
7028 7027 if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
7029 7028 sip = &sfp->fpras_blk0[0];
7030 7029 cip = &cfp->fpras_blk0[0];
7031 7030 } else {
7032 7031 sip = &sfp->fpras_blk1[0];
7033 7032 cip = &cfp->fpras_blk1[0];
7034 7033 }
7035 7034 for (i = 0; i < 16; ++i, ++sip, ++cip)
7036 7035 ch_flt.flt_fpdata[i] = *sip ^ *cip;
7037 7036
7038 7037 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
7039 7038 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
7040 7039
7041 7040 if (aflt->flt_panic)
7042 7041 fm_panic("FPU failure on CPU %d", CPU->cpu_id);
7043 7042
7044 7043 /*
7045 7044 * We get here for copyin/copyout and kcopy or bcopy where the
7046 7045 * caller has used on_fault. We will flag the error so that
7047 7046 	 * the process may be killed. The trap_async_hwerr mechanism will
7048 7047 * take appropriate further action (such as a reboot, contract
7049 7048 * notification etc). Since we may be continuing we will
7050 7049 * restore the global hardware copy acceleration switches.
7051 7050 *
7052 7051 * When we return from this function to the copy function we want to
7053 7052 * avoid potentially bad data being used, ie we want the affected
7054 7053 * copy function to return an error. The caller should therefore
7055 7054 * invoke its lofault handler (which always exists for these functions)
7056 7055 * which will return the appropriate error.
7057 7056 */
7058 7057 ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
7059 7058 aston(curthread);
7060 7059
7061 7060 use_hw_bcopy = use_hw_bcopy_orig;
7062 7061 use_hw_bzero = use_hw_bzero_orig;
7063 7062 hw_copy_limit_1 = hcl1_orig;
7064 7063 hw_copy_limit_2 = hcl2_orig;
7065 7064 hw_copy_limit_4 = hcl4_orig;
7066 7065 hw_copy_limit_8 = hcl8_orig;
7067 7066
7068 7067 return (1);
7069 7068 }
7070 7069
7071 7070 #define VIS_BLOCKSIZE 64
7072 7071
7073 7072 int
7074 7073 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7075 7074 {
7076 7075 int ret, watched;
7077 7076
7078 7077 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7079 7078 ret = dtrace_blksuword32(addr, data, 0);
7080 7079 if (watched)
7081 7080 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7082 7081
7083 7082 return (ret);
7084 7083 }
7085 7084
7086 7085 /*
7087 7086 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7088 7087 * faulted cpu into that state). Cross-trap to the faulted cpu to clear
7089 7088 * CEEN from the EER to disable traps for further disrupting error types
7090 7089 * on that cpu. We could cross-call instead, but that has a larger
7091 7090 * instruction and data footprint than cross-trapping, and the cpu is known
7092 7091 * to be faulted.
7093 7092 */
7094 7093
7095 7094 void
7096 7095 cpu_faulted_enter(struct cpu *cp)
7097 7096 {
7098 7097 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7099 7098 }
7100 7099
7101 7100 /*
7102 7101 * Called when a cpu leaves the CPU_FAULTED state to return to one of
7103 7102 * offline, spare, or online (by the cpu requesting this state change).
7104 7103 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7105 7104 * disrupting error bits that have accumulated without trapping, then
7106 7105 * we cross-trap to re-enable CEEN controlled traps.
7107 7106 */
7108 7107 void
7109 7108 cpu_faulted_exit(struct cpu *cp)
7110 7109 {
7111 7110 ch_cpu_errors_t cpu_error_regs;
7112 7111
7113 7112 cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7114 7113 if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7115 7114 cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7116 7115 xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7117 7116 (uint64_t)&cpu_error_regs, 0);
7118 7117
7119 7118 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7120 7119 }
7121 7120
7122 7121 /*
7123 7122 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7124 7123 * the errors in the original AFSR, 0 otherwise.
7125 7124 *
7126 7125 * For all procs if the initial error was a BERR or TO, then it is possible
7127 7126 * that we may have caused a secondary BERR or TO in the process of logging the
7128 7127  * initial error via cpu_run_bus_error_handlers(). If this is the case then
7129 7128 * if the request was protected then a panic is still not necessary, if not
7130 7129 * protected then aft_panic is already set - so either way there's no need
7131 7130 * to set aft_panic for the secondary error.
7132 7131 *
7133 7132 * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7134 7133 * a store merge, then the error handling code will call cpu_deferred_error().
7135 7134 * When clear_errors() is called, it will determine that secondary errors have
7136 7135  * occurred - in particular, the store merge also caused an EDU and WDU that
7137 7136 * weren't discovered until this point.
7138 7137 *
7139 7138 * We do three checks to verify that we are in this case. If we pass all three
7140 7139 * checks, we return 1 to indicate that we should not panic. If any unexpected
7141 7140 * errors occur, we return 0.
7142 7141 *
7143 7142 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7144 7143 * handled in cpu_disrupting_errors(). Since this function is not even called
7145 7144 * in the case we are interested in, we just return 0 for these processors.
7146 7145 */
7147 7146 /*ARGSUSED*/
7148 7147 static int
7149 7148 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7150 7149 uint64_t t_afar)
7151 7150 {
7152 7151 #if defined(CHEETAH_PLUS)
7153 7152 #else /* CHEETAH_PLUS */
7154 7153 struct async_flt *aflt = (struct async_flt *)ch_flt;
7155 7154 #endif /* CHEETAH_PLUS */
7156 7155
7157 7156 /*
7158 7157 * Was the original error a BERR or TO and only a BERR or TO
7159 7158 * (multiple errors are also OK)
7160 7159 */
7161 7160 if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7162 7161 /*
7163 7162 * Is the new error a BERR or TO and only a BERR or TO
7164 7163 * (multiple errors are also OK)
7165 7164 */
7166 7165 if ((ch_flt->afsr_errs &
7167 7166 ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7168 7167 return (1);
7169 7168 }
7170 7169
7171 7170 #if defined(CHEETAH_PLUS)
7172 7171 return (0);
7173 7172 #else /* CHEETAH_PLUS */
7174 7173 /*
7175 7174 * Now look for secondary effects of a UE on cheetah/jalapeno
7176 7175 *
7177 7176 * Check the original error was a UE, and only a UE. Note that
7178 7177 * the ME bit will cause us to fail this check.
7179 7178 */
7180 7179 if (t_afsr_errs != C_AFSR_UE)
7181 7180 return (0);
7182 7181
7183 7182 /*
7184 7183 * Check the secondary errors were exclusively an EDU and/or WDU.
7185 7184 */
7186 7185 if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7187 7186 return (0);
7188 7187
7189 7188 /*
7190 7189 * Check the AFAR of the original error and secondary errors
7191 7190 * match to the 64-byte boundary
7192 7191 */
7193 7192 if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7194 7193 return (0);
7195 7194
7196 7195 /*
7197 7196 * We've passed all the checks, so it's a secondary error!
7198 7197 */
7199 7198 return (1);
7200 7199 #endif /* CHEETAH_PLUS */
7201 7200 }
7202 7201
7203 7202 /*
7204 7203 * Translate the flt_bit or flt_type into an error type. First, flt_bit
7205 7204 * is checked for any valid errors. If found, the error type is
7206 7205 * returned. If not found, the flt_type is checked for L1$ parity errors.
7207 7206 */
7208 7207 /*ARGSUSED*/
7209 7208 static uint8_t
7210 7209 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7211 7210 {
7212 7211 #if defined(JALAPENO)
7213 7212 /*
7214 7213 * Currently, logging errors to the SC is not supported on Jalapeno
7215 7214 */
7216 7215 return (PLAT_ECC_ERROR2_NONE);
7217 7216 #else
7218 7217 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7219 7218
7220 7219 switch (ch_flt->flt_bit) {
7221 7220 case C_AFSR_CE:
7222 7221 return (PLAT_ECC_ERROR2_CE);
7223 7222 case C_AFSR_UCC:
7224 7223 case C_AFSR_EDC:
7225 7224 case C_AFSR_WDC:
7226 7225 case C_AFSR_CPC:
7227 7226 return (PLAT_ECC_ERROR2_L2_CE);
7228 7227 case C_AFSR_EMC:
7229 7228 return (PLAT_ECC_ERROR2_EMC);
7230 7229 case C_AFSR_IVC:
7231 7230 return (PLAT_ECC_ERROR2_IVC);
7232 7231 case C_AFSR_UE:
7233 7232 return (PLAT_ECC_ERROR2_UE);
7234 7233 case C_AFSR_UCU:
7235 7234 case C_AFSR_EDU:
7236 7235 case C_AFSR_WDU:
7237 7236 case C_AFSR_CPU:
7238 7237 return (PLAT_ECC_ERROR2_L2_UE);
7239 7238 case C_AFSR_IVU:
7240 7239 return (PLAT_ECC_ERROR2_IVU);
7241 7240 case C_AFSR_TO:
7242 7241 return (PLAT_ECC_ERROR2_TO);
7243 7242 case C_AFSR_BERR:
7244 7243 return (PLAT_ECC_ERROR2_BERR);
7245 7244 #if defined(CHEETAH_PLUS)
7246 7245 case C_AFSR_L3_EDC:
7247 7246 case C_AFSR_L3_UCC:
7248 7247 case C_AFSR_L3_CPC:
7249 7248 case C_AFSR_L3_WDC:
7250 7249 return (PLAT_ECC_ERROR2_L3_CE);
7251 7250 case C_AFSR_IMC:
7252 7251 return (PLAT_ECC_ERROR2_IMC);
7253 7252 case C_AFSR_TSCE:
7254 7253 return (PLAT_ECC_ERROR2_L2_TSCE);
7255 7254 case C_AFSR_THCE:
7256 7255 return (PLAT_ECC_ERROR2_L2_THCE);
7257 7256 case C_AFSR_L3_MECC:
7258 7257 return (PLAT_ECC_ERROR2_L3_MECC);
7259 7258 case C_AFSR_L3_THCE:
7260 7259 return (PLAT_ECC_ERROR2_L3_THCE);
7261 7260 case C_AFSR_L3_CPU:
7262 7261 case C_AFSR_L3_EDU:
7263 7262 case C_AFSR_L3_UCU:
7264 7263 case C_AFSR_L3_WDU:
7265 7264 return (PLAT_ECC_ERROR2_L3_UE);
7266 7265 case C_AFSR_DUE:
7267 7266 return (PLAT_ECC_ERROR2_DUE);
7268 7267 case C_AFSR_DTO:
7269 7268 return (PLAT_ECC_ERROR2_DTO);
7270 7269 case C_AFSR_DBERR:
7271 7270 return (PLAT_ECC_ERROR2_DBERR);
7272 7271 #endif /* CHEETAH_PLUS */
7273 7272 default:
7274 7273 switch (ch_flt->flt_type) {
7275 7274 #if defined(CPU_IMP_L1_CACHE_PARITY)
7276 7275 case CPU_IC_PARITY:
7277 7276 return (PLAT_ECC_ERROR2_IPE);
7278 7277 case CPU_DC_PARITY:
7279 7278 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7280 7279 if (ch_flt->parity_data.dpe.cpl_cache ==
7281 7280 CPU_PC_PARITY) {
7282 7281 return (PLAT_ECC_ERROR2_PCACHE);
7283 7282 }
7284 7283 }
7285 7284 return (PLAT_ECC_ERROR2_DPE);
7286 7285 #endif /* CPU_IMP_L1_CACHE_PARITY */
7287 7286 case CPU_ITLB_PARITY:
7288 7287 return (PLAT_ECC_ERROR2_ITLB);
7289 7288 case CPU_DTLB_PARITY:
7290 7289 return (PLAT_ECC_ERROR2_DTLB);
7291 7290 default:
7292 7291 return (PLAT_ECC_ERROR2_NONE);
7293 7292 }
7294 7293 }
7295 7294 #endif /* JALAPENO */
7296 7295 }