4664 CPU->cpu_pri_data hasn't been used for years
--- old/usr/src/uts/i86pc/os/mp_startup.c
+++ new/usr/src/uts/i86pc/os/mp_startup.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29 /*
30 30 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
31 31 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
32 32 */
33 33
34 34 #include <sys/types.h>
35 35 #include <sys/thread.h>
36 36 #include <sys/cpuvar.h>
37 37 #include <sys/cpu.h>
38 38 #include <sys/t_lock.h>
39 39 #include <sys/param.h>
40 40 #include <sys/proc.h>
41 41 #include <sys/disp.h>
42 42 #include <sys/class.h>
43 43 #include <sys/cmn_err.h>
44 44 #include <sys/debug.h>
45 45 #include <sys/note.h>
46 46 #include <sys/asm_linkage.h>
47 47 #include <sys/x_call.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/var.h>
50 50 #include <sys/vtrace.h>
51 51 #include <vm/hat.h>
52 52 #include <vm/as.h>
53 53 #include <vm/seg_kmem.h>
54 54 #include <vm/seg_kp.h>
55 55 #include <sys/segments.h>
56 56 #include <sys/kmem.h>
57 57 #include <sys/stack.h>
58 58 #include <sys/smp_impldefs.h>
59 59 #include <sys/x86_archext.h>
60 60 #include <sys/machsystm.h>
61 61 #include <sys/traptrace.h>
62 62 #include <sys/clock.h>
63 63 #include <sys/cpc_impl.h>
64 64 #include <sys/pg.h>
65 65 #include <sys/cmt.h>
66 66 #include <sys/dtrace.h>
67 67 #include <sys/archsystm.h>
68 68 #include <sys/fp.h>
69 69 #include <sys/reboot.h>
70 70 #include <sys/kdi_machimpl.h>
71 71 #include <vm/hat_i86.h>
72 72 #include <vm/vm_dep.h>
73 73 #include <sys/memnode.h>
74 74 #include <sys/pci_cfgspace.h>
75 75 #include <sys/mach_mmu.h>
76 76 #include <sys/sysmacros.h>
77 77 #if defined(__xpv)
78 78 #include <sys/hypervisor.h>
79 79 #endif
80 80 #include <sys/cpu_module.h>
81 81 #include <sys/ontrap.h>
82 82
83 83 struct cpu cpus[1]; /* CPU data */
84 84 struct cpu *cpu[NCPU] = {&cpus[0]}; /* pointers to all CPUs */
85 85 struct cpu *cpu_free_list; /* list for released CPUs */
86 86 cpu_core_t cpu_core[NCPU]; /* cpu_core structures */
87 87
88 88 #define cpu_next_free cpu_prev
89 89
90 90 /*
91 91 * Useful for disabling MP bring-up on a MP capable system.
92 92 */
93 93 int use_mp = 1;
94 94
95 95 /*
96 96 * to be set by a PSM to indicate what cpus
97 97 * are sitting around on the system.
98 98 */
99 99 cpuset_t mp_cpus;
100 100
101 101 /*
102 102 * This variable is used by the hat layer to decide whether or not
103 103 * critical sections are needed to prevent race conditions. For sun4m,
104 104 * this variable is set once enough MP initialization has been done in
105 105 * order to allow cross calls.
106 106 */
107 107 int flushes_require_xcalls;
108 108
109 109 cpuset_t cpu_ready_set; /* initialized in startup() */
110 110
111 111 static void mp_startup_boot(void);
112 112 static void mp_startup_hotplug(void);
113 113
114 114 static void cpu_sep_enable(void);
115 115 static void cpu_sep_disable(void);
116 116 static void cpu_asysc_enable(void);
117 117 static void cpu_asysc_disable(void);
118 118
119 119 /*
120 120 * Init CPU info - get CPU type info for processor_info system call.
121 121 */
122 122 void
123 123 init_cpu_info(struct cpu *cp)
124 124 {
125 125 processor_info_t *pi = &cp->cpu_type_info;
126 126
127 127 /*
128 128 * Get clock-frequency property for the CPU.
129 129 */
130 130 pi->pi_clock = cpu_freq;
131 131
132 132 /*
133 133 * Current frequency in Hz.
134 134 */
135 135 cp->cpu_curr_clock = cpu_freq_hz;
136 136
137 137 /*
138 138 * Supported frequencies.
139 139 */
140 140 if (cp->cpu_supp_freqs == NULL) {
141 141 cpu_set_supp_freqs(cp, NULL);
142 142 }
143 143
144 144 (void) strcpy(pi->pi_processor_type, "i386");
145 145 if (fpu_exists)
146 146 (void) strcpy(pi->pi_fputypes, "i387 compatible");
147 147
148 148 cp->cpu_idstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
149 149 cp->cpu_brandstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
150 150
151 151 /*
152 152 * If called for the BSP, cp is equal to current CPU.
153 153 * For non-BSPs, cpuid info of cp is not ready yet, so use cpuid info
154 154 * of current CPU as default values for cpu_idstr and cpu_brandstr.
155 155 * They will be corrected in mp_startup_common() after cpuid_pass1()
156 156 * has been invoked on target CPU.
157 157 */
158 158 (void) cpuid_getidstr(CPU, cp->cpu_idstr, CPU_IDSTRLEN);
159 159 (void) cpuid_getbrandstr(CPU, cp->cpu_brandstr, CPU_IDSTRLEN);
160 160 }
161 161
162 162 /*
163 163 * Configure syscall support on this CPU.
164 164 */
165 165 /*ARGSUSED*/
166 166 void
167 167 init_cpu_syscall(struct cpu *cp)
168 168 {
169 169 kpreempt_disable();
170 170
171 171 #if defined(__amd64)
172 172 if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
173 173 is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
174 174
175 175 #if !defined(__lint)
176 176 /*
177 177 * The syscall instruction imposes a certain ordering on
178 178 * segment selectors, so we double-check that ordering
179 179 * here.
180 180 */
181 181 ASSERT(KDS_SEL == KCS_SEL + 8);
182 182 ASSERT(UDS_SEL == U32CS_SEL + 8);
183 183 ASSERT(UCS_SEL == U32CS_SEL + 16);
184 184 #endif
185 185 /*
186 186 * Turn syscall/sysret extensions on.
187 187 */
188 188 cpu_asysc_enable();
189 189
190 190 /*
191 191 * Program the magic registers ..
192 192 */
193 193 wrmsr(MSR_AMD_STAR,
194 194 ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
195 195 wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
196 196 wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);
197 197
198 198 /*
199 199 * This list of flags is masked off the incoming
200 200 * %rfl when we enter the kernel.
201 201 */
202 202 wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
203 203 }
204 204 #endif
205 205
206 206 /*
207 207 * On 32-bit kernels, we use sysenter/sysexit because it's too
208 208 * hard to use syscall/sysret, and it is more portable anyway.
209 209 *
210 210 * On 64-bit kernels on Nocona machines, the 32-bit syscall
211 211 * variant isn't available to 32-bit applications, but sysenter is.
212 212 */
213 213 if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
214 214 is_x86_feature(x86_featureset, X86FSET_SEP)) {
215 215
216 216 #if !defined(__lint)
217 217 /*
218 218 * The sysenter instruction imposes a certain ordering on
219 219 * segment selectors, so we double-check that ordering
220 220 * here. See "sysenter" in Intel document 245471-012, "IA-32
221 221 * Intel Architecture Software Developer's Manual Volume 2:
222 222 * Instruction Set Reference"
223 223 */
224 224 ASSERT(KDS_SEL == KCS_SEL + 8);
225 225
226 226 ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
227 227 ASSERT32(UDS_SEL == UCS_SEL + 8);
228 228
229 229 ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
230 230 ASSERT64(UDS_SEL == U32CS_SEL + 8);
231 231 #endif
232 232
233 233 cpu_sep_enable();
234 234
235 235 /*
236 236	 * resume() sets this value to the base of the thread's stack
237 237 * via a context handler.
238 238 */
239 239 wrmsr(MSR_INTC_SEP_ESP, 0);
240 240 wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
241 241 }
242 242
243 243 kpreempt_enable();
244 244 }
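
As a minimal sketch of why the selector-layout ASSERTs in init_cpu_syscall() must hold: the AMD64 architecture derives all four selectors from the single MSR_AMD_STAR value programmed above, so the illustration below (illustrative only, not part of the change) decodes that value the way the hardware does on SYSCALL and 64-bit SYSRET.

	/*
	 * SYSCALL loads CS from STAR[47:32] and SS from STAR[47:32] + 8;
	 * 64-bit SYSRET loads CS from STAR[63:48] + 16 and SS from
	 * STAR[63:48] + 8.
	 */
	uint64_t star = ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32;

	uint16_t syscall_cs  = (star >> 32) & 0xffff;        /* KCS_SEL */
	uint16_t syscall_ss  = syscall_cs + 8;               /* KDS_SEL, per ASSERT */
	uint16_t sysret_cs64 = ((star >> 48) & 0xffff) + 16; /* UCS_SEL, per ASSERT */
	uint16_t sysret_ss   = ((star >> 48) & 0xffff) + 8;  /* UDS_SEL, per ASSERT */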
245 245
246 246 /*
247 247 * Multiprocessor initialization.
248 248 *
249 249 * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
250 250 * startup and idle threads for the specified CPU.
251 251 * Parameter boot is true for boot time operations and is false for CPU
252 252 * DR operations.
253 253 */
254 254 static struct cpu *
255 255 mp_cpu_configure_common(int cpun, boolean_t boot)
256 256 {
257 257 struct cpu *cp;
258 258 kthread_id_t tp;
259 259 caddr_t sp;
260 260 proc_t *procp;
261 261 #if !defined(__xpv)
262 262 extern int idle_cpu_prefer_mwait;
263 263 extern void cpu_idle_mwait();
264 264 #endif
265 265 extern void idle();
266 266 extern void cpu_idle();
267 267
268 268 #ifdef TRAPTRACE
269 269 trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
270 270 #endif
271 271
272 272 ASSERT(MUTEX_HELD(&cpu_lock));
273 273 ASSERT(cpun < NCPU && cpu[cpun] == NULL);
274 274
275 275 if (cpu_free_list == NULL) {
276 276 cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
277 277 } else {
278 278 cp = cpu_free_list;
279 279 cpu_free_list = cp->cpu_next_free;
280 280 }
281 281
282 282 cp->cpu_m.mcpu_istamp = cpun << 16;
283 283
284 284 /* Create per CPU specific threads in the process p0. */
285 285 procp = &p0;
286 286
287 287 /*
288 288 * Initialize the dispatcher first.
289 289 */
290 290 disp_cpu_init(cp);
291 291
292 292 cpu_vm_data_init(cp);
293 293
294 294 /*
295 295 * Allocate and initialize the startup thread for this CPU.
296 296 * Interrupt and process switch stacks get allocated later
297 297 * when the CPU starts running.
298 298 */
299 299 tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
300 300 TS_STOPPED, maxclsyspri);
301 301
302 302 /*
303 303 * Set state to TS_ONPROC since this thread will start running
304 304 * as soon as the CPU comes online.
305 305 *
306 306 * All the other fields of the thread structure are setup by
307 307 * thread_create().
308 308 */
309 309 THREAD_ONPROC(tp, cp);
310 310 tp->t_preempt = 1;
311 311 tp->t_bound_cpu = cp;
312 312 tp->t_affinitycnt = 1;
313 313 tp->t_cpu = cp;
314 314 tp->t_disp_queue = cp->cpu_disp;
315 315
316 316 /*
317 317 * Setup thread to start in mp_startup_common.
318 318 */
319 319 sp = tp->t_stk;
320 320 tp->t_sp = (uintptr_t)(sp - MINFRAME);
321 321 #if defined(__amd64)
322 322 tp->t_sp -= STACK_ENTRY_ALIGN; /* fake a call */
323 323 #endif
324 324 /*
325 325 * Setup thread start entry point for boot or hotplug.
326 326 */
327 327 if (boot) {
328 328 tp->t_pc = (uintptr_t)mp_startup_boot;
329 329 } else {
330 330 tp->t_pc = (uintptr_t)mp_startup_hotplug;
331 331 }
332 332
333 333 cp->cpu_id = cpun;
334 334 cp->cpu_self = cp;
335 335 cp->cpu_thread = tp;
336 336 cp->cpu_lwp = NULL;
337 337 cp->cpu_dispthread = tp;
338 338 cp->cpu_dispatch_pri = DISP_PRIO(tp);
339 339
340 340 /*
341 341 * cpu_base_spl must be set explicitly here to prevent any blocking
342 342 * operations in mp_startup_common from causing the spl of the cpu
343 343 * to drop to 0 (allowing device interrupts before we're ready) in
344 344 * resume().
345 345 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
346 346 * As an extra bit of security on DEBUG kernels, this is enforced with
347 347 * an assertion in mp_startup_common() -- before cpu_base_spl is set
348 348 * to its proper value.
349 349 */
350 350 cp->cpu_base_spl = ipltospl(LOCK_LEVEL);
351 351
352 352 /*
353 353 * Now, initialize per-CPU idle thread for this CPU.
354 354 */
355 355 tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);
356 356
357 357 cp->cpu_idle_thread = tp;
358 358
359 359 tp->t_preempt = 1;
360 360 tp->t_bound_cpu = cp;
361 361 tp->t_affinitycnt = 1;
362 362 tp->t_cpu = cp;
363 363 tp->t_disp_queue = cp->cpu_disp;
364 364
365 365 /*
366 366 * Bootstrap the CPU's PG data
367 367 */
368 368 pg_cpu_bootstrap(cp);
369 369
370 370 /*
371 371 * Perform CPC initialization on the new CPU.
372 372 */
373 373 kcpc_hw_init(cp);
374 374
375 375 /*
376 376 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
377 377 * for each CPU.
378 378 */
379 379 setup_vaddr_for_ppcopy(cp);
380 380
381 381 /*
382 382 * Allocate page for new GDT and initialize from current GDT.
383 383 */
384 384 #if !defined(__lint)
385 385 ASSERT((sizeof (*cp->cpu_gdt) * NGDT) <= PAGESIZE);
386 386 #endif
387 387 cp->cpu_gdt = kmem_zalloc(PAGESIZE, KM_SLEEP);
388 388 bcopy(CPU->cpu_gdt, cp->cpu_gdt, (sizeof (*cp->cpu_gdt) * NGDT));
389 389
390 390 #if defined(__i386)
391 391 /*
392 392 * setup kernel %gs.
393 393 */
394 394 set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
395 395 SEL_KPL, 0, 1);
396 396 #endif
397 397
398 398 /*
399 399 * If we have more than one node, each cpu gets a copy of IDT
400 400 * local to its node. If this is a Pentium box, we use cpu 0's
401 401	 * IDT. cpu 0's IDT has been made read-only to work around the
402 402	 * cmpxchgl register bug.
403 403 */
404 404 if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
405 405 #if !defined(__lint)
406 406 ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
407 407 #endif
408 408 cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP);
409 409 bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
410 410 } else {
411 411 cp->cpu_idt = CPU->cpu_idt;
412 412 }
413 413
414 414 /*
415 - * Get interrupt priority data from cpu 0.
416 - */
417 - cp->cpu_pri_data = CPU->cpu_pri_data;
418 -
419 - /*
420 415 * alloc space for cpuid info
421 416 */
422 417 cpuid_alloc_space(cp);
423 418 #if !defined(__xpv)
424 419 if (is_x86_feature(x86_featureset, X86FSET_MWAIT) &&
425 420 idle_cpu_prefer_mwait) {
426 421 cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp);
427 422 cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
428 423 } else
429 424 #endif
430 425 cp->cpu_m.mcpu_idle_cpu = cpu_idle;
431 426
432 427 init_cpu_info(cp);
433 428
434 429 /*
435 430 * alloc space for ucode_info
436 431 */
437 432 ucode_alloc_space(cp);
438 433 xc_init_cpu(cp);
439 434 hat_cpu_online(cp);
440 435
441 436 #ifdef TRAPTRACE
442 437 /*
443 438 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers
444 439 */
445 440 ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
446 441 ttc->ttc_next = ttc->ttc_first;
447 442 ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
448 443 #endif
449 444
450 445 /*
451 446 * Record that we have another CPU.
452 447 */
453 448 /*
454 449 * Initialize the interrupt threads for this CPU
455 450 */
456 451 cpu_intr_alloc(cp, NINTR_THREADS);
457 452
458 453 cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
459 454 cpu_set_state(cp);
460 455
461 456 /*
462 457 * Add CPU to list of available CPUs. It'll be on the active list
463 458 * after mp_startup_common().
464 459 */
465 460 cpu_add_unit(cp);
466 461
467 462 return (cp);
468 463 }
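
The free-list pop at the top of mp_cpu_configure_common() and the matching push at the end of mp_cpu_unconfigure_common() both rely on the cpu_next_free alias (#define cpu_next_free cpu_prev near the top of the file): a struct cpu that is off the active list lends out its cpu_prev field as the free-list link. A minimal sketch of that convention, with hypothetical helper names:

	static struct cpu *
	cpu_free_list_pop(void)
	{
		struct cpu *cp = cpu_free_list;

		if (cp != NULL)
			cpu_free_list = cp->cpu_next_free; /* really cpu_prev */
		return (cp);
	}

	static void
	cpu_free_list_push(struct cpu *cp)
	{
		cp->cpu_next_free = cpu_free_list;
		cpu_free_list = cp;
	}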
469 464
470 465 /*
471 466 * Undo what was done in mp_cpu_configure_common
472 467 */
473 468 static void
474 469 mp_cpu_unconfigure_common(struct cpu *cp, int error)
475 470 {
476 471 ASSERT(MUTEX_HELD(&cpu_lock));
477 472
478 473 /*
479 474 * Remove the CPU from the list of available CPUs.
480 475 */
481 476 cpu_del_unit(cp->cpu_id);
482 477
483 478 if (error == ETIMEDOUT) {
484 479 /*
485 480 * The cpu was started, but never *seemed* to run any
486 481 * code in the kernel; it's probably off spinning in its
487 482 * own private world, though with potential references to
488 483 * our kmem-allocated IDTs and GDTs (for example).
489 484 *
490 485 * Worse still, it may actually wake up some time later,
491 486 * so rather than guess what it might or might not do, we
492 487 * leave the fundamental data structures intact.
493 488 */
494 489 cp->cpu_flags = 0;
495 490 return;
496 491 }
497 492
498 493 /*
499 494	 * At this point, the only threads bound to this CPU should be
500 495	 * special per-cpu threads: its idle thread, its pause threads,
501 496	 * and its interrupt threads. Clean these up.
502 497 */
503 498 cpu_destroy_bound_threads(cp);
504 499 cp->cpu_idle_thread = NULL;
505 500
506 501 /*
507 502 * Free the interrupt stack.
508 503 */
509 504 segkp_release(segkp,
510 505 cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));
511 506 cp->cpu_intr_stack = NULL;
512 507
513 508 #ifdef TRAPTRACE
514 509 /*
515 510 * Discard the trap trace buffer
516 511 */
517 512 {
518 513 trap_trace_ctl_t *ttc = &trap_trace_ctl[cp->cpu_id];
519 514
520 515 kmem_free((void *)ttc->ttc_first, trap_trace_bufsize);
521 516 ttc->ttc_first = NULL;
522 517 }
523 518 #endif
524 519
525 520 hat_cpu_offline(cp);
526 521
527 522 ucode_free_space(cp);
528 523
529 524 /* Free CPU ID string and brand string. */
530 525 if (cp->cpu_idstr) {
531 526 kmem_free(cp->cpu_idstr, CPU_IDSTRLEN);
532 527 cp->cpu_idstr = NULL;
533 528 }
534 529 if (cp->cpu_brandstr) {
535 530 kmem_free(cp->cpu_brandstr, CPU_IDSTRLEN);
536 531 cp->cpu_brandstr = NULL;
537 532 }
538 533
539 534 #if !defined(__xpv)
540 535 if (cp->cpu_m.mcpu_mwait != NULL) {
541 536 cpuid_mwait_free(cp);
542 537 cp->cpu_m.mcpu_mwait = NULL;
543 538 }
544 539 #endif
545 540 cpuid_free_space(cp);
546 541
547 542 if (cp->cpu_idt != CPU->cpu_idt)
548 543 kmem_free(cp->cpu_idt, PAGESIZE);
549 544 cp->cpu_idt = NULL;
550 545
551 546 kmem_free(cp->cpu_gdt, PAGESIZE);
552 547 cp->cpu_gdt = NULL;
553 548
554 549 if (cp->cpu_supp_freqs != NULL) {
555 550 size_t len = strlen(cp->cpu_supp_freqs) + 1;
556 551 kmem_free(cp->cpu_supp_freqs, len);
557 552 cp->cpu_supp_freqs = NULL;
558 553 }
559 554
560 555 teardown_vaddr_for_ppcopy(cp);
561 556
562 557 kcpc_hw_fini(cp);
563 558
564 559 cp->cpu_dispthread = NULL;
565 560 cp->cpu_thread = NULL; /* discarded by cpu_destroy_bound_threads() */
566 561
567 562 cpu_vm_data_destroy(cp);
568 563
569 564 xc_fini_cpu(cp);
570 565 disp_cpu_fini(cp);
571 566
572 567 ASSERT(cp != CPU0);
573 568 bzero(cp, sizeof (*cp));
574 569 cp->cpu_next_free = cpu_free_list;
575 570 cpu_free_list = cp;
576 571 }
577 572
578 573 /*
579 574 * Apply workarounds for known errata, and warn about those that are absent.
580 575 *
581 576 * System vendors occasionally create configurations which contain different
582 577 * revisions of the CPUs that are almost but not exactly the same. At the
583 578 * time of writing, this meant that their clock rates were the same, their
584 579	 * feature sets were the same, but the required workarounds were -not-
585 580 * necessarily the same. So, this routine is invoked on -every- CPU soon
586 581 * after starting to make sure that the resulting system contains the most
587 582 * pessimal set of workarounds needed to cope with *any* of the CPUs in the
588 583 * system.
589 584 *
590 585 * workaround_errata is invoked early in mlsetup() for CPU 0, and in
591 586 * mp_startup_common() for all slave CPUs. Slaves process workaround_errata
592 587 * prior to acknowledging their readiness to the master, so this routine will
593 588 * never be executed by multiple CPUs in parallel, thus making updates to
594 589 * global data safe.
595 590 *
596 591 * These workarounds are based on Rev 3.57 of the Revision Guide for
597 592 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
598 593 */
599 594
600 595 #if defined(OPTERON_ERRATUM_88)
601 596 int opteron_erratum_88; /* if non-zero -> at least one cpu has it */
602 597 #endif
603 598
604 599 #if defined(OPTERON_ERRATUM_91)
605 600 int opteron_erratum_91; /* if non-zero -> at least one cpu has it */
606 601 #endif
607 602
608 603 #if defined(OPTERON_ERRATUM_93)
609 604 int opteron_erratum_93; /* if non-zero -> at least one cpu has it */
610 605 #endif
611 606
612 607 #if defined(OPTERON_ERRATUM_95)
613 608 int opteron_erratum_95; /* if non-zero -> at least one cpu has it */
614 609 #endif
615 610
616 611 #if defined(OPTERON_ERRATUM_100)
617 612 int opteron_erratum_100; /* if non-zero -> at least one cpu has it */
618 613 #endif
619 614
620 615 #if defined(OPTERON_ERRATUM_108)
621 616 int opteron_erratum_108; /* if non-zero -> at least one cpu has it */
622 617 #endif
623 618
624 619 #if defined(OPTERON_ERRATUM_109)
625 620 int opteron_erratum_109; /* if non-zero -> at least one cpu has it */
626 621 #endif
627 622
628 623 #if defined(OPTERON_ERRATUM_121)
629 624 int opteron_erratum_121; /* if non-zero -> at least one cpu has it */
630 625 #endif
631 626
632 627 #if defined(OPTERON_ERRATUM_122)
633 628 int opteron_erratum_122; /* if non-zero -> at least one cpu has it */
634 629 #endif
635 630
636 631 #if defined(OPTERON_ERRATUM_123)
637 632 int opteron_erratum_123; /* if non-zero -> at least one cpu has it */
638 633 #endif
639 634
640 635 #if defined(OPTERON_ERRATUM_131)
641 636 int opteron_erratum_131; /* if non-zero -> at least one cpu has it */
642 637 #endif
643 638
644 639 #if defined(OPTERON_WORKAROUND_6336786)
645 640 int opteron_workaround_6336786; /* non-zero -> WA relevant and applied */
646 641 int opteron_workaround_6336786_UP = 0; /* Not needed for UP */
647 642 #endif
648 643
649 644 #if defined(OPTERON_WORKAROUND_6323525)
650 645 int opteron_workaround_6323525; /* if non-zero -> at least one cpu has it */
651 646 #endif
652 647
653 648 #if defined(OPTERON_ERRATUM_298)
654 649 int opteron_erratum_298;
655 650 #endif
656 651
657 652 #if defined(OPTERON_ERRATUM_721)
658 653 int opteron_erratum_721;
659 654 #endif
660 655
661 656 static void
662 657 workaround_warning(cpu_t *cp, uint_t erratum)
663 658 {
664 659 cmn_err(CE_WARN, "cpu%d: no workaround for erratum %u",
665 660 cp->cpu_id, erratum);
666 661 }
667 662
668 663 static void
669 664 workaround_applied(uint_t erratum)
670 665 {
671 666 if (erratum > 1000000)
672 667 cmn_err(CE_CONT, "?workaround applied for cpu issue #%d\n",
673 668 erratum);
674 669 else
675 670 cmn_err(CE_CONT, "?workaround applied for cpu erratum #%d\n",
676 671 erratum);
677 672 }
678 673
679 674 static void
680 675 msr_warning(cpu_t *cp, const char *rw, uint_t msr, int error)
681 676 {
682 677 cmn_err(CE_WARN, "cpu%d: couldn't %smsr 0x%x, error %d",
683 678 cp->cpu_id, rw, msr, error);
684 679 }
685 680
686 681 /*
687 682 * Determine the number of nodes in a Hammer / Greyhound / Griffin family
688 683 * system.
689 684 */
690 685 static uint_t
691 686 opteron_get_nnodes(void)
692 687 {
693 688 static uint_t nnodes = 0;
694 689
695 690 if (nnodes == 0) {
696 691 #ifdef DEBUG
697 692 uint_t family;
698 693
699 694 /*
700 695 * This routine uses a PCI config space based mechanism
701 696 * for retrieving the number of nodes in the system.
702 697 * Device 24, function 0, offset 0x60 as used here is not
703 698 * AMD processor architectural, and may not work on processor
704 699 * families other than those listed below.
705 700 *
706 701 * Callers of this routine must ensure that we're running on
707 702 * a processor which supports this mechanism.
708 703 * The assertion below is meant to catch calls on unsupported
709 704 * processors.
710 705 */
711 706 family = cpuid_getfamily(CPU);
712 707 ASSERT(family == 0xf || family == 0x10 || family == 0x11);
713 708 #endif /* DEBUG */
714 709
715 710 /*
716 711 * Obtain the number of nodes in the system from
717 712 * bits [6:4] of the Node ID register on node 0.
718 713 *
719 714 * The actual node count is NodeID[6:4] + 1
720 715 *
721 716 * The Node ID register is accessed via function 0,
722 717 * offset 0x60. Node 0 is device 24.
723 718 */
724 719 nnodes = ((pci_getl_func(0, 24, 0, 0x60) & 0x70) >> 4) + 1;
725 720 }
726 721 return (nnodes);
727 722 }
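
A worked example of the decode above, using an illustrative register value (the real value comes from pci_getl_func(0, 24, 0, 0x60)):

	uint32_t nodeid_reg = 0x00000030;	/* hypothetical: bits [6:4] == 0x3 */
	uint_t nnodes = ((nodeid_reg & 0x70) >> 4) + 1;	/* 3 + 1 == 4 nodes */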
728 723
729 724 uint_t
730 725 do_erratum_298(struct cpu *cpu)
731 726 {
732 727 static int osvwrc = -3;
733 728 extern int osvw_opteron_erratum(cpu_t *, uint_t);
734 729
735 730 /*
736 731 * L2 Eviction May Occur During Processor Operation To Set
737 732 * Accessed or Dirty Bit.
738 733 */
739 734 if (osvwrc == -3) {
740 735 osvwrc = osvw_opteron_erratum(cpu, 298);
741 736 } else {
742 737 /* osvw return codes should be consistent for all cpus */
743 738 ASSERT(osvwrc == osvw_opteron_erratum(cpu, 298));
744 739 }
745 740
746 741 switch (osvwrc) {
747 742 case 0: /* erratum is not present: do nothing */
748 743 break;
749 744 case 1: /* erratum is present: BIOS workaround applied */
750 745 /*
751 746 * check if workaround is actually in place and issue warning
752 747 * if not.
753 748 */
754 749 if (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) ||
755 750 ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0)) {
756 751 #if defined(OPTERON_ERRATUM_298)
757 752 opteron_erratum_298++;
758 753 #else
759 754 workaround_warning(cpu, 298);
760 755 return (1);
761 756 #endif
762 757 }
763 758 break;
764 759 case -1: /* cannot determine via osvw: check cpuid */
765 760 if ((cpuid_opteron_erratum(cpu, 298) > 0) &&
766 761 (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) ||
767 762 ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0))) {
768 763 #if defined(OPTERON_ERRATUM_298)
769 764 opteron_erratum_298++;
770 765 #else
771 766 workaround_warning(cpu, 298);
772 767 return (1);
773 768 #endif
774 769 }
775 770 break;
776 771 }
777 772 return (0);
778 773 }
779 774
780 775 uint_t
781 776 workaround_errata(struct cpu *cpu)
782 777 {
783 778 uint_t missing = 0;
784 779
785 780 ASSERT(cpu == CPU);
786 781
787 782 /*LINTED*/
788 783 if (cpuid_opteron_erratum(cpu, 88) > 0) {
789 784 /*
790 785 * SWAPGS May Fail To Read Correct GS Base
791 786 */
792 787 #if defined(OPTERON_ERRATUM_88)
793 788 /*
794 789 * The workaround is an mfence in the relevant assembler code
795 790 */
796 791 opteron_erratum_88++;
797 792 #else
798 793 workaround_warning(cpu, 88);
799 794 missing++;
800 795 #endif
801 796 }
802 797
803 798 if (cpuid_opteron_erratum(cpu, 91) > 0) {
804 799 /*
805 800 * Software Prefetches May Report A Page Fault
806 801 */
807 802 #if defined(OPTERON_ERRATUM_91)
808 803 /*
809 804 * fix is in trap.c
810 805 */
811 806 opteron_erratum_91++;
812 807 #else
813 808 workaround_warning(cpu, 91);
814 809 missing++;
815 810 #endif
816 811 }
817 812
818 813 if (cpuid_opteron_erratum(cpu, 93) > 0) {
819 814 /*
820 815 * RSM Auto-Halt Restart Returns to Incorrect RIP
821 816 */
822 817 #if defined(OPTERON_ERRATUM_93)
823 818 /*
824 819 * fix is in trap.c
825 820 */
826 821 opteron_erratum_93++;
827 822 #else
828 823 workaround_warning(cpu, 93);
829 824 missing++;
830 825 #endif
831 826 }
832 827
833 828 /*LINTED*/
834 829 if (cpuid_opteron_erratum(cpu, 95) > 0) {
835 830 /*
836 831 * RET Instruction May Return to Incorrect EIP
837 832 */
838 833 #if defined(OPTERON_ERRATUM_95)
839 834 #if defined(_LP64)
840 835 /*
841 836 * Workaround this by ensuring that 32-bit user code and
842 837 * 64-bit kernel code never occupy the same address
843 838 * range mod 4G.
844 839 */
845 840 if (_userlimit32 > 0xc0000000ul)
846 841 *(uintptr_t *)&_userlimit32 = 0xc0000000ul;
847 842
848 843 /*LINTED*/
849 844 ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
850 845 opteron_erratum_95++;
851 846 #endif /* _LP64 */
852 847 #else
853 848 workaround_warning(cpu, 95);
854 849 missing++;
855 850 #endif
856 851 }
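
A sketch of the disjoint-mod-4G argument behind this clamp, assuming (as the COREHEAP_BASE assertion suggests) that 64-bit kernel text sits at addresses whose low 32 bits are at least 0xc0000000; the kernel address below is an example, not taken from the source:

	uintptr_t kva  = 0xfffffffffbc00000ul;	/* hypothetical kernel text address */
	uint32_t  klow = (uint32_t)kva;		/* low 32 bits: 0xfbc00000 */

	/* User RET targets are < 0xc0000000, so none can alias kva mod 2^32. */
	ASSERT(klow >= 0xc0000000u && _userlimit32 <= 0xc0000000ul);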
857 852
858 853 if (cpuid_opteron_erratum(cpu, 100) > 0) {
859 854 /*
860 855 * Compatibility Mode Branches Transfer to Illegal Address
861 856 */
862 857 #if defined(OPTERON_ERRATUM_100)
863 858 /*
864 859 * fix is in trap.c
865 860 */
866 861 opteron_erratum_100++;
867 862 #else
868 863 workaround_warning(cpu, 100);
869 864 missing++;
870 865 #endif
871 866 }
872 867
873 868 /*LINTED*/
874 869 if (cpuid_opteron_erratum(cpu, 108) > 0) {
875 870 /*
876 871 * CPUID Instruction May Return Incorrect Model Number In
877 872 * Some Processors
878 873 */
879 874 #if defined(OPTERON_ERRATUM_108)
880 875 /*
881 876 * (Our cpuid-handling code corrects the model number on
882 877 * those processors)
883 878 */
884 879 #else
885 880 workaround_warning(cpu, 108);
886 881 missing++;
887 882 #endif
888 883 }
889 884
890 885 /*LINTED*/
891 886 if (cpuid_opteron_erratum(cpu, 109) > 0) do {
892 887 /*
893 888 * Certain Reverse REP MOVS May Produce Unpredictable Behavior
894 889 */
895 890 #if defined(OPTERON_ERRATUM_109)
896 891 /*
897 892 * The "workaround" is to print a warning to upgrade the BIOS
898 893 */
899 894 uint64_t value;
900 895 const uint_t msr = MSR_AMD_PATCHLEVEL;
901 896 int err;
902 897
903 898 if ((err = checked_rdmsr(msr, &value)) != 0) {
904 899 msr_warning(cpu, "rd", msr, err);
905 900 workaround_warning(cpu, 109);
906 901 missing++;
907 902 }
908 903 if (value == 0)
909 904 opteron_erratum_109++;
910 905 #else
911 906 workaround_warning(cpu, 109);
912 907 missing++;
913 908 #endif
914 909 /*CONSTANTCONDITION*/
915 910 } while (0);
916 911
917 912 /*LINTED*/
918 913 if (cpuid_opteron_erratum(cpu, 121) > 0) {
919 914 /*
920 915	 * Sequential Execution Across Non-Canonical Boundary Causes
921 916 * Processor Hang
922 917 */
923 918 #if defined(OPTERON_ERRATUM_121)
924 919 #if defined(_LP64)
925 920 /*
926 921 * Erratum 121 is only present in long (64 bit) mode.
927 922 * Workaround is to include the page immediately before the
928 923 * va hole to eliminate the possibility of system hangs due to
929 924 * sequential execution across the va hole boundary.
930 925 */
931 926 if (opteron_erratum_121)
932 927 opteron_erratum_121++;
933 928 else {
934 929 if (hole_start) {
935 930 hole_start -= PAGESIZE;
936 931 } else {
937 932 /*
938 933 * hole_start not yet initialized by
939 934 * mmu_init. Initialize hole_start
940 935 * with value to be subtracted.
941 936 */
942 937 hole_start = PAGESIZE;
943 938 }
944 939 opteron_erratum_121++;
945 940 }
946 941 #endif /* _LP64 */
947 942 #else
948 943 workaround_warning(cpu, 121);
949 944 missing++;
950 945 #endif
951 946 }
952 947
953 948 /*LINTED*/
954 949 if (cpuid_opteron_erratum(cpu, 122) > 0) do {
955 950 /*
956 951 * TLB Flush Filter May Cause Coherency Problem in
957 952 * Multiprocessor Systems
958 953 */
959 954 #if defined(OPTERON_ERRATUM_122)
960 955 uint64_t value;
961 956 const uint_t msr = MSR_AMD_HWCR;
962 957 int error;
963 958
964 959 /*
965 960 * Erratum 122 is only present in MP configurations (multi-core
966 961 * or multi-processor).
967 962 */
968 963 #if defined(__xpv)
969 964 if (!DOMAIN_IS_INITDOMAIN(xen_info))
970 965 break;
971 966 if (!opteron_erratum_122 && xpv_nr_phys_cpus() == 1)
972 967 break;
973 968 #else
974 969 if (!opteron_erratum_122 && opteron_get_nnodes() == 1 &&
975 970 cpuid_get_ncpu_per_chip(cpu) == 1)
976 971 break;
977 972 #endif
978 973 /* disable TLB Flush Filter */
979 974
980 975 if ((error = checked_rdmsr(msr, &value)) != 0) {
981 976 msr_warning(cpu, "rd", msr, error);
982 977 workaround_warning(cpu, 122);
983 978 missing++;
984 979 } else {
985 980 value |= (uint64_t)AMD_HWCR_FFDIS;
986 981 if ((error = checked_wrmsr(msr, value)) != 0) {
987 982 msr_warning(cpu, "wr", msr, error);
988 983 workaround_warning(cpu, 122);
989 984 missing++;
990 985 }
991 986 }
992 987 opteron_erratum_122++;
993 988 #else
994 989 workaround_warning(cpu, 122);
995 990 missing++;
996 991 #endif
997 992 /*CONSTANTCONDITION*/
998 993 } while (0);
999 994
1000 995 /*LINTED*/
1001 996 if (cpuid_opteron_erratum(cpu, 123) > 0) do {
1002 997 /*
1003 998	 * Bypassed Reads May Cause Data Corruption or System Hang in
1004 999 * Dual Core Processors
1005 1000 */
1006 1001 #if defined(OPTERON_ERRATUM_123)
1007 1002 uint64_t value;
1008 1003 const uint_t msr = MSR_AMD_PATCHLEVEL;
1009 1004 int err;
1010 1005
1011 1006 /*
1012 1007 * Erratum 123 applies only to multi-core cpus.
1013 1008 */
1014 1009 if (cpuid_get_ncpu_per_chip(cpu) < 2)
1015 1010 break;
1016 1011 #if defined(__xpv)
1017 1012 if (!DOMAIN_IS_INITDOMAIN(xen_info))
1018 1013 break;
1019 1014 #endif
1020 1015 /*
1021 1016 * The "workaround" is to print a warning to upgrade the BIOS
1022 1017 */
1023 1018 if ((err = checked_rdmsr(msr, &value)) != 0) {
1024 1019 msr_warning(cpu, "rd", msr, err);
1025 1020 workaround_warning(cpu, 123);
1026 1021 missing++;
1027 1022 }
1028 1023 if (value == 0)
1029 1024 opteron_erratum_123++;
1030 1025 #else
1031 1026 workaround_warning(cpu, 123);
1032 1027 missing++;
1033 1028
1034 1029 #endif
1035 1030 /*CONSTANTCONDITION*/
1036 1031 } while (0);
1037 1032
1038 1033 /*LINTED*/
1039 1034 if (cpuid_opteron_erratum(cpu, 131) > 0) do {
1040 1035 /*
1041 1036 * Multiprocessor Systems with Four or More Cores May Deadlock
1042 1037 * Waiting for a Probe Response
1043 1038 */
1044 1039 #if defined(OPTERON_ERRATUM_131)
1045 1040 uint64_t nbcfg;
1046 1041 const uint_t msr = MSR_AMD_NB_CFG;
1047 1042 const uint64_t wabits =
1048 1043 AMD_NB_CFG_SRQ_HEARTBEAT | AMD_NB_CFG_SRQ_SPR;
1049 1044 int error;
1050 1045
1051 1046 /*
1052 1047 * Erratum 131 applies to any system with four or more cores.
1053 1048 */
1054 1049 if (opteron_erratum_131)
1055 1050 break;
1056 1051 #if defined(__xpv)
1057 1052 if (!DOMAIN_IS_INITDOMAIN(xen_info))
1058 1053 break;
1059 1054 if (xpv_nr_phys_cpus() < 4)
1060 1055 break;
1061 1056 #else
1062 1057 if (opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu) < 4)
1063 1058 break;
1064 1059 #endif
1065 1060 /*
1066 1061 * Print a warning if neither of the workarounds for
1067 1062 * erratum 131 is present.
1068 1063 */
1069 1064 if ((error = checked_rdmsr(msr, &nbcfg)) != 0) {
1070 1065 msr_warning(cpu, "rd", msr, error);
1071 1066 workaround_warning(cpu, 131);
1072 1067 missing++;
1073 1068 } else if ((nbcfg & wabits) == 0) {
1074 1069 opteron_erratum_131++;
1075 1070 } else {
1076 1071 /* cannot have both workarounds set */
1077 1072 ASSERT((nbcfg & wabits) != wabits);
1078 1073 }
1079 1074 #else
1080 1075 workaround_warning(cpu, 131);
1081 1076 missing++;
1082 1077 #endif
1083 1078 /*CONSTANTCONDITION*/
1084 1079 } while (0);
1085 1080
1086 1081 /*
1087 1082 * This isn't really an erratum, but for convenience the
1088 1083 * detection/workaround code lives here and in cpuid_opteron_erratum.
1089 1084 */
1090 1085 if (cpuid_opteron_erratum(cpu, 6336786) > 0) {
1091 1086 #if defined(OPTERON_WORKAROUND_6336786)
1092 1087 /*
1093 1088 * Disable C1-Clock ramping on multi-core/multi-processor
1094 1089 * K8 platforms to guard against TSC drift.
1095 1090 */
1096 1091 if (opteron_workaround_6336786) {
1097 1092 opteron_workaround_6336786++;
1098 1093 #if defined(__xpv)
1099 1094 } else if ((DOMAIN_IS_INITDOMAIN(xen_info) &&
1100 1095 xpv_nr_phys_cpus() > 1) ||
1101 1096 opteron_workaround_6336786_UP) {
1102 1097 /*
1103 1098 * XXPV Hmm. We can't walk the Northbridges on
1104 1099 * the hypervisor; so just complain and drive
1105 1100 * on. This probably needs to be fixed in
1106 1101 * the hypervisor itself.
1107 1102 */
1108 1103 opteron_workaround_6336786++;
1109 1104 workaround_warning(cpu, 6336786);
1110 1105 #else /* __xpv */
1111 1106 } else if ((opteron_get_nnodes() *
1112 1107 cpuid_get_ncpu_per_chip(cpu) > 1) ||
1113 1108 opteron_workaround_6336786_UP) {
1114 1109
1115 1110 uint_t node, nnodes;
1116 1111 uint8_t data;
1117 1112
1118 1113 nnodes = opteron_get_nnodes();
1119 1114 for (node = 0; node < nnodes; node++) {
1120 1115 /*
1121 1116 * Clear PMM7[1:0] (function 3, offset 0x87)
1122 1117 * Northbridge device is the node id + 24.
1123 1118 */
1124 1119 data = pci_getb_func(0, node + 24, 3, 0x87);
1125 1120 data &= 0xFC;
1126 1121 pci_putb_func(0, node + 24, 3, 0x87, data);
1127 1122 }
1128 1123 opteron_workaround_6336786++;
1129 1124 #endif /* __xpv */
1130 1125 }
1131 1126 #else
1132 1127 workaround_warning(cpu, 6336786);
1133 1128 missing++;
1134 1129 #endif
1135 1130 }
1136 1131
1137 1132 /*LINTED*/
1138 1133 /*
1139 1134 * Mutex primitives don't work as expected.
1140 1135 */
1141 1136 if (cpuid_opteron_erratum(cpu, 6323525) > 0) {
1142 1137 #if defined(OPTERON_WORKAROUND_6323525)
1143 1138 /*
1144 1139	 * This problem only occurs with 2 or more cores. If the bit in
1145 1140	 * MSR_AMD_BU_CFG is set, the erratum is not applicable. The
1146 1141	 * workaround is to patch the semaphore routines with the lfence
1147 1142	 * instruction to provide the necessary load memory barrier before
1148 1143	 * possible subsequent read-modify-write ops.
1149 1144 *
1150 1145 * It is too early in boot to call the patch routine so
1151 1146 * set erratum variable to be done in startup_end().
1152 1147 */
1153 1148 if (opteron_workaround_6323525) {
1154 1149 opteron_workaround_6323525++;
1155 1150 #if defined(__xpv)
1156 1151 } else if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
1157 1152 if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1158 1153 /*
1159 1154 * XXPV Use dom0_msr here when extended
1160 1155 * operations are supported?
1161 1156 */
1162 1157 if (xpv_nr_phys_cpus() > 1)
1163 1158 opteron_workaround_6323525++;
1164 1159 } else {
1165 1160 /*
1166 1161 * We have no way to tell how many physical
1167 1162 * cpus there are, or even if this processor
1168 1163 * has the problem, so enable the workaround
1169 1164 * unconditionally (at some performance cost).
1170 1165 */
1171 1166 opteron_workaround_6323525++;
1172 1167 }
1173 1168 #else /* __xpv */
1174 1169 } else if (is_x86_feature(x86_featureset, X86FSET_SSE2) &&
1175 1170 ((opteron_get_nnodes() *
1176 1171 cpuid_get_ncpu_per_chip(cpu)) > 1)) {
1177 1172 if ((xrdmsr(MSR_AMD_BU_CFG) & (UINT64_C(1) << 33)) == 0)
1178 1173 opteron_workaround_6323525++;
1179 1174 #endif /* __xpv */
1180 1175 }
1181 1176 #else
1182 1177 workaround_warning(cpu, 6323525);
1183 1178 missing++;
1184 1179 #endif
1185 1180 }
1186 1181
1187 1182 missing += do_erratum_298(cpu);
1188 1183
1189 1184 if (cpuid_opteron_erratum(cpu, 721) > 0) {
1190 1185 #if defined(OPTERON_ERRATUM_721)
1191 1186 on_trap_data_t otd;
1192 1187
1193 1188 if (!on_trap(&otd, OT_DATA_ACCESS))
1194 1189 wrmsr(MSR_AMD_DE_CFG,
1195 1190 rdmsr(MSR_AMD_DE_CFG) | AMD_DE_CFG_E721);
1196 1191 no_trap();
1197 1192
1198 1193 opteron_erratum_721++;
1199 1194 #else
1200 1195 workaround_warning(cpu, 721);
1201 1196 missing++;
1202 1197 #endif
1203 1198 }
1204 1199
1205 1200 #ifdef __xpv
1206 1201 return (0);
1207 1202 #else
1208 1203 return (missing);
1209 1204 #endif
1210 1205 }
1211 1206
1212 1207 void
1213 1208 workaround_errata_end()
1214 1209 {
1215 1210 #if defined(OPTERON_ERRATUM_88)
1216 1211 if (opteron_erratum_88)
1217 1212 workaround_applied(88);
1218 1213 #endif
1219 1214 #if defined(OPTERON_ERRATUM_91)
1220 1215 if (opteron_erratum_91)
1221 1216 workaround_applied(91);
1222 1217 #endif
1223 1218 #if defined(OPTERON_ERRATUM_93)
1224 1219 if (opteron_erratum_93)
1225 1220 workaround_applied(93);
1226 1221 #endif
1227 1222 #if defined(OPTERON_ERRATUM_95)
1228 1223 if (opteron_erratum_95)
1229 1224 workaround_applied(95);
1230 1225 #endif
1231 1226 #if defined(OPTERON_ERRATUM_100)
1232 1227 if (opteron_erratum_100)
1233 1228 workaround_applied(100);
1234 1229 #endif
1235 1230 #if defined(OPTERON_ERRATUM_108)
1236 1231 if (opteron_erratum_108)
1237 1232 workaround_applied(108);
1238 1233 #endif
1239 1234 #if defined(OPTERON_ERRATUM_109)
1240 1235 if (opteron_erratum_109) {
1241 1236 cmn_err(CE_WARN,
1242 1237 "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
1243 1238 " processor\nerratum 109 was not detected; updating your"
1244 1239 " system's BIOS to a version\ncontaining this"
1245 1240 " microcode patch is HIGHLY recommended or erroneous"
1246 1241 " system\noperation may occur.\n");
1247 1242 }
1248 1243 #endif
1249 1244 #if defined(OPTERON_ERRATUM_121)
1250 1245 if (opteron_erratum_121)
1251 1246 workaround_applied(121);
1252 1247 #endif
1253 1248 #if defined(OPTERON_ERRATUM_122)
1254 1249 if (opteron_erratum_122)
1255 1250 workaround_applied(122);
1256 1251 #endif
1257 1252 #if defined(OPTERON_ERRATUM_123)
1258 1253 if (opteron_erratum_123) {
1259 1254 cmn_err(CE_WARN,
1260 1255 "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
1261 1256 " processor\nerratum 123 was not detected; updating your"
1262 1257 " system's BIOS to a version\ncontaining this"
1263 1258 " microcode patch is HIGHLY recommended or erroneous"
1264 1259 " system\noperation may occur.\n");
1265 1260 }
1266 1261 #endif
1267 1262 #if defined(OPTERON_ERRATUM_131)
1268 1263 if (opteron_erratum_131) {
1269 1264 cmn_err(CE_WARN,
1270 1265 "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
1271 1266 " processor\nerratum 131 was not detected; updating your"
1272 1267 " system's BIOS to a version\ncontaining this"
1273 1268 " microcode patch is HIGHLY recommended or erroneous"
1274 1269 " system\noperation may occur.\n");
1275 1270 }
1276 1271 #endif
1277 1272 #if defined(OPTERON_WORKAROUND_6336786)
1278 1273 if (opteron_workaround_6336786)
1279 1274 workaround_applied(6336786);
1280 1275 #endif
1281 1276 #if defined(OPTERON_WORKAROUND_6323525)
1282 1277 if (opteron_workaround_6323525)
1283 1278 workaround_applied(6323525);
1284 1279 #endif
1285 1280 #if defined(OPTERON_ERRATUM_298)
1286 1281 if (opteron_erratum_298) {
1287 1282 cmn_err(CE_WARN,
1288 1283 "BIOS microcode patch for AMD 64/Opteron(tm)"
1289 1284 " processor\nerratum 298 was not detected; updating your"
1290 1285 " system's BIOS to a version\ncontaining this"
1291 1286 " microcode patch is HIGHLY recommended or erroneous"
1292 1287 " system\noperation may occur.\n");
1293 1288 }
1294 1289 #endif
1295 1290 #if defined(OPTERON_ERRATUM_721)
1296 1291 if (opteron_erratum_721)
1297 1292 workaround_applied(721);
1298 1293 #endif
1299 1294 }
1300 1295
1301 1296 /*
1302 1297 * The procset_slave and procset_master are used to synchronize
1303 1298 * between the control CPU and the target CPU when starting CPUs.
1304 1299 */
1305 1300 static cpuset_t procset_slave, procset_master;
1306 1301
1307 1302 static void
1308 1303 mp_startup_wait(cpuset_t *sp, processorid_t cpuid)
1309 1304 {
1310 1305 cpuset_t tempset;
1311 1306
1312 1307 for (tempset = *sp; !CPU_IN_SET(tempset, cpuid);
1313 1308 tempset = *(volatile cpuset_t *)sp) {
1314 1309 SMT_PAUSE();
1315 1310 }
1316 1311 CPUSET_ATOMIC_DEL(*(cpuset_t *)sp, cpuid);
1317 1312 }
1318 1313
1319 1314 static void
1320 1315 mp_startup_signal(cpuset_t *sp, processorid_t cpuid)
1321 1316 {
1322 1317 cpuset_t tempset;
1323 1318
1324 1319 CPUSET_ATOMIC_ADD(*(cpuset_t *)sp, cpuid);
1325 1320 for (tempset = *sp; CPU_IN_SET(tempset, cpuid);
1326 1321 tempset = *(volatile cpuset_t *)sp) {
1327 1322 SMT_PAUSE();
1328 1323 }
1329 1324 }
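
Taken together, these two primitives form a consume-once rendezvous: mp_startup_signal() sets the caller's bit and spins until the peer clears it, while mp_startup_wait() spins until the bit appears and then clears it. A sketch of the pairing as used by mp_start_cpu_common() (control CPU) and mp_startup_common() (target CPU); the two wrapper functions here are hypothetical:

	static void
	control_side(processorid_t id)
	{
		/* Block until the target finishes cpuid probing, releasing it. */
		mp_startup_wait(&procset_slave, id);

		/* ... cpu_lock work (cpupm_init, pg_cpu_init) on its behalf ... */

		/* Hand the target back; spins until the target consumes the bit. */
		mp_startup_signal(&procset_master, id);
	}

	static void
	target_side(processorid_t id)
	{
		/* Announce that cpuid probing is done; spins until consumed. */
		mp_startup_signal(&procset_slave, id);

		/* Wait for the control CPU to finish the cpu_lock work. */
		mp_startup_wait(&procset_master, id);
	}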
1330 1325
1331 1326 int
1332 1327 mp_start_cpu_common(cpu_t *cp, boolean_t boot)
1333 1328 {
1334 1329 _NOTE(ARGUNUSED(boot));
1335 1330
1336 1331 void *ctx;
1337 1332 int delays;
1338 1333 int error = 0;
1339 1334 cpuset_t tempset;
1340 1335 processorid_t cpuid;
1341 1336 #ifndef __xpv
1342 1337 extern void cpupm_init(cpu_t *);
1343 1338 #endif
1344 1339
1345 1340 ASSERT(cp != NULL);
1346 1341 cpuid = cp->cpu_id;
1347 1342 ctx = mach_cpucontext_alloc(cp);
1348 1343 if (ctx == NULL) {
1349 1344 cmn_err(CE_WARN,
1350 1345 "cpu%d: failed to allocate context", cp->cpu_id);
1351 1346 return (EAGAIN);
1352 1347 }
1353 1348 error = mach_cpu_start(cp, ctx);
1354 1349 if (error != 0) {
1355 1350 cmn_err(CE_WARN,
1356 1351 "cpu%d: failed to start, error %d", cp->cpu_id, error);
1357 1352 mach_cpucontext_free(cp, ctx, error);
1358 1353 return (error);
1359 1354 }
1360 1355
1361 1356 for (delays = 0, tempset = procset_slave; !CPU_IN_SET(tempset, cpuid);
1362 1357 delays++) {
1363 1358 if (delays == 500) {
1364 1359 /*
1365 1360 * After five seconds, things are probably looking
1366 1361 * a bit bleak - explain the hang.
1367 1362 */
1368 1363 cmn_err(CE_NOTE, "cpu%d: started, "
1369 1364 "but not running in the kernel yet", cpuid);
1370 1365 } else if (delays > 2000) {
1371 1366 /*
1372 1367 * We waited at least 20 seconds, bail ..
1373 1368 */
1374 1369 error = ETIMEDOUT;
1375 1370 cmn_err(CE_WARN, "cpu%d: timed out", cpuid);
1376 1371 mach_cpucontext_free(cp, ctx, error);
1377 1372 return (error);
1378 1373 }
1379 1374
1380 1375 /*
1381 1376 * wait at least 10ms, then check again..
1382 1377 */
1383 1378 delay(USEC_TO_TICK_ROUNDUP(10000));
1384 1379 tempset = *((volatile cpuset_t *)&procset_slave);
1385 1380 }
1386 1381 CPUSET_ATOMIC_DEL(procset_slave, cpuid);
1387 1382
1388 1383 mach_cpucontext_free(cp, ctx, 0);
1389 1384
1390 1385 #ifndef __xpv
1391 1386 if (tsc_gethrtime_enable)
1392 1387 tsc_sync_master(cpuid);
1393 1388 #endif
1394 1389
1395 1390 if (dtrace_cpu_init != NULL) {
1396 1391 (*dtrace_cpu_init)(cpuid);
1397 1392 }
1398 1393
1399 1394 /*
1400 1395 * During CPU DR operations, the cpu_lock is held by current
1401 1396 * (the control) thread. We can't release the cpu_lock here
1402 1397 * because that will break the CPU DR logic.
1403 1398 * On the other hand, CPUPM and processor group initialization
1404 1399 * routines need to access the cpu_lock. So we invoke those
1405 1400 * routines here on behalf of mp_startup_common().
1406 1401 *
1407 1402 * CPUPM and processor group initialization routines depend
1408 1403 * on the cpuid probing results. Wait for mp_startup_common()
1409 1404 * to signal that cpuid probing is done.
1410 1405 */
1411 1406 mp_startup_wait(&procset_slave, cpuid);
1412 1407 #ifndef __xpv
1413 1408 cpupm_init(cp);
1414 1409 #endif
1415 1410 (void) pg_cpu_init(cp, B_FALSE);
1416 1411 cpu_set_state(cp);
1417 1412 mp_startup_signal(&procset_master, cpuid);
1418 1413
1419 1414 return (0);
1420 1415 }
1421 1416
1422 1417 /*
1423 1418 * Start a single cpu, assuming that the kernel context is available
1424 1419 * to successfully start another cpu.
1425 1420 *
1426 1421 * (For example, real mode code is mapped into the right place
1427 1422 * in memory and is ready to be run.)
1428 1423 */
1429 1424 int
1430 1425 start_cpu(processorid_t who)
1431 1426 {
1432 1427 cpu_t *cp;
1433 1428 int error = 0;
1434 1429 cpuset_t tempset;
1435 1430
1436 1431 ASSERT(who != 0);
1437 1432
1438 1433 /*
1439 1434 * Check if there's at least a Mbyte of kmem available
1440 1435 * before attempting to start the cpu.
1441 1436 */
1442 1437 if (kmem_avail() < 1024 * 1024) {
1443 1438 /*
1444 1439 * Kick off a reap in case that helps us with
1445 1440 * later attempts ..
1446 1441 */
1447 1442 kmem_reap();
1448 1443 return (ENOMEM);
1449 1444 }
1450 1445
1451 1446 /*
1452 1447 * First configure cpu.
1453 1448 */
1454 1449 cp = mp_cpu_configure_common(who, B_TRUE);
1455 1450 ASSERT(cp != NULL);
1456 1451
1457 1452 /*
1458 1453 * Then start cpu.
1459 1454 */
1460 1455 error = mp_start_cpu_common(cp, B_TRUE);
1461 1456 if (error != 0) {
1462 1457 mp_cpu_unconfigure_common(cp, error);
1463 1458 return (error);
1464 1459 }
1465 1460
1466 1461 mutex_exit(&cpu_lock);
1467 1462 tempset = cpu_ready_set;
1468 1463 while (!CPU_IN_SET(tempset, who)) {
1469 1464 drv_usecwait(1);
1470 1465 tempset = *((volatile cpuset_t *)&cpu_ready_set);
1471 1466 }
1472 1467 mutex_enter(&cpu_lock);
1473 1468
1474 1469 return (0);
1475 1470 }
1476 1471
1477 1472 void
1478 1473 start_other_cpus(int cprboot)
1479 1474 {
1480 1475 _NOTE(ARGUNUSED(cprboot));
1481 1476
1482 1477 uint_t who;
1483 1478 uint_t bootcpuid = 0;
1484 1479
1485 1480 /*
1486 1481 * Initialize our own cpu_info.
1487 1482 */
1488 1483 init_cpu_info(CPU);
1489 1484
1490 1485 cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_idstr);
1491 1486 cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_brandstr);
1492 1487
1493 1488 /*
1494 1489 * Initialize our syscall handlers
1495 1490 */
1496 1491 init_cpu_syscall(CPU);
1497 1492
1498 1493 /*
1499 1494 * Take the boot cpu out of the mp_cpus set because we know
1500 1495 * it's already running. Add it to the cpu_ready_set for
1501 1496 * precisely the same reason.
1502 1497 */
1503 1498 CPUSET_DEL(mp_cpus, bootcpuid);
1504 1499 CPUSET_ADD(cpu_ready_set, bootcpuid);
1505 1500
1506 1501 /*
1507 1502 * skip the rest of this if
1508 1503	 * . only 1 cpu detected and system isn't hotplug-capable
1509 1504 * . not using MP
1510 1505 */
1511 1506 if ((CPUSET_ISNULL(mp_cpus) && plat_dr_support_cpu() == 0) ||
1512 1507 use_mp == 0) {
1513 1508 if (use_mp == 0)
1514 1509 cmn_err(CE_CONT, "?***** Not in MP mode\n");
1515 1510 goto done;
1516 1511 }
1517 1512
1518 1513 /*
1519 1514 * perform such initialization as is needed
1520 1515 * to be able to take CPUs on- and off-line.
1521 1516 */
1522 1517 cpu_pause_init();
1523 1518
1524 1519 xc_init_cpu(CPU); /* initialize processor crosscalls */
1525 1520
1526 1521 if (mach_cpucontext_init() != 0)
1527 1522 goto done;
1528 1523
1529 1524 flushes_require_xcalls = 1;
1530 1525
1531 1526 /*
1532 1527 * We lock our affinity to the master CPU to ensure that all slave CPUs
1533 1528 * do their TSC syncs with the same CPU.
1534 1529 */
1535 1530 affinity_set(CPU_CURRENT);
1536 1531
1537 1532 for (who = 0; who < NCPU; who++) {
1538 1533 if (!CPU_IN_SET(mp_cpus, who))
1539 1534 continue;
1540 1535 ASSERT(who != bootcpuid);
1541 1536
1542 1537 mutex_enter(&cpu_lock);
1543 1538 if (start_cpu(who) != 0)
1544 1539 CPUSET_DEL(mp_cpus, who);
1545 1540 cpu_state_change_notify(who, CPU_SETUP);
1546 1541 mutex_exit(&cpu_lock);
1547 1542 }
1548 1543
1549 1544 /* Free the space allocated to hold the microcode file */
1550 1545 ucode_cleanup();
1551 1546
1552 1547 affinity_clear();
1553 1548
1554 1549 mach_cpucontext_fini();
1555 1550
1556 1551 done:
1557 1552 if (get_hwenv() == HW_NATIVE)
1558 1553 workaround_errata_end();
1559 1554 cmi_post_mpstartup();
1560 1555
1561 1556 if (use_mp && ncpus != boot_max_ncpus) {
1562 1557 cmn_err(CE_NOTE,
1563 1558 "System detected %d cpus, but "
1564 1559 "only %d cpu(s) were enabled during boot.",
1565 1560 boot_max_ncpus, ncpus);
1566 1561 cmn_err(CE_NOTE,
1567 1562 "Use \"boot-ncpus\" parameter to enable more CPU(s). "
1568 1563 "See eeprom(1M).");
1569 1564 }
1570 1565 }
1571 1566
1572 1567 int
1573 1568 mp_cpu_configure(int cpuid)
1574 1569 {
1575 1570 cpu_t *cp;
1576 1571
1577 1572 if (use_mp == 0 || plat_dr_support_cpu() == 0) {
1578 1573 return (ENOTSUP);
1579 1574 }
1580 1575
1581 1576 cp = cpu_get(cpuid);
1582 1577 if (cp != NULL) {
1583 1578 return (EALREADY);
1584 1579 }
1585 1580
1586 1581 /*
1587 1582 * Check if there's at least a Mbyte of kmem available
1588 1583 * before attempting to start the cpu.
1589 1584 */
1590 1585 if (kmem_avail() < 1024 * 1024) {
1591 1586 /*
1592 1587 * Kick off a reap in case that helps us with
1593 1588 * later attempts ..
1594 1589 */
1595 1590 kmem_reap();
1596 1591 return (ENOMEM);
1597 1592 }
1598 1593
1599 1594 cp = mp_cpu_configure_common(cpuid, B_FALSE);
1600 1595 ASSERT(cp != NULL && cpu_get(cpuid) == cp);
1601 1596
1602 1597 return (cp != NULL ? 0 : EAGAIN);
1603 1598 }
1604 1599
1605 1600 int
1606 1601 mp_cpu_unconfigure(int cpuid)
1607 1602 {
1608 1603 cpu_t *cp;
1609 1604
1610 1605 if (use_mp == 0 || plat_dr_support_cpu() == 0) {
1611 1606 return (ENOTSUP);
1612 1607 } else if (cpuid < 0 || cpuid >= max_ncpus) {
1613 1608 return (EINVAL);
1614 1609 }
1615 1610
1616 1611 cp = cpu_get(cpuid);
1617 1612 if (cp == NULL) {
1618 1613 return (ENODEV);
1619 1614 }
1620 1615 mp_cpu_unconfigure_common(cp, 0);
1621 1616
1622 1617 return (0);
1623 1618 }
1624 1619
1625 1620 /*
1626 1621 * Startup function for 'other' CPUs (besides boot cpu).
1627 1622 * Called from real_mode_start.
1628 1623 *
1629 1624 * WARNING: until CPU_READY is set, mp_startup_common and routines called by
1630 1625 * mp_startup_common should not call routines (e.g. kmem_free) that could call
1631 1626 * hat_unload which requires CPU_READY to be set.
1632 1627 */
1633 1628 static void
1634 1629 mp_startup_common(boolean_t boot)
1635 1630 {
1636 1631 cpu_t *cp = CPU;
1637 1632 uchar_t new_x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
1638 1633 extern void cpu_event_init_cpu(cpu_t *);
1639 1634
1640 1635 /*
1641 1636 * We need to get TSC on this proc synced (i.e., any delta
1642 1637 * from cpu0 accounted for) as soon as we can, because many
1643 1638 * many things use gethrtime/pc_gethrestime, including
1644 1639 * interrupts, cmn_err, etc.
1645 1640 */
1646 1641
1647 1642 /* Let the control CPU continue into tsc_sync_master() */
1648 1643 mp_startup_signal(&procset_slave, cp->cpu_id);
1649 1644
1650 1645 #ifndef __xpv
1651 1646 if (tsc_gethrtime_enable)
1652 1647 tsc_sync_slave();
1653 1648 #endif
1654 1649
1655 1650 /*
1656 1651 * Once this was done from assembly, but it's safer here; if
1657 1652 * it blocks, we need to be able to swtch() to and from, and
1658 1653 * since we get here by calling t_pc, we need to do that call
1659 1654 * before swtch() overwrites it.
1660 1655 */
1661 1656 (void) (*ap_mlsetup)();
1662 1657
1663 1658 bzero(new_x86_featureset, BT_SIZEOFMAP(NUM_X86_FEATURES));
1664 1659 cpuid_pass1(cp, new_x86_featureset);
1665 1660
1666 1661 #ifndef __xpv
1667 1662 /*
1668 1663 * Program this cpu's PAT
1669 1664 */
1670 1665 if (is_x86_feature(x86_featureset, X86FSET_PAT))
1671 1666 pat_sync();
1672 1667 #endif
1673 1668
1674 1669 /*
1675 1670 * Set up TSC_AUX to contain the cpuid for this processor
1676 1671 * for the rdtscp instruction.
1677 1672 */
1678 1673 if (is_x86_feature(x86_featureset, X86FSET_TSCP))
1679 1674 (void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id);
1680 1675
1681 1676 /*
1682 1677 * Initialize this CPU's syscall handlers
1683 1678 */
1684 1679 init_cpu_syscall(cp);
1685 1680
1686 1681 /*
1687 1682 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the
1688 1683 * highest level at which a routine is permitted to block on
1689 1684 * an adaptive mutex (allows for cpu poke interrupt in case
1690 1685 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks
1691 1686 * device interrupts that may end up in the hat layer issuing cross
1692 1687 * calls before CPU_READY is set.
1693 1688 */
1694 1689 splx(ipltospl(LOCK_LEVEL));
1695 1690 sti();
1696 1691
1697 1692 /*
1698 1693 * Do a sanity check to make sure this new CPU is a sane thing
1699 1694 * to add to the collection of processors running this system.
1700 1695 *
1701 1696 * XXX Clearly this needs to get more sophisticated, if x86
1702 1697	 * systems start to get built out of heterogeneous CPUs, as is
1703 1698 * likely to happen once the number of processors in a configuration
1704 1699 * gets large enough.
1705 1700 */
1706 1701 if (compare_x86_featureset(x86_featureset, new_x86_featureset) ==
1707 1702 B_FALSE) {
1708 1703 cmn_err(CE_CONT, "cpu%d: featureset\n", cp->cpu_id);
1709 1704 print_x86_featureset(new_x86_featureset);
1710 1705 cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
1711 1706 }
1712 1707
1713 1708 /*
1714 1709 * We do not support cpus with mixed monitor/mwait support if the
1715 1710 * boot cpu supports monitor/mwait.
1716 1711 */
1717 1712 if (is_x86_feature(x86_featureset, X86FSET_MWAIT) !=
1718 1713 is_x86_feature(new_x86_featureset, X86FSET_MWAIT))
1719 1714 panic("unsupported mixed cpu monitor/mwait support detected");
1720 1715
1721 1716 /*
1722 1717 * We could be more sophisticated here, and just mark the CPU
1723 1718 * as "faulted" but at this point we'll opt for the easier
1724 1719 * answer of dying horribly. Provided the boot cpu is ok,
1725 1720 * the system can be recovered by booting with use_mp set to zero.
1726 1721 */
1727 1722 if (workaround_errata(cp) != 0)
1728 1723 panic("critical workaround(s) missing for cpu%d", cp->cpu_id);
1729 1724
1730 1725 /*
1731 1726 * We can touch cpu_flags here without acquiring the cpu_lock here
1732 1727 * because the cpu_lock is held by the control CPU which is running
1733 1728 * mp_start_cpu_common().
1734 1729 * Need to clear CPU_QUIESCED flag before calling any function which
1735 1730 * may cause thread context switching, such as kmem_alloc() etc.
1736 1731	 * The idle thread checks for the CPU_QUIESCED flag and loops forever if
1737 1732 * it's set. So the startup thread may have no chance to switch back
1738 1733 * again if it's switched away with CPU_QUIESCED set.
1739 1734 */
1740 1735 cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
1741 1736
1742 1737 /*
1743 1738 * Setup this processor for XSAVE.
1744 1739 */
1745 1740 if (fp_save_mech == FP_XSAVE) {
1746 1741 xsave_setup_msr(cp);
1747 1742 }
1748 1743
1749 1744 cpuid_pass2(cp);
1750 1745 cpuid_pass3(cp);
1751 1746 cpuid_pass4(cp, NULL);
1752 1747
1753 1748 /*
1754 1749 * Correct cpu_idstr and cpu_brandstr on target CPU after
1755 1750 * cpuid_pass1() is done.
1756 1751 */
1757 1752 (void) cpuid_getidstr(cp, cp->cpu_idstr, CPU_IDSTRLEN);
1758 1753 (void) cpuid_getbrandstr(cp, cp->cpu_brandstr, CPU_IDSTRLEN);
1759 1754
1760 1755 cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS;
1761 1756
1762 1757 post_startup_cpu_fixups();
1763 1758
1764 1759 cpu_event_init_cpu(cp);
1765 1760
1766 1761 /*
1767 1762	 * Enable preemption here so that this thread can be preempted while it
1768 1763	 * contends for any locks acquired later in mp_startup_common, in case
1769 1764	 * the thread owning those locks is continuously executing on other CPUs
1770 1765	 * (for example, this CPU must be preemptible to allow other CPUs to
1771 1766	 * pause it during their startup phases). It's safe to enable preemption
1772 1767	 * here because the CPU state is pretty much fully constructed.
1773 1768 */
1774 1769 curthread->t_preempt = 0;
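t_preempt is a nesting counter: preemption is allowed only while it is zero. Clearing it directly here is the unnested equivalent of the usual illumos idiom; kpreempt_disable() and kpreempt_enable() are the public interfaces, and the comments paraphrase their effect:

	kpreempt_disable();	/* increments curthread->t_preempt */
	/* ... code that must not migrate off this CPU ... */
	kpreempt_enable();	/* decrements; preempts if one is pending */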
1775 1770
1776 1771	/* The base spl should still be at LOCK_LEVEL here */
1777 1772 ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
1778 1773 set_base_spl(); /* Restore the spl to its proper value */
1779 1774
1780 1775 pghw_physid_create(cp);
1781 1776 /*
1782 1777	 * Delegate initialization tasks that need the cpu_lock to
1783 1778	 * mp_start_cpu_common(), because we can't acquire the cpu_lock
1784 1779	 * here during CPU DR operations.
1785 1780 */
1786 1781 mp_startup_signal(&procset_slave, cp->cpu_id);
1787 1782 mp_startup_wait(&procset_master, cp->cpu_id);
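The signal/wait pair above is a two-sided rendezvous with the control CPU over the procset_slave and procset_master cpusets. A hedged paraphrase of its shape, from this new CPU's point of view (the actual implementations live earlier in this file and may differ in detail):

	/* Announce readiness: set our bit where the control CPU can see it. */
	CPUSET_ATOMIC_ADD(procset_slave, cp->cpu_id);

	/* Then spin until the control CPU sets our bit in the master set. */
	while (!CPU_IN_SET(procset_master, cp->cpu_id))
		SMT_PAUSE();	/* pipeline hint while spinning */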
1788 1783 pg_cmt_cpu_startup(cp);
1789 1784
1790 1785 if (boot) {
1791 1786 mutex_enter(&cpu_lock);
1792 1787 cp->cpu_flags &= ~CPU_OFFLINE;
1793 1788 cpu_enable_intr(cp);
1794 1789 cpu_add_active(cp);
1795 1790 mutex_exit(&cpu_lock);
1796 1791 }
1797 1792
1798 1793 /* Enable interrupts */
1799 1794 (void) spl0();
1800 1795
1801 1796 /*
1802 1797 * Fill out cpu_ucode_info. Update microcode if necessary.
1803 1798 */
1804 1799 ucode_check(cp);
1805 1800
1806 1801 #ifndef __xpv
1807 1802 {
1808 1803 /*
1809 1804 * Set up the CPU module for this CPU. This can't be done
1810 1805 * before this CPU is made CPU_READY, because we may (in
1811 1806 * heterogeneous systems) need to go load another CPU module.
1812 1807 * The act of attempting to load a module may trigger a
1813 1808 * cross-call, which will ASSERT unless this cpu is CPU_READY.
1814 1809 */
1815 1810 cmi_hdl_t hdl;
1816 1811
1817 1812 if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
1818 1813 cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
1819 1814 if (is_x86_feature(x86_featureset, X86FSET_MCA))
1820 1815 cmi_mca_init(hdl);
1821 1816 cp->cpu_m.mcpu_cmi_hdl = hdl;
1822 1817 }
1823 1818 }
1824 1819 #endif /* __xpv */
1825 1820
1826 1821 if (boothowto & RB_DEBUG)
1827 1822 kdi_cpu_init();
1828 1823
1829 1824 /*
1830 1825 * Setting the bit in cpu_ready_set must be the last operation in
1831 1826 * processor initialization; the boot CPU will continue to boot once
1832 1827 * it sees this bit set for all active CPUs.
1833 1828 */
1834 1829 CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);
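On the boot-CPU side, the corresponding wait amounts to spinning until every started CPU has set its bit in cpu_ready_set. A sketch assuming a simple spin (the real boot path may add timeouts and diagnostics):

	/* Illustrative: boot CPU waiting for all started CPUs to be ready. */
	while (!CPUSET_ISEQUAL(mp_cpus, cpu_ready_set))
		SMT_PAUSE();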
1835 1830
1836 1831 (void) mach_cpu_create_device_node(cp, NULL);
1837 1832
1838 1833 cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
1839 1834 cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
1840 1835 cmn_err(CE_CONT, "?cpu%d initialization complete - online\n",
1841 1836 cp->cpu_id);
1842 1837
1843 1838 /*
1844 1839 * Now we are done with the startup thread, so free it up.
1845 1840 */
1846 1841 thread_exit();
1847 1842 panic("mp_startup: cannot return");
1848 1843 /*NOTREACHED*/
1849 1844 }
1850 1845
1851 1846 /*
1852 1847	 * Startup function, at boot time, for CPUs other than the boot CPU.
1853 1848 */
1854 1849 static void
1855 1850 mp_startup_boot(void)
1856 1851 {
1857 1852 mp_startup_common(B_TRUE);
1858 1853 }
1859 1854
1860 1855 /*
1861 1856 * Startup function for hotplug CPUs at runtime.
1862 1857 */
1863 1858 void
1864 1859 mp_startup_hotplug(void)
1865 1860 {
1866 1861 mp_startup_common(B_FALSE);
1867 1862 }
1868 1863
1869 1864 /*
1870 1865 * Start CPU on user request.
1871 1866 */
1872 1867 /* ARGSUSED */
1873 1868 int
1874 1869 mp_cpu_start(struct cpu *cp)
1875 1870 {
1876 1871 ASSERT(MUTEX_HELD(&cpu_lock));
1877 1872 return (0);
1878 1873 }
1879 1874
1880 1875 /*
1881 1876 * Stop CPU on user request.
1882 1877 */
1883 1878 int
1884 1879 mp_cpu_stop(struct cpu *cp)
1885 1880 {
1886 1881 extern int cbe_psm_timer_mode;
1887 1882 ASSERT(MUTEX_HELD(&cpu_lock));
1888 1883
1889 1884 #ifdef __xpv
1890 1885 /*
1891 1886 * We can't offline vcpu0.
1892 1887 */
1893 1888 if (cp->cpu_id == 0)
1894 1889 return (EBUSY);
1895 1890 #endif
1896 1891
1897 1892 /*
1898 1893	 * If TIMER_PERIODIC mode is in use, CPU0 drives the timer and
1899 1894	 * can't be stopped. (This applies only to machines with no TSC.)
1900 1895 */
1901 1896
1902 1897 if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0))
1903 1898 return (EBUSY);
1904 1899
1905 1900 return (0);
1906 1901 }
1907 1902
1908 1903 /*
1909 1904 * Take the specified CPU out of participation in interrupts.
1910 1905 */
1911 1906 int
1912 1907 cpu_disable_intr(struct cpu *cp)
1913 1908 {
1914 1909 if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
1915 1910 return (EBUSY);
1916 1911
1917 1912 cp->cpu_flags &= ~CPU_ENABLE;
1918 1913 return (0);
1919 1914 }
1920 1915
1921 1916 /*
1922 1917 * Allow the specified CPU to participate in interrupts.
1923 1918 */
1924 1919 void
1925 1920 cpu_enable_intr(struct cpu *cp)
1926 1921 {
1927 1922 ASSERT(MUTEX_HELD(&cpu_lock));
1928 1923 cp->cpu_flags |= CPU_ENABLE;
1929 1924 psm_enable_intr(cp->cpu_id);
1930 1925 }
1931 1926
1932 1927 void
1933 1928 mp_cpu_faulted_enter(struct cpu *cp)
1934 1929 {
1935 1930 #ifdef __xpv
1936 1931 _NOTE(ARGUNUSED(cp));
1937 1932 #else
1938 1933 cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;
1939 1934
1940 1935 if (hdl != NULL) {
1941 1936 cmi_hdl_hold(hdl);
1942 1937 } else {
1943 1938 hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
1944 1939 cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
1945 1940 }
1946 1941 if (hdl != NULL) {
1947 1942 cmi_faulted_enter(hdl);
1948 1943 cmi_hdl_rele(hdl);
1949 1944 }
1950 1945 #endif
1951 1946 }
1952 1947
1953 1948 void
1954 1949 mp_cpu_faulted_exit(struct cpu *cp)
1955 1950 {
1956 1951 #ifdef __xpv
1957 1952 _NOTE(ARGUNUSED(cp));
1958 1953 #else
1959 1954 cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;
1960 1955
1961 1956 if (hdl != NULL) {
1962 1957 cmi_hdl_hold(hdl);
1963 1958 } else {
1964 1959 hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
1965 1960 cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
1966 1961 }
1967 1962 if (hdl != NULL) {
1968 1963 cmi_faulted_exit(hdl);
1969 1964 cmi_hdl_rele(hdl);
1970 1965 }
1971 1966 #endif
1972 1967 }
1973 1968
1974 1969 /*
1975 1970 * The following two routines are used as context operators on threads belonging
1976 1971 * to processes with a private LDT (see sysi86). Due to the rarity of such
1977 1972 * processes, these routines are currently written for best code readability and
1978 1973 * organization rather than speed. We could avoid checking x86_featureset at
1979 1974 * every context switch by installing different context ops, depending on
1980 1975 * x86_featureset, at LDT creation time -- one for each combination of fast
1981 1976 * syscall features.
1982 1977 */
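For concreteness, installing per-LDT context ops would go through the installctx() interface. The sketch below is an assumption about how that could look, not the code this file uses; the installctx() argument list should be checked against the current sys/thread.h, and the save/restore pairing shown paraphrases the intent of the comment above:

	/*
	 * Sketch: when a thread in a private-LDT process is switched onto
	 * a CPU, disable fast syscalls; re-enable them when it leaves.
	 */
	installctx(t, NULL,
	    cpu_fast_syscall_enable,	/* save: thread leaving the CPU */
	    cpu_fast_syscall_disable,	/* restore: thread going on CPU */
	    NULL, NULL, NULL, NULL);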
1983 1978
1984 1979 /*ARGSUSED*/
1985 1980 void
1986 1981 cpu_fast_syscall_disable(void *arg)
1987 1982 {
1988 1983 if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
1989 1984 is_x86_feature(x86_featureset, X86FSET_SEP))
1990 1985 cpu_sep_disable();
1991 1986 if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
1992 1987 is_x86_feature(x86_featureset, X86FSET_ASYSC))
1993 1988 cpu_asysc_disable();
1994 1989 }
1995 1990
1996 1991 /*ARGSUSED*/
1997 1992 void
1998 1993 cpu_fast_syscall_enable(void *arg)
1999 1994 {
2000 1995 if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
2001 1996 is_x86_feature(x86_featureset, X86FSET_SEP))
2002 1997 cpu_sep_enable();
2003 1998 if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
2004 1999 is_x86_feature(x86_featureset, X86FSET_ASYSC))
2005 2000 cpu_asysc_enable();
2006 2001 }
2007 2002
2008 2003 static void
2009 2004 cpu_sep_enable(void)
2010 2005 {
2011 2006 ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP));
2012 2007 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
2013 2008
2014 2009 wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
2015 2010 }
2016 2011
2017 2012 static void
2018 2013 cpu_sep_disable(void)
2019 2014 {
2020 2015 ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP));
2021 2016 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
2022 2017
2023 2018 /*
2024 2019 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
2025 2020 * the sysenter or sysexit instruction to trigger a #gp fault.
2026 2021 */
2027 2022 wrmsr(MSR_INTC_SEP_CS, 0);
2028 2023 }
2029 2024
2030 2025 static void
2031 2026 cpu_asysc_enable(void)
2032 2027 {
2033 2028 ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC));
2034 2029 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
2035 2030
2036 2031 wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
2037 2032 (uint64_t)(uintptr_t)AMD_EFER_SCE);
2038 2033 }
2039 2034
2040 2035 static void
2041 2036 cpu_asysc_disable(void)
2042 2037 {
2043 2038 ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC));
2044 2039 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
2045 2040
2046 2041 /*
2047 2042 * Turn off the SCE (syscall enable) bit in the EFER register. Software
2048 2043 * executing syscall or sysret with this bit off will incur a #ud trap.
2049 2044 */
2050 2045 wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
2051 2046 ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
2052 2047 }