4444 remove unused cpuid-related globals
--- old/usr/src/uts/i86pc/os/cpuid.c
+++ new/usr/src/uts/i86pc/os/cpuid.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 by Delphix. All rights reserved.
24 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 25 */
26 26 /*
27 27 * Copyright (c) 2010, Intel Corporation.
28 28 * All rights reserved.
29 29 */
30 30 /*
31 31 * Portions Copyright 2009 Advanced Micro Devices, Inc.
32 32 */
33 33 /*
34 34 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
35 35 */
36 36 /*
37 37 * Various routines to handle identification
38 38 * and classification of x86 processors.
39 39 */
40 40
41 41 #include <sys/types.h>
42 42 #include <sys/archsystm.h>
43 43 #include <sys/x86_archext.h>
44 44 #include <sys/kmem.h>
45 45 #include <sys/systm.h>
46 46 #include <sys/cmn_err.h>
47 47 #include <sys/sunddi.h>
48 48 #include <sys/sunndi.h>
49 49 #include <sys/cpuvar.h>
50 50 #include <sys/processor.h>
51 51 #include <sys/sysmacros.h>
52 52 #include <sys/pg.h>
53 53 #include <sys/fp.h>
54 54 #include <sys/controlregs.h>
55 55 #include <sys/bitmap.h>
56 56 #include <sys/auxv_386.h>
57 57 #include <sys/memnode.h>
58 58 #include <sys/pci_cfgspace.h>
59 59
60 60 #ifdef __xpv
61 61 #include <sys/hypervisor.h>
62 62 #else
63 63 #include <sys/ontrap.h>
64 64 #endif
65 65
66 66 /*
67 67 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
68 68 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
69 69 * them accordingly. For most modern processors, feature detection occurs here
70 70 * in pass 1.
71 71 *
72 72 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
73 73 * for the boot CPU and does the basic analysis that the early kernel needs.
74 74 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
75 75 * CPU.
76 76 *
77 77 * Pass 1 includes:
78 78 *
79 79 * o Determining vendor/model/family/stepping and setting x86_type and
80 80 * x86_vendor accordingly.
81 81 * o Processing the feature flags returned by the cpuid instruction while
82 82 * applying any workarounds or tricks for the specific processor.
83 83 * o Mapping the feature flags into Solaris feature bits (X86_*).
84 84 * o Processing extended feature flags if supported by the processor,
85 85 * again while applying specific processor knowledge.
86 86 * o Determining the CMT characteristics of the system.
87 87 *
88 88 * Pass 1 is done on non-boot CPUs during their initialization and the results
89 89 * are used only as a meager attempt at ensuring that all processors within the
90 90 * system support the same features.
91 91 *
92 92 * Pass 2 of cpuid feature analysis happens just at the beginning
93 93 * of startup(). It just copies in and corrects the remainder
94 94 * of the cpuid data we depend on: standard cpuid functions that we didn't
95 95 * need for pass1 feature analysis, and extended cpuid functions beyond the
96 96 * simple feature processing done in pass1.
97 97 *
98 98 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
99 99 * particular kernel memory allocation has been made available. It creates a
100 100 * readable brand string based on the data collected in the first two passes.
101 101 *
102 102 * Pass 4 of cpuid analysis is invoked after post_startup() when all
103 103 * the support infrastructure for various hardware features has been
104 104 * initialized. It determines which processor features will be reported
105 105 * to userland via the aux vector.
106 106 *
107 107 * All passes are executed on all CPUs, but only the boot CPU determines what
108 108 * features the kernel will use.
109 109 *
110 110 * Much of the worst junk in this file is for the support of processors
111 111 * that didn't really implement the cpuid instruction properly.
112 112 *
113 113 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
114 114 * the pass numbers. Accordingly, changes to the pass code may require changes
115 115 * to the accessor code.
116 116 */
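
The pass-gating NOTE above is easiest to see in miniature; here is a minimal sketch of the accessor pattern it describes (example_get_brandstr() is an illustrative stand-in, not a function in this file):

	/*
	 * The brand string is produced in pass 3, so an accessor for it
	 * asserts that pass 3 has completed on this cpu.
	 */
	static const char *
	example_get_brandstr(cpu_t *cpu)
	{
		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

		ASSERT(cpi->cpi_pass >= 3);
		return (cpi->cpi_brandstr);
	}
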
117 117
118 118 uint_t x86_vendor = X86_VENDOR_IntelClone;
119 119 uint_t x86_type = X86_TYPE_OTHER;
120 120 uint_t x86_clflush_size = 0;
121 121
122 122 uint_t pentiumpro_bug4046376;
123 -uint_t pentiumpro_bug4064495;
124 123
125 124 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
126 125
127 126 static char *x86_feature_names[NUM_X86_FEATURES] = {
128 127 "lgpg",
129 128 "tsc",
130 129 "msr",
131 130 "mtrr",
132 131 "pge",
133 132 "de",
134 133 "cmov",
135 134 "mmx",
136 135 "mca",
137 136 "pae",
138 137 "cv8",
139 138 "pat",
140 139 "sep",
141 140 "sse",
142 141 "sse2",
143 142 "htt",
144 143 "asysc",
145 144 "nx",
146 145 "sse3",
147 146 "cx16",
148 147 "cmp",
149 148 "tscp",
150 149 "mwait",
151 150 "sse4a",
152 151 "cpuid",
153 152 "ssse3",
154 153 "sse4_1",
155 154 "sse4_2",
156 155 "1gpg",
157 156 "clfsh",
158 157 "64",
159 158 "aes",
160 159 "pclmulqdq",
161 160 "xsave",
162 161 "avx",
163 162 "vmx",
164 163 "svm",
165 164 "topoext",
166 165 "f16c",
167 166 "rdrand"
168 167 };
169 168
170 169 boolean_t
171 170 is_x86_feature(void *featureset, uint_t feature)
172 171 {
173 172 ASSERT(feature < NUM_X86_FEATURES);
174 173 return (BT_TEST((ulong_t *)featureset, feature));
175 174 }
176 175
177 176 void
178 177 add_x86_feature(void *featureset, uint_t feature)
179 178 {
180 179 ASSERT(feature < NUM_X86_FEATURES);
181 180 BT_SET((ulong_t *)featureset, feature);
182 181 }
183 182
184 183 void
185 184 remove_x86_feature(void *featureset, uint_t feature)
186 185 {
187 186 ASSERT(feature < NUM_X86_FEATURES);
188 187 BT_CLEAR((ulong_t *)featureset, feature);
189 188 }
190 189
191 190 boolean_t
192 191 compare_x86_featureset(void *setA, void *setB)
193 192 {
194 193 /*
195 194 * We assume that the unused bits of the bitmap are always zero.
196 195 */
197 196 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
198 197 return (B_TRUE);
199 198 } else {
200 199 return (B_FALSE);
201 200 }
202 201 }
203 202
204 203 void
205 204 print_x86_featureset(void *featureset)
206 205 {
207 206 uint_t i;
208 207
209 208 for (i = 0; i < NUM_X86_FEATURES; i++) {
210 209 if (is_x86_feature(featureset, i)) {
211 210 cmn_err(CE_CONT, "?x86_feature: %s\n",
212 211 x86_feature_names[i]);
213 212 }
214 213 }
215 214 }
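
A quick usage sketch of the featureset helpers above (the feature choices are arbitrary and purely illustrative):

	uchar_t fs[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };

	add_x86_feature(fs, X86FSET_TSC);
	add_x86_feature(fs, X86FSET_SSE2);
	ASSERT(is_x86_feature(fs, X86FSET_TSC));
	remove_x86_feature(fs, X86FSET_SSE2);

	/* non-boot cpus compare their pass-1 result against the boot cpu */
	if (!compare_x86_featureset(fs, x86_featureset))
		cmn_err(CE_WARN, "cpu features differ from boot cpu");
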
216 215
217 -uint_t enable486;
218 -
219 216 static size_t xsave_state_size = 0;
220 217 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
221 218 boolean_t xsave_force_disable = B_FALSE;
222 219
223 220 /*
224 221  * This is set to the platform type we are running on.
225 222 */
226 223 static int platform_type = -1;
227 224
228 225 #if !defined(__xpv)
229 226 /*
230 227 * Variable to patch if hypervisor platform detection needs to be
231 228 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
232 229 */
233 230 int enable_platform_detection = 1;
234 231 #endif
235 232
236 233 /*
237 234 * monitor/mwait info.
238 235 *
239 236 * size_actual and buf_actual are the real address and size allocated to get
240 237  * proper mwait_buf alignment. buf_actual and size_actual should be passed
241 238 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
242 239  * processor cache-line alignment, but this is not guaranteed in the future.
243 240 */
244 241 struct mwait_info {
245 242 size_t mon_min; /* min size to avoid missed wakeups */
246 243 size_t mon_max; /* size to avoid false wakeups */
247 244 size_t size_actual; /* size actually allocated */
248 245 void *buf_actual; /* memory actually allocated */
249 246 uint32_t support; /* processor support of monitor/mwait */
250 247 };
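
The alignment note above implies a teardown contract: free the real allocation, not the aligned window handed to monitor/mwait. A hedged sketch (example_mwait_fini() is illustrative, not code from this change):

	static void
	example_mwait_fini(struct mwait_info *mi)
	{
		/* buf_actual/size_actual describe the actual kmem allocation */
		if (mi->buf_actual != NULL) {
			kmem_free(mi->buf_actual, mi->size_actual);
			mi->buf_actual = NULL;
			mi->size_actual = 0;
		}
	}
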
251 248
252 249 /*
253 250 * xsave/xrestor info.
254 251 *
255 252 * This structure contains HW feature bits and size of the xsave save area.
256 253 * Note: the kernel will use the maximum size required for all hardware
257 254  * features. It is not optimized for potential memory savings if features at
258 255 * the end of the save area are not enabled.
259 256 */
260 257 struct xsave_info {
261 258 uint32_t xsav_hw_features_low; /* Supported HW features */
262 259 uint32_t xsav_hw_features_high; /* Supported HW features */
263 260 size_t xsav_max_size; /* max size save area for HW features */
264 261 size_t ymm_size; /* AVX: size of ymm save area */
265 262 size_t ymm_offset; /* AVX: offset for ymm save area */
266 263 };
267 264
268 265
269 266 /*
270 267 * These constants determine how many of the elements of the
271 268 * cpuid we cache in the cpuid_info data structure; the
272 269 * remaining elements are accessible via the cpuid instruction.
273 270 */
274 271
275 272 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */
276 273 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
277 274
278 275 /*
279 276 * Some terminology needs to be explained:
280 277 * - Socket: Something that can be plugged into a motherboard.
281 278 * - Package: Same as socket
282 279  *   - Chip: Same as socket. Note that AMD's documentation uses the term "chip"
283 280 * differently: there, chip is the same as processor node (below)
284 281 * - Processor node: Some AMD processors have more than one
285 282 * "subprocessor" embedded in a package. These subprocessors (nodes)
286 283 * are fully-functional processors themselves with cores, caches,
287 284 * memory controllers, PCI configuration spaces. They are connected
288 285 * inside the package with Hypertransport links. On single-node
289 286 * processors, processor node is equivalent to chip/socket/package.
290 287 * - Compute Unit: Some AMD processors pair cores in "compute units" that
291 288 * share the FPU and the I$ and L2 caches.
292 289 */
293 290
294 291 struct cpuid_info {
295 292 uint_t cpi_pass; /* last pass completed */
296 293 /*
297 294 * standard function information
298 295 */
299 296 uint_t cpi_maxeax; /* fn 0: %eax */
300 297 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
301 298 uint_t cpi_vendor; /* enum of cpi_vendorstr */
302 299
303 300 uint_t cpi_family; /* fn 1: extended family */
304 301 uint_t cpi_model; /* fn 1: extended model */
305 302 uint_t cpi_step; /* fn 1: stepping */
306 303 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
307 304 /* AMD: package/socket # */
308 305 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
309 306 int cpi_clogid; /* fn 1: %ebx: thread # */
310 307 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
311 308 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
312 309 uint_t cpi_ncache; /* fn 2: number of elements */
313 310 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
314 311 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
315 312 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */
316 313 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */
317 314 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */
318 315 /*
319 316 * extended function information
320 317 */
321 318 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
322 319 char cpi_brandstr[49]; /* fn 0x8000000[234] */
323 320 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
324 321 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
325 322 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
326 323
327 324 id_t cpi_coreid; /* same coreid => strands share core */
328 325 int cpi_pkgcoreid; /* core number within single package */
329 326 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
330 327 /* Intel: fn 4: %eax[31-26] */
331 328 /*
332 329 * supported feature information
333 330 */
334 331 uint32_t cpi_support[5];
335 332 #define STD_EDX_FEATURES 0
336 333 #define AMD_EDX_FEATURES 1
337 334 #define TM_EDX_FEATURES 2
338 335 #define STD_ECX_FEATURES 3
339 336 #define AMD_ECX_FEATURES 4
340 337 /*
341 338 * Synthesized information, where known.
342 339 */
343 340 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
344 341 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
345 342 uint32_t cpi_socket; /* Chip package/socket type */
346 343
347 344 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
348 345 uint32_t cpi_apicid;
349 346 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
350 347 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
351 348 /* Intel: 1 */
352 349 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
353 350 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
354 351
355 352 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
356 353 };
357 354
358 355
359 356 static struct cpuid_info cpuid_info0;
360 357
361 358 /*
362 359 * These bit fields are defined by the Intel Application Note AP-485
363 360 * "Intel Processor Identification and the CPUID Instruction"
364 361 */
365 362 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
366 363 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
367 364 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
368 365 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
369 366 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
370 367 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
371 368
372 369 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
373 370 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
374 371 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
375 372 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
376 373
377 374 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
378 375 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
379 376 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
380 377 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
381 378
382 379 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
383 380 #define CPI_XMAXEAX_MAX 0x80000100
384 381 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
385 382 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
386 383
387 384 /*
388 385 * Function 4 (Deterministic Cache Parameters) macros
389 386 * Defined by Intel Application Note AP-485
390 387 */
391 388 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
392 389 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
393 390 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
394 391 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
395 392 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
396 393 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
397 394 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
398 395
399 396 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
400 397 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
401 398 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
402 399
403 400 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
404 401
405 402 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
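
Each fn 4 geometry field above is encoded as (value - 1), so a cache's size in bytes is the product of the four incremented fields. A sketch of the arithmetic (illustrative helper, not part of this change):

	static size_t
	example_fn4_cache_size(struct cpuid_regs *regs)
	{
		/* ways, partitions, line size and sets are all stored minus 1 */
		return ((size_t)(CPI_CACHE_WAYS(regs) + 1) *
		    (CPI_CACHE_PARTS(regs) + 1) *
		    (CPI_CACHE_COH_LN_SZ(regs) + 1) *
		    (CPI_CACHE_SETS(regs) + 1));
	}
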
406 403
407 404
408 405 /*
409 406 * A couple of shorthand macros to identify "later" P6-family chips
410 407 * like the Pentium M and Core. First, the "older" P6-based stuff
411 408 * (loosely defined as "pre-Pentium-4"):
412 409 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
413 410 */
414 411
415 412 #define IS_LEGACY_P6(cpi) ( \
416 413 cpi->cpi_family == 6 && \
417 414 (cpi->cpi_model == 1 || \
418 415 cpi->cpi_model == 3 || \
419 416 cpi->cpi_model == 5 || \
420 417 cpi->cpi_model == 6 || \
421 418 cpi->cpi_model == 7 || \
422 419 cpi->cpi_model == 8 || \
423 420 cpi->cpi_model == 0xA || \
424 421 cpi->cpi_model == 0xB) \
425 422 )
426 423
427 424 /* A "new F6" is everything with family 6 that's not the above */
428 425 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
429 426
430 427 /* Extended family/model support */
431 428 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
432 429 cpi->cpi_family >= 0xf)
433 430
434 431 /*
435 432 * Info for monitor/mwait idle loop.
436 433 *
437 434 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
438 435 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
439 436 * 2006.
440 437 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
441 438 * Documentation Updates" #33633, Rev 2.05, December 2006.
442 439 */
443 440 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
444 441 #define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
445 442 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
446 443 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
447 444 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
448 445 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
449 446 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
450 447 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
451 448 /*
452 449 * Number of sub-cstates for a given c-state.
453 450 */
454 451 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
455 452 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
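
Note that the c_state argument above is a bit offset, not a C-state number: each C-state owns a 4-bit field in leaf 5 %edx (C0 at offset 0, C1 at offset 4, C2 at offset 8, and so on). For example:

	/* number of C1 sub-states, i.e. bits 7:4 of leaf 5 %edx */
	uint_t c1_substates = MWAIT_NUM_SUBC_STATES(cpi, 4);
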
456 453
457 454 /*
458 455 * XSAVE leaf 0xD enumeration
459 456 */
460 457 #define CPUID_LEAFD_2_YMM_OFFSET 576
461 458 #define CPUID_LEAFD_2_YMM_SIZE 256
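
These constants mirror what leaf 0xD, subleaf 2 reports for the AVX (ymm) state component: %eax holds the component's size and %ebx its offset into the save area. A sketch of the query (the values in the comment are the common case, not guaranteed):

	struct cpuid_regs cp;

	cp.cp_eax = 0xd;
	cp.cp_ecx = 2;		/* subleaf 2: AVX/ymm state */
	(void) __cpuid_insn(&cp);
	/* typically cp.cp_eax == 256 (size) and cp.cp_ebx == 576 (offset) */
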
462 459
463 460 /*
464 461  * Functions we consume from cpuid_subr.c; don't publish these in a header
465 462 * file to try and keep people using the expected cpuid_* interfaces.
466 463 */
467 464 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
468 465 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
469 466 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
470 467 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
471 468 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
472 469
473 470 /*
474 471  * Apply various platform-dependent restrictions where the
475 472 * underlying platform restrictions mean the CPU can be marked
476 473 * as less capable than its cpuid instruction would imply.
477 474 */
478 475 #if defined(__xpv)
479 476 static void
480 477 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
481 478 {
482 479 switch (eax) {
483 480 case 1: {
484 481 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
485 482 0 : CPUID_INTC_EDX_MCA;
486 483 cp->cp_edx &=
487 484 ~(mcamask |
488 485 CPUID_INTC_EDX_PSE |
489 486 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
490 487 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
491 488 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
492 489 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
493 490 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
494 491 break;
495 492 }
496 493
497 494 case 0x80000001:
498 495 cp->cp_edx &=
499 496 ~(CPUID_AMD_EDX_PSE |
500 497 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
501 498 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
502 499 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
503 500 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
504 501 CPUID_AMD_EDX_TSCP);
505 502 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
506 503 break;
507 504 default:
508 505 break;
509 506 }
510 507
511 508 switch (vendor) {
512 509 case X86_VENDOR_Intel:
513 510 switch (eax) {
514 511 case 4:
515 512 /*
516 513 * Zero out the (ncores-per-chip - 1) field
517 514 */
518 515 			cp->cp_eax &= 0x03ffffff;
519 516 break;
520 517 default:
521 518 break;
522 519 }
523 520 break;
524 521 case X86_VENDOR_AMD:
525 522 switch (eax) {
526 523
527 524 case 0x80000001:
528 525 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
529 526 break;
530 527
531 528 case 0x80000008:
532 529 /*
533 530 * Zero out the (ncores-per-chip - 1) field
534 531 */
535 532 cp->cp_ecx &= 0xffffff00;
536 533 break;
537 534 default:
538 535 break;
539 536 }
540 537 break;
541 538 default:
542 539 break;
543 540 }
544 541 }
545 542 #else
546 543 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
547 544 #endif
548 545
549 546 /*
550 547 * Some undocumented ways of patching the results of the cpuid
551 548 * instruction to permit running Solaris 10 on future cpus that
552 549 * we don't currently support. Could be set to non-zero values
553 550 * via settings in eeprom.
554 551 */
555 552
556 553 uint32_t cpuid_feature_ecx_include;
557 554 uint32_t cpuid_feature_ecx_exclude;
558 555 uint32_t cpuid_feature_edx_include;
559 556 uint32_t cpuid_feature_edx_exclude;
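
In practice these knobs are patched from /etc/system (the "eeprom" reference above is loose). An illustrative fragment — the value is an example only, masking XSAVE (fn 1 %ecx bit 26):

	* /etc/system fragment (illustrative)
	set cpuid_feature_ecx_exclude = 0x04000000
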
560 557
561 558 /*
562 559 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
563 560 */
564 561 void
565 562 cpuid_alloc_space(cpu_t *cpu)
566 563 {
567 564 /*
568 565 * By convention, cpu0 is the boot cpu, which is set up
569 566 * before memory allocation is available. All other cpus get
570 567 * their cpuid_info struct allocated here.
571 568 */
572 569 ASSERT(cpu->cpu_id != 0);
573 570 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
574 571 cpu->cpu_m.mcpu_cpi =
575 572 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
576 573 }
577 574
578 575 void
579 576 cpuid_free_space(cpu_t *cpu)
580 577 {
581 578 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
582 579 int i;
583 580
584 581 ASSERT(cpi != NULL);
585 582 ASSERT(cpi != &cpuid_info0);
586 583
587 584 /*
588 585 * Free up any function 4 related dynamic storage
589 586 */
590 587 for (i = 1; i < cpi->cpi_std_4_size; i++)
591 588 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
592 589 if (cpi->cpi_std_4_size > 0)
593 590 kmem_free(cpi->cpi_std_4,
594 591 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
595 592
596 593 kmem_free(cpi, sizeof (*cpi));
597 594 cpu->cpu_m.mcpu_cpi = NULL;
598 595 }
599 596
600 597 #if !defined(__xpv)
601 598 /*
602 599 * Determine the type of the underlying platform. This is used to customize
603 600 * initialization of various subsystems (e.g. TSC). determine_platform() must
604 601 * only ever be called once to prevent two processors from seeing different
605 602 * values of platform_type. Must be called before cpuid_pass1(), the earliest
606 603 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
607 604 */
608 605 void
609 606 determine_platform(void)
610 607 {
611 608 struct cpuid_regs cp;
612 609 uint32_t base;
613 610 uint32_t regs[4];
614 611 char *hvstr = (char *)regs;
615 612
616 613 ASSERT(platform_type == -1);
617 614
618 615 platform_type = HW_NATIVE;
619 616
620 617 if (!enable_platform_detection)
621 618 return;
622 619
623 620 /*
624 621 * If Hypervisor CPUID bit is set, try to determine hypervisor
625 622 * vendor signature, and set platform type accordingly.
626 623 *
627 624 * References:
628 625 * http://lkml.org/lkml/2008/10/1/246
629 626 * http://kb.vmware.com/kb/1009458
630 627 */
631 628 cp.cp_eax = 0x1;
632 629 (void) __cpuid_insn(&cp);
633 630 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
634 631 cp.cp_eax = 0x40000000;
635 632 (void) __cpuid_insn(&cp);
636 633 regs[0] = cp.cp_ebx;
637 634 regs[1] = cp.cp_ecx;
638 635 regs[2] = cp.cp_edx;
639 636 regs[3] = 0;
640 637 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
641 638 platform_type = HW_XEN_HVM;
642 639 return;
643 640 }
644 641 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
645 642 platform_type = HW_VMWARE;
646 643 return;
647 644 }
648 645 if (strcmp(hvstr, HVSIG_KVM) == 0) {
649 646 platform_type = HW_KVM;
650 647 return;
651 648 }
652 649 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
653 650 platform_type = HW_MICROSOFT;
654 651 } else {
655 652 /*
656 653 * Check older VMware hardware versions. VMware hypervisor is
657 654 * detected by performing an IN operation to VMware hypervisor
658 655 	 * port and checking that the value returned in %ebx is the
659 656 	 * VMware hypervisor magic value.
660 657 *
661 658 * References: http://kb.vmware.com/kb/1009458
662 659 */
663 660 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
664 661 if (regs[1] == VMWARE_HVMAGIC) {
665 662 platform_type = HW_VMWARE;
666 663 return;
667 664 }
668 665 }
669 666
670 667 /*
671 668 * Check Xen hypervisor. In a fully virtualized domain,
672 669 * Xen's pseudo-cpuid function returns a string representing the
673 670 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
674 671 * supported cpuid function. We need at least a (base + 2) leaf value
675 672 * to do what we want to do. Try different base values, since the
676 673 * hypervisor might use a different one depending on whether Hyper-V
677 674 * emulation is switched on by default or not.
678 675 */
679 676 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
680 677 cp.cp_eax = base;
681 678 (void) __cpuid_insn(&cp);
682 679 regs[0] = cp.cp_ebx;
683 680 regs[1] = cp.cp_ecx;
684 681 regs[2] = cp.cp_edx;
685 682 regs[3] = 0;
686 683 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
687 684 cp.cp_eax >= (base + 2)) {
688 685 platform_type &= ~HW_NATIVE;
689 686 platform_type |= HW_XEN_HVM;
690 687 return;
691 688 }
692 689 }
693 690 }
694 691
695 692 int
696 693 get_hwenv(void)
697 694 {
698 695 ASSERT(platform_type != -1);
699 696 return (platform_type);
700 697 }
701 698
702 699 int
703 700 is_controldom(void)
704 701 {
705 702 return (0);
706 703 }
707 704
708 705 #else
709 706
710 707 int
711 708 get_hwenv(void)
712 709 {
713 710 return (HW_XEN_PV);
714 711 }
715 712
716 713 int
717 714 is_controldom(void)
718 715 {
719 716 return (DOMAIN_IS_INITDOMAIN(xen_info));
720 717 }
721 718
722 719 #endif /* __xpv */
723 720
724 721 static void
725 722 cpuid_intel_getids(cpu_t *cpu, void *feature)
726 723 {
727 724 uint_t i;
728 725 uint_t chipid_shift = 0;
729 726 uint_t coreid_shift = 0;
730 727 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
731 728
732 729 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
733 730 chipid_shift++;
734 731
735 732 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
736 733 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
737 734
738 735 if (is_x86_feature(feature, X86FSET_CMP)) {
739 736 /*
740 737 * Multi-core (and possibly multi-threaded)
741 738 * processors.
742 739 */
743 740 uint_t ncpu_per_core;
744 741 if (cpi->cpi_ncore_per_chip == 1)
745 742 ncpu_per_core = cpi->cpi_ncpu_per_chip;
746 743 else if (cpi->cpi_ncore_per_chip > 1)
747 744 ncpu_per_core = cpi->cpi_ncpu_per_chip /
748 745 cpi->cpi_ncore_per_chip;
749 746 /*
750 747 * 8bit APIC IDs on dual core Pentiums
751 748 * look like this:
752 749 *
753 750 * +-----------------------+------+------+
754 751 * | Physical Package ID | MC | HT |
755 752 * +-----------------------+------+------+
756 753 * <------- chipid -------->
757 754 * <------- coreid --------------->
758 755 * <--- clogid -->
759 756 * <------>
760 757 * pkgcoreid
761 758 *
762 759 * Where the number of bits necessary to
763 760 		 * represent MC and HT fields together equals
764 761 		 * the minimum number of bits necessary to
765 762 * store the value of cpi->cpi_ncpu_per_chip.
766 763 * Of those bits, the MC part uses the number
767 764 * of bits necessary to store the value of
768 765 * cpi->cpi_ncore_per_chip.
769 766 */
770 767 for (i = 1; i < ncpu_per_core; i <<= 1)
771 768 coreid_shift++;
772 769 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
773 770 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
774 771 } else if (is_x86_feature(feature, X86FSET_HTT)) {
775 772 /*
776 773 * Single-core multi-threaded processors.
777 774 */
778 775 cpi->cpi_coreid = cpi->cpi_chipid;
779 776 cpi->cpi_pkgcoreid = 0;
780 777 }
781 778 cpi->cpi_procnodeid = cpi->cpi_chipid;
782 779 cpi->cpi_compunitid = cpi->cpi_coreid;
783 780 }
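
To ground the diagram above: for a hypothetical package with 4 logical cpus in 2 cores (so chipid_shift = 2, coreid_shift = 1) and APIC ID 0x7, the derivation works out as:

	uint_t apicid = 0x7;			/* hypothetical */

	uint_t chipid = apicid >> 2;		/* 1 */
	uint_t clogid = apicid & 0x3;		/* 3 */
	uint_t coreid = apicid >> 1;		/* 3 */
	uint_t pkgcoreid = clogid >> 1;		/* 1: second core in package */
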
784 781
785 782 static void
786 783 cpuid_amd_getids(cpu_t *cpu)
787 784 {
788 785 int i, first_half, coreidsz;
789 786 uint32_t nb_caps_reg;
790 787 uint_t node2_1;
791 788 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
792 789 struct cpuid_regs *cp;
793 790
794 791 /*
795 792 * AMD CMP chips currently have a single thread per core.
796 793 *
797 794 * Since no two cpus share a core we must assign a distinct coreid
798 795 * per cpu, and we do this by using the cpu_id. This scheme does not,
799 796 * however, guarantee that sibling cores of a chip will have sequential
800 797 * coreids starting at a multiple of the number of cores per chip -
801 798 * that is usually the case, but if the ACPI MADT table is presented
802 799 * in a different order then we need to perform a few more gymnastics
803 800 * for the pkgcoreid.
804 801 *
805 802 * All processors in the system have the same number of enabled
806 803 * cores. Cores within a processor are always numbered sequentially
807 804 * from 0 regardless of how many or which are disabled, and there
808 805 	 * is no way for the operating system to discover the real core id
809 806 	 * when some are disabled.
810 807 *
811 808 * In family 0x15, the cores come in pairs called compute units. They
812 809 * share I$ and L2 caches and the FPU. Enumeration of this feature is
813 810 * simplified by the new topology extensions CPUID leaf, indicated by
814 811 * the X86 feature X86FSET_TOPOEXT.
815 812 */
816 813
817 814 cpi->cpi_coreid = cpu->cpu_id;
818 815 cpi->cpi_compunitid = cpu->cpu_id;
819 816
820 817 if (cpi->cpi_xmaxeax >= 0x80000008) {
821 818
822 819 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
823 820
824 821 /*
825 822 * In AMD parlance chip is really a node while Solaris
826 823 * sees chip as equivalent to socket/package.
827 824 */
828 825 cpi->cpi_ncore_per_chip =
829 826 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
830 827 if (coreidsz == 0) {
831 828 /* Use legacy method */
832 829 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
833 830 coreidsz++;
834 831 if (coreidsz == 0)
835 832 coreidsz = 1;
836 833 }
837 834 } else {
838 835 /* Assume single-core part */
839 836 cpi->cpi_ncore_per_chip = 1;
840 837 coreidsz = 1;
841 838 }
842 839
843 840 cpi->cpi_clogid = cpi->cpi_pkgcoreid =
844 841 cpi->cpi_apicid & ((1<<coreidsz) - 1);
845 842 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
846 843
847 844 /* Get node ID, compute unit ID */
848 845 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
849 846 cpi->cpi_xmaxeax >= 0x8000001e) {
850 847 cp = &cpi->cpi_extd[0x1e];
851 848 cp->cp_eax = 0x8000001e;
852 849 (void) __cpuid_insn(cp);
853 850
854 851 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
855 852 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
856 853 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
857 854 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
858 855 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
859 856 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
860 857 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
861 858 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
862 859 } else if (cpi->cpi_family == 0x10) {
863 860 /*
864 861 * See if we are a multi-node processor.
865 862 * All processors in the system have the same number of nodes
866 863 		 * All processors in the system have the same number of nodes.
867 864 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
868 865 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
869 866 /* Single-node */
870 867 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
871 868 coreidsz);
872 869 } else {
873 870
874 871 /*
875 872 * Multi-node revision D (2 nodes per package
876 873 * are supported)
877 874 */
878 875 cpi->cpi_procnodes_per_pkg = 2;
879 876
880 877 first_half = (cpi->cpi_pkgcoreid <=
881 878 (cpi->cpi_ncore_per_chip/2 - 1));
882 879
883 880 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
884 881 /* We are BSP */
885 882 cpi->cpi_procnodeid = (first_half ? 0 : 1);
886 883 } else {
887 884
888 885 /* We are AP */
889 886 /* NodeId[2:1] bits to use for reading F3xe8 */
890 887 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
891 888
892 889 nb_caps_reg =
893 890 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
894 891
895 892 /*
896 893 * Check IntNodeNum bit (31:30, but bit 31 is
897 894 * always 0 on dual-node processors)
898 895 */
899 896 if (BITX(nb_caps_reg, 30, 30) == 0)
900 897 cpi->cpi_procnodeid = node2_1 +
901 898 !first_half;
902 899 else
903 900 cpi->cpi_procnodeid = node2_1 +
904 901 first_half;
905 902 }
906 903 }
907 904 } else {
908 905 cpi->cpi_procnodeid = 0;
909 906 }
910 907
911 908 cpi->cpi_chipid =
912 909 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
913 910 }
914 911
915 912 /*
916 913 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
917 914 */
918 915 void
919 916 setup_xfem(void)
920 917 {
921 918 uint64_t flags = XFEATURE_LEGACY_FP;
922 919
923 920 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
924 921
925 922 if (is_x86_feature(x86_featureset, X86FSET_SSE))
926 923 flags |= XFEATURE_SSE;
927 924
928 925 if (is_x86_feature(x86_featureset, X86FSET_AVX))
929 926 flags |= XFEATURE_AVX;
930 927
931 928 set_xcr(XFEATURE_ENABLED_MASK, flags);
932 929
933 930 xsave_bv_all = flags;
934 931 }
935 932
936 933 void
937 934 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
938 935 {
939 936 uint32_t mask_ecx, mask_edx;
940 937 struct cpuid_info *cpi;
941 938 struct cpuid_regs *cp;
942 939 int xcpuid;
943 940 #if !defined(__xpv)
944 941 extern int idle_cpu_prefer_mwait;
945 942 #endif
946 943
947 944 /*
948 945 * Space statically allocated for BSP, ensure pointer is set
949 946 */
950 947 if (cpu->cpu_id == 0) {
951 948 if (cpu->cpu_m.mcpu_cpi == NULL)
952 949 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
953 950 }
954 951
955 952 add_x86_feature(featureset, X86FSET_CPUID);
956 953
957 954 cpi = cpu->cpu_m.mcpu_cpi;
958 955 ASSERT(cpi != NULL);
959 956 cp = &cpi->cpi_std[0];
960 957 cp->cp_eax = 0;
961 958 cpi->cpi_maxeax = __cpuid_insn(cp);
962 959 {
963 960 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
964 961 *iptr++ = cp->cp_ebx;
965 962 *iptr++ = cp->cp_edx;
966 963 *iptr++ = cp->cp_ecx;
967 964 *(char *)&cpi->cpi_vendorstr[12] = '\0';
968 965 }
969 966
970 967 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
971 968 x86_vendor = cpi->cpi_vendor; /* for compatibility */
972 969
973 970 /*
974 971 * Limit the range in case of weird hardware
975 972 */
976 973 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
977 974 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
978 975 if (cpi->cpi_maxeax < 1)
979 976 goto pass1_done;
980 977
981 978 cp = &cpi->cpi_std[1];
982 979 cp->cp_eax = 1;
983 980 (void) __cpuid_insn(cp);
984 981
985 982 /*
986 983 * Extract identifying constants for easy access.
987 984 */
988 985 cpi->cpi_model = CPI_MODEL(cpi);
989 986 cpi->cpi_family = CPI_FAMILY(cpi);
990 987
991 988 if (cpi->cpi_family == 0xf)
992 989 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
993 990
994 991 /*
995 992 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
996 993 * Intel, and presumably everyone else, uses model == 0xf, as
997 994 * one would expect (max value means possible overflow). Sigh.
998 995 */
999 996
1000 997 switch (cpi->cpi_vendor) {
1001 998 case X86_VENDOR_Intel:
1002 999 if (IS_EXTENDED_MODEL_INTEL(cpi))
1003 1000 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1004 1001 break;
1005 1002 case X86_VENDOR_AMD:
1006 1003 if (CPI_FAMILY(cpi) == 0xf)
1007 1004 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1008 1005 break;
1009 1006 default:
1010 1007 if (cpi->cpi_model == 0xf)
1011 1008 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1012 1009 break;
1013 1010 }
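
A concrete instance of the rules above, using an illustrative Sandy Bridge-era fn 1 %eax value of 0x000206a7:

	uint32_t eax = 0x000206a7;
	uint_t family = BITX(eax, 11, 8);	/* 0x6 */
	uint_t model = BITX(eax, 7, 4);		/* 0xa */

	if (family == 0xf)
		family += BITX(eax, 27, 20);	/* not taken here */
	if (family == 0x6 || family >= 0xf)	/* Intel extended-model rule */
		model += BITX(eax, 19, 16) << 4;	/* model == 0x2a */
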
1014 1011
1015 1012 cpi->cpi_step = CPI_STEP(cpi);
1016 1013 cpi->cpi_brandid = CPI_BRANDID(cpi);
1017 1014
1018 1015 /*
1019 1016 * *default* assumptions:
1020 1017 * - believe %edx feature word
1021 1018 * - ignore %ecx feature word
1022 1019 * - 32-bit virtual and physical addressing
1023 1020 */
1024 1021 mask_edx = 0xffffffff;
1025 1022 mask_ecx = 0;
1026 1023
1027 1024 cpi->cpi_pabits = cpi->cpi_vabits = 32;
1028 1025
1029 1026 switch (cpi->cpi_vendor) {
1030 1027 case X86_VENDOR_Intel:
1031 1028 if (cpi->cpi_family == 5)
1032 1029 x86_type = X86_TYPE_P5;
1033 1030 else if (IS_LEGACY_P6(cpi)) {
1034 1031 x86_type = X86_TYPE_P6;
1035 1032 pentiumpro_bug4046376 = 1;
1036 - pentiumpro_bug4064495 = 1;
1037 1033 /*
1038 1034 * Clear the SEP bit when it was set erroneously
1039 1035 */
1040 1036 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
1041 1037 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
1042 1038 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
1043 1039 x86_type = X86_TYPE_P4;
1044 1040 /*
1045 1041 * We don't currently depend on any of the %ecx
1046 1042 * features until Prescott, so we'll only check
1047 1043 * this from P4 onwards. We might want to revisit
1048 1044 * that idea later.
1049 1045 */
1050 1046 mask_ecx = 0xffffffff;
1051 1047 } else if (cpi->cpi_family > 0xf)
1052 1048 mask_ecx = 0xffffffff;
1053 1049 /*
1054 1050 * We don't support MONITOR/MWAIT if leaf 5 is not available
1055 1051 * to obtain the monitor linesize.
1056 1052 */
1057 1053 if (cpi->cpi_maxeax < 5)
1058 1054 mask_ecx &= ~CPUID_INTC_ECX_MON;
1059 1055 break;
1060 1056 case X86_VENDOR_IntelClone:
1061 1057 default:
1062 1058 break;
1063 1059 case X86_VENDOR_AMD:
1064 1060 #if defined(OPTERON_ERRATUM_108)
1065 1061 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1066 1062 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1067 1063 cpi->cpi_model = 0xc;
1068 1064 } else
1069 1065 #endif
1070 1066 if (cpi->cpi_family == 5) {
1071 1067 /*
1072 1068 * AMD K5 and K6
1073 1069 *
1074 1070 * These CPUs have an incomplete implementation
1075 1071 * of MCA/MCE which we mask away.
1076 1072 */
1077 1073 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1078 1074
1079 1075 /*
1080 1076 * Model 0 uses the wrong (APIC) bit
1081 1077 * to indicate PGE. Fix it here.
1082 1078 */
1083 1079 if (cpi->cpi_model == 0) {
1084 1080 if (cp->cp_edx & 0x200) {
1085 1081 cp->cp_edx &= ~0x200;
1086 1082 cp->cp_edx |= CPUID_INTC_EDX_PGE;
1087 1083 }
1088 1084 }
1089 1085
1090 1086 /*
1091 1087 * Early models had problems w/ MMX; disable.
1092 1088 */
1093 1089 if (cpi->cpi_model < 6)
1094 1090 mask_edx &= ~CPUID_INTC_EDX_MMX;
1095 1091 }
1096 1092
1097 1093 /*
1098 1094 * For newer families, SSE3 and CX16, at least, are valid;
1099 1095 * enable all
1100 1096 */
1101 1097 if (cpi->cpi_family >= 0xf)
1102 1098 mask_ecx = 0xffffffff;
1103 1099 /*
1104 1100 * We don't support MONITOR/MWAIT if leaf 5 is not available
1105 1101 * to obtain the monitor linesize.
1106 1102 */
1107 1103 if (cpi->cpi_maxeax < 5)
1108 1104 mask_ecx &= ~CPUID_INTC_ECX_MON;
1109 1105
1110 1106 #if !defined(__xpv)
1111 1107 /*
1112 1108 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1113 1109 * processors. AMD does not intend MWAIT to be used in the cpu
1114 1110 * idle loop on current and future processors. 10h and future
1115 1111 * AMD processors use more power in MWAIT than HLT.
1116 1112 * Pre-family-10h Opterons do not have the MWAIT instruction.
1117 1113 */
1118 1114 idle_cpu_prefer_mwait = 0;
1119 1115 #endif
1120 1116
1121 1117 break;
1122 1118 case X86_VENDOR_TM:
1123 1119 /*
1124 1120 * workaround the NT workaround in CMS 4.1
1125 1121 */
1126 1122 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1127 1123 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1128 1124 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1129 1125 break;
1130 1126 case X86_VENDOR_Centaur:
1131 1127 /*
1132 1128 * workaround the NT workarounds again
1133 1129 */
1134 1130 if (cpi->cpi_family == 6)
1135 1131 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1136 1132 break;
1137 1133 case X86_VENDOR_Cyrix:
1138 1134 /*
1139 1135 * We rely heavily on the probing in locore
1140 1136 * to actually figure out what parts, if any,
1141 1137 * of the Cyrix cpuid instruction to believe.
1142 1138 */
1143 1139 switch (x86_type) {
1144 1140 case X86_TYPE_CYRIX_486:
1145 1141 mask_edx = 0;
1146 1142 break;
1147 1143 case X86_TYPE_CYRIX_6x86:
1148 1144 mask_edx = 0;
1149 1145 break;
1150 1146 case X86_TYPE_CYRIX_6x86L:
1151 1147 mask_edx =
1152 1148 CPUID_INTC_EDX_DE |
1153 1149 CPUID_INTC_EDX_CX8;
1154 1150 break;
1155 1151 case X86_TYPE_CYRIX_6x86MX:
1156 1152 mask_edx =
1157 1153 CPUID_INTC_EDX_DE |
1158 1154 CPUID_INTC_EDX_MSR |
1159 1155 CPUID_INTC_EDX_CX8 |
1160 1156 CPUID_INTC_EDX_PGE |
1161 1157 CPUID_INTC_EDX_CMOV |
1162 1158 CPUID_INTC_EDX_MMX;
1163 1159 break;
1164 1160 case X86_TYPE_CYRIX_GXm:
1165 1161 mask_edx =
1166 1162 CPUID_INTC_EDX_MSR |
1167 1163 CPUID_INTC_EDX_CX8 |
1168 1164 CPUID_INTC_EDX_CMOV |
1169 1165 CPUID_INTC_EDX_MMX;
1170 1166 break;
1171 1167 case X86_TYPE_CYRIX_MediaGX:
1172 1168 break;
1173 1169 case X86_TYPE_CYRIX_MII:
1174 1170 case X86_TYPE_VIA_CYRIX_III:
1175 1171 mask_edx =
1176 1172 CPUID_INTC_EDX_DE |
1177 1173 CPUID_INTC_EDX_TSC |
1178 1174 CPUID_INTC_EDX_MSR |
1179 1175 CPUID_INTC_EDX_CX8 |
1180 1176 CPUID_INTC_EDX_PGE |
1181 1177 CPUID_INTC_EDX_CMOV |
1182 1178 CPUID_INTC_EDX_MMX;
1183 1179 break;
1184 1180 default:
1185 1181 break;
1186 1182 }
1187 1183 break;
1188 1184 }
1189 1185
1190 1186 #if defined(__xpv)
1191 1187 /*
1192 1188 * Do not support MONITOR/MWAIT under a hypervisor
1193 1189 */
1194 1190 mask_ecx &= ~CPUID_INTC_ECX_MON;
1195 1191 /*
1196 1192 * Do not support XSAVE under a hypervisor for now
1197 1193 */
1198 1194 xsave_force_disable = B_TRUE;
1199 1195
1200 1196 #endif /* __xpv */
1201 1197
1202 1198 if (xsave_force_disable) {
1203 1199 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1204 1200 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1205 1201 mask_ecx &= ~CPUID_INTC_ECX_F16C;
1206 1202 }
1207 1203
1208 1204 /*
1209 1205 * Now we've figured out the masks that determine
1210 1206 * which bits we choose to believe, apply the masks
1211 1207 * to the feature words, then map the kernel's view
1212 1208 * of these feature words into its feature word.
1213 1209 */
1214 1210 cp->cp_edx &= mask_edx;
1215 1211 cp->cp_ecx &= mask_ecx;
1216 1212
1217 1213 /*
1218 1214 * apply any platform restrictions (we don't call this
1219 1215 * immediately after __cpuid_insn here, because we need the
1220 1216 * workarounds applied above first)
1221 1217 */
1222 1218 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1223 1219
1224 1220 /*
1225 1221 * fold in overrides from the "eeprom" mechanism
1226 1222 */
1227 1223 cp->cp_edx |= cpuid_feature_edx_include;
1228 1224 cp->cp_edx &= ~cpuid_feature_edx_exclude;
1229 1225
1230 1226 cp->cp_ecx |= cpuid_feature_ecx_include;
1231 1227 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1232 1228
1233 1229 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1234 1230 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1235 1231 }
1236 1232 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1237 1233 add_x86_feature(featureset, X86FSET_TSC);
1238 1234 }
1239 1235 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1240 1236 add_x86_feature(featureset, X86FSET_MSR);
1241 1237 }
1242 1238 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1243 1239 add_x86_feature(featureset, X86FSET_MTRR);
1244 1240 }
1245 1241 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1246 1242 add_x86_feature(featureset, X86FSET_PGE);
1247 1243 }
1248 1244 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1249 1245 add_x86_feature(featureset, X86FSET_CMOV);
1250 1246 }
1251 1247 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1252 1248 add_x86_feature(featureset, X86FSET_MMX);
1253 1249 }
1254 1250 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1255 1251 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1256 1252 add_x86_feature(featureset, X86FSET_MCA);
1257 1253 }
1258 1254 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1259 1255 add_x86_feature(featureset, X86FSET_PAE);
1260 1256 }
1261 1257 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1262 1258 add_x86_feature(featureset, X86FSET_CX8);
1263 1259 }
1264 1260 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1265 1261 add_x86_feature(featureset, X86FSET_CX16);
1266 1262 }
1267 1263 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1268 1264 add_x86_feature(featureset, X86FSET_PAT);
1269 1265 }
1270 1266 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1271 1267 add_x86_feature(featureset, X86FSET_SEP);
1272 1268 }
1273 1269 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1274 1270 /*
1275 1271 * In our implementation, fxsave/fxrstor
1276 1272 * are prerequisites before we'll even
1277 1273 * try and do SSE things.
1278 1274 */
1279 1275 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1280 1276 add_x86_feature(featureset, X86FSET_SSE);
1281 1277 }
1282 1278 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1283 1279 add_x86_feature(featureset, X86FSET_SSE2);
1284 1280 }
1285 1281 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1286 1282 add_x86_feature(featureset, X86FSET_SSE3);
1287 1283 }
1288 1284 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1289 1285 add_x86_feature(featureset, X86FSET_SSSE3);
1290 1286 }
1291 1287 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1292 1288 add_x86_feature(featureset, X86FSET_SSE4_1);
1293 1289 }
1294 1290 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1295 1291 add_x86_feature(featureset, X86FSET_SSE4_2);
1296 1292 }
1297 1293 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1298 1294 add_x86_feature(featureset, X86FSET_AES);
1299 1295 }
1300 1296 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1301 1297 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1302 1298 }
1303 1299
1304 1300 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1305 1301 add_x86_feature(featureset, X86FSET_XSAVE);
1306 1302
1307 1303 /* We only test AVX when there is XSAVE */
1308 1304 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1309 1305 add_x86_feature(featureset,
1310 1306 X86FSET_AVX);
1311 1307
1312 1308 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
1313 1309 add_x86_feature(featureset,
1314 1310 X86FSET_F16C);
1315 1311 }
1316 1312 }
1317 1313 }
1318 1314 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1319 1315 add_x86_feature(featureset, X86FSET_DE);
1320 1316 }
1321 1317 #if !defined(__xpv)
1322 1318 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1323 1319
1324 1320 /*
1325 1321 * We require the CLFLUSH instruction for erratum workaround
1326 1322 * to use MONITOR/MWAIT.
1327 1323 */
1328 1324 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1329 1325 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1330 1326 add_x86_feature(featureset, X86FSET_MWAIT);
1331 1327 } else {
1332 1328 extern int idle_cpu_assert_cflush_monitor;
1333 1329
1334 1330 /*
1335 1331 * All processors we are aware of which have
1336 1332 * MONITOR/MWAIT also have CLFLUSH.
1337 1333 */
1338 1334 if (idle_cpu_assert_cflush_monitor) {
1339 1335 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1340 1336 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1341 1337 }
1342 1338 }
1343 1339 }
1344 1340 #endif /* __xpv */
1345 1341
1346 1342 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1347 1343 add_x86_feature(featureset, X86FSET_VMX);
1348 1344 }
1349 1345
1350 1346 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
1351 1347 add_x86_feature(featureset, X86FSET_RDRAND);
1352 1348
1353 1349 /*
1354 1350 	 * Only needed the first time; the rest of the cpus follow suit.
1355 1351 	 * We only capture this for the boot cpu.
1356 1352 */
1357 1353 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1358 1354 add_x86_feature(featureset, X86FSET_CLFSH);
1359 1355 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1360 1356 }
1361 1357 if (is_x86_feature(featureset, X86FSET_PAE))
1362 1358 cpi->cpi_pabits = 36;
1363 1359
1364 1360 /*
1365 1361 * Hyperthreading configuration is slightly tricky on Intel
1366 1362 * and pure clones, and even trickier on AMD.
1367 1363 *
1368 1364 * (AMD chose to set the HTT bit on their CMP processors,
1369 1365 * even though they're not actually hyperthreaded. Thus it
1370 1366 * takes a bit more work to figure out what's really going
1371 1367 * on ... see the handling of the CMP_LGCY bit below)
1372 1368 */
1373 1369 if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1374 1370 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1375 1371 if (cpi->cpi_ncpu_per_chip > 1)
1376 1372 add_x86_feature(featureset, X86FSET_HTT);
1377 1373 } else {
1378 1374 cpi->cpi_ncpu_per_chip = 1;
1379 1375 }
1380 1376
1381 1377 /*
1382 1378 * Work on the "extended" feature information, doing
1383 1379 * some basic initialization for cpuid_pass2()
1384 1380 */
1385 1381 xcpuid = 0;
1386 1382 switch (cpi->cpi_vendor) {
1387 1383 case X86_VENDOR_Intel:
1388 1384 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1389 1385 xcpuid++;
1390 1386 break;
1391 1387 case X86_VENDOR_AMD:
1392 1388 if (cpi->cpi_family > 5 ||
1393 1389 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1394 1390 xcpuid++;
1395 1391 break;
1396 1392 case X86_VENDOR_Cyrix:
1397 1393 /*
1398 1394 * Only these Cyrix CPUs are -known- to support
1399 1395 * extended cpuid operations.
1400 1396 */
1401 1397 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1402 1398 x86_type == X86_TYPE_CYRIX_GXm)
1403 1399 xcpuid++;
1404 1400 break;
1405 1401 case X86_VENDOR_Centaur:
1406 1402 case X86_VENDOR_TM:
1407 1403 default:
1408 1404 xcpuid++;
1409 1405 break;
1410 1406 }
1411 1407
1412 1408 if (xcpuid) {
1413 1409 cp = &cpi->cpi_extd[0];
1414 1410 cp->cp_eax = 0x80000000;
1415 1411 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1416 1412 }
1417 1413
1418 1414 if (cpi->cpi_xmaxeax & 0x80000000) {
1419 1415
1420 1416 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1421 1417 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1422 1418
1423 1419 switch (cpi->cpi_vendor) {
1424 1420 case X86_VENDOR_Intel:
1425 1421 case X86_VENDOR_AMD:
1426 1422 if (cpi->cpi_xmaxeax < 0x80000001)
1427 1423 break;
1428 1424 cp = &cpi->cpi_extd[1];
1429 1425 cp->cp_eax = 0x80000001;
1430 1426 (void) __cpuid_insn(cp);
1431 1427
1432 1428 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1433 1429 cpi->cpi_family == 5 &&
1434 1430 cpi->cpi_model == 6 &&
1435 1431 cpi->cpi_step == 6) {
1436 1432 /*
1437 1433 			 * K6 model 6 uses bit 10 to indicate SYSC.
1438 1434 * Later models use bit 11. Fix it here.
1439 1435 */
1440 1436 if (cp->cp_edx & 0x400) {
1441 1437 cp->cp_edx &= ~0x400;
1442 1438 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1443 1439 }
1444 1440 }
1445 1441
1446 1442 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1447 1443
1448 1444 /*
1449 1445 * Compute the additions to the kernel's feature word.
1450 1446 */
1451 1447 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1452 1448 add_x86_feature(featureset, X86FSET_NX);
1453 1449 }
1454 1450
1455 1451 /*
1456 1452 			 * Regardless of whether or not we boot 64-bit,
1457 1453 * we should have a way to identify whether
1458 1454 * the CPU is capable of running 64-bit.
1459 1455 */
1460 1456 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1461 1457 add_x86_feature(featureset, X86FSET_64);
1462 1458 }
1463 1459
1464 1460 #if defined(__amd64)
1465 1461 /* 1 GB large page - enable only for 64 bit kernel */
1466 1462 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1467 1463 add_x86_feature(featureset, X86FSET_1GPG);
1468 1464 }
1469 1465 #endif
1470 1466
1471 1467 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1472 1468 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1473 1469 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1474 1470 add_x86_feature(featureset, X86FSET_SSE4A);
1475 1471 }
1476 1472
1477 1473 /*
1478 1474 * If both the HTT and CMP_LGCY bits are set,
1479 1475 * then we're not actually HyperThreaded. Read
1480 1476 * "AMD CPUID Specification" for more details.
1481 1477 */
1482 1478 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1483 1479 is_x86_feature(featureset, X86FSET_HTT) &&
1484 1480 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1485 1481 remove_x86_feature(featureset, X86FSET_HTT);
1486 1482 add_x86_feature(featureset, X86FSET_CMP);
1487 1483 }
1488 1484 #if defined(__amd64)
1489 1485 /*
1490 1486 * It's really tricky to support syscall/sysret in
1491 1487 * the i386 kernel; we rely on sysenter/sysexit
1492 1488 * instead. In the amd64 kernel, things are -way-
1493 1489 * better.
1494 1490 */
1495 1491 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1496 1492 add_x86_feature(featureset, X86FSET_ASYSC);
1497 1493 }
1498 1494
1499 1495 /*
1500 1496 * While we're thinking about system calls, note
1501 1497 * that AMD processors don't support sysenter
1502 1498 * in long mode at all, so don't try to program them.
1503 1499 */
1504 1500 if (x86_vendor == X86_VENDOR_AMD) {
1505 1501 remove_x86_feature(featureset, X86FSET_SEP);
1506 1502 }
1507 1503 #endif
1508 1504 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1509 1505 add_x86_feature(featureset, X86FSET_TSCP);
1510 1506 }
1511 1507
1512 1508 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1513 1509 add_x86_feature(featureset, X86FSET_SVM);
1514 1510 }
1515 1511
1516 1512 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1517 1513 add_x86_feature(featureset, X86FSET_TOPOEXT);
1518 1514 }
1519 1515 break;
1520 1516 default:
1521 1517 break;
1522 1518 }
1523 1519
1524 1520 /*
1525 1521 * Get CPUID data about processor cores and hyperthreads.
1526 1522 */
1527 1523 switch (cpi->cpi_vendor) {
1528 1524 case X86_VENDOR_Intel:
1529 1525 if (cpi->cpi_maxeax >= 4) {
1530 1526 cp = &cpi->cpi_std[4];
1531 1527 cp->cp_eax = 4;
1532 1528 cp->cp_ecx = 0;
1533 1529 (void) __cpuid_insn(cp);
1534 1530 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1535 1531 }
1536 1532 /*FALLTHROUGH*/
1537 1533 case X86_VENDOR_AMD:
1538 1534 if (cpi->cpi_xmaxeax < 0x80000008)
1539 1535 break;
1540 1536 cp = &cpi->cpi_extd[8];
1541 1537 cp->cp_eax = 0x80000008;
1542 1538 (void) __cpuid_insn(cp);
1543 1539 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1544 1540
1545 1541 /*
1546 1542 * Virtual and physical address limits from
1547 1543 * cpuid override previously guessed values.
1548 1544 */
1549 1545 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1550 1546 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1551 1547 break;
1552 1548 default:
1553 1549 break;
1554 1550 }
1555 1551
1556 1552 /*
1557 1553 * Derive the number of cores per chip
1558 1554 */
1559 1555 switch (cpi->cpi_vendor) {
1560 1556 case X86_VENDOR_Intel:
1561 1557 if (cpi->cpi_maxeax < 4) {
1562 1558 cpi->cpi_ncore_per_chip = 1;
1563 1559 break;
1564 1560 } else {
1565 1561 cpi->cpi_ncore_per_chip =
1566 1562 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1567 1563 }
1568 1564 break;
1569 1565 case X86_VENDOR_AMD:
1570 1566 if (cpi->cpi_xmaxeax < 0x80000008) {
1571 1567 cpi->cpi_ncore_per_chip = 1;
1572 1568 break;
1573 1569 } else {
1574 1570 /*
1575 1571 			 * On family 0xf cpuid fn 0x80000008 ECX[7:0] "NC" is
1576 1572 * 1 less than the number of physical cores on
1577 1573 * the chip. In family 0x10 this value can
1578 1574 * be affected by "downcoring" - it reflects
1579 1575 * 1 less than the number of cores actually
1580 1576 * enabled on this node.
1581 1577 */
1582 1578 cpi->cpi_ncore_per_chip =
1583 1579 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1584 1580 }
1585 1581 break;
1586 1582 default:
1587 1583 cpi->cpi_ncore_per_chip = 1;
1588 1584 break;
1589 1585 }
1590 1586
1591 1587 /*
1592 1588 * Get CPUID data about TSC Invariance in Deep C-State.
1593 1589 */
1594 1590 switch (cpi->cpi_vendor) {
1595 1591 case X86_VENDOR_Intel:
1596 1592 		if (cpi->cpi_xmaxeax >= 0x80000007) {
1597 1593 cp = &cpi->cpi_extd[7];
1598 1594 cp->cp_eax = 0x80000007;
1599 1595 cp->cp_ecx = 0;
1600 1596 (void) __cpuid_insn(cp);
1601 1597 }
1602 1598 break;
1603 1599 default:
1604 1600 break;
1605 1601 }
1606 1602 } else {
1607 1603 cpi->cpi_ncore_per_chip = 1;
1608 1604 }
1609 1605
1610 1606 /*
1611 1607 * If more than one core, then this processor is CMP.
1612 1608 */
1613 1609 if (cpi->cpi_ncore_per_chip > 1) {
1614 1610 add_x86_feature(featureset, X86FSET_CMP);
1615 1611 }
1616 1612
1617 1613 /*
1618 1614 * If the number of cores is the same as the number
1619 1615 * of CPUs, then we cannot have HyperThreading.
1620 1616 */
1621 1617 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1622 1618 remove_x86_feature(featureset, X86FSET_HTT);
1623 1619 }
1624 1620
1625 1621 cpi->cpi_apicid = CPI_APIC_ID(cpi);
1626 1622 cpi->cpi_procnodes_per_pkg = 1;
1627 1623 cpi->cpi_cores_per_compunit = 1;
1628 1624 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1629 1625 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1630 1626 /*
1631 1627 * Single-core single-threaded processors.
1632 1628 */
1633 1629 cpi->cpi_chipid = -1;
1634 1630 cpi->cpi_clogid = 0;
1635 1631 cpi->cpi_coreid = cpu->cpu_id;
1636 1632 cpi->cpi_pkgcoreid = 0;
1637 1633 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1638 1634 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1639 1635 else
1640 1636 cpi->cpi_procnodeid = cpi->cpi_chipid;
1641 1637 } else if (cpi->cpi_ncpu_per_chip > 1) {
1642 1638 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1643 1639 cpuid_intel_getids(cpu, featureset);
1644 1640 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1645 1641 cpuid_amd_getids(cpu);
1646 1642 else {
1647 1643 /*
1648 1644 * All other processors are currently
1649 1645 * assumed to have single cores.
1650 1646 */
1651 1647 cpi->cpi_coreid = cpi->cpi_chipid;
1652 1648 cpi->cpi_pkgcoreid = 0;
1653 1649 cpi->cpi_procnodeid = cpi->cpi_chipid;
1654 1650 cpi->cpi_compunitid = cpi->cpi_chipid;
1655 1651 }
1656 1652 }
1657 1653
1658 1654 /*
1659 1655 * Synthesize chip "revision" and socket type
1660 1656 */
1661 1657 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1662 1658 cpi->cpi_model, cpi->cpi_step);
1663 1659 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1664 1660 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1665 1661 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1666 1662 cpi->cpi_model, cpi->cpi_step);
1667 1663
1668 1664 pass1_done:
1669 1665 cpi->cpi_pass = 1;
1670 1666 }
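/*
 * A worked example of the CMP/HTT deduction at the end of pass 1 above:
 * with cpi_ncpu_per_chip == 4 and cpi_ncore_per_chip == 2 there are
 * 4 / 2 == 2 hardware threads per core, so X86FSET_HTT survives; when
 * the two counts are equal, every logical CPU is its own core and
 * X86FSET_HTT is removed.
 */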
1671 1667
1672 1668 /*
1673 1669 * Make copies of the cpuid table entries we depend on, in
1674 1670 * part for ease of parsing now, in part so that we have only
1675 1671 * one place to correct any of it, in part for ease of
1676 1672 * later export to userland, and in part so we can look at
1677 1673 * this stuff in a crash dump.
1678 1674 */
1679 1675
1680 1676 /*ARGSUSED*/
1681 1677 void
1682 1678 cpuid_pass2(cpu_t *cpu)
1683 1679 {
1684 1680 uint_t n, nmax;
1685 1681 int i;
1686 1682 struct cpuid_regs *cp;
1687 1683 uint8_t *dp;
1688 1684 uint32_t *iptr;
1689 1685 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1690 1686
1691 1687 ASSERT(cpi->cpi_pass == 1);
1692 1688
1693 1689 if (cpi->cpi_maxeax < 1)
1694 1690 goto pass2_done;
1695 1691
1696 1692 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1697 1693 nmax = NMAX_CPI_STD;
1698 1694 /*
1699 1695 * (We already handled n == 0 and n == 1 in pass 1)
1700 1696 */
1701 1697 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1702 1698 cp->cp_eax = n;
1703 1699
1704 1700 /*
1705 1701 * CPUID function 4 expects %ecx to be initialized
1706 1702 * with an index which indicates which cache to return
1707 1703 * information about. The OS is expected to call function 4
1708 1704 * with %ecx set to 0, 1, 2, ... until it returns with
1709 1705 * EAX[4:0] set to 0, which indicates there are no more
1710 1706 * caches.
1711 1707 *
1712 1708 * Here, populate cpi_std[4] with the information returned by
1713 1709 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1714 1710 * when dynamic memory allocation becomes available.
1715 1711 *
1716 1712 * Note: we need to explicitly initialize %ecx here, since
1717 1713 * function 4 may have been previously invoked.
1718 1714 */
1719 1715 if (n == 4)
1720 1716 cp->cp_ecx = 0;
1721 1717
1722 1718 (void) __cpuid_insn(cp);
1723 1719 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1724 1720 switch (n) {
1725 1721 case 2:
1726 1722 /*
1727 1723 * "the lower 8 bits of the %eax register
1728 1724 * contain a value that identifies the number
1729 1725 * of times the cpuid [instruction] has to be
1730 1726 * executed to obtain a complete image of the
1731 1727 * processor's caching systems."
1732 1728 *
1733 1729 * How *do* they make this stuff up?
1734 1730 */
1735 1731 cpi->cpi_ncache = sizeof (*cp) *
1736 1732 BITX(cp->cp_eax, 7, 0);
1737 1733 if (cpi->cpi_ncache == 0)
1738 1734 break;
1739 1735 cpi->cpi_ncache--; /* skip count byte */
1740 1736
1741 1737 /*
1742 1738 * Well, for now, rather than attempt to implement
1743 1739 * this slightly dubious algorithm, we just look
1744 1740 * at the first 15 ..
1745 1741 */
1746 1742 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1747 1743 cpi->cpi_ncache = sizeof (*cp) - 1;
1748 1744
1749 1745 dp = cpi->cpi_cacheinfo;
1750 1746 if (BITX(cp->cp_eax, 31, 31) == 0) {
1751 1747 uint8_t *p = (void *)&cp->cp_eax;
1752 1748 for (i = 1; i < 4; i++)
1753 1749 if (p[i] != 0)
1754 1750 *dp++ = p[i];
1755 1751 }
1756 1752 if (BITX(cp->cp_ebx, 31, 31) == 0) {
1757 1753 uint8_t *p = (void *)&cp->cp_ebx;
1758 1754 for (i = 0; i < 4; i++)
1759 1755 if (p[i] != 0)
1760 1756 *dp++ = p[i];
1761 1757 }
1762 1758 if (BITX(cp->cp_ecx, 31, 31) == 0) {
1763 1759 uint8_t *p = (void *)&cp->cp_ecx;
1764 1760 for (i = 0; i < 4; i++)
1765 1761 if (p[i] != 0)
1766 1762 *dp++ = p[i];
1767 1763 }
1768 1764 if (BITX(cp->cp_edx, 31, 31) == 0) {
1769 1765 uint8_t *p = (void *)&cp->cp_edx;
1770 1766 for (i = 0; i < 4; i++)
1771 1767 if (p[i] != 0)
1772 1768 *dp++ = p[i];
1773 1769 }
1774 1770 break;
1775 1771
1776 1772 case 3: /* Processor serial number, if PSN supported */
1777 1773 break;
1778 1774
1779 1775 case 4: /* Deterministic cache parameters */
1780 1776 break;
1781 1777
1782 1778 case 5: /* Monitor/Mwait parameters */
1783 1779 {
1784 1780 size_t mwait_size;
1785 1781
1786 1782 /*
1787 1783 * check cpi_mwait.support which was set in cpuid_pass1
1788 1784 */
1789 1785 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1790 1786 break;
1791 1787
1792 1788 /*
1793 1789 * Protect ourselves from an insane mwait line size.
1794 1790 * Workaround for incomplete hardware emulator(s).
1795 1791 */
1796 1792 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1797 1793 if (mwait_size < sizeof (uint32_t) ||
1798 1794 !ISP2(mwait_size)) {
1799 1795 #if DEBUG
1800 1796 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1801 1797 "size %ld", cpu->cpu_id, (long)mwait_size);
1802 1798 #endif
1803 1799 break;
1804 1800 }
1805 1801
1806 1802 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1807 1803 cpi->cpi_mwait.mon_max = mwait_size;
1808 1804 if (MWAIT_EXTENSION(cpi)) {
1809 1805 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1810 1806 if (MWAIT_INT_ENABLE(cpi))
1811 1807 cpi->cpi_mwait.support |=
1812 1808 MWAIT_ECX_INT_ENABLE;
1813 1809 }
1814 1810 break;
1815 1811 }
1816 1812 default:
1817 1813 break;
1818 1814 }
1819 1815 }
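	/*
	 * A worked example of the leaf-2 descriptor scan above: each of
	 * %eax/%ebx/%ecx/%edx carries four one-byte cache descriptors,
	 * a set bit 31 marks that register's contents invalid, and the
	 * low byte of %eax is the iteration count rather than a
	 * descriptor (hence the eax loop starting at i = 1).  For
	 * cp_ebx == 0x40404040, bit 31 is clear, so all four 0x40
	 * descriptor bytes would be appended to cpi_cacheinfo.
	 */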
1820 1816
1821 1817 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1822 1818 struct cpuid_regs regs;
1823 1819
1824 1820 cp = &regs;
1825 1821 cp->cp_eax = 0xB;
1826 1822 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1827 1823
1828 1824 (void) __cpuid_insn(cp);
1829 1825
1830 1826 /*
1831 1827 * Check that CPUID.(EAX=0BH, ECX=0H):EBX is non-zero, which
1832 1828 * indicates that the extended topology enumeration leaf is
1833 1829 * available.
1834 1830 */
1835 1831 if (cp->cp_ebx) {
1836 1832 uint32_t x2apic_id;
1837 1833 uint_t coreid_shift = 0;
1838 1834 uint_t ncpu_per_core = 1;
1839 1835 uint_t chipid_shift = 0;
1840 1836 uint_t ncpu_per_chip = 1;
1841 1837 uint_t i;
1842 1838 uint_t level;
1843 1839
1844 1840 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1845 1841 cp->cp_eax = 0xB;
1846 1842 cp->cp_ecx = i;
1847 1843
1848 1844 (void) __cpuid_insn(cp);
1849 1845 level = CPI_CPU_LEVEL_TYPE(cp);
1850 1846
1851 1847 if (level == 1) {
1852 1848 x2apic_id = cp->cp_edx;
1853 1849 coreid_shift = BITX(cp->cp_eax, 4, 0);
1854 1850 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1855 1851 } else if (level == 2) {
1856 1852 x2apic_id = cp->cp_edx;
1857 1853 chipid_shift = BITX(cp->cp_eax, 4, 0);
1858 1854 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1859 1855 }
1860 1856 }
1861 1857
1862 1858 cpi->cpi_apicid = x2apic_id;
1863 1859 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1864 1860 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1865 1861 ncpu_per_core;
1866 1862 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1867 1863 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1868 1864 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1869 1865 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1870 1866 }
1871 1867
1872 1868 /* Make cp NULL so that we don't stumble on others */
1873 1869 cp = NULL;
1874 1870 }
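	/*
	 * A worked example of the leaf-0xB derivation above: suppose
	 * level 1 reports coreid_shift == 1 (two threads per core) and
	 * level 2 reports chipid_shift == 3 (eight logical CPUs per
	 * chip).  For x2apic_id == 0xb this gives chipid = 0xb >> 3 = 1,
	 * clogid = 0xb & 0x7 = 3, coreid = 0xb >> 1 = 5, and
	 * pkgcoreid = 3 >> 1 = 1.
	 */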
1875 1871
1876 1872 /*
1877 1873 * XSAVE enumeration
1878 1874 */
1879 1875 if (cpi->cpi_maxeax >= 0xD) {
1880 1876 struct cpuid_regs regs;
1881 1877 boolean_t cpuid_d_valid = B_TRUE;
1882 1878
1883 1879 cp = &regs;
1884 1880 cp->cp_eax = 0xD;
1885 1881 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1886 1882
1887 1883 (void) __cpuid_insn(cp);
1888 1884
1889 1885 /*
1890 1886 * Sanity checks for debug
1891 1887 */
1892 1888 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1893 1889 (cp->cp_eax & XFEATURE_SSE) == 0) {
1894 1890 cpuid_d_valid = B_FALSE;
1895 1891 }
1896 1892
1897 1893 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1898 1894 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1899 1895 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1900 1896
1901 1897 /*
1902 1898 * If the hw supports AVX, get the size and offset in the save
1903 1899 * area for the ymm state.
1904 1900 */
1905 1901 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1906 1902 cp->cp_eax = 0xD;
1907 1903 cp->cp_ecx = 2;
1908 1904 cp->cp_edx = cp->cp_ebx = 0;
1909 1905
1910 1906 (void) __cpuid_insn(cp);
1911 1907
1912 1908 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1913 1909 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1914 1910 cpuid_d_valid = B_FALSE;
1915 1911 }
1916 1912
1917 1913 cpi->cpi_xsave.ymm_size = cp->cp_eax;
1918 1914 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1919 1915 }
1920 1916
1921 1917 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1922 1918 xsave_state_size = 0;
1923 1919 } else if (cpuid_d_valid) {
1924 1920 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1925 1921 } else {
1926 1922 /* Broken CPUID 0xD, probably in HVM */
1927 1923 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1928 1924 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1929 1925 ", ymm_size = %d, ymm_offset = %d\n",
1930 1926 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1931 1927 cpi->cpi_xsave.xsav_hw_features_high,
1932 1928 (int)cpi->cpi_xsave.xsav_max_size,
1933 1929 (int)cpi->cpi_xsave.ymm_size,
1934 1930 (int)cpi->cpi_xsave.ymm_offset);
1935 1931
1936 1932 if (xsave_state_size != 0) {
1937 1933 /*
1938 1934 * This must be a non-boot CPU. We cannot
1939 1935 * continue, because boot cpu has already
1940 1936 * enabled XSAVE.
1941 1937 */
1942 1938 ASSERT(cpu->cpu_id != 0);
1943 1939 cmn_err(CE_PANIC, "cpu%d: we have already "
1944 1940 "enabled XSAVE on boot cpu, cannot "
1945 1941 "continue.", cpu->cpu_id);
1946 1942 } else {
1947 1943 /*
1948 1944 * Must be from boot CPU, OK to disable XSAVE.
1949 1945 */
1950 1946 ASSERT(cpu->cpu_id == 0);
1951 1947 remove_x86_feature(x86_featureset,
1952 1948 X86FSET_XSAVE);
1953 1949 remove_x86_feature(x86_featureset, X86FSET_AVX);
1954 1950 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1955 1951 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1956 1952 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_F16C;
1957 1953 xsave_force_disable = B_TRUE;
1958 1954 }
1959 1955 }
1960 1956 }
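	/*
	 * For reference, the leaf-0xD sub-leaf protocol used above:
	 * sub-leaf 0 reports the supported XSAVE feature bits in
	 * %eax:%edx and the maximum save-area size in %ecx, while
	 * sub-leaf i (for each supported feature bit i) reports that
	 * component's size in %eax and its offset within the save area
	 * in %ebx.  AVX ymm state is component 2, which is why cp_ecx
	 * is set to 2 before the second __cpuid_insn() call.
	 */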
1961 1957
1962 1958
1963 1959 if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1964 1960 goto pass2_done;
1965 1961
1966 1962 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1967 1963 nmax = NMAX_CPI_EXTD;
1968 1964 /*
1969 1965 * Copy the extended properties, fixing them as we go.
1970 1966 * (We already handled n == 0 and n == 1 in pass 1)
1971 1967 */
1972 1968 iptr = (void *)cpi->cpi_brandstr;
1973 1969 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1974 1970 cp->cp_eax = 0x80000000 + n;
1975 1971 (void) __cpuid_insn(cp);
1976 1972 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1977 1973 switch (n) {
1978 1974 case 2:
1979 1975 case 3:
1980 1976 case 4:
1981 1977 /*
1982 1978 * Extract the brand string
1983 1979 */
1984 1980 *iptr++ = cp->cp_eax;
1985 1981 *iptr++ = cp->cp_ebx;
1986 1982 *iptr++ = cp->cp_ecx;
1987 1983 *iptr++ = cp->cp_edx;
1988 1984 break;
1989 1985 case 5:
1990 1986 switch (cpi->cpi_vendor) {
1991 1987 case X86_VENDOR_AMD:
1992 1988 /*
1993 1989 * The Athlon and Duron were the first
1994 1990 * parts to report the sizes of the
1995 1991 * TLB for large pages. Before then,
1996 1992 * we don't trust the data.
1997 1993 */
1998 1994 if (cpi->cpi_family < 6 ||
1999 1995 (cpi->cpi_family == 6 &&
2000 1996 cpi->cpi_model < 1))
2001 1997 cp->cp_eax = 0;
2002 1998 break;
2003 1999 default:
2004 2000 break;
2005 2001 }
2006 2002 break;
2007 2003 case 6:
2008 2004 switch (cpi->cpi_vendor) {
2009 2005 case X86_VENDOR_AMD:
2010 2006 /*
2011 2007 * The Athlon and Duron were the first
2012 2008 * AMD parts with L2 TLB's.
2013 2009 * Before then, don't trust the data.
2014 2010 */
2015 2011 if (cpi->cpi_family < 6 ||
2016 2012 cpi->cpi_family == 6 &&
2017 2013 cpi->cpi_model < 1)
2018 2014 cp->cp_eax = cp->cp_ebx = 0;
2019 2015 /*
2020 2016 * AMD Duron rev A0 reports L2
2021 2017 * cache size incorrectly as 1K
2022 2018 * when it is really 64K
2023 2019 */
2024 2020 if (cpi->cpi_family == 6 &&
2025 2021 cpi->cpi_model == 3 &&
2026 2022 cpi->cpi_step == 0) {
2027 2023 cp->cp_ecx &= 0xffff;
2028 2024 cp->cp_ecx |= 0x400000;
2029 2025 }
2030 2026 break;
2031 2027 case X86_VENDOR_Cyrix: /* VIA C3 */
2032 2028 /*
2033 2029 * VIA C3 processors are a bit messed
2034 2030 * up w.r.t. encoding cache sizes in %ecx
2035 2031 */
2036 2032 if (cpi->cpi_family != 6)
2037 2033 break;
2038 2034 /*
2039 2035 * model 7 and 8 were incorrectly encoded
2040 2036 *
2041 2037 * xxx is model 8 really broken?
2042 2038 */
2043 2039 if (cpi->cpi_model == 7 ||
2044 2040 cpi->cpi_model == 8)
2045 2041 cp->cp_ecx =
2046 2042 BITX(cp->cp_ecx, 31, 24) << 16 |
2047 2043 BITX(cp->cp_ecx, 23, 16) << 12 |
2048 2044 BITX(cp->cp_ecx, 15, 8) << 8 |
2049 2045 BITX(cp->cp_ecx, 7, 0);
2050 2046 /*
2051 2047 * model 9 stepping 1 has wrong associativity
2052 2048 */
2053 2049 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
2054 2050 cp->cp_ecx |= 8 << 12;
2055 2051 break;
2056 2052 case X86_VENDOR_Intel:
2057 2053 /*
2058 2054 * Extended L2 Cache features function.
2059 2055 * First appeared on Prescott.
2060 2056 */
2061 2057 default:
2062 2058 break;
2063 2059 }
2064 2060 break;
2065 2061 default:
2066 2062 break;
2067 2063 }
2068 2064 }
2069 2065
2070 2066 pass2_done:
2071 2067 cpi->cpi_pass = 2;
2072 2068 }
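/*
 * A minimal standalone sketch of the brand-string assembly performed in
 * cpuid_pass2() above; the guard macro is hypothetical and never
 * defined, so this is illustrative only.  Extended leaves
 * 0x80000002..0x80000004 each return 16 bytes of the string in
 * %eax/%ebx/%ecx/%edx, 48 bytes in all.
 */
#ifdef CPUID_EXAMPLE_SKETCH
static void
example_read_brandstr(char *buf)	/* buf must hold 49 bytes */
{
	struct cpuid_regs regs = { 0 };
	uint32_t w[12];
	uint_t n;

	for (n = 2; n <= 4; n++) {
		regs.cp_eax = 0x80000000 + n;
		regs.cp_ecx = 0;
		(void) __cpuid_insn(&regs);
		w[(n - 2) * 4] = regs.cp_eax;
		w[(n - 2) * 4 + 1] = regs.cp_ebx;
		w[(n - 2) * 4 + 2] = regs.cp_ecx;
		w[(n - 2) * 4 + 3] = regs.cp_edx;
	}
	bcopy(w, buf, 48);
	buf[48] = '\0';	/* the string is not always NUL-terminated */
}
#endif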
2073 2069
2074 2070 static const char *
2075 2071 intel_cpubrand(const struct cpuid_info *cpi)
2076 2072 {
2077 2073 int i;
2078 2074
2079 2075 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2080 2076 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2081 2077 return ("i486");
2082 2078
2083 2079 switch (cpi->cpi_family) {
2084 2080 case 5:
2085 2081 return ("Intel Pentium(r)");
2086 2082 case 6:
2087 2083 switch (cpi->cpi_model) {
2088 2084 uint_t celeron, xeon;
2089 2085 const struct cpuid_regs *cp;
2090 2086 case 0:
2091 2087 case 1:
2092 2088 case 2:
2093 2089 return ("Intel Pentium(r) Pro");
2094 2090 case 3:
2095 2091 case 4:
2096 2092 return ("Intel Pentium(r) II");
2097 2093 case 6:
2098 2094 return ("Intel Celeron(r)");
2099 2095 case 5:
2100 2096 case 7:
2101 2097 celeron = xeon = 0;
2102 2098 cp = &cpi->cpi_std[2]; /* cache info */
2103 2099
2104 2100 for (i = 1; i < 4; i++) {
2105 2101 uint_t tmp;
2106 2102
2107 2103 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2108 2104 if (tmp == 0x40)
2109 2105 celeron++;
2110 2106 if (tmp >= 0x44 && tmp <= 0x45)
2111 2107 xeon++;
2112 2108 }
2113 2109
2114 2110 for (i = 0; i < 2; i++) {
2115 2111 uint_t tmp;
2116 2112
2117 2113 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2118 2114 if (tmp == 0x40)
2119 2115 celeron++;
2120 2116 else if (tmp >= 0x44 && tmp <= 0x45)
2121 2117 xeon++;
2122 2118 }
2123 2119
2124 2120 for (i = 0; i < 4; i++) {
2125 2121 uint_t tmp;
2126 2122
2127 2123 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2128 2124 if (tmp == 0x40)
2129 2125 celeron++;
2130 2126 else if (tmp >= 0x44 && tmp <= 0x45)
2131 2127 xeon++;
2132 2128 }
2133 2129
2134 2130 for (i = 0; i < 4; i++) {
2135 2131 uint_t tmp;
2136 2132
2137 2133 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2138 2134 if (tmp == 0x40)
2139 2135 celeron++;
2140 2136 else if (tmp >= 0x44 && tmp <= 0x45)
2141 2137 xeon++;
2142 2138 }
2143 2139
2144 2140 if (celeron)
2145 2141 return ("Intel Celeron(r)");
2146 2142 if (xeon)
2147 2143 return (cpi->cpi_model == 5 ?
2148 2144 "Intel Pentium(r) II Xeon(tm)" :
2149 2145 "Intel Pentium(r) III Xeon(tm)");
2150 2146 return (cpi->cpi_model == 5 ?
2151 2147 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2152 2148 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2153 2149 default:
2154 2150 break;
2155 2151 }
2156 2152 default:
2157 2153 break;
2158 2154 }
2159 2155
2160 2156 /* BrandID is present if the field is nonzero */
2161 2157 if (cpi->cpi_brandid != 0) {
2162 2158 static const struct {
2163 2159 uint_t bt_bid;
2164 2160 const char *bt_str;
2165 2161 } brand_tbl[] = {
2166 2162 { 0x1, "Intel(r) Celeron(r)" },
2167 2163 { 0x2, "Intel(r) Pentium(r) III" },
2168 2164 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
2169 2165 { 0x4, "Intel(r) Pentium(r) III" },
2170 2166 { 0x6, "Mobile Intel(r) Pentium(r) III" },
2171 2167 { 0x7, "Mobile Intel(r) Celeron(r)" },
2172 2168 { 0x8, "Intel(r) Pentium(r) 4" },
2173 2169 { 0x9, "Intel(r) Pentium(r) 4" },
2174 2170 { 0xa, "Intel(r) Celeron(r)" },
2175 2171 { 0xb, "Intel(r) Xeon(tm)" },
2176 2172 { 0xc, "Intel(r) Xeon(tm) MP" },
2177 2173 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
2178 2174 { 0xf, "Mobile Intel(r) Celeron(r)" },
2179 2175 { 0x11, "Mobile Genuine Intel(r)" },
2180 2176 { 0x12, "Intel(r) Celeron(r) M" },
2181 2177 { 0x13, "Mobile Intel(r) Celeron(r)" },
2182 2178 { 0x14, "Intel(r) Celeron(r)" },
2183 2179 { 0x15, "Mobile Genuine Intel(r)" },
2184 2180 { 0x16, "Intel(r) Pentium(r) M" },
2185 2181 { 0x17, "Mobile Intel(r) Celeron(r)" }
2186 2182 };
2187 2183 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2188 2184 uint_t sgn;
2189 2185
2190 2186 sgn = (cpi->cpi_family << 8) |
2191 2187 (cpi->cpi_model << 4) | cpi->cpi_step;
2192 2188
2193 2189 for (i = 0; i < btblmax; i++)
2194 2190 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2195 2191 break;
2196 2192 if (i < btblmax) {
2197 2193 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2198 2194 return ("Intel(r) Celeron(r)");
2199 2195 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2200 2196 return ("Intel(r) Xeon(tm) MP");
2201 2197 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2202 2198 return ("Intel(r) Xeon(tm)");
2203 2199 return (brand_tbl[i].bt_str);
2204 2200 }
2205 2201 }
2206 2202
2207 2203 return (NULL);
2208 2204 }
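/*
 * A worked example of the signature test above: family 6, model 0xb,
 * stepping 1 gives sgn == (6 << 8) | (0xb << 4) | 1 == 0x6b1, so a
 * part reporting brand id 3 is named "Intel(r) Celeron(r)" instead of
 * the table's Pentium III Xeon entry.
 */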
2209 2205
2210 2206 static const char *
2211 2207 amd_cpubrand(const struct cpuid_info *cpi)
2212 2208 {
2213 2209 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2214 2210 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2215 2211 return ("i486 compatible");
2216 2212
2217 2213 switch (cpi->cpi_family) {
2218 2214 case 5:
2219 2215 switch (cpi->cpi_model) {
2220 2216 case 0:
2221 2217 case 1:
2222 2218 case 2:
2223 2219 case 3:
2224 2220 case 4:
2225 2221 case 5:
2226 2222 return ("AMD-K5(r)");
2227 2223 case 6:
2228 2224 case 7:
2229 2225 return ("AMD-K6(r)");
2230 2226 case 8:
2231 2227 return ("AMD-K6(r)-2");
2232 2228 case 9:
2233 2229 return ("AMD-K6(r)-III");
2234 2230 default:
2235 2231 return ("AMD (family 5)");
2236 2232 }
2237 2233 case 6:
2238 2234 switch (cpi->cpi_model) {
2239 2235 case 1:
2240 2236 return ("AMD-K7(tm)");
2241 2237 case 0:
2242 2238 case 2:
2243 2239 case 4:
2244 2240 return ("AMD Athlon(tm)");
2245 2241 case 3:
2246 2242 case 7:
2247 2243 return ("AMD Duron(tm)");
2248 2244 case 6:
2249 2245 case 8:
2250 2246 case 10:
2251 2247 /*
2252 2248 * Use the L2 cache size to distinguish
2253 2249 */
2254 2250 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2255 2251 "AMD Athlon(tm)" : "AMD Duron(tm)");
2256 2252 default:
2257 2253 return ("AMD (family 6)");
2258 2254 }
2259 2255 default:
2260 2256 break;
2261 2257 }
2262 2258
2263 2259 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2264 2260 cpi->cpi_brandid != 0) {
2265 2261 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2266 2262 case 3:
2267 2263 return ("AMD Opteron(tm) UP 1xx");
2268 2264 case 4:
2269 2265 return ("AMD Opteron(tm) DP 2xx");
2270 2266 case 5:
2271 2267 return ("AMD Opteron(tm) MP 8xx");
2272 2268 default:
2273 2269 return ("AMD Opteron(tm)");
2274 2270 }
2275 2271 }
2276 2272
2277 2273 return (NULL);
2278 2274 }
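/*
 * For reference on the Athlon/Duron disambiguation above: cpi_extd[6]
 * is extended fn 0x80000006, whose %ecx bits 31:16 hold the L2 cache
 * size in Kbytes; parts with 256 Kbytes or more are branded Athlon,
 * smaller (64 Kbyte) parts are Durons.
 */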
2279 2275
2280 2276 static const char *
2281 2277 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2282 2278 {
2283 2279 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2284 2280 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2285 2281 type == X86_TYPE_CYRIX_486)
2286 2282 return ("i486 compatible");
2287 2283
2288 2284 switch (type) {
2289 2285 case X86_TYPE_CYRIX_6x86:
2290 2286 return ("Cyrix 6x86");
2291 2287 case X86_TYPE_CYRIX_6x86L:
2292 2288 return ("Cyrix 6x86L");
2293 2289 case X86_TYPE_CYRIX_6x86MX:
2294 2290 return ("Cyrix 6x86MX");
2295 2291 case X86_TYPE_CYRIX_GXm:
2296 2292 return ("Cyrix GXm");
2297 2293 case X86_TYPE_CYRIX_MediaGX:
2298 2294 return ("Cyrix MediaGX");
2299 2295 case X86_TYPE_CYRIX_MII:
2300 2296 return ("Cyrix M2");
2301 2297 case X86_TYPE_VIA_CYRIX_III:
2302 2298 return ("VIA Cyrix M3");
2303 2299 default:
2304 2300 /*
2305 2301 * Have another wild guess ..
2306 2302 */
2307 2303 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2308 2304 return ("Cyrix 5x86");
2309 2305 else if (cpi->cpi_family == 5) {
2310 2306 switch (cpi->cpi_model) {
2311 2307 case 2:
2312 2308 return ("Cyrix 6x86"); /* Cyrix M1 */
2313 2309 case 4:
2314 2310 return ("Cyrix MediaGX");
2315 2311 default:
2316 2312 break;
2317 2313 }
2318 2314 } else if (cpi->cpi_family == 6) {
2319 2315 switch (cpi->cpi_model) {
2320 2316 case 0:
2321 2317 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2322 2318 case 5:
2323 2319 case 6:
2324 2320 case 7:
2325 2321 case 8:
2326 2322 case 9:
2327 2323 return ("VIA C3");
2328 2324 default:
2329 2325 break;
2330 2326 }
2331 2327 }
2332 2328 break;
2333 2329 }
2334 2330 return (NULL);
2335 2331 }
2336 2332
2337 2333 /*
2338 2334 * This only gets called in the case that the CPU extended
2339 2335 * feature brand strings (0x80000002, 0x80000003, 0x80000004)
2340 2336 * aren't available, or contain null bytes for some reason.
2341 2337 */
2342 2338 static void
2343 2339 fabricate_brandstr(struct cpuid_info *cpi)
2344 2340 {
2345 2341 const char *brand = NULL;
2346 2342
2347 2343 switch (cpi->cpi_vendor) {
2348 2344 case X86_VENDOR_Intel:
2349 2345 brand = intel_cpubrand(cpi);
2350 2346 break;
2351 2347 case X86_VENDOR_AMD:
2352 2348 brand = amd_cpubrand(cpi);
2353 2349 break;
2354 2350 case X86_VENDOR_Cyrix:
2355 2351 brand = cyrix_cpubrand(cpi, x86_type);
2356 2352 break;
2357 2353 case X86_VENDOR_NexGen:
2358 2354 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2359 2355 brand = "NexGen Nx586";
2360 2356 break;
2361 2357 case X86_VENDOR_Centaur:
2362 2358 if (cpi->cpi_family == 5)
2363 2359 switch (cpi->cpi_model) {
2364 2360 case 4:
2365 2361 brand = "Centaur C6";
2366 2362 break;
2367 2363 case 8:
2368 2364 brand = "Centaur C2";
2369 2365 break;
2370 2366 case 9:
2371 2367 brand = "Centaur C3";
2372 2368 break;
2373 2369 default:
2374 2370 break;
2375 2371 }
2376 2372 break;
2377 2373 case X86_VENDOR_Rise:
2378 2374 if (cpi->cpi_family == 5 &&
2379 2375 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2380 2376 brand = "Rise mP6";
2381 2377 break;
2382 2378 case X86_VENDOR_SiS:
2383 2379 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2384 2380 brand = "SiS 55x";
2385 2381 break;
2386 2382 case X86_VENDOR_TM:
2387 2383 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2388 2384 brand = "Transmeta Crusoe TM3x00 or TM5x00";
2389 2385 break;
2390 2386 case X86_VENDOR_NSC:
2391 2387 case X86_VENDOR_UMC:
2392 2388 default:
2393 2389 break;
2394 2390 }
2395 2391 if (brand) {
2396 2392 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2397 2393 return;
2398 2394 }
2399 2395
2400 2396 /*
2401 2397 * If all else fails ...
2402 2398 */
2403 2399 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2404 2400 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2405 2401 cpi->cpi_model, cpi->cpi_step);
2406 2402 }
2407 2403
2408 2404 /*
2409 2405 * This routine is called just after kernel memory allocation
2410 2406 * becomes available on cpu0, and as part of mp_startup() on
2411 2407 * the other cpus.
2412 2408 *
2413 2409 * Fixup the brand string, and collect any information from cpuid
2414 2410 * that requires dynamically allocated storage to represent.
2415 2411 */
2416 2412 /*ARGSUSED*/
2417 2413 void
2418 2414 cpuid_pass3(cpu_t *cpu)
2419 2415 {
2420 2416 int i, max, shft, level, size;
2421 2417 struct cpuid_regs regs;
2422 2418 struct cpuid_regs *cp;
2423 2419 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2424 2420
2425 2421 ASSERT(cpi->cpi_pass == 2);
2426 2422
2427 2423 /*
2428 2424 * Function 4: Deterministic cache parameters
2429 2425 *
2430 2426 * Take this opportunity to detect the number of threads
2431 2427 * sharing the last level cache, and construct a corresponding
2432 2428 * cache id. The respective cpuid_info members are initialized
2433 2429 * to the default case of "no last level cache sharing".
2434 2430 */
2435 2431 cpi->cpi_ncpu_shr_last_cache = 1;
2436 2432 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2437 2433
2438 2434 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2439 2435
2440 2436 /*
2441 2437 * Find the # of elements (size) returned by fn 4, and along
2442 2438 * the way detect last level cache sharing details.
2443 2439 */
2444 2440 bzero(&regs, sizeof (regs));
2445 2441 cp = &regs;
2446 2442 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2447 2443 cp->cp_eax = 4;
2448 2444 cp->cp_ecx = i;
2449 2445
2450 2446 (void) __cpuid_insn(cp);
2451 2447
2452 2448 if (CPI_CACHE_TYPE(cp) == 0)
2453 2449 break;
2454 2450 level = CPI_CACHE_LVL(cp);
2455 2451 if (level > max) {
2456 2452 max = level;
2457 2453 cpi->cpi_ncpu_shr_last_cache =
2458 2454 CPI_NTHR_SHR_CACHE(cp) + 1;
2459 2455 }
2460 2456 }
2461 2457 cpi->cpi_std_4_size = size = i;
2462 2458
2463 2459 /*
2464 2460 * Allocate the cpi_std_4 array. The first element
2465 2461 * references the regs for fn 4, %ecx == 0, which
2466 2462 * cpuid_pass2() stashed in cpi->cpi_std[4].
2467 2463 */
2468 2464 if (size > 0) {
2469 2465 cpi->cpi_std_4 =
2470 2466 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2471 2467 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2472 2468
2473 2469 /*
2474 2470 * Allocate storage to hold the additional regs
2475 2471 * for function 4, %ecx == 1 .. cpi_std_4_size.
2476 2472 *
2477 2473 * The regs for fn 4, %ecx == 0 has already
2478 2474 * been allocated as indicated above.
2479 2475 */
2480 2476 for (i = 1; i < size; i++) {
2481 2477 cp = cpi->cpi_std_4[i] =
2482 2478 kmem_zalloc(sizeof (regs), KM_SLEEP);
2483 2479 cp->cp_eax = 4;
2484 2480 cp->cp_ecx = i;
2485 2481
2486 2482 (void) __cpuid_insn(cp);
2487 2483 }
2488 2484 }
2489 2485 /*
2490 2486 * Determine the number of bits needed to represent
2491 2487 * the number of CPUs sharing the last level cache.
2492 2488 *
2493 2489 * Shift off that number of bits from the APIC id to
2494 2490 * derive the cache id.
2495 2491 */
2496 2492 shft = 0;
2497 2493 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2498 2494 shft++;
2499 2495 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2500 2496 }
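	/*
	 * A worked example of the cache-id derivation above: with four
	 * CPUs sharing the last-level cache the loop leaves shft == 2
	 * (i steps 1 -> 2 -> 4), so the low two APIC id bits are
	 * shifted off and APIC ids 8..11 all map to cache id 2.
	 */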
2501 2497
2502 2498 /*
2503 2499 * Now fixup the brand string
2504 2500 */
2505 2501 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2506 2502 fabricate_brandstr(cpi);
2507 2503 } else {
2508 2504
2509 2505 /*
2510 2506 * If we successfully extracted a brand string from the cpuid
2511 2507 * instruction, clean it up by removing leading spaces and
2512 2508 * similar junk.
2513 2509 */
2514 2510 if (cpi->cpi_brandstr[0]) {
2515 2511 size_t maxlen = sizeof (cpi->cpi_brandstr);
2516 2512 char *src, *dst;
2517 2513
2518 2514 dst = src = (char *)cpi->cpi_brandstr;
2519 2515 src[maxlen - 1] = '\0';
2520 2516 /*
2521 2517 * strip leading spaces
2522 2518 */
2523 2519 while (*src == ' ')
2524 2520 src++;
2525 2521 /*
2526 2522 * Remove any "Genuine" or "Authentic" prefixes
2527 2523 */
2528 2524 if (strncmp(src, "Genuine ", 8) == 0)
2529 2525 src += 8;
2530 2526 if (strncmp(src, "Authentic ", 10) == 0)
2531 2527 src += 10;
2532 2528
2533 2529 /*
2534 2530 * Now do an in-place copy.
2535 2531 * Map (R) to (r) and (TM) to (tm).
2536 2532 * The era of teletypes is long gone, and there's
2537 2533 * -really- no need to shout.
2538 2534 */
2539 2535 while (*src != '\0') {
2540 2536 if (src[0] == '(') {
2541 2537 if (strncmp(src + 1, "R)", 2) == 0) {
2542 2538 (void) strncpy(dst, "(r)", 3);
2543 2539 src += 3;
2544 2540 dst += 3;
2545 2541 continue;
2546 2542 }
2547 2543 if (strncmp(src + 1, "TM)", 3) == 0) {
2548 2544 (void) strncpy(dst, "(tm)", 4);
2549 2545 src += 4;
2550 2546 dst += 4;
2551 2547 continue;
2552 2548 }
2553 2549 }
2554 2550 *dst++ = *src++;
2555 2551 }
2556 2552 *dst = '\0';
2557 2553
2558 2554 /*
2559 2555 * Finally, remove any trailing spaces
2560 2556 */
2561 2557 while (--dst > cpi->cpi_brandstr)
2562 2558 if (*dst == ' ')
2563 2559 *dst = '\0';
2564 2560 else
2565 2561 break;
2566 2562 } else
2567 2563 fabricate_brandstr(cpi);
2568 2564 }
2569 2565 cpi->cpi_pass = 3;
2570 2566 }
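/*
 * A worked example of the brand-string cleanup in cpuid_pass3() above:
 * "  Genuine Intel(R) CPU 2.80GHz  " becomes "Intel(r) CPU 2.80GHz";
 * leading blanks and the "Genuine " prefix are stripped, "(R)" is
 * folded to "(r)", and trailing blanks are trimmed.
 */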
2571 2567
2572 2568 /*
2573 2569 * This routine is called out of bind_hwcap() much later in the life
2574 2570 * of the kernel (post_startup()). The job of this routine is to resolve
2575 2571 * the hardware feature support and kernel support for those features into
2576 2572 * what we're actually going to tell applications via the aux vector.
2577 2573 */
2578 2574 void
2579 2575 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
2580 2576 {
2581 2577 struct cpuid_info *cpi;
2582 2578 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
2583 2579
2584 2580 if (cpu == NULL)
2585 2581 cpu = CPU;
2586 2582 cpi = cpu->cpu_m.mcpu_cpi;
2587 2583
2588 2584 ASSERT(cpi->cpi_pass == 3);
2589 2585
2590 2586 if (cpi->cpi_maxeax >= 1) {
2591 2587 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2592 2588 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2593 2589
2594 2590 *edx = CPI_FEATURES_EDX(cpi);
2595 2591 *ecx = CPI_FEATURES_ECX(cpi);
2596 2592
2597 2593 /*
2598 2594 * [these require explicit kernel support]
2599 2595 */
2600 2596 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2601 2597 *edx &= ~CPUID_INTC_EDX_SEP;
2602 2598
2603 2599 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2604 2600 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2605 2601 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2606 2602 *edx &= ~CPUID_INTC_EDX_SSE2;
2607 2603
2608 2604 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2609 2605 *edx &= ~CPUID_INTC_EDX_HTT;
2610 2606
2611 2607 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2612 2608 *ecx &= ~CPUID_INTC_ECX_SSE3;
2613 2609
2614 2610 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2615 2611 *ecx &= ~CPUID_INTC_ECX_SSSE3;
2616 2612 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2617 2613 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2618 2614 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2619 2615 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2620 2616 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2621 2617 *ecx &= ~CPUID_INTC_ECX_AES;
2622 2618 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2623 2619 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2624 2620 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2625 2621 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2626 2622 CPUID_INTC_ECX_OSXSAVE);
2627 2623 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2628 2624 *ecx &= ~CPUID_INTC_ECX_AVX;
2629 2625 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
2630 2626 *ecx &= ~CPUID_INTC_ECX_F16C;
2631 2627
2632 2628 /*
2633 2629 * [no explicit support required beyond x87 fp context]
2634 2630 */
2635 2631 if (!fpu_exists)
2636 2632 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2637 2633
2638 2634 /*
2639 2635 * Now map the supported feature vector to things that we
2640 2636 * think userland will care about.
2641 2637 */
2642 2638 if (*edx & CPUID_INTC_EDX_SEP)
2643 2639 hwcap_flags |= AV_386_SEP;
2644 2640 if (*edx & CPUID_INTC_EDX_SSE)
2645 2641 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2646 2642 if (*edx & CPUID_INTC_EDX_SSE2)
2647 2643 hwcap_flags |= AV_386_SSE2;
2648 2644 if (*ecx & CPUID_INTC_ECX_SSE3)
2649 2645 hwcap_flags |= AV_386_SSE3;
2650 2646 if (*ecx & CPUID_INTC_ECX_SSSE3)
2651 2647 hwcap_flags |= AV_386_SSSE3;
2652 2648 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2653 2649 hwcap_flags |= AV_386_SSE4_1;
2654 2650 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2655 2651 hwcap_flags |= AV_386_SSE4_2;
2656 2652 if (*ecx & CPUID_INTC_ECX_MOVBE)
2657 2653 hwcap_flags |= AV_386_MOVBE;
2658 2654 if (*ecx & CPUID_INTC_ECX_AES)
2659 2655 hwcap_flags |= AV_386_AES;
2660 2656 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2661 2657 hwcap_flags |= AV_386_PCLMULQDQ;
2662 2658 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2663 2659 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
2664 2660 hwcap_flags |= AV_386_XSAVE;
2665 2661
2666 2662 if (*ecx & CPUID_INTC_ECX_AVX) {
2667 2663 hwcap_flags |= AV_386_AVX;
2668 2664 if (*ecx & CPUID_INTC_ECX_F16C)
2669 2665 hwcap_flags_2 |= AV_386_2_F16C;
2670 2666 }
2671 2667 }
2672 2668 if (*ecx & CPUID_INTC_ECX_VMX)
2673 2669 hwcap_flags |= AV_386_VMX;
2674 2670 if (*ecx & CPUID_INTC_ECX_POPCNT)
2675 2671 hwcap_flags |= AV_386_POPCNT;
2676 2672 if (*edx & CPUID_INTC_EDX_FPU)
2677 2673 hwcap_flags |= AV_386_FPU;
2678 2674 if (*edx & CPUID_INTC_EDX_MMX)
2679 2675 hwcap_flags |= AV_386_MMX;
2680 2676
2681 2677 if (*edx & CPUID_INTC_EDX_TSC)
2682 2678 hwcap_flags |= AV_386_TSC;
2683 2679 if (*edx & CPUID_INTC_EDX_CX8)
2684 2680 hwcap_flags |= AV_386_CX8;
2685 2681 if (*edx & CPUID_INTC_EDX_CMOV)
2686 2682 hwcap_flags |= AV_386_CMOV;
2687 2683 if (*ecx & CPUID_INTC_ECX_CX16)
2688 2684 hwcap_flags |= AV_386_CX16;
2689 2685
2690 2686 if (*ecx & CPUID_INTC_ECX_RDRAND)
2691 2687 hwcap_flags_2 |= AV_386_2_RDRAND;
2692 2688 }
2693 2689
2694 2690 if (cpi->cpi_xmaxeax < 0x80000001)
2695 2691 goto pass4_done;
2696 2692
2697 2693 switch (cpi->cpi_vendor) {
2698 2694 struct cpuid_regs cp;
2699 2695 uint32_t *edx, *ecx;
2700 2696
2701 2697 case X86_VENDOR_Intel:
2702 2698 /*
2703 2699 * Seems like Intel duplicated what was necessary
2704 2700 * here to make the initial crop of 64-bit OSes work.
2705 2701 * Hopefully, those are the only "extended" bits
2706 2702 * they'll add.
2707 2703 */
2708 2704 /*FALLTHROUGH*/
2709 2705
2710 2706 case X86_VENDOR_AMD:
2711 2707 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2712 2708 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2713 2709
2714 2710 *edx = CPI_FEATURES_XTD_EDX(cpi);
2715 2711 *ecx = CPI_FEATURES_XTD_ECX(cpi);
2716 2712
2717 2713 /*
2718 2714 * [these features require explicit kernel support]
2719 2715 */
2720 2716 switch (cpi->cpi_vendor) {
2721 2717 case X86_VENDOR_Intel:
2722 2718 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2723 2719 *edx &= ~CPUID_AMD_EDX_TSCP;
2724 2720 break;
2725 2721
2726 2722 case X86_VENDOR_AMD:
2727 2723 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2728 2724 *edx &= ~CPUID_AMD_EDX_TSCP;
2729 2725 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2730 2726 *ecx &= ~CPUID_AMD_ECX_SSE4A;
2731 2727 break;
2732 2728
2733 2729 default:
2734 2730 break;
2735 2731 }
2736 2732
2737 2733 /*
2738 2734 * [no explicit support required beyond
2739 2735 * x87 fp context and exception handlers]
2740 2736 */
2741 2737 if (!fpu_exists)
2742 2738 *edx &= ~(CPUID_AMD_EDX_MMXamd |
2743 2739 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2744 2740
2745 2741 if (!is_x86_feature(x86_featureset, X86FSET_NX))
2746 2742 *edx &= ~CPUID_AMD_EDX_NX;
2747 2743 #if !defined(__amd64)
2748 2744 *edx &= ~CPUID_AMD_EDX_LM;
2749 2745 #endif
2750 2746 /*
2751 2747 * Now map the supported feature vector to
2752 2748 * things that we think userland will care about.
2753 2749 */
2754 2750 #if defined(__amd64)
2755 2751 if (*edx & CPUID_AMD_EDX_SYSC)
2756 2752 hwcap_flags |= AV_386_AMD_SYSC;
2757 2753 #endif
2758 2754 if (*edx & CPUID_AMD_EDX_MMXamd)
2759 2755 hwcap_flags |= AV_386_AMD_MMX;
2760 2756 if (*edx & CPUID_AMD_EDX_3DNow)
2761 2757 hwcap_flags |= AV_386_AMD_3DNow;
2762 2758 if (*edx & CPUID_AMD_EDX_3DNowx)
2763 2759 hwcap_flags |= AV_386_AMD_3DNowx;
2764 2760 if (*ecx & CPUID_AMD_ECX_SVM)
2765 2761 hwcap_flags |= AV_386_AMD_SVM;
2766 2762
2767 2763 switch (cpi->cpi_vendor) {
2768 2764 case X86_VENDOR_AMD:
2769 2765 if (*edx & CPUID_AMD_EDX_TSCP)
2770 2766 hwcap_flags |= AV_386_TSCP;
2771 2767 if (*ecx & CPUID_AMD_ECX_AHF64)
2772 2768 hwcap_flags |= AV_386_AHF;
2773 2769 if (*ecx & CPUID_AMD_ECX_SSE4A)
2774 2770 hwcap_flags |= AV_386_AMD_SSE4A;
2775 2771 if (*ecx & CPUID_AMD_ECX_LZCNT)
2776 2772 hwcap_flags |= AV_386_AMD_LZCNT;
2777 2773 break;
2778 2774
2779 2775 case X86_VENDOR_Intel:
2780 2776 if (*edx & CPUID_AMD_EDX_TSCP)
2781 2777 hwcap_flags |= AV_386_TSCP;
2782 2778 /*
2783 2779 * Aarrgh.
2784 2780 * Intel uses a different bit in the same word.
2785 2781 */
2786 2782 if (*ecx & CPUID_INTC_ECX_AHF64)
2787 2783 hwcap_flags |= AV_386_AHF;
2788 2784 break;
2789 2785
2790 2786 default:
2791 2787 break;
2792 2788 }
2793 2789 break;
2794 2790
2795 2791 case X86_VENDOR_TM:
2796 2792 cp.cp_eax = 0x80860001;
2797 2793 (void) __cpuid_insn(&cp);
2798 2794 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2799 2795 break;
2800 2796
2801 2797 default:
2802 2798 break;
2803 2799 }
2804 2800
2805 2801 pass4_done:
2806 2802 cpi->cpi_pass = 4;
2807 2803 if (hwcap_out != NULL) {
2808 2804 hwcap_out[0] = hwcap_flags;
2809 2805 hwcap_out[1] = hwcap_flags_2;
2810 2806 }
2811 2807 }
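/*
 * The hwcap words assembled above reach userland via the aux vector.
 * A minimal consumer sketch, shown here for illustration (userland
 * code; see getisax(3C), with do_sse2_work() standing in for any
 * application function):
 *
 *	#include <sys/auxv.h>
 *
 *	uint32_t ui;
 *	if (getisax(&ui, 1) != 0 && (ui & AV_386_SSE2))
 *		do_sse2_work();
 */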
2812 2808
2813 2809
2814 2810 /*
2815 2811 * Simulate the cpuid instruction using the data we previously
2816 2812 * captured about this CPU. We try our best to return the truth
2817 2813 * about the hardware, independently of kernel support.
2818 2814 */
2819 2815 uint32_t
2820 2816 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2821 2817 {
2822 2818 struct cpuid_info *cpi;
2823 2819 struct cpuid_regs *xcp;
2824 2820
2825 2821 if (cpu == NULL)
2826 2822 cpu = CPU;
2827 2823 cpi = cpu->cpu_m.mcpu_cpi;
2828 2824
2829 2825 ASSERT(cpuid_checkpass(cpu, 3));
2830 2826
2831 2827 /*
2832 2828 * CPUID data is cached in two separate places: cpi_std for standard
2833 2829 * CPUID functions, and cpi_extd for extended CPUID functions.
2834 2830 */
2835 2831 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2836 2832 xcp = &cpi->cpi_std[cp->cp_eax];
2837 2833 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2838 2834 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2839 2835 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2840 2836 else
2841 2837 /*
2842 2838 * The caller is asking for data from an input parameter which
2843 2839 * the kernel has not cached. In this case we go fetch from
2844 2840 * the hardware and return the data directly to the user.
2845 2841 */
2846 2842 return (__cpuid_insn(cp));
2847 2843
2848 2844 cp->cp_eax = xcp->cp_eax;
2849 2845 cp->cp_ebx = xcp->cp_ebx;
2850 2846 cp->cp_ecx = xcp->cp_ecx;
2851 2847 cp->cp_edx = xcp->cp_edx;
2852 2848 return (cp->cp_eax);
2853 2849 }
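/*
 * A minimal caller sketch for cpuid_insn() above: fill in cp_eax (and
 * cp_ecx where the leaf takes an index), then read the cached or live
 * result back out of the same struct; cpu == NULL means the current
 * CPU.
 *
 *	struct cpuid_regs cp = { 0 };
 *	cp.cp_eax = 1;
 *	(void) cpuid_insn(NULL, &cp);
 *	family = BITX(cp.cp_eax, 11, 8);
 */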
2854 2850
2855 2851 int
2856 2852 cpuid_checkpass(cpu_t *cpu, int pass)
2857 2853 {
2858 2854 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2859 2855 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2860 2856 }
2861 2857
2862 2858 int
2863 2859 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2864 2860 {
2865 2861 ASSERT(cpuid_checkpass(cpu, 3));
2866 2862
2867 2863 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2868 2864 }
2869 2865
2870 2866 int
2871 2867 cpuid_is_cmt(cpu_t *cpu)
2872 2868 {
2873 2869 if (cpu == NULL)
2874 2870 cpu = CPU;
2875 2871
2876 2872 ASSERT(cpuid_checkpass(cpu, 1));
2877 2873
2878 2874 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2879 2875 }
2880 2876
2881 2877 /*
2882 2878 * AMD and Intel both implement the 64-bit variant of the syscall
2883 2879 * instruction (syscallq), so if there's -any- support for syscall,
2884 2880 * cpuid currently says "yes, we support this".
2885 2881 *
2886 2882 * However, Intel decided to -not- implement the 32-bit variant of the
2887 2883 * syscall instruction, so we provide a predicate to allow our caller
2888 2884 * to test that subtlety here.
2889 2885 *
2890 2886 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
2891 2887 * even in the case where the hardware would in fact support it.
2892 2888 */
2893 2889 /*ARGSUSED*/
2894 2890 int
2895 2891 cpuid_syscall32_insn(cpu_t *cpu)
2896 2892 {
2897 2893 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2898 2894
2899 2895 #if !defined(__xpv)
2900 2896 if (cpu == NULL)
2901 2897 cpu = CPU;
2902 2898
2903 2899 /*CSTYLED*/
2904 2900 {
2905 2901 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2906 2902
2907 2903 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2908 2904 cpi->cpi_xmaxeax >= 0x80000001 &&
2909 2905 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2910 2906 return (1);
2911 2907 }
2912 2908 #endif
2913 2909 return (0);
2914 2910 }
2915 2911
2916 2912 int
2917 2913 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2918 2914 {
2919 2915 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2920 2916
2921 2917 static const char fmt[] =
2922 2918 "x86 (%s %X family %d model %d step %d clock %d MHz)";
2923 2919 static const char fmt_ht[] =
2924 2920 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2925 2921
2926 2922 ASSERT(cpuid_checkpass(cpu, 1));
2927 2923
2928 2924 if (cpuid_is_cmt(cpu))
2929 2925 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2930 2926 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2931 2927 cpi->cpi_family, cpi->cpi_model,
2932 2928 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2933 2929 return (snprintf(s, n, fmt,
2934 2930 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2935 2931 cpi->cpi_family, cpi->cpi_model,
2936 2932 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2937 2933 }
2938 2934
2939 2935 const char *
2940 2936 cpuid_getvendorstr(cpu_t *cpu)
2941 2937 {
2942 2938 ASSERT(cpuid_checkpass(cpu, 1));
2943 2939 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2944 2940 }
2945 2941
2946 2942 uint_t
2947 2943 cpuid_getvendor(cpu_t *cpu)
2948 2944 {
2949 2945 ASSERT(cpuid_checkpass(cpu, 1));
2950 2946 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2951 2947 }
2952 2948
2953 2949 uint_t
2954 2950 cpuid_getfamily(cpu_t *cpu)
2955 2951 {
2956 2952 ASSERT(cpuid_checkpass(cpu, 1));
2957 2953 return (cpu->cpu_m.mcpu_cpi->cpi_family);
2958 2954 }
2959 2955
2960 2956 uint_t
2961 2957 cpuid_getmodel(cpu_t *cpu)
2962 2958 {
2963 2959 ASSERT(cpuid_checkpass(cpu, 1));
2964 2960 return (cpu->cpu_m.mcpu_cpi->cpi_model);
2965 2961 }
2966 2962
2967 2963 uint_t
2968 2964 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2969 2965 {
2970 2966 ASSERT(cpuid_checkpass(cpu, 1));
2971 2967 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2972 2968 }
2973 2969
2974 2970 uint_t
2975 2971 cpuid_get_ncore_per_chip(cpu_t *cpu)
2976 2972 {
2977 2973 ASSERT(cpuid_checkpass(cpu, 1));
2978 2974 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2979 2975 }
2980 2976
2981 2977 uint_t
2982 2978 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2983 2979 {
2984 2980 ASSERT(cpuid_checkpass(cpu, 2));
2985 2981 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2986 2982 }
2987 2983
2988 2984 id_t
2989 2985 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2990 2986 {
2991 2987 ASSERT(cpuid_checkpass(cpu, 2));
2992 2988 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2993 2989 }
2994 2990
2995 2991 uint_t
2996 2992 cpuid_getstep(cpu_t *cpu)
2997 2993 {
2998 2994 ASSERT(cpuid_checkpass(cpu, 1));
2999 2995 return (cpu->cpu_m.mcpu_cpi->cpi_step);
3000 2996 }
3001 2997
3002 2998 uint_t
3003 2999 cpuid_getsig(struct cpu *cpu)
3004 3000 {
3005 3001 ASSERT(cpuid_checkpass(cpu, 1));
3006 3002 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
3007 3003 }
3008 3004
3009 3005 uint32_t
3010 3006 cpuid_getchiprev(struct cpu *cpu)
3011 3007 {
3012 3008 ASSERT(cpuid_checkpass(cpu, 1));
3013 3009 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
3014 3010 }
3015 3011
3016 3012 const char *
3017 3013 cpuid_getchiprevstr(struct cpu *cpu)
3018 3014 {
3019 3015 ASSERT(cpuid_checkpass(cpu, 1));
3020 3016 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
3021 3017 }
3022 3018
3023 3019 uint32_t
3024 3020 cpuid_getsockettype(struct cpu *cpu)
3025 3021 {
3026 3022 ASSERT(cpuid_checkpass(cpu, 1));
3027 3023 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
3028 3024 }
3029 3025
3030 3026 const char *
3031 3027 cpuid_getsocketstr(cpu_t *cpu)
3032 3028 {
3033 3029 static const char *socketstr = NULL;
3034 3030 struct cpuid_info *cpi;
3035 3031
3036 3032 ASSERT(cpuid_checkpass(cpu, 1));
3037 3033 cpi = cpu->cpu_m.mcpu_cpi;
3038 3034
3039 3035 /* Assume that socket types are the same across the system */
3040 3036 if (socketstr == NULL)
3041 3037 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
3042 3038 cpi->cpi_model, cpi->cpi_step);
3043 3039
3044 3040
3045 3041 return (socketstr);
3046 3042 }
3047 3043
3048 3044 int
3049 3045 cpuid_get_chipid(cpu_t *cpu)
3050 3046 {
3051 3047 ASSERT(cpuid_checkpass(cpu, 1));
3052 3048
3053 3049 if (cpuid_is_cmt(cpu))
3054 3050 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
3055 3051 return (cpu->cpu_id);
3056 3052 }
3057 3053
3058 3054 id_t
3059 3055 cpuid_get_coreid(cpu_t *cpu)
3060 3056 {
3061 3057 ASSERT(cpuid_checkpass(cpu, 1));
3062 3058 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
3063 3059 }
3064 3060
3065 3061 int
3066 3062 cpuid_get_pkgcoreid(cpu_t *cpu)
3067 3063 {
3068 3064 ASSERT(cpuid_checkpass(cpu, 1));
3069 3065 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
3070 3066 }
3071 3067
3072 3068 int
3073 3069 cpuid_get_clogid(cpu_t *cpu)
3074 3070 {
3075 3071 ASSERT(cpuid_checkpass(cpu, 1));
3076 3072 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
3077 3073 }
3078 3074
3079 3075 int
3080 3076 cpuid_get_cacheid(cpu_t *cpu)
3081 3077 {
3082 3078 ASSERT(cpuid_checkpass(cpu, 1));
3083 3079 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3084 3080 }
3085 3081
3086 3082 uint_t
3087 3083 cpuid_get_procnodeid(cpu_t *cpu)
3088 3084 {
3089 3085 ASSERT(cpuid_checkpass(cpu, 1));
3090 3086 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
3091 3087 }
3092 3088
3093 3089 uint_t
3094 3090 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3095 3091 {
3096 3092 ASSERT(cpuid_checkpass(cpu, 1));
3097 3093 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3098 3094 }
3099 3095
3100 3096 uint_t
3101 3097 cpuid_get_compunitid(cpu_t *cpu)
3102 3098 {
3103 3099 ASSERT(cpuid_checkpass(cpu, 1));
3104 3100 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3105 3101 }
3106 3102
3107 3103 uint_t
3108 3104 cpuid_get_cores_per_compunit(cpu_t *cpu)
3109 3105 {
3110 3106 ASSERT(cpuid_checkpass(cpu, 1));
3111 3107 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3112 3108 }
3113 3109
3114 3110 /*ARGSUSED*/
3115 3111 int
3116 3112 cpuid_have_cr8access(cpu_t *cpu)
3117 3113 {
3118 3114 #if defined(__amd64)
3119 3115 return (1);
3120 3116 #else
3121 3117 struct cpuid_info *cpi;
3122 3118
3123 3119 ASSERT(cpu != NULL);
3124 3120 cpi = cpu->cpu_m.mcpu_cpi;
3125 3121 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3126 3122 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3127 3123 return (1);
3128 3124 return (0);
3129 3125 #endif
3130 3126 }
3131 3127
3132 3128 uint32_t
3133 3129 cpuid_get_apicid(cpu_t *cpu)
3134 3130 {
3135 3131 ASSERT(cpuid_checkpass(cpu, 1));
3136 3132 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3137 3133 return (UINT32_MAX);
3138 3134 } else {
3139 3135 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3140 3136 }
3141 3137 }
3142 3138
3143 3139 void
3144 3140 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3145 3141 {
3146 3142 struct cpuid_info *cpi;
3147 3143
3148 3144 if (cpu == NULL)
3149 3145 cpu = CPU;
3150 3146 cpi = cpu->cpu_m.mcpu_cpi;
3151 3147
3152 3148 ASSERT(cpuid_checkpass(cpu, 1));
3153 3149
3154 3150 if (pabits)
3155 3151 *pabits = cpi->cpi_pabits;
3156 3152 if (vabits)
3157 3153 *vabits = cpi->cpi_vabits;
3158 3154 }
3159 3155
3160 3156 /*
3161 3157 * Returns the number of data TLB entries for a corresponding
3162 3158 * pagesize. If it can't be computed, or isn't known, the
3163 3159 * routine returns zero. If you ask about an architecturally
3164 3160 * impossible pagesize, the routine will panic (so that the
3165 3161 * hat implementor knows that things are inconsistent).
3166 3162 */
3167 3163 uint_t
3168 3164 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3169 3165 {
3170 3166 struct cpuid_info *cpi;
3171 3167 uint_t dtlb_nent = 0;
3172 3168
3173 3169 if (cpu == NULL)
3174 3170 cpu = CPU;
3175 3171 cpi = cpu->cpu_m.mcpu_cpi;
3176 3172
3177 3173 ASSERT(cpuid_checkpass(cpu, 1));
3178 3174
3179 3175 /*
3180 3176 * Check the L2 TLB info
3181 3177 */
3182 3178 if (cpi->cpi_xmaxeax >= 0x80000006) {
3183 3179 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3184 3180
3185 3181 switch (pagesize) {
3186 3182
3187 3183 case 4 * 1024:
3188 3184 /*
3189 3185 * All zero in the top 16 bits of the register
3190 3186 * indicates a unified TLB. Size is in low 16 bits.
3191 3187 */
3192 3188 if ((cp->cp_ebx & 0xffff0000) == 0)
3193 3189 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3194 3190 else
3195 3191 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3196 3192 break;
3197 3193
3198 3194 case 2 * 1024 * 1024:
3199 3195 if ((cp->cp_eax & 0xffff0000) == 0)
3200 3196 dtlb_nent = cp->cp_eax & 0x0000ffff;
3201 3197 else
3202 3198 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3203 3199 break;
3204 3200
3205 3201 default:
3206 3202 panic("unknown L2 pagesize");
3207 3203 /*NOTREACHED*/
3208 3204 }
3209 3205 }
3210 3206
3211 3207 if (dtlb_nent != 0)
3212 3208 return (dtlb_nent);
3213 3209
3214 3210 /*
3215 3211 * No L2 TLB support for this size, try L1.
3216 3212 */
3217 3213 if (cpi->cpi_xmaxeax >= 0x80000005) {
3218 3214 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3219 3215
3220 3216 switch (pagesize) {
3221 3217 case 4 * 1024:
3222 3218 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3223 3219 break;
3224 3220 case 2 * 1024 * 1024:
3225 3221 dtlb_nent = BITX(cp->cp_eax, 23, 16);
3226 3222 break;
3227 3223 default:
3228 3224 panic("unknown L1 d-TLB pagesize");
3229 3225 /*NOTREACHED*/
3230 3226 }
3231 3227 }
3232 3228
3233 3229 return (dtlb_nent);
3234 3230 }
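/*
 * A worked example of the L2 d-TLB decode above: if extended fn
 * 0x80000006 returns cp_ebx == 0x00400040, the top 16 bits are
 * non-zero, so the 4K d-TLB holds BITX(cp_ebx, 27, 16) == 64 entries;
 * had the top half been zero, e.g. cp_ebx == 0x00000200, the low half
 * would instead be a unified TLB with 512 entries.
 */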
3235 3231
3236 3232 /*
3237 3233 * Return 0 if the erratum is not present or not applicable, positive
3238 3234 * if it is, and negative if the status of the erratum is unknown.
3239 3235 *
3240 3236 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3241 3237 * Processors" #25759, Rev 3.57, August 2005
3242 3238 */
3243 3239 int
3244 3240 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3245 3241 {
3246 3242 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3247 3243 uint_t eax;
3248 3244
3249 3245 /*
3250 3246 * Bail out if this CPU isn't an AMD CPU, or if it's
3251 3247 * a legacy (32-bit) AMD CPU.
3252 3248 */
3253 3249 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3254 3250 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3255 3251 cpi->cpi_family == 6)
3256 3252
3257 3253 return (0);
3258 3254
3259 3255 eax = cpi->cpi_std[1].cp_eax;
3260 3256
3261 3257 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
3262 3258 #define SH_B3(eax) (eax == 0xf51)
3263 3259 #define B(eax) (SH_B0(eax) || SH_B3(eax))
3264 3260
3265 3261 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
3266 3262
3267 3263 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3268 3264 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3269 3265 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
3270 3266 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3271 3267
3272 3268 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3273 3269 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
3274 3270 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
3275 3271 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3276 3272
3277 3273 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3278 3274 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
3279 3275 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
3280 3276 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
3281 3277 #define BH_E4(eax) (eax == 0x20fb1)
3282 3278 #define SH_E5(eax) (eax == 0x20f42)
3283 3279 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
3284 3280 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
3285 3281 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3286 3282 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3287 3283 DH_E6(eax) || JH_E6(eax))
3288 3284
3289 3285 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3290 3286 #define DR_B0(eax) (eax == 0x100f20)
3291 3287 #define DR_B1(eax) (eax == 0x100f21)
3292 3288 #define DR_BA(eax) (eax == 0x100f2a)
3293 3289 #define DR_B2(eax) (eax == 0x100f22)
3294 3290 #define DR_B3(eax) (eax == 0x100f23)
3295 3291 #define RB_C0(eax) (eax == 0x100f40)
3296 3292
3297 3293 switch (erratum) {
3298 3294 case 1:
3299 3295 return (cpi->cpi_family < 0x10);
3300 3296 case 51: /* what does the asterisk mean? */
3301 3297 return (B(eax) || SH_C0(eax) || CG(eax));
3302 3298 case 52:
3303 3299 return (B(eax));
3304 3300 case 57:
3305 3301 return (cpi->cpi_family <= 0x11);
3306 3302 case 58:
3307 3303 return (B(eax));
3308 3304 case 60:
3309 3305 return (cpi->cpi_family <= 0x11);
3310 3306 case 61:
3311 3307 case 62:
3312 3308 case 63:
3313 3309 case 64:
3314 3310 case 65:
3315 3311 case 66:
3316 3312 case 68:
3317 3313 case 69:
3318 3314 case 70:
3319 3315 case 71:
3320 3316 return (B(eax));
3321 3317 case 72:
3322 3318 return (SH_B0(eax));
3323 3319 case 74:
3324 3320 return (B(eax));
3325 3321 case 75:
3326 3322 return (cpi->cpi_family < 0x10);
3327 3323 case 76:
3328 3324 return (B(eax));
3329 3325 case 77:
3330 3326 return (cpi->cpi_family <= 0x11);
3331 3327 case 78:
3332 3328 return (B(eax) || SH_C0(eax));
3333 3329 case 79:
3334 3330 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3335 3331 case 80:
3336 3332 case 81:
3337 3333 case 82:
3338 3334 return (B(eax));
3339 3335 case 83:
3340 3336 return (B(eax) || SH_C0(eax) || CG(eax));
3341 3337 case 85:
3342 3338 return (cpi->cpi_family < 0x10);
3343 3339 case 86:
3344 3340 return (SH_C0(eax) || CG(eax));
3345 3341 case 88:
3346 3342 #if !defined(__amd64)
3347 3343 return (0);
3348 3344 #else
3349 3345 return (B(eax) || SH_C0(eax));
3350 3346 #endif
3351 3347 case 89:
3352 3348 return (cpi->cpi_family < 0x10);
3353 3349 case 90:
3354 3350 return (B(eax) || SH_C0(eax) || CG(eax));
3355 3351 case 91:
3356 3352 case 92:
3357 3353 return (B(eax) || SH_C0(eax));
3358 3354 case 93:
3359 3355 return (SH_C0(eax));
3360 3356 case 94:
3361 3357 return (B(eax) || SH_C0(eax) || CG(eax));
3362 3358 case 95:
3363 3359 #if !defined(__amd64)
3364 3360 return (0);
3365 3361 #else
3366 3362 return (B(eax) || SH_C0(eax));
3367 3363 #endif
3368 3364 case 96:
3369 3365 return (B(eax) || SH_C0(eax) || CG(eax));
3370 3366 case 97:
3371 3367 case 98:
3372 3368 return (SH_C0(eax) || CG(eax));
3373 3369 case 99:
3374 3370 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3375 3371 case 100:
3376 3372 return (B(eax) || SH_C0(eax));
3377 3373 case 101:
3378 3374 case 103:
3379 3375 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3380 3376 case 104:
3381 3377 return (SH_C0(eax) || CG(eax) || D0(eax));
3382 3378 case 105:
3383 3379 case 106:
3384 3380 case 107:
3385 3381 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3386 3382 case 108:
3387 3383 return (DH_CG(eax));
3388 3384 case 109:
3389 3385 return (SH_C0(eax) || CG(eax) || D0(eax));
3390 3386 case 110:
3391 3387 return (D0(eax) || EX(eax));
3392 3388 case 111:
3393 3389 return (CG(eax));
3394 3390 case 112:
3395 3391 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3396 3392 case 113:
3397 3393 return (eax == 0x20fc0);
3398 3394 case 114:
3399 3395 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3400 3396 case 115:
3401 3397 return (SH_E0(eax) || JH_E1(eax));
3402 3398 case 116:
3403 3399 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3404 3400 case 117:
3405 3401 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3406 3402 case 118:
3407 3403 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3408 3404 JH_E6(eax));
3409 3405 case 121:
3410 3406 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3411 3407 case 122:
3412 3408 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3413 3409 case 123:
3414 3410 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3415 3411 case 131:
3416 3412 return (cpi->cpi_family < 0x10);
3417 3413 case 6336786:
3418 3414 /*
3419 3415 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3420 3416 * if this is a K8 family or newer processor
3421 3417 */
3422 3418 if (CPI_FAMILY(cpi) == 0xf) {
3423 3419 struct cpuid_regs regs;
3424 3420 regs.cp_eax = 0x80000007;
3425 3421 (void) __cpuid_insn(&regs);
3426 3422 return (!(regs.cp_edx & 0x100));
3427 3423 }
3428 3424 return (0);
3429 3425 case 6323525:
3430 3426 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3431 3427 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3432 3428
3433 3429 case 6671130:
3434 3430 /*
3435 3431 * check for processors (pre-Shanghai) that do not provide
3436 3432 * optimal management of 1gb ptes in its tlb.
3437 3433 */
3438 3434 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3439 3435
3440 3436 case 298:
3441 3437 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3442 3438 DR_B2(eax) || RB_C0(eax));
3443 3439
3444 3440 case 721:
3445 3441 #if defined(__amd64)
3446 3442 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3447 3443 #else
3448 3444 return (0);
3449 3445 #endif
3450 3446
3451 3447 default:
3452 3448 return (-1);
3453 3449
3454 3450 }
3455 3451 }
3456 3452
3457 3453 /*
3458 3454 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3459 3455 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3460 3456 */
3461 3457 int
3462 3458 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3463 3459 {
3464 3460 struct cpuid_info *cpi;
3465 3461 uint_t osvwid;
3466 3462 static int osvwfeature = -1;
3467 3463 uint64_t osvwlength;
3468 3464
3469 3465
3470 3466 cpi = cpu->cpu_m.mcpu_cpi;
3471 3467
3472 3468 /* confirm OSVW supported */
3473 3469 if (osvwfeature == -1) {
3474 3470 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3475 3471 } else {
3476 3472 /* assert that osvw feature setting is consistent on all cpus */
3477 3473 ASSERT(osvwfeature ==
3478 3474 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3479 3475 }
3480 3476 if (!osvwfeature)
3481 3477 return (-1);
3482 3478
3483 3479 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3484 3480
3485 3481 switch (erratum) {
3486 3482 case 298: /* osvwid is 0 */
3487 3483 osvwid = 0;
3488 3484 if (osvwlength <= (uint64_t)osvwid) {
3489 3485 /* osvwid 0 is unknown */
3490 3486 return (-1);
3491 3487 }
3492 3488
3493 3489 /*
3494 3490 * Check the OSVW STATUS MSR to determine the state
3495 3491 * of the erratum where:
3496 3492 * 0 - fixed by HW
3497 3493 * 1 - BIOS has applied the workaround when BIOS
3498 3494 * workaround is available. (Or for other errata,
3499 3495 * OS workaround is required.)
3500 3496 * For a value of 1, caller will confirm that the
3501 3497 * erratum 298 workaround has indeed been applied by BIOS.
3502 3498 *
3503 3499 * A 1 may be set in cpus that have a HW fix
3504 3500 * in a mixed cpu system. Regarding erratum 298:
3505 3501 * In a multiprocessor platform, the workaround above
3506 3502 * should be applied to all processors regardless of
3507 3503 * silicon revision when an affected processor is
3508 3504 * present.
3509 3505 */
3510 3506
3511 3507 return (rdmsr(MSR_AMD_OSVW_STATUS +
3512 3508 (osvwid / OSVW_ID_CNT_PER_MSR)) &
3513 3509 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3514 3510
3515 3511 default:
3516 3512 return (-1);
3517 3513 }
3518 3514 }
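/*
 * Illustrative sketch (not part of this change): the erratum 298 case above
 * is one instance of the general OSVW lookup.  Status bits are packed
 * OSVW_ID_CNT_PER_MSR per MSR, so an arbitrary id (once known to be below
 * the OSVW length, i.e. actually described by this CPU) would be tested as:
 *
 *	return ((rdmsr(MSR_AMD_OSVW_STATUS +
 *	    (osvwid / OSVW_ID_CNT_PER_MSR)) >>
 *	    (osvwid % OSVW_ID_CNT_PER_MSR)) & 1);
 */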
3519 3515
3520 3516 static const char assoc_str[] = "associativity";
3521 3517 static const char line_str[] = "line-size";
3522 3518 static const char size_str[] = "size";
3523 3519
3524 3520 static void
3525 3521 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3526 3522 uint32_t val)
3527 3523 {
3528 3524 char buf[128];
3529 3525
3530 3526 /*
3531 3527 * ndi_prop_update_int() is used because it is desirable for
3532 3528 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3533 3529 */
3534 3530 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3535 3531 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3536 3532 }
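/*
 * For example (illustrative): add_cache_prop(devi, "l2-cache", size_str,
 * 512*1024) creates an integer property named "l2-cache-size" with the
 * value 524288 on the cpu's devinfo node.
 */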
3537 3533
3538 3534 /*
3539 3535 * Intel-style cache/tlb description
3540 3536 *
3541 3537 * Standard cpuid level 2 gives a randomly ordered
3542 3538 * selection of tags that index into a table that describes
3543 3539 * cache and tlb properties.
3544 3540 */
3545 3541
3546 3542 static const char l1_icache_str[] = "l1-icache";
3547 3543 static const char l1_dcache_str[] = "l1-dcache";
3548 3544 static const char l2_cache_str[] = "l2-cache";
3549 3545 static const char l3_cache_str[] = "l3-cache";
3550 3546 static const char itlb4k_str[] = "itlb-4K";
3551 3547 static const char dtlb4k_str[] = "dtlb-4K";
3552 3548 static const char itlb2M_str[] = "itlb-2M";
3553 3549 static const char itlb4M_str[] = "itlb-4M";
3554 3550 static const char dtlb4M_str[] = "dtlb-4M";
3555 3551 static const char dtlb24_str[] = "dtlb0-2M-4M";
3556 3552 static const char itlb424_str[] = "itlb-4K-2M-4M";
3557 3553 static const char itlb24_str[] = "itlb-2M-4M";
3558 3554 static const char dtlb44_str[] = "dtlb-4K-4M";
3559 3555 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3560 3556 static const char sl2_cache_str[] = "sectored-l2-cache";
3561 3557 static const char itrace_str[] = "itrace-cache";
3562 3558 static const char sl3_cache_str[] = "sectored-l3-cache";
3563 3559 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3564 3560
3565 3561 static const struct cachetab {
3566 3562 uint8_t ct_code;
3567 3563 uint8_t ct_assoc;
3568 3564 uint16_t ct_line_size;
3569 3565 size_t ct_size;
3570 3566 const char *ct_label;
3571 3567 } intel_ctab[] = {
3572 3568 /*
3573 3569 * maintain descending order!
3574 3570 *
3575 3571 * Codes ignored - Reason
3576 3572 * ----------------------
3577 3573 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3578 3574 * f0H/f1H - Currently we do not interpret prefetch size by design
3579 3575 */
3580 3576 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3581 3577 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3582 3578 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3583 3579 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3584 3580 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3585 3581 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3586 3582 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3587 3583 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3588 3584 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3589 3585 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3590 3586 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3591 3587 { 0xd0, 4, 64, 512*1024, l3_cache_str},
3592 3588 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3593 3589 { 0xc0, 4, 0, 8, dtlb44_str },
3594 3590 { 0xba, 4, 0, 64, dtlb4k_str },
3595 3591 { 0xb4, 4, 0, 256, dtlb4k_str },
3596 3592 { 0xb3, 4, 0, 128, dtlb4k_str },
3597 3593 { 0xb2, 4, 0, 64, itlb4k_str },
3598 3594 { 0xb0, 4, 0, 128, itlb4k_str },
3599 3595 { 0x87, 8, 64, 1024*1024, l2_cache_str},
3600 3596 { 0x86, 4, 64, 512*1024, l2_cache_str},
3601 3597 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3602 3598 { 0x84, 8, 32, 1024*1024, l2_cache_str},
3603 3599 { 0x83, 8, 32, 512*1024, l2_cache_str},
3604 3600 { 0x82, 8, 32, 256*1024, l2_cache_str},
3605 3601 { 0x80, 8, 64, 512*1024, l2_cache_str},
3606 3602 { 0x7f, 2, 64, 512*1024, l2_cache_str},
3607 3603 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3608 3604 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3609 3605 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3610 3606 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3611 3607 { 0x79, 8, 64, 128*1024, sl2_cache_str},
3612 3608 { 0x78, 8, 64, 1024*1024, l2_cache_str},
3613 3609 { 0x73, 8, 0, 64*1024, itrace_str},
3614 3610 { 0x72, 8, 0, 32*1024, itrace_str},
3615 3611 { 0x71, 8, 0, 16*1024, itrace_str},
3616 3612 { 0x70, 8, 0, 12*1024, itrace_str},
3617 3613 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3618 3614 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3619 3615 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3620 3616 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3621 3617 { 0x5d, 0, 0, 256, dtlb44_str},
3622 3618 { 0x5c, 0, 0, 128, dtlb44_str},
3623 3619 { 0x5b, 0, 0, 64, dtlb44_str},
3624 3620 { 0x5a, 4, 0, 32, dtlb24_str},
3625 3621 { 0x59, 0, 0, 16, dtlb4k_str},
3626 3622 { 0x57, 4, 0, 16, dtlb4k_str},
3627 3623 { 0x56, 4, 0, 16, dtlb4M_str},
3628 3624 { 0x55, 0, 0, 7, itlb24_str},
3629 3625 { 0x52, 0, 0, 256, itlb424_str},
3630 3626 { 0x51, 0, 0, 128, itlb424_str},
3631 3627 { 0x50, 0, 0, 64, itlb424_str},
3632 3628 { 0x4f, 0, 0, 32, itlb4k_str},
3633 3629 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3634 3630 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3635 3631 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3636 3632 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3637 3633 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3638 3634 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3639 3635 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3640 3636 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3641 3637 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3642 3638 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3643 3639 { 0x44, 4, 32, 1024*1024, l2_cache_str},
3644 3640 { 0x43, 4, 32, 512*1024, l2_cache_str},
3645 3641 { 0x42, 4, 32, 256*1024, l2_cache_str},
3646 3642 { 0x41, 4, 32, 128*1024, l2_cache_str},
3647 3643 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3648 3644 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3649 3645 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3650 3646 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3651 3647 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3652 3648 { 0x39, 4, 64, 128*1024, sl2_cache_str},
3653 3649 { 0x30, 8, 64, 32*1024, l1_icache_str},
3654 3650 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
3655 3651 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
3656 3652 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
3657 3653 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
3658 3654 { 0x22, 4, 64, 512*1024, sl3_cache_str},
3659 3655 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
3660 3656 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
3661 3657 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
3662 3658 { 0x0b, 4, 0, 4, itlb4M_str},
3663 3659 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
3664 3660 { 0x08, 4, 32, 16*1024, l1_icache_str},
3665 3661 { 0x06, 4, 32, 8*1024, l1_icache_str},
3666 3662 { 0x05, 4, 0, 32, dtlb4M_str},
3667 3663 { 0x04, 4, 0, 8, dtlb4M_str},
3668 3664 { 0x03, 4, 0, 64, dtlb4k_str},
3669 3665 { 0x02, 4, 0, 2, itlb4M_str},
3670 3666 { 0x01, 4, 0, 32, itlb4k_str},
3671 3667 { 0 }
3672 3668 };
3673 3669
3674 3670 static const struct cachetab cyrix_ctab[] = {
3675 3671 { 0x70, 4, 0, 32, "tlb-4K" },
3676 3672 { 0x80, 4, 16, 16*1024, "l1-cache" },
3677 3673 { 0 }
3678 3674 };
3679 3675
3680 3676 /*
3681 3677 * Search a cache table for a matching entry
3682 3678 */
3683 3679 static const struct cachetab *
3684 3680 find_cacheent(const struct cachetab *ct, uint_t code)
3685 3681 {
3686 3682 if (code != 0) {
3687 3683 for (; ct->ct_code != 0; ct++)
3688 3684 if (ct->ct_code <= code)
3689 3685 break;
3690 3686 if (ct->ct_code == code)
3691 3687 return (ct);
3692 3688 }
3693 3689 return (NULL);
3694 3690 }
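/*
 * Worked example (illustrative): because the tables passed in are sorted in
 * descending ct_code order, a lookup of code 0x45 walks past every entry
 * with a code greater than 0x45 and stops at the first entry <= 0x45; the
 * lookup succeeds only if that entry's code is exactly 0x45, otherwise
 * NULL is returned.
 */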
3695 3691
3696 3692 /*
3697 3693 * Populate cachetab entry with L2 or L3 cache-information using
3698 3694 * cpuid function 4. This function is called from intel_walk_cacheinfo()
3699 3695 * when descriptor 0x49 is encountered. It returns 0 if no such cache
3700 3696 * information is found.
3701 3697 */
3702 3698 static int
3703 3699 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3704 3700 {
3705 3701 uint32_t level, i;
3706 3702 int ret = 0;
3707 3703
3708 3704 for (i = 0; i < cpi->cpi_std_4_size; i++) {
3709 3705 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3710 3706
3711 3707 if (level == 2 || level == 3) {
3712 3708 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3713 3709 ct->ct_line_size =
3714 3710 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3715 3711 ct->ct_size = ct->ct_assoc *
3716 3712 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3717 3713 ct->ct_line_size *
3718 3714 (cpi->cpi_std_4[i]->cp_ecx + 1);
3719 3715
3720 3716 if (level == 2) {
3721 3717 ct->ct_label = l2_cache_str;
3722 3718 } else if (level == 3) {
3723 3719 ct->ct_label = l3_cache_str;
3724 3720 }
3725 3721 ret = 1;
3726 3722 }
3727 3723 }
3728 3724
3729 3725 return (ret);
3730 3726 }
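/*
 * Worked example (illustrative): a leaf-4 entry reporting
 * CPI_CACHE_WAYS == 7, CPI_CACHE_PARTS == 0, CPI_CACHE_COH_LN_SZ == 63 and
 * cp_ecx (sets - 1) == 8191 yields a ct_size of
 * (7 + 1) * (0 + 1) * (63 + 1) * (8191 + 1) == 4MB.
 */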
3731 3727
3732 3728 /*
3733 3729 * Walk the cacheinfo descriptor list, applying 'func' to every valid element.
3734 3730 * The walk is terminated if the walker returns non-zero.
3735 3731 */
3736 3732 static void
3737 3733 intel_walk_cacheinfo(struct cpuid_info *cpi,
3738 3734 void *arg, int (*func)(void *, const struct cachetab *))
3739 3735 {
3740 3736 const struct cachetab *ct;
3741 3737 struct cachetab des_49_ct, des_b1_ct;
3742 3738 uint8_t *dp;
3743 3739 int i;
3744 3740
3745 3741 if ((dp = cpi->cpi_cacheinfo) == NULL)
3746 3742 return;
3747 3743 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3748 3744 /*
3749 3745 * For overloaded descriptor 0x49 we use cpuid function 4
3750 3746 * if supported by the current processor, to create
3751 3747 * cache information.
3752 3748 * For overloaded descriptor 0xb1 we use X86_PAE flag
3753 3749 * to disambiguate the cache information.
3754 3750 */
3755 3751 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3756 3752 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3757 3753 ct = &des_49_ct;
3758 3754 } else if (*dp == 0xb1) {
3759 3755 des_b1_ct.ct_code = 0xb1;
3760 3756 des_b1_ct.ct_assoc = 4;
3761 3757 des_b1_ct.ct_line_size = 0;
3762 3758 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3763 3759 des_b1_ct.ct_size = 8;
3764 3760 des_b1_ct.ct_label = itlb2M_str;
3765 3761 } else {
3766 3762 des_b1_ct.ct_size = 4;
3767 3763 des_b1_ct.ct_label = itlb4M_str;
3768 3764 }
3769 3765 ct = &des_b1_ct;
3770 3766 } else {
3771 3767 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3772 3768 continue;
3773 3769 }
3774 3770 }
3775 3771
3776 3772 if (func(arg, ct) != 0) {
3777 3773 break;
3778 3774 }
3779 3775 }
3780 3776 }
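/*
 * A minimal walker sketch (hypothetical, for illustration): count the L2
 * descriptors seen, using the same callback contract as the walkers in this
 * file (return 0 to continue the walk, non-zero to terminate it; labels are
 * compared by pointer against the file-static strings):
 *
 *	static int
 *	count_l2(void *arg, const struct cachetab *ct)
 *	{
 *		if (ct->ct_label == l2_cache_str ||
 *		    ct->ct_label == sl2_cache_str)
 *			(*(int *)arg)++;
 *		return (0);
 *	}
 *
 *	int n = 0;
 *	intel_walk_cacheinfo(cpi, &n, count_l2);
 */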
3781 3777
3782 3778 /*
3783 3779 * (Like the Intel one, except for Cyrix CPUs)
3784 3780 */
3785 3781 static void
3786 3782 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3787 3783 void *arg, int (*func)(void *, const struct cachetab *))
3788 3784 {
3789 3785 const struct cachetab *ct;
3790 3786 uint8_t *dp;
3791 3787 int i;
3792 3788
3793 3789 if ((dp = cpi->cpi_cacheinfo) == NULL)
3794 3790 return;
3795 3791 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3796 3792 /*
3797 3793 * Search Cyrix-specific descriptor table first ..
3798 3794 */
3799 3795 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3800 3796 if (func(arg, ct) != 0)
3801 3797 break;
3802 3798 continue;
3803 3799 }
3804 3800 /*
3805 3801 * .. else fall back to the Intel one
3806 3802 */
3807 3803 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3808 3804 if (func(arg, ct) != 0)
3809 3805 break;
3810 3806 continue;
3811 3807 }
3812 3808 }
3813 3809 }
3814 3810
3815 3811 /*
3816 3812 * A cacheinfo walker that adds associativity, line-size, and size properties
3817 3813 * to the devinfo node it is passed as an argument.
3818 3814 */
3819 3815 static int
3820 3816 add_cacheent_props(void *arg, const struct cachetab *ct)
3821 3817 {
3822 3818 dev_info_t *devi = arg;
3823 3819
3824 3820 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3825 3821 if (ct->ct_line_size != 0)
3826 3822 add_cache_prop(devi, ct->ct_label, line_str,
3827 3823 ct->ct_line_size);
3828 3824 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3829 3825 return (0);
3830 3826 }
3831 3827
3832 3828
3833 3829 static const char fully_assoc[] = "fully-associative?";
3834 3830
3835 3831 /*
3836 3832 * AMD style cache/tlb description
3837 3833 *
3838 3834 * Extended functions 5 and 6 directly describe properties of
3839 3835 * tlbs and various cache levels.
3840 3836 */
3841 3837 static void
3842 3838 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3843 3839 {
3844 3840 switch (assoc) {
3845 3841 case 0: /* reserved; ignore */
3846 3842 break;
3847 3843 default:
3848 3844 add_cache_prop(devi, label, assoc_str, assoc);
3849 3845 break;
3850 3846 case 0xff:
3851 3847 add_cache_prop(devi, label, fully_assoc, 1);
3852 3848 break;
3853 3849 }
3854 3850 }
3855 3851
3856 3852 static void
3857 3853 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3858 3854 {
3859 3855 if (size == 0)
3860 3856 return;
3861 3857 add_cache_prop(devi, label, size_str, size);
3862 3858 add_amd_assoc(devi, label, assoc);
3863 3859 }
3864 3860
3865 3861 static void
3866 3862 add_amd_cache(dev_info_t *devi, const char *label,
3867 3863 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3868 3864 {
3869 3865 if (size == 0 || line_size == 0)
3870 3866 return;
3871 3867 add_amd_assoc(devi, label, assoc);
3872 3868 /*
3873 3869 * Most AMD parts have a sectored cache. Multiple cache lines are
3874 3870 * associated with each tag. A sector consists of all cache lines
3875 3871 * associated with a tag. For example, the AMD K6-III has a sector
3876 3872 * size of 2 cache lines per tag.
3877 3873 */
3878 3874 if (lines_per_tag != 0)
3879 3875 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3880 3876 add_cache_prop(devi, label, line_str, line_size);
3881 3877 add_cache_prop(devi, label, size_str, size * 1024);
3882 3878 }
3883 3879
3884 3880 static void
3885 3881 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3886 3882 {
3887 3883 switch (assoc) {
3888 3884 case 0: /* off */
3889 3885 break;
3890 3886 case 1:
3891 3887 case 2:
3892 3888 case 4:
3893 3889 add_cache_prop(devi, label, assoc_str, assoc);
3894 3890 break;
3895 3891 case 6:
3896 3892 add_cache_prop(devi, label, assoc_str, 8);
3897 3893 break;
3898 3894 case 8:
3899 3895 add_cache_prop(devi, label, assoc_str, 16);
3900 3896 break;
3901 3897 case 0xf:
3902 3898 add_cache_prop(devi, label, fully_assoc, 1);
3903 3899 break;
3904 3900 default: /* reserved; ignore */
3905 3901 break;
3906 3902 }
3907 3903 }
3908 3904
3909 3905 static void
3910 3906 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3911 3907 {
3912 3908 if (size == 0 || assoc == 0)
3913 3909 return;
3914 3910 add_amd_l2_assoc(devi, label, assoc);
3915 3911 add_cache_prop(devi, label, size_str, size);
3916 3912 }
3917 3913
3918 3914 static void
3919 3915 add_amd_l2_cache(dev_info_t *devi, const char *label,
3920 3916 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3921 3917 {
3922 3918 if (size == 0 || assoc == 0 || line_size == 0)
3923 3919 return;
3924 3920 add_amd_l2_assoc(devi, label, assoc);
3925 3921 if (lines_per_tag != 0)
3926 3922 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3927 3923 add_cache_prop(devi, label, line_str, line_size);
3928 3924 add_cache_prop(devi, label, size_str, size * 1024);
3929 3925 }
3930 3926
3931 3927 static void
3932 3928 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3933 3929 {
3934 3930 struct cpuid_regs *cp;
3935 3931
3936 3932 if (cpi->cpi_xmaxeax < 0x80000005)
3937 3933 return;
3938 3934 cp = &cpi->cpi_extd[5];
3939 3935
3940 3936 /*
3941 3937 * 4M/2M L1 TLB configuration
3942 3938 *
3943 3939 * We report the size for 2M pages because AMD uses two
3944 3940 * TLB entries for one 4M page.
3945 3941 */
3946 3942 add_amd_tlb(devi, "dtlb-2M",
3947 3943 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3948 3944 add_amd_tlb(devi, "itlb-2M",
3949 3945 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3950 3946
3951 3947 /*
3952 3948 * 4K L1 TLB configuration
3953 3949 */
3954 3950
3955 3951 switch (cpi->cpi_vendor) {
3956 3952 uint_t nentries;
3957 3953 case X86_VENDOR_TM:
3958 3954 if (cpi->cpi_family >= 5) {
3959 3955 /*
3960 3956 * Crusoe processors have 256 TLB entries, but
3961 3957 * the cpuid data format constrains them to
3962 3958 * reporting only 255 of them.
3963 3959 */
3964 3960 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3965 3961 nentries = 256;
3966 3962 /*
3967 3963 * Crusoe processors also have a unified TLB
3968 3964 */
3969 3965 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3970 3966 nentries);
3971 3967 break;
3972 3968 }
3973 3969 /*FALLTHROUGH*/
3974 3970 default:
3975 3971 add_amd_tlb(devi, itlb4k_str,
3976 3972 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3977 3973 add_amd_tlb(devi, dtlb4k_str,
3978 3974 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3979 3975 break;
3980 3976 }
3981 3977
3982 3978 /*
3983 3979 * data L1 cache configuration
3984 3980 */
3985 3981
3986 3982 add_amd_cache(devi, l1_dcache_str,
3987 3983 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3988 3984 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3989 3985
3990 3986 /*
3991 3987 * code L1 cache configuration
3992 3988 */
3993 3989
3994 3990 add_amd_cache(devi, l1_icache_str,
3995 3991 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3996 3992 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3997 3993
3998 3994 if (cpi->cpi_xmaxeax < 0x80000006)
3999 3995 return;
4000 3996 cp = &cpi->cpi_extd[6];
4001 3997
4002 3998 /* Check for a unified L2 TLB for large pages */
4003 3999
4004 4000 if (BITX(cp->cp_eax, 31, 16) == 0)
4005 4001 add_amd_l2_tlb(devi, "l2-tlb-2M",
4006 4002 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4007 4003 else {
4008 4004 add_amd_l2_tlb(devi, "l2-dtlb-2M",
4009 4005 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
4010 4006 add_amd_l2_tlb(devi, "l2-itlb-2M",
4011 4007 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4012 4008 }
4013 4009
4014 4010 /* Check for a unified L2 TLB for 4K pages */
4015 4011
4016 4012 if (BITX(cp->cp_ebx, 31, 16) == 0) {
4017 4013 add_amd_l2_tlb(devi, "l2-tlb-4K",
4018 4014 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4019 4015 } else {
4020 4016 add_amd_l2_tlb(devi, "l2-dtlb-4K",
4021 4017 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
4022 4018 add_amd_l2_tlb(devi, "l2-itlb-4K",
4023 4019 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4024 4020 }
4025 4021
4026 4022 add_amd_l2_cache(devi, l2_cache_str,
4027 4023 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
4028 4024 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
4029 4025 }
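/*
 * Worked example (illustrative): on a typical K8 part, leaf 0x80000005
 * returns cp_ecx == 0x40020140, which the BITX() extractions above decode
 * as a 64K (bits 31:24), 2-way (23:16) L1 dcache with 1 line per tag (15:8)
 * and a 64-byte line size (7:0).
 */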
4030 4026
4031 4027 /*
4032 4028 * There are two basic ways that the x86 world describes its cache
4033 4029 * and tlb architecture - Intel's way and AMD's way.
4034 4030 *
4035 4031 * Return which flavor of cache architecture we should use
4036 4032 */
4037 4033 static int
4038 4034 x86_which_cacheinfo(struct cpuid_info *cpi)
4039 4035 {
4040 4036 switch (cpi->cpi_vendor) {
4041 4037 case X86_VENDOR_Intel:
4042 4038 if (cpi->cpi_maxeax >= 2)
4043 4039 return (X86_VENDOR_Intel);
4044 4040 break;
4045 4041 case X86_VENDOR_AMD:
4046 4042 /*
4047 4043 * The K5 model 1 was the first part from AMD that reported
4048 4044 * cache sizes via extended cpuid functions.
4049 4045 */
4050 4046 if (cpi->cpi_family > 5 ||
4051 4047 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
4052 4048 return (X86_VENDOR_AMD);
4053 4049 break;
4054 4050 case X86_VENDOR_TM:
4055 4051 if (cpi->cpi_family >= 5)
4056 4052 return (X86_VENDOR_AMD);
4057 4053 /*FALLTHROUGH*/
4058 4054 default:
4059 4055 /*
4060 4056 * If they have extended CPU data for 0x80000005
4061 4057 * then we assume they have AMD-format cache
4062 4058 * information.
4063 4059 *
4064 4060 * If not, and the vendor happens to be Cyrix,
4065 4061 * then try our Cyrix-specific handler.
4066 4062 *
4067 4063 * If we're not Cyrix, then assume we're using Intel's
4068 4064 * table-driven format instead.
4069 4065 */
4070 4066 if (cpi->cpi_xmaxeax >= 0x80000005)
4071 4067 return (X86_VENDOR_AMD);
4072 4068 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
4073 4069 return (X86_VENDOR_Cyrix);
4074 4070 else if (cpi->cpi_maxeax >= 2)
4075 4071 return (X86_VENDOR_Intel);
4076 4072 break;
4077 4073 }
4078 4074 return (-1);
4079 4075 }
4080 4076
4081 4077 void
4082 4078 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
4083 4079 struct cpuid_info *cpi)
4084 4080 {
4085 4081 dev_info_t *cpu_devi;
4086 4082 int create;
4087 4083
4088 4084 cpu_devi = (dev_info_t *)dip;
4089 4085
4090 4086 /* device_type */
4091 4087 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4092 4088 "device_type", "cpu");
4093 4089
4094 4090 /* reg */
4095 4091 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4096 4092 "reg", cpu_id);
4097 4093
4098 4094 /* cpu-mhz, and clock-frequency */
4099 4095 if (cpu_freq > 0) {
4100 4096 long long mul;
4101 4097
4102 4098 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4103 4099 "cpu-mhz", cpu_freq);
4104 4100 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
4105 4101 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4106 4102 "clock-frequency", (int)mul);
4107 4103 }
4108 4104
4109 4105 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4110 4106 return;
4111 4107 }
4112 4108
4113 4109 /* vendor-id */
4114 4110 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4115 4111 "vendor-id", cpi->cpi_vendorstr);
4116 4112
4117 4113 if (cpi->cpi_maxeax == 0) {
4118 4114 return;
4119 4115 }
4120 4116
4121 4117 /*
4122 4118 * family, model, and step
4123 4119 */
4124 4120 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4125 4121 "family", CPI_FAMILY(cpi));
4126 4122 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4127 4123 "cpu-model", CPI_MODEL(cpi));
4128 4124 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4129 4125 "stepping-id", CPI_STEP(cpi));
4130 4126
4131 4127 /* type */
4132 4128 switch (cpi->cpi_vendor) {
4133 4129 case X86_VENDOR_Intel:
4134 4130 create = 1;
4135 4131 break;
4136 4132 default:
4137 4133 create = 0;
4138 4134 break;
4139 4135 }
4140 4136 if (create)
4141 4137 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4142 4138 "type", CPI_TYPE(cpi));
4143 4139
4144 4140 /* ext-family */
4145 4141 switch (cpi->cpi_vendor) {
4146 4142 case X86_VENDOR_Intel:
4147 4143 case X86_VENDOR_AMD:
4148 4144 create = cpi->cpi_family >= 0xf;
4149 4145 break;
4150 4146 default:
4151 4147 create = 0;
4152 4148 break;
4153 4149 }
4154 4150 if (create)
4155 4151 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4156 4152 "ext-family", CPI_FAMILY_XTD(cpi));
4157 4153
4158 4154 /* ext-model */
4159 4155 switch (cpi->cpi_vendor) {
4160 4156 case X86_VENDOR_Intel:
4161 4157 create = IS_EXTENDED_MODEL_INTEL(cpi);
4162 4158 break;
4163 4159 case X86_VENDOR_AMD:
4164 4160 create = CPI_FAMILY(cpi) == 0xf;
4165 4161 break;
4166 4162 default:
4167 4163 create = 0;
4168 4164 break;
4169 4165 }
4170 4166 if (create)
4171 4167 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4172 4168 "ext-model", CPI_MODEL_XTD(cpi));
4173 4169
4174 4170 /* generation */
4175 4171 switch (cpi->cpi_vendor) {
4176 4172 case X86_VENDOR_AMD:
4177 4173 /*
4178 4174 * AMD K5 model 1 was the first part to support this
4179 4175 */
4180 4176 create = cpi->cpi_xmaxeax >= 0x80000001;
4181 4177 break;
4182 4178 default:
4183 4179 create = 0;
4184 4180 break;
4185 4181 }
4186 4182 if (create)
4187 4183 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4188 4184 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4189 4185
4190 4186 /* brand-id */
4191 4187 switch (cpi->cpi_vendor) {
4192 4188 case X86_VENDOR_Intel:
4193 4189 /*
4194 4190 * brand id first appeared on Pentium III Xeon model 8,
4195 4191 * and Celeron model 8 processors and Opteron
4196 4192 */
4197 4193 create = cpi->cpi_family > 6 ||
4198 4194 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4199 4195 break;
4200 4196 case X86_VENDOR_AMD:
4201 4197 create = cpi->cpi_family >= 0xf;
4202 4198 break;
4203 4199 default:
4204 4200 create = 0;
4205 4201 break;
4206 4202 }
4207 4203 if (create && cpi->cpi_brandid != 0) {
4208 4204 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4209 4205 "brand-id", cpi->cpi_brandid);
4210 4206 }
4211 4207
4212 4208 /* chunks, and apic-id */
4213 4209 switch (cpi->cpi_vendor) {
4214 4210 /*
4215 4211 * first available on Pentium IV and Opteron (K8)
4216 4212 */
4217 4213 case X86_VENDOR_Intel:
4218 4214 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4219 4215 break;
4220 4216 case X86_VENDOR_AMD:
4221 4217 create = cpi->cpi_family >= 0xf;
4222 4218 break;
4223 4219 default:
4224 4220 create = 0;
4225 4221 break;
4226 4222 }
4227 4223 if (create) {
4228 4224 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4229 4225 "chunks", CPI_CHUNKS(cpi));
4230 4226 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4231 4227 "apic-id", cpi->cpi_apicid);
4232 4228 if (cpi->cpi_chipid >= 0) {
4233 4229 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4234 4230 "chip#", cpi->cpi_chipid);
4235 4231 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4236 4232 "clog#", cpi->cpi_clogid);
4237 4233 }
4238 4234 }
4239 4235
4240 4236 /* cpuid-features */
4241 4237 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4242 4238 "cpuid-features", CPI_FEATURES_EDX(cpi));
4243 4239
4244 4240
4245 4241 /* cpuid-features-ecx */
4246 4242 switch (cpi->cpi_vendor) {
4247 4243 case X86_VENDOR_Intel:
4248 4244 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4249 4245 break;
4250 4246 case X86_VENDOR_AMD:
4251 4247 create = cpi->cpi_family >= 0xf;
4252 4248 break;
4253 4249 default:
4254 4250 create = 0;
4255 4251 break;
4256 4252 }
4257 4253 if (create)
4258 4254 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4259 4255 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4260 4256
4261 4257 /* ext-cpuid-features */
4262 4258 switch (cpi->cpi_vendor) {
4263 4259 case X86_VENDOR_Intel:
4264 4260 case X86_VENDOR_AMD:
4265 4261 case X86_VENDOR_Cyrix:
4266 4262 case X86_VENDOR_TM:
4267 4263 case X86_VENDOR_Centaur:
4268 4264 create = cpi->cpi_xmaxeax >= 0x80000001;
4269 4265 break;
4270 4266 default:
4271 4267 create = 0;
4272 4268 break;
4273 4269 }
4274 4270 if (create) {
4275 4271 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4276 4272 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4277 4273 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4278 4274 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4279 4275 }
4280 4276
4281 4277 /*
4282 4278 * Brand String first appeared in Intel Pentium IV, AMD K5
4283 4279 * model 1, and Cyrix GXm. On earlier models we try and
4284 4280 * simulate something similar .. so this string should always
4285 4281 * say -something- about the processor, however lame.
4286 4282 */
4287 4283 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4288 4284 "brand-string", cpi->cpi_brandstr);
4289 4285
4290 4286 /*
4291 4287 * Finally, cache and tlb information
4292 4288 */
4293 4289 switch (x86_which_cacheinfo(cpi)) {
4294 4290 case X86_VENDOR_Intel:
4295 4291 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4296 4292 break;
4297 4293 case X86_VENDOR_Cyrix:
4298 4294 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4299 4295 break;
4300 4296 case X86_VENDOR_AMD:
4301 4297 amd_cache_info(cpi, cpu_devi);
4302 4298 break;
4303 4299 default:
4304 4300 break;
4305 4301 }
4306 4302 }
4307 4303
4308 4304 struct l2info {
4309 4305 int *l2i_csz;
4310 4306 int *l2i_lsz;
4311 4307 int *l2i_assoc;
4312 4308 int l2i_ret;
4313 4309 };
4314 4310
4315 4311 /*
4316 4312 * A cacheinfo walker that fetches the size, line-size and associativity
4317 4313 * of the L2 cache
4318 4314 */
4319 4315 static int
4320 4316 intel_l2cinfo(void *arg, const struct cachetab *ct)
4321 4317 {
4322 4318 struct l2info *l2i = arg;
4323 4319 int *ip;
4324 4320
4325 4321 if (ct->ct_label != l2_cache_str &&
4326 4322 ct->ct_label != sl2_cache_str)
4327 4323 return (0); /* not an L2 -- keep walking */
4328 4324
4329 4325 if ((ip = l2i->l2i_csz) != NULL)
4330 4326 *ip = ct->ct_size;
4331 4327 if ((ip = l2i->l2i_lsz) != NULL)
4332 4328 *ip = ct->ct_line_size;
4333 4329 if ((ip = l2i->l2i_assoc) != NULL)
4334 4330 *ip = ct->ct_assoc;
4335 4331 l2i->l2i_ret = ct->ct_size;
4336 4332 return (1); /* was an L2 -- terminate walk */
4337 4333 }
4338 4334
4339 4335 /*
4340 4336 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4341 4337 *
4342 4338 * Unlike the associativity for the L1 cache and tlb where the 8 bit
4343 4339 * value is the associativity, the associativity for the L2 cache and
4344 4340 * tlb is encoded in the following table. The 4 bit L2 value serves as
4345 4341 * an index into the amd_afd[] array to determine the associativity.
4346 4342 * -1 is undefined. 0 is fully associative.
4347 4343 */
4348 4344
4349 4345 static int amd_afd[] =
4350 4346 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
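/*
 * For example (illustrative): an encoded L2/L3 associativity field of 6
 * indexes amd_afd[6] == 8, i.e. 8-way set-associative, while 0xf indexes
 * amd_afd[15] == 0, i.e. fully associative.
 */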
4351 4347
4352 4348 static void
4353 4349 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4354 4350 {
4355 4351 struct cpuid_regs *cp;
4356 4352 uint_t size, assoc;
4357 4353 int i;
4358 4354 int *ip;
4359 4355
4360 4356 if (cpi->cpi_xmaxeax < 0x80000006)
4361 4357 return;
4362 4358 cp = &cpi->cpi_extd[6];
4363 4359
4364 4360 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4365 4361 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4366 4362 uint_t cachesz = size * 1024;
4367 4363 assoc = amd_afd[i];
4368 4364
4369 4365 ASSERT(assoc != -1);
4370 4366
4371 4367 if ((ip = l2i->l2i_csz) != NULL)
4372 4368 *ip = cachesz;
4373 4369 if ((ip = l2i->l2i_lsz) != NULL)
4374 4370 *ip = BITX(cp->cp_ecx, 7, 0);
4375 4371 if ((ip = l2i->l2i_assoc) != NULL)
4376 4372 *ip = assoc;
4377 4373 l2i->l2i_ret = cachesz;
4378 4374 }
4379 4375 }
4380 4376
4381 4377 int
4382 4378 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4383 4379 {
4384 4380 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4385 4381 struct l2info __l2info, *l2i = &__l2info;
4386 4382
4387 4383 l2i->l2i_csz = csz;
4388 4384 l2i->l2i_lsz = lsz;
4389 4385 l2i->l2i_assoc = assoc;
4390 4386 l2i->l2i_ret = -1;
4391 4387
4392 4388 switch (x86_which_cacheinfo(cpi)) {
4393 4389 case X86_VENDOR_Intel:
4394 4390 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4395 4391 break;
4396 4392 case X86_VENDOR_Cyrix:
4397 4393 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4398 4394 break;
4399 4395 case X86_VENDOR_AMD:
4400 4396 amd_l2cacheinfo(cpi, l2i);
4401 4397 break;
4402 4398 default:
4403 4399 break;
4404 4400 }
4405 4401 return (l2i->l2i_ret);
4406 4402 }
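/*
 * Typical use (illustrative): a caller interested only in the size can pass
 * NULL for the out-parameters it does not need, since the walkers check
 * each pointer before storing through it:
 *
 *	int l2size = getl2cacheinfo(CPU, NULL, NULL, NULL);
 */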
4407 4403
4408 4404 #if !defined(__xpv)
4409 4405
4410 4406 uint32_t *
4411 4407 cpuid_mwait_alloc(cpu_t *cpu)
4412 4408 {
4413 4409 uint32_t *ret;
4414 4410 size_t mwait_size;
4415 4411
4416 4412 ASSERT(cpuid_checkpass(CPU, 2));
4417 4413
4418 4414 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4419 4415 if (mwait_size == 0)
4420 4416 return (NULL);
4421 4417
4422 4418 /*
4423 4419 * kmem_alloc() returns cache line size aligned data for mwait_size
4424 4420 * allocations. mwait_size is currently cache line sized. Neither
4425 4421 * of these implementation details are guaranteed to be true in the
4426 4422 * future.
4427 4423 *
4428 4424 * First try allocating mwait_size as kmem_alloc() currently returns
4429 4425 * correctly aligned memory. If kmem_alloc() does not return
4430 4426 * mwait_size aligned memory, then use mwait_size ROUNDUP.
4431 4427 *
4432 4428 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4433 4429 * decide to free this memory.
4434 4430 */
4435 4431 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4436 4432 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4437 4433 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4438 4434 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4439 4435 *ret = MWAIT_RUNNING;
4440 4436 return (ret);
4441 4437 } else {
4442 4438 kmem_free(ret, mwait_size);
4443 4439 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4444 4440 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4445 4441 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4446 4442 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4447 4443 *ret = MWAIT_RUNNING;
4448 4444 return (ret);
4449 4445 }
4450 4446 }
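/*
 * Illustrative example of the alignment test above, assuming
 * mwait_size == 64: a buffer kmem_zalloc() places at 0x...040 is already
 * 64-byte aligned (P2ROUNDUP returns it unchanged) and is used directly,
 * while one at 0x...048 is not, so a doubled allocation is made and the
 * returned pointer rounded up to the next 64-byte boundary inside it.
 */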
4451 4447
4452 4448 void
4453 4449 cpuid_mwait_free(cpu_t *cpu)
4454 4450 {
4455 4451 if (cpu->cpu_m.mcpu_cpi == NULL) {
4456 4452 return;
4457 4453 }
4458 4454
4459 4455 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4460 4456 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4461 4457 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4462 4458 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4463 4459 }
4464 4460
4465 4461 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4466 4462 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4467 4463 }
4468 4464
4469 4465 void
4470 4466 patch_tsc_read(int flag)
4471 4467 {
4472 4468 size_t cnt;
4473 4469
4474 4470 switch (flag) {
4475 4471 case X86_NO_TSC:
4476 4472 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4477 4473 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4478 4474 break;
4479 4475 case X86_HAVE_TSCP:
4480 4476 cnt = &_tscp_end - &_tscp_start;
4481 4477 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4482 4478 break;
4483 4479 case X86_TSC_MFENCE:
4484 4480 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4485 4481 (void) memcpy((void *)tsc_read,
4486 4482 (void *)&_tsc_mfence_start, cnt);
4487 4483 break;
4488 4484 case X86_TSC_LFENCE:
4489 4485 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4490 4486 (void) memcpy((void *)tsc_read,
4491 4487 (void *)&_tsc_lfence_start, cnt);
4492 4488 break;
4493 4489 default:
4494 4490 break;
4495 4491 }
4496 4492 }
4497 4493
4498 4494 int
4499 4495 cpuid_deep_cstates_supported(void)
4500 4496 {
4501 4497 struct cpuid_info *cpi;
4502 4498 struct cpuid_regs regs;
4503 4499
4504 4500 ASSERT(cpuid_checkpass(CPU, 1));
4505 4501
4506 4502 cpi = CPU->cpu_m.mcpu_cpi;
4507 4503
4508 4504 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4509 4505 return (0);
4510 4506
4511 4507 switch (cpi->cpi_vendor) {
4512 4508 case X86_VENDOR_Intel:
4513 4509 if (cpi->cpi_xmaxeax < 0x80000007)
4514 4510 return (0);
4515 4511
4516 4512 /*
4517 4513 * Does the TSC run at a constant rate in all ACPI C-states?
4518 4514 */
4519 4515 regs.cp_eax = 0x80000007;
4520 4516 (void) __cpuid_insn(&regs);
4521 4517 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4522 4518
4523 4519 default:
4524 4520 return (0);
4525 4521 }
4526 4522 }
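/*
 * Note (for illustration): CPUID_TSC_CSTATE_INVARIANCE is the same
 * leaf-0x80000007 EDX invariant-TSC indication that is tested as bit 8
 * (0x100) in the erratum 6336786 check earlier in this file.
 */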
4527 4523
4528 4524 #endif /* !__xpv */
4529 4525
4530 4526 void
4531 4527 post_startup_cpu_fixups(void)
4532 4528 {
4533 4529 #ifndef __xpv
4534 4530 /*
4535 4531 * Some AMD processors support C1E state. Entering this state will
4536 4532 * cause the local APIC timer to stop, which we can't deal with at
4537 4533 * this time.
4538 4534 */
4539 4535 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4540 4536 on_trap_data_t otd;
4541 4537 uint64_t reg;
4542 4538
4543 4539 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4544 4540 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4545 4541 /* Disable C1E state if it is enabled by BIOS */
4546 4542 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4547 4543 AMD_ACTONCMPHALT_MASK) {
4548 4544 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4549 4545 AMD_ACTONCMPHALT_SHIFT);
4550 4546 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4551 4547 }
4552 4548 }
4553 4549 no_trap();
4554 4550 }
4555 4551 #endif /* !__xpv */
4556 4552 }
4557 4553
4558 4554 /*
4559 4555 * Setup necessary registers to enable XSAVE feature on this processor.
4560 4556 * This function needs to be called early enough, so that no xsave/xrstor
4561 4557 * ops will execute on the processor before the MSRs are properly set up.
4562 4558 *
4563 4559 * Current implementation has the following assumption:
4564 4560 * - cpuid_pass1() is done, so that X86 features are known.
4565 4561 * - fpu_probe() is done, so that fp_save_mech is chosen.
4566 4562 */
4567 4563 void
4568 4564 xsave_setup_msr(cpu_t *cpu)
4569 4565 {
4570 4566 ASSERT(fp_save_mech == FP_XSAVE);
4571 4567 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4572 4568
4573 4569 /* Enable OSXSAVE in CR4. */
4574 4570 setcr4(getcr4() | CR4_OSXSAVE);
4575 4571 /*
4576 4572 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4577 4573 * correct value.
4578 4574 */
4579 4575 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4580 4576 setup_xfem();
4581 4577 }
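/*
 * A hypothetical sketch of the ordering this function assumes: it must run
 * after fpu_probe() has chosen FP_XSAVE but before the first xsave/xrstor
 * on this cpu, e.g. from a cpu startup path guarded to mirror the ASSERTs:
 *
 *	if (fp_save_mech == FP_XSAVE)
 *		xsave_setup_msr(CPU);
 */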
4582 4578
4583 4579 /*
4584 4580 * Starting with the Westmere processor the local
4585 4581 * APIC timer will continue running in all C-states,
4586 4582 * including the deepest C-states.
4587 4583 */
4588 4584 int
4589 4585 cpuid_arat_supported(void)
4590 4586 {
4591 4587 struct cpuid_info *cpi;
4592 4588 struct cpuid_regs regs;
4593 4589
4594 4590 ASSERT(cpuid_checkpass(CPU, 1));
4595 4591 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4596 4592
4597 4593 cpi = CPU->cpu_m.mcpu_cpi;
4598 4594
4599 4595 switch (cpi->cpi_vendor) {
4600 4596 case X86_VENDOR_Intel:
4601 4597 /*
4602 4598 * Always-running Local APIC Timer is
4603 4599 * indicated by CPUID.6.EAX[2].
4604 4600 */
4605 4601 if (cpi->cpi_maxeax >= 6) {
4606 4602 regs.cp_eax = 6;
4607 4603 (void) cpuid_insn(NULL, &regs);
4608 4604 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4609 4605 } else {
4610 4606 return (0);
4611 4607 }
4612 4608 default:
4613 4609 return (0);
4614 4610 }
4615 4611 }
4616 4612
4617 4613 /*
4618 4614 * Check support for Intel ENERGY_PERF_BIAS feature
4619 4615 */
4620 4616 int
4621 4617 cpuid_iepb_supported(struct cpu *cp)
4622 4618 {
4623 4619 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4624 4620 struct cpuid_regs regs;
4625 4621
4626 4622 ASSERT(cpuid_checkpass(cp, 1));
4627 4623
4628 4624 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4629 4625 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4630 4626 return (0);
4631 4627 }
4632 4628
4633 4629 /*
4634 4630 * Intel ENERGY_PERF_BIAS MSR is indicated by
4635 4631 * capability bit CPUID.6.ECX.3
4636 4632 */
4637 4633 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4638 4634 return (0);
4639 4635
4640 4636 regs.cp_eax = 0x6;
4641 4637 (void) cpuid_insn(NULL, &regs);
4642 4638 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4643 4639 }
4644 4640
4645 4641 /*
4646 4642 * Check support for TSC deadline timer
4647 4643 *
4648 4644 * The TSC deadline timer provides a superior software programming
4649 4645 * model over the local APIC timer, one that eliminates "time drift":
4650 4646 * instead of specifying a relative time, software specifies an
4651 4647 * absolute time as the target at which the processor should
4652 4648 * generate a timer event.
4653 4649 */
4654 4650 int
4655 4651 cpuid_deadline_tsc_supported(void)
4656 4652 {
4657 4653 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4658 4654 struct cpuid_regs regs;
4659 4655
4660 4656 ASSERT(cpuid_checkpass(CPU, 1));
4661 4657 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4662 4658
4663 4659 switch (cpi->cpi_vendor) {
4664 4660 case X86_VENDOR_Intel:
4665 4661 if (cpi->cpi_maxeax >= 1) {
4666 4662 regs.cp_eax = 1;
4667 4663 (void) cpuid_insn(NULL, &regs);
4668 4664 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4669 4665 } else {
4670 4666 return (0);
4671 4667 }
4672 4668 default:
4673 4669 return (0);
4674 4670 }
4675 4671 }
4676 4672
4677 4673 #if defined(__amd64) && !defined(__xpv)
4678 4674 /*
4679 4675 * Patch in versions of bcopy for high performance Intel Nhm processors
4680 4676 * and later...
4681 4677 */
4682 4678 void
4683 4679 patch_memops(uint_t vendor)
4684 4680 {
4685 4681 size_t cnt, i;
4686 4682 caddr_t to, from;
4687 4683
4688 4684 if ((vendor == X86_VENDOR_Intel) &&
4689 4685 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4690 4686 cnt = &bcopy_patch_end - &bcopy_patch_start;
4691 4687 to = &bcopy_ck_size;
4692 4688 from = &bcopy_patch_start;
4693 4689 for (i = 0; i < cnt; i++) {
4694 4690 *to++ = *from++;
4695 4691 }
4696 4692 }
4697 4693 }
4698 4694 #endif /* __amd64 && !__xpv */
4699 4695
4700 4696 /*
4701 4697 * This function finds the number of bits to represent the number of cores per
4702 4698 * chip and the number of strands per core for the Intel platforms.
4703 4699 * It re-uses the x2APIC cpuid code of cpuid_pass2().
4704 4700 */
4705 4701 void
4706 4702 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4707 4703 {
4708 4704 struct cpuid_regs regs;
4709 4705 struct cpuid_regs *cp = &regs;
4710 4706
4711 4707 if (vendor != X86_VENDOR_Intel) {
4712 4708 return;
4713 4709 }
4714 4710
4715 4711 /* if the maximum cpuid leaf is at least 0xB, extended topo is available. */
4716 4712 cp->cp_eax = 0;
4717 4713 if (__cpuid_insn(cp) >= 0xB) {
4718 4714
4719 4715 cp->cp_eax = 0xB;
4720 4716 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4721 4717 (void) __cpuid_insn(cp);
4722 4718
4723 4719 /*
4724 4720 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4725 4721 * indicates that the extended topology enumeration leaf is
4726 4722 * available.
4727 4723 */
4728 4724 if (cp->cp_ebx) {
4729 4725 uint_t coreid_shift = 0;
4730 4726 uint_t chipid_shift = 0;
4731 4727 uint_t i;
4732 4728 uint_t level;
4733 4729
4734 4730 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4735 4731 cp->cp_eax = 0xB;
4736 4732 cp->cp_ecx = i;
4737 4733
4738 4734 (void) __cpuid_insn(cp);
4739 4735 level = CPI_CPU_LEVEL_TYPE(cp);
4740 4736
4741 4737 if (level == 1) {
4742 4738 /*
4743 4739 * Thread level processor topology
4744 4740 * Number of bits shift right APIC ID
4745 4741 * to get the coreid.
4746 4742 */
4747 4743 coreid_shift = BITX(cp->cp_eax, 4, 0);
4748 4744 } else if (level == 2) {
4749 4745 /*
4750 4746 * Core level processor topology
4751 4747 * Number of bits shift right APIC ID
4752 4748 * to get the chipid.
4753 4749 */
4754 4750 chipid_shift = BITX(cp->cp_eax, 4, 0);
4755 4751 }
4756 4752 }
4757 4753
4758 4754 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4759 4755 *strand_nbits = coreid_shift;
4760 4756 *core_nbits = chipid_shift - coreid_shift;
4761 4757 }
4762 4758 }
4763 4759 }
4764 4760 }
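/*
 * Worked example (illustrative): on a hypothetical part with 2 strands per
 * core and 8 cores per chip, leaf 0xB reports a thread-level shift
 * (coreid_shift) of 1 and a core-level shift (chipid_shift) of 4, so the
 * function returns *strand_nbits == 1 and *core_nbits == 4 - 1 == 3.
 */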