]>
Commit | Line | Data |
---|---|---|
5bcae85e SL |
1 | //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file is based on LLVM's lib/Support/Host.cpp. | |
11 | // It implements the operating system Host concept and builtin | |
12 | // __cpu_model for the compiler_rt library, for x86 only. | |
13 | // | |
14 | //===----------------------------------------------------------------------===// | |
15 | ||
16 | #if (defined(__i386__) || defined(_M_IX86) || \ | |
17 | defined(__x86_64__) || defined(_M_X64)) && \ | |
18 | (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) | |
19 | ||
20 | #include <assert.h> | |
21 | ||
22 | #define bool int | |
23 | #define true 1 | |
24 | #define false 0 | |
25 | ||
26 | #ifdef _MSC_VER | |
27 | #include <intrin.h> | |
28 | #endif | |
29 | ||
2c00a5a8 XL |
30 | #ifndef __has_attribute |
31 | #define __has_attribute(attr) 0 | |
32 | #endif | |
33 | ||
5bcae85e SL |
/* Vendor signatures compared against the EBX value returned by CPUID leaf 0.
   Each constant is four ASCII characters packed little-endian. */
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, /* 'G' 'e' 'n' 'u' */
  SIG_AMD = 0x68747541    /* 'A' 'u' 't' 'h' */
};
38 | ||
/* Values stored in __cpu_model.__cpu_vendor. Zero is never assigned, so a
   zero vendor field means the model has not been initialized yet. */
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 /* one past the last valid vendor */
};
45 | ||
/* Values stored in __cpu_model.__cpu_type. The numbering is spelled out so
   that inserting an entry cannot silently renumber the others. */
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  CPU_TYPE_MAX = 12 /* one past the last valid type */
};
60 | ||
/* Values stored in __cpu_model.__cpu_subtype. The numbering is spelled out so
   that inserting an entry cannot silently renumber the others. */
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  CPU_SUBTYPE_MAX = 18 /* one past the last valid subtype */
};
81 | ||
/* Bit positions inside __cpu_model.__cpu_features[0]: feature F is reported
   by setting bit (1 << F). All indices must stay below 32 because the feature
   word is a single unsigned int. */
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30
};
115 | ||
// Report whether the CPUID instruction may be executed.
//
// The i386 probe below was copied from clang's cpuid.h (__get_cpuid_max). It
// was motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386 built with a GNU-compatible compiler, the
// presence of CPUID is checked first by trying to toggle the corresponding
// EFLAGS bit (mask 0x00200000): if the bit reads back unchanged, CPUID is
// reported as unavailable. Every other configuration returns true without
// probing.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuIdSupported;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(CpuIdSupported)
          :
          : "eax", "ecx");
  if (!CpuIdSupported)
    return false;
#endif
  return true;
}
147 | ||
148 | // This code is copied from lib/Support/Host.cpp. | |
149 | // Changes to either file should be mirrored in the other. | |
150 | ||
/// getX86CpuIDAndInfo - Execute CPUID for leaf \p value and store the
/// resulting EAX, EBX, ECX and EDX through the four out-pointers.
///
/// Returns false when the instruction was executed, and true when this build
/// configuration has no way to issue CPUID (the outputs are then not
/// written).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is preserved by hand:
  // rbx is parked in rsi, and after cpuid the two are swapped back, leaving
  // cpuid's EBX result in rsi (the "=S" output).
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(_MSC_VER) && !(defined(__GNUC__) || defined(__clang__))
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
188 | ||
/// getX86CpuIDAndInfoEx - Execute CPUID for leaf \p value with \p subleaf
/// loaded into ECX, and store the resulting EAX, EBX, ECX and EDX through the
/// four out-pointers.
///
/// Returns false when the instruction was executed, and true when this build
/// configuration has no way to issue CPUID (the outputs are then not
/// written).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is preserved by hand:
  // rbx is parked in rsi, and after cpuid the two are swapped back, leaving
  // cpuid's EBX result in rsi (the "=S" output).
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER) && !(defined(__GNUC__) || defined(__clang__))
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
227 | ||
// Read extended control register 0 (XCR0); used to detect features such as
// AVX. The low 32 bits are stored through rEAX and the high 32 bits through
// rEDX. Returns false when the register was read and true when this build
// configuration has no way to read it (outputs are then not written).
//
// The function itself does not check that the read is permitted; the caller
// in this file only invokes it after CPUID leaf 1 reports ECX bits 27 and 28.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes instead of by mnemonic because older
  // assemblers do not include support for it and there is no easy way to
  // conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Xcr0;
  *rEDX = Xcr0 >> 32;
  return false;
#else
  return true;
#endif
}
245 | ||
// Decode the processor family and model from the EAX value of CPUID leaf 1.
//
//   Family: bits 8-11; when that field is 0xF, the extended family
//           (bits 20-27) is added to it.
//   Model:  bits 4-7; when the family field is 6 or 0xF, the extended model
//           (bits 16-19) supplies the upper four bits.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned BaseModel = (EAX >> 4) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;
  const unsigned ExtModel = (EAX >> 16) & 0xf;

  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel = BaseModel;

  if (BaseFamily == 0xf)
    DecodedFamily += ExtFamily;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DecodedModel += ExtModel << 4;

  *Family = DecodedFamily;
  *Model = DecodedModel;
}
258 | ||
2c00a5a8 XL |
259 | static void |
260 | getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, | |
261 | unsigned Brand_id, unsigned Features, | |
262 | unsigned *Type, unsigned *Subtype) { | |
5bcae85e SL |
263 | if (Brand_id != 0) |
264 | return; | |
265 | switch (Family) { | |
5bcae85e SL |
266 | case 6: |
267 | switch (Model) { | |
5bcae85e SL |
268 | case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile |
269 | // processor, Intel Core 2 Quad processor, Intel Core 2 Quad | |
270 | // mobile processor, Intel Core 2 Extreme processor, Intel | |
271 | // Pentium Dual-Core processor, Intel Xeon processor, model | |
272 | // 0Fh. All processors are manufactured using the 65 nm process. | |
273 | case 0x16: // Intel Celeron processor model 16h. All processors are | |
274 | // manufactured using the 65 nm process | |
5bcae85e SL |
275 | case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model |
276 | // 17h. All processors are manufactured using the 45 nm process. | |
277 | // | |
278 | // 45nm: Penryn , Wolfdale, Yorkfield (XE) | |
279 | case 0x1d: // Intel Xeon processor MP. All processors are manufactured using | |
280 | // the 45 nm process. | |
281 | *Type = INTEL_CORE2; // "penryn" | |
5bcae85e SL |
282 | break; |
283 | case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All | |
284 | // processors are manufactured using the 45 nm process. | |
285 | case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. | |
286 | // As found in a Summer 2010 model iMac. | |
287 | case 0x1f: | |
2c00a5a8 | 288 | case 0x2e: // Nehalem EX |
5bcae85e SL |
289 | *Type = INTEL_COREI7; // "nehalem" |
290 | *Subtype = INTEL_COREI7_NEHALEM; | |
291 | break; | |
292 | case 0x25: // Intel Core i7, laptop version. | |
293 | case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All | |
294 | // processors are manufactured using the 32 nm process. | |
295 | case 0x2f: // Westmere EX | |
296 | *Type = INTEL_COREI7; // "westmere" | |
297 | *Subtype = INTEL_COREI7_WESTMERE; | |
298 | break; | |
299 | case 0x2a: // Intel Core i7 processor. All processors are manufactured | |
300 | // using the 32 nm process. | |
301 | case 0x2d: | |
302 | *Type = INTEL_COREI7; //"sandybridge" | |
303 | *Subtype = INTEL_COREI7_SANDYBRIDGE; | |
304 | break; | |
305 | case 0x3a: | |
2c00a5a8 | 306 | case 0x3e: // Ivy Bridge EP |
5bcae85e SL |
307 | *Type = INTEL_COREI7; // "ivybridge" |
308 | *Subtype = INTEL_COREI7_IVYBRIDGE; | |
309 | break; | |
310 | ||
311 | // Haswell: | |
312 | case 0x3c: | |
313 | case 0x3f: | |
314 | case 0x45: | |
315 | case 0x46: | |
316 | *Type = INTEL_COREI7; // "haswell" | |
317 | *Subtype = INTEL_COREI7_HASWELL; | |
318 | break; | |
319 | ||
320 | // Broadwell: | |
321 | case 0x3d: | |
322 | case 0x47: | |
323 | case 0x4f: | |
324 | case 0x56: | |
325 | *Type = INTEL_COREI7; // "broadwell" | |
326 | *Subtype = INTEL_COREI7_BROADWELL; | |
327 | break; | |
328 | ||
329 | // Skylake: | |
2c00a5a8 XL |
330 | case 0x4e: // Skylake mobile |
331 | case 0x5e: // Skylake desktop | |
332 | case 0x8e: // Kaby Lake mobile | |
333 | case 0x9e: // Kaby Lake desktop | |
5bcae85e SL |
334 | *Type = INTEL_COREI7; // "skylake" |
335 | *Subtype = INTEL_COREI7_SKYLAKE; | |
336 | break; | |
337 | ||
2c00a5a8 XL |
338 | // Skylake Xeon: |
339 | case 0x55: | |
340 | *Type = INTEL_COREI7; | |
341 | *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" | |
342 | break; | |
343 | ||
344 | // Cannonlake: | |
345 | case 0x66: | |
346 | *Type = INTEL_COREI7; | |
347 | *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake" | |
348 | break; | |
349 | ||
5bcae85e SL |
350 | case 0x1c: // Most 45 nm Intel Atom processors |
351 | case 0x26: // 45 nm Atom Lincroft | |
352 | case 0x27: // 32 nm Atom Medfield | |
353 | case 0x35: // 32 nm Atom Midview | |
354 | case 0x36: // 32 nm Atom Midview | |
2c00a5a8 | 355 | *Type = INTEL_BONNELL; |
5bcae85e SL |
356 | break; // "bonnell" |
357 | ||
358 | // Atom Silvermont codes from the Intel software optimization guide. | |
359 | case 0x37: | |
360 | case 0x4a: | |
361 | case 0x4d: | |
362 | case 0x5a: | |
363 | case 0x5d: | |
364 | case 0x4c: // really airmont | |
2c00a5a8 | 365 | *Type = INTEL_SILVERMONT; |
5bcae85e SL |
366 | break; // "silvermont" |
367 | ||
368 | case 0x57: | |
2c00a5a8 | 369 | *Type = INTEL_KNL; // knl |
5bcae85e SL |
370 | break; |
371 | ||
2c00a5a8 XL |
372 | case 0x85: |
373 | *Type = INTEL_KNM; // knm | |
5bcae85e SL |
374 | break; |
375 | ||
2c00a5a8 | 376 | default: // Unknown family 6 CPU. |
5bcae85e | 377 | break; |
2c00a5a8 | 378 | break; |
5bcae85e | 379 | } |
5bcae85e | 380 | default: |
2c00a5a8 | 381 | break; // Unknown. |
5bcae85e SL |
382 | } |
383 | } | |
384 | ||
2c00a5a8 XL |
385 | static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, |
386 | unsigned Features, unsigned *Type, | |
5bcae85e SL |
387 | unsigned *Subtype) { |
388 | // FIXME: this poorly matches the generated SubtargetFeatureKV table. There | |
389 | // appears to be no way to generate the wide variety of AMD-specific targets | |
390 | // from the information returned from CPUID. | |
391 | switch (Family) { | |
5bcae85e SL |
392 | case 16: |
393 | *Type = AMDFAM10H; // "amdfam10" | |
394 | switch (Model) { | |
395 | case 2: | |
396 | *Subtype = AMDFAM10H_BARCELONA; | |
397 | break; | |
398 | case 4: | |
399 | *Subtype = AMDFAM10H_SHANGHAI; | |
400 | break; | |
401 | case 8: | |
402 | *Subtype = AMDFAM10H_ISTANBUL; | |
403 | break; | |
5bcae85e | 404 | } |
2c00a5a8 | 405 | break; |
5bcae85e | 406 | case 20: |
2c00a5a8 | 407 | *Type = AMD_BTVER1; |
5bcae85e SL |
408 | break; // "btver1"; |
409 | case 21: | |
410 | *Type = AMDFAM15H; | |
2c00a5a8 | 411 | if (Model >= 0x60 && Model <= 0x7f) { |
5bcae85e | 412 | *Subtype = AMDFAM15H_BDVER4; |
2c00a5a8 | 413 | break; // "bdver4"; 60h-7Fh: Excavator |
5bcae85e SL |
414 | } |
415 | if (Model >= 0x30 && Model <= 0x3f) { | |
416 | *Subtype = AMDFAM15H_BDVER3; | |
417 | break; // "bdver3"; 30h-3Fh: Steamroller | |
418 | } | |
419 | if (Model >= 0x10 && Model <= 0x1f) { | |
420 | *Subtype = AMDFAM15H_BDVER2; | |
421 | break; // "bdver2"; 10h-1Fh: Piledriver | |
422 | } | |
423 | if (Model <= 0x0f) { | |
424 | *Subtype = AMDFAM15H_BDVER1; | |
425 | break; // "bdver1"; 00h-0Fh: Bulldozer | |
426 | } | |
427 | break; | |
428 | case 22: | |
2c00a5a8 | 429 | *Type = AMD_BTVER2; |
5bcae85e | 430 | break; // "btver2" |
2c00a5a8 XL |
431 | case 23: |
432 | *Type = AMDFAM17H; | |
433 | *Subtype = AMDFAM17H_ZNVER1; | |
434 | break; | |
5bcae85e SL |
435 | default: |
436 | break; // "generic" | |
437 | } | |
438 | } | |
439 | ||
2c00a5a8 XL |
440 | static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, |
441 | unsigned *FeaturesOut) { | |
5bcae85e | 442 | unsigned Features = 0; |
2c00a5a8 XL |
443 | unsigned EAX, EBX; |
444 | ||
445 | if ((EDX >> 15) & 1) | |
446 | Features |= 1 << FEATURE_CMOV; | |
447 | if ((EDX >> 23) & 1) | |
448 | Features |= 1 << FEATURE_MMX; | |
449 | if ((EDX >> 25) & 1) | |
450 | Features |= 1 << FEATURE_SSE; | |
451 | if ((EDX >> 26) & 1) | |
452 | Features |= 1 << FEATURE_SSE2; | |
453 | ||
454 | if ((ECX >> 0) & 1) | |
455 | Features |= 1 << FEATURE_SSE3; | |
456 | if ((ECX >> 1) & 1) | |
457 | Features |= 1 << FEATURE_PCLMUL; | |
458 | if ((ECX >> 9) & 1) | |
459 | Features |= 1 << FEATURE_SSSE3; | |
460 | if ((ECX >> 12) & 1) | |
461 | Features |= 1 << FEATURE_FMA; | |
462 | if ((ECX >> 19) & 1) | |
463 | Features |= 1 << FEATURE_SSE4_1; | |
464 | if ((ECX >> 20) & 1) | |
465 | Features |= 1 << FEATURE_SSE4_2; | |
466 | if ((ECX >> 23) & 1) | |
467 | Features |= 1 << FEATURE_POPCNT; | |
468 | if ((ECX >> 25) & 1) | |
469 | Features |= 1 << FEATURE_AES; | |
5bcae85e SL |
470 | |
471 | // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV | |
472 | // indicates that the AVX registers will be saved and restored on context | |
473 | // switch, then we have full AVX support. | |
474 | const unsigned AVXBits = (1 << 27) | (1 << 28); | |
475 | bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && | |
476 | ((EAX & 0x6) == 0x6); | |
477 | bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); | |
2c00a5a8 XL |
478 | |
479 | if (HasAVX) | |
480 | Features |= 1 << FEATURE_AVX; | |
481 | ||
482 | bool HasLeaf7 = | |
483 | MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); | |
484 | ||
485 | if (HasLeaf7 && ((EBX >> 3) & 1)) | |
486 | Features |= 1 << FEATURE_BMI; | |
487 | if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) | |
488 | Features |= 1 << FEATURE_AVX2; | |
489 | if (HasLeaf7 && ((EBX >> 9) & 1)) | |
490 | Features |= 1 << FEATURE_BMI2; | |
491 | if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) | |
492 | Features |= 1 << FEATURE_AVX512F; | |
493 | if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) | |
494 | Features |= 1 << FEATURE_AVX512DQ; | |
495 | if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) | |
496 | Features |= 1 << FEATURE_AVX512IFMA; | |
497 | if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) | |
498 | Features |= 1 << FEATURE_AVX512PF; | |
499 | if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) | |
500 | Features |= 1 << FEATURE_AVX512ER; | |
501 | if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) | |
502 | Features |= 1 << FEATURE_AVX512CD; | |
503 | if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) | |
504 | Features |= 1 << FEATURE_AVX512BW; | |
505 | if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) | |
506 | Features |= 1 << FEATURE_AVX512VL; | |
507 | ||
508 | if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) | |
509 | Features |= 1 << FEATURE_AVX512VBMI; | |
510 | if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) | |
511 | Features |= 1 << FEATURE_AVX512VPOPCNTDQ; | |
512 | ||
513 | if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) | |
514 | Features |= 1 << FEATURE_AVX5124VNNIW; | |
515 | if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) | |
516 | Features |= 1 << FEATURE_AVX5124FMAPS; | |
517 | ||
518 | unsigned MaxExtLevel; | |
519 | getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); | |
520 | ||
521 | bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && | |
522 | !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); | |
523 | if (HasExtLeaf1 && ((ECX >> 6) & 1)) | |
524 | Features |= 1 << FEATURE_SSE4_A; | |
525 | if (HasExtLeaf1 && ((ECX >> 11) & 1)) | |
526 | Features |= 1 << FEATURE_XOP; | |
527 | if (HasExtLeaf1 && ((ECX >> 16) & 1)) | |
528 | Features |= 1 << FEATURE_FMA4; | |
529 | ||
530 | *FeaturesOut = Features; | |
5bcae85e SL |
531 | } |
532 | ||
2c00a5a8 XL |
// Select the attribute that makes __cpu_indicator_init run as an initializer.
#if defined(HAVE_INIT_PRIORITY)
// The priority is an argument of the attribute and must be parenthesized.
// 101 is meant to run this before constructors that have no explicit
// priority.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
5bcae85e SL |
544 | |
/* CPU description published through the global __cpu_model.
   All fields start out as zero; __cpu_indicator_init treats a zero
   __cpu_vendor as "not initialized yet". */
struct __processor_model {
  unsigned int __cpu_vendor;      /* enum ProcessorVendors value */
  unsigned int __cpu_type;        /* enum ProcessorTypes value, 0 if unknown */
  unsigned int __cpu_subtype;     /* enum ProcessorSubtypes value, 0 if unknown */
  unsigned int __cpu_features[1]; /* bitmask indexed by enum ProcessorFeatures */
} __cpu_model = {0, 0, 0, {0}};
551 | ||
552 | /* A constructor function that is sets __cpu_model and __cpu_features with | |
553 | the right values. This needs to run only once. This constructor is | |
554 | given the highest priority and it should run before constructors without | |
555 | the priority set. However, it still runs after ifunc initializers and | |
556 | needs to be called explicitly there. */ | |
557 | ||
2c00a5a8 | 558 | int CONSTRUCTOR_ATTRIBUTE |
5bcae85e | 559 | __cpu_indicator_init(void) { |
2c00a5a8 XL |
560 | unsigned EAX, EBX, ECX, EDX; |
561 | unsigned MaxLeaf = 5; | |
562 | unsigned Vendor; | |
563 | unsigned Model, Family, Brand_id; | |
564 | unsigned Features = 0; | |
5bcae85e SL |
565 | |
566 | /* This function needs to run just once. */ | |
567 | if (__cpu_model.__cpu_vendor) | |
568 | return 0; | |
569 | ||
570 | if (!isCpuIdSupported()) | |
571 | return -1; | |
572 | ||
573 | /* Assume cpuid insn present. Run in level 0 to get vendor id. */ | |
2c00a5a8 | 574 | if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { |
5bcae85e SL |
575 | __cpu_model.__cpu_vendor = VENDOR_OTHER; |
576 | return -1; | |
577 | } | |
578 | getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); | |
579 | detectX86FamilyModel(EAX, &Family, &Model); | |
580 | Brand_id = EBX & 0xff; | |
581 | ||
582 | /* Find available features. */ | |
2c00a5a8 | 583 | getAvailableFeatures(ECX, EDX, MaxLeaf, &Features); |
5bcae85e SL |
584 | __cpu_model.__cpu_features[0] = Features; |
585 | ||
586 | if (Vendor == SIG_INTEL) { | |
587 | /* Get CPU type. */ | |
588 | getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, | |
589 | &(__cpu_model.__cpu_type), | |
590 | &(__cpu_model.__cpu_subtype)); | |
591 | __cpu_model.__cpu_vendor = VENDOR_INTEL; | |
592 | } else if (Vendor == SIG_AMD) { | |
593 | /* Get CPU type. */ | |
594 | getAMDProcessorTypeAndSubtype(Family, Model, Features, | |
595 | &(__cpu_model.__cpu_type), | |
596 | &(__cpu_model.__cpu_subtype)); | |
597 | __cpu_model.__cpu_vendor = VENDOR_AMD; | |
598 | } else | |
599 | __cpu_model.__cpu_vendor = VENDOR_OTHER; | |
600 | ||
601 | assert(__cpu_model.__cpu_vendor < VENDOR_MAX); | |
602 | assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); | |
603 | assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); | |
604 | ||
605 | return 0; | |
606 | } | |
607 | ||
608 | #endif |