1 //! x86 run-time feature detection is OS independent.
3 #[cfg(target_arch = "x86")]
4 use core
::arch
::x86
::*;
5 #[cfg(target_arch = "x86_64")]
6 use core
::arch
::x86_64
::*;
10 use crate::detect
::{bit, cache, Feature}
;
12 /// Run-time feature detection on x86 works by using the CPUID instruction.
14 /// The [CPUID Wikipedia page][wiki_cpuid] contains
15 /// all the information about which flags to set to query which values, and in
16 /// which registers these are reported.
18 /// The definitive references are:
19 /// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
20 /// Instruction Set Reference, A-Z][intel64_ref].
21 /// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
22 /// System Instructions][amd64_ref].
24 /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
25 /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
26 /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
27 #[allow(clippy::similar_names)]
28 pub(crate) fn detect_features() -> cache
::Initializer
{
29 let mut value
= cache
::Initializer
::default();
31 // If the x86 CPU does not support the CPUID instruction then it is too
32 // old to support any of the currently-detectable features.
37 // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
38 // has `cpuid` support.
40 // 0. EAX = 0: Basic Information:
41 // - EAX returns the "Highest Function Parameter", that is, the maximum
42 // leaf value for subsequent calls of `cpuid` in range [0,
43 // 0x8000_0000].
44 // - The vendor ID is stored in 12 u8 ASCII chars, returned in EBX, EDX, and ECX (in that order):
45 let (max_basic_leaf
, vendor_id
) = unsafe {
52 let vendor_id
: [[u8; 4]; 3] = [
57 let vendor_id
: [u8; 12] = mem
::transmute(vendor_id
);
58 (max_basic_leaf
, vendor_id
)
61 if max_basic_leaf
< 1 {
62 // Earlier Intel 486, CPUID not implemented
66 // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
67 // Contains information about most x86 features.
72 } = unsafe { __cpuid(0x0000_0001_u32) }
;
74 // EAX = 7, ECX = 0: Queries "Extended Features";
75 // Contains information about bmi,bmi2, and avx2 support.
76 let (extended_features_ebx
, extended_features_ecx
) = if max_basic_leaf
>= 7 {
77 let CpuidResult { ebx, ecx, .. }
= unsafe { __cpuid(0x0000_0007_u32) }
;
80 (0, 0) // CPUID does not support "Extended Features"
83 // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
84 // - EAX returns the max leaf value for extended information, that is,
85 // `cpuid` calls in range [0x8000_0000; u32::MAX]:
87 eax
: extended_max_basic_leaf
,
89 } = unsafe { __cpuid(0x8000_0000_u32) }
;
91 // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature
93 let extended_proc_info_ecx
= if extended_max_basic_leaf
>= 1 {
94 let CpuidResult { ecx, .. }
= unsafe { __cpuid(0x8000_0001_u32) }
;
101 // borrows value till the end of this scope:
102 let mut enable
= |r
, rb
, f
| {
103 if bit
::test(r
as usize, rb
) {
108 enable(proc_info_ecx
, 0, Feature
::sse3
);
109 enable(proc_info_ecx
, 1, Feature
::pclmulqdq
);
110 enable(proc_info_ecx
, 9, Feature
::ssse3
);
111 enable(proc_info_ecx
, 13, Feature
::cmpxchg16b
);
112 enable(proc_info_ecx
, 19, Feature
::sse4_1
);
113 enable(proc_info_ecx
, 20, Feature
::sse4_2
);
114 enable(proc_info_ecx
, 23, Feature
::popcnt
);
115 enable(proc_info_ecx
, 25, Feature
::aes
);
116 enable(proc_info_ecx
, 29, Feature
::f16c
);
117 enable(proc_info_ecx
, 30, Feature
::rdrand
);
118 enable(extended_features_ebx
, 18, Feature
::rdseed
);
119 enable(extended_features_ebx
, 19, Feature
::adx
);
120 enable(extended_features_ebx
, 11, Feature
::rtm
);
121 enable(proc_info_edx
, 4, Feature
::tsc
);
122 enable(proc_info_edx
, 23, Feature
::mmx
);
123 enable(proc_info_edx
, 24, Feature
::fxsr
);
124 enable(proc_info_edx
, 25, Feature
::sse
);
125 enable(proc_info_edx
, 26, Feature
::sse2
);
126 enable(extended_features_ebx
, 29, Feature
::sha
);
128 enable(extended_features_ebx
, 3, Feature
::bmi1
);
129 enable(extended_features_ebx
, 8, Feature
::bmi2
);
131 // `XSAVE` and `AVX` support:
132 let cpu_xsave
= bit
::test(proc_info_ecx
as usize, 26);
134 // 0. Here the CPU supports `XSAVE`.
136 // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
137 // supports saving the state of the AVX/AVX2 vector registers on
138 // context-switches, see:
140 // - [intel: is avx enabled?][is_avx_enabled],
141 // - [mozilla: sse.cpp][mozilla_sse_cpp].
143 // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
144 // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
145 let cpu_osxsave
= bit
::test(proc_info_ecx
as usize, 27);
148 // 2. The OS must have signaled the CPU that it supports saving and
151 // * SSE -> `XCR0.SSE[1]`
152 // * AVX -> `XCR0.AVX[2]`
153 // * AVX-512 -> `XCR0.AVX-512[7:5]`.
155 // by setting the corresponding bits of `XCR0` to `1`.
157 // This is safe because the CPU supports `xsave`
158 // and the OS has set `osxsave`.
159 let xcr0
= unsafe { _xgetbv(0) }
;
160 // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`:
161 let os_avx_support
= xcr0
& 6 == 6;
162 // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`:
163 let os_avx512_support
= xcr0
& 224 == 224;
165 // We enable `xsave` support only if both the OS and the CPU support
166 // saving/restoring the AVX registers:
168 // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
169 // FEATURES" in the "Intel® 64 and IA-32 Architectures Software
170 // Developer’s Manual, Volume 1: Basic Architecture":
172 // "Software enables the XSAVE feature set by setting
173 // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
174 // instruction). If this bit is 0, execution of any of XGETBV,
175 // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
176 // causes an invalid-opcode exception (#UD)"
178 enable(proc_info_ecx
, 26, Feature
::xsave
);
180 // For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
181 // Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
183 if max_basic_leaf
>= 0xd {
185 eax
: proc_extended_state1_eax
,
187 } = unsafe { __cpuid_count(0xd_u32, 1) }
;
188 enable(proc_extended_state1_eax
, 0, Feature
::xsaveopt
);
189 enable(proc_extended_state1_eax
, 1, Feature
::xsavec
);
190 enable(proc_extended_state1_eax
, 3, Feature
::xsaves
);
193 // FMA (uses 256-bit wide registers):
194 enable(proc_info_ecx
, 12, Feature
::fma
);
197 enable(proc_info_ecx
, 28, Feature
::avx
);
198 enable(extended_features_ebx
, 5, Feature
::avx2
);
200 // For AVX-512 the OS also needs to support saving/restoring
201 // the extended state, only then we enable AVX-512 support:
202 if os_avx512_support
{
203 enable(extended_features_ebx
, 16, Feature
::avx512f
);
204 enable(extended_features_ebx
, 17, Feature
::avx512dq
);
205 enable(extended_features_ebx
, 21, Feature
::avx512ifma
);
206 enable(extended_features_ebx
, 26, Feature
::avx512pf
);
207 enable(extended_features_ebx
, 27, Feature
::avx512er
);
208 enable(extended_features_ebx
, 28, Feature
::avx512cd
);
209 enable(extended_features_ebx
, 30, Feature
::avx512bw
);
210 enable(extended_features_ebx
, 31, Feature
::avx512vl
);
211 enable(extended_features_ecx
, 1, Feature
::avx512vbmi
);
212 enable(extended_features_ecx
, 5, Feature
::avx512bf16
);
213 enable(extended_features_ecx
, 6, Feature
::avx512vbmi2
);
214 enable(extended_features_ecx
, 8, Feature
::avx512gfni
);
215 enable(extended_features_ecx
, 8, Feature
::avx512vp2intersect
);
216 enable(extended_features_ecx
, 9, Feature
::avx512vaes
);
217 enable(extended_features_ecx
, 10, Feature
::avx512vpclmulqdq
);
218 enable(extended_features_ecx
, 11, Feature
::avx512vnni
);
219 enable(extended_features_ecx
, 12, Feature
::avx512bitalg
);
220 enable(extended_features_ecx
, 14, Feature
::avx512vpopcntdq
);
226 // This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
227 // On Intel CPUs with popcnt, lzcnt implements the
228 // "missing part" of ABM, so we map both to the same
231 // The `is_x86_feature_detected!("lzcnt")` macro then
232 // internally maps to Feature::abm.
233 enable(extended_proc_info_ecx
, 5, Feature
::lzcnt
);
235 // Hygon Dhyana originates from AMD technology and shares most of its architecture with
236 // AMD's family 17h, but has a different CPU Vendor ID ("HygonGenuine") and family series
237 // number (Family 18h).
239 // For CPUID feature bits, Hygon Dhyana (family 18h) shares the same definition with AMD
242 // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
243 // Related Hygon kernel patch can be found on
244 // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
245 if vendor_id
== *b
"AuthenticAMD" || vendor_id
== *b
"HygonGenuine" {
246 // These features are available on AMD arch CPUs:
247 enable(extended_proc_info_ecx
, 6, Feature
::sse4a
);
248 enable(extended_proc_info_ecx
, 21, Feature
::tbm
);
252 // Unfortunately, some Skylake chips erroneously report support for BMI1 and
253 // BMI2 without actual support. These chips don't support AVX, and it seems
254 // that all Intel chips with non-erroneous BMI support do (I didn't check
255 // other vendors), so we can disable these flags for chips that don't also
256 // report support for AVX.
258 // It's possible this will pessimize future chips that do support BMI and
259 // not AVX, but this seems minor compared to a hard crash you get when
260 // executing an unsupported instruction (to put it another way, it's safe
261 // for us to under-report CPU features, but not to over-report them). Still,
262 // to limit any impact this may have in the future, we only do this for
263 // Intel chips, as it's a bug only present in their chips.
265 // This bug is documented as `SKL052` in the errata section of this document:
266 // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf
267 if vendor_id
== *b
"GenuineIntel" && !value
.test(Feature
::avx
as u32) {
268 value
.unset(Feature
::bmi1
as u32);
269 value
.unset(Feature
::bmi2
as u32);