1 //! x86 run-time feature detection is OS independent.
3 #[cfg(target_arch = "x86")]
4 use crate::arch
::x86
::*;
5 #[cfg(target_arch = "x86_64")]
6 use crate::arch
::x86_64
::*;
10 use crate::detect
::{Feature, cache, bit}
;
12 /// Performs run-time feature detection.
14 pub fn check_for(x
: Feature
) -> bool
{
15 cache
::test(x
as u32, detect_features
)
18 /// Run-time feature detection on x86 works by using the CPUID instruction.
20 /// The [CPUID Wikipedia page][wiki_cpuid] contains
21 /// all the information about which flags to set to query which values, and in
22 /// which registers these are reported.
24 /// The definitive references are:
25 /// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
26 /// Instruction Set Reference, A-Z][intel64_ref].
27 /// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
28 /// System Instructions][amd64_ref].
30 /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
31 /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
32 /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
33 #[allow(clippy::similar_names)]
34 fn detect_features() -> cache
::Initializer
{
35 let mut value
= cache
::Initializer
::default();
37 // If the x86 CPU does not support the CPUID instruction then it is too
38 // old to support any of the currently-detectable features.
43 // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
44 // has `cpuid` support.
46 // 0. EAX = 0: Basic Information:
47 // - EAX returns the "Highest Function Parameter", that is, the maximum
48 // leaf value for subsequent calls of `cpuid` in range [0, 0x8000_0000].
49 // - The vendor ID is stored in 12 u8 ASCII chars,
50 // returned in EBX, EDX, and ECX (in that order):
51 let (max_basic_leaf
, vendor_id
) = unsafe {
58 let vendor_id
: [[u8; 4]; 3] = [
63 let vendor_id
: [u8; 12] = mem
::transmute(vendor_id
);
64 (max_basic_leaf
, vendor_id
)
67 if max_basic_leaf
< 1 {
68 // Earlier Intel 486, CPUID not implemented
72 // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
73 // Contains information about most x86 features.
78 } = unsafe { __cpuid(0x0000_0001_u32) }
;
80 // EAX = 7, ECX = 0: Queries "Extended Features";
81 // Contains information about bmi, bmi2, and avx2 support (among other features).
82 let (extended_features_ebx
, extended_features_ecx
) = if max_basic_leaf
>= 7
84 let CpuidResult { ebx, ecx, .. }
= unsafe { __cpuid(0x0000_0007_u32) }
;
87 (0, 0) // CPUID does not support "Extended Features"
90 // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
91 // - EAX returns the max leaf value for extended information, that is,
92 // `cpuid` calls in range [0x8000_0000; u32::MAX]:
94 eax
: extended_max_basic_leaf
,
96 } = unsafe { __cpuid(0x8000_0000_u32) }
;
98 // EAX = 0x8000_0001, ECX = 0: Queries "Extended Processor Info and Feature Bits"
100 let extended_proc_info_ecx
= if extended_max_basic_leaf
>= 1 {
101 let CpuidResult { ecx, .. }
= unsafe { __cpuid(0x8000_0001_u32) }
;
108 // borrows value till the end of this scope:
109 let mut enable
= |r
, rb
, f
| {
110 if bit
::test(r
as usize, rb
) {
115 enable(proc_info_ecx
, 0, Feature
::sse3
);
116 enable(proc_info_ecx
, 1, Feature
::pclmulqdq
);
117 enable(proc_info_ecx
, 9, Feature
::ssse3
);
118 enable(proc_info_ecx
, 13, Feature
::cmpxchg16b
);
119 enable(proc_info_ecx
, 19, Feature
::sse4_1
);
120 enable(proc_info_ecx
, 20, Feature
::sse4_2
);
121 enable(proc_info_ecx
, 23, Feature
::popcnt
);
122 enable(proc_info_ecx
, 25, Feature
::aes
);
123 enable(proc_info_ecx
, 29, Feature
::f16c
);
124 enable(proc_info_ecx
, 30, Feature
::rdrand
);
125 enable(extended_features_ebx
, 18, Feature
::rdseed
);
126 enable(extended_features_ebx
, 19, Feature
::adx
);
127 enable(extended_features_ebx
, 11, Feature
::rtm
);
128 enable(proc_info_edx
, 4, Feature
::tsc
);
129 enable(proc_info_edx
, 23, Feature
::mmx
);
130 enable(proc_info_edx
, 24, Feature
::fxsr
);
131 enable(proc_info_edx
, 25, Feature
::sse
);
132 enable(proc_info_edx
, 26, Feature
::sse2
);
133 enable(extended_features_ebx
, 29, Feature
::sha
);
135 enable(extended_features_ebx
, 3, Feature
::bmi
);
136 enable(extended_features_ebx
, 8, Feature
::bmi2
);
138 // `XSAVE` and `AVX` support:
139 let cpu_xsave
= bit
::test(proc_info_ecx
as usize, 26);
141 // 0. Here the CPU supports `XSAVE`.
143 // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
144 // supports saving the state of the AVX/AVX2 vector registers on
145 // context-switches, see:
147 // - [intel: is avx enabled?][is_avx_enabled],
148 // - [mozilla: sse.cpp][mozilla_sse_cpp].
150 // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
151 // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
152 let cpu_osxsave
= bit
::test(proc_info_ecx
as usize, 27);
155 // 2. The OS must have signaled the CPU that it supports saving and restoring the following state:
158 // * SSE -> `XCR0.SSE[1]`
159 // * AVX -> `XCR0.AVX[2]`
160 // * AVX-512 -> `XCR0.AVX-512[7:5]`.
162 // by setting the corresponding bits of `XCR0` to `1`.
164 // This is safe because the CPU supports `xsave`
165 // and the OS has set `osxsave`.
166 let xcr0
= unsafe { _xgetbv(0) }
;
167 // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`:
168 let os_avx_support
= xcr0
& 6 == 6;
169 // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`:
170 let os_avx512_support
= xcr0
& 224 == 224;
172 // We enable `xsave` support only if both the OS and the CPU support
173 // saving/restoring the AVX registers:
175 // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
176 // FEATURES" in the "Intel® 64 and IA-32 Architectures Software
177 // Developer’s Manual, Volume 1: Basic Architecture":
179 // "Software enables the XSAVE feature set by setting
180 // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
181 // instruction). If this bit is 0, execution of any of XGETBV,
182 // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
183 // causes an invalid-opcode exception (#UD)"
185 enable(proc_info_ecx
, 26, Feature
::xsave
);
187 // For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
188 // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1):
190 if max_basic_leaf
>= 0xd {
192 eax
: proc_extended_state1_eax
,
194 } = unsafe { __cpuid_count(0xd_u32, 1) }
;
195 enable(proc_extended_state1_eax
, 0, Feature
::xsaveopt
);
196 enable(proc_extended_state1_eax
, 1, Feature
::xsavec
);
197 enable(proc_extended_state1_eax
, 3, Feature
::xsaves
);
200 // FMA (uses 256-bit wide registers):
201 enable(proc_info_ecx
, 12, Feature
::fma
);
204 enable(proc_info_ecx
, 28, Feature
::avx
);
205 enable(extended_features_ebx
, 5, Feature
::avx2
);
207 // For AVX-512 the OS also needs to support saving/restoring
208 // the extended state; only then do we enable AVX-512 support:
209 if os_avx512_support
{
210 enable(extended_features_ebx
, 16, Feature
::avx512f
);
211 enable(extended_features_ebx
, 17, Feature
::avx512dq
);
212 enable(extended_features_ebx
, 21, Feature
::avx512_ifma
);
213 enable(extended_features_ebx
, 26, Feature
::avx512pf
);
214 enable(extended_features_ebx
, 27, Feature
::avx512er
);
215 enable(extended_features_ebx
, 28, Feature
::avx512cd
);
216 enable(extended_features_ebx
, 30, Feature
::avx512bw
);
217 enable(extended_features_ebx
, 31, Feature
::avx512vl
);
218 enable(extended_features_ecx
, 1, Feature
::avx512_vbmi
);
220 extended_features_ecx
,
222 Feature
::avx512_vpopcntdq
,
229 // This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
230 // On Intel CPUs with popcnt, lzcnt implements the
231 // "missing part" of ABM, so we map both to the same internal feature.
234 // The `is_x86_feature_detected!("lzcnt")` macro then
235 // internally maps to Feature::abm.
236 enable(extended_proc_info_ecx
, 5, Feature
::abm
);
237 // Hygon Dhyana originates from AMD technology and shares most of its architecture with
238 // AMD's family 17h, but has a different CPU vendor ID ("HygonGenuine") and family series
239 // number (family 18h).
241 // For CPUID feature bits, Hygon Dhyana (family 18h) shares the same definitions as AMD family 17h.
244 // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
245 // Related Hygon kernel patch can be found on
246 // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
247 if vendor_id
== *b
"AuthenticAMD" || vendor_id
== *b
"HygonGenuine" {
248 // These features are available on AMD arch CPUs:
249 enable(extended_proc_info_ecx
, 6, Feature
::sse4a
);
250 enable(extended_proc_info_ecx
, 21, Feature
::tbm
);
263 println
!("aes: {:?}", is_x86_feature_detected
!("aes"));
264 println
!("pclmulqdq: {:?}", is_x86_feature_detected
!("pclmulqdq"));
265 println
!("rdrand: {:?}", is_x86_feature_detected
!("rdrand"));
266 println
!("rdseed: {:?}", is_x86_feature_detected
!("rdseed"));
267 println
!("tsc: {:?}", is_x86_feature_detected
!("tsc"));
268 println
!("sse: {:?}", is_x86_feature_detected
!("sse"));
269 println
!("sse2: {:?}", is_x86_feature_detected
!("sse2"));
270 println
!("sse3: {:?}", is_x86_feature_detected
!("sse3"));
271 println
!("ssse3: {:?}", is_x86_feature_detected
!("ssse3"));
272 println
!("sse4.1: {:?}", is_x86_feature_detected
!("sse4.1"));
273 println
!("sse4.2: {:?}", is_x86_feature_detected
!("sse4.2"));
274 println
!("sse4a: {:?}", is_x86_feature_detected
!("sse4a"));
275 println
!("sha: {:?}", is_x86_feature_detected
!("sha"));
276 println
!("avx: {:?}", is_x86_feature_detected
!("avx"));
277 println
!("avx2: {:?}", is_x86_feature_detected
!("avx2"));
278 println
!("avx512f {:?}", is_x86_feature_detected
!("avx512f"));
279 println
!("avx512cd {:?}", is_x86_feature_detected
!("avx512cd"));
280 println
!("avx512er {:?}", is_x86_feature_detected
!("avx512er"));
281 println
!("avx512pf {:?}", is_x86_feature_detected
!("avx512pf"));
282 println
!("avx512bw {:?}", is_x86_feature_detected
!("avx512bw"));
283 println
!("avx512dq {:?}", is_x86_feature_detected
!("avx512dq"));
284 println
!("avx512vl {:?}", is_x86_feature_detected
!("avx512vl"));
285 println
!("avx512_ifma {:?}", is_x86_feature_detected
!("avx512ifma"));
286 println
!("avx512_vbmi {:?}", is_x86_feature_detected
!("avx512vbmi"));
288 "avx512_vpopcntdq {:?}",
289 is_x86_feature_detected
!("avx512vpopcntdq")
291 println
!("fma: {:?}", is_x86_feature_detected
!("fma"));
292 println
!("abm: {:?}", is_x86_feature_detected
!("abm"));
293 println
!("bmi: {:?}", is_x86_feature_detected
!("bmi1"));
294 println
!("bmi2: {:?}", is_x86_feature_detected
!("bmi2"));
295 println
!("tbm: {:?}", is_x86_feature_detected
!("tbm"));
296 println
!("popcnt: {:?}", is_x86_feature_detected
!("popcnt"));
297 println
!("lzcnt: {:?}", is_x86_feature_detected
!("lzcnt"));
298 println
!("fxsr: {:?}", is_x86_feature_detected
!("fxsr"));
299 println
!("xsave: {:?}", is_x86_feature_detected
!("xsave"));
300 println
!("xsaveopt: {:?}", is_x86_feature_detected
!("xsaveopt"));
301 println
!("xsaves: {:?}", is_x86_feature_detected
!("xsaves"));
302 println
!("xsavec: {:?}", is_x86_feature_detected
!("xsavec"));
303 println
!("cmpxchg16b: {:?}", is_x86_feature_detected
!("cmpxchg16b"));
304 println
!("adx: {:?}", is_x86_feature_detected
!("adx"));
305 println
!("rtm: {:?}", is_x86_feature_detected
!("rtm"));
309 fn compare_with_cupid() {
310 let information
= cupid
::master().unwrap();
311 assert_eq
!(is_x86_feature_detected
!("aes"), information
.aesni());
312 assert_eq
!(is_x86_feature_detected
!("pclmulqdq"), information
.pclmulqdq());
313 assert_eq
!(is_x86_feature_detected
!("rdrand"), information
.rdrand());
314 assert_eq
!(is_x86_feature_detected
!("rdseed"), information
.rdseed());
315 assert_eq
!(is_x86_feature_detected
!("tsc"), information
.tsc());
316 assert_eq
!(is_x86_feature_detected
!("sse"), information
.sse());
317 assert_eq
!(is_x86_feature_detected
!("sse2"), information
.sse2());
318 assert_eq
!(is_x86_feature_detected
!("sse3"), information
.sse3());
319 assert_eq
!(is_x86_feature_detected
!("ssse3"), information
.ssse3());
320 assert_eq
!(is_x86_feature_detected
!("sse4.1"), information
.sse4_1());
321 assert_eq
!(is_x86_feature_detected
!("sse4.2"), information
.sse4_2());
322 assert_eq
!(is_x86_feature_detected
!("sse4a"), information
.sse4a());
323 assert_eq
!(is_x86_feature_detected
!("sha"), information
.sha());
324 assert_eq
!(is_x86_feature_detected
!("avx"), information
.avx());
325 assert_eq
!(is_x86_feature_detected
!("avx2"), information
.avx2());
326 assert_eq
!(is_x86_feature_detected
!("avx512f"), information
.avx512f());
327 assert_eq
!(is_x86_feature_detected
!("avx512cd"), information
.avx512cd());
328 assert_eq
!(is_x86_feature_detected
!("avx512er"), information
.avx512er());
329 assert_eq
!(is_x86_feature_detected
!("avx512pf"), information
.avx512pf());
330 assert_eq
!(is_x86_feature_detected
!("avx512bw"), information
.avx512bw());
331 assert_eq
!(is_x86_feature_detected
!("avx512dq"), information
.avx512dq());
332 assert_eq
!(is_x86_feature_detected
!("avx512vl"), information
.avx512vl());
334 is_x86_feature_detected
!("avx512ifma"),
335 information
.avx512_ifma()
338 is_x86_feature_detected
!("avx512vbmi"),
339 information
.avx512_vbmi()
342 is_x86_feature_detected
!("avx512vpopcntdq"),
343 information
.avx512_vpopcntdq()
345 assert_eq
!(is_x86_feature_detected
!("fma"), information
.fma());
346 assert_eq
!(is_x86_feature_detected
!("bmi1"), information
.bmi1());
347 assert_eq
!(is_x86_feature_detected
!("bmi2"), information
.bmi2());
348 assert_eq
!(is_x86_feature_detected
!("popcnt"), information
.popcnt());
349 assert_eq
!(is_x86_feature_detected
!("abm"), information
.lzcnt());
350 assert_eq
!(is_x86_feature_detected
!("tbm"), information
.tbm());
351 assert_eq
!(is_x86_feature_detected
!("lzcnt"), information
.lzcnt());
352 assert_eq
!(is_x86_feature_detected
!("xsave"), information
.xsave());
353 assert_eq
!(is_x86_feature_detected
!("xsaveopt"), information
.xsaveopt());
355 is_x86_feature_detected
!("xsavec"),
356 information
.xsavec_and_xrstor()
359 is_x86_feature_detected
!("xsaves"),
360 information
.xsaves_xrstors_and_ia32_xss()
363 is_x86_feature_detected
!("cmpxchg16b"),
364 information
.cmpxchg16b(),
367 is_x86_feature_detected
!("adx"),
371 is_x86_feature_detected
!("rtm"),