]> git.proxmox.com Git - rustc.git/blob - src/stdsimd/crates/std_detect/src/detect/os/x86.rs
New upstream version 1.34.2+dfsg1
[rustc.git] / src / stdsimd / crates / std_detect / src / detect / os / x86.rs
1 //! x86 run-time feature detection is OS independent.
2
3 #[cfg(target_arch = "x86")]
4 use arch::x86::*;
5 #[cfg(target_arch = "x86_64")]
6 use arch::x86_64::*;
7
8 use mem;
9
10 use crate::detect::{Feature, cache, bit};
11
12 /// Performs run-time feature detection.
13 #[inline]
14 pub fn check_for(x: Feature) -> bool {
15 cache::test(x as u32, detect_features)
16 }
17
18 /// Run-time feature detection on x86 works by using the CPUID instruction.
19 ///
20 /// The [CPUID Wikipedia page][wiki_cpuid] contains
21 /// all the information about which flags to set to query which values, and in
22 /// which registers these are reported.
23 ///
24 /// The definitive references are:
25 /// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
26 /// Instruction Set Reference, A-Z][intel64_ref].
27 /// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
28 /// System Instructions][amd64_ref].
29 ///
30 /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
31 /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
32 /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
33 #[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))]
34 fn detect_features() -> cache::Initializer {
35 let mut value = cache::Initializer::default();
36
37 // If the x86 CPU does not support the CPUID instruction then it is too
38 // old to support any of the currently-detectable features.
39 if !has_cpuid() {
40 return value;
41 }
42
43 // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
44 // has `cpuid` support.
45
46 // 0. EAX = 0: Basic Information:
47 // - EAX returns the "Highest Function Parameter", that is, the maximum
48 // leaf value for subsequent calls of `cpuinfo` in range [0,
49 // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars,
50 // returned in EBX, EDX, and ECX (in that order):
51 let (max_basic_leaf, vendor_id) = unsafe {
52 let CpuidResult {
53 eax: max_basic_leaf,
54 ebx,
55 ecx,
56 edx,
57 } = __cpuid(0);
58 let vendor_id: [[u8; 4]; 3] = [
59 mem::transmute(ebx),
60 mem::transmute(edx),
61 mem::transmute(ecx),
62 ];
63 let vendor_id: [u8; 12] = mem::transmute(vendor_id);
64 (max_basic_leaf, vendor_id)
65 };
66
67 if max_basic_leaf < 1 {
68 // Earlier Intel 486, CPUID not implemented
69 return value;
70 }
71
72 // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
73 // Contains information about most x86 features.
74 let CpuidResult {
75 ecx: proc_info_ecx,
76 edx: proc_info_edx,
77 ..
78 } = unsafe { __cpuid(0x0000_0001_u32) };
79
80 // EAX = 7, ECX = 0: Queries "Extended Features";
81 // Contains information about bmi,bmi2, and avx2 support.
82 let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7
83 {
84 let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
85 (ebx, ecx)
86 } else {
87 (0, 0) // CPUID does not support "Extended Features"
88 };
89
90 // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
91 // - EAX returns the max leaf value for extended information, that is,
92 // `cpuid` calls in range [0x8000_0000; u32::MAX]:
93 let CpuidResult {
94 eax: extended_max_basic_leaf,
95 ..
96 } = unsafe { __cpuid(0x8000_0000_u32) };
97
98 // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature
99 // Bits"
100 let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 {
101 let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
102 ecx
103 } else {
104 0
105 };
106
107 {
108 // borrows value till the end of this scope:
109 let mut enable = |r, rb, f| {
110 if bit::test(r as usize, rb) {
111 value.set(f as u32);
112 }
113 };
114
115 enable(proc_info_ecx, 0, Feature::sse3);
116 enable(proc_info_ecx, 9, Feature::ssse3);
117 enable(proc_info_ecx, 13, Feature::cmpxchg16b);
118 enable(proc_info_ecx, 19, Feature::sse4_1);
119 enable(proc_info_ecx, 20, Feature::sse4_2);
120 enable(proc_info_ecx, 23, Feature::popcnt);
121 enable(proc_info_ecx, 25, Feature::aes);
122 enable(proc_info_ecx, 1, Feature::pclmulqdq);
123 enable(proc_info_ecx, 30, Feature::rdrand);
124 enable(extended_features_ebx, 18, Feature::rdseed);
125 enable(extended_features_ebx, 19, Feature::adx);
126 enable(proc_info_edx, 4, Feature::tsc);
127 enable(proc_info_edx, 23, Feature::mmx);
128 enable(proc_info_edx, 24, Feature::fxsr);
129 enable(proc_info_edx, 25, Feature::sse);
130 enable(proc_info_edx, 26, Feature::sse2);
131 enable(extended_features_ebx, 29, Feature::sha);
132
133 enable(extended_features_ebx, 3, Feature::bmi);
134 enable(extended_features_ebx, 8, Feature::bmi2);
135
136 // `XSAVE` and `AVX` support:
137 let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
138 if cpu_xsave {
139 // 0. Here the CPU supports `XSAVE`.
140
141 // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
142 // supports saving the state of the AVX/AVX2 vector registers on
143 // context-switches, see:
144 //
145 // - [intel: is avx enabled?][is_avx_enabled],
146 // - [mozilla: sse.cpp][mozilla_sse_cpp].
147 //
148 // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
149 // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
150 let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
151
152 if cpu_osxsave {
153 // 2. The OS must have signaled the CPU that it supports saving and
154 // restoring the:
155 //
156 // * SSE -> `XCR0.SSE[1]`
157 // * AVX -> `XCR0.AVX[2]`
158 // * AVX-512 -> `XCR0.AVX-512[7:5]`.
159 //
160 // by setting the corresponding bits of `XCR0` to `1`.
161 //
162 // This is safe because the CPU supports `xsave`
163 // and the OS has set `osxsave`.
164 let xcr0 = unsafe { _xgetbv(0) };
165 // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`:
166 let os_avx_support = xcr0 & 6 == 6;
167 // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`:
168 let os_avx512_support = xcr0 & 224 == 224;
169
170 // Only if the OS and the CPU support saving/restoring the AVX
171 // registers we enable `xsave` support:
172 if os_avx_support {
173 // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
174 // FEATURES" in the "Intel® 64 and IA-32 Architectures Software
175 // Developer’s Manual, Volume 1: Basic Architecture":
176 //
177 // "Software enables the XSAVE feature set by setting
178 // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
179 // instruction). If this bit is 0, execution of any of XGETBV,
180 // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
181 // causes an invalid-opcode exception (#UD)"
182 //
183 enable(proc_info_ecx, 26, Feature::xsave);
184
185 // For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
186 // Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
187 // ECX = 1):
188 if max_basic_leaf >= 0xd {
189 let CpuidResult {
190 eax: proc_extended_state1_eax,
191 ..
192 } = unsafe { __cpuid_count(0xd_u32, 1) };
193 enable(proc_extended_state1_eax, 0, Feature::xsaveopt);
194 enable(proc_extended_state1_eax, 1, Feature::xsavec);
195 enable(proc_extended_state1_eax, 3, Feature::xsaves);
196 }
197
198 // FMA (uses 256-bit wide registers):
199 enable(proc_info_ecx, 12, Feature::fma);
200
201 // And AVX/AVX2:
202 enable(proc_info_ecx, 28, Feature::avx);
203 enable(extended_features_ebx, 5, Feature::avx2);
204
205 // For AVX-512 the OS also needs to support saving/restoring
206 // the extended state, only then we enable AVX-512 support:
207 if os_avx512_support {
208 enable(extended_features_ebx, 16, Feature::avx512f);
209 enable(extended_features_ebx, 17, Feature::avx512dq);
210 enable(extended_features_ebx, 21, Feature::avx512_ifma);
211 enable(extended_features_ebx, 26, Feature::avx512pf);
212 enable(extended_features_ebx, 27, Feature::avx512er);
213 enable(extended_features_ebx, 28, Feature::avx512cd);
214 enable(extended_features_ebx, 30, Feature::avx512bw);
215 enable(extended_features_ebx, 31, Feature::avx512vl);
216 enable(extended_features_ecx, 1, Feature::avx512_vbmi);
217 enable(
218 extended_features_ecx,
219 14,
220 Feature::avx512_vpopcntdq,
221 );
222 }
223 }
224 }
225 }
226
227 // This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
228 // On intel CPUs with popcnt, lzcnt implements the
229 // "missing part" of ABM, so we map both to the same
230 // internal feature.
231 //
232 // The `is_x86_feature_detected!("lzcnt")` macro then
233 // internally maps to Feature::abm.
234 enable(extended_proc_info_ecx, 5, Feature::abm);
235 if vendor_id == *b"AuthenticAMD" {
236 // These features are only available on AMD CPUs:
237 enable(extended_proc_info_ecx, 6, Feature::sse4a);
238 enable(extended_proc_info_ecx, 21, Feature::tbm);
239 }
240 }
241
242 value
243 }
244
#[cfg(test)]
mod tests {
    extern crate cupid;

    /// Prints the run-time detection result of every feature, one per line.
    #[test]
    fn dump() {
        // Prints the label, a single space, and the detection result of the
        // named feature. The label is kept separate from the feature name
        // because the two do not always coincide.
        macro_rules! show {
            ($label:tt, $feature:tt) => {
                println!(concat!($label, " {:?}"), is_x86_feature_detected!($feature))
            };
        }

        show!("aes:", "aes");
        show!("pclmulqdq:", "pclmulqdq");
        show!("rdrand:", "rdrand");
        show!("rdseed:", "rdseed");
        show!("tsc:", "tsc");
        show!("sse:", "sse");
        show!("sse2:", "sse2");
        show!("sse3:", "sse3");
        show!("ssse3:", "ssse3");
        show!("sse4.1:", "sse4.1");
        show!("sse4.2:", "sse4.2");
        show!("sse4a:", "sse4a");
        show!("sha:", "sha");
        show!("avx:", "avx");
        show!("avx2:", "avx2");
        show!("avx512f", "avx512f");
        show!("avx512cd", "avx512cd");
        show!("avx512er", "avx512er");
        show!("avx512pf", "avx512pf");
        show!("avx512bw", "avx512bw");
        show!("avx512dq", "avx512dq");
        show!("avx512vl", "avx512vl");
        show!("avx512_ifma", "avx512ifma");
        show!("avx512_vbmi", "avx512vbmi");
        show!("avx512_vpopcntdq", "avx512vpopcntdq");
        show!("fma:", "fma");
        show!("abm:", "abm");
        show!("bmi:", "bmi1");
        show!("bmi2:", "bmi2");
        show!("tbm:", "tbm");
        show!("popcnt:", "popcnt");
        show!("lzcnt:", "lzcnt");
        show!("fxsr:", "fxsr");
        show!("xsave:", "xsave");
        show!("xsaveopt:", "xsaveopt");
        show!("xsaves:", "xsaves");
        show!("xsavec:", "xsavec");
        show!("cmpxchg16b:", "cmpxchg16b");
        show!("adx:", "adx");
    }

    /// Checks that each detection result equals the value reported by the
    /// corresponding `cupid` query.
    #[test]
    fn compare_with_cupid() {
        let information = cupid::master().unwrap();

        // Asserts that detecting `$feature` yields the same answer as calling
        // the `$query` method on `information`.
        macro_rules! agrees {
            ($feature:tt, $query:ident) => {
                assert_eq!(is_x86_feature_detected!($feature), information.$query())
            };
        }

        agrees!("aes", aesni);
        agrees!("pclmulqdq", pclmulqdq);
        agrees!("rdrand", rdrand);
        agrees!("rdseed", rdseed);
        agrees!("tsc", tsc);
        agrees!("sse", sse);
        agrees!("sse2", sse2);
        agrees!("sse3", sse3);
        agrees!("ssse3", ssse3);
        agrees!("sse4.1", sse4_1);
        agrees!("sse4.2", sse4_2);
        agrees!("sse4a", sse4a);
        agrees!("sha", sha);
        agrees!("avx", avx);
        agrees!("avx2", avx2);
        agrees!("avx512f", avx512f);
        agrees!("avx512cd", avx512cd);
        agrees!("avx512er", avx512er);
        agrees!("avx512pf", avx512pf);
        agrees!("avx512bw", avx512bw);
        agrees!("avx512dq", avx512dq);
        agrees!("avx512vl", avx512vl);
        agrees!("avx512ifma", avx512_ifma);
        agrees!("avx512vbmi", avx512_vbmi);
        agrees!("avx512vpopcntdq", avx512_vpopcntdq);
        agrees!("fma", fma);
        agrees!("bmi1", bmi1);
        agrees!("bmi2", bmi2);
        agrees!("popcnt", popcnt);
        // `abm` and `lzcnt` are both compared against cupid's `lzcnt`.
        agrees!("abm", lzcnt);
        agrees!("tbm", tbm);
        agrees!("lzcnt", lzcnt);
        agrees!("xsave", xsave);
        agrees!("xsaveopt", xsaveopt);
        agrees!("xsavec", xsavec_and_xrstor);
        agrees!("xsaves", xsaves_xrstors_and_ia32_xss);
        agrees!("cmpxchg16b", cmpxchg16b);
        agrees!("adx", adx);
    }
}