]>
Commit | Line | Data |
---|---|---|
416331ca | 1 | //! Runtime support needed for testing the stdarch crate. |
0531ce1d XL |
2 | //! |
3 | //! This basically just disassembles the current executable and then parses the | |
4 | //! output once globally and then provides the `assert` function which makes | |
5 | //! assertions about the disassembly of a function. | |
cdc7bbd5 | 6 | #![deny(rust_2018_idioms)] |
48663c56 | 7 | #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] |
0531ce1d | 8 | |
0531ce1d XL |
9 | #[macro_use] |
10 | extern crate lazy_static; | |
0bf4aa26 XL |
11 | #[macro_use] |
12 | extern crate cfg_if; | |
0531ce1d XL |
13 | |
14 | pub use assert_instr_macro::*; | |
15 | pub use simd_test_macro::*; | |
c295e0f8 | 16 | use std::{cmp, collections::HashSet, env, hash, hint::black_box, str}; |
0bf4aa26 XL |
17 | |
18 | cfg_if! { | |
19 | if #[cfg(target_arch = "wasm32")] { | |
0bf4aa26 XL |
20 | pub mod wasm; |
21 | use wasm::disassemble_myself; | |
22 | } else { | |
23 | mod disassembly; | |
17df50a5 | 24 | use crate::disassembly::disassemble_myself; |
0bf4aa26 XL |
25 | } |
26 | } | |
0531ce1d XL |
27 | |
lazy_static! {
    /// Set of disassembled functions returned by `disassemble_myself`,
    /// looked up by function name in `assert`.
    static ref DISASSEMBLY: HashSet<Function> = disassemble_myself();
}
31 | ||
/// A disassembled function: its name plus the instructions found in it.
///
/// Equality and hashing only look at `name`, so a `Function` with an empty
/// instruction list can serve as the lookup key for a fully populated one.
#[derive(Debug)]
struct Function {
    name: String,
    instrs: Vec<String>,
}

impl Function {
    /// Creates a function called `n` with no instructions recorded.
    fn new(n: &str) -> Self {
        let name = String::from(n);
        let instrs = vec![];
        Self { name, instrs }
    }
}

impl cmp::PartialEq for Function {
    /// Two functions are equal when their names are equal; `instrs` is ignored.
    fn eq(&self, other: &Self) -> bool {
        self.name.eq(&other.name)
    }
}

impl cmp::Eq for Function {}

impl hash::Hash for Function {
    /// Hashes the name only, matching the `PartialEq` implementation.
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        hash::Hash::hash(&self.name, state);
    }
}
58 | ||
59 | /// Main entry point for this crate, called by the `#[assert_instr]` macro. | |
60 | /// | |
61 | /// This asserts that the function at `fnptr` contains the instruction | |
62 | /// `expected` provided. | |
fc512014 XL |
63 | pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { |
64 | // Make sure that the shim is not removed | |
65 | black_box(shim_addr); | |
66 | ||
9ffffee4 | 67 | //eprintln!("shim name: {fnname}"); |
416331ca XL |
68 | let function = &DISASSEMBLY |
69 | .get(&Function::new(fnname)) | |
9ffffee4 | 70 | .unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly")); |
416331ca | 71 | //eprintln!(" function: {:?}", function); |
0531ce1d XL |
72 | |
73 | let mut instrs = &function.instrs[..]; | |
416331ca | 74 | while instrs.last().map_or(false, |s| s == "nop") { |
0531ce1d XL |
75 | instrs = &instrs[..instrs.len() - 1]; |
76 | } | |
77 | ||
c295e0f8 XL |
78 | // Look for `expected` as the first part of any instruction in this |
79 | // function, e.g., tzcntl in tzcntl %rax,%rax. | |
80 | // | |
17df50a5 XL |
81 | // There are two cases when the expected instruction is nop: |
82 | // 1. The expected intrinsic is compiled away so we can't | |
83 | // check for it - aka the intrinsic is not generating any code. | |
84 | // 2. It is a mark, indicating that the instruction will be | |
85 | // compiled into other instructions - mainly because of llvm | |
86 | // optimization. | |
c295e0f8 XL |
87 | let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected)); |
88 | ||
89 | // Look for subroutine call instructions in the disassembly to detect whether | |
90 | // inlining failed: all intrinsics are `#[inline(always)]`, so calling one | |
91 | // intrinsic from another should not generate subroutine call instructions. | |
92 | let inlining_failed = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "wasm32") { | |
93 | instrs.iter().any(|s| s.starts_with("call ")) | |
94 | } else if cfg!(target_arch = "x86") { | |
95 | instrs.windows(2).any(|s| { | |
96 | // On 32-bit x86 position independent code will call itself and be | |
97 | // immediately followed by a `pop` to learn about the current address. | |
98 | // Let's not take that into account when considering whether a function | |
99 | // failed inlining something. | |
100 | s[0].starts_with("call ") && s[1].starts_with("pop") // FIXME: original logic but does not match comment | |
101 | }) | |
e8be2606 | 102 | } else if cfg!(any(target_arch = "aarch64", target_arch = "arm64ec")) { |
c295e0f8 XL |
103 | instrs.iter().any(|s| s.starts_with("bl ")) |
104 | } else { | |
105 | // FIXME: Add detection for other archs | |
106 | false | |
107 | }; | |
0531ce1d | 108 | |
416331ca | 109 | let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT") |
0731742a XL |
110 | .ok() |
111 | .map_or_else( | |
112 | || match expected { | |
532ac7d7 XL |
113 | // `cpuid` returns a pretty big aggregate structure, so exempt |
114 | // it from the slightly more restrictive 22 instructions below. | |
0731742a XL |
115 | "cpuid" => 30, |
116 | ||
532ac7d7 | 117 | // Apparently, on Windows, LLVM generates a bunch of |
353b0b11 | 118 | // saves/restores of xmm registers around these instructions, |
532ac7d7 XL |
119 | // which exceeds the limit of 20 below. As it seems dictated by |
120 | // Windows's ABI (I believe?), we probably can't do much | |
121 | // about it. | |
0731742a XL |
122 | "vzeroall" | "vzeroupper" if cfg!(windows) => 30, |
123 | ||
124 | // Intrinsics using `cvtpi2ps` are typically "composites" and | |
125 | // in some cases exceed the limit. | |
126 | "cvtpi2ps" => 25, | |
17df50a5 XL |
127 | // core_arch/src/arm_shared/simd32 |
128 | // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit) | |
129 | "usad8" | "vfma" | "vfms" => 27, | |
532ac7d7 | 130 | "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, |
c295e0f8 | 131 | // core_arch/src/arm_shared/simd32 |
781aab86 FG |
132 | // vst1q_s64_x4_vst1 : #instructions = 27 >= 22 (limit) |
133 | "vld3" => 28, | |
3c0e092e | 134 | // core_arch/src/arm_shared/simd32 |
781aab86 FG |
135 | // vld4q_lane_u32_vld4 : #instructions = 36 >= 22 (limit) |
136 | "vld4" => 37, | |
3c0e092e | 137 | // core_arch/src/arm_shared/simd32 |
c295e0f8 XL |
138 | // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit) |
139 | "vst1" => 41, | |
3c0e092e | 140 | // core_arch/src/arm_shared/simd32 |
781aab86 FG |
141 | // vst3q_u32_vst3 : #instructions = 25 >= 22 (limit) |
142 | "vst3" => 26, | |
143 | // core_arch/src/arm_shared/simd32 | |
144 | // vst4q_u32_vst4 : #instructions = 33 >= 22 (limit) | |
145 | "vst4" => 34, | |
532ac7d7 | 146 | |
3c0e092e XL |
147 | // core_arch/src/arm_shared/simd32 |
148 | // vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit) | |
149 | "nop" if fnname.contains("vst1q_p64") => 34, | |
150 | ||
0731742a | 151 | // Original limit was 20 instructions, but ARM DSP Intrinsics |
532ac7d7 XL |
152 | // are exactly 20 instructions long. So, bump the limit to 22 |
153 | // instead of adding here a long list of exceptions. | |
0731742a XL |
154 | _ => 22, |
155 | }, | |
156 | |v| v.parse().unwrap(), | |
157 | ); | |
0531ce1d XL |
158 | let probably_only_one_instruction = instrs.len() < instruction_limit; |
159 | ||
160 | if found && probably_only_one_instruction && !inlining_failed { | |
161 | return; | |
162 | } | |
163 | ||
164 | // Help debug by printing out the found disassembly, and then panic as we | |
165 | // didn't find the instruction. | |
9ffffee4 | 166 | println!("disassembly for {fnname}: ",); |
0531ce1d | 167 | for (i, instr) in instrs.iter().enumerate() { |
9ffffee4 | 168 | println!("\t{i:2}: {instr}"); |
0531ce1d XL |
169 | } |
170 | ||
171 | if !found { | |
172 | panic!( | |
173 | "failed to find instruction `{}` in the disassembly", | |
174 | expected | |
175 | ); | |
176 | } else if !probably_only_one_instruction { | |
177 | panic!( | |
178 | "instruction found, but the disassembly contains too many \ | |
179 | instructions: #instructions = {} >= {} (limit)", | |
180 | instrs.len(), | |
181 | instruction_limit | |
182 | ); | |
183 | } else if inlining_failed { | |
184 | panic!( | |
c295e0f8 XL |
185 | "instruction found, but the disassembly contains subroutine \ |
186 | call instructions, which hint that inlining failed" | |
0531ce1d XL |
187 | ); |
188 | } | |
189 | } | |
190 | ||
/// Reports that the test `name` is being skipped because the target features
/// in `missing_features` are missing.
///
/// The test name and each missing feature are printed to stdout.
///
/// # Panics
///
/// Panics if the `STDARCH_TEST_EVERYTHING` environment variable can be read,
/// since in that mode a skipped test is treated as an error.
pub fn assert_skip_test_ok(name: &str, missing_features: &[&str]) {
    println!("Skipping test `{name}` due to missing target features:");
    missing_features
        .iter()
        .for_each(|feature| println!(" - {feature}"));

    if env::var("STDARCH_TEST_EVERYTHING").is_ok() {
        panic!("skipped test `{name}` when it shouldn't be skipped");
    }
    println!("Set STDARCH_TEST_EVERYTHING to make this an error.");
}
8faf50e0 XL |
201 | |
/// See the comment in the `assert-instr-macro` crate for why this exists.
pub static mut _DONT_DEDUP: *const u8 = std::ptr::null::<u8>();