1 //! Runtime support needed for testing the stdarch crate.
3 //! This basically just disassembles the current executable and then parses the
4 //! output once globally and then provides the `assert` function which makes
5 //! assertions about the disassembly of a function.
6 #![deny(rust_2018_idioms)]
7 #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
10 extern crate lazy_static
;
14 pub use assert_instr_macro
::*;
15 pub use simd_test_macro
::*;
16 use std
::{cmp, collections::HashSet, env, hash, hint::black_box, str}
;
19 if #[cfg(target_arch = "wasm32")] {
21 use wasm
::disassemble_myself
;
24 use crate::disassembly
::disassemble_myself
;
// Global set of disassembled `Function`s, produced by whichever
// `disassemble_myself` was imported for the current target above.
// NOTE(review): `static ref` is `lazy_static!` syntax; the surrounding macro
// invocation is not visible in this view - confirm.
29 static ref DISASSEMBLY
: HashSet
<Function
> = disassemble_myself();
/// Creates a `Function` from the name `n`.
///
/// `assert` calls this to build the key it looks up in `DISASSEMBLY`.
// NOTE(review): the body is not visible in this view, so how the remaining
// fields are initialised cannot be stated here.
38 fn new(n
: &str) -> Self {
/// Two `Function`s compare equal when their `name`s are equal; no other field
/// takes part in the comparison.
46 impl cmp
::PartialEq
for Function
{
47 fn eq(&self, other
: &Self) -> bool
{
48 self.name
== other
.name
51 impl cmp
::Eq
for Function {}
/// `Hash` implementation for `Function`, needed to store it in a `HashSet`.
// NOTE(review): the method body is not visible in this view. Since `eq`
// compares only `name`, the hash must also be derived from `name` alone for
// `HashSet` lookups to work - confirm.
53 impl hash
::Hash
for Function
{
54 fn hash
<H
: hash
::Hasher
>(&self, state
: &mut H
) {
59 /// Main entry point for this crate, called by the `#[assert_instr]` macro.
61 /// This asserts that the disassembly of the function named `fnname` contains
62 /// an instruction starting with `expected`.
///
/// * `shim_addr` - address of the shim; see the "shim is not removed" comment
///   at the top of the body.
/// * `fnname` - name under which the function is looked up in `DISASSEMBLY`.
/// * `expected` - text that some instruction must start with; `"nop"` always
///   counts as found.
///
/// Finding `expected` is not sufficient on its own: the function must also have
/// fewer instructions than the applicable limit and must not contain subroutine
/// call instructions (on the architectures checked below).
///
/// # Panics
///
/// Panics if `fnname` is not present in the disassembly. When one of the checks
/// fails, the disassembly is printed to stdout and, per the comment near the end
/// of the body, the function panics with one of the messages listed there.
63 pub fn assert(shim_addr
: usize, fnname
: &str, expected
: &str) {
64 // Make sure that the shim is not removed
// NOTE(review): the statement this comment refers to is not visible in this
// view (presumably `black_box(shim_addr)`, given the `black_box` import) - confirm.
67 //eprintln!("shim name: {fnname}");
// Look the function up by name; `Function::new(fnname)` only serves as the key.
68 let function
= &DISASSEMBLY
69 .get(&Function
::new(fnname
))
70 .unwrap_or_else(|| panic
!("function \"{fnname}\" not found in the disassembly"));
71 //eprintln!(" function: {:?}", function);
// Strip trailing `nop` instructions so they are ignored by every check below,
// including the instruction count.
73 let mut instrs
= &function
.instrs
[..];
74 while instrs
.last().map_or(false, |s
| s
== "nop") {
75 instrs
= &instrs
[..instrs
.len() - 1];
78 // Look for `expected` as the first part of any instruction in this
79 // function, e.g., tzcntl in tzcntl %rax,%rax.
81 // There are two cases when the expected instruction is nop:
82 // 1. The expected intrinsic is compiled away so we can't
83 // check for it - aka the intrinsic is not generating any code.
84 // 2. It is a mark, indicating that the instruction will be
85 // compiled into other instructions - mainly because of llvm
// Consequently `expected == "nop"` short-circuits to "found" without
// inspecting the instructions at all.
87 let found
= expected
== "nop" || instrs
.iter().any(|s
| s
.starts_with(expected
));
89 // Look for subroutine call instructions in the disassembly to detect whether
90 // inlining failed: all intrinsics are `#[inline(always)]`, so calling one
91 // intrinsic from another should not generate subroutine call instructions.
92 let inlining_failed
= if cfg
!(target_arch
= "x86_64") || cfg
!(target_arch
= "wasm32") {
// x86_64 and wasm32: any instruction starting with `call ` counts.
93 instrs
.iter().any(|s
| s
.starts_with("call "))
94 } else if cfg
!(target_arch
= "x86") {
95 instrs
.windows(2).any(|s
| {
96 // On 32-bit x86 position independent code will call itself and be
97 // immediately followed by a `pop` to learn about the current address.
98 // Let's not take that into account when considering whether a function
99 // failed inlining something.
// NOTE(review): as written, this reports an inlining failure only when a
// `call ` IS immediately followed by a `pop` - exactly the idiom the comment
// above says to ignore - and never for a `call ` followed by anything else.
// The condition was presumably meant to be
// `s[0].starts_with("call ") && !s[1].starts_with("pop")`; confirm before
// changing, since it alters which x86 tests fail. Note also that `windows(2)`
// never examines a `call ` that is the last instruction.
100 s
[0].starts_with("call ") && s
[1].starts_with("pop") // FIXME: original logic but does not match comment
102 } else if cfg
!(target_arch
= "aarch64") {
// aarch64: `bl ` is the subroutine call instruction looked for.
103 instrs
.iter().any(|s
| s
.starts_with("bl "))
105 // FIXME: Add detection for other archs
// Exclusive upper bound on the number of instructions the function may have.
// The `STDARCH_ASSERT_INSTR_LIMIT` environment variable is read here; the
// closure at the end of this expression parses its value and unwraps the
// result, so an unparsable value panics. The match arms in between supply
// per-instruction defaults.
// NOTE(review): several lines of this expression (including the default arm)
// are not visible in this view.
109 let instruction_limit
= std
::env
::var("STDARCH_ASSERT_INSTR_LIMIT")
113 // `cpuid` returns a pretty big aggregate structure, so exempt
114 // it from the slightly more restrictive 22 instructions below.
117 // Apparently, on Windows, LLVM generates a bunch of
118 // saves/restores of xmm registers around these instructions,
119 // which exceeds the default limit below. As it seems dictated by
120 // Windows's ABI (I believe?), we probably can't do much
122 "vzeroall" | "vzeroupper" if cfg
!(windows
) => 30,
124 // Intrinsics using `cvtpi2ps` are typically "composites" and
125 // in some cases exceed the limit.
127 // core_arch/src/arm_shared/simd32
128 // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit)
129 "usad8" | "vfma" | "vfms" => 27,
130 "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
131 // core_arch/src/arm_shared/simd32
132 // vst1q_s64_x4_vst1 : #instructions = 22 >= 22 (limit)
134 // core_arch/src/arm_shared/simd32
135 // vld4q_lane_u32_vld4 : #instructions = 31 >= 22 (limit)
137 // core_arch/src/arm_shared/simd32
138 // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit)
140 // core_arch/src/arm_shared/simd32
141 // vst4q_u32_vst4 : #instructions = 26 >= 22 (limit)
144 // Temporary, currently the fptosi.sat and fptoui.sat LLVM
145 // intrinsics emit unnecessary code on arm. This can be
146 // removed once it has been addressed in LLVM.
147 "fcvtzu" | "fcvtzs" | "vcvt" => 64,
149 // core_arch/src/arm_shared/simd32
150 // vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit)
151 "nop" if fnname
.contains("vst1q_p64") => 34,
153 // Original limit was 20 instructions, but ARM DSP Intrinsics
154 // are exactly 20 instructions long. So, bump the limit to 22
155 // instead of adding here a long list of exceptions.
158 |v
| v
.parse().unwrap(),
// Strict `<`: a function with exactly `instruction_limit` instructions fails,
// matching the ">= (limit)" wording of the failure message below.
160 let probably_only_one_instruction
= instrs
.len() < instruction_limit
;
// All three checks passed.
// NOTE(review): the body of this branch is not visible in this view
// (presumably an early return) - confirm.
162 if found
&& probably_only_one_instruction
&& !inlining_failed
{
166 // Help debug by printing out the found disassembly, and then panic as we
167 // didn't find the instruction.
168 println
!("disassembly for {fnname}: ",);
169 for (i
, instr
) in instrs
.iter().enumerate() {
170 println
!("\t{i:2}: {instr}");
// The failure message names the first check that failed, in this order:
// instruction not found, too many instructions, inlining failed.
175 "failed to find instruction `{}` in the disassembly",
178 } else if !probably_only_one_instruction
{
180 "instruction found, but the disassembly contains too many \
181 instructions: #instructions = {} >= {} (limit)",
185 } else if inlining_failed
{
187 "instruction found, but the disassembly contains subroutine \
188 call instructions, which hint that inlining failed"
/// Checks whether skipping the test called `name` is acceptable.
///
/// The decision depends on the `STDARCH_TEST_EVERYTHING` environment variable.
///
/// # Panics
///
/// Panics with "skipped test `{name}` when it shouldn't be skipped" when
/// execution reaches the `panic!` below.
193 pub fn assert_skip_test_ok(name
: &str) {
// `env::var` yields `Err` when the variable is unset, and also when its value
// is not valid Unicode.
// NOTE(review): the body of this `if` is not visible in this view (presumably
// an early return, so the panic only fires when the variable is set) - confirm.
194 if env
::var("STDARCH_TEST_EVERYTHING").is_err() {
197 panic
!("skipped test `{name}` when it shouldn't be skipped");
/// Mutable global raw pointer, initialised to null.
///
/// See comment in `assert-instr-macro` crate for why this exists.
pub static mut _DONT_DEDUP: *const u8 = std::ptr::null::<u8>();