]> git.proxmox.com Git - rustc.git/blob - library/stdarch/crates/core_arch/src/x86/sse4a.rs
New upstream version 1.59.0+dfsg1
[rustc.git] / library / stdarch / crates / core_arch / src / x86 / sse4a.rs
1 //! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
2
3 use crate::{
4 core_arch::{simd::*, x86::*},
5 mem::transmute,
6 };
7
8 #[cfg(test)]
9 use stdarch_test::assert_instr;
10
11 #[allow(improper_ctypes)]
12 extern "C" {
13 #[link_name = "llvm.x86.sse4a.extrq"]
14 fn extrq(x: i64x2, y: i8x16) -> i64x2;
15 #[link_name = "llvm.x86.sse4a.insertq"]
16 fn insertq(x: i64x2, y: i64x2) -> i64x2;
17 #[link_name = "llvm.x86.sse4a.movnt.sd"]
18 fn movntsd(x: *mut f64, y: __m128d);
19 #[link_name = "llvm.x86.sse4a.movnt.ss"]
20 fn movntss(x: *mut f32, y: __m128);
21 }
22
23 // FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
24 // FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ
25
26 /// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
27 ///
28 /// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The
29 /// `[5:0]` bits of `y` specify the length of the bit-range to extract. All
30 /// other bits are ignored.
31 ///
32 /// If the length is zero, it is interpreted as `64`. If the length and index
33 /// are zero, the lower 64 bits of `x` are extracted.
34 ///
35 /// If `length == 0 && index > 0` or `length + index > 64` the result is
36 /// undefined.
37 #[inline]
38 #[target_feature(enable = "sse4a")]
39 #[cfg_attr(test, assert_instr(extrq))]
40 #[stable(feature = "simd_x86", since = "1.27.0")]
41 pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
42 transmute(extrq(x.as_i64x2(), y.as_i8x16()))
43 }
44
45 /// Inserts the `[length:0]` bits of `y` into `x` at `index`.
46 ///
47 /// The bits of `y`:
48 ///
49 /// - `[69:64]` specify the `length`,
50 /// - `[77:72]` specify the index.
51 ///
52 /// If the `length` is zero it is interpreted as `64`. If `index + length > 64`
53 /// or `index > 0 && length == 0` the result is undefined.
54 #[inline]
55 #[target_feature(enable = "sse4a")]
56 #[cfg_attr(test, assert_instr(insertq))]
57 #[stable(feature = "simd_x86", since = "1.27.0")]
58 pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
59 transmute(insertq(x.as_i64x2(), y.as_i64x2()))
60 }
61
62 /// Non-temporal store of `a.0` into `p`.
63 ///
64 /// Writes 64-bit data to a memory location without polluting the caches.
65 #[inline]
66 #[target_feature(enable = "sse4a")]
67 #[cfg_attr(test, assert_instr(movntsd))]
68 #[stable(feature = "simd_x86", since = "1.27.0")]
69 pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
70 movntsd(p, a);
71 }
72
73 /// Non-temporal store of `a.0` into `p`.
74 ///
75 /// Writes 32-bit data to a memory location without polluting the caches.
76 #[inline]
77 #[target_feature(enable = "sse4a")]
78 #[cfg_attr(test, assert_instr(movntss))]
79 #[stable(feature = "simd_x86", since = "1.27.0")]
80 pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
81 movntss(p, a);
82 }
83
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    // Bit-field geometry shared by the extract and insert tests: a 4-bit
    // field whose lowest bit is bit 8.
    const FIELD_INDEX: i64 = 8;
    const FIELD_LENGTH: i64 = 4;
    // Control word layout: index in bits [13:8], length in bits [5:0].
    // Equal to `0b001000___00___000100`.
    const FIELD_CONTROL: i64 = (FIELD_INDEX << 8) | FIELD_LENGTH;

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_extract_si64() {
        // Bits [11:8] of the source hold the pattern `0110`.
        let source = _mm_setr_epi64x(0b0110_0000_0000_i64, 0);
        let control = _mm_setr_epi64x(FIELD_CONTROL, 0);
        // The extracted field is expected right-aligned in the low quadword.
        let expected = _mm_setr_epi64x(0b0110_i64, 0);

        let extracted = _mm_extract_si64(source, control);

        assert_eq_m128i(extracted, expected);
    }

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_insert_si64() {
        // Pattern written into the destination.
        let field = 0b0110_i64;
        // Destination before the insert; bits [11:8] (`1010`) get replaced.
        let target = _mm_setr_epi64x(0b1010_1010_1010i64, 0);
        // Destination after the insert: bits [11:8] now read `0110`.
        let expected = _mm_setr_epi64x(0b0110_1010_1010i64, 0);
        // Low quadword carries the field, high quadword the control word.
        let operand = _mm_setr_epi64x(field, FIELD_CONTROL);

        let merged = _mm_insert_si64(target, operand);

        assert_eq_m128i(merged, expected);
    }

    // 16-byte aligned backing storage for the `f64` streaming-store test.
    #[repr(align(16))]
    struct MemoryF64 {
        data: [f64; 2],
    }

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_stream_sd() {
        let mut mem = MemoryF64 {
            data: [1.0_f64, 2.0],
        };

        let value = _mm_setr_pd(3.0, 4.0);
        _mm_stream_sd(mem.data.as_mut_ptr(), value);

        // Only the first slot is overwritten, with the lowest element.
        assert_eq!(mem.data[0], 3.0);
        assert_eq!(mem.data[1], 2.0);
    }

    // 16-byte aligned backing storage for the `f32` streaming-store test.
    #[repr(align(16))]
    struct MemoryF32 {
        data: [f32; 4],
    }

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_stream_ss() {
        let mut mem = MemoryF32 {
            data: [1.0_f32, 2.0, 3.0, 4.0],
        };

        let value = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
        _mm_stream_ss(mem.data.as_mut_ptr(), value);

        // Only the first slot is overwritten, with the lowest element.
        assert_eq!(mem.data[0], 5.0);
        assert_eq!(mem.data[1], 2.0);
        assert_eq!(mem.data[2], 3.0);
        assert_eq!(mem.data[3], 4.0);
    }
}