]> git.proxmox.com Git - rustc.git/blame - src/stdsimd/coresimd/ppsv/codegen/sum.rs
New upstream version 1.26.2+dfsg1
[rustc.git] / src / stdsimd / coresimd / ppsv / codegen / sum.rs
CommitLineData
0531ce1d
XL
1//! Code generation for the sum reduction.
2use coresimd::simd::*;
3
/// LLVM intrinsics used in the sum reduction.
///
/// Integer reductions bind to the `llvm.experimental.vector.reduce.add.*`
/// family and take only the vector. Float reductions bind to the
/// `...reduce.fadd.*` family and additionally take a scalar accumulator
/// as their first argument.
// NOTE(review): the unsigned suffixes (e.g. `.u8.v2u8`) do not match
// LLVM's usual `iN`/`vMiN` type mangling — confirm these symbol names
// still resolve against the LLVM version this vendored tree targets.
#[allow(improper_ctypes)]
extern "C" {
    // 2-lane integer reductions.
    #[link_name = "llvm.experimental.vector.reduce.add.i8.v2i8"]
    fn reduce_add_i8x2(x: i8x2) -> i8;
    #[link_name = "llvm.experimental.vector.reduce.add.u8.v2u8"]
    fn reduce_add_u8x2(x: u8x2) -> u8;
    #[link_name = "llvm.experimental.vector.reduce.add.i16.v2i16"]
    fn reduce_add_i16x2(x: i16x2) -> i16;
    #[link_name = "llvm.experimental.vector.reduce.add.u16.v2u16"]
    fn reduce_add_u16x2(x: u16x2) -> u16;
    #[link_name = "llvm.experimental.vector.reduce.add.i32.v2i32"]
    fn reduce_add_i32x2(x: i32x2) -> i32;
    #[link_name = "llvm.experimental.vector.reduce.add.u32.v2u32"]
    fn reduce_add_u32x2(x: u32x2) -> u32;
    #[link_name = "llvm.experimental.vector.reduce.add.i64.v2i64"]
    fn reduce_add_i64x2(x: i64x2) -> i64;
    #[link_name = "llvm.experimental.vector.reduce.add.u64.v2u64"]
    fn reduce_add_u64x2(x: u64x2) -> u64;
    // 4-lane integer reductions.
    #[link_name = "llvm.experimental.vector.reduce.add.i8.v4i8"]
    fn reduce_add_i8x4(x: i8x4) -> i8;
    #[link_name = "llvm.experimental.vector.reduce.add.u8.v4u8"]
    fn reduce_add_u8x4(x: u8x4) -> u8;
    #[link_name = "llvm.experimental.vector.reduce.add.i16.v4i16"]
    fn reduce_add_i16x4(x: i16x4) -> i16;
    #[link_name = "llvm.experimental.vector.reduce.add.u16.v4u16"]
    fn reduce_add_u16x4(x: u16x4) -> u16;
    #[link_name = "llvm.experimental.vector.reduce.add.i32.v4i32"]
    fn reduce_add_i32x4(x: i32x4) -> i32;
    #[link_name = "llvm.experimental.vector.reduce.add.u32.v4u32"]
    fn reduce_add_u32x4(x: u32x4) -> u32;
    #[link_name = "llvm.experimental.vector.reduce.add.i64.v4i64"]
    fn reduce_add_i64x4(x: i64x4) -> i64;
    #[link_name = "llvm.experimental.vector.reduce.add.u64.v4u64"]
    fn reduce_add_u64x4(x: u64x4) -> u64;
    // 8-lane integer reductions.
    #[link_name = "llvm.experimental.vector.reduce.add.i8.v8i8"]
    fn reduce_add_i8x8(x: i8x8) -> i8;
    #[link_name = "llvm.experimental.vector.reduce.add.u8.v8u8"]
    fn reduce_add_u8x8(x: u8x8) -> u8;
    #[link_name = "llvm.experimental.vector.reduce.add.i16.v8i16"]
    fn reduce_add_i16x8(x: i16x8) -> i16;
    #[link_name = "llvm.experimental.vector.reduce.add.u16.v8u16"]
    fn reduce_add_u16x8(x: u16x8) -> u16;
    #[link_name = "llvm.experimental.vector.reduce.add.i32.v8i32"]
    fn reduce_add_i32x8(x: i32x8) -> i32;
    #[link_name = "llvm.experimental.vector.reduce.add.u32.v8u32"]
    fn reduce_add_u32x8(x: u32x8) -> u32;
    #[link_name = "llvm.experimental.vector.reduce.add.i64.v8i64"]
    fn reduce_add_i64x8(x: i64x8) -> i64;
    #[link_name = "llvm.experimental.vector.reduce.add.u64.v8u64"]
    fn reduce_add_u64x8(x: u64x8) -> u64;
    // 16-lane integer reductions (no 16-lane 64-bit vectors exist).
    #[link_name = "llvm.experimental.vector.reduce.add.i8.v16i8"]
    fn reduce_add_i8x16(x: i8x16) -> i8;
    #[link_name = "llvm.experimental.vector.reduce.add.u8.v16u8"]
    fn reduce_add_u8x16(x: u8x16) -> u8;
    #[link_name = "llvm.experimental.vector.reduce.add.i16.v16i16"]
    fn reduce_add_i16x16(x: i16x16) -> i16;
    #[link_name = "llvm.experimental.vector.reduce.add.u16.v16u16"]
    fn reduce_add_u16x16(x: u16x16) -> u16;
    #[link_name = "llvm.experimental.vector.reduce.add.i32.v16i32"]
    fn reduce_add_i32x16(x: i32x16) -> i32;
    #[link_name = "llvm.experimental.vector.reduce.add.u32.v16u32"]
    fn reduce_add_u32x16(x: u32x16) -> u32;
    // 32-lane integer reductions (8/16-bit elements only).
    #[link_name = "llvm.experimental.vector.reduce.add.i8.v32i8"]
    fn reduce_add_i8x32(x: i8x32) -> i8;
    #[link_name = "llvm.experimental.vector.reduce.add.u8.v32u8"]
    fn reduce_add_u8x32(x: u8x32) -> u8;
    #[link_name = "llvm.experimental.vector.reduce.add.i16.v32i16"]
    fn reduce_add_i16x32(x: i16x32) -> i16;
    #[link_name = "llvm.experimental.vector.reduce.add.u16.v32u16"]
    fn reduce_add_u16x32(x: u16x32) -> u16;
    // 64-lane integer reductions (8-bit elements only).
    #[link_name = "llvm.experimental.vector.reduce.add.i8.v64i8"]
    fn reduce_add_i8x64(x: i8x64) -> i8;
    #[link_name = "llvm.experimental.vector.reduce.add.u8.v64u8"]
    fn reduce_add_u8x64(x: u8x64) -> u8;
    // Floating-point reductions; `acc` is the scalar start value.
    #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v2f32"]
    fn reduce_fadd_f32x2(acc: f32, x: f32x2) -> f32;
    #[link_name = "llvm.experimental.vector.reduce.fadd.f64.v2f64"]
    fn reduce_fadd_f64x2(acc: f64, x: f64x2) -> f64;
    #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v4f32"]
    fn reduce_fadd_f32x4(acc: f32, x: f32x4) -> f32;
    #[link_name = "llvm.experimental.vector.reduce.fadd.f64.v4f64"]
    fn reduce_fadd_f64x4(acc: f64, x: f64x4) -> f64;
    #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v8f32"]
    fn reduce_fadd_f32x8(acc: f32, x: f32x8) -> f32;
    #[link_name = "llvm.experimental.vector.reduce.fadd.f64.v8f64"]
    fn reduce_fadd_f64x8(acc: f64, x: f64x8) -> f64;
    #[link_name = "llvm.experimental.vector.reduce.fadd.f32.v16f32"]
    fn reduce_fadd_f32x16(acc: f32, x: f32x16) -> f32;
}
94
/// Reduction: horizontal sum of the vector elements.
pub trait ReduceAdd {
    /// Result type of the reduction.
    ///
    /// In the impls below this is always the vector's element type
    /// (e.g. `i32` for `i32x4`).
    type Acc;
    /// Computes the horizontal sum of the vector elements.
    fn reduce_add(self) -> Self::Acc;
}
102
/// Implements `ReduceAdd` for an integer vector type.
///
/// `$vec` is the vector type, `$scalar` its element type, and `$intr`
/// the matching LLVM reduction intrinsic declared above.
macro_rules! red_add {
    ($vec:ident, $scalar:ident, $intr:ident) => {
        impl ReduceAdd for $vec {
            type Acc = $scalar;

            /// Horizontal sum via the LLVM reduction intrinsic.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            fn reduce_add(self) -> Self::Acc {
                unsafe { $intr(self) }
            }

            // FIXME: the intrinsic is broken on AArch64, so fall back
            // to summing the lanes one at a time.
            #[cfg(target_arch = "aarch64")]
            #[inline]
            fn reduce_add(self) -> Self::Acc {
                (1..$vec::lanes()).fold(self.extract(0) as Self::Acc, |sum, lane| {
                    sum + self.extract(lane) as Self::Acc
                })
            }
        }
    };
}
// Instantiate `ReduceAdd` for every portable integer vector type, from
// 2-lane 8-bit up to the 512-bit-wide 32x16/64x8 combinations.
red_add!(i8x2, i8, reduce_add_i8x2);
red_add!(u8x2, u8, reduce_add_u8x2);
red_add!(i16x2, i16, reduce_add_i16x2);
red_add!(u16x2, u16, reduce_add_u16x2);
red_add!(i32x2, i32, reduce_add_i32x2);
red_add!(u32x2, u32, reduce_add_u32x2);
red_add!(i64x2, i64, reduce_add_i64x2);
red_add!(u64x2, u64, reduce_add_u64x2);
red_add!(i8x4, i8, reduce_add_i8x4);
red_add!(u8x4, u8, reduce_add_u8x4);
red_add!(i16x4, i16, reduce_add_i16x4);
red_add!(u16x4, u16, reduce_add_u16x4);
red_add!(i32x4, i32, reduce_add_i32x4);
red_add!(u32x4, u32, reduce_add_u32x4);
red_add!(i64x4, i64, reduce_add_i64x4);
red_add!(u64x4, u64, reduce_add_u64x4);
red_add!(i8x8, i8, reduce_add_i8x8);
red_add!(u8x8, u8, reduce_add_u8x8);
red_add!(i16x8, i16, reduce_add_i16x8);
red_add!(u16x8, u16, reduce_add_u16x8);
red_add!(i32x8, i32, reduce_add_i32x8);
red_add!(u32x8, u32, reduce_add_u32x8);
red_add!(i64x8, i64, reduce_add_i64x8);
red_add!(u64x8, u64, reduce_add_u64x8);
red_add!(i8x16, i8, reduce_add_i8x16);
red_add!(u8x16, u8, reduce_add_u8x16);
red_add!(i16x16, i16, reduce_add_i16x16);
red_add!(u16x16, u16, reduce_add_u16x16);
red_add!(i32x16, i32, reduce_add_i32x16);
red_add!(u32x16, u32, reduce_add_u32x16);
red_add!(i8x32, i8, reduce_add_i8x32);
red_add!(u8x32, u8, reduce_add_u8x32);
red_add!(i16x32, i16, reduce_add_i16x32);
red_add!(u16x32, u16, reduce_add_u16x32);
red_add!(i8x64, i8, reduce_add_i8x64);
red_add!(u8x64, u8, reduce_add_u8x64);
161
/// Implements `ReduceAdd` for a floating-point vector type.
///
/// `$vec` is the vector type, `$scalar` its element type, and `$intr`
/// the matching LLVM `fadd` reduction intrinsic declared above.
macro_rules! red_fadd {
    ($vec:ident, $scalar:ident, $intr:ident) => {
        impl ReduceAdd for $vec {
            type Acc = $scalar;

            /// Horizontal sum of the lanes.
            #[inline]
            fn reduce_add(self) -> Self::Acc {
                // FIXME: should use the reduction intrinsic:
                //unsafe { $intr(0. as $scalar, self) }
                // Until then, sum lanes left-to-right; the fold keeps
                // lane order 0..n, which matters for float rounding.
                (1..$vec::lanes())
                    .fold(self.extract(0), |sum, lane| sum + self.extract(lane))
            }
        }
    };
}
179
// Instantiate `ReduceAdd` for every portable float vector type.
// Note: there is no `f64x16` type, hence no 16-lane f64 instantiation.
red_fadd!(f32x2, f32, reduce_fadd_f32x2);
red_fadd!(f64x2, f64, reduce_fadd_f64x2);
red_fadd!(f32x4, f32, reduce_fadd_f32x4);
red_fadd!(f64x4, f64, reduce_fadd_f64x4);
red_fadd!(f32x8, f32, reduce_fadd_f32x8);
red_fadd!(f64x8, f64, reduce_fadd_f64x8);
red_fadd!(f32x16, f32, reduce_fadd_f32x16);
187
#[cfg(test)]
mod tests {
    use super::ReduceAdd;
    use coresimd::simd::*;

    // note: these are tested in the portable vector API tests

    // The original tests only used `splat(1)`, which cannot distinguish
    // a sum from, e.g., `lanes() * extract(0)`. Non-uniform lane values
    // pin down that every lane is actually added.

    #[test]
    fn reduce_add_i32x4() {
        let v = i32x4::splat(1);
        assert_eq!(v.reduce_add(), 4_i32);
        // Distinct lanes: catches single-lane / scaled reductions.
        let v = i32x4::new(1, 2, 3, 4);
        assert_eq!(v.reduce_add(), 10_i32);
        // Mixed signs exercise signed addition.
        let v = i32x4::new(-1, 2, -3, 4);
        assert_eq!(v.reduce_add(), 2_i32);
    }
    #[test]
    fn reduce_add_u32x4() {
        let v = u32x4::splat(1);
        assert_eq!(v.reduce_add(), 4_u32);
        let v = u32x4::new(1, 2, 3, 4);
        assert_eq!(v.reduce_add(), 10_u32);
    }
    #[test]
    fn reduce_add_f32x4() {
        let v = f32x4::splat(1.);
        assert_eq!(v.reduce_add(), 4.);
        // 1+2+3+4 = 10 is exactly representable in f32, so exact
        // equality is safe here.
        let v = f32x4::new(1., 2., 3., 4.);
        assert_eq!(v.reduce_add(), 10.);
    }
}