// Source: git.proxmox.com Git - rustc.git/blob - src/stdsimd/coresimd/ppsv/codegen/max.rs
//! Code generation for the max reduction.
4 /// LLVM intrinsics used in the max reduction
5 #[allow(improper_ctypes)]
7 #[link_name = "llvm.experimental.vector.reduce.smax.i8.v2i8"]
8 fn reduce_max_i8x2(x
: i8x2
) -> i8;
9 #[link_name = "llvm.experimental.vector.reduce.umax.u8.v2u8"]
10 fn reduce_max_u8x2(x
: u8x2
) -> u8;
11 #[link_name = "llvm.experimental.vector.reduce.smax.i16.v2i16"]
12 fn reduce_max_i16x2(x
: i16x2
) -> i16;
13 #[link_name = "llvm.experimental.vector.reduce.umax.u16.v2u16"]
14 fn reduce_max_u16x2(x
: u16x2
) -> u16;
15 #[link_name = "llvm.experimental.vector.reduce.smax.i32.v2i32"]
16 fn reduce_max_i32x2(x
: i32x2
) -> i32;
17 #[link_name = "llvm.experimental.vector.reduce.umax.u32.v2u32"]
18 fn reduce_max_u32x2(x
: u32x2
) -> u32;
19 #[link_name = "llvm.experimental.vector.reduce.smax.i64.v2i64"]
20 fn reduce_max_i64x2(x
: i64x2
) -> i64;
21 #[link_name = "llvm.experimental.vector.reduce.umax.u64.v2u64"]
22 fn reduce_max_u64x2(x
: u64x2
) -> u64;
23 #[link_name = "llvm.experimental.vector.reduce.smax.i8.v4i8"]
24 fn reduce_max_i8x4(x
: i8x4
) -> i8;
25 #[link_name = "llvm.experimental.vector.reduce.umax.u8.v4u8"]
26 fn reduce_max_u8x4(x
: u8x4
) -> u8;
27 #[link_name = "llvm.experimental.vector.reduce.smax.i16.v4i16"]
28 fn reduce_max_i16x4(x
: i16x4
) -> i16;
29 #[link_name = "llvm.experimental.vector.reduce.umax.u16.v4u16"]
30 fn reduce_max_u16x4(x
: u16x4
) -> u16;
31 #[link_name = "llvm.experimental.vector.reduce.smax.i32.v4i32"]
32 fn reduce_max_i32x4(x
: i32x4
) -> i32;
33 #[link_name = "llvm.experimental.vector.reduce.umax.u32.v4u32"]
34 fn reduce_max_u32x4(x
: u32x4
) -> u32;
35 #[link_name = "llvm.experimental.vector.reduce.smax.i64.v4i64"]
36 fn reduce_max_i64x4(x
: i64x4
) -> i64;
37 #[link_name = "llvm.experimental.vector.reduce.umax.u64.v4u64"]
38 fn reduce_max_u64x4(x
: u64x4
) -> u64;
39 #[link_name = "llvm.experimental.vector.reduce.smax.i8.v8i8"]
40 fn reduce_max_i8x8(x
: i8x8
) -> i8;
41 #[link_name = "llvm.experimental.vector.reduce.umax.u8.v8u8"]
42 fn reduce_max_u8x8(x
: u8x8
) -> u8;
43 #[link_name = "llvm.experimental.vector.reduce.smax.i16.v8i16"]
44 fn reduce_max_i16x8(x
: i16x8
) -> i16;
45 #[link_name = "llvm.experimental.vector.reduce.umax.u16.v8u16"]
46 fn reduce_max_u16x8(x
: u16x8
) -> u16;
47 #[link_name = "llvm.experimental.vector.reduce.smax.i32.v8i32"]
48 fn reduce_max_i32x8(x
: i32x8
) -> i32;
49 #[link_name = "llvm.experimental.vector.reduce.umax.u32.v8u32"]
50 fn reduce_max_u32x8(x
: u32x8
) -> u32;
51 #[link_name = "llvm.experimental.vector.reduce.smax.i64.v8i64"]
52 fn reduce_max_i64x8(x
: i64x8
) -> i64;
53 #[link_name = "llvm.experimental.vector.reduce.umax.u64.v8u64"]
54 fn reduce_max_u64x8(x
: u64x8
) -> u64;
55 #[link_name = "llvm.experimental.vector.reduce.smax.i8.v16i8"]
56 fn reduce_max_i8x16(x
: i8x16
) -> i8;
57 #[link_name = "llvm.experimental.vector.reduce.umax.u8.v16u8"]
58 fn reduce_max_u8x16(x
: u8x16
) -> u8;
59 #[link_name = "llvm.experimental.vector.reduce.smax.i16.v16i16"]
60 fn reduce_max_i16x16(x
: i16x16
) -> i16;
61 #[link_name = "llvm.experimental.vector.reduce.umax.u16.v16u16"]
62 fn reduce_max_u16x16(x
: u16x16
) -> u16;
63 #[link_name = "llvm.experimental.vector.reduce.smax.i32.v16i32"]
64 fn reduce_max_i32x16(x
: i32x16
) -> i32;
65 #[link_name = "llvm.experimental.vector.reduce.umax.u32.v16u32"]
66 fn reduce_max_u32x16(x
: u32x16
) -> u32;
67 #[link_name = "llvm.experimental.vector.reduce.smax.i8.v32i8"]
68 fn reduce_max_i8x32(x
: i8x32
) -> i8;
69 #[link_name = "llvm.experimental.vector.reduce.umax.u8.v32u8"]
70 fn reduce_max_u8x32(x
: u8x32
) -> u8;
71 #[link_name = "llvm.experimental.vector.reduce.smax.i16.v32i16"]
72 fn reduce_max_i16x32(x
: i16x32
) -> i16;
73 #[link_name = "llvm.experimental.vector.reduce.umax.u16.v32u16"]
74 fn reduce_max_u16x32(x
: u16x32
) -> u16;
75 #[link_name = "llvm.experimental.vector.reduce.smax.i8.v64i8"]
76 fn reduce_max_i8x64(x
: i8x64
) -> i8;
77 #[link_name = "llvm.experimental.vector.reduce.umax.u8.v64u8"]
78 fn reduce_max_u8x64(x
: u8x64
) -> u8;
79 #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v2f32"]
80 fn reduce_fmax_f32x2(x
: f32x2
) -> f32;
81 #[link_name = "llvm.experimental.vector.reduce.fmax.f64.v2f64"]
82 fn reduce_fmax_f64x2(x
: f64x2
) -> f64;
83 #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v4f32"]
84 fn reduce_fmax_f32x4(x
: f32x4
) -> f32;
85 #[link_name = "llvm.experimental.vector.reduce.fmax.f64.v4f64"]
86 fn reduce_fmax_f64x4(x
: f64x4
) -> f64;
87 #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v8f32"]
88 fn reduce_fmax_f32x8(x
: f32x8
) -> f32;
89 #[link_name = "llvm.experimental.vector.reduce.fmax.f64.v8f64"]
90 fn reduce_fmax_f64x8(x
: f64x8
) -> f64;
91 #[link_name = "llvm.experimental.vector.reduce.fmax.f32.v16f32"]
92 fn reduce_fmax_f32x16(x
: f32x16
) -> f32;
/// Horizontal max reduction over the elements of a vector.
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub trait ReduceMax {
    /// Type produced by the reduction.
    type Acc;

    /// Returns the horizontal max of the vector elements.
    fn reduce_max(self) -> Self::Acc;
}

/// Implements `ReduceMax` for one vector type.
///
/// * `$id` - vector type receiving the impl.
/// * `$elem_ty` - element type, used as the reduction result (`Acc`).
/// * `$llvm_intr` - extern intrinsic invoked on every target except AArch64.
macro_rules! red_max {
    ($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
        impl ReduceMax for $id {
            type Acc = $elem_ty;

            /// Reduces via the LLVM intrinsic bound to `$llvm_intr`.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            fn reduce_max(self) -> Self::Acc {
                // `$llvm_intr` is a foreign declaration, so the call must be
                // wrapped in `unsafe`.
                unsafe { $llvm_intr(self) }
            }

            // FIXME: broken on AArch64
            /// Scalar fallback: combines the lanes one by one with `max`,
            /// starting from lane 0.
            #[cfg(target_arch = "aarch64")]
            #[allow(unused_imports)]
            #[inline]
            fn reduce_max(self) -> Self::Acc {
                // NOTE(review): upstream may bring a trait into scope here so
                // that `.max` resolves for every element type (see the
                // `allow(unused_imports)` above) - confirm against upstream.
                (1..$id::lanes()).fold(self.extract(0), |acc, lane| {
                    acc.max(self.extract(lane))
                })
            }
        }
    };
}

129 red_max
!(i8x2
, i8, reduce_max_i8x2
);
130 red_max
!(u8x2
, u8, reduce_max_u8x2
);
131 red_max
!(i16x2
, i16, reduce_max_i16x2
);
132 red_max
!(u16x2
, u16, reduce_max_u16x2
);
133 red_max
!(i32x2
, i32, reduce_max_i32x2
);
134 red_max
!(u32x2
, u32, reduce_max_u32x2
);
135 red_max
!(i64x2
, i64, reduce_max_i64x2
);
136 red_max
!(u64x2
, u64, reduce_max_u64x2
);
137 red_max
!(i8x4
, i8, reduce_max_i8x4
);
138 red_max
!(u8x4
, u8, reduce_max_u8x4
);
139 red_max
!(i16x4
, i16, reduce_max_i16x4
);
140 red_max
!(u16x4
, u16, reduce_max_u16x4
);
141 red_max
!(i32x4
, i32, reduce_max_i32x4
);
142 red_max
!(u32x4
, u32, reduce_max_u32x4
);
143 red_max
!(i64x4
, i64, reduce_max_i64x4
);
144 red_max
!(u64x4
, u64, reduce_max_u64x4
);
145 red_max
!(i8x8
, i8, reduce_max_i8x8
);
146 red_max
!(u8x8
, u8, reduce_max_u8x8
);
147 red_max
!(i16x8
, i16, reduce_max_i16x8
);
148 red_max
!(u16x8
, u16, reduce_max_u16x8
);
149 red_max
!(i32x8
, i32, reduce_max_i32x8
);
150 red_max
!(u32x8
, u32, reduce_max_u32x8
);
151 red_max
!(i64x8
, i64, reduce_max_i64x8
);
152 red_max
!(u64x8
, u64, reduce_max_u64x8
);
153 red_max
!(i8x16
, i8, reduce_max_i8x16
);
154 red_max
!(u8x16
, u8, reduce_max_u8x16
);
155 red_max
!(i16x16
, i16, reduce_max_i16x16
);
156 red_max
!(u16x16
, u16, reduce_max_u16x16
);
157 red_max
!(i32x16
, i32, reduce_max_i32x16
);
158 red_max
!(u32x16
, u32, reduce_max_u32x16
);
159 red_max
!(i8x32
, i8, reduce_max_i8x32
);
160 red_max
!(u8x32
, u8, reduce_max_u8x32
);
161 red_max
!(i16x32
, i16, reduce_max_i16x32
);
162 red_max
!(u16x32
, u16, reduce_max_u16x32
);
163 red_max
!(i8x64
, i8, reduce_max_i8x64
);
164 red_max
!(u8x64
, u8, reduce_max_u8x64
);
166 red_max
!(f32x2
, f32, reduce_fmax_f32x2
);
167 red_max
!(f64x2
, f64, reduce_fmax_f64x2
);
168 red_max
!(f32x4
, f32, reduce_fmax_f32x4
);
169 red_max
!(f64x4
, f64, reduce_fmax_f64x4
);
170 red_max
!(f32x8
, f32, reduce_fmax_f32x8
);
171 red_max
!(f64x8
, f64, reduce_fmax_f64x8
);
172 red_max
!(f32x16
, f32, reduce_fmax_f32x16
);
#[cfg(test)]
mod tests {
    use super::ReduceMax;
    use coresimd::simd::*;

    // note: these are tested in the portable vector API tests

    /// Signed lanes: the max must ignore the negative lane.
    #[test]
    fn reduce_max_i32x4() {
        let lanes = i32x4::new(1, 2, -1, 3);
        assert_eq!(lanes.reduce_max(), 3_i32);
    }

    /// Unsigned lanes: the max sits in an interior lane.
    #[test]
    fn reduce_max_u32x4() {
        let lanes = u32x4::new(4, 2, 7, 3);
        assert_eq!(lanes.reduce_max(), 7_u32);
    }

    /// Float lanes: the max sits in the first lane.
    #[test]
    fn reduce_max_f32x4() {
        let lanes = f32x4::new(4., 2., -1., 3.);
        assert_eq!(lanes.reduce_max(), 4.);
    }
}