]>
Commit | Line | Data |
---|---|---|
29967ef6 XL |
1 | //! Codegen `extern "platform-intrinsic"` intrinsics. |
2 | ||
5e7ed085 FG |
3 | use rustc_middle::ty::subst::SubstsRef; |
4 | use rustc_span::Symbol; | |
2b03887a | 5 | use rustc_target::abi::Endian; |
5e7ed085 | 6 | |
29967ef6 XL |
7 | use super::*; |
8 | use crate::prelude::*; | |
9 | ||
5e7ed085 FG |
10 | fn report_simd_type_validation_error( |
11 | fx: &mut FunctionCx<'_, '_, '_>, | |
12 | intrinsic: Symbol, | |
13 | span: Span, | |
14 | ty: Ty<'_>, | |
15 | ) { | |
16 | fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty)); | |
17 | // Prevent verifier error | |
f2b60f7d | 18 | fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); |
5e7ed085 FG |
19 | } |
20 | ||
29967ef6 | 21 | pub(super) fn codegen_simd_intrinsic_call<'tcx>( |
6a06907d | 22 | fx: &mut FunctionCx<'_, '_, 'tcx>, |
5e7ed085 FG |
23 | intrinsic: Symbol, |
24 | _substs: SubstsRef<'tcx>, | |
29967ef6 XL |
25 | args: &[mir::Operand<'tcx>], |
26 | ret: CPlace<'tcx>, | |
27 | span: Span, | |
28 | ) { | |
064997fb | 29 | match intrinsic { |
2b03887a | 30 | sym::simd_as | sym::simd_cast => { |
064997fb | 31 | intrinsic_args!(fx, args => (a); intrinsic); |
29967ef6 | 32 | |
5e7ed085 FG |
33 | if !a.layout().ty.is_simd() { |
34 | report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); | |
35 | return; | |
36 | } | |
37 | ||
38 | simd_for_each_lane(fx, a, ret, &|fx, lane_ty, ret_lane_ty, lane| { | |
39 | let ret_lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap(); | |
29967ef6 | 40 | |
5e7ed085 FG |
41 | let from_signed = type_sign(lane_ty); |
42 | let to_signed = type_sign(ret_lane_ty); | |
29967ef6 | 43 | |
5e7ed085 | 44 | clif_int_or_float_cast(fx, lane, from_signed, ret_lane_clif_ty, to_signed) |
29967ef6 | 45 | }); |
064997fb FG |
46 | } |
47 | ||
48 | sym::simd_eq | sym::simd_ne | sym::simd_lt | sym::simd_le | sym::simd_gt | sym::simd_ge => { | |
49 | intrinsic_args!(fx, args => (x, y); intrinsic); | |
29967ef6 | 50 | |
5e7ed085 FG |
51 | if !x.layout().ty.is_simd() { |
52 | report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty); | |
53 | return; | |
54 | } | |
55 | ||
56 | // FIXME use vector instructions when possible | |
57 | simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| { | |
58 | let res_lane = match (lane_ty.kind(), intrinsic) { | |
59 | (ty::Uint(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane), | |
064997fb FG |
60 | (ty::Uint(_), sym::simd_ne) => { |
61 | fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane) | |
62 | } | |
5e7ed085 FG |
63 | (ty::Uint(_), sym::simd_lt) => { |
64 | fx.bcx.ins().icmp(IntCC::UnsignedLessThan, x_lane, y_lane) | |
65 | } | |
66 | (ty::Uint(_), sym::simd_le) => { | |
67 | fx.bcx.ins().icmp(IntCC::UnsignedLessThanOrEqual, x_lane, y_lane) | |
68 | } | |
69 | (ty::Uint(_), sym::simd_gt) => { | |
70 | fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, x_lane, y_lane) | |
71 | } | |
72 | (ty::Uint(_), sym::simd_ge) => { | |
73 | fx.bcx.ins().icmp(IntCC::UnsignedGreaterThanOrEqual, x_lane, y_lane) | |
74 | } | |
75 | ||
76 | (ty::Int(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane), | |
064997fb FG |
77 | (ty::Int(_), sym::simd_ne) => { |
78 | fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane) | |
79 | } | |
80 | (ty::Int(_), sym::simd_lt) => { | |
81 | fx.bcx.ins().icmp(IntCC::SignedLessThan, x_lane, y_lane) | |
82 | } | |
5e7ed085 FG |
83 | (ty::Int(_), sym::simd_le) => { |
84 | fx.bcx.ins().icmp(IntCC::SignedLessThanOrEqual, x_lane, y_lane) | |
85 | } | |
86 | (ty::Int(_), sym::simd_gt) => { | |
87 | fx.bcx.ins().icmp(IntCC::SignedGreaterThan, x_lane, y_lane) | |
88 | } | |
89 | (ty::Int(_), sym::simd_ge) => { | |
90 | fx.bcx.ins().icmp(IntCC::SignedGreaterThanOrEqual, x_lane, y_lane) | |
91 | } | |
92 | ||
064997fb FG |
93 | (ty::Float(_), sym::simd_eq) => { |
94 | fx.bcx.ins().fcmp(FloatCC::Equal, x_lane, y_lane) | |
95 | } | |
96 | (ty::Float(_), sym::simd_ne) => { | |
97 | fx.bcx.ins().fcmp(FloatCC::NotEqual, x_lane, y_lane) | |
98 | } | |
99 | (ty::Float(_), sym::simd_lt) => { | |
100 | fx.bcx.ins().fcmp(FloatCC::LessThan, x_lane, y_lane) | |
101 | } | |
5e7ed085 FG |
102 | (ty::Float(_), sym::simd_le) => { |
103 | fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, x_lane, y_lane) | |
104 | } | |
064997fb FG |
105 | (ty::Float(_), sym::simd_gt) => { |
106 | fx.bcx.ins().fcmp(FloatCC::GreaterThan, x_lane, y_lane) | |
107 | } | |
5e7ed085 FG |
108 | (ty::Float(_), sym::simd_ge) => { |
109 | fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, x_lane, y_lane) | |
110 | } | |
111 | ||
112 | _ => unreachable!(), | |
113 | }; | |
114 | ||
9c376795 | 115 | bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane) |
5e7ed085 | 116 | }); |
064997fb | 117 | } |
29967ef6 XL |
118 | |
119 | // simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U | |
064997fb FG |
120 | _ if intrinsic.as_str().starts_with("simd_shuffle") => { |
121 | let (x, y, idx) = match args { | |
122 | [x, y, idx] => (x, y, idx), | |
123 | _ => { | |
124 | bug!("wrong number of args for intrinsic {intrinsic}"); | |
125 | } | |
126 | }; | |
127 | let x = codegen_operand(fx, x); | |
128 | let y = codegen_operand(fx, y); | |
129 | ||
5e7ed085 FG |
130 | if !x.layout().ty.is_simd() { |
131 | report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty); | |
132 | return; | |
133 | } | |
29967ef6 | 134 | |
a2a8927a XL |
135 | // If this intrinsic is the older "simd_shuffleN" form, simply parse the integer. |
136 | // If there is no suffix, use the index array length. | |
137 | let n: u16 = if intrinsic == sym::simd_shuffle { | |
138 | // Make sure this is actually an array, since typeck only checks the length-suffixed | |
139 | // version of this intrinsic. | |
140 | let idx_ty = fx.monomorphize(idx.ty(fx.mir, fx.tcx)); | |
141 | match idx_ty.kind() { | |
064997fb FG |
142 | ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => len |
143 | .try_eval_usize(fx.tcx, ty::ParamEnv::reveal_all()) | |
144 | .unwrap_or_else(|| { | |
a2a8927a | 145 | span_bug!(span, "could not evaluate shuffle index array length") |
064997fb FG |
146 | }) |
147 | .try_into() | |
148 | .unwrap(), | |
a2a8927a XL |
149 | _ => { |
150 | fx.tcx.sess.span_err( | |
151 | span, | |
152 | &format!( | |
153 | "simd_shuffle index must be an array of `u32`, got `{}`", | |
154 | idx_ty, | |
155 | ), | |
156 | ); | |
157 | // Prevent verifier error | |
f2b60f7d | 158 | fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); |
a2a8927a XL |
159 | return; |
160 | } | |
161 | } | |
162 | } else { | |
2b03887a | 163 | // FIXME remove this case |
a2a8927a XL |
164 | intrinsic.as_str()["simd_shuffle".len()..].parse().unwrap() |
165 | }; | |
29967ef6 XL |
166 | |
167 | assert_eq!(x.layout(), y.layout()); | |
168 | let layout = x.layout(); | |
169 | ||
5869c6ff XL |
170 | let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); |
171 | let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); | |
29967ef6 | 172 | |
5869c6ff XL |
173 | assert_eq!(lane_ty, ret_lane_ty); |
174 | assert_eq!(u64::from(n), ret_lane_count); | |
29967ef6 XL |
175 | |
176 | let total_len = lane_count * 2; | |
177 | ||
178 | let indexes = { | |
179 | use rustc_middle::mir::interpret::*; | |
064997fb FG |
180 | let idx_const = crate::constant::mir_operand_get_const_val(fx, idx) |
181 | .expect("simd_shuffle* idx not const"); | |
29967ef6 | 182 | |
6a06907d XL |
183 | let idx_bytes = match idx_const { |
184 | ConstValue::ByRef { alloc, offset } => { | |
064997fb FG |
185 | let size = Size::from_bytes( |
186 | 4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */ | |
187 | ); | |
f2b60f7d FG |
188 | alloc |
189 | .inner() | |
190 | .get_bytes_strip_provenance(fx, alloc_range(offset, size)) | |
191 | .unwrap() | |
29967ef6 XL |
192 | } |
193 | _ => unreachable!("{:?}", idx_const), | |
194 | }; | |
195 | ||
064997fb FG |
196 | (0..ret_lane_count) |
197 | .map(|i| { | |
198 | let i = usize::try_from(i).unwrap(); | |
199 | let idx = rustc_middle::mir::interpret::read_target_uint( | |
200 | fx.tcx.data_layout.endian, | |
201 | &idx_bytes[4 * i..4 * i + 4], | |
202 | ) | |
203 | .expect("read_target_uint"); | |
204 | u16::try_from(idx).expect("try_from u32") | |
205 | }) | |
206 | .collect::<Vec<u16>>() | |
29967ef6 XL |
207 | }; |
208 | ||
209 | for &idx in &indexes { | |
5869c6ff | 210 | assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len); |
29967ef6 XL |
211 | } |
212 | ||
213 | for (out_idx, in_idx) in indexes.into_iter().enumerate() { | |
5869c6ff | 214 | let in_lane = if u64::from(in_idx) < lane_count { |
94222f64 | 215 | x.value_lane(fx, in_idx.into()) |
29967ef6 | 216 | } else { |
94222f64 | 217 | y.value_lane(fx, u64::from(in_idx) - lane_count) |
29967ef6 | 218 | }; |
94222f64 | 219 | let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap()); |
29967ef6 XL |
220 | out_lane.write_cvalue(fx, in_lane); |
221 | } | |
064997fb FG |
222 | } |
223 | ||
224 | sym::simd_insert => { | |
225 | let (base, idx, val) = match args { | |
226 | [base, idx, val] => (base, idx, val), | |
227 | _ => { | |
228 | bug!("wrong number of args for intrinsic {intrinsic}"); | |
229 | } | |
230 | }; | |
231 | let base = codegen_operand(fx, base); | |
232 | let val = codegen_operand(fx, val); | |
29967ef6 | 233 | |
29967ef6 | 234 | // FIXME validate |
064997fb FG |
235 | let idx_const = if let Some(idx_const) = |
236 | crate::constant::mir_operand_get_const_val(fx, idx) | |
237 | { | |
29967ef6 XL |
238 | idx_const |
239 | } else { | |
064997fb | 240 | fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant"); |
29967ef6 XL |
241 | }; |
242 | ||
064997fb FG |
243 | let idx = idx_const |
244 | .try_to_bits(Size::from_bytes(4 /* u32*/)) | |
245 | .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); | |
5869c6ff | 246 | let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx); |
29967ef6 | 247 | if idx >= lane_count.into() { |
064997fb FG |
248 | fx.tcx.sess.span_fatal( |
249 | fx.mir.span, | |
250 | &format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count), | |
251 | ); | |
29967ef6 XL |
252 | } |
253 | ||
254 | ret.write_cvalue(fx, base); | |
255 | let ret_lane = ret.place_field(fx, mir::Field::new(idx.try_into().unwrap())); | |
256 | ret_lane.write_cvalue(fx, val); | |
064997fb FG |
257 | } |
258 | ||
259 | sym::simd_extract => { | |
260 | let (v, idx) = match args { | |
261 | [v, idx] => (v, idx), | |
262 | _ => { | |
263 | bug!("wrong number of args for intrinsic {intrinsic}"); | |
264 | } | |
265 | }; | |
266 | let v = codegen_operand(fx, v); | |
29967ef6 | 267 | |
5e7ed085 FG |
268 | if !v.layout().ty.is_simd() { |
269 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
270 | return; | |
271 | } | |
272 | ||
064997fb FG |
273 | let idx_const = if let Some(idx_const) = |
274 | crate::constant::mir_operand_get_const_val(fx, idx) | |
275 | { | |
29967ef6 XL |
276 | idx_const |
277 | } else { | |
064997fb | 278 | fx.tcx.sess.span_warn(span, "Index argument for `simd_extract` is not a constant"); |
f2b60f7d FG |
279 | let trap_block = fx.bcx.create_block(); |
280 | let dummy_block = fx.bcx.create_block(); | |
281 | let true_ = fx.bcx.ins().iconst(types::I8, 1); | |
282 | fx.bcx.ins().brnz(true_, trap_block, &[]); | |
283 | fx.bcx.ins().jump(dummy_block, &[]); | |
284 | fx.bcx.switch_to_block(trap_block); | |
285 | crate::trap::trap_unimplemented( | |
fc512014 | 286 | fx, |
fc512014 XL |
287 | "Index argument for `simd_extract` is not a constant", |
288 | ); | |
f2b60f7d | 289 | fx.bcx.switch_to_block(dummy_block); |
fc512014 | 290 | return; |
29967ef6 XL |
291 | }; |
292 | ||
064997fb FG |
293 | let idx = idx_const |
294 | .try_to_bits(Size::from_bytes(4 /* u32*/)) | |
295 | .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); | |
5869c6ff | 296 | let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx); |
29967ef6 | 297 | if idx >= lane_count.into() { |
064997fb FG |
298 | fx.tcx.sess.span_fatal( |
299 | fx.mir.span, | |
300 | &format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count), | |
301 | ); | |
29967ef6 XL |
302 | } |
303 | ||
94222f64 | 304 | let ret_lane = v.value_lane(fx, idx.try_into().unwrap()); |
29967ef6 | 305 | ret.write_cvalue(fx, ret_lane); |
064997fb FG |
306 | } |
307 | ||
308 | sym::simd_neg => { | |
309 | intrinsic_args!(fx, args => (a); intrinsic); | |
29967ef6 | 310 | |
5e7ed085 FG |
311 | if !a.layout().ty.is_simd() { |
312 | report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); | |
313 | return; | |
314 | } | |
315 | ||
064997fb FG |
316 | simd_for_each_lane( |
317 | fx, | |
318 | a, | |
319 | ret, | |
320 | &|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() { | |
94222f64 XL |
321 | ty::Int(_) => fx.bcx.ins().ineg(lane), |
322 | ty::Float(_) => fx.bcx.ins().fneg(lane), | |
323 | _ => unreachable!(), | |
064997fb FG |
324 | }, |
325 | ); | |
326 | } | |
327 | ||
328 | sym::simd_add | |
329 | | sym::simd_sub | |
330 | | sym::simd_mul | |
331 | | sym::simd_div | |
332 | | sym::simd_rem | |
333 | | sym::simd_shl | |
334 | | sym::simd_shr | |
335 | | sym::simd_and | |
336 | | sym::simd_or | |
337 | | sym::simd_xor => { | |
338 | intrinsic_args!(fx, args => (x, y); intrinsic); | |
94222f64 | 339 | |
5e7ed085 | 340 | // FIXME use vector instructions when possible |
064997fb FG |
341 | simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| { |
342 | match (lane_ty.kind(), intrinsic) { | |
343 | (ty::Uint(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane), | |
344 | (ty::Uint(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane), | |
345 | (ty::Uint(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane), | |
346 | (ty::Uint(_), sym::simd_div) => fx.bcx.ins().udiv(x_lane, y_lane), | |
347 | (ty::Uint(_), sym::simd_rem) => fx.bcx.ins().urem(x_lane, y_lane), | |
348 | ||
349 | (ty::Int(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane), | |
350 | (ty::Int(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane), | |
351 | (ty::Int(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane), | |
352 | (ty::Int(_), sym::simd_div) => fx.bcx.ins().sdiv(x_lane, y_lane), | |
353 | (ty::Int(_), sym::simd_rem) => fx.bcx.ins().srem(x_lane, y_lane), | |
354 | ||
355 | (ty::Float(_), sym::simd_add) => fx.bcx.ins().fadd(x_lane, y_lane), | |
356 | (ty::Float(_), sym::simd_sub) => fx.bcx.ins().fsub(x_lane, y_lane), | |
357 | (ty::Float(_), sym::simd_mul) => fx.bcx.ins().fmul(x_lane, y_lane), | |
358 | (ty::Float(_), sym::simd_div) => fx.bcx.ins().fdiv(x_lane, y_lane), | |
359 | (ty::Float(FloatTy::F32), sym::simd_rem) => fx.lib_call( | |
360 | "fmodf", | |
361 | vec![AbiParam::new(types::F32), AbiParam::new(types::F32)], | |
362 | vec![AbiParam::new(types::F32)], | |
363 | &[x_lane, y_lane], | |
364 | )[0], | |
365 | (ty::Float(FloatTy::F64), sym::simd_rem) => fx.lib_call( | |
366 | "fmod", | |
367 | vec![AbiParam::new(types::F64), AbiParam::new(types::F64)], | |
368 | vec![AbiParam::new(types::F64)], | |
369 | &[x_lane, y_lane], | |
370 | )[0], | |
371 | ||
372 | (ty::Uint(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane), | |
373 | (ty::Uint(_), sym::simd_shr) => fx.bcx.ins().ushr(x_lane, y_lane), | |
374 | (ty::Uint(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane), | |
375 | (ty::Uint(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane), | |
376 | (ty::Uint(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane), | |
377 | ||
378 | (ty::Int(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane), | |
379 | (ty::Int(_), sym::simd_shr) => fx.bcx.ins().sshr(x_lane, y_lane), | |
380 | (ty::Int(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane), | |
381 | (ty::Int(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane), | |
382 | (ty::Int(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane), | |
383 | ||
384 | _ => unreachable!(), | |
385 | } | |
94222f64 | 386 | }); |
064997fb FG |
387 | } |
388 | ||
389 | sym::simd_fma => { | |
390 | intrinsic_args!(fx, args => (a, b, c); intrinsic); | |
94222f64 | 391 | |
5e7ed085 FG |
392 | if !a.layout().ty.is_simd() { |
393 | report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); | |
394 | return; | |
395 | } | |
29967ef6 XL |
396 | assert_eq!(a.layout(), b.layout()); |
397 | assert_eq!(a.layout(), c.layout()); | |
04454e1e | 398 | assert_eq!(a.layout(), ret.layout()); |
29967ef6 | 399 | |
04454e1e FG |
400 | let layout = a.layout(); |
401 | let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); | |
f2b60f7d | 402 | let res_lane_layout = fx.layout_of(lane_ty); |
29967ef6 XL |
403 | |
404 | for lane in 0..lane_count { | |
f2b60f7d FG |
405 | let a_lane = a.value_lane(fx, lane).load_scalar(fx); |
406 | let b_lane = b.value_lane(fx, lane).load_scalar(fx); | |
407 | let c_lane = c.value_lane(fx, lane).load_scalar(fx); | |
29967ef6 | 408 | |
f2b60f7d FG |
409 | let res_lane = fx.bcx.ins().fma(a_lane, b_lane, c_lane); |
410 | let res_lane = CValue::by_val(res_lane, res_lane_layout); | |
29967ef6 | 411 | |
94222f64 | 412 | ret.place_lane(fx, lane).write_cvalue(fx, res_lane); |
29967ef6 | 413 | } |
064997fb FG |
414 | } |
415 | ||
416 | sym::simd_fmin | sym::simd_fmax => { | |
417 | intrinsic_args!(fx, args => (x, y); intrinsic); | |
29967ef6 | 418 | |
5e7ed085 FG |
419 | if !x.layout().ty.is_simd() { |
420 | report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty); | |
421 | return; | |
422 | } | |
423 | ||
424 | // FIXME use vector instructions when possible | |
425 | simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| { | |
426 | match lane_ty.kind() { | |
064997fb | 427 | ty::Float(_) => {} |
5e7ed085 FG |
428 | _ => unreachable!("{:?}", lane_ty), |
429 | } | |
430 | match intrinsic { | |
04454e1e FG |
431 | sym::simd_fmin => crate::num::codegen_float_min(fx, x_lane, y_lane), |
432 | sym::simd_fmax => crate::num::codegen_float_max(fx, x_lane, y_lane), | |
5e7ed085 FG |
433 | _ => unreachable!(), |
434 | } | |
435 | }); | |
064997fb FG |
436 | } |
437 | ||
438 | sym::simd_round => { | |
439 | intrinsic_args!(fx, args => (a); intrinsic); | |
29967ef6 | 440 | |
5e7ed085 FG |
441 | if !a.layout().ty.is_simd() { |
442 | report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); | |
443 | return; | |
444 | } | |
445 | ||
064997fb FG |
446 | simd_for_each_lane( |
447 | fx, | |
448 | a, | |
449 | ret, | |
450 | &|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() { | |
94222f64 XL |
451 | ty::Float(FloatTy::F32) => fx.lib_call( |
452 | "roundf", | |
453 | vec![AbiParam::new(types::F32)], | |
454 | vec![AbiParam::new(types::F32)], | |
455 | &[lane], | |
456 | )[0], | |
457 | ty::Float(FloatTy::F64) => fx.lib_call( | |
458 | "round", | |
459 | vec![AbiParam::new(types::F64)], | |
460 | vec![AbiParam::new(types::F64)], | |
461 | &[lane], | |
462 | )[0], | |
5e7ed085 | 463 | _ => unreachable!("{:?}", lane_ty), |
064997fb FG |
464 | }, |
465 | ); | |
466 | } | |
467 | ||
468 | sym::simd_fabs | sym::simd_fsqrt | sym::simd_ceil | sym::simd_floor | sym::simd_trunc => { | |
469 | intrinsic_args!(fx, args => (a); intrinsic); | |
5e7ed085 | 470 | |
5e7ed085 FG |
471 | if !a.layout().ty.is_simd() { |
472 | report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); | |
473 | return; | |
474 | } | |
475 | ||
476 | simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| { | |
477 | match lane_ty.kind() { | |
064997fb | 478 | ty::Float(_) => {} |
5e7ed085 FG |
479 | _ => unreachable!("{:?}", lane_ty), |
480 | } | |
481 | match intrinsic { | |
482 | sym::simd_fabs => fx.bcx.ins().fabs(lane), | |
483 | sym::simd_fsqrt => fx.bcx.ins().sqrt(lane), | |
484 | sym::simd_ceil => fx.bcx.ins().ceil(lane), | |
485 | sym::simd_floor => fx.bcx.ins().floor(lane), | |
486 | sym::simd_trunc => fx.bcx.ins().trunc(lane), | |
487 | _ => unreachable!(), | |
488 | } | |
94222f64 | 489 | }); |
064997fb FG |
490 | } |
491 | ||
492 | sym::simd_reduce_add_ordered | sym::simd_reduce_add_unordered => { | |
493 | intrinsic_args!(fx, args => (v, acc); intrinsic); | |
494 | let acc = acc.load_scalar(fx); | |
94222f64 | 495 | |
5e7ed085 FG |
496 | // FIXME there must be no acc param for integer vectors |
497 | if !v.layout().ty.is_simd() { | |
498 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
499 | return; | |
500 | } | |
501 | ||
502 | simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| { | |
503 | if lane_ty.is_floating_point() { | |
fc512014 XL |
504 | fx.bcx.ins().fadd(a, b) |
505 | } else { | |
506 | fx.bcx.ins().iadd(a, b) | |
507 | } | |
508 | }); | |
064997fb FG |
509 | } |
510 | ||
511 | sym::simd_reduce_mul_ordered | sym::simd_reduce_mul_unordered => { | |
512 | intrinsic_args!(fx, args => (v, acc); intrinsic); | |
513 | let acc = acc.load_scalar(fx); | |
fc512014 | 514 | |
5e7ed085 FG |
515 | // FIXME there must be no acc param for integer vectors |
516 | if !v.layout().ty.is_simd() { | |
517 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
518 | return; | |
519 | } | |
520 | ||
521 | simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| { | |
522 | if lane_ty.is_floating_point() { | |
fc512014 XL |
523 | fx.bcx.ins().fmul(a, b) |
524 | } else { | |
525 | fx.bcx.ins().imul(a, b) | |
526 | } | |
527 | }); | |
064997fb FG |
528 | } |
529 | ||
530 | sym::simd_reduce_all => { | |
531 | intrinsic_args!(fx, args => (v); intrinsic); | |
fc512014 | 532 | |
5e7ed085 FG |
533 | if !v.layout().ty.is_simd() { |
534 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
535 | return; | |
536 | } | |
537 | ||
538 | simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b)); | |
064997fb FG |
539 | } |
540 | ||
541 | sym::simd_reduce_any => { | |
542 | intrinsic_args!(fx, args => (v); intrinsic); | |
fc512014 | 543 | |
5e7ed085 FG |
544 | if !v.layout().ty.is_simd() { |
545 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
546 | return; | |
547 | } | |
548 | ||
549 | simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b)); | |
064997fb FG |
550 | } |
551 | ||
552 | sym::simd_reduce_and => { | |
553 | intrinsic_args!(fx, args => (v); intrinsic); | |
fc512014 | 554 | |
5e7ed085 FG |
555 | if !v.layout().ty.is_simd() { |
556 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
557 | return; | |
558 | } | |
559 | ||
560 | simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().band(a, b)); | |
064997fb FG |
561 | } |
562 | ||
563 | sym::simd_reduce_or => { | |
564 | intrinsic_args!(fx, args => (v); intrinsic); | |
94222f64 | 565 | |
5e7ed085 FG |
566 | if !v.layout().ty.is_simd() { |
567 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
568 | return; | |
569 | } | |
570 | ||
571 | simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bor(a, b)); | |
064997fb FG |
572 | } |
573 | ||
574 | sym::simd_reduce_xor => { | |
575 | intrinsic_args!(fx, args => (v); intrinsic); | |
94222f64 | 576 | |
5e7ed085 FG |
577 | if !v.layout().ty.is_simd() { |
578 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
579 | return; | |
580 | } | |
581 | ||
582 | simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b)); | |
064997fb FG |
583 | } |
584 | ||
585 | sym::simd_reduce_min => { | |
586 | intrinsic_args!(fx, args => (v); intrinsic); | |
94222f64 | 587 | |
5e7ed085 FG |
588 | if !v.layout().ty.is_simd() { |
589 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
590 | return; | |
591 | } | |
592 | ||
593 | simd_reduce(fx, v, None, ret, &|fx, ty, a, b| { | |
594 | let lt = match ty.kind() { | |
a2a8927a XL |
595 | ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b), |
596 | ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b), | |
04454e1e | 597 | ty::Float(_) => return crate::num::codegen_float_min(fx, a, b), |
a2a8927a XL |
598 | _ => unreachable!(), |
599 | }; | |
94222f64 XL |
600 | fx.bcx.ins().select(lt, a, b) |
601 | }); | |
064997fb FG |
602 | } |
603 | ||
604 | sym::simd_reduce_max => { | |
605 | intrinsic_args!(fx, args => (v); intrinsic); | |
94222f64 | 606 | |
5e7ed085 FG |
607 | if !v.layout().ty.is_simd() { |
608 | report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); | |
609 | return; | |
610 | } | |
611 | ||
612 | simd_reduce(fx, v, None, ret, &|fx, ty, a, b| { | |
613 | let gt = match ty.kind() { | |
a2a8927a XL |
614 | ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b), |
615 | ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b), | |
04454e1e | 616 | ty::Float(_) => return crate::num::codegen_float_max(fx, a, b), |
a2a8927a XL |
617 | _ => unreachable!(), |
618 | }; | |
94222f64 XL |
619 | fx.bcx.ins().select(gt, a, b) |
620 | }); | |
064997fb FG |
621 | } |
622 | ||
623 | sym::simd_select => { | |
624 | intrinsic_args!(fx, args => (m, a, b); intrinsic); | |
94222f64 | 625 | |
5e7ed085 FG |
626 | if !m.layout().ty.is_simd() { |
627 | report_simd_type_validation_error(fx, intrinsic, span, m.layout().ty); | |
628 | return; | |
629 | } | |
630 | if !a.layout().ty.is_simd() { | |
631 | report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); | |
632 | return; | |
633 | } | |
94222f64 XL |
634 | assert_eq!(a.layout(), b.layout()); |
635 | ||
636 | let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); | |
637 | let lane_layout = fx.layout_of(lane_ty); | |
638 | ||
639 | for lane in 0..lane_count { | |
640 | let m_lane = m.value_lane(fx, lane).load_scalar(fx); | |
641 | let a_lane = a.value_lane(fx, lane).load_scalar(fx); | |
642 | let b_lane = b.value_lane(fx, lane).load_scalar(fx); | |
643 | ||
644 | let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0); | |
064997fb FG |
645 | let res_lane = |
646 | CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout); | |
94222f64 XL |
647 | |
648 | ret.place_lane(fx, lane).write_cvalue(fx, res_lane); | |
649 | } | |
064997fb | 650 | } |
94222f64 | 651 | |
2b03887a FG |
652 | sym::simd_select_bitmask => { |
653 | intrinsic_args!(fx, args => (m, a, b); intrinsic); | |
654 | ||
655 | if !a.layout().ty.is_simd() { | |
656 | report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); | |
657 | return; | |
658 | } | |
659 | assert_eq!(a.layout(), b.layout()); | |
660 | ||
661 | let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); | |
662 | let lane_layout = fx.layout_of(lane_ty); | |
663 | ||
664 | let m = m.load_scalar(fx); | |
665 | ||
666 | for lane in 0..lane_count { | |
667 | let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64); | |
668 | let m_lane = fx.bcx.ins().band_imm(m_lane, 1); | |
669 | let a_lane = a.value_lane(fx, lane).load_scalar(fx); | |
670 | let b_lane = b.value_lane(fx, lane).load_scalar(fx); | |
671 | ||
672 | let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0); | |
673 | let res_lane = | |
674 | CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout); | |
675 | ||
676 | ret.place_lane(fx, lane).write_cvalue(fx, res_lane); | |
677 | } | |
678 | } | |
679 | ||
680 | sym::simd_bitmask => { | |
681 | intrinsic_args!(fx, args => (a); intrinsic); | |
682 | ||
683 | let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); | |
684 | let lane_clif_ty = fx.clif_type(lane_ty).unwrap(); | |
685 | ||
686 | // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a | |
687 | // vector mask and returns the most significant bit (MSB) of each lane in the form | |
688 | // of either: | |
689 | // * an unsigned integer | |
690 | // * an array of `u8` | |
691 | // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits. | |
692 | // | |
693 | // The bit order of the result depends on the byte endianness, LSB-first for little | |
694 | // endian and MSB-first for big endian. | |
695 | let expected_int_bits = lane_count.max(8); | |
696 | let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64); | |
697 | ||
698 | match lane_ty.kind() { | |
699 | ty::Int(_) | ty::Uint(_) => {} | |
700 | _ => { | |
701 | fx.tcx.sess.span_fatal( | |
702 | span, | |
703 | &format!( | |
704 | "invalid monomorphization of `simd_bitmask` intrinsic: \ | |
705 | vector argument `{}`'s element type `{}`, expected integer element \ | |
706 | type", | |
707 | a.layout().ty, | |
708 | lane_ty | |
709 | ), | |
710 | ); | |
711 | } | |
712 | } | |
713 | ||
714 | let res_type = | |
715 | Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap(); | |
9c376795 | 716 | let mut res = type_zero_value(&mut fx.bcx, res_type); |
2b03887a FG |
717 | |
718 | let lanes = match fx.tcx.sess.target.endian { | |
719 | Endian::Big => Box::new(0..lane_count) as Box<dyn Iterator<Item = u64>>, | |
720 | Endian::Little => Box::new((0..lane_count).rev()) as Box<dyn Iterator<Item = u64>>, | |
721 | }; | |
722 | for lane in lanes { | |
723 | let a_lane = a.value_lane(fx, lane).load_scalar(fx); | |
724 | ||
725 | // extract sign bit of an int | |
726 | let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_clif_ty.bits() - 1)); | |
727 | ||
728 | // shift sign bit into result | |
729 | let a_lane_sign = clif_intcast(fx, a_lane_sign, res_type, false); | |
730 | res = fx.bcx.ins().ishl_imm(res, 1); | |
731 | res = fx.bcx.ins().bor(res, a_lane_sign); | |
732 | } | |
733 | ||
734 | match ret.layout().ty.kind() { | |
735 | ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {} | |
736 | ty::Array(elem, len) | |
737 | if matches!(elem.kind(), ty::Uint(ty::UintTy::U8)) | |
738 | && len.try_eval_usize(fx.tcx, ty::ParamEnv::reveal_all()) | |
739 | == Some(expected_bytes) => {} | |
740 | _ => { | |
741 | fx.tcx.sess.span_fatal( | |
742 | span, | |
743 | &format!( | |
744 | "invalid monomorphization of `simd_bitmask` intrinsic: \ | |
745 | cannot return `{}`, expected `u{}` or `[u8; {}]`", | |
746 | ret.layout().ty, | |
747 | expected_int_bits, | |
748 | expected_bytes | |
749 | ), | |
750 | ); | |
751 | } | |
752 | } | |
753 | ||
754 | let res = CValue::by_val(res, ret.layout()); | |
755 | ret.write_cvalue(fx, res); | |
756 | } | |
757 | ||
758 | sym::simd_saturating_add | sym::simd_saturating_sub => { | |
759 | intrinsic_args!(fx, args => (x, y); intrinsic); | |
760 | ||
761 | let bin_op = match intrinsic { | |
762 | sym::simd_saturating_add => BinOp::Add, | |
763 | sym::simd_saturating_sub => BinOp::Sub, | |
764 | _ => unreachable!(), | |
765 | }; | |
766 | ||
767 | // FIXME use vector instructions when possible | |
768 | simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { | |
769 | crate::num::codegen_saturating_int_binop(fx, bin_op, x_lane, y_lane) | |
770 | }); | |
771 | } | |
772 | ||
773 | // simd_arith_offset | |
94222f64 XL |
774 | // simd_scatter |
775 | // simd_gather | |
064997fb FG |
776 | _ => { |
777 | fx.tcx.sess.span_fatal(span, &format!("Unknown SIMD intrinsic {}", intrinsic)); | |
778 | } | |
29967ef6 XL |
779 | } |
780 | } |