//! Codegen `extern "platform-intrinsic"` intrinsics.

use cranelift_codegen::ir::immediates::Offset32;
use rustc_target::abi::Endian;

use super::*;
use crate::prelude::*;

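/// Emits a monomorphization error for a non-SIMD input type, then traps so the
/// generated function body still passes the Cranelift verifier.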
fn report_simd_type_validation_error(
    fx: &mut FunctionCx<'_, '_, '_>,
    intrinsic: Symbol,
    span: Span,
    ty: Ty<'_>,
) {
    fx.tcx.sess.span_err(span, format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty));
    // Prevent verifier error
    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
}

pub(super) fn codegen_simd_intrinsic_call<'tcx>(
    fx: &mut FunctionCx<'_, '_, 'tcx>,
    intrinsic: Symbol,
    generic_args: GenericArgsRef<'tcx>,
    args: &[mir::Operand<'tcx>],
    ret: CPlace<'tcx>,
    target: BasicBlock,
    span: Span,
) {
    match intrinsic {
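        // Lane-wise numeric conversions; `simd_as` and `simd_cast` share the same
        // lowering here.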
        sym::simd_as | sym::simd_cast => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, ret_lane_ty, lane| {
                let ret_lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap();

                let from_signed = type_sign(lane_ty);
                let to_signed = type_sign(ret_lane_ty);

                clif_int_or_float_cast(fx, lane, from_signed, ret_lane_clif_ty, to_signed)
            });
        }

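        // Lane-wise comparisons: each result lane is all-ones on true and all-zero on
        // false (see `bool_to_zero_or_max_uint` below).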
        sym::simd_eq | sym::simd_ne | sym::simd_lt | sym::simd_le | sym::simd_gt | sym::simd_ge => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
                let res_lane = match (lane_ty.kind(), intrinsic) {
                    (ty::Uint(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane),
                    (ty::Uint(_), sym::simd_ne) => {
                        fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_lt) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedLessThan, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_le) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedLessThanOrEqual, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_gt) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_ge) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedGreaterThanOrEqual, x_lane, y_lane)
                    }

                    (ty::Int(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane),
                    (ty::Int(_), sym::simd_ne) => {
                        fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_lt) => {
                        fx.bcx.ins().icmp(IntCC::SignedLessThan, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_le) => {
                        fx.bcx.ins().icmp(IntCC::SignedLessThanOrEqual, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_gt) => {
                        fx.bcx.ins().icmp(IntCC::SignedGreaterThan, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_ge) => {
                        fx.bcx.ins().icmp(IntCC::SignedGreaterThanOrEqual, x_lane, y_lane)
                    }

                    (ty::Float(_), sym::simd_eq) => {
                        fx.bcx.ins().fcmp(FloatCC::Equal, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_ne) => {
                        fx.bcx.ins().fcmp(FloatCC::NotEqual, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_lt) => {
                        fx.bcx.ins().fcmp(FloatCC::LessThan, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_le) => {
                        fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_gt) => {
                        fx.bcx.ins().fcmp(FloatCC::GreaterThan, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_ge) => {
                        fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, x_lane, y_lane)
                    }

                    _ => unreachable!(),
                };

                bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane)
            });
        }

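        // Shuffles select lanes from the concatenation `[x, y]`: index `i < lane_count`
        // picks `x[i]`, otherwise `y[i - lane_count]`.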
        // simd_shuffle_generic<T, U, const I: &[u32]>(x: T, y: T) -> U
        sym::simd_shuffle_generic => {
            let [x, y] = args else {
                bug!("wrong number of args for intrinsic {intrinsic}");
            };
            let x = codegen_operand(fx, x);
            let y = codegen_operand(fx, y);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            let idx = generic_args[2]
                .expect_const()
                .eval(fx.tcx, ty::ParamEnv::reveal_all(), Some(span))
                .unwrap()
                .unwrap_branch();

            assert_eq!(x.layout(), y.layout());
            let layout = x.layout();

            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);

            assert_eq!(lane_ty, ret_lane_ty);
            assert_eq!(idx.len() as u64, ret_lane_count);

            let total_len = lane_count * 2;

            let indexes =
                idx.iter().map(|idx| idx.unwrap_leaf().try_to_u32().unwrap()).collect::<Vec<u32>>();

            for &idx in &indexes {
                assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len);
            }

            for (out_idx, in_idx) in indexes.into_iter().enumerate() {
                let in_lane = if u64::from(in_idx) < lane_count {
                    x.value_lane(fx, in_idx.into())
                } else {
                    y.value_lane(fx, u64::from(in_idx) - lane_count)
                };
                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
                out_lane.write_cvalue(fx, in_lane);
            }
        }

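        // Same lane-selection rule as `simd_shuffle_generic`, but the index vector is
        // passed as a constant array operand rather than a const generic.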
        // simd_shuffle<T, I, U>(x: T, y: T, idx: I) -> U
        sym::simd_shuffle => {
            let (x, y, idx) = match args {
                [x, y, idx] => (x, y, idx),
                _ => {
                    bug!("wrong number of args for intrinsic {intrinsic}");
                }
            };
            let x = codegen_operand(fx, x);
            let y = codegen_operand(fx, y);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            // Make sure this is actually an array, since typeck only checks the length-suffixed
            // version of this intrinsic.
            let idx_ty = fx.monomorphize(idx.ty(fx.mir, fx.tcx));
            let n: u16 = match idx_ty.kind() {
                ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => len
                    .try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
                    .unwrap_or_else(|| {
                        span_bug!(span, "could not evaluate shuffle index array length")
                    })
                    .try_into()
                    .unwrap(),
                _ => {
                    fx.tcx.sess.span_err(
                        span,
                        format!("simd_shuffle index must be an array of `u32`, got `{}`", idx_ty),
                    );
                    // Prevent verifier error
                    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
                    return;
                }
            };

            assert_eq!(x.layout(), y.layout());
            let layout = x.layout();

            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);

            assert_eq!(lane_ty, ret_lane_ty);
            assert_eq!(u64::from(n), ret_lane_count);

            let total_len = lane_count * 2;

            let indexes = {
                use rustc_middle::mir::interpret::*;
                let idx_const = match idx {
                    Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0,
                    Operand::Copy(_) | Operand::Move(_) => unreachable!("{idx:?}"),
                };

                let idx_bytes = match idx_const {
                    ConstValue::Indirect { alloc_id, offset } => {
                        let alloc = fx.tcx.global_alloc(alloc_id).unwrap_memory();
                        let size = Size::from_bytes(
                            4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */
                        );
                        alloc
                            .inner()
                            .get_bytes_strip_provenance(fx, alloc_range(offset, size))
                            .unwrap()
                    }
                    _ => unreachable!("{:?}", idx_const),
                };

                (0..ret_lane_count)
                    .map(|i| {
                        let i = usize::try_from(i).unwrap();
                        let idx = rustc_middle::mir::interpret::read_target_uint(
                            fx.tcx.data_layout.endian,
                            &idx_bytes[4 * i..4 * i + 4],
                        )
                        .expect("read_target_uint");
                        u16::try_from(idx).expect("try_from u32")
                    })
                    .collect::<Vec<u16>>()
            };

            for &idx in &indexes {
                assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len);
            }

            for (out_idx, in_idx) in indexes.into_iter().enumerate() {
                let in_lane = if u64::from(in_idx) < lane_count {
                    x.value_lane(fx, in_idx.into())
                } else {
                    y.value_lane(fx, u64::from(in_idx) - lane_count)
                };
                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
                out_lane.write_cvalue(fx, in_lane);
            }
        }

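        // simd_insert<T, U>(base: T, idx: u32, val: U) -> T: `base` with lane `idx`
        // replaced by `val`. The index must be a constant.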
        sym::simd_insert => {
            let (base, idx, val) = match args {
                [base, idx, val] => (base, idx, val),
                _ => {
                    bug!("wrong number of args for intrinsic {intrinsic}");
                }
            };
            let base = codegen_operand(fx, base);
            let val = codegen_operand(fx, val);

            // FIXME validate
            let idx_const = if let Some(idx_const) =
                crate::constant::mir_operand_get_const_val(fx, idx)
            {
                idx_const
            } else {
                fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant");
            };

            let idx: u32 = idx_const
                .try_to_u32()
                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
            let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx);
            if u64::from(idx) >= lane_count {
                fx.tcx.sess.span_fatal(
                    fx.mir.span,
                    format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count),
                );
            }

            ret.write_cvalue(fx, base);
            let ret_lane = ret.place_lane(fx, idx.try_into().unwrap());
            ret_lane.write_cvalue(fx, val);
        }

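        // simd_extract<T, U>(v: T, idx: u32) -> U: reads lane `idx` of `v`. A
        // non-constant index is reported and lowered to a trap.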
        sym::simd_extract => {
            let (v, idx) = match args {
                [v, idx] => (v, idx),
                _ => {
                    bug!("wrong number of args for intrinsic {intrinsic}");
                }
            };
            let v = codegen_operand(fx, v);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            let idx_const = if let Some(idx_const) =
                crate::constant::mir_operand_get_const_val(fx, idx)
            {
                idx_const
            } else {
                fx.tcx.sess.span_warn(span, "Index argument for `simd_extract` is not a constant");
                let trap_block = fx.bcx.create_block();
                let true_ = fx.bcx.ins().iconst(types::I8, 1);
                let ret_block = fx.get_block(target);
                fx.bcx.ins().brif(true_, trap_block, &[], ret_block, &[]);
                fx.bcx.switch_to_block(trap_block);
                crate::trap::trap_unimplemented(
                    fx,
                    "Index argument for `simd_extract` is not a constant",
                );
                return;
            };

            let idx = idx_const
                .try_to_u32()
                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
            let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx);
            if u64::from(idx) >= lane_count {
                fx.tcx.sess.span_fatal(
                    fx.mir.span,
                    format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count),
                );
            }

            let ret_lane = v.value_lane(fx, idx.try_into().unwrap());
            ret.write_cvalue(fx, ret_lane);
        }

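        // Lane-wise unary integer/float operations.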
        sym::simd_neg
        | sym::simd_bswap
        | sym::simd_bitreverse
        | sym::simd_ctlz
        | sym::simd_cttz => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| match (
                lane_ty.kind(),
                intrinsic,
            ) {
                (ty::Int(_), sym::simd_neg) => fx.bcx.ins().ineg(lane),
                (ty::Float(_), sym::simd_neg) => fx.bcx.ins().fneg(lane),

                (ty::Uint(ty::UintTy::U8) | ty::Int(ty::IntTy::I8), sym::simd_bswap) => lane,
                (ty::Uint(_) | ty::Int(_), sym::simd_bswap) => fx.bcx.ins().bswap(lane),
                (ty::Uint(_) | ty::Int(_), sym::simd_bitreverse) => fx.bcx.ins().bitrev(lane),
                (ty::Uint(_) | ty::Int(_), sym::simd_ctlz) => fx.bcx.ins().clz(lane),
                (ty::Uint(_) | ty::Int(_), sym::simd_cttz) => fx.bcx.ins().ctz(lane),

                _ => unreachable!(),
            });
        }

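        // Lane-wise binary arithmetic, shifts and bitwise operations. Float `simd_rem`
        // is lowered to `fmodf`/`fmod` libcalls.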
        sym::simd_add
        | sym::simd_sub
        | sym::simd_mul
        | sym::simd_div
        | sym::simd_rem
        | sym::simd_shl
        | sym::simd_shr
        | sym::simd_and
        | sym::simd_or
        | sym::simd_xor => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
                match (lane_ty.kind(), intrinsic) {
                    (ty::Uint(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_div) => fx.bcx.ins().udiv(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_rem) => fx.bcx.ins().urem(x_lane, y_lane),

                    (ty::Int(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane),
                    (ty::Int(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane),
                    (ty::Int(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane),
                    (ty::Int(_), sym::simd_div) => fx.bcx.ins().sdiv(x_lane, y_lane),
                    (ty::Int(_), sym::simd_rem) => fx.bcx.ins().srem(x_lane, y_lane),

                    (ty::Float(_), sym::simd_add) => fx.bcx.ins().fadd(x_lane, y_lane),
                    (ty::Float(_), sym::simd_sub) => fx.bcx.ins().fsub(x_lane, y_lane),
                    (ty::Float(_), sym::simd_mul) => fx.bcx.ins().fmul(x_lane, y_lane),
                    (ty::Float(_), sym::simd_div) => fx.bcx.ins().fdiv(x_lane, y_lane),
                    (ty::Float(FloatTy::F32), sym::simd_rem) => fx.lib_call(
                        "fmodf",
                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
                        vec![AbiParam::new(types::F32)],
                        &[x_lane, y_lane],
                    )[0],
                    (ty::Float(FloatTy::F64), sym::simd_rem) => fx.lib_call(
                        "fmod",
                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
                        vec![AbiParam::new(types::F64)],
                        &[x_lane, y_lane],
                    )[0],

                    (ty::Uint(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_shr) => fx.bcx.ins().ushr(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane),

                    (ty::Int(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane),
                    (ty::Int(_), sym::simd_shr) => fx.bcx.ins().sshr(x_lane, y_lane),
                    (ty::Int(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane),
                    (ty::Int(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane),
                    (ty::Int(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane),

                    _ => unreachable!(),
                }
            });
        }

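        // Fused multiply-add per lane: ret[i] = a[i] * b[i] + c[i], computed with a
        // single rounding.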
        sym::simd_fma => {
            intrinsic_args!(fx, args => (a, b, c); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }
            assert_eq!(a.layout(), b.layout());
            assert_eq!(a.layout(), c.layout());
            assert_eq!(a.layout(), ret.layout());

            let layout = a.layout();
            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let res_lane_layout = fx.layout_of(lane_ty);

            for lane in 0..lane_count {
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
                let c_lane = c.value_lane(fx, lane).load_scalar(fx);

                let res_lane = fx.bcx.ins().fma(a_lane, b_lane, c_lane);
                let res_lane = CValue::by_val(res_lane, res_lane_layout);

                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
            }
        }

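        // Per-lane float min/max; the NaN handling lives in
        // `crate::num::codegen_float_{min,max}`.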
        sym::simd_fmin | sym::simd_fmax => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
                match lane_ty.kind() {
                    ty::Float(_) => {}
                    _ => unreachable!("{:?}", lane_ty),
                }
                match intrinsic {
                    sym::simd_fmin => crate::num::codegen_float_min(fx, x_lane, y_lane),
                    sym::simd_fmax => crate::num::codegen_float_max(fx, x_lane, y_lane),
                    _ => unreachable!(),
                }
            });
        }

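        // Per-lane float exponentiation via `powf`/`pow` libcalls.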
        sym::simd_fpow => {
            intrinsic_args!(fx, args => (a, b); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_pair_for_each_lane(fx, a, b, ret, &|fx, lane_ty, _ret_lane_ty, a_lane, b_lane| {
                match lane_ty.kind() {
                    ty::Float(FloatTy::F32) => fx.lib_call(
                        "powf",
                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
                        vec![AbiParam::new(types::F32)],
                        &[a_lane, b_lane],
                    )[0],
                    ty::Float(FloatTy::F64) => fx.lib_call(
                        "pow",
                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
                        vec![AbiParam::new(types::F64)],
                        &[a_lane, b_lane],
                    )[0],
                    _ => unreachable!("{:?}", lane_ty),
                }
            });
        }

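        // Per-lane float-to-integer-power via the compiler-builtins `__powisf2`/`__powidf2`
        // helpers; the exponent is a single scalar shared by all lanes.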
        sym::simd_fpowi => {
            intrinsic_args!(fx, args => (a, exp); intrinsic);
            let exp = exp.load_scalar(fx);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(
                fx,
                a,
                ret,
                &|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() {
                    ty::Float(FloatTy::F32) => fx.lib_call(
                        "__powisf2", // compiler-builtins
                        vec![AbiParam::new(types::F32), AbiParam::new(types::I32)],
                        vec![AbiParam::new(types::F32)],
                        &[lane, exp],
                    )[0],
                    ty::Float(FloatTy::F64) => fx.lib_call(
                        "__powidf2", // compiler-builtins
                        vec![AbiParam::new(types::F64), AbiParam::new(types::I32)],
                        vec![AbiParam::new(types::F64)],
                        &[lane, exp],
                    )[0],
                    _ => unreachable!("{:?}", lane_ty),
                },
            );
        }

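        // Lane-wise float math that goes through libm; the libcall name is chosen from
        // the intrinsic and the lane width.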
        sym::simd_fsin
        | sym::simd_fcos
        | sym::simd_fexp
        | sym::simd_fexp2
        | sym::simd_flog
        | sym::simd_flog10
        | sym::simd_flog2
        | sym::simd_round => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
                let lane_ty = match lane_ty.kind() {
                    ty::Float(FloatTy::F32) => types::F32,
                    ty::Float(FloatTy::F64) => types::F64,
                    _ => unreachable!("{:?}", lane_ty),
                };
                let name = match (intrinsic, lane_ty) {
                    (sym::simd_fsin, types::F32) => "sinf",
                    (sym::simd_fsin, types::F64) => "sin",
                    (sym::simd_fcos, types::F32) => "cosf",
                    (sym::simd_fcos, types::F64) => "cos",
                    (sym::simd_fexp, types::F32) => "expf",
                    (sym::simd_fexp, types::F64) => "exp",
                    (sym::simd_fexp2, types::F32) => "exp2f",
                    (sym::simd_fexp2, types::F64) => "exp2",
                    (sym::simd_flog, types::F32) => "logf",
                    (sym::simd_flog, types::F64) => "log",
                    (sym::simd_flog10, types::F32) => "log10f",
                    (sym::simd_flog10, types::F64) => "log10",
                    (sym::simd_flog2, types::F32) => "log2f",
                    (sym::simd_flog2, types::F64) => "log2",
                    (sym::simd_round, types::F32) => "roundf",
                    (sym::simd_round, types::F64) => "round",
                    _ => unreachable!("{:?}", intrinsic),
                };
                fx.lib_call(
                    name,
                    vec![AbiParam::new(lane_ty)],
                    vec![AbiParam::new(lane_ty)],
                    &[lane],
                )[0]
            });
        }

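        // These unary float ops have native Cranelift instructions.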
        sym::simd_fabs | sym::simd_fsqrt | sym::simd_ceil | sym::simd_floor | sym::simd_trunc => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
                match lane_ty.kind() {
                    ty::Float(_) => {}
                    _ => unreachable!("{:?}", lane_ty),
                }
                match intrinsic {
                    sym::simd_fabs => fx.bcx.ins().fabs(lane),
                    sym::simd_fsqrt => fx.bcx.ins().sqrt(lane),
                    sym::simd_ceil => fx.bcx.ins().ceil(lane),
                    sym::simd_floor => fx.bcx.ins().floor(lane),
                    sym::simd_trunc => fx.bcx.ins().trunc(lane),
                    _ => unreachable!(),
                }
            });
        }

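        // Reductions: the ordered variants thread an explicit accumulator through the
        // lanes; the unordered variants take none and may reassociate, though this
        // lowering simply folds the lanes in order.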
        sym::simd_reduce_add_ordered => {
            intrinsic_args!(fx, args => (v, acc); intrinsic);
            let acc = acc.load_scalar(fx);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fadd(a, b)
                } else {
                    fx.bcx.ins().iadd(a, b)
                }
            });
        }

        sym::simd_reduce_add_unordered => {
            intrinsic_args!(fx, args => (v); intrinsic);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fadd(a, b)
                } else {
                    fx.bcx.ins().iadd(a, b)
                }
            });
        }

        sym::simd_reduce_mul_ordered => {
            intrinsic_args!(fx, args => (v, acc); intrinsic);
            let acc = acc.load_scalar(fx);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fmul(a, b)
                } else {
                    fx.bcx.ins().imul(a, b)
                }
            });
        }

        sym::simd_reduce_mul_unordered => {
            intrinsic_args!(fx, args => (v); intrinsic);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fmul(a, b)
                } else {
                    fx.bcx.ins().imul(a, b)
                }
            });
        }

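        // Boolean and bitwise reductions: `all` ANDs the lanes together, `any` ORs them.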
        sym::simd_reduce_all => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b));
        }

        sym::simd_reduce_any => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b));
        }

        sym::simd_reduce_and => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().band(a, b));
        }

        sym::simd_reduce_or => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bor(a, b));
        }

        sym::simd_reduce_xor => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
        }

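        // min/max reductions fold with a compare-and-select per step; float lanes defer
        // to the NaN-aware helpers in `crate::num`.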
        sym::simd_reduce_min | sym::simd_reduce_min_nanless => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
                let lt = match ty.kind() {
                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
                    ty::Float(_) => return crate::num::codegen_float_min(fx, a, b),
                    _ => unreachable!(),
                };
                fx.bcx.ins().select(lt, a, b)
            });
        }

        sym::simd_reduce_max | sym::simd_reduce_max_nanless => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
                let gt = match ty.kind() {
                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
                    ty::Float(_) => return crate::num::codegen_float_max(fx, a, b),
                    _ => unreachable!(),
                };
                fx.bcx.ins().select(gt, a, b)
            });
        }

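        // Per-lane select: result lane i is a[i] where m[i] != 0, else b[i].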
        sym::simd_select => {
            intrinsic_args!(fx, args => (m, a, b); intrinsic);

            if !m.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, m.layout().ty);
                return;
            }
            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }
            assert_eq!(a.layout(), b.layout());

            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
            let lane_layout = fx.layout_of(lane_ty);

            for lane in 0..lane_count {
                let m_lane = m.value_lane(fx, lane).load_scalar(fx);
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
                let b_lane = b.value_lane(fx, lane).load_scalar(fx);

                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
                let res_lane =
                    CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);

                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
            }
        }

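        // Like `simd_select`, but the mask is a scalar bitmask with one bit per lane.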
        sym::simd_select_bitmask => {
            intrinsic_args!(fx, args => (m, a, b); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }
            assert_eq!(a.layout(), b.layout());

            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
            let lane_layout = fx.layout_of(lane_ty);

            let m = m.load_scalar(fx);

            for lane in 0..lane_count {
                let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
                let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
                let b_lane = b.value_lane(fx, lane).load_scalar(fx);

                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
                let res_lane =
                    CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);

                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
            }
        }

        sym::simd_bitmask => {
            intrinsic_args!(fx, args => (a); intrinsic);

            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
            let lane_clif_ty = fx.clif_type(lane_ty).unwrap();

            // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
            // vector mask and returns the most significant bit (MSB) of each lane in the form
            // of either:
            // * an unsigned integer
            // * an array of `u8`
            // If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
            //
            // The bit order of the result depends on the byte endianness: LSB-first for little
            // endian and MSB-first for big endian.
            let expected_int_bits = lane_count.max(8);
            let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);

            match lane_ty.kind() {
                ty::Int(_) | ty::Uint(_) => {}
                _ => {
                    fx.tcx.sess.span_fatal(
                        span,
                        format!(
                            "invalid monomorphization of `simd_bitmask` intrinsic: \
                            vector argument `{}`'s element type `{}`, expected integer element \
                            type",
                            a.layout().ty,
                            lane_ty
                        ),
                    );
                }
            }

            let res_type =
                Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap();
            let mut res = type_zero_value(&mut fx.bcx, res_type);

            let lanes = match fx.tcx.sess.target.endian {
                Endian::Big => Box::new(0..lane_count) as Box<dyn Iterator<Item = u64>>,
                Endian::Little => Box::new((0..lane_count).rev()) as Box<dyn Iterator<Item = u64>>,
            };
            for lane in lanes {
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);

                // extract sign bit of an int
                let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_clif_ty.bits() - 1));

                // shift sign bit into result
                let a_lane_sign = clif_intcast(fx, a_lane_sign, res_type, false);
                res = fx.bcx.ins().ishl_imm(res, 1);
                res = fx.bcx.ins().bor(res, a_lane_sign);
            }

            match ret.layout().ty.kind() {
                ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {}
                ty::Array(elem, len)
                    if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
                        && len.try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
                            == Some(expected_bytes) => {}
                _ => {
                    fx.tcx.sess.span_fatal(
                        span,
                        format!(
                            "invalid monomorphization of `simd_bitmask` intrinsic: \
                            cannot return `{}`, expected `u{}` or `[u8; {}]`",
                            ret.layout().ty,
                            expected_int_bits,
                            expected_bytes
                        ),
                    );
                }
            }

            let res = CValue::by_val(res, ret.layout());
            ret.write_cvalue(fx, res);
        }

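        // Saturating integer add/sub per lane, delegated to the scalar helper in
        // `crate::num`.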
        sym::simd_saturating_add | sym::simd_saturating_sub => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            let bin_op = match intrinsic {
                sym::simd_saturating_add => BinOp::Add,
                sym::simd_saturating_sub => BinOp::Sub,
                _ => unreachable!(),
            };

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
                crate::num::codegen_saturating_int_binop(fx, bin_op, x_lane, y_lane)
            });
        }

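        // Pointer <-> address casts do not change the value representation, so a
        // transmute suffices.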
        sym::simd_expose_addr | sym::simd_from_exposed_addr | sym::simd_cast_ptr => {
            intrinsic_args!(fx, args => (arg); intrinsic);
            ret.write_cvalue_transmute(fx, arg);
        }

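        // simd_arith_offset: ret[i] = ptr[i] + offset[i] * size_of::<pointee>().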
        sym::simd_arith_offset => {
            intrinsic_args!(fx, args => (ptr, offset); intrinsic);

            let (lane_count, ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
            let pointee_ty = ptr_lane_ty.builtin_deref(true).unwrap().ty;
            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            let ret_lane_layout = fx.layout_of(ret_lane_ty);
            assert_eq!(lane_count, ret_lane_count);

            for lane_idx in 0..lane_count {
                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
                let offset_lane = offset.value_lane(fx, lane_idx).load_scalar(fx);

                let ptr_diff = if pointee_size != 1 {
                    fx.bcx.ins().imul_imm(offset_lane, pointee_size as i64)
                } else {
                    offset_lane
                };
                let res_lane = fx.bcx.ins().iadd(ptr_lane, ptr_diff);
                let res_lane = CValue::by_val(res_lane, ret_lane_layout);

                ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
            }
        }

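        // Gather: each lane loads from its own pointer when the mask is set, and keeps
        // the passthrough value otherwise. Lowered as a branch per lane.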
        sym::simd_gather => {
            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);

            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            assert_eq!(val_lane_count, ptr_lane_count);
            assert_eq!(val_lane_count, mask_lane_count);
            assert_eq!(val_lane_count, ret_lane_count);

            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
            let ret_lane_layout = fx.layout_of(ret_lane_ty);

            for lane_idx in 0..ptr_lane_count {
                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);

                let if_enabled = fx.bcx.create_block();
                let if_disabled = fx.bcx.create_block();
                let next = fx.bcx.create_block();
                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);

                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
                fx.bcx.seal_block(if_enabled);
                fx.bcx.seal_block(if_disabled);

                fx.bcx.switch_to_block(if_enabled);
                let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), ptr_lane, 0);
                fx.bcx.ins().jump(next, &[res]);

                fx.bcx.switch_to_block(if_disabled);
                fx.bcx.ins().jump(next, &[val_lane]);

                fx.bcx.seal_block(next);
                fx.bcx.switch_to_block(next);

                fx.bcx.ins().nop();

                ret.place_lane(fx, lane_idx)
                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
            }
        }

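        // Masked load: like gather, but the lanes are consecutive elements behind a
        // single base pointer.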
        sym::simd_masked_load => {
            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);

            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            assert_eq!(val_lane_count, mask_lane_count);
            assert_eq!(val_lane_count, ret_lane_count);

            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
            let ret_lane_layout = fx.layout_of(ret_lane_ty);
            let ptr_val = ptr.load_scalar(fx);

            for lane_idx in 0..ret_lane_count {
                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);

                let if_enabled = fx.bcx.create_block();
                let if_disabled = fx.bcx.create_block();
                let next = fx.bcx.create_block();
                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);

                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
                fx.bcx.seal_block(if_enabled);
                fx.bcx.seal_block(if_disabled);

                fx.bcx.switch_to_block(if_enabled);
                let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32;
                let res = fx.bcx.ins().load(
                    lane_clif_ty,
                    MemFlags::trusted(),
                    ptr_val,
                    Offset32::new(offset),
                );
                fx.bcx.ins().jump(next, &[res]);

                fx.bcx.switch_to_block(if_disabled);
                fx.bcx.ins().jump(next, &[val_lane]);

                fx.bcx.seal_block(next);
                fx.bcx.switch_to_block(next);

                fx.bcx.ins().nop();

                ret.place_lane(fx, lane_idx)
                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
            }
        }

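        // Scatter: each lane stores to its own pointer when the mask is set.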
        sym::simd_scatter => {
            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);

            let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
            assert_eq!(val_lane_count, ptr_lane_count);
            assert_eq!(val_lane_count, mask_lane_count);

            for lane_idx in 0..ptr_lane_count {
                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);

                let if_enabled = fx.bcx.create_block();
                let next = fx.bcx.create_block();

                fx.bcx.ins().brif(mask_lane, if_enabled, &[], next, &[]);
                fx.bcx.seal_block(if_enabled);

                fx.bcx.switch_to_block(if_enabled);
                fx.bcx.ins().store(MemFlags::trusted(), val_lane, ptr_lane, 0);
                fx.bcx.ins().jump(next, &[]);

                fx.bcx.seal_block(next);
                fx.bcx.switch_to_block(next);
            }
        }

        _ => {
            fx.tcx.sess.span_err(span, format!("Unknown SIMD intrinsic {}", intrinsic));
            // Prevent verifier error
            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
            return;
        }
    }
    let ret_block = fx.get_block(target);
    fx.bcx.ins().jump(ret_block, &[]);
}