1 //! Codegen `extern "platform-intrinsic"` intrinsics.
3 use cranelift_codegen
::ir
::immediates
::Offset32
;
4 use rustc_target
::abi
::Endian
;
// Emits a compiler diagnostic when a SIMD intrinsic is monomorphized with a
// non-SIMD operand type, then emits a trap instruction so the partially-built
// Cranelift function still passes the verifier.
//
// NOTE(review): this extraction is missing the original lines 11-14, which
// presumably declare the `intrinsic`, `span`, and `ty` parameters referenced
// in the body below — confirm against the full source file.
9 fn report_simd_type_validation_error(
10 fx
: &mut FunctionCx
<'_
, '_
, '_
>,
// Report the invalid monomorphization as an error at the intrinsic call site.
15 fx
.tcx
.sess
.span_err(span
, format
!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic
, ty
));
16 // Prevent verifier error
// An unconditional trap terminates the block so no ill-typed SIMD code is
// emitted after the error; compilation aborts before execution anyway.
17 fx
.bcx
.ins().trap(TrapCode
::UnreachableCodeReached
);
20 pub(super) fn codegen_simd_intrinsic_call
<'tcx
>(
21 fx
: &mut FunctionCx
<'_
, '_
, 'tcx
>,
23 generic_args
: GenericArgsRef
<'tcx
>,
24 args
: &[mir
::Operand
<'tcx
>],
30 sym
::simd_as
| sym
::simd_cast
=> {
31 intrinsic_args
!(fx
, args
=> (a
); intrinsic
);
33 if !a
.layout().ty
.is_simd() {
34 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
38 simd_for_each_lane(fx
, a
, ret
, &|fx
, lane_ty
, ret_lane_ty
, lane
| {
39 let ret_lane_clif_ty
= fx
.clif_type(ret_lane_ty
).unwrap();
41 let from_signed
= type_sign(lane_ty
);
42 let to_signed
= type_sign(ret_lane_ty
);
44 clif_int_or_float_cast(fx
, lane
, from_signed
, ret_lane_clif_ty
, to_signed
)
48 sym
::simd_eq
| sym
::simd_ne
| sym
::simd_lt
| sym
::simd_le
| sym
::simd_gt
| sym
::simd_ge
=> {
49 intrinsic_args
!(fx
, args
=> (x
, y
); intrinsic
);
51 if !x
.layout().ty
.is_simd() {
52 report_simd_type_validation_error(fx
, intrinsic
, span
, x
.layout().ty
);
56 // FIXME use vector instructions when possible
57 simd_pair_for_each_lane(fx
, x
, y
, ret
, &|fx
, lane_ty
, res_lane_ty
, x_lane
, y_lane
| {
58 let res_lane
= match (lane_ty
.kind(), intrinsic
) {
59 (ty
::Uint(_
), sym
::simd_eq
) => fx
.bcx
.ins().icmp(IntCC
::Equal
, x_lane
, y_lane
),
60 (ty
::Uint(_
), sym
::simd_ne
) => {
61 fx
.bcx
.ins().icmp(IntCC
::NotEqual
, x_lane
, y_lane
)
63 (ty
::Uint(_
), sym
::simd_lt
) => {
64 fx
.bcx
.ins().icmp(IntCC
::UnsignedLessThan
, x_lane
, y_lane
)
66 (ty
::Uint(_
), sym
::simd_le
) => {
67 fx
.bcx
.ins().icmp(IntCC
::UnsignedLessThanOrEqual
, x_lane
, y_lane
)
69 (ty
::Uint(_
), sym
::simd_gt
) => {
70 fx
.bcx
.ins().icmp(IntCC
::UnsignedGreaterThan
, x_lane
, y_lane
)
72 (ty
::Uint(_
), sym
::simd_ge
) => {
73 fx
.bcx
.ins().icmp(IntCC
::UnsignedGreaterThanOrEqual
, x_lane
, y_lane
)
76 (ty
::Int(_
), sym
::simd_eq
) => fx
.bcx
.ins().icmp(IntCC
::Equal
, x_lane
, y_lane
),
77 (ty
::Int(_
), sym
::simd_ne
) => {
78 fx
.bcx
.ins().icmp(IntCC
::NotEqual
, x_lane
, y_lane
)
80 (ty
::Int(_
), sym
::simd_lt
) => {
81 fx
.bcx
.ins().icmp(IntCC
::SignedLessThan
, x_lane
, y_lane
)
83 (ty
::Int(_
), sym
::simd_le
) => {
84 fx
.bcx
.ins().icmp(IntCC
::SignedLessThanOrEqual
, x_lane
, y_lane
)
86 (ty
::Int(_
), sym
::simd_gt
) => {
87 fx
.bcx
.ins().icmp(IntCC
::SignedGreaterThan
, x_lane
, y_lane
)
89 (ty
::Int(_
), sym
::simd_ge
) => {
90 fx
.bcx
.ins().icmp(IntCC
::SignedGreaterThanOrEqual
, x_lane
, y_lane
)
93 (ty
::Float(_
), sym
::simd_eq
) => {
94 fx
.bcx
.ins().fcmp(FloatCC
::Equal
, x_lane
, y_lane
)
96 (ty
::Float(_
), sym
::simd_ne
) => {
97 fx
.bcx
.ins().fcmp(FloatCC
::NotEqual
, x_lane
, y_lane
)
99 (ty
::Float(_
), sym
::simd_lt
) => {
100 fx
.bcx
.ins().fcmp(FloatCC
::LessThan
, x_lane
, y_lane
)
102 (ty
::Float(_
), sym
::simd_le
) => {
103 fx
.bcx
.ins().fcmp(FloatCC
::LessThanOrEqual
, x_lane
, y_lane
)
105 (ty
::Float(_
), sym
::simd_gt
) => {
106 fx
.bcx
.ins().fcmp(FloatCC
::GreaterThan
, x_lane
, y_lane
)
108 (ty
::Float(_
), sym
::simd_ge
) => {
109 fx
.bcx
.ins().fcmp(FloatCC
::GreaterThanOrEqual
, x_lane
, y_lane
)
115 bool_to_zero_or_max_uint(fx
, res_lane_ty
, res_lane
)
119 // simd_shuffle_generic<T, U, const I: &[u32]>(x: T, y: T) -> U
120 sym
::simd_shuffle_generic
=> {
121 let [x
, y
] = args
else {
122 bug
!("wrong number of args for intrinsic {intrinsic}");
124 let x
= codegen_operand(fx
, x
);
125 let y
= codegen_operand(fx
, y
);
127 if !x
.layout().ty
.is_simd() {
128 report_simd_type_validation_error(fx
, intrinsic
, span
, x
.layout().ty
);
132 let idx
= generic_args
[2]
134 .eval(fx
.tcx
, ty
::ParamEnv
::reveal_all(), Some(span
))
138 assert_eq
!(x
.layout(), y
.layout());
139 let layout
= x
.layout();
141 let (lane_count
, lane_ty
) = layout
.ty
.simd_size_and_type(fx
.tcx
);
142 let (ret_lane_count
, ret_lane_ty
) = ret
.layout().ty
.simd_size_and_type(fx
.tcx
);
144 assert_eq
!(lane_ty
, ret_lane_ty
);
145 assert_eq
!(idx
.len() as u64, ret_lane_count
);
147 let total_len
= lane_count
* 2;
150 idx
.iter().map(|idx
| idx
.unwrap_leaf().try_to_u32().unwrap()).collect
::<Vec
<u32>>();
152 for &idx
in &indexes
{
153 assert
!(u64::from(idx
) < total_len
, "idx {} out of range 0..{}", idx
, total_len
);
156 for (out_idx
, in_idx
) in indexes
.into_iter().enumerate() {
157 let in_lane
= if u64::from(in_idx
) < lane_count
{
158 x
.value_lane(fx
, in_idx
.into())
160 y
.value_lane(fx
, u64::from(in_idx
) - lane_count
)
162 let out_lane
= ret
.place_lane(fx
, u64::try_from(out_idx
).unwrap());
163 out_lane
.write_cvalue(fx
, in_lane
);
167 // simd_shuffle<T, I, U>(x: T, y: T, idx: I) -> U
168 sym
::simd_shuffle
=> {
169 let (x
, y
, idx
) = match args
{
170 [x
, y
, idx
] => (x
, y
, idx
),
172 bug
!("wrong number of args for intrinsic {intrinsic}");
175 let x
= codegen_operand(fx
, x
);
176 let y
= codegen_operand(fx
, y
);
178 if !x
.layout().ty
.is_simd() {
179 report_simd_type_validation_error(fx
, intrinsic
, span
, x
.layout().ty
);
183 // Make sure this is actually an array, since typeck only checks the length-suffixed
184 // version of this intrinsic.
185 let idx_ty
= fx
.monomorphize(idx
.ty(fx
.mir
, fx
.tcx
));
186 let n
: u16 = match idx_ty
.kind() {
187 ty
::Array(ty
, len
) if matches
!(ty
.kind(), ty
::Uint(ty
::UintTy
::U32
)) => len
188 .try_eval_target_usize(fx
.tcx
, ty
::ParamEnv
::reveal_all())
190 span_bug
!(span
, "could not evaluate shuffle index array length")
195 fx
.tcx
.sess
.span_err(
197 format
!("simd_shuffle index must be an array of `u32`, got `{}`", idx_ty
),
199 // Prevent verifier error
200 fx
.bcx
.ins().trap(TrapCode
::UnreachableCodeReached
);
205 assert_eq
!(x
.layout(), y
.layout());
206 let layout
= x
.layout();
208 let (lane_count
, lane_ty
) = layout
.ty
.simd_size_and_type(fx
.tcx
);
209 let (ret_lane_count
, ret_lane_ty
) = ret
.layout().ty
.simd_size_and_type(fx
.tcx
);
211 assert_eq
!(lane_ty
, ret_lane_ty
);
212 assert_eq
!(u64::from(n
), ret_lane_count
);
214 let total_len
= lane_count
* 2;
217 use rustc_middle
::mir
::interpret
::*;
218 let idx_const
= match idx
{
219 Operand
::Constant(const_
) => crate::constant
::eval_mir_constant(fx
, const_
).0,
220 Operand
::Copy(_
) | Operand
::Move(_
) => unreachable
!("{idx:?}"),
223 let idx_bytes
= match idx_const
{
224 ConstValue
::Indirect { alloc_id, offset }
=> {
225 let alloc
= fx
.tcx
.global_alloc(alloc_id
).unwrap_memory();
226 let size
= Size
::from_bytes(
227 4 * ret_lane_count
, /* size_of([u32; ret_lane_count]) */
231 .get_bytes_strip_provenance(fx
, alloc_range(offset
, size
))
234 _
=> unreachable
!("{:?}", idx_const
),
239 let i
= usize::try_from(i
).unwrap();
240 let idx
= rustc_middle
::mir
::interpret
::read_target_uint(
241 fx
.tcx
.data_layout
.endian
,
242 &idx_bytes
[4 * i
..4 * i
+ 4],
244 .expect("read_target_uint");
245 u16::try_from(idx
).expect("try_from u32")
247 .collect
::<Vec
<u16>>()
250 for &idx
in &indexes
{
251 assert
!(u64::from(idx
) < total_len
, "idx {} out of range 0..{}", idx
, total_len
);
254 for (out_idx
, in_idx
) in indexes
.into_iter().enumerate() {
255 let in_lane
= if u64::from(in_idx
) < lane_count
{
256 x
.value_lane(fx
, in_idx
.into())
258 y
.value_lane(fx
, u64::from(in_idx
) - lane_count
)
260 let out_lane
= ret
.place_lane(fx
, u64::try_from(out_idx
).unwrap());
261 out_lane
.write_cvalue(fx
, in_lane
);
265 sym
::simd_insert
=> {
266 let (base
, idx
, val
) = match args
{
267 [base
, idx
, val
] => (base
, idx
, val
),
269 bug
!("wrong number of args for intrinsic {intrinsic}");
272 let base
= codegen_operand(fx
, base
);
273 let val
= codegen_operand(fx
, val
);
276 let idx_const
= if let Some(idx_const
) =
277 crate::constant
::mir_operand_get_const_val(fx
, idx
)
281 fx
.tcx
.sess
.span_fatal(span
, "Index argument for `simd_insert` is not a constant");
284 let idx
: u32 = idx_const
286 .unwrap_or_else(|_
| panic
!("kind not scalar: {:?}", idx_const
));
287 let (lane_count
, _lane_ty
) = base
.layout().ty
.simd_size_and_type(fx
.tcx
);
288 if u64::from(idx
) >= lane_count
{
289 fx
.tcx
.sess
.span_fatal(
291 format
!("[simd_insert] idx {} >= lane_count {}", idx
, lane_count
),
295 ret
.write_cvalue(fx
, base
);
296 let ret_lane
= ret
.place_lane(fx
, idx
.try_into().unwrap());
297 ret_lane
.write_cvalue(fx
, val
);
300 sym
::simd_extract
=> {
301 let (v
, idx
) = match args
{
302 [v
, idx
] => (v
, idx
),
304 bug
!("wrong number of args for intrinsic {intrinsic}");
307 let v
= codegen_operand(fx
, v
);
309 if !v
.layout().ty
.is_simd() {
310 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
314 let idx_const
= if let Some(idx_const
) =
315 crate::constant
::mir_operand_get_const_val(fx
, idx
)
319 fx
.tcx
.sess
.span_warn(span
, "Index argument for `simd_extract` is not a constant");
320 let trap_block
= fx
.bcx
.create_block();
321 let true_
= fx
.bcx
.ins().iconst(types
::I8
, 1);
322 let ret_block
= fx
.get_block(target
);
323 fx
.bcx
.ins().brif(true_
, trap_block
, &[], ret_block
, &[]);
324 fx
.bcx
.switch_to_block(trap_block
);
325 crate::trap
::trap_unimplemented(
327 "Index argument for `simd_extract` is not a constant",
334 .unwrap_or_else(|_
| panic
!("kind not scalar: {:?}", idx_const
));
335 let (lane_count
, _lane_ty
) = v
.layout().ty
.simd_size_and_type(fx
.tcx
);
336 if u64::from(idx
) >= lane_count
{
337 fx
.tcx
.sess
.span_fatal(
339 format
!("[simd_extract] idx {} >= lane_count {}", idx
, lane_count
),
343 let ret_lane
= v
.value_lane(fx
, idx
.try_into().unwrap());
344 ret
.write_cvalue(fx
, ret_lane
);
349 | sym
::simd_bitreverse
351 | sym
::simd_cttz
=> {
352 intrinsic_args
!(fx
, args
=> (a
); intrinsic
);
354 if !a
.layout().ty
.is_simd() {
355 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
359 simd_for_each_lane(fx
, a
, ret
, &|fx
, lane_ty
, _ret_lane_ty
, lane
| match (
363 (ty
::Int(_
), sym
::simd_neg
) => fx
.bcx
.ins().ineg(lane
),
364 (ty
::Float(_
), sym
::simd_neg
) => fx
.bcx
.ins().fneg(lane
),
366 (ty
::Uint(ty
::UintTy
::U8
) | ty
::Int(ty
::IntTy
::I8
), sym
::simd_bswap
) => lane
,
367 (ty
::Uint(_
) | ty
::Int(_
), sym
::simd_bswap
) => fx
.bcx
.ins().bswap(lane
),
368 (ty
::Uint(_
) | ty
::Int(_
), sym
::simd_bitreverse
) => fx
.bcx
.ins().bitrev(lane
),
369 (ty
::Uint(_
) | ty
::Int(_
), sym
::simd_ctlz
) => fx
.bcx
.ins().clz(lane
),
370 (ty
::Uint(_
) | ty
::Int(_
), sym
::simd_cttz
) => fx
.bcx
.ins().ctz(lane
),
386 intrinsic_args
!(fx
, args
=> (x
, y
); intrinsic
);
388 // FIXME use vector instructions when possible
389 simd_pair_for_each_lane(fx
, x
, y
, ret
, &|fx
, lane_ty
, _ret_lane_ty
, x_lane
, y_lane
| {
390 match (lane_ty
.kind(), intrinsic
) {
391 (ty
::Uint(_
), sym
::simd_add
) => fx
.bcx
.ins().iadd(x_lane
, y_lane
),
392 (ty
::Uint(_
), sym
::simd_sub
) => fx
.bcx
.ins().isub(x_lane
, y_lane
),
393 (ty
::Uint(_
), sym
::simd_mul
) => fx
.bcx
.ins().imul(x_lane
, y_lane
),
394 (ty
::Uint(_
), sym
::simd_div
) => fx
.bcx
.ins().udiv(x_lane
, y_lane
),
395 (ty
::Uint(_
), sym
::simd_rem
) => fx
.bcx
.ins().urem(x_lane
, y_lane
),
397 (ty
::Int(_
), sym
::simd_add
) => fx
.bcx
.ins().iadd(x_lane
, y_lane
),
398 (ty
::Int(_
), sym
::simd_sub
) => fx
.bcx
.ins().isub(x_lane
, y_lane
),
399 (ty
::Int(_
), sym
::simd_mul
) => fx
.bcx
.ins().imul(x_lane
, y_lane
),
400 (ty
::Int(_
), sym
::simd_div
) => fx
.bcx
.ins().sdiv(x_lane
, y_lane
),
401 (ty
::Int(_
), sym
::simd_rem
) => fx
.bcx
.ins().srem(x_lane
, y_lane
),
403 (ty
::Float(_
), sym
::simd_add
) => fx
.bcx
.ins().fadd(x_lane
, y_lane
),
404 (ty
::Float(_
), sym
::simd_sub
) => fx
.bcx
.ins().fsub(x_lane
, y_lane
),
405 (ty
::Float(_
), sym
::simd_mul
) => fx
.bcx
.ins().fmul(x_lane
, y_lane
),
406 (ty
::Float(_
), sym
::simd_div
) => fx
.bcx
.ins().fdiv(x_lane
, y_lane
),
407 (ty
::Float(FloatTy
::F32
), sym
::simd_rem
) => fx
.lib_call(
409 vec
![AbiParam
::new(types
::F32
), AbiParam
::new(types
::F32
)],
410 vec
![AbiParam
::new(types
::F32
)],
413 (ty
::Float(FloatTy
::F64
), sym
::simd_rem
) => fx
.lib_call(
415 vec
![AbiParam
::new(types
::F64
), AbiParam
::new(types
::F64
)],
416 vec
![AbiParam
::new(types
::F64
)],
420 (ty
::Uint(_
), sym
::simd_shl
) => fx
.bcx
.ins().ishl(x_lane
, y_lane
),
421 (ty
::Uint(_
), sym
::simd_shr
) => fx
.bcx
.ins().ushr(x_lane
, y_lane
),
422 (ty
::Uint(_
), sym
::simd_and
) => fx
.bcx
.ins().band(x_lane
, y_lane
),
423 (ty
::Uint(_
), sym
::simd_or
) => fx
.bcx
.ins().bor(x_lane
, y_lane
),
424 (ty
::Uint(_
), sym
::simd_xor
) => fx
.bcx
.ins().bxor(x_lane
, y_lane
),
426 (ty
::Int(_
), sym
::simd_shl
) => fx
.bcx
.ins().ishl(x_lane
, y_lane
),
427 (ty
::Int(_
), sym
::simd_shr
) => fx
.bcx
.ins().sshr(x_lane
, y_lane
),
428 (ty
::Int(_
), sym
::simd_and
) => fx
.bcx
.ins().band(x_lane
, y_lane
),
429 (ty
::Int(_
), sym
::simd_or
) => fx
.bcx
.ins().bor(x_lane
, y_lane
),
430 (ty
::Int(_
), sym
::simd_xor
) => fx
.bcx
.ins().bxor(x_lane
, y_lane
),
438 intrinsic_args
!(fx
, args
=> (a
, b
, c
); intrinsic
);
440 if !a
.layout().ty
.is_simd() {
441 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
444 assert_eq
!(a
.layout(), b
.layout());
445 assert_eq
!(a
.layout(), c
.layout());
446 assert_eq
!(a
.layout(), ret
.layout());
448 let layout
= a
.layout();
449 let (lane_count
, lane_ty
) = layout
.ty
.simd_size_and_type(fx
.tcx
);
450 let res_lane_layout
= fx
.layout_of(lane_ty
);
452 for lane
in 0..lane_count
{
453 let a_lane
= a
.value_lane(fx
, lane
).load_scalar(fx
);
454 let b_lane
= b
.value_lane(fx
, lane
).load_scalar(fx
);
455 let c_lane
= c
.value_lane(fx
, lane
).load_scalar(fx
);
457 let res_lane
= fx
.bcx
.ins().fma(a_lane
, b_lane
, c_lane
);
458 let res_lane
= CValue
::by_val(res_lane
, res_lane_layout
);
460 ret
.place_lane(fx
, lane
).write_cvalue(fx
, res_lane
);
464 sym
::simd_fmin
| sym
::simd_fmax
=> {
465 intrinsic_args
!(fx
, args
=> (x
, y
); intrinsic
);
467 if !x
.layout().ty
.is_simd() {
468 report_simd_type_validation_error(fx
, intrinsic
, span
, x
.layout().ty
);
472 // FIXME use vector instructions when possible
473 simd_pair_for_each_lane(fx
, x
, y
, ret
, &|fx
, lane_ty
, _ret_lane_ty
, x_lane
, y_lane
| {
474 match lane_ty
.kind() {
476 _
=> unreachable
!("{:?}", lane_ty
),
479 sym
::simd_fmin
=> crate::num
::codegen_float_min(fx
, x_lane
, y_lane
),
480 sym
::simd_fmax
=> crate::num
::codegen_float_max(fx
, x_lane
, y_lane
),
487 intrinsic_args
!(fx
, args
=> (a
, b
); intrinsic
);
489 if !a
.layout().ty
.is_simd() {
490 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
494 simd_pair_for_each_lane(fx
, a
, b
, ret
, &|fx
, lane_ty
, _ret_lane_ty
, a_lane
, b_lane
| {
495 match lane_ty
.kind() {
496 ty
::Float(FloatTy
::F32
) => fx
.lib_call(
498 vec
![AbiParam
::new(types
::F32
), AbiParam
::new(types
::F32
)],
499 vec
![AbiParam
::new(types
::F32
)],
502 ty
::Float(FloatTy
::F64
) => fx
.lib_call(
504 vec
![AbiParam
::new(types
::F64
), AbiParam
::new(types
::F64
)],
505 vec
![AbiParam
::new(types
::F64
)],
508 _
=> unreachable
!("{:?}", lane_ty
),
514 intrinsic_args
!(fx
, args
=> (a
, exp
); intrinsic
);
515 let exp
= exp
.load_scalar(fx
);
517 if !a
.layout().ty
.is_simd() {
518 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
526 &|fx
, lane_ty
, _ret_lane_ty
, lane
| match lane_ty
.kind() {
527 ty
::Float(FloatTy
::F32
) => fx
.lib_call(
528 "__powisf2", // compiler-builtins
529 vec
![AbiParam
::new(types
::F32
), AbiParam
::new(types
::I32
)],
530 vec
![AbiParam
::new(types
::F32
)],
533 ty
::Float(FloatTy
::F64
) => fx
.lib_call(
534 "__powidf2", // compiler-builtins
535 vec
![AbiParam
::new(types
::F64
), AbiParam
::new(types
::I32
)],
536 vec
![AbiParam
::new(types
::F64
)],
539 _
=> unreachable
!("{:?}", lane_ty
),
551 | sym
::simd_round
=> {
552 intrinsic_args
!(fx
, args
=> (a
); intrinsic
);
554 if !a
.layout().ty
.is_simd() {
555 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
559 simd_for_each_lane(fx
, a
, ret
, &|fx
, lane_ty
, _ret_lane_ty
, lane
| {
560 let lane_ty
= match lane_ty
.kind() {
561 ty
::Float(FloatTy
::F32
) => types
::F32
,
562 ty
::Float(FloatTy
::F64
) => types
::F64
,
563 _
=> unreachable
!("{:?}", lane_ty
),
565 let name
= match (intrinsic
, lane_ty
) {
566 (sym
::simd_fsin
, types
::F32
) => "sinf",
567 (sym
::simd_fsin
, types
::F64
) => "sin",
568 (sym
::simd_fcos
, types
::F32
) => "cosf",
569 (sym
::simd_fcos
, types
::F64
) => "cos",
570 (sym
::simd_fexp
, types
::F32
) => "expf",
571 (sym
::simd_fexp
, types
::F64
) => "exp",
572 (sym
::simd_fexp2
, types
::F32
) => "exp2f",
573 (sym
::simd_fexp2
, types
::F64
) => "exp2",
574 (sym
::simd_flog
, types
::F32
) => "logf",
575 (sym
::simd_flog
, types
::F64
) => "log",
576 (sym
::simd_flog10
, types
::F32
) => "log10f",
577 (sym
::simd_flog10
, types
::F64
) => "log10",
578 (sym
::simd_flog2
, types
::F32
) => "log2f",
579 (sym
::simd_flog2
, types
::F64
) => "log2",
580 (sym
::simd_round
, types
::F32
) => "roundf",
581 (sym
::simd_round
, types
::F64
) => "round",
582 _
=> unreachable
!("{:?}", intrinsic
),
586 vec
![AbiParam
::new(lane_ty
)],
587 vec
![AbiParam
::new(lane_ty
)],
593 sym
::simd_fabs
| sym
::simd_fsqrt
| sym
::simd_ceil
| sym
::simd_floor
| sym
::simd_trunc
=> {
594 intrinsic_args
!(fx
, args
=> (a
); intrinsic
);
596 if !a
.layout().ty
.is_simd() {
597 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
601 simd_for_each_lane(fx
, a
, ret
, &|fx
, lane_ty
, _ret_lane_ty
, lane
| {
602 match lane_ty
.kind() {
604 _
=> unreachable
!("{:?}", lane_ty
),
607 sym
::simd_fabs
=> fx
.bcx
.ins().fabs(lane
),
608 sym
::simd_fsqrt
=> fx
.bcx
.ins().sqrt(lane
),
609 sym
::simd_ceil
=> fx
.bcx
.ins().ceil(lane
),
610 sym
::simd_floor
=> fx
.bcx
.ins().floor(lane
),
611 sym
::simd_trunc
=> fx
.bcx
.ins().trunc(lane
),
617 sym
::simd_reduce_add_ordered
=> {
618 intrinsic_args
!(fx
, args
=> (v
, acc
); intrinsic
);
619 let acc
= acc
.load_scalar(fx
);
621 // FIXME there must be no acc param for integer vectors
622 if !v
.layout().ty
.is_simd() {
623 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
627 simd_reduce(fx
, v
, Some(acc
), ret
, &|fx
, lane_ty
, a
, b
| {
628 if lane_ty
.is_floating_point() {
629 fx
.bcx
.ins().fadd(a
, b
)
631 fx
.bcx
.ins().iadd(a
, b
)
636 sym
::simd_reduce_add_unordered
=> {
637 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
639 // FIXME there must be no acc param for integer vectors
640 if !v
.layout().ty
.is_simd() {
641 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
645 simd_reduce(fx
, v
, None
, ret
, &|fx
, lane_ty
, a
, b
| {
646 if lane_ty
.is_floating_point() {
647 fx
.bcx
.ins().fadd(a
, b
)
649 fx
.bcx
.ins().iadd(a
, b
)
654 sym
::simd_reduce_mul_ordered
=> {
655 intrinsic_args
!(fx
, args
=> (v
, acc
); intrinsic
);
656 let acc
= acc
.load_scalar(fx
);
658 // FIXME there must be no acc param for integer vectors
659 if !v
.layout().ty
.is_simd() {
660 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
664 simd_reduce(fx
, v
, Some(acc
), ret
, &|fx
, lane_ty
, a
, b
| {
665 if lane_ty
.is_floating_point() {
666 fx
.bcx
.ins().fmul(a
, b
)
668 fx
.bcx
.ins().imul(a
, b
)
673 sym
::simd_reduce_mul_unordered
=> {
674 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
676 // FIXME there must be no acc param for integer vectors
677 if !v
.layout().ty
.is_simd() {
678 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
682 simd_reduce(fx
, v
, None
, ret
, &|fx
, lane_ty
, a
, b
| {
683 if lane_ty
.is_floating_point() {
684 fx
.bcx
.ins().fmul(a
, b
)
686 fx
.bcx
.ins().imul(a
, b
)
691 sym
::simd_reduce_all
=> {
692 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
694 if !v
.layout().ty
.is_simd() {
695 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
699 simd_reduce_bool(fx
, v
, ret
, &|fx
, a
, b
| fx
.bcx
.ins().band(a
, b
));
702 sym
::simd_reduce_any
=> {
703 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
705 if !v
.layout().ty
.is_simd() {
706 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
710 simd_reduce_bool(fx
, v
, ret
, &|fx
, a
, b
| fx
.bcx
.ins().bor(a
, b
));
713 sym
::simd_reduce_and
=> {
714 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
716 if !v
.layout().ty
.is_simd() {
717 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
721 simd_reduce(fx
, v
, None
, ret
, &|fx
, _ty
, a
, b
| fx
.bcx
.ins().band(a
, b
));
724 sym
::simd_reduce_or
=> {
725 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
727 if !v
.layout().ty
.is_simd() {
728 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
732 simd_reduce(fx
, v
, None
, ret
, &|fx
, _ty
, a
, b
| fx
.bcx
.ins().bor(a
, b
));
735 sym
::simd_reduce_xor
=> {
736 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
738 if !v
.layout().ty
.is_simd() {
739 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
743 simd_reduce(fx
, v
, None
, ret
, &|fx
, _ty
, a
, b
| fx
.bcx
.ins().bxor(a
, b
));
746 sym
::simd_reduce_min
| sym
::simd_reduce_min_nanless
=> {
747 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
749 if !v
.layout().ty
.is_simd() {
750 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
754 simd_reduce(fx
, v
, None
, ret
, &|fx
, ty
, a
, b
| {
755 let lt
= match ty
.kind() {
756 ty
::Int(_
) => fx
.bcx
.ins().icmp(IntCC
::SignedLessThan
, a
, b
),
757 ty
::Uint(_
) => fx
.bcx
.ins().icmp(IntCC
::UnsignedLessThan
, a
, b
),
758 ty
::Float(_
) => return crate::num
::codegen_float_min(fx
, a
, b
),
761 fx
.bcx
.ins().select(lt
, a
, b
)
765 sym
::simd_reduce_max
| sym
::simd_reduce_max_nanless
=> {
766 intrinsic_args
!(fx
, args
=> (v
); intrinsic
);
768 if !v
.layout().ty
.is_simd() {
769 report_simd_type_validation_error(fx
, intrinsic
, span
, v
.layout().ty
);
773 simd_reduce(fx
, v
, None
, ret
, &|fx
, ty
, a
, b
| {
774 let gt
= match ty
.kind() {
775 ty
::Int(_
) => fx
.bcx
.ins().icmp(IntCC
::SignedGreaterThan
, a
, b
),
776 ty
::Uint(_
) => fx
.bcx
.ins().icmp(IntCC
::UnsignedGreaterThan
, a
, b
),
777 ty
::Float(_
) => return crate::num
::codegen_float_max(fx
, a
, b
),
780 fx
.bcx
.ins().select(gt
, a
, b
)
784 sym
::simd_select
=> {
785 intrinsic_args
!(fx
, args
=> (m
, a
, b
); intrinsic
);
787 if !m
.layout().ty
.is_simd() {
788 report_simd_type_validation_error(fx
, intrinsic
, span
, m
.layout().ty
);
791 if !a
.layout().ty
.is_simd() {
792 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
795 assert_eq
!(a
.layout(), b
.layout());
797 let (lane_count
, lane_ty
) = a
.layout().ty
.simd_size_and_type(fx
.tcx
);
798 let lane_layout
= fx
.layout_of(lane_ty
);
800 for lane
in 0..lane_count
{
801 let m_lane
= m
.value_lane(fx
, lane
).load_scalar(fx
);
802 let a_lane
= a
.value_lane(fx
, lane
).load_scalar(fx
);
803 let b_lane
= b
.value_lane(fx
, lane
).load_scalar(fx
);
805 let m_lane
= fx
.bcx
.ins().icmp_imm(IntCC
::Equal
, m_lane
, 0);
807 CValue
::by_val(fx
.bcx
.ins().select(m_lane
, b_lane
, a_lane
), lane_layout
);
809 ret
.place_lane(fx
, lane
).write_cvalue(fx
, res_lane
);
813 sym
::simd_select_bitmask
=> {
814 intrinsic_args
!(fx
, args
=> (m
, a
, b
); intrinsic
);
816 if !a
.layout().ty
.is_simd() {
817 report_simd_type_validation_error(fx
, intrinsic
, span
, a
.layout().ty
);
820 assert_eq
!(a
.layout(), b
.layout());
822 let (lane_count
, lane_ty
) = a
.layout().ty
.simd_size_and_type(fx
.tcx
);
823 let lane_layout
= fx
.layout_of(lane_ty
);
825 let m
= m
.load_scalar(fx
);
827 for lane
in 0..lane_count
{
828 let m_lane
= fx
.bcx
.ins().ushr_imm(m
, u64::from(lane
) as i64);
829 let m_lane
= fx
.bcx
.ins().band_imm(m_lane
, 1);
830 let a_lane
= a
.value_lane(fx
, lane
).load_scalar(fx
);
831 let b_lane
= b
.value_lane(fx
, lane
).load_scalar(fx
);
833 let m_lane
= fx
.bcx
.ins().icmp_imm(IntCC
::Equal
, m_lane
, 0);
835 CValue
::by_val(fx
.bcx
.ins().select(m_lane
, b_lane
, a_lane
), lane_layout
);
837 ret
.place_lane(fx
, lane
).write_cvalue(fx
, res_lane
);
841 sym
::simd_bitmask
=> {
842 intrinsic_args
!(fx
, args
=> (a
); intrinsic
);
844 let (lane_count
, lane_ty
) = a
.layout().ty
.simd_size_and_type(fx
.tcx
);
845 let lane_clif_ty
= fx
.clif_type(lane_ty
).unwrap();
847 // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
848 // vector mask and returns the most significant bit (MSB) of each lane in the form
850 // * an unsigned integer
851 // * an array of `u8`
852 // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
854 // The bit order of the result depends on the byte endianness, LSB-first for little
855 // endian and MSB-first for big endian.
856 let expected_int_bits
= lane_count
.max(8);
857 let expected_bytes
= expected_int_bits
/ 8 + ((expected_int_bits
% 8 > 0) as u64);
859 match lane_ty
.kind() {
860 ty
::Int(_
) | ty
::Uint(_
) => {}
862 fx
.tcx
.sess
.span_fatal(
865 "invalid monomorphization of `simd_bitmask` intrinsic: \
866 vector argument `{}`'s element type `{}`, expected integer element \
876 Type
::int_with_byte_size(u16::try_from(expected_bytes
).unwrap()).unwrap();
877 let mut res
= type_zero_value(&mut fx
.bcx
, res_type
);
879 let lanes
= match fx
.tcx
.sess
.target
.endian
{
880 Endian
::Big
=> Box
::new(0..lane_count
) as Box
<dyn Iterator
<Item
= u64>>,
881 Endian
::Little
=> Box
::new((0..lane_count
).rev()) as Box
<dyn Iterator
<Item
= u64>>,
884 let a_lane
= a
.value_lane(fx
, lane
).load_scalar(fx
);
886 // extract sign bit of an int
887 let a_lane_sign
= fx
.bcx
.ins().ushr_imm(a_lane
, i64::from(lane_clif_ty
.bits() - 1));
889 // shift sign bit into result
890 let a_lane_sign
= clif_intcast(fx
, a_lane_sign
, res_type
, false);
891 res
= fx
.bcx
.ins().ishl_imm(res
, 1);
892 res
= fx
.bcx
.ins().bor(res
, a_lane_sign
);
895 match ret
.layout().ty
.kind() {
896 ty
::Uint(i
) if i
.bit_width() == Some(expected_int_bits
) => {}
898 if matches
!(elem
.kind(), ty
::Uint(ty
::UintTy
::U8
))
899 && len
.try_eval_target_usize(fx
.tcx
, ty
::ParamEnv
::reveal_all())
900 == Some(expected_bytes
) => {}
902 fx
.tcx
.sess
.span_fatal(
905 "invalid monomorphization of `simd_bitmask` intrinsic: \
906 cannot return `{}`, expected `u{}` or `[u8; {}]`",
915 let res
= CValue
::by_val(res
, ret
.layout());
916 ret
.write_cvalue(fx
, res
);
919 sym
::simd_saturating_add
| sym
::simd_saturating_sub
=> {
920 intrinsic_args
!(fx
, args
=> (x
, y
); intrinsic
);
922 let bin_op
= match intrinsic
{
923 sym
::simd_saturating_add
=> BinOp
::Add
,
924 sym
::simd_saturating_sub
=> BinOp
::Sub
,
928 // FIXME use vector instructions when possible
929 simd_pair_for_each_lane_typed(fx
, x
, y
, ret
, &|fx
, x_lane
, y_lane
| {
930 crate::num
::codegen_saturating_int_binop(fx
, bin_op
, x_lane
, y_lane
)
934 sym
::simd_expose_addr
| sym
::simd_from_exposed_addr
| sym
::simd_cast_ptr
=> {
935 intrinsic_args
!(fx
, args
=> (arg
); intrinsic
);
936 ret
.write_cvalue_transmute(fx
, arg
);
939 sym
::simd_arith_offset
=> {
940 intrinsic_args
!(fx
, args
=> (ptr
, offset
); intrinsic
);
942 let (lane_count
, ptr_lane_ty
) = ptr
.layout().ty
.simd_size_and_type(fx
.tcx
);
943 let pointee_ty
= ptr_lane_ty
.builtin_deref(true).unwrap().ty
;
944 let pointee_size
= fx
.layout_of(pointee_ty
).size
.bytes();
945 let (ret_lane_count
, ret_lane_ty
) = ret
.layout().ty
.simd_size_and_type(fx
.tcx
);
946 let ret_lane_layout
= fx
.layout_of(ret_lane_ty
);
947 assert_eq
!(lane_count
, ret_lane_count
);
949 for lane_idx
in 0..lane_count
{
950 let ptr_lane
= ptr
.value_lane(fx
, lane_idx
).load_scalar(fx
);
951 let offset_lane
= offset
.value_lane(fx
, lane_idx
).load_scalar(fx
);
953 let ptr_diff
= if pointee_size
!= 1 {
954 fx
.bcx
.ins().imul_imm(offset_lane
, pointee_size
as i64)
958 let res_lane
= fx
.bcx
.ins().iadd(ptr_lane
, ptr_diff
);
959 let res_lane
= CValue
::by_val(res_lane
, ret_lane_layout
);
961 ret
.place_lane(fx
, lane_idx
).write_cvalue(fx
, res_lane
);
965 sym
::simd_gather
=> {
966 intrinsic_args
!(fx
, args
=> (val
, ptr
, mask
); intrinsic
);
968 let (val_lane_count
, val_lane_ty
) = val
.layout().ty
.simd_size_and_type(fx
.tcx
);
969 let (ptr_lane_count
, _ptr_lane_ty
) = ptr
.layout().ty
.simd_size_and_type(fx
.tcx
);
970 let (mask_lane_count
, _mask_lane_ty
) = mask
.layout().ty
.simd_size_and_type(fx
.tcx
);
971 let (ret_lane_count
, ret_lane_ty
) = ret
.layout().ty
.simd_size_and_type(fx
.tcx
);
972 assert_eq
!(val_lane_count
, ptr_lane_count
);
973 assert_eq
!(val_lane_count
, mask_lane_count
);
974 assert_eq
!(val_lane_count
, ret_lane_count
);
976 let lane_clif_ty
= fx
.clif_type(val_lane_ty
).unwrap();
977 let ret_lane_layout
= fx
.layout_of(ret_lane_ty
);
979 for lane_idx
in 0..ptr_lane_count
{
980 let val_lane
= val
.value_lane(fx
, lane_idx
).load_scalar(fx
);
981 let ptr_lane
= ptr
.value_lane(fx
, lane_idx
).load_scalar(fx
);
982 let mask_lane
= mask
.value_lane(fx
, lane_idx
).load_scalar(fx
);
984 let if_enabled
= fx
.bcx
.create_block();
985 let if_disabled
= fx
.bcx
.create_block();
986 let next
= fx
.bcx
.create_block();
987 let res_lane
= fx
.bcx
.append_block_param(next
, lane_clif_ty
);
989 fx
.bcx
.ins().brif(mask_lane
, if_enabled
, &[], if_disabled
, &[]);
990 fx
.bcx
.seal_block(if_enabled
);
991 fx
.bcx
.seal_block(if_disabled
);
993 fx
.bcx
.switch_to_block(if_enabled
);
994 let res
= fx
.bcx
.ins().load(lane_clif_ty
, MemFlags
::trusted(), ptr_lane
, 0);
995 fx
.bcx
.ins().jump(next
, &[res
]);
997 fx
.bcx
.switch_to_block(if_disabled
);
998 fx
.bcx
.ins().jump(next
, &[val_lane
]);
1000 fx
.bcx
.seal_block(next
);
1001 fx
.bcx
.switch_to_block(next
);
1005 ret
.place_lane(fx
, lane_idx
)
1006 .write_cvalue(fx
, CValue
::by_val(res_lane
, ret_lane_layout
));
1010 sym
::simd_masked_load
=> {
1011 intrinsic_args
!(fx
, args
=> (mask
, ptr
, val
); intrinsic
);
1013 let (val_lane_count
, val_lane_ty
) = val
.layout().ty
.simd_size_and_type(fx
.tcx
);
1014 let (mask_lane_count
, _mask_lane_ty
) = mask
.layout().ty
.simd_size_and_type(fx
.tcx
);
1015 let (ret_lane_count
, ret_lane_ty
) = ret
.layout().ty
.simd_size_and_type(fx
.tcx
);
1016 assert_eq
!(val_lane_count
, mask_lane_count
);
1017 assert_eq
!(val_lane_count
, ret_lane_count
);
1019 let lane_clif_ty
= fx
.clif_type(val_lane_ty
).unwrap();
1020 let ret_lane_layout
= fx
.layout_of(ret_lane_ty
);
1021 let ptr_val
= ptr
.load_scalar(fx
);
1023 for lane_idx
in 0..ret_lane_count
{
1024 let val_lane
= val
.value_lane(fx
, lane_idx
).load_scalar(fx
);
1025 let mask_lane
= mask
.value_lane(fx
, lane_idx
).load_scalar(fx
);
1027 let if_enabled
= fx
.bcx
.create_block();
1028 let if_disabled
= fx
.bcx
.create_block();
1029 let next
= fx
.bcx
.create_block();
1030 let res_lane
= fx
.bcx
.append_block_param(next
, lane_clif_ty
);
1032 fx
.bcx
.ins().brif(mask_lane
, if_enabled
, &[], if_disabled
, &[]);
1033 fx
.bcx
.seal_block(if_enabled
);
1034 fx
.bcx
.seal_block(if_disabled
);
1036 fx
.bcx
.switch_to_block(if_enabled
);
1037 let offset
= lane_idx
as i32 * lane_clif_ty
.bytes() as i32;
1038 let res
= fx
.bcx
.ins().load(
1040 MemFlags
::trusted(),
1042 Offset32
::new(offset
),
1044 fx
.bcx
.ins().jump(next
, &[res
]);
1046 fx
.bcx
.switch_to_block(if_disabled
);
1047 fx
.bcx
.ins().jump(next
, &[val_lane
]);
1049 fx
.bcx
.seal_block(next
);
1050 fx
.bcx
.switch_to_block(next
);
1054 ret
.place_lane(fx
, lane_idx
)
1055 .write_cvalue(fx
, CValue
::by_val(res_lane
, ret_lane_layout
));
1059 sym
::simd_scatter
=> {
1060 intrinsic_args
!(fx
, args
=> (mask
, ptr
, val
); intrinsic
);
1062 let (val_lane_count
, _val_lane_ty
) = val
.layout().ty
.simd_size_and_type(fx
.tcx
);
1063 let (ptr_lane_count
, _ptr_lane_ty
) = ptr
.layout().ty
.simd_size_and_type(fx
.tcx
);
1064 let (mask_lane_count
, _mask_lane_ty
) = mask
.layout().ty
.simd_size_and_type(fx
.tcx
);
1065 assert_eq
!(val_lane_count
, ptr_lane_count
);
1066 assert_eq
!(val_lane_count
, mask_lane_count
);
1068 for lane_idx
in 0..ptr_lane_count
{
1069 let val_lane
= val
.value_lane(fx
, lane_idx
).load_scalar(fx
);
1070 let ptr_lane
= ptr
.value_lane(fx
, lane_idx
).load_scalar(fx
);
1071 let mask_lane
= mask
.value_lane(fx
, lane_idx
).load_scalar(fx
);
1073 let if_enabled
= fx
.bcx
.create_block();
1074 let next
= fx
.bcx
.create_block();
1076 fx
.bcx
.ins().brif(mask_lane
, if_enabled
, &[], next
, &[]);
1077 fx
.bcx
.seal_block(if_enabled
);
1079 fx
.bcx
.switch_to_block(if_enabled
);
1080 fx
.bcx
.ins().store(MemFlags
::trusted(), val_lane
, ptr_lane
, 0);
1081 fx
.bcx
.ins().jump(next
, &[]);
1083 fx
.bcx
.seal_block(next
);
1084 fx
.bcx
.switch_to_block(next
);
1089 fx
.tcx
.sess
.span_err(span
, format
!("Unknown SIMD intrinsic {}", intrinsic
));
1090 // Prevent verifier error
1091 fx
.bcx
.ins().trap(TrapCode
::UnreachableCodeReached
);
1095 let ret_block
= fx
.get_block(target
);
1096 fx
.bcx
.ins().jump(ret_block
, &[]);