//! Codegen `extern "platform-intrinsic"` intrinsics.

use cranelift_codegen::ir::immediates::Offset32;
use rustc_target::abi::Endian;

use super::*;
use crate::prelude::*;

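/// Emits a monomorphization error for a non-SIMD input type, then traps so the
/// generated function body still passes the Cranelift verifier.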
fn report_simd_type_validation_error(
    fx: &mut FunctionCx<'_, '_, '_>,
    intrinsic: Symbol,
    span: Span,
    ty: Ty<'_>,
) {
    fx.tcx.sess.span_err(span, format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty));
    // Prevent verifier error
    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
}

pub(super) fn codegen_simd_intrinsic_call<'tcx>(
    fx: &mut FunctionCx<'_, '_, 'tcx>,
    intrinsic: Symbol,
    generic_args: GenericArgsRef<'tcx>,
    args: &[mir::Operand<'tcx>],
    ret: CPlace<'tcx>,
    target: BasicBlock,
    span: Span,
) {
    match intrinsic {
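        // Lane-wise numeric conversions; `simd_as` and `simd_cast` share the same
        // lowering here.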
        sym::simd_as | sym::simd_cast => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, ret_lane_ty, lane| {
                let ret_lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap();

                let from_signed = type_sign(lane_ty);
                let to_signed = type_sign(ret_lane_ty);

                clif_int_or_float_cast(fx, lane, from_signed, ret_lane_clif_ty, to_signed)
            });
        }

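        // Lane-wise comparisons: each result lane is all-ones on true and all-zero on
        // false (see `bool_to_zero_or_max_uint` below).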
        sym::simd_eq | sym::simd_ne | sym::simd_lt | sym::simd_le | sym::simd_gt | sym::simd_ge => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
                let res_lane = match (lane_ty.kind(), intrinsic) {
                    (ty::Uint(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane),
                    (ty::Uint(_), sym::simd_ne) => {
                        fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_lt) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedLessThan, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_le) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedLessThanOrEqual, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_gt) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, x_lane, y_lane)
                    }
                    (ty::Uint(_), sym::simd_ge) => {
                        fx.bcx.ins().icmp(IntCC::UnsignedGreaterThanOrEqual, x_lane, y_lane)
                    }

                    (ty::Int(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane),
                    (ty::Int(_), sym::simd_ne) => {
                        fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_lt) => {
                        fx.bcx.ins().icmp(IntCC::SignedLessThan, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_le) => {
                        fx.bcx.ins().icmp(IntCC::SignedLessThanOrEqual, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_gt) => {
                        fx.bcx.ins().icmp(IntCC::SignedGreaterThan, x_lane, y_lane)
                    }
                    (ty::Int(_), sym::simd_ge) => {
                        fx.bcx.ins().icmp(IntCC::SignedGreaterThanOrEqual, x_lane, y_lane)
                    }

                    (ty::Float(_), sym::simd_eq) => {
                        fx.bcx.ins().fcmp(FloatCC::Equal, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_ne) => {
                        fx.bcx.ins().fcmp(FloatCC::NotEqual, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_lt) => {
                        fx.bcx.ins().fcmp(FloatCC::LessThan, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_le) => {
                        fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_gt) => {
                        fx.bcx.ins().fcmp(FloatCC::GreaterThan, x_lane, y_lane)
                    }
                    (ty::Float(_), sym::simd_ge) => {
                        fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, x_lane, y_lane)
                    }

                    _ => unreachable!(),
                };

                bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane)
            });
        }

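        // Shuffles select lanes from the concatenation `[x, y]`: index `i < lane_count`
        // picks `x[i]`, otherwise `y[i - lane_count]`.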
        // simd_shuffle_generic<T, U, const I: &[u32]>(x: T, y: T) -> U
        sym::simd_shuffle_generic => {
            let [x, y] = args else {
                bug!("wrong number of args for intrinsic {intrinsic}");
            };
            let x = codegen_operand(fx, x);
            let y = codegen_operand(fx, y);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            let idx = generic_args[2]
                .expect_const()
                .eval(fx.tcx, ty::ParamEnv::reveal_all(), Some(span))
                .unwrap()
                .unwrap_branch();

            assert_eq!(x.layout(), y.layout());
            let layout = x.layout();

            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);

            assert_eq!(lane_ty, ret_lane_ty);
            assert_eq!(idx.len() as u64, ret_lane_count);

            let total_len = lane_count * 2;

            let indexes =
                idx.iter().map(|idx| idx.unwrap_leaf().try_to_u32().unwrap()).collect::<Vec<u32>>();

            for &idx in &indexes {
                assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len);
            }

            for (out_idx, in_idx) in indexes.into_iter().enumerate() {
                let in_lane = if u64::from(in_idx) < lane_count {
                    x.value_lane(fx, in_idx.into())
                } else {
                    y.value_lane(fx, u64::from(in_idx) - lane_count)
                };
                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
                out_lane.write_cvalue(fx, in_lane);
            }
        }

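        // Same lane-selection rule as `simd_shuffle_generic`, but the index vector is
        // passed as a constant array operand rather than a const generic.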
        // simd_shuffle<T, I, U>(x: T, y: T, idx: I) -> U
        sym::simd_shuffle => {
            let (x, y, idx) = match args {
                [x, y, idx] => (x, y, idx),
                _ => {
                    bug!("wrong number of args for intrinsic {intrinsic}");
                }
            };
            let x = codegen_operand(fx, x);
            let y = codegen_operand(fx, y);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            // Make sure this is actually an array, since typeck only checks the length-suffixed
            // version of this intrinsic.
            let idx_ty = fx.monomorphize(idx.ty(fx.mir, fx.tcx));
            let n: u16 = match idx_ty.kind() {
                ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => len
                    .try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
                    .unwrap_or_else(|| {
                        span_bug!(span, "could not evaluate shuffle index array length")
                    })
                    .try_into()
                    .unwrap(),
                _ => {
                    fx.tcx.sess.span_err(
                        span,
                        format!("simd_shuffle index must be an array of `u32`, got `{}`", idx_ty),
                    );
                    // Prevent verifier error
                    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
                    return;
                }
            };

            assert_eq!(x.layout(), y.layout());
            let layout = x.layout();

            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);

            assert_eq!(lane_ty, ret_lane_ty);
            assert_eq!(u64::from(n), ret_lane_count);

            let total_len = lane_count * 2;

            let indexes = {
                use rustc_middle::mir::interpret::*;
                let idx_const = match idx {
                    Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0,
                    Operand::Copy(_) | Operand::Move(_) => unreachable!("{idx:?}"),
                };

                let idx_bytes = match idx_const {
                    ConstValue::Indirect { alloc_id, offset } => {
                        let alloc = fx.tcx.global_alloc(alloc_id).unwrap_memory();
                        let size = Size::from_bytes(
                            4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */
                        );
                        alloc
                            .inner()
                            .get_bytes_strip_provenance(fx, alloc_range(offset, size))
                            .unwrap()
                    }
                    _ => unreachable!("{:?}", idx_const),
                };

                (0..ret_lane_count)
                    .map(|i| {
                        let i = usize::try_from(i).unwrap();
                        let idx = rustc_middle::mir::interpret::read_target_uint(
                            fx.tcx.data_layout.endian,
                            &idx_bytes[4 * i..4 * i + 4],
                        )
                        .expect("read_target_uint");
                        u16::try_from(idx).expect("try_from u32")
                    })
                    .collect::<Vec<u16>>()
            };

            for &idx in &indexes {
                assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len);
            }

            for (out_idx, in_idx) in indexes.into_iter().enumerate() {
                let in_lane = if u64::from(in_idx) < lane_count {
                    x.value_lane(fx, in_idx.into())
                } else {
                    y.value_lane(fx, u64::from(in_idx) - lane_count)
                };
                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
                out_lane.write_cvalue(fx, in_lane);
            }
        }

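        // simd_insert<T, U>(base: T, idx: u32, val: U) -> T: `base` with lane `idx`
        // replaced by `val`. The index must be a constant.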
        sym::simd_insert => {
            let (base, idx, val) = match args {
                [base, idx, val] => (base, idx, val),
                _ => {
                    bug!("wrong number of args for intrinsic {intrinsic}");
                }
            };
            let base = codegen_operand(fx, base);
            let val = codegen_operand(fx, val);

            // FIXME validate
            let idx_const = if let Some(idx_const) =
                crate::constant::mir_operand_get_const_val(fx, idx)
            {
                idx_const
            } else {
                fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant");
            };

            let idx: u32 = idx_const
                .try_to_u32()
                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
            let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx);
            if u64::from(idx) >= lane_count {
                fx.tcx.sess.span_fatal(
                    fx.mir.span,
                    format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count),
                );
            }

            ret.write_cvalue(fx, base);
            let ret_lane = ret.place_lane(fx, idx.try_into().unwrap());
            ret_lane.write_cvalue(fx, val);
        }

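        // simd_extract<T, U>(v: T, idx: u32) -> U: reads lane `idx` of `v`. A
        // non-constant index is reported and lowered to a trap.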
        sym::simd_extract => {
            let (v, idx) = match args {
                [v, idx] => (v, idx),
                _ => {
                    bug!("wrong number of args for intrinsic {intrinsic}");
                }
            };
            let v = codegen_operand(fx, v);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            let idx_const = if let Some(idx_const) =
                crate::constant::mir_operand_get_const_val(fx, idx)
            {
                idx_const
            } else {
                fx.tcx.sess.span_warn(span, "Index argument for `simd_extract` is not a constant");
                let trap_block = fx.bcx.create_block();
                let true_ = fx.bcx.ins().iconst(types::I8, 1);
                let ret_block = fx.get_block(target);
                fx.bcx.ins().brif(true_, trap_block, &[], ret_block, &[]);
                fx.bcx.switch_to_block(trap_block);
                crate::trap::trap_unimplemented(
                    fx,
                    "Index argument for `simd_extract` is not a constant",
                );
                return;
            };

            let idx = idx_const
                .try_to_u32()
                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
            let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx);
            if u64::from(idx) >= lane_count {
                fx.tcx.sess.span_fatal(
                    fx.mir.span,
                    format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count),
                );
            }

            let ret_lane = v.value_lane(fx, idx.try_into().unwrap());
            ret.write_cvalue(fx, ret_lane);
        }

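        // Lane-wise unary integer/float operations.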
        sym::simd_neg
        | sym::simd_bswap
        | sym::simd_bitreverse
        | sym::simd_ctlz
        | sym::simd_cttz => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| match (
                lane_ty.kind(),
                intrinsic,
            ) {
                (ty::Int(_), sym::simd_neg) => fx.bcx.ins().ineg(lane),
                (ty::Float(_), sym::simd_neg) => fx.bcx.ins().fneg(lane),

                (ty::Uint(ty::UintTy::U8) | ty::Int(ty::IntTy::I8), sym::simd_bswap) => lane,
                (ty::Uint(_) | ty::Int(_), sym::simd_bswap) => fx.bcx.ins().bswap(lane),
                (ty::Uint(_) | ty::Int(_), sym::simd_bitreverse) => fx.bcx.ins().bitrev(lane),
                (ty::Uint(_) | ty::Int(_), sym::simd_ctlz) => fx.bcx.ins().clz(lane),
                (ty::Uint(_) | ty::Int(_), sym::simd_cttz) => fx.bcx.ins().ctz(lane),

                _ => unreachable!(),
            });
        }

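        // Lane-wise binary arithmetic, shifts and bitwise operations. Float `simd_rem`
        // is lowered to `fmodf`/`fmod` libcalls.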
        sym::simd_add
        | sym::simd_sub
        | sym::simd_mul
        | sym::simd_div
        | sym::simd_rem
        | sym::simd_shl
        | sym::simd_shr
        | sym::simd_and
        | sym::simd_or
        | sym::simd_xor => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
                match (lane_ty.kind(), intrinsic) {
                    (ty::Uint(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_div) => fx.bcx.ins().udiv(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_rem) => fx.bcx.ins().urem(x_lane, y_lane),

                    (ty::Int(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane),
                    (ty::Int(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane),
                    (ty::Int(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane),
                    (ty::Int(_), sym::simd_div) => fx.bcx.ins().sdiv(x_lane, y_lane),
                    (ty::Int(_), sym::simd_rem) => fx.bcx.ins().srem(x_lane, y_lane),

                    (ty::Float(_), sym::simd_add) => fx.bcx.ins().fadd(x_lane, y_lane),
                    (ty::Float(_), sym::simd_sub) => fx.bcx.ins().fsub(x_lane, y_lane),
                    (ty::Float(_), sym::simd_mul) => fx.bcx.ins().fmul(x_lane, y_lane),
                    (ty::Float(_), sym::simd_div) => fx.bcx.ins().fdiv(x_lane, y_lane),
                    (ty::Float(FloatTy::F32), sym::simd_rem) => fx.lib_call(
                        "fmodf",
                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
                        vec![AbiParam::new(types::F32)],
                        &[x_lane, y_lane],
                    )[0],
                    (ty::Float(FloatTy::F64), sym::simd_rem) => fx.lib_call(
                        "fmod",
                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
                        vec![AbiParam::new(types::F64)],
                        &[x_lane, y_lane],
                    )[0],

                    (ty::Uint(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_shr) => fx.bcx.ins().ushr(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane),
                    (ty::Uint(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane),

                    (ty::Int(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane),
                    (ty::Int(_), sym::simd_shr) => fx.bcx.ins().sshr(x_lane, y_lane),
                    (ty::Int(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane),
                    (ty::Int(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane),
                    (ty::Int(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane),

                    _ => unreachable!(),
                }
            });
        }

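        // Fused multiply-add per lane: ret[i] = a[i] * b[i] + c[i], computed with a
        // single rounding.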
        sym::simd_fma => {
            intrinsic_args!(fx, args => (a, b, c); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }
            assert_eq!(a.layout(), b.layout());
            assert_eq!(a.layout(), c.layout());
            assert_eq!(a.layout(), ret.layout());

            let layout = a.layout();
            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let res_lane_layout = fx.layout_of(lane_ty);

            for lane in 0..lane_count {
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
                let c_lane = c.value_lane(fx, lane).load_scalar(fx);

                let res_lane = fx.bcx.ins().fma(a_lane, b_lane, c_lane);
                let res_lane = CValue::by_val(res_lane, res_lane_layout);

                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
            }
        }

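        // Per-lane float min/max; the NaN handling lives in
        // `crate::num::codegen_float_{min,max}`.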
        sym::simd_fmin | sym::simd_fmax => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            if !x.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
                return;
            }

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
                match lane_ty.kind() {
                    ty::Float(_) => {}
                    _ => unreachable!("{:?}", lane_ty),
                }
                match intrinsic {
                    sym::simd_fmin => crate::num::codegen_float_min(fx, x_lane, y_lane),
                    sym::simd_fmax => crate::num::codegen_float_max(fx, x_lane, y_lane),
                    _ => unreachable!(),
                }
            });
        }

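        // Per-lane float exponentiation via `powf`/`pow` libcalls.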
        sym::simd_fpow => {
            intrinsic_args!(fx, args => (a, b); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_pair_for_each_lane(fx, a, b, ret, &|fx, lane_ty, _ret_lane_ty, a_lane, b_lane| {
                match lane_ty.kind() {
                    ty::Float(FloatTy::F32) => fx.lib_call(
                        "powf",
                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
                        vec![AbiParam::new(types::F32)],
                        &[a_lane, b_lane],
                    )[0],
                    ty::Float(FloatTy::F64) => fx.lib_call(
                        "pow",
                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
                        vec![AbiParam::new(types::F64)],
                        &[a_lane, b_lane],
                    )[0],
                    _ => unreachable!("{:?}", lane_ty),
                }
            });
        }

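        // Per-lane float-to-integer-power via the compiler-builtins `__powisf2`/`__powidf2`
        // helpers; the exponent is a single scalar shared by all lanes.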
        sym::simd_fpowi => {
            intrinsic_args!(fx, args => (a, exp); intrinsic);
            let exp = exp.load_scalar(fx);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(
                fx,
                a,
                ret,
                &|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() {
                    ty::Float(FloatTy::F32) => fx.lib_call(
                        "__powisf2", // compiler-builtins
                        vec![AbiParam::new(types::F32), AbiParam::new(types::I32)],
                        vec![AbiParam::new(types::F32)],
                        &[lane, exp],
                    )[0],
                    ty::Float(FloatTy::F64) => fx.lib_call(
                        "__powidf2", // compiler-builtins
                        vec![AbiParam::new(types::F64), AbiParam::new(types::I32)],
                        vec![AbiParam::new(types::F64)],
                        &[lane, exp],
                    )[0],
                    _ => unreachable!("{:?}", lane_ty),
                },
            );
        }

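        // Lane-wise float math that goes through libm; the libcall name is chosen from
        // the intrinsic and the lane width.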
        sym::simd_fsin
        | sym::simd_fcos
        | sym::simd_fexp
        | sym::simd_fexp2
        | sym::simd_flog
        | sym::simd_flog10
        | sym::simd_flog2
        | sym::simd_round => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
                let lane_ty = match lane_ty.kind() {
                    ty::Float(FloatTy::F32) => types::F32,
                    ty::Float(FloatTy::F64) => types::F64,
                    _ => unreachable!("{:?}", lane_ty),
                };
                let name = match (intrinsic, lane_ty) {
                    (sym::simd_fsin, types::F32) => "sinf",
                    (sym::simd_fsin, types::F64) => "sin",
                    (sym::simd_fcos, types::F32) => "cosf",
                    (sym::simd_fcos, types::F64) => "cos",
                    (sym::simd_fexp, types::F32) => "expf",
                    (sym::simd_fexp, types::F64) => "exp",
                    (sym::simd_fexp2, types::F32) => "exp2f",
                    (sym::simd_fexp2, types::F64) => "exp2",
                    (sym::simd_flog, types::F32) => "logf",
                    (sym::simd_flog, types::F64) => "log",
                    (sym::simd_flog10, types::F32) => "log10f",
                    (sym::simd_flog10, types::F64) => "log10",
                    (sym::simd_flog2, types::F32) => "log2f",
                    (sym::simd_flog2, types::F64) => "log2",
                    (sym::simd_round, types::F32) => "roundf",
                    (sym::simd_round, types::F64) => "round",
                    _ => unreachable!("{:?}", intrinsic),
                };
                fx.lib_call(
                    name,
                    vec![AbiParam::new(lane_ty)],
                    vec![AbiParam::new(lane_ty)],
                    &[lane],
                )[0]
            });
        }

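        // These unary float ops have native Cranelift instructions.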
        sym::simd_fabs | sym::simd_fsqrt | sym::simd_ceil | sym::simd_floor | sym::simd_trunc => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }

            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
                match lane_ty.kind() {
                    ty::Float(_) => {}
                    _ => unreachable!("{:?}", lane_ty),
                }
                match intrinsic {
                    sym::simd_fabs => fx.bcx.ins().fabs(lane),
                    sym::simd_fsqrt => fx.bcx.ins().sqrt(lane),
                    sym::simd_ceil => fx.bcx.ins().ceil(lane),
                    sym::simd_floor => fx.bcx.ins().floor(lane),
                    sym::simd_trunc => fx.bcx.ins().trunc(lane),
                    _ => unreachable!(),
                }
            });
        }

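        // Reductions: the ordered variants thread an explicit accumulator through the
        // lanes; the unordered variants take none and may reassociate, though this
        // lowering simply folds the lanes in order.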
        sym::simd_reduce_add_ordered => {
            intrinsic_args!(fx, args => (v, acc); intrinsic);
            let acc = acc.load_scalar(fx);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fadd(a, b)
                } else {
                    fx.bcx.ins().iadd(a, b)
                }
            });
        }

        sym::simd_reduce_add_unordered => {
            intrinsic_args!(fx, args => (v); intrinsic);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fadd(a, b)
                } else {
                    fx.bcx.ins().iadd(a, b)
                }
            });
        }

        sym::simd_reduce_mul_ordered => {
            intrinsic_args!(fx, args => (v, acc); intrinsic);
            let acc = acc.load_scalar(fx);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fmul(a, b)
                } else {
                    fx.bcx.ins().imul(a, b)
                }
            });
        }

        sym::simd_reduce_mul_unordered => {
            intrinsic_args!(fx, args => (v); intrinsic);

            // FIXME there must be no acc param for integer vectors
            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
                if lane_ty.is_floating_point() {
                    fx.bcx.ins().fmul(a, b)
                } else {
                    fx.bcx.ins().imul(a, b)
                }
            });
        }

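        // Boolean and bitwise reductions: `all` ANDs the lanes together, `any` ORs them.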
        sym::simd_reduce_all => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b));
        }

        sym::simd_reduce_any => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b));
        }

        sym::simd_reduce_and => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().band(a, b));
        }

        sym::simd_reduce_or => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bor(a, b));
        }

        sym::simd_reduce_xor => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
        }

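        // min/max reductions fold with a compare-and-select per step; float lanes defer
        // to the NaN-aware helpers in `crate::num`.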
        sym::simd_reduce_min | sym::simd_reduce_min_nanless => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
                let lt = match ty.kind() {
                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
                    ty::Float(_) => return crate::num::codegen_float_min(fx, a, b),
                    _ => unreachable!(),
                };
                fx.bcx.ins().select(lt, a, b)
            });
        }

        sym::simd_reduce_max | sym::simd_reduce_max_nanless => {
            intrinsic_args!(fx, args => (v); intrinsic);

            if !v.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
                return;
            }

            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
                let gt = match ty.kind() {
                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
                    ty::Float(_) => return crate::num::codegen_float_max(fx, a, b),
                    _ => unreachable!(),
                };
                fx.bcx.ins().select(gt, a, b)
            });
        }

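        // Per-lane select: result lane i is a[i] where m[i] != 0, else b[i].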
        sym::simd_select => {
            intrinsic_args!(fx, args => (m, a, b); intrinsic);

            if !m.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, m.layout().ty);
                return;
            }
            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }
            assert_eq!(a.layout(), b.layout());

            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
            let lane_layout = fx.layout_of(lane_ty);

            for lane in 0..lane_count {
                let m_lane = m.value_lane(fx, lane).load_scalar(fx);
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
                let b_lane = b.value_lane(fx, lane).load_scalar(fx);

                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
                let res_lane =
                    CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);

                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
            }
        }

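        // Like `simd_select`, but the mask is a scalar bitmask with one bit per lane.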
        sym::simd_select_bitmask => {
            intrinsic_args!(fx, args => (m, a, b); intrinsic);

            if !a.layout().ty.is_simd() {
                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
                return;
            }
            assert_eq!(a.layout(), b.layout());

            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
            let lane_layout = fx.layout_of(lane_ty);

            let m = m.load_scalar(fx);

            for lane in 0..lane_count {
                let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
                let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
                let b_lane = b.value_lane(fx, lane).load_scalar(fx);

                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
                let res_lane =
                    CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);

                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
            }
        }

        sym::simd_bitmask => {
            intrinsic_args!(fx, args => (a); intrinsic);

            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
            let lane_clif_ty = fx.clif_type(lane_ty).unwrap();

            // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
            // vector mask and returns the most significant bit (MSB) of each lane in the form
            // of either:
            // * an unsigned integer
            // * an array of `u8`
            // If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
            //
            // The bit order of the result depends on the byte endianness: LSB-first for little
            // endian and MSB-first for big endian.
            let expected_int_bits = lane_count.max(8);
            let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);

            match lane_ty.kind() {
                ty::Int(_) | ty::Uint(_) => {}
                _ => {
                    fx.tcx.sess.span_fatal(
                        span,
                        format!(
                            "invalid monomorphization of `simd_bitmask` intrinsic: \
                            vector argument `{}`'s element type `{}`, expected integer element \
                            type",
                            a.layout().ty,
                            lane_ty
                        ),
                    );
                }
            }

            let res_type =
                Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap();
            let mut res = type_zero_value(&mut fx.bcx, res_type);

            let lanes = match fx.tcx.sess.target.endian {
                Endian::Big => Box::new(0..lane_count) as Box<dyn Iterator<Item = u64>>,
                Endian::Little => Box::new((0..lane_count).rev()) as Box<dyn Iterator<Item = u64>>,
            };
            for lane in lanes {
                let a_lane = a.value_lane(fx, lane).load_scalar(fx);

                // extract sign bit of an int
                let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_clif_ty.bits() - 1));

                // shift sign bit into result
                let a_lane_sign = clif_intcast(fx, a_lane_sign, res_type, false);
                res = fx.bcx.ins().ishl_imm(res, 1);
                res = fx.bcx.ins().bor(res, a_lane_sign);
            }

            match ret.layout().ty.kind() {
                ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {}
                ty::Array(elem, len)
                    if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
                        && len.try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
                            == Some(expected_bytes) => {}
                _ => {
                    fx.tcx.sess.span_fatal(
                        span,
                        format!(
                            "invalid monomorphization of `simd_bitmask` intrinsic: \
                            cannot return `{}`, expected `u{}` or `[u8; {}]`",
                            ret.layout().ty,
                            expected_int_bits,
                            expected_bytes
                        ),
                    );
                }
            }

            let res = CValue::by_val(res, ret.layout());
            ret.write_cvalue(fx, res);
        }

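        // Saturating integer add/sub per lane, delegated to the scalar helper in
        // `crate::num`.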
        sym::simd_saturating_add | sym::simd_saturating_sub => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            let bin_op = match intrinsic {
                sym::simd_saturating_add => BinOp::Add,
                sym::simd_saturating_sub => BinOp::Sub,
                _ => unreachable!(),
            };

            // FIXME use vector instructions when possible
            simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
                crate::num::codegen_saturating_int_binop(fx, bin_op, x_lane, y_lane)
            });
        }

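        // Pointer <-> address casts do not change the value representation, so a
        // transmute suffices.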
        sym::simd_expose_addr | sym::simd_from_exposed_addr | sym::simd_cast_ptr => {
            intrinsic_args!(fx, args => (arg); intrinsic);
            ret.write_cvalue_transmute(fx, arg);
        }

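        // simd_arith_offset: ret[i] = ptr[i] + offset[i] * size_of::<pointee>().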
        sym::simd_arith_offset => {
            intrinsic_args!(fx, args => (ptr, offset); intrinsic);

            let (lane_count, ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
            let pointee_ty = ptr_lane_ty.builtin_deref(true).unwrap().ty;
            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            let ret_lane_layout = fx.layout_of(ret_lane_ty);
            assert_eq!(lane_count, ret_lane_count);

            for lane_idx in 0..lane_count {
                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
                let offset_lane = offset.value_lane(fx, lane_idx).load_scalar(fx);

                let ptr_diff = if pointee_size != 1 {
                    fx.bcx.ins().imul_imm(offset_lane, pointee_size as i64)
                } else {
                    offset_lane
                };
                let res_lane = fx.bcx.ins().iadd(ptr_lane, ptr_diff);
                let res_lane = CValue::by_val(res_lane, ret_lane_layout);

                ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
            }
        }

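        // Gather: each lane loads from its own pointer when the mask is set, and keeps
        // the passthrough value otherwise. Lowered as a branch per lane.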
        sym::simd_gather => {
            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);

            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            assert_eq!(val_lane_count, ptr_lane_count);
            assert_eq!(val_lane_count, mask_lane_count);
            assert_eq!(val_lane_count, ret_lane_count);

            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
            let ret_lane_layout = fx.layout_of(ret_lane_ty);

            for lane_idx in 0..ptr_lane_count {
                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);

                let if_enabled = fx.bcx.create_block();
                let if_disabled = fx.bcx.create_block();
                let next = fx.bcx.create_block();
                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);

                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
                fx.bcx.seal_block(if_enabled);
                fx.bcx.seal_block(if_disabled);

                fx.bcx.switch_to_block(if_enabled);
                let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), ptr_lane, 0);
                fx.bcx.ins().jump(next, &[res]);

                fx.bcx.switch_to_block(if_disabled);
                fx.bcx.ins().jump(next, &[val_lane]);

                fx.bcx.seal_block(next);
                fx.bcx.switch_to_block(next);

                fx.bcx.ins().nop();

                ret.place_lane(fx, lane_idx)
                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
            }
        }

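        // Masked load: like gather, but the lanes are consecutive elements behind a
        // single base pointer.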
        sym::simd_masked_load => {
            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);

            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            assert_eq!(val_lane_count, mask_lane_count);
            assert_eq!(val_lane_count, ret_lane_count);

            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
            let ret_lane_layout = fx.layout_of(ret_lane_ty);
            let ptr_val = ptr.load_scalar(fx);

            for lane_idx in 0..ret_lane_count {
                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);

                let if_enabled = fx.bcx.create_block();
                let if_disabled = fx.bcx.create_block();
                let next = fx.bcx.create_block();
                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);

                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
                fx.bcx.seal_block(if_enabled);
                fx.bcx.seal_block(if_disabled);

                fx.bcx.switch_to_block(if_enabled);
                let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32;
                let res = fx.bcx.ins().load(
                    lane_clif_ty,
                    MemFlags::trusted(),
                    ptr_val,
                    Offset32::new(offset),
                );
                fx.bcx.ins().jump(next, &[res]);

                fx.bcx.switch_to_block(if_disabled);
                fx.bcx.ins().jump(next, &[val_lane]);

                fx.bcx.seal_block(next);
                fx.bcx.switch_to_block(next);

                fx.bcx.ins().nop();

                ret.place_lane(fx, lane_idx)
                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
            }
        }

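        // Scatter: each lane stores to its own pointer when the mask is set.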
        sym::simd_scatter => {
            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);

            let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
            assert_eq!(val_lane_count, ptr_lane_count);
            assert_eq!(val_lane_count, mask_lane_count);

            for lane_idx in 0..ptr_lane_count {
                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);

                let if_enabled = fx.bcx.create_block();
                let next = fx.bcx.create_block();

                fx.bcx.ins().brif(mask_lane, if_enabled, &[], next, &[]);
                fx.bcx.seal_block(if_enabled);

                fx.bcx.switch_to_block(if_enabled);
                fx.bcx.ins().store(MemFlags::trusted(), val_lane, ptr_lane, 0);
                fx.bcx.ins().jump(next, &[]);

                fx.bcx.seal_block(next);
                fx.bcx.switch_to_block(next);
            }
        }

        _ => {
            fx.tcx.sess.span_err(span, format!("Unknown SIMD intrinsic {}", intrinsic));
            // Prevent verifier error
            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
            return;
        }
    }
    let ret_block = fx.get_block(target);
    fx.bcx.ins().jump(ret_block, &[]);
}