//! This module implements the [WebAssembly `SIMD128` ISA].
//!
//! [WebAssembly `SIMD128` ISA]:
//! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md

#![unstable(feature = "wasm_simd", issue = "74372")]
#![allow(non_camel_case_types)]
#![allow(unused_imports)]

use crate::{
    core_arch::{simd::*, simd_llvm::*},
    marker::Sized,
    mem::transmute,
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

types! {
    /// WASM-specific 128-bit wide SIMD vector type.
    // N.B., internals here are arbitrary.
    pub struct v128(i32, i32, i32, i32);
}

#[allow(non_camel_case_types)]
#[unstable(feature = "stdsimd_internal", issue = "none")]
pub(crate) trait v128Ext: Sized {
    unsafe fn as_v128(self) -> v128;

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_u8x16(self) -> u8x16 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_u16x8(self) -> u16x8 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_u32x4(self) -> u32x4 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_u64x2(self) -> u64x2 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_i8x16(self) -> i8x16 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_i16x8(self) -> i16x8 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_i32x4(self) -> i32x4 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_i64x2(self) -> i64x2 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_f32x4(self) -> f32x4 {
        transmute(self.as_v128())
    }

    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_f64x2(self) -> f64x2 {
        transmute(self.as_v128())
    }
}

impl v128Ext for v128 {
    #[inline]
    #[target_feature(enable = "simd128")]
    unsafe fn as_v128(self) -> Self {
        self
    }
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.wasm.anytrue.v16i8"]
    fn llvm_i8x16_any_true(x: i8x16) -> i32;
    #[link_name = "llvm.wasm.alltrue.v16i8"]
    fn llvm_i8x16_all_true(x: i8x16) -> i32;
    #[link_name = "llvm.sadd.sat.v16i8"]
    fn llvm_i8x16_add_saturate_s(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.uadd.sat.v16i8"]
    fn llvm_i8x16_add_saturate_u(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.wasm.sub.saturate.signed.v16i8"]
    fn llvm_i8x16_sub_saturate_s(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.wasm.sub.saturate.unsigned.v16i8"]
    fn llvm_i8x16_sub_saturate_u(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.wasm.anytrue.v8i16"]
    fn llvm_i16x8_any_true(x: i16x8) -> i32;
    #[link_name = "llvm.wasm.alltrue.v8i16"]
    fn llvm_i16x8_all_true(x: i16x8) -> i32;
    #[link_name = "llvm.sadd.sat.v8i16"]
    fn llvm_i16x8_add_saturate_s(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.uadd.sat.v8i16"]
    fn llvm_i16x8_add_saturate_u(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.wasm.sub.saturate.signed.v8i16"]
    fn llvm_i16x8_sub_saturate_s(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.wasm.sub.saturate.unsigned.v8i16"]
    fn llvm_i16x8_sub_saturate_u(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.wasm.anytrue.v4i32"]
    fn llvm_i32x4_any_true(x: i32x4) -> i32;
    #[link_name = "llvm.wasm.alltrue.v4i32"]
    fn llvm_i32x4_all_true(x: i32x4) -> i32;

    #[link_name = "llvm.fabs.v4f32"]
    fn llvm_f32x4_abs(x: f32x4) -> f32x4;
    #[link_name = "llvm.sqrt.v4f32"]
    fn llvm_f32x4_sqrt(x: f32x4) -> f32x4;
    #[link_name = "llvm.minimum.v4f32"]
    fn llvm_f32x4_min(x: f32x4, y: f32x4) -> f32x4;
    #[link_name = "llvm.maximum.v4f32"]
    fn llvm_f32x4_max(x: f32x4, y: f32x4) -> f32x4;
    #[link_name = "llvm.fabs.v2f64"]
    fn llvm_f64x2_abs(x: f64x2) -> f64x2;
    #[link_name = "llvm.sqrt.v2f64"]
    fn llvm_f64x2_sqrt(x: f64x2) -> f64x2;
    #[link_name = "llvm.minimum.v2f64"]
    fn llvm_f64x2_min(x: f64x2, y: f64x2) -> f64x2;
    #[link_name = "llvm.maximum.v2f64"]
    fn llvm_f64x2_max(x: f64x2, y: f64x2) -> f64x2;

    #[link_name = "llvm.wasm.bitselect.v16i8"]
    fn llvm_bitselect(a: i8x16, b: i8x16, c: i8x16) -> i8x16;
    #[link_name = "llvm.wasm.swizzle"]
    fn llvm_swizzle(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.wasm.bitmask.v16i8"]
    fn llvm_bitmask_i8x16(a: i8x16) -> i32;
    #[link_name = "llvm.wasm.narrow.signed.v16i8.v8i16"]
    fn llvm_narrow_i8x16_s(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.wasm.narrow.unsigned.v16i8.v8i16"]
    fn llvm_narrow_i8x16_u(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.wasm.avgr.unsigned.v16i8"]
    fn llvm_avgr_u_i8x16(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.wasm.bitmask.v8i16"]
    fn llvm_bitmask_i16x8(a: i16x8) -> i32;
    #[link_name = "llvm.wasm.narrow.signed.v8i16.v8i16"]
    fn llvm_narrow_i16x8_s(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.wasm.narrow.unsigned.v8i16.v8i16"]
    fn llvm_narrow_i16x8_u(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.wasm.avgr.unsigned.v8i16"]
    fn llvm_avgr_u_i16x8(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.wasm.widen.low.signed.v8i16.v16i8"]
    fn llvm_widen_low_i16x8_s(a: i8x16) -> i16x8;
    #[link_name = "llvm.wasm.widen.high.signed.v8i16.v16i8"]
    fn llvm_widen_high_i16x8_s(a: i8x16) -> i16x8;
    #[link_name = "llvm.wasm.widen.low.unsigned.v8i16.v16i8"]
    fn llvm_widen_low_i16x8_u(a: i8x16) -> i16x8;
    #[link_name = "llvm.wasm.widen.high.unsigned.v8i16.v16i8"]
    fn llvm_widen_high_i16x8_u(a: i8x16) -> i16x8;

    #[link_name = "llvm.wasm.bitmask.v4i32"]
    fn llvm_bitmask_i32x4(a: i32x4) -> i32;
    #[link_name = "llvm.wasm.avgr.unsigned.v4i32"]
    fn llvm_avgr_u_i32x4(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.wasm.widen.low.signed.v4i32.v8i16"]
    fn llvm_widen_low_i32x4_s(a: i16x8) -> i32x4;
    #[link_name = "llvm.wasm.widen.high.signed.v4i32.v8i16"]
    fn llvm_widen_high_i32x4_s(a: i16x8) -> i32x4;
    #[link_name = "llvm.wasm.widen.low.unsigned.v4i32.v8i16"]
    fn llvm_widen_low_i32x4_u(a: i16x8) -> i32x4;
    #[link_name = "llvm.wasm.widen.high.unsigned.v4i32.v8i16"]
    fn llvm_widen_high_i32x4_u(a: i16x8) -> i32x4;
}

/// Loads a `v128` vector from the given heap address.
#[inline]
#[cfg_attr(test, assert_instr(v128.load))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_load(m: *const v128) -> v128 {
    *m
}

/// Load eight 8-bit integers and sign extend each one to a 16-bit lane.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(i16x8.load8x8_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_load8x8_s(m: *const i8) -> v128 {
    transmute(simd_cast::<_, i16x8>(*(m as *const i8x8)))
}

/// Load eight 8-bit integers and zero extend each one to a 16-bit lane.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(i16x8.load8x8_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_load8x8_u(m: *const u8) -> v128 {
    transmute(simd_cast::<_, u16x8>(*(m as *const u8x8)))
}

/// Load four 16-bit integers and sign extend each one to a 32-bit lane.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(i32x4.load16x4_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_load16x4_s(m: *const i16) -> v128 {
    transmute(simd_cast::<_, i32x4>(*(m as *const i16x4)))
}

/// Load four 16-bit integers and zero extend each one to a 32-bit lane.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(i32x4.load16x4_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_load16x4_u(m: *const u16) -> v128 {
    transmute(simd_cast::<_, u32x4>(*(m as *const u16x4)))
}

/// Load two 32-bit integers and sign extend each one to a 64-bit lane.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(i64x2.load32x2_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i64x2_load32x2_s(m: *const i32) -> v128 {
    transmute(simd_cast::<_, i64x2>(*(m as *const i32x2)))
}

/// Load two 32-bit integers and zero extend each one to a 64-bit lane.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(i64x2.load32x2_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i64x2_load32x2_u(m: *const u32) -> v128 {
    transmute(simd_cast::<_, u64x2>(*(m as *const u32x2)))
}
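
// Illustrative usage sketch (not part of the upstream sources): the
// load-extend intrinsics above read half a vector's worth of narrow integers
// and widen each one into a full lane. The function name is hypothetical and
// this assumes a wasm32 target with the `simd128` feature enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn i16x8_load8x8_s_example() {
    let bytes: [i8; 8] = [-1, 0, 1, 2, 3, 4, 5, 6];
    // Each 8-bit value is sign-extended into a 16-bit lane.
    let v = i16x8_load8x8_s(bytes.as_ptr());
    assert_eq!(i16x8_extract_lane::<0>(v), -1);
    assert_eq!(i16x8_extract_lane::<7>(v), 6);
}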

/// Load a single element and splat to all lanes of a v128 vector.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(v8x16.load_splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn v8x16_load_splat(m: *const u8) -> v128 {
    let v = *m;
    transmute(u8x16(v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v))
}

/// Load a single element and splat to all lanes of a v128 vector.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(v16x8.load_splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn v16x8_load_splat(m: *const u16) -> v128 {
    let v = *m;
    transmute(u16x8(v, v, v, v, v, v, v, v))
}

/// Load a single element and splat to all lanes of a v128 vector.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(v32x4.load_splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn v32x4_load_splat(m: *const u32) -> v128 {
    let v = *m;
    transmute(u32x4(v, v, v, v))
}

/// Load a single element and splat to all lanes of a v128 vector.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(v64x2.load_splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn v64x2_load_splat(m: *const u64) -> v128 {
    let v = *m;
    transmute(u64x2(v, v))
}
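
// Illustrative sketch (not in upstream, hypothetical name): `load_splat`
// reads one scalar from memory and repeats it in every lane. Assumes wasm32
// with `simd128` enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn v32x4_load_splat_example() {
    let x: u32 = 0xdead_beef;
    let v = v32x4_load_splat(&x);
    // All four lanes now hold the same value.
    assert_eq!(i32x4_extract_lane::<3>(v), 0xdead_beef_u32 as i32);
}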

/// Stores a `v128` vector to the given heap address.
#[inline]
#[cfg_attr(test, assert_instr(v128.store))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_store(m: *mut v128, a: v128) {
    *m = a;
}
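
// Illustrative round-trip sketch (not in upstream, hypothetical name): a
// `v128` stored through a raw pointer can be loaded back intact. A `v128`
// local is used as the destination so the pointer is properly aligned.
// Assumes wasm32 with `simd128` enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn v128_store_load_roundtrip_example() {
    let v = i32x4_const(1, 2, 3, 4);
    let mut slot = i32x4_const(0, 0, 0, 0);
    v128_store(&mut slot, v);
    let w = v128_load(&slot);
    assert_eq!(i32x4_extract_lane::<1>(w), 2);
}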

/// Materializes a constant SIMD value from the immediate operands.
///
/// This function generates a `v128.const` instruction as if the generated
/// vector was interpreted as sixteen 8-bit integers.
#[inline]
#[target_feature(enable = "simd128")]
#[cfg_attr(
    all(test, all_simd),
    assert_instr(
        v128.const,
        a0 = 0,
        a1 = 1,
        a2 = 2,
        a3 = 3,
        a4 = 4,
        a5 = 5,
        a6 = 6,
        a7 = 7,
        a8 = 8,
        a9 = 9,
        a10 = 10,
        a11 = 11,
        a12 = 12,
        a13 = 13,
        a14 = 14,
        a15 = 15,
    )
)]
pub const unsafe fn i8x16_const(
    a0: i8,
    a1: i8,
    a2: i8,
    a3: i8,
    a4: i8,
    a5: i8,
    a6: i8,
    a7: i8,
    a8: i8,
    a9: i8,
    a10: i8,
    a11: i8,
    a12: i8,
    a13: i8,
    a14: i8,
    a15: i8,
) -> v128 {
    transmute(i8x16(
        a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15,
    ))
}

/// Materializes a constant SIMD value from the immediate operands.
///
/// This function generates a `v128.const` instruction as if the generated
/// vector was interpreted as eight 16-bit integers.
#[inline]
#[target_feature(enable = "simd128")]
#[cfg_attr(
    all(test, all_simd),
    assert_instr(
        v128.const,
        a0 = 0,
        a1 = 1,
        a2 = 2,
        a3 = 3,
        a4 = 4,
        a5 = 5,
        a6 = 6,
        a7 = 7,
    )
)]
pub const unsafe fn i16x8_const(
    a0: i16,
    a1: i16,
    a2: i16,
    a3: i16,
    a4: i16,
    a5: i16,
    a6: i16,
    a7: i16,
) -> v128 {
    transmute(i16x8(a0, a1, a2, a3, a4, a5, a6, a7))
}

/// Materializes a constant SIMD value from the immediate operands.
///
/// This function generates a `v128.const` instruction as if the generated
/// vector was interpreted as four 32-bit integers.
#[inline]
#[target_feature(enable = "simd128")]
#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0, a1 = 1, a2 = 2, a3 = 3))]
pub const unsafe fn i32x4_const(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 {
    transmute(i32x4(a0, a1, a2, a3))
}

/// Materializes a constant SIMD value from the immediate operands.
///
/// This function generates a `v128.const` instruction as if the generated
/// vector was interpreted as two 64-bit integers.
#[inline]
#[target_feature(enable = "simd128")]
#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0, a1 = 1))]
pub const unsafe fn i64x2_const(a0: i64, a1: i64) -> v128 {
    transmute(i64x2(a0, a1))
}

/// Materializes a constant SIMD value from the immediate operands.
///
/// This function generates a `v128.const` instruction as if the generated
/// vector was interpreted as four 32-bit floats.
#[inline]
#[target_feature(enable = "simd128")]
#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0.0, a1 = 1.0, a2 = 2.0, a3 = 3.0))]
pub const unsafe fn f32x4_const(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 {
    transmute(f32x4(a0, a1, a2, a3))
}

/// Materializes a constant SIMD value from the immediate operands.
///
/// This function generates a `v128.const` instruction as if the generated
/// vector was interpreted as two 64-bit floats.
#[inline]
#[target_feature(enable = "simd128")]
#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0.0, a1 = 1.0))]
pub const unsafe fn f64x2_const(a0: f64, a1: f64) -> v128 {
    transmute(f64x2(a0, a1))
}
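
// Illustrative sketch (not in upstream, hypothetical name): the `*_const`
// constructors materialize compile-time constants whose lanes can be read
// back with the matching `extract_lane`. Assumes wasm32 with `simd128`
// enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn v128_const_example() {
    let v = i16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
    assert_eq!(i16x8_extract_lane::<5>(v), 5);
    let f = f32x4_const(0.5, 1.5, 2.5, 3.5);
    assert_eq!(f32x4_extract_lane::<2>(f), 2.5);
}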

/// Returns a new vector with lanes selected from the lanes of the two input
/// vectors `a` and `b` specified in the 16 immediate operands.
///
/// The `a` and `b` arguments must have type `v128`, and this function
/// generates a wasm instruction that is encoded with 16 bytes providing the
/// indices of the elements to return. The indices `i` in range [0, 15] select
/// the `i`-th element of `a`. The indices in range [16, 31] select the `i -
/// 16`-th element of `b`.
///
/// Note that the indices `I*` are const generic parameters because codegen
/// requires all of them to be constant. A compile-time error is generated if
/// any index is not a constant.
///
/// All indices `I*` must be in the range [0, 31].
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn v8x16_shuffle<
    const I0: usize,
    const I1: usize,
    const I2: usize,
    const I3: usize,
    const I4: usize,
    const I5: usize,
    const I6: usize,
    const I7: usize,
    const I8: usize,
    const I9: usize,
    const I10: usize,
    const I11: usize,
    const I12: usize,
    const I13: usize,
    const I14: usize,
    const I15: usize,
>(
    a: v128,
    b: v128,
) -> v128 {
    let shuf = simd_shuffle16::<u8x16, u8x16>(
        a.as_u8x16(),
        b.as_u8x16(),
        [
            I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, I7 as u32,
            I8 as u32, I9 as u32, I10 as u32, I11 as u32, I12 as u32, I13 as u32, I14 as u32,
            I15 as u32,
        ],
    );
    transmute(shuf)
}

#[cfg(test)]
#[assert_instr(v8x16.shuffle)]
#[target_feature(enable = "simd128")]
unsafe fn v8x16_shuffle_test(a: v128, b: v128) -> v128 {
    v8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(a, b)
}
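
// Illustrative sketch (not in upstream, hypothetical name): interleave the
// low eight bytes of two vectors with `v8x16_shuffle`. Indices 0-15 pick
// from `a`, 16-31 from `b`. Assumes wasm32 with `simd128` enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn v8x16_shuffle_interleave_example() {
    let a = i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = i8x16_const(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    let v = v8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(a, b);
    assert_eq!(i8x16_extract_lane::<0>(v), 0);
    assert_eq!(i8x16_extract_lane::<1>(v), 16);
}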

/// Same as [`v8x16_shuffle`], except operates as if the inputs were eight
/// 16-bit integers, only taking 8 indices to shuffle.
///
/// Indices in the range [0, 7] select from `a` while [8, 15] select from `b`.
/// Note that this will generate the `v8x16.shuffle` instruction, since there
/// is no native `v16x8.shuffle` instruction (there is no need for one since
/// `v8x16.shuffle` suffices).
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn v16x8_shuffle<
    const I0: usize,
    const I1: usize,
    const I2: usize,
    const I3: usize,
    const I4: usize,
    const I5: usize,
    const I6: usize,
    const I7: usize,
>(
    a: v128,
    b: v128,
) -> v128 {
    let shuf = simd_shuffle8::<u16x8, u16x8>(
        a.as_u16x8(),
        b.as_u16x8(),
        [
            I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, I7 as u32,
        ],
    );
    transmute(shuf)
}

#[cfg(test)]
#[assert_instr(v8x16.shuffle)]
#[target_feature(enable = "simd128")]
unsafe fn v16x8_shuffle_test(a: v128, b: v128) -> v128 {
    v16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a, b)
}

/// Same as [`v8x16_shuffle`], except operates as if the inputs were four
/// 32-bit integers, only taking 4 indices to shuffle.
///
/// Indices in the range [0, 3] select from `a` while [4, 7] select from `b`.
/// Note that this will generate the `v8x16.shuffle` instruction, since there
/// is no native `v32x4.shuffle` instruction (there is no need for one since
/// `v8x16.shuffle` suffices).
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn v32x4_shuffle<const I0: usize, const I1: usize, const I2: usize, const I3: usize>(
    a: v128,
    b: v128,
) -> v128 {
    let shuf = simd_shuffle4::<u32x4, u32x4>(
        a.as_u32x4(),
        b.as_u32x4(),
        [I0 as u32, I1 as u32, I2 as u32, I3 as u32],
    );
    transmute(shuf)
}

#[cfg(test)]
#[assert_instr(v8x16.shuffle)]
#[target_feature(enable = "simd128")]
unsafe fn v32x4_shuffle_test(a: v128, b: v128) -> v128 {
    v32x4_shuffle::<0, 2, 4, 6>(a, b)
}

/// Same as [`v8x16_shuffle`], except operates as if the inputs were two
/// 64-bit integers, only taking 2 indices to shuffle.
///
/// Indices in the range [0, 1] select from `a` while [2, 3] select from `b`.
/// Note that this will generate the `v8x16.shuffle` instruction, since there
/// is no native `v64x2.shuffle` instruction (there is no need for one since
/// `v8x16.shuffle` suffices).
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn v64x2_shuffle<const I0: usize, const I1: usize>(a: v128, b: v128) -> v128 {
    let shuf = simd_shuffle2::<u64x2, u64x2>(a.as_u64x2(), b.as_u64x2(), [I0 as u32, I1 as u32]);
    transmute(shuf)
}

#[cfg(test)]
#[assert_instr(v8x16.shuffle)]
#[target_feature(enable = "simd128")]
unsafe fn v64x2_shuffle_test(a: v128, b: v128) -> v128 {
    v64x2_shuffle::<0, 2>(a, b)
}

/// Returns a new vector with lanes selected from the lanes of the first input
/// vector `a` specified in the second input vector `s`.
///
/// The indices `i` in range [0, 15] select the `i`-th element of `a`. For
/// indices outside of the range the resulting lane is 0.
#[inline]
#[cfg_attr(test, assert_instr(v8x16.swizzle))]
#[target_feature(enable = "simd128")]
pub unsafe fn v8x16_swizzle(a: v128, s: v128) -> v128 {
    transmute(llvm_swizzle(transmute(a), transmute(s)))
}
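
// Illustrative sketch (not in upstream, hypothetical name): reverse the bytes
// of a vector with `v8x16_swizzle`. Unlike the shuffles above, the selection
// indices live in a runtime vector rather than in immediates, and
// out-of-range indices produce 0 lanes. Assumes wasm32 with `simd128`
// enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn v8x16_swizzle_example() {
    let a = i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let s = i8x16_const(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    let v = v8x16_swizzle(a, s);
    assert_eq!(i8x16_extract_lane::<0>(v), 15);
}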

/// Creates a vector with identical lanes.
///
/// Constructs a vector with `x` replicated to all 16 lanes.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_splat(a: i8) -> v128 {
    transmute(i8x16::splat(a))
}

/// Creates a vector with identical lanes.
///
/// Constructs a vector with `x` replicated to all 8 lanes.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_splat(a: i16) -> v128 {
    transmute(i16x8::splat(a))
}

/// Creates a vector with identical lanes.
///
/// Constructs a vector with `x` replicated to all 4 lanes.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_splat(a: i32) -> v128 {
    transmute(i32x4::splat(a))
}

/// Creates a vector with identical lanes.
///
/// Constructs a vector with `x` replicated to all 2 lanes.
#[inline]
#[cfg_attr(test, assert_instr(i64x2.splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn i64x2_splat(a: i64) -> v128 {
    transmute(i64x2::splat(a))
}

/// Creates a vector with identical lanes.
///
/// Constructs a vector with `x` replicated to all 4 lanes.
#[inline]
#[cfg_attr(test, assert_instr(f32x4.splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_splat(a: f32) -> v128 {
    transmute(f32x4::splat(a))
}

/// Creates a vector with identical lanes.
///
/// Constructs a vector with `x` replicated to all 2 lanes.
#[inline]
#[cfg_attr(test, assert_instr(f64x2.splat))]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_splat(a: f64) -> v128 {
    transmute(f64x2::splat(a))
}
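
// Illustrative sketch (not in upstream, hypothetical name): `splat`
// broadcasts one scalar to every lane. Assumes wasm32 with `simd128`
// enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn splat_example() {
    let v = i32x4_splat(42);
    assert_eq!(i32x4_extract_lane::<0>(v), 42);
    assert_eq!(i32x4_extract_lane::<3>(v), 42);
}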

/// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers.
///
/// Extracts the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_extract_lane<const N: usize>(a: v128) -> i8 {
    simd_extract(a.as_i8x16(), N as u32)
}

#[cfg(test)]
#[assert_instr(i8x16.extract_lane_s)]
#[target_feature(enable = "simd128")]
unsafe fn i8x16_extract_lane_s(a: v128) -> i32 {
    i8x16_extract_lane::<0>(a) as i32
}

#[cfg(test)]
#[assert_instr(i8x16.extract_lane_u)]
#[target_feature(enable = "simd128")]
unsafe fn i8x16_extract_lane_u(a: v128) -> u32 {
    i8x16_extract_lane::<0>(a) as u8 as u32
}

/// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers.
///
/// Replaces the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_replace_lane<const N: usize>(a: v128, val: i8) -> v128 {
    transmute(simd_insert(a.as_i8x16(), N as u32, val))
}

#[cfg(test)]
#[assert_instr(i8x16.replace_lane)]
#[target_feature(enable = "simd128")]
unsafe fn i8x16_replace_lane_test(a: v128, val: i8) -> v128 {
    i8x16_replace_lane::<0>(a, val)
}
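
// Illustrative sketch (not in upstream, hypothetical name): the lane
// accessors take the lane index as a const generic, so out-of-range indices
// fail at compile time. Assumes wasm32 with `simd128` enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn lane_access_example() {
    let v = i8x16_splat(7);
    // Overwrite lane 3, leaving the other fifteen lanes untouched.
    let v = i8x16_replace_lane::<3>(v, -1);
    assert_eq!(i8x16_extract_lane::<3>(v), -1);
    assert_eq!(i8x16_extract_lane::<4>(v), 7);
}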

/// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers.
///
/// Extracts the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_extract_lane<const N: usize>(a: v128) -> i16 {
    simd_extract(a.as_i16x8(), N as u32)
}

#[cfg(test)]
#[assert_instr(i16x8.extract_lane_s)]
#[target_feature(enable = "simd128")]
unsafe fn i16x8_extract_lane_s(a: v128) -> i32 {
    i16x8_extract_lane::<0>(a) as i32
}

#[cfg(test)]
#[assert_instr(i16x8.extract_lane_u)]
#[target_feature(enable = "simd128")]
unsafe fn i16x8_extract_lane_u(a: v128) -> u32 {
    i16x8_extract_lane::<0>(a) as u16 as u32
}

/// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers.
///
/// Replaces the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_replace_lane<const N: usize>(a: v128, val: i16) -> v128 {
    transmute(simd_insert(a.as_i16x8(), N as u32, val))
}

#[cfg(test)]
#[assert_instr(i16x8.replace_lane)]
#[target_feature(enable = "simd128")]
unsafe fn i16x8_replace_lane_test(a: v128, val: i16) -> v128 {
    i16x8_replace_lane::<0>(a, val)
}

/// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers.
///
/// Extracts the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_extract_lane<const N: usize>(a: v128) -> i32 {
    simd_extract(a.as_i32x4(), N as u32)
}

#[cfg(test)]
#[assert_instr(i32x4.extract_lane)]
#[target_feature(enable = "simd128")]
unsafe fn i32x4_extract_lane_test(a: v128) -> i32 {
    i32x4_extract_lane::<0>(a)
}

/// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers.
///
/// Replaces the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_replace_lane<const N: usize>(a: v128, val: i32) -> v128 {
    transmute(simd_insert(a.as_i32x4(), N as u32, val))
}

#[cfg(test)]
#[assert_instr(i32x4.replace_lane)]
#[target_feature(enable = "simd128")]
unsafe fn i32x4_replace_lane_test(a: v128, val: i32) -> v128 {
    i32x4_replace_lane::<0>(a, val)
}

/// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers.
///
/// Extracts the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i64x2_extract_lane<const N: usize>(a: v128) -> i64 {
    simd_extract(a.as_i64x2(), N as u32)
}

#[cfg(test)]
#[assert_instr(i64x2.extract_lane)]
#[target_feature(enable = "simd128")]
unsafe fn i64x2_extract_lane_test(a: v128) -> i64 {
    i64x2_extract_lane::<0>(a)
}

/// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers.
///
/// Replaces the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn i64x2_replace_lane<const N: usize>(a: v128, val: i64) -> v128 {
    transmute(simd_insert(a.as_i64x2(), N as u32, val))
}

#[cfg(test)]
#[assert_instr(i64x2.replace_lane)]
#[target_feature(enable = "simd128")]
unsafe fn i64x2_replace_lane_test(a: v128, val: i64) -> v128 {
    i64x2_replace_lane::<0>(a, val)
}

/// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers.
///
/// Extracts the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_extract_lane<const N: usize>(a: v128) -> f32 {
    simd_extract(a.as_f32x4(), N as u32)
}

#[cfg(test)]
#[assert_instr(f32x4.extract_lane)]
#[target_feature(enable = "simd128")]
unsafe fn f32x4_extract_lane_test(a: v128) -> f32 {
    f32x4_extract_lane::<0>(a)
}

/// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers.
///
/// Replaces the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_replace_lane<const N: usize>(a: v128, val: f32) -> v128 {
    transmute(simd_insert(a.as_f32x4(), N as u32, val))
}

#[cfg(test)]
#[assert_instr(f32x4.replace_lane)]
#[target_feature(enable = "simd128")]
unsafe fn f32x4_replace_lane_test(a: v128, val: f32) -> v128 {
    f32x4_replace_lane::<0>(a, val)
}

/// Extracts a lane from a 128-bit vector interpreted as 2 packed f64 numbers.
///
/// Extracts the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_extract_lane<const N: usize>(a: v128) -> f64 {
    simd_extract(a.as_f64x2(), N as u32)
}

#[cfg(test)]
#[assert_instr(f64x2.extract_lane)]
#[target_feature(enable = "simd128")]
unsafe fn f64x2_extract_lane_test(a: v128) -> f64 {
    f64x2_extract_lane::<0>(a)
}

/// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers.
///
/// Replaces the scalar value of lane specified in the immediate mode operand
/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_replace_lane<const N: usize>(a: v128, val: f64) -> v128 {
    transmute(simd_insert(a.as_f64x2(), N as u32, val))
}

#[cfg(test)]
#[assert_instr(f64x2.replace_lane)]
#[target_feature(enable = "simd128")]
unsafe fn f64x2_replace_lane_test(a: v128, val: f64) -> v128 {
    f64x2_replace_lane::<0>(a, val)
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// integers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were equal, or all zeros if the elements were not equal.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.eq))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_eq(a: v128, b: v128) -> v128 {
    transmute(simd_eq::<_, i8x16>(a.as_i8x16(), b.as_i8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// integers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were not equal, or all zeros if the elements were equal.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.ne))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_ne(a: v128, b: v128) -> v128 {
    transmute(simd_ne::<_, i8x16>(a.as_i8x16(), b.as_i8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.lt_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_lt_s(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i8x16>(a.as_i8x16(), b.as_i8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.lt_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_lt_u(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i8x16>(a.as_u8x16(), b.as_u8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.gt_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_gt_s(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i8x16>(a.as_i8x16(), b.as_i8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.gt_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_gt_u(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i8x16>(a.as_u8x16(), b.as_u8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.le_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_le_s(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i8x16>(a.as_i8x16(), b.as_i8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.le_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_le_u(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i8x16>(a.as_u8x16(), b.as_u8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.ge_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_ge_s(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i8x16>(a.as_i8x16(), b.as_i8x16()))
}

/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.ge_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_ge_u(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i8x16>(a.as_u8x16(), b.as_u8x16()))
}
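
// Illustrative sketch (not in upstream, hypothetical name): comparisons
// return a lane mask, with all bits set in lanes where the predicate holds
// and all bits clear elsewhere. Assumes wasm32 with `simd128` enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn i8x16_compare_mask_example() {
    let a = i8x16_splat(1);
    let b = i8x16_splat(2);
    let m = i8x16_lt_s(a, b);
    // 1 < 2 in every lane, so every lane of the mask is -1 (all ones).
    assert_eq!(i8x16_extract_lane::<0>(m), -1);
}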

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// integers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were equal, or all zeros if the elements were not equal.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.eq))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_eq(a: v128, b: v128) -> v128 {
    transmute(simd_eq::<_, i16x8>(a.as_i16x8(), b.as_i16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// integers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were not equal, or all zeros if the elements were equal.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.ne))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_ne(a: v128, b: v128) -> v128 {
    transmute(simd_ne::<_, i16x8>(a.as_i16x8(), b.as_i16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.lt_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_lt_s(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i16x8>(a.as_i16x8(), b.as_i16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.lt_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_lt_u(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i16x8>(a.as_u16x8(), b.as_u16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.gt_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_gt_s(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i16x8>(a.as_i16x8(), b.as_i16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.gt_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_gt_u(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i16x8>(a.as_u16x8(), b.as_u16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.le_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_le_s(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i16x8>(a.as_i16x8(), b.as_i16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.le_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_le_u(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i16x8>(a.as_u16x8(), b.as_u16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.ge_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_ge_s(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i16x8>(a.as_i16x8(), b.as_i16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i16x8.ge_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i16x8_ge_u(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i16x8>(a.as_u16x8(), b.as_u16x8()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// integers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were equal, or all zeros if the elements were not equal.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.eq))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_eq(a: v128, b: v128) -> v128 {
    transmute(simd_eq::<_, i32x4>(a.as_i32x4(), b.as_i32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// integers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were not equal, or all zeros if the elements were equal.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.ne))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_ne(a: v128, b: v128) -> v128 {
    transmute(simd_ne::<_, i32x4>(a.as_i32x4(), b.as_i32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.lt_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_lt_s(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i32x4>(a.as_i32x4(), b.as_i32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.lt_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_lt_u(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i32x4>(a.as_u32x4(), b.as_u32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.gt_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_gt_s(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i32x4>(a.as_i32x4(), b.as_i32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.gt_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_gt_u(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i32x4>(a.as_u32x4(), b.as_u32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.le_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_le_s(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i32x4>(a.as_i32x4(), b.as_i32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.le_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_le_u(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i32x4>(a.as_u32x4(), b.as_u32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// signed integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.ge_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_ge_s(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i32x4>(a.as_i32x4(), b.as_i32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// unsigned integers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(i32x4.ge_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i32x4_ge_u(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i32x4>(a.as_u32x4(), b.as_u32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were equal, or all zeros if the elements were not equal.
#[inline]
#[cfg_attr(test, assert_instr(f32x4.eq))]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_eq(a: v128, b: v128) -> v128 {
    transmute(simd_eq::<_, i32x4>(a.as_f32x4(), b.as_f32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were not equal, or all zeros if the elements were equal.
#[inline]
#[cfg_attr(test, assert_instr(f32x4.ne))]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_ne(a: v128, b: v128) -> v128 {
    transmute(simd_ne::<_, i32x4>(a.as_f32x4(), b.as_f32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f32x4.lt))]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_lt(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i32x4>(a.as_f32x4(), b.as_f32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f32x4.gt))]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_gt(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i32x4>(a.as_f32x4(), b.as_f32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f32x4.le))]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_le(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i32x4>(a.as_f32x4(), b.as_f32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f32x4.ge))]
#[target_feature(enable = "simd128")]
pub unsafe fn f32x4_ge(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i32x4>(a.as_f32x4(), b.as_f32x4()))
}

/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were equal, or all zeros if the elements were not equal.
#[inline]
#[cfg_attr(test, assert_instr(f64x2.eq))]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_eq(a: v128, b: v128) -> v128 {
    transmute(simd_eq::<_, i64x2>(a.as_f64x2(), b.as_f64x2()))
}

/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise elements
/// were not equal, or all zeros if the elements were equal.
#[inline]
#[cfg_attr(test, assert_instr(f64x2.ne))]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_ne(a: v128, b: v128) -> v128 {
    transmute(simd_ne::<_, i64x2>(a.as_f64x2(), b.as_f64x2()))
}

/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f64x2.lt))]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_lt(a: v128, b: v128) -> v128 {
    transmute(simd_lt::<_, i64x2>(a.as_f64x2(), b.as_f64x2()))
}

/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than the pairwise right element, or all zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f64x2.gt))]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_gt(a: v128, b: v128) -> v128 {
    transmute(simd_gt::<_, i64x2>(a.as_f64x2(), b.as_f64x2()))
}

/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is less than or equal to the pairwise right element, or all zeros
/// otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f64x2.le))]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_le(a: v128, b: v128) -> v128 {
    transmute(simd_le::<_, i64x2>(a.as_f64x2(), b.as_f64x2()))
}

/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
/// floating point numbers.
///
/// Returns a new vector where each lane is all ones if the pairwise left
/// element is greater than or equal to the pairwise right element, or all
/// zeros otherwise.
#[inline]
#[cfg_attr(test, assert_instr(f64x2.ge))]
#[target_feature(enable = "simd128")]
pub unsafe fn f64x2_ge(a: v128, b: v128) -> v128 {
    transmute(simd_ge::<_, i64x2>(a.as_f64x2(), b.as_f64x2()))
}

/// Flips each bit of the 128-bit input vector.
#[inline]
#[cfg_attr(test, assert_instr(v128.not))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_not(a: v128) -> v128 {
    transmute(simd_xor(a.as_i64x2(), i64x2(!0, !0)))
}

/// Performs a bitwise and of the two input 128-bit vectors, returning the
/// resulting vector.
#[inline]
#[cfg_attr(test, assert_instr(v128.and))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_and(a: v128, b: v128) -> v128 {
    transmute(simd_and(a.as_i64x2(), b.as_i64x2()))
}

/// Bitwise AND of bits of `a` and the logical inverse of bits of `b`.
///
/// This operation is equivalent to `v128.and(a, v128.not(b))`.
#[inline]
#[cfg_attr(all(test, all_simd), assert_instr(v128.andnot))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_andnot(a: v128, b: v128) -> v128 {
    transmute(simd_and(
        a.as_i64x2(),
        simd_xor(b.as_i64x2(), i64x2(-1, -1)),
    ))
}

/// Performs a bitwise or of the two input 128-bit vectors, returning the
/// resulting vector.
#[inline]
#[cfg_attr(test, assert_instr(v128.or))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_or(a: v128, b: v128) -> v128 {
    transmute(simd_or(a.as_i64x2(), b.as_i64x2()))
}

/// Performs a bitwise xor of the two input 128-bit vectors, returning the
/// resulting vector.
#[inline]
#[cfg_attr(test, assert_instr(v128.xor))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_xor(a: v128, b: v128) -> v128 {
    transmute(simd_xor(a.as_i64x2(), b.as_i64x2()))
}

/// Use the bitmask in `c` to select bits from `v1` when 1 and `v2` when 0.
#[inline]
#[cfg_attr(test, assert_instr(v128.bitselect))]
#[target_feature(enable = "simd128")]
pub unsafe fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 {
    transmute(llvm_bitselect(v1.as_i8x16(), v2.as_i8x16(), c.as_i8x16()))
}
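
// Illustrative sketch (not in upstream, hypothetical name): a comparison
// mask combined with `v128_bitselect` implements a lane-wise select, here
// picking the smaller element of each pair. Assumes wasm32 with `simd128`
// enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn bitselect_select_example() {
    let a = i32x4_const(1, 9, 3, 7);
    let b = i32x4_const(8, 2, 4, 6);
    // Bits of `a` are taken where the mask is 1, bits of `b` where it is 0.
    let min = v128_bitselect(a, b, i32x4_lt_s(a, b));
    assert_eq!(i32x4_extract_lane::<0>(min), 1);
    assert_eq!(i32x4_extract_lane::<1>(min), 2);
}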

/// Lane-wise wrapping absolute value.
#[inline]
// #[cfg_attr(test, assert_instr(i8x16.abs))] // FIXME support not in our LLVM yet
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_abs(a: v128) -> v128 {
    let a = transmute::<_, i8x16>(a);
    let zero = i8x16::splat(0);
    transmute(simd_select::<m8x16, i8x16>(
        simd_lt(a, zero),
        simd_sub(zero, a),
        a,
    ))
}

/// Negates a 128-bit vector interpreted as sixteen 8-bit signed integers.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.neg))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_neg(a: v128) -> v128 {
    transmute(simd_mul(a.as_i8x16(), i8x16::splat(-1)))
}

/// Returns 1 if any lane is nonzero or 0 if all lanes are zero.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.any_true))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_any_true(a: v128) -> i32 {
    llvm_i8x16_any_true(a.as_i8x16())
}

/// Returns 1 if all lanes are nonzero or 0 if any lane is zero.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.all_true))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_all_true(a: v128) -> i32 {
    llvm_i8x16_all_true(a.as_i8x16())
}
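
// Illustrative sketch (not in upstream, hypothetical name): `any_true` and
// `all_true` reduce a vector to a scalar boolean, which is useful for early
// exits after a comparison. Assumes wasm32 with `simd128` enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn i8x16_true_example() {
    let v = i8x16_replace_lane::<0>(i8x16_splat(1), 0);
    assert_eq!(i8x16_any_true(v), 1); // some lanes are nonzero
    assert_eq!(i8x16_all_true(v), 0); // but lane 0 is zero
}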

// FIXME: not available in our LLVM yet
// /// Extracts the high bit for each lane in `a` and produce a scalar mask with
// /// all bits concatenated.
// #[inline]
// #[cfg_attr(test, assert_instr(i8x16.all_true))]
// pub unsafe fn i8x16_bitmask(a: v128) -> i32 {
//     llvm_bitmask_i8x16(transmute(a))
// }

/// Converts two input vectors into a smaller lane vector by narrowing each
/// lane.
///
/// Signed saturation to 0x7f or 0x80 is used and the input lanes are always
/// interpreted as signed integers.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_narrow_i16x8_s(a: v128, b: v128) -> v128 {
    transmute(llvm_narrow_i8x16_s(transmute(a), transmute(b)))
}

/// Converts two input vectors into a smaller lane vector by narrowing each
/// lane.
///
/// Unsigned saturation to 0x00 or 0xff is used and the input lanes are always
/// interpreted as signed integers.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_narrow_i16x8_u(a: v128, b: v128) -> v128 {
    transmute(llvm_narrow_i8x16_u(transmute(a), transmute(b)))
}
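
// Illustrative sketch (not in upstream, hypothetical name): narrowing packs
// the lanes of `a` into the low half of the result and the lanes of `b` into
// the high half, saturating values that do not fit in the smaller lane.
// Assumes wasm32 with `simd128` enabled.
#[cfg(test)]
#[target_feature(enable = "simd128")]
unsafe fn i8x16_narrow_example() {
    let a = i16x8_splat(1000); // does not fit in an i8
    let b = i16x8_splat(-1000);
    let v = i8x16_narrow_i16x8_s(a, b);
    assert_eq!(i8x16_extract_lane::<0>(v), i8::MAX); // saturated to 0x7f
    assert_eq!(i8x16_extract_lane::<8>(v), i8::MIN); // saturated to 0x80
}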

/// Shifts each lane to the left by the specified number of bits.
///
/// Only the low bits of the shift amount are used if the shift amount is
/// greater than the lane width.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.shl))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_shl(a: v128, amt: u32) -> v128 {
    transmute(simd_shl(a.as_i8x16(), i8x16::splat(amt as i8)))
}

/// Shifts each lane to the right by the specified number of bits, sign
/// extending.
///
/// Only the low bits of the shift amount are used if the shift amount is
/// greater than the lane width.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.shr_s))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_shr_s(a: v128, amt: u32) -> v128 {
    transmute(simd_shr(a.as_i8x16(), i8x16::splat(amt as i8)))
}

/// Shifts each lane to the right by the specified number of bits, shifting in
/// zeros.
///
/// Only the low bits of the shift amount are used if the shift amount is
/// greater than the lane width.
#[inline]
#[cfg_attr(test, assert_instr(i8x16.shr_u))]
#[target_feature(enable = "simd128")]
pub unsafe fn i8x16_shr_u(a: v128, amt: u32) -> v128 {
    transmute(simd_shr(a.as_u8x16(), u8x16::splat(amt as u8)))
}
1522
1523 /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers.
1524 #[inline]
1525 #[cfg_attr(test, assert_instr(i8x16.add))]
1526 #[target_feature(enable = "simd128")]
1527 pub unsafe fn i8x16_add(a: v128, b: v128) -> v128 {
1528 transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
1529 }
1530
1531 /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit signed
1532 /// integers, saturating on overflow to `i8::MAX`.
1533 #[inline]
1534 #[cfg_attr(test, assert_instr(i8x16.add_saturate_s))]
1535 #[target_feature(enable = "simd128")]
1536 pub unsafe fn i8x16_add_saturate_s(a: v128, b: v128) -> v128 {
1537 transmute(llvm_i8x16_add_saturate_s(a.as_i8x16(), b.as_i8x16()))
1538 }
1539
1540 /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit unsigned
1541 /// integers, saturating on overflow to `u8::MAX`.
1542 #[inline]
1543 #[cfg_attr(test, assert_instr(i8x16.add_saturate_u))]
1544 #[target_feature(enable = "simd128")]
1545 pub unsafe fn i8x16_add_saturate_u(a: v128, b: v128) -> v128 {
1546 transmute(llvm_i8x16_add_saturate_u(a.as_i8x16(), b.as_i8x16()))
1547 }
1548
1549 /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit integers.
1550 #[inline]
1551 #[cfg_attr(test, assert_instr(i8x16.sub))]
1552 #[target_feature(enable = "simd128")]
1553 pub unsafe fn i8x16_sub(a: v128, b: v128) -> v128 {
1554 transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
1555 }
1556
1557 /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit
1558 /// signed integers, saturating on overflow to `i8::MIN`.
1559 #[inline]
1560 #[cfg_attr(test, assert_instr(i8x16.sub_saturate_s))]
1561 #[target_feature(enable = "simd128")]
1562 pub unsafe fn i8x16_sub_saturate_s(a: v128, b: v128) -> v128 {
1563 transmute(llvm_i8x16_sub_saturate_s(a.as_i8x16(), b.as_i8x16()))
1564 }
1565
1566 /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit
1567 /// unsigned integers, saturating on overflow to 0.
1568 #[inline]
1569 #[cfg_attr(test, assert_instr(i8x16.sub_saturate_u))]
1570 #[target_feature(enable = "simd128")]
1571 pub unsafe fn i8x16_sub_saturate_u(a: v128, b: v128) -> v128 {
1572 transmute(llvm_i8x16_sub_saturate_u(a.as_i8x16(), b.as_i8x16()))
1573 }
1574
1575 /// Compares lane-wise signed integers, and returns the minimum of
1576 /// each pair.
1577 #[inline]
1578 #[cfg_attr(test, assert_instr(i8x16.min_s))]
1579 #[target_feature(enable = "simd128")]
1580 pub unsafe fn i8x16_min_s(a: v128, b: v128) -> v128 {
1581 let a = a.as_i8x16();
1582 let b = b.as_i8x16();
1583 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
1584 }
1585
1586 /// Compares lane-wise unsigned integers, and returns the minimum of
1587 /// each pair.
1588 #[inline]
1589 #[cfg_attr(test, assert_instr(i8x16.min_u))]
1590 #[target_feature(enable = "simd128")]
1591 pub unsafe fn i8x16_min_u(a: v128, b: v128) -> v128 {
1592 let a = transmute::<_, u8x16>(a);
1593 let b = transmute::<_, u8x16>(b);
1594 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
1595 }
1596
1597 /// Compares lane-wise signed integers, and returns the maximum of
1598 /// each pair.
1599 #[inline]
1600 #[cfg_attr(test, assert_instr(i8x16.max_s))]
1601 #[target_feature(enable = "simd128")]
1602 pub unsafe fn i8x16_max_s(a: v128, b: v128) -> v128 {
1603 let a = transmute::<_, i8x16>(a);
1604 let b = transmute::<_, i8x16>(b);
1605 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
1606 }
1607
1608 /// Compares lane-wise unsigned integers, and returns the maximum of
1609 /// each pair.
1610 #[inline]
1611 #[cfg_attr(test, assert_instr(i8x16.max_u))]
1612 #[target_feature(enable = "simd128")]
1613 pub unsafe fn i8x16_max_u(a: v128, b: v128) -> v128 {
1614 let a = transmute::<_, u8x16>(a);
1615 let b = transmute::<_, u8x16>(b);
1616 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
1617 }
1618
1619 /// Lane-wise rounding average of packed unsigned 8-bit integers.
1620 #[inline]
1621 #[cfg_attr(test, assert_instr(i8x16.avgr_u))]
1622 #[target_feature(enable = "simd128")]
1623 pub unsafe fn i8x16_avgr_u(a: v128, b: v128) -> v128 {
1624 transmute(llvm_avgr_u_i8x16(transmute(a), transmute(b)))
1625 }
1626
1627 /// Lane-wise wrapping absolute value.
1628 #[inline]
1629 // #[cfg_attr(test, assert_instr(i16x8.abs))] // FIXME support not in our LLVM yet
1630 #[target_feature(enable = "simd128")]
1631 pub unsafe fn i16x8_abs(a: v128) -> v128 {
1632 let a = transmute::<_, i16x8>(a);
1633 let zero = i16x8::splat(0);
1634 transmute(simd_select::<m16x8, i16x8>(
1635 simd_lt(a, zero),
1636 simd_sub(zero, a),
1637 a,
1638 ))
1639 }
1640
1641 /// Negates a 128-bit vector interpreted as eight 16-bit signed integers.
1642 #[inline]
1643 #[cfg_attr(test, assert_instr(i16x8.neg))]
1644 #[target_feature(enable = "simd128")]
1645 pub unsafe fn i16x8_neg(a: v128) -> v128 {
1646 transmute(simd_mul(a.as_i16x8(), i16x8::splat(-1)))
1647 }
1648
1649 /// Returns 1 if any lane is nonzero or 0 if all lanes are zero.
1650 #[inline]
1651 #[cfg_attr(test, assert_instr(i16x8.any_true))]
1652 #[target_feature(enable = "simd128")]
1653 pub unsafe fn i16x8_any_true(a: v128) -> i32 {
1654 llvm_i16x8_any_true(a.as_i16x8())
1655 }
1656
1657 /// Returns 1 if all lanes are nonzero or 0 if any lane is zero.
1658 #[inline]
1659 #[cfg_attr(test, assert_instr(i16x8.all_true))]
1660 #[target_feature(enable = "simd128")]
1661 pub unsafe fn i16x8_all_true(a: v128) -> i32 {
1662 llvm_i16x8_all_true(a.as_i16x8())
1663 }
1664
1665 // FIXME: not available in our LLVM yet
1666 // /// Extracts the high bit for each lane in `a` and produce a scalar mask with
1667 // /// all bits concatenated.
1668 // #[inline]
1669 // #[cfg_attr(test, assert_instr(i16x8.bitmask))]
1670 // pub unsafe fn i16x8_bitmask(a: v128) -> i32 {
1671 // llvm_bitmask_i16x8(transmute(a))
1672 // }
1673
1674 /// Converts two input vectors into a smaller lane vector by narrowing each
1675 /// lane.
1676 ///
1677 /// Signed saturation to 0x7fff or 0x8000 is used and the input lanes are always
1678 /// interpreted as signed integers.
1679 #[inline]
1680 #[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_s))]
1681 #[target_feature(enable = "simd128")]
1682 pub unsafe fn i16x8_narrow_i32x4_s(a: v128, b: v128) -> v128 {
1683 transmute(llvm_narrow_i16x8_s(transmute(a), transmute(b)))
1684 }
1685
1686 /// Converts two input vectors into a smaller lane vector by narrowing each
1687 /// lane.
1688 ///
1689 /// Unsigned saturation to 0x0000 or 0xffff is used and the input lanes are always
1690 /// interpreted as signed integers.
1691 #[inline]
1692 #[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_u))]
1693 #[target_feature(enable = "simd128")]
1694 pub unsafe fn i16x8_narrow_i32x4_u(a: v128, b: v128) -> v128 {
1695 transmute(llvm_narrow_i16x8_u(transmute(a), transmute(b)))
1696 }
1697
1698 /// Converts the low half of the smaller lane vector to a larger lane
1699 /// vector, sign extended.
1700 #[inline]
1701 #[cfg_attr(test, assert_instr(i16x8.widen_low_i8x16_s))]
#[target_feature(enable = "simd128")]
1702 pub unsafe fn i16x8_widen_low_i8x16_s(a: v128) -> v128 {
1703 transmute(llvm_widen_low_i16x8_s(transmute(a)))
1704 }
1705
1706 /// Converts the high half of the smaller lane vector to a larger lane
1707 /// vector, sign extended.
1708 #[inline]
1709 #[cfg_attr(test, assert_instr(i16x8.widen_high_i8x16_s))]
#[target_feature(enable = "simd128")]
1710 pub unsafe fn i16x8_widen_high_i8x16_s(a: v128) -> v128 {
1711 transmute(llvm_widen_high_i16x8_s(transmute(a)))
1712 }
1713
1714 /// Converts the low half of the smaller lane vector to a larger lane
1715 /// vector, zero extended.
1716 #[inline]
1717 #[cfg_attr(test, assert_instr(i16x8.widen_low_i8x16_u))]
#[target_feature(enable = "simd128")]
1718 pub unsafe fn i16x8_widen_low_i8x16_u(a: v128) -> v128 {
1719 transmute(llvm_widen_low_i16x8_u(transmute(a)))
1720 }
1721
1722 /// Converts the high half of the smaller lane vector to a larger lane
1723 /// vector, zero extended.
1724 #[inline]
1725 #[cfg_attr(test, assert_instr(i16x8.widen_high_i8x16_u))]
#[target_feature(enable = "simd128")]
1726 pub unsafe fn i16x8_widen_high_i8x16_u(a: v128) -> v128 {
1727 transmute(llvm_widen_high_i16x8_u(transmute(a)))
1728 }
1729
1730 /// Shifts each lane to the left by the specified number of bits.
1731 ///
1732 /// Only the low bits of the shift amount are used if the shift amount is
1733 /// greater than the lane width.
1734 #[inline]
1735 #[cfg_attr(test, assert_instr(i16x8.shl))]
1736 #[target_feature(enable = "simd128")]
1737 pub unsafe fn i16x8_shl(a: v128, amt: u32) -> v128 {
1738 transmute(simd_shl(a.as_i16x8(), i16x8::splat(amt as i16)))
1739 }
1740
1741 /// Shifts each lane to the right by the specified number of bits, sign
1742 /// extending.
1743 ///
1744 /// Only the low bits of the shift amount are used if the shift amount is
1745 /// greater than the lane width.
1746 #[inline]
1747 #[cfg_attr(test, assert_instr(i16x8.shr_s))]
1748 #[target_feature(enable = "simd128")]
1749 pub unsafe fn i16x8_shr_s(a: v128, amt: u32) -> v128 {
1750 transmute(simd_shr(a.as_i16x8(), i16x8::splat(amt as i16)))
1751 }
1752
1753 /// Shifts each lane to the right by the specified number of bits, shifting in
1754 /// zeros.
1755 ///
1756 /// Only the low bits of the shift amount are used if the shift amount is
1757 /// greater than the lane width.
1758 #[inline]
1759 #[cfg_attr(test, assert_instr(i16x8.shr_u))]
1760 #[target_feature(enable = "simd128")]
1761 pub unsafe fn i16x8_shr_u(a: v128, amt: u32) -> v128 {
1762 transmute(simd_shr(a.as_u16x8(), u16x8::splat(amt as u16)))
1763 }
1764
1765 /// Adds two 128-bit vectors as if they were two packed eight 16-bit integers.
1766 #[inline]
1767 #[cfg_attr(test, assert_instr(i16x8.add))]
1768 #[target_feature(enable = "simd128")]
1769 pub unsafe fn i16x8_add(a: v128, b: v128) -> v128 {
1770 transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
1771 }
1772
1773 /// Adds two 128-bit vectors as if they were two packed eight 16-bit signed
1774 /// integers, saturating on overflow to `i16::MAX`.
1775 #[inline]
1776 #[cfg_attr(test, assert_instr(i16x8.add_saturate_s))]
1777 #[target_feature(enable = "simd128")]
1778 pub unsafe fn i16x8_add_saturate_s(a: v128, b: v128) -> v128 {
1779 transmute(llvm_i16x8_add_saturate_s(a.as_i16x8(), b.as_i16x8()))
1780 }
1781
1782 /// Adds two 128-bit vectors as if they were two packed eight 16-bit unsigned
1783 /// integers, saturating on overflow to `u16::MAX`.
1784 #[inline]
1785 #[cfg_attr(test, assert_instr(i16x8.add_saturate_u))]
1786 #[target_feature(enable = "simd128")]
1787 pub unsafe fn i16x8_add_saturate_u(a: v128, b: v128) -> v128 {
1788 transmute(llvm_i16x8_add_saturate_u(a.as_i16x8(), b.as_i16x8()))
1789 }
1790
1791 /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit integers.
1792 #[inline]
1793 #[cfg_attr(test, assert_instr(i16x8.sub))]
1794 #[target_feature(enable = "simd128")]
1795 pub unsafe fn i16x8_sub(a: v128, b: v128) -> v128 {
1796 transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
1797 }
1798
1799 /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit
1800 /// signed integers, saturating on overflow to `i16::MIN`.
1801 #[inline]
1802 #[cfg_attr(test, assert_instr(i16x8.sub_saturate_s))]
1803 #[target_feature(enable = "simd128")]
1804 pub unsafe fn i16x8_sub_saturate_s(a: v128, b: v128) -> v128 {
1805 transmute(llvm_i16x8_sub_saturate_s(a.as_i16x8(), b.as_i16x8()))
1806 }
1807
1808 /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit
1809 /// unsigned integers, saturating on overflow to 0.
1810 #[inline]
1811 #[cfg_attr(test, assert_instr(i16x8.sub_saturate_u))]
1812 #[target_feature(enable = "simd128")]
1813 pub unsafe fn i16x8_sub_saturate_u(a: v128, b: v128) -> v128 {
1814 transmute(llvm_i16x8_sub_saturate_u(a.as_i16x8(), b.as_i16x8()))
1815 }
1816
1817 /// Multiplies two 128-bit vectors as if they were two packed eight 16-bit
1818 /// signed integers.
1819 #[inline]
1820 #[cfg_attr(test, assert_instr(i16x8.mul))]
1821 #[target_feature(enable = "simd128")]
1822 pub unsafe fn i16x8_mul(a: v128, b: v128) -> v128 {
1823 transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
1824 }
1825
1826 /// Compares lane-wise signed integers, and returns the minimum of
1827 /// each pair.
1828 #[inline]
1829 #[cfg_attr(test, assert_instr(i16x8.min_s))]
1830 #[target_feature(enable = "simd128")]
1831 pub unsafe fn i16x8_min_s(a: v128, b: v128) -> v128 {
1832 let a = transmute::<_, i16x8>(a);
1833 let b = transmute::<_, i16x8>(b);
1834 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
1835 }
1836
1837 /// Compares lane-wise unsigned integers, and returns the minimum of
1838 /// each pair.
1839 #[inline]
1840 #[cfg_attr(test, assert_instr(i16x8.min_u))]
1841 #[target_feature(enable = "simd128")]
1842 pub unsafe fn i16x8_min_u(a: v128, b: v128) -> v128 {
1843 let a = transmute::<_, u16x8>(a);
1844 let b = transmute::<_, u16x8>(b);
1845 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
1846 }
1847
1848 /// Compares lane-wise signed integers, and returns the maximum of
1849 /// each pair.
1850 #[inline]
1851 #[cfg_attr(test, assert_instr(i16x8.max_s))]
1852 #[target_feature(enable = "simd128")]
1853 pub unsafe fn i16x8_max_s(a: v128, b: v128) -> v128 {
1854 let a = transmute::<_, i16x8>(a);
1855 let b = transmute::<_, i16x8>(b);
1856 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
1857 }
1858
1859 /// Compares lane-wise unsigned integers, and returns the maximum of
1860 /// each pair.
1861 #[inline]
1862 #[cfg_attr(test, assert_instr(i16x8.max_u))]
1863 #[target_feature(enable = "simd128")]
1864 pub unsafe fn i16x8_max_u(a: v128, b: v128) -> v128 {
1865 let a = transmute::<_, u16x8>(a);
1866 let b = transmute::<_, u16x8>(b);
1867 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
1868 }
1869
1870 /// Lane-wise rounding average of packed unsigned 16-bit integers.
1871 #[inline]
1872 #[cfg_attr(test, assert_instr(i16x8.avgr_u))]
1873 #[target_feature(enable = "simd128")]
1874 pub unsafe fn i16x8_avgr_u(a: v128, b: v128) -> v128 {
1875 transmute(llvm_avgr_u_i16x8(transmute(a), transmute(b)))
1876 }
1877
1878 /// Lane-wise wrapping absolute value.
1879 #[inline]
1880 // #[cfg_attr(test, assert_instr(i32x4.abs))] // FIXME support not in our LLVM yet
1881 #[target_feature(enable = "simd128")]
1882 pub unsafe fn i32x4_abs(a: v128) -> v128 {
1883 let a = transmute::<_, i32x4>(a);
1884 let zero = i32x4::splat(0);
1885 transmute(simd_select::<m32x4, i32x4>(
1886 simd_lt(a, zero),
1887 simd_sub(zero, a),
1888 a,
1889 ))
1890 }
1891
1892 /// Negates a 128-bit vector interpreted as four 32-bit signed integers.
1893 #[inline]
1894 #[cfg_attr(test, assert_instr(i32x4.neg))]
1895 #[target_feature(enable = "simd128")]
1896 pub unsafe fn i32x4_neg(a: v128) -> v128 {
1897 transmute(simd_mul(a.as_i32x4(), i32x4::splat(-1)))
1898 }
1899
1900 /// Returns 1 if any lane is nonzero or 0 if all lanes are zero.
1901 #[inline]
1902 #[cfg_attr(test, assert_instr(i32x4.any_true))]
1903 #[target_feature(enable = "simd128")]
1904 pub unsafe fn i32x4_any_true(a: v128) -> i32 {
1905 llvm_i32x4_any_true(a.as_i32x4())
1906 }
1907
1908 /// Returns 1 if all lanes are nonzero or 0 if any lane is zero.
1909 #[inline]
1910 #[cfg_attr(test, assert_instr(i32x4.all_true))]
1911 #[target_feature(enable = "simd128")]
1912 pub unsafe fn i32x4_all_true(a: v128) -> i32 {
1913 llvm_i32x4_all_true(a.as_i32x4())
1914 }
1915
1916 // FIXME: not available in our LLVM yet
1917 // /// Extracts the high bit for each lane in `a` and produce a scalar mask with
1918 // /// all bits concatenated.
1919 // #[inline]
1920 // #[cfg_attr(test, assert_instr(i32x4.bitmask))]
1921 // pub unsafe fn i32x4_bitmask(a: v128) -> i32 {
1922 // llvm_bitmask_i32x4(transmute(a))
1923 // }
1924
1925 /// Converts the low half of the smaller lane vector to a larger lane
1926 /// vector, sign extended.
1927 #[inline]
1928 #[cfg_attr(test, assert_instr(i32x4.widen_low_i16x8_s))]
#[target_feature(enable = "simd128")]
1929 pub unsafe fn i32x4_widen_low_i16x8_s(a: v128) -> v128 {
1930 transmute(llvm_widen_low_i32x4_s(transmute(a)))
1931 }
1932
1933 /// Converts the high half of the smaller lane vector to a larger lane
1934 /// vector, sign extended.
1935 #[inline]
1936 #[cfg_attr(test, assert_instr(i32x4.widen_high_i16x8_s))]
#[target_feature(enable = "simd128")]
1937 pub unsafe fn i32x4_widen_high_i16x8_s(a: v128) -> v128 {
1938 transmute(llvm_widen_high_i32x4_s(transmute(a)))
1939 }
1940
1941 /// Converts the low half of the smaller lane vector to a larger lane
1942 /// vector, zero extended.
1943 #[inline]
1944 #[cfg_attr(test, assert_instr(i32x4.widen_low_i16x8_u))]
#[target_feature(enable = "simd128")]
1945 pub unsafe fn i32x4_widen_low_i16x8_u(a: v128) -> v128 {
1946 transmute(llvm_widen_low_i32x4_u(transmute(a)))
1947 }
1948
1949 /// Converts the high half of the smaller lane vector to a larger lane
1950 /// vector, zero extended.
1951 #[inline]
1952 #[cfg_attr(test, assert_instr(i32x4.widen_high_i16x8_u))]
#[target_feature(enable = "simd128")]
1953 pub unsafe fn i32x4_widen_high_i16x8_u(a: v128) -> v128 {
1954 transmute(llvm_widen_high_i32x4_u(transmute(a)))
1955 }
1956
1957 /// Shifts each lane to the left by the specified number of bits.
1958 ///
1959 /// Only the low bits of the shift amount are used if the shift amount is
1960 /// greater than the lane width.
1961 #[inline]
1962 #[cfg_attr(test, assert_instr(i32x4.shl))]
1963 #[target_feature(enable = "simd128")]
1964 pub unsafe fn i32x4_shl(a: v128, amt: u32) -> v128 {
1965 transmute(simd_shl(a.as_i32x4(), i32x4::splat(amt as i32)))
1966 }
1967
1968 /// Shifts each lane to the right by the specified number of bits, sign
1969 /// extending.
1970 ///
1971 /// Only the low bits of the shift amount are used if the shift amount is
1972 /// greater than the lane width.
1973 #[inline]
1974 #[cfg_attr(test, assert_instr(i32x4.shr_s))]
1975 #[target_feature(enable = "simd128")]
1976 pub unsafe fn i32x4_shr_s(a: v128, amt: u32) -> v128 {
1977 transmute(simd_shr(a.as_i32x4(), i32x4::splat(amt as i32)))
1978 }
1979
1980 /// Shifts each lane to the right by the specified number of bits, shifting in
1981 /// zeros.
1982 ///
1983 /// Only the low bits of the shift amount are used if the shift amount is
1984 /// greater than the lane width.
1985 #[inline]
1986 #[cfg_attr(test, assert_instr(i32x4.shr_u))]
1987 #[target_feature(enable = "simd128")]
1988 pub unsafe fn i32x4_shr_u(a: v128, amt: u32) -> v128 {
1989 transmute(simd_shr(a.as_u32x4(), u32x4::splat(amt as u32)))
1990 }
1991
1992 /// Adds two 128-bit vectors as if they were two packed four 32-bit integers.
1993 #[inline]
1994 #[cfg_attr(test, assert_instr(i32x4.add))]
1995 #[target_feature(enable = "simd128")]
1996 pub unsafe fn i32x4_add(a: v128, b: v128) -> v128 {
1997 transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
1998 }
1999
2000 /// Subtracts two 128-bit vectors as if they were two packed four 32-bit integers.
2001 #[inline]
2002 #[cfg_attr(test, assert_instr(i32x4.sub))]
2003 #[target_feature(enable = "simd128")]
2004 pub unsafe fn i32x4_sub(a: v128, b: v128) -> v128 {
2005 transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
2006 }
2007
2008 /// Multiplies two 128-bit vectors as if they were two packed four 32-bit
2009 /// signed integers.
2010 #[inline]
2011 #[cfg_attr(test, assert_instr(i32x4.mul))]
2012 #[target_feature(enable = "simd128")]
2013 pub unsafe fn i32x4_mul(a: v128, b: v128) -> v128 {
2014 transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
2015 }
2016
2017 /// Compares lane-wise signed integers, and returns the minimum of
2018 /// each pair.
2019 #[inline]
2020 #[cfg_attr(test, assert_instr(i32x4.min_s))]
2021 #[target_feature(enable = "simd128")]
2022 pub unsafe fn i32x4_min_s(a: v128, b: v128) -> v128 {
2023 let a = transmute::<_, i32x4>(a);
2024 let b = transmute::<_, i32x4>(b);
2025 transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
2026 }
2027
2028 /// Compares lane-wise unsigned integers, and returns the minimum of
2029 /// each pair.
2030 #[inline]
2031 #[cfg_attr(test, assert_instr(i32x4.min_u))]
2032 #[target_feature(enable = "simd128")]
2033 pub unsafe fn i32x4_min_u(a: v128, b: v128) -> v128 {
2034 let a = transmute::<_, u32x4>(a);
2035 let b = transmute::<_, u32x4>(b);
2036 transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
2037 }
2038
2039 /// Compares lane-wise signed integers, and returns the maximum of
2040 /// each pair.
2041 #[inline]
2042 #[cfg_attr(test, assert_instr(i32x4.max_s))]
2043 #[target_feature(enable = "simd128")]
2044 pub unsafe fn i32x4_max_s(a: v128, b: v128) -> v128 {
2045 let a = transmute::<_, i32x4>(a);
2046 let b = transmute::<_, i32x4>(b);
2047 transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
2048 }
2049
2050 /// Compares lane-wise unsigned integers, and returns the maximum of
2051 /// each pair.
2052 #[inline]
2053 #[cfg_attr(test, assert_instr(i32x4.max_u))]
2054 #[target_feature(enable = "simd128")]
2055 pub unsafe fn i32x4_max_u(a: v128, b: v128) -> v128 {
2056 let a = transmute::<_, u32x4>(a);
2057 let b = transmute::<_, u32x4>(b);
2058 transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
2059 }
2060
2061 /// Negates a 128-bit vector interpreted as two 64-bit signed integers.
2062 #[inline]
2063 #[cfg_attr(test, assert_instr(i64x2.neg))]
2064 #[target_feature(enable = "simd128")]
2065 pub unsafe fn i64x2_neg(a: v128) -> v128 {
2066 transmute(simd_mul(a.as_i64x2(), i64x2::splat(-1)))
2067 }
2068
2069 /// Shifts each lane to the left by the specified number of bits.
2070 ///
2071 /// Only the low bits of the shift amount are used if the shift amount is
2072 /// greater than the lane width.
2073 #[inline]
2074 #[cfg_attr(test, assert_instr(i64x2.shl))]
2075 #[target_feature(enable = "simd128")]
2076 pub unsafe fn i64x2_shl(a: v128, amt: u32) -> v128 {
2077 transmute(simd_shl(a.as_i64x2(), i64x2::splat(amt as i64)))
2078 }
2079
2080 /// Shifts each lane to the right by the specified number of bits, sign
2081 /// extending.
2082 ///
2083 /// Only the low bits of the shift amount are used if the shift amount is
2084 /// greater than the lane width.
2085 #[inline]
2086 #[cfg_attr(test, assert_instr(i64x2.shr_s))]
2087 #[target_feature(enable = "simd128")]
2088 pub unsafe fn i64x2_shr_s(a: v128, amt: u32) -> v128 {
2089 transmute(simd_shr(a.as_i64x2(), i64x2::splat(amt as i64)))
2090 }
2091
2092 /// Shifts each lane to the right by the specified number of bits, shifting in
2093 /// zeros.
2094 ///
2095 /// Only the low bits of the shift amount are used if the shift amount is
2096 /// greater than the lane width.
2097 #[inline]
2098 #[cfg_attr(test, assert_instr(i64x2.shr_u))]
2099 #[target_feature(enable = "simd128")]
2100 pub unsafe fn i64x2_shr_u(a: v128, amt: u32) -> v128 {
2101 transmute(simd_shr(a.as_u64x2(), u64x2::splat(amt as u64)))
2102 }
2103
2104 /// Adds two 128-bit vectors as if they were two packed two 64-bit integers.
2105 #[inline]
2106 #[cfg_attr(test, assert_instr(i64x2.add))]
2107 #[target_feature(enable = "simd128")]
2108 pub unsafe fn i64x2_add(a: v128, b: v128) -> v128 {
2109 transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
2110 }
2111
2112 /// Subtracts two 128-bit vectors as if they were two packed two 64-bit integers.
2113 #[inline]
2114 #[cfg_attr(test, assert_instr(i64x2.sub))]
2115 #[target_feature(enable = "simd128")]
2116 pub unsafe fn i64x2_sub(a: v128, b: v128) -> v128 {
2117 transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
2118 }
2119
2120 /// Multiplies two 128-bit vectors as if they were two packed two 64-bit integers.
2121 #[inline]
2122 // #[cfg_attr(test, assert_instr(i64x2.mul))] // FIXME: not present in our LLVM
2123 #[target_feature(enable = "simd128")]
2124 pub unsafe fn i64x2_mul(a: v128, b: v128) -> v128 {
2125 transmute(simd_mul(a.as_i64x2(), b.as_i64x2()))
2126 }
2127
2128 /// Calculates the absolute value of each lane of a 128-bit vector interpreted
2129 /// as four 32-bit floating point numbers.
2130 #[inline]
2131 #[cfg_attr(test, assert_instr(f32x4.abs))]
2132 #[target_feature(enable = "simd128")]
2133 pub unsafe fn f32x4_abs(a: v128) -> v128 {
2134 transmute(llvm_f32x4_abs(a.as_f32x4()))
2135 }
2136
2137 /// Negates each lane of a 128-bit vector interpreted as four 32-bit floating
2138 /// point numbers.
2139 #[inline]
2140 #[cfg_attr(test, assert_instr(f32x4.neg))]
2141 #[target_feature(enable = "simd128")]
2142 pub unsafe fn f32x4_neg(a: v128) -> v128 {
2143 f32x4_mul(a, transmute(f32x4(-1.0, -1.0, -1.0, -1.0)))
2144 }
2145
2146 /// Calculates the square root of each lane of a 128-bit vector interpreted as
2147 /// four 32-bit floating point numbers.
2148 #[inline]
2149 #[cfg_attr(test, assert_instr(f32x4.sqrt))]
2150 #[target_feature(enable = "simd128")]
2151 pub unsafe fn f32x4_sqrt(a: v128) -> v128 {
2152 transmute(llvm_f32x4_sqrt(a.as_f32x4()))
2153 }
2154
2155 /// Adds pairwise lanes of two 128-bit vectors interpreted as four 32-bit
2156 /// floating point numbers.
2157 #[inline]
2158 #[cfg_attr(test, assert_instr(f32x4.add))]
2159 #[target_feature(enable = "simd128")]
2160 pub unsafe fn f32x4_add(a: v128, b: v128) -> v128 {
2161 transmute(simd_add(a.as_f32x4(), b.as_f32x4()))
2162 }
2163
2164 /// Subtracts pairwise lanes of two 128-bit vectors interpreted as four 32-bit
2165 /// floating point numbers.
2166 #[inline]
2167 #[cfg_attr(test, assert_instr(f32x4.sub))]
2168 #[target_feature(enable = "simd128")]
2169 pub unsafe fn f32x4_sub(a: v128, b: v128) -> v128 {
2170 transmute(simd_sub(a.as_f32x4(), b.as_f32x4()))
2171 }
2172
2173 /// Multiplies pairwise lanes of two 128-bit vectors interpreted as four 32-bit
2174 /// floating point numbers.
2175 #[inline]
2176 #[cfg_attr(test, assert_instr(f32x4.mul))]
2177 #[target_feature(enable = "simd128")]
2178 pub unsafe fn f32x4_mul(a: v128, b: v128) -> v128 {
2179 transmute(simd_mul(a.as_f32x4(), b.as_f32x4()))
2180 }
2181
2182 /// Divides pairwise lanes of two 128-bit vectors interpreted as four 32-bit
2183 /// floating point numbers.
2184 #[inline]
2185 #[cfg_attr(test, assert_instr(f32x4.div))]
2186 #[target_feature(enable = "simd128")]
2187 pub unsafe fn f32x4_div(a: v128, b: v128) -> v128 {
2188 transmute(simd_div(a.as_f32x4(), b.as_f32x4()))
2189 }
2190
2191 /// Calculates the minimum of pairwise lanes of two 128-bit vectors interpreted
2192 /// as four 32-bit floating point numbers.
2193 #[inline]
2194 #[cfg_attr(test, assert_instr(f32x4.min))]
2195 #[target_feature(enable = "simd128")]
2196 pub unsafe fn f32x4_min(a: v128, b: v128) -> v128 {
2197 transmute(llvm_f32x4_min(a.as_f32x4(), b.as_f32x4()))
2198 }
2199
2200 /// Calculates the maximum of pairwise lanes of two 128-bit vectors interpreted
2201 /// as four 32-bit floating point numbers.
2202 #[inline]
2203 #[cfg_attr(test, assert_instr(f32x4.max))]
2204 #[target_feature(enable = "simd128")]
2205 pub unsafe fn f32x4_max(a: v128, b: v128) -> v128 {
2206 transmute(llvm_f32x4_max(a.as_f32x4(), b.as_f32x4()))
2207 }
2208
2209 /// Calculates the absolute value of each lane of a 128-bit vector interpreted
2210 /// as two 64-bit floating point numbers.
2211 #[inline]
2212 #[cfg_attr(test, assert_instr(f64x2.abs))]
2213 #[target_feature(enable = "simd128")]
2214 pub unsafe fn f64x2_abs(a: v128) -> v128 {
2215 transmute(llvm_f64x2_abs(a.as_f64x2()))
2216 }
2217
2218 /// Negates each lane of a 128-bit vector interpreted as two 64-bit floating
2219 /// point numbers.
2220 #[inline]
2221 #[cfg_attr(test, assert_instr(f64x2.neg))]
2222 #[target_feature(enable = "simd128")]
2223 pub unsafe fn f64x2_neg(a: v128) -> v128 {
2224 f64x2_mul(a, transmute(f64x2(-1.0, -1.0)))
2225 }
2226
2227 /// Calculates the square root of each lane of a 128-bit vector interpreted as
2228 /// two 64-bit floating point numbers.
2229 #[inline]
2230 #[cfg_attr(test, assert_instr(f64x2.sqrt))]
2231 #[target_feature(enable = "simd128")]
2232 pub unsafe fn f64x2_sqrt(a: v128) -> v128 {
2233 transmute(llvm_f64x2_sqrt(a.as_f64x2()))
2234 }
2235
2236 /// Adds pairwise lanes of two 128-bit vectors interpreted as two 64-bit
2237 /// floating point numbers.
2238 #[inline]
2239 #[cfg_attr(test, assert_instr(f64x2.add))]
2240 #[target_feature(enable = "simd128")]
2241 pub unsafe fn f64x2_add(a: v128, b: v128) -> v128 {
2242 transmute(simd_add(a.as_f64x2(), b.as_f64x2()))
2243 }
2244
2245 /// Subtracts pairwise lanes of two 128-bit vectors interpreted as two 64-bit
2246 /// floating point numbers.
2247 #[inline]
2248 #[cfg_attr(test, assert_instr(f64x2.sub))]
2249 #[target_feature(enable = "simd128")]
2250 pub unsafe fn f64x2_sub(a: v128, b: v128) -> v128 {
2251 transmute(simd_sub(a.as_f64x2(), b.as_f64x2()))
2252 }
2253
2254 /// Multiplies pairwise lanes of two 128-bit vectors interpreted as two 64-bit
2255 /// floating point numbers.
2256 #[inline]
2257 #[cfg_attr(test, assert_instr(f64x2.mul))]
2258 #[target_feature(enable = "simd128")]
2259 pub unsafe fn f64x2_mul(a: v128, b: v128) -> v128 {
2260 transmute(simd_mul(a.as_f64x2(), b.as_f64x2()))
2261 }
2262
2263 /// Divides pairwise lanes of two 128-bit vectors interpreted as two 64-bit
2264 /// floating point numbers.
2265 #[inline]
2266 #[cfg_attr(test, assert_instr(f64x2.div))]
2267 #[target_feature(enable = "simd128")]
2268 pub unsafe fn f64x2_div(a: v128, b: v128) -> v128 {
2269 transmute(simd_div(a.as_f64x2(), b.as_f64x2()))
2270 }
2271
2272 /// Calculates the minimum of pairwise lanes of two 128-bit vectors interpreted
2273 /// as two 64-bit floating point numbers.
2274 #[inline]
2275 #[cfg_attr(test, assert_instr(f64x2.min))]
2276 #[target_feature(enable = "simd128")]
2277 pub unsafe fn f64x2_min(a: v128, b: v128) -> v128 {
2278 transmute(llvm_f64x2_min(a.as_f64x2(), b.as_f64x2()))
2279 }
2280
2281 /// Calculates the maximum of pairwise lanes of two 128-bit vectors interpreted
2282 /// as two 64-bit floating point numbers.
2283 #[inline]
2284 #[cfg_attr(test, assert_instr(f64x2.max))]
2285 #[target_feature(enable = "simd128")]
2286 pub unsafe fn f64x2_max(a: v128, b: v128) -> v128 {
2287 transmute(llvm_f64x2_max(a.as_f64x2(), b.as_f64x2()))
2288 }
2289
2290 /// Converts a 128-bit vector interpreted as four 32-bit floating point numbers
2291 /// into a 128-bit vector of four 32-bit signed integers.
2292 ///
2293 /// NaN is converted to 0 and out-of-bounds values become the nearest
2294 /// representable integer.
2295 #[inline]
2296 #[cfg_attr(test, assert_instr("i32x4.trunc_sat_f32x4_s"))]
2297 #[target_feature(enable = "simd128")]
2298 pub unsafe fn i32x4_trunc_sat_f32x4_s(a: v128) -> v128 {
2299 transmute(simd_cast::<_, i32x4>(a.as_f32x4()))
2300 }
2301
2302 /// Converts a 128-bit vector interpreted as four 32-bit floating point numbers
2303 /// into a 128-bit vector of four 32-bit unsigned integers.
2304 ///
2305 /// NaN is converted to 0 and out-of-bounds values become the nearest
2306 /// representable integer.
2307 #[inline]
2308 #[cfg_attr(test, assert_instr("i32x4.trunc_sat_f32x4_u"))]
2309 #[target_feature(enable = "simd128")]
2310 pub unsafe fn i32x4_trunc_sat_f32x4_u(a: v128) -> v128 {
2311 transmute(simd_cast::<_, u32x4>(a.as_f32x4()))
2312 }
2313
2314 /// Converts a 128-bit vector interpreted as four 32-bit signed integers into a
2315 /// 128-bit vector of four 32-bit floating point numbers.
2316 #[inline]
2317 #[cfg_attr(test, assert_instr("f32x4.convert_i32x4_s"))]
2318 #[target_feature(enable = "simd128")]
2319 pub unsafe fn f32x4_convert_i32x4_s(a: v128) -> v128 {
2320 transmute(simd_cast::<_, f32x4>(a.as_i32x4()))
2321 }
2322
2323 /// Converts a 128-bit vector interpreted as four 32-bit unsigned integers into a
2324 /// 128-bit vector of four 32-bit floating point numbers.
2325 #[inline]
2326 #[cfg_attr(test, assert_instr("f32x4.convert_i32x4_u"))]
2327 #[target_feature(enable = "simd128")]
2328 pub unsafe fn f32x4_convert_i32x4_u(a: v128) -> v128 {
2329 transmute(simd_cast::<_, f32x4>(a.as_u32x4()))
2330 }
2331
2332 #[cfg(test)]
2333 pub mod tests {
2334 use super::*;
2335 use std;
2336 use std::mem;
2337 use std::num::Wrapping;
2338 use std::prelude::v1::*;
2339
2340 fn compare_bytes(a: v128, b: v128) {
2341 let a: [u8; 16] = unsafe { transmute(a) };
2342 let b: [u8; 16] = unsafe { transmute(b) };
2343 assert_eq!(a, b);
2344 }
2345
2346 #[test]
2347 fn test_v128_const() {
2348 const A: v128 =
2349 unsafe { super::i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) };
2350 compare_bytes(A, A);
2351 }
2352
2353 macro_rules! test_splat {
2354 ($test_id:ident: $val:expr => $($vals:expr),*) => {
2355 #[test]
2356 fn $test_id() {
2357 unsafe {
2358 let a = super::$test_id($val);
2359 let b: v128 = transmute([$($vals as u8),*]);
2360 compare_bytes(a, b);
2361 }
2362 }
2363 }
2364 }
2365
2366 test_splat!(i8x16_splat: 42 => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42);
2367 test_splat!(i16x8_splat: 42 => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0);
2368 test_splat!(i32x4_splat: 42 => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0);
2369 test_splat!(i64x2_splat: 42 => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0);
2370 test_splat!(f32x4_splat: 42. => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66);
2371 test_splat!(f64x2_splat: 42. => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64);
2372
2373 // tests extract and replace lanes
2374 macro_rules! test_extract {
2375 (
2376 name: $test_id:ident,
2377 extract: $extract:ident,
2378 replace: $replace:ident,
2379 elem: $elem:ty,
2380 count: $count:expr,
2381 indices: [$($idx:expr),*],
2382 ) => {
2383 #[test]
2384 fn $test_id() {
2385 unsafe {
2386 let arr: [$elem; $count] = [123 as $elem; $count];
2387 let vec: v128 = transmute(arr);
2388 $(
2389 assert_eq!($extract::<$idx>(vec), 123 as $elem);
2390 )*
2391
2392 // create a vector from array and check that the indices contain
2393 // the same values as in the array:
2394 let arr: [$elem; $count] = [$($idx as $elem),*];
2395 let vec: v128 = transmute(arr);
2396 $(
2397 assert_eq!($extract::<$idx>(vec), $idx as $elem);
2398
2399 let tmp = $replace::<$idx>(vec, 124 as $elem);
2400 assert_eq!($extract::<$idx>(tmp), 124 as $elem);
2401 )*
2402 }
2403 }
2404 }
2405 }
2406
2407 test_extract! {
2408 name: test_i8x16_extract_replace,
2409 extract: i8x16_extract_lane,
2410 replace: i8x16_replace_lane,
2411 elem: i8,
2412 count: 16,
2413 indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2414 }
2415 test_extract! {
2416 name: test_i16x8_extract_replace,
2417 extract: i16x8_extract_lane,
2418 replace: i16x8_replace_lane,
2419 elem: i16,
2420 count: 8,
2421 indices: [0, 1, 2, 3, 4, 5, 6, 7],
2422 }
2423 test_extract! {
2424 name: test_i32x4_extract_replace,
2425 extract: i32x4_extract_lane,
2426 replace: i32x4_replace_lane,
2427 elem: i32,
2428 count: 4,
2429 indices: [0, 1, 2, 3],
2430 }
2431 test_extract! {
2432 name: test_i64x2_extract_replace,
2433 extract: i64x2_extract_lane,
2434 replace: i64x2_replace_lane,
2435 elem: i64,
2436 count: 2,
2437 indices: [0, 1],
2438 }
2439 test_extract! {
2440 name: test_f32x4_extract_replace,
2441 extract: f32x4_extract_lane,
2442 replace: f32x4_replace_lane,
2443 elem: f32,
2444 count: 4,
2445 indices: [0, 1, 2, 3],
2446 }
2447 test_extract! {
2448 name: test_f64x2_extract_replace,
2449 extract: f64x2_extract_lane,
2450 replace: f64x2_replace_lane,
2451 elem: f64,
2452 count: 2,
2453 indices: [0, 1],
2454 }
2455
2456 macro_rules! test_binop {
2457 (
2458 $($name:ident => {
2459 $([$($vec1:tt)*] ($op:tt | $f:ident) [$($vec2:tt)*],)*
2460 })*
2461 ) => ($(
2462 #[test]
2463 fn $name() {
2464 unsafe {
2465 $(
2466 let v1 = [$($vec1)*];
2467 let v2 = [$($vec2)*];
2468 let v1_v128: v128 = mem::transmute(v1);
2469 let v2_v128: v128 = mem::transmute(v2);
2470 let v3_v128 = super::$f(v1_v128, v2_v128);
2471 let mut v3 = [$($vec1)*];
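// the placeholder initializer above pins down `v3`'s array type for the
// transmute below; `drop` consumes the dummy value so it isn't flagged as
// an unused assignment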
2472 drop(v3);
2473 v3 = mem::transmute(v3_v128);
2474
2475 for (i, actual) in v3.iter().enumerate() {
2476 let expected = (Wrapping(v1[i]) $op Wrapping(v2[i])).0;
2477 assert_eq!(*actual, expected);
2478 }
2479 )*
2480 }
2481 }
2482 )*)
2483 }
2484
2485 macro_rules! test_unop {
2486 (
2487 $($name:ident => {
2488 $(($op:tt | $f:ident) [$($vec1:tt)*],)*
2489 })*
2490 ) => ($(
2491 #[test]
2492 fn $name() {
2493 unsafe {
2494 $(
2495 let v1 = [$($vec1)*];
2496 let v1_v128: v128 = mem::transmute(v1);
2497 let v2_v128 = super::$f(v1_v128);
2498 let mut v2 = [$($vec1)*];
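// as in test_binop!: the dummy initializer pins down `v2`'s array type,
// and `drop` consumes the placeholder value before the real result lands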
2499 drop(v2);
2500 v2 = mem::transmute(v2_v128);
2501
2502 for (i, actual) in v2.iter().enumerate() {
2503 let expected = ($op Wrapping(v1[i])).0;
2504 assert_eq!(*actual, expected);
2505 }
2506 )*
2507 }
2508 }
2509 )*)
2510 }
2511
2512 test_binop! {
2513 test_i8x16_add => {
2514 [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
2515 (+ | i8x16_add)
2516 [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
2517
2518 [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
2519 (+ | i8x16_add)
2520 [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18],
2521
2522 [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
2523 (+ | i8x16_add)
2524 [127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 9, -24],
2525 }
2526 test_i8x16_sub => {
2527 [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
2528 (- | i8x16_sub)
2529 [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
2530
2531 [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
2532 (- | i8x16_sub)
2533 [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18],
2534
2535 [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
2536 (- | i8x16_sub)
2537 [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8],
2538 }
2539
2540 test_i16x8_add => {
2541 [0i16, 0, 0, 0, 0, 0, 0, 0]
2542 (+ | i16x8_add)
2543 [1i16, 1, 1, 1, 1, 1, 1, 1],
2544
2545 [1i16, 2, 3, 4, 5, 6, 7, 8]
2546 (+ | i16x8_add)
2547 [32767, 8, -2494, -4, 4882, -4, 848, 3830],
2548 }
2549
2550 test_i16x8_sub => {
2551 [0i16, 0, 0, 0, 0, 0, 0, 0]
2552 (- | i16x8_sub)
2553 [1i16, 1, 1, 1, 1, 1, 1, 1],
2554
2555 [1i16, 2, 3, 4, 5, 6, 7, 8]
2556 (- | i16x8_sub)
2557 [32767, 8, -2494, -4, 4882, -4, 848, 3830],
2558 }
2559
2560 test_i16x8_mul => {
2561 [0i16, 0, 0, 0, 0, 0, 0, 0]
2562 (* | i16x8_mul)
2563 [1i16, 1, 1, 1, 1, 1, 1, 1],
2564
2565 [1i16, 2, 3, 4, 5, 6, 7, 8]
2566 (* | i16x8_mul)
2567 [32767, 8, -2494, -4, 4882, -4, 848, 3830],
2568 }
2569
2570 test_i32x4_add => {
2571 [0i32, 0, 0, 0] (+ | i32x4_add) [1, 2, 3, 4],
2572 [1i32, 1283, i32::MAX, i32::MIN]
2573 (+ | i32x4_add)
2574 [i32::MAX; 4],
2575 }
2576
2577 test_i32x4_sub => {
2578 [0i32, 0, 0, 0] (- | i32x4_sub) [1, 2, 3, 4],
2579 [1i32, 1283, i32::MAX, i32::MIN]
2580 (- | i32x4_sub)
2581 [i32::MAX; 4],
2582 }
2583
2584 test_i32x4_mul => {
2585 [0i32, 0, 0, 0] (* | i32x4_mul) [1, 2, 3, 4],
2586 [1i32, 1283, i32::MAX, i32::MIN]
2587 (* | i32x4_mul)
2588 [i32::MAX; 4],
2589 }
2590
2591 // The i64x2 add/sub/mul TODOs are filled by the hedged sketch following
2592 // this macro invocation.
2593 }
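
// A hedged sketch filling the i64x2 TODOs noted above; the operand values
// are assumptions chosen to exercise wrapping at the i64 lane boundaries.
test_binop! {
    test_i64x2_add => {
        [0i64, 0] (+ | i64x2_add) [1, 2],
        [i64::MIN, i64::MAX] (+ | i64x2_add) [i64::MAX, 1],
    }
    test_i64x2_sub => {
        [0i64, 0] (- | i64x2_sub) [1, 2],
        [i64::MIN, i64::MAX] (- | i64x2_sub) [1, i64::MAX],
    }
    test_i64x2_mul => {
        [0i64, 0] (* | i64x2_mul) [1, 2],
        [i64::MIN, i64::MAX] (* | i64x2_mul) [-1, 2],
    }
}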
2594
2595 test_unop! {
2596 test_i8x16_neg => {
2597 (- | i8x16_neg)
2598 [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
2599
2600 (- | i8x16_neg)
2601 [-2i8, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18],
2602
2603 (- | i8x16_neg)
2604 [-127i8, -44, 43, 126, 4, -128, 127, -59, -43, 39, -69, 79, -3, 35, 83, 13],
2605 }
2606
2607 test_i16x8_neg => {
2608 (- | i16x8_neg) [1i16, 1, 1, 1, 1, 1, 1, 1],
2609 (- | i16x8_neg) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847],
2610 }
2611
2612 test_i32x4_neg => {
2613 (- | i32x4_neg) [1i32, 2, 3, 4],
2614 (- | i32x4_neg) [i32::MIN, i32::MAX, 0, 4],
2615 }
2616
2617 // The i64x2_neg TODO is filled by the hedged sketch following this macro invocation.
2618 }
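
// A hedged sketch filling the i64x2_neg TODO above, plus coverage for the
// wrapping absolute-value intrinsics; lane values are illustrative
// assumptions.
test_unop! {
    test_i64x2_neg => {
        (- | i64x2_neg) [1i64, 2],
        (- | i64x2_neg) [i64::MIN, i64::MAX],
    }
}

#[test]
fn test_wrapping_abs() {
    unsafe {
        let r: [i8; 16] = transmute(i8x16_abs(transmute([
            i8::MIN, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ])));
        // -(-128) is not representable in i8, so that lane wraps back to i8::MIN
        assert_eq!(r, [i8::MIN, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        let r: [i32; 4] = transmute(i32x4_abs(transmute([i32::MIN, -7, 7, 0])));
        assert_eq!(r, [i32::MIN, 7, 7, 0]);
    }
}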
2619
2620 #[test]
2621 fn test_v8x16_shuffle() {
2622 unsafe {
2623 let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
2624 let b = [
2625 16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2626 ];
2627
2628 let vec_a: v128 = transmute(a);
2629 let vec_b: v128 = transmute(b);
2630
2631 let vec_r = v8x16_shuffle::<0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30>(
2632 vec_a, vec_b,
2633 );
2634
2635 let e = [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30];
2636 let vec_e: v128 = transmute(e);
2637 compare_bytes(vec_r, vec_e);
2638 }
2639 }
2640
2641 macro_rules! floating_point {
2642 (f32) => {
2643 true
2644 };
2645 (f64) => {
2646 true
2647 };
2648 ($id:ident) => {
2649 false
2650 };
2651 }
2652
2653 trait IsNan: Sized {
2654 fn is_nan(self) -> bool {
2655 false
2656 }
2657 }
2658 impl IsNan for i8 {}
2659 impl IsNan for i16 {}
2660 impl IsNan for i32 {}
2661 impl IsNan for i64 {}
2662
2663 macro_rules! test_bop {
2664 ($id:ident[$ety:ident; $ecount:expr] |
2665 $binary_op:ident [$op_test_id:ident] :
2666 ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
2667 test_bop!(
2668 $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]:
2669 ([$($in_a),*], [$($in_b),*]) => [$($out),*]
2670 );
2671
2672 };
2673 ($id:ident[$ety:ident; $ecount:expr] => $oty:ident |
2674 $binary_op:ident [$op_test_id:ident] :
2675 ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
2676 #[test]
2677 fn $op_test_id() {
2678 unsafe {
2679 let a_input: [$ety; $ecount] = [$($in_a),*];
2680 let b_input: [$ety; $ecount] = [$($in_b),*];
2681 let output: [$oty; $ecount] = [$($out),*];
2682
2683 let a_vec_in: v128 = transmute(a_input);
2684 let b_vec_in: v128 = transmute(b_input);
2685 let vec_res: v128 = $binary_op(a_vec_in, b_vec_in);
2686
2687 let res: [$oty; $ecount] = transmute(vec_res);
2688
2689 if !floating_point!($ety) {
2690 assert_eq!(res, output);
2691 } else {
2692 for i in 0..$ecount {
2693 let r = res[i];
2694 let o = output[i];
2695 assert_eq!(r.is_nan(), o.is_nan());
2696 if !r.is_nan() {
2697 assert_eq!(r, o);
2698 }
2699 }
2700 }
2701 }
2702 }
2703 }
2704 }
2705
2706 macro_rules! test_bops {
2707 ($id:ident[$ety:ident; $ecount:expr] |
2708 $binary_op:ident [$op_test_id:ident]:
2709 ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => {
2710 #[test]
2711 fn $op_test_id() {
2712 unsafe {
2713 let a_input: [$ety; $ecount] = [$($in_a),*];
2714 let output: [$ety; $ecount] = [$($out),*];
2715
2716 let a_vec_in: v128 = transmute(a_input);
2717 let vec_res: v128 = $binary_op(a_vec_in, $in_b);
2718
2719 let res: [$ety; $ecount] = transmute(vec_res);
2720 assert_eq!(res, output);
2721 }
2722 }
2723 }
2724 }
2725
2726 macro_rules! test_uop {
2727 ($id:ident[$ety:ident; $ecount:expr] |
2728 $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => {
2729 #[test]
2730 fn $op_test_id() {
2731 unsafe {
2732 let a_input: [$ety; $ecount] = [$($in_a),*];
2733 let output: [$ety; $ecount] = [$($out),*];
2734
2735 let a_vec_in: v128 = transmute(a_input);
2736 let vec_res: v128 = $unary_op(a_vec_in);
2737
2738 let res: [$ety; $ecount] = transmute(vec_res);
2739 assert_eq!(res, output);
2740 }
2741 }
2742 }
2743 }
2744
2745 test_bops!(i8x16[i8; 16] | i8x16_shl[i8x16_shl_test]:
2746 ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
2747 [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]);
2748 test_bops!(i16x8[i16; 8] | i16x8_shl[i16x8_shl_test]:
2749 ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) =>
2750 [0, -2, 4, 6, 8, 10, 12, -2]);
2751 test_bops!(i32x4[i32; 4] | i32x4_shl[i32x4_shl_test]:
2752 ([0, -1, 2, 3], 1) => [0, -2, 4, 6]);
2753 test_bops!(i64x2[i64; 2] | i64x2_shl[i64x2_shl_test]:
2754 ([0, -1], 1) => [0, -2]);
2755
2756 test_bops!(i8x16[i8; 16] | i8x16_shr_s[i8x16_shr_s_test]:
2757 ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
2758 [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
2759 test_bops!(i16x8[i16; 8] | i16x8_shr_s[i16x8_shr_s_test]:
2760 ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) =>
2761 [0, -1, 1, 1, 2, 2, 3, i16::MAX / 2]);
2762 test_bops!(i32x4[i32; 4] | i32x4_shr_s[i32x4_shr_s_test]:
2763 ([0, -1, 2, 3], 1) => [0, -1, 1, 1]);
2764 test_bops!(i64x2[i64; 2] | i64x2_shr_s[i64x2_shr_s_test]:
2765 ([0, -1], 1) => [0, -1]);
2766
2767 test_bops!(i8x16[i8; 16] | i8x16_shr_u[i8x16_shr_u_test]:
2768 ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
2769 [0, i8::MAX, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
2770 test_bops!(i16x8[i16; 8] | i16x8_shr_u[i16x8_shr_u_test]:
2771 ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) =>
2772 [0, i16::MAX, 1, 1, 2, 2, 3, i16::MAX / 2]);
2773 test_bops!(i32x4[i32; 4] | i32x4_shr_u[i32x4_shr_u_test]:
2774 ([0, -1, 2, 3], 1) => [0, i32::MAX, 1, 1]);
2775 test_bops!(i64x2[i64; 2] | i64x2_shr_u[i64x2_shr_u_test]:
2776 ([0, -1], 1) => [0, i64::MAX]);
2777
2778 #[test]
2779 fn v128_bitwise_logical_ops() {
2780 unsafe {
2781 let a: [u32; 4] = [u32::MAX, 0, u32::MAX, 0];
2782 let b: [u32; 4] = [u32::MAX; 4];
2783 let c: [u32; 4] = [0; 4];
2784
2785 let vec_a: v128 = transmute(a);
2786 let vec_b: v128 = transmute(b);
2787 let vec_c: v128 = transmute(c);
2788
2789 let r: v128 = v128_and(vec_a, vec_a);
2790 compare_bytes(r, vec_a);
2791 let r: v128 = v128_and(vec_a, vec_b);
2792 compare_bytes(r, vec_a);
2793 let r: v128 = v128_or(vec_a, vec_b);
2794 compare_bytes(r, vec_b);
2795 let r: v128 = v128_not(vec_b);
2796 compare_bytes(r, vec_c);
2797 let r: v128 = v128_xor(vec_a, vec_c);
2798 compare_bytes(r, vec_a);
2799
2800 let r: v128 = v128_bitselect(vec_b, vec_c, vec_b);
2801 compare_bytes(r, vec_b);
2802 let r: v128 = v128_bitselect(vec_b, vec_c, vec_c);
2803 compare_bytes(r, vec_c);
2804 let r: v128 = v128_bitselect(vec_b, vec_c, vec_a);
2805 compare_bytes(r, vec_a);
2806 }
2807 }
2808
2809 macro_rules! test_bool_red {
2810 ([$test_id:ident, $any:ident, $all:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => {
2811 #[test]
2812 fn $test_id() {
2813 unsafe {
2814 let vec_a: v128 = transmute([$($true),*]); // true
2815 let vec_b: v128 = transmute([$($false),*]); // false
2816 let vec_c: v128 = transmute([$($alt),*]); // alternating
2817
2818 assert_eq!($any(vec_a), 1);
2819 assert_eq!($any(vec_b), 0);
2820 assert_eq!($any(vec_c), 1);
2821
2822 assert_eq!($all(vec_a), 1);
2823 assert_eq!($all(vec_b), 0);
2824 assert_eq!($all(vec_c), 0);
2825 }
2826 }
2827 }
2828 }
2829
2830 test_bool_red!(
2831 [i8x16_boolean_reductions, i8x16_any_true, i8x16_all_true]
2832 | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
2833 | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
2834 | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
2835 );
2836 test_bool_red!(
2837 [i16x8_boolean_reductions, i16x8_any_true, i16x8_all_true]
2838 | [1_i16, 1, 1, 1, 1, 1, 1, 1]
2839 | [0_i16, 0, 0, 0, 0, 0, 0, 0]
2840 | [1_i16, 0, 1, 0, 1, 0, 1, 0]
2841 );
2842 test_bool_red!(
2843 [i32x4_boolean_reductions, i32x4_any_true, i32x4_all_true]
2844 | [1_i32, 1, 1, 1]
2845 | [0_i32, 0, 0, 0]
2846 | [1_i32, 0, 1, 0]
2847 );
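
// The signed and unsigned min/max forms above are otherwise untested here;
// a hedged sketch whose lane values (assumptions) make the sign-bit
// difference visible.
#[test]
fn test_i8x16_min_max_signedness() {
    unsafe {
        let a: v128 = transmute([-1i8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        let b: v128 = transmute([1i8, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        let min_s: [i8; 16] = transmute(i8x16_min_s(a, b));
        let min_u: [i8; 16] = transmute(i8x16_min_u(a, b));
        let max_s: [i8; 16] = transmute(i8x16_max_s(a, b));
        let max_u: [i8; 16] = transmute(i8x16_max_u(a, b));
        // -1 is 0xff when read as unsigned, so the signed and unsigned answers flip
        assert_eq!((min_s[0], max_s[0]), (-1, 1));
        assert_eq!((min_u[0], max_u[0]), (1, -1));
    }
}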
2848
2849 test_bop!(i8x16[i8; 16] | i8x16_eq[i8x16_eq_test]:
2850 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2851 [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
2852 [-1, 0, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0, -1, 0, -1, -1]);
2853 test_bop!(i16x8[i16; 8] | i16x8_eq[i16x8_eq_test]:
2854 ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
2855 [-1, 0, -1, 0, -1, 0, -1, -1]);
2856 test_bop!(i32x4[i32; 4] | i32x4_eq[i32x4_eq_test]:
2857 ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
2858 test_bop!(f32x4[f32; 4] => i32 | f32x4_eq[f32x4_eq_test]:
2859 ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
2860 test_bop!(f64x2[f64; 2] => i64 | f64x2_eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
2861
2862 test_bop!(i8x16[i8; 16] | i8x16_ne[i8x16_ne_test]:
2863 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2864 [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
2865 [0, -1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, 0]);
2866 test_bop!(i16x8[i16; 8] | i16x8_ne[i16x8_ne_test]:
2867 ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
2868 [0, -1, 0, -1, 0, -1, 0, 0]);
2869 test_bop!(i32x4[i32; 4] | i32x4_ne[i32x4_ne_test]:
2870 ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
2871 test_bop!(f32x4[f32; 4] => i32 | f32x4_ne[f32x4_ne_test]:
2872 ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
2873 test_bop!(f64x2[f64; 2] => i64 | f64x2_ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]);
2874
2875 test_bop!(i8x16[i8; 16] | i8x16_lt_s[i8x16_lt_test]:
2876 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2877 [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
2878 [0, -1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, 0]);
2879 test_bop!(i16x8[i16; 8] | i16x8_lt_s[i16x8_lt_test]:
2880 ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
2881 [0, -1, 0, -1, 0, -1, 0, 0]);
2882 test_bop!(i32x4[i32; 4] | i32x4_lt_s[i32x4_lt_test]:
2883 ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
2884 test_bop!(f32x4[f32; 4] => i32 | f32x4_lt[f32x4_lt_test]:
2885 ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
2886 test_bop!(f64x2[f64; 2] => i64 | f64x2_lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]);
2887
2888 test_bop!(i8x16[i8; 16] | i8x16_gt_s[i8x16_gt_test]:
2889 ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
2890 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) =>
2891 [0, -1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, 0]);
2892 test_bop!(i16x8[i16; 8] | i16x8_gt_s[i16x8_gt_test]:
2893 ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
2894 [0, -1, 0, -1, 0, -1, 0, 0]);
2895 test_bop!(i32x4[i32; 4] | i32x4_gt_s[i32x4_gt_test]:
2896 ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]);
2897 test_bop!(f32x4[f32; 4] => i32 | f32x4_gt[f32x4_gt_test]:
2898 ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]);
2899 test_bop!(f64x2[f64; 2] => i64 | f64x2_gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]);
2900
2901 test_bop!(i8x16[i8; 16] | i8x16_ge_s[i8x16_ge_test]:
2902 ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2903 [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
2904 [-1, 0, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0, -1, 0, -1, -1]);
2905 test_bop!(i16x8[i16; 8] | i16x8_ge_s[i16x8_ge_test]:
2906 ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
2907 [-1, 0, -1, 0, -1, 0, -1, -1]);
2908 test_bop!(i32x4[i32; 4] | i32x4_ge_s[i32x4_ge_test]:
2909 ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
2910 test_bop!(f32x4[f32; 4] => i32 | f32x4_ge[f32x4_ge_test]:
2911 ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
2912 test_bop!(f64x2[f64; 2] => i64 | f64x2_ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
2913
2914 test_bop!(i8x16[i8; 16] | i8x16_le_s[i8x16_le_test]:
2915 ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
2916 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
2917 ) =>
2918 [-1, 0, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0, -1, 0, -1, -1]);
2919 test_bop!(i16x8[i16; 8] | i16x8_le_s[i16x8_le_test]:
2920 ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
2921 [-1, 0, -1, 0, -1, 0, -1, -1]);
2922 test_bop!(i32x4[i32; 4] | i32x4_le_s[i32x4_le_test]:
2923 ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]);
2924 test_bop!(f32x4[f32; 4] => i32 | f32x4_le[f32x4_le_test]:
2925 ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, 0]);
2926 test_bop!(f64x2[f64; 2] => i64 | f64x2_le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]);
2927
2928 #[test]
2929 fn v128_bitwise_load_store() {
2930 unsafe {
2931 let mut arr: [i32; 4] = [0, 1, 2, 3];
2932
2933 let vec = v128_load(arr.as_ptr() as *const v128);
2934 let vec = i32x4_add(vec, vec);
2935 v128_store(arr.as_mut_ptr() as *mut v128, vec);
2936
2937 assert_eq!(arr, [0, 2, 4, 6]);
2938 }
2939 }
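
// Hedged sketches for the saturating and rounding-average intrinsics, which
// the wrapping tests above don't reach; lane values are assumptions chosen
// to hit the saturation edges (the i16x8 forms behave analogously).
#[test]
fn test_i8x16_saturating_arithmetic() {
    unsafe {
        let a: v128 = transmute([i8::MAX, i8::MIN, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        let one: v128 = transmute([1i8; 16]);
        // signed saturating add clamps at i8::MAX instead of wrapping
        let r: [i8; 16] = transmute(i8x16_add_saturate_s(a, one));
        assert_eq!((r[0], r[1]), (i8::MAX, i8::MIN + 1));
        // signed saturating sub clamps at i8::MIN
        let r: [i8; 16] = transmute(i8x16_sub_saturate_s(a, one));
        assert_eq!((r[0], r[1]), (i8::MAX - 1, i8::MIN));
        // the unsigned forms clamp to the [0x00, 0xff] range instead
        let ff: v128 = transmute([0xff_u8; 16]);
        let zero: v128 = transmute([0u8; 16]);
        let r: [u8; 16] = transmute(i8x16_add_saturate_u(ff, ff));
        assert_eq!(r, [0xff; 16]);
        let r: [u8; 16] = transmute(i8x16_sub_saturate_u(zero, ff));
        assert_eq!(r, [0u8; 16]);
    }
}

#[test]
fn test_i8x16_avgr_u() {
    unsafe {
        let a: v128 = transmute([0u8, 254, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        let b: v128 = transmute([1u8, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        // rounding average computes (a + b + 1) >> 1, so ties round up
        let r: [u8; 16] = transmute(i8x16_avgr_u(a, b));
        assert_eq!((r[0], r[1], r[2]), (1, 255, 255));
    }
}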
2940
2941 test_uop!(f32x4[f32; 4] | f32x4_neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]);
2942 test_uop!(f32x4[f32; 4] | f32x4_abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]);
2943 test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test]:
2944 ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]);
2945 test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test_nan]:
2946 ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
2947 => [0., -3., -4., std::f32::NAN]);
2948 test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test]:
2949 ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]);
2950 test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test_nan]:
2951 ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
2952 => [1., -1., 7., std::f32::NAN]);
2953 test_bop!(f32x4[f32; 4] | f32x4_add[f32x4_add_test]:
2954 ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]);
2955 test_bop!(f32x4[f32; 4] | f32x4_sub[f32x4_sub_test]:
2956 ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]);
2957 test_bop!(f32x4[f32; 4] | f32x4_mul[f32x4_mul_test]:
2958 ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]);
2959 test_bop!(f32x4[f32; 4] | f32x4_div[f32x4_div_test]:
2960 ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]);
2961
2962 test_uop!(f64x2[f64; 2] | f64x2_neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]);
2963 test_uop!(f64x2[f64; 2] | f64x2_abs[f64x2_abs_test]: [0., -1.] => [ 0., 1.]);
2964 test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test]:
2965 ([0., -1.], [1., -3.]) => [0., -3.]);
2966 test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test_nan]:
2967 ([7., 8.], [-4., std::f64::NAN])
2968 => [ -4., std::f64::NAN]);
2969 test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test]:
2970 ([0., -1.], [1., -3.]) => [1., -1.]);
2971 test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test_nan]:
2972 ([7., 8.], [ -4., std::f64::NAN])
2973 => [7., std::f64::NAN]);
2974 test_bop!(f64x2[f64; 2] | f64x2_add[f64x2_add_test]:
2975 ([0., -1.], [1., -3.]) => [1., -4.]);
2976 test_bop!(f64x2[f64; 2] | f64x2_sub[f64x2_sub_test]:
2977 ([0., -1.], [1., -3.]) => [-1., 2.]);
2978 test_bop!(f64x2[f64; 2] | f64x2_mul[f64x2_mul_test]:
2979 ([0., -1.], [1., -3.]) => [0., 3.]);
2980 test_bop!(f64x2[f64; 2] | f64x2_div[f64x2_div_test]:
2981 ([0., -8.], [1., 4.]) => [0., -2.]);
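
// The square-root intrinsics lack coverage above; perfect squares keep the
// expected floating-point results exact.
test_uop!(f32x4[f32; 4] | f32x4_sqrt[f32x4_sqrt_test]: [0., 1., 4., 9.] => [0., 1., 2., 3.]);
test_uop!(f64x2[f64; 2] | f64x2_sqrt[f64x2_sqrt_test]: [4., 9.] => [2., 3.]);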
2982
2983 macro_rules! test_conv {
2984 ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => {
2985 #[test]
2986 fn $test_id() {
2987 unsafe {
2988 let from: v128 = transmute($from);
2989 let to: v128 = transmute($to);
2990
2991 let r: v128 = $conv_id(from);
2992
2993 compare_bytes(r, to);
2994 }
2995 }
2996 };
2997 }
2998
2999 test_conv!(
3000 f32x4_convert_s_i32x4 | f32x4_convert_i32x4_s | f32x4 | [1_i32, 2, 3, 4],
3001 [1_f32, 2., 3., 4.]
3002 );
3003 test_conv!(
3004 f32x4_convert_u_i32x4 | f32x4_convert_i32x4_u | f32x4 | [u32::MAX, 2, 3, 4],
3005 [u32::MAX as f32, 2., 3., 4.]
3006 );
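
// A hedged sketch of the narrowing and widening conversions, which have no
// coverage above; lane values are assumptions, and the low result lanes are
// assumed to come from `a` per the SIMD proposal's lane ordering.
#[test]
fn test_narrow_and_widen() {
    unsafe {
        // narrowing saturates each signed 16-bit lane into the 8-bit range
        let a: v128 = transmute([300i16, -300, 5, -5, 0, 0, 0, 0]);
        let r: [i8; 16] = transmute(i8x16_narrow_i16x8_s(a, a));
        assert_eq!((r[0], r[1], r[2], r[3]), (i8::MAX, i8::MIN, 5, -5));

        // widening the low half sign- or zero-extends each 8-bit lane
        let v: v128 = transmute([-1i8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        let s: [i16; 8] = transmute(i16x8_widen_low_i8x16_s(v));
        assert_eq!((s[0], s[1]), (-1i16, 2));
        let u: [i16; 8] = transmute(i16x8_widen_low_i8x16_u(v));
        // -1 reads as 0xff once the lanes are treated as unsigned
        assert_eq!((u[0], u[1]), (0xff_i16, 2));
    }
}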
3007
3008 // FIXME: this fails, and produces 0 instead of saturating at i32::MAX
3009 // test_conv!(
3010 // i32x4_trunc_s_f32x4_sat
3011 // | i32x4_trunc_sat_f32x4_s
3012 // | i32x4
3013 // | [f32::NAN, 2., (i32::MAX as f32 + 1.), 4.],
3014 // [0, 2, i32::MAX, 4]
3015 // );
3016 // FIXME: add other saturating tests
3017 }