//! Vertical floating-point `mul_add`
#![allow(unused)]
use crate::*;

// FIXME: 64-bit 1 element mul_add

pub(crate) trait MulAdd {
    fn mul_add(self, y: Self, z: Self) -> Self;
}
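
// Added commentary: a fused multiply-add computes `x * y + z` with a single
// rounding step, which is both faster and more accurate than a separate
// multiply followed by an add when hardware FMA is available.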

// Bindings to LLVM's `llvm.fma.*` intrinsics; LLVM lowers these to native
// FMA instructions when the target supports them, and to a libm `fma` call
// otherwise.
#[cfg(not(target_arch = "s390x"))]
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.fma.v2f32"]
    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
    #[link_name = "llvm.fma.v4f32"]
    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
    #[link_name = "llvm.fma.v8f32"]
    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
    #[link_name = "llvm.fma.v16f32"]
    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
    /* FIXME 64-bit single elem vectors
    #[link_name = "llvm.fma.v1f64"]
    fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
    */
    #[link_name = "llvm.fma.v2f64"]
    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
    #[link_name = "llvm.fma.v4f64"]
    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
    #[link_name = "llvm.fma.v8f64"]
    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
}

gen_tertiary_impl_table!(MulAdd, mul_add);
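
// Added note: `gen_tertiary_impl_table!` defines the `impl_tertiary!` helper
// used below, which implements `MulAdd` for a vector type by forwarding to
// the named backend function. The bracketed forms appear to adapt vector
// widths to the backend: `[t => T]` widens, `[h => T]` splits into halves,
// and `[q => T]` into quarters.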

cfg_if! {
    if #[cfg(target_arch = "s390x")] {
        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
        // The LLVM fma intrinsics are broken on s390x (see the issue above),
        // so fall back to an unfused multiply-add, which may round twice.
        macro_rules! impl_broken {
            ($id:ident) => {
                impl MulAdd for $id {
                    #[inline]
                    fn mul_add(self, y: Self, z: Self) -> Self {
                        self * y + z
                    }
                }
            };
        }

        impl_broken!(f32x2);
        impl_broken!(f32x4);
        impl_broken!(f32x8);
        impl_broken!(f32x16);

        impl_broken!(f64x2);
        impl_broken!(f64x4);
        impl_broken!(f64x8);
    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
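        // Added note: with the `sleef-sys` feature on x86_64, forward to
        // SLEEF's vectorized fma kernels, selecting the widest ISA the
        // target advertises (AVX2, then AVX, then SSE4.2), and fall back to
        // the LLVM intrinsics otherwise.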
        use sleef_sys::*;
        cfg_if! {
            if #[cfg(target_feature = "avx2")] {
                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);

                impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
                impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
                impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
                impl_tertiary!(f64x4: Sleef_fmad4_avx2);
            } else if #[cfg(target_feature = "avx")] {
                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);

                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x8: Sleef_fmaf8_avx);
                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
                impl_tertiary!(f64x4: Sleef_fmad4_avx);
            } else if #[cfg(target_feature = "sse4.2")] {
                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);

                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
                impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
            } else {
                impl_tertiary!(f32x2: fma_v2f32);
                impl_tertiary!(f32x16: fma_v16f32);
                impl_tertiary!(f64x8: fma_v8f64);

                impl_tertiary!(f32x4: fma_v4f32);
                impl_tertiary!(f32x8: fma_v8f32);
                impl_tertiary!(f64x2: fma_v2f64);
                impl_tertiary!(f64x4: fma_v4f64);
            }
        }
    } else {
        impl_tertiary!(f32x2: fma_v2f32);
        impl_tertiary!(f32x4: fma_v4f32);
        impl_tertiary!(f32x8: fma_v8f32);
        impl_tertiary!(f32x16: fma_v16f32);
        // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit single elem vectors
        impl_tertiary!(f64x2: fma_v2f64);
        impl_tertiary!(f64x4: fma_v4f64);
        impl_tertiary!(f64x8: fma_v8f64);
    }
}
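
// Usage sketch (added, hypothetical; written in the crate's test style):
//
//     #[test]
//     fn mul_add_f32x4() {
//         let x = f32x4::splat(2.);
//         assert_eq!(x.mul_add(x, x), f32x4::splat(6.)); // 2. * 2. + 2.
//     }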