[rustc.git] / vendor / packed_simd_2 / src / api / reductions / integer_arithmetic.rs

//! Implements portable horizontal integer vector arithmetic reductions.

/// Generates the horizontal integer arithmetic reductions for the vector type
/// `$id`, together with `Sum`/`Product` implementations and unit tests.
///
/// Macro arguments, in the shape `[$elem_ty; $elem_count]: $id | $ielem_ty | $test_tt`:
///
/// * `$elem_ty`: scalar type returned by `wrapping_sum`/`wrapping_product`.
/// * `$elem_count`: accepted for signature uniformity; not used by this macro.
/// * `$id`: the vector type the items are generated for.
/// * `$ielem_ty`: scalar type the reduction intrinsics are invoked with; their
///   result is cast to `$elem_ty`.
/// * `$test_tt`: token forwarded to `test_if!`, which gates the generated tests.
macro_rules! impl_reduction_integer_arithmetic {
    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
     | $test_tt:tt) => {
        impl $id {
            /// Horizontal wrapping sum of the vector elements.
            ///
            /// Wrapping integer addition is associative and commutative, so
            /// the result does not depend on how the lanes are grouped. For an
            /// 8 element vector it equals, for example:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If the sum overflows, it wraps around: the returned value is
            /// the mathematical result modulo `2^n`, where `n` is the number
            /// of bits of the element type.
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_add_ordered;
                    // SAFETY: NOTE(review): assumes `self.0` is the SIMD
                    // representation with `$ielem_ty` lanes that the intrinsic
                    // expects; confirm against `crate::llvm`.
                    let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // Scalar fallback: fold the remaining lanes into lane 0.
                    (1..$id::lanes()).fold(self.extract(0) as $elem_ty, |acc, i| {
                        acc.wrapping_add(self.extract(i) as $elem_ty)
                    })
                }
            }

            /// Horizontal wrapping product of the vector elements.
            ///
            /// Wrapping integer multiplication is associative and commutative,
            /// so the result does not depend on how the lanes are grouped. For
            /// an 8 element vector it equals, for example:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// If the product overflows, it wraps around: the returned value
            /// is the mathematical result modulo `2^n`, where `n` is the
            /// number of bits of the element type.
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_mul_ordered;
                    // SAFETY: NOTE(review): assumes `self.0` is the SIMD
                    // representation with `$ielem_ty` lanes that the intrinsic
                    // expects; confirm against `crate::llvm`.
                    let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // Scalar fallback: fold the remaining lanes into lane 0.
                    (1..$id::lanes()).fold(self.extract(0) as $elem_ty, |acc, i| {
                        acc.wrapping_mul(self.extract(i) as $elem_ty)
                    })
                }
            }
        }

        /// Adds up an iterator of vectors with `Add::add`, starting from the
        /// all-zeros vector (which is also the result for an empty iterator).
        impl crate::iter::Sum for $id {
            #[inline]
            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), crate::ops::Add::add)
            }
        }

        /// Multiplies an iterator of vectors with `Mul::mul`, starting from
        /// the all-ones vector (which is also the result for an empty
        /// iterator).
        impl crate::iter::Product for $id {
            #[inline]
            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), crate::ops::Mul::mul)
            }
        }

        /// Same as `Sum<$id>`, but for iterators that yield references.
        impl<'a> crate::iter::Sum<&'a $id> for $id {
            #[inline]
            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), |acc, v| crate::ops::Add::add(acc, *v))
            }
        }

        /// Same as `Product<$id>`, but for iterators that yield references.
        impl<'a> crate::iter::Product<&'a $id> for $id {
            #[inline]
            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), |acc, v| crate::ops::Mul::mul(acc, *v))
            }
        }

        test_if! {
            $test_tt:
            paste::item! {
                pub mod [<$id _reduction_int_arith>] {
                    use super::*;

                    // Returns a vector of ones in which every lane whose index
                    // is a multiple of `x` holds 2 instead.
                    fn alternating(x: usize) -> $id {
                        let mut v = $id::splat(1 as $elem_ty);
                        for i in (0..$id::lanes()).filter(|i| i % x == 0) {
                            v = v.replace(i, 2 as $elem_ty);
                        }
                        v
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);

                        // Every other lane is 2, the rest are 1; a single-lane
                        // vector holds just the 2.
                        let v = alternating(2);
                        let expected = if $id::lanes() > 1 {
                            ($id::lanes() / 2 + $id::lanes()) as $elem_ty
                        } else {
                            2 as $elem_ty
                        };
                        assert_eq!(v.wrapping_sum(), expected);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum_overflow() {
                        let start = $elem_ty::MAX - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start);
                        let vwrapping_sum = v.wrapping_sum();

                        // Scalar reference: add `start` once per lane.
                        let wrapping_sum = (1..$id::lanes())
                            .fold(start, |acc, _| acc.wrapping_add(start));
                        assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 1 as $elem_ty);

                        // For 16, 32 and 64 lanes the stride is widened so that
                        // exactly four lanes hold 2 and the product stays 16.
                        let f = match $id::lanes() {
                            64 => 16,
                            32 => 8,
                            16 => 4,
                            _ => 2,
                        };
                        let v = alternating(f);
                        let expected = if $id::lanes() > 1 {
                            2_usize.pow(($id::lanes() / f) as u32) as $elem_ty
                        } else {
                            2 as $elem_ty
                        };
                        assert_eq!(v.wrapping_product(), expected);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product_overflow() {
                        let start = $elem_ty::MAX - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start);
                        let vmul = v.wrapping_product();

                        // Scalar reference: multiply by `start` once per lane.
                        let mul = (1..$id::lanes())
                            .fold(start, |acc, _| acc.wrapping_mul(start));
                        assert_eq!(mul, vmul, "v = {:?}", v);
                    }
                }
            }
        }
    };
}
Commit	Line	Data
f20569fa XL	1	//! Implements portable horizontal integer vector arithmetic reductions.
	2
	3	macro_rules! impl_reduction_integer_arithmetic {
	4	([$elem_ty:ident; $elem_count:expr]: $id:ident \| $ielem_ty:ident
	5	\| $test_tt:tt) => {
	6	impl $id {
	7	/// Horizontal wrapping sum of the vector elements.
	8	///
	9	/// The intrinsic performs a tree-reduction of the vector elements.
	10	/// That is, for an 8 element vector:
	11	///
	12	/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
	13	///
	14	/// If an operation overflows it returns the mathematical result
	15	/// modulo `2^n` where `n` is the number of times it overflows.
	16	#[inline]
	17	pub fn wrapping_sum(self) -> $elem_ty {
	18	#[cfg(not(target_arch = "aarch64"))]
	19	{
	20	use crate::llvm::simd_reduce_add_ordered;
f25598a0	21	let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) };
f20569fa XL	22	v as $elem_ty
	23	}
	24	#[cfg(target_arch = "aarch64")]
	25	{
	26	// FIXME: broken on AArch64
	27	// https://github.com/rust-lang-nursery/packed_simd/issues/15
	28	let mut x = self.extract(0) as $elem_ty;
	29	for i in 1..$id::lanes() {
	30	x = x.wrapping_add(self.extract(i) as $elem_ty);
	31	}
	32	x
	33	}
	34	}
	35
	36	/// Horizontal wrapping product of the vector elements.
	37	///
	38	/// The intrinsic performs a tree-reduction of the vector elements.
	39	/// That is, for an 8 element vector:
	40	///
	41	/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
	42	///
	43	/// If an operation overflows it returns the mathematical result
	44	/// modulo `2^n` where `n` is the number of times it overflows.
	45	#[inline]
	46	pub fn wrapping_product(self) -> $elem_ty {
	47	#[cfg(not(target_arch = "aarch64"))]
	48	{
	49	use crate::llvm::simd_reduce_mul_ordered;
f25598a0	50	let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) };
f20569fa XL	51	v as $elem_ty
	52	}
	53	#[cfg(target_arch = "aarch64")]
	54	{
	55	// FIXME: broken on AArch64
	56	// https://github.com/rust-lang-nursery/packed_simd/issues/15
	57	let mut x = self.extract(0) as $elem_ty;
	58	for i in 1..$id::lanes() {
	59	x = x.wrapping_mul(self.extract(i) as $elem_ty);
	60	}
	61	x
	62	}
	63	}
	64	}
	65
	66	impl crate::iter::Sum for $id {
	67	#[inline]
	68	fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
	69	iter.fold($id::splat(0), crate::ops::Add::add)
	70	}
	71	}
	72
	73	impl crate::iter::Product for $id {
	74	#[inline]
	75	fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
	76	iter.fold($id::splat(1), crate::ops::Mul::mul)
	77	}
	78	}
	79
	80	impl<'a> crate::iter::Sum<&'a $id> for $id {
	81	#[inline]
	82	fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
	83	iter.fold($id::splat(0), \|a, b\| crate::ops::Add::add(a, *b))
	84	}
	85	}
	86
	87	impl<'a> crate::iter::Product<&'a $id> for $id {
	88	#[inline]
	89	fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
	90	iter.fold($id::splat(1), \|a, b\| crate::ops::Mul::mul(a, *b))
	91	}
	92	}
	93
	94	test_if! {
	95	$test_tt:
	96	paste::item! {
	97	pub mod [<$id _reduction_int_arith>] {
	98	use super::*;
	99
	100	fn alternating(x: usize) -> $id {
	101	let mut v = $id::splat(1 as $elem_ty);
	102	for i in 0..$id::lanes() {
	103	if i % x == 0 {
	104	v = v.replace(i, 2 as $elem_ty);
	105	}
	106	}
	107	v
	108	}
	109
	110	#[cfg_attr(not(target_arch = "wasm32"), test)]
	111	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
	112	fn wrapping_sum() {
	113	let v = $id::splat(0 as $elem_ty);
	114	assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
115	let v = $id::splat(1 as $elem_ty);
116	assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
117	let v = alternating(2);
118	if $id::lanes() > 1 {
119	assert_eq!(
120	v.wrapping_sum(),
121	($id::lanes() / 2 + $id::lanes()) as $elem_ty
122	);
123	} else {
124	assert_eq!(
125	v.wrapping_sum(),
126	2 as $elem_ty
127	);
128	}
129	}
130	#[cfg_attr(not(target_arch = "wasm32"), test)]
131	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
132	fn wrapping_sum_overflow() {
133	let start = $elem_ty::max_value()
134	- ($id::lanes() as $elem_ty / 2);
135
136	let v = $id::splat(start as $elem_ty);
137	let vwrapping_sum = v.wrapping_sum();
138
139	let mut wrapping_sum = start;
140	for _ in 1..$id::lanes() {
141	wrapping_sum = wrapping_sum.wrapping_add(start);
142	}
143	assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
144	}
145
146	#[cfg_attr(not(target_arch = "wasm32"), test)]
147	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
148	fn wrapping_product() {
149	let v = $id::splat(0 as $elem_ty);
150	assert_eq!(v.wrapping_product(), 0 as $elem_ty);
151	let v = $id::splat(1 as $elem_ty);
152	assert_eq!(v.wrapping_product(), 1 as $elem_ty);
153	let f = match $id::lanes() {
154	64 => 16,
155	32 => 8,
156	16 => 4,
157	_ => 2,
158	};
159	let v = alternating(f);
160	if $id::lanes() > 1 {
161	assert_eq!(
162	v.wrapping_product(),
163	(2_usize.pow(($id::lanes() / f) as u32)
164	as $elem_ty)
165	);
166	} else {
167	assert_eq!(
168	v.wrapping_product(),
169	2 as $elem_ty
170	);
171	}
172	}
173
174	#[cfg_attr(not(target_arch = "wasm32"), test)]
175	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
176	fn wrapping_product_overflow() {
177	let start = $elem_ty::max_value()
178	- ($id::lanes() as $elem_ty / 2);
179
180	let v = $id::splat(start as $elem_ty);
181	let vmul = v.wrapping_product();
182
183	let mut mul = start;
184	for _ in 1..$id::lanes() {
185	mul = mul.wrapping_mul(start);
186	}
187	assert_eq!(mul, vmul, "v = {:?}", v);
188	}
189	}
190	}
191	}
192	};
193	}