[rustc.git] / vendor / packed_simd / src / api / reductions / integer_arithmetic.rs

//! Implements portable horizontal integer vector arithmetic reductions.

/// Implements the horizontal integer arithmetic reductions for the vector
/// type `$id`.
///
/// The expansion contains:
///
/// * the inherent methods `wrapping_sum` and `wrapping_product`,
/// * `Sum` and `Product` implementations, both for iterators over `$id` and
///   over `&$id`, which fold the iterator with `Add::add` / `Mul::mul`
///   starting from `$id::splat(0)` / `$id::splat(1)`,
/// * a test module named `<$id>_reduction_int_arith`, which is handed to
///   `test_if!` together with `$test_tt`.
///
/// Macro parameters:
///
/// * `$elem_ty`: element type returned by the reductions.
/// * `$elem_count`: part of the invocation syntax; not used by the expansion.
/// * `$id`: the vector type the items are generated for.
/// * `$ielem_ty`: type requested from the reduction intrinsics; the value is
///   cast to `$elem_ty` before it is returned.
/// * `$test_tt`: token forwarded to `test_if!`.
macro_rules! impl_reduction_integer_arithmetic {
    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
     | $test_tt:tt) => {
        impl $id {
            /// Horizontal wrapping sum of the vector elements.
            ///
            /// Wrapping integer addition is associative and commutative, so
            /// the result does not depend on the order in which the lanes
            /// are combined. For an 8 element vector it is equal to:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If an operation overflows, the result is the mathematical
            /// sum modulo `2^n`, where `n` is the number of bits of the
            /// element type.
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_add_ordered;
                    // SAFETY: NOTE(review): the contract of the intrinsic
                    // is not visible in this file. Presumably `self.0` is
                    // the underlying SIMD vector and `$ielem_ty` matches
                    // its lane type - confirm against `crate::llvm`.
                    let v: $ielem_ty = unsafe {
                        simd_reduce_add_ordered(self.0, 0 as $ielem_ty)
                    };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // Scalar fallback: lane 0 seeds the accumulator and the
                    // remaining lanes are added one at a time.
                    (1..$id::lanes()).fold(
                        self.extract(0) as $elem_ty,
                        |acc, i| {
                            acc.wrapping_add(self.extract(i) as $elem_ty)
                        },
                    )
                }
            }

            /// Horizontal wrapping product of the vector elements.
            ///
            /// Wrapping integer multiplication is associative and
            /// commutative, so the result does not depend on the order in
            /// which the lanes are combined. For an 8 element vector it is
            /// equal to:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// If an operation overflows, the result is the mathematical
            /// product modulo `2^n`, where `n` is the number of bits of the
            /// element type.
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_mul_ordered;
                    // SAFETY: NOTE(review): the contract of the intrinsic
                    // is not visible in this file. Presumably `self.0` is
                    // the underlying SIMD vector and `$ielem_ty` matches
                    // its lane type - confirm against `crate::llvm`.
                    let v: $ielem_ty = unsafe {
                        simd_reduce_mul_ordered(self.0, 1 as $ielem_ty)
                    };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // Scalar fallback: lane 0 seeds the accumulator and the
                    // remaining lanes are multiplied in one at a time.
                    (1..$id::lanes()).fold(
                        self.extract(0) as $elem_ty,
                        |acc, i| {
                            acc.wrapping_mul(self.extract(i) as $elem_ty)
                        },
                    )
                }
            }
        }

        /// Sums an iterator of vectors with `Add::add`, starting from
        /// `splat(0)`; an empty iterator therefore yields `splat(0)`.
        impl crate::iter::Sum for $id {
            #[inline]
            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), crate::ops::Add::add)
            }
        }

        /// Multiplies an iterator of vectors with `Mul::mul`, starting from
        /// `splat(1)`; an empty iterator therefore yields `splat(1)`.
        impl crate::iter::Product for $id {
            #[inline]
            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), crate::ops::Mul::mul)
            }
        }

        /// Same as the by-value `Sum`, for iterators that yield references.
        impl<'a> crate::iter::Sum<&'a $id> for $id {
            #[inline]
            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
            }
        }

        /// Same as the by-value `Product`, for iterators that yield
        /// references.
        impl<'a> crate::iter::Product<&'a $id> for $id {
            #[inline]
            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
            }
        }

        test_if! {
            $test_tt:
            paste::item! {
                pub mod [<$id _reduction_int_arith>] {
                    use super::*;

                    // Builds a vector of ones in which every lane whose
                    // index is a multiple of `x` is replaced by two.
                    // `x` must be non-zero (it is used as a divisor).
                    fn alternating(x: usize) -> $id {
                        let mut v = $id::splat(1 as $elem_ty);
                        for i in 0..$id::lanes() {
                            if i % x == 0 {
                                v = v.replace(i, 2 as $elem_ty);
                            }
                        }
                        v
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
                        // Every even lane holds 2, every odd lane holds 1:
                        // half of the lanes contribute one extra unit.
                        let v = alternating(2);
                        if $id::lanes() > 1 {
                            assert_eq!(
                                v.wrapping_sum(),
                                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
                            );
                        } else {
                            // Single lane: lane 0 is the only lane and
                            // holds 2.
                            assert_eq!(
                                v.wrapping_sum(),
                                2 as $elem_ty
                            );
                        }
                    }
                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum_overflow() {
                        // A value close to the maximum, so that summing
                        // the lanes has to wrap.
                        let start = $elem_ty::max_value()
                            - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start as $elem_ty);
                        let vwrapping_sum = v.wrapping_sum();

                        // Scalar reference computed with `wrapping_add`.
                        let mut wrapping_sum = start;
                        for _ in 1..$id::lanes() {
                            wrapping_sum = wrapping_sum.wrapping_add(start);
                        }
                        assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 1 as $elem_ty);
                        // Spacing between the lanes that hold 2. Wider
                        // vectors use a larger spacing, which caps the
                        // number of factors of two at four for 16, 32 and
                        // 64 lanes.
                        let f = match $id::lanes() {
                            64 => 16,
                            32 => 8,
                            16 => 4,
                            _ => 2,
                        };
                        let v = alternating(f);
                        if $id::lanes() > 1 {
                            assert_eq!(
                                v.wrapping_product(),
                                (2_usize.pow(($id::lanes() / f) as u32)
                                 as $elem_ty)
                            );
                        } else {
                            // Single lane: lane 0 is the only lane and
                            // holds 2.
                            assert_eq!(
                                v.wrapping_product(),
                                2 as $elem_ty
                            );
                        }
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product_overflow() {
                        // A value close to the maximum, so that multiplying
                        // the lanes has to wrap.
                        let start = $elem_ty::max_value()
                            - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start as $elem_ty);
                        let vmul = v.wrapping_product();

                        // Scalar reference computed with `wrapping_mul`.
                        let mut mul = start;
                        for _ in 1..$id::lanes() {
                            mul = mul.wrapping_mul(start);
                        }
                        assert_eq!(mul, vmul, "v = {:?}", v);
                    }
                }
            }
        }
    };
}
Commit	Line	Data
f20569fa XL	1	//! Implements portable horizontal integer vector arithmetic reductions.
	2
	3	macro_rules! impl_reduction_integer_arithmetic {
	4	([$elem_ty:ident; $elem_count:expr]: $id:ident \| $ielem_ty:ident
	5	\| $test_tt:tt) => {
	6	impl $id {
	7	/// Horizontal wrapping sum of the vector elements.
	8	///
	9	/// The intrinsic performs a tree-reduction of the vector elements.
	10	/// That is, for an 8 element vector:
	11	///
	12	/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
	13	///
	14	/// If an operation overflows it returns the mathematical result
	15	/// modulo `2^n` where `n` is the number of bits of the element type.
	16	#[inline]
	17	pub fn wrapping_sum(self) -> $elem_ty {
	18	#[cfg(not(target_arch = "aarch64"))]
	19	{
	20	use crate::llvm::simd_reduce_add_ordered;
	21	let v: $ielem_ty = unsafe {
	22	simd_reduce_add_ordered(self.0, 0 as $ielem_ty)
	23	};
	24	v as $elem_ty
	25	}
	26	#[cfg(target_arch = "aarch64")]
	27	{
	28	// FIXME: broken on AArch64
	29	// https://github.com/rust-lang-nursery/packed_simd/issues/15
	30	let mut x = self.extract(0) as $elem_ty;
	31	for i in 1..$id::lanes() {
	32	x = x.wrapping_add(self.extract(i) as $elem_ty);
	33	}
	34	x
	35	}
	36	}
	37
	38	/// Horizontal wrapping product of the vector elements.
	39	///
	40	/// The intrinsic performs a tree-reduction of the vector elements.
	41	/// That is, for an 8 element vector:
	42	///
	43	/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
	44	///
	45	/// If an operation overflows it returns the mathematical result
	46	/// modulo `2^n` where `n` is the number of bits of the element type.
	47	#[inline]
	48	pub fn wrapping_product(self) -> $elem_ty {
	49	#[cfg(not(target_arch = "aarch64"))]
	50	{
	51	use crate::llvm::simd_reduce_mul_ordered;
	52	let v: $ielem_ty = unsafe {
	53	simd_reduce_mul_ordered(self.0, 1 as $ielem_ty)
	54	};
	55	v as $elem_ty
	56	}
	57	#[cfg(target_arch = "aarch64")]
	58	{
	59	// FIXME: broken on AArch64
	60	// https://github.com/rust-lang-nursery/packed_simd/issues/15
	61	let mut x = self.extract(0) as $elem_ty;
	62	for i in 1..$id::lanes() {
	63	x = x.wrapping_mul(self.extract(i) as $elem_ty);
	64	}
65	x
66	}
67	}
68	}
69
70	impl crate::iter::Sum for $id {
71	#[inline]
72	fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
73	iter.fold($id::splat(0), crate::ops::Add::add)
74	}
75	}
76
77	impl crate::iter::Product for $id {
78	#[inline]
79	fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
80	iter.fold($id::splat(1), crate::ops::Mul::mul)
81	}
82	}
83
84	impl<'a> crate::iter::Sum<&'a $id> for $id {
85	#[inline]
86	fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
87	iter.fold($id::splat(0), \|a, b\| crate::ops::Add::add(a, *b))
88	}
89	}
90
91	impl<'a> crate::iter::Product<&'a $id> for $id {
92	#[inline]
93	fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
94	iter.fold($id::splat(1), \|a, b\| crate::ops::Mul::mul(a, *b))
95	}
96	}
97
98	test_if! {
99	$test_tt:
100	paste::item! {
101	pub mod [<$id _reduction_int_arith>] {
102	use super::*;
103
104	fn alternating(x: usize) -> $id {
105	let mut v = $id::splat(1 as $elem_ty);
106	for i in 0..$id::lanes() {
107	if i % x == 0 {
108	v = v.replace(i, 2 as $elem_ty);
109	}
110	}
111	v
112	}
113
114	#[cfg_attr(not(target_arch = "wasm32"), test)]
115	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
116	fn wrapping_sum() {
117	let v = $id::splat(0 as $elem_ty);
118	assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
119	let v = $id::splat(1 as $elem_ty);
120	assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
121	let v = alternating(2);
122	if $id::lanes() > 1 {
123	assert_eq!(
124	v.wrapping_sum(),
125	($id::lanes() / 2 + $id::lanes()) as $elem_ty
126	);
127	} else {
128	assert_eq!(
129	v.wrapping_sum(),
130	2 as $elem_ty
131	);
132	}
133	}
134	#[cfg_attr(not(target_arch = "wasm32"), test)]
135	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
136	fn wrapping_sum_overflow() {
137	let start = $elem_ty::max_value()
138	- ($id::lanes() as $elem_ty / 2);
139
140	let v = $id::splat(start as $elem_ty);
141	let vwrapping_sum = v.wrapping_sum();
142
143	let mut wrapping_sum = start;
144	for _ in 1..$id::lanes() {
145	wrapping_sum = wrapping_sum.wrapping_add(start);
146	}
147	assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
148	}
149
150	#[cfg_attr(not(target_arch = "wasm32"), test)]
151	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
152	fn wrapping_product() {
153	let v = $id::splat(0 as $elem_ty);
154	assert_eq!(v.wrapping_product(), 0 as $elem_ty);
155	let v = $id::splat(1 as $elem_ty);
156	assert_eq!(v.wrapping_product(), 1 as $elem_ty);
157	let f = match $id::lanes() {
158	64 => 16,
159	32 => 8,
160	16 => 4,
161	_ => 2,
162	};
163	let v = alternating(f);
164	if $id::lanes() > 1 {
165	assert_eq!(
166	v.wrapping_product(),
167	(2_usize.pow(($id::lanes() / f) as u32)
168	as $elem_ty)
169	);
170	} else {
171	assert_eq!(
172	v.wrapping_product(),
173	2 as $elem_ty
174	);
175	}
176	}
177
178	#[cfg_attr(not(target_arch = "wasm32"), test)]
179	#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
180	fn wrapping_product_overflow() {
181	let start = $elem_ty::max_value()
182	- ($id::lanes() as $elem_ty / 2);
183
184	let v = $id::splat(start as $elem_ty);
185	let vmul = v.wrapping_product();
186
187	let mut mul = start;
188	for _ in 1..$id::lanes() {
189	mul = mul.wrapping_mul(start);
190	}
191	assert_eq!(mul, vmul, "v = {:?}", v);
192	}
193	}
194	}
195	}
196	};
197	}