]> git.proxmox.com Git - rustc.git/blob - vendor/packed_simd_2/src/lib.rs
New upstream version 1.53.0+dfsg1
[rustc.git] / vendor / packed_simd_2 / src / lib.rs
1 //! # Portable packed SIMD vectors
2 //!
3 //! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366:
4 //! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) .
5 //!
6 //! The examples available in the
7 //! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples)
8 //! sub-directory of the crate showcase how to use the library in practice.
9 //!
10 //! ## Table of contents
11 //!
12 //! - [Introduction](#introduction)
13 //! - [Vector types](#vector-types)
14 //! - [Conditional operations](#conditional-operations)
15 //! - [Conversions](#conversions)
16 //! - [Performance
17 //! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/)
18 //!
19 //! ## Introduction
20 //!
21 //! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N`
22 //! elements of type `T` as well as many type aliases for this type: for
23 //! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`.
24 //!
25 //! The operations on packed vectors are, by default, "vertical", that is, they
26 //! are applied to each vector lane in isolation of the others:
27 //!
28 //! ```
29 //! # use packed_simd::*;
30 //! let a = i32x4::new(1, 2, 3, 4);
31 //! let b = i32x4::new(5, 6, 7, 8);
32 //! assert_eq!(a + b, i32x4::new(6, 8, 10, 12));
33 //! ```
34 //!
35 //! Many "horizontal" operations are also provided:
36 //!
37 //! ```
38 //! # use packed_simd::*;
39 //! # let a = i32x4::new(1, 2, 3, 4);
40 //! assert_eq!(a.wrapping_sum(), 10);
41 //! ```
42 //!
43 //! In virtually all architectures vertical operations are fast, while
44 //! horizontal operations are, by comparison, much slower. That is, the
45 //! most portably-efficient way of performing a reduction over a slice
46 //! is to collect the results into a vector using vertical operations,
47 //! and performing a single horizontal operation at the end:
48 //!
49 //! ```
50 //! # use packed_simd::*;
51 //! fn reduce(x: &[i32]) -> i32 {
52 //! assert!(x.len() % 4 == 0);
53 //! let mut sum = i32x4::splat(0); // [0, 0, 0, 0]
54 //! for i in (0..x.len()).step_by(4) {
55 //! sum += i32x4::from_slice_unaligned(&x[i..]);
56 //! }
57 //! sum.wrapping_sum()
58 //! }
59 //!
60 //! let x = [0, 1, 2, 3, 4, 5, 6, 7];
61 //! assert_eq!(reduce(&x), 28);
62 //! ```
63 //!
64 //! ## Vector types
65 //!
66 //! The vector type aliases are named according to the following scheme:
67 //!
68 //! > `{element_type}x{number_of_lanes} == Simd<[element_type;
69 //! number_of_lanes]>`
70 //!
71 //! where the following element types are supported:
72 //!
73 //! * `i{element_width}`: signed integer
74 //! * `u{element_width}`: unsigned integer
75 //! * `f{element_width}`: float
76 //! * `m{element_width}`: mask (see below)
77 //! * `*{const,mut} T`: `const` and `mut` pointers
78 //!
79 //! ## Basic operations
80 //!
81 //! ```
82 //! # use packed_simd::*;
83 //! // Sets all elements to `0`:
84 //! let a = i32x4::splat(0);
85 //!
86 //! // Reads a vector from a slice:
87 //! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5];
88 //! let b = i32x4::from_slice_unaligned(&arr);
89 //!
90 //! // Reads the 4-th element of a vector:
91 //! assert_eq!(b.extract(3), 1);
92 //!
93 //! // Returns a new vector where the 4-th element is replaced with `1`:
94 //! let a = a.replace(3, 1);
95 //! assert_eq!(a, b);
96 //!
97 //! // Writes a vector to a slice:
98 //! let a = a.replace(2, 1);
99 //! a.write_to_slice_unaligned(&mut arr[4..]);
100 //! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]);
101 //! ```
102 //!
103 //! ## Conditional operations
104 //!
105 //! One often needs to perform an operation on some lanes of the vector. Vector
106 //! masks, like `m32x4`, allow selecting on which vector lanes an operation is
107 //! to be performed:
108 //!
109 //! ```
110 //! # use packed_simd::*;
111 //! let a = i32x4::new(1, 1, 2, 2);
112 //!
113 //! // Add `1` to the first two lanes of the vector.
114 //! let m = m16x4::new(true, true, false, false);
115 //! let a = m.select(a + 1, a);
116 //! assert_eq!(a, i32x4::splat(2));
117 //! ```
118 //!
119 //! The elements of a vector mask are either `true` or `false`. Here `true`
120 //! means that a lane is "selected", while `false` means that a lane is not
121 //! selected.
122 //!
123 //! All vector masks implement a `mask.select(a: T, b: T) -> T` method that
124 //! works on all vectors that have the same number of lanes as the mask. The
125 //! resulting vector contains the elements of `a` for those lanes for which the
126 //! mask is `true`, and the elements of `b` otherwise.
127 //!
128 //! The example constructs a mask with the first two lanes set to `true` and
129 //! the last two lanes set to `false`. This selects the first two lanes of `a +
130 //! 1` and the last two lanes of `a`, producing a vector where the first two
131 //! lanes have been incremented by `1`.
132 //!
133 //! > note: mask `select` can be used on vector types that have the same number
134 //! > of lanes as the mask. The example shows this by using [`m16x4`] instead
135 //! > of [`m32x4`]. It is _typically_ more performant to use a mask element
136 //! > width equal to the element width of the vectors being operated upon.
137 //! > This is, however, not true for 512-bit wide vectors when targeting
138 //! > AVX-512, where the most efficient masks use only 1-bit per element.
139 //!
140 //! All vertical comparison operations return masks:
141 //!
142 //! ```
143 //! # use packed_simd::*;
144 //! let a = i32x4::new(1, 1, 3, 3);
145 //! let b = i32x4::new(2, 2, 0, 0);
146 //!
147 //! // ge: >= (Greater Equal; see also lt, le, gt, eq, ne).
148 //! let m = a.ge(i32x4::splat(2));
149 //!
150 //! if m.any() {
151 //! // all / any / none allow coherent control flow
152 //! let d = m.select(a, b);
153 //! assert_eq!(d, i32x4::new(2, 2, 3, 3));
154 //! }
155 //! ```
156 //!
157 //! ## Conversions
158 //!
159 //! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for
160 //! vectors with the same number of lanes when the conversion is value
161 //! preserving (same as in `std`).
162 //!
163 //! * **safe bitwise conversions**: The cargo feature `into_bits` provides the
164 //! `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise
165 //! `transmute`s when all bit patterns of the source type are valid bit
166 //! patterns of the target type and are also implemented for the
167 //! architecture-specific vector types of `std::arch`. For example, `let x:
168 //! u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit
169 //! patterns are valid `u8x8` bit patterns. However, the opposite is not
170 //! true, not all `u8x8` bit patterns are valid `m8x8` bit-patterns, so this
171 //! operation cannot be performed safely using `x.into_bits()`; one needs to
172 //! use `unsafe { crate::mem::transmute(x) }` for that, making sure that the
173 //! value in the `u8x8` is a valid bit-pattern of `m8x8`.
174 //!
175 //! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`]
176 //! (`x.cast()`), just like `as`:
177 //!
178 //! * casting integer vectors whose lane types have the same size (e.g.
179 //! `i32xN` -> `u32xN`) is a **no-op**,
180 //!
181 //! * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
182 //! `u8xN`) will **truncate**,
183 //!
184 //! * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
185 //! `u32xN`) will:
186 //! * **zero-extend** if the source is unsigned, or
187 //! * **sign-extend** if the source is signed,
188 //!
189 //! * casting from a float to an integer will **round the float towards
190 //! zero**,
191 //!
192 //! * casting from an integer to float will produce the floating point
193 //! representation of the integer, **rounding to nearest, ties to even**,
194 //!
195 //! * casting from an `f32` to an `f64` is perfect and lossless,
196 //!
197 //! * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
198 //!
199 //! Numeric casts are not very "precise": sometimes lossy, sometimes value
200 //! preserving, etc.
201
202 #![feature(
203 repr_simd,
204 rustc_attrs,
205 const_fn,
206 platform_intrinsics,
207 stdsimd,
208 aarch64_target_feature,
209 arm_target_feature,
210 link_llvm_intrinsics,
211 core_intrinsics,
212 stmt_expr_attributes,
213 crate_visibility_modifier,
214 custom_inner_attributes,
215 llvm_asm
216 )]
217 #![allow(non_camel_case_types, non_snake_case,
218 // FIXME: these types are unsound in C FFI already
219 // See https://github.com/rust-lang/rust/issues/53346
220 improper_ctypes_definitions,
221 clippy::cast_possible_truncation,
222 clippy::cast_lossless,
223 clippy::cast_possible_wrap,
224 clippy::cast_precision_loss,
225 // TODO: manually add the `#[must_use]` attribute where appropriate
226 clippy::must_use_candidate,
227 // This lint is currently broken for generic code
228 // See https://github.com/rust-lang/rust-clippy/issues/3410
229 clippy::use_self,
230 clippy::wrong_self_convention,
231 )]
232 #![cfg_attr(test, feature(hashmap_internals))]
233 #![deny(rust_2018_idioms, clippy::missing_inline_in_public_items)]
234 #![no_std]
235
236 use cfg_if::cfg_if;
237
// Select the `arch` module used by the rest of the crate: when the
// `core_arch` cargo feature is enabled, use the standalone `core_arch`
// crate; otherwise fall back to `core::arch` from libcore.
238 cfg_if! {
239 if #[cfg(feature = "core_arch")] {
240 #[allow(unused_imports)]
241 use core_arch as arch;
242 } else {
243 #[allow(unused_imports)]
244 use core::arch;
245 }
246 }
247
248 #[cfg(all(target_arch = "wasm32", test))]
249 use wasm_bindgen_test::*;
250
251 #[allow(unused_imports)]
252 use core::{
253 /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128,
254 i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice,
255 u128, u16, u32, u64, u8, usize,
256 };
257
258 #[macro_use]
259 mod testing;
260 #[macro_use]
261 mod api;
262 mod codegen;
263 mod sealed;
264
265 pub use crate::sealed::{Simd as SimdVector, Shuffle, SimdArray, Mask};
266
267 /// Packed SIMD vector type.
268 ///
269 /// # Examples
270 ///
271 /// ```
272 /// # use packed_simd::Simd;
273 /// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3);
274 /// assert_eq!(v.extract(2), 2);
275 /// ```
// `repr(transparent)`: `Simd<A>` has exactly the layout of its single
// field, the internal `repr(simd)` tuple type selected by
// `A: sealed::SimdArray` (see the FIXME below).
276 #[repr(transparent)]
277 #[derive(Copy, Clone)]
278 pub struct Simd<A: sealed::SimdArray>(
279 // FIXME: this type should be private,
280 // but it currently must be public for the
281 // `shuffle!` macro to work: it needs to
282 // access the internal `repr(simd)` type
283 // to call the shuffle intrinsics.
284 #[doc(hidden)] pub <A as sealed::SimdArray>::Tuple,
285 );
286 
// Implement the private `sealed::Seal` marker for every `Simd<A>`; this
// gates the crate's sealed traits (see the `sealed` module) so downstream
// crates cannot implement them for foreign types.
287 impl<A: sealed::SimdArray> sealed::Seal for Simd<A> {}
288
289 /// Wrapper over `T` implementing a lexicographical order via the `PartialOrd`
290 /// and/or `Ord` traits.
// Note: the ordering impls are not derived here — presumably they are
// implemented elsewhere in the crate. `repr(transparent)` gives this
// newtype the same layout as the wrapped `T`.
291 #[repr(transparent)]
292 #[derive(Copy, Clone, Debug)]
// The derived impls are public but not `#[inline]`; exempt this item from
// the crate-wide `#![deny(clippy::missing_inline_in_public_items)]` above.
293 #[allow(clippy::missing_inline_in_public_items)]
294 pub struct LexicographicallyOrdered<T>(T);
295
296 mod masks;
297 pub use self::masks::*;
298
299 mod v16;
300 pub use self::v16::*;
301
302 mod v32;
303 pub use self::v32::*;
304
305 mod v64;
306 pub use self::v64::*;
307
308 mod v128;
309 pub use self::v128::*;
310
311 mod v256;
312 pub use self::v256::*;
313
314 mod v512;
315 pub use self::v512::*;
316
317 mod vSize;
318 pub use self::vSize::*;
319
320 mod vPtr;
321 pub use self::vPtr::*;
322
323 pub use self::api::cast::*;
324
325 #[cfg(feature = "into_bits")]
326 pub use self::api::into_bits::*;
327
328 // Re-export the shuffle intrinsics required by the `shuffle!` macro.
329 #[doc(hidden)]
330 pub use self::codegen::llvm::{
331 __shuffle_vector16, __shuffle_vector2, __shuffle_vector32,
332 __shuffle_vector4, __shuffle_vector64, __shuffle_vector8,
333 };
334
// Crate-visible re-export of the LLVM intrinsic shims from `codegen::llvm`.
// (`crate` is the old spelling of `pub(crate)`, enabled via the
// `crate_visibility_modifier` feature listed at the top of this file.)
335 crate mod llvm {
336 crate use crate::codegen::llvm::*;
337 }