// ARM Neon intrinsic specification.
//
// This file contains the specification for a number of
// intrinsics that allows us to generate them along with
// their test cases.
//
// A note on the syntax of the file - it's not very intelligently parsed!
//
// # Comments
// Comments start with AT LEAST two slashes, or four or more (exactly
// three marks a section), so // is a comment and /////// is too.
//
// # Sections
// Sections start with EXACTLY three slashes followed
// by AT LEAST one space. Sections are used for two things:
//
// 1) they serve as the doc comment for the given intrinsics.
// 2) they reset all variables (name, fn, etc.)
//
// # Variables
//
// name    - The prefix of the function; suffixes are auto
//           generated from the type they get passed.
//
// fn      - The function to call in rust-land.
//
// aarch64 - The intrinsic to check on the aarch64 architecture.
//           If this is given but no arm intrinsic is provided,
//           the function will exclusively be generated for
//           aarch64.
//           This is used to generate both aarch64-specific and
//           shared intrinsics by first specifying only the aarch64
//           variant and then the arm variant.
//
// arm     - The arm v7 intrinsic used to check arm code
//           generation. All neon functions available on arm are
//           also available on aarch64. If no aarch64 intrinsic was
//           set they are assumed to be the same.
//           Intrinsics ending with a `.` will have a size suffix
//           added (such as `i8` or `i64`) that is not sign specific.
//           Intrinsics ending with a `.s` will have a size suffix
//           added (such as `s8` or `u64`) that is sign specific.
//
// a       - First input for tests; it gets scaled to the size of
//           the type.
//
// b       - Second input for tests; it gets scaled to the size of
//           the type.
//
// # special values
//
// TRUE  - 'true', all bits are set to 1
// FALSE - 'false', all bits are set to 0
// FF    - same as 'true'
// MIN   - minimal value (either 0 or the lowest negative number)
// MAX   - maximal value (prone to overflow)
//
// # validate
// Validates the result computed from a and b against the expected
// result of the test. The special values 'TRUE' and 'FALSE' can be
// used to represent the correct NEON representation of true or
// false values. It too gets scaled to the type.
//
// Validate needs to be called before generate as it sets
// up the rules for validation that get generated for each
// type.
//
// # generate
// The generate command generates the intrinsics; it uses the
// variables set so far and can be called multiple times while
// overwriting some of the variables.
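//
// # example
//
// As a rough, illustrative sketch (not the generator's verbatim output),
// an entry such as
//
//     /// Vector bitwise and
//     name = vand
//     fn = simd_and
//     arm = vand
//     aarch64 = and
//     generate int*_t
//
// is expanded into one Rust function per matching type, roughly of the shape
//
//     #[inline]
//     #[target_feature(enable = "neon")]
//     #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
//     #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))]
//     pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
//         simd_and(a, b)
//     }
//
// (the vector types and `simd_and` come from the surrounding crate), plus a
// test that builds vectors from the `a`/`b` lines and compares the result
// against the `validate` line. The exact attributes and test shape are
// decided by the generator, so treat the snippet above as a sketch only.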
/// Vector bitwise and
name = vand
fn = simd_and
arm = vand
aarch64 = and
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Vector bitwise or (immediate, inclusive)
name = vorr
fn = simd_or
arm = vorr
aarch64 = orr
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Vector bitwise exclusive or (vector)
name = veor
fn = simd_xor
arm = veor
aarch64 = eor
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t

////////////////////
// Absolute difference between the arguments
////////////////////

/// Absolute difference between the arguments
name = vabd
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
validate 15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15
arm = vabd.s
aarch64 = sabd
link-arm = vabds._EXT_
link-aarch64 = sabd._EXT_
generate int*_t
arm = vabd.s
aarch64 = uabd
link-arm = vabdu._EXT_
link-aarch64 = uabd._EXT_
generate uint*_t

/// Floating-point absolute difference between the arguments
name = vabd
a = 1.0, 2.0, 5.0, -4.0
b = 9.0, 3.0, 2.0, 8.0
validate 8.0, 1.0, 3.0, 12.0
aarch64 = fabd
link-aarch64 = fabd._EXT_
generate float64x*_t
arm = vabd.s
aarch64 = fabd
link-arm = vabds._EXT_
link-aarch64 = fabd._EXT_
generate float*_t

////////////////////
// Absolute difference Long
////////////////////

/// Unsigned Absolute difference Long
name = vabdl
multi_fn = simd_cast, {vabd-unsigned-noext, a, b}
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9
arm = vabdl.s
aarch64 = uabdl
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t

/// Signed Absolute difference Long
name = vabdl
multi_fn = simd_cast, c:uint8x8_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9
arm = vabdl.s
aarch64 = sabdl
generate int8x8_t:int8x8_t:int16x8_t

/// Signed Absolute difference Long
name = vabdl
multi_fn = simd_cast, c:uint16x4_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 2, 11, 12
b = 10, 10, 10, 10
validate 9, 8, 1, 2
arm = vabdl.s
aarch64 = sabdl
generate int16x4_t:int16x4_t:int32x4_t

/// Signed Absolute difference Long
name = vabdl
multi_fn = simd_cast, c:uint32x2_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 11
b = 10, 10
validate 9, 1
arm = vabdl.s
aarch64 = sabdl
generate int32x2_t:int32x2_t:int64x2_t

/// Unsigned Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, {vabd_u8, c, d}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6
aarch64 = uabdl
generate uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, {vabd_u16, c, d}
a = 1, 2, 3, 4, 8, 9, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 2, 1, 1, 2
aarch64 = uabdl
generate uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3]
multi_fn = simd_cast, {vabd_u32, c, d}
a = 1, 2, 3, 4
b = 10, 10, 10, 10
validate 7, 6
aarch64 = uabdl
generate uint32x4_t:uint32x4_t:uint64x2_t

/// Signed Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle8!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, e:uint8x8_t, {vabd_s8, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6
aarch64 = sabdl
generate int8x16_t:int8x16_t:int16x8_t

/// Signed Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, e:uint16x4_t, {vabd_s16, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 9, 10, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2
aarch64 = sabdl
generate int16x8_t:int16x8_t:int32x4_t

/// Signed Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3]
multi_fn = simd_cast, e:uint32x2_t, {vabd_s32, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4
b = 10, 10, 10, 10
validate 7, 6
aarch64 = sabdl
generate int32x4_t:int32x4_t:int64x2_t

////////////////////
// equality
////////////////////

/// Compare bitwise Equal (vector)
name = vceq
fn = simd_eq
a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
aarch64 = cmeq
generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
arm = vceq.
generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t

/// Floating-point compare equal
name = vceq
fn = simd_eq
a = 1.2, 3.4, 5.6, 7.8
b = 1.2, 3.4, 5.6, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmeq
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vceq.
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Signed compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = cmeq
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

/// Unsigned compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = cmeq
generate uint*_t, uint64x*_t

/// Floating-point compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = 0.0, 1.2, 3.4, 5.6
fixed = 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE
aarch64 = fcmeq
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Signed compare bitwise Test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmtst
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
arm = vtst
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t

/// Unsigned compare bitwise Test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmtst
generate uint64x*_t
arm = vtst
generate uint*_t

////////////////////
// Floating-point absolute value
////////////////////

/// Floating-point absolute value
name = vabs
fn = simd_fabs
a = -0.1, -2.2, -3.3, -6.6
validate 0.1, 2.2, 3.3, 6.6
aarch64 = fabs
generate float64x1_t:float64x1_t, float64x2_t:float64x2_t
arm = vabs
generate float32x2_t:float32x2_t, float32x4_t:float32x4_t
////////////////////
// greater than
////////////////////

/// Compare signed greater than
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned higher
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmhi
generate uint64x*_t
arm = vcgt.s
generate uint*_t

/// Floating-point compare greater than
name = vcgt
fn = simd_gt
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vcgt.s
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

////////////////////
// less than
////////////////////

/// Compare signed less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmhi
generate uint64x*_t
arm = vcgt.s
generate uint*_t

/// Floating-point compare less than
name = vclt
fn = simd_lt
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vcgt.s
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

////////////////////
// less than or equal
////////////////////

/// Compare signed less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmhs
generate uint64x*_t
arm = vcge.s
generate uint*_t

/// Floating-point compare less than or equal
name = vcle
fn = simd_le
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
arm = vcge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

////////////////////
// greater than or equal
////////////////////

/// Compare signed greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmhs
generate uint64x*_t
arm = vcge.s
generate uint*_t

/// Floating-point compare greater than or equal
name = vcge
fn = simd_ge
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vcge.s
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare signed greater than or equal to zero
name = vcgez
fn = simd_ge
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmge
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare greater than or equal to zero
name = vcgez
fn = simd_ge
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmge
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare signed greater than zero
name = vcgtz
fn = simd_gt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare greater than zero
name = vcgtz
fn = simd_gt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmgt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare signed less than or equal to zero
name = vclez
fn = simd_le
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare less than or equal to zero
name = vclez
fn = simd_le
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = fcmle
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare signed less than zero
name = vcltz
fn = simd_lt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = sshr
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare less than zero
name = vcltz
fn = simd_lt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = fcmlt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Count leading sign bits
name = vcls
a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
validate 0, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0
arm = vcls.s
aarch64 = cls
link-arm = vcls._EXT_
link-aarch64 = cls._EXT_
generate int*_t

/// Signed count leading zero bits
name = vclz
multi_fn = self-signed-ext, a
a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
validate 0, 0, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 1
arm = vclz.
aarch64 = clz
generate int*_t

/// Unsigned count leading zero bits
name = vclz
multi_fn = transmute, {self-signed-ext, transmute(a)}
a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0
arm = vclz.
aarch64 = clz
generate uint*_t

/// Floating-point absolute compare greater than
name = vcagt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
aarch64 = facgt
link-aarch64 = facgt._EXT2_._EXT_
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vacgt.s
link-arm = vacgt._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare greater than or equal
name = vcage
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
aarch64 = facge
link-aarch64 = facge._EXT2_._EXT_
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vacge.s
link-arm = vacge._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare less than
name = vcalt
multi_fn = vcagt-self-noext, b, a
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE
aarch64 = facgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vacgt.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare less than or equal
name = vcale
multi_fn = vcage-self-noext, b, a
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE
aarch64 = facge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vacge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
aarch64 = mov
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x2_t
generate uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x2_t
generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1., 2., 3., 4.
b = 0., 0.5, 0., 0.
n = 0:1
validate 0.5, 2., 3., 4.
aarch64 = mov
generate float32x2_t, float32x4_t, float64x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
aarch64 = mov
generate int8x8_t:int8x16_t:int8x8_t, int16x4_t:int16x8_t:int16x4_t, int32x2_t:int32x4_t:int32x2_t
generate uint8x8_t:uint8x16_t:uint8x8_t, uint16x4_t:uint16x8_t:uint16x4_t, uint32x2_t:uint32x4_t:uint32x2_t
generate poly8x8_t:poly8x16_t:poly8x8_t, poly16x4_t:poly16x8_t:poly16x4_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2}
a = 1., 2., 3., 4.
b = 0., 0.5, 0., 0.
n = 0:1
validate 0.5, 2., 3., 4.
aarch64 = mov
generate float32x2_t:float32x4_t:float32x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
aarch64 = mov
generate int8x16_t:int8x8_t:int8x16_t, int16x8_t:int16x4_t:int16x8_t, int32x4_t:int32x2_t:int32x4_t
generate uint8x16_t:uint8x8_t:uint8x16_t, uint16x8_t:uint16x4_t:uint16x8_t, uint32x4_t:uint32x2_t:uint32x4_t
generate poly8x16_t:poly8x8_t:poly8x16_t, poly16x8_t:poly16x4_t:poly16x8_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1:0
validate 1, MAX
aarch64 = zip1
generate int64x2_t:int64x1_t:int64x2_t, uint64x2_t:uint64x1_t:uint64x2_t, poly64x2_t:poly64x1_t:poly64x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1., 2., 3., 4.
b = 0.5, 0., 0., 0.
n = 1:0
validate 1., 0.5, 3., 4.
aarch64 = mov
generate float32x4_t:float32x2_t:float32x4_t
aarch64 = zip1
generate float64x2_t:float64x1_t:float64x2_t

/// Create a vector from a 64-bit value
name = vcreate
out-suffix
multi_fn = transmute, a
a = 1
validate 1, 0, 0, 0, 0, 0, 0, 0
aarch64 = nop
arm = nop
generate u64:int8x8_t, u64:int16x4_t, u64:int32x2_t, u64:int64x1_t
generate u64:uint8x8_t, u64:uint16x4_t, u64:uint32x2_t, u64:uint64x1_t
generate u64:poly8x8_t, u64:poly16x4_t
target = crypto
generate u64:poly64x1_t

/// Create a vector from a 64-bit value
name = vcreate
out-suffix
multi_fn = transmute, a
a = 0
validate 0., 0.
aarch64 = nop
generate u64:float64x1_t
arm = nop
generate u64:float32x2_t

/// Fixed-point convert to floating-point
name = vcvt
double-suffixes
fn = simd_cast
a = 1, 2, 3, 4
validate 1., 2., 3., 4.
aarch64 = scvtf
generate int64x1_t:float64x1_t, int64x2_t:float64x2_t
aarch64 = ucvtf
generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t
arm = vcvt
aarch64 = scvtf
generate int32x2_t:float32x2_t, int32x4_t:float32x4_t
aarch64 = ucvtf
generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t

/// Floating-point convert to higher precision long
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f32 as f64, 1.2f32 as f64
aarch64 = fcvtl
generate float32x2_t:float64x2_t

/// Floating-point convert to higher precision long
name = vcvt_high
noq-double-suffixes
multi_fn = simd_shuffle2!, b:float32x2_t, a, a, [2, 3]
multi_fn = simd_cast, b
a = -1.2, 1.2, 2.3, 3.4
validate 2.3f32 as f64, 3.4f32 as f64
aarch64 = fcvtl
generate float32x4_t:float64x2_t

/// Floating-point convert to lower precision narrow
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f64 as f32, 1.2f64 as f32
aarch64 = fcvtn
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow
name = vcvt_high
noq-double-suffixes
multi_fn = simd_shuffle4!, a, {simd_cast, b}, [0, 1, 2, 3]
a = -1.2, 1.2
b = -2.3, 3.4
validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32
aarch64 = fcvtn
generate float32x2_t:float64x2_t:float32x4_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx
double-suffixes
a = -1.0, 2.0
validate -1.0, 2.0
aarch64 = fcvtxn
link-aarch64 = fcvtxn._EXT2_._EXT_
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx_high
noq-double-suffixes
multi_fn = simd_shuffle4!, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3]
a = -1.0, 2.0
b = -3.0, 4.0
validate -1.0, 2.0, -3.0, 4.0
aarch64 = fcvtxn
generate float32x2_t:float64x2_t:float32x4_t

/// Fixed-point convert to floating-point
name = vcvt
double-n-suffixes
constn = N
multi_fn = static_assert-N-1-bits
a = 1, 2, 3, 4
n = 2
validate 0.25, 0.5, 0.75, 1.
aarch64 = scvtf
link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
const-aarch64 = N
generate int64x1_t:float64x1_t, int64x2_t:float64x2_t, i32:f32, i64:f64
aarch64 = ucvtf
link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
const-aarch64 = N
generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t, u32:f32, u64:f64
aarch64 = scvtf
link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfxs2fp._EXT2_._EXT_
const-arm = N:i32
generate int32x2_t:float32x2_t, int32x4_t:float32x4_t
aarch64 = ucvtf
link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfxu2fp._EXT2_._EXT_
const-arm = N:i32
generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t

/// Floating-point convert to fixed-point, rounding toward zero
name = vcvt
double-n-suffixes
constn = N
multi_fn = static_assert-N-1-bits
a = 0.25, 0.5, 0.75, 1.
n = 2
validate 1, 2, 3, 4
aarch64 = fcvtzs
link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
const-aarch64 = N
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64
aarch64 = fcvtzu
link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
const-aarch64 = N
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
aarch64 = fcvtzs
link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfp2fxs._EXT2_._EXT_
const-arm = N:i32
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t
aarch64 = fcvtzu
link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfp2fxu._EXT2_._EXT_
const-arm = N:i32
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Fixed-point convert to floating-point
name = vcvt
double-suffixes
multi_fn = a as out_t
a = 1
validate 1.
aarch64 = scvtf
generate i32:f32, i64:f64
aarch64 = ucvtf
generate u32:f32, u64:f64

/// Floating-point convert to fixed-point, rounding toward zero
name = vcvt
double-suffixes
multi_fn = a as out_t
a = 1.
validate 1
aarch64 = fcvtzs
generate f32:i32, f64:i64
aarch64 = fcvtzu
generate f32:u32, f64:u64

/// Floating-point convert to signed fixed-point, rounding toward zero
name = vcvt
double-suffixes
fn = simd_cast
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -2, 3
aarch64 = fcvtzs
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t
arm = vcvt
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t

/// Floating-point convert to unsigned fixed-point, rounding toward zero
name = vcvt
double-suffixes
fn = simd_cast
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3
aarch64 = fcvtzu
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vcvt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point convert to signed integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -3, 4
aarch64 = fcvtas
link-aarch64 = fcvtas._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t

/// Floating-point convert to integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = 2.9
validate 3
aarch64 = fcvtas
link-aarch64 = fcvtas._EXT2_._EXT_
generate f32:i32, f64:i64
aarch64 = fcvtau
link-aarch64 = fcvtau._EXT2_._EXT_
generate f32:u32, f64:u64

/// Floating-point convert to signed integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = -1.5, 2.1, -2.9, 3.9
validate -2, 2, -3, 4
aarch64 = fcvtns
link-aarch64 = fcvtns._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to signed integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -2, 2, -3, 3
aarch64 = fcvtms
link-aarch64 = fcvtms._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to signed integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 3, -2, 4
aarch64 = fcvtps
link-aarch64 = fcvtps._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 3, 4
aarch64 = fcvtau
link-aarch64 = fcvtau._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = 1.5, 2.1, 2.9, 3.9
validate 2, 2, 3, 4
aarch64 = fcvtnu
link-aarch64 = fcvtnu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Floating-point convert to unsigned integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3
aarch64 = fcvtmu
link-aarch64 = fcvtmu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Floating-point convert to unsigned integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 2, 3, 3, 4
aarch64 = fcvtpu
link-aarch64 = fcvtpu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
aarch64 = dup
generate poly64x2_t, poly64x1_t:poly64x2_t
arm = vdup.l
generate int*_t
generate int8x16_t:int8x8_t, int16x8_t:int16x4_t, int32x4_t:int32x2_t
generate int8x8_t:int8x16_t, int16x4_t:int16x8_t, int32x2_t:int32x4_t
generate uint*_t
generate uint8x16_t:uint8x8_t, uint16x8_t:uint16x4_t, uint32x4_t:uint32x2_t
generate uint8x8_t:uint8x16_t, uint16x4_t:uint16x8_t, uint32x2_t:uint32x4_t
generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
generate poly8x16_t:poly8x8_t, poly16x8_t:poly16x4_t
generate poly8x8_t:poly8x16_t, poly16x4_t:poly16x8_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
aarch64 = dup
arm = vmov
generate int64x2_t, int64x1_t:int64x2_t, uint64x2_t, uint64x1_t:uint64x2_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
a = 1., 1., 1., 4.
n = HFLEN
validate 1., 1., 1., 1.
aarch64 = dup
generate float64x2_t, float64x1_t:float64x2_t
arm = vdup.l
generate float*_t, float32x4_t:float32x2_t, float32x2_t:float32x4_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = a
a = 0
n = HFLEN
validate 0
aarch64 = nop
generate poly64x1_t
arm = nop
generate int64x1_t, uint64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = a
a = 0.
n = HFLEN
validate 0.
aarch64 = nop
generate float64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = transmute--, {simd_extract, a, N as u32}
a = 0, 1
n = HFLEN
validate 1
aarch64 = nop
generate poly64x2_t:poly64x1_t
arm = vmov
generate int64x2_t:int64x1_t, uint64x2_t:uint64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = transmute--, {simd_extract, a, N as u32}
a = 0., 1.
n = HFLEN
validate 1.
aarch64 = nop
generate float64x2_t:float64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, a, N as u32
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1
aarch64 = nop
generate int8x8_t:i8, int8x16_t:i8, int16x4_t:i16, int16x8_t:i16, int32x2_t:i32, int32x4_t:i32, int64x1_t:i64, int64x2_t:i64
generate uint8x8_t:u8, uint8x16_t:u8, uint16x4_t:u16, uint16x8_t:u16, uint32x2_t:u32, uint32x4_t:u32, uint64x1_t:u64, uint64x2_t:u64
generate poly8x8_t:p8, poly8x16_t:p8, poly16x4_t:p16, poly16x8_t:p16

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, a, N as u32
a = 1., 1., 1., 4.
n = HFLEN
validate 1.
aarch64 = nop
generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
n = HFLEN
validate 8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19
arm = "vext.8"
aarch64 = ext
generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
n = HFLEN
validate 8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19
aarch64 = ext
generate poly64x2_t
arm = vmov
generate int64x2_t, uint64x2_t

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
a = 0., 2., 2., 3.
b = 3., 4., 5., 6.
n = HFLEN
validate 2., 3., 3., 4.
aarch64 = ext
generate float64x2_t
arm = "vext.8"
generate float*_t

/// Multiply-add to accumulator
name = vmla
multi_fn = simd_add, a, {simd_mul, b, c}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
arm = vmla.
aarch64 = mla
generate int*_t, uint*_t

/// Floating-point multiply-add to accumulator
name = vmla
multi_fn = simd_add, a, {simd_mul, b, c}
a = 0., 1., 2., 3.
b = 2., 2., 2., 2.
c = 3., 3., 3., 3.
validate 6., 7., 8., 9.
aarch64 = fmul
generate float64x*_t
arm = vmla.
generate float*_t

/// Vector multiply accumulate with scalar
name = vmla
n-suffix
multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
aarch64 = mla
arm = vmla.
generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t

/// Vector multiply accumulate with scalar
name = vmla
n-suffix
multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
a = 0., 1., 2., 3.
b = 2., 2., 2., 2.
c = 3.
validate 6., 7., 8., 9.
aarch64 = fmul
arm = vmla.
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Vector multiply accumulate with scalar
name = vmla
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
aarch64 = mla
arm = vmla.
generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t

/// Vector multiply accumulate with scalar
name = vmla
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 0., 1., 2., 3.
b = 2., 2., 2., 2.
c = 0., 3., 0., 0.
n = 1
validate 6., 7., 8., 9.
aarch64 = fmul
arm = vmla.
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Signed multiply-add long
name = vmlal
multi_fn = simd_add, a, {vmull-self-noext, b, c}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
arm = vmlal.s
aarch64 = smlal
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Unsigned multiply-add long
name = vmlal
multi_fn = simd_add, a, {vmull-self-noext, b, c}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
arm = vmlal.s
aarch64 = umlal
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t

/// Vector widening multiply accumulate with scalar
name = vmlal
n-suffix
multi_fn = vmlal-self-noext, a, b, {vdup-nself-noext, c}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
arm = vmlal.s
aarch64 = smlal
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
aarch64 = umlal
generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t

/// Vector widening multiply accumulate with scalar
name = vmlal_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
arm = vmlal.s
aarch64 = smlal
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
aarch64 = umlal
generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t

/// Signed multiply-add long
name = vmlal_high
no-q
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
multi_fn = vmlal-noqself-noext, a, b, c
a = 8, 7, 6, 5, 4, 3, 2, 1
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 8, 9, 10, 11, 12, 13, 14, 15
aarch64 = smlal2
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Unsigned multiply-add long
name = vmlal_high
no-q
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
multi_fn = vmlal-noqself-noext, a, b, c
a = 8, 7, 6, 5, 4, 3, 2, 1
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 8, 9, 10, 11, 12, 13, 14, 15
aarch64 = umlal2
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Multiply-add long
name = vmlal_high_n
no-q
multi_fn = vmlal_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
a = 8, 7, 6, 5, 4, 3, 2, 1
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 2
validate 8, 9, 10, 11, 12, 13, 14, 15
aarch64 = smlal2
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
aarch64 = umlal2
generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t

/// Multiply-add long
name = vmlal_high_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 8, 7, 6, 5, 4, 3, 2, 1
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 8, 9, 10, 11, 12, 13, 14, 15
aarch64 = smlal2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
aarch64 = umlal2
generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Multiply-subtract from accumulator
name = vmls
multi_fn = simd_sub, a, {simd_mul, b, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
arm = vmls.
aarch64 = mls
generate int*_t, uint*_t

/// Floating-point multiply-subtract from accumulator
name = vmls
multi_fn = simd_sub, a, {simd_mul, b, c}
a = 6., 7., 8., 9.
b = 2., 2., 2., 2.
c = 3., 3., 3., 3.
validate 0., 1., 2., 3.
aarch64 = fmul
generate float64x*_t
arm = vmls.
generate float*_t

/// Vector multiply subtract with scalar
name = vmls
n-suffix
multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
aarch64 = mls
arm = vmls.
generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t

/// Vector multiply subtract with scalar
name = vmls
n-suffix
multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
a = 6., 7., 8., 9.
b = 2., 2., 2., 2.
c = 3.
validate 0., 1., 2., 3.
aarch64 = fmul
arm = vmls.
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Vector multiply subtract with scalar
name = vmls
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
aarch64 = mls
arm = vmls.
generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t

/// Vector multiply subtract with scalar
name = vmls
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 6., 7., 8., 9.
b = 2., 2., 2., 2.
c = 0., 3., 0., 0.
n = 1
validate 0., 1., 2., 3.
aarch64 = fmul
arm = vmls.
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Signed multiply-subtract long
name = vmlsl
multi_fn = simd_sub, a, {vmull-self-noext, b, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
arm = vmlsl.s
aarch64 = smlsl
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Unsigned multiply-subtract long
name = vmlsl
multi_fn = simd_sub, a, {vmull-self-noext, b, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
arm = vmlsl.s
aarch64 = umlsl
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t

/// Vector widening multiply subtract with scalar
name = vmlsl
n-suffix
multi_fn = vmlsl-self-noext, a, b, {vdup-nself-noext, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
arm = vmlsl.s
aarch64 = smlsl
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
aarch64 = umlsl
generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t

/// Vector widening multiply subtract with scalar
name = vmlsl_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
arm = vmlsl.s
aarch64 = smlsl
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
aarch64 = umlsl
generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t

/// Signed multiply-subtract long
name = vmlsl_high
no-q
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
multi_fn = vmlsl-noqself-noext, a, b, c
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 14, 13, 12, 11, 10, 9, 8, 7
aarch64 = smlsl2
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Unsigned multiply-subtract long
name = vmlsl_high
no-q
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
multi_fn = vmlsl-noqself-noext, a, b, c
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 14, 13, 12, 11, 10, 9, 8, 7
aarch64 = umlsl2
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Multiply-subtract long
name = vmlsl_high_n
no-q
multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 2
validate 14, 13, 12, 11, 10, 9, 8, 7
aarch64 = smlsl2
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
aarch64 = umlsl2
generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t

/// Multiply-subtract long
name = vmlsl_high_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 14, 13, 12, 11, 10, 9, 8, 7
aarch64 = smlsl2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
aarch64 = umlsl2
generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Extract narrow
name = vmovn_high
no-q
multi_fn = simd_cast, c:in_t0, b
multi_fn = simd_shuffle-out_len-!, a, c, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 4, 5
b = 2, 3, 4, 5, 12, 13, 14, 15
validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
aarch64 = xtn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Negate
name = vneg
fn = simd_neg
a = 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8
validate 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8
aarch64 = neg
generate int64x*_t
arm = vneg.s
generate int*_t

/// Negate
name = vneg
fn = simd_neg
a = 0., 1., -1., 2., -2., 3., -3., 4.
validate 0., -1., 1., -2., 2., -3., 3., -4.
aarch64 = fneg generate float64x*_t arm = vneg.s generate float*_t /// Signed saturating negate name = vqneg a = MIN, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7 validate MAX, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 link-arm = vqneg._EXT_ link-aarch64 = sqneg._EXT_ aarch64 = sqneg generate int64x*_t arm = vqneg.s generate int*_t /// Saturating subtract name = vqsub a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26 arm = vqsub.s aarch64 = uqsub link-arm = llvm.usub.sat._EXT_ link-aarch64 = uqsub._EXT_ generate uint*_t, uint64x*_t arm = vqsub.s aarch64 = sqsub link-arm = llvm.ssub.sat._EXT_ link-aarch64 = sqsub._EXT_ generate int*_t, int64x*_t /// Saturating subtract name = vqsub multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = simd_extract, {vqsub-in_ntt-noext, a, b}, 0 a = 42 b = 1 validate 41 aarch64 = sqsub generate i8, i16 aarch64 = uqsub generate u8, u16 /// Saturating subtract name = vqsub a = 42 b = 1 validate 41 aarch64 = uqsub link-aarch64 = uqsub._EXT_ generate u32, u64 aarch64 = sqsub link-aarch64 = sqsub._EXT_ generate i32, i64 /// Halving add name = vhadd a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29 arm = vhadd.s aarch64 = uhadd link-aarch64 = uhadd._EXT_ link-arm = vhaddu._EXT_ generate uint*_t arm = vhadd.s aarch64 = shadd link-aarch64 = shadd._EXT_ link-arm = vhadds._EXT_ generate int*_t /// Reverse bit order name = vrbit a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120 aarch64 = rbit link-aarch64 = rbit._EXT_ generate int8x8_t, int8x16_t /// Reverse bit order name = vrbit multi_fn = transmute, {vrbit-signed-noext, transmute(a)} a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120 aarch64 = rbit generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t /// Rounding halving add name = vrhadd a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29 arm = vrhadd.s aarch64 = urhadd link-arm = vrhaddu._EXT_ link-aarch64 = urhadd._EXT_ generate uint*_t arm = vrhadd.s aarch64 = srhadd link-arm = vrhadds._EXT_ link-aarch64 = srhadd._EXT_ generate int*_t /// Floating-point round to integral exact, using current rounding mode name = vrndx a = -1.5, 0.5, 1.5, 2.5 validate -2.0, 0.0, 2.0, 2.0 aarch64 = frintx link-aarch64 = llvm.rint._EXT_ generate float*_t, float64x*_t /// Floating-point round to integral, to nearest with ties to away name = vrnda a = -1.5, 0.5, 1.5, 2.5 validate -2.0, 1.0, 2.0, 3.0 aarch64 = frinta link-aarch64 = llvm.round._EXT_ generate float*_t, float64x*_t /// Floating-point round to integral, to nearest with ties to even name = vrndn a = -1.5, 0.5, 1.5, 2.5 validate -2.0, 0.0, 2.0, 2.0 link-aarch64 = frintn._EXT_ aarch64 = frintn generate float64x*_t target = fp-armv8 arm = vrintn link-arm = vrintn._EXT_ generate float*_t /// Floating-point round to integral, toward minus infinity name = vrndm a = -1.5, 0.5, 1.5, 2.5 validate -2.0, 0.0, 1.0, 2.0 aarch64 = frintm link-aarch64 = llvm.floor._EXT_ generate 
float*_t, float64x*_t /// Floating-point round to integral, toward plus infinity name = vrndp a = -1.5, 0.5, 1.5, 2.5 validate -1.0, 1.0, 2.0, 3.0 aarch64 = frintp link-aarch64 = llvm.ceil._EXT_ generate float*_t, float64x*_t /// Floating-point round to integral, toward zero name = vrnd a = -1.5, 0.5, 1.5, 2.5 validate -1.0, 0.0, 1.0, 2.0 aarch64 = frintz link-aarch64 = llvm.trunc._EXT_ generate float*_t, float64x*_t /// Floating-point round to integral, using current rounding mode name = vrndi a = -1.5, 0.5, 1.5, 2.5 validate -2.0, 0.0, 2.0, 2.0 aarch64 = frinti link-aarch64 = llvm.nearbyint._EXT_ generate float*_t, float64x*_t /// Saturating add name = vqadd a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 arm = vqadd.s aarch64 = uqadd link-arm = llvm.uadd.sat._EXT_ link-aarch64 = uqadd._EXT_ generate uint*_t, uint64x*_t arm = vqadd.s aarch64 = sqadd link-arm = llvm.sadd.sat._EXT_ link-aarch64 = sqadd._EXT_ generate int*_t, int64x*_t /// Saturating add name = vqadd multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = simd_extract, {vqadd-in_ntt-noext, a, b}, 0 a = 42 b = 1 validate 43 aarch64 = sqadd generate i8, i16 aarch64 = uqadd generate u8, u16 /// Saturating add name = vqadd a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 aarch64 = uqadd link-aarch64 = uqadd._EXT_ generate u32, u64 aarch64 = sqadd link-aarch64 = sqadd._EXT_ generate i32, i64 /// Multiply name = vmul a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32 arm = vmul. aarch64 = mul fn = simd_mul generate int*_t, uint*_t /// Polynomial multiply name = vmul a = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 validate 1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48 aarch64 = pmul link-aarch64 = pmul._EXT_ arm = vmul link-arm = vmulp._EXT_ generate poly8x8_t, poly8x16_t /// Multiply name = vmul fn = simd_mul a = 1.0, 2.0, 1.0, 2.0 b = 2.0, 3.0, 4.0, 5.0 validate 2.0, 6.0, 4.0, 10.0 aarch64 = fmul generate float64x*_t arm = vmul. generate float*_t /// Vector multiply by scalar name = vmul out-n-suffix multi_fn = simd_mul, a, {vdup-nout-noext, b} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2 validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32 arm = vmul aarch64 = mul generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t generate uint16x4_t:u16:uint16x4_t, uint16x8_t:u16:uint16x8_t, uint32x2_t:u32:uint32x2_t, uint32x4_t:u32:uint32x4_t /// Vector multiply by scalar name = vmul out-n-suffix multi_fn = simd_mul, a, {vdup-nout-noext, b} a = 1., 2., 3., 4. b = 2. validate 2., 4., 6., 8. 
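// The `multi_fn` lines above compose the body out of other generated intrinsics: the scalar `b`
// is splatted with the `vdup_n` variant of the output type and then fed to `simd_mul`. Taking the
// f32 case generated below as an example, the emitted function is roughly:
//
//     #[inline]
//     #[target_feature(enable = "neon")]
//     pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
//         simd_mul(a, vdup_n_f32(b))
//     }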
aarch64 = fmul generate float64x1_t:f64:float64x1_t, float64x2_t:f64:float64x2_t arm = vmul generate float32x2_t:f32:float32x2_t, float32x4_t:f32:float32x4_t /// Multiply name = vmul lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1 validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32 aarch64 = mul arm = vmul generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t generate uint16x4_t, uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t generate uint32x2_t, uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t /// Floating-point multiply name = vmul lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_mul, a, {transmute--, {simd_extract, b, LANE as u32}} a = 1., 2., 3., 4. b = 2., 0., 0., 0. n = 0 validate 2., 4., 6., 8. aarch64 = fmul generate float64x1_t, float64x1_t:float64x2_t:float64x1_t /// Floating-point multiply name = vmul lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}} a = 1., 2., 3., 4. b = 2., 0., 0., 0. n = 0 validate 2., 4., 6., 8. aarch64 = fmul generate float64x2_t:float64x1_t:float64x2_t, float64x2_t arm = vmul generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t /// Floating-point multiply name = vmuls_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_extract, b:f32, b, LANE as u32 multi_fn = a * b a = 1. b = 2., 0., 0., 0. n = 0 validate 2. aarch64 = fmul generate f32:float32x2_t:f32, f32:float32x4_t:f32 /// Floating-point multiply name = vmuld_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_extract, b:f64, b, LANE as u32 multi_fn = a * b a = 1. b = 2., 0. n = 0 validate 2. 
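// `constn = LANE` makes the lane index a const generic parameter, and the `static_assert_imm-...`
// step becomes a compile-time range check on it. Sketching the f64:float64x2_t:f64 signature
// generated just below (the exact assert macro is an internal detail of the generator):
//
//     #[inline]
//     #[target_feature(enable = "neon")]
//     pub unsafe fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
//         static_assert_imm1!(LANE);              // LANE must be 0 or 1 for a 2-lane vector
//         let b: f64 = simd_extract(b, LANE as u32);
//         a * b
//     }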
aarch64 = fmul generate f64:float64x1_t:f64, f64:float64x2_t:f64 /// Signed multiply long name = vmull a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32 arm = vmull.s aarch64 = smull link-arm = vmulls._EXT_ link-aarch64 = smull._EXT_ generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t /// Signed multiply long name = vmull_high no-q multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right} multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} multi_fn = vmull-noqself-noext, a, b a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 validate 9, 20, 11, 24, 13, 28, 15, 32 aarch64 = smull2 generate int8x16_t:int8x16_t:int16x8_t, int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t /// Unsigned multiply long name = vmull a = 1, 2, 3, 4, 5, 6, 7, 8 b = 1, 2, 1, 2, 1, 2, 1, 2 validate 1, 4, 3, 8, 5, 12, 7, 16 arm = vmull.s aarch64 = umull link-arm = vmullu._EXT_ link-aarch64 = umull._EXT_ generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t /// Unsigned multiply long name = vmull_high no-q multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right} multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} multi_fn = vmull-noqself-noext, a, b a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 validate 9, 20, 11, 24, 13, 28, 15, 32 aarch64 = umull2 generate uint8x16_t:uint8x16_t:uint16x8_t, uint16x8_t:uint16x8_t:uint32x4_t, uint32x4_t:uint32x4_t:uint64x2_t /// Polynomial multiply long name = vmull a = 1, 2, 3, 4, 5, 6, 7, 8 b = 1, 3, 1, 3, 1, 3, 1, 3 validate 1, 6, 3, 12, 5, 10, 7, 24 arm = vmull.s aarch64 = pmull link-arm = vmullp._EXT_ link-aarch64 = pmull._EXT_ generate poly8x8_t:poly8x8_t:poly16x8_t /// Polynomial multiply long name = vmull no-q a = 15 b = 3 validate 17 target = crypto aarch64 = pmull link-aarch64 = pmull64:p64:p64:p64:int8x16_t // Because of the support status of llvm, vmull_p64 is currently only available on aarch64 // arm = vmull // link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t generate p64:p64:p128 /// Polynomial multiply long name = vmull_high no-q multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right} multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} multi_fn = vmull-noqself-noext, a, b a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 validate 9, 30, 11, 20, 13, 18, 15, 48 aarch64 = pmull generate poly8x16_t:poly8x16_t:poly16x8_t /// Polynomial multiply long name = vmull_high no-q multi_fn = vmull-noqself-noext, {simd_extract, a, 1}, {simd_extract, b, 1} a = 1, 15 b = 1, 3 validate 17 target = crypto aarch64 = pmull generate poly64x2_t:poly64x2_t:p128 /// Vector long multiply with scalar name = vmull n-suffix multi_fn = vmull-in0-noext, a, {vdup-nin0-noext, b} a = 1, 2, 3, 4, 5, 6, 7, 8 b = 2 validate 2, 4, 6, 8, 10, 12, 14, 16 arm = vmull aarch64 = smull generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t aarch64 = umull generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t /// 
Vector long multiply by scalar name = vmull_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1 validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32 arm = vmull aarch64 = smull generate int16x4_t:int16x4_t:int32x4_t, int16x4_t:int16x8_t:int32x4_t generate int32x2_t:int32x2_t:int64x2_t, int32x2_t:int32x4_t:int64x2_t aarch64 = umull generate uint16x4_t:uint16x4_t:uint32x4_t, uint16x4_t:uint16x8_t:uint32x4_t generate uint32x2_t:uint32x2_t:uint64x2_t, uint32x2_t:uint32x4_t:uint64x2_t /// Multiply long name = vmull_high_n no-q multi_fn = vmull_high-noqself-noext, a, {vdup-nin0-noext, b} a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 2 validate 18, 20, 22, 24, 26, 28, 30, 32 aarch64 = smull2 generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t aarch64 = umull2 generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t /// Multiply long name = vmull_high_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}} a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1 validate 18, 20, 22, 24, 26, 28, 30, 32 aarch64 = smull2 generate int16x8_t:int16x4_t:int32x4_t, int16x8_t:int16x8_t:int32x4_t generate int32x4_t:int32x2_t:int64x2_t, int32x4_t:int32x4_t:int64x2_t aarch64 = umull2 generate uint16x8_t:uint16x4_t:uint32x4_t, uint16x8_t:uint16x8_t:uint32x4_t generate uint32x4_t:uint32x2_t:uint64x2_t, uint32x4_t:uint32x4_t:uint64x2_t /// Floating-point multiply extended name = vmulx a = 1., 2., 3., 4. b = 2., 2., 2., 2. validate 2., 4., 6., 8. aarch64 = fmulx link-aarch64 = fmulx._EXT_ generate float*_t, float64x*_t /// Floating-point multiply extended name = vmulx lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = vmulx-in0-noext, a, {transmute--, {simd_extract, b, LANE as u32}} a = 1. b = 2., 0. n = 0 validate 2. aarch64 = fmulx generate float64x1_t, float64x1_t:float64x2_t:float64x1_t /// Floating-point multiply extended name = vmulx lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}} a = 1., 2., 3., 4. b = 2., 0., 0., 0. n = 0 validate 2., 4., 6., 8. aarch64 = fmulx generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t generate float64x2_t:float64x1_t:float64x2_t, float64x2_t /// Floating-point multiply extended name = vmulx a = 2. b = 3. validate 6. aarch64 = fmulx link-aarch64 = fmulx._EXT_ generate f32, f64 /// Floating-point multiply extended name = vmulx lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = vmulx-out-noext, a, {simd_extract, b, LANE as u32} a = 2. b = 3., 0., 0., 0. n = 0 validate 6. 
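// The `a`, `b` and `validate` lines also drive the generated unit tests: the inputs are scaled to
// the lane count of each generated type and the result is compared against `validate`. For the
// `vmull_n` entry above and its int16x4_t:i16:int32x4_t signature, the emitted test has roughly
// this shape (the helper vector types and the test attribute are generator details):
//
//     #[simd_test(enable = "neon")]
//     unsafe fn test_vmull_n_s16() {
//         let a: i16x4 = i16x4::new(1, 2, 3, 4);
//         let b: i16 = 2;
//         let e: i32x4 = i32x4::new(2, 4, 6, 8);
//         let r: i32x4 = transmute(vmull_n_s16(transmute(a), b));
//         assert_eq!(r, e);
//     }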
aarch64 = fmulx generate f32:float32x2_t:f32, f32:float32x4_t:f32, f64:float64x1_t:f64, f64:float64x2_t:f64 /// Floating-point fused Multiply-Add to accumulator(vector) name = vfma multi_fn = vfma-self-_, b, c, a a = 8.0, 18.0, 12.0, 10.0 b = 6.0, 4.0, 7.0, 8.0 c = 2.0, 3.0, 4.0, 5.0 validate 20.0, 30.0, 40.0, 50.0 link-aarch64 = llvm.fma._EXT_ aarch64 = fmadd generate float64x1_t aarch64 = fmla generate float64x2_t target = fp-armv8 arm = vfma link-arm = llvm.fma._EXT_ generate float*_t /// Floating-point fused Multiply-Add to accumulator(vector) name = vfma n-suffix multi_fn = vfma-self-noext, a, b, {vdup-nself-noext, c} a = 2.0, 3.0, 4.0, 5.0 b = 6.0, 4.0, 7.0, 8.0 c = 8.0 validate 50.0, 35.0, 60.0, 69.0 aarch64 = fmadd generate float64x1_t:float64x1_t:f64:float64x1_t aarch64 = fmla generate float64x2_t:float64x2_t:f64:float64x2_t target = fp-armv8 arm = vfma generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t /// Floating-point fused multiply-add to accumulator name = vfma in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE multi_fn = vfma-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}} a = 2., 3., 4., 5. b = 6., 4., 7., 8. c = 2., 0., 0., 0. n = 0 validate 14., 11., 18., 21. aarch64 = fmla generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t aarch64 = fmadd generate float64x1_t aarch64 = fmla generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t /// Floating-point fused multiply-add to accumulator name = vfma in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE multi_fn = simd_extract, c:out_t, c, LANE as u32 multi_fn = vfma-in2lane-_, b, c, a a = 2. b = 6. c = 3., 0., 0., 0. n = 0 validate 20. aarch64 = fmla link-aarch64 = llvm.fma._EXT_:f32:f32:f32:f32 generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32 link-aarch64 = llvm.fma._EXT_:f64:f64:f64:f64 aarch64 = fmadd generate f64:f64:float64x1_t:f64 aarch64 = fmla generate f64:f64:float64x2_t:f64 /// Floating-point fused multiply-subtract from accumulator name = vfms multi_fn = simd_neg, b:in_t, b multi_fn = vfma-self-noext, a, b, c a = 20.0, 30.0, 40.0, 50.0 b = 6.0, 4.0, 7.0, 8.0 c = 2.0, 3.0, 4.0, 5.0 validate 8.0, 18.0, 12.0, 10.0 aarch64 = fmsub generate float64x1_t aarch64 = fmls generate float64x2_t target = fp-armv8 arm = vfms generate float*_t /// Floating-point fused Multiply-subtract to accumulator(vector) name = vfms n-suffix multi_fn = vfms-self-noext, a, b, {vdup-nself-noext, c} a = 50.0, 35.0, 60.0, 69.0 b = 6.0, 4.0, 7.0, 8.0 c = 8.0 validate 2.0, 3.0, 4.0, 5.0 aarch64 = fmsub generate float64x1_t:float64x1_t:f64:float64x1_t aarch64 = fmls generate float64x2_t:float64x2_t:f64:float64x2_t target = fp-armv8 arm = vfms generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t /// Floating-point fused multiply-subtract to accumulator name = vfms in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE multi_fn = vfms-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}} a = 14., 11., 18., 21. b = 6., 4., 7., 8. c = 2., 0., 0., 0. n = 0 validate 2., 3., 4., 5. 
aarch64 = fmls generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t aarch64 = fmsub generate float64x1_t aarch64 = fmls generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t /// Floating-point fused multiply-subtract to accumulator name = vfms in2-lane-suffixes constn = LANE multi_fn = vfma-in2lane-::, a, -b, c a = 14. b = 6. c = 2., 0., 0., 0. n = 0 validate 2. aarch64 = fmls generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32 aarch64 = fmsub generate f64:f64:float64x1_t:f64 aarch64 = fmls generate f64:f64:float64x2_t:f64 /// Divide name = vdiv fn = simd_div a = 2.0, 6.0, 4.0, 10.0 b = 1.0, 2.0, 1.0, 2.0 validate 2.0, 3.0, 4.0, 5.0 aarch64 = fdiv generate float*_t, float64x*_t /// Subtract name = vsub a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 arm = vsub. aarch64 = sub fn = simd_sub generate int*_t, uint*_t, int64x*_t, uint64x*_t /// Subtract name = vsub fn = simd_sub a = 1.0, 4.0, 3.0, 8.0 b = 1.0, 2.0, 3.0, 4.0 validate 0.0, 2.0, 0.0, 4.0 aarch64 = fsub generate float64x*_t arm = vsub. generate float*_t /// Signed Add Long across Vector name = vaddlv a = 1, 2, 3, 4 validate 10 aarch64 = saddlv link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_ generate int16x4_t:i32 /// Signed Add Long across Vector name = vaddlv a = 1, 2, 3, 4, 5, 6, 7, 8 validate 36 aarch64 = saddlv link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_ generate int16x8_t:i32 /// Signed Add Long across Vector name = vaddlv a = 1, 2 validate 3 aarch64 = saddlp link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_ generate int32x2_t:i64 /// Signed Add Long across Vector name = vaddlv a = 1, 2, 3, 4 validate 10 aarch64 = saddlv link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_ generate int32x4_t:i64 /// Unsigned Add Long across Vector name = vaddlv a = 1, 2, 3, 4 validate 10 aarch64 = uaddlv link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_ generate uint16x4_t:u32 /// Unsigned Add Long across Vector name = vaddlv a = 1, 2, 3, 4, 5, 6, 7, 8 validate 36 aarch64 = uaddlv link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_ generate uint16x8_t:u32 /// Unsigned Add Long across Vector name = vaddlv a = 1, 2 validate 3 aarch64 = uaddlp link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_ generate uint32x2_t:u64 /// Unsigned Add Long across Vector name = vaddlv a = 1, 2, 3, 4 validate 10 aarch64 = uaddlv link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_ generate uint32x4_t:u64 /// Subtract returning high narrow name = vsubhn no-q multi_fn = fixed, c:in_t multi_fn = simd_cast, {simd_shr, {simd_sub, a, b}, transmute(c)} a = MAX, MIN, 1, 1, MAX, MIN, 1, 1 b = 1, 0, 0, 0, 1, 0, 0, 0 fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS validate MAX, MIN, 0, 0, MAX, MIN, 0, 0 arm = vsubhn aarch64 = subhn generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t /// Subtract returning high narrow name = vsubhn_high no-q multi_fn = vsubhn-noqself-noext, d:in_t0, b, c multi_fn = simd_shuffle-out_len-!, a, d, {asc-0-out_len} a = MAX, 0, MAX, 0, MAX, 0, MAX, 0 b = MAX, 1, MAX, 1, MAX, 1, MAX, 1 c = 1, 0, 1, 0, 1, 0, 1, 0 validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0 arm = vsubhn aarch64 = 
subhn2 generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t /// Signed halving subtract name = vhsub a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 arm = vhsub.s aarch64 = uhsub link-arm = vhsubu._EXT_ link-aarch64 = uhsub._EXT_ generate uint*_t arm = vhsub.s aarch64 = shsub link-arm = vhsubs._EXT_ link-aarch64 = shsub._EXT_ generate int*_t /// Signed Subtract Wide name = vsubw no-q multi_fn = simd_sub, a, {simd_cast, b} a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 arm = vsubw aarch64 = ssubw generate int16x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int64x2_t /// Unsigned Subtract Wide name = vsubw no-q multi_fn = simd_sub, a, {simd_cast, b} a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 arm = vsubw aarch64 = usubw generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint64x2_t /// Signed Subtract Wide name = vsubw_high no-q multi_fn = simd_shuffle8!, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 12, 13, 14, 15, 16 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 validate 0, 0, 0, 0, 0, 0, 0, 0 aarch64 = ssubw generate int16x8_t:int8x16_t:int16x8_t /// Signed Subtract Wide name = vsubw_high no-q multi_fn = simd_shuffle4!, c:int16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 11 b = 0, 1, 2, 3, 8, 9, 10, 11 validate 0, 0, 0, 0 aarch64 = ssubw generate int32x4_t:int16x8_t:int32x4_t /// Signed Subtract Wide name = vsubw_high no-q multi_fn = simd_shuffle2!, c:int32x2_t, b, b, [2, 3] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9 b = 6, 7, 8, 9 validate 0, 0 aarch64 = ssubw generate int64x2_t:int32x4_t:int64x2_t /// Unsigned Subtract Wide name = vsubw_high no-q multi_fn = simd_shuffle8!, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 11, 12, 13, 14, 15 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 validate 0, 0, 0, 0, 0, 0, 0, 0 aarch64 = usubw generate uint16x8_t:uint8x16_t:uint16x8_t /// Unsigned Subtract Wide name = vsubw_high no-q multi_fn = simd_shuffle4!, c:uint16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 11 b = 0, 1, 2, 3, 8, 9, 10, 11 validate 0, 0, 0, 0 aarch64 = usubw generate uint32x4_t:uint16x8_t:uint32x4_t /// Unsigned Subtract Wide name = vsubw_high no-q multi_fn = simd_shuffle2!, c:uint32x2_t, b, b, [2, 3] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9 b = 6, 7, 8, 9 validate 0, 0 aarch64 = usubw generate uint64x2_t:uint32x4_t:uint64x2_t /// Signed Subtract Long name = vsubl no-q multi_fn = simd_cast, c:out_t, a multi_fn = simd_cast, d:out_t, b multi_fn = simd_sub, c, d a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 arm = vsubl aarch64 = ssubl generate int8x8_t:int8x8_t:int16x8_t, 
int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t /// Unsigned Subtract Long name = vsubl no-q multi_fn = simd_cast, c:out_t, a multi_fn = simd_cast, d:out_t, b multi_fn = simd_sub, c, d a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 arm = vsubl aarch64 = usubl generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t /// Signed Subtract Long name = vsubl_high no-q multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, d:out_t, c multi_fn = simd_shuffle8!, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 validate 6, 7, 8, 9, 10, 11, 12, 13 aarch64 = ssubl generate int8x16_t:int8x16_t:int16x8_t /// Signed Subtract Long name = vsubl_high no-q multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7] multi_fn = simd_cast, d:out_t, c multi_fn = simd_shuffle4!, e:int16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f a = 8, 9, 10, 11, 12, 13, 14, 15 b = 6, 6, 6, 6, 8, 8, 8, 8 validate 4, 5, 6, 7 aarch64 = ssubl generate int16x8_t:int16x8_t:int32x4_t /// Signed Subtract Long name = vsubl_high no-q multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3] multi_fn = simd_cast, d:out_t, c multi_fn = simd_shuffle2!, e:int32x2_t, b, b, [2, 3] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f a = 12, 13, 14, 15 b = 6, 6, 8, 8 validate 6, 7 aarch64 = ssubl generate int32x4_t:int32x4_t:int64x2_t /// Unsigned Subtract Long name = vsubl_high no-q multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, d:out_t, c multi_fn = simd_shuffle8!, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 validate 6, 7, 8, 9, 10, 11, 12, 13 aarch64 = usubl generate uint8x16_t:uint8x16_t:uint16x8_t /// Unsigned Subtract Long name = vsubl_high no-q multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7] multi_fn = simd_cast, d:out_t, c multi_fn = simd_shuffle4!, e:uint16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f a = 8, 9, 10, 11, 12, 13, 14, 15 b = 6, 6, 6, 6, 8, 8, 8, 8 validate 4, 5, 6, 7 aarch64 = usubl generate uint16x8_t:uint16x8_t:uint32x4_t /// Unsigned Subtract Long name = vsubl_high no-q multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3] multi_fn = simd_cast, d:out_t, c multi_fn = simd_shuffle2!, e:uint32x2_t, b, b, [2, 3] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f a = 12, 13, 14, 15 b = 6, 6, 8, 8 validate 6, 7 aarch64 = usubl generate uint32x4_t:uint32x4_t:uint64x2_t /// Maximum (vector) name = vmax a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 validate 16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16 arm = vmax aarch64 = smax link-arm = vmaxs._EXT_ link-aarch64 = smax._EXT_ generate int*_t arm = vmax aarch64 = umax link-arm = vmaxu._EXT_ link-aarch64 = umax._EXT_ generate uint*_t /// Maximum (vector) name = vmax a = 1.0, -2.0, 3.0, -4.0 b = 0.0, 3.0, 2.0, 8.0 validate 1.0, 3.0, 3.0, 8.0 aarch64 = fmax link-aarch64 = fmax._EXT_ 
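// `link-aarch64` (and `link-arm`) name the LLVM intrinsic the generated function calls, with
// `_EXT_` replaced by a type-specific suffix. For the floating-point `vmax` entry above, the
// float64x2_t variant presumably binds something like the following; the exact link name, ABI
// string and internal function name are assumptions about the generator's output, shown only to
// illustrate the mechanism:
//
//     extern "unadjusted" {
//         #[link_name = "llvm.aarch64.neon.fmax.v2f64"]
//         fn vmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
//     }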
generate float64x*_t arm = vmax aarch64 = fmax link-arm = vmaxs._EXT_ link-aarch64 = fmax._EXT_ generate float*_t /// Floating-point Maximum Number (vector) name = vmaxnm a = 1.0, 2.0, 3.0, -4.0 b = 8.0, 16.0, -1.0, 6.0 validate 8.0, 16.0, 3.0, 6.0 aarch64 = fmaxnm link-aarch64 = fmaxnm._EXT_ generate float64x*_t target = fp-armv8 arm = vmaxnm aarch64 = fmaxnm link-arm = vmaxnm._EXT_ link-aarch64 = fmaxnm._EXT_ generate float*_t /// Floating-point Maximum Number Pairwise (vector). name = vpmaxnm a = 1.0, 2.0 b = 6.0, -3.0 validate 2.0, 6.0 aarch64 = fmaxnmp link-aarch64 = fmaxnmp._EXT_ generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t /// Floating-point Maximum Number Pairwise (vector). name = vpmaxnm a = 1.0, 2.0, 3.0, -4.0 b = 8.0, 16.0, -1.0, 6.0 validate 2.0, 3.0, 16.0, 6.0 aarch64 = fmaxnmp link-aarch64 = fmaxnmp._EXT_ generate float32x4_t:float32x4_t:float32x4_t /// Minimum (vector) name = vmin a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 validate 1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1 arm = vmin aarch64 = smin link-arm = vmins._EXT_ link-aarch64 = smin._EXT_ generate int*_t arm = vmin aarch64 = umin link-arm = vminu._EXT_ link-aarch64 = umin._EXT_ generate uint*_t /// Minimum (vector) name = vmin a = 1.0, -2.0, 3.0, -4.0 b = 0.0, 3.0, 2.0, 8.0 validate 0.0, -2.0, 2.0, -4.0 aarch64 = fmin link-aarch64 = fmin._EXT_ generate float64x*_t arm = vmin aarch64 = fmin link-arm = vmins._EXT_ link-aarch64 = fmin._EXT_ generate float*_t /// Floating-point Minimum Number (vector) name = vminnm a = 1.0, 2.0, 3.0, -4.0 b = 8.0, 16.0, -1.0, 6.0 validate 1.0, 2.0, -1.0, -4.0 aarch64 = fminnm link-aarch64 = fminnm._EXT_ generate float64x*_t target = fp-armv8 arm = vminnm aarch64 = fminnm link-arm = vminnm._EXT_ link-aarch64 = fminnm._EXT_ generate float*_t /// Floating-point Minimum Number Pairwise (vector). name = vpminnm a = 1.0, 2.0 b = 6.0, -3.0 validate 1.0, -3.0 aarch64 = fminnmp link-aarch64 = fminnmp._EXT_ generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t /// Floating-point Minimum Number Pairwise (vector).
name = vpminnm a = 1.0, 2.0, 3.0, -4.0 b = 8.0, 16.0, -1.0, 6.0 validate 1.0, -4.0, 8.0, -1.0 aarch64 = fminnmp link-aarch64 = fminnmp._EXT_ generate float32x4_t:float32x4_t:float32x4_t /// Signed saturating doubling multiply long name = vqdmull a = 0, 1, 2, 3, 4, 5, 6, 7 b = 1, 2, 3, 4, 5, 6, 7, 8 validate 0, 4, 12, 24, 40, 60, 84, 108 aarch64 = sqdmull link-aarch64 = sqdmull._EXT2_ arm = vqdmull link-arm = vqdmull._EXT2_ generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t /// Signed saturating doubling multiply long name = vqdmull multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = simd_extract, {vqdmull-in_ntt-noext, a, b}, 0 a = 2 b = 3 validate 12 aarch64 = sqdmull generate i16:i16:i32 /// Signed saturating doubling multiply long name = vqdmull a = 2 b = 3 validate 12 aarch64 = sqdmull link-aarch64 = sqdmulls.scalar generate i32:i32:i64 /// Vector saturating doubling long multiply with scalar name = vqdmull_n no-q multi_fn = vqdmull-in_ntt-noext, a, {vdup_n-in_ntt-noext, b} a = 2, 4, 6, 8 b = 2 validate 8, 16, 24, 32 aarch64 = sqdmull arm = vqdmull generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t /// Signed saturating doubling multiply long name = vqdmull_high no-q multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-halflen-halflen} multi_fn = simd_shuffle-out_len-!, b:half, b, b, {asc-halflen-halflen} multi_fn = vqdmull-noqself-noext, a, b a = 0, 1, 4, 5, 4, 5, 6, 7 b = 1, 2, 5, 6, 5, 6, 7, 8 validate 40, 60, 84, 112 aarch64 = sqdmull2 generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t /// Signed saturating doubling multiply long name = vqdmull_high_n no-q multi_fn = simd_shuffle-out_len-!, a:in_ntt, a, a, {asc-out_len-out_len} multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = vqdmull-in_ntt-noext, a, b a = 0, 2, 8, 10, 8, 10, 12, 14 b = 2 validate 32, 40, 48, 56 aarch64 = sqdmull2 generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t /// Vector saturating doubling long multiply by scalar name = vqdmull_lane constn = N multi_fn = static_assert_imm-in_exp_len-N multi_fn = simd_shuffle-out_len-!, b:in_t0, b, b, {dup-out_len-N as u32} multi_fn = vqdmull-noqself-noext, a, b a = 1, 2, 3, 4 b = 0, 2, 2, 0, 2, 0, 0, 0 n = HFLEN validate 4, 8, 12, 16 aarch64 = sqdmull generate int16x4_t:int16x8_t:int32x4_t, int32x2_t:int32x4_t:int64x2_t arm = vqdmull generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t /// Signed saturating doubling multiply long name = vqdmullh_lane constn = N multi_fn = static_assert_imm-in_exp_len-N multi_fn = simd_extract, b:in_t0, b, N as u32 multi_fn = vqdmullh-noqself-noext, a, b a = 2 b = 0, 2, 2, 0, 2, 0, 0, 0 n = HFLEN validate 8 aarch64 = sqdmull generate i16:int16x4_t:i32, i16:int16x8_t:i32 /// Signed saturating doubling multiply long name = vqdmulls_lane constn = N multi_fn = static_assert_imm-in_exp_len-N multi_fn = simd_extract, b:in_t0, b, N as u32 multi_fn = vqdmulls-noqself-noext, a, b a = 2 b = 0, 2, 2, 0, 2, 0, 0, 0 n = HFLEN validate 8 aarch64 = sqdmull generate i32:int32x2_t:i64, i32:int32x4_t:i64 /// Signed saturating doubling multiply long name = vqdmull_high_lane constn = N multi_fn = static_assert_imm-in_exp_len-N multi_fn = simd_shuffle-out_len-!, a:in_t, a, a, {asc-out_len-out_len} multi_fn = simd_shuffle-out_len-!, b:in_t, b, b, {dup-out_len-N as u32} multi_fn = vqdmull-self-noext, a, b a = 0, 1, 4, 5, 4, 5, 6, 7 b = 0, 2, 2, 0, 2, 0, 0, 0 n = HFLEN validate 16, 20, 24, 28 aarch64 = sqdmull2 generate 
int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t /// Signed saturating doubling multiply long name = vqdmull_high_lane constn = N multi_fn = static_assert_imm-in_exp_len-N multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-out_len-out_len} multi_fn = simd_shuffle-out_len-!, b:half, b, b, {dup-out_len-N as u32} multi_fn = vqdmull-noqself-noext, a, b a = 0, 1, 4, 5, 4, 5, 6, 7 b = 0, 2, 2, 0, 2, 0, 0, 0 n = HFLEN validate 16, 20, 24, 28 aarch64 = sqdmull2 generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t /// Signed saturating doubling multiply-add long name = vqdmlal multi_fn = vqadd-out-noext, a, {vqdmull-self-noext, b, c} a = 1, 1, 1, 1 b = 1, 2, 3, 4 c = 2, 2, 2, 2 validate 5, 9, 13, 17 aarch64 = sqdmlal arm = vqdmlal generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t /// Vector widening saturating doubling multiply accumulate with scalar name = vqdmlal n-suffix multi_fn = vqadd-out-noext, a, {vqdmull_n-self-noext, b, c} a = 1, 1, 1, 1 b = 1, 2, 3, 4 c = 2 validate 5, 9, 13, 17 aarch64 = sqdmlal arm = vqdmlal generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t /// Signed saturating doubling multiply-add long name = vqdmlal_high no-q multi_fn = vqadd-out-noext, a, {vqdmull_high-noqself-noext, b, c} a = 1, 2, 3, 4 b = 0, 1, 4, 5, 4, 5, 6, 7 c = 1, 2, 5, 6, 5, 6, 7, 8 validate 41, 62, 87, 116 aarch64 = sqdmlal2 generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t /// Signed saturating doubling multiply-add long name = vqdmlal_high_n no-q multi_fn = vqadd-out-noext, a, {vqdmull_high_n-noqself-noext, b, c} a = 1, 2, 3, 4 b = 0, 2, 8, 10, 8, 10, 12, 14 c = 2 validate 33, 42, 51, 60 aarch64 = sqdmlal2 generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t /// Vector widening saturating doubling multiply accumulate with scalar name = vqdmlal_lane in2-suffix constn = N multi_fn = static_assert_imm-in2_exp_len-N multi_fn = vqadd-out-noext, a, {vqdmull_lane-in2-::, b, c} a = 1, 2, 3, 4 b = 1, 2, 3, 4 c = 0, 2, 2, 0, 2, 0, 0, 0 n = HFLEN validate 5, 10, 15, 20 aarch64 = sqdmlal generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t arm = vqdmlal generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t /// Signed saturating doubling multiply-add long name = vqdmlal_high_lane in2-suffix constn = N multi_fn = static_assert_imm-in2_exp_len-N multi_fn = vqadd-out-noext, a, {vqdmull_high_lane-in2-::, b, c} a = 1, 2, 3, 4 b = 0, 1, 4, 5, 4, 5, 6, 7 c = 0, 2, 0, 0, 0, 0, 0, 0 n = 1 validate 17, 22, 27, 32 aarch64 = sqdmlal2 generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t /// Signed saturating doubling multiply-subtract long name = vqdmlsl multi_fn = vqsub-out-noext, a, {vqdmull-self-noext, b, c} a = 3, 7, 11, 15 b = 1, 2, 3, 4 c = 2, 2, 2, 2 validate -1, -1, -1, -1 aarch64 = sqdmlsl arm = vqdmlsl generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t /// Vector widening saturating doubling multiply subtract with scalar name = vqdmlsl n-suffix multi_fn = vqsub-out-noext, a, {vqdmull_n-self-noext, b, c} a = 3, 7, 11, 15 b = 1, 2, 3, 4 c = 2 validate -1, -1, -1, -1 aarch64 = sqdmlsl arm = vqdmlsl generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t /// Signed saturating doubling multiply-subtract long name 
= vqdmlsl_high no-q multi_fn = vqsub-out-noext, a, {vqdmull_high-noqself-noext, b, c} a = 39, 58, 81, 108 b = 0, 1, 4, 5, 4, 5, 6, 7 c = 1, 2, 5, 6, 5, 6, 7, 8 validate -1, -2, -3, -4 aarch64 = sqdmlsl2 generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t /// Signed saturating doubling multiply-subtract long name = vqdmlsl_high_n no-q multi_fn = vqsub-out-noext, a, {vqdmull_high_n-noqself-noext, b, c} a = 31, 38, 45, 52 b = 0, 2, 8, 10, 8, 10, 12, 14 c = 2 validate -1, -2, -3, -4 aarch64 = sqdmlsl2 generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t /// Vector widening saturating doubling multiply subtract with scalar name = vqdmlsl_lane in2-suffix constn = N multi_fn = static_assert_imm-in2_exp_len-N multi_fn = vqsub-out-noext, a, {vqdmull_lane-in2-::, b, c} a = 3, 6, 9, 12 b = 1, 2, 3, 4 c = 0, 2, 2, 0, 2, 0, 0, 0 n = HFLEN validate -1, -2, -3, -4 aarch64 = sqdmlsl generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t arm = vqdmlsl generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t /// Signed saturating doubling multiply-subtract long name = vqdmlsl_high_lane in2-suffix constn = N multi_fn = static_assert_imm-in2_exp_len-N multi_fn = vqsub-out-noext, a, {vqdmull_high_lane-in2-::, b, c} a = 15, 18, 21, 24 b = 0, 1, 4, 5, 4, 5, 6, 7 c = 0, 2, 0, 0, 0, 0, 0, 0 n = 1 validate -1, -2, -3, -4 aarch64 = sqdmlsl2 generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t /// Signed saturating doubling multiply returning high half name = vqdmulh a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX b = 2, 2, 2, 2, 2, 2, 2, 2 validate 1, 1, 1, 1, 1, 1, 1, 1 aarch64 = sqdmulh link-aarch64 = sqdmulh._EXT_ arm = vqdmulh link-arm = vqdmulh._EXT_ generate int16x4_t, int16x8_t, int32x2_t, int32x4_t /// Signed saturating doubling multiply returning high half name = vqdmulh multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = simd_extract, {vqdmulh-in_ntt-noext, a, b}, 0 a = 1 b = 2 validate 0 aarch64 = sqdmulh generate i16, i32 /// Vector saturating doubling multiply high with scalar name = vqdmulh_n out-suffix multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = vqdmulh-out-noext, a, b a = MAX, MAX, MAX, MAX b = 2 validate 1, 1, 1, 1 aarch64 = sqdmulh arm = vqdmulh generate int16x4_t:i16:int16x4_t, int32x2_t:i32:int32x2_t /// Vector saturating doubling multiply high with scalar name = vqdmulhq_n out-suffix multi_fn = vdupq_n-in_ntt-noext, b:out_t, b multi_fn = vqdmulh-out-noext, a, b a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX b = 2 validate 1, 1, 1, 1, 1, 1, 1, 1 aarch64 = sqdmulh arm = vqdmulh generate int16x8_t:i16:int16x8_t, int32x4_t:i32:int32x4_t /// Signed saturating doubling multiply returning high half name = vqdmulhh_lane constn = N multi_fn = static_assert_imm-in_exp_len-N multi_fn = simd_extract, b:in_t0, b, N as u32 multi_fn = vqdmulhh-out_ntt-noext, a, b a = 2 b = 0, 0, MAX, 0, 0, 0, 0, 0 n = 2 validate 1 aarch64 = sqdmulh generate i16:int16x4_t:i16, i16:int16x8_t:i16 /// Signed saturating doubling multiply returning high half name = vqdmulhs_lane constn = N multi_fn = static_assert_imm-in_exp_len-N multi_fn = simd_extract, b:in_t0, b, N as u32 multi_fn = vqdmulhs-out_ntt-noext, a, b a = 2 b = 0, MAX, 0, 0 n = 1 validate 1 aarch64 = sqdmulh generate i32:int32x2_t:i32, i32:int32x4_t:i32 /// Signed saturating 
extract narrow name = vqmovn no-q a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX aarch64 = sqxtn link-aarch64 = sqxtn._EXT2_ arm = vqmovn link-arm = vqmovns._EXT2_ generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t /// Unsigned saturating extract narrow name = vqmovn no-q a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX aarch64 = uqxtn link-aarch64 = uqxtn._EXT2_ arm = vqmovn link-arm = vqmovnu._EXT2_ generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t /// Saturating extract narrow name = vqmovn multi_fn = simd_extract, {vqmovn-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0 a = 1 validate 1 aarch64 = sqxtn generate i16:i8, i32:i16 aarch64 = uqxtn generate u16:u8, u32:u16 /// Saturating extract narrow name = vqmovn a = 1 validate 1 aarch64 = sqxtn link-aarch64 = scalar.sqxtn._EXT2_._EXT_ generate i64:i32 aarch64 = uqxtn link-aarch64 = scalar.uqxtn._EXT2_._EXT_ generate u64:u32 /// Signed saturating extract narrow name = vqmovn_high no-q multi_fn = simd_shuffle-out_len-!, a, {vqmovn-noqself-noext, b}, {asc-0-out_len} a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX aarch64 = sqxtn2 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t aarch64 = uqxtn2 generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t /// Signed saturating extract unsigned narrow name = vqmovun no-q a = -1, -1, -1, -1, -1, -1, -1, -1 validate 0, 0, 0, 0, 0, 0, 0, 0 aarch64 = sqxtun link-aarch64 = sqxtun._EXT2_ arm = vqmovun link-arm = vqmovnsu._EXT2_ generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t /// Signed saturating extract unsigned narrow name = vqmovun multi_fn = simd_extract, {vqmovun-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0 a = 1 validate 1 aarch64 = sqxtun generate i16:u8, i32:u16, i64:u32 /// Signed saturating extract unsigned narrow name = vqmovun_high no-q multi_fn = simd_shuffle-out_len-!, a, {vqmovun-noqself-noext, b}, {asc-0-out_len} a = 0, 0, 0, 0, 0, 0, 0, 0 b = -1, -1, -1, -1, -1, -1, -1, -1 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 aarch64 = sqxtun2 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t /// Signed saturating rounding doubling multiply returning high half name = vqrdmulh a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX b = 2, 2, 2, 2, 2, 2, 2, 2 validate 2, 2, 2, 2, 2, 2, 2, 2 aarch64 = sqrdmulh link-aarch64 = sqrdmulh._EXT_ arm = vqrdmulh link-arm = vqrdmulh._EXT_ generate int16x4_t, int16x8_t, int32x2_t, int32x4_t /// Signed saturating rounding doubling multiply returning high half name = vqrdmulh multi_fn = simd_extract, {vqrdmulh-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 1 b = 2 validate 0 aarch64 = sqrdmulh generate i16, i32 /// Vector saturating rounding doubling multiply high with scalar name = vqrdmulh out-n-suffix multi_fn = vqrdmulh-out-noext, a, {vdup-nout-noext, b} a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX b = 2 validate 2, 2, 2, 2, 2, 2, 2, 2 aarch64 = sqrdmulh arm = vqrdmulh generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t /// Vector rounding saturating doubling multiply high by scalar name = vqrdmulh lane-suffixes constn = LANE multi_fn = 
static_assert_imm-in_exp_len-LANE multi_fn = simd_shuffle-out_len-!, b:out_t, b, b, {dup-out_len-LANE as u32} multi_fn = vqrdmulh-out-noext, a, b a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX b = 0, 2, 0, 0, 0, 0, 0, 0, n = 1 validate 2, 2, 2, 2, 2, 2, 2, 2 aarch64 = sqrdmulh arm = vqrdmulh generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t /// Signed saturating rounding doubling multiply returning high half name = vqrdmulh lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = vqrdmulh-out-noext, a, {simd_extract, b, LANE as u32} a = 1 b = 0, 2, 0, 0, 0, 0, 0, 0, n = 1 validate 0 aarch64 = sqrdmulh generate i16:int16x4_t:i16, i16:int16x8_t:i16, i32:int32x2_t:i32, i32:int32x4_t:i32 /// Signed saturating rounding doubling multiply accumulate returning high half name = vqrdmlah multi_fn = vqadd-out-noext, a, {vqrdmulh-out-noext, b, c} a = 1, 1, 1, 1, 1, 1, 1, 1 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX c = 2, 2, 2, 2, 2, 2, 2, 2 validate 3, 3, 3, 3, 3, 3, 3, 3 aarch64 = sqrdmulh arm = vqrdmulh generate int16x4_t, int16x8_t, int32x2_t, int32x4_t /// Signed saturating rounding doubling multiply accumulate returning high half name = vqrdmlah multi_fn = vqadd-self-noext, a, {vqrdmulh-self-noext, b, c} a = 1 b = 1 c = 2 validate 1 aarch64 = sqrdmulh generate i16, i32 /// Signed saturating rounding doubling multiply accumulate returning high half name = vqrdmlah in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE multi_fn = vqadd-self-noext, a, {vqrdmulh-in2lane-::, b, c} a = 1, 1, 1, 1, 1, 1, 1, 1 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX c = 0, 2, 0, 0, 0, 0, 0, 0 n = 1 validate 3, 3, 3, 3, 3, 3, 3, 3 aarch64 = sqrdmulh arm = vqrdmulh generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t /// Signed saturating rounding doubling multiply accumulate returning high half name = vqrdmlah in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE multi_fn = vqadd-self-noext, a, {vqrdmulh-in2lane-::, b, c} a = 1 b = 1 c = 0, 2, 0, 0, 0, 0, 0, 0 n = 1 validate 1 aarch64 = sqrdmulh generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32 /// Signed saturating rounding doubling multiply subtract returning high half name = vqrdmlsh multi_fn = vqsub-out-noext, a, {vqrdmulh-out-noext, b, c} a = 1, 1, 1, 1, 1, 1, 1, 1 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX c = 2, 2, 2, 2, 2, 2, 2, 2 validate -1, -1, -1, -1, -1, -1, -1, -1 aarch64 = sqrdmulh arm = vqrdmulh generate int16x4_t, int16x8_t, int32x2_t, int32x4_t /// Signed saturating rounding doubling multiply subtract returning high half name = vqrdmlsh multi_fn = vqsub-self-noext, a, {vqrdmulh-self-noext, b, c} a = 1 b = 1 c = 2 validate 1 aarch64 = sqrdmulh generate i16, i32 /// Signed saturating rounding doubling multiply subtract returning high half name = vqrdmlsh in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE multi_fn = vqsub-self-noext, a, {vqrdmulh-in2lane-::, b, c} a = 1, 1, 1, 1, 1, 1, 1, 1 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX c = 0, 2, 0, 0, 0, 0, 0, 0 n = 1 validate -1, -1, -1, -1, -1, -1, -1, -1 aarch64 = sqrdmulh arm = vqrdmulh generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, 
int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t /// Signed saturating rounding doubling multiply subtract returning high half name = vqrdmlsh in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE multi_fn = vqsub-self-noext, a, {vqrdmulh-in2lane-::, b, c} a = 1 b = 1 c = 0, 2, 0, 0, 0, 0, 0, 0 n = 1 validate 1 aarch64 = sqrdmulh generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32 /// Signed saturating rounding shift left name = vqrshl a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = sqrshl link-aarch64 = sqrshl._EXT_ generate i32, i64 arm = vqrshl link-arm = vqrshifts._EXT_ generate int*_t, int64x*_t /// Signed saturating rounding shift left name = vqrshl multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = simd_extract, {vqrshl-in_ntt-noext, a, b}, 0 a = 1 b = 2 validate 4 aarch64 = sqrshl generate i8, i16 /// Unsigned saturating rounding shift left name = vqrshl out-suffix a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = uqrshl link-aarch64 = uqrshl._EXT_ generate u32:i32:u32, u64:i64:u64 arm = vqrshl link-arm = vqrshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t /// Unsigned saturating rounding shift left name = vqrshl out-suffix multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = simd_extract, {vqrshl-out_ntt-noext, a, b}, 0 a = 1 b = 2 validate 4 aarch64 = uqrshl generate u8:i8:u8, u16:i16:u16 /// Signed saturating rounded shift right narrow name = vqrshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits a = MIN, 4, 8, 12, 16, 20, 24, 28 n = 2 validate MIN, 1, 2, 3, 4, 5, 6, 7 aarch64 = sqrshrn link-aarch64 = sqrshrn._EXT2_ const-aarch64 = N arm = vqrshrn link-arm = vqrshiftns._EXT2_ const-arm = -N as ttn generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t /// Signed saturating rounded shift right narrow name = vqrshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a multi_fn = simd_extract, {vqrshrn_n-in_ntt-::, a}, 0 a = 4 n = 2 validate 1 aarch64 = sqrshrn generate i16:i8, i32:i16, i64:i32 /// Signed saturating rounded shift right narrow name = vqrshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 6, 7 b = 8, 12, 24, 28, 48, 52, 56, 60 n = 2 validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15 aarch64 = sqrshrn2 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t /// Unsigned saturating rounded shift right narrow name = vqrshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits a = MIN, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = uqrshrn link-aarch64 =
uqrshrn._EXT2_ const-aarch64 = N arm = vqrshrn link-arm = vqrshiftnu._EXT2_ const-arm = -N as ttn generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t /// Unsigned saturating rounded shift right narrow name = vqrshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a multi_fn = simd_extract, {vqrshrn_n-in_ntt-::, a}, 0 a = 4 n = 2 validate 1 aarch64 = uqrshrn generate u16:u8, u32:u16, u64:u32 /// Unsigned saturating rounded shift right narrow name = vqrshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 6, 7 b = 8, 12, 24, 28, 48, 52, 56, 60 n = 2 validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15 aarch64 = uqrshrn2 generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t /// Signed saturating rounded shift right unsigned narrow name = vqrshrun noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits a = 0, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = sqrshrun link-aarch64 = sqrshrun._EXT2_ const-aarch64 = N arm = vqrshrun link-arm = vqrshiftnsu._EXT2_ const-arm = -N as ttn generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t /// Signed saturating rounded shift right unsigned narrow name = vqrshrun noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a multi_fn = simd_extract, {vqrshrun_n-in_ntt-::, a}, 0 a = 4 n = 2 validate 1 aarch64 = sqrshrun generate i16:u8, i32:u16, i64:u32 /// Signed saturating rounded shift right unsigned narrow name = vqrshrun_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vqrshrun_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 6, 7 b = 8, 12, 24, 28, 48, 52, 56, 60 n = 2 validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15 aarch64 = sqrshrun2 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t /// Signed saturating shift left name = vqshl a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = sqshl link-aarch64 = sqshl._EXT_ generate i64 arm = vqshl link-arm = vqshifts._EXT_ generate int*_t, int64x*_t /// Signed saturating shift left name = vqshl multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b} multi_fn = simd_extract, c, 0 a = 1 b = 2 validate 4 aarch64 = sqshl generate i8, i16, i32 /// Unsigned saturating shift left name = vqshl out-suffix a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = uqshl link-aarch64 = uqshl._EXT_ generate u64:i64:u64 arm = vqshl link-arm = vqshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t /// Unsigned saturating shift left name = vqshl out-suffix multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b} multi_fn = simd_extract, c, 0 a = 1 b = 2 validate 4 
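// The scalar u8/u16 forms generated just below have no dedicated LLVM link (unlike the u32/u64
// forms above), so the spec builds them from the vector op: splat both operands, shift, then take
// lane 0. Assuming the u8 case (the byte suffix is expected to be `b`), the result is roughly:
//
//     #[inline]
//     #[target_feature(enable = "neon")]
//     pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 {
//         let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
//         simd_extract(c, 0)
//     }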
aarch64 = uqshl generate u8:i8:u8, u16:i16:u16, u32:i32:u32 /// Signed saturating shift left name = vqshl n-suffix constn = N multi_fn = static_assert_imm-out_bits_exp_len-N multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N.try_into().unwrap()} a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 n = 2 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = sqshl arm = vqshl generate int*_t, int64x*_t /// Signed saturating shift left name = vqshl n-suffix constn = N multi_fn = static_assert_imm-out_bits_exp_len-N multi_fn = simd_extract, {vqshl_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0 a = 1 n = 2 validate 4 aarch64 = sqshl generate i8, i16, i32, i64 /// Unsigned saturating shift left name = vqshl n-suffix constn = N multi_fn = static_assert_imm-out_bits_exp_len-N multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N.try_into().unwrap()} a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 n = 2 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = uqshl arm = vqshl generate uint*_t, uint64x*_t /// Unsigned saturating shift left name = vqshl n-suffix constn = N multi_fn = static_assert_imm-out_bits_exp_len-N multi_fn = simd_extract, {vqshl_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0 a = 1 n = 2 validate 4 aarch64 = uqshl generate u8, u16, u32, u64 /// Signed saturating shift right narrow name = vqshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits a = 0, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = sqshrn link-aarch64 = sqshrn._EXT2_ const-aarch64 = N generate i64:i32 arm = vqshrn link-arm = vqshiftns._EXT2_ const-arm = -N as ttn generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t /// Signed saturating shift right narrow name = vqshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_extract, {vqshrn_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0 a = 4 n = 2 validate 1 aarch64 = sqshrn generate i16:i8, i32:i16 /// Signed saturating shift right narrow name = vqshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = sqshrn2 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t /// Unsigned saturating shift right narrow name = vqshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits a = 0, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = uqshrn link-aarch64 = uqshrn._EXT2_ const-aarch64 = N generate u64:u32 arm = vqshrn link-arm = vqshiftnu._EXT2_ const-arm = -N as ttn generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t /// Unsigned saturating shift right narrow name = vqshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_extract, {vqshrn_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0 a = 4 n = 2 validate 1 aarch64 = uqshrn generate u16:u8, u32:u16 /// Unsigned saturating shift right narrow name = vqshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = uqshrn2 generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, 
uint32x2_t:uint64x2_t:uint32x4_t /// Signed saturating shift right unsigned narrow name = vqshrun noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits a = 0, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = sqshrun link-aarch64 = sqshrun._EXT2_ const-aarch64 = N arm = vqshrun link-arm = vqshiftnsu._EXT2_ const-arm = -N as ttn generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t /// Signed saturating shift right unsigned narrow name = vqshrun noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_extract, {vqshrun_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0 a = 4 n = 2 validate 1 aarch64 = sqshrun generate i16:u8, i32:u16, i64:u32 /// Signed saturating shift right unsigned narrow name = vqshrun_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vqshrun_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = sqshrun2 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t /// Calculates the square root of each lane. name = vsqrt fn = simd_fsqrt a = 4.0, 9.0, 16.0, 25.0 validate 2.0, 3.0, 4.0, 5.0 aarch64 = fsqrt generate float*_t, float64x*_t /// Reciprocal square-root estimate. name = vrsqrte a = 1.0, 2.0, 3.0, 4.0 validate 0.998046875, 0.705078125, 0.576171875, 0.4990234375 aarch64 = frsqrte link-aarch64 = frsqrte._EXT_ generate float64x*_t arm = vrsqrte link-arm = vrsqrte._EXT_ generate float*_t /// Reciprocal estimate. name = vrecpe a = 4.0, 3.0, 2.0, 1.0 validate 0.24951171875, 0.3330078125, 0.4990234375, 0.998046875 aarch64 = frecpe link-aarch64 = frecpe._EXT_ generate float64x*_t arm = vrecpe link-arm = vrecpe._EXT_ generate float*_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = str generate poly64x1_t:int64x1_t, poly64x1_t:uint64x1_t, int64x1_t:poly64x1_t, uint64x1_t:poly64x1_t generate poly64x2_t:int64x2_t, poly64x2_t:uint64x2_t, int64x2_t:poly64x2_t, uint64x2_t:poly64x2_t arm = str generate uint8x8_t:int8x8_t, poly8x8_t:int8x8_t, poly16x4_t:int16x4_t, uint16x4_t:int16x4_t, uint32x2_t:int32x2_t, uint64x1_t:int64x1_t generate uint8x16_t:int8x16_t, poly8x16_t:int8x16_t, poly16x8_t:int16x8_t, uint16x8_t:int16x8_t, uint32x4_t:int32x4_t, uint64x2_t:int64x2_t generate poly8x8_t:uint8x8_t, int8x8_t:uint8x8_t, poly16x4_t:uint16x4_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t generate poly8x16_t:uint8x16_t, int8x16_t:uint8x16_t, poly16x8_t:uint16x8_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t generate int8x8_t:poly8x8_t, uint8x8_t:poly8x8_t, int16x4_t:poly16x4_t, uint16x4_t:poly16x4_t generate int8x16_t:poly8x16_t, uint8x16_t:poly8x16_t, int16x8_t:poly16x8_t, uint16x8_t:poly16x8_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 1, 2, 3, 4, 5, 6, 7 validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 aarch64 = str generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t arm = str generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, 
poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x2_t:uint16x4_t, uint32x2_t:uint16x4_t, int64x1_t:uint32x2_t, uint64x1_t:uint32x2_t generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = str generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t arm = str generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16x4_t:uint32x2_t, int16x4_t:uint32x2_t, uint16x4_t:uint32x2_t, int32x2_t:uint64x1_t, uint32x2_t:uint64x1_t generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 1, 2, 3 validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0 aarch64 = str generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t arm = str generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64x1_t:uint16x4_t generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0 validate 0, 1, 2, 3 aarch64 = str generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t arm = str generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t generate poly8x16_t:uint32x4_t, 
int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 1 validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 aarch64 = str generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t arm = str generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 validate 0, 1 aarch64 = str generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t arm = str generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0., 0., 0., 0., 0., 0., 0., 0. validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 aarch64 = str generate float64x1_t:int8x8_t, float64x1_t:int16x4_t, float64x1_t:int32x2_t, float64x1_t:int64x1_t generate float64x2_t:int8x16_t, float64x2_t:int16x8_t, float64x2_t:int32x4_t, float64x2_t:int64x2_t generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t, float64x1_t:uint64x1_t generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t arm = str generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t generate float32x4_t:int8x16_t, float32x4_t:int16x8_t, float32x4_t:int32x4_t, float32x4_t:int64x2_t generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t, float32x2_t:uint64x1_t generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate 0., 0., 0., 0., 0., 0., 0., 0. 
aarch64 = str generate int8x8_t:float64x1_t, int16x4_t:float64x1_t, int32x2_t:float64x1_t, int64x1_t:float64x1_t generate int8x16_t:float64x2_t, int16x8_t:float64x2_t, int32x4_t:float64x2_t, int64x2_t:float64x2_t generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t, uint64x1_t:float64x1_t generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t arm = str generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t generate int8x16_t:float32x4_t, int16x8_t:float32x4_t, int32x4_t:float32x4_t, int64x2_t:float32x4_t generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t, uint64x1_t:float32x2_t generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t /// Vector reinterpret cast operation name = vreinterpret double-suffixes fn = transmute a = 0., 0., 0., 0., 0., 0., 0., 0. validate 0., 0., 0., 0., 0., 0., 0., 0. aarch64 = str generate float32x2_t:float64x1_t, float64x1_t:float32x2_t generate float32x4_t:float64x2_t, float64x2_t:float32x4_t /// Signed rounding shift left name = vrshl a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = srshl link-aarch64 = srshl._EXT_ generate i64 arm = vrshl link-arm = vrshifts._EXT_ generate int*_t, int64x*_t /// Unsigned rounding shift left name = vrshl out-suffix a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = urshl link-aarch64 = urshl._EXT_ generate u64:i64:u64 arm = vrshl link-arm = vrshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t /// Signed rounding shift right name = vrshr n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N).try_into().unwrap()} a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = srshr arm = vrshr generate int*_t, int64x*_t /// Signed rounding shift right name = vrshr n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = vrshl-self-noext, a, -N as i64 a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = srshr generate i64 /// Unsigned rounding shift right name = vrshr n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N).try_into().unwrap()} a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = urshr arm = vrshr generate uint*_t, uint64x*_t /// Unsigned rounding shift right name = vrshr n-suffix constn = N multi_fn = static_assert-N-1-bits 
multi_fn = vrshl-self-noext, a, -N as i64 a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = urshr generate u64 /// Rounding shift right narrow name = vrshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = rshrn link-aarch64 = rshrn._EXT2_ const-aarch64 = N arm = vrshrn link-arm = vrshiftn._EXT2_ const-arm = -N as ttn generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t /// Rounding shift right narrow name = vrshrn noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = transmute, {vrshrn_n-noqsigned-::, transmute(a)} a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = rshrn arm = vrshrn generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t /// Rounding shift right narrow name = vrshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vrshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = rshrn2 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t /// Signed rounding shift right and accumulate name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = simd_add, a, {vrshr-nself-::, b} a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 aarch64 = srsra arm = vrsra generate int*_t, int64x*_t /// Unsigned rounding shift right and accumulate name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = simd_add, a, {vrshr-nself-::, b} a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 aarch64 = ursra arm = vrsra generate uint*_t, uint64x*_t /// Signed rounding shift right and accumulate. name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = vrshr-nself-::, b:in_t, b multi_fn = a + b a = 1 b = 4 n = 2 validate 2 aarch64 = srsra generate i64 /// Unsigned rounding shift right and accumulate. 
name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = vrshr-nself-::, b:in_t, b multi_fn = a + b a = 1 b = 4 n = 2 validate 2 aarch64 = ursra generate u64 /// Insert vector element from another vector element name = vset_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 n = 0 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = nop arm = nop generate i8:int8x8_t:int8x8_t, i16:int16x4_t:int16x4_t generate i32:int32x2_t:int32x2_t, i64:int64x1_t:int64x1_t generate u8:uint8x8_t:uint8x8_t, u16:uint16x4_t:uint16x4_t generate u32:uint32x2_t:uint32x2_t, u64:uint64x1_t:uint64x1_t generate p8:poly8x8_t:poly8x8_t, p16:poly16x4_t:poly16x4_t target = crypto generate p64:poly64x1_t:poly64x1_t /// Insert vector element from another vector element name = vsetq_lane no-q constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 n = 0 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 aarch64 = nop arm = nop generate i8:int8x16_t:int8x16_t, i16:int16x8_t:int16x8_t generate i32:int32x4_t:int32x4_t, i64:int64x2_t:int64x2_t generate u8:uint8x16_t:uint8x16_t, u16:uint16x8_t:uint16x8_t generate u32:uint32x4_t:uint32x4_t, u64:uint64x2_t:uint64x2_t generate p8:poly8x16_t:poly8x16_t, p16:poly16x8_t:poly16x8_t target = crypto generate p64:poly64x2_t:poly64x2_t /// Insert vector element from another vector element name = vset_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1. b = 0., 2., 3., 4. n = 0 validate 1., 2., 3., 4. aarch64 = nop generate f64:float64x1_t:float64x1_t arm = nop generate f32:float32x2_t:float32x2_t /// Insert vector element from another vector element name = vsetq_lane no-q constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1. b = 0., 2., 3., 4. n = 0 validate 1., 2., 3., 4. 
aarch64 = nop generate f64:float64x2_t:float64x2_t arm = nop generate f32:float32x4_t:float32x4_t /// Signed Shift left name = vshl a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = sshl link-aarch64 = sshl._EXT_ arm = vshl link-arm = vshifts._EXT_ generate int*_t, int64x*_t /// Signed Shift left name = vshl multi_fn = transmute, {vshl-in_ntt-noext, transmute(a), transmute(b)} a = 1 b = 2 validate 4 aarch64 = sshl generate i64 /// Unsigned Shift left name = vshl out-suffix a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = ushl link-aarch64 = ushl._EXT_ arm = vshl link-arm = vshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t /// Unsigned Shift left out-suffix name = vshl multi_fn = transmute, {vshl-out_ntt-noext, transmute(a), transmute(b)} a = 1 b = 2 validate 4 aarch64 = ushl generate u64:i64:u64 /// Shift left name = vshl n-suffix constn = N multi_fn = static_assert_imm-out_bits_exp_len-N multi_fn = simd_shl, a, {vdup-nself-noext, N.try_into().unwrap()} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 n = 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 arm = vshl aarch64 = shl generate int*_t, uint*_t, int64x*_t, uint64x*_t /// Signed shift left long name = vshll n-suffix constn = N multi_fn = static_assert-N-0-bits multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N.try_into().unwrap()} a = 1, 2, 3, 4, 5, 6, 7, 8 n = 2 validate 4, 8, 12, 16, 20, 24, 28, 32 arm = vshll.s aarch64 = sshll generate int8x8_t:int16x8_t, int16x4_t:int32x4_t, int32x2_t:int64x2_t aarch64 = ushll generate uint8x8_t:uint16x8_t, uint16x4_t:uint32x4_t, uint32x2_t:uint64x2_t /// Signed shift left long name = vshll_high_n no-q constn = N multi_fn = static_assert-N-0-bits multi_fn = simd_shuffle-out_len-!, b:half, a, a, {asc-halflen-halflen} multi_fn = vshll_n-noqself-::, b a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8 n = 2 validate 4, 8, 12, 16, 20, 24, 28, 32 aarch64 = sshll2 generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t aarch64 = ushll2 generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t /// Shift right name = vshr n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()} a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 arm = vshr.s aarch64 = sshr generate int*_t, int64x*_t aarch64 = ushr generate uint*_t, uint64x*_t /// Shift right narrow name = vshrn_n no-q constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()}} a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 arm = vshrn. 
aarch64 = shrn generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t /// Shift right narrow name = vshrn_high_n no-q constn = N multi_fn = static_assert-N-1-halfbits multi_fn = simd_shuffle-out_len-!, a, {vshrn_n-noqself-::, b}, {asc-0-out_len} a = 1, 2, 5, 6, 5, 6, 7, 8 b = 20, 24, 28, 32, 52, 56, 60, 64 n = 2 validate 1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16 aarch64 = shrn2 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t /// Signed shift right and accumulate name = vsra n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = simd_add, a, {vshr-nself-::, b} a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 aarch64 = ssra arm = vsra generate int*_t, int64x*_t /// Unsigned shift right and accumulate name = vsra n-suffix constn = N multi_fn = static_assert-N-1-bits multi_fn = simd_add, a, {vshr-nself-::, b} a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 aarch64 = usra arm = vsra generate uint*_t, uint64x*_t /// Transpose vectors name = vtrn1 multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len} a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 validate 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 aarch64 = trn1 generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t aarch64 = zip1 generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Transpose vectors name = vtrn1 multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len} a = 0., 2., 4., 6., 8., 10., 12., 14. b = 1., 3., 5., 7., 9., 11., 13., 15. validate 0., 1., 4., 5., 8., 9., 12., 13. aarch64 = trn1 generate float32x4_t aarch64 = zip1 generate float32x2_t, float64x2_t /// Transpose vectors name = vtrn2 multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len} a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 validate 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 aarch64 = trn2 generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t aarch64 = zip2 generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Transpose vectors name = vtrn2 multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len} a = 0., 2., 4., 6., 8., 10., 12., 14. b = 1., 3., 5., 7., 9., 11., 13., 15. validate 2., 3., 6., 7., 10., 11., 14., 15. 
aarch64 = trn2 generate float32x4_t aarch64 = zip2 generate float32x2_t, float64x2_t /// Zip vectors name = vzip1 multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len} a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = zip1 generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t /// Zip vectors name = vzip1 multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len} a = 0., 2., 4., 6., 8., 10., 12., 14. b = 1., 3., 5., 7., 9., 11., 13., 15. validate 0., 1., 2., 3., 4., 5., 6., 7. aarch64 = zip1 generate float32x2_t, float32x4_t, float64x2_t /// Zip vectors name = vzip2 multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len} a = 0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31 validate 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 aarch64 = zip2 generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t /// Zip vectors name = vzip2 multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len} a = 0., 8., 8., 10., 8., 10., 12., 14. b = 1., 9., 9., 11., 9., 11., 13., 15. validate 8., 9., 10., 11., 12., 13., 14., 15. aarch64 = zip2 generate float32x2_t, float32x4_t, float64x2_t /// Unzip vectors name = vuzp1 multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len} a = 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0 b = 2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0 validate 1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16 aarch64 = uzp1 generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t aarch64 = zip1 generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Unzip vectors name = vuzp1 multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len} a = 0., 8., 1., 9., 4., 12., 5., 13. b = 1., 10., 3., 11., 6., 14., 7., 15. validate 0., 1., 1., 3., 4., 5., 6., 7. aarch64 = uzp1 generate float32x4_t aarch64 = zip1 generate float32x2_t, float64x2_t /// Unzip vectors name = vuzp2 multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len} a = 0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24 b = 0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32 validate 17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32 aarch64 = uzp2 generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t aarch64 = zip2 generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Unzip vectors name = vuzp2 multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len} a = 0., 8., 1., 9., 4., 12., 5., 13. b = 2., 9., 3., 11., 6., 14., 7., 15. validate 8., 9., 9., 11., 12., 13., 14., 15. 
aarch64 = uzp2 generate float32x4_t aarch64 = zip2 generate float32x2_t, float64x2_t //////////////////// // Unsigned Absolute difference and Accumulate Long //////////////////// /// Unsigned Absolute difference and Accumulate Long name = vabal multi_fn = vabd-unsigned-noext, b, c, d:in_t multi_fn = simd_add, a, {simd_cast, d} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12 validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20 arm = vabal.s aarch64 = uabal generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t /// Unsigned Absolute difference and Accumulate Long name = vabal_high no-q multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_shuffle8!, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = vabd_u8, d, e, f:uint8x8_t multi_fn = simd_add, a, {simd_cast, f} a = 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12 validate 20, 20, 20, 20, 20, 20, 20, 20 aarch64 = uabal generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t /// Unsigned Absolute difference and Accumulate Long name = vabal_high no-q multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_shuffle4!, e:uint16x4_t, c, c, [4, 5, 6, 7] multi_fn = vabd_u16, d, e, f:uint16x4_t multi_fn = simd_add, a, {simd_cast, f} a = 9, 10, 11, 12 b = 1, 2, 3, 4, 9, 10, 11, 12 c = 10, 10, 10, 10, 20, 0, 2, 4 validate 20, 20, 20, 20 aarch64 = uabal generate uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t /// Unsigned Absolute difference and Accumulate Long name = vabal_high no-q multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3] multi_fn = simd_shuffle2!, e:uint32x2_t, c, c, [2, 3] multi_fn = vabd_u32, d, e, f:uint32x2_t multi_fn = simd_add, a, {simd_cast, f} a = 15, 16 b = 1, 2, 15, 16 c = 10, 10, 10, 12 validate 20, 20 aarch64 = uabal generate uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t //////////////////// // Signed Absolute difference and Accumulate Long //////////////////// /// Signed Absolute difference and Accumulate Long name = vabal multi_fn = vabd-signed-noext, b, c, d:int8x8_t multi_fn = simd_cast, e:uint8x8_t, d multi_fn = simd_add, a, {simd_cast, e} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12 validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20 arm = vabal.s aarch64 = sabal generate int16x8_t:int8x8_t:int8x8_t:int16x8_t /// Signed Absolute difference and Accumulate Long name = vabal multi_fn = vabd-signed-noext, b, c, d:int16x4_t multi_fn = simd_cast, e:uint16x4_t, d multi_fn = simd_add, a, {simd_cast, e} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12 validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20 arm = vabal.s aarch64 = sabal generate int32x4_t:int16x4_t:int16x4_t:int32x4_t /// Signed Absolute difference and Accumulate Long name = vabal multi_fn = vabd-signed-noext, b, c, d:int32x2_t multi_fn = simd_cast, e:uint32x2_t, d multi_fn = simd_add, a, {simd_cast, e} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12 validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20 arm = vabal.s aarch64 = sabal generate int64x2_t:int32x2_t:int32x2_t:int64x2_t /// Signed Absolute difference and Accumulate Long name = vabal_high no-q multi_fn = simd_shuffle8!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_shuffle8!, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = vabd_s8, d, e, f:int8x8_t multi_fn = simd_cast, f:uint8x8_t, f multi_fn = simd_add, a, {simd_cast, f} a = 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12 validate 20, 20, 20, 20, 20, 20, 20, 20 aarch64 = sabal generate int16x8_t:int8x16_t:int8x16_t:int16x8_t /// Signed Absolute difference and Accumulate Long name = vabal_high no-q multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_shuffle4!, e:int16x4_t, c, c, [4, 5, 6, 7] multi_fn = vabd_s16, d, e, f:int16x4_t multi_fn = simd_cast, f:uint16x4_t, f multi_fn = simd_add, a, {simd_cast, f} a = 9, 10, 11, 12 b = 1, 2, 3, 4, 9, 10, 11, 12 c = 10, 10, 10, 10, 20, 0, 2, 4 validate 20, 20, 20, 20 aarch64 = sabal generate int32x4_t:int16x8_t:int16x8_t:int32x4_t /// Signed Absolute difference and Accumulate Long name = vabal_high no-q multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3] multi_fn = simd_shuffle2!, e:int32x2_t, c, c, [2, 3] multi_fn = vabd_s32, d, e, f:int32x2_t multi_fn = simd_cast, f:uint32x2_t, f multi_fn = simd_add, a, {simd_cast, f} a = 15, 16 b = 1, 2, 15, 16 c = 10, 10, 10, 12 validate 20, 20 aarch64 = sabal generate int64x2_t:int32x4_t:int32x4_t:int64x2_t //////////////////// // Signed saturating Absolute value //////////////////// /// Signed saturating Absolute value name = vqabs a = MIN, MAX, -6, -5, -4, -3, -2, -1, 0, -127, 127, 1, 2, 3, 4, 5 validate MAX, MAX, 6, 5, 4, 3, 2, 1, 0, 127, 127, 1, 2, 3, 4, 5 arm = vqabs.s aarch64 = sqabs link-arm = vqabs._EXT_ link-aarch64 = sqabs._EXT_ generate int*_t /// Signed saturating Absolute value name = vqabs a = MIN, -7 validate MAX, 7 aarch64 = sqabs link-aarch64 = sqabs._EXT_ generate int64x*_t
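// The rounding and saturating shift sections above (vrshr, vrshrn, vrsra, vqrshrn,
// vqshrn) all assume the same scalar model: a rounding right shift by N adds
// 1 << (N - 1) before shifting, and the narrowing variants additionally saturate to
// the range of the destination type. The Rust sketch below is a comment-only
// illustration of that model; it is not read by the generator, and the helper names
// (rshr_u16, qrshrn_u16_to_u8) are made up for this note, not generated intrinsics.
//
//     // Rounding shift right by `n` (n >= 1): add half the discarded range, then shift.
//     fn rshr_u16(x: u16, n: u32) -> u16 {
//         ((x as u32 + (1 << (n - 1))) >> n) as u16
//     }
//
//     // Saturating rounded shift right narrow (u16 -> u8): round, shift, then clamp.
//     fn qrshrn_u16_to_u8(x: u16, n: u32) -> u8 {
//         let r = (x as u32 + (1 << (n - 1))) >> n;
//         r.min(u8::MAX as u32) as u8
//     }
//
// With n = 2 this maps the input lane 4 to 1 and the lane 60 to 15, matching the
// a/b/validate vectors used for vqrshrn and vrshrn above.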
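// The vreinterpret sections above (fn = transmute) keep the underlying bytes and only
// change how they are split into lanes, which is why, for example, the 32-bit input
// 0, 1, 2, 3 validates as the byte pattern 0, 0, 0, 0, 1, 0, 0, 0, ... on the
// little-endian layout the test vectors assume. A comment-only Rust sketch of that
// layout rule, using explicit byte conversion instead of the generated transmute
// (the helper name is illustrative, not a generated intrinsic):
//
//     // Reinterpret four u32 lanes as sixteen u8 lanes, little-endian byte order.
//     fn reinterpret_u32x4_as_u8x16(a: [u32; 4]) -> [u8; 16] {
//         let mut out = [0u8; 16];
//         for (i, lane) in a.iter().enumerate() {
//             out[4 * i..4 * i + 4].copy_from_slice(&lane.to_le_bytes());
//         }
//         out
//     }
//
// With a = [0, 1, 2, 3] this yields [0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0],
// matching the validate line for the 32-bit to 8-bit reinterpret casts.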