]>
Commit | Line | Data |
---|---|---|
a04b68e1 RH |
1 | /* |
2 | * ARM AdvSIMD / SVE Vector Helpers | |
3 | * | |
4 | * Copyright (c) 2020 Linaro | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
50f57e09 | 9 | * version 2.1 of the License, or (at your option) any later version. |
a04b68e1 RH |
10 | * |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
18 | */ | |
19 | ||
20 | #ifndef TARGET_ARM_VEC_INTERNALS_H | |
21 | #define TARGET_ARM_VEC_INTERNALS_H | |
22 | ||
93966af1 RH |
23 | /* |
24 | * Note that vector data is stored in host-endian 64-bit chunks, | |
25 | * so addressing units smaller than that need a host-endian fixup. | |
26 | * | |
27 | * The H<N> macros are used when indexing an array of elements of size N. | |
28 | * | |
29 | * The H1_<N> macros are used when performing byte arithmetic and then | |
30 | * casting the final pointer to a type of size N. | |
31 | */ | |
#if !HOST_BIG_ENDIAN
/* Little-endian host: element order in memory already matches, no fixup. */
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#else
/*
 * Big-endian host: mirror the index within each 64-bit chunk.
 * The XOR constant is (elements per chunk - 1) for the H<N> array-index
 * forms, and (8 - N) for the H1_<N> byte-offset forms.
 */
#define H1(x)   ((x) ^ 7)   /* 8 bytes per chunk */
#define H1_2(x) ((x) ^ 6)   /* byte offset of a 2-byte element */
#define H1_4(x) ((x) ^ 4)   /* byte offset of a 4-byte element */
#define H2(x)   ((x) ^ 3)   /* 4 halfwords per chunk */
#define H4(x)   ((x) ^ 1)   /* 2 words per chunk */
#endif
6e802db3 PM |
/*
 * 64-bit elements need no host-endian adjustment.  H8 and H1_8 exist
 * only so that macro-generated functions can be handed a named index
 * macro instead of an empty macro argument, which reads more clearly.
 */
#define H8(x)   (x)
#define H1_8(x) (x)
93966af1 | 52 | |
77f96148 PM |
/*
 * Lookup table, 256 entries of 64 bits, used to expand active predicate
 * bits to bytes for byte-sized elements.  Defined elsewhere.
 */
extern const uint64_t expand_pred_b_data[256];
55 | ||
a04b68e1 RH |
/*
 * Zero the tail of a vector register.
 *
 * @vd:     start of the register storage
 * @opr_sz: byte offset at which clearing starts
 * @max_sz: byte offset at which clearing stops
 *
 * The tail is written as whole 64-bit stores, one per 8 bytes of
 * [opr_sz, max_sz); nothing is written when opr_sz >= max_sz.
 * NOTE(review): assumes vd + opr_sz is suitably aligned for uint64_t and
 * that the range is a multiple of 8 bytes -- confirm against callers.
 */
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    /* Do the byte offset on a char pointer: void * arithmetic is not ISO C. */
    uint64_t *d = (uint64_t *)((char *)vd + opr_sz);
    uintptr_t i;

    for (i = opr_sz; i < max_sz; i += 8) {
        *d++ = 0;
    }
}
65 | ||
8b3f15b0 RH |
/*
 * Signed shift of a @bits-wide element by a signed amount, with optional
 * rounding and optional saturation.
 *
 * @src:   element value
 *         (NOTE(review): presumably already sign-extended from @bits --
 *         confirm against callers)
 * @shift: shift count; negative values shift right, non-negative shift left
 * @bits:  element width in bits (the 32-bit case is handled specially)
 * @round: when shifting right, add the last bit shifted out to the result
 * @sat:   if non-NULL, left shifts that lose significant bits store 1 to
 *         *sat and return the saturation value; if NULL the result is
 *         simply truncated to @bits
 *
 * Returns the shifted value.  On saturation the result is
 * 2^(bits-1) - 1 for non-negative @src and 2^(bits-1) otherwise.
 */
static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
                                    bool round, uint32_t *sat)
{
    if (shift <= -bits) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        return src >> 31;
    } else if (shift < 0) {
        if (round) {
            /* Keep one extra bit, then fold it in as the rounding increment. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        /*
         * Shift in the unsigned domain: left-shifting a negative value,
         * or shifting a set bit into the sign position, is undefined
         * behaviour for a signed operand.
         */
        int32_t val = (int32_t)((uint32_t)src << shift);
        if (bits == 32) {
            /* No overflow if shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            int32_t extval = sextract32(val, 0, bits);
            /* No overflow if the value survives truncation to @bits. */
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Everything is shifted out; only a zero input avoids saturation. */
        return 0;
    }

    *sat = 1;
    return (1u << (bits - 1)) - (src >= 0);
}
100 | ||
/*
 * Unsigned shift of a @bits-wide element by a signed amount, with optional
 * rounding and optional saturation.
 *
 * Negative @shift shifts right (adding the last bit shifted out when
 * @round is set); non-negative @shift shifts left.  When @sat is non-NULL
 * and a left shift loses set bits, *sat is set to 1 and a mask of @bits
 * ones is returned; when @sat is NULL the result is truncated instead.
 */
static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    /* Right shift large enough that even the rounding bit is zero. */
    if (shift <= -(bits + round)) {
        return 0;
    }

    if (shift < 0) {
        if (!round) {
            return src >> -shift;
        }
        /* Stop one bit short, then add that bit back in as the rounding. */
        src >>= -shift - 1;
        return (src >> 1) + (src & 1);
    }

    if (shift >= bits) {
        /* Every bit leaves the element. */
        if (!sat || src == 0) {
            return 0;
        }
    } else {
        uint32_t shifted = src << shift;

        if (bits == 32) {
            if (!sat || (shifted >> shift) == src) {
                return shifted;
            }
        } else {
            uint32_t low = extract32(shifted, 0, bits);

            if (!sat || low == shifted) {
                return low;
            }
        }
    }

    *sat = 1;
    return MAKE_64BIT_MASK(0, bits);
}
131 | ||
/*
 * Shift a signed input with an unsigned result.
 *
 * When saturation is being tracked (@sat non-NULL) a negative @src
 * sets *sat to 1 and yields 0.  Every other case is delegated to
 * do_uqrshl_bhs() with @src passed through unchanged.
 */
static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    bool negative = src < 0;

    if (negative && sat != NULL) {
        *sat = 1;
        return 0;
    }

    return do_uqrshl_bhs(src, shift, bits, round, sat);
}
141 | ||
/*
 * Signed shift of a 64-bit element by a signed amount, with optional
 * rounding and optional saturation.
 *
 * @src:   element value
 * @shift: shift count; negative values shift right, non-negative shift left
 * @round: when shifting right, add the last bit shifted out to the result
 * @sat:   if non-NULL, left shifts that overflow store 1 to *sat and
 *         return INT64_MIN or INT64_MAX according to the sign of @src;
 *         if NULL the wrapped result is returned
 */
static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
                                  bool round, uint32_t *sat)
{
    if (shift <= -64) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        return src >> 63;
    } else if (shift < 0) {
        if (round) {
            /* Keep one extra bit, then fold it in as the rounding increment. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        /*
         * Shift in the unsigned domain: left-shifting a negative value,
         * or shifting a set bit into the sign position, is undefined
         * behaviour for a signed operand.
         */
        int64_t val = (int64_t)((uint64_t)src << shift);
        /* No overflow if shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Everything is shifted out; only a zero input avoids saturation. */
        return 0;
    }

    *sat = 1;
    return src < 0 ? INT64_MIN : INT64_MAX;
}
169 | ||
/*
 * Unsigned shift of a 64-bit element by a signed amount, with optional
 * rounding and optional saturation.
 *
 * Negative @shift shifts right (adding the last bit shifted out when
 * @round is set); non-negative @shift shifts left.  When @sat is non-NULL
 * and a left shift loses set bits, *sat is set to 1 and UINT64_MAX is
 * returned; when @sat is NULL the wrapped result is returned instead.
 */
static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    /* Right shift large enough that even the rounding bit is zero. */
    if (shift <= -(64 + round)) {
        return 0;
    }

    if (shift < 0) {
        if (!round) {
            return src >> -shift;
        }
        /* Stop one bit short, then add that bit back in as the rounding. */
        src >>= -shift - 1;
        return (src >> 1) + (src & 1);
    }

    if (shift >= 64) {
        /* Every bit leaves the element. */
        if (!sat || src == 0) {
            return 0;
        }
    } else {
        uint64_t shifted = src << shift;

        /* Shifting back recovers the input only if nothing was lost. */
        if (!sat || (shifted >> shift) == src) {
            return shifted;
        }
    }

    *sat = 1;
    return UINT64_MAX;
}
193 | ||
/*
 * Shift a signed 64-bit input with an unsigned result.
 *
 * When saturation is being tracked (@sat non-NULL) a negative @src
 * sets *sat to 1 and yields 0.  Every other case is delegated to
 * do_uqrshl_d() with @src passed through unchanged.
 */
static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    bool negative = src < 0;

    if (negative && sat != NULL) {
        *sat = 1;
        return 0;
    }

    return do_uqrshl_d(src, shift, round, sat);
}
203 | ||
d782d3ca RH |
/*
 * Out-of-line do_sqrdmlah_* helpers, one per element size (b/h/s/d).
 * Each takes three element-sized operands and two bool flags; the
 * 16-bit and 32-bit variants additionally take a uint32_t pointer.
 * NOTE(review): the definitions are not visible in this header; the
 * pointer is presumably a saturation flag like the @sat arguments
 * used by the shift helpers -- confirm against the implementation.
 */
int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t,
                     bool, bool);
int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t,
                      bool, bool, uint32_t *);
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t,
                      bool, bool, uint32_t *);
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t,
                      bool, bool);
208 | ||
c1bd78cb PM |
/*
 * Vector polynomial multiply, 8 x 8 -> 16: each input is taken from
 * the low 8 bits of a 16-bit element of the operand.
 */
uint64_t pmull_h(uint64_t op1, uint64_t op2);

/*
 * Vector polynomial multiply, 16 x 16 -> 32: each input is taken from
 * the low 16 bits of a 32-bit element of the operand.
 */
uint64_t pmull_w(uint64_t op1, uint64_t op2);
219 | ||
a04b68e1 | 220 | #endif /* TARGET_ARM_VEC_INTERNALS_H */ |