]>
Commit | Line | Data |
---|---|---|
a0c9400a SG |
1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* | |
1dc33f26 | 3 | * QEMU LoongArch vector helper functions. |
a0c9400a SG |
4 | * |
5 | * Copyright (c) 2022-2023 Loongson Technology Corporation Limited | |
6 | */ | |
c037fbc9 SG |
7 | |
8 | #include "qemu/osdep.h" | |
9 | #include "cpu.h" | |
10 | #include "exec/exec-all.h" | |
11 | #include "exec/helper-proto.h" | |
aca67472 SG |
12 | #include "fpu/softfloat.h" |
13 | #include "internals.h" | |
d0dfa19a | 14 | #include "tcg/tcg.h" |
008a3b16 | 15 | #include "vec.h" |
64cf6b99 | 16 | #include "tcg/tcg-gvec-desc.h" |
c037fbc9 SG |
17 | |
/*
 * Elementwise combine ops shared by the widening add/sub generators.
 * Arguments are parenthesized so expansion is precedence-safe when the
 * caller passes a compound expression.
 */
#define DO_ADD(a, b) ((a) + (b))
#define DO_SUB(a, b) ((a) - (b))
20 | ||
/*
 * Generate a helper that widens the odd-indexed element of Vj and the
 * even-indexed element of Vk to the destination element type E1 and
 * combines them with DO_OP (vhaddw/vhsubw family).
 */
#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP)                           \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)          \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg *Vk = (VReg *)vk;                                              \
    typedef __typeof(Vd->E1(0)) TD;                                     \
    int elems = simd_oprsz(desc) / (BIT / 8);                           \
                                                                        \
    for (int i = 0; i < elems; i++) {                                   \
        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i));    \
    }                                                                   \
}
35 | ||
36 | DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) | |
37 | DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) | |
38 | DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) | |
39 | ||
04711da1 | 40 | void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
c037fbc9 | 41 | { |
64cf6b99 | 42 | int i; |
04711da1 SG |
43 | VReg *Vd = (VReg *)vd; |
44 | VReg *Vj = (VReg *)vj; | |
45 | VReg *Vk = (VReg *)vk; | |
64cf6b99 | 46 | int oprsz = simd_oprsz(desc); |
c037fbc9 | 47 | |
64cf6b99 SG |
48 | for (i = 0; i < oprsz / 16 ; i++) { |
49 | Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), | |
50 | int128_makes64(Vk->D(2 * i))); | |
51 | } | |
c037fbc9 SG |
52 | } |
53 | ||
54 | DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) | |
55 | DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) | |
56 | DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) | |
57 | ||
04711da1 | 58 | void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
c037fbc9 | 59 | { |
64cf6b99 | 60 | int i; |
04711da1 SG |
61 | VReg *Vd = (VReg *)vd; |
62 | VReg *Vj = (VReg *)vj; | |
63 | VReg *Vk = (VReg *)vk; | |
64cf6b99 | 64 | int oprsz = simd_oprsz(desc); |
c037fbc9 | 65 | |
64cf6b99 SG |
66 | for (i = 0; i < oprsz / 16; i++) { |
67 | Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), | |
68 | int128_makes64(Vk->D(2 * i))); | |
69 | } | |
c037fbc9 SG |
70 | } |
71 | ||
72 | DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) | |
73 | DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) | |
74 | DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) | |
75 | ||
04711da1 | 76 | void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) |
c037fbc9 | 77 | { |
64cf6b99 | 78 | int i; |
04711da1 SG |
79 | VReg *Vd = (VReg *)vd; |
80 | VReg *Vj = (VReg *)vj; | |
81 | VReg *Vk = (VReg *)vk; | |
64cf6b99 | 82 | int oprsz = simd_oprsz(desc); |
c037fbc9 | 83 | |
64cf6b99 SG |
84 | for (i = 0; i < oprsz / 16; i ++) { |
85 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), | |
86 | int128_make64(Vk->UD(2 * i))); | |
87 | } | |
c037fbc9 SG |
88 | } |
89 | ||
90 | DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) | |
91 | DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) | |
92 | DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) | |
93 | ||
04711da1 | 94 | void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) |
c037fbc9 | 95 | { |
64cf6b99 | 96 | int i; |
04711da1 SG |
97 | VReg *Vd = (VReg *)vd; |
98 | VReg *Vj = (VReg *)vj; | |
99 | VReg *Vk = (VReg *)vk; | |
64cf6b99 | 100 | int oprsz = simd_oprsz(desc); |
c037fbc9 | 101 | |
64cf6b99 SG |
102 | for (i = 0; i < oprsz / 16; i++) { |
103 | Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), | |
104 | int128_make64(Vk->UD(2 * i))); | |
105 | } | |
c037fbc9 | 106 | } |
2d5f950c SG |
107 | |
/*
 * Generate a helper that combines the even-indexed elements of Vj and
 * Vk with DO_OP, widening both to the destination element type E1.
 */
#define DO_EVEN(NAME, BIT, E1, E2, DO_OP)                               \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)          \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg *Vk = (VReg *)vk;                                              \
    typedef __typeof(Vd->E1(0)) TD;                                     \
    int elems = simd_oprsz(desc) / (BIT / 8);                           \
                                                                        \
    for (int i = 0; i < elems; i++) {                                   \
        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i));        \
    }                                                                   \
}
122 | ||
/*
 * Generate a helper that combines the odd-indexed elements of Vj and
 * Vk with DO_OP, widening both to the destination element type E1.
 */
#define DO_ODD(NAME, BIT, E1, E2, DO_OP)                                \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)          \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg *Vk = (VReg *)vk;                                              \
    typedef __typeof(Vd->E1(0)) TD;                                     \
    int elems = simd_oprsz(desc) / (BIT / 8);                           \
                                                                        \
    for (int i = 0; i < elems; i++) {                                   \
        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
    }                                                                   \
}
137 | ||
85995f07 | 138 | void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 139 | { |
85995f07 | 140 | int i; |
2d5f950c SG |
141 | VReg *Vd = (VReg *)vd; |
142 | VReg *Vj = (VReg *)vj; | |
143 | VReg *Vk = (VReg *)vk; | |
85995f07 | 144 | int oprsz = simd_oprsz(desc); |
2d5f950c | 145 | |
85995f07 SG |
146 | for (i = 0; i < oprsz / 16; i++) { |
147 | Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)), | |
148 | int128_makes64(Vk->D(2 * i))); | |
149 | } | |
2d5f950c SG |
150 | } |
151 | ||
152 | DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) | |
153 | DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) | |
154 | DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) | |
155 | ||
85995f07 | 156 | void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 157 | { |
85995f07 | 158 | int i; |
2d5f950c SG |
159 | VReg *Vd = (VReg *)vd; |
160 | VReg *Vj = (VReg *)vj; | |
161 | VReg *Vk = (VReg *)vk; | |
85995f07 | 162 | int oprsz = simd_oprsz(desc); |
2d5f950c | 163 | |
85995f07 SG |
164 | for (i = 0; i < oprsz / 16; i++) { |
165 | Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)), | |
166 | int128_makes64(Vk->D(2 * i +1))); | |
167 | } | |
2d5f950c SG |
168 | } |
169 | ||
170 | DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) | |
171 | DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) | |
172 | DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) | |
173 | ||
85995f07 | 174 | void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 175 | { |
85995f07 | 176 | int i; |
2d5f950c SG |
177 | VReg *Vd = (VReg *)vd; |
178 | VReg *Vj = (VReg *)vj; | |
179 | VReg *Vk = (VReg *)vk; | |
85995f07 | 180 | int oprsz = simd_oprsz(desc); |
2d5f950c | 181 | |
85995f07 SG |
182 | for (i = 0; i < oprsz / 16; i++) { |
183 | Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)), | |
184 | int128_makes64(Vk->D(2 * i))); | |
185 | } | |
2d5f950c SG |
186 | } |
187 | ||
188 | DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) | |
189 | DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) | |
190 | DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) | |
191 | ||
85995f07 | 192 | void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 193 | { |
85995f07 | 194 | int i; |
2d5f950c SG |
195 | VReg *Vd = (VReg *)vd; |
196 | VReg *Vj = (VReg *)vj; | |
197 | VReg *Vk = (VReg *)vk; | |
85995f07 | 198 | int oprsz = simd_oprsz(desc); |
2d5f950c | 199 | |
85995f07 SG |
200 | for (i = 0; i < oprsz / 16; i++) { |
201 | Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), | |
202 | int128_makes64(Vk->D(2 * i + 1))); | |
203 | } | |
2d5f950c SG |
204 | } |
205 | ||
206 | DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) | |
207 | DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) | |
208 | DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) | |
209 | ||
85995f07 | 210 | void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 211 | { |
85995f07 | 212 | int i; |
2d5f950c SG |
213 | VReg *Vd = (VReg *)vd; |
214 | VReg *Vj = (VReg *)vj; | |
215 | VReg *Vk = (VReg *)vk; | |
85995f07 | 216 | int oprsz = simd_oprsz(desc); |
2d5f950c | 217 | |
85995f07 SG |
218 | for (i = 0; i < oprsz / 16; i++) { |
219 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), | |
220 | int128_make64(Vk->UD(2 * i))); | |
221 | } | |
2d5f950c SG |
222 | } |
223 | ||
224 | DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) | |
225 | DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) | |
226 | DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) | |
227 | ||
85995f07 | 228 | void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 229 | { |
85995f07 | 230 | int i; |
2d5f950c SG |
231 | VReg *Vd = (VReg *)vd; |
232 | VReg *Vj = (VReg *)vj; | |
233 | VReg *Vk = (VReg *)vk; | |
85995f07 | 234 | int oprsz = simd_oprsz(desc); |
2d5f950c | 235 | |
85995f07 SG |
236 | for (i = 0; i < oprsz / 16; i++) { |
237 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), | |
238 | int128_make64(Vk->UD(2 * i + 1))); | |
239 | } | |
2d5f950c SG |
240 | } |
241 | ||
242 | DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) | |
243 | DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) | |
244 | DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) | |
245 | ||
85995f07 | 246 | void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 247 | { |
85995f07 | 248 | int i; |
2d5f950c SG |
249 | VReg *Vd = (VReg *)vd; |
250 | VReg *Vj = (VReg *)vj; | |
251 | VReg *Vk = (VReg *)vk; | |
85995f07 | 252 | int oprsz = simd_oprsz(desc); |
2d5f950c | 253 | |
85995f07 SG |
254 | for (i = 0; i < oprsz / 16; i++) { |
255 | Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)), | |
256 | int128_make64(Vk->UD(2 * i))); | |
257 | } | |
2d5f950c SG |
258 | } |
259 | ||
260 | DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) | |
261 | DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) | |
262 | DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) | |
263 | ||
85995f07 | 264 | void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 265 | { |
85995f07 | 266 | int i; |
2d5f950c SG |
267 | VReg *Vd = (VReg *)vd; |
268 | VReg *Vj = (VReg *)vj; | |
269 | VReg *Vk = (VReg *)vk; | |
85995f07 | 270 | int oprsz = simd_oprsz(desc); |
2d5f950c | 271 | |
85995f07 SG |
272 | for (i = 0; i < oprsz / 16; i++) { |
273 | Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), | |
274 | int128_make64(Vk->UD(2 * i + 1))); | |
275 | } | |
2d5f950c SG |
276 | } |
277 | ||
278 | DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) | |
279 | DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) | |
280 | DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) | |
281 | ||
/*
 * Mixed-signedness even-element combine: Vj is widened through the
 * unsigned accessor EU2, Vk through the signed accessor ES2, and the
 * result is stored via the signed accessor ES1.
 */
#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)               \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)          \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg *Vk = (VReg *)vk;                                              \
    typedef __typeof(Vd->ES1(0)) TDS;                                   \
    typedef __typeof(Vd->EU1(0)) TDU;                                   \
    int elems = simd_oprsz(desc) / (BIT / 8);                           \
                                                                        \
    for (int i = 0; i < elems; i++) {                                   \
        Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i), (TDS)Vk->ES2(2 * i));   \
    }                                                                   \
}
297 | ||
/*
 * Mixed-signedness odd-element combine: unsigned Vj element, signed Vk
 * element, result stored through the signed accessor ES1.
 */
#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)                \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)          \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg *Vk = (VReg *)vk;                                              \
    typedef __typeof(Vd->ES1(0)) TDS;                                   \
    typedef __typeof(Vd->EU1(0)) TDU;                                   \
    int elems = simd_oprsz(desc) / (BIT / 8);                           \
                                                                        \
    for (int i = 0; i < elems; i++) {                                   \
        Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1),                     \
                           (TDS)Vk->ES2(2 * i + 1));                    \
    }                                                                   \
}
313 | ||
85995f07 | 314 | void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 315 | { |
85995f07 | 316 | int i; |
2d5f950c SG |
317 | VReg *Vd = (VReg *)vd; |
318 | VReg *Vj = (VReg *)vj; | |
319 | VReg *Vk = (VReg *)vk; | |
85995f07 | 320 | int oprsz = simd_oprsz(desc); |
2d5f950c | 321 | |
85995f07 SG |
322 | for (i = 0; i < oprsz / 16; i++) { |
323 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), | |
324 | int128_makes64(Vk->D(2 * i))); | |
325 | } | |
2d5f950c SG |
326 | } |
327 | ||
328 | DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) | |
329 | DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) | |
330 | DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) | |
331 | ||
85995f07 | 332 | void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) |
2d5f950c | 333 | { |
85995f07 | 334 | int i; |
2d5f950c SG |
335 | VReg *Vd = (VReg *)vd; |
336 | VReg *Vj = (VReg *)vj; | |
337 | VReg *Vk = (VReg *)vk; | |
85995f07 | 338 | int oprsz = simd_oprsz(desc); |
2d5f950c | 339 | |
85995f07 SG |
340 | for (i = 0; i < oprsz / 16; i++) { |
341 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), | |
342 | int128_makes64(Vk->D(2 * i + 1))); | |
343 | } | |
2d5f950c SG |
344 | } |
345 | ||
346 | DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) | |
347 | DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) | |
348 | DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) | |
39e9b0a7 SG |
349 | |
/*
 * Overflow-free average: sum the halves, then add the carry bit.
 * DO_VAVG truncates; DO_VAVGR rounds up when either low bit is set.
 * Arguments are parenthesized so expansion is precedence-safe.
 */
#define DO_VAVG(a, b)  (((a) >> 1) + ((b) >> 1) + ((a) & (b) & 1))
#define DO_VAVGR(a, b) (((a) >> 1) + ((b) >> 1) + (((a) | (b)) & 1))
352 | ||
ee7250d0 SG |
353 | #define DO_3OP(NAME, BIT, E, DO_OP) \ |
354 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
355 | { \ | |
356 | int i; \ | |
357 | VReg *Vd = (VReg *)vd; \ | |
358 | VReg *Vj = (VReg *)vj; \ | |
359 | VReg *Vk = (VReg *)vk; \ | |
360 | int oprsz = simd_oprsz(desc); \ | |
361 | \ | |
362 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
363 | Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | |
364 | } \ | |
39e9b0a7 SG |
365 | } |
366 | ||
367 | DO_3OP(vavg_b, 8, B, DO_VAVG) | |
368 | DO_3OP(vavg_h, 16, H, DO_VAVG) | |
369 | DO_3OP(vavg_w, 32, W, DO_VAVG) | |
370 | DO_3OP(vavg_d, 64, D, DO_VAVG) | |
371 | DO_3OP(vavgr_b, 8, B, DO_VAVGR) | |
372 | DO_3OP(vavgr_h, 16, H, DO_VAVGR) | |
373 | DO_3OP(vavgr_w, 32, W, DO_VAVGR) | |
374 | DO_3OP(vavgr_d, 64, D, DO_VAVGR) | |
375 | DO_3OP(vavg_bu, 8, UB, DO_VAVG) | |
376 | DO_3OP(vavg_hu, 16, UH, DO_VAVG) | |
377 | DO_3OP(vavg_wu, 32, UW, DO_VAVG) | |
378 | DO_3OP(vavg_du, 64, UD, DO_VAVG) | |
379 | DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) | |
380 | DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) | |
381 | DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) | |
382 | DO_3OP(vavgr_du, 64, UD, DO_VAVGR) | |
49725659 SG |
383 | |
384 | #define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a)) | |
385 | ||
/* vabsd: per-element absolute difference, signed and unsigned widths. */
DO_3OP(vabsd_b, 8, B, DO_VABSD)
DO_3OP(vabsd_h, 16, H, DO_VABSD)
DO_3OP(vabsd_w, 32, W, DO_VABSD)
DO_3OP(vabsd_d, 64, D, DO_VABSD)
DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
DO_3OP(vabsd_du, 64, UD, DO_VABSD)
af448cb3 SG |
394 | |
395 | #define DO_VABS(a) ((a < 0) ? (-a) : (a)) | |
396 | ||
27f5485d SG |
397 | #define DO_VADDA(NAME, BIT, E) \ |
398 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
399 | { \ | |
400 | int i; \ | |
401 | VReg *Vd = (VReg *)vd; \ | |
402 | VReg *Vj = (VReg *)vj; \ | |
403 | VReg *Vk = (VReg *)vk; \ | |
404 | int oprsz = simd_oprsz(desc); \ | |
405 | \ | |
406 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
407 | Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \ | |
408 | } \ | |
af448cb3 SG |
409 | } |
410 | ||
27f5485d SG |
411 | DO_VADDA(vadda_b, 8, B) |
412 | DO_VADDA(vadda_h, 16, H) | |
413 | DO_VADDA(vadda_w, 32, W) | |
414 | DO_VADDA(vadda_d, 64, D) | |
9ab29520 SG |
415 | |
/*
 * Elementwise min/max; arguments parenthesized so compound-expression
 * arguments parse correctly (e.g. bitwise operands).
 */
#define DO_MIN(a, b) ((a) < (b) ? (a) : (b))
#define DO_MAX(a, b) ((a) > (b) ? (a) : (b))
418 | ||
c09360fa SG |
419 | #define VMINMAXI(NAME, BIT, E, DO_OP) \ |
420 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
421 | { \ | |
422 | int i; \ | |
423 | VReg *Vd = (VReg *)vd; \ | |
424 | VReg *Vj = (VReg *)vj; \ | |
425 | typedef __typeof(Vd->E(0)) TD; \ | |
426 | int oprsz = simd_oprsz(desc); \ | |
427 | \ | |
428 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
429 | Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | |
430 | } \ | |
9ab29520 SG |
431 | } |
432 | ||
433 | VMINMAXI(vmini_b, 8, B, DO_MIN) | |
434 | VMINMAXI(vmini_h, 16, H, DO_MIN) | |
435 | VMINMAXI(vmini_w, 32, W, DO_MIN) | |
436 | VMINMAXI(vmini_d, 64, D, DO_MIN) | |
437 | VMINMAXI(vmaxi_b, 8, B, DO_MAX) | |
438 | VMINMAXI(vmaxi_h, 16, H, DO_MAX) | |
439 | VMINMAXI(vmaxi_w, 32, W, DO_MAX) | |
440 | VMINMAXI(vmaxi_d, 64, D, DO_MAX) | |
441 | VMINMAXI(vmini_bu, 8, UB, DO_MIN) | |
442 | VMINMAXI(vmini_hu, 16, UH, DO_MIN) | |
443 | VMINMAXI(vmini_wu, 32, UW, DO_MIN) | |
444 | VMINMAXI(vmini_du, 64, UD, DO_MIN) | |
445 | VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) | |
446 | VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) | |
447 | VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) | |
448 | VMINMAXI(vmaxi_du, 64, UD, DO_MAX) | |
cd1c49ad | 449 | |
342dc1cf SG |
/*
 * High half of a widening multiply: the product is formed in the
 * double-width type T (accessor E1) and shifted down by BIT.  The
 * DO_OP parameter keeps the signature parallel with the other
 * generators but is not used in the expansion.
 */
#define DO_VMUH(NAME, BIT, E1, E2, DO_OP)                               \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)          \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg *Vk = (VReg *)vk;                                              \
    typedef __typeof(Vd->E1(0)) T;                                      \
    int elems = simd_oprsz(desc) / (BIT / 8);                           \
                                                                        \
    for (int i = 0; i < elems; i++) {                                   \
        Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT;             \
    }                                                                   \
}
464 | ||
342dc1cf | 465 | void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc) |
cd1c49ad | 466 | { |
342dc1cf SG |
467 | int i; |
468 | uint64_t l, h; | |
cd1c49ad SG |
469 | VReg *Vd = (VReg *)vd; |
470 | VReg *Vj = (VReg *)vj; | |
471 | VReg *Vk = (VReg *)vk; | |
342dc1cf | 472 | int oprsz = simd_oprsz(desc); |
cd1c49ad | 473 | |
342dc1cf SG |
474 | for (i = 0; i < oprsz / 8; i++) { |
475 | muls64(&l, &h, Vj->D(i), Vk->D(i)); | |
476 | Vd->D(i) = h; | |
477 | } | |
cd1c49ad SG |
478 | } |
479 | ||
480 | DO_VMUH(vmuh_b, 8, H, B, DO_MUH) | |
481 | DO_VMUH(vmuh_h, 16, W, H, DO_MUH) | |
482 | DO_VMUH(vmuh_w, 32, D, W, DO_MUH) | |
483 | ||
342dc1cf | 484 | void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc) |
cd1c49ad | 485 | { |
342dc1cf SG |
486 | int i; |
487 | uint64_t l, h; | |
cd1c49ad SG |
488 | VReg *Vd = (VReg *)vd; |
489 | VReg *Vj = (VReg *)vj; | |
490 | VReg *Vk = (VReg *)vk; | |
342dc1cf | 491 | int oprsz = simd_oprsz(desc); |
cd1c49ad | 492 | |
342dc1cf SG |
493 | for (i = 0; i < oprsz / 8; i++) { |
494 | mulu64(&l, &h, Vj->D(i), Vk->D(i)); | |
495 | Vd->D(i) = h; | |
496 | } | |
cd1c49ad SG |
497 | } |
498 | ||
499 | DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) | |
500 | DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) | |
501 | DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) | |
502 | ||
503 | #define DO_MUL(a, b) (a * b) | |
504 | ||
/* vmulwev/vmulwod: widening multiply of even/odd elements, signed. */
DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)

DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)

/* Unsigned x unsigned. */
DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)

DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)

/* Unsigned Vj x signed Vk. */
DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)

DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
d3aec65b SG |
528 | |
/*
 * Multiply-accumulate / multiply-subtract combine ops; arguments
 * parenthesized so macro expansion is precedence-safe.
 */
#define DO_MADD(a, b, c) ((a) + (b) * (c))
#define DO_MSUB(a, b, c) ((a) - (b) * (c))
531 | ||
3f450c17 SG |
532 | #define VMADDSUB(NAME, BIT, E, DO_OP) \ |
533 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
534 | { \ | |
535 | int i; \ | |
536 | VReg *Vd = (VReg *)vd; \ | |
537 | VReg *Vj = (VReg *)vj; \ | |
538 | VReg *Vk = (VReg *)vk; \ | |
539 | int oprsz = simd_oprsz(desc); \ | |
540 | \ | |
541 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
542 | Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \ | |
543 | } \ | |
d3aec65b SG |
544 | } |
545 | ||
546 | VMADDSUB(vmadd_b, 8, B, DO_MADD) | |
547 | VMADDSUB(vmadd_h, 16, H, DO_MADD) | |
548 | VMADDSUB(vmadd_w, 32, W, DO_MADD) | |
549 | VMADDSUB(vmadd_d, 64, D, DO_MADD) | |
550 | VMADDSUB(vmsub_b, 8, B, DO_MSUB) | |
551 | VMADDSUB(vmsub_h, 16, H, DO_MSUB) | |
552 | VMADDSUB(vmsub_w, 32, W, DO_MSUB) | |
553 | VMADDSUB(vmsub_d, 64, D, DO_MSUB) | |
554 | ||
555 | #define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \ | |
3f450c17 | 556 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
d3aec65b SG |
557 | { \ |
558 | int i; \ | |
559 | VReg *Vd = (VReg *)vd; \ | |
560 | VReg *Vj = (VReg *)vj; \ | |
561 | VReg *Vk = (VReg *)vk; \ | |
562 | typedef __typeof(Vd->E1(0)) TD; \ | |
3f450c17 | 563 | int oprsz = simd_oprsz(desc); \ |
d3aec65b | 564 | \ |
3f450c17 | 565 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
d3aec65b SG |
566 | Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ |
567 | } \ | |
568 | } | |
569 | ||
570 | VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL) | |
571 | VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL) | |
572 | VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL) | |
573 | VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) | |
574 | VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) | |
575 | VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) | |
576 | ||
3f450c17 SG |
577 | #define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ |
578 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
579 | { \ | |
580 | int i; \ | |
581 | VReg *Vd = (VReg *)vd; \ | |
582 | VReg *Vj = (VReg *)vj; \ | |
583 | VReg *Vk = (VReg *)vk; \ | |
584 | typedef __typeof(Vd->E1(0)) TD; \ | |
585 | int oprsz = simd_oprsz(desc); \ | |
586 | \ | |
587 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
588 | Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ | |
589 | (TD)Vk->E2(2 * i + 1)); \ | |
590 | } \ | |
d3aec65b SG |
591 | } |
592 | ||
593 | VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) | |
594 | VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL) | |
595 | VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL) | |
596 | VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) | |
597 | VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) | |
598 | VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) | |
599 | ||
3f450c17 SG |
600 | #define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ |
601 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
602 | { \ | |
603 | int i; \ | |
604 | VReg *Vd = (VReg *)vd; \ | |
605 | VReg *Vj = (VReg *)vj; \ | |
606 | VReg *Vk = (VReg *)vk; \ | |
607 | typedef __typeof(Vd->ES1(0)) TS1; \ | |
608 | typedef __typeof(Vd->EU1(0)) TU1; \ | |
609 | int oprsz = simd_oprsz(desc); \ | |
610 | \ | |
611 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
612 | Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ | |
613 | (TS1)Vk->ES2(2 * i)); \ | |
614 | } \ | |
d3aec65b SG |
615 | } |
616 | ||
617 | VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) | |
618 | VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) | |
619 | VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) | |
620 | ||
3f450c17 SG |
621 | #define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ |
622 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
623 | { \ | |
624 | int i; \ | |
625 | VReg *Vd = (VReg *)vd; \ | |
626 | VReg *Vj = (VReg *)vj; \ | |
627 | VReg *Vk = (VReg *)vk; \ | |
628 | typedef __typeof(Vd->ES1(0)) TS1; \ | |
629 | typedef __typeof(Vd->EU1(0)) TU1; \ | |
630 | int oprsz = simd_oprsz(desc); \ | |
631 | \ | |
632 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
633 | Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ | |
634 | (TS1)Vk->ES2(2 * i + 1)); \ | |
635 | } \ | |
d3aec65b SG |
636 | } |
637 | ||
638 | VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | |
639 | VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) | |
640 | VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | |
4cc4c0f7 SG |
641 | |
/*
 * Division/remainder with the trap cases folded in:
 *  - divide by zero yields 0;
 *  - the (N) == -(N) test detects the most-negative value, so
 *    MIN / -1 yields N (no overflow trap) and MIN % -1 yields 0.
 * Arguments are parenthesized so macro expansion is precedence-safe.
 */
#define DO_DIVU(N, M) (unlikely((M) == 0) ? 0 : (N) / (M))
#define DO_REMU(N, M) (unlikely((M) == 0) ? 0 : (N) % (M))
#define DO_DIV(N, M)  (unlikely((M) == 0) ? 0 :                         \
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ?         \
        (N) : (N) / (M))
#define DO_REM(N, M)  (unlikely((M) == 0) ? 0 :                         \
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ?         \
        0 : (N) % (M))
648 | ||
04711da1 SG |
649 | #define VDIV(NAME, BIT, E, DO_OP) \ |
650 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
651 | { \ | |
652 | int i; \ | |
653 | VReg *Vd = (VReg *)vd; \ | |
654 | VReg *Vj = (VReg *)vj; \ | |
655 | VReg *Vk = (VReg *)vk; \ | |
abb693de SG |
656 | int oprsz = simd_oprsz(desc); \ |
657 | \ | |
658 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
04711da1 SG |
659 | Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ |
660 | } \ | |
4cc4c0f7 SG |
661 | } |
662 | ||
663 | VDIV(vdiv_b, 8, B, DO_DIV) | |
664 | VDIV(vdiv_h, 16, H, DO_DIV) | |
665 | VDIV(vdiv_w, 32, W, DO_DIV) | |
666 | VDIV(vdiv_d, 64, D, DO_DIV) | |
667 | VDIV(vdiv_bu, 8, UB, DO_DIVU) | |
668 | VDIV(vdiv_hu, 16, UH, DO_DIVU) | |
669 | VDIV(vdiv_wu, 32, UW, DO_DIVU) | |
670 | VDIV(vdiv_du, 64, UD, DO_DIVU) | |
671 | VDIV(vmod_b, 8, B, DO_REM) | |
672 | VDIV(vmod_h, 16, H, DO_REM) | |
673 | VDIV(vmod_w, 32, W, DO_REM) | |
674 | VDIV(vmod_d, 64, D, DO_REM) | |
675 | VDIV(vmod_bu, 8, UB, DO_REMU) | |
676 | VDIV(vmod_hu, 16, UH, DO_REMU) | |
677 | VDIV(vmod_wu, 32, UW, DO_REMU) | |
678 | VDIV(vmod_du, 64, UD, DO_REMU) | |
cbe44190 | 679 | |
e5c7f031 SG |
680 | #define VSAT_S(NAME, BIT, E) \ |
681 | void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ | |
682 | { \ | |
683 | int i; \ | |
684 | VReg *Vd = (VReg *)vd; \ | |
685 | VReg *Vj = (VReg *)vj; \ | |
686 | typedef __typeof(Vd->E(0)) TD; \ | |
687 | int oprsz = simd_oprsz(desc); \ | |
688 | \ | |
689 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
690 | Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ | |
691 | Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \ | |
692 | } \ | |
cbe44190 SG |
693 | } |
694 | ||
695 | VSAT_S(vsat_b, 8, B) | |
696 | VSAT_S(vsat_h, 16, H) | |
697 | VSAT_S(vsat_w, 32, W) | |
698 | VSAT_S(vsat_d, 64, D) | |
699 | ||
e5c7f031 SG |
700 | #define VSAT_U(NAME, BIT, E) \ |
701 | void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ | |
702 | { \ | |
703 | int i; \ | |
704 | VReg *Vd = (VReg *)vd; \ | |
705 | VReg *Vj = (VReg *)vj; \ | |
706 | typedef __typeof(Vd->E(0)) TD; \ | |
707 | int oprsz = simd_oprsz(desc); \ | |
708 | \ | |
709 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
710 | Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ | |
711 | } \ | |
cbe44190 SG |
712 | } |
713 | ||
714 | VSAT_U(vsat_bu, 8, UB) | |
715 | VSAT_U(vsat_hu, 16, UH) | |
716 | VSAT_U(vsat_wu, 32, UW) | |
717 | VSAT_U(vsat_du, 64, UD) | |
3734ad93 | 718 | |
f0db0beb SG |
/*
 * Generate vexth: widen the upper half of each 128-bit lane of Vj into
 * the whole corresponding lane of Vd.  ofs is the number of destination
 * elements per lane; the source index selects the high ofs elements of
 * lane i.
 */
#define VEXTH(NAME, BIT, E1, E2)                                        \
void HELPER(NAME)(void *vd, void *vj, uint32_t desc)                    \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    int ofs = LSX_LEN / BIT;                                            \
    int lanes = simd_oprsz(desc) / 16;                                  \
                                                                        \
    for (int i = 0; i < lanes; i++) {                                   \
        for (int j = 0; j < ofs; j++) {                                 \
            Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i);        \
        }                                                               \
    }                                                                   \
}
734 | ||
735 | void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc) | |
3734ad93 | 736 | { |
f0db0beb | 737 | int i; |
ff27e335 SG |
738 | VReg *Vd = (VReg *)vd; |
739 | VReg *Vj = (VReg *)vj; | |
f0db0beb | 740 | int oprsz = simd_oprsz(desc); |
3734ad93 | 741 | |
f0db0beb SG |
742 | for (i = 0; i < oprsz / 16; i++) { |
743 | Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1)); | |
744 | } | |
3734ad93 SG |
745 | } |
746 | ||
ff27e335 | 747 | void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc) |
3734ad93 | 748 | { |
f0db0beb | 749 | int i; |
ff27e335 SG |
750 | VReg *Vd = (VReg *)vd; |
751 | VReg *Vj = (VReg *)vj; | |
f0db0beb | 752 | int oprsz = simd_oprsz(desc); |
3734ad93 | 753 | |
f0db0beb SG |
754 | for (i = 0; i < oprsz / 16; i++) { |
755 | Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1)); | |
756 | } | |
3734ad93 SG |
757 | } |
758 | ||
759 | VEXTH(vexth_h_b, 16, H, B) | |
760 | VEXTH(vexth_w_h, 32, W, H) | |
761 | VEXTH(vexth_d_w, 64, D, W) | |
762 | VEXTH(vexth_hu_bu, 16, UH, UB) | |
763 | VEXTH(vexth_wu_hu, 32, UW, UH) | |
764 | VEXTH(vexth_du_wu, 64, UD, UW) | |
f0e395df | 765 | |
790acb2a SG |
766 | #define VEXT2XV(NAME, BIT, E1, E2) \ |
767 | void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | |
768 | { \ | |
769 | int i; \ | |
770 | VReg temp = {}; \ | |
771 | VReg *Vd = (VReg *)vd; \ | |
772 | VReg *Vj = (VReg *)vj; \ | |
773 | int oprsz = simd_oprsz(desc); \ | |
774 | \ | |
775 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ | |
776 | temp.E1(i) = Vj->E2(i); \ | |
777 | } \ | |
778 | *Vd = temp; \ | |
779 | } | |
780 | ||
781 | VEXT2XV(vext2xv_h_b, 16, H, B) | |
782 | VEXT2XV(vext2xv_w_b, 32, W, B) | |
783 | VEXT2XV(vext2xv_d_b, 64, D, B) | |
784 | VEXT2XV(vext2xv_w_h, 32, W, H) | |
785 | VEXT2XV(vext2xv_d_h, 64, D, H) | |
786 | VEXT2XV(vext2xv_d_w, 64, D, W) | |
787 | VEXT2XV(vext2xv_hu_bu, 16, UH, UB) | |
788 | VEXT2XV(vext2xv_wu_bu, 32, UW, UB) | |
789 | VEXT2XV(vext2xv_du_bu, 64, UD, UB) | |
790 | VEXT2XV(vext2xv_wu_hu, 32, UW, UH) | |
791 | VEXT2XV(vext2xv_du_hu, 64, UD, UH) | |
792 | VEXT2XV(vext2xv_du_wu, 64, UD, UW) | |
793 | ||
f0e395df SG |
794 | #define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b) |
795 | ||
/* vsigncov: per-element sign-copy of Vk under the sign of Vj. */
DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
789f4a4c SG |
800 | |
/*
 * Gather the sign bit of each of the eight bytes of @val:
 * result bit i is set iff byte i is negative.
 */
static uint64_t do_vmskltz_b(int64_t val)
{
    uint64_t bits = 0;

    for (int i = 0; i < 8; i++) {
        bits |= (((uint64_t)val >> (8 * i + 7)) & 1) << i;
    }
    return bits;
}
810 | ||
ff27e335 | 811 | void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc) |
789f4a4c | 812 | { |
97074674 | 813 | int i; |
789f4a4c | 814 | uint16_t temp = 0; |
ff27e335 SG |
815 | VReg *Vd = (VReg *)vd; |
816 | VReg *Vj = (VReg *)vj; | |
97074674 | 817 | int oprsz = simd_oprsz(desc); |
789f4a4c | 818 | |
97074674 SG |
819 | for (i = 0; i < oprsz / 16; i++) { |
820 | temp = 0; | |
821 | temp = do_vmskltz_b(Vj->D(2 * i)); | |
822 | temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); | |
823 | Vd->D(2 * i) = temp; | |
824 | Vd->D(2 * i + 1) = 0; | |
825 | } | |
789f4a4c SG |
826 | } |
827 | ||
/*
 * Gather the sign bit of each of the four halfwords of @val:
 * result bit i is set iff halfword i is negative.
 */
static uint64_t do_vmskltz_h(int64_t val)
{
    uint64_t bits = 0;

    for (int i = 0; i < 4; i++) {
        bits |= (((uint64_t)val >> (16 * i + 15)) & 1) << i;
    }
    return bits;
}
836 | ||
ff27e335 | 837 | void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc) |
789f4a4c | 838 | { |
97074674 | 839 | int i; |
789f4a4c | 840 | uint16_t temp = 0; |
ff27e335 SG |
841 | VReg *Vd = (VReg *)vd; |
842 | VReg *Vj = (VReg *)vj; | |
97074674 | 843 | int oprsz = simd_oprsz(desc); |
789f4a4c | 844 | |
97074674 SG |
845 | for (i = 0; i < oprsz / 16; i++) { |
846 | temp = 0; | |
847 | temp = do_vmskltz_h(Vj->D(2 * i)); | |
848 | temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4); | |
849 | Vd->D(2 * i) = temp; | |
850 | Vd->D(2 * i + 1) = 0; | |
851 | } | |
789f4a4c SG |
852 | } |
853 | ||
/*
 * Collect the sign bits of the two 32-bit lanes of @val into the low
 * two bits of the result: bit n is set iff word n is negative.
 */
static uint64_t do_vmskltz_w(int64_t val)
{
    uint64_t signs = val & 0x8000000080000000ULL;

    signs |= signs << 31;
    return signs >> 62;
}
861 | ||
ff27e335 | 862 | void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc) |
789f4a4c | 863 | { |
97074674 | 864 | int i; |
789f4a4c | 865 | uint16_t temp = 0; |
ff27e335 SG |
866 | VReg *Vd = (VReg *)vd; |
867 | VReg *Vj = (VReg *)vj; | |
97074674 | 868 | int oprsz = simd_oprsz(desc); |
789f4a4c | 869 | |
97074674 SG |
870 | for (i = 0; i < oprsz / 16; i++) { |
871 | temp = 0; | |
872 | temp = do_vmskltz_w(Vj->D(2 * i)); | |
873 | temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2); | |
874 | Vd->D(2 * i) = temp; | |
875 | Vd->D(2 * i + 1) = 0; | |
876 | } | |
789f4a4c SG |
877 | } |
878 | ||
/* Sign bit of the single 64-bit lane of @val (1 iff negative). */
static uint64_t do_vmskltz_d(int64_t val)
{
    return ((uint64_t)val) >> 63;
}
ff27e335 | 883 | void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc) |
789f4a4c | 884 | { |
97074674 | 885 | int i; |
789f4a4c | 886 | uint16_t temp = 0; |
ff27e335 SG |
887 | VReg *Vd = (VReg *)vd; |
888 | VReg *Vj = (VReg *)vj; | |
97074674 | 889 | int oprsz = simd_oprsz(desc); |
789f4a4c | 890 | |
97074674 SG |
891 | for (i = 0; i < oprsz / 16; i++) { |
892 | temp = 0; | |
893 | temp = do_vmskltz_d(Vj->D(2 * i)); | |
894 | temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1); | |
895 | Vd->D(2 * i) = temp; | |
896 | Vd->D(2 * i + 1) = 0; | |
897 | } | |
789f4a4c SG |
898 | } |
899 | ||
ff27e335 | 900 | void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc) |
789f4a4c | 901 | { |
97074674 | 902 | int i; |
789f4a4c | 903 | uint16_t temp = 0; |
ff27e335 SG |
904 | VReg *Vd = (VReg *)vd; |
905 | VReg *Vj = (VReg *)vj; | |
97074674 | 906 | int oprsz = simd_oprsz(desc); |
789f4a4c | 907 | |
97074674 SG |
908 | for (i = 0; i < oprsz / 16; i++) { |
909 | temp = 0; | |
910 | temp = do_vmskltz_b(Vj->D(2 * i)); | |
911 | temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); | |
912 | Vd->D(2 * i) = (uint16_t)(~temp); | |
913 | Vd->D(2 * i + 1) = 0; | |
914 | } | |
789f4a4c SG |
915 | } |
916 | ||
/*
 * Return a bit per byte of @a: bit n is set iff byte n is zero
 * (carry-free zero-byte detection, then funnel into the top byte).
 */
static uint64_t do_vmskez_b(uint64_t a)
{
    uint64_t low7 = 0x7f7f7f7f7f7f7f7fULL;
    uint64_t zeros = ~(((a & low7) + low7) | a | low7);

    zeros |= zeros << 7;
    zeros |= zeros << 14;
    zeros |= zeros << 28;
    return zeros >> 56;
}
926 | ||
ff27e335 | 927 | void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) |
789f4a4c | 928 | { |
97074674 | 929 | int i; |
789f4a4c | 930 | uint16_t temp = 0; |
ff27e335 SG |
931 | VReg *Vd = (VReg *)vd; |
932 | VReg *Vj = (VReg *)vj; | |
97074674 | 933 | int oprsz = simd_oprsz(desc); |
789f4a4c | 934 | |
97074674 SG |
935 | for (i = 0; i < oprsz / 16; i++) { |
936 | temp = 0; | |
937 | temp = do_vmskez_b(Vj->D(2 * i)); | |
938 | temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8); | |
939 | Vd->D(2 * i) = (uint16_t)(~temp); | |
940 | Vd->D(2 * i + 1) = 0; | |
941 | } | |
789f4a4c | 942 | } |
f205a539 | 943 | |
4472a45a | 944 | void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
f205a539 SG |
945 | { |
946 | int i; | |
947 | VReg *Vd = (VReg *)vd; | |
948 | VReg *Vj = (VReg *)vj; | |
949 | ||
4472a45a | 950 | for (i = 0; i < simd_oprsz(desc); i++) { |
f205a539 SG |
951 | Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); |
952 | } | |
953 | } | |
9b21a7a5 | 954 | |
/*
 * VSLLWIL: widen each narrow element (E2) of the low half of Vj to the
 * double-width type E1, shifting left by imm % BIT; result replaces Vd.
 * temp is used so Vd == Vj aliasing is safe.
 */
#define VSLLWIL(NAME, BIT, E1, E2)                                 \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    int i;                                                         \
    VReg temp;                                                     \
    VReg *Vd = (VReg *)vd;                                         \
    VReg *Vj = (VReg *)vj;                                         \
    typedef __typeof(temp.E1(0)) TD;                               \
                                                                   \
    temp.D(0) = 0;                                                 \
    temp.D(1) = 0;                                                 \
    for (i = 0; i < LSX_LEN/BIT; i++) {                            \
        temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT);                 \
    }                                                              \
    *Vd = temp;                                                    \
}

/* VEXTL.Q.D: sign-extend the low 64-bit element of Vj to 128 bits. */
void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
{
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    Vd->Q(0) = int128_makes64(Vj->D(0));
}

/* VEXTL.QU.DU: zero-extend the low 64-bit element of Vj to 128 bits. */
void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
{
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    Vd->Q(0) = int128_make64(Vj->D(0));
}

VSLLWIL(vsllwil_h_b, 16, H, B)
VSLLWIL(vsllwil_w_h, 32, W, H)
VSLLWIL(vsllwil_d_w, 64, D, W)
VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
VSLLWIL(vsllwil_du_wu, 64, UD, UW)
ecb93716 SG |
994 | |
/*
 * Logical shift right with rounding: add back the last bit shifted out.
 * sh == 0 is special-cased because (sh - 1) would otherwise underflow.
 */
#define do_vsrlr(E, T)                                  \
static T do_vsrlr_ ##E(T s1, int sh)                    \
{                                                       \
    if (sh == 0) {                                      \
        return s1;                                      \
    } else {                                            \
        return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1);   \
    }                                                   \
}

do_vsrlr(B, uint8_t)
do_vsrlr(H, uint16_t)
do_vsrlr(W, uint32_t)
do_vsrlr(D, uint64_t)

/* VSRLR: per-element rounding logical shift right by Vk's element % BIT. */
#define VSRLR(NAME, BIT, T, E)                                  \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
{                                                               \
    int i;                                                      \
    VReg *Vd = (VReg *)vd;                                      \
    VReg *Vj = (VReg *)vj;                                      \
    VReg *Vk = (VReg *)vk;                                      \
                                                                \
    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
        Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
    }                                                           \
}

VSRLR(vsrlr_b, 8, uint8_t, B)
VSRLR(vsrlr_h, 16, uint16_t, H)
VSRLR(vsrlr_w, 32, uint32_t, W)
VSRLR(vsrlr_d, 64, uint64_t, D)
1027 | ||
329517d5 SG |
1028 | #define VSRLRI(NAME, BIT, E) \ |
1029 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
1030 | { \ | |
1031 | int i; \ | |
1032 | VReg *Vd = (VReg *)vd; \ | |
1033 | VReg *Vj = (VReg *)vj; \ | |
1034 | \ | |
1035 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
1036 | Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ | |
1037 | } \ | |
ecb93716 SG |
1038 | } |
1039 | ||
1040 | VSRLRI(vsrlri_b, 8, B) | |
1041 | VSRLRI(vsrlri_h, 16, H) | |
1042 | VSRLRI(vsrlri_w, 32, W) | |
1043 | VSRLRI(vsrlri_d, 64, D) | |
1044 | ||
/*
 * Arithmetic shift right with rounding: signed element types make the
 * ">>" sign-extend; the last bit shifted out is added back.
 */
#define do_vsrar(E, T)                                  \
static T do_vsrar_ ##E(T s1, int sh)                    \
{                                                       \
    if (sh == 0) {                                      \
        return s1;                                      \
    } else {                                            \
        return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1);   \
    }                                                   \
}

do_vsrar(B, int8_t)
do_vsrar(H, int16_t)
do_vsrar(W, int32_t)
do_vsrar(D, int64_t)

/* VSRAR: per-element rounding arithmetic shift right by Vk's element % BIT. */
#define VSRAR(NAME, BIT, T, E)                                  \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
{                                                               \
    int i;                                                      \
    VReg *Vd = (VReg *)vd;                                      \
    VReg *Vj = (VReg *)vj;                                      \
    VReg *Vk = (VReg *)vk;                                      \
                                                                \
    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
        Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
    }                                                           \
}

VSRAR(vsrar_b, 8, uint8_t, B)
VSRAR(vsrar_h, 16, uint16_t, H)
VSRAR(vsrar_w, 32, uint32_t, W)
VSRAR(vsrar_d, 64, uint64_t, D)
1077 | ||
329517d5 SG |
1078 | #define VSRARI(NAME, BIT, E) \ |
1079 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
1080 | { \ | |
1081 | int i; \ | |
1082 | VReg *Vd = (VReg *)vd; \ | |
1083 | VReg *Vj = (VReg *)vj; \ | |
1084 | \ | |
1085 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
1086 | Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ | |
1087 | } \ | |
ecb93716 SG |
1088 | } |
1089 | ||
1090 | VSRARI(vsrari_b, 8, B) | |
1091 | VSRARI(vsrari_h, 16, H) | |
1092 | VSRARI(vsrari_w, 32, W) | |
1093 | VSRARI(vsrari_d, 64, D) | |
d79fb8dd SG |
1094 | |
#define R_SHIFT(a, b) (a >> b)

/*
 * VSRLN: logical shift right of each wide element (E2) of Vj by the
 * matching element of Vk % BIT, narrowed into the low half of Vd (E1);
 * the cast to unsigned T makes the shift logical. High half cleared.
 */
#define VSRLN(NAME, BIT, T, E1, E2)                             \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
{                                                               \
    int i;                                                      \
    VReg *Vd = (VReg *)vd;                                      \
    VReg *Vj = (VReg *)vj;                                      \
    VReg *Vk = (VReg *)vk;                                      \
                                                                \
    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
        Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
    }                                                           \
    Vd->D(1) = 0;                                               \
}

VSRLN(vsrln_b_h, 16, uint16_t, B, H)
VSRLN(vsrln_h_w, 32, uint32_t, H, W)
VSRLN(vsrln_w_d, 64, uint64_t, W, D)

/*
 * VSRAN: arithmetic variant — Vj's element is left in its signed type
 * so the shift sign-extends; only the shift count is cast.
 */
#define VSRAN(NAME, BIT, T, E1, E2)                            \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{                                                              \
    int i;                                                     \
    VReg *Vd = (VReg *)vd;                                     \
    VReg *Vj = (VReg *)vj;                                     \
    VReg *Vk = (VReg *)vk;                                     \
                                                               \
    for (i = 0; i < LSX_LEN/BIT; i++) {                        \
        Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT);  \
    }                                                          \
    Vd->D(1) = 0;                                              \
}

VSRAN(vsran_b_h, 16, uint16_t, B, H)
VSRAN(vsran_h_w, 32, uint32_t, H, W)
VSRAN(vsran_w_d, 64, uint64_t, W, D)
1132 | ||
329517d5 SG |
1133 | #define VSRLNI(NAME, BIT, T, E1, E2) \ |
1134 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
1135 | { \ | |
1136 | int i, max; \ | |
1137 | VReg temp; \ | |
1138 | VReg *Vd = (VReg *)vd; \ | |
1139 | VReg *Vj = (VReg *)vj; \ | |
1140 | \ | |
1141 | temp.D(0) = 0; \ | |
1142 | temp.D(1) = 0; \ | |
1143 | max = LSX_LEN/BIT; \ | |
1144 | for (i = 0; i < max; i++) { \ | |
1145 | temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \ | |
1146 | temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \ | |
1147 | } \ | |
1148 | *Vd = temp; \ | |
1149 | } | |
1150 | ||
1151 | void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | |
d79fb8dd SG |
1152 | { |
1153 | VReg temp; | |
329517d5 SG |
1154 | VReg *Vd = (VReg *)vd; |
1155 | VReg *Vj = (VReg *)vj; | |
d79fb8dd SG |
1156 | |
1157 | temp.D(0) = 0; | |
1158 | temp.D(1) = 0; | |
1159 | temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128)); | |
1160 | temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128)); | |
1161 | *Vd = temp; | |
1162 | } | |
1163 | ||
1164 | VSRLNI(vsrlni_b_h, 16, uint16_t, B, H) | |
1165 | VSRLNI(vsrlni_h_w, 32, uint32_t, H, W) | |
1166 | VSRLNI(vsrlni_w_d, 64, uint64_t, W, D) | |
1167 | ||
329517d5 SG |
1168 | #define VSRANI(NAME, BIT, E1, E2) \ |
1169 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
1170 | { \ | |
1171 | int i, max; \ | |
1172 | VReg temp; \ | |
1173 | VReg *Vd = (VReg *)vd; \ | |
1174 | VReg *Vj = (VReg *)vj; \ | |
1175 | \ | |
1176 | temp.D(0) = 0; \ | |
1177 | temp.D(1) = 0; \ | |
1178 | max = LSX_LEN/BIT; \ | |
1179 | for (i = 0; i < max; i++) { \ | |
1180 | temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \ | |
1181 | temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \ | |
1182 | } \ | |
1183 | *Vd = temp; \ | |
1184 | } | |
1185 | ||
1186 | void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | |
d79fb8dd SG |
1187 | { |
1188 | VReg temp; | |
329517d5 SG |
1189 | VReg *Vd = (VReg *)vd; |
1190 | VReg *Vj = (VReg *)vj; | |
d79fb8dd SG |
1191 | |
1192 | temp.D(0) = 0; | |
1193 | temp.D(1) = 0; | |
1194 | temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128)); | |
1195 | temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128)); | |
1196 | *Vd = temp; | |
1197 | } | |
1198 | ||
1199 | VSRANI(vsrani_b_h, 16, B, H) | |
1200 | VSRANI(vsrani_h_w, 32, H, W) | |
1201 | VSRANI(vsrani_w_d, 64, W, D) | |
a5200a17 SG |
1202 | |
/* VSRLRN: rounding logical shift right with narrowing; high half cleared. */
#define VSRLRN(NAME, BIT, T, E1, E2)                                \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)      \
{                                                                   \
    int i;                                                          \
    VReg *Vd = (VReg *)vd;                                          \
    VReg *Vj = (VReg *)vj;                                          \
    VReg *Vk = (VReg *)vk;                                          \
                                                                    \
    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
        Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
    }                                                               \
    Vd->D(1) = 0;                                                   \
}

VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H)
VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)

/* VSRARN: rounding arithmetic shift right with narrowing. */
#define VSRARN(NAME, BIT, T, E1, E2)                                \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)      \
{                                                                   \
    int i;                                                          \
    VReg *Vd = (VReg *)vd;                                          \
    VReg *Vj = (VReg *)vj;                                          \
    VReg *Vk = (VReg *)vk;                                          \
                                                                    \
    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
        Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
    }                                                               \
    Vd->D(1) = 0;                                                   \
}

/*
 * NOTE(review): unlike VSRLRN above, T here is the *narrow* unsigned
 * type. The result of ((T)x) % BIT is nevertheless identical because
 * BIT divides the range of T (e.g. (uint8_t)x % 16 == (uint16_t)x % 16).
 */
VSRARN(vsrarn_b_h, 16, uint8_t, B, H)
VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
1238 | ||
329517d5 SG |
1239 | #define VSRLRNI(NAME, BIT, E1, E2) \ |
1240 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
1241 | { \ | |
1242 | int i, max; \ | |
1243 | VReg temp; \ | |
1244 | VReg *Vd = (VReg *)vd; \ | |
1245 | VReg *Vj = (VReg *)vj; \ | |
1246 | \ | |
1247 | temp.D(0) = 0; \ | |
1248 | temp.D(1) = 0; \ | |
1249 | max = LSX_LEN/BIT; \ | |
1250 | for (i = 0; i < max; i++) { \ | |
1251 | temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \ | |
1252 | temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \ | |
1253 | } \ | |
1254 | *Vd = temp; \ | |
1255 | } | |
1256 | ||
1257 | void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | |
a5200a17 SG |
1258 | { |
1259 | VReg temp; | |
329517d5 SG |
1260 | VReg *Vd = (VReg *)vd; |
1261 | VReg *Vj = (VReg *)vj; | |
a5200a17 SG |
1262 | Int128 r1, r2; |
1263 | ||
1264 | if (imm == 0) { | |
1265 | temp.D(0) = int128_getlo(Vj->Q(0)); | |
1266 | temp.D(1) = int128_getlo(Vd->Q(0)); | |
1267 | } else { | |
1268 | r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); | |
1269 | r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); | |
1270 | ||
1271 | temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1)); | |
1272 | temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2)); | |
1273 | } | |
1274 | *Vd = temp; | |
1275 | } | |
1276 | ||
1277 | VSRLRNI(vsrlrni_b_h, 16, B, H) | |
1278 | VSRLRNI(vsrlrni_h_w, 32, H, W) | |
1279 | VSRLRNI(vsrlrni_w_d, 64, W, D) | |
1280 | ||
329517d5 SG |
1281 | #define VSRARNI(NAME, BIT, E1, E2) \ |
1282 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
1283 | { \ | |
1284 | int i, max; \ | |
1285 | VReg temp; \ | |
1286 | VReg *Vd = (VReg *)vd; \ | |
1287 | VReg *Vj = (VReg *)vj; \ | |
1288 | \ | |
1289 | temp.D(0) = 0; \ | |
1290 | temp.D(1) = 0; \ | |
1291 | max = LSX_LEN/BIT; \ | |
1292 | for (i = 0; i < max; i++) { \ | |
1293 | temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \ | |
1294 | temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \ | |
1295 | } \ | |
1296 | *Vd = temp; \ | |
1297 | } | |
1298 | ||
1299 | void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | |
a5200a17 SG |
1300 | { |
1301 | VReg temp; | |
329517d5 SG |
1302 | VReg *Vd = (VReg *)vd; |
1303 | VReg *Vj = (VReg *)vj; | |
a5200a17 SG |
1304 | Int128 r1, r2; |
1305 | ||
1306 | if (imm == 0) { | |
1307 | temp.D(0) = int128_getlo(Vj->Q(0)); | |
1308 | temp.D(1) = int128_getlo(Vd->Q(0)); | |
1309 | } else { | |
1310 | r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | |
1311 | r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | |
1312 | ||
1313 | temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1)); | |
1314 | temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2)); | |
1315 | } | |
1316 | *Vd = temp; | |
1317 | } | |
1318 | ||
1319 | VSRARNI(vsrarni_b_h, 16, B, H) | |
1320 | VSRARNI(vsrarni_h_w, 32, H, W) | |
1321 | VSRARNI(vsrarni_w_d, 64, W, D) | |
83b3815d SG |
1322 | |
/*
 * Logical shift right with unsigned saturation to sh bits:
 * values above (1 << sh) - 1 clamp to that maximum.
 */
#define SSRLNS(NAME, T1, T2, T3)                    \
static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
{                                                   \
    T1 shft_res;                                    \
    if (sa == 0) {                                  \
        shft_res = e2;                              \
    } else {                                        \
        shft_res = (((T1)e2) >> sa);                \
    }                                               \
    T3 mask;                                        \
    mask = (1ull << sh) -1;                         \
    if (shft_res > mask) {                          \
        return mask;                                \
    } else {                                        \
        return shft_res;                            \
    }                                               \
}

SSRLNS(B, uint16_t, int16_t, uint8_t)
SSRLNS(H, uint32_t, int32_t, uint16_t)
SSRLNS(W, uint64_t, int64_t, uint32_t)

/* VSSRLN: saturating logical shift-right-and-narrow; high half cleared. */
#define VSSRLN(NAME, BIT, T, E1, E2)                                          \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                \
{                                                                             \
    int i;                                                                    \
    VReg *Vd = (VReg *)vd;                                                    \
    VReg *Vj = (VReg *)vj;                                                    \
    VReg *Vk = (VReg *)vk;                                                    \
                                                                              \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
        Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
    }                                                                         \
    Vd->D(1) = 0;                                                             \
}

VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
1362 | ||
/*
 * Arithmetic shift right with signed saturation to [-(1<<sh), (1<<sh)-1].
 */
#define SSRANS(E, T1, T2)                        \
static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
{                                                \
    T1 shft_res;                                 \
    if (sa == 0) {                               \
        shft_res = e2;                           \
    } else {                                     \
        shft_res = e2 >> sa;                     \
    }                                            \
    T2 mask;                                     \
    mask = (1ll << sh) -1;                       \
    if (shft_res > mask) {                       \
        return mask;                             \
    } else if (shft_res < -(mask +1)) {          \
        return ~mask;                            \
    } else {                                     \
        return shft_res;                         \
    }                                            \
}

SSRANS(B, int16_t, int8_t)
SSRANS(H, int32_t, int16_t)
SSRANS(W, int64_t, int32_t)

/* VSSRAN: saturating arithmetic shift-right-and-narrow; high half cleared. */
#define VSSRAN(NAME, BIT, T, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)               \
{                                                                            \
    int i;                                                                   \
    VReg *Vd = (VReg *)vd;                                                   \
    VReg *Vj = (VReg *)vj;                                                   \
    VReg *Vk = (VReg *)vk;                                                   \
                                                                             \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
        Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
    }                                                                        \
    Vd->D(1) = 0;                                                            \
}

VSSRAN(vssran_b_h, 16, uint16_t, B, H)
VSSRAN(vssran_h_w, 32, uint32_t, H, W)
VSSRAN(vssran_w_d, 64, uint64_t, W, D)
1404 | ||
/*
 * Logical shift right of a signed source with unsigned saturation
 * to sh bits (used for the *u* narrowing forms).
 */
#define SSRLNU(E, T1, T2, T3)                    \
static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
{                                                \
    T1 shft_res;                                 \
    if (sa == 0) {                               \
        shft_res = e2;                           \
    } else {                                     \
        shft_res = (((T1)e2) >> sa);             \
    }                                            \
    T2 mask;                                     \
    mask = (1ull << sh) -1;                      \
    if (shft_res > mask) {                       \
        return mask;                             \
    } else {                                     \
        return shft_res;                         \
    }                                            \
}

SSRLNU(B, uint16_t, uint8_t, int16_t)
SSRLNU(H, uint32_t, uint16_t, int32_t)
SSRLNU(W, uint64_t, uint32_t, int64_t)

/* VSSRLNU: unsigned-saturating logical shift-right-and-narrow. */
#define VSSRLNU(NAME, BIT, T, E1, E2)                                     \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
{                                                                         \
    int i;                                                                \
    VReg *Vd = (VReg *)vd;                                                \
    VReg *Vj = (VReg *)vj;                                                \
    VReg *Vk = (VReg *)vk;                                                \
                                                                          \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
        Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
    }                                                                     \
    Vd->D(1) = 0;                                                         \
}

VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
1444 | ||
/*
 * Arithmetic shift right of a signed source with unsigned saturation:
 * negative inputs clamp to 0, values above (1 << sh) - 1 clamp to max.
 */
#define SSRANU(E, T1, T2, T3)                    \
static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
{                                                \
    T1 shft_res;                                 \
    if (sa == 0) {                               \
        shft_res = e2;                           \
    } else {                                     \
        shft_res = e2 >> sa;                     \
    }                                            \
    if (e2 < 0) {                                \
        shft_res = 0;                            \
    }                                            \
    T2 mask;                                     \
    mask = (1ull << sh) -1;                      \
    if (shft_res > mask) {                       \
        return mask;                             \
    } else {                                     \
        return shft_res;                         \
    }                                            \
}

SSRANU(B, uint16_t, uint8_t, int16_t)
SSRANU(H, uint32_t, uint16_t, int32_t)
SSRANU(W, uint64_t, uint32_t, int64_t)

/* VSSRANU: unsigned-saturating arithmetic shift-right-and-narrow. */
#define VSSRANU(NAME, BIT, T, E1, E2)                                     \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
{                                                                         \
    int i;                                                                \
    VReg *Vd = (VReg *)vd;                                                \
    VReg *Vj = (VReg *)vj;                                                \
    VReg *Vk = (VReg *)vk;                                                \
                                                                          \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
        Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
    }                                                                     \
    Vd->D(1) = 0;                                                         \
}

VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
1487 | ||
/*
 * VSSRLNI: saturating logical shift right by immediate with narrowing;
 * Vj fills the low half of the result, old Vd the high half. The loop
 * writes every element of temp, so no pre-zeroing is needed.
 */
#define VSSRLNI(NAME, BIT, E1, E2)                                            \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)            \
{                                                                             \
    int i;                                                                    \
    VReg temp;                                                                \
    VReg *Vd = (VReg *)vd;                                                    \
    VReg *Vj = (VReg *)vj;                                                    \
                                                                              \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
        temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1);              \
        temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
    }                                                                         \
    *Vd = temp;                                                               \
}
1502 | ||
329517d5 | 1503 | void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
83b3815d SG |
1504 | { |
1505 | Int128 shft_res1, shft_res2, mask; | |
329517d5 SG |
1506 | VReg *Vd = (VReg *)vd; |
1507 | VReg *Vj = (VReg *)vj; | |
83b3815d SG |
1508 | |
1509 | if (imm == 0) { | |
1510 | shft_res1 = Vj->Q(0); | |
1511 | shft_res2 = Vd->Q(0); | |
1512 | } else { | |
1513 | shft_res1 = int128_urshift(Vj->Q(0), imm); | |
1514 | shft_res2 = int128_urshift(Vd->Q(0), imm); | |
1515 | } | |
1516 | mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | |
1517 | ||
1518 | if (int128_ult(mask, shft_res1)) { | |
1519 | Vd->D(0) = int128_getlo(mask); | |
1520 | }else { | |
1521 | Vd->D(0) = int128_getlo(shft_res1); | |
1522 | } | |
1523 | ||
1524 | if (int128_ult(mask, shft_res2)) { | |
1525 | Vd->D(1) = int128_getlo(mask); | |
1526 | }else { | |
1527 | Vd->D(1) = int128_getlo(shft_res2); | |
1528 | } | |
1529 | } | |
1530 | ||
1531 | VSSRLNI(vssrlni_b_h, 16, B, H) | |
1532 | VSSRLNI(vssrlni_h_w, 32, H, W) | |
1533 | VSSRLNI(vssrlni_w_d, 64, W, D) | |
1534 | ||
/*
 * VSSRANI: saturating arithmetic shift right by immediate with
 * narrowing; Vj fills the low half, old Vd the high half.
 */
#define VSSRANI(NAME, BIT, E1, E2)                                             \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)             \
{                                                                              \
    int i;                                                                     \
    VReg temp;                                                                 \
    VReg *Vd = (VReg *)vd;                                                     \
    VReg *Vj = (VReg *)vj;                                                     \
                                                                               \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                        \
        temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1);               \
        temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
    }                                                                          \
    *Vd = temp;                                                                \
}

/*
 * VSSRANI.D.Q: arithmetic 128-bit shift with signed saturation to the
 * int64 range; min's low 64 bits (0x8000...0) encode INT64_MIN.
 */
void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
    Int128 shft_res1, shft_res2, mask, min;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    if (imm == 0) {
        shft_res1 = Vj->Q(0);
        shft_res2 = Vd->Q(0);
    } else {
        shft_res1 = int128_rshift(Vj->Q(0), imm);
        shft_res2 = int128_rshift(Vd->Q(0), imm);
    }
    mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
    min = int128_lshift(int128_one(), 63);

    if (int128_gt(shft_res1, mask)) {
        Vd->D(0) = int128_getlo(mask);
    } else if (int128_lt(shft_res1, int128_neg(min))) {
        Vd->D(0) = int128_getlo(min);
    } else {
        Vd->D(0) = int128_getlo(shft_res1);
    }

    if (int128_gt(shft_res2, mask)) {
        Vd->D(1) = int128_getlo(mask);
    } else if (int128_lt(shft_res2, int128_neg(min))) {
        Vd->D(1) = int128_getlo(min);
    } else {
        Vd->D(1) = int128_getlo(shft_res2);
    }
}

VSSRANI(vssrani_b_h, 16, B, H)
VSSRANI(vssrani_h_w, 32, H, W)
VSSRANI(vssrani_w_d, 64, W, D)
1586 | ||
/*
 * VSSRLNUI: unsigned-saturating logical shift right by immediate with
 * narrowing (saturation width BIT/2, i.e. the full narrow element).
 */
#define VSSRLNUI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    int i;                                                                  \
    VReg temp;                                                              \
    VReg *Vd = (VReg *)vd;                                                  \
    VReg *Vj = (VReg *)vj;                                                  \
                                                                            \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
        temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2);               \
        temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \
    }                                                                       \
    *Vd = temp;                                                             \
}
1601 | ||
329517d5 | 1602 | void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
83b3815d SG |
1603 | { |
1604 | Int128 shft_res1, shft_res2, mask; | |
329517d5 SG |
1605 | VReg *Vd = (VReg *)vd; |
1606 | VReg *Vj = (VReg *)vj; | |
83b3815d SG |
1607 | |
1608 | if (imm == 0) { | |
1609 | shft_res1 = Vj->Q(0); | |
1610 | shft_res2 = Vd->Q(0); | |
1611 | } else { | |
1612 | shft_res1 = int128_urshift(Vj->Q(0), imm); | |
1613 | shft_res2 = int128_urshift(Vd->Q(0), imm); | |
1614 | } | |
1615 | mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | |
1616 | ||
1617 | if (int128_ult(mask, shft_res1)) { | |
1618 | Vd->D(0) = int128_getlo(mask); | |
1619 | }else { | |
1620 | Vd->D(0) = int128_getlo(shft_res1); | |
1621 | } | |
1622 | ||
1623 | if (int128_ult(mask, shft_res2)) { | |
1624 | Vd->D(1) = int128_getlo(mask); | |
1625 | }else { | |
1626 | Vd->D(1) = int128_getlo(shft_res2); | |
1627 | } | |
1628 | } | |
1629 | ||
1630 | VSSRLNUI(vssrlni_bu_h, 16, B, H) | |
1631 | VSSRLNUI(vssrlni_hu_w, 32, H, W) | |
1632 | VSSRLNUI(vssrlni_wu_d, 64, W, D) | |
1633 | ||
/*
 * VSSRANUI: unsigned-saturating arithmetic shift right by immediate
 * with narrowing; negative sources clamp to 0 inside do_ssranu_*.
 */
#define VSSRANUI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    int i;                                                                  \
    VReg temp;                                                              \
    VReg *Vd = (VReg *)vd;                                                  \
    VReg *Vj = (VReg *)vj;                                                  \
                                                                            \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
        temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2);               \
        temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \
    }                                                                       \
    *Vd = temp;                                                             \
}
1648 | ||
329517d5 | 1649 | void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
83b3815d SG |
1650 | { |
1651 | Int128 shft_res1, shft_res2, mask; | |
329517d5 SG |
1652 | VReg *Vd = (VReg *)vd; |
1653 | VReg *Vj = (VReg *)vj; | |
83b3815d SG |
1654 | |
1655 | if (imm == 0) { | |
1656 | shft_res1 = Vj->Q(0); | |
1657 | shft_res2 = Vd->Q(0); | |
1658 | } else { | |
1659 | shft_res1 = int128_rshift(Vj->Q(0), imm); | |
1660 | shft_res2 = int128_rshift(Vd->Q(0), imm); | |
1661 | } | |
1662 | ||
1663 | if (int128_lt(Vj->Q(0), int128_zero())) { | |
1664 | shft_res1 = int128_zero(); | |
1665 | } | |
1666 | ||
1667 | if (int128_lt(Vd->Q(0), int128_zero())) { | |
1668 | shft_res2 = int128_zero(); | |
1669 | } | |
1670 | ||
1671 | mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | |
1672 | ||
1673 | if (int128_ult(mask, shft_res1)) { | |
1674 | Vd->D(0) = int128_getlo(mask); | |
1675 | }else { | |
1676 | Vd->D(0) = int128_getlo(shft_res1); | |
1677 | } | |
1678 | ||
1679 | if (int128_ult(mask, shft_res2)) { | |
1680 | Vd->D(1) = int128_getlo(mask); | |
1681 | }else { | |
1682 | Vd->D(1) = int128_getlo(shft_res2); | |
1683 | } | |
1684 | } | |
1685 | ||
1686 | VSSRANUI(vssrani_bu_h, 16, B, H) | |
1687 | VSSRANUI(vssrani_hu_w, 32, H, W) | |
1688 | VSSRANUI(vssrani_wu_d, 64, W, D) | |
162cd32c SG |
1689 | |
/*
 * Rounding logical shift right (do_vsrlr_*) followed by unsigned
 * saturation to sh bits.
 */
#define SSRLRNS(E1, E2, T1, T2, T3)                \
static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
{                                                  \
    T1 shft_res;                                   \
                                                   \
    shft_res = do_vsrlr_ ## E2(e2, sa);            \
    T1 mask;                                       \
    mask = (1ull << sh) -1;                        \
    if (shft_res > mask) {                         \
        return mask;                               \
    } else {                                       \
        return shft_res;                           \
    }                                              \
}

SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)

/* VSSRLRN: saturating rounding logical shift-right-and-narrow. */
#define VSSRLRN(NAME, BIT, T, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                \
{                                                                             \
    int i;                                                                    \
    VReg *Vd = (VReg *)vd;                                                    \
    VReg *Vj = (VReg *)vj;                                                    \
    VReg *Vk = (VReg *)vk;                                                    \
                                                                              \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
        Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
    }                                                                         \
    Vd->D(1) = 0;                                                             \
}

VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H)
VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W)
VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D)
1726 | ||
/*
 * Rounding arithmetic shift right (do_vsrar_*) followed by signed
 * saturation to [-(1<<sh), (1<<sh)-1].
 */
#define SSRARNS(E1, E2, T1, T2)                    \
static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
{                                                  \
    T1 shft_res;                                   \
                                                   \
    shft_res = do_vsrar_ ## E2(e2, sa);            \
    T2 mask;                                       \
    mask = (1ll << sh) -1;                         \
    if (shft_res > mask) {                         \
        return mask;                               \
    } else if (shft_res < -(mask +1)) {            \
        return ~mask;                              \
    } else {                                       \
        return shft_res;                           \
    }                                              \
}

SSRARNS(B, H, int16_t, int8_t)
SSRARNS(H, W, int32_t, int16_t)
SSRARNS(W, D, int64_t, int32_t)

/* VSSRARN: saturating rounding arithmetic shift-right-and-narrow. */
#define VSSRARN(NAME, BIT, T, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                \
{                                                                             \
    int i;                                                                    \
    VReg *Vd = (VReg *)vd;                                                    \
    VReg *Vj = (VReg *)vj;                                                    \
    VReg *Vk = (VReg *)vk;                                                    \
                                                                              \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
        Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
    }                                                                         \
    Vd->D(1) = 0;                                                             \
}

VSSRARN(vssrarn_b_h, 16, uint16_t, B, H)
VSSRARN(vssrarn_h_w, 32, uint32_t, H, W)
VSSRARN(vssrarn_w_d, 64, uint64_t, W, D)
1765 | ||
/*
 * Rounding logical shift right of a signed source with unsigned
 * saturation to sh bits (for the *u* narrowing forms).
 */
#define SSRLRNU(E1, E2, T1, T2, T3)                \
static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
{                                                  \
    T1 shft_res;                                   \
                                                   \
    shft_res = do_vsrlr_ ## E2(e2, sa);            \
                                                   \
    T2 mask;                                       \
    mask = (1ull << sh) -1;                        \
    if (shft_res > mask) {                         \
        return mask;                               \
    } else {                                       \
        return shft_res;                           \
    }                                              \
}

SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)

/* VSSRLRNU: unsigned-saturating rounding logical shift-right-and-narrow. */
#define VSSRLRNU(NAME, BIT, T, E1, E2)                                     \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)             \
{                                                                          \
    int i;                                                                 \
    VReg *Vd = (VReg *)vd;                                                 \
    VReg *Vj = (VReg *)vj;                                                 \
    VReg *Vk = (VReg *)vk;                                                 \
                                                                           \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                    \
        Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
    }                                                                      \
    Vd->D(1) = 0;                                                          \
}

VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H)
VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W)
VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D)
1803 | ||
/*
 * Rounding arithmetic right shift with unsigned saturation:
 * a negative source saturates to 0; otherwise shift right by @sa with
 * rounding and clamp to [0, 2^sh - 1].
 */
#define SSRARNU(E1, E2, T1, T2, T3)                          \
static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh)           \
{                                                            \
    T2 max = (1ull << sh) - 1;                               \
    T1 res = e2 < 0 ? 0 : (T1)do_vsrar_ ## E2(e2, sa);       \
                                                             \
    return res > max ? max : res;                            \
}

SSRARNU(B, H, uint16_t, uint8_t, int16_t)
SSRARNU(H, W, uint32_t, uint16_t, int32_t)
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
1826 | ||
1827 | #define VSSRARNU(NAME, BIT, T, E1, E2) \ | |
04711da1 | 1828 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
162cd32c SG |
1829 | { \ |
1830 | int i; \ | |
04711da1 SG |
1831 | VReg *Vd = (VReg *)vd; \ |
1832 | VReg *Vj = (VReg *)vj; \ | |
1833 | VReg *Vk = (VReg *)vk; \ | |
162cd32c SG |
1834 | \ |
1835 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
1836 | Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | |
1837 | } \ | |
1838 | Vd->D(1) = 0; \ | |
1839 | } | |
1840 | ||
1841 | VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H) | |
1842 | VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W) | |
1843 | VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D) | |
1844 | ||
/*
 * VSSRLRNI.{B.H/H.W/W.D}: rounding logical right shift by immediate
 * with unsigned saturation; narrowed Vj elements fill the low half of
 * Vd, narrowed old-Vd elements fill the high half.
 */
#define VSSRLRNI(NAME, BIT, E1, E2)                                       \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)        \
{                                                                         \
    int i;                                                                \
    VReg tmp;                                                             \
    VReg *Vd = (VReg *)vd;                                                \
    VReg *Vj = (VReg *)vj;                                                \
    const int n = LSX_LEN / BIT;                                          \
                                                                          \
    for (i = 0; i < n; i++) {                                             \
        tmp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT / 2 - 1);       \
        tmp.E1(n + i) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT / 2 - 1);   \
    }                                                                     \
    *Vd = tmp;                                                            \
}
1859 | ||
1860 | #define VSSRLRNI_Q(NAME, sh) \ | |
329517d5 | 1861 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
162cd32c SG |
1862 | { \ |
1863 | Int128 shft_res1, shft_res2, mask, r1, r2; \ | |
329517d5 SG |
1864 | VReg *Vd = (VReg *)vd; \ |
1865 | VReg *Vj = (VReg *)vj; \ | |
162cd32c SG |
1866 | \ |
1867 | if (imm == 0) { \ | |
1868 | shft_res1 = Vj->Q(0); \ | |
1869 | shft_res2 = Vd->Q(0); \ | |
1870 | } else { \ | |
1871 | r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \ | |
1872 | r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \ | |
1873 | \ | |
1874 | shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \ | |
1875 | shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \ | |
1876 | } \ | |
1877 | \ | |
1878 | mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \ | |
1879 | \ | |
1880 | if (int128_ult(mask, shft_res1)) { \ | |
1881 | Vd->D(0) = int128_getlo(mask); \ | |
1882 | }else { \ | |
1883 | Vd->D(0) = int128_getlo(shft_res1); \ | |
1884 | } \ | |
1885 | \ | |
1886 | if (int128_ult(mask, shft_res2)) { \ | |
1887 | Vd->D(1) = int128_getlo(mask); \ | |
1888 | }else { \ | |
1889 | Vd->D(1) = int128_getlo(shft_res2); \ | |
1890 | } \ | |
1891 | } | |
1892 | ||
1893 | VSSRLRNI(vssrlrni_b_h, 16, B, H) | |
1894 | VSSRLRNI(vssrlrni_h_w, 32, H, W) | |
1895 | VSSRLRNI(vssrlrni_w_d, 64, W, D) | |
1896 | VSSRLRNI_Q(vssrlrni_d_q, 63) | |
1897 | ||
/*
 * VSSRARNI.{B.H/H.W/W.D}: rounding arithmetic right shift by immediate
 * with signed saturation; narrowed Vj elements fill the low half of Vd,
 * narrowed old-Vd elements fill the high half.
 */
#define VSSRARNI(NAME, BIT, E1, E2)                                       \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)        \
{                                                                         \
    int i;                                                                \
    VReg tmp;                                                             \
    VReg *Vd = (VReg *)vd;                                                \
    VReg *Vj = (VReg *)vj;                                                \
    const int n = LSX_LEN / BIT;                                          \
                                                                          \
    for (i = 0; i < n; i++) {                                             \
        tmp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT / 2 - 1);       \
        tmp.E1(n + i) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT / 2 - 1);   \
    }                                                                     \
    *Vd = tmp;                                                            \
}
1912 | ||
329517d5 | 1913 | void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
162cd32c SG |
1914 | { |
1915 | Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; | |
329517d5 SG |
1916 | VReg *Vd = (VReg *)vd; |
1917 | VReg *Vj = (VReg *)vj; | |
162cd32c SG |
1918 | |
1919 | if (imm == 0) { | |
1920 | shft_res1 = Vj->Q(0); | |
1921 | shft_res2 = Vd->Q(0); | |
1922 | } else { | |
1923 | r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | |
1924 | r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | |
1925 | ||
1926 | shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); | |
1927 | shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); | |
1928 | } | |
1929 | ||
1930 | mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | |
1931 | mask2 = int128_lshift(int128_one(), 63); | |
1932 | ||
1933 | if (int128_gt(shft_res1, mask1)) { | |
1934 | Vd->D(0) = int128_getlo(mask1); | |
1935 | } else if (int128_lt(shft_res1, int128_neg(mask2))) { | |
1936 | Vd->D(0) = int128_getlo(mask2); | |
1937 | } else { | |
1938 | Vd->D(0) = int128_getlo(shft_res1); | |
1939 | } | |
1940 | ||
1941 | if (int128_gt(shft_res2, mask1)) { | |
1942 | Vd->D(1) = int128_getlo(mask1); | |
1943 | } else if (int128_lt(shft_res2, int128_neg(mask2))) { | |
1944 | Vd->D(1) = int128_getlo(mask2); | |
1945 | } else { | |
1946 | Vd->D(1) = int128_getlo(shft_res2); | |
1947 | } | |
1948 | } | |
1949 | ||
1950 | VSSRARNI(vssrarni_b_h, 16, B, H) | |
1951 | VSSRARNI(vssrarni_h_w, 32, H, W) | |
1952 | VSSRARNI(vssrarni_w_d, 64, W, D) | |
1953 | ||
1954 | #define VSSRLRNUI(NAME, BIT, E1, E2) \ | |
329517d5 | 1955 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
162cd32c SG |
1956 | { \ |
1957 | int i; \ | |
1958 | VReg temp; \ | |
329517d5 SG |
1959 | VReg *Vd = (VReg *)vd; \ |
1960 | VReg *Vj = (VReg *)vj; \ | |
162cd32c SG |
1961 | \ |
1962 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
1963 | temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | |
1964 | temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \ | |
1965 | } \ | |
1966 | *Vd = temp; \ | |
1967 | } | |
1968 | ||
1969 | VSSRLRNUI(vssrlrni_bu_h, 16, B, H) | |
1970 | VSSRLRNUI(vssrlrni_hu_w, 32, H, W) | |
1971 | VSSRLRNUI(vssrlrni_wu_d, 64, W, D) | |
1972 | VSSRLRNI_Q(vssrlrni_du_q, 64) | |
1973 | ||
/*
 * VSSRARNI.{BU.H/HU.W/WU.D}: rounding arithmetic right shift by
 * immediate with unsigned saturation; narrowed Vj elements fill the low
 * half of Vd, narrowed old-Vd elements fill the high half.
 */
#define VSSRARNUI(NAME, BIT, E1, E2)                                      \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)        \
{                                                                         \
    int i;                                                                \
    VReg tmp;                                                             \
    VReg *Vd = (VReg *)vd;                                                \
    VReg *Vj = (VReg *)vj;                                                \
    const int n = LSX_LEN / BIT;                                          \
                                                                          \
    for (i = 0; i < n; i++) {                                             \
        tmp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT / 2);           \
        tmp.E1(n + i) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT / 2);       \
    }                                                                     \
    *Vd = tmp;                                                            \
}
1988 | ||
/*
 * VSSRARNI.DU.Q: 128-bit rounding arithmetic right shift by immediate
 * with unsigned 64-bit saturation.  Vj->Q(0) produces Vd->D(0), the old
 * Vd->Q(0) produces Vd->D(1).  Negative sources saturate to zero.
 */
void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
    Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    if (imm == 0) {
        /* Shift by zero: no rounding bit to add. */
        shft_res1 = Vj->Q(0);
        shft_res2 = Vd->Q(0);
    } else {
        /* r1/r2 are the rounding bits (bit imm-1 of each source). */
        r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
        r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());

        shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
        shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
    }

    /* Unsigned destination: negative inputs clamp to zero. */
    if (int128_lt(Vj->Q(0), int128_zero())) {
        shft_res1 = int128_zero();
    }
    if (int128_lt(Vd->Q(0), int128_zero())) {
        shft_res2 = int128_zero();
    }

    /* mask1 = 2^64 - 1 (UINT64_MAX); mask2 = 2^64. */
    mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
    mask2 = int128_lshift(int128_one(), 64);

    /*
     * After the clamp above shft_res1/2 are non-negative, so only the
     * upper-bound comparison can trigger here; the lower-bound branch
     * mirrors the signed variant for symmetry.
     */
    if (int128_gt(shft_res1, mask1)) {
        Vd->D(0) = int128_getlo(mask1);
    } else if (int128_lt(shft_res1, int128_neg(mask2))) {
        Vd->D(0) = int128_getlo(mask2);
    } else {
        Vd->D(0) = int128_getlo(shft_res1);
    }

    if (int128_gt(shft_res2, mask1)) {
        Vd->D(1) = int128_getlo(mask1);
    } else if (int128_lt(shft_res2, int128_neg(mask2))) {
        Vd->D(1) = int128_getlo(mask2);
    } else {
        Vd->D(1) = int128_getlo(shft_res2);
    }
}

VSSRARNUI(vssrarni_bu_h, 16, B, H)
VSSRARNUI(vssrarni_hu_w, 32, H, W)
VSSRARNUI(vssrarni_wu_d, 64, W, D)
2e105e12 | 2036 | |
ff27e335 SG |
2037 | #define DO_2OP(NAME, BIT, E, DO_OP) \ |
2038 | void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | |
2039 | { \ | |
2040 | int i; \ | |
2041 | VReg *Vd = (VReg *)vd; \ | |
2042 | VReg *Vj = (VReg *)vj; \ | |
2043 | \ | |
2044 | for (i = 0; i < LSX_LEN/BIT; i++) \ | |
2045 | { \ | |
2046 | Vd->E(i) = DO_OP(Vj->E(i)); \ | |
2047 | } \ | |
2e105e12 SG |
2048 | } |
2049 | ||
2050 | #define DO_CLO_B(N) (clz32(~N & 0xff) - 24) | |
2051 | #define DO_CLO_H(N) (clz32(~N & 0xffff) - 16) | |
2052 | #define DO_CLO_W(N) (clz32(~N)) | |
2053 | #define DO_CLO_D(N) (clz64(~N)) | |
2054 | #define DO_CLZ_B(N) (clz32(N) - 24) | |
2055 | #define DO_CLZ_H(N) (clz32(N) - 16) | |
2056 | #define DO_CLZ_W(N) (clz32(N)) | |
2057 | #define DO_CLZ_D(N) (clz64(N)) | |
2058 | ||
2059 | DO_2OP(vclo_b, 8, UB, DO_CLO_B) | |
2060 | DO_2OP(vclo_h, 16, UH, DO_CLO_H) | |
2061 | DO_2OP(vclo_w, 32, UW, DO_CLO_W) | |
2062 | DO_2OP(vclo_d, 64, UD, DO_CLO_D) | |
2063 | DO_2OP(vclz_b, 8, UB, DO_CLZ_B) | |
2064 | DO_2OP(vclz_h, 16, UH, DO_CLZ_H) | |
2065 | DO_2OP(vclz_w, 32, UW, DO_CLZ_W) | |
2066 | DO_2OP(vclz_d, 64, UD, DO_CLZ_D) | |
bb22ee57 | 2067 | |
ff27e335 SG |
2068 | #define VPCNT(NAME, BIT, E, FN) \ |
2069 | void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | |
2070 | { \ | |
2071 | int i; \ | |
2072 | VReg *Vd = (VReg *)vd; \ | |
2073 | VReg *Vj = (VReg *)vj; \ | |
2074 | \ | |
2075 | for (i = 0; i < LSX_LEN/BIT; i++) \ | |
2076 | { \ | |
2077 | Vd->E(i) = FN(Vj->E(i)); \ | |
2078 | } \ | |
bb22ee57 SG |
2079 | } |
2080 | ||
2081 | VPCNT(vpcnt_b, 8, UB, ctpop8) | |
2082 | VPCNT(vpcnt_h, 16, UH, ctpop16) | |
2083 | VPCNT(vpcnt_w, 32, UW, ctpop32) | |
2084 | VPCNT(vpcnt_d, 64, UD, ctpop64) | |
0b1e6705 SG |
2085 | |
2086 | #define DO_BITCLR(a, bit) (a & ~(1ull << bit)) | |
2087 | #define DO_BITSET(a, bit) (a | 1ull << bit) | |
2088 | #define DO_BITREV(a, bit) (a ^ (1ull << bit)) | |
2089 | ||
2090 | #define DO_BIT(NAME, BIT, E, DO_OP) \ | |
2091 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | |
2092 | { \ | |
2093 | int i; \ | |
2094 | VReg *Vd = (VReg *)vd; \ | |
2095 | VReg *Vj = (VReg *)vj; \ | |
2096 | VReg *Vk = (VReg *)vk; \ | |
2097 | \ | |
2098 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2099 | Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ | |
2100 | } \ | |
2101 | } | |
2102 | ||
2103 | DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) | |
2104 | DO_BIT(vbitclr_h, 16, UH, DO_BITCLR) | |
2105 | DO_BIT(vbitclr_w, 32, UW, DO_BITCLR) | |
2106 | DO_BIT(vbitclr_d, 64, UD, DO_BITCLR) | |
2107 | DO_BIT(vbitset_b, 8, UB, DO_BITSET) | |
2108 | DO_BIT(vbitset_h, 16, UH, DO_BITSET) | |
2109 | DO_BIT(vbitset_w, 32, UW, DO_BITSET) | |
2110 | DO_BIT(vbitset_d, 64, UD, DO_BITSET) | |
2111 | DO_BIT(vbitrev_b, 8, UB, DO_BITREV) | |
2112 | DO_BIT(vbitrev_h, 16, UH, DO_BITREV) | |
2113 | DO_BIT(vbitrev_w, 32, UW, DO_BITREV) | |
2114 | DO_BIT(vbitrev_d, 64, UD, DO_BITREV) | |
2115 | ||
2116 | #define DO_BITI(NAME, BIT, E, DO_OP) \ | |
2117 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | |
2118 | { \ | |
2119 | int i; \ | |
2120 | VReg *Vd = (VReg *)vd; \ | |
2121 | VReg *Vj = (VReg *)vj; \ | |
2122 | \ | |
2123 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2124 | Vd->E(i) = DO_OP(Vj->E(i), imm); \ | |
2125 | } \ | |
2126 | } | |
2127 | ||
2128 | DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) | |
2129 | DO_BITI(vbitclri_h, 16, UH, DO_BITCLR) | |
2130 | DO_BITI(vbitclri_w, 32, UW, DO_BITCLR) | |
2131 | DO_BITI(vbitclri_d, 64, UD, DO_BITCLR) | |
2132 | DO_BITI(vbitseti_b, 8, UB, DO_BITSET) | |
2133 | DO_BITI(vbitseti_h, 16, UH, DO_BITSET) | |
2134 | DO_BITI(vbitseti_w, 32, UW, DO_BITSET) | |
2135 | DO_BITI(vbitseti_d, 64, UD, DO_BITSET) | |
2136 | DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) | |
2137 | DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) | |
2138 | DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) | |
2139 | DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) | |
ac95a0b9 | 2140 | |
04711da1 SG |
2141 | #define VFRSTP(NAME, BIT, MASK, E) \ |
2142 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
2143 | { \ | |
2144 | int i, m; \ | |
2145 | VReg *Vd = (VReg *)vd; \ | |
2146 | VReg *Vj = (VReg *)vj; \ | |
2147 | VReg *Vk = (VReg *)vk; \ | |
2148 | \ | |
2149 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2150 | if (Vj->E(i) < 0) { \ | |
2151 | break; \ | |
2152 | } \ | |
2153 | } \ | |
2154 | m = Vk->E(0) & MASK; \ | |
2155 | Vd->E(m) = i; \ | |
ac95a0b9 SG |
2156 | } |
2157 | ||
2158 | VFRSTP(vfrstp_b, 8, 0xf, B) | |
2159 | VFRSTP(vfrstp_h, 16, 0x7, H) | |
2160 | ||
329517d5 SG |
2161 | #define VFRSTPI(NAME, BIT, E) \ |
2162 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
2163 | { \ | |
2164 | int i, m; \ | |
2165 | VReg *Vd = (VReg *)vd; \ | |
2166 | VReg *Vj = (VReg *)vj; \ | |
2167 | \ | |
2168 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2169 | if (Vj->E(i) < 0) { \ | |
2170 | break; \ | |
2171 | } \ | |
2172 | } \ | |
2173 | m = imm % (LSX_LEN/BIT); \ | |
2174 | Vd->E(m) = i; \ | |
ac95a0b9 SG |
2175 | } |
2176 | ||
2177 | VFRSTPI(vfrstpi_b, 8, B) | |
2178 | VFRSTPI(vfrstpi_h, 16, H) | |
aca67472 SG |
2179 | |
/*
 * Fold the pending softfloat exception flags into FCSR0, ignoring the
 * flags in @mask.  If any surviving flag is enabled in FCSR0, raise an
 * FPE exception at guest pc @pc; otherwise accumulate the flags into
 * the FCSR0 flag bits.
 */
static void vec_update_fcsr0_mask(CPULoongArchState *env,
                                  uintptr_t pc, int mask)
{
    int flags = get_float_exception_flags(&env->fp_status);

    /* Consume the softfloat flags so the next op starts clean. */
    set_float_exception_flags(0, &env->fp_status);

    flags &= ~mask;

    if (flags) {
        /* Translate IEEE flag bits to the LoongArch FCSR encoding. */
        flags = ieee_ex_to_loongarch(flags);
        UPDATE_FP_CAUSE(env->fcsr0, flags);
    }

    if (GET_FP_ENABLES(env->fcsr0) & flags) {
        do_raise_exception(env, EXCCODE_FPE, pc);
    } else {
        UPDATE_FP_FLAGS(env->fcsr0, flags);
    }
}
2200 | ||
/* Fold all pending softfloat exception flags into FCSR0 (no mask). */
static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
{
    vec_update_fcsr0_mask(env, pc, 0);
}

/* Clear the FCSR0 cause bits before a vector FP operation begins. */
static inline void vec_clear_cause(CPULoongArchState *env)
{
    SET_FP_CAUSE(env->fcsr0, 0);
}
2210 | ||
/*
 * Generic binary FP element-wise helper: Vd[i] = FN(Vj[i], Vk[i]).
 * FCSR0 cause bits are cleared once up front; exception flags are
 * folded in after every element.  GETPC() must stay in this helper so
 * the unwind finds the calling TB.
 */
#define DO_3OP_F(NAME, BIT, E, FN)                                \
void HELPER(NAME)(void *vd, void *vj, void *vk,                   \
                  CPULoongArchState *env, uint32_t desc)          \
{                                                                 \
    int i;                                                        \
    VReg *Vd = (VReg *)vd;                                        \
    VReg *Vj = (VReg *)vj;                                        \
    VReg *Vk = (VReg *)vk;                                        \
                                                                  \
    vec_clear_cause(env);                                         \
    for (i = 0; i < LSX_LEN/BIT; i++) {                           \
        Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status);       \
        vec_update_fcsr0(env, GETPC());                           \
    }                                                             \
}

DO_3OP_F(vfadd_s, 32, UW, float32_add)
DO_3OP_F(vfadd_d, 64, UD, float64_add)
DO_3OP_F(vfsub_s, 32, UW, float32_sub)
DO_3OP_F(vfsub_d, 64, UD, float64_sub)
DO_3OP_F(vfmul_s, 32, UW, float32_mul)
DO_3OP_F(vfmul_d, 64, UD, float64_mul)
DO_3OP_F(vfdiv_s, 32, UW, float32_div)
DO_3OP_F(vfdiv_d, 64, UD, float64_div)
DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
2243 | ||
/*
 * Generic fused-multiply-add element-wise helper:
 * Vd[i] = FN(Vj[i], Vk[i], Va[i]) with the given muladd negation flags.
 */
#define DO_4OP_F(NAME, BIT, E, FN, flags)                                    \
void HELPER(NAME)(void *vd, void *vj, void *vk, void *va,                    \
                  CPULoongArchState *env, uint32_t desc)                     \
{                                                                            \
    int i;                                                                   \
    VReg *Vd = (VReg *)vd;                                                   \
    VReg *Vj = (VReg *)vj;                                                   \
    VReg *Vk = (VReg *)vk;                                                   \
    VReg *Va = (VReg *)va;                                                   \
                                                                             \
    vec_clear_cause(env);                                                    \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
        Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
        vec_update_fcsr0(env, GETPC());                                      \
    }                                                                        \
}

DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
         float_muladd_negate_c | float_muladd_negate_result)
DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
         float_muladd_negate_c | float_muladd_negate_result)
2271 | ||
226bf881 SG |
/*
 * Generic unary FP element-wise helper: Vd[i] = FN(env, Vj[i]).
 * FN is responsible for its own FCSR0 flag update (the do_* wrappers
 * below call vec_update_fcsr0* themselves).
 */
#define DO_2OP_F(NAME, BIT, E, FN)                                \
void HELPER(NAME)(void *vd, void *vj,                             \
                  CPULoongArchState *env, uint32_t desc)          \
{                                                                 \
    int i;                                                        \
    VReg *Vd = (VReg *)vd;                                        \
    VReg *Vj = (VReg *)vj;                                        \
                                                                  \
    vec_clear_cause(env);                                         \
    for (i = 0; i < LSX_LEN/BIT; i++) {                           \
        Vd->E(i) = FN(env, Vj->E(i));                             \
    }                                                             \
}
2285 | ||
/*
 * Base-2 logarithm rounded down to an integral value.  The rounding
 * mode is forced to round-down around log2 + round_to_int and then
 * restored; the inexact flag is suppressed when updating FCSR0.
 */
#define FLOGB(BIT, T)                                             \
static T do_flogb_## BIT(CPULoongArchState *env, T fj)            \
{                                                                 \
    T fp, fd;                                                     \
    float_status *status = &env->fp_status;                       \
    FloatRoundMode old_mode = get_float_rounding_mode(status);    \
                                                                  \
    set_float_rounding_mode(float_round_down, status);            \
    fp = float ## BIT ##_log2(fj, status);                        \
    fd = float ## BIT ##_round_to_int(fp, status);                \
    set_float_rounding_mode(old_mode, status);                    \
    vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact);      \
    return fd;                                                    \
}

FLOGB(32, uint32_t)
FLOGB(64, uint64_t)
2303 | ||
226bf881 SG |
/*
 * FP classify per element.  No flags are raised, so there is no
 * vec_clear_cause/vec_update_fcsr0 here.
 */
#define FCLASS(NAME, BIT, E, FN)                                  \
void HELPER(NAME)(void *vd, void *vj,                             \
                  CPULoongArchState *env, uint32_t desc)          \
{                                                                 \
    int i;                                                        \
    VReg *Vd = (VReg *)vd;                                        \
    VReg *Vj = (VReg *)vj;                                        \
                                                                  \
    for (i = 0; i < LSX_LEN/BIT; i++) {                           \
        Vd->E(i) = FN(env, Vj->E(i));                             \
    }                                                             \
}

FCLASS(vfclass_s, 32, UW, helper_fclass_s)
FCLASS(vfclass_d, 64, UD, helper_fclass_d)
2319 | ||
/* Square root per element, folding flags into FCSR0. */
#define FSQRT(BIT, T)                                             \
static T do_fsqrt_## BIT(CPULoongArchState *env, T fj)            \
{                                                                 \
    T fd;                                                         \
    fd = float ## BIT ##_sqrt(fj, &env->fp_status);               \
    vec_update_fcsr0(env, GETPC());                               \
    return fd;                                                    \
}

FSQRT(32, uint32_t)
FSQRT(64, uint64_t)

/* Reciprocal (1/x) per element, folding flags into FCSR0. */
#define FRECIP(BIT, T)                                                  \
static T do_frecip_## BIT(CPULoongArchState *env, T fj)                 \
{                                                                       \
    T fd;                                                               \
    fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
    vec_update_fcsr0(env, GETPC());                                     \
    return fd;                                                          \
}

FRECIP(32, uint32_t)
FRECIP(64, uint64_t)

/* Reciprocal square root (1/sqrt(x)) per element. */
#define FRSQRT(BIT, T)                                                  \
static T do_frsqrt_## BIT(CPULoongArchState *env, T fj)                 \
{                                                                       \
    T fd, fp;                                                           \
    fp = float ## BIT ##_sqrt(fj, &env->fp_status);                     \
    fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
    vec_update_fcsr0(env, GETPC());                                     \
    return fd;                                                          \
}

FRSQRT(32, uint32_t)
FRSQRT(64, uint64_t)

DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
399665d2 SG |
2365 | |
/*
 * Width-conversion wrappers with a uniform (value, status) signature so
 * they can be used interchangeably by the vfcvt* helpers below.  The
 * "true" argument selects IEEE half-precision handling in softfloat.
 */
static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
{
    return float16_to_float32(h, true, status);
}
static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
{
    return float32_to_float64(s, status);
}

static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
{
    return float32_to_float16(s, true, status);
}
static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
{
    return float64_to_float32(d, status);
}
2383 | ||
226bf881 SG |
/*
 * VFCVTL.S.H: widen the low half of Vj (4 x f16) to 4 x f32.
 * A temporary is used because Vd may alias Vj.
 */
void HELPER(vfcvtl_s_h)(void *vd, void *vj,
                        CPULoongArchState *env, uint32_t desc)
{
    int i;
    VReg temp;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    vec_clear_cause(env);
    for (i = 0; i < LSX_LEN/32; i++) {
        temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status);
        vec_update_fcsr0(env, GETPC());
    }
    *Vd = temp;
}

/* VFCVTL.D.S: widen the low half of Vj (2 x f32) to 2 x f64. */
void HELPER(vfcvtl_d_s)(void *vd, void *vj,
                        CPULoongArchState *env, uint32_t desc)
{
    int i;
    VReg temp;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    vec_clear_cause(env);
    for (i = 0; i < LSX_LEN/64; i++) {
        temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status);
        vec_update_fcsr0(env, GETPC());
    }
    *Vd = temp;
}
2415 | ||
226bf881 SG |
/*
 * VFCVTH.S.H: widen the HIGH half of Vj to 4 x f32; "i + 4" reads the
 * upper four f16 elements.
 */
void HELPER(vfcvth_s_h)(void *vd, void *vj,
                        CPULoongArchState *env, uint32_t desc)
{
    int i;
    VReg temp;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    vec_clear_cause(env);
    for (i = 0; i < LSX_LEN/32; i++) {
        temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status);
        vec_update_fcsr0(env, GETPC());
    }
    *Vd = temp;
}

/*
 * VFCVTH.D.S: widen the HIGH half of Vj to 2 x f64; "i + 2" reads the
 * upper two f32 elements.
 */
void HELPER(vfcvth_d_s)(void *vd, void *vj,
                        CPULoongArchState *env, uint32_t desc)
{
    int i;
    VReg temp;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    vec_clear_cause(env);
    for (i = 0; i < LSX_LEN/64; i++) {
        temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status);
        vec_update_fcsr0(env, GETPC());
    }
    *Vd = temp;
}
2447 | ||
3b286753 SG |
2448 | void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk, |
2449 | CPULoongArchState *env, uint32_t desc) | |
399665d2 SG |
2450 | { |
2451 | int i; | |
2452 | VReg temp; | |
3b286753 SG |
2453 | VReg *Vd = (VReg *)vd; |
2454 | VReg *Vj = (VReg *)vj; | |
2455 | VReg *Vk = (VReg *)vk; | |
399665d2 SG |
2456 | |
2457 | vec_clear_cause(env); | |
2458 | for(i = 0; i < LSX_LEN/32; i++) { | |
2459 | temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status); | |
2460 | temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status); | |
2461 | vec_update_fcsr0(env, GETPC()); | |
2462 | } | |
2463 | *Vd = temp; | |
2464 | } | |
2465 | ||
3b286753 SG |
2466 | void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk, |
2467 | CPULoongArchState *env, uint32_t desc) | |
399665d2 SG |
2468 | { |
2469 | int i; | |
2470 | VReg temp; | |
3b286753 SG |
2471 | VReg *Vd = (VReg *)vd; |
2472 | VReg *Vj = (VReg *)vj; | |
2473 | VReg *Vk = (VReg *)vk; | |
399665d2 SG |
2474 | |
2475 | vec_clear_cause(env); | |
2476 | for(i = 0; i < LSX_LEN/64; i++) { | |
2477 | temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status); | |
2478 | temp.UW(i) = float64_cvt_float32(Vk->UD(i), &env->fp_status); | |
2479 | vec_update_fcsr0(env, GETPC()); | |
2480 | } | |
2481 | *Vd = temp; | |
2482 | } | |
2483 | ||
226bf881 SG |
2484 | void HELPER(vfrint_s)(void *vd, void *vj, |
2485 | CPULoongArchState *env, uint32_t desc) | |
399665d2 SG |
2486 | { |
2487 | int i; | |
226bf881 SG |
2488 | VReg *Vd = (VReg *)vd; |
2489 | VReg *Vj = (VReg *)vj; | |
399665d2 SG |
2490 | |
2491 | vec_clear_cause(env); | |
2492 | for (i = 0; i < 4; i++) { | |
2493 | Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); | |
2494 | vec_update_fcsr0(env, GETPC()); | |
2495 | } | |
2496 | } | |
2497 | ||
226bf881 SG |
2498 | void HELPER(vfrint_d)(void *vd, void *vj, |
2499 | CPULoongArchState *env, uint32_t desc) | |
399665d2 SG |
2500 | { |
2501 | int i; | |
226bf881 SG |
2502 | VReg *Vd = (VReg *)vd; |
2503 | VReg *Vj = (VReg *)vj; | |
399665d2 SG |
2504 | |
2505 | vec_clear_cause(env); | |
2506 | for (i = 0; i < 2; i++) { | |
2507 | Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); | |
2508 | vec_update_fcsr0(env, GETPC()); | |
2509 | } | |
2510 | } | |
2511 | ||
/*
 * Round-to-integral with an explicit rounding mode: the mode is forced
 * to MODE around each element's round_to_int and then restored.
 */
#define FCVT_2OP(NAME, BIT, E, MODE)                                        \
void HELPER(NAME)(void *vd, void *vj,                                       \
                  CPULoongArchState *env, uint32_t desc)                    \
{                                                                           \
    int i;                                                                  \
    VReg *Vd = (VReg *)vd;                                                  \
    VReg *Vj = (VReg *)vj;                                                  \
                                                                            \
    vec_clear_cause(env);                                                   \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
        FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
        set_float_rounding_mode(MODE, &env->fp_status);                     \
        Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
        set_float_rounding_mode(old_mode, &env->fp_status);                 \
        vec_update_fcsr0(env, GETPC());                                     \
    }                                                                       \
}

FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
2538 | ||
/*
 * FP-to-integer conversion with an explicit rounding mode: force MODE,
 * convert via the matching do_<FMT1>_to_<FMT2> wrapper, restore mode.
 */
#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE)                             \
static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj)                 \
{                                                                         \
    T2 fd;                                                                \
    FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);   \
                                                                          \
    set_float_rounding_mode(MODE, &env->fp_status);                       \
    fd = do_## FMT1 ##_to_## FMT2(env, fj);                               \
    set_float_rounding_mode(old_mode, &env->fp_status);                   \
    return fd;                                                            \
}

/*
 * FP-to-integer conversion in the current rounding mode.  A NaN input
 * converts to 0 (softfloat returns the max/min value on invalid, but
 * the LoongArch spec requires 0 for NaN), then flags go to FCSR0.
 */
#define DO_FTINT(FMT1, FMT2, T1, T2)                                         \
static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj)            \
{                                                                            \
    T2 fd;                                                                   \
                                                                             \
    fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);                            \
    if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
        if (FMT1 ##_is_any_nan(fj)) {                                        \
            fd = 0;                                                          \
        }                                                                    \
    }                                                                        \
    vec_update_fcsr0(env, GETPC());                                          \
    return fd;                                                               \
}

DO_FTINT(float32, int32, uint32_t, uint32_t)
DO_FTINT(float64, int64, uint64_t, uint64_t)
DO_FTINT(float32, uint32, uint32_t, uint32_t)
DO_FTINT(float64, uint64, uint64_t, uint64_t)
DO_FTINT(float64, int32, uint64_t, uint32_t)
DO_FTINT(float32, int64, uint32_t, uint64_t)

/* Same-width signed conversions with each explicit rounding mode. */
FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)

DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)

/* Unsigned truncating conversions. */
FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)

DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)

/* Narrowing f64 -> i32 conversions used by the vftint*_w_d helpers. */
FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
2605 | ||
/*
 * Narrowing f64 -> i32 vector conversion: Vk's elements fill the low
 * two words of Vd, Vj's the high two.  A temporary guards against Vd
 * aliasing a source.
 */
#define FTINT_W_D(NAME, FN)                                  \
void HELPER(NAME)(void *vd, void *vj, void *vk,              \
                  CPULoongArchState *env, uint32_t desc)     \
{                                                            \
    int i;                                                   \
    VReg temp;                                               \
    VReg *Vd = (VReg *)vd;                                   \
    VReg *Vj = (VReg *)vj;                                   \
    VReg *Vk = (VReg *)vk;                                   \
                                                             \
    vec_clear_cause(env);                                    \
    for (i = 0; i < 2; i++) {                                \
        temp.W(i + 2) = FN(env, Vj->UD(i));                  \
        temp.W(i) = FN(env, Vk->UD(i));                      \
    }                                                        \
    *Vd = temp;                                              \
}

FTINT_W_D(vftint_w_d, do_float64_to_int32)
FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
2629 | ||
/*
 * Widening float32 -> int64 conversions; the *l_* variants read the
 * low half of the source vector, the *h_* variants the high half
 * (consumed by FTINTL_L_S / FTINTH_L_S below).
 */
FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
2638 | ||
226bf881 SG |
2639 | #define FTINTL_L_S(NAME, FN) \ |
2640 | void HELPER(NAME)(void *vd, void *vj, \ | |
2641 | CPULoongArchState *env, uint32_t desc) \ | |
2642 | { \ | |
2643 | int i; \ | |
2644 | VReg temp; \ | |
2645 | VReg *Vd = (VReg *)vd; \ | |
2646 | VReg *Vj = (VReg *)vj; \ | |
2647 | \ | |
2648 | vec_clear_cause(env); \ | |
2649 | for (i = 0; i < 2; i++) { \ | |
2650 | temp.D(i) = FN(env, Vj->UW(i)); \ | |
2651 | } \ | |
2652 | *Vd = temp; \ | |
399665d2 SG |
2653 | } |
2654 | ||
2655 | FTINTL_L_S(vftintl_l_s, do_float32_to_int64) | |
2656 | FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s) | |
2657 | FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) | |
2658 | FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) | |
2659 | FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) | |
2660 | ||
226bf881 SG |
2661 | #define FTINTH_L_S(NAME, FN) \ |
2662 | void HELPER(NAME)(void *vd, void *vj, \ | |
2663 | CPULoongArchState *env, uint32_t desc) \ | |
2664 | { \ | |
2665 | int i; \ | |
2666 | VReg temp; \ | |
2667 | VReg *Vd = (VReg *)vd; \ | |
2668 | VReg *Vj = (VReg *)vj; \ | |
2669 | \ | |
2670 | vec_clear_cause(env); \ | |
2671 | for (i = 0; i < 2; i++) { \ | |
2672 | temp.D(i) = FN(env, Vj->UW(i + 2)); \ | |
2673 | } \ | |
2674 | *Vd = temp; \ | |
399665d2 SG |
2675 | } |
2676 | ||
2677 | FTINTH_L_S(vftinth_l_s, do_float32_to_int64) | |
2678 | FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s) | |
2679 | FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s) | |
2680 | FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s) | |
2681 | FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s) | |
2682 | ||
2683 | #define FFINT(NAME, FMT1, FMT2, T1, T2) \ | |
2684 | static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \ | |
2685 | { \ | |
2686 | T2 fd; \ | |
2687 | \ | |
2688 | fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ | |
2689 | vec_update_fcsr0(env, GETPC()); \ | |
2690 | return fd; \ | |
2691 | } | |
2692 | ||
2693 | FFINT(s_w, int32, float32, int32_t, uint32_t) | |
2694 | FFINT(d_l, int64, float64, int64_t, uint64_t) | |
2695 | FFINT(s_wu, uint32, float32, uint32_t, uint32_t) | |
2696 | FFINT(d_lu, uint64, float64, uint64_t, uint64_t) | |
2697 | ||
2698 | DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w) | |
2699 | DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) | |
2700 | DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) | |
2701 | DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) | |
2702 | ||
226bf881 SG |
2703 | void HELPER(vffintl_d_w)(void *vd, void *vj, |
2704 | CPULoongArchState *env, uint32_t desc) | |
399665d2 SG |
2705 | { |
2706 | int i; | |
2707 | VReg temp; | |
226bf881 SG |
2708 | VReg *Vd = (VReg *)vd; |
2709 | VReg *Vj = (VReg *)vj; | |
399665d2 SG |
2710 | |
2711 | vec_clear_cause(env); | |
2712 | for (i = 0; i < 2; i++) { | |
2713 | temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status); | |
2714 | vec_update_fcsr0(env, GETPC()); | |
2715 | } | |
2716 | *Vd = temp; | |
2717 | } | |
2718 | ||
226bf881 SG |
2719 | void HELPER(vffinth_d_w)(void *vd, void *vj, |
2720 | CPULoongArchState *env, uint32_t desc) | |
399665d2 SG |
2721 | { |
2722 | int i; | |
2723 | VReg temp; | |
226bf881 SG |
2724 | VReg *Vd = (VReg *)vd; |
2725 | VReg *Vj = (VReg *)vj; | |
399665d2 SG |
2726 | |
2727 | vec_clear_cause(env); | |
2728 | for (i = 0; i < 2; i++) { | |
2729 | temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status); | |
2730 | vec_update_fcsr0(env, GETPC()); | |
2731 | } | |
2732 | *Vd = temp; | |
2733 | } | |
2734 | ||
3b286753 SG |
2735 | void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, |
2736 | CPULoongArchState *env, uint32_t desc) | |
399665d2 SG |
2737 | { |
2738 | int i; | |
2739 | VReg temp; | |
3b286753 SG |
2740 | VReg *Vd = (VReg *)vd; |
2741 | VReg *Vj = (VReg *)vj; | |
2742 | VReg *Vk = (VReg *)vk; | |
399665d2 SG |
2743 | |
2744 | vec_clear_cause(env); | |
2745 | for (i = 0; i < 2; i++) { | |
2746 | temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status); | |
2747 | temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status); | |
2748 | vec_update_fcsr0(env, GETPC()); | |
2749 | } | |
2750 | *Vd = temp; | |
2751 | } | |
f435e1e5 SG |
2752 | |
2753 | #define VSEQ(a, b) (a == b ? -1 : 0) | |
2754 | #define VSLE(a, b) (a <= b ? -1 : 0) | |
2755 | #define VSLT(a, b) (a < b ? -1 : 0) | |
2756 | ||
2757 | #define VCMPI(NAME, BIT, E, DO_OP) \ | |
2758 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | |
2759 | { \ | |
2760 | int i; \ | |
2761 | VReg *Vd = (VReg *)vd; \ | |
2762 | VReg *Vj = (VReg *)vj; \ | |
2763 | typedef __typeof(Vd->E(0)) TD; \ | |
2764 | \ | |
2765 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2766 | Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | |
2767 | } \ | |
2768 | } | |
2769 | ||
2770 | VCMPI(vseqi_b, 8, B, VSEQ) | |
2771 | VCMPI(vseqi_h, 16, H, VSEQ) | |
2772 | VCMPI(vseqi_w, 32, W, VSEQ) | |
2773 | VCMPI(vseqi_d, 64, D, VSEQ) | |
2774 | VCMPI(vslei_b, 8, B, VSLE) | |
2775 | VCMPI(vslei_h, 16, H, VSLE) | |
2776 | VCMPI(vslei_w, 32, W, VSLE) | |
2777 | VCMPI(vslei_d, 64, D, VSLE) | |
2778 | VCMPI(vslei_bu, 8, UB, VSLE) | |
2779 | VCMPI(vslei_hu, 16, UH, VSLE) | |
2780 | VCMPI(vslei_wu, 32, UW, VSLE) | |
2781 | VCMPI(vslei_du, 64, UD, VSLE) | |
2782 | VCMPI(vslti_b, 8, B, VSLT) | |
2783 | VCMPI(vslti_h, 16, H, VSLT) | |
2784 | VCMPI(vslti_w, 32, W, VSLT) | |
2785 | VCMPI(vslti_d, 64, D, VSLT) | |
2786 | VCMPI(vslti_bu, 8, UB, VSLT) | |
2787 | VCMPI(vslti_hu, 16, UH, VSLT) | |
2788 | VCMPI(vslti_wu, 32, UW, VSLT) | |
2789 | VCMPI(vslti_du, 64, UD, VSLT) | |
386c4e86 SG |
2790 | |
2791 | static uint64_t vfcmp_common(CPULoongArchState *env, | |
2792 | FloatRelation cmp, uint32_t flags) | |
2793 | { | |
2794 | uint64_t ret = 0; | |
2795 | ||
2796 | switch (cmp) { | |
2797 | case float_relation_less: | |
2798 | ret = (flags & FCMP_LT); | |
2799 | break; | |
2800 | case float_relation_equal: | |
2801 | ret = (flags & FCMP_EQ); | |
2802 | break; | |
2803 | case float_relation_greater: | |
2804 | ret = (flags & FCMP_GT); | |
2805 | break; | |
2806 | case float_relation_unordered: | |
2807 | ret = (flags & FCMP_UN); | |
2808 | break; | |
2809 | default: | |
2810 | g_assert_not_reached(); | |
2811 | } | |
2812 | ||
2813 | if (ret) { | |
2814 | ret = -1; | |
2815 | } | |
2816 | ||
2817 | return ret; | |
2818 | } | |
2819 | ||
2820 | #define VFCMP(NAME, BIT, E, FN) \ | |
2821 | void HELPER(NAME)(CPULoongArchState *env, \ | |
2822 | uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ | |
2823 | { \ | |
2824 | int i; \ | |
2825 | VReg t; \ | |
2826 | VReg *Vd = &(env->fpr[vd].vreg); \ | |
2827 | VReg *Vj = &(env->fpr[vj].vreg); \ | |
2828 | VReg *Vk = &(env->fpr[vk].vreg); \ | |
2829 | \ | |
2830 | vec_clear_cause(env); \ | |
2831 | for (i = 0; i < LSX_LEN/BIT ; i++) { \ | |
2832 | FloatRelation cmp; \ | |
2833 | cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ | |
2834 | t.E(i) = vfcmp_common(env, cmp, flags); \ | |
2835 | vec_update_fcsr0(env, GETPC()); \ | |
2836 | } \ | |
2837 | *Vd = t; \ | |
2838 | } | |
2839 | ||
2840 | VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) | |
2841 | VFCMP(vfcmp_s_s, 32, UW, float32_compare) | |
2842 | VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) | |
2843 | VFCMP(vfcmp_s_d, 64, UD, float64_compare) | |
d0dfa19a SG |
2844 | |
2845 | void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | |
2846 | { | |
2847 | int i; | |
2848 | VReg *Vd = (VReg *)vd; | |
2849 | VReg *Vj = (VReg *)vj; | |
2850 | ||
2851 | for (i = 0; i < 16; i++) { | |
2852 | Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); | |
2853 | } | |
2854 | } | |
2855 | ||
/*
 * Copied from target/arm/tcg/sve_helper.c.
 *
 * Return true if any element of width (8 << esz) bits within the
 * 128-bit value m0:m1 equals n.  Uses the classic SWAR zero-detect
 * trick: after XOR-ing with a broadcast of n, a lane is zero iff it
 * matched, and (x - ones) & ~x sets the sign bit of exactly the
 * zero lanes.  Kept byte-identical to the ARM original on purpose.
 */
static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
{
    uint64_t bits = 8 << esz;            /* element width in bits */
    uint64_t ones = dup_const(esz, 1);   /* LSB of every lane */
    uint64_t signs = ones << (bits - 1); /* MSB of every lane */
    uint64_t cmp0, cmp1;

    cmp1 = dup_const(esz, n);
    cmp0 = cmp1 ^ m0;                    /* lane == 0 iff m0 lane == n */
    cmp1 = cmp1 ^ m1;
    cmp0 = (cmp0 - ones) & ~cmp0;        /* sign bit set iff lane was 0 */
    cmp1 = (cmp1 - ones) & ~cmp1;
    return (cmp0 | cmp1) & signs;
}
2871 | ||
/*
 * vsetanyeqz.{b,h,w,d}: set condition flag cd when ANY element of Vj
 * is zero.  do_match2() with n == 0 scans both 64-bit halves at once.
 * Must stay the exact logical complement of SETALLNEZ below.
 */
#define SETANYEQZ(NAME, MO)                                         \
void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
{                                                                   \
    VReg *Vj = &(env->fpr[vj].vreg);                                \
                                                                    \
    env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO);       \
}
SETANYEQZ(vsetanyeqz_b, MO_8)
SETANYEQZ(vsetanyeqz_h, MO_16)
SETANYEQZ(vsetanyeqz_w, MO_32)
SETANYEQZ(vsetanyeqz_d, MO_64)
2883 | ||
2884 | #define SETALLNEZ(NAME, MO) \ | |
2885 | void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ | |
2886 | { \ | |
2887 | VReg *Vj = &(env->fpr[vj].vreg); \ | |
2888 | \ | |
2889 | env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ | |
2890 | } | |
2891 | SETALLNEZ(vsetallnez_b, MO_8) | |
2892 | SETALLNEZ(vsetallnez_h, MO_16) | |
2893 | SETALLNEZ(vsetallnez_w, MO_32) | |
2894 | SETALLNEZ(vsetallnez_d, MO_64) | |
d5e5563c | 2895 | |
04711da1 SG |
2896 | #define VPACKEV(NAME, BIT, E) \ |
2897 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
2898 | { \ | |
2899 | int i; \ | |
2900 | VReg temp; \ | |
2901 | VReg *Vd = (VReg *)vd; \ | |
2902 | VReg *Vj = (VReg *)vj; \ | |
2903 | VReg *Vk = (VReg *)vk; \ | |
2904 | \ | |
2905 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2906 | temp.E(2 * i + 1) = Vj->E(2 * i); \ | |
2907 | temp.E(2 *i) = Vk->E(2 * i); \ | |
2908 | } \ | |
2909 | *Vd = temp; \ | |
d5e5563c SG |
2910 | } |
2911 | ||
2912 | VPACKEV(vpackev_b, 16, B) | |
2913 | VPACKEV(vpackev_h, 32, H) | |
2914 | VPACKEV(vpackev_w, 64, W) | |
2915 | VPACKEV(vpackev_d, 128, D) | |
2916 | ||
04711da1 SG |
2917 | #define VPACKOD(NAME, BIT, E) \ |
2918 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
2919 | { \ | |
2920 | int i; \ | |
2921 | VReg temp; \ | |
2922 | VReg *Vd = (VReg *)vd; \ | |
2923 | VReg *Vj = (VReg *)vj; \ | |
2924 | VReg *Vk = (VReg *)vk; \ | |
2925 | \ | |
2926 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2927 | temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ | |
2928 | temp.E(2 * i) = Vk->E(2 * i + 1); \ | |
2929 | } \ | |
2930 | *Vd = temp; \ | |
d5e5563c SG |
2931 | } |
2932 | ||
2933 | VPACKOD(vpackod_b, 16, B) | |
2934 | VPACKOD(vpackod_h, 32, H) | |
2935 | VPACKOD(vpackod_w, 64, W) | |
2936 | VPACKOD(vpackod_d, 128, D) | |
2937 | ||
04711da1 SG |
2938 | #define VPICKEV(NAME, BIT, E) \ |
2939 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
2940 | { \ | |
2941 | int i; \ | |
2942 | VReg temp; \ | |
2943 | VReg *Vd = (VReg *)vd; \ | |
2944 | VReg *Vj = (VReg *)vj; \ | |
2945 | VReg *Vk = (VReg *)vk; \ | |
2946 | \ | |
2947 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2948 | temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \ | |
2949 | temp.E(i) = Vk->E(2 * i); \ | |
2950 | } \ | |
2951 | *Vd = temp; \ | |
d5e5563c SG |
2952 | } |
2953 | ||
2954 | VPICKEV(vpickev_b, 16, B) | |
2955 | VPICKEV(vpickev_h, 32, H) | |
2956 | VPICKEV(vpickev_w, 64, W) | |
2957 | VPICKEV(vpickev_d, 128, D) | |
2958 | ||
04711da1 SG |
2959 | #define VPICKOD(NAME, BIT, E) \ |
2960 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
2961 | { \ | |
2962 | int i; \ | |
2963 | VReg temp; \ | |
2964 | VReg *Vd = (VReg *)vd; \ | |
2965 | VReg *Vj = (VReg *)vj; \ | |
2966 | VReg *Vk = (VReg *)vk; \ | |
2967 | \ | |
2968 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2969 | temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \ | |
2970 | temp.E(i) = Vk->E(2 * i + 1); \ | |
2971 | } \ | |
2972 | *Vd = temp; \ | |
d5e5563c SG |
2973 | } |
2974 | ||
2975 | VPICKOD(vpickod_b, 16, B) | |
2976 | VPICKOD(vpickod_h, 32, H) | |
2977 | VPICKOD(vpickod_w, 64, W) | |
2978 | VPICKOD(vpickod_d, 128, D) | |
e93dd431 | 2979 | |
04711da1 SG |
2980 | #define VILVL(NAME, BIT, E) \ |
2981 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
2982 | { \ | |
2983 | int i; \ | |
2984 | VReg temp; \ | |
2985 | VReg *Vd = (VReg *)vd; \ | |
2986 | VReg *Vj = (VReg *)vj; \ | |
2987 | VReg *Vk = (VReg *)vk; \ | |
2988 | \ | |
2989 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
2990 | temp.E(2 * i + 1) = Vj->E(i); \ | |
2991 | temp.E(2 * i) = Vk->E(i); \ | |
2992 | } \ | |
2993 | *Vd = temp; \ | |
e93dd431 SG |
2994 | } |
2995 | ||
2996 | VILVL(vilvl_b, 16, B) | |
2997 | VILVL(vilvl_h, 32, H) | |
2998 | VILVL(vilvl_w, 64, W) | |
2999 | VILVL(vilvl_d, 128, D) | |
3000 | ||
04711da1 SG |
3001 | #define VILVH(NAME, BIT, E) \ |
3002 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
3003 | { \ | |
3004 | int i; \ | |
3005 | VReg temp; \ | |
3006 | VReg *Vd = (VReg *)vd; \ | |
3007 | VReg *Vj = (VReg *)vj; \ | |
3008 | VReg *Vk = (VReg *)vk; \ | |
3009 | \ | |
3010 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
3011 | temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \ | |
3012 | temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \ | |
3013 | } \ | |
3014 | *Vd = temp; \ | |
e93dd431 SG |
3015 | } |
3016 | ||
3017 | VILVH(vilvh_b, 16, B) | |
3018 | VILVH(vilvh_h, 32, H) | |
3019 | VILVH(vilvh_w, 64, W) | |
3020 | VILVH(vilvh_d, 128, D) | |
3021 | ||
eb48ab22 | 3022 | void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) |
e93dd431 SG |
3023 | { |
3024 | int i, m; | |
3025 | VReg temp; | |
eb48ab22 SG |
3026 | VReg *Vd = (VReg *)vd; |
3027 | VReg *Vj = (VReg *)vj; | |
3028 | VReg *Vk = (VReg *)vk; | |
3029 | VReg *Va = (VReg *)va; | |
e93dd431 SG |
3030 | |
3031 | m = LSX_LEN/8; | |
3032 | for (i = 0; i < m ; i++) { | |
3033 | uint64_t k = (uint8_t)Va->B(i) % (2 * m); | |
3034 | temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m); | |
3035 | } | |
3036 | *Vd = temp; | |
3037 | } | |
3038 | ||
04711da1 SG |
3039 | #define VSHUF(NAME, BIT, E) \ |
3040 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | |
3041 | { \ | |
3042 | int i, m; \ | |
3043 | VReg temp; \ | |
3044 | VReg *Vd = (VReg *)vd; \ | |
3045 | VReg *Vj = (VReg *)vj; \ | |
3046 | VReg *Vk = (VReg *)vk; \ | |
3047 | \ | |
3048 | m = LSX_LEN/BIT; \ | |
3049 | for (i = 0; i < m; i++) { \ | |
3050 | uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \ | |
3051 | temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \ | |
3052 | } \ | |
3053 | *Vd = temp; \ | |
e93dd431 SG |
3054 | } |
3055 | ||
3056 | VSHUF(vshuf_h, 16, H) | |
3057 | VSHUF(vshuf_w, 32, W) | |
3058 | VSHUF(vshuf_d, 64, D) | |
3059 | ||
329517d5 SG |
3060 | #define VSHUF4I(NAME, BIT, E) \ |
3061 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
3062 | { \ | |
3063 | int i; \ | |
3064 | VReg temp; \ | |
3065 | VReg *Vd = (VReg *)vd; \ | |
3066 | VReg *Vj = (VReg *)vj; \ | |
3067 | \ | |
3068 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | |
3069 | temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \ | |
3070 | (2 * ((i) & 0x03))) & 0x03)); \ | |
3071 | } \ | |
3072 | *Vd = temp; \ | |
e93dd431 SG |
3073 | } |
3074 | ||
3075 | VSHUF4I(vshuf4i_b, 8, B) | |
3076 | VSHUF4I(vshuf4i_h, 16, H) | |
3077 | VSHUF4I(vshuf4i_w, 32, W) | |
3078 | ||
329517d5 | 3079 | void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
e93dd431 | 3080 | { |
329517d5 SG |
3081 | VReg *Vd = (VReg *)vd; |
3082 | VReg *Vj = (VReg *)vj; | |
e93dd431 SG |
3083 | |
3084 | VReg temp; | |
3085 | temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1); | |
3086 | temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1); | |
3087 | *Vd = temp; | |
3088 | } | |
3089 | ||
329517d5 | 3090 | void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
e93dd431 SG |
3091 | { |
3092 | VReg temp; | |
329517d5 SG |
3093 | VReg *Vd = (VReg *)vd; |
3094 | VReg *Vj = (VReg *)vj; | |
e93dd431 SG |
3095 | |
3096 | temp.W(0) = Vj->W(imm & 0x3); | |
3097 | temp.W(1) = Vj->W((imm >> 2) & 0x3); | |
3098 | temp.W(2) = Vd->W((imm >> 4) & 0x3); | |
3099 | temp.W(3) = Vd->W((imm >> 6) & 0x3); | |
3100 | *Vd = temp; | |
3101 | } | |
3102 | ||
329517d5 SG |
3103 | #define VEXTRINS(NAME, BIT, E, MASK) \ |
3104 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | |
3105 | { \ | |
3106 | int ins, extr; \ | |
3107 | VReg *Vd = (VReg *)vd; \ | |
3108 | VReg *Vj = (VReg *)vj; \ | |
3109 | \ | |
3110 | ins = (imm >> 4) & MASK; \ | |
3111 | extr = imm & MASK; \ | |
3112 | Vd->E(ins) = Vj->E(extr); \ | |
e93dd431 SG |
3113 | } |
3114 | ||
3115 | VEXTRINS(vextrins_b, 8, B, 0xf) | |
3116 | VEXTRINS(vextrins_h, 16, H, 0x7) | |
3117 | VEXTRINS(vextrins_w, 32, W, 0x3) | |
3118 | VEXTRINS(vextrins_d, 64, D, 0x1) |