git.proxmox.com Git - mirror_qemu.git/blob - target/loongarch/insn_trans/trans_vec.c.inc
target/loongarch: Implement xvabsd
[mirror_qemu.git] / target / loongarch / insn_trans / trans_vec.c.inc
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3 * LoongArch vector translate functions
4 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
5 */
6
7 #ifndef CONFIG_USER_ONLY
8
9 static bool check_vec(DisasContext *ctx, uint32_t oprsz)
10 {
11 if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
12 generate_exception(ctx, EXCCODE_SXD);
13 return false;
14 }
15
16 if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
17 generate_exception(ctx, EXCCODE_ASXD);
18 return false;
19 }
20
21 return true;
22 }
23
24 #else
25
/* User-mode: the vector units are always usable, so no check is needed. */
static bool check_vec(DisasContext *ctx, uint32_t oprsz)
{
    return true;
}
30
31 #endif
32
/*
 * Emit a 4-source-operand (vd, vj, vk, va) out-of-line helper call that
 * also receives cpu_env, for a vector of @oprsz bytes.
 * NOTE(review): no check_vec() here — callers are expected to have done it.
 */
static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
                            gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
                       vec_full_offset(a->vj),
                       vec_full_offset(a->vk),
                       vec_full_offset(a->va),
                       cpu_env,
                       oprsz, ctx->vl / 8, 0, fn);
    return true;
}
44
/* 128-bit (LSX) wrapper: check SXE, then emit the 4-operand env helper. */
static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
                         gen_helper_gvec_4_ptr *fn)
{
    if (!check_vec(ctx, 16)) {
        return true;
    }

    return gen_vvvv_ptr_vl(ctx, a, 16, fn);
}
54
/*
 * Emit a 4-source-operand (vd, vj, vk, va) out-of-line helper call for a
 * vector of @oprsz bytes.  No check_vec() here — callers must do it.
 */
static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
                        gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
                       vec_full_offset(a->vj),
                       vec_full_offset(a->vk),
                       vec_full_offset(a->va),
                       oprsz, ctx->vl / 8, 0, fn);
    return true;
}
65
/* 128-bit (LSX) wrapper: check SXE, then emit the 4-operand helper. */
static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
                     gen_helper_gvec_4 *fn)
{
    if (!check_vec(ctx, 16)) {
        return true;
    }

    return gen_vvvv_vl(ctx, a, 16, fn);
}
75
/*
 * Emit a 3-operand (vd, vj, vk) out-of-line helper call that also
 * receives cpu_env, for @oprsz bytes.  Callers perform check_vec().
 */
static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
                           gen_helper_gvec_3_ptr *fn)
{
    tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
                       vec_full_offset(a->vj),
                       vec_full_offset(a->vk),
                       cpu_env,
                       oprsz, ctx->vl / 8, 0, fn);
    return true;
}
86
/* 128-bit (LSX) wrapper: check SXE, then emit the 3-operand env helper. */
static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
                        gen_helper_gvec_3_ptr *fn)
{
    if (!check_vec(ctx, 16)) {
        return true;
    }

    return gen_vvv_ptr_vl(ctx, a, 16, fn);
}
96
/*
 * Emit a 3-operand (vd, vj, vk) out-of-line helper call for @oprsz bytes.
 * Unlike most *_vl helpers in this file, this one does the enable check
 * itself, so gen_vvv() and gen_xxx() can both delegate to it directly.
 */
static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
                       gen_helper_gvec_3 *fn)
{
    if (!check_vec(ctx, oprsz)) {
        return true;
    }

    tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
                       vec_full_offset(a->vj),
                       vec_full_offset(a->vk),
                       oprsz, ctx->vl / 8, 0, fn);
    return true;
}
110
/* 128-bit (LSX) 3-operand helper-call expansion. */
static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
{
    return gen_vvv_vl(ctx, a, 16, fn);
}
115
/* 256-bit (LASX) 3-operand helper-call expansion. */
static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
{
    return gen_vvv_vl(ctx, a, 32, fn);
}
120
/*
 * Emit a 2-operand (vd, vj) out-of-line helper call that also receives
 * cpu_env, for @oprsz bytes.  Callers perform check_vec().
 */
static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
                          gen_helper_gvec_2_ptr *fn)
{
    tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
                       vec_full_offset(a->vj),
                       cpu_env,
                       oprsz, ctx->vl / 8, 0, fn);
    return true;
}
130
/* 128-bit (LSX) wrapper: check SXE, then emit the 2-operand env helper. */
static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
                       gen_helper_gvec_2_ptr *fn)
{
    if (!check_vec(ctx, 16)) {
        return true;
    }

    return gen_vv_ptr_vl(ctx, a, 16, fn);
}
140
/*
 * Emit a 2-operand (vd, vj) out-of-line helper call for @oprsz bytes.
 * No check_vec() here — callers must do it.
 */
static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
                      gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
                       vec_full_offset(a->vj),
                       oprsz, ctx->vl / 8, 0, fn);
    return true;
}
149
/* 128-bit (LSX) wrapper: check SXE, then emit the 2-operand helper. */
static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
{
    if (!check_vec(ctx, 16)) {
        return true;
    }

    return gen_vv_vl(ctx, a, 16, fn);
}
158
/*
 * Emit a 2-operand-plus-immediate out-of-line helper call for @oprsz
 * bytes; the instruction immediate is passed as an i64 constant.
 * No check_vec() here — callers must do it.
 */
static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
                        gen_helper_gvec_2i *fn)
{
    tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
                        vec_full_offset(a->vj),
                        tcg_constant_i64(a->imm),
                        oprsz, ctx->vl / 8, 0, fn);
    return true;
}
168
/* 128-bit (LSX) wrapper: check SXE, then emit the 2i helper call. */
static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
{
    if (!check_vec(ctx, 16)) {
        return true;
    }

    return gen_vv_i_vl(ctx, a, 16, fn);
}
177
178 static bool gen_cv(DisasContext *ctx, arg_cv *a,
179 void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
180 {
181 TCGv_i32 vj = tcg_constant_i32(a->vj);
182 TCGv_i32 cd = tcg_constant_i32(a->cd);
183
184 if (!check_vec(ctx, 16)) {
185 return true;
186 }
187
188 func(cpu_env, cd, vj);
189 return true;
190 }
191
192 static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
193 uint32_t oprsz, MemOp mop,
194 void (*func)(unsigned, uint32_t, uint32_t,
195 uint32_t, uint32_t, uint32_t))
196 {
197 uint32_t vd_ofs = vec_full_offset(a->vd);
198 uint32_t vj_ofs = vec_full_offset(a->vj);
199 uint32_t vk_ofs = vec_full_offset(a->vk);
200
201 if (!check_vec(ctx, oprsz)) {
202 return true;
203 }
204
205 func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
206 return true;
207 }
208
/* 128-bit (LSX) 3-operand inline gvec expansion. */
static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
                     void (*func)(unsigned, uint32_t, uint32_t,
                                  uint32_t, uint32_t, uint32_t))
{
    return gvec_vvv_vl(ctx, a, 16, mop, func);
}
215
/* 256-bit (LASX) 3-operand inline gvec expansion. */
static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
                     void (*func)(unsigned, uint32_t, uint32_t,
                                  uint32_t, uint32_t, uint32_t))
{
    return gvec_vvv_vl(ctx, a, 32, mop, func);
}
222
223 static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
224 uint32_t oprsz, MemOp mop,
225 void (*func)(unsigned, uint32_t, uint32_t,
226 uint32_t, uint32_t))
227 {
228 uint32_t vd_ofs = vec_full_offset(a->vd);
229 uint32_t vj_ofs = vec_full_offset(a->vj);
230
231 if (!check_vec(ctx, oprsz)) {
232 return true;
233 }
234
235 func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
236 return true;
237 }
238
239
/* 128-bit (LSX) 2-operand inline gvec expansion. */
static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
                    void (*func)(unsigned, uint32_t, uint32_t,
                                 uint32_t, uint32_t))
{
    return gvec_vv_vl(ctx, a, 16, mop, func);
}
246
/* 256-bit (LASX) 2-operand inline gvec expansion. */
static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
                    void (*func)(unsigned, uint32_t, uint32_t,
                                 uint32_t, uint32_t))
{
    return gvec_vv_vl(ctx, a, 32, mop, func);
}
253
254 static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
255 uint32_t oprsz, MemOp mop,
256 void (*func)(unsigned, uint32_t, uint32_t,
257 int64_t, uint32_t, uint32_t))
258 {
259 uint32_t vd_ofs = vec_full_offset(a->vd);
260 uint32_t vj_ofs = vec_full_offset(a->vj);
261
262 if (!check_vec(ctx, oprsz)) {
263 return true;
264 }
265
266 func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
267 return true;
268 }
269
/* 128-bit (LSX) 2-operand-plus-immediate inline gvec expansion. */
static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
                      void (*func)(unsigned, uint32_t, uint32_t,
                                   int64_t, uint32_t, uint32_t))
{
    return gvec_vv_i_vl(ctx, a, 16, mop, func);
}
276
277 static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
278 void (*func)(unsigned, uint32_t, uint32_t,
279 int64_t, uint32_t, uint32_t))
280 {
281 return gvec_vv_i_vl(ctx,a, 32, mop, func);
282 }
283
/*
 * Expand {x}vsubi.{bu/hu/wu/du}: gvec has no "subtract immediate"
 * expander, so add the negated immediate instead.
 */
static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
                         uint32_t oprsz, MemOp mop)
{
    uint32_t vd_ofs = vec_full_offset(a->vd);
    uint32_t vj_ofs = vec_full_offset(a->vj);

    if (!check_vec(ctx, oprsz)) {
        return true;
    }

    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
    return true;
}
297
/* 128-bit (LSX) subtract-immediate expansion. */
static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
{
    return gvec_subi_vl(ctx, a, 16, mop);
}
302
/* 256-bit (LASX) subtract-immediate expansion. */
static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
{
    return gvec_subi_vl(ctx, a, 32, mop);
}
307
/* {x}vadd.{b/h/w/d}: element-wise integer add. */
TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
316
/*
 * Expand {x}vadd.q / {x}vsub.q: 128-bit add/subtract, done per 128-bit
 * lane as a pair of 64-bit halves.  @func has the tcg_gen_{add,sub}2_i64
 * signature (rl, rh, al, ah, bl, bh).  @oprsz is 16 (LSX, one lane) or
 * 32 (LASX, two lanes).
 */
static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
                             void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
                                          TCGv_i64, TCGv_i64, TCGv_i64))
{
    int i;
    TCGv_i64 rh, rl, ah, al, bh, bl;

    if (!check_vec(ctx, oprsz)) {
        return true;
    }

    rh = tcg_temp_new_i64();
    rl = tcg_temp_new_i64();
    ah = tcg_temp_new_i64();
    al = tcg_temp_new_i64();
    bh = tcg_temp_new_i64();
    bl = tcg_temp_new_i64();

    for (i = 0; i < oprsz / 16; i++) {
        /* Read lane i of vj and vk as high/low 64-bit halves. */
        get_vreg64(ah, a->vj, 1 + i * 2);
        get_vreg64(al, a->vj, i * 2);
        get_vreg64(bh, a->vk, 1 + i * 2);
        get_vreg64(bl, a->vk, i * 2);

        func(rl, rh, al, ah, bl, bh);

        /* Results go through temporaries, so vd may alias vj or vk. */
        set_vreg64(rh, a->vd, 1 + i * 2);
        set_vreg64(rl, a->vd, i * 2);
    }
    return true;
}
348
/* 128-bit (LSX) vadd.q / vsub.q expansion. */
static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
                          void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
                                       TCGv_i64, TCGv_i64, TCGv_i64))
{
    return gen_vaddsub_q_vl(ctx, a, 16, func);
}
355
/* 256-bit (LASX) xvadd.q / xvsub.q expansion. */
static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
                           void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
                                        TCGv_i64, TCGv_i64, TCGv_i64))
{
    return gen_vaddsub_q_vl(ctx, a, 32, func);
}
362
/* {x}vsub.{b/h/w/d}: element-wise integer subtract. */
TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)

/* {x}v{add,sub}.q: 128-bit add/subtract via add2/sub2 lane pairs. */
TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
376
/* {x}v{addi,subi}.{bu/hu/wu/du}: add/subtract an unsigned immediate. */
TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
TRANS(vsubi_du, LSX, gvec_subi, MO_64)
TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
393
/* {x}vneg.{b/h/w/d}: element-wise negate. */
TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
402
/* {x}vsadd / {x}vssub: saturating add/subtract, signed and unsigned. */
TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)

TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
436
/* {x}vhaddw / {x}vhsubw: horizontal widening add/subtract (helpers). */
TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d)
TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu)
TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu)
TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu)
TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du)
TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b)
TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h)
TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w)
TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d)
TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu)
TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)

TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
470
/*
 * Vector fniv for vaddwev: add the sign-extended even source elements.
 * @vece is the widened destination element size; halfbits is half that
 * width, so shl+sar by halfbits sign-extends the low (even) source half
 * of each destination-sized element.
 */
static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2;

    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);

    /* Sign-extend the even elements from a */
    tcg_gen_shli_vec(vece, t1, a, halfbits);
    tcg_gen_sari_vec(vece, t1, t1, halfbits);

    /* Sign-extend the even elements from b */
    tcg_gen_shli_vec(vece, t2, b, halfbits);
    tcg_gen_sari_vec(vece, t2, t2, halfbits);

    tcg_gen_add_vec(vece, t, t1, t2);
}
490
491 static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
492 {
493 TCGv_i32 t1, t2;
494
495 t1 = tcg_temp_new_i32();
496 t2 = tcg_temp_new_i32();
497 tcg_gen_ext16s_i32(t1, a);
498 tcg_gen_ext16s_i32(t2, b);
499 tcg_gen_add_i32(t, t1, t2);
500 }
501
502 static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
503 {
504 TCGv_i64 t1, t2;
505
506 t1 = tcg_temp_new_i64();
507 t2 = tcg_temp_new_i64();
508 tcg_gen_ext32s_i64(t1, a);
509 tcg_gen_ext32s_i64(t2, b);
510 tcg_gen_add_i64(t, t1, t2);
511 }
512
/*
 * Expand {x}vaddwev.{h.b/w.h/d.w/q.d}: widening add of signed even
 * elements.  @vece is the *source* element size (MO_8..MO_64); each
 * table entry's .vece is the widened destination size.
 */
static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vaddwev_s,
            .fno = gen_helper_vaddwev_h_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vaddwev_w_h,
            .fniv = gen_vaddwev_s,
            .fno = gen_helper_vaddwev_w_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vaddwev_d_w,
            .fniv = gen_vaddwev_s,
            .fno = gen_helper_vaddwev_d_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit destination elements: out-of-line helper only. */
            .fno = gen_helper_vaddwev_q_d,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
548
/* {x}vaddwev.*: widening add of signed even elements. */
TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
557
558 static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
559 {
560 TCGv_i32 t1, t2;
561
562 t1 = tcg_temp_new_i32();
563 t2 = tcg_temp_new_i32();
564 tcg_gen_sari_i32(t1, a, 16);
565 tcg_gen_sari_i32(t2, b, 16);
566 tcg_gen_add_i32(t, t1, t2);
567 }
568
569 static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
570 {
571 TCGv_i64 t1, t2;
572
573 t1 = tcg_temp_new_i64();
574 t2 = tcg_temp_new_i64();
575 tcg_gen_sari_i64(t1, a, 32);
576 tcg_gen_sari_i64(t2, b, 32);
577 tcg_gen_add_i64(t, t1, t2);
578 }
579
/*
 * Vector fniv for vaddwod: add the sign-extended odd source elements.
 * An arithmetic shift right by halfbits (half the destination element
 * width) leaves the odd source half sign-extended in place.
 */
static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2;

    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);

    /* Sign-extend the odd elements for vector */
    tcg_gen_sari_vec(vece, t1, a, halfbits);
    tcg_gen_sari_vec(vece, t2, b, halfbits);

    tcg_gen_add_vec(vece, t, t1, t2);
}
595
/*
 * Expand {x}vaddwod.{h.b/w.h/d.w/q.d}: widening add of signed odd
 * elements.  @vece is the *source* element size; each table entry's
 * .vece is the widened destination size.
 */
static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vaddwod_s,
            .fno = gen_helper_vaddwod_h_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vaddwod_w_h,
            .fniv = gen_vaddwod_s,
            .fno = gen_helper_vaddwod_w_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vaddwod_d_w,
            .fniv = gen_vaddwod_s,
            .fno = gen_helper_vaddwod_d_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit destination elements: out-of-line helper only. */
            .fno = gen_helper_vaddwod_q_d,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
631
/* {x}vaddwod.*: widening add of signed odd elements. */
TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
640
641
/*
 * Vector fniv for vsubwev: subtract the sign-extended even source
 * elements (shl+sar by half the destination element width).
 */
static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2;

    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);

    /* Sign-extend the even elements from a */
    tcg_gen_shli_vec(vece, t1, a, halfbits);
    tcg_gen_sari_vec(vece, t1, t1, halfbits);

    /* Sign-extend the even elements from b */
    tcg_gen_shli_vec(vece, t2, b, halfbits);
    tcg_gen_sari_vec(vece, t2, t2, halfbits);

    tcg_gen_sub_vec(vece, t, t1, t2);
}
661
662 static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
663 {
664 TCGv_i32 t1, t2;
665
666 t1 = tcg_temp_new_i32();
667 t2 = tcg_temp_new_i32();
668 tcg_gen_ext16s_i32(t1, a);
669 tcg_gen_ext16s_i32(t2, b);
670 tcg_gen_sub_i32(t, t1, t2);
671 }
672
673 static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
674 {
675 TCGv_i64 t1, t2;
676
677 t1 = tcg_temp_new_i64();
678 t2 = tcg_temp_new_i64();
679 tcg_gen_ext32s_i64(t1, a);
680 tcg_gen_ext32s_i64(t2, b);
681 tcg_gen_sub_i64(t, t1, t2);
682 }
683
/*
 * Expand {x}vsubwev.{h.b/w.h/d.w/q.d}: widening subtract of signed even
 * elements.  @vece is the *source* element size; each table entry's
 * .vece is the widened destination size.
 */
static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vsubwev_s,
            .fno = gen_helper_vsubwev_h_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vsubwev_w_h,
            .fniv = gen_vsubwev_s,
            .fno = gen_helper_vsubwev_w_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vsubwev_d_w,
            .fniv = gen_vsubwev_s,
            .fno = gen_helper_vsubwev_d_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit destination elements: out-of-line helper only. */
            .fno = gen_helper_vsubwev_q_d,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
719
/* {x}vsubwev.*: widening subtract of signed even elements. */
TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
728
/*
 * Vector fniv for vsubwod: subtract the sign-extended odd source
 * elements (arithmetic shift right by half the destination width).
 */
static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2;

    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);

    /* Sign-extend the odd elements for vector */
    tcg_gen_sari_vec(vece, t1, a, halfbits);
    tcg_gen_sari_vec(vece, t2, b, halfbits);

    tcg_gen_sub_vec(vece, t, t1, t2);
}
744
745 static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
746 {
747 TCGv_i32 t1, t2;
748
749 t1 = tcg_temp_new_i32();
750 t2 = tcg_temp_new_i32();
751 tcg_gen_sari_i32(t1, a, 16);
752 tcg_gen_sari_i32(t2, b, 16);
753 tcg_gen_sub_i32(t, t1, t2);
754 }
755
756 static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
757 {
758 TCGv_i64 t1, t2;
759
760 t1 = tcg_temp_new_i64();
761 t2 = tcg_temp_new_i64();
762 tcg_gen_sari_i64(t1, a, 32);
763 tcg_gen_sari_i64(t2, b, 32);
764 tcg_gen_sub_i64(t, t1, t2);
765 }
766
/*
 * Expand {x}vsubwod.{h.b/w.h/d.w/q.d}: widening subtract of signed odd
 * elements.  @vece is the *source* element size; each table entry's
 * .vece is the widened destination size.
 */
static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vsubwod_s,
            .fno = gen_helper_vsubwod_h_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vsubwod_w_h,
            .fniv = gen_vsubwod_s,
            .fno = gen_helper_vsubwod_w_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vsubwod_d_w,
            .fniv = gen_vsubwod_s,
            .fno = gen_helper_vsubwod_d_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit destination elements: out-of-line helper only. */
            .fno = gen_helper_vsubwod_q_d,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
802
/* {x}vsubwod.*: widening subtract of signed odd elements. */
TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
811
/*
 * Vector fniv for vaddwev (unsigned): mask each destination-sized
 * element down to its low half — the zero-extended even source element —
 * then add.
 */
static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2, t3;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);
    /* Mask of the low half of each destination element. */
    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
    tcg_gen_and_vec(vece, t1, a, t3);
    tcg_gen_and_vec(vece, t2, b, t3);
    tcg_gen_add_vec(vece, t, t1, t2);
}
823
824 static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
825 {
826 TCGv_i32 t1, t2;
827
828 t1 = tcg_temp_new_i32();
829 t2 = tcg_temp_new_i32();
830 tcg_gen_ext16u_i32(t1, a);
831 tcg_gen_ext16u_i32(t2, b);
832 tcg_gen_add_i32(t, t1, t2);
833 }
834
835 static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
836 {
837 TCGv_i64 t1, t2;
838
839 t1 = tcg_temp_new_i64();
840 t2 = tcg_temp_new_i64();
841 tcg_gen_ext32u_i64(t1, a);
842 tcg_gen_ext32u_i64(t2, b);
843 tcg_gen_add_i64(t, t1, t2);
844 }
845
/*
 * Expand {x}vaddwev.{h.bu/w.hu/d.wu/q.du}: widening add of unsigned even
 * elements.  @vece is the *source* element size; each table entry's
 * .vece is the widened destination size.
 */
static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vaddwev_u,
            .fno = gen_helper_vaddwev_h_bu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vaddwev_w_hu,
            .fniv = gen_vaddwev_u,
            .fno = gen_helper_vaddwev_w_hu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vaddwev_d_wu,
            .fniv = gen_vaddwev_u,
            .fno = gen_helper_vaddwev_d_wu,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit destination elements: out-of-line helper only. */
            .fno = gen_helper_vaddwev_q_du,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
881
/* {x}vaddwev.* (unsigned): widening add of unsigned even elements. */
TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
890
/*
 * Vector fniv for vaddwod (unsigned): a logical shift right by halfbits
 * leaves the odd source half zero-extended in place; then add.
 */
static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2;

    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);

    /* Zero-extend the odd elements for vector */
    tcg_gen_shri_vec(vece, t1, a, halfbits);
    tcg_gen_shri_vec(vece, t2, b, halfbits);

    tcg_gen_add_vec(vece, t, t1, t2);
}
906
907 static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
908 {
909 TCGv_i32 t1, t2;
910
911 t1 = tcg_temp_new_i32();
912 t2 = tcg_temp_new_i32();
913 tcg_gen_shri_i32(t1, a, 16);
914 tcg_gen_shri_i32(t2, b, 16);
915 tcg_gen_add_i32(t, t1, t2);
916 }
917
918 static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
919 {
920 TCGv_i64 t1, t2;
921
922 t1 = tcg_temp_new_i64();
923 t2 = tcg_temp_new_i64();
924 tcg_gen_shri_i64(t1, a, 32);
925 tcg_gen_shri_i64(t2, b, 32);
926 tcg_gen_add_i64(t, t1, t2);
927 }
928
/*
 * Expand {x}vaddwod.{h.bu/w.hu/d.wu/q.du}: widening add of unsigned odd
 * elements.  @vece is the *source* element size; each table entry's
 * .vece is the widened destination size.
 */
static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vaddwod_u,
            .fno = gen_helper_vaddwod_h_bu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vaddwod_w_hu,
            .fniv = gen_vaddwod_u,
            .fno = gen_helper_vaddwod_w_hu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vaddwod_d_wu,
            .fniv = gen_vaddwod_u,
            .fno = gen_helper_vaddwod_d_wu,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit destination elements: out-of-line helper only. */
            .fno = gen_helper_vaddwod_q_du,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
964
/* {x}vaddwod.* (unsigned): widening add of unsigned odd elements. */
TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
973
/*
 * Vector fniv for vsubwev (unsigned): mask each destination-sized
 * element down to its low half — the zero-extended even source element —
 * then subtract.
 */
static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2, t3;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);
    /* Mask of the low half of each destination element. */
    t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
    tcg_gen_and_vec(vece, t1, a, t3);
    tcg_gen_and_vec(vece, t2, b, t3);
    tcg_gen_sub_vec(vece, t, t1, t2);
}
985
986 static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
987 {
988 TCGv_i32 t1, t2;
989
990 t1 = tcg_temp_new_i32();
991 t2 = tcg_temp_new_i32();
992 tcg_gen_ext16u_i32(t1, a);
993 tcg_gen_ext16u_i32(t2, b);
994 tcg_gen_sub_i32(t, t1, t2);
995 }
996
997 static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
998 {
999 TCGv_i64 t1, t2;
1000
1001 t1 = tcg_temp_new_i64();
1002 t2 = tcg_temp_new_i64();
1003 tcg_gen_ext32u_i64(t1, a);
1004 tcg_gen_ext32u_i64(t2, b);
1005 tcg_gen_sub_i64(t, t1, t2);
1006 }
1007
/*
 * Expand {x}vsubwev.{h.bu/w.hu/d.wu/q.du}: widening subtract of unsigned
 * even elements.  @vece is the *source* element size; each table entry's
 * .vece is the widened destination size.
 */
static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vsubwev_u,
            .fno = gen_helper_vsubwev_h_bu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vsubwev_w_hu,
            .fniv = gen_vsubwev_u,
            .fno = gen_helper_vsubwev_w_hu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vsubwev_d_wu,
            .fniv = gen_vsubwev_u,
            .fno = gen_helper_vsubwev_d_wu,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit destination elements: out-of-line helper only. */
            .fno = gen_helper_vsubwev_q_du,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1043
/* {x}vsubwev.* (unsigned): widening subtract of unsigned even elements. */
TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
1052
/*
 * Vector fniv for vsubwod (unsigned): a logical shift right by halfbits
 * leaves the odd source half zero-extended in place; then subtract.
 */
static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2;

    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);

    /* Zero-extend the odd elements for vector */
    tcg_gen_shri_vec(vece, t1, a, halfbits);
    tcg_gen_shri_vec(vece, t2, b, halfbits);

    tcg_gen_sub_vec(vece, t, t1, t2);
}
1068
1069 static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1070 {
1071 TCGv_i32 t1, t2;
1072
1073 t1 = tcg_temp_new_i32();
1074 t2 = tcg_temp_new_i32();
1075 tcg_gen_shri_i32(t1, a, 16);
1076 tcg_gen_shri_i32(t2, b, 16);
1077 tcg_gen_sub_i32(t, t1, t2);
1078 }
1079
1080 static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1081 {
1082 TCGv_i64 t1, t2;
1083
1084 t1 = tcg_temp_new_i64();
1085 t2 = tcg_temp_new_i64();
1086 tcg_gen_shri_i64(t1, a, 32);
1087 tcg_gen_shri_i64(t2, b, 32);
1088 tcg_gen_sub_i64(t, t1, t2);
1089 }
1090
/*
 * Expand vsubwod.{h.bu,w.hu,d.wu,q.du}: widening subtract of the
 * zero-extended odd-numbered elements.  Indexed by the source element
 * size; .vece is the doubled destination size.
 */
static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vsubwod_u,
            .fno = gen_helper_vsubwod_h_bu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vsubwod_w_hu,
            .fniv = gen_vsubwod_u,
            .fno = gen_helper_vsubwod_w_hu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vsubwod_d_wu,
            .fniv = gen_vsubwod_u,
            .fno = gen_helper_vsubwod_d_wu,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit result: helper only. */
            .fno = gen_helper_vsubwod_q_du,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1126
/* 128-bit (LSX) and 256-bit (LASX) forms of the odd-element widening sub. */
TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
1135
1136 static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1137 {
1138 TCGv_vec t1, t2, t3;
1139
1140 int halfbits = 4 << vece;
1141
1142 t1 = tcg_temp_new_vec_matching(a);
1143 t2 = tcg_temp_new_vec_matching(b);
1144 t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
1145
1146 /* Zero-extend the even elements from a */
1147 tcg_gen_and_vec(vece, t1, a, t3);
1148
1149 /* Sign-extend the even elements from b */
1150 tcg_gen_shli_vec(vece, t2, b, halfbits);
1151 tcg_gen_sari_vec(vece, t2, t2, halfbits);
1152
1153 tcg_gen_add_vec(vece, t, t1, t2);
1154 }
1155
1156 static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1157 {
1158 TCGv_i32 t1, t2;
1159
1160 t1 = tcg_temp_new_i32();
1161 t2 = tcg_temp_new_i32();
1162 tcg_gen_ext16u_i32(t1, a);
1163 tcg_gen_ext16s_i32(t2, b);
1164 tcg_gen_add_i32(t, t1, t2);
1165 }
1166
1167 static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1168 {
1169 TCGv_i64 t1, t2;
1170
1171 t1 = tcg_temp_new_i64();
1172 t2 = tcg_temp_new_i64();
1173 tcg_gen_ext32u_i64(t1, a);
1174 tcg_gen_ext32s_i64(t2, b);
1175 tcg_gen_add_i64(t, t1, t2);
1176 }
1177
/*
 * Expand vaddwev.{h.bu.b,w.hu.h,d.wu.w,q.du.d}: widening add of the
 * even elements, first operand zero-extended, second sign-extended.
 */
static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vaddwev_u_s,
            .fno = gen_helper_vaddwev_h_bu_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vaddwev_w_hu_h,
            .fniv = gen_vaddwev_u_s,
            .fno = gen_helper_vaddwev_w_hu_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vaddwev_d_wu_w,
            .fniv = gen_vaddwev_u_s,
            .fno = gen_helper_vaddwev_d_wu_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit result: helper only. */
            .fno = gen_helper_vaddwev_q_du_d,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1213
/* 128-bit (LSX) and 256-bit (LASX) forms of the mixed-sign even widening add. */
TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
1222
1223 static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1224 {
1225 TCGv_vec t1, t2;
1226
1227 int halfbits = 4 << vece;
1228
1229 t1 = tcg_temp_new_vec_matching(a);
1230 t2 = tcg_temp_new_vec_matching(b);
1231
1232 /* Zero-extend the odd elements from a */
1233 tcg_gen_shri_vec(vece, t1, a, halfbits);
1234 /* Sign-extend the odd elements from b */
1235 tcg_gen_sari_vec(vece, t2, b, halfbits);
1236
1237 tcg_gen_add_vec(vece, t, t1, t2);
1238 }
1239
1240 static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1241 {
1242 TCGv_i32 t1, t2;
1243
1244 t1 = tcg_temp_new_i32();
1245 t2 = tcg_temp_new_i32();
1246 tcg_gen_shri_i32(t1, a, 16);
1247 tcg_gen_sari_i32(t2, b, 16);
1248 tcg_gen_add_i32(t, t1, t2);
1249 }
1250
1251 static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1252 {
1253 TCGv_i64 t1, t2;
1254
1255 t1 = tcg_temp_new_i64();
1256 t2 = tcg_temp_new_i64();
1257 tcg_gen_shri_i64(t1, a, 32);
1258 tcg_gen_sari_i64(t2, b, 32);
1259 tcg_gen_add_i64(t, t1, t2);
1260 }
1261
/*
 * Expand vaddwod.{h.bu.b,w.hu.h,d.wu.w,q.du.d}: widening add of the
 * odd elements, first operand zero-extended, second sign-extended.
 */
static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vaddwod_u_s,
            .fno = gen_helper_vaddwod_h_bu_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vaddwod_w_hu_h,
            .fniv = gen_vaddwod_u_s,
            .fno = gen_helper_vaddwod_w_hu_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vaddwod_d_wu_w,
            .fniv = gen_vaddwod_u_s,
            .fno = gen_helper_vaddwod_d_wu_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
        {
            /* 128-bit result: helper only. */
            .fno = gen_helper_vaddwod_q_du_d,
            .vece = MO_128
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1297
1298 TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
1299 TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
1300 TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
1301 TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
1302 TRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s)
1303 TRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s)
1304 TRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s)
1305 TRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s)
1306
/*
 * Common expansion for vavg/vavgr:  t = (a >> 1) + (b >> 1) + round,
 * where gen_shr_vec is arithmetic (signed) or logical (unsigned) shift,
 * and gen_round_vec supplies the low-bit correction term:
 *   AND -> truncating average (carry only when both low bits are set),
 *   OR  -> rounding average (round up when either low bit is set).
 * NOTE(review): a and b are overwritten; the gvec expander appears to
 * pass temporaries loaded from the source registers, so the architectural
 * sources are unaffected -- confirm against tcg/tcg-op-gvec.c.
 */
static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
                    void (*gen_shr_vec)(unsigned, TCGv_vec,
                                        TCGv_vec, int64_t),
                    void (*gen_round_vec)(unsigned, TCGv_vec,
                                          TCGv_vec, TCGv_vec))
{
    TCGv_vec tmp = tcg_temp_new_vec_matching(t);
    gen_round_vec(vece, tmp, a, b);
    tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
    gen_shr_vec(vece, a, a, 1);
    gen_shr_vec(vece, b, b, 1);
    tcg_gen_add_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, t, t, tmp);
}
1321
/* Signed truncating average. */
static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
}
1326
/* Unsigned truncating average. */
static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
}
1331
/* Signed rounding average. */
static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
}
1336
/* Unsigned rounding average. */
static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
}
1341
/* Expand vavg.{b,h,w,d}: signed truncating average, per element size. */
static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vavg_s,
            .fno = gen_helper_vavg_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vavg_s,
            .fno = gen_helper_vavg_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vavg_s,
            .fno = gen_helper_vavg_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vavg_s,
            .fno = gen_helper_vavg_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1377
/* Expand vavg.{bu,hu,wu,du}: unsigned truncating average, per element size. */
static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vavg_u,
            .fno = gen_helper_vavg_bu,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vavg_u,
            .fno = gen_helper_vavg_hu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vavg_u,
            .fno = gen_helper_vavg_wu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vavg_u,
            .fno = gen_helper_vavg_du,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1413
/* 128-bit (LSX) and 256-bit (LASX) truncating averages. */
TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s)
TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s)
TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s)
TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s)
TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
1430
/* Expand vavgr.{b,h,w,d}: signed rounding average, per element size. */
static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vavgr_s,
            .fno = gen_helper_vavgr_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vavgr_s,
            .fno = gen_helper_vavgr_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vavgr_s,
            .fno = gen_helper_vavgr_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vavgr_s,
            .fno = gen_helper_vavgr_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1466
/* Expand vavgr.{bu,hu,wu,du}: unsigned rounding average, per element size. */
static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vavgr_u,
            .fno = gen_helper_vavgr_bu,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vavgr_u,
            .fno = gen_helper_vavgr_hu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vavgr_u,
            .fno = gen_helper_vavgr_wu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vavgr_u,
            .fno = gen_helper_vavgr_du,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1502
/* 128-bit (LSX) and 256-bit (LASX) rounding averages. */
TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s)
TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s)
TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s)
TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s)
TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
1519
/*
 * Signed absolute difference: t = smax(a, b) - smin(a, b) = |a - b|.
 * a is overwritten with the min; safe because the gvec expander passes
 * a temporary here, not the architectural source.
 */
static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_smax_vec(vece, t, a, b);
    tcg_gen_smin_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, t, t, a);
}
1526
/* Expand vabsd.{b,h,w,d}: signed absolute difference, per element size. */
static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vabsd_s,
            .fno = gen_helper_vabsd_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vabsd_s,
            .fno = gen_helper_vabsd_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vabsd_s,
            .fno = gen_helper_vabsd_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vabsd_s,
            .fno = gen_helper_vabsd_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1562
/*
 * Unsigned absolute difference: t = umax(a, b) - umin(a, b).
 * a is overwritten with the min (see gen_vabsd_s).
 */
static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_umax_vec(vece, t, a, b);
    tcg_gen_umin_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, t, t, a);
}
1569
/* Expand vabsd.{bu,hu,wu,du}: unsigned absolute difference, per element size. */
static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vabsd_u,
            .fno = gen_helper_vabsd_bu,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vabsd_u,
            .fno = gen_helper_vabsd_hu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vabsd_u,
            .fno = gen_helper_vabsd_wu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vabsd_u,
            .fno = gen_helper_vabsd_du,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1605
/* 128-bit (LSX) and 256-bit (LASX) absolute differences. */
TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s)
TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s)
TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s)
TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s)
TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
1622
1623 static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1624 {
1625 TCGv_vec t1, t2;
1626
1627 t1 = tcg_temp_new_vec_matching(a);
1628 t2 = tcg_temp_new_vec_matching(b);
1629
1630 tcg_gen_abs_vec(vece, t1, a);
1631 tcg_gen_abs_vec(vece, t2, b);
1632 tcg_gen_add_vec(vece, t, t1, t2);
1633 }
1634
/* Expand vadda.{b,h,w,d}: sum of absolute values, per element size. */
static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_abs_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vadda,
            .fno = gen_helper_vadda_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vadda,
            .fno = gen_helper_vadda_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vadda,
            .fno = gen_helper_vadda_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vadda,
            .fno = gen_helper_vadda_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1670
/* vadda: sum of absolute values. */
TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)

/* vmax: element-wise maximum, via the generic gvec expanders. */
TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax)
TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax)
TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)

/* vmin: element-wise minimum, via the generic gvec expanders. */
TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
TRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin)
TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin)
TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
1693
/* Signed min against a replicated immediate. */
static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
}
1698
/* Unsigned min against a replicated immediate. */
static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
}
1703
/* Signed max against a replicated immediate. */
static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
}
1708
/* Unsigned max against a replicated immediate. */
static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
}
1713
/* Expand vmini.{b,h,w,d}: signed min with immediate, per element size. */
static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_smin_vec, 0
    };
    static const GVecGen2i op[4] = {
        {
            .fniv = gen_vmini_s,
            .fnoi = gen_helper_vmini_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vmini_s,
            .fnoi = gen_helper_vmini_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vmini_s,
            .fnoi = gen_helper_vmini_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vmini_s,
            .fnoi = gen_helper_vmini_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
}
1749
/* Expand vmini.{bu,hu,wu,du}: unsigned min with immediate, per element size. */
static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_umin_vec, 0
    };
    static const GVecGen2i op[4] = {
        {
            .fniv = gen_vmini_u,
            .fnoi = gen_helper_vmini_bu,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vmini_u,
            .fnoi = gen_helper_vmini_hu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vmini_u,
            .fnoi = gen_helper_vmini_wu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vmini_u,
            .fnoi = gen_helper_vmini_du,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
}
1785
/* vmini: element-wise min against an immediate operand. */
TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s)
TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s)
TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s)
TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s)
TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
1794
/* Expand vmaxi.{b,h,w,d}: signed max with immediate, per element size. */
static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_smax_vec, 0
    };
    static const GVecGen2i op[4] = {
        {
            .fniv = gen_vmaxi_s,
            .fnoi = gen_helper_vmaxi_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vmaxi_s,
            .fnoi = gen_helper_vmaxi_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vmaxi_s,
            .fnoi = gen_helper_vmaxi_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vmaxi_s,
            .fnoi = gen_helper_vmaxi_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
}
1830
/* Expand vmaxi.{bu,hu,wu,du}: unsigned max with immediate, per element size. */
static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_umax_vec, 0
    };
    static const GVecGen2i op[4] = {
        {
            .fniv = gen_vmaxi_u,
            .fnoi = gen_helper_vmaxi_bu,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vmaxi_u,
            .fnoi = gen_helper_vmaxi_hu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vmaxi_u,
            .fnoi = gen_helper_vmaxi_wu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vmaxi_u,
            .fnoi = gen_helper_vmaxi_du,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
}
1866
/* vmaxi: element-wise max against an immediate operand. */
TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s)
TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s)
TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s)
TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s)
TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)

/* vmul: low half of the product, via the generic gvec multiply. */
TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
1880
1881 static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1882 {
1883 TCGv_i32 discard = tcg_temp_new_i32();
1884 tcg_gen_muls2_i32(discard, t, a, b);
1885 }
1886
1887 static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1888 {
1889 TCGv_i64 discard = tcg_temp_new_i64();
1890 tcg_gen_muls2_i64(discard, t, a, b);
1891 }
1892
/*
 * Expand vmuh.{b,h,w,d}: signed high-half multiply.  No inline
 * expansion exists for 8/16-bit elements (no double-width mul op),
 * so those go straight to helpers.
 */
static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 op[4] = {
        {
            .fno = gen_helper_vmuh_b,
            .vece = MO_8
        },
        {
            .fno = gen_helper_vmuh_h,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmuh_w,
            .fno = gen_helper_vmuh_w,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmuh_d,
            .fno = gen_helper_vmuh_d,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1919
/* vmuh: signed high-half multiply. */
TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
1924
1925 static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1926 {
1927 TCGv_i32 discard = tcg_temp_new_i32();
1928 tcg_gen_mulu2_i32(discard, t, a, b);
1929 }
1930
1931 static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1932 {
1933 TCGv_i64 discard = tcg_temp_new_i64();
1934 tcg_gen_mulu2_i64(discard, t, a, b);
1935 }
1936
/*
 * Expand vmuh.{bu,hu,wu,du}: unsigned high-half multiply.
 * 8/16-bit elements go straight to helpers (see do_vmuh_s).
 */
static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 op[4] = {
        {
            .fno = gen_helper_vmuh_bu,
            .vece = MO_8
        },
        {
            .fno = gen_helper_vmuh_hu,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmuh_wu,
            .fno = gen_helper_vmuh_wu,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmuh_du,
            .fno = gen_helper_vmuh_du,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
1963
/* vmuh (unsigned): unsigned high-half multiply. */
TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u)
TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
1968
1969 static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1970 {
1971 TCGv_vec t1, t2;
1972 int halfbits = 4 << vece;
1973
1974 t1 = tcg_temp_new_vec_matching(a);
1975 t2 = tcg_temp_new_vec_matching(b);
1976 tcg_gen_shli_vec(vece, t1, a, halfbits);
1977 tcg_gen_sari_vec(vece, t1, t1, halfbits);
1978 tcg_gen_shli_vec(vece, t2, b, halfbits);
1979 tcg_gen_sari_vec(vece, t2, t2, halfbits);
1980 tcg_gen_mul_vec(vece, t, t1, t2);
1981 }
1982
1983 static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1984 {
1985 TCGv_i32 t1, t2;
1986
1987 t1 = tcg_temp_new_i32();
1988 t2 = tcg_temp_new_i32();
1989 tcg_gen_ext16s_i32(t1, a);
1990 tcg_gen_ext16s_i32(t2, b);
1991 tcg_gen_mul_i32(t, t1, t2);
1992 }
1993
1994 static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1995 {
1996 TCGv_i64 t1, t2;
1997
1998 t1 = tcg_temp_new_i64();
1999 t2 = tcg_temp_new_i64();
2000 tcg_gen_ext32s_i64(t1, a);
2001 tcg_gen_ext32s_i64(t2, b);
2002 tcg_gen_mul_i64(t, t1, t2);
2003 }
2004
/*
 * Expand vmulwev.{h.b,w.h,d.w}: signed widening multiply of the even
 * elements.  Only three entries: the 128-bit q.d form is handled
 * separately by VMUL_Q below.
 */
static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmulwev_s,
            .fno = gen_helper_vmulwev_h_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmulwev_w_h,
            .fniv = gen_vmulwev_s,
            .fno = gen_helper_vmulwev_w_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmulwev_d_w,
            .fniv = gen_vmulwev_s,
            .fno = gen_helper_vmulwev_d_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
2036
/* vmulwev: signed widening multiply of even elements (q.d handled below). */
TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
2040
/*
 * unsigned x signed 64x64 -> 128 multiply.  TCG only provides
 * mulsu2 (signed x unsigned); multiplication commutes, so swap the
 * operands to get the unsigned-first flavour the callers want.
 */
static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
                               TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
}
2046
/*
 * Full 128-bit widening multiply: read the 64-bit element at index
 * idx1 of vj and idx2 of vk (even forms use index 0, odd forms index 1),
 * form the 128-bit product with the chosen i64 mul*2 flavour, and store
 * the low/high 64 bits into vd[0]/vd[1].  These are LSX-only here, hence
 * the avail_LSX() gate.
 */
#define VMUL_Q(NAME, FN, idx1, idx2)                      \
static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
{                                                         \
    TCGv_i64 rh, rl, arg1, arg2;                          \
                                                          \
    if (!avail_LSX(ctx)) {                                \
        return false;                                     \
    }                                                     \
                                                          \
    rh = tcg_temp_new_i64();                              \
    rl = tcg_temp_new_i64();                              \
    arg1 = tcg_temp_new_i64();                            \
    arg2 = tcg_temp_new_i64();                            \
                                                          \
    get_vreg64(arg1, a->vj, idx1);                        \
    get_vreg64(arg2, a->vk, idx2);                        \
                                                          \
    tcg_gen_## FN ##_i64(rl, rh, arg1, arg2);             \
                                                          \
    set_vreg64(rh, a->vd, 1);                             \
    set_vreg64(rl, a->vd, 0);                             \
                                                          \
    return true;                                          \
}
2071
/* 128-bit widening multiplies: signed, unsigned, and unsigned x signed. */
VMUL_Q(vmulwev_q_d, muls2, 0, 0)
VMUL_Q(vmulwod_q_d, muls2, 1, 1)
VMUL_Q(vmulwev_q_du, mulu2, 0, 0)
VMUL_Q(vmulwod_q_du, mulu2, 1, 1)
VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0)
VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1)
2078
2079 static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2080 {
2081 TCGv_vec t1, t2;
2082 int halfbits = 4 << vece;
2083
2084 t1 = tcg_temp_new_vec_matching(a);
2085 t2 = tcg_temp_new_vec_matching(b);
2086 tcg_gen_sari_vec(vece, t1, a, halfbits);
2087 tcg_gen_sari_vec(vece, t2, b, halfbits);
2088 tcg_gen_mul_vec(vece, t, t1, t2);
2089 }
2090
2091 static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2092 {
2093 TCGv_i32 t1, t2;
2094
2095 t1 = tcg_temp_new_i32();
2096 t2 = tcg_temp_new_i32();
2097 tcg_gen_sari_i32(t1, a, 16);
2098 tcg_gen_sari_i32(t2, b, 16);
2099 tcg_gen_mul_i32(t, t1, t2);
2100 }
2101
2102 static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2103 {
2104 TCGv_i64 t1, t2;
2105
2106 t1 = tcg_temp_new_i64();
2107 t2 = tcg_temp_new_i64();
2108 tcg_gen_sari_i64(t1, a, 32);
2109 tcg_gen_sari_i64(t2, b, 32);
2110 tcg_gen_mul_i64(t, t1, t2);
2111 }
2112
/*
 * Expand vmulwod.{h.b,w.h,d.w}: signed widening multiply of the odd
 * elements.  The 128-bit q.d form is handled by VMUL_Q above.
 */
static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_mul_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmulwod_s,
            .fno = gen_helper_vmulwod_h_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmulwod_w_h,
            .fniv = gen_vmulwod_s,
            .fno = gen_helper_vmulwod_w_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmulwod_d_w,
            .fniv = gen_vmulwod_s,
            .fno = gen_helper_vmulwod_d_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}
2144
/* vmulwod: signed widening multiply of odd elements (q.d handled above). */
TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
2148
2149 static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2150 {
2151 TCGv_vec t1, t2, mask;
2152
2153 t1 = tcg_temp_new_vec_matching(a);
2154 t2 = tcg_temp_new_vec_matching(b);
2155 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2156 tcg_gen_and_vec(vece, t1, a, mask);
2157 tcg_gen_and_vec(vece, t2, b, mask);
2158 tcg_gen_mul_vec(vece, t, t1, t2);
2159 }
2160
2161 static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2162 {
2163 TCGv_i32 t1, t2;
2164
2165 t1 = tcg_temp_new_i32();
2166 t2 = tcg_temp_new_i32();
2167 tcg_gen_ext16u_i32(t1, a);
2168 tcg_gen_ext16u_i32(t2, b);
2169 tcg_gen_mul_i32(t, t1, t2);
2170 }
2171
2172 static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2173 {
2174 TCGv_i64 t1, t2;
2175
2176 t1 = tcg_temp_new_i64();
2177 t2 = tcg_temp_new_i64();
2178 tcg_gen_ext32u_i64(t1, a);
2179 tcg_gen_ext32u_i64(t2, b);
2180 tcg_gen_mul_i64(t, t1, t2);
2181 }
2182
/*
 * Expand vmulwev.{h.bu,w.hu,d.wu}: widening multiply of the even-indexed
 * unsigned source elements.  Indexed by source element size; each entry
 * runs at the doubled destination size.
 */
static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    /* and_vec is always available, so only mul_vec needs listing. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmulwev_u,
            .fno = gen_helper_vmulwev_h_bu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmulwev_w_hu,
            .fniv = gen_vmulwev_u,
            .fno = gen_helper_vmulwev_w_hu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmulwev_d_wu,
            .fniv = gen_vmulwev_u,
            .fno = gen_helper_vmulwev_d_wu,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
2218
2219 static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2220 {
2221 TCGv_vec t1, t2;
2222 int halfbits = 4 << vece;
2223
2224 t1 = tcg_temp_new_vec_matching(a);
2225 t2 = tcg_temp_new_vec_matching(b);
2226 tcg_gen_shri_vec(vece, t1, a, halfbits);
2227 tcg_gen_shri_vec(vece, t2, b, halfbits);
2228 tcg_gen_mul_vec(vece, t, t1, t2);
2229 }
2230
2231 static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2232 {
2233 TCGv_i32 t1, t2;
2234
2235 t1 = tcg_temp_new_i32();
2236 t2 = tcg_temp_new_i32();
2237 tcg_gen_shri_i32(t1, a, 16);
2238 tcg_gen_shri_i32(t2, b, 16);
2239 tcg_gen_mul_i32(t, t1, t2);
2240 }
2241
2242 static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2243 {
2244 TCGv_i64 t1, t2;
2245
2246 t1 = tcg_temp_new_i64();
2247 t2 = tcg_temp_new_i64();
2248 tcg_gen_shri_i64(t1, a, 32);
2249 tcg_gen_shri_i64(t2, b, 32);
2250 tcg_gen_mul_i64(t, t1, t2);
2251 }
2252
/*
 * Expand vmulwod.{h.bu,w.hu,d.wu}: widening multiply of the odd-indexed
 * unsigned source elements.  Indexed by source element size; each entry
 * runs at the doubled destination size.
 */
static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_mul_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmulwod_u,
            .fno = gen_helper_vmulwod_h_bu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmulwod_w_hu,
            .fniv = gen_vmulwod_u,
            .fno = gen_helper_vmulwod_w_hu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmulwod_d_wu,
            .fniv = gen_vmulwod_u,
            .fno = gen_helper_vmulwod_d_wu,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
2288
2289 static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2290 {
2291 TCGv_vec t1, t2, mask;
2292 int halfbits = 4 << vece;
2293
2294 t1 = tcg_temp_new_vec_matching(a);
2295 t2 = tcg_temp_new_vec_matching(b);
2296 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2297 tcg_gen_and_vec(vece, t1, a, mask);
2298 tcg_gen_shli_vec(vece, t2, b, halfbits);
2299 tcg_gen_sari_vec(vece, t2, t2, halfbits);
2300 tcg_gen_mul_vec(vece, t, t1, t2);
2301 }
2302
2303 static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2304 {
2305 TCGv_i32 t1, t2;
2306
2307 t1 = tcg_temp_new_i32();
2308 t2 = tcg_temp_new_i32();
2309 tcg_gen_ext16u_i32(t1, a);
2310 tcg_gen_ext16s_i32(t2, b);
2311 tcg_gen_mul_i32(t, t1, t2);
2312 }
2313
2314 static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2315 {
2316 TCGv_i64 t1, t2;
2317
2318 t1 = tcg_temp_new_i64();
2319 t2 = tcg_temp_new_i64();
2320 tcg_gen_ext32u_i64(t1, a);
2321 tcg_gen_ext32s_i64(t2, b);
2322 tcg_gen_mul_i64(t, t1, t2);
2323 }
2324
/*
 * Expand vmulwev.{h.bu.b,w.hu.h,d.wu.w}: widening multiply of the
 * even-indexed elements, unsigned(vj) * signed(vk).  Indexed by source
 * element size; each entry runs at the doubled destination size.
 */
static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmulwev_u_s,
            .fno = gen_helper_vmulwev_h_bu_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmulwev_w_hu_h,
            .fniv = gen_vmulwev_u_s,
            .fno = gen_helper_vmulwev_w_hu_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmulwev_d_wu_w,
            .fniv = gen_vmulwev_u_s,
            .fno = gen_helper_vmulwev_d_wu_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
2360
2361 static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2362 {
2363 TCGv_vec t1, t2;
2364 int halfbits = 4 << vece;
2365
2366 t1 = tcg_temp_new_vec_matching(a);
2367 t2 = tcg_temp_new_vec_matching(b);
2368 tcg_gen_shri_vec(vece, t1, a, halfbits);
2369 tcg_gen_sari_vec(vece, t2, b, halfbits);
2370 tcg_gen_mul_vec(vece, t, t1, t2);
2371 }
2372
2373 static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2374 {
2375 TCGv_i32 t1, t2;
2376
2377 t1 = tcg_temp_new_i32();
2378 t2 = tcg_temp_new_i32();
2379 tcg_gen_shri_i32(t1, a, 16);
2380 tcg_gen_sari_i32(t2, b, 16);
2381 tcg_gen_mul_i32(t, t1, t2);
2382 }
2383 static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2384 {
2385 TCGv_i64 t1, t2;
2386
2387 t1 = tcg_temp_new_i64();
2388 t2 = tcg_temp_new_i64();
2389 tcg_gen_shri_i64(t1, a, 32);
2390 tcg_gen_sari_i64(t2, b, 32);
2391 tcg_gen_mul_i64(t, t1, t2);
2392 }
2393
/*
 * Expand vmulwod.{h.bu.b,w.hu.h,d.wu.w}: widening multiply of the
 * odd-indexed elements, unsigned(vj) * signed(vk).  Indexed by source
 * element size; each entry runs at the doubled destination size.
 */
static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                           uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmulwod_u_s,
            .fno = gen_helper_vmulwod_h_bu_b,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmulwod_w_hu_h,
            .fniv = gen_vmulwod_u_s,
            .fno = gen_helper_vmulwod_w_hu_h,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmulwod_d_wu_w,
            .fniv = gen_vmulwod_u_s,
            .fno = gen_helper_vmulwod_d_wu_w,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
2429
2430 static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2431 {
2432 TCGv_vec t1;
2433
2434 t1 = tcg_temp_new_vec_matching(t);
2435 tcg_gen_mul_vec(vece, t1, a, b);
2436 tcg_gen_add_vec(vece, t, t, t1);
2437 }
2438
2439 static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2440 {
2441 TCGv_i32 t1;
2442
2443 t1 = tcg_temp_new_i32();
2444 tcg_gen_mul_i32(t1, a, b);
2445 tcg_gen_add_i32(t, t, t1);
2446 }
2447
2448 static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2449 {
2450 TCGv_i64 t1;
2451
2452 t1 = tcg_temp_new_i64();
2453 tcg_gen_mul_i64(t1, a, b);
2454 tcg_gen_add_i64(t, t, t1);
2455 }
2456
/*
 * Expand vmadd.{b,h,w,d}: vd += vj * vk.  .load_dest makes gvec pass the
 * old vd as the accumulator.  Indexed directly by element size.
 */
static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vmadd,
            .fno = gen_helper_vmadd_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vmadd,
            .fno = gen_helper_vmadd_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmadd_w,
            .fniv = gen_vmadd,
            .fno = gen_helper_vmadd_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmadd_d,
            .fniv = gen_vmadd,
            .fno = gen_helper_vmadd_d,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
2503
2504 static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2505 {
2506 TCGv_vec t1;
2507
2508 t1 = tcg_temp_new_vec_matching(t);
2509 tcg_gen_mul_vec(vece, t1, a, b);
2510 tcg_gen_sub_vec(vece, t, t, t1);
2511 }
2512
2513 static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2514 {
2515 TCGv_i32 t1;
2516
2517 t1 = tcg_temp_new_i32();
2518 tcg_gen_mul_i32(t1, a, b);
2519 tcg_gen_sub_i32(t, t, t1);
2520 }
2521
2522 static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2523 {
2524 TCGv_i64 t1;
2525
2526 t1 = tcg_temp_new_i64();
2527 tcg_gen_mul_i64(t1, a, b);
2528 tcg_gen_sub_i64(t, t, t1);
2529 }
2530
/*
 * Expand vmsub.{b,h,w,d}: vd -= vj * vk.  .load_dest makes gvec pass the
 * old vd as the accumulator.  Indexed directly by element size.
 */
static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vmsub,
            .fno = gen_helper_vmsub_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vmsub,
            .fno = gen_helper_vmsub_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmsub_w,
            .fniv = gen_vmsub,
            .fno = gen_helper_vmsub_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmsub_d,
            .fniv = gen_vmsub,
            .fno = gen_helper_vmsub_d,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
2577
2578 static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2579 {
2580 TCGv_vec t1, t2, t3;
2581 int halfbits = 4 << vece;
2582
2583 t1 = tcg_temp_new_vec_matching(a);
2584 t2 = tcg_temp_new_vec_matching(b);
2585 t3 = tcg_temp_new_vec_matching(t);
2586 tcg_gen_shli_vec(vece, t1, a, halfbits);
2587 tcg_gen_sari_vec(vece, t1, t1, halfbits);
2588 tcg_gen_shli_vec(vece, t2, b, halfbits);
2589 tcg_gen_sari_vec(vece, t2, t2, halfbits);
2590 tcg_gen_mul_vec(vece, t3, t1, t2);
2591 tcg_gen_add_vec(vece, t, t, t3);
2592 }
2593
2594 static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2595 {
2596 TCGv_i32 t1;
2597
2598 t1 = tcg_temp_new_i32();
2599 gen_vmulwev_w_h(t1, a, b);
2600 tcg_gen_add_i32(t, t, t1);
2601 }
2602
2603 static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2604 {
2605 TCGv_i64 t1;
2606
2607 t1 = tcg_temp_new_i64();
2608 gen_vmulwev_d_w(t1, a, b);
2609 tcg_gen_add_i64(t, t, t1);
2610 }
2611
/*
 * Expand vmaddwev.{h.b,w.h,d.w}: vd += widening multiply of the
 * even-indexed signed elements.  .load_dest passes the old vd as the
 * accumulator; indexed by source element size.
 */
static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwev_s,
            .fno = gen_helper_vmaddwev_h_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwev_w_h,
            .fniv = gen_vmaddwev_s,
            .fno = gen_helper_vmaddwev_w_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwev_d_w,
            .fniv = gen_vmaddwev_s,
            .fno = gen_helper_vmaddwev_d_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
2651
/*
 * 128-bit (q) widening multiply-accumulate: vd(128) += element idx1 of vj
 * times element idx2 of vk, widened to 128 bits.  FN selects the 64x64->128
 * multiply flavour (muls2 = signed, mulu2 = unsigned, mulus2 = unsigned *
 * signed); the 128-bit accumulate is done with add2 on the lo/hi halves.
 */
#define VMADD_Q(NAME, FN, idx1, idx2)                     \
static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
{                                                         \
    TCGv_i64 rh, rl, arg1, arg2, th, tl;                  \
                                                          \
    if (!avail_LSX(ctx)) {                                \
        return false;                                     \
    }                                                     \
                                                          \
    rh = tcg_temp_new_i64();                              \
    rl = tcg_temp_new_i64();                              \
    arg1 = tcg_temp_new_i64();                            \
    arg2 = tcg_temp_new_i64();                            \
    th = tcg_temp_new_i64();                              \
    tl = tcg_temp_new_i64();                              \
                                                          \
    get_vreg64(arg1, a->vj, idx1);                        \
    get_vreg64(arg2, a->vk, idx2);                        \
    get_vreg64(rh, a->vd, 1);                             \
    get_vreg64(rl, a->vd, 0);                             \
                                                          \
    tcg_gen_## FN ##_i64(tl, th, arg1, arg2);             \
    tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);             \
                                                          \
    set_vreg64(rh, a->vd, 1);                             \
    set_vreg64(rl, a->vd, 0);                             \
                                                          \
    return true;                                          \
}

VMADD_Q(vmaddwev_q_d, muls2, 0, 0)
VMADD_Q(vmaddwod_q_d, muls2, 1, 1)
VMADD_Q(vmaddwev_q_du, mulu2, 0, 0)
VMADD_Q(vmaddwod_q_du, mulu2, 1, 1)
VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0)
VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1)
2688
2689 static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2690 {
2691 TCGv_vec t1, t2, t3;
2692 int halfbits = 4 << vece;
2693
2694 t1 = tcg_temp_new_vec_matching(a);
2695 t2 = tcg_temp_new_vec_matching(b);
2696 t3 = tcg_temp_new_vec_matching(t);
2697 tcg_gen_sari_vec(vece, t1, a, halfbits);
2698 tcg_gen_sari_vec(vece, t2, b, halfbits);
2699 tcg_gen_mul_vec(vece, t3, t1, t2);
2700 tcg_gen_add_vec(vece, t, t, t3);
2701 }
2702
2703 static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2704 {
2705 TCGv_i32 t1;
2706
2707 t1 = tcg_temp_new_i32();
2708 gen_vmulwod_w_h(t1, a, b);
2709 tcg_gen_add_i32(t, t, t1);
2710 }
2711
2712 static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2713 {
2714 TCGv_i64 t1;
2715
2716 t1 = tcg_temp_new_i64();
2717 gen_vmulwod_d_w(t1, a, b);
2718 tcg_gen_add_i64(t, t, t1);
2719 }
2720
/*
 * Expand vmaddwod.{h.b,w.h,d.w}: vd += widening multiply of the
 * odd-indexed signed elements.  .load_dest passes the old vd as the
 * accumulator; indexed by source element size.
 */
static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwod_s,
            .fno = gen_helper_vmaddwod_h_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwod_w_h,
            .fniv = gen_vmaddwod_s,
            .fno = gen_helper_vmaddwod_w_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwod_d_w,
            .fniv = gen_vmaddwod_s,
            .fno = gen_helper_vmaddwod_d_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
2759
2760 static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2761 {
2762 TCGv_vec t1, t2, mask;
2763
2764 t1 = tcg_temp_new_vec_matching(t);
2765 t2 = tcg_temp_new_vec_matching(b);
2766 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2767 tcg_gen_and_vec(vece, t1, a, mask);
2768 tcg_gen_and_vec(vece, t2, b, mask);
2769 tcg_gen_mul_vec(vece, t1, t1, t2);
2770 tcg_gen_add_vec(vece, t, t, t1);
2771 }
2772
2773 static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2774 {
2775 TCGv_i32 t1;
2776
2777 t1 = tcg_temp_new_i32();
2778 gen_vmulwev_w_hu(t1, a, b);
2779 tcg_gen_add_i32(t, t, t1);
2780 }
2781
2782 static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2783 {
2784 TCGv_i64 t1;
2785
2786 t1 = tcg_temp_new_i64();
2787 gen_vmulwev_d_wu(t1, a, b);
2788 tcg_gen_add_i64(t, t, t1);
2789 }
2790
/*
 * Expand vmaddwev.{h.bu,w.hu,d.wu}: vd += widening multiply of the
 * even-indexed unsigned elements.  .load_dest passes the old vd as the
 * accumulator; indexed by source element size.
 */
static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwev_u,
            .fno = gen_helper_vmaddwev_h_bu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwev_w_hu,
            .fniv = gen_vmaddwev_u,
            .fno = gen_helper_vmaddwev_w_hu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwev_d_wu,
            .fniv = gen_vmaddwev_u,
            .fno = gen_helper_vmaddwev_d_wu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
2829
2830 static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2831 {
2832 TCGv_vec t1, t2, t3;
2833 int halfbits = 4 << vece;
2834
2835 t1 = tcg_temp_new_vec_matching(a);
2836 t2 = tcg_temp_new_vec_matching(b);
2837 t3 = tcg_temp_new_vec_matching(t);
2838 tcg_gen_shri_vec(vece, t1, a, halfbits);
2839 tcg_gen_shri_vec(vece, t2, b, halfbits);
2840 tcg_gen_mul_vec(vece, t3, t1, t2);
2841 tcg_gen_add_vec(vece, t, t, t3);
2842 }
2843
2844 static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2845 {
2846 TCGv_i32 t1;
2847
2848 t1 = tcg_temp_new_i32();
2849 gen_vmulwod_w_hu(t1, a, b);
2850 tcg_gen_add_i32(t, t, t1);
2851 }
2852
2853 static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2854 {
2855 TCGv_i64 t1;
2856
2857 t1 = tcg_temp_new_i64();
2858 gen_vmulwod_d_wu(t1, a, b);
2859 tcg_gen_add_i64(t, t, t1);
2860 }
2861
/*
 * Expand vmaddwod.{h.bu,w.hu,d.wu}: vd += widening multiply of the
 * odd-indexed unsigned elements.  .load_dest passes the old vd as the
 * accumulator; indexed by source element size.
 */
static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwod_u,
            .fno = gen_helper_vmaddwod_h_bu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwod_w_hu,
            .fniv = gen_vmaddwod_u,
            .fno = gen_helper_vmaddwod_w_hu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwod_d_wu,
            .fniv = gen_vmaddwod_u,
            .fno = gen_helper_vmaddwod_d_wu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
2900
2901 static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2902 {
2903 TCGv_vec t1, t2, mask;
2904 int halfbits = 4 << vece;
2905
2906 t1 = tcg_temp_new_vec_matching(a);
2907 t2 = tcg_temp_new_vec_matching(b);
2908 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2909 tcg_gen_and_vec(vece, t1, a, mask);
2910 tcg_gen_shli_vec(vece, t2, b, halfbits);
2911 tcg_gen_sari_vec(vece, t2, t2, halfbits);
2912 tcg_gen_mul_vec(vece, t1, t1, t2);
2913 tcg_gen_add_vec(vece, t, t, t1);
2914 }
2915
2916 static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2917 {
2918 TCGv_i32 t1;
2919
2920 t1 = tcg_temp_new_i32();
2921 gen_vmulwev_w_hu_h(t1, a, b);
2922 tcg_gen_add_i32(t, t, t1);
2923 }
2924
2925 static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2926 {
2927 TCGv_i64 t1;
2928
2929 t1 = tcg_temp_new_i64();
2930 gen_vmulwev_d_wu_w(t1, a, b);
2931 tcg_gen_add_i64(t, t, t1);
2932 }
2933
/*
 * Expand vmaddwev.{h.bu.b,w.hu.h,d.wu.w}: vd += widening multiply of the
 * even-indexed elements, unsigned(vj) * signed(vk).  .load_dest passes the
 * old vd as the accumulator; indexed by source element size.
 */
static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwev_u_s,
            .fno = gen_helper_vmaddwev_h_bu_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwev_w_hu_h,
            .fniv = gen_vmaddwev_u_s,
            .fno = gen_helper_vmaddwev_w_hu_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwev_d_wu_w,
            .fniv = gen_vmaddwev_u_s,
            .fno = gen_helper_vmaddwev_d_wu_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
2973
2974 static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2975 {
2976 TCGv_vec t1, t2, t3;
2977 int halfbits = 4 << vece;
2978
2979 t1 = tcg_temp_new_vec_matching(a);
2980 t2 = tcg_temp_new_vec_matching(b);
2981 t3 = tcg_temp_new_vec_matching(t);
2982 tcg_gen_shri_vec(vece, t1, a, halfbits);
2983 tcg_gen_sari_vec(vece, t2, b, halfbits);
2984 tcg_gen_mul_vec(vece, t3, t1, t2);
2985 tcg_gen_add_vec(vece, t, t, t3);
2986 }
2987
2988 static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2989 {
2990 TCGv_i32 t1;
2991
2992 t1 = tcg_temp_new_i32();
2993 gen_vmulwod_w_hu_h(t1, a, b);
2994 tcg_gen_add_i32(t, t, t1);
2995 }
2996
2997 static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2998 {
2999 TCGv_i64 t1;
3000
3001 t1 = tcg_temp_new_i64();
3002 gen_vmulwod_d_wu_w(t1, a, b);
3003 tcg_gen_add_i64(t, t, t1);
3004 }
3005
/*
 * Expand vmaddwod.{h.bu.b,w.hu.h,d.wu.w}: vd += widening multiply of the
 * odd-indexed elements, unsigned(vj) * signed(vk).  .load_dest passes the
 * old vd as the accumulator; indexed by source element size.
 */
static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec,
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwod_u_s,
            .fno = gen_helper_vmaddwod_h_bu_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwod_w_hu_h,
            .fniv = gen_vmaddwod_u_s,
            .fno = gen_helper_vmaddwod_w_hu_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwod_d_wu_w,
            .fniv = gen_vmaddwod_u_s,
            .fno = gen_helper_vmaddwod_d_wu_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
3045
/* Division and modulo have no gvec expansion; always call out-of-line. */
TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w)
TRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d)
TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu)
TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu)
TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu)
TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du)
TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b)
TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h)
TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w)
TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d)
TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
3062
3063 static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3064 {
3065 TCGv_vec min;
3066
3067 min = tcg_temp_new_vec_matching(t);
3068 tcg_gen_not_vec(vece, min, max);
3069 tcg_gen_smax_vec(vece, t, a, min);
3070 tcg_gen_smin_vec(vece, t, t, max);
3071 }
3072
/*
 * Expand vsat.{b,h,w,d}: saturate each signed element to imm + 1
 * significant bits, i.e. clamp to [-2^imm, 2^imm - 1].  The upper bound
 * (2^imm - 1) is passed to the expander as a constant.
 */
static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2s op[4] = {
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    /*
     * NOTE(review): for vsat.d the decode field appears to allow imm up to
     * 63, where 1ll << 63 shifts into the sign bit — confirm imm is bounded
     * by the element width in the decoder.
     */
    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
                    tcg_constant_i64((1ll<< imm) -1), &op[vece]);
}

TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
3114
/* Unsigned saturate: a single unsigned minimum against the bound. */
static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
{
    tcg_gen_umin_vec(vece, t, a, max);
}
3119
/*
 * Expand vsat.{bu,hu,wu,du}: saturate each unsigned element to
 * imm + 1 bits, i.e. clamp to [0, 2^(imm+1) - 1].
 */
static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    uint64_t max;
    static const TCGOpcode vecop_list[] = {
        INDEX_op_umin_vec, 0
    };
    static const GVecGen2s op[4] = {
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_bu,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_hu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_wu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_du,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    /* imm == 0x3f is special-cased to avoid an undefined shift by 64. */
    max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
                    tcg_constant_i64(max), &op[vece]);
}

TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
3163
/* Widen the high half of vj into vd (signed and unsigned); out-of-line only. */
TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w)
TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d)
TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
3172
/*
 * vsigncov: per element, t = (a < 0) ? -b : (a == 0) ? 0 : b.
 */
static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, zero;

    t1 = tcg_temp_new_vec_matching(t);
    zero = tcg_constant_vec_matching(t, vece, 0);

    tcg_gen_neg_vec(vece, t1, b);
    /* t = (a < 0) ? -b : b */
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
    /* t = (a == 0) ? 0 : t */
    tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
}
3184
/*
 * Expand vsigncov.{b,h,w,d}: copy, negate or zero vk's elements according
 * to the sign of vj's elements.  Indexed directly by element size.
 */
static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
3225
/* Bit-mask extraction from element sign/zero tests; out-of-line only. */
TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
3232
3233 #define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0))
3234
3235 static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
3236 {
3237 int mode;
3238 uint64_t data, t;
3239
3240 /*
3241 * imm bit [11:8] is mode, mode value is 0-12.
3242 * other values are invalid.
3243 */
3244 mode = (imm >> 8) & 0xf;
3245 t = imm & 0xff;
3246 switch (mode) {
3247 case 0:
3248 /* data: {2{24'0, imm[7:0]}} */
3249 data = (t << 32) | t ;
3250 break;
3251 case 1:
3252 /* data: {2{16'0, imm[7:0], 8'0}} */
3253 data = (t << 24) | (t << 8);
3254 break;
3255 case 2:
3256 /* data: {2{8'0, imm[7:0], 16'0}} */
3257 data = (t << 48) | (t << 16);
3258 break;
3259 case 3:
3260 /* data: {2{imm[7:0], 24'0}} */
3261 data = (t << 56) | (t << 24);
3262 break;
3263 case 4:
3264 /* data: {4{8'0, imm[7:0]}} */
3265 data = (t << 48) | (t << 32) | (t << 16) | t;
3266 break;
3267 case 5:
3268 /* data: {4{imm[7:0], 8'0}} */
3269 data = (t << 56) |(t << 40) | (t << 24) | (t << 8);
3270 break;
3271 case 6:
3272 /* data: {2{16'0, imm[7:0], 8'1}} */
3273 data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
3274 break;
3275 case 7:
3276 /* data: {2{8'0, imm[7:0], 16'1}} */
3277 data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
3278 break;
3279 case 8:
3280 /* data: {8{imm[7:0]}} */
3281 data =(t << 56) | (t << 48) | (t << 40) | (t << 32) |
3282 (t << 24) | (t << 16) | (t << 8) | t;
3283 break;
3284 case 9:
3285 /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
3286 {
3287 uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
3288 b0 = t& 0x1;
3289 b1 = (t & 0x2) >> 1;
3290 b2 = (t & 0x4) >> 2;
3291 b3 = (t & 0x8) >> 3;
3292 b4 = (t & 0x10) >> 4;
3293 b5 = (t & 0x20) >> 5;
3294 b6 = (t & 0x40) >> 6;
3295 b7 = (t & 0x80) >> 7;
3296 data = (EXPAND_BYTE(b7) << 56) |
3297 (EXPAND_BYTE(b6) << 48) |
3298 (EXPAND_BYTE(b5) << 40) |
3299 (EXPAND_BYTE(b4) << 32) |
3300 (EXPAND_BYTE(b3) << 24) |
3301 (EXPAND_BYTE(b2) << 16) |
3302 (EXPAND_BYTE(b1) << 8) |
3303 EXPAND_BYTE(b0);
3304 }
3305 break;
3306 case 10:
3307 /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
3308 {
3309 uint64_t b6, b7;
3310 uint64_t t0, t1;
3311 b6 = (imm & 0x40) >> 6;
3312 b7 = (imm & 0x80) >> 7;
3313 t0 = (imm & 0x3f);
3314 t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
3315 data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
3316 }
3317 break;
3318 case 11:
3319 /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
3320 {
3321 uint64_t b6,b7;
3322 uint64_t t0, t1;
3323 b6 = (imm & 0x40) >> 6;
3324 b7 = (imm & 0x80) >> 7;
3325 t0 = (imm & 0x3f);
3326 t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0);
3327 data = (t1 << 25) | (t0 << 19);
3328 }
3329 break;
3330 case 12:
3331 /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
3332 {
3333 uint64_t b6,b7;
3334 uint64_t t0, t1;
3335 b6 = (imm & 0x40) >> 6;
3336 b7 = (imm & 0x80) >> 7;
3337 t0 = (imm & 0x3f);
3338 t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0);
3339 data = (t1 << 54) | (t0 << 48);
3340 }
3341 break;
3342 default:
3343 generate_exception(ctx, EXCCODE_INE);
3344 g_assert_not_reached();
3345 }
3346 return data;
3347 }
3348
3349 static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
3350 {
3351 int sel, vece;
3352 uint64_t value;
3353
3354 if (!avail_LSX(ctx)) {
3355 return false;
3356 }
3357
3358 if (!check_vec(ctx, 16)) {
3359 return true;
3360 }
3361
3362 sel = (a->imm >> 12) & 0x1;
3363
3364 if (sel) {
3365 value = vldi_get_value(ctx, a->imm);
3366 vece = MO_64;
3367 } else {
3368 value = ((int32_t)(a->imm << 22)) >> 22;
3369 vece = (a->imm >> 10) & 0x3;
3370 }
3371
3372 tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8,
3373 tcg_constant_i64(value));
3374 return true;
3375 }
3376
/* Whole-register bitwise logic, expanded with generic gvec ops. */
TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
3381
3382 static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
3383 {
3384 uint32_t vd_ofs, vj_ofs, vk_ofs;
3385
3386 if (!avail_LSX(ctx)) {
3387 return false;
3388 }
3389
3390 if (!check_vec(ctx, 16)) {
3391 return true;
3392 }
3393
3394 vd_ofs = vec_full_offset(a->vd);
3395 vj_ofs = vec_full_offset(a->vj);
3396 vk_ofs = vec_full_offset(a->vk);
3397
3398 tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8);
3399 return true;
3400 }
/* OR-NOT and byte-immediate logic ops. */
TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
3405
3406 static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3407 {
3408 TCGv_vec t1;
3409
3410 t1 = tcg_constant_vec_matching(t, vece, imm);
3411 tcg_gen_nor_vec(vece, t, a, t1);
3412 }
3413
3414 static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
3415 {
3416 tcg_gen_movi_i64(t, dup_const(MO_8, imm));
3417 tcg_gen_nor_i64(t, a, t);
3418 }
3419
3420 static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3421 int64_t imm, uint32_t oprsz, uint32_t maxsz)
3422 {
3423 static const TCGOpcode vecop_list[] = {
3424 INDEX_op_nor_vec, 0
3425 };
3426 static const GVecGen2i op = {
3427 .fni8 = gen_vnori_b,
3428 .fniv = gen_vnori,
3429 .fnoi = gen_helper_vnori_b,
3430 .opt_opc = vecop_list,
3431 .vece = MO_8
3432 };
3433
3434 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
3435 }
3436
TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)

/* Shifts and rotates: register (v) and immediate (i) counts. */
TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)

TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)

TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)

TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)

/* Widening shifts and zero/sign extension of the low/even elements. */
TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w)
TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d)
TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
3483
/* Rounding right shifts (logical and arithmetic), out-of-line helpers. */
TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w)
TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d)
TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)

TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w)
TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d)
TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
3501
/* Narrowing shifts: shift then truncate to the narrower element type. */
TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)

TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d)
TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q)
TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)

/* Narrowing shifts with rounding. */
TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)

TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d)
TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q)
TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
3533
/* Saturating narrowing shifts (signed and unsigned destinations). */
TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d)
TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h)
TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w)
TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d)
TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h)
TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w)
TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)

TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d)
TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q)
TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h)
TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w)
TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d)
TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q)
TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h)
TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w)
TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d)
TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q)
TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
3563
/* Saturating narrowing shifts with rounding. */
TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d)
TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h)
TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w)
TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d)
TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h)
TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w)
TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)

TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d)
TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q)
TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h)
TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w)
TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d)
TRANS(vssrarni_d_q, LSX, gen_vv_i, gen_helper_vssrarni_d_q)
TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h)
TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w)
TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d)
TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q)
TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
3593
/* Count leading ones/zeros and population count. */
TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w)
TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d)
TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)

TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
3607
3608 static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
3609 void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
3610 {
3611 TCGv_vec mask, lsh, t1, one;
3612
3613 lsh = tcg_temp_new_vec_matching(t);
3614 t1 = tcg_temp_new_vec_matching(t);
3615 mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
3616 one = tcg_constant_vec_matching(t, vece, 1);
3617
3618 tcg_gen_and_vec(vece, lsh, b, mask);
3619 tcg_gen_shlv_vec(vece, t1, one, lsh);
3620 func(vece, t, a, t1);
3621 }
3622
/* t = a & ~(1 << (b % bits)): clear the selected bit. */
static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    do_vbit(vece, t, a, b, tcg_gen_andc_vec);
}
3627
/* t = a | (1 << (b % bits)): set the selected bit. */
static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    do_vbit(vece, t, a, b, tcg_gen_or_vec);
}
3632
/* t = a ^ (1 << (b % bits)): toggle the selected bit. */
static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    do_vbit(vece, t, a, b, tcg_gen_xor_vec);
}
3637
3638 static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3639 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3640 {
3641 static const TCGOpcode vecop_list[] = {
3642 INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
3643 };
3644 static const GVecGen3 op[4] = {
3645 {
3646 .fniv = gen_vbitclr,
3647 .fno = gen_helper_vbitclr_b,
3648 .opt_opc = vecop_list,
3649 .vece = MO_8
3650 },
3651 {
3652 .fniv = gen_vbitclr,
3653 .fno = gen_helper_vbitclr_h,
3654 .opt_opc = vecop_list,
3655 .vece = MO_16
3656 },
3657 {
3658 .fniv = gen_vbitclr,
3659 .fno = gen_helper_vbitclr_w,
3660 .opt_opc = vecop_list,
3661 .vece = MO_32
3662 },
3663 {
3664 .fniv = gen_vbitclr,
3665 .fno = gen_helper_vbitclr_d,
3666 .opt_opc = vecop_list,
3667 .vece = MO_64
3668 },
3669 };
3670
3671 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3672 }
3673
/* Bit-clear with register-selected bit index. */
TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
3678
3679 static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
3680 void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
3681 {
3682 int lsh;
3683 TCGv_vec t1, one;
3684
3685 lsh = imm & ((8 << vece) -1);
3686 t1 = tcg_temp_new_vec_matching(t);
3687 one = tcg_constant_vec_matching(t, vece, 1);
3688
3689 tcg_gen_shli_vec(vece, t1, one, lsh);
3690 func(vece, t, a, t1);
3691 }
3692
/* t = a & ~(1 << (imm % bits)). */
static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
}
3697
/* t = a | (1 << (imm % bits)). */
static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
}
3702
/* t = a ^ (1 << (imm % bits)). */
static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
}
3707
3708 static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3709 int64_t imm, uint32_t oprsz, uint32_t maxsz)
3710 {
3711 static const TCGOpcode vecop_list[] = {
3712 INDEX_op_shli_vec, INDEX_op_andc_vec, 0
3713 };
3714 static const GVecGen2i op[4] = {
3715 {
3716 .fniv = gen_vbitclri,
3717 .fnoi = gen_helper_vbitclri_b,
3718 .opt_opc = vecop_list,
3719 .vece = MO_8
3720 },
3721 {
3722 .fniv = gen_vbitclri,
3723 .fnoi = gen_helper_vbitclri_h,
3724 .opt_opc = vecop_list,
3725 .vece = MO_16
3726 },
3727 {
3728 .fniv = gen_vbitclri,
3729 .fnoi = gen_helper_vbitclri_w,
3730 .opt_opc = vecop_list,
3731 .vece = MO_32
3732 },
3733 {
3734 .fniv = gen_vbitclri,
3735 .fnoi = gen_helper_vbitclri_d,
3736 .opt_opc = vecop_list,
3737 .vece = MO_64
3738 },
3739 };
3740
3741 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
3742 }
3743
/* Bit-clear with immediate bit index. */
TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
3748
3749 static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3750 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3751 {
3752 static const TCGOpcode vecop_list[] = {
3753 INDEX_op_shlv_vec, 0
3754 };
3755 static const GVecGen3 op[4] = {
3756 {
3757 .fniv = gen_vbitset,
3758 .fno = gen_helper_vbitset_b,
3759 .opt_opc = vecop_list,
3760 .vece = MO_8
3761 },
3762 {
3763 .fniv = gen_vbitset,
3764 .fno = gen_helper_vbitset_h,
3765 .opt_opc = vecop_list,
3766 .vece = MO_16
3767 },
3768 {
3769 .fniv = gen_vbitset,
3770 .fno = gen_helper_vbitset_w,
3771 .opt_opc = vecop_list,
3772 .vece = MO_32
3773 },
3774 {
3775 .fniv = gen_vbitset,
3776 .fno = gen_helper_vbitset_d,
3777 .opt_opc = vecop_list,
3778 .vece = MO_64
3779 },
3780 };
3781
3782 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3783 }
3784
/* Bit-set with register-selected bit index. */
TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
3789
3790 static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3791 int64_t imm, uint32_t oprsz, uint32_t maxsz)
3792 {
3793 static const TCGOpcode vecop_list[] = {
3794 INDEX_op_shli_vec, 0
3795 };
3796 static const GVecGen2i op[4] = {
3797 {
3798 .fniv = gen_vbitseti,
3799 .fnoi = gen_helper_vbitseti_b,
3800 .opt_opc = vecop_list,
3801 .vece = MO_8
3802 },
3803 {
3804 .fniv = gen_vbitseti,
3805 .fnoi = gen_helper_vbitseti_h,
3806 .opt_opc = vecop_list,
3807 .vece = MO_16
3808 },
3809 {
3810 .fniv = gen_vbitseti,
3811 .fnoi = gen_helper_vbitseti_w,
3812 .opt_opc = vecop_list,
3813 .vece = MO_32
3814 },
3815 {
3816 .fniv = gen_vbitseti,
3817 .fnoi = gen_helper_vbitseti_d,
3818 .opt_opc = vecop_list,
3819 .vece = MO_64
3820 },
3821 };
3822
3823 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
3824 }
3825
/* Bit-set with immediate bit index. */
TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
3830
3831 static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3832 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3833 {
3834 static const TCGOpcode vecop_list[] = {
3835 INDEX_op_shlv_vec, 0
3836 };
3837 static const GVecGen3 op[4] = {
3838 {
3839 .fniv = gen_vbitrev,
3840 .fno = gen_helper_vbitrev_b,
3841 .opt_opc = vecop_list,
3842 .vece = MO_8
3843 },
3844 {
3845 .fniv = gen_vbitrev,
3846 .fno = gen_helper_vbitrev_h,
3847 .opt_opc = vecop_list,
3848 .vece = MO_16
3849 },
3850 {
3851 .fniv = gen_vbitrev,
3852 .fno = gen_helper_vbitrev_w,
3853 .opt_opc = vecop_list,
3854 .vece = MO_32
3855 },
3856 {
3857 .fniv = gen_vbitrev,
3858 .fno = gen_helper_vbitrev_d,
3859 .opt_opc = vecop_list,
3860 .vece = MO_64
3861 },
3862 };
3863
3864 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3865 }
3866
/* Bit-toggle with register-selected bit index. */
TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
3871
3872 static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3873 int64_t imm, uint32_t oprsz, uint32_t maxsz)
3874 {
3875 static const TCGOpcode vecop_list[] = {
3876 INDEX_op_shli_vec, 0
3877 };
3878 static const GVecGen2i op[4] = {
3879 {
3880 .fniv = gen_vbitrevi,
3881 .fnoi = gen_helper_vbitrevi_b,
3882 .opt_opc = vecop_list,
3883 .vece = MO_8
3884 },
3885 {
3886 .fniv = gen_vbitrevi,
3887 .fnoi = gen_helper_vbitrevi_h,
3888 .opt_opc = vecop_list,
3889 .vece = MO_16
3890 },
3891 {
3892 .fniv = gen_vbitrevi,
3893 .fnoi = gen_helper_vbitrevi_w,
3894 .opt_opc = vecop_list,
3895 .vece = MO_32
3896 },
3897 {
3898 .fniv = gen_vbitrevi,
3899 .fnoi = gen_helper_vbitrevi_d,
3900 .opt_opc = vecop_list,
3901 .vece = MO_64
3902 },
3903 };
3904
3905 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
3906 }
3907
/* Bit-toggle with immediate bit index. */
TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)

/* Find-first-negative insert, out-of-line helpers only. */
TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
3917
/* FP arithmetic: helpers take cpu_env for fp status/exception flags. */
TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)

/* Fused multiply-add family (four-register forms). */
TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)

TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)

TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
3945
/* FP classification, roots, reciprocals, conversions and rounding. */
TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)

TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)

TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)

TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)

/* Round to integral value in FP format, per rounding mode suffix. */
TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
3976
/* FP to integer conversions, per rounding mode and signedness. */
TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)

/* Integer to FP conversions. */
TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
4014
4015 static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
4016 {
4017 uint32_t vd_ofs, vj_ofs, vk_ofs;
4018
4019 if (!check_vec(ctx, 16)) {
4020 return true;
4021 }
4022
4023 vd_ofs = vec_full_offset(a->vd);
4024 vj_ofs = vec_full_offset(a->vj);
4025 vk_ofs = vec_full_offset(a->vk);
4026
4027 tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
4028 return true;
4029 }
4030
4031 static void do_cmpi_vec(TCGCond cond,
4032 unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4033 {
4034 tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, imm));
4035 }
4036
/* vd = (vj == imm), signed immediate. */
static void gen_vseqi_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_cmpi_vec(TCG_COND_EQ, vece, t, a, imm);
}
4041
/* vd = (vj <= imm), signed comparison. */
static void gen_vslei_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_cmpi_vec(TCG_COND_LE, vece, t, a, imm);
}
4046
/* vd = (vj < imm), signed comparison. */
static void gen_vslti_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_cmpi_vec(TCG_COND_LT, vece, t, a, imm);
}
4051
/* vd = (vj <= imm), unsigned comparison. */
static void gen_vslei_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_cmpi_vec(TCG_COND_LEU, vece, t, a, imm);
}
4056
/* vd = (vj < imm), unsigned comparison. */
static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm);
}
4061
/*
 * Expand do_<NAME>_s(): signed compare-with-immediate, with an inline
 * cmp_vec expansion per element size and out-of-line helper fallbacks.
 */
#define DO_CMPI_S(NAME)                                                 \
static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop)  \
{                                                                       \
    uint32_t vd_ofs, vj_ofs;                                            \
                                                                        \
    if (!check_vec(ctx, 16)) {                                          \
        return true;                                                    \
    }                                                                   \
                                                                        \
    static const TCGOpcode vecop_list[] = {                             \
        INDEX_op_cmp_vec, 0                                             \
    };                                                                  \
    static const GVecGen2i op[4] = {                                    \
        {                                                               \
            .fniv = gen_## NAME ##_s_vec,                               \
            .fnoi = gen_helper_## NAME ##_b,                            \
            .opt_opc = vecop_list,                                      \
            .vece = MO_8                                                \
        },                                                              \
        {                                                               \
            .fniv = gen_## NAME ##_s_vec,                               \
            .fnoi = gen_helper_## NAME ##_h,                            \
            .opt_opc = vecop_list,                                      \
            .vece = MO_16                                               \
        },                                                              \
        {                                                               \
            .fniv = gen_## NAME ##_s_vec,                               \
            .fnoi = gen_helper_## NAME ##_w,                            \
            .opt_opc = vecop_list,                                      \
            .vece = MO_32                                               \
        },                                                              \
        {                                                               \
            .fniv = gen_## NAME ##_s_vec,                               \
            .fnoi = gen_helper_## NAME ##_d,                            \
            .opt_opc = vecop_list,                                      \
            .vece = MO_64                                               \
        }                                                               \
    };                                                                  \
                                                                        \
    vd_ofs = vec_full_offset(a->vd);                                    \
    vj_ofs = vec_full_offset(a->vj);                                    \
                                                                        \
    tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]);   \
                                                                        \
    return true;                                                        \
}
4108
/* Instantiate the signed compare-immediate trans helpers. */
DO_CMPI_S(vseqi)
DO_CMPI_S(vslei)
DO_CMPI_S(vslti)
4112
/*
 * Expand do_<NAME>_u(): unsigned compare-with-immediate, mirroring
 * DO_CMPI_S but with the _bu/_hu/_wu/_du helper variants.
 */
#define DO_CMPI_U(NAME)                                                 \
static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop)  \
{                                                                       \
    uint32_t vd_ofs, vj_ofs;                                            \
                                                                        \
    if (!check_vec(ctx, 16)) {                                          \
        return true;                                                    \
    }                                                                   \
                                                                        \
    static const TCGOpcode vecop_list[] = {                             \
        INDEX_op_cmp_vec, 0                                             \
    };                                                                  \
    static const GVecGen2i op[4] = {                                    \
        {                                                               \
            .fniv = gen_## NAME ##_u_vec,                               \
            .fnoi = gen_helper_## NAME ##_bu,                           \
            .opt_opc = vecop_list,                                      \
            .vece = MO_8                                                \
        },                                                              \
        {                                                               \
            .fniv = gen_## NAME ##_u_vec,                               \
            .fnoi = gen_helper_## NAME ##_hu,                           \
            .opt_opc = vecop_list,                                      \
            .vece = MO_16                                               \
        },                                                              \
        {                                                               \
            .fniv = gen_## NAME ##_u_vec,                               \
            .fnoi = gen_helper_## NAME ##_wu,                           \
            .opt_opc = vecop_list,                                      \
            .vece = MO_32                                               \
        },                                                              \
        {                                                               \
            .fniv = gen_## NAME ##_u_vec,                               \
            .fnoi = gen_helper_## NAME ##_du,                           \
            .opt_opc = vecop_list,                                      \
            .vece = MO_64                                               \
        }                                                               \
    };                                                                  \
                                                                        \
    vd_ofs = vec_full_offset(a->vd);                                    \
    vj_ofs = vec_full_offset(a->vj);                                    \
                                                                        \
    tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]);   \
                                                                        \
    return true;                                                        \
}
4159
/* Instantiate the unsigned compare-immediate trans helpers. */
DO_CMPI_U(vslei)
DO_CMPI_U(vslti)
4162
/* Equality and less-or-equal compares (register and immediate forms). */
TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
TRANS(vseqi_b, LSX, do_vseqi_s, MO_8)
TRANS(vseqi_h, LSX, do_vseqi_s, MO_16)
TRANS(vseqi_w, LSX, do_vseqi_s, MO_32)
TRANS(vseqi_d, LSX, do_vseqi_s, MO_64)

TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
TRANS(vslei_b, LSX, do_vslei_s, MO_8)
TRANS(vslei_h, LSX, do_vslei_s, MO_16)
TRANS(vslei_w, LSX, do_vslei_s, MO_32)
TRANS(vslei_d, LSX, do_vslei_s, MO_64)
TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
TRANS(vslei_bu, LSX, do_vslei_u, MO_8)
TRANS(vslei_hu, LSX, do_vslei_u, MO_16)
TRANS(vslei_wu, LSX, do_vslei_u, MO_32)
TRANS(vslei_du, LSX, do_vslei_u, MO_64)
4188
/* Less-than compares (register and immediate, signed and unsigned). */
TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
TRANS(vslti_b, LSX, do_vslti_s, MO_8)
TRANS(vslti_h, LSX, do_vslti_s, MO_16)
TRANS(vslti_w, LSX, do_vslti_s, MO_32)
TRANS(vslti_d, LSX, do_vslti_s, MO_64)
TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
TRANS(vslti_bu, LSX, do_vslti_u, MO_8)
TRANS(vslti_hu, LSX, do_vslti_u, MO_16)
TRANS(vslti_wu, LSX, do_vslti_u, MO_32)
TRANS(vslti_du, LSX, do_vslti_u, MO_64)
4205
4206 static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
4207 {
4208 uint32_t flags;
4209 void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4210 TCGv_i32 vd = tcg_constant_i32(a->vd);
4211 TCGv_i32 vj = tcg_constant_i32(a->vj);
4212 TCGv_i32 vk = tcg_constant_i32(a->vk);
4213
4214 if (!avail_LSX(ctx)) {
4215 return false;
4216 }
4217
4218 if (!check_vec(ctx, 16)) {
4219 return true;
4220 }
4221
4222 fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
4223 flags = get_fcmp_flags(a->fcond >> 1);
4224 fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
4225
4226 return true;
4227 }
4228
4229 static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
4230 {
4231 uint32_t flags;
4232 void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4233 TCGv_i32 vd = tcg_constant_i32(a->vd);
4234 TCGv_i32 vj = tcg_constant_i32(a->vj);
4235 TCGv_i32 vk = tcg_constant_i32(a->vk);
4236
4237 if (!avail_LSX(ctx)) {
4238 return false;
4239 }
4240
4241 if (!check_vec(ctx, 16)) {
4242 return true;
4243 }
4244
4245 fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
4246 flags = get_fcmp_flags(a->fcond >> 1);
4247 fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
4248
4249 return true;
4250 }
4251
4252 static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
4253 {
4254 if (!avail_LSX(ctx)) {
4255 return false;
4256 }
4257
4258 if (!check_vec(ctx, 16)) {
4259 return true;
4260 }
4261
4262 tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
4263 vec_full_offset(a->vk), vec_full_offset(a->vj),
4264 16, ctx->vl/8);
4265 return true;
4266 }
4267
4268 static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
4269 {
4270 tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
4271 }
4272
4273 static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
4274 {
4275 static const GVecGen2i op = {
4276 .fniv = gen_vbitseli,
4277 .fnoi = gen_helper_vbitseli_b,
4278 .vece = MO_8,
4279 .load_dest = true
4280 };
4281
4282 if (!avail_LSX(ctx)) {
4283 return false;
4284 }
4285
4286 if (!check_vec(ctx, 16)) {
4287 return true;
4288 }
4289
4290 tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
4291 16, ctx->vl/8, a->imm, &op);
4292 return true;
4293 }
4294
/*
 * VSETEQZ.V / VSETNEZ.V: set flag register cd from a whole-vector
 * zero test of vj (cf[cd] = (vj == 0) for EQ, (vj != 0) for NE).
 * The availability and SXE-enable checks must come before get_vreg64(),
 * so no TCG ops are emitted when the instruction is rejected (this
 * matches every other trans function in this file).
 */
#define VSET(NAME, COND)                                                   \
static bool trans_## NAME (DisasContext *ctx, arg_cv *a)                   \
{                                                                          \
    TCGv_i64 t1, al, ah;                                                   \
                                                                           \
    if (!avail_LSX(ctx)) {                                                 \
        return false;                                                      \
    }                                                                      \
                                                                           \
    if (!check_vec(ctx, 16)) {                                             \
        return true;                                                       \
    }                                                                      \
                                                                           \
    al = tcg_temp_new_i64();                                               \
    ah = tcg_temp_new_i64();                                               \
    t1 = tcg_temp_new_i64();                                               \
                                                                           \
    get_vreg64(ah, a->vj, 1);                                              \
    get_vreg64(al, a->vj, 0);                                              \
                                                                           \
    tcg_gen_or_i64(t1, al, ah);                                            \
    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                 \
    tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
                                                                           \
    return true;                                                           \
}
4321
VSET(vseteqz_v, TCG_COND_EQ)
VSET(vsetnez_v, TCG_COND_NE)

/* Per-element zero tests writing a flag register, via helpers. */
TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b)
TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h)
TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w)
TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d)
TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b)
TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
4333
4334 static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
4335 {
4336 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4337
4338 if (!avail_LSX(ctx)) {
4339 return false;
4340 }
4341
4342 if (!check_vec(ctx, 16)) {
4343 return true;
4344 }
4345
4346 tcg_gen_st8_i64(src, cpu_env,
4347 offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
4348 return true;
4349 }
4350
4351 static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
4352 {
4353 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4354
4355 if (!avail_LSX(ctx)) {
4356 return false;
4357 }
4358
4359 if (!check_vec(ctx, 16)) {
4360 return true;
4361 }
4362
4363 tcg_gen_st16_i64(src, cpu_env,
4364 offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
4365 return true;
4366 }
4367
4368 static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
4369 {
4370 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4371
4372 if (!avail_LSX(ctx)) {
4373 return false;
4374 }
4375
4376 if (!check_vec(ctx, 16)) {
4377 return true;
4378 }
4379
4380 tcg_gen_st32_i64(src, cpu_env,
4381 offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
4382 return true;
4383 }
4384
4385 static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
4386 {
4387 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4388
4389 if (!avail_LSX(ctx)) {
4390 return false;
4391 }
4392
4393 if (!check_vec(ctx, 16)) {
4394 return true;
4395 }
4396
4397 tcg_gen_st_i64(src, cpu_env,
4398 offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
4399 return true;
4400 }
4401
4402 static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
4403 {
4404 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4405
4406 if (!avail_LSX(ctx)) {
4407 return false;
4408 }
4409
4410 if (!check_vec(ctx, 16)) {
4411 return true;
4412 }
4413
4414 tcg_gen_ld8s_i64(dst, cpu_env,
4415 offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
4416 return true;
4417 }
4418
4419 static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
4420 {
4421 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4422
4423 if (!avail_LSX(ctx)) {
4424 return false;
4425 }
4426
4427 if (!check_vec(ctx, 16)) {
4428 return true;
4429 }
4430
4431 tcg_gen_ld16s_i64(dst, cpu_env,
4432 offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
4433 return true;
4434 }
4435
4436 static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
4437 {
4438 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4439
4440 if (!avail_LSX(ctx)) {
4441 return false;
4442 }
4443
4444 if (!check_vec(ctx, 16)) {
4445 return true;
4446 }
4447
4448 tcg_gen_ld32s_i64(dst, cpu_env,
4449 offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
4450 return true;
4451 }
4452
4453 static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
4454 {
4455 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4456
4457 if (!avail_LSX(ctx)) {
4458 return false;
4459 }
4460
4461 if (!check_vec(ctx, 16)) {
4462 return true;
4463 }
4464
4465 tcg_gen_ld_i64(dst, cpu_env,
4466 offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
4467 return true;
4468 }
4469
4470 static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
4471 {
4472 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4473
4474 if (!avail_LSX(ctx)) {
4475 return false;
4476 }
4477
4478 if (!check_vec(ctx, 16)) {
4479 return true;
4480 }
4481
4482 tcg_gen_ld8u_i64(dst, cpu_env,
4483 offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
4484 return true;
4485 }
4486
4487 static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
4488 {
4489 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4490
4491 if (!avail_LSX(ctx)) {
4492 return false;
4493 }
4494
4495 if (!check_vec(ctx, 16)) {
4496 return true;
4497 }
4498
4499 tcg_gen_ld16u_i64(dst, cpu_env,
4500 offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
4501 return true;
4502 }
4503
4504 static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
4505 {
4506 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4507
4508 if (!avail_LSX(ctx)) {
4509 return false;
4510 }
4511
4512 if (!check_vec(ctx, 16)) {
4513 return true;
4514 }
4515
4516 tcg_gen_ld32u_i64(dst, cpu_env,
4517 offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
4518 return true;
4519 }
4520
4521 static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
4522 {
4523 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4524
4525 if (!avail_LSX(ctx)) {
4526 return false;
4527 }
4528
4529 if (!check_vec(ctx, 16)) {
4530 return true;
4531 }
4532
4533 tcg_gen_ld_i64(dst, cpu_env,
4534 offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
4535 return true;
4536 }
4537
4538 static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
4539 uint32_t oprsz, MemOp mop)
4540 {
4541 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4542
4543 if (!check_vec(ctx, oprsz)) {
4544 return true;
4545 }
4546
4547 tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
4548 oprsz, ctx->vl/8, src);
4549 return true;
4550 }
4551
/* 128-bit (LSX) broadcast wrapper around gvec_dup_vl(). */
static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
{
    return gvec_dup_vl(ctx, a, 16, mop);
}
4556
/* 256-bit (LASX) broadcast wrapper around gvec_dup_vl(). */
static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
{
    return gvec_dup_vl(ctx, a, 32, mop);
}
4561
/* VREPLGR2VR / XVREPLGR2VR: replicate a GPR across all vector elements. */
TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
4570
4571 static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
4572 {
4573 if (!avail_LSX(ctx)) {
4574 return false;
4575 }
4576
4577 if (!check_vec(ctx, 16)) {
4578 return true;
4579 }
4580
4581 tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
4582 offsetof(CPULoongArchState,
4583 fpr[a->vj].vreg.B((a->imm))),
4584 16, ctx->vl/8);
4585 return true;
4586 }
4587
4588 static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
4589 {
4590 if (!avail_LSX(ctx)) {
4591 return false;
4592 }
4593
4594 if (!check_vec(ctx, 16)) {
4595 return true;
4596 }
4597
4598 tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
4599 offsetof(CPULoongArchState,
4600 fpr[a->vj].vreg.H((a->imm))),
4601 16, ctx->vl/8);
4602 return true;
4603 }
4604 static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
4605 {
4606 if (!avail_LSX(ctx)) {
4607 return false;
4608 }
4609
4610 if (!check_vec(ctx, 16)) {
4611 return true;
4612 }
4613
4614 tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
4615 offsetof(CPULoongArchState,
4616 fpr[a->vj].vreg.W((a->imm))),
4617 16, ctx->vl/8);
4618 return true;
4619 }
4620 static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
4621 {
4622 if (!avail_LSX(ctx)) {
4623 return false;
4624 }
4625
4626 if (!check_vec(ctx, 16)) {
4627 return true;
4628 }
4629
4630 tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
4631 offsetof(CPULoongArchState,
4632 fpr[a->vj].vreg.D((a->imm))),
4633 16, ctx->vl/8);
4634 return true;
4635 }
4636
4637 static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
4638 void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
4639 {
4640 TCGv_i64 t0 = tcg_temp_new_i64();
4641 TCGv_ptr t1 = tcg_temp_new_ptr();
4642 TCGv_i64 t2 = tcg_temp_new_i64();
4643
4644 if (!avail_LSX(ctx)) {
4645 return false;
4646 }
4647
4648 if (!check_vec(ctx, 16)) {
4649 return true;
4650 }
4651
4652 tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
4653 tcg_gen_shli_i64(t0, t0, vece);
4654 if (HOST_BIG_ENDIAN) {
4655 tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1));
4656 }
4657
4658 tcg_gen_trunc_i64_ptr(t1, t0);
4659 tcg_gen_add_ptr(t1, t1, cpu_env);
4660 func(t2, t1, vec_full_offset(a->vj));
4661 tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2);
4662
4663 return true;
4664 }
4665
/* VREPLVE: element index comes from GPR rk (see gen_vreplve). */
TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
4670
/*
 * VBSLL.V: shift the whole 128-bit vj left by (imm & 0xf) bytes,
 * implemented on the two 64-bit halves.
 */
static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
{
    int ofs;
    TCGv_i64 desthigh, destlow, high, low;

    if (!avail_LSX(ctx)) {
        return false;
    }

    if (!check_vec(ctx, 16)) {
        return true;
    }

    desthigh = tcg_temp_new_i64();
    destlow = tcg_temp_new_i64();
    high = tcg_temp_new_i64();
    low = tcg_temp_new_i64();

    get_vreg64(low, a->vj, 0);

    /* Shift amount in bits, 0..120 in steps of 8. */
    ofs = ((a->imm) & 0xf) * 8;
    if (ofs < 64) {
        /* High half receives bits shifted out of the low half. */
        get_vreg64(high, a->vj, 1);
        tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
        tcg_gen_shli_i64(destlow, low, ofs);
    } else {
        /* Shift >= 64: low half becomes zero, high half from low only. */
        tcg_gen_shli_i64(desthigh, low, ofs - 64);
        destlow = tcg_constant_i64(0);
    }

    set_vreg64(desthigh, a->vd, 1);
    set_vreg64(destlow, a->vd, 0);

    return true;
}
4706
/*
 * VBSRL.V: shift the whole 128-bit vj right by (imm & 0xf) bytes,
 * implemented on the two 64-bit halves (mirror of trans_vbsll_v).
 */
static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
{
    TCGv_i64 desthigh, destlow, high, low;
    int ofs;

    if (!avail_LSX(ctx)) {
        return false;
    }

    if (!check_vec(ctx, 16)) {
        return true;
    }

    desthigh = tcg_temp_new_i64();
    destlow = tcg_temp_new_i64();
    high = tcg_temp_new_i64();
    low = tcg_temp_new_i64();

    get_vreg64(high, a->vj, 1);

    /* Shift amount in bits, 0..120 in steps of 8. */
    ofs = ((a->imm) & 0xf) * 8;
    if (ofs < 64) {
        /* Low half receives bits shifted in from the high half. */
        get_vreg64(low, a->vj, 0);
        tcg_gen_extract2_i64(destlow, low, high, ofs);
        tcg_gen_shri_i64(desthigh, high, ofs);
    } else {
        /* Shift >= 64: high half becomes zero, low half from high only. */
        tcg_gen_shri_i64(destlow, high, ofs - 64);
        desthigh = tcg_constant_i64(0);
    }

    set_vreg64(desthigh, a->vd, 1);
    set_vreg64(destlow, a->vd, 0);

    return true;
}
4742
/* VPACKEV/VPACKOD: helper-based element packing. */
TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d)
TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)

/* VPICKEV/VPICKOD: helper-based element picking. */
TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)

/* VILVL/VILVH: helper-based element interleaving. */
TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)

/* Shuffles: vshuf_b takes a fourth register operand for the indices. */
TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)

TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)

/* VEXTRINS: insert an extracted element, helper-based. */
TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
4785
4786 static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
4787 {
4788 TCGv addr;
4789 TCGv_i64 rl, rh;
4790 TCGv_i128 val;
4791
4792 if (!avail_LSX(ctx)) {
4793 return false;
4794 }
4795
4796 if (!check_vec(ctx, 16)) {
4797 return true;
4798 }
4799
4800 addr = gpr_src(ctx, a->rj, EXT_NONE);
4801 val = tcg_temp_new_i128();
4802 rl = tcg_temp_new_i64();
4803 rh = tcg_temp_new_i64();
4804
4805 addr = make_address_i(ctx, addr, a->imm);
4806
4807 tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
4808 tcg_gen_extr_i128_i64(rl, rh, val);
4809 set_vreg64(rh, a->vd, 1);
4810 set_vreg64(rl, a->vd, 0);
4811
4812 return true;
4813 }
4814
4815 static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
4816 {
4817 TCGv addr;
4818 TCGv_i128 val;
4819 TCGv_i64 ah, al;
4820
4821 if (!avail_LSX(ctx)) {
4822 return false;
4823 }
4824
4825 if (!check_vec(ctx, 16)) {
4826 return true;
4827 }
4828
4829 addr = gpr_src(ctx, a->rj, EXT_NONE);
4830 val = tcg_temp_new_i128();
4831 ah = tcg_temp_new_i64();
4832 al = tcg_temp_new_i64();
4833
4834 addr = make_address_i(ctx, addr, a->imm);
4835
4836 get_vreg64(ah, a->vd, 1);
4837 get_vreg64(al, a->vd, 0);
4838 tcg_gen_concat_i64_i128(val, al, ah);
4839 tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
4840
4841 return true;
4842 }
4843
4844 static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
4845 {
4846 TCGv addr, src1, src2;
4847 TCGv_i64 rl, rh;
4848 TCGv_i128 val;
4849
4850 if (!avail_LSX(ctx)) {
4851 return false;
4852 }
4853
4854 if (!check_vec(ctx, 16)) {
4855 return true;
4856 }
4857
4858 src1 = gpr_src(ctx, a->rj, EXT_NONE);
4859 src2 = gpr_src(ctx, a->rk, EXT_NONE);
4860 val = tcg_temp_new_i128();
4861 rl = tcg_temp_new_i64();
4862 rh = tcg_temp_new_i64();
4863
4864 addr = make_address_x(ctx, src1, src2);
4865 tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
4866 tcg_gen_extr_i128_i64(rl, rh, val);
4867 set_vreg64(rh, a->vd, 1);
4868 set_vreg64(rl, a->vd, 0);
4869
4870 return true;
4871 }
4872
4873 static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
4874 {
4875 TCGv addr, src1, src2;
4876 TCGv_i64 ah, al;
4877 TCGv_i128 val;
4878
4879 if (!avail_LSX(ctx)) {
4880 return false;
4881 }
4882
4883 if (!check_vec(ctx, 16)) {
4884 return true;
4885 }
4886
4887 src1 = gpr_src(ctx, a->rj, EXT_NONE);
4888 src2 = gpr_src(ctx, a->rk, EXT_NONE);
4889 val = tcg_temp_new_i128();
4890 ah = tcg_temp_new_i64();
4891 al = tcg_temp_new_i64();
4892
4893 addr = make_address_x(ctx, src1, src2);
4894 get_vreg64(ah, a->vd, 1);
4895 get_vreg64(al, a->vd, 0);
4896 tcg_gen_concat_i64_i128(val, al, ah);
4897 tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
4898
4899 return true;
4900 }
4901
/*
 * VLDREPL.{B,H,W,D}: load one MO-sized value from mem[rj + imm] and
 * replicate it across all elements of vd.
 */
#define VLDREPL(NAME, MO)                                                 \
static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a)                \
{                                                                         \
    TCGv addr;                                                            \
    TCGv_i64 val;                                                         \
                                                                          \
    if (!avail_LSX(ctx)) {                                                \
        return false;                                                     \
    }                                                                     \
                                                                          \
    if (!check_vec(ctx, 16)) {                                            \
        return true;                                                      \
    }                                                                     \
                                                                          \
    addr = gpr_src(ctx, a->rj, EXT_NONE);                                 \
    val = tcg_temp_new_i64();                                             \
                                                                          \
    addr = make_address_i(ctx, addr, a->imm);                             \
                                                                          \
    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, MO);                     \
    tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \
                                                                          \
    return true;                                                          \
}

VLDREPL(vldrepl_b, MO_8)
VLDREPL(vldrepl_h, MO_16)
VLDREPL(vldrepl_w, MO_32)
VLDREPL(vldrepl_d, MO_64)
4931
/*
 * VSTELM.{B,H,W,D}: store element imm2 of vd (accessed via the E()
 * element macro) to mem[rj + imm].
 */
#define VSTELM(NAME, MO, E)                                               \
static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a)               \
{                                                                         \
    TCGv addr;                                                            \
    TCGv_i64 val;                                                         \
                                                                          \
    if (!avail_LSX(ctx)) {                                                \
        return false;                                                     \
    }                                                                     \
                                                                          \
    if (!check_vec(ctx, 16)) {                                            \
        return true;                                                      \
    }                                                                     \
                                                                          \
    addr = gpr_src(ctx, a->rj, EXT_NONE);                                 \
    val = tcg_temp_new_i64();                                             \
                                                                          \
    addr = make_address_i(ctx, addr, a->imm);                             \
                                                                          \
    tcg_gen_ld_i64(val, cpu_env,                                          \
                   offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2))); \
    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, MO);                     \
                                                                          \
    return true;                                                          \
}

VSTELM(vstelm_b, MO_8, B)
VSTELM(vstelm_h, MO_16, H)
VSTELM(vstelm_w, MO_32, W)
VSTELM(vstelm_d, MO_64, D)