/* Source: mirror_qemu.git, target/s390x/tcg/vec_int_helper.c */
/*
 * QEMU TCG support -- s390x vector integer instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12 #include "qemu/osdep.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
18 static bool s390_vec_is_zero(const S390Vector
*v
)
20 return !v
->doubleword
[0] && !v
->doubleword
[1];
23 static void s390_vec_xor(S390Vector
*res
, const S390Vector
*a
,
26 res
->doubleword
[0] = a
->doubleword
[0] ^ b
->doubleword
[0];
27 res
->doubleword
[1] = a
->doubleword
[1] ^ b
->doubleword
[1];
30 static void s390_vec_and(S390Vector
*res
, const S390Vector
*a
,
33 res
->doubleword
[0] = a
->doubleword
[0] & b
->doubleword
[0];
34 res
->doubleword
[1] = a
->doubleword
[1] & b
->doubleword
[1];
37 static bool s390_vec_equal(const S390Vector
*a
, const S390Vector
*b
)
39 return a
->doubleword
[0] == b
->doubleword
[0] &&
40 a
->doubleword
[1] == b
->doubleword
[1];
43 static void s390_vec_shl(S390Vector
*d
, const S390Vector
*a
, uint64_t count
)
47 g_assert(count
< 128);
49 d
->doubleword
[0] = a
->doubleword
[0];
50 d
->doubleword
[1] = a
->doubleword
[1];
51 } else if (count
== 64) {
52 d
->doubleword
[0] = a
->doubleword
[1];
54 } else if (count
< 64) {
55 tmp
= extract64(a
->doubleword
[1], 64 - count
, count
);
56 d
->doubleword
[1] = a
->doubleword
[1] << count
;
57 d
->doubleword
[0] = (a
->doubleword
[0] << count
) | tmp
;
59 d
->doubleword
[0] = a
->doubleword
[1] << (count
- 64);
64 static void s390_vec_sar(S390Vector
*d
, const S390Vector
*a
, uint64_t count
)
69 d
->doubleword
[0] = a
->doubleword
[0];
70 d
->doubleword
[1] = a
->doubleword
[1];
71 } else if (count
== 64) {
72 tmp
= (int64_t)a
->doubleword
[0] >> 63;
73 d
->doubleword
[1] = a
->doubleword
[0];
74 d
->doubleword
[0] = tmp
;
75 } else if (count
< 64) {
76 tmp
= a
->doubleword
[1] >> count
;
77 d
->doubleword
[1] = deposit64(tmp
, 64 - count
, count
, a
->doubleword
[0]);
78 d
->doubleword
[0] = (int64_t)a
->doubleword
[0] >> count
;
80 tmp
= (int64_t)a
->doubleword
[0] >> 63;
81 d
->doubleword
[1] = (int64_t)a
->doubleword
[0] >> (count
- 64);
82 d
->doubleword
[0] = tmp
;
86 static void s390_vec_shr(S390Vector
*d
, const S390Vector
*a
, uint64_t count
)
90 g_assert(count
< 128);
92 d
->doubleword
[0] = a
->doubleword
[0];
93 d
->doubleword
[1] = a
->doubleword
[1];
94 } else if (count
== 64) {
95 d
->doubleword
[1] = a
->doubleword
[0];
97 } else if (count
< 64) {
98 tmp
= a
->doubleword
[1] >> count
;
99 d
->doubleword
[1] = deposit64(tmp
, 64 - count
, count
, a
->doubleword
[0]);
100 d
->doubleword
[0] = a
->doubleword
[0] >> count
;
102 d
->doubleword
[1] = a
->doubleword
[0] >> (count
- 64);
103 d
->doubleword
[0] = 0;
/*
 * Define helper gvec_vavg<BITS>: per-element signed average of v2 and v3,
 * rounded via (a + b + 1) >> 1. Elements are sign-extended into int32_t
 * before the addition, so BITS must be narrower than 32.
 */
#define DEF_VAVG(BITS) \
void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
\
        s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
    } \
}
/*
 * Define helper gvec_vavgl<BITS>: per-element unsigned average of v2 and
 * v3, rounded via (a + b + 1) >> 1. The sum relies on integer promotion to
 * a wider type, so BITS must be narrower than int.
 */
#define DEF_VAVGL(BITS) \
void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \
                              uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
\
        s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
    } \
}
/*
 * Define helper gvec_vclz<BITS>: per-element count of leading zero bits.
 * clz32() counts within 32 bits, so the (32 - BITS) extra leading zeros of
 * the widened element are subtracted again.
 */
#define DEF_VCLZ(BITS) \
void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
\
        s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
    } \
}
/*
 * Define helper gvec_vctz<BITS>: per-element count of trailing zero bits.
 * A zero element yields BITS rather than the 32-bit answer of ctz32().
 */
#define DEF_VCTZ(BITS) \
void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
\
        s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
    } \
}
/*
 * like binary multiplication, but XOR instead of addition
 *
 * Defines galois_multiply<BITS>(a, b): shift-and-XOR (carry-less) product
 * of two operands that fit in BITS bits. Arguments and result use the
 * TBITS-wide type so the double-width product is not truncated.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
                                             uint##TBITS##_t b) \
{ \
    uint##TBITS##_t res = 0; \
\
    while (b) { \
        if (b & 0x1) { \
            res = res ^ a; \
        } \
        a = a << 1; \
        b = b >> 1; \
    } \
    return res; \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
186 static S390Vector
galois_multiply64(uint64_t a
, uint64_t b
)
196 while (!s390_vec_is_zero(&vb
)) {
197 if (vb
.doubleword
[1] & 0x1) {
198 s390_vec_xor(&res
, &res
, &va
);
200 s390_vec_shl(&va
, &va
, 1);
201 s390_vec_shr(&vb
, &vb
, 1);
/*
 * Define helper gvec_vgfm<BITS>: for each TBITS-wide result element i,
 * carry-less multiply the BITS-wide source elements 2i and 2i + 1 of v2
 * and v3 pairwise and XOR the two double-width products together.
 */
#define DEF_VGFM(BITS, TBITS) \
void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / TBITS); i++) { \
        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
\
        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
        d = d ^ galois_multiply##BITS(a, b); \
        s390_vec_write_element##TBITS(v1, i, d); \
    } \
}
227 void HELPER(gvec_vgfm64
)(void *v1
, const void *v2
, const void *v3
,
230 S390Vector tmp1
, tmp2
;
233 a
= s390_vec_read_element64(v2
, 0);
234 b
= s390_vec_read_element64(v3
, 0);
235 tmp1
= galois_multiply64(a
, b
);
236 a
= s390_vec_read_element64(v2
, 1);
237 b
= s390_vec_read_element64(v3
, 1);
238 tmp2
= galois_multiply64(a
, b
);
239 s390_vec_xor(v1
, &tmp1
, &tmp2
);
/*
 * Define helper gvec_vgfma<BITS>: like gvec_vgfm<BITS>, but additionally
 * XOR the TBITS-wide element i of v4 into each result element.
 */
#define DEF_VGFMA(BITS, TBITS) \
void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / TBITS); i++) { \
        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
\
        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
        d = d ^ galois_multiply##BITS(a, b); \
        d = d ^ s390_vec_read_element##TBITS(v4, i); \
        s390_vec_write_element##TBITS(v1, i, d); \
    } \
}
264 void HELPER(gvec_vgfma64
)(void *v1
, const void *v2
, const void *v3
,
265 const void *v4
, uint32_t desc
)
267 S390Vector tmp1
, tmp2
;
270 a
= s390_vec_read_element64(v2
, 0);
271 b
= s390_vec_read_element64(v3
, 0);
272 tmp1
= galois_multiply64(a
, b
);
273 a
= s390_vec_read_element64(v2
, 1);
274 b
= s390_vec_read_element64(v3
, 1);
275 tmp2
= galois_multiply64(a
, b
);
276 s390_vec_xor(&tmp1
, &tmp1
, &tmp2
);
277 s390_vec_xor(v1
, &tmp1
, v4
);
/*
 * Define helper gvec_vmal<BITS>: per-element a * b + c, keeping the low
 * BITS bits of the result.
 */
#define DEF_VMAL(BITS) \
void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
        const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
\
        /* 1u forces unsigned arithmetic: uint16_t operands would promote \
         * to signed int, where 0xffff * 0xffff overflows. */ \
        s390_vec_write_element##BITS(v1, i, 1u * a * b + c); \
    } \
}
/*
 * Define helper gvec_vmah<BITS>: per-element signed a * b + c, keeping the
 * bits above the low BITS (the "high" part). Elements are sign-extended
 * into int32_t, so BITS must be at most 16.
 */
#define DEF_VMAH(BITS) \
void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
        const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \
\
        s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
    } \
}
/*
 * Define helper gvec_vmalh<BITS>: per-element unsigned a * b + c, keeping
 * the bits above the low BITS (the "high" part).
 *
 * The operands are widened to uint32_t: uint16_t operands would otherwise
 * promote to signed int, where 0xffff * 0xffff overflows. BITS must be at
 * most 16 so the full result fits in 32 bits.
 */
#define DEF_VMALH(BITS) \
void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint32_t a = s390_vec_read_element##BITS(v2, i); \
        const uint32_t b = s390_vec_read_element##BITS(v3, i); \
        const uint32_t c = s390_vec_read_element##BITS(v4, i); \
\
        s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
    } \
}
/*
 * Define helper gvec_vmae<BITS>: for each TBITS-wide result element i,
 * multiply the sign-extended even-indexed (j = 2i) BITS-wide elements of
 * v2 and v3 and add the TBITS-wide element i of v4.
 */
#define DEF_VMAE(BITS, TBITS) \
void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
        int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Define helper gvec_vmale<BITS>: for each TBITS-wide result element i,
 * multiply the zero-extended even-indexed (j = 2i) BITS-wide elements of
 * v2 and v3 and add the TBITS-wide element i of v4.
 */
#define DEF_VMALE(BITS, TBITS) \
void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
        uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Define helper gvec_vmao<BITS>: for each TBITS-wide result element i,
 * multiply the sign-extended odd-indexed (j = 2i + 1) BITS-wide elements
 * of v2 and v3 and add the TBITS-wide element i of v4.
 */
#define DEF_VMAO(BITS, TBITS) \
void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
        int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Define helper gvec_vmalo<BITS>: for each TBITS-wide result element i,
 * multiply the zero-extended odd-indexed (j = 2i + 1) BITS-wide elements
 * of v2 and v3 and add the TBITS-wide element i of v4.
 */
#define DEF_VMALO(BITS, TBITS) \
void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
        uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Define helper gvec_vmh<BITS>: per-element signed product of v2 and v3,
 * keeping the bits above the low BITS (the "high" part). Elements are
 * sign-extended into int32_t, so BITS must be at most 16.
 */
#define DEF_VMH(BITS) \
void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \
                            uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
\
        s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
    } \
}
/*
 * Define helper gvec_vmlh<BITS>: per-element unsigned product of v2 and
 * v3, keeping the bits above the low BITS (the "high" part).
 *
 * The operands are widened to uint32_t: uint16_t operands would otherwise
 * promote to signed int, where 0xffff * 0xffff overflows. BITS must be at
 * most 16 so the full product fits in 32 bits.
 */
#define DEF_VMLH(BITS) \
void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint32_t a = s390_vec_read_element##BITS(v2, i); \
        const uint32_t b = s390_vec_read_element##BITS(v3, i); \
\
        s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
    } \
}
/*
 * Define helper gvec_vme<BITS>: for each TBITS-wide result element i,
 * the signed product of the even-indexed (j = 2i) BITS-wide elements of
 * v2 and v3.
 */
#define DEF_VME(BITS, TBITS) \
void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \
                            uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
\
        s390_vec_write_element##TBITS(v1, i, a * b); \
    } \
}
/*
 * Define helper gvec_vmle<BITS>: for each TBITS-wide result element i,
 * the unsigned product of the even-indexed (j = 2i) BITS-wide elements of
 * v2 and v3.
 */
#define DEF_VMLE(BITS, TBITS) \
void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
        const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
        const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
\
        s390_vec_write_element##TBITS(v1, i, a * b); \
    } \
}
/*
 * Define helper gvec_vmo<BITS>: for each TBITS-wide result element i,
 * the signed product of the odd-indexed (j = 2i + 1) BITS-wide elements
 * of v2 and v3.
 */
#define DEF_VMO(BITS, TBITS) \
void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \
                            uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
\
        s390_vec_write_element##TBITS(v1, i, a * b); \
    } \
}
/*
 * Define helper gvec_vmlo<BITS>: for each TBITS-wide result element i,
 * the unsigned product of the odd-indexed (j = 2i + 1) BITS-wide elements
 * of v2 and v3.
 */
#define DEF_VMLO(BITS, TBITS) \
void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int i, j; \
\
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
        const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
        const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
\
        s390_vec_write_element##TBITS(v1, i, a * b); \
    } \
}
/*
 * Define helper gvec_vpopct<BITS>: per-element population count (number
 * of one bits), computed with the 32-bit ctpop32().
 */
#define DEF_VPOPCT(BITS) \
void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
\
        s390_vec_write_element##BITS(v1, i, ctpop32(a)); \
    } \
}
/*
 * Define helper gvec_verim<BITS>: rotate each element of v2 left by the
 * count taken from simd_data(desc), then insert it into v1 under the mask
 * in v3: mask bits set take the rotated v2 bit, mask bits clear keep the
 * existing v1 bit.
 */
#define DEF_VERIM(BITS) \
void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
                              uint32_t desc) \
{ \
    const uint8_t count = simd_data(desc); \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \
        const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \
        const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \
        const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \
\
        s390_vec_write_element##BITS(v1, i, d); \
    } \
}
536 void HELPER(gvec_vsl
)(void *v1
, const void *v2
, uint64_t count
,
539 s390_vec_shl(v1
, v2
, count
);
542 void HELPER(gvec_vsl_ve2
)(void *v1
, const void *v2
, const void *v3
,
546 uint32_t sh
, e0
, e1
= 0;
549 for (i
= 15; i
>= 0; --i
, e1
= e0
) {
550 e0
= s390_vec_read_element8(v2
, i
);
551 sh
= s390_vec_read_element8(v3
, i
) & 7;
553 s390_vec_write_element8(&tmp
, i
, rol32(e0
| (e1
<< 24), sh
));
556 *(S390Vector
*)v1
= tmp
;
559 void HELPER(gvec_vsra
)(void *v1
, const void *v2
, uint64_t count
,
562 s390_vec_sar(v1
, v2
, count
);
565 void HELPER(gvec_vsra_ve2
)(void *v1
, const void *v2
, const void *v3
,
569 uint32_t sh
, e0
, e1
= 0;
572 /* Byte 0 is special only. */
573 e0
= (int32_t)(int8_t)s390_vec_read_element8(v2
, i
);
574 sh
= s390_vec_read_element8(v3
, i
) & 7;
575 s390_vec_write_element8(&tmp
, i
, e0
>> sh
);
578 for (i
= 1; i
< 16; ++i
, e1
= e0
) {
579 e0
= s390_vec_read_element8(v2
, i
);
580 sh
= s390_vec_read_element8(v3
, i
) & 7;
581 s390_vec_write_element8(&tmp
, i
, (e0
| e1
<< 8) >> sh
);
584 *(S390Vector
*)v1
= tmp
;
587 void HELPER(gvec_vsrl
)(void *v1
, const void *v2
, uint64_t count
,
590 s390_vec_shr(v1
, v2
, count
);
593 void HELPER(gvec_vsrl_ve2
)(void *v1
, const void *v2
, const void *v3
,
597 uint32_t sh
, e0
, e1
= 0;
599 for (int i
= 0; i
< 16; ++i
, e1
= e0
) {
600 e0
= s390_vec_read_element8(v2
, i
);
601 sh
= s390_vec_read_element8(v3
, i
) & 7;
603 s390_vec_write_element8(&tmp
, i
, (e0
| (e1
<< 8)) >> sh
);
606 *(S390Vector
*)v1
= tmp
;
/*
 * Define helper gvec_vscbi<BITS>: per-element borrow indication for the
 * unsigned subtraction a - b; writes 1 when a >= b and 0 otherwise.
 */
#define DEF_VSCBI(BITS) \
void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \
                              uint32_t desc) \
{ \
    int i; \
\
    for (i = 0; i < (128 / BITS); i++) { \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
\
        s390_vec_write_element##BITS(v1, i, a >= b); \
    } \
}
625 void HELPER(gvec_vtm
)(void *v1
, const void *v2
, CPUS390XState
*env
,
630 s390_vec_and(&tmp
, v1
, v2
);
631 if (s390_vec_is_zero(&tmp
)) {
632 /* Selected bits all zeros; or all mask bits zero */
634 } else if (s390_vec_equal(&tmp
, v2
)) {
635 /* Selected bits all ones */
638 /* Selected bits a mix of zeros and ones */