/*
 * QEMU TCG support -- s390x vector integer instruction support
 * (mirror_qemu.git, target/s390x/vec_int_helper.c)
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
16 #include "exec/helper-proto.h"
17 #include "tcg/tcg-gvec-desc.h"
19 static bool s390_vec_is_zero(const S390Vector
*v
)
21 return !v
->doubleword
[0] && !v
->doubleword
[1];
24 static void s390_vec_xor(S390Vector
*res
, const S390Vector
*a
,
27 res
->doubleword
[0] = a
->doubleword
[0] ^ b
->doubleword
[0];
28 res
->doubleword
[1] = a
->doubleword
[1] ^ b
->doubleword
[1];
31 static void s390_vec_shl(S390Vector
*d
, const S390Vector
*a
, uint64_t count
)
35 g_assert(count
< 128);
37 d
->doubleword
[0] = a
->doubleword
[0];
38 d
->doubleword
[1] = a
->doubleword
[1];
39 } else if (count
== 64) {
40 d
->doubleword
[0] = a
->doubleword
[1];
42 } else if (count
< 64) {
43 tmp
= extract64(a
->doubleword
[1], 64 - count
, count
);
44 d
->doubleword
[1] = a
->doubleword
[1] << count
;
45 d
->doubleword
[0] = (a
->doubleword
[0] << count
) | tmp
;
47 d
->doubleword
[0] = a
->doubleword
[1] << (count
- 64);
52 static void s390_vec_sar(S390Vector
*d
, const S390Vector
*a
, uint64_t count
)
57 d
->doubleword
[0] = a
->doubleword
[0];
58 d
->doubleword
[1] = a
->doubleword
[1];
59 } else if (count
== 64) {
60 d
->doubleword
[1] = a
->doubleword
[0];
62 } else if (count
< 64) {
63 tmp
= a
->doubleword
[1] >> count
;
64 d
->doubleword
[1] = deposit64(tmp
, 64 - count
, count
, a
->doubleword
[0]);
65 d
->doubleword
[0] = (int64_t)a
->doubleword
[0] >> count
;
67 d
->doubleword
[1] = (int64_t)a
->doubleword
[0] >> (count
- 64);
72 static void s390_vec_shr(S390Vector
*d
, const S390Vector
*a
, uint64_t count
)
76 g_assert(count
< 128);
78 d
->doubleword
[0] = a
->doubleword
[0];
79 d
->doubleword
[1] = a
->doubleword
[1];
80 } else if (count
== 64) {
81 d
->doubleword
[1] = a
->doubleword
[0];
83 } else if (count
< 64) {
84 tmp
= a
->doubleword
[1] >> count
;
85 d
->doubleword
[1] = deposit64(tmp
, 64 - count
, count
, a
->doubleword
[0]);
86 d
->doubleword
[0] = a
->doubleword
[0] >> count
;
88 d
->doubleword
[1] = a
->doubleword
[0] >> (count
- 64);
/*
 * Generate HELPER(gvec_vavg<BITS>): for each of the 128 / BITS elements,
 * sign-extend the elements of @v2 and @v3 to 32 bits, add them plus one,
 * shift right by one and store the result in @v1. @desc is unused.
 */
#define DEF_VAVG(BITS) \
void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const int32_t lhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v2, elem); \
        const int32_t rhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v3, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, (lhs + rhs + 1) >> 1); \
    } \
}
/*
 * Generate HELPER(gvec_vavgl<BITS>): unsigned counterpart of the signed
 * average helper. Each result element is (a + b + 1) >> 1 computed from the
 * unsigned elements of @v2 and @v3 and stored in @v1. @desc is unused.
 */
#define DEF_VAVGL(BITS) \
void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \
                              uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t lhs = s390_vec_read_element##BITS(v2, elem); \
        const uint##BITS##_t rhs = s390_vec_read_element##BITS(v3, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, (lhs + rhs + 1) >> 1); \
    } \
}
/*
 * Generate HELPER(gvec_vclz<BITS>): count leading zeros of every element of
 * @v2 and store the counts in @v1. clz32() works on a 32-bit value, so the
 * (32 - BITS) zero bits added by widening are subtracted again.
 */
#define DEF_VCLZ(BITS) \
void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t val = s390_vec_read_element##BITS(v2, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, clz32(val) - 32 + BITS); \
    } \
}
/*
 * Generate HELPER(gvec_vctz<BITS>): count trailing zeros of every element
 * of @v2 and store the counts in @v1. A zero element yields BITS.
 */
#define DEF_VCTZ(BITS) \
void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t val = s390_vec_read_element##BITS(v2, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, val ? ctz32(val) : BITS); \
    } \
}
/*
 * Like binary multiplication, but XOR instead of addition (carry-less).
 *
 * Generates galois_multiply<BITS>(a, b), operating on and returning the
 * TBITS-wide type: for every set bit n of @b, (a << n) is XORed into the
 * result. Callers pass BITS-wide operands so the product fits in TBITS.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
                                             uint##TBITS##_t b) \
{ \
    uint##TBITS##_t res = 0; \
    \
    while (b) { \
        if (b & 0x1) { \
            res = res ^ a; \
        } \
        a = a << 1; \
        b = b >> 1; \
    } \
    return res; \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
172 static S390Vector
galois_multiply64(uint64_t a
, uint64_t b
)
182 while (!s390_vec_is_zero(&vb
)) {
183 if (vb
.doubleword
[1] & 0x1) {
184 s390_vec_xor(&res
, &res
, &va
);
186 s390_vec_shl(&va
, &va
, 1);
187 s390_vec_shr(&vb
, &vb
, 1);
/*
 * Generate HELPER(gvec_vgfm<BITS>): for each TBITS-wide result element i,
 * carry-less multiply elements 2i of @v2 and @v3, do the same for elements
 * 2i + 1, XOR both products and store the result in @v1.
 *
 * Both products use galois_multiply<BITS> so the helper matches the element
 * size it is instantiated for.
 */
#define DEF_VGFM(BITS, TBITS) \
void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int i; \
    \
    for (i = 0; i < (128 / TBITS); i++) { \
        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
        \
        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
        d = d ^ galois_multiply##BITS(a, b); \
        s390_vec_write_element##TBITS(v1, i, d); \
    } \
}
213 void HELPER(gvec_vgfm64
)(void *v1
, const void *v2
, const void *v3
,
216 S390Vector tmp1
, tmp2
;
219 a
= s390_vec_read_element64(v2
, 0);
220 b
= s390_vec_read_element64(v3
, 0);
221 tmp1
= galois_multiply64(a
, b
);
222 a
= s390_vec_read_element64(v2
, 1);
223 b
= s390_vec_read_element64(v3
, 1);
224 tmp2
= galois_multiply64(a
, b
);
225 s390_vec_xor(v1
, &tmp1
, &tmp2
);
/*
 * Generate HELPER(gvec_vgfma<BITS>): like the multiply-sum helper, but the
 * TBITS-wide element i of @v4 is additionally XORed into result element i
 * before it is stored in @v1.
 *
 * Both products use galois_multiply<BITS> so the helper matches the element
 * size it is instantiated for.
 */
#define DEF_VGFMA(BITS, TBITS) \
void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int i; \
    \
    for (i = 0; i < (128 / TBITS); i++) { \
        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
        \
        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
        d = d ^ galois_multiply##BITS(a, b); \
        d = d ^ s390_vec_read_element##TBITS(v4, i); \
        s390_vec_write_element##TBITS(v1, i, d); \
    } \
}
250 void HELPER(gvec_vgfma64
)(void *v1
, const void *v2
, const void *v3
,
251 const void *v4
, uint32_t desc
)
253 S390Vector tmp1
, tmp2
;
256 a
= s390_vec_read_element64(v2
, 0);
257 b
= s390_vec_read_element64(v3
, 0);
258 tmp1
= galois_multiply64(a
, b
);
259 a
= s390_vec_read_element64(v2
, 1);
260 b
= s390_vec_read_element64(v3
, 1);
261 tmp2
= galois_multiply64(a
, b
);
262 s390_vec_xor(&tmp1
, &tmp1
, &tmp2
);
263 s390_vec_xor(v1
, &tmp1
, v4
);
/*
 * Generate HELPER(gvec_vmal<BITS>): per element, multiply @v2 by @v3, add
 * @v4 and store the result, truncated to BITS bits by the element write,
 * in @v1. @desc is unused.
 *
 * NOTE(review): operands narrower than int are promoted to int before the
 * multiplication -- confirm the build's overflow semantics for BITS == 16.
 */
#define DEF_VMAL(BITS) \
void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t lhs = s390_vec_read_element##BITS(v2, elem); \
        const uint##BITS##_t rhs = s390_vec_read_element##BITS(v3, elem); \
        const uint##BITS##_t add = s390_vec_read_element##BITS(v4, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, lhs * rhs + add); \
    } \
}
/*
 * Generate HELPER(gvec_vmah<BITS>): per element, sign-extend @v2, @v3 and
 * @v4 to 32 bits, compute a * b + c and store the bits above position BITS
 * (the result shifted right by BITS) in @v1. @desc is unused.
 */
#define DEF_VMAH(BITS) \
void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const int32_t lhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v2, elem); \
        const int32_t rhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v3, elem); \
        const int32_t add = \
            (int##BITS##_t)s390_vec_read_element##BITS(v4, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, (lhs * rhs + add) >> BITS); \
    } \
}
/*
 * Generate HELPER(gvec_vmalh<BITS>): unsigned counterpart of the signed
 * multiply-and-add-high helper. Per element, compute a * b + c from @v2,
 * @v3 and @v4 and store the result shifted right by BITS in @v1.
 *
 * NOTE(review): operands narrower than int are promoted to int before the
 * multiplication -- confirm the build's overflow semantics for BITS == 16.
 */
#define DEF_VMALH(BITS) \
void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t lhs = s390_vec_read_element##BITS(v2, elem); \
        const uint##BITS##_t rhs = s390_vec_read_element##BITS(v3, elem); \
        const uint##BITS##_t add = s390_vec_read_element##BITS(v4, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, (lhs * rhs + add) >> BITS); \
    } \
}
/*
 * Generate HELPER(gvec_vmae<BITS>): signed multiply-and-add on the
 * even-indexed elements. For each TBITS-wide result element i, the
 * BITS-wide elements j = 2i of @v2 and @v3 are sign-extended and
 * multiplied, and the TBITS-wide element i of @v4 is added.
 *
 * The addend is double-wide like the result, so it is read with the TBITS
 * accessor at index i rather than as a narrow element at index j.
 */
#define DEF_VMAE(BITS, TBITS) \
void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int i, j; \
    \
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
        int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
        \
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Generate HELPER(gvec_vmale<BITS>): unsigned multiply-and-add on the
 * even-indexed elements. For each TBITS-wide result element i, the
 * BITS-wide elements j = 2i of @v2 and @v3 are zero-extended and
 * multiplied, and the TBITS-wide element i of @v4 is added.
 *
 * The addend is double-wide like the result, so it is read with the TBITS
 * accessor at index i rather than as a narrow element at index j.
 */
#define DEF_VMALE(BITS, TBITS) \
void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int i, j; \
    \
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
        uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
        \
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Generate HELPER(gvec_vmao<BITS>): signed multiply-and-add on the
 * odd-indexed elements. For each TBITS-wide result element i, the
 * BITS-wide elements j = 2i + 1 of @v2 and @v3 are sign-extended and
 * multiplied, and the TBITS-wide element i of @v4 is added.
 *
 * The addend is double-wide like the result, so it is read with the TBITS
 * accessor at index i rather than as a narrow element at index j.
 */
#define DEF_VMAO(BITS, TBITS) \
void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
                             const void *v4, uint32_t desc) \
{ \
    int i, j; \
    \
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
        int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
        \
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Generate HELPER(gvec_vmalo<BITS>): unsigned multiply-and-add on the
 * odd-indexed elements. For each TBITS-wide result element i, the
 * BITS-wide elements j = 2i + 1 of @v2 and @v3 are zero-extended and
 * multiplied, and the TBITS-wide element i of @v4 is added.
 *
 * The addend is double-wide like the result, so it is read with the TBITS
 * accessor at index i rather than as a narrow element at index j.
 */
#define DEF_VMALO(BITS, TBITS) \
void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
                              const void *v4, uint32_t desc) \
{ \
    int i, j; \
    \
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
        uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
        \
        s390_vec_write_element##TBITS(v1, i, a * b + c); \
    } \
}
/*
 * Generate HELPER(gvec_vmh<BITS>): per element, sign-extend @v2 and @v3 to
 * 32 bits, multiply them and store the product shifted right by BITS in
 * @v1. @desc is unused.
 */
#define DEF_VMH(BITS) \
void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \
                            uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const int32_t lhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v2, elem); \
        const int32_t rhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v3, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, (lhs * rhs) >> BITS); \
    } \
}
/*
 * Generate HELPER(gvec_vmlh<BITS>): unsigned counterpart of the signed
 * multiply-high helper. Per element, multiply @v2 by @v3 and store the
 * product shifted right by BITS in @v1.
 *
 * NOTE(review): operands narrower than int are promoted to int before the
 * multiplication -- confirm the build's overflow semantics for BITS == 16.
 */
#define DEF_VMLH(BITS) \
void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t lhs = s390_vec_read_element##BITS(v2, elem); \
        const uint##BITS##_t rhs = s390_vec_read_element##BITS(v3, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, (lhs * rhs) >> BITS); \
    } \
}
/*
 * Generate HELPER(gvec_vme<BITS>): signed widening multiply of the
 * even-indexed elements. Result element `dst` (TBITS wide) is the product
 * of the sign-extended BITS-wide elements `src` = 2 * dst of @v2 and @v3.
 */
#define DEF_VME(BITS, TBITS) \
void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \
                            uint32_t desc) \
{ \
    int dst, src; \
    \
    for (dst = 0, src = 0; dst < (128 / TBITS); dst++, src += 2) { \
        int##TBITS##_t lhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v2, src); \
        int##TBITS##_t rhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v3, src); \
        \
        s390_vec_write_element##TBITS(v1, dst, lhs * rhs); \
    } \
}
/*
 * Generate HELPER(gvec_vmle<BITS>): unsigned widening multiply of the
 * even-indexed elements. Result element `dst` (TBITS wide) is the product
 * of the zero-extended BITS-wide elements `src` = 2 * dst of @v2 and @v3.
 */
#define DEF_VMLE(BITS, TBITS) \
void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int dst, src; \
    \
    for (dst = 0, src = 0; dst < (128 / TBITS); dst++, src += 2) { \
        const uint##TBITS##_t lhs = s390_vec_read_element##BITS(v2, src); \
        const uint##TBITS##_t rhs = s390_vec_read_element##BITS(v3, src); \
        \
        s390_vec_write_element##TBITS(v1, dst, lhs * rhs); \
    } \
}
/*
 * Generate HELPER(gvec_vmo<BITS>): signed widening multiply of the
 * odd-indexed elements. Result element `dst` (TBITS wide) is the product of
 * the sign-extended BITS-wide elements `src` = 2 * dst + 1 of @v2 and @v3.
 */
#define DEF_VMO(BITS, TBITS) \
void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \
                            uint32_t desc) \
{ \
    int dst, src; \
    \
    for (dst = 0, src = 1; dst < (128 / TBITS); dst++, src += 2) { \
        int##TBITS##_t lhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v2, src); \
        int##TBITS##_t rhs = \
            (int##BITS##_t)s390_vec_read_element##BITS(v3, src); \
        \
        s390_vec_write_element##TBITS(v1, dst, lhs * rhs); \
    } \
}
/*
 * Generate HELPER(gvec_vmlo<BITS>): unsigned widening multiply of the
 * odd-indexed elements. Result element i (TBITS wide) is the product of the
 * zero-extended BITS-wide elements j = 2i + 1 of @v2 and @v3.
 *
 * j starts at 1, like the other odd-element helpers; starting at 0 would
 * make this a duplicate of the even-element variant.
 */
#define DEF_VMLO(BITS, TBITS) \
void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \
                             uint32_t desc) \
{ \
    int i, j; \
    \
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
        const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
        const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
        \
        s390_vec_write_element##TBITS(v1, i, a * b); \
    } \
}
/*
 * Generate HELPER(gvec_vpopct<BITS>): store in each element of @v1 the
 * number of set bits (ctpop32) of the corresponding element of @v2.
 */
#define DEF_VPOPCT(BITS) \
void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t val = s390_vec_read_element##BITS(v2, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, ctpop32(val)); \
    } \
}
/*
 * Generate HELPER(gvec_verllv<BITS>): rotate each element of @v2 left via
 * rol<BITS>(), taking the rotate amount from the corresponding element of
 * @v3, and store the result in @v1.
 */
#define DEF_VERLLV(BITS) \
void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \
                               uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t val = s390_vec_read_element##BITS(v2, elem); \
        const uint##BITS##_t amount = s390_vec_read_element##BITS(v3, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, rol##BITS(val, amount)); \
    } \
}
/*
 * Generate HELPER(gvec_verll<BITS>): rotate every element of @v2 left by
 * the same @count via rol<BITS>() and store the result in @v1.
 */
#define DEF_VERLL(BITS) \
void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \
                              uint32_t desc) \
{ \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t val = s390_vec_read_element##BITS(v2, elem); \
        \
        s390_vec_write_element##BITS(v1, elem, rol##BITS(val, count)); \
    } \
}
/*
 * Generate HELPER(gvec_verim<BITS>): rotate-then-insert under mask. Each
 * element of @v2 is rotated left by the count taken from simd_data(desc);
 * bits selected by the corresponding element of @v3 are taken from the
 * rotated value, all other bits keep the current contents of @v1.
 */
#define DEF_VERIM(BITS) \
void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
                              uint32_t desc) \
{ \
    const uint8_t count = simd_data(desc); \
    int elem; \
    \
    for (elem = 0; elem < (128 / BITS); elem++) { \
        const uint##BITS##_t old = s390_vec_read_element##BITS(v1, elem); \
        const uint##BITS##_t src = s390_vec_read_element##BITS(v2, elem); \
        const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, elem); \
        const uint##BITS##_t merged = \
            (old & ~mask) | (rol##BITS(src, count) & mask); \
        \
        s390_vec_write_element##BITS(v1, elem, merged); \
    } \
}
553 void HELPER(gvec_vsl
)(void *v1
, const void *v2
, uint64_t count
,
556 s390_vec_shl(v1
, v2
, count
);
559 void HELPER(gvec_vsra
)(void *v1
, const void *v2
, uint64_t count
,
562 s390_vec_sar(v1
, v2
, count
);