]> git.proxmox.com Git - mirror_qemu.git/blob - target/s390x/vec_int_helper.c
s390x/tcg: Implement VECTOR SHIFT RIGHT ARITHMETIC
[mirror_qemu.git] / target / s390x / vec_int_helper.c
1 /*
2 * QEMU TCG support -- s390x vector integer instruction support
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
14 #include "cpu.h"
15 #include "vec.h"
16 #include "exec/helper-proto.h"
17 #include "tcg/tcg-gvec-desc.h"
18
19 static bool s390_vec_is_zero(const S390Vector *v)
20 {
21 return !v->doubleword[0] && !v->doubleword[1];
22 }
23
24 static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25 const S390Vector *b)
26 {
27 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29 }
30
31 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
32 {
33 uint64_t tmp;
34
35 g_assert(count < 128);
36 if (count == 0) {
37 d->doubleword[0] = a->doubleword[0];
38 d->doubleword[1] = a->doubleword[1];
39 } else if (count == 64) {
40 d->doubleword[0] = a->doubleword[1];
41 d->doubleword[1] = 0;
42 } else if (count < 64) {
43 tmp = extract64(a->doubleword[1], 64 - count, count);
44 d->doubleword[1] = a->doubleword[1] << count;
45 d->doubleword[0] = (a->doubleword[0] << count) | tmp;
46 } else {
47 d->doubleword[0] = a->doubleword[1] << (count - 64);
48 d->doubleword[1] = 0;
49 }
50 }
51
52 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
53 {
54 uint64_t tmp;
55
56 if (count == 0) {
57 d->doubleword[0] = a->doubleword[0];
58 d->doubleword[1] = a->doubleword[1];
59 } else if (count == 64) {
60 d->doubleword[1] = a->doubleword[0];
61 d->doubleword[0] = 0;
62 } else if (count < 64) {
63 tmp = a->doubleword[1] >> count;
64 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
65 d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
66 } else {
67 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
68 d->doubleword[0] = 0;
69 }
70 }
71
72 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
73 {
74 uint64_t tmp;
75
76 g_assert(count < 128);
77 if (count == 0) {
78 d->doubleword[0] = a->doubleword[0];
79 d->doubleword[1] = a->doubleword[1];
80 } else if (count == 64) {
81 d->doubleword[1] = a->doubleword[0];
82 d->doubleword[0] = 0;
83 } else if (count < 64) {
84 tmp = a->doubleword[1] >> count;
85 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
86 d->doubleword[0] = a->doubleword[0] >> count;
87 } else {
88 d->doubleword[1] = a->doubleword[0] >> (count - 64);
89 d->doubleword[0] = 0;
90 }
91 }
92 #define DEF_VAVG(BITS) \
93 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \
94 uint32_t desc) \
95 { \
96 int i; \
97 \
98 for (i = 0; i < (128 / BITS); i++) { \
99 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
100 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
101 \
102 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
103 } \
104 }
105 DEF_VAVG(8)
106 DEF_VAVG(16)
107
108 #define DEF_VAVGL(BITS) \
109 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \
110 uint32_t desc) \
111 { \
112 int i; \
113 \
114 for (i = 0; i < (128 / BITS); i++) { \
115 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
116 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
117 \
118 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
119 } \
120 }
121 DEF_VAVGL(8)
122 DEF_VAVGL(16)
123
124 #define DEF_VCLZ(BITS) \
125 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
126 { \
127 int i; \
128 \
129 for (i = 0; i < (128 / BITS); i++) { \
130 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
131 \
132 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
133 } \
134 }
135 DEF_VCLZ(8)
136 DEF_VCLZ(16)
137
138 #define DEF_VCTZ(BITS) \
139 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
140 { \
141 int i; \
142 \
143 for (i = 0; i < (128 / BITS); i++) { \
144 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
145 \
146 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
147 } \
148 }
149 DEF_VCTZ(8)
150 DEF_VCTZ(16)
151
/*
 * Carry-less multiplication: the shift-and-add scheme of binary
 * multiplication, with the partial products combined by XOR instead of
 * addition. Operands and result use the TBITS-wide type.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
                                             uint##TBITS##_t b) \
{ \
    uint##TBITS##_t product; \
    \
    for (product = 0; b != 0; a <<= 1, b >>= 1) { \
        if (b & 0x1) { \
            product ^= a; \
        } \
    } \
    return product; \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
171
172 static S390Vector galois_multiply64(uint64_t a, uint64_t b)
173 {
174 S390Vector res = {};
175 S390Vector va = {
176 .doubleword[1] = a,
177 };
178 S390Vector vb = {
179 .doubleword[1] = b,
180 };
181
182 while (!s390_vec_is_zero(&vb)) {
183 if (vb.doubleword[1] & 0x1) {
184 s390_vec_xor(&res, &res, &va);
185 }
186 s390_vec_shl(&va, &va, 1);
187 s390_vec_shr(&vb, &vb, 1);
188 }
189 return res;
190 }
191
192 #define DEF_VGFM(BITS, TBITS) \
193 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \
194 uint32_t desc) \
195 { \
196 int i; \
197 \
198 for (i = 0; i < (128 / TBITS); i++) { \
199 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
200 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
201 uint##TBITS##_t d = galois_multiply##BITS(a, b); \
202 \
203 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
204 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
205 d = d ^ galois_multiply32(a, b); \
206 s390_vec_write_element##TBITS(v1, i, d); \
207 } \
208 }
209 DEF_VGFM(8, 16)
210 DEF_VGFM(16, 32)
211 DEF_VGFM(32, 64)
212
213 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
214 uint32_t desc)
215 {
216 S390Vector tmp1, tmp2;
217 uint64_t a, b;
218
219 a = s390_vec_read_element64(v2, 0);
220 b = s390_vec_read_element64(v3, 0);
221 tmp1 = galois_multiply64(a, b);
222 a = s390_vec_read_element64(v2, 1);
223 b = s390_vec_read_element64(v3, 1);
224 tmp2 = galois_multiply64(a, b);
225 s390_vec_xor(v1, &tmp1, &tmp2);
226 }
227
228 #define DEF_VGFMA(BITS, TBITS) \
229 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \
230 const void *v4, uint32_t desc) \
231 { \
232 int i; \
233 \
234 for (i = 0; i < (128 / TBITS); i++) { \
235 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
236 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
237 uint##TBITS##_t d = galois_multiply##BITS(a, b); \
238 \
239 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
240 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
241 d = d ^ galois_multiply32(a, b); \
242 d = d ^ s390_vec_read_element##TBITS(v4, i); \
243 s390_vec_write_element##TBITS(v1, i, d); \
244 } \
245 }
246 DEF_VGFMA(8, 16)
247 DEF_VGFMA(16, 32)
248 DEF_VGFMA(32, 64)
249
250 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
251 const void *v4, uint32_t desc)
252 {
253 S390Vector tmp1, tmp2;
254 uint64_t a, b;
255
256 a = s390_vec_read_element64(v2, 0);
257 b = s390_vec_read_element64(v3, 0);
258 tmp1 = galois_multiply64(a, b);
259 a = s390_vec_read_element64(v2, 1);
260 b = s390_vec_read_element64(v3, 1);
261 tmp2 = galois_multiply64(a, b);
262 s390_vec_xor(&tmp1, &tmp1, &tmp2);
263 s390_vec_xor(v1, &tmp1, v4);
264 }
265
266 #define DEF_VMAL(BITS) \
267 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \
268 const void *v4, uint32_t desc) \
269 { \
270 int i; \
271 \
272 for (i = 0; i < (128 / BITS); i++) { \
273 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
274 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
275 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
276 \
277 s390_vec_write_element##BITS(v1, i, a * b + c); \
278 } \
279 }
280 DEF_VMAL(8)
281 DEF_VMAL(16)
282
283 #define DEF_VMAH(BITS) \
284 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \
285 const void *v4, uint32_t desc) \
286 { \
287 int i; \
288 \
289 for (i = 0; i < (128 / BITS); i++) { \
290 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
291 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
292 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \
293 \
294 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
295 } \
296 }
297 DEF_VMAH(8)
298 DEF_VMAH(16)
299
300 #define DEF_VMALH(BITS) \
301 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \
302 const void *v4, uint32_t desc) \
303 { \
304 int i; \
305 \
306 for (i = 0; i < (128 / BITS); i++) { \
307 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
308 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
309 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
310 \
311 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
312 } \
313 }
314 DEF_VMALH(8)
315 DEF_VMALH(16)
316
317 #define DEF_VMAE(BITS, TBITS) \
318 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
319 const void *v4, uint32_t desc) \
320 { \
321 int i, j; \
322 \
323 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
324 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
325 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
326 int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \
327 \
328 s390_vec_write_element##TBITS(v1, i, a * b + c); \
329 } \
330 }
331 DEF_VMAE(8, 16)
332 DEF_VMAE(16, 32)
333 DEF_VMAE(32, 64)
334
335 #define DEF_VMALE(BITS, TBITS) \
336 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
337 const void *v4, uint32_t desc) \
338 { \
339 int i, j; \
340 \
341 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
342 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
343 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
344 uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \
345 \
346 s390_vec_write_element##TBITS(v1, i, a * b + c); \
347 } \
348 }
349 DEF_VMALE(8, 16)
350 DEF_VMALE(16, 32)
351 DEF_VMALE(32, 64)
352
353 #define DEF_VMAO(BITS, TBITS) \
354 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
355 const void *v4, uint32_t desc) \
356 { \
357 int i, j; \
358 \
359 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
360 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
361 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
362 int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \
363 \
364 s390_vec_write_element##TBITS(v1, i, a * b + c); \
365 } \
366 }
367 DEF_VMAO(8, 16)
368 DEF_VMAO(16, 32)
369 DEF_VMAO(32, 64)
370
371 #define DEF_VMALO(BITS, TBITS) \
372 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
373 const void *v4, uint32_t desc) \
374 { \
375 int i, j; \
376 \
377 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
378 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
379 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
380 uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \
381 \
382 s390_vec_write_element##TBITS(v1, i, a * b + c); \
383 } \
384 }
385 DEF_VMALO(8, 16)
386 DEF_VMALO(16, 32)
387 DEF_VMALO(32, 64)
388
389 #define DEF_VMH(BITS) \
390 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \
391 uint32_t desc) \
392 { \
393 int i; \
394 \
395 for (i = 0; i < (128 / BITS); i++) { \
396 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
397 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
398 \
399 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
400 } \
401 }
402 DEF_VMH(8)
403 DEF_VMH(16)
404
405 #define DEF_VMLH(BITS) \
406 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \
407 uint32_t desc) \
408 { \
409 int i; \
410 \
411 for (i = 0; i < (128 / BITS); i++) { \
412 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
413 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
414 \
415 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
416 } \
417 }
418 DEF_VMLH(8)
419 DEF_VMLH(16)
420
421 #define DEF_VME(BITS, TBITS) \
422 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \
423 uint32_t desc) \
424 { \
425 int i, j; \
426 \
427 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
428 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
429 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
430 \
431 s390_vec_write_element##TBITS(v1, i, a * b); \
432 } \
433 }
434 DEF_VME(8, 16)
435 DEF_VME(16, 32)
436 DEF_VME(32, 64)
437
438 #define DEF_VMLE(BITS, TBITS) \
439 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \
440 uint32_t desc) \
441 { \
442 int i, j; \
443 \
444 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
445 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
446 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
447 \
448 s390_vec_write_element##TBITS(v1, i, a * b); \
449 } \
450 }
451 DEF_VMLE(8, 16)
452 DEF_VMLE(16, 32)
453 DEF_VMLE(32, 64)
454
455 #define DEF_VMO(BITS, TBITS) \
456 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \
457 uint32_t desc) \
458 { \
459 int i, j; \
460 \
461 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
462 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
463 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
464 \
465 s390_vec_write_element##TBITS(v1, i, a * b); \
466 } \
467 }
468 DEF_VMO(8, 16)
469 DEF_VMO(16, 32)
470 DEF_VMO(32, 64)
471
472 #define DEF_VMLO(BITS, TBITS) \
473 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \
474 uint32_t desc) \
475 { \
476 int i, j; \
477 \
478 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
479 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
480 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
481 \
482 s390_vec_write_element##TBITS(v1, i, a * b); \
483 } \
484 }
485 DEF_VMLO(8, 16)
486 DEF_VMLO(16, 32)
487 DEF_VMLO(32, 64)
488
489 #define DEF_VPOPCT(BITS) \
490 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
491 { \
492 int i; \
493 \
494 for (i = 0; i < (128 / BITS); i++) { \
495 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
496 \
497 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \
498 } \
499 }
500 DEF_VPOPCT(8)
501 DEF_VPOPCT(16)
502
503 #define DEF_VERLLV(BITS) \
504 void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \
505 uint32_t desc) \
506 { \
507 int i; \
508 \
509 for (i = 0; i < (128 / BITS); i++) { \
510 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
511 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
512 \
513 s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \
514 } \
515 }
516 DEF_VERLLV(8)
517 DEF_VERLLV(16)
518
519 #define DEF_VERLL(BITS) \
520 void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \
521 uint32_t desc) \
522 { \
523 int i; \
524 \
525 for (i = 0; i < (128 / BITS); i++) { \
526 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
527 \
528 s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \
529 } \
530 }
531 DEF_VERLL(8)
532 DEF_VERLL(16)
533
534 #define DEF_VERIM(BITS) \
535 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
536 uint32_t desc) \
537 { \
538 const uint8_t count = simd_data(desc); \
539 int i; \
540 \
541 for (i = 0; i < (128 / BITS); i++) { \
542 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \
543 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \
544 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \
545 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \
546 \
547 s390_vec_write_element##BITS(v1, i, d); \
548 } \
549 }
550 DEF_VERIM(8)
551 DEF_VERIM(16)
552
553 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
554 uint32_t desc)
555 {
556 s390_vec_shl(v1, v2, count);
557 }
558
559 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
560 uint32_t desc)
561 {
562 s390_vec_sar(v1, v2, count);
563 }