]> git.proxmox.com Git - mirror_qemu.git/blame - target/s390x/tcg/vec_int_helper.c
target/s390x: Use clmul_8* routines
[mirror_qemu.git] / target / s390x / tcg / vec_int_helper.c
CommitLineData
c1a81d4b
DH
1/*
2 * QEMU TCG support -- s390x vector integer instruction support
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12#include "qemu/osdep.h"
c1a81d4b
DH
13#include "cpu.h"
14#include "vec.h"
15#include "exec/helper-proto.h"
5c4b0ab4 16#include "tcg/tcg-gvec-desc.h"
2d8bc681 17#include "crypto/clmul.h"
c1a81d4b 18
697a45d6
DH
19static bool s390_vec_is_zero(const S390Vector *v)
20{
21 return !v->doubleword[0] && !v->doubleword[1];
22}
23
24static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25 const S390Vector *b)
26{
27 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29}
30
db156ebf
DH
31static void s390_vec_and(S390Vector *res, const S390Vector *a,
32 const S390Vector *b)
33{
34 res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
35 res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
36}
37
38static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
39{
40 return a->doubleword[0] == b->doubleword[0] &&
41 a->doubleword[1] == b->doubleword[1];
42}
43
697a45d6
DH
44static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
45{
46 uint64_t tmp;
47
48 g_assert(count < 128);
49 if (count == 0) {
50 d->doubleword[0] = a->doubleword[0];
51 d->doubleword[1] = a->doubleword[1];
52 } else if (count == 64) {
53 d->doubleword[0] = a->doubleword[1];
54 d->doubleword[1] = 0;
55 } else if (count < 64) {
56 tmp = extract64(a->doubleword[1], 64 - count, count);
57 d->doubleword[1] = a->doubleword[1] << count;
58 d->doubleword[0] = (a->doubleword[0] << count) | tmp;
59 } else {
60 d->doubleword[0] = a->doubleword[1] << (count - 64);
61 d->doubleword[1] = 0;
62 }
63}
64
5f724887
DH
65static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
66{
67 uint64_t tmp;
68
69 if (count == 0) {
70 d->doubleword[0] = a->doubleword[0];
71 d->doubleword[1] = a->doubleword[1];
72 } else if (count == 64) {
b57b3368 73 tmp = (int64_t)a->doubleword[0] >> 63;
5f724887 74 d->doubleword[1] = a->doubleword[0];
b57b3368 75 d->doubleword[0] = tmp;
5f724887
DH
76 } else if (count < 64) {
77 tmp = a->doubleword[1] >> count;
78 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
79 d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
80 } else {
b57b3368 81 tmp = (int64_t)a->doubleword[0] >> 63;
5f724887 82 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
b57b3368 83 d->doubleword[0] = tmp;
5f724887
DH
84 }
85}
86
697a45d6
DH
87static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
88{
89 uint64_t tmp;
90
91 g_assert(count < 128);
92 if (count == 0) {
93 d->doubleword[0] = a->doubleword[0];
94 d->doubleword[1] = a->doubleword[1];
95 } else if (count == 64) {
96 d->doubleword[1] = a->doubleword[0];
97 d->doubleword[0] = 0;
98 } else if (count < 64) {
99 tmp = a->doubleword[1] >> count;
100 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
101 d->doubleword[0] = a->doubleword[0] >> count;
102 } else {
103 d->doubleword[1] = a->doubleword[0] >> (count - 64);
104 d->doubleword[0] = 0;
105 }
106}
c1a81d4b
DH
107#define DEF_VAVG(BITS) \
108void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \
109 uint32_t desc) \
110{ \
111 int i; \
112 \
113 for (i = 0; i < (128 / BITS); i++) { \
114 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
115 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
116 \
117 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
118 } \
119}
120DEF_VAVG(8)
121DEF_VAVG(16)
801aa78b
DH
122
123#define DEF_VAVGL(BITS) \
124void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \
125 uint32_t desc) \
126{ \
127 int i; \
128 \
129 for (i = 0; i < (128 / BITS); i++) { \
130 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
131 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
132 \
133 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
134 } \
135}
136DEF_VAVGL(8)
137DEF_VAVGL(16)
28863f1d
DH
138
139#define DEF_VCLZ(BITS) \
140void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
141{ \
142 int i; \
143 \
144 for (i = 0; i < (128 / BITS); i++) { \
145 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
146 \
147 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
148 } \
149}
150DEF_VCLZ(8)
151DEF_VCLZ(16)
449a8ac2
DH
152
153#define DEF_VCTZ(BITS) \
154void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
155{ \
156 int i; \
157 \
158 for (i = 0; i < (128 / BITS); i++) { \
159 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
160 \
161 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
162 } \
163}
164DEF_VCTZ(8)
165DEF_VCTZ(16)
697a45d6
DH
166
/*
 * Carry-less multiplication: shift-and-add binary multiplication in which
 * the partial products are combined with XOR instead of addition.
 *
 * galois_multiply##BITS takes and returns TBITS-wide values; callers pass
 * BITS-wide operands so the product fits in the TBITS-wide result.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
                                             uint##TBITS##_t b) \
{ \
    uint##TBITS##_t product = 0; \
 \
    /* Walk the bits of b from least to most significant. */ \
    for (; b != 0; b >>= 1, a <<= 1) { \
        if (b & 1) { \
            product ^= a; \
        } \
    } \
    return product; \
}
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
185
186static S390Vector galois_multiply64(uint64_t a, uint64_t b)
187{
188 S390Vector res = {};
189 S390Vector va = {
190 .doubleword[1] = a,
191 };
192 S390Vector vb = {
193 .doubleword[1] = b,
194 };
195
196 while (!s390_vec_is_zero(&vb)) {
197 if (vb.doubleword[1] & 0x1) {
198 s390_vec_xor(&res, &res, &va);
199 }
200 s390_vec_shl(&va, &va, 1);
201 s390_vec_shr(&vb, &vb, 1);
202 }
203 return res;
204}
205
2d8bc681
RH
/*
 * Galois field multiply-sum-accumulate on one doubleword of byte elements:
 * XOR together the even-lane products, the odd-lane products and the
 * accumulator @a.
 *
 * There is no carry across the two doublewords, so their order does
 * not matter.  Nor is there partial overlap between registers.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
214
215void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
216{
217 uint64_t *q1 = v1;
218 const uint64_t *q2 = v2, *q3 = v3;
219
220 q1[0] = do_gfma8(q2[0], q3[0], 0);
221 q1[1] = do_gfma8(q2[1], q3[1], 0);
222}
223
224void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
225 const void *v4, uint32_t desc)
226{
227 uint64_t *q1 = v1;
228 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
229
230 q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
231 q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
232}
233
697a45d6
DH
234#define DEF_VGFM(BITS, TBITS) \
235void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \
236 uint32_t desc) \
237{ \
238 int i; \
239 \
240 for (i = 0; i < (128 / TBITS); i++) { \
241 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
242 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
243 uint##TBITS##_t d = galois_multiply##BITS(a, b); \
244 \
245 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
246 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
247 d = d ^ galois_multiply32(a, b); \
248 s390_vec_write_element##TBITS(v1, i, d); \
249 } \
250}
697a45d6
DH
251DEF_VGFM(16, 32)
252DEF_VGFM(32, 64)
253
254void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
255 uint32_t desc)
256{
257 S390Vector tmp1, tmp2;
258 uint64_t a, b;
259
260 a = s390_vec_read_element64(v2, 0);
261 b = s390_vec_read_element64(v3, 0);
262 tmp1 = galois_multiply64(a, b);
263 a = s390_vec_read_element64(v2, 1);
264 b = s390_vec_read_element64(v3, 1);
265 tmp2 = galois_multiply64(a, b);
266 s390_vec_xor(v1, &tmp1, &tmp2);
267}
268
269#define DEF_VGFMA(BITS, TBITS) \
270void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \
271 const void *v4, uint32_t desc) \
272{ \
273 int i; \
274 \
275 for (i = 0; i < (128 / TBITS); i++) { \
276 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
277 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
278 uint##TBITS##_t d = galois_multiply##BITS(a, b); \
279 \
280 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
281 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
282 d = d ^ galois_multiply32(a, b); \
283 d = d ^ s390_vec_read_element##TBITS(v4, i); \
284 s390_vec_write_element##TBITS(v1, i, d); \
285 } \
286}
697a45d6
DH
287DEF_VGFMA(16, 32)
288DEF_VGFMA(32, 64)
289
290void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
291 const void *v4, uint32_t desc)
292{
293 S390Vector tmp1, tmp2;
294 uint64_t a, b;
295
296 a = s390_vec_read_element64(v2, 0);
297 b = s390_vec_read_element64(v3, 0);
298 tmp1 = galois_multiply64(a, b);
299 a = s390_vec_read_element64(v2, 1);
300 b = s390_vec_read_element64(v3, 1);
301 tmp2 = galois_multiply64(a, b);
302 s390_vec_xor(&tmp1, &tmp1, &tmp2);
303 s390_vec_xor(v1, &tmp1, v4);
304}
1b430aec
DH
305
306#define DEF_VMAL(BITS) \
307void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \
308 const void *v4, uint32_t desc) \
309{ \
310 int i; \
311 \
312 for (i = 0; i < (128 / BITS); i++) { \
313 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
314 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
315 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
316 \
317 s390_vec_write_element##BITS(v1, i, a * b + c); \
318 } \
319}
320DEF_VMAL(8)
321DEF_VMAL(16)
322
323#define DEF_VMAH(BITS) \
324void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \
325 const void *v4, uint32_t desc) \
326{ \
327 int i; \
328 \
329 for (i = 0; i < (128 / BITS); i++) { \
330 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
331 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
332 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \
333 \
334 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
335 } \
336}
337DEF_VMAH(8)
338DEF_VMAH(16)
339
340#define DEF_VMALH(BITS) \
341void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \
342 const void *v4, uint32_t desc) \
343{ \
344 int i; \
345 \
346 for (i = 0; i < (128 / BITS); i++) { \
347 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
348 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
349 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
350 \
351 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
352 } \
353}
354DEF_VMALH(8)
355DEF_VMALH(16)
356
357#define DEF_VMAE(BITS, TBITS) \
358void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
359 const void *v4, uint32_t desc) \
360{ \
361 int i, j; \
362 \
363 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
364 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
365 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
8b952519 366 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
1b430aec
DH
367 \
368 s390_vec_write_element##TBITS(v1, i, a * b + c); \
369 } \
370}
371DEF_VMAE(8, 16)
372DEF_VMAE(16, 32)
373DEF_VMAE(32, 64)
374
375#define DEF_VMALE(BITS, TBITS) \
376void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
377 const void *v4, uint32_t desc) \
378{ \
379 int i, j; \
380 \
381 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
382 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
383 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
8b952519 384 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
1b430aec
DH
385 \
386 s390_vec_write_element##TBITS(v1, i, a * b + c); \
387 } \
388}
389DEF_VMALE(8, 16)
390DEF_VMALE(16, 32)
391DEF_VMALE(32, 64)
392
393#define DEF_VMAO(BITS, TBITS) \
394void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
395 const void *v4, uint32_t desc) \
396{ \
397 int i, j; \
398 \
399 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
400 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
401 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
8b952519 402 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
1b430aec
DH
403 \
404 s390_vec_write_element##TBITS(v1, i, a * b + c); \
405 } \
406}
407DEF_VMAO(8, 16)
408DEF_VMAO(16, 32)
409DEF_VMAO(32, 64)
410
411#define DEF_VMALO(BITS, TBITS) \
412void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
413 const void *v4, uint32_t desc) \
414{ \
415 int i, j; \
416 \
417 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
418 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
419 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
8b952519 420 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
1b430aec
DH
421 \
422 s390_vec_write_element##TBITS(v1, i, a * b + c); \
423 } \
424}
425DEF_VMALO(8, 16)
426DEF_VMALO(16, 32)
427DEF_VMALO(32, 64)
2bf3ee38
DH
428
429#define DEF_VMH(BITS) \
430void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \
431 uint32_t desc) \
432{ \
433 int i; \
434 \
435 for (i = 0; i < (128 / BITS); i++) { \
436 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
437 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
438 \
439 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
440 } \
441}
442DEF_VMH(8)
443DEF_VMH(16)
444
445#define DEF_VMLH(BITS) \
446void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \
447 uint32_t desc) \
448{ \
449 int i; \
450 \
451 for (i = 0; i < (128 / BITS); i++) { \
452 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
453 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
454 \
455 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
456 } \
457}
458DEF_VMLH(8)
459DEF_VMLH(16)
460
461#define DEF_VME(BITS, TBITS) \
462void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \
463 uint32_t desc) \
464{ \
465 int i, j; \
466 \
467 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
468 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
469 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
470 \
471 s390_vec_write_element##TBITS(v1, i, a * b); \
472 } \
473}
474DEF_VME(8, 16)
475DEF_VME(16, 32)
476DEF_VME(32, 64)
477
478#define DEF_VMLE(BITS, TBITS) \
479void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \
480 uint32_t desc) \
481{ \
482 int i, j; \
483 \
484 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
485 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
486 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
487 \
488 s390_vec_write_element##TBITS(v1, i, a * b); \
489 } \
490}
491DEF_VMLE(8, 16)
492DEF_VMLE(16, 32)
493DEF_VMLE(32, 64)
494
495#define DEF_VMO(BITS, TBITS) \
496void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \
497 uint32_t desc) \
498{ \
499 int i, j; \
500 \
501 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
502 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
503 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
504 \
505 s390_vec_write_element##TBITS(v1, i, a * b); \
506 } \
507}
508DEF_VMO(8, 16)
509DEF_VMO(16, 32)
510DEF_VMO(32, 64)
511
512#define DEF_VMLO(BITS, TBITS) \
513void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \
514 uint32_t desc) \
515{ \
516 int i, j; \
517 \
49a7ce4e 518 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
2bf3ee38
DH
519 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
520 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
521 \
522 s390_vec_write_element##TBITS(v1, i, a * b); \
523 } \
524}
525DEF_VMLO(8, 16)
526DEF_VMLO(16, 32)
527DEF_VMLO(32, 64)
c3838aaa
DH
528
529#define DEF_VPOPCT(BITS) \
530void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
531{ \
532 int i; \
533 \
534 for (i = 0; i < (128 / BITS); i++) { \
535 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
536 \
537 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \
538 } \
539}
540DEF_VPOPCT(8)
541DEF_VPOPCT(16)
55236da2 542
5c4b0ab4
DH
543#define DEF_VERIM(BITS) \
544void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
545 uint32_t desc) \
546{ \
547 const uint8_t count = simd_data(desc); \
548 int i; \
549 \
550 for (i = 0; i < (128 / BITS); i++) { \
551 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \
552 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \
553 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \
554 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \
555 \
556 s390_vec_write_element##BITS(v1, i, d); \
557 } \
558}
559DEF_VERIM(8)
560DEF_VERIM(16)
dea33fc3
DH
561
562void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
563 uint32_t desc)
564{
565 s390_vec_shl(v1, v2, count);
566}
5f724887 567
b7a50eb7
DM
568void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
569 uint32_t desc)
570{
571 S390Vector tmp;
572 uint32_t sh, e0, e1 = 0;
573 int i;
574
575 for (i = 15; i >= 0; --i, e1 = e0) {
576 e0 = s390_vec_read_element8(v2, i);
577 sh = s390_vec_read_element8(v3, i) & 7;
578
579 s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
580 }
581
582 *(S390Vector *)v1 = tmp;
583}
584
5f724887
DH
585void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
586 uint32_t desc)
587{
588 s390_vec_sar(v1, v2, count);
589}
8112274f 590
b7a50eb7
DM
591void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
592 uint32_t desc)
593{
594 S390Vector tmp;
595 uint32_t sh, e0, e1 = 0;
596 int i = 0;
597
598 /* Byte 0 is special only. */
599 e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
600 sh = s390_vec_read_element8(v3, i) & 7;
601 s390_vec_write_element8(&tmp, i, e0 >> sh);
602
603 e1 = e0;
604 for (i = 1; i < 16; ++i, e1 = e0) {
605 e0 = s390_vec_read_element8(v2, i);
606 sh = s390_vec_read_element8(v3, i) & 7;
607 s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
608 }
609
610 *(S390Vector *)v1 = tmp;
611}
612
8112274f
DH
613void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
614 uint32_t desc)
615{
616 s390_vec_shr(v1, v2, count);
617}
1ee2d7ba 618
b7a50eb7
DM
619void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
620 uint32_t desc)
621{
622 S390Vector tmp;
623 uint32_t sh, e0, e1 = 0;
624
625 for (int i = 0; i < 16; ++i, e1 = e0) {
626 e0 = s390_vec_read_element8(v2, i);
627 sh = s390_vec_read_element8(v3, i) & 7;
628
629 s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
630 }
631
632 *(S390Vector *)v1 = tmp;
633}
634
1ee2d7ba
DH
635#define DEF_VSCBI(BITS) \
636void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \
637 uint32_t desc) \
638{ \
639 int i; \
640 \
641 for (i = 0; i < (128 / BITS); i++) { \
642 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
643 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
644 \
23e79774 645 s390_vec_write_element##BITS(v1, i, a >= b); \
1ee2d7ba
DH
646 } \
647}
648DEF_VSCBI(8)
649DEF_VSCBI(16)
db156ebf
DH
650
651void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
652 uint32_t desc)
653{
654 S390Vector tmp;
655
656 s390_vec_and(&tmp, v1, v2);
657 if (s390_vec_is_zero(&tmp)) {
658 /* Selected bits all zeros; or all mask bits zero */
659 env->cc_op = 0;
660 } else if (s390_vec_equal(&tmp, v2)) {
661 /* Selected bits all ones */
662 env->cc_op = 3;
663 } else {
664 /* Selected bits a mix of zeros and ones */
665 env->cc_op = 1;
666 }
667}