]> git.proxmox.com Git - mirror_qemu.git/blame - target/s390x/tcg/vec_fpu_helper.c
target/s390x: Update do_unaligned_access() comment
[mirror_qemu.git] / target / s390x / tcg / vec_fpu_helper.c
CommitLineData
3a0eae85
DH
1/*
2 * QEMU TCG support -- s390x vector floating point instruction support
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12#include "qemu/osdep.h"
3a0eae85 13#include "cpu.h"
b6b47223 14#include "s390x-internal.h"
3a0eae85
DH
15#include "vec.h"
16#include "tcg_s390x.h"
17#include "tcg/tcg-gvec-desc.h"
18#include "exec/exec-all.h"
19#include "exec/helper-proto.h"
20#include "fpu/softfloat.h"
21
/*
 * Vector interruption codes (VIC). check_ieee_exc() combines one of these
 * with the element number to form the value it returns.
 */
enum {
    VIC_INVALID   = 0x1,
    VIC_DIVBYZERO = 0x2,
    VIC_OVERFLOW  = 0x3,
    VIC_UNDERFLOW = 0x4,
    VIC_INEXACT   = 0x5,
};
27
28/* returns the VEX. If the VEX is 0, there is no trap */
29static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
30 uint8_t *vec_exc)
31{
32 uint8_t vece_exc = 0, trap_exc;
33 unsigned qemu_exc;
34
35 /* Retrieve and clear the softfloat exceptions */
36 qemu_exc = env->fpu_status.float_exception_flags;
37 if (qemu_exc == 0) {
38 return 0;
39 }
40 env->fpu_status.float_exception_flags = 0;
41
42 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
43
44 /* Add them to the vector-wide s390x exception bits */
45 *vec_exc |= vece_exc;
46
47 /* Check for traps and construct the VXC */
48 trap_exc = vece_exc & env->fpc >> 24;
49 if (trap_exc) {
50 if (trap_exc & S390_IEEE_MASK_INVALID) {
51 return enr << 4 | VIC_INVALID;
52 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
53 return enr << 4 | VIC_DIVBYZERO;
54 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
55 return enr << 4 | VIC_OVERFLOW;
56 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
57 return enr << 4 | VIC_UNDERFLOW;
58 } else if (!XxC) {
59 g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
60 /* inexact has lowest priority on traps */
61 return enr << 4 | VIC_INEXACT;
62 }
63 }
64 return 0;
65}
66
67static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
68 uintptr_t retaddr)
69{
70 if (vxc) {
71 /* on traps, the fpc flags are not updated, instruction is suppressed */
72 tcg_s390_vector_exception(env, vxc, retaddr);
73 }
74 if (vec_exc) {
75 /* indicate exceptions for all elements combined */
76 env->fpc |= vec_exc << 16;
77 }
78}
79
0987961d
DH
80static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
81{
82 return make_float32(s390_vec_read_element32(v, enr));
83}
84
863b9507
DH
85static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
86{
87 return make_float64(s390_vec_read_element64(v, enr));
88}
89
0987961d
DH
90static float128 s390_vec_read_float128(const S390Vector *v)
91{
92 return make_float128(s390_vec_read_element64(v, 0),
93 s390_vec_read_element64(v, 1));
94}
95
96static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
97{
98 return s390_vec_write_element32(v, enr, data);
99}
100
863b9507
DH
101static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
102{
103 return s390_vec_write_element64(v, enr, data);
104}
105
0987961d
DH
106static void s390_vec_write_float128(S390Vector *v, float128 data)
107{
108 s390_vec_write_element64(v, 0, data.high);
109 s390_vec_write_element64(v, 1, data.low);
110}
111
acb269a4
DH
112typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
113static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
114 bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
115 uintptr_t retaddr)
116{
117 uint8_t vxc, vec_exc = 0;
118 S390Vector tmp = {};
119 int i, old_mode;
120
121 old_mode = s390_swap_bfp_rounding_mode(env, erm);
122 for (i = 0; i < 4; i++) {
123 const float32 a = s390_vec_read_float32(v2, i);
124
125 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
126 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
127 if (s || vxc) {
128 break;
129 }
130 }
131 s390_restore_bfp_rounding_mode(env, old_mode);
132 handle_ieee_exc(env, vxc, vec_exc, retaddr);
133 *v1 = tmp;
134}
135
21bd6ea2 136typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
bb03fd84
DH
137static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
138 bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
139 uintptr_t retaddr)
140{
141 uint8_t vxc, vec_exc = 0;
142 S390Vector tmp = {};
143 int i, old_mode;
144
145 old_mode = s390_swap_bfp_rounding_mode(env, erm);
146 for (i = 0; i < 2; i++) {
21bd6ea2 147 const float64 a = s390_vec_read_float64(v2, i);
bb03fd84 148
21bd6ea2 149 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
bb03fd84
DH
150 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
151 if (s || vxc) {
152 break;
153 }
154 }
155 s390_restore_bfp_rounding_mode(env, old_mode);
156 handle_ieee_exc(env, vxc, vec_exc, retaddr);
157 *v1 = tmp;
158}
159
acb269a4
DH
160typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
161static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
162 bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
163 uintptr_t retaddr)
164{
165 const float128 a = s390_vec_read_float128(v2);
166 uint8_t vxc, vec_exc = 0;
167 S390Vector tmp = {};
168 int old_mode;
169
170 old_mode = s390_swap_bfp_rounding_mode(env, erm);
171 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
172 vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
173 s390_restore_bfp_rounding_mode(env, old_mode);
174 handle_ieee_exc(env, vxc, vec_exc, retaddr);
175 *v1 = tmp;
176}
177
acc2d3a4
DM
178static float32 vcdg32(float32 a, float_status *s)
179{
180 return int32_to_float32(a, s);
181}
182
183static float32 vcdlg32(float32 a, float_status *s)
184{
185 return uint32_to_float32(a, s);
186}
187
188static float32 vcgd32(float32 a, float_status *s)
189{
190 const float32 tmp = float32_to_int32(a, s);
191
192 return float32_is_any_nan(a) ? INT32_MIN : tmp;
193}
194
195static float32 vclgd32(float32 a, float_status *s)
196{
197 const float32 tmp = float32_to_uint32(a, s);
198
199 return float32_is_any_nan(a) ? 0 : tmp;
200}
201
21bd6ea2
DH
202static float64 vcdg64(float64 a, float_status *s)
203{
204 return int64_to_float64(a, s);
205}
206
207static float64 vcdlg64(float64 a, float_status *s)
208{
209 return uint64_to_float64(a, s);
210}
211
212static float64 vcgd64(float64 a, float_status *s)
213{
214 const float64 tmp = float64_to_int64(a, s);
215
216 return float64_is_any_nan(a) ? INT64_MIN : tmp;
217}
218
219static float64 vclgd64(float64 a, float_status *s)
220{
221 const float64 tmp = float64_to_uint64(a, s);
222
223 return float64_is_any_nan(a) ? 0 : tmp;
224}
225
226#define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \
227void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \
228 uint32_t desc) \
229{ \
230 const uint8_t erm = extract32(simd_data(desc), 4, 4); \
231 const bool se = extract32(simd_data(desc), 3, 1); \
232 const bool XxC = extract32(simd_data(desc), 2, 1); \
233 \
234 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \
235}
236
acc2d3a4
DM
237#define DEF_GVEC_VOP2_32(NAME) \
238DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
239
21bd6ea2
DH
240#define DEF_GVEC_VOP2_64(NAME) \
241DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
242
243#define DEF_GVEC_VOP2(NAME, OP) \
acb269a4
DH
244DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \
245DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \
246DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
21bd6ea2 247
acc2d3a4
DM
248DEF_GVEC_VOP2_32(vcdg)
249DEF_GVEC_VOP2_32(vcdlg)
250DEF_GVEC_VOP2_32(vcgd)
251DEF_GVEC_VOP2_32(vclgd)
21bd6ea2
DH
252DEF_GVEC_VOP2_64(vcdg)
253DEF_GVEC_VOP2_64(vcdlg)
254DEF_GVEC_VOP2_64(vcgd)
255DEF_GVEC_VOP2_64(vclgd)
256DEF_GVEC_VOP2(vfi, round_to_int)
257DEF_GVEC_VOP2(vfsq, sqrt)
258
0987961d
DH
259typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
260static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
261 CPUS390XState *env, bool s, vop32_3_fn fn,
262 uintptr_t retaddr)
263{
264 uint8_t vxc, vec_exc = 0;
265 S390Vector tmp = {};
266 int i;
267
268 for (i = 0; i < 4; i++) {
269 const float32 a = s390_vec_read_float32(v2, i);
270 const float32 b = s390_vec_read_float32(v3, i);
271
272 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
273 vxc = check_ieee_exc(env, i, false, &vec_exc);
274 if (s || vxc) {
275 break;
276 }
277 }
278 handle_ieee_exc(env, vxc, vec_exc, retaddr);
279 *v1 = tmp;
280}
281
863b9507 282typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
3a0eae85
DH
283static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
284 CPUS390XState *env, bool s, vop64_3_fn fn,
285 uintptr_t retaddr)
286{
287 uint8_t vxc, vec_exc = 0;
288 S390Vector tmp = {};
289 int i;
290
291 for (i = 0; i < 2; i++) {
863b9507
DH
292 const float64 a = s390_vec_read_float64(v2, i);
293 const float64 b = s390_vec_read_float64(v3, i);
3a0eae85 294
863b9507 295 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
3a0eae85
DH
296 vxc = check_ieee_exc(env, i, false, &vec_exc);
297 if (s || vxc) {
298 break;
299 }
300 }
301 handle_ieee_exc(env, vxc, vec_exc, retaddr);
302 *v1 = tmp;
303}
304
0987961d
DH
305typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
306static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
307 CPUS390XState *env, bool s, vop128_3_fn fn,
308 uintptr_t retaddr)
309{
310 const float128 a = s390_vec_read_float128(v2);
311 const float128 b = s390_vec_read_float128(v3);
312 uint8_t vxc, vec_exc = 0;
313 S390Vector tmp = {};
314
315 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
316 vxc = check_ieee_exc(env, 0, false, &vec_exc);
317 handle_ieee_exc(env, vxc, vec_exc, retaddr);
318 *v1 = tmp;
319}
320
321#define DEF_GVEC_VOP3_B(NAME, OP, BITS) \
322void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
323 CPUS390XState *env, uint32_t desc) \
863b9507
DH
324{ \
325 const bool se = extract32(simd_data(desc), 3, 1); \
326 \
0987961d 327 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \
3a0eae85
DH
328}
329
0987961d
DH
330#define DEF_GVEC_VOP3(NAME, OP) \
331DEF_GVEC_VOP3_B(NAME, OP, 32) \
332DEF_GVEC_VOP3_B(NAME, OP, 64) \
333DEF_GVEC_VOP3_B(NAME, OP, 128)
334
863b9507
DH
335DEF_GVEC_VOP3(vfa, add)
336DEF_GVEC_VOP3(vfs, sub)
337DEF_GVEC_VOP3(vfd, div)
338DEF_GVEC_VOP3(vfm, mul)
5b89f0fb 339
1c6b5b47
DH
340static int wfc32(const S390Vector *v1, const S390Vector *v2,
341 CPUS390XState *env, bool signal, uintptr_t retaddr)
342{
343 /* only the zero-indexed elements are compared */
344 const float32 a = s390_vec_read_float32(v1, 0);
345 const float32 b = s390_vec_read_float32(v2, 0);
346 uint8_t vxc, vec_exc = 0;
347 int cmp;
348
349 if (signal) {
350 cmp = float32_compare(a, b, &env->fpu_status);
351 } else {
352 cmp = float32_compare_quiet(a, b, &env->fpu_status);
353 }
354 vxc = check_ieee_exc(env, 0, false, &vec_exc);
355 handle_ieee_exc(env, vxc, vec_exc, retaddr);
356
357 return float_comp_to_cc(env, cmp);
358}
359
5b89f0fb
DH
360static int wfc64(const S390Vector *v1, const S390Vector *v2,
361 CPUS390XState *env, bool signal, uintptr_t retaddr)
362{
363 /* only the zero-indexed elements are compared */
4da79375
DH
364 const float64 a = s390_vec_read_float64(v1, 0);
365 const float64 b = s390_vec_read_float64(v2, 0);
5b89f0fb
DH
366 uint8_t vxc, vec_exc = 0;
367 int cmp;
368
369 if (signal) {
370 cmp = float64_compare(a, b, &env->fpu_status);
371 } else {
372 cmp = float64_compare_quiet(a, b, &env->fpu_status);
373 }
374 vxc = check_ieee_exc(env, 0, false, &vec_exc);
375 handle_ieee_exc(env, vxc, vec_exc, retaddr);
376
377 return float_comp_to_cc(env, cmp);
378}
379
1c6b5b47
DH
380static int wfc128(const S390Vector *v1, const S390Vector *v2,
381 CPUS390XState *env, bool signal, uintptr_t retaddr)
382{
383 /* only the zero-indexed elements are compared */
384 const float128 a = s390_vec_read_float128(v1);
385 const float128 b = s390_vec_read_float128(v2);
386 uint8_t vxc, vec_exc = 0;
387 int cmp;
388
389 if (signal) {
390 cmp = float128_compare(a, b, &env->fpu_status);
391 } else {
392 cmp = float128_compare_quiet(a, b, &env->fpu_status);
393 }
394 vxc = check_ieee_exc(env, 0, false, &vec_exc);
395 handle_ieee_exc(env, vxc, vec_exc, retaddr);
396
397 return float_comp_to_cc(env, cmp);
398}
399
4da79375
DH
400#define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
401void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \
402 CPUS390XState *env, uint32_t desc) \
403{ \
404 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \
5b89f0fb
DH
405}
406
4da79375 407#define DEF_GVEC_WFC(NAME, SIGNAL) \
1c6b5b47
DH
408 DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \
409 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \
410 DEF_GVEC_WFC_B(NAME, SIGNAL, 128)
4da79375
DH
411
412DEF_GVEC_WFC(wfc, false)
413DEF_GVEC_WFC(wfk, true)
2c806ab4 414
e384332c
DH
415typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
416static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
417 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
418{
419 uint8_t vxc, vec_exc = 0;
420 S390Vector tmp = {};
421 int match = 0;
422 int i;
423
424 for (i = 0; i < 4; i++) {
425 const float32 a = s390_vec_read_float32(v2, i);
426 const float32 b = s390_vec_read_float32(v3, i);
427
428 /* swap the order of the parameters, so we can use existing functions */
429 if (fn(b, a, &env->fpu_status)) {
430 match++;
431 s390_vec_write_element32(&tmp, i, -1u);
432 }
433 vxc = check_ieee_exc(env, i, false, &vec_exc);
434 if (s || vxc) {
435 break;
436 }
437 }
438
439 handle_ieee_exc(env, vxc, vec_exc, retaddr);
440 *v1 = tmp;
441 if (match) {
442 return s || match == 4 ? 0 : 1;
443 }
444 return 3;
445}
446
0673ecdf 447typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
2c806ab4
DH
448static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
449 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
450{
451 uint8_t vxc, vec_exc = 0;
452 S390Vector tmp = {};
453 int match = 0;
454 int i;
455
456 for (i = 0; i < 2; i++) {
64deb65a
DH
457 const float64 a = s390_vec_read_float64(v2, i);
458 const float64 b = s390_vec_read_float64(v3, i);
2c806ab4
DH
459
460 /* swap the order of the parameters, so we can use existing functions */
461 if (fn(b, a, &env->fpu_status)) {
462 match++;
463 s390_vec_write_element64(&tmp, i, -1ull);
464 }
465 vxc = check_ieee_exc(env, i, false, &vec_exc);
466 if (s || vxc) {
467 break;
468 }
469 }
470
471 handle_ieee_exc(env, vxc, vec_exc, retaddr);
472 *v1 = tmp;
473 if (match) {
474 return s || match == 2 ? 0 : 1;
475 }
476 return 3;
477}
478
e384332c
DH
479typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
480static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
481 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
482{
483 const float128 a = s390_vec_read_float128(v2);
484 const float128 b = s390_vec_read_float128(v3);
485 uint8_t vxc, vec_exc = 0;
486 S390Vector tmp = {};
487 bool match = false;
488
489 /* swap the order of the parameters, so we can use existing functions */
490 if (fn(b, a, &env->fpu_status)) {
491 match = true;
492 s390_vec_write_element64(&tmp, 0, -1ull);
493 s390_vec_write_element64(&tmp, 1, -1ull);
494 }
495 vxc = check_ieee_exc(env, 0, false, &vec_exc);
496 handle_ieee_exc(env, vxc, vec_exc, retaddr);
497 *v1 = tmp;
498 return match ? 0 : 3;
499}
500
64deb65a
DH
501#define DEF_GVEC_VFC_B(NAME, OP, BITS) \
502void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
503 CPUS390XState *env, uint32_t desc) \
504{ \
505 const bool se = extract32(simd_data(desc), 3, 1); \
e384332c
DH
506 const bool sq = extract32(simd_data(desc), 2, 1); \
507 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
64deb65a
DH
508 \
509 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
510} \
511 \
512void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \
513 CPUS390XState *env, uint32_t desc) \
514{ \
515 const bool se = extract32(simd_data(desc), 3, 1); \
e384332c
DH
516 const bool sq = extract32(simd_data(desc), 2, 1); \
517 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
64deb65a
DH
518 \
519 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
2c806ab4
DH
520}
521
64deb65a 522#define DEF_GVEC_VFC(NAME, OP) \
e384332c
DH
523DEF_GVEC_VFC_B(NAME, OP, 32) \
524DEF_GVEC_VFC_B(NAME, OP, 64) \
525DEF_GVEC_VFC_B(NAME, OP, 128) \
2c806ab4 526
64deb65a
DH
527DEF_GVEC_VFC(vfce, eq)
528DEF_GVEC_VFC(vfch, lt)
529DEF_GVEC_VFC(vfche, le)
bb03fd84 530
860b707b
DH
531void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
532 uint32_t desc)
1a76e59d 533{
860b707b 534 const bool s = extract32(simd_data(desc), 3, 1);
1a76e59d
DH
535 uint8_t vxc, vec_exc = 0;
536 S390Vector tmp = {};
537 int i;
538
539 for (i = 0; i < 2; i++) {
540 /* load from even element */
541 const float32 a = s390_vec_read_element32(v2, i * 2);
542 const uint64_t ret = float32_to_float64(a, &env->fpu_status);
543
544 s390_vec_write_element64(&tmp, i, ret);
545 /* indicate the source element */
546 vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
547 if (s || vxc) {
548 break;
549 }
550 }
860b707b
DH
551 handle_ieee_exc(env, vxc, vec_exc, GETPC());
552 *(S390Vector *)v1 = tmp;
1a76e59d 553}
4500ede4 554
2e96005e
DH
555void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
556 uint32_t desc)
557{
558 /* load from even element */
559 const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
560 &env->fpu_status);
561 uint8_t vxc, vec_exc = 0;
562
563 vxc = check_ieee_exc(env, 0, false, &vec_exc);
564 handle_ieee_exc(env, vxc, vec_exc, GETPC());
565 s390_vec_write_float128(v1, ret);
566}
567
977e43d9
DH
568void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
569 uint32_t desc)
4500ede4 570{
977e43d9
DH
571 const uint8_t erm = extract32(simd_data(desc), 4, 4);
572 const bool s = extract32(simd_data(desc), 3, 1);
573 const bool XxC = extract32(simd_data(desc), 2, 1);
4500ede4
DH
574 uint8_t vxc, vec_exc = 0;
575 S390Vector tmp = {};
576 int i, old_mode;
577
578 old_mode = s390_swap_bfp_rounding_mode(env, erm);
579 for (i = 0; i < 2; i++) {
580 float64 a = s390_vec_read_element64(v2, i);
581 uint32_t ret = float64_to_float32(a, &env->fpu_status);
582
583 /* place at even element */
584 s390_vec_write_element32(&tmp, i * 2, ret);
585 /* indicate the source element */
586 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
587 if (s || vxc) {
588 break;
589 }
590 }
591 s390_restore_bfp_rounding_mode(env, old_mode);
977e43d9
DH
592 handle_ieee_exc(env, vxc, vec_exc, GETPC());
593 *(S390Vector *)v1 = tmp;
4500ede4 594}
8d47d4d2 595
9cbc8be0
DH
596void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
597 uint32_t desc)
598{
599 const uint8_t erm = extract32(simd_data(desc), 4, 4);
600 const bool XxC = extract32(simd_data(desc), 2, 1);
601 uint8_t vxc, vec_exc = 0;
602 int old_mode;
603 float64 ret;
604
605 old_mode = s390_swap_bfp_rounding_mode(env, erm);
606 ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
607 vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
608 s390_restore_bfp_rounding_mode(env, old_mode);
609 handle_ieee_exc(env, vxc, vec_exc, GETPC());
610
611 /* place at even element, odd element is unpredictable */
612 s390_vec_write_float64(v1, 0, ret);
613}
614
e257abc8
DH
615static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
616 const S390Vector *v4, CPUS390XState *env, bool s, int flags,
617 uintptr_t retaddr)
618{
619 uint8_t vxc, vec_exc = 0;
620 S390Vector tmp = {};
621 int i;
622
623 for (i = 0; i < 4; i++) {
624 const float32 a = s390_vec_read_float32(v2, i);
625 const float32 b = s390_vec_read_float32(v3, i);
626 const float32 c = s390_vec_read_float32(v4, i);
627 float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status);
628
629 s390_vec_write_float32(&tmp, i, ret);
630 vxc = check_ieee_exc(env, i, false, &vec_exc);
631 if (s || vxc) {
632 break;
633 }
634 }
635 handle_ieee_exc(env, vxc, vec_exc, retaddr);
636 *v1 = tmp;
637}
638
c64c5984
DH
639static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
640 const S390Vector *v4, CPUS390XState *env, bool s, int flags,
641 uintptr_t retaddr)
642{
643 uint8_t vxc, vec_exc = 0;
644 S390Vector tmp = {};
645 int i;
646
647 for (i = 0; i < 2; i++) {
34142ffd
DH
648 const float64 a = s390_vec_read_float64(v2, i);
649 const float64 b = s390_vec_read_float64(v3, i);
650 const float64 c = s390_vec_read_float64(v4, i);
651 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
c64c5984 652
34142ffd 653 s390_vec_write_float64(&tmp, i, ret);
c64c5984
DH
654 vxc = check_ieee_exc(env, i, false, &vec_exc);
655 if (s || vxc) {
656 break;
657 }
658 }
659 handle_ieee_exc(env, vxc, vec_exc, retaddr);
660 *v1 = tmp;
661}
662
e257abc8
DH
663static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
664 const S390Vector *v4, CPUS390XState *env, bool s, int flags,
665 uintptr_t retaddr)
666{
667 const float128 a = s390_vec_read_float128(v2);
668 const float128 b = s390_vec_read_float128(v3);
669 const float128 c = s390_vec_read_float128(v4);
670 uint8_t vxc, vec_exc = 0;
671 float128 ret;
672
673 ret = float128_muladd(a, b, c, flags, &env->fpu_status);
674 vxc = check_ieee_exc(env, 0, false, &vec_exc);
675 handle_ieee_exc(env, vxc, vec_exc, retaddr);
676 s390_vec_write_float128(v1, ret);
677}
678
34142ffd
DH
679#define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \
680void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
681 const void *v4, CPUS390XState *env, \
682 uint32_t desc) \
683{ \
684 const bool se = extract32(simd_data(desc), 3, 1); \
685 \
686 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \
c64c5984
DH
687}
688
34142ffd 689#define DEF_GVEC_VFMA(NAME, FLAGS) \
e257abc8
DH
690 DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \
691 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \
692 DEF_GVEC_VFMA_B(NAME, FLAGS, 128)
c64c5984 693
34142ffd
DH
694DEF_GVEC_VFMA(vfma, 0)
695DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
f0249730
DH
696DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
697DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
5938f20c 698
a38b5a0e
DH
699void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
700 uint32_t desc)
701{
702 uint16_t i3 = extract32(simd_data(desc), 4, 12);
703 bool s = extract32(simd_data(desc), 3, 1);
704 int i, match = 0;
705
706 for (i = 0; i < 4; i++) {
707 float32 a = s390_vec_read_float32(v2, i);
708
709 if (float32_dcmask(env, a) & i3) {
710 match++;
711 s390_vec_write_element32(v1, i, -1u);
712 } else {
713 s390_vec_write_element32(v1, i, 0);
714 }
715 if (s) {
716 break;
717 }
718 }
719
720 if (match == 4 || (s && match)) {
721 env->cc_op = 0;
722 } else if (match) {
723 env->cc_op = 1;
724 } else {
725 env->cc_op = 3;
726 }
727}
728
622ebe64
DH
729void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
730 uint32_t desc)
83b955f9 731{
622ebe64
DH
732 const uint16_t i3 = extract32(simd_data(desc), 4, 12);
733 const bool s = extract32(simd_data(desc), 3, 1);
83b955f9
DH
734 int i, match = 0;
735
736 for (i = 0; i < 2; i++) {
622ebe64 737 const float64 a = s390_vec_read_float64(v2, i);
83b955f9
DH
738
739 if (float64_dcmask(env, a) & i3) {
740 match++;
741 s390_vec_write_element64(v1, i, -1ull);
742 } else {
743 s390_vec_write_element64(v1, i, 0);
744 }
745 if (s) {
746 break;
747 }
748 }
749
622ebe64
DH
750 if (match == 2 || (s && match)) {
751 env->cc_op = 0;
752 } else if (match) {
753 env->cc_op = 1;
754 } else {
755 env->cc_op = 3;
83b955f9 756 }
83b955f9 757}
a38b5a0e
DH
758
759void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
760 uint32_t desc)
761{
762 const float128 a = s390_vec_read_float128(v2);
763 uint16_t i3 = extract32(simd_data(desc), 4, 12);
764
765 if (float128_dcmask(env, a) & i3) {
766 env->cc_op = 0;
767 s390_vec_write_element64(v1, 0, -1ull);
768 s390_vec_write_element64(v1, 1, -1ull);
769 } else {
770 env->cc_op = 3;
771 s390_vec_write_element64(v1, 0, 0);
772 s390_vec_write_element64(v1, 1, 0);
773 }
774}
da480752
DH
775
/*
 * Flavour of a min/max operation. S390_MINMAX_TYPE_IEEE is served by the
 * softfloat minnum/maxnum family; every other flavour first consults
 * vfminmax_res() to decide on special cases.
 */
typedef enum S390MinMaxType {
    S390_MINMAX_TYPE_IEEE    = 0,
    S390_MINMAX_TYPE_JAVA    = 1,
    S390_MINMAX_TYPE_C_MACRO = 2,
    S390_MINMAX_TYPE_CPP     = 3,
    S390_MINMAX_TYPE_F       = 4,
} S390MinMaxType;
783
/*
 * Verdict of vfmin_res()/vfmax_res(): how the caller obtains the result
 * from the two operands a and b.
 */
typedef enum S390MinMaxRes {
    /* compute the regular min/max of a and b */
    S390_MINMAX_RES_MINMAX    = 0,
    /* take a as is */
    S390_MINMAX_RES_A         = 1,
    /* take b as is */
    S390_MINMAX_RES_B         = 2,
    /* take a, passed through float*_silence_nan() */
    S390_MINMAX_RES_SILENCE_A = 3,
    /* take b, passed through float*_silence_nan() */
    S390_MINMAX_RES_SILENCE_B = 4,
} S390MinMaxRes;
791
792static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
793 S390MinMaxType type, float_status *s)
794{
795 const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
796 const bool nan_a = dcmask_a & DCMASK_NAN;
797 const bool nan_b = dcmask_b & DCMASK_NAN;
798
799 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
800
801 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
802 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
803 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
804
805 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
806 s->float_exception_flags |= float_flag_invalid;
807 }
808 switch (type) {
809 case S390_MINMAX_TYPE_JAVA:
810 if (sig_a) {
811 return S390_MINMAX_RES_SILENCE_A;
812 } else if (sig_b) {
813 return S390_MINMAX_RES_SILENCE_B;
814 }
815 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
816 case S390_MINMAX_TYPE_F:
817 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
818 case S390_MINMAX_TYPE_C_MACRO:
819 s->float_exception_flags |= float_flag_invalid;
820 return S390_MINMAX_RES_B;
821 case S390_MINMAX_TYPE_CPP:
822 s->float_exception_flags |= float_flag_invalid;
823 return S390_MINMAX_RES_A;
824 default:
825 g_assert_not_reached();
826 }
13c59eb0 827 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
da480752
DH
828 switch (type) {
829 case S390_MINMAX_TYPE_JAVA:
830 return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
831 case S390_MINMAX_TYPE_C_MACRO:
832 return S390_MINMAX_RES_B;
833 case S390_MINMAX_TYPE_F:
834 return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
835 case S390_MINMAX_TYPE_CPP:
836 return S390_MINMAX_RES_A;
837 default:
838 g_assert_not_reached();
839 }
840 }
841 return S390_MINMAX_RES_MINMAX;
842}
843
844static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
845 S390MinMaxType type, float_status *s)
846{
847 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
848
849 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
850 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
851 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
852 const bool nan_a = dcmask_a & DCMASK_NAN;
853 const bool nan_b = dcmask_b & DCMASK_NAN;
854
855 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
856 s->float_exception_flags |= float_flag_invalid;
857 }
858 switch (type) {
859 case S390_MINMAX_TYPE_JAVA:
860 if (sig_a) {
861 return S390_MINMAX_RES_SILENCE_A;
862 } else if (sig_b) {
863 return S390_MINMAX_RES_SILENCE_B;
864 }
865 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
866 case S390_MINMAX_TYPE_F:
867 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
868 case S390_MINMAX_TYPE_C_MACRO:
869 s->float_exception_flags |= float_flag_invalid;
870 return S390_MINMAX_RES_B;
871 case S390_MINMAX_TYPE_CPP:
872 s->float_exception_flags |= float_flag_invalid;
873 return S390_MINMAX_RES_A;
874 default:
875 g_assert_not_reached();
876 }
13c59eb0 877 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
da480752
DH
878 const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
879
880 switch (type) {
881 case S390_MINMAX_TYPE_JAVA:
882 case S390_MINMAX_TYPE_F:
883 return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
884 case S390_MINMAX_TYPE_C_MACRO:
885 return S390_MINMAX_RES_B;
886 case S390_MINMAX_TYPE_CPP:
887 return S390_MINMAX_RES_A;
888 default:
889 g_assert_not_reached();
890 }
891 }
892 return S390_MINMAX_RES_MINMAX;
893}
894
895static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
896 S390MinMaxType type, bool is_min,
897 float_status *s)
898{
899 return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
900 vfmax_res(dcmask_a, dcmask_b, type, s);
901}
902
903static void vfminmax32(S390Vector *v1, const S390Vector *v2,
904 const S390Vector *v3, CPUS390XState *env,
905 S390MinMaxType type, bool is_min, bool is_abs, bool se,
906 uintptr_t retaddr)
907{
908 float_status *s = &env->fpu_status;
909 uint8_t vxc, vec_exc = 0;
910 S390Vector tmp = {};
911 int i;
912
913 for (i = 0; i < 4; i++) {
914 float32 a = s390_vec_read_float32(v2, i);
915 float32 b = s390_vec_read_float32(v3, i);
916 float32 result;
917
918 if (type != S390_MINMAX_TYPE_IEEE) {
919 S390MinMaxRes res;
920
921 if (is_abs) {
922 a = float32_abs(a);
923 b = float32_abs(b);
924 }
925
926 res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b),
927 type, is_min, s);
928 switch (res) {
929 case S390_MINMAX_RES_MINMAX:
930 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s);
931 break;
932 case S390_MINMAX_RES_A:
933 result = a;
934 break;
935 case S390_MINMAX_RES_B:
936 result = b;
937 break;
938 case S390_MINMAX_RES_SILENCE_A:
939 result = float32_silence_nan(a, s);
940 break;
941 case S390_MINMAX_RES_SILENCE_B:
942 result = float32_silence_nan(b, s);
943 break;
944 default:
945 g_assert_not_reached();
946 }
947 } else if (!is_abs) {
948 result = is_min ? float32_minnum(a, b, &env->fpu_status) :
949 float32_maxnum(a, b, &env->fpu_status);
950 } else {
951 result = is_min ? float32_minnummag(a, b, &env->fpu_status) :
952 float32_maxnummag(a, b, &env->fpu_status);
953 }
954
955 s390_vec_write_float32(&tmp, i, result);
956 vxc = check_ieee_exc(env, i, false, &vec_exc);
957 if (se || vxc) {
958 break;
959 }
960 }
961 handle_ieee_exc(env, vxc, vec_exc, retaddr);
962 *v1 = tmp;
963}
964
965static void vfminmax64(S390Vector *v1, const S390Vector *v2,
966 const S390Vector *v3, CPUS390XState *env,
967 S390MinMaxType type, bool is_min, bool is_abs, bool se,
968 uintptr_t retaddr)
969{
970 float_status *s = &env->fpu_status;
971 uint8_t vxc, vec_exc = 0;
972 S390Vector tmp = {};
973 int i;
974
975 for (i = 0; i < 2; i++) {
976 float64 a = s390_vec_read_float64(v2, i);
977 float64 b = s390_vec_read_float64(v3, i);
978 float64 result;
979
980 if (type != S390_MINMAX_TYPE_IEEE) {
981 S390MinMaxRes res;
982
983 if (is_abs) {
984 a = float64_abs(a);
985 b = float64_abs(b);
986 }
987
988 res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b),
989 type, is_min, s);
990 switch (res) {
991 case S390_MINMAX_RES_MINMAX:
992 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s);
993 break;
994 case S390_MINMAX_RES_A:
995 result = a;
996 break;
997 case S390_MINMAX_RES_B:
998 result = b;
999 break;
1000 case S390_MINMAX_RES_SILENCE_A:
1001 result = float64_silence_nan(a, s);
1002 break;
1003 case S390_MINMAX_RES_SILENCE_B:
1004 result = float64_silence_nan(b, s);
1005 break;
1006 default:
1007 g_assert_not_reached();
1008 }
1009 } else if (!is_abs) {
1010 result = is_min ? float64_minnum(a, b, &env->fpu_status) :
1011 float64_maxnum(a, b, &env->fpu_status);
1012 } else {
1013 result = is_min ? float64_minnummag(a, b, &env->fpu_status) :
1014 float64_maxnummag(a, b, &env->fpu_status);
1015 }
1016
1017 s390_vec_write_float64(&tmp, i, result);
1018 vxc = check_ieee_exc(env, i, false, &vec_exc);
1019 if (se || vxc) {
1020 break;
1021 }
1022 }
1023 handle_ieee_exc(env, vxc, vec_exc, retaddr);
1024 *v1 = tmp;
1025}
1026
1027static void vfminmax128(S390Vector *v1, const S390Vector *v2,
1028 const S390Vector *v3, CPUS390XState *env,
1029 S390MinMaxType type, bool is_min, bool is_abs, bool se,
1030 uintptr_t retaddr)
1031{
1032 float128 a = s390_vec_read_float128(v2);
1033 float128 b = s390_vec_read_float128(v3);
1034 float_status *s = &env->fpu_status;
1035 uint8_t vxc, vec_exc = 0;
1036 float128 result;
1037
1038 if (type != S390_MINMAX_TYPE_IEEE) {
1039 S390MinMaxRes res;
1040
1041 if (is_abs) {
1042 a = float128_abs(a);
1043 b = float128_abs(b);
1044 }
1045
1046 res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b),
1047 type, is_min, s);
1048 switch (res) {
1049 case S390_MINMAX_RES_MINMAX:
1050 result = is_min ? float128_min(a, b, s) : float128_max(a, b, s);
1051 break;
1052 case S390_MINMAX_RES_A:
1053 result = a;
1054 break;
1055 case S390_MINMAX_RES_B:
1056 result = b;
1057 break;
1058 case S390_MINMAX_RES_SILENCE_A:
1059 result = float128_silence_nan(a, s);
1060 break;
1061 case S390_MINMAX_RES_SILENCE_B:
1062 result = float128_silence_nan(b, s);
1063 break;
1064 default:
1065 g_assert_not_reached();
1066 }
1067 } else if (!is_abs) {
1068 result = is_min ? float128_minnum(a, b, &env->fpu_status) :
1069 float128_maxnum(a, b, &env->fpu_status);
1070 } else {
1071 result = is_min ? float128_minnummag(a, b, &env->fpu_status) :
1072 float128_maxnummag(a, b, &env->fpu_status);
1073 }
1074
1075 vxc = check_ieee_exc(env, 0, false, &vec_exc);
1076 handle_ieee_exc(env, vxc, vec_exc, retaddr);
1077 s390_vec_write_float128(v1, result);
1078}
1079
1080#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \
1081void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
1082 CPUS390XState *env, uint32_t desc) \
1083{ \
1084 const bool se = extract32(simd_data(desc), 3, 1); \
1085 uint8_t type = extract32(simd_data(desc), 4, 4); \
1086 bool is_abs = false; \
1087 \
1088 if (type >= 8) { \
1089 is_abs = true; \
1090 type -= 8; \
1091 } \
1092 \
1093 vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \
1094}
1095
1096#define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \
1097 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \
1098 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \
1099 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1100
1101DEF_GVEC_VFMINMAX(vfmax, false)
1102DEF_GVEC_VFMINMAX(vfmin, true)