]>
Commit | Line | Data |
---|---|---|
3a0eae85 DH |
1 | /* |
2 | * QEMU TCG support -- s390x vector floating point instruction support | |
3 | * | |
4 | * Copyright (C) 2019 Red Hat Inc | |
5 | * | |
6 | * Authors: | |
7 | * David Hildenbrand <david@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | */ | |
12 | #include "qemu/osdep.h" | |
3a0eae85 | 13 | #include "cpu.h" |
b6b47223 | 14 | #include "s390x-internal.h" |
3a0eae85 DH |
15 | #include "vec.h" |
16 | #include "tcg_s390x.h" | |
17 | #include "tcg/tcg-gvec-desc.h" | |
18 | #include "exec/exec-all.h" | |
19 | #include "exec/helper-proto.h" | |
20 | #include "fpu/softfloat.h" | |
21 | ||
22 | #define VIC_INVALID 0x1 | |
23 | #define VIC_DIVBYZERO 0x2 | |
24 | #define VIC_OVERFLOW 0x3 | |
25 | #define VIC_UNDERFLOW 0x4 | |
26 | #define VIC_INEXACT 0x5 | |
27 | ||
28 | /* returns the VEX. If the VEX is 0, there is no trap */ | |
29 | static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, | |
30 | uint8_t *vec_exc) | |
31 | { | |
32 | uint8_t vece_exc = 0, trap_exc; | |
33 | unsigned qemu_exc; | |
34 | ||
35 | /* Retrieve and clear the softfloat exceptions */ | |
36 | qemu_exc = env->fpu_status.float_exception_flags; | |
37 | if (qemu_exc == 0) { | |
38 | return 0; | |
39 | } | |
40 | env->fpu_status.float_exception_flags = 0; | |
41 | ||
42 | vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); | |
43 | ||
44 | /* Add them to the vector-wide s390x exception bits */ | |
45 | *vec_exc |= vece_exc; | |
46 | ||
47 | /* Check for traps and construct the VXC */ | |
48 | trap_exc = vece_exc & env->fpc >> 24; | |
49 | if (trap_exc) { | |
50 | if (trap_exc & S390_IEEE_MASK_INVALID) { | |
51 | return enr << 4 | VIC_INVALID; | |
52 | } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { | |
53 | return enr << 4 | VIC_DIVBYZERO; | |
54 | } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { | |
55 | return enr << 4 | VIC_OVERFLOW; | |
56 | } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { | |
57 | return enr << 4 | VIC_UNDERFLOW; | |
58 | } else if (!XxC) { | |
59 | g_assert(trap_exc & S390_IEEE_MASK_INEXACT); | |
60 | /* inexact has lowest priority on traps */ | |
61 | return enr << 4 | VIC_INEXACT; | |
62 | } | |
63 | } | |
64 | return 0; | |
65 | } | |
66 | ||
67 | static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, | |
68 | uintptr_t retaddr) | |
69 | { | |
70 | if (vxc) { | |
71 | /* on traps, the fpc flags are not updated, instruction is suppressed */ | |
72 | tcg_s390_vector_exception(env, vxc, retaddr); | |
73 | } | |
74 | if (vec_exc) { | |
75 | /* indicate exceptions for all elements combined */ | |
76 | env->fpc |= vec_exc << 16; | |
77 | } | |
78 | } | |
79 | ||
0987961d DH |
80 | static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) |
81 | { | |
82 | return make_float32(s390_vec_read_element32(v, enr)); | |
83 | } | |
84 | ||
863b9507 DH |
85 | static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) |
86 | { | |
87 | return make_float64(s390_vec_read_element64(v, enr)); | |
88 | } | |
89 | ||
0987961d DH |
90 | static float128 s390_vec_read_float128(const S390Vector *v) |
91 | { | |
92 | return make_float128(s390_vec_read_element64(v, 0), | |
93 | s390_vec_read_element64(v, 1)); | |
94 | } | |
95 | ||
96 | static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) | |
97 | { | |
98 | return s390_vec_write_element32(v, enr, data); | |
99 | } | |
100 | ||
863b9507 DH |
101 | static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) |
102 | { | |
103 | return s390_vec_write_element64(v, enr, data); | |
104 | } | |
105 | ||
0987961d DH |
106 | static void s390_vec_write_float128(S390Vector *v, float128 data) |
107 | { | |
108 | s390_vec_write_element64(v, 0, data.high); | |
109 | s390_vec_write_element64(v, 1, data.low); | |
110 | } | |
111 | ||
acb269a4 DH |
112 | typedef float32 (*vop32_2_fn)(float32 a, float_status *s); |
113 | static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, | |
114 | bool s, bool XxC, uint8_t erm, vop32_2_fn fn, | |
115 | uintptr_t retaddr) | |
116 | { | |
117 | uint8_t vxc, vec_exc = 0; | |
118 | S390Vector tmp = {}; | |
119 | int i, old_mode; | |
120 | ||
121 | old_mode = s390_swap_bfp_rounding_mode(env, erm); | |
122 | for (i = 0; i < 4; i++) { | |
123 | const float32 a = s390_vec_read_float32(v2, i); | |
124 | ||
125 | s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status)); | |
126 | vxc = check_ieee_exc(env, i, XxC, &vec_exc); | |
127 | if (s || vxc) { | |
128 | break; | |
129 | } | |
130 | } | |
131 | s390_restore_bfp_rounding_mode(env, old_mode); | |
132 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
133 | *v1 = tmp; | |
134 | } | |
135 | ||
21bd6ea2 | 136 | typedef float64 (*vop64_2_fn)(float64 a, float_status *s); |
bb03fd84 DH |
137 | static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, |
138 | bool s, bool XxC, uint8_t erm, vop64_2_fn fn, | |
139 | uintptr_t retaddr) | |
140 | { | |
141 | uint8_t vxc, vec_exc = 0; | |
142 | S390Vector tmp = {}; | |
143 | int i, old_mode; | |
144 | ||
145 | old_mode = s390_swap_bfp_rounding_mode(env, erm); | |
146 | for (i = 0; i < 2; i++) { | |
21bd6ea2 | 147 | const float64 a = s390_vec_read_float64(v2, i); |
bb03fd84 | 148 | |
21bd6ea2 | 149 | s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); |
bb03fd84 DH |
150 | vxc = check_ieee_exc(env, i, XxC, &vec_exc); |
151 | if (s || vxc) { | |
152 | break; | |
153 | } | |
154 | } | |
155 | s390_restore_bfp_rounding_mode(env, old_mode); | |
156 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
157 | *v1 = tmp; | |
158 | } | |
159 | ||
acb269a4 DH |
160 | typedef float128 (*vop128_2_fn)(float128 a, float_status *s); |
161 | static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, | |
162 | bool s, bool XxC, uint8_t erm, vop128_2_fn fn, | |
163 | uintptr_t retaddr) | |
164 | { | |
165 | const float128 a = s390_vec_read_float128(v2); | |
166 | uint8_t vxc, vec_exc = 0; | |
167 | S390Vector tmp = {}; | |
168 | int old_mode; | |
169 | ||
170 | old_mode = s390_swap_bfp_rounding_mode(env, erm); | |
171 | s390_vec_write_float128(&tmp, fn(a, &env->fpu_status)); | |
172 | vxc = check_ieee_exc(env, 0, XxC, &vec_exc); | |
173 | s390_restore_bfp_rounding_mode(env, old_mode); | |
174 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
175 | *v1 = tmp; | |
176 | } | |
177 | ||
acc2d3a4 DM |
178 | static float32 vcdg32(float32 a, float_status *s) |
179 | { | |
180 | return int32_to_float32(a, s); | |
181 | } | |
182 | ||
183 | static float32 vcdlg32(float32 a, float_status *s) | |
184 | { | |
185 | return uint32_to_float32(a, s); | |
186 | } | |
187 | ||
188 | static float32 vcgd32(float32 a, float_status *s) | |
189 | { | |
190 | const float32 tmp = float32_to_int32(a, s); | |
191 | ||
192 | return float32_is_any_nan(a) ? INT32_MIN : tmp; | |
193 | } | |
194 | ||
195 | static float32 vclgd32(float32 a, float_status *s) | |
196 | { | |
197 | const float32 tmp = float32_to_uint32(a, s); | |
198 | ||
199 | return float32_is_any_nan(a) ? 0 : tmp; | |
200 | } | |
201 | ||
21bd6ea2 DH |
202 | static float64 vcdg64(float64 a, float_status *s) |
203 | { | |
204 | return int64_to_float64(a, s); | |
205 | } | |
206 | ||
207 | static float64 vcdlg64(float64 a, float_status *s) | |
208 | { | |
209 | return uint64_to_float64(a, s); | |
210 | } | |
211 | ||
212 | static float64 vcgd64(float64 a, float_status *s) | |
213 | { | |
214 | const float64 tmp = float64_to_int64(a, s); | |
215 | ||
216 | return float64_is_any_nan(a) ? INT64_MIN : tmp; | |
217 | } | |
218 | ||
219 | static float64 vclgd64(float64 a, float_status *s) | |
220 | { | |
221 | const float64 tmp = float64_to_uint64(a, s); | |
222 | ||
223 | return float64_is_any_nan(a) ? 0 : tmp; | |
224 | } | |
225 | ||
226 | #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ | |
227 | void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ | |
228 | uint32_t desc) \ | |
229 | { \ | |
230 | const uint8_t erm = extract32(simd_data(desc), 4, 4); \ | |
231 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
232 | const bool XxC = extract32(simd_data(desc), 2, 1); \ | |
233 | \ | |
234 | vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ | |
235 | } | |
236 | ||
acc2d3a4 DM |
237 | #define DEF_GVEC_VOP2_32(NAME) \ |
238 | DEF_GVEC_VOP2_FN(NAME, NAME##32, 32) | |
239 | ||
21bd6ea2 DH |
240 | #define DEF_GVEC_VOP2_64(NAME) \ |
241 | DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) | |
242 | ||
243 | #define DEF_GVEC_VOP2(NAME, OP) \ | |
acb269a4 DH |
244 | DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ |
245 | DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ | |
246 | DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) | |
21bd6ea2 | 247 | |
acc2d3a4 DM |
248 | DEF_GVEC_VOP2_32(vcdg) |
249 | DEF_GVEC_VOP2_32(vcdlg) | |
250 | DEF_GVEC_VOP2_32(vcgd) | |
251 | DEF_GVEC_VOP2_32(vclgd) | |
21bd6ea2 DH |
252 | DEF_GVEC_VOP2_64(vcdg) |
253 | DEF_GVEC_VOP2_64(vcdlg) | |
254 | DEF_GVEC_VOP2_64(vcgd) | |
255 | DEF_GVEC_VOP2_64(vclgd) | |
256 | DEF_GVEC_VOP2(vfi, round_to_int) | |
257 | DEF_GVEC_VOP2(vfsq, sqrt) | |
258 | ||
0987961d DH |
259 | typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); |
260 | static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, | |
261 | CPUS390XState *env, bool s, vop32_3_fn fn, | |
262 | uintptr_t retaddr) | |
263 | { | |
264 | uint8_t vxc, vec_exc = 0; | |
265 | S390Vector tmp = {}; | |
266 | int i; | |
267 | ||
268 | for (i = 0; i < 4; i++) { | |
269 | const float32 a = s390_vec_read_float32(v2, i); | |
270 | const float32 b = s390_vec_read_float32(v3, i); | |
271 | ||
272 | s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); | |
273 | vxc = check_ieee_exc(env, i, false, &vec_exc); | |
274 | if (s || vxc) { | |
275 | break; | |
276 | } | |
277 | } | |
278 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
279 | *v1 = tmp; | |
280 | } | |
281 | ||
863b9507 | 282 | typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); |
3a0eae85 DH |
283 | static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
284 | CPUS390XState *env, bool s, vop64_3_fn fn, | |
285 | uintptr_t retaddr) | |
286 | { | |
287 | uint8_t vxc, vec_exc = 0; | |
288 | S390Vector tmp = {}; | |
289 | int i; | |
290 | ||
291 | for (i = 0; i < 2; i++) { | |
863b9507 DH |
292 | const float64 a = s390_vec_read_float64(v2, i); |
293 | const float64 b = s390_vec_read_float64(v3, i); | |
3a0eae85 | 294 | |
863b9507 | 295 | s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); |
3a0eae85 DH |
296 | vxc = check_ieee_exc(env, i, false, &vec_exc); |
297 | if (s || vxc) { | |
298 | break; | |
299 | } | |
300 | } | |
301 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
302 | *v1 = tmp; | |
303 | } | |
304 | ||
0987961d DH |
305 | typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); |
306 | static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, | |
307 | CPUS390XState *env, bool s, vop128_3_fn fn, | |
308 | uintptr_t retaddr) | |
309 | { | |
310 | const float128 a = s390_vec_read_float128(v2); | |
311 | const float128 b = s390_vec_read_float128(v3); | |
312 | uint8_t vxc, vec_exc = 0; | |
313 | S390Vector tmp = {}; | |
314 | ||
315 | s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); | |
316 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
317 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
318 | *v1 = tmp; | |
319 | } | |
320 | ||
321 | #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ | |
322 | void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ | |
323 | CPUS390XState *env, uint32_t desc) \ | |
863b9507 DH |
324 | { \ |
325 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
326 | \ | |
0987961d | 327 | vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ |
3a0eae85 DH |
328 | } |
329 | ||
0987961d DH |
330 | #define DEF_GVEC_VOP3(NAME, OP) \ |
331 | DEF_GVEC_VOP3_B(NAME, OP, 32) \ | |
332 | DEF_GVEC_VOP3_B(NAME, OP, 64) \ | |
333 | DEF_GVEC_VOP3_B(NAME, OP, 128) | |
334 | ||
863b9507 DH |
335 | DEF_GVEC_VOP3(vfa, add) |
336 | DEF_GVEC_VOP3(vfs, sub) | |
337 | DEF_GVEC_VOP3(vfd, div) | |
338 | DEF_GVEC_VOP3(vfm, mul) | |
5b89f0fb | 339 | |
1c6b5b47 DH |
340 | static int wfc32(const S390Vector *v1, const S390Vector *v2, |
341 | CPUS390XState *env, bool signal, uintptr_t retaddr) | |
342 | { | |
343 | /* only the zero-indexed elements are compared */ | |
344 | const float32 a = s390_vec_read_float32(v1, 0); | |
345 | const float32 b = s390_vec_read_float32(v2, 0); | |
346 | uint8_t vxc, vec_exc = 0; | |
347 | int cmp; | |
348 | ||
349 | if (signal) { | |
350 | cmp = float32_compare(a, b, &env->fpu_status); | |
351 | } else { | |
352 | cmp = float32_compare_quiet(a, b, &env->fpu_status); | |
353 | } | |
354 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
355 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
356 | ||
357 | return float_comp_to_cc(env, cmp); | |
358 | } | |
359 | ||
5b89f0fb DH |
360 | static int wfc64(const S390Vector *v1, const S390Vector *v2, |
361 | CPUS390XState *env, bool signal, uintptr_t retaddr) | |
362 | { | |
363 | /* only the zero-indexed elements are compared */ | |
4da79375 DH |
364 | const float64 a = s390_vec_read_float64(v1, 0); |
365 | const float64 b = s390_vec_read_float64(v2, 0); | |
5b89f0fb DH |
366 | uint8_t vxc, vec_exc = 0; |
367 | int cmp; | |
368 | ||
369 | if (signal) { | |
370 | cmp = float64_compare(a, b, &env->fpu_status); | |
371 | } else { | |
372 | cmp = float64_compare_quiet(a, b, &env->fpu_status); | |
373 | } | |
374 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
375 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
376 | ||
377 | return float_comp_to_cc(env, cmp); | |
378 | } | |
379 | ||
1c6b5b47 DH |
380 | static int wfc128(const S390Vector *v1, const S390Vector *v2, |
381 | CPUS390XState *env, bool signal, uintptr_t retaddr) | |
382 | { | |
383 | /* only the zero-indexed elements are compared */ | |
384 | const float128 a = s390_vec_read_float128(v1); | |
385 | const float128 b = s390_vec_read_float128(v2); | |
386 | uint8_t vxc, vec_exc = 0; | |
387 | int cmp; | |
388 | ||
389 | if (signal) { | |
390 | cmp = float128_compare(a, b, &env->fpu_status); | |
391 | } else { | |
392 | cmp = float128_compare_quiet(a, b, &env->fpu_status); | |
393 | } | |
394 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
395 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
396 | ||
397 | return float_comp_to_cc(env, cmp); | |
398 | } | |
399 | ||
4da79375 DH |
400 | #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ |
401 | void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ | |
402 | CPUS390XState *env, uint32_t desc) \ | |
403 | { \ | |
404 | env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ | |
5b89f0fb DH |
405 | } |
406 | ||
4da79375 | 407 | #define DEF_GVEC_WFC(NAME, SIGNAL) \ |
1c6b5b47 DH |
408 | DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \ |
409 | DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \ | |
410 | DEF_GVEC_WFC_B(NAME, SIGNAL, 128) | |
4da79375 DH |
411 | |
412 | DEF_GVEC_WFC(wfc, false) | |
413 | DEF_GVEC_WFC(wfk, true) | |
2c806ab4 | 414 | |
e384332c DH |
415 | typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status); |
416 | static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, | |
417 | CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr) | |
418 | { | |
419 | uint8_t vxc, vec_exc = 0; | |
420 | S390Vector tmp = {}; | |
421 | int match = 0; | |
422 | int i; | |
423 | ||
424 | for (i = 0; i < 4; i++) { | |
425 | const float32 a = s390_vec_read_float32(v2, i); | |
426 | const float32 b = s390_vec_read_float32(v3, i); | |
427 | ||
428 | /* swap the order of the parameters, so we can use existing functions */ | |
429 | if (fn(b, a, &env->fpu_status)) { | |
430 | match++; | |
431 | s390_vec_write_element32(&tmp, i, -1u); | |
432 | } | |
433 | vxc = check_ieee_exc(env, i, false, &vec_exc); | |
434 | if (s || vxc) { | |
435 | break; | |
436 | } | |
437 | } | |
438 | ||
439 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
440 | *v1 = tmp; | |
441 | if (match) { | |
442 | return s || match == 4 ? 0 : 1; | |
443 | } | |
444 | return 3; | |
445 | } | |
446 | ||
0673ecdf | 447 | typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); |
2c806ab4 DH |
448 | static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
449 | CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) | |
450 | { | |
451 | uint8_t vxc, vec_exc = 0; | |
452 | S390Vector tmp = {}; | |
453 | int match = 0; | |
454 | int i; | |
455 | ||
456 | for (i = 0; i < 2; i++) { | |
64deb65a DH |
457 | const float64 a = s390_vec_read_float64(v2, i); |
458 | const float64 b = s390_vec_read_float64(v3, i); | |
2c806ab4 DH |
459 | |
460 | /* swap the order of the parameters, so we can use existing functions */ | |
461 | if (fn(b, a, &env->fpu_status)) { | |
462 | match++; | |
463 | s390_vec_write_element64(&tmp, i, -1ull); | |
464 | } | |
465 | vxc = check_ieee_exc(env, i, false, &vec_exc); | |
466 | if (s || vxc) { | |
467 | break; | |
468 | } | |
469 | } | |
470 | ||
471 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
472 | *v1 = tmp; | |
473 | if (match) { | |
474 | return s || match == 2 ? 0 : 1; | |
475 | } | |
476 | return 3; | |
477 | } | |
478 | ||
e384332c DH |
479 | typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status); |
480 | static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, | |
481 | CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr) | |
482 | { | |
483 | const float128 a = s390_vec_read_float128(v2); | |
484 | const float128 b = s390_vec_read_float128(v3); | |
485 | uint8_t vxc, vec_exc = 0; | |
486 | S390Vector tmp = {}; | |
487 | bool match = false; | |
488 | ||
489 | /* swap the order of the parameters, so we can use existing functions */ | |
490 | if (fn(b, a, &env->fpu_status)) { | |
491 | match = true; | |
492 | s390_vec_write_element64(&tmp, 0, -1ull); | |
493 | s390_vec_write_element64(&tmp, 1, -1ull); | |
494 | } | |
495 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
496 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
497 | *v1 = tmp; | |
498 | return match ? 0 : 3; | |
499 | } | |
500 | ||
64deb65a DH |
501 | #define DEF_GVEC_VFC_B(NAME, OP, BITS) \ |
502 | void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ | |
503 | CPUS390XState *env, uint32_t desc) \ | |
504 | { \ | |
505 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
e384332c DH |
506 | const bool sq = extract32(simd_data(desc), 2, 1); \ |
507 | vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ | |
64deb65a DH |
508 | \ |
509 | vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ | |
510 | } \ | |
511 | \ | |
512 | void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ | |
513 | CPUS390XState *env, uint32_t desc) \ | |
514 | { \ | |
515 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
e384332c DH |
516 | const bool sq = extract32(simd_data(desc), 2, 1); \ |
517 | vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ | |
64deb65a DH |
518 | \ |
519 | env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ | |
2c806ab4 DH |
520 | } |
521 | ||
64deb65a | 522 | #define DEF_GVEC_VFC(NAME, OP) \ |
e384332c DH |
523 | DEF_GVEC_VFC_B(NAME, OP, 32) \ |
524 | DEF_GVEC_VFC_B(NAME, OP, 64) \ | |
525 | DEF_GVEC_VFC_B(NAME, OP, 128) \ | |
2c806ab4 | 526 | |
64deb65a DH |
527 | DEF_GVEC_VFC(vfce, eq) |
528 | DEF_GVEC_VFC(vfch, lt) | |
529 | DEF_GVEC_VFC(vfche, le) | |
bb03fd84 | 530 | |
860b707b DH |
531 | void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, |
532 | uint32_t desc) | |
1a76e59d | 533 | { |
860b707b | 534 | const bool s = extract32(simd_data(desc), 3, 1); |
1a76e59d DH |
535 | uint8_t vxc, vec_exc = 0; |
536 | S390Vector tmp = {}; | |
537 | int i; | |
538 | ||
539 | for (i = 0; i < 2; i++) { | |
540 | /* load from even element */ | |
541 | const float32 a = s390_vec_read_element32(v2, i * 2); | |
542 | const uint64_t ret = float32_to_float64(a, &env->fpu_status); | |
543 | ||
544 | s390_vec_write_element64(&tmp, i, ret); | |
545 | /* indicate the source element */ | |
546 | vxc = check_ieee_exc(env, i * 2, false, &vec_exc); | |
547 | if (s || vxc) { | |
548 | break; | |
549 | } | |
550 | } | |
860b707b DH |
551 | handle_ieee_exc(env, vxc, vec_exc, GETPC()); |
552 | *(S390Vector *)v1 = tmp; | |
1a76e59d | 553 | } |
4500ede4 | 554 | |
2e96005e DH |
555 | void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env, |
556 | uint32_t desc) | |
557 | { | |
558 | /* load from even element */ | |
559 | const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0), | |
560 | &env->fpu_status); | |
561 | uint8_t vxc, vec_exc = 0; | |
562 | ||
563 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
564 | handle_ieee_exc(env, vxc, vec_exc, GETPC()); | |
565 | s390_vec_write_float128(v1, ret); | |
566 | } | |
567 | ||
977e43d9 DH |
568 | void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, |
569 | uint32_t desc) | |
4500ede4 | 570 | { |
977e43d9 DH |
571 | const uint8_t erm = extract32(simd_data(desc), 4, 4); |
572 | const bool s = extract32(simd_data(desc), 3, 1); | |
573 | const bool XxC = extract32(simd_data(desc), 2, 1); | |
4500ede4 DH |
574 | uint8_t vxc, vec_exc = 0; |
575 | S390Vector tmp = {}; | |
576 | int i, old_mode; | |
577 | ||
578 | old_mode = s390_swap_bfp_rounding_mode(env, erm); | |
579 | for (i = 0; i < 2; i++) { | |
580 | float64 a = s390_vec_read_element64(v2, i); | |
581 | uint32_t ret = float64_to_float32(a, &env->fpu_status); | |
582 | ||
583 | /* place at even element */ | |
584 | s390_vec_write_element32(&tmp, i * 2, ret); | |
585 | /* indicate the source element */ | |
586 | vxc = check_ieee_exc(env, i, XxC, &vec_exc); | |
587 | if (s || vxc) { | |
588 | break; | |
589 | } | |
590 | } | |
591 | s390_restore_bfp_rounding_mode(env, old_mode); | |
977e43d9 DH |
592 | handle_ieee_exc(env, vxc, vec_exc, GETPC()); |
593 | *(S390Vector *)v1 = tmp; | |
4500ede4 | 594 | } |
8d47d4d2 | 595 | |
9cbc8be0 DH |
596 | void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env, |
597 | uint32_t desc) | |
598 | { | |
599 | const uint8_t erm = extract32(simd_data(desc), 4, 4); | |
600 | const bool XxC = extract32(simd_data(desc), 2, 1); | |
601 | uint8_t vxc, vec_exc = 0; | |
602 | int old_mode; | |
603 | float64 ret; | |
604 | ||
605 | old_mode = s390_swap_bfp_rounding_mode(env, erm); | |
606 | ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status); | |
607 | vxc = check_ieee_exc(env, 0, XxC, &vec_exc); | |
608 | s390_restore_bfp_rounding_mode(env, old_mode); | |
609 | handle_ieee_exc(env, vxc, vec_exc, GETPC()); | |
610 | ||
611 | /* place at even element, odd element is unpredictable */ | |
612 | s390_vec_write_float64(v1, 0, ret); | |
613 | } | |
614 | ||
e257abc8 DH |
615 | static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
616 | const S390Vector *v4, CPUS390XState *env, bool s, int flags, | |
617 | uintptr_t retaddr) | |
618 | { | |
619 | uint8_t vxc, vec_exc = 0; | |
620 | S390Vector tmp = {}; | |
621 | int i; | |
622 | ||
623 | for (i = 0; i < 4; i++) { | |
624 | const float32 a = s390_vec_read_float32(v2, i); | |
625 | const float32 b = s390_vec_read_float32(v3, i); | |
626 | const float32 c = s390_vec_read_float32(v4, i); | |
627 | float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status); | |
628 | ||
629 | s390_vec_write_float32(&tmp, i, ret); | |
630 | vxc = check_ieee_exc(env, i, false, &vec_exc); | |
631 | if (s || vxc) { | |
632 | break; | |
633 | } | |
634 | } | |
635 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
636 | *v1 = tmp; | |
637 | } | |
638 | ||
c64c5984 DH |
639 | static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
640 | const S390Vector *v4, CPUS390XState *env, bool s, int flags, | |
641 | uintptr_t retaddr) | |
642 | { | |
643 | uint8_t vxc, vec_exc = 0; | |
644 | S390Vector tmp = {}; | |
645 | int i; | |
646 | ||
647 | for (i = 0; i < 2; i++) { | |
34142ffd DH |
648 | const float64 a = s390_vec_read_float64(v2, i); |
649 | const float64 b = s390_vec_read_float64(v3, i); | |
650 | const float64 c = s390_vec_read_float64(v4, i); | |
651 | const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); | |
c64c5984 | 652 | |
34142ffd | 653 | s390_vec_write_float64(&tmp, i, ret); |
c64c5984 DH |
654 | vxc = check_ieee_exc(env, i, false, &vec_exc); |
655 | if (s || vxc) { | |
656 | break; | |
657 | } | |
658 | } | |
659 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
660 | *v1 = tmp; | |
661 | } | |
662 | ||
e257abc8 DH |
663 | static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
664 | const S390Vector *v4, CPUS390XState *env, bool s, int flags, | |
665 | uintptr_t retaddr) | |
666 | { | |
667 | const float128 a = s390_vec_read_float128(v2); | |
668 | const float128 b = s390_vec_read_float128(v3); | |
669 | const float128 c = s390_vec_read_float128(v4); | |
670 | uint8_t vxc, vec_exc = 0; | |
671 | float128 ret; | |
672 | ||
673 | ret = float128_muladd(a, b, c, flags, &env->fpu_status); | |
674 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
675 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
676 | s390_vec_write_float128(v1, ret); | |
677 | } | |
678 | ||
34142ffd DH |
679 | #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ |
680 | void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ | |
681 | const void *v4, CPUS390XState *env, \ | |
682 | uint32_t desc) \ | |
683 | { \ | |
684 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
685 | \ | |
686 | vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ | |
c64c5984 DH |
687 | } |
688 | ||
34142ffd | 689 | #define DEF_GVEC_VFMA(NAME, FLAGS) \ |
e257abc8 DH |
690 | DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \ |
691 | DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \ | |
692 | DEF_GVEC_VFMA_B(NAME, FLAGS, 128) | |
c64c5984 | 693 | |
34142ffd DH |
694 | DEF_GVEC_VFMA(vfma, 0) |
695 | DEF_GVEC_VFMA(vfms, float_muladd_negate_c) | |
f0249730 DH |
696 | DEF_GVEC_VFMA(vfnma, float_muladd_negate_result) |
697 | DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result) | |
5938f20c | 698 | |
a38b5a0e DH |
699 | void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env, |
700 | uint32_t desc) | |
701 | { | |
702 | uint16_t i3 = extract32(simd_data(desc), 4, 12); | |
703 | bool s = extract32(simd_data(desc), 3, 1); | |
704 | int i, match = 0; | |
705 | ||
706 | for (i = 0; i < 4; i++) { | |
707 | float32 a = s390_vec_read_float32(v2, i); | |
708 | ||
709 | if (float32_dcmask(env, a) & i3) { | |
710 | match++; | |
711 | s390_vec_write_element32(v1, i, -1u); | |
712 | } else { | |
713 | s390_vec_write_element32(v1, i, 0); | |
714 | } | |
715 | if (s) { | |
716 | break; | |
717 | } | |
718 | } | |
719 | ||
720 | if (match == 4 || (s && match)) { | |
721 | env->cc_op = 0; | |
722 | } else if (match) { | |
723 | env->cc_op = 1; | |
724 | } else { | |
725 | env->cc_op = 3; | |
726 | } | |
727 | } | |
728 | ||
622ebe64 DH |
729 | void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, |
730 | uint32_t desc) | |
83b955f9 | 731 | { |
622ebe64 DH |
732 | const uint16_t i3 = extract32(simd_data(desc), 4, 12); |
733 | const bool s = extract32(simd_data(desc), 3, 1); | |
83b955f9 DH |
734 | int i, match = 0; |
735 | ||
736 | for (i = 0; i < 2; i++) { | |
622ebe64 | 737 | const float64 a = s390_vec_read_float64(v2, i); |
83b955f9 DH |
738 | |
739 | if (float64_dcmask(env, a) & i3) { | |
740 | match++; | |
741 | s390_vec_write_element64(v1, i, -1ull); | |
742 | } else { | |
743 | s390_vec_write_element64(v1, i, 0); | |
744 | } | |
745 | if (s) { | |
746 | break; | |
747 | } | |
748 | } | |
749 | ||
622ebe64 DH |
750 | if (match == 2 || (s && match)) { |
751 | env->cc_op = 0; | |
752 | } else if (match) { | |
753 | env->cc_op = 1; | |
754 | } else { | |
755 | env->cc_op = 3; | |
83b955f9 | 756 | } |
83b955f9 | 757 | } |
a38b5a0e DH |
758 | |
759 | void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env, | |
760 | uint32_t desc) | |
761 | { | |
762 | const float128 a = s390_vec_read_float128(v2); | |
763 | uint16_t i3 = extract32(simd_data(desc), 4, 12); | |
764 | ||
765 | if (float128_dcmask(env, a) & i3) { | |
766 | env->cc_op = 0; | |
767 | s390_vec_write_element64(v1, 0, -1ull); | |
768 | s390_vec_write_element64(v1, 1, -1ull); | |
769 | } else { | |
770 | env->cc_op = 3; | |
771 | s390_vec_write_element64(v1, 0, 0); | |
772 | s390_vec_write_element64(v1, 1, 0); | |
773 | } | |
774 | } | |
da480752 DH |
775 | |
/*
 * Min/max flavour selector. vfmin_res()/vfmax_res() only accept the values
 * after S390_MINMAX_TYPE_IEEE, up to and including S390_MINMAX_TYPE_F.
 */
typedef enum S390MinMaxType {
    S390_MINMAX_TYPE_IEEE    = 0,
    S390_MINMAX_TYPE_JAVA    = 1,
    S390_MINMAX_TYPE_C_MACRO = 2,
    S390_MINMAX_TYPE_CPP     = 3,
    S390_MINMAX_TYPE_F       = 4,
} S390MinMaxType;

/*
 * Result selector returned by vfmin_res()/vfmax_res(): either "no special
 * case applies" (MINMAX), or which operand to return, optionally silenced.
 */
typedef enum S390MinMaxRes {
    S390_MINMAX_RES_MINMAX    = 0,
    S390_MINMAX_RES_A         = 1,
    S390_MINMAX_RES_B         = 2,
    S390_MINMAX_RES_SILENCE_A = 3,
    S390_MINMAX_RES_SILENCE_B = 4,
} S390MinMaxRes;
791 | ||
792 | static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, | |
793 | S390MinMaxType type, float_status *s) | |
794 | { | |
795 | const bool neg_a = dcmask_a & DCMASK_NEGATIVE; | |
796 | const bool nan_a = dcmask_a & DCMASK_NAN; | |
797 | const bool nan_b = dcmask_b & DCMASK_NAN; | |
798 | ||
799 | g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); | |
800 | ||
801 | if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { | |
802 | const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; | |
803 | const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; | |
804 | ||
805 | if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { | |
806 | s->float_exception_flags |= float_flag_invalid; | |
807 | } | |
808 | switch (type) { | |
809 | case S390_MINMAX_TYPE_JAVA: | |
810 | if (sig_a) { | |
811 | return S390_MINMAX_RES_SILENCE_A; | |
812 | } else if (sig_b) { | |
813 | return S390_MINMAX_RES_SILENCE_B; | |
814 | } | |
815 | return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; | |
816 | case S390_MINMAX_TYPE_F: | |
817 | return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; | |
818 | case S390_MINMAX_TYPE_C_MACRO: | |
819 | s->float_exception_flags |= float_flag_invalid; | |
820 | return S390_MINMAX_RES_B; | |
821 | case S390_MINMAX_TYPE_CPP: | |
822 | s->float_exception_flags |= float_flag_invalid; | |
823 | return S390_MINMAX_RES_A; | |
824 | default: | |
825 | g_assert_not_reached(); | |
826 | } | |
13c59eb0 | 827 | } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) { |
da480752 DH |
828 | switch (type) { |
829 | case S390_MINMAX_TYPE_JAVA: | |
830 | return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; | |
831 | case S390_MINMAX_TYPE_C_MACRO: | |
832 | return S390_MINMAX_RES_B; | |
833 | case S390_MINMAX_TYPE_F: | |
834 | return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; | |
835 | case S390_MINMAX_TYPE_CPP: | |
836 | return S390_MINMAX_RES_A; | |
837 | default: | |
838 | g_assert_not_reached(); | |
839 | } | |
840 | } | |
841 | return S390_MINMAX_RES_MINMAX; | |
842 | } | |
843 | ||
844 | static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, | |
845 | S390MinMaxType type, float_status *s) | |
846 | { | |
847 | g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); | |
848 | ||
849 | if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { | |
850 | const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; | |
851 | const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; | |
852 | const bool nan_a = dcmask_a & DCMASK_NAN; | |
853 | const bool nan_b = dcmask_b & DCMASK_NAN; | |
854 | ||
855 | if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { | |
856 | s->float_exception_flags |= float_flag_invalid; | |
857 | } | |
858 | switch (type) { | |
859 | case S390_MINMAX_TYPE_JAVA: | |
860 | if (sig_a) { | |
861 | return S390_MINMAX_RES_SILENCE_A; | |
862 | } else if (sig_b) { | |
863 | return S390_MINMAX_RES_SILENCE_B; | |
864 | } | |
865 | return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; | |
866 | case S390_MINMAX_TYPE_F: | |
867 | return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; | |
868 | case S390_MINMAX_TYPE_C_MACRO: | |
869 | s->float_exception_flags |= float_flag_invalid; | |
870 | return S390_MINMAX_RES_B; | |
871 | case S390_MINMAX_TYPE_CPP: | |
872 | s->float_exception_flags |= float_flag_invalid; | |
873 | return S390_MINMAX_RES_A; | |
874 | default: | |
875 | g_assert_not_reached(); | |
876 | } | |
13c59eb0 | 877 | } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) { |
da480752 DH |
878 | const bool neg_a = dcmask_a & DCMASK_NEGATIVE; |
879 | ||
880 | switch (type) { | |
881 | case S390_MINMAX_TYPE_JAVA: | |
882 | case S390_MINMAX_TYPE_F: | |
883 | return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; | |
884 | case S390_MINMAX_TYPE_C_MACRO: | |
885 | return S390_MINMAX_RES_B; | |
886 | case S390_MINMAX_TYPE_CPP: | |
887 | return S390_MINMAX_RES_A; | |
888 | default: | |
889 | g_assert_not_reached(); | |
890 | } | |
891 | } | |
892 | return S390_MINMAX_RES_MINMAX; | |
893 | } | |
894 | ||
895 | static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b, | |
896 | S390MinMaxType type, bool is_min, | |
897 | float_status *s) | |
898 | { | |
899 | return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) : | |
900 | vfmax_res(dcmask_a, dcmask_b, type, s); | |
901 | } | |
902 | ||
903 | static void vfminmax32(S390Vector *v1, const S390Vector *v2, | |
904 | const S390Vector *v3, CPUS390XState *env, | |
905 | S390MinMaxType type, bool is_min, bool is_abs, bool se, | |
906 | uintptr_t retaddr) | |
907 | { | |
908 | float_status *s = &env->fpu_status; | |
909 | uint8_t vxc, vec_exc = 0; | |
910 | S390Vector tmp = {}; | |
911 | int i; | |
912 | ||
913 | for (i = 0; i < 4; i++) { | |
914 | float32 a = s390_vec_read_float32(v2, i); | |
915 | float32 b = s390_vec_read_float32(v3, i); | |
916 | float32 result; | |
917 | ||
918 | if (type != S390_MINMAX_TYPE_IEEE) { | |
919 | S390MinMaxRes res; | |
920 | ||
921 | if (is_abs) { | |
922 | a = float32_abs(a); | |
923 | b = float32_abs(b); | |
924 | } | |
925 | ||
926 | res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b), | |
927 | type, is_min, s); | |
928 | switch (res) { | |
929 | case S390_MINMAX_RES_MINMAX: | |
930 | result = is_min ? float32_min(a, b, s) : float32_max(a, b, s); | |
931 | break; | |
932 | case S390_MINMAX_RES_A: | |
933 | result = a; | |
934 | break; | |
935 | case S390_MINMAX_RES_B: | |
936 | result = b; | |
937 | break; | |
938 | case S390_MINMAX_RES_SILENCE_A: | |
939 | result = float32_silence_nan(a, s); | |
940 | break; | |
941 | case S390_MINMAX_RES_SILENCE_B: | |
942 | result = float32_silence_nan(b, s); | |
943 | break; | |
944 | default: | |
945 | g_assert_not_reached(); | |
946 | } | |
947 | } else if (!is_abs) { | |
948 | result = is_min ? float32_minnum(a, b, &env->fpu_status) : | |
949 | float32_maxnum(a, b, &env->fpu_status); | |
950 | } else { | |
951 | result = is_min ? float32_minnummag(a, b, &env->fpu_status) : | |
952 | float32_maxnummag(a, b, &env->fpu_status); | |
953 | } | |
954 | ||
955 | s390_vec_write_float32(&tmp, i, result); | |
956 | vxc = check_ieee_exc(env, i, false, &vec_exc); | |
957 | if (se || vxc) { | |
958 | break; | |
959 | } | |
960 | } | |
961 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
962 | *v1 = tmp; | |
963 | } | |
964 | ||
965 | static void vfminmax64(S390Vector *v1, const S390Vector *v2, | |
966 | const S390Vector *v3, CPUS390XState *env, | |
967 | S390MinMaxType type, bool is_min, bool is_abs, bool se, | |
968 | uintptr_t retaddr) | |
969 | { | |
970 | float_status *s = &env->fpu_status; | |
971 | uint8_t vxc, vec_exc = 0; | |
972 | S390Vector tmp = {}; | |
973 | int i; | |
974 | ||
975 | for (i = 0; i < 2; i++) { | |
976 | float64 a = s390_vec_read_float64(v2, i); | |
977 | float64 b = s390_vec_read_float64(v3, i); | |
978 | float64 result; | |
979 | ||
980 | if (type != S390_MINMAX_TYPE_IEEE) { | |
981 | S390MinMaxRes res; | |
982 | ||
983 | if (is_abs) { | |
984 | a = float64_abs(a); | |
985 | b = float64_abs(b); | |
986 | } | |
987 | ||
988 | res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b), | |
989 | type, is_min, s); | |
990 | switch (res) { | |
991 | case S390_MINMAX_RES_MINMAX: | |
992 | result = is_min ? float64_min(a, b, s) : float64_max(a, b, s); | |
993 | break; | |
994 | case S390_MINMAX_RES_A: | |
995 | result = a; | |
996 | break; | |
997 | case S390_MINMAX_RES_B: | |
998 | result = b; | |
999 | break; | |
1000 | case S390_MINMAX_RES_SILENCE_A: | |
1001 | result = float64_silence_nan(a, s); | |
1002 | break; | |
1003 | case S390_MINMAX_RES_SILENCE_B: | |
1004 | result = float64_silence_nan(b, s); | |
1005 | break; | |
1006 | default: | |
1007 | g_assert_not_reached(); | |
1008 | } | |
1009 | } else if (!is_abs) { | |
1010 | result = is_min ? float64_minnum(a, b, &env->fpu_status) : | |
1011 | float64_maxnum(a, b, &env->fpu_status); | |
1012 | } else { | |
1013 | result = is_min ? float64_minnummag(a, b, &env->fpu_status) : | |
1014 | float64_maxnummag(a, b, &env->fpu_status); | |
1015 | } | |
1016 | ||
1017 | s390_vec_write_float64(&tmp, i, result); | |
1018 | vxc = check_ieee_exc(env, i, false, &vec_exc); | |
1019 | if (se || vxc) { | |
1020 | break; | |
1021 | } | |
1022 | } | |
1023 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
1024 | *v1 = tmp; | |
1025 | } | |
1026 | ||
1027 | static void vfminmax128(S390Vector *v1, const S390Vector *v2, | |
1028 | const S390Vector *v3, CPUS390XState *env, | |
1029 | S390MinMaxType type, bool is_min, bool is_abs, bool se, | |
1030 | uintptr_t retaddr) | |
1031 | { | |
1032 | float128 a = s390_vec_read_float128(v2); | |
1033 | float128 b = s390_vec_read_float128(v3); | |
1034 | float_status *s = &env->fpu_status; | |
1035 | uint8_t vxc, vec_exc = 0; | |
1036 | float128 result; | |
1037 | ||
1038 | if (type != S390_MINMAX_TYPE_IEEE) { | |
1039 | S390MinMaxRes res; | |
1040 | ||
1041 | if (is_abs) { | |
1042 | a = float128_abs(a); | |
1043 | b = float128_abs(b); | |
1044 | } | |
1045 | ||
1046 | res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b), | |
1047 | type, is_min, s); | |
1048 | switch (res) { | |
1049 | case S390_MINMAX_RES_MINMAX: | |
1050 | result = is_min ? float128_min(a, b, s) : float128_max(a, b, s); | |
1051 | break; | |
1052 | case S390_MINMAX_RES_A: | |
1053 | result = a; | |
1054 | break; | |
1055 | case S390_MINMAX_RES_B: | |
1056 | result = b; | |
1057 | break; | |
1058 | case S390_MINMAX_RES_SILENCE_A: | |
1059 | result = float128_silence_nan(a, s); | |
1060 | break; | |
1061 | case S390_MINMAX_RES_SILENCE_B: | |
1062 | result = float128_silence_nan(b, s); | |
1063 | break; | |
1064 | default: | |
1065 | g_assert_not_reached(); | |
1066 | } | |
1067 | } else if (!is_abs) { | |
1068 | result = is_min ? float128_minnum(a, b, &env->fpu_status) : | |
1069 | float128_maxnum(a, b, &env->fpu_status); | |
1070 | } else { | |
1071 | result = is_min ? float128_minnummag(a, b, &env->fpu_status) : | |
1072 | float128_maxnummag(a, b, &env->fpu_status); | |
1073 | } | |
1074 | ||
1075 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
1076 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
1077 | s390_vec_write_float128(v1, result); | |
1078 | } | |
1079 | ||
/*
 * Define HELPER(gvec_<NAME><BITS>), which decodes the descriptor data and
 * forwards to vfminmax<BITS>() with IS_MIN as the is_min argument.
 *
 * Bit 3 of simd_data(desc) becomes the se argument.  Bits 4-7 hold a 4-bit
 * code: values of 8 and above select the is_abs variant, and the type
 * passed on is then the code minus 8.
 */
#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                               CPUS390XState *env, uint32_t desc)              \
{                                                                              \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
    const uint8_t code = extract32(simd_data(desc), 4, 4);                     \
    const bool is_abs = code >= 8;                                             \
    const uint8_t type = is_abs ? code - 8 : code;                             \
                                                                               \
    vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
}
1095 | ||
1096 | #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \ | |
1097 | DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \ | |
1098 | DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \ | |
1099 | DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128) | |
1100 | ||
1101 | DEF_GVEC_VFMINMAX(vfmax, false) | |
1102 | DEF_GVEC_VFMINMAX(vfmin, true) |