]>
Commit | Line | Data |
---|---|---|
3a0eae85 DH |
1 | /* |
2 | * QEMU TCG support -- s390x vector floating point instruction support | |
3 | * | |
4 | * Copyright (C) 2019 Red Hat Inc | |
5 | * | |
6 | * Authors: | |
7 | * David Hildenbrand <david@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | */ | |
12 | #include "qemu/osdep.h" | |
13 | #include "qemu-common.h" | |
14 | #include "cpu.h" | |
15 | #include "internal.h" | |
16 | #include "vec.h" | |
17 | #include "tcg_s390x.h" | |
18 | #include "tcg/tcg-gvec-desc.h" | |
19 | #include "exec/exec-all.h" | |
20 | #include "exec/helper-proto.h" | |
21 | #include "fpu/softfloat.h" | |
22 | ||
/*
 * Exception codes that check_ieee_exc() ORs into the low bits of the value
 * it returns (the element number goes into the bits above them).
 */
enum {
    VIC_INVALID   = 0x1,
    VIC_DIVBYZERO = 0x2,
    VIC_OVERFLOW  = 0x3,
    VIC_UNDERFLOW = 0x4,
    VIC_INEXACT   = 0x5,
};
28 | ||
29 | /* returns the VEX. If the VEX is 0, there is no trap */ | |
30 | static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, | |
31 | uint8_t *vec_exc) | |
32 | { | |
33 | uint8_t vece_exc = 0, trap_exc; | |
34 | unsigned qemu_exc; | |
35 | ||
36 | /* Retrieve and clear the softfloat exceptions */ | |
37 | qemu_exc = env->fpu_status.float_exception_flags; | |
38 | if (qemu_exc == 0) { | |
39 | return 0; | |
40 | } | |
41 | env->fpu_status.float_exception_flags = 0; | |
42 | ||
43 | vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); | |
44 | ||
45 | /* Add them to the vector-wide s390x exception bits */ | |
46 | *vec_exc |= vece_exc; | |
47 | ||
48 | /* Check for traps and construct the VXC */ | |
49 | trap_exc = vece_exc & env->fpc >> 24; | |
50 | if (trap_exc) { | |
51 | if (trap_exc & S390_IEEE_MASK_INVALID) { | |
52 | return enr << 4 | VIC_INVALID; | |
53 | } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { | |
54 | return enr << 4 | VIC_DIVBYZERO; | |
55 | } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { | |
56 | return enr << 4 | VIC_OVERFLOW; | |
57 | } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { | |
58 | return enr << 4 | VIC_UNDERFLOW; | |
59 | } else if (!XxC) { | |
60 | g_assert(trap_exc & S390_IEEE_MASK_INEXACT); | |
61 | /* inexact has lowest priority on traps */ | |
62 | return enr << 4 | VIC_INEXACT; | |
63 | } | |
64 | } | |
65 | return 0; | |
66 | } | |
67 | ||
68 | static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, | |
69 | uintptr_t retaddr) | |
70 | { | |
71 | if (vxc) { | |
72 | /* on traps, the fpc flags are not updated, instruction is suppressed */ | |
73 | tcg_s390_vector_exception(env, vxc, retaddr); | |
74 | } | |
75 | if (vec_exc) { | |
76 | /* indicate exceptions for all elements combined */ | |
77 | env->fpc |= vec_exc << 16; | |
78 | } | |
79 | } | |
80 | ||
863b9507 DH |
81 | static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) |
82 | { | |
83 | return make_float64(s390_vec_read_element64(v, enr)); | |
84 | } | |
85 | ||
86 | static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) | |
87 | { | |
88 | return s390_vec_write_element64(v, enr, data); | |
89 | } | |
90 | ||
21bd6ea2 | 91 | typedef float64 (*vop64_2_fn)(float64 a, float_status *s); |
bb03fd84 DH |
92 | static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, |
93 | bool s, bool XxC, uint8_t erm, vop64_2_fn fn, | |
94 | uintptr_t retaddr) | |
95 | { | |
96 | uint8_t vxc, vec_exc = 0; | |
97 | S390Vector tmp = {}; | |
98 | int i, old_mode; | |
99 | ||
100 | old_mode = s390_swap_bfp_rounding_mode(env, erm); | |
101 | for (i = 0; i < 2; i++) { | |
21bd6ea2 | 102 | const float64 a = s390_vec_read_float64(v2, i); |
bb03fd84 | 103 | |
21bd6ea2 | 104 | s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); |
bb03fd84 DH |
105 | vxc = check_ieee_exc(env, i, XxC, &vec_exc); |
106 | if (s || vxc) { | |
107 | break; | |
108 | } | |
109 | } | |
110 | s390_restore_bfp_rounding_mode(env, old_mode); | |
111 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
112 | *v1 = tmp; | |
113 | } | |
114 | ||
21bd6ea2 DH |
115 | static float64 vcdg64(float64 a, float_status *s) |
116 | { | |
117 | return int64_to_float64(a, s); | |
118 | } | |
119 | ||
120 | static float64 vcdlg64(float64 a, float_status *s) | |
121 | { | |
122 | return uint64_to_float64(a, s); | |
123 | } | |
124 | ||
125 | static float64 vcgd64(float64 a, float_status *s) | |
126 | { | |
127 | const float64 tmp = float64_to_int64(a, s); | |
128 | ||
129 | return float64_is_any_nan(a) ? INT64_MIN : tmp; | |
130 | } | |
131 | ||
132 | static float64 vclgd64(float64 a, float_status *s) | |
133 | { | |
134 | const float64 tmp = float64_to_uint64(a, s); | |
135 | ||
136 | return float64_is_any_nan(a) ? 0 : tmp; | |
137 | } | |
138 | ||
139 | #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ | |
140 | void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ | |
141 | uint32_t desc) \ | |
142 | { \ | |
143 | const uint8_t erm = extract32(simd_data(desc), 4, 4); \ | |
144 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
145 | const bool XxC = extract32(simd_data(desc), 2, 1); \ | |
146 | \ | |
147 | vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ | |
148 | } | |
149 | ||
150 | #define DEF_GVEC_VOP2_64(NAME) \ | |
151 | DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) | |
152 | ||
153 | #define DEF_GVEC_VOP2(NAME, OP) \ | |
154 | DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) | |
155 | ||
156 | DEF_GVEC_VOP2_64(vcdg) | |
157 | DEF_GVEC_VOP2_64(vcdlg) | |
158 | DEF_GVEC_VOP2_64(vcgd) | |
159 | DEF_GVEC_VOP2_64(vclgd) | |
160 | DEF_GVEC_VOP2(vfi, round_to_int) | |
161 | DEF_GVEC_VOP2(vfsq, sqrt) | |
162 | ||
863b9507 | 163 | typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); |
3a0eae85 DH |
164 | static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
165 | CPUS390XState *env, bool s, vop64_3_fn fn, | |
166 | uintptr_t retaddr) | |
167 | { | |
168 | uint8_t vxc, vec_exc = 0; | |
169 | S390Vector tmp = {}; | |
170 | int i; | |
171 | ||
172 | for (i = 0; i < 2; i++) { | |
863b9507 DH |
173 | const float64 a = s390_vec_read_float64(v2, i); |
174 | const float64 b = s390_vec_read_float64(v3, i); | |
3a0eae85 | 175 | |
863b9507 | 176 | s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); |
3a0eae85 DH |
177 | vxc = check_ieee_exc(env, i, false, &vec_exc); |
178 | if (s || vxc) { | |
179 | break; | |
180 | } | |
181 | } | |
182 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
183 | *v1 = tmp; | |
184 | } | |
185 | ||
863b9507 DH |
186 | #define DEF_GVEC_VOP3(NAME, OP) \ |
187 | void HELPER(gvec_##NAME##64)(void *v1, const void *v2, const void *v3, \ | |
188 | CPUS390XState *env, uint32_t desc) \ | |
189 | { \ | |
190 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
191 | \ | |
192 | vop64_3(v1, v2, v3, env, se, float64_##OP, GETPC()); \ | |
3a0eae85 DH |
193 | } |
194 | ||
863b9507 DH |
195 | DEF_GVEC_VOP3(vfa, add) |
196 | DEF_GVEC_VOP3(vfs, sub) | |
197 | DEF_GVEC_VOP3(vfd, div) | |
198 | DEF_GVEC_VOP3(vfm, mul) | |
5b89f0fb DH |
199 | |
200 | static int wfc64(const S390Vector *v1, const S390Vector *v2, | |
201 | CPUS390XState *env, bool signal, uintptr_t retaddr) | |
202 | { | |
203 | /* only the zero-indexed elements are compared */ | |
4da79375 DH |
204 | const float64 a = s390_vec_read_float64(v1, 0); |
205 | const float64 b = s390_vec_read_float64(v2, 0); | |
5b89f0fb DH |
206 | uint8_t vxc, vec_exc = 0; |
207 | int cmp; | |
208 | ||
209 | if (signal) { | |
210 | cmp = float64_compare(a, b, &env->fpu_status); | |
211 | } else { | |
212 | cmp = float64_compare_quiet(a, b, &env->fpu_status); | |
213 | } | |
214 | vxc = check_ieee_exc(env, 0, false, &vec_exc); | |
215 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
216 | ||
217 | return float_comp_to_cc(env, cmp); | |
218 | } | |
219 | ||
4da79375 DH |
220 | #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ |
221 | void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ | |
222 | CPUS390XState *env, uint32_t desc) \ | |
223 | { \ | |
224 | env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ | |
5b89f0fb DH |
225 | } |
226 | ||
4da79375 DH |
227 | #define DEF_GVEC_WFC(NAME, SIGNAL) \ |
228 | DEF_GVEC_WFC_B(NAME, SIGNAL, 64) | |
229 | ||
230 | DEF_GVEC_WFC(wfc, false) | |
231 | DEF_GVEC_WFC(wfk, true) | |
2c806ab4 | 232 | |
0673ecdf | 233 | typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); |
2c806ab4 DH |
234 | static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
235 | CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) | |
236 | { | |
237 | uint8_t vxc, vec_exc = 0; | |
238 | S390Vector tmp = {}; | |
239 | int match = 0; | |
240 | int i; | |
241 | ||
242 | for (i = 0; i < 2; i++) { | |
64deb65a DH |
243 | const float64 a = s390_vec_read_float64(v2, i); |
244 | const float64 b = s390_vec_read_float64(v3, i); | |
2c806ab4 DH |
245 | |
246 | /* swap the order of the parameters, so we can use existing functions */ | |
247 | if (fn(b, a, &env->fpu_status)) { | |
248 | match++; | |
249 | s390_vec_write_element64(&tmp, i, -1ull); | |
250 | } | |
251 | vxc = check_ieee_exc(env, i, false, &vec_exc); | |
252 | if (s || vxc) { | |
253 | break; | |
254 | } | |
255 | } | |
256 | ||
257 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
258 | *v1 = tmp; | |
259 | if (match) { | |
260 | return s || match == 2 ? 0 : 1; | |
261 | } | |
262 | return 3; | |
263 | } | |
264 | ||
64deb65a DH |
265 | #define DEF_GVEC_VFC_B(NAME, OP, BITS) \ |
266 | void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ | |
267 | CPUS390XState *env, uint32_t desc) \ | |
268 | { \ | |
269 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
270 | vfc##BITS##_fn fn = float##BITS##_##OP##_quiet; \ | |
271 | \ | |
272 | vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ | |
273 | } \ | |
274 | \ | |
275 | void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ | |
276 | CPUS390XState *env, uint32_t desc) \ | |
277 | { \ | |
278 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
279 | vfc##BITS##_fn fn = float##BITS##_##OP##_quiet; \ | |
280 | \ | |
281 | env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ | |
2c806ab4 DH |
282 | } |
283 | ||
64deb65a DH |
284 | #define DEF_GVEC_VFC(NAME, OP) \ |
285 | DEF_GVEC_VFC_B(NAME, OP, 64) | |
2c806ab4 | 286 | |
64deb65a DH |
287 | DEF_GVEC_VFC(vfce, eq) |
288 | DEF_GVEC_VFC(vfch, lt) | |
289 | DEF_GVEC_VFC(vfche, le) | |
bb03fd84 | 290 | |
860b707b DH |
291 | void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, |
292 | uint32_t desc) | |
1a76e59d | 293 | { |
860b707b | 294 | const bool s = extract32(simd_data(desc), 3, 1); |
1a76e59d DH |
295 | uint8_t vxc, vec_exc = 0; |
296 | S390Vector tmp = {}; | |
297 | int i; | |
298 | ||
299 | for (i = 0; i < 2; i++) { | |
300 | /* load from even element */ | |
301 | const float32 a = s390_vec_read_element32(v2, i * 2); | |
302 | const uint64_t ret = float32_to_float64(a, &env->fpu_status); | |
303 | ||
304 | s390_vec_write_element64(&tmp, i, ret); | |
305 | /* indicate the source element */ | |
306 | vxc = check_ieee_exc(env, i * 2, false, &vec_exc); | |
307 | if (s || vxc) { | |
308 | break; | |
309 | } | |
310 | } | |
860b707b DH |
311 | handle_ieee_exc(env, vxc, vec_exc, GETPC()); |
312 | *(S390Vector *)v1 = tmp; | |
1a76e59d | 313 | } |
4500ede4 | 314 | |
977e43d9 DH |
315 | void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, |
316 | uint32_t desc) | |
4500ede4 | 317 | { |
977e43d9 DH |
318 | const uint8_t erm = extract32(simd_data(desc), 4, 4); |
319 | const bool s = extract32(simd_data(desc), 3, 1); | |
320 | const bool XxC = extract32(simd_data(desc), 2, 1); | |
4500ede4 DH |
321 | uint8_t vxc, vec_exc = 0; |
322 | S390Vector tmp = {}; | |
323 | int i, old_mode; | |
324 | ||
325 | old_mode = s390_swap_bfp_rounding_mode(env, erm); | |
326 | for (i = 0; i < 2; i++) { | |
327 | float64 a = s390_vec_read_element64(v2, i); | |
328 | uint32_t ret = float64_to_float32(a, &env->fpu_status); | |
329 | ||
330 | /* place at even element */ | |
331 | s390_vec_write_element32(&tmp, i * 2, ret); | |
332 | /* indicate the source element */ | |
333 | vxc = check_ieee_exc(env, i, XxC, &vec_exc); | |
334 | if (s || vxc) { | |
335 | break; | |
336 | } | |
337 | } | |
338 | s390_restore_bfp_rounding_mode(env, old_mode); | |
977e43d9 DH |
339 | handle_ieee_exc(env, vxc, vec_exc, GETPC()); |
340 | *(S390Vector *)v1 = tmp; | |
4500ede4 | 341 | } |
8d47d4d2 | 342 | |
c64c5984 DH |
343 | static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, |
344 | const S390Vector *v4, CPUS390XState *env, bool s, int flags, | |
345 | uintptr_t retaddr) | |
346 | { | |
347 | uint8_t vxc, vec_exc = 0; | |
348 | S390Vector tmp = {}; | |
349 | int i; | |
350 | ||
351 | for (i = 0; i < 2; i++) { | |
34142ffd DH |
352 | const float64 a = s390_vec_read_float64(v2, i); |
353 | const float64 b = s390_vec_read_float64(v3, i); | |
354 | const float64 c = s390_vec_read_float64(v4, i); | |
355 | const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); | |
c64c5984 | 356 | |
34142ffd | 357 | s390_vec_write_float64(&tmp, i, ret); |
c64c5984 DH |
358 | vxc = check_ieee_exc(env, i, false, &vec_exc); |
359 | if (s || vxc) { | |
360 | break; | |
361 | } | |
362 | } | |
363 | handle_ieee_exc(env, vxc, vec_exc, retaddr); | |
364 | *v1 = tmp; | |
365 | } | |
366 | ||
34142ffd DH |
367 | #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ |
368 | void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ | |
369 | const void *v4, CPUS390XState *env, \ | |
370 | uint32_t desc) \ | |
371 | { \ | |
372 | const bool se = extract32(simd_data(desc), 3, 1); \ | |
373 | \ | |
374 | vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ | |
c64c5984 DH |
375 | } |
376 | ||
34142ffd DH |
377 | #define DEF_GVEC_VFMA(NAME, FLAGS) \ |
378 | DEF_GVEC_VFMA_B(NAME, FLAGS, 64) | |
c64c5984 | 379 | |
34142ffd DH |
380 | DEF_GVEC_VFMA(vfma, 0) |
381 | DEF_GVEC_VFMA(vfms, float_muladd_negate_c) | |
5938f20c | 382 | |
622ebe64 DH |
383 | void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, |
384 | uint32_t desc) | |
83b955f9 | 385 | { |
622ebe64 DH |
386 | const uint16_t i3 = extract32(simd_data(desc), 4, 12); |
387 | const bool s = extract32(simd_data(desc), 3, 1); | |
83b955f9 DH |
388 | int i, match = 0; |
389 | ||
390 | for (i = 0; i < 2; i++) { | |
622ebe64 | 391 | const float64 a = s390_vec_read_float64(v2, i); |
83b955f9 DH |
392 | |
393 | if (float64_dcmask(env, a) & i3) { | |
394 | match++; | |
395 | s390_vec_write_element64(v1, i, -1ull); | |
396 | } else { | |
397 | s390_vec_write_element64(v1, i, 0); | |
398 | } | |
399 | if (s) { | |
400 | break; | |
401 | } | |
402 | } | |
403 | ||
622ebe64 DH |
404 | if (match == 2 || (s && match)) { |
405 | env->cc_op = 0; | |
406 | } else if (match) { | |
407 | env->cc_op = 1; | |
408 | } else { | |
409 | env->cc_op = 3; | |
83b955f9 | 410 | } |
83b955f9 | 411 | } |