]>
Commit | Line | Data |
---|---|---|
b2391681 TS |
1 | /* |
2 | * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | |
16 | */ | |
17 | ||
18 | #include "qemu/osdep.h" | |
19 | #include "fpu/softfloat.h" | |
20 | #include "cpu.h" | |
21 | #include "fma_emu.h" | |
22 | #include "arch.h" | |
23 | #include "macros.h" | |
24 | ||
/* Single-precision format parameters used by the helpers in this file. */
#define SF_BIAS        127
#define SF_MAXEXP      254
#define SF_MANTBITS    23
/* All-ones bit pattern, used as the NaN result value in this file. */
#define float32_nan    make_float32(0xffffffff)

/*
 * Masks for the bit spreading/compaction steps of interleave() and
 * deinterleave().  Each mask keeps alternating groups of 1, 2, 4, 8, 16
 * and 32 bits of a 64-bit value, starting with the least significant group.
 */
#define BITS_MASK_8    0x5555555555555555ULL
#define PAIR_MASK_8    0x3333333333333333ULL
#define NYBL_MASK_8    0x0f0f0f0f0f0f0f0fULL
#define BYTE_MASK_8    0x00ff00ff00ff00ffULL
#define HALF_MASK_8    0x0000ffff0000ffffULL
#define WORD_MASK_8    0x00000000ffffffffULL
36 | ||
37 | uint64_t interleave(uint32_t odd, uint32_t even) | |
38 | { | |
39 | /* Convert to long long */ | |
40 | uint64_t myodd = odd; | |
41 | uint64_t myeven = even; | |
42 | /* First, spread bits out */ | |
43 | myodd = (myodd | (myodd << 16)) & HALF_MASK_8; | |
44 | myeven = (myeven | (myeven << 16)) & HALF_MASK_8; | |
45 | myodd = (myodd | (myodd << 8)) & BYTE_MASK_8; | |
46 | myeven = (myeven | (myeven << 8)) & BYTE_MASK_8; | |
47 | myodd = (myodd | (myodd << 4)) & NYBL_MASK_8; | |
48 | myeven = (myeven | (myeven << 4)) & NYBL_MASK_8; | |
49 | myodd = (myodd | (myodd << 2)) & PAIR_MASK_8; | |
50 | myeven = (myeven | (myeven << 2)) & PAIR_MASK_8; | |
51 | myodd = (myodd | (myodd << 1)) & BITS_MASK_8; | |
52 | myeven = (myeven | (myeven << 1)) & BITS_MASK_8; | |
53 | /* Now OR together */ | |
54 | return myeven | (myodd << 1); | |
55 | } | |
56 | ||
57 | uint64_t deinterleave(uint64_t src) | |
58 | { | |
59 | /* Get odd and even bits */ | |
60 | uint64_t myodd = ((src >> 1) & BITS_MASK_8); | |
61 | uint64_t myeven = (src & BITS_MASK_8); | |
62 | ||
63 | /* Unspread bits */ | |
64 | myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8; | |
65 | myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8; | |
66 | myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8; | |
67 | myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8; | |
68 | myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8; | |
69 | myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8; | |
70 | myeven = (myeven | (myeven >> 8)) & HALF_MASK_8; | |
71 | myodd = (myodd | (myodd >> 8)) & HALF_MASK_8; | |
72 | myeven = (myeven | (myeven >> 16)) & WORD_MASK_8; | |
73 | myodd = (myodd | (myodd >> 16)) & WORD_MASK_8; | |
74 | ||
75 | /* Return odd bits in upper half */ | |
76 | return myeven | (myodd << 32); | |
77 | } | |
78 | ||
/*
 * Compute the carry out of the 64-bit addition a + b + c.
 *
 * The sum is formed one 32-bit word at a time in 64-bit arithmetic so the
 * carry out of each word is available as word 1 of the partial sum.
 */
uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c)
{
    uint64_t a_word, b_word;
    uint64_t low_sum, high_sum;

    /* Low words plus the incoming carry */
    a_word = fGETUWORD(0, a);
    b_word = fGETUWORD(0, b);
    low_sum = a_word + b_word + c;

    /* High words plus whatever carried out of the low-word sum */
    a_word = fGETUWORD(1, a);
    b_word = fGETUWORD(1, b);
    high_sum = a_word + b_word + fGETUWORD(1, low_sum);

    return fGETUWORD(1, high_sum);
}
91 | ||
/*
 * Shift a right by n bits, rounding the discarded fraction to nearest with
 * ties going to the even result.
 *
 * With n == 0 the value is returned unchanged.
 */
int32_t conv_round(int32_t a, int n)
{
    int64_t rounded;
    int half;

    if (n == 0) {
        return a;
    }

    half = 1 << (n - 1);
    if ((a & (half - 1)) != 0) {
        /* Some bit below the half position is set: plain round-to-nearest */
        rounded = fSE32_64(a) + half;
    } else {
        /*
         * Bits N-2..0 are all zero, so the fraction is exactly 0 or 1/2.
         * Add half only when the LSB of the integer part (bit N) is set,
         * which sends a tie to the even neighbour.
         */
        uint32_t int_lsb = (uint32_t)((1 << n) & a);

        rounded = fSE32_64(a) + (int64_t)(int_lsb >> 1);
    }

    return (int32_t)(rounded >> n);
}
108 | ||
109 | /* Floating Point Stuff */ | |
110 | ||
111 | static const int softfloat_roundingmodes[] = { | |
112 | float_round_nearest_even, | |
113 | float_round_to_zero, | |
114 | float_round_down, | |
115 | float_round_up, | |
116 | }; | |
117 | ||
118 | void arch_fpop_start(CPUHexagonState *env) | |
119 | { | |
120 | set_float_exception_flags(0, &env->fp_status); | |
121 | set_float_rounding_mode( | |
122 | softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)], | |
123 | &env->fp_status); | |
124 | } | |
125 | ||
#ifdef CONFIG_USER_ONLY
/*
 * The Hexagon Linux kernel only sets the relevant bits in USR (user status
 * register); the exception is never delivered to user mode.  qemu user
 * mode therefore does not model it and raising is a no-op.
 */
#define RAISE_FP_EXCEPTION do {} while (0)
#endif

/*
 * If softfloat flag FLAG is set in the local variable 'flags' and the USR
 * flag field USR_<MYF> is still clear, set USR_<MYF> and, when the enable
 * field USR_<MYE> is non-zero, invoke RAISE_FP_EXCEPTION.
 *
 * Expects a variable named 'flags' to be in scope at the point of use.
 */
#define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
    do { \
        if ((flags & (FLAG)) && GET_USR_FIELD(USR_##MYF) == 0) { \
            SET_USR_FIELD(USR_##MYF, 1); \
            if (GET_USR_FIELD(USR_##MYE)) { \
                RAISE_FP_EXCEPTION; \
            } \
        } \
    } while (0)
146 | ||
147 | void arch_fpop_end(CPUHexagonState *env) | |
148 | { | |
149 | int flags = get_float_exception_flags(&env->fp_status); | |
150 | if (flags != 0) { | |
151 | SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); | |
152 | SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE); | |
153 | SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE); | |
154 | SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE); | |
155 | SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE); | |
156 | } | |
157 | } | |
158 | ||
159 | static float32 float32_mul_pow2(float32 a, uint32_t p, float_status *fp_status) | |
160 | { | |
161 | float32 b = make_float32((SF_BIAS + p) << SF_MANTBITS); | |
162 | return float32_mul(a, b, fp_status); | |
163 | } | |
164 | ||
165 | int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, | |
166 | float_status *fp_status) | |
167 | { | |
168 | int n_exp; | |
169 | int d_exp; | |
170 | int ret = 0; | |
171 | float32 RsV, RtV, RdV; | |
172 | int PeV = 0; | |
173 | RsV = *Rs; | |
174 | RtV = *Rt; | |
175 | if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) { | |
176 | if (extract32(RsV & RtV, 22, 1) == 0) { | |
177 | float_raise(float_flag_invalid, fp_status); | |
178 | } | |
179 | RdV = RsV = RtV = float32_nan; | |
180 | } else if (float32_is_any_nan(RsV)) { | |
181 | if (extract32(RsV, 22, 1) == 0) { | |
182 | float_raise(float_flag_invalid, fp_status); | |
183 | } | |
184 | RdV = RsV = RtV = float32_nan; | |
185 | } else if (float32_is_any_nan(RtV)) { | |
186 | /* or put NaN in num/den fixup? */ | |
187 | if (extract32(RtV, 22, 1) == 0) { | |
188 | float_raise(float_flag_invalid, fp_status); | |
189 | } | |
190 | RdV = RsV = RtV = float32_nan; | |
191 | } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) { | |
192 | /* or put Inf in num fixup? */ | |
193 | RdV = RsV = RtV = float32_nan; | |
194 | float_raise(float_flag_invalid, fp_status); | |
195 | } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) { | |
196 | /* or put zero in num fixup? */ | |
197 | RdV = RsV = RtV = float32_nan; | |
198 | float_raise(float_flag_invalid, fp_status); | |
199 | } else if (float32_is_zero(RtV)) { | |
200 | /* or put Inf in num fixup? */ | |
201 | uint8_t RsV_sign = float32_is_neg(RsV); | |
202 | uint8_t RtV_sign = float32_is_neg(RtV); | |
203 | RsV = infinite_float32(RsV_sign ^ RtV_sign); | |
204 | RtV = float32_one; | |
205 | RdV = float32_one; | |
206 | if (float32_is_infinity(RsV)) { | |
207 | float_raise(float_flag_divbyzero, fp_status); | |
208 | } | |
209 | } else if (float32_is_infinity(RtV)) { | |
210 | RsV = make_float32(0x80000000 & (RsV ^ RtV)); | |
211 | RtV = float32_one; | |
212 | RdV = float32_one; | |
213 | } else if (float32_is_zero(RsV)) { | |
214 | /* Does this just work itself out? */ | |
215 | /* No, 0/Inf causes problems. */ | |
216 | RsV = make_float32(0x80000000 & (RsV ^ RtV)); | |
217 | RtV = float32_one; | |
218 | RdV = float32_one; | |
219 | } else if (float32_is_infinity(RsV)) { | |
220 | uint8_t RsV_sign = float32_is_neg(RsV); | |
221 | uint8_t RtV_sign = float32_is_neg(RtV); | |
222 | RsV = infinite_float32(RsV_sign ^ RtV_sign); | |
223 | RtV = float32_one; | |
224 | RdV = float32_one; | |
225 | } else { | |
226 | PeV = 0x00; | |
227 | /* Basic checks passed */ | |
228 | n_exp = float32_getexp(RsV); | |
229 | d_exp = float32_getexp(RtV); | |
230 | if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) { | |
231 | /* Near quotient underflow / inexact Q */ | |
232 | PeV = 0x80; | |
233 | RtV = float32_mul_pow2(RtV, -64, fp_status); | |
234 | RsV = float32_mul_pow2(RsV, 64, fp_status); | |
235 | } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) { | |
236 | /* Near quotient overflow */ | |
237 | PeV = 0x40; | |
238 | RtV = float32_mul_pow2(RtV, 32, fp_status); | |
239 | RsV = float32_mul_pow2(RsV, -32, fp_status); | |
240 | } else if (n_exp <= SF_MANTBITS + 2) { | |
241 | RtV = float32_mul_pow2(RtV, 64, fp_status); | |
242 | RsV = float32_mul_pow2(RsV, 64, fp_status); | |
243 | } else if (d_exp <= 1) { | |
244 | RtV = float32_mul_pow2(RtV, 32, fp_status); | |
245 | RsV = float32_mul_pow2(RsV, 32, fp_status); | |
246 | } else if (d_exp > 252) { | |
247 | RtV = float32_mul_pow2(RtV, -32, fp_status); | |
248 | RsV = float32_mul_pow2(RsV, -32, fp_status); | |
249 | } | |
250 | RdV = 0; | |
251 | ret = 1; | |
252 | } | |
253 | *Rs = RsV; | |
254 | *Rt = RtV; | |
255 | *Rd = RdV; | |
256 | *adjust = PeV; | |
257 | return ret; | |
258 | } | |
259 | ||
260 | int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, | |
261 | float_status *fp_status) | |
262 | { | |
263 | float32 RsV, RdV; | |
264 | int PeV = 0; | |
265 | int r_exp; | |
266 | int ret = 0; | |
267 | RsV = *Rs; | |
268 | if (float32_is_infinity(RsV)) { | |
269 | if (extract32(RsV, 22, 1) == 0) { | |
270 | float_raise(float_flag_invalid, fp_status); | |
271 | } | |
272 | RdV = RsV = float32_nan; | |
273 | } else if (float32_lt(RsV, float32_zero, fp_status)) { | |
274 | /* Negative nonzero values are NaN */ | |
275 | float_raise(float_flag_invalid, fp_status); | |
276 | RsV = float32_nan; | |
277 | RdV = float32_nan; | |
278 | } else if (float32_is_infinity(RsV)) { | |
279 | /* or put Inf in num fixup? */ | |
280 | RsV = infinite_float32(1); | |
281 | RdV = infinite_float32(1); | |
282 | } else if (float32_is_zero(RsV)) { | |
283 | /* or put zero in num fixup? */ | |
284 | RdV = float32_one; | |
285 | } else { | |
286 | PeV = 0x00; | |
287 | /* Basic checks passed */ | |
288 | r_exp = float32_getexp(RsV); | |
289 | if (r_exp <= 24) { | |
290 | RsV = float32_mul_pow2(RsV, 64, fp_status); | |
291 | PeV = 0xe0; | |
292 | } | |
293 | RdV = 0; | |
294 | ret = 1; | |
295 | } | |
296 | *Rs = RsV; | |
297 | *Rd = RdV; | |
298 | *adjust = PeV; | |
299 | return ret; | |
300 | } |