]>
Commit | Line | Data |
---|---|---|
591596b7 LV |
1 | /* |
2 | * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator, | |
3 | * derived from NetBSD M68040 FPSP functions, | |
4 | * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic | |
5 | * Package. Those parts of the code (and some later contributions) are | |
6 | * provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
11 | * GPL-v2-or-later | |
12 | * | |
13 | * Any future contributions to this file will be taken to be licensed under | |
14 | * the Softfloat-2a license unless specifically indicated otherwise. | |
15 | */ | |
16 | ||
808d77bc LMP |
17 | /* |
18 | * Portions of this work are licensed under the terms of the GNU GPL, | |
591596b7 LV |
19 | * version 2 or later. See the COPYING file in the top-level directory. |
20 | */ | |
21 | ||
22 | #include "qemu/osdep.h" | |
23 | #include "softfloat.h" | |
24 | #include "fpu/softfloat-macros.h" | |
4b5c65b8 | 25 | #include "softfloat_fpsp_tables.h" |
591596b7 | 26 | |
c84813b8 | 27 | #define pi_exp 0x4000 |
8c992abc | 28 | #define piby2_exp 0x3FFF |
e2326300 | 29 | #define pi_sig UINT64_C(0xc90fdaa22168c235) |
8c992abc | 30 | |
0d379c17 LV |
31 | static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) |
32 | { | |
33 | if (floatx80_is_signaling_nan(a, status)) { | |
34 | float_raise(float_flag_invalid, status); | |
1c0c951f | 35 | a = floatx80_silence_nan(a, status); |
0d379c17 LV |
36 | } |
37 | ||
38 | if (status->default_nan_mode) { | |
39 | return floatx80_default_nan(status); | |
40 | } | |
41 | ||
1c0c951f | 42 | return a; |
0d379c17 LV |
43 | } |
44 | ||
808d77bc LMP |
45 | /* |
46 | * Returns the mantissa of the extended double-precision floating-point | |
47 | * value `a'. | |
48 | */ | |
0d379c17 LV |
49 | |
50 | floatx80 floatx80_getman(floatx80 a, float_status *status) | |
51 | { | |
c120391c | 52 | bool aSign; |
0d379c17 LV |
53 | int32_t aExp; |
54 | uint64_t aSig; | |
55 | ||
56 | aSig = extractFloatx80Frac(a); | |
57 | aExp = extractFloatx80Exp(a); | |
58 | aSign = extractFloatx80Sign(a); | |
59 | ||
60 | if (aExp == 0x7FFF) { | |
61 | if ((uint64_t) (aSig << 1)) { | |
62 | return propagateFloatx80NaNOneArg(a , status); | |
63 | } | |
64 | float_raise(float_flag_invalid , status); | |
65 | return floatx80_default_nan(status); | |
66 | } | |
67 | ||
68 | if (aExp == 0) { | |
69 | if (aSig == 0) { | |
70 | return packFloatx80(aSign, 0, 0); | |
71 | } | |
72 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
73 | } | |
74 | ||
75 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, | |
76 | 0x3FFF, aSig, 0, status); | |
77 | } | |
78 | ||
808d77bc LMP |
79 | /* |
80 | * Returns the exponent of the extended double-precision floating-point | |
81 | * value `a' as an extended double-precision value. | |
82 | */ | |
0d379c17 LV |
83 | |
84 | floatx80 floatx80_getexp(floatx80 a, float_status *status) | |
85 | { | |
c120391c | 86 | bool aSign; |
0d379c17 LV |
87 | int32_t aExp; |
88 | uint64_t aSig; | |
89 | ||
90 | aSig = extractFloatx80Frac(a); | |
91 | aExp = extractFloatx80Exp(a); | |
92 | aSign = extractFloatx80Sign(a); | |
93 | ||
94 | if (aExp == 0x7FFF) { | |
95 | if ((uint64_t) (aSig << 1)) { | |
96 | return propagateFloatx80NaNOneArg(a , status); | |
97 | } | |
98 | float_raise(float_flag_invalid , status); | |
99 | return floatx80_default_nan(status); | |
100 | } | |
101 | ||
102 | if (aExp == 0) { | |
103 | if (aSig == 0) { | |
104 | return packFloatx80(aSign, 0, 0); | |
105 | } | |
106 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
107 | } | |
108 | ||
109 | return int32_to_floatx80(aExp - 0x3FFF, status); | |
110 | } | |
111 | ||
808d77bc LMP |
112 | /* |
113 | * Scales extended double-precision floating-point value in operand `a' by | |
114 | * value `b'. The function truncates the value in the second operand 'b' to | |
115 | * an integral value and adds that value to the exponent of the operand 'a'. | |
116 | * The operation performed according to the IEC/IEEE Standard for Binary | |
117 | * Floating-Point Arithmetic. | |
118 | */ | |
0d379c17 LV |
119 | |
120 | floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) | |
121 | { | |
c120391c | 122 | bool aSign, bSign; |
0d379c17 LV |
123 | int32_t aExp, bExp, shiftCount; |
124 | uint64_t aSig, bSig; | |
125 | ||
126 | aSig = extractFloatx80Frac(a); | |
127 | aExp = extractFloatx80Exp(a); | |
128 | aSign = extractFloatx80Sign(a); | |
129 | bSig = extractFloatx80Frac(b); | |
130 | bExp = extractFloatx80Exp(b); | |
131 | bSign = extractFloatx80Sign(b); | |
132 | ||
133 | if (bExp == 0x7FFF) { | |
134 | if ((uint64_t) (bSig << 1) || | |
135 | ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) { | |
136 | return propagateFloatx80NaN(a, b, status); | |
137 | } | |
138 | float_raise(float_flag_invalid , status); | |
139 | return floatx80_default_nan(status); | |
140 | } | |
141 | if (aExp == 0x7FFF) { | |
142 | if ((uint64_t) (aSig << 1)) { | |
143 | return propagateFloatx80NaN(a, b, status); | |
144 | } | |
145 | return packFloatx80(aSign, floatx80_infinity.high, | |
146 | floatx80_infinity.low); | |
147 | } | |
148 | if (aExp == 0) { | |
149 | if (aSig == 0) { | |
150 | return packFloatx80(aSign, 0, 0); | |
151 | } | |
152 | if (bExp < 0x3FFF) { | |
153 | return a; | |
154 | } | |
155 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
156 | } | |
157 | ||
158 | if (bExp < 0x3FFF) { | |
159 | return a; | |
160 | } | |
161 | ||
162 | if (0x400F < bExp) { | |
163 | aExp = bSign ? -0x6001 : 0xE000; | |
164 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
165 | aSign, aExp, aSig, 0, status); | |
166 | } | |
167 | ||
168 | shiftCount = 0x403E - bExp; | |
169 | bSig >>= shiftCount; | |
170 | aExp = bSign ? (aExp - bSig) : (aExp + bSig); | |
171 | ||
172 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
173 | aSign, aExp, aSig, 0, status); | |
174 | } | |
9a069775 LV |
175 | |
176 | floatx80 floatx80_move(floatx80 a, float_status *status) | |
177 | { | |
c120391c | 178 | bool aSign; |
9a069775 LV |
179 | int32_t aExp; |
180 | uint64_t aSig; | |
181 | ||
182 | aSig = extractFloatx80Frac(a); | |
183 | aExp = extractFloatx80Exp(a); | |
184 | aSign = extractFloatx80Sign(a); | |
185 | ||
186 | if (aExp == 0x7FFF) { | |
187 | if ((uint64_t)(aSig << 1)) { | |
188 | return propagateFloatx80NaNOneArg(a, status); | |
189 | } | |
190 | return a; | |
191 | } | |
192 | if (aExp == 0) { | |
193 | if (aSig == 0) { | |
194 | return a; | |
195 | } | |
196 | normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, | |
197 | aSign, aExp, aSig, 0, status); | |
198 | } | |
199 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, | |
200 | aExp, aSig, 0, status); | |
201 | } | |
4b5c65b8 | 202 | |
808d77bc LMP |
203 | /* |
204 | * Algorithms for transcendental functions supported by MC68881 and MC68882 | |
205 | * mathematical coprocessors. The functions are derived from FPSP library. | |
206 | */ | |
4b5c65b8 LV |
207 | |
208 | #define one_exp 0x3FFF | |
e2326300 | 209 | #define one_sig UINT64_C(0x8000000000000000) |
4b5c65b8 | 210 | |
808d77bc LMP |
211 | /* |
212 | * Function for compactifying extended double-precision floating point values. | |
213 | */ | |
4b5c65b8 LV |
214 | |
215 | static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) | |
216 | { | |
217 | return (aExp << 16) | (aSig >> 48); | |
218 | } | |
219 | ||
808d77bc LMP |
220 | /* |
221 | * Log base e of x plus 1 | |
222 | */ | |
4b5c65b8 LV |
223 | |
224 | floatx80 floatx80_lognp1(floatx80 a, float_status *status) | |
225 | { | |
c120391c | 226 | bool aSign; |
4b5c65b8 LV |
227 | int32_t aExp; |
228 | uint64_t aSig, fSig; | |
229 | ||
8da5f1db RH |
230 | FloatRoundMode user_rnd_mode; |
231 | FloatX80RoundPrec user_rnd_prec; | |
4b5c65b8 LV |
232 | |
233 | int32_t compact, j, k; | |
234 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; | |
235 | ||
236 | aSig = extractFloatx80Frac(a); | |
237 | aExp = extractFloatx80Exp(a); | |
238 | aSign = extractFloatx80Sign(a); | |
239 | ||
240 | if (aExp == 0x7FFF) { | |
241 | if ((uint64_t) (aSig << 1)) { | |
242 | propagateFloatx80NaNOneArg(a, status); | |
243 | } | |
244 | if (aSign) { | |
245 | float_raise(float_flag_invalid, status); | |
246 | return floatx80_default_nan(status); | |
247 | } | |
248 | return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low); | |
249 | } | |
250 | ||
251 | if (aExp == 0 && aSig == 0) { | |
252 | return packFloatx80(aSign, 0, 0); | |
253 | } | |
254 | ||
255 | if (aSign && aExp >= one_exp) { | |
256 | if (aExp == one_exp && aSig == one_sig) { | |
257 | float_raise(float_flag_divbyzero, status); | |
981348af LV |
258 | return packFloatx80(aSign, floatx80_infinity.high, |
259 | floatx80_infinity.low); | |
4b5c65b8 LV |
260 | } |
261 | float_raise(float_flag_invalid, status); | |
262 | return floatx80_default_nan(status); | |
263 | } | |
264 | ||
265 | if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { | |
266 | /* <= min threshold */ | |
267 | float_raise(float_flag_inexact, status); | |
268 | return floatx80_move(a, status); | |
269 | } | |
270 | ||
271 | user_rnd_mode = status->float_rounding_mode; | |
272 | user_rnd_prec = status->floatx80_rounding_precision; | |
273 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 274 | status->floatx80_rounding_precision = floatx80_precision_x; |
4b5c65b8 LV |
275 | |
276 | compact = floatx80_make_compact(aExp, aSig); | |
277 | ||
278 | fp0 = a; /* Z */ | |
279 | fp1 = a; | |
280 | ||
281 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
282 | status), status); /* X = (1+Z) */ | |
283 | ||
284 | aExp = extractFloatx80Exp(fp0); | |
285 | aSig = extractFloatx80Frac(fp0); | |
286 | ||
287 | compact = floatx80_make_compact(aExp, aSig); | |
288 | ||
289 | if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { | |
290 | /* |X| < 1/2 or |X| > 3/2 */ | |
291 | k = aExp - 0x3FFF; | |
292 | fp1 = int32_to_floatx80(k, status); | |
293 | ||
e2326300 | 294 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
4b5c65b8 LV |
295 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ |
296 | ||
297 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ | |
298 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ | |
299 | ||
300 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ | |
301 | ||
302 | lp1cont1: | |
303 | /* LP1CONT1 */ | |
304 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ | |
e2326300 | 305 | logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); |
4b5c65b8 LV |
306 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ |
307 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ | |
308 | ||
309 | fp3 = fp2; | |
310 | fp1 = fp2; | |
311 | ||
312 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
313 | make_float64(0x3FC2499AB5E4040B), status), | |
314 | status); /* V*A6 */ | |
315 | fp2 = floatx80_mul(fp2, float64_to_floatx80( | |
316 | make_float64(0xBFC555B5848CB7DB), status), | |
317 | status); /* V*A5 */ | |
318 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
319 | make_float64(0x3FC99999987D8730), status), | |
320 | status); /* A4+V*A6 */ | |
321 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
322 | make_float64(0xBFCFFFFFFF6F7E97), status), | |
323 | status); /* A3+V*A5 */ | |
324 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ | |
325 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ | |
326 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
327 | make_float64(0x3FD55555555555A4), status), | |
328 | status); /* A2+V*(A4+V*A6) */ | |
329 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
330 | make_float64(0xBFE0000000000008), status), | |
331 | status); /* A1+V*(A3+V*A5) */ | |
332 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ | |
333 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ | |
334 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ | |
335 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ | |
336 | ||
337 | fp1 = floatx80_add(fp1, log_tbl[j + 1], | |
338 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ | |
339 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ | |
340 | ||
341 | status->float_rounding_mode = user_rnd_mode; | |
342 | status->floatx80_rounding_precision = user_rnd_prec; | |
343 | ||
344 | a = floatx80_add(fp0, klog2, status); | |
345 | ||
346 | float_raise(float_flag_inexact, status); | |
347 | ||
348 | return a; | |
349 | } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { | |
350 | /* |X| < 1/16 or |X| > -1/16 */ | |
351 | /* LP1CARE */ | |
e2326300 | 352 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
4b5c65b8 LV |
353 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ |
354 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ | |
355 | ||
356 | if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */ | |
357 | /* KISZERO */ | |
358 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000), | |
359 | status), f, status); /* 1-F */ | |
360 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */ | |
361 | fp1 = packFloatx80(0, 0, 0); /* K = 0 */ | |
362 | } else { | |
363 | /* KISNEG */ | |
364 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000), | |
365 | status), f, status); /* 2-F */ | |
366 | fp1 = floatx80_add(fp1, fp1, status); /* 2Z */ | |
367 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */ | |
368 | fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */ | |
369 | } | |
370 | goto lp1cont1; | |
371 | } else { | |
372 | /* LP1ONE16 */ | |
373 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */ | |
374 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
375 | status), status); /* FP0 IS 1+X */ | |
376 | ||
377 | /* LP1CONT2 */ | |
378 | fp1 = floatx80_div(fp1, fp0, status); /* U */ | |
379 | saveu = fp1; | |
380 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ | |
381 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ | |
382 | ||
383 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), | |
384 | status); /* B5 */ | |
385 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), | |
386 | status); /* B4 */ | |
387 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ | |
388 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ | |
389 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
390 | make_float64(0x3F624924928BCCFF), status), | |
391 | status); /* B3+W*B5 */ | |
392 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
393 | make_float64(0x3F899999999995EC), status), | |
394 | status); /* B2+W*B4 */ | |
395 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ | |
396 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ | |
397 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
398 | make_float64(0x3FB5555555555555), status), | |
399 | status); /* B1+W*(B3+W*B5) */ | |
400 | ||
401 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ | |
402 | fp1 = floatx80_add(fp1, fp2, | |
403 | status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ | |
404 | fp0 = floatx80_mul(fp0, fp1, | |
405 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ | |
406 | ||
407 | status->float_rounding_mode = user_rnd_mode; | |
408 | status->floatx80_rounding_precision = user_rnd_prec; | |
409 | ||
410 | a = floatx80_add(fp0, saveu, status); | |
411 | ||
412 | /*if (!floatx80_is_zero(a)) { */ | |
413 | float_raise(float_flag_inexact, status); | |
414 | /*} */ | |
415 | ||
416 | return a; | |
417 | } | |
418 | } | |
50067bd1 | 419 | |
808d77bc LMP |
420 | /* |
421 | * Log base e | |
422 | */ | |
50067bd1 LV |
423 | |
424 | floatx80 floatx80_logn(floatx80 a, float_status *status) | |
425 | { | |
c120391c | 426 | bool aSign; |
50067bd1 LV |
427 | int32_t aExp; |
428 | uint64_t aSig, fSig; | |
429 | ||
8da5f1db RH |
430 | FloatRoundMode user_rnd_mode; |
431 | FloatX80RoundPrec user_rnd_prec; | |
50067bd1 LV |
432 | |
433 | int32_t compact, j, k, adjk; | |
434 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; | |
435 | ||
436 | aSig = extractFloatx80Frac(a); | |
437 | aExp = extractFloatx80Exp(a); | |
438 | aSign = extractFloatx80Sign(a); | |
439 | ||
440 | if (aExp == 0x7FFF) { | |
441 | if ((uint64_t) (aSig << 1)) { | |
442 | propagateFloatx80NaNOneArg(a, status); | |
443 | } | |
444 | if (aSign == 0) { | |
445 | return packFloatx80(0, floatx80_infinity.high, | |
446 | floatx80_infinity.low); | |
447 | } | |
448 | } | |
449 | ||
450 | adjk = 0; | |
451 | ||
452 | if (aExp == 0) { | |
453 | if (aSig == 0) { /* zero */ | |
454 | float_raise(float_flag_divbyzero, status); | |
455 | return packFloatx80(1, floatx80_infinity.high, | |
456 | floatx80_infinity.low); | |
457 | } | |
458 | if ((aSig & one_sig) == 0) { /* denormal */ | |
459 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
460 | adjk = -100; | |
461 | aExp += 100; | |
462 | a = packFloatx80(aSign, aExp, aSig); | |
463 | } | |
464 | } | |
465 | ||
466 | if (aSign) { | |
467 | float_raise(float_flag_invalid, status); | |
468 | return floatx80_default_nan(status); | |
469 | } | |
470 | ||
471 | user_rnd_mode = status->float_rounding_mode; | |
472 | user_rnd_prec = status->floatx80_rounding_precision; | |
473 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 474 | status->floatx80_rounding_precision = floatx80_precision_x; |
50067bd1 LV |
475 | |
476 | compact = floatx80_make_compact(aExp, aSig); | |
477 | ||
478 | if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { | |
479 | /* |X| < 15/16 or |X| > 17/16 */ | |
480 | k = aExp - 0x3FFF; | |
481 | k += adjk; | |
482 | fp1 = int32_to_floatx80(k, status); | |
483 | ||
e2326300 | 484 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
50067bd1 LV |
485 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ |
486 | ||
487 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ | |
488 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ | |
489 | ||
490 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ | |
491 | ||
492 | /* LP1CONT1 */ | |
493 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ | |
e2326300 | 494 | logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); |
50067bd1 LV |
495 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ |
496 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ | |
497 | ||
498 | fp3 = fp2; | |
499 | fp1 = fp2; | |
500 | ||
501 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
502 | make_float64(0x3FC2499AB5E4040B), status), | |
503 | status); /* V*A6 */ | |
504 | fp2 = floatx80_mul(fp2, float64_to_floatx80( | |
505 | make_float64(0xBFC555B5848CB7DB), status), | |
506 | status); /* V*A5 */ | |
507 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
508 | make_float64(0x3FC99999987D8730), status), | |
509 | status); /* A4+V*A6 */ | |
510 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
511 | make_float64(0xBFCFFFFFFF6F7E97), status), | |
512 | status); /* A3+V*A5 */ | |
513 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ | |
514 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ | |
515 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
516 | make_float64(0x3FD55555555555A4), status), | |
517 | status); /* A2+V*(A4+V*A6) */ | |
518 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
519 | make_float64(0xBFE0000000000008), status), | |
520 | status); /* A1+V*(A3+V*A5) */ | |
521 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ | |
522 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ | |
523 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ | |
524 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ | |
525 | ||
526 | fp1 = floatx80_add(fp1, log_tbl[j + 1], | |
527 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ | |
528 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ | |
529 | ||
530 | status->float_rounding_mode = user_rnd_mode; | |
531 | status->floatx80_rounding_precision = user_rnd_prec; | |
532 | ||
533 | a = floatx80_add(fp0, klog2, status); | |
534 | ||
535 | float_raise(float_flag_inexact, status); | |
536 | ||
537 | return a; | |
538 | } else { /* |X-1| >= 1/16 */ | |
539 | fp0 = a; | |
540 | fp1 = a; | |
541 | fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000), | |
542 | status), status); /* FP1 IS X-1 */ | |
543 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
544 | status), status); /* FP0 IS X+1 */ | |
545 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */ | |
546 | ||
547 | /* LP1CONT2 */ | |
548 | fp1 = floatx80_div(fp1, fp0, status); /* U */ | |
549 | saveu = fp1; | |
550 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ | |
551 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ | |
552 | ||
553 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), | |
554 | status); /* B5 */ | |
555 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), | |
556 | status); /* B4 */ | |
557 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ | |
558 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ | |
559 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
560 | make_float64(0x3F624924928BCCFF), status), | |
561 | status); /* B3+W*B5 */ | |
562 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
563 | make_float64(0x3F899999999995EC), status), | |
564 | status); /* B2+W*B4 */ | |
565 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ | |
566 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ | |
567 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
568 | make_float64(0x3FB5555555555555), status), | |
569 | status); /* B1+W*(B3+W*B5) */ | |
570 | ||
571 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ | |
572 | fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ | |
573 | fp0 = floatx80_mul(fp0, fp1, | |
574 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ | |
575 | ||
576 | status->float_rounding_mode = user_rnd_mode; | |
577 | status->floatx80_rounding_precision = user_rnd_prec; | |
578 | ||
579 | a = floatx80_add(fp0, saveu, status); | |
580 | ||
581 | /*if (!floatx80_is_zero(a)) { */ | |
582 | float_raise(float_flag_inexact, status); | |
583 | /*} */ | |
584 | ||
585 | return a; | |
586 | } | |
587 | } | |
248efb66 | 588 | |
808d77bc LMP |
589 | /* |
590 | * Log base 10 | |
591 | */ | |
248efb66 LV |
592 | |
593 | floatx80 floatx80_log10(floatx80 a, float_status *status) | |
594 | { | |
c120391c | 595 | bool aSign; |
248efb66 LV |
596 | int32_t aExp; |
597 | uint64_t aSig; | |
598 | ||
8da5f1db RH |
599 | FloatRoundMode user_rnd_mode; |
600 | FloatX80RoundPrec user_rnd_prec; | |
248efb66 LV |
601 | |
602 | floatx80 fp0, fp1; | |
603 | ||
604 | aSig = extractFloatx80Frac(a); | |
605 | aExp = extractFloatx80Exp(a); | |
606 | aSign = extractFloatx80Sign(a); | |
607 | ||
608 | if (aExp == 0x7FFF) { | |
609 | if ((uint64_t) (aSig << 1)) { | |
610 | propagateFloatx80NaNOneArg(a, status); | |
611 | } | |
612 | if (aSign == 0) { | |
613 | return packFloatx80(0, floatx80_infinity.high, | |
614 | floatx80_infinity.low); | |
615 | } | |
616 | } | |
617 | ||
618 | if (aExp == 0 && aSig == 0) { | |
619 | float_raise(float_flag_divbyzero, status); | |
620 | return packFloatx80(1, floatx80_infinity.high, | |
621 | floatx80_infinity.low); | |
622 | } | |
623 | ||
624 | if (aSign) { | |
625 | float_raise(float_flag_invalid, status); | |
626 | return floatx80_default_nan(status); | |
627 | } | |
628 | ||
629 | user_rnd_mode = status->float_rounding_mode; | |
630 | user_rnd_prec = status->floatx80_rounding_precision; | |
631 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 632 | status->floatx80_rounding_precision = floatx80_precision_x; |
248efb66 LV |
633 | |
634 | fp0 = floatx80_logn(a, status); | |
e2326300 | 635 | fp1 = packFloatx80(0, 0x3FFD, UINT64_C(0xDE5BD8A937287195)); /* INV_L10 */ |
248efb66 LV |
636 | |
637 | status->float_rounding_mode = user_rnd_mode; | |
638 | status->floatx80_rounding_precision = user_rnd_prec; | |
639 | ||
640 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */ | |
641 | ||
642 | float_raise(float_flag_inexact, status); | |
643 | ||
644 | return a; | |
645 | } | |
67b453ed | 646 | |
808d77bc LMP |
647 | /* |
648 | * Log base 2 | |
649 | */ | |
67b453ed LV |
650 | |
651 | floatx80 floatx80_log2(floatx80 a, float_status *status) | |
652 | { | |
c120391c | 653 | bool aSign; |
67b453ed LV |
654 | int32_t aExp; |
655 | uint64_t aSig; | |
656 | ||
8da5f1db RH |
657 | FloatRoundMode user_rnd_mode; |
658 | FloatX80RoundPrec user_rnd_prec; | |
67b453ed LV |
659 | |
660 | floatx80 fp0, fp1; | |
661 | ||
662 | aSig = extractFloatx80Frac(a); | |
663 | aExp = extractFloatx80Exp(a); | |
664 | aSign = extractFloatx80Sign(a); | |
665 | ||
666 | if (aExp == 0x7FFF) { | |
667 | if ((uint64_t) (aSig << 1)) { | |
668 | propagateFloatx80NaNOneArg(a, status); | |
669 | } | |
670 | if (aSign == 0) { | |
671 | return packFloatx80(0, floatx80_infinity.high, | |
672 | floatx80_infinity.low); | |
673 | } | |
674 | } | |
675 | ||
676 | if (aExp == 0) { | |
677 | if (aSig == 0) { | |
678 | float_raise(float_flag_divbyzero, status); | |
679 | return packFloatx80(1, floatx80_infinity.high, | |
680 | floatx80_infinity.low); | |
681 | } | |
682 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
683 | } | |
684 | ||
685 | if (aSign) { | |
686 | float_raise(float_flag_invalid, status); | |
687 | return floatx80_default_nan(status); | |
688 | } | |
689 | ||
690 | user_rnd_mode = status->float_rounding_mode; | |
691 | user_rnd_prec = status->floatx80_rounding_precision; | |
692 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 693 | status->floatx80_rounding_precision = floatx80_precision_x; |
67b453ed LV |
694 | |
695 | if (aSig == one_sig) { /* X is 2^k */ | |
696 | status->float_rounding_mode = user_rnd_mode; | |
697 | status->floatx80_rounding_precision = user_rnd_prec; | |
698 | ||
699 | a = int32_to_floatx80(aExp - 0x3FFF, status); | |
700 | } else { | |
701 | fp0 = floatx80_logn(a, status); | |
e2326300 | 702 | fp1 = packFloatx80(0, 0x3FFF, UINT64_C(0xB8AA3B295C17F0BC)); /* INV_L2 */ |
67b453ed LV |
703 | |
704 | status->float_rounding_mode = user_rnd_mode; | |
705 | status->floatx80_rounding_precision = user_rnd_prec; | |
706 | ||
707 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */ | |
708 | } | |
709 | ||
710 | float_raise(float_flag_inexact, status); | |
711 | ||
712 | return a; | |
713 | } | |
40ad0873 | 714 | |
808d77bc LMP |
715 | /* |
716 | * e to x | |
717 | */ | |
40ad0873 LV |
718 | |
719 | floatx80 floatx80_etox(floatx80 a, float_status *status) | |
720 | { | |
c120391c | 721 | bool aSign; |
40ad0873 LV |
722 | int32_t aExp; |
723 | uint64_t aSig; | |
724 | ||
8da5f1db RH |
725 | FloatRoundMode user_rnd_mode; |
726 | FloatX80RoundPrec user_rnd_prec; | |
40ad0873 LV |
727 | |
728 | int32_t compact, n, j, k, m, m1; | |
729 | floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; | |
c120391c | 730 | bool adjflag; |
40ad0873 LV |
731 | |
732 | aSig = extractFloatx80Frac(a); | |
733 | aExp = extractFloatx80Exp(a); | |
734 | aSign = extractFloatx80Sign(a); | |
735 | ||
736 | if (aExp == 0x7FFF) { | |
737 | if ((uint64_t) (aSig << 1)) { | |
738 | return propagateFloatx80NaNOneArg(a, status); | |
739 | } | |
740 | if (aSign) { | |
741 | return packFloatx80(0, 0, 0); | |
742 | } | |
743 | return packFloatx80(0, floatx80_infinity.high, | |
744 | floatx80_infinity.low); | |
745 | } | |
746 | ||
747 | if (aExp == 0 && aSig == 0) { | |
748 | return packFloatx80(0, one_exp, one_sig); | |
749 | } | |
750 | ||
751 | user_rnd_mode = status->float_rounding_mode; | |
752 | user_rnd_prec = status->floatx80_rounding_precision; | |
753 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 754 | status->floatx80_rounding_precision = floatx80_precision_x; |
40ad0873 LV |
755 | |
756 | adjflag = 0; | |
757 | ||
758 | if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */ | |
759 | compact = floatx80_make_compact(aExp, aSig); | |
760 | ||
761 | if (compact < 0x400CB167) { /* |X| < 16380 log2 */ | |
762 | fp0 = a; | |
763 | fp1 = a; | |
764 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
765 | make_float32(0x42B8AA3B), status), | |
766 | status); /* 64/log2 * X */ | |
767 | adjflag = 0; | |
768 | n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ | |
769 | fp0 = int32_to_floatx80(n, status); | |
770 | ||
771 | j = n & 0x3F; /* J = N mod 64 */ | |
772 | m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ | |
773 | if (n < 0 && j) { | |
808d77bc LMP |
774 | /* |
775 | * arithmetic right shift is division and | |
40ad0873 LV |
776 | * round towards minus infinity |
777 | */ | |
778 | m--; | |
779 | } | |
780 | m += 0x3FFF; /* biased exponent of 2^(M) */ | |
781 | ||
782 | expcont1: | |
783 | fp2 = fp0; /* N */ | |
784 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
785 | make_float32(0xBC317218), status), | |
786 | status); /* N * L1, L1 = lead(-log2/64) */ | |
e2326300 | 787 | l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6)); |
40ad0873 LV |
788 | fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */ |
789 | fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */ | |
790 | fp0 = floatx80_add(fp0, fp2, status); /* R */ | |
791 | ||
792 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
793 | fp2 = float32_to_floatx80(make_float32(0x3AB60B70), | |
794 | status); /* A5 */ | |
795 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */ | |
796 | fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895), | |
797 | status), fp1, | |
798 | status); /* fp3 is S*A4 */ | |
799 | fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64( | |
800 | 0x3FA5555555554431), status), | |
801 | status); /* fp2 is A3+S*A5 */ | |
802 | fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64( | |
803 | 0x3FC5555555554018), status), | |
804 | status); /* fp3 is A2+S*A4 */ | |
805 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */ | |
806 | fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */ | |
807 | fp2 = floatx80_add(fp2, float32_to_floatx80( | |
808 | make_float32(0x3F000000), status), | |
809 | status); /* fp2 is A1+S*(A3+S*A5) */ | |
810 | fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */ | |
811 | fp2 = floatx80_mul(fp2, fp1, | |
812 | status); /* fp2 IS S*(A1+S*(A3+S*A5)) */ | |
813 | fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */ | |
814 | fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */ | |
815 | ||
816 | fp1 = exp_tbl[j]; | |
817 | fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */ | |
818 | fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), | |
819 | status); /* accurate 2^(J/64) */ | |
820 | fp0 = floatx80_add(fp0, fp1, | |
821 | status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */ | |
822 | ||
823 | scale = packFloatx80(0, m, one_sig); | |
824 | if (adjflag) { | |
825 | adjscale = packFloatx80(0, m1, one_sig); | |
826 | fp0 = floatx80_mul(fp0, adjscale, status); | |
827 | } | |
828 | ||
829 | status->float_rounding_mode = user_rnd_mode; | |
830 | status->floatx80_rounding_precision = user_rnd_prec; | |
831 | ||
832 | a = floatx80_mul(fp0, scale, status); | |
833 | ||
834 | float_raise(float_flag_inexact, status); | |
835 | ||
836 | return a; | |
837 | } else { /* |X| >= 16380 log2 */ | |
838 | if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */ | |
839 | status->float_rounding_mode = user_rnd_mode; | |
840 | status->floatx80_rounding_precision = user_rnd_prec; | |
841 | if (aSign) { | |
842 | a = roundAndPackFloatx80( | |
843 | status->floatx80_rounding_precision, | |
844 | 0, -0x1000, aSig, 0, status); | |
845 | } else { | |
846 | a = roundAndPackFloatx80( | |
847 | status->floatx80_rounding_precision, | |
848 | 0, 0x8000, aSig, 0, status); | |
849 | } | |
850 | float_raise(float_flag_inexact, status); | |
851 | ||
852 | return a; | |
853 | } else { | |
854 | fp0 = a; | |
855 | fp1 = a; | |
856 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
857 | make_float32(0x42B8AA3B), status), | |
858 | status); /* 64/log2 * X */ | |
859 | adjflag = 1; | |
860 | n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ | |
861 | fp0 = int32_to_floatx80(n, status); | |
862 | ||
863 | j = n & 0x3F; /* J = N mod 64 */ | |
864 | /* NOTE: this is really arithmetic right shift by 6 */ | |
865 | k = n / 64; | |
866 | if (n < 0 && j) { | |
867 | /* arithmetic right shift is division and | |
868 | * round towards minus infinity | |
869 | */ | |
870 | k--; | |
871 | } | |
872 | /* NOTE: this is really arithmetic right shift by 1 */ | |
873 | m1 = k / 2; | |
874 | if (k < 0 && (k & 1)) { | |
875 | /* arithmetic right shift is division and | |
876 | * round towards minus infinity | |
877 | */ | |
878 | m1--; | |
879 | } | |
880 | m = k - m1; | |
881 | m1 += 0x3FFF; /* biased exponent of 2^(M1) */ | |
882 | m += 0x3FFF; /* biased exponent of 2^(M) */ | |
883 | ||
884 | goto expcont1; | |
885 | } | |
886 | } | |
887 | } else { /* |X| < 2^(-65) */ | |
888 | status->float_rounding_mode = user_rnd_mode; | |
889 | status->floatx80_rounding_precision = user_rnd_prec; | |
890 | ||
891 | a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000), | |
892 | status), status); /* 1 + X */ | |
893 | ||
894 | float_raise(float_flag_inexact, status); | |
895 | ||
896 | return a; | |
897 | } | |
898 | } | |
068f1615 | 899 | |
808d77bc LMP |
900 | /* |
901 | * 2 to x | |
902 | */ | |
068f1615 LV |
903 | |
904 | floatx80 floatx80_twotox(floatx80 a, float_status *status) | |
905 | { | |
c120391c | 906 | bool aSign; |
068f1615 LV |
907 | int32_t aExp; |
908 | uint64_t aSig; | |
909 | ||
8da5f1db RH |
910 | FloatRoundMode user_rnd_mode; |
911 | FloatX80RoundPrec user_rnd_prec; | |
068f1615 LV |
912 | |
913 | int32_t compact, n, j, l, m, m1; | |
914 | floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; | |
915 | ||
916 | aSig = extractFloatx80Frac(a); | |
917 | aExp = extractFloatx80Exp(a); | |
918 | aSign = extractFloatx80Sign(a); | |
919 | ||
920 | if (aExp == 0x7FFF) { | |
921 | if ((uint64_t) (aSig << 1)) { | |
922 | return propagateFloatx80NaNOneArg(a, status); | |
923 | } | |
924 | if (aSign) { | |
925 | return packFloatx80(0, 0, 0); | |
926 | } | |
927 | return packFloatx80(0, floatx80_infinity.high, | |
928 | floatx80_infinity.low); | |
929 | } | |
930 | ||
931 | if (aExp == 0 && aSig == 0) { | |
932 | return packFloatx80(0, one_exp, one_sig); | |
933 | } | |
934 | ||
935 | user_rnd_mode = status->float_rounding_mode; | |
936 | user_rnd_prec = status->floatx80_rounding_precision; | |
937 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 938 | status->floatx80_rounding_precision = floatx80_precision_x; |
068f1615 LV |
939 | |
940 | fp0 = a; | |
941 | ||
942 | compact = floatx80_make_compact(aExp, aSig); | |
943 | ||
944 | if (compact < 0x3FB98000 || compact > 0x400D80C0) { | |
945 | /* |X| > 16480 or |X| < 2^(-70) */ | |
946 | if (compact > 0x3FFF8000) { /* |X| > 16480 */ | |
947 | status->float_rounding_mode = user_rnd_mode; | |
948 | status->floatx80_rounding_precision = user_rnd_prec; | |
949 | ||
950 | if (aSign) { | |
951 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
952 | 0, -0x1000, aSig, 0, status); | |
953 | } else { | |
954 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
955 | 0, 0x8000, aSig, 0, status); | |
956 | } | |
957 | } else { /* |X| < 2^(-70) */ | |
958 | status->float_rounding_mode = user_rnd_mode; | |
959 | status->floatx80_rounding_precision = user_rnd_prec; | |
960 | ||
961 | a = floatx80_add(fp0, float32_to_floatx80( | |
962 | make_float32(0x3F800000), status), | |
963 | status); /* 1 + X */ | |
964 | ||
965 | float_raise(float_flag_inexact, status); | |
966 | ||
967 | return a; | |
968 | } | |
969 | } else { /* 2^(-70) <= |X| <= 16480 */ | |
970 | fp1 = fp0; /* X */ | |
971 | fp1 = floatx80_mul(fp1, float32_to_floatx80( | |
972 | make_float32(0x42800000), status), | |
973 | status); /* X * 64 */ | |
974 | n = floatx80_to_int32(fp1, status); | |
975 | fp1 = int32_to_floatx80(n, status); | |
976 | j = n & 0x3F; | |
977 | l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ | |
978 | if (n < 0 && j) { | |
808d77bc LMP |
979 | /* |
980 | * arithmetic right shift is division and | |
068f1615 LV |
981 | * round towards minus infinity |
982 | */ | |
983 | l--; | |
984 | } | |
985 | m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */ | |
986 | if (l < 0 && (l & 1)) { | |
808d77bc LMP |
987 | /* |
988 | * arithmetic right shift is division and | |
068f1615 LV |
989 | * round towards minus infinity |
990 | */ | |
991 | m--; | |
992 | } | |
993 | m1 = l - m; | |
994 | m1 += 0x3FFF; /* ADJFACT IS 2^(M') */ | |
995 | ||
996 | adjfact = packFloatx80(0, m1, one_sig); | |
997 | fact1 = exp2_tbl[j]; | |
998 | fact1.high += m; | |
999 | fact2.high = exp2_tbl2[j] >> 16; | |
1000 | fact2.high += m; | |
1001 | fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); | |
1002 | fact2.low <<= 48; | |
1003 | ||
1004 | fp1 = floatx80_mul(fp1, float32_to_floatx80( | |
1005 | make_float32(0x3C800000), status), | |
1006 | status); /* (1/64)*N */ | |
1007 | fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */ | |
e2326300 | 1008 | fp2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); /* LOG2 */ |
068f1615 LV |
1009 | fp0 = floatx80_mul(fp0, fp2, status); /* R */ |
1010 | ||
6c25be6e LV |
1011 | /* EXPR */ |
1012 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
1013 | fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2), | |
1014 | status); /* A5 */ | |
1015 | fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C), | |
1016 | status); /* A4 */ | |
1017 | fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */ | |
1018 | fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */ | |
1019 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1020 | make_float64(0x3FA5555555554CC1), status), | |
1021 | status); /* A3+S*A5 */ | |
1022 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1023 | make_float64(0x3FC5555555554A54), status), | |
1024 | status); /* A2+S*A4 */ | |
1025 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */ | |
1026 | fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */ | |
1027 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1028 | make_float64(0x3FE0000000000000), status), | |
1029 | status); /* A1+S*(A3+S*A5) */ | |
1030 | fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */ | |
1031 | ||
1032 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */ | |
1033 | fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */ | |
1034 | fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */ | |
1035 | ||
1036 | fp0 = floatx80_mul(fp0, fact1, status); | |
1037 | fp0 = floatx80_add(fp0, fact2, status); | |
1038 | fp0 = floatx80_add(fp0, fact1, status); | |
1039 | ||
1040 | status->float_rounding_mode = user_rnd_mode; | |
1041 | status->floatx80_rounding_precision = user_rnd_prec; | |
1042 | ||
1043 | a = floatx80_mul(fp0, adjfact, status); | |
1044 | ||
1045 | float_raise(float_flag_inexact, status); | |
1046 | ||
1047 | return a; | |
1048 | } | |
1049 | } | |
1050 | ||
808d77bc LMP |
1051 | /* |
1052 | * 10 to x | |
1053 | */ | |
6c25be6e LV |
1054 | |
1055 | floatx80 floatx80_tentox(floatx80 a, float_status *status) | |
1056 | { | |
c120391c | 1057 | bool aSign; |
6c25be6e LV |
1058 | int32_t aExp; |
1059 | uint64_t aSig; | |
1060 | ||
8da5f1db RH |
1061 | FloatRoundMode user_rnd_mode; |
1062 | FloatX80RoundPrec user_rnd_prec; | |
6c25be6e LV |
1063 | |
1064 | int32_t compact, n, j, l, m, m1; | |
1065 | floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; | |
1066 | ||
1067 | aSig = extractFloatx80Frac(a); | |
1068 | aExp = extractFloatx80Exp(a); | |
1069 | aSign = extractFloatx80Sign(a); | |
1070 | ||
1071 | if (aExp == 0x7FFF) { | |
1072 | if ((uint64_t) (aSig << 1)) { | |
1073 | return propagateFloatx80NaNOneArg(a, status); | |
1074 | } | |
1075 | if (aSign) { | |
1076 | return packFloatx80(0, 0, 0); | |
1077 | } | |
1078 | return packFloatx80(0, floatx80_infinity.high, | |
1079 | floatx80_infinity.low); | |
1080 | } | |
1081 | ||
1082 | if (aExp == 0 && aSig == 0) { | |
1083 | return packFloatx80(0, one_exp, one_sig); | |
1084 | } | |
1085 | ||
1086 | user_rnd_mode = status->float_rounding_mode; | |
1087 | user_rnd_prec = status->floatx80_rounding_precision; | |
1088 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 1089 | status->floatx80_rounding_precision = floatx80_precision_x; |
6c25be6e LV |
1090 | |
1091 | fp0 = a; | |
1092 | ||
1093 | compact = floatx80_make_compact(aExp, aSig); | |
1094 | ||
1095 | if (compact < 0x3FB98000 || compact > 0x400B9B07) { | |
1096 | /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */ | |
1097 | if (compact > 0x3FFF8000) { /* |X| > 16480 */ | |
1098 | status->float_rounding_mode = user_rnd_mode; | |
1099 | status->floatx80_rounding_precision = user_rnd_prec; | |
1100 | ||
1101 | if (aSign) { | |
1102 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
1103 | 0, -0x1000, aSig, 0, status); | |
1104 | } else { | |
1105 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
1106 | 0, 0x8000, aSig, 0, status); | |
1107 | } | |
1108 | } else { /* |X| < 2^(-70) */ | |
1109 | status->float_rounding_mode = user_rnd_mode; | |
1110 | status->floatx80_rounding_precision = user_rnd_prec; | |
1111 | ||
1112 | a = floatx80_add(fp0, float32_to_floatx80( | |
1113 | make_float32(0x3F800000), status), | |
1114 | status); /* 1 + X */ | |
1115 | ||
1116 | float_raise(float_flag_inexact, status); | |
1117 | ||
1118 | return a; | |
1119 | } | |
1120 | } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */ | |
1121 | fp1 = fp0; /* X */ | |
1122 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
1123 | make_float64(0x406A934F0979A371), | |
1124 | status), status); /* X*64*LOG10/LOG2 */ | |
1125 | n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */ | |
1126 | fp1 = int32_to_floatx80(n, status); | |
1127 | ||
1128 | j = n & 0x3F; | |
1129 | l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ | |
1130 | if (n < 0 && j) { | |
808d77bc LMP |
1131 | /* |
1132 | * arithmetic right shift is division and | |
6c25be6e LV |
1133 | * round towards minus infinity |
1134 | */ | |
1135 | l--; | |
1136 | } | |
1137 | m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */ | |
1138 | if (l < 0 && (l & 1)) { | |
808d77bc LMP |
1139 | /* |
1140 | * arithmetic right shift is division and | |
6c25be6e LV |
1141 | * round towards minus infinity |
1142 | */ | |
1143 | m--; | |
1144 | } | |
1145 | m1 = l - m; | |
1146 | m1 += 0x3FFF; /* ADJFACT IS 2^(M') */ | |
1147 | ||
1148 | adjfact = packFloatx80(0, m1, one_sig); | |
1149 | fact1 = exp2_tbl[j]; | |
1150 | fact1.high += m; | |
1151 | fact2.high = exp2_tbl2[j] >> 16; | |
1152 | fact2.high += m; | |
1153 | fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); | |
1154 | fact2.low <<= 48; | |
1155 | ||
1156 | fp2 = fp1; /* N */ | |
1157 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
1158 | make_float64(0x3F734413509F8000), status), | |
1159 | status); /* N*(LOG2/64LOG10)_LEAD */ | |
e2326300 | 1160 | fp3 = packFloatx80(1, 0x3FCD, UINT64_C(0xC0219DC1DA994FD2)); |
6c25be6e LV |
1161 | fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */ |
1162 | fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */ | |
1163 | fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */ | |
e2326300 | 1164 | fp2 = packFloatx80(0, 0x4000, UINT64_C(0x935D8DDDAAA8AC17)); /* LOG10 */ |
6c25be6e LV |
1165 | fp0 = floatx80_mul(fp0, fp2, status); /* R */ |
1166 | ||
068f1615 LV |
1167 | /* EXPR */ |
1168 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
1169 | fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2), | |
1170 | status); /* A5 */ | |
1171 | fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C), | |
1172 | status); /* A4 */ | |
1173 | fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */ | |
1174 | fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */ | |
1175 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1176 | make_float64(0x3FA5555555554CC1), status), | |
1177 | status); /* A3+S*A5 */ | |
1178 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1179 | make_float64(0x3FC5555555554A54), status), | |
1180 | status); /* A2+S*A4 */ | |
1181 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */ | |
1182 | fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */ | |
1183 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1184 | make_float64(0x3FE0000000000000), status), | |
1185 | status); /* A1+S*(A3+S*A5) */ | |
1186 | fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */ | |
1187 | ||
1188 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */ | |
1189 | fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */ | |
1190 | fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */ | |
1191 | ||
1192 | fp0 = floatx80_mul(fp0, fact1, status); | |
1193 | fp0 = floatx80_add(fp0, fact2, status); | |
1194 | fp0 = floatx80_add(fp0, fact1, status); | |
1195 | ||
1196 | status->float_rounding_mode = user_rnd_mode; | |
1197 | status->floatx80_rounding_precision = user_rnd_prec; | |
1198 | ||
1199 | a = floatx80_mul(fp0, adjfact, status); | |
1200 | ||
1201 | float_raise(float_flag_inexact, status); | |
1202 | ||
1203 | return a; | |
1204 | } | |
1205 | } | |
27340180 | 1206 | |
808d77bc LMP |
1207 | /* |
1208 | * Tangent | |
1209 | */ | |
27340180 LV |
1210 | |
1211 | floatx80 floatx80_tan(floatx80 a, float_status *status) | |
1212 | { | |
c120391c | 1213 | bool aSign, xSign; |
27340180 LV |
1214 | int32_t aExp, xExp; |
1215 | uint64_t aSig, xSig; | |
1216 | ||
8da5f1db RH |
1217 | FloatRoundMode user_rnd_mode; |
1218 | FloatX80RoundPrec user_rnd_prec; | |
27340180 LV |
1219 | |
1220 | int32_t compact, l, n, j; | |
1221 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; | |
1222 | float32 twoto63; | |
c120391c | 1223 | bool endflag; |
27340180 LV |
1224 | |
1225 | aSig = extractFloatx80Frac(a); | |
1226 | aExp = extractFloatx80Exp(a); | |
1227 | aSign = extractFloatx80Sign(a); | |
1228 | ||
1229 | if (aExp == 0x7FFF) { | |
1230 | if ((uint64_t) (aSig << 1)) { | |
1231 | return propagateFloatx80NaNOneArg(a, status); | |
1232 | } | |
1233 | float_raise(float_flag_invalid, status); | |
1234 | return floatx80_default_nan(status); | |
1235 | } | |
1236 | ||
1237 | if (aExp == 0 && aSig == 0) { | |
1238 | return packFloatx80(aSign, 0, 0); | |
1239 | } | |
1240 | ||
1241 | user_rnd_mode = status->float_rounding_mode; | |
1242 | user_rnd_prec = status->floatx80_rounding_precision; | |
1243 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 1244 | status->floatx80_rounding_precision = floatx80_precision_x; |
27340180 LV |
1245 | |
1246 | compact = floatx80_make_compact(aExp, aSig); | |
1247 | ||
1248 | fp0 = a; | |
1249 | ||
1250 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { | |
1251 | /* 2^(-40) > |X| > 15 PI */ | |
1252 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ | |
1253 | /* REDUCEX */ | |
1254 | fp1 = packFloatx80(0, 0, 0); | |
1255 | if (compact == 0x7FFEFFFF) { | |
1256 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, | |
e2326300 | 1257 | UINT64_C(0xC90FDAA200000000)); |
27340180 | 1258 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, |
e2326300 | 1259 | UINT64_C(0x85A308D300000000)); |
27340180 LV |
1260 | fp0 = floatx80_add(fp0, twopi1, status); |
1261 | fp1 = fp0; | |
1262 | fp0 = floatx80_add(fp0, twopi2, status); | |
1263 | fp1 = floatx80_sub(fp1, fp0, status); | |
1264 | fp1 = floatx80_add(fp1, twopi2, status); | |
1265 | } | |
1266 | loop: | |
1267 | xSign = extractFloatx80Sign(fp0); | |
1268 | xExp = extractFloatx80Exp(fp0); | |
1269 | xExp -= 0x3FFF; | |
1270 | if (xExp <= 28) { | |
1271 | l = 0; | |
c120391c | 1272 | endflag = true; |
27340180 LV |
1273 | } else { |
1274 | l = xExp - 27; | |
c120391c | 1275 | endflag = false; |
27340180 LV |
1276 | } |
1277 | invtwopi = packFloatx80(0, 0x3FFE - l, | |
e2326300 AB |
1278 | UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ |
1279 | twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000)); | |
1280 | twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000)); | |
27340180 LV |
1281 | |
1282 | /* SIGN(INARG)*2^63 IN SGL */ | |
1283 | twoto63 = packFloat32(xSign, 0xBE, 0); | |
1284 | ||
1285 | fp2 = floatx80_mul(fp0, invtwopi, status); | |
1286 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), | |
1287 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ | |
1288 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), | |
1289 | status); /* FP2 is N */ | |
1290 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ | |
1291 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ | |
1292 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ | |
1293 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ | |
1294 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ | |
1295 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ | |
1296 | fp3 = fp0; /* FP3 is A */ | |
1297 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ | |
1298 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ | |
1299 | ||
c120391c | 1300 | if (endflag) { |
27340180 LV |
1301 | n = floatx80_to_int32(fp2, status); |
1302 | goto tancont; | |
1303 | } | |
1304 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ | |
1305 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ | |
1306 | goto loop; | |
1307 | } else { | |
1308 | status->float_rounding_mode = user_rnd_mode; | |
1309 | status->floatx80_rounding_precision = user_rnd_prec; | |
1310 | ||
1311 | a = floatx80_move(a, status); | |
1312 | ||
1313 | float_raise(float_flag_inexact, status); | |
1314 | ||
1315 | return a; | |
1316 | } | |
1317 | } else { | |
1318 | fp1 = floatx80_mul(fp0, float64_to_floatx80( | |
1319 | make_float64(0x3FE45F306DC9C883), status), | |
1320 | status); /* X*2/PI */ | |
1321 | ||
1322 | n = floatx80_to_int32(fp1, status); | |
1323 | j = 32 + n; | |
1324 | ||
1325 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ | |
1326 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), | |
1327 | status); /* FP0 IS R = (X-Y1)-Y2 */ | |
1328 | ||
1329 | tancont: | |
1330 | if (n & 1) { | |
1331 | /* NODD */ | |
1332 | fp1 = fp0; /* R */ | |
1333 | fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
1334 | fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688), | |
1335 | status); /* Q4 */ | |
1336 | fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04), | |
1337 | status); /* P3 */ | |
1338 | fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */ | |
1339 | fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */ | |
1340 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1341 | make_float64(0xBF346F59B39BA65F), status), | |
1342 | status); /* Q3+SQ4 */ | |
e2326300 | 1343 | fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00)); |
27340180 LV |
1344 | fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */ |
1345 | fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */ | |
1346 | fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */ | |
e2326300 | 1347 | fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1)); |
27340180 | 1348 | fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */ |
e2326300 | 1349 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA)); |
27340180 LV |
1350 | fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */ |
1351 | fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */ | |
1352 | fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */ | |
e2326300 | 1353 | fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE)); |
27340180 LV |
1354 | fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */ |
1355 | fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */ | |
1356 | fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */ | |
1357 | fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */ | |
1358 | fp0 = floatx80_add(fp0, float32_to_floatx80( | |
1359 | make_float32(0x3F800000), status), | |
1360 | status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */ | |
1361 | ||
1362 | xSign = extractFloatx80Sign(fp1); | |
1363 | xExp = extractFloatx80Exp(fp1); | |
1364 | xSig = extractFloatx80Frac(fp1); | |
1365 | xSign ^= 1; | |
1366 | fp1 = packFloatx80(xSign, xExp, xSig); | |
1367 | ||
1368 | status->float_rounding_mode = user_rnd_mode; | |
1369 | status->floatx80_rounding_precision = user_rnd_prec; | |
1370 | ||
1371 | a = floatx80_div(fp0, fp1, status); | |
1372 | ||
1373 | float_raise(float_flag_inexact, status); | |
1374 | ||
1375 | return a; | |
1376 | } else { | |
1377 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
1378 | fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688), | |
1379 | status); /* Q4 */ | |
1380 | fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04), | |
1381 | status); /* P3 */ | |
1382 | fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */ | |
1383 | fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */ | |
1384 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1385 | make_float64(0xBF346F59B39BA65F), status), | |
1386 | status); /* Q3+SQ4 */ | |
e2326300 | 1387 | fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00)); |
27340180 LV |
1388 | fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */ |
1389 | fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */ | |
1390 | fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */ | |
e2326300 | 1391 | fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1)); |
27340180 | 1392 | fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */ |
e2326300 | 1393 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA)); |
27340180 LV |
1394 | fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */ |
1395 | fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */ | |
1396 | fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */ | |
e2326300 | 1397 | fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE)); |
27340180 LV |
1398 | fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */ |
1399 | fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */ | |
1400 | fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */ | |
1401 | fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */ | |
1402 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
1403 | make_float32(0x3F800000), status), | |
1404 | status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */ | |
1405 | ||
1406 | status->float_rounding_mode = user_rnd_mode; | |
1407 | status->floatx80_rounding_precision = user_rnd_prec; | |
1408 | ||
1409 | a = floatx80_div(fp0, fp1, status); | |
1410 | ||
1411 | float_raise(float_flag_inexact, status); | |
1412 | ||
1413 | return a; | |
1414 | } | |
1415 | } | |
1416 | } | |
5add1ac4 | 1417 | |
808d77bc LMP |
1418 | /* |
1419 | * Sine | |
1420 | */ | |
5add1ac4 LV |
1421 | |
1422 | floatx80 floatx80_sin(floatx80 a, float_status *status) | |
1423 | { | |
c120391c | 1424 | bool aSign, xSign; |
5add1ac4 LV |
1425 | int32_t aExp, xExp; |
1426 | uint64_t aSig, xSig; | |
1427 | ||
8da5f1db RH |
1428 | FloatRoundMode user_rnd_mode; |
1429 | FloatX80RoundPrec user_rnd_prec; | |
5add1ac4 LV |
1430 | |
1431 | int32_t compact, l, n, j; | |
1432 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; | |
1433 | float32 posneg1, twoto63; | |
c120391c | 1434 | bool endflag; |
5add1ac4 LV |
1435 | |
1436 | aSig = extractFloatx80Frac(a); | |
1437 | aExp = extractFloatx80Exp(a); | |
1438 | aSign = extractFloatx80Sign(a); | |
1439 | ||
1440 | if (aExp == 0x7FFF) { | |
1441 | if ((uint64_t) (aSig << 1)) { | |
1442 | return propagateFloatx80NaNOneArg(a, status); | |
1443 | } | |
1444 | float_raise(float_flag_invalid, status); | |
1445 | return floatx80_default_nan(status); | |
1446 | } | |
1447 | ||
1448 | if (aExp == 0 && aSig == 0) { | |
1449 | return packFloatx80(aSign, 0, 0); | |
1450 | } | |
1451 | ||
5add1ac4 LV |
1452 | user_rnd_mode = status->float_rounding_mode; |
1453 | user_rnd_prec = status->floatx80_rounding_precision; | |
1454 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 1455 | status->floatx80_rounding_precision = floatx80_precision_x; |
5add1ac4 LV |
1456 | |
1457 | compact = floatx80_make_compact(aExp, aSig); | |
1458 | ||
1459 | fp0 = a; | |
1460 | ||
1461 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { | |
1462 | /* 2^(-40) > |X| > 15 PI */ | |
1463 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ | |
1464 | /* REDUCEX */ | |
1465 | fp1 = packFloatx80(0, 0, 0); | |
1466 | if (compact == 0x7FFEFFFF) { | |
1467 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, | |
e2326300 | 1468 | UINT64_C(0xC90FDAA200000000)); |
5add1ac4 | 1469 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, |
e2326300 | 1470 | UINT64_C(0x85A308D300000000)); |
5add1ac4 LV |
1471 | fp0 = floatx80_add(fp0, twopi1, status); |
1472 | fp1 = fp0; | |
1473 | fp0 = floatx80_add(fp0, twopi2, status); | |
1474 | fp1 = floatx80_sub(fp1, fp0, status); | |
1475 | fp1 = floatx80_add(fp1, twopi2, status); | |
1476 | } | |
1477 | loop: | |
1478 | xSign = extractFloatx80Sign(fp0); | |
1479 | xExp = extractFloatx80Exp(fp0); | |
1480 | xExp -= 0x3FFF; | |
1481 | if (xExp <= 28) { | |
1482 | l = 0; | |
c120391c | 1483 | endflag = true; |
5add1ac4 LV |
1484 | } else { |
1485 | l = xExp - 27; | |
c120391c | 1486 | endflag = false; |
5add1ac4 LV |
1487 | } |
1488 | invtwopi = packFloatx80(0, 0x3FFE - l, | |
e2326300 AB |
1489 | UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ |
1490 | twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000)); | |
1491 | twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000)); | |
5add1ac4 LV |
1492 | |
1493 | /* SIGN(INARG)*2^63 IN SGL */ | |
1494 | twoto63 = packFloat32(xSign, 0xBE, 0); | |
1495 | ||
1496 | fp2 = floatx80_mul(fp0, invtwopi, status); | |
1497 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), | |
1498 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ | |
1499 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), | |
1500 | status); /* FP2 is N */ | |
1501 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ | |
1502 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ | |
1503 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ | |
1504 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ | |
1505 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ | |
1506 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ | |
1507 | fp3 = fp0; /* FP3 is A */ | |
1508 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ | |
1509 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ | |
1510 | ||
c120391c | 1511 | if (endflag) { |
5add1ac4 LV |
1512 | n = floatx80_to_int32(fp2, status); |
1513 | goto sincont; | |
1514 | } | |
1515 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ | |
1516 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ | |
1517 | goto loop; | |
1518 | } else { | |
1519 | /* SINSM */ | |
1520 | fp0 = float32_to_floatx80(make_float32(0x3F800000), | |
1521 | status); /* 1 */ | |
1522 | ||
1523 | status->float_rounding_mode = user_rnd_mode; | |
1524 | status->floatx80_rounding_precision = user_rnd_prec; | |
1525 | ||
6361d298 LV |
1526 | /* SINTINY */ |
1527 | a = floatx80_move(a, status); | |
5add1ac4 LV |
1528 | float_raise(float_flag_inexact, status); |
1529 | ||
1530 | return a; | |
1531 | } | |
1532 | } else { | |
1533 | fp1 = floatx80_mul(fp0, float64_to_floatx80( | |
1534 | make_float64(0x3FE45F306DC9C883), status), | |
1535 | status); /* X*2/PI */ | |
1536 | ||
1537 | n = floatx80_to_int32(fp1, status); | |
1538 | j = 32 + n; | |
1539 | ||
1540 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ | |
1541 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), | |
1542 | status); /* FP0 IS R = (X-Y1)-Y2 */ | |
1543 | ||
1544 | sincont: | |
6361d298 | 1545 | if (n & 1) { |
5add1ac4 LV |
1546 | /* COSPOLY */ |
1547 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1548 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1549 | fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), | |
1550 | status); /* B8 */ | |
1551 | fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), | |
1552 | status); /* B7 */ | |
1553 | ||
1554 | xSign = extractFloatx80Sign(fp0); /* X IS S */ | |
1555 | xExp = extractFloatx80Exp(fp0); | |
1556 | xSig = extractFloatx80Frac(fp0); | |
1557 | ||
6361d298 | 1558 | if ((n >> 1) & 1) { |
5add1ac4 LV |
1559 | xSign ^= 1; |
1560 | posneg1 = make_float32(0xBF800000); /* -1 */ | |
1561 | } else { | |
1562 | xSign ^= 0; | |
1563 | posneg1 = make_float32(0x3F800000); /* 1 */ | |
1564 | } /* X IS NOW R'= SGN*R */ | |
1565 | ||
1566 | fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ | |
1567 | fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ | |
1568 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1569 | make_float64(0x3E21EED90612C972), status), | |
1570 | status); /* B6+TB8 */ | |
1571 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1572 | make_float64(0xBE927E4FB79D9FCF), status), | |
1573 | status); /* B5+TB7 */ | |
1574 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ | |
1575 | fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ | |
1576 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1577 | make_float64(0x3EFA01A01A01D423), status), | |
1578 | status); /* B4+T(B6+TB8) */ | |
e2326300 | 1579 | fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438)); |
5add1ac4 LV |
1580 | fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ |
1581 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ | |
1582 | fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ | |
e2326300 | 1583 | fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E)); |
5add1ac4 LV |
1584 | fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ |
1585 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
1586 | make_float32(0xBF000000), status), | |
1587 | status); /* B1+T(B3+T(B5+TB7)) */ | |
1588 | fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ | |
1589 | fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+ | |
1590 | * [S(B2+T(B4+T(B6+TB8)))] | |
1591 | */ | |
1592 | ||
1593 | x = packFloatx80(xSign, xExp, xSig); | |
1594 | fp0 = floatx80_mul(fp0, x, status); | |
1595 | ||
1596 | status->float_rounding_mode = user_rnd_mode; | |
1597 | status->floatx80_rounding_precision = user_rnd_prec; | |
1598 | ||
1599 | a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); | |
1600 | ||
1601 | float_raise(float_flag_inexact, status); | |
1602 | ||
1603 | return a; | |
1604 | } else { | |
1605 | /* SINPOLY */ | |
1606 | xSign = extractFloatx80Sign(fp0); /* X IS R */ | |
1607 | xExp = extractFloatx80Exp(fp0); | |
1608 | xSig = extractFloatx80Frac(fp0); | |
1609 | ||
6361d298 | 1610 | xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */ |
5add1ac4 LV |
1611 | |
1612 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1613 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1614 | fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), | |
1615 | status); /* A7 */ | |
1616 | fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), | |
1617 | status); /* A6 */ | |
1618 | fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ | |
1619 | fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ | |
1620 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1621 | make_float64(0xBE5AE6452A118AE4), status), | |
1622 | status); /* A5+T*A7 */ | |
1623 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1624 | make_float64(0x3EC71DE3A5341531), status), | |
1625 | status); /* A4+T*A6 */ | |
1626 | fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ | |
1627 | fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ | |
1628 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1629 | make_float64(0xBF2A01A01A018B59), status), | |
1630 | status); /* A3+T(A5+TA7) */ | |
e2326300 | 1631 | fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF)); |
5add1ac4 LV |
1632 | fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ |
1633 | fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ | |
1634 | fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ | |
e2326300 | 1635 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99)); |
5add1ac4 LV |
1636 | fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ |
1637 | fp1 = floatx80_add(fp1, fp2, | |
1638 | status); /* [A1+T(A3+T(A5+TA7))]+ | |
1639 | * [S(A2+T(A4+TA6))] | |
1640 | */ | |
1641 | ||
1642 | x = packFloatx80(xSign, xExp, xSig); | |
1643 | fp0 = floatx80_mul(fp0, x, status); /* R'*S */ | |
1644 | fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ | |
1645 | ||
1646 | status->float_rounding_mode = user_rnd_mode; | |
1647 | status->floatx80_rounding_precision = user_rnd_prec; | |
1648 | ||
1649 | a = floatx80_add(fp0, x, status); | |
1650 | ||
1651 | float_raise(float_flag_inexact, status); | |
1652 | ||
1653 | return a; | |
1654 | } | |
1655 | } | |
1656 | } | |
68d0ed37 | 1657 | |
808d77bc LMP |
1658 | /* |
1659 | * Cosine | |
1660 | */ | |
68d0ed37 LV |
1661 | |
1662 | floatx80 floatx80_cos(floatx80 a, float_status *status) | |
1663 | { | |
c120391c | 1664 | bool aSign, xSign; |
68d0ed37 LV |
1665 | int32_t aExp, xExp; |
1666 | uint64_t aSig, xSig; | |
1667 | ||
8da5f1db RH |
1668 | FloatRoundMode user_rnd_mode; |
1669 | FloatX80RoundPrec user_rnd_prec; | |
68d0ed37 LV |
1670 | |
1671 | int32_t compact, l, n, j; | |
1672 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; | |
1673 | float32 posneg1, twoto63; | |
c120391c | 1674 | bool endflag; |
68d0ed37 LV |
1675 | |
1676 | aSig = extractFloatx80Frac(a); | |
1677 | aExp = extractFloatx80Exp(a); | |
1678 | aSign = extractFloatx80Sign(a); | |
1679 | ||
1680 | if (aExp == 0x7FFF) { | |
1681 | if ((uint64_t) (aSig << 1)) { | |
1682 | return propagateFloatx80NaNOneArg(a, status); | |
1683 | } | |
1684 | float_raise(float_flag_invalid, status); | |
1685 | return floatx80_default_nan(status); | |
1686 | } | |
1687 | ||
1688 | if (aExp == 0 && aSig == 0) { | |
1689 | return packFloatx80(0, one_exp, one_sig); | |
1690 | } | |
1691 | ||
68d0ed37 LV |
1692 | user_rnd_mode = status->float_rounding_mode; |
1693 | user_rnd_prec = status->floatx80_rounding_precision; | |
1694 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 1695 | status->floatx80_rounding_precision = floatx80_precision_x; |
68d0ed37 LV |
1696 | |
1697 | compact = floatx80_make_compact(aExp, aSig); | |
1698 | ||
1699 | fp0 = a; | |
1700 | ||
1701 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { | |
1702 | /* 2^(-40) > |X| > 15 PI */ | |
1703 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ | |
1704 | /* REDUCEX */ | |
1705 | fp1 = packFloatx80(0, 0, 0); | |
1706 | if (compact == 0x7FFEFFFF) { | |
1707 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, | |
e2326300 | 1708 | UINT64_C(0xC90FDAA200000000)); |
68d0ed37 | 1709 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, |
e2326300 | 1710 | UINT64_C(0x85A308D300000000)); |
68d0ed37 LV |
1711 | fp0 = floatx80_add(fp0, twopi1, status); |
1712 | fp1 = fp0; | |
1713 | fp0 = floatx80_add(fp0, twopi2, status); | |
1714 | fp1 = floatx80_sub(fp1, fp0, status); | |
1715 | fp1 = floatx80_add(fp1, twopi2, status); | |
1716 | } | |
1717 | loop: | |
1718 | xSign = extractFloatx80Sign(fp0); | |
1719 | xExp = extractFloatx80Exp(fp0); | |
1720 | xExp -= 0x3FFF; | |
1721 | if (xExp <= 28) { | |
1722 | l = 0; | |
c120391c | 1723 | endflag = true; |
68d0ed37 LV |
1724 | } else { |
1725 | l = xExp - 27; | |
c120391c | 1726 | endflag = false; |
68d0ed37 LV |
1727 | } |
1728 | invtwopi = packFloatx80(0, 0x3FFE - l, | |
e2326300 AB |
1729 | UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ |
1730 | twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000)); | |
1731 | twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000)); | |
68d0ed37 LV |
1732 | |
1733 | /* SIGN(INARG)*2^63 IN SGL */ | |
1734 | twoto63 = packFloat32(xSign, 0xBE, 0); | |
1735 | ||
1736 | fp2 = floatx80_mul(fp0, invtwopi, status); | |
1737 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), | |
1738 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ | |
1739 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), | |
1740 | status); /* FP2 is N */ | |
1741 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ | |
1742 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ | |
1743 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ | |
1744 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ | |
1745 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ | |
1746 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ | |
1747 | fp3 = fp0; /* FP3 is A */ | |
1748 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ | |
1749 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ | |
1750 | ||
c120391c | 1751 | if (endflag) { |
68d0ed37 LV |
1752 | n = floatx80_to_int32(fp2, status); |
1753 | goto sincont; | |
1754 | } | |
1755 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ | |
1756 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ | |
1757 | goto loop; | |
1758 | } else { | |
1759 | /* SINSM */ | |
1760 | fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */ | |
1761 | ||
1762 | status->float_rounding_mode = user_rnd_mode; | |
1763 | status->floatx80_rounding_precision = user_rnd_prec; | |
1764 | ||
6361d298 LV |
1765 | /* COSTINY */ |
1766 | a = floatx80_sub(fp0, float32_to_floatx80( | |
1767 | make_float32(0x00800000), status), | |
1768 | status); | |
68d0ed37 LV |
1769 | float_raise(float_flag_inexact, status); |
1770 | ||
1771 | return a; | |
1772 | } | |
1773 | } else { | |
1774 | fp1 = floatx80_mul(fp0, float64_to_floatx80( | |
1775 | make_float64(0x3FE45F306DC9C883), status), | |
1776 | status); /* X*2/PI */ | |
1777 | ||
1778 | n = floatx80_to_int32(fp1, status); | |
1779 | j = 32 + n; | |
1780 | ||
1781 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ | |
1782 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), | |
1783 | status); /* FP0 IS R = (X-Y1)-Y2 */ | |
1784 | ||
1785 | sincont: | |
6361d298 | 1786 | if ((n + 1) & 1) { |
68d0ed37 LV |
1787 | /* COSPOLY */ |
1788 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1789 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1790 | fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), | |
1791 | status); /* B8 */ | |
1792 | fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), | |
1793 | status); /* B7 */ | |
1794 | ||
1795 | xSign = extractFloatx80Sign(fp0); /* X IS S */ | |
1796 | xExp = extractFloatx80Exp(fp0); | |
1797 | xSig = extractFloatx80Frac(fp0); | |
1798 | ||
6361d298 | 1799 | if (((n + 1) >> 1) & 1) { |
68d0ed37 LV |
1800 | xSign ^= 1; |
1801 | posneg1 = make_float32(0xBF800000); /* -1 */ | |
1802 | } else { | |
1803 | xSign ^= 0; | |
1804 | posneg1 = make_float32(0x3F800000); /* 1 */ | |
1805 | } /* X IS NOW R'= SGN*R */ | |
1806 | ||
1807 | fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ | |
1808 | fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ | |
1809 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1810 | make_float64(0x3E21EED90612C972), status), | |
1811 | status); /* B6+TB8 */ | |
1812 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1813 | make_float64(0xBE927E4FB79D9FCF), status), | |
1814 | status); /* B5+TB7 */ | |
1815 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ | |
1816 | fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ | |
1817 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1818 | make_float64(0x3EFA01A01A01D423), status), | |
1819 | status); /* B4+T(B6+TB8) */ | |
e2326300 | 1820 | fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438)); |
68d0ed37 LV |
1821 | fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ |
1822 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ | |
1823 | fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ | |
e2326300 | 1824 | fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E)); |
68d0ed37 LV |
1825 | fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ |
1826 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
1827 | make_float32(0xBF000000), status), | |
1828 | status); /* B1+T(B3+T(B5+TB7)) */ | |
1829 | fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ | |
1830 | fp0 = floatx80_add(fp0, fp1, status); | |
1831 | /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */ | |
1832 | ||
1833 | x = packFloatx80(xSign, xExp, xSig); | |
1834 | fp0 = floatx80_mul(fp0, x, status); | |
1835 | ||
1836 | status->float_rounding_mode = user_rnd_mode; | |
1837 | status->floatx80_rounding_precision = user_rnd_prec; | |
1838 | ||
1839 | a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); | |
1840 | ||
1841 | float_raise(float_flag_inexact, status); | |
1842 | ||
1843 | return a; | |
1844 | } else { | |
1845 | /* SINPOLY */ | |
1846 | xSign = extractFloatx80Sign(fp0); /* X IS R */ | |
1847 | xExp = extractFloatx80Exp(fp0); | |
1848 | xSig = extractFloatx80Frac(fp0); | |
1849 | ||
6361d298 | 1850 | xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */ |
68d0ed37 LV |
1851 | |
1852 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1853 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1854 | fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), | |
1855 | status); /* A7 */ | |
1856 | fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), | |
1857 | status); /* A6 */ | |
1858 | fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ | |
1859 | fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ | |
1860 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1861 | make_float64(0xBE5AE6452A118AE4), status), | |
1862 | status); /* A5+T*A7 */ | |
1863 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1864 | make_float64(0x3EC71DE3A5341531), status), | |
1865 | status); /* A4+T*A6 */ | |
1866 | fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ | |
1867 | fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ | |
1868 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1869 | make_float64(0xBF2A01A01A018B59), status), | |
1870 | status); /* A3+T(A5+TA7) */ | |
e2326300 | 1871 | fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF)); |
68d0ed37 LV |
1872 | fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ |
1873 | fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ | |
1874 | fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ | |
e2326300 | 1875 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99)); |
68d0ed37 LV |
1876 | fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ |
1877 | fp1 = floatx80_add(fp1, fp2, status); | |
1878 | /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */ | |
1879 | ||
1880 | x = packFloatx80(xSign, xExp, xSig); | |
1881 | fp0 = floatx80_mul(fp0, x, status); /* R'*S */ | |
1882 | fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ | |
1883 | ||
1884 | status->float_rounding_mode = user_rnd_mode; | |
1885 | status->floatx80_rounding_precision = user_rnd_prec; | |
1886 | ||
1887 | a = floatx80_add(fp0, x, status); | |
1888 | ||
1889 | float_raise(float_flag_inexact, status); | |
1890 | ||
1891 | return a; | |
1892 | } | |
1893 | } | |
1894 | } | |
8c992abc | 1895 | |
808d77bc LMP |
1896 | /* |
1897 | * Arc tangent | |
1898 | */ | |
8c992abc LV |
1899 | |
1900 | floatx80 floatx80_atan(floatx80 a, float_status *status) | |
1901 | { | |
c120391c | 1902 | bool aSign; |
8c992abc LV |
1903 | int32_t aExp; |
1904 | uint64_t aSig; | |
1905 | ||
8da5f1db RH |
1906 | FloatRoundMode user_rnd_mode; |
1907 | FloatX80RoundPrec user_rnd_prec; | |
8c992abc LV |
1908 | |
1909 | int32_t compact, tbl_index; | |
1910 | floatx80 fp0, fp1, fp2, fp3, xsave; | |
1911 | ||
1912 | aSig = extractFloatx80Frac(a); | |
1913 | aExp = extractFloatx80Exp(a); | |
1914 | aSign = extractFloatx80Sign(a); | |
1915 | ||
1916 | if (aExp == 0x7FFF) { | |
1917 | if ((uint64_t) (aSig << 1)) { | |
1918 | return propagateFloatx80NaNOneArg(a, status); | |
1919 | } | |
1920 | a = packFloatx80(aSign, piby2_exp, pi_sig); | |
1921 | float_raise(float_flag_inexact, status); | |
1922 | return floatx80_move(a, status); | |
1923 | } | |
1924 | ||
1925 | if (aExp == 0 && aSig == 0) { | |
1926 | return packFloatx80(aSign, 0, 0); | |
1927 | } | |
1928 | ||
1929 | compact = floatx80_make_compact(aExp, aSig); | |
1930 | ||
1931 | user_rnd_mode = status->float_rounding_mode; | |
1932 | user_rnd_prec = status->floatx80_rounding_precision; | |
1933 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 1934 | status->floatx80_rounding_precision = floatx80_precision_x; |
8c992abc LV |
1935 | |
1936 | if (compact < 0x3FFB8000 || compact > 0x4002FFFF) { | |
1937 | /* |X| >= 16 or |X| < 1/16 */ | |
1938 | if (compact > 0x3FFF8000) { /* |X| >= 16 */ | |
1939 | if (compact > 0x40638000) { /* |X| > 2^(100) */ | |
1940 | fp0 = packFloatx80(aSign, piby2_exp, pi_sig); | |
1941 | fp1 = packFloatx80(aSign, 0x0001, one_sig); | |
1942 | ||
1943 | status->float_rounding_mode = user_rnd_mode; | |
1944 | status->floatx80_rounding_precision = user_rnd_prec; | |
1945 | ||
1946 | a = floatx80_sub(fp0, fp1, status); | |
1947 | ||
1948 | float_raise(float_flag_inexact, status); | |
1949 | ||
1950 | return a; | |
1951 | } else { | |
1952 | fp0 = a; | |
1953 | fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */ | |
1954 | fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */ | |
1955 | xsave = fp1; | |
1956 | fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */ | |
1957 | fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ | |
1958 | fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A), | |
1959 | status); /* C5 */ | |
1960 | fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D), | |
1961 | status); /* C4 */ | |
1962 | fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */ | |
1963 | fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */ | |
1964 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1965 | make_float64(0xBFC24924827107B8), status), | |
1966 | status); /* C3+Z*C5 */ | |
1967 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1968 | make_float64(0x3FC999999996263E), status), | |
1969 | status); /* C2+Z*C4 */ | |
1970 | fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */ | |
1971 | fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */ | |
1972 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
1973 | make_float64(0xBFD5555555555536), status), | |
1974 | status); /* C1+Z*(C3+Z*C5) */ | |
1975 | fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */ | |
1976 | /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */ | |
1977 | fp1 = floatx80_add(fp1, fp2, status); | |
1978 | /* X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? */ | |
1979 | fp0 = floatx80_mul(fp0, fp1, status); | |
1980 | fp0 = floatx80_add(fp0, xsave, status); | |
1981 | fp1 = packFloatx80(aSign, piby2_exp, pi_sig); | |
1982 | ||
1983 | status->float_rounding_mode = user_rnd_mode; | |
1984 | status->floatx80_rounding_precision = user_rnd_prec; | |
1985 | ||
1986 | a = floatx80_add(fp0, fp1, status); | |
1987 | ||
1988 | float_raise(float_flag_inexact, status); | |
1989 | ||
1990 | return a; | |
1991 | } | |
1992 | } else { /* |X| < 1/16 */ | |
1993 | if (compact < 0x3FD78000) { /* |X| < 2^(-40) */ | |
1994 | status->float_rounding_mode = user_rnd_mode; | |
1995 | status->floatx80_rounding_precision = user_rnd_prec; | |
1996 | ||
1997 | a = floatx80_move(a, status); | |
1998 | ||
1999 | float_raise(float_flag_inexact, status); | |
2000 | ||
2001 | return a; | |
2002 | } else { | |
2003 | fp0 = a; | |
2004 | xsave = a; | |
2005 | fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */ | |
2006 | fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ | |
2007 | fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989), | |
2008 | status); /* B6 */ | |
2009 | fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB), | |
2010 | status); /* B5 */ | |
2011 | fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */ | |
2012 | fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */ | |
2013 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2014 | make_float64(0x3FBC71C646940220), status), | |
2015 | status); /* B4+Z*B6 */ | |
2016 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
2017 | make_float64(0xBFC24924921872F9), | |
2018 | status), status); /* B3+Z*B5 */ | |
2019 | fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */ | |
2020 | fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */ | |
2021 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2022 | make_float64(0x3FC9999999998FA9), status), | |
2023 | status); /* B2+Z*(B4+Z*B6) */ | |
2024 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2025 | make_float64(0xBFD5555555555555), status), | |
2026 | status); /* B1+Z*(B3+Z*B5) */ | |
2027 | fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */ | |
2028 | fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */ | |
2029 | /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */ | |
2030 | fp1 = floatx80_add(fp1, fp2, status); | |
2031 | /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */ | |
2032 | fp0 = floatx80_mul(fp0, fp1, status); | |
2033 | ||
2034 | status->float_rounding_mode = user_rnd_mode; | |
2035 | status->floatx80_rounding_precision = user_rnd_prec; | |
2036 | ||
2037 | a = floatx80_add(fp0, xsave, status); | |
2038 | ||
2039 | float_raise(float_flag_inexact, status); | |
2040 | ||
2041 | return a; | |
2042 | } | |
2043 | } | |
2044 | } else { | |
e2326300 AB |
2045 | aSig &= UINT64_C(0xF800000000000000); |
2046 | aSig |= UINT64_C(0x0400000000000000); | |
8c992abc LV |
2047 | xsave = packFloatx80(aSign, aExp, aSig); /* F */ |
2048 | fp0 = a; | |
2049 | fp1 = a; /* X */ | |
2050 | fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */ | |
2051 | fp1 = floatx80_mul(fp1, xsave, status); /* X*F */ | |
2052 | fp0 = floatx80_sub(fp0, xsave, status); /* X-F */ | |
2053 | fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */ | |
2054 | fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */ | |
2055 | ||
2056 | tbl_index = compact; | |
2057 | ||
2058 | tbl_index &= 0x7FFF0000; | |
2059 | tbl_index -= 0x3FFB0000; | |
2060 | tbl_index >>= 1; | |
2061 | tbl_index += compact & 0x00007800; | |
2062 | tbl_index >>= 11; | |
2063 | ||
2064 | fp3 = atan_tbl[tbl_index]; | |
2065 | ||
2066 | fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */ | |
2067 | ||
2068 | fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */ | |
2069 | fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8), | |
2070 | status); /* A3 */ | |
2071 | fp2 = floatx80_add(fp2, fp1, status); /* A3+V */ | |
2072 | fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */ | |
2073 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V */ | |
2074 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2075 | make_float64(0x4002AC6934A26DB3), status), | |
2076 | status); /* A2+V*(A3+V) */ | |
2077 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
2078 | make_float64(0xBFC2476F4E1DA28E), status), | |
2079 | status); /* A1+U*V */ | |
2080 | fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */ | |
2081 | fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */ | |
2082 | ||
2083 | status->float_rounding_mode = user_rnd_mode; | |
2084 | status->floatx80_rounding_precision = user_rnd_prec; | |
2085 | ||
2086 | a = floatx80_add(fp0, fp3, status); /* ATAN(X) */ | |
2087 | ||
2088 | float_raise(float_flag_inexact, status); | |
2089 | ||
2090 | return a; | |
2091 | } | |
2092 | } | |
bc20b34e | 2093 | |
808d77bc LMP |
2094 | /* |
2095 | * Arc sine | |
2096 | */ | |
bc20b34e LV |
2097 | |
2098 | floatx80 floatx80_asin(floatx80 a, float_status *status) | |
2099 | { | |
c120391c | 2100 | bool aSign; |
bc20b34e LV |
2101 | int32_t aExp; |
2102 | uint64_t aSig; | |
2103 | ||
8da5f1db RH |
2104 | FloatRoundMode user_rnd_mode; |
2105 | FloatX80RoundPrec user_rnd_prec; | |
bc20b34e LV |
2106 | |
2107 | int32_t compact; | |
2108 | floatx80 fp0, fp1, fp2, one; | |
2109 | ||
2110 | aSig = extractFloatx80Frac(a); | |
2111 | aExp = extractFloatx80Exp(a); | |
2112 | aSign = extractFloatx80Sign(a); | |
2113 | ||
2114 | if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { | |
2115 | return propagateFloatx80NaNOneArg(a, status); | |
2116 | } | |
2117 | ||
2118 | if (aExp == 0 && aSig == 0) { | |
2119 | return packFloatx80(aSign, 0, 0); | |
2120 | } | |
2121 | ||
2122 | compact = floatx80_make_compact(aExp, aSig); | |
2123 | ||
2124 | if (compact >= 0x3FFF8000) { /* |X| >= 1 */ | |
2125 | if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ | |
2126 | float_raise(float_flag_inexact, status); | |
2127 | a = packFloatx80(aSign, piby2_exp, pi_sig); | |
2128 | return floatx80_move(a, status); | |
2129 | } else { /* |X| > 1 */ | |
2130 | float_raise(float_flag_invalid, status); | |
2131 | return floatx80_default_nan(status); | |
2132 | } | |
2133 | ||
2134 | } /* |X| < 1 */ | |
2135 | ||
2136 | user_rnd_mode = status->float_rounding_mode; | |
2137 | user_rnd_prec = status->floatx80_rounding_precision; | |
2138 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 2139 | status->floatx80_rounding_precision = floatx80_precision_x; |
bc20b34e LV |
2140 | |
2141 | one = packFloatx80(0, one_exp, one_sig); | |
2142 | fp0 = a; | |
2143 | ||
2144 | fp1 = floatx80_sub(one, fp0, status); /* 1 - X */ | |
2145 | fp2 = floatx80_add(one, fp0, status); /* 1 + X */ | |
2146 | fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */ | |
2147 | fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */ | |
2148 | fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */ | |
2149 | ||
2150 | status->float_rounding_mode = user_rnd_mode; | |
2151 | status->floatx80_rounding_precision = user_rnd_prec; | |
2152 | ||
2153 | a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */ | |
2154 | ||
2155 | float_raise(float_flag_inexact, status); | |
2156 | ||
2157 | return a; | |
2158 | } | |
c84813b8 | 2159 | |
808d77bc LMP |
2160 | /* |
2161 | * Arc cosine | |
2162 | */ | |
c84813b8 LV |
2163 | |
2164 | floatx80 floatx80_acos(floatx80 a, float_status *status) | |
2165 | { | |
c120391c | 2166 | bool aSign; |
c84813b8 LV |
2167 | int32_t aExp; |
2168 | uint64_t aSig; | |
2169 | ||
8da5f1db RH |
2170 | FloatRoundMode user_rnd_mode; |
2171 | FloatX80RoundPrec user_rnd_prec; | |
c84813b8 LV |
2172 | |
2173 | int32_t compact; | |
2174 | floatx80 fp0, fp1, one; | |
2175 | ||
2176 | aSig = extractFloatx80Frac(a); | |
2177 | aExp = extractFloatx80Exp(a); | |
2178 | aSign = extractFloatx80Sign(a); | |
2179 | ||
2180 | if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { | |
2181 | return propagateFloatx80NaNOneArg(a, status); | |
2182 | } | |
2183 | if (aExp == 0 && aSig == 0) { | |
2184 | float_raise(float_flag_inexact, status); | |
2185 | return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, | |
2186 | piby2_exp, pi_sig, 0, status); | |
2187 | } | |
2188 | ||
2189 | compact = floatx80_make_compact(aExp, aSig); | |
2190 | ||
2191 | if (compact >= 0x3FFF8000) { /* |X| >= 1 */ | |
2192 | if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ | |
2193 | if (aSign) { /* X == -1 */ | |
2194 | a = packFloatx80(0, pi_exp, pi_sig); | |
2195 | float_raise(float_flag_inexact, status); | |
2196 | return floatx80_move(a, status); | |
2197 | } else { /* X == +1 */ | |
2198 | return packFloatx80(0, 0, 0); | |
2199 | } | |
2200 | } else { /* |X| > 1 */ | |
2201 | float_raise(float_flag_invalid, status); | |
2202 | return floatx80_default_nan(status); | |
2203 | } | |
2204 | } /* |X| < 1 */ | |
2205 | ||
2206 | user_rnd_mode = status->float_rounding_mode; | |
2207 | user_rnd_prec = status->floatx80_rounding_precision; | |
2208 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 2209 | status->floatx80_rounding_precision = floatx80_precision_x; |
c84813b8 LV |
2210 | |
2211 | one = packFloatx80(0, one_exp, one_sig); | |
2212 | fp0 = a; | |
2213 | ||
2214 | fp1 = floatx80_add(one, fp0, status); /* 1 + X */ | |
2215 | fp0 = floatx80_sub(one, fp0, status); /* 1 - X */ | |
2216 | fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */ | |
2217 | fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */ | |
2218 | fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */ | |
2219 | ||
2220 | status->float_rounding_mode = user_rnd_mode; | |
2221 | status->floatx80_rounding_precision = user_rnd_prec; | |
2222 | ||
2223 | a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */ | |
2224 | ||
2225 | float_raise(float_flag_inexact, status); | |
2226 | ||
2227 | return a; | |
2228 | } | |
e3655afa | 2229 | |
808d77bc LMP |
2230 | /* |
2231 | * Hyperbolic arc tangent | |
2232 | */ | |
e3655afa LV |
2233 | |
2234 | floatx80 floatx80_atanh(floatx80 a, float_status *status) | |
2235 | { | |
c120391c | 2236 | bool aSign; |
e3655afa LV |
2237 | int32_t aExp; |
2238 | uint64_t aSig; | |
2239 | ||
8da5f1db RH |
2240 | FloatRoundMode user_rnd_mode; |
2241 | FloatX80RoundPrec user_rnd_prec; | |
e3655afa LV |
2242 | |
2243 | int32_t compact; | |
2244 | floatx80 fp0, fp1, fp2, one; | |
2245 | ||
2246 | aSig = extractFloatx80Frac(a); | |
2247 | aExp = extractFloatx80Exp(a); | |
2248 | aSign = extractFloatx80Sign(a); | |
2249 | ||
2250 | if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { | |
2251 | return propagateFloatx80NaNOneArg(a, status); | |
2252 | } | |
2253 | ||
2254 | if (aExp == 0 && aSig == 0) { | |
2255 | return packFloatx80(aSign, 0, 0); | |
2256 | } | |
2257 | ||
2258 | compact = floatx80_make_compact(aExp, aSig); | |
2259 | ||
2260 | if (compact >= 0x3FFF8000) { /* |X| >= 1 */ | |
2261 | if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ | |
2262 | float_raise(float_flag_divbyzero, status); | |
2263 | return packFloatx80(aSign, floatx80_infinity.high, | |
2264 | floatx80_infinity.low); | |
2265 | } else { /* |X| > 1 */ | |
2266 | float_raise(float_flag_invalid, status); | |
2267 | return floatx80_default_nan(status); | |
2268 | } | |
2269 | } /* |X| < 1 */ | |
2270 | ||
2271 | user_rnd_mode = status->float_rounding_mode; | |
2272 | user_rnd_prec = status->floatx80_rounding_precision; | |
2273 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 2274 | status->floatx80_rounding_precision = floatx80_precision_x; |
e3655afa LV |
2275 | |
2276 | one = packFloatx80(0, one_exp, one_sig); | |
2277 | fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */ | |
2278 | fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */ | |
2279 | fp1 = packFloatx80(1, aExp, aSig); /* -Y */ | |
2280 | fp0 = floatx80_add(fp0, fp0, status); /* 2Y */ | |
2281 | fp1 = floatx80_add(fp1, one, status); /* 1-Y */ | |
2282 | fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */ | |
2283 | fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */ | |
2284 | ||
2285 | status->float_rounding_mode = user_rnd_mode; | |
2286 | status->floatx80_rounding_precision = user_rnd_prec; | |
2287 | ||
2288 | a = floatx80_mul(fp0, fp2, | |
2289 | status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */ | |
2290 | ||
2291 | float_raise(float_flag_inexact, status); | |
2292 | ||
2293 | return a; | |
2294 | } | |
9937b029 | 2295 | |
808d77bc LMP |
2296 | /* |
2297 | * e to x minus 1 | |
2298 | */ | |
9937b029 LV |
2299 | |
2300 | floatx80 floatx80_etoxm1(floatx80 a, float_status *status) | |
2301 | { | |
c120391c | 2302 | bool aSign; |
9937b029 LV |
2303 | int32_t aExp; |
2304 | uint64_t aSig; | |
2305 | ||
8da5f1db RH |
2306 | FloatRoundMode user_rnd_mode; |
2307 | FloatX80RoundPrec user_rnd_prec; | |
9937b029 LV |
2308 | |
2309 | int32_t compact, n, j, m, m1; | |
2310 | floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc; | |
2311 | ||
2312 | aSig = extractFloatx80Frac(a); | |
2313 | aExp = extractFloatx80Exp(a); | |
2314 | aSign = extractFloatx80Sign(a); | |
2315 | ||
2316 | if (aExp == 0x7FFF) { | |
2317 | if ((uint64_t) (aSig << 1)) { | |
2318 | return propagateFloatx80NaNOneArg(a, status); | |
2319 | } | |
2320 | if (aSign) { | |
2321 | return packFloatx80(aSign, one_exp, one_sig); | |
2322 | } | |
2323 | return packFloatx80(0, floatx80_infinity.high, | |
2324 | floatx80_infinity.low); | |
2325 | } | |
2326 | ||
2327 | if (aExp == 0 && aSig == 0) { | |
2328 | return packFloatx80(aSign, 0, 0); | |
2329 | } | |
2330 | ||
2331 | user_rnd_mode = status->float_rounding_mode; | |
2332 | user_rnd_prec = status->floatx80_rounding_precision; | |
2333 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 2334 | status->floatx80_rounding_precision = floatx80_precision_x; |
9937b029 LV |
2335 | |
2336 | if (aExp >= 0x3FFD) { /* |X| >= 1/4 */ | |
2337 | compact = floatx80_make_compact(aExp, aSig); | |
2338 | ||
2339 | if (compact <= 0x4004C215) { /* |X| <= 70 log2 */ | |
2340 | fp0 = a; | |
2341 | fp1 = a; | |
2342 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
2343 | make_float32(0x42B8AA3B), status), | |
2344 | status); /* 64/log2 * X */ | |
2345 | n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ | |
2346 | fp0 = int32_to_floatx80(n, status); | |
2347 | ||
2348 | j = n & 0x3F; /* J = N mod 64 */ | |
2349 | m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ | |
2350 | if (n < 0 && j) { | |
808d77bc LMP |
2351 | /* |
2352 | * arithmetic right shift is division and | |
9937b029 LV |
2353 | * round towards minus infinity |
2354 | */ | |
2355 | m--; | |
2356 | } | |
2357 | m1 = -m; | |
2358 | /*m += 0x3FFF; // biased exponent of 2^(M) */ | |
2359 | /*m1 += 0x3FFF; // biased exponent of -2^(-M) */ | |
2360 | ||
2361 | fp2 = fp0; /* N */ | |
2362 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
2363 | make_float32(0xBC317218), status), | |
2364 | status); /* N * L1, L1 = lead(-log2/64) */ | |
e2326300 | 2365 | l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6)); |
9937b029 LV |
2366 | fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */ |
2367 | fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */ | |
2368 | fp0 = floatx80_add(fp0, fp2, status); /* R */ | |
2369 | ||
2370 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
2371 | fp2 = float32_to_floatx80(make_float32(0x3950097B), | |
2372 | status); /* A6 */ | |
2373 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */ | |
2374 | fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A), | |
2375 | status), fp1, status); /* fp3 is S*A5 */ | |
2376 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2377 | make_float64(0x3F81111111174385), status), | |
2378 | status); /* fp2 IS A4+S*A6 */ | |
2379 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
2380 | make_float64(0x3FA5555555554F5A), status), | |
2381 | status); /* fp3 is A3+S*A5 */ | |
2382 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */ | |
2383 | fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */ | |
2384 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2385 | make_float64(0x3FC5555555555555), status), | |
2386 | status); /* fp2 IS A2+S*(A4+S*A6) */ | |
2387 | fp3 = floatx80_add(fp3, float32_to_floatx80( | |
2388 | make_float32(0x3F000000), status), | |
2389 | status); /* fp3 IS A1+S*(A3+S*A5) */ | |
2390 | fp2 = floatx80_mul(fp2, fp1, | |
2391 | status); /* fp2 IS S*(A2+S*(A4+S*A6)) */ | |
2392 | fp1 = floatx80_mul(fp1, fp3, | |
2393 | status); /* fp1 IS S*(A1+S*(A3+S*A5)) */ | |
2394 | fp2 = floatx80_mul(fp2, fp0, | |
2395 | status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */ | |
2396 | fp0 = floatx80_add(fp0, fp1, | |
2397 | status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */ | |
2398 | fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */ | |
2399 | ||
2400 | fp0 = floatx80_mul(fp0, exp_tbl[j], | |
2401 | status); /* 2^(J/64)*(Exp(R)-1) */ | |
2402 | ||
2403 | if (m >= 64) { | |
2404 | fp1 = float32_to_floatx80(exp_tbl2[j], status); | |
2405 | onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ | |
2406 | fp1 = floatx80_add(fp1, onebysc, status); | |
2407 | fp0 = floatx80_add(fp0, fp1, status); | |
2408 | fp0 = floatx80_add(fp0, exp_tbl[j], status); | |
2409 | } else if (m < -3) { | |
2410 | fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], | |
2411 | status), status); | |
2412 | fp0 = floatx80_add(fp0, exp_tbl[j], status); | |
2413 | onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ | |
2414 | fp0 = floatx80_add(fp0, onebysc, status); | |
2415 | } else { /* -3 <= m <= 63 */ | |
2416 | fp1 = exp_tbl[j]; | |
2417 | fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], | |
2418 | status), status); | |
2419 | onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ | |
2420 | fp1 = floatx80_add(fp1, onebysc, status); | |
2421 | fp0 = floatx80_add(fp0, fp1, status); | |
2422 | } | |
2423 | ||
2424 | sc = packFloatx80(0, m + 0x3FFF, one_sig); | |
2425 | ||
2426 | status->float_rounding_mode = user_rnd_mode; | |
2427 | status->floatx80_rounding_precision = user_rnd_prec; | |
2428 | ||
2429 | a = floatx80_mul(fp0, sc, status); | |
2430 | ||
2431 | float_raise(float_flag_inexact, status); | |
2432 | ||
2433 | return a; | |
2434 | } else { /* |X| > 70 log2 */ | |
2435 | if (aSign) { | |
2436 | fp0 = float32_to_floatx80(make_float32(0xBF800000), | |
2437 | status); /* -1 */ | |
2438 | ||
2439 | status->float_rounding_mode = user_rnd_mode; | |
2440 | status->floatx80_rounding_precision = user_rnd_prec; | |
2441 | ||
2442 | a = floatx80_add(fp0, float32_to_floatx80( | |
2443 | make_float32(0x00800000), status), | |
2444 | status); /* -1 + 2^(-126) */ | |
2445 | ||
2446 | float_raise(float_flag_inexact, status); | |
2447 | ||
2448 | return a; | |
2449 | } else { | |
2450 | status->float_rounding_mode = user_rnd_mode; | |
2451 | status->floatx80_rounding_precision = user_rnd_prec; | |
2452 | ||
2453 | return floatx80_etox(a, status); | |
2454 | } | |
2455 | } | |
2456 | } else { /* |X| < 1/4 */ | |
2457 | if (aExp >= 0x3FBE) { | |
2458 | fp0 = a; | |
2459 | fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */ | |
2460 | fp1 = float32_to_floatx80(make_float32(0x2F30CAA8), | |
2461 | status); /* B12 */ | |
2462 | fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */ | |
2463 | fp2 = float32_to_floatx80(make_float32(0x310F8290), | |
2464 | status); /* B11 */ | |
2465 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
2466 | make_float32(0x32D73220), status), | |
2467 | status); /* B10 */ | |
2468 | fp2 = floatx80_mul(fp2, fp0, status); | |
2469 | fp1 = floatx80_mul(fp1, fp0, status); | |
2470 | fp2 = floatx80_add(fp2, float32_to_floatx80( | |
2471 | make_float32(0x3493F281), status), | |
2472 | status); /* B9 */ | |
2473 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2474 | make_float64(0x3EC71DE3A5774682), status), | |
2475 | status); /* B8 */ | |
2476 | fp2 = floatx80_mul(fp2, fp0, status); | |
2477 | fp1 = floatx80_mul(fp1, fp0, status); | |
2478 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2479 | make_float64(0x3EFA01A019D7CB68), status), | |
2480 | status); /* B7 */ | |
2481 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2482 | make_float64(0x3F2A01A01A019DF3), status), | |
2483 | status); /* B6 */ | |
2484 | fp2 = floatx80_mul(fp2, fp0, status); | |
2485 | fp1 = floatx80_mul(fp1, fp0, status); | |
2486 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2487 | make_float64(0x3F56C16C16C170E2), status), | |
2488 | status); /* B5 */ | |
2489 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2490 | make_float64(0x3F81111111111111), status), | |
2491 | status); /* B4 */ | |
2492 | fp2 = floatx80_mul(fp2, fp0, status); | |
2493 | fp1 = floatx80_mul(fp1, fp0, status); | |
2494 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2495 | make_float64(0x3FA5555555555555), status), | |
2496 | status); /* B3 */ | |
e2326300 | 2497 | fp3 = packFloatx80(0, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAAAB)); |
9937b029 LV |
2498 | fp1 = floatx80_add(fp1, fp3, status); /* B2 */ |
2499 | fp2 = floatx80_mul(fp2, fp0, status); | |
2500 | fp1 = floatx80_mul(fp1, fp0, status); | |
2501 | ||
2502 | fp2 = floatx80_mul(fp2, fp0, status); | |
2503 | fp1 = floatx80_mul(fp1, a, status); | |
2504 | ||
2505 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
2506 | make_float32(0x3F000000), status), | |
2507 | status); /* S*B1 */ | |
2508 | fp1 = floatx80_add(fp1, fp2, status); /* Q */ | |
2509 | fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */ | |
2510 | ||
2511 | status->float_rounding_mode = user_rnd_mode; | |
2512 | status->floatx80_rounding_precision = user_rnd_prec; | |
2513 | ||
2514 | a = floatx80_add(fp0, a, status); | |
2515 | ||
2516 | float_raise(float_flag_inexact, status); | |
2517 | ||
2518 | return a; | |
2519 | } else { /* |X| < 2^(-65) */ | |
2520 | sc = packFloatx80(1, 1, one_sig); | |
2521 | fp0 = a; | |
2522 | ||
2523 | if (aExp < 0x0033) { /* |X| < 2^(-16382) */ | |
2524 | fp0 = floatx80_mul(fp0, float64_to_floatx80( | |
2525 | make_float64(0x48B0000000000000), status), | |
2526 | status); | |
2527 | fp0 = floatx80_add(fp0, sc, status); | |
2528 | ||
2529 | status->float_rounding_mode = user_rnd_mode; | |
2530 | status->floatx80_rounding_precision = user_rnd_prec; | |
2531 | ||
2532 | a = floatx80_mul(fp0, float64_to_floatx80( | |
2533 | make_float64(0x3730000000000000), status), | |
2534 | status); | |
2535 | } else { | |
2536 | status->float_rounding_mode = user_rnd_mode; | |
2537 | status->floatx80_rounding_precision = user_rnd_prec; | |
2538 | ||
2539 | a = floatx80_add(fp0, sc, status); | |
2540 | } | |
2541 | ||
2542 | float_raise(float_flag_inexact, status); | |
2543 | ||
2544 | return a; | |
2545 | } | |
2546 | } | |
2547 | } | |
2548 | ||
808d77bc LMP |
2549 | /* |
2550 | * Hyperbolic tangent | |
2551 | */ | |
9937b029 LV |
2552 | |
2553 | floatx80 floatx80_tanh(floatx80 a, float_status *status) | |
2554 | { | |
c120391c | 2555 | bool aSign, vSign; |
9937b029 LV |
2556 | int32_t aExp, vExp; |
2557 | uint64_t aSig, vSig; | |
2558 | ||
8da5f1db RH |
2559 | FloatRoundMode user_rnd_mode; |
2560 | FloatX80RoundPrec user_rnd_prec; | |
9937b029 LV |
2561 | |
2562 | int32_t compact; | |
2563 | floatx80 fp0, fp1; | |
2564 | uint32_t sign; | |
2565 | ||
2566 | aSig = extractFloatx80Frac(a); | |
2567 | aExp = extractFloatx80Exp(a); | |
2568 | aSign = extractFloatx80Sign(a); | |
2569 | ||
2570 | if (aExp == 0x7FFF) { | |
2571 | if ((uint64_t) (aSig << 1)) { | |
2572 | return propagateFloatx80NaNOneArg(a, status); | |
2573 | } | |
2574 | return packFloatx80(aSign, one_exp, one_sig); | |
2575 | } | |
2576 | ||
2577 | if (aExp == 0 && aSig == 0) { | |
2578 | return packFloatx80(aSign, 0, 0); | |
2579 | } | |
2580 | ||
2581 | user_rnd_mode = status->float_rounding_mode; | |
2582 | user_rnd_prec = status->floatx80_rounding_precision; | |
2583 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 2584 | status->floatx80_rounding_precision = floatx80_precision_x; |
9937b029 LV |
2585 | |
2586 | compact = floatx80_make_compact(aExp, aSig); | |
2587 | ||
2588 | if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) { | |
2589 | /* TANHBORS */ | |
2590 | if (compact < 0x3FFF8000) { | |
2591 | /* TANHSM */ | |
2592 | status->float_rounding_mode = user_rnd_mode; | |
2593 | status->floatx80_rounding_precision = user_rnd_prec; | |
2594 | ||
2595 | a = floatx80_move(a, status); | |
2596 | ||
2597 | float_raise(float_flag_inexact, status); | |
2598 | ||
2599 | return a; | |
2600 | } else { | |
2601 | if (compact > 0x40048AA1) { | |
2602 | /* TANHHUGE */ | |
2603 | sign = 0x3F800000; | |
2604 | sign |= aSign ? 0x80000000 : 0x00000000; | |
2605 | fp0 = float32_to_floatx80(make_float32(sign), status); | |
2606 | sign &= 0x80000000; | |
2607 | sign ^= 0x80800000; /* -SIGN(X)*EPS */ | |
2608 | ||
2609 | status->float_rounding_mode = user_rnd_mode; | |
2610 | status->floatx80_rounding_precision = user_rnd_prec; | |
2611 | ||
2612 | a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign), | |
2613 | status), status); | |
2614 | ||
2615 | float_raise(float_flag_inexact, status); | |
2616 | ||
2617 | return a; | |
2618 | } else { | |
2619 | fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ | |
2620 | fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */ | |
2621 | fp0 = floatx80_add(fp0, float32_to_floatx80( | |
2622 | make_float32(0x3F800000), | |
2623 | status), status); /* EXP(Y)+1 */ | |
2624 | sign = aSign ? 0x80000000 : 0x00000000; | |
2625 | fp1 = floatx80_div(float32_to_floatx80(make_float32( | |
2626 | sign ^ 0xC0000000), status), fp0, | |
2627 | status); /* -SIGN(X)*2 / [EXP(Y)+1] */ | |
2628 | fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000), | |
2629 | status); /* SIGN */ | |
2630 | ||
2631 | status->float_rounding_mode = user_rnd_mode; | |
2632 | status->floatx80_rounding_precision = user_rnd_prec; | |
2633 | ||
2634 | a = floatx80_add(fp1, fp0, status); | |
2635 | ||
2636 | float_raise(float_flag_inexact, status); | |
2637 | ||
2638 | return a; | |
2639 | } | |
2640 | } | |
2641 | } else { /* 2**(-40) < |X| < (5/2)LOG2 */ | |
2642 | fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ | |
2643 | fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ | |
2644 | fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000), | |
2645 | status), | |
2646 | status); /* Z+2 */ | |
2647 | ||
2648 | vSign = extractFloatx80Sign(fp1); | |
2649 | vExp = extractFloatx80Exp(fp1); | |
2650 | vSig = extractFloatx80Frac(fp1); | |
2651 | ||
2652 | fp1 = packFloatx80(vSign ^ aSign, vExp, vSig); | |
2653 | ||
2654 | status->float_rounding_mode = user_rnd_mode; | |
2655 | status->floatx80_rounding_precision = user_rnd_prec; | |
2656 | ||
2657 | a = floatx80_div(fp0, fp1, status); | |
2658 | ||
2659 | float_raise(float_flag_inexact, status); | |
2660 | ||
2661 | return a; | |
2662 | } | |
2663 | } | |
eee6b892 | 2664 | |
808d77bc LMP |
2665 | /* |
2666 | * Hyperbolic sine | |
2667 | */ | |
eee6b892 LV |
2668 | |
2669 | floatx80 floatx80_sinh(floatx80 a, float_status *status) | |
2670 | { | |
c120391c | 2671 | bool aSign; |
eee6b892 LV |
2672 | int32_t aExp; |
2673 | uint64_t aSig; | |
2674 | ||
8da5f1db RH |
2675 | FloatRoundMode user_rnd_mode; |
2676 | FloatX80RoundPrec user_rnd_prec; | |
eee6b892 LV |
2677 | |
2678 | int32_t compact; | |
2679 | floatx80 fp0, fp1, fp2; | |
2680 | float32 fact; | |
2681 | ||
2682 | aSig = extractFloatx80Frac(a); | |
2683 | aExp = extractFloatx80Exp(a); | |
2684 | aSign = extractFloatx80Sign(a); | |
2685 | ||
2686 | if (aExp == 0x7FFF) { | |
2687 | if ((uint64_t) (aSig << 1)) { | |
2688 | return propagateFloatx80NaNOneArg(a, status); | |
2689 | } | |
2690 | return packFloatx80(aSign, floatx80_infinity.high, | |
2691 | floatx80_infinity.low); | |
2692 | } | |
2693 | ||
2694 | if (aExp == 0 && aSig == 0) { | |
2695 | return packFloatx80(aSign, 0, 0); | |
2696 | } | |
2697 | ||
2698 | user_rnd_mode = status->float_rounding_mode; | |
2699 | user_rnd_prec = status->floatx80_rounding_precision; | |
2700 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 2701 | status->floatx80_rounding_precision = floatx80_precision_x; |
eee6b892 LV |
2702 | |
2703 | compact = floatx80_make_compact(aExp, aSig); | |
2704 | ||
2705 | if (compact > 0x400CB167) { | |
2706 | /* SINHBIG */ | |
2707 | if (compact > 0x400CB2B3) { | |
2708 | status->float_rounding_mode = user_rnd_mode; | |
2709 | status->floatx80_rounding_precision = user_rnd_prec; | |
2710 | ||
2711 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
2712 | aSign, 0x8000, aSig, 0, status); | |
2713 | } else { | |
2714 | fp0 = floatx80_abs(a); /* Y = |X| */ | |
2715 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2716 | make_float64(0x40C62D38D3D64634), status), | |
2717 | status); /* (|X|-16381LOG2_LEAD) */ | |
2718 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2719 | make_float64(0x3D6F90AEB1E75CC7), status), | |
2720 | status); /* |X| - 16381 LOG2, ACCURATE */ | |
2721 | fp0 = floatx80_etox(fp0, status); | |
2722 | fp2 = packFloatx80(aSign, 0x7FFB, one_sig); | |
2723 | ||
2724 | status->float_rounding_mode = user_rnd_mode; | |
2725 | status->floatx80_rounding_precision = user_rnd_prec; | |
2726 | ||
2727 | a = floatx80_mul(fp0, fp2, status); | |
2728 | ||
2729 | float_raise(float_flag_inexact, status); | |
2730 | ||
2731 | return a; | |
2732 | } | |
2733 | } else { /* |X| < 16380 LOG2 */ | |
2734 | fp0 = floatx80_abs(a); /* Y = |X| */ | |
2735 | fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ | |
2736 | fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
2737 | status), status); /* 1+Z */ | |
2738 | fp2 = fp0; | |
2739 | fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */ | |
2740 | fp0 = floatx80_add(fp0, fp2, status); | |
2741 | ||
2742 | fact = packFloat32(aSign, 0x7E, 0); | |
2743 | ||
2744 | status->float_rounding_mode = user_rnd_mode; | |
2745 | status->floatx80_rounding_precision = user_rnd_prec; | |
2746 | ||
2747 | a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status); | |
2748 | ||
2749 | float_raise(float_flag_inexact, status); | |
2750 | ||
2751 | return a; | |
2752 | } | |
2753 | } | |
02f9124e | 2754 | |
808d77bc LMP |
2755 | /* |
2756 | * Hyperbolic cosine | |
2757 | */ | |
02f9124e LV |
2758 | |
2759 | floatx80 floatx80_cosh(floatx80 a, float_status *status) | |
2760 | { | |
2761 | int32_t aExp; | |
2762 | uint64_t aSig; | |
2763 | ||
8da5f1db RH |
2764 | FloatRoundMode user_rnd_mode; |
2765 | FloatX80RoundPrec user_rnd_prec; | |
02f9124e LV |
2766 | |
2767 | int32_t compact; | |
2768 | floatx80 fp0, fp1; | |
2769 | ||
2770 | aSig = extractFloatx80Frac(a); | |
2771 | aExp = extractFloatx80Exp(a); | |
2772 | ||
2773 | if (aExp == 0x7FFF) { | |
2774 | if ((uint64_t) (aSig << 1)) { | |
2775 | return propagateFloatx80NaNOneArg(a, status); | |
2776 | } | |
2777 | return packFloatx80(0, floatx80_infinity.high, | |
2778 | floatx80_infinity.low); | |
2779 | } | |
2780 | ||
2781 | if (aExp == 0 && aSig == 0) { | |
2782 | return packFloatx80(0, one_exp, one_sig); | |
2783 | } | |
2784 | ||
2785 | user_rnd_mode = status->float_rounding_mode; | |
2786 | user_rnd_prec = status->floatx80_rounding_precision; | |
2787 | status->float_rounding_mode = float_round_nearest_even; | |
8da5f1db | 2788 | status->floatx80_rounding_precision = floatx80_precision_x; |
02f9124e LV |
2789 | |
2790 | compact = floatx80_make_compact(aExp, aSig); | |
2791 | ||
2792 | if (compact > 0x400CB167) { | |
2793 | if (compact > 0x400CB2B3) { | |
2794 | status->float_rounding_mode = user_rnd_mode; | |
2795 | status->floatx80_rounding_precision = user_rnd_prec; | |
2796 | return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, | |
2797 | 0x8000, one_sig, 0, status); | |
2798 | } else { | |
2799 | fp0 = packFloatx80(0, aExp, aSig); | |
2800 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2801 | make_float64(0x40C62D38D3D64634), status), | |
2802 | status); | |
2803 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2804 | make_float64(0x3D6F90AEB1E75CC7), status), | |
2805 | status); | |
2806 | fp0 = floatx80_etox(fp0, status); | |
2807 | fp1 = packFloatx80(0, 0x7FFB, one_sig); | |
2808 | ||
2809 | status->float_rounding_mode = user_rnd_mode; | |
2810 | status->floatx80_rounding_precision = user_rnd_prec; | |
2811 | ||
2812 | a = floatx80_mul(fp0, fp1, status); | |
2813 | ||
2814 | float_raise(float_flag_inexact, status); | |
2815 | ||
2816 | return a; | |
2817 | } | |
2818 | } | |
2819 | ||
2820 | fp0 = packFloatx80(0, aExp, aSig); /* |X| */ | |
2821 | fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */ | |
2822 | fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000), | |
2823 | status), status); /* (1/2)*EXP(|X|) */ | |
2824 | fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */ | |
2825 | fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */ | |
2826 | ||
2827 | status->float_rounding_mode = user_rnd_mode; | |
2828 | status->floatx80_rounding_precision = user_rnd_prec; | |
2829 | ||
2830 | a = floatx80_add(fp0, fp1, status); | |
2831 | ||
2832 | float_raise(float_flag_inexact, status); | |
2833 | ||
2834 | return a; | |
2835 | } |