]>
Commit | Line | Data |
---|---|---|
591596b7 LV |
1 | /* |
2 | * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator, | |
3 | * derived from NetBSD M68040 FPSP functions, | |
4 | * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic | |
5 | * Package. Those parts of the code (and some later contributions) are | |
6 | * provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
11 | * GPL-v2-or-later | |
12 | * | |
13 | * Any future contributions to this file will be taken to be licensed under | |
14 | * the Softfloat-2a license unless specifically indicated otherwise. | |
15 | */ | |
16 | ||
808d77bc LMP |
17 | /* |
18 | * Portions of this work are licensed under the terms of the GNU GPL, | |
591596b7 LV |
19 | * version 2 or later. See the COPYING file in the top-level directory. |
20 | */ | |
21 | ||
22 | #include "qemu/osdep.h" | |
23 | #include "softfloat.h" | |
24 | #include "fpu/softfloat-macros.h" | |
4b5c65b8 | 25 | #include "softfloat_fpsp_tables.h" |
591596b7 | 26 | |
c84813b8 | 27 | #define pi_exp 0x4000 |
8c992abc | 28 | #define piby2_exp 0x3FFF |
e2326300 | 29 | #define pi_sig UINT64_C(0xc90fdaa22168c235) |
8c992abc | 30 | |
0d379c17 LV |
31 | static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) |
32 | { | |
33 | if (floatx80_is_signaling_nan(a, status)) { | |
34 | float_raise(float_flag_invalid, status); | |
1c0c951f | 35 | a = floatx80_silence_nan(a, status); |
0d379c17 LV |
36 | } |
37 | ||
38 | if (status->default_nan_mode) { | |
39 | return floatx80_default_nan(status); | |
40 | } | |
41 | ||
1c0c951f | 42 | return a; |
0d379c17 LV |
43 | } |
44 | ||
808d77bc LMP |
45 | /* |
46 | * Returns the modulo remainder of the extended double-precision floating-point | |
47 | * value `a' with respect to the corresponding value `b'. | |
48 | */ | |
591596b7 LV |
49 | |
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;       /* 128-bit working remainder / divisor */
    uint64_t qTemp, term0, term1;

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    if (aExp == 0x7FFF) {
        /* a is NaN, or a is infinity (mod of infinity is invalid) */
        if ((uint64_t) (aSig0 << 1)
            || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite mod infinity is the finite operand, unchanged */
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
            /* mod by zero is invalid */
        invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((uint64_t) (aSig0 << 1) == 0) {
            /* +/-0 mod b = +/-0 */
            return a;
        }
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    /* ensure the explicit integer bit of the divisor is set */
    bSig |= UINT64_C(0x8000000000000000);
    /* the result carries the sign of the dividend */
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if (expDiff < 0) {
        /* |a| < |b|: a is already the remainder */
        return a;
    }
    /* first subtraction: bring the remainder below the divisor */
    qTemp = (bSig <= aSig0);
    if (qTemp) {
        aSig0 -= bSig;
    }
    expDiff -= 64;
    while (0 < expDiff) {
        /*
         * Reduce 62 exponent bits per iteration; the quotient estimate
         * is lowered by 2 so the subtraction can never go negative.
         */
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        mul64To128(bSig, qTemp, &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
        expDiff -= 62;
    }
    expDiff += 64;
    if (0 < expDiff) {
        /* final partial step for the remaining (< 64) exponent bits */
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        qTemp >>= 64 - expDiff;
        mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
        /* correct the (under-)estimated quotient */
        while (le128(term0, term1, aSig0, aSig1)) {
            ++qTemp;
            sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        }
    }
    /* pack the exact remainder at full extended precision */
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);
}
0d379c17 | 127 | |
808d77bc LMP |
128 | /* |
129 | * Returns the mantissa of the extended double-precision floating-point | |
130 | * value `a'. | |
131 | */ | |
0d379c17 LV |
132 | |
133 | floatx80 floatx80_getman(floatx80 a, float_status *status) | |
134 | { | |
135 | flag aSign; | |
136 | int32_t aExp; | |
137 | uint64_t aSig; | |
138 | ||
139 | aSig = extractFloatx80Frac(a); | |
140 | aExp = extractFloatx80Exp(a); | |
141 | aSign = extractFloatx80Sign(a); | |
142 | ||
143 | if (aExp == 0x7FFF) { | |
144 | if ((uint64_t) (aSig << 1)) { | |
145 | return propagateFloatx80NaNOneArg(a , status); | |
146 | } | |
147 | float_raise(float_flag_invalid , status); | |
148 | return floatx80_default_nan(status); | |
149 | } | |
150 | ||
151 | if (aExp == 0) { | |
152 | if (aSig == 0) { | |
153 | return packFloatx80(aSign, 0, 0); | |
154 | } | |
155 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
156 | } | |
157 | ||
158 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, | |
159 | 0x3FFF, aSig, 0, status); | |
160 | } | |
161 | ||
808d77bc LMP |
162 | /* |
163 | * Returns the exponent of the extended double-precision floating-point | |
164 | * value `a' as an extended double-precision value. | |
165 | */ | |
0d379c17 LV |
166 | |
167 | floatx80 floatx80_getexp(floatx80 a, float_status *status) | |
168 | { | |
169 | flag aSign; | |
170 | int32_t aExp; | |
171 | uint64_t aSig; | |
172 | ||
173 | aSig = extractFloatx80Frac(a); | |
174 | aExp = extractFloatx80Exp(a); | |
175 | aSign = extractFloatx80Sign(a); | |
176 | ||
177 | if (aExp == 0x7FFF) { | |
178 | if ((uint64_t) (aSig << 1)) { | |
179 | return propagateFloatx80NaNOneArg(a , status); | |
180 | } | |
181 | float_raise(float_flag_invalid , status); | |
182 | return floatx80_default_nan(status); | |
183 | } | |
184 | ||
185 | if (aExp == 0) { | |
186 | if (aSig == 0) { | |
187 | return packFloatx80(aSign, 0, 0); | |
188 | } | |
189 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
190 | } | |
191 | ||
192 | return int32_to_floatx80(aExp - 0x3FFF, status); | |
193 | } | |
194 | ||
808d77bc LMP |
195 | /* |
196 | * Scales extended double-precision floating-point value in operand `a' by | |
197 | * value `b'. The function truncates the value in the second operand 'b' to | |
198 | * an integral value and adds that value to the exponent of the operand 'a'. | |
199 | * The operation performed according to the IEC/IEEE Standard for Binary | |
200 | * Floating-Point Arithmetic. | |
201 | */ | |
0d379c17 LV |
202 | |
floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, bSign;
    int32_t aExp, bExp, shiftCount;
    uint64_t aSig, bSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);
    bSign = extractFloatx80Sign(b);

    if (bExp == 0x7FFF) {
        /* scale by NaN propagates; scale by infinity is invalid */
        if ((uint64_t) (bSig << 1) ||
            ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid , status);
        return floatx80_default_nan(status);
    }
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* scaling infinity leaves it unchanged */
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            /* scaling zero leaves it unchanged */
            return packFloatx80(aSign, 0, 0);
        }
        if (bExp < 0x3FFF) {
            /* |b| < 1 truncates to 0: a is unchanged */
            return a;
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    if (bExp < 0x3FFF) {
        /* |b| < 1 truncates to 0: a is unchanged */
        return a;
    }

    if (0x400F < bExp) {
        /*
         * |b| >= 2^16: the scaled exponent is far outside the
         * representable range; force a guaranteed overflow/underflow.
         */
        aExp = bSign ? -0x6001 : 0xE000;
        return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                    aSign, aExp, aSig, 0, status);
    }

    /* convert b's significand to the integer trunc(|b|) */
    shiftCount = 0x403E - bExp;
    bSig >>= shiftCount;
    /* add/subtract the integer scale to a's exponent */
    aExp = bSign ? (aExp - bSig) : (aExp + bSig);

    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                aSign, aExp, aSig, 0, status);
}
9a069775 LV |
258 | |
259 | floatx80 floatx80_move(floatx80 a, float_status *status) | |
260 | { | |
261 | flag aSign; | |
262 | int32_t aExp; | |
263 | uint64_t aSig; | |
264 | ||
265 | aSig = extractFloatx80Frac(a); | |
266 | aExp = extractFloatx80Exp(a); | |
267 | aSign = extractFloatx80Sign(a); | |
268 | ||
269 | if (aExp == 0x7FFF) { | |
270 | if ((uint64_t)(aSig << 1)) { | |
271 | return propagateFloatx80NaNOneArg(a, status); | |
272 | } | |
273 | return a; | |
274 | } | |
275 | if (aExp == 0) { | |
276 | if (aSig == 0) { | |
277 | return a; | |
278 | } | |
279 | normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, | |
280 | aSign, aExp, aSig, 0, status); | |
281 | } | |
282 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, | |
283 | aExp, aSig, 0, status); | |
284 | } | |
4b5c65b8 | 285 | |
808d77bc LMP |
286 | /* |
287 | * Algorithms for transcendental functions supported by MC68881 and MC68882 | |
288 | * mathematical coprocessors. The functions are derived from FPSP library. | |
289 | */ | |
4b5c65b8 LV |
290 | |
291 | #define one_exp 0x3FFF | |
e2326300 | 292 | #define one_sig UINT64_C(0x8000000000000000) |
4b5c65b8 | 293 | |
808d77bc LMP |
294 | /* |
295 | * Function for compactifying extended double-precision floating point values. | |
296 | */ | |
4b5c65b8 LV |
297 | |
/*
 * Pack the biased exponent and the top 16 fraction bits of an
 * extended double-precision value into one int32 for fast range tests.
 */
static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
{
    uint64_t topFrac = aSig >> 48;   /* high 16 bits of the significand */
    return (aExp << 16) | (int32_t)topFrac;
}
302 | ||
808d77bc LMP |
303 | /* |
304 | * Log base e of x plus 1 | |
305 | */ | |
4b5c65b8 LV |
306 | |
307 | floatx80 floatx80_lognp1(floatx80 a, float_status *status) | |
308 | { | |
309 | flag aSign; | |
310 | int32_t aExp; | |
311 | uint64_t aSig, fSig; | |
312 | ||
313 | int8_t user_rnd_mode, user_rnd_prec; | |
314 | ||
315 | int32_t compact, j, k; | |
316 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; | |
317 | ||
318 | aSig = extractFloatx80Frac(a); | |
319 | aExp = extractFloatx80Exp(a); | |
320 | aSign = extractFloatx80Sign(a); | |
321 | ||
322 | if (aExp == 0x7FFF) { | |
323 | if ((uint64_t) (aSig << 1)) { | |
324 | propagateFloatx80NaNOneArg(a, status); | |
325 | } | |
326 | if (aSign) { | |
327 | float_raise(float_flag_invalid, status); | |
328 | return floatx80_default_nan(status); | |
329 | } | |
330 | return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low); | |
331 | } | |
332 | ||
333 | if (aExp == 0 && aSig == 0) { | |
334 | return packFloatx80(aSign, 0, 0); | |
335 | } | |
336 | ||
337 | if (aSign && aExp >= one_exp) { | |
338 | if (aExp == one_exp && aSig == one_sig) { | |
339 | float_raise(float_flag_divbyzero, status); | |
981348af LV |
340 | return packFloatx80(aSign, floatx80_infinity.high, |
341 | floatx80_infinity.low); | |
4b5c65b8 LV |
342 | } |
343 | float_raise(float_flag_invalid, status); | |
344 | return floatx80_default_nan(status); | |
345 | } | |
346 | ||
347 | if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { | |
348 | /* <= min threshold */ | |
349 | float_raise(float_flag_inexact, status); | |
350 | return floatx80_move(a, status); | |
351 | } | |
352 | ||
353 | user_rnd_mode = status->float_rounding_mode; | |
354 | user_rnd_prec = status->floatx80_rounding_precision; | |
355 | status->float_rounding_mode = float_round_nearest_even; | |
356 | status->floatx80_rounding_precision = 80; | |
357 | ||
358 | compact = floatx80_make_compact(aExp, aSig); | |
359 | ||
360 | fp0 = a; /* Z */ | |
361 | fp1 = a; | |
362 | ||
363 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
364 | status), status); /* X = (1+Z) */ | |
365 | ||
366 | aExp = extractFloatx80Exp(fp0); | |
367 | aSig = extractFloatx80Frac(fp0); | |
368 | ||
369 | compact = floatx80_make_compact(aExp, aSig); | |
370 | ||
371 | if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { | |
372 | /* |X| < 1/2 or |X| > 3/2 */ | |
373 | k = aExp - 0x3FFF; | |
374 | fp1 = int32_to_floatx80(k, status); | |
375 | ||
e2326300 | 376 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
4b5c65b8 LV |
377 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ |
378 | ||
379 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ | |
380 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ | |
381 | ||
382 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ | |
383 | ||
384 | lp1cont1: | |
385 | /* LP1CONT1 */ | |
386 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ | |
e2326300 | 387 | logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); |
4b5c65b8 LV |
388 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ |
389 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ | |
390 | ||
391 | fp3 = fp2; | |
392 | fp1 = fp2; | |
393 | ||
394 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
395 | make_float64(0x3FC2499AB5E4040B), status), | |
396 | status); /* V*A6 */ | |
397 | fp2 = floatx80_mul(fp2, float64_to_floatx80( | |
398 | make_float64(0xBFC555B5848CB7DB), status), | |
399 | status); /* V*A5 */ | |
400 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
401 | make_float64(0x3FC99999987D8730), status), | |
402 | status); /* A4+V*A6 */ | |
403 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
404 | make_float64(0xBFCFFFFFFF6F7E97), status), | |
405 | status); /* A3+V*A5 */ | |
406 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ | |
407 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ | |
408 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
409 | make_float64(0x3FD55555555555A4), status), | |
410 | status); /* A2+V*(A4+V*A6) */ | |
411 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
412 | make_float64(0xBFE0000000000008), status), | |
413 | status); /* A1+V*(A3+V*A5) */ | |
414 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ | |
415 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ | |
416 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ | |
417 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ | |
418 | ||
419 | fp1 = floatx80_add(fp1, log_tbl[j + 1], | |
420 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ | |
421 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ | |
422 | ||
423 | status->float_rounding_mode = user_rnd_mode; | |
424 | status->floatx80_rounding_precision = user_rnd_prec; | |
425 | ||
426 | a = floatx80_add(fp0, klog2, status); | |
427 | ||
428 | float_raise(float_flag_inexact, status); | |
429 | ||
430 | return a; | |
431 | } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { | |
432 | /* |X| < 1/16 or |X| > -1/16 */ | |
433 | /* LP1CARE */ | |
e2326300 | 434 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
4b5c65b8 LV |
435 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ |
436 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ | |
437 | ||
438 | if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */ | |
439 | /* KISZERO */ | |
440 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000), | |
441 | status), f, status); /* 1-F */ | |
442 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */ | |
443 | fp1 = packFloatx80(0, 0, 0); /* K = 0 */ | |
444 | } else { | |
445 | /* KISNEG */ | |
446 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000), | |
447 | status), f, status); /* 2-F */ | |
448 | fp1 = floatx80_add(fp1, fp1, status); /* 2Z */ | |
449 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */ | |
450 | fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */ | |
451 | } | |
452 | goto lp1cont1; | |
453 | } else { | |
454 | /* LP1ONE16 */ | |
455 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */ | |
456 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
457 | status), status); /* FP0 IS 1+X */ | |
458 | ||
459 | /* LP1CONT2 */ | |
460 | fp1 = floatx80_div(fp1, fp0, status); /* U */ | |
461 | saveu = fp1; | |
462 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ | |
463 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ | |
464 | ||
465 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), | |
466 | status); /* B5 */ | |
467 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), | |
468 | status); /* B4 */ | |
469 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ | |
470 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ | |
471 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
472 | make_float64(0x3F624924928BCCFF), status), | |
473 | status); /* B3+W*B5 */ | |
474 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
475 | make_float64(0x3F899999999995EC), status), | |
476 | status); /* B2+W*B4 */ | |
477 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ | |
478 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ | |
479 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
480 | make_float64(0x3FB5555555555555), status), | |
481 | status); /* B1+W*(B3+W*B5) */ | |
482 | ||
483 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ | |
484 | fp1 = floatx80_add(fp1, fp2, | |
485 | status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ | |
486 | fp0 = floatx80_mul(fp0, fp1, | |
487 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ | |
488 | ||
489 | status->float_rounding_mode = user_rnd_mode; | |
490 | status->floatx80_rounding_precision = user_rnd_prec; | |
491 | ||
492 | a = floatx80_add(fp0, saveu, status); | |
493 | ||
494 | /*if (!floatx80_is_zero(a)) { */ | |
495 | float_raise(float_flag_inexact, status); | |
496 | /*} */ | |
497 | ||
498 | return a; | |
499 | } | |
500 | } | |
50067bd1 | 501 | |
808d77bc LMP |
502 | /* |
503 | * Log base e | |
504 | */ | |
50067bd1 LV |
505 | |
506 | floatx80 floatx80_logn(floatx80 a, float_status *status) | |
507 | { | |
508 | flag aSign; | |
509 | int32_t aExp; | |
510 | uint64_t aSig, fSig; | |
511 | ||
512 | int8_t user_rnd_mode, user_rnd_prec; | |
513 | ||
514 | int32_t compact, j, k, adjk; | |
515 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; | |
516 | ||
517 | aSig = extractFloatx80Frac(a); | |
518 | aExp = extractFloatx80Exp(a); | |
519 | aSign = extractFloatx80Sign(a); | |
520 | ||
521 | if (aExp == 0x7FFF) { | |
522 | if ((uint64_t) (aSig << 1)) { | |
523 | propagateFloatx80NaNOneArg(a, status); | |
524 | } | |
525 | if (aSign == 0) { | |
526 | return packFloatx80(0, floatx80_infinity.high, | |
527 | floatx80_infinity.low); | |
528 | } | |
529 | } | |
530 | ||
531 | adjk = 0; | |
532 | ||
533 | if (aExp == 0) { | |
534 | if (aSig == 0) { /* zero */ | |
535 | float_raise(float_flag_divbyzero, status); | |
536 | return packFloatx80(1, floatx80_infinity.high, | |
537 | floatx80_infinity.low); | |
538 | } | |
539 | if ((aSig & one_sig) == 0) { /* denormal */ | |
540 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
541 | adjk = -100; | |
542 | aExp += 100; | |
543 | a = packFloatx80(aSign, aExp, aSig); | |
544 | } | |
545 | } | |
546 | ||
547 | if (aSign) { | |
548 | float_raise(float_flag_invalid, status); | |
549 | return floatx80_default_nan(status); | |
550 | } | |
551 | ||
552 | user_rnd_mode = status->float_rounding_mode; | |
553 | user_rnd_prec = status->floatx80_rounding_precision; | |
554 | status->float_rounding_mode = float_round_nearest_even; | |
555 | status->floatx80_rounding_precision = 80; | |
556 | ||
557 | compact = floatx80_make_compact(aExp, aSig); | |
558 | ||
559 | if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { | |
560 | /* |X| < 15/16 or |X| > 17/16 */ | |
561 | k = aExp - 0x3FFF; | |
562 | k += adjk; | |
563 | fp1 = int32_to_floatx80(k, status); | |
564 | ||
e2326300 | 565 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
50067bd1 LV |
566 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ |
567 | ||
568 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ | |
569 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ | |
570 | ||
571 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ | |
572 | ||
573 | /* LP1CONT1 */ | |
574 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ | |
e2326300 | 575 | logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); |
50067bd1 LV |
576 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ |
577 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ | |
578 | ||
579 | fp3 = fp2; | |
580 | fp1 = fp2; | |
581 | ||
582 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
583 | make_float64(0x3FC2499AB5E4040B), status), | |
584 | status); /* V*A6 */ | |
585 | fp2 = floatx80_mul(fp2, float64_to_floatx80( | |
586 | make_float64(0xBFC555B5848CB7DB), status), | |
587 | status); /* V*A5 */ | |
588 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
589 | make_float64(0x3FC99999987D8730), status), | |
590 | status); /* A4+V*A6 */ | |
591 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
592 | make_float64(0xBFCFFFFFFF6F7E97), status), | |
593 | status); /* A3+V*A5 */ | |
594 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ | |
595 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ | |
596 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
597 | make_float64(0x3FD55555555555A4), status), | |
598 | status); /* A2+V*(A4+V*A6) */ | |
599 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
600 | make_float64(0xBFE0000000000008), status), | |
601 | status); /* A1+V*(A3+V*A5) */ | |
602 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ | |
603 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ | |
604 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ | |
605 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ | |
606 | ||
607 | fp1 = floatx80_add(fp1, log_tbl[j + 1], | |
608 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ | |
609 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ | |
610 | ||
611 | status->float_rounding_mode = user_rnd_mode; | |
612 | status->floatx80_rounding_precision = user_rnd_prec; | |
613 | ||
614 | a = floatx80_add(fp0, klog2, status); | |
615 | ||
616 | float_raise(float_flag_inexact, status); | |
617 | ||
618 | return a; | |
619 | } else { /* |X-1| >= 1/16 */ | |
620 | fp0 = a; | |
621 | fp1 = a; | |
622 | fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000), | |
623 | status), status); /* FP1 IS X-1 */ | |
624 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
625 | status), status); /* FP0 IS X+1 */ | |
626 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */ | |
627 | ||
628 | /* LP1CONT2 */ | |
629 | fp1 = floatx80_div(fp1, fp0, status); /* U */ | |
630 | saveu = fp1; | |
631 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ | |
632 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ | |
633 | ||
634 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), | |
635 | status); /* B5 */ | |
636 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), | |
637 | status); /* B4 */ | |
638 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ | |
639 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ | |
640 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
641 | make_float64(0x3F624924928BCCFF), status), | |
642 | status); /* B3+W*B5 */ | |
643 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
644 | make_float64(0x3F899999999995EC), status), | |
645 | status); /* B2+W*B4 */ | |
646 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ | |
647 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ | |
648 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
649 | make_float64(0x3FB5555555555555), status), | |
650 | status); /* B1+W*(B3+W*B5) */ | |
651 | ||
652 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ | |
653 | fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ | |
654 | fp0 = floatx80_mul(fp0, fp1, | |
655 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ | |
656 | ||
657 | status->float_rounding_mode = user_rnd_mode; | |
658 | status->floatx80_rounding_precision = user_rnd_prec; | |
659 | ||
660 | a = floatx80_add(fp0, saveu, status); | |
661 | ||
662 | /*if (!floatx80_is_zero(a)) { */ | |
663 | float_raise(float_flag_inexact, status); | |
664 | /*} */ | |
665 | ||
666 | return a; | |
667 | } | |
668 | } | |
248efb66 | 669 | |
808d77bc LMP |
670 | /* |
671 | * Log base 10 | |
672 | */ | |
248efb66 LV |
673 | |
674 | floatx80 floatx80_log10(floatx80 a, float_status *status) | |
675 | { | |
676 | flag aSign; | |
677 | int32_t aExp; | |
678 | uint64_t aSig; | |
679 | ||
680 | int8_t user_rnd_mode, user_rnd_prec; | |
681 | ||
682 | floatx80 fp0, fp1; | |
683 | ||
684 | aSig = extractFloatx80Frac(a); | |
685 | aExp = extractFloatx80Exp(a); | |
686 | aSign = extractFloatx80Sign(a); | |
687 | ||
688 | if (aExp == 0x7FFF) { | |
689 | if ((uint64_t) (aSig << 1)) { | |
690 | propagateFloatx80NaNOneArg(a, status); | |
691 | } | |
692 | if (aSign == 0) { | |
693 | return packFloatx80(0, floatx80_infinity.high, | |
694 | floatx80_infinity.low); | |
695 | } | |
696 | } | |
697 | ||
698 | if (aExp == 0 && aSig == 0) { | |
699 | float_raise(float_flag_divbyzero, status); | |
700 | return packFloatx80(1, floatx80_infinity.high, | |
701 | floatx80_infinity.low); | |
702 | } | |
703 | ||
704 | if (aSign) { | |
705 | float_raise(float_flag_invalid, status); | |
706 | return floatx80_default_nan(status); | |
707 | } | |
708 | ||
709 | user_rnd_mode = status->float_rounding_mode; | |
710 | user_rnd_prec = status->floatx80_rounding_precision; | |
711 | status->float_rounding_mode = float_round_nearest_even; | |
712 | status->floatx80_rounding_precision = 80; | |
713 | ||
714 | fp0 = floatx80_logn(a, status); | |
e2326300 | 715 | fp1 = packFloatx80(0, 0x3FFD, UINT64_C(0xDE5BD8A937287195)); /* INV_L10 */ |
248efb66 LV |
716 | |
717 | status->float_rounding_mode = user_rnd_mode; | |
718 | status->floatx80_rounding_precision = user_rnd_prec; | |
719 | ||
720 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */ | |
721 | ||
722 | float_raise(float_flag_inexact, status); | |
723 | ||
724 | return a; | |
725 | } | |
67b453ed | 726 | |
808d77bc LMP |
727 | /* |
728 | * Log base 2 | |
729 | */ | |
67b453ed LV |
730 | |
731 | floatx80 floatx80_log2(floatx80 a, float_status *status) | |
732 | { | |
733 | flag aSign; | |
734 | int32_t aExp; | |
735 | uint64_t aSig; | |
736 | ||
737 | int8_t user_rnd_mode, user_rnd_prec; | |
738 | ||
739 | floatx80 fp0, fp1; | |
740 | ||
741 | aSig = extractFloatx80Frac(a); | |
742 | aExp = extractFloatx80Exp(a); | |
743 | aSign = extractFloatx80Sign(a); | |
744 | ||
745 | if (aExp == 0x7FFF) { | |
746 | if ((uint64_t) (aSig << 1)) { | |
747 | propagateFloatx80NaNOneArg(a, status); | |
748 | } | |
749 | if (aSign == 0) { | |
750 | return packFloatx80(0, floatx80_infinity.high, | |
751 | floatx80_infinity.low); | |
752 | } | |
753 | } | |
754 | ||
755 | if (aExp == 0) { | |
756 | if (aSig == 0) { | |
757 | float_raise(float_flag_divbyzero, status); | |
758 | return packFloatx80(1, floatx80_infinity.high, | |
759 | floatx80_infinity.low); | |
760 | } | |
761 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
762 | } | |
763 | ||
764 | if (aSign) { | |
765 | float_raise(float_flag_invalid, status); | |
766 | return floatx80_default_nan(status); | |
767 | } | |
768 | ||
769 | user_rnd_mode = status->float_rounding_mode; | |
770 | user_rnd_prec = status->floatx80_rounding_precision; | |
771 | status->float_rounding_mode = float_round_nearest_even; | |
772 | status->floatx80_rounding_precision = 80; | |
773 | ||
774 | if (aSig == one_sig) { /* X is 2^k */ | |
775 | status->float_rounding_mode = user_rnd_mode; | |
776 | status->floatx80_rounding_precision = user_rnd_prec; | |
777 | ||
778 | a = int32_to_floatx80(aExp - 0x3FFF, status); | |
779 | } else { | |
780 | fp0 = floatx80_logn(a, status); | |
e2326300 | 781 | fp1 = packFloatx80(0, 0x3FFF, UINT64_C(0xB8AA3B295C17F0BC)); /* INV_L2 */ |
67b453ed LV |
782 | |
783 | status->float_rounding_mode = user_rnd_mode; | |
784 | status->floatx80_rounding_precision = user_rnd_prec; | |
785 | ||
786 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */ | |
787 | } | |
788 | ||
789 | float_raise(float_flag_inexact, status); | |
790 | ||
791 | return a; | |
792 | } | |
40ad0873 | 793 | |
808d77bc LMP |
794 | /* |
795 | * e to x | |
796 | */ | |
40ad0873 LV |
797 | |
798 | floatx80 floatx80_etox(floatx80 a, float_status *status) | |
799 | { | |
800 | flag aSign; | |
801 | int32_t aExp; | |
802 | uint64_t aSig; | |
803 | ||
804 | int8_t user_rnd_mode, user_rnd_prec; | |
805 | ||
806 | int32_t compact, n, j, k, m, m1; | |
807 | floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; | |
808 | flag adjflag; | |
809 | ||
810 | aSig = extractFloatx80Frac(a); | |
811 | aExp = extractFloatx80Exp(a); | |
812 | aSign = extractFloatx80Sign(a); | |
813 | ||
814 | if (aExp == 0x7FFF) { | |
815 | if ((uint64_t) (aSig << 1)) { | |
816 | return propagateFloatx80NaNOneArg(a, status); | |
817 | } | |
818 | if (aSign) { | |
819 | return packFloatx80(0, 0, 0); | |
820 | } | |
821 | return packFloatx80(0, floatx80_infinity.high, | |
822 | floatx80_infinity.low); | |
823 | } | |
824 | ||
825 | if (aExp == 0 && aSig == 0) { | |
826 | return packFloatx80(0, one_exp, one_sig); | |
827 | } | |
828 | ||
829 | user_rnd_mode = status->float_rounding_mode; | |
830 | user_rnd_prec = status->floatx80_rounding_precision; | |
831 | status->float_rounding_mode = float_round_nearest_even; | |
832 | status->floatx80_rounding_precision = 80; | |
833 | ||
834 | adjflag = 0; | |
835 | ||
836 | if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */ | |
837 | compact = floatx80_make_compact(aExp, aSig); | |
838 | ||
839 | if (compact < 0x400CB167) { /* |X| < 16380 log2 */ | |
840 | fp0 = a; | |
841 | fp1 = a; | |
842 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
843 | make_float32(0x42B8AA3B), status), | |
844 | status); /* 64/log2 * X */ | |
845 | adjflag = 0; | |
846 | n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ | |
847 | fp0 = int32_to_floatx80(n, status); | |
848 | ||
849 | j = n & 0x3F; /* J = N mod 64 */ | |
850 | m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ | |
851 | if (n < 0 && j) { | |
808d77bc LMP |
852 | /* |
853 | * arithmetic right shift is division and | |
40ad0873 LV |
854 | * round towards minus infinity |
855 | */ | |
856 | m--; | |
857 | } | |
858 | m += 0x3FFF; /* biased exponent of 2^(M) */ | |
859 | ||
860 | expcont1: | |
861 | fp2 = fp0; /* N */ | |
862 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
863 | make_float32(0xBC317218), status), | |
864 | status); /* N * L1, L1 = lead(-log2/64) */ | |
e2326300 | 865 | l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6)); |
40ad0873 LV |
866 | fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */ |
867 | fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */ | |
868 | fp0 = floatx80_add(fp0, fp2, status); /* R */ | |
869 | ||
870 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
871 | fp2 = float32_to_floatx80(make_float32(0x3AB60B70), | |
872 | status); /* A5 */ | |
873 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */ | |
874 | fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895), | |
875 | status), fp1, | |
876 | status); /* fp3 is S*A4 */ | |
877 | fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64( | |
878 | 0x3FA5555555554431), status), | |
879 | status); /* fp2 is A3+S*A5 */ | |
880 | fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64( | |
881 | 0x3FC5555555554018), status), | |
882 | status); /* fp3 is A2+S*A4 */ | |
883 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */ | |
884 | fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */ | |
885 | fp2 = floatx80_add(fp2, float32_to_floatx80( | |
886 | make_float32(0x3F000000), status), | |
887 | status); /* fp2 is A1+S*(A3+S*A5) */ | |
888 | fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */ | |
889 | fp2 = floatx80_mul(fp2, fp1, | |
890 | status); /* fp2 IS S*(A1+S*(A3+S*A5)) */ | |
891 | fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */ | |
892 | fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */ | |
893 | ||
894 | fp1 = exp_tbl[j]; | |
895 | fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */ | |
896 | fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), | |
897 | status); /* accurate 2^(J/64) */ | |
898 | fp0 = floatx80_add(fp0, fp1, | |
899 | status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */ | |
900 | ||
901 | scale = packFloatx80(0, m, one_sig); | |
902 | if (adjflag) { | |
903 | adjscale = packFloatx80(0, m1, one_sig); | |
904 | fp0 = floatx80_mul(fp0, adjscale, status); | |
905 | } | |
906 | ||
907 | status->float_rounding_mode = user_rnd_mode; | |
908 | status->floatx80_rounding_precision = user_rnd_prec; | |
909 | ||
910 | a = floatx80_mul(fp0, scale, status); | |
911 | ||
912 | float_raise(float_flag_inexact, status); | |
913 | ||
914 | return a; | |
915 | } else { /* |X| >= 16380 log2 */ | |
916 | if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */ | |
917 | status->float_rounding_mode = user_rnd_mode; | |
918 | status->floatx80_rounding_precision = user_rnd_prec; | |
919 | if (aSign) { | |
920 | a = roundAndPackFloatx80( | |
921 | status->floatx80_rounding_precision, | |
922 | 0, -0x1000, aSig, 0, status); | |
923 | } else { | |
924 | a = roundAndPackFloatx80( | |
925 | status->floatx80_rounding_precision, | |
926 | 0, 0x8000, aSig, 0, status); | |
927 | } | |
928 | float_raise(float_flag_inexact, status); | |
929 | ||
930 | return a; | |
931 | } else { | |
932 | fp0 = a; | |
933 | fp1 = a; | |
934 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
935 | make_float32(0x42B8AA3B), status), | |
936 | status); /* 64/log2 * X */ | |
937 | adjflag = 1; | |
938 | n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ | |
939 | fp0 = int32_to_floatx80(n, status); | |
940 | ||
941 | j = n & 0x3F; /* J = N mod 64 */ | |
942 | /* NOTE: this is really arithmetic right shift by 6 */ | |
943 | k = n / 64; | |
944 | if (n < 0 && j) { | |
945 | /* arithmetic right shift is division and | |
946 | * round towards minus infinity | |
947 | */ | |
948 | k--; | |
949 | } | |
950 | /* NOTE: this is really arithmetic right shift by 1 */ | |
951 | m1 = k / 2; | |
952 | if (k < 0 && (k & 1)) { | |
953 | /* arithmetic right shift is division and | |
954 | * round towards minus infinity | |
955 | */ | |
956 | m1--; | |
957 | } | |
958 | m = k - m1; | |
959 | m1 += 0x3FFF; /* biased exponent of 2^(M1) */ | |
960 | m += 0x3FFF; /* biased exponent of 2^(M) */ | |
961 | ||
962 | goto expcont1; | |
963 | } | |
964 | } | |
965 | } else { /* |X| < 2^(-65) */ | |
966 | status->float_rounding_mode = user_rnd_mode; | |
967 | status->floatx80_rounding_precision = user_rnd_prec; | |
968 | ||
969 | a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000), | |
970 | status), status); /* 1 + X */ | |
971 | ||
972 | float_raise(float_flag_inexact, status); | |
973 | ||
974 | return a; | |
975 | } | |
976 | } | |
068f1615 | 977 | |
808d77bc LMP |
/*
 * 2 to x
 *
 * Returns 2^a in extended double precision, emulating the m68k FPSP
 * FTWOTOX algorithm: a is split as a = M + M' + J/64 + R with
 * |R| <= log2/128, 2^(J/64) is looked up in exp2_tbl/exp2_tbl2, exp(R)-1
 * is evaluated by a degree-5 polynomial, and the result is rescaled by
 * 2^M * 2^(M').
 *
 * Special cases (handled before any rounding-state change):
 *   NaN  -> propagated NaN;  +inf -> +inf;  -inf -> +0;  +/-0 -> +1.
 *
 * The caller's rounding mode/precision are saved, forced to
 * nearest-even/extended for the internal computation, and restored just
 * before the final multiply so only the last operation rounds in the
 * user's mode.  float_flag_inexact is raised on every non-special path.
 */

floatx80 floatx80_twotox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0); /* 2^(-inf) = +0 */
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low); /* 2^(+inf) = +inf */
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig); /* 2^0 = 1, exact */
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400D80C0) {
        /* |X| > 16480 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /*
             * Result overflows (positive X) or underflows (negative X);
             * round a huge/tiny value to trigger the right exceptions.
             */
            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* 2^X ~= 1 + X to working precision for such tiny X */
            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x42800000), status),
                           status); /* X * 64 */
        n = floatx80_to_int32(fp1, status);
        fp1 = int32_to_floatx80(n, status);
        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        /*
         * Split the 2^(J/64) table value into a leading part (fact1) and
         * a 16-bit correction (fact2), pre-scaled by 2^M via the biased
         * exponent fields.
         */
        adjfact = packFloatx80(0, m1, one_sig);
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x3C800000), status),
                           status); /* (1/64)*N */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
        fp2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); /* LOG2 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR: exp(R) - 1 by a degree-5 polynomial in S = R*R */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        /* 2^M * (2^(J/64) + 2^(J/64)*(exp(R)-1)), added smallest first */
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        /* Final scaling by 2^(M') rounds in the caller's mode */
        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
1127 | ||
808d77bc LMP |
/*
 * 10 to x
 *
 * Returns 10^a in extended double precision, emulating the m68k FPSP
 * FTENTOX algorithm.  The argument is reduced with N = INT(a*64*log10/log2)
 * using a lead/trail split of log2/(64*log10) so X - N*L is computed
 * accurately, then R = (X - N*L)*log10 feeds the same exp(R)-1 polynomial
 * and exp2_tbl lookup used by floatx80_twotox.
 *
 * Special cases (handled before any rounding-state change):
 *   NaN  -> propagated NaN;  +inf -> +inf;  -inf -> +0;  +/-0 -> +1.
 *
 * The caller's rounding mode/precision are saved, forced to
 * nearest-even/extended internally, and restored before the final multiply
 * so only the last operation rounds in the user's mode.
 * float_flag_inexact is raised on every non-special path.
 */

floatx80 floatx80_tentox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0); /* 10^(-inf) = +0 */
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low); /* 10^(+inf) = +inf */
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig); /* 10^0 = 1, exact */
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400B9B07) {
        /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /*
             * Result overflows (positive X) or underflows (negative X);
             * round a huge/tiny value to trigger the right exceptions.
             */
            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* 10^X ~= 1 + X to working precision for such tiny X */
            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x406A934F0979A371),
                           status), status); /* X*64*LOG10/LOG2 */
        n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
        fp1 = int32_to_floatx80(n, status);

        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        /*
         * Split the 2^(J/64) table value into a leading part (fact1) and
         * a 16-bit correction (fact2), pre-scaled by 2^M via the biased
         * exponent fields.
         */
        adjfact = packFloatx80(0, m1, one_sig);
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        /* Subtract N*L in two pieces (lead + trail) to keep R accurate */
        fp2 = fp1; /* N */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3F734413509F8000), status),
                           status); /* N*(LOG2/64LOG10)_LEAD */
        fp3 = packFloatx80(1, 0x3FCD, UINT64_C(0xC0219DC1DA994FD2));
        fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
        fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
        fp2 = packFloatx80(0, 0x4000, UINT64_C(0x935D8DDDAAA8AC17)); /* LOG10 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR: exp(R) - 1 by a degree-5 polynomial in S = R*R */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        /* 2^M * (2^(J/64) + 2^(J/64)*(exp(R)-1)), added smallest first */
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        /* Final scaling by 2^(M') rounds in the caller's mode */
        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
27340180 | 1282 | |
808d77bc LMP |
/*
 * Tangent
 *
 * Returns tan(a) in extended double precision, emulating the m68k FPSP
 * FTAN algorithm.  The argument is reduced to R with |R| <= pi/4 via
 * N = INT(X*2/PI) and the pi_tbl/pi_tbl2 lead/trail tables (or, for
 * |X| >= 15 PI, via the iterative REDUCEX loop using a lead/trail split
 * of 2*pi).  tan(R) is then evaluated as a rational approximation
 * P(S)/Q(S) with S = R*R; when N is odd the cotangent identity
 * tan(R + N*pi/2) = -Q(S)/P(S) branch is used instead.
 *
 * Special cases (handled before any rounding-state change):
 *   NaN -> propagated NaN;  +/-inf -> invalid, default NaN;  +/-0 -> +/-0.
 *
 * The caller's rounding mode/precision are saved, forced to
 * nearest-even/extended internally, and restored before the final divide
 * so only the last operation rounds in the user's mode.
 * float_flag_inexact is raised on every non-special path.
 */

floatx80 floatx80_tan(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
    float32 twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status); /* tan(+/-inf) is invalid */
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); /* tan(+/-0) = +/-0, exact */
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /*
             * REDUCEX: iteratively subtract multiples of 2*pi, kept as a
             * head (fp0) plus compensation term (fp1) pair, until the
             * remainder is small enough for the table-based reduction.
             */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                /* Largest finite |X|: pre-subtract 2^16382*pi accurately */
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      UINT64_C(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      UINT64_C(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            /* Remove at most ~2^l multiples of 2*pi per pass */
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            /* Add/subtract 2^63 to round N to an integer in fp2 */
            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto tancont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* |X| < 2^(-40): tan(X) ~= X; round X in the user's mode */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        /* Table-based reduction: R = (X - Y1) - Y2, N = INT(X*2/PI) */
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n; /* pi_tbl is indexed with a +32 offset for negative N */

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    tancont:
        if (n & 1) {
            /* NODD: N odd, so tan(X) = -cot(R) = -Q(S)/P(S) */
            fp1 = fp0; /* R */
            fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
            fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            /* Negate the denominator by flipping its sign bit directly */
            xSign = extractFloatx80Sign(fp1);
            xExp = extractFloatx80Exp(fp1);
            xSig = extractFloatx80Frac(fp1);
            xSign ^= 1;
            fp1 = packFloatx80(xSign, xExp, xSig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* Final divide rounds in the caller's mode */
            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* N even: tan(X) = tan(R) = P(S)/Q(S) */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* Final divide rounds in the caller's mode */
            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
5add1ac4 | 1492 | |
808d77bc LMP |
1493 | /* |
1494 | * Sine | |
1495 | */ | |
5add1ac4 LV |
1496 | |
1497 | floatx80 floatx80_sin(floatx80 a, float_status *status) | |
1498 | { | |
1499 | flag aSign, xSign; | |
1500 | int32_t aExp, xExp; | |
1501 | uint64_t aSig, xSig; | |
1502 | ||
1503 | int8_t user_rnd_mode, user_rnd_prec; | |
1504 | ||
1505 | int32_t compact, l, n, j; | |
1506 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; | |
1507 | float32 posneg1, twoto63; | |
6361d298 | 1508 | flag endflag; |
5add1ac4 LV |
1509 | |
1510 | aSig = extractFloatx80Frac(a); | |
1511 | aExp = extractFloatx80Exp(a); | |
1512 | aSign = extractFloatx80Sign(a); | |
1513 | ||
1514 | if (aExp == 0x7FFF) { | |
1515 | if ((uint64_t) (aSig << 1)) { | |
1516 | return propagateFloatx80NaNOneArg(a, status); | |
1517 | } | |
1518 | float_raise(float_flag_invalid, status); | |
1519 | return floatx80_default_nan(status); | |
1520 | } | |
1521 | ||
1522 | if (aExp == 0 && aSig == 0) { | |
1523 | return packFloatx80(aSign, 0, 0); | |
1524 | } | |
1525 | ||
5add1ac4 LV |
1526 | user_rnd_mode = status->float_rounding_mode; |
1527 | user_rnd_prec = status->floatx80_rounding_precision; | |
1528 | status->float_rounding_mode = float_round_nearest_even; | |
1529 | status->floatx80_rounding_precision = 80; | |
1530 | ||
1531 | compact = floatx80_make_compact(aExp, aSig); | |
1532 | ||
1533 | fp0 = a; | |
1534 | ||
1535 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { | |
1536 | /* 2^(-40) > |X| > 15 PI */ | |
1537 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ | |
1538 | /* REDUCEX */ | |
1539 | fp1 = packFloatx80(0, 0, 0); | |
1540 | if (compact == 0x7FFEFFFF) { | |
1541 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, | |
e2326300 | 1542 | UINT64_C(0xC90FDAA200000000)); |
5add1ac4 | 1543 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, |
e2326300 | 1544 | UINT64_C(0x85A308D300000000)); |
5add1ac4 LV |
1545 | fp0 = floatx80_add(fp0, twopi1, status); |
1546 | fp1 = fp0; | |
1547 | fp0 = floatx80_add(fp0, twopi2, status); | |
1548 | fp1 = floatx80_sub(fp1, fp0, status); | |
1549 | fp1 = floatx80_add(fp1, twopi2, status); | |
1550 | } | |
1551 | loop: | |
1552 | xSign = extractFloatx80Sign(fp0); | |
1553 | xExp = extractFloatx80Exp(fp0); | |
1554 | xExp -= 0x3FFF; | |
1555 | if (xExp <= 28) { | |
1556 | l = 0; | |
1557 | endflag = 1; | |
1558 | } else { | |
1559 | l = xExp - 27; | |
1560 | endflag = 0; | |
1561 | } | |
1562 | invtwopi = packFloatx80(0, 0x3FFE - l, | |
e2326300 AB |
1563 | UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ |
1564 | twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000)); | |
1565 | twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000)); | |
5add1ac4 LV |
1566 | |
1567 | /* SIGN(INARG)*2^63 IN SGL */ | |
1568 | twoto63 = packFloat32(xSign, 0xBE, 0); | |
1569 | ||
1570 | fp2 = floatx80_mul(fp0, invtwopi, status); | |
1571 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), | |
1572 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ | |
1573 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), | |
1574 | status); /* FP2 is N */ | |
1575 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ | |
1576 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ | |
1577 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ | |
1578 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ | |
1579 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ | |
1580 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ | |
1581 | fp3 = fp0; /* FP3 is A */ | |
1582 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ | |
1583 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ | |
1584 | ||
1585 | if (endflag > 0) { | |
1586 | n = floatx80_to_int32(fp2, status); | |
1587 | goto sincont; | |
1588 | } | |
1589 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ | |
1590 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ | |
1591 | goto loop; | |
1592 | } else { | |
1593 | /* SINSM */ | |
1594 | fp0 = float32_to_floatx80(make_float32(0x3F800000), | |
1595 | status); /* 1 */ | |
1596 | ||
1597 | status->float_rounding_mode = user_rnd_mode; | |
1598 | status->floatx80_rounding_precision = user_rnd_prec; | |
1599 | ||
6361d298 LV |
1600 | /* SINTINY */ |
1601 | a = floatx80_move(a, status); | |
5add1ac4 LV |
1602 | float_raise(float_flag_inexact, status); |
1603 | ||
1604 | return a; | |
1605 | } | |
1606 | } else { | |
1607 | fp1 = floatx80_mul(fp0, float64_to_floatx80( | |
1608 | make_float64(0x3FE45F306DC9C883), status), | |
1609 | status); /* X*2/PI */ | |
1610 | ||
1611 | n = floatx80_to_int32(fp1, status); | |
1612 | j = 32 + n; | |
1613 | ||
1614 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ | |
1615 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), | |
1616 | status); /* FP0 IS R = (X-Y1)-Y2 */ | |
1617 | ||
1618 | sincont: | |
6361d298 | 1619 | if (n & 1) { |
5add1ac4 LV |
1620 | /* COSPOLY */ |
1621 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1622 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1623 | fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), | |
1624 | status); /* B8 */ | |
1625 | fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), | |
1626 | status); /* B7 */ | |
1627 | ||
1628 | xSign = extractFloatx80Sign(fp0); /* X IS S */ | |
1629 | xExp = extractFloatx80Exp(fp0); | |
1630 | xSig = extractFloatx80Frac(fp0); | |
1631 | ||
6361d298 | 1632 | if ((n >> 1) & 1) { |
5add1ac4 LV |
1633 | xSign ^= 1; |
1634 | posneg1 = make_float32(0xBF800000); /* -1 */ | |
1635 | } else { | |
1636 | xSign ^= 0; | |
1637 | posneg1 = make_float32(0x3F800000); /* 1 */ | |
1638 | } /* X IS NOW R'= SGN*R */ | |
1639 | ||
1640 | fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ | |
1641 | fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ | |
1642 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1643 | make_float64(0x3E21EED90612C972), status), | |
1644 | status); /* B6+TB8 */ | |
1645 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1646 | make_float64(0xBE927E4FB79D9FCF), status), | |
1647 | status); /* B5+TB7 */ | |
1648 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ | |
1649 | fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ | |
1650 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1651 | make_float64(0x3EFA01A01A01D423), status), | |
1652 | status); /* B4+T(B6+TB8) */ | |
e2326300 | 1653 | fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438)); |
5add1ac4 LV |
1654 | fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ |
1655 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ | |
1656 | fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ | |
e2326300 | 1657 | fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E)); |
5add1ac4 LV |
1658 | fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ |
1659 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
1660 | make_float32(0xBF000000), status), | |
1661 | status); /* B1+T(B3+T(B5+TB7)) */ | |
1662 | fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ | |
1663 | fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+ | |
1664 | * [S(B2+T(B4+T(B6+TB8)))] | |
1665 | */ | |
1666 | ||
1667 | x = packFloatx80(xSign, xExp, xSig); | |
1668 | fp0 = floatx80_mul(fp0, x, status); | |
1669 | ||
1670 | status->float_rounding_mode = user_rnd_mode; | |
1671 | status->floatx80_rounding_precision = user_rnd_prec; | |
1672 | ||
1673 | a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); | |
1674 | ||
1675 | float_raise(float_flag_inexact, status); | |
1676 | ||
1677 | return a; | |
1678 | } else { | |
1679 | /* SINPOLY */ | |
1680 | xSign = extractFloatx80Sign(fp0); /* X IS R */ | |
1681 | xExp = extractFloatx80Exp(fp0); | |
1682 | xSig = extractFloatx80Frac(fp0); | |
1683 | ||
6361d298 | 1684 | xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */ |
5add1ac4 LV |
1685 | |
1686 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1687 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1688 | fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), | |
1689 | status); /* A7 */ | |
1690 | fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), | |
1691 | status); /* A6 */ | |
1692 | fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ | |
1693 | fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ | |
1694 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1695 | make_float64(0xBE5AE6452A118AE4), status), | |
1696 | status); /* A5+T*A7 */ | |
1697 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1698 | make_float64(0x3EC71DE3A5341531), status), | |
1699 | status); /* A4+T*A6 */ | |
1700 | fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ | |
1701 | fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ | |
1702 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1703 | make_float64(0xBF2A01A01A018B59), status), | |
1704 | status); /* A3+T(A5+TA7) */ | |
e2326300 | 1705 | fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF)); |
5add1ac4 LV |
1706 | fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ |
1707 | fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ | |
1708 | fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ | |
e2326300 | 1709 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99)); |
5add1ac4 LV |
1710 | fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ |
1711 | fp1 = floatx80_add(fp1, fp2, | |
1712 | status); /* [A1+T(A3+T(A5+TA7))]+ | |
1713 | * [S(A2+T(A4+TA6))] | |
1714 | */ | |
1715 | ||
1716 | x = packFloatx80(xSign, xExp, xSig); | |
1717 | fp0 = floatx80_mul(fp0, x, status); /* R'*S */ | |
1718 | fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ | |
1719 | ||
1720 | status->float_rounding_mode = user_rnd_mode; | |
1721 | status->floatx80_rounding_precision = user_rnd_prec; | |
1722 | ||
1723 | a = floatx80_add(fp0, x, status); | |
1724 | ||
1725 | float_raise(float_flag_inexact, status); | |
1726 | ||
1727 | return a; | |
1728 | } | |
1729 | } | |
1730 | } | |
68d0ed37 | 1731 | |
808d77bc LMP |
1732 | /* |
1733 | * Cosine | |
1734 | */ | |
68d0ed37 LV |
1735 | |
/*
 * Cosine of the extended-precision value `a'.
 *
 * FPSP-derived algorithm: the argument is reduced by N = round(X * 2/pi)
 * to R = (X - Y1) - Y2 with |R| <= pi/4 (Y1/Y2 are the high/low parts of
 * N*pi/2 taken from pi_tbl/pi_tbl2, or computed iteratively for huge
 * inputs), then either the cosine or the sine polynomial on R is
 * evaluated.  The code is identical to the floatx80_sin tail except that
 * the quadrant selection uses N+1 in place of N.
 *
 * The caller's rounding mode/precision are saved and restored; the
 * polynomial work runs in round-to-nearest at full extended precision.
 * Raises inexact on every finite non-zero input; invalid on infinity.
 */
floatx80 floatx80_cos(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status); /* NaN in */
        }
        /* cos(+/-inf) is invalid */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig); /* cos(+/-0) = +1 exactly */
    }

    /* force round-to-nearest / extended precision for the reduction */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX: iterative argument reduction for huge |X| */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                /*
                 * |X| is at the very top of the finite range: pre-subtract
                 * sign(X)*2pi (split into a high part twopi1 and a low
                 * correction twopi2) so the loop below cannot overflow.
                 */
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      UINT64_C(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      UINT64_C(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                /* small enough for one final reduction pass */
                l = 0;
                endflag = 1;
            } else {
                /* strip 27 bits of exponent per iteration */
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            fp2 = floatx80_mul(fp0, invtwopi, status);
            /* add/subtract 2^63 so that rounding discards the fraction */
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            /* keep the reduction error for the next iteration */
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* SINSM: |X| < 2^(-40), cos(X) rounds from 1 */
            fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* COSTINY: 1 minus a tiny value, to raise inexact and honour
             * the caller's rounding direction */
            a = floatx80_sub(fp0, float32_to_floatx80(
                             make_float32(0x00800000), status),
                             status);
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        /* table-driven reduction: N = round(X*2/pi), R = (X-Y1)-Y2 */
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n; /* pi_tbl is indexed from N = -32 */

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    sincont:
        /* quadrant select: cos uses N+1 where sin uses N */
        if ((n + 1) & 1) {
            /* COSPOLY: even-polynomial approximation of cos(R) */
            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
                                      status); /* B8 */
            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
                                      status); /* B7 */

            xSign = extractFloatx80Sign(fp0); /* X IS S */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            if (((n + 1) >> 1) & 1) {
                xSign ^= 1;
                posneg1 = make_float32(0xBF800000); /* -1 */
            } else {
                xSign ^= 0;
                posneg1 = make_float32(0x3F800000); /* 1 */
            } /* X IS NOW R'= SGN*R */

            /* two interleaved Horner chains (fp2/fp3) for parallelism */
            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3E21EED90612C972), status),
                               status); /* B6+TB8 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE927E4FB79D9FCF), status),
                               status); /* B5+TB7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A01A01D423), status),
                               status); /* B4+T(B6+TB8) */
            fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438));
            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
            fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E));
            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0xBF000000), status),
                               status); /* B1+T(B3+T(B5+TB7)) */
            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
            fp0 = floatx80_add(fp0, fp1, status);
            /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            /* restore caller's mode for the final, correctly-rounded add */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* SINPOLY: odd-polynomial approximation of sin(R) */
            xSign = extractFloatx80Sign(fp0); /* X IS R */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */

            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
                                      status); /* A7 */
            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
                                      status); /* A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE5AE6452A118AE4), status),
                               status); /* A5+T*A7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5341531), status),
                               status); /* A4+T*A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF2A01A01A018B59), status),
                               status); /* A3+T(A5+TA7) */
            fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF));
            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
            fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99));
            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
            fp1 = floatx80_add(fp1, fp2, status);
            /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */

            /* restore caller's mode for the final, correctly-rounded add */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, x, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
8c992abc | 1968 | |
808d77bc LMP |
1969 | /* |
1970 | * Arc tangent | |
1971 | */ | |
8c992abc LV |
1972 | |
/*
 * Arc tangent of the extended-precision value `a'.
 *
 * FPSP-derived algorithm, split by magnitude:
 *  - |X| < 2^(-40): atan(X) ~= X; return X (moved, so inexact is raised).
 *  - 1/16 <= |X| < 16: atan(X) = atan(F) + atan(U), where F is X with its
 *    significand truncated to 5 leading bits (plus a forced rounding bit),
 *    U = (X-F)/(1+X*F), atan(F) is looked up in atan_tbl and atan(U) is a
 *    short polynomial in U.
 *  - 16 <= |X| <= 2^100: atan(X) = sign(X)*pi/2 + atan(-1/X), with
 *    atan(X') for X' = -1/X evaluated by polynomial.
 *  - |X| > 2^100: result is sign(X)*pi/2 minus a tiny sign-matched value
 *    (so it is not exact and inexact/directed rounding take effect).
 *
 * Saves and restores the caller's rounding mode/precision; internal work
 * runs round-to-nearest at full extended precision.
 */
floatx80 floatx80_atan(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, tbl_index;
    floatx80 fp0, fp1, fp2, fp3, xsave;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status); /* NaN in */
        }
        /* atan(+/-inf) = +/-pi/2, inexact */
        a = packFloatx80(aSign, piby2_exp, pi_sig);
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); /* atan(+/-0) = +/-0 exactly */
    }

    compact = floatx80_make_compact(aExp, aSig);

    /* force round-to-nearest / extended precision internally */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
        /* |X| >= 16 or |X| < 1/16 */
        if (compact > 0x3FFF8000) { /* |X| >= 16 */
            if (compact > 0x40638000) { /* |X| > 2^(100) */
                /* result is pi/2 less a tiny amount, with the sign of X */
                fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
                fp1 = packFloatx80(aSign, 0x0001, one_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_sub(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* atan(X) = sign(X)*pi/2 + atan(X'), X' = -1/X */
                fp0 = a;
                fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
                fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
                xsave = fp1;
                fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
                                          status); /* C5 */
                fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
                                          status); /* C4 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924827107B8), status),
                                   status); /* C3+Z*C5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC999999996263E), status),
                                   status); /* C2+Z*C4 */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555536), status),
                                   status); /* C1+Z*(C3+Z*C5) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
                /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X'*Y*([C1+Z*(C3+Z*C5)]+[Y*(C2+Z*C4)]) */
                fp0 = floatx80_mul(fp0, fp1, status);
                fp0 = floatx80_add(fp0, xsave, status); /* atan(X') */
                fp1 = packFloatx80(aSign, piby2_exp, pi_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        } else { /* |X| < 1/16 */
            if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
                /* atan(X) ~= X; move X to raise inexact under user mode */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_move(a, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* direct polynomial, no table step needed for small |X| */
                fp0 = a;
                xsave = a;
                fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
                                          status); /* B6 */
                fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
                                          status); /* B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FBC71C646940220), status),
                                   status); /* B4+Z*B6 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924921872F9),
                                   status), status); /* B3+Z*B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC9999999998FA9), status),
                                   status); /* B2+Z*(B4+Z*B6) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555555), status),
                                   status); /* B1+Z*(B3+Z*B5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
                /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
                fp0 = floatx80_mul(fp0, fp1, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, xsave, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else {
        /* 1/16 <= |X| < 16: F = X truncated to 5 significand bits,
         * with a forced rounding bit so F sits mid-interval */
        aSig &= UINT64_C(0xF800000000000000);
        aSig |= UINT64_C(0x0400000000000000);
        xsave = packFloatx80(aSign, aExp, aSig); /* F */
        fp0 = a;
        fp1 = a; /* X */
        fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
        fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
        fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
        fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
        fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */

        /* build the atan_tbl index from the exponent and the 4 fraction
         * bits below the leading bit of |X| */
        tbl_index = compact;

        tbl_index &= 0x7FFF0000;
        tbl_index -= 0x3FFB0000;
        tbl_index >>= 1;
        tbl_index += compact & 0x00007800;
        tbl_index >>= 11;

        fp3 = atan_tbl[tbl_index];

        fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F), with the sign of X */

        fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */
        fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
                                  status); /* A3 */
        fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
        fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x4002AC6934A26DB3), status),
                           status); /* A2+V*(A3+V) */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0xBFC2476F4E1DA28E), status),
                           status); /* A1*U*V */
        fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
        fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, fp3, status); /* ATAN(X) */

        float_raise(float_flag_inexact, status);

        return a;
    }
}
bc20b34e | 2165 | |
808d77bc LMP |
2166 | /* |
2167 | * Arc sine | |
2168 | */ | |
bc20b34e LV |
2169 | |
/*
 * Arc sine of the extended-precision value `a'.
 *
 * Computed via the identity asin(X) = atan(X / sqrt((1-X)*(1+X))),
 * delegating the final step to floatx80_atan.  Domain is [-1, 1]:
 * |X| == 1 returns +/-pi/2 (inexact); |X| > 1 raises invalid and
 * returns the default NaN.  The caller's rounding mode/precision are
 * restored before the atan call so the result is rounded as requested.
 */
floatx80 floatx80_asin(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status); /* NaN in */
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); /* asin(+/-0) = +/-0 exactly */
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            /* asin(+/-1) = +/-pi/2, inexact */
            float_raise(float_flag_inexact, status);
            a = packFloatx80(aSign, piby2_exp, pi_sig);
            return floatx80_move(a, status);
        } else { /* |X| > 1: outside the domain */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }

    } /* |X| < 1 */

    /* intermediate work in round-to-nearest / extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp2 = floatx80_add(one, fp0, status); /* 1 + X */
    fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */
    fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */
    fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */

    /* restore the caller's mode; atan does the final rounding */
    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */

    float_raise(float_flag_inexact, status);

    return a;
}
c84813b8 | 2230 | |
808d77bc LMP |
2231 | /* |
2232 | * Arc cosine | |
2233 | */ | |
c84813b8 LV |
2234 | |
/*
 * Arc cosine of the extended-precision value `a'.
 *
 * Computed via the identity acos(X) = 2 * atan(sqrt((1-X)/(1+X))),
 * delegating to floatx80_atan.  Domain is [-1, 1]: acos(0) = pi/2
 * (rounded at the caller's precision), acos(-1) = pi, acos(+1) = +0
 * exactly; |X| > 1 raises invalid and returns the default NaN.
 */
floatx80 floatx80_acos(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status); /* NaN in */
    }
    if (aExp == 0 && aSig == 0) {
        /* acos(+/-0) = pi/2, rounded to the caller's precision */
        float_raise(float_flag_inexact, status);
        return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                    piby2_exp, pi_sig, 0, status);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            if (aSign) { /* X == -1 */
                /* acos(-1) = pi, inexact */
                a = packFloatx80(0, pi_exp, pi_sig);
                float_raise(float_flag_inexact, status);
                return floatx80_move(a, status);
            } else { /* X == +1 */
                return packFloatx80(0, 0, 0); /* acos(+1) = +0 exactly */
            }
        } else { /* |X| > 1: outside the domain */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    /* intermediate work in round-to-nearest / extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_add(one, fp0, status); /* 1 + X */
    fp0 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */
    fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */
    fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */

    /* restore caller's mode for the final, correctly-rounded doubling */
    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */

    float_raise(float_flag_inexact, status);

    return a;
}
e3655afa | 2299 | |
808d77bc LMP |
2300 | /* |
2301 | * Hyperbolic arc tangent | |
2302 | */ | |
e3655afa LV |
2303 | |
/*
 * Hyperbolic arc tangent of the extended-precision value `a'.
 *
 * Computed via the identity
 *   atanh(X) = sign(X) * (1/2) * log1p(2|X| / (1 - |X|)),
 * delegating the logarithm to floatx80_lognp1.  Domain is (-1, 1):
 * |X| == 1 raises divide-by-zero and returns a signed infinity;
 * |X| > 1 raises invalid and returns the default NaN.
 */
floatx80 floatx80_atanh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status); /* NaN in */
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); /* atanh(+/-0) = +/-0 exactly */
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            /* atanh(+/-1) is a pole: signed infinity, divide-by-zero */
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, floatx80_infinity.high,
                                floatx80_infinity.low);
        } else { /* |X| > 1: outside the domain */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    /* intermediate work in round-to-nearest / extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
    fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
    fp1 = packFloatx80(1, aExp, aSig); /* -Y */
    fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
    fp1 = floatx80_add(fp1, one, status); /* 1-Y */
    fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
    fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */

    /* restore caller's mode for the final, correctly-rounded scaling */
    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_mul(fp0, fp2,
                     status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */

    float_raise(float_flag_inexact, status);

    return a;
}
9937b029 | 2364 | |
808d77bc LMP |
2365 | /* |
2366 | * e to x minus 1 | |
2367 | */ | |
9937b029 LV |
2368 | |
2369 | floatx80 floatx80_etoxm1(floatx80 a, float_status *status) | |
2370 | { | |
2371 | flag aSign; | |
2372 | int32_t aExp; | |
2373 | uint64_t aSig; | |
2374 | ||
2375 | int8_t user_rnd_mode, user_rnd_prec; | |
2376 | ||
2377 | int32_t compact, n, j, m, m1; | |
2378 | floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc; | |
2379 | ||
2380 | aSig = extractFloatx80Frac(a); | |
2381 | aExp = extractFloatx80Exp(a); | |
2382 | aSign = extractFloatx80Sign(a); | |
2383 | ||
2384 | if (aExp == 0x7FFF) { | |
2385 | if ((uint64_t) (aSig << 1)) { | |
2386 | return propagateFloatx80NaNOneArg(a, status); | |
2387 | } | |
2388 | if (aSign) { | |
2389 | return packFloatx80(aSign, one_exp, one_sig); | |
2390 | } | |
2391 | return packFloatx80(0, floatx80_infinity.high, | |
2392 | floatx80_infinity.low); | |
2393 | } | |
2394 | ||
2395 | if (aExp == 0 && aSig == 0) { | |
2396 | return packFloatx80(aSign, 0, 0); | |
2397 | } | |
2398 | ||
2399 | user_rnd_mode = status->float_rounding_mode; | |
2400 | user_rnd_prec = status->floatx80_rounding_precision; | |
2401 | status->float_rounding_mode = float_round_nearest_even; | |
2402 | status->floatx80_rounding_precision = 80; | |
2403 | ||
2404 | if (aExp >= 0x3FFD) { /* |X| >= 1/4 */ | |
2405 | compact = floatx80_make_compact(aExp, aSig); | |
2406 | ||
2407 | if (compact <= 0x4004C215) { /* |X| <= 70 log2 */ | |
2408 | fp0 = a; | |
2409 | fp1 = a; | |
2410 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
2411 | make_float32(0x42B8AA3B), status), | |
2412 | status); /* 64/log2 * X */ | |
2413 | n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ | |
2414 | fp0 = int32_to_floatx80(n, status); | |
2415 | ||
2416 | j = n & 0x3F; /* J = N mod 64 */ | |
2417 | m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ | |
2418 | if (n < 0 && j) { | |
808d77bc LMP |
2419 | /* |
2420 | * arithmetic right shift is division and | |
9937b029 LV |
2421 | * round towards minus infinity |
2422 | */ | |
2423 | m--; | |
2424 | } | |
2425 | m1 = -m; | |
2426 | /*m += 0x3FFF; // biased exponent of 2^(M) */ | |
2427 | /*m1 += 0x3FFF; // biased exponent of -2^(-M) */ | |
2428 | ||
2429 | fp2 = fp0; /* N */ | |
2430 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
2431 | make_float32(0xBC317218), status), | |
2432 | status); /* N * L1, L1 = lead(-log2/64) */ | |
e2326300 | 2433 | l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6)); |
9937b029 LV |
2434 | fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */ |
2435 | fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */ | |
2436 | fp0 = floatx80_add(fp0, fp2, status); /* R */ | |
2437 | ||
2438 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ | |
2439 | fp2 = float32_to_floatx80(make_float32(0x3950097B), | |
2440 | status); /* A6 */ | |
2441 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */ | |
2442 | fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A), | |
2443 | status), fp1, status); /* fp3 is S*A5 */ | |
2444 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2445 | make_float64(0x3F81111111174385), status), | |
2446 | status); /* fp2 IS A4+S*A6 */ | |
2447 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
2448 | make_float64(0x3FA5555555554F5A), status), | |
2449 | status); /* fp3 is A3+S*A5 */ | |
2450 | fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */ | |
2451 | fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */ | |
2452 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2453 | make_float64(0x3FC5555555555555), status), | |
2454 | status); /* fp2 IS A2+S*(A4+S*A6) */ | |
2455 | fp3 = floatx80_add(fp3, float32_to_floatx80( | |
2456 | make_float32(0x3F000000), status), | |
2457 | status); /* fp3 IS A1+S*(A3+S*A5) */ | |
2458 | fp2 = floatx80_mul(fp2, fp1, | |
2459 | status); /* fp2 IS S*(A2+S*(A4+S*A6)) */ | |
2460 | fp1 = floatx80_mul(fp1, fp3, | |
2461 | status); /* fp1 IS S*(A1+S*(A3+S*A5)) */ | |
2462 | fp2 = floatx80_mul(fp2, fp0, | |
2463 | status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */ | |
2464 | fp0 = floatx80_add(fp0, fp1, | |
2465 | status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */ | |
2466 | fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */ | |
2467 | ||
2468 | fp0 = floatx80_mul(fp0, exp_tbl[j], | |
2469 | status); /* 2^(J/64)*(Exp(R)-1) */ | |
2470 | ||
2471 | if (m >= 64) { | |
2472 | fp1 = float32_to_floatx80(exp_tbl2[j], status); | |
2473 | onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ | |
2474 | fp1 = floatx80_add(fp1, onebysc, status); | |
2475 | fp0 = floatx80_add(fp0, fp1, status); | |
2476 | fp0 = floatx80_add(fp0, exp_tbl[j], status); | |
2477 | } else if (m < -3) { | |
2478 | fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], | |
2479 | status), status); | |
2480 | fp0 = floatx80_add(fp0, exp_tbl[j], status); | |
2481 | onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ | |
2482 | fp0 = floatx80_add(fp0, onebysc, status); | |
2483 | } else { /* -3 <= m <= 63 */ | |
2484 | fp1 = exp_tbl[j]; | |
2485 | fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], | |
2486 | status), status); | |
2487 | onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ | |
2488 | fp1 = floatx80_add(fp1, onebysc, status); | |
2489 | fp0 = floatx80_add(fp0, fp1, status); | |
2490 | } | |
2491 | ||
2492 | sc = packFloatx80(0, m + 0x3FFF, one_sig); | |
2493 | ||
2494 | status->float_rounding_mode = user_rnd_mode; | |
2495 | status->floatx80_rounding_precision = user_rnd_prec; | |
2496 | ||
2497 | a = floatx80_mul(fp0, sc, status); | |
2498 | ||
2499 | float_raise(float_flag_inexact, status); | |
2500 | ||
2501 | return a; | |
2502 | } else { /* |X| > 70 log2 */ | |
2503 | if (aSign) { | |
2504 | fp0 = float32_to_floatx80(make_float32(0xBF800000), | |
2505 | status); /* -1 */ | |
2506 | ||
2507 | status->float_rounding_mode = user_rnd_mode; | |
2508 | status->floatx80_rounding_precision = user_rnd_prec; | |
2509 | ||
2510 | a = floatx80_add(fp0, float32_to_floatx80( | |
2511 | make_float32(0x00800000), status), | |
2512 | status); /* -1 + 2^(-126) */ | |
2513 | ||
2514 | float_raise(float_flag_inexact, status); | |
2515 | ||
2516 | return a; | |
2517 | } else { | |
2518 | status->float_rounding_mode = user_rnd_mode; | |
2519 | status->floatx80_rounding_precision = user_rnd_prec; | |
2520 | ||
2521 | return floatx80_etox(a, status); | |
2522 | } | |
2523 | } | |
2524 | } else { /* |X| < 1/4 */ | |
2525 | if (aExp >= 0x3FBE) { | |
2526 | fp0 = a; | |
2527 | fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */ | |
2528 | fp1 = float32_to_floatx80(make_float32(0x2F30CAA8), | |
2529 | status); /* B12 */ | |
2530 | fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */ | |
2531 | fp2 = float32_to_floatx80(make_float32(0x310F8290), | |
2532 | status); /* B11 */ | |
2533 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
2534 | make_float32(0x32D73220), status), | |
2535 | status); /* B10 */ | |
2536 | fp2 = floatx80_mul(fp2, fp0, status); | |
2537 | fp1 = floatx80_mul(fp1, fp0, status); | |
2538 | fp2 = floatx80_add(fp2, float32_to_floatx80( | |
2539 | make_float32(0x3493F281), status), | |
2540 | status); /* B9 */ | |
2541 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2542 | make_float64(0x3EC71DE3A5774682), status), | |
2543 | status); /* B8 */ | |
2544 | fp2 = floatx80_mul(fp2, fp0, status); | |
2545 | fp1 = floatx80_mul(fp1, fp0, status); | |
2546 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2547 | make_float64(0x3EFA01A019D7CB68), status), | |
2548 | status); /* B7 */ | |
2549 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2550 | make_float64(0x3F2A01A01A019DF3), status), | |
2551 | status); /* B6 */ | |
2552 | fp2 = floatx80_mul(fp2, fp0, status); | |
2553 | fp1 = floatx80_mul(fp1, fp0, status); | |
2554 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2555 | make_float64(0x3F56C16C16C170E2), status), | |
2556 | status); /* B5 */ | |
2557 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2558 | make_float64(0x3F81111111111111), status), | |
2559 | status); /* B4 */ | |
2560 | fp2 = floatx80_mul(fp2, fp0, status); | |
2561 | fp1 = floatx80_mul(fp1, fp0, status); | |
2562 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2563 | make_float64(0x3FA5555555555555), status), | |
2564 | status); /* B3 */ | |
e2326300 | 2565 | fp3 = packFloatx80(0, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAAAB)); |
9937b029 LV |
2566 | fp1 = floatx80_add(fp1, fp3, status); /* B2 */ |
2567 | fp2 = floatx80_mul(fp2, fp0, status); | |
2568 | fp1 = floatx80_mul(fp1, fp0, status); | |
2569 | ||
2570 | fp2 = floatx80_mul(fp2, fp0, status); | |
2571 | fp1 = floatx80_mul(fp1, a, status); | |
2572 | ||
2573 | fp0 = floatx80_mul(fp0, float32_to_floatx80( | |
2574 | make_float32(0x3F000000), status), | |
2575 | status); /* S*B1 */ | |
2576 | fp1 = floatx80_add(fp1, fp2, status); /* Q */ | |
2577 | fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */ | |
2578 | ||
2579 | status->float_rounding_mode = user_rnd_mode; | |
2580 | status->floatx80_rounding_precision = user_rnd_prec; | |
2581 | ||
2582 | a = floatx80_add(fp0, a, status); | |
2583 | ||
2584 | float_raise(float_flag_inexact, status); | |
2585 | ||
2586 | return a; | |
2587 | } else { /* |X| < 2^(-65) */ | |
2588 | sc = packFloatx80(1, 1, one_sig); | |
2589 | fp0 = a; | |
2590 | ||
2591 | if (aExp < 0x0033) { /* |X| < 2^(-16382) */ | |
2592 | fp0 = floatx80_mul(fp0, float64_to_floatx80( | |
2593 | make_float64(0x48B0000000000000), status), | |
2594 | status); | |
2595 | fp0 = floatx80_add(fp0, sc, status); | |
2596 | ||
2597 | status->float_rounding_mode = user_rnd_mode; | |
2598 | status->floatx80_rounding_precision = user_rnd_prec; | |
2599 | ||
2600 | a = floatx80_mul(fp0, float64_to_floatx80( | |
2601 | make_float64(0x3730000000000000), status), | |
2602 | status); | |
2603 | } else { | |
2604 | status->float_rounding_mode = user_rnd_mode; | |
2605 | status->floatx80_rounding_precision = user_rnd_prec; | |
2606 | ||
2607 | a = floatx80_add(fp0, sc, status); | |
2608 | } | |
2609 | ||
2610 | float_raise(float_flag_inexact, status); | |
2611 | ||
2612 | return a; | |
2613 | } | |
2614 | } | |
2615 | } | |
2616 | ||
/*
 * Hyperbolic tangent
 */

2621 | floatx80 floatx80_tanh(floatx80 a, float_status *status) | |
2622 | { | |
2623 | flag aSign, vSign; | |
2624 | int32_t aExp, vExp; | |
2625 | uint64_t aSig, vSig; | |
2626 | ||
2627 | int8_t user_rnd_mode, user_rnd_prec; | |
2628 | ||
2629 | int32_t compact; | |
2630 | floatx80 fp0, fp1; | |
2631 | uint32_t sign; | |
2632 | ||
2633 | aSig = extractFloatx80Frac(a); | |
2634 | aExp = extractFloatx80Exp(a); | |
2635 | aSign = extractFloatx80Sign(a); | |
2636 | ||
2637 | if (aExp == 0x7FFF) { | |
2638 | if ((uint64_t) (aSig << 1)) { | |
2639 | return propagateFloatx80NaNOneArg(a, status); | |
2640 | } | |
2641 | return packFloatx80(aSign, one_exp, one_sig); | |
2642 | } | |
2643 | ||
2644 | if (aExp == 0 && aSig == 0) { | |
2645 | return packFloatx80(aSign, 0, 0); | |
2646 | } | |
2647 | ||
2648 | user_rnd_mode = status->float_rounding_mode; | |
2649 | user_rnd_prec = status->floatx80_rounding_precision; | |
2650 | status->float_rounding_mode = float_round_nearest_even; | |
2651 | status->floatx80_rounding_precision = 80; | |
2652 | ||
2653 | compact = floatx80_make_compact(aExp, aSig); | |
2654 | ||
2655 | if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) { | |
2656 | /* TANHBORS */ | |
2657 | if (compact < 0x3FFF8000) { | |
2658 | /* TANHSM */ | |
2659 | status->float_rounding_mode = user_rnd_mode; | |
2660 | status->floatx80_rounding_precision = user_rnd_prec; | |
2661 | ||
2662 | a = floatx80_move(a, status); | |
2663 | ||
2664 | float_raise(float_flag_inexact, status); | |
2665 | ||
2666 | return a; | |
2667 | } else { | |
2668 | if (compact > 0x40048AA1) { | |
2669 | /* TANHHUGE */ | |
2670 | sign = 0x3F800000; | |
2671 | sign |= aSign ? 0x80000000 : 0x00000000; | |
2672 | fp0 = float32_to_floatx80(make_float32(sign), status); | |
2673 | sign &= 0x80000000; | |
2674 | sign ^= 0x80800000; /* -SIGN(X)*EPS */ | |
2675 | ||
2676 | status->float_rounding_mode = user_rnd_mode; | |
2677 | status->floatx80_rounding_precision = user_rnd_prec; | |
2678 | ||
2679 | a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign), | |
2680 | status), status); | |
2681 | ||
2682 | float_raise(float_flag_inexact, status); | |
2683 | ||
2684 | return a; | |
2685 | } else { | |
2686 | fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ | |
2687 | fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */ | |
2688 | fp0 = floatx80_add(fp0, float32_to_floatx80( | |
2689 | make_float32(0x3F800000), | |
2690 | status), status); /* EXP(Y)+1 */ | |
2691 | sign = aSign ? 0x80000000 : 0x00000000; | |
2692 | fp1 = floatx80_div(float32_to_floatx80(make_float32( | |
2693 | sign ^ 0xC0000000), status), fp0, | |
2694 | status); /* -SIGN(X)*2 / [EXP(Y)+1] */ | |
2695 | fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000), | |
2696 | status); /* SIGN */ | |
2697 | ||
2698 | status->float_rounding_mode = user_rnd_mode; | |
2699 | status->floatx80_rounding_precision = user_rnd_prec; | |
2700 | ||
2701 | a = floatx80_add(fp1, fp0, status); | |
2702 | ||
2703 | float_raise(float_flag_inexact, status); | |
2704 | ||
2705 | return a; | |
2706 | } | |
2707 | } | |
2708 | } else { /* 2**(-40) < |X| < (5/2)LOG2 */ | |
2709 | fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ | |
2710 | fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ | |
2711 | fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000), | |
2712 | status), | |
2713 | status); /* Z+2 */ | |
2714 | ||
2715 | vSign = extractFloatx80Sign(fp1); | |
2716 | vExp = extractFloatx80Exp(fp1); | |
2717 | vSig = extractFloatx80Frac(fp1); | |
2718 | ||
2719 | fp1 = packFloatx80(vSign ^ aSign, vExp, vSig); | |
2720 | ||
2721 | status->float_rounding_mode = user_rnd_mode; | |
2722 | status->floatx80_rounding_precision = user_rnd_prec; | |
2723 | ||
2724 | a = floatx80_div(fp0, fp1, status); | |
2725 | ||
2726 | float_raise(float_flag_inexact, status); | |
2727 | ||
2728 | return a; | |
2729 | } | |
2730 | } | |

/*
 * Hyperbolic sine
 */

2736 | floatx80 floatx80_sinh(floatx80 a, float_status *status) | |
2737 | { | |
2738 | flag aSign; | |
2739 | int32_t aExp; | |
2740 | uint64_t aSig; | |
2741 | ||
2742 | int8_t user_rnd_mode, user_rnd_prec; | |
2743 | ||
2744 | int32_t compact; | |
2745 | floatx80 fp0, fp1, fp2; | |
2746 | float32 fact; | |
2747 | ||
2748 | aSig = extractFloatx80Frac(a); | |
2749 | aExp = extractFloatx80Exp(a); | |
2750 | aSign = extractFloatx80Sign(a); | |
2751 | ||
2752 | if (aExp == 0x7FFF) { | |
2753 | if ((uint64_t) (aSig << 1)) { | |
2754 | return propagateFloatx80NaNOneArg(a, status); | |
2755 | } | |
2756 | return packFloatx80(aSign, floatx80_infinity.high, | |
2757 | floatx80_infinity.low); | |
2758 | } | |
2759 | ||
2760 | if (aExp == 0 && aSig == 0) { | |
2761 | return packFloatx80(aSign, 0, 0); | |
2762 | } | |
2763 | ||
2764 | user_rnd_mode = status->float_rounding_mode; | |
2765 | user_rnd_prec = status->floatx80_rounding_precision; | |
2766 | status->float_rounding_mode = float_round_nearest_even; | |
2767 | status->floatx80_rounding_precision = 80; | |
2768 | ||
2769 | compact = floatx80_make_compact(aExp, aSig); | |
2770 | ||
2771 | if (compact > 0x400CB167) { | |
2772 | /* SINHBIG */ | |
2773 | if (compact > 0x400CB2B3) { | |
2774 | status->float_rounding_mode = user_rnd_mode; | |
2775 | status->floatx80_rounding_precision = user_rnd_prec; | |
2776 | ||
2777 | return roundAndPackFloatx80(status->floatx80_rounding_precision, | |
2778 | aSign, 0x8000, aSig, 0, status); | |
2779 | } else { | |
2780 | fp0 = floatx80_abs(a); /* Y = |X| */ | |
2781 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2782 | make_float64(0x40C62D38D3D64634), status), | |
2783 | status); /* (|X|-16381LOG2_LEAD) */ | |
2784 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2785 | make_float64(0x3D6F90AEB1E75CC7), status), | |
2786 | status); /* |X| - 16381 LOG2, ACCURATE */ | |
2787 | fp0 = floatx80_etox(fp0, status); | |
2788 | fp2 = packFloatx80(aSign, 0x7FFB, one_sig); | |
2789 | ||
2790 | status->float_rounding_mode = user_rnd_mode; | |
2791 | status->floatx80_rounding_precision = user_rnd_prec; | |
2792 | ||
2793 | a = floatx80_mul(fp0, fp2, status); | |
2794 | ||
2795 | float_raise(float_flag_inexact, status); | |
2796 | ||
2797 | return a; | |
2798 | } | |
2799 | } else { /* |X| < 16380 LOG2 */ | |
2800 | fp0 = floatx80_abs(a); /* Y = |X| */ | |
2801 | fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ | |
2802 | fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
2803 | status), status); /* 1+Z */ | |
2804 | fp2 = fp0; | |
2805 | fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */ | |
2806 | fp0 = floatx80_add(fp0, fp2, status); | |
2807 | ||
2808 | fact = packFloat32(aSign, 0x7E, 0); | |
2809 | ||
2810 | status->float_rounding_mode = user_rnd_mode; | |
2811 | status->floatx80_rounding_precision = user_rnd_prec; | |
2812 | ||
2813 | a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status); | |
2814 | ||
2815 | float_raise(float_flag_inexact, status); | |
2816 | ||
2817 | return a; | |
2818 | } | |
2819 | } | |

/*
 * Hyperbolic cosine
 */

2825 | floatx80 floatx80_cosh(floatx80 a, float_status *status) | |
2826 | { | |
2827 | int32_t aExp; | |
2828 | uint64_t aSig; | |
2829 | ||
2830 | int8_t user_rnd_mode, user_rnd_prec; | |
2831 | ||
2832 | int32_t compact; | |
2833 | floatx80 fp0, fp1; | |
2834 | ||
2835 | aSig = extractFloatx80Frac(a); | |
2836 | aExp = extractFloatx80Exp(a); | |
2837 | ||
2838 | if (aExp == 0x7FFF) { | |
2839 | if ((uint64_t) (aSig << 1)) { | |
2840 | return propagateFloatx80NaNOneArg(a, status); | |
2841 | } | |
2842 | return packFloatx80(0, floatx80_infinity.high, | |
2843 | floatx80_infinity.low); | |
2844 | } | |
2845 | ||
2846 | if (aExp == 0 && aSig == 0) { | |
2847 | return packFloatx80(0, one_exp, one_sig); | |
2848 | } | |
2849 | ||
2850 | user_rnd_mode = status->float_rounding_mode; | |
2851 | user_rnd_prec = status->floatx80_rounding_precision; | |
2852 | status->float_rounding_mode = float_round_nearest_even; | |
2853 | status->floatx80_rounding_precision = 80; | |
2854 | ||
2855 | compact = floatx80_make_compact(aExp, aSig); | |
2856 | ||
2857 | if (compact > 0x400CB167) { | |
2858 | if (compact > 0x400CB2B3) { | |
2859 | status->float_rounding_mode = user_rnd_mode; | |
2860 | status->floatx80_rounding_precision = user_rnd_prec; | |
2861 | return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, | |
2862 | 0x8000, one_sig, 0, status); | |
2863 | } else { | |
2864 | fp0 = packFloatx80(0, aExp, aSig); | |
2865 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2866 | make_float64(0x40C62D38D3D64634), status), | |
2867 | status); | |
2868 | fp0 = floatx80_sub(fp0, float64_to_floatx80( | |
2869 | make_float64(0x3D6F90AEB1E75CC7), status), | |
2870 | status); | |
2871 | fp0 = floatx80_etox(fp0, status); | |
2872 | fp1 = packFloatx80(0, 0x7FFB, one_sig); | |
2873 | ||
2874 | status->float_rounding_mode = user_rnd_mode; | |
2875 | status->floatx80_rounding_precision = user_rnd_prec; | |
2876 | ||
2877 | a = floatx80_mul(fp0, fp1, status); | |
2878 | ||
2879 | float_raise(float_flag_inexact, status); | |
2880 | ||
2881 | return a; | |
2882 | } | |
2883 | } | |
2884 | ||
2885 | fp0 = packFloatx80(0, aExp, aSig); /* |X| */ | |
2886 | fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */ | |
2887 | fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000), | |
2888 | status), status); /* (1/2)*EXP(|X|) */ | |
2889 | fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */ | |
2890 | fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */ | |
2891 | ||
2892 | status->float_rounding_mode = user_rnd_mode; | |
2893 | status->floatx80_rounding_precision = user_rnd_prec; | |
2894 | ||
2895 | a = floatx80_add(fp0, fp1, status); | |
2896 | ||
2897 | float_raise(float_flag_inexact, status); | |
2898 | ||
2899 | return a; | |
2900 | } |