]>
Commit | Line | Data |
---|---|---|
591596b7 LV |
1 | /* |
2 | * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator, | |
3 | * derived from NetBSD M68040 FPSP functions, | |
4 | * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic | |
5 | * Package. Those parts of the code (and some later contributions) are | |
6 | * provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
11 | * GPL-v2-or-later | |
12 | * | |
13 | * Any future contributions to this file will be taken to be licensed under | |
14 | * the Softfloat-2a license unless specifically indicated otherwise. | |
15 | */ | |
16 | ||
17 | /* Portions of this work are licensed under the terms of the GNU GPL, | |
18 | * version 2 or later. See the COPYING file in the top-level directory. | |
19 | */ | |
20 | ||
21 | #include "qemu/osdep.h" | |
22 | #include "softfloat.h" | |
23 | #include "fpu/softfloat-macros.h" | |
4b5c65b8 | 24 | #include "softfloat_fpsp_tables.h" |
591596b7 | 25 | |
c84813b8 | 26 | #define pi_exp 0x4000 |
8c992abc LV |
27 | #define piby2_exp 0x3FFF |
28 | #define pi_sig LIT64(0xc90fdaa22168c235) | |
29 | ||
0d379c17 LV |
/*
 * Propagate the NaN in the single operand `a' of a one-argument operation.
 *
 * A signaling NaN raises the invalid-operation flag and is quieted before
 * being returned; the flag must be raised even when default_nan_mode is set,
 * which is why the signaling check runs first.  If the status is in
 * default-NaN mode the (possibly quieted) operand is discarded and the
 * platform default NaN is returned instead.
 */
static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
{
    if (floatx80_is_signaling_nan(a, status)) {
        float_raise(float_flag_invalid, status);
        /* Quiet the signaling NaN so it can be returned as a result. */
        a = floatx80_silence_nan(a, status);
    }

    if (status->default_nan_mode) {
        return floatx80_default_nan(status);
    }

    return a;
}
43 | ||
591596b7 LV |
44 | /*---------------------------------------------------------------------------- |
45 | | Returns the modulo remainder of the extended double-precision floating-point | |
46 | | value `a' with respect to the corresponding value `b'. | |
47 | *----------------------------------------------------------------------------*/ | |
48 | ||
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;   /* aSig0:aSig1 is the 128-bit running remainder */
    uint64_t qTemp, term0, term1;

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    /* a is NaN/Inf: propagate NaNs; Inf mod anything is invalid. */
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig0 << 1)
            || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    /* b is NaN/Inf: NaN propagates; a mod Inf is a unchanged. */
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
            /* a mod 0 is invalid. */
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((uint64_t) (aSig0 << 1) == 0) {
            return a;  /* 0 mod b is 0 (sign preserved) */
        }
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    bSig |= LIT64(0x8000000000000000);  /* make explicit integer bit */
    zSign = aSign;                      /* remainder keeps the sign of a */
    expDiff = aExp - bExp;
    aSig1 = 0;
    if (expDiff < 0) {
        return a;  /* |a| < |b|: a is already the remainder */
    }
    /* First (1-bit) quotient step aligns the remainder below bSig. */
    qTemp = (bSig <= aSig0);
    if (qTemp) {
        aSig0 -= bSig;
    }
    /*
     * Consume the exponent difference 62 bits at a time.  Each pass
     * subtracts qTemp*bSig (qTemp conservatively reduced by 2 because
     * estimateDiv128To64 may overestimate) and shifts the remainder up.
     * NOTE(review): the 62-bit step (rather than 64) appears chosen so the
     * under-estimated quotient cannot make the remainder go negative —
     * confirm against FPSP/SoftFloat history.
     */
    expDiff -= 64;
    while (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        mul64To128(bSig, qTemp, &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
        expDiff -= 62;
    }
    /* Final partial step for the remaining 0..64 bits of exponent gap. */
    expDiff += 64;
    if (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        qTemp >>= 64 - expDiff;
        mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
        /* Correct the estimate: subtract bSig until remainder < bSig. */
        while (le128(term0, term1, aSig0, aSig1)) {
            ++qTemp;
            sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        }
    }
    /* Full 80-bit precision, regardless of the user rounding precision. */
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);
}
0d379c17 LV |
126 | |
127 | /*---------------------------------------------------------------------------- | |
128 | | Returns the mantissa of the extended double-precision floating-point | |
129 | | value `a'. | |
130 | *----------------------------------------------------------------------------*/ | |
131 | ||
132 | floatx80 floatx80_getman(floatx80 a, float_status *status) | |
133 | { | |
134 | flag aSign; | |
135 | int32_t aExp; | |
136 | uint64_t aSig; | |
137 | ||
138 | aSig = extractFloatx80Frac(a); | |
139 | aExp = extractFloatx80Exp(a); | |
140 | aSign = extractFloatx80Sign(a); | |
141 | ||
142 | if (aExp == 0x7FFF) { | |
143 | if ((uint64_t) (aSig << 1)) { | |
144 | return propagateFloatx80NaNOneArg(a , status); | |
145 | } | |
146 | float_raise(float_flag_invalid , status); | |
147 | return floatx80_default_nan(status); | |
148 | } | |
149 | ||
150 | if (aExp == 0) { | |
151 | if (aSig == 0) { | |
152 | return packFloatx80(aSign, 0, 0); | |
153 | } | |
154 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
155 | } | |
156 | ||
157 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, | |
158 | 0x3FFF, aSig, 0, status); | |
159 | } | |
160 | ||
161 | /*---------------------------------------------------------------------------- | |
162 | | Returns the exponent of the extended double-precision floating-point | |
163 | | value `a' as an extended double-precision value. | |
164 | *----------------------------------------------------------------------------*/ | |
165 | ||
166 | floatx80 floatx80_getexp(floatx80 a, float_status *status) | |
167 | { | |
168 | flag aSign; | |
169 | int32_t aExp; | |
170 | uint64_t aSig; | |
171 | ||
172 | aSig = extractFloatx80Frac(a); | |
173 | aExp = extractFloatx80Exp(a); | |
174 | aSign = extractFloatx80Sign(a); | |
175 | ||
176 | if (aExp == 0x7FFF) { | |
177 | if ((uint64_t) (aSig << 1)) { | |
178 | return propagateFloatx80NaNOneArg(a , status); | |
179 | } | |
180 | float_raise(float_flag_invalid , status); | |
181 | return floatx80_default_nan(status); | |
182 | } | |
183 | ||
184 | if (aExp == 0) { | |
185 | if (aSig == 0) { | |
186 | return packFloatx80(aSign, 0, 0); | |
187 | } | |
188 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
189 | } | |
190 | ||
191 | return int32_to_floatx80(aExp - 0x3FFF, status); | |
192 | } | |
193 | ||
194 | /*---------------------------------------------------------------------------- | |
195 | | Scales extended double-precision floating-point value in operand `a' by | |
196 | | value `b'. The function truncates the value in the second operand 'b' to | |
197 | | an integral value and adds that value to the exponent of the operand 'a'. | |
198 | | The operation performed according to the IEC/IEEE Standard for Binary | |
199 | | Floating-Point Arithmetic. | |
200 | *----------------------------------------------------------------------------*/ | |
201 | ||
floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, bSign;
    int32_t aExp, bExp, shiftCount;
    uint64_t aSig, bSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);
    bSign = extractFloatx80Sign(b);

    /* b NaN propagates; scaling by Inf is invalid. */
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1) ||
            ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid , status);
        return floatx80_default_nan(status);
    }
    /* a NaN propagates; Inf scaled by a finite value stays Inf. */
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);  /* signed zero unchanged */
        }
        /* |b| < 1 truncates to 0: nothing to scale by. */
        if (bExp < 0x3FFF) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    /* |b| < 1 truncates to an integral value of 0: a is unchanged. */
    if (bExp < 0x3FFF) {
        return a;
    }

    /*
     * |b| >= 2^16: the scale overwhelms any representable exponent, so
     * force a saturated exponent and let rounding produce the correctly
     * signalled overflow/underflow result.
     */
    if (0x400F < bExp) {
        aExp = bSign ? -0x6001 : 0xE000;
        return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                    aSign, aExp, aSig, 0, status);
    }

    /* Truncate b to an integer (0x403E biases for the 63 fraction bits). */
    shiftCount = 0x403E - bExp;
    bSig >>= shiftCount;
    aExp = bSign ? (aExp - bSig) : (aExp + bSig);

    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                aSign, aExp, aSig, 0, status);
}
9a069775 LV |
257 | |
258 | floatx80 floatx80_move(floatx80 a, float_status *status) | |
259 | { | |
260 | flag aSign; | |
261 | int32_t aExp; | |
262 | uint64_t aSig; | |
263 | ||
264 | aSig = extractFloatx80Frac(a); | |
265 | aExp = extractFloatx80Exp(a); | |
266 | aSign = extractFloatx80Sign(a); | |
267 | ||
268 | if (aExp == 0x7FFF) { | |
269 | if ((uint64_t)(aSig << 1)) { | |
270 | return propagateFloatx80NaNOneArg(a, status); | |
271 | } | |
272 | return a; | |
273 | } | |
274 | if (aExp == 0) { | |
275 | if (aSig == 0) { | |
276 | return a; | |
277 | } | |
278 | normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, | |
279 | aSign, aExp, aSig, 0, status); | |
280 | } | |
281 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, | |
282 | aExp, aSig, 0, status); | |
283 | } | |
4b5c65b8 LV |
284 | |
285 | /*---------------------------------------------------------------------------- | |
286 | | Algorithms for transcendental functions supported by MC68881 and MC68882 | |
287 | | mathematical coprocessors. The functions are derived from FPSP library. | |
288 | *----------------------------------------------------------------------------*/ | |
289 | ||
290 | #define one_exp 0x3FFF | |
291 | #define one_sig LIT64(0x8000000000000000) | |
292 | ||
293 | /*---------------------------------------------------------------------------- | |
294 | | Function for compactifying extended double-precision floating point values. | |
295 | *----------------------------------------------------------------------------*/ | |
296 | ||
/*
 * Pack the biased exponent and the top 16 bits of the significand into one
 * 32-bit value, so argument-range tests can be done with a single integer
 * comparison (as the original FPSP assembly did).
 */
static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
{
    int32_t topFrac = (int32_t)(aSig >> 48);
    return (aExp << 16) | topFrac;
}
301 | ||
302 | /*---------------------------------------------------------------------------- | |
303 | | Log base e of x plus 1 | |
304 | *----------------------------------------------------------------------------*/ | |
305 | ||
306 | floatx80 floatx80_lognp1(floatx80 a, float_status *status) | |
307 | { | |
308 | flag aSign; | |
309 | int32_t aExp; | |
310 | uint64_t aSig, fSig; | |
311 | ||
312 | int8_t user_rnd_mode, user_rnd_prec; | |
313 | ||
314 | int32_t compact, j, k; | |
315 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; | |
316 | ||
317 | aSig = extractFloatx80Frac(a); | |
318 | aExp = extractFloatx80Exp(a); | |
319 | aSign = extractFloatx80Sign(a); | |
320 | ||
321 | if (aExp == 0x7FFF) { | |
322 | if ((uint64_t) (aSig << 1)) { | |
323 | propagateFloatx80NaNOneArg(a, status); | |
324 | } | |
325 | if (aSign) { | |
326 | float_raise(float_flag_invalid, status); | |
327 | return floatx80_default_nan(status); | |
328 | } | |
329 | return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low); | |
330 | } | |
331 | ||
332 | if (aExp == 0 && aSig == 0) { | |
333 | return packFloatx80(aSign, 0, 0); | |
334 | } | |
335 | ||
336 | if (aSign && aExp >= one_exp) { | |
337 | if (aExp == one_exp && aSig == one_sig) { | |
338 | float_raise(float_flag_divbyzero, status); | |
981348af LV |
339 | return packFloatx80(aSign, floatx80_infinity.high, |
340 | floatx80_infinity.low); | |
4b5c65b8 LV |
341 | } |
342 | float_raise(float_flag_invalid, status); | |
343 | return floatx80_default_nan(status); | |
344 | } | |
345 | ||
346 | if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { | |
347 | /* <= min threshold */ | |
348 | float_raise(float_flag_inexact, status); | |
349 | return floatx80_move(a, status); | |
350 | } | |
351 | ||
352 | user_rnd_mode = status->float_rounding_mode; | |
353 | user_rnd_prec = status->floatx80_rounding_precision; | |
354 | status->float_rounding_mode = float_round_nearest_even; | |
355 | status->floatx80_rounding_precision = 80; | |
356 | ||
357 | compact = floatx80_make_compact(aExp, aSig); | |
358 | ||
359 | fp0 = a; /* Z */ | |
360 | fp1 = a; | |
361 | ||
362 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
363 | status), status); /* X = (1+Z) */ | |
364 | ||
365 | aExp = extractFloatx80Exp(fp0); | |
366 | aSig = extractFloatx80Frac(fp0); | |
367 | ||
368 | compact = floatx80_make_compact(aExp, aSig); | |
369 | ||
370 | if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { | |
371 | /* |X| < 1/2 or |X| > 3/2 */ | |
372 | k = aExp - 0x3FFF; | |
373 | fp1 = int32_to_floatx80(k, status); | |
374 | ||
375 | fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); | |
376 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ | |
377 | ||
378 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ | |
379 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ | |
380 | ||
381 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ | |
382 | ||
383 | lp1cont1: | |
384 | /* LP1CONT1 */ | |
385 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ | |
386 | logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); | |
387 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ | |
388 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ | |
389 | ||
390 | fp3 = fp2; | |
391 | fp1 = fp2; | |
392 | ||
393 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
394 | make_float64(0x3FC2499AB5E4040B), status), | |
395 | status); /* V*A6 */ | |
396 | fp2 = floatx80_mul(fp2, float64_to_floatx80( | |
397 | make_float64(0xBFC555B5848CB7DB), status), | |
398 | status); /* V*A5 */ | |
399 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
400 | make_float64(0x3FC99999987D8730), status), | |
401 | status); /* A4+V*A6 */ | |
402 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
403 | make_float64(0xBFCFFFFFFF6F7E97), status), | |
404 | status); /* A3+V*A5 */ | |
405 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ | |
406 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ | |
407 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
408 | make_float64(0x3FD55555555555A4), status), | |
409 | status); /* A2+V*(A4+V*A6) */ | |
410 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
411 | make_float64(0xBFE0000000000008), status), | |
412 | status); /* A1+V*(A3+V*A5) */ | |
413 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ | |
414 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ | |
415 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ | |
416 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ | |
417 | ||
418 | fp1 = floatx80_add(fp1, log_tbl[j + 1], | |
419 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ | |
420 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ | |
421 | ||
422 | status->float_rounding_mode = user_rnd_mode; | |
423 | status->floatx80_rounding_precision = user_rnd_prec; | |
424 | ||
425 | a = floatx80_add(fp0, klog2, status); | |
426 | ||
427 | float_raise(float_flag_inexact, status); | |
428 | ||
429 | return a; | |
430 | } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { | |
431 | /* |X| < 1/16 or |X| > -1/16 */ | |
432 | /* LP1CARE */ | |
433 | fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); | |
434 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ | |
435 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ | |
436 | ||
437 | if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */ | |
438 | /* KISZERO */ | |
439 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000), | |
440 | status), f, status); /* 1-F */ | |
441 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */ | |
442 | fp1 = packFloatx80(0, 0, 0); /* K = 0 */ | |
443 | } else { | |
444 | /* KISNEG */ | |
445 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000), | |
446 | status), f, status); /* 2-F */ | |
447 | fp1 = floatx80_add(fp1, fp1, status); /* 2Z */ | |
448 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */ | |
449 | fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */ | |
450 | } | |
451 | goto lp1cont1; | |
452 | } else { | |
453 | /* LP1ONE16 */ | |
454 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */ | |
455 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
456 | status), status); /* FP0 IS 1+X */ | |
457 | ||
458 | /* LP1CONT2 */ | |
459 | fp1 = floatx80_div(fp1, fp0, status); /* U */ | |
460 | saveu = fp1; | |
461 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ | |
462 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ | |
463 | ||
464 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), | |
465 | status); /* B5 */ | |
466 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), | |
467 | status); /* B4 */ | |
468 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ | |
469 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ | |
470 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
471 | make_float64(0x3F624924928BCCFF), status), | |
472 | status); /* B3+W*B5 */ | |
473 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
474 | make_float64(0x3F899999999995EC), status), | |
475 | status); /* B2+W*B4 */ | |
476 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ | |
477 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ | |
478 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
479 | make_float64(0x3FB5555555555555), status), | |
480 | status); /* B1+W*(B3+W*B5) */ | |
481 | ||
482 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ | |
483 | fp1 = floatx80_add(fp1, fp2, | |
484 | status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ | |
485 | fp0 = floatx80_mul(fp0, fp1, | |
486 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ | |
487 | ||
488 | status->float_rounding_mode = user_rnd_mode; | |
489 | status->floatx80_rounding_precision = user_rnd_prec; | |
490 | ||
491 | a = floatx80_add(fp0, saveu, status); | |
492 | ||
493 | /*if (!floatx80_is_zero(a)) { */ | |
494 | float_raise(float_flag_inexact, status); | |
495 | /*} */ | |
496 | ||
497 | return a; | |
498 | } | |
499 | } | |
50067bd1 LV |
500 | |
501 | /*---------------------------------------------------------------------------- | |
502 | | Log base e | |
503 | *----------------------------------------------------------------------------*/ | |
504 | ||
505 | floatx80 floatx80_logn(floatx80 a, float_status *status) | |
506 | { | |
507 | flag aSign; | |
508 | int32_t aExp; | |
509 | uint64_t aSig, fSig; | |
510 | ||
511 | int8_t user_rnd_mode, user_rnd_prec; | |
512 | ||
513 | int32_t compact, j, k, adjk; | |
514 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; | |
515 | ||
516 | aSig = extractFloatx80Frac(a); | |
517 | aExp = extractFloatx80Exp(a); | |
518 | aSign = extractFloatx80Sign(a); | |
519 | ||
520 | if (aExp == 0x7FFF) { | |
521 | if ((uint64_t) (aSig << 1)) { | |
522 | propagateFloatx80NaNOneArg(a, status); | |
523 | } | |
524 | if (aSign == 0) { | |
525 | return packFloatx80(0, floatx80_infinity.high, | |
526 | floatx80_infinity.low); | |
527 | } | |
528 | } | |
529 | ||
530 | adjk = 0; | |
531 | ||
532 | if (aExp == 0) { | |
533 | if (aSig == 0) { /* zero */ | |
534 | float_raise(float_flag_divbyzero, status); | |
535 | return packFloatx80(1, floatx80_infinity.high, | |
536 | floatx80_infinity.low); | |
537 | } | |
538 | if ((aSig & one_sig) == 0) { /* denormal */ | |
539 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
540 | adjk = -100; | |
541 | aExp += 100; | |
542 | a = packFloatx80(aSign, aExp, aSig); | |
543 | } | |
544 | } | |
545 | ||
546 | if (aSign) { | |
547 | float_raise(float_flag_invalid, status); | |
548 | return floatx80_default_nan(status); | |
549 | } | |
550 | ||
551 | user_rnd_mode = status->float_rounding_mode; | |
552 | user_rnd_prec = status->floatx80_rounding_precision; | |
553 | status->float_rounding_mode = float_round_nearest_even; | |
554 | status->floatx80_rounding_precision = 80; | |
555 | ||
556 | compact = floatx80_make_compact(aExp, aSig); | |
557 | ||
558 | if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { | |
559 | /* |X| < 15/16 or |X| > 17/16 */ | |
560 | k = aExp - 0x3FFF; | |
561 | k += adjk; | |
562 | fp1 = int32_to_floatx80(k, status); | |
563 | ||
564 | fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); | |
565 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ | |
566 | ||
567 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ | |
568 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ | |
569 | ||
570 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ | |
571 | ||
572 | /* LP1CONT1 */ | |
573 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ | |
574 | logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); | |
575 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ | |
576 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ | |
577 | ||
578 | fp3 = fp2; | |
579 | fp1 = fp2; | |
580 | ||
581 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
582 | make_float64(0x3FC2499AB5E4040B), status), | |
583 | status); /* V*A6 */ | |
584 | fp2 = floatx80_mul(fp2, float64_to_floatx80( | |
585 | make_float64(0xBFC555B5848CB7DB), status), | |
586 | status); /* V*A5 */ | |
587 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
588 | make_float64(0x3FC99999987D8730), status), | |
589 | status); /* A4+V*A6 */ | |
590 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
591 | make_float64(0xBFCFFFFFFF6F7E97), status), | |
592 | status); /* A3+V*A5 */ | |
593 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ | |
594 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ | |
595 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
596 | make_float64(0x3FD55555555555A4), status), | |
597 | status); /* A2+V*(A4+V*A6) */ | |
598 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
599 | make_float64(0xBFE0000000000008), status), | |
600 | status); /* A1+V*(A3+V*A5) */ | |
601 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ | |
602 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ | |
603 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ | |
604 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ | |
605 | ||
606 | fp1 = floatx80_add(fp1, log_tbl[j + 1], | |
607 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ | |
608 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ | |
609 | ||
610 | status->float_rounding_mode = user_rnd_mode; | |
611 | status->floatx80_rounding_precision = user_rnd_prec; | |
612 | ||
613 | a = floatx80_add(fp0, klog2, status); | |
614 | ||
615 | float_raise(float_flag_inexact, status); | |
616 | ||
617 | return a; | |
618 | } else { /* |X-1| >= 1/16 */ | |
619 | fp0 = a; | |
620 | fp1 = a; | |
621 | fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000), | |
622 | status), status); /* FP1 IS X-1 */ | |
623 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), | |
624 | status), status); /* FP0 IS X+1 */ | |
625 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */ | |
626 | ||
627 | /* LP1CONT2 */ | |
628 | fp1 = floatx80_div(fp1, fp0, status); /* U */ | |
629 | saveu = fp1; | |
630 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ | |
631 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ | |
632 | ||
633 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), | |
634 | status); /* B5 */ | |
635 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), | |
636 | status); /* B4 */ | |
637 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ | |
638 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ | |
639 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
640 | make_float64(0x3F624924928BCCFF), status), | |
641 | status); /* B3+W*B5 */ | |
642 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
643 | make_float64(0x3F899999999995EC), status), | |
644 | status); /* B2+W*B4 */ | |
645 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ | |
646 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ | |
647 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
648 | make_float64(0x3FB5555555555555), status), | |
649 | status); /* B1+W*(B3+W*B5) */ | |
650 | ||
651 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ | |
652 | fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ | |
653 | fp0 = floatx80_mul(fp0, fp1, | |
654 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ | |
655 | ||
656 | status->float_rounding_mode = user_rnd_mode; | |
657 | status->floatx80_rounding_precision = user_rnd_prec; | |
658 | ||
659 | a = floatx80_add(fp0, saveu, status); | |
660 | ||
661 | /*if (!floatx80_is_zero(a)) { */ | |
662 | float_raise(float_flag_inexact, status); | |
663 | /*} */ | |
664 | ||
665 | return a; | |
666 | } | |
667 | } | |
248efb66 LV |
668 | |
669 | /*---------------------------------------------------------------------------- | |
670 | | Log base 10 | |
671 | *----------------------------------------------------------------------------*/ | |
672 | ||
673 | floatx80 floatx80_log10(floatx80 a, float_status *status) | |
674 | { | |
675 | flag aSign; | |
676 | int32_t aExp; | |
677 | uint64_t aSig; | |
678 | ||
679 | int8_t user_rnd_mode, user_rnd_prec; | |
680 | ||
681 | floatx80 fp0, fp1; | |
682 | ||
683 | aSig = extractFloatx80Frac(a); | |
684 | aExp = extractFloatx80Exp(a); | |
685 | aSign = extractFloatx80Sign(a); | |
686 | ||
687 | if (aExp == 0x7FFF) { | |
688 | if ((uint64_t) (aSig << 1)) { | |
689 | propagateFloatx80NaNOneArg(a, status); | |
690 | } | |
691 | if (aSign == 0) { | |
692 | return packFloatx80(0, floatx80_infinity.high, | |
693 | floatx80_infinity.low); | |
694 | } | |
695 | } | |
696 | ||
697 | if (aExp == 0 && aSig == 0) { | |
698 | float_raise(float_flag_divbyzero, status); | |
699 | return packFloatx80(1, floatx80_infinity.high, | |
700 | floatx80_infinity.low); | |
701 | } | |
702 | ||
703 | if (aSign) { | |
704 | float_raise(float_flag_invalid, status); | |
705 | return floatx80_default_nan(status); | |
706 | } | |
707 | ||
708 | user_rnd_mode = status->float_rounding_mode; | |
709 | user_rnd_prec = status->floatx80_rounding_precision; | |
710 | status->float_rounding_mode = float_round_nearest_even; | |
711 | status->floatx80_rounding_precision = 80; | |
712 | ||
713 | fp0 = floatx80_logn(a, status); | |
714 | fp1 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); /* INV_L10 */ | |
715 | ||
716 | status->float_rounding_mode = user_rnd_mode; | |
717 | status->floatx80_rounding_precision = user_rnd_prec; | |
718 | ||
719 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */ | |
720 | ||
721 | float_raise(float_flag_inexact, status); | |
722 | ||
723 | return a; | |
724 | } | |
67b453ed LV |
725 | |
726 | /*---------------------------------------------------------------------------- | |
727 | | Log base 2 | |
728 | *----------------------------------------------------------------------------*/ | |
729 | ||
730 | floatx80 floatx80_log2(floatx80 a, float_status *status) | |
731 | { | |
732 | flag aSign; | |
733 | int32_t aExp; | |
734 | uint64_t aSig; | |
735 | ||
736 | int8_t user_rnd_mode, user_rnd_prec; | |
737 | ||
738 | floatx80 fp0, fp1; | |
739 | ||
740 | aSig = extractFloatx80Frac(a); | |
741 | aExp = extractFloatx80Exp(a); | |
742 | aSign = extractFloatx80Sign(a); | |
743 | ||
744 | if (aExp == 0x7FFF) { | |
745 | if ((uint64_t) (aSig << 1)) { | |
746 | propagateFloatx80NaNOneArg(a, status); | |
747 | } | |
748 | if (aSign == 0) { | |
749 | return packFloatx80(0, floatx80_infinity.high, | |
750 | floatx80_infinity.low); | |
751 | } | |
752 | } | |
753 | ||
754 | if (aExp == 0) { | |
755 | if (aSig == 0) { | |
756 | float_raise(float_flag_divbyzero, status); | |
757 | return packFloatx80(1, floatx80_infinity.high, | |
758 | floatx80_infinity.low); | |
759 | } | |
760 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); | |
761 | } | |
762 | ||
763 | if (aSign) { | |
764 | float_raise(float_flag_invalid, status); | |
765 | return floatx80_default_nan(status); | |
766 | } | |
767 | ||
768 | user_rnd_mode = status->float_rounding_mode; | |
769 | user_rnd_prec = status->floatx80_rounding_precision; | |
770 | status->float_rounding_mode = float_round_nearest_even; | |
771 | status->floatx80_rounding_precision = 80; | |
772 | ||
773 | if (aSig == one_sig) { /* X is 2^k */ | |
774 | status->float_rounding_mode = user_rnd_mode; | |
775 | status->floatx80_rounding_precision = user_rnd_prec; | |
776 | ||
777 | a = int32_to_floatx80(aExp - 0x3FFF, status); | |
778 | } else { | |
779 | fp0 = floatx80_logn(a, status); | |
780 | fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); /* INV_L2 */ | |
781 | ||
782 | status->float_rounding_mode = user_rnd_mode; | |
783 | status->floatx80_rounding_precision = user_rnd_prec; | |
784 | ||
785 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */ | |
786 | } | |
787 | ||
788 | float_raise(float_flag_inexact, status); | |
789 | ||
790 | return a; | |
791 | } | |
40ad0873 LV |
792 | |
/*----------------------------------------------------------------------------
 | e to x
 |
 | Computes e^a in extended double precision using the FPSP algorithm:
 | reduce X to R = X - N*(log2/64) with N = int(X*64/log2), evaluate a
 | degree-5 polynomial approximation of exp(R)-1, then scale by the table
 | value 2^(J/64) (J = N mod 64) and by 2^M (M = N div 64).
 *----------------------------------------------------------------------------*/

floatx80 floatx80_etox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    /* Caller's rounding settings, saved so that only the final operation
     * rounds in the user's mode/precision; all intermediates use
     * round-to-nearest-even at full extended precision. */
    int8_t user_rnd_mode, user_rnd_prec;

    /* m1 is only meaningful when adjflag != 0 (huge-|X| path below). */
    int32_t compact, n, j, k, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
    flag adjflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; e^-inf = +0; e^+inf = +inf. */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig); /* e^0 = 1 exactly */
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    adjflag = 0;

    if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact < 0x400CB167) { /* |X| < 16380 log2 */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            adjflag = 0;
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /* arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m += 0x3FFF; /* biased exponent of 2^(M) */

        expcont1:
            /* Reduction: R = (X + N*L1) + N*L2 where L1+L2 = -log2/64,
             * split so that N*L1 is exact. */
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            /* Polynomial: EXP(R)-1 via split even/odd Horner evaluation
             * in S = R*R. */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
                                      status); /* A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
                               status), fp1,
                               status); /* fp3 is S*A4 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
                               0x3FA5555555554431), status),
                               status); /* fp2 is A3+S*A5 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
                               0x3FC5555555554018), status),
                               status); /* fp3 is A2+S*A4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp2 is A1+S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            /* Table lookup: exp_tbl[j] ~ 2^(J/64), exp_tbl2[j] is its
             * correction term (tables in softfloat_fpsp_tables.h). */
            fp1 = exp_tbl[j];
            fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
                               status); /* accurate 2^(J/64) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */

            /* Scale by 2^M (and 2^M1 when the huge-argument path split the
             * exponent in two to avoid overflow of a single scale factor). */
            scale = packFloatx80(0, m, one_sig);
            if (adjflag) {
                adjscale = packFloatx80(0, m1, one_sig);
                fp0 = floatx80_mul(fp0, adjscale, status);
            }

            /* Final multiply rounds in the caller's mode/precision. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, scale, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| >= 16380 log2 */
            if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
                /* Guaranteed overflow (X > 0) or underflow (X < 0):
                 * force it by rounding with an out-of-range exponent. */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;
                if (aSign) {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, -0x1000, aSig, 0, status);
                } else {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, 0x8000, aSig, 0, status);
                }
                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* Near the overflow/underflow threshold: same reduction,
                 * but split the scale exponent into M and M1 so neither
                 * packFloatx80 scale factor overflows on its own. */
                fp0 = a;
                fp1 = a;
                fp0 = floatx80_mul(fp0, float32_to_floatx80(
                                   make_float32(0x42B8AA3B), status),
                                   status); /* 64/log2 * X */
                adjflag = 1;
                n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
                fp0 = int32_to_floatx80(n, status);

                j = n & 0x3F; /* J = N mod 64 */
                /* NOTE: this is really arithmetic right shift by 6 */
                k = n / 64;
                if (n < 0 && j) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    k--;
                }
                /* NOTE: this is really arithmetic right shift by 1 */
                m1 = k / 2;
                if (k < 0 && (k & 1)) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    m1--;
                }
                m = k - m1;
                m1 += 0x3FFF; /* biased exponent of 2^(M1) */
                m += 0x3FFF; /* biased exponent of 2^(M) */

                goto expcont1;
            }
        }
    } else { /* |X| < 2^(-65) */
        /* X is negligible next to 1: e^X rounds to 1 + X. */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
                         status), status); /* 1 + X */

        float_raise(float_flag_inexact, status);

        return a;
    }
}
068f1615 LV |
975 | |
/*----------------------------------------------------------------------------
 | 2 to x
 |
 | Computes 2^a in extended double precision: N = int(64*X), J = N mod 64,
 | R = (X - N/64) * log2, evaluate exp(R)-1, then combine with the table
 | value 2^(J/64) (exp2_tbl/exp2_tbl2) and scale by 2^M and 2^M'.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_twotox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    /* Caller's rounding settings; restored just before the final multiply
     * so only the result rounds in the user's mode/precision. */
    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; 2^-inf = +0; 2^+inf = +inf. */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig); /* 2^0 = 1 exactly */
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400D80C0) {
        /* |X| > 16480 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            /* Guaranteed overflow (X > 0) or underflow (X < 0):
             * force it via an out-of-range exponent. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            /* X is negligible next to 1: 2^X rounds to 1 + X. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x42800000), status),
                           status); /* X * 64 */
        n = floatx80_to_int32(fp1, status);
        fp1 = int32_to_floatx80(n, status);
        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        /* Split the power-of-two scale as L = M + M' so that the table
         * entries (biased by M) and ADJFACT (2^M') each stay in range. */
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        adjfact = packFloatx80(0, m1, one_sig);
        /* fact1 ~ 2^(J/64) scaled by 2^M; fact2 is its low-order correction,
         * unpacked from the 32-bit exp2_tbl2 entry (high 16 bits: exponent,
         * low 16 bits: top of the significand). */
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        /* R = (X - N/64) * log2, so 2^X = 2^(N/64) * e^R. */
        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x3C800000), status),
                           status); /* (1/64)*N */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
        fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); /* LOG2 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR: exp(R)-1 via split even/odd Horner evaluation in S = R*R. */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        /* 2^(J/64+M) * exp(R) = fact1 + (fact2 + fact1*(exp(R)-1)). */
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        /* Final multiply rounds in the caller's mode/precision. */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
1123 | ||
/*----------------------------------------------------------------------------
 | 10 to x
 |
 | Computes 10^a in extended double precision: N = int(X*64*log10/log2),
 | J = N mod 64, R = (X - N*(log2/(64 log10))) * log10, evaluate exp(R)-1,
 | then combine with the table value 2^(J/64) and scale by 2^M and 2^M'.
 | Shares the exp2_tbl tables and the EXPR polynomial with floatx80_twotox.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_tentox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    /* Caller's rounding settings; restored just before the final multiply
     * so only the result rounds in the user's mode/precision. */
    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; 10^-inf = +0; 10^+inf = +inf. */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig); /* 10^0 = 1 exactly */
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400B9B07) {
        /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            /* Guaranteed overflow (X > 0) or underflow (X < 0):
             * force it via an out-of-range exponent. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            /* X is negligible next to 1: 10^X rounds to 1 + X. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x406A934F0979A371),
                           status), status); /* X*64*LOG10/LOG2 */
        n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
        fp1 = int32_to_floatx80(n, status);

        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        /* Split the power-of-two scale as L = M + M' so that the table
         * entries (biased by M) and ADJFACT (2^M') each stay in range. */
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        adjfact = packFloatx80(0, m1, one_sig);
        /* fact1 ~ 2^(J/64) scaled by 2^M; fact2 is its low-order correction,
         * unpacked from the 32-bit exp2_tbl2 entry (high 16 bits: exponent,
         * low 16 bits: top of the significand). */
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        /* Two-constant (lead/trail) reduction of X by N*(log2/(64 log10)),
         * then R = (X - N*L) * log10 so 10^X = 2^(N/64) * e^R. */
        fp2 = fp1; /* N */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3F734413509F8000), status),
                           status); /* N*(LOG2/64LOG10)_LEAD */
        fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2));
        fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
        fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
        fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); /* LOG10 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR: exp(R)-1 via split even/odd Horner evaluation in S = R*R. */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        /* 2^(J/64+M) * exp(R) = fact1 + (fact2 + fact1*(exp(R)-1)). */
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        /* Final multiply rounds in the caller's mode/precision. */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
27340180 LV |
1276 | |
/*----------------------------------------------------------------------------
 | Tangent
 |
 | Computes tan(a) in extended double precision. The argument is reduced
 | modulo pi/2 to R with quadrant index N (via the pi_tbl tables for
 | moderate |X|, or the iterative REDUCEX loop for huge |X|); tan is then
 | evaluated as a ratio of two polynomials in S = R*R, with the reciprocal
 | form -cot used when N is odd.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_tan(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    /* Caller's rounding settings; restored just before the final divide
     * so only the result rounds in the user's mode/precision. */
    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
    float32 twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; tan(+/-inf) is an invalid operation. */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); /* tan(+/-0) = +/-0 exactly */
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX: iteratively subtract multiples of 2*pi, kept as a
             * head/tail pair (fp0 = R, fp1 = its low-order part r), until
             * the remainder's exponent is small enough. */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                /* Largest finite |X|: pre-subtract a huge multiple of 2*pi
                 * (with sign opposite to X) to avoid overflow in the loop. */
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1; /* remainder small enough: last pass */
            } else {
                l = xExp - 27; /* scale constants up to match |fp0| */
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            /* 2*pi split into a head (twopi1) and tail (twopi2) so that
             * N*twopi1 is exact. */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            /* Adding then subtracting 2^63 rounds fp2 to the nearest
             * integer N = round(fp0 / 2pi). */
            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto tancont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* |X| < 2^(-40): tan(X) rounds to X. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        /* Moderate |X|: N = round(X * 2/pi); subtract the tabulated
         * N*(pi/2) head (pi_tbl) and tail (pi_tbl2), indexed by N+32. */
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    tancont:
        if (n & 1) {
            /* NODD: odd quadrant, tan(X) = -cot(R) = -Q(S)/(R*P(S)).
             * Numerator/denominator polynomials evaluated by Horner in
             * S = R*R. */
            fp1 = fp0; /* R */
            fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
            fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            /* Negate the denominator to get tan = -Q/P. */
            xSign = extractFloatx80Sign(fp1);
            xExp = extractFloatx80Exp(fp1);
            xSig = extractFloatx80Frac(fp1);
            xSign ^= 1;
            fp1 = packFloatx80(xSign, xExp, xSig);

            /* Final divide rounds in the caller's mode/precision. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* Even quadrant: tan(X) = (R*P(S))/Q(S), same polynomials. */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            /* Final divide rounds in the caller's mode/precision. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
5add1ac4 LV |
1486 | |
1487 | /*---------------------------------------------------------------------------- | |
1488 | | Sine | |
1489 | *----------------------------------------------------------------------------*/ | |
1490 | ||
1491 | floatx80 floatx80_sin(floatx80 a, float_status *status) | |
1492 | { | |
1493 | flag aSign, xSign; | |
1494 | int32_t aExp, xExp; | |
1495 | uint64_t aSig, xSig; | |
1496 | ||
1497 | int8_t user_rnd_mode, user_rnd_prec; | |
1498 | ||
1499 | int32_t compact, l, n, j; | |
1500 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; | |
1501 | float32 posneg1, twoto63; | |
6361d298 | 1502 | flag endflag; |
5add1ac4 LV |
1503 | |
1504 | aSig = extractFloatx80Frac(a); | |
1505 | aExp = extractFloatx80Exp(a); | |
1506 | aSign = extractFloatx80Sign(a); | |
1507 | ||
1508 | if (aExp == 0x7FFF) { | |
1509 | if ((uint64_t) (aSig << 1)) { | |
1510 | return propagateFloatx80NaNOneArg(a, status); | |
1511 | } | |
1512 | float_raise(float_flag_invalid, status); | |
1513 | return floatx80_default_nan(status); | |
1514 | } | |
1515 | ||
1516 | if (aExp == 0 && aSig == 0) { | |
1517 | return packFloatx80(aSign, 0, 0); | |
1518 | } | |
1519 | ||
5add1ac4 LV |
1520 | user_rnd_mode = status->float_rounding_mode; |
1521 | user_rnd_prec = status->floatx80_rounding_precision; | |
1522 | status->float_rounding_mode = float_round_nearest_even; | |
1523 | status->floatx80_rounding_precision = 80; | |
1524 | ||
1525 | compact = floatx80_make_compact(aExp, aSig); | |
1526 | ||
1527 | fp0 = a; | |
1528 | ||
1529 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { | |
1530 | /* 2^(-40) > |X| > 15 PI */ | |
1531 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ | |
1532 | /* REDUCEX */ | |
1533 | fp1 = packFloatx80(0, 0, 0); | |
1534 | if (compact == 0x7FFEFFFF) { | |
1535 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, | |
1536 | LIT64(0xC90FDAA200000000)); | |
1537 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, | |
1538 | LIT64(0x85A308D300000000)); | |
1539 | fp0 = floatx80_add(fp0, twopi1, status); | |
1540 | fp1 = fp0; | |
1541 | fp0 = floatx80_add(fp0, twopi2, status); | |
1542 | fp1 = floatx80_sub(fp1, fp0, status); | |
1543 | fp1 = floatx80_add(fp1, twopi2, status); | |
1544 | } | |
1545 | loop: | |
1546 | xSign = extractFloatx80Sign(fp0); | |
1547 | xExp = extractFloatx80Exp(fp0); | |
1548 | xExp -= 0x3FFF; | |
1549 | if (xExp <= 28) { | |
1550 | l = 0; | |
1551 | endflag = 1; | |
1552 | } else { | |
1553 | l = xExp - 27; | |
1554 | endflag = 0; | |
1555 | } | |
1556 | invtwopi = packFloatx80(0, 0x3FFE - l, | |
1557 | LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */ | |
1558 | twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); | |
1559 | twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); | |
1560 | ||
1561 | /* SIGN(INARG)*2^63 IN SGL */ | |
1562 | twoto63 = packFloat32(xSign, 0xBE, 0); | |
1563 | ||
1564 | fp2 = floatx80_mul(fp0, invtwopi, status); | |
1565 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), | |
1566 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ | |
1567 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), | |
1568 | status); /* FP2 is N */ | |
1569 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ | |
1570 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ | |
1571 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ | |
1572 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ | |
1573 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ | |
1574 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ | |
1575 | fp3 = fp0; /* FP3 is A */ | |
1576 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ | |
1577 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ | |
1578 | ||
1579 | if (endflag > 0) { | |
1580 | n = floatx80_to_int32(fp2, status); | |
1581 | goto sincont; | |
1582 | } | |
1583 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ | |
1584 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ | |
1585 | goto loop; | |
1586 | } else { | |
1587 | /* SINSM */ | |
1588 | fp0 = float32_to_floatx80(make_float32(0x3F800000), | |
1589 | status); /* 1 */ | |
1590 | ||
1591 | status->float_rounding_mode = user_rnd_mode; | |
1592 | status->floatx80_rounding_precision = user_rnd_prec; | |
1593 | ||
6361d298 LV |
1594 | /* SINTINY */ |
1595 | a = floatx80_move(a, status); | |
5add1ac4 LV |
1596 | float_raise(float_flag_inexact, status); |
1597 | ||
1598 | return a; | |
1599 | } | |
1600 | } else { | |
1601 | fp1 = floatx80_mul(fp0, float64_to_floatx80( | |
1602 | make_float64(0x3FE45F306DC9C883), status), | |
1603 | status); /* X*2/PI */ | |
1604 | ||
1605 | n = floatx80_to_int32(fp1, status); | |
1606 | j = 32 + n; | |
1607 | ||
1608 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ | |
1609 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), | |
1610 | status); /* FP0 IS R = (X-Y1)-Y2 */ | |
1611 | ||
1612 | sincont: | |
6361d298 | 1613 | if (n & 1) { |
5add1ac4 LV |
1614 | /* COSPOLY */ |
1615 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1616 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1617 | fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), | |
1618 | status); /* B8 */ | |
1619 | fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), | |
1620 | status); /* B7 */ | |
1621 | ||
1622 | xSign = extractFloatx80Sign(fp0); /* X IS S */ | |
1623 | xExp = extractFloatx80Exp(fp0); | |
1624 | xSig = extractFloatx80Frac(fp0); | |
1625 | ||
6361d298 | 1626 | if ((n >> 1) & 1) { |
5add1ac4 LV |
1627 | xSign ^= 1; |
1628 | posneg1 = make_float32(0xBF800000); /* -1 */ | |
1629 | } else { | |
1630 | xSign ^= 0; | |
1631 | posneg1 = make_float32(0x3F800000); /* 1 */ | |
1632 | } /* X IS NOW R'= SGN*R */ | |
1633 | ||
1634 | fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ | |
1635 | fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ | |
1636 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1637 | make_float64(0x3E21EED90612C972), status), | |
1638 | status); /* B6+TB8 */ | |
1639 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1640 | make_float64(0xBE927E4FB79D9FCF), status), | |
1641 | status); /* B5+TB7 */ | |
1642 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ | |
1643 | fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ | |
1644 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1645 | make_float64(0x3EFA01A01A01D423), status), | |
1646 | status); /* B4+T(B6+TB8) */ | |
1647 | fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); | |
1648 | fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ | |
1649 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ | |
1650 | fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ | |
1651 | fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); | |
1652 | fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ | |
1653 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
1654 | make_float32(0xBF000000), status), | |
1655 | status); /* B1+T(B3+T(B5+TB7)) */ | |
1656 | fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ | |
1657 | fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+ | |
1658 | * [S(B2+T(B4+T(B6+TB8)))] | |
1659 | */ | |
1660 | ||
1661 | x = packFloatx80(xSign, xExp, xSig); | |
1662 | fp0 = floatx80_mul(fp0, x, status); | |
1663 | ||
1664 | status->float_rounding_mode = user_rnd_mode; | |
1665 | status->floatx80_rounding_precision = user_rnd_prec; | |
1666 | ||
1667 | a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); | |
1668 | ||
1669 | float_raise(float_flag_inexact, status); | |
1670 | ||
1671 | return a; | |
1672 | } else { | |
1673 | /* SINPOLY */ | |
1674 | xSign = extractFloatx80Sign(fp0); /* X IS R */ | |
1675 | xExp = extractFloatx80Exp(fp0); | |
1676 | xSig = extractFloatx80Frac(fp0); | |
1677 | ||
6361d298 | 1678 | xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */ |
5add1ac4 LV |
1679 | |
1680 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1681 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1682 | fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), | |
1683 | status); /* A7 */ | |
1684 | fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), | |
1685 | status); /* A6 */ | |
1686 | fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ | |
1687 | fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ | |
1688 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1689 | make_float64(0xBE5AE6452A118AE4), status), | |
1690 | status); /* A5+T*A7 */ | |
1691 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1692 | make_float64(0x3EC71DE3A5341531), status), | |
1693 | status); /* A4+T*A6 */ | |
1694 | fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ | |
1695 | fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ | |
1696 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1697 | make_float64(0xBF2A01A01A018B59), status), | |
1698 | status); /* A3+T(A5+TA7) */ | |
1699 | fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); | |
1700 | fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ | |
1701 | fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ | |
1702 | fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ | |
1703 | fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); | |
1704 | fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ | |
1705 | fp1 = floatx80_add(fp1, fp2, | |
1706 | status); /* [A1+T(A3+T(A5+TA7))]+ | |
1707 | * [S(A2+T(A4+TA6))] | |
1708 | */ | |
1709 | ||
1710 | x = packFloatx80(xSign, xExp, xSig); | |
1711 | fp0 = floatx80_mul(fp0, x, status); /* R'*S */ | |
1712 | fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ | |
1713 | ||
1714 | status->float_rounding_mode = user_rnd_mode; | |
1715 | status->floatx80_rounding_precision = user_rnd_prec; | |
1716 | ||
1717 | a = floatx80_add(fp0, x, status); | |
1718 | ||
1719 | float_raise(float_flag_inexact, status); | |
1720 | ||
1721 | return a; | |
1722 | } | |
1723 | } | |
1724 | } | |
68d0ed37 LV |
1725 | |
1726 | /*---------------------------------------------------------------------------- | |
1727 | | Cosine | |
1728 | *----------------------------------------------------------------------------*/ | |
1729 | ||
1730 | floatx80 floatx80_cos(floatx80 a, float_status *status) | |
1731 | { | |
1732 | flag aSign, xSign; | |
1733 | int32_t aExp, xExp; | |
1734 | uint64_t aSig, xSig; | |
1735 | ||
1736 | int8_t user_rnd_mode, user_rnd_prec; | |
1737 | ||
1738 | int32_t compact, l, n, j; | |
1739 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; | |
1740 | float32 posneg1, twoto63; | |
6361d298 | 1741 | flag endflag; |
68d0ed37 LV |
1742 | |
1743 | aSig = extractFloatx80Frac(a); | |
1744 | aExp = extractFloatx80Exp(a); | |
1745 | aSign = extractFloatx80Sign(a); | |
1746 | ||
1747 | if (aExp == 0x7FFF) { | |
1748 | if ((uint64_t) (aSig << 1)) { | |
1749 | return propagateFloatx80NaNOneArg(a, status); | |
1750 | } | |
1751 | float_raise(float_flag_invalid, status); | |
1752 | return floatx80_default_nan(status); | |
1753 | } | |
1754 | ||
1755 | if (aExp == 0 && aSig == 0) { | |
1756 | return packFloatx80(0, one_exp, one_sig); | |
1757 | } | |
1758 | ||
68d0ed37 LV |
1759 | user_rnd_mode = status->float_rounding_mode; |
1760 | user_rnd_prec = status->floatx80_rounding_precision; | |
1761 | status->float_rounding_mode = float_round_nearest_even; | |
1762 | status->floatx80_rounding_precision = 80; | |
1763 | ||
1764 | compact = floatx80_make_compact(aExp, aSig); | |
1765 | ||
1766 | fp0 = a; | |
1767 | ||
1768 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { | |
1769 | /* 2^(-40) > |X| > 15 PI */ | |
1770 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ | |
1771 | /* REDUCEX */ | |
1772 | fp1 = packFloatx80(0, 0, 0); | |
1773 | if (compact == 0x7FFEFFFF) { | |
1774 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, | |
1775 | LIT64(0xC90FDAA200000000)); | |
1776 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, | |
1777 | LIT64(0x85A308D300000000)); | |
1778 | fp0 = floatx80_add(fp0, twopi1, status); | |
1779 | fp1 = fp0; | |
1780 | fp0 = floatx80_add(fp0, twopi2, status); | |
1781 | fp1 = floatx80_sub(fp1, fp0, status); | |
1782 | fp1 = floatx80_add(fp1, twopi2, status); | |
1783 | } | |
1784 | loop: | |
1785 | xSign = extractFloatx80Sign(fp0); | |
1786 | xExp = extractFloatx80Exp(fp0); | |
1787 | xExp -= 0x3FFF; | |
1788 | if (xExp <= 28) { | |
1789 | l = 0; | |
1790 | endflag = 1; | |
1791 | } else { | |
1792 | l = xExp - 27; | |
1793 | endflag = 0; | |
1794 | } | |
1795 | invtwopi = packFloatx80(0, 0x3FFE - l, | |
1796 | LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */ | |
1797 | twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); | |
1798 | twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); | |
1799 | ||
1800 | /* SIGN(INARG)*2^63 IN SGL */ | |
1801 | twoto63 = packFloat32(xSign, 0xBE, 0); | |
1802 | ||
1803 | fp2 = floatx80_mul(fp0, invtwopi, status); | |
1804 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), | |
1805 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ | |
1806 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), | |
1807 | status); /* FP2 is N */ | |
1808 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ | |
1809 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ | |
1810 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ | |
1811 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ | |
1812 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ | |
1813 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ | |
1814 | fp3 = fp0; /* FP3 is A */ | |
1815 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ | |
1816 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ | |
1817 | ||
1818 | if (endflag > 0) { | |
1819 | n = floatx80_to_int32(fp2, status); | |
1820 | goto sincont; | |
1821 | } | |
1822 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ | |
1823 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ | |
1824 | goto loop; | |
1825 | } else { | |
1826 | /* SINSM */ | |
1827 | fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */ | |
1828 | ||
1829 | status->float_rounding_mode = user_rnd_mode; | |
1830 | status->floatx80_rounding_precision = user_rnd_prec; | |
1831 | ||
6361d298 LV |
1832 | /* COSTINY */ |
1833 | a = floatx80_sub(fp0, float32_to_floatx80( | |
1834 | make_float32(0x00800000), status), | |
1835 | status); | |
68d0ed37 LV |
1836 | float_raise(float_flag_inexact, status); |
1837 | ||
1838 | return a; | |
1839 | } | |
1840 | } else { | |
1841 | fp1 = floatx80_mul(fp0, float64_to_floatx80( | |
1842 | make_float64(0x3FE45F306DC9C883), status), | |
1843 | status); /* X*2/PI */ | |
1844 | ||
1845 | n = floatx80_to_int32(fp1, status); | |
1846 | j = 32 + n; | |
1847 | ||
1848 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ | |
1849 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), | |
1850 | status); /* FP0 IS R = (X-Y1)-Y2 */ | |
1851 | ||
1852 | sincont: | |
6361d298 | 1853 | if ((n + 1) & 1) { |
68d0ed37 LV |
1854 | /* COSPOLY */ |
1855 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1856 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1857 | fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), | |
1858 | status); /* B8 */ | |
1859 | fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), | |
1860 | status); /* B7 */ | |
1861 | ||
1862 | xSign = extractFloatx80Sign(fp0); /* X IS S */ | |
1863 | xExp = extractFloatx80Exp(fp0); | |
1864 | xSig = extractFloatx80Frac(fp0); | |
1865 | ||
6361d298 | 1866 | if (((n + 1) >> 1) & 1) { |
68d0ed37 LV |
1867 | xSign ^= 1; |
1868 | posneg1 = make_float32(0xBF800000); /* -1 */ | |
1869 | } else { | |
1870 | xSign ^= 0; | |
1871 | posneg1 = make_float32(0x3F800000); /* 1 */ | |
1872 | } /* X IS NOW R'= SGN*R */ | |
1873 | ||
1874 | fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ | |
1875 | fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ | |
1876 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1877 | make_float64(0x3E21EED90612C972), status), | |
1878 | status); /* B6+TB8 */ | |
1879 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1880 | make_float64(0xBE927E4FB79D9FCF), status), | |
1881 | status); /* B5+TB7 */ | |
1882 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ | |
1883 | fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ | |
1884 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1885 | make_float64(0x3EFA01A01A01D423), status), | |
1886 | status); /* B4+T(B6+TB8) */ | |
1887 | fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); | |
1888 | fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ | |
1889 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ | |
1890 | fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ | |
1891 | fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); | |
1892 | fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ | |
1893 | fp1 = floatx80_add(fp1, float32_to_floatx80( | |
1894 | make_float32(0xBF000000), status), | |
1895 | status); /* B1+T(B3+T(B5+TB7)) */ | |
1896 | fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ | |
1897 | fp0 = floatx80_add(fp0, fp1, status); | |
1898 | /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */ | |
1899 | ||
1900 | x = packFloatx80(xSign, xExp, xSig); | |
1901 | fp0 = floatx80_mul(fp0, x, status); | |
1902 | ||
1903 | status->float_rounding_mode = user_rnd_mode; | |
1904 | status->floatx80_rounding_precision = user_rnd_prec; | |
1905 | ||
1906 | a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); | |
1907 | ||
1908 | float_raise(float_flag_inexact, status); | |
1909 | ||
1910 | return a; | |
1911 | } else { | |
1912 | /* SINPOLY */ | |
1913 | xSign = extractFloatx80Sign(fp0); /* X IS R */ | |
1914 | xExp = extractFloatx80Exp(fp0); | |
1915 | xSig = extractFloatx80Frac(fp0); | |
1916 | ||
6361d298 | 1917 | xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */ |
68d0ed37 LV |
1918 | |
1919 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ | |
1920 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ | |
1921 | fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), | |
1922 | status); /* A7 */ | |
1923 | fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), | |
1924 | status); /* A6 */ | |
1925 | fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ | |
1926 | fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ | |
1927 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1928 | make_float64(0xBE5AE6452A118AE4), status), | |
1929 | status); /* A5+T*A7 */ | |
1930 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
1931 | make_float64(0x3EC71DE3A5341531), status), | |
1932 | status); /* A4+T*A6 */ | |
1933 | fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ | |
1934 | fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ | |
1935 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
1936 | make_float64(0xBF2A01A01A018B59), status), | |
1937 | status); /* A3+T(A5+TA7) */ | |
1938 | fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); | |
1939 | fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ | |
1940 | fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ | |
1941 | fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ | |
1942 | fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); | |
1943 | fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ | |
1944 | fp1 = floatx80_add(fp1, fp2, status); | |
1945 | /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */ | |
1946 | ||
1947 | x = packFloatx80(xSign, xExp, xSig); | |
1948 | fp0 = floatx80_mul(fp0, x, status); /* R'*S */ | |
1949 | fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ | |
1950 | ||
1951 | status->float_rounding_mode = user_rnd_mode; | |
1952 | status->floatx80_rounding_precision = user_rnd_prec; | |
1953 | ||
1954 | a = floatx80_add(fp0, x, status); | |
1955 | ||
1956 | float_raise(float_flag_inexact, status); | |
1957 | ||
1958 | return a; | |
1959 | } | |
1960 | } | |
1961 | } | |
8c992abc LV |
1962 | |
1963 | /*---------------------------------------------------------------------------- | |
1964 | | Arc tangent | |
1965 | *----------------------------------------------------------------------------*/ | |
1966 | ||
1967 | floatx80 floatx80_atan(floatx80 a, float_status *status) | |
1968 | { | |
1969 | flag aSign; | |
1970 | int32_t aExp; | |
1971 | uint64_t aSig; | |
1972 | ||
1973 | int8_t user_rnd_mode, user_rnd_prec; | |
1974 | ||
1975 | int32_t compact, tbl_index; | |
1976 | floatx80 fp0, fp1, fp2, fp3, xsave; | |
1977 | ||
1978 | aSig = extractFloatx80Frac(a); | |
1979 | aExp = extractFloatx80Exp(a); | |
1980 | aSign = extractFloatx80Sign(a); | |
1981 | ||
1982 | if (aExp == 0x7FFF) { | |
1983 | if ((uint64_t) (aSig << 1)) { | |
1984 | return propagateFloatx80NaNOneArg(a, status); | |
1985 | } | |
1986 | a = packFloatx80(aSign, piby2_exp, pi_sig); | |
1987 | float_raise(float_flag_inexact, status); | |
1988 | return floatx80_move(a, status); | |
1989 | } | |
1990 | ||
1991 | if (aExp == 0 && aSig == 0) { | |
1992 | return packFloatx80(aSign, 0, 0); | |
1993 | } | |
1994 | ||
1995 | compact = floatx80_make_compact(aExp, aSig); | |
1996 | ||
1997 | user_rnd_mode = status->float_rounding_mode; | |
1998 | user_rnd_prec = status->floatx80_rounding_precision; | |
1999 | status->float_rounding_mode = float_round_nearest_even; | |
2000 | status->floatx80_rounding_precision = 80; | |
2001 | ||
2002 | if (compact < 0x3FFB8000 || compact > 0x4002FFFF) { | |
2003 | /* |X| >= 16 or |X| < 1/16 */ | |
2004 | if (compact > 0x3FFF8000) { /* |X| >= 16 */ | |
2005 | if (compact > 0x40638000) { /* |X| > 2^(100) */ | |
2006 | fp0 = packFloatx80(aSign, piby2_exp, pi_sig); | |
2007 | fp1 = packFloatx80(aSign, 0x0001, one_sig); | |
2008 | ||
2009 | status->float_rounding_mode = user_rnd_mode; | |
2010 | status->floatx80_rounding_precision = user_rnd_prec; | |
2011 | ||
2012 | a = floatx80_sub(fp0, fp1, status); | |
2013 | ||
2014 | float_raise(float_flag_inexact, status); | |
2015 | ||
2016 | return a; | |
2017 | } else { | |
2018 | fp0 = a; | |
2019 | fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */ | |
2020 | fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */ | |
2021 | xsave = fp1; | |
2022 | fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */ | |
2023 | fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ | |
2024 | fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A), | |
2025 | status); /* C5 */ | |
2026 | fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D), | |
2027 | status); /* C4 */ | |
2028 | fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */ | |
2029 | fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */ | |
2030 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
2031 | make_float64(0xBFC24924827107B8), status), | |
2032 | status); /* C3+Z*C5 */ | |
2033 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2034 | make_float64(0x3FC999999996263E), status), | |
2035 | status); /* C2+Z*C4 */ | |
2036 | fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */ | |
2037 | fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */ | |
2038 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2039 | make_float64(0xBFD5555555555536), status), | |
2040 | status); /* C1+Z*(C3+Z*C5) */ | |
2041 | fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */ | |
2042 | /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */ | |
2043 | fp1 = floatx80_add(fp1, fp2, status); | |
2044 | /* X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? */ | |
2045 | fp0 = floatx80_mul(fp0, fp1, status); | |
2046 | fp0 = floatx80_add(fp0, xsave, status); | |
2047 | fp1 = packFloatx80(aSign, piby2_exp, pi_sig); | |
2048 | ||
2049 | status->float_rounding_mode = user_rnd_mode; | |
2050 | status->floatx80_rounding_precision = user_rnd_prec; | |
2051 | ||
2052 | a = floatx80_add(fp0, fp1, status); | |
2053 | ||
2054 | float_raise(float_flag_inexact, status); | |
2055 | ||
2056 | return a; | |
2057 | } | |
2058 | } else { /* |X| < 1/16 */ | |
2059 | if (compact < 0x3FD78000) { /* |X| < 2^(-40) */ | |
2060 | status->float_rounding_mode = user_rnd_mode; | |
2061 | status->floatx80_rounding_precision = user_rnd_prec; | |
2062 | ||
2063 | a = floatx80_move(a, status); | |
2064 | ||
2065 | float_raise(float_flag_inexact, status); | |
2066 | ||
2067 | return a; | |
2068 | } else { | |
2069 | fp0 = a; | |
2070 | xsave = a; | |
2071 | fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */ | |
2072 | fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ | |
2073 | fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989), | |
2074 | status); /* B6 */ | |
2075 | fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB), | |
2076 | status); /* B5 */ | |
2077 | fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */ | |
2078 | fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */ | |
2079 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2080 | make_float64(0x3FBC71C646940220), status), | |
2081 | status); /* B4+Z*B6 */ | |
2082 | fp3 = floatx80_add(fp3, float64_to_floatx80( | |
2083 | make_float64(0xBFC24924921872F9), | |
2084 | status), status); /* B3+Z*B5 */ | |
2085 | fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */ | |
2086 | fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */ | |
2087 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2088 | make_float64(0x3FC9999999998FA9), status), | |
2089 | status); /* B2+Z*(B4+Z*B6) */ | |
2090 | fp1 = floatx80_add(fp1, float64_to_floatx80( | |
2091 | make_float64(0xBFD5555555555555), status), | |
2092 | status); /* B1+Z*(B3+Z*B5) */ | |
2093 | fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */ | |
2094 | fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */ | |
2095 | /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */ | |
2096 | fp1 = floatx80_add(fp1, fp2, status); | |
2097 | /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */ | |
2098 | fp0 = floatx80_mul(fp0, fp1, status); | |
2099 | ||
2100 | status->float_rounding_mode = user_rnd_mode; | |
2101 | status->floatx80_rounding_precision = user_rnd_prec; | |
2102 | ||
2103 | a = floatx80_add(fp0, xsave, status); | |
2104 | ||
2105 | float_raise(float_flag_inexact, status); | |
2106 | ||
2107 | return a; | |
2108 | } | |
2109 | } | |
2110 | } else { | |
2111 | aSig &= LIT64(0xF800000000000000); | |
2112 | aSig |= LIT64(0x0400000000000000); | |
2113 | xsave = packFloatx80(aSign, aExp, aSig); /* F */ | |
2114 | fp0 = a; | |
2115 | fp1 = a; /* X */ | |
2116 | fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */ | |
2117 | fp1 = floatx80_mul(fp1, xsave, status); /* X*F */ | |
2118 | fp0 = floatx80_sub(fp0, xsave, status); /* X-F */ | |
2119 | fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */ | |
2120 | fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */ | |
2121 | ||
2122 | tbl_index = compact; | |
2123 | ||
2124 | tbl_index &= 0x7FFF0000; | |
2125 | tbl_index -= 0x3FFB0000; | |
2126 | tbl_index >>= 1; | |
2127 | tbl_index += compact & 0x00007800; | |
2128 | tbl_index >>= 11; | |
2129 | ||
2130 | fp3 = atan_tbl[tbl_index]; | |
2131 | ||
2132 | fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */ | |
2133 | ||
2134 | fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */ | |
2135 | fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8), | |
2136 | status); /* A3 */ | |
2137 | fp2 = floatx80_add(fp2, fp1, status); /* A3+V */ | |
2138 | fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */ | |
2139 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V */ | |
2140 | fp2 = floatx80_add(fp2, float64_to_floatx80( | |
2141 | make_float64(0x4002AC6934A26DB3), status), | |
2142 | status); /* A2+V*(A3+V) */ | |
2143 | fp1 = floatx80_mul(fp1, float64_to_floatx80( | |
2144 | make_float64(0xBFC2476F4E1DA28E), status), | |
2145 | status); /* A1+U*V */ | |
2146 | fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */ | |
2147 | fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */ | |
2148 | ||
2149 | status->float_rounding_mode = user_rnd_mode; | |
2150 | status->floatx80_rounding_precision = user_rnd_prec; | |
2151 | ||
2152 | a = floatx80_add(fp0, fp3, status); /* ATAN(X) */ | |
2153 | ||
2154 | float_raise(float_flag_inexact, status); | |
2155 | ||
2156 | return a; | |
2157 | } | |
2158 | } | |
bc20b34e LV |
2159 | |
2160 | /*---------------------------------------------------------------------------- | |
2161 | | Arc sine | |
2162 | *----------------------------------------------------------------------------*/ | |
2163 | ||
2164 | floatx80 floatx80_asin(floatx80 a, float_status *status) | |
2165 | { | |
2166 | flag aSign; | |
2167 | int32_t aExp; | |
2168 | uint64_t aSig; | |
2169 | ||
2170 | int8_t user_rnd_mode, user_rnd_prec; | |
2171 | ||
2172 | int32_t compact; | |
2173 | floatx80 fp0, fp1, fp2, one; | |
2174 | ||
2175 | aSig = extractFloatx80Frac(a); | |
2176 | aExp = extractFloatx80Exp(a); | |
2177 | aSign = extractFloatx80Sign(a); | |
2178 | ||
2179 | if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { | |
2180 | return propagateFloatx80NaNOneArg(a, status); | |
2181 | } | |
2182 | ||
2183 | if (aExp == 0 && aSig == 0) { | |
2184 | return packFloatx80(aSign, 0, 0); | |
2185 | } | |
2186 | ||
2187 | compact = floatx80_make_compact(aExp, aSig); | |
2188 | ||
2189 | if (compact >= 0x3FFF8000) { /* |X| >= 1 */ | |
2190 | if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ | |
2191 | float_raise(float_flag_inexact, status); | |
2192 | a = packFloatx80(aSign, piby2_exp, pi_sig); | |
2193 | return floatx80_move(a, status); | |
2194 | } else { /* |X| > 1 */ | |
2195 | float_raise(float_flag_invalid, status); | |
2196 | return floatx80_default_nan(status); | |
2197 | } | |
2198 | ||
2199 | } /* |X| < 1 */ | |
2200 | ||
2201 | user_rnd_mode = status->float_rounding_mode; | |
2202 | user_rnd_prec = status->floatx80_rounding_precision; | |
2203 | status->float_rounding_mode = float_round_nearest_even; | |
2204 | status->floatx80_rounding_precision = 80; | |
2205 | ||
2206 | one = packFloatx80(0, one_exp, one_sig); | |
2207 | fp0 = a; | |
2208 | ||
2209 | fp1 = floatx80_sub(one, fp0, status); /* 1 - X */ | |
2210 | fp2 = floatx80_add(one, fp0, status); /* 1 + X */ | |
2211 | fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */ | |
2212 | fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */ | |
2213 | fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */ | |
2214 | ||
2215 | status->float_rounding_mode = user_rnd_mode; | |
2216 | status->floatx80_rounding_precision = user_rnd_prec; | |
2217 | ||
2218 | a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */ | |
2219 | ||
2220 | float_raise(float_flag_inexact, status); | |
2221 | ||
2222 | return a; | |
2223 | } | |
c84813b8 LV |
2224 | |
2225 | /*---------------------------------------------------------------------------- | |
2226 | | Arc cosine | |
2227 | *----------------------------------------------------------------------------*/ | |
2228 | ||
2229 | floatx80 floatx80_acos(floatx80 a, float_status *status) | |
2230 | { | |
2231 | flag aSign; | |
2232 | int32_t aExp; | |
2233 | uint64_t aSig; | |
2234 | ||
2235 | int8_t user_rnd_mode, user_rnd_prec; | |
2236 | ||
2237 | int32_t compact; | |
2238 | floatx80 fp0, fp1, one; | |
2239 | ||
2240 | aSig = extractFloatx80Frac(a); | |
2241 | aExp = extractFloatx80Exp(a); | |
2242 | aSign = extractFloatx80Sign(a); | |
2243 | ||
2244 | if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { | |
2245 | return propagateFloatx80NaNOneArg(a, status); | |
2246 | } | |
2247 | if (aExp == 0 && aSig == 0) { | |
2248 | float_raise(float_flag_inexact, status); | |
2249 | return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, | |
2250 | piby2_exp, pi_sig, 0, status); | |
2251 | } | |
2252 | ||
2253 | compact = floatx80_make_compact(aExp, aSig); | |
2254 | ||
2255 | if (compact >= 0x3FFF8000) { /* |X| >= 1 */ | |
2256 | if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ | |
2257 | if (aSign) { /* X == -1 */ | |
2258 | a = packFloatx80(0, pi_exp, pi_sig); | |
2259 | float_raise(float_flag_inexact, status); | |
2260 | return floatx80_move(a, status); | |
2261 | } else { /* X == +1 */ | |
2262 | return packFloatx80(0, 0, 0); | |
2263 | } | |
2264 | } else { /* |X| > 1 */ | |
2265 | float_raise(float_flag_invalid, status); | |
2266 | return floatx80_default_nan(status); | |
2267 | } | |
2268 | } /* |X| < 1 */ | |
2269 | ||
2270 | user_rnd_mode = status->float_rounding_mode; | |
2271 | user_rnd_prec = status->floatx80_rounding_precision; | |
2272 | status->float_rounding_mode = float_round_nearest_even; | |
2273 | status->floatx80_rounding_precision = 80; | |
2274 | ||
2275 | one = packFloatx80(0, one_exp, one_sig); | |
2276 | fp0 = a; | |
2277 | ||
2278 | fp1 = floatx80_add(one, fp0, status); /* 1 + X */ | |
2279 | fp0 = floatx80_sub(one, fp0, status); /* 1 - X */ | |
2280 | fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */ | |
2281 | fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */ | |
2282 | fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */ | |
2283 | ||
2284 | status->float_rounding_mode = user_rnd_mode; | |
2285 | status->floatx80_rounding_precision = user_rnd_prec; | |
2286 | ||
2287 | a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */ | |
2288 | ||
2289 | float_raise(float_flag_inexact, status); | |
2290 | ||
2291 | return a; | |
2292 | } | |
e3655afa LV |
2293 | |
2294 | /*---------------------------------------------------------------------------- | |
2295 | | Hyperbolic arc tangent | |
2296 | *----------------------------------------------------------------------------*/ | |
2297 | ||
2298 | floatx80 floatx80_atanh(floatx80 a, float_status *status) | |
2299 | { | |
2300 | flag aSign; | |
2301 | int32_t aExp; | |
2302 | uint64_t aSig; | |
2303 | ||
2304 | int8_t user_rnd_mode, user_rnd_prec; | |
2305 | ||
2306 | int32_t compact; | |
2307 | floatx80 fp0, fp1, fp2, one; | |
2308 | ||
2309 | aSig = extractFloatx80Frac(a); | |
2310 | aExp = extractFloatx80Exp(a); | |
2311 | aSign = extractFloatx80Sign(a); | |
2312 | ||
2313 | if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { | |
2314 | return propagateFloatx80NaNOneArg(a, status); | |
2315 | } | |
2316 | ||
2317 | if (aExp == 0 && aSig == 0) { | |
2318 | return packFloatx80(aSign, 0, 0); | |
2319 | } | |
2320 | ||
2321 | compact = floatx80_make_compact(aExp, aSig); | |
2322 | ||
2323 | if (compact >= 0x3FFF8000) { /* |X| >= 1 */ | |
2324 | if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ | |
2325 | float_raise(float_flag_divbyzero, status); | |
2326 | return packFloatx80(aSign, floatx80_infinity.high, | |
2327 | floatx80_infinity.low); | |
2328 | } else { /* |X| > 1 */ | |
2329 | float_raise(float_flag_invalid, status); | |
2330 | return floatx80_default_nan(status); | |
2331 | } | |
2332 | } /* |X| < 1 */ | |
2333 | ||
2334 | user_rnd_mode = status->float_rounding_mode; | |
2335 | user_rnd_prec = status->floatx80_rounding_precision; | |
2336 | status->float_rounding_mode = float_round_nearest_even; | |
2337 | status->floatx80_rounding_precision = 80; | |
2338 | ||
2339 | one = packFloatx80(0, one_exp, one_sig); | |
2340 | fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */ | |
2341 | fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */ | |
2342 | fp1 = packFloatx80(1, aExp, aSig); /* -Y */ | |
2343 | fp0 = floatx80_add(fp0, fp0, status); /* 2Y */ | |
2344 | fp1 = floatx80_add(fp1, one, status); /* 1-Y */ | |
2345 | fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */ | |
2346 | fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */ | |
2347 | ||
2348 | status->float_rounding_mode = user_rnd_mode; | |
2349 | status->floatx80_rounding_precision = user_rnd_prec; | |
2350 | ||
2351 | a = floatx80_mul(fp0, fp2, | |
2352 | status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */ | |
2353 | ||
2354 | float_raise(float_flag_inexact, status); | |
2355 | ||
2356 | return a; | |
2357 | } | |
9937b029 LV |
2358 | |
/*----------------------------------------------------------------------------
 | e to x minus 1
 |
 | Returns expm1(a) = e^a - 1 in extended double precision, following the
 | M68040 FPSP "setoxm1" algorithm:
 |  - |X| >= 1/4 and |X| <= 70 log2: argument reduction
 |        X = R + (M + J/64) * log2,  J in [0, 63],
 |    then expm1(X) = 2^M * (2^(J/64) * (exp(R) - 1) + (2^(J/64) - 2^(-M)))
 |    with exp(R)-1 evaluated by a degree-6 minimax polynomial (A1..A6) and
 |    2^(J/64) split into a table value plus correction (exp_tbl/exp_tbl2).
 |  - |X| > 70 log2: result saturates; -1 + tiny for negative X, plain
 |    exp(X) (which overflows to +inf appropriately) for positive X.
 |  - |X| < 1/4: direct degree-12 polynomial (B1..B12) in S = X*X.
 |  - |X| < 2^(-65): expm1(X) ~= X; scaled to raise underflow/inexact
 |    correctly for denormal-range inputs.
 | The caller's rounding mode/precision are saved, the computation runs in
 | round-to-nearest-even at full 80-bit precision, and the caller's settings
 | are restored just before the final operation so the delivered result is
 | rounded exactly once in the user's mode.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN or infinity */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            /* expm1(-inf) = -1 */
            return packFloatx80(aSign, one_exp, one_sig);
        }
        /* expm1(+inf) = +inf */
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* expm1(+/-0) = +/-0 */
        return packFloatx80(aSign, 0, 0);
    }

    /* Run the internal computation in round-to-nearest, 80-bit precision;
     * the user's settings are restored before the final rounding operation.
     */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /* arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m1 = -m;
            /*m += 0x3FFF; // biased exponent of 2^(M) */
            /*m1 += 0x3FFF; // biased exponent of -2^(-M) */

            /* R = X - N*(log2/64), computed with log2/64 split into a
             * leading part L1 and a trailing part L2 for extra accuracy.
             */
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            /* exp(R) - 1 = R + R*S*(A1 + S*(A2 + ... )) evaluated as two
             * interleaved even/odd chains (fp2/fp3) for shorter dependency.
             */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3950097B),
                                      status); /* A6 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
                               status), fp1, status); /* fp3 is S*A5 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F81111111174385), status),
                               status); /* fp2 IS A4+S*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0x3FA5555555554F5A), status),
                               status); /* fp3 is A3+S*A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FC5555555555555), status),
                               status); /* fp2 IS A2+S*(A4+S*A6) */
            fp3 = floatx80_add(fp3, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp3 IS A1+S*(A3+S*A5) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
            fp1 = floatx80_mul(fp1, fp3,
                               status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
            fp2 = floatx80_mul(fp2, fp0,
                               status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            fp0 = floatx80_mul(fp0, exp_tbl[j],
                               status); /* 2^(J/64)*(Exp(R)-1) */

            /* Reconstruction: the summation order of the three terms
             * 2^(J/64)*(exp(R)-1), 2^(J/64) (table + correction), and
             * -2^(-M) is chosen per magnitude of M so the smallest terms
             * are accumulated first.
             */
            if (m >= 64) {
                fp1 = float32_to_floatx80(exp_tbl2[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
            } else if (m < -3) {
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp0 = floatx80_add(fp0, onebysc, status);
            } else { /* -3 <= m <= 63 */
                fp1 = exp_tbl[j];
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
            }

            sc = packFloatx80(0, m + 0x3FFF, one_sig); /* 2^M */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, sc, status); /* final scaling by 2^M */

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| > 70 log2 */
            if (aSign) {
                fp0 = float32_to_floatx80(make_float32(0xBF800000),
                      status); /* -1 */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                /* result is -1 rounded in the user's mode; the tiny
                 * addend forces the inexact-direction to be correct.
                 */
                a = floatx80_add(fp0, float32_to_floatx80(
                                 make_float32(0x00800000), status),
                                 status); /* -1 + 2^(-126) */

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* exp(X) - 1 ~= exp(X) for large positive X; delegate so
                 * overflow is signalled consistently with floatx80_etox.
                 */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                return floatx80_etox(a, status);
            }
        }
    } else { /* |X| < 1/4 */
        if (aExp >= 0x3FBE) {
            /* 2^(-65) <= |X| < 1/4: direct polynomial
             * expm1(X) = X + S*B1 + X*S*(B2 + S*(B3 + ...)), S = X*X,
             * again as two interleaved even/odd chains (fp1/fp2).
             */
            fp0 = a;
            fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
            fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
                                      status); /* B12 */
            fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
            fp2 = float32_to_floatx80(make_float32(0x310F8290),
                                      status); /* B11 */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x32D73220), status),
                               status); /* B10 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3493F281), status),
                               status); /* B9 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5774682), status),
                               status); /* B8 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A019D7CB68), status),
                               status); /* B7 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F2A01A01A019DF3), status),
                               status); /* B6 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F56C16C16C170E2), status),
                               status); /* B5 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F81111111111111), status),
                               status); /* B4 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FA5555555555555), status),
                               status); /* B3 */
            fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB));
            fp1 = floatx80_add(fp1, fp3, status); /* B2 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);

            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, a, status);

            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* S*B1 */
            fp1 = floatx80_add(fp1, fp2, status); /* Q */
            fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, a, status); /* X + (S*B1 + Q) */

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| < 2^(-65) */
            /* expm1(X) ~= X here; add/subtract a scaled -2^(-16383)-style
             * term so the rounding and exception flags come out right.
             */
            sc = packFloatx80(1, 1, one_sig);
            fp0 = a;

            if (aExp < 0x0033) { /* |X| < 2^(-16382) */
                /* Denormal-range input: scale up by 2^140, add, and
                 * scale back down by 2^(-140) so underflow is raised
                 * by the final multiply in the user's mode.
                 */
                fp0 = floatx80_mul(fp0, float64_to_floatx80(
                                   make_float64(0x48B0000000000000), status),
                                   status);
                fp0 = floatx80_add(fp0, sc, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_mul(fp0, float64_to_floatx80(
                                 make_float64(0x3730000000000000), status),
                                 status);
            } else {
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, sc, status);
            }

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
2609 | ||
/*----------------------------------------------------------------------------
 | Hyperbolic tangent
 |
 | Returns tanh(a) in extended double precision, following the M68040 FPSP
 | "stanh" algorithm. Ranges (keyed on the compacted exponent/fraction):
 |  - |X| < 2^(-40) ("TANHSM"): tanh(X) ~= X, returned via floatx80_move so
 |    the user's rounding settings apply.
 |  - 2^(-40) <= |X| < (5/2)log2: tanh(X) = Z / (Z + 2) with sign fix-up,
 |    where Z = expm1(2|X|).
 |  - (5/2)log2 <= |X| <= 50 log2: tanh(X) = sign(X) - sign(X)*2/(exp(2|X|)+1).
 |  - |X| > 50 log2 ("TANHHUGE"): result is +/-1 with a tiny opposite-signed
 |    addend so rounding direction and inexact are correct.
 | Rounding mode/precision are forced to nearest-even/80-bit internally and
 | restored before the final operation.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_tanh(floatx80 a, float_status *status)
{
    flag aSign, vSign;
    int32_t aExp, vExp;
    uint64_t aSig, vSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;
    uint32_t sign; /* float32 bit pattern used to build signed constants */

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN or infinity */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* tanh(+/-inf) = +/-1 */
        return packFloatx80(aSign, one_exp, one_sig);
    }

    if (aExp == 0 && aSig == 0) {
        /* tanh(+/-0) = +/-0 */
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
        /* TANHBORS */
        if (compact < 0x3FFF8000) {
            /* TANHSM: |X| < 2^(-40), tanh(X) ~= X */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            if (compact > 0x40048AA1) {
                /* TANHHUGE: |X| > 50 log2, result saturates to +/-1 */
                sign = 0x3F800000; /* float32 1.0 */
                sign |= aSign ? 0x80000000 : 0x00000000; /* SIGN(X)*1.0 */
                fp0 = float32_to_floatx80(make_float32(sign), status);
                sign &= 0x80000000;
                sign ^= 0x80800000; /* -SIGN(X)*EPS */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
                                 status), status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* (5/2)log2 <= |X| <= 50 log2:
                 * tanh(X) = SIGN(X) - SIGN(X)*2/(exp(2|X|)+1)
                 */
                fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
                fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
                fp0 = floatx80_add(fp0, float32_to_floatx80(
                                   make_float32(0x3F800000),
                                   status), status); /* EXP(Y)+1 */
                sign = aSign ? 0x80000000 : 0x00000000;
                fp1 = floatx80_div(float32_to_floatx80(make_float32(
                                   sign ^ 0xC0000000), status), fp0,
                                   status); /* -SIGN(X)*2 / [EXP(Y)+1] */
                fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
                                          status); /* SIGN */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp1, fp0, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else { /* 2**(-40) < |X| < (5/2)LOG2 */
        /* tanh(X) = Z / (Z+2) with the sign of X, Z = expm1(2|X|) */
        fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
                           status),
                           status); /* Z+2 */

        /* Transfer the argument's sign onto the denominator so the
         * quotient carries SIGN(X).
         */
        vSign = extractFloatx80Sign(fp1);
        vExp = extractFloatx80Exp(fp1);
        vSig = extractFloatx80Frac(fp1);

        fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_div(fp0, fp1, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
eee6b892 LV |
2724 | |
/*----------------------------------------------------------------------------
 | Hyperbolic sine
 |
 | Returns sinh(a) in extended double precision, following the M68040 FPSP
 | "ssinh" algorithm:
 |  - |X| < 16380 log2: sinh(X) = SIGN(X) * (1/2) * (Z + Z/(1+Z)),
 |    Z = expm1(|X|).
 |  - 16380 log2 < |X| <= 16480 log2 ("SINHBIG"): compute
 |    exp(|X| - 16381 log2) (the 16381 log2 constant is split into lead and
 |    trail parts for accuracy) and scale by SIGN(X)*2^16380 so the
 |    intermediate exp does not overflow prematurely.
 |  - |X| > 16480 log2: guaranteed overflow; deliver a properly rounded,
 |    overflowed result via roundAndPackFloatx80 with a huge exponent.
 | Rounding mode/precision are forced to nearest-even/80-bit internally and
 | restored before the final operation.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_sinh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2;
    float32 fact;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN or infinity */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* sinh(+/-inf) = +/-inf */
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* sinh(+/-0) = +/-0 */
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        /* SINHBIG */
        if (compact > 0x400CB2B3) {
            /* |X| > 16480 log2: certain overflow; round and pack with an
             * out-of-range exponent so overflow/inexact are raised in the
             * user's mode.
             */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                        aSign, 0x8000, aSig, 0, status);
        } else {
            fp0 = floatx80_abs(a); /* Y = |X| */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status); /* (|X|-16381LOG2_LEAD) */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status); /* |X| - 16381 LOG2, ACCURATE */
            fp0 = floatx80_etox(fp0, status);
            fp2 = packFloatx80(aSign, 0x7FFB, one_sig); /* SIGN(X)*2^16380 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp2, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* |X| < 16380 LOG2 */
        fp0 = floatx80_abs(a); /* Y = |X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* 1+Z */
        fp2 = fp0;
        fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
        fp0 = floatx80_add(fp0, fp2, status); /* Z + Z/(1+Z) */

        fact = packFloat32(aSign, 0x7E, 0); /* SIGN(X) * 1/2 */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
02f9124e LV |
2813 | |
/*----------------------------------------------------------------------------
 | Hyperbolic cosine
 |
 | Returns cosh(a) in extended double precision, following the M68040 FPSP
 | "scosh" algorithm. cosh is even, so the sign of the argument is ignored
 | and the result is always positive:
 |  - |X| < 16380 log2: cosh(X) = (1/2)*exp(|X|) + 1/(2*exp(|X|)).
 |  - 16380 log2 < |X| <= 16480 log2: compute exp(|X| - 16381 log2)
 |    (constant split into lead and trail parts) and scale by 2^16380 so
 |    the intermediate exp does not overflow prematurely.
 |  - |X| > 16480 log2: certain overflow; deliver a properly rounded,
 |    overflowed result via roundAndPackFloatx80 with a huge exponent.
 | Rounding mode/precision are forced to nearest-even/80-bit internally and
 | restored before the final operation.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_cosh(floatx80 a, float_status *status)
{
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);

    if (aExp == 0x7FFF) {
        /* NaN or infinity */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* cosh(+/-inf) = +inf */
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* cosh(+/-0) = +1 */
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        if (compact > 0x400CB2B3) {
            /* |X| > 16480 log2: certain overflow */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;
            return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                        0x8000, one_sig, 0, status);
        } else {
            /* 16380 log2 < |X| <= 16480 log2:
             * cosh(X) ~= 2^16380 * exp(|X| - 16381 log2)
             */
            fp0 = packFloatx80(0, aExp, aSig); /* |X| */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status); /* |X| - 16381 log2, lead part */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status); /* |X| - 16381 log2, accurate */
            fp0 = floatx80_etox(fp0, status);
            fp1 = packFloatx80(0, 0x7FFB, one_sig); /* 2^16380 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }

    fp0 = packFloatx80(0, aExp, aSig); /* |X| */
    fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
    fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
                       status), status); /* (1/2)*EXP(|X|) */
    fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
    fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp1, status);

    float_raise(float_flag_inexact, status);

    return a;
}