]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is dual licensed under the MIT and the University of Illinois Open | |
6 | // Source Licenses. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file implements single-precision soft-float division | |
11 | // with the IEEE-754 default rounding (to nearest, ties to even). | |
12 | // | |
13 | // For simplicity, this implementation currently flushes denormals to zero. | |
14 | // It should be a fairly straightforward exercise to implement gradual | |
15 | // underflow with correct rounding. | |
16 | // | |
17 | //===----------------------------------------------------------------------===// | |
18 | ||
19 | #define SINGLE_PRECISION | |
20 | #include "fp_lib.h" | |
21 | ||
1a4d82fc JJ |
22 | COMPILER_RT_ABI fp_t |
23 | __divsf3(fp_t a, fp_t b) { | |
24 | ||
25 | const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; | |
26 | const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; | |
27 | const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; | |
28 | ||
29 | rep_t aSignificand = toRep(a) & significandMask; | |
30 | rep_t bSignificand = toRep(b) & significandMask; | |
31 | int scale = 0; | |
32 | ||
33 | // Detect if a or b is zero, denormal, infinity, or NaN. | |
34 | if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { | |
35 | ||
36 | const rep_t aAbs = toRep(a) & absMask; | |
37 | const rep_t bAbs = toRep(b) & absMask; | |
38 | ||
39 | // NaN / anything = qNaN | |
40 | if (aAbs > infRep) return fromRep(toRep(a) | quietBit); | |
41 | // anything / NaN = qNaN | |
42 | if (bAbs > infRep) return fromRep(toRep(b) | quietBit); | |
43 | ||
44 | if (aAbs == infRep) { | |
45 | // infinity / infinity = NaN | |
46 | if (bAbs == infRep) return fromRep(qnanRep); | |
47 | // infinity / anything else = +/- infinity | |
48 | else return fromRep(aAbs | quotientSign); | |
49 | } | |
50 | ||
51 | // anything else / infinity = +/- 0 | |
52 | if (bAbs == infRep) return fromRep(quotientSign); | |
53 | ||
54 | if (!aAbs) { | |
55 | // zero / zero = NaN | |
56 | if (!bAbs) return fromRep(qnanRep); | |
57 | // zero / anything else = +/- zero | |
58 | else return fromRep(quotientSign); | |
59 | } | |
60 | // anything else / zero = +/- infinity | |
61 | if (!bAbs) return fromRep(infRep | quotientSign); | |
62 | ||
63 | // one or both of a or b is denormal, the other (if applicable) is a | |
64 | // normal number. Renormalize one or both of a and b, and set scale to | |
65 | // include the necessary exponent adjustment. | |
66 | if (aAbs < implicitBit) scale += normalize(&aSignificand); | |
67 | if (bAbs < implicitBit) scale -= normalize(&bSignificand); | |
68 | } | |
69 | ||
70 | // Or in the implicit significand bit. (If we fell through from the | |
71 | // denormal path it was already set by normalize( ), but setting it twice | |
72 | // won't hurt anything.) | |
73 | aSignificand |= implicitBit; | |
74 | bSignificand |= implicitBit; | |
75 | int quotientExponent = aExponent - bExponent + scale; | |
76 | ||
77 | // Align the significand of b as a Q31 fixed-point number in the range | |
78 | // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax | |
79 | // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This | |
80 | // is accurate to about 3.5 binary digits. | |
81 | uint32_t q31b = bSignificand << 8; | |
82 | uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; | |
83 | ||
84 | // Now refine the reciprocal estimate using a Newton-Raphson iteration: | |
85 | // | |
86 | // x1 = x0 * (2 - x0 * b) | |
87 | // | |
88 | // This doubles the number of correct binary digits in the approximation | |
89 | // with each iteration, so after three iterations, we have about 28 binary | |
90 | // digits of accuracy. | |
91 | uint32_t correction; | |
92 | correction = -((uint64_t)reciprocal * q31b >> 32); | |
93 | reciprocal = (uint64_t)reciprocal * correction >> 31; | |
94 | correction = -((uint64_t)reciprocal * q31b >> 32); | |
95 | reciprocal = (uint64_t)reciprocal * correction >> 31; | |
96 | correction = -((uint64_t)reciprocal * q31b >> 32); | |
97 | reciprocal = (uint64_t)reciprocal * correction >> 31; | |
98 | ||
99 | // Exhaustive testing shows that the error in reciprocal after three steps | |
100 | // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our | |
101 | // expectations. We bump the reciprocal by a tiny value to force the error | |
102 | // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to | |
103 | // be specific). This also causes 1/1 to give a sensible approximation | |
104 | // instead of zero (due to overflow). | |
105 | reciprocal -= 2; | |
106 | ||
107 | // The numerical reciprocal is accurate to within 2^-28, lies in the | |
108 | // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller | |
109 | // than the true reciprocal of b. Multiplying a by this reciprocal thus | |
110 | // gives a numerical q = a/b in Q24 with the following properties: | |
111 | // | |
112 | // 1. q < a/b | |
113 | // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) | |
114 | // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes | |
115 | // from the fact that we truncate the product, and the 2^27 term | |
116 | // is the error in the reciprocal of b scaled by the maximum | |
117 | // possible value of a. As a consequence of this error bound, | |
118 | // either q or nextafter(q) is the correctly rounded | |
119 | rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32; | |
120 | ||
121 | // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). | |
122 | // In either case, we are going to compute a residual of the form | |
123 | // | |
124 | // r = a - q*b | |
125 | // | |
126 | // We know from the construction of q that r satisfies: | |
127 | // | |
128 | // 0 <= r < ulp(q)*b | |
129 | // | |
130 | // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we | |
131 | // already have the correct result. The exact halfway case cannot occur. | |
132 | // We also take this time to right shift quotient if it falls in the [1,2) | |
133 | // range and adjust the exponent accordingly. | |
134 | rep_t residual; | |
135 | if (quotient < (implicitBit << 1)) { | |
136 | residual = (aSignificand << 24) - quotient * bSignificand; | |
137 | quotientExponent--; | |
138 | } else { | |
139 | quotient >>= 1; | |
140 | residual = (aSignificand << 23) - quotient * bSignificand; | |
141 | } | |
142 | ||
143 | const int writtenExponent = quotientExponent + exponentBias; | |
144 | ||
145 | if (writtenExponent >= maxExponent) { | |
146 | // If we have overflowed the exponent, return infinity. | |
147 | return fromRep(infRep | quotientSign); | |
148 | } | |
149 | ||
150 | else if (writtenExponent < 1) { | |
151 | // Flush denormals to zero. In the future, it would be nice to add | |
152 | // code to round them correctly. | |
153 | return fromRep(quotientSign); | |
154 | } | |
155 | ||
156 | else { | |
157 | const bool round = (residual << 1) > bSignificand; | |
158 | // Clear the implicit bit | |
159 | rep_t absResult = quotient & significandMask; | |
160 | // Insert the exponent | |
161 | absResult |= (rep_t)writtenExponent << significandBits; | |
162 | // Round | |
163 | absResult += round; | |
164 | // Insert the sign and return | |
165 | return fromRep(absResult | quotientSign); | |
166 | } | |
167 | } | |
2c00a5a8 XL |
168 | |
169 | #if defined(__ARM_EABI__) | |
170 | #if defined(COMPILER_RT_ARMHF_TARGET) | |
171 | AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { | |
172 | return __divsf3(a, b); | |
173 | } | |
174 | #else | |
175 | AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divsf3); | |
176 | #endif | |
177 | #endif |