]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | #ifndef FASTFLOAT_DECIMAL_TO_BINARY_H |
2 | #define FASTFLOAT_DECIMAL_TO_BINARY_H | |
3 | ||
4 | #include "float_common.h" | |
5 | #include "fast_table.h" | |
6 | #include <cfloat> | |
7 | #include <cinttypes> | |
8 | #include <cmath> | |
9 | #include <cstdint> | |
10 | #include <cstdio> | |
11 | #include <cstdlib> | |
12 | #include <cstring> | |
13 | ||
14 | namespace arrow_vendored { | |
15 | namespace fast_float { | |
16 | ||
17 | // This will compute or rather approximate w * 5**q and return a pair of 64-bit words approximating | |
18 | // the result, with the "high" part corresponding to the most significant bits and the | |
19 | // low part corresponding to the least significant bits. | |
20 | // | |
21 | template <int bit_precision> | |
22 | fastfloat_really_inline | |
23 | value128 compute_product_approximation(int64_t q, uint64_t w) { | |
24 | const int index = 2 * int(q - smallest_power_of_five); | |
25 | // For small values of q, e.g., q in [0,27], the answer is always exact because | |
26 | // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]); | |
27 | // gives the exact answer. | |
28 | value128 firstproduct = full_multiplication(w, power_of_five_128[index]); | |
29 | static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]"); | |
30 | constexpr uint64_t precision_mask = (bit_precision < 64) ? | |
31 | (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) | |
32 | : uint64_t(0xFFFFFFFFFFFFFFFF); | |
33 | if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower) | |
34 | // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed. | |
35 | value128 secondproduct = full_multiplication(w, power_of_five_128[index + 1]); | |
36 | firstproduct.low += secondproduct.high; | |
37 | if(secondproduct.high > firstproduct.low) { | |
38 | firstproduct.high++; | |
39 | } | |
40 | } | |
41 | return firstproduct; | |
42 | } | |
43 | ||
44 | namespace { | |
45 | /** | |
46 | * For q in (0,350), we have that | |
47 | * f = (((152170 + 65536) * q ) >> 16); | |
48 | * is equal to | |
49 | * floor(p) + q | |
50 | * where | |
51 | * p = log(5**q)/log(2) = q * log(5)/log(2) | |
52 | * | |
53 | * For negative values of q in (-400,0), we have that | |
54 | * f = (((152170 + 65536) * q ) >> 16); | |
55 | * is equal to | |
56 | * -ceil(p) + q | |
57 | * where | |
58 | * p = log(5**-q)/log(2) = -q * log(5)/log(2) | |
59 | */ | |
60 | fastfloat_really_inline int power(int q) noexcept { | |
61 | return (((152170 + 65536) * q) >> 16) + 63; | |
62 | } | |
63 | } // namespace | |
64 | ||
65 | ||
66 | // w * 10 ** q | |
67 | // The returned value should be a valid ieee64 number that simply need to be packed. | |
68 | // However, in some very rare cases, the computation will fail. In such cases, we | |
69 | // return an adjusted_mantissa with a negative power of 2: the caller should recompute | |
70 | // in such cases. | |
71 | template <typename binary> | |
72 | fastfloat_really_inline | |
73 | adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { | |
74 | adjusted_mantissa answer; | |
75 | if ((w == 0) || (q < binary::smallest_power_of_ten())) { | |
76 | answer.power2 = 0; | |
77 | answer.mantissa = 0; | |
78 | // result should be zero | |
79 | return answer; | |
80 | } | |
81 | if (q > binary::largest_power_of_ten()) { | |
82 | // we want to get infinity: | |
83 | answer.power2 = binary::infinite_power(); | |
84 | answer.mantissa = 0; | |
85 | return answer; | |
86 | } | |
87 | // At this point in time q is in [smallest_power_of_five, largest_power_of_five]. | |
88 | ||
89 | // We want the most significant bit of i to be 1. Shift if needed. | |
90 | int lz = leading_zeroes(w); | |
91 | w <<= lz; | |
92 | ||
93 | // The required precision is binary::mantissa_explicit_bits() + 3 because | |
94 | // 1. We need the implicit bit | |
95 | // 2. We need an extra bit for rounding purposes | |
96 | // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift) | |
97 | ||
98 | value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w); | |
99 | if(product.low == 0xFFFFFFFFFFFFFFFF) { // could guard it further | |
100 | // In some very rare cases, this could happen, in which case we might need a more accurate | |
101 | // computation that what we can provide cheaply. This is very, very unlikely. | |
102 | // | |
103 | const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0, | |
104 | // and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation. | |
105 | if(!inside_safe_exponent) { | |
106 | answer.power2 = -1; // This (a negative value) indicates an error condition. | |
107 | return answer; | |
108 | } | |
109 | } | |
110 | // The "compute_product_approximation" function can be slightly slower than a branchless approach: | |
111 | // value128 product = compute_product(q, w); | |
112 | // but in practice, we can win big with the compute_product_approximation if its additional branch | |
113 | // is easily predicted. Which is best is data specific. | |
114 | int upperbit = int(product.high >> 63); | |
115 | ||
116 | answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); | |
117 | ||
118 | answer.power2 = int(power(int(q)) + upperbit - lz - binary::minimum_exponent()); | |
119 | if (answer.power2 <= 0) { // we have a subnormal? | |
120 | // Here have that answer.power2 <= 0 so -answer.power2 >= 0 | |
121 | if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. | |
122 | answer.power2 = 0; | |
123 | answer.mantissa = 0; | |
124 | // result should be zero | |
125 | return answer; | |
126 | } | |
127 | // next line is safe because -answer.power2 + 1 < 64 | |
128 | answer.mantissa >>= -answer.power2 + 1; | |
129 | // Thankfully, we can't have both "round-to-even" and subnormals because | |
130 | // "round-to-even" only occurs for powers close to 0. | |
131 | answer.mantissa += (answer.mantissa & 1); // round up | |
132 | answer.mantissa >>= 1; | |
133 | // There is a weird scenario where we don't have a subnormal but just. | |
134 | // Suppose we start with 2.2250738585072013e-308, we end up | |
135 | // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal | |
136 | // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round | |
137 | // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer | |
138 | // subnormal, but we can only know this after rounding. | |
139 | // So we only declare a subnormal if we are smaller than the threshold. | |
140 | answer.power2 = (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) ? 0 : 1; | |
141 | return answer; | |
142 | } | |
143 | ||
144 | // usually, we round *up*, but if we fall right in between and and we have an | |
145 | // even basis, we need to round down | |
146 | // We are only concerned with the cases where 5**q fits in single 64-bit word. | |
147 | if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) && | |
148 | ((answer.mantissa & 3) == 1) ) { // we may fall between two floats! | |
149 | // To be in-between two floats we need that in doing | |
150 | // answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); | |
151 | // ... we dropped out only zeroes. But if this happened, then we can go back!!! | |
152 | if((answer.mantissa << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) == product.high) { | |
153 | answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up | |
154 | } | |
155 | } | |
156 | ||
157 | answer.mantissa += (answer.mantissa & 1); // round up | |
158 | answer.mantissa >>= 1; | |
159 | if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { | |
160 | answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); | |
161 | answer.power2++; // undo previous addition | |
162 | } | |
163 | ||
164 | answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); | |
165 | if (answer.power2 >= binary::infinite_power()) { // infinity | |
166 | answer.power2 = binary::infinite_power(); | |
167 | answer.mantissa = 0; | |
168 | } | |
169 | return answer; | |
170 | } | |
171 | ||
172 | ||
173 | } // namespace fast_float | |
174 | } // namespace arrow_vendored | |
175 | ||
176 | #endif |