1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "cached-powers.h"
36 namespace double_conversion
{
38 // 2^53 = 9007199254740992.
39 // Any integer with at most 15 decimal digits will hence fit into a double
40 // (which has a 53-bit significand) without loss of precision.
41 static const int kMaxExactDoubleIntegerDecimalDigits
= 15;
42 // 2^64 = 18446744073709551616 > 10^19, so every decimal number with at most
// 19 digits fits into a uint64.
43 static const int kMaxUint64DecimalDigits
= 19;
45 // Max double: 1.7976931348623157 x 10^308
46 // Min non-zero double: 4.9406564584124654 x 10^-324
47 // Any x >= 10^309 is interpreted as +infinity.
48 // Any x <= 10^-324 is interpreted as 0.
49 // Note that 2.5e-324 (despite being smaller than the min double) will be read
50 // as non-zero (equal to the min non-zero double).
51 static const int kMaxDecimalPower
= 309;
52 static const int kMinDecimalPower
= -324;
54 // Largest uint64 value: 2^64 - 1 = 18446744073709551615
// (assuming UINT64_2PART_C joins its two 32-bit halves into one constant).
55 static const uint64_t kMaxUint64
= UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF
);
// Powers of ten written as double literals. DoubleStrtod indexes this table
// with a decimal exponent to scale an exactly-read integer; the comments on
// individual entries give the exponent of that entry.
58 static const double exact_powers_of_ten
[] = {
69 10000000000.0, // 10^10
77 1000000000000000000.0, // 10^18
78 10000000000000000000.0, // 10^19
79 100000000000000000000.0, // 10^20
80 1000000000000000000000.0, // 10^21
81 // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
82 10000000000000000000000.0
// Number of entries in exact_powers_of_ten; used as the exclusive upper bound
// for exponents that may be looked up in the table.
84 static const int kExactPowersOfTenSize
= ARRAY_SIZE(exact_powers_of_ten
);
86 // Maximum number of significant digits in the decimal representation.
87 // In fact the value is 772 (see conversions.cc), but to give us some margin
88 // we round up to 780.
// TrimAndCut shortens any trimmed input that is longer than this, and Strtod /
// Strtof size their scratch buffers with it.
89 static const int kMaxSignificantDecimalDigits
= 780;
// Returns the part of buffer that starts at its first character other than
// '0' and runs to the end of buffer.
// The final statement builds an empty vector positioned at buffer.start();
// it is presumably reached only when no such character exists (buffer is empty
// or consists solely of '0').
91 static Vector
<const char> TrimLeadingZeros(Vector
<const char> buffer
) {
92 for (int i
= 0; i
< buffer
.length(); i
++) {
93 if (buffer
[i
] != '0') {
94 return buffer
.SubVector(i
, buffer
.length());
97 return Vector
<const char>(buffer
.start(), 0);
// Returns the part of buffer that starts at index 0 and ends with its last
// character other than '0' (the buffer is scanned from the back).
// The final statement builds an empty vector positioned at buffer.start();
// it is presumably reached only when no such character exists (buffer is empty
// or consists solely of '0').
101 static Vector
<const char> TrimTrailingZeros(Vector
<const char> buffer
) {
102 for (int i
= buffer
.length() - 1; i
>= 0; --i
) {
103 if (buffer
[i
] != '0') {
104 return buffer
.SubVector(0, i
+ 1);
107 return Vector
<const char>(buffer
.start(), 0);
// Shortens an over-long digit string to kMaxSignificantDecimalDigits digits.
//   buffer               - trimmed input digits; its last digit must not be '0'.
//   significant_buffer   - output; receives kMaxSignificantDecimalDigits chars.
//   significant_exponent - output; receives the exponent adjusted for the
//                          digits that were dropped.
// NOTE(review): the body also reads `exponent`, whose declaration is not
// visible here; the call in TrimAndCut passes it as the second argument.
111 static void CutToMaxSignificantDigits(Vector
<const char> buffer
,
113 char* significant_buffer
,
114 int* significant_exponent
) {
// Copy all but the last of the digits that are kept.
115 for (int i
= 0; i
< kMaxSignificantDecimalDigits
- 1; ++i
) {
116 significant_buffer
[i
] = buffer
[i
];
118 // The input buffer has been trimmed. Therefore the last digit must be
119 // different from '0'.
120 ASSERT(buffer
[buffer
.length() - 1] != '0');
121 // Set the last digit to be non-zero. This is sufficient to guarantee
// correct rounding (presumably the '1' stands in for the non-zero tail that
// is being dropped).
123 significant_buffer
[kMaxSignificantDecimalDigits
- 1] = '1';
// Every dropped digit raises the decimal exponent by one.
124 *significant_exponent
=
125 exponent
+ (buffer
.length() - kMaxSignificantDecimalDigits
);
129 // Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
130 // If possible the input-buffer is reused, but if the buffer needs to be
131 // modified (due to cutting), then the input needs to be copied into the
132 // buffer_copy_space.
//   buffer, exponent  - the input number is buffer * 10^exponent.
//   buffer_copy_space - scratch memory used only when the input must be cut.
//   space_size        - size of buffer_copy_space; must be at least
//                       kMaxSignificantDecimalDigits (checked with ASSERT).
//   trimmed           - output; the digits to use from here on.
//   updated_exponent  - output; the exponent that belongs to *trimmed.
133 static void TrimAndCut(Vector
<const char> buffer
, int exponent
,
134 char* buffer_copy_space
, int space_size
,
135 Vector
<const char>* trimmed
, int* updated_exponent
) {
136 Vector
<const char> left_trimmed
= TrimLeadingZeros(buffer
);
137 Vector
<const char> right_trimmed
= TrimTrailingZeros(left_trimmed
);
// Each removed trailing zero is compensated by raising the exponent by one.
138 exponent
+= left_trimmed
.length() - right_trimmed
.length();
139 if (right_trimmed
.length() > kMaxSignificantDecimalDigits
) {
140 (void) space_size
; // Mark variable as used.
141 ASSERT(space_size
>= kMaxSignificantDecimalDigits
);
// Too many digits: the cut copy in buffer_copy_space becomes the result and
// CutToMaxSignificantDigits writes the matching exponent.
142 CutToMaxSignificantDigits(right_trimmed
, exponent
,
143 buffer_copy_space
, updated_exponent
);
144 *trimmed
= Vector
<const char>(buffer
_copy_space
,
145 kMaxSignificantDecimalDigits
);
// Short enough: hand back a view into the caller's buffer, no copy needed.
147 *trimmed
= right_trimmed
;
148 *updated_exponent
= exponent
;
153 // Reads digits from the buffer and converts them to a uint64.
154 // Reads in as many digits as fit into a uint64.
155 // When the string starts with "1844674407370955161" no further digit is read.
156 // Since 2^64 - 1 = 18446744073709551615 it would still be possible to read
157 // another digit if it were at most 5, but this would complicate the code.
// The number of digits that were consumed is stored in *number_of_read_digits.
// NOTE(review): the declarations of `i` and `result` and the return statement
// are not visible here; presumably both start at 0 and `result` is returned.
158 static uint64_t ReadUint64(Vector
<const char> buffer
,
159 int* number_of_read_digits
) {
// Stop before 10 * result + digit could exceed kMaxUint64.
162 while (i
< buffer
.length() && result
<= (kMaxUint64
/ 10 - 1)) {
163 int digit
= buffer
[i
++] - '0';
164 ASSERT(0 <= digit
&& digit
<= 9);
165 result
= 10 * result
+ digit
;
167 *number_of_read_digits
= i
;
172 // Reads a DiyFp from the buffer.
173 // The returned DiyFp is not necessarily normalized.
174 // If remaining_decimals is zero then the returned DiyFp is accurate.
175 // Otherwise it has been rounded and has error of at most 1/2 ulp.
// *remaining_decimals receives the number of digits of buffer that did not
// fit into the 64-bit significand.
// NOTE(review): the `result` parameter and the declarations of `read_digits`
// and `exponent` are not visible here.
176 static void ReadDiyFp(Vector
<const char> buffer
,
178 int* remaining_decimals
) {
180 uint64_t significand
= ReadUint64(buffer
, &read_digits
);
// Every digit was consumed: the significand is the exact value.
181 if (buffer
.length() == read_digits
) {
182 *result
= DiyFp(significand
, 0);
183 *remaining_decimals
= 0;
185 // Round the significand.
// The first unread digit decides the rounding direction (the statement inside
// this branch is not visible here; presumably it increments the significand).
186 if (buffer
[read_digits
] >= '5') {
189 // Compute the binary exponent.
191 *result
= DiyFp(significand
, exponent
);
192 *remaining_decimals
= buffer
.length() - read_digits
;
// Fast path that computes the result with plain double arithmetic.
// It applies when the trimmed digits number at most
// kMaxExactDoubleIntegerDecimalDigits, so they convert to a double exactly,
// and the needed powers of ten are available in exact_powers_of_ten.
// NOTE(review): the `exponent` and `result` parameters, the declaration of
// `read_digits` and all return statements are not visible here. ComputeGuess
// calls this as DoubleStrtod(trimmed, exponent, guess) and treats a true
// result as "guess is the correct double".
197 static bool DoubleStrtod(Vector
<const char> trimmed
,
200 #if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
201 // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
202 // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
203 // result is not accurate.
204 // We know that Windows32 uses 64 bits and is therefore accurate.
205 // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
209 if (trimmed
.length() <= kMaxExactDoubleIntegerDecimalDigits
) {
211 // The trimmed input fits into a double.
212 // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
213 // can compute the result-double simply by multiplying (resp. dividing) the
215 // This is possible because IEEE guarantees that floating-point operations
216 // return the best possible approximation.
// Case 1: negative exponent, one exact division.
217 if (exponent
< 0 && -exponent
< kExactPowersOfTenSize
) {
218 // 10^-exponent fits into a double.
219 *result
= static_cast<double>(ReadUint64(trimmed
, &read_digits
));
220 ASSERT(read_digits
== trimmed
.length());
221 *result
/= exact_powers_of_ten
[-exponent
];
// Case 2: non-negative exponent, one exact multiplication.
224 if (0 <= exponent
&& exponent
< kExactPowersOfTenSize
) {
225 // 10^exponent fits into a double.
226 *result
= static_cast<double>(ReadUint64(trimmed
, &read_digits
));
227 ASSERT(read_digits
== trimmed
.length());
228 *result
*= exact_powers_of_ten
[exponent
];
// Case 3: the exponent is too large for one table lookup, but the digit string
// is short. Multiplying by 10^remaining_digits first still yields an integer
// of at most kMaxExactDoubleIntegerDecimalDigits digits, and the rest of the
// exponent then fits into the table.
231 int remaining_digits
=
232 kMaxExactDoubleIntegerDecimalDigits
- trimmed
.length();
233 if ((0 <= exponent
) &&
234 (exponent
- remaining_digits
< kExactPowersOfTenSize
)) {
235 // The trimmed string was short and we can multiply it with
236 // 10^remaining_digits. As a result the remaining exponent now fits
237 // into a double too.
238 *result
= static_cast<double>(ReadUint64(trimmed
, &read_digits
));
239 ASSERT(read_digits
== trimmed
.length());
240 *result
*= exact_powers_of_ten
[remaining_digits
];
241 *result
*= exact_powers_of_ten
[exponent
- remaining_digits
];
250 // Returns 10^exponent as an exact DiyFp.
251 // The given exponent must be in the range [1, kDecimalExponentDistance),
// i.e. 1 to 7 given the assertion below that the distance is 8.
// DiyFpStrtod uses this to bridge the gap between the requested decimal
// exponent and the one delivered by the powers-of-ten cache.
252 static DiyFp
AdjustmentPowerOfTen(int exponent
) {
253 ASSERT(0 < exponent
);
254 ASSERT(exponent
< PowersOfTenCache::kDecimalExponentDistance
);
255 // Simply hardcode the remaining powers for the given decimal exponent
257 ASSERT(PowersOfTenCache::kDecimalExponentDistance
== 8);
// Each entry is a 64-bit significand paired with a binary exponent, e.g.
// 0xa000000000000000 * 2^-60 == 10 (assuming UINT64_2PART_C joins its halves).
259 case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
260 case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
261 case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
262 case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
263 case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
264 case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
265 case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
272 // If the function returns true then the result is the correct double.
273 // Otherwise it is either the correct double or the double that is just below
274 // the correct double.
// Outline of the visible code: read the digits into a DiyFp, scale it by a
// cached power of ten (plus an exact adjustment power where needed) while
// tracking an upper bound on the accumulated error, then round to the
// significand width of a double and check whether the error could have changed
// the rounding decision.
// NOTE(review): the `exponent` and `result` parameters, the declarations of
// `input`, `cached_power` and `one64`, and all return statements are not
// visible here.
275 static bool DiyFpStrtod(Vector
<const char> buffer
,
279 int remaining_decimals
;
280 ReadDiyFp(buffer
, &input
, &remaining_decimals
);
281 // Since we may have dropped some digits the input is not accurate.
282 // If remaining_decimals is different from 0 then the error is at most
283 // .5 ulp (unit in the last place).
284 // We don't want to deal with fractions and therefore keep a common
// denominator: `error` is counted in units of 1/kDenominator ulp, so 0.5 ulp
// is stored as kDenominator / 2.
286 const int kDenominatorLog
= 3;
287 const int kDenominator
= 1 << kDenominatorLog
;
288 // Move the remaining decimals into the exponent.
289 exponent
+= remaining_decimals
;
290 uint64_t error
= (remaining_decimals
== 0 ? 0 : kDenominator
/ 2);
// Rescale the error by however much input's binary exponent dropped (the
// statement that changes input between these two lines is not visible here;
// presumably a normalization).
292 int old_e
= input
.e();
294 error
<<= old_e
- input
.e();
296 ASSERT(exponent
<= PowersOfTenCache::kMaxDecimalExponent
);
297 if (exponent
< PowersOfTenCache::kMinDecimalExponent
) {
302 int cached_decimal_exponent
;
303 PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent
,
305 &cached_decimal_exponent
);
// The cache did not return exactly 10^exponent; multiply by the missing
// power of ten as well.
307 if (cached_decimal_exponent
!= exponent
) {
308 int adjustment_exponent
= exponent
- cached_decimal_exponent
;
309 DiyFp adjustment_power
= AdjustmentPowerOfTen(adjustment_exponent
);
310 input
.Multiply(adjustment_power
);
311 if (kMaxUint64DecimalDigits
- buffer
.length() >= adjustment_exponent
) {
312 // The product of input with the adjustment power fits into a 64 bit
314 ASSERT(DiyFp::kSignificandSize
== 64);
316 // The adjustment power is exact. There is hence only an error of 0.5.
317 error
+= kDenominator
/ 2;
321 input
.Multiply(cached_power
);
322 // The error introduced by a multiplication of a*b equals
323 // error_a + error_b + error_a*error_b/2^64 + 0.5
324 // Substituting a with 'input' and b with 'cached_power' we have
325 // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
326 // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
327 int error_b
= kDenominator
/ 2;
328 int error_ab
= (error
== 0 ? 0 : 1); // We round up to 1.
329 int fixed_error
= kDenominator
/ 2;
330 error
+= error_b
+ error_ab
+ fixed_error
;
// Same rescaling of the error as after reading the input above.
334 error
<<= old_e
- input
.e();
336 // See if the double's significand changes if we add/subtract the error.
337 int order_of_magnitude
= DiyFp::kSignificandSize
+ input
.e();
338 int effective_significand_size
=
339 Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude
);
// Number of low-order bits of input.f() that do not fit into the double's
// significand and therefore have to be rounded away.
340 int precision_digits_count
=
341 DiyFp::kSignificandSize
- effective_significand_size
;
342 if (precision_digits_count
+ kDenominatorLog
>= DiyFp::kSignificandSize
) {
343 // This can only happen for very small denormals. In this case the
344 // half-way multiplied by the denominator exceeds the range of an uint64.
345 // Simply shift everything to the right.
346 int shift_amount
= (precision_digits_count
+ kDenominatorLog
) -
347 DiyFp::kSignificandSize
+ 1;
348 input
.set_f(input
.f() >> shift_amount
);
349 input
.set_e(input
.e() + shift_amount
);
350 // We add 1 for the lost precision of error, and kDenominator for
351 // the lost precision of input.f().
352 error
= (error
>> shift_amount
) + 1 + kDenominator
;
353 precision_digits_count
-= shift_amount
;
355 // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
356 ASSERT(DiyFp::kSignificandSize
== 64);
357 ASSERT(precision_digits_count
< 64);
359 uint64_t precision_bits_mask
= (one64
<< precision_digits_count
) - 1;
360 uint64_t precision_bits
= input
.f() & precision_bits_mask
;
361 uint64_t half_way
= one64
<< (precision_digits_count
- 1);
// Bring both values to the same 1/kDenominator scale as `error`.
362 precision_bits
*= kDenominator
;
363 half_way
*= kDenominator
;
364 DiyFp
rounded_input(input
.f() >> precision_digits_count
,
365 input
.e() + precision_digits_count
);
// Round up only when the discarded bits are above half-way even after
// allowing for the full error.
366 if (precision_bits
>= half_way
+ error
) {
367 rounded_input
.set_f(rounded_input
.f() + 1);
369 // If the last_bits are too close to the half-way case then we are too
370 // inaccurate and round down. In this case we return false so that we can
371 // fall back to a more precise algorithm.
373 *result
= Double(rounded_input
).value();
374 if (half_way
- error
< precision_bits
&& precision_bits
< half_way
+ error
) {
375 // Too imprecise. The caller will have to fall back to a slower version.
376 // However the returned number is guaranteed to be either the correct
377 // double, or the next-lower double.
// Compares the decimal number buffer * 10^exponent with diy_fp using Bignum
// arithmetic. Returns:
386 // - -1 if buffer*10^exponent < diy_fp.
387 // - 0 if buffer*10^exponent == diy_fp.
388 // - +1 if buffer*10^exponent > diy_fp.
// Preconditions (each is checked with an ASSERT below):
390 // buffer.length() + exponent <= kMaxDecimalPower + 1
391 // buffer.length() + exponent > kMinDecimalPower
392 // buffer.length() <= kMaxSignificantDecimalDigits
// NOTE(review): the `exponent` and `diy_fp` parameters are not visible here;
// Strtod calls this as CompareBufferWithDiyFp(trimmed, exponent, boundary).
393 static int CompareBufferWithDiyFp(Vector
<const char> buffer
,
396 ASSERT(buffer
.length() + exponent
<= kMaxDecimalPower
+ 1);
397 ASSERT(buffer
.length() + exponent
> kMinDecimalPower
);
398 ASSERT(buffer
.length() <= kMaxSignificantDecimalDigits
);
399 // Make sure that the Bignum will be able to hold all our numbers.
400 // Our Bignum implementation has a separate field for exponents. Shifts will
401 // consume at most one bigit (< 64 bits).
402 // log2(10) == 3.3219... (bits per decimal digit; 333 / 100 rounds it up).
403 ASSERT(((kMaxDecimalPower
+ 1) * 333 / 100) < Bignum::kMaxSignificantBits
);
404 Bignum buffer_bignum
;
405 Bignum diy_fp_bignum
;
406 buffer_bignum
.AssignDecimalString(buffer
);
407 diy_fp_bignum
.AssignUInt64(diy_fp
.f());
// Only one of the two scalings below is applied, so that both sides stay
// integers (the condition that picks between them is not visible here;
// presumably the sign of exponent).
409 buffer_bignum
.MultiplyByPowerOfTen(exponent
);
411 diy_fp_bignum
.MultiplyByPowerOfTen(-exponent
);
// Likewise for the binary exponent: shift whichever side keeps both integral.
413 if (diy_fp
.e() > 0) {
414 diy_fp_bignum
.ShiftLeft(diy_fp
.e());
416 buffer_bignum
.ShiftLeft(-diy_fp
.e());
418 return Bignum::Compare(buffer_bignum
, diy_fp_bignum
);
422 // Returns true if the guess is the correct double.
423 // Returns false, when guess is either correct or the next-lower double.
// Shared first stage of Strtod and Strtof: handles the empty, overflow and
// underflow cases up front, then tries DoubleStrtod followed by DiyFpStrtod.
// NOTE(review): the `guess` out-parameter declaration, the return statements
// and several assignments are not visible here, so the results of the
// individual branches below are not documented.
424 static bool ComputeGuess(Vector
<const char> trimmed
, int exponent
,
// No digits left after trimming.
426 if (trimmed
.length() == 0) {
// The value is at least 10^kMaxDecimalPower: too large for a finite double.
430 if (exponent
+ trimmed
.length() - 1 >= kMaxDecimalPower
) {
431 *guess
= Double::Infinity();
// The value is below 10^kMinDecimalPower: too small to be non-zero.
434 if (exponent
+ trimmed
.length() <= kMinDecimalPower
) {
// Try the exact double fast path first, then the DiyFp approximation.
439 if (DoubleStrtod(trimmed
, exponent
, guess
) ||
440 DiyFpStrtod(trimmed
, exponent
, guess
)) {
443 if (*guess
== Double::Infinity()) {
// Converts the decimal number buffer * 10^exponent to a double.
// The input is trimmed and cut first, then ComputeGuess produces a candidate.
// If the candidate is not known to be correct, the input is compared with the
// candidate's upper boundary to choose between the candidate and the next
// double.
// NOTE(review): the declaration of `guess` and the returns of two branches are
// not visible here; those branches presumably return `guess` unchanged.
449 double Strtod(Vector
<const char> buffer
, int exponent
) {
450 char copy_buffer
[kMaxSignificantDecimalDigits
];
451 Vector
<const char> trimmed
;
452 int updated_exponent
;
453 TrimAndCut(buffer
, exponent
, copy_buffer
, kMaxSignificantDecimalDigits
,
454 &trimmed
, &updated_exponent
);
455 exponent
= updated_exponent
;
458 bool is_correct
= ComputeGuess(trimmed
, exponent
, &guess
);
459 if (is_correct
) return guess
;
// The guess is either correct or one double too low. Compare the exact input
// with the guess's upper boundary to decide.
461 DiyFp upper_boundary
= Double(guess
).UpperBoundary();
462 int comparison
= CompareBufferWithDiyFp(trimmed
, exponent
, upper_boundary
);
// Input below the boundary.
463 if (comparison
< 0) {
// Input above the boundary: the next double is the result.
465 } else if (comparison
> 0) {
466 return Double(guess
).NextDouble();
// Input exactly on the boundary: the parity of the guess's significand decides.
467 } else if ((Double(guess
).Significand() & 1) == 0) {
468 // Round towards even.
471 return Double(guess
).NextDouble();
// Converts d to float. Values of at least max_finite are handled explicitly
// instead of going through the plain cast; everything from the half-way point
// between the largest finite float and infinity upwards becomes
// Single::Infinity(). All other values are converted with static_cast<float>.
// NOTE(review): the branch for values between max_finite and the half-way
// point is not visible here; presumably it returns max_finite.
475 static float SanitizedDoubletof(double d
) {
477 // ASAN has a sanitize check that disallows casting doubles to floats if
479 // https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html#available-checks
480 // The behavior should be covered by IEEE 754, but some projects use this
481 // flag, so work around it.
482 float max_finite
= 3.4028234663852885981170418348451692544e+38;
483 // The half-way point between the max-finite and infinity value.
484 // Since infinity has an even significand everything equal or greater than
485 // this value should become infinity.
486 double half_max_finite_infinity
=
487 3.40282356779733661637539395458142568448e+38;
488 if (d
>= max_finite
) {
489 if (d
>= half_max_finite_infinity
) {
490 return Single::Infinity();
495 return static_cast<float>(d
);
// Converts the decimal number buffer * 10^exponent to a float.
// A double guess is computed first (same steps as Strtod) and narrowed with
// SanitizedDoubletof. Because narrowing an already rounded double can round a
// second time, the neighbouring doubles are narrowed as well; only if they do
// not all agree is the input compared with a float boundary.
// NOTE(review): the declarations of `double_guess`, `f4`, `guess` and `next`
// and all return statements are not visible here.
499 float Strtof(Vector
<const char> buffer
, int exponent
) {
500 char copy_buffer
[kMaxSignificantDecimalDigits
];
501 Vector
<const char> trimmed
;
502 int updated_exponent
;
503 TrimAndCut(buffer
, exponent
, copy_buffer
, kMaxSignificantDecimalDigits
,
504 &trimmed
, &updated_exponent
);
505 exponent
= updated_exponent
;
508 bool is_correct
= ComputeGuess(trimmed
, exponent
, &double_guess
);
510 float float_guess
= SanitizedDoubletof(double_guess
);
// The double guess is exactly representable as a float.
511 if (float_guess
== double_guess
) {
512 // This shortcut triggers for integer values.
516 // We must catch double-rounding. Say the double has been rounded up, and is
517 // now a boundary of a float, and rounds up again. This is why we have to
518 // look at previous too.
519 // Example (in decimal numbers):
521 // high-precision (4 digits): 1235
522 // low-precision (3 digits):
523 // when read from input: 123
524 // when rounded from high precision: 124.
525 // To do this we simply look at the neighbors of the correct result and see
526 // if they would round to the same float. If the guess is not correct we have
527 // to look at four values (since two different doubles could be the correct
530 double double_next
= Double(double_guess
).NextDouble();
531 double double_previous
= Double(double_guess
).PreviousDouble();
// f1..f3 are the floats for the previous double, the guess and the next double.
533 float f1
= SanitizedDoubletof(double_previous
);
534 float f2
= float_guess
;
535 float f3
= SanitizedDoubletof(double_next
);
// f4 comes from the double after double_next (the condition under which this
// is computed is not visible here; presumably when the guess may be too low).
540 double double_next2
= Double(double_next
).NextDouble();
541 f4
= SanitizedDoubletof(double_next2
);
543 (void) f2
; // Mark variable as used.
544 ASSERT(f1
<= f2
&& f2
<= f3
&& f3
<= f4
);
546 // If the guess doesn't lie near a single-precision boundary we can simply
547 // return its float-value.
// Otherwise exactly one adjacent pair among f1..f4 differs.
552 ASSERT((f1
!= f2
&& f2
== f3
&& f3
== f4
) ||
553 (f1
== f2
&& f2
!= f3
&& f3
== f4
) ||
554 (f1
== f2
&& f2
== f3
&& f3
!= f4
));
556 // guess and next are the two possible candidates (in the same way that
557 // double_guess was the lower candidate for a double-precision guess).
560 DiyFp upper_boundary
;
// Two ways of obtaining the boundary: half of min_float (1e-45f), or the
// upper boundary of the float guess (the condition that selects between them
// is not visible here).
562 float min_float
= 1e-45f
;
563 upper_boundary
= Double(static_cast<double>(min_float
) / 2).AsDiyFp();
565 upper_boundary
= Single(guess
).UpperBoundary();
567 int comparison
= CompareBufferWithDiyFp(trimmed
, exponent
, upper_boundary
);
// Below the boundary, above it, or exactly on it; in the last case the parity
// of the guess's significand decides.
568 if (comparison
< 0) {
570 } else if (comparison
> 0) {
572 } else if ((Single(guess
).Significand() & 1) == 0) {
573 // Round towards even.
580 } // namespace double_conversion