library/core/src/num/flt2dec/strategy/grisu.rs

   1 //! Rust adaptation of the Grisu3 algorithm described in "Printing Floating-Point Numbers Quickly
   2 //! and Accurately with Integers"[^1]. It uses about 1KB of precomputed table, and in turn, it's
   3 //! very quick for most inputs.
   4 //!
   5 //! [^1]: Florian Loitsch. 2010. Printing floating-point numbers quickly and
   6 //!   accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
   7
   8 use crate::num::diy_float::Fp;
   9 use crate::num::flt2dec::{round_up, Decoded, MAX_SIG_DIGITS};
  10
  11 // see the comments in `format_shortest_opt` for the rationale.
  12 #[doc(hidden)]
  13 pub const ALPHA: i16 = -60;
  14 #[doc(hidden)]
  15 pub const GAMMA: i16 = -32;
  16
  17 /*
  18 # the following Python code generates this table:
  19 for i in xrange(-308, 333, 8):
  20     if i >= 0: f = 10**i; e = 0
  21     else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
  22     l = f.bit_length()
  23     f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
  24     print '    (%#018x, %5d, %4d),' % (f, e, i)
  25 */
  26
  27 #[doc(hidden)]
  28 pub static CACHED_POW10: [(u64, i16, i16); 81] = [
  29     // (f, e, k)
  30     (0xe61acf033d1a45df, -1087, -308),
  31     (0xab70fe17c79ac6ca, -1060, -300),
  32     (0xff77b1fcbebcdc4f, -1034, -292),
  33     (0xbe5691ef416bd60c, -1007, -284),
  34     (0x8dd01fad907ffc3c, -980, -276),
  35     (0xd3515c2831559a83, -954, -268),
  36     (0x9d71ac8fada6c9b5, -927, -260),
  37     (0xea9c227723ee8bcb, -901, -252),
  38     (0xaecc49914078536d, -874, -244),
  39     (0x823c12795db6ce57, -847, -236),
  40     (0xc21094364dfb5637, -821, -228),
  41     (0x9096ea6f3848984f, -794, -220),
  42     (0xd77485cb25823ac7, -768, -212),
  43     (0xa086cfcd97bf97f4, -741, -204),
  44     (0xef340a98172aace5, -715, -196),
  45     (0xb23867fb2a35b28e, -688, -188),
  46     (0x84c8d4dfd2c63f3b, -661, -180),
  47     (0xc5dd44271ad3cdba, -635, -172),
  48     (0x936b9fcebb25c996, -608, -164),
  49     (0xdbac6c247d62a584, -582, -156),
  50     (0xa3ab66580d5fdaf6, -555, -148),
  51     (0xf3e2f893dec3f126, -529, -140),
  52     (0xb5b5ada8aaff80b8, -502, -132),
  53     (0x87625f056c7c4a8b, -475, -124),
  54     (0xc9bcff6034c13053, -449, -116),
  55     (0x964e858c91ba2655, -422, -108),
  56     (0xdff9772470297ebd, -396, -100),
  57     (0xa6dfbd9fb8e5b88f, -369, -92),
  58     (0xf8a95fcf88747d94, -343, -84),
  59     (0xb94470938fa89bcf, -316, -76),
  60     (0x8a08f0f8bf0f156b, -289, -68),
  61     (0xcdb02555653131b6, -263, -60),
  62     (0x993fe2c6d07b7fac, -236, -52),
  63     (0xe45c10c42a2b3b06, -210, -44),
  64     (0xaa242499697392d3, -183, -36),
  65     (0xfd87b5f28300ca0e, -157, -28),
  66     (0xbce5086492111aeb, -130, -20),
  67     (0x8cbccc096f5088cc, -103, -12),
  68     (0xd1b71758e219652c, -77, -4),
  69     (0x9c40000000000000, -50, 4),
  70     (0xe8d4a51000000000, -24, 12),
  71     (0xad78ebc5ac620000, 3, 20),
  72     (0x813f3978f8940984, 30, 28),
  73     (0xc097ce7bc90715b3, 56, 36),
  74     (0x8f7e32ce7bea5c70, 83, 44),
  75     (0xd5d238a4abe98068, 109, 52),
  76     (0x9f4f2726179a2245, 136, 60),
  77     (0xed63a231d4c4fb27, 162, 68),
  78     (0xb0de65388cc8ada8, 189, 76),
  79     (0x83c7088e1aab65db, 216, 84),
  80     (0xc45d1df942711d9a, 242, 92),
  81     (0x924d692ca61be758, 269, 100),
  82     (0xda01ee641a708dea, 295, 108),
  83     (0xa26da3999aef774a, 322, 116),
  84     (0xf209787bb47d6b85, 348, 124),
  85     (0xb454e4a179dd1877, 375, 132),
  86     (0x865b86925b9bc5c2, 402, 140),
  87     (0xc83553c5c8965d3d, 428, 148),
  88     (0x952ab45cfa97a0b3, 455, 156),
  89     (0xde469fbd99a05fe3, 481, 164),
  90     (0xa59bc234db398c25, 508, 172),
  91     (0xf6c69a72a3989f5c, 534, 180),
  92     (0xb7dcbf5354e9bece, 561, 188),
  93     (0x88fcf317f22241e2, 588, 196),
  94     (0xcc20ce9bd35c78a5, 614, 204),
  95     (0x98165af37b2153df, 641, 212),
  96     (0xe2a0b5dc971f303a, 667, 220),
  97     (0xa8d9d1535ce3b396, 694, 228),
  98     (0xfb9b7cd9a4a7443c, 720, 236),
  99     (0xbb764c4ca7a44410, 747, 244),
 100     (0x8bab8eefb6409c1a, 774, 252),
 101     (0xd01fef10a657842c, 800, 260),
 102     (0x9b10a4e5e9913129, 827, 268),
 103     (0xe7109bfba19c0c9d, 853, 276),
 104     (0xac2820d9623bf429, 880, 284),
 105     (0x80444b5e7aa7cf85, 907, 292),
 106     (0xbf21e44003acdd2d, 933, 300),
 107     (0x8e679c2f5e44ff8f, 960, 308),
 108     (0xd433179d9c8cb841, 986, 316),
 109     (0x9e19db92b4e31ba9, 1013, 324),
 110     (0xeb96bf6ebadf77d9, 1039, 332),
 111 ];
 112
 113 #[doc(hidden)]
 114 pub const CACHED_POW10_FIRST_E: i16 = -1087;
 115 #[doc(hidden)]
 116 pub const CACHED_POW10_LAST_E: i16 = 1039;
 117
 118 #[doc(hidden)]
 119 pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
 120     let offset = CACHED_POW10_FIRST_E as i32;
 121     let range = (CACHED_POW10.len() as i32) - 1;
 122     let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
 123     let idx = ((gamma as i32) - offset) * range / domain;
 124     let (f, e, k) = CACHED_POW10[idx as usize];
 125     debug_assert!(alpha <= e && e <= gamma);
 126     (k, Fp { f, e })
 127 }
 128
 129 /// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
 130 #[doc(hidden)]
 131 pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
 132     debug_assert!(x > 0);
 133
 134     const X9: u32 = 10_0000_0000;
 135     const X8: u32 = 1_0000_0000;
 136     const X7: u32 = 1000_0000;
 137     const X6: u32 = 100_0000;
 138     const X5: u32 = 10_0000;
 139     const X4: u32 = 1_0000;
 140     const X3: u32 = 1000;
 141     const X2: u32 = 100;
 142     const X1: u32 = 10;
 143
 144     if x < X4 {
 145         if x < X2 {
 146             if x < X1 { (0, 1) } else { (1, X1) }
 147         } else {
 148             if x < X3 { (2, X2) } else { (3, X3) }
 149         }
 150     } else {
 151         if x < X6 {
 152             if x < X5 { (4, X4) } else { (5, X5) }
 153         } else if x < X8 {
 154             if x < X7 { (6, X6) } else { (7, X7) }
 155         } else {
 156             if x < X9 { (8, X8) } else { (9, X9) }
 157         }
 158     }
 159 }
 160
 161 /// The shortest mode implementation for Grisu.
 162 ///
 163 /// It returns `None` when it would return an inexact representation otherwise.
 164 pub fn format_shortest_opt(
 165     d: &Decoded,
 166     buf: &mut [u8],
 167 ) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 168     assert!(d.mant > 0);
 169     assert!(d.minus > 0);
 170     assert!(d.plus > 0);
 171     assert!(d.mant.checked_add(d.plus).is_some());
 172     assert!(d.mant.checked_sub(d.minus).is_some());
 173     assert!(buf.len() >= MAX_SIG_DIGITS);
 174     assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
 175
 176     // start with the normalized values with the shared exponent
 177     let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
 178     let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
 179     let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
 180
 181     // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
 182     // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
 183     // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
 184     //
 185     // it is obviously desirable to maximize `GAMMA - ALPHA`,
 186     // so that we don't need many cached powers of 10, but there are some considerations:
 187     //
 188     // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
 189     //    (this is not really avoidable, remainder is required for accuracy estimation.)
 190     // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
 191     //    and it should not overflow.
 192     //
 193     // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
 194     // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
 195     let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
 196
 197     // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
 198     let plus = plus.mul(&cached);
 199     let minus = minus.mul(&cached);
 200     let v = v.mul(&cached);
 201     debug_assert_eq!(plus.e, minus.e);
 202     debug_assert_eq!(plus.e, v.e);
 203
 204     //         +- actual range of minus
 205     //   | <---|---------------------- unsafe region --------------------------> |
 206     //   |     |                                                                 |
 207     //   |  |<--->|  | <--------------- safe region ---------------> |           |
 208     //   |  |     |  |                                               |           |
 209     //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp|
 210     //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->|
 211     //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
 212     //   |   minus   |                 |     v     |                 |   plus    |
 213     // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1
 214     //
 215     // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
 216     // as we don't know the error is positive or negative, we use two approximations spaced equally
 217     // and have the maximal error of 2 ulps.
 218     //
 219     // the "unsafe region" is a liberal interval which we initially generate.
 220     // the "safe region" is a conservative interval which we only accept.
 221     // we start with the correct repr within the unsafe region, and try to find the closest repr
 222     // to `v` which is also within the safe region. if we can't, we give up.
 223     let plus1 = plus.f + 1;
 224     //  let plus0 = plus.f - 1; // only for explanation
 225     //  let minus0 = minus.f + 1; // only for explanation
 226     let minus1 = minus.f - 1;
 227     let e = -plus.e as usize; // shared exponent
 228
 229     // divide `plus1` into integral and fractional parts.
 230     // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
 231     // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
 232     let plus1int = (plus1 >> e) as u32;
 233     let plus1frac = plus1 & ((1 << e) - 1);
 234
 235     // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
 236     // this is an upper bound of `kappa` below.
 237     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
 238
 239     let mut i = 0;
 240     let exp = max_kappa as i16 - minusk + 1;
 241
 242     // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
 243     //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
 244     //              representations (with the minimal number of significant digits) in that range.
 245     //
 246     // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
 247     // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
 248     // (e.g., `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
 249     // the algorithm relies on the later verification phase to exclude `y`.
 250     let delta1 = plus1 - minus1;
 251     //  let delta1int = (delta1 >> e) as usize; // only for explanation
 252     let delta1frac = delta1 & ((1 << e) - 1);
 253
 254     // render integral parts, while checking for the accuracy at each step.
 255     let mut kappa = max_kappa as i16;
 256     let mut ten_kappa = max_ten_kappa; // 10^kappa
 257     let mut remainder = plus1int; // digits yet to be rendered
 258     loop {
 259         // we always have at least one digit to render, as `plus1 >= 10^kappa`
 260         // invariants:
 261         // - `delta1int <= remainder < 10^(kappa+1)`
 262         // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
 263         //   (it follows that `remainder = plus1int % 10^(kappa+1)`)
 264
 265         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 266         let q = remainder / ten_kappa;
 267         let r = remainder % ten_kappa;
 268         debug_assert!(q < 10);
 269         buf[i] = b'0' + q as u8;
 270         i += 1;
 271
 272         let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
 273         if plus1rem < delta1 {
 274             // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
 275             let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
 276             return round_and_weed(&mut buf[..i], exp, plus1rem, delta1, plus1 - v.f, ten_kappa, 1);
 277         }
 278
 279         // break the loop when we have rendered all integral digits.
 280         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 281         if i > max_kappa as usize {
 282             debug_assert_eq!(ten_kappa, 1);
 283             debug_assert_eq!(kappa, 0);
 284             break;
 285         }
 286
 287         // restore invariants
 288         kappa -= 1;
 289         ten_kappa /= 10;
 290         remainder = r;
 291     }
 292
 293     // render fractional parts, while checking for the accuracy at each step.
 294     // this time we rely on repeated multiplications, as division will lose the precision.
 295     let mut remainder = plus1frac;
 296     let mut threshold = delta1frac;
 297     let mut ulp = 1;
 298     loop {
 299         // the next digit should be significant as we've tested that before breaking out
 300         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 301         // - `remainder < 2^e`
 302         // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 303
 304         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 305         threshold *= 10;
 306         ulp *= 10;
 307
 308         // divide `remainder` by `10^kappa`.
 309         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 310         let q = remainder >> e;
 311         let r = remainder & ((1 << e) - 1);
 312         debug_assert!(q < 10);
 313         buf[i] = b'0' + q as u8;
 314         i += 1;
 315
 316         if r < threshold {
 317             let ten_kappa = 1 << e; // implicit divisor
 318             return round_and_weed(
 319                 &mut buf[..i],
 320                 exp,
 321                 r,
 322                 threshold,
 323                 (plus1 - v.f) * ulp,
 324                 ten_kappa,
 325                 ulp,
 326             );
 327         }
 328
 329         // restore invariants
 330         kappa -= 1;
 331         remainder = r;
 332     }
 333
 334     // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
 335     // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
 336     // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
 337     // we have to successively decrease the last digit and check if this is the optimal repr.
 338     // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
 339     //
 340     // the function checks if this "optimal" repr is actually within the ulp ranges,
 341     // and also, it is possible that the "second-to-optimal" repr can actually be optimal
 342     // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
 343     //
 344     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 345     // - `remainder = (plus1 % 10^kappa) * k`
 346     // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
 347     // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
 348     // - `ten_kappa = 10^kappa * k`
 349     // - `ulp = 2^-e * k`
 350     fn round_and_weed(
 351         buf: &mut [u8],
 352         exp: i16,
 353         remainder: u64,
 354         threshold: u64,
 355         plus1v: u64,
 356         ten_kappa: u64,
 357         ulp: u64,
 358     ) -> Option<(usize, i16)> {
 359         assert!(!buf.is_empty());
 360
 361         // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
 362         // the resulting representation should be the closest representation to both.
 363         //
 364         // here `plus1 - v` is used since calculations are done with respect to `plus1`
 365         // in order to avoid overflow/underflow (hence the seemingly swapped names).
 366         let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
 367         let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
 368
 369         // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
 370         let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
 371         {
 372             let last = buf.last_mut().unwrap();
 373
 374             // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
 375             // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
 376             // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
 377             // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
 378             // note that `plus1w(n)` is always increasing.
 379             //
 380             // we have three conditions to terminate. any of them will make the loop unable to
 381             // proceed, but we then have at least one valid representation known to be closest to
 382             // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
 383             //
 384             // TC1: `w(n) <= v + 1 ulp`, i.e., this is the last repr that can be the closest one.
 385             // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
 386             // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
 387             // overflow on the calculation of `plus1w(n)`.
 388             //
 389             // TC2: `w(n+1) < minus1`, i.e., the next repr definitely does not round to `v`.
 390             // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
 391             // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
 392             // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
 393             // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
 394             // `threshold - plus1w(n) < 10^kappa` instead.
 395             //
 396             // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e., the next repr is
 397             // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
 398             // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
 399             // `z(n) > 0`. we have two cases to consider:
 400             //
 401             // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
 402             //   `z(n)` should be decreasing and this is clearly false.
 403             // - when `z(n+1) < 0`:
 404             //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
 405             //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
 406             //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e., `plus1v_up - plus1w(n) >=
 407             //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
 408             //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
 409             //     combined with TC3a.
 410             //
 411             // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
 412             // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
 413             while plus1w < plus1v_up
 414                 && threshold - plus1w >= ten_kappa
 415                 && (plus1w + ten_kappa < plus1v_up
 416                     || plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up)
 417             {
 418                 *last -= 1;
 419                 debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
 420                 plus1w += ten_kappa;
 421             }
 422         }
 423
 424         // check if this representation is also the closest representation to `v - 1 ulp`.
 425         //
 426         // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
 427         // replaced by `plus1v_down` instead. overflow analysis equally holds.
 428         if plus1w < plus1v_down
 429             && threshold - plus1w >= ten_kappa
 430             && (plus1w + ten_kappa < plus1v_down
 431                 || plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down)
 432         {
 433             return None;
 434         }
 435
 436         // now we have the closest representation to `v` between `plus1` and `minus1`.
 437         // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
 438         // i.e., `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
 439         // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
 440         if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp {
 441             Some((buf.len(), exp))
 442         } else {
 443             None
 444         }
 445     }
 446 }
 447
 448 /// The shortest mode implementation for Grisu with Dragon fallback.
 449 ///
 450 /// This should be used for most cases.
 451 pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) {
 452     use crate::num::flt2dec::strategy::dragon::format_shortest as fallback;
 453     match format_shortest_opt(d, buf) {
 454         Some(ret) => ret,
 455         None => fallback(d, buf),
 456     }
 457 }
 458
 459 /// The exact and fixed mode implementation for Grisu.
 460 ///
 461 /// It returns `None` when it would return an inexact representation otherwise.
 462 pub fn format_exact_opt(
 463     d: &Decoded,
 464     buf: &mut [u8],
 465     limit: i16,
 466 ) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 467     assert!(d.mant > 0);
 468     assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
 469     assert!(!buf.is_empty());
 470
 471     // normalize and scale `v`.
 472     let v = Fp { f: d.mant, e: d.exp }.normalize();
 473     let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
 474     let v = v.mul(&cached);
 475
 476     // divide `v` into integral and fractional parts.
 477     let e = -v.e as usize;
 478     let vint = (v.f >> e) as u32;
 479     let vfrac = v.f & ((1 << e) - 1);
 480
 481     // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
 482     // as we don't know the error is positive or negative, we use two approximations
 483     // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
 484     //
 485     // the goal is to find the exactly rounded series of digits that are common to
 486     // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
 487     // if this is not possible, we don't know which one is the correct output for `v`,
 488     // so we give up and fall back.
 489     //
 490     // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
 491     // and we will scale it whenever `v` gets scaled.
 492     let mut err = 1;
 493
 494     // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
 495     // this is an upper bound of `kappa` below.
 496     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
 497
 498     let mut i = 0;
 499     let exp = max_kappa as i16 - minusk + 1;
 500
 501     // if we are working with the last-digit limitation, we need to shorten the buffer
 502     // before the actual rendering in order to avoid double rounding.
 503     // note that we have to enlarge the buffer again when rounding up happens!
 504     let len = if exp <= limit {
 505         // oops, we cannot even produce *one* digit.
 506         // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
 507         //
 508         // in principle we can immediately call `possibly_round` with an empty buffer,
 509         // but scaling `max_ten_kappa << e` by 10 can result in overflow.
 510         // thus we are being sloppy here and widen the error range by a factor of 10.
 511         // this will increase the false negative rate, but only very, *very* slightly;
 512         // it can only matter noticeably when the mantissa is bigger than 60 bits.
 513         return possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e);
 514     } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
 515         (exp - limit) as usize
 516     } else {
 517         buf.len()
 518     };
 519     debug_assert!(len > 0);
 520
 521     // render integral parts.
 522     // the error is entirely fractional, so we don't need to check it in this part.
 523     let mut kappa = max_kappa as i16;
 524     let mut ten_kappa = max_ten_kappa; // 10^kappa
 525     let mut remainder = vint; // digits yet to be rendered
 526     loop {
 527         // we always have at least one digit to render
 528         // invariants:
 529         // - `remainder < 10^(kappa+1)`
 530         // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
 531         //   (it follows that `remainder = vint % 10^(kappa+1)`)
 532
 533         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 534         let q = remainder / ten_kappa;
 535         let r = remainder % ten_kappa;
 536         debug_assert!(q < 10);
 537         buf[i] = b'0' + q as u8;
 538         i += 1;
 539
 540         // is the buffer full? run the rounding pass with the remainder.
 541         if i == len {
 542             let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
 543             return possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e);
 544         }
 545
 546         // break the loop when we have rendered all integral digits.
 547         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 548         if i > max_kappa as usize {
 549             debug_assert_eq!(ten_kappa, 1);
 550             debug_assert_eq!(kappa, 0);
 551             break;
 552         }
 553
 554         // restore invariants
 555         kappa -= 1;
 556         ten_kappa /= 10;
 557         remainder = r;
 558     }
 559
 560     // render fractional parts.
 561     //
 562     // in principle we can continue to the last available digit and check for the accuracy.
 563     // unfortunately we are working with the finite-sized integers, so we need some criterion
 564     // to detect the overflow. V8 uses `remainder > err`, which becomes false when
 565     // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
 566     // too many otherwise valid input.
 567     //
 568     // since the later phase has a correct overflow detection, we instead use tighter criterion:
 569     // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
 570     // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
 571     // the first two comparisons from `possibly_round`, for the reference.
 572     let mut remainder = vfrac;
 573     let maxerr = 1 << (e - 1);
 574     while err < maxerr {
 575         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 576         // - `remainder < 2^e`
 577         // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 578         // - `err = 10^(n-m)`
 579
 580         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 581         err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
 582
 583         // divide `remainder` by `10^kappa`.
 584         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 585         let q = remainder >> e;
 586         let r = remainder & ((1 << e) - 1);
 587         debug_assert!(q < 10);
 588         buf[i] = b'0' + q as u8;
 589         i += 1;
 590
 591         // is the buffer full? run the rounding pass with the remainder.
 592         if i == len {
 593             return possibly_round(buf, len, exp, limit, r, 1 << e, err);
 594         }
 595
 596         // restore invariants
 597         remainder = r;
 598     }
 599
 600     // further calculation is useless (`possibly_round` definitely fails), so we give up.
 601     return None;
 602
 603     // we've generated all requested digits of `v`, which should be also same to corresponding
 604     // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
 605     // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
 606     // to the rounded-up version of those digits. if the range contains multiple representations
 607     // of the same length, we cannot be sure and should return `None` instead.
 608     //
 609     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 610     // - `remainder = (v % 10^kappa) * k`
 611     // - `ten_kappa = 10^kappa * k`
 612     // - `ulp = 2^-e * k`
 613     fn possibly_round(
 614         buf: &mut [u8],
 615         mut len: usize,
 616         mut exp: i16,
 617         limit: i16,
 618         remainder: u64,
 619         ten_kappa: u64,
 620         ulp: u64,
 621     ) -> Option<(usize, i16)> {
 622         debug_assert!(remainder < ten_kappa);
 623
 624         //           10^kappa
 625         //    :   :   :<->:   :
 626         //    :   :   :   :   :
 627         //    :|1 ulp|1 ulp|  :
 628         //    :|<--->|<--->|  :
 629         // ----|-----|-----|----
 630         //     |     v     |
 631         // v - 1 ulp   v + 1 ulp
 632         //
 633         // (for the reference, the dotted line indicates the exact value for
 634         // possible representations in given number of digits.)
 635         //
 636         // error is too large that there are at least three possible representations
 637         // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
 638         if ulp >= ten_kappa {
 639             return None;
 640         }
 641
 642         //    10^kappa
 643         //   :<------->:
 644         //   :         :
 645         //   : |1 ulp|1 ulp|
 646         //   : |<--->|<--->|
 647         // ----|-----|-----|----
 648         //     |     v     |
 649         // v - 1 ulp   v + 1 ulp
 650         //
 651         // in fact, 1/2 ulp is enough to introduce two possible representations.
 652         // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
 653         // this won't overflow, as `ulp < ten_kappa` from the first check.
 654         if ten_kappa - ulp <= ulp {
 655             return None;
 656         }
 657
 658         //     remainder
 659         //       :<->|                           :
 660         //       :   |                           :
 661         //       :<--------- 10^kappa ---------->:
 662         //     | :   |                           :
 663         //     |1 ulp|1 ulp|                     :
 664         //     |<--->|<--->|                     :
 665         // ----|-----|-----|------------------------
 666         //     |     v     |
 667         // v - 1 ulp   v + 1 ulp
 668         //
 669         // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
 670         // then we can safely return. note that `v - 1 ulp` *can* be less than the current
 671         // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
 672         // the distance between `v - 1 ulp` and the current representation
 673         // cannot exceed `10^kappa / 2`.
 674         //
 675         // the condition equals to `remainder + ulp < 10^kappa / 2`.
 676         // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
 677         // we've already verified that `ulp < 10^kappa / 2`, so as long as
 678         // `10^kappa` did not overflow after all, the second check is fine.
 679         if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
 680             return Some((len, exp));
 681         }
 682
 683         //   :<------- remainder ------>|   :
 684         //   :                          |   :
 685         //   :<--------- 10^kappa --------->:
 686         //   :                    |     |   : |
 687         //   :                    |1 ulp|1 ulp|
 688         //   :                    |<--->|<--->|
 689         // -----------------------|-----|-----|-----
 690         //                        |     v     |
 691         //                    v - 1 ulp   v + 1 ulp
 692         //
 693         // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
 694         // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
 695         //
 696         // the condition equals to `remainder - ulp >= 10^kappa / 2`.
 697         // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
 698         // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
 699         // so the second check does not overflow.
 700         if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
 701             if let Some(c) = round_up(buf, len) {
 702                 // only add an additional digit when we've been requested the fixed precision.
 703                 // we also need to check that, if the original buffer was empty,
 704                 // the additional digit can only be added when `exp == limit` (edge case).
 705                 exp += 1;
 706                 if exp > limit && len < buf.len() {
 707                     buf[len] = c;
 708                     len += 1;
 709                 }
 710             }
 711             return Some((len, exp));
 712         }
 713
 714         // otherwise we are doomed (i.e., some values between `v - 1 ulp` and `v + 1 ulp` are
 715         // rounding down and others are rounding up) and give up.
 716         None
 717     }
 718 }
 719
 720 /// The exact and fixed mode implementation for Grisu with Dragon fallback.
 721 ///
 722 /// This should be used for most cases.
 723 pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) {
 724     use crate::num::flt2dec::strategy::dragon::format_exact as fallback;
 725     match format_exact_opt(d, buf, limit) {
 726         Some(ret) => ret,
 727         None => fallback(d, buf, limit),
 728     }
 729 }