src/libcore/num/flt2dec/strategy/grisu.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 /*!
  12 Rust adaptation of Grisu3 algorithm described in [1]. It uses about
  13 1KB of precomputed table, and in turn, it's very quick for most inputs.
  14
  15 [1] Florian Loitsch. 2010. Printing floating-point numbers quickly and
  16     accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
  17 */
  18
  19 use num::diy_float::Fp;
  20 use num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up};
  21
  22
  23 // see the comments in `format_shortest_opt` for the rationale.
  24 #[doc(hidden)] pub const ALPHA: i16 = -60;
  25 #[doc(hidden)] pub const GAMMA: i16 = -32;
  26
  27 /*
  28 # the following Python code generates this table:
  29 for i in xrange(-308, 333, 8):
  30     if i >= 0: f = 10**i; e = 0
  31     else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
  32     l = f.bit_length()
  33     f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
  34     print '    (%#018x, %5d, %4d),' % (f, e, i)
  35 */
  36
  37 #[doc(hidden)]
  38 pub static CACHED_POW10: [(u64, i16, i16); 81] = [ // (f, e, k)
  39     (0xe61acf033d1a45df, -1087, -308),
  40     (0xab70fe17c79ac6ca, -1060, -300),
  41     (0xff77b1fcbebcdc4f, -1034, -292),
  42     (0xbe5691ef416bd60c, -1007, -284),
  43     (0x8dd01fad907ffc3c,  -980, -276),
  44     (0xd3515c2831559a83,  -954, -268),
  45     (0x9d71ac8fada6c9b5,  -927, -260),
  46     (0xea9c227723ee8bcb,  -901, -252),
  47     (0xaecc49914078536d,  -874, -244),
  48     (0x823c12795db6ce57,  -847, -236),
  49     (0xc21094364dfb5637,  -821, -228),
  50     (0x9096ea6f3848984f,  -794, -220),
  51     (0xd77485cb25823ac7,  -768, -212),
  52     (0xa086cfcd97bf97f4,  -741, -204),
  53     (0xef340a98172aace5,  -715, -196),
  54     (0xb23867fb2a35b28e,  -688, -188),
  55     (0x84c8d4dfd2c63f3b,  -661, -180),
  56     (0xc5dd44271ad3cdba,  -635, -172),
  57     (0x936b9fcebb25c996,  -608, -164),
  58     (0xdbac6c247d62a584,  -582, -156),
  59     (0xa3ab66580d5fdaf6,  -555, -148),
  60     (0xf3e2f893dec3f126,  -529, -140),
  61     (0xb5b5ada8aaff80b8,  -502, -132),
  62     (0x87625f056c7c4a8b,  -475, -124),
  63     (0xc9bcff6034c13053,  -449, -116),
  64     (0x964e858c91ba2655,  -422, -108),
  65     (0xdff9772470297ebd,  -396, -100),
  66     (0xa6dfbd9fb8e5b88f,  -369,  -92),
  67     (0xf8a95fcf88747d94,  -343,  -84),
  68     (0xb94470938fa89bcf,  -316,  -76),
  69     (0x8a08f0f8bf0f156b,  -289,  -68),
  70     (0xcdb02555653131b6,  -263,  -60),
  71     (0x993fe2c6d07b7fac,  -236,  -52),
  72     (0xe45c10c42a2b3b06,  -210,  -44),
  73     (0xaa242499697392d3,  -183,  -36),
  74     (0xfd87b5f28300ca0e,  -157,  -28),
  75     (0xbce5086492111aeb,  -130,  -20),
  76     (0x8cbccc096f5088cc,  -103,  -12),
  77     (0xd1b71758e219652c,   -77,   -4),
  78     (0x9c40000000000000,   -50,    4),
  79     (0xe8d4a51000000000,   -24,   12),
  80     (0xad78ebc5ac620000,     3,   20),
  81     (0x813f3978f8940984,    30,   28),
  82     (0xc097ce7bc90715b3,    56,   36),
  83     (0x8f7e32ce7bea5c70,    83,   44),
  84     (0xd5d238a4abe98068,   109,   52),
  85     (0x9f4f2726179a2245,   136,   60),
  86     (0xed63a231d4c4fb27,   162,   68),
  87     (0xb0de65388cc8ada8,   189,   76),
  88     (0x83c7088e1aab65db,   216,   84),
  89     (0xc45d1df942711d9a,   242,   92),
  90     (0x924d692ca61be758,   269,  100),
  91     (0xda01ee641a708dea,   295,  108),
  92     (0xa26da3999aef774a,   322,  116),
  93     (0xf209787bb47d6b85,   348,  124),
  94     (0xb454e4a179dd1877,   375,  132),
  95     (0x865b86925b9bc5c2,   402,  140),
  96     (0xc83553c5c8965d3d,   428,  148),
  97     (0x952ab45cfa97a0b3,   455,  156),
  98     (0xde469fbd99a05fe3,   481,  164),
  99     (0xa59bc234db398c25,   508,  172),
 100     (0xf6c69a72a3989f5c,   534,  180),
 101     (0xb7dcbf5354e9bece,   561,  188),
 102     (0x88fcf317f22241e2,   588,  196),
 103     (0xcc20ce9bd35c78a5,   614,  204),
 104     (0x98165af37b2153df,   641,  212),
 105     (0xe2a0b5dc971f303a,   667,  220),
 106     (0xa8d9d1535ce3b396,   694,  228),
 107     (0xfb9b7cd9a4a7443c,   720,  236),
 108     (0xbb764c4ca7a44410,   747,  244),
 109     (0x8bab8eefb6409c1a,   774,  252),
 110     (0xd01fef10a657842c,   800,  260),
 111     (0x9b10a4e5e9913129,   827,  268),
 112     (0xe7109bfba19c0c9d,   853,  276),
 113     (0xac2820d9623bf429,   880,  284),
 114     (0x80444b5e7aa7cf85,   907,  292),
 115     (0xbf21e44003acdd2d,   933,  300),
 116     (0x8e679c2f5e44ff8f,   960,  308),
 117     (0xd433179d9c8cb841,   986,  316),
 118     (0x9e19db92b4e31ba9,  1013,  324),
 119     (0xeb96bf6ebadf77d9,  1039,  332),
 120 ];
 121
 122 #[doc(hidden)] pub const CACHED_POW10_FIRST_E: i16 = -1087;
 123 #[doc(hidden)] pub const CACHED_POW10_LAST_E: i16 = 1039;
 124
 125 #[doc(hidden)]
 126 pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
 127     let offset = CACHED_POW10_FIRST_E as i32;
 128     let range = (CACHED_POW10.len() as i32) - 1;
 129     let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
 130     let idx = ((gamma as i32) - offset) * range / domain;
 131     let (f, e, k) = CACHED_POW10[idx as usize];
 132     debug_assert!(alpha <= e && e <= gamma);
 133     (k, Fp { f: f, e: e })
 134 }
 135
 136 /// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
 137 #[doc(hidden)]
 138 pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
 139     debug_assert!(x > 0);
 140
 141     const X9: u32 = 10_0000_0000;
 142     const X8: u32 =  1_0000_0000;
 143     const X7: u32 =    1000_0000;
 144     const X6: u32 =     100_0000;
 145     const X5: u32 =      10_0000;
 146     const X4: u32 =       1_0000;
 147     const X3: u32 =         1000;
 148     const X2: u32 =          100;
 149     const X1: u32 =           10;
 150
 151     if x < X4 {
 152         if x < X2 { if x < X1 {(0,  1)} else {(1, X1)} }
 153         else      { if x < X3 {(2, X2)} else {(3, X3)} }
 154     } else {
 155         if x < X6      { if x < X5 {(4, X4)} else {(5, X5)} }
 156         else if x < X8 { if x < X7 {(6, X6)} else {(7, X7)} }
 157         else           { if x < X9 {(8, X8)} else {(9, X9)} }
 158     }
 159 }
 160
 161 /// The shortest mode implementation for Grisu.
 162 ///
 163 /// It returns `None` when it would return an inexact representation otherwise.
 164 pub fn format_shortest_opt(d: &Decoded,
 165                            buf: &mut [u8]) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 166     assert!(d.mant > 0);
 167     assert!(d.minus > 0);
 168     assert!(d.plus > 0);
 169     assert!(d.mant.checked_add(d.plus).is_some());
 170     assert!(d.mant.checked_sub(d.minus).is_some());
 171     assert!(buf.len() >= MAX_SIG_DIGITS);
 172     assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
 173
 174     // start with the normalized values with the shared exponent
 175     let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
 176     let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
 177     let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
 178
 179     // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
 180     // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
 181     // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
 182     //
 183     // it is obviously desirable to maximize `GAMMA - ALPHA`,
 184     // so that we don't need many cached powers of 10, but there are some considerations:
 185     //
 186     // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
 187     //    (this is not really avoidable, remainder is required for accuracy estimation.)
 188     // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
 189     //    and it should not overflow.
 190     //
 191     // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
 192     // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
 193     let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
 194
 195     // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
 196     let plus = plus.mul(&cached);
 197     let minus = minus.mul(&cached);
 198     let v = v.mul(&cached);
 199     debug_assert_eq!(plus.e, minus.e);
 200     debug_assert_eq!(plus.e, v.e);
 201
 202     //         +- actual range of minus
 203     //   | <---|---------------------- unsafe region --------------------------> |
 204     //   |     |                                                                 |
 205     //   |  |<--->|  | <--------------- safe region ---------------> |           |
 206     //   |  |     |  |                                               |           |
 207     //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp|
 208     //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->|
 209     //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
 210     //   |   minus   |                 |     v     |                 |   plus    |
 211     // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1
 212     //
 213     // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
 214     // as we don't know the error is positive or negative, we use two approximations spaced equally
 215     // and have the maximal error of 2 ulps.
 216     //
 217     // the "unsafe region" is a liberal interval which we initially generate.
 218     // the "safe region" is a conservative interval which we only accept.
 219     // we start with the correct repr within the unsafe region, and try to find the closest repr
 220     // to `v` which is also within the safe region. if we can't, we give up.
 221     let plus1 = plus.f + 1;
 222 //  let plus0 = plus.f - 1; // only for explanation
 223 //  let minus0 = minus.f + 1; // only for explanation
 224     let minus1 = minus.f - 1;
 225     let e = -plus.e as usize; // shared exponent
 226
 227     // divide `plus1` into integral and fractional parts.
 228     // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
 229     // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
 230     let plus1int = (plus1 >> e) as u32;
 231     let plus1frac = plus1 & ((1 << e) - 1);
 232
 233     // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
 234     // this is an upper bound of `kappa` below.
 235     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
 236
 237     let mut i = 0;
 238     let exp = max_kappa as i16 - minusk + 1;
 239
 240     // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
 241     //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
 242     //              representations (with the minimal number of significant digits) in that range.
 243     //
 244     // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
 245     // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
 246     // (e.g. `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
 247     // the algorithm relies on the later verification phase to exclude `y`.
 248     let delta1 = plus1 - minus1;
 249 //  let delta1int = (delta1 >> e) as usize; // only for explanation
 250     let delta1frac = delta1 & ((1 << e) - 1);
 251
 252     // render integral parts, while checking for the accuracy at each step.
 253     let mut kappa = max_kappa as i16;
 254     let mut ten_kappa = max_ten_kappa; // 10^kappa
 255     let mut remainder = plus1int; // digits yet to be rendered
 256     loop { // we always have at least one digit to render, as `plus1 >= 10^kappa`
 257         // invariants:
 258         // - `delta1int <= remainder < 10^(kappa+1)`
 259         // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
 260         //   (it follows that `remainder = plus1int % 10^(kappa+1)`)
 261
 262         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 263         let q = remainder / ten_kappa;
 264         let r = remainder % ten_kappa;
 265         debug_assert!(q < 10);
 266         buf[i] = b'0' + q as u8;
 267         i += 1;
 268
 269         let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
 270         if plus1rem < delta1 {
 271             // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
 272             let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
 273             return round_and_weed(&mut buf[..i], exp, plus1rem, delta1, plus1 - v.f, ten_kappa, 1);
 274         }
 275
 276         // break the loop when we have rendered all integral digits.
 277         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 278         if i > max_kappa as usize {
 279             debug_assert_eq!(ten_kappa, 1);
 280             debug_assert_eq!(kappa, 0);
 281             break;
 282         }
 283
 284         // restore invariants
 285         kappa -= 1;
 286         ten_kappa /= 10;
 287         remainder = r;
 288     }
 289
 290     // render fractional parts, while checking for the accuracy at each step.
 291     // this time we rely on repeated multiplications, as division will lose the precision.
 292     let mut remainder = plus1frac;
 293     let mut threshold = delta1frac;
 294     let mut ulp = 1;
 295     loop { // the next digit should be significant as we've tested that before breaking out
 296         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 297         // - `remainder < 2^e`
 298         // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 299
 300         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 301         threshold *= 10;
 302         ulp *= 10;
 303
 304         // divide `remainder` by `10^kappa`.
 305         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 306         let q = remainder >> e;
 307         let r = remainder & ((1 << e) - 1);
 308         debug_assert!(q < 10);
 309         buf[i] = b'0' + q as u8;
 310         i += 1;
 311
 312         if r < threshold {
 313             let ten_kappa = 1 << e; // implicit divisor
 314             return round_and_weed(&mut buf[..i], exp, r, threshold,
 315                                   (plus1 - v.f) * ulp, ten_kappa, ulp);
 316         }
 317
 318         // restore invariants
 319         kappa -= 1;
 320         remainder = r;
 321     }
 322
 323     // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
 324     // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
 325     // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
 326     // we have to successively decrease the last digit and check if this is the optimal repr.
 327     // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
 328     //
 329     // the function checks if this "optimal" repr is actually within the ulp ranges,
 330     // and also, it is possible that the "second-to-optimal" repr can actually be optimal
 331     // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
 332     //
 333     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 334     // - `remainder = (plus1 % 10^kappa) * k`
 335     // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
 336     // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
 337     // - `ten_kappa = 10^kappa * k`
 338     // - `ulp = 2^-e * k`
 339     fn round_and_weed(buf: &mut [u8], exp: i16, remainder: u64, threshold: u64, plus1v: u64,
 340                       ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 341         assert!(!buf.is_empty());
 342
 343         // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
 344         // the resulting representation should be the closest representation to both.
 345         //
 346         // here `plus1 - v` is used since calculations are done with respect to `plus1`
 347         // in order to avoid overflow/underflow (hence the seemingly swapped names).
 348         let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
 349         let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
 350
 351         // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
 352         let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
 353         {
 354             let last = buf.last_mut().unwrap();
 355
 356             // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
 357             // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
 358             // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
 359             // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
 360             // note that `plus1w(n)` is always increasing.
 361             //
 362             // we have three conditions to terminate. any of them will make the loop unable to
 363             // proceed, but we then have at least one valid representation known to be closest to
 364             // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
 365             //
 366             // TC1: `w(n) <= v + 1 ulp`, i.e. this is the last repr that can be the closest one.
 367             // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
 368             // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
 369             // overflow on the calculation of `plus1w(n)`.
 370             //
 371             // TC2: `w(n+1) < minus1`, i.e. the next repr definitely does not round to `v`.
 372             // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
 373             // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
 374             // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
 375             // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
 376             // `threshold - plus1w(n) < 10^kappa` instead.
 377             //
 378             // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e. the next repr is
 379             // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
 380             // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
 381             // `z(n) > 0`. we have two cases to consider:
 382             //
 383             // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
 384             //   `z(n)` should be decreasing and this is clearly false.
 385             // - when `z(n+1) < 0`:
 386             //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
 387             //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
 388             //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e. `plus1v_up - plus1w(n) >=
 389             //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
 390             //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
 391             //     combined with TC3a.
 392             //
 393             // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
 394             // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
 395             while plus1w < plus1v_up &&
 396                   threshold - plus1w >= ten_kappa &&
 397                   (plus1w + ten_kappa < plus1v_up ||
 398                    plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up) {
 399                 *last -= 1;
 400                 debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
 401                 plus1w += ten_kappa;
 402             }
 403         }
 404
 405         // check if this representation is also the closest representation to `v - 1 ulp`.
 406         //
 407         // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
 408         // replaced by `plus1v_down` instead. overflow analysis equally holds.
 409         if plus1w < plus1v_down &&
 410            threshold - plus1w >= ten_kappa &&
 411            (plus1w + ten_kappa < plus1v_down ||
 412             plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down) {
 413             return None;
 414         }
 415
 416         // now we have the closest representation to `v` between `plus1` and `minus1`.
 417         // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
 418         // i.e. `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
 419         // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
 420         if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp {
 421             Some((buf.len(), exp))
 422         } else {
 423             None
 424         }
 425     }
 426 }
 427
 428 /// The shortest mode implementation for Grisu with Dragon fallback.
 429 ///
 430 /// This should be used for most cases.
 431 pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) {
 432     use num::flt2dec::strategy::dragon::format_shortest as fallback;
 433     match format_shortest_opt(d, buf) {
 434         Some(ret) => ret,
 435         None => fallback(d, buf),
 436     }
 437 }
 438
 439 /// The exact and fixed mode implementation for Grisu.
 440 ///
 441 /// It returns `None` when it would return an inexact representation otherwise.
 442 pub fn format_exact_opt(d: &Decoded, buf: &mut [u8], limit: i16)
 443                                 -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 444     assert!(d.mant > 0);
 445     assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
 446     assert!(!buf.is_empty());
 447
 448     // normalize and scale `v`.
 449     let v = Fp { f: d.mant, e: d.exp }.normalize();
 450     let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
 451     let v = v.mul(&cached);
 452
 453     // divide `v` into integral and fractional parts.
 454     let e = -v.e as usize;
 455     let vint = (v.f >> e) as u32;
 456     let vfrac = v.f & ((1 << e) - 1);
 457
 458     // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
 459     // as we don't know the error is positive or negative, we use two approximations
 460     // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
 461     //
 462     // the goal is to find the exactly rounded series of digits that are common to
 463     // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
 464     // if this is not possible, we don't know which one is the correct output for `v`,
 465     // so we give up and fall back.
 466     //
 467     // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
 468     // and we will scale it whenever `v` gets scaled.
 469     let mut err = 1;
 470
 471     // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
 472     // this is an upper bound of `kappa` below.
 473     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
 474
 475     let mut i = 0;
 476     let exp = max_kappa as i16 - minusk + 1;
 477
 478     // if we are working with the last-digit limitation, we need to shorten the buffer
 479     // before the actual rendering in order to avoid double rounding.
 480     // note that we have to enlarge the buffer again when rounding up happens!
 481     let len = if exp <= limit {
 482         // oops, we cannot even produce *one* digit.
 483         // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
 484         //
 485         // in principle we can immediately call `possibly_round` with an empty buffer,
 486         // but scaling `max_ten_kappa << e` by 10 can result in overflow.
 487         // thus we are being sloppy here and widen the error range by a factor of 10.
 488         // this will increase the false negative rate, but only very, *very* slightly;
 489         // it can only matter noticeably when the mantissa is bigger than 60 bits.
 490         return possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e);
 491     } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
 492         (exp - limit) as usize
 493     } else {
 494         buf.len()
 495     };
 496     debug_assert!(len > 0);
 497
 498     // render integral parts.
 499     // the error is entirely fractional, so we don't need to check it in this part.
 500     let mut kappa = max_kappa as i16;
 501     let mut ten_kappa = max_ten_kappa; // 10^kappa
 502     let mut remainder = vint; // digits yet to be rendered
 503     loop { // we always have at least one digit to render
 504         // invariants:
 505         // - `remainder < 10^(kappa+1)`
 506         // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
 507         //   (it follows that `remainder = vint % 10^(kappa+1)`)
 508
 509         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 510         let q = remainder / ten_kappa;
 511         let r = remainder % ten_kappa;
 512         debug_assert!(q < 10);
 513         buf[i] = b'0' + q as u8;
 514         i += 1;
 515
 516         // is the buffer full? run the rounding pass with the remainder.
 517         if i == len {
 518             let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
 519             return possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e);
 520         }
 521
 522         // break the loop when we have rendered all integral digits.
 523         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 524         if i > max_kappa as usize {
 525             debug_assert_eq!(ten_kappa, 1);
 526             debug_assert_eq!(kappa, 0);
 527             break;
 528         }
 529
 530         // restore invariants
 531         kappa -= 1;
 532         ten_kappa /= 10;
 533         remainder = r;
 534     }
 535
 536     // render fractional parts.
 537     //
 538     // in principle we can continue to the last available digit and check for the accuracy.
 539     // unfortunately we are working with the finite-sized integers, so we need some criterion
 540     // to detect the overflow. V8 uses `remainder > err`, which becomes false when
 541     // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
 542     // too many otherwise valid input.
 543     //
 544     // since the later phase has a correct overflow detection, we instead use tighter criterion:
 545     // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
 546     // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
 547     // the first two comparisons from `possibly_round`, for the reference.
 548     let mut remainder = vfrac;
 549     let maxerr = 1 << (e - 1);
 550     while err < maxerr {
 551         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 552         // - `remainder < 2^e`
 553         // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 554         // - `err = 10^(n-m)`
 555
 556         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 557         err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
 558
 559         // divide `remainder` by `10^kappa`.
 560         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 561         let q = remainder >> e;
 562         let r = remainder & ((1 << e) - 1);
 563         debug_assert!(q < 10);
 564         buf[i] = b'0' + q as u8;
 565         i += 1;
 566
 567         // is the buffer full? run the rounding pass with the remainder.
 568         if i == len {
 569             return possibly_round(buf, len, exp, limit, r, 1 << e, err);
 570         }
 571
 572         // restore invariants
 573         remainder = r;
 574     }
 575
 576     // further calculation is useless (`possibly_round` definitely fails), so we give up.
 577     return None;
 578
 579     // we've generated all requested digits of `v`, which should be also same to corresponding
 580     // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
 581     // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
 582     // to the rounded-up version of those digits. if the range contains multiple representations
 583     // of the same length, we cannot be sure and should return `None` instead.
 584     //
 585     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 586     // - `remainder = (v % 10^kappa) * k`
 587     // - `ten_kappa = 10^kappa * k`
 588     // - `ulp = 2^-e * k`
 589     fn possibly_round(buf: &mut [u8], mut len: usize, mut exp: i16, limit: i16,
 590                       remainder: u64, ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 591         debug_assert!(remainder < ten_kappa);
 592
 593         //           10^kappa
 594         //    :   :   :<->:   :
 595         //    :   :   :   :   :
 596         //    :|1 ulp|1 ulp|  :
 597         //    :|<--->|<--->|  :
 598         // ----|-----|-----|----
 599         //     |     v     |
 600         // v - 1 ulp   v + 1 ulp
 601         //
 602         // (for the reference, the dotted line indicates the exact value for
 603         // possible representations in given number of digits.)
 604         //
 605         // error is too large that there are at least three possible representations
 606         // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
 607         if ulp >= ten_kappa { return None; }
 608
 609         //    10^kappa
 610         //   :<------->:
 611         //   :         :
 612         //   : |1 ulp|1 ulp|
 613         //   : |<--->|<--->|
 614         // ----|-----|-----|----
 615         //     |     v     |
 616         // v - 1 ulp   v + 1 ulp
 617         //
 618         // in fact, 1/2 ulp is enough to introduce two possible representations.
 619         // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
 620         // this won't overflow, as `ulp < ten_kappa` from the first check.
 621         if ten_kappa - ulp <= ulp { return None; }
 622
 623         //     remainder
 624         //       :<->|                           :
 625         //       :   |                           :
 626         //       :<--------- 10^kappa ---------->:
 627         //     | :   |                           :
 628         //     |1 ulp|1 ulp|                     :
 629         //     |<--->|<--->|                     :
 630         // ----|-----|-----|------------------------
 631         //     |     v     |
 632         // v - 1 ulp   v + 1 ulp
 633         //
 634         // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
 635         // then we can safely return. note that `v - 1 ulp` *can* be less than the current
 636         // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
 637         // the distance between `v - 1 ulp` and the current representation
 638         // cannot exceed `10^kappa / 2`.
 639         //
 640         // the condition equals to `remainder + ulp < 10^kappa / 2`.
 641         // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
 642         // we've already verified that `ulp < 10^kappa / 2`, so as long as
 643         // `10^kappa` did not overflow after all, the second check is fine.
 644         if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
 645             return Some((len, exp));
 646         }
 647
 648         //   :<------- remainder ------>|   :
 649         //   :                          |   :
 650         //   :<--------- 10^kappa --------->:
 651         //   :                    |     |   : |
 652         //   :                    |1 ulp|1 ulp|
 653         //   :                    |<--->|<--->|
 654         // -----------------------|-----|-----|-----
 655         //                        |     v     |
 656         //                    v - 1 ulp   v + 1 ulp
 657         //
 658         // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
 659         // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
 660         //
 661         // the condition equals to `remainder - ulp >= 10^kappa / 2`.
 662         // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
 663         // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
 664         // so the second check does not overflow.
 665         if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
 666             if let Some(c) = round_up(buf, len) {
 667                 // only add an additional digit when we've been requested the fixed precision.
 668                 // we also need to check that, if the original buffer was empty,
 669                 // the additional digit can only be added when `exp == limit` (edge case).
 670                 exp += 1;
 671                 if exp > limit && len < buf.len() {
 672                     buf[len] = c;
 673                     len += 1;
 674                 }
 675             }
 676             return Some((len, exp));
 677         }
 678
 679         // otherwise we are doomed (i.e. some values between `v - 1 ulp` and `v + 1 ulp` are
 680         // rounding down and others are rounding up) and give up.
 681         None
 682     }
 683 }
 684
 685 /// The exact and fixed mode implementation for Grisu with Dragon fallback.
 686 ///
 687 /// This should be used for most cases.
 688 pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) {
 689     use num::flt2dec::strategy::dragon::format_exact as fallback;
 690     match format_exact_opt(d, buf, limit) {
 691         Some(ret) => ret,
 692         None => fallback(d, buf, limit),
 693     }
 694 }