src/libcore/num/flt2dec/strategy/grisu.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 /*!
  12 Rust adaptation of Grisu3 algorithm described in [1]. It uses about
  13 1KB of precomputed table, and in turn, it's very quick for most inputs.
  14
  15 [1] Florian Loitsch. 2010. Printing floating-point numbers quickly and
  16     accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
  17 */
  18
  19 use prelude::*;
  20 use num::Float;
  21
  22 use num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up};
  23
  24 /// A custom 64-bit floating point type, representing `f * 2^e`.
  25 #[derive(Copy, Clone, Debug)]
  26 #[doc(hidden)]
  27 pub struct Fp {
  28     /// The integer mantissa.
  29     pub f: u64,
  30     /// The exponent in base 2.
  31     pub e: i16,
  32 }
  33
  34 impl Fp {
  35     /// Returns a correctly rounded product of itself and `other`.
  36     fn mul(&self, other: &Fp) -> Fp {
  37         const MASK: u64 = 0xffffffff;
  38         let a = self.f >> 32;
  39         let b = self.f & MASK;
  40         let c = other.f >> 32;
  41         let d = other.f & MASK;
  42         let ac = a * c;
  43         let bc = b * c;
  44         let ad = a * d;
  45         let bd = b * d;
  46         let tmp = (bd >> 32) + (ad & MASK) + (bc & MASK) + (1 << 31) /* round */;
  47         let f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
  48         let e = self.e + other.e + 64;
  49         Fp { f: f, e: e }
  50     }
  51
  52     /// Normalizes itself so that the resulting mantissa is at least `2^63`.
  53     fn normalize(&self) -> Fp {
  54         let mut f = self.f;
  55         let mut e = self.e;
  56         if f >> (64 - 32) == 0 { f <<= 32; e -= 32; }
  57         if f >> (64 - 16) == 0 { f <<= 16; e -= 16; }
  58         if f >> (64 -  8) == 0 { f <<=  8; e -=  8; }
  59         if f >> (64 -  4) == 0 { f <<=  4; e -=  4; }
  60         if f >> (64 -  2) == 0 { f <<=  2; e -=  2; }
  61         if f >> (64 -  1) == 0 { f <<=  1; e -=  1; }
  62         debug_assert!(f >= (1 >> 63));
  63         Fp { f: f, e: e }
  64     }
  65
  66     /// Normalizes itself to have the shared exponent.
  67     /// It can only decrease the exponent (and thus increase the mantissa).
  68     fn normalize_to(&self, e: i16) -> Fp {
  69         let edelta = self.e - e;
  70         assert!(edelta >= 0);
  71         let edelta = edelta as usize;
  72         assert_eq!(self.f << edelta >> edelta, self.f);
  73         Fp { f: self.f << edelta, e: e }
  74     }
  75 }
  76
  77 // see the comments in `format_shortest_opt` for the rationale.
  78 #[doc(hidden)] pub const ALPHA: i16 = -60;
  79 #[doc(hidden)] pub const GAMMA: i16 = -32;
  80
  81 /*
  82 # the following Python code generates this table:
  83 for i in xrange(-308, 333, 8):
  84     if i >= 0: f = 10**i; e = 0
  85     else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
  86     l = f.bit_length()
  87     f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
  88     print '    (%#018x, %5d, %4d),' % (f, e, i)
  89 */
  90 // FIXME(#22540) const ref to static array seems to ICE
  91 #[doc(hidden)]
  92 pub static CACHED_POW10: [(u64, i16, i16); 81] = [ // (f, e, k)
  93     (0xe61acf033d1a45df, -1087, -308),
  94     (0xab70fe17c79ac6ca, -1060, -300),
  95     (0xff77b1fcbebcdc4f, -1034, -292),
  96     (0xbe5691ef416bd60c, -1007, -284),
  97     (0x8dd01fad907ffc3c,  -980, -276),
  98     (0xd3515c2831559a83,  -954, -268),
  99     (0x9d71ac8fada6c9b5,  -927, -260),
 100     (0xea9c227723ee8bcb,  -901, -252),
 101     (0xaecc49914078536d,  -874, -244),
 102     (0x823c12795db6ce57,  -847, -236),
 103     (0xc21094364dfb5637,  -821, -228),
 104     (0x9096ea6f3848984f,  -794, -220),
 105     (0xd77485cb25823ac7,  -768, -212),
 106     (0xa086cfcd97bf97f4,  -741, -204),
 107     (0xef340a98172aace5,  -715, -196),
 108     (0xb23867fb2a35b28e,  -688, -188),
 109     (0x84c8d4dfd2c63f3b,  -661, -180),
 110     (0xc5dd44271ad3cdba,  -635, -172),
 111     (0x936b9fcebb25c996,  -608, -164),
 112     (0xdbac6c247d62a584,  -582, -156),
 113     (0xa3ab66580d5fdaf6,  -555, -148),
 114     (0xf3e2f893dec3f126,  -529, -140),
 115     (0xb5b5ada8aaff80b8,  -502, -132),
 116     (0x87625f056c7c4a8b,  -475, -124),
 117     (0xc9bcff6034c13053,  -449, -116),
 118     (0x964e858c91ba2655,  -422, -108),
 119     (0xdff9772470297ebd,  -396, -100),
 120     (0xa6dfbd9fb8e5b88f,  -369,  -92),
 121     (0xf8a95fcf88747d94,  -343,  -84),
 122     (0xb94470938fa89bcf,  -316,  -76),
 123     (0x8a08f0f8bf0f156b,  -289,  -68),
 124     (0xcdb02555653131b6,  -263,  -60),
 125     (0x993fe2c6d07b7fac,  -236,  -52),
 126     (0xe45c10c42a2b3b06,  -210,  -44),
 127     (0xaa242499697392d3,  -183,  -36),
 128     (0xfd87b5f28300ca0e,  -157,  -28),
 129     (0xbce5086492111aeb,  -130,  -20),
 130     (0x8cbccc096f5088cc,  -103,  -12),
 131     (0xd1b71758e219652c,   -77,   -4),
 132     (0x9c40000000000000,   -50,    4),
 133     (0xe8d4a51000000000,   -24,   12),
 134     (0xad78ebc5ac620000,     3,   20),
 135     (0x813f3978f8940984,    30,   28),
 136     (0xc097ce7bc90715b3,    56,   36),
 137     (0x8f7e32ce7bea5c70,    83,   44),
 138     (0xd5d238a4abe98068,   109,   52),
 139     (0x9f4f2726179a2245,   136,   60),
 140     (0xed63a231d4c4fb27,   162,   68),
 141     (0xb0de65388cc8ada8,   189,   76),
 142     (0x83c7088e1aab65db,   216,   84),
 143     (0xc45d1df942711d9a,   242,   92),
 144     (0x924d692ca61be758,   269,  100),
 145     (0xda01ee641a708dea,   295,  108),
 146     (0xa26da3999aef774a,   322,  116),
 147     (0xf209787bb47d6b85,   348,  124),
 148     (0xb454e4a179dd1877,   375,  132),
 149     (0x865b86925b9bc5c2,   402,  140),
 150     (0xc83553c5c8965d3d,   428,  148),
 151     (0x952ab45cfa97a0b3,   455,  156),
 152     (0xde469fbd99a05fe3,   481,  164),
 153     (0xa59bc234db398c25,   508,  172),
 154     (0xf6c69a72a3989f5c,   534,  180),
 155     (0xb7dcbf5354e9bece,   561,  188),
 156     (0x88fcf317f22241e2,   588,  196),
 157     (0xcc20ce9bd35c78a5,   614,  204),
 158     (0x98165af37b2153df,   641,  212),
 159     (0xe2a0b5dc971f303a,   667,  220),
 160     (0xa8d9d1535ce3b396,   694,  228),
 161     (0xfb9b7cd9a4a7443c,   720,  236),
 162     (0xbb764c4ca7a44410,   747,  244),
 163     (0x8bab8eefb6409c1a,   774,  252),
 164     (0xd01fef10a657842c,   800,  260),
 165     (0x9b10a4e5e9913129,   827,  268),
 166     (0xe7109bfba19c0c9d,   853,  276),
 167     (0xac2820d9623bf429,   880,  284),
 168     (0x80444b5e7aa7cf85,   907,  292),
 169     (0xbf21e44003acdd2d,   933,  300),
 170     (0x8e679c2f5e44ff8f,   960,  308),
 171     (0xd433179d9c8cb841,   986,  316),
 172     (0x9e19db92b4e31ba9,  1013,  324),
 173     (0xeb96bf6ebadf77d9,  1039,  332),
 174 ];
 175
 176 #[doc(hidden)] pub const CACHED_POW10_FIRST_E: i16 = -1087;
 177 #[doc(hidden)] pub const CACHED_POW10_LAST_E: i16 = 1039;
 178
 179 #[doc(hidden)]
 180 pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
 181     let offset = CACHED_POW10_FIRST_E as i32;
 182     let range = (CACHED_POW10.len() as i32) - 1;
 183     let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
 184     let idx = ((gamma as i32) - offset) * range / domain;
 185     let (f, e, k) = CACHED_POW10[idx as usize];
 186     debug_assert!(alpha <= e && e <= gamma);
 187     (k, Fp { f: f, e: e })
 188 }
 189
 190 /// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
 191 #[doc(hidden)]
 192 pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
 193     debug_assert!(x > 0);
 194
 195     const X9: u32 = 10_0000_0000;
 196     const X8: u32 =  1_0000_0000;
 197     const X7: u32 =    1000_0000;
 198     const X6: u32 =     100_0000;
 199     const X5: u32 =      10_0000;
 200     const X4: u32 =       1_0000;
 201     const X3: u32 =         1000;
 202     const X2: u32 =          100;
 203     const X1: u32 =           10;
 204
 205     if x < X4 {
 206         if x < X2 { if x < X1 {(0,  1)} else {(1, X1)} }
 207         else      { if x < X3 {(2, X2)} else {(3, X3)} }
 208     } else {
 209         if x < X6      { if x < X5 {(4, X4)} else {(5, X5)} }
 210         else if x < X8 { if x < X7 {(6, X6)} else {(7, X7)} }
 211         else           { if x < X9 {(8, X8)} else {(9, X9)} }
 212     }
 213 }
 214
 215 /// The shortest mode implementation for Grisu.
 216 ///
 217 /// It returns `None` when it would return an inexact representation otherwise.
 218 pub fn format_shortest_opt(d: &Decoded,
 219                            buf: &mut [u8]) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 220     assert!(d.mant > 0);
 221     assert!(d.minus > 0);
 222     assert!(d.plus > 0);
 223     assert!(d.mant.checked_add(d.plus).is_some());
 224     assert!(d.mant.checked_sub(d.minus).is_some());
 225     assert!(buf.len() >= MAX_SIG_DIGITS);
 226     assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
 227
 228     // start with the normalized values with the shared exponent
 229     let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
 230     let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
 231     let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
 232
 233     // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
 234     // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
 235     // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
 236     //
 237     // it is obviously desirable to maximize `GAMMA - ALPHA`,
 238     // so that we don't need many cached powers of 10, but there are some considerations:
 239     //
 240     // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
 241     //    (this is not really avoidable, remainder is required for accuracy estimation.)
 242     // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
 243     //    and it should not overflow.
 244     //
 245     // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
 246     // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
 247     let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
 248
 249     // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
 250     let plus = plus.mul(&cached);
 251     let minus = minus.mul(&cached);
 252     let v = v.mul(&cached);
 253     debug_assert_eq!(plus.e, minus.e);
 254     debug_assert_eq!(plus.e, v.e);
 255
 256     //         +- actual range of minus
 257     //   | <---|---------------------- unsafe region --------------------------> |
 258     //   |     |                                                                 |
 259     //   |  |<--->|  | <--------------- safe region ---------------> |           |
 260     //   |  |     |  |                                               |           |
 261     //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp|
 262     //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->|
 263     //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
 264     //   |   minus   |                 |     v     |                 |   plus    |
 265     // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1
 266     //
 267     // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
 268     // as we don't know the error is positive or negative, we use two approximations spaced equally
 269     // and have the maximal error of 2 ulps.
 270     //
 271     // the "unsafe region" is a liberal interval which we initially generate.
 272     // the "safe region" is a conservative interval which we only accept.
 273     // we start with the correct repr within the unsafe region, and try to find the closest repr
 274     // to `v` which is also within the safe region. if we can't, we give up.
 275     let plus1 = plus.f + 1;
 276 //  let plus0 = plus.f - 1; // only for explanation
 277 //  let minus0 = minus.f + 1; // only for explanation
 278     let minus1 = minus.f - 1;
 279     let e = -plus.e as usize; // shared exponent
 280
 281     // divide `plus1` into integral and fractional parts.
 282     // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
 283     // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
 284     let plus1int = (plus1 >> e) as u32;
 285     let plus1frac = plus1 & ((1 << e) - 1);
 286
 287     // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
 288     // this is an upper bound of `kappa` below.
 289     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
 290
 291     let mut i = 0;
 292     let exp = max_kappa as i16 - minusk + 1;
 293
 294     // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
 295     //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
 296     //              representations (with the minimal number of significant digits) in that range.
 297     //
 298     // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
 299     // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
 300     // (e.g. `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
 301     // the algorithm relies on the later verification phase to exclude `y`.
 302     let delta1 = plus1 - minus1;
 303 //  let delta1int = (delta1 >> e) as usize; // only for explanation
 304     let delta1frac = delta1 & ((1 << e) - 1);
 305
 306     // render integral parts, while checking for the accuracy at each step.
 307     let mut kappa = max_kappa as i16;
 308     let mut ten_kappa = max_ten_kappa; // 10^kappa
 309     let mut remainder = plus1int; // digits yet to be rendered
 310     loop { // we always have at least one digit to render, as `plus1 >= 10^kappa`
 311         // invariants:
 312         // - `delta1int <= remainder < 10^(kappa+1)`
 313         // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
 314         //   (it follows that `remainder = plus1int % 10^(kappa+1)`)
 315
 316         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 317         let q = remainder / ten_kappa;
 318         let r = remainder % ten_kappa;
 319         debug_assert!(q < 10);
 320         buf[i] = b'0' + q as u8;
 321         i += 1;
 322
 323         let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
 324         if plus1rem < delta1 {
 325             // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
 326             let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
 327             return round_and_weed(&mut buf[..i], exp, plus1rem, delta1, plus1 - v.f, ten_kappa, 1);
 328         }
 329
 330         // break the loop when we have rendered all integral digits.
 331         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 332         if i > max_kappa as usize {
 333             debug_assert_eq!(ten_kappa, 1);
 334             debug_assert_eq!(kappa, 0);
 335             break;
 336         }
 337
 338         // restore invariants
 339         kappa -= 1;
 340         ten_kappa /= 10;
 341         remainder = r;
 342     }
 343
 344     // render fractional parts, while checking for the accuracy at each step.
 345     // this time we rely on repeated multiplications, as division will lose the precision.
 346     let mut remainder = plus1frac;
 347     let mut threshold = delta1frac;
 348     let mut ulp = 1;
 349     loop { // the next digit should be significant as we've tested that before breaking out
 350         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 351         // - `remainder < 2^e`
 352         // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 353
 354         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 355         threshold *= 10;
 356         ulp *= 10;
 357
 358         // divide `remainder` by `10^kappa`.
 359         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 360         let q = remainder >> e;
 361         let r = remainder & ((1 << e) - 1);
 362         debug_assert!(q < 10);
 363         buf[i] = b'0' + q as u8;
 364         i += 1;
 365
 366         if r < threshold {
 367             let ten_kappa = 1 << e; // implicit divisor
 368             return round_and_weed(&mut buf[..i], exp, r, threshold,
 369                                   (plus1 - v.f) * ulp, ten_kappa, ulp);
 370         }
 371
 372         // restore invariants
 373         kappa -= 1;
 374         remainder = r;
 375     }
 376
 377     // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
 378     // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
 379     // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
 380     // we have to successively decrease the last digit and check if this is the optimal repr.
 381     // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
 382     //
 383     // the function checks if this "optimal" repr is actually within the ulp ranges,
 384     // and also, it is possible that the "second-to-optimal" repr can actually be optimal
 385     // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
 386     //
 387     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 388     // - `remainder = (plus1 % 10^kappa) * k`
 389     // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
 390     // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
 391     // - `ten_kappa = 10^kappa * k`
 392     // - `ulp = 2^-e * k`
 393     fn round_and_weed(buf: &mut [u8], exp: i16, remainder: u64, threshold: u64, plus1v: u64,
 394                       ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 395         assert!(!buf.is_empty());
 396
 397         // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
 398         // the resulting representation should be the closest representation to both.
 399         //
 400         // here `plus1 - v` is used since calculations are done with respect to `plus1`
 401         // in order to avoid overflow/underflow (hence the seemingly swapped names).
 402         let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
 403         let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
 404
 405         // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
 406         let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
 407         {
 408             let last = buf.last_mut().unwrap();
 409
 410             // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
 411             // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
 412             // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
 413             // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
 414             // note that `plus1w(n)` is always increasing.
 415             //
 416             // we have three conditions to terminate. any of them will make the loop unable to
 417             // proceed, but we then have at least one valid representation known to be closest to
 418             // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
 419             //
 420             // TC1: `w(n) <= v + 1 ulp`, i.e. this is the last repr that can be the closest one.
 421             // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
 422             // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
 423             // overflow on the calculation of `plus1w(n)`.
 424             //
 425             // TC2: `w(n+1) < minus1`, i.e. the next repr definitely does not round to `v`.
 426             // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
 427             // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
 428             // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
 429             // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
 430             // `threshold - plus1w(n) < 10^kappa` instead.
 431             //
 432             // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e. the next repr is
 433             // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
 434             // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
 435             // `z(n) > 0`. we have two cases to consider:
 436             //
 437             // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
 438             //   `z(n)` should be decreasing and this is clearly false.
 439             // - when `z(n+1) < 0`:
 440             //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
 441             //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
 442             //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e. `plus1v_up - plus1w(n) >=
 443             //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
 444             //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
 445             //     combined with TC3a.
 446             //
 447             // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
 448             // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
 449             while plus1w < plus1v_up &&
 450                   threshold - plus1w >= ten_kappa &&
 451                   (plus1w + ten_kappa < plus1v_up ||
 452                    plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up) {
 453                 *last -= 1;
 454                 debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
 455                 plus1w += ten_kappa;
 456             }
 457         }
 458
 459         // check if this representation is also the closest representation to `v - 1 ulp`.
 460         //
 461         // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
 462         // replaced by `plus1v_down` instead. overflow analysis equally holds.
 463         if plus1w < plus1v_down &&
 464            threshold - plus1w >= ten_kappa &&
 465            (plus1w + ten_kappa < plus1v_down ||
 466             plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down) {
 467             return None;
 468         }
 469
 470         // now we have the closest representation to `v` between `plus1` and `minus1`.
 471         // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
 472         // i.e. `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
 473         // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
 474         if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp {
 475             Some((buf.len(), exp))
 476         } else {
 477             None
 478         }
 479     }
 480 }
 481
 482 /// The shortest mode implementation for Grisu with Dragon fallback.
 483 ///
 484 /// This should be used for most cases.
 485 pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) {
 486     use num::flt2dec::strategy::dragon::format_shortest as fallback;
 487     match format_shortest_opt(d, buf) {
 488         Some(ret) => ret,
 489         None => fallback(d, buf),
 490     }
 491 }
 492
 493 /// The exact and fixed mode implementation for Grisu.
 494 ///
 495 /// It returns `None` when it would return an inexact representation otherwise.
 496 pub fn format_exact_opt(d: &Decoded, buf: &mut [u8], limit: i16)
 497                                 -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 498     assert!(d.mant > 0);
 499     assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
 500     assert!(!buf.is_empty());
 501
 502     // normalize and scale `v`.
 503     let v = Fp { f: d.mant, e: d.exp }.normalize();
 504     let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
 505     let v = v.mul(&cached);
 506
 507     // divide `v` into integral and fractional parts.
 508     let e = -v.e as usize;
 509     let vint = (v.f >> e) as u32;
 510     let vfrac = v.f & ((1 << e) - 1);
 511
 512     // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
 513     // as we don't know the error is positive or negative, we use two approximations
 514     // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
 515     //
 516     // the goal is to find the exactly rounded series of digits that are common to
 517     // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
 518     // if this is not possible, we don't know which one is the correct output for `v`,
 519     // so we give up and fall back.
 520     //
 521     // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
 522     // and we will scale it whenever `v` gets scaled.
 523     let mut err = 1;
 524
 525     // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
 526     // this is an upper bound of `kappa` below.
 527     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
 528
 529     let mut i = 0;
 530     let exp = max_kappa as i16 - minusk + 1;
 531
 532     // if we are working with the last-digit limitation, we need to shorten the buffer
 533     // before the actual rendering in order to avoid double rounding.
 534     // note that we have to enlarge the buffer again when rounding up happens!
 535     let len = if exp <= limit {
 536         // oops, we cannot even produce *one* digit.
 537         // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
 538         //
 539         // in principle we can immediately call `possibly_round` with an empty buffer,
 540         // but scaling `max_ten_kappa << e` by 10 can result in overflow.
 541         // thus we are being sloppy here and widen the error range by a factor of 10.
 542         // this will increase the false negative rate, but only very, *very* slightly;
 543         // it can only matter noticably when the mantissa is bigger than 60 bits.
 544         return possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e);
 545     } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
 546         (exp - limit) as usize
 547     } else {
 548         buf.len()
 549     };
 550     debug_assert!(len > 0);
 551
 552     // render integral parts.
 553     // the error is entirely fractional, so we don't need to check it in this part.
 554     let mut kappa = max_kappa as i16;
 555     let mut ten_kappa = max_ten_kappa; // 10^kappa
 556     let mut remainder = vint; // digits yet to be rendered
 557     loop { // we always have at least one digit to render
 558         // invariants:
 559         // - `remainder < 10^(kappa+1)`
 560         // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
 561         //   (it follows that `remainder = vint % 10^(kappa+1)`)
 562
 563         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 564         let q = remainder / ten_kappa;
 565         let r = remainder % ten_kappa;
 566         debug_assert!(q < 10);
 567         buf[i] = b'0' + q as u8;
 568         i += 1;
 569
 570         // is the buffer full? run the rounding pass with the remainder.
 571         if i == len {
 572             let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
 573             return possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e);
 574         }
 575
 576         // break the loop when we have rendered all integral digits.
 577         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 578         if i > max_kappa as usize {
 579             debug_assert_eq!(ten_kappa, 1);
 580             debug_assert_eq!(kappa, 0);
 581             break;
 582         }
 583
 584         // restore invariants
 585         kappa -= 1;
 586         ten_kappa /= 10;
 587         remainder = r;
 588     }
 589
 590     // render fractional parts.
 591     //
 592     // in principle we can continue to the last available digit and check for the accuracy.
 593     // unfortunately we are working with the finite-sized integers, so we need some criterion
 594     // to detect the overflow. V8 uses `remainder > err`, which becomes false when
 595     // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
 596     // too many otherwise valid input.
 597     //
 598     // since the later phase has a correct overflow detection, we instead use tighter criterion:
 599     // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
 600     // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
 601     // the first two comparisons from `possibly_round`, for the reference.
 602     let mut remainder = vfrac;
 603     let maxerr = 1 << (e - 1);
 604     while err < maxerr {
 605         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 606         // - `remainder < 2^e`
 607         // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 608         // - `err = 10^(n-m)`
 609
 610         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 611         err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
 612
 613         // divide `remainder` by `10^kappa`.
 614         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 615         let q = remainder >> e;
 616         let r = remainder & ((1 << e) - 1);
 617         debug_assert!(q < 10);
 618         buf[i] = b'0' + q as u8;
 619         i += 1;
 620
 621         // is the buffer full? run the rounding pass with the remainder.
 622         if i == len {
 623             return possibly_round(buf, len, exp, limit, r, 1 << e, err);
 624         }
 625
 626         // restore invariants
 627         remainder = r;
 628     }
 629
 630     // further calculation is useless (`possibly_round` definitely fails), so we give up.
 631     return None;
 632
 633     // we've generated all requested digits of `v`, which should be also same to corresponding
 634     // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
 635     // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
 636     // to the rounded-up version of those digits. if the range contains multiple representations
 637     // of the same length, we cannot be sure and should return `None` instead.
 638     //
 639     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 640     // - `remainder = (v % 10^kappa) * k`
 641     // - `ten_kappa = 10^kappa * k`
 642     // - `ulp = 2^-e * k`
 643     fn possibly_round(buf: &mut [u8], mut len: usize, mut exp: i16, limit: i16,
 644                       remainder: u64, ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 645         debug_assert!(remainder < ten_kappa);
 646
 647         //           10^kappa
 648         //    :   :   :<->:   :
 649         //    :   :   :   :   :
 650         //    :|1 ulp|1 ulp|  :
 651         //    :|<--->|<--->|  :
 652         // ----|-----|-----|----
 653         //     |     v     |
 654         // v - 1 ulp   v + 1 ulp
 655         //
 656         // (for the reference, the dotted line indicates the exact value for
 657         // possible representations in given number of digits.)
 658         //
 659         // error is too large that there are at least three possible representations
 660         // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
 661         if ulp >= ten_kappa { return None; }
 662
 663         //    10^kappa
 664         //   :<------->:
 665         //   :         :
 666         //   : |1 ulp|1 ulp|
 667         //   : |<--->|<--->|
 668         // ----|-----|-----|----
 669         //     |     v     |
 670         // v - 1 ulp   v + 1 ulp
 671         //
 672         // in fact, 1/2 ulp is enough to introduce two possible representations.
 673         // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
 674         // this won't overflow, as `ulp < ten_kappa` from the first check.
 675         if ten_kappa - ulp <= ulp { return None; }
 676
 677         //     remainder
 678         //       :<->|                           :
 679         //       :   |                           :
 680         //       :<--------- 10^kappa ---------->:
 681         //     | :   |                           :
 682         //     |1 ulp|1 ulp|                     :
 683         //     |<--->|<--->|                     :
 684         // ----|-----|-----|------------------------
 685         //     |     v     |
 686         // v - 1 ulp   v + 1 ulp
 687         //
 688         // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
 689         // then we can safely return. note that `v - 1 ulp` *can* be less than the current
 690         // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
 691         // the distance between `v - 1 ulp` and the current representation
 692         // cannot exceed `10^kappa / 2`.
 693         //
 694         // the condition equals to `remainder + ulp < 10^kappa / 2`.
 695         // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
 696         // we've already verified that `ulp < 10^kappa / 2`, so as long as
 697         // `10^kappa` did not overflow after all, the second check is fine.
 698         if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
 699             return Some((len, exp));
 700         }
 701
 702         //   :<------- remainder ------>|   :
 703         //   :                          |   :
 704         //   :<--------- 10^kappa --------->:
 705         //   :                    |     |   : |
 706         //   :                    |1 ulp|1 ulp|
 707         //   :                    |<--->|<--->|
 708         // -----------------------|-----|-----|-----
 709         //                        |     v     |
 710         //                    v - 1 ulp   v + 1 ulp
 711         //
 712         // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
 713         // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
 714         //
 715         // the condition equals to `remainder - ulp >= 10^kappa / 2`.
 716         // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
 717         // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
 718         // so the second check does not overflow.
 719         if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
 720             if let Some(c) = round_up(buf, len) {
 721                 // only add an additional digit when we've been requested the fixed precision.
 722                 // we also need to check that, if the original buffer was empty,
 723                 // the additional digit can only be added when `exp == limit` (edge case).
 724                 exp += 1;
 725                 if exp > limit && len < buf.len() {
 726                     buf[len] = c;
 727                     len += 1;
 728                 }
 729             }
 730             return Some((len, exp));
 731         }
 732
 733         // otherwise we are doomed (i.e. some values between `v - 1 ulp` and `v + 1 ulp` are
 734         // rounding down and others are rounding up) and give up.
 735         None
 736     }
 737 }
 738
 739 /// The exact and fixed mode implementation for Grisu with Dragon fallback.
 740 ///
 741 /// This should be used for most cases.
 742 pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) {
 743     use num::flt2dec::strategy::dragon::format_exact as fallback;
 744     match format_exact_opt(d, buf, limit) {
 745         Some(ret) => ret,
 746         None => fallback(d, buf, limit),
 747     }
 748 }
 749