]> git.proxmox.com Git - rustc.git/blob - src/libcore/num/flt2dec/strategy/grisu.rs
New upstream version 1.41.1+dfsg1
[rustc.git] / src / libcore / num / flt2dec / strategy / grisu.rs
1 //! Rust adaptation of the Grisu3 algorithm described in "Printing Floating-Point Numbers Quickly
2 //! and Accurately with Integers"[^1]. It uses about 1KB of precomputed table, and in turn, it's
3 //! very quick for most inputs.
4 //!
5 //! [^1]: Florian Loitsch. 2010. Printing floating-point numbers quickly and
6 //! accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
7
8 use crate::num::diy_float::Fp;
9 use crate::num::flt2dec::{round_up, Decoded, MAX_SIG_DIGITS};
10
11 // see the comments in `format_shortest_opt` for the rationale.
12 #[doc(hidden)]
13 pub const ALPHA: i16 = -60;
14 #[doc(hidden)]
15 pub const GAMMA: i16 = -32;
16
17 /*
18 # the following Python code generates this table:
19 for i in xrange(-308, 333, 8):
20 if i >= 0: f = 10**i; e = 0
21 else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
22 l = f.bit_length()
23 f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
24 print ' (%#018x, %5d, %4d),' % (f, e, i)
25 */
26
27 #[doc(hidden)]
28 pub static CACHED_POW10: [(u64, i16, i16); 81] = [
29 // (f, e, k)
30 (0xe61acf033d1a45df, -1087, -308),
31 (0xab70fe17c79ac6ca, -1060, -300),
32 (0xff77b1fcbebcdc4f, -1034, -292),
33 (0xbe5691ef416bd60c, -1007, -284),
34 (0x8dd01fad907ffc3c, -980, -276),
35 (0xd3515c2831559a83, -954, -268),
36 (0x9d71ac8fada6c9b5, -927, -260),
37 (0xea9c227723ee8bcb, -901, -252),
38 (0xaecc49914078536d, -874, -244),
39 (0x823c12795db6ce57, -847, -236),
40 (0xc21094364dfb5637, -821, -228),
41 (0x9096ea6f3848984f, -794, -220),
42 (0xd77485cb25823ac7, -768, -212),
43 (0xa086cfcd97bf97f4, -741, -204),
44 (0xef340a98172aace5, -715, -196),
45 (0xb23867fb2a35b28e, -688, -188),
46 (0x84c8d4dfd2c63f3b, -661, -180),
47 (0xc5dd44271ad3cdba, -635, -172),
48 (0x936b9fcebb25c996, -608, -164),
49 (0xdbac6c247d62a584, -582, -156),
50 (0xa3ab66580d5fdaf6, -555, -148),
51 (0xf3e2f893dec3f126, -529, -140),
52 (0xb5b5ada8aaff80b8, -502, -132),
53 (0x87625f056c7c4a8b, -475, -124),
54 (0xc9bcff6034c13053, -449, -116),
55 (0x964e858c91ba2655, -422, -108),
56 (0xdff9772470297ebd, -396, -100),
57 (0xa6dfbd9fb8e5b88f, -369, -92),
58 (0xf8a95fcf88747d94, -343, -84),
59 (0xb94470938fa89bcf, -316, -76),
60 (0x8a08f0f8bf0f156b, -289, -68),
61 (0xcdb02555653131b6, -263, -60),
62 (0x993fe2c6d07b7fac, -236, -52),
63 (0xe45c10c42a2b3b06, -210, -44),
64 (0xaa242499697392d3, -183, -36),
65 (0xfd87b5f28300ca0e, -157, -28),
66 (0xbce5086492111aeb, -130, -20),
67 (0x8cbccc096f5088cc, -103, -12),
68 (0xd1b71758e219652c, -77, -4),
69 (0x9c40000000000000, -50, 4),
70 (0xe8d4a51000000000, -24, 12),
71 (0xad78ebc5ac620000, 3, 20),
72 (0x813f3978f8940984, 30, 28),
73 (0xc097ce7bc90715b3, 56, 36),
74 (0x8f7e32ce7bea5c70, 83, 44),
75 (0xd5d238a4abe98068, 109, 52),
76 (0x9f4f2726179a2245, 136, 60),
77 (0xed63a231d4c4fb27, 162, 68),
78 (0xb0de65388cc8ada8, 189, 76),
79 (0x83c7088e1aab65db, 216, 84),
80 (0xc45d1df942711d9a, 242, 92),
81 (0x924d692ca61be758, 269, 100),
82 (0xda01ee641a708dea, 295, 108),
83 (0xa26da3999aef774a, 322, 116),
84 (0xf209787bb47d6b85, 348, 124),
85 (0xb454e4a179dd1877, 375, 132),
86 (0x865b86925b9bc5c2, 402, 140),
87 (0xc83553c5c8965d3d, 428, 148),
88 (0x952ab45cfa97a0b3, 455, 156),
89 (0xde469fbd99a05fe3, 481, 164),
90 (0xa59bc234db398c25, 508, 172),
91 (0xf6c69a72a3989f5c, 534, 180),
92 (0xb7dcbf5354e9bece, 561, 188),
93 (0x88fcf317f22241e2, 588, 196),
94 (0xcc20ce9bd35c78a5, 614, 204),
95 (0x98165af37b2153df, 641, 212),
96 (0xe2a0b5dc971f303a, 667, 220),
97 (0xa8d9d1535ce3b396, 694, 228),
98 (0xfb9b7cd9a4a7443c, 720, 236),
99 (0xbb764c4ca7a44410, 747, 244),
100 (0x8bab8eefb6409c1a, 774, 252),
101 (0xd01fef10a657842c, 800, 260),
102 (0x9b10a4e5e9913129, 827, 268),
103 (0xe7109bfba19c0c9d, 853, 276),
104 (0xac2820d9623bf429, 880, 284),
105 (0x80444b5e7aa7cf85, 907, 292),
106 (0xbf21e44003acdd2d, 933, 300),
107 (0x8e679c2f5e44ff8f, 960, 308),
108 (0xd433179d9c8cb841, 986, 316),
109 (0x9e19db92b4e31ba9, 1013, 324),
110 (0xeb96bf6ebadf77d9, 1039, 332),
111 ];
112
113 #[doc(hidden)]
114 pub const CACHED_POW10_FIRST_E: i16 = -1087;
115 #[doc(hidden)]
116 pub const CACHED_POW10_LAST_E: i16 = 1039;
117
118 #[doc(hidden)]
119 pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
120 let offset = CACHED_POW10_FIRST_E as i32;
121 let range = (CACHED_POW10.len() as i32) - 1;
122 let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
123 let idx = ((gamma as i32) - offset) * range / domain;
124 let (f, e, k) = CACHED_POW10[idx as usize];
125 debug_assert!(alpha <= e && e <= gamma);
126 (k, Fp { f, e })
127 }
128
129 /// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
130 #[doc(hidden)]
131 pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
132 debug_assert!(x > 0);
133
134 const X9: u32 = 10_0000_0000;
135 const X8: u32 = 1_0000_0000;
136 const X7: u32 = 1000_0000;
137 const X6: u32 = 100_0000;
138 const X5: u32 = 10_0000;
139 const X4: u32 = 1_0000;
140 const X3: u32 = 1000;
141 const X2: u32 = 100;
142 const X1: u32 = 10;
143
144 if x < X4 {
145 if x < X2 {
146 if x < X1 { (0, 1) } else { (1, X1) }
147 } else {
148 if x < X3 { (2, X2) } else { (3, X3) }
149 }
150 } else {
151 if x < X6 {
152 if x < X5 { (4, X4) } else { (5, X5) }
153 } else if x < X8 {
154 if x < X7 { (6, X6) } else { (7, X7) }
155 } else {
156 if x < X9 { (8, X8) } else { (9, X9) }
157 }
158 }
159 }
160
161 /// The shortest mode implementation for Grisu.
162 ///
163 /// It returns `None` when it would return an inexact representation otherwise.
164 pub fn format_shortest_opt(
165 d: &Decoded,
166 buf: &mut [u8],
167 ) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
168 assert!(d.mant > 0);
169 assert!(d.minus > 0);
170 assert!(d.plus > 0);
171 assert!(d.mant.checked_add(d.plus).is_some());
172 assert!(d.mant.checked_sub(d.minus).is_some());
173 assert!(buf.len() >= MAX_SIG_DIGITS);
174 assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
175
176 // start with the normalized values with the shared exponent
177 let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
178 let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
179 let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
180
181 // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
182 // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
183 // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
184 //
185 // it is obviously desirable to maximize `GAMMA - ALPHA`,
186 // so that we don't need many cached powers of 10, but there are some considerations:
187 //
188 // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
189 // (this is not really avoidable, remainder is required for accuracy estimation.)
190 // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
191 // and it should not overflow.
192 //
193 // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
194 // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
195 let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
196
197 // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
198 let plus = plus.mul(&cached);
199 let minus = minus.mul(&cached);
200 let v = v.mul(&cached);
201 debug_assert_eq!(plus.e, minus.e);
202 debug_assert_eq!(plus.e, v.e);
203
204 // +- actual range of minus
205 // | <---|---------------------- unsafe region --------------------------> |
206 // | | |
207 // | |<--->| | <--------------- safe region ---------------> | |
208 // | | | | | |
209 // |1 ulp|1 ulp| |1 ulp|1 ulp| |1 ulp|1 ulp|
210 // |<--->|<--->| |<--->|<--->| |<--->|<--->|
211 // |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
212 // | minus | | v | | plus |
213 // minus1 minus0 v - 1 ulp v + 1 ulp plus0 plus1
214 //
215 // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
216 // as we don't know the error is positive or negative, we use two approximations spaced equally
217 // and have the maximal error of 2 ulps.
218 //
219 // the "unsafe region" is a liberal interval which we initially generate.
220 // the "safe region" is a conservative interval which we only accept.
221 // we start with the correct repr within the unsafe region, and try to find the closest repr
222 // to `v` which is also within the safe region. if we can't, we give up.
223 let plus1 = plus.f + 1;
224 // let plus0 = plus.f - 1; // only for explanation
225 // let minus0 = minus.f + 1; // only for explanation
226 let minus1 = minus.f - 1;
227 let e = -plus.e as usize; // shared exponent
228
229 // divide `plus1` into integral and fractional parts.
230 // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
231 // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
232 let plus1int = (plus1 >> e) as u32;
233 let plus1frac = plus1 & ((1 << e) - 1);
234
235 // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
236 // this is an upper bound of `kappa` below.
237 let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
238
239 let mut i = 0;
240 let exp = max_kappa as i16 - minusk + 1;
241
242 // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
243 // then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
244 // representations (with the minimal number of significant digits) in that range.
245 //
246 // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
247 // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
248 // (e.g., `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
249 // the algorithm relies on the later verification phase to exclude `y`.
250 let delta1 = plus1 - minus1;
251 // let delta1int = (delta1 >> e) as usize; // only for explanation
252 let delta1frac = delta1 & ((1 << e) - 1);
253
254 // render integral parts, while checking for the accuracy at each step.
255 let mut kappa = max_kappa as i16;
256 let mut ten_kappa = max_ten_kappa; // 10^kappa
257 let mut remainder = plus1int; // digits yet to be rendered
258 loop {
259 // we always have at least one digit to render, as `plus1 >= 10^kappa`
260 // invariants:
261 // - `delta1int <= remainder < 10^(kappa+1)`
262 // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
263 // (it follows that `remainder = plus1int % 10^(kappa+1)`)
264
265 // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
266 let q = remainder / ten_kappa;
267 let r = remainder % ten_kappa;
268 debug_assert!(q < 10);
269 buf[i] = b'0' + q as u8;
270 i += 1;
271
272 let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
273 if plus1rem < delta1 {
274 // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
275 let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
276 return round_and_weed(&mut buf[..i], exp, plus1rem, delta1, plus1 - v.f, ten_kappa, 1);
277 }
278
279 // break the loop when we have rendered all integral digits.
280 // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
281 if i > max_kappa as usize {
282 debug_assert_eq!(ten_kappa, 1);
283 debug_assert_eq!(kappa, 0);
284 break;
285 }
286
287 // restore invariants
288 kappa -= 1;
289 ten_kappa /= 10;
290 remainder = r;
291 }
292
293 // render fractional parts, while checking for the accuracy at each step.
294 // this time we rely on repeated multiplications, as division will lose the precision.
295 let mut remainder = plus1frac;
296 let mut threshold = delta1frac;
297 let mut ulp = 1;
298 loop {
299 // the next digit should be significant as we've tested that before breaking out
300 // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
301 // - `remainder < 2^e`
302 // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
303
304 remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
305 threshold *= 10;
306 ulp *= 10;
307
308 // divide `remainder` by `10^kappa`.
309 // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
310 let q = remainder >> e;
311 let r = remainder & ((1 << e) - 1);
312 debug_assert!(q < 10);
313 buf[i] = b'0' + q as u8;
314 i += 1;
315
316 if r < threshold {
317 let ten_kappa = 1 << e; // implicit divisor
318 return round_and_weed(
319 &mut buf[..i],
320 exp,
321 r,
322 threshold,
323 (plus1 - v.f) * ulp,
324 ten_kappa,
325 ulp,
326 );
327 }
328
329 // restore invariants
330 kappa -= 1;
331 remainder = r;
332 }
333
334 // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
335 // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
336 // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
337 // we have to successively decrease the last digit and check if this is the optimal repr.
338 // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
339 //
340 // the function checks if this "optimal" repr is actually within the ulp ranges,
341 // and also, it is possible that the "second-to-optimal" repr can actually be optimal
342 // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
343 //
344 // all arguments here are scaled by the common (but implicit) value `k`, so that:
345 // - `remainder = (plus1 % 10^kappa) * k`
346 // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
347 // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
348 // - `ten_kappa = 10^kappa * k`
349 // - `ulp = 2^-e * k`
350 fn round_and_weed(
351 buf: &mut [u8],
352 exp: i16,
353 remainder: u64,
354 threshold: u64,
355 plus1v: u64,
356 ten_kappa: u64,
357 ulp: u64,
358 ) -> Option<(usize, i16)> {
359 assert!(!buf.is_empty());
360
361 // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
362 // the resulting representation should be the closest representation to both.
363 //
364 // here `plus1 - v` is used since calculations are done with respect to `plus1`
365 // in order to avoid overflow/underflow (hence the seemingly swapped names).
366 let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
367 let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
368
369 // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
370 let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
371 {
372 let last = buf.last_mut().unwrap();
373
374 // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
375 // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
376 // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
377 // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
378 // note that `plus1w(n)` is always increasing.
379 //
380 // we have three conditions to terminate. any of them will make the loop unable to
381 // proceed, but we then have at least one valid representation known to be closest to
382 // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
383 //
384 // TC1: `w(n) <= v + 1 ulp`, i.e., this is the last repr that can be the closest one.
385 // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
386 // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
387 // overflow on the calculation of `plus1w(n)`.
388 //
389 // TC2: `w(n+1) < minus1`, i.e., the next repr definitely does not round to `v`.
390 // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
391 // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
392 // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
393 // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
394 // `threshold - plus1w(n) < 10^kappa` instead.
395 //
396 // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e., the next repr is
397 // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
398 // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
399 // `z(n) > 0`. we have two cases to consider:
400 //
401 // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
402 // `z(n)` should be decreasing and this is clearly false.
403 // - when `z(n+1) < 0`:
404 // - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
405 // false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
406 // - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e., `plus1v_up - plus1w(n) >=
407 // plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
408 // gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
409 // combined with TC3a.
410 //
411 // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
412 // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
413 while plus1w < plus1v_up
414 && threshold - plus1w >= ten_kappa
415 && (plus1w + ten_kappa < plus1v_up
416 || plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up)
417 {
418 *last -= 1;
419 debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
420 plus1w += ten_kappa;
421 }
422 }
423
424 // check if this representation is also the closest representation to `v - 1 ulp`.
425 //
426 // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
427 // replaced by `plus1v_down` instead. overflow analysis equally holds.
428 if plus1w < plus1v_down
429 && threshold - plus1w >= ten_kappa
430 && (plus1w + ten_kappa < plus1v_down
431 || plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down)
432 {
433 return None;
434 }
435
436 // now we have the closest representation to `v` between `plus1` and `minus1`.
437 // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
438 // i.e., `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
439 // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
440 if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp {
441 Some((buf.len(), exp))
442 } else {
443 None
444 }
445 }
446 }
447
448 /// The shortest mode implementation for Grisu with Dragon fallback.
449 ///
450 /// This should be used for most cases.
451 pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) {
452 use crate::num::flt2dec::strategy::dragon::format_shortest as fallback;
453 match format_shortest_opt(d, buf) {
454 Some(ret) => ret,
455 None => fallback(d, buf),
456 }
457 }
458
459 /// The exact and fixed mode implementation for Grisu.
460 ///
461 /// It returns `None` when it would return an inexact representation otherwise.
462 pub fn format_exact_opt(
463 d: &Decoded,
464 buf: &mut [u8],
465 limit: i16,
466 ) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
467 assert!(d.mant > 0);
468 assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
469 assert!(!buf.is_empty());
470
471 // normalize and scale `v`.
472 let v = Fp { f: d.mant, e: d.exp }.normalize();
473 let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
474 let v = v.mul(&cached);
475
476 // divide `v` into integral and fractional parts.
477 let e = -v.e as usize;
478 let vint = (v.f >> e) as u32;
479 let vfrac = v.f & ((1 << e) - 1);
480
481 // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
482 // as we don't know the error is positive or negative, we use two approximations
483 // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
484 //
485 // the goal is to find the exactly rounded series of digits that are common to
486 // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
487 // if this is not possible, we don't know which one is the correct output for `v`,
488 // so we give up and fall back.
489 //
490 // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
491 // and we will scale it whenever `v` gets scaled.
492 let mut err = 1;
493
494 // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
495 // this is an upper bound of `kappa` below.
496 let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
497
498 let mut i = 0;
499 let exp = max_kappa as i16 - minusk + 1;
500
501 // if we are working with the last-digit limitation, we need to shorten the buffer
502 // before the actual rendering in order to avoid double rounding.
503 // note that we have to enlarge the buffer again when rounding up happens!
504 let len = if exp <= limit {
505 // oops, we cannot even produce *one* digit.
506 // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
507 //
508 // in principle we can immediately call `possibly_round` with an empty buffer,
509 // but scaling `max_ten_kappa << e` by 10 can result in overflow.
510 // thus we are being sloppy here and widen the error range by a factor of 10.
511 // this will increase the false negative rate, but only very, *very* slightly;
512 // it can only matter noticeably when the mantissa is bigger than 60 bits.
513 return possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e);
514 } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
515 (exp - limit) as usize
516 } else {
517 buf.len()
518 };
519 debug_assert!(len > 0);
520
521 // render integral parts.
522 // the error is entirely fractional, so we don't need to check it in this part.
523 let mut kappa = max_kappa as i16;
524 let mut ten_kappa = max_ten_kappa; // 10^kappa
525 let mut remainder = vint; // digits yet to be rendered
526 loop {
527 // we always have at least one digit to render
528 // invariants:
529 // - `remainder < 10^(kappa+1)`
530 // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
531 // (it follows that `remainder = vint % 10^(kappa+1)`)
532
533 // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
534 let q = remainder / ten_kappa;
535 let r = remainder % ten_kappa;
536 debug_assert!(q < 10);
537 buf[i] = b'0' + q as u8;
538 i += 1;
539
540 // is the buffer full? run the rounding pass with the remainder.
541 if i == len {
542 let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
543 return possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e);
544 }
545
546 // break the loop when we have rendered all integral digits.
547 // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
548 if i > max_kappa as usize {
549 debug_assert_eq!(ten_kappa, 1);
550 debug_assert_eq!(kappa, 0);
551 break;
552 }
553
554 // restore invariants
555 kappa -= 1;
556 ten_kappa /= 10;
557 remainder = r;
558 }
559
560 // render fractional parts.
561 //
562 // in principle we can continue to the last available digit and check for the accuracy.
563 // unfortunately we are working with the finite-sized integers, so we need some criterion
564 // to detect the overflow. V8 uses `remainder > err`, which becomes false when
565 // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
566 // too many otherwise valid input.
567 //
568 // since the later phase has a correct overflow detection, we instead use tighter criterion:
569 // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
570 // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
571 // the first two comparisons from `possibly_round`, for the reference.
572 let mut remainder = vfrac;
573 let maxerr = 1 << (e - 1);
574 while err < maxerr {
575 // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
576 // - `remainder < 2^e`
577 // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
578 // - `err = 10^(n-m)`
579
580 remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
581 err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
582
583 // divide `remainder` by `10^kappa`.
584 // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
585 let q = remainder >> e;
586 let r = remainder & ((1 << e) - 1);
587 debug_assert!(q < 10);
588 buf[i] = b'0' + q as u8;
589 i += 1;
590
591 // is the buffer full? run the rounding pass with the remainder.
592 if i == len {
593 return possibly_round(buf, len, exp, limit, r, 1 << e, err);
594 }
595
596 // restore invariants
597 remainder = r;
598 }
599
600 // further calculation is useless (`possibly_round` definitely fails), so we give up.
601 return None;
602
603 // we've generated all requested digits of `v`, which should be also same to corresponding
604 // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
605 // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
606 // to the rounded-up version of those digits. if the range contains multiple representations
607 // of the same length, we cannot be sure and should return `None` instead.
608 //
609 // all arguments here are scaled by the common (but implicit) value `k`, so that:
610 // - `remainder = (v % 10^kappa) * k`
611 // - `ten_kappa = 10^kappa * k`
612 // - `ulp = 2^-e * k`
613 fn possibly_round(
614 buf: &mut [u8],
615 mut len: usize,
616 mut exp: i16,
617 limit: i16,
618 remainder: u64,
619 ten_kappa: u64,
620 ulp: u64,
621 ) -> Option<(usize, i16)> {
622 debug_assert!(remainder < ten_kappa);
623
624 // 10^kappa
625 // : : :<->: :
626 // : : : : :
627 // :|1 ulp|1 ulp| :
628 // :|<--->|<--->| :
629 // ----|-----|-----|----
630 // | v |
631 // v - 1 ulp v + 1 ulp
632 //
633 // (for the reference, the dotted line indicates the exact value for
634 // possible representations in given number of digits.)
635 //
636 // error is too large that there are at least three possible representations
637 // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
638 if ulp >= ten_kappa {
639 return None;
640 }
641
642 // 10^kappa
643 // :<------->:
644 // : :
645 // : |1 ulp|1 ulp|
646 // : |<--->|<--->|
647 // ----|-----|-----|----
648 // | v |
649 // v - 1 ulp v + 1 ulp
650 //
651 // in fact, 1/2 ulp is enough to introduce two possible representations.
652 // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
653 // this won't overflow, as `ulp < ten_kappa` from the first check.
654 if ten_kappa - ulp <= ulp {
655 return None;
656 }
657
658 // remainder
659 // :<->| :
660 // : | :
661 // :<--------- 10^kappa ---------->:
662 // | : | :
663 // |1 ulp|1 ulp| :
664 // |<--->|<--->| :
665 // ----|-----|-----|------------------------
666 // | v |
667 // v - 1 ulp v + 1 ulp
668 //
669 // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
670 // then we can safely return. note that `v - 1 ulp` *can* be less than the current
671 // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
672 // the distance between `v - 1 ulp` and the current representation
673 // cannot exceed `10^kappa / 2`.
674 //
675 // the condition equals to `remainder + ulp < 10^kappa / 2`.
676 // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
677 // we've already verified that `ulp < 10^kappa / 2`, so as long as
678 // `10^kappa` did not overflow after all, the second check is fine.
679 if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
680 return Some((len, exp));
681 }
682
683 // :<------- remainder ------>| :
684 // : | :
685 // :<--------- 10^kappa --------->:
686 // : | | : |
687 // : |1 ulp|1 ulp|
688 // : |<--->|<--->|
689 // -----------------------|-----|-----|-----
690 // | v |
691 // v - 1 ulp v + 1 ulp
692 //
693 // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
694 // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
695 //
696 // the condition equals to `remainder - ulp >= 10^kappa / 2`.
697 // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
698 // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
699 // so the second check does not overflow.
700 if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
701 if let Some(c) = round_up(buf, len) {
702 // only add an additional digit when we've been requested the fixed precision.
703 // we also need to check that, if the original buffer was empty,
704 // the additional digit can only be added when `exp == limit` (edge case).
705 exp += 1;
706 if exp > limit && len < buf.len() {
707 buf[len] = c;
708 len += 1;
709 }
710 }
711 return Some((len, exp));
712 }
713
714 // otherwise we are doomed (i.e., some values between `v - 1 ulp` and `v + 1 ulp` are
715 // rounding down and others are rounding up) and give up.
716 None
717 }
718 }
719
720 /// The exact and fixed mode implementation for Grisu with Dragon fallback.
721 ///
722 /// This should be used for most cases.
723 pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) {
724 use crate::num::flt2dec::strategy::dragon::format_exact as fallback;
725 match format_exact_opt(d, buf, limit) {
726 Some(ret) => ret,
727 None => fallback(d, buf, limit),
728 }
729 }