]> git.proxmox.com Git - rustc.git/blame - vendor/ryu/src/d2s.rs
New upstream version 1.37.0+dfsg1
[rustc.git] / vendor / ryu / src / d2s.rs
CommitLineData
b7449926
XL
1// Translated from C to Rust. The original C code can be found at
2// https://github.com/ulfjack/ryu and carries the following license:
3//
4// Copyright 2018 Ulf Adams
5//
6// The contents of this file may be used under the terms of the Apache License,
7// Version 2.0.
8//
9// (See accompanying file LICENSE-Apache or copy at
10// http://www.apache.org/licenses/LICENSE-2.0)
11//
12// Alternatively, the contents of this file may be used under the terms of
13// the Boost Software License, Version 1.0.
14// (See accompanying file LICENSE-Boost or copy at
15// https://www.boost.org/LICENSE_1_0.txt)
16//
17// Unless required by applicable law or agreed to in writing, this software
18// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19// KIND, either express or implied.
20
21use core::{mem, ptr};
22
23use common::*;
24#[cfg(not(feature = "small"))]
25use d2s_full_table::*;
26#[cfg(feature = "small")]
27use d2s_small_table::*;
28use digit_table::*;
29use d2s_intrinsics::*;
30
31#[cfg(feature = "no-panic")]
32use no_panic::no_panic;
33
34pub const DOUBLE_MANTISSA_BITS: u32 = 52;
35pub const DOUBLE_EXPONENT_BITS: u32 = 11;
36
37const DOUBLE_POW5_INV_BITCOUNT: i32 = 122;
38const DOUBLE_POW5_BITCOUNT: i32 = 121;
39
40#[cfg_attr(feature = "no-panic", inline)]
41fn pow5_factor(mut value: u64) -> u32 {
42 let mut count = 0u32;
43 loop {
44 debug_assert!(value != 0);
45 let q = div5(value);
46 let r = (value - 5 * q) as u32;
47 if r != 0 {
48 break;
49 }
50 value = q;
51 count += 1;
52 }
53 count
54}
55
56// Returns true if value is divisible by 5^p.
57#[cfg_attr(feature = "no-panic", inline)]
58fn multiple_of_power_of_5(value: u64, p: u32) -> bool {
59 // I tried a case distinction on p, but there was no performance difference.
60 pow5_factor(value) >= p
61}
62
63// Returns true if value is divisible by 2^p.
64#[cfg_attr(feature = "no-panic", inline)]
65fn multiple_of_power_of_2(value: u64, p: u32) -> bool {
66 // return __builtin_ctzll(value) >= p;
67 (value & ((1u64 << p) - 1)) == 0
68}
69
70#[cfg(integer128)]
71#[cfg_attr(feature = "no-panic", inline)]
72fn mul_shift(m: u64, mul: &(u64, u64), j: u32) -> u64 {
73 let b0 = m as u128 * mul.0 as u128;
74 let b2 = m as u128 * mul.1 as u128;
75 (((b0 >> 64) + b2) >> (j - 64)) as u64
76}
77
78#[cfg(integer128)]
79#[cfg_attr(feature = "no-panic", inline)]
80fn mul_shift_all(
81 m: u64,
82 mul: &(u64, u64),
83 j: u32,
84 vp: &mut u64,
85 vm: &mut u64,
86 mm_shift: u32,
87) -> u64 {
88 *vp = mul_shift(4 * m + 2, mul, j);
89 *vm = mul_shift(4 * m - 1 - mm_shift as u64, mul, j);
90 mul_shift(4 * m, mul, j)
91}
92
93#[cfg(not(integer128))]
94#[cfg_attr(feature = "no-panic", inline)]
95fn mul_shift_all(
96 mut m: u64,
97 mul: &(u64, u64),
98 j: u32,
99 vp: &mut u64,
100 vm: &mut u64,
101 mm_shift: u32,
102) -> u64 {
103 m <<= 1;
104 // m is maximum 55 bits
105 let (lo, tmp) = umul128(m, mul.0);
106 let (mut mid, mut hi) = umul128(m, mul.1);
107 mid = mid.wrapping_add(tmp);
108 hi = hi.wrapping_add((mid < tmp) as u64); // overflow into hi
109
110 let lo2 = lo.wrapping_add(mul.0);
111 let mid2 = mid.wrapping_add(mul.1).wrapping_add((lo2 < lo) as u64);
112 let hi2 = hi.wrapping_add((mid2 < mid) as u64);
113 *vp = shiftright128(mid2, hi2, j - 64 - 1);
114
115 if mm_shift == 1 {
116 let lo3 = lo.wrapping_sub(mul.0);
117 let mid3 = mid.wrapping_sub(mul.1).wrapping_sub((lo3 > lo) as u64);
118 let hi3 = hi.wrapping_sub((mid3 > mid) as u64);
119 *vm = shiftright128(mid3, hi3, j - 64 - 1);
120 } else {
121 let lo3 = lo + lo;
122 let mid3 = mid.wrapping_add(mid).wrapping_add((lo3 < lo) as u64);
123 let hi3 = hi.wrapping_add(hi).wrapping_add((mid3 < mid) as u64);
124 let lo4 = lo3.wrapping_sub(mul.0);
125 let mid4 = mid3.wrapping_sub(mul.1).wrapping_sub((lo4 > lo3) as u64);
126 let hi4 = hi3.wrapping_sub((mid4 > mid3) as u64);
127 *vm = shiftright128(mid4, hi4, j - 64);
128 }
129
130 shiftright128(mid, hi, j - 64 - 1)
131}
132
133#[cfg_attr(feature = "no-panic", inline)]
134pub fn decimal_length(v: u64) -> u32 {
135 // This is slightly faster than a loop.
136 // The average output length is 16.38 digits, so we check high-to-low.
137 // Function precondition: v is not an 18, 19, or 20-digit number.
138 // (17 digits are sufficient for round-tripping.)
139 debug_assert!(v < 100000000000000000);
140
141 if v >= 10000000000000000 {
142 17
143 } else if v >= 1000000000000000 {
144 16
145 } else if v >= 100000000000000 {
146 15
147 } else if v >= 10000000000000 {
148 14
149 } else if v >= 1000000000000 {
150 13
151 } else if v >= 100000000000 {
152 12
153 } else if v >= 10000000000 {
154 11
155 } else if v >= 1000000000 {
156 10
157 } else if v >= 100000000 {
158 9
159 } else if v >= 10000000 {
160 8
161 } else if v >= 1000000 {
162 7
163 } else if v >= 100000 {
164 6
165 } else if v >= 10000 {
166 5
167 } else if v >= 1000 {
168 4
169 } else if v >= 100 {
170 3
171 } else if v >= 10 {
172 2
173 } else {
174 1
175 }
176}
177
178// A floating decimal representing m * 10^e.
179pub struct FloatingDecimal64 {
180 pub mantissa: u64,
181 pub exponent: i32,
182}
183
184#[cfg_attr(feature = "no-panic", inline)]
185pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
186 let bias = (1u32 << (DOUBLE_EXPONENT_BITS - 1)) - 1;
187
188 let (e2, m2) = if ieee_exponent == 0 {
189 (
190 // We subtract 2 so that the bounds computation has 2 additional bits.
191 1 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2,
192 ieee_mantissa,
193 )
194 } else {
195 (
196 ieee_exponent as i32 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2,
197 (1u64 << DOUBLE_MANTISSA_BITS) | ieee_mantissa,
198 )
199 };
200 let even = (m2 & 1) == 0;
201 let accept_bounds = even;
202
203 // Step 2: Determine the interval of legal decimal representations.
204 let mv = 4 * m2;
205 // Implicit bool -> int conversion. True is 1, false is 0.
206 let mm_shift = (ieee_mantissa != 0 || ieee_exponent <= 1) as u32;
207 // We would compute mp and mm like this:
208 // uint64_t mp = 4 * m2 + 2;
209 // uint64_t mm = mv - 1 - mm_shift;
210
211 // Step 3: Convert to a decimal power base using 128-bit arithmetic.
212 let mut vr: u64;
213 let mut vp: u64 = unsafe { mem::uninitialized() };
214 let mut vm: u64 = unsafe { mem::uninitialized() };
215 let e10: i32;
216 let mut vm_is_trailing_zeros = false;
217 let mut vr_is_trailing_zeros = false;
218 if e2 >= 0 {
219 // I tried special-casing q == 0, but there was no effect on performance.
220 // This expression is slightly faster than max(0, log10_pow2(e2) - 1).
221 let q = (log10_pow2(e2) - (e2 > 3) as i32) as u32;
222 e10 = q as i32;
223 let k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q as i32) as i32 - 1;
224 let i = -e2 + q as i32 + k;
225 vr = mul_shift_all(
226 m2,
227 #[cfg(feature = "small")]
228 unsafe {
229 &compute_inv_pow5(q)
230 },
231 #[cfg(not(feature = "small"))]
232 unsafe {
233 debug_assert!(q < DOUBLE_POW5_INV_SPLIT.len() as u32);
234 DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize)
235 },
236 i as u32,
237 &mut vp,
238 &mut vm,
239 mm_shift,
240 );
241 if q <= 21 {
242 // This should use q <= 22, but I think 21 is also safe. Smaller values
243 // may still be safe, but it's more difficult to reason about them.
244 // Only one of mp, mv, and mm can be a multiple of 5, if any.
245 let mv_mod5 = (mv - 5 * div5(mv)) as u32;
246 if mv_mod5 == 0 {
247 vr_is_trailing_zeros = multiple_of_power_of_5(mv, q);
248 } else if accept_bounds {
249 // Same as min(e2 + (~mm & 1), pow5_factor(mm)) >= q
250 // <=> e2 + (~mm & 1) >= q && pow5_factor(mm) >= q
251 // <=> true && pow5_factor(mm) >= q, since e2 >= q.
252 vm_is_trailing_zeros = multiple_of_power_of_5(mv - 1 - mm_shift as u64, q);
253 } else {
254 // Same as min(e2 + 1, pow5_factor(mp)) >= q.
255 vp -= multiple_of_power_of_5(mv + 2, q) as u64;
256 }
257 }
258 } else {
259 // This expression is slightly faster than max(0, log10_pow5(-e2) - 1).
260 let q = (log10_pow5(-e2) - (-e2 > 1) as i32) as u32;
261 e10 = q as i32 + e2;
262 let i = -e2 - q as i32;
263 let k = pow5bits(i) as i32 - DOUBLE_POW5_BITCOUNT;
264 let j = q as i32 - k;
265 vr = mul_shift_all(
266 m2,
267 #[cfg(feature = "small")]
268 unsafe {
269 &compute_pow5(i as u32)
270 },
271 #[cfg(not(feature = "small"))]
272 unsafe {
273 debug_assert!(i < DOUBLE_POW5_SPLIT.len() as i32);
274 DOUBLE_POW5_SPLIT.get_unchecked(i as usize)
275 },
276 j as u32,
277 &mut vp,
278 &mut vm,
279 mm_shift,
280 );
281 if q <= 1 {
282 // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
283 // mv = 4 * m2, so it always has at least two trailing 0 bits.
284 vr_is_trailing_zeros = true;
285 if accept_bounds {
286 // mm = mv - 1 - mm_shift, so it has 1 trailing 0 bit iff mm_shift == 1.
287 vm_is_trailing_zeros = mm_shift == 1;
288 } else {
289 // mp = mv + 2, so it always has at least one trailing 0 bit.
290 vp -= 1;
291 }
292 } else if q < 63 {
293 // TODO(ulfjack): Use a tighter bound here.
294 // We need to compute min(ntz(mv), pow5_factor(mv) - e2) >= q - 1
295 // <=> ntz(mv) >= q - 1 && pow5_factor(mv) - e2 >= q - 1
296 // <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
297 // <=> (mv & ((1 << (q - 1)) - 1)) == 0
298 // We also need to make sure that the left shift does not overflow.
299 vr_is_trailing_zeros = multiple_of_power_of_2(mv, q - 1);
300 }
301 }
302
303 // Step 4: Find the shortest decimal representation in the interval of legal representations.
304 let mut removed = 0u32;
305 let mut last_removed_digit = 0u8;
306 // On average, we remove ~2 digits.
307 let output = if vm_is_trailing_zeros || vr_is_trailing_zeros {
308 // General case, which happens rarely (~0.7%).
309 loop {
310 let vp_div10 = div10(vp);
311 let vm_div10 = div10(vm);
312 if vp_div10 <= vm_div10 {
313 break;
314 }
315 let vm_mod10 = (vm - 10 * vm_div10) as u32;
316 let vr_div10 = div10(vr);
317 let vr_mod10 = (vr - 10 * vr_div10) as u32;
318 vm_is_trailing_zeros &= vm_mod10 == 0;
319 vr_is_trailing_zeros &= last_removed_digit == 0;
320 last_removed_digit = vr_mod10 as u8;
321 vr = vr_div10;
322 vp = vp_div10;
323 vm = vm_div10;
324 removed += 1;
325 }
326 if vm_is_trailing_zeros {
327 loop {
328 let vm_div10 = div10(vm);
329 let vm_mod10 = (vm - 10 * vm_div10) as u32;
330 if vm_mod10 != 0 {
331 break;
332 }
333 let vp_div10 = div10(vp);
334 let vr_div10 = div10(vr);
335 let vr_mod10 = (vr - 10 * vr_div10) as u32;
336 vr_is_trailing_zeros &= last_removed_digit == 0;
337 last_removed_digit = vr_mod10 as u8;
338 vr = vr_div10;
339 vp = vp_div10;
340 vm = vm_div10;
341 removed += 1;
342 }
343 }
344 if vr_is_trailing_zeros && last_removed_digit == 5 && vr % 2 == 0 {
345 // Round even if the exact number is .....50..0.
346 last_removed_digit = 4;
347 }
348 // We need to take vr + 1 if vr is outside bounds or we need to round up.
349 vr + ((vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5)
350 as u64
351 } else {
352 // Specialized for the common case (~99.3%). Percentages below are relative to this.
353 let mut round_up = false;
354 let vp_div100 = div100(vp);
355 let vm_div100 = div100(vm);
356 // Optimization: remove two digits at a time (~86.2%).
357 if vp_div100 > vm_div100 {
358 let vr_div100 = div100(vr);
359 let vr_mod100 = (vr - 100 * vr_div100) as u32;
360 round_up = vr_mod100 >= 50;
361 vr = vr_div100;
362 vp = vp_div100;
363 vm = vm_div100;
364 removed += 2;
365 }
366 // Loop iterations below (approximately), without optimization above:
367 // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
368 // Loop iterations below (approximately), with optimization above:
369 // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
370 loop {
371 let vp_div10 = div10(vp);
372 let vm_div10 = div10(vm);
373 if vp_div10 <= vm_div10 {
374 break;
375 }
376 let vr_div10 = div10(vr);
377 let vr_mod10 = (vr - 10 * vr_div10) as u32;
378 round_up = vr_mod10 >= 5;
379 vr = vr_div10;
380 vp = vp_div10;
381 vm = vm_div10;
382 removed += 1;
383 }
384 // We need to take vr + 1 if vr is outside bounds or we need to round up.
385 vr + (vr == vm || round_up) as u64
386 };
387 let exp = e10 + removed as i32;
388
389 FloatingDecimal64 {
390 exponent: exp,
391 mantissa: output,
392 }
393}
394
395#[cfg_attr(feature = "no-panic", inline)]
396unsafe fn to_chars(v: FloatingDecimal64, sign: bool, result: *mut u8) -> usize {
397 // Step 5: Print the decimal representation.
398 let mut index = 0isize;
399 if sign {
400 *result.offset(index) = b'-';
401 index += 1;
402 }
403
404 let mut output = v.mantissa;
405 let olength = decimal_length(output);
406
407 // Print the decimal digits.
408 // The following code is equivalent to:
409 // for (uint32_t i = 0; i < olength - 1; ++i) {
410 // const uint32_t c = output % 10; output /= 10;
411 // result[index + olength - i] = (char) ('0' + c);
412 // }
413 // result[index] = '0' + output % 10;
414
415 let mut i = 0isize;
416 // We prefer 32-bit operations, even on 64-bit platforms.
417 // We have at most 17 digits, and uint32_t can store 9 digits.
418 // If output doesn't fit into uint32_t, we cut off 8 digits,
419 // so the rest will fit into uint32_t.
420 if (output >> 32) != 0 {
421 // Expensive 64-bit division.
422 let q = div100_000_000(output);
423 let mut output2 = (output - 100_000_000 * q) as u32;
424 output = q;
425
426 let c = output2 % 10000;
427 output2 /= 10000;
428 let d = output2 % 10000;
429 let c0 = (c % 100) << 1;
430 let c1 = (c / 100) << 1;
431 let d0 = (d % 100) << 1;
432 let d1 = (d / 100) << 1;
433 ptr::copy_nonoverlapping(
434 DIGIT_TABLE.get_unchecked(c0 as usize),
435 result.offset(index + olength as isize - i - 1),
436 2,
437 );
438 ptr::copy_nonoverlapping(
439 DIGIT_TABLE.get_unchecked(c1 as usize),
440 result.offset(index + olength as isize - i - 3),
441 2,
442 );
443 ptr::copy_nonoverlapping(
444 DIGIT_TABLE.get_unchecked(d0 as usize),
445 result.offset(index + olength as isize - i - 5),
446 2,
447 );
448 ptr::copy_nonoverlapping(
449 DIGIT_TABLE.get_unchecked(d1 as usize),
450 result.offset(index + olength as isize - i - 7),
451 2,
452 );
453 i += 8;
454 }
455 let mut output2 = output as u32;
456 while output2 >= 10000 {
457 let c = (output2 - 10000 * (output2 / 10000)) as u32;
458 output2 /= 10000;
459 let c0 = (c % 100) << 1;
460 let c1 = (c / 100) << 1;
461 ptr::copy_nonoverlapping(
462 DIGIT_TABLE.get_unchecked(c0 as usize),
463 result.offset(index + olength as isize - i - 1),
464 2,
465 );
466 ptr::copy_nonoverlapping(
467 DIGIT_TABLE.get_unchecked(c1 as usize),
468 result.offset(index + olength as isize - i - 3),
469 2,
470 );
471 i += 4;
472 }
473 if output2 >= 100 {
474 let c = ((output2 % 100) << 1) as u32;
475 output2 /= 100;
476 ptr::copy_nonoverlapping(
477 DIGIT_TABLE.get_unchecked(c as usize),
478 result.offset(index + olength as isize - i - 1),
479 2,
480 );
481 i += 2;
482 }
483 if output2 >= 10 {
484 let c = (output2 << 1) as u32;
485 // We can't use memcpy here: the decimal dot goes between these two digits.
486 *result.offset(index + olength as isize - i) = *DIGIT_TABLE.get_unchecked(c as usize + 1);
487 *result.offset(index) = *DIGIT_TABLE.get_unchecked(c as usize);
488 } else {
489 *result.offset(index) = b'0' + output2 as u8;
490 }
491
492 // Print decimal point if needed.
493 if olength > 1 {
494 *result.offset(index + 1) = b'.';
495 index += olength as isize + 1;
496 } else {
497 index += 1;
498 }
499
500 // Print the exponent.
501 *result.offset(index) = b'E';
502 index += 1;
503 let mut exp = v.exponent as i32 + olength as i32 - 1;
504 if exp < 0 {
505 *result.offset(index) = b'-';
506 index += 1;
507 exp = -exp;
508 }
509
510 if exp >= 100 {
511 let c = exp % 10;
512 ptr::copy_nonoverlapping(
513 DIGIT_TABLE.get_unchecked((2 * (exp / 10)) as usize),
514 result.offset(index),
515 2,
516 );
517 *result.offset(index + 2) = b'0' + c as u8;
518 index += 3;
519 } else if exp >= 10 {
520 ptr::copy_nonoverlapping(
521 DIGIT_TABLE.get_unchecked((2 * exp) as usize),
522 result.offset(index),
523 2,
524 );
525 index += 2;
526 } else {
527 *result.offset(index) = b'0' + exp as u8;
528 index += 1;
529 }
530
531 debug_assert!(index <= 24);
532 index as usize
533}
534
0731742a
XL
535/// Print f64 to the given buffer and return number of bytes written. Ryū's
536/// original formatting.
b7449926
XL
537///
538/// At most 24 bytes will be written.
539///
540/// ## Special cases
541///
542/// This function represents any NaN as `NaN`, positive infinity as `Infinity`,
543/// and negative infinity as `-Infinity`.
544///
545/// ## Safety
546///
547/// The `result` pointer argument must point to sufficiently many writable bytes
548/// to hold Ryū's representation of `f`.
549///
550/// ## Example
551///
552/// ```rust
553/// let f = 1.234f64;
554///
555/// unsafe {
556/// let mut buffer: [u8; 24] = std::mem::uninitialized();
557/// let n = ryu::raw::d2s_buffered_n(f, &mut buffer[0]);
558/// let s = std::str::from_utf8_unchecked(&buffer[..n]);
559/// assert_eq!(s, "1.234E0");
560/// }
561/// ```
562#[cfg_attr(must_use_return, must_use)]
563#[cfg_attr(feature = "no-panic", no_panic)]
564pub unsafe fn d2s_buffered_n(f: f64, result: *mut u8) -> usize {
565 // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
566 let bits = mem::transmute::<f64, u64>(f);
567
568 // Decode bits into sign, mantissa, and exponent.
569 let ieee_sign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
570 let ieee_mantissa = bits & ((1u64 << DOUBLE_MANTISSA_BITS) - 1);
571 let ieee_exponent =
572 (bits >> DOUBLE_MANTISSA_BITS) as u32 & ((1u32 << DOUBLE_EXPONENT_BITS) - 1);
573 // Case distinction; exit early for the easy cases.
574 if ieee_exponent == ((1u32 << DOUBLE_EXPONENT_BITS) - 1)
575 || (ieee_exponent == 0 && ieee_mantissa == 0)
576 {
577 return copy_special_str(result, ieee_sign, ieee_exponent != 0, ieee_mantissa != 0);
578 }
579
580 let v = d2d(ieee_mantissa, ieee_exponent);
581 to_chars(v, ieee_sign, result)
582}