]>
Commit | Line | Data |
---|---|---|
9fa01778 XL |
1 | use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO}; |
2 | use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd}; | |
3b2f2976 | 3 | |
e1599b0c XL |
4 | use core::cmp::{self, Ordering}; |
5 | use core::convert::TryFrom; | |
6 | use core::fmt::{self, Write}; | |
7 | use core::marker::PhantomData; | |
8 | use core::mem; | |
9 | use core::ops::Neg; | |
dfeec247 | 10 | use smallvec::{smallvec, SmallVec}; |
3b2f2976 XL |
11 | |
12 | #[must_use] | |
13 | pub struct IeeeFloat<S> { | |
14 | /// Absolute significand value (including the integer bit). | |
15 | sig: [Limb; 1], | |
16 | ||
17 | /// The signed unbiased exponent of the value. | |
18 | exp: ExpInt, | |
19 | ||
20 | /// What kind of floating point number this is. | |
21 | category: Category, | |
22 | ||
23 | /// Sign bit of the number. | |
24 | sign: bool, | |
25 | ||
26 | marker: PhantomData<S>, | |
27 | } | |
28 | ||
29 | /// Fundamental unit of big integer arithmetic, but also | |
30 | /// large to store the largest significands by itself. | |
31 | type Limb = u128; | |
32 | const LIMB_BITS: usize = 128; | |
33 | fn limbs_for_bits(bits: usize) -> usize { | |
34 | (bits + LIMB_BITS - 1) / LIMB_BITS | |
35 | } | |
36 | ||
37 | /// Enum that represents what fraction of the LSB truncated bits of an fp number | |
38 | /// represent. | |
39 | /// | |
40 | /// This essentially combines the roles of guard and sticky bits. | |
41 | #[must_use] | |
42 | #[derive(Copy, Clone, PartialEq, Eq, Debug)] | |
43 | enum Loss { | |
44 | // Example of truncated bits: | |
dfeec247 | 45 | ExactlyZero, // 000000 |
3b2f2976 | 46 | LessThanHalf, // 0xxxxx x's not all zero |
dfeec247 | 47 | ExactlyHalf, // 100000 |
3b2f2976 XL |
48 | MoreThanHalf, // 1xxxxx x's not all zero |
49 | } | |
50 | ||
51 | /// Represents floating point arithmetic semantics. | |
52 | pub trait Semantics: Sized { | |
53 | /// Total number of bits in the in-memory format. | |
54 | const BITS: usize; | |
55 | ||
56 | /// Number of bits in the significand. This includes the integer bit. | |
57 | const PRECISION: usize; | |
58 | ||
ff7c6d11 | 59 | /// The largest E such that 2<sup>E</sup> is representable; this matches the |
3b2f2976 XL |
60 | /// definition of IEEE 754. |
61 | const MAX_EXP: ExpInt; | |
62 | ||
ff7c6d11 | 63 | /// The smallest E such that 2<sup>E</sup> is a normalized number; this |
3b2f2976 XL |
64 | /// matches the definition of IEEE 754. |
65 | const MIN_EXP: ExpInt = -Self::MAX_EXP + 1; | |
66 | ||
67 | /// The significand bit that marks NaN as quiet. | |
68 | const QNAN_BIT: usize = Self::PRECISION - 2; | |
69 | ||
70 | /// The significand bitpattern to mark a NaN as quiet. | |
71 | /// NOTE: for X87DoubleExtended we need to set two bits instead of 2. | |
72 | const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT; | |
73 | ||
74 | fn from_bits(bits: u128) -> IeeeFloat<Self> { | |
75 | assert!(Self::BITS > Self::PRECISION); | |
76 | ||
77 | let sign = bits & (1 << (Self::BITS - 1)); | |
78 | let exponent = (bits & !sign) >> (Self::PRECISION - 1); | |
79 | let mut r = IeeeFloat { | |
80 | sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)], | |
81 | // Convert the exponent from its bias representation to a signed integer. | |
82 | exp: (exponent as ExpInt) - Self::MAX_EXP, | |
83 | category: Category::Zero, | |
84 | sign: sign != 0, | |
85 | marker: PhantomData, | |
86 | }; | |
87 | ||
88 | if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { | |
89 | // Exponent, significand meaningless. | |
90 | r.category = Category::Zero; | |
91 | } else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] { | |
92 | // Exponent, significand meaningless. | |
93 | r.category = Category::Infinity; | |
94 | } else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] { | |
95 | // Sign, exponent, significand meaningless. | |
96 | r.category = Category::NaN; | |
97 | } else { | |
98 | r.category = Category::Normal; | |
99 | if r.exp == Self::MIN_EXP - 1 { | |
100 | // Denormal. | |
101 | r.exp = Self::MIN_EXP; | |
102 | } else { | |
103 | // Set integer bit. | |
104 | sig::set_bit(&mut r.sig, Self::PRECISION - 1); | |
105 | } | |
106 | } | |
107 | ||
108 | r | |
109 | } | |
110 | ||
111 | fn to_bits(x: IeeeFloat<Self>) -> u128 { | |
112 | assert!(Self::BITS > Self::PRECISION); | |
113 | ||
114 | // Split integer bit from significand. | |
115 | let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); | |
116 | let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1); | |
117 | let exponent = match x.category { | |
118 | Category::Normal => { | |
119 | if x.exp == Self::MIN_EXP && !integer_bit { | |
120 | // Denormal. | |
121 | Self::MIN_EXP - 1 | |
122 | } else { | |
123 | x.exp | |
124 | } | |
125 | } | |
126 | Category::Zero => { | |
127 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? | |
128 | significand = 0; | |
129 | Self::MIN_EXP - 1 | |
130 | } | |
131 | Category::Infinity => { | |
132 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? | |
133 | significand = 0; | |
134 | Self::MAX_EXP + 1 | |
135 | } | |
136 | Category::NaN => Self::MAX_EXP + 1, | |
137 | }; | |
138 | ||
139 | // Convert the exponent from a signed integer to its bias representation. | |
140 | let exponent = (exponent + Self::MAX_EXP) as u128; | |
141 | ||
142 | ((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand | |
143 | } | |
144 | } | |
145 | ||
146 | impl<S> Copy for IeeeFloat<S> {} | |
147 | impl<S> Clone for IeeeFloat<S> { | |
148 | fn clone(&self) -> Self { | |
149 | *self | |
150 | } | |
151 | } | |
152 | ||
153 | macro_rules! ieee_semantics { | |
154 | ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => { | |
155 | $(pub struct $sem;)* | |
156 | $(pub type $name = IeeeFloat<$sem>;)* | |
157 | $(impl Semantics for $sem { | |
158 | const BITS: usize = $bits; | |
159 | const PRECISION: usize = ($bits - 1 - $exp_bits) + 1; | |
160 | const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1; | |
161 | })* | |
162 | } | |
163 | } | |
164 | ||
165 | ieee_semantics! { | |
166 | Half = HalfS(16:5), | |
167 | Single = SingleS(32:8), | |
168 | Double = DoubleS(64:11), | |
169 | Quad = QuadS(128:15) | |
170 | } | |
171 | ||
172 | pub struct X87DoubleExtendedS; | |
173 | pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>; | |
174 | impl Semantics for X87DoubleExtendedS { | |
175 | const BITS: usize = 80; | |
176 | const PRECISION: usize = 64; | |
177 | const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1; | |
178 | ||
179 | /// For x87 extended precision, we want to make a NaN, not a | |
180 | /// pseudo-NaN. Maybe we should expose the ability to make | |
181 | /// pseudo-NaNs? | |
182 | const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT; | |
183 | ||
184 | /// Integer bit is explicit in this format. Intel hardware (387 and later) | |
185 | /// does not support these bit patterns: | |
186 | /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") | |
187 | /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") | |
188 | /// exponent = 0, integer bit 1 ("pseudodenormal") | |
9fa01778 | 189 | /// exponent != 0 nor all 1's, integer bit 0 ("unnormal") |
3b2f2976 XL |
190 | /// At the moment, the first two are treated as NaNs, the second two as Normal. |
191 | fn from_bits(bits: u128) -> IeeeFloat<Self> { | |
192 | let sign = bits & (1 << (Self::BITS - 1)); | |
193 | let exponent = (bits & !sign) >> Self::PRECISION; | |
194 | let mut r = IeeeFloat { | |
195 | sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)], | |
196 | // Convert the exponent from its bias representation to a signed integer. | |
197 | exp: (exponent as ExpInt) - Self::MAX_EXP, | |
198 | category: Category::Zero, | |
199 | sign: sign != 0, | |
200 | marker: PhantomData, | |
201 | }; | |
202 | ||
203 | if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { | |
204 | // Exponent, significand meaningless. | |
205 | r.category = Category::Zero; | |
206 | } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] { | |
207 | // Exponent, significand meaningless. | |
208 | r.category = Category::Infinity; | |
209 | } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] { | |
210 | // Sign, exponent, significand meaningless. | |
211 | r.category = Category::NaN; | |
212 | } else { | |
213 | r.category = Category::Normal; | |
214 | if r.exp == Self::MIN_EXP - 1 { | |
215 | // Denormal. | |
216 | r.exp = Self::MIN_EXP; | |
217 | } | |
218 | } | |
219 | ||
220 | r | |
221 | } | |
222 | ||
223 | fn to_bits(x: IeeeFloat<Self>) -> u128 { | |
224 | // Get integer bit from significand. | |
225 | let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); | |
226 | let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1); | |
227 | let exponent = match x.category { | |
228 | Category::Normal => { | |
229 | if x.exp == Self::MIN_EXP && !integer_bit { | |
230 | // Denormal. | |
231 | Self::MIN_EXP - 1 | |
232 | } else { | |
233 | x.exp | |
234 | } | |
235 | } | |
236 | Category::Zero => { | |
237 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? | |
238 | significand = 0; | |
239 | Self::MIN_EXP - 1 | |
240 | } | |
241 | Category::Infinity => { | |
242 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? | |
243 | significand = 1 << (Self::PRECISION - 1); | |
244 | Self::MAX_EXP + 1 | |
245 | } | |
246 | Category::NaN => Self::MAX_EXP + 1, | |
247 | }; | |
248 | ||
249 | // Convert the exponent from a signed integer to its bias representation. | |
250 | let exponent = (exponent + Self::MAX_EXP) as u128; | |
251 | ||
252 | ((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand | |
253 | } | |
254 | } | |
255 | ||
256 | float_common_impls!(IeeeFloat<S>); | |
257 | ||
258 | impl<S: Semantics> PartialEq for IeeeFloat<S> { | |
259 | fn eq(&self, rhs: &Self) -> bool { | |
260 | self.partial_cmp(rhs) == Some(Ordering::Equal) | |
261 | } | |
262 | } | |
263 | ||
264 | impl<S: Semantics> PartialOrd for IeeeFloat<S> { | |
265 | fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { | |
266 | match (self.category, rhs.category) { | |
dfeec247 | 267 | (Category::NaN, _) | (_, Category::NaN) => None, |
3b2f2976 XL |
268 | |
269 | (Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))), | |
270 | ||
271 | (Category::Zero, Category::Zero) => Some(Ordering::Equal), | |
272 | ||
dfeec247 XL |
273 | (Category::Infinity, _) | (Category::Normal, Category::Zero) => { |
274 | Some((!self.sign).cmp(&self.sign)) | |
275 | } | |
3b2f2976 | 276 | |
dfeec247 XL |
277 | (_, Category::Infinity) | (Category::Zero, Category::Normal) => { |
278 | Some(rhs.sign.cmp(&(!rhs.sign))) | |
279 | } | |
3b2f2976 XL |
280 | |
281 | (Category::Normal, Category::Normal) => { | |
282 | // Two normal numbers. Do they have the same sign? | |
283 | Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| { | |
284 | // Compare absolute values; invert result if negative. | |
285 | let result = self.cmp_abs_normal(*rhs); | |
286 | ||
287 | if self.sign { result.reverse() } else { result } | |
288 | })) | |
289 | } | |
290 | } | |
291 | } | |
292 | } | |
293 | ||
294 | impl<S> Neg for IeeeFloat<S> { | |
295 | type Output = Self; | |
296 | fn neg(mut self) -> Self { | |
297 | self.sign = !self.sign; | |
298 | self | |
299 | } | |
300 | } | |
301 | ||
302 | /// Prints this value as a decimal string. | |
303 | /// | |
304 | /// \param precision The maximum number of digits of | |
305 | /// precision to output. If there are fewer digits available, | |
306 | /// zero padding will not be used unless the value is | |
307 | /// integral and small enough to be expressed in | |
308 | /// precision digits. 0 means to use the natural | |
309 | /// precision of the number. | |
310 | /// \param width The maximum number of zeros to | |
311 | /// consider inserting before falling back to scientific | |
312 | /// notation. 0 means to always use scientific notation. | |
313 | /// | |
314 | /// \param alternate Indicate whether to remove the trailing zero in | |
315 | /// fraction part or not. Also setting this parameter to true forces | |
316 | /// producing of output more similar to default printf behavior. | |
317 | /// Specifically the lower e is used as exponent delimiter and exponent | |
318 | /// always contains no less than two digits. | |
319 | /// | |
320 | /// Number precision width Result | |
321 | /// ------ --------- ----- ------ | |
322 | /// 1.01E+4 5 2 10100 | |
323 | /// 1.01E+4 4 2 1.01E+4 | |
324 | /// 1.01E+4 5 1 1.01E+4 | |
325 | /// 1.01E-2 5 2 0.0101 | |
326 | /// 1.01E-2 4 2 0.0101 | |
327 | /// 1.01E-2 4 1 1.01E-2 | |
328 | impl<S: Semantics> fmt::Display for IeeeFloat<S> { | |
9fa01778 | 329 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
3b2f2976 XL |
330 | let width = f.width().unwrap_or(3); |
331 | let alternate = f.alternate(); | |
332 | ||
333 | match self.category { | |
334 | Category::Infinity => { | |
335 | if self.sign { | |
336 | return f.write_str("-Inf"); | |
337 | } else { | |
338 | return f.write_str("+Inf"); | |
339 | } | |
340 | } | |
341 | ||
342 | Category::NaN => return f.write_str("NaN"), | |
343 | ||
344 | Category::Zero => { | |
345 | if self.sign { | |
346 | f.write_char('-')?; | |
347 | } | |
348 | ||
349 | if width == 0 { | |
350 | if alternate { | |
351 | f.write_str("0.0")?; | |
352 | if let Some(n) = f.precision() { | |
353 | for _ in 1..n { | |
354 | f.write_char('0')?; | |
355 | } | |
356 | } | |
357 | f.write_str("e+00")?; | |
358 | } else { | |
359 | f.write_str("0.0E+0")?; | |
360 | } | |
361 | } else { | |
362 | f.write_char('0')?; | |
363 | } | |
364 | return Ok(()); | |
365 | } | |
366 | ||
367 | Category::Normal => {} | |
368 | } | |
369 | ||
370 | if self.sign { | |
371 | f.write_char('-')?; | |
372 | } | |
373 | ||
374 | // We use enough digits so the number can be round-tripped back to an | |
375 | // APFloat. The formula comes from "How to Print Floating-Point Numbers | |
376 | // Accurately" by Steele and White. | |
377 | // FIXME: Using a formula based purely on the precision is conservative; | |
378 | // we can print fewer digits depending on the actual value being printed. | |
379 | ||
380 | // precision = 2 + floor(S::PRECISION / lg_2(10)) | |
381 | let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196); | |
382 | ||
383 | // Decompose the number into an APInt and an exponent. | |
384 | let mut exp = self.exp - (S::PRECISION as ExpInt - 1); | |
385 | let mut sig = vec![self.sig[0]]; | |
386 | ||
387 | // Ignore trailing binary zeros. | |
388 | let trailing_zeros = sig[0].trailing_zeros(); | |
389 | let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize); | |
390 | ||
391 | // Change the exponent from 2^e to 10^e. | |
392 | if exp == 0 { | |
393 | // Nothing to do. | |
394 | } else if exp > 0 { | |
395 | // Just shift left. | |
396 | let shift = exp as usize; | |
397 | sig.resize(limbs_for_bits(S::PRECISION + shift), 0); | |
398 | sig::shift_left(&mut sig, &mut exp, shift); | |
399 | } else { | |
400 | // exp < 0 | |
401 | let mut texp = -exp as usize; | |
402 | ||
403 | // We transform this using the identity: | |
404 | // (N)(2^-e) == (N)(5^e)(10^-e) | |
405 | ||
406 | // Multiply significand by 5^e. | |
407 | // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) | |
408 | let mut sig_scratch = vec![]; | |
409 | let mut p5 = vec![]; | |
410 | let mut p5_scratch = vec![]; | |
411 | while texp != 0 { | |
412 | if p5.is_empty() { | |
413 | p5.push(5); | |
414 | } else { | |
415 | p5_scratch.resize(p5.len() * 2, 0); | |
416 | let _: Loss = | |
417 | sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); | |
418 | while p5_scratch.last() == Some(&0) { | |
419 | p5_scratch.pop(); | |
420 | } | |
421 | mem::swap(&mut p5, &mut p5_scratch); | |
422 | } | |
423 | if texp & 1 != 0 { | |
424 | sig_scratch.resize(sig.len() + p5.len(), 0); | |
425 | let _: Loss = sig::mul( | |
426 | &mut sig_scratch, | |
427 | &mut 0, | |
428 | &sig, | |
429 | &p5, | |
430 | (sig.len() + p5.len()) * LIMB_BITS, | |
431 | ); | |
432 | while sig_scratch.last() == Some(&0) { | |
433 | sig_scratch.pop(); | |
434 | } | |
435 | mem::swap(&mut sig, &mut sig_scratch); | |
436 | } | |
437 | texp >>= 1; | |
438 | } | |
439 | } | |
440 | ||
441 | // Fill the buffer. | |
442 | let mut buffer = vec![]; | |
443 | ||
444 | // Ignore digits from the significand until it is no more | |
445 | // precise than is required for the desired precision. | |
446 | // 196/59 is a very slight overestimate of lg_2(10). | |
447 | let required = (precision * 196 + 58) / 59; | |
448 | let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196; | |
449 | let mut in_trail = true; | |
450 | while !sig.is_empty() { | |
451 | // Perform short division by 10 to extract the rightmost digit. | |
452 | // rem <- sig % 10 | |
453 | // sig <- sig / 10 | |
454 | let mut rem = 0; | |
455 | ||
456 | // Use 64-bit division and remainder, with 32-bit chunks from sig. | |
457 | sig::each_chunk(&mut sig, 32, |chunk| { | |
458 | let chunk = chunk as u32; | |
459 | let combined = ((rem as u64) << 32) | (chunk as u64); | |
460 | rem = (combined % 10) as u8; | |
461 | (combined / 10) as u32 as Limb | |
462 | }); | |
463 | ||
464 | // Reduce the sigificand to avoid wasting time dividing 0's. | |
465 | while sig.last() == Some(&0) { | |
466 | sig.pop(); | |
467 | } | |
468 | ||
469 | let digit = rem; | |
470 | ||
471 | // Ignore digits we don't need. | |
472 | if discard_digits > 0 { | |
473 | discard_digits -= 1; | |
474 | exp += 1; | |
475 | continue; | |
476 | } | |
477 | ||
478 | // Drop trailing zeros. | |
479 | if in_trail && digit == 0 { | |
480 | exp += 1; | |
481 | } else { | |
482 | in_trail = false; | |
483 | buffer.push(b'0' + digit); | |
484 | } | |
485 | } | |
486 | ||
487 | assert!(!buffer.is_empty(), "no characters in buffer!"); | |
488 | ||
489 | // Drop down to precision. | |
490 | // FIXME: don't do more precise calculations above than are required. | |
491 | if buffer.len() > precision { | |
492 | // The most significant figures are the last ones in the buffer. | |
493 | let mut first_sig = buffer.len() - precision; | |
494 | ||
495 | // Round. | |
496 | // FIXME: this probably shouldn't use 'round half up'. | |
497 | ||
498 | // Rounding down is just a truncation, except we also want to drop | |
499 | // trailing zeros from the new result. | |
500 | if buffer[first_sig - 1] < b'5' { | |
501 | while first_sig < buffer.len() && buffer[first_sig] == b'0' { | |
502 | first_sig += 1; | |
503 | } | |
504 | } else { | |
505 | // Rounding up requires a decimal add-with-carry. If we continue | |
506 | // the carry, the newly-introduced zeros will just be truncated. | |
507 | for x in &mut buffer[first_sig..] { | |
508 | if *x == b'9' { | |
509 | first_sig += 1; | |
510 | } else { | |
511 | *x += 1; | |
512 | break; | |
513 | } | |
514 | } | |
515 | } | |
516 | ||
517 | exp += first_sig as ExpInt; | |
518 | buffer.drain(..first_sig); | |
519 | ||
520 | // If we carried through, we have exactly one digit of precision. | |
521 | if buffer.is_empty() { | |
522 | buffer.push(b'1'); | |
523 | } | |
524 | } | |
525 | ||
526 | let digits = buffer.len(); | |
527 | ||
528 | // Check whether we should use scientific notation. | |
529 | let scientific = if width == 0 { | |
530 | true | |
b7449926 XL |
531 | } else if exp >= 0 { |
532 | // 765e3 --> 765000 | |
533 | // ^^^ | |
534 | // But we shouldn't make the number look more precise than it is. | |
535 | exp as usize > width || digits + exp as usize > precision | |
3b2f2976 | 536 | } else { |
b7449926 XL |
537 | // Power of the most significant digit. |
538 | let msd = exp + (digits - 1) as ExpInt; | |
539 | if msd >= 0 { | |
540 | // 765e-2 == 7.65 | |
541 | false | |
3b2f2976 | 542 | } else { |
b7449926 XL |
543 | // 765e-5 == 0.00765 |
544 | // ^ ^^ | |
545 | -msd as usize > width | |
3b2f2976 XL |
546 | } |
547 | }; | |
548 | ||
549 | // Scientific formatting is pretty straightforward. | |
550 | if scientific { | |
551 | exp += digits as ExpInt - 1; | |
552 | ||
553 | f.write_char(buffer[digits - 1] as char)?; | |
554 | f.write_char('.')?; | |
555 | let truncate_zero = !alternate; | |
556 | if digits == 1 && truncate_zero { | |
557 | f.write_char('0')?; | |
558 | } else { | |
559 | for &d in buffer[..digits - 1].iter().rev() { | |
560 | f.write_char(d as char)?; | |
561 | } | |
562 | } | |
563 | // Fill with zeros up to precision. | |
564 | if !truncate_zero && precision > digits - 1 { | |
0731742a | 565 | for _ in 0..=precision - digits { |
3b2f2976 XL |
566 | f.write_char('0')?; |
567 | } | |
568 | } | |
569 | // For alternate we use lower 'e'. | |
570 | f.write_char(if alternate { 'e' } else { 'E' })?; | |
571 | ||
572 | // Exponent always at least two digits if we do not truncate zeros. | |
573 | if truncate_zero { | |
574 | write!(f, "{:+}", exp)?; | |
575 | } else { | |
576 | write!(f, "{:+03}", exp)?; | |
577 | } | |
578 | ||
579 | return Ok(()); | |
580 | } | |
581 | ||
582 | // Non-scientific, positive exponents. | |
583 | if exp >= 0 { | |
584 | for &d in buffer.iter().rev() { | |
585 | f.write_char(d as char)?; | |
586 | } | |
587 | for _ in 0..exp { | |
588 | f.write_char('0')?; | |
589 | } | |
590 | return Ok(()); | |
591 | } | |
592 | ||
593 | // Non-scientific, negative exponents. | |
594 | let unit_place = -exp as usize; | |
595 | if unit_place < digits { | |
596 | for &d in buffer[unit_place..].iter().rev() { | |
597 | f.write_char(d as char)?; | |
598 | } | |
599 | f.write_char('.')?; | |
600 | for &d in buffer[..unit_place].iter().rev() { | |
601 | f.write_char(d as char)?; | |
602 | } | |
603 | } else { | |
604 | f.write_str("0.")?; | |
605 | for _ in digits..unit_place { | |
606 | f.write_char('0')?; | |
607 | } | |
608 | for &d in buffer.iter().rev() { | |
609 | f.write_char(d as char)?; | |
610 | } | |
611 | } | |
612 | ||
613 | Ok(()) | |
614 | } | |
615 | } | |
616 | ||
617 | impl<S: Semantics> fmt::Debug for IeeeFloat<S> { | |
9fa01778 | 618 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
dfeec247 XL |
619 | write!( |
620 | f, | |
621 | "{}({:?} | {}{:?} * 2^{})", | |
622 | self, | |
623 | self.category, | |
624 | if self.sign { "-" } else { "+" }, | |
625 | self.sig, | |
626 | self.exp | |
627 | ) | |
3b2f2976 XL |
628 | } |
629 | } | |
630 | ||
631 | impl<S: Semantics> Float for IeeeFloat<S> { | |
632 | const BITS: usize = S::BITS; | |
633 | const PRECISION: usize = S::PRECISION; | |
634 | const MAX_EXP: ExpInt = S::MAX_EXP; | |
635 | const MIN_EXP: ExpInt = S::MIN_EXP; | |
636 | ||
637 | const ZERO: Self = IeeeFloat { | |
638 | sig: [0], | |
639 | exp: S::MIN_EXP - 1, | |
640 | category: Category::Zero, | |
641 | sign: false, | |
642 | marker: PhantomData, | |
643 | }; | |
644 | ||
645 | const INFINITY: Self = IeeeFloat { | |
646 | sig: [0], | |
647 | exp: S::MAX_EXP + 1, | |
648 | category: Category::Infinity, | |
649 | sign: false, | |
650 | marker: PhantomData, | |
651 | }; | |
652 | ||
653 | // FIXME(eddyb) remove when qnan becomes const fn. | |
654 | const NAN: Self = IeeeFloat { | |
655 | sig: [S::QNAN_SIGNIFICAND], | |
656 | exp: S::MAX_EXP + 1, | |
657 | category: Category::NaN, | |
658 | sign: false, | |
659 | marker: PhantomData, | |
660 | }; | |
661 | ||
662 | fn qnan(payload: Option<u128>) -> Self { | |
663 | IeeeFloat { | |
dfeec247 XL |
664 | sig: [S::QNAN_SIGNIFICAND |
665 | | payload.map_or(0, |payload| { | |
666 | // Zero out the excess bits of the significand. | |
667 | payload & ((1 << S::QNAN_BIT) - 1) | |
668 | })], | |
3b2f2976 XL |
669 | exp: S::MAX_EXP + 1, |
670 | category: Category::NaN, | |
671 | sign: false, | |
672 | marker: PhantomData, | |
673 | } | |
674 | } | |
675 | ||
676 | fn snan(payload: Option<u128>) -> Self { | |
677 | let mut snan = Self::qnan(payload); | |
678 | ||
679 | // We always have to clear the QNaN bit to make it an SNaN. | |
680 | sig::clear_bit(&mut snan.sig, S::QNAN_BIT); | |
681 | ||
682 | // If there are no bits set in the payload, we have to set | |
683 | // *something* to make it a NaN instead of an infinity; | |
684 | // conventionally, this is the next bit down from the QNaN bit. | |
685 | if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 { | |
686 | sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1); | |
687 | } | |
688 | ||
689 | snan | |
690 | } | |
691 | ||
692 | fn largest() -> Self { | |
693 | // We want (in interchange format): | |
694 | // exponent = 1..10 | |
695 | // significand = 1..1 | |
696 | IeeeFloat { | |
b7449926 | 697 | sig: [(1 << S::PRECISION) - 1], |
3b2f2976 XL |
698 | exp: S::MAX_EXP, |
699 | category: Category::Normal, | |
700 | sign: false, | |
701 | marker: PhantomData, | |
702 | } | |
703 | } | |
704 | ||
705 | // We want (in interchange format): | |
706 | // exponent = 0..0 | |
707 | // significand = 0..01 | |
708 | const SMALLEST: Self = IeeeFloat { | |
709 | sig: [1], | |
710 | exp: S::MIN_EXP, | |
711 | category: Category::Normal, | |
712 | sign: false, | |
713 | marker: PhantomData, | |
714 | }; | |
715 | ||
716 | fn smallest_normalized() -> Self { | |
717 | // We want (in interchange format): | |
718 | // exponent = 0..0 | |
719 | // significand = 10..0 | |
720 | IeeeFloat { | |
721 | sig: [1 << (S::PRECISION - 1)], | |
722 | exp: S::MIN_EXP, | |
723 | category: Category::Normal, | |
724 | sign: false, | |
725 | marker: PhantomData, | |
726 | } | |
727 | } | |
728 | ||
729 | fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { | |
730 | let status = match (self.category, rhs.category) { | |
731 | (Category::Infinity, Category::Infinity) => { | |
732 | // Differently signed infinities can only be validly | |
733 | // subtracted. | |
734 | if self.sign != rhs.sign { | |
735 | self = Self::NAN; | |
736 | Status::INVALID_OP | |
737 | } else { | |
738 | Status::OK | |
739 | } | |
740 | } | |
741 | ||
742 | // Sign may depend on rounding mode; handled below. | |
dfeec247 XL |
743 | (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => { |
744 | Status::OK | |
745 | } | |
3b2f2976 | 746 | |
ba9703b0 | 747 | (Category::Zero, _) | (_, Category::NaN | Category::Infinity) => { |
3b2f2976 XL |
748 | self = rhs; |
749 | Status::OK | |
750 | } | |
751 | ||
752 | // This return code means it was not a simple case. | |
753 | (Category::Normal, Category::Normal) => { | |
754 | let loss = sig::add_or_sub( | |
755 | &mut self.sig, | |
756 | &mut self.exp, | |
757 | &mut self.sign, | |
758 | &mut [rhs.sig[0]], | |
759 | rhs.exp, | |
760 | rhs.sign, | |
761 | ); | |
762 | let status; | |
763 | self = unpack!(status=, self.normalize(round, loss)); | |
764 | ||
765 | // Can only be zero if we lost no fraction. | |
766 | assert!(self.category != Category::Zero || loss == Loss::ExactlyZero); | |
767 | ||
768 | status | |
769 | } | |
770 | }; | |
771 | ||
772 | // If two numbers add (exactly) to zero, IEEE 754 decrees it is a | |
773 | // positive zero unless rounding to minus infinity, except that | |
774 | // adding two like-signed zeroes gives that zero. | |
dfeec247 XL |
775 | if self.category == Category::Zero |
776 | && (rhs.category != Category::Zero || self.sign != rhs.sign) | |
3b2f2976 XL |
777 | { |
778 | self.sign = round == Round::TowardNegative; | |
779 | } | |
780 | ||
781 | status.and(self) | |
782 | } | |
783 | ||
784 | fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { | |
785 | self.sign ^= rhs.sign; | |
786 | ||
787 | match (self.category, rhs.category) { | |
788 | (Category::NaN, _) => { | |
789 | self.sign = false; | |
790 | Status::OK.and(self) | |
791 | } | |
792 | ||
793 | (_, Category::NaN) => { | |
794 | self.sign = false; | |
795 | self.category = Category::NaN; | |
796 | self.sig = rhs.sig; | |
797 | Status::OK.and(self) | |
798 | } | |
799 | ||
dfeec247 XL |
800 | (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => { |
801 | Status::INVALID_OP.and(Self::NAN) | |
802 | } | |
3b2f2976 | 803 | |
dfeec247 | 804 | (_, Category::Infinity) | (Category::Infinity, _) => { |
3b2f2976 XL |
805 | self.category = Category::Infinity; |
806 | Status::OK.and(self) | |
807 | } | |
808 | ||
dfeec247 | 809 | (Category::Zero, _) | (_, Category::Zero) => { |
3b2f2976 XL |
810 | self.category = Category::Zero; |
811 | Status::OK.and(self) | |
812 | } | |
813 | ||
814 | (Category::Normal, Category::Normal) => { | |
815 | self.exp += rhs.exp; | |
816 | let mut wide_sig = [0; 2]; | |
dfeec247 XL |
817 | let loss = |
818 | sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION); | |
3b2f2976 XL |
819 | self.sig = [wide_sig[0]]; |
820 | let mut status; | |
821 | self = unpack!(status=, self.normalize(round, loss)); | |
822 | if loss != Loss::ExactlyZero { | |
823 | status |= Status::INEXACT; | |
824 | } | |
825 | status.and(self) | |
826 | } | |
827 | } | |
828 | } | |
829 | ||
830 | fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { | |
831 | // If and only if all arguments are normal do we need to do an | |
832 | // extended-precision calculation. | |
833 | if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() { | |
834 | let mut status; | |
835 | self = unpack!(status=, self.mul_r(multiplicand, round)); | |
836 | ||
837 | // FS can only be Status::OK or Status::INVALID_OP. There is no more work | |
838 | // to do in the latter case. The IEEE-754R standard says it is | |
839 | // implementation-defined in this case whether, if ADDEND is a | |
840 | // quiet NaN, we raise invalid op; this implementation does so. | |
841 | // | |
842 | // If we need to do the addition we can do so with normal | |
843 | // precision. | |
844 | if status == Status::OK { | |
845 | self = unpack!(status=, self.add_r(addend, round)); | |
846 | } | |
847 | return status.and(self); | |
848 | } | |
849 | ||
850 | // Post-multiplication sign, before addition. | |
851 | self.sign ^= multiplicand.sign; | |
852 | ||
853 | // Allocate space for twice as many bits as the original significand, plus one | |
854 | // extra bit for the addition to overflow into. | |
855 | assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2); | |
856 | let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]); | |
857 | ||
858 | let mut loss = Loss::ExactlyZero; | |
859 | let mut omsb = sig::omsb(&wide_sig); | |
860 | self.exp += multiplicand.exp; | |
861 | ||
862 | // Assume the operands involved in the multiplication are single-precision | |
863 | // FP, and the two multiplicants are: | |
864 | // lhs = a23 . a22 ... a0 * 2^e1 | |
865 | // rhs = b23 . b22 ... b0 * 2^e2 | |
866 | // the result of multiplication is: | |
867 | // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) | |
868 | // Note that there are three significant bits at the left-hand side of the | |
869 | // radix point: two for the multiplication, and an overflow bit for the | |
870 | // addition (that will always be zero at this point). Move the radix point | |
871 | // toward left by two bits, and adjust exponent accordingly. | |
872 | self.exp += 2; | |
873 | ||
874 | if addend.is_non_zero() { | |
875 | // Normalize our MSB to one below the top bit to allow for overflow. | |
876 | let ext_precision = 2 * S::PRECISION + 1; | |
877 | if omsb != ext_precision - 1 { | |
878 | assert!(ext_precision > omsb); | |
879 | sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb); | |
880 | } | |
881 | ||
882 | // The intermediate result of the multiplication has "2 * S::PRECISION" | |
a1dfa0c6 | 883 | // significant bit; adjust the addend to be consistent with mul result. |
3b2f2976 XL |
884 | let mut ext_addend_sig = [addend.sig[0], 0]; |
885 | ||
886 | // Extend the addend significand to ext_precision - 1. This guarantees | |
887 | // that the high bit of the significand is zero (same as wide_sig), | |
888 | // so the addition will overflow (if it does overflow at all) into the top bit. | |
dfeec247 | 889 | sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION); |
3b2f2976 XL |
890 | loss = sig::add_or_sub( |
891 | &mut wide_sig, | |
892 | &mut self.exp, | |
893 | &mut self.sign, | |
894 | &mut ext_addend_sig, | |
895 | addend.exp + 1, | |
896 | addend.sign, | |
897 | ); | |
898 | ||
899 | omsb = sig::omsb(&wide_sig); | |
900 | } | |
901 | ||
902 | // Convert the result having "2 * S::PRECISION" significant-bits back to the one | |
903 | // having "S::PRECISION" significant-bits. First, move the radix point from | |
a1dfa0c6 | 904 | // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be |
3b2f2976 XL |
905 | // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION". |
906 | self.exp -= S::PRECISION as ExpInt + 1; | |
907 | ||
908 | // In case MSB resides at the left-hand side of radix point, shift the | |
909 | // mantissa right by some amount to make sure the MSB reside right before | |
0731742a | 910 | // the radix point (i.e., "MSB . rest-significant-bits"). |
3b2f2976 XL |
911 | if omsb > S::PRECISION { |
912 | let bits = omsb - S::PRECISION; | |
913 | loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss); | |
914 | } | |
915 | ||
916 | self.sig[0] = wide_sig[0]; | |
917 | ||
918 | let mut status; | |
919 | self = unpack!(status=, self.normalize(round, loss)); | |
920 | if loss != Loss::ExactlyZero { | |
921 | status |= Status::INEXACT; | |
922 | } | |
923 | ||
924 | // If two numbers add (exactly) to zero, IEEE 754 decrees it is a | |
925 | // positive zero unless rounding to minus infinity, except that | |
926 | // adding two like-signed zeroes gives that zero. | |
dfeec247 XL |
927 | if self.category == Category::Zero |
928 | && !status.intersects(Status::UNDERFLOW) | |
929 | && self.sign != addend.sign | |
3b2f2976 XL |
930 | { |
931 | self.sign = round == Round::TowardNegative; | |
932 | } | |
933 | ||
934 | status.and(self) | |
935 | } | |
936 | ||
937 | fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { | |
938 | self.sign ^= rhs.sign; | |
939 | ||
940 | match (self.category, rhs.category) { | |
941 | (Category::NaN, _) => { | |
942 | self.sign = false; | |
943 | Status::OK.and(self) | |
944 | } | |
945 | ||
946 | (_, Category::NaN) => { | |
947 | self.category = Category::NaN; | |
948 | self.sig = rhs.sig; | |
949 | self.sign = false; | |
950 | Status::OK.and(self) | |
951 | } | |
952 | ||
dfeec247 XL |
953 | (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => { |
954 | Status::INVALID_OP.and(Self::NAN) | |
955 | } | |
3b2f2976 | 956 | |
ba9703b0 | 957 | (Category::Infinity | Category::Zero, _) => Status::OK.and(self), |
3b2f2976 XL |
958 | |
959 | (Category::Normal, Category::Infinity) => { | |
960 | self.category = Category::Zero; | |
961 | Status::OK.and(self) | |
962 | } | |
963 | ||
964 | (Category::Normal, Category::Zero) => { | |
965 | self.category = Category::Infinity; | |
966 | Status::DIV_BY_ZERO.and(self) | |
967 | } | |
968 | ||
969 | (Category::Normal, Category::Normal) => { | |
970 | self.exp -= rhs.exp; | |
971 | let dividend = self.sig[0]; | |
972 | let loss = sig::div( | |
973 | &mut self.sig, | |
974 | &mut self.exp, | |
975 | &mut [dividend], | |
976 | &mut [rhs.sig[0]], | |
977 | S::PRECISION, | |
978 | ); | |
979 | let mut status; | |
980 | self = unpack!(status=, self.normalize(round, loss)); | |
981 | if loss != Loss::ExactlyZero { | |
982 | status |= Status::INEXACT; | |
983 | } | |
984 | status.and(self) | |
985 | } | |
986 | } | |
987 | } | |
988 | ||
989 | fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> { | |
990 | match (self.category, rhs.category) { | |
dfeec247 | 991 | (Category::NaN, _) |
ba9703b0 | 992 | | (Category::Zero, Category::Infinity | Category::Normal) |
dfeec247 | 993 | | (Category::Normal, Category::Infinity) => Status::OK.and(self), |
3b2f2976 XL |
994 | |
995 | (_, Category::NaN) => { | |
996 | self.sign = false; | |
997 | self.category = Category::NaN; | |
998 | self.sig = rhs.sig; | |
999 | Status::OK.and(self) | |
1000 | } | |
1001 | ||
dfeec247 | 1002 | (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), |
3b2f2976 XL |
1003 | |
1004 | (Category::Normal, Category::Normal) => { | |
dfeec247 XL |
1005 | while self.is_finite_non_zero() |
1006 | && rhs.is_finite_non_zero() | |
1007 | && self.cmp_abs_normal(rhs) != Ordering::Less | |
3b2f2976 XL |
1008 | { |
1009 | let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb()); | |
1010 | if self.cmp_abs_normal(v) == Ordering::Less { | |
1011 | v = v.scalbn(-1); | |
1012 | } | |
1013 | v.sign = self.sign; | |
1014 | ||
1015 | let status; | |
1016 | self = unpack!(status=, self - v); | |
1017 | assert_eq!(status, Status::OK); | |
1018 | } | |
1019 | Status::OK.and(self) | |
1020 | } | |
1021 | } | |
1022 | } | |
1023 | ||
1024 | fn round_to_integral(self, round: Round) -> StatusAnd<Self> { | |
1025 | // If the exponent is large enough, we know that this value is already | |
1026 | // integral, and the arithmetic below would potentially cause it to saturate | |
1027 | // to +/-Inf. Bail out early instead. | |
1028 | if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt { | |
1029 | return Status::OK.and(self); | |
1030 | } | |
1031 | ||
1032 | // The algorithm here is quite simple: we add 2^(p-1), where p is the | |
1033 | // precision of our format, and then subtract it back off again. The choice | |
1034 | // of rounding modes for the addition/subtraction determines the rounding mode | |
1035 | // for our integral rounding as well. | |
1036 | // NOTE: When the input value is negative, we do subtraction followed by | |
1037 | // addition instead. | |
1038 | assert!(S::PRECISION <= 128); | |
1039 | let mut status; | |
1040 | let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1))); | |
1041 | let magic_const = magic_const.copy_sign(self); | |
1042 | ||
1043 | if status != Status::OK { | |
1044 | return status.and(self); | |
1045 | } | |
1046 | ||
1047 | let mut r = self; | |
1048 | r = unpack!(status=, r.add_r(magic_const, round)); | |
1049 | if status != Status::OK && status != Status::INEXACT { | |
1050 | return status.and(self); | |
1051 | } | |
1052 | ||
1053 | // Restore the input sign to handle 0.0/-0.0 cases correctly. | |
1054 | r.sub_r(magic_const, round).map(|r| r.copy_sign(self)) | |
1055 | } | |
1056 | ||
1057 | fn next_up(mut self) -> StatusAnd<Self> { | |
1058 | // Compute nextUp(x), handling each float category separately. | |
1059 | match self.category { | |
1060 | Category::Infinity => { | |
1061 | if self.sign { | |
1062 | // nextUp(-inf) = -largest | |
1063 | Status::OK.and(-Self::largest()) | |
1064 | } else { | |
1065 | // nextUp(+inf) = +inf | |
1066 | Status::OK.and(self) | |
1067 | } | |
1068 | } | |
1069 | Category::NaN => { | |
1070 | // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. | |
1071 | // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not | |
1072 | // change the payload. | |
1073 | if self.is_signaling() { | |
1074 | // For consistency, propagate the sign of the sNaN to the qNaN. | |
1075 | Status::INVALID_OP.and(Self::NAN.copy_sign(self)) | |
1076 | } else { | |
1077 | Status::OK.and(self) | |
1078 | } | |
1079 | } | |
1080 | Category::Zero => { | |
1081 | // nextUp(pm 0) = +smallest | |
1082 | Status::OK.and(Self::SMALLEST) | |
1083 | } | |
1084 | Category::Normal => { | |
1085 | // nextUp(-smallest) = -0 | |
1086 | if self.is_smallest() && self.sign { | |
1087 | return Status::OK.and(-Self::ZERO); | |
1088 | } | |
1089 | ||
1090 | // nextUp(largest) == INFINITY | |
1091 | if self.is_largest() && !self.sign { | |
1092 | return Status::OK.and(Self::INFINITY); | |
1093 | } | |
1094 | ||
1095 | // Excluding the integral bit. This allows us to test for binade boundaries. | |
1096 | let sig_mask = (1 << (S::PRECISION - 1)) - 1; | |
1097 | ||
1098 | // nextUp(normal) == normal + inc. | |
1099 | if self.sign { | |
1100 | // If we are negative, we need to decrement the significand. | |
1101 | ||
1102 | // We only cross a binade boundary that requires adjusting the exponent | |
1103 | // if: | |
1104 | // 1. exponent != S::MIN_EXP. This implies we are not in the | |
1105 | // smallest binade or are dealing with denormals. | |
1106 | // 2. Our significand excluding the integral bit is all zeros. | |
dfeec247 XL |
1107 | let crossing_binade_boundary = |
1108 | self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0; | |
3b2f2976 XL |
1109 | |
1110 | // Decrement the significand. | |
1111 | // | |
1112 | // We always do this since: | |
1113 | // 1. If we are dealing with a non-binade decrement, by definition we | |
1114 | // just decrement the significand. | |
1115 | // 2. If we are dealing with a normal -> normal binade decrement, since | |
1116 | // we have an explicit integral bit the fact that all bits but the | |
1117 | // integral bit are zero implies that subtracting one will yield a | |
1118 | // significand with 0 integral bit and 1 in all other spots. Thus we | |
1119 | // must just adjust the exponent and set the integral bit to 1. | |
1120 | // 3. If we are dealing with a normal -> denormal binade decrement, | |
1121 | // since we set the integral bit to 0 when we represent denormals, we | |
1122 | // just decrement the significand. | |
1123 | sig::decrement(&mut self.sig); | |
1124 | ||
1125 | if crossing_binade_boundary { | |
1126 | // Our result is a normal number. Do the following: | |
1127 | // 1. Set the integral bit to 1. | |
1128 | // 2. Decrement the exponent. | |
1129 | sig::set_bit(&mut self.sig, S::PRECISION - 1); | |
1130 | self.exp -= 1; | |
1131 | } | |
1132 | } else { | |
1133 | // If we are positive, we need to increment the significand. | |
1134 | ||
1135 | // We only cross a binade boundary that requires adjusting the exponent if | |
1136 | // the input is not a denormal and all of said input's significand bits | |
1137 | // are set. If all of said conditions are true: clear the significand, set | |
1138 | // the integral bit to 1, and increment the exponent. If we have a | |
1139 | // denormal always increment since moving denormals and the numbers in the | |
1140 | // smallest normal binade have the same exponent in our representation. | |
dfeec247 XL |
1141 | let crossing_binade_boundary = |
1142 | !self.is_denormal() && self.sig[0] & sig_mask == sig_mask; | |
3b2f2976 XL |
1143 | |
1144 | if crossing_binade_boundary { | |
1145 | self.sig = [0]; | |
1146 | sig::set_bit(&mut self.sig, S::PRECISION - 1); | |
1147 | assert_ne!( | |
1148 | self.exp, | |
1149 | S::MAX_EXP, | |
1150 | "We can not increment an exponent beyond the MAX_EXP \ | |
1151 | allowed by the given floating point semantics." | |
1152 | ); | |
1153 | self.exp += 1; | |
1154 | } else { | |
1155 | sig::increment(&mut self.sig); | |
1156 | } | |
1157 | } | |
1158 | Status::OK.and(self) | |
1159 | } | |
1160 | } | |
1161 | } | |
1162 | ||
1163 | fn from_bits(input: u128) -> Self { | |
1164 | // Dispatch to semantics. | |
1165 | S::from_bits(input) | |
1166 | } | |
1167 | ||
1168 | fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { | |
1169 | IeeeFloat { | |
1170 | sig: [input], | |
1171 | exp: S::PRECISION as ExpInt - 1, | |
1172 | category: Category::Normal, | |
1173 | sign: false, | |
1174 | marker: PhantomData, | |
dfeec247 XL |
1175 | } |
1176 | .normalize(round, Loss::ExactlyZero) | |
3b2f2976 XL |
1177 | } |
1178 | ||
1179 | fn from_str_r(mut s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> { | |
1180 | if s.is_empty() { | |
1181 | return Err(ParseError("Invalid string length")); | |
1182 | } | |
1183 | ||
1184 | // Handle special cases. | |
1185 | match s { | |
1186 | "inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)), | |
1187 | "-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)), | |
1188 | "nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)), | |
1189 | "-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)), | |
1190 | _ => {} | |
1191 | } | |
1192 | ||
1193 | // Handle a leading minus sign. | |
e74abb32 XL |
1194 | let minus = s.starts_with('-'); |
1195 | if minus || s.starts_with('+') { | |
3b2f2976 XL |
1196 | s = &s[1..]; |
1197 | if s.is_empty() { | |
1198 | return Err(ParseError("String has no digits")); | |
1199 | } | |
1200 | } | |
1201 | ||
1202 | // Adjust the rounding mode for the absolute value below. | |
1203 | if minus { | |
1204 | round = -round; | |
1205 | } | |
1206 | ||
1207 | let r = if s.starts_with("0x") || s.starts_with("0X") { | |
1208 | s = &s[2..]; | |
1209 | if s.is_empty() { | |
1210 | return Err(ParseError("Invalid string")); | |
1211 | } | |
1212 | Self::from_hexadecimal_string(s, round)? | |
1213 | } else { | |
1214 | Self::from_decimal_string(s, round)? | |
1215 | }; | |
1216 | ||
1217 | Ok(r.map(|r| if minus { -r } else { r })) | |
1218 | } | |
1219 | ||
1220 | fn to_bits(self) -> u128 { | |
1221 | // Dispatch to semantics. | |
1222 | S::to_bits(self) | |
1223 | } | |
1224 | ||
1225 | fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { | |
1226 | // The result of trying to convert a number too large. | |
1227 | let overflow = if self.sign { | |
1228 | // Negative numbers cannot be represented as unsigned. | |
1229 | 0 | |
1230 | } else { | |
1231 | // Largest unsigned integer of the given width. | |
1232 | !0 >> (128 - width) | |
1233 | }; | |
1234 | ||
1235 | *is_exact = false; | |
1236 | ||
1237 | match self.category { | |
1238 | Category::NaN => Status::INVALID_OP.and(0), | |
1239 | ||
1240 | Category::Infinity => Status::INVALID_OP.and(overflow), | |
1241 | ||
1242 | Category::Zero => { | |
1243 | // Negative zero can't be represented as an int. | |
1244 | *is_exact = !self.sign; | |
1245 | Status::OK.and(0) | |
1246 | } | |
1247 | ||
1248 | Category::Normal => { | |
1249 | let mut r = 0; | |
1250 | ||
1251 | // Step 1: place our absolute value, with any fraction truncated, in | |
1252 | // the destination. | |
1253 | let truncated_bits = if self.exp < 0 { | |
1254 | // Our absolute value is less than one; truncate everything. | |
1255 | // For exponent -1 the integer bit represents .5, look at that. | |
1256 | // For smaller exponents leftmost truncated bit is 0. | |
1257 | S::PRECISION - 1 + (-self.exp) as usize | |
1258 | } else { | |
1259 | // We want the most significant (exponent + 1) bits; the rest are | |
1260 | // truncated. | |
1261 | let bits = self.exp as usize + 1; | |
1262 | ||
1263 | // Hopelessly large in magnitude? | |
1264 | if bits > width { | |
1265 | return Status::INVALID_OP.and(overflow); | |
1266 | } | |
1267 | ||
1268 | if bits < S::PRECISION { | |
1269 | // We truncate (S::PRECISION - bits) bits. | |
1270 | r = self.sig[0] >> (S::PRECISION - bits); | |
1271 | S::PRECISION - bits | |
1272 | } else { | |
1273 | // We want at least as many bits as are available. | |
1274 | r = self.sig[0] << (bits - S::PRECISION); | |
1275 | 0 | |
1276 | } | |
1277 | }; | |
1278 | ||
1279 | // Step 2: work out any lost fraction, and increment the absolute | |
1280 | // value if we would round away from zero. | |
1281 | let mut loss = Loss::ExactlyZero; | |
1282 | if truncated_bits > 0 { | |
1283 | loss = Loss::through_truncation(&self.sig, truncated_bits); | |
dfeec247 XL |
1284 | if loss != Loss::ExactlyZero |
1285 | && self.round_away_from_zero(round, loss, truncated_bits) | |
3b2f2976 XL |
1286 | { |
1287 | r = r.wrapping_add(1); | |
1288 | if r == 0 { | |
1289 | return Status::INVALID_OP.and(overflow); // Overflow. | |
1290 | } | |
1291 | } | |
1292 | } | |
1293 | ||
1294 | // Step 3: check if we fit in the destination. | |
1295 | if r > overflow { | |
1296 | return Status::INVALID_OP.and(overflow); | |
1297 | } | |
1298 | ||
1299 | if loss == Loss::ExactlyZero { | |
1300 | *is_exact = true; | |
1301 | Status::OK.and(r) | |
1302 | } else { | |
1303 | Status::INEXACT.and(r) | |
1304 | } | |
1305 | } | |
1306 | } | |
1307 | } | |
1308 | ||
1309 | fn cmp_abs_normal(self, rhs: Self) -> Ordering { | |
1310 | assert!(self.is_finite_non_zero()); | |
1311 | assert!(rhs.is_finite_non_zero()); | |
1312 | ||
1313 | // If exponents are equal, do an unsigned comparison of the significands. | |
dfeec247 | 1314 | self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig)) |
3b2f2976 XL |
1315 | } |
1316 | ||
1317 | fn bitwise_eq(self, rhs: Self) -> bool { | |
1318 | if self.category != rhs.category || self.sign != rhs.sign { | |
1319 | return false; | |
1320 | } | |
1321 | ||
1322 | if self.category == Category::Zero || self.category == Category::Infinity { | |
1323 | return true; | |
1324 | } | |
1325 | ||
1326 | if self.is_finite_non_zero() && self.exp != rhs.exp { | |
1327 | return false; | |
1328 | } | |
1329 | ||
1330 | self.sig == rhs.sig | |
1331 | } | |
1332 | ||
1333 | fn is_negative(self) -> bool { | |
1334 | self.sign | |
1335 | } | |
1336 | ||
1337 | fn is_denormal(self) -> bool { | |
dfeec247 XL |
1338 | self.is_finite_non_zero() |
1339 | && self.exp == S::MIN_EXP | |
1340 | && !sig::get_bit(&self.sig, S::PRECISION - 1) | |
3b2f2976 XL |
1341 | } |
1342 | ||
1343 | fn is_signaling(self) -> bool { | |
1344 | // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the | |
1345 | // first bit of the trailing significand being 0. | |
1346 | self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT) | |
1347 | } | |
1348 | ||
1349 | fn category(self) -> Category { | |
1350 | self.category | |
1351 | } | |
1352 | ||
1353 | fn get_exact_inverse(self) -> Option<Self> { | |
1354 | // Special floats and denormals have no exact inverse. | |
1355 | if !self.is_finite_non_zero() { | |
1356 | return None; | |
1357 | } | |
1358 | ||
1359 | // Check that the number is a power of two by making sure that only the | |
1360 | // integer bit is set in the significand. | |
1361 | if self.sig != [1 << (S::PRECISION - 1)] { | |
1362 | return None; | |
1363 | } | |
1364 | ||
1365 | // Get the inverse. | |
1366 | let mut reciprocal = Self::from_u128(1).value; | |
1367 | let status; | |
1368 | reciprocal = unpack!(status=, reciprocal / self); | |
1369 | if status != Status::OK { | |
1370 | return None; | |
1371 | } | |
1372 | ||
1373 | // Avoid multiplication with a denormal, it is not safe on all platforms and | |
1374 | // may be slower than a normal division. | |
1375 | if reciprocal.is_denormal() { | |
1376 | return None; | |
1377 | } | |
1378 | ||
1379 | assert!(reciprocal.is_finite_non_zero()); | |
1380 | assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]); | |
1381 | ||
1382 | Some(reciprocal) | |
1383 | } | |
1384 | ||
1385 | fn ilogb(mut self) -> ExpInt { | |
1386 | if self.is_nan() { | |
1387 | return IEK_NAN; | |
1388 | } | |
1389 | if self.is_zero() { | |
1390 | return IEK_ZERO; | |
1391 | } | |
1392 | if self.is_infinite() { | |
1393 | return IEK_INF; | |
1394 | } | |
1395 | if !self.is_denormal() { | |
1396 | return self.exp; | |
1397 | } | |
1398 | ||
1399 | let sig_bits = (S::PRECISION - 1) as ExpInt; | |
1400 | self.exp += sig_bits; | |
dfeec247 | 1401 | self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value; |
3b2f2976 XL |
1402 | self.exp - sig_bits |
1403 | } | |
1404 | ||
1405 | fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self { | |
1406 | // If exp is wildly out-of-scale, simply adding it to self.exp will | |
1407 | // overflow; clamp it to a safe range before adding, but ensure that the range | |
1408 | // is large enough that the clamp does not change the result. The range we | |
1409 | // need to support is the difference between the largest possible exponent and | |
1410 | // the normalized exponent of half the smallest denormal. | |
1411 | ||
1412 | let sig_bits = (S::PRECISION - 1) as i32; | |
1413 | let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1; | |
1414 | ||
1415 | // Clamp to one past the range ends to let normalize handle overflow. | |
2c00a5a8 | 1416 | let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change); |
3b2f2976 XL |
1417 | self.exp = self.exp.saturating_add(exp_change as ExpInt); |
1418 | self = self.normalize(round, Loss::ExactlyZero).value; | |
1419 | if self.is_nan() { | |
1420 | sig::set_bit(&mut self.sig, S::QNAN_BIT); | |
1421 | } | |
1422 | self | |
1423 | } | |
1424 | ||
1425 | fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self { | |
1426 | *exp = self.ilogb(); | |
1427 | ||
1428 | // Quiet signalling nans. | |
1429 | if *exp == IEK_NAN { | |
1430 | sig::set_bit(&mut self.sig, S::QNAN_BIT); | |
1431 | return self; | |
1432 | } | |
1433 | ||
1434 | if *exp == IEK_INF { | |
1435 | return self; | |
1436 | } | |
1437 | ||
1438 | // 1 is added because frexp is defined to return a normalized fraction in | |
1439 | // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). | |
1440 | if *exp == IEK_ZERO { | |
1441 | *exp = 0; | |
1442 | } else { | |
1443 | *exp += 1; | |
1444 | } | |
1445 | self.scalbn_r(-*exp, round) | |
1446 | } | |
1447 | } | |
1448 | ||
1449 | impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> { | |
1450 | fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> { | |
1451 | let mut r = IeeeFloat { | |
1452 | sig: self.sig, | |
1453 | exp: self.exp, | |
1454 | category: self.category, | |
1455 | sign: self.sign, | |
1456 | marker: PhantomData, | |
1457 | }; | |
1458 | ||
1459 | // x86 has some unusual NaNs which cannot be represented in any other | |
1460 | // format; note them here. | |
1461 | fn is_x87_double_extended<S: Semantics>() -> bool { | |
1462 | S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND | |
1463 | } | |
dfeec247 XL |
1464 | let x87_special_nan = is_x87_double_extended::<S>() |
1465 | && !is_x87_double_extended::<T>() | |
1466 | && r.category == Category::NaN | |
1467 | && (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND; | |
3b2f2976 XL |
1468 | |
1469 | // If this is a truncation of a denormal number, and the target semantics | |
1470 | // has larger exponent range than the source semantics (this can happen | |
1471 | // when truncating from PowerPC double-double to double format), the | |
1472 | // right shift could lose result mantissa bits. Adjust exponent instead | |
1473 | // of performing excessive shift. | |
1474 | let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt; | |
1475 | if shift < 0 && r.is_finite_non_zero() { | |
1476 | let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt; | |
1477 | if r.exp + exp_change < T::MIN_EXP { | |
1478 | exp_change = T::MIN_EXP - r.exp; | |
1479 | } | |
1480 | if exp_change < shift { | |
1481 | exp_change = shift; | |
1482 | } | |
1483 | if exp_change < 0 { | |
1484 | shift -= exp_change; | |
1485 | r.exp += exp_change; | |
1486 | } | |
1487 | } | |
1488 | ||
1489 | // If this is a truncation, perform the shift. | |
b7449926 XL |
1490 | let loss = if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) { |
1491 | sig::shift_right(&mut r.sig, &mut 0, -shift as usize) | |
1492 | } else { | |
1493 | Loss::ExactlyZero | |
1494 | }; | |
3b2f2976 XL |
1495 | |
1496 | // If this is an extension, perform the shift. | |
1497 | if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) { | |
1498 | sig::shift_left(&mut r.sig, &mut 0, shift as usize); | |
1499 | } | |
1500 | ||
1501 | let status; | |
1502 | if r.is_finite_non_zero() { | |
1503 | r = unpack!(status=, r.normalize(round, loss)); | |
1504 | *loses_info = status != Status::OK; | |
1505 | } else if r.category == Category::NaN { | |
1506 | *loses_info = loss != Loss::ExactlyZero || x87_special_nan; | |
1507 | ||
1508 | // For x87 extended precision, we want to make a NaN, not a special NaN if | |
1509 | // the input wasn't special either. | |
1510 | if !x87_special_nan && is_x87_double_extended::<T>() { | |
1511 | sig::set_bit(&mut r.sig, T::PRECISION - 1); | |
1512 | } | |
1513 | ||
29967ef6 XL |
1514 | // Convert of sNaN creates qNaN and raises an exception (invalid op). |
1515 | // This also guarantees that a sNaN does not become Inf on a truncation | |
1516 | // that loses all payload bits. | |
1517 | if self.is_signaling() { | |
1518 | // Quiet signaling NaN. | |
1519 | sig::set_bit(&mut r.sig, T::QNAN_BIT); | |
1520 | status = Status::INVALID_OP; | |
1521 | } else { | |
1522 | status = Status::OK; | |
1523 | } | |
3b2f2976 XL |
1524 | } else { |
1525 | *loses_info = false; | |
1526 | status = Status::OK; | |
1527 | } | |
1528 | ||
1529 | status.and(r) | |
1530 | } | |
1531 | } | |
1532 | ||
1533 | impl<S: Semantics> IeeeFloat<S> { | |
1534 | /// Handle positive overflow. We either return infinity or | |
1535 | /// the largest finite number. For negative overflow, | |
1536 | /// negate the `round` argument before calling. | |
1537 | fn overflow_result(round: Round) -> StatusAnd<Self> { | |
1538 | match round { | |
1539 | // Infinity? | |
1540 | Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => { | |
1541 | (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY) | |
1542 | } | |
1543 | // Otherwise we become the largest finite number. | |
1544 | Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()), | |
1545 | } | |
1546 | } | |
1547 | ||
9fa01778 | 1548 | /// Returns `true` if, when truncating the current number, with `bit` the |
3b2f2976 XL |
1549 | /// new LSB, with the given lost fraction and rounding mode, the result |
1550 | /// would need to be rounded away from zero (i.e., by increasing the | |
9fa01778 XL |
1551 | /// signficand). This routine must work for `Category::Zero` of both signs, and |
1552 | /// `Category::Normal` numbers. | |
3b2f2976 XL |
1553 | fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool { |
1554 | // NaNs and infinities should not have lost fractions. | |
1555 | assert!(self.is_finite_non_zero() || self.is_zero()); | |
1556 | ||
1557 | // Current callers never pass this so we don't handle it. | |
1558 | assert_ne!(loss, Loss::ExactlyZero); | |
1559 | ||
1560 | match round { | |
1561 | Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf, | |
1562 | Round::NearestTiesToEven => { | |
1563 | if loss == Loss::MoreThanHalf { | |
1564 | return true; | |
1565 | } | |
1566 | ||
1567 | // Our zeros don't have a significand to test. | |
1568 | if loss == Loss::ExactlyHalf && self.category != Category::Zero { | |
1569 | return sig::get_bit(&self.sig, bit); | |
1570 | } | |
1571 | ||
1572 | false | |
1573 | } | |
1574 | Round::TowardZero => false, | |
1575 | Round::TowardPositive => !self.sign, | |
1576 | Round::TowardNegative => self.sign, | |
1577 | } | |
1578 | } | |
1579 | ||
1580 | fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> { | |
1581 | if !self.is_finite_non_zero() { | |
1582 | return Status::OK.and(self); | |
1583 | } | |
1584 | ||
1585 | // Before rounding normalize the exponent of Category::Normal numbers. | |
1586 | let mut omsb = sig::omsb(&self.sig); | |
1587 | ||
1588 | if omsb > 0 { | |
1589 | // OMSB is numbered from 1. We want to place it in the integer | |
1590 | // bit numbered PRECISION if possible, with a compensating change in | |
1591 | // the exponent. | |
dfeec247 | 1592 | let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt); |
3b2f2976 XL |
1593 | |
1594 | // If the resulting exponent is too high, overflow according to | |
1595 | // the rounding mode. | |
1596 | if final_exp > S::MAX_EXP { | |
1597 | let round = if self.sign { -round } else { round }; | |
1598 | return Self::overflow_result(round).map(|r| r.copy_sign(self)); | |
1599 | } | |
1600 | ||
1601 | // Subnormal numbers have exponent MIN_EXP, and their MSB | |
1602 | // is forced based on that. | |
1603 | if final_exp < S::MIN_EXP { | |
1604 | final_exp = S::MIN_EXP; | |
1605 | } | |
1606 | ||
1607 | // Shifting left is easy as we don't lose precision. | |
1608 | if final_exp < self.exp { | |
1609 | assert_eq!(loss, Loss::ExactlyZero); | |
1610 | ||
1611 | let exp_change = (self.exp - final_exp) as usize; | |
1612 | sig::shift_left(&mut self.sig, &mut self.exp, exp_change); | |
1613 | ||
1614 | return Status::OK.and(self); | |
1615 | } | |
1616 | ||
1617 | // Shift right and capture any new lost fraction. | |
1618 | if final_exp > self.exp { | |
1619 | let exp_change = (final_exp - self.exp) as usize; | |
1620 | loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss); | |
1621 | ||
1622 | // Keep OMSB up-to-date. | |
1623 | omsb = omsb.saturating_sub(exp_change); | |
1624 | } | |
1625 | } | |
1626 | ||
1627 | // Now round the number according to round given the lost | |
1628 | // fraction. | |
1629 | ||
1630 | // As specified in IEEE 754, since we do not trap we do not report | |
1631 | // underflow for exact results. | |
1632 | if loss == Loss::ExactlyZero { | |
1633 | // Canonicalize zeros. | |
1634 | if omsb == 0 { | |
1635 | self.category = Category::Zero; | |
1636 | } | |
1637 | ||
1638 | return Status::OK.and(self); | |
1639 | } | |
1640 | ||
1641 | // Increment the significand if we're rounding away from zero. | |
1642 | if self.round_away_from_zero(round, loss, 0) { | |
1643 | if omsb == 0 { | |
1644 | self.exp = S::MIN_EXP; | |
1645 | } | |
1646 | ||
1647 | // We should never overflow. | |
1648 | assert_eq!(sig::increment(&mut self.sig), 0); | |
1649 | omsb = sig::omsb(&self.sig); | |
1650 | ||
1651 | // Did the significand increment overflow? | |
1652 | if omsb == S::PRECISION + 1 { | |
1653 | // Renormalize by incrementing the exponent and shifting our | |
1654 | // significand right one. However if we already have the | |
1655 | // maximum exponent we overflow to infinity. | |
1656 | if self.exp == S::MAX_EXP { | |
1657 | self.category = Category::Infinity; | |
1658 | ||
1659 | return (Status::OVERFLOW | Status::INEXACT).and(self); | |
1660 | } | |
1661 | ||
1662 | let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1); | |
1663 | ||
1664 | return Status::INEXACT.and(self); | |
1665 | } | |
1666 | } | |
1667 | ||
1668 | // The normal case - we were and are not denormal, and any | |
1669 | // significand increment above didn't overflow. | |
1670 | if omsb == S::PRECISION { | |
1671 | return Status::INEXACT.and(self); | |
1672 | } | |
1673 | ||
1674 | // We have a non-zero denormal. | |
1675 | assert!(omsb < S::PRECISION); | |
1676 | ||
1677 | // Canonicalize zeros. | |
1678 | if omsb == 0 { | |
1679 | self.category = Category::Zero; | |
1680 | } | |
1681 | ||
1682 | // The Category::Zero case is a denormal that underflowed to zero. | |
1683 | (Status::UNDERFLOW | Status::INEXACT).and(self) | |
1684 | } | |
1685 | ||
1686 | fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { | |
1687 | let mut r = IeeeFloat { | |
1688 | sig: [0], | |
1689 | exp: 0, | |
1690 | category: Category::Normal, | |
1691 | sign: false, | |
1692 | marker: PhantomData, | |
1693 | }; | |
1694 | ||
1695 | let mut any_digits = false; | |
1696 | let mut has_exp = false; | |
1697 | let mut bit_pos = LIMB_BITS as isize; | |
1698 | let mut loss = None; | |
1699 | ||
1700 | // Without leading or trailing zeros, irrespective of the dot. | |
1701 | let mut first_sig_digit = None; | |
1702 | let mut dot = s.len(); | |
1703 | ||
1704 | for (p, c) in s.char_indices() { | |
1705 | // Skip leading zeros and any (hexa)decimal point. | |
1706 | if c == '.' { | |
1707 | if dot != s.len() { | |
1708 | return Err(ParseError("String contains multiple dots")); | |
1709 | } | |
1710 | dot = p; | |
1711 | } else if let Some(hex_value) = c.to_digit(16) { | |
1712 | any_digits = true; | |
1713 | ||
1714 | if first_sig_digit.is_none() { | |
1715 | if hex_value == 0 { | |
1716 | continue; | |
1717 | } | |
1718 | first_sig_digit = Some(p); | |
1719 | } | |
1720 | ||
1721 | // Store the number while we have space. | |
1722 | bit_pos -= 4; | |
1723 | if bit_pos >= 0 { | |
1724 | r.sig[0] |= (hex_value as Limb) << bit_pos; | |
b7449926 XL |
1725 | // If zero or one-half (the hexadecimal digit 8) are followed |
1726 | // by non-zero, they're a little more than zero or one-half. | |
1727 | } else if let Some(ref mut loss) = loss { | |
1728 | if hex_value != 0 { | |
1729 | if *loss == Loss::ExactlyZero { | |
1730 | *loss = Loss::LessThanHalf; | |
1731 | } | |
1732 | if *loss == Loss::ExactlyHalf { | |
1733 | *loss = Loss::MoreThanHalf; | |
3b2f2976 | 1734 | } |
3b2f2976 | 1735 | } |
b7449926 XL |
1736 | } else { |
1737 | loss = Some(match hex_value { | |
1738 | 0 => Loss::ExactlyZero, | |
1739 | 1..=7 => Loss::LessThanHalf, | |
1740 | 8 => Loss::ExactlyHalf, | |
1741 | 9..=15 => Loss::MoreThanHalf, | |
1742 | _ => unreachable!(), | |
1743 | }); | |
3b2f2976 XL |
1744 | } |
1745 | } else if c == 'p' || c == 'P' { | |
1746 | if !any_digits { | |
1747 | return Err(ParseError("Significand has no digits")); | |
1748 | } | |
1749 | ||
1750 | if dot == s.len() { | |
1751 | dot = p; | |
1752 | } | |
1753 | ||
1754 | let mut chars = s[p + 1..].chars().peekable(); | |
1755 | ||
1756 | // Adjust for the given exponent. | |
1757 | let exp_minus = chars.peek() == Some(&'-'); | |
1758 | if exp_minus || chars.peek() == Some(&'+') { | |
1759 | chars.next(); | |
1760 | } | |
1761 | ||
1762 | for c in chars { | |
1763 | if let Some(value) = c.to_digit(10) { | |
1764 | has_exp = true; | |
1765 | r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt); | |
1766 | } else { | |
1767 | return Err(ParseError("Invalid character in exponent")); | |
1768 | } | |
1769 | } | |
1770 | if !has_exp { | |
1771 | return Err(ParseError("Exponent has no digits")); | |
1772 | } | |
1773 | ||
1774 | if exp_minus { | |
1775 | r.exp = -r.exp; | |
1776 | } | |
1777 | ||
1778 | break; | |
1779 | } else { | |
1780 | return Err(ParseError("Invalid character in significand")); | |
1781 | } | |
1782 | } | |
1783 | if !any_digits { | |
1784 | return Err(ParseError("Significand has no digits")); | |
1785 | } | |
1786 | ||
1787 | // Hex floats require an exponent but not a hexadecimal point. | |
1788 | if !has_exp { | |
1789 | return Err(ParseError("Hex strings require an exponent")); | |
1790 | } | |
1791 | ||
1792 | // Ignore the exponent if we are zero. | |
1793 | let first_sig_digit = match first_sig_digit { | |
1794 | Some(p) => p, | |
1795 | None => return Ok(Status::OK.and(Self::ZERO)), | |
1796 | }; | |
1797 | ||
1798 | // Calculate the exponent adjustment implicit in the number of | |
1799 | // significant digits and adjust for writing the significand starting | |
1800 | // at the most significant nibble. | |
1801 | let exp_adjustment = if dot > first_sig_digit { | |
1802 | ExpInt::try_from(dot - first_sig_digit).unwrap() | |
1803 | } else { | |
1804 | -ExpInt::try_from(first_sig_digit - dot - 1).unwrap() | |
1805 | }; | |
1806 | let exp_adjustment = exp_adjustment | |
1807 | .saturating_mul(4) | |
1808 | .saturating_sub(1) | |
1809 | .saturating_add(S::PRECISION as ExpInt) | |
1810 | .saturating_sub(LIMB_BITS as ExpInt); | |
1811 | r.exp = r.exp.saturating_add(exp_adjustment); | |
1812 | ||
1813 | Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero))) | |
1814 | } | |
1815 | ||
1816 | fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { | |
1817 | // Given a normal decimal floating point number of the form | |
1818 | // | |
1819 | // dddd.dddd[eE][+-]ddd | |
1820 | // | |
1821 | // where the decimal point and exponent are optional, fill out the | |
1822 | // variables below. Exponent is appropriate if the significand is | |
1823 | // treated as an integer, and normalized_exp if the significand | |
1824 | // is taken to have the decimal point after a single leading | |
1825 | // non-zero digit. | |
1826 | // | |
1827 | // If the value is zero, first_sig_digit is None. | |
1828 | ||
1829 | let mut any_digits = false; | |
1830 | let mut dec_exp = 0i32; | |
1831 | ||
1832 | // Without leading or trailing zeros, irrespective of the dot. | |
1833 | let mut first_sig_digit = None; | |
1834 | let mut last_sig_digit = 0; | |
1835 | let mut dot = s.len(); | |
1836 | ||
1837 | for (p, c) in s.char_indices() { | |
1838 | if c == '.' { | |
1839 | if dot != s.len() { | |
1840 | return Err(ParseError("String contains multiple dots")); | |
1841 | } | |
1842 | dot = p; | |
1843 | } else if let Some(dec_value) = c.to_digit(10) { | |
1844 | any_digits = true; | |
1845 | ||
1846 | if dec_value != 0 { | |
1847 | if first_sig_digit.is_none() { | |
1848 | first_sig_digit = Some(p); | |
1849 | } | |
1850 | last_sig_digit = p; | |
1851 | } | |
1852 | } else if c == 'e' || c == 'E' { | |
1853 | if !any_digits { | |
1854 | return Err(ParseError("Significand has no digits")); | |
1855 | } | |
1856 | ||
1857 | if dot == s.len() { | |
1858 | dot = p; | |
1859 | } | |
1860 | ||
1861 | let mut chars = s[p + 1..].chars().peekable(); | |
1862 | ||
1863 | // Adjust for the given exponent. | |
1864 | let exp_minus = chars.peek() == Some(&'-'); | |
1865 | if exp_minus || chars.peek() == Some(&'+') { | |
1866 | chars.next(); | |
1867 | } | |
1868 | ||
1869 | any_digits = false; | |
1870 | for c in chars { | |
1871 | if let Some(value) = c.to_digit(10) { | |
1872 | any_digits = true; | |
1873 | dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32); | |
1874 | } else { | |
1875 | return Err(ParseError("Invalid character in exponent")); | |
1876 | } | |
1877 | } | |
1878 | if !any_digits { | |
1879 | return Err(ParseError("Exponent has no digits")); | |
1880 | } | |
1881 | ||
1882 | if exp_minus { | |
1883 | dec_exp = -dec_exp; | |
1884 | } | |
1885 | ||
1886 | break; | |
1887 | } else { | |
1888 | return Err(ParseError("Invalid character in significand")); | |
1889 | } | |
1890 | } | |
1891 | if !any_digits { | |
1892 | return Err(ParseError("Significand has no digits")); | |
1893 | } | |
1894 | ||
1895 | // Test if we have a zero number allowing for non-zero exponents. | |
1896 | let first_sig_digit = match first_sig_digit { | |
1897 | Some(p) => p, | |
1898 | None => return Ok(Status::OK.and(Self::ZERO)), | |
1899 | }; | |
1900 | ||
1901 | // Adjust the exponents for any decimal point. | |
1902 | if dot > last_sig_digit { | |
1903 | dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32); | |
1904 | } else { | |
1905 | dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32); | |
1906 | } | |
dfeec247 XL |
1907 | let significand_digits = last_sig_digit - first_sig_digit + 1 |
1908 | - (dot > first_sig_digit && dot < last_sig_digit) as usize; | |
3b2f2976 XL |
1909 | let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1); |
1910 | ||
1911 | // Handle the cases where exponents are obviously too large or too | |
1912 | // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp | |
1913 | // definitely overflows if | |
1914 | // | |
1915 | // (dec_exp - 1) * L >= MAX_EXP | |
1916 | // | |
1917 | // and definitely underflows to zero where | |
1918 | // | |
1919 | // (dec_exp + 1) * L <= MIN_EXP - PRECISION | |
1920 | // | |
1921 | // With integer arithmetic the tightest bounds for L are | |
1922 | // | |
1923 | // 93/28 < L < 196/59 [ numerator <= 256 ] | |
1924 | // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] | |
1925 | ||
1926 | // Check for MAX_EXP. | |
1927 | if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 { | |
1928 | // Overflow and round. | |
1929 | return Ok(Self::overflow_result(round)); | |
1930 | } | |
1931 | ||
1932 | // Check for MIN_EXP. | |
dfeec247 XL |
1933 | if normalized_exp.saturating_add(1).saturating_mul(28738) |
1934 | <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32) | |
3b2f2976 XL |
1935 | { |
1936 | // Underflow to zero and round. | |
dfeec247 XL |
1937 | let r = |
1938 | if round == Round::TowardPositive { IeeeFloat::SMALLEST } else { IeeeFloat::ZERO }; | |
3b2f2976 XL |
1939 | return Ok((Status::UNDERFLOW | Status::INEXACT).and(r)); |
1940 | } | |
1941 | ||
1942 | // A tight upper bound on number of bits required to hold an | |
1943 | // N-digit decimal integer is N * 196 / 59. Allocate enough space | |
1944 | // to hold the full significand, and an extra limb required by | |
1945 | // tcMultiplyPart. | |
1946 | let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59); | |
a1dfa0c6 | 1947 | let mut dec_sig: SmallVec<[Limb; 1]> = SmallVec::with_capacity(max_limbs); |
3b2f2976 XL |
1948 | |
1949 | // Convert to binary efficiently - we do almost all multiplication | |
1950 | // in a Limb. When this would overflow do we do a single | |
1951 | // bignum multiplication, and then revert again to multiplication | |
1952 | // in a Limb. | |
0731742a | 1953 | let mut chars = s[first_sig_digit..=last_sig_digit].chars(); |
3b2f2976 XL |
1954 | loop { |
1955 | let mut val = 0; | |
1956 | let mut multiplier = 1; | |
1957 | ||
1958 | loop { | |
1959 | let dec_value = match chars.next() { | |
1960 | Some('.') => continue, | |
1961 | Some(c) => c.to_digit(10).unwrap(), | |
1962 | None => break, | |
1963 | }; | |
1964 | ||
1965 | multiplier *= 10; | |
1966 | val = val * 10 + dec_value as Limb; | |
1967 | ||
1968 | // The maximum number that can be multiplied by ten with any | |
1969 | // digit added without overflowing a Limb. | |
1970 | if multiplier > (!0 - 9) / 10 { | |
1971 | break; | |
1972 | } | |
1973 | } | |
1974 | ||
1975 | // If we've consumed no digits, we're done. | |
1976 | if multiplier == 1 { | |
1977 | break; | |
1978 | } | |
1979 | ||
1980 | // Multiply out the current limb. | |
1981 | let mut carry = val; | |
1982 | for x in &mut dec_sig { | |
1983 | let [low, mut high] = sig::widening_mul(*x, multiplier); | |
1984 | ||
1985 | // Now add carry. | |
1986 | let (low, overflow) = low.overflowing_add(carry); | |
1987 | high += overflow as Limb; | |
1988 | ||
1989 | *x = low; | |
1990 | carry = high; | |
1991 | } | |
1992 | ||
1993 | // If we had carry, we need another limb (likely but not guaranteed). | |
1994 | if carry > 0 { | |
1995 | dec_sig.push(carry); | |
1996 | } | |
1997 | } | |
1998 | ||
1999 | // Calculate pow(5, abs(dec_exp)) into `pow5_full`. | |
2000 | // The *_calc Vec's are reused scratch space, as an optimization. | |
2001 | let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = { | |
2002 | let mut power = dec_exp.abs() as usize; | |
2003 | ||
2004 | const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125]; | |
2005 | ||
a1dfa0c6 XL |
2006 | let mut p5_scratch = smallvec![]; |
2007 | let mut p5: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[4]]; | |
3b2f2976 | 2008 | |
a1dfa0c6 XL |
2009 | let mut r_scratch = smallvec![]; |
2010 | let mut r: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[power & 7]]; | |
3b2f2976 XL |
2011 | power >>= 3; |
2012 | ||
2013 | while power > 0 { | |
2014 | // Calculate pow(5,pow(2,n+3)). | |
2015 | p5_scratch.resize(p5.len() * 2, 0); | |
2016 | let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); | |
2017 | while p5_scratch.last() == Some(&0) { | |
2018 | p5_scratch.pop(); | |
2019 | } | |
2020 | mem::swap(&mut p5, &mut p5_scratch); | |
2021 | ||
2022 | if power & 1 != 0 { | |
2023 | r_scratch.resize(r.len() + p5.len(), 0); | |
dfeec247 XL |
2024 | let _: Loss = |
2025 | sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS); | |
3b2f2976 XL |
2026 | while r_scratch.last() == Some(&0) { |
2027 | r_scratch.pop(); | |
2028 | } | |
2029 | mem::swap(&mut r, &mut r_scratch); | |
2030 | } | |
2031 | ||
2032 | power >>= 1; | |
2033 | } | |
2034 | ||
2035 | (r, r_scratch, p5, p5_scratch) | |
2036 | }; | |
2037 | ||
2038 | // Attempt dec_sig * 10^dec_exp with increasing precision. | |
2039 | let mut attempt = 0; | |
2040 | loop { | |
2041 | let calc_precision = (LIMB_BITS << attempt) - 1; | |
2042 | attempt += 1; | |
2043 | ||
a1dfa0c6 | 2044 | let calc_normal_from_limbs = |sig: &mut SmallVec<[Limb; 1]>, |
3b2f2976 XL |
2045 | limbs: &[Limb]| |
2046 | -> StatusAnd<ExpInt> { | |
2047 | sig.resize(limbs_for_bits(calc_precision), 0); | |
2048 | let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision); | |
2049 | ||
2050 | // Before rounding normalize the exponent of Category::Normal numbers. | |
2051 | let mut omsb = sig::omsb(sig); | |
2052 | ||
2053 | assert_ne!(omsb, 0); | |
2054 | ||
2055 | // OMSB is numbered from 1. We want to place it in the integer | |
2056 | // bit numbered PRECISION if possible, with a compensating change in | |
2057 | // the exponent. | |
2058 | let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt); | |
2059 | ||
2060 | // Shifting left is easy as we don't lose precision. | |
2061 | if final_exp < exp { | |
2062 | assert_eq!(loss, Loss::ExactlyZero); | |
2063 | ||
2064 | let exp_change = (exp - final_exp) as usize; | |
2065 | sig::shift_left(sig, &mut exp, exp_change); | |
2066 | ||
2067 | return Status::OK.and(exp); | |
2068 | } | |
2069 | ||
2070 | // Shift right and capture any new lost fraction. | |
2071 | if final_exp > exp { | |
2072 | let exp_change = (final_exp - exp) as usize; | |
2073 | loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss); | |
2074 | ||
2075 | // Keep OMSB up-to-date. | |
2076 | omsb = omsb.saturating_sub(exp_change); | |
2077 | } | |
2078 | ||
2079 | assert_eq!(omsb, calc_precision); | |
2080 | ||
2081 | // Now round the number according to round given the lost | |
2082 | // fraction. | |
2083 | ||
2084 | // As specified in IEEE 754, since we do not trap we do not report | |
2085 | // underflow for exact results. | |
2086 | if loss == Loss::ExactlyZero { | |
2087 | return Status::OK.and(exp); | |
2088 | } | |
2089 | ||
2090 | // Increment the significand if we're rounding away from zero. | |
2091 | if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) { | |
2092 | // We should never overflow. | |
2093 | assert_eq!(sig::increment(sig), 0); | |
2094 | omsb = sig::omsb(sig); | |
2095 | ||
2096 | // Did the significand increment overflow? | |
2097 | if omsb == calc_precision + 1 { | |
2098 | let _: Loss = sig::shift_right(sig, &mut exp, 1); | |
2099 | ||
2100 | return Status::INEXACT.and(exp); | |
2101 | } | |
2102 | } | |
2103 | ||
2104 | // The normal case - we were and are not denormal, and any | |
2105 | // significand increment above didn't overflow. | |
2106 | Status::INEXACT.and(exp) | |
2107 | }; | |
2108 | ||
2109 | let status; | |
2110 | let mut exp = unpack!(status=, | |
2111 | calc_normal_from_limbs(&mut sig_calc, &dec_sig)); | |
2112 | let pow5_status; | |
2113 | let pow5_exp = unpack!(pow5_status=, | |
2114 | calc_normal_from_limbs(&mut pow5_calc, &pow5_full)); | |
2115 | ||
2116 | // Add dec_exp, as 10^n = 5^n * 2^n. | |
2117 | exp += dec_exp as ExpInt; | |
2118 | ||
2119 | let mut used_bits = S::PRECISION; | |
2120 | let mut truncated_bits = calc_precision - used_bits; | |
2121 | ||
2122 | let half_ulp_err1 = (status != Status::OK) as Limb; | |
2123 | let (calc_loss, half_ulp_err2); | |
2124 | if dec_exp >= 0 { | |
2125 | exp += pow5_exp; | |
2126 | ||
2127 | sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0); | |
2128 | calc_loss = sig::mul( | |
2129 | &mut sig_scratch_calc, | |
2130 | &mut exp, | |
2131 | &sig_calc, | |
2132 | &pow5_calc, | |
2133 | calc_precision, | |
2134 | ); | |
2135 | mem::swap(&mut sig_calc, &mut sig_scratch_calc); | |
2136 | ||
2137 | half_ulp_err2 = (pow5_status != Status::OK) as Limb; | |
2138 | } else { | |
2139 | exp -= pow5_exp; | |
2140 | ||
2141 | sig_scratch_calc.resize(sig_calc.len(), 0); | |
2142 | calc_loss = sig::div( | |
2143 | &mut sig_scratch_calc, | |
2144 | &mut exp, | |
2145 | &mut sig_calc, | |
2146 | &mut pow5_calc, | |
2147 | calc_precision, | |
2148 | ); | |
2149 | mem::swap(&mut sig_calc, &mut sig_scratch_calc); | |
2150 | ||
2151 | // Denormal numbers have less precision. | |
2152 | if exp < S::MIN_EXP { | |
2153 | truncated_bits += (S::MIN_EXP - exp) as usize; | |
2154 | used_bits = calc_precision.saturating_sub(truncated_bits); | |
2155 | } | |
2156 | // Extra half-ulp lost in reciprocal of exponent. | |
dfeec247 XL |
2157 | half_ulp_err2 = |
2158 | 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb; | |
3b2f2976 XL |
2159 | } |
2160 | ||
2161 | // Both sig::mul and sig::div return the | |
2162 | // result with the integer bit set. | |
2163 | assert!(sig::get_bit(&sig_calc, calc_precision - 1)); | |
2164 | ||
2165 | // The error from the true value, in half-ulps, on multiplying two | |
2166 | // floating point numbers, which differ from the value they | |
2167 | // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less | |
2168 | // than the returned value. | |
2169 | // | |
2170 | // See "How to Read Floating Point Numbers Accurately" by William D Clinger. | |
dfeec247 | 2171 | assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8)); |
3b2f2976 XL |
2172 | |
2173 | let inexact = (calc_loss != Loss::ExactlyZero) as Limb; | |
2174 | let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 { | |
2175 | inexact * 2 // <= inexact half-ulps. | |
2176 | } else { | |
2177 | inexact + 2 * (half_ulp_err1 + half_ulp_err2) | |
2178 | }; | |
2179 | ||
2180 | let ulps_from_boundary = { | |
2181 | let bits = calc_precision - used_bits - 1; | |
2182 | ||
2183 | let i = bits / LIMB_BITS; | |
2184 | let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS)); | |
2185 | let boundary = match round { | |
2186 | Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS), | |
2187 | _ => 0, | |
2188 | }; | |
2189 | if i == 0 { | |
2190 | let delta = limb.wrapping_sub(boundary); | |
2191 | cmp::min(delta, delta.wrapping_neg()) | |
2192 | } else if limb == boundary { | |
2193 | if !sig::is_all_zeros(&sig_calc[1..i]) { | |
2194 | !0 // A lot. | |
2195 | } else { | |
2196 | sig_calc[0] | |
2197 | } | |
2198 | } else if limb == boundary.wrapping_sub(1) { | |
2199 | if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) { | |
2200 | !0 // A lot. | |
2201 | } else { | |
2202 | sig_calc[0].wrapping_neg() | |
2203 | } | |
2204 | } else { | |
2205 | !0 // A lot. | |
2206 | } | |
2207 | }; | |
2208 | ||
2209 | // Are we guaranteed to round correctly if we truncate? | |
2210 | if ulps_from_boundary.saturating_mul(2) >= half_ulp_err { | |
2211 | let mut r = IeeeFloat { | |
2212 | sig: [0], | |
2213 | exp, | |
2214 | category: Category::Normal, | |
2215 | sign: false, | |
2216 | marker: PhantomData, | |
2217 | }; | |
2218 | sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits); | |
2219 | // If we extracted less bits above we must adjust our exponent | |
2220 | // to compensate for the implicit right shift. | |
2221 | r.exp += (S::PRECISION - used_bits) as ExpInt; | |
2222 | let loss = Loss::through_truncation(&sig_calc, truncated_bits); | |
2223 | return Ok(r.normalize(round, loss)); | |
2224 | } | |
2225 | } | |
2226 | } | |
2227 | } | |
2228 | ||
2229 | impl Loss { | |
2230 | /// Combine the effect of two lost fractions. | |
2231 | fn combine(self, less_significant: Loss) -> Loss { | |
2232 | let mut more_significant = self; | |
2233 | if less_significant != Loss::ExactlyZero { | |
2234 | if more_significant == Loss::ExactlyZero { | |
2235 | more_significant = Loss::LessThanHalf; | |
2236 | } else if more_significant == Loss::ExactlyHalf { | |
2237 | more_significant = Loss::MoreThanHalf; | |
2238 | } | |
2239 | } | |
2240 | ||
2241 | more_significant | |
2242 | } | |
2243 | ||
9fa01778 | 2244 | /// Returns the fraction lost were a bignum truncated losing the least |
3b2f2976 XL |
2245 | /// significant `bits` bits. |
2246 | fn through_truncation(limbs: &[Limb], bits: usize) -> Loss { | |
2247 | if bits == 0 { | |
2248 | return Loss::ExactlyZero; | |
2249 | } | |
2250 | ||
2251 | let half_bit = bits - 1; | |
2252 | let half_limb = half_bit / LIMB_BITS; | |
2253 | let (half_limb, rest) = if half_limb < limbs.len() { | |
2254 | (limbs[half_limb], &limbs[..half_limb]) | |
2255 | } else { | |
2256 | (0, limbs) | |
2257 | }; | |
2258 | let half = 1 << (half_bit % LIMB_BITS); | |
2259 | let has_half = half_limb & half != 0; | |
2260 | let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest); | |
2261 | ||
2262 | match (has_half, has_rest) { | |
2263 | (false, false) => Loss::ExactlyZero, | |
2264 | (false, true) => Loss::LessThanHalf, | |
2265 | (true, false) => Loss::ExactlyHalf, | |
2266 | (true, true) => Loss::MoreThanHalf, | |
2267 | } | |
2268 | } | |
2269 | } | |
2270 | ||
2271 | /// Implementation details of IeeeFloat significands, such as big integer arithmetic. | |
2272 | /// As a rule of thumb, no functions in this module should dynamically allocate. | |
2273 | mod sig { | |
dfeec247 | 2274 | use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS}; |
e1599b0c | 2275 | use core::cmp::Ordering; |
cdc7bbd5 | 2276 | use core::iter; |
e1599b0c | 2277 | use core::mem; |
3b2f2976 XL |
2278 | |
2279 | pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool { | |
2280 | limbs.iter().all(|&l| l == 0) | |
2281 | } | |
2282 | ||
2283 | /// One, not zero, based LSB. That is, returns 0 for a zeroed significand. | |
2284 | pub(super) fn olsb(limbs: &[Limb]) -> usize { | |
dfeec247 XL |
2285 | limbs |
2286 | .iter() | |
2287 | .enumerate() | |
2288 | .find(|(_, &limb)| limb != 0) | |
2289 | .map_or(0, |(i, limb)| i * LIMB_BITS + limb.trailing_zeros() as usize + 1) | |
3b2f2976 XL |
2290 | } |
2291 | ||
2292 | /// One, not zero, based MSB. That is, returns 0 for a zeroed significand. | |
2293 | pub(super) fn omsb(limbs: &[Limb]) -> usize { | |
dfeec247 XL |
2294 | limbs |
2295 | .iter() | |
2296 | .enumerate() | |
2297 | .rfind(|(_, &limb)| limb != 0) | |
2298 | .map_or(0, |(i, limb)| (i + 1) * LIMB_BITS - limb.leading_zeros() as usize) | |
3b2f2976 XL |
2299 | } |
2300 | ||
2301 | /// Comparison (unsigned) of two significands. | |
2302 | pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering { | |
2303 | assert_eq!(a.len(), b.len()); | |
2304 | for (a, b) in a.iter().zip(b).rev() { | |
2305 | match a.cmp(b) { | |
2306 | Ordering::Equal => {} | |
2307 | o => return o, | |
2308 | } | |
2309 | } | |
2310 | ||
2311 | Ordering::Equal | |
2312 | } | |
2313 | ||
9fa01778 | 2314 | /// Extracts the given bit. |
3b2f2976 XL |
2315 | pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool { |
2316 | limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0 | |
2317 | } | |
2318 | ||
9fa01778 | 2319 | /// Sets the given bit. |
3b2f2976 XL |
2320 | pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) { |
2321 | limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS); | |
2322 | } | |
2323 | ||
2324 | /// Clear the given bit. | |
2325 | pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) { | |
2326 | limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS)); | |
2327 | } | |
2328 | ||
9fa01778 | 2329 | /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent. |
3b2f2976 XL |
2330 | pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) { |
2331 | if bits > 0 { | |
2332 | // Our exponent should not underflow. | |
2333 | *exp = exp.checked_sub(bits as ExpInt).unwrap(); | |
2334 | ||
0731742a | 2335 | // Jump is the inter-limb jump; shift is the intra-limb shift. |
3b2f2976 XL |
2336 | let jump = bits / LIMB_BITS; |
2337 | let shift = bits % LIMB_BITS; | |
2338 | ||
2339 | for i in (0..dst.len()).rev() { | |
2340 | let mut limb; | |
2341 | ||
2342 | if i < jump { | |
2343 | limb = 0; | |
2344 | } else { | |
2345 | // dst[i] comes from the two limbs src[i - jump] and, if we have | |
2346 | // an intra-limb shift, src[i - jump - 1]. | |
2347 | limb = dst[i - jump]; | |
2348 | if shift > 0 { | |
2349 | limb <<= shift; | |
b7449926 | 2350 | if i > jump { |
3b2f2976 XL |
2351 | limb |= dst[i - jump - 1] >> (LIMB_BITS - shift); |
2352 | } | |
2353 | } | |
2354 | } | |
2355 | ||
2356 | dst[i] = limb; | |
2357 | } | |
2358 | } | |
2359 | } | |
2360 | ||
9fa01778 | 2361 | /// Shifts `dst` right `bits` bits noting lost fraction. |
3b2f2976 XL |
2362 | pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss { |
2363 | let loss = Loss::through_truncation(dst, bits); | |
2364 | ||
2365 | if bits > 0 { | |
2366 | // Our exponent should not overflow. | |
2367 | *exp = exp.checked_add(bits as ExpInt).unwrap(); | |
2368 | ||
0731742a | 2369 | // Jump is the inter-limb jump; shift is the intra-limb shift. |
3b2f2976 XL |
2370 | let jump = bits / LIMB_BITS; |
2371 | let shift = bits % LIMB_BITS; | |
2372 | ||
2373 | // Perform the shift. This leaves the most significant `bits` bits | |
2374 | // of the result at zero. | |
2375 | for i in 0..dst.len() { | |
2376 | let mut limb; | |
2377 | ||
2378 | if i + jump >= dst.len() { | |
2379 | limb = 0; | |
2380 | } else { | |
2381 | limb = dst[i + jump]; | |
2382 | if shift > 0 { | |
2383 | limb >>= shift; | |
2384 | if i + jump + 1 < dst.len() { | |
2385 | limb |= dst[i + jump + 1] << (LIMB_BITS - shift); | |
2386 | } | |
2387 | } | |
2388 | } | |
2389 | ||
2390 | dst[i] = limb; | |
2391 | } | |
2392 | } | |
2393 | ||
2394 | loss | |
2395 | } | |
2396 | ||
9fa01778 | 2397 | /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB, |
3b2f2976 XL |
2398 | /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`. |
2399 | /// All high bits above `src_bits` in `dst` are zero-filled. | |
2400 | pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) { | |
2401 | if src_bits == 0 { | |
2402 | return; | |
2403 | } | |
2404 | ||
2405 | let dst_limbs = limbs_for_bits(src_bits); | |
2406 | assert!(dst_limbs <= dst.len()); | |
2407 | ||
2408 | let src = &src[src_lsb / LIMB_BITS..]; | |
2409 | dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]); | |
2410 | ||
2411 | let shift = src_lsb % LIMB_BITS; | |
2412 | let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift); | |
2413 | ||
2414 | // We now have (dst_limbs * LIMB_BITS - shift) bits from `src` | |
2415 | // in `dst`. If this is less that src_bits, append the rest, else | |
2416 | // clear the high bits. | |
2417 | let n = dst_limbs * LIMB_BITS - shift; | |
2418 | if n < src_bits { | |
2419 | let mask = (1 << (src_bits - n)) - 1; | |
b7449926 | 2420 | dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << (n % LIMB_BITS); |
3b2f2976 XL |
2421 | } else if n > src_bits && src_bits % LIMB_BITS > 0 { |
2422 | dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1; | |
2423 | } | |
2424 | ||
2425 | // Clear high limbs. | |
2426 | for x in &mut dst[dst_limbs..] { | |
2427 | *x = 0; | |
2428 | } | |
2429 | } | |
2430 | ||
2431 | /// We want the most significant PRECISION bits of `src`. There may not | |
2432 | /// be that many; extract what we can. | |
2433 | pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) { | |
2434 | let omsb = omsb(src); | |
2435 | ||
2436 | if precision <= omsb { | |
2437 | extract(dst, src, precision, omsb - precision); | |
dfeec247 | 2438 | (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1) |
3b2f2976 XL |
2439 | } else { |
2440 | extract(dst, src, omsb, 0); | |
2441 | (Loss::ExactlyZero, precision as ExpInt - 1) | |
2442 | } | |
2443 | } | |
2444 | ||
2445 | /// For every consecutive chunk of `bits` bits from `limbs`, | |
2446 | /// going from most significant to the least significant bits, | |
2447 | /// call `f` to transform those bits and store the result back. | |
2448 | pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) { | |
2449 | assert_eq!(LIMB_BITS % bits, 0); | |
2450 | for limb in limbs.iter_mut().rev() { | |
2451 | let mut r = 0; | |
2452 | for i in (0..LIMB_BITS / bits).rev() { | |
2453 | r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits); | |
2454 | } | |
2455 | *limb = r; | |
2456 | } | |
2457 | } | |
2458 | ||
2459 | /// Increment in-place, return the carry flag. | |
2460 | pub(super) fn increment(dst: &mut [Limb]) -> Limb { | |
2461 | for x in dst { | |
2462 | *x = x.wrapping_add(1); | |
2463 | if *x != 0 { | |
2464 | return 0; | |
2465 | } | |
2466 | } | |
2467 | ||
2468 | 1 | |
2469 | } | |
2470 | ||
2471 | /// Decrement in-place, return the borrow flag. | |
2472 | pub(super) fn decrement(dst: &mut [Limb]) -> Limb { | |
2473 | for x in dst { | |
2474 | *x = x.wrapping_sub(1); | |
2475 | if *x != !0 { | |
2476 | return 0; | |
2477 | } | |
2478 | } | |
2479 | ||
2480 | 1 | |
2481 | } | |
2482 | ||
2483 | /// `a += b + c` where `c` is zero or one. Returns the carry flag. | |
2484 | pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { | |
2485 | assert!(c <= 1); | |
2486 | ||
cdc7bbd5 | 2487 | for (a, &b) in iter::zip(a, b) { |
3b2f2976 XL |
2488 | let (r, overflow) = a.overflowing_add(b); |
2489 | let (r, overflow2) = r.overflowing_add(c); | |
2490 | *a = r; | |
2491 | c = (overflow | overflow2) as Limb; | |
2492 | } | |
2493 | ||
2494 | c | |
2495 | } | |
2496 | ||
2497 | /// `a -= b + c` where `c` is zero or one. Returns the borrow flag. | |
2498 | pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { | |
2499 | assert!(c <= 1); | |
2500 | ||
cdc7bbd5 | 2501 | for (a, &b) in iter::zip(a, b) { |
3b2f2976 XL |
2502 | let (r, overflow) = a.overflowing_sub(b); |
2503 | let (r, overflow2) = r.overflowing_sub(c); | |
2504 | *a = r; | |
2505 | c = (overflow | overflow2) as Limb; | |
2506 | } | |
2507 | ||
2508 | c | |
2509 | } | |
2510 | ||
2511 | /// `a += b` or `a -= b`. Does not preserve `b`. | |
2512 | pub(super) fn add_or_sub( | |
2513 | a_sig: &mut [Limb], | |
2514 | a_exp: &mut ExpInt, | |
2515 | a_sign: &mut bool, | |
2516 | b_sig: &mut [Limb], | |
2517 | b_exp: ExpInt, | |
2518 | b_sign: bool, | |
2519 | ) -> Loss { | |
2520 | // Are we bigger exponent-wise than the RHS? | |
2521 | let bits = *a_exp - b_exp; | |
2522 | ||
2523 | // Determine if the operation on the absolute values is effectively | |
2524 | // an addition or subtraction. | |
2525 | // Subtraction is more subtle than one might naively expect. | |
2526 | if *a_sign ^ b_sign { | |
2527 | let (reverse, loss); | |
2528 | ||
2529 | if bits == 0 { | |
2530 | reverse = cmp(a_sig, b_sig) == Ordering::Less; | |
2531 | loss = Loss::ExactlyZero; | |
2532 | } else if bits > 0 { | |
2533 | loss = shift_right(b_sig, &mut 0, (bits - 1) as usize); | |
2534 | shift_left(a_sig, a_exp, 1); | |
2535 | reverse = false; | |
2536 | } else { | |
2537 | loss = shift_right(a_sig, a_exp, (-bits - 1) as usize); | |
2538 | shift_left(b_sig, &mut 0, 1); | |
2539 | reverse = true; | |
2540 | } | |
2541 | ||
2542 | let borrow = (loss != Loss::ExactlyZero) as Limb; | |
2543 | if reverse { | |
2544 | // The code above is intended to ensure that no borrow is necessary. | |
2545 | assert_eq!(sub(b_sig, a_sig, borrow), 0); | |
2546 | a_sig.copy_from_slice(b_sig); | |
2547 | *a_sign = !*a_sign; | |
2548 | } else { | |
2549 | // The code above is intended to ensure that no borrow is necessary. | |
2550 | assert_eq!(sub(a_sig, b_sig, borrow), 0); | |
2551 | } | |
2552 | ||
2553 | // Invert the lost fraction - it was on the RHS and subtracted. | |
2554 | match loss { | |
2555 | Loss::LessThanHalf => Loss::MoreThanHalf, | |
2556 | Loss::MoreThanHalf => Loss::LessThanHalf, | |
2557 | _ => loss, | |
2558 | } | |
2559 | } else { | |
2560 | let loss = if bits > 0 { | |
2561 | shift_right(b_sig, &mut 0, bits as usize) | |
2562 | } else { | |
2563 | shift_right(a_sig, a_exp, -bits as usize) | |
2564 | }; | |
2565 | // We have a guard bit; generating a carry cannot happen. | |
2566 | assert_eq!(add(a_sig, b_sig, 0), 0); | |
2567 | loss | |
2568 | } | |
2569 | } | |
2570 | ||
2571 | /// `[low, high] = a * b`. | |
2572 | /// | |
2573 | /// This cannot overflow, because | |
2574 | /// | |
2575 | /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)` | |
2576 | /// | |
ff7c6d11 | 2577 | /// which is less than n<sup>2</sup>. |
3b2f2976 XL |
2578 | pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] { |
2579 | let mut wide = [0, 0]; | |
2580 | ||
2581 | if a == 0 || b == 0 { | |
2582 | return wide; | |
2583 | } | |
2584 | ||
2585 | const HALF_BITS: usize = LIMB_BITS / 2; | |
2586 | ||
2587 | let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1); | |
2588 | for i in 0..2 { | |
2589 | for j in 0..2 { | |
2590 | let mut x = [select(a, i) * select(b, j), 0]; | |
2591 | shift_left(&mut x, &mut 0, (i + j) * HALF_BITS); | |
2592 | assert_eq!(add(&mut wide, &x, 0), 0); | |
2593 | } | |
2594 | } | |
2595 | ||
2596 | wide | |
2597 | } | |
2598 | ||
2599 | /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction. | |
2600 | pub(super) fn mul<'a>( | |
2601 | dst: &mut [Limb], | |
2602 | exp: &mut ExpInt, | |
2603 | mut a: &'a [Limb], | |
2604 | mut b: &'a [Limb], | |
2605 | precision: usize, | |
2606 | ) -> Loss { | |
2607 | // Put the narrower number on the `a` for less loops below. | |
2608 | if a.len() > b.len() { | |
2609 | mem::swap(&mut a, &mut b); | |
2610 | } | |
2611 | ||
2612 | for x in &mut dst[..b.len()] { | |
2613 | *x = 0; | |
2614 | } | |
2615 | ||
2616 | for i in 0..a.len() { | |
2617 | let mut carry = 0; | |
2618 | for j in 0..b.len() { | |
2619 | let [low, mut high] = widening_mul(a[i], b[j]); | |
2620 | ||
2621 | // Now add carry. | |
2622 | let (low, overflow) = low.overflowing_add(carry); | |
2623 | high += overflow as Limb; | |
2624 | ||
2625 | // And now `dst[i + j]`, and store the new low part there. | |
2626 | let (low, overflow) = low.overflowing_add(dst[i + j]); | |
2627 | high += overflow as Limb; | |
2628 | ||
2629 | dst[i + j] = low; | |
2630 | carry = high; | |
2631 | } | |
2632 | dst[i + b.len()] = carry; | |
2633 | } | |
2634 | ||
2635 | // Assume the operands involved in the multiplication are single-precision | |
2636 | // FP, and the two multiplicants are: | |
2637 | // a = a23 . a22 ... a0 * 2^e1 | |
2638 | // b = b23 . b22 ... b0 * 2^e2 | |
2639 | // the result of multiplication is: | |
2640 | // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) | |
2641 | // Note that there are three significant bits at the left-hand side of the | |
2642 | // radix point: two for the multiplication, and an overflow bit for the | |
2643 | // addition (that will always be zero at this point). Move the radix point | |
2644 | // toward left by two bits, and adjust exponent accordingly. | |
2645 | *exp += 2; | |
2646 | ||
2647 | // Convert the result having "2 * precision" significant-bits back to the one | |
2648 | // having "precision" significant-bits. First, move the radix point from | |
2649 | // poision "2*precision - 1" to "precision - 1". The exponent need to be | |
2650 | // adjusted by "2*precision - 1" - "precision - 1" = "precision". | |
2651 | *exp -= precision as ExpInt + 1; | |
2652 | ||
2653 | // In case MSB resides at the left-hand side of radix point, shift the | |
2654 | // mantissa right by some amount to make sure the MSB reside right before | |
0731742a | 2655 | // the radix point (i.e., "MSB . rest-significant-bits"). |
3b2f2976 XL |
2656 | // |
2657 | // Note that the result is not normalized when "omsb < precision". So, the | |
2658 | // caller needs to call IeeeFloat::normalize() if normalized value is | |
2659 | // expected. | |
2660 | let omsb = omsb(dst); | |
dfeec247 | 2661 | if omsb <= precision { Loss::ExactlyZero } else { shift_right(dst, exp, omsb - precision) } |
3b2f2976 XL |
2662 | } |
2663 | ||
2664 | /// `quotient = dividend / divisor`. Returns the lost fraction. | |
2665 | /// Does not preserve `dividend` or `divisor`. | |
2666 | pub(super) fn div( | |
2667 | quotient: &mut [Limb], | |
2668 | exp: &mut ExpInt, | |
2669 | dividend: &mut [Limb], | |
2670 | divisor: &mut [Limb], | |
2671 | precision: usize, | |
2672 | ) -> Loss { | |
3b2f2976 XL |
2673 | // Normalize the divisor. |
2674 | let bits = precision - omsb(divisor); | |
2675 | shift_left(divisor, &mut 0, bits); | |
2676 | *exp += bits as ExpInt; | |
2677 | ||
2678 | // Normalize the dividend. | |
2679 | let bits = precision - omsb(dividend); | |
2680 | shift_left(dividend, exp, bits); | |
2681 | ||
2682 | // Division by 1. | |
2683 | let olsb_divisor = olsb(divisor); | |
2684 | if olsb_divisor == precision { | |
2685 | quotient.copy_from_slice(dividend); | |
2686 | return Loss::ExactlyZero; | |
2687 | } | |
2688 | ||
2689 | // Ensure the dividend >= divisor initially for the loop below. | |
2690 | // Incidentally, this means that the division loop below is | |
2691 | // guaranteed to set the integer bit to one. | |
2692 | if cmp(dividend, divisor) == Ordering::Less { | |
2693 | shift_left(dividend, exp, 1); | |
2694 | assert_ne!(cmp(dividend, divisor), Ordering::Less) | |
2695 | } | |
2696 | ||
2697 | // Helper for figuring out the lost fraction. | |
dfeec247 XL |
2698 | let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) { |
2699 | Ordering::Greater => Loss::MoreThanHalf, | |
2700 | Ordering::Equal => Loss::ExactlyHalf, | |
2701 | Ordering::Less => { | |
2702 | if is_all_zeros(dividend) { | |
2703 | Loss::ExactlyZero | |
2704 | } else { | |
2705 | Loss::LessThanHalf | |
3b2f2976 XL |
2706 | } |
2707 | } | |
2708 | }; | |
2709 | ||
2710 | // Try to perform a (much faster) short division for small divisors. | |
2711 | let divisor_bits = precision - (olsb_divisor - 1); | |
2712 | macro_rules! try_short_div { | |
2713 | ($W:ty, $H:ty, $half:expr) => { | |
2714 | if divisor_bits * 2 <= $half { | |
2715 | // Extract the small divisor. | |
2716 | let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1); | |
2717 | let divisor = divisor[0] as $H as $W; | |
2718 | ||
2719 | // Shift the dividend to produce a quotient with the unit bit set. | |
2720 | let top_limb = *dividend.last().unwrap(); | |
2721 | let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H; | |
2722 | shift_left(dividend, &mut 0, divisor_bits - 1); | |
2723 | ||
2724 | // Apply short division in place on $H (of $half bits) chunks. | |
2725 | each_chunk(dividend, $half, |chunk| { | |
2726 | let chunk = chunk as $H; | |
2727 | let combined = ((rem as $W) << $half) | (chunk as $W); | |
2728 | rem = (combined % divisor) as $H; | |
2729 | (combined / divisor) as $H as Limb | |
2730 | }); | |
2731 | quotient.copy_from_slice(dividend); | |
2732 | ||
2733 | return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]); | |
2734 | } | |
dfeec247 | 2735 | }; |
3b2f2976 XL |
2736 | } |
2737 | ||
2738 | try_short_div!(u32, u16, 16); | |
2739 | try_short_div!(u64, u32, 32); | |
2740 | try_short_div!(u128, u64, 64); | |
2741 | ||
2742 | // Zero the quotient before setting bits in it. | |
2743 | for x in &mut quotient[..limbs_for_bits(precision)] { | |
2744 | *x = 0; | |
2745 | } | |
2746 | ||
2747 | // Long division. | |
2748 | for bit in (0..precision).rev() { | |
2749 | if cmp(dividend, divisor) != Ordering::Less { | |
2750 | sub(dividend, divisor, 0); | |
2751 | set_bit(quotient, bit); | |
2752 | } | |
2753 | shift_left(dividend, &mut 0, 1); | |
2754 | } | |
2755 | ||
2756 | lost_fraction(dividend, divisor) | |
2757 | } | |
2758 | } |