3 use std
::cmp
::Ordering
::{self, Less, Greater, Equal}
;
7 use traits
::{Zero, One}
;
13 use bigint
::Sign
::{Minus, NoSign, Plus}
;
15 #[allow(non_snake_case)]
17 /// A `BigDigit` is a `BigUint`'s composing element.
18 pub type BigDigit
= u32;
20 /// A `DoubleBigDigit` is the internal type used to do the computations. Its
21 /// size is the double of the size of `BigDigit`.
22 pub type DoubleBigDigit
= u64;
24 pub const ZERO_BIG_DIGIT
: BigDigit
= 0;
26 // `DoubleBigDigit` size dependent
27 pub const BITS
: usize = 32;
29 pub const BASE
: DoubleBigDigit
= 1 << BITS
;
30 const LO_MASK
: DoubleBigDigit
= (-1i32 as DoubleBigDigit
) >> BITS
;
33 fn get_hi(n
: DoubleBigDigit
) -> BigDigit
{
34 (n
>> BITS
) as BigDigit
37 fn get_lo(n
: DoubleBigDigit
) -> BigDigit
{
38 (n
& LO_MASK
) as BigDigit
41 /// Split one `DoubleBigDigit` into two `BigDigit`s.
43 pub fn from_doublebigdigit(n
: DoubleBigDigit
) -> (BigDigit
, BigDigit
) {
44 (get_hi(n
), get_lo(n
))
47 /// Join two `BigDigit`s into one `DoubleBigDigit`
49 pub fn to_doublebigdigit(hi
: BigDigit
, lo
: BigDigit
) -> DoubleBigDigit
{
50 (lo
as DoubleBigDigit
) | ((hi
as DoubleBigDigit
) << BITS
)
54 use big_digit
::{BigDigit, DoubleBigDigit}
;
56 // Generic functions for add/subtract/multiply with carry/borrow:
60 fn adc(a
: BigDigit
, b
: BigDigit
, carry
: &mut BigDigit
) -> BigDigit
{
61 let (hi
, lo
) = big_digit
::from_doublebigdigit((a
as DoubleBigDigit
) + (b
as DoubleBigDigit
) +
62 (*carry
as DoubleBigDigit
));
68 // Subtract with borrow:
70 fn sbb(a
: BigDigit
, b
: BigDigit
, borrow
: &mut BigDigit
) -> BigDigit
{
71 let (hi
, lo
) = big_digit
::from_doublebigdigit(big_digit
::BASE
+ (a
as DoubleBigDigit
) -
72 (b
as DoubleBigDigit
) -
73 (*borrow
as DoubleBigDigit
));
74 // hi * (base) + lo == 1*(base) + ai - bi - borrow
75 // => ai - bi - borrow < 0 <=> hi == 0
76 *borrow
= (hi
== 0) as BigDigit
;
81 pub fn mac_with_carry(a
: BigDigit
, b
: BigDigit
, c
: BigDigit
, carry
: &mut BigDigit
) -> BigDigit
{
82 let (hi
, lo
) = big_digit
::from_doublebigdigit((a
as DoubleBigDigit
) +
83 (b
as DoubleBigDigit
) * (c
as DoubleBigDigit
) +
84 (*carry
as DoubleBigDigit
));
90 pub fn mul_with_carry(a
: BigDigit
, b
: BigDigit
, carry
: &mut BigDigit
) -> BigDigit
{
91 let (hi
, lo
) = big_digit
::from_doublebigdigit((a
as DoubleBigDigit
) * (b
as DoubleBigDigit
) +
92 (*carry
as DoubleBigDigit
));
98 /// Divide a two digit numerator by a one digit divisor, returns quotient and remainder:
100 /// Note: the caller must ensure that both the quotient and remainder will fit into a single digit.
101 /// This is _not_ true for an arbitrary numerator/denominator.
103 /// (This function also matches what the x86 divide instruction does).
105 fn div_wide(hi
: BigDigit
, lo
: BigDigit
, divisor
: BigDigit
) -> (BigDigit
, BigDigit
) {
106 debug_assert
!(hi
< divisor
);
108 let lhs
= big_digit
::to_doublebigdigit(hi
, lo
);
109 let rhs
= divisor
as DoubleBigDigit
;
110 ((lhs
/ rhs
) as BigDigit
, (lhs
% rhs
) as BigDigit
)
113 pub fn div_rem_digit(mut a
: BigUint
, b
: BigDigit
) -> (BigUint
, BigDigit
) {
116 for d
in a
.data
.iter_mut().rev() {
117 let (q
, r
) = div_wide(rem
, *d
, b
);
122 (a
.normalized(), rem
)
125 // Only for the Add impl:
127 pub fn __add2(a
: &mut [BigDigit
], b
: &[BigDigit
]) -> BigDigit
{
128 debug_assert
!(a
.len() >= b
.len());
131 let (a_lo
, a_hi
) = a
.split_at_mut(b
.len());
133 for (a
, b
) in a_lo
.iter_mut().zip(b
) {
134 *a
= adc(*a
, *b
, &mut carry
);
139 *a
= adc(*a
, 0, &mut carry
);
140 if carry
== 0 { break }
147 /// /Two argument addition of raw slices:
150 /// The caller _must_ ensure that a is big enough to store the result - typically this means
151 /// resizing a to max(a.len(), b.len()) + 1, to fit a possible carry.
152 pub fn add2(a
: &mut [BigDigit
], b
: &[BigDigit
]) {
153 let carry
= __add2(a
, b
);
155 debug_assert
!(carry
== 0);
158 pub fn sub2(a
: &mut [BigDigit
], b
: &[BigDigit
]) {
161 let len
= cmp
::min(a
.len(), b
.len());
162 let (a_lo
, a_hi
) = a
.split_at_mut(len
);
163 let (b_lo
, b_hi
) = b
.split_at(len
);
165 for (a
, b
) in a_lo
.iter_mut().zip(b_lo
) {
166 *a
= sbb(*a
, *b
, &mut borrow
);
171 *a
= sbb(*a
, 0, &mut borrow
);
172 if borrow
== 0 { break }
176 // note: we're _required_ to fail on underflow
177 assert
!(borrow
== 0 && b_hi
.iter().all(|x
| *x
== 0),
178 "Cannot subtract b from a because b is larger than a.");
181 pub fn sub2rev(a
: &[BigDigit
], b
: &mut [BigDigit
]) {
182 debug_assert
!(b
.len() >= a
.len());
186 let len
= cmp
::min(a
.len(), b
.len());
187 let (a_lo
, a_hi
) = a
.split_at(len
);
188 let (b_lo
, b_hi
) = b
.split_at_mut(len
);
190 for (a
, b
) in a_lo
.iter().zip(b_lo
) {
191 *b
= sbb(*a
, *b
, &mut borrow
);
194 assert
!(a_hi
.is_empty());
196 // note: we're _required_ to fail on underflow
197 assert
!(borrow
== 0 && b_hi
.iter().all(|x
| *x
== 0),
198 "Cannot subtract b from a because b is larger than a.");
201 pub fn sub_sign(a
: &[BigDigit
], b
: &[BigDigit
]) -> (Sign
, BigUint
) {
203 let a
= &a
[..a
.iter().rposition(|&x
| x
!= 0).map_or(0, |i
| i
+ 1)];
204 let b
= &b
[..b
.iter().rposition(|&x
| x
!= 0).map_or(0, |i
| i
+ 1)];
206 match cmp_slice(a
, b
) {
208 let mut a
= a
.to_vec();
210 (Plus
, BigUint
::new(a
))
213 let mut b
= b
.to_vec();
215 (Minus
, BigUint
::new(b
))
217 _
=> (NoSign
, Zero
::zero()),
221 /// Three argument multiply accumulate:
223 pub fn mac_digit(acc
: &mut [BigDigit
], b
: &[BigDigit
], c
: BigDigit
) {
229 let (a_lo
, a_hi
) = acc
.split_at_mut(b
.len());
231 for (a
, &b
) in a_lo
.iter_mut().zip(b
) {
232 *a
= mac_with_carry(*a
, b
, c
, &mut carry
);
235 let mut a
= a_hi
.iter_mut();
237 let a
= a
.next().expect("carry overflow during multiplication!");
238 *a
= adc(*a
, 0, &mut carry
);
242 /// Three argument multiply accumulate:
244 fn mac3(acc
: &mut [BigDigit
], b
: &[BigDigit
], c
: &[BigDigit
]) {
245 let (x
, y
) = if b
.len() < c
.len() {
251 // We use three algorithms for different input sizes.
253 // - For small inputs, long multiplication is fastest.
254 // - Next we use Karatsuba multiplication (Toom-2), which we have optimized
255 // to avoid unnecessary allocations for intermediate values.
256 // - For the largest inputs we use Toom-3, which better optimizes the
257 // number of operations, but uses more temporary allocations.
259 // The thresholds are somewhat arbitrary, chosen by evaluating the results
260 // of `cargo bench --bench bigint multiply`.
263 // Long multiplication:
264 for (i
, xi
) in x
.iter().enumerate() {
265 mac_digit(&mut acc
[i
..], y
, *xi
);
267 } else if x
.len() <= 256 {
269 * Karatsuba multiplication:
271 * The idea is that we break x and y up into two smaller numbers that each have about half
272 * as many digits, like so (note that multiplying by b is just a shift):
277 * With some algebra, we can compute x * y with three smaller products, where the inputs to
278 * each of the smaller products have only about half as many digits as x and y:
280 * x * y = (x0 + x1 * b) * (y0 + y1 * b)
287 * Let p0 = x0 * y0 and p2 = x1 * y1:
290 * + (x0 * y1 + x1 * y0) * b
293 * The real trick is that middle term:
297 * = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
299 * = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
301 * Now we complete the square:
303 * = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
305 * = -((x1 - x0) * (y1 - y0)) + p0 + p2
307 * Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
310 * + (p0 + p2 - p1) * b
313 * Where the three intermediate products are:
316 * p1 = (x1 - x0) * (y1 - y0)
319 * In doing the computation, we take great care to avoid unnecessary temporary variables
320 * (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
321 * bit so we can use the same temporary variable for all the intermediate products:
323 * x * y = p2 * b^2 + p2 * b
327 * The other trick we use is instead of doing explicit shifts, we slice acc at the
328 * appropriate offset when doing the add.
332 * When x is smaller than y, it's significantly faster to pick b such that x is split in
336 let (x0
, x1
) = x
.split_at(b
);
337 let (y0
, y1
) = y
.split_at(b
);
340 * We reuse the same BigUint for all the intermediate multiplies and have to size p
341 * appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
343 let len
= x1
.len() + y1
.len() + 1;
344 let mut p
= BigUint { data: vec![0; len] }
;
347 mac3(&mut p
.data
[..], x1
, y1
);
349 // Not required, but the adds go faster if we drop any unneeded 0s from the end:
352 add2(&mut acc
[b
..], &p
.data
[..]);
353 add2(&mut acc
[b
* 2..], &p
.data
[..]);
355 // Zero out p before the next multiply:
357 p
.data
.extend(repeat(0).take(len
));
360 mac3(&mut p
.data
[..], x0
, y0
);
363 add2(&mut acc
[..], &p
.data
[..]);
364 add2(&mut acc
[b
..], &p
.data
[..]);
366 // p1 = (x1 - x0) * (y1 - y0)
367 // We do this one last, since it may be negative and acc can't ever be negative:
368 let (j0_sign
, j0
) = sub_sign(x1
, x0
);
369 let (j1_sign
, j1
) = sub_sign(y1
, y0
);
371 match j0_sign
* j1_sign
{
374 p
.data
.extend(repeat(0).take(len
));
376 mac3(&mut p
.data
[..], &j0
.data
[..], &j1
.data
[..]);
379 sub2(&mut acc
[b
..], &p
.data
[..]);
382 mac3(&mut acc
[b
..], &j0
.data
[..], &j1
.data
[..]);
388 // Toom-3 multiplication:
390 // Toom-3 is like Karatsuba above, but dividing the inputs into three parts.
391 // Both are instances of Toom-Cook, using `k=3` and `k=2` respectively.
393 // FIXME: It would be nice to have comments breaking down the operations below.
395 let i
= y
.len()/3 + 1;
397 let x0_len
= cmp
::min(x
.len(), i
);
398 let x1_len
= cmp
::min(x
.len() - x0_len
, i
);
401 let y1_len
= cmp
::min(y
.len() - y0_len
, i
);
403 let x0
= BigInt
::from_slice(Plus
, &x
[..x0_len
]);
404 let x1
= BigInt
::from_slice(Plus
, &x
[x0_len
..x0_len
+ x1_len
]);
405 let x2
= BigInt
::from_slice(Plus
, &x
[x0_len
+ x1_len
..]);
407 let y0
= BigInt
::from_slice(Plus
, &y
[..y0_len
]);
408 let y1
= BigInt
::from_slice(Plus
, &y
[y0_len
..y0_len
+ y1_len
]);
409 let y2
= BigInt
::from_slice(Plus
, &y
[y0_len
+ y1_len
..]);
419 let r1
= (p
+ x1
) * (q
+ y1
);
421 let r3
= ((p2
+ x2
)*2 - x0
) * ((q2
+ y2
)*2 - y0
);
423 let mut comp3
: BigInt
= (r3
- &r1
) / 3;
424 let mut comp1
: BigInt
= (r1
- &r2
) / 2;
425 let mut comp2
: BigInt
= r2
- &r0
;
426 comp3
= (&comp2
- comp3
)/2 + &r4
*2;
427 comp2
= comp2
+ &comp1
- &r4
;
428 comp1
= comp1
- &comp3
;
430 let result
= r0
+ (comp1
<< 32*i
) + (comp2
<< 2*32*i
) + (comp3
<< 3*32*i
) + (r4
<< 4*32*i
);
431 let result_pos
= result
.to_biguint().unwrap();
432 add2(&mut acc
[..], &result_pos
.data
);
436 pub fn mul3(x
: &[BigDigit
], y
: &[BigDigit
]) -> BigUint
{
437 let len
= x
.len() + y
.len() + 1;
438 let mut prod
= BigUint { data: vec![0; len] }
;
440 mac3(&mut prod
.data
[..], x
, y
);
444 pub fn scalar_mul(a
: &mut [BigDigit
], b
: BigDigit
) -> BigDigit
{
446 for a
in a
.iter_mut() {
447 *a
= mul_with_carry(*a
, b
, &mut carry
);
452 pub fn div_rem(u
: &BigUint
, d
: &BigUint
) -> (BigUint
, BigUint
) {
457 return (Zero
::zero(), Zero
::zero());
459 if *d
== One
::one() {
460 return (u
.clone(), Zero
::zero());
463 // Required or the q_len calculation below can underflow:
465 Less
=> return (Zero
::zero(), u
.clone()),
466 Equal
=> return (One
::one(), Zero
::zero()),
467 Greater
=> {}
// Do nothing
470 // This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D:
472 // First, normalize the arguments so the highest bit in the highest digit of the divisor is
473 // set: the main loop uses the highest digit of the divisor for generating guesses, so we
474 // want it to be the largest number we can efficiently divide by.
476 let shift
= d
.data
.last().unwrap().leading_zeros() as usize;
477 let mut a
= u
<< shift
;
480 // The algorithm works by incrementally calculating "guesses", q0, for part of the
481 // remainder. Once we have any number q0 such that q0 * b <= a, we can set
486 // and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder.
488 // q0, our guess, is calculated by dividing the last few digits of a by the last digit of b
489 // - this should give us a guess that is "close" to the actual quotient, but is possibly
490 // greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction
491 // until we have a guess such that q0 * b <= a.
494 let bn
= *b
.data
.last().unwrap();
495 let q_len
= a
.data
.len() - b
.data
.len() + 1;
496 let mut q
= BigUint { data: vec![0; q_len] }
;
498 // We reuse the same temporary to avoid hitting the allocator in our inner loop - this is
499 // sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0
502 let mut tmp
= BigUint { data: Vec::with_capacity(2) }
;
504 for j
in (0..q_len
).rev() {
506 * When calculating our next guess q0, we don't need to consider the digits below j
507 * + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
508 * digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
509 * two numbers will be zero in all digits up to (j + b.data.len() - 1).
511 let offset
= j
+ b
.data
.len() - 1;
512 if offset
>= a
.data
.len() {
516 /* just avoiding a heap allocation: */
519 a0
.data
.extend(a
.data
[offset
..].iter().cloned());
522 * q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
523 * implicitly at the end, when adding and subtracting to a and q. Not only do we
524 * save the cost of the shifts, the rest of the arithmetic gets to work with
527 let (mut q0
, _
) = div_rem_digit(a0
, bn
);
528 let mut prod
= &b
* &q0
;
530 while cmp_slice(&prod
.data
[..], &a
.data
[j
..]) == Greater
{
531 let one
: BigUint
= One
::one();
536 add2(&mut q
.data
[j
..], &q0
.data
[..]);
537 sub2(&mut a
.data
[j
..], &prod
.data
[..]);
543 debug_assert
!(a
< b
);
545 (q
.normalized(), a
>> shift
)
548 /// Find last set bit
549 /// fls(0) == 0, fls(u32::MAX) == 32
550 pub fn fls
<T
: traits
::PrimInt
>(v
: T
) -> usize {
551 mem
::size_of
::<T
>() * 8 - v
.leading_zeros() as usize
554 pub fn ilog2
<T
: traits
::PrimInt
>(v
: T
) -> usize {
559 pub fn biguint_shl(n
: Cow
<BigUint
>, bits
: usize) -> BigUint
{
560 let n_unit
= bits
/ big_digit
::BITS
;
561 let mut data
= match n_unit
{
562 0 => n
.into_owned().data
,
564 let len
= n_unit
+ n
.data
.len() + 1;
565 let mut data
= Vec
::with_capacity(len
);
566 data
.extend(repeat(0).take(n_unit
));
567 data
.extend(n
.data
.iter().cloned());
572 let n_bits
= bits
% big_digit
::BITS
;
575 for elem
in data
[n_unit
..].iter_mut() {
576 let new_carry
= *elem
>> (big_digit
::BITS
- n_bits
);
577 *elem
= (*elem
<< n_bits
) | carry
;
589 pub fn biguint_shr(n
: Cow
<BigUint
>, bits
: usize) -> BigUint
{
590 let n_unit
= bits
/ big_digit
::BITS
;
591 if n_unit
>= n
.data
.len() {
594 let mut data
= match n_unit
{
595 0 => n
.into_owned().data
,
596 _
=> n
.data
[n_unit
..].to_vec(),
599 let n_bits
= bits
% big_digit
::BITS
;
602 for elem
in data
.iter_mut().rev() {
603 let new_borrow
= *elem
<< (big_digit
::BITS
- n_bits
);
604 *elem
= (*elem
>> n_bits
) | borrow
;
612 pub fn cmp_slice(a
: &[BigDigit
], b
: &[BigDigit
]) -> Ordering
{
613 debug_assert
!(a
.last() != Some(&0));
614 debug_assert
!(b
.last() != Some(&0));
616 let (a_len
, b_len
) = (a
.len(), b
.len());
624 for (&ai
, &bi
) in a
.iter().rev().zip(b
.iter().rev()) {
636 mod algorithm_tests
{
637 use {BigDigit, BigUint, BigInt}
;
645 fn sub_sign_i(a
: &[BigDigit
], b
: &[BigDigit
]) -> BigInt
{
646 let (sign
, val
) = sub_sign(a
, b
);
647 BigInt
::from_biguint(sign
, val
)
650 let a
= BigUint
::from_str_radix("265252859812191058636308480000000", 10).unwrap();
651 let b
= BigUint
::from_str_radix("26525285981219105863630848000000", 10).unwrap();
652 let a_i
= BigInt
::from_biguint(Plus
, a
.clone());
653 let b_i
= BigInt
::from_biguint(Plus
, b
.clone());
655 assert_eq
!(sub_sign_i(&a
.data
[..], &b
.data
[..]), &a_i
- &b_i
);
656 assert_eq
!(sub_sign_i(&b
.data
[..], &a
.data
[..]), &b_i
- &a_i
);