]> git.proxmox.com Git - cargo.git/blob - vendor/num-bigint-0.1.41/src/algorithms.rs
New upstream version 0.24.0
[cargo.git] / vendor / num-bigint-0.1.41 / src / algorithms.rs
1 use std::borrow::Cow;
2 use std::cmp;
3 use std::cmp::Ordering::{self, Less, Greater, Equal};
4 use std::iter::repeat;
5 use std::mem;
6 use traits;
7 use traits::{Zero, One};
8
9 use biguint::BigUint;
10
11 use bigint::BigInt;
12 use bigint::Sign;
13 use bigint::Sign::{Minus, NoSign, Plus};
14
15 #[allow(non_snake_case)]
16 pub mod big_digit {
17 /// A `BigDigit` is a `BigUint`'s composing element.
18 pub type BigDigit = u32;
19
20 /// A `DoubleBigDigit` is the internal type used to do the computations. Its
21 /// size is the double of the size of `BigDigit`.
22 pub type DoubleBigDigit = u64;
23
24 pub const ZERO_BIG_DIGIT: BigDigit = 0;
25
26 // `DoubleBigDigit` size dependent
27 pub const BITS: usize = 32;
28
29 pub const BASE: DoubleBigDigit = 1 << BITS;
30 const LO_MASK: DoubleBigDigit = (-1i32 as DoubleBigDigit) >> BITS;
31
32 #[inline]
33 fn get_hi(n: DoubleBigDigit) -> BigDigit {
34 (n >> BITS) as BigDigit
35 }
36 #[inline]
37 fn get_lo(n: DoubleBigDigit) -> BigDigit {
38 (n & LO_MASK) as BigDigit
39 }
40
41 /// Split one `DoubleBigDigit` into two `BigDigit`s.
42 #[inline]
43 pub fn from_doublebigdigit(n: DoubleBigDigit) -> (BigDigit, BigDigit) {
44 (get_hi(n), get_lo(n))
45 }
46
47 /// Join two `BigDigit`s into one `DoubleBigDigit`
48 #[inline]
49 pub fn to_doublebigdigit(hi: BigDigit, lo: BigDigit) -> DoubleBigDigit {
50 (lo as DoubleBigDigit) | ((hi as DoubleBigDigit) << BITS)
51 }
52 }
53
54 use big_digit::{BigDigit, DoubleBigDigit};
55
56 // Generic functions for add/subtract/multiply with carry/borrow:
57
58 // Add with carry:
59 #[inline]
60 fn adc(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
61 let (hi, lo) = big_digit::from_doublebigdigit((a as DoubleBigDigit) + (b as DoubleBigDigit) +
62 (*carry as DoubleBigDigit));
63
64 *carry = hi;
65 lo
66 }
67
68 // Subtract with borrow:
69 #[inline]
70 fn sbb(a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
71 let (hi, lo) = big_digit::from_doublebigdigit(big_digit::BASE + (a as DoubleBigDigit) -
72 (b as DoubleBigDigit) -
73 (*borrow as DoubleBigDigit));
74 // hi * (base) + lo == 1*(base) + ai - bi - borrow
75 // => ai - bi - borrow < 0 <=> hi == 0
76 *borrow = (hi == 0) as BigDigit;
77 lo
78 }
79
80 #[inline]
81 pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
82 let (hi, lo) = big_digit::from_doublebigdigit((a as DoubleBigDigit) +
83 (b as DoubleBigDigit) * (c as DoubleBigDigit) +
84 (*carry as DoubleBigDigit));
85 *carry = hi;
86 lo
87 }
88
89 #[inline]
90 pub fn mul_with_carry(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
91 let (hi, lo) = big_digit::from_doublebigdigit((a as DoubleBigDigit) * (b as DoubleBigDigit) +
92 (*carry as DoubleBigDigit));
93
94 *carry = hi;
95 lo
96 }
97
98 /// Divide a two digit numerator by a one digit divisor, returns quotient and remainder:
99 ///
100 /// Note: the caller must ensure that both the quotient and remainder will fit into a single digit.
101 /// This is _not_ true for an arbitrary numerator/denominator.
102 ///
103 /// (This function also matches what the x86 divide instruction does).
104 #[inline]
105 fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
106 debug_assert!(hi < divisor);
107
108 let lhs = big_digit::to_doublebigdigit(hi, lo);
109 let rhs = divisor as DoubleBigDigit;
110 ((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
111 }
112
113 pub fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit) {
114 let mut rem = 0;
115
116 for d in a.data.iter_mut().rev() {
117 let (q, r) = div_wide(rem, *d, b);
118 *d = q;
119 rem = r;
120 }
121
122 (a.normalized(), rem)
123 }
124
125 // Only for the Add impl:
126 #[inline]
127 pub fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
128 debug_assert!(a.len() >= b.len());
129
130 let mut carry = 0;
131 let (a_lo, a_hi) = a.split_at_mut(b.len());
132
133 for (a, b) in a_lo.iter_mut().zip(b) {
134 *a = adc(*a, *b, &mut carry);
135 }
136
137 if carry != 0 {
138 for a in a_hi {
139 *a = adc(*a, 0, &mut carry);
140 if carry == 0 { break }
141 }
142 }
143
144 carry
145 }
146
147 /// /Two argument addition of raw slices:
148 /// a += b
149 ///
150 /// The caller _must_ ensure that a is big enough to store the result - typically this means
151 /// resizing a to max(a.len(), b.len()) + 1, to fit a possible carry.
152 pub fn add2(a: &mut [BigDigit], b: &[BigDigit]) {
153 let carry = __add2(a, b);
154
155 debug_assert!(carry == 0);
156 }
157
158 pub fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
159 let mut borrow = 0;
160
161 let len = cmp::min(a.len(), b.len());
162 let (a_lo, a_hi) = a.split_at_mut(len);
163 let (b_lo, b_hi) = b.split_at(len);
164
165 for (a, b) in a_lo.iter_mut().zip(b_lo) {
166 *a = sbb(*a, *b, &mut borrow);
167 }
168
169 if borrow != 0 {
170 for a in a_hi {
171 *a = sbb(*a, 0, &mut borrow);
172 if borrow == 0 { break }
173 }
174 }
175
176 // note: we're _required_ to fail on underflow
177 assert!(borrow == 0 && b_hi.iter().all(|x| *x == 0),
178 "Cannot subtract b from a because b is larger than a.");
179 }
180
181 pub fn sub2rev(a: &[BigDigit], b: &mut [BigDigit]) {
182 debug_assert!(b.len() >= a.len());
183
184 let mut borrow = 0;
185
186 let len = cmp::min(a.len(), b.len());
187 let (a_lo, a_hi) = a.split_at(len);
188 let (b_lo, b_hi) = b.split_at_mut(len);
189
190 for (a, b) in a_lo.iter().zip(b_lo) {
191 *b = sbb(*a, *b, &mut borrow);
192 }
193
194 assert!(a_hi.is_empty());
195
196 // note: we're _required_ to fail on underflow
197 assert!(borrow == 0 && b_hi.iter().all(|x| *x == 0),
198 "Cannot subtract b from a because b is larger than a.");
199 }
200
201 pub fn sub_sign(a: &[BigDigit], b: &[BigDigit]) -> (Sign, BigUint) {
202 // Normalize:
203 let a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
204 let b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
205
206 match cmp_slice(a, b) {
207 Greater => {
208 let mut a = a.to_vec();
209 sub2(&mut a, b);
210 (Plus, BigUint::new(a))
211 }
212 Less => {
213 let mut b = b.to_vec();
214 sub2(&mut b, a);
215 (Minus, BigUint::new(b))
216 }
217 _ => (NoSign, Zero::zero()),
218 }
219 }
220
221 /// Three argument multiply accumulate:
222 /// acc += b * c
223 pub fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) {
224 if c == 0 {
225 return;
226 }
227
228 let mut carry = 0;
229 let (a_lo, a_hi) = acc.split_at_mut(b.len());
230
231 for (a, &b) in a_lo.iter_mut().zip(b) {
232 *a = mac_with_carry(*a, b, c, &mut carry);
233 }
234
235 let mut a = a_hi.iter_mut();
236 while carry != 0 {
237 let a = a.next().expect("carry overflow during multiplication!");
238 *a = adc(*a, 0, &mut carry);
239 }
240 }
241
242 /// Three argument multiply accumulate:
243 /// acc += b * c
244 fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) {
245 let (x, y) = if b.len() < c.len() {
246 (b, c)
247 } else {
248 (c, b)
249 };
250
251 // We use three algorithms for different input sizes.
252 //
253 // - For small inputs, long multiplication is fastest.
254 // - Next we use Karatsuba multiplication (Toom-2), which we have optimized
255 // to avoid unnecessary allocations for intermediate values.
256 // - For the largest inputs we use Toom-3, which better optimizes the
257 // number of operations, but uses more temporary allocations.
258 //
259 // The thresholds are somewhat arbitrary, chosen by evaluating the results
260 // of `cargo bench --bench bigint multiply`.
261
262 if x.len() <= 32 {
263 // Long multiplication:
264 for (i, xi) in x.iter().enumerate() {
265 mac_digit(&mut acc[i..], y, *xi);
266 }
267 } else if x.len() <= 256 {
268 /*
269 * Karatsuba multiplication:
270 *
271 * The idea is that we break x and y up into two smaller numbers that each have about half
272 * as many digits, like so (note that multiplying by b is just a shift):
273 *
274 * x = x0 + x1 * b
275 * y = y0 + y1 * b
276 *
277 * With some algebra, we can compute x * y with three smaller products, where the inputs to
278 * each of the smaller products have only about half as many digits as x and y:
279 *
280 * x * y = (x0 + x1 * b) * (y0 + y1 * b)
281 *
282 * x * y = x0 * y0
283 * + x0 * y1 * b
284 * + x1 * y0 * b
285 * + x1 * y1 * b^2
286 *
287 * Let p0 = x0 * y0 and p2 = x1 * y1:
288 *
289 * x * y = p0
290 * + (x0 * y1 + x1 * y0) * b
291 * + p2 * b^2
292 *
293 * The real trick is that middle term:
294 *
295 * x0 * y1 + x1 * y0
296 *
297 * = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
298 *
299 * = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
300 *
301 * Now we complete the square:
302 *
303 * = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
304 *
305 * = -((x1 - x0) * (y1 - y0)) + p0 + p2
306 *
307 * Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
308 *
309 * x * y = p0
310 * + (p0 + p2 - p1) * b
311 * + p2 * b^2
312 *
313 * Where the three intermediate products are:
314 *
315 * p0 = x0 * y0
316 * p1 = (x1 - x0) * (y1 - y0)
317 * p2 = x1 * y1
318 *
319 * In doing the computation, we take great care to avoid unnecessary temporary variables
320 * (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
321 * bit so we can use the same temporary variable for all the intermediate products:
322 *
323 * x * y = p2 * b^2 + p2 * b
324 * + p0 * b + p0
325 * - p1 * b
326 *
327 * The other trick we use is instead of doing explicit shifts, we slice acc at the
328 * appropriate offset when doing the add.
329 */
330
331 /*
332 * When x is smaller than y, it's significantly faster to pick b such that x is split in
333 * half, not y:
334 */
335 let b = x.len() / 2;
336 let (x0, x1) = x.split_at(b);
337 let (y0, y1) = y.split_at(b);
338
339 /*
340 * We reuse the same BigUint for all the intermediate multiplies and have to size p
341 * appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
342 */
343 let len = x1.len() + y1.len() + 1;
344 let mut p = BigUint { data: vec![0; len] };
345
346 // p2 = x1 * y1
347 mac3(&mut p.data[..], x1, y1);
348
349 // Not required, but the adds go faster if we drop any unneeded 0s from the end:
350 p.normalize();
351
352 add2(&mut acc[b..], &p.data[..]);
353 add2(&mut acc[b * 2..], &p.data[..]);
354
355 // Zero out p before the next multiply:
356 p.data.truncate(0);
357 p.data.extend(repeat(0).take(len));
358
359 // p0 = x0 * y0
360 mac3(&mut p.data[..], x0, y0);
361 p.normalize();
362
363 add2(&mut acc[..], &p.data[..]);
364 add2(&mut acc[b..], &p.data[..]);
365
366 // p1 = (x1 - x0) * (y1 - y0)
367 // We do this one last, since it may be negative and acc can't ever be negative:
368 let (j0_sign, j0) = sub_sign(x1, x0);
369 let (j1_sign, j1) = sub_sign(y1, y0);
370
371 match j0_sign * j1_sign {
372 Plus => {
373 p.data.truncate(0);
374 p.data.extend(repeat(0).take(len));
375
376 mac3(&mut p.data[..], &j0.data[..], &j1.data[..]);
377 p.normalize();
378
379 sub2(&mut acc[b..], &p.data[..]);
380 },
381 Minus => {
382 mac3(&mut acc[b..], &j0.data[..], &j1.data[..]);
383 },
384 NoSign => (),
385 }
386
387 } else {
388 // Toom-3 multiplication:
389 //
390 // Toom-3 is like Karatsuba above, but dividing the inputs into three parts.
391 // Both are instances of Toom-Cook, using `k=3` and `k=2` respectively.
392 //
393 // FIXME: It would be nice to have comments breaking down the operations below.
394
395 let i = y.len()/3 + 1;
396
397 let x0_len = cmp::min(x.len(), i);
398 let x1_len = cmp::min(x.len() - x0_len, i);
399
400 let y0_len = i;
401 let y1_len = cmp::min(y.len() - y0_len, i);
402
403 let x0 = BigInt::from_slice(Plus, &x[..x0_len]);
404 let x1 = BigInt::from_slice(Plus, &x[x0_len..x0_len + x1_len]);
405 let x2 = BigInt::from_slice(Plus, &x[x0_len + x1_len..]);
406
407 let y0 = BigInt::from_slice(Plus, &y[..y0_len]);
408 let y1 = BigInt::from_slice(Plus, &y[y0_len..y0_len + y1_len]);
409 let y2 = BigInt::from_slice(Plus, &y[y0_len + y1_len..]);
410
411 let p = &x0 + &x2;
412 let q = &y0 + &y2;
413
414 let p2 = &p - &x1;
415 let q2 = &q - &y1;
416
417 let r0 = &x0 * &y0;
418 let r4 = &x2 * &y2;
419 let r1 = (p + x1) * (q + y1);
420 let r2 = &p2 * &q2;
421 let r3 = ((p2 + x2)*2 - x0) * ((q2 + y2)*2 - y0);
422
423 let mut comp3: BigInt = (r3 - &r1) / 3;
424 let mut comp1: BigInt = (r1 - &r2) / 2;
425 let mut comp2: BigInt = r2 - &r0;
426 comp3 = (&comp2 - comp3)/2 + &r4*2;
427 comp2 = comp2 + &comp1 - &r4;
428 comp1 = comp1 - &comp3;
429
430 let result = r0 + (comp1 << 32*i) + (comp2 << 2*32*i) + (comp3 << 3*32*i) + (r4 << 4*32*i);
431 let result_pos = result.to_biguint().unwrap();
432 add2(&mut acc[..], &result_pos.data);
433 }
434 }
435
436 pub fn mul3(x: &[BigDigit], y: &[BigDigit]) -> BigUint {
437 let len = x.len() + y.len() + 1;
438 let mut prod = BigUint { data: vec![0; len] };
439
440 mac3(&mut prod.data[..], x, y);
441 prod.normalized()
442 }
443
444 pub fn scalar_mul(a: &mut [BigDigit], b: BigDigit) -> BigDigit {
445 let mut carry = 0;
446 for a in a.iter_mut() {
447 *a = mul_with_carry(*a, b, &mut carry);
448 }
449 carry
450 }
451
452 pub fn div_rem(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) {
453 if d.is_zero() {
454 panic!()
455 }
456 if u.is_zero() {
457 return (Zero::zero(), Zero::zero());
458 }
459 if *d == One::one() {
460 return (u.clone(), Zero::zero());
461 }
462
463 // Required or the q_len calculation below can underflow:
464 match u.cmp(d) {
465 Less => return (Zero::zero(), u.clone()),
466 Equal => return (One::one(), Zero::zero()),
467 Greater => {} // Do nothing
468 }
469
470 // This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D:
471 //
472 // First, normalize the arguments so the highest bit in the highest digit of the divisor is
473 // set: the main loop uses the highest digit of the divisor for generating guesses, so we
474 // want it to be the largest number we can efficiently divide by.
475 //
476 let shift = d.data.last().unwrap().leading_zeros() as usize;
477 let mut a = u << shift;
478 let b = d << shift;
479
480 // The algorithm works by incrementally calculating "guesses", q0, for part of the
481 // remainder. Once we have any number q0 such that q0 * b <= a, we can set
482 //
483 // q += q0
484 // a -= q0 * b
485 //
486 // and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder.
487 //
488 // q0, our guess, is calculated by dividing the last few digits of a by the last digit of b
489 // - this should give us a guess that is "close" to the actual quotient, but is possibly
490 // greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction
491 // until we have a guess such that q0 * b <= a.
492 //
493
494 let bn = *b.data.last().unwrap();
495 let q_len = a.data.len() - b.data.len() + 1;
496 let mut q = BigUint { data: vec![0; q_len] };
497
498 // We reuse the same temporary to avoid hitting the allocator in our inner loop - this is
499 // sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0
500 // can be bigger).
501 //
502 let mut tmp = BigUint { data: Vec::with_capacity(2) };
503
504 for j in (0..q_len).rev() {
505 /*
506 * When calculating our next guess q0, we don't need to consider the digits below j
507 * + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
508 * digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
509 * two numbers will be zero in all digits up to (j + b.data.len() - 1).
510 */
511 let offset = j + b.data.len() - 1;
512 if offset >= a.data.len() {
513 continue;
514 }
515
516 /* just avoiding a heap allocation: */
517 let mut a0 = tmp;
518 a0.data.truncate(0);
519 a0.data.extend(a.data[offset..].iter().cloned());
520
521 /*
522 * q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
523 * implicitly at the end, when adding and subtracting to a and q. Not only do we
524 * save the cost of the shifts, the rest of the arithmetic gets to work with
525 * smaller numbers.
526 */
527 let (mut q0, _) = div_rem_digit(a0, bn);
528 let mut prod = &b * &q0;
529
530 while cmp_slice(&prod.data[..], &a.data[j..]) == Greater {
531 let one: BigUint = One::one();
532 q0 = q0 - one;
533 prod = prod - &b;
534 }
535
536 add2(&mut q.data[j..], &q0.data[..]);
537 sub2(&mut a.data[j..], &prod.data[..]);
538 a.normalize();
539
540 tmp = q0;
541 }
542
543 debug_assert!(a < b);
544
545 (q.normalized(), a >> shift)
546 }
547
548 /// Find last set bit
549 /// fls(0) == 0, fls(u32::MAX) == 32
550 pub fn fls<T: traits::PrimInt>(v: T) -> usize {
551 mem::size_of::<T>() * 8 - v.leading_zeros() as usize
552 }
553
554 pub fn ilog2<T: traits::PrimInt>(v: T) -> usize {
555 fls(v) - 1
556 }
557
558 #[inline]
559 pub fn biguint_shl(n: Cow<BigUint>, bits: usize) -> BigUint {
560 let n_unit = bits / big_digit::BITS;
561 let mut data = match n_unit {
562 0 => n.into_owned().data,
563 _ => {
564 let len = n_unit + n.data.len() + 1;
565 let mut data = Vec::with_capacity(len);
566 data.extend(repeat(0).take(n_unit));
567 data.extend(n.data.iter().cloned());
568 data
569 }
570 };
571
572 let n_bits = bits % big_digit::BITS;
573 if n_bits > 0 {
574 let mut carry = 0;
575 for elem in data[n_unit..].iter_mut() {
576 let new_carry = *elem >> (big_digit::BITS - n_bits);
577 *elem = (*elem << n_bits) | carry;
578 carry = new_carry;
579 }
580 if carry != 0 {
581 data.push(carry);
582 }
583 }
584
585 BigUint::new(data)
586 }
587
588 #[inline]
589 pub fn biguint_shr(n: Cow<BigUint>, bits: usize) -> BigUint {
590 let n_unit = bits / big_digit::BITS;
591 if n_unit >= n.data.len() {
592 return Zero::zero();
593 }
594 let mut data = match n_unit {
595 0 => n.into_owned().data,
596 _ => n.data[n_unit..].to_vec(),
597 };
598
599 let n_bits = bits % big_digit::BITS;
600 if n_bits > 0 {
601 let mut borrow = 0;
602 for elem in data.iter_mut().rev() {
603 let new_borrow = *elem << (big_digit::BITS - n_bits);
604 *elem = (*elem >> n_bits) | borrow;
605 borrow = new_borrow;
606 }
607 }
608
609 BigUint::new(data)
610 }
611
612 pub fn cmp_slice(a: &[BigDigit], b: &[BigDigit]) -> Ordering {
613 debug_assert!(a.last() != Some(&0));
614 debug_assert!(b.last() != Some(&0));
615
616 let (a_len, b_len) = (a.len(), b.len());
617 if a_len < b_len {
618 return Less;
619 }
620 if a_len > b_len {
621 return Greater;
622 }
623
624 for (&ai, &bi) in a.iter().rev().zip(b.iter().rev()) {
625 if ai < bi {
626 return Less;
627 }
628 if ai > bi {
629 return Greater;
630 }
631 }
632 return Equal;
633 }
634
635 #[cfg(test)]
636 mod algorithm_tests {
637 use {BigDigit, BigUint, BigInt};
638 use Sign::Plus;
639 use traits::Num;
640
641 #[test]
642 fn test_sub_sign() {
643 use super::sub_sign;
644
645 fn sub_sign_i(a: &[BigDigit], b: &[BigDigit]) -> BigInt {
646 let (sign, val) = sub_sign(a, b);
647 BigInt::from_biguint(sign, val)
648 }
649
650 let a = BigUint::from_str_radix("265252859812191058636308480000000", 10).unwrap();
651 let b = BigUint::from_str_radix("26525285981219105863630848000000", 10).unwrap();
652 let a_i = BigInt::from_biguint(Plus, a.clone());
653 let b_i = BigInt::from_biguint(Plus, b.clone());
654
655 assert_eq!(sub_sign_i(&a.data[..], &b.data[..]), &a_i - &b_i);
656 assert_eq!(sub_sign_i(&b.data[..], &a.data[..]), &b_i - &a_i);
657 }
658 }