/*
 * Copyright (c) 2013, Kenneth MacKay
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/swab.h>
#include <linux/fips.h>
#include <crypto/ecdh.h>

#include "ecc.h"
#include "ecc_curve_defs.h"

typedef struct {
	u64 m_low;
	u64 m_high;
} uint128_t;
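/* The "very long integer" (vli) helpers below operate on arrays of u64
 * digits stored least-significant digit first.  For example, with
 * ndigits = 2 the 128-bit value 2^64 + 5 is stored as vli[0] = 5,
 * vli[1] = 1.
 */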
static inline const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
{
	switch (curve_id) {
	/* In FIPS mode only allow P256 and higher */
	case ECC_CURVE_NIST_P192:
		return fips_enabled ? NULL : &nist_p192;
	case ECC_CURVE_NIST_P256:
		return &nist_p256;
	default:
		return NULL;
	}
}
static u64 *ecc_alloc_digits_space(unsigned int ndigits)
{
	size_t len = ndigits * sizeof(u64);

	if (!len)
		return NULL;

	return kmalloc(len, GFP_KERNEL);
}
static void ecc_free_digits_space(u64 *space)
{
	kzfree(space);
}
static struct ecc_point *ecc_alloc_point(unsigned int ndigits)
{
	struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return NULL;

	p->x = ecc_alloc_digits_space(ndigits);
	if (!p->x)
		goto err_alloc_x;

	p->y = ecc_alloc_digits_space(ndigits);
	if (!p->y)
		goto err_alloc_y;

	p->ndigits = ndigits;

	return p;

err_alloc_y:
	ecc_free_digits_space(p->x);
err_alloc_x:
	kfree(p);
	return NULL;
}
static void ecc_free_point(struct ecc_point *p)
{
	if (!p)
		return;

	kzfree(p->x);
	kzfree(p->y);
	kzfree(p);
}
static void vli_clear(u64 *vli, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		vli[i] = 0;
}
/* Returns true if vli == 0, false otherwise. */
static bool vli_is_zero(const u64 *vli, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++) {
		if (vli[i])
			return false;
	}

	return true;
}
/* Returns nonzero if bit 'bit' of vli is set. */
static u64 vli_test_bit(const u64 *vli, unsigned int bit)
{
	return (vli[bit / 64] & ((u64)1 << (bit % 64)));
}
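/* For example, vli_test_bit(vli, 67) evaluates
 * vli[67 / 64] & ((u64)1 << (67 % 64)), i.e. bit 3 of digit vli[1].
 */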
/* Counts the number of 64-bit "digits" in vli. */
static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
{
	int i;

	/* Search from the end until we find a non-zero digit.
	 * We do it in reverse because we expect that most digits will
	 * be nonzero.
	 */
	for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--);

	return (i + 1);
}
/* Counts the number of bits required for vli. */
static unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
{
	unsigned int i, num_digits;
	u64 digit;

	num_digits = vli_num_digits(vli, ndigits);
	if (num_digits == 0)
		return 0;

	digit = vli[num_digits - 1];
	for (i = 0; digit; i++)
		digit >>= 1;

	return ((num_digits - 1) * 64 + i);
}
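/* For example, with ndigits = 3 and a top digit of 0x5: vli_num_digits()
 * returns 3, the loop shifts 0x5 right three times before it reaches zero,
 * so the result is (3 - 1) * 64 + 3 = 131 bits.
 */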
/* Sets dest = src. */
static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		dest[i] = src[i];
}
/* Returns sign of left - right. */
static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
{
	int i;

	for (i = ndigits - 1; i >= 0; i--) {
		if (left[i] > right[i])
			return 1;
		else if (left[i] < right[i])
			return -1;
	}

	return 0;
}
/* Computes result = in << shift, returning carry. Can modify in place
 * (if result == in). 0 < shift < 64.
 */
static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
		      unsigned int ndigits)
{
	u64 carry = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 temp = in[i];

		result[i] = (temp << shift) | carry;
		carry = temp >> (64 - shift);
	}

	return carry;
}
/* Computes vli = vli >> 1. */
static void vli_rshift1(u64 *vli, unsigned int ndigits)
{
	u64 *end = vli;
	u64 carry = 0;

	vli += ndigits;

	while (vli-- > end) {
		u64 temp = *vli;

		*vli = (temp >> 1) | carry;
		carry = temp << 63;
	}
}
/* Computes result = left + right, returning carry. Can modify in place. */
static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
		   unsigned int ndigits)
{
	u64 carry = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 sum;

		sum = left[i] + right[i] + carry;
		if (sum != left[i])
			carry = (sum < left[i]);

		result[i] = sum;
	}

	return carry;
}
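/* The "if (sum != left[i])" guard above matters in one corner case: when
 * right[i] + carry wraps to exactly 2^64 (right[i] == ~0ull and carry == 1),
 * sum equals left[i] and (sum < left[i]) would wrongly clear the carry;
 * leaving carry untouched preserves the pending carry of 1.  vli_sub()
 * below uses the mirrored guard for its borrow.
 */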
/* Computes result = left - right, returning borrow. Can modify in place. */
static u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
		   unsigned int ndigits)
{
	u64 borrow = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 diff;

		diff = left[i] - right[i] - borrow;
		if (diff != left[i])
			borrow = (diff > left[i]);

		result[i] = diff;
	}

	return borrow;
}
static uint128_t mul_64_64(u64 left, u64 right)
{
	u64 a0 = left & 0xffffffffull;
	u64 a1 = left >> 32;
	u64 b0 = right & 0xffffffffull;
	u64 b1 = right >> 32;
	u64 m0 = a0 * b0;
	u64 m1 = a0 * b1;
	u64 m2 = a1 * b0;
	u64 m3 = a1 * b1;
	uint128_t result;

	m2 += (m0 >> 32);
	m2 += m1;

	/* Overflow */
	if (m2 < m1)
		m3 += 0x100000000ull;

	result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
	result.m_high = m3 + (m2 >> 32);

	return result;
}
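/* mul_64_64() is schoolbook multiplication on 32-bit halves:
 *   left * right = m3 * 2^64 + (m1 + m2) * 2^32 + m0
 * with m0 = a0*b0, m1 = a0*b1, m2 = a1*b0, m3 = a1*b1.  Folding m1 and the
 * high half of m0 into m2 can overflow 64 bits; that overflow is worth
 * 2^96, hence the 0x100000000ull (2^32) correction in the high digit m3.
 */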
static uint128_t add_128_128(uint128_t a, uint128_t b)
{
	uint128_t result;

	result.m_low = a.m_low + b.m_low;
	result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);

	return result;
}
static void vli_mult(u64 *result, const u64 *left, const u64 *right,
		     unsigned int ndigits)
{
	uint128_t r01 = { 0, 0 };
	u64 r2 = 0;
	unsigned int i, k;

	/* Compute each digit of result in sequence, maintaining the
	 * carries.
	 */
	for (k = 0; k < ndigits * 2 - 1; k++) {
		unsigned int min;

		if (k < ndigits)
			min = 0;
		else
			min = (k + 1) - ndigits;

		for (i = min; i <= k && i < ndigits; i++) {
			uint128_t product;

			product = mul_64_64(left[i], right[k - i]);

			r01 = add_128_128(r01, product);
			r2 += (r01.m_high < product.m_high);
		}

		result[k] = r01.m_low;
		r01.m_low = r01.m_high;
		r01.m_high = r2;
		r2 = 0;
	}

	result[ndigits * 2 - 1] = r01.m_low;
}
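/* vli_mult() uses column-wise (product scanning) multiplication: digit k of
 * the result accumulates the partial products left[i] * right[k - i] for
 * every valid i.  The pair (r01, r2) acts as a 192-bit accumulator; after
 * each column, result[k] takes the low 64 bits and the accumulator shifts
 * right by one digit.
 */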
static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
{
	uint128_t r01 = { 0, 0 };
	u64 r2 = 0;
	unsigned int i, k;

	for (k = 0; k < ndigits * 2 - 1; k++) {
		unsigned int min;

		if (k < ndigits)
			min = 0;
		else
			min = (k + 1) - ndigits;

		for (i = min; i <= k && i <= k - i; i++) {
			uint128_t product;

			product = mul_64_64(left[i], left[k - i]);

			/* Cross terms appear twice in a square, so double
			 * every product with i != k - i.
			 */
			if (i < k - i) {
				r2 += product.m_high >> 63;
				product.m_high = (product.m_high << 1) |
						 (product.m_low >> 63);
				product.m_low <<= 1;
			}

			r01 = add_128_128(r01, product);
			r2 += (r01.m_high < product.m_high);
		}

		result[k] = r01.m_low;
		r01.m_low = r01.m_high;
		r01.m_high = r2;
		r2 = 0;
	}

	result[ndigits * 2 - 1] = r01.m_low;
}
/* Computes result = (left + right) % mod.
 * Assumes that left < mod and right < mod, result != mod.
 */
static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
			const u64 *mod, unsigned int ndigits)
{
	u64 carry;

	carry = vli_add(result, left, right, ndigits);

	/* result > mod (result = mod + remainder), so subtract mod to
	 * get remainder.
	 */
	if (carry || vli_cmp(result, mod, ndigits) >= 0)
		vli_sub(result, result, mod, ndigits);
}
/* Computes result = (left - right) % mod.
 * Assumes that left < mod and right < mod, result != mod.
 */
static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
			const u64 *mod, unsigned int ndigits)
{
	u64 borrow = vli_sub(result, left, right, ndigits);

	/* In this case, result == -diff == (max int) - diff.
	 * Since -x % d == d - x, we can get the correct result from
	 * result + mod (with overflow).
	 */
	if (borrow)
		vli_add(result, result, mod, ndigits);
}
/* Computes result = product % curve_prime.
 * See algorithm 5 and 6 from
 * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
 */
static void vli_mmod_fast_192(u64 *result, const u64 *product,
			      const u64 *curve_prime, u64 *tmp)
{
	const unsigned int ndigits = 3;
	int carry;

	vli_set(result, product, ndigits);

	vli_set(tmp, &product[3], ndigits);
	carry = vli_add(result, result, tmp, ndigits);

	tmp[0] = 0;
	tmp[1] = product[3];
	tmp[2] = product[4];
	carry += vli_add(result, result, tmp, ndigits);

	tmp[0] = tmp[1] = product[5];
	tmp[2] = 0;
	carry += vli_add(result, result, tmp, ndigits);

	while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
		carry -= vli_sub(result, result, curve_prime, ndigits);
}
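/* The three additions above implement the congruence 2^192 == 2^64 + 1
 * (mod p), since p = 2^192 - 2^64 - 1 for NIST P-192.  Writing the 384-bit
 * product as digits (d0, ..., d5), the high half folds back as
 *   (d3, d4, d5) + (0, d3, d4) + (d5, d5, 0),
 * and the final while loop subtracts p until the result is fully reduced.
 */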
/* Computes result = product % curve_prime
 * from http://www.nsa.gov/ia/_files/nist-routines.pdf
 */
static void vli_mmod_fast_256(u64 *result, const u64 *product,
			      const u64 *curve_prime, u64 *tmp)
{
	int carry;
	const unsigned int ndigits = 4;

	/* t */
	vli_set(result, product, ndigits);

	/* s1 */
	tmp[0] = 0;
	tmp[1] = product[5] & 0xffffffff00000000ull;
	tmp[2] = product[6];
	tmp[3] = product[7];
	carry = vli_lshift(tmp, tmp, 1, ndigits);
	carry += vli_add(result, result, tmp, ndigits);

	/* s2 */
	tmp[1] = product[6] << 32;
	tmp[2] = (product[6] >> 32) | (product[7] << 32);
	tmp[3] = product[7] >> 32;
	carry += vli_lshift(tmp, tmp, 1, ndigits);
	carry += vli_add(result, result, tmp, ndigits);

	/* s3 */
	tmp[0] = product[4];
	tmp[1] = product[5] & 0xffffffff;
	tmp[2] = 0;
	tmp[3] = product[7];
	carry += vli_add(result, result, tmp, ndigits);

	/* s4 */
	tmp[0] = (product[4] >> 32) | (product[5] << 32);
	tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
	tmp[2] = product[7];
	tmp[3] = (product[6] >> 32) | (product[4] << 32);
	carry += vli_add(result, result, tmp, ndigits);

	/* d1 */
	tmp[0] = (product[5] >> 32) | (product[6] << 32);
	tmp[1] = (product[6] >> 32);
	tmp[2] = 0;
	tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d2 */
	tmp[0] = product[6];
	tmp[1] = product[7];
	tmp[2] = 0;
	tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d3 */
	tmp[0] = (product[6] >> 32) | (product[7] << 32);
	tmp[1] = (product[7] >> 32) | (product[4] << 32);
	tmp[2] = (product[4] >> 32) | (product[5] << 32);
	tmp[3] = (product[6] << 32);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d4 */
	tmp[0] = product[7];
	tmp[1] = product[4] & 0xffffffff00000000ull;
	tmp[2] = product[5];
	tmp[3] = product[6] & 0xffffffff00000000ull;
	carry -= vli_sub(result, result, tmp, ndigits);

	if (carry < 0) {
		do {
			carry += vli_add(result, result, curve_prime, ndigits);
		} while (carry < 0);
	} else {
		while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
			carry -= vli_sub(result, result, curve_prime, ndigits);
	}
}
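/* For NIST P-256, p = 2^256 - 2^224 + 2^192 + 2^96 - 1.  The terms built in
 * tmp are the s1..s4 additions and d1..d4 subtractions of the fast
 * reduction in the NIST routines document referenced above; carry tracks
 * the signed overflow across all of them, and the final loops add or
 * subtract p until the result lies in [0, p).
 */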
/* Computes result = product % curve_prime
 * from http://www.nsa.gov/ia/_files/nist-routines.pdf
 */
static bool vli_mmod_fast(u64 *result, u64 *product,
			  const u64 *curve_prime, unsigned int ndigits)
{
	u64 tmp[2 * ndigits];

	switch (ndigits) {
	case 3:
		vli_mmod_fast_192(result, product, curve_prime, tmp);
		break;
	case 4:
		vli_mmod_fast_256(result, product, curve_prime, tmp);
		break;
	default:
		pr_err("unsupported digits size!\n");
		return false;
	}

	return true;
}
/* Computes result = (left * right) % curve_prime. */
static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
			      const u64 *curve_prime, unsigned int ndigits)
{
	u64 product[2 * ndigits];

	vli_mult(product, left, right, ndigits);
	vli_mmod_fast(result, product, curve_prime, ndigits);
}
/* Computes result = left^2 % curve_prime. */
static void vli_mod_square_fast(u64 *result, const u64 *left,
				const u64 *curve_prime, unsigned int ndigits)
{
	u64 product[2 * ndigits];

	vli_square(product, left, ndigits);
	vli_mmod_fast(result, product, curve_prime, ndigits);
}
#define EVEN(vli) (!(vli[0] & 1))
/* Computes result = (1 / input) % mod. All VLIs are the same size.
 * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
 * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
 */
static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
			unsigned int ndigits)
{
	u64 a[ndigits], b[ndigits];
	u64 u[ndigits], v[ndigits];
	u64 carry;
	int cmp_result;

	if (vli_is_zero(input, ndigits)) {
		vli_clear(result, ndigits);
		return;
	}

	vli_set(a, input, ndigits);
	vli_set(b, mod, ndigits);
	vli_clear(u, ndigits);
	u[0] = 1;
	vli_clear(v, ndigits);

	while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) {
		carry = 0;

		if (EVEN(a)) {
			vli_rshift1(a, ndigits);

			if (!EVEN(u))
				carry = vli_add(u, u, mod, ndigits);

			vli_rshift1(u, ndigits);
			if (carry)
				u[ndigits - 1] |= 0x8000000000000000ull;
		} else if (EVEN(b)) {
			vli_rshift1(b, ndigits);

			if (!EVEN(v))
				carry = vli_add(v, v, mod, ndigits);

			vli_rshift1(v, ndigits);
			if (carry)
				v[ndigits - 1] |= 0x8000000000000000ull;
		} else if (cmp_result > 0) {
			vli_sub(a, a, b, ndigits);
			vli_rshift1(a, ndigits);

			if (vli_cmp(u, v, ndigits) < 0)
				vli_add(u, u, mod, ndigits);

			vli_sub(u, u, v, ndigits);
			if (!EVEN(u))
				carry = vli_add(u, u, mod, ndigits);

			vli_rshift1(u, ndigits);
			if (carry)
				u[ndigits - 1] |= 0x8000000000000000ull;
		} else {
			vli_sub(b, b, a, ndigits);
			vli_rshift1(b, ndigits);

			if (vli_cmp(v, u, ndigits) < 0)
				vli_add(v, v, mod, ndigits);

			vli_sub(v, v, u, ndigits);
			if (!EVEN(v))
				carry = vli_add(v, v, mod, ndigits);

			vli_rshift1(v, ndigits);
			if (carry)
				v[ndigits - 1] |= 0x8000000000000000ull;
		}
	}

	vli_set(result, u, ndigits);
}
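/* This is the binary extended Euclidean algorithm.  It maintains the
 * invariants u * input == a (mod mod) and v * input == b (mod mod) while
 * shrinking a and b; halving a or b requires halving u or v modulo mod,
 * hence the add-mod-then-shift fix-ups above (mod is odd, so one of x and
 * x + mod is always even).  When a == b == gcd(input, mod) == 1, u holds
 * the inverse.
 */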
/* ------ Point operations ------ */

/* Returns true if point is the point at infinity, false otherwise. */
static bool ecc_point_is_zero(const struct ecc_point *point)
{
	return (vli_is_zero(point->x, point->ndigits) &&
		vli_is_zero(point->y, point->ndigits));
}
/* Point multiplication algorithm using Montgomery's ladder with co-Z
 * coordinates. From http://eprint.iacr.org/2011/338.pdf
 */

/* Double in place */
static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
				      u64 *curve_prime, unsigned int ndigits)
{
	/* t1 = x, t2 = y, t3 = z */
	u64 t4[ndigits];
	u64 t5[ndigits];

	if (vli_is_zero(z1, ndigits))
		return;

	/* t4 = y1^2 */
	vli_mod_square_fast(t4, y1, curve_prime, ndigits);
	/* t5 = x1*y1^2 = A */
	vli_mod_mult_fast(t5, x1, t4, curve_prime, ndigits);
	/* t4 = y1^4 */
	vli_mod_square_fast(t4, t4, curve_prime, ndigits);
	/* t2 = y1*z1 = z3 */
	vli_mod_mult_fast(y1, y1, z1, curve_prime, ndigits);
	/* t3 = z1^2 */
	vli_mod_square_fast(z1, z1, curve_prime, ndigits);

	/* t1 = x1 + z1^2 */
	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
	/* t3 = 2*z1^2 */
	vli_mod_add(z1, z1, z1, curve_prime, ndigits);
	/* t3 = x1 - z1^2 */
	vli_mod_sub(z1, x1, z1, curve_prime, ndigits);
	/* t1 = x1^2 - z1^4 */
	vli_mod_mult_fast(x1, x1, z1, curve_prime, ndigits);

	/* t3 = 2*(x1^2 - z1^4) */
	vli_mod_add(z1, x1, x1, curve_prime, ndigits);
	/* t1 = 3*(x1^2 - z1^4) */
	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
	if (vli_test_bit(x1, 0)) {
		u64 carry = vli_add(x1, x1, curve_prime, ndigits);

		vli_rshift1(x1, ndigits);
		x1[ndigits - 1] |= carry << 63;
	} else {
		vli_rshift1(x1, ndigits);
	}
	/* t1 = 3/2*(x1^2 - z1^4) = B */

	/* t3 = B^2 */
	vli_mod_square_fast(z1, x1, curve_prime, ndigits);
	/* t3 = B^2 - A */
	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
	/* t3 = B^2 - 2A = x3 */
	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
	/* t5 = A - x3 */
	vli_mod_sub(t5, t5, z1, curve_prime, ndigits);
	/* t1 = B * (A - x3) */
	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
	/* t4 = B * (A - x3) - y1^4 = y3 */
	vli_mod_sub(t4, x1, t4, curve_prime, ndigits);

	vli_set(x1, z1, ndigits);
	vli_set(z1, y1, ndigits);
	vli_set(y1, t4, ndigits);
}
/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
		    unsigned int ndigits)
{
	u64 t1[ndigits];

	vli_mod_square_fast(t1, z, curve_prime, ndigits);    /* z^2 */
	vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */
	vli_mod_mult_fast(t1, t1, z, curve_prime, ndigits);  /* z^3 */
	vli_mod_mult_fast(y1, y1, t1, curve_prime, ndigits); /* y1 * z^3 */
}
/* P = (x1, y1) => 2P, (x2, y2) => P' */
static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
				u64 *p_initial_z, u64 *curve_prime,
				unsigned int ndigits)
{
	u64 z[ndigits];

	vli_set(x2, x1, ndigits);
	vli_set(y2, y1, ndigits);

	vli_clear(z, ndigits);
	z[0] = 1;

	if (p_initial_z)
		vli_set(z, p_initial_z, ndigits);

	apply_z(x1, y1, z, curve_prime, ndigits);

	ecc_point_double_jacobian(x1, y1, z, curve_prime, ndigits);

	apply_z(x2, y2, z, curve_prime, ndigits);
}
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
 * or P => P', Q => P + Q
 */
static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
		     unsigned int ndigits)
{
	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
	u64 t5[ndigits];

	/* t5 = x2 - x1 */
	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
	/* t5 = (x2 - x1)^2 = A */
	vli_mod_square_fast(t5, t5, curve_prime, ndigits);
	/* t1 = x1*A = B */
	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
	/* t3 = x2*A = C */
	vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits);
	/* t4 = y2 - y1 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
	/* t5 = (y2 - y1)^2 = D */
	vli_mod_square_fast(t5, y2, curve_prime, ndigits);

	/* t5 = D - B */
	vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
	/* t5 = D - B - C = x3 */
	vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
	/* t3 = C - B */
	vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
	/* t2 = y1*(C - B) */
	vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits);
	/* t3 = B - x3 */
	vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
	/* t4 = (y2 - y1)*(B - x3) */
	vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits);
	/* t4 = y3 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	vli_set(x2, t5, ndigits);
}
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
 * or P => P - Q, Q => P + Q
 */
static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
		       unsigned int ndigits)
{
	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
	u64 t5[ndigits];
	u64 t6[ndigits];
	u64 t7[ndigits];

	/* t5 = x2 - x1 */
	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
	/* t5 = (x2 - x1)^2 = A */
	vli_mod_square_fast(t5, t5, curve_prime, ndigits);
	/* t1 = x1*A = B */
	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
	/* t3 = x2*A = C */
	vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits);
	/* t5 = y2 + y1 */
	vli_mod_add(t5, y2, y1, curve_prime, ndigits);
	/* t4 = y2 - y1 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	/* t6 = C - B */
	vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
	/* t2 = y1 * (C - B) */
	vli_mod_mult_fast(y1, y1, t6, curve_prime, ndigits);
	/* t6 = B + C */
	vli_mod_add(t6, x1, x2, curve_prime, ndigits);
	/* t3 = (y2 - y1)^2 */
	vli_mod_square_fast(x2, y2, curve_prime, ndigits);
	/* t3 = x3 */
	vli_mod_sub(x2, x2, t6, curve_prime, ndigits);

	/* t7 = B - x3 */
	vli_mod_sub(t7, x1, x2, curve_prime, ndigits);
	/* t4 = (y2 - y1)*(B - x3) */
	vli_mod_mult_fast(y2, y2, t7, curve_prime, ndigits);
	/* t4 = y3 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	/* t7 = (y2 + y1)^2 = F */
	vli_mod_square_fast(t7, t5, curve_prime, ndigits);
	/* t7 = x3' */
	vli_mod_sub(t7, t7, t6, curve_prime, ndigits);
	/* t6 = x3' - B */
	vli_mod_sub(t6, t7, x1, curve_prime, ndigits);
	/* t6 = (y2 + y1)*(x3' - B) */
	vli_mod_mult_fast(t6, t6, t5, curve_prime, ndigits);
	/* t2 = y3' */
	vli_mod_sub(y1, t6, y1, curve_prime, ndigits);

	vli_set(x1, t7, ndigits);
}
static void ecc_point_mult(struct ecc_point *result,
			   const struct ecc_point *point, const u64 *scalar,
			   u64 *initial_z, u64 *curve_prime,
			   unsigned int ndigits)
{
	/* R0 and R1 */
	u64 rx[2][ndigits];
	u64 ry[2][ndigits];
	u64 z[ndigits];
	int i, nb;
	int num_bits = vli_num_bits(scalar, ndigits);

	vli_set(rx[1], point->x, ndigits);
	vli_set(ry[1], point->y, ndigits);

	xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve_prime,
			    ndigits);

	for (i = num_bits - 2; i > 0; i--) {
		nb = !vli_test_bit(scalar, i);
		xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime,
			   ndigits);
		xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime,
			 ndigits);
	}

	nb = !vli_test_bit(scalar, 0);
	xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime,
		   ndigits);

	/* Find final 1/Z value. */
	/* X1 - X0 */
	vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits);
	/* Yb * (X1 - X0) */
	vli_mod_mult_fast(z, z, ry[1 - nb], curve_prime, ndigits);
	/* xP * Yb * (X1 - X0) */
	vli_mod_mult_fast(z, z, point->x, curve_prime, ndigits);

	/* 1 / (xP * Yb * (X1 - X0)) */
	vli_mod_inv(z, z, curve_prime, point->ndigits);

	/* yP / (xP * Yb * (X1 - X0)) */
	vli_mod_mult_fast(z, z, point->y, curve_prime, ndigits);
	/* Xb * yP / (xP * Yb * (X1 - X0)) */
	vli_mod_mult_fast(z, z, rx[1 - nb], curve_prime, ndigits);
	/* End 1/Z calculation */

	xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, ndigits);

	apply_z(rx[0], ry[0], z, curve_prime, ndigits);

	vli_set(result->x, rx[0], ndigits);
	vli_set(result->y, ry[0], ndigits);
}
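/* The loop above is a Montgomery ladder: every scalar bit costs exactly one
 * conjugate co-Z addition (xycz_add_c) plus one co-Z addition (xycz_add),
 * independent of the bit's value, which keeps the operation sequence
 * regular.  Both ladder registers share a single Z coordinate, so Z is
 * recovered only once, at the end, via the 1/Z computation; a non-NULL
 * initial_z randomizes the projective representation of the input point.
 */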
static inline void ecc_swap_digits(const u64 *in, u64 *out,
				   unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		out[i] = __swab64(in[ndigits - 1 - i]);
}
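/* ecc_swap_digits() converts between the big-endian byte strings of the
 * external key format and the little-endian digit arrays used internally:
 * it reverses the digit order and byte-swaps each digit.  For example,
 * with ndigits = 2 it maps in = { A, B } to out = { __swab64(B),
 * __swab64(A) }.
 */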
int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
		     const u8 *private_key, unsigned int private_key_len)
{
	int nbytes;
	const struct ecc_curve *curve = ecc_get_curve(curve_id);

	if (!private_key || !curve)
		return -EINVAL;

	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;

	if (private_key_len != nbytes)
		return -EINVAL;

	if (vli_is_zero((const u64 *)&private_key[0], ndigits))
		return -EINVAL;

	/* Make sure the private key is in the range [1, n-1]. */
	if (vli_cmp(curve->n, (const u64 *)&private_key[0], ndigits) != 1)
		return -EINVAL;

	return 0;
}
int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
		      const u8 *private_key, unsigned int private_key_len,
		      u8 *public_key, unsigned int public_key_len)
{
	int ret = 0;
	struct ecc_point *pk;
	u64 priv[ndigits];
	unsigned int nbytes;
	const struct ecc_curve *curve = ecc_get_curve(curve_id);

	if (!private_key || !curve) {
		ret = -EINVAL;
		goto out;
	}

	ecc_swap_digits((const u64 *)private_key, priv, ndigits);

	pk = ecc_alloc_point(ndigits);
	if (!pk) {
		ret = -ENOMEM;
		goto out;
	}

	ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits);
	if (ecc_point_is_zero(pk)) {
		ret = -EAGAIN;
		goto err_free_point;
	}

	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
	ecc_swap_digits(pk->x, (u64 *)public_key, ndigits);
	ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits);

err_free_point:
	ecc_free_point(pk);
out:
	return ret;
}
int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
			      const u8 *private_key, unsigned int private_key_len,
			      const u8 *public_key, unsigned int public_key_len,
			      u8 *secret, unsigned int secret_len)
{
	int ret = 0;
	struct ecc_point *product, *pk;
	u64 priv[ndigits];
	u64 rand_z[ndigits];
	unsigned int nbytes;
	const struct ecc_curve *curve = ecc_get_curve(curve_id);

	if (!private_key || !public_key || !curve) {
		ret = -EINVAL;
		goto out;
	}

	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;

	get_random_bytes(rand_z, nbytes);

	pk = ecc_alloc_point(ndigits);
	if (!pk) {
		ret = -ENOMEM;
		goto out;
	}

	product = ecc_alloc_point(ndigits);
	if (!product) {
		ret = -ENOMEM;
		goto err_alloc_product;
	}

	ecc_swap_digits((const u64 *)public_key, pk->x, ndigits);
	ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits);
	ecc_swap_digits((const u64 *)private_key, priv, ndigits);

	ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);

	ecc_swap_digits(product->x, (u64 *)secret, ndigits);

	if (ecc_point_is_zero(product))
		ret = -EFAULT;

	ecc_free_point(product);
err_alloc_product:
	ecc_free_point(pk);
out:
	return ret;
}