1 #![cfg_attr(feature = "cargo-clippy", allow(many_single_char_names))]
3 use consts
::{BLOCK_LEN, K0, K1, K2, K3}
;
4 use byte_tools
::read_u32v_be
;
7 /// Not an intrinsic, but gets the first element of a vector.
9 pub fn sha1_first(w0
: u32x4
) -> u32 {
13 /// Not an intrinsic, but adds a word to the first element of a vector.
15 pub fn sha1_first_add(e
: u32, w0
: u32x4
) -> u32x4
{
16 let u32x4(a
, b
, c
, d
) = w0
;
17 u32x4(e
.wrapping_add(a
), b
, c
, d
)
20 /// Emulates `llvm.x86.sha1msg1` intrinsic.
21 fn sha1msg1(a
: u32x4
, b
: u32x4
) -> u32x4
{
22 let u32x4(_
, _
, w2
, w3
) = a
;
23 let u32x4(w4
, w5
, _
, _
) = b
;
24 a ^
u32x4(w2
, w3
, w4
, w5
)
27 /// Emulates `llvm.x86.sha1msg2` intrinsic.
28 fn sha1msg2(a
: u32x4
, b
: u32x4
) -> u32x4
{
29 let u32x4(x0
, x1
, x2
, x3
) = a
;
30 let u32x4(_
, w13
, w14
, w15
) = b
;
32 let w16
= (x0 ^ w13
).rotate_left(1);
33 let w17
= (x1 ^ w14
).rotate_left(1);
34 let w18
= (x2 ^ w15
).rotate_left(1);
35 let w19
= (x3 ^ w16
).rotate_left(1);
37 u32x4(w16
, w17
, w18
, w19
)
40 /// Performs 4 rounds of the message schedule update.
42 pub fn sha1_schedule_x4(v0: u32x4, v1: u32x4, v2: u32x4, v3: u32x4) -> u32x4 {
43 sha1msg2(sha1msg1(v0, v1) ^ v2, v3)
47 /// Emulates `llvm.x86.sha1nexte` intrinsic.
49 fn sha1_first_half(abcd
: u32x4
, msg
: u32x4
) -> u32x4
{
50 sha1_first_add(sha1_first(abcd
).rotate_left(30), msg
)
53 /// Emulates `llvm.x86.sha1rnds4` intrinsic.
54 /// Performs 4 rounds of the message block digest.
55 fn sha1_digest_round_x4(abcd
: u32x4
, work
: u32x4
, i
: i8) -> u32x4
{
56 const K0V
: u32x4
= u32x4(K0
, K0
, K0
, K0
);
57 const K1V
: u32x4
= u32x4(K1
, K1
, K1
, K1
);
58 const K2V
: u32x4
= u32x4(K2
, K2
, K2
, K2
);
59 const K3V
: u32x4
= u32x4(K3
, K3
, K3
, K3
);
62 0 => sha1rnds4c(abcd
, work
+ K0V
),
63 1 => sha1rnds4p(abcd
, work
+ K1V
),
64 2 => sha1rnds4m(abcd
, work
+ K2V
),
65 3 => sha1rnds4p(abcd
, work
+ K3V
),
66 _
=> unreachable
!("unknown icosaround index"),
70 /// Not an intrinsic, but helps emulate `llvm.x86.sha1rnds4` intrinsic.
71 fn sha1rnds4c(abcd
: u32x4
, msg
: u32x4
) -> u32x4
{
72 let u32x4(mut a
, mut b
, mut c
, mut d
) = abcd
;
73 let u32x4(t
, u
, v
, w
) = msg
;
76 macro_rules
! bool3ary_202
{
77 ($a
:expr
, $b
:expr
, $c
:expr
) => ($c ^
($a
& ($b ^ $c
)))
78 } // Choose, MD5F, SHA1C
80 e
= e
.wrapping_add(a
.rotate_left(5))
81 .wrapping_add(bool3ary_202
!(b
, c
, d
))
83 b
= b
.rotate_left(30);
85 d
= d
.wrapping_add(e
.rotate_left(5))
86 .wrapping_add(bool3ary_202
!(a
, b
, c
))
88 a
= a
.rotate_left(30);
90 c
= c
.wrapping_add(d
.rotate_left(5))
91 .wrapping_add(bool3ary_202
!(e
, a
, b
))
93 e
= e
.rotate_left(30);
95 b
= b
.wrapping_add(c
.rotate_left(5))
96 .wrapping_add(bool3ary_202
!(d
, e
, a
))
98 d
= d
.rotate_left(30);
103 /// Not an intrinsic, but helps emulate `llvm.x86.sha1rnds4` intrinsic.
104 fn sha1rnds4p(abcd
: u32x4
, msg
: u32x4
) -> u32x4
{
105 let u32x4(mut a
, mut b
, mut c
, mut d
) = abcd
;
106 let u32x4(t
, u
, v
, w
) = msg
;
109 macro_rules
! bool3ary_150
{
110 ($a
:expr
, $b
:expr
, $c
:expr
) => ($a ^ $b ^ $c
)
111 } // Parity, XOR, MD5H, SHA1P
113 e
= e
.wrapping_add(a
.rotate_left(5))
114 .wrapping_add(bool3ary_150
!(b
, c
, d
))
116 b
= b
.rotate_left(30);
118 d
= d
.wrapping_add(e
.rotate_left(5))
119 .wrapping_add(bool3ary_150
!(a
, b
, c
))
121 a
= a
.rotate_left(30);
123 c
= c
.wrapping_add(d
.rotate_left(5))
124 .wrapping_add(bool3ary_150
!(e
, a
, b
))
126 e
= e
.rotate_left(30);
128 b
= b
.wrapping_add(c
.rotate_left(5))
129 .wrapping_add(bool3ary_150
!(d
, e
, a
))
131 d
= d
.rotate_left(30);
136 /// Not an intrinsic, but helps emulate `llvm.x86.sha1rnds4` intrinsic.
137 fn sha1rnds4m(abcd
: u32x4
, msg
: u32x4
) -> u32x4
{
138 let u32x4(mut a
, mut b
, mut c
, mut d
) = abcd
;
139 let u32x4(t
, u
, v
, w
) = msg
;
142 macro_rules
! bool3ary_232
{
143 ($a
:expr
, $b
:expr
, $c
:expr
) => (($a
& $b
) ^
($a
& $c
) ^
($b
& $c
))
146 e
= e
.wrapping_add(a
.rotate_left(5))
147 .wrapping_add(bool3ary_232
!(b
, c
, d
))
149 b
= b
.rotate_left(30);
151 d
= d
.wrapping_add(e
.rotate_left(5))
152 .wrapping_add(bool3ary_232
!(a
, b
, c
))
154 a
= a
.rotate_left(30);
156 c
= c
.wrapping_add(d
.rotate_left(5))
157 .wrapping_add(bool3ary_232
!(e
, a
, b
))
159 e
= e
.rotate_left(30);
161 b
= b
.wrapping_add(c
.rotate_left(5))
162 .wrapping_add(bool3ary_232
!(d
, e
, a
))
164 d
= d
.rotate_left(30);
169 /// Process a block with the SHA-1 algorithm.
170 fn sha1_digest_block_u32(state
: &mut [u32; 5], block
: &[u32; 16]) {
172 macro_rules
! schedule
{
173 ($v0
:expr
, $v1
:expr
, $v2
:expr
, $v3
:expr
) => (
174 sha1msg2(sha1msg1($v0
, $v1
) ^ $v2
, $v3
)
178 macro_rules
! rounds4
{
179 ($h0
:ident
, $h1
:ident
, $wk
:expr
, $i
:expr
) => (
180 sha1_digest_round_x4($h0
, sha1_first_half($h1
, $wk
), $i
)
185 // TODO: replace with `u32x4::load`
186 let mut h0
= u32x4(state
[0], state
[1], state
[2], state
[3]);
187 let mut w0
= u32x4(block
[0], block
[1], block
[2], block
[3]);
188 let mut h1
= sha1_digest_round_x4(h0
, sha1_first_add(state
[4], w0
), 0);
189 let mut w1
= u32x4(block
[4], block
[5], block
[6], block
[7]);
190 h0
= rounds4
!(h1
, h0
, w1
, 0);
191 let mut w2
= u32x4(block
[8], block
[9], block
[10], block
[11]);
192 h1
= rounds4
!(h0
, h1
, w2
, 0);
193 let mut w3
= u32x4(block
[12], block
[13], block
[14], block
[15]);
194 h0
= rounds4
!(h1
, h0
, w3
, 0);
195 let mut w4
= schedule
!(w0
, w1
, w2
, w3
);
196 h1
= rounds4
!(h0
, h1
, w4
, 0);
199 w0
= schedule
!(w1
, w2
, w3
, w4
);
200 h0
= rounds4
!(h1
, h0
, w0
, 1);
201 w1
= schedule
!(w2
, w3
, w4
, w0
);
202 h1
= rounds4
!(h0
, h1
, w1
, 1);
203 w2
= schedule
!(w3
, w4
, w0
, w1
);
204 h0
= rounds4
!(h1
, h0
, w2
, 1);
205 w3
= schedule
!(w4
, w0
, w1
, w2
);
206 h1
= rounds4
!(h0
, h1
, w3
, 1);
207 w4
= schedule
!(w0
, w1
, w2
, w3
);
208 h0
= rounds4
!(h1
, h0
, w4
, 1);
211 w0
= schedule
!(w1
, w2
, w3
, w4
);
212 h1
= rounds4
!(h0
, h1
, w0
, 2);
213 w1
= schedule
!(w2
, w3
, w4
, w0
);
214 h0
= rounds4
!(h1
, h0
, w1
, 2);
215 w2
= schedule
!(w3
, w4
, w0
, w1
);
216 h1
= rounds4
!(h0
, h1
, w2
, 2);
217 w3
= schedule
!(w4
, w0
, w1
, w2
);
218 h0
= rounds4
!(h1
, h0
, w3
, 2);
219 w4
= schedule
!(w0
, w1
, w2
, w3
);
220 h1
= rounds4
!(h0
, h1
, w4
, 2);
223 w0
= schedule
!(w1
, w2
, w3
, w4
);
224 h0
= rounds4
!(h1
, h0
, w0
, 3);
225 w1
= schedule
!(w2
, w3
, w4
, w0
);
226 h1
= rounds4
!(h0
, h1
, w1
, 3);
227 w2
= schedule
!(w3
, w4
, w0
, w1
);
228 h0
= rounds4
!(h1
, h0
, w2
, 3);
229 w3
= schedule
!(w4
, w0
, w1
, w2
);
230 h1
= rounds4
!(h0
, h1
, w3
, 3);
231 w4
= schedule
!(w0
, w1
, w2
, w3
);
232 h0
= rounds4
!(h1
, h0
, w4
, 3);
234 let e
= sha1_first(h1
).rotate_left(30);
235 let u32x4(a
, b
, c
, d
) = h0
;
237 state
[0] = state
[0].wrapping_add(a
);
238 state
[1] = state
[1].wrapping_add(b
);
239 state
[2] = state
[2].wrapping_add(c
);
240 state
[3] = state
[3].wrapping_add(d
);
241 state
[4] = state
[4].wrapping_add(e
);
244 /// Process a block with the SHA-1 algorithm. (See more...)
246 /// SHA-1 is a cryptographic hash function, and as such, it operates
247 /// on an arbitrary number of bytes. This function operates on a fixed
248 /// number of bytes. If you call this function with anything other than
249 /// 64 bytes, then it will panic! This function takes two arguments:
251 /// * `state` is reference to an **array** of 5 words.
252 /// * `block` is reference to a **slice** of 64 bytes.
254 /// If you want the function that performs a message digest on an arbitrary
255 /// number of bytes, then see also the `Sha1` struct above.
259 /// First, some background. Both ARM and Intel are releasing documentation
260 /// that they plan to include instruction set extensions for SHA1 and SHA256
261 /// sometime in the near future. Second, LLVM won't lower these intrinsics yet,
262 /// so these functions were written emulate these instructions. Finally,
263 /// the block function implemented with these emulated intrinsics turned out
264 /// to be quite fast! What follows is a discussion of this CPU-level view
265 /// of the SHA-1 algorithm and how it relates to the mathematical definition.
267 /// The SHA instruction set extensions can be divided up into two categories:
269 /// * message work schedule update calculation ("schedule" v., "work" n.)
270 /// * message block 80-round digest calculation ("digest" v., "block" n.)
272 /// The schedule-related functions can be used to easily perform 4 rounds
273 /// of the message work schedule update calculation, as shown below:
276 /// macro_rules! schedule_x4 {
277 /// ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => (
278 /// sha1msg2(sha1msg1($v0, $v1) ^ $v2, $v3)
282 /// macro_rules! round_x4 {
283 /// ($h0:ident, $h1:ident, $wk:expr, $i:expr) => (
284 /// sha1rnds4($h0, sha1_first_half($h1, $wk), $i)
289 /// and also shown above is how the digest-related functions can be used to
290 /// perform 4 rounds of the message block digest calculation.
292 pub fn compress(state
: &mut [u32; 5], block
: &[u8; 64]) {
293 let mut block_u32
= [0u32; BLOCK_LEN
];
294 read_u32v_be(&mut block_u32
[..], block
);
295 sha1_digest_block_u32(state
, &block_u32
);