vendor/sha-1/src/utils.rs

   1 #![cfg_attr(feature = "cargo-clippy", allow(many_single_char_names))]
   2
   3 use consts::{BLOCK_LEN, K0, K1, K2, K3};
   4 use byte_tools::read_u32v_be;
   5 use simd::u32x4;
   6
   7 /// Not an intrinsic, but gets the first element of a vector.
   8 #[inline]
   9 pub fn sha1_first(w0: u32x4) -> u32 {
  10     w0.0
  11 }
  12
  13 /// Not an intrinsic, but adds a word to the first element of a vector.
  14 #[inline]
  15 pub fn sha1_first_add(e: u32, w0: u32x4) -> u32x4 {
  16     let u32x4(a, b, c, d) = w0;
  17     u32x4(e.wrapping_add(a), b, c, d)
  18 }
  19
  20 /// Emulates `llvm.x86.sha1msg1` intrinsic.
  21 fn sha1msg1(a: u32x4, b: u32x4) -> u32x4 {
  22     let u32x4(_, _, w2, w3) = a;
  23     let u32x4(w4, w5, _, _) = b;
  24     a ^ u32x4(w2, w3, w4, w5)
  25 }
  26
  27 /// Emulates `llvm.x86.sha1msg2` intrinsic.
  28 fn sha1msg2(a: u32x4, b: u32x4) -> u32x4 {
  29     let u32x4(x0, x1, x2, x3) = a;
  30     let u32x4(_, w13, w14, w15) = b;
  31
  32     let w16 = (x0 ^ w13).rotate_left(1);
  33     let w17 = (x1 ^ w14).rotate_left(1);
  34     let w18 = (x2 ^ w15).rotate_left(1);
  35     let w19 = (x3 ^ w16).rotate_left(1);
  36
  37     u32x4(w16, w17, w18, w19)
  38 }
  39
  40 /// Performs 4 rounds of the message schedule update.
  41 /*
  42 pub fn sha1_schedule_x4(v0: u32x4, v1: u32x4, v2: u32x4, v3: u32x4) -> u32x4 {
  43     sha1msg2(sha1msg1(v0, v1) ^ v2, v3)
  44 }
  45 */
  46
  47 /// Emulates `llvm.x86.sha1nexte` intrinsic.
  48 #[inline]
  49 fn sha1_first_half(abcd: u32x4, msg: u32x4) -> u32x4 {
  50     sha1_first_add(sha1_first(abcd).rotate_left(30), msg)
  51 }
  52
  53 /// Emulates `llvm.x86.sha1rnds4` intrinsic.
  54 /// Performs 4 rounds of the message block digest.
  55 fn sha1_digest_round_x4(abcd: u32x4, work: u32x4, i: i8) -> u32x4 {
  56     const K0V: u32x4 = u32x4(K0, K0, K0, K0);
  57     const K1V: u32x4 = u32x4(K1, K1, K1, K1);
  58     const K2V: u32x4 = u32x4(K2, K2, K2, K2);
  59     const K3V: u32x4 = u32x4(K3, K3, K3, K3);
  60
  61     match i {
  62         0 => sha1rnds4c(abcd, work + K0V),
  63         1 => sha1rnds4p(abcd, work + K1V),
  64         2 => sha1rnds4m(abcd, work + K2V),
  65         3 => sha1rnds4p(abcd, work + K3V),
  66         _ => unreachable!("unknown icosaround index"),
  67     }
  68 }
  69
  70 /// Not an intrinsic, but helps emulate `llvm.x86.sha1rnds4` intrinsic.
  71 fn sha1rnds4c(abcd: u32x4, msg: u32x4) -> u32x4 {
  72     let u32x4(mut a, mut b, mut c, mut d) = abcd;
  73     let u32x4(t, u, v, w) = msg;
  74     let mut e = 0u32;
  75
  76     macro_rules! bool3ary_202 {
  77         ($a:expr, $b:expr, $c:expr) => ($c ^ ($a & ($b ^ $c)))
  78     } // Choose, MD5F, SHA1C
  79
  80     e = e.wrapping_add(a.rotate_left(5))
  81         .wrapping_add(bool3ary_202!(b, c, d))
  82         .wrapping_add(t);
  83     b = b.rotate_left(30);
  84
  85     d = d.wrapping_add(e.rotate_left(5))
  86         .wrapping_add(bool3ary_202!(a, b, c))
  87         .wrapping_add(u);
  88     a = a.rotate_left(30);
  89
  90     c = c.wrapping_add(d.rotate_left(5))
  91         .wrapping_add(bool3ary_202!(e, a, b))
  92         .wrapping_add(v);
  93     e = e.rotate_left(30);
  94
  95     b = b.wrapping_add(c.rotate_left(5))
  96         .wrapping_add(bool3ary_202!(d, e, a))
  97         .wrapping_add(w);
  98     d = d.rotate_left(30);
  99
 100     u32x4(b, c, d, e)
 101 }
 102
 103 /// Not an intrinsic, but helps emulate `llvm.x86.sha1rnds4` intrinsic.
 104 fn sha1rnds4p(abcd: u32x4, msg: u32x4) -> u32x4 {
 105     let u32x4(mut a, mut b, mut c, mut d) = abcd;
 106     let u32x4(t, u, v, w) = msg;
 107     let mut e = 0u32;
 108
 109     macro_rules! bool3ary_150 {
 110         ($a:expr, $b:expr, $c:expr) => ($a ^ $b ^ $c)
 111     } // Parity, XOR, MD5H, SHA1P
 112
 113     e = e.wrapping_add(a.rotate_left(5))
 114         .wrapping_add(bool3ary_150!(b, c, d))
 115         .wrapping_add(t);
 116     b = b.rotate_left(30);
 117
 118     d = d.wrapping_add(e.rotate_left(5))
 119         .wrapping_add(bool3ary_150!(a, b, c))
 120         .wrapping_add(u);
 121     a = a.rotate_left(30);
 122
 123     c = c.wrapping_add(d.rotate_left(5))
 124         .wrapping_add(bool3ary_150!(e, a, b))
 125         .wrapping_add(v);
 126     e = e.rotate_left(30);
 127
 128     b = b.wrapping_add(c.rotate_left(5))
 129         .wrapping_add(bool3ary_150!(d, e, a))
 130         .wrapping_add(w);
 131     d = d.rotate_left(30);
 132
 133     u32x4(b, c, d, e)
 134 }
 135
 136 /// Not an intrinsic, but helps emulate `llvm.x86.sha1rnds4` intrinsic.
 137 fn sha1rnds4m(abcd: u32x4, msg: u32x4) -> u32x4 {
 138     let u32x4(mut a, mut b, mut c, mut d) = abcd;
 139     let u32x4(t, u, v, w) = msg;
 140     let mut e = 0u32;
 141
 142     macro_rules! bool3ary_232 {
 143         ($a:expr, $b:expr, $c:expr) => (($a & $b) ^ ($a & $c) ^ ($b & $c))
 144     } // Majority, SHA1M
 145
 146     e = e.wrapping_add(a.rotate_left(5))
 147         .wrapping_add(bool3ary_232!(b, c, d))
 148         .wrapping_add(t);
 149     b = b.rotate_left(30);
 150
 151     d = d.wrapping_add(e.rotate_left(5))
 152         .wrapping_add(bool3ary_232!(a, b, c))
 153         .wrapping_add(u);
 154     a = a.rotate_left(30);
 155
 156     c = c.wrapping_add(d.rotate_left(5))
 157         .wrapping_add(bool3ary_232!(e, a, b))
 158         .wrapping_add(v);
 159     e = e.rotate_left(30);
 160
 161     b = b.wrapping_add(c.rotate_left(5))
 162         .wrapping_add(bool3ary_232!(d, e, a))
 163         .wrapping_add(w);
 164     d = d.rotate_left(30);
 165
 166     u32x4(b, c, d, e)
 167 }
 168
 169 /// Process a block with the SHA-1 algorithm.
 170 fn sha1_digest_block_u32(state: &mut [u32; 5], block: &[u32; 16]) {
 171
 172     macro_rules! schedule {
 173         ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => (
 174             sha1msg2(sha1msg1($v0, $v1) ^ $v2, $v3)
 175         )
 176     }
 177
 178     macro_rules! rounds4 {
 179         ($h0:ident, $h1:ident, $wk:expr, $i:expr) => (
 180             sha1_digest_round_x4($h0, sha1_first_half($h1, $wk), $i)
 181         )
 182     }
 183
 184     // Rounds 0..20
 185     // TODO: replace with `u32x4::load`
 186     let mut h0 = u32x4(state[0], state[1], state[2], state[3]);
 187     let mut w0 = u32x4(block[0], block[1], block[2], block[3]);
 188     let mut h1 = sha1_digest_round_x4(h0, sha1_first_add(state[4], w0), 0);
 189     let mut w1 = u32x4(block[4], block[5], block[6], block[7]);
 190     h0 = rounds4!(h1, h0, w1, 0);
 191     let mut w2 = u32x4(block[8], block[9], block[10], block[11]);
 192     h1 = rounds4!(h0, h1, w2, 0);
 193     let mut w3 = u32x4(block[12], block[13], block[14], block[15]);
 194     h0 = rounds4!(h1, h0, w3, 0);
 195     let mut w4 = schedule!(w0, w1, w2, w3);
 196     h1 = rounds4!(h0, h1, w4, 0);
 197
 198     // Rounds 20..40
 199     w0 = schedule!(w1, w2, w3, w4);
 200     h0 = rounds4!(h1, h0, w0, 1);
 201     w1 = schedule!(w2, w3, w4, w0);
 202     h1 = rounds4!(h0, h1, w1, 1);
 203     w2 = schedule!(w3, w4, w0, w1);
 204     h0 = rounds4!(h1, h0, w2, 1);
 205     w3 = schedule!(w4, w0, w1, w2);
 206     h1 = rounds4!(h0, h1, w3, 1);
 207     w4 = schedule!(w0, w1, w2, w3);
 208     h0 = rounds4!(h1, h0, w4, 1);
 209
 210     // Rounds 40..60
 211     w0 = schedule!(w1, w2, w3, w4);
 212     h1 = rounds4!(h0, h1, w0, 2);
 213     w1 = schedule!(w2, w3, w4, w0);
 214     h0 = rounds4!(h1, h0, w1, 2);
 215     w2 = schedule!(w3, w4, w0, w1);
 216     h1 = rounds4!(h0, h1, w2, 2);
 217     w3 = schedule!(w4, w0, w1, w2);
 218     h0 = rounds4!(h1, h0, w3, 2);
 219     w4 = schedule!(w0, w1, w2, w3);
 220     h1 = rounds4!(h0, h1, w4, 2);
 221
 222     // Rounds 60..80
 223     w0 = schedule!(w1, w2, w3, w4);
 224     h0 = rounds4!(h1, h0, w0, 3);
 225     w1 = schedule!(w2, w3, w4, w0);
 226     h1 = rounds4!(h0, h1, w1, 3);
 227     w2 = schedule!(w3, w4, w0, w1);
 228     h0 = rounds4!(h1, h0, w2, 3);
 229     w3 = schedule!(w4, w0, w1, w2);
 230     h1 = rounds4!(h0, h1, w3, 3);
 231     w4 = schedule!(w0, w1, w2, w3);
 232     h0 = rounds4!(h1, h0, w4, 3);
 233
 234     let e = sha1_first(h1).rotate_left(30);
 235     let u32x4(a, b, c, d) = h0;
 236
 237     state[0] = state[0].wrapping_add(a);
 238     state[1] = state[1].wrapping_add(b);
 239     state[2] = state[2].wrapping_add(c);
 240     state[3] = state[3].wrapping_add(d);
 241     state[4] = state[4].wrapping_add(e);
 242 }
 243
 244 /// Process a block with the SHA-1 algorithm. (See more...)
 245 ///
 246 /// SHA-1 is a cryptographic hash function, and as such, it operates
 247 /// on an arbitrary number of bytes. This function operates on a fixed
 248 /// number of bytes. If you call this function with anything other than
 249 /// 64 bytes, then it will panic! This function takes two arguments:
 250 ///
 251 /// * `state` is reference to an **array** of 5 words.
 252 /// * `block` is reference to a **slice** of 64 bytes.
 253 ///
 254 /// If you want the function that performs a message digest on an arbitrary
 255 /// number of bytes, then see also the `Sha1` struct above.
 256 ///
 257 /// # Implementation
 258 ///
 259 /// First, some background. Both ARM and Intel are releasing documentation
 260 /// that they plan to include instruction set extensions for SHA1 and SHA256
 261 /// sometime in the near future. Second, LLVM won't lower these intrinsics yet,
 262 /// so these functions were written emulate these instructions. Finally,
 263 /// the block function implemented with these emulated intrinsics turned out
 264 /// to be quite fast! What follows is a discussion of this CPU-level view
 265 /// of the SHA-1 algorithm and how it relates to the mathematical definition.
 266 ///
 267 /// The SHA instruction set extensions can be divided up into two categories:
 268 ///
 269 /// * message work schedule update calculation ("schedule" v., "work" n.)
 270 /// * message block 80-round digest calculation ("digest" v., "block" n.)
 271 ///
 272 /// The schedule-related functions can be used to easily perform 4 rounds
 273 /// of the message work schedule update calculation, as shown below:
 274 ///
 275 /// ```ignore
 276 /// macro_rules! schedule_x4 {
 277 ///     ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => (
 278 ///         sha1msg2(sha1msg1($v0, $v1) ^ $v2, $v3)
 279 ///     )
 280 /// }
 281 ///
 282 /// macro_rules! round_x4 {
 283 ///     ($h0:ident, $h1:ident, $wk:expr, $i:expr) => (
 284 ///         sha1rnds4($h0, sha1_first_half($h1, $wk), $i)
 285 ///     )
 286 /// }
 287 /// ```
 288 ///
 289 /// and also shown above is how the digest-related functions can be used to
 290 /// perform 4 rounds of the message block digest calculation.
 291 ///
 292 pub fn compress(state: &mut [u32; 5], block: &[u8; 64]) {
 293     let mut block_u32 = [0u32; BLOCK_LEN];
 294     read_u32v_be(&mut block_u32[..], block);
 295     sha1_digest_block_u32(state, &block_u32);
 296 }