]> git.proxmox.com Git - efi-boot-shim.git/blame - Cryptlib/OpenSSL/crypto/sha/sha512.c
New upstream version 15+1533136590.3beb971
[efi-boot-shim.git] / Cryptlib / OpenSSL / crypto / sha / sha512.c
CommitLineData
7bf7a6d0
MTL
1/*
2 * Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
3e575651 8 */
7bf7a6d0 9
3e575651 10#include <openssl/opensslconf.h>
d3819813 11/*-
3e575651
SL
12 * IMPLEMENTATION NOTES.
13 *
14 * As you might have noticed 32-bit hash algorithms:
15 *
7bf7a6d0 16 * - permit SHA_LONG to be wider than 32-bit
3e575651
SL
17 * - optimized versions implement two transform functions: one operating
18 * on [aligned] data in host byte order and one - on data in input
19 * stream byte order;
20 * - share common byte-order neutral collector and padding function
21 * implementations, ../md32_common.h;
22 *
23 * None of the above applies to this SHA-512 implementation. Reasons
24 * [in reverse order] are:
25 *
26 * - it's the only 64-bit hash algorithm at the time of this writing,
27 * there is no need for common collector/padding implementation [yet];
28 * - by supporting only one transform function [which operates on
29 * *aligned* data in input stream byte order, big-endian in this case]
30 * we minimize burden of maintenance in two ways: a) collector/padding
31 * function is simpler; b) only one transform function to stare at;
32 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33 * apply a number of optimizations to mitigate potential performance
34 * penalties caused by previous design decision;
35 *
36 * Caveat lector.
37 *
38 * Implementation relies on the fact that "long long" is 64-bit on
39 * both 32- and 64-bit platforms. If some compiler vendor comes up
40 * with 128-bit long long, adjustment to sha.h would be required.
41 * As this implementation relies on 64-bit integer type, it's totally
42 * inappropriate for platforms which don't support it, most notably
43 * 16-bit platforms.
d3819813 44 * <appro@fy.chalmers.se>
3e575651 45 */
7bf7a6d0
MTL
46#include <stdlib.h>
47#include <string.h>
3e575651 48
7bf7a6d0
MTL
49#include <openssl/crypto.h>
50#include <openssl/sha.h>
51#include <openssl/opensslv.h>
3e575651 52
7bf7a6d0 53#include "internal/cryptlib.h"
3e575651 54
7bf7a6d0 55#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
3e575651
SL
56 defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57 defined(__s390__) || defined(__s390x__) || \
d3819813 58 defined(__aarch64__) || \
3e575651 59 defined(SHA512_ASM)
7bf7a6d0
MTL
60# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
61#endif
3e575651 62
7bf7a6d0 63int SHA384_Init(SHA512_CTX *c)
d3819813
MTL
64{
65 c->h[0] = U64(0xcbbb9d5dc1059ed8);
66 c->h[1] = U64(0x629a292a367cd507);
67 c->h[2] = U64(0x9159015a3070dd17);
68 c->h[3] = U64(0x152fecd8f70e5939);
69 c->h[4] = U64(0x67332667ffc00b31);
70 c->h[5] = U64(0x8eb44a8768581511);
71 c->h[6] = U64(0xdb0c2e0d64f98fa7);
72 c->h[7] = U64(0x47b5481dbefa4fa4);
73
74 c->Nl = 0;
75 c->Nh = 0;
76 c->num = 0;
77 c->md_len = SHA384_DIGEST_LENGTH;
78 return 1;
79}
80
7bf7a6d0 81int SHA512_Init(SHA512_CTX *c)
d3819813
MTL
82{
83 c->h[0] = U64(0x6a09e667f3bcc908);
84 c->h[1] = U64(0xbb67ae8584caa73b);
85 c->h[2] = U64(0x3c6ef372fe94f82b);
86 c->h[3] = U64(0xa54ff53a5f1d36f1);
87 c->h[4] = U64(0x510e527fade682d1);
88 c->h[5] = U64(0x9b05688c2b3e6c1f);
89 c->h[6] = U64(0x1f83d9abfb41bd6b);
90 c->h[7] = U64(0x5be0cd19137e2179);
91
92 c->Nl = 0;
93 c->Nh = 0;
94 c->num = 0;
95 c->md_len = SHA512_DIGEST_LENGTH;
96 return 1;
97}
98
7bf7a6d0 99#ifndef SHA512_ASM
3e575651 100static
7bf7a6d0 101#endif
d3819813
MTL
102void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
103
104int SHA512_Final(unsigned char *md, SHA512_CTX *c)
105{
106 unsigned char *p = (unsigned char *)c->u.p;
107 size_t n = c->num;
108
109 p[n] = 0x80; /* There always is a room for one */
110 n++;
7bf7a6d0
MTL
111 if (n > (sizeof(c->u) - 16)) {
112 memset(p + n, 0, sizeof(c->u) - n);
113 n = 0;
114 sha512_block_data_order(c, p, 1);
115 }
d3819813
MTL
116
117 memset(p + n, 0, sizeof(c->u) - 16 - n);
7bf7a6d0 118#ifdef B_ENDIAN
d3819813
MTL
119 c->u.d[SHA_LBLOCK - 2] = c->Nh;
120 c->u.d[SHA_LBLOCK - 1] = c->Nl;
7bf7a6d0 121#else
d3819813
MTL
122 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
123 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
124 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
125 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
126 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
127 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
128 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
129 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
130 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
131 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
132 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
133 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
134 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
135 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
136 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
137 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
7bf7a6d0 138#endif
d3819813
MTL
139
140 sha512_block_data_order(c, p, 1);
141
142 if (md == 0)
143 return 0;
144
145 switch (c->md_len) {
146 /* Let compiler decide if it's appropriate to unroll... */
147 case SHA384_DIGEST_LENGTH:
148 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
149 SHA_LONG64 t = c->h[n];
150
151 *(md++) = (unsigned char)(t >> 56);
152 *(md++) = (unsigned char)(t >> 48);
153 *(md++) = (unsigned char)(t >> 40);
154 *(md++) = (unsigned char)(t >> 32);
155 *(md++) = (unsigned char)(t >> 24);
156 *(md++) = (unsigned char)(t >> 16);
157 *(md++) = (unsigned char)(t >> 8);
158 *(md++) = (unsigned char)(t);
159 }
160 break;
161 case SHA512_DIGEST_LENGTH:
162 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
163 SHA_LONG64 t = c->h[n];
164
165 *(md++) = (unsigned char)(t >> 56);
166 *(md++) = (unsigned char)(t >> 48);
167 *(md++) = (unsigned char)(t >> 40);
168 *(md++) = (unsigned char)(t >> 32);
169 *(md++) = (unsigned char)(t >> 24);
170 *(md++) = (unsigned char)(t >> 16);
171 *(md++) = (unsigned char)(t >> 8);
172 *(md++) = (unsigned char)(t);
173 }
174 break;
175 /* ... as well as make sure md_len is not abused. */
176 default:
177 return 0;
178 }
179
180 return 1;
181}
182
183int SHA384_Final(unsigned char *md, SHA512_CTX *c)
184{
185 return SHA512_Final(md, c);
186}
187
188int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
189{
190 SHA_LONG64 l;
191 unsigned char *p = c->u.p;
192 const unsigned char *data = (const unsigned char *)_data;
193
194 if (len == 0)
195 return 1;
196
197 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
198 if (l < c->Nl)
199 c->Nh++;
200 if (sizeof(len) >= 8)
201 c->Nh += (((SHA_LONG64) len) >> 61);
202 c->Nl = l;
203
204 if (c->num != 0) {
205 size_t n = sizeof(c->u) - c->num;
206
207 if (len < n) {
208 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
209 return 1;
210 } else {
211 memcpy(p + c->num, data, n), c->num = 0;
212 len -= n, data += n;
213 sha512_block_data_order(c, p, 1);
214 }
215 }
216
217 if (len >= sizeof(c->u)) {
7bf7a6d0 218#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
d3819813
MTL
219 if ((size_t)data % sizeof(c->u.d[0]) != 0)
220 while (len >= sizeof(c->u))
221 memcpy(p, data, sizeof(c->u)),
222 sha512_block_data_order(c, p, 1),
223 len -= sizeof(c->u), data += sizeof(c->u);
224 else
7bf7a6d0 225#endif
d3819813
MTL
226 sha512_block_data_order(c, data, len / sizeof(c->u)),
227 data += len, len %= sizeof(c->u), data -= len;
228 }
229
230 if (len != 0)
231 memcpy(p, data, len), c->num = (int)len;
232
233 return 1;
234}
235
236int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
237{
238 return SHA512_Update(c, data, len);
239}
240
241void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
242{
7bf7a6d0 243#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
d3819813
MTL
244 if ((size_t)data % sizeof(c->u.d[0]) != 0)
245 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
7bf7a6d0 246#endif
d3819813
MTL
247 sha512_block_data_order(c, data, 1);
248}
3e575651
SL
249
250unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
d3819813
MTL
251{
252 SHA512_CTX c;
253 static unsigned char m[SHA384_DIGEST_LENGTH];
254
255 if (md == NULL)
256 md = m;
257 SHA384_Init(&c);
258 SHA512_Update(&c, d, n);
259 SHA512_Final(md, &c);
260 OPENSSL_cleanse(&c, sizeof(c));
261 return (md);
262}
3e575651
SL
263
264unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
d3819813
MTL
265{
266 SHA512_CTX c;
267 static unsigned char m[SHA512_DIGEST_LENGTH];
268
269 if (md == NULL)
270 md = m;
271 SHA512_Init(&c);
272 SHA512_Update(&c, d, n);
273 SHA512_Final(md, &c);
274 OPENSSL_cleanse(&c, sizeof(c));
275 return (md);
276}
277
7bf7a6d0 278#ifndef SHA512_ASM
/*
 * K512: table of 80 64-bit constants. The block transforms in this file
 * add K512[i] into the working sum of round i, for i = 0..79.
 * NOTE(review): presumably the SHA-512 round constants from FIPS 180-4;
 * verify the values against the standard before touching them.
 */
3e575651 279static const SHA_LONG64 K512[80] = {
d3819813
MTL
280 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
281 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
282 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
283 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
284 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
285 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
286 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
287 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
288 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
289 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
290 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
291 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
292 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
293 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
294 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
295 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
296 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
297 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
298 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
299 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
300 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
301 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
302 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
303 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
304 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
305 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
306 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
307 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
308 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
309 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
310 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
311 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
312 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
313 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
314 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
315 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
316 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
317 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
318 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
319 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
320};
321
7bf7a6d0
MTL
/*
 * Compiler- and architecture-specific helpers, compiled only when
 * PEDANTIC is not defined:
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - read the 64-bit word stored at x in big-endian byte
 *                order and return it in host order.
 * Not every branch defines both macros; whatever is still undefined
 * after this section receives a portable C definition immediately below.
 * The GNU-style branches rely on statement expressions and inline asm.
 */
322# ifndef PEDANTIC
323# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
324# if defined(__x86_64) || defined(__x86_64__)
325# define ROTR(a,n) ({ SHA_LONG64 ret; \
d3819813
MTL
326 asm ("rorq %1,%0" \
327 : "=r"(ret) \
328 : "J"(n),"0"(a) \
329 : "cc"); ret; })
7bf7a6d0
MTL
330# if !defined(B_ENDIAN)
331# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
d3819813
MTL
332 asm ("bswapq %0" \
333 : "=r"(ret) \
334 : "0"(ret)); ret; })
7bf7a6d0
MTL
335# endif
336# elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
337# if defined(I386_ONLY)
338# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
d3819813
MTL
339 unsigned int hi=p[0],lo=p[1]; \
340 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
341 "roll $16,%%eax; roll $16,%%edx; "\
342 "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
343 : "=a"(lo),"=d"(hi) \
344 : "0"(lo),"1"(hi) : "cc"); \
345 ((SHA_LONG64)hi)<<32|lo; })
7bf7a6d0
MTL
346# else
347# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
d3819813
MTL
348 unsigned int hi=p[0],lo=p[1]; \
349 asm ("bswapl %0; bswapl %1;" \
350 : "=r"(lo),"=r"(hi) \
351 : "0"(lo),"1"(hi)); \
352 ((SHA_LONG64)hi)<<32|lo; })
7bf7a6d0
MTL
353# endif
354# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
355# define ROTR(a,n) ({ SHA_LONG64 ret; \
d3819813
MTL
356 asm ("rotrdi %0,%1,%2" \
357 : "=r"(ret) \
358 : "r"(a),"K"(n)); ret; })
7bf7a6d0
MTL
359# elif defined(__aarch64__)
360# define ROTR(a,n) ({ SHA_LONG64 ret; \
d3819813
MTL
361 asm ("ror %0,%1,%2" \
362 : "=r"(ret) \
363 : "r"(a),"I"(n)); ret; })
7bf7a6d0 364# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
d3819813 365 __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
7bf7a6d0 366# define PULL64(x) ({ SHA_LONG64 ret; \
d3819813
MTL
367 asm ("rev %0,%1" \
368 : "=r"(ret) \
369 : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
b6f94dbe 370# endif
7bf7a6d0
MTL
371# endif
/*
 * Microsoft compiler: ROTR maps to the _rotr64 intrinsic on 64-bit
 * targets; on 32-bit x86 PULL64 calls the __pull64be helper, written
 * with MSVC inline assembly (the pointer argument arrives in ecx under
 * __fastcall and the result is left in edx:eax).
 */
372# elif defined(_MSC_VER)
373# if defined(_WIN64) /* applies to both IA-64 and AMD64 */
374# pragma intrinsic(_rotr64)
375# define ROTR(a,n) _rotr64((a),n)
376# endif
377# if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
378# if defined(I386_ONLY)
d3819813
MTL
379static SHA_LONG64 __fastcall __pull64be(const void *x)
380{
381 _asm mov edx,[ecx + 0]
382 _asm mov eax,[ecx + 4]
383_asm xchg dh, dl
384 _asm xchg ah, al
385 _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al}
7bf7a6d0 386# else
d3819813
MTL
387static SHA_LONG64 __fastcall __pull64be(const void *x)
388{
389 _asm mov edx,[ecx + 0]
390 _asm mov eax,[ecx + 4]
391_asm bswap edx _asm bswap eax}
7bf7a6d0
MTL
392# endif
393# define PULL64(x) __pull64be(&(x))
394# if _MSC_VER<=1200
395# pragma inline_depth(0)
d3819813 396# endif
3e575651 397# endif
3e575651 398# endif
7bf7a6d0
MTL
399# endif
/*
 * Portable definitions, used whenever no platform-specific version was
 * selected above. Every macro argument is parenthesized so that an
 * expression argument (e.g. ROTR(v, a + b)) expands with the intended
 * precedence.
 *
 * B(x,j)    - byte j (0 = first in memory) of the object x, moved to its
 *             position in a big-endian 64-bit value.
 * PULL64(x) - the 8 bytes starting at x read as a big-endian 64-bit word;
 *             x must be an lvalue because its address is taken.
 * ROTR(x,s) - rotate the 64-bit value x right by s bits; s must satisfy
 *             0 < s < 64, since s == 0 would shift left by 64.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s) (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/*
 * The six logical functions applied by the round computations below:
 * Sigma0/Sigma1 act on the working variables, sigma0/sigma1 on the
 * message schedule, and Ch/Maj combine three working variables bitwise.
 */
# define Sigma0(x)  (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)  (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)  (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)  (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)  (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
413# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
d3819813
MTL
414/*
415 * This code should give better results on 32-bit CPU with less than
416 * ~24 registers, both size and performance wise...
417 */ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
418 size_t num)
419{
420 const SHA_LONG64 *W = in;
421 SHA_LONG64 A, E, T;
422 SHA_LONG64 X[9 + 80], *F;
423 int i;
424
425 while (num--) {
426
427 F = X + 80;
428 A = ctx->h[0];
429 F[1] = ctx->h[1];
430 F[2] = ctx->h[2];
431 F[3] = ctx->h[3];
432 E = ctx->h[4];
433 F[5] = ctx->h[5];
434 F[6] = ctx->h[6];
435 F[7] = ctx->h[7];
436
437 for (i = 0; i < 16; i++, F--) {
7bf7a6d0 438# ifdef B_ENDIAN
d3819813 439 T = W[i];
7bf7a6d0 440# else
d3819813 441 T = PULL64(W[i]);
7bf7a6d0 442# endif
d3819813
MTL
443 F[0] = A;
444 F[4] = E;
445 F[8] = T;
446 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
447 E = F[3] + T;
448 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
449 }
450
451 for (; i < 80; i++, F--) {
452 T = sigma0(F[8 + 16 - 1]);
453 T += sigma1(F[8 + 16 - 14]);
454 T += F[8 + 16] + F[8 + 16 - 9];
455
456 F[0] = A;
457 F[4] = E;
458 F[8] = T;
459 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
460 E = F[3] + T;
461 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
462 }
463
464 ctx->h[0] += A;
465 ctx->h[1] += F[1];
466 ctx->h[2] += F[2];
467 ctx->h[3] += F[3];
468 ctx->h[4] += E;
469 ctx->h[5] += F[5];
470 ctx->h[6] += F[6];
471 ctx->h[7] += F[7];
472
473 W += SHA_LBLOCK;
3e575651 474 }
d3819813
MTL
475}
476
7bf7a6d0 477# elif defined(OPENSSL_SMALL_FOOTPRINT)
d3819813
MTL
478static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
479 size_t num)
480{
481 const SHA_LONG64 *W = in;
482 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
483 SHA_LONG64 X[16];
484 int i;
485
486 while (num--) {
487
488 a = ctx->h[0];
489 b = ctx->h[1];
490 c = ctx->h[2];
491 d = ctx->h[3];
492 e = ctx->h[4];
493 f = ctx->h[5];
494 g = ctx->h[6];
495 h = ctx->h[7];
496
497 for (i = 0; i < 16; i++) {
7bf7a6d0 498# ifdef B_ENDIAN
d3819813 499 T1 = X[i] = W[i];
7bf7a6d0 500# else
d3819813 501 T1 = X[i] = PULL64(W[i]);
7bf7a6d0 502# endif
d3819813
MTL
503 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
504 T2 = Sigma0(a) + Maj(a, b, c);
505 h = g;
506 g = f;
507 f = e;
508 e = d + T1;
509 d = c;
510 c = b;
511 b = a;
512 a = T1 + T2;
513 }
514
515 for (; i < 80; i++) {
516 s0 = X[(i + 1) & 0x0f];
517 s0 = sigma0(s0);
518 s1 = X[(i + 14) & 0x0f];
519 s1 = sigma1(s1);
520
521 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
522 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
523 T2 = Sigma0(a) + Maj(a, b, c);
524 h = g;
525 g = f;
526 f = e;
527 e = d + T1;
528 d = c;
529 c = b;
530 b = a;
531 a = T1 + T2;
532 }
533
534 ctx->h[0] += a;
535 ctx->h[1] += b;
536 ctx->h[2] += c;
537 ctx->h[3] += d;
538 ctx->h[4] += e;
539 ctx->h[5] += f;
540 ctx->h[6] += g;
541 ctx->h[7] += h;
542
543 W += SHA_LBLOCK;
544 }
545}
546
7bf7a6d0
MTL
547# else
548# define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
d3819813
MTL
549 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
550 h = Sigma0(a) + Maj(a,b,c); \
551 d += T1; h += T1; } while (0)
7bf7a6d0 552# define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
d3819813
MTL
553 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
554 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
555 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
556 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
557static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
558 size_t num)
559{
560 const SHA_LONG64 *W = in;
561 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
562 SHA_LONG64 X[16];
563 int i;
564
565 while (num--) {
566
567 a = ctx->h[0];
568 b = ctx->h[1];
569 c = ctx->h[2];
570 d = ctx->h[3];
571 e = ctx->h[4];
572 f = ctx->h[5];
573 g = ctx->h[6];
574 h = ctx->h[7];
575
7bf7a6d0 576# ifdef B_ENDIAN
d3819813
MTL
577 T1 = X[0] = W[0];
578 ROUND_00_15(0, a, b, c, d, e, f, g, h);
579 T1 = X[1] = W[1];
580 ROUND_00_15(1, h, a, b, c, d, e, f, g);
581 T1 = X[2] = W[2];
582 ROUND_00_15(2, g, h, a, b, c, d, e, f);
583 T1 = X[3] = W[3];
584 ROUND_00_15(3, f, g, h, a, b, c, d, e);
585 T1 = X[4] = W[4];
586 ROUND_00_15(4, e, f, g, h, a, b, c, d);
587 T1 = X[5] = W[5];
588 ROUND_00_15(5, d, e, f, g, h, a, b, c);
589 T1 = X[6] = W[6];
590 ROUND_00_15(6, c, d, e, f, g, h, a, b);
591 T1 = X[7] = W[7];
592 ROUND_00_15(7, b, c, d, e, f, g, h, a);
593 T1 = X[8] = W[8];
594 ROUND_00_15(8, a, b, c, d, e, f, g, h);
595 T1 = X[9] = W[9];
596 ROUND_00_15(9, h, a, b, c, d, e, f, g);
597 T1 = X[10] = W[10];
598 ROUND_00_15(10, g, h, a, b, c, d, e, f);
599 T1 = X[11] = W[11];
600 ROUND_00_15(11, f, g, h, a, b, c, d, e);
601 T1 = X[12] = W[12];
602 ROUND_00_15(12, e, f, g, h, a, b, c, d);
603 T1 = X[13] = W[13];
604 ROUND_00_15(13, d, e, f, g, h, a, b, c);
605 T1 = X[14] = W[14];
606 ROUND_00_15(14, c, d, e, f, g, h, a, b);
607 T1 = X[15] = W[15];
608 ROUND_00_15(15, b, c, d, e, f, g, h, a);
7bf7a6d0 609# else
d3819813
MTL
610 T1 = X[0] = PULL64(W[0]);
611 ROUND_00_15(0, a, b, c, d, e, f, g, h);
612 T1 = X[1] = PULL64(W[1]);
613 ROUND_00_15(1, h, a, b, c, d, e, f, g);
614 T1 = X[2] = PULL64(W[2]);
615 ROUND_00_15(2, g, h, a, b, c, d, e, f);
616 T1 = X[3] = PULL64(W[3]);
617 ROUND_00_15(3, f, g, h, a, b, c, d, e);
618 T1 = X[4] = PULL64(W[4]);
619 ROUND_00_15(4, e, f, g, h, a, b, c, d);
620 T1 = X[5] = PULL64(W[5]);
621 ROUND_00_15(5, d, e, f, g, h, a, b, c);
622 T1 = X[6] = PULL64(W[6]);
623 ROUND_00_15(6, c, d, e, f, g, h, a, b);
624 T1 = X[7] = PULL64(W[7]);
625 ROUND_00_15(7, b, c, d, e, f, g, h, a);
626 T1 = X[8] = PULL64(W[8]);
627 ROUND_00_15(8, a, b, c, d, e, f, g, h);
628 T1 = X[9] = PULL64(W[9]);
629 ROUND_00_15(9, h, a, b, c, d, e, f, g);
630 T1 = X[10] = PULL64(W[10]);
631 ROUND_00_15(10, g, h, a, b, c, d, e, f);
632 T1 = X[11] = PULL64(W[11]);
633 ROUND_00_15(11, f, g, h, a, b, c, d, e);
634 T1 = X[12] = PULL64(W[12]);
635 ROUND_00_15(12, e, f, g, h, a, b, c, d);
636 T1 = X[13] = PULL64(W[13]);
637 ROUND_00_15(13, d, e, f, g, h, a, b, c);
638 T1 = X[14] = PULL64(W[14]);
639 ROUND_00_15(14, c, d, e, f, g, h, a, b);
640 T1 = X[15] = PULL64(W[15]);
641 ROUND_00_15(15, b, c, d, e, f, g, h, a);
7bf7a6d0 642# endif
d3819813
MTL
643
644 for (i = 16; i < 80; i += 16) {
645 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
646 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
647 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
648 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
649 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
650 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
651 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
652 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
653 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
654 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
655 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
656 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
657 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
658 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
659 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
660 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
661 }
662
663 ctx->h[0] += a;
664 ctx->h[1] += b;
665 ctx->h[2] += c;
666 ctx->h[3] += d;
667 ctx->h[4] += e;
668 ctx->h[5] += f;
669 ctx->h[6] += g;
670 ctx->h[7] += h;
671
672 W += SHA_LBLOCK;
673 }
674}
675
d3819813 676# endif
3e575651 677
7bf7a6d0 678#endif /* SHA512_ASM */