]>
Commit | Line | Data |
---|---|---|
0b04990a TC |
1 | /* |
2 | * ==================================================================== | |
3 | * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
4 | * project. Rights for redistribution and usage in source and binary | |
5 | * forms are granted according to the OpenSSL license. | |
6 | * ==================================================================== | |
7 | * | |
8 | * sha256/512_block procedure for x86_64. | |
9 | * | |
10 | * 40% improvement over compiler-generated code on Opteron. On EM64T | |
11 | * sha256 was observed to run >80% faster and sha512 - >40%. No magical | |
12 | * tricks, just straight implementation... I really wonder why gcc | |
13 | * [being armed with inline assembler] fails to generate as fast code. | |
14 | * The only thing which is cool about this module is that it's very | |
15 | * same instruction sequence used for both SHA-256 and SHA-512. In | |
16 | * former case the instructions operate on 32-bit operands, while in | |
17 | * latter - on 64-bit ones. All I had to do is to get one flavor right, | |
18 | * the other one passed the test right away:-) | |
19 | * | |
20 | * sha256_block runs in ~1005 cycles on Opteron, which gives you | |
21 | * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock | |
22 | * frequency in GHz. sha512_block runs in ~1275 cycles, which results | |
23 | * in 128*1000/1275=100MBps per GHz. Is there room for improvement? | |
24 | * Well, if you compare it to IA-64 implementation, which maintains | |
25 | * X[16] in register bank[!], tends to 4 instructions per CPU clock | |
26 | * cycle and runs in 1003 cycles, 1275 is very good result for 3-way | |
27 | * issue Opteron pipeline and X[16] maintained in memory. So that *if* | |
28 | * there is a way to improve it, *then* the only way would be to try to | |
29 | * offload X[16] updates to SSE unit, but that would require "deeper" | |
30 | * loop unroll, which in turn would naturally cause size blow-up, not | |
31 | * to mention increased complexity! And once again, only *if* it's | |
32 | * actually possible to noticeably improve overall ILP, instruction | |
33 | * level parallelism, on a given CPU implementation in this case. | |
34 | * | |
35 | * Special note on Intel EM64T. While Opteron CPU exhibits perfect | |
4e33ba4c | 36 | * performance ratio of 1.5 between 64- and 32-bit flavors [see above], |
0b04990a TC |
37 | * [currently available] EM64T CPUs apparently are far from it. On the |
38 | * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit | |
39 | * sha256_block:-( This is presumably because 64-bit shifts/rotates | |
40 | * apparently are not atomic instructions, but implemented in microcode. | |
41 | */ | |
42 | ||
43 | /* | |
44 | * OpenSolaris OS modifications | |
45 | * | |
46 | * Sun elects to use this software under the BSD license. | |
47 | * | |
48 | * This source originates from OpenSSL file sha512-x86_64.pl at | |
49 | * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz | |
50 | * (presumably for future OpenSSL release 0.9.8h), with these changes: | |
51 | * | |
52 | * 1. Added perl "use strict" and declared variables. | |
53 | * | |
54 | * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from | |
55 | * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. | |
56 | * | |
57 | * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) | |
58 | * assemblers). Replaced the .picmeup macro with assembler code. | |
59 | * | |
60 | * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", | |
61 | * at the beginning of SHA2_CTX (the next field is 8-byte aligned). | |
62 | */ | |
63 | ||
64 | /* | |
650383f2 | 65 | * This file was generated by a perl script (sha512-x86_64.pl) that was |
3c67d83a TH |
66 | * used to generate sha256 and sha512 variants from the same code base. |
67 | * The comments from the original file have been pasted above. | |
0b04990a TC |
68 | */ |
69 | ||
70 | #if defined(lint) || defined(__lint) | |
71 | #include <sys/stdint.h> | |
72 | #include <sha2/sha2.h> | |
73 | ||
0b04990a TC |
/*
 * lint(1B)-only stub of SHA256TransformBlocks (see the #if defined(lint)
 * guard above): gives lint a C definition to check against the prototype.
 * The real implementation is the x86-64 assembly in the #else branch.
 * The comma expression of (void) casts silences unused-parameter warnings.
 */
74 | void |
75 | SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) | |
76 | { | |
18e4f679 | 77 | (void) ctx, (void) in, (void) num; |
0b04990a TC |
78 | } |
79 | ||
80 | ||
81 | #else | |
82 | #define _ASM | |
83 | #include <sys/asm_linkage.h> | |
84 | ||
85 | ENTRY_NP(SHA256TransformBlocks) | |
a9c93ac5 AF |
86 | .cfi_startproc |
87 | movq %rsp, %rax | |
88 | .cfi_def_cfa_register %rax | |
0b04990a | 89 | push %rbx |
a9c93ac5 | 90 | .cfi_offset %rbx,-16 |
0b04990a | 91 | push %rbp |
a9c93ac5 | 92 | .cfi_offset %rbp,-24 |
0b04990a | 93 | push %r12 |
a9c93ac5 | 94 | .cfi_offset %r12,-32 |
0b04990a | 95 | push %r13 |
a9c93ac5 | 96 | .cfi_offset %r13,-40 |
0b04990a | 97 | push %r14 |
a9c93ac5 | 98 | .cfi_offset %r14,-48 |
0b04990a | 99 | push %r15 |
a9c93ac5 | 100 | .cfi_offset %r15,-56 |
0b04990a TC |
101 | mov %rsp,%rbp # copy %rsp |
102 | shl $4,%rdx # num*16 | |
103 | sub $16*4+4*8,%rsp | |
104 | lea (%rsi,%rdx,4),%rdx # inp+num*16*4 | |
105 | and $-64,%rsp # align stack frame | |
106 | add $8,%rdi # Skip OpenSolaris field, "algotype" | |
107 | mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg | |
108 | mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg | |
109 | mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg | |
110 | mov %rbp,16*4+3*8(%rsp) # save copy of %rsp | |
a9c93ac5 AF |
111 | # echo ".cfi_cfa_expression %rsp+88,deref,+56" | |
112 | # openssl/crypto/perlasm/x86_64-xlate.pl | |
113 | .cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38 | |
0b04990a | 114 | |
688c94c5 HL |
115 | #.picmeup %rbp |
116 | # The .picmeup pseudo-directive, from perlasm/x86_64-xlate.pl, puts | |
117 | # the address of the "next" instruction into the target register | |
118 | # (%rbp). This generates these 2 instructions: | |
0b04990a | 119 | lea .Llea(%rip),%rbp |
688c94c5 | 120 | #nop # .picmeup generates a nop for mod 8 alignment--not needed here |
0b04990a TC |
121 | |
122 | .Llea: | |
123 | lea K256-.(%rbp),%rbp | |
124 | ||
125 | mov 4*0(%rdi),%eax | |
126 | mov 4*1(%rdi),%ebx | |
127 | mov 4*2(%rdi),%ecx | |
128 | mov 4*3(%rdi),%edx | |
129 | mov 4*4(%rdi),%r8d | |
130 | mov 4*5(%rdi),%r9d | |
131 | mov 4*6(%rdi),%r10d | |
132 | mov 4*7(%rdi),%r11d | |
133 | jmp .Lloop | |
134 | ||
135 | .align 16 | |
136 | .Lloop: | |
137 | xor %rdi,%rdi | |
138 | mov 4*0(%rsi),%r12d | |
139 | bswap %r12d | |
140 | mov %r8d,%r13d | |
141 | mov %r8d,%r14d | |
142 | mov %r9d,%r15d | |
143 | ||
144 | ror $6,%r13d | |
145 | ror $11,%r14d | |
146 | xor %r10d,%r15d # f^g | |
147 | ||
148 | xor %r14d,%r13d | |
149 | ror $14,%r14d | |
150 | and %r8d,%r15d # (f^g)&e | |
151 | mov %r12d,0(%rsp) | |
152 | ||
153 | xor %r14d,%r13d # Sigma1(e) | |
154 | xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
155 | add %r11d,%r12d # T1+=h | |
156 | ||
157 | mov %eax,%r11d | |
158 | add %r13d,%r12d # T1+=Sigma1(e) | |
159 | ||
160 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
161 | mov %eax,%r13d | |
162 | mov %eax,%r14d | |
163 | ||
164 | ror $2,%r11d | |
165 | ror $13,%r13d | |
166 | mov %eax,%r15d | |
167 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
168 | ||
169 | xor %r13d,%r11d | |
170 | ror $9,%r13d | |
171 | or %ecx,%r14d # a|c | |
172 | ||
173 | xor %r13d,%r11d # h=Sigma0(a) | |
174 | and %ecx,%r15d # a&c | |
175 | add %r12d,%edx # d+=T1 | |
176 | ||
177 | and %ebx,%r14d # (a|c)&b | |
178 | add %r12d,%r11d # h+=T1 | |
179 | ||
180 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
181 | lea 1(%rdi),%rdi # round++ | |
182 | ||
183 | add %r14d,%r11d # h+=Maj(a,b,c) | |
184 | mov 4*1(%rsi),%r12d | |
185 | bswap %r12d | |
186 | mov %edx,%r13d | |
187 | mov %edx,%r14d | |
188 | mov %r8d,%r15d | |
189 | ||
190 | ror $6,%r13d | |
191 | ror $11,%r14d | |
192 | xor %r9d,%r15d # f^g | |
193 | ||
194 | xor %r14d,%r13d | |
195 | ror $14,%r14d | |
196 | and %edx,%r15d # (f^g)&e | |
197 | mov %r12d,4(%rsp) | |
198 | ||
199 | xor %r14d,%r13d # Sigma1(e) | |
200 | xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
201 | add %r10d,%r12d # T1+=h | |
202 | ||
203 | mov %r11d,%r10d | |
204 | add %r13d,%r12d # T1+=Sigma1(e) | |
205 | ||
206 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
207 | mov %r11d,%r13d | |
208 | mov %r11d,%r14d | |
209 | ||
210 | ror $2,%r10d | |
211 | ror $13,%r13d | |
212 | mov %r11d,%r15d | |
213 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
214 | ||
215 | xor %r13d,%r10d | |
216 | ror $9,%r13d | |
217 | or %ebx,%r14d # a|c | |
218 | ||
219 | xor %r13d,%r10d # h=Sigma0(a) | |
220 | and %ebx,%r15d # a&c | |
221 | add %r12d,%ecx # d+=T1 | |
222 | ||
223 | and %eax,%r14d # (a|c)&b | |
224 | add %r12d,%r10d # h+=T1 | |
225 | ||
226 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
227 | lea 1(%rdi),%rdi # round++ | |
228 | ||
229 | add %r14d,%r10d # h+=Maj(a,b,c) | |
230 | mov 4*2(%rsi),%r12d | |
231 | bswap %r12d | |
232 | mov %ecx,%r13d | |
233 | mov %ecx,%r14d | |
234 | mov %edx,%r15d | |
235 | ||
236 | ror $6,%r13d | |
237 | ror $11,%r14d | |
238 | xor %r8d,%r15d # f^g | |
239 | ||
240 | xor %r14d,%r13d | |
241 | ror $14,%r14d | |
242 | and %ecx,%r15d # (f^g)&e | |
243 | mov %r12d,8(%rsp) | |
244 | ||
245 | xor %r14d,%r13d # Sigma1(e) | |
246 | xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
247 | add %r9d,%r12d # T1+=h | |
248 | ||
249 | mov %r10d,%r9d | |
250 | add %r13d,%r12d # T1+=Sigma1(e) | |
251 | ||
252 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
253 | mov %r10d,%r13d | |
254 | mov %r10d,%r14d | |
255 | ||
256 | ror $2,%r9d | |
257 | ror $13,%r13d | |
258 | mov %r10d,%r15d | |
259 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
260 | ||
261 | xor %r13d,%r9d | |
262 | ror $9,%r13d | |
263 | or %eax,%r14d # a|c | |
264 | ||
265 | xor %r13d,%r9d # h=Sigma0(a) | |
266 | and %eax,%r15d # a&c | |
267 | add %r12d,%ebx # d+=T1 | |
268 | ||
269 | and %r11d,%r14d # (a|c)&b | |
270 | add %r12d,%r9d # h+=T1 | |
271 | ||
272 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
273 | lea 1(%rdi),%rdi # round++ | |
274 | ||
275 | add %r14d,%r9d # h+=Maj(a,b,c) | |
276 | mov 4*3(%rsi),%r12d | |
277 | bswap %r12d | |
278 | mov %ebx,%r13d | |
279 | mov %ebx,%r14d | |
280 | mov %ecx,%r15d | |
281 | ||
282 | ror $6,%r13d | |
283 | ror $11,%r14d | |
284 | xor %edx,%r15d # f^g | |
285 | ||
286 | xor %r14d,%r13d | |
287 | ror $14,%r14d | |
288 | and %ebx,%r15d # (f^g)&e | |
289 | mov %r12d,12(%rsp) | |
290 | ||
291 | xor %r14d,%r13d # Sigma1(e) | |
292 | xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
293 | add %r8d,%r12d # T1+=h | |
294 | ||
295 | mov %r9d,%r8d | |
296 | add %r13d,%r12d # T1+=Sigma1(e) | |
297 | ||
298 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
299 | mov %r9d,%r13d | |
300 | mov %r9d,%r14d | |
301 | ||
302 | ror $2,%r8d | |
303 | ror $13,%r13d | |
304 | mov %r9d,%r15d | |
305 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
306 | ||
307 | xor %r13d,%r8d | |
308 | ror $9,%r13d | |
309 | or %r11d,%r14d # a|c | |
310 | ||
311 | xor %r13d,%r8d # h=Sigma0(a) | |
312 | and %r11d,%r15d # a&c | |
313 | add %r12d,%eax # d+=T1 | |
314 | ||
315 | and %r10d,%r14d # (a|c)&b | |
316 | add %r12d,%r8d # h+=T1 | |
317 | ||
318 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
319 | lea 1(%rdi),%rdi # round++ | |
320 | ||
321 | add %r14d,%r8d # h+=Maj(a,b,c) | |
322 | mov 4*4(%rsi),%r12d | |
323 | bswap %r12d | |
324 | mov %eax,%r13d | |
325 | mov %eax,%r14d | |
326 | mov %ebx,%r15d | |
327 | ||
328 | ror $6,%r13d | |
329 | ror $11,%r14d | |
330 | xor %ecx,%r15d # f^g | |
331 | ||
332 | xor %r14d,%r13d | |
333 | ror $14,%r14d | |
334 | and %eax,%r15d # (f^g)&e | |
335 | mov %r12d,16(%rsp) | |
336 | ||
337 | xor %r14d,%r13d # Sigma1(e) | |
338 | xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
339 | add %edx,%r12d # T1+=h | |
340 | ||
341 | mov %r8d,%edx | |
342 | add %r13d,%r12d # T1+=Sigma1(e) | |
343 | ||
344 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
345 | mov %r8d,%r13d | |
346 | mov %r8d,%r14d | |
347 | ||
348 | ror $2,%edx | |
349 | ror $13,%r13d | |
350 | mov %r8d,%r15d | |
351 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
352 | ||
353 | xor %r13d,%edx | |
354 | ror $9,%r13d | |
355 | or %r10d,%r14d # a|c | |
356 | ||
357 | xor %r13d,%edx # h=Sigma0(a) | |
358 | and %r10d,%r15d # a&c | |
359 | add %r12d,%r11d # d+=T1 | |
360 | ||
361 | and %r9d,%r14d # (a|c)&b | |
362 | add %r12d,%edx # h+=T1 | |
363 | ||
364 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
365 | lea 1(%rdi),%rdi # round++ | |
366 | ||
367 | add %r14d,%edx # h+=Maj(a,b,c) | |
368 | mov 4*5(%rsi),%r12d | |
369 | bswap %r12d | |
370 | mov %r11d,%r13d | |
371 | mov %r11d,%r14d | |
372 | mov %eax,%r15d | |
373 | ||
374 | ror $6,%r13d | |
375 | ror $11,%r14d | |
376 | xor %ebx,%r15d # f^g | |
377 | ||
378 | xor %r14d,%r13d | |
379 | ror $14,%r14d | |
380 | and %r11d,%r15d # (f^g)&e | |
381 | mov %r12d,20(%rsp) | |
382 | ||
383 | xor %r14d,%r13d # Sigma1(e) | |
384 | xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
385 | add %ecx,%r12d # T1+=h | |
386 | ||
387 | mov %edx,%ecx | |
388 | add %r13d,%r12d # T1+=Sigma1(e) | |
389 | ||
390 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
391 | mov %edx,%r13d | |
392 | mov %edx,%r14d | |
393 | ||
394 | ror $2,%ecx | |
395 | ror $13,%r13d | |
396 | mov %edx,%r15d | |
397 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
398 | ||
399 | xor %r13d,%ecx | |
400 | ror $9,%r13d | |
401 | or %r9d,%r14d # a|c | |
402 | ||
403 | xor %r13d,%ecx # h=Sigma0(a) | |
404 | and %r9d,%r15d # a&c | |
405 | add %r12d,%r10d # d+=T1 | |
406 | ||
407 | and %r8d,%r14d # (a|c)&b | |
408 | add %r12d,%ecx # h+=T1 | |
409 | ||
410 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
411 | lea 1(%rdi),%rdi # round++ | |
412 | ||
413 | add %r14d,%ecx # h+=Maj(a,b,c) | |
414 | mov 4*6(%rsi),%r12d | |
415 | bswap %r12d | |
416 | mov %r10d,%r13d | |
417 | mov %r10d,%r14d | |
418 | mov %r11d,%r15d | |
419 | ||
420 | ror $6,%r13d | |
421 | ror $11,%r14d | |
422 | xor %eax,%r15d # f^g | |
423 | ||
424 | xor %r14d,%r13d | |
425 | ror $14,%r14d | |
426 | and %r10d,%r15d # (f^g)&e | |
427 | mov %r12d,24(%rsp) | |
428 | ||
429 | xor %r14d,%r13d # Sigma1(e) | |
430 | xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
431 | add %ebx,%r12d # T1+=h | |
432 | ||
433 | mov %ecx,%ebx | |
434 | add %r13d,%r12d # T1+=Sigma1(e) | |
435 | ||
436 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
437 | mov %ecx,%r13d | |
438 | mov %ecx,%r14d | |
439 | ||
440 | ror $2,%ebx | |
441 | ror $13,%r13d | |
442 | mov %ecx,%r15d | |
443 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
444 | ||
445 | xor %r13d,%ebx | |
446 | ror $9,%r13d | |
447 | or %r8d,%r14d # a|c | |
448 | ||
449 | xor %r13d,%ebx # h=Sigma0(a) | |
450 | and %r8d,%r15d # a&c | |
451 | add %r12d,%r9d # d+=T1 | |
452 | ||
453 | and %edx,%r14d # (a|c)&b | |
454 | add %r12d,%ebx # h+=T1 | |
455 | ||
456 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
457 | lea 1(%rdi),%rdi # round++ | |
458 | ||
459 | add %r14d,%ebx # h+=Maj(a,b,c) | |
460 | mov 4*7(%rsi),%r12d | |
461 | bswap %r12d | |
462 | mov %r9d,%r13d | |
463 | mov %r9d,%r14d | |
464 | mov %r10d,%r15d | |
465 | ||
466 | ror $6,%r13d | |
467 | ror $11,%r14d | |
468 | xor %r11d,%r15d # f^g | |
469 | ||
470 | xor %r14d,%r13d | |
471 | ror $14,%r14d | |
472 | and %r9d,%r15d # (f^g)&e | |
473 | mov %r12d,28(%rsp) | |
474 | ||
475 | xor %r14d,%r13d # Sigma1(e) | |
476 | xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
477 | add %eax,%r12d # T1+=h | |
478 | ||
479 | mov %ebx,%eax | |
480 | add %r13d,%r12d # T1+=Sigma1(e) | |
481 | ||
482 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
483 | mov %ebx,%r13d | |
484 | mov %ebx,%r14d | |
485 | ||
486 | ror $2,%eax | |
487 | ror $13,%r13d | |
488 | mov %ebx,%r15d | |
489 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
490 | ||
491 | xor %r13d,%eax | |
492 | ror $9,%r13d | |
493 | or %edx,%r14d # a|c | |
494 | ||
495 | xor %r13d,%eax # h=Sigma0(a) | |
496 | and %edx,%r15d # a&c | |
497 | add %r12d,%r8d # d+=T1 | |
498 | ||
499 | and %ecx,%r14d # (a|c)&b | |
500 | add %r12d,%eax # h+=T1 | |
501 | ||
502 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
503 | lea 1(%rdi),%rdi # round++ | |
504 | ||
505 | add %r14d,%eax # h+=Maj(a,b,c) | |
506 | mov 4*8(%rsi),%r12d | |
507 | bswap %r12d | |
508 | mov %r8d,%r13d | |
509 | mov %r8d,%r14d | |
510 | mov %r9d,%r15d | |
511 | ||
512 | ror $6,%r13d | |
513 | ror $11,%r14d | |
514 | xor %r10d,%r15d # f^g | |
515 | ||
516 | xor %r14d,%r13d | |
517 | ror $14,%r14d | |
518 | and %r8d,%r15d # (f^g)&e | |
519 | mov %r12d,32(%rsp) | |
520 | ||
521 | xor %r14d,%r13d # Sigma1(e) | |
522 | xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
523 | add %r11d,%r12d # T1+=h | |
524 | ||
525 | mov %eax,%r11d | |
526 | add %r13d,%r12d # T1+=Sigma1(e) | |
527 | ||
528 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
529 | mov %eax,%r13d | |
530 | mov %eax,%r14d | |
531 | ||
532 | ror $2,%r11d | |
533 | ror $13,%r13d | |
534 | mov %eax,%r15d | |
535 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
536 | ||
537 | xor %r13d,%r11d | |
538 | ror $9,%r13d | |
539 | or %ecx,%r14d # a|c | |
540 | ||
541 | xor %r13d,%r11d # h=Sigma0(a) | |
542 | and %ecx,%r15d # a&c | |
543 | add %r12d,%edx # d+=T1 | |
544 | ||
545 | and %ebx,%r14d # (a|c)&b | |
546 | add %r12d,%r11d # h+=T1 | |
547 | ||
548 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
549 | lea 1(%rdi),%rdi # round++ | |
550 | ||
551 | add %r14d,%r11d # h+=Maj(a,b,c) | |
552 | mov 4*9(%rsi),%r12d | |
553 | bswap %r12d | |
554 | mov %edx,%r13d | |
555 | mov %edx,%r14d | |
556 | mov %r8d,%r15d | |
557 | ||
558 | ror $6,%r13d | |
559 | ror $11,%r14d | |
560 | xor %r9d,%r15d # f^g | |
561 | ||
562 | xor %r14d,%r13d | |
563 | ror $14,%r14d | |
564 | and %edx,%r15d # (f^g)&e | |
565 | mov %r12d,36(%rsp) | |
566 | ||
567 | xor %r14d,%r13d # Sigma1(e) | |
568 | xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
569 | add %r10d,%r12d # T1+=h | |
570 | ||
571 | mov %r11d,%r10d | |
572 | add %r13d,%r12d # T1+=Sigma1(e) | |
573 | ||
574 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
575 | mov %r11d,%r13d | |
576 | mov %r11d,%r14d | |
577 | ||
578 | ror $2,%r10d | |
579 | ror $13,%r13d | |
580 | mov %r11d,%r15d | |
581 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
582 | ||
583 | xor %r13d,%r10d | |
584 | ror $9,%r13d | |
585 | or %ebx,%r14d # a|c | |
586 | ||
587 | xor %r13d,%r10d # h=Sigma0(a) | |
588 | and %ebx,%r15d # a&c | |
589 | add %r12d,%ecx # d+=T1 | |
590 | ||
591 | and %eax,%r14d # (a|c)&b | |
592 | add %r12d,%r10d # h+=T1 | |
593 | ||
594 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
595 | lea 1(%rdi),%rdi # round++ | |
596 | ||
597 | add %r14d,%r10d # h+=Maj(a,b,c) | |
598 | mov 4*10(%rsi),%r12d | |
599 | bswap %r12d | |
600 | mov %ecx,%r13d | |
601 | mov %ecx,%r14d | |
602 | mov %edx,%r15d | |
603 | ||
604 | ror $6,%r13d | |
605 | ror $11,%r14d | |
606 | xor %r8d,%r15d # f^g | |
607 | ||
608 | xor %r14d,%r13d | |
609 | ror $14,%r14d | |
610 | and %ecx,%r15d # (f^g)&e | |
611 | mov %r12d,40(%rsp) | |
612 | ||
613 | xor %r14d,%r13d # Sigma1(e) | |
614 | xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
615 | add %r9d,%r12d # T1+=h | |
616 | ||
617 | mov %r10d,%r9d | |
618 | add %r13d,%r12d # T1+=Sigma1(e) | |
619 | ||
620 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
621 | mov %r10d,%r13d | |
622 | mov %r10d,%r14d | |
623 | ||
624 | ror $2,%r9d | |
625 | ror $13,%r13d | |
626 | mov %r10d,%r15d | |
627 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
628 | ||
629 | xor %r13d,%r9d | |
630 | ror $9,%r13d | |
631 | or %eax,%r14d # a|c | |
632 | ||
633 | xor %r13d,%r9d # h=Sigma0(a) | |
634 | and %eax,%r15d # a&c | |
635 | add %r12d,%ebx # d+=T1 | |
636 | ||
637 | and %r11d,%r14d # (a|c)&b | |
638 | add %r12d,%r9d # h+=T1 | |
639 | ||
640 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
641 | lea 1(%rdi),%rdi # round++ | |
642 | ||
643 | add %r14d,%r9d # h+=Maj(a,b,c) | |
644 | mov 4*11(%rsi),%r12d | |
645 | bswap %r12d | |
646 | mov %ebx,%r13d | |
647 | mov %ebx,%r14d | |
648 | mov %ecx,%r15d | |
649 | ||
650 | ror $6,%r13d | |
651 | ror $11,%r14d | |
652 | xor %edx,%r15d # f^g | |
653 | ||
654 | xor %r14d,%r13d | |
655 | ror $14,%r14d | |
656 | and %ebx,%r15d # (f^g)&e | |
657 | mov %r12d,44(%rsp) | |
658 | ||
659 | xor %r14d,%r13d # Sigma1(e) | |
660 | xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
661 | add %r8d,%r12d # T1+=h | |
662 | ||
663 | mov %r9d,%r8d | |
664 | add %r13d,%r12d # T1+=Sigma1(e) | |
665 | ||
666 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
667 | mov %r9d,%r13d | |
668 | mov %r9d,%r14d | |
669 | ||
670 | ror $2,%r8d | |
671 | ror $13,%r13d | |
672 | mov %r9d,%r15d | |
673 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
674 | ||
675 | xor %r13d,%r8d | |
676 | ror $9,%r13d | |
677 | or %r11d,%r14d # a|c | |
678 | ||
679 | xor %r13d,%r8d # h=Sigma0(a) | |
680 | and %r11d,%r15d # a&c | |
681 | add %r12d,%eax # d+=T1 | |
682 | ||
683 | and %r10d,%r14d # (a|c)&b | |
684 | add %r12d,%r8d # h+=T1 | |
685 | ||
686 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
687 | lea 1(%rdi),%rdi # round++ | |
688 | ||
689 | add %r14d,%r8d # h+=Maj(a,b,c) | |
690 | mov 4*12(%rsi),%r12d | |
691 | bswap %r12d | |
692 | mov %eax,%r13d | |
693 | mov %eax,%r14d | |
694 | mov %ebx,%r15d | |
695 | ||
696 | ror $6,%r13d | |
697 | ror $11,%r14d | |
698 | xor %ecx,%r15d # f^g | |
699 | ||
700 | xor %r14d,%r13d | |
701 | ror $14,%r14d | |
702 | and %eax,%r15d # (f^g)&e | |
703 | mov %r12d,48(%rsp) | |
704 | ||
705 | xor %r14d,%r13d # Sigma1(e) | |
706 | xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
707 | add %edx,%r12d # T1+=h | |
708 | ||
709 | mov %r8d,%edx | |
710 | add %r13d,%r12d # T1+=Sigma1(e) | |
711 | ||
712 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
713 | mov %r8d,%r13d | |
714 | mov %r8d,%r14d | |
715 | ||
716 | ror $2,%edx | |
717 | ror $13,%r13d | |
718 | mov %r8d,%r15d | |
719 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
720 | ||
721 | xor %r13d,%edx | |
722 | ror $9,%r13d | |
723 | or %r10d,%r14d # a|c | |
724 | ||
725 | xor %r13d,%edx # h=Sigma0(a) | |
726 | and %r10d,%r15d # a&c | |
727 | add %r12d,%r11d # d+=T1 | |
728 | ||
729 | and %r9d,%r14d # (a|c)&b | |
730 | add %r12d,%edx # h+=T1 | |
731 | ||
732 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
733 | lea 1(%rdi),%rdi # round++ | |
734 | ||
735 | add %r14d,%edx # h+=Maj(a,b,c) | |
736 | mov 4*13(%rsi),%r12d | |
737 | bswap %r12d | |
738 | mov %r11d,%r13d | |
739 | mov %r11d,%r14d | |
740 | mov %eax,%r15d | |
741 | ||
742 | ror $6,%r13d | |
743 | ror $11,%r14d | |
744 | xor %ebx,%r15d # f^g | |
745 | ||
746 | xor %r14d,%r13d | |
747 | ror $14,%r14d | |
748 | and %r11d,%r15d # (f^g)&e | |
749 | mov %r12d,52(%rsp) | |
750 | ||
751 | xor %r14d,%r13d # Sigma1(e) | |
752 | xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
753 | add %ecx,%r12d # T1+=h | |
754 | ||
755 | mov %edx,%ecx | |
756 | add %r13d,%r12d # T1+=Sigma1(e) | |
757 | ||
758 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
759 | mov %edx,%r13d | |
760 | mov %edx,%r14d | |
761 | ||
762 | ror $2,%ecx | |
763 | ror $13,%r13d | |
764 | mov %edx,%r15d | |
765 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
766 | ||
767 | xor %r13d,%ecx | |
768 | ror $9,%r13d | |
769 | or %r9d,%r14d # a|c | |
770 | ||
771 | xor %r13d,%ecx # h=Sigma0(a) | |
772 | and %r9d,%r15d # a&c | |
773 | add %r12d,%r10d # d+=T1 | |
774 | ||
775 | and %r8d,%r14d # (a|c)&b | |
776 | add %r12d,%ecx # h+=T1 | |
777 | ||
778 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
779 | lea 1(%rdi),%rdi # round++ | |
780 | ||
781 | add %r14d,%ecx # h+=Maj(a,b,c) | |
782 | mov 4*14(%rsi),%r12d | |
783 | bswap %r12d | |
784 | mov %r10d,%r13d | |
785 | mov %r10d,%r14d | |
786 | mov %r11d,%r15d | |
787 | ||
788 | ror $6,%r13d | |
789 | ror $11,%r14d | |
790 | xor %eax,%r15d # f^g | |
791 | ||
792 | xor %r14d,%r13d | |
793 | ror $14,%r14d | |
794 | and %r10d,%r15d # (f^g)&e | |
795 | mov %r12d,56(%rsp) | |
796 | ||
797 | xor %r14d,%r13d # Sigma1(e) | |
798 | xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
799 | add %ebx,%r12d # T1+=h | |
800 | ||
801 | mov %ecx,%ebx | |
802 | add %r13d,%r12d # T1+=Sigma1(e) | |
803 | ||
804 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
805 | mov %ecx,%r13d | |
806 | mov %ecx,%r14d | |
807 | ||
808 | ror $2,%ebx | |
809 | ror $13,%r13d | |
810 | mov %ecx,%r15d | |
811 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
812 | ||
813 | xor %r13d,%ebx | |
814 | ror $9,%r13d | |
815 | or %r8d,%r14d # a|c | |
816 | ||
817 | xor %r13d,%ebx # h=Sigma0(a) | |
818 | and %r8d,%r15d # a&c | |
819 | add %r12d,%r9d # d+=T1 | |
820 | ||
821 | and %edx,%r14d # (a|c)&b | |
822 | add %r12d,%ebx # h+=T1 | |
823 | ||
824 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
825 | lea 1(%rdi),%rdi # round++ | |
826 | ||
827 | add %r14d,%ebx # h+=Maj(a,b,c) | |
828 | mov 4*15(%rsi),%r12d | |
829 | bswap %r12d | |
830 | mov %r9d,%r13d | |
831 | mov %r9d,%r14d | |
832 | mov %r10d,%r15d | |
833 | ||
834 | ror $6,%r13d | |
835 | ror $11,%r14d | |
836 | xor %r11d,%r15d # f^g | |
837 | ||
838 | xor %r14d,%r13d | |
839 | ror $14,%r14d | |
840 | and %r9d,%r15d # (f^g)&e | |
841 | mov %r12d,60(%rsp) | |
842 | ||
843 | xor %r14d,%r13d # Sigma1(e) | |
844 | xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
845 | add %eax,%r12d # T1+=h | |
846 | ||
847 | mov %ebx,%eax | |
848 | add %r13d,%r12d # T1+=Sigma1(e) | |
849 | ||
850 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
851 | mov %ebx,%r13d | |
852 | mov %ebx,%r14d | |
853 | ||
854 | ror $2,%eax | |
855 | ror $13,%r13d | |
856 | mov %ebx,%r15d | |
857 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
858 | ||
859 | xor %r13d,%eax | |
860 | ror $9,%r13d | |
861 | or %edx,%r14d # a|c | |
862 | ||
863 | xor %r13d,%eax # h=Sigma0(a) | |
864 | and %edx,%r15d # a&c | |
865 | add %r12d,%r8d # d+=T1 | |
866 | ||
867 | and %ecx,%r14d # (a|c)&b | |
868 | add %r12d,%eax # h+=T1 | |
869 | ||
870 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
871 | lea 1(%rdi),%rdi # round++ | |
872 | ||
873 | add %r14d,%eax # h+=Maj(a,b,c) | |
874 | jmp .Lrounds_16_xx | |
875 | .align 16 | |
876 | .Lrounds_16_xx: | |
877 | mov 4(%rsp),%r13d | |
878 | mov 56(%rsp),%r12d | |
879 | ||
880 | mov %r13d,%r15d | |
881 | ||
882 | shr $3,%r13d | |
883 | ror $7,%r15d | |
884 | ||
885 | xor %r15d,%r13d | |
886 | ror $11,%r15d | |
887 | ||
888 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
889 | mov %r12d,%r14d | |
890 | ||
891 | shr $10,%r12d | |
892 | ror $17,%r14d | |
893 | ||
894 | xor %r14d,%r12d | |
895 | ror $2,%r14d | |
896 | ||
897 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
898 | ||
899 | add %r13d,%r12d | |
900 | ||
901 | add 36(%rsp),%r12d | |
902 | ||
903 | add 0(%rsp),%r12d | |
904 | mov %r8d,%r13d | |
905 | mov %r8d,%r14d | |
906 | mov %r9d,%r15d | |
907 | ||
908 | ror $6,%r13d | |
909 | ror $11,%r14d | |
910 | xor %r10d,%r15d # f^g | |
911 | ||
912 | xor %r14d,%r13d | |
913 | ror $14,%r14d | |
914 | and %r8d,%r15d # (f^g)&e | |
915 | mov %r12d,0(%rsp) | |
916 | ||
917 | xor %r14d,%r13d # Sigma1(e) | |
918 | xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
919 | add %r11d,%r12d # T1+=h | |
920 | ||
921 | mov %eax,%r11d | |
922 | add %r13d,%r12d # T1+=Sigma1(e) | |
923 | ||
924 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
925 | mov %eax,%r13d | |
926 | mov %eax,%r14d | |
927 | ||
928 | ror $2,%r11d | |
929 | ror $13,%r13d | |
930 | mov %eax,%r15d | |
931 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
932 | ||
933 | xor %r13d,%r11d | |
934 | ror $9,%r13d | |
935 | or %ecx,%r14d # a|c | |
936 | ||
937 | xor %r13d,%r11d # h=Sigma0(a) | |
938 | and %ecx,%r15d # a&c | |
939 | add %r12d,%edx # d+=T1 | |
940 | ||
941 | and %ebx,%r14d # (a|c)&b | |
942 | add %r12d,%r11d # h+=T1 | |
943 | ||
944 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
945 | lea 1(%rdi),%rdi # round++ | |
946 | ||
947 | add %r14d,%r11d # h+=Maj(a,b,c) | |
948 | mov 8(%rsp),%r13d | |
949 | mov 60(%rsp),%r12d | |
950 | ||
951 | mov %r13d,%r15d | |
952 | ||
953 | shr $3,%r13d | |
954 | ror $7,%r15d | |
955 | ||
956 | xor %r15d,%r13d | |
957 | ror $11,%r15d | |
958 | ||
959 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
960 | mov %r12d,%r14d | |
961 | ||
962 | shr $10,%r12d | |
963 | ror $17,%r14d | |
964 | ||
965 | xor %r14d,%r12d | |
966 | ror $2,%r14d | |
967 | ||
968 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
969 | ||
970 | add %r13d,%r12d | |
971 | ||
972 | add 40(%rsp),%r12d | |
973 | ||
974 | add 4(%rsp),%r12d | |
975 | mov %edx,%r13d | |
976 | mov %edx,%r14d | |
977 | mov %r8d,%r15d | |
978 | ||
979 | ror $6,%r13d | |
980 | ror $11,%r14d | |
981 | xor %r9d,%r15d # f^g | |
982 | ||
983 | xor %r14d,%r13d | |
984 | ror $14,%r14d | |
985 | and %edx,%r15d # (f^g)&e | |
986 | mov %r12d,4(%rsp) | |
987 | ||
988 | xor %r14d,%r13d # Sigma1(e) | |
989 | xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
990 | add %r10d,%r12d # T1+=h | |
991 | ||
992 | mov %r11d,%r10d | |
993 | add %r13d,%r12d # T1+=Sigma1(e) | |
994 | ||
995 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
996 | mov %r11d,%r13d | |
997 | mov %r11d,%r14d | |
998 | ||
999 | ror $2,%r10d | |
1000 | ror $13,%r13d | |
1001 | mov %r11d,%r15d | |
1002 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1003 | ||
1004 | xor %r13d,%r10d | |
1005 | ror $9,%r13d | |
1006 | or %ebx,%r14d # a|c | |
1007 | ||
1008 | xor %r13d,%r10d # h=Sigma0(a) | |
1009 | and %ebx,%r15d # a&c | |
1010 | add %r12d,%ecx # d+=T1 | |
1011 | ||
1012 | and %eax,%r14d # (a|c)&b | |
1013 | add %r12d,%r10d # h+=T1 | |
1014 | ||
1015 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1016 | lea 1(%rdi),%rdi # round++ | |
1017 | ||
1018 | add %r14d,%r10d # h+=Maj(a,b,c) | |
1019 | mov 12(%rsp),%r13d | |
1020 | mov 0(%rsp),%r12d | |
1021 | ||
1022 | mov %r13d,%r15d | |
1023 | ||
1024 | shr $3,%r13d | |
1025 | ror $7,%r15d | |
1026 | ||
1027 | xor %r15d,%r13d | |
1028 | ror $11,%r15d | |
1029 | ||
1030 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1031 | mov %r12d,%r14d | |
1032 | ||
1033 | shr $10,%r12d | |
1034 | ror $17,%r14d | |
1035 | ||
1036 | xor %r14d,%r12d | |
1037 | ror $2,%r14d | |
1038 | ||
1039 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1040 | ||
1041 | add %r13d,%r12d | |
1042 | ||
1043 | add 44(%rsp),%r12d | |
1044 | ||
1045 | add 8(%rsp),%r12d | |
1046 | mov %ecx,%r13d | |
1047 | mov %ecx,%r14d | |
1048 | mov %edx,%r15d | |
1049 | ||
1050 | ror $6,%r13d | |
1051 | ror $11,%r14d | |
1052 | xor %r8d,%r15d # f^g | |
1053 | ||
1054 | xor %r14d,%r13d | |
1055 | ror $14,%r14d | |
1056 | and %ecx,%r15d # (f^g)&e | |
1057 | mov %r12d,8(%rsp) | |
1058 | ||
1059 | xor %r14d,%r13d # Sigma1(e) | |
1060 | xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1061 | add %r9d,%r12d # T1+=h | |
1062 | ||
1063 | mov %r10d,%r9d | |
1064 | add %r13d,%r12d # T1+=Sigma1(e) | |
1065 | ||
1066 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1067 | mov %r10d,%r13d | |
1068 | mov %r10d,%r14d | |
1069 | ||
1070 | ror $2,%r9d | |
1071 | ror $13,%r13d | |
1072 | mov %r10d,%r15d | |
1073 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1074 | ||
1075 | xor %r13d,%r9d | |
1076 | ror $9,%r13d | |
1077 | or %eax,%r14d # a|c | |
1078 | ||
1079 | xor %r13d,%r9d # h=Sigma0(a) | |
1080 | and %eax,%r15d # a&c | |
1081 | add %r12d,%ebx # d+=T1 | |
1082 | ||
1083 | and %r11d,%r14d # (a|c)&b | |
1084 | add %r12d,%r9d # h+=T1 | |
1085 | ||
1086 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1087 | lea 1(%rdi),%rdi # round++ | |
1088 | ||
1089 | add %r14d,%r9d # h+=Maj(a,b,c) | |
1090 | mov 16(%rsp),%r13d | |
1091 | mov 4(%rsp),%r12d | |
1092 | ||
1093 | mov %r13d,%r15d | |
1094 | ||
1095 | shr $3,%r13d | |
1096 | ror $7,%r15d | |
1097 | ||
1098 | xor %r15d,%r13d | |
1099 | ror $11,%r15d | |
1100 | ||
1101 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1102 | mov %r12d,%r14d | |
1103 | ||
1104 | shr $10,%r12d | |
1105 | ror $17,%r14d | |
1106 | ||
1107 | xor %r14d,%r12d | |
1108 | ror $2,%r14d | |
1109 | ||
1110 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1111 | ||
1112 | add %r13d,%r12d | |
1113 | ||
1114 | add 48(%rsp),%r12d | |
1115 | ||
1116 | add 12(%rsp),%r12d | |
1117 | mov %ebx,%r13d | |
1118 | mov %ebx,%r14d | |
1119 | mov %ecx,%r15d | |
1120 | ||
1121 | ror $6,%r13d | |
1122 | ror $11,%r14d | |
1123 | xor %edx,%r15d # f^g | |
1124 | ||
1125 | xor %r14d,%r13d | |
1126 | ror $14,%r14d | |
1127 | and %ebx,%r15d # (f^g)&e | |
1128 | mov %r12d,12(%rsp) | |
1129 | ||
1130 | xor %r14d,%r13d # Sigma1(e) | |
1131 | xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1132 | add %r8d,%r12d # T1+=h | |
1133 | ||
1134 | mov %r9d,%r8d | |
1135 | add %r13d,%r12d # T1+=Sigma1(e) | |
1136 | ||
1137 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1138 | mov %r9d,%r13d | |
1139 | mov %r9d,%r14d | |
1140 | ||
1141 | ror $2,%r8d | |
1142 | ror $13,%r13d | |
1143 | mov %r9d,%r15d | |
1144 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1145 | ||
1146 | xor %r13d,%r8d | |
1147 | ror $9,%r13d | |
1148 | or %r11d,%r14d # a|c | |
1149 | ||
1150 | xor %r13d,%r8d # h=Sigma0(a) | |
1151 | and %r11d,%r15d # a&c | |
1152 | add %r12d,%eax # d+=T1 | |
1153 | ||
1154 | and %r10d,%r14d # (a|c)&b | |
1155 | add %r12d,%r8d # h+=T1 | |
1156 | ||
1157 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1158 | lea 1(%rdi),%rdi # round++ | |
1159 | ||
1160 | add %r14d,%r8d # h+=Maj(a,b,c) | |
1161 | mov 20(%rsp),%r13d | |
1162 | mov 8(%rsp),%r12d | |
1163 | ||
1164 | mov %r13d,%r15d | |
1165 | ||
1166 | shr $3,%r13d | |
1167 | ror $7,%r15d | |
1168 | ||
1169 | xor %r15d,%r13d | |
1170 | ror $11,%r15d | |
1171 | ||
1172 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1173 | mov %r12d,%r14d | |
1174 | ||
1175 | shr $10,%r12d | |
1176 | ror $17,%r14d | |
1177 | ||
1178 | xor %r14d,%r12d | |
1179 | ror $2,%r14d | |
1180 | ||
1181 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1182 | ||
1183 | add %r13d,%r12d | |
1184 | ||
1185 | add 52(%rsp),%r12d | |
1186 | ||
1187 | add 16(%rsp),%r12d | |
1188 | mov %eax,%r13d | |
1189 | mov %eax,%r14d | |
1190 | mov %ebx,%r15d | |
1191 | ||
1192 | ror $6,%r13d | |
1193 | ror $11,%r14d | |
1194 | xor %ecx,%r15d # f^g | |
1195 | ||
1196 | xor %r14d,%r13d | |
1197 | ror $14,%r14d | |
1198 | and %eax,%r15d # (f^g)&e | |
1199 | mov %r12d,16(%rsp) | |
1200 | ||
1201 | xor %r14d,%r13d # Sigma1(e) | |
1202 | xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1203 | add %edx,%r12d # T1+=h | |
1204 | ||
1205 | mov %r8d,%edx | |
1206 | add %r13d,%r12d # T1+=Sigma1(e) | |
1207 | ||
1208 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1209 | mov %r8d,%r13d | |
1210 | mov %r8d,%r14d | |
1211 | ||
1212 | ror $2,%edx | |
1213 | ror $13,%r13d | |
1214 | mov %r8d,%r15d | |
1215 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1216 | ||
1217 | xor %r13d,%edx | |
1218 | ror $9,%r13d | |
1219 | or %r10d,%r14d # a|c | |
1220 | ||
1221 | xor %r13d,%edx # h=Sigma0(a) | |
1222 | and %r10d,%r15d # a&c | |
1223 | add %r12d,%r11d # d+=T1 | |
1224 | ||
1225 | and %r9d,%r14d # (a|c)&b | |
1226 | add %r12d,%edx # h+=T1 | |
1227 | ||
1228 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1229 | lea 1(%rdi),%rdi # round++ | |
1230 | ||
1231 | add %r14d,%edx # h+=Maj(a,b,c) | |
1232 | mov 24(%rsp),%r13d | |
1233 | mov 12(%rsp),%r12d | |
1234 | ||
1235 | mov %r13d,%r15d | |
1236 | ||
1237 | shr $3,%r13d | |
1238 | ror $7,%r15d | |
1239 | ||
1240 | xor %r15d,%r13d | |
1241 | ror $11,%r15d | |
1242 | ||
1243 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1244 | mov %r12d,%r14d | |
1245 | ||
1246 | shr $10,%r12d | |
1247 | ror $17,%r14d | |
1248 | ||
1249 | xor %r14d,%r12d | |
1250 | ror $2,%r14d | |
1251 | ||
1252 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1253 | ||
1254 | add %r13d,%r12d | |
1255 | ||
1256 | add 56(%rsp),%r12d | |
1257 | ||
1258 | add 20(%rsp),%r12d | |
1259 | mov %r11d,%r13d | |
1260 | mov %r11d,%r14d | |
1261 | mov %eax,%r15d | |
1262 | ||
1263 | ror $6,%r13d | |
1264 | ror $11,%r14d | |
1265 | xor %ebx,%r15d # f^g | |
1266 | ||
1267 | xor %r14d,%r13d | |
1268 | ror $14,%r14d | |
1269 | and %r11d,%r15d # (f^g)&e | |
1270 | mov %r12d,20(%rsp) | |
1271 | ||
1272 | xor %r14d,%r13d # Sigma1(e) | |
1273 | xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1274 | add %ecx,%r12d # T1+=h | |
1275 | ||
1276 | mov %edx,%ecx | |
1277 | add %r13d,%r12d # T1+=Sigma1(e) | |
1278 | ||
1279 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1280 | mov %edx,%r13d | |
1281 | mov %edx,%r14d | |
1282 | ||
1283 | ror $2,%ecx | |
1284 | ror $13,%r13d | |
1285 | mov %edx,%r15d | |
1286 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1287 | ||
1288 | xor %r13d,%ecx | |
1289 | ror $9,%r13d | |
1290 | or %r9d,%r14d # a|c | |
1291 | ||
1292 | xor %r13d,%ecx # h=Sigma0(a) | |
1293 | and %r9d,%r15d # a&c | |
1294 | add %r12d,%r10d # d+=T1 | |
1295 | ||
1296 | and %r8d,%r14d # (a|c)&b | |
1297 | add %r12d,%ecx # h+=T1 | |
1298 | ||
1299 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1300 | lea 1(%rdi),%rdi # round++ | |
1301 | ||
1302 | add %r14d,%ecx # h+=Maj(a,b,c) | |
1303 | mov 28(%rsp),%r13d | |
1304 | mov 16(%rsp),%r12d | |
1305 | ||
1306 | mov %r13d,%r15d | |
1307 | ||
1308 | shr $3,%r13d | |
1309 | ror $7,%r15d | |
1310 | ||
1311 | xor %r15d,%r13d | |
1312 | ror $11,%r15d | |
1313 | ||
1314 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1315 | mov %r12d,%r14d | |
1316 | ||
1317 | shr $10,%r12d | |
1318 | ror $17,%r14d | |
1319 | ||
1320 | xor %r14d,%r12d | |
1321 | ror $2,%r14d | |
1322 | ||
1323 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1324 | ||
1325 | add %r13d,%r12d | |
1326 | ||
1327 | add 60(%rsp),%r12d | |
1328 | ||
1329 | add 24(%rsp),%r12d | |
1330 | mov %r10d,%r13d | |
1331 | mov %r10d,%r14d | |
1332 | mov %r11d,%r15d | |
1333 | ||
1334 | ror $6,%r13d | |
1335 | ror $11,%r14d | |
1336 | xor %eax,%r15d # f^g | |
1337 | ||
1338 | xor %r14d,%r13d | |
1339 | ror $14,%r14d | |
1340 | and %r10d,%r15d # (f^g)&e | |
1341 | mov %r12d,24(%rsp) | |
1342 | ||
1343 | xor %r14d,%r13d # Sigma1(e) | |
1344 | xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1345 | add %ebx,%r12d # T1+=h | |
1346 | ||
1347 | mov %ecx,%ebx | |
1348 | add %r13d,%r12d # T1+=Sigma1(e) | |
1349 | ||
1350 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1351 | mov %ecx,%r13d | |
1352 | mov %ecx,%r14d | |
1353 | ||
1354 | ror $2,%ebx | |
1355 | ror $13,%r13d | |
1356 | mov %ecx,%r15d | |
1357 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1358 | ||
1359 | xor %r13d,%ebx | |
1360 | ror $9,%r13d | |
1361 | or %r8d,%r14d # a|c | |
1362 | ||
1363 | xor %r13d,%ebx # h=Sigma0(a) | |
1364 | and %r8d,%r15d # a&c | |
1365 | add %r12d,%r9d # d+=T1 | |
1366 | ||
1367 | and %edx,%r14d # (a|c)&b | |
1368 | add %r12d,%ebx # h+=T1 | |
1369 | ||
1370 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1371 | lea 1(%rdi),%rdi # round++ | |
1372 | ||
1373 | add %r14d,%ebx # h+=Maj(a,b,c) | |
1374 | mov 32(%rsp),%r13d | |
1375 | mov 20(%rsp),%r12d | |
1376 | ||
1377 | mov %r13d,%r15d | |
1378 | ||
1379 | shr $3,%r13d | |
1380 | ror $7,%r15d | |
1381 | ||
1382 | xor %r15d,%r13d | |
1383 | ror $11,%r15d | |
1384 | ||
1385 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1386 | mov %r12d,%r14d | |
1387 | ||
1388 | shr $10,%r12d | |
1389 | ror $17,%r14d | |
1390 | ||
1391 | xor %r14d,%r12d | |
1392 | ror $2,%r14d | |
1393 | ||
1394 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1395 | ||
1396 | add %r13d,%r12d | |
1397 | ||
1398 | add 0(%rsp),%r12d | |
1399 | ||
1400 | add 28(%rsp),%r12d | |
1401 | mov %r9d,%r13d | |
1402 | mov %r9d,%r14d | |
1403 | mov %r10d,%r15d | |
1404 | ||
1405 | ror $6,%r13d | |
1406 | ror $11,%r14d | |
1407 | xor %r11d,%r15d # f^g | |
1408 | ||
1409 | xor %r14d,%r13d | |
1410 | ror $14,%r14d | |
1411 | and %r9d,%r15d # (f^g)&e | |
1412 | mov %r12d,28(%rsp) | |
1413 | ||
1414 | xor %r14d,%r13d # Sigma1(e) | |
1415 | xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1416 | add %eax,%r12d # T1+=h | |
1417 | ||
1418 | mov %ebx,%eax | |
1419 | add %r13d,%r12d # T1+=Sigma1(e) | |
1420 | ||
1421 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1422 | mov %ebx,%r13d | |
1423 | mov %ebx,%r14d | |
1424 | ||
1425 | ror $2,%eax | |
1426 | ror $13,%r13d | |
1427 | mov %ebx,%r15d | |
1428 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1429 | ||
1430 | xor %r13d,%eax | |
1431 | ror $9,%r13d | |
1432 | or %edx,%r14d # a|c | |
1433 | ||
1434 | xor %r13d,%eax # h=Sigma0(a) | |
1435 | and %edx,%r15d # a&c | |
1436 | add %r12d,%r8d # d+=T1 | |
1437 | ||
1438 | and %ecx,%r14d # (a|c)&b | |
1439 | add %r12d,%eax # h+=T1 | |
1440 | ||
1441 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1442 | lea 1(%rdi),%rdi # round++ | |
1443 | ||
1444 | add %r14d,%eax # h+=Maj(a,b,c) | |
1445 | mov 36(%rsp),%r13d | |
1446 | mov 24(%rsp),%r12d | |
1447 | ||
1448 | mov %r13d,%r15d | |
1449 | ||
1450 | shr $3,%r13d | |
1451 | ror $7,%r15d | |
1452 | ||
1453 | xor %r15d,%r13d | |
1454 | ror $11,%r15d | |
1455 | ||
1456 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1457 | mov %r12d,%r14d | |
1458 | ||
1459 | shr $10,%r12d | |
1460 | ror $17,%r14d | |
1461 | ||
1462 | xor %r14d,%r12d | |
1463 | ror $2,%r14d | |
1464 | ||
1465 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1466 | ||
1467 | add %r13d,%r12d | |
1468 | ||
1469 | add 4(%rsp),%r12d | |
1470 | ||
1471 | add 32(%rsp),%r12d | |
1472 | mov %r8d,%r13d | |
1473 | mov %r8d,%r14d | |
1474 | mov %r9d,%r15d | |
1475 | ||
1476 | ror $6,%r13d | |
1477 | ror $11,%r14d | |
1478 | xor %r10d,%r15d # f^g | |
1479 | ||
1480 | xor %r14d,%r13d | |
1481 | ror $14,%r14d | |
1482 | and %r8d,%r15d # (f^g)&e | |
1483 | mov %r12d,32(%rsp) | |
1484 | ||
1485 | xor %r14d,%r13d # Sigma1(e) | |
1486 | xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1487 | add %r11d,%r12d # T1+=h | |
1488 | ||
1489 | mov %eax,%r11d | |
1490 | add %r13d,%r12d # T1+=Sigma1(e) | |
1491 | ||
1492 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1493 | mov %eax,%r13d | |
1494 | mov %eax,%r14d | |
1495 | ||
1496 | ror $2,%r11d | |
1497 | ror $13,%r13d | |
1498 | mov %eax,%r15d | |
1499 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1500 | ||
1501 | xor %r13d,%r11d | |
1502 | ror $9,%r13d | |
1503 | or %ecx,%r14d # a|c | |
1504 | ||
1505 | xor %r13d,%r11d # h=Sigma0(a) | |
1506 | and %ecx,%r15d # a&c | |
1507 | add %r12d,%edx # d+=T1 | |
1508 | ||
1509 | and %ebx,%r14d # (a|c)&b | |
1510 | add %r12d,%r11d # h+=T1 | |
1511 | ||
1512 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1513 | lea 1(%rdi),%rdi # round++ | |
1514 | ||
1515 | add %r14d,%r11d # h+=Maj(a,b,c) | |
1516 | mov 40(%rsp),%r13d | |
1517 | mov 28(%rsp),%r12d | |
1518 | ||
1519 | mov %r13d,%r15d | |
1520 | ||
1521 | shr $3,%r13d | |
1522 | ror $7,%r15d | |
1523 | ||
1524 | xor %r15d,%r13d | |
1525 | ror $11,%r15d | |
1526 | ||
1527 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1528 | mov %r12d,%r14d | |
1529 | ||
1530 | shr $10,%r12d | |
1531 | ror $17,%r14d | |
1532 | ||
1533 | xor %r14d,%r12d | |
1534 | ror $2,%r14d | |
1535 | ||
1536 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1537 | ||
1538 | add %r13d,%r12d | |
1539 | ||
1540 | add 8(%rsp),%r12d | |
1541 | ||
1542 | add 36(%rsp),%r12d | |
1543 | mov %edx,%r13d | |
1544 | mov %edx,%r14d | |
1545 | mov %r8d,%r15d | |
1546 | ||
1547 | ror $6,%r13d | |
1548 | ror $11,%r14d | |
1549 | xor %r9d,%r15d # f^g | |
1550 | ||
1551 | xor %r14d,%r13d | |
1552 | ror $14,%r14d | |
1553 | and %edx,%r15d # (f^g)&e | |
1554 | mov %r12d,36(%rsp) | |
1555 | ||
1556 | xor %r14d,%r13d # Sigma1(e) | |
1557 | xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1558 | add %r10d,%r12d # T1+=h | |
1559 | ||
1560 | mov %r11d,%r10d | |
1561 | add %r13d,%r12d # T1+=Sigma1(e) | |
1562 | ||
1563 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1564 | mov %r11d,%r13d | |
1565 | mov %r11d,%r14d | |
1566 | ||
1567 | ror $2,%r10d | |
1568 | ror $13,%r13d | |
1569 | mov %r11d,%r15d | |
1570 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1571 | ||
1572 | xor %r13d,%r10d | |
1573 | ror $9,%r13d | |
1574 | or %ebx,%r14d # a|c | |
1575 | ||
1576 | xor %r13d,%r10d # h=Sigma0(a) | |
1577 | and %ebx,%r15d # a&c | |
1578 | add %r12d,%ecx # d+=T1 | |
1579 | ||
1580 | and %eax,%r14d # (a|c)&b | |
1581 | add %r12d,%r10d # h+=T1 | |
1582 | ||
1583 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1584 | lea 1(%rdi),%rdi # round++ | |
1585 | ||
1586 | add %r14d,%r10d # h+=Maj(a,b,c) | |
1587 | mov 44(%rsp),%r13d | |
1588 | mov 32(%rsp),%r12d | |
1589 | ||
1590 | mov %r13d,%r15d | |
1591 | ||
1592 | shr $3,%r13d | |
1593 | ror $7,%r15d | |
1594 | ||
1595 | xor %r15d,%r13d | |
1596 | ror $11,%r15d | |
1597 | ||
1598 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1599 | mov %r12d,%r14d | |
1600 | ||
1601 | shr $10,%r12d | |
1602 | ror $17,%r14d | |
1603 | ||
1604 | xor %r14d,%r12d | |
1605 | ror $2,%r14d | |
1606 | ||
1607 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1608 | ||
1609 | add %r13d,%r12d | |
1610 | ||
1611 | add 12(%rsp),%r12d | |
1612 | ||
1613 | add 40(%rsp),%r12d | |
1614 | mov %ecx,%r13d | |
1615 | mov %ecx,%r14d | |
1616 | mov %edx,%r15d | |
1617 | ||
1618 | ror $6,%r13d | |
1619 | ror $11,%r14d | |
1620 | xor %r8d,%r15d # f^g | |
1621 | ||
1622 | xor %r14d,%r13d | |
1623 | ror $14,%r14d | |
1624 | and %ecx,%r15d # (f^g)&e | |
1625 | mov %r12d,40(%rsp) | |
1626 | ||
1627 | xor %r14d,%r13d # Sigma1(e) | |
1628 | xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1629 | add %r9d,%r12d # T1+=h | |
1630 | ||
1631 | mov %r10d,%r9d | |
1632 | add %r13d,%r12d # T1+=Sigma1(e) | |
1633 | ||
1634 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1635 | mov %r10d,%r13d | |
1636 | mov %r10d,%r14d | |
1637 | ||
1638 | ror $2,%r9d | |
1639 | ror $13,%r13d | |
1640 | mov %r10d,%r15d | |
1641 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1642 | ||
1643 | xor %r13d,%r9d | |
1644 | ror $9,%r13d | |
1645 | or %eax,%r14d # a|c | |
1646 | ||
1647 | xor %r13d,%r9d # h=Sigma0(a) | |
1648 | and %eax,%r15d # a&c | |
1649 | add %r12d,%ebx # d+=T1 | |
1650 | ||
1651 | and %r11d,%r14d # (a|c)&b | |
1652 | add %r12d,%r9d # h+=T1 | |
1653 | ||
1654 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1655 | lea 1(%rdi),%rdi # round++ | |
1656 | ||
1657 | add %r14d,%r9d # h+=Maj(a,b,c) | |
1658 | mov 48(%rsp),%r13d | |
1659 | mov 36(%rsp),%r12d | |
1660 | ||
1661 | mov %r13d,%r15d | |
1662 | ||
1663 | shr $3,%r13d | |
1664 | ror $7,%r15d | |
1665 | ||
1666 | xor %r15d,%r13d | |
1667 | ror $11,%r15d | |
1668 | ||
1669 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1670 | mov %r12d,%r14d | |
1671 | ||
1672 | shr $10,%r12d | |
1673 | ror $17,%r14d | |
1674 | ||
1675 | xor %r14d,%r12d | |
1676 | ror $2,%r14d | |
1677 | ||
1678 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1679 | ||
1680 | add %r13d,%r12d | |
1681 | ||
1682 | add 16(%rsp),%r12d | |
1683 | ||
1684 | add 44(%rsp),%r12d | |
1685 | mov %ebx,%r13d | |
1686 | mov %ebx,%r14d | |
1687 | mov %ecx,%r15d | |
1688 | ||
1689 | ror $6,%r13d | |
1690 | ror $11,%r14d | |
1691 | xor %edx,%r15d # f^g | |
1692 | ||
1693 | xor %r14d,%r13d | |
1694 | ror $14,%r14d | |
1695 | and %ebx,%r15d # (f^g)&e | |
1696 | mov %r12d,44(%rsp) | |
1697 | ||
1698 | xor %r14d,%r13d # Sigma1(e) | |
1699 | xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1700 | add %r8d,%r12d # T1+=h | |
1701 | ||
1702 | mov %r9d,%r8d | |
1703 | add %r13d,%r12d # T1+=Sigma1(e) | |
1704 | ||
1705 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1706 | mov %r9d,%r13d | |
1707 | mov %r9d,%r14d | |
1708 | ||
1709 | ror $2,%r8d | |
1710 | ror $13,%r13d | |
1711 | mov %r9d,%r15d | |
1712 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1713 | ||
1714 | xor %r13d,%r8d | |
1715 | ror $9,%r13d | |
1716 | or %r11d,%r14d # a|c | |
1717 | ||
1718 | xor %r13d,%r8d # h=Sigma0(a) | |
1719 | and %r11d,%r15d # a&c | |
1720 | add %r12d,%eax # d+=T1 | |
1721 | ||
1722 | and %r10d,%r14d # (a|c)&b | |
1723 | add %r12d,%r8d # h+=T1 | |
1724 | ||
1725 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1726 | lea 1(%rdi),%rdi # round++ | |
1727 | ||
1728 | add %r14d,%r8d # h+=Maj(a,b,c) | |
1729 | mov 52(%rsp),%r13d | |
1730 | mov 40(%rsp),%r12d | |
1731 | ||
1732 | mov %r13d,%r15d | |
1733 | ||
1734 | shr $3,%r13d | |
1735 | ror $7,%r15d | |
1736 | ||
1737 | xor %r15d,%r13d | |
1738 | ror $11,%r15d | |
1739 | ||
1740 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1741 | mov %r12d,%r14d | |
1742 | ||
1743 | shr $10,%r12d | |
1744 | ror $17,%r14d | |
1745 | ||
1746 | xor %r14d,%r12d | |
1747 | ror $2,%r14d | |
1748 | ||
1749 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1750 | ||
1751 | add %r13d,%r12d | |
1752 | ||
1753 | add 20(%rsp),%r12d | |
1754 | ||
1755 | add 48(%rsp),%r12d | |
1756 | mov %eax,%r13d | |
1757 | mov %eax,%r14d | |
1758 | mov %ebx,%r15d | |
1759 | ||
1760 | ror $6,%r13d | |
1761 | ror $11,%r14d | |
1762 | xor %ecx,%r15d # f^g | |
1763 | ||
1764 | xor %r14d,%r13d | |
1765 | ror $14,%r14d | |
1766 | and %eax,%r15d # (f^g)&e | |
1767 | mov %r12d,48(%rsp) | |
1768 | ||
1769 | xor %r14d,%r13d # Sigma1(e) | |
1770 | xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1771 | add %edx,%r12d # T1+=h | |
1772 | ||
1773 | mov %r8d,%edx | |
1774 | add %r13d,%r12d # T1+=Sigma1(e) | |
1775 | ||
1776 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1777 | mov %r8d,%r13d | |
1778 | mov %r8d,%r14d | |
1779 | ||
1780 | ror $2,%edx | |
1781 | ror $13,%r13d | |
1782 | mov %r8d,%r15d | |
1783 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1784 | ||
1785 | xor %r13d,%edx | |
1786 | ror $9,%r13d | |
1787 | or %r10d,%r14d # a|c | |
1788 | ||
1789 | xor %r13d,%edx # h=Sigma0(a) | |
1790 | and %r10d,%r15d # a&c | |
1791 | add %r12d,%r11d # d+=T1 | |
1792 | ||
1793 | and %r9d,%r14d # (a|c)&b | |
1794 | add %r12d,%edx # h+=T1 | |
1795 | ||
1796 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1797 | lea 1(%rdi),%rdi # round++ | |
1798 | ||
1799 | add %r14d,%edx # h+=Maj(a,b,c) | |
1800 | mov 56(%rsp),%r13d | |
1801 | mov 44(%rsp),%r12d | |
1802 | ||
1803 | mov %r13d,%r15d | |
1804 | ||
1805 | shr $3,%r13d | |
1806 | ror $7,%r15d | |
1807 | ||
1808 | xor %r15d,%r13d | |
1809 | ror $11,%r15d | |
1810 | ||
1811 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1812 | mov %r12d,%r14d | |
1813 | ||
1814 | shr $10,%r12d | |
1815 | ror $17,%r14d | |
1816 | ||
1817 | xor %r14d,%r12d | |
1818 | ror $2,%r14d | |
1819 | ||
1820 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1821 | ||
1822 | add %r13d,%r12d | |
1823 | ||
1824 | add 24(%rsp),%r12d | |
1825 | ||
1826 | add 52(%rsp),%r12d | |
1827 | mov %r11d,%r13d | |
1828 | mov %r11d,%r14d | |
1829 | mov %eax,%r15d | |
1830 | ||
1831 | ror $6,%r13d | |
1832 | ror $11,%r14d | |
1833 | xor %ebx,%r15d # f^g | |
1834 | ||
1835 | xor %r14d,%r13d | |
1836 | ror $14,%r14d | |
1837 | and %r11d,%r15d # (f^g)&e | |
1838 | mov %r12d,52(%rsp) | |
1839 | ||
1840 | xor %r14d,%r13d # Sigma1(e) | |
1841 | xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1842 | add %ecx,%r12d # T1+=h | |
1843 | ||
1844 | mov %edx,%ecx | |
1845 | add %r13d,%r12d # T1+=Sigma1(e) | |
1846 | ||
1847 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1848 | mov %edx,%r13d | |
1849 | mov %edx,%r14d | |
1850 | ||
1851 | ror $2,%ecx | |
1852 | ror $13,%r13d | |
1853 | mov %edx,%r15d | |
1854 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1855 | ||
1856 | xor %r13d,%ecx | |
1857 | ror $9,%r13d | |
1858 | or %r9d,%r14d # a|c | |
1859 | ||
1860 | xor %r13d,%ecx # h=Sigma0(a) | |
1861 | and %r9d,%r15d # a&c | |
1862 | add %r12d,%r10d # d+=T1 | |
1863 | ||
1864 | and %r8d,%r14d # (a|c)&b | |
1865 | add %r12d,%ecx # h+=T1 | |
1866 | ||
1867 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1868 | lea 1(%rdi),%rdi # round++ | |
1869 | ||
1870 | add %r14d,%ecx # h+=Maj(a,b,c) | |
1871 | mov 60(%rsp),%r13d | |
1872 | mov 48(%rsp),%r12d | |
1873 | ||
1874 | mov %r13d,%r15d | |
1875 | ||
1876 | shr $3,%r13d | |
1877 | ror $7,%r15d | |
1878 | ||
1879 | xor %r15d,%r13d | |
1880 | ror $11,%r15d | |
1881 | ||
1882 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1883 | mov %r12d,%r14d | |
1884 | ||
1885 | shr $10,%r12d | |
1886 | ror $17,%r14d | |
1887 | ||
1888 | xor %r14d,%r12d | |
1889 | ror $2,%r14d | |
1890 | ||
1891 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1892 | ||
1893 | add %r13d,%r12d | |
1894 | ||
1895 | add 28(%rsp),%r12d | |
1896 | ||
1897 | add 56(%rsp),%r12d | |
1898 | mov %r10d,%r13d | |
1899 | mov %r10d,%r14d | |
1900 | mov %r11d,%r15d | |
1901 | ||
1902 | ror $6,%r13d | |
1903 | ror $11,%r14d | |
1904 | xor %eax,%r15d # f^g | |
1905 | ||
1906 | xor %r14d,%r13d | |
1907 | ror $14,%r14d | |
1908 | and %r10d,%r15d # (f^g)&e | |
1909 | mov %r12d,56(%rsp) | |
1910 | ||
1911 | xor %r14d,%r13d # Sigma1(e) | |
1912 | xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1913 | add %ebx,%r12d # T1+=h | |
1914 | ||
1915 | mov %ecx,%ebx | |
1916 | add %r13d,%r12d # T1+=Sigma1(e) | |
1917 | ||
1918 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1919 | mov %ecx,%r13d | |
1920 | mov %ecx,%r14d | |
1921 | ||
1922 | ror $2,%ebx | |
1923 | ror $13,%r13d | |
1924 | mov %ecx,%r15d | |
1925 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1926 | ||
1927 | xor %r13d,%ebx | |
1928 | ror $9,%r13d | |
1929 | or %r8d,%r14d # a|c | |
1930 | ||
1931 | xor %r13d,%ebx # h=Sigma0(a) | |
1932 | and %r8d,%r15d # a&c | |
1933 | add %r12d,%r9d # d+=T1 | |
1934 | ||
1935 | and %edx,%r14d # (a|c)&b | |
1936 | add %r12d,%ebx # h+=T1 | |
1937 | ||
1938 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
1939 | lea 1(%rdi),%rdi # round++ | |
1940 | ||
1941 | add %r14d,%ebx # h+=Maj(a,b,c) | |
1942 | mov 0(%rsp),%r13d | |
1943 | mov 52(%rsp),%r12d | |
1944 | ||
1945 | mov %r13d,%r15d | |
1946 | ||
1947 | shr $3,%r13d | |
1948 | ror $7,%r15d | |
1949 | ||
1950 | xor %r15d,%r13d | |
1951 | ror $11,%r15d | |
1952 | ||
1953 | xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) | |
1954 | mov %r12d,%r14d | |
1955 | ||
1956 | shr $10,%r12d | |
1957 | ror $17,%r14d | |
1958 | ||
1959 | xor %r14d,%r12d | |
1960 | ror $2,%r14d | |
1961 | ||
1962 | xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) | |
1963 | ||
1964 | add %r13d,%r12d | |
1965 | ||
1966 | add 32(%rsp),%r12d | |
1967 | ||
1968 | add 60(%rsp),%r12d | |
1969 | mov %r9d,%r13d | |
1970 | mov %r9d,%r14d | |
1971 | mov %r10d,%r15d | |
1972 | ||
1973 | ror $6,%r13d | |
1974 | ror $11,%r14d | |
1975 | xor %r11d,%r15d # f^g | |
1976 | ||
1977 | xor %r14d,%r13d | |
1978 | ror $14,%r14d | |
1979 | and %r9d,%r15d # (f^g)&e | |
1980 | mov %r12d,60(%rsp) | |
1981 | ||
1982 | xor %r14d,%r13d # Sigma1(e) | |
1983 | xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g | |
1984 | add %eax,%r12d # T1+=h | |
1985 | ||
1986 | mov %ebx,%eax | |
1987 | add %r13d,%r12d # T1+=Sigma1(e) | |
1988 | ||
1989 | add %r15d,%r12d # T1+=Ch(e,f,g) | |
1990 | mov %ebx,%r13d | |
1991 | mov %ebx,%r14d | |
1992 | ||
1993 | ror $2,%eax | |
1994 | ror $13,%r13d | |
1995 | mov %ebx,%r15d | |
1996 | add (%rbp,%rdi,4),%r12d # T1+=K[round] | |
1997 | ||
1998 | xor %r13d,%eax | |
1999 | ror $9,%r13d | |
2000 | or %edx,%r14d # a|c | |
2001 | ||
2002 | xor %r13d,%eax # h=Sigma0(a) | |
2003 | and %edx,%r15d # a&c | |
2004 | add %r12d,%r8d # d+=T1 | |
2005 | ||
2006 | and %ecx,%r14d # (a|c)&b | |
2007 | add %r12d,%eax # h+=T1 | |
2008 | ||
2009 | or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) | |
2010 | lea 1(%rdi),%rdi # round++ | |
2011 | ||
2012 | add %r14d,%eax # h+=Maj(a,b,c) | |
2013 | cmp $64,%rdi | |
2014 | jb .Lrounds_16_xx | |
2015 | ||
2016 | mov 16*4+0*8(%rsp),%rdi | |
2017 | lea 16*4(%rsi),%rsi | |
2018 | ||
2019 | add 4*0(%rdi),%eax | |
2020 | add 4*1(%rdi),%ebx | |
2021 | add 4*2(%rdi),%ecx | |
2022 | add 4*3(%rdi),%edx | |
2023 | add 4*4(%rdi),%r8d | |
2024 | add 4*5(%rdi),%r9d | |
2025 | add 4*6(%rdi),%r10d | |
2026 | add 4*7(%rdi),%r11d | |
2027 | ||
2028 | cmp 16*4+2*8(%rsp),%rsi | |
2029 | ||
2030 | mov %eax,4*0(%rdi) | |
2031 | mov %ebx,4*1(%rdi) | |
2032 | mov %ecx,4*2(%rdi) | |
2033 | mov %edx,4*3(%rdi) | |
2034 | mov %r8d,4*4(%rdi) | |
2035 | mov %r9d,4*5(%rdi) | |
2036 | mov %r10d,4*6(%rdi) | |
2037 | mov %r11d,4*7(%rdi) | |
2038 | jb .Lloop | |
2039 | ||
2040 | mov 16*4+3*8(%rsp),%rsp | |
a9c93ac5 | 2041 | .cfi_def_cfa %rsp,56 |
0b04990a | 2042 | pop %r15 |
a9c93ac5 AF |
2043 | .cfi_adjust_cfa_offset -8 |
2044 | .cfi_restore %r15 | |
0b04990a | 2045 | pop %r14 |
a9c93ac5 AF |
2046 | .cfi_adjust_cfa_offset -8 |
2047 | .cfi_restore %r14 | |
0b04990a | 2048 | pop %r13 |
a9c93ac5 AF |
2049 | .cfi_adjust_cfa_offset -8 |
2050 | .cfi_restore %r13 | |
0b04990a | 2051 | pop %r12 |
a9c93ac5 AF |
2052 | .cfi_adjust_cfa_offset -8 |
2053 | .cfi_restore %r12 | |
0b04990a | 2054 | pop %rbp |
a9c93ac5 AF |
2055 | .cfi_adjust_cfa_offset -8 |
2056 | .cfi_restore %rbp | |
0b04990a | 2057 | pop %rbx |
a9c93ac5 AF |
2058 | .cfi_adjust_cfa_offset -8 |
2059 | .cfi_restore %rbx | |
0b04990a TC |
2060 | |
2061 | ret | |
a9c93ac5 | 2062 | .cfi_endproc |
0b04990a TC |
2063 | SET_SIZE(SHA256TransformBlocks) |
2064 | ||
18168da7 | 2065 | .section .rodata |
0b04990a TC |
# K256: the 64 SHA-256 round constants K[0..63] (FIPS 180-4, sec. 4.2.2) —
# the first 32 bits of the fractional parts of the cube roots of the first
# 64 primes. The round loop above fetches one per round as a dword via
# add (%rbp,%rdi,4),%r12d, so entries must stay 4 bytes each, in order.
# Read-only: placed in .rodata; 64-byte alignment keeps the table
# cache-line aligned for the hot loop.
2066 | .align 64 |
2067 | .type K256,@object | |
2068 | K256: | |
2069 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | |
2070 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | |
2071 | .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | |
2072 | .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | |
2073 | .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | |
2074 | .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | |
2075 | .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | |
2076 | .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | |
2077 | .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | |
2078 | .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | |
2079 | .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | |
2080 | .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | |
2081 | .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | |
2082 | .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | |
2083 | .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | |
2084 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | |
2085 | #endif /* !lint && !__lint */ | |
a3600a10 JZ |
2086 | |
2087 | #ifdef __ELF__ | |
2088 | .section .note.GNU-stack,"",%progbits | |
2089 | #endif |