]> git.proxmox.com Git - mirror_zfs.git/blob - module/icp/asm-x86_64/sha2/sha512_impl.S
module/*.ko: prune .data, global .rodata
[mirror_zfs.git] / module / icp / asm-x86_64 / sha2 / sha512_impl.S
1 /*
2 * ====================================================================
3 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4 * project. Rights for redistribution and usage in source and binary
5 * forms are granted according to the OpenSSL license.
6 * ====================================================================
7 *
8 * sha256/512_block procedure for x86_64.
9 *
10 * 40% improvement over compiler-generated code on Opteron. On EM64T
11 * sha256 was observed to run >80% faster and sha512 - >40%. No magical
12 * tricks, just straight implementation... I really wonder why gcc
13 * [being armed with inline assembler] fails to generate as fast code.
14 * The only thing which is cool about this module is that it's the very
15 * same instruction sequence used for both SHA-256 and SHA-512. In
16 * former case the instructions operate on 32-bit operands, while in
17 * latter - on 64-bit ones. All I had to do is to get one flavor right,
18 * the other one passed the test right away:-)
19 *
20 * sha256_block runs in ~1005 cycles on Opteron, which gives you
21 * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
22 * frequency in GHz. sha512_block runs in ~1275 cycles, which results
23 * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
24 * Well, if you compare it to IA-64 implementation, which maintains
25 * X[16] in register bank[!], tends to 4 instructions per CPU clock
26 * cycle and runs in 1003 cycles, 1275 is very good result for 3-way
27 * issue Opteron pipeline and X[16] maintained in memory. So that *if*
28 * there is a way to improve it, *then* the only way would be to try to
29 * offload X[16] updates to SSE unit, but that would require "deeper"
30 * loop unroll, which in turn would naturally cause size blow-up, not
31 * to mention increased complexity! And once again, only *if* it's
32 * actually possible to noticeably improve overall ILP, instruction
33 * level parallelism, on a given CPU implementation in this case.
34 *
35 * Special note on Intel EM64T. While Opteron CPU exhibits perfect
36 * performance ratio of 1.5 between 64- and 32-bit flavors [see above],
37 * [currently available] EM64T CPUs apparently are far from it. On the
38 * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
39 * sha256_block:-( This is presumably because 64-bit shifts/rotates
40 * apparently are not atomic instructions, but implemented in microcode.
41 */
42
43 /*
44 * OpenSolaris OS modifications
45 *
46 * Sun elects to use this software under the BSD license.
47 *
48 * This source originates from OpenSSL file sha512-x86_64.pl at
49 * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
50 * (presumably for future OpenSSL release 0.9.8h), with these changes:
51 *
52 * 1. Added perl "use strict" and declared variables.
53 *
54 * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
55 * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
56 *
57 * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
58 * assemblers). Replaced the .picmeup macro with assembler code.
59 *
60 * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
61 * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
62 */
63
64 /*
65 * This file was generated by a perl script (sha512-x86_64.pl) that was
66 * used to generate sha256 and sha512 variants from the same code base.
67 * The comments from the original file have been pasted above.
68 */
69
70
71 #if defined(lint) || defined(__lint)
72 #include <sys/stdint.h>
73 #include <sha2/sha2.h>
74
75 void
76 SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) /* stub built only when lint or __lint is defined; the #else branch holds the assembly routine of the same name */
77 {
78 (void) ctx, (void) in, (void) num; /* evaluate and discard each argument; presumably keeps lint from reporting them as unused */
79 }
80
81
82 #else
83 #define _ASM
84 #include <sys/asm_linkage.h>
85
86 ENTRY_NP(SHA512TransformBlocks)
87 .cfi_startproc
88 movq %rsp, %rax
89 .cfi_def_cfa_register %rax
90 push %rbx
91 .cfi_offset %rbx,-16
92 push %rbp
93 .cfi_offset %rbp,-24
94 push %r12
95 .cfi_offset %r12,-32
96 push %r13
97 .cfi_offset %r13,-40
98 push %r14
99 .cfi_offset %r14,-48
100 push %r15
101 .cfi_offset %r15,-56
102 mov %rsp,%rbp # copy %rsp
103 shl $4,%rdx # num*16
104 sub $16*8+4*8,%rsp
105 lea (%rsi,%rdx,8),%rdx # inp+num*16*8
106 and $-64,%rsp # align stack frame
107 add $8,%rdi # Skip OpenSolaris field, "algotype"
108 mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg
109 mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg
110 mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg
111 mov %rbp,16*8+3*8(%rsp) # save copy of %rsp
112 # echo ".cfi_cfa_expression %rsp+152,deref,+56" |
113 # openssl/crypto/perlasm/x86_64-xlate.pl
114 .cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38
115
116 #.picmeup %rbp
117 # The .picmeup pseudo-directive, from perlasm/x86_64-xlate.pl, puts
118 # the address of the "next" instruction into the target register
119 # (%rbp). This generates these 2 instructions:
120 lea .Llea(%rip),%rbp
121 #nop # .picmeup generates a nop for mod 8 alignment--not needed here
122
123 .Llea:
124 lea K512-.(%rbp),%rbp
125
126 mov 8*0(%rdi),%rax
127 mov 8*1(%rdi),%rbx
128 mov 8*2(%rdi),%rcx
129 mov 8*3(%rdi),%rdx
130 mov 8*4(%rdi),%r8
131 mov 8*5(%rdi),%r9
132 mov 8*6(%rdi),%r10
133 mov 8*7(%rdi),%r11
134 jmp .Lloop
135
136 .align 16
137 .Lloop:
138 xor %rdi,%rdi
139 mov 8*0(%rsi),%r12
140 bswap %r12
141 mov %r8,%r13
142 mov %r8,%r14
143 mov %r9,%r15
144
145 ror $14,%r13
146 ror $18,%r14
147 xor %r10,%r15 # f^g
148
149 xor %r14,%r13
150 ror $23,%r14
151 and %r8,%r15 # (f^g)&e
152 mov %r12,0(%rsp)
153
154 xor %r14,%r13 # Sigma1(e)
155 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
156 add %r11,%r12 # T1+=h
157
158 mov %rax,%r11
159 add %r13,%r12 # T1+=Sigma1(e)
160
161 add %r15,%r12 # T1+=Ch(e,f,g)
162 mov %rax,%r13
163 mov %rax,%r14
164
165 ror $28,%r11
166 ror $34,%r13
167 mov %rax,%r15
168 add (%rbp,%rdi,8),%r12 # T1+=K[round]
169
170 xor %r13,%r11
171 ror $5,%r13
172 or %rcx,%r14 # a|c
173
174 xor %r13,%r11 # h=Sigma0(a)
175 and %rcx,%r15 # a&c
176 add %r12,%rdx # d+=T1
177
178 and %rbx,%r14 # (a|c)&b
179 add %r12,%r11 # h+=T1
180
181 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
182 lea 1(%rdi),%rdi # round++
183
184 add %r14,%r11 # h+=Maj(a,b,c)
185 mov 8*1(%rsi),%r12
186 bswap %r12
187 mov %rdx,%r13
188 mov %rdx,%r14
189 mov %r8,%r15
190
191 ror $14,%r13
192 ror $18,%r14
193 xor %r9,%r15 # f^g
194
195 xor %r14,%r13
196 ror $23,%r14
197 and %rdx,%r15 # (f^g)&e
198 mov %r12,8(%rsp)
199
200 xor %r14,%r13 # Sigma1(e)
201 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
202 add %r10,%r12 # T1+=h
203
204 mov %r11,%r10
205 add %r13,%r12 # T1+=Sigma1(e)
206
207 add %r15,%r12 # T1+=Ch(e,f,g)
208 mov %r11,%r13
209 mov %r11,%r14
210
211 ror $28,%r10
212 ror $34,%r13
213 mov %r11,%r15
214 add (%rbp,%rdi,8),%r12 # T1+=K[round]
215
216 xor %r13,%r10
217 ror $5,%r13
218 or %rbx,%r14 # a|c
219
220 xor %r13,%r10 # h=Sigma0(a)
221 and %rbx,%r15 # a&c
222 add %r12,%rcx # d+=T1
223
224 and %rax,%r14 # (a|c)&b
225 add %r12,%r10 # h+=T1
226
227 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
228 lea 1(%rdi),%rdi # round++
229
230 add %r14,%r10 # h+=Maj(a,b,c)
231 mov 8*2(%rsi),%r12
232 bswap %r12
233 mov %rcx,%r13
234 mov %rcx,%r14
235 mov %rdx,%r15
236
237 ror $14,%r13
238 ror $18,%r14
239 xor %r8,%r15 # f^g
240
241 xor %r14,%r13
242 ror $23,%r14
243 and %rcx,%r15 # (f^g)&e
244 mov %r12,16(%rsp)
245
246 xor %r14,%r13 # Sigma1(e)
247 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
248 add %r9,%r12 # T1+=h
249
250 mov %r10,%r9
251 add %r13,%r12 # T1+=Sigma1(e)
252
253 add %r15,%r12 # T1+=Ch(e,f,g)
254 mov %r10,%r13
255 mov %r10,%r14
256
257 ror $28,%r9
258 ror $34,%r13
259 mov %r10,%r15
260 add (%rbp,%rdi,8),%r12 # T1+=K[round]
261
262 xor %r13,%r9
263 ror $5,%r13
264 or %rax,%r14 # a|c
265
266 xor %r13,%r9 # h=Sigma0(a)
267 and %rax,%r15 # a&c
268 add %r12,%rbx # d+=T1
269
270 and %r11,%r14 # (a|c)&b
271 add %r12,%r9 # h+=T1
272
273 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
274 lea 1(%rdi),%rdi # round++
275
276 add %r14,%r9 # h+=Maj(a,b,c)
277 mov 8*3(%rsi),%r12
278 bswap %r12
279 mov %rbx,%r13
280 mov %rbx,%r14
281 mov %rcx,%r15
282
283 ror $14,%r13
284 ror $18,%r14
285 xor %rdx,%r15 # f^g
286
287 xor %r14,%r13
288 ror $23,%r14
289 and %rbx,%r15 # (f^g)&e
290 mov %r12,24(%rsp)
291
292 xor %r14,%r13 # Sigma1(e)
293 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
294 add %r8,%r12 # T1+=h
295
296 mov %r9,%r8
297 add %r13,%r12 # T1+=Sigma1(e)
298
299 add %r15,%r12 # T1+=Ch(e,f,g)
300 mov %r9,%r13
301 mov %r9,%r14
302
303 ror $28,%r8
304 ror $34,%r13
305 mov %r9,%r15
306 add (%rbp,%rdi,8),%r12 # T1+=K[round]
307
308 xor %r13,%r8
309 ror $5,%r13
310 or %r11,%r14 # a|c
311
312 xor %r13,%r8 # h=Sigma0(a)
313 and %r11,%r15 # a&c
314 add %r12,%rax # d+=T1
315
316 and %r10,%r14 # (a|c)&b
317 add %r12,%r8 # h+=T1
318
319 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
320 lea 1(%rdi),%rdi # round++
321
322 add %r14,%r8 # h+=Maj(a,b,c)
323 mov 8*4(%rsi),%r12
324 bswap %r12
325 mov %rax,%r13
326 mov %rax,%r14
327 mov %rbx,%r15
328
329 ror $14,%r13
330 ror $18,%r14
331 xor %rcx,%r15 # f^g
332
333 xor %r14,%r13
334 ror $23,%r14
335 and %rax,%r15 # (f^g)&e
336 mov %r12,32(%rsp)
337
338 xor %r14,%r13 # Sigma1(e)
339 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
340 add %rdx,%r12 # T1+=h
341
342 mov %r8,%rdx
343 add %r13,%r12 # T1+=Sigma1(e)
344
345 add %r15,%r12 # T1+=Ch(e,f,g)
346 mov %r8,%r13
347 mov %r8,%r14
348
349 ror $28,%rdx
350 ror $34,%r13
351 mov %r8,%r15
352 add (%rbp,%rdi,8),%r12 # T1+=K[round]
353
354 xor %r13,%rdx
355 ror $5,%r13
356 or %r10,%r14 # a|c
357
358 xor %r13,%rdx # h=Sigma0(a)
359 and %r10,%r15 # a&c
360 add %r12,%r11 # d+=T1
361
362 and %r9,%r14 # (a|c)&b
363 add %r12,%rdx # h+=T1
364
365 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
366 lea 1(%rdi),%rdi # round++
367
368 add %r14,%rdx # h+=Maj(a,b,c)
369 mov 8*5(%rsi),%r12
370 bswap %r12
371 mov %r11,%r13
372 mov %r11,%r14
373 mov %rax,%r15
374
375 ror $14,%r13
376 ror $18,%r14
377 xor %rbx,%r15 # f^g
378
379 xor %r14,%r13
380 ror $23,%r14
381 and %r11,%r15 # (f^g)&e
382 mov %r12,40(%rsp)
383
384 xor %r14,%r13 # Sigma1(e)
385 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
386 add %rcx,%r12 # T1+=h
387
388 mov %rdx,%rcx
389 add %r13,%r12 # T1+=Sigma1(e)
390
391 add %r15,%r12 # T1+=Ch(e,f,g)
392 mov %rdx,%r13
393 mov %rdx,%r14
394
395 ror $28,%rcx
396 ror $34,%r13
397 mov %rdx,%r15
398 add (%rbp,%rdi,8),%r12 # T1+=K[round]
399
400 xor %r13,%rcx
401 ror $5,%r13
402 or %r9,%r14 # a|c
403
404 xor %r13,%rcx # h=Sigma0(a)
405 and %r9,%r15 # a&c
406 add %r12,%r10 # d+=T1
407
408 and %r8,%r14 # (a|c)&b
409 add %r12,%rcx # h+=T1
410
411 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
412 lea 1(%rdi),%rdi # round++
413
414 add %r14,%rcx # h+=Maj(a,b,c)
415 mov 8*6(%rsi),%r12
416 bswap %r12
417 mov %r10,%r13
418 mov %r10,%r14
419 mov %r11,%r15
420
421 ror $14,%r13
422 ror $18,%r14
423 xor %rax,%r15 # f^g
424
425 xor %r14,%r13
426 ror $23,%r14
427 and %r10,%r15 # (f^g)&e
428 mov %r12,48(%rsp)
429
430 xor %r14,%r13 # Sigma1(e)
431 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
432 add %rbx,%r12 # T1+=h
433
434 mov %rcx,%rbx
435 add %r13,%r12 # T1+=Sigma1(e)
436
437 add %r15,%r12 # T1+=Ch(e,f,g)
438 mov %rcx,%r13
439 mov %rcx,%r14
440
441 ror $28,%rbx
442 ror $34,%r13
443 mov %rcx,%r15
444 add (%rbp,%rdi,8),%r12 # T1+=K[round]
445
446 xor %r13,%rbx
447 ror $5,%r13
448 or %r8,%r14 # a|c
449
450 xor %r13,%rbx # h=Sigma0(a)
451 and %r8,%r15 # a&c
452 add %r12,%r9 # d+=T1
453
454 and %rdx,%r14 # (a|c)&b
455 add %r12,%rbx # h+=T1
456
457 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
458 lea 1(%rdi),%rdi # round++
459
460 add %r14,%rbx # h+=Maj(a,b,c)
461 mov 8*7(%rsi),%r12
462 bswap %r12
463 mov %r9,%r13
464 mov %r9,%r14
465 mov %r10,%r15
466
467 ror $14,%r13
468 ror $18,%r14
469 xor %r11,%r15 # f^g
470
471 xor %r14,%r13
472 ror $23,%r14
473 and %r9,%r15 # (f^g)&e
474 mov %r12,56(%rsp)
475
476 xor %r14,%r13 # Sigma1(e)
477 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
478 add %rax,%r12 # T1+=h
479
480 mov %rbx,%rax
481 add %r13,%r12 # T1+=Sigma1(e)
482
483 add %r15,%r12 # T1+=Ch(e,f,g)
484 mov %rbx,%r13
485 mov %rbx,%r14
486
487 ror $28,%rax
488 ror $34,%r13
489 mov %rbx,%r15
490 add (%rbp,%rdi,8),%r12 # T1+=K[round]
491
492 xor %r13,%rax
493 ror $5,%r13
494 or %rdx,%r14 # a|c
495
496 xor %r13,%rax # h=Sigma0(a)
497 and %rdx,%r15 # a&c
498 add %r12,%r8 # d+=T1
499
500 and %rcx,%r14 # (a|c)&b
501 add %r12,%rax # h+=T1
502
503 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
504 lea 1(%rdi),%rdi # round++
505
506 add %r14,%rax # h+=Maj(a,b,c)
507 mov 8*8(%rsi),%r12
508 bswap %r12
509 mov %r8,%r13
510 mov %r8,%r14
511 mov %r9,%r15
512
513 ror $14,%r13
514 ror $18,%r14
515 xor %r10,%r15 # f^g
516
517 xor %r14,%r13
518 ror $23,%r14
519 and %r8,%r15 # (f^g)&e
520 mov %r12,64(%rsp)
521
522 xor %r14,%r13 # Sigma1(e)
523 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
524 add %r11,%r12 # T1+=h
525
526 mov %rax,%r11
527 add %r13,%r12 # T1+=Sigma1(e)
528
529 add %r15,%r12 # T1+=Ch(e,f,g)
530 mov %rax,%r13
531 mov %rax,%r14
532
533 ror $28,%r11
534 ror $34,%r13
535 mov %rax,%r15
536 add (%rbp,%rdi,8),%r12 # T1+=K[round]
537
538 xor %r13,%r11
539 ror $5,%r13
540 or %rcx,%r14 # a|c
541
542 xor %r13,%r11 # h=Sigma0(a)
543 and %rcx,%r15 # a&c
544 add %r12,%rdx # d+=T1
545
546 and %rbx,%r14 # (a|c)&b
547 add %r12,%r11 # h+=T1
548
549 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
550 lea 1(%rdi),%rdi # round++
551
552 add %r14,%r11 # h+=Maj(a,b,c)
553 mov 8*9(%rsi),%r12
554 bswap %r12
555 mov %rdx,%r13
556 mov %rdx,%r14
557 mov %r8,%r15
558
559 ror $14,%r13
560 ror $18,%r14
561 xor %r9,%r15 # f^g
562
563 xor %r14,%r13
564 ror $23,%r14
565 and %rdx,%r15 # (f^g)&e
566 mov %r12,72(%rsp)
567
568 xor %r14,%r13 # Sigma1(e)
569 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
570 add %r10,%r12 # T1+=h
571
572 mov %r11,%r10
573 add %r13,%r12 # T1+=Sigma1(e)
574
575 add %r15,%r12 # T1+=Ch(e,f,g)
576 mov %r11,%r13
577 mov %r11,%r14
578
579 ror $28,%r10
580 ror $34,%r13
581 mov %r11,%r15
582 add (%rbp,%rdi,8),%r12 # T1+=K[round]
583
584 xor %r13,%r10
585 ror $5,%r13
586 or %rbx,%r14 # a|c
587
588 xor %r13,%r10 # h=Sigma0(a)
589 and %rbx,%r15 # a&c
590 add %r12,%rcx # d+=T1
591
592 and %rax,%r14 # (a|c)&b
593 add %r12,%r10 # h+=T1
594
595 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
596 lea 1(%rdi),%rdi # round++
597
598 add %r14,%r10 # h+=Maj(a,b,c)
599 mov 8*10(%rsi),%r12
600 bswap %r12
601 mov %rcx,%r13
602 mov %rcx,%r14
603 mov %rdx,%r15
604
605 ror $14,%r13
606 ror $18,%r14
607 xor %r8,%r15 # f^g
608
609 xor %r14,%r13
610 ror $23,%r14
611 and %rcx,%r15 # (f^g)&e
612 mov %r12,80(%rsp)
613
614 xor %r14,%r13 # Sigma1(e)
615 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
616 add %r9,%r12 # T1+=h
617
618 mov %r10,%r9
619 add %r13,%r12 # T1+=Sigma1(e)
620
621 add %r15,%r12 # T1+=Ch(e,f,g)
622 mov %r10,%r13
623 mov %r10,%r14
624
625 ror $28,%r9
626 ror $34,%r13
627 mov %r10,%r15
628 add (%rbp,%rdi,8),%r12 # T1+=K[round]
629
630 xor %r13,%r9
631 ror $5,%r13
632 or %rax,%r14 # a|c
633
634 xor %r13,%r9 # h=Sigma0(a)
635 and %rax,%r15 # a&c
636 add %r12,%rbx # d+=T1
637
638 and %r11,%r14 # (a|c)&b
639 add %r12,%r9 # h+=T1
640
641 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
642 lea 1(%rdi),%rdi # round++
643
644 add %r14,%r9 # h+=Maj(a,b,c)
645 mov 8*11(%rsi),%r12
646 bswap %r12
647 mov %rbx,%r13
648 mov %rbx,%r14
649 mov %rcx,%r15
650
651 ror $14,%r13
652 ror $18,%r14
653 xor %rdx,%r15 # f^g
654
655 xor %r14,%r13
656 ror $23,%r14
657 and %rbx,%r15 # (f^g)&e
658 mov %r12,88(%rsp)
659
660 xor %r14,%r13 # Sigma1(e)
661 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
662 add %r8,%r12 # T1+=h
663
664 mov %r9,%r8
665 add %r13,%r12 # T1+=Sigma1(e)
666
667 add %r15,%r12 # T1+=Ch(e,f,g)
668 mov %r9,%r13
669 mov %r9,%r14
670
671 ror $28,%r8
672 ror $34,%r13
673 mov %r9,%r15
674 add (%rbp,%rdi,8),%r12 # T1+=K[round]
675
676 xor %r13,%r8
677 ror $5,%r13
678 or %r11,%r14 # a|c
679
680 xor %r13,%r8 # h=Sigma0(a)
681 and %r11,%r15 # a&c
682 add %r12,%rax # d+=T1
683
684 and %r10,%r14 # (a|c)&b
685 add %r12,%r8 # h+=T1
686
687 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
688 lea 1(%rdi),%rdi # round++
689
690 add %r14,%r8 # h+=Maj(a,b,c)
691 mov 8*12(%rsi),%r12
692 bswap %r12
693 mov %rax,%r13
694 mov %rax,%r14
695 mov %rbx,%r15
696
697 ror $14,%r13
698 ror $18,%r14
699 xor %rcx,%r15 # f^g
700
701 xor %r14,%r13
702 ror $23,%r14
703 and %rax,%r15 # (f^g)&e
704 mov %r12,96(%rsp)
705
706 xor %r14,%r13 # Sigma1(e)
707 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
708 add %rdx,%r12 # T1+=h
709
710 mov %r8,%rdx
711 add %r13,%r12 # T1+=Sigma1(e)
712
713 add %r15,%r12 # T1+=Ch(e,f,g)
714 mov %r8,%r13
715 mov %r8,%r14
716
717 ror $28,%rdx
718 ror $34,%r13
719 mov %r8,%r15
720 add (%rbp,%rdi,8),%r12 # T1+=K[round]
721
722 xor %r13,%rdx
723 ror $5,%r13
724 or %r10,%r14 # a|c
725
726 xor %r13,%rdx # h=Sigma0(a)
727 and %r10,%r15 # a&c
728 add %r12,%r11 # d+=T1
729
730 and %r9,%r14 # (a|c)&b
731 add %r12,%rdx # h+=T1
732
733 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
734 lea 1(%rdi),%rdi # round++
735
736 add %r14,%rdx # h+=Maj(a,b,c)
737 mov 8*13(%rsi),%r12
738 bswap %r12
739 mov %r11,%r13
740 mov %r11,%r14
741 mov %rax,%r15
742
743 ror $14,%r13
744 ror $18,%r14
745 xor %rbx,%r15 # f^g
746
747 xor %r14,%r13
748 ror $23,%r14
749 and %r11,%r15 # (f^g)&e
750 mov %r12,104(%rsp)
751
752 xor %r14,%r13 # Sigma1(e)
753 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
754 add %rcx,%r12 # T1+=h
755
756 mov %rdx,%rcx
757 add %r13,%r12 # T1+=Sigma1(e)
758
759 add %r15,%r12 # T1+=Ch(e,f,g)
760 mov %rdx,%r13
761 mov %rdx,%r14
762
763 ror $28,%rcx
764 ror $34,%r13
765 mov %rdx,%r15
766 add (%rbp,%rdi,8),%r12 # T1+=K[round]
767
768 xor %r13,%rcx
769 ror $5,%r13
770 or %r9,%r14 # a|c
771
772 xor %r13,%rcx # h=Sigma0(a)
773 and %r9,%r15 # a&c
774 add %r12,%r10 # d+=T1
775
776 and %r8,%r14 # (a|c)&b
777 add %r12,%rcx # h+=T1
778
779 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
780 lea 1(%rdi),%rdi # round++
781
782 add %r14,%rcx # h+=Maj(a,b,c)
783 mov 8*14(%rsi),%r12
784 bswap %r12
785 mov %r10,%r13
786 mov %r10,%r14
787 mov %r11,%r15
788
789 ror $14,%r13
790 ror $18,%r14
791 xor %rax,%r15 # f^g
792
793 xor %r14,%r13
794 ror $23,%r14
795 and %r10,%r15 # (f^g)&e
796 mov %r12,112(%rsp)
797
798 xor %r14,%r13 # Sigma1(e)
799 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
800 add %rbx,%r12 # T1+=h
801
802 mov %rcx,%rbx
803 add %r13,%r12 # T1+=Sigma1(e)
804
805 add %r15,%r12 # T1+=Ch(e,f,g)
806 mov %rcx,%r13
807 mov %rcx,%r14
808
809 ror $28,%rbx
810 ror $34,%r13
811 mov %rcx,%r15
812 add (%rbp,%rdi,8),%r12 # T1+=K[round]
813
814 xor %r13,%rbx
815 ror $5,%r13
816 or %r8,%r14 # a|c
817
818 xor %r13,%rbx # h=Sigma0(a)
819 and %r8,%r15 # a&c
820 add %r12,%r9 # d+=T1
821
822 and %rdx,%r14 # (a|c)&b
823 add %r12,%rbx # h+=T1
824
825 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
826 lea 1(%rdi),%rdi # round++
827
828 add %r14,%rbx # h+=Maj(a,b,c)
829 mov 8*15(%rsi),%r12
830 bswap %r12
831 mov %r9,%r13
832 mov %r9,%r14
833 mov %r10,%r15
834
835 ror $14,%r13
836 ror $18,%r14
837 xor %r11,%r15 # f^g
838
839 xor %r14,%r13
840 ror $23,%r14
841 and %r9,%r15 # (f^g)&e
842 mov %r12,120(%rsp)
843
844 xor %r14,%r13 # Sigma1(e)
845 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
846 add %rax,%r12 # T1+=h
847
848 mov %rbx,%rax
849 add %r13,%r12 # T1+=Sigma1(e)
850
851 add %r15,%r12 # T1+=Ch(e,f,g)
852 mov %rbx,%r13
853 mov %rbx,%r14
854
855 ror $28,%rax
856 ror $34,%r13
857 mov %rbx,%r15
858 add (%rbp,%rdi,8),%r12 # T1+=K[round]
859
860 xor %r13,%rax
861 ror $5,%r13
862 or %rdx,%r14 # a|c
863
864 xor %r13,%rax # h=Sigma0(a)
865 and %rdx,%r15 # a&c
866 add %r12,%r8 # d+=T1
867
868 and %rcx,%r14 # (a|c)&b
869 add %r12,%rax # h+=T1
870
871 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
872 lea 1(%rdi),%rdi # round++
873
874 add %r14,%rax # h+=Maj(a,b,c)
875 jmp .Lrounds_16_xx
876 .align 16
877 .Lrounds_16_xx:
878 mov 8(%rsp),%r13
879 mov 112(%rsp),%r12
880
881 mov %r13,%r15
882
883 shr $7,%r13
884 ror $1,%r15
885
886 xor %r15,%r13
887 ror $7,%r15
888
889 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
890 mov %r12,%r14
891
892 shr $6,%r12
893 ror $19,%r14
894
895 xor %r14,%r12
896 ror $42,%r14
897
898 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
899
900 add %r13,%r12
901
902 add 72(%rsp),%r12
903
904 add 0(%rsp),%r12
905 mov %r8,%r13
906 mov %r8,%r14
907 mov %r9,%r15
908
909 ror $14,%r13
910 ror $18,%r14
911 xor %r10,%r15 # f^g
912
913 xor %r14,%r13
914 ror $23,%r14
915 and %r8,%r15 # (f^g)&e
916 mov %r12,0(%rsp)
917
918 xor %r14,%r13 # Sigma1(e)
919 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
920 add %r11,%r12 # T1+=h
921
922 mov %rax,%r11
923 add %r13,%r12 # T1+=Sigma1(e)
924
925 add %r15,%r12 # T1+=Ch(e,f,g)
926 mov %rax,%r13
927 mov %rax,%r14
928
929 ror $28,%r11
930 ror $34,%r13
931 mov %rax,%r15
932 add (%rbp,%rdi,8),%r12 # T1+=K[round]
933
934 xor %r13,%r11
935 ror $5,%r13
936 or %rcx,%r14 # a|c
937
938 xor %r13,%r11 # h=Sigma0(a)
939 and %rcx,%r15 # a&c
940 add %r12,%rdx # d+=T1
941
942 and %rbx,%r14 # (a|c)&b
943 add %r12,%r11 # h+=T1
944
945 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
946 lea 1(%rdi),%rdi # round++
947
948 add %r14,%r11 # h+=Maj(a,b,c)
949 mov 16(%rsp),%r13
950 mov 120(%rsp),%r12
951
952 mov %r13,%r15
953
954 shr $7,%r13
955 ror $1,%r15
956
957 xor %r15,%r13
958 ror $7,%r15
959
960 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
961 mov %r12,%r14
962
963 shr $6,%r12
964 ror $19,%r14
965
966 xor %r14,%r12
967 ror $42,%r14
968
969 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
970
971 add %r13,%r12
972
973 add 80(%rsp),%r12
974
975 add 8(%rsp),%r12
976 mov %rdx,%r13
977 mov %rdx,%r14
978 mov %r8,%r15
979
980 ror $14,%r13
981 ror $18,%r14
982 xor %r9,%r15 # f^g
983
984 xor %r14,%r13
985 ror $23,%r14
986 and %rdx,%r15 # (f^g)&e
987 mov %r12,8(%rsp)
988
989 xor %r14,%r13 # Sigma1(e)
990 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
991 add %r10,%r12 # T1+=h
992
993 mov %r11,%r10
994 add %r13,%r12 # T1+=Sigma1(e)
995
996 add %r15,%r12 # T1+=Ch(e,f,g)
997 mov %r11,%r13
998 mov %r11,%r14
999
1000 ror $28,%r10
1001 ror $34,%r13
1002 mov %r11,%r15
1003 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1004
1005 xor %r13,%r10
1006 ror $5,%r13
1007 or %rbx,%r14 # a|c
1008
1009 xor %r13,%r10 # h=Sigma0(a)
1010 and %rbx,%r15 # a&c
1011 add %r12,%rcx # d+=T1
1012
1013 and %rax,%r14 # (a|c)&b
1014 add %r12,%r10 # h+=T1
1015
1016 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1017 lea 1(%rdi),%rdi # round++
1018
1019 add %r14,%r10 # h+=Maj(a,b,c)
1020 mov 24(%rsp),%r13
1021 mov 0(%rsp),%r12
1022
1023 mov %r13,%r15
1024
1025 shr $7,%r13
1026 ror $1,%r15
1027
1028 xor %r15,%r13
1029 ror $7,%r15
1030
1031 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1032 mov %r12,%r14
1033
1034 shr $6,%r12
1035 ror $19,%r14
1036
1037 xor %r14,%r12
1038 ror $42,%r14
1039
1040 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1041
1042 add %r13,%r12
1043
1044 add 88(%rsp),%r12
1045
1046 add 16(%rsp),%r12
1047 mov %rcx,%r13
1048 mov %rcx,%r14
1049 mov %rdx,%r15
1050
1051 ror $14,%r13
1052 ror $18,%r14
1053 xor %r8,%r15 # f^g
1054
1055 xor %r14,%r13
1056 ror $23,%r14
1057 and %rcx,%r15 # (f^g)&e
1058 mov %r12,16(%rsp)
1059
1060 xor %r14,%r13 # Sigma1(e)
1061 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
1062 add %r9,%r12 # T1+=h
1063
1064 mov %r10,%r9
1065 add %r13,%r12 # T1+=Sigma1(e)
1066
1067 add %r15,%r12 # T1+=Ch(e,f,g)
1068 mov %r10,%r13
1069 mov %r10,%r14
1070
1071 ror $28,%r9
1072 ror $34,%r13
1073 mov %r10,%r15
1074 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1075
1076 xor %r13,%r9
1077 ror $5,%r13
1078 or %rax,%r14 # a|c
1079
1080 xor %r13,%r9 # h=Sigma0(a)
1081 and %rax,%r15 # a&c
1082 add %r12,%rbx # d+=T1
1083
1084 and %r11,%r14 # (a|c)&b
1085 add %r12,%r9 # h+=T1
1086
1087 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1088 lea 1(%rdi),%rdi # round++
1089
1090 add %r14,%r9 # h+=Maj(a,b,c)
1091 mov 32(%rsp),%r13
1092 mov 8(%rsp),%r12
1093
1094 mov %r13,%r15
1095
1096 shr $7,%r13
1097 ror $1,%r15
1098
1099 xor %r15,%r13
1100 ror $7,%r15
1101
1102 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1103 mov %r12,%r14
1104
1105 shr $6,%r12
1106 ror $19,%r14
1107
1108 xor %r14,%r12
1109 ror $42,%r14
1110
1111 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1112
1113 add %r13,%r12
1114
1115 add 96(%rsp),%r12
1116
1117 add 24(%rsp),%r12
1118 mov %rbx,%r13
1119 mov %rbx,%r14
1120 mov %rcx,%r15
1121
1122 ror $14,%r13
1123 ror $18,%r14
1124 xor %rdx,%r15 # f^g
1125
1126 xor %r14,%r13
1127 ror $23,%r14
1128 and %rbx,%r15 # (f^g)&e
1129 mov %r12,24(%rsp)
1130
1131 xor %r14,%r13 # Sigma1(e)
1132 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1133 add %r8,%r12 # T1+=h
1134
1135 mov %r9,%r8
1136 add %r13,%r12 # T1+=Sigma1(e)
1137
1138 add %r15,%r12 # T1+=Ch(e,f,g)
1139 mov %r9,%r13
1140 mov %r9,%r14
1141
1142 ror $28,%r8
1143 ror $34,%r13
1144 mov %r9,%r15
1145 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1146
1147 xor %r13,%r8
1148 ror $5,%r13
1149 or %r11,%r14 # a|c
1150
1151 xor %r13,%r8 # h=Sigma0(a)
1152 and %r11,%r15 # a&c
1153 add %r12,%rax # d+=T1
1154
1155 and %r10,%r14 # (a|c)&b
1156 add %r12,%r8 # h+=T1
1157
1158 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1159 lea 1(%rdi),%rdi # round++
1160
1161 add %r14,%r8 # h+=Maj(a,b,c)
1162 mov 40(%rsp),%r13
1163 mov 16(%rsp),%r12
1164
1165 mov %r13,%r15
1166
1167 shr $7,%r13
1168 ror $1,%r15
1169
1170 xor %r15,%r13
1171 ror $7,%r15
1172
1173 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1174 mov %r12,%r14
1175
1176 shr $6,%r12
1177 ror $19,%r14
1178
1179 xor %r14,%r12
1180 ror $42,%r14
1181
1182 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1183
1184 add %r13,%r12
1185
1186 add 104(%rsp),%r12
1187
1188 add 32(%rsp),%r12
1189 mov %rax,%r13
1190 mov %rax,%r14
1191 mov %rbx,%r15
1192
1193 ror $14,%r13
1194 ror $18,%r14
1195 xor %rcx,%r15 # f^g
1196
1197 xor %r14,%r13
1198 ror $23,%r14
1199 and %rax,%r15 # (f^g)&e
1200 mov %r12,32(%rsp)
1201
1202 xor %r14,%r13 # Sigma1(e)
1203 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1204 add %rdx,%r12 # T1+=h
1205
1206 mov %r8,%rdx
1207 add %r13,%r12 # T1+=Sigma1(e)
1208
1209 add %r15,%r12 # T1+=Ch(e,f,g)
1210 mov %r8,%r13
1211 mov %r8,%r14
1212
1213 ror $28,%rdx
1214 ror $34,%r13
1215 mov %r8,%r15
1216 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1217
1218 xor %r13,%rdx
1219 ror $5,%r13
1220 or %r10,%r14 # a|c
1221
1222 xor %r13,%rdx # h=Sigma0(a)
1223 and %r10,%r15 # a&c
1224 add %r12,%r11 # d+=T1
1225
1226 and %r9,%r14 # (a|c)&b
1227 add %r12,%rdx # h+=T1
1228
1229 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1230 lea 1(%rdi),%rdi # round++
1231
1232 add %r14,%rdx # h+=Maj(a,b,c)
1233 mov 48(%rsp),%r13
1234 mov 24(%rsp),%r12
1235
1236 mov %r13,%r15
1237
1238 shr $7,%r13
1239 ror $1,%r15
1240
1241 xor %r15,%r13
1242 ror $7,%r15
1243
1244 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1245 mov %r12,%r14
1246
1247 shr $6,%r12
1248 ror $19,%r14
1249
1250 xor %r14,%r12
1251 ror $42,%r14
1252
1253 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1254
1255 add %r13,%r12
1256
1257 add 112(%rsp),%r12
1258
1259 add 40(%rsp),%r12
1260 mov %r11,%r13
1261 mov %r11,%r14
1262 mov %rax,%r15
1263
1264 ror $14,%r13
1265 ror $18,%r14
1266 xor %rbx,%r15 # f^g
1267
1268 xor %r14,%r13
1269 ror $23,%r14
1270 and %r11,%r15 # (f^g)&e
1271 mov %r12,40(%rsp)
1272
1273 xor %r14,%r13 # Sigma1(e)
1274 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1275 add %rcx,%r12 # T1+=h
1276
1277 mov %rdx,%rcx
1278 add %r13,%r12 # T1+=Sigma1(e)
1279
1280 add %r15,%r12 # T1+=Ch(e,f,g)
1281 mov %rdx,%r13
1282 mov %rdx,%r14
1283
1284 ror $28,%rcx
1285 ror $34,%r13
1286 mov %rdx,%r15
1287 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1288
1289 xor %r13,%rcx
1290 ror $5,%r13
1291 or %r9,%r14 # a|c
1292
1293 xor %r13,%rcx # h=Sigma0(a)
1294 and %r9,%r15 # a&c
1295 add %r12,%r10 # d+=T1
1296
1297 and %r8,%r14 # (a|c)&b
1298 add %r12,%rcx # h+=T1
1299
1300 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1301 lea 1(%rdi),%rdi # round++
1302
1303 add %r14,%rcx # h+=Maj(a,b,c)
1304 mov 56(%rsp),%r13
1305 mov 32(%rsp),%r12
1306
1307 mov %r13,%r15
1308
1309 shr $7,%r13
1310 ror $1,%r15
1311
1312 xor %r15,%r13
1313 ror $7,%r15
1314
1315 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1316 mov %r12,%r14
1317
1318 shr $6,%r12
1319 ror $19,%r14
1320
1321 xor %r14,%r12
1322 ror $42,%r14
1323
1324 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1325
1326 add %r13,%r12
1327
1328 add 120(%rsp),%r12
1329
1330 add 48(%rsp),%r12
1331 mov %r10,%r13
1332 mov %r10,%r14
1333 mov %r11,%r15
1334
1335 ror $14,%r13
1336 ror $18,%r14
1337 xor %rax,%r15 # f^g
1338
1339 xor %r14,%r13
1340 ror $23,%r14
1341 and %r10,%r15 # (f^g)&e
1342 mov %r12,48(%rsp)
1343
1344 xor %r14,%r13 # Sigma1(e)
1345 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
1346 add %rbx,%r12 # T1+=h
1347
1348 mov %rcx,%rbx
1349 add %r13,%r12 # T1+=Sigma1(e)
1350
1351 add %r15,%r12 # T1+=Ch(e,f,g)
1352 mov %rcx,%r13
1353 mov %rcx,%r14
1354
1355 ror $28,%rbx
1356 ror $34,%r13
1357 mov %rcx,%r15
1358 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1359
1360 xor %r13,%rbx
1361 ror $5,%r13
1362 or %r8,%r14 # a|c
1363
1364 xor %r13,%rbx # h=Sigma0(a)
1365 and %r8,%r15 # a&c
1366 add %r12,%r9 # d+=T1
1367
1368 and %rdx,%r14 # (a|c)&b
1369 add %r12,%rbx # h+=T1
1370
1371 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1372 lea 1(%rdi),%rdi # round++
1373
1374 add %r14,%rbx # h+=Maj(a,b,c)
1375 mov 64(%rsp),%r13
1376 mov 40(%rsp),%r12
1377
1378 mov %r13,%r15
1379
1380 shr $7,%r13
1381 ror $1,%r15
1382
1383 xor %r15,%r13
1384 ror $7,%r15
1385
1386 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1387 mov %r12,%r14
1388
1389 shr $6,%r12
1390 ror $19,%r14
1391
1392 xor %r14,%r12
1393 ror $42,%r14
1394
1395 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1396
1397 add %r13,%r12
1398
1399 add 0(%rsp),%r12
1400
1401 add 56(%rsp),%r12
1402 mov %r9,%r13
1403 mov %r9,%r14
1404 mov %r10,%r15
1405
1406 ror $14,%r13
1407 ror $18,%r14
1408 xor %r11,%r15 # f^g
1409
1410 xor %r14,%r13
1411 ror $23,%r14
1412 and %r9,%r15 # (f^g)&e
1413 mov %r12,56(%rsp)
1414
1415 xor %r14,%r13 # Sigma1(e)
1416 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
1417 add %rax,%r12 # T1+=h
1418
1419 mov %rbx,%rax
1420 add %r13,%r12 # T1+=Sigma1(e)
1421
1422 add %r15,%r12 # T1+=Ch(e,f,g)
1423 mov %rbx,%r13
1424 mov %rbx,%r14
1425
1426 ror $28,%rax
1427 ror $34,%r13
1428 mov %rbx,%r15
1429 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1430
1431 xor %r13,%rax
1432 ror $5,%r13
1433 or %rdx,%r14 # a|c
1434
1435 xor %r13,%rax # h=Sigma0(a)
1436 and %rdx,%r15 # a&c
1437 add %r12,%r8 # d+=T1
1438
1439 and %rcx,%r14 # (a|c)&b
1440 add %r12,%rax # h+=T1
1441
1442 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1443 lea 1(%rdi),%rdi # round++
1444
1445 add %r14,%rax # h+=Maj(a,b,c)
1446 mov 72(%rsp),%r13
1447 mov 48(%rsp),%r12
1448
1449 mov %r13,%r15
1450
1451 shr $7,%r13
1452 ror $1,%r15
1453
1454 xor %r15,%r13
1455 ror $7,%r15
1456
1457 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1458 mov %r12,%r14
1459
1460 shr $6,%r12
1461 ror $19,%r14
1462
1463 xor %r14,%r12
1464 ror $42,%r14
1465
1466 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1467
1468 add %r13,%r12
1469
1470 add 8(%rsp),%r12
1471
1472 add 64(%rsp),%r12
1473 mov %r8,%r13
1474 mov %r8,%r14
1475 mov %r9,%r15
1476
1477 ror $14,%r13
1478 ror $18,%r14
1479 xor %r10,%r15 # f^g
1480
1481 xor %r14,%r13
1482 ror $23,%r14
1483 and %r8,%r15 # (f^g)&e
1484 mov %r12,64(%rsp)
1485
1486 xor %r14,%r13 # Sigma1(e)
1487 xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
1488 add %r11,%r12 # T1+=h
1489
1490 mov %rax,%r11
1491 add %r13,%r12 # T1+=Sigma1(e)
1492
1493 add %r15,%r12 # T1+=Ch(e,f,g)
1494 mov %rax,%r13
1495 mov %rax,%r14
1496
1497 ror $28,%r11
1498 ror $34,%r13
1499 mov %rax,%r15
1500 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1501
1502 xor %r13,%r11
1503 ror $5,%r13
1504 or %rcx,%r14 # a|c
1505
1506 xor %r13,%r11 # h=Sigma0(a)
1507 and %rcx,%r15 # a&c
1508 add %r12,%rdx # d+=T1
1509
1510 and %rbx,%r14 # (a|c)&b
1511 add %r12,%r11 # h+=T1
1512
1513 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1514 lea 1(%rdi),%rdi # round++
1515
1516 add %r14,%r11 # h+=Maj(a,b,c)
1517 mov 80(%rsp),%r13
1518 mov 56(%rsp),%r12
1519
1520 mov %r13,%r15
1521
1522 shr $7,%r13
1523 ror $1,%r15
1524
1525 xor %r15,%r13
1526 ror $7,%r15
1527
1528 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1529 mov %r12,%r14
1530
1531 shr $6,%r12
1532 ror $19,%r14
1533
1534 xor %r14,%r12
1535 ror $42,%r14
1536
1537 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1538
1539 add %r13,%r12
1540
1541 add 16(%rsp),%r12
1542
1543 add 72(%rsp),%r12
1544 mov %rdx,%r13
1545 mov %rdx,%r14
1546 mov %r8,%r15
1547
1548 ror $14,%r13
1549 ror $18,%r14
1550 xor %r9,%r15 # f^g
1551
1552 xor %r14,%r13
1553 ror $23,%r14
1554 and %rdx,%r15 # (f^g)&e
1555 mov %r12,72(%rsp)
1556
1557 xor %r14,%r13 # Sigma1(e)
1558 xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
1559 add %r10,%r12 # T1+=h
1560
1561 mov %r11,%r10
1562 add %r13,%r12 # T1+=Sigma1(e)
1563
1564 add %r15,%r12 # T1+=Ch(e,f,g)
1565 mov %r11,%r13
1566 mov %r11,%r14
1567
1568 ror $28,%r10
1569 ror $34,%r13
1570 mov %r11,%r15
1571 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1572
1573 xor %r13,%r10
1574 ror $5,%r13
1575 or %rbx,%r14 # a|c
1576
1577 xor %r13,%r10 # h=Sigma0(a)
1578 and %rbx,%r15 # a&c
1579 add %r12,%rcx # d+=T1
1580
1581 and %rax,%r14 # (a|c)&b
1582 add %r12,%r10 # h+=T1
1583
1584 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1585 lea 1(%rdi),%rdi # round++
1586
1587 add %r14,%r10 # h+=Maj(a,b,c)
1588 mov 88(%rsp),%r13
1589 mov 64(%rsp),%r12
1590
1591 mov %r13,%r15
1592
1593 shr $7,%r13
1594 ror $1,%r15
1595
1596 xor %r15,%r13
1597 ror $7,%r15
1598
1599 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1600 mov %r12,%r14
1601
1602 shr $6,%r12
1603 ror $19,%r14
1604
1605 xor %r14,%r12
1606 ror $42,%r14
1607
1608 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1609
1610 add %r13,%r12
1611
1612 add 24(%rsp),%r12
1613
1614 add 80(%rsp),%r12
1615 mov %rcx,%r13
1616 mov %rcx,%r14
1617 mov %rdx,%r15
1618
1619 ror $14,%r13
1620 ror $18,%r14
1621 xor %r8,%r15 # f^g
1622
1623 xor %r14,%r13
1624 ror $23,%r14
1625 and %rcx,%r15 # (f^g)&e
1626 mov %r12,80(%rsp)
1627
1628 xor %r14,%r13 # Sigma1(e)
1629 xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
1630 add %r9,%r12 # T1+=h
1631
1632 mov %r10,%r9
1633 add %r13,%r12 # T1+=Sigma1(e)
1634
1635 add %r15,%r12 # T1+=Ch(e,f,g)
1636 mov %r10,%r13
1637 mov %r10,%r14
1638
1639 ror $28,%r9
1640 ror $34,%r13
1641 mov %r10,%r15
1642 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1643
1644 xor %r13,%r9
1645 ror $5,%r13
1646 or %rax,%r14 # a|c
1647
1648 xor %r13,%r9 # h=Sigma0(a)
1649 and %rax,%r15 # a&c
1650 add %r12,%rbx # d+=T1
1651
1652 and %r11,%r14 # (a|c)&b
1653 add %r12,%r9 # h+=T1
1654
1655 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1656 lea 1(%rdi),%rdi # round++
1657
1658 add %r14,%r9 # h+=Maj(a,b,c)
1659 mov 96(%rsp),%r13
1660 mov 72(%rsp),%r12
1661
1662 mov %r13,%r15
1663
1664 shr $7,%r13
1665 ror $1,%r15
1666
1667 xor %r15,%r13
1668 ror $7,%r15
1669
1670 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1671 mov %r12,%r14
1672
1673 shr $6,%r12
1674 ror $19,%r14
1675
1676 xor %r14,%r12
1677 ror $42,%r14
1678
1679 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1680
1681 add %r13,%r12
1682
1683 add 32(%rsp),%r12
1684
1685 add 88(%rsp),%r12
1686 mov %rbx,%r13
1687 mov %rbx,%r14
1688 mov %rcx,%r15
1689
1690 ror $14,%r13
1691 ror $18,%r14
1692 xor %rdx,%r15 # f^g
1693
1694 xor %r14,%r13
1695 ror $23,%r14
1696 and %rbx,%r15 # (f^g)&e
1697 mov %r12,88(%rsp)
1698
1699 xor %r14,%r13 # Sigma1(e)
1700 xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1701 add %r8,%r12 # T1+=h
1702
1703 mov %r9,%r8
1704 add %r13,%r12 # T1+=Sigma1(e)
1705
1706 add %r15,%r12 # T1+=Ch(e,f,g)
1707 mov %r9,%r13
1708 mov %r9,%r14
1709
1710 ror $28,%r8
1711 ror $34,%r13
1712 mov %r9,%r15
1713 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1714
1715 xor %r13,%r8
1716 ror $5,%r13
1717 or %r11,%r14 # a|c
1718
1719 xor %r13,%r8 # h=Sigma0(a)
1720 and %r11,%r15 # a&c
1721 add %r12,%rax # d+=T1
1722
1723 and %r10,%r14 # (a|c)&b
1724 add %r12,%r8 # h+=T1
1725
1726 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1727 lea 1(%rdi),%rdi # round++
1728
1729 add %r14,%r8 # h+=Maj(a,b,c)
1730 mov 104(%rsp),%r13
1731 mov 80(%rsp),%r12
1732
1733 mov %r13,%r15
1734
1735 shr $7,%r13
1736 ror $1,%r15
1737
1738 xor %r15,%r13
1739 ror $7,%r15
1740
1741 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1742 mov %r12,%r14
1743
1744 shr $6,%r12
1745 ror $19,%r14
1746
1747 xor %r14,%r12
1748 ror $42,%r14
1749
1750 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1751
1752 add %r13,%r12
1753
1754 add 40(%rsp),%r12
1755
1756 add 96(%rsp),%r12
1757 mov %rax,%r13
1758 mov %rax,%r14
1759 mov %rbx,%r15
1760
1761 ror $14,%r13
1762 ror $18,%r14
1763 xor %rcx,%r15 # f^g
1764
1765 xor %r14,%r13
1766 ror $23,%r14
1767 and %rax,%r15 # (f^g)&e
1768 mov %r12,96(%rsp)
1769
1770 xor %r14,%r13 # Sigma1(e)
1771 xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1772 add %rdx,%r12 # T1+=h
1773
1774 mov %r8,%rdx
1775 add %r13,%r12 # T1+=Sigma1(e)
1776
1777 add %r15,%r12 # T1+=Ch(e,f,g)
1778 mov %r8,%r13
1779 mov %r8,%r14
1780
1781 ror $28,%rdx
1782 ror $34,%r13
1783 mov %r8,%r15
1784 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1785
1786 xor %r13,%rdx
1787 ror $5,%r13
1788 or %r10,%r14 # a|c
1789
1790 xor %r13,%rdx # h=Sigma0(a)
1791 and %r10,%r15 # a&c
1792 add %r12,%r11 # d+=T1
1793
1794 and %r9,%r14 # (a|c)&b
1795 add %r12,%rdx # h+=T1
1796
1797 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1798 lea 1(%rdi),%rdi # round++
1799
1800 add %r14,%rdx # h+=Maj(a,b,c)
1801 mov 112(%rsp),%r13
1802 mov 88(%rsp),%r12
1803
1804 mov %r13,%r15
1805
1806 shr $7,%r13
1807 ror $1,%r15
1808
1809 xor %r15,%r13
1810 ror $7,%r15
1811
1812 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1813 mov %r12,%r14
1814
1815 shr $6,%r12
1816 ror $19,%r14
1817
1818 xor %r14,%r12
1819 ror $42,%r14
1820
1821 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1822
1823 add %r13,%r12
1824
1825 add 48(%rsp),%r12
1826
1827 add 104(%rsp),%r12
1828 mov %r11,%r13
1829 mov %r11,%r14
1830 mov %rax,%r15
1831
1832 ror $14,%r13
1833 ror $18,%r14
1834 xor %rbx,%r15 # f^g
1835
1836 xor %r14,%r13
1837 ror $23,%r14
1838 and %r11,%r15 # (f^g)&e
1839 mov %r12,104(%rsp)
1840
1841 xor %r14,%r13 # Sigma1(e)
1842 xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
1843 add %rcx,%r12 # T1+=h
1844
1845 mov %rdx,%rcx
1846 add %r13,%r12 # T1+=Sigma1(e)
1847
1848 add %r15,%r12 # T1+=Ch(e,f,g)
1849 mov %rdx,%r13
1850 mov %rdx,%r14
1851
1852 ror $28,%rcx
1853 ror $34,%r13
1854 mov %rdx,%r15
1855 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1856
1857 xor %r13,%rcx
1858 ror $5,%r13
1859 or %r9,%r14 # a|c
1860
1861 xor %r13,%rcx # h=Sigma0(a)
1862 and %r9,%r15 # a&c
1863 add %r12,%r10 # d+=T1
1864
1865 and %r8,%r14 # (a|c)&b
1866 add %r12,%rcx # h+=T1
1867
1868 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1869 lea 1(%rdi),%rdi # round++
1870
1871 add %r14,%rcx # h+=Maj(a,b,c)
1872 mov 120(%rsp),%r13
1873 mov 96(%rsp),%r12
1874
1875 mov %r13,%r15
1876
1877 shr $7,%r13
1878 ror $1,%r15
1879
1880 xor %r15,%r13
1881 ror $7,%r15
1882
1883 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1884 mov %r12,%r14
1885
1886 shr $6,%r12
1887 ror $19,%r14
1888
1889 xor %r14,%r12
1890 ror $42,%r14
1891
1892 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1893
1894 add %r13,%r12
1895
1896 add 56(%rsp),%r12
1897
1898 add 112(%rsp),%r12
1899 mov %r10,%r13
1900 mov %r10,%r14
1901 mov %r11,%r15
1902
1903 ror $14,%r13
1904 ror $18,%r14
1905 xor %rax,%r15 # f^g
1906
1907 xor %r14,%r13
1908 ror $23,%r14
1909 and %r10,%r15 # (f^g)&e
1910 mov %r12,112(%rsp)
1911
1912 xor %r14,%r13 # Sigma1(e)
1913 xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
1914 add %rbx,%r12 # T1+=h
1915
1916 mov %rcx,%rbx
1917 add %r13,%r12 # T1+=Sigma1(e)
1918
1919 add %r15,%r12 # T1+=Ch(e,f,g)
1920 mov %rcx,%r13
1921 mov %rcx,%r14
1922
1923 ror $28,%rbx
1924 ror $34,%r13
1925 mov %rcx,%r15
1926 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1927
1928 xor %r13,%rbx
1929 ror $5,%r13
1930 or %r8,%r14 # a|c
1931
1932 xor %r13,%rbx # h=Sigma0(a)
1933 and %r8,%r15 # a&c
1934 add %r12,%r9 # d+=T1
1935
1936 and %rdx,%r14 # (a|c)&b
1937 add %r12,%rbx # h+=T1
1938
1939 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
1940 lea 1(%rdi),%rdi # round++
1941
1942 add %r14,%rbx # h+=Maj(a,b,c)
1943 mov 0(%rsp),%r13
1944 mov 104(%rsp),%r12
1945
1946 mov %r13,%r15
1947
1948 shr $7,%r13
1949 ror $1,%r15
1950
1951 xor %r15,%r13
1952 ror $7,%r15
1953
1954 xor %r15,%r13 # sigma0(X[(i+1)&0xf])
1955 mov %r12,%r14
1956
1957 shr $6,%r12
1958 ror $19,%r14
1959
1960 xor %r14,%r12
1961 ror $42,%r14
1962
1963 xor %r14,%r12 # sigma1(X[(i+14)&0xf])
1964
1965 add %r13,%r12
1966
1967 add 64(%rsp),%r12
1968
1969 add 120(%rsp),%r12
1970 mov %r9,%r13
1971 mov %r9,%r14
1972 mov %r10,%r15
1973
1974 ror $14,%r13
1975 ror $18,%r14
1976 xor %r11,%r15 # f^g
1977
1978 xor %r14,%r13
1979 ror $23,%r14
1980 and %r9,%r15 # (f^g)&e
1981 mov %r12,120(%rsp)
1982
1983 xor %r14,%r13 # Sigma1(e)
1984 xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
1985 add %rax,%r12 # T1+=h
1986
1987 mov %rbx,%rax
1988 add %r13,%r12 # T1+=Sigma1(e)
1989
1990 add %r15,%r12 # T1+=Ch(e,f,g)
1991 mov %rbx,%r13
1992 mov %rbx,%r14
1993
1994 ror $28,%rax
1995 ror $34,%r13
1996 mov %rbx,%r15
1997 add (%rbp,%rdi,8),%r12 # T1+=K[round]
1998
1999 xor %r13,%rax
2000 ror $5,%r13
2001 or %rdx,%r14 # a|c
2002
2003 xor %r13,%rax # h=Sigma0(a)
2004 and %rdx,%r15 # a&c
2005 add %r12,%r8 # d+=T1
2006
2007 and %rcx,%r14 # (a|c)&b
2008 add %r12,%rax # h+=T1
2009
2010 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
2011 lea 1(%rdi),%rdi # round++
2012
2013 add %r14,%rax # h+=Maj(a,b,c)
2014 cmp $80,%rdi
2015 jb .Lrounds_16_xx
2016
2017 mov 16*8+0*8(%rsp),%rdi
2018 lea 16*8(%rsi),%rsi
2019
2020 add 8*0(%rdi),%rax
2021 add 8*1(%rdi),%rbx
2022 add 8*2(%rdi),%rcx
2023 add 8*3(%rdi),%rdx
2024 add 8*4(%rdi),%r8
2025 add 8*5(%rdi),%r9
2026 add 8*6(%rdi),%r10
2027 add 8*7(%rdi),%r11
2028
2029 cmp 16*8+2*8(%rsp),%rsi
2030
2031 mov %rax,8*0(%rdi)
2032 mov %rbx,8*1(%rdi)
2033 mov %rcx,8*2(%rdi)
2034 mov %rdx,8*3(%rdi)
2035 mov %r8,8*4(%rdi)
2036 mov %r9,8*5(%rdi)
2037 mov %r10,8*6(%rdi)
2038 mov %r11,8*7(%rdi)
2039 jb .Lloop
2040
2041 mov 16*8+3*8(%rsp),%rsp
2042 .cfi_def_cfa %rsp,56
2043 pop %r15
2044 .cfi_adjust_cfa_offset -8
2045 .cfi_restore %r15
2046 pop %r14
2047 .cfi_adjust_cfa_offset -8
2048 .cfi_restore %r14
2049 pop %r13
2050 .cfi_adjust_cfa_offset -8
2051 .cfi_restore %r13
2052 pop %r12
2053 .cfi_adjust_cfa_offset -8
2054 .cfi_restore %r12
2055 pop %rbp
2056 .cfi_adjust_cfa_offset -8
2057 .cfi_restore %rbp
2058 pop %rbx
2059 .cfi_adjust_cfa_offset -8
2060 .cfi_restore %rbx
2061
2062 ret
2063 .cfi_endproc
2064 SET_SIZE(SHA512TransformBlocks)
2065
/*
 * K512: the 80 64-bit SHA-512 round constants K[0..79] (FIPS 180-4,
 * section 4.2.3), stored in round order, two constants per .quad line.
 *
 * The table is placed in .rodata and aligned to 64 bytes.
 *
 * NOTE(review): the round code adds (%rbp,%rdi,8) as "K[round]" and
 * stops at round 80, so %rbp presumably holds the address of this
 * table -- confirm against the function prologue.
 * NOTE(review): K512 is typed @object but no .size directive follows
 * the table here; consider adding one if symbol sizes matter to tooling.
 */
2066 .section .rodata
2067 .align 64
2068 .type K512,@object
2069 K512:
/* K[0..15] */
2070 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
2071 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
2072 .quad 0x3956c25bf348b538,0x59f111f1b605d019
2073 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
2074 .quad 0xd807aa98a3030242,0x12835b0145706fbe
2075 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
2076 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
2077 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
/* K[16..31] */
2078 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
2079 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
2080 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
2081 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
2082 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
2083 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
2084 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
2085 .quad 0x06ca6351e003826f,0x142929670a0e6e70
/* K[32..47] */
2086 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
2087 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
2088 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
2089 .quad 0x81c2c92e47edaee6,0x92722c851482353b
2090 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
2091 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
2092 .quad 0xd192e819d6ef5218,0xd69906245565a910
2093 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
/* K[48..63] */
2094 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
2095 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
2096 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
2097 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
2098 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
2099 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
2100 .quad 0x90befffa23631e28,0xa4506cebde82bde9
2101 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
/* K[64..79] */
2102 .quad 0xca273eceea26619c,0xd186b8c721c0c207
2103 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
2104 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
2105 .quad 0x113f9804bef90dae,0x1b710b35131c471b
2106 .quad 0x28db77f523047d84,0x32caab7b40c72493
2107 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
2108 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
2109 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
2110 #endif /* !lint && !__lint */
2111
/*
 * On ELF targets, emit an empty .note.GNU-stack section with no flags
 * (in particular, not executable). GNU toolchains read this marker as
 * "this object does not require an executable stack".
 */
2112 #ifdef __ELF__
2113 .section .note.GNU-stack,"",%progbits
2114 #endif