git.proxmox.com Git - mirror_edk2.git/blame - CryptoPkg/Library/OpensslLib/X64/crypto/aes/aesni-x86_64.nasm
CryptoPkg/OpensslLib: Commit the auto-generated assembly files for X64
[mirror_edk2.git] / CryptoPkg / Library / OpensslLib / X64 / crypto / aes / aesni-x86_64.nasm
CommitLineData
147f34b5
CZ
1; WARNING: do not edit!\r
2; Generated from openssl/crypto/aes/asm/aesni-x86_64.pl\r
3;\r
4; Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.\r
5;\r
6; Licensed under the OpenSSL license (the "License"). You may not use\r
7; this file except in compliance with the License. You can obtain a copy\r
8; in the file LICENSE in the source distribution or at\r
9; https://www.openssl.org/source/license.html\r
10\r
; Assembler setup. "default rel" makes plain [symbol] references
; RIP-relative. The three size keywords are defined as empty macros, so an
; operand written XMMWORD[...] assembles as a plain [...] memory operand.
11default rel\r
12%define XMMWORD\r
13%define YMMWORD\r
14%define ZMMWORD\r
; Everything that follows is placed in a 64-byte-aligned code section.
15section .text code align=64\r
16\r
; Symbol defined in another module; code later in this file reads the
; dword at offset 4 from it.
17EXTERN OPENSSL_ia32cap_P\r
;-----------------------------------------------------------------------
; aesni_encrypt -- encrypt one 16-byte block with AES-NI.
; In:    rcx = address of the 16-byte input block
;        rdx = address that receives the 16-byte result
;        r8  = address of the key schedule; the dword at offset 240 is
;              used as the iteration count of the round loop
; Clobb: eax, r8, flags, xmm0-xmm2 (all three are zeroed before return)
;-----------------------------------------------------------------------
18global aesni_encrypt\r
19\r
20ALIGN 16\r
21aesni_encrypt:\r
22\r
23 movups xmm2,XMMWORD[rcx]\r
24 mov eax,DWORD[240+r8]\r
25 movups xmm0,XMMWORD[r8]\r
26 movups xmm1,XMMWORD[16+r8]\r
27 lea r8,[32+r8]\r
; Initial round-key XOR: block ^= first 16 bytes of the schedule.
28 xorps xmm2,xmm0\r
29$L$oop_enc1_1:\r
; Hand-encoded 66 0F 38 DC D1 = aesenc xmm2,xmm1.
30DB 102,15,56,220,209\r
31 dec eax\r
; movups and lea do not modify flags, so the jnz below still tests the
; result of "dec eax" while the next round key is fetched.
32 movups xmm1,XMMWORD[r8]\r
33 lea r8,[16+r8]\r
34 jnz NEAR $L$oop_enc1_1\r
; Hand-encoded 66 0F 38 DD D1 = aesenclast xmm2,xmm1.
35DB 102,15,56,221,209\r
; Clear the registers that held round keys, store the result, then clear
; the result register as well.
36 pxor xmm0,xmm0\r
37 pxor xmm1,xmm1\r
38 movups XMMWORD[rdx],xmm2\r
39 pxor xmm2,xmm2\r
; F3 C3 = "rep ret".
40 DB 0F3h,0C3h ;repret\r
41\r
42\r
43\r
;-----------------------------------------------------------------------
; aesni_decrypt -- decrypt one 16-byte block with AES-NI.
; In:    rcx = address of the 16-byte input block
;        rdx = address that receives the 16-byte result
;        r8  = address of the key schedule; the dword at offset 240 is
;              used as the iteration count of the round loop
; Clobb: eax, r8, flags, xmm0-xmm2 (all three are zeroed before return)
;-----------------------------------------------------------------------
44global aesni_decrypt\r
45\r
46ALIGN 16\r
47aesni_decrypt:\r
48\r
49 movups xmm2,XMMWORD[rcx]\r
50 mov eax,DWORD[240+r8]\r
51 movups xmm0,XMMWORD[r8]\r
52 movups xmm1,XMMWORD[16+r8]\r
53 lea r8,[32+r8]\r
; Initial round-key XOR: block ^= first 16 bytes of the schedule.
54 xorps xmm2,xmm0\r
55$L$oop_dec1_2:\r
; Hand-encoded 66 0F 38 DE D1 = aesdec xmm2,xmm1.
56DB 102,15,56,222,209\r
57 dec eax\r
; movups and lea do not modify flags, so the jnz below still tests the
; result of "dec eax" while the next round key is fetched.
58 movups xmm1,XMMWORD[r8]\r
59 lea r8,[16+r8]\r
60 jnz NEAR $L$oop_dec1_2\r
; Hand-encoded 66 0F 38 DF D1 = aesdeclast xmm2,xmm1.
61DB 102,15,56,223,209\r
; Clear the registers that held round keys, store the result, then clear
; the result register as well.
62 pxor xmm0,xmm0\r
63 pxor xmm1,xmm1\r
64 movups XMMWORD[rdx],xmm2\r
65 pxor xmm2,xmm2\r
; F3 C3 = "rep ret".
66 DB 0F3h,0C3h ;repret\r
67\r
68\r
69\r
;-----------------------------------------------------------------------
; _aesni_encrypt2 -- file-local helper: encrypt the two blocks held in
; xmm2 and xmm3, interleaving the rounds of both.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2, xmm3 = input blocks
; Out:   xmm2, xmm3 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
70ALIGN 16\r
71_aesni_encrypt2:\r
72\r
73 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
74 shl eax,4\r
75 movups xmm1,XMMWORD[16+rcx]\r
76 xorps xmm2,xmm0\r
77 xorps xmm3,xmm0\r
78 movups xmm0,XMMWORD[32+rcx]\r
; rcx is moved 32+16*n bytes past the schedule start and rax becomes the
; negative offset 16-16*n, which climbs by 32 per iteration; the loop
; ends when it reaches zero.
79 lea rcx,[32+rax*1+rcx]\r
80 neg rax\r
81 add rax,16\r
82\r
83$L$enc_loop2:\r
; Hand-encoded AES-NI: 66 0F 38 DC = aesenc, DD = aesenclast. The final
; byte selects the operands: 209/217 = xmm2/xmm3 with xmm1, 208/216 =
; xmm2/xmm3 with xmm0.
84DB 102,15,56,220,209\r
85DB 102,15,56,220,217\r
86 movups xmm1,XMMWORD[rax*1+rcx]\r
87 add rax,32\r
88DB 102,15,56,220,208\r
89DB 102,15,56,220,216\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
90 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
91 jnz NEAR $L$enc_loop2\r
92\r
; One more aesenc with xmm1, then aesenclast with xmm0.
93DB 102,15,56,220,209\r
94DB 102,15,56,220,217\r
95DB 102,15,56,221,208\r
96DB 102,15,56,221,216\r
97 DB 0F3h,0C3h ;repret\r
98\r
99\r
100\r
;-----------------------------------------------------------------------
; _aesni_decrypt2 -- file-local helper: decrypt the two blocks held in
; xmm2 and xmm3, interleaving the rounds of both.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2, xmm3 = input blocks
; Out:   xmm2, xmm3 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
101ALIGN 16\r
102_aesni_decrypt2:\r
103\r
104 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
105 shl eax,4\r
106 movups xmm1,XMMWORD[16+rcx]\r
107 xorps xmm2,xmm0\r
108 xorps xmm3,xmm0\r
109 movups xmm0,XMMWORD[32+rcx]\r
; rcx is moved 32+16*n bytes past the schedule start and rax becomes the
; negative offset 16-16*n, which climbs by 32 per iteration; the loop
; ends when it reaches zero.
110 lea rcx,[32+rax*1+rcx]\r
111 neg rax\r
112 add rax,16\r
113\r
114$L$dec_loop2:\r
; Hand-encoded AES-NI: 66 0F 38 DE = aesdec, DF = aesdeclast. The final
; byte selects the operands: 209/217 = xmm2/xmm3 with xmm1, 208/216 =
; xmm2/xmm3 with xmm0.
115DB 102,15,56,222,209\r
116DB 102,15,56,222,217\r
117 movups xmm1,XMMWORD[rax*1+rcx]\r
118 add rax,32\r
119DB 102,15,56,222,208\r
120DB 102,15,56,222,216\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
121 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
122 jnz NEAR $L$dec_loop2\r
123\r
; One more aesdec with xmm1, then aesdeclast with xmm0.
124DB 102,15,56,222,209\r
125DB 102,15,56,222,217\r
126DB 102,15,56,223,208\r
127DB 102,15,56,223,216\r
128 DB 0F3h,0C3h ;repret\r
129\r
130\r
131\r
;-----------------------------------------------------------------------
; _aesni_encrypt3 -- file-local helper: encrypt the three blocks held in
; xmm2-xmm4, interleaving the rounds of all three.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm4 = input blocks
; Out:   xmm2-xmm4 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
132ALIGN 16\r
133_aesni_encrypt3:\r
134\r
135 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
136 shl eax,4\r
137 movups xmm1,XMMWORD[16+rcx]\r
138 xorps xmm2,xmm0\r
139 xorps xmm3,xmm0\r
140 xorps xmm4,xmm0\r
141 movups xmm0,XMMWORD[32+rcx]\r
; rcx -> 32+16*n bytes past the schedule start; rax = 16-16*n is a
; negative offset that climbs by 32 per iteration and ends the loop at 0.
142 lea rcx,[32+rax*1+rcx]\r
143 neg rax\r
144 add rax,16\r
145\r
146$L$enc_loop3:\r
; Hand-encoded AES-NI: 66 0F 38 DC = aesenc, DD = aesenclast. Final byte
; 209/217/225 = xmm2/xmm3/xmm4 with xmm1; 208/216/224 = same with xmm0.
147DB 102,15,56,220,209\r
148DB 102,15,56,220,217\r
149DB 102,15,56,220,225\r
150 movups xmm1,XMMWORD[rax*1+rcx]\r
151 add rax,32\r
152DB 102,15,56,220,208\r
153DB 102,15,56,220,216\r
154DB 102,15,56,220,224\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
155 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
156 jnz NEAR $L$enc_loop3\r
157\r
; One more aesenc with xmm1, then aesenclast with xmm0.
158DB 102,15,56,220,209\r
159DB 102,15,56,220,217\r
160DB 102,15,56,220,225\r
161DB 102,15,56,221,208\r
162DB 102,15,56,221,216\r
163DB 102,15,56,221,224\r
164 DB 0F3h,0C3h ;repret\r
165\r
166\r
167\r
;-----------------------------------------------------------------------
; _aesni_decrypt3 -- file-local helper: decrypt the three blocks held in
; xmm2-xmm4, interleaving the rounds of all three.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm4 = input blocks
; Out:   xmm2-xmm4 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
168ALIGN 16\r
169_aesni_decrypt3:\r
170\r
171 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
172 shl eax,4\r
173 movups xmm1,XMMWORD[16+rcx]\r
174 xorps xmm2,xmm0\r
175 xorps xmm3,xmm0\r
176 xorps xmm4,xmm0\r
177 movups xmm0,XMMWORD[32+rcx]\r
; rcx -> 32+16*n bytes past the schedule start; rax = 16-16*n is a
; negative offset that climbs by 32 per iteration and ends the loop at 0.
178 lea rcx,[32+rax*1+rcx]\r
179 neg rax\r
180 add rax,16\r
181\r
182$L$dec_loop3:\r
; Hand-encoded AES-NI: 66 0F 38 DE = aesdec, DF = aesdeclast. Final byte
; 209/217/225 = xmm2/xmm3/xmm4 with xmm1; 208/216/224 = same with xmm0.
183DB 102,15,56,222,209\r
184DB 102,15,56,222,217\r
185DB 102,15,56,222,225\r
186 movups xmm1,XMMWORD[rax*1+rcx]\r
187 add rax,32\r
188DB 102,15,56,222,208\r
189DB 102,15,56,222,216\r
190DB 102,15,56,222,224\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
191 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
192 jnz NEAR $L$dec_loop3\r
193\r
; One more aesdec with xmm1, then aesdeclast with xmm0.
194DB 102,15,56,222,209\r
195DB 102,15,56,222,217\r
196DB 102,15,56,222,225\r
197DB 102,15,56,223,208\r
198DB 102,15,56,223,216\r
199DB 102,15,56,223,224\r
200 DB 0F3h,0C3h ;repret\r
201\r
202\r
203\r
;-----------------------------------------------------------------------
; _aesni_encrypt4 -- file-local helper: encrypt the four blocks held in
; xmm2-xmm5, interleaving the rounds of all four.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm5 = input blocks
; Out:   xmm2-xmm5 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
204ALIGN 16\r
205_aesni_encrypt4:\r
206\r
207 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
208 shl eax,4\r
209 movups xmm1,XMMWORD[16+rcx]\r
210 xorps xmm2,xmm0\r
211 xorps xmm3,xmm0\r
212 xorps xmm4,xmm0\r
213 xorps xmm5,xmm0\r
214 movups xmm0,XMMWORD[32+rcx]\r
; rcx -> 32+16*n bytes past the schedule start; rax = 16-16*n is a
; negative offset that climbs by 32 per iteration and ends the loop at 0.
215 lea rcx,[32+rax*1+rcx]\r
216 neg rax\r
; 0F 1F 00 is a 3-byte NOP (padding only).
217DB 0x0f,0x1f,0x00\r
218 add rax,16\r
219\r
220$L$enc_loop4:\r
; Hand-encoded AES-NI: 66 0F 38 DC = aesenc, DD = aesenclast. Final byte
; 209/217/225/233 = xmm2..xmm5 with xmm1; 208/216/224/232 = with xmm0.
221DB 102,15,56,220,209\r
222DB 102,15,56,220,217\r
223DB 102,15,56,220,225\r
224DB 102,15,56,220,233\r
225 movups xmm1,XMMWORD[rax*1+rcx]\r
226 add rax,32\r
227DB 102,15,56,220,208\r
228DB 102,15,56,220,216\r
229DB 102,15,56,220,224\r
230DB 102,15,56,220,232\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
231 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
232 jnz NEAR $L$enc_loop4\r
233\r
; One more aesenc with xmm1, then aesenclast with xmm0.
234DB 102,15,56,220,209\r
235DB 102,15,56,220,217\r
236DB 102,15,56,220,225\r
237DB 102,15,56,220,233\r
238DB 102,15,56,221,208\r
239DB 102,15,56,221,216\r
240DB 102,15,56,221,224\r
241DB 102,15,56,221,232\r
242 DB 0F3h,0C3h ;repret\r
243\r
244\r
245\r
;-----------------------------------------------------------------------
; _aesni_decrypt4 -- file-local helper: decrypt the four blocks held in
; xmm2-xmm5, interleaving the rounds of all four.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm5 = input blocks
; Out:   xmm2-xmm5 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
246ALIGN 16\r
247_aesni_decrypt4:\r
248\r
249 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
250 shl eax,4\r
251 movups xmm1,XMMWORD[16+rcx]\r
252 xorps xmm2,xmm0\r
253 xorps xmm3,xmm0\r
254 xorps xmm4,xmm0\r
255 xorps xmm5,xmm0\r
256 movups xmm0,XMMWORD[32+rcx]\r
; rcx -> 32+16*n bytes past the schedule start; rax = 16-16*n is a
; negative offset that climbs by 32 per iteration and ends the loop at 0.
257 lea rcx,[32+rax*1+rcx]\r
258 neg rax\r
; 0F 1F 00 is a 3-byte NOP (padding only).
259DB 0x0f,0x1f,0x00\r
260 add rax,16\r
261\r
262$L$dec_loop4:\r
; Hand-encoded AES-NI: 66 0F 38 DE = aesdec, DF = aesdeclast. Final byte
; 209/217/225/233 = xmm2..xmm5 with xmm1; 208/216/224/232 = with xmm0.
263DB 102,15,56,222,209\r
264DB 102,15,56,222,217\r
265DB 102,15,56,222,225\r
266DB 102,15,56,222,233\r
267 movups xmm1,XMMWORD[rax*1+rcx]\r
268 add rax,32\r
269DB 102,15,56,222,208\r
270DB 102,15,56,222,216\r
271DB 102,15,56,222,224\r
272DB 102,15,56,222,232\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
273 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
274 jnz NEAR $L$dec_loop4\r
275\r
; One more aesdec with xmm1, then aesdeclast with xmm0.
276DB 102,15,56,222,209\r
277DB 102,15,56,222,217\r
278DB 102,15,56,222,225\r
279DB 102,15,56,222,233\r
280DB 102,15,56,223,208\r
281DB 102,15,56,223,216\r
282DB 102,15,56,223,224\r
283DB 102,15,56,223,232\r
284 DB 0F3h,0C3h ;repret\r
285\r
286\r
287\r
;-----------------------------------------------------------------------
; _aesni_encrypt6 -- file-local helper: encrypt the six blocks held in
; xmm2-xmm7, interleaving the rounds of all six.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm7 = input blocks
; Out:   xmm2-xmm7 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
; Note:  the inner label $L$enc_loop6 is also used as a call target by
;        aesni_ctr32_encrypt_blocks later in this file.
;-----------------------------------------------------------------------
288ALIGN 16\r
289_aesni_encrypt6:\r
290\r
291 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
292 shl eax,4\r
293 movups xmm1,XMMWORD[16+rcx]\r
; The initial round-key XOR of xmm2-xmm7 is interleaved with the first
; aesenc (with xmm1) of xmm2-xmm4; xmm5-xmm7 get that same round after
; the jump to $L$enc_loop6_enter.
294 xorps xmm2,xmm0\r
295 pxor xmm3,xmm0\r
296 pxor xmm4,xmm0\r
; Hand-encoded AES-NI: 66 0F 38 DC = aesenc, DD = aesenclast. Final byte
; 209/217/225/233/241/249 = xmm2..xmm7 with xmm1;
; 208/216/224/232/240/248 = xmm2..xmm7 with xmm0.
297DB 102,15,56,220,209\r
; rcx -> 32+16*n bytes past the schedule start; rax = -16*n here, so the
; load at L305-equivalent below ([rax+rcx]) reads schedule offset 32.
298 lea rcx,[32+rax*1+rcx]\r
299 neg rax\r
300DB 102,15,56,220,217\r
301 pxor xmm5,xmm0\r
302 pxor xmm6,xmm0\r
303DB 102,15,56,220,225\r
304 pxor xmm7,xmm0\r
305 movups xmm0,XMMWORD[rax*1+rcx]\r
306 add rax,16\r
307 jmp NEAR $L$enc_loop6_enter\r
308ALIGN 16\r
309$L$enc_loop6:\r
310DB 102,15,56,220,209\r
311DB 102,15,56,220,217\r
312DB 102,15,56,220,225\r
313$L$enc_loop6_enter:\r
314DB 102,15,56,220,233\r
315DB 102,15,56,220,241\r
316DB 102,15,56,220,249\r
317 movups xmm1,XMMWORD[rax*1+rcx]\r
318 add rax,32\r
319DB 102,15,56,220,208\r
320DB 102,15,56,220,216\r
321DB 102,15,56,220,224\r
322DB 102,15,56,220,232\r
323DB 102,15,56,220,240\r
324DB 102,15,56,220,248\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
325 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
326 jnz NEAR $L$enc_loop6\r
327\r
; One more aesenc with xmm1, then aesenclast with xmm0.
328DB 102,15,56,220,209\r
329DB 102,15,56,220,217\r
330DB 102,15,56,220,225\r
331DB 102,15,56,220,233\r
332DB 102,15,56,220,241\r
333DB 102,15,56,220,249\r
334DB 102,15,56,221,208\r
335DB 102,15,56,221,216\r
336DB 102,15,56,221,224\r
337DB 102,15,56,221,232\r
338DB 102,15,56,221,240\r
339DB 102,15,56,221,248\r
340 DB 0F3h,0C3h ;repret\r
341\r
342\r
343\r
;-----------------------------------------------------------------------
; _aesni_decrypt6 -- file-local helper: decrypt the six blocks held in
; xmm2-xmm7, interleaving the rounds of all six.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm7 = input blocks
; Out:   xmm2-xmm7 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
344ALIGN 16\r
345_aesni_decrypt6:\r
346\r
347 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
348 shl eax,4\r
349 movups xmm1,XMMWORD[16+rcx]\r
; The initial round-key XOR of xmm2-xmm7 is interleaved with the first
; aesdec (with xmm1) of xmm2-xmm4; xmm5-xmm7 get that same round after
; the jump to $L$dec_loop6_enter.
350 xorps xmm2,xmm0\r
351 pxor xmm3,xmm0\r
352 pxor xmm4,xmm0\r
; Hand-encoded AES-NI: 66 0F 38 DE = aesdec, DF = aesdeclast. Final byte
; 209/217/225/233/241/249 = xmm2..xmm7 with xmm1;
; 208/216/224/232/240/248 = xmm2..xmm7 with xmm0.
353DB 102,15,56,222,209\r
; rcx -> 32+16*n bytes past the schedule start; rax = -16*n after neg, so
; the [rax+rcx] load below reads schedule offset 32.
354 lea rcx,[32+rax*1+rcx]\r
355 neg rax\r
356DB 102,15,56,222,217\r
357 pxor xmm5,xmm0\r
358 pxor xmm6,xmm0\r
359DB 102,15,56,222,225\r
360 pxor xmm7,xmm0\r
361 movups xmm0,XMMWORD[rax*1+rcx]\r
362 add rax,16\r
363 jmp NEAR $L$dec_loop6_enter\r
364ALIGN 16\r
365$L$dec_loop6:\r
366DB 102,15,56,222,209\r
367DB 102,15,56,222,217\r
368DB 102,15,56,222,225\r
369$L$dec_loop6_enter:\r
370DB 102,15,56,222,233\r
371DB 102,15,56,222,241\r
372DB 102,15,56,222,249\r
373 movups xmm1,XMMWORD[rax*1+rcx]\r
374 add rax,32\r
375DB 102,15,56,222,208\r
376DB 102,15,56,222,216\r
377DB 102,15,56,222,224\r
378DB 102,15,56,222,232\r
379DB 102,15,56,222,240\r
380DB 102,15,56,222,248\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
381 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
382 jnz NEAR $L$dec_loop6\r
383\r
; One more aesdec with xmm1, then aesdeclast with xmm0.
384DB 102,15,56,222,209\r
385DB 102,15,56,222,217\r
386DB 102,15,56,222,225\r
387DB 102,15,56,222,233\r
388DB 102,15,56,222,241\r
389DB 102,15,56,222,249\r
390DB 102,15,56,223,208\r
391DB 102,15,56,223,216\r
392DB 102,15,56,223,224\r
393DB 102,15,56,223,232\r
394DB 102,15,56,223,240\r
395DB 102,15,56,223,248\r
396 DB 0F3h,0C3h ;repret\r
397\r
398\r
399\r
;-----------------------------------------------------------------------
; _aesni_encrypt8 -- file-local helper: encrypt the eight blocks held in
; xmm2-xmm9, interleaving the rounds of all eight.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm9 = input blocks
; Out:   xmm2-xmm9 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
400ALIGN 16\r
401_aesni_encrypt8:\r
402\r
403 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
404 shl eax,4\r
405 movups xmm1,XMMWORD[16+rcx]\r
; The initial round-key XOR of xmm2-xmm9 is interleaved with the first
; aesenc (with xmm1) of xmm2 and xmm3; xmm4-xmm9 get that same round
; after the jump to $L$enc_loop8_inner.
406 xorps xmm2,xmm0\r
407 xorps xmm3,xmm0\r
408 pxor xmm4,xmm0\r
409 pxor xmm5,xmm0\r
410 pxor xmm6,xmm0\r
; rcx -> 32+16*n bytes past the schedule start; rax = -16*n after neg, so
; the [rax+rcx] load below reads schedule offset 32.
411 lea rcx,[32+rax*1+rcx]\r
412 neg rax\r
; Hand-encoded AES-NI: 66 0F 38 DC = aesenc, DD = aesenclast. Final byte
; 209..249 = xmm2..xmm7 with xmm1, 208..248 = xmm2..xmm7 with xmm0. The
; six-byte forms carry a REX prefix (68) that selects xmm8/xmm9:
; 193/201 = xmm8/xmm9 with xmm1, 192/200 = xmm8/xmm9 with xmm0.
413DB 102,15,56,220,209\r
414 pxor xmm7,xmm0\r
415 pxor xmm8,xmm0\r
416DB 102,15,56,220,217\r
417 pxor xmm9,xmm0\r
418 movups xmm0,XMMWORD[rax*1+rcx]\r
419 add rax,16\r
420 jmp NEAR $L$enc_loop8_inner\r
421ALIGN 16\r
422$L$enc_loop8:\r
423DB 102,15,56,220,209\r
424DB 102,15,56,220,217\r
425$L$enc_loop8_inner:\r
426DB 102,15,56,220,225\r
427DB 102,15,56,220,233\r
428DB 102,15,56,220,241\r
429DB 102,15,56,220,249\r
430DB 102,68,15,56,220,193\r
431DB 102,68,15,56,220,201\r
432$L$enc_loop8_enter:\r
433 movups xmm1,XMMWORD[rax*1+rcx]\r
434 add rax,32\r
435DB 102,15,56,220,208\r
436DB 102,15,56,220,216\r
437DB 102,15,56,220,224\r
438DB 102,15,56,220,232\r
439DB 102,15,56,220,240\r
440DB 102,15,56,220,248\r
441DB 102,68,15,56,220,192\r
442DB 102,68,15,56,220,200\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
443 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
444 jnz NEAR $L$enc_loop8\r
445\r
; One more aesenc with xmm1, then aesenclast with xmm0.
446DB 102,15,56,220,209\r
447DB 102,15,56,220,217\r
448DB 102,15,56,220,225\r
449DB 102,15,56,220,233\r
450DB 102,15,56,220,241\r
451DB 102,15,56,220,249\r
452DB 102,68,15,56,220,193\r
453DB 102,68,15,56,220,201\r
454DB 102,15,56,221,208\r
455DB 102,15,56,221,216\r
456DB 102,15,56,221,224\r
457DB 102,15,56,221,232\r
458DB 102,15,56,221,240\r
459DB 102,15,56,221,248\r
460DB 102,68,15,56,221,192\r
461DB 102,68,15,56,221,200\r
462 DB 0F3h,0C3h ;repret\r
463\r
464\r
465\r
;-----------------------------------------------------------------------
; _aesni_decrypt8 -- file-local helper: decrypt the eight blocks held in
; xmm2-xmm9, interleaving the rounds of all eight.
; In:    rcx = key schedule address
;        eax = loop count n (callers in this file load it from the dword
;              at offset 240 of the schedule)
;        xmm2-xmm9 = input blocks
; Out:   xmm2-xmm9 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
466ALIGN 16\r
467_aesni_decrypt8:\r
468\r
469 movups xmm0,XMMWORD[rcx]\r
; eax = 16*n, a byte offset (writing eax also zero-extends into rax).
470 shl eax,4\r
471 movups xmm1,XMMWORD[16+rcx]\r
; The initial round-key XOR of xmm2-xmm9 is interleaved with the first
; aesdec (with xmm1) of xmm2 and xmm3; xmm4-xmm9 get that same round
; after the jump to $L$dec_loop8_inner.
472 xorps xmm2,xmm0\r
473 xorps xmm3,xmm0\r
474 pxor xmm4,xmm0\r
475 pxor xmm5,xmm0\r
476 pxor xmm6,xmm0\r
; rcx -> 32+16*n bytes past the schedule start; rax = -16*n after neg, so
; the [rax+rcx] load below reads schedule offset 32.
477 lea rcx,[32+rax*1+rcx]\r
478 neg rax\r
; Hand-encoded AES-NI: 66 0F 38 DE = aesdec, DF = aesdeclast. Final byte
; 209..249 = xmm2..xmm7 with xmm1, 208..248 = xmm2..xmm7 with xmm0. The
; six-byte forms carry a REX prefix (68) that selects xmm8/xmm9:
; 193/201 = xmm8/xmm9 with xmm1, 192/200 = xmm8/xmm9 with xmm0.
479DB 102,15,56,222,209\r
480 pxor xmm7,xmm0\r
481 pxor xmm8,xmm0\r
482DB 102,15,56,222,217\r
483 pxor xmm9,xmm0\r
484 movups xmm0,XMMWORD[rax*1+rcx]\r
485 add rax,16\r
486 jmp NEAR $L$dec_loop8_inner\r
487ALIGN 16\r
488$L$dec_loop8:\r
489DB 102,15,56,222,209\r
490DB 102,15,56,222,217\r
491$L$dec_loop8_inner:\r
492DB 102,15,56,222,225\r
493DB 102,15,56,222,233\r
494DB 102,15,56,222,241\r
495DB 102,15,56,222,249\r
496DB 102,68,15,56,222,193\r
497DB 102,68,15,56,222,201\r
498$L$dec_loop8_enter:\r
499 movups xmm1,XMMWORD[rax*1+rcx]\r
500 add rax,32\r
501DB 102,15,56,222,208\r
502DB 102,15,56,222,216\r
503DB 102,15,56,222,224\r
504DB 102,15,56,222,232\r
505DB 102,15,56,222,240\r
506DB 102,15,56,222,248\r
507DB 102,68,15,56,222,192\r
508DB 102,68,15,56,222,200\r
; movups leaves flags untouched: jnz tests the "add rax,32" above.
509 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
510 jnz NEAR $L$dec_loop8\r
511\r
; One more aesdec with xmm1, then aesdeclast with xmm0.
512DB 102,15,56,222,209\r
513DB 102,15,56,222,217\r
514DB 102,15,56,222,225\r
515DB 102,15,56,222,233\r
516DB 102,15,56,222,241\r
517DB 102,15,56,222,249\r
518DB 102,68,15,56,222,193\r
519DB 102,68,15,56,222,201\r
520DB 102,15,56,223,208\r
521DB 102,15,56,223,216\r
522DB 102,15,56,223,224\r
523DB 102,15,56,223,232\r
524DB 102,15,56,223,240\r
525DB 102,15,56,223,248\r
526DB 102,68,15,56,223,192\r
527DB 102,68,15,56,223,200\r
528 DB 0F3h,0C3h ;repret\r
529\r
530\r
;-----------------------------------------------------------------------
; aesni_ecb_encrypt -- encrypt or decrypt a run of independent 16-byte
; blocks (each block is processed on its own; no chaining value is used).
; ABI:   Microsoft x64 on entry. The prologue saves rdi/rsi in the first
;        two home slots above the return address and moves the arguments
;        into rdi/rsi/rdx/rcx/r8.
; In (after the prologue remap):
;        rdi = input address          (1st argument, rcx on entry)
;        rsi = output address         (2nd argument, rdx on entry)
;        rdx = length in bytes; rounded down to a multiple of 16
;                                     (3rd argument, r8 on entry)
;        rcx = key schedule address   (4th argument, r9 on entry)
;        r8d = direction: zero selects the decrypt path, non-zero the
;              encrypt path           (5th argument, read from [40+rsp])
; Stack: 88 bytes; the low 64 hold the caller's xmm6-xmm9, which are
;        restored (and their save slots zeroed) on exit.
; Clobb: rax, rcx, rdx, r8, r10, r11, xmm0-xmm5, flags
;-----------------------------------------------------------------------
531global aesni_ecb_encrypt\r
532\r
533ALIGN 16\r
534aesni_ecb_encrypt:\r
535 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
536 mov QWORD[16+rsp],rsi\r
537 mov rax,rsp\r
538$L$SEH_begin_aesni_ecb_encrypt:\r
539 mov rdi,rcx\r
540 mov rsi,rdx\r
541 mov rdx,r8\r
542 mov rcx,r9\r
; 5th argument: first stack slot above the 32-byte home area.
543 mov r8,QWORD[40+rsp]\r
544\r
545\r
546\r
; Save xmm6-xmm9 (callee-saved under the Microsoft x64 convention). The
; movaps stores require rsp to be 16-byte aligned after the adjustment.
547 lea rsp,[((-88))+rsp]\r
548 movaps XMMWORD[rsp],xmm6\r
549 movaps XMMWORD[16+rsp],xmm7\r
550 movaps XMMWORD[32+rsp],xmm8\r
551 movaps XMMWORD[48+rsp],xmm9\r
552$L$ecb_enc_body:\r
; Drop any partial trailing block; nothing to do if less than 16 bytes.
553 and rdx,-16\r
554 jz NEAR $L$ecb_ret\r
555\r
; r11/r10d keep pristine copies of the schedule address and loop count,
; because the multi-block helpers modify rcx and rax.
556 mov eax,DWORD[240+rcx]\r
557 movups xmm0,XMMWORD[rcx]\r
558 mov r11,rcx\r
559 mov r10d,eax\r
560 test r8d,r8d\r
561 jz NEAR $L$ecb_decrypt\r
562\r
; ---- Encrypt path: 8 blocks (0x80 bytes) per iteration while possible.
563 cmp rdx,0x80\r
564 jb NEAR $L$ecb_enc_tail\r
565\r
566 movdqu xmm2,XMMWORD[rdi]\r
567 movdqu xmm3,XMMWORD[16+rdi]\r
568 movdqu xmm4,XMMWORD[32+rdi]\r
569 movdqu xmm5,XMMWORD[48+rdi]\r
570 movdqu xmm6,XMMWORD[64+rdi]\r
571 movdqu xmm7,XMMWORD[80+rdi]\r
572 movdqu xmm8,XMMWORD[96+rdi]\r
573 movdqu xmm9,XMMWORD[112+rdi]\r
574 lea rdi,[128+rdi]\r
575 sub rdx,0x80\r
576 jmp NEAR $L$ecb_enc_loop8_enter\r
577ALIGN 16\r
; Loop body: store the eight results of the previous pass while loading
; the next eight input blocks, and reload rcx/eax for the helper.
578$L$ecb_enc_loop8:\r
579 movups XMMWORD[rsi],xmm2\r
580 mov rcx,r11\r
581 movdqu xmm2,XMMWORD[rdi]\r
582 mov eax,r10d\r
583 movups XMMWORD[16+rsi],xmm3\r
584 movdqu xmm3,XMMWORD[16+rdi]\r
585 movups XMMWORD[32+rsi],xmm4\r
586 movdqu xmm4,XMMWORD[32+rdi]\r
587 movups XMMWORD[48+rsi],xmm5\r
588 movdqu xmm5,XMMWORD[48+rdi]\r
589 movups XMMWORD[64+rsi],xmm6\r
590 movdqu xmm6,XMMWORD[64+rdi]\r
591 movups XMMWORD[80+rsi],xmm7\r
592 movdqu xmm7,XMMWORD[80+rdi]\r
593 movups XMMWORD[96+rsi],xmm8\r
594 movdqu xmm8,XMMWORD[96+rdi]\r
595 movups XMMWORD[112+rsi],xmm9\r
596 lea rsi,[128+rsi]\r
597 movdqu xmm9,XMMWORD[112+rdi]\r
598 lea rdi,[128+rdi]\r
599$L$ecb_enc_loop8_enter:\r
600\r
601 call _aesni_encrypt8\r
602\r
; Borrow (CF set) means fewer than 8 blocks remain after this group.
603 sub rdx,0x80\r
604 jnc NEAR $L$ecb_enc_loop8\r
605\r
; Flush the last full group, restore rcx/eax for the tail, and undo the
; extra subtraction so rdx is the number of bytes still to process.
606 movups XMMWORD[rsi],xmm2\r
607 mov rcx,r11\r
608 movups XMMWORD[16+rsi],xmm3\r
609 mov eax,r10d\r
610 movups XMMWORD[32+rsi],xmm4\r
611 movups XMMWORD[48+rsi],xmm5\r
612 movups XMMWORD[64+rsi],xmm6\r
613 movups XMMWORD[80+rsi],xmm7\r
614 movups XMMWORD[96+rsi],xmm8\r
615 movups XMMWORD[112+rsi],xmm9\r
616 lea rsi,[128+rsi]\r
617 add rdx,0x80\r
618 jz NEAR $L$ecb_ret\r
619\r
; Tail: 1 to 7 blocks remain. Each cmp is followed by a jb and a je; the
; movups between them does not change flags, so one compare dispatches
; two cases. Falling through every branch means 7 blocks.
620$L$ecb_enc_tail:\r
621 movups xmm2,XMMWORD[rdi]\r
622 cmp rdx,0x20\r
623 jb NEAR $L$ecb_enc_one\r
624 movups xmm3,XMMWORD[16+rdi]\r
625 je NEAR $L$ecb_enc_two\r
626 movups xmm4,XMMWORD[32+rdi]\r
627 cmp rdx,0x40\r
628 jb NEAR $L$ecb_enc_three\r
629 movups xmm5,XMMWORD[48+rdi]\r
630 je NEAR $L$ecb_enc_four\r
631 movups xmm6,XMMWORD[64+rdi]\r
632 cmp rdx,0x60\r
633 jb NEAR $L$ecb_enc_five\r
634 movups xmm7,XMMWORD[80+rdi]\r
635 je NEAR $L$ecb_enc_six\r
; Seven blocks: run the 8-block helper with a zeroed eighth lane and
; store only the first seven results.
636 movdqu xmm8,XMMWORD[96+rdi]\r
637 xorps xmm9,xmm9\r
638 call _aesni_encrypt8\r
639 movups XMMWORD[rsi],xmm2\r
640 movups XMMWORD[16+rsi],xmm3\r
641 movups XMMWORD[32+rsi],xmm4\r
642 movups XMMWORD[48+rsi],xmm5\r
643 movups XMMWORD[64+rsi],xmm6\r
644 movups XMMWORD[80+rsi],xmm7\r
645 movups XMMWORD[96+rsi],xmm8\r
646 jmp NEAR $L$ecb_ret\r
647ALIGN 16\r
; One block: inline single-block round loop (aesenc xmm2,xmm1 repeated
; eax times, then aesenclast xmm2,xmm1).
648$L$ecb_enc_one:\r
649 movups xmm0,XMMWORD[rcx]\r
650 movups xmm1,XMMWORD[16+rcx]\r
651 lea rcx,[32+rcx]\r
652 xorps xmm2,xmm0\r
653$L$oop_enc1_3:\r
654DB 102,15,56,220,209\r
655 dec eax\r
656 movups xmm1,XMMWORD[rcx]\r
657 lea rcx,[16+rcx]\r
658 jnz NEAR $L$oop_enc1_3\r
659DB 102,15,56,221,209\r
660 movups XMMWORD[rsi],xmm2\r
661 jmp NEAR $L$ecb_ret\r
662ALIGN 16\r
663$L$ecb_enc_two:\r
664 call _aesni_encrypt2\r
665 movups XMMWORD[rsi],xmm2\r
666 movups XMMWORD[16+rsi],xmm3\r
667 jmp NEAR $L$ecb_ret\r
668ALIGN 16\r
669$L$ecb_enc_three:\r
670 call _aesni_encrypt3\r
671 movups XMMWORD[rsi],xmm2\r
672 movups XMMWORD[16+rsi],xmm3\r
673 movups XMMWORD[32+rsi],xmm4\r
674 jmp NEAR $L$ecb_ret\r
675ALIGN 16\r
676$L$ecb_enc_four:\r
677 call _aesni_encrypt4\r
678 movups XMMWORD[rsi],xmm2\r
679 movups XMMWORD[16+rsi],xmm3\r
680 movups XMMWORD[32+rsi],xmm4\r
681 movups XMMWORD[48+rsi],xmm5\r
682 jmp NEAR $L$ecb_ret\r
683ALIGN 16\r
; Five blocks: use the 6-block helper with a zeroed sixth lane.
684$L$ecb_enc_five:\r
685 xorps xmm7,xmm7\r
686 call _aesni_encrypt6\r
687 movups XMMWORD[rsi],xmm2\r
688 movups XMMWORD[16+rsi],xmm3\r
689 movups XMMWORD[32+rsi],xmm4\r
690 movups XMMWORD[48+rsi],xmm5\r
691 movups XMMWORD[64+rsi],xmm6\r
692 jmp NEAR $L$ecb_ret\r
693ALIGN 16\r
694$L$ecb_enc_six:\r
695 call _aesni_encrypt6\r
696 movups XMMWORD[rsi],xmm2\r
697 movups XMMWORD[16+rsi],xmm3\r
698 movups XMMWORD[32+rsi],xmm4\r
699 movups XMMWORD[48+rsi],xmm5\r
700 movups XMMWORD[64+rsi],xmm6\r
701 movups XMMWORD[80+rsi],xmm7\r
702 jmp NEAR $L$ecb_ret\r
703\r
704ALIGN 16\r
; ---- Decrypt path: same structure as the encrypt path, using the
; _aesni_decryptN helpers. Unlike the encrypt path, every result register
; is zeroed right after it is stored once the bulk loop has finished
; (presumably so no decrypted data is left behind in registers).
705$L$ecb_decrypt:\r
706 cmp rdx,0x80\r
707 jb NEAR $L$ecb_dec_tail\r
708\r
709 movdqu xmm2,XMMWORD[rdi]\r
710 movdqu xmm3,XMMWORD[16+rdi]\r
711 movdqu xmm4,XMMWORD[32+rdi]\r
712 movdqu xmm5,XMMWORD[48+rdi]\r
713 movdqu xmm6,XMMWORD[64+rdi]\r
714 movdqu xmm7,XMMWORD[80+rdi]\r
715 movdqu xmm8,XMMWORD[96+rdi]\r
716 movdqu xmm9,XMMWORD[112+rdi]\r
717 lea rdi,[128+rdi]\r
718 sub rdx,0x80\r
719 jmp NEAR $L$ecb_dec_loop8_enter\r
720ALIGN 16\r
; Loop body: store the previous eight results while loading the next
; eight input blocks, and reload rcx/eax for the helper.
721$L$ecb_dec_loop8:\r
722 movups XMMWORD[rsi],xmm2\r
723 mov rcx,r11\r
724 movdqu xmm2,XMMWORD[rdi]\r
725 mov eax,r10d\r
726 movups XMMWORD[16+rsi],xmm3\r
727 movdqu xmm3,XMMWORD[16+rdi]\r
728 movups XMMWORD[32+rsi],xmm4\r
729 movdqu xmm4,XMMWORD[32+rdi]\r
730 movups XMMWORD[48+rsi],xmm5\r
731 movdqu xmm5,XMMWORD[48+rdi]\r
732 movups XMMWORD[64+rsi],xmm6\r
733 movdqu xmm6,XMMWORD[64+rdi]\r
734 movups XMMWORD[80+rsi],xmm7\r
735 movdqu xmm7,XMMWORD[80+rdi]\r
736 movups XMMWORD[96+rsi],xmm8\r
737 movdqu xmm8,XMMWORD[96+rdi]\r
738 movups XMMWORD[112+rsi],xmm9\r
739 lea rsi,[128+rsi]\r
740 movdqu xmm9,XMMWORD[112+rdi]\r
741 lea rdi,[128+rdi]\r
742$L$ecb_dec_loop8_enter:\r
743\r
744 call _aesni_decrypt8\r
745\r
; Reload the first 16 bytes of the schedule into xmm0; borrow from the
; sub means fewer than 8 blocks remain after this group.
746 movups xmm0,XMMWORD[r11]\r
747 sub rdx,0x80\r
748 jnc NEAR $L$ecb_dec_loop8\r
749\r
; Flush the last full group (zeroing each register after its store),
; restore rcx/eax for the tail and undo the extra subtraction.
750 movups XMMWORD[rsi],xmm2\r
751 pxor xmm2,xmm2\r
752 mov rcx,r11\r
753 movups XMMWORD[16+rsi],xmm3\r
754 pxor xmm3,xmm3\r
755 mov eax,r10d\r
756 movups XMMWORD[32+rsi],xmm4\r
757 pxor xmm4,xmm4\r
758 movups XMMWORD[48+rsi],xmm5\r
759 pxor xmm5,xmm5\r
760 movups XMMWORD[64+rsi],xmm6\r
761 pxor xmm6,xmm6\r
762 movups XMMWORD[80+rsi],xmm7\r
763 pxor xmm7,xmm7\r
764 movups XMMWORD[96+rsi],xmm8\r
765 pxor xmm8,xmm8\r
766 movups XMMWORD[112+rsi],xmm9\r
767 pxor xmm9,xmm9\r
768 lea rsi,[128+rsi]\r
769 add rdx,0x80\r
770 jz NEAR $L$ecb_ret\r
771\r
; Tail: 1 to 7 blocks remain; same cmp / jb / je dispatch as the encrypt
; tail (the interleaved movups loads do not change flags).
772$L$ecb_dec_tail:\r
773 movups xmm2,XMMWORD[rdi]\r
774 cmp rdx,0x20\r
775 jb NEAR $L$ecb_dec_one\r
776 movups xmm3,XMMWORD[16+rdi]\r
777 je NEAR $L$ecb_dec_two\r
778 movups xmm4,XMMWORD[32+rdi]\r
779 cmp rdx,0x40\r
780 jb NEAR $L$ecb_dec_three\r
781 movups xmm5,XMMWORD[48+rdi]\r
782 je NEAR $L$ecb_dec_four\r
783 movups xmm6,XMMWORD[64+rdi]\r
784 cmp rdx,0x60\r
785 jb NEAR $L$ecb_dec_five\r
786 movups xmm7,XMMWORD[80+rdi]\r
787 je NEAR $L$ecb_dec_six\r
; Seven blocks: 8-block helper with a zeroed eighth lane; only seven
; results are stored.
788 movups xmm8,XMMWORD[96+rdi]\r
789 movups xmm0,XMMWORD[rcx]\r
790 xorps xmm9,xmm9\r
791 call _aesni_decrypt8\r
792 movups XMMWORD[rsi],xmm2\r
793 pxor xmm2,xmm2\r
794 movups XMMWORD[16+rsi],xmm3\r
795 pxor xmm3,xmm3\r
796 movups XMMWORD[32+rsi],xmm4\r
797 pxor xmm4,xmm4\r
798 movups XMMWORD[48+rsi],xmm5\r
799 pxor xmm5,xmm5\r
800 movups XMMWORD[64+rsi],xmm6\r
801 pxor xmm6,xmm6\r
802 movups XMMWORD[80+rsi],xmm7\r
803 pxor xmm7,xmm7\r
804 movups XMMWORD[96+rsi],xmm8\r
805 pxor xmm8,xmm8\r
806 pxor xmm9,xmm9\r
807 jmp NEAR $L$ecb_ret\r
808ALIGN 16\r
; One block: inline single-block round loop (aesdec xmm2,xmm1 repeated
; eax times, then aesdeclast xmm2,xmm1).
809$L$ecb_dec_one:\r
810 movups xmm0,XMMWORD[rcx]\r
811 movups xmm1,XMMWORD[16+rcx]\r
812 lea rcx,[32+rcx]\r
813 xorps xmm2,xmm0\r
814$L$oop_dec1_4:\r
815DB 102,15,56,222,209\r
816 dec eax\r
817 movups xmm1,XMMWORD[rcx]\r
818 lea rcx,[16+rcx]\r
819 jnz NEAR $L$oop_dec1_4\r
820DB 102,15,56,223,209\r
821 movups XMMWORD[rsi],xmm2\r
822 pxor xmm2,xmm2\r
823 jmp NEAR $L$ecb_ret\r
824ALIGN 16\r
825$L$ecb_dec_two:\r
826 call _aesni_decrypt2\r
827 movups XMMWORD[rsi],xmm2\r
828 pxor xmm2,xmm2\r
829 movups XMMWORD[16+rsi],xmm3\r
830 pxor xmm3,xmm3\r
831 jmp NEAR $L$ecb_ret\r
832ALIGN 16\r
833$L$ecb_dec_three:\r
834 call _aesni_decrypt3\r
835 movups XMMWORD[rsi],xmm2\r
836 pxor xmm2,xmm2\r
837 movups XMMWORD[16+rsi],xmm3\r
838 pxor xmm3,xmm3\r
839 movups XMMWORD[32+rsi],xmm4\r
840 pxor xmm4,xmm4\r
841 jmp NEAR $L$ecb_ret\r
842ALIGN 16\r
843$L$ecb_dec_four:\r
844 call _aesni_decrypt4\r
845 movups XMMWORD[rsi],xmm2\r
846 pxor xmm2,xmm2\r
847 movups XMMWORD[16+rsi],xmm3\r
848 pxor xmm3,xmm3\r
849 movups XMMWORD[32+rsi],xmm4\r
850 pxor xmm4,xmm4\r
851 movups XMMWORD[48+rsi],xmm5\r
852 pxor xmm5,xmm5\r
853 jmp NEAR $L$ecb_ret\r
854ALIGN 16\r
; Five blocks: use the 6-block helper with a zeroed sixth lane.
855$L$ecb_dec_five:\r
856 xorps xmm7,xmm7\r
857 call _aesni_decrypt6\r
858 movups XMMWORD[rsi],xmm2\r
859 pxor xmm2,xmm2\r
860 movups XMMWORD[16+rsi],xmm3\r
861 pxor xmm3,xmm3\r
862 movups XMMWORD[32+rsi],xmm4\r
863 pxor xmm4,xmm4\r
864 movups XMMWORD[48+rsi],xmm5\r
865 pxor xmm5,xmm5\r
866 movups XMMWORD[64+rsi],xmm6\r
867 pxor xmm6,xmm6\r
868 pxor xmm7,xmm7\r
869 jmp NEAR $L$ecb_ret\r
870ALIGN 16\r
; Six blocks: falls through into the common exit below.
871$L$ecb_dec_six:\r
872 call _aesni_decrypt6\r
873 movups XMMWORD[rsi],xmm2\r
874 pxor xmm2,xmm2\r
875 movups XMMWORD[16+rsi],xmm3\r
876 pxor xmm3,xmm3\r
877 movups XMMWORD[32+rsi],xmm4\r
878 pxor xmm4,xmm4\r
879 movups XMMWORD[48+rsi],xmm5\r
880 pxor xmm5,xmm5\r
881 movups XMMWORD[64+rsi],xmm6\r
882 pxor xmm6,xmm6\r
883 movups XMMWORD[80+rsi],xmm7\r
884 pxor xmm7,xmm7\r
885\r
; Common exit: zero xmm0/xmm1 (they held round keys), restore the caller's
; xmm6-xmm9 and overwrite each save slot with zeros, then release the
; 88-byte frame.
886$L$ecb_ret:\r
887 xorps xmm0,xmm0\r
888 pxor xmm1,xmm1\r
889 movaps xmm6,XMMWORD[rsp]\r
890 movaps XMMWORD[rsp],xmm0\r
891 movaps xmm7,XMMWORD[16+rsp]\r
892 movaps XMMWORD[16+rsp],xmm0\r
893 movaps xmm8,XMMWORD[32+rsp]\r
894 movaps XMMWORD[32+rsp],xmm0\r
895 movaps xmm9,XMMWORD[48+rsp]\r
896 movaps XMMWORD[48+rsp],xmm0\r
897 lea rsp,[88+rsp]\r
898$L$ecb_enc_ret:\r
; Restore rdi/rsi from the home slots written by the prologue.
899 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
900 mov rsi,QWORD[16+rsp]\r
901 DB 0F3h,0C3h ;repret\r
902\r
903$L$SEH_end_aesni_ecb_encrypt:\r
;-----------------------------------------------------------------------
; aesni_ccm64_encrypt_blocks -- for each 16-byte input block, XOR it with
; an encrypted counter block to produce the output, while folding the
; input block into a second 16-byte running value that is encrypted in
; the same pass (presumably the CBC-MAC state of CCM mode -- TODO confirm
; against the caller).
; ABI:   Microsoft x64 on entry; the prologue remaps the arguments.
; In (after the prologue remap):
;        rdi = input address               (1st argument, rcx on entry)
;        rsi = output address              (2nd argument, rdx on entry)
;        rdx = number of 16-byte blocks    (3rd argument, r8 on entry)
;        rcx = key schedule address        (4th argument, r9 on entry)
;        r8  = address of the 16-byte counter block, read once
;                                          (5th argument, [40+rsp])
;        r9  = address of the 16-byte running value; read at the start
;              and written back at the end (6th argument, [48+rsp])
; Stack: 88 bytes; the low 64 hold the caller's xmm6-xmm9.
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, xmm0-xmm3, flags
; NOTE(review): rdx is decremented before it is tested, so a block count
; of zero is not handled as "do nothing" -- confirm callers never pass 0.
;-----------------------------------------------------------------------
904global aesni_ccm64_encrypt_blocks\r
905\r
906ALIGN 16\r
907aesni_ccm64_encrypt_blocks:\r
908 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
909 mov QWORD[16+rsp],rsi\r
910 mov rax,rsp\r
911$L$SEH_begin_aesni_ccm64_encrypt_blocks:\r
912 mov rdi,rcx\r
913 mov rsi,rdx\r
914 mov rdx,r8\r
915 mov rcx,r9\r
916 mov r8,QWORD[40+rsp]\r
917 mov r9,QWORD[48+rsp]\r
918\r
919\r
920\r
; Save xmm6-xmm9 (callee-saved under the Microsoft x64 convention).
921 lea rsp,[((-88))+rsp]\r
922 movaps XMMWORD[rsp],xmm6\r
923 movaps XMMWORD[16+rsp],xmm7\r
924 movaps XMMWORD[32+rsp],xmm8\r
925 movaps XMMWORD[48+rsp],xmm9\r
926$L$ccm64_enc_body:\r
; xmm6 = counter block, xmm3 = running value. xmm9 and xmm7 are loaded
; from the constants $L$increment64 and $L$bswap_mask, which are defined
; outside this part of the file.
927 mov eax,DWORD[240+rcx]\r
928 movdqu xmm6,XMMWORD[r8]\r
929 movdqa xmm9,XMMWORD[$L$increment64]\r
930 movdqa xmm7,XMMWORD[$L$bswap_mask]\r
931\r
; r11 keeps the schedule start; rcx is advanced 32+16*n bytes past it and
; r10 = 16-16*n is the negative starting offset reloaded into rax for
; every block (n = the dword read from offset 240).
932 shl eax,4\r
933 mov r10d,16\r
934 lea r11,[rcx]\r
935 movdqu xmm3,XMMWORD[r9]\r
; xmm2 = counter block exactly as supplied; xmm6 is then byte-shuffled
; through the mask (66 0F 38 00 F7 = pshufb xmm6,xmm7) so that paddq can
; step it.
936 movdqa xmm2,xmm6\r
937 lea rcx,[32+rax*1+rcx]\r
938DB 102,15,56,0,247\r
939 sub r10,rax\r
940 jmp NEAR $L$ccm64_enc_outer\r
941ALIGN 16\r
; Per-block loop. xmm8 = input block. After these XORs:
;   xmm2 = counter block ^ key[0]
;   xmm3 = running value ^ input block ^ key[0]
942$L$ccm64_enc_outer:\r
943 movups xmm0,XMMWORD[r11]\r
944 mov rax,r10\r
945 movups xmm8,XMMWORD[rdi]\r
946\r
947 xorps xmm2,xmm0\r
948 movups xmm1,XMMWORD[16+r11]\r
949 xorps xmm0,xmm8\r
950 xorps xmm3,xmm0\r
951 movups xmm0,XMMWORD[32+r11]\r
952\r
; Two-lane round loop on xmm2/xmm3 (hand-encoded aesenc; 209/217 use
; xmm1, 208/216 use xmm0). jnz tests the "add rax,32".
953$L$ccm64_enc2_loop:\r
954DB 102,15,56,220,209\r
955DB 102,15,56,220,217\r
956 movups xmm1,XMMWORD[rax*1+rcx]\r
957 add rax,32\r
958DB 102,15,56,220,208\r
959DB 102,15,56,220,216\r
960 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
961 jnz NEAR $L$ccm64_enc2_loop\r
962DB 102,15,56,220,209\r
963DB 102,15,56,220,217\r
; Step the counter and count down the remaining blocks. None of the
; instructions between this dec and the jnz at the bottom of the loop
; modify flags, so that jnz tests "dec rdx".
964 paddq xmm6,xmm9\r
965 dec rdx\r
; aesenclast on both lanes (DD with xmm0).
966DB 102,15,56,221,208\r
967DB 102,15,56,221,216\r
968\r
; output block = input block ^ encrypted counter; then prepare the next
; counter block in xmm2 (66 0F 38 00 D7 = pshufb xmm2,xmm7).
969 lea rdi,[16+rdi]\r
970 xorps xmm8,xmm2\r
971 movdqa xmm2,xmm6\r
972 movups XMMWORD[rsi],xmm8\r
973DB 102,15,56,0,215\r
974 lea rsi,[16+rsi]\r
975 jnz NEAR $L$ccm64_enc_outer\r
976\r
; Write the running value back through r9, zero the working registers,
; restore xmm6-xmm9 and zero their stack save slots.
977 pxor xmm0,xmm0\r
978 pxor xmm1,xmm1\r
979 pxor xmm2,xmm2\r
980 movups XMMWORD[r9],xmm3\r
981 pxor xmm3,xmm3\r
982 pxor xmm8,xmm8\r
983 pxor xmm6,xmm6\r
984 movaps xmm6,XMMWORD[rsp]\r
985 movaps XMMWORD[rsp],xmm0\r
986 movaps xmm7,XMMWORD[16+rsp]\r
987 movaps XMMWORD[16+rsp],xmm0\r
988 movaps xmm8,XMMWORD[32+rsp]\r
989 movaps XMMWORD[32+rsp],xmm0\r
990 movaps xmm9,XMMWORD[48+rsp]\r
991 movaps XMMWORD[48+rsp],xmm0\r
992 lea rsp,[88+rsp]\r
993$L$ccm64_enc_ret:\r
; Restore rdi/rsi from the home slots written by the prologue.
994 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
995 mov rsi,QWORD[16+rsp]\r
996 DB 0F3h,0C3h ;repret\r
997\r
998$L$SEH_end_aesni_ccm64_encrypt_blocks:\r
;-----------------------------------------------------------------------
; aesni_ccm64_decrypt_blocks -- for each 16-byte input block, XOR it with
; an encrypted counter block to produce the output, and fold the OUTPUT
; block into a second 16-byte running value that is encrypted once per
; block (presumably the CBC-MAC state of CCM mode -- TODO confirm against
; the caller).
; ABI:   Microsoft x64 on entry; the prologue remaps the arguments.
; In (after the prologue remap):
;        rdi = input address               (1st argument, rcx on entry)
;        rsi = output address              (2nd argument, rdx on entry)
;        rdx = number of 16-byte blocks    (3rd argument, r8 on entry)
;        rcx = key schedule address        (4th argument, r9 on entry)
;        r8  = address of the 16-byte counter block, read once
;                                          (5th argument, [40+rsp])
;        r9  = address of the 16-byte running value; read at the start
;              and written back at the end (6th argument, [48+rsp])
; Stack: 88 bytes; the low 64 hold the caller's xmm6-xmm9.
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, xmm0-xmm3, flags
; NOTE(review): the first block is read and written before rdx is
; tested, so a block count of zero is not handled as "do nothing" --
; confirm callers never pass 0.
;-----------------------------------------------------------------------
999global aesni_ccm64_decrypt_blocks\r
1000\r
1001ALIGN 16\r
1002aesni_ccm64_decrypt_blocks:\r
1003 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
1004 mov QWORD[16+rsp],rsi\r
1005 mov rax,rsp\r
1006$L$SEH_begin_aesni_ccm64_decrypt_blocks:\r
1007 mov rdi,rcx\r
1008 mov rsi,rdx\r
1009 mov rdx,r8\r
1010 mov rcx,r9\r
1011 mov r8,QWORD[40+rsp]\r
1012 mov r9,QWORD[48+rsp]\r
1013\r
1014\r
1015\r
; Save xmm6-xmm9 (callee-saved under the Microsoft x64 convention).
1016 lea rsp,[((-88))+rsp]\r
1017 movaps XMMWORD[rsp],xmm6\r
1018 movaps XMMWORD[16+rsp],xmm7\r
1019 movaps XMMWORD[32+rsp],xmm8\r
1020 movaps XMMWORD[48+rsp],xmm9\r
1021$L$ccm64_dec_body:\r
; xmm6 = counter block, xmm3 = running value. xmm9 and xmm7 are loaded
; from the constants $L$increment64 and $L$bswap_mask, which are defined
; outside this part of the file.
1022 mov eax,DWORD[240+rcx]\r
1023 movups xmm6,XMMWORD[r8]\r
1024 movdqu xmm3,XMMWORD[r9]\r
1025 movdqa xmm9,XMMWORD[$L$increment64]\r
1026 movdqa xmm7,XMMWORD[$L$bswap_mask]\r
1027\r
; xmm2 = counter block exactly as supplied; r10d/r11 keep the loop count
; and the schedule start. 66 0F 38 00 F7 = pshufb xmm6,xmm7.
1028 movaps xmm2,xmm6\r
1029 mov r10d,eax\r
1030 mov r11,rcx\r
1031DB 102,15,56,0,247\r
; Encrypt the first counter block on its own with the single-block round
; loop (aesenc xmm2,xmm1 repeated eax times, then aesenclast xmm2,xmm1).
1032 movups xmm0,XMMWORD[rcx]\r
1033 movups xmm1,XMMWORD[16+rcx]\r
1034 lea rcx,[32+rcx]\r
1035 xorps xmm2,xmm0\r
1036$L$oop_enc1_5:\r
1037DB 102,15,56,220,209\r
1038 dec eax\r
1039 movups xmm1,XMMWORD[rcx]\r
1040 lea rcx,[16+rcx]\r
1041 jnz NEAR $L$oop_enc1_5\r
1042DB 102,15,56,221,209\r
; Set up the two-lane loop: rcx = schedule start + 32 + 16*n and
; r10 = 16-16*n (negative starting offset). Also fetch the first input
; block into xmm8 and step the counter.
1043 shl r10d,4\r
1044 mov eax,16\r
1045 movups xmm8,XMMWORD[rdi]\r
1046 paddq xmm6,xmm9\r
1047 lea rdi,[16+rdi]\r
1048 sub rax,r10\r
1049 lea rcx,[32+r10*1+r11]\r
1050 mov r10,rax\r
1051 jmp NEAR $L$ccm64_dec_outer\r
1052ALIGN 16\r
; Per-block loop. On entry xmm8 = input block and xmm2 = encrypted
; counter block; their XOR is the output block, which stays in xmm8.
; xmm2 is then reloaded with the next counter block
; (66 0F 38 00 D7 = pshufb xmm2,xmm7).
1053$L$ccm64_dec_outer:\r
1054 xorps xmm8,xmm2\r
1055 movdqa xmm2,xmm6\r
1056 movups XMMWORD[rsi],xmm8\r
1057 lea rsi,[16+rsi]\r
1058DB 102,15,56,0,215\r
1059\r
; Last block: leave the loop and finish the running value separately.
1060 sub rdx,1\r
1061 jz NEAR $L$ccm64_dec_break\r
1062\r
; After these XORs:
;   xmm2 = next counter block ^ key[0]
;   xmm3 = running value ^ output block ^ key[0]
1063 movups xmm0,XMMWORD[r11]\r
1064 mov rax,r10\r
1065 movups xmm1,XMMWORD[16+r11]\r
1066 xorps xmm8,xmm0\r
1067 xorps xmm2,xmm0\r
1068 xorps xmm3,xmm8\r
1069 movups xmm0,XMMWORD[32+r11]\r
1070 jmp NEAR $L$ccm64_dec2_loop\r
1071ALIGN 16\r
; Two-lane round loop on xmm2/xmm3 (hand-encoded aesenc; 209/217 use
; xmm1, 208/216 use xmm0). jnz tests the "add rax,32".
1072$L$ccm64_dec2_loop:\r
1073DB 102,15,56,220,209\r
1074DB 102,15,56,220,217\r
1075 movups xmm1,XMMWORD[rax*1+rcx]\r
1076 add rax,32\r
1077DB 102,15,56,220,208\r
1078DB 102,15,56,220,216\r
1079 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
1080 jnz NEAR $L$ccm64_dec2_loop\r
; Fetch the next input block and step the counter while the last two
; rounds (aesenc with xmm1, aesenclast with xmm0) complete.
1081 movups xmm8,XMMWORD[rdi]\r
1082 paddq xmm6,xmm9\r
1083DB 102,15,56,220,209\r
1084DB 102,15,56,220,217\r
1085DB 102,15,56,221,208\r
1086DB 102,15,56,221,216\r
1087 lea rdi,[16+rdi]\r
1088 jmp NEAR $L$ccm64_dec_outer\r
1089\r
1090ALIGN 16\r
; Final step: fold the last output block into the running value and
; encrypt xmm3 alone with the single-block round loop
; (66 0F 38 DC D9 = aesenc xmm3,xmm1; DD D9 = aesenclast xmm3,xmm1).
; r11 is used as the round-key cursor here.
1091$L$ccm64_dec_break:\r
1092\r
1093 mov eax,DWORD[240+r11]\r
1094 movups xmm0,XMMWORD[r11]\r
1095 movups xmm1,XMMWORD[16+r11]\r
1096 xorps xmm8,xmm0\r
1097 lea r11,[32+r11]\r
1098 xorps xmm3,xmm8\r
1099$L$oop_enc1_6:\r
1100DB 102,15,56,220,217\r
1101 dec eax\r
1102 movups xmm1,XMMWORD[r11]\r
1103 lea r11,[16+r11]\r
1104 jnz NEAR $L$oop_enc1_6\r
1105DB 102,15,56,221,217\r
; Write the running value back through r9, zero the working registers,
; restore xmm6-xmm9 and zero their stack save slots.
1106 pxor xmm0,xmm0\r
1107 pxor xmm1,xmm1\r
1108 pxor xmm2,xmm2\r
1109 movups XMMWORD[r9],xmm3\r
1110 pxor xmm3,xmm3\r
1111 pxor xmm8,xmm8\r
1112 pxor xmm6,xmm6\r
1113 movaps xmm6,XMMWORD[rsp]\r
1114 movaps XMMWORD[rsp],xmm0\r
1115 movaps xmm7,XMMWORD[16+rsp]\r
1116 movaps XMMWORD[16+rsp],xmm0\r
1117 movaps xmm8,XMMWORD[32+rsp]\r
1118 movaps XMMWORD[32+rsp],xmm0\r
1119 movaps xmm9,XMMWORD[48+rsp]\r
1120 movaps XMMWORD[48+rsp],xmm0\r
1121 lea rsp,[88+rsp]\r
1122$L$ccm64_dec_ret:\r
; Restore rdi/rsi from the home slots written by the prologue.
1123 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
1124 mov rsi,QWORD[16+rsp]\r
1125 DB 0F3h,0C3h ;repret\r
1126\r
1127$L$SEH_end_aesni_ccm64_decrypt_blocks:\r
1128global aesni_ctr32_encrypt_blocks\r
1129\r
1130ALIGN 16\r
1131aesni_ctr32_encrypt_blocks:\r
1132 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
1133 mov QWORD[16+rsp],rsi\r
1134 mov rax,rsp\r
1135$L$SEH_begin_aesni_ctr32_encrypt_blocks:\r
1136 mov rdi,rcx\r
1137 mov rsi,rdx\r
1138 mov rdx,r8\r
1139 mov rcx,r9\r
1140 mov r8,QWORD[40+rsp]\r
1141\r
1142\r
1143\r
1144 cmp rdx,1\r
1145 jne NEAR $L$ctr32_bulk\r
1146\r
1147\r
1148\r
1149 movups xmm2,XMMWORD[r8]\r
1150 movups xmm3,XMMWORD[rdi]\r
1151 mov edx,DWORD[240+rcx]\r
1152 movups xmm0,XMMWORD[rcx]\r
1153 movups xmm1,XMMWORD[16+rcx]\r
1154 lea rcx,[32+rcx]\r
1155 xorps xmm2,xmm0\r
1156$L$oop_enc1_7:\r
1157DB 102,15,56,220,209\r
1158 dec edx\r
1159 movups xmm1,XMMWORD[rcx]\r
1160 lea rcx,[16+rcx]\r
1161 jnz NEAR $L$oop_enc1_7\r
1162DB 102,15,56,221,209\r
1163 pxor xmm0,xmm0\r
1164 pxor xmm1,xmm1\r
1165 xorps xmm2,xmm3\r
1166 pxor xmm3,xmm3\r
1167 movups XMMWORD[rsi],xmm2\r
1168 xorps xmm2,xmm2\r
1169 jmp NEAR $L$ctr32_epilogue\r
1170\r
1171ALIGN 16\r
1172$L$ctr32_bulk:\r
1173 lea r11,[rsp]\r
1174\r
1175 push rbp\r
1176\r
1177 sub rsp,288\r
1178 and rsp,-16\r
1179 movaps XMMWORD[(-168)+r11],xmm6\r
1180 movaps XMMWORD[(-152)+r11],xmm7\r
1181 movaps XMMWORD[(-136)+r11],xmm8\r
1182 movaps XMMWORD[(-120)+r11],xmm9\r
1183 movaps XMMWORD[(-104)+r11],xmm10\r
1184 movaps XMMWORD[(-88)+r11],xmm11\r
1185 movaps XMMWORD[(-72)+r11],xmm12\r
1186 movaps XMMWORD[(-56)+r11],xmm13\r
1187 movaps XMMWORD[(-40)+r11],xmm14\r
1188 movaps XMMWORD[(-24)+r11],xmm15\r
1189$L$ctr32_body:\r
1190\r
1191\r
1192\r
1193\r
1194 movdqu xmm2,XMMWORD[r8]\r
1195 movdqu xmm0,XMMWORD[rcx]\r
1196 mov r8d,DWORD[12+r8]\r
1197 pxor xmm2,xmm0\r
1198 mov ebp,DWORD[12+rcx]\r
1199 movdqa XMMWORD[rsp],xmm2\r
1200 bswap r8d\r
1201 movdqa xmm3,xmm2\r
1202 movdqa xmm4,xmm2\r
1203 movdqa xmm5,xmm2\r
1204 movdqa XMMWORD[64+rsp],xmm2\r
1205 movdqa XMMWORD[80+rsp],xmm2\r
1206 movdqa XMMWORD[96+rsp],xmm2\r
1207 mov r10,rdx\r
1208 movdqa XMMWORD[112+rsp],xmm2\r
1209\r
1210 lea rax,[1+r8]\r
1211 lea rdx,[2+r8]\r
1212 bswap eax\r
1213 bswap edx\r
1214 xor eax,ebp\r
1215 xor edx,ebp\r
1216DB 102,15,58,34,216,3\r
1217 lea rax,[3+r8]\r
1218 movdqa XMMWORD[16+rsp],xmm3\r
1219DB 102,15,58,34,226,3\r
1220 bswap eax\r
1221 mov rdx,r10\r
1222 lea r10,[4+r8]\r
1223 movdqa XMMWORD[32+rsp],xmm4\r
1224 xor eax,ebp\r
1225 bswap r10d\r
1226DB 102,15,58,34,232,3\r
1227 xor r10d,ebp\r
1228 movdqa XMMWORD[48+rsp],xmm5\r
1229 lea r9,[5+r8]\r
1230 mov DWORD[((64+12))+rsp],r10d\r
1231 bswap r9d\r
1232 lea r10,[6+r8]\r
1233 mov eax,DWORD[240+rcx]\r
1234 xor r9d,ebp\r
1235 bswap r10d\r
1236 mov DWORD[((80+12))+rsp],r9d\r
1237 xor r10d,ebp\r
1238 lea r9,[7+r8]\r
1239 mov DWORD[((96+12))+rsp],r10d\r
1240 bswap r9d\r
1241 mov r10d,DWORD[((OPENSSL_ia32cap_P+4))]\r
1242 xor r9d,ebp\r
1243 and r10d,71303168\r
1244 mov DWORD[((112+12))+rsp],r9d\r
1245\r
1246 movups xmm1,XMMWORD[16+rcx]\r
1247\r
1248 movdqa xmm6,XMMWORD[64+rsp]\r
1249 movdqa xmm7,XMMWORD[80+rsp]\r
1250\r
1251 cmp rdx,8\r
1252 jb NEAR $L$ctr32_tail\r
1253\r
1254 sub rdx,6\r
1255 cmp r10d,4194304\r
1256 je NEAR $L$ctr32_6x\r
1257\r
1258 lea rcx,[128+rcx]\r
1259 sub rdx,2\r
1260 jmp NEAR $L$ctr32_loop8\r
1261\r
1262ALIGN 16\r
1263$L$ctr32_6x:\r
1264 shl eax,4\r
1265 mov r10d,48\r
1266 bswap ebp\r
1267 lea rcx,[32+rax*1+rcx]\r
1268 sub r10,rax\r
1269 jmp NEAR $L$ctr32_loop6\r
1270\r
1271ALIGN 16\r
1272$L$ctr32_loop6:\r
1273 add r8d,6\r
1274 movups xmm0,XMMWORD[((-48))+r10*1+rcx]\r
1275DB 102,15,56,220,209\r
1276 mov eax,r8d\r
1277 xor eax,ebp\r
1278DB 102,15,56,220,217\r
1279DB 0x0f,0x38,0xf1,0x44,0x24,12\r
1280 lea eax,[1+r8]\r
1281DB 102,15,56,220,225\r
1282 xor eax,ebp\r
1283DB 0x0f,0x38,0xf1,0x44,0x24,28\r
1284DB 102,15,56,220,233\r
1285 lea eax,[2+r8]\r
1286 xor eax,ebp\r
1287DB 102,15,56,220,241\r
1288DB 0x0f,0x38,0xf1,0x44,0x24,44\r
1289 lea eax,[3+r8]\r
1290DB 102,15,56,220,249\r
1291 movups xmm1,XMMWORD[((-32))+r10*1+rcx]\r
1292 xor eax,ebp\r
1293\r
1294DB 102,15,56,220,208\r
1295DB 0x0f,0x38,0xf1,0x44,0x24,60\r
1296 lea eax,[4+r8]\r
1297DB 102,15,56,220,216\r
1298 xor eax,ebp\r
1299DB 0x0f,0x38,0xf1,0x44,0x24,76\r
1300DB 102,15,56,220,224\r
1301 lea eax,[5+r8]\r
1302 xor eax,ebp\r
1303DB 102,15,56,220,232\r
1304DB 0x0f,0x38,0xf1,0x44,0x24,92\r
1305 mov rax,r10\r
1306DB 102,15,56,220,240\r
1307DB 102,15,56,220,248\r
1308 movups xmm0,XMMWORD[((-16))+r10*1+rcx]\r
1309\r
1310 call $L$enc_loop6\r
1311\r
1312 movdqu xmm8,XMMWORD[rdi]\r
1313 movdqu xmm9,XMMWORD[16+rdi]\r
1314 movdqu xmm10,XMMWORD[32+rdi]\r
1315 movdqu xmm11,XMMWORD[48+rdi]\r
1316 movdqu xmm12,XMMWORD[64+rdi]\r
1317 movdqu xmm13,XMMWORD[80+rdi]\r
1318 lea rdi,[96+rdi]\r
1319 movups xmm1,XMMWORD[((-64))+r10*1+rcx]\r
1320 pxor xmm8,xmm2\r
1321 movaps xmm2,XMMWORD[rsp]\r
1322 pxor xmm9,xmm3\r
1323 movaps xmm3,XMMWORD[16+rsp]\r
1324 pxor xmm10,xmm4\r
1325 movaps xmm4,XMMWORD[32+rsp]\r
1326 pxor xmm11,xmm5\r
1327 movaps xmm5,XMMWORD[48+rsp]\r
1328 pxor xmm12,xmm6\r
1329 movaps xmm6,XMMWORD[64+rsp]\r
1330 pxor xmm13,xmm7\r
1331 movaps xmm7,XMMWORD[80+rsp]\r
1332 movdqu XMMWORD[rsi],xmm8\r
1333 movdqu XMMWORD[16+rsi],xmm9\r
1334 movdqu XMMWORD[32+rsi],xmm10\r
1335 movdqu XMMWORD[48+rsi],xmm11\r
1336 movdqu XMMWORD[64+rsi],xmm12\r
1337 movdqu XMMWORD[80+rsi],xmm13\r
1338 lea rsi,[96+rsi]\r
1339\r
1340 sub rdx,6\r
1341 jnc NEAR $L$ctr32_loop6\r
1342\r
1343 add rdx,6\r
1344 jz NEAR $L$ctr32_done\r
1345\r
1346 lea eax,[((-48))+r10]\r
1347 lea rcx,[((-80))+r10*1+rcx]\r
1348 neg eax\r
1349 shr eax,4\r
1350 jmp NEAR $L$ctr32_tail\r
1351\r
1352ALIGN 32\r
1353$L$ctr32_loop8:\r
1354 add r8d,8\r
1355 movdqa xmm8,XMMWORD[96+rsp]\r
1356DB 102,15,56,220,209\r
1357 mov r9d,r8d\r
1358 movdqa xmm9,XMMWORD[112+rsp]\r
1359DB 102,15,56,220,217\r
1360 bswap r9d\r
1361 movups xmm0,XMMWORD[((32-128))+rcx]\r
1362DB 102,15,56,220,225\r
1363 xor r9d,ebp\r
1364 nop\r
1365DB 102,15,56,220,233\r
1366 mov DWORD[((0+12))+rsp],r9d\r
1367 lea r9,[1+r8]\r
1368DB 102,15,56,220,241\r
1369DB 102,15,56,220,249\r
1370DB 102,68,15,56,220,193\r
1371DB 102,68,15,56,220,201\r
1372 movups xmm1,XMMWORD[((48-128))+rcx]\r
1373 bswap r9d\r
1374DB 102,15,56,220,208\r
1375DB 102,15,56,220,216\r
1376 xor r9d,ebp\r
1377DB 0x66,0x90\r
1378DB 102,15,56,220,224\r
1379DB 102,15,56,220,232\r
1380 mov DWORD[((16+12))+rsp],r9d\r
1381 lea r9,[2+r8]\r
1382DB 102,15,56,220,240\r
1383DB 102,15,56,220,248\r
1384DB 102,68,15,56,220,192\r
1385DB 102,68,15,56,220,200\r
1386 movups xmm0,XMMWORD[((64-128))+rcx]\r
1387 bswap r9d\r
1388DB 102,15,56,220,209\r
1389DB 102,15,56,220,217\r
1390 xor r9d,ebp\r
1391DB 0x66,0x90\r
1392DB 102,15,56,220,225\r
1393DB 102,15,56,220,233\r
1394 mov DWORD[((32+12))+rsp],r9d\r
1395 lea r9,[3+r8]\r
1396DB 102,15,56,220,241\r
1397DB 102,15,56,220,249\r
1398DB 102,68,15,56,220,193\r
1399DB 102,68,15,56,220,201\r
1400 movups xmm1,XMMWORD[((80-128))+rcx]\r
1401 bswap r9d\r
1402DB 102,15,56,220,208\r
1403DB 102,15,56,220,216\r
1404 xor r9d,ebp\r
1405DB 0x66,0x90\r
1406DB 102,15,56,220,224\r
1407DB 102,15,56,220,232\r
1408 mov DWORD[((48+12))+rsp],r9d\r
1409 lea r9,[4+r8]\r
1410DB 102,15,56,220,240\r
1411DB 102,15,56,220,248\r
1412DB 102,68,15,56,220,192\r
1413DB 102,68,15,56,220,200\r
1414 movups xmm0,XMMWORD[((96-128))+rcx]\r
1415 bswap r9d\r
1416DB 102,15,56,220,209\r
1417DB 102,15,56,220,217\r
1418 xor r9d,ebp\r
1419DB 0x66,0x90\r
1420DB 102,15,56,220,225\r
1421DB 102,15,56,220,233\r
1422 mov DWORD[((64+12))+rsp],r9d\r
1423 lea r9,[5+r8]\r
1424DB 102,15,56,220,241\r
1425DB 102,15,56,220,249\r
1426DB 102,68,15,56,220,193\r
1427DB 102,68,15,56,220,201\r
1428 movups xmm1,XMMWORD[((112-128))+rcx]\r
1429 bswap r9d\r
1430DB 102,15,56,220,208\r
1431DB 102,15,56,220,216\r
1432 xor r9d,ebp\r
1433DB 0x66,0x90\r
1434DB 102,15,56,220,224\r
1435DB 102,15,56,220,232\r
1436 mov DWORD[((80+12))+rsp],r9d\r
1437 lea r9,[6+r8]\r
1438DB 102,15,56,220,240\r
1439DB 102,15,56,220,248\r
1440DB 102,68,15,56,220,192\r
1441DB 102,68,15,56,220,200\r
1442 movups xmm0,XMMWORD[((128-128))+rcx]\r
1443 bswap r9d\r
1444DB 102,15,56,220,209\r
1445DB 102,15,56,220,217\r
1446 xor r9d,ebp\r
1447DB 0x66,0x90\r
1448DB 102,15,56,220,225\r
1449DB 102,15,56,220,233\r
1450 mov DWORD[((96+12))+rsp],r9d\r
1451 lea r9,[7+r8]\r
1452DB 102,15,56,220,241\r
1453DB 102,15,56,220,249\r
1454DB 102,68,15,56,220,193\r
1455DB 102,68,15,56,220,201\r
1456 movups xmm1,XMMWORD[((144-128))+rcx]\r
1457 bswap r9d\r
1458DB 102,15,56,220,208\r
1459DB 102,15,56,220,216\r
1460DB 102,15,56,220,224\r
1461 xor r9d,ebp\r
1462 movdqu xmm10,XMMWORD[rdi]\r
1463DB 102,15,56,220,232\r
1464 mov DWORD[((112+12))+rsp],r9d\r
1465 cmp eax,11\r
1466DB 102,15,56,220,240\r
1467DB 102,15,56,220,248\r
1468DB 102,68,15,56,220,192\r
1469DB 102,68,15,56,220,200\r
1470 movups xmm0,XMMWORD[((160-128))+rcx]\r
1471\r
1472 jb NEAR $L$ctr32_enc_done\r
1473\r
1474DB 102,15,56,220,209\r
1475DB 102,15,56,220,217\r
1476DB 102,15,56,220,225\r
1477DB 102,15,56,220,233\r
1478DB 102,15,56,220,241\r
1479DB 102,15,56,220,249\r
1480DB 102,68,15,56,220,193\r
1481DB 102,68,15,56,220,201\r
1482 movups xmm1,XMMWORD[((176-128))+rcx]\r
1483\r
1484DB 102,15,56,220,208\r
1485DB 102,15,56,220,216\r
1486DB 102,15,56,220,224\r
1487DB 102,15,56,220,232\r
1488DB 102,15,56,220,240\r
1489DB 102,15,56,220,248\r
1490DB 102,68,15,56,220,192\r
1491DB 102,68,15,56,220,200\r
1492 movups xmm0,XMMWORD[((192-128))+rcx]\r
1493 je NEAR $L$ctr32_enc_done\r
1494\r
1495DB 102,15,56,220,209\r
1496DB 102,15,56,220,217\r
1497DB 102,15,56,220,225\r
1498DB 102,15,56,220,233\r
1499DB 102,15,56,220,241\r
1500DB 102,15,56,220,249\r
1501DB 102,68,15,56,220,193\r
1502DB 102,68,15,56,220,201\r
1503 movups xmm1,XMMWORD[((208-128))+rcx]\r
1504\r
1505DB 102,15,56,220,208\r
1506DB 102,15,56,220,216\r
1507DB 102,15,56,220,224\r
1508DB 102,15,56,220,232\r
1509DB 102,15,56,220,240\r
1510DB 102,15,56,220,248\r
1511DB 102,68,15,56,220,192\r
1512DB 102,68,15,56,220,200\r
1513 movups xmm0,XMMWORD[((224-128))+rcx]\r
1514 jmp NEAR $L$ctr32_enc_done\r
1515\r
1516ALIGN 16\r
1517$L$ctr32_enc_done:\r
1518 movdqu xmm11,XMMWORD[16+rdi]\r
1519 pxor xmm10,xmm0\r
1520 movdqu xmm12,XMMWORD[32+rdi]\r
1521 pxor xmm11,xmm0\r
1522 movdqu xmm13,XMMWORD[48+rdi]\r
1523 pxor xmm12,xmm0\r
1524 movdqu xmm14,XMMWORD[64+rdi]\r
1525 pxor xmm13,xmm0\r
1526 movdqu xmm15,XMMWORD[80+rdi]\r
1527 pxor xmm14,xmm0\r
1528 pxor xmm15,xmm0\r
1529DB 102,15,56,220,209\r
1530DB 102,15,56,220,217\r
1531DB 102,15,56,220,225\r
1532DB 102,15,56,220,233\r
1533DB 102,15,56,220,241\r
1534DB 102,15,56,220,249\r
1535DB 102,68,15,56,220,193\r
1536DB 102,68,15,56,220,201\r
1537 movdqu xmm1,XMMWORD[96+rdi]\r
1538 lea rdi,[128+rdi]\r
1539\r
1540DB 102,65,15,56,221,210\r
1541 pxor xmm1,xmm0\r
1542 movdqu xmm10,XMMWORD[((112-128))+rdi]\r
1543DB 102,65,15,56,221,219\r
1544 pxor xmm10,xmm0\r
1545 movdqa xmm11,XMMWORD[rsp]\r
1546DB 102,65,15,56,221,228\r
1547DB 102,65,15,56,221,237\r
1548 movdqa xmm12,XMMWORD[16+rsp]\r
1549 movdqa xmm13,XMMWORD[32+rsp]\r
1550DB 102,65,15,56,221,246\r
1551DB 102,65,15,56,221,255\r
1552 movdqa xmm14,XMMWORD[48+rsp]\r
1553 movdqa xmm15,XMMWORD[64+rsp]\r
1554DB 102,68,15,56,221,193\r
1555 movdqa xmm0,XMMWORD[80+rsp]\r
1556 movups xmm1,XMMWORD[((16-128))+rcx]\r
1557DB 102,69,15,56,221,202\r
1558\r
1559 movups XMMWORD[rsi],xmm2\r
1560 movdqa xmm2,xmm11\r
1561 movups XMMWORD[16+rsi],xmm3\r
1562 movdqa xmm3,xmm12\r
1563 movups XMMWORD[32+rsi],xmm4\r
1564 movdqa xmm4,xmm13\r
1565 movups XMMWORD[48+rsi],xmm5\r
1566 movdqa xmm5,xmm14\r
1567 movups XMMWORD[64+rsi],xmm6\r
1568 movdqa xmm6,xmm15\r
1569 movups XMMWORD[80+rsi],xmm7\r
1570 movdqa xmm7,xmm0\r
1571 movups XMMWORD[96+rsi],xmm8\r
1572 movups XMMWORD[112+rsi],xmm9\r
1573 lea rsi,[128+rsi]\r
1574\r
1575 sub rdx,8\r
1576 jnc NEAR $L$ctr32_loop8\r
1577\r
1578 add rdx,8\r
1579 jz NEAR $L$ctr32_done\r
1580 lea rcx,[((-128))+rcx]\r
1581\r
1582$L$ctr32_tail:\r
1583\r
1584\r
1585 lea rcx,[16+rcx]\r
1586 cmp rdx,4\r
1587 jb NEAR $L$ctr32_loop3\r
1588 je NEAR $L$ctr32_loop4\r
1589\r
1590\r
1591 shl eax,4\r
1592 movdqa xmm8,XMMWORD[96+rsp]\r
1593 pxor xmm9,xmm9\r
1594\r
1595 movups xmm0,XMMWORD[16+rcx]\r
1596DB 102,15,56,220,209\r
1597DB 102,15,56,220,217\r
1598 lea rcx,[((32-16))+rax*1+rcx]\r
1599 neg rax\r
1600DB 102,15,56,220,225\r
1601 add rax,16\r
1602 movups xmm10,XMMWORD[rdi]\r
1603DB 102,15,56,220,233\r
1604DB 102,15,56,220,241\r
1605 movups xmm11,XMMWORD[16+rdi]\r
1606 movups xmm12,XMMWORD[32+rdi]\r
1607DB 102,15,56,220,249\r
1608DB 102,68,15,56,220,193\r
1609\r
1610 call $L$enc_loop8_enter\r
1611\r
1612 movdqu xmm13,XMMWORD[48+rdi]\r
1613 pxor xmm2,xmm10\r
1614 movdqu xmm10,XMMWORD[64+rdi]\r
1615 pxor xmm3,xmm11\r
1616 movdqu XMMWORD[rsi],xmm2\r
1617 pxor xmm4,xmm12\r
1618 movdqu XMMWORD[16+rsi],xmm3\r
1619 pxor xmm5,xmm13\r
1620 movdqu XMMWORD[32+rsi],xmm4\r
1621 pxor xmm6,xmm10\r
1622 movdqu XMMWORD[48+rsi],xmm5\r
1623 movdqu XMMWORD[64+rsi],xmm6\r
1624 cmp rdx,6\r
1625 jb NEAR $L$ctr32_done\r
1626\r
1627 movups xmm11,XMMWORD[80+rdi]\r
1628 xorps xmm7,xmm11\r
1629 movups XMMWORD[80+rsi],xmm7\r
1630 je NEAR $L$ctr32_done\r
1631\r
1632 movups xmm12,XMMWORD[96+rdi]\r
1633 xorps xmm8,xmm12\r
1634 movups XMMWORD[96+rsi],xmm8\r
1635 jmp NEAR $L$ctr32_done\r
1636\r
1637ALIGN 32\r
1638$L$ctr32_loop4:\r
1639DB 102,15,56,220,209\r
1640 lea rcx,[16+rcx]\r
1641 dec eax\r
1642DB 102,15,56,220,217\r
1643DB 102,15,56,220,225\r
1644DB 102,15,56,220,233\r
1645 movups xmm1,XMMWORD[rcx]\r
1646 jnz NEAR $L$ctr32_loop4\r
1647DB 102,15,56,221,209\r
1648DB 102,15,56,221,217\r
1649 movups xmm10,XMMWORD[rdi]\r
1650 movups xmm11,XMMWORD[16+rdi]\r
1651DB 102,15,56,221,225\r
1652DB 102,15,56,221,233\r
1653 movups xmm12,XMMWORD[32+rdi]\r
1654 movups xmm13,XMMWORD[48+rdi]\r
1655\r
1656 xorps xmm2,xmm10\r
1657 movups XMMWORD[rsi],xmm2\r
1658 xorps xmm3,xmm11\r
1659 movups XMMWORD[16+rsi],xmm3\r
1660 pxor xmm4,xmm12\r
1661 movdqu XMMWORD[32+rsi],xmm4\r
1662 pxor xmm5,xmm13\r
1663 movdqu XMMWORD[48+rsi],xmm5\r
1664 jmp NEAR $L$ctr32_done\r
1665\r
1666ALIGN 32\r
1667$L$ctr32_loop3:\r
1668DB 102,15,56,220,209\r
1669 lea rcx,[16+rcx]\r
1670 dec eax\r
1671DB 102,15,56,220,217\r
1672DB 102,15,56,220,225\r
1673 movups xmm1,XMMWORD[rcx]\r
1674 jnz NEAR $L$ctr32_loop3\r
1675DB 102,15,56,221,209\r
1676DB 102,15,56,221,217\r
1677DB 102,15,56,221,225\r
1678\r
1679 movups xmm10,XMMWORD[rdi]\r
1680 xorps xmm2,xmm10\r
1681 movups XMMWORD[rsi],xmm2\r
1682 cmp rdx,2\r
1683 jb NEAR $L$ctr32_done\r
1684\r
1685 movups xmm11,XMMWORD[16+rdi]\r
1686 xorps xmm3,xmm11\r
1687 movups XMMWORD[16+rsi],xmm3\r
1688 je NEAR $L$ctr32_done\r
1689\r
1690 movups xmm12,XMMWORD[32+rdi]\r
1691 xorps xmm4,xmm12\r
1692 movups XMMWORD[32+rsi],xmm4\r
1693\r
1694$L$ctr32_done:\r
1695 xorps xmm0,xmm0\r
1696 xor ebp,ebp\r
1697 pxor xmm1,xmm1\r
1698 pxor xmm2,xmm2\r
1699 pxor xmm3,xmm3\r
1700 pxor xmm4,xmm4\r
1701 pxor xmm5,xmm5\r
1702 movaps xmm6,XMMWORD[((-168))+r11]\r
1703 movaps XMMWORD[(-168)+r11],xmm0\r
1704 movaps xmm7,XMMWORD[((-152))+r11]\r
1705 movaps XMMWORD[(-152)+r11],xmm0\r
1706 movaps xmm8,XMMWORD[((-136))+r11]\r
1707 movaps XMMWORD[(-136)+r11],xmm0\r
1708 movaps xmm9,XMMWORD[((-120))+r11]\r
1709 movaps XMMWORD[(-120)+r11],xmm0\r
1710 movaps xmm10,XMMWORD[((-104))+r11]\r
1711 movaps XMMWORD[(-104)+r11],xmm0\r
1712 movaps xmm11,XMMWORD[((-88))+r11]\r
1713 movaps XMMWORD[(-88)+r11],xmm0\r
1714 movaps xmm12,XMMWORD[((-72))+r11]\r
1715 movaps XMMWORD[(-72)+r11],xmm0\r
1716 movaps xmm13,XMMWORD[((-56))+r11]\r
1717 movaps XMMWORD[(-56)+r11],xmm0\r
1718 movaps xmm14,XMMWORD[((-40))+r11]\r
1719 movaps XMMWORD[(-40)+r11],xmm0\r
1720 movaps xmm15,XMMWORD[((-24))+r11]\r
1721 movaps XMMWORD[(-24)+r11],xmm0\r
1722 movaps XMMWORD[rsp],xmm0\r
1723 movaps XMMWORD[16+rsp],xmm0\r
1724 movaps XMMWORD[32+rsp],xmm0\r
1725 movaps XMMWORD[48+rsp],xmm0\r
1726 movaps XMMWORD[64+rsp],xmm0\r
1727 movaps XMMWORD[80+rsp],xmm0\r
1728 movaps XMMWORD[96+rsp],xmm0\r
1729 movaps XMMWORD[112+rsp],xmm0\r
1730 mov rbp,QWORD[((-8))+r11]\r
1731\r
1732 lea rsp,[r11]\r
1733\r
1734$L$ctr32_epilogue:\r
1735 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
1736 mov rsi,QWORD[16+rsp]\r
1737 DB 0F3h,0C3h ;repret\r
1738\r
1739$L$SEH_end_aesni_ctr32_encrypt_blocks:\r
;-----------------------------------------------------------------------
; aesni_xts_encrypt -- tweaked AES-NI block encryption (XTS, per the symbol name).
; ABI: Win64 entry. The four register arguments (rcx, rdx, r8, r9) and the two
; stack arguments at [40+rsp] / [48+rsp] are moved into rdi, rsi, rdx, rcx, r8, r9.
; Register roles after that shuffle, as used by the code below:
;   rdi = input pointer (read 16 bytes per block)
;   rsi = output pointer
;   rdx = length in bytes (whole blocks = rdx & -16; length & 15 selects the stealing tail)
;   rcx = key schedule for the data blocks (round count read from [240+rcx])
;   r8  = key schedule used once to encrypt the 16 bytes at r9 (round count at [240+r8])
;   r9  = pointer to the 16-byte value that becomes the first tweak
; Saved and restored: rdi, rsi, rbp, xmm6-xmm15.
; Encoding key for the DB lines: 102,15,56,220,m = aesenc and 102,15,56,221,m = aesenclast.
;   m = 209/217/225/233/241/249 -> xmm2..xmm7 with round key xmm1
;   m = 208/216/224/232/240/248 -> xmm2..xmm7 with round key xmm0
;   m,36,d (84/92/100/108/116/124) -> xmm2..xmm7 with memory operand [rsp+d]
;-----------------------------------------------------------------------
1740global aesni_xts_encrypt\r
1741\r
1742ALIGN 16\r
1743aesni_xts_encrypt:\r
1744 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
1745 mov QWORD[16+rsp],rsi\r
1746 mov rax,rsp\r
1747$L$SEH_begin_aesni_xts_encrypt:\r
1748 mov rdi,rcx\r
1749 mov rsi,rdx\r
1750 mov rdx,r8\r
1751 mov rcx,r9\r
1752 mov r8,QWORD[40+rsp]\r
1753 mov r9,QWORD[48+rsp]\r
1754\r
1755\r
1756\r
; r11 keeps the entry rsp. rbp lands at [-8+r11]; xmm6-xmm15 are saved at
; [-168+r11]..[-24+r11]; rsp is lowered and 16-byte aligned for the scratch area.
1757 lea r11,[rsp]\r
1758\r
1759 push rbp\r
1760\r
1761 sub rsp,272\r
1762 and rsp,-16\r
1763 movaps XMMWORD[(-168)+r11],xmm6\r
1764 movaps XMMWORD[(-152)+r11],xmm7\r
1765 movaps XMMWORD[(-136)+r11],xmm8\r
1766 movaps XMMWORD[(-120)+r11],xmm9\r
1767 movaps XMMWORD[(-104)+r11],xmm10\r
1768 movaps XMMWORD[(-88)+r11],xmm11\r
1769 movaps XMMWORD[(-72)+r11],xmm12\r
1770 movaps XMMWORD[(-56)+r11],xmm13\r
1771 movaps XMMWORD[(-40)+r11],xmm14\r
1772 movaps XMMWORD[(-24)+r11],xmm15\r
1773$L$xts_enc_body:\r
; Encrypt the 16 bytes at r9 with the key schedule at r8 (single-block loop,
; eax = round count). The result in xmm2 is the first tweak.
; r10d = round count of the data key schedule at rcx.
1774 movups xmm2,XMMWORD[r9]\r
1775 mov eax,DWORD[240+r8]\r
1776 mov r10d,DWORD[240+rcx]\r
1777 movups xmm0,XMMWORD[r8]\r
1778 movups xmm1,XMMWORD[16+r8]\r
1779 lea r8,[32+r8]\r
1780 xorps xmm2,xmm0\r
1781$L$oop_enc1_8:\r
1782DB 102,15,56,220,209\r
1783 dec eax\r
1784 movups xmm1,XMMWORD[r8]\r
1785 lea r8,[16+r8]\r
1786 jnz NEAR $L$oop_enc1_8\r
1787DB 102,15,56,221,209\r
; xmm0 = round key 0 of the data key; rbp = schedule base; eax = round count;
; r10d = 16*rounds; r9 = original byte length; rdx = length rounded down to whole blocks.
1788 movups xmm0,XMMWORD[rcx]\r
1789 mov rbp,rcx\r
1790 mov eax,r10d\r
1791 shl r10d,4\r
1792 mov r9,rdx\r
1793 and rdx,-16\r
1794\r
; xmm1 = round key at [16+16*rounds+rcx] (the one aesenclast consumes).
; It is XORed with round key 0 a few lines below and parked at [96+rsp].
1795 movups xmm1,XMMWORD[16+r10*1+rcx]\r
1796\r
; Build the first tweaks. xmm9 holds copies of dwords 3 and 1 of the tweak;
; psrad 31 turns their sign bits into masks that select parts of $L$xts_magic.
; paddq doubles each 64-bit half of xmm15 and the masked constant is XORed in.
; NOTE(review): this appears to be the usual XTS doubling in GF(2^128);
; $L$xts_magic is defined outside this view -- confirm.
; Result: xmm10..xmm14 = five consecutive tweaks, each pre-XORed with round key 0;
; xmm15 = the tweak after those, not XORed.
1797 movdqa xmm8,XMMWORD[$L$xts_magic]\r
1798 movdqa xmm15,xmm2\r
1799 pshufd xmm9,xmm2,0x5f\r
1800 pxor xmm1,xmm0\r
1801 movdqa xmm14,xmm9\r
1802 paddd xmm9,xmm9\r
1803 movdqa xmm10,xmm15\r
1804 psrad xmm14,31\r
1805 paddq xmm15,xmm15\r
1806 pand xmm14,xmm8\r
1807 pxor xmm10,xmm0\r
1808 pxor xmm15,xmm14\r
1809 movdqa xmm14,xmm9\r
1810 paddd xmm9,xmm9\r
1811 movdqa xmm11,xmm15\r
1812 psrad xmm14,31\r
1813 paddq xmm15,xmm15\r
1814 pand xmm14,xmm8\r
1815 pxor xmm11,xmm0\r
1816 pxor xmm15,xmm14\r
1817 movdqa xmm14,xmm9\r
1818 paddd xmm9,xmm9\r
1819 movdqa xmm12,xmm15\r
1820 psrad xmm14,31\r
1821 paddq xmm15,xmm15\r
1822 pand xmm14,xmm8\r
1823 pxor xmm12,xmm0\r
1824 pxor xmm15,xmm14\r
1825 movdqa xmm14,xmm9\r
1826 paddd xmm9,xmm9\r
1827 movdqa xmm13,xmm15\r
1828 psrad xmm14,31\r
1829 paddq xmm15,xmm15\r
1830 pand xmm14,xmm8\r
1831 pxor xmm13,xmm0\r
1832 pxor xmm15,xmm14\r
1833 movdqa xmm14,xmm15\r
1834 psrad xmm9,31\r
1835 paddq xmm15,xmm15\r
1836 pand xmm9,xmm8\r
1837 pxor xmm14,xmm0\r
1838 pxor xmm15,xmm9\r
1839 movaps XMMWORD[96+rsp],xmm1\r
1840\r
; Fewer than six whole blocks available -> go straight to the tail code.
1841 sub rdx,16*6\r
1842 jc NEAR $L$xts_enc_short\r
1843\r
; Six-block setup: rcx = base+32+16*rounds; rax = r10 = 112-16*rounds is the
; index that $L$xts_enc_loop6 steps by 32 until it reaches zero;
; xmm1 = round key 1; r8 = address of $L$xts_magic.
1844 mov eax,16+96\r
1845 lea rcx,[32+r10*1+rbp]\r
1846 sub rax,r10\r
1847 movups xmm1,XMMWORD[16+rbp]\r
1848 mov r10,rax\r
1849 lea r8,[$L$xts_magic]\r
1850 jmp NEAR $L$xts_enc_grandloop\r
1851\r
1852ALIGN 32\r
1853$L$xts_enc_grandloop:\r
; Six blocks per iteration. xmm2..xmm7 = input XOR (tweak XOR round key 0),
; with the first rounds interleaved between the loads.
; Each tweak register is then XORed with [96+rsp] and stored at [rsp+16*i]
; (slot 48 is written after the round loop). The closing aesenclast with that
; memory operand therefore applies the last round key and the tweak in one step.
1854 movdqu xmm2,XMMWORD[rdi]\r
1855 movdqa xmm8,xmm0\r
1856 movdqu xmm3,XMMWORD[16+rdi]\r
1857 pxor xmm2,xmm10\r
1858 movdqu xmm4,XMMWORD[32+rdi]\r
1859 pxor xmm3,xmm11\r
1860DB 102,15,56,220,209\r
1861 movdqu xmm5,XMMWORD[48+rdi]\r
1862 pxor xmm4,xmm12\r
1863DB 102,15,56,220,217\r
1864 movdqu xmm6,XMMWORD[64+rdi]\r
1865 pxor xmm5,xmm13\r
1866DB 102,15,56,220,225\r
1867 movdqu xmm7,XMMWORD[80+rdi]\r
1868 pxor xmm8,xmm15\r
1869 movdqa xmm9,XMMWORD[96+rsp]\r
1870 pxor xmm6,xmm14\r
1871DB 102,15,56,220,233\r
1872 movups xmm0,XMMWORD[32+rbp]\r
1873 lea rdi,[96+rdi]\r
1874 pxor xmm7,xmm8\r
1875\r
1876 pxor xmm10,xmm9\r
1877DB 102,15,56,220,241\r
1878 pxor xmm11,xmm9\r
1879 movdqa XMMWORD[rsp],xmm10\r
1880DB 102,15,56,220,249\r
1881 movups xmm1,XMMWORD[48+rbp]\r
1882 pxor xmm12,xmm9\r
1883\r
1884DB 102,15,56,220,208\r
1885 pxor xmm13,xmm9\r
1886 movdqa XMMWORD[16+rsp],xmm11\r
1887DB 102,15,56,220,216\r
1888 pxor xmm14,xmm9\r
1889 movdqa XMMWORD[32+rsp],xmm12\r
1890DB 102,15,56,220,224\r
1891DB 102,15,56,220,232\r
1892 pxor xmm8,xmm9\r
1893 movdqa XMMWORD[64+rsp],xmm14\r
1894DB 102,15,56,220,240\r
1895DB 102,15,56,220,248\r
1896 movups xmm0,XMMWORD[64+rbp]\r
1897 movdqa XMMWORD[80+rsp],xmm8\r
1898 pshufd xmm9,xmm15,0x5f\r
1899 jmp NEAR $L$xts_enc_loop6\r
1900ALIGN 32\r
1901$L$xts_enc_loop6:\r
; Middle rounds for all six blocks, two round keys per pass.
; The flags tested by jnz come from "add rax,32" (movups does not change flags).
1902DB 102,15,56,220,209\r
1903DB 102,15,56,220,217\r
1904DB 102,15,56,220,225\r
1905DB 102,15,56,220,233\r
1906DB 102,15,56,220,241\r
1907DB 102,15,56,220,249\r
1908 movups xmm1,XMMWORD[((-64))+rax*1+rcx]\r
1909 add rax,32\r
1910\r
1911DB 102,15,56,220,208\r
1912DB 102,15,56,220,216\r
1913DB 102,15,56,220,224\r
1914DB 102,15,56,220,232\r
1915DB 102,15,56,220,240\r
1916DB 102,15,56,220,248\r
1917 movups xmm0,XMMWORD[((-80))+rax*1+rcx]\r
1918 jnz NEAR $L$xts_enc_loop6\r
1919\r
; Remaining rounds, interleaved with deriving the tweaks for the next iteration:
; xmm10..xmm14 = next five tweaks pre-XORed with round key 0 (loaded from [rbp]);
; xmm15 keeps advancing and ends as the tweak after those.
; xmm8 = the constant at r8 ($L$xts_magic).
1920 movdqa xmm8,XMMWORD[r8]\r
1921 movdqa xmm14,xmm9\r
1922 paddd xmm9,xmm9\r
1923DB 102,15,56,220,209\r
1924 paddq xmm15,xmm15\r
1925 psrad xmm14,31\r
1926DB 102,15,56,220,217\r
1927 pand xmm14,xmm8\r
1928 movups xmm10,XMMWORD[rbp]\r
1929DB 102,15,56,220,225\r
1930DB 102,15,56,220,233\r
1931DB 102,15,56,220,241\r
1932 pxor xmm15,xmm14\r
1933 movaps xmm11,xmm10\r
1934DB 102,15,56,220,249\r
1935 movups xmm1,XMMWORD[((-64))+rcx]\r
1936\r
1937 movdqa xmm14,xmm9\r
1938DB 102,15,56,220,208\r
1939 paddd xmm9,xmm9\r
1940 pxor xmm10,xmm15\r
1941DB 102,15,56,220,216\r
1942 psrad xmm14,31\r
1943 paddq xmm15,xmm15\r
1944DB 102,15,56,220,224\r
1945DB 102,15,56,220,232\r
1946 pand xmm14,xmm8\r
1947 movaps xmm12,xmm11\r
1948DB 102,15,56,220,240\r
1949 pxor xmm15,xmm14\r
1950 movdqa xmm14,xmm9\r
1951DB 102,15,56,220,248\r
1952 movups xmm0,XMMWORD[((-48))+rcx]\r
1953\r
1954 paddd xmm9,xmm9\r
1955DB 102,15,56,220,209\r
1956 pxor xmm11,xmm15\r
1957 psrad xmm14,31\r
1958DB 102,15,56,220,217\r
1959 paddq xmm15,xmm15\r
1960 pand xmm14,xmm8\r
1961DB 102,15,56,220,225\r
1962DB 102,15,56,220,233\r
; Slot 48 of the tweak table is stored here, before xmm13 is reused for the next tweak.
1963 movdqa XMMWORD[48+rsp],xmm13\r
1964 pxor xmm15,xmm14\r
1965DB 102,15,56,220,241\r
1966 movaps xmm13,xmm12\r
1967 movdqa xmm14,xmm9\r
1968DB 102,15,56,220,249\r
1969 movups xmm1,XMMWORD[((-32))+rcx]\r
1970\r
1971 paddd xmm9,xmm9\r
1972DB 102,15,56,220,208\r
1973 pxor xmm12,xmm15\r
1974 psrad xmm14,31\r
1975DB 102,15,56,220,216\r
1976 paddq xmm15,xmm15\r
1977 pand xmm14,xmm8\r
1978DB 102,15,56,220,224\r
1979DB 102,15,56,220,232\r
1980DB 102,15,56,220,240\r
1981 pxor xmm15,xmm14\r
1982 movaps xmm14,xmm13\r
1983DB 102,15,56,220,248\r
1984\r
1985 movdqa xmm0,xmm9\r
1986 paddd xmm9,xmm9\r
1987DB 102,15,56,220,209\r
1988 pxor xmm13,xmm15\r
1989 psrad xmm0,31\r
1990DB 102,15,56,220,217\r
1991 paddq xmm15,xmm15\r
1992 pand xmm0,xmm8\r
1993DB 102,15,56,220,225\r
1994DB 102,15,56,220,233\r
1995 pxor xmm15,xmm0\r
; Reload round keys 0 and 1 for the next iteration (xmm0 is also used by the tail code).
1996 movups xmm0,XMMWORD[rbp]\r
1997DB 102,15,56,220,241\r
1998DB 102,15,56,220,249\r
1999 movups xmm1,XMMWORD[16+rbp]\r
2000\r
; Final round: aesenclast xmm2..xmm7 with [rsp+0], [rsp+16], ... [rsp+80].
; rax is reset to the loop index kept in r10.
2001 pxor xmm14,xmm15\r
2002DB 102,15,56,221,84,36,0\r
2003 psrad xmm9,31\r
2004 paddq xmm15,xmm15\r
2005DB 102,15,56,221,92,36,16\r
2006DB 102,15,56,221,100,36,32\r
2007 pand xmm9,xmm8\r
2008 mov rax,r10\r
2009DB 102,15,56,221,108,36,48\r
2010DB 102,15,56,221,116,36,64\r
2011DB 102,15,56,221,124,36,80\r
2012 pxor xmm15,xmm9\r
2013\r
; Write the six output blocks and loop while at least six more whole blocks remain.
2014 lea rsi,[96+rsi]\r
2015 movups XMMWORD[(-96)+rsi],xmm2\r
2016 movups XMMWORD[(-80)+rsi],xmm3\r
2017 movups XMMWORD[(-64)+rsi],xmm4\r
2018 movups XMMWORD[(-48)+rsi],xmm5\r
2019 movups XMMWORD[(-32)+rsi],xmm6\r
2020 movups XMMWORD[(-16)+rsi],xmm7\r
2021 sub rdx,16*6\r
2022 jnc NEAR $L$xts_enc_grandloop\r
2023\r
; Leaving the six-block loop: eax = (112 - r10d) >> 4 recovers the round count
; and rcx points at the schedule base again.
2024 mov eax,16+96\r
2025 sub eax,r10d\r
2026 mov rcx,rbp\r
2027 shr eax,4\r
2028\r
2029$L$xts_enc_short:\r
2030\r
; Tail: rdx + 96 = bytes of whole blocks still to process (0..80).
; r10d keeps the round count for the stealing block.
; Each "pxor xmmN,xmm0" strips round key 0 from a tweak; they are ordered so that
; every branch target sees plain tweaks for its blocks and for the one that follows.
2031 mov r10d,eax\r
2032 pxor xmm10,xmm0\r
2033 add rdx,16*6\r
2034 jz NEAR $L$xts_enc_done\r
2035\r
2036 pxor xmm11,xmm0\r
2037 cmp rdx,0x20\r
2038 jb NEAR $L$xts_enc_one\r
2039 pxor xmm12,xmm0\r
2040 je NEAR $L$xts_enc_two\r
2041\r
2042 pxor xmm13,xmm0\r
2043 cmp rdx,0x40\r
2044 jb NEAR $L$xts_enc_three\r
2045 pxor xmm14,xmm0\r
2046 je NEAR $L$xts_enc_four\r
2047\r
; Five blocks left: tweak in, call the six-lane helper with xmm7 zeroed, tweak out.
; xmm10 = xmm15 afterwards (tweak for a possible stealing block).
; NOTE(review): _aesni_encrypt6 is defined outside this view; presumably it
; encrypts xmm2..xmm7 using rcx/eax -- confirm.
2048 movdqu xmm2,XMMWORD[rdi]\r
2049 movdqu xmm3,XMMWORD[16+rdi]\r
2050 movdqu xmm4,XMMWORD[32+rdi]\r
2051 pxor xmm2,xmm10\r
2052 movdqu xmm5,XMMWORD[48+rdi]\r
2053 pxor xmm3,xmm11\r
2054 movdqu xmm6,XMMWORD[64+rdi]\r
2055 lea rdi,[80+rdi]\r
2056 pxor xmm4,xmm12\r
2057 pxor xmm5,xmm13\r
2058 pxor xmm6,xmm14\r
2059 pxor xmm7,xmm7\r
2060\r
2061 call _aesni_encrypt6\r
2062\r
2063 xorps xmm2,xmm10\r
2064 movdqa xmm10,xmm15\r
2065 xorps xmm3,xmm11\r
2066 xorps xmm4,xmm12\r
2067 movdqu XMMWORD[rsi],xmm2\r
2068 xorps xmm5,xmm13\r
2069 movdqu XMMWORD[16+rsi],xmm3\r
2070 xorps xmm6,xmm14\r
2071 movdqu XMMWORD[32+rsi],xmm4\r
2072 movdqu XMMWORD[48+rsi],xmm5\r
2073 movdqu XMMWORD[64+rsi],xmm6\r
2074 lea rsi,[80+rsi]\r
2075 jmp NEAR $L$xts_enc_done\r
2076\r
2077ALIGN 16\r
2078$L$xts_enc_one:\r
; One block left: inline single-block encryption (eax = round count, rcx = schedule).
; xmm10 = xmm11 afterwards (next tweak).
2079 movups xmm2,XMMWORD[rdi]\r
2080 lea rdi,[16+rdi]\r
2081 xorps xmm2,xmm10\r
2082 movups xmm0,XMMWORD[rcx]\r
2083 movups xmm1,XMMWORD[16+rcx]\r
2084 lea rcx,[32+rcx]\r
2085 xorps xmm2,xmm0\r
2086$L$oop_enc1_9:\r
2087DB 102,15,56,220,209\r
2088 dec eax\r
2089 movups xmm1,XMMWORD[rcx]\r
2090 lea rcx,[16+rcx]\r
2091 jnz NEAR $L$oop_enc1_9\r
2092DB 102,15,56,221,209\r
2093 xorps xmm2,xmm10\r
2094 movdqa xmm10,xmm11\r
2095 movups XMMWORD[rsi],xmm2\r
2096 lea rsi,[16+rsi]\r
2097 jmp NEAR $L$xts_enc_done\r
2098\r
2099ALIGN 16\r
2100$L$xts_enc_two:\r
; Two blocks left; xmm10 = xmm12 afterwards (next tweak).
2101 movups xmm2,XMMWORD[rdi]\r
2102 movups xmm3,XMMWORD[16+rdi]\r
2103 lea rdi,[32+rdi]\r
2104 xorps xmm2,xmm10\r
2105 xorps xmm3,xmm11\r
2106\r
2107 call _aesni_encrypt2\r
2108\r
2109 xorps xmm2,xmm10\r
2110 movdqa xmm10,xmm12\r
2111 xorps xmm3,xmm11\r
2112 movups XMMWORD[rsi],xmm2\r
2113 movups XMMWORD[16+rsi],xmm3\r
2114 lea rsi,[32+rsi]\r
2115 jmp NEAR $L$xts_enc_done\r
2116\r
2117ALIGN 16\r
2118$L$xts_enc_three:\r
; Three blocks left; xmm10 = xmm13 afterwards (next tweak).
2119 movups xmm2,XMMWORD[rdi]\r
2120 movups xmm3,XMMWORD[16+rdi]\r
2121 movups xmm4,XMMWORD[32+rdi]\r
2122 lea rdi,[48+rdi]\r
2123 xorps xmm2,xmm10\r
2124 xorps xmm3,xmm11\r
2125 xorps xmm4,xmm12\r
2126\r
2127 call _aesni_encrypt3\r
2128\r
2129 xorps xmm2,xmm10\r
2130 movdqa xmm10,xmm13\r
2131 xorps xmm3,xmm11\r
2132 xorps xmm4,xmm12\r
2133 movups XMMWORD[rsi],xmm2\r
2134 movups XMMWORD[16+rsi],xmm3\r
2135 movups XMMWORD[32+rsi],xmm4\r
2136 lea rsi,[48+rsi]\r
2137 jmp NEAR $L$xts_enc_done\r
2138\r
2139ALIGN 16\r
2140$L$xts_enc_four:\r
; Four blocks left; xmm10 = xmm14 afterwards (next tweak).
2141 movups xmm2,XMMWORD[rdi]\r
2142 movups xmm3,XMMWORD[16+rdi]\r
2143 movups xmm4,XMMWORD[32+rdi]\r
2144 xorps xmm2,xmm10\r
2145 movups xmm5,XMMWORD[48+rdi]\r
2146 lea rdi,[64+rdi]\r
2147 xorps xmm3,xmm11\r
2148 xorps xmm4,xmm12\r
2149 xorps xmm5,xmm13\r
2150\r
2151 call _aesni_encrypt4\r
2152\r
2153 pxor xmm2,xmm10\r
2154 movdqa xmm10,xmm14\r
2155 pxor xmm3,xmm11\r
2156 pxor xmm4,xmm12\r
2157 movdqu XMMWORD[rsi],xmm2\r
2158 pxor xmm5,xmm13\r
2159 movdqu XMMWORD[16+rsi],xmm3\r
2160 movdqu XMMWORD[32+rsi],xmm4\r
2161 movdqu XMMWORD[48+rsi],xmm5\r
2162 lea rsi,[64+rsi]\r
2163 jmp NEAR $L$xts_enc_done\r
2164\r
2165ALIGN 16\r
2166$L$xts_enc_done:\r
; r9 = original length & 15. Zero means there is no partial block to handle.
2167 and r9,15\r
2168 jz NEAR $L$xts_enc_ret\r
2169 mov rdx,r9\r
2170\r
2171$L$xts_enc_steal:\r
; Stealing loop, once per trailing byte: the input byte replaces the matching
; byte of the last output block at [-16+rsi], and the displaced output byte is
; written at [rsi].
2172 movzx eax,BYTE[rdi]\r
2173 movzx ecx,BYTE[((-16))+rsi]\r
2174 lea rdi,[1+rdi]\r
2175 mov BYTE[((-16))+rsi],al\r
2176 mov BYTE[rsi],cl\r
2177 lea rsi,[1+rsi]\r
2178 sub rdx,1\r
2179 jnz NEAR $L$xts_enc_steal\r
2180\r
; Rewind rsi by r9 and encrypt the patched block at [-16+rsi] in place, using
; tweak xmm10, the schedule base saved in rbp and the round count saved in r10d.
2181 sub rsi,r9\r
2182 mov rcx,rbp\r
2183 mov eax,r10d\r
2184\r
2185 movups xmm2,XMMWORD[((-16))+rsi]\r
2186 xorps xmm2,xmm10\r
2187 movups xmm0,XMMWORD[rcx]\r
2188 movups xmm1,XMMWORD[16+rcx]\r
2189 lea rcx,[32+rcx]\r
2190 xorps xmm2,xmm0\r
2191$L$oop_enc1_10:\r
2192DB 102,15,56,220,209\r
2193 dec eax\r
2194 movups xmm1,XMMWORD[rcx]\r
2195 lea rcx,[16+rcx]\r
2196 jnz NEAR $L$oop_enc1_10\r
2197DB 102,15,56,221,209\r
2198 xorps xmm2,xmm10\r
2199 movups XMMWORD[(-16)+rsi],xmm2\r
2200\r
2201$L$xts_enc_ret:\r
; Cleanup: zero xmm0-xmm5; restore xmm6-xmm15 and overwrite each save slot with
; zeros; zero the scratch area [rsp]..[96+rsp]; then restore rbp and rsp.
2202 xorps xmm0,xmm0\r
2203 pxor xmm1,xmm1\r
2204 pxor xmm2,xmm2\r
2205 pxor xmm3,xmm3\r
2206 pxor xmm4,xmm4\r
2207 pxor xmm5,xmm5\r
2208 movaps xmm6,XMMWORD[((-168))+r11]\r
2209 movaps XMMWORD[(-168)+r11],xmm0\r
2210 movaps xmm7,XMMWORD[((-152))+r11]\r
2211 movaps XMMWORD[(-152)+r11],xmm0\r
2212 movaps xmm8,XMMWORD[((-136))+r11]\r
2213 movaps XMMWORD[(-136)+r11],xmm0\r
2214 movaps xmm9,XMMWORD[((-120))+r11]\r
2215 movaps XMMWORD[(-120)+r11],xmm0\r
2216 movaps xmm10,XMMWORD[((-104))+r11]\r
2217 movaps XMMWORD[(-104)+r11],xmm0\r
2218 movaps xmm11,XMMWORD[((-88))+r11]\r
2219 movaps XMMWORD[(-88)+r11],xmm0\r
2220 movaps xmm12,XMMWORD[((-72))+r11]\r
2221 movaps XMMWORD[(-72)+r11],xmm0\r
2222 movaps xmm13,XMMWORD[((-56))+r11]\r
2223 movaps XMMWORD[(-56)+r11],xmm0\r
2224 movaps xmm14,XMMWORD[((-40))+r11]\r
2225 movaps XMMWORD[(-40)+r11],xmm0\r
2226 movaps xmm15,XMMWORD[((-24))+r11]\r
2227 movaps XMMWORD[(-24)+r11],xmm0\r
2228 movaps XMMWORD[rsp],xmm0\r
2229 movaps XMMWORD[16+rsp],xmm0\r
2230 movaps XMMWORD[32+rsp],xmm0\r
2231 movaps XMMWORD[48+rsp],xmm0\r
2232 movaps XMMWORD[64+rsp],xmm0\r
2233 movaps XMMWORD[80+rsp],xmm0\r
2234 movaps XMMWORD[96+rsp],xmm0\r
2235 mov rbp,QWORD[((-8))+r11]\r
2236\r
2237 lea rsp,[r11]\r
2238\r
2239$L$xts_enc_epilogue:\r
; Reload the caller's rdi/rsi from the slots written by the prologue and return.
2240 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
2241 mov rsi,QWORD[16+rsp]\r
2242 DB 0F3h,0C3h ;repret\r
2243\r
2244$L$SEH_end_aesni_xts_encrypt:\r
2245global aesni_xts_decrypt\r
2246\r
2247ALIGN 16\r
2248aesni_xts_decrypt:\r
2249 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
2250 mov QWORD[16+rsp],rsi\r
2251 mov rax,rsp\r
2252$L$SEH_begin_aesni_xts_decrypt:\r
2253 mov rdi,rcx\r
2254 mov rsi,rdx\r
2255 mov rdx,r8\r
2256 mov rcx,r9\r
2257 mov r8,QWORD[40+rsp]\r
2258 mov r9,QWORD[48+rsp]\r
2259\r
2260\r
2261\r
2262 lea r11,[rsp]\r
2263\r
2264 push rbp\r
2265\r
2266 sub rsp,272\r
2267 and rsp,-16\r
2268 movaps XMMWORD[(-168)+r11],xmm6\r
2269 movaps XMMWORD[(-152)+r11],xmm7\r
2270 movaps XMMWORD[(-136)+r11],xmm8\r
2271 movaps XMMWORD[(-120)+r11],xmm9\r
2272 movaps XMMWORD[(-104)+r11],xmm10\r
2273 movaps XMMWORD[(-88)+r11],xmm11\r
2274 movaps XMMWORD[(-72)+r11],xmm12\r
2275 movaps XMMWORD[(-56)+r11],xmm13\r
2276 movaps XMMWORD[(-40)+r11],xmm14\r
2277 movaps XMMWORD[(-24)+r11],xmm15\r
2278$L$xts_dec_body:\r
2279 movups xmm2,XMMWORD[r9]\r
2280 mov eax,DWORD[240+r8]\r
2281 mov r10d,DWORD[240+rcx]\r
2282 movups xmm0,XMMWORD[r8]\r
2283 movups xmm1,XMMWORD[16+r8]\r
2284 lea r8,[32+r8]\r
2285 xorps xmm2,xmm0\r
2286$L$oop_enc1_11:\r
2287DB 102,15,56,220,209\r
2288 dec eax\r
2289 movups xmm1,XMMWORD[r8]\r
2290 lea r8,[16+r8]\r
2291 jnz NEAR $L$oop_enc1_11\r
2292DB 102,15,56,221,209\r
2293 xor eax,eax\r
2294 test rdx,15\r
2295 setnz al\r
2296 shl rax,4\r
2297 sub rdx,rax\r
2298\r
2299 movups xmm0,XMMWORD[rcx]\r
2300 mov rbp,rcx\r
2301 mov eax,r10d\r
2302 shl r10d,4\r
2303 mov r9,rdx\r
2304 and rdx,-16\r
2305\r
2306 movups xmm1,XMMWORD[16+r10*1+rcx]\r
2307\r
2308 movdqa xmm8,XMMWORD[$L$xts_magic]\r
2309 movdqa xmm15,xmm2\r
2310 pshufd xmm9,xmm2,0x5f\r
2311 pxor xmm1,xmm0\r
2312 movdqa xmm14,xmm9\r
2313 paddd xmm9,xmm9\r
2314 movdqa xmm10,xmm15\r
2315 psrad xmm14,31\r
2316 paddq xmm15,xmm15\r
2317 pand xmm14,xmm8\r
2318 pxor xmm10,xmm0\r
2319 pxor xmm15,xmm14\r
2320 movdqa xmm14,xmm9\r
2321 paddd xmm9,xmm9\r
2322 movdqa xmm11,xmm15\r
2323 psrad xmm14,31\r
2324 paddq xmm15,xmm15\r
2325 pand xmm14,xmm8\r
2326 pxor xmm11,xmm0\r
2327 pxor xmm15,xmm14\r
2328 movdqa xmm14,xmm9\r
2329 paddd xmm9,xmm9\r
2330 movdqa xmm12,xmm15\r
2331 psrad xmm14,31\r
2332 paddq xmm15,xmm15\r
2333 pand xmm14,xmm8\r
2334 pxor xmm12,xmm0\r
2335 pxor xmm15,xmm14\r
2336 movdqa xmm14,xmm9\r
2337 paddd xmm9,xmm9\r
2338 movdqa xmm13,xmm15\r
2339 psrad xmm14,31\r
2340 paddq xmm15,xmm15\r
2341 pand xmm14,xmm8\r
2342 pxor xmm13,xmm0\r
2343 pxor xmm15,xmm14\r
2344 movdqa xmm14,xmm15\r
2345 psrad xmm9,31\r
2346 paddq xmm15,xmm15\r
2347 pand xmm9,xmm8\r
2348 pxor xmm14,xmm0\r
2349 pxor xmm15,xmm9\r
2350 movaps XMMWORD[96+rsp],xmm1\r
2351\r
2352 sub rdx,16*6\r
2353 jc NEAR $L$xts_dec_short\r
2354\r
2355 mov eax,16+96\r
2356 lea rcx,[32+r10*1+rbp]\r
2357 sub rax,r10\r
2358 movups xmm1,XMMWORD[16+rbp]\r
2359 mov r10,rax\r
2360 lea r8,[$L$xts_magic]\r
2361 jmp NEAR $L$xts_dec_grandloop\r
2362\r
2363ALIGN 32\r
2364$L$xts_dec_grandloop:\r
2365 movdqu xmm2,XMMWORD[rdi]\r
2366 movdqa xmm8,xmm0\r
2367 movdqu xmm3,XMMWORD[16+rdi]\r
2368 pxor xmm2,xmm10\r
2369 movdqu xmm4,XMMWORD[32+rdi]\r
2370 pxor xmm3,xmm11\r
2371DB 102,15,56,222,209\r
2372 movdqu xmm5,XMMWORD[48+rdi]\r
2373 pxor xmm4,xmm12\r
2374DB 102,15,56,222,217\r
2375 movdqu xmm6,XMMWORD[64+rdi]\r
2376 pxor xmm5,xmm13\r
2377DB 102,15,56,222,225\r
2378 movdqu xmm7,XMMWORD[80+rdi]\r
2379 pxor xmm8,xmm15\r
2380 movdqa xmm9,XMMWORD[96+rsp]\r
2381 pxor xmm6,xmm14\r
2382DB 102,15,56,222,233\r
2383 movups xmm0,XMMWORD[32+rbp]\r
2384 lea rdi,[96+rdi]\r
2385 pxor xmm7,xmm8\r
2386\r
2387 pxor xmm10,xmm9\r
2388DB 102,15,56,222,241\r
2389 pxor xmm11,xmm9\r
2390 movdqa XMMWORD[rsp],xmm10\r
2391DB 102,15,56,222,249\r
2392 movups xmm1,XMMWORD[48+rbp]\r
2393 pxor xmm12,xmm9\r
2394\r
2395DB 102,15,56,222,208\r
2396 pxor xmm13,xmm9\r
2397 movdqa XMMWORD[16+rsp],xmm11\r
2398DB 102,15,56,222,216\r
2399 pxor xmm14,xmm9\r
2400 movdqa XMMWORD[32+rsp],xmm12\r
2401DB 102,15,56,222,224\r
2402DB 102,15,56,222,232\r
2403 pxor xmm8,xmm9\r
2404 movdqa XMMWORD[64+rsp],xmm14\r
2405DB 102,15,56,222,240\r
2406DB 102,15,56,222,248\r
2407 movups xmm0,XMMWORD[64+rbp]\r
2408 movdqa XMMWORD[80+rsp],xmm8\r
2409 pshufd xmm9,xmm15,0x5f\r
2410 jmp NEAR $L$xts_dec_loop6\r
2411ALIGN 32\r
2412$L$xts_dec_loop6:\r
2413DB 102,15,56,222,209\r
2414DB 102,15,56,222,217\r
2415DB 102,15,56,222,225\r
2416DB 102,15,56,222,233\r
2417DB 102,15,56,222,241\r
2418DB 102,15,56,222,249\r
2419 movups xmm1,XMMWORD[((-64))+rax*1+rcx]\r
2420 add rax,32\r
2421\r
2422DB 102,15,56,222,208\r
2423DB 102,15,56,222,216\r
2424DB 102,15,56,222,224\r
2425DB 102,15,56,222,232\r
2426DB 102,15,56,222,240\r
2427DB 102,15,56,222,248\r
2428 movups xmm0,XMMWORD[((-80))+rax*1+rcx]\r
2429 jnz NEAR $L$xts_dec_loop6\r
2430\r
2431 movdqa xmm8,XMMWORD[r8]\r
2432 movdqa xmm14,xmm9\r
2433 paddd xmm9,xmm9\r
2434DB 102,15,56,222,209\r
2435 paddq xmm15,xmm15\r
2436 psrad xmm14,31\r
2437DB 102,15,56,222,217\r
2438 pand xmm14,xmm8\r
2439 movups xmm10,XMMWORD[rbp]\r
2440DB 102,15,56,222,225\r
2441DB 102,15,56,222,233\r
2442DB 102,15,56,222,241\r
2443 pxor xmm15,xmm14\r
2444 movaps xmm11,xmm10\r
2445DB 102,15,56,222,249\r
2446 movups xmm1,XMMWORD[((-64))+rcx]\r
2447\r
2448 movdqa xmm14,xmm9\r
2449DB 102,15,56,222,208\r
2450 paddd xmm9,xmm9\r
2451 pxor xmm10,xmm15\r
2452DB 102,15,56,222,216\r
2453 psrad xmm14,31\r
2454 paddq xmm15,xmm15\r
2455DB 102,15,56,222,224\r
2456DB 102,15,56,222,232\r
2457 pand xmm14,xmm8\r
2458 movaps xmm12,xmm11\r
2459DB 102,15,56,222,240\r
2460 pxor xmm15,xmm14\r
2461 movdqa xmm14,xmm9\r
2462DB 102,15,56,222,248\r
2463 movups xmm0,XMMWORD[((-48))+rcx]\r
2464\r
2465 paddd xmm9,xmm9\r
2466DB 102,15,56,222,209\r
2467 pxor xmm11,xmm15\r
2468 psrad xmm14,31\r
2469DB 102,15,56,222,217\r
2470 paddq xmm15,xmm15\r
2471 pand xmm14,xmm8\r
2472DB 102,15,56,222,225\r
2473DB 102,15,56,222,233\r
2474 movdqa XMMWORD[48+rsp],xmm13\r
2475 pxor xmm15,xmm14\r
2476DB 102,15,56,222,241\r
2477 movaps xmm13,xmm12\r
2478 movdqa xmm14,xmm9\r
2479DB 102,15,56,222,249\r
2480 movups xmm1,XMMWORD[((-32))+rcx]\r
2481\r
2482 paddd xmm9,xmm9\r
2483DB 102,15,56,222,208\r
2484 pxor xmm12,xmm15\r
2485 psrad xmm14,31\r
2486DB 102,15,56,222,216\r
2487 paddq xmm15,xmm15\r
2488 pand xmm14,xmm8\r
2489DB 102,15,56,222,224\r
2490DB 102,15,56,222,232\r
2491DB 102,15,56,222,240\r
2492 pxor xmm15,xmm14\r
2493 movaps xmm14,xmm13\r
2494DB 102,15,56,222,248\r
2495\r
2496 movdqa xmm0,xmm9\r
2497 paddd xmm9,xmm9\r
2498DB 102,15,56,222,209\r
2499 pxor xmm13,xmm15\r
2500 psrad xmm0,31\r
2501DB 102,15,56,222,217\r
2502 paddq xmm15,xmm15\r
2503 pand xmm0,xmm8\r
2504DB 102,15,56,222,225\r
2505DB 102,15,56,222,233\r
2506 pxor xmm15,xmm0\r
2507 movups xmm0,XMMWORD[rbp]\r
2508DB 102,15,56,222,241\r
2509DB 102,15,56,222,249\r
2510 movups xmm1,XMMWORD[16+rbp]\r
2511\r
2512 pxor xmm14,xmm15\r
2513DB 102,15,56,223,84,36,0\r
2514 psrad xmm9,31\r
2515 paddq xmm15,xmm15\r
2516DB 102,15,56,223,92,36,16\r
2517DB 102,15,56,223,100,36,32\r
2518 pand xmm9,xmm8\r
2519 mov rax,r10\r
2520DB 102,15,56,223,108,36,48\r
2521DB 102,15,56,223,116,36,64\r
2522DB 102,15,56,223,124,36,80\r
2523 pxor xmm15,xmm9\r
2524\r
2525 lea rsi,[96+rsi]\r
2526 movups XMMWORD[(-96)+rsi],xmm2\r
2527 movups XMMWORD[(-80)+rsi],xmm3\r
2528 movups XMMWORD[(-64)+rsi],xmm4\r
2529 movups XMMWORD[(-48)+rsi],xmm5\r
2530 movups XMMWORD[(-32)+rsi],xmm6\r
2531 movups XMMWORD[(-16)+rsi],xmm7\r
2532 sub rdx,16*6\r
2533 jnc NEAR $L$xts_dec_grandloop\r
2534\r
2535 mov eax,16+96\r
2536 sub eax,r10d\r
2537 mov rcx,rbp\r
2538 shr eax,4\r
2539\r
2540$L$xts_dec_short:\r
2541\r
2542 mov r10d,eax\r
2543 pxor xmm10,xmm0\r
2544 pxor xmm11,xmm0\r
2545 add rdx,16*6\r
2546 jz NEAR $L$xts_dec_done\r
2547\r
2548 pxor xmm12,xmm0\r
2549 cmp rdx,0x20\r
2550 jb NEAR $L$xts_dec_one\r
2551 pxor xmm13,xmm0\r
2552 je NEAR $L$xts_dec_two\r
2553\r
2554 pxor xmm14,xmm0\r
2555 cmp rdx,0x40\r
2556 jb NEAR $L$xts_dec_three\r
2557 je NEAR $L$xts_dec_four\r
2558\r
2559 movdqu xmm2,XMMWORD[rdi]\r
2560 movdqu xmm3,XMMWORD[16+rdi]\r
2561 movdqu xmm4,XMMWORD[32+rdi]\r
2562 pxor xmm2,xmm10\r
2563 movdqu xmm5,XMMWORD[48+rdi]\r
2564 pxor xmm3,xmm11\r
2565 movdqu xmm6,XMMWORD[64+rdi]\r
2566 lea rdi,[80+rdi]\r
2567 pxor xmm4,xmm12\r
2568 pxor xmm5,xmm13\r
2569 pxor xmm6,xmm14\r
2570\r
2571 call _aesni_decrypt6\r
2572\r
2573 xorps xmm2,xmm10\r
2574 xorps xmm3,xmm11\r
2575 xorps xmm4,xmm12\r
2576 movdqu XMMWORD[rsi],xmm2\r
2577 xorps xmm5,xmm13\r
2578 movdqu XMMWORD[16+rsi],xmm3\r
2579 xorps xmm6,xmm14\r
2580 movdqu XMMWORD[32+rsi],xmm4\r
2581 pxor xmm14,xmm14\r
2582 movdqu XMMWORD[48+rsi],xmm5\r
2583 pcmpgtd xmm14,xmm15\r
2584 movdqu XMMWORD[64+rsi],xmm6\r
2585 lea rsi,[80+rsi]\r
2586 pshufd xmm11,xmm14,0x13\r
2587 and r9,15\r
2588 jz NEAR $L$xts_dec_ret\r
2589\r
2590 movdqa xmm10,xmm15\r
2591 paddq xmm15,xmm15\r
2592 pand xmm11,xmm8\r
2593 pxor xmm11,xmm15\r
2594 jmp NEAR $L$xts_dec_done2\r
2595\r
2596ALIGN 16\r
2597$L$xts_dec_one:\r
2598 movups xmm2,XMMWORD[rdi]\r
2599 lea rdi,[16+rdi]\r
2600 xorps xmm2,xmm10\r
2601 movups xmm0,XMMWORD[rcx]\r
2602 movups xmm1,XMMWORD[16+rcx]\r
2603 lea rcx,[32+rcx]\r
2604 xorps xmm2,xmm0\r
2605$L$oop_dec1_12:\r
2606DB 102,15,56,222,209\r
2607 dec eax\r
2608 movups xmm1,XMMWORD[rcx]\r
2609 lea rcx,[16+rcx]\r
2610 jnz NEAR $L$oop_dec1_12\r
2611DB 102,15,56,223,209\r
2612 xorps xmm2,xmm10\r
2613 movdqa xmm10,xmm11\r
2614 movups XMMWORD[rsi],xmm2\r
2615 movdqa xmm11,xmm12\r
2616 lea rsi,[16+rsi]\r
2617 jmp NEAR $L$xts_dec_done\r
2618\r
2619ALIGN 16\r
2620$L$xts_dec_two:\r
2621 movups xmm2,XMMWORD[rdi]\r
2622 movups xmm3,XMMWORD[16+rdi]\r
2623 lea rdi,[32+rdi]\r
2624 xorps xmm2,xmm10\r
2625 xorps xmm3,xmm11\r
2626\r
2627 call _aesni_decrypt2\r
2628\r
2629 xorps xmm2,xmm10\r
2630 movdqa xmm10,xmm12\r
2631 xorps xmm3,xmm11\r
2632 movdqa xmm11,xmm13\r
2633 movups XMMWORD[rsi],xmm2\r
2634 movups XMMWORD[16+rsi],xmm3\r
2635 lea rsi,[32+rsi]\r
2636 jmp NEAR $L$xts_dec_done\r
2637\r
2638ALIGN 16\r
2639$L$xts_dec_three:\r
2640 movups xmm2,XMMWORD[rdi]\r
2641 movups xmm3,XMMWORD[16+rdi]\r
2642 movups xmm4,XMMWORD[32+rdi]\r
2643 lea rdi,[48+rdi]\r
2644 xorps xmm2,xmm10\r
2645 xorps xmm3,xmm11\r
2646 xorps xmm4,xmm12\r
2647\r
2648 call _aesni_decrypt3\r
2649\r
2650 xorps xmm2,xmm10\r
2651 movdqa xmm10,xmm13\r
2652 xorps xmm3,xmm11\r
2653 movdqa xmm11,xmm14\r
2654 xorps xmm4,xmm12\r
2655 movups XMMWORD[rsi],xmm2\r
2656 movups XMMWORD[16+rsi],xmm3\r
2657 movups XMMWORD[32+rsi],xmm4\r
2658 lea rsi,[48+rsi]\r
2659 jmp NEAR $L$xts_dec_done\r
2660\r
2661ALIGN 16\r
2662$L$xts_dec_four:\r
2663 movups xmm2,XMMWORD[rdi]\r
2664 movups xmm3,XMMWORD[16+rdi]\r
2665 movups xmm4,XMMWORD[32+rdi]\r
2666 xorps xmm2,xmm10\r
2667 movups xmm5,XMMWORD[48+rdi]\r
2668 lea rdi,[64+rdi]\r
2669 xorps xmm3,xmm11\r
2670 xorps xmm4,xmm12\r
2671 xorps xmm5,xmm13\r
2672\r
2673 call _aesni_decrypt4\r
2674\r
2675 pxor xmm2,xmm10\r
2676 movdqa xmm10,xmm14\r
2677 pxor xmm3,xmm11\r
2678 movdqa xmm11,xmm15\r
2679 pxor xmm4,xmm12\r
2680 movdqu XMMWORD[rsi],xmm2\r
2681 pxor xmm5,xmm13\r
2682 movdqu XMMWORD[16+rsi],xmm3\r
2683 movdqu XMMWORD[32+rsi],xmm4\r
2684 movdqu XMMWORD[48+rsi],xmm5\r
2685 lea rsi,[64+rsi]\r
2686 jmp NEAR $L$xts_dec_done\r
2687\r
2688ALIGN 16\r
2689$L$xts_dec_done:\r
2690 and r9,15\r
2691 jz NEAR $L$xts_dec_ret\r
2692$L$xts_dec_done2:\r
2693 mov rdx,r9\r
2694 mov rcx,rbp\r
2695 mov eax,r10d\r
2696\r
2697 movups xmm2,XMMWORD[rdi]\r
2698 xorps xmm2,xmm11\r
2699 movups xmm0,XMMWORD[rcx]\r
2700 movups xmm1,XMMWORD[16+rcx]\r
2701 lea rcx,[32+rcx]\r
2702 xorps xmm2,xmm0\r
2703$L$oop_dec1_13:\r
2704DB 102,15,56,222,209\r
2705 dec eax\r
2706 movups xmm1,XMMWORD[rcx]\r
2707 lea rcx,[16+rcx]\r
2708 jnz NEAR $L$oop_dec1_13\r
2709DB 102,15,56,223,209\r
2710 xorps xmm2,xmm11\r
2711 movups XMMWORD[rsi],xmm2\r
2712\r
2713$L$xts_dec_steal:\r
2714 movzx eax,BYTE[16+rdi]\r
2715 movzx ecx,BYTE[rsi]\r
2716 lea rdi,[1+rdi]\r
2717 mov BYTE[rsi],al\r
2718 mov BYTE[16+rsi],cl\r
2719 lea rsi,[1+rsi]\r
2720 sub rdx,1\r
2721 jnz NEAR $L$xts_dec_steal\r
2722\r
2723 sub rsi,r9\r
2724 mov rcx,rbp\r
2725 mov eax,r10d\r
2726\r
2727 movups xmm2,XMMWORD[rsi]\r
2728 xorps xmm2,xmm10\r
2729 movups xmm0,XMMWORD[rcx]\r
2730 movups xmm1,XMMWORD[16+rcx]\r
2731 lea rcx,[32+rcx]\r
2732 xorps xmm2,xmm0\r
2733$L$oop_dec1_14:\r
2734DB 102,15,56,222,209\r
2735 dec eax\r
2736 movups xmm1,XMMWORD[rcx]\r
2737 lea rcx,[16+rcx]\r
2738 jnz NEAR $L$oop_dec1_14\r
2739DB 102,15,56,223,209\r
2740 xorps xmm2,xmm10\r
2741 movups XMMWORD[rsi],xmm2\r
2742\r
2743$L$xts_dec_ret:\r
2744 xorps xmm0,xmm0\r
2745 pxor xmm1,xmm1\r
2746 pxor xmm2,xmm2\r
2747 pxor xmm3,xmm3\r
2748 pxor xmm4,xmm4\r
2749 pxor xmm5,xmm5\r
2750 movaps xmm6,XMMWORD[((-168))+r11]\r
2751 movaps XMMWORD[(-168)+r11],xmm0\r
2752 movaps xmm7,XMMWORD[((-152))+r11]\r
2753 movaps XMMWORD[(-152)+r11],xmm0\r
2754 movaps xmm8,XMMWORD[((-136))+r11]\r
2755 movaps XMMWORD[(-136)+r11],xmm0\r
2756 movaps xmm9,XMMWORD[((-120))+r11]\r
2757 movaps XMMWORD[(-120)+r11],xmm0\r
2758 movaps xmm10,XMMWORD[((-104))+r11]\r
2759 movaps XMMWORD[(-104)+r11],xmm0\r
2760 movaps xmm11,XMMWORD[((-88))+r11]\r
2761 movaps XMMWORD[(-88)+r11],xmm0\r
2762 movaps xmm12,XMMWORD[((-72))+r11]\r
2763 movaps XMMWORD[(-72)+r11],xmm0\r
2764 movaps xmm13,XMMWORD[((-56))+r11]\r
2765 movaps XMMWORD[(-56)+r11],xmm0\r
2766 movaps xmm14,XMMWORD[((-40))+r11]\r
2767 movaps XMMWORD[(-40)+r11],xmm0\r
2768 movaps xmm15,XMMWORD[((-24))+r11]\r
2769 movaps XMMWORD[(-24)+r11],xmm0\r
2770 movaps XMMWORD[rsp],xmm0\r
2771 movaps XMMWORD[16+rsp],xmm0\r
2772 movaps XMMWORD[32+rsp],xmm0\r
2773 movaps XMMWORD[48+rsp],xmm0\r
2774 movaps XMMWORD[64+rsp],xmm0\r
2775 movaps XMMWORD[80+rsp],xmm0\r
2776 movaps XMMWORD[96+rsp],xmm0\r
2777 mov rbp,QWORD[((-8))+r11]\r
2778\r
2779 lea rsp,[r11]\r
2780\r
2781$L$xts_dec_epilogue:\r
2782 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
2783 mov rsi,QWORD[16+rsp]\r
2784 DB 0F3h,0C3h ;repret\r
2785\r
2786$L$SEH_end_aesni_xts_decrypt:\r
; aesni_ocb_encrypt -- exported entry point, Win64 build of the generated code.
; The prologue remaps the Microsoft x64 arguments so that the body sees:
;   rdi = source pointer (16-byte blocks are loaded from [rdi])
;   rsi = destination pointer (16-byte blocks are stored to [rsi])
;   rdx = number of 16-byte blocks still to process
;   rcx = key structure (32-bit count at offset 240, as in aesni_encrypt)
;   r8  = running block counter (its parity / lowest set bit pick table entries)
;   r9  = pointer to a 16-byte value kept in xmm15 and written back on exit
;         (presumably the OCB offset -- confirm against the C caller)
;   rbx = 7th argument: base of a table of 16-byte entries indexed by bsf()*16
;   rbp = 8th argument: pointer to a 16-byte accumulator kept in xmm8, which
;         the helpers XOR every input block into (presumably the OCB checksum)
; NOTE(review): a block count of 0 is not handled -- with an odd r8 the code
; reaches $L$ocb_enc_done without any helper having loaded xmm0, and with an
; even r8 one block is processed unconditionally. Callers presumably never
; pass 0; confirm.
2787global aesni_ocb_encrypt\r
2788\r
2789ALIGN 32\r
2790aesni_ocb_encrypt:\r
; rdi/rsi are callee-saved in the Microsoft x64 ABI; park them in the slots
; above the return address (reloaded at $L$ocb_enc_epilogue).
2791 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
2792 mov QWORD[16+rsp],rsi\r
2793 mov rax,rsp\r
2794$L$SEH_begin_aesni_ocb_encrypt:\r
; Shuffle the four register arguments and the two stack arguments at
; [40+rsp]/[48+rsp] into rdi,rsi,rdx,rcx,r8,r9.
2795 mov rdi,rcx\r
2796 mov rsi,rdx\r
2797 mov rdx,r8\r
2798 mov rcx,r9\r
2799 mov r8,QWORD[40+rsp]\r
2800 mov r9,QWORD[48+rsp]\r
2801\r
2802\r
2803\r
; rax keeps the entry rsp so the remaining stack arguments can still be
; addressed after the pushes below.
2804 lea rax,[rsp]\r
2805 push rbx\r
2806\r
2807 push rbp\r
2808\r
2809 push r12\r
2810\r
2811 push r13\r
2812\r
2813 push r14\r
2814\r
; Reserve 160 bytes and spill xmm6-xmm15 (callee-saved on Win64).
2815 lea rsp,[((-160))+rsp]\r
2816 movaps XMMWORD[rsp],xmm6\r
2817 movaps XMMWORD[16+rsp],xmm7\r
2818 movaps XMMWORD[32+rsp],xmm8\r
2819 movaps XMMWORD[48+rsp],xmm9\r
2820 movaps XMMWORD[64+rsp],xmm10\r
2821 movaps XMMWORD[80+rsp],xmm11\r
2822 movaps XMMWORD[96+rsp],xmm12\r
2823 movaps XMMWORD[112+rsp],xmm13\r
2824 movaps XMMWORD[128+rsp],xmm14\r
2825 movaps XMMWORD[144+rsp],xmm15\r
2826$L$ocb_enc_body:\r
; 7th and 8th arguments, read relative to the entry rsp held in rax.
2827 mov rbx,QWORD[56+rax]\r
2828 mov rbp,QWORD[((56+8))+rax]\r
2829\r
; r10 = 16 * (32-bit count at key+240); r11 keeps the key base.
; xmm9  = key[0] ^ key[16+r10]          (first and last round keys folded)
; xmm15 = [r9]   ^ key[16+r10]          (undone again at $L$ocb_enc_done)
2830 mov r10d,DWORD[240+rcx]\r
2831 mov r11,rcx\r
2832 shl r10d,4\r
2833 movups xmm9,XMMWORD[rcx]\r
2834 movups xmm1,XMMWORD[16+r10*1+rcx]\r
2835\r
2836 movdqu xmm15,XMMWORD[r9]\r
2837 pxor xmm9,xmm1\r
2838 pxor xmm15,xmm1\r
2839\r
; rcx = key+32+r10 and rax = 48-r10: the helpers index round keys as
; [rax+rcx] with rax stepping up by 32 until it reaches zero. r10 keeps a
; copy of the start index so each helper can reset rax before returning.
; xmm1 = key[16] is preloaded for the helpers' first round.
2840 mov eax,16+32\r
2841 lea rcx,[32+r10*1+r11]\r
2842 movups xmm1,XMMWORD[16+r11]\r
2843 sub rax,r10\r
2844 mov r10,rax\r
2845\r
; xmm10 = first table entry, xmm8 = accumulator loaded from [rbp].
2846 movdqu xmm10,XMMWORD[rbx]\r
2847 movdqu xmm8,XMMWORD[rbp]\r
2848\r
; If the block counter is even, process a single block first so that the
; six-block path below always starts from an odd counter.
2849 test r8,1\r
2850 jnz NEAR $L$ocb_enc_odd\r
2851\r
; xmm7 = table[bsf(r8)] (16-byte entries), xmm2 = one input block.
2852 bsf r12,r8\r
2853 add r8,1\r
2854 shl r12,4\r
2855 movdqu xmm7,XMMWORD[r12*1+rbx]\r
2856 movdqu xmm2,XMMWORD[rdi]\r
2857 lea rdi,[16+rdi]\r
2858\r
2859 call __ocb_encrypt1\r
2860\r
; The helper leaves the updated offset in xmm7.
2861 movdqa xmm15,xmm7\r
2862 movups XMMWORD[rsi],xmm2\r
2863 lea rsi,[16+rsi]\r
2864 sub rdx,1\r
2865 jz NEAR $L$ocb_enc_done\r
2866\r
2867$L$ocb_enc_odd:\r
; Precompute scaled table indices for counter+1, +3 and +5 (the even
; positions of the next six blocks) and advance the counter by 6.
2868 lea r12,[1+r8]\r
2869 lea r13,[3+r8]\r
2870 lea r14,[5+r8]\r
2871 lea r8,[6+r8]\r
2872 bsf r12,r12\r
2873 bsf r13,r13\r
2874 bsf r14,r14\r
2875 shl r12,4\r
2876 shl r13,4\r
2877 shl r14,4\r
2878\r
; Fewer than six blocks left -> tail handling.
2879 sub rdx,6\r
2880 jc NEAR $L$ocb_enc_short\r
2881 jmp NEAR $L$ocb_enc_grandloop\r
2882\r
2883ALIGN 32\r
2884$L$ocb_enc_grandloop:\r
; Main loop: six blocks per iteration through __ocb_encrypt6, which also
; refreshes r12/r13/r14 and r8 for the next iteration.
2885 movdqu xmm2,XMMWORD[rdi]\r
2886 movdqu xmm3,XMMWORD[16+rdi]\r
2887 movdqu xmm4,XMMWORD[32+rdi]\r
2888 movdqu xmm5,XMMWORD[48+rdi]\r
2889 movdqu xmm6,XMMWORD[64+rdi]\r
2890 movdqu xmm7,XMMWORD[80+rdi]\r
2891 lea rdi,[96+rdi]\r
2892\r
2893 call __ocb_encrypt6\r
2894\r
2895 movups XMMWORD[rsi],xmm2\r
2896 movups XMMWORD[16+rsi],xmm3\r
2897 movups XMMWORD[32+rsi],xmm4\r
2898 movups XMMWORD[48+rsi],xmm5\r
2899 movups XMMWORD[64+rsi],xmm6\r
2900 movups XMMWORD[80+rsi],xmm7\r
2901 lea rsi,[96+rsi]\r
2902 sub rdx,6\r
2903 jnc NEAR $L$ocb_enc_grandloop\r
2904\r
2905$L$ocb_enc_short:\r
; Undo the speculative "sub rdx,6"; rdx is now the 0..5 remaining blocks.
; The loads are interleaved with the compares: movdqu does not touch the
; flags, so each cmp feeds both the jb and the je that follow it.
2906 add rdx,6\r
2907 jz NEAR $L$ocb_enc_done\r
2908\r
2909 movdqu xmm2,XMMWORD[rdi]\r
2910 cmp rdx,2\r
2911 jb NEAR $L$ocb_enc_one\r
2912 movdqu xmm3,XMMWORD[16+rdi]\r
2913 je NEAR $L$ocb_enc_two\r
2914\r
2915 movdqu xmm4,XMMWORD[32+rdi]\r
2916 cmp rdx,4\r
2917 jb NEAR $L$ocb_enc_three\r
2918 movdqu xmm5,XMMWORD[48+rdi]\r
2919 je NEAR $L$ocb_enc_four\r
2920\r
; Five blocks: run the six-lane helper with a zero sixth lane and discard
; that lane's output; the fifth offset (xmm14) becomes the current offset.
2921 movdqu xmm6,XMMWORD[64+rdi]\r
2922 pxor xmm7,xmm7\r
2923\r
2924 call __ocb_encrypt6\r
2925\r
2926 movdqa xmm15,xmm14\r
2927 movups XMMWORD[rsi],xmm2\r
2928 movups XMMWORD[16+rsi],xmm3\r
2929 movups XMMWORD[32+rsi],xmm4\r
2930 movups XMMWORD[48+rsi],xmm5\r
2931 movups XMMWORD[64+rsi],xmm6\r
2932\r
2933 jmp NEAR $L$ocb_enc_done\r
2934\r
2935ALIGN 16\r
2936$L$ocb_enc_one:\r
; One block: the table entry is the first one (xmm10 = [rbx]).
2937 movdqa xmm7,xmm10\r
2938\r
2939 call __ocb_encrypt1\r
2940\r
2941 movdqa xmm15,xmm7\r
2942 movups XMMWORD[rsi],xmm2\r
2943 jmp NEAR $L$ocb_enc_done\r
2944\r
2945ALIGN 16\r
2946$L$ocb_enc_two:\r
; Two blocks: four-lane helper with lanes 3 and 4 zeroed; the second
; offset (xmm11) becomes the current offset.
2947 pxor xmm4,xmm4\r
2948 pxor xmm5,xmm5\r
2949\r
2950 call __ocb_encrypt4\r
2951\r
2952 movdqa xmm15,xmm11\r
2953 movups XMMWORD[rsi],xmm2\r
2954 movups XMMWORD[16+rsi],xmm3\r
2955\r
2956 jmp NEAR $L$ocb_enc_done\r
2957\r
2958ALIGN 16\r
2959$L$ocb_enc_three:\r
; Three blocks: lane 4 zeroed; third offset (xmm12) becomes current.
2960 pxor xmm5,xmm5\r
2961\r
2962 call __ocb_encrypt4\r
2963\r
2964 movdqa xmm15,xmm12\r
2965 movups XMMWORD[rsi],xmm2\r
2966 movups XMMWORD[16+rsi],xmm3\r
2967 movups XMMWORD[32+rsi],xmm4\r
2968\r
2969 jmp NEAR $L$ocb_enc_done\r
2970\r
2971ALIGN 16\r
2972$L$ocb_enc_four:\r
; Four blocks: fourth offset (xmm13) becomes current; falls through to done.
2973 call __ocb_encrypt4\r
2974\r
2975 movdqa xmm15,xmm13\r
2976 movups XMMWORD[rsi],xmm2\r
2977 movups XMMWORD[16+rsi],xmm3\r
2978 movups XMMWORD[32+rsi],xmm4\r
2979 movups XMMWORD[48+rsi],xmm5\r
2980\r
2981$L$ocb_enc_done:\r
; xmm0 holds the last round key loaded by the helpers; XORing it removes
; the key that was folded into xmm15 at entry. Then write back the
; accumulator ([rbp]) and the offset ([r9]).
2982 pxor xmm15,xmm0\r
2983 movdqu XMMWORD[rbp],xmm8\r
2984 movdqu XMMWORD[r9],xmm15\r
2985\r
; Clear the volatile xmm registers, restore xmm6-xmm15 and overwrite each
; spill slot with zero as it is reloaded.
2986 xorps xmm0,xmm0\r
2987 pxor xmm1,xmm1\r
2988 pxor xmm2,xmm2\r
2989 pxor xmm3,xmm3\r
2990 pxor xmm4,xmm4\r
2991 pxor xmm5,xmm5\r
2992 movaps xmm6,XMMWORD[rsp]\r
2993 movaps XMMWORD[rsp],xmm0\r
2994 movaps xmm7,XMMWORD[16+rsp]\r
2995 movaps XMMWORD[16+rsp],xmm0\r
2996 movaps xmm8,XMMWORD[32+rsp]\r
2997 movaps XMMWORD[32+rsp],xmm0\r
2998 movaps xmm9,XMMWORD[48+rsp]\r
2999 movaps XMMWORD[48+rsp],xmm0\r
3000 movaps xmm10,XMMWORD[64+rsp]\r
3001 movaps XMMWORD[64+rsp],xmm0\r
3002 movaps xmm11,XMMWORD[80+rsp]\r
3003 movaps XMMWORD[80+rsp],xmm0\r
3004 movaps xmm12,XMMWORD[96+rsp]\r
3005 movaps XMMWORD[96+rsp],xmm0\r
3006 movaps xmm13,XMMWORD[112+rsp]\r
3007 movaps XMMWORD[112+rsp],xmm0\r
3008 movaps xmm14,XMMWORD[128+rsp]\r
3009 movaps XMMWORD[128+rsp],xmm0\r
3010 movaps xmm15,XMMWORD[144+rsp]\r
3011 movaps XMMWORD[144+rsp],xmm0\r
; rax = rsp as it was before the five pushes (160 bytes of spill + 40).
3012 lea rax,[((160+40))+rsp]\r
3013$L$ocb_enc_pop:\r
3014 mov r14,QWORD[((-40))+rax]\r
3015\r
3016 mov r13,QWORD[((-32))+rax]\r
3017\r
3018 mov r12,QWORD[((-24))+rax]\r
3019\r
3020 mov rbp,QWORD[((-16))+rax]\r
3021\r
3022 mov rbx,QWORD[((-8))+rax]\r
3023\r
3024 lea rsp,[rax]\r
3025\r
3026$L$ocb_enc_epilogue:\r
; Reload the rdi/rsi values parked by the prologue and return.
3027 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
3028 mov rsi,QWORD[16+rsp]\r
3029 DB 0F3h,0C3h ;repret\r
3030\r
3031$L$SEH_end_aesni_ocb_encrypt:\r
3032\r
3033\r
; __ocb_encrypt6 -- internal helper: encrypts the six blocks in xmm2..xmm7.
; In (as set up by aesni_ocb_encrypt):
;   xmm15 = current offset ^ last round key, xmm9 = key[0] ^ last round key,
;   xmm10 = first table entry ([rbx]), xmm8 = accumulator,
;   r12/r13/r14 = scaled table indices, rbx = table base, r8 = block counter,
;   xmm1 = key[16], r11 = key base, rcx/rax = round-key end pointer / negative
;   index, r10 = saved start index.
; Out: xmm2..xmm7 = results, xmm10..xmm15 = the six per-block offsets (each
;   ^ last round key), xmm8 ^= all six inputs, r8 += 6, r12/r13/r14 recomputed
;   for counter+1/+3/+5, xmm10 reloaded from [rbx], xmm1 = key[16], rax = r10,
;   xmm0 = last round key.
; The DB sequences are hand-encoded AES-NI instructions:
;   102,15,56,220,ModRM     = aesenc     xmmN,xmm0/xmm1
;   102,65,15,56,221,ModRM  = aesenclast xmmN,xmm(8+M)
3034ALIGN 32\r
3035__ocb_encrypt6:\r
3036\r
; Build six chained offsets (each XORs a table entry into the previous one),
; starting from xmm15 ^ xmm9 = offset ^ key[0], so the XOR into the data
; lanes also applies the round-0 key. Each input is XORed into xmm8 before
; it is masked.
3037 pxor xmm15,xmm9\r
3038 movdqu xmm11,XMMWORD[r12*1+rbx]\r
3039 movdqa xmm12,xmm10\r
3040 movdqu xmm13,XMMWORD[r13*1+rbx]\r
3041 movdqa xmm14,xmm10\r
3042 pxor xmm10,xmm15\r
3043 movdqu xmm15,XMMWORD[r14*1+rbx]\r
3044 pxor xmm11,xmm10\r
3045 pxor xmm8,xmm2\r
3046 pxor xmm2,xmm10\r
3047 pxor xmm12,xmm11\r
3048 pxor xmm8,xmm3\r
3049 pxor xmm3,xmm11\r
3050 pxor xmm13,xmm12\r
3051 pxor xmm8,xmm4\r
3052 pxor xmm4,xmm12\r
3053 pxor xmm14,xmm13\r
3054 pxor xmm8,xmm5\r
3055 pxor xmm5,xmm13\r
3056 pxor xmm15,xmm14\r
3057 pxor xmm8,xmm6\r
3058 pxor xmm6,xmm14\r
3059 pxor xmm8,xmm7\r
3060 pxor xmm7,xmm15\r
3061 movups xmm0,XMMWORD[32+r11]\r
3062\r
; Prepare the table indices for the next group of six while the first
; rounds execute. XORing xmm9 into each offset swaps the folded key[0] for
; the last round key, so the offsets can serve as aesenclast operands.
3063 lea r12,[1+r8]\r
3064 lea r13,[3+r8]\r
3065 lea r14,[5+r8]\r
3066 add r8,6\r
3067 pxor xmm10,xmm9\r
3068 bsf r12,r12\r
3069 bsf r13,r13\r
3070 bsf r14,r14\r
3071\r
; Round with xmm1 (key[16]) on all six lanes.
3072DB 102,15,56,220,209\r
3073DB 102,15,56,220,217\r
3074DB 102,15,56,220,225\r
3075DB 102,15,56,220,233\r
3076 pxor xmm11,xmm9\r
3077 pxor xmm12,xmm9\r
3078DB 102,15,56,220,241\r
3079 pxor xmm13,xmm9\r
3080 pxor xmm14,xmm9\r
3081DB 102,15,56,220,249\r
3082 movups xmm1,XMMWORD[48+r11]\r
3083 pxor xmm15,xmm9\r
3084\r
; Round with xmm0 (key[32]) on all six lanes.
3085DB 102,15,56,220,208\r
3086DB 102,15,56,220,216\r
3087DB 102,15,56,220,224\r
3088DB 102,15,56,220,232\r
3089DB 102,15,56,220,240\r
3090DB 102,15,56,220,248\r
3091 movups xmm0,XMMWORD[64+r11]\r
3092 shl r12,4\r
3093 shl r13,4\r
3094 jmp NEAR $L$ocb_enc_loop6\r
3095\r
3096ALIGN 32\r
3097$L$ocb_enc_loop6:\r
; Two rounds per iteration. rax is a negative index that reaches zero at the
; end of the key schedule; the jnz consumes the flags of "add rax,32" (the
; SSE/AES-NI instructions in between leave the flags alone).
3098DB 102,15,56,220,209\r
3099DB 102,15,56,220,217\r
3100DB 102,15,56,220,225\r
3101DB 102,15,56,220,233\r
3102DB 102,15,56,220,241\r
3103DB 102,15,56,220,249\r
3104 movups xmm1,XMMWORD[rax*1+rcx]\r
3105 add rax,32\r
3106\r
3107DB 102,15,56,220,208\r
3108DB 102,15,56,220,216\r
3109DB 102,15,56,220,224\r
3110DB 102,15,56,220,232\r
3111DB 102,15,56,220,240\r
3112DB 102,15,56,220,248\r
3113 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
3114 jnz NEAR $L$ocb_enc_loop6\r
3115\r
; Last regular round, then restore xmm1 = key[16] for the next call.
3116DB 102,15,56,220,209\r
3117DB 102,15,56,220,217\r
3118DB 102,15,56,220,225\r
3119DB 102,15,56,220,233\r
3120DB 102,15,56,220,241\r
3121DB 102,15,56,220,249\r
3122 movups xmm1,XMMWORD[16+r11]\r
3123 shl r14,4\r
3124\r
; Final round: aesenclast lane i with (offset_i ^ last round key), which
; applies the last key and the output mask in one step. xmm10 is reloaded
; from [rbx] only after lane 0 has consumed it; rax is reset from r10.
3125DB 102,65,15,56,221,210\r
3126 movdqu xmm10,XMMWORD[rbx]\r
3127 mov rax,r10\r
3128DB 102,65,15,56,221,219\r
3129DB 102,65,15,56,221,228\r
3130DB 102,65,15,56,221,237\r
3131DB 102,65,15,56,221,246\r
3132DB 102,65,15,56,221,255\r
3133 DB 0F3h,0C3h ;repret\r
3134\r
3135\r
3136\r
3137\r
; __ocb_encrypt4 -- internal helper: encrypts the four blocks in xmm2..xmm5.
; Used by aesni_ocb_encrypt for tails of two, three or four blocks (unused
; lanes are zeroed by the caller). Register contract as for __ocb_encrypt6,
; except that r8, r12, r13 and r14 are left untouched and xmm10 is NOT
; reloaded from [rbx].
; Out: xmm2..xmm5 = results, xmm10..xmm13 = the four per-block offsets (each
;   ^ last round key), xmm8 ^= all four inputs, xmm1 = key[16], rax = r10,
;   xmm0 = last round key.
; DB 102,15,56,220,x = aesenc; DB 102,65,15,56,221,x = aesenclast xmmN,xmm(8+M).
3138ALIGN 32\r
3139__ocb_encrypt4:\r
3140\r
; Chain four offsets from xmm15 ^ xmm9 (offset ^ key[0]); XOR each input
; into the accumulator xmm8, then mask it with its offset.
3141 pxor xmm15,xmm9\r
3142 movdqu xmm11,XMMWORD[r12*1+rbx]\r
3143 movdqa xmm12,xmm10\r
3144 movdqu xmm13,XMMWORD[r13*1+rbx]\r
3145 pxor xmm10,xmm15\r
3146 pxor xmm11,xmm10\r
3147 pxor xmm8,xmm2\r
3148 pxor xmm2,xmm10\r
3149 pxor xmm12,xmm11\r
3150 pxor xmm8,xmm3\r
3151 pxor xmm3,xmm11\r
3152 pxor xmm13,xmm12\r
3153 pxor xmm8,xmm4\r
3154 pxor xmm4,xmm12\r
3155 pxor xmm8,xmm5\r
3156 pxor xmm5,xmm13\r
3157 movups xmm0,XMMWORD[32+r11]\r
3158\r
; Swap the folded key[0] for the last round key in every offset so they can
; be used directly as aesenclast operands.
3159 pxor xmm10,xmm9\r
3160 pxor xmm11,xmm9\r
3161 pxor xmm12,xmm9\r
3162 pxor xmm13,xmm9\r
3163\r
; Round with xmm1 (key[16]).
3164DB 102,15,56,220,209\r
3165DB 102,15,56,220,217\r
3166DB 102,15,56,220,225\r
3167DB 102,15,56,220,233\r
3168 movups xmm1,XMMWORD[48+r11]\r
3169\r
; Round with xmm0 (key[32]).
3170DB 102,15,56,220,208\r
3171DB 102,15,56,220,216\r
3172DB 102,15,56,220,224\r
3173DB 102,15,56,220,232\r
3174 movups xmm0,XMMWORD[64+r11]\r
3175 jmp NEAR $L$ocb_enc_loop4\r
3176\r
3177ALIGN 32\r
3178$L$ocb_enc_loop4:\r
; Two rounds per iteration; rax counts up by 32 to zero and the jnz uses the
; flags left by "add rax,32".
3179DB 102,15,56,220,209\r
3180DB 102,15,56,220,217\r
3181DB 102,15,56,220,225\r
3182DB 102,15,56,220,233\r
3183 movups xmm1,XMMWORD[rax*1+rcx]\r
3184 add rax,32\r
3185\r
3186DB 102,15,56,220,208\r
3187DB 102,15,56,220,216\r
3188DB 102,15,56,220,224\r
3189DB 102,15,56,220,232\r
3190 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
3191 jnz NEAR $L$ocb_enc_loop4\r
3192\r
; Last regular round; restore xmm1 = key[16] and rax for the next call.
3193DB 102,15,56,220,209\r
3194DB 102,15,56,220,217\r
3195DB 102,15,56,220,225\r
3196DB 102,15,56,220,233\r
3197 movups xmm1,XMMWORD[16+r11]\r
3198 mov rax,r10\r
3199\r
; aesenclast xmm2..xmm5 with xmm10..xmm13 (offset ^ last round key).
3200DB 102,65,15,56,221,210\r
3201DB 102,65,15,56,221,219\r
3202DB 102,65,15,56,221,228\r
3203DB 102,65,15,56,221,237\r
3204 DB 0F3h,0C3h ;repret\r
3205\r
3206\r
3207\r
3208\r
; __ocb_encrypt1 -- internal helper: encrypts the single block in xmm2.
; In:  xmm7 = table entry for this block, xmm15 = current offset ^ last round
;      key, xmm9 = key[0] ^ last round key, xmm8 = accumulator, xmm1 = key[16],
;      r11 = key base, rcx/rax = round-key end pointer / negative index,
;      r10 = saved start index.
; Out: xmm2 = result, xmm7 = new offset ^ last round key (the caller copies it
;      to xmm15), xmm8 ^= input, xmm1 = key[16], rax = r10, xmm0 = last round
;      key.
; DB 102,15,56,220,x = aesenc xmm2,xmm0/xmm1; DB 102,15,56,221,215 =
; aesenclast xmm2,xmm7.
3209ALIGN 32\r
3210__ocb_encrypt1:\r
3211\r
; xmm7 = entry ^ offset ^ key[0]; accumulate the input, then mask it (this
; also applies the round-0 key).
3212 pxor xmm7,xmm15\r
3213 pxor xmm7,xmm9\r
3214 pxor xmm8,xmm2\r
3215 pxor xmm2,xmm7\r
3216 movups xmm0,XMMWORD[32+r11]\r
3217\r
3218DB 102,15,56,220,209\r
3219 movups xmm1,XMMWORD[48+r11]\r
; Swap the folded key[0] for the last round key for use by aesenclast.
3220 pxor xmm7,xmm9\r
3221\r
3222DB 102,15,56,220,208\r
3223 movups xmm0,XMMWORD[64+r11]\r
3224 jmp NEAR $L$ocb_enc_loop1\r
3225\r
3226ALIGN 32\r
3227$L$ocb_enc_loop1:\r
; Two rounds per iteration; the jnz uses the flags left by "add rax,32".
3228DB 102,15,56,220,209\r
3229 movups xmm1,XMMWORD[rax*1+rcx]\r
3230 add rax,32\r
3231\r
3232DB 102,15,56,220,208\r
3233 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
3234 jnz NEAR $L$ocb_enc_loop1\r
3235\r
; Last regular round; restore xmm1 = key[16] and rax for the next call.
3236DB 102,15,56,220,209\r
3237 movups xmm1,XMMWORD[16+r11]\r
3238 mov rax,r10\r
3239\r
3240DB 102,15,56,221,215\r
3241 DB 0F3h,0C3h ;repret\r
3242\r
3243\r
3244\r
; aesni_ocb_decrypt -- exported entry point, Win64 build of the generated code.
; Mirrors aesni_ocb_encrypt; the difference visible here is that the
; accumulator xmm8 is XORed with each block AFTER decryption (in this
; function), not with the input inside the helpers.
; After the prologue remaps the Microsoft x64 arguments the body sees:
;   rdi = source pointer, rsi = destination pointer,
;   rdx = number of 16-byte blocks still to process,
;   rcx = key structure (32-bit count at offset 240),
;   r8  = running block counter,
;   r9  = pointer to a 16-byte value kept in xmm15 and written back on exit
;         (presumably the OCB offset -- confirm against the C caller),
;   rbx = 7th argument: base of a table of 16-byte entries indexed by bsf()*16,
;   rbp = 8th argument: pointer to the 16-byte accumulator kept in xmm8
;         (presumably the OCB checksum).
; NOTE(review): as in the encrypt path, a block count of 0 is not handled;
; callers presumably never pass 0 -- confirm.
3245global aesni_ocb_decrypt\r
3246\r
3247ALIGN 32\r
3248aesni_ocb_decrypt:\r
; Park callee-saved rdi/rsi in the slots above the return address.
3249 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
3250 mov QWORD[16+rsp],rsi\r
3251 mov rax,rsp\r
3252$L$SEH_begin_aesni_ocb_decrypt:\r
; Shuffle the register and stack arguments into rdi,rsi,rdx,rcx,r8,r9.
3253 mov rdi,rcx\r
3254 mov rsi,rdx\r
3255 mov rdx,r8\r
3256 mov rcx,r9\r
3257 mov r8,QWORD[40+rsp]\r
3258 mov r9,QWORD[48+rsp]\r
3259\r
3260\r
3261\r
; rax keeps the entry rsp for addressing the remaining stack arguments.
3262 lea rax,[rsp]\r
3263 push rbx\r
3264\r
3265 push rbp\r
3266\r
3267 push r12\r
3268\r
3269 push r13\r
3270\r
3271 push r14\r
3272\r
; Reserve 160 bytes and spill xmm6-xmm15 (callee-saved on Win64).
3273 lea rsp,[((-160))+rsp]\r
3274 movaps XMMWORD[rsp],xmm6\r
3275 movaps XMMWORD[16+rsp],xmm7\r
3276 movaps XMMWORD[32+rsp],xmm8\r
3277 movaps XMMWORD[48+rsp],xmm9\r
3278 movaps XMMWORD[64+rsp],xmm10\r
3279 movaps XMMWORD[80+rsp],xmm11\r
3280 movaps XMMWORD[96+rsp],xmm12\r
3281 movaps XMMWORD[112+rsp],xmm13\r
3282 movaps XMMWORD[128+rsp],xmm14\r
3283 movaps XMMWORD[144+rsp],xmm15\r
3284$L$ocb_dec_body:\r
; 7th and 8th arguments, read relative to the entry rsp held in rax.
3285 mov rbx,QWORD[56+rax]\r
3286 mov rbp,QWORD[((56+8))+rax]\r
3287\r
; r10 = 16 * (32-bit count at key+240); r11 keeps the key base.
; xmm9  = key[0] ^ key[16+r10], xmm15 = [r9] ^ key[16+r10].
3288 mov r10d,DWORD[240+rcx]\r
3289 mov r11,rcx\r
3290 shl r10d,4\r
3291 movups xmm9,XMMWORD[rcx]\r
3292 movups xmm1,XMMWORD[16+r10*1+rcx]\r
3293\r
3294 movdqu xmm15,XMMWORD[r9]\r
3295 pxor xmm9,xmm1\r
3296 pxor xmm15,xmm1\r
3297\r
; rcx = key+32+r10, rax = 48-r10 (negative round-key index, saved in r10);
; xmm1 = key[16] preloaded for the helpers' first round.
3298 mov eax,16+32\r
3299 lea rcx,[32+r10*1+r11]\r
3300 movups xmm1,XMMWORD[16+r11]\r
3301 sub rax,r10\r
3302 mov r10,rax\r
3303\r
; xmm10 = first table entry, xmm8 = accumulator loaded from [rbp].
3304 movdqu xmm10,XMMWORD[rbx]\r
3305 movdqu xmm8,XMMWORD[rbp]\r
3306\r
; Even block counter: process one block first so the six-block path starts
; from an odd counter.
3307 test r8,1\r
3308 jnz NEAR $L$ocb_dec_odd\r
3309\r
3310 bsf r12,r8\r
3311 add r8,1\r
3312 shl r12,4\r
3313 movdqu xmm7,XMMWORD[r12*1+rbx]\r
3314 movdqu xmm2,XMMWORD[rdi]\r
3315 lea rdi,[16+rdi]\r
3316\r
3317 call __ocb_decrypt1\r
3318\r
; New offset comes back in xmm7; fold the decrypted block into xmm8.
3319 movdqa xmm15,xmm7\r
3320 movups XMMWORD[rsi],xmm2\r
3321 xorps xmm8,xmm2\r
3322 lea rsi,[16+rsi]\r
3323 sub rdx,1\r
3324 jz NEAR $L$ocb_dec_done\r
3325\r
3326$L$ocb_dec_odd:\r
; Scaled table indices for counter+1, +3, +5; advance the counter by 6.
3327 lea r12,[1+r8]\r
3328 lea r13,[3+r8]\r
3329 lea r14,[5+r8]\r
3330 lea r8,[6+r8]\r
3331 bsf r12,r12\r
3332 bsf r13,r13\r
3333 bsf r14,r14\r
3334 shl r12,4\r
3335 shl r13,4\r
3336 shl r14,4\r
3337\r
; Fewer than six blocks left -> tail handling.
3338 sub rdx,6\r
3339 jc NEAR $L$ocb_dec_short\r
3340 jmp NEAR $L$ocb_dec_grandloop\r
3341\r
3342ALIGN 32\r
3343$L$ocb_dec_grandloop:\r
; Main loop: six blocks per iteration through __ocb_decrypt6.
3344 movdqu xmm2,XMMWORD[rdi]\r
3345 movdqu xmm3,XMMWORD[16+rdi]\r
3346 movdqu xmm4,XMMWORD[32+rdi]\r
3347 movdqu xmm5,XMMWORD[48+rdi]\r
3348 movdqu xmm6,XMMWORD[64+rdi]\r
3349 movdqu xmm7,XMMWORD[80+rdi]\r
3350 lea rdi,[96+rdi]\r
3351\r
3352 call __ocb_decrypt6\r
3353\r
; Store each decrypted block and XOR it into the accumulator.
3354 movups XMMWORD[rsi],xmm2\r
3355 pxor xmm8,xmm2\r
3356 movups XMMWORD[16+rsi],xmm3\r
3357 pxor xmm8,xmm3\r
3358 movups XMMWORD[32+rsi],xmm4\r
3359 pxor xmm8,xmm4\r
3360 movups XMMWORD[48+rsi],xmm5\r
3361 pxor xmm8,xmm5\r
3362 movups XMMWORD[64+rsi],xmm6\r
3363 pxor xmm8,xmm6\r
3364 movups XMMWORD[80+rsi],xmm7\r
3365 pxor xmm8,xmm7\r
3366 lea rsi,[96+rsi]\r
3367 sub rdx,6\r
3368 jnc NEAR $L$ocb_dec_grandloop\r
3369\r
3370$L$ocb_dec_short:\r
; Undo the speculative "sub rdx,6"; rdx is now the 0..5 remaining blocks.
; movdqu leaves the flags alone, so each cmp feeds both following branches.
3371 add rdx,6\r
3372 jz NEAR $L$ocb_dec_done\r
3373\r
3374 movdqu xmm2,XMMWORD[rdi]\r
3375 cmp rdx,2\r
3376 jb NEAR $L$ocb_dec_one\r
3377 movdqu xmm3,XMMWORD[16+rdi]\r
3378 je NEAR $L$ocb_dec_two\r
3379\r
3380 movdqu xmm4,XMMWORD[32+rdi]\r
3381 cmp rdx,4\r
3382 jb NEAR $L$ocb_dec_three\r
3383 movdqu xmm5,XMMWORD[48+rdi]\r
3384 je NEAR $L$ocb_dec_four\r
3385\r
; Five blocks: six-lane helper with a zero sixth lane whose output is
; neither stored nor accumulated; the fifth offset (xmm14) becomes current.
3386 movdqu xmm6,XMMWORD[64+rdi]\r
3387 pxor xmm7,xmm7\r
3388\r
3389 call __ocb_decrypt6\r
3390\r
3391 movdqa xmm15,xmm14\r
3392 movups XMMWORD[rsi],xmm2\r
3393 pxor xmm8,xmm2\r
3394 movups XMMWORD[16+rsi],xmm3\r
3395 pxor xmm8,xmm3\r
3396 movups XMMWORD[32+rsi],xmm4\r
3397 pxor xmm8,xmm4\r
3398 movups XMMWORD[48+rsi],xmm5\r
3399 pxor xmm8,xmm5\r
3400 movups XMMWORD[64+rsi],xmm6\r
3401 pxor xmm8,xmm6\r
3402\r
3403 jmp NEAR $L$ocb_dec_done\r
3404\r
3405ALIGN 16\r
3406$L$ocb_dec_one:\r
; One block: the table entry is the first one (xmm10 = [rbx]).
3407 movdqa xmm7,xmm10\r
3408\r
3409 call __ocb_decrypt1\r
3410\r
3411 movdqa xmm15,xmm7\r
3412 movups XMMWORD[rsi],xmm2\r
3413 xorps xmm8,xmm2\r
3414 jmp NEAR $L$ocb_dec_done\r
3415\r
3416ALIGN 16\r
3417$L$ocb_dec_two:\r
; Two blocks: lanes 3 and 4 zeroed; second offset (xmm11) becomes current.
3418 pxor xmm4,xmm4\r
3419 pxor xmm5,xmm5\r
3420\r
3421 call __ocb_decrypt4\r
3422\r
3423 movdqa xmm15,xmm11\r
3424 movups XMMWORD[rsi],xmm2\r
3425 xorps xmm8,xmm2\r
3426 movups XMMWORD[16+rsi],xmm3\r
3427 xorps xmm8,xmm3\r
3428\r
3429 jmp NEAR $L$ocb_dec_done\r
3430\r
3431ALIGN 16\r
3432$L$ocb_dec_three:\r
; Three blocks: lane 4 zeroed; third offset (xmm12) becomes current.
3433 pxor xmm5,xmm5\r
3434\r
3435 call __ocb_decrypt4\r
3436\r
3437 movdqa xmm15,xmm12\r
3438 movups XMMWORD[rsi],xmm2\r
3439 xorps xmm8,xmm2\r
3440 movups XMMWORD[16+rsi],xmm3\r
3441 xorps xmm8,xmm3\r
3442 movups XMMWORD[32+rsi],xmm4\r
3443 xorps xmm8,xmm4\r
3444\r
3445 jmp NEAR $L$ocb_dec_done\r
3446\r
3447ALIGN 16\r
3448$L$ocb_dec_four:\r
; Four blocks: fourth offset (xmm13) becomes current; falls through to done.
3449 call __ocb_decrypt4\r
3450\r
3451 movdqa xmm15,xmm13\r
3452 movups XMMWORD[rsi],xmm2\r
3453 pxor xmm8,xmm2\r
3454 movups XMMWORD[16+rsi],xmm3\r
3455 pxor xmm8,xmm3\r
3456 movups XMMWORD[32+rsi],xmm4\r
3457 pxor xmm8,xmm4\r
3458 movups XMMWORD[48+rsi],xmm5\r
3459 pxor xmm8,xmm5\r
3460\r
3461$L$ocb_dec_done:\r
; xmm0 holds the last round key loaded by the helpers; XORing it removes the
; key folded into xmm15 at entry. Write back accumulator and offset.
3462 pxor xmm15,xmm0\r
3463 movdqu XMMWORD[rbp],xmm8\r
3464 movdqu XMMWORD[r9],xmm15\r
3465\r
; Clear the volatile xmm registers, restore xmm6-xmm15 and overwrite each
; spill slot with zero as it is reloaded.
3466 xorps xmm0,xmm0\r
3467 pxor xmm1,xmm1\r
3468 pxor xmm2,xmm2\r
3469 pxor xmm3,xmm3\r
3470 pxor xmm4,xmm4\r
3471 pxor xmm5,xmm5\r
3472 movaps xmm6,XMMWORD[rsp]\r
3473 movaps XMMWORD[rsp],xmm0\r
3474 movaps xmm7,XMMWORD[16+rsp]\r
3475 movaps XMMWORD[16+rsp],xmm0\r
3476 movaps xmm8,XMMWORD[32+rsp]\r
3477 movaps XMMWORD[32+rsp],xmm0\r
3478 movaps xmm9,XMMWORD[48+rsp]\r
3479 movaps XMMWORD[48+rsp],xmm0\r
3480 movaps xmm10,XMMWORD[64+rsp]\r
3481 movaps XMMWORD[64+rsp],xmm0\r
3482 movaps xmm11,XMMWORD[80+rsp]\r
3483 movaps XMMWORD[80+rsp],xmm0\r
3484 movaps xmm12,XMMWORD[96+rsp]\r
3485 movaps XMMWORD[96+rsp],xmm0\r
3486 movaps xmm13,XMMWORD[112+rsp]\r
3487 movaps XMMWORD[112+rsp],xmm0\r
3488 movaps xmm14,XMMWORD[128+rsp]\r
3489 movaps XMMWORD[128+rsp],xmm0\r
3490 movaps xmm15,XMMWORD[144+rsp]\r
3491 movaps XMMWORD[144+rsp],xmm0\r
; rax = rsp as it was before the five pushes (160 bytes of spill + 40).
3492 lea rax,[((160+40))+rsp]\r
3493$L$ocb_dec_pop:\r
3494 mov r14,QWORD[((-40))+rax]\r
3495\r
3496 mov r13,QWORD[((-32))+rax]\r
3497\r
3498 mov r12,QWORD[((-24))+rax]\r
3499\r
3500 mov rbp,QWORD[((-16))+rax]\r
3501\r
3502 mov rbx,QWORD[((-8))+rax]\r
3503\r
3504 lea rsp,[rax]\r
3505\r
3506$L$ocb_dec_epilogue:\r
; Reload the rdi/rsi values parked by the prologue and return.
3507 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
3508 mov rsi,QWORD[16+rsp]\r
3509 DB 0F3h,0C3h ;repret\r
3510\r
3511$L$SEH_end_aesni_ocb_decrypt:\r
3512\r
3513\r
; __ocb_decrypt6 -- internal helper: decrypts the six blocks in xmm2..xmm7.
; Same register contract as __ocb_encrypt6, except that the accumulator xmm8
; is not touched here (aesni_ocb_decrypt XORs the outputs into it after the
; call).
; Out: xmm2..xmm7 = results, xmm10..xmm15 = the six per-block offsets (each
;   ^ last round key), r8 += 6, r12/r13/r14 recomputed for counter+1/+3/+5,
;   xmm10 reloaded from [rbx], xmm1 = key[16], rax = r10, xmm0 = last round
;   key.
; The DB sequences are hand-encoded AES-NI instructions:
;   102,15,56,222,ModRM     = aesdec     xmmN,xmm0/xmm1
;   102,65,15,56,223,ModRM  = aesdeclast xmmN,xmm(8+M)
3514ALIGN 32\r
3515__ocb_decrypt6:\r
3516\r
; Build six chained offsets starting from xmm15 ^ xmm9 = offset ^ key[0] and
; mask each input lane with its offset (this also applies the round-0 key).
3517 pxor xmm15,xmm9\r
3518 movdqu xmm11,XMMWORD[r12*1+rbx]\r
3519 movdqa xmm12,xmm10\r
3520 movdqu xmm13,XMMWORD[r13*1+rbx]\r
3521 movdqa xmm14,xmm10\r
3522 pxor xmm10,xmm15\r
3523 movdqu xmm15,XMMWORD[r14*1+rbx]\r
3524 pxor xmm11,xmm10\r
3525 pxor xmm2,xmm10\r
3526 pxor xmm12,xmm11\r
3527 pxor xmm3,xmm11\r
3528 pxor xmm13,xmm12\r
3529 pxor xmm4,xmm12\r
3530 pxor xmm14,xmm13\r
3531 pxor xmm5,xmm13\r
3532 pxor xmm15,xmm14\r
3533 pxor xmm6,xmm14\r
3534 pxor xmm7,xmm15\r
3535 movups xmm0,XMMWORD[32+r11]\r
3536\r
; Prepare the table indices for the next group of six; XORing xmm9 into each
; offset swaps the folded key[0] for the last round key.
3537 lea r12,[1+r8]\r
3538 lea r13,[3+r8]\r
3539 lea r14,[5+r8]\r
3540 add r8,6\r
3541 pxor xmm10,xmm9\r
3542 bsf r12,r12\r
3543 bsf r13,r13\r
3544 bsf r14,r14\r
3545\r
; Round with xmm1 (key[16]) on all six lanes.
3546DB 102,15,56,222,209\r
3547DB 102,15,56,222,217\r
3548DB 102,15,56,222,225\r
3549DB 102,15,56,222,233\r
3550 pxor xmm11,xmm9\r
3551 pxor xmm12,xmm9\r
3552DB 102,15,56,222,241\r
3553 pxor xmm13,xmm9\r
3554 pxor xmm14,xmm9\r
3555DB 102,15,56,222,249\r
3556 movups xmm1,XMMWORD[48+r11]\r
3557 pxor xmm15,xmm9\r
3558\r
; Round with xmm0 (key[32]) on all six lanes.
3559DB 102,15,56,222,208\r
3560DB 102,15,56,222,216\r
3561DB 102,15,56,222,224\r
3562DB 102,15,56,222,232\r
3563DB 102,15,56,222,240\r
3564DB 102,15,56,222,248\r
3565 movups xmm0,XMMWORD[64+r11]\r
3566 shl r12,4\r
3567 shl r13,4\r
3568 jmp NEAR $L$ocb_dec_loop6\r
3569\r
3570ALIGN 32\r
3571$L$ocb_dec_loop6:\r
; Two rounds per iteration. rax is a negative index that reaches zero at the
; end of the key schedule; the jnz consumes the flags of "add rax,32".
3572DB 102,15,56,222,209\r
3573DB 102,15,56,222,217\r
3574DB 102,15,56,222,225\r
3575DB 102,15,56,222,233\r
3576DB 102,15,56,222,241\r
3577DB 102,15,56,222,249\r
3578 movups xmm1,XMMWORD[rax*1+rcx]\r
3579 add rax,32\r
3580\r
3581DB 102,15,56,222,208\r
3582DB 102,15,56,222,216\r
3583DB 102,15,56,222,224\r
3584DB 102,15,56,222,232\r
3585DB 102,15,56,222,240\r
3586DB 102,15,56,222,248\r
3587 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
3588 jnz NEAR $L$ocb_dec_loop6\r
3589\r
; Last regular round, then restore xmm1 = key[16] for the next call.
3590DB 102,15,56,222,209\r
3591DB 102,15,56,222,217\r
3592DB 102,15,56,222,225\r
3593DB 102,15,56,222,233\r
3594DB 102,15,56,222,241\r
3595DB 102,15,56,222,249\r
3596 movups xmm1,XMMWORD[16+r11]\r
3597 shl r14,4\r
3598\r
; Final round: aesdeclast lane i with (offset_i ^ last round key). xmm10 is
; reloaded from [rbx] only after lane 0 has consumed it; rax is reset from
; r10.
3599DB 102,65,15,56,223,210\r
3600 movdqu xmm10,XMMWORD[rbx]\r
3601 mov rax,r10\r
3602DB 102,65,15,56,223,219\r
3603DB 102,65,15,56,223,228\r
3604DB 102,65,15,56,223,237\r
3605DB 102,65,15,56,223,246\r
3606DB 102,65,15,56,223,255\r
3607 DB 0F3h,0C3h ;repret\r
3608\r
3609\r
3610\r
3611\r
; __ocb_decrypt4 -- internal helper: decrypts the four blocks in xmm2..xmm5.
; Used by aesni_ocb_decrypt for tails of two, three or four blocks (unused
; lanes are zeroed by the caller). r8, r12, r13, r14 and the accumulator xmm8
; are left untouched, and xmm10 is NOT reloaded from [rbx].
; Out: xmm2..xmm5 = results, xmm10..xmm13 = the four per-block offsets (each
;   ^ last round key), xmm1 = key[16], rax = r10, xmm0 = last round key.
; DB 102,15,56,222,x = aesdec; DB 102,65,15,56,223,x = aesdeclast xmmN,xmm(8+M).
3612ALIGN 32\r
3613__ocb_decrypt4:\r
3614\r
; Chain four offsets from xmm15 ^ xmm9 (offset ^ key[0]) and mask each lane.
3615 pxor xmm15,xmm9\r
3616 movdqu xmm11,XMMWORD[r12*1+rbx]\r
3617 movdqa xmm12,xmm10\r
3618 movdqu xmm13,XMMWORD[r13*1+rbx]\r
3619 pxor xmm10,xmm15\r
3620 pxor xmm11,xmm10\r
3621 pxor xmm2,xmm10\r
3622 pxor xmm12,xmm11\r
3623 pxor xmm3,xmm11\r
3624 pxor xmm13,xmm12\r
3625 pxor xmm4,xmm12\r
3626 pxor xmm5,xmm13\r
3627 movups xmm0,XMMWORD[32+r11]\r
3628\r
; Swap the folded key[0] for the last round key in every offset.
3629 pxor xmm10,xmm9\r
3630 pxor xmm11,xmm9\r
3631 pxor xmm12,xmm9\r
3632 pxor xmm13,xmm9\r
3633\r
; Round with xmm1 (key[16]).
3634DB 102,15,56,222,209\r
3635DB 102,15,56,222,217\r
3636DB 102,15,56,222,225\r
3637DB 102,15,56,222,233\r
3638 movups xmm1,XMMWORD[48+r11]\r
3639\r
; Round with xmm0 (key[32]).
3640DB 102,15,56,222,208\r
3641DB 102,15,56,222,216\r
3642DB 102,15,56,222,224\r
3643DB 102,15,56,222,232\r
3644 movups xmm0,XMMWORD[64+r11]\r
3645 jmp NEAR $L$ocb_dec_loop4\r
3646\r
3647ALIGN 32\r
3648$L$ocb_dec_loop4:\r
; Two rounds per iteration; rax counts up by 32 to zero and the jnz uses the
; flags left by "add rax,32".
3649DB 102,15,56,222,209\r
3650DB 102,15,56,222,217\r
3651DB 102,15,56,222,225\r
3652DB 102,15,56,222,233\r
3653 movups xmm1,XMMWORD[rax*1+rcx]\r
3654 add rax,32\r
3655\r
3656DB 102,15,56,222,208\r
3657DB 102,15,56,222,216\r
3658DB 102,15,56,222,224\r
3659DB 102,15,56,222,232\r
3660 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
3661 jnz NEAR $L$ocb_dec_loop4\r
3662\r
; Last regular round; restore xmm1 = key[16] and rax for the next call.
3663DB 102,15,56,222,209\r
3664DB 102,15,56,222,217\r
3665DB 102,15,56,222,225\r
3666DB 102,15,56,222,233\r
3667 movups xmm1,XMMWORD[16+r11]\r
3668 mov rax,r10\r
3669\r
; aesdeclast xmm2..xmm5 with xmm10..xmm13 (offset ^ last round key).
3670DB 102,65,15,56,223,210\r
3671DB 102,65,15,56,223,219\r
3672DB 102,65,15,56,223,228\r
3673DB 102,65,15,56,223,237\r
3674 DB 0F3h,0C3h ;repret\r
3675\r
3676\r
3677\r
3678\r
; __ocb_decrypt1 -- internal helper: decrypts the single block in xmm2.
; In:  xmm7 = table entry for this block, xmm15 = current offset ^ last round
;      key, xmm9 = key[0] ^ last round key, xmm1 = key[16], r11 = key base,
;      rcx/rax = round-key end pointer / negative index, r10 = saved start
;      index.
; Out: xmm2 = result, xmm7 = new offset ^ last round key (the caller copies it
;      to xmm15), xmm1 = key[16], rax = r10, xmm0 = last round key. The
;      accumulator xmm8 is not touched here.
; DB 102,15,56,222,x = aesdec xmm2,xmm0/xmm1; DB 102,15,56,223,215 =
; aesdeclast xmm2,xmm7.
3679ALIGN 32\r
3680__ocb_decrypt1:\r
3681\r
; xmm7 = entry ^ offset ^ key[0]; mask the input (also applies round-0 key).
3682 pxor xmm7,xmm15\r
3683 pxor xmm7,xmm9\r
3684 pxor xmm2,xmm7\r
3685 movups xmm0,XMMWORD[32+r11]\r
3686\r
3687DB 102,15,56,222,209\r
3688 movups xmm1,XMMWORD[48+r11]\r
; Swap the folded key[0] for the last round key for use by aesdeclast.
3689 pxor xmm7,xmm9\r
3690\r
3691DB 102,15,56,222,208\r
3692 movups xmm0,XMMWORD[64+r11]\r
3693 jmp NEAR $L$ocb_dec_loop1\r
3694\r
3695ALIGN 32\r
3696$L$ocb_dec_loop1:\r
; Two rounds per iteration; the jnz uses the flags left by "add rax,32".
3697DB 102,15,56,222,209\r
3698 movups xmm1,XMMWORD[rax*1+rcx]\r
3699 add rax,32\r
3700\r
3701DB 102,15,56,222,208\r
3702 movups xmm0,XMMWORD[((-16))+rax*1+rcx]\r
3703 jnz NEAR $L$ocb_dec_loop1\r
3704\r
; Last regular round; restore xmm1 = key[16] and rax for the next call.
3705DB 102,15,56,222,209\r
3706 movups xmm1,XMMWORD[16+r11]\r
3707 mov rax,r10\r
3708\r
3709DB 102,15,56,223,215\r
3710 DB 0F3h,0C3h ;repret\r
3711\r
3712\r
3713global aesni_cbc_encrypt\r
3714\r
3715ALIGN 16\r
3716aesni_cbc_encrypt:\r
3717 mov QWORD[8+rsp],rdi ;WIN64 prologue\r
3718 mov QWORD[16+rsp],rsi\r
3719 mov rax,rsp\r
3720$L$SEH_begin_aesni_cbc_encrypt:\r
3721 mov rdi,rcx\r
3722 mov rsi,rdx\r
3723 mov rdx,r8\r
3724 mov rcx,r9\r
3725 mov r8,QWORD[40+rsp]\r
3726 mov r9,QWORD[48+rsp]\r
3727\r
3728\r
3729\r
3730 test rdx,rdx\r
3731 jz NEAR $L$cbc_ret\r
3732\r
3733 mov r10d,DWORD[240+rcx]\r
3734 mov r11,rcx\r
3735 test r9d,r9d\r
3736 jz NEAR $L$cbc_decrypt\r
3737\r
3738 movups xmm2,XMMWORD[r8]\r
3739 mov eax,r10d\r
3740 cmp rdx,16\r
3741 jb NEAR $L$cbc_enc_tail\r
3742 sub rdx,16\r
3743 jmp NEAR $L$cbc_enc_loop\r
3744ALIGN 16\r
3745$L$cbc_enc_loop:\r
3746 movups xmm3,XMMWORD[rdi]\r
3747 lea rdi,[16+rdi]\r
3748\r
3749 movups xmm0,XMMWORD[rcx]\r
3750 movups xmm1,XMMWORD[16+rcx]\r
3751 xorps xmm3,xmm0\r
3752 lea rcx,[32+rcx]\r
3753 xorps xmm2,xmm3\r
3754$L$oop_enc1_15:\r
3755DB 102,15,56,220,209\r
3756 dec eax\r
3757 movups xmm1,XMMWORD[rcx]\r
3758 lea rcx,[16+rcx]\r
3759 jnz NEAR $L$oop_enc1_15\r
3760DB 102,15,56,221,209\r
3761 mov eax,r10d\r
3762 mov rcx,r11\r
3763 movups XMMWORD[rsi],xmm2\r
3764 lea rsi,[16+rsi]\r
3765 sub rdx,16\r
3766 jnc NEAR $L$cbc_enc_loop\r
3767 add rdx,16\r
3768 jnz NEAR $L$cbc_enc_tail\r
3769 pxor xmm0,xmm0\r
3770 pxor xmm1,xmm1\r
3771 movups XMMWORD[r8],xmm2\r
3772 pxor xmm2,xmm2\r
3773 pxor xmm3,xmm3\r
3774 jmp NEAR $L$cbc_ret\r
3775\r
3776$L$cbc_enc_tail:\r
3777 mov rcx,rdx\r
3778 xchg rsi,rdi\r
3779 DD 0x9066A4F3\r
3780 mov ecx,16\r
3781 sub rcx,rdx\r
3782 xor eax,eax\r
3783 DD 0x9066AAF3\r
3784 lea rdi,[((-16))+rdi]\r
3785 mov eax,r10d\r
3786 mov rsi,rdi\r
3787 mov rcx,r11\r
3788 xor rdx,rdx\r
3789 jmp NEAR $L$cbc_enc_loop\r
3790\r
3791ALIGN 16\r
3792$L$cbc_decrypt:\r
3793 cmp rdx,16\r
3794 jne NEAR $L$cbc_decrypt_bulk\r
3795\r
3796\r
3797\r
3798 movdqu xmm2,XMMWORD[rdi]\r
3799 movdqu xmm3,XMMWORD[r8]\r
3800 movdqa xmm4,xmm2\r
3801 movups xmm0,XMMWORD[rcx]\r
3802 movups xmm1,XMMWORD[16+rcx]\r
3803 lea rcx,[32+rcx]\r
3804 xorps xmm2,xmm0\r
3805$L$oop_dec1_16:\r
3806DB 102,15,56,222,209\r
3807 dec r10d\r
3808 movups xmm1,XMMWORD[rcx]\r
3809 lea rcx,[16+rcx]\r
3810 jnz NEAR $L$oop_dec1_16\r
3811DB 102,15,56,223,209\r
3812 pxor xmm0,xmm0\r
3813 pxor xmm1,xmm1\r
3814 movdqu XMMWORD[r8],xmm4\r
3815 xorps xmm2,xmm3\r
3816 pxor xmm3,xmm3\r
3817 movups XMMWORD[rsi],xmm2\r
3818 pxor xmm2,xmm2\r
3819 jmp NEAR $L$cbc_ret\r
3820ALIGN 16\r
3821$L$cbc_decrypt_bulk:\r
3822 lea r11,[rsp]\r
3823\r
3824 push rbp\r
3825\r
3826 sub rsp,176\r
3827 and rsp,-16\r
3828 movaps XMMWORD[16+rsp],xmm6\r
3829 movaps XMMWORD[32+rsp],xmm7\r
3830 movaps XMMWORD[48+rsp],xmm8\r
3831 movaps XMMWORD[64+rsp],xmm9\r
3832 movaps XMMWORD[80+rsp],xmm10\r
3833 movaps XMMWORD[96+rsp],xmm11\r
3834 movaps XMMWORD[112+rsp],xmm12\r
3835 movaps XMMWORD[128+rsp],xmm13\r
3836 movaps XMMWORD[144+rsp],xmm14\r
3837 movaps XMMWORD[160+rsp],xmm15\r
3838$L$cbc_decrypt_body:\r
3839 mov rbp,rcx\r
3840 movups xmm10,XMMWORD[r8]\r
3841 mov eax,r10d\r
3842 cmp rdx,0x50\r
3843 jbe NEAR $L$cbc_dec_tail\r
3844\r
3845 movups xmm0,XMMWORD[rcx]\r
3846 movdqu xmm2,XMMWORD[rdi]\r
3847 movdqu xmm3,XMMWORD[16+rdi]\r
3848 movdqa xmm11,xmm2\r
3849 movdqu xmm4,XMMWORD[32+rdi]\r
3850 movdqa xmm12,xmm3\r
3851 movdqu xmm5,XMMWORD[48+rdi]\r
3852 movdqa xmm13,xmm4\r
3853 movdqu xmm6,XMMWORD[64+rdi]\r
3854 movdqa xmm14,xmm5\r
3855 movdqu xmm7,XMMWORD[80+rdi]\r
3856 movdqa xmm15,xmm6\r
3857 mov r9d,DWORD[((OPENSSL_ia32cap_P+4))]\r
3858 cmp rdx,0x70\r
3859 jbe NEAR $L$cbc_dec_six_or_seven\r
3860\r
3861 and r9d,71303168\r
3862 sub rdx,0x50\r
3863 cmp r9d,4194304\r
3864 je NEAR $L$cbc_dec_loop6_enter\r
3865 sub rdx,0x20\r
3866 lea rcx,[112+rcx]\r
3867 jmp NEAR $L$cbc_dec_loop8_enter\r
3868ALIGN 16\r
3869$L$cbc_dec_loop8:\r
3870 movups XMMWORD[rsi],xmm9\r
3871 lea rsi,[16+rsi]\r
3872$L$cbc_dec_loop8_enter:\r
3873 movdqu xmm8,XMMWORD[96+rdi]\r
3874 pxor xmm2,xmm0\r
3875 movdqu xmm9,XMMWORD[112+rdi]\r
3876 pxor xmm3,xmm0\r
3877 movups xmm1,XMMWORD[((16-112))+rcx]\r
3878 pxor xmm4,xmm0\r
3879 mov rbp,-1\r
3880 cmp rdx,0x70\r
3881 pxor xmm5,xmm0\r
3882 pxor xmm6,xmm0\r
3883 pxor xmm7,xmm0\r
3884 pxor xmm8,xmm0\r
3885\r
3886DB 102,15,56,222,209\r
3887 pxor xmm9,xmm0\r
3888 movups xmm0,XMMWORD[((32-112))+rcx]\r
3889DB 102,15,56,222,217\r
3890DB 102,15,56,222,225\r
3891DB 102,15,56,222,233\r
3892DB 102,15,56,222,241\r
3893DB 102,15,56,222,249\r
3894DB 102,68,15,56,222,193\r
3895 adc rbp,0\r
3896 and rbp,128\r
3897DB 102,68,15,56,222,201\r
3898 add rbp,rdi\r
3899 movups xmm1,XMMWORD[((48-112))+rcx]\r
3900DB 102,15,56,222,208\r
3901DB 102,15,56,222,216\r
3902DB 102,15,56,222,224\r
3903DB 102,15,56,222,232\r
3904DB 102,15,56,222,240\r
3905DB 102,15,56,222,248\r
3906DB 102,68,15,56,222,192\r
3907DB 102,68,15,56,222,200\r
3908 movups xmm0,XMMWORD[((64-112))+rcx]\r
3909 nop\r
3910DB 102,15,56,222,209\r
3911DB 102,15,56,222,217\r
3912DB 102,15,56,222,225\r
3913DB 102,15,56,222,233\r
3914DB 102,15,56,222,241\r
3915DB 102,15,56,222,249\r
3916DB 102,68,15,56,222,193\r
3917DB 102,68,15,56,222,201\r
3918 movups xmm1,XMMWORD[((80-112))+rcx]\r
3919 nop\r
3920DB 102,15,56,222,208\r
3921DB 102,15,56,222,216\r
3922DB 102,15,56,222,224\r
3923DB 102,15,56,222,232\r
3924DB 102,15,56,222,240\r
3925DB 102,15,56,222,248\r
3926DB 102,68,15,56,222,192\r
3927DB 102,68,15,56,222,200\r
3928 movups xmm0,XMMWORD[((96-112))+rcx]\r
3929 nop\r
3930DB 102,15,56,222,209\r
3931DB 102,15,56,222,217\r
3932DB 102,15,56,222,225\r
3933DB 102,15,56,222,233\r
3934DB 102,15,56,222,241\r
3935DB 102,15,56,222,249\r
3936DB 102,68,15,56,222,193\r
3937DB 102,68,15,56,222,201\r
3938 movups xmm1,XMMWORD[((112-112))+rcx]\r
3939 nop\r
3940DB 102,15,56,222,208\r
3941DB 102,15,56,222,216\r
3942DB 102,15,56,222,224\r
3943DB 102,15,56,222,232\r
3944DB 102,15,56,222,240\r
3945DB 102,15,56,222,248\r
3946DB 102,68,15,56,222,192\r
3947DB 102,68,15,56,222,200\r
3948 movups xmm0,XMMWORD[((128-112))+rcx]\r
3949 nop\r
3950DB 102,15,56,222,209\r
3951DB 102,15,56,222,217\r
3952DB 102,15,56,222,225\r
3953DB 102,15,56,222,233\r
3954DB 102,15,56,222,241\r
3955DB 102,15,56,222,249\r
3956DB 102,68,15,56,222,193\r
3957DB 102,68,15,56,222,201\r
3958 movups xmm1,XMMWORD[((144-112))+rcx]\r
3959 cmp eax,11\r
3960DB 102,15,56,222,208\r
3961DB 102,15,56,222,216\r
3962DB 102,15,56,222,224\r
3963DB 102,15,56,222,232\r
3964DB 102,15,56,222,240\r
3965DB 102,15,56,222,248\r
3966DB 102,68,15,56,222,192\r
3967DB 102,68,15,56,222,200\r
3968 movups xmm0,XMMWORD[((160-112))+rcx]\r
3969 jb NEAR $L$cbc_dec_done\r
3970DB 102,15,56,222,209\r
3971DB 102,15,56,222,217\r
3972DB 102,15,56,222,225\r
3973DB 102,15,56,222,233\r
3974DB 102,15,56,222,241\r
3975DB 102,15,56,222,249\r
3976DB 102,68,15,56,222,193\r
3977DB 102,68,15,56,222,201\r
3978 movups xmm1,XMMWORD[((176-112))+rcx]\r
3979 nop\r
3980DB 102,15,56,222,208\r
3981DB 102,15,56,222,216\r
3982DB 102,15,56,222,224\r
3983DB 102,15,56,222,232\r
3984DB 102,15,56,222,240\r
3985DB 102,15,56,222,248\r
3986DB 102,68,15,56,222,192\r
3987DB 102,68,15,56,222,200\r
3988 movups xmm0,XMMWORD[((192-112))+rcx]\r
3989 je NEAR $L$cbc_dec_done\r
3990DB 102,15,56,222,209\r
3991DB 102,15,56,222,217\r
3992DB 102,15,56,222,225\r
3993DB 102,15,56,222,233\r
3994DB 102,15,56,222,241\r
3995DB 102,15,56,222,249\r
3996DB 102,68,15,56,222,193\r
3997DB 102,68,15,56,222,201\r
3998 movups xmm1,XMMWORD[((208-112))+rcx]\r
3999 nop\r
4000DB 102,15,56,222,208\r
4001DB 102,15,56,222,216\r
4002DB 102,15,56,222,224\r
4003DB 102,15,56,222,232\r
4004DB 102,15,56,222,240\r
4005DB 102,15,56,222,248\r
4006DB 102,68,15,56,222,192\r
4007DB 102,68,15,56,222,200\r
4008 movups xmm0,XMMWORD[((224-112))+rcx]\r
4009 jmp NEAR $L$cbc_dec_done\r
4010ALIGN 16\r
4011$L$cbc_dec_done:\r
4012DB 102,15,56,222,209\r
4013DB 102,15,56,222,217\r
4014 pxor xmm10,xmm0\r
4015 pxor xmm11,xmm0\r
4016DB 102,15,56,222,225\r
4017DB 102,15,56,222,233\r
4018 pxor xmm12,xmm0\r
4019 pxor xmm13,xmm0\r
4020DB 102,15,56,222,241\r
4021DB 102,15,56,222,249\r
4022 pxor xmm14,xmm0\r
4023 pxor xmm15,xmm0\r
4024DB 102,68,15,56,222,193\r
4025DB 102,68,15,56,222,201\r
4026 movdqu xmm1,XMMWORD[80+rdi]\r
4027\r
4028DB 102,65,15,56,223,210\r
4029 movdqu xmm10,XMMWORD[96+rdi]\r
4030 pxor xmm1,xmm0\r
4031DB 102,65,15,56,223,219\r
4032 pxor xmm10,xmm0\r
4033 movdqu xmm0,XMMWORD[112+rdi]\r
4034DB 102,65,15,56,223,228\r
4035 lea rdi,[128+rdi]\r
4036 movdqu xmm11,XMMWORD[rbp]\r
4037DB 102,65,15,56,223,237\r
4038DB 102,65,15,56,223,246\r
4039 movdqu xmm12,XMMWORD[16+rbp]\r
4040 movdqu xmm13,XMMWORD[32+rbp]\r
4041DB 102,65,15,56,223,255\r
4042DB 102,68,15,56,223,193\r
4043 movdqu xmm14,XMMWORD[48+rbp]\r
4044 movdqu xmm15,XMMWORD[64+rbp]\r
4045DB 102,69,15,56,223,202\r
4046 movdqa xmm10,xmm0\r
4047 movdqu xmm1,XMMWORD[80+rbp]\r
4048 movups xmm0,XMMWORD[((-112))+rcx]\r
4049\r
4050 movups XMMWORD[rsi],xmm2\r
4051 movdqa xmm2,xmm11\r
4052 movups XMMWORD[16+rsi],xmm3\r
4053 movdqa xmm3,xmm12\r
4054 movups XMMWORD[32+rsi],xmm4\r
4055 movdqa xmm4,xmm13\r
4056 movups XMMWORD[48+rsi],xmm5\r
4057 movdqa xmm5,xmm14\r
4058 movups XMMWORD[64+rsi],xmm6\r
4059 movdqa xmm6,xmm15\r
4060 movups XMMWORD[80+rsi],xmm7\r
4061 movdqa xmm7,xmm1\r
4062 movups XMMWORD[96+rsi],xmm8\r
4063 lea rsi,[112+rsi]\r
4064\r
4065 sub rdx,0x80\r
4066 ja NEAR $L$cbc_dec_loop8\r
4067\r
4068 movaps xmm2,xmm9\r
4069 lea rcx,[((-112))+rcx]\r
4070 add rdx,0x70\r
4071 jle NEAR $L$cbc_dec_clear_tail_collected\r
4072 movups XMMWORD[rsi],xmm9\r
4073 lea rsi,[16+rsi]\r
4074 cmp rdx,0x50\r
4075 jbe NEAR $L$cbc_dec_tail\r
4076\r
4077 movaps xmm2,xmm11\r
4078$L$cbc_dec_six_or_seven:\r
4079 cmp rdx,0x60\r
4080 ja NEAR $L$cbc_dec_seven\r
4081\r
4082 movaps xmm8,xmm7\r
4083 call _aesni_decrypt6\r
4084 pxor xmm2,xmm10\r
4085 movaps xmm10,xmm8\r
4086 pxor xmm3,xmm11\r
4087 movdqu XMMWORD[rsi],xmm2\r
4088 pxor xmm4,xmm12\r
4089 movdqu XMMWORD[16+rsi],xmm3\r
4090 pxor xmm3,xmm3\r
4091 pxor xmm5,xmm13\r
4092 movdqu XMMWORD[32+rsi],xmm4\r
4093 pxor xmm4,xmm4\r
4094 pxor xmm6,xmm14\r
4095 movdqu XMMWORD[48+rsi],xmm5\r
4096 pxor xmm5,xmm5\r
4097 pxor xmm7,xmm15\r
4098 movdqu XMMWORD[64+rsi],xmm6\r
4099 pxor xmm6,xmm6\r
4100 lea rsi,[80+rsi]\r
4101 movdqa xmm2,xmm7\r
4102 pxor xmm7,xmm7\r
4103 jmp NEAR $L$cbc_dec_tail_collected\r
4104\r
4105ALIGN 16\r
4106$L$cbc_dec_seven:\r
4107 movups xmm8,XMMWORD[96+rdi]\r
4108 xorps xmm9,xmm9\r
4109 call _aesni_decrypt8\r
4110 movups xmm9,XMMWORD[80+rdi]\r
4111 pxor xmm2,xmm10\r
4112 movups xmm10,XMMWORD[96+rdi]\r
4113 pxor xmm3,xmm11\r
4114 movdqu XMMWORD[rsi],xmm2\r
4115 pxor xmm4,xmm12\r
4116 movdqu XMMWORD[16+rsi],xmm3\r
4117 pxor xmm3,xmm3\r
4118 pxor xmm5,xmm13\r
4119 movdqu XMMWORD[32+rsi],xmm4\r
4120 pxor xmm4,xmm4\r
4121 pxor xmm6,xmm14\r
4122 movdqu XMMWORD[48+rsi],xmm5\r
4123 pxor xmm5,xmm5\r
4124 pxor xmm7,xmm15\r
4125 movdqu XMMWORD[64+rsi],xmm6\r
4126 pxor xmm6,xmm6\r
4127 pxor xmm8,xmm9\r
4128 movdqu XMMWORD[80+rsi],xmm7\r
4129 pxor xmm7,xmm7\r
4130 lea rsi,[96+rsi]\r
4131 movdqa xmm2,xmm8\r
4132 pxor xmm8,xmm8\r
4133 pxor xmm9,xmm9\r
4134 jmp NEAR $L$cbc_dec_tail_collected\r
4135\r
4136ALIGN 16\r
4137$L$cbc_dec_loop6:\r
4138 movups XMMWORD[rsi],xmm7\r
4139 lea rsi,[16+rsi]\r
4140 movdqu xmm2,XMMWORD[rdi]\r
4141 movdqu xmm3,XMMWORD[16+rdi]\r
4142 movdqa xmm11,xmm2\r
4143 movdqu xmm4,XMMWORD[32+rdi]\r
4144 movdqa xmm12,xmm3\r
4145 movdqu xmm5,XMMWORD[48+rdi]\r
4146 movdqa xmm13,xmm4\r
4147 movdqu xmm6,XMMWORD[64+rdi]\r
4148 movdqa xmm14,xmm5\r
4149 movdqu xmm7,XMMWORD[80+rdi]\r
4150 movdqa xmm15,xmm6\r
4151$L$cbc_dec_loop6_enter:\r
4152 lea rdi,[96+rdi]\r
4153 movdqa xmm8,xmm7\r
4154\r
4155 call _aesni_decrypt6\r
4156\r
4157 pxor xmm2,xmm10\r
4158 movdqa xmm10,xmm8\r
4159 pxor xmm3,xmm11\r
4160 movdqu XMMWORD[rsi],xmm2\r
4161 pxor xmm4,xmm12\r
4162 movdqu XMMWORD[16+rsi],xmm3\r
4163 pxor xmm5,xmm13\r
4164 movdqu XMMWORD[32+rsi],xmm4\r
4165 pxor xmm6,xmm14\r
4166 mov rcx,rbp\r
4167 movdqu XMMWORD[48+rsi],xmm5\r
4168 pxor xmm7,xmm15\r
4169 mov eax,r10d\r
4170 movdqu XMMWORD[64+rsi],xmm6\r
4171 lea rsi,[80+rsi]\r
4172 sub rdx,0x60\r
4173 ja NEAR $L$cbc_dec_loop6\r
4174\r
4175 movdqa xmm2,xmm7\r
4176 add rdx,0x50\r
4177 jle NEAR $L$cbc_dec_clear_tail_collected\r
4178 movups XMMWORD[rsi],xmm7\r
4179 lea rsi,[16+rsi]\r
4180\r
4181$L$cbc_dec_tail:\r
4182 movups xmm2,XMMWORD[rdi]\r
4183 sub rdx,0x10\r
4184 jbe NEAR $L$cbc_dec_one\r
4185\r
4186 movups xmm3,XMMWORD[16+rdi]\r
4187 movaps xmm11,xmm2\r
4188 sub rdx,0x10\r
4189 jbe NEAR $L$cbc_dec_two\r
4190\r
4191 movups xmm4,XMMWORD[32+rdi]\r
4192 movaps xmm12,xmm3\r
4193 sub rdx,0x10\r
4194 jbe NEAR $L$cbc_dec_three\r
4195\r
4196 movups xmm5,XMMWORD[48+rdi]\r
4197 movaps xmm13,xmm4\r
4198 sub rdx,0x10\r
4199 jbe NEAR $L$cbc_dec_four\r
4200\r
4201 movups xmm6,XMMWORD[64+rdi]\r
4202 movaps xmm14,xmm5\r
4203 movaps xmm15,xmm6\r
4204 xorps xmm7,xmm7\r
4205 call _aesni_decrypt6\r
4206 pxor xmm2,xmm10\r
4207 movaps xmm10,xmm15\r
4208 pxor xmm3,xmm11\r
4209 movdqu XMMWORD[rsi],xmm2\r
4210 pxor xmm4,xmm12\r
4211 movdqu XMMWORD[16+rsi],xmm3\r
4212 pxor xmm3,xmm3\r
4213 pxor xmm5,xmm13\r
4214 movdqu XMMWORD[32+rsi],xmm4\r
4215 pxor xmm4,xmm4\r
4216 pxor xmm6,xmm14\r
4217 movdqu XMMWORD[48+rsi],xmm5\r
4218 pxor xmm5,xmm5\r
4219 lea rsi,[64+rsi]\r
4220 movdqa xmm2,xmm6\r
4221 pxor xmm6,xmm6\r
4222 pxor xmm7,xmm7\r
4223 sub rdx,0x10\r
4224 jmp NEAR $L$cbc_dec_tail_collected\r
4225\r
4226ALIGN 16\r
4227$L$cbc_dec_one:\r
4228 movaps xmm11,xmm2\r
4229 movups xmm0,XMMWORD[rcx]\r
4230 movups xmm1,XMMWORD[16+rcx]\r
4231 lea rcx,[32+rcx]\r
4232 xorps xmm2,xmm0\r
4233$L$oop_dec1_17:\r
4234DB 102,15,56,222,209\r
4235 dec eax\r
4236 movups xmm1,XMMWORD[rcx]\r
4237 lea rcx,[16+rcx]\r
4238 jnz NEAR $L$oop_dec1_17\r
4239DB 102,15,56,223,209\r
4240 xorps xmm2,xmm10\r
4241 movaps xmm10,xmm11\r
4242 jmp NEAR $L$cbc_dec_tail_collected\r
4243ALIGN 16\r
4244$L$cbc_dec_two:\r
4245 movaps xmm12,xmm3\r
4246 call _aesni_decrypt2\r
4247 pxor xmm2,xmm10\r
4248 movaps xmm10,xmm12\r
4249 pxor xmm3,xmm11\r
4250 movdqu XMMWORD[rsi],xmm2\r
4251 movdqa xmm2,xmm3\r
4252 pxor xmm3,xmm3\r
4253 lea rsi,[16+rsi]\r
4254 jmp NEAR $L$cbc_dec_tail_collected\r
4255ALIGN 16\r
4256$L$cbc_dec_three:\r
4257 movaps xmm13,xmm4\r
4258 call _aesni_decrypt3\r
4259 pxor xmm2,xmm10\r
4260 movaps xmm10,xmm13\r
4261 pxor xmm3,xmm11\r
4262 movdqu XMMWORD[rsi],xmm2\r
4263 pxor xmm4,xmm12\r
4264 movdqu XMMWORD[16+rsi],xmm3\r
4265 pxor xmm3,xmm3\r
4266 movdqa xmm2,xmm4\r
4267 pxor xmm4,xmm4\r
4268 lea rsi,[32+rsi]\r
4269 jmp NEAR $L$cbc_dec_tail_collected\r
4270ALIGN 16\r
4271$L$cbc_dec_four:\r
4272 movaps xmm14,xmm5\r
4273 call _aesni_decrypt4\r
4274 pxor xmm2,xmm10\r
4275 movaps xmm10,xmm14\r
4276 pxor xmm3,xmm11\r
4277 movdqu XMMWORD[rsi],xmm2\r
4278 pxor xmm4,xmm12\r
4279 movdqu XMMWORD[16+rsi],xmm3\r
4280 pxor xmm3,xmm3\r
4281 pxor xmm5,xmm13\r
4282 movdqu XMMWORD[32+rsi],xmm4\r
4283 pxor xmm4,xmm4\r
4284 movdqa xmm2,xmm5\r
4285 pxor xmm5,xmm5\r
4286 lea rsi,[48+rsi]\r
4287 jmp NEAR $L$cbc_dec_tail_collected\r
4288\r
4289ALIGN 16\r
4290$L$cbc_dec_clear_tail_collected:\r
4291 pxor xmm3,xmm3\r
4292 pxor xmm4,xmm4\r
4293 pxor xmm5,xmm5\r
4294$L$cbc_dec_tail_collected:\r
4295 movups XMMWORD[r8],xmm10\r
4296 and rdx,15\r
4297 jnz NEAR $L$cbc_dec_tail_partial\r
4298 movups XMMWORD[rsi],xmm2\r
4299 pxor xmm2,xmm2\r
4300 jmp NEAR $L$cbc_dec_ret\r
4301ALIGN 16\r
4302$L$cbc_dec_tail_partial:\r
4303 movaps XMMWORD[rsp],xmm2\r
4304 pxor xmm2,xmm2\r
4305 mov rcx,16\r
4306 mov rdi,rsi\r
4307 sub rcx,rdx\r
4308 lea rsi,[rsp]\r
4309 DD 0x9066A4F3\r
4310 movdqa XMMWORD[rsp],xmm2\r
4311\r
4312$L$cbc_dec_ret:\r
4313 xorps xmm0,xmm0\r
4314 pxor xmm1,xmm1\r
4315 movaps xmm6,XMMWORD[16+rsp]\r
4316 movaps XMMWORD[16+rsp],xmm0\r
4317 movaps xmm7,XMMWORD[32+rsp]\r
4318 movaps XMMWORD[32+rsp],xmm0\r
4319 movaps xmm8,XMMWORD[48+rsp]\r
4320 movaps XMMWORD[48+rsp],xmm0\r
4321 movaps xmm9,XMMWORD[64+rsp]\r
4322 movaps XMMWORD[64+rsp],xmm0\r
4323 movaps xmm10,XMMWORD[80+rsp]\r
4324 movaps XMMWORD[80+rsp],xmm0\r
4325 movaps xmm11,XMMWORD[96+rsp]\r
4326 movaps XMMWORD[96+rsp],xmm0\r
4327 movaps xmm12,XMMWORD[112+rsp]\r
4328 movaps XMMWORD[112+rsp],xmm0\r
4329 movaps xmm13,XMMWORD[128+rsp]\r
4330 movaps XMMWORD[128+rsp],xmm0\r
4331 movaps xmm14,XMMWORD[144+rsp]\r
4332 movaps XMMWORD[144+rsp],xmm0\r
4333 movaps xmm15,XMMWORD[160+rsp]\r
4334 movaps XMMWORD[160+rsp],xmm0\r
4335 mov rbp,QWORD[((-8))+r11]\r
4336\r
4337 lea rsp,[r11]\r
4338\r
4339$L$cbc_ret:\r
4340 mov rdi,QWORD[8+rsp] ;WIN64 epilogue\r
4341 mov rsi,QWORD[16+rsp]\r
4342 DB 0F3h,0C3h ;repret\r
4343\r
4344$L$SEH_end_aesni_cbc_encrypt:\r
; aesni_set_decrypt_key -- build an AES decryption key schedule in place.
; Arguments are passed straight through to __aesni_set_encrypt_key, which
; reads rcx (user key pointer), edx (key length in bits) and r8 (key
; schedule pointer).
; Out: rax = return value of __aesni_set_encrypt_key (0 on success).
; On success the encryption schedule at [r8] is converted by reversing the
; order of the round keys and applying AESIMC to every round key except the
; first and the last.
4345global aesni_set_decrypt_key\r
4346\r
4347ALIGN 16\r
4348aesni_set_decrypt_key:\r
4349\r
; Raw bytes 48 83 EC 08 = "sub rsp,8"; undone by "add rsp,8" at $L$dec_key_ret.
4350DB 0x48,0x83,0xEC,0x08\r
4351\r
4352 call __aesni_set_encrypt_key\r
; On success edx holds the round count written by the callee (9/11/13);
; edx*16 + 16 is the byte offset of the last round key.
4353 shl edx,4\r
4354 test eax,eax\r
4355 jnz NEAR $L$dec_key_ret\r
4356 lea rcx,[16+rdx*1+r8]\r
4357\r
; Swap the first and last round keys unchanged (no AESIMC on these two).
4358 movups xmm0,XMMWORD[r8]\r
4359 movups xmm1,XMMWORD[rcx]\r
4360 movups XMMWORD[rcx],xmm0\r
4361 movups XMMWORD[r8],xmm1\r
4362 lea r8,[16+r8]\r
4363 lea rcx,[((-16))+rcx]\r
4364\r
; Walk inwards from both ends: r8 ascends, rcx descends. Each pair of round
; keys is transformed with AESIMC and written back to the opposite slot.
4365$L$dec_key_inverse:\r
4366 movups xmm0,XMMWORD[r8]\r
4367 movups xmm1,XMMWORD[rcx]\r
; 66 0F 38 DB C0 = aesimc xmm0,xmm0 ; 66 0F 38 DB C9 = aesimc xmm1,xmm1
4368DB 102,15,56,219,192\r
4369DB 102,15,56,219,201\r
4370 lea r8,[16+r8]\r
4371 lea rcx,[((-16))+rcx]\r
4372 movups XMMWORD[16+rcx],xmm0\r
4373 movups XMMWORD[(-16)+r8],xmm1\r
4374 cmp rcx,r8\r
4375 ja NEAR $L$dec_key_inverse\r
4376\r
; The pointers have met on the middle round key: transform it in place, then
; zero xmm0/xmm1 so no key material is left in those registers.
4377 movups xmm0,XMMWORD[r8]\r
4378DB 102,15,56,219,192\r
4379 pxor xmm1,xmm1\r
4380 movups XMMWORD[rcx],xmm0\r
4381 pxor xmm0,xmm0\r
4382$L$dec_key_ret:\r
4383 add rsp,8\r
4384\r
4385 DB 0F3h,0C3h ;repret\r
4386\r
4387$L$SEH_end_set_decrypt_key:\r
4388\r
; aesni_set_encrypt_key / __aesni_set_encrypt_key -- expand a user key into
; an AES encryption key schedule.
; In:  rcx = user key pointer, edx = key length in bits (128/192/256),
;      r8  = key schedule pointer (output).
; Out: rax = 0 on success, -1 if rcx or r8 is NULL, -2 for any other bit
;      length. On success edx holds the round count value (9/11/13) that is
;      also stored at byte offset 240 of the schedule.
; Uses rax, edx, r10 and xmm0-xmm5; xmm0-xmm5 are zeroed at $L$enc_key_ret.
4389global aesni_set_encrypt_key\r
4390\r
4391ALIGN 16\r
4392aesni_set_encrypt_key:\r
4393__aesni_set_encrypt_key:\r
4394\r
; Raw bytes 48 83 EC 08 = "sub rsp,8"; undone at $L$enc_key_ret.
4395DB 0x48,0x83,0xEC,0x08\r
4396\r
; Preload the NULL-pointer error code before validating the two pointers.
4397 mov rax,-1\r
4398 test rcx,rcx\r
4399 jz NEAR $L$enc_key_ret\r
4400 test r8,r8\r
4401 jz NEAR $L$enc_key_ret\r
4402\r
; r10d = second capability dword & 0x10000800 (bits 28 and 11). Each key-size
; path compares it with 0x10000000 to choose the "_alt" expansion.
; NOTE(review): OpenSSL documents these bits as AVX (28) and XOP (11) --
; confirm against the OPENSSL_ia32cap documentation.
4403 mov r10d,268437504\r
4404 movups xmm0,XMMWORD[rcx]\r
; xmm4 is scratch for the shufps-based helpers, which need its low dword zero.
4405 xorps xmm4,xmm4\r
4406 and r10d,DWORD[((OPENSSL_ia32cap_P+4))]\r
; rax becomes the write cursor, starting at the second 16-byte slot.
4407 lea rax,[16+r8]\r
4408 cmp edx,256\r
4409 je NEAR $L$14rounds\r
4410 cmp edx,192\r
4411 je NEAR $L$12rounds\r
4412 cmp edx,128\r
4413 jne NEAR $L$bad_keybits\r
4414\r
; AES-128 key expansion using AESKEYGENASSIST.
; On entry: xmm0 = user key, r8 = schedule base, rax = r8+16, xmm4 = 0.
; Each "DB 102,15,58,223,200,imm" is 66 0F 3A DF C8 ib =
; aeskeygenassist xmm1,xmm0,imm, where imm is the round constant
; (1,2,4,...,128,27,54). The helper called after it stores the previous round
; key (except the "_cold" entry) and derives the next one into xmm0.
4415$L$10rounds:\r
4416 mov edx,9\r
; Only bit 28 of the masked capability word set -> take the alternative path.
4417 cmp r10d,268435456\r
4418 je NEAR $L$10rounds_alt\r
4419\r
; Round key 0 is the user key itself.
4420 movups XMMWORD[r8],xmm0\r
4421DB 102,15,58,223,200,1\r
4422 call $L$key_expansion_128_cold\r
4423DB 102,15,58,223,200,2\r
4424 call $L$key_expansion_128\r
4425DB 102,15,58,223,200,4\r
4426 call $L$key_expansion_128\r
4427DB 102,15,58,223,200,8\r
4428 call $L$key_expansion_128\r
4429DB 102,15,58,223,200,16\r
4430 call $L$key_expansion_128\r
4431DB 102,15,58,223,200,32\r
4432 call $L$key_expansion_128\r
4433DB 102,15,58,223,200,64\r
4434 call $L$key_expansion_128\r
4435DB 102,15,58,223,200,128\r
4436 call $L$key_expansion_128\r
4437DB 102,15,58,223,200,27\r
4438 call $L$key_expansion_128\r
4439DB 102,15,58,223,200,54\r
4440 call $L$key_expansion_128\r
; rax = r8+160 after nine 16-byte stores: write round key 10 there, then the
; round count (edx = 9) at r8+240 (= rax+80).
4441 movups XMMWORD[rax],xmm0\r
4442 mov DWORD[80+rax],edx\r
; Return 0 (success).
4443 xor eax,eax\r
4444 jmp NEAR $L$enc_key_ret\r
4445\r
; Alternative AES-128 key expansion built from PSHUFB + AESENCLAST instead of
; AESKEYGENASSIST.
; On entry: xmm0 = user key, r8 = schedule base, rax = r8+16, edx = 9.
; xmm5 = byte-shuffle mask ($L$key_rotate), xmm4 = round constant in all four
; dwords, xmm2 = copy of the previous round key.
4446ALIGN 16\r
4447$L$10rounds_alt:\r
4448 movdqa xmm5,XMMWORD[$L$key_rotate]\r
; Eight loop iterations produce round keys 1..8.
4449 mov r10d,8\r
4450 movdqa xmm4,XMMWORD[$L$key_rcon1]\r
4451 movdqa xmm2,xmm0\r
4452 movdqu XMMWORD[r8],xmm0\r
4453 jmp NEAR $L$oop_key128\r
4454\r
4455ALIGN 16\r
4456$L$oop_key128:\r
; 66 0F 38 00 C5 = pshufb xmm0,xmm5 : rotate the last dword by one byte and
; broadcast it to all four lanes.
; 66 0F 38 DD C4 = aesenclast xmm0,xmm4 : S-box substitution, then XOR with
; the round constant (all columns are equal, so ShiftRows changes nothing).
4457DB 102,15,56,0,197\r
4458DB 102,15,56,221,196\r
; Double the round constant for the next iteration.
4459 pslld xmm4,1\r
4460 lea rax,[16+rax]\r
4461\r
; xmm2 = k ^ (k<<32) ^ (k<<64) ^ (k<<96), where k is the previous round key
; (pslldq shifts by whole bytes, 4 at a time).
4462 movdqa xmm3,xmm2\r
4463 pslldq xmm2,4\r
4464 pxor xmm3,xmm2\r
4465 pslldq xmm2,4\r
4466 pxor xmm3,xmm2\r
4467 pslldq xmm2,4\r
4468 pxor xmm2,xmm3\r
4469\r
; New round key = transformed word XOR the folded previous key; store it in
; the slot rax pointed at before the increment above.
4470 pxor xmm0,xmm2\r
4471 movdqu XMMWORD[(-16)+rax],xmm0\r
4472 movdqa xmm2,xmm0\r
4473\r
4474 dec r10d\r
4475 jnz NEAR $L$oop_key128\r
4476\r
; Round key 9: shifting 0x80 left once more would overflow a byte, so the
; constant is reloaded as 0x1b.
4477 movdqa xmm4,XMMWORD[$L$key_rcon1b]\r
4478\r
4479DB 102,15,56,0,197\r
4480DB 102,15,56,221,196\r
; 0x1b -> 0x36 for round key 10.
4481 pslld xmm4,1\r
4482\r
4483 movdqa xmm3,xmm2\r
4484 pslldq xmm2,4\r
4485 pxor xmm3,xmm2\r
4486 pslldq xmm2,4\r
4487 pxor xmm3,xmm2\r
4488 pslldq xmm2,4\r
4489 pxor xmm2,xmm3\r
4490\r
4491 pxor xmm0,xmm2\r
; rax = r8+144 here, i.e. the slot of round key 9.
4492 movdqu XMMWORD[rax],xmm0\r
4493\r
; Round key 10, same steps with constant 0x36.
4494 movdqa xmm2,xmm0\r
4495DB 102,15,56,0,197\r
4496DB 102,15,56,221,196\r
4497\r
4498 movdqa xmm3,xmm2\r
4499 pslldq xmm2,4\r
4500 pxor xmm3,xmm2\r
4501 pslldq xmm2,4\r
4502 pxor xmm3,xmm2\r
4503 pslldq xmm2,4\r
4504 pxor xmm2,xmm3\r
4505\r
4506 pxor xmm0,xmm2\r
4507 movdqu XMMWORD[16+rax],xmm0\r
4508\r
; Round count (edx = 9) goes to r8+240 (= rax+96); return 0.
4509 mov DWORD[96+rax],edx\r
4510 xor eax,eax\r
4511 jmp NEAR $L$enc_key_ret\r
4512\r
; AES-192 key expansion using AESKEYGENASSIST.
; On entry: xmm0 = first 16 key bytes, rcx = user key, r8 = schedule base,
; rax = r8+16, xmm4 = 0.
; Each "DB 102,15,58,223,202,imm" is 66 0F 3A DF CA ib =
; aeskeygenassist xmm1,xmm2,imm. The 192a/192b helpers alternate because the
; 24-byte key state does not line up with the 16-byte round-key slots.
4513ALIGN 16\r
4514$L$12rounds:\r
; Low 64 bits of xmm2 = key bytes 16..23 (movq zeroes the upper half).
4515 movq xmm2,QWORD[16+rcx]\r
4516 mov edx,11\r
; Only bit 28 of the masked capability word set -> take the alternative path.
4517 cmp r10d,268435456\r
4518 je NEAR $L$12rounds_alt\r
4519\r
4520 movups XMMWORD[r8],xmm0\r
4521DB 102,15,58,223,202,1\r
4522 call $L$key_expansion_192a_cold\r
4523DB 102,15,58,223,202,2\r
4524 call $L$key_expansion_192b\r
4525DB 102,15,58,223,202,4\r
4526 call $L$key_expansion_192a\r
4527DB 102,15,58,223,202,8\r
4528 call $L$key_expansion_192b\r
4529DB 102,15,58,223,202,16\r
4530 call $L$key_expansion_192a\r
4531DB 102,15,58,223,202,32\r
4532 call $L$key_expansion_192b\r
4533DB 102,15,58,223,202,64\r
4534 call $L$key_expansion_192a\r
4535DB 102,15,58,223,202,128\r
4536 call $L$key_expansion_192b\r
; The helpers advanced rax by 4*32 + 3*16 = 176 bytes, so rax = r8+192:
; store round key 12, then the round count (edx = 11) at r8+240 (= rax+48).
4537 movups XMMWORD[rax],xmm0\r
4538 mov DWORD[48+rax],edx\r
; Return 0 (success).
4539 xor rax,rax\r
4540 jmp NEAR $L$enc_key_ret\r
4541\r
; Alternative AES-192 key expansion built from PSHUFB + AESENCLAST.
; On entry: xmm0 = key bytes 0..15, low qword of xmm2 = key bytes 16..23,
; r8 = schedule base, rax = r8+16, edx = 11.
; Each of the eight iterations emits 24 bytes of schedule.
4542ALIGN 16\r
4543$L$12rounds_alt:\r
4544 movdqa xmm5,XMMWORD[$L$key_rotate192]\r
4545 movdqa xmm4,XMMWORD[$L$key_rcon1]\r
4546 mov r10d,8\r
4547 movdqu XMMWORD[r8],xmm0\r
4548 jmp NEAR $L$oop_key192\r
4549\r
4550ALIGN 16\r
4551$L$oop_key192:\r
; Write the pending 8 bytes (low qword of xmm2) and keep a copy in xmm1.
4552 movq QWORD[rax],xmm2\r
4553 movdqa xmm1,xmm2\r
; 66 0F 38 00 D5 = pshufb xmm2,xmm5 : rotate dword 1 by one byte and
; broadcast it to all four lanes.
; 66 0F 38 DD D4 = aesenclast xmm2,xmm4 : S-box substitution, then XOR with
; the round constant.
4554DB 102,15,56,0,213\r
4555DB 102,15,56,221,212\r
; Double the round constant; advance the cursor by 24 bytes.
4556 pslld xmm4,1\r
4557 lea rax,[24+rax]\r
4558\r
; xmm0 = x ^ (x<<32) ^ (x<<64) ^ (x<<96), where x is the previous xmm0.
4559 movdqa xmm3,xmm0\r
4560 pslldq xmm0,4\r
4561 pxor xmm3,xmm0\r
4562 pslldq xmm0,4\r
4563 pxor xmm3,xmm0\r
4564 pslldq xmm0,4\r
4565 pxor xmm0,xmm3\r
4566\r
; xmm3 = broadcast of the top dword of the folded xmm0, XORed with the saved
; 64-bit tail and with that tail shifted left by one dword.
4567 pshufd xmm3,xmm0,0xff\r
4568 pxor xmm3,xmm1\r
4569 pslldq xmm1,4\r
4570 pxor xmm3,xmm1\r
4571\r
; Finish both parts with the transformed word and store the 16-byte part
; directly after the 8 bytes written at the top of the loop.
4572 pxor xmm0,xmm2\r
4573 pxor xmm2,xmm3\r
4574 movdqu XMMWORD[(-16)+rax],xmm0\r
4575\r
4576 dec r10d\r
4577 jnz NEAR $L$oop_key192\r
4578\r
; rax = r8+16+8*24 = r8+208: round count (edx = 11) goes to r8+240.
4579 mov DWORD[32+rax],edx\r
; Return 0 (success).
4580 xor eax,eax\r
4581 jmp NEAR $L$enc_key_ret\r
4582\r
; AES-256 key expansion using AESKEYGENASSIST.
; On entry: xmm0 = key bytes 0..15, rcx = user key, r8 = schedule base,
; rax = r8+16, xmm4 = 0.
; "DB 102,15,58,223,202,imm" = aeskeygenassist xmm1,xmm2,imm (feeds 256a),
; "DB 102,15,58,223,200,imm" = aeskeygenassist xmm1,xmm0,imm (feeds 256b).
; 256a derives the next even round key into xmm0, 256b the next odd one
; into xmm2.
4583ALIGN 16\r
4584$L$14rounds:\r
; xmm2 = key bytes 16..31.
4585 movups xmm2,XMMWORD[16+rcx]\r
4586 mov edx,13\r
; The first two slots hold the raw key, so the cursor starts at r8+32.
4587 lea rax,[16+rax]\r
; Only bit 28 of the masked capability word set -> take the alternative path.
4588 cmp r10d,268435456\r
4589 je NEAR $L$14rounds_alt\r
4590\r
4591 movups XMMWORD[r8],xmm0\r
4592 movups XMMWORD[16+r8],xmm2\r
4593DB 102,15,58,223,202,1\r
4594 call $L$key_expansion_256a_cold\r
4595DB 102,15,58,223,200,1\r
4596 call $L$key_expansion_256b\r
4597DB 102,15,58,223,202,2\r
4598 call $L$key_expansion_256a\r
4599DB 102,15,58,223,200,2\r
4600 call $L$key_expansion_256b\r
4601DB 102,15,58,223,202,4\r
4602 call $L$key_expansion_256a\r
4603DB 102,15,58,223,200,4\r
4604 call $L$key_expansion_256b\r
4605DB 102,15,58,223,202,8\r
4606 call $L$key_expansion_256a\r
4607DB 102,15,58,223,200,8\r
4608 call $L$key_expansion_256b\r
4609DB 102,15,58,223,202,16\r
4610 call $L$key_expansion_256a\r
4611DB 102,15,58,223,200,16\r
4612 call $L$key_expansion_256b\r
4613DB 102,15,58,223,202,32\r
4614 call $L$key_expansion_256a\r
4615DB 102,15,58,223,200,32\r
4616 call $L$key_expansion_256b\r
4617DB 102,15,58,223,202,64\r
4618 call $L$key_expansion_256a\r
; Twelve 16-byte stores moved rax to r8+224: store round key 14, then the
; round count (edx = 13) at r8+240 (= rax+16).
4619 movups XMMWORD[rax],xmm0\r
4620 mov DWORD[16+rax],edx\r
; Return 0 (success).
4621 xor rax,rax\r
4622 jmp NEAR $L$enc_key_ret\r
4623\r
; Alternative AES-256 key expansion built from PSHUFB + AESENCLAST.
; On entry: xmm0 = key bytes 0..15, xmm2 = key bytes 16..31,
; r8 = schedule base, rax = r8+32, edx = 13.
; Each pass emits an even round key at [rax]; every pass except the last
; also emits the following odd round key at [rax+16].
4624ALIGN 16\r
4625$L$14rounds_alt:\r
4626 movdqa xmm5,XMMWORD[$L$key_rotate]\r
4627 movdqa xmm4,XMMWORD[$L$key_rcon1]\r
; Seven passes: the last one leaves through $L$done_key256 after the even key.
4628 mov r10d,7\r
4629 movdqu XMMWORD[r8],xmm0\r
4630 movdqa xmm1,xmm2\r
4631 movdqu XMMWORD[16+r8],xmm2\r
4632 jmp NEAR $L$oop_key256\r
4633\r
4634ALIGN 16\r
4635$L$oop_key256:\r
; 66 0F 38 00 D5 = pshufb xmm2,xmm5 : rotate the last dword by one byte and
; broadcast it.
; 66 0F 38 DD D4 = aesenclast xmm2,xmm4 : S-box substitution, then XOR with
; the round constant.
4636DB 102,15,56,0,213\r
4637DB 102,15,56,221,212\r
4638\r
; xmm0 = x ^ (x<<32) ^ (x<<64) ^ (x<<96), where x is the previous even key.
4639 movdqa xmm3,xmm0\r
4640 pslldq xmm0,4\r
4641 pxor xmm3,xmm0\r
4642 pslldq xmm0,4\r
4643 pxor xmm3,xmm0\r
4644 pslldq xmm0,4\r
4645 pxor xmm0,xmm3\r
; Double the round constant for the next pass.
4646 pslld xmm4,1\r
4647\r
4648 pxor xmm0,xmm2\r
4649 movdqu XMMWORD[rax],xmm0\r
4650\r
4651 dec r10d\r
4652 jz NEAR $L$done_key256\r
4653\r
; Odd round key: broadcast the top dword of the new even key and run
; aesenclast with a zero key (66 0F 38 DD D3 = aesenclast xmm2,xmm3), i.e.
; S-box substitution only -- no rotation and no round constant.
4654 pshufd xmm2,xmm0,0xff\r
4655 pxor xmm3,xmm3\r
4656DB 102,15,56,221,211\r
4657\r
; xmm1 = y ^ (y<<32) ^ (y<<64) ^ (y<<96), where y is the previous odd key.
4658 movdqa xmm3,xmm1\r
4659 pslldq xmm1,4\r
4660 pxor xmm3,xmm1\r
4661 pslldq xmm1,4\r
4662 pxor xmm3,xmm1\r
4663 pslldq xmm1,4\r
4664 pxor xmm1,xmm3\r
4665\r
4666 pxor xmm2,xmm1\r
4667 movdqu XMMWORD[16+rax],xmm2\r
4668 lea rax,[32+rax]\r
4669 movdqa xmm1,xmm2\r
4670\r
4671 jmp NEAR $L$oop_key256\r
4672\r
4673$L$done_key256:\r
; rax = r8+224 here (six full passes of 32 bytes): round count (edx = 13)
; goes to r8+240; return 0.
4674 mov DWORD[16+rax],edx\r
4675 xor eax,eax\r
4676 jmp NEAR $L$enc_key_ret\r
4677\r
; Common exit of aesni_set_encrypt_key.
; $L$bad_keybits is reached when edx is none of 128/192/256 and returns -2;
; every other path arrives at $L$enc_key_ret with rax already set.
4678ALIGN 16\r
4679$L$bad_keybits:\r
4680 mov rax,-2\r
4681$L$enc_key_ret:\r
; Zero xmm0-xmm5 so no key material is left in these registers.
4682 pxor xmm0,xmm0\r
4683 pxor xmm1,xmm1\r
4684 pxor xmm2,xmm2\r
4685 pxor xmm3,xmm3\r
4686 pxor xmm4,xmm4\r
4687 pxor xmm5,xmm5\r
; Undo the "sub rsp,8" emitted as raw bytes at function entry.
4688 add rsp,8\r
4689\r
4690 DB 0F3h,0C3h ;repret\r
4691$L$SEH_end_set_encrypt_key:\r
4692\r
; AES-128 key-expansion step, called from $L$10rounds.
; In:  xmm0 = current round key, xmm1 = aeskeygenassist result for it,
;      xmm4 = scratch whose low dword is zero, rax = write cursor.
; Out: xmm0 = next round key. Entering at $L$key_expansion_128 first stores
;      the current key at [rax] and advances rax by 16; the "_cold" entry
;      skips that store.
4693ALIGN 16\r
4694$L$key_expansion_128:\r
4695 movups XMMWORD[rax],xmm0\r
4696 lea rax,[16+rax]\r
4697$L$key_expansion_128_cold:\r
; The two shufps/xorps pairs turn xmm0 = [k0,k1,k2,k3] into the running XOR
; [k0, k0^k1, k0^k1^k2, k0^k1^k2^k3]; the low dword of xmm4 stays zero.
4698 shufps xmm4,xmm0,16\r
4699 xorps xmm0,xmm4\r
4700 shufps xmm4,xmm0,140\r
4701 xorps xmm0,xmm4\r
; Broadcast dword 3 of the aeskeygenassist result and fold it in.
4702 shufps xmm1,xmm1,255\r
4703 xorps xmm0,xmm1\r
4704 DB 0F3h,0C3h ;repret\r
4705\r
; AES-192 key-expansion step "a", called from $L$12rounds.
; In:  xmm0 = 16-byte part of the key state, xmm2 = 8-byte part (low qword),
;      xmm1 = aeskeygenassist result for xmm2, xmm4 = scratch whose low dword
;      is zero, rax = write cursor.
; Out: xmm0/xmm2 updated to the next key state, xmm5 = copy of the incoming
;      xmm2, which $L$key_expansion_192b later combines with xmm0.
; Entering at $L$key_expansion_192a first stores xmm0 at [rax] and advances
; rax by 16. $L$key_expansion_192b_warm is the shared tail that
; $L$key_expansion_192b jumps into.
4706ALIGN 16\r
4707$L$key_expansion_192a:\r
4708 movups XMMWORD[rax],xmm0\r
4709 lea rax,[16+rax]\r
4710$L$key_expansion_192a_cold:\r
4711 movaps xmm5,xmm2\r
4712$L$key_expansion_192b_warm:\r
; Running XOR across the four dwords of xmm0 (interleaved with the xmm2
; update below).
4713 shufps xmm4,xmm0,16\r
4714 movdqa xmm3,xmm2\r
4715 xorps xmm0,xmm4\r
4716 shufps xmm4,xmm0,140\r
4717 pslldq xmm3,4\r
4718 xorps xmm0,xmm4\r
; Broadcast dword 1 of the aeskeygenassist result and fold it into xmm0.
4719 pshufd xmm1,xmm1,85\r
; xmm2 ^= xmm2 shifted left by one dword.
4720 pxor xmm2,xmm3\r
4721 pxor xmm0,xmm1\r
; Fold the top dword of the new xmm0 into every dword of xmm2.
4722 pshufd xmm3,xmm0,255\r
4723 pxor xmm2,xmm3\r
4724 DB 0F3h,0C3h ;repret\r
4725\r
; AES-192 key-expansion step "b", called from $L$12rounds.
; Stores 32 bytes of schedule assembled from xmm5 (saved by step "a"), xmm0
; and xmm2, advances rax by 32, then continues in the shared tail
; $L$key_expansion_192b_warm to derive the next key state.
4726ALIGN 16\r
4727$L$key_expansion_192b:\r
4728 movaps xmm3,xmm0\r
; xmm5 = [xmm5 low qword | xmm0 low qword]
4729 shufps xmm5,xmm0,68\r
4730 movups XMMWORD[rax],xmm5\r
; xmm3 = [xmm0 high qword | xmm2 low qword]
4731 shufps xmm3,xmm2,78\r
4732 movups XMMWORD[16+rax],xmm3\r
4733 lea rax,[32+rax]\r
4734 jmp NEAR $L$key_expansion_192b_warm\r
4735\r
; AES-256 key-expansion step "a", called from $L$14rounds.
; In:  xmm0 = previous even round key, xmm2 = latest odd round key,
;      xmm1 = aeskeygenassist result for xmm2, xmm4 = scratch whose low dword
;      is zero, rax = write cursor.
; Out: xmm0 = next even round key. Entering at $L$key_expansion_256a first
;      stores xmm2 at [rax] and advances rax by 16; the "_cold" entry skips
;      that store.
4736ALIGN 16\r
4737$L$key_expansion_256a:\r
4738 movups XMMWORD[rax],xmm2\r
4739 lea rax,[16+rax]\r
4740$L$key_expansion_256a_cold:\r
; Running XOR across the four dwords of xmm0.
4741 shufps xmm4,xmm0,16\r
4742 xorps xmm0,xmm4\r
4743 shufps xmm4,xmm0,140\r
4744 xorps xmm0,xmm4\r
; Broadcast dword 3 of the aeskeygenassist result and fold it in.
4745 shufps xmm1,xmm1,255\r
4746 xorps xmm0,xmm1\r
4747 DB 0F3h,0C3h ;repret\r
4748\r
; AES-256 key-expansion step "b", called from $L$14rounds.
; In:  xmm0 = latest even round key, xmm2 = previous odd round key,
;      xmm1 = aeskeygenassist result for xmm0, xmm4 = scratch whose low dword
;      is zero, rax = write cursor.
; Out: xmm0 stored at [rax], rax advanced by 16, xmm2 = next odd round key.
4749ALIGN 16\r
4750$L$key_expansion_256b:\r
4751 movups XMMWORD[rax],xmm0\r
4752 lea rax,[16+rax]\r
4753\r
; Running XOR across the four dwords of xmm2.
4754 shufps xmm4,xmm2,16\r
4755 xorps xmm2,xmm4\r
4756 shufps xmm4,xmm2,140\r
4757 xorps xmm2,xmm4\r
; Broadcast dword 2 of the aeskeygenassist result (the substituted word
; without rotation or round constant) and fold it in.
4758 shufps xmm1,xmm1,170\r
4759 xorps xmm2,xmm1\r
4760 DB 0F3h,0C3h ;repret\r
4761\r
4762\r
4763\r
; Constant tables. The block is 64-byte aligned and every table is exactly 16
; bytes, so each label is 16-byte aligned; the movdqa loads of $L$key_rotate,
; $L$key_rotate192, $L$key_rcon1 and $L$key_rcon1b in aesni_set_encrypt_key
; rely on that.
4764ALIGN 64\r
; Byte indices 15..0: a byte-shuffle mask that reverses all 16 bytes.
4765$L$bswap_mask:\r
4766DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\r
; NOTE(review): the next four tables are not referenced in this part of the
; file; their names suggest counter increments and the XTS tweak constant --
; confirm against the CTR/XTS code.
4767$L$increment32:\r
4768 DD 6,6,6,0\r
4769$L$increment64:\r
4770 DD 1,0,0,0\r
4771$L$xts_magic:\r
4772 DD 0x87,0,1,0\r
4773$L$increment1:\r
4774DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1\r
; pshufb mask selecting source bytes 13,14,15,12 into every dword: the top
; dword rotated by one byte, broadcast to all lanes.
4775$L$key_rotate:\r
4776 DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d\r
; Same idea for dword 1 (source bytes 5,6,7,4); used by the AES-192 path.
4777$L$key_rotate192:\r
4778 DD 0x04070605,0x04070605,0x04070605,0x04070605\r
; Initial round constant 1 in every dword; doubled with pslld per round.
4779$L$key_rcon1:\r
4780 DD 1,1,1,1\r
; Round constant 0x1b, reloaded by the AES-128 path for round key 9.
4781$L$key_rcon1b:\r
4782 DD 0x1b,0x1b,0x1b,0x1b\r
4783\r
; NUL-terminated ASCII string:
; "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
4784DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69\r
4785DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83\r
4786DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115\r
4787DB 115,108,46,111,114,103,62,0\r
4788ALIGN 64\r
4789EXTERN __imp_RtlVirtualUnwind\r
4790\r
; Win64 language-specific exception handler registered (via .xdata) for the
; ECB and CCM64 routines.
; In:  r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (rcx/rdx are not used).
; The two dwords of handler data are image-relative addresses of the guarded
; routine's "body" and "ret" labels. Offsets below follow the documented
; Win64 layouts: CONTEXT 120 = Rax, 152 = Rsp, 248 = Rip, 512 = Xmm6;
; DISPATCHER_CONTEXT 8 = ImageBase, 56 = HandlerData.
; Leaves rax = the stack pointer value that $L$common_seh_tail should treat
; as the frame at function entry, then jumps there.
4791ALIGN 16\r
4792ecb_ccm64_se_handler:\r
; Save the non-volatile registers and flags; 64 bytes cover the shadow space
; plus four stack arguments for the call made in $L$common_seh_tail.
4793 push rsi\r
4794 push rdi\r
4795 push rbx\r
4796 push rbp\r
4797 push r12\r
4798 push r13\r
4799 push r14\r
4800 push r15\r
4801 pushfq\r
4802 sub rsp,64\r
4803\r
4804 mov rax,QWORD[120+r8]\r
4805 mov rbx,QWORD[248+r8]\r
4806\r
4807 mov rsi,QWORD[8+r9]\r
4808 mov r11,QWORD[56+r9]\r
4809\r
; Fault before the "body" label: use context->Rax as the frame.
; NOTE(review): presumably the guarded routine's prologue keeps the entry rsp
; in rax; that prologue is outside this part of the file.
4810 mov r10d,DWORD[r11]\r
4811 lea r10,[r10*1+rsi]\r
4812 cmp rbx,r10\r
4813 jb NEAR $L$common_seh_tail\r
4814\r
4815 mov rax,QWORD[152+r8]\r
4816\r
; Fault at or after the "ret" label: use context->Rsp as is.
4817 mov r10d,DWORD[4+r11]\r
4818 lea r10,[r10*1+rsi]\r
4819 cmp rbx,r10\r
4820 jae NEAR $L$common_seh_tail\r
4821\r
; Fault inside the body: copy 8 qwords (four XMM registers) from the stack at
; context->Rsp into context->Xmm6.., then step over the 88-byte frame.
; DD 0xa548f3fc is the byte sequence FC F3 48 A5 = cld ; rep movsq.
4822 lea rsi,[rax]\r
4823 lea rdi,[512+r8]\r
4824 mov ecx,8\r
4825 DD 0xa548f3fc\r
4826 lea rax,[88+rax]\r
4827\r
4828 jmp NEAR $L$common_seh_tail\r
4829\r
4830\r
4831\r
; Win64 language-specific exception handler registered (via .xdata) for the
; CTR32 and XTS routines.
; In:  r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (rcx/rdx are not used).
; The two dwords of handler data are image-relative addresses of the guarded
; routine's "body" and "epilogue" labels. Offsets follow the documented Win64
; layouts: CONTEXT 120 = Rax, 152 = Rsp, 160 = Rbp, 208 = R11, 248 = Rip,
; 512 = Xmm6; DISPATCHER_CONTEXT 8 = ImageBase, 56 = HandlerData.
4832ALIGN 16\r
4833ctr_xts_se_handler:\r
; Save the non-volatile registers and flags; 64 bytes cover the shadow space
; plus four stack arguments for the call made in $L$common_seh_tail.
4834 push rsi\r
4835 push rdi\r
4836 push rbx\r
4837 push rbp\r
4838 push r12\r
4839 push r13\r
4840 push r14\r
4841 push r15\r
4842 pushfq\r
4843 sub rsp,64\r
4844\r
4845 mov rax,QWORD[120+r8]\r
4846 mov rbx,QWORD[248+r8]\r
4847\r
4848 mov rsi,QWORD[8+r9]\r
4849 mov r11,QWORD[56+r9]\r
4850\r
; Fault before the "body" label: use context->Rax as the frame.
4851 mov r10d,DWORD[r11]\r
4852 lea r10,[r10*1+rsi]\r
4853 cmp rbx,r10\r
4854 jb NEAR $L$common_seh_tail\r
4855\r
4856 mov rax,QWORD[152+r8]\r
4857\r
; Fault at or after the "epilogue" label: use context->Rsp as is.
4858 mov r10d,DWORD[4+r11]\r
4859 lea r10,[r10*1+rsi]\r
4860 cmp rbx,r10\r
4861 jae NEAR $L$common_seh_tail\r
4862\r
; Fault inside the body: the frame is taken from context->R11.
; NOTE(review): presumably the guarded routines keep their entry rsp in r11,
; with rbp pushed just below it and xmm6-xmm15 saved below that; their
; prologues are outside this part of the file.
4863 mov rax,QWORD[208+r8]\r
4864\r
; Copy 20 qwords (ten XMM registers) from [rax-168] into context->Xmm6..
; DD 0xa548f3fc is the byte sequence FC F3 48 A5 = cld ; rep movsq.
4865 lea rsi,[((-168))+rax]\r
4866 lea rdi,[512+r8]\r
4867 mov ecx,20\r
4868 DD 0xa548f3fc\r
4869\r
; Recover the saved rbp from just below the frame into context->Rbp.
4870 mov rbp,QWORD[((-8))+rax]\r
4871 mov QWORD[160+r8],rbp\r
4872 jmp NEAR $L$common_seh_tail\r
4873\r
4874\r
4875\r
; Win64 language-specific exception handler registered (via .xdata) for the
; OCB routines.
; In:  r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (rcx/rdx are not used).
; Handler data holds three image-relative addresses: the guarded routine's
; "body", "epilogue" and "pop" labels. Offsets follow the documented Win64
; layouts: CONTEXT 120 = Rax, 144 = Rbx, 152 = Rsp, 160 = Rbp, 216 = R12,
; 224 = R13, 232 = R14, 248 = Rip, 512 = Xmm6; DISPATCHER_CONTEXT
; 8 = ImageBase, 56 = HandlerData.
4876ALIGN 16\r
4877ocb_se_handler:\r
; Save the non-volatile registers and flags; 64 bytes cover the shadow space
; plus four stack arguments for the call made in $L$common_seh_tail.
4878 push rsi\r
4879 push rdi\r
4880 push rbx\r
4881 push rbp\r
4882 push r12\r
4883 push r13\r
4884 push r14\r
4885 push r15\r
4886 pushfq\r
4887 sub rsp,64\r
4888\r
4889 mov rax,QWORD[120+r8]\r
4890 mov rbx,QWORD[248+r8]\r
4891\r
4892 mov rsi,QWORD[8+r9]\r
4893 mov r11,QWORD[56+r9]\r
4894\r
; Fault before the "body" label: nothing to restore, frame = context->Rax.
4895 mov r10d,DWORD[r11]\r
4896 lea r10,[r10*1+rsi]\r
4897 cmp rbx,r10\r
4898 jb NEAR $L$common_seh_tail\r
4899\r
; Fault at or after the "epilogue" label: nothing to restore either.
4900 mov r10d,DWORD[4+r11]\r
4901 lea r10,[r10*1+rsi]\r
4902 cmp rbx,r10\r
4903 jae NEAR $L$common_seh_tail\r
4904\r
; Fault at or after the "pop" label: skip the XMM copy and restore only the
; integer registers, relative to context->Rax.
4905 mov r10d,DWORD[8+r11]\r
4906 lea r10,[r10*1+rsi]\r
4907 cmp rbx,r10\r
4908 jae NEAR $L$ocb_no_xmm\r
4909\r
4910 mov rax,QWORD[152+r8]\r
4911\r
; Copy 20 qwords (ten XMM registers) from the stack at context->Rsp into
; context->Xmm6.., then step over 160+40 bytes of frame.
; DD 0xa548f3fc is the byte sequence FC F3 48 A5 = cld ; rep movsq.
4912 lea rsi,[rax]\r
4913 lea rdi,[512+r8]\r
4914 mov ecx,20\r
4915 DD 0xa548f3fc\r
4916 lea rax,[((160+40))+rax]\r
4917\r
4918$L$ocb_no_xmm:\r
; Five saved registers sit directly below rax; write them back into the
; context as Rbx, Rbp, R12, R13 and R14.
4919 mov rbx,QWORD[((-8))+rax]\r
4920 mov rbp,QWORD[((-16))+rax]\r
4921 mov r12,QWORD[((-24))+rax]\r
4922 mov r13,QWORD[((-32))+rax]\r
4923 mov r14,QWORD[((-40))+rax]\r
4924\r
4925 mov QWORD[144+r8],rbx\r
4926 mov QWORD[160+r8],rbp\r
4927 mov QWORD[216+r8],r12\r
4928 mov QWORD[224+r8],r13\r
4929 mov QWORD[232+r8],r14\r
4930\r
4931 jmp NEAR $L$common_seh_tail\r
4932\r
4933\r
; Win64 language-specific exception handler for aesni_cbc_encrypt, followed
; by $L$common_seh_tail, the tail shared by all four handlers in this file.
; In:  r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (rcx/rdx are not used).
; Unlike the other handlers it compares context->Rip with code labels
; directly instead of reading handler data. Offsets follow the documented
; Win64 layouts: CONTEXT 120 = Rax, 152 = Rsp, 160 = Rbp, 168 = Rsi,
; 176 = Rdi, 208 = R11, 248 = Rip, 512 = Xmm6; DISPATCHER_CONTEXT
; 0 = ControlPc, 8 = ImageBase, 16 = FunctionEntry, 24 = EstablisherFrame,
; 40 = ContextRecord, 56 = HandlerData.
; Out: eax = 1 (ExceptionContinueSearch).
4934ALIGN 16\r
4935cbc_se_handler:\r
; Save the non-volatile registers and flags; 64 bytes cover the shadow space
; plus four stack arguments for RtlVirtualUnwind.
4936 push rsi\r
4937 push rdi\r
4938 push rbx\r
4939 push rbp\r
4940 push r12\r
4941 push r13\r
4942 push r14\r
4943 push r15\r
4944 pushfq\r
4945 sub rsp,64\r
4946\r
4947 mov rax,QWORD[152+r8]\r
4948 mov rbx,QWORD[248+r8]\r
4949\r
; Fault before $L$cbc_decrypt_bulk: no frame was built, use context->Rsp.
4950 lea r10,[$L$cbc_decrypt_bulk]\r
4951 cmp rbx,r10\r
4952 jb NEAR $L$common_seh_tail\r
4953\r
4954 mov rax,QWORD[120+r8]\r
4955\r
; Fault while the bulk prologue is running: use context->Rax.
; NOTE(review): presumably rax still holds the entry rsp at that point; the
; function entry is outside this part of the file.
4956 lea r10,[$L$cbc_decrypt_body]\r
4957 cmp rbx,r10\r
4958 jb NEAR $L$common_seh_tail\r
4959\r
4960 mov rax,QWORD[152+r8]\r
4961\r
; Fault at or after $L$cbc_ret: the frame is already torn down.
4962 lea r10,[$L$cbc_ret]\r
4963 cmp rbx,r10\r
4964 jae NEAR $L$common_seh_tail\r
4965\r
; Fault inside the bulk-decrypt body: xmm6-xmm15 were saved at 16..160+rsp,
; so copy 20 qwords from [Rsp+16] into context->Xmm6..
; DD 0xa548f3fc is the byte sequence FC F3 48 A5 = cld ; rep movsq.
4966 lea rsi,[16+rax]\r
4967 lea rdi,[512+r8]\r
4968 mov ecx,20\r
4969 DD 0xa548f3fc\r
4970\r
; The bulk prologue keeps the pre-frame rsp in r11 and pushes rbp right below
; it: recover both from the context.
4971 mov rax,QWORD[208+r8]\r
4972\r
4973 mov rbp,QWORD[((-8))+rax]\r
4974 mov QWORD[160+r8],rbp\r
4975\r
4976$L$common_seh_tail:\r
; rax = frame pointer chosen by the handler. The guarded routines reload
; rdi/rsi from [8+rsp]/[16+rsp] in their Win64 epilogue, so fetch them from
; the same slots and write Rsp, Rsi and Rdi back into the context.
4977 mov rdi,QWORD[8+rax]\r
4978 mov rsi,QWORD[16+rax]\r
4979 mov QWORD[152+r8],rax\r
4980 mov QWORD[168+r8],rsi\r
4981 mov QWORD[176+r8],rdi\r
4982\r
; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
4983 mov rdi,QWORD[40+r9]\r
4984 mov rsi,r8\r
4985 mov ecx,154\r
4986 DD 0xa548f3fc\r
4987\r
; RtlVirtualUnwind(0, disp->ImageBase, disp->ControlPc, disp->FunctionEntry,
;                  disp->ContextRecord, &disp->HandlerData,
;                  &disp->EstablisherFrame, NULL)
4988 mov rsi,r9\r
4989 xor rcx,rcx\r
4990 mov rdx,QWORD[8+rsi]\r
4991 mov r8,QWORD[rsi]\r
4992 mov r9,QWORD[16+rsi]\r
4993 mov r10,QWORD[40+rsi]\r
4994 lea r11,[56+rsi]\r
4995 lea r12,[24+rsi]\r
4996 mov QWORD[32+rsp],r10\r
4997 mov QWORD[40+rsp],r11\r
4998 mov QWORD[48+rsp],r12\r
4999 mov QWORD[56+rsp],rcx\r
5000 call QWORD[__imp_RtlVirtualUnwind]\r
5001\r
; Return 1 and restore everything pushed at handler entry.
5002 mov eax,1\r
5003 add rsp,64\r
5004 popfq\r
5005 pop r15\r
5006 pop r14\r
5007 pop r13\r
5008 pop r12\r
5009 pop rbp\r
5010 pop rbx\r
5011 pop rdi\r
5012 pop rsi\r
5013 DB 0F3h,0C3h ;repret\r
5014\r
5015\r
; Win64 function table. Each entry is three image-relative dwords:
; function begin address, function end address, and the address of the
; matching unwind-info record in .xdata.
5016section .pdata rdata align=4\r
5017ALIGN 4\r
5018 DD $L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase\r
5019 DD $L$SEH_end_aesni_ecb_encrypt wrt ..imagebase\r
5020 DD $L$SEH_info_ecb wrt ..imagebase\r
5021\r
5022 DD $L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase\r
5023 DD $L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase\r
5024 DD $L$SEH_info_ccm64_enc wrt ..imagebase\r
5025\r
5026 DD $L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase\r
5027 DD $L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase\r
5028 DD $L$SEH_info_ccm64_dec wrt ..imagebase\r
5029\r
5030 DD $L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase\r
5031 DD $L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase\r
5032 DD $L$SEH_info_ctr32 wrt ..imagebase\r
5033\r
5034 DD $L$SEH_begin_aesni_xts_encrypt wrt ..imagebase\r
5035 DD $L$SEH_end_aesni_xts_encrypt wrt ..imagebase\r
5036 DD $L$SEH_info_xts_enc wrt ..imagebase\r
5037\r
5038 DD $L$SEH_begin_aesni_xts_decrypt wrt ..imagebase\r
5039 DD $L$SEH_end_aesni_xts_decrypt wrt ..imagebase\r
5040 DD $L$SEH_info_xts_dec wrt ..imagebase\r
5041\r
5042 DD $L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase\r
5043 DD $L$SEH_end_aesni_ocb_encrypt wrt ..imagebase\r
5044 DD $L$SEH_info_ocb_enc wrt ..imagebase\r
5045\r
5046 DD $L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase\r
5047 DD $L$SEH_end_aesni_ocb_decrypt wrt ..imagebase\r
5048 DD $L$SEH_info_ocb_dec wrt ..imagebase\r
5049 DD $L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase\r
5050 DD $L$SEH_end_aesni_cbc_encrypt wrt ..imagebase\r
5051 DD $L$SEH_info_cbc wrt ..imagebase\r
5052\r
; The two key-setup routines use their global labels as the begin address
; and share one unwind-info record.
5053 DD aesni_set_decrypt_key wrt ..imagebase\r
5054 DD $L$SEH_end_set_decrypt_key wrt ..imagebase\r
5055 DD $L$SEH_info_key wrt ..imagebase\r
5056\r
5057 DD aesni_set_encrypt_key wrt ..imagebase\r
5058 DD $L$SEH_end_set_encrypt_key wrt ..imagebase\r
5059 DD $L$SEH_info_key wrt ..imagebase\r
; Win64 unwind-info records referenced from .pdata.
; Records starting with "DB 9,0,0,0" have version 1 with the exception-handler
; flag set (0x09 = 1 | 1<<3), a zero-length prologue and no unwind codes.
; They are followed by the image-relative address of the handler and then by
; the handler data that the handler reads through DISPATCHER_CONTEXT:
; image-relative addresses of labels inside the guarded function.
5060section .xdata rdata align=8\r
5061ALIGN 8\r
5062$L$SEH_info_ecb:\r
5063DB 9,0,0,0\r
5064 DD ecb_ccm64_se_handler wrt ..imagebase\r
5065 DD $L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase\r
5066$L$SEH_info_ccm64_enc:\r
5067DB 9,0,0,0\r
5068 DD ecb_ccm64_se_handler wrt ..imagebase\r
5069 DD $L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase\r
5070$L$SEH_info_ccm64_dec:\r
5071DB 9,0,0,0\r
5072 DD ecb_ccm64_se_handler wrt ..imagebase\r
5073 DD $L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase\r
5074$L$SEH_info_ctr32:\r
5075DB 9,0,0,0\r
5076 DD ctr_xts_se_handler wrt ..imagebase\r
5077 DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase\r
5078$L$SEH_info_xts_enc:\r
5079DB 9,0,0,0\r
5080 DD ctr_xts_se_handler wrt ..imagebase\r
5081 DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase\r
5082$L$SEH_info_xts_dec:\r
5083DB 9,0,0,0\r
5084 DD ctr_xts_se_handler wrt ..imagebase\r
5085 DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase\r
; The OCB records carry a third label (the "pop" point) plus a zero dword.
5086$L$SEH_info_ocb_enc:\r
5087DB 9,0,0,0\r
5088 DD ocb_se_handler wrt ..imagebase\r
5089 DD $L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase\r
5090 DD $L$ocb_enc_pop wrt ..imagebase\r
5091 DD 0\r
5092$L$SEH_info_ocb_dec:\r
5093DB 9,0,0,0\r
5094 DD ocb_se_handler wrt ..imagebase\r
5095 DD $L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase\r
5096 DD $L$ocb_dec_pop wrt ..imagebase\r
5097 DD 0\r
; cbc_se_handler compares against code labels directly, so this record has
; no handler data.
5098$L$SEH_info_cbc:\r
5099DB 9,0,0,0\r
5100 DD cbc_se_handler wrt ..imagebase\r
; Plain unwind record (no handler) shared by the two key-setup routines:
; version 1, 4-byte prologue, one unwind code. The code 0x04,0x02 means
; "at prologue offset 4, small stack allocation of 8 bytes", matching the
; "sub rsp,8" those routines emit as raw bytes; the last two bytes are padding.
5101$L$SEH_info_key:\r
5102DB 0x01,0x04,0x01,0x00\r
5103DB 0x04,0x02,0x00,0x00\r