2 ;; Copyright (c) 2012-2018, Intel Corporation
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 %include "include/os.asm"
29 %define NO_AESNI_RENAME
30 %include "include/aesni_emu.inc"
31 %include "include/clear_regs.asm"
;; Register aliases for the two expanded-key output pointers.
;; NOTE(review): two alternative alias sets are defined back to back; the
;; conditional-assembly directives that choose between them, and the
;; definition of KEY used by the entry points, are not visible in this view.
35 %define EXP_ENC_KEYS rsi
36 %define EXP_DEC_KEYS rdx
;; This second set matches the "arg 2: rdx" / "arg 3: r8" comments on the
;; entry points further down in this file.
39 %define EXP_ENC_KEYS rdx
40 %define EXP_DEC_KEYS r8
;; key_expansion_1_192_sse <byte offset>
;; Stores the 16 bytes of xmm1 (unaligned store) at EXP_ENC_KEYS + <byte offset>.
;; Reads xmm1, xmm2 and xmm3; overwrites xmm2 and xmm3.
;; NOTE(review): the instructions that fold xmm3/xmm2 into xmm1 and the macro
;; terminator are not visible in this view; the AVX counterpart in this file
;; shows the corresponding vpxor steps -- confirm against the full file.
46 %macro key_expansion_1_192_sse 1
47 ;; Assumes the xmm3 includes all zeros at this point.
48 pshufd xmm2, xmm2, 11111111b ; replicate dword 3 of xmm2 into all four dwords
49 shufps xmm3, xmm1, 00010000b ; xmm3 = {xmm3[0], xmm3[0], xmm1[1], xmm1[0]} (low dword first)
51 shufps xmm3, xmm1, 10001100b ; xmm3 = {xmm3[0], xmm3[3], xmm1[0], xmm1[2]} (low dword first)
54 movdqu [EXP_ENC_KEYS + %1], xmm1 ; write 16 bytes of expanded key at the given byte offset
57 ; Calculate w10 and w11 using calculated w9 and known w4-w5
;; key_expansion_2_192_sse <byte offset>
;; Stores 16 bytes of xmm7 (unaligned store) at EXP_ENC_KEYS + <byte offset>.
;; Only the low qword of xmm7 carries the high qword of xmm4; callers in this
;; file follow up with key_expansion_1 at <byte offset> + 8, which overwrites
;; the upper 8 bytes written here.
;; NOTE(review): the instructions that update xmm4 and the macro terminator
;; are not visible in this view.
58 %macro key_expansion_2_192_sse 1
61 shufps xmm6, xmm1, 11110000b ; xmm6 = {xmm6[0], xmm6[0], xmm1[3], xmm1[3]} (low dword first)
64 pshufd xmm7, xmm4, 00001110b ; xmm7 = {xmm4[2], xmm4[3], xmm4[0], xmm4[0]} (low dword first)
65 movdqu [EXP_ENC_KEYS + %1], xmm7
;; key_dec_192_sse <i>
;; Loads encrypt round key <i> (aligned load) into xmm0 and stores xmm1
;; (aligned store) into decrypt slot 12 - <i>, i.e. in reversed order.
;; NOTE(review): the instruction that derives xmm1 from xmm0 and the macro
;; terminator are not visible in this view; the no_aesni variant in this file
;; uses EMULATE_AESIMC at that point.
68 %macro key_dec_192_sse 1
69 movdqa xmm0, [EXP_ENC_KEYS + 16 * %1] ; movdqa: address must be 16-byte aligned
71 movdqa [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1 ; movdqa: address must be 16-byte aligned
;; key_dec_192_sse_no_aesni <i>
;; Loads encrypt round key <i> into xmm0, passes it through EMULATE_AESIMC
;; (result in xmm1) and stores xmm1 into decrypt slot 12 - <i>.
;; Both memory accesses use movdqa, so both key arrays must be 16-byte aligned.
;; NOTE(review): EMULATE_AESIMC is not defined in this view -- presumably a
;; software replacement for the aesimc instruction from include/aesni_emu.inc;
;; its register clobbers are not visible here. The macro terminator is also
;; not visible.
74 %macro key_dec_192_sse_no_aesni 1
75 movdqa xmm0, [EXP_ENC_KEYS + 16 * %1]
76 EMULATE_AESIMC xmm1, xmm0
77 movdqa [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1
;; key_expansion_1_192_avx <byte offset>
;; Updates xmm1 in place and stores it (unaligned) at EXP_ENC_KEYS + <byte offset>.
;; With dword 0 of xmm3 equal to zero, the two shuffle/xor pairs turn
;; xmm1 = {a, b, c, d} into the running XOR {a, a^b, a^b^c, a^b^c^d}; the
;; broadcast dword 3 of xmm2 is then XORed into every dword.
;; Dword 0 of xmm3 is only ever copied from itself, so it stays zero for the
;; next invocation.
;; Reads xmm1, xmm2, xmm3; overwrites xmm1, xmm2, xmm3.
;; NOTE(review): the macro terminator is not visible in this view.
80 %macro key_expansion_1_192_avx 1
81 ;; Assumes the xmm3 includes all zeros at this point.
82 vpshufd xmm2, xmm2, 11111111b ; replicate dword 3 of xmm2 into all four dwords
83 vshufps xmm3, xmm3, xmm1, 00010000b ; xmm3 = {xmm3[0], xmm3[0], xmm1[1], xmm1[0]} (low dword first)
84 vpxor xmm1, xmm1, xmm3
85 vshufps xmm3, xmm3, xmm1, 10001100b ; xmm3 = {xmm3[0], xmm3[3], xmm1[0], xmm1[2]} (low dword first)
86 vpxor xmm1, xmm1, xmm3
87 vpxor xmm1, xmm1, xmm2 ; fold in the broadcast dword from xmm2
88 vmovdqu [EXP_ENC_KEYS + %1], xmm1 ; write 16 bytes of expanded key at the given byte offset
91 ; Calculate w10 and w11 using calculated w9 and known w4-w5
;; key_expansion_2_192_avx <byte offset>
;; Updates xmm4 (xmm4 ^= xmm6 after xmm6 has been rebuilt and XORed with xmm5),
;; then stores xmm7 (unaligned, 16 bytes) at EXP_ENC_KEYS + <byte offset>.
;; Only the low qword of xmm7 carries the high qword of xmm4; callers in this
;; file follow up with key_expansion_1 at <byte offset> + 8, which overwrites
;; the upper 8 bytes written here.
;; Reads xmm1, xmm4, xmm5, xmm6; overwrites xmm4, xmm6, xmm7.
;; NOTE(review): the instructions that prepare xmm5 and the macro terminator
;; are not visible in this view.
92 %macro key_expansion_2_192_avx 1
95 vshufps xmm6, xmm6, xmm1, 11110000b ; xmm6 = {xmm6[0], xmm6[0], xmm1[3], xmm1[3]} (low dword first)
96 vpxor xmm6, xmm6, xmm5
97 vpxor xmm4, xmm4, xmm6
98 vpshufd xmm7, xmm4, 00001110b ; xmm7 = {xmm4[2], xmm4[3], xmm4[0], xmm4[0]} (low dword first)
99 vmovdqu [EXP_ENC_KEYS + %1], xmm7
;; key_dec_192_avx <i>
;; Loads encrypt round key <i> (aligned load) into xmm0 and stores xmm1
;; (aligned store) into decrypt slot 12 - <i>, i.e. in reversed order.
;; NOTE(review): the instruction that derives xmm1 from xmm0 and the macro
;; terminator are not visible in this view.
102 %macro key_dec_192_avx 1
103 vmovdqa xmm0, [EXP_ENC_KEYS + 16 * %1] ; vmovdqa: address must be 16-byte aligned
105 vmovdqa [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1 ; vmovdqa: address must be 16-byte aligned
110 ; void aes_keyexp_192(UINT128 *key,
111 ; UINT128 *enc_exp_keys,
112 ; UINT128 *dec_exp_keys);
114 ; arg 1: rcx: pointer to key
115 ; arg 2: rdx: pointer to expanded key array for encrypt
116 ; arg 3: r8: pointer to expanded key array for decrypt
;
; SSE / AES-NI variant. Reads 24 bytes from KEY (16 + 8, unaligned loads) and
; writes 208 contiguous bytes (13 round keys of 16 bytes) at EXP_ENC_KEYS.
; The decrypt array is accessed with movdqa, and the encrypt array is re-read
; with movdqa near the end, so both arrays must be 16-byte aligned.
; Uses xmm0-xmm7; xmm6/xmm7 are spilled to the stack and reloaded before the
; return label.
; NOTE(review): the entry label, the stack allocation, the compares feeding
; the jz branches, the key_dec_192_sse invocations and the ret are not visible
; in this view.
118 MKGLOBAL(aes_keyexp_192_sse,function,)
; NOTE(review): presumably one early-out per argument (null check); the
; compare/test instructions are not visible here -- confirm.
123 jz aes_keyexp_192_sse_return
125 jz aes_keyexp_192_sse_return
127 jz aes_keyexp_192_sse_return
; Preserve xmm6/xmm7 (reloaded just before the return label).
132 movdqa [rsp + 0*16], xmm6
133 movdqa [rsp + 1*16], xmm7
136 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits
137 movq [EXP_ENC_KEYS + 16], xmm7 ; Storing key in memory where all key expansion
; movq zeroed the upper qword of xmm7, so this yields
; xmm4 = {0, 0, key dword 4, key dword 5} (low dword first).
138 pshufd xmm4, xmm7, 01001111b
139 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
140 movdqu [EXP_ENC_KEYS], xmm1 ; Storing key in memory where all key expansion
; First 16 key bytes go to decrypt slots 0 and 12; slot 0 is overwritten
; with encrypt round key 12 once the expansion is finished.
141 movdqa [EXP_DEC_KEYS + 16*0], xmm1
142 movdqa [EXP_DEC_KEYS + 16*12], xmm1
144 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion
145 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion
; Eight rounds follow, with round constants 0x01 .. 0x80. Each round writes
; 24 new bytes: key_expansion_1 stores 16 bytes at offset 24*n and
; key_expansion_2 stores at offset 24*n + 16 (its upper 8 bytes are
; overwritten by the next key_expansion_1 store).
147 aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
148 key_expansion_1_192_sse 24
149 key_expansion_2_192_sse 40
151 aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
152 key_expansion_1_192_sse 48
153 key_expansion_2_192_sse 64
155 aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
156 key_expansion_1_192_sse 72
157 key_expansion_2_192_sse 88
159 aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
160 key_expansion_1_192_sse 96
161 key_expansion_2_192_sse 112
163 aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
164 key_expansion_1_192_sse 120
165 key_expansion_2_192_sse 136
167 aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
168 key_expansion_1_192_sse 144
169 key_expansion_2_192_sse 160
171 aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
172 key_expansion_1_192_sse 168
173 key_expansion_2_192_sse 184
; Last round: only key_expansion_1 is needed; its store ends at byte 207.
175 aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
176 key_expansion_1_192_sse 192
178 ;;; we have already saved the 12 th key, which is pure input on the
;;; (decrypt slot 12 was filled from the input key above; the rest of this
;;; sentence is cut off in this view). Now copy encrypt round key 12 into
;;; decrypt slot 0.
180 movdqa xmm0, [EXP_ENC_KEYS + 16 * 12]
181 movdqa [EXP_DEC_KEYS + 16*0], xmm0
182 ;;; generate remaining decrypt keys
; NOTE(review): no key_dec_192_sse invocations are visible at this point in
; this view; the no_aesni variant invokes its key_dec macro for keys 1..11.
; NOTE(review): clear_scratch_* are not defined in this view (presumably from
; include/clear_regs.asm); presumably they wipe key material from scratch
; registers -- confirm.
196 clear_scratch_gps_asm
197 clear_scratch_xmms_sse_asm
; Reload the xmm6/xmm7 values saved on entry.
201 movdqa xmm6, [rsp + 0*16]
202 movdqa xmm7, [rsp + 1*16]
206 aes_keyexp_192_sse_return:
; aes_keyexp_192_sse_no_aesni: same flow as the AES-NI SSE routine in this
; file, but using EMULATE_AESKEYGENASSIST / EMULATE_AESIMC in place of the
; aeskeygenassist / aesimc instructions.
; Reads 24 bytes from KEY (unaligned loads), writes 208 bytes (13 round keys)
; at EXP_ENC_KEYS and all 13 decrypt slots (0..12) at EXP_DEC_KEYS.
; Both key arrays are accessed with movdqa at some point, so both must be
; 16-byte aligned.
; NOTE(review): the EMULATE_* macros are not defined in this view (presumably
; from include/aesni_emu.inc); any registers they clobber are not visible.
; The stack allocation, the compares feeding the jz branches and the ret are
; not visible either.
209 MKGLOBAL(aes_keyexp_192_sse_no_aesni,function,)
210 aes_keyexp_192_sse_no_aesni:
; NOTE(review): presumably one early-out per argument (null check); the
; compare/test instructions are not visible here -- confirm.
214 jz aes_keyexp_192_sse_no_aesni_return
216 jz aes_keyexp_192_sse_no_aesni_return
218 jz aes_keyexp_192_sse_no_aesni_return
; Preserve xmm6/xmm7 (reloaded just before the return label).
223 movdqa [rsp + 0*16], xmm6
224 movdqa [rsp + 1*16], xmm7
227 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits
228 movq [EXP_ENC_KEYS + 16], xmm7 ; Storing key in memory where all key expansion
; movq zeroed the upper qword of xmm7, so this yields
; xmm4 = {0, 0, key dword 4, key dword 5} (low dword first).
229 pshufd xmm4, xmm7, 01001111b
230 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
231 movdqu [EXP_ENC_KEYS], xmm1 ; Storing key in memory where all key expansion
; First 16 key bytes go to decrypt slots 0 and 12; slot 0 is overwritten
; with encrypt round key 12 once the expansion is finished.
232 movdqa [EXP_DEC_KEYS + 16*0], xmm1
233 movdqa [EXP_DEC_KEYS + 16*12], xmm1
235 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion
236 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion
; Eight rounds follow, with round constants 0x01 .. 0x80. Each round writes
; 24 new bytes: key_expansion_1 stores 16 bytes at offset 24*n and
; key_expansion_2 stores at offset 24*n + 16 (its upper 8 bytes are
; overwritten by the next key_expansion_1 store).
238 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
239 key_expansion_1_192_sse 24
240 key_expansion_2_192_sse 40
242 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
243 key_expansion_1_192_sse 48
244 key_expansion_2_192_sse 64
246 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
247 key_expansion_1_192_sse 72
248 key_expansion_2_192_sse 88
250 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
251 key_expansion_1_192_sse 96
252 key_expansion_2_192_sse 112
254 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
255 key_expansion_1_192_sse 120
256 key_expansion_2_192_sse 136
258 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
259 key_expansion_1_192_sse 144
260 key_expansion_2_192_sse 160
262 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
263 key_expansion_1_192_sse 168
264 key_expansion_2_192_sse 184
; Last round: only key_expansion_1 is needed; its store ends at byte 207.
266 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x80 ; Generate round key 12
267 key_expansion_1_192_sse 192
269 ;;; we have already saved the 12 th key, which is pure input on the
;;; (decrypt slot 12 was filled from the input key above; the rest of this
;;; sentence is cut off in this view). Now copy encrypt round key 12 into
;;; decrypt slot 0.
271 movdqa xmm0, [EXP_ENC_KEYS + 16 * 12]
272 movdqa [EXP_DEC_KEYS + 16*0], xmm0
273 ;;; generate remaining decrypt keys
; Encrypt round key i (i = 1..11) is transformed and written to decrypt
; slot 12 - i, filling slots 11 down to 1.
274 key_dec_192_sse_no_aesni 1
275 key_dec_192_sse_no_aesni 2
276 key_dec_192_sse_no_aesni 3
277 key_dec_192_sse_no_aesni 4
278 key_dec_192_sse_no_aesni 5
279 key_dec_192_sse_no_aesni 6
280 key_dec_192_sse_no_aesni 7
281 key_dec_192_sse_no_aesni 8
282 key_dec_192_sse_no_aesni 9
283 key_dec_192_sse_no_aesni 10
284 key_dec_192_sse_no_aesni 11
; NOTE(review): clear_scratch_* are not defined in this view (presumably from
; include/clear_regs.asm); presumably they wipe key material from scratch
; registers -- confirm.
287 clear_scratch_gps_asm
288 clear_scratch_xmms_sse_asm
; Reload the xmm6/xmm7 values saved on entry.
292 movdqa xmm6, [rsp + 0*16]
293 movdqa xmm7, [rsp + 1*16]
297 aes_keyexp_192_sse_no_aesni_return:
; AVX (VEX-encoded) variant of the 192-bit key expansion, exported under
; three names (avx, avx2, avx512).
; Reads 24 bytes from KEY (unaligned loads) and writes 208 contiguous bytes
; (13 round keys of 16 bytes) at EXP_ENC_KEYS. The decrypt array is accessed
; with vmovdqa, and the encrypt array is re-read with vmovdqa near the end,
; so both arrays must be 16-byte aligned.
; NOTE(review): only the avx512 entry label is visible in this view; the
; other two labels, the stack allocation, the compares feeding the jz
; branches, the key_dec_192_avx invocations and the ret are not visible.
300 MKGLOBAL(aes_keyexp_192_avx,function,)
301 MKGLOBAL(aes_keyexp_192_avx2,function,)
302 MKGLOBAL(aes_keyexp_192_avx512,function,)
305 aes_keyexp_192_avx512:
; NOTE(review): presumably one early-out per argument (null check); the
; compare/test instructions are not visible here -- confirm.
309 jz aes_keyexp_192_avx_return
311 jz aes_keyexp_192_avx_return
313 jz aes_keyexp_192_avx_return
; Preserve xmm6/xmm7 (reloaded just before the return label).
318 vmovdqa [rsp + 0*16], xmm6
319 vmovdqa [rsp + 1*16], xmm7
322 vmovq xmm7, [KEY + 16] ; loading the AES key, 64 bits
323 vmovq [EXP_ENC_KEYS + 16], xmm7 ; Storing key in memory where all key expansion
; vmovq zeroed the upper qword of xmm7, so this yields
; xmm4 = {0, 0, key dword 4, key dword 5} (low dword first).
324 vpshufd xmm4, xmm7, 01001111b
325 vmovdqu xmm1, [KEY] ; loading the AES key, 128 bits
326 vmovdqu [EXP_ENC_KEYS], xmm1 ; Storing key in memory where all key expansion
; First 16 key bytes go to decrypt slots 0 and 12; slot 0 is overwritten
; with encrypt round key 12 once the expansion is finished.
327 vmovdqa [EXP_DEC_KEYS + 16*0], xmm1
328 vmovdqa [EXP_DEC_KEYS + 16*12], xmm1
330 vpxor xmm3, xmm3, xmm3 ; xmm3 = 0, as assumed by key_expansion_1_192_avx
331 vpxor xmm6, xmm6, xmm6 ; xmm6 = 0 before the first key_expansion_2_192_avx
; Eight rounds follow, with round constants 0x01 .. 0x80. Each round writes
; 24 new bytes: key_expansion_1 stores 16 bytes at offset 24*n and
; key_expansion_2 stores at offset 24*n + 16 (its upper 8 bytes are
; overwritten by the next key_expansion_1 store).
333 vaeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
334 key_expansion_1_192_avx 24
335 key_expansion_2_192_avx 40
337 vaeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
338 key_expansion_1_192_avx 48
339 key_expansion_2_192_avx 64
341 vaeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
342 key_expansion_1_192_avx 72
343 key_expansion_2_192_avx 88
345 vaeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
346 key_expansion_1_192_avx 96
347 key_expansion_2_192_avx 112
349 vaeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
350 key_expansion_1_192_avx 120
351 key_expansion_2_192_avx 136
353 vaeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
354 key_expansion_1_192_avx 144
355 key_expansion_2_192_avx 160
357 vaeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
358 key_expansion_1_192_avx 168
359 key_expansion_2_192_avx 184
; Last round: only key_expansion_1 is needed; its store ends at byte 207.
361 vaeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
362 key_expansion_1_192_avx 192
364 ;;; we have already saved the 12 th key, which is pure input on the
;;; (decrypt slot 12 was filled from the input key above; the rest of this
;;; sentence is cut off in this view). Now copy encrypt round key 12 into
;;; decrypt slot 0.
366 vmovdqa xmm0, [EXP_ENC_KEYS + 16 * 12]
367 vmovdqa [EXP_DEC_KEYS + 16*0], xmm0
368 ;;; generate remaining decrypt keys
; NOTE(review): no key_dec_192_avx invocations are visible at this point in
; this view.
; NOTE(review): clear_scratch_* are not defined in this view (presumably from
; include/clear_regs.asm); presumably they wipe key material from scratch
; registers -- confirm.
382 clear_scratch_gps_asm
383 clear_scratch_xmms_avx_asm
; Reload the xmm6/xmm7 values saved on entry.
387 vmovdqa xmm6, [rsp + 0*16]
388 vmovdqa xmm7, [rsp + 1*16]
392 aes_keyexp_192_avx_return:
395 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
397 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
400 ; void aes_keyexp_192_enc_sse(UINT128 *key,
401 ; UINT128 *enc_exp_keys);
403 ; arg 1: rcx: pointer to key
404 ; arg 2: rdx: pointer to expanded key array for encrypt
;
; Encrypt-only key expansion (SSE / AES-NI). Reads 24 bytes from KEY and
; writes 208 contiguous bytes (13 round keys of 16 bytes) at EXP_ENC_KEYS.
; All visible accesses to KEY and EXP_ENC_KEYS are unaligned-safe
; (movq / movdqu); EXP_DEC_KEYS is not touched.
; NOTE(review): the stack allocation, the compares feeding the jz branches
; and the ret are not visible in this view.
406 MKGLOBAL(aes_keyexp_192_enc_sse,function,)
407 aes_keyexp_192_enc_sse:
; NOTE(review): presumably one early-out per argument (null check); the
; compare/test instructions are not visible here -- confirm.
411 jz aes_keyexp_192_enc_sse_return
413 jz aes_keyexp_192_enc_sse_return
; Preserve xmm6/xmm7 (reloaded just before the return label).
418 movdqa [rsp + 0*16], xmm6
419 movdqa [rsp + 1*16], xmm7
422 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits
423 movq [EXP_ENC_KEYS + 16], xmm7 ; Storing key in memory where all key expansion
; movq zeroed the upper qword of xmm7, so this yields
; xmm4 = {0, 0, key dword 4, key dword 5} (low dword first).
424 pshufd xmm4, xmm7, 01001111b
425 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
426 movdqu [EXP_ENC_KEYS], xmm1 ; Storing key in memory where all key expansion
428 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion.
429 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion.
; Eight rounds follow, with round constants 0x01 .. 0x80. Each round writes
; 24 new bytes: key_expansion_1 stores 16 bytes at offset 24*n and
; key_expansion_2 stores at offset 24*n + 16 (its upper 8 bytes are
; overwritten by the next key_expansion_1 store).
431 aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
432 key_expansion_1_192_sse 24
433 key_expansion_2_192_sse 40
435 aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
436 key_expansion_1_192_sse 48
437 key_expansion_2_192_sse 64
439 aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
440 key_expansion_1_192_sse 72
441 key_expansion_2_192_sse 88
443 aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
444 key_expansion_1_192_sse 96
445 key_expansion_2_192_sse 112
447 aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
448 key_expansion_1_192_sse 120
449 key_expansion_2_192_sse 136
451 aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
452 key_expansion_1_192_sse 144
453 key_expansion_2_192_sse 160
455 aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
456 key_expansion_1_192_sse 168
457 key_expansion_2_192_sse 184
; Last round: only key_expansion_1 is needed; its store ends at byte 207.
459 aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
460 key_expansion_1_192_sse 192
; NOTE(review): clear_scratch_* are not defined in this view (presumably from
; include/clear_regs.asm); presumably they wipe key material from scratch
; registers -- confirm.
463 clear_scratch_gps_asm
464 clear_scratch_xmms_sse_asm
; Reload the xmm6/xmm7 values saved on entry.
468 movdqa xmm6, [rsp + 0*16]
469 movdqa xmm7, [rsp + 1*16]
473 aes_keyexp_192_enc_sse_return:
; aes_keyexp_192_enc_sse_no_aesni: encrypt-only key expansion using
; EMULATE_AESKEYGENASSIST in place of the aeskeygenassist instruction.
; Reads 24 bytes from KEY and writes 208 contiguous bytes (13 round keys of
; 16 bytes) at EXP_ENC_KEYS using unaligned-safe accesses; EXP_DEC_KEYS is
; not touched.
; NOTE(review): EMULATE_AESKEYGENASSIST is not defined in this view
; (presumably from include/aesni_emu.inc); any registers it clobbers are not
; visible. The stack allocation, the compares feeding the jz branches and the
; ret are not visible either.
476 MKGLOBAL(aes_keyexp_192_enc_sse_no_aesni,function,)
477 aes_keyexp_192_enc_sse_no_aesni:
; NOTE(review): presumably one early-out per argument (null check); the
; compare/test instructions are not visible here -- confirm.
481 jz aes_keyexp_192_enc_sse_no_aesni_return
483 jz aes_keyexp_192_enc_sse_no_aesni_return
; Preserve xmm6/xmm7 (reloaded just before the return label).
488 movdqa [rsp + 0*16], xmm6
489 movdqa [rsp + 1*16], xmm7
492 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits
493 movq [EXP_ENC_KEYS + 16], xmm7 ; Storing key in memory where all key expansion
; movq zeroed the upper qword of xmm7, so this yields
; xmm4 = {0, 0, key dword 4, key dword 5} (low dword first).
494 pshufd xmm4, xmm7, 01001111b
495 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
496 movdqu [EXP_ENC_KEYS], xmm1 ; Storing key in memory where all key expansion
498 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion.
499 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion.
; Eight rounds follow, with round constants 0x01 .. 0x80. Each round writes
; 24 new bytes: key_expansion_1 stores 16 bytes at offset 24*n and
; key_expansion_2 stores at offset 24*n + 16 (its upper 8 bytes are
; overwritten by the next key_expansion_1 store).
501 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
502 key_expansion_1_192_sse 24
503 key_expansion_2_192_sse 40
505 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
506 key_expansion_1_192_sse 48
507 key_expansion_2_192_sse 64
509 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
510 key_expansion_1_192_sse 72
511 key_expansion_2_192_sse 88
513 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
514 key_expansion_1_192_sse 96
515 key_expansion_2_192_sse 112
517 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
518 key_expansion_1_192_sse 120
519 key_expansion_2_192_sse 136
521 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
522 key_expansion_1_192_sse 144
523 key_expansion_2_192_sse 160
525 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
526 key_expansion_1_192_sse 168
527 key_expansion_2_192_sse 184
; Last round: only key_expansion_1 is needed; its store ends at byte 207.
529 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x80 ; Generate round key 12
530 key_expansion_1_192_sse 192
; NOTE(review): clear_scratch_* are not defined in this view (presumably from
; include/clear_regs.asm); presumably they wipe key material from scratch
; registers -- confirm.
533 clear_scratch_gps_asm
534 clear_scratch_xmms_sse_asm
; Reload the xmm6/xmm7 values saved on entry.
538 movdqa xmm6, [rsp + 0*16]
539 movdqa xmm7, [rsp + 1*16]
543 aes_keyexp_192_enc_sse_no_aesni_return:
; Encrypt-only key expansion, AVX (VEX-encoded) variant. One body is exported
; under three names; all three labels below resolve to the same address.
; Reads 24 bytes from KEY and writes 208 contiguous bytes (13 round keys of
; 16 bytes) at EXP_ENC_KEYS using unaligned-safe accesses (vmovq / vmovdqu);
; EXP_DEC_KEYS is not touched.
; NOTE(review): the stack allocation, the compares feeding the jz branches
; and the ret are not visible in this view.
546 MKGLOBAL(aes_keyexp_192_enc_avx,function,)
547 MKGLOBAL(aes_keyexp_192_enc_avx2,function,)
548 MKGLOBAL(aes_keyexp_192_enc_avx512,function,)
549 aes_keyexp_192_enc_avx:
550 aes_keyexp_192_enc_avx2:
551 aes_keyexp_192_enc_avx512:
; NOTE(review): presumably one early-out per argument (null check); the
; compare/test instructions are not visible here -- confirm.
555 jz aes_keyexp_192_enc_avx_return
557 jz aes_keyexp_192_enc_avx_return
; Preserve xmm6/xmm7 (reloaded just before the return label).
562 vmovdqa [rsp + 0*16], xmm6
563 vmovdqa [rsp + 1*16], xmm7
566 vmovq xmm7, [KEY + 16] ; loading the AES key, 64 bits
567 vmovq [EXP_ENC_KEYS + 16], xmm7 ; Storing key in memory where all key expansion
; vmovq zeroed the upper qword of xmm7, so this yields
; xmm4 = {0, 0, key dword 4, key dword 5} (low dword first).
568 vpshufd xmm4, xmm7, 01001111b
569 vmovdqu xmm1, [KEY] ; loading the AES key, 128 bits
570 vmovdqu [EXP_ENC_KEYS], xmm1 ; Storing key in memory where all key expansion
572 vpxor xmm3, xmm3, xmm3 ; xmm3 = 0, as assumed by key_expansion_1_192_avx
573 vpxor xmm6, xmm6, xmm6 ; xmm6 = 0 before the first key_expansion_2_192_avx
; Eight rounds follow, with round constants 0x01 .. 0x80. Each round writes
; 24 new bytes: key_expansion_1 stores 16 bytes at offset 24*n and
; key_expansion_2 stores at offset 24*n + 16 (its upper 8 bytes are
; overwritten by the next key_expansion_1 store).
575 vaeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
576 key_expansion_1_192_avx 24
577 key_expansion_2_192_avx 40
579 vaeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
580 key_expansion_1_192_avx 48
581 key_expansion_2_192_avx 64
583 vaeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
584 key_expansion_1_192_avx 72
585 key_expansion_2_192_avx 88
587 vaeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
588 key_expansion_1_192_avx 96
589 key_expansion_2_192_avx 112
591 vaeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
592 key_expansion_1_192_avx 120
593 key_expansion_2_192_avx 136
595 vaeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
596 key_expansion_1_192_avx 144
597 key_expansion_2_192_avx 160
599 vaeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
600 key_expansion_1_192_avx 168
601 key_expansion_2_192_avx 184
; Last round: only key_expansion_1 is needed; its store ends at byte 207.
603 vaeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
604 key_expansion_1_192_avx 192
; NOTE(review): clear_scratch_* are not defined in this view (presumably from
; include/clear_regs.asm); presumably they wipe key material from scratch
; registers -- confirm.
607 clear_scratch_gps_asm
608 clear_scratch_xmms_avx_asm
; Reload the xmm6/xmm7 values saved on entry.
612 vmovdqa xmm6, [rsp + 0*16]
613 vmovdqa xmm7, [rsp + 1*16]
617 aes_keyexp_192_enc_avx_return:
; Empty ELF note section declared with the noexec attribute, i.e. this object
; does not request an executable stack.
; NOTE(review): any OS conditional guarding this directive is not visible in
; this view.
621 section .note.GNU-stack noalloc noexec nowrite progbits