2 ;; Copyright (c) 2012-2018, Intel Corporation
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; Build-time setup.
; NO_AESNI_RENAME is defined before aesni_emu.inc is pulled in; the EMULATE_*
; macros used further down are presumably provided by that include -- confirm.
29 %define NO_AESNI_RENAME
30 %include "aesni_emu.inc"
; Register aliases for the two expanded-key pointers. Two pairs appear back to
; back; they are presumably the two arms of a per-platform conditional whose
; directives are not visible in this listing -- TODO confirm. The rdx/r8 pair
; matches the "arg 2"/"arg 3" registers named in the routine header comments.
; NOTE(review): KEY is used by every routine but is not defined in the visible
; lines.
34 %define EXP_ENC_KEYS rsi
35 %define EXP_DEC_KEYS rdx
38 %define EXP_ENC_KEYS rdx
39 %define EXP_DEC_KEYS r8
; key_expansion_1_192_sse <offset>
;   %1  - byte offset from EXP_ENC_KEYS that receives the 16 bytes of xmm1.
;   In  - xmm1, xmm2 (the key-generation assist result at every call site), xmm3.
;   Out - xmm2 and xmm3 are overwritten; xmm1 is written to memory.
; NOTE(review): the embedded listing numbers skip 49 and 51-52 here, and the AVX
; twin of this macro XORs into xmm1 at the matching points. Confirm the full
; macro body (and its %endmacro) against the original file.
45 %macro key_expansion_1_192_sse 1
46 ;; Assumes the xmm3 includes all zeros at this point.
47 pshufd xmm2, xmm2, 11111111b ; replicate the top dword of xmm2 into all four lanes
48 shufps xmm3, xmm1, 00010000b
50 shufps xmm3, xmm1, 10001100b
53 movdqu [EXP_ENC_KEYS + %1], xmm1 ; unaligned store: call sites pass offsets such as 24 that are not 16-byte multiples
56 ; Calculate w10 and w11 using calculated w9 and known w4-w5
;   %1  - byte offset from EXP_ENC_KEYS that receives the 16 bytes of xmm7.
;   Every call site follows this macro with key_expansion_1_192_sse at offset
;   %1 + 8, so only the low 8 bytes stored here remain in the finished schedule.
; NOTE(review): the embedded listing numbers skip 58-59 and 61-62, so part of
; this macro body (and its %endmacro) is not visible; the AVX twin XORs xmm5
; into xmm6 and xmm6 into xmm4 at the matching points.
57 %macro key_expansion_2_192_sse 1
60 shufps xmm6, xmm1, 11110000b
63 pshufd xmm7, xmm4, 00001110b ; low qword of xmm7 = high qword of xmm4
64 movdqu [EXP_ENC_KEYS + %1], xmm7 ; unaligned 16-byte store
; key_dec_192_sse <round>
;   %1  - round-key index in the encrypt schedule.
;   Loads encrypt round key %1 into xmm0 and writes xmm1 to decrypt slot
;   (12 - %1), i.e. the decrypt array is filled in reverse order. Both accesses
;   are aligned moves, so both arrays must be 16-byte aligned.
; NOTE(review): no visible line writes xmm1 between the load and the store. The
; listing skips number 69, and the _no_aesni variant converts xmm0 into xmm1 at
; that point -- confirm against the original file.
67 %macro key_dec_192_sse 1
68 movdqa xmm0, [EXP_ENC_KEYS + 16 * %1]
70 movdqa [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1
; key_dec_192_sse_no_aesni <round>
;   %1  - round-key index in the encrypt schedule.
;   Loads encrypt round key %1, passes it through EMULATE_AESIMC (defined outside
;   this file; presumably a software stand-in for the AESIMC instruction --
;   confirm) and stores the result in decrypt slot (12 - %1).
;   Clobbers xmm0 and xmm1, plus whatever EMULATE_AESIMC itself uses.
;   Both memory accesses are aligned moves, so both arrays must be 16-byte
;   aligned.
73 %macro key_dec_192_sse_no_aesni 1
74 movdqa xmm0, [EXP_ENC_KEYS + 16 * %1]
75 EMULATE_AESIMC xmm1, xmm0
76 movdqa [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1
; key_expansion_1_192_avx <offset>
;   %1  - byte offset from EXP_ENC_KEYS that receives the 16 bytes of xmm1.
;   In  - xmm1, xmm2 (the key-generation assist result at every call site), xmm3.
;   Out - xmm1 updated and stored; xmm2 and xmm3 are overwritten.
79 %macro key_expansion_1_192_avx 1
80 ;; Assumes the xmm3 includes all zeros at this point.
81 vpshufd xmm2, xmm2, 11111111b ; replicate the top dword of xmm2 into all four lanes
82 vshufps xmm3, xmm3, xmm1, 00010000b
83 vpxor xmm1, xmm1, xmm3
84 vshufps xmm3, xmm3, xmm1, 10001100b
85 vpxor xmm1, xmm1, xmm3
86 vpxor xmm1, xmm1, xmm2 ; fold in the replicated assist dword
87 vmovdqu [EXP_ENC_KEYS + %1], xmm1 ; unaligned store: call sites pass offsets such as 24 that are not 16-byte multiples
90 ; Calculate w10 and w11 using calculated w9 and known w4-w5
;   %1  - byte offset from EXP_ENC_KEYS that receives the 16 bytes of xmm7.
;   Updates xmm6, xmm4 and xmm7. Every call site follows this macro with
;   key_expansion_1_192_avx at offset %1 + 8, so only the low 8 bytes stored
;   here remain in the finished schedule.
; NOTE(review): xmm5 is read below but never written in the visible lines; the
; embedded listing numbers skip 92-93, so its setup is presumably there.
91 %macro key_expansion_2_192_avx 1
94 vshufps xmm6, xmm6, xmm1, 11110000b
95 vpxor xmm6, xmm6, xmm5
96 vpxor xmm4, xmm4, xmm6
97 vpshufd xmm7, xmm4, 00001110b ; low qword of xmm7 = high qword of xmm4
98 vmovdqu [EXP_ENC_KEYS + %1], xmm7 ; unaligned 16-byte store
; key_dec_192_avx <round>
;   %1  - round-key index in the encrypt schedule.
;   Loads encrypt round key %1 into xmm0 and writes xmm1 to decrypt slot
;   (12 - %1). Both accesses are aligned moves, so both arrays must be 16-byte
;   aligned.
; NOTE(review): no visible line writes xmm1 between the load and the store; the
; listing skips number 103, where the conversion of xmm0 into xmm1 presumably
; sits -- confirm against the original file.
101 %macro key_dec_192_avx 1
102 vmovdqa xmm0, [EXP_ENC_KEYS + 16 * %1]
104 vmovdqa [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1
109 ; void aes_keyexp_192(UINT128 *key,
110 ; UINT128 *enc_exp_keys,
111 ; UINT128 *dec_exp_keys);
113 ; arg 1: rcx: pointer to key
114 ; arg 2: rdx: pointer to expanded key array for encrypt
115 ; arg 3: r8: pointer to expanded key array for decrypt
;
; SSE variant using the aeskeygenassist instruction.
; Reads 24 key bytes from KEY with unaligned-safe loads and writes the encrypt
; schedule to byte offsets 0..207 of EXP_ENC_KEYS (13 round keys of 16 bytes).
; EXP_DEC_KEYS is written with aligned stores and EXP_ENC_KEYS + 16*12 is read
; with an aligned load, so both arrays must be 16-byte aligned.
; xmm6/xmm7 are spilled to [rsp] on entry and reloaded at the end.
; NOTE(review): the entry label, the stack reservation, the key_dec_192_sse
; invocations and the ret are not visible in this listing (its embedded line
; numbers have gaps) -- check them in the original file.
117 MKGLOBAL(aes_keyexp_192_sse,function,)
122 movdqa [rsp + 0*16], xmm6
123 movdqa [rsp + 1*16], xmm7
126 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits (upper half of xmm7 is zeroed)
127 movq [EXP_ENC_KEYS + 16], xmm7 ; Store key bytes 16..23 at schedule offset 16
128 pshufd xmm4, xmm7, 01001111b ; move those 64 key bits to the high qword of xmm4, low qword = 0
129 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
130 movdqu [EXP_ENC_KEYS], xmm1 ; Store key bytes 0..15 as encrypt round key 0
131 movdqa [EXP_DEC_KEYS + 16*0], xmm1 ; overwritten further down with encrypt round key 12
132 movdqa [EXP_DEC_KEYS + 16*12], xmm1 ; last decrypt key = first 16 raw key bytes
134 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion_1 macro
135 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion_2 macro
137 aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
138 key_expansion_1_192_sse 24
139 key_expansion_2_192_sse 40
141 aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
142 key_expansion_1_192_sse 48
143 key_expansion_2_192_sse 64
145 aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
146 key_expansion_1_192_sse 72
147 key_expansion_2_192_sse 88
149 aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
150 key_expansion_1_192_sse 96
151 key_expansion_2_192_sse 112
153 aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
154 key_expansion_1_192_sse 120
155 key_expansion_2_192_sse 136
157 aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
158 key_expansion_1_192_sse 144
159 key_expansion_2_192_sse 160
161 aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
162 key_expansion_1_192_sse 168
163 key_expansion_2_192_sse 184
165 aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
166 key_expansion_1_192_sse 192
168 ;;; Decrypt slot 12 was already written above with the first 16 raw key bytes.
;;; Decrypt slot 0 is encrypt round key 12, copied unchanged.
170 movdqa xmm0, [EXP_ENC_KEYS + 16 * 12]
171 movdqa [EXP_DEC_KEYS + 16*0], xmm0
172 ;;; generate remaining decrypt keys
186 movdqa xmm6, [rsp + 0*16]
187 movdqa xmm7, [rsp + 1*16]
; aes_keyexp_192_sse_no_aesni
; Same flow as the aeskeygenassist-based SSE routine, but every AES-specific
; step goes through the EMULATE_* macros (defined outside this file).
; Reads 24 key bytes from KEY with unaligned-safe loads, writes the encrypt
; schedule to byte offsets 0..207 of EXP_ENC_KEYS, then fills decrypt slots
; 0..12 in reverse order of the encrypt schedule.
; EXP_DEC_KEYS is written with aligned stores and EXP_ENC_KEYS is read back with
; aligned loads, so both arrays must be 16-byte aligned.
; xmm6/xmm7 are spilled to [rsp] on entry and reloaded at the end.
; NOTE(review): the stack reservation and the ret are not visible in this
; listing (its embedded line numbers have gaps).
193 MKGLOBAL(aes_keyexp_192_sse_no_aesni,function,)
194 aes_keyexp_192_sse_no_aesni:
198 movdqa [rsp + 0*16], xmm6
199 movdqa [rsp + 1*16], xmm7
202 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits (upper half of xmm7 is zeroed)
203 movq [EXP_ENC_KEYS + 16], xmm7 ; Store key bytes 16..23 at schedule offset 16
204 pshufd xmm4, xmm7, 01001111b ; move those 64 key bits to the high qword of xmm4, low qword = 0
205 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
206 movdqu [EXP_ENC_KEYS], xmm1 ; Store key bytes 0..15 as encrypt round key 0
207 movdqa [EXP_DEC_KEYS + 16*0], xmm1 ; overwritten further down with encrypt round key 12
208 movdqa [EXP_DEC_KEYS + 16*12], xmm1 ; last decrypt key = first 16 raw key bytes
210 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion_1 macro
211 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion_2 macro
213 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
214 key_expansion_1_192_sse 24
215 key_expansion_2_192_sse 40
217 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
218 key_expansion_1_192_sse 48
219 key_expansion_2_192_sse 64
221 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
222 key_expansion_1_192_sse 72
223 key_expansion_2_192_sse 88
225 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
226 key_expansion_1_192_sse 96
227 key_expansion_2_192_sse 112
229 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
230 key_expansion_1_192_sse 120
231 key_expansion_2_192_sse 136
233 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
234 key_expansion_1_192_sse 144
235 key_expansion_2_192_sse 160
237 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
238 key_expansion_1_192_sse 168
239 key_expansion_2_192_sse 184
241 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x80 ; Generate round key 12
242 key_expansion_1_192_sse 192
244 ;;; Decrypt slot 12 was already written above with the first 16 raw key bytes.
;;; Decrypt slot 0 is encrypt round key 12, copied unchanged.
246 movdqa xmm0, [EXP_ENC_KEYS + 16 * 12]
247 movdqa [EXP_DEC_KEYS + 16*0], xmm0
248 ;;; generate remaining decrypt keys
;;; Encrypt round keys 1..11 are converted and land in decrypt slots 11..1.
249 key_dec_192_sse_no_aesni 1
250 key_dec_192_sse_no_aesni 2
251 key_dec_192_sse_no_aesni 3
252 key_dec_192_sse_no_aesni 4
253 key_dec_192_sse_no_aesni 5
254 key_dec_192_sse_no_aesni 6
255 key_dec_192_sse_no_aesni 7
256 key_dec_192_sse_no_aesni 8
257 key_dec_192_sse_no_aesni 9
258 key_dec_192_sse_no_aesni 10
259 key_dec_192_sse_no_aesni 11
262 movdqa xmm6, [rsp + 0*16]
263 movdqa xmm7, [rsp + 1*16]
; aes_keyexp_192_avx / aes_keyexp_192_avx2 / aes_keyexp_192_avx512
; VEX-encoded variant of the 192-bit key expansion; three global names are
; exported for it.
; Reads 24 key bytes from KEY with unaligned-safe loads and writes the encrypt
; schedule to byte offsets 0..207 of EXP_ENC_KEYS (13 round keys of 16 bytes).
; EXP_DEC_KEYS is written with aligned stores and EXP_ENC_KEYS + 16*12 is read
; with an aligned load, so both arrays must be 16-byte aligned.
; xmm6/xmm7 are spilled to [rsp] on entry and reloaded at the end.
; NOTE(review): only the _avx512 label is visible; the other two labels, the
; stack reservation, the key_dec_192_avx invocations and the ret are not visible
; in this listing (its embedded line numbers have gaps).
269 MKGLOBAL(aes_keyexp_192_avx,function,)
270 MKGLOBAL(aes_keyexp_192_avx2,function,)
271 MKGLOBAL(aes_keyexp_192_avx512,function,)
274 aes_keyexp_192_avx512:
278 vmovdqa [rsp + 0*16], xmm6
279 vmovdqa [rsp + 1*16], xmm7
282 vmovq xmm7, [KEY + 16] ; loading the AES key, 64 bits (upper half of xmm7 is zeroed)
283 vmovq [EXP_ENC_KEYS + 16], xmm7 ; Store key bytes 16..23 at schedule offset 16
284 vpshufd xmm4, xmm7, 01001111b ; move those 64 key bits to the high qword of xmm4, low qword = 0
285 vmovdqu xmm1, [KEY] ; loading the AES key, 128 bits
286 vmovdqu [EXP_ENC_KEYS], xmm1 ; Store key bytes 0..15 as encrypt round key 0
287 vmovdqa [EXP_DEC_KEYS + 16*0], xmm1 ; overwritten further down with encrypt round key 12
288 vmovdqa [EXP_DEC_KEYS + 16*12], xmm1 ; last decrypt key = first 16 raw key bytes
290 vpxor xmm3, xmm3, xmm3 ; xmm3 = 0, required by the key_expansion_1 macro
291 vpxor xmm6, xmm6, xmm6 ; xmm6 = 0, consumed by the key_expansion_2 macro
293 vaeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
294 key_expansion_1_192_avx 24
295 key_expansion_2_192_avx 40
297 vaeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
298 key_expansion_1_192_avx 48
299 key_expansion_2_192_avx 64
301 vaeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
302 key_expansion_1_192_avx 72
303 key_expansion_2_192_avx 88
305 vaeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
306 key_expansion_1_192_avx 96
307 key_expansion_2_192_avx 112
309 vaeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
310 key_expansion_1_192_avx 120
311 key_expansion_2_192_avx 136
313 vaeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
314 key_expansion_1_192_avx 144
315 key_expansion_2_192_avx 160
317 vaeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
318 key_expansion_1_192_avx 168
319 key_expansion_2_192_avx 184
321 vaeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
322 key_expansion_1_192_avx 192
324 ;;; Decrypt slot 12 was already written above with the first 16 raw key bytes.
;;; Decrypt slot 0 is encrypt round key 12, copied unchanged.
326 vmovdqa xmm0, [EXP_ENC_KEYS + 16 * 12]
327 vmovdqa [EXP_DEC_KEYS + 16*0], xmm0
328 ;;; generate remaining decrypt keys
342 vmovdqa xmm6, [rsp + 0*16]
343 vmovdqa xmm7, [rsp + 1*16]
349 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
354 ; void aes_keyexp_192_enc_sse(UINT128 *key,
355 ; UINT128 *enc_exp_keys);
357 ; arg 1: rcx: pointer to key
358 ; arg 2: rdx: pointer to expanded key array for encrypt
;
; Encrypt-only 192-bit key expansion, SSE variant using aeskeygenassist.
; Reads 24 key bytes from KEY and writes byte offsets 0..207 of EXP_ENC_KEYS
; (13 round keys of 16 bytes). Every visible access to KEY and EXP_ENC_KEYS is
; a movq/movdqu, so neither pointer needs 16-byte alignment here.
; EXP_DEC_KEYS is not touched.
; xmm6/xmm7 are spilled to [rsp] on entry and reloaded at the end.
; NOTE(review): the stack reservation and the ret are not visible in this
; listing (its embedded line numbers have gaps).
360 MKGLOBAL(aes_keyexp_192_enc_sse,function,)
361 aes_keyexp_192_enc_sse:
365 movdqa [rsp + 0*16], xmm6
366 movdqa [rsp + 1*16], xmm7
369 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits (upper half of xmm7 is zeroed)
370 movq [EXP_ENC_KEYS + 16], xmm7 ; Store key bytes 16..23 at schedule offset 16
371 pshufd xmm4, xmm7, 01001111b ; move those 64 key bits to the high qword of xmm4, low qword = 0
372 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
373 movdqu [EXP_ENC_KEYS], xmm1 ; Store key bytes 0..15 as encrypt round key 0
375 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion_1 macro.
376 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion_2 macro.
378 aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
379 key_expansion_1_192_sse 24
380 key_expansion_2_192_sse 40
382 aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
383 key_expansion_1_192_sse 48
384 key_expansion_2_192_sse 64
386 aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
387 key_expansion_1_192_sse 72
388 key_expansion_2_192_sse 88
390 aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
391 key_expansion_1_192_sse 96
392 key_expansion_2_192_sse 112
394 aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
395 key_expansion_1_192_sse 120
396 key_expansion_2_192_sse 136
398 aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
399 key_expansion_1_192_sse 144
400 key_expansion_2_192_sse 160
402 aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
403 key_expansion_1_192_sse 168
404 key_expansion_2_192_sse 184
406 aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
407 key_expansion_1_192_sse 192
410 movdqa xmm6, [rsp + 0*16]
411 movdqa xmm7, [rsp + 1*16]
; aes_keyexp_192_enc_sse_no_aesni
; Encrypt-only 192-bit key expansion in which the key-generation assist step
; goes through EMULATE_AESKEYGENASSIST (defined outside this file).
; Reads 24 key bytes from KEY and writes byte offsets 0..207 of EXP_ENC_KEYS
; (13 round keys of 16 bytes). Every visible access to KEY and EXP_ENC_KEYS is
; a movq/movdqu, so neither pointer needs 16-byte alignment here.
; EXP_DEC_KEYS is not touched.
; xmm6/xmm7 are spilled to [rsp] on entry and reloaded at the end.
; NOTE(review): the stack reservation and the ret are not visible in this
; listing (its embedded line numbers have gaps).
417 MKGLOBAL(aes_keyexp_192_enc_sse_no_aesni,function,)
418 aes_keyexp_192_enc_sse_no_aesni:
422 movdqa [rsp + 0*16], xmm6
423 movdqa [rsp + 1*16], xmm7
426 movq xmm7, [KEY + 16] ; loading the AES key, 64 bits (upper half of xmm7 is zeroed)
427 movq [EXP_ENC_KEYS + 16], xmm7 ; Store key bytes 16..23 at schedule offset 16
428 pshufd xmm4, xmm7, 01001111b ; move those 64 key bits to the high qword of xmm4, low qword = 0
429 movdqu xmm1, [KEY] ; loading the AES key, 128 bits
430 movdqu [EXP_ENC_KEYS], xmm1 ; Store key bytes 0..15 as encrypt round key 0
432 pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion_1 macro.
433 pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for the key_expansion_2 macro.
435 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
436 key_expansion_1_192_sse 24
437 key_expansion_2_192_sse 40
439 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
440 key_expansion_1_192_sse 48
441 key_expansion_2_192_sse 64
443 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
444 key_expansion_1_192_sse 72
445 key_expansion_2_192_sse 88
447 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
448 key_expansion_1_192_sse 96
449 key_expansion_2_192_sse 112
451 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
452 key_expansion_1_192_sse 120
453 key_expansion_2_192_sse 136
455 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
456 key_expansion_1_192_sse 144
457 key_expansion_2_192_sse 160
459 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
460 key_expansion_1_192_sse 168
461 key_expansion_2_192_sse 184
463 EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x80 ; Generate round key 12
464 key_expansion_1_192_sse 192
467 movdqa xmm6, [rsp + 0*16]
468 movdqa xmm7, [rsp + 1*16]
; aes_keyexp_192_enc_avx / aes_keyexp_192_enc_avx2 / aes_keyexp_192_enc_avx512
; Encrypt-only 192-bit key expansion, VEX-encoded; the three exported names are
; aliases for the same entry point.
; Reads 24 key bytes from KEY and writes byte offsets 0..207 of EXP_ENC_KEYS
; (13 round keys of 16 bytes). Every visible access to KEY and EXP_ENC_KEYS is
; a vmovq/vmovdqu, so neither pointer needs 16-byte alignment here.
; EXP_DEC_KEYS is not touched.
; xmm6/xmm7 are spilled to [rsp] on entry and reloaded at the end.
; NOTE(review): the stack reservation and the ret are not visible in this
; listing (its embedded line numbers have gaps).
474 MKGLOBAL(aes_keyexp_192_enc_avx,function,)
475 MKGLOBAL(aes_keyexp_192_enc_avx2,function,)
476 MKGLOBAL(aes_keyexp_192_enc_avx512,function,)
477 aes_keyexp_192_enc_avx:
478 aes_keyexp_192_enc_avx2:
479 aes_keyexp_192_enc_avx512:
483 vmovdqa [rsp + 0*16], xmm6
484 vmovdqa [rsp + 1*16], xmm7
487 vmovq xmm7, [KEY + 16] ; loading the AES key, 64 bits (upper half of xmm7 is zeroed)
488 vmovq [EXP_ENC_KEYS + 16], xmm7 ; Store key bytes 16..23 at schedule offset 16
489 vpshufd xmm4, xmm7, 01001111b ; move those 64 key bits to the high qword of xmm4, low qword = 0
490 vmovdqu xmm1, [KEY] ; loading the AES key, 128 bits
491 vmovdqu [EXP_ENC_KEYS], xmm1 ; Store key bytes 0..15 as encrypt round key 0
493 vpxor xmm3, xmm3, xmm3 ; xmm3 = 0, required by the key_expansion_1 macro
494 vpxor xmm6, xmm6, xmm6 ; xmm6 = 0, consumed by the key_expansion_2 macro
496 vaeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
497 key_expansion_1_192_avx 24
498 key_expansion_2_192_avx 40
500 vaeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
501 key_expansion_1_192_avx 48
502 key_expansion_2_192_avx 64
504 vaeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
505 key_expansion_1_192_avx 72
506 key_expansion_2_192_avx 88
508 vaeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
509 key_expansion_1_192_avx 96
510 key_expansion_2_192_avx 112
512 vaeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
513 key_expansion_1_192_avx 120
514 key_expansion_2_192_avx 136
516 vaeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
517 key_expansion_1_192_avx 144
518 key_expansion_2_192_avx 160
520 vaeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
521 key_expansion_1_192_avx 168
522 key_expansion_2_192_avx 184
524 vaeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
525 key_expansion_1_192_avx 192
528 vmovdqa xmm6, [rsp + 0*16]
529 vmovdqa xmm7, [rsp + 1*16]
; Empty .note.GNU-stack section: the marker GNU toolchains use to record that
; this object does not need an executable stack.
536 section .note.GNU-stack noalloc noexec nowrite progbits