2 ;; Copyright (c) 2019, Intel Corporation
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ; routine to do AES ECB encrypt/decrypt on 16n bytes doing AES by 4
30 ; XMM registers are clobbered. Saving/restoring must be done at a higher level
32 ; void aes_ecb_x_y_sse(void *in, UINT128 keys[], void *out, UINT64 len_bytes);
37 ; x = direction (enc/dec)
38 ; y = key size (128/192/256)
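39 ; arg 1: IN: pointer to input (plain text when encrypting, cipher text when decrypting)
40 ; arg 2: KEYS: pointer to round keys (16 bytes each, read with aligned loads)
41 ; arg 3: OUT: pointer to output (cipher text when encrypting, plain text when decrypting)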
42 ; arg 4: LEN: length in bytes (multiple of 16)
45 %include "include/os.asm"
47 %ifndef AES_ECB_ENC_128
48 %define AES_ECB_ENC_128 aes_ecb_enc_128_sse ; default symbol names, used only when AES_ECB_ENC_128 is not already defined
49 %define AES_ECB_ENC_192 aes_ecb_enc_192_sse ; NOTE(review): the guard tests AES_ECB_ENC_128 only - a file overriding names presumably has to define all six; confirm
50 %define AES_ECB_ENC_256 aes_ecb_enc_256_sse ; naming follows aes_ecb_<direction>_<key size>_sse
51 %define AES_ECB_DEC_128 aes_ecb_dec_128_sse ; decrypt counterparts of the three names above
52 %define AES_ECB_DEC_192 aes_ecb_dec_192_sse
53 %define AES_ECB_DEC_256 aes_ecb_dec_256_sse
85 %define %%NROUNDS %1 ; [in] Number of AES rounds, numerical value (tested with %elif %%NROUNDS == 12 further down)
86 %define %%DIR %2 ; [in] Direction (encrypt/decrypt)
90 %define AES_LAST aesenclast ; final-round instruction for the encrypt direction
93 %define AES_LAST aesdeclast ; final-round instruction for the decrypt direction; NOTE(review): the conditional choosing between the two AES_LAST definitions is not visible here - presumably keyed on %%DIR
103 ; load plain/cipher text: two 16-byte blocks, unaligned loads (movdqu)
104 movdqu XDATA0, [IN + 0*16] ; block 0
105 movdqu XDATA1, [IN + 1*16] ; block 1
107 movdqa XKEY0, [KEYS + 0*16] ; round key 0; movdqa faults unless KEYS is 16-byte aligned
109 pxor XDATA0, XKEY0 ; 0. ARK (AddRoundKey: XOR block 0 with round key 0)
112 movdqa XKEY2, [KEYS + 2*16] ; fetch round key 2 ahead of round 2
114 AES XDATA0, [KEYS + 1*16] ; 1. ENC
115 AES XDATA1, [KEYS + 1*16] ; same round on block 1; odd round keys are used straight from memory in this path
119 AES XDATA0, XKEY2 ; 2. ENC
122 movdqa XKEY4, [KEYS + 4*16] ; fetch round key 4 ahead of round 4
124 AES XDATA0, [KEYS + 3*16] ; 3. ENC
125 AES XDATA1, [KEYS + 3*16]
127 AES XDATA0, XKEY4 ; 4. ENC
130 movdqa XKEY6, [KEYS + 6*16] ; fetch round key 6 ahead of round 6
132 AES XDATA0, [KEYS + 5*16] ; 5. ENC
133 AES XDATA1, [KEYS + 5*16]
135 AES XDATA0, XKEY6 ; 6. ENC
138 movdqa XKEY_B, [KEYS + 8*16] ; round key 8 is held in XKEY_B
140 AES XDATA0, [KEYS + 7*16] ; 7. ENC
141 AES XDATA1, [KEYS + 7*16]
143 AES XDATA0, XKEY_B ; 8. ENC
146 movdqa XKEY10, [KEYS + 10*16] ; round key 10: a middle round or the final round, see below
148 AES XDATA0, [KEYS + 9*16] ; 9. ENC
149 AES XDATA1, [KEYS + 9*16]
152 AES XDATA0, XKEY10 ; 10. ENC (as a middle round; NOTE(review): rounds 10-13 presumably sit under %%NROUNDS conditionals that are not visible here - confirm)
155 AES XDATA0, [KEYS + 11*16] ; 11. ENC
156 AES XDATA1, [KEYS + 11*16]
160 AES XDATA0, [KEYS + 12*16] ; 12. ENC
161 AES XDATA1, [KEYS + 12*16]
163 AES XDATA0, [KEYS + 13*16] ; 13. ENC
164 AES XDATA1, [KEYS + 13*16]
168 AES_LAST XDATA0, XKEY10 ; 10. ENC (final round using round key 10)
169 AES_LAST XDATA1, XKEY10
170 %elif %%NROUNDS == 12
171 AES_LAST XDATA0, [KEYS + 12*16] ; 12. ENC (final round when %%NROUNDS == 12)
172 AES_LAST XDATA1, [KEYS + 12*16]
174 AES_LAST XDATA0, [KEYS + 14*16] ; 14. ENC (final round using round key 14)
175 AES_LAST XDATA1, [KEYS + 14*16]
177 movdqu [OUT + 0*16], XDATA0 ; store result block 0 (unaligned store)
178 movdqu [OUT + 1*16], XDATA1 ; store result block 1
187 ; load plain/cipher text: a single 16-byte block, unaligned load (movdqu)
188 movdqu XDATA0, [IN + 0*16]
190 movdqa XKEY0, [KEYS + 0*16] ; round key 0; movdqa faults unless KEYS is 16-byte aligned
192 pxor XDATA0, XKEY0 ; 0. ARK (AddRoundKey: XOR the block with round key 0)
194 movdqa XKEY2, [KEYS + 2*16] ; fetch round key 2 ahead of round 2
196 AES XDATA0, [KEYS + 1*16] ; 1. ENC (odd round keys are used straight from memory in this path)
200 AES XDATA0, XKEY2 ; 2. ENC
202 movdqa XKEY4, [KEYS + 4*16] ; fetch round key 4 ahead of round 4
204 AES XDATA0, [KEYS + 3*16] ; 3. ENC
206 AES XDATA0, XKEY4 ; 4. ENC
208 movdqa XKEY6, [KEYS + 6*16] ; fetch round key 6 ahead of round 6
210 AES XDATA0, [KEYS + 5*16] ; 5. ENC
212 AES XDATA0, XKEY6 ; 6. ENC
214 movdqa XKEY_B, [KEYS + 8*16] ; round key 8 is held in XKEY_B
216 AES XDATA0, [KEYS + 7*16] ; 7. ENC
218 AES XDATA0, XKEY_B ; 8. ENC
220 movdqa XKEY10, [KEYS + 10*16] ; round key 10: a middle round or the final round, see below
222 AES XDATA0, [KEYS + 9*16] ; 9. ENC
225 AES XDATA0, XKEY10 ; 10. ENC (as a middle round; NOTE(review): rounds 10-13 presumably sit under %%NROUNDS conditionals that are not visible here - confirm)
227 AES XDATA0, [KEYS + 11*16] ; 11. ENC
231 AES XDATA0, [KEYS + 12*16] ; 12. ENC
233 AES XDATA0, [KEYS + 13*16] ; 13. ENC
238 AES_LAST XDATA0, XKEY10 ; 10. ENC (final round using round key 10)
239 %elif %%NROUNDS == 12
240 AES_LAST XDATA0, [KEYS + 12*16] ; 12. ENC (final round when %%NROUNDS == 12)
242 AES_LAST XDATA0, [KEYS + 14*16] ; 14. ENC (final round using round key 14)
245 movdqu [OUT + 0*16], XDATA0 ; store the result block (unaligned store)
253 ; load plain/cipher text: three 16-byte blocks, unaligned loads (movdqu)
254 movdqu XDATA0, [IN + 0*16] ; block 0
255 movdqu XDATA1, [IN + 1*16] ; block 1
256 movdqu XDATA2, [IN + 2*16] ; block 2
258 movdqa XKEY0, [KEYS + 0*16] ; round key 0; movdqa faults unless KEYS is 16-byte aligned
260 movdqa XKEY_A, [KEYS + 1*16] ; round key 1 -> XKEY_A (XKEY_A is reloaded with each odd round key)
262 pxor XDATA0, XKEY0 ; 0. ARK (AddRoundKey: XOR block 0 with round key 0)
266 movdqa XKEY2, [KEYS + 2*16] ; round key 2
268 AES XDATA0, XKEY_A ; 1. ENC
272 movdqa XKEY_A, [KEYS + 3*16] ; round key 3 -> XKEY_A
275 AES XDATA0, XKEY2 ; 2. ENC
279 movdqa XKEY4, [KEYS + 4*16] ; round key 4
281 AES XDATA0, XKEY_A ; 3. ENC
285 movdqa XKEY_A, [KEYS + 5*16] ; round key 5 -> XKEY_A
287 AES XDATA0, XKEY4 ; 4. ENC
291 movdqa XKEY6, [KEYS + 6*16] ; round key 6
293 AES XDATA0, XKEY_A ; 5. ENC
297 movdqa XKEY_A, [KEYS + 7*16] ; round key 7 -> XKEY_A
299 AES XDATA0, XKEY6 ; 6. ENC
303 movdqa XKEY_B, [KEYS + 8*16] ; round key 8 -> XKEY_B (XKEY_B takes the even round keys from 8 upward)
305 AES XDATA0, XKEY_A ; 7. ENC
309 movdqa XKEY_A, [KEYS + 9*16] ; round key 9 -> XKEY_A
311 AES XDATA0, XKEY_B ; 8. ENC
315 movdqa XKEY_B, [KEYS + 10*16] ; round key 10 -> XKEY_B
317 AES XDATA0, XKEY_A ; 9. ENC
322 movdqa XKEY_A, [KEYS + 11*16] ; round key 11 -> XKEY_A (NOTE(review): rounds 10-13 presumably sit under %%NROUNDS conditionals that are not visible here - confirm)
324 AES XDATA0, XKEY_B ; 10. ENC
328 movdqa XKEY_B, [KEYS + 12*16] ; round key 12 -> XKEY_B
330 AES XDATA0, XKEY_A ; 11. ENC
337 movdqa XKEY_A, [KEYS + 13*16] ; round key 13 -> XKEY_A
339 AES XDATA0, XKEY_B ; 12. ENC
343 movdqa XKEY_B, [KEYS + 14*16] ; round key 14 -> XKEY_B
345 AES XDATA0, XKEY_A ; 13. ENC
350 AES_LAST XDATA0, XKEY_B ; 10/12/14. ENC (depending on key size) - XKEY_B holds the last even round key loaded
351 AES_LAST XDATA1, XKEY_B
352 AES_LAST XDATA2, XKEY_B
354 movdqu [OUT + 0*16], XDATA0 ; store result block 0 (unaligned store)
355 movdqu [OUT + 1*16], XDATA1 ; store result block 1
356 movdqu [OUT + 2*16], XDATA2 ; store result block 2
365 ; load plain/cipher text: four 16-byte blocks, unaligned loads (movdqu)
366 movdqu XDATA0, [IN + 0*16] ; block 0
367 movdqu XDATA1, [IN + 1*16] ; block 1
368 movdqu XDATA2, [IN + 2*16] ; block 2
369 movdqu XDATA3, [IN + 3*16] ; block 3
371 movdqa XKEY0, [KEYS + 0*16] ; round key 0; movdqa faults unless KEYS is 16-byte aligned
373 movdqa XKEY_A, [KEYS + 1*16] ; round key 1 -> XKEY_A (XKEY_A is reloaded with each odd round key)
375 pxor XDATA0, XKEY0 ; 0. ARK (AddRoundKey: XOR block 0 with round key 0)
380 movdqa XKEY2, [KEYS + 2*16] ; round key 2
382 AES XDATA0, XKEY_A ; 1. ENC
387 movdqa XKEY_A, [KEYS + 3*16] ; round key 3 -> XKEY_A
391 AES XDATA0, XKEY2 ; 2. ENC
396 movdqa XKEY4, [KEYS + 4*16] ; round key 4
398 AES XDATA0, XKEY_A ; 3. ENC
403 movdqa XKEY_A, [KEYS + 5*16] ; round key 5 -> XKEY_A
405 AES XDATA0, XKEY4 ; 4. ENC
410 movdqa XKEY6, [KEYS + 6*16] ; round key 6
412 AES XDATA0, XKEY_A ; 5. ENC
417 movdqa XKEY_A, [KEYS + 7*16] ; round key 7 -> XKEY_A
419 AES XDATA0, XKEY6 ; 6. ENC
424 movdqa XKEY_B, [KEYS + 8*16] ; round key 8 -> XKEY_B (XKEY_B takes the even round keys from 8 upward)
426 AES XDATA0, XKEY_A ; 7. ENC
431 movdqa XKEY_A, [KEYS + 9*16] ; round key 9 -> XKEY_A
433 AES XDATA0, XKEY_B ; 8. ENC
438 movdqa XKEY_B, [KEYS + 10*16] ; round key 10 -> XKEY_B
440 AES XDATA0, XKEY_A ; 9. ENC
446 movdqa XKEY_A, [KEYS + 11*16] ; round key 11 -> XKEY_A (NOTE(review): rounds 10-13 presumably sit under %%NROUNDS conditionals that are not visible here - confirm)
448 AES XDATA0, XKEY_B ; 10. ENC
453 movdqa XKEY_B, [KEYS + 12*16] ; round key 12 -> XKEY_B
455 AES XDATA0, XKEY_A ; 11. ENC
462 movdqa XKEY_A, [KEYS + 13*16] ; round key 13 -> XKEY_A
464 AES XDATA0, XKEY_B ; 12. ENC
469 movdqa XKEY_B, [KEYS + 14*16] ; round key 14 -> XKEY_B
471 AES XDATA0, XKEY_A ; 13. ENC
477 AES_LAST XDATA0, XKEY_B ; 10/12/14. ENC (depending on key size) - XKEY_B holds the last even round key loaded
478 AES_LAST XDATA1, XKEY_B
479 AES_LAST XDATA2, XKEY_B
480 AES_LAST XDATA3, XKEY_B
482 movdqu [OUT + 0*16], XDATA0 ; store result block 0 (unaligned store)
483 movdqu [OUT + 1*16], XDATA1 ; store result block 1
484 movdqu [OUT + 2*16], XDATA2 ; store result block 2
485 movdqu [OUT + 3*16], XDATA3 ; store result block 3
493 ; load plain/cipher text: four 16-byte blocks at byte offset IDX, unaligned loads (movdqu)
494 movdqu XDATA0, [IN + IDX + 0*16] ; block 0
495 movdqu XDATA1, [IN + IDX + 1*16] ; block 1
496 movdqu XDATA2, [IN + IDX + 2*16] ; block 2
497 movdqu XDATA3, [IN + IDX + 3*16] ; block 3
499 movdqa XKEY_A, [KEYS + 1*16] ; round key 1 -> XKEY_A (XKEY_A is reloaded with each odd round key)
501 pxor XDATA0, XKEY0 ; 0. ARK - XKEY0 is not loaded in the lines visible here; presumably it still holds round key 0 from earlier code (confirm)
508 AES XDATA0, XKEY_A ; 1. ENC
513 movdqa XKEY_A, [KEYS + 3*16] ; round key 3 -> XKEY_A
515 AES XDATA0, XKEY2 ; 2. ENC - XKEY2/XKEY4/XKEY6 are likewise not reloaded in the lines visible here; presumably kept from earlier code (confirm)
520 AES XDATA0, XKEY_A ; 3. ENC
525 movdqa XKEY_A, [KEYS + 5*16] ; round key 5 -> XKEY_A
527 AES XDATA0, XKEY4 ; 4. ENC
532 AES XDATA0, XKEY_A ; 5. ENC
537 movdqa XKEY_A, [KEYS + 7*16] ; round key 7 -> XKEY_A
539 AES XDATA0, XKEY6 ; 6. ENC
544 movdqa XKEY_B, [KEYS + 8*16] ; round key 8 -> XKEY_B (XKEY_B takes the even round keys from 8 upward)
546 AES XDATA0, XKEY_A ; 7. ENC
551 movdqa XKEY_A, [KEYS + 9*16] ; round key 9 -> XKEY_A
553 AES XDATA0, XKEY_B ; 8. ENC
558 movdqa XKEY_B, [KEYS + 10*16] ; round key 10 -> XKEY_B
560 AES XDATA0, XKEY_A ; 9. ENC
566 movdqa XKEY_A, [KEYS + 11*16] ; round key 11 -> XKEY_A (NOTE(review): rounds 10-13 presumably sit under %%NROUNDS conditionals that are not visible here - confirm)
568 AES XDATA0, XKEY_B ; 10. ENC
573 movdqa XKEY_B, [KEYS + 12*16] ; round key 12 -> XKEY_B
575 AES XDATA0, XKEY_A ; 11. ENC
582 movdqa XKEY_A, [KEYS + 13*16] ; round key 13 -> XKEY_A
584 AES XDATA0, XKEY_B ; 12. ENC
589 movdqa XKEY_B, [KEYS + 14*16] ; round key 14 -> XKEY_B
591 AES XDATA0, XKEY_A ; 13. ENC
597 AES_LAST XDATA0, XKEY_B ; 10/12/14. ENC (depending on key size) - XKEY_B holds the last even round key loaded
598 AES_LAST XDATA1, XKEY_B
599 AES_LAST XDATA2, XKEY_B
600 AES_LAST XDATA3, XKEY_B
602 movdqu [OUT + IDX + 0*16 - 4*16], XDATA0 ; store result block 0; NOTE(review): the -4*16 bias suggests IDX is advanced by four blocks between the loads and these stores (update not visible here) - confirm
603 movdqu [OUT + IDX + 1*16 - 4*16], XDATA1 ; store result block 1
604 movdqu [OUT + IDX + 2*16 - 4*16], XDATA2 ; store result block 2
605 movdqu [OUT + IDX + 3*16 - 4*16], XDATA3 ; store result block 3
617 MKGLOBAL(AES_ECB_ENC_128,function,internal) ; symbol declaration for the 128-bit encrypt entry point; MKGLOBAL is not defined in the lines visible here (presumably from include/os.asm)
623 MKGLOBAL(AES_ECB_ENC_192,function,internal) ; 192-bit encrypt entry point
629 MKGLOBAL(AES_ECB_ENC_256,function,internal) ; 256-bit encrypt entry point
635 MKGLOBAL(AES_ECB_DEC_128,function,internal) ; 128-bit decrypt entry point
641 MKGLOBAL(AES_ECB_DEC_192,function,internal) ; 192-bit decrypt entry point
647 MKGLOBAL(AES_ECB_DEC_256,function,internal) ; 256-bit decrypt entry point
653 section .note.GNU-stack noalloc noexec nowrite progbits ; empty note section telling GNU linkers this object does not need an executable stack