]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/intel-ipsec-mb/sse/aes_ecb_by4_sse.asm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / sse / aes_ecb_by4_sse.asm
1 ;;
2 ;; Copyright (c) 2019, Intel Corporation
3 ;;
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
6 ;;
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
15 ;;
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ;;
27
28 ; routine to do AES ECB encrypt/decrypt on 16n bytes doing AES by 4
29
30 ; XMM registers are clobbered. Saving/restoring must be done at a higher level
31
32 ; void aes_ecb_x_y_sse(void *in,
33 ; UINT128 keys[],
34 ; void *out,
35 ; UINT64 len_bytes);
36 ;
37 ; x = direction (enc/dec)
38 ; y = key size (128/192/256)
39 ; arg 1: IN: pointer to input (plain text when encrypting, cipher text when decrypting)
40 ; arg 2: KEYS: pointer to keys
41 ; arg 3: OUT: pointer to output (cipher text when encrypting, plain text when decrypting)
42 ; arg 4: LEN: length in bytes (multiple of 16)
43 ;
44
45 %include "include/os.asm"
46
47 %ifndef AES_ECB_ENC_128
48 %define AES_ECB_ENC_128 aes_ecb_enc_128_sse
49 %define AES_ECB_ENC_192 aes_ecb_enc_192_sse
50 %define AES_ECB_ENC_256 aes_ecb_enc_256_sse
51 %define AES_ECB_DEC_128 aes_ecb_dec_128_sse
52 %define AES_ECB_DEC_192 aes_ecb_dec_192_sse
53 %define AES_ECB_DEC_256 aes_ecb_dec_256_sse
54 %endif
55
56 %ifdef LINUX
57 %define IN rdi
58 %define KEYS rsi
59 %define OUT rdx
60 %define LEN rcx
61 %else
62 %define IN rcx
63 %define KEYS rdx
64 %define OUT r8
65 %define LEN r9
66 %endif
67
68 %define IDX rax
69 %define TMP IDX
70 %define XDATA0 xmm0
71 %define XDATA1 xmm1
72 %define XDATA2 xmm2
73 %define XDATA3 xmm3
74 %define XKEY0 xmm4
75 %define XKEY2 xmm5
76 %define XKEY4 xmm6
77 %define XKEY6 xmm7
78 %define XKEY10 xmm8
79 %define XKEY_A xmm14
80 %define XKEY_B xmm15
81
82 section .text
83
84 %macro AES_ECB 2
85 %define %%NROUNDS %1 ; [in] Number of AES rounds, numerical value
86 %define %%DIR %2 ; [in] Direction (encrypt/decrypt)
87
88 %ifidn %%DIR, ENC
89 %define AES aesenc
90 %define AES_LAST aesenclast
91 %else ; DIR = DEC
92 %define AES aesdec
93 %define AES_LAST aesdeclast
94 %endif
95 mov TMP, LEN
96 and TMP, 3*16
97 jz %%initial_4
98 cmp TMP, 2*16
99 jb %%initial_1
100 ja %%initial_3
101
102 %%initial_2:
103 ; load plain/cipher text
104 movdqu XDATA0, [IN + 0*16]
105 movdqu XDATA1, [IN + 1*16]
106
107 movdqa XKEY0, [KEYS + 0*16]
108
109 pxor XDATA0, XKEY0 ; 0. ARK
110 pxor XDATA1, XKEY0
111
112 movdqa XKEY2, [KEYS + 2*16]
113
114 AES XDATA0, [KEYS + 1*16] ; 1. ENC
115 AES XDATA1, [KEYS + 1*16]
116
117 mov IDX, 2*16
118
119 AES XDATA0, XKEY2 ; 2. ENC
120 AES XDATA1, XKEY2
121
122 movdqa XKEY4, [KEYS + 4*16]
123
124 AES XDATA0, [KEYS + 3*16] ; 3. ENC
125 AES XDATA1, [KEYS + 3*16]
126
127 AES XDATA0, XKEY4 ; 4. ENC
128 AES XDATA1, XKEY4
129
130 movdqa XKEY6, [KEYS + 6*16]
131
132 AES XDATA0, [KEYS + 5*16] ; 5. ENC
133 AES XDATA1, [KEYS + 5*16]
134
135 AES XDATA0, XKEY6 ; 6. ENC
136 AES XDATA1, XKEY6
137
138 movdqa XKEY_B, [KEYS + 8*16]
139
140 AES XDATA0, [KEYS + 7*16] ; 7. ENC
141 AES XDATA1, [KEYS + 7*16]
142
143 AES XDATA0, XKEY_B ; 8. ENC
144 AES XDATA1, XKEY_B
145
146 movdqa XKEY10, [KEYS + 10*16]
147
148 AES XDATA0, [KEYS + 9*16] ; 9. ENC
149 AES XDATA1, [KEYS + 9*16]
150
151 %if %%NROUNDS >= 12
152 AES XDATA0, XKEY10 ; 10. ENC
153 AES XDATA1, XKEY10
154
155 AES XDATA0, [KEYS + 11*16] ; 11. ENC
156 AES XDATA1, [KEYS + 11*16]
157 %endif
158
159 %if %%NROUNDS == 14
160 AES XDATA0, [KEYS + 12*16] ; 12. ENC
161 AES XDATA1, [KEYS + 12*16]
162
163 AES XDATA0, [KEYS + 13*16] ; 13. ENC
164 AES XDATA1, [KEYS + 13*16]
165 %endif
166
167 %if %%NROUNDS == 10
168 AES_LAST XDATA0, XKEY10 ; 10. ENC
169 AES_LAST XDATA1, XKEY10
170 %elif %%NROUNDS == 12
171 AES_LAST XDATA0, [KEYS + 12*16] ; 12. ENC
172 AES_LAST XDATA1, [KEYS + 12*16]
173 %else
174 AES_LAST XDATA0, [KEYS + 14*16] ; 14. ENC
175 AES_LAST XDATA1, [KEYS + 14*16]
176 %endif
177 movdqu [OUT + 0*16], XDATA0
178 movdqu [OUT + 1*16], XDATA1
179
180 cmp LEN, 2*16
181 je %%done
182 jmp %%main_loop
183
184
185 align 16
186 %%initial_1:
187 ; load plain/cipher text
188 movdqu XDATA0, [IN + 0*16]
189
190 movdqa XKEY0, [KEYS + 0*16]
191
192 pxor XDATA0, XKEY0 ; 0. ARK
193
194 movdqa XKEY2, [KEYS + 2*16]
195
196 AES XDATA0, [KEYS + 1*16] ; 1. ENC
197
198 mov IDX, 1*16
199
200 AES XDATA0, XKEY2 ; 2. ENC
201
202 movdqa XKEY4, [KEYS + 4*16]
203
204 AES XDATA0, [KEYS + 3*16] ; 3. ENC
205
206 AES XDATA0, XKEY4 ; 4. ENC
207
208 movdqa XKEY6, [KEYS + 6*16]
209
210 AES XDATA0, [KEYS + 5*16] ; 5. ENC
211
212 AES XDATA0, XKEY6 ; 6. ENC
213
214 movdqa XKEY_B, [KEYS + 8*16]
215
216 AES XDATA0, [KEYS + 7*16] ; 7. ENC
217
218 AES XDATA0, XKEY_B ; 8. ENC
219
220 movdqa XKEY10, [KEYS + 10*16]
221
222 AES XDATA0, [KEYS + 9*16] ; 9. ENC
223
224 %if %%NROUNDS >= 12
225 AES XDATA0, XKEY10 ; 10. ENC
226
227 AES XDATA0, [KEYS + 11*16] ; 11. ENC
228 %endif
229
230 %if %%NROUNDS == 14
231 AES XDATA0, [KEYS + 12*16] ; 12. ENC
232
233 AES XDATA0, [KEYS + 13*16] ; 13. ENC
234 %endif
235
236 %if %%NROUNDS == 10
237
238 AES_LAST XDATA0, XKEY10 ; 10. ENC
239 %elif %%NROUNDS == 12
240 AES_LAST XDATA0, [KEYS + 12*16] ; 12. ENC
241 %else
242 AES_LAST XDATA0, [KEYS + 14*16] ; 14. ENC
243 %endif
244
245 movdqu [OUT + 0*16], XDATA0
246
247 cmp LEN, 1*16
248 je %%done
249 jmp %%main_loop
250
251
252 %%initial_3:
253 ; load plain/cipher text
254 movdqu XDATA0, [IN + 0*16]
255 movdqu XDATA1, [IN + 1*16]
256 movdqu XDATA2, [IN + 2*16]
257
258 movdqa XKEY0, [KEYS + 0*16]
259
260 movdqa XKEY_A, [KEYS + 1*16]
261
262 pxor XDATA0, XKEY0 ; 0. ARK
263 pxor XDATA1, XKEY0
264 pxor XDATA2, XKEY0
265
266 movdqa XKEY2, [KEYS + 2*16]
267
268 AES XDATA0, XKEY_A ; 1. ENC
269 AES XDATA1, XKEY_A
270 AES XDATA2, XKEY_A
271
272 movdqa XKEY_A, [KEYS + 3*16]
273 mov IDX, 3*16
274
275 AES XDATA0, XKEY2 ; 2. ENC
276 AES XDATA1, XKEY2
277 AES XDATA2, XKEY2
278
279 movdqa XKEY4, [KEYS + 4*16]
280
281 AES XDATA0, XKEY_A ; 3. ENC
282 AES XDATA1, XKEY_A
283 AES XDATA2, XKEY_A
284
285 movdqa XKEY_A, [KEYS + 5*16]
286
287 AES XDATA0, XKEY4 ; 4. ENC
288 AES XDATA1, XKEY4
289 AES XDATA2, XKEY4
290
291 movdqa XKEY6, [KEYS + 6*16]
292
293 AES XDATA0, XKEY_A ; 5. ENC
294 AES XDATA1, XKEY_A
295 AES XDATA2, XKEY_A
296
297 movdqa XKEY_A, [KEYS + 7*16]
298
299 AES XDATA0, XKEY6 ; 6. ENC
300 AES XDATA1, XKEY6
301 AES XDATA2, XKEY6
302
303 movdqa XKEY_B, [KEYS + 8*16]
304
305 AES XDATA0, XKEY_A ; 7. ENC
306 AES XDATA1, XKEY_A
307 AES XDATA2, XKEY_A
308
309 movdqa XKEY_A, [KEYS + 9*16]
310
311 AES XDATA0, XKEY_B ; 8. ENC
312 AES XDATA1, XKEY_B
313 AES XDATA2, XKEY_B
314
315 movdqa XKEY_B, [KEYS + 10*16]
316
317 AES XDATA0, XKEY_A ; 9. ENC
318 AES XDATA1, XKEY_A
319 AES XDATA2, XKEY_A
320
321 %if %%NROUNDS >= 12
322 movdqa XKEY_A, [KEYS + 11*16]
323
324 AES XDATA0, XKEY_B ; 10. ENC
325 AES XDATA1, XKEY_B
326 AES XDATA2, XKEY_B
327
328 movdqa XKEY_B, [KEYS + 12*16]
329
330 AES XDATA0, XKEY_A ; 11. ENC
331 AES XDATA1, XKEY_A
332 AES XDATA2, XKEY_A
333
334 %endif
335
336 %if %%NROUNDS == 14
337 movdqa XKEY_A, [KEYS + 13*16]
338
339 AES XDATA0, XKEY_B ; 12. ENC
340 AES XDATA1, XKEY_B
341 AES XDATA2, XKEY_B
342
343 movdqa XKEY_B, [KEYS + 14*16]
344
345 AES XDATA0, XKEY_A ; 13. ENC
346 AES XDATA1, XKEY_A
347 AES XDATA2, XKEY_A
348 %endif
349
350 AES_LAST XDATA0, XKEY_B ; 10/12/14. ENC (depending on key size)
351 AES_LAST XDATA1, XKEY_B
352 AES_LAST XDATA2, XKEY_B
353
354 movdqu [OUT + 0*16], XDATA0
355 movdqu [OUT + 1*16], XDATA1
356 movdqu [OUT + 2*16], XDATA2
357
358 cmp LEN, 3*16
359 je %%done
360 jmp %%main_loop
361
362
363 align 16
364 %%initial_4:
365 ; load plain/cipher text
366 movdqu XDATA0, [IN + 0*16]
367 movdqu XDATA1, [IN + 1*16]
368 movdqu XDATA2, [IN + 2*16]
369 movdqu XDATA3, [IN + 3*16]
370
371 movdqa XKEY0, [KEYS + 0*16]
372
373 movdqa XKEY_A, [KEYS + 1*16]
374
375 pxor XDATA0, XKEY0 ; 0. ARK
376 pxor XDATA1, XKEY0
377 pxor XDATA2, XKEY0
378 pxor XDATA3, XKEY0
379
380 movdqa XKEY2, [KEYS + 2*16]
381
382 AES XDATA0, XKEY_A ; 1. ENC
383 AES XDATA1, XKEY_A
384 AES XDATA2, XKEY_A
385 AES XDATA3, XKEY_A
386
387 movdqa XKEY_A, [KEYS + 3*16]
388
389 mov IDX, 4*16
390
391 AES XDATA0, XKEY2 ; 2. ENC
392 AES XDATA1, XKEY2
393 AES XDATA2, XKEY2
394 AES XDATA3, XKEY2
395
396 movdqa XKEY4, [KEYS + 4*16]
397
398 AES XDATA0, XKEY_A ; 3. ENC
399 AES XDATA1, XKEY_A
400 AES XDATA2, XKEY_A
401 AES XDATA3, XKEY_A
402
403 movdqa XKEY_A, [KEYS + 5*16]
404
405 AES XDATA0, XKEY4 ; 4. ENC
406 AES XDATA1, XKEY4
407 AES XDATA2, XKEY4
408 AES XDATA3, XKEY4
409
410 movdqa XKEY6, [KEYS + 6*16]
411
412 AES XDATA0, XKEY_A ; 5. ENC
413 AES XDATA1, XKEY_A
414 AES XDATA2, XKEY_A
415 AES XDATA3, XKEY_A
416
417 movdqa XKEY_A, [KEYS + 7*16]
418
419 AES XDATA0, XKEY6 ; 6. ENC
420 AES XDATA1, XKEY6
421 AES XDATA2, XKEY6
422 AES XDATA3, XKEY6
423
424 movdqa XKEY_B, [KEYS + 8*16]
425
426 AES XDATA0, XKEY_A ; 7. ENC
427 AES XDATA1, XKEY_A
428 AES XDATA2, XKEY_A
429 AES XDATA3, XKEY_A
430
431 movdqa XKEY_A, [KEYS + 9*16]
432
433 AES XDATA0, XKEY_B ; 8. ENC
434 AES XDATA1, XKEY_B
435 AES XDATA2, XKEY_B
436 AES XDATA3, XKEY_B
437
438 movdqa XKEY_B, [KEYS + 10*16]
439
440 AES XDATA0, XKEY_A ; 9. ENC
441 AES XDATA1, XKEY_A
442 AES XDATA2, XKEY_A
443 AES XDATA3, XKEY_A
444
445 %if %%NROUNDS >= 12
446 movdqa XKEY_A, [KEYS + 11*16]
447
448 AES XDATA0, XKEY_B ; 10. ENC
449 AES XDATA1, XKEY_B
450 AES XDATA2, XKEY_B
451 AES XDATA3, XKEY_B
452
453 movdqa XKEY_B, [KEYS + 12*16]
454
455 AES XDATA0, XKEY_A ; 11. ENC
456 AES XDATA1, XKEY_A
457 AES XDATA2, XKEY_A
458 AES XDATA3, XKEY_A
459 %endif
460
461 %if %%NROUNDS == 14
462 movdqa XKEY_A, [KEYS + 13*16]
463
464 AES XDATA0, XKEY_B ; 12. ENC
465 AES XDATA1, XKEY_B
466 AES XDATA2, XKEY_B
467 AES XDATA3, XKEY_B
468
469 movdqa XKEY_B, [KEYS + 14*16]
470
471 AES XDATA0, XKEY_A ; 13. ENC
472 AES XDATA1, XKEY_A
473 AES XDATA2, XKEY_A
474 AES XDATA3, XKEY_A
475 %endif
476
477 AES_LAST XDATA0, XKEY_B ; 10/12/14. ENC (depending on key size)
478 AES_LAST XDATA1, XKEY_B
479 AES_LAST XDATA2, XKEY_B
480 AES_LAST XDATA3, XKEY_B
481
482 movdqu [OUT + 0*16], XDATA0
483 movdqu [OUT + 1*16], XDATA1
484 movdqu [OUT + 2*16], XDATA2
485 movdqu [OUT + 3*16], XDATA3
486
487 cmp LEN, 4*16
488 jz %%done
489 jmp %%main_loop
490
491 align 16
492 %%main_loop:
493 ; load plain/cipher text
494 movdqu XDATA0, [IN + IDX + 0*16]
495 movdqu XDATA1, [IN + IDX + 1*16]
496 movdqu XDATA2, [IN + IDX + 2*16]
497 movdqu XDATA3, [IN + IDX + 3*16]
498
499 movdqa XKEY_A, [KEYS + 1*16]
500
501 pxor XDATA0, XKEY0 ; 0. ARK
502 pxor XDATA1, XKEY0
503 pxor XDATA2, XKEY0
504 pxor XDATA3, XKEY0
505
506 add IDX, 4*16
507
508 AES XDATA0, XKEY_A ; 1. ENC
509 AES XDATA1, XKEY_A
510 AES XDATA2, XKEY_A
511 AES XDATA3, XKEY_A
512
513 movdqa XKEY_A, [KEYS + 3*16]
514
515 AES XDATA0, XKEY2 ; 2. ENC
516 AES XDATA1, XKEY2
517 AES XDATA2, XKEY2
518 AES XDATA3, XKEY2
519
520 AES XDATA0, XKEY_A ; 3. ENC
521 AES XDATA1, XKEY_A
522 AES XDATA2, XKEY_A
523 AES XDATA3, XKEY_A
524
525 movdqa XKEY_A, [KEYS + 5*16]
526
527 AES XDATA0, XKEY4 ; 4. ENC
528 AES XDATA1, XKEY4
529 AES XDATA2, XKEY4
530 AES XDATA3, XKEY4
531
532 AES XDATA0, XKEY_A ; 5. ENC
533 AES XDATA1, XKEY_A
534 AES XDATA2, XKEY_A
535 AES XDATA3, XKEY_A
536
537 movdqa XKEY_A, [KEYS + 7*16]
538
539 AES XDATA0, XKEY6 ; 6. ENC
540 AES XDATA1, XKEY6
541 AES XDATA2, XKEY6
542 AES XDATA3, XKEY6
543
544 movdqa XKEY_B, [KEYS + 8*16]
545
546 AES XDATA0, XKEY_A ; 7. ENC
547 AES XDATA1, XKEY_A
548 AES XDATA2, XKEY_A
549 AES XDATA3, XKEY_A
550
551 movdqa XKEY_A, [KEYS + 9*16]
552
553 AES XDATA0, XKEY_B ; 8. ENC
554 AES XDATA1, XKEY_B
555 AES XDATA2, XKEY_B
556 AES XDATA3, XKEY_B
557
558 movdqa XKEY_B, [KEYS + 10*16]
559
560 AES XDATA0, XKEY_A ; 9. ENC
561 AES XDATA1, XKEY_A
562 AES XDATA2, XKEY_A
563 AES XDATA3, XKEY_A
564
565 %if %%NROUNDS >= 12
566 movdqa XKEY_A, [KEYS + 11*16]
567
568 AES XDATA0, XKEY_B ; 10. ENC
569 AES XDATA1, XKEY_B
570 AES XDATA2, XKEY_B
571 AES XDATA3, XKEY_B
572
573 movdqa XKEY_B, [KEYS + 12*16]
574
575 AES XDATA0, XKEY_A ; 11. ENC
576 AES XDATA1, XKEY_A
577 AES XDATA2, XKEY_A
578 AES XDATA3, XKEY_A
579 %endif
580
581 %if %%NROUNDS == 14
582 movdqa XKEY_A, [KEYS + 13*16]
583
584 AES XDATA0, XKEY_B ; 12. ENC
585 AES XDATA1, XKEY_B
586 AES XDATA2, XKEY_B
587 AES XDATA3, XKEY_B
588
589 movdqa XKEY_B, [KEYS + 14*16]
590
591 AES XDATA0, XKEY_A ; 13. ENC
592 AES XDATA1, XKEY_A
593 AES XDATA2, XKEY_A
594 AES XDATA3, XKEY_A
595 %endif
596
597 AES_LAST XDATA0, XKEY_B ; 10/12/14. ENC (depending on key size)
598 AES_LAST XDATA1, XKEY_B
599 AES_LAST XDATA2, XKEY_B
600 AES_LAST XDATA3, XKEY_B
601
602 movdqu [OUT + IDX + 0*16 - 4*16], XDATA0
603 movdqu [OUT + IDX + 1*16 - 4*16], XDATA1
604 movdqu [OUT + IDX + 2*16 - 4*16], XDATA2
605 movdqu [OUT + IDX + 3*16 - 4*16], XDATA3
606
607 cmp IDX, LEN
608 jne %%main_loop
609
610 %%done:
611
612 ret
613
614 %endmacro
615
616 align 16
617 MKGLOBAL(AES_ECB_ENC_128,function,internal)
618 AES_ECB_ENC_128:
619
620 AES_ECB 10, ENC
621
622 align 16
623 MKGLOBAL(AES_ECB_ENC_192,function,internal)
624 AES_ECB_ENC_192:
625
626 AES_ECB 12, ENC
627
628 align 16
629 MKGLOBAL(AES_ECB_ENC_256,function,internal)
630 AES_ECB_ENC_256:
631
632 AES_ECB 14, ENC
633
634 align 16
635 MKGLOBAL(AES_ECB_DEC_128,function,internal)
636 AES_ECB_DEC_128:
637
638 AES_ECB 10, DEC
639
640 align 16
641 MKGLOBAL(AES_ECB_DEC_192,function,internal)
642 AES_ECB_DEC_192:
643
644 AES_ECB 12, DEC
645
646 align 16
647 MKGLOBAL(AES_ECB_DEC_256,function,internal)
648 AES_ECB_DEC_256:
649
650 AES_ECB 14, DEC
651
652 %ifdef LINUX
653 section .note.GNU-stack noalloc noexec nowrite progbits
654 %endif