]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/intel-ipsec-mb/avx512/aes_cbc_dec_vaes_avx512.asm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / avx512 / aes_cbc_dec_vaes_avx512.asm
CommitLineData
f67539c2
TL
1;;
2;; Copyright (c) 2019, Intel Corporation
3;;
4;; Redistribution and use in source and binary forms, with or without
5;; modification, are permitted provided that the following conditions are met:
6;;
7;; * Redistributions of source code must retain the above copyright notice,
8;; this list of conditions and the following disclaimer.
9;; * Redistributions in binary form must reproduce the above copyright
10;; notice, this list of conditions and the following disclaimer in the
11;; documentation and/or other materials provided with the distribution.
12;; * Neither the name of Intel Corporation nor the names of its contributors
13;; may be used to endorse or promote products derived from this software
14;; without specific prior written permission.
15;;
16;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26;;
27
28%include "include/os.asm"
29%include "include/reg_sizes.asm"
30%include "include/aes_common.asm"
31
;; ---------------------------------------------------------------------------
;; ZMM register roles
;;
;; zIV        - chaining value; the IV / previous cipher block lives in the
;;              top 128-bit lane (index 3)
;; zBLK_*     - sixteen data blocks, four 128-bit blocks per register
;; zTMP0-3    - scratch, hold the cipher blocks that are XOR'd in after AESDEC
;;
;; NOTE(review): zTMP1-zTMP3 (zmm6-zmm8) overlay xmm6-xmm8, which are
;; callee-saved in the Microsoft x64 calling convention. Nothing in this file
;; saves or restores them - confirm that callers handle this on Windows.
;; ---------------------------------------------------------------------------
%define zIV             zmm0
%define zBLK_0_3        zmm1
%define zBLK_4_7        zmm2
%define zBLK_8_11       zmm3
%define zBLK_12_15      zmm4
%define zTMP0           zmm5
%define zTMP1           zmm6
%define zTMP2           zmm7
%define zTMP3           zmm8

;; Round keys: ZKEYn holds round key n (filled in by LOAD_KEYS).
;; The names must stay of the form ZKEY<number>; the macros below build them
;; with token pasting (ZKEY %+ n).
%define ZKEY0           zmm17
%define ZKEY1           zmm18
%define ZKEY2           zmm19
%define ZKEY3           zmm20
%define ZKEY4           zmm21
%define ZKEY5           zmm22
%define ZKEY6           zmm23
%define ZKEY7           zmm24
%define ZKEY8           zmm25
%define ZKEY9           zmm26
%define ZKEY10          zmm27
%define ZKEY11          zmm28
%define ZKEY12          zmm29
%define ZKEY13          zmm30
%define ZKEY14          zmm31
57
;; ---------------------------------------------------------------------------
;; General purpose register roles for the entry points
;;      (void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;; ---------------------------------------------------------------------------
%ifndef LINUX
;; Non-Linux build: first four arguments in rcx, rdx, r8, r9.
%define p_in            rcx
%define p_IV            rdx
%define p_keys          r8
%define p_out           r9
%define num_bytes       rax     ; 5th argument - each entry point loads it from the stack
%else
;; Linux build: all five arguments arrive in registers.
%define p_in            rdi
%define p_IV            rsi
%define p_keys          rdx
%define p_out           rcx
%define num_bytes       r8
%endif

%define tmp             r10     ; GP scratch handed to AES_CBC_DEC
73
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; LOAD_KEYS - preload the expanded round keys into ZKEY registers
;;;
;;; %1 [in] GP register pointing at the expanded keys (16 bytes per round key)
;;; %2 [in] numerical value, number of AES rounds excluding the first and
;;;         last rounds (example: AES-128 -> 9)
;;;
;;; Emits (%2 + 2) loads; round key n is read from [%1 + 16*n] and broadcast
;;; to every 128-bit lane of ZKEYn, so ZKEY0 .. ZKEY(%2 + 1) are written.
;;; Side effect: the assembler variable `i` is left equal to (%2 + 2).
%macro LOAD_KEYS 2

%assign i 0
%rep (%2 + 2)
        vbroadcastf64x2 ZKEY %+ i, [%1 + 16*i]
%assign i (i + 1)
%endrep

%endmacro
90
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; FINAL_BLOCKS - decrypt the blocks left over after the 16-block loop
;;;
;;; The block count (%4) is an assemble-time constant in the range 1 to 15,
;;; so one copy of this macro is expanded per possible count. Data for groups
;;; of four blocks beyond the count is neither XOR'd nor rotated here; what is
;;; actually loaded, encrypted and stored for a partial group is decided by
;;; the ZMM_LOAD_BLOCKS_0_16 / ZMM_AESDEC_ROUND_BLOCKS_0_16 /
;;; ZMM_STORE_BLOCKS_0_16 helpers, which receive the same count.
;;;
;;; Side effect: the assembler variable `j` is left equal to (NROUNDS + 2).

%macro FINAL_BLOCKS 14
%define %%PLAIN_OUT             %1      ; [in] output buffer
%define %%CIPH_IN               %2      ; [in] input buffer
%define %%LAST_CIPH_BLK         %3      ; [in/out] ZMM with IV/last cipher blk (in idx 3)
%define %%num_final_blocks      %4      ; [in] numerical value (1 - 15)
%define %%CIPHER_PLAIN_0_3      %5      ; [out] ZMM next 0-3 cipher blocks
%define %%CIPHER_PLAIN_4_7      %6      ; [out] ZMM next 4-7 cipher blocks
%define %%CIPHER_PLAIN_8_11     %7      ; [out] ZMM next 8-11 cipher blocks
%define %%CIPHER_PLAIN_12_15    %8      ; [out] ZMM next 12-15 cipher blocks
%define %%ZT1                   %9      ; [clobbered] ZMM temporary
%define %%ZT2                   %10     ; [clobbered] ZMM temporary
%define %%ZT3                   %11     ; [clobbered] ZMM temporary
%define %%ZT4                   %12     ; [clobbered] ZMM temporary
%define %%IA0                   %13     ; GP temporary (not referenced in this macro)
%define %%NROUNDS               %14     ; [in] number of rounds; numerical value

        ;; fetch the remaining cipher text
        ZMM_LOAD_BLOCKS_0_16 %%num_final_blocks, %%CIPH_IN, 0, \
                        %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15

        ;; Build the XOR operands for after AESDEC: shifting the register
        ;; pair right by 6 qwords lines every block up with the cipher
        ;; block that precedes it (the IV for the very first one).
        valignq         %%ZT1, %%CIPHER_PLAIN_0_3, %%LAST_CIPH_BLK, 6
%if %%num_final_blocks > 4
        valignq         %%ZT2, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_0_3, 6
%endif
%if %%num_final_blocks > 8
        valignq         %%ZT3, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_4_7, 6
%endif
%if %%num_final_blocks > 12
        valignq         %%ZT4, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_8_11, 6
%endif

        ;; Leave the last cipher block in lane 3 of LAST_CIPH_BLK.
        ;; %%TAIL_GRP is the register holding that block; it sits in lane
        ;; ((count - 1) & 3), so rotating the register by (count & 3) * 2
        ;; qwords moves it to lane 3. A full group needs no rotation and is
        ;; copied as is.
%if (%%num_final_blocks >= 1) && (%%num_final_blocks <= 15)
%if %%num_final_blocks > 12
%define %%TAIL_GRP %%CIPHER_PLAIN_12_15
%elif %%num_final_blocks > 8
%define %%TAIL_GRP %%CIPHER_PLAIN_8_11
%elif %%num_final_blocks > 4
%define %%TAIL_GRP %%CIPHER_PLAIN_4_7
%else
%define %%TAIL_GRP %%CIPHER_PLAIN_0_3
%endif
%if (%%num_final_blocks & 3) == 0
        vmovdqa64       %%LAST_CIPH_BLK, %%TAIL_GRP
%else
        valignq         %%LAST_CIPH_BLK, %%TAIL_GRP, %%TAIL_GRP, ((%%num_final_blocks & 3) * 2)
%endif
%endif

        ;; run every AES round (key index 0 .. NROUNDS + 1) over the blocks
%assign j 0
%rep (%%NROUNDS + 2)
        ZMM_AESDEC_ROUND_BLOCKS_0_16 %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15, \
                        ZKEY %+ j, j, no_data, no_data, no_data, no_data, \
                        %%num_final_blocks, %%NROUNDS
%assign j (j + 1)
%endrep

        ;; CBC chaining: XOR in the previous cipher blocks to get plain text
        vpxorq          %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, %%ZT1
%if %%num_final_blocks > 4
        vpxorq          %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, %%ZT2
%endif
%if %%num_final_blocks > 8
        vpxorq          %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, %%ZT3
%endif
%if %%num_final_blocks > 12
        vpxorq          %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, %%ZT4
%endif

        ;; store the plain text
        ZMM_STORE_BLOCKS_0_16 %%num_final_blocks, %%PLAIN_OUT, 0, \
                        %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15

%endmacro ; FINAL_BLOCKS
192
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; DECRYPT_16_PARALLEL - AES-CBC decrypt of one 16-block (256-byte) chunk
;;; - single stream
;;; - on exit LENGTH has been reduced by 256 and both buffer pointers have
;;;   been advanced by 256
;;; - LAST_CIPH_BLK is overwritten with cipher blocks 12-15 of this chunk, so
;;;   its lane 3 holds the last cipher block for the next invocation
;;; - the assembler variable `j` is left equal to (NROUNDS + 2)
%macro DECRYPT_16_PARALLEL 14
%define %%PLAIN_OUT             %1      ; [in/out] output buffer pointer
%define %%CIPH_IN               %2      ; [in/out] input buffer pointer
%define %%LENGTH                %3      ; [in/out] number of bytes to process
%define %%LAST_CIPH_BLK         %4      ; [in/out] ZMM with IV (first block) or last cipher block (idx 3)
%define %%CIPHER_PLAIN_0_3      %5      ; [out] ZMM next 0-3 cipher blocks
%define %%CIPHER_PLAIN_4_7      %6      ; [out] ZMM next 4-7 cipher blocks
%define %%CIPHER_PLAIN_8_11     %7      ; [out] ZMM next 8-11 cipher blocks
%define %%CIPHER_PLAIN_12_15    %8      ; [out] ZMM next 12-15 cipher blocks
%define %%ZT1                   %9      ; [clobbered] ZMM temporary
%define %%ZT2                   %10     ; [clobbered] ZMM temporary
%define %%ZT3                   %11     ; [clobbered] ZMM temporary
%define %%ZT4                   %12     ; [clobbered] ZMM temporary
%define %%NROUNDS               %13     ; [in] number of rounds; numerical value
%define %%IA0                   %14     ; GP temporary (not referenced in this macro)

%define %%CHUNK_BYTES           (16 * 16)       ; 16 blocks of 16 bytes

        ;; read 16 cipher blocks, four per register
        vmovdqu8        %%CIPHER_PLAIN_0_3, [%%CIPH_IN]
        vmovdqu8        %%CIPHER_PLAIN_4_7, [%%CIPH_IN + 64]
        vmovdqu8        %%CIPHER_PLAIN_8_11, [%%CIPH_IN + 128]
        vmovdqu8        %%CIPHER_PLAIN_12_15, [%%CIPH_IN + 192]

        ;; XOR operands for after AESDEC: a right shift of each register pair
        ;; by 6 qwords pairs every block with the cipher block before it
        ;; (lane 3 of LAST_CIPH_BLK for block 0)
        valignq         %%ZT1, %%CIPHER_PLAIN_0_3, %%LAST_CIPH_BLK, 6
        valignq         %%ZT2, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_0_3, 6
        valignq         %%ZT3, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_4_7, 6
        valignq         %%ZT4, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_8_11, 6

        ;; keep cipher blocks 12-15 (still cipher text at this point) as the
        ;; chaining value for whatever is decrypted next
        vmovdqa64       %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15

        ;; run every AES round (key index 0 .. NROUNDS + 1) over all 16 blocks
%assign j 0
%rep (%%NROUNDS + 2)
        ZMM_AESDEC_ROUND_BLOCKS_0_16 %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15, \
                        ZKEY %+ j, j, no_data, no_data, no_data, no_data, \
                        16, %%NROUNDS
%assign j (j + 1)
%endrep

        ;; CBC chaining: XOR in the previous cipher blocks to get plain text
        vpxorq          %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, %%ZT1
        vpxorq          %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, %%ZT2
        vpxorq          %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, %%ZT3
        vpxorq          %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, %%ZT4

        ;; store the plain text
        vmovdqu8        [%%PLAIN_OUT], %%CIPHER_PLAIN_0_3
        vmovdqu8        [%%PLAIN_OUT + 64], %%CIPHER_PLAIN_4_7
        vmovdqu8        [%%PLAIN_OUT + 128], %%CIPHER_PLAIN_8_11
        vmovdqu8        [%%PLAIN_OUT + 192], %%CIPHER_PLAIN_12_15

        ;; account for the chunk just processed
        sub             %%LENGTH, %%CHUNK_BYTES
        add             %%CIPH_IN, %%CHUNK_BYTES
        add             %%PLAIN_OUT, %%CHUNK_BYTES

%endmacro ; DECRYPT_16_PARALLEL
255
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; AES_CBC_DEC macro decrypts given data.
;;; Flow:
;;; - Return immediately when LENGTH is 0
;;; - Load the IV into lane 3 of zIV and preload the round keys
;;; - Decrypt 16 blocks (256 bytes) at a time while at least 256 bytes remain
;;; - Decrypt the final 1-15 whole blocks, if any
;;; - Execute VZEROUPPER before leaving (skipped on the LENGTH == 0 path,
;;;   which never touches a vector register)
;;;
;;; Notes:
;;; - Only whole 16-byte blocks are processed; (LENGTH mod 16) trailing bytes
;;;   are ignored.
;;; - The memory at IV is only read, it is never updated.
;;; - Clobbers: zIV, zBLK_*, zTMP0-3, ZKEY0..ZKEY(NROUNDS + 1), LENGTH, flags.
;;;   CIPH_IN and PLAIN_OUT are advanced by the 16-block loop.
;;; - VZEROUPPER only clears bits 128 and up of zmm0-zmm15; the round keys in
;;;   the ZKEY registers (zmm17 and up) are still present on exit.
;;;   NOTE(review): confirm whether they need to be wiped for the callers.
;;; - TMP is forwarded to the helper macros, neither of which references it.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro AES_CBC_DEC 7
%define %%CIPH_IN       %1      ;; [in] pointer to input buffer
%define %%PLAIN_OUT     %2      ;; [in] pointer to output buffer
%define %%KEYS          %3      ;; [in] pointer to expanded keys
%define %%IV            %4      ;; [in] pointer to IV
%define %%LENGTH        %5      ;; [in/out] GP register with length in bytes
%define %%NROUNDS       %6      ;; [in] Number of AES rounds; numerical value
%define %%TMP           %7      ;; [clobbered] GP register

        test    %%LENGTH, %%LENGTH
        jz      %%cbc_dec_done

        ;; IV goes into the top 128-bit lane, where the valignq based
        ;; chaining in the helper macros expects the previous cipher block
        vinserti64x2 zIV, zIV, [%%IV], 3

        ;; preload keys
        LOAD_KEYS %%KEYS, %%NROUNDS

%%decrypt_16_parallel:
        cmp     %%LENGTH, 256
        jb      %%final_blocks

        DECRYPT_16_PARALLEL %%PLAIN_OUT, %%CIPH_IN, %%LENGTH, zIV, \
                        zBLK_0_3, zBLK_4_7, zBLK_8_11, zBLK_12_15, \
                        zTMP0, zTMP1, zTMP2, zTMP3, %%NROUNDS, %%TMP
        jmp     %%decrypt_16_parallel

%%final_blocks:
        ;; LENGTH < 256 here; turn it into the number of whole blocks left
        ;; (0 - 15). The AND also sets ZF for the "nothing left" check.
        shr     %%LENGTH, 4
        and     %%LENGTH, 0xf
        jz      %%cbc_dec_cleanup

        ;; Dispatch on the block count (1 - 15). The count is unsigned, so
        ;; unsigned conditions are used, and every group ends with an
        ;; unconditional jump so that no path can fall through into the
        ;; dispatch code of another range.
        cmp     %%LENGTH, 8
        je      %%final_num_blocks_is_8
        jb      %%final_blocks_is_1_7

        ; Final blocks 9-15
        cmp     %%LENGTH, 12
        je      %%final_num_blocks_is_12
        jb      %%final_blocks_is_9_11

        ; Final blocks 13-15
        cmp     %%LENGTH, 14
        je      %%final_num_blocks_is_14
        ja      %%final_num_blocks_is_15
        jmp     %%final_num_blocks_is_13

%%final_blocks_is_9_11:
        cmp     %%LENGTH, 10
        je      %%final_num_blocks_is_10
        ja      %%final_num_blocks_is_11
        jmp     %%final_num_blocks_is_9

%%final_blocks_is_1_7:
        cmp     %%LENGTH, 4
        je      %%final_num_blocks_is_4
        jb      %%final_blocks_is_1_3

        ; Final blocks 5-7
        cmp     %%LENGTH, 6
        je      %%final_num_blocks_is_6
        ja      %%final_num_blocks_is_7
        jmp     %%final_num_blocks_is_5

%%final_blocks_is_1_3:
        cmp     %%LENGTH, 2
        je      %%final_num_blocks_is_2
        ja      %%final_num_blocks_is_3
        jmp     %%final_num_blocks_is_1


%%final_num_blocks_is_15:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 15, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_14:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 14, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_13:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 13, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_12:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 12, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_11:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 11, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_10:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 10, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_9:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 9, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_8:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 8, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_7:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 7, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_6:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 6, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_5:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 5, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_4:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 4, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_3:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 3, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_2:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 2, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS
        jmp     %%cbc_dec_cleanup

%%final_num_blocks_is_1:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 1, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%TMP, %%NROUNDS

%%cbc_dec_cleanup:
        ;; zmm0-zmm8 were written above; clear the upper vector state so
        ;; that legacy SSE code in the caller does not pay AVX/SSE
        ;; transition penalties. The low 128 bits of every register are
        ;; left as they are.
        vzeroupper

%%cbc_dec_done:
%endmacro
431
432;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
433;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
434;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
435
section .text

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_128_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; CBC decrypt with NROUNDS = 9 (AES-128: rounds excluding the first and last).
;; Argument registers are the p_in / p_IV / p_keys / p_out / num_bytes aliases
;; defined at the top of the file; r10 (tmp) is handed over as scratch.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
MKGLOBAL(aes_cbc_dec_128_vaes_avx512,function,internal)
aes_cbc_dec_128_vaes_avx512:
%ifndef LINUX
        ;; 5th argument lives on the stack: skip the return address (8 bytes)
        ;; and the four 8-byte shadow slots
        mov     num_bytes, [rsp + 8 + 4*8]
%endif
        AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 9, tmp

        ret
449
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_192_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; CBC decrypt with NROUNDS = 11 (rounds excluding the first and last).
;; Argument registers are the p_in / p_IV / p_keys / p_out / num_bytes aliases
;; defined at the top of the file; r10 (tmp) is handed over as scratch.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
MKGLOBAL(aes_cbc_dec_192_vaes_avx512,function,internal)
aes_cbc_dec_192_vaes_avx512:
%ifndef LINUX
        ;; 5th argument lives on the stack: skip the return address (8 bytes)
        ;; and the four 8-byte shadow slots
        mov     num_bytes, [rsp + 8 + 4*8]
%endif
        AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 11, tmp

        ret
461
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_256_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; CBC decrypt with NROUNDS = 13 (rounds excluding the first and last).
;; Argument registers are the p_in / p_IV / p_keys / p_out / num_bytes aliases
;; defined at the top of the file; r10 (tmp) is handed over as scratch.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
MKGLOBAL(aes_cbc_dec_256_vaes_avx512,function,internal)
aes_cbc_dec_256_vaes_avx512:
%ifndef LINUX
        ;; 5th argument lives on the stack: skip the return address (8 bytes)
        ;; and the four 8-byte shadow slots
        mov     num_bytes, [rsp + 8 + 4*8]
%endif
        AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 13, tmp

        ret
473
;; Linux builds only: emit an empty, non-executable .note.GNU-stack section
%ifdef LINUX
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
477