; Source: git.proxmox.com, ceph.git (blob view)
; Revision: "update ceph source to reef 18.1.2"
; Path: ceph/src/crypto/isa-l/isa-l_crypto/aes/cbc_dec_vaes_avx512.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2019-2021 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "aes_common.asm"
31 %include "reg_sizes.asm"
32
33 %if (AS_FEATURE_LEVEL) >= 10
34
35 [bits 64]
36 default rel
37
;; ZMM working registers.
;; zIV carries the CBC chaining value in its top 128-bit lane (index 3).
;; NOTE: zmm6-zmm8 (zTMP1-zTMP3) overlap xmm6-xmm8, which the Microsoft x64
;;       ABI treats as callee-saved.
%define zIV             zmm0
%define zBLK_0_3        zmm1    ; cipher/plain blocks 0-3
%define zBLK_4_7        zmm2    ; cipher/plain blocks 4-7
%define zBLK_8_11       zmm3    ; cipher/plain blocks 8-11
%define zBLK_12_15      zmm4    ; cipher/plain blocks 12-15
%define zTMP0           zmm5
%define zTMP1           zmm6
%define zTMP2           zmm7
%define zTMP3           zmm8

;; Round key registers, filled by LOAD_KEYS (one round key per register).
%define ZKEY0           zmm17
%define ZKEY1           zmm18
%define ZKEY2           zmm19
%define ZKEY3           zmm20
%define ZKEY4           zmm21
%define ZKEY5           zmm22
%define ZKEY6           zmm23
%define ZKEY7           zmm24
%define ZKEY8           zmm25
%define ZKEY9           zmm26
%define ZKEY10          zmm27
%define ZKEY11          zmm28
%define ZKEY12          zmm29
%define ZKEY13          zmm30
%define ZKEY14          zmm31
63
;; Argument registers for (in, IV, keys, out, num_bytes).
%ifidn __OUTPUT_FORMAT__, elf64
;; elf64: all five arguments arrive in registers
%define p_in            rdi
%define p_IV            rsi
%define p_keys          rdx
%define p_out           rcx
%define num_bytes       r8
%else
;; otherwise: first four arguments in registers; num_bytes lives in rax
;; (the win64 entry points load it from the stack)
%define p_in            rcx
%define p_IV            rdx
%define p_keys          r8
%define p_out           r9
%define num_bytes       rax
%endif

;; GP scratch registers
%define tmp             r10
%define tmp2            r11
80
;; OFFSET: distance in bytes from one processed 16-byte block to the next.
;; It is handed to the *_OFFSET load/store helpers and scales the pointer
;; advance after every 16 blocks.
%ifdef CBCS
%define OFFSET          160     ; one block out of every ten 16-byte blocks
%else
%define OFFSET          16      ; blocks are contiguous
%endif
86
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; LOAD_KEYS - preload the round keys into ZMM registers
;;; - reads (NROUNDS + 2) consecutive 16-byte round keys starting at the
;;;   key pointer
;;; - round key n is broadcast to every 128-bit lane of ZKEYn
;;; - writes ZKEY0 .. ZKEY(NROUNDS + 1) and redefines the assembly-time
;;;   counter `i`
%macro LOAD_KEYS 2
%define %%KEY_PTR       %1      ; [in] GP register, pointer to round keys
%define %%NUM_ROUNDS    %2      ; [in] numerical value, number of AES rounds
                                ;      excluding the first and last rounds
                                ;      (AES-128 -> 9)

%assign i 0
%rep (%%NUM_ROUNDS + 2)
        vbroadcastf64x2 ZKEY %+ i, [%%KEY_PTR + 16*i]
%assign i (i + 1)
%endrep

%endmacro
103
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; FINAL_BLOCKS - decrypt the trailing blocks left over after the
;;; 16-blocks-at-a-time loop.
;;; - handles a fixed, assembly-time block count of 1 to 15 (never 0)
;;; - expects ZKEY registers to be preloaded (see LOAD_KEYS)
;;; - leaves the last cipher block of this group in lane 3 of the
;;;   chaining register
;;; - redefines the assembly-time counter `j`

%macro FINAL_BLOCKS 14
%define %%DST           %1      ; [in] output buffer
%define %%SRC           %2      ; [in] input buffer
%define %%CHAIN         %3      ; [in/out] ZMM with IV/last cipher blk (in idx 3)
%define %%NBLOCKS       %4      ; [in] numerical value (1 - 15)
%define %%BLK_0_3       %5      ; [out] ZMM, blocks 0-3
%define %%BLK_4_7       %6      ; [out] ZMM, blocks 4-7
%define %%BLK_8_11      %7      ; [out] ZMM, blocks 8-11
%define %%BLK_12_15     %8      ; [out] ZMM, blocks 12-15
%define %%PREV_0_3      %9      ; [clobbered] ZMM temporary
%define %%PREV_4_7      %10     ; [clobbered] ZMM temporary
%define %%PREV_8_11     %11     ; [clobbered] ZMM temporary
%define %%PREV_12_15    %12     ; [clobbered] ZMM temporary
%define %%GP_TMP        %13     ; [clobbered] GP temporary (not referenced here)
%define %%NROUNDS       %14     ; [in] number of rounds; numerical value

        ;; fetch the cipher text blocks
%ifdef CBCS
        ZMM_LOAD_BLOCKS_0_16_OFFSET %%NBLOCKS, %%SRC, \
                        OFFSET, %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15
%else
        ZMM_LOAD_BLOCKS_0_16 %%NBLOCKS, %%SRC, 0, \
                        %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15
%endif

        ;; For every block build the block that precedes it in the stream
        ;; (shift the stream right by one 128-bit lane). These are XOR'd
        ;; into the AESDEC output further down.
        valignq         %%PREV_0_3, %%BLK_0_3, %%CHAIN, 6
%if %%NBLOCKS > 4
        valignq         %%PREV_4_7, %%BLK_4_7, %%BLK_0_3, 6
%endif
%if %%NBLOCKS > 8
        valignq         %%PREV_8_11, %%BLK_8_11, %%BLK_4_7, 6
%endif
%if %%NBLOCKS > 12
        valignq         %%PREV_12_15, %%BLK_12_15, %%BLK_8_11, 6
%endif

        ;; Move the last cipher block of this group into lane 3 of the
        ;; chaining register. The block sits in lane ((NBLOCKS - 1) & 3) of
        ;; the register covering it, so rotate that register right by
        ;; (NBLOCKS & 3) lanes, i.e. twice as many qwords; no rotation is
        ;; needed when the group ends on a full register.
        ;; Only emitted for the supported 1-15 range.
%if (%%NBLOCKS >= 1) && (%%NBLOCKS <= 15)
  %if %%NBLOCKS <= 4
    %define %%TAIL_ZMM %%BLK_0_3
  %elif %%NBLOCKS <= 8
    %define %%TAIL_ZMM %%BLK_4_7
  %elif %%NBLOCKS <= 12
    %define %%TAIL_ZMM %%BLK_8_11
  %else
    %define %%TAIL_ZMM %%BLK_12_15
  %endif
  %assign %%ROT_QWORDS ((%%NBLOCKS & 3) * 2)
  %if %%ROT_QWORDS == 0
        vmovdqa64       %%CHAIN, %%TAIL_ZMM
  %else
        valignq         %%CHAIN, %%TAIL_ZMM, %%TAIL_ZMM, %%ROT_QWORDS
  %endif
%endif

        ;; run all AES rounds, one ZKEY register per round
%assign j 0
%rep (%%NROUNDS + 2)
        ZMM_AESDEC_ROUND_BLOCKS_0_16 %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15, \
                        ZKEY %+ j, j, no_data, no_data, no_data, no_data, \
                        %%NBLOCKS, %%NROUNDS
%assign j (j + 1)
%endrep

        ;; XOR with the preceding cipher blocks to get plain text
        vpxorq          %%BLK_0_3, %%BLK_0_3, %%PREV_0_3
%if %%NBLOCKS > 4
        vpxorq          %%BLK_4_7, %%BLK_4_7, %%PREV_4_7
%endif
%if %%NBLOCKS > 8
        vpxorq          %%BLK_8_11, %%BLK_8_11, %%PREV_8_11
%endif
%if %%NBLOCKS > 12
        vpxorq          %%BLK_12_15, %%BLK_12_15, %%PREV_12_15
%endif

        ;; store the plain text
%ifdef CBCS
        ZMM_STORE_BLOCKS_0_16_OFFSET %%NBLOCKS, %%DST, \
                        OFFSET, %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15
%else
        ZMM_STORE_BLOCKS_0_16 %%NBLOCKS, %%DST, 0, \
                        %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15
%endif

%endmacro ; FINAL_BLOCKS
216
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; DECRYPT_16_PARALLEL - main AES-CBC decrypt step
;;; - single stream, 16 blocks per invocation
;;; - expects ZKEY registers to be preloaded (see LOAD_KEYS)
;;; - on exit: length reduced by 256 bytes, both buffer pointers advanced by
;;;   16 * OFFSET bytes, chaining register holds blocks 12-15 of the input
;;;   (last cipher block in lane 3)
;;; - redefines the assembly-time counter `j`
%macro DECRYPT_16_PARALLEL 14
%define %%DST           %1      ; [in/out] output buffer pointer
%define %%SRC           %2      ; [in/out] input buffer pointer
%define %%LEN           %3      ; [in/out] number of bytes to process
%define %%CHAIN         %4      ; [in/out] ZMM with IV (first block) or last cipher block (idx 3)
%define %%BLK_0_3       %5      ; [out] ZMM, blocks 0-3
%define %%BLK_4_7       %6      ; [out] ZMM, blocks 4-7
%define %%BLK_8_11      %7      ; [out] ZMM, blocks 8-11
%define %%BLK_12_15     %8      ; [out] ZMM, blocks 12-15
%define %%PREV_0_3      %9      ; [clobbered] ZMM temporary
%define %%PREV_4_7      %10     ; [clobbered] ZMM temporary
%define %%PREV_8_11     %11     ; [clobbered] ZMM temporary
%define %%PREV_12_15    %12     ; [clobbered] ZMM temporary
%define %%NROUNDS       %13     ; [in] number of rounds; numerical value
%define %%GP_TMP        %14     ; [clobbered] GP temporary (not referenced here)

        ;; fetch 16 cipher text blocks
%ifdef CBCS
        ZMM_LOAD_BLOCKS_0_16_OFFSET 16, %%SRC, OFFSET, \
                        %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15
%else
        vmovdqu8        %%BLK_0_3, [%%SRC]
        vmovdqu8        %%BLK_4_7, [%%SRC + 64]
        vmovdqu8        %%BLK_8_11, [%%SRC + 128]
        vmovdqu8        %%BLK_12_15, [%%SRC + 192]
%endif

        ;; For every block build the block that precedes it in the stream
        ;; (stream shifted right by one 128-bit lane); XOR'd in after AESDEC.
        valignq         %%PREV_12_15, %%BLK_12_15, %%BLK_8_11, 6
        valignq         %%PREV_8_11, %%BLK_8_11, %%BLK_4_7, 6
        valignq         %%PREV_4_7, %%BLK_4_7, %%BLK_0_3, 6
        valignq         %%PREV_0_3, %%BLK_0_3, %%CHAIN, 6

        ;; Keep blocks 12-15 as the chaining value for the next group; this
        ;; must happen before the AES rounds overwrite them.
        vmovdqa64       %%CHAIN, %%BLK_12_15

        ;; run all AES rounds, one ZKEY register per round
%assign j 0
%rep (%%NROUNDS + 2)
        ZMM_AESDEC_ROUND_BLOCKS_0_16 %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15, \
                        ZKEY %+ j, j, no_data, no_data, no_data, no_data, \
                        16, %%NROUNDS
%assign j (j + 1)
%endrep

        ;; XOR with the preceding cipher blocks to get plain text
        vpxorq          %%BLK_0_3, %%BLK_0_3, %%PREV_0_3
        vpxorq          %%BLK_4_7, %%BLK_4_7, %%PREV_4_7
        vpxorq          %%BLK_8_11, %%BLK_8_11, %%PREV_8_11
        vpxorq          %%BLK_12_15, %%BLK_12_15, %%PREV_12_15

        ;; store the plain text
%ifdef CBCS
        ZMM_STORE_BLOCKS_0_16_OFFSET 16, %%DST, OFFSET, \
                        %%BLK_0_3, %%BLK_4_7, \
                        %%BLK_8_11, %%BLK_12_15
%else
        vmovdqu8        [%%DST], %%BLK_0_3
        vmovdqu8        [%%DST + 64], %%BLK_4_7
        vmovdqu8        [%%DST + 128], %%BLK_8_11
        vmovdqu8        [%%DST + 192], %%BLK_12_15
%endif

        ;; 16 blocks of 16 bytes consumed; step both pointers past them
        sub             %%LEN, (16 * 16)
        add             %%SRC, (16 * OFFSET)
        add             %%DST, (16 * OFFSET)

%endmacro ; DECRYPT_16_PARALLEL
289
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; AES_CBC_DEC - decrypt a buffer in CBC mode.
;;; Flow:
;;; - return immediately when the length is 0
;;; - place the IV in lane 3 of zIV and preload the round keys
;;; - while at least 256 bytes remain, decrypt 16 blocks at a time
;;; - decrypt the remaining 1-15 whole blocks; bytes beyond the last whole
;;;   16-byte block are not processed
;;; Clobbers: zIV, zBLK_*, zTMP0-zTMP3, ZKEY0..ZKEY(NROUNDS + 1), the length
;;; register, both buffer pointers and flags.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro AES_CBC_DEC 7
%define %%SRC           %1      ;; [in] pointer to input buffer
%define %%DST           %2      ;; [in] pointer to output buffer
%define %%KEY_PTR       %3      ;; [in] pointer to expanded keys
%define %%IV_PTR        %4      ;; [in] pointer to IV
%define %%LEN           %5      ;; [in/out] GP register with length in bytes
%define %%NROUNDS       %6      ;; [in] Number of AES rounds; numerical value
%define %%GP_TMP        %7      ;; [clobbered] GP register

        test            %%LEN, %%LEN
        jz              %%done

        ;; chaining value lives in the top 128-bit lane
        vinserti64x2    zIV, zIV, [%%IV_PTR], 3

        ;; preload keys
        LOAD_KEYS       %%KEY_PTR, %%NROUNDS

%%bulk_loop:
        cmp             %%LEN, 256
        jb              %%tail

        DECRYPT_16_PARALLEL %%DST, %%SRC, %%LEN, zIV, \
                        zBLK_0_3, zBLK_4_7, zBLK_8_11, zBLK_12_15, \
                        zTMP0, zTMP1, zTMP2, zTMP3, %%NROUNDS, %%GP_TMP
        jmp             %%bulk_loop

%%tail:
        ;; bytes -> whole blocks; fewer than 16 blocks remain at this point
        shr             %%LEN, 4
        and             %%LEN, 0xf
        jz              %%done

        ;; %%LEN is now in 1..15: binary search for the matching stanza
        cmp             %%LEN, 8
        je              %%tail_8
        jb              %%tail_1_to_7

        ;; 9..15
        cmp             %%LEN, 12
        je              %%tail_12
        jb              %%tail_9_to_11

        ;; 13..15
        cmp             %%LEN, 14
        je              %%tail_14
        jb              %%tail_13
        jmp             %%tail_15

%%tail_9_to_11:
        cmp             %%LEN, 10
        je              %%tail_10
        jb              %%tail_9
        jmp             %%tail_11

%%tail_1_to_7:
        cmp             %%LEN, 4
        je              %%tail_4
        jb              %%tail_1_to_3

        ;; 5..7
        cmp             %%LEN, 6
        je              %%tail_6
        jb              %%tail_5
        jmp             %%tail_7

%%tail_1_to_3:
        cmp             %%LEN, 2
        je              %%tail_2
        jb              %%tail_1
        jmp             %%tail_3


%%tail_1:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 1, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_2:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 2, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_3:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 3, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_4:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 4, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_5:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 5, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_6:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 6, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_7:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 7, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_8:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 8, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_9:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 9, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_10:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 10, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_11:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 11, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_12:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 12, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_13:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 13, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_14:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 14, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS
        jmp             %%done

%%tail_15:
        FINAL_BLOCKS    %%DST, %%SRC, zIV, 15, zBLK_0_3, zBLK_4_7, \
                        zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                        %%GP_TMP, %%NROUNDS

%%done:
%endmacro
465
466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
469
470 section .text
471
472 %ifndef CBCS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_128_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; AES-128 CBC decrypt (9 middle rounds, 11 round keys read from `keys`).
;; In:    in        - cipher text
;;        IV        - 16-byte initialization vector
;;        keys      - expanded round keys, 16 bytes each
;;        out       - plain text destination
;;        num_bytes - length in bytes; only whole 16-byte blocks are processed
;; Clobb: zmm0-zmm8, zmm17-zmm27, num_bytes/p_in/p_out registers, flags.
;;        On win64 xmm6-xmm8 are saved and restored, because they are
;;        callee-saved in the Microsoft x64 ABI and the low halves of
;;        zTMP1-zTMP3 (zmm6-zmm8) alias them.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
mk_global aes_cbc_dec_128_vaes_avx512,function,internal
aes_cbc_dec_128_vaes_avx512:
        endbranch
%ifidn __OUTPUT_FORMAT__, win64
        ;; 5th argument: above the return address and the 32-byte shadow space.
        ;; Must be read before rsp is moved.
        mov             num_bytes, [rsp + 8*5]
        ;; spill callee-saved xmm6-xmm8 (unaligned stores: rsp is not
        ;; 16-byte aligned here)
        sub             rsp, 3*16
        vmovdqu         [rsp + 0*16], xmm6
        vmovdqu         [rsp + 1*16], xmm7
        vmovdqu         [rsp + 2*16], xmm8
%endif
        AES_CBC_DEC     p_in, p_out, p_keys, p_IV, num_bytes, 9, tmp

%ifidn __OUTPUT_FORMAT__, win64
        ;; the macro's early-exit path also lands here, so the restore
        ;; always pairs with the spill above
        vmovdqu         xmm6, [rsp + 0*16]
        vmovdqu         xmm7, [rsp + 1*16]
        vmovdqu         xmm8, [rsp + 2*16]
        add             rsp, 3*16
%endif
        ret
485
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_192_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; AES-192 CBC decrypt (11 middle rounds, 13 round keys read from `keys`).
;; In:    in        - cipher text
;;        IV        - 16-byte initialization vector
;;        keys      - expanded round keys, 16 bytes each
;;        out       - plain text destination
;;        num_bytes - length in bytes; only whole 16-byte blocks are processed
;; Clobb: zmm0-zmm8, zmm17-zmm29, num_bytes/p_in/p_out registers, flags.
;;        On win64 xmm6-xmm8 are saved and restored, because they are
;;        callee-saved in the Microsoft x64 ABI and the low halves of
;;        zTMP1-zTMP3 (zmm6-zmm8) alias them.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
mk_global aes_cbc_dec_192_vaes_avx512,function,internal
aes_cbc_dec_192_vaes_avx512:
        endbranch
%ifidn __OUTPUT_FORMAT__, win64
        ;; 5th argument: above the return address and the 32-byte shadow space.
        ;; Must be read before rsp is moved.
        mov             num_bytes, [rsp + 8*5]
        ;; spill callee-saved xmm6-xmm8 (unaligned stores: rsp is not
        ;; 16-byte aligned here)
        sub             rsp, 3*16
        vmovdqu         [rsp + 0*16], xmm6
        vmovdqu         [rsp + 1*16], xmm7
        vmovdqu         [rsp + 2*16], xmm8
%endif
        AES_CBC_DEC     p_in, p_out, p_keys, p_IV, num_bytes, 11, tmp

%ifidn __OUTPUT_FORMAT__, win64
        ;; the macro's early-exit path also lands here, so the restore
        ;; always pairs with the spill above
        vmovdqu         xmm6, [rsp + 0*16]
        vmovdqu         xmm7, [rsp + 1*16]
        vmovdqu         xmm8, [rsp + 2*16]
        add             rsp, 3*16
%endif
        ret
498
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_256_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; AES-256 CBC decrypt (13 middle rounds, 15 round keys read from `keys`).
;; In:    in        - cipher text
;;        IV        - 16-byte initialization vector
;;        keys      - expanded round keys, 16 bytes each
;;        out       - plain text destination
;;        num_bytes - length in bytes; only whole 16-byte blocks are processed
;; Clobb: zmm0-zmm8, zmm17-zmm31, num_bytes/p_in/p_out registers, flags.
;;        On win64 xmm6-xmm8 are saved and restored, because they are
;;        callee-saved in the Microsoft x64 ABI and the low halves of
;;        zTMP1-zTMP3 (zmm6-zmm8) alias them.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
mk_global aes_cbc_dec_256_vaes_avx512,function,internal
aes_cbc_dec_256_vaes_avx512:
        endbranch
%ifidn __OUTPUT_FORMAT__, win64
        ;; 5th argument: above the return address and the 32-byte shadow space.
        ;; Must be read before rsp is moved.
        mov             num_bytes, [rsp + 8*5]
        ;; spill callee-saved xmm6-xmm8 (unaligned stores: rsp is not
        ;; 16-byte aligned here)
        sub             rsp, 3*16
        vmovdqu         [rsp + 0*16], xmm6
        vmovdqu         [rsp + 1*16], xmm7
        vmovdqu         [rsp + 2*16], xmm8
%endif
        AES_CBC_DEC     p_in, p_out, p_keys, p_IV, num_bytes, 13, tmp

%ifidn __OUTPUT_FORMAT__, win64
        ;; the macro's early-exit path also lands here, so the restore
        ;; always pairs with the spill above
        vmovdqu         xmm6, [rsp + 0*16]
        vmovdqu         xmm7, [rsp + 1*16]
        vmovdqu         xmm8, [rsp + 2*16]
        add             rsp, 3*16
%endif
        ret
511
512 %endif ;; CBCS
513
514 %else ; Assembler doesn't understand these opcodes. Add empty symbol for windows.
%ifidn __OUTPUT_FORMAT__, win64
        ;; nothing else is assembled in this configuration; emit an empty
        ;; placeholder symbol for win64 builds
        global  no_aes_cbc_dec_256_vaes_avx512
no_aes_cbc_dec_256_vaes_avx512:
%endif
519 %endif ; (AS_FEATURE_LEVEL) >= 10