;; AES-CBC decryption using VAES + AVX512 (16 blocks processed in parallel).
;;
;; Copyright (c) 2019, Intel Corporation
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are met:
;;
;;     * Redistributions of source code must retain the above copyright notice,
;;       this list of conditions and the following disclaimer.
;;     * Redistributions in binary form must reproduce the above copyright
;;       notice, this list of conditions and the following disclaimer in the
;;       documentation and/or other materials provided with the distribution.
;;     * Neither the name of Intel Corporation nor the names of its contributors
;;       may be used to endorse or promote products derived from this software
;;       without specific prior written permission.
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;
27 | ||
%include "include/os.asm"
%include "include/reg_sizes.asm"
%include "include/aes_common.asm"
31 | ||
;; ZMM register map.
;; zmm0-zmm8 hold the IV, data blocks and temporaries; zmm17-zmm31 hold the
;; broadcast round keys so the whole key schedule stays resident in registers.
%define zIV         zmm0    ; IV on entry; later the last cipher block (in block idx 3)
%define zBLK_0_3    zmm1    ; cipher/plain text blocks 0-3
%define zBLK_4_7    zmm2    ; cipher/plain text blocks 4-7
%define zBLK_8_11   zmm3    ; cipher/plain text blocks 8-11
%define zBLK_12_15  zmm4    ; cipher/plain text blocks 12-15
%define zTMP0       zmm5    ; scratch (NOTE(review): xmm5-8 are volatile on SysV but
%define zTMP1       zmm6    ; xmm6-8 are callee-saved on Win64; internal callers are
%define zTMP2       zmm7    ; presumably expected to handle this - confirm against
%define zTMP3       zmm8    ; the library's call sites)

;; Round keys: ZKEY<n> holds round key n broadcast to all four 128-bit lanes.
%define ZKEY0       zmm17
%define ZKEY1       zmm18
%define ZKEY2       zmm19
%define ZKEY3       zmm20
%define ZKEY4       zmm21
%define ZKEY5       zmm22
%define ZKEY6       zmm23
%define ZKEY7      	zmm24
%define ZKEY8       zmm25
%define ZKEY9       zmm26
%define ZKEY10      zmm27
%define ZKEY11      zmm28
%define ZKEY12      zmm29
%define ZKEY13      zmm30
%define ZKEY14      zmm31

;; Argument registers per ABI.
%ifdef LINUX
;; System V AMD64: args in rdi, rsi, rdx, rcx, r8
%define p_in        rdi     ; arg1: input buffer (cipher text)
%define p_IV        rsi     ; arg2: IV pointer
%define p_keys      rdx     ; arg3: expanded key schedule
%define p_out       rcx     ; arg4: output buffer (plain text)
%define num_bytes   r8      ; arg5: length in bytes
%else
;; Microsoft x64: args in rcx, rdx, r8, r9; 5th arg on the stack
%define p_in        rcx     ; arg1: input buffer (cipher text)
%define p_IV        rdx     ; arg2: IV pointer
%define p_keys      r8      ; arg3: expanded key schedule
%define p_out       r9      ; arg4: output buffer (plain text)
%define num_bytes   rax     ; arg5: loaded from the stack at function entry
%endif

%define tmp         r10     ; volatile GP scratch on both ABIs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Preload the whole expanded key schedule into ZKEY0..ZKEY(NROUNDS+1).
;;; Each 128-bit round key is broadcast into all four 128-bit lanes of the
;;; corresponding ZMM register, so one register serves four blocks per round.
;;; - uses ZKEY[0-14] registers (ZMM)
%macro LOAD_KEYS 2
%define %%KEYS          %1      ; [in] pointer to expanded key schedule
%define %%NROUNDS       %2      ; [in] numerical value, number of AES rounds
                                ;      excluding the first and last rounds
                                ;      (example: AES-128 -> value 9)

;; NROUNDS + 2 round keys in total (initial whitening + NROUNDS + final)
%assign round 0
%rep (%%NROUNDS + 2)
        vbroadcastf64x2 ZKEY %+ round, [%%KEYS + 16*round]
%assign round (round + 1)
%endrep

%endmacro
90 | ||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This macro is used to "cool down" pipeline after DECRYPT_16_PARALLEL macro
;;; code as the number of final blocks is variable.
;;; Processes the last %%num_final_blocks blocks (1 to 15, can't be 0)

%macro FINAL_BLOCKS 14
%define %%PLAIN_OUT             %1      ; [in] output buffer
%define %%CIPH_IN               %2      ; [in] input buffer
%define %%LAST_CIPH_BLK         %3      ; [in/out] ZMM with IV/last cipher blk (in idx 3)
%define %%num_final_blocks      %4      ; [in] numerical value (1 - 15)
%define %%CIPHER_PLAIN_0_3      %5      ; [out] ZMM next 0-3 cipher blocks
%define %%CIPHER_PLAIN_4_7      %6      ; [out] ZMM next 4-7 cipher blocks
%define %%CIPHER_PLAIN_8_11     %7      ; [out] ZMM next 8-11 cipher blocks
%define %%CIPHER_PLAIN_12_15    %8      ; [out] ZMM next 12-15 cipher blocks
%define %%ZT1                   %9      ; [clobbered] ZMM temporary
%define %%ZT2                   %10     ; [clobbered] ZMM temporary
%define %%ZT3                   %11     ; [clobbered] ZMM temporary
%define %%ZT4                   %12     ; [clobbered] ZMM temporary
%define %%IA0                   %13     ; [clobbered] GP temporary
%define %%NROUNDS               %14     ; [in] number of rounds; numerical value

        ;; load plain/cipher text
        ;; (helper from aes_common.asm; presumably loads only the lanes covered
        ;; by %%num_final_blocks - confirm against its definition)
        ZMM_LOAD_BLOCKS_0_16 %%num_final_blocks, %%CIPH_IN, 0, \
                        %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15

        ;; Prepare final cipher text blocks to
        ;; be XOR'd later after AESDEC.
        ;; valignq dst, s1, s2, 6 yields {s1 qwords 5..0, s2 qwords 7..6}, i.e.
        ;; shifts the cipher-block stream right by one 128-bit block so that
        ;; lane k of dst holds the cipher block PRECEDING block k of s1 (lane 0
        ;; receives the previous chunk's last block kept in %%LAST_CIPH_BLK idx 3).
        valignq         %%ZT1, %%CIPHER_PLAIN_0_3, %%LAST_CIPH_BLK, 6
%if %%num_final_blocks > 4
        valignq         %%ZT2, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_0_3, 6
%endif
%if %%num_final_blocks > 8
        valignq         %%ZT3, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_4_7, 6
%endif
%if %%num_final_blocks > 12
        valignq         %%ZT4, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_8_11, 6
%endif

        ;; Update IV with last cipher block
        ;; to be used later in DECRYPT_16_PARALLEL.
        ;; Rotate so the LAST loaded cipher block lands in block index 3
        ;; (qwords 6-7): imm 2/4/6 rotates right by 1/2/3 blocks respectively;
        ;; when the count is a multiple of 4 the last block is already at idx 3,
        ;; so a plain register copy suffices.
%if %%num_final_blocks == 1
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, 2
%elif %%num_final_blocks == 2
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, 4
%elif %%num_final_blocks == 3
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, 6
%elif %%num_final_blocks == 4
        vmovdqa64       %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3
%elif %%num_final_blocks == 5
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, 2
%elif %%num_final_blocks == 6
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, 4
%elif %%num_final_blocks == 7
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, 6
%elif %%num_final_blocks == 8
        vmovdqa64       %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7
%elif %%num_final_blocks == 9
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, 2
%elif %%num_final_blocks == 10
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, 4
%elif %%num_final_blocks == 11
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, 6
%elif %%num_final_blocks == 12
        vmovdqa64       %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11
%elif %%num_final_blocks == 13
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, 2
%elif %%num_final_blocks == 14
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, 4
%elif %%num_final_blocks == 15
        valignq         %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, 6
%endif

        ;; AES rounds: round j uses ZKEY<j>; the helper (aes_common.asm) is
        ;; given j and %%NROUNDS so it can select AESDEC vs AESDECLAST etc.
%assign j 0
%rep (%%NROUNDS + 2)
        ZMM_AESDEC_ROUND_BLOCKS_0_16 %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15, \
                        ZKEY %+ j, j, no_data, no_data, no_data, no_data, \
                        %%num_final_blocks, %%NROUNDS
%assign j (j + 1)
%endrep

        ;; XOR with decrypted blocks to get plain text (CBC chaining);
        ;; guards mirror the valignq preparation above so no uninitialized
        ;; temporary is ever consumed
        vpxorq          %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, %%ZT1
%if %%num_final_blocks > 4
        vpxorq          %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, %%ZT2
%endif
%if %%num_final_blocks > 8
        vpxorq          %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, %%ZT3
%endif
%if %%num_final_blocks > 12
        vpxorq          %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, %%ZT4
%endif

        ;; write plain text back to output
        ZMM_STORE_BLOCKS_0_16 %%num_final_blocks, %%PLAIN_OUT, 0, \
                        %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15

%endmacro       ; FINAL_BLOCKS
192 | ||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Main AES-CBC decrypt macro
;;; - operates on single stream
;;; - decrypts 16 blocks at a time
%macro DECRYPT_16_PARALLEL 14
%define %%PLAIN_OUT             %1      ; [in] output buffer
%define %%CIPH_IN               %2      ; [in] input buffer
%define %%LENGTH                %3      ; [in/out] number of bytes to process
%define %%LAST_CIPH_BLK         %4      ; [in/out] ZMM with IV (first block) or last cipher block (idx 3)
%define %%CIPHER_PLAIN_0_3      %5      ; [out] ZMM next 0-3 cipher blocks
%define %%CIPHER_PLAIN_4_7      %6      ; [out] ZMM next 4-7 cipher blocks
%define %%CIPHER_PLAIN_8_11     %7      ; [out] ZMM next 8-11 cipher blocks
%define %%CIPHER_PLAIN_12_15    %8      ; [out] ZMM next 12-15 cipher blocks
%define %%ZT1                   %9      ; [clobbered] ZMM temporary
%define %%ZT2                   %10     ; [clobbered] ZMM temporary
%define %%ZT3                   %11     ; [clobbered] ZMM temporary
%define %%ZT4                   %12     ; [clobbered] ZMM temporary
%define %%NROUNDS               %13     ; [in] number of rounds; numerical value
%define %%IA0                   %14     ; [clobbered] GP temporary
                                        ; NOTE(review): %%IA0 is not referenced in
                                        ; this macro body; presumably kept for
                                        ; interface symmetry - confirm

        ;; load 16 cipher text blocks (unaligned loads)
        vmovdqu8        %%CIPHER_PLAIN_0_3, [%%CIPH_IN]
        vmovdqu8        %%CIPHER_PLAIN_4_7, [%%CIPH_IN + 64]
        vmovdqu8        %%CIPHER_PLAIN_8_11, [%%CIPH_IN + 128]
        vmovdqu8        %%CIPHER_PLAIN_12_15, [%%CIPH_IN + 192]

        ;; prepare first set of cipher blocks for later XOR'ing:
        ;; valignq .., s1, s2, 6 shifts the block stream right by one 128-bit
        ;; block, so each lane holds the preceding cipher block (lane 0 of %%ZT1
        ;; gets the IV / previous chunk's last block from %%LAST_CIPH_BLK idx 3)
        valignq         %%ZT1, %%CIPHER_PLAIN_0_3, %%LAST_CIPH_BLK, 6
        valignq         %%ZT2, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_0_3, 6
        valignq         %%ZT3, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_4_7, 6
        valignq         %%ZT4, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_8_11, 6

        ;; store last cipher text block to be used for next 16 blocks
        ;; (must happen before AESDEC overwrites the cipher text in-place)
        vmovdqa64       %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15

        ;; AES rounds: round j uses ZKEY<j>; helper selects AESDEC/AESDECLAST
        ;; based on j and %%NROUNDS (see aes_common.asm)
%assign j 0
%rep (%%NROUNDS + 2)
        ZMM_AESDEC_ROUND_BLOCKS_0_16 %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \
                        %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15, \
                        ZKEY %+ j, j, no_data, no_data, no_data, no_data, \
                        16, %%NROUNDS
%assign j (j + 1)
%endrep

        ;; XOR with previous cipher blocks to complete CBC decryption
        vpxorq          %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, %%ZT1
        vpxorq          %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, %%ZT2
        vpxorq          %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, %%ZT3
        vpxorq          %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, %%ZT4

        ;; write plain text back to output
        vmovdqu8        [%%PLAIN_OUT], %%CIPHER_PLAIN_0_3
        vmovdqu8        [%%PLAIN_OUT + 64], %%CIPHER_PLAIN_4_7
        vmovdqu8        [%%PLAIN_OUT + 128], %%CIPHER_PLAIN_8_11
        vmovdqu8        [%%PLAIN_OUT + 192], %%CIPHER_PLAIN_12_15

        ;; adjust pointers and length by 16 blocks (256 bytes)
        sub             %%LENGTH, (16 * 16)
        add             %%CIPH_IN, (16 * 16)
        add             %%PLAIN_OUT, (16 * 16)

%endmacro       ; DECRYPT_16_PARALLEL
255 | ||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; AES_CBC_DEC macro decrypts given data.
;;; Flow:
;;; - Decrypt all blocks (multiple of 16) up to final 1-15 blocks
;;; - Decrypt final blocks (1-15 blocks)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro AES_CBC_DEC 7
%define %%CIPH_IN       %1      ;; [in] pointer to input buffer
%define %%PLAIN_OUT     %2      ;; [in] pointer to output buffer
%define %%KEYS          %3      ;; [in] pointer to expanded keys
%define %%IV            %4      ;; [in] pointer to IV
%define %%LENGTH        %5      ;; [in/out] GP register with length in bytes
                                ;; (assumed to be a multiple of 16; partial
                                ;; blocks are not handled here - TODO confirm
                                ;; against callers)
%define %%NROUNDS       %6      ;; [in] Number of AES rounds; numerical value
%define %%TMP           %7      ;; [clobbered] GP register

        ;; early exit on zero length
        cmp     %%LENGTH, 0
        je      %%cbc_dec_done

        ;; place IV in block index 3 (qwords 6-7) of zIV; only that lane is
        ;; ever consumed by the valignq chaining in the macros below
        vinserti64x2 zIV, zIV, [%%IV], 3

        ;; preload keys
        LOAD_KEYS %%KEYS, %%NROUNDS

%%decrypt_16_parallel:
        ;; main loop: consume full 16-block (256-byte) chunks
        cmp     %%LENGTH, 256
        jb      %%final_blocks

        DECRYPT_16_PARALLEL %%PLAIN_OUT, %%CIPH_IN, %%LENGTH, zIV, \
                        zBLK_0_3, zBLK_4_7, zBLK_8_11, zBLK_12_15, \
                        zTMP0, zTMP1, zTMP2, zTMP3, %%NROUNDS, %%TMP
        jmp     %%decrypt_16_parallel

%%final_blocks:
        ;; get num final blocks: %%LENGTH < 256 here, so LENGTH >> 4 is already
        ;; in 0..15; the AND additionally sets ZF for the je below
        shr     %%LENGTH, 4
        and     %%LENGTH, 0xf
        je      %%cbc_dec_done

        ;; dispatch on the final block count (1-15) with a compare tree;
        ;; signed jl is safe since the count is known to be in [1, 15]
        cmp     %%LENGTH, 8
        je      %%final_num_blocks_is_8
        jl      %%final_blocks_is_1_7

        ; Final blocks 9-15
        cmp     %%LENGTH, 12
        je      %%final_num_blocks_is_12
        jl      %%final_blocks_is_9_11

        ; Final blocks 13-15
        cmp     %%LENGTH, 15
        je      %%final_num_blocks_is_15
        cmp     %%LENGTH, 14
        je      %%final_num_blocks_is_14
        cmp     %%LENGTH, 13
        je      %%final_num_blocks_is_13

%%final_blocks_is_9_11:
        cmp     %%LENGTH, 11
        je      %%final_num_blocks_is_11
        cmp     %%LENGTH, 10
        je      %%final_num_blocks_is_10
        cmp     %%LENGTH, 9
        je      %%final_num_blocks_is_9

%%final_blocks_is_1_7:
        cmp     %%LENGTH, 4
        je      %%final_num_blocks_is_4
        jl      %%final_blocks_is_1_3

        ; Final blocks 5-7
        cmp     %%LENGTH, 7
        je      %%final_num_blocks_is_7
        cmp     %%LENGTH, 6
        je      %%final_num_blocks_is_6
        cmp     %%LENGTH, 5
        je      %%final_num_blocks_is_5

%%final_blocks_is_1_3:
        cmp     %%LENGTH, 3
        je      %%final_num_blocks_is_3
        cmp     %%LENGTH, 2
        je      %%final_num_blocks_is_2
        jmp     %%final_num_blocks_is_1

        ;; Each target below expands FINAL_BLOCKS with a compile-time block
        ;; count so dead lanes/branches are eliminated at assembly time.

%%final_num_blocks_is_15:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 15, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_14:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 14, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_13:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 13, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_12:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 12, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_11:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 11, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_10:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 10, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_9:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 9, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_8:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 8, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_7:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 7, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_6:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 6, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_5:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 5, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_4:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 4, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_3:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 3, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_2:
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 2, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS
        jmp     %%cbc_dec_done

%%final_num_blocks_is_1:
        ;; falls through to done - no jmp needed
        FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 1, zBLK_0_3, zBLK_4_7, \
                zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \
                %%TMP, %%NROUNDS

%%cbc_dec_done:
%endmacro
431 | ||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

section .text

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_128_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; AES-128-CBC decryption entry point; NROUNDS parameter 9 = middle rounds
;; (AES-128 key schedule has 9 + 2 round keys in total).
;; NOTE(review): zmm5-8 are clobbered (incl. xmm6-8, callee-saved on Win64)
;; and no vzeroupper is issued; marked "internal" so callers presumably
;; account for this - confirm at the library call sites.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
MKGLOBAL(aes_cbc_dec_128_vaes_avx512,function,internal)
aes_cbc_dec_128_vaes_avx512:
%ifndef LINUX
        ;; Windows x64: 5th argument is on the stack above the return address
        ;; and the 32-byte shadow space ([rsp + 8*5])
        mov     num_bytes, [rsp + 8*5]
%endif
        AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 9, tmp

        ret
449 | ||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_192_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; AES-192-CBC decryption entry point; NROUNDS parameter 11 = middle rounds
;; (AES-192 key schedule has 11 + 2 round keys in total).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
MKGLOBAL(aes_cbc_dec_192_vaes_avx512,function,internal)
aes_cbc_dec_192_vaes_avx512:
%ifndef LINUX
        ;; Windows x64: 5th argument is on the stack ([rsp + 8*5])
        mov     num_bytes, [rsp + 8*5]
%endif
        AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 11, tmp

        ret
461 | ||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aes_cbc_dec_256_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; AES-256-CBC decryption entry point; NROUNDS parameter 13 = middle rounds
;; (AES-256 key schedule has 13 + 2 round keys in total).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
MKGLOBAL(aes_cbc_dec_256_vaes_avx512,function,internal)
aes_cbc_dec_256_vaes_avx512:
%ifndef LINUX
        ;; Windows x64: 5th argument is on the stack ([rsp + 8*5])
        mov     num_bytes, [rsp + 8*5]
%endif
        AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 13, tmp

        ret
473 | ||
%ifdef LINUX
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
477 |