;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2019 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%ifndef _AES_COMMON_ASM_
%define _AES_COMMON_ASM_

%include "reg_sizes.asm"

;; =============================================================================
;; Generic macro that emits the %%OPCODE instruction for a selected number of
;; AES blocks (16 bytes each), between 0 and 16.
;; All three operands of the instruction come from registers.
;; Note: if 3 blocks are left at the end, the instruction is emitted for all
;; 4 blocks (the full width of a ZMM register).

%macro ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 14
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%OPCODE        %2      ; [in] instruction name
%define %%DST0          %3      ; [out] destination ZMM register
%define %%DST1          %4      ; [out] destination ZMM register
%define %%DST2          %5      ; [out] destination ZMM register
%define %%DST3          %6      ; [out] destination ZMM register
%define %%SRC1_0        %7      ; [in] source 1 ZMM register
%define %%SRC1_1        %8      ; [in] source 1 ZMM register
%define %%SRC1_2        %9      ; [in] source 1 ZMM register
%define %%SRC1_3        %10     ; [in] source 1 ZMM register
%define %%SRC2_0        %11     ; [in] source 2 ZMM register
%define %%SRC2_1        %12     ; [in] source 2 ZMM register
%define %%SRC2_2        %13     ; [in] source 2 ZMM register
%define %%SRC2_3        %14     ; [in] source 2 ZMM register

%assign reg_idx     0
%assign blocks_left %%NUM_BLOCKS

%rep (%%NUM_BLOCKS / 4)
%xdefine %%DSTREG  %%DST %+ reg_idx
%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
%xdefine %%SRC2REG %%SRC2_ %+ reg_idx
        %%OPCODE        %%DSTREG, %%SRC1REG, %%SRC2REG
%undef %%DSTREG
%undef %%SRC1REG
%undef %%SRC2REG
%assign reg_idx     (reg_idx + 1)
%assign blocks_left (blocks_left - 4)
%endrep

%xdefine %%DSTREG  %%DST %+ reg_idx
%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
%xdefine %%SRC2REG %%SRC2_ %+ reg_idx

%if blocks_left == 1
        %%OPCODE        XWORD(%%DSTREG), XWORD(%%SRC1REG), XWORD(%%SRC2REG)
%elif blocks_left == 2
        %%OPCODE        YWORD(%%DSTREG), YWORD(%%SRC1REG), YWORD(%%SRC2REG)
%elif blocks_left == 3
        %%OPCODE        %%DSTREG, %%SRC1REG, %%SRC2REG
%endif

%endmacro
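
;; Illustrative expansion (not part of the original file; register choices are
;; hypothetical): XOR 7 blocks held in zmm0-zmm1 with a round key in zmm4.
;;
;;      ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 7, vpxorq, \
;;                      zmm0, zmm1, zmm2, zmm3, \
;;                      zmm0, zmm1, zmm2, zmm3, \
;;                      zmm4, zmm4, zmm4, zmm4
;;
;; emits:
;;      vpxorq  zmm0, zmm0, zmm4        ; blocks 0-3
;;      vpxorq  zmm1, zmm1, zmm4        ; blocks 4-6 (3 trailing blocks, so
;;                                      ; the full ZMM is used, per the note)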

;; =============================================================================
;; Loads the specified number of AES blocks into ZMM registers.
;; %%FLAGS is optional and only affects behavior when 3 trailing blocks are left:
;; - if %%FLAGS is not provided, exactly 3 blocks are loaded (a YMM move plus
;;   a 16-byte insert)
;; - if the "load_4_instead_of_3" option is passed, 4 blocks are loaded
%macro ZMM_LOAD_BLOCKS_0_16 7-8
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%INP           %2      ; [in] input data pointer to read from
%define %%DATA_OFFSET   %3      ; [in] offset to the input pointer (GP or numerical)
%define %%DST0          %4      ; [out] ZMM register with loaded data
%define %%DST1          %5      ; [out] ZMM register with loaded data
%define %%DST2          %6      ; [out] ZMM register with loaded data
%define %%DST3          %7      ; [out] ZMM register with loaded data
%define %%FLAGS         %8      ; [in] optional "load_4_instead_of_3"

%assign src_offset 0
%assign dst_idx    0

%rep (%%NUM_BLOCKS / 4)
%xdefine %%DSTREG %%DST %+ dst_idx
        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
%undef %%DSTREG
%assign src_offset (src_offset + 64)
%assign dst_idx    (dst_idx + 1)
%endrep

%assign blocks_left (%%NUM_BLOCKS % 4)
%xdefine %%DSTREG %%DST %+ dst_idx

%if blocks_left == 1
        vmovdqu8        XWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
%elif blocks_left == 2
        vmovdqu8        YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
%elif blocks_left == 3
%ifidn %%FLAGS, load_4_instead_of_3
        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
%else
        vmovdqu8        YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
        vinserti64x2    %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset + 32], 2
%endif
%endif

%endmacro
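
;; Illustrative expansion (hypothetical registers): loading 11 blocks from
;; rsi at offset rax, without the optional flag:
;;
;;      ZMM_LOAD_BLOCKS_0_16 11, rsi, rax, zmm0, zmm1, zmm2, zmm3
;;
;; emits:
;;      vmovdqu8        zmm0, [rsi + rax]               ; blocks 0-3
;;      vmovdqu8        zmm1, [rsi + rax + 64]          ; blocks 4-7
;;      vmovdqu8        ymm2, [rsi + rax + 128]         ; blocks 8-9
;;      vinserti64x2    zmm2, [rsi + rax + 160], 2      ; block 10
;;
;; With the "load_4_instead_of_3" flag the last two loads collapse into one
;; full-ZMM load that reads 16 bytes past block 10.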

;; =============================================================================
;; Loads the specified number of AES blocks into ZMM registers, using a mask
;; register for the last register loaded (xmm, ymm or zmm).
;; Loads take place at 1-byte granularity.
%macro ZMM_LOAD_MASKED_BLOCKS_0_16 8
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%INP           %2      ; [in] input data pointer to read from
%define %%DATA_OFFSET   %3      ; [in] offset to the input pointer (GP or numerical)
%define %%DST0          %4      ; [out] ZMM register with loaded data
%define %%DST1          %5      ; [out] ZMM register with loaded data
%define %%DST2          %6      ; [out] ZMM register with loaded data
%define %%DST3          %7      ; [out] ZMM register with loaded data
%define %%MASK          %8      ; [in] mask register

%assign src_offset  0
%assign dst_idx     0
%assign blocks_left %%NUM_BLOCKS

%if %%NUM_BLOCKS > 0
%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
%xdefine %%DSTREG %%DST %+ dst_idx
        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
%undef %%DSTREG
%assign src_offset  (src_offset + 64)
%assign dst_idx     (dst_idx + 1)
%assign blocks_left (blocks_left - 4)
%endrep
%endif ; %if %%NUM_BLOCKS > 0

%xdefine %%DSTREG %%DST %+ dst_idx

%if blocks_left == 1
        vmovdqu8        XWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
%elif blocks_left == 2
        vmovdqu8        YWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
%elif (blocks_left == 3 || blocks_left == 4)
        vmovdqu8        %%DSTREG{%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
%endif

%endmacro
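
;; Illustrative expansion (hypothetical registers): loading 104 bytes, i.e.
;; 6 full blocks plus 8 bytes of a 7th, so %%NUM_BLOCKS = 7. The caller is
;; assumed to prepare the byte mask for the trailing 40 bytes (2 blocks +
;; 8 bytes) held by the last register:
;;
;;      mov     r10, 0xff_ffffffff      ; (1 << 40) - 1
;;      kmovq   k1, r10
;;      ZMM_LOAD_MASKED_BLOCKS_0_16 7, rsi, rax, zmm0, zmm1, zmm2, zmm3, k1
;;
;; emits:
;;      vmovdqu8        zmm0, [rsi + rax]               ; blocks 0-3
;;      vmovdqu8        zmm1{k1}{z}, [rsi + rax + 64]   ; blocks 4-6, 40 bytes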

;; =============================================================================
;; Stores specified number of AES blocks from ZMM registers
%macro ZMM_STORE_BLOCKS_0_16 7
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%OUTP          %2      ; [in] output data pointer to write to
%define %%DATA_OFFSET   %3      ; [in] offset to the output pointer (GP or numerical)
%define %%SRC0          %4      ; [in] ZMM register with data to store
%define %%SRC1          %5      ; [in] ZMM register with data to store
%define %%SRC2          %6      ; [in] ZMM register with data to store
%define %%SRC3          %7      ; [in] ZMM register with data to store

%assign dst_offset 0
%assign src_idx    0

%rep (%%NUM_BLOCKS / 4)
%xdefine %%SRCREG %%SRC %+ src_idx
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
%undef %%SRCREG
%assign dst_offset (dst_offset + 64)
%assign src_idx    (src_idx + 1)
%endrep

%assign blocks_left (%%NUM_BLOCKS % 4)
%xdefine %%SRCREG %%SRC %+ src_idx

%if blocks_left == 1
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], XWORD(%%SRCREG)
%elif blocks_left == 2
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
%elif blocks_left == 3
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
        vextracti32x4   [%%OUTP + %%DATA_OFFSET + dst_offset + 32], %%SRCREG, 2
%endif

%endmacro
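
;; Illustrative expansion (hypothetical registers): storing 5 blocks to rdi
;; at offset rax:
;;
;;      ZMM_STORE_BLOCKS_0_16 5, rdi, rax, zmm0, zmm1, zmm2, zmm3
;;
;; emits:
;;      vmovdqu8        [rdi + rax], zmm0               ; blocks 0-3
;;      vmovdqu8        [rdi + rax + 64], xmm1          ; block 4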

;; =============================================================================
;; Stores the specified number of AES blocks from ZMM registers, using a mask
;; register for the last register stored (xmm, ymm or zmm).
;; Stores take place at 1-byte granularity.
%macro ZMM_STORE_MASKED_BLOCKS_0_16 8
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%OUTP          %2      ; [in] output data pointer to write to
%define %%DATA_OFFSET   %3      ; [in] offset to the output pointer (GP or numerical)
%define %%SRC0          %4      ; [in] ZMM register with data to store
%define %%SRC1          %5      ; [in] ZMM register with data to store
%define %%SRC2          %6      ; [in] ZMM register with data to store
%define %%SRC3          %7      ; [in] ZMM register with data to store
%define %%MASK          %8      ; [in] mask register

%assign dst_offset  0
%assign src_idx     0
%assign blocks_left %%NUM_BLOCKS

%if %%NUM_BLOCKS > 0
%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
%xdefine %%SRCREG %%SRC %+ src_idx
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
%undef %%SRCREG
%assign dst_offset  (dst_offset + 64)
%assign src_idx     (src_idx + 1)
%assign blocks_left (blocks_left - 4)
%endrep
%endif ; %if %%NUM_BLOCKS > 0

%xdefine %%SRCREG %%SRC %+ src_idx

%if blocks_left == 1
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, XWORD(%%SRCREG)
%elif blocks_left == 2
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, YWORD(%%SRCREG)
%elif (blocks_left == 3 || blocks_left == 4)
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, %%SRCREG
%endif

%endmacro
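
;; Illustrative expansion (hypothetical registers), mirroring the masked load
;; example above: storing 6 full blocks plus 8 bytes of a 7th:
;;
;;      mov     r10, 0xff_ffffffff      ; (1 << 40) - 1
;;      kmovq   k1, r10
;;      ZMM_STORE_MASKED_BLOCKS_0_16 7, rdi, rax, zmm0, zmm1, zmm2, zmm3, k1
;;
;; emits:
;;      vmovdqu8        [rdi + rax], zmm0               ; blocks 0-3
;;      vmovdqu8        [rdi + rax + 64]{k1}, zmm1      ; blocks 4-6, 40 bytes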

;;; ===========================================================================
;;; Handles AES encryption rounds.
;;; Covers the special cases of the first and last rounds, and optionally
;;; performs an XOR with data after the last AES round.
;;; Uses the NROUNDS parameter to decide what to do for the current round.
;;; If 3 blocks are trailing, the operation is performed on the whole ZMM
;;; (4 blocks).
%macro ZMM_AESENC_ROUND_BLOCKS_0_16 12
%define %%L0B0_3   %1  ; [in/out] zmm; blocks 0 to 3
%define %%L0B4_7   %2  ; [in/out] zmm; blocks 4 to 7
%define %%L0B8_11  %3  ; [in/out] zmm; blocks 8 to 11
%define %%L0B12_15 %4  ; [in/out] zmm; blocks 12 to 15
%define %%KEY      %5  ; [in] zmm containing round key
%define %%ROUND    %6  ; [in] round number
%define %%D0_3     %7  ; [in] zmm or no_data; plain/cipher text blocks 0-3
%define %%D4_7     %8  ; [in] zmm or no_data; plain/cipher text blocks 4-7
%define %%D8_11    %9  ; [in] zmm or no_data; plain/cipher text blocks 8-11
%define %%D12_15   %10 ; [in] zmm or no_data; plain/cipher text blocks 12-15
%define %%NUMBL    %11 ; [in] number of blocks; numerical value
%define %%NROUNDS  %12 ; [in] number of rounds; numerical value

;;; === first AES round
%if (%%ROUND < 1)
        ;; round 0
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif                  ; ROUND 0

;;; === middle AES rounds
%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
        ;; rounds 1 to 9/11/13
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenc, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif                  ; rounds 1 to 9/11/13

;;; === last AES round
%if (%%ROUND > %%NROUNDS)
        ;; the last round - mix enclast with text xor's
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenclast, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY

;;; === XOR with data
%ifnidn %%D0_3, no_data
%ifnidn %%D4_7, no_data
%ifnidn %%D8_11, no_data
%ifnidn %%D12_15, no_data
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%D0_3, %%D4_7, %%D8_11, %%D12_15
%endif                          ; !no_data
%endif                          ; !no_data
%endif                          ; !no_data
%endif                          ; !no_data

%endif                  ; The last round

%endmacro
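
;; Illustrative use (hypothetical: r11 points to an expanded AES-128 key
;; schedule, so NROUNDS = 9; counter blocks sit in zmm0-zmm3, plain text in
;; zmm4-zmm7, and zmm8 is scratch). A CTR-style flow runs all 11 round keys
;; through the macro; the text XOR happens automatically on the last round:
;;
;;      %assign round 0
;;      %rep (9 + 2)                    ; rounds 0 to 10
;;              vbroadcastf64x2 zmm8, [r11 + round*16]
;;              ZMM_AESENC_ROUND_BLOCKS_0_16 zmm0, zmm1, zmm2, zmm3, zmm8, \
;;                              round, zmm4, zmm5, zmm6, zmm7, 16, 9
;;      %assign round (round + 1)
;;      %endrep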

;;; ===========================================================================
;;; Handles AES decryption rounds.
;;; Covers the special cases of the first and last rounds, and optionally
;;; performs an XOR with data after the last AES round.
;;; Uses the NROUNDS parameter to decide what to do for the current round.
;;; If 3 blocks are trailing, the operation is performed on the whole ZMM
;;; (4 blocks).
%macro ZMM_AESDEC_ROUND_BLOCKS_0_16 12
%define %%L0B0_3   %1  ; [in/out] zmm; blocks 0 to 3
%define %%L0B4_7   %2  ; [in/out] zmm; blocks 4 to 7
%define %%L0B8_11  %3  ; [in/out] zmm; blocks 8 to 11
%define %%L0B12_15 %4  ; [in/out] zmm; blocks 12 to 15
%define %%KEY      %5  ; [in] zmm containing round key
%define %%ROUND    %6  ; [in] round number
%define %%D0_3     %7  ; [in] zmm or no_data; cipher text blocks 0-3
%define %%D4_7     %8  ; [in] zmm or no_data; cipher text blocks 4-7
%define %%D8_11    %9  ; [in] zmm or no_data; cipher text blocks 8-11
%define %%D12_15   %10 ; [in] zmm or no_data; cipher text blocks 12-15
%define %%NUMBL    %11 ; [in] number of blocks; numerical value
%define %%NROUNDS  %12 ; [in] number of rounds; numerical value

;;; === first AES round
%if (%%ROUND < 1)
        ;; round 0
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif                  ; ROUND 0

;;; === middle AES rounds
%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
        ;; rounds 1 to 9/11/13
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdec, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif                  ; rounds 1 to 9/11/13

;;; === last AES round
%if (%%ROUND > %%NROUNDS)
        ;; the last round - mix declast with text xor's
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdeclast, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY

;;; === XOR with data
%ifnidn %%D0_3, no_data
%ifnidn %%D4_7, no_data
%ifnidn %%D8_11, no_data
%ifnidn %%D12_15, no_data
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%D0_3, %%D4_7, %%D8_11, %%D12_15
%endif                          ; !no_data
%endif                          ; !no_data
%endif                          ; !no_data
%endif                          ; !no_data

%endif                  ; The last round

%endmacro
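
;; Usage mirrors the encrypt macro above, but with the decryption key
;; schedule. Passing no_data skips the final XOR, e.g. for ECB-style
;; decryption of 16 blocks (hypothetical registers, AES-128):
;;
;;      %assign round 0
;;      %rep (9 + 2)
;;              vbroadcastf64x2 zmm8, [r11 + round*16]
;;              ZMM_AESDEC_ROUND_BLOCKS_0_16 zmm0, zmm1, zmm2, zmm3, zmm8, \
;;                              round, no_data, no_data, no_data, no_data, 16, 9
;;      %assign round (round + 1)
;;      %endrep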

%endif ;; _AES_COMMON_ASM_