;;
;; Copyright (c) 2019, Intel Corporation
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are met:
;;
;;     * Redistributions of source code must retain the above copyright notice,
;;       this list of conditions and the following disclaimer.
;;     * Redistributions in binary form must reproduce the above copyright
;;       notice, this list of conditions and the following disclaimer in the
;;       documentation and/or other materials provided with the distribution.
;;     * Neither the name of Intel Corporation nor the names of its contributors
;;       may be used to endorse or promote products derived from this software
;;       without specific prior written permission.
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;

%ifndef _AES_COMMON_ASM_
%define _AES_COMMON_ASM_

%include "include/reg_sizes.asm"

;; =============================================================================
;; Generic macro to produce code that executes %%OPCODE instruction
;; on a selected number of AES blocks (16 bytes long) between 0 and 16.
;; All three operands of the instruction come from registers.
;; Note: if 3 blocks are left at the end, the instruction is produced to
;; operate on all 4 blocks (full width of ZMM).

%macro ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 14
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%OPCODE        %2      ; [in] instruction name
%define %%DST0          %3      ; [out] destination ZMM register
%define %%DST1          %4      ; [out] destination ZMM register
%define %%DST2          %5      ; [out] destination ZMM register
%define %%DST3          %6      ; [out] destination ZMM register
%define %%SRC1_0        %7      ; [in] source 1 ZMM register
%define %%SRC1_1        %8      ; [in] source 1 ZMM register
%define %%SRC1_2        %9      ; [in] source 1 ZMM register
%define %%SRC1_3        %10     ; [in] source 1 ZMM register
%define %%SRC2_0        %11     ; [in] source 2 ZMM register
%define %%SRC2_1        %12     ; [in] source 2 ZMM register
%define %%SRC2_2        %13     ; [in] source 2 ZMM register
%define %%SRC2_3        %14     ; [in] source 2 ZMM register

%assign reg_idx     0
%assign blocks_left %%NUM_BLOCKS

%rep (%%NUM_BLOCKS / 4)
%xdefine %%DSTREG  %%DST %+ reg_idx
%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
%xdefine %%SRC2REG %%SRC2_ %+ reg_idx
        %%OPCODE        %%DSTREG, %%SRC1REG, %%SRC2REG
%undef %%DSTREG
%undef %%SRC1REG
%undef %%SRC2REG
%assign reg_idx     (reg_idx + 1)
%assign blocks_left (blocks_left - 4)
%endrep

%xdefine %%DSTREG  %%DST %+ reg_idx
%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
%xdefine %%SRC2REG %%SRC2_ %+ reg_idx

%if blocks_left == 1
        %%OPCODE        XWORD(%%DSTREG), XWORD(%%SRC1REG), XWORD(%%SRC2REG)
%elif blocks_left == 2
        %%OPCODE        YWORD(%%DSTREG), YWORD(%%SRC1REG), YWORD(%%SRC2REG)
%elif blocks_left == 3
        %%OPCODE        %%DSTREG, %%SRC1REG, %%SRC2REG
%endif

%endmacro
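
;; Usage sketch (illustrative only, not part of the original file; register
;; choices are arbitrary). With 7 blocks the loop emits one full ZMM operation
;; and the 3 trailing blocks are promoted to a full ZMM operation as noted above:
;;
;;      ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 7, vpxorq, \
;;                      zmm0, zmm1, zmm2, zmm3, \
;;                      zmm0, zmm1, zmm2, zmm3, \
;;                      zmm16, zmm16, zmm16, zmm16
;;
;; expands to:
;;      vpxorq          zmm0, zmm0, zmm16       ; blocks 0-3
;;      vpxorq          zmm1, zmm1, zmm16       ; blocks 4-6 (+1 extra block)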

;; =============================================================================
;; Loads the specified number of AES blocks into ZMM registers.
;; %%FLAGS is optional and only affects behavior when 3 trailing blocks are left:
;; - if %%FLAGS is not provided then exactly 3 blocks are loaded (move and insert)
;; - if "load_4_instead_of_3" option is passed then 4 blocks are loaded
%macro ZMM_LOAD_BLOCKS_0_16 7-8
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%INP           %2      ; [in] input data pointer to read from
%define %%DATA_OFFSET   %3      ; [in] offset to the input pointer (GP or numerical)
%define %%DST0          %4      ; [out] ZMM register with loaded data
%define %%DST1          %5      ; [out] ZMM register with loaded data
%define %%DST2          %6      ; [out] ZMM register with loaded data
%define %%DST3          %7      ; [out] ZMM register with loaded data
%define %%FLAGS         %8      ; [in] optional "load_4_instead_of_3"

%assign src_offset  0
%assign dst_idx     0

%rep (%%NUM_BLOCKS / 4)
%xdefine %%DSTREG %%DST %+ dst_idx
        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
%undef %%DSTREG
%assign src_offset  (src_offset + 64)
%assign dst_idx     (dst_idx + 1)
%endrep

%assign blocks_left (%%NUM_BLOCKS % 4)
%xdefine %%DSTREG %%DST %+ dst_idx

%if blocks_left == 1
        vmovdqu8        XWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
%elif blocks_left == 2
        vmovdqu8        YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
%elif blocks_left == 3
%ifidn %%FLAGS, load_4_instead_of_3
        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
%else
        vmovdqu8        YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
        vinserti64x2    %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset + 32], 2
%endif
%endif

%endmacro
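
;; Usage sketch (illustrative only; rax and the destination registers are
;; arbitrary choices): load 6 blocks (96 bytes), i.e. one full ZMM load plus
;; one YMM load for the 2 trailing blocks:
;;
;;      ZMM_LOAD_BLOCKS_0_16 6, rax, 0, zmm0, zmm1, zmm2, zmm3
;;
;; expands to:
;;      vmovdqu8        zmm0, [rax + 0 + 0]
;;      vmovdqu8        ymm1, [rax + 0 + 64]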

;; =============================================================================
;; Loads the specified number of AES blocks into ZMM registers, using a mask
;; register for the last loaded register (xmm, ymm or zmm).
;; Loads take place at 1 byte granularity.
%macro ZMM_LOAD_MASKED_BLOCKS_0_16 8
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%INP           %2      ; [in] input data pointer to read from
%define %%DATA_OFFSET   %3      ; [in] offset to the input pointer (GP or numerical)
%define %%DST0          %4      ; [out] ZMM register with loaded data
%define %%DST1          %5      ; [out] ZMM register with loaded data
%define %%DST2          %6      ; [out] ZMM register with loaded data
%define %%DST3          %7      ; [out] ZMM register with loaded data
%define %%MASK          %8      ; [in] mask register

%assign src_offset  0
%assign dst_idx     0
%assign blocks_left %%NUM_BLOCKS

%if %%NUM_BLOCKS > 0
%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
%xdefine %%DSTREG %%DST %+ dst_idx
        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
%undef %%DSTREG
%assign src_offset  (src_offset + 64)
%assign dst_idx     (dst_idx + 1)
%assign blocks_left (blocks_left - 4)
%endrep
%endif ; %if %%NUM_BLOCKS > 0

%xdefine %%DSTREG %%DST %+ dst_idx

%if blocks_left == 1
        vmovdqu8        XWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
%elif blocks_left == 2
        vmovdqu8        YWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
%elif (blocks_left == 3 || blocks_left == 4)
        vmovdqu8        %%DSTREG{%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
%endif

%endmacro
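
;; Usage sketch (illustrative only; register and pointer choices are
;; arbitrary): load 70 bytes, i.e. 5 blocks where the 5th block is partial.
;; The caller builds the byte-granularity mask and passes the rounded-up
;; block count:
;;
;;      mov     r10d, 0x3f              ; 70 % 16 = 6 low mask bits set
;;      kmovd   k1, r10d
;;      ZMM_LOAD_MASKED_BLOCKS_0_16 5, rax, 0, zmm0, zmm1, zmm2, zmm3, k1
;;
;; which loads zmm0 in full and then:
;;      vmovdqu8        xmm1{k1}{z}, [rax + 0 + 64]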

;; =============================================================================
;; Stores the specified number of AES blocks from ZMM registers.
%macro ZMM_STORE_BLOCKS_0_16 7
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%OUTP          %2      ; [in] output data pointer to write to
%define %%DATA_OFFSET   %3      ; [in] offset to the output pointer (GP or numerical)
%define %%SRC0          %4      ; [in] ZMM register with data to store
%define %%SRC1          %5      ; [in] ZMM register with data to store
%define %%SRC2          %6      ; [in] ZMM register with data to store
%define %%SRC3          %7      ; [in] ZMM register with data to store

%assign dst_offset  0
%assign src_idx     0

%rep (%%NUM_BLOCKS / 4)
%xdefine %%SRCREG %%SRC %+ src_idx
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
%undef %%SRCREG
%assign dst_offset  (dst_offset + 64)
%assign src_idx     (src_idx + 1)
%endrep

%assign blocks_left (%%NUM_BLOCKS % 4)
%xdefine %%SRCREG %%SRC %+ src_idx

%if blocks_left == 1
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], XWORD(%%SRCREG)
%elif blocks_left == 2
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
%elif blocks_left == 3
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
        vextracti32x4   [%%OUTP + %%DATA_OFFSET + dst_offset + 32], %%SRCREG, 2
%endif

%endmacro
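
;; Usage sketch (illustrative only; rdx and the source registers are arbitrary
;; choices): store 3 blocks (48 bytes). Unlike the opcode macro, the store
;; never writes a 4th block: the low 2 blocks go out as a YMM store and
;; block 2 is extracted separately:
;;
;;      ZMM_STORE_BLOCKS_0_16 3, rdx, 0, zmm0, zmm1, zmm2, zmm3
;;
;; expands to:
;;      vmovdqu8        [rdx + 0 + 0], ymm0
;;      vextracti32x4   [rdx + 0 + 0 + 32], zmm0, 2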

;; =============================================================================
;; Stores the specified number of AES blocks from ZMM registers, using a mask
;; register for the last stored register (xmm, ymm or zmm).
;; Stores take place at 1 byte granularity.
%macro ZMM_STORE_MASKED_BLOCKS_0_16 8
%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
%define %%OUTP          %2      ; [in] output data pointer to write to
%define %%DATA_OFFSET   %3      ; [in] offset to the output pointer (GP or numerical)
%define %%SRC0          %4      ; [in] ZMM register with data to store
%define %%SRC1          %5      ; [in] ZMM register with data to store
%define %%SRC2          %6      ; [in] ZMM register with data to store
%define %%SRC3          %7      ; [in] ZMM register with data to store
%define %%MASK          %8      ; [in] mask register

%assign dst_offset  0
%assign src_idx     0
%assign blocks_left %%NUM_BLOCKS

%if %%NUM_BLOCKS > 0
%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
%xdefine %%SRCREG %%SRC %+ src_idx
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
%undef %%SRCREG
%assign dst_offset  (dst_offset + 64)
%assign src_idx     (src_idx + 1)
%assign blocks_left (blocks_left - 4)
%endrep
%endif ; %if %%NUM_BLOCKS > 0

%xdefine %%SRCREG %%SRC %+ src_idx

%if blocks_left == 1
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, XWORD(%%SRCREG)
%elif blocks_left == 2
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, YWORD(%%SRCREG)
%elif (blocks_left == 3 || blocks_left == 4)
        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, %%SRCREG
%endif

%endmacro
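
;; Usage sketch (illustrative only): write back the same 70 bytes loaded in
;; the masked-load example above, reusing the k1 mask built there:
;;
;;      ZMM_STORE_MASKED_BLOCKS_0_16 5, rdx, 0, zmm0, zmm1, zmm2, zmm3, k1
;;
;; which stores zmm0 in full and then:
;;      vmovdqu8        [rdx + 0 + 64]{k1}, xmm1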

;;; ===========================================================================
;;; Handles AES encryption rounds.
;;; It handles special cases: the first and the last rounds.
;;; Optionally, it performs XOR with data after the last AES round.
;;; Uses the NROUNDS parameter to check what needs to be done for the
;;; current round.
;;; If 3 blocks are trailing then the operation is performed on the whole ZMM
;;; register (4 blocks).
%macro ZMM_AESENC_ROUND_BLOCKS_0_16 12
%define %%L0B0_3   %1   ; [in/out] zmm; blocks 0 to 3
%define %%L0B4_7   %2   ; [in/out] zmm; blocks 4 to 7
%define %%L0B8_11  %3   ; [in/out] zmm; blocks 8 to 11
%define %%L0B12_15 %4   ; [in/out] zmm; blocks 12 to 15
%define %%KEY      %5   ; [in] zmm containing round key
%define %%ROUND    %6   ; [in] round number
%define %%D0_3     %7   ; [in] zmm or no_data; plain/cipher text blocks 0-3
%define %%D4_7     %8   ; [in] zmm or no_data; plain/cipher text blocks 4-7
%define %%D8_11    %9   ; [in] zmm or no_data; plain/cipher text blocks 8-11
%define %%D12_15   %10  ; [in] zmm or no_data; plain/cipher text blocks 12-15
%define %%NUMBL    %11  ; [in] number of blocks; numerical value
%define %%NROUNDS  %12  ; [in] number of rounds; numerical value

;;; === first AES round
%if (%%ROUND < 1)
        ;; round 0
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif ; ROUND 0

;;; === middle AES rounds
%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
        ;; rounds 1 to 9/11/13
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenc, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif ; rounds 1 to 9/11/13

;;; === last AES round
%if (%%ROUND > %%NROUNDS)
        ;; the last round - mix AESENCLAST with the text XORs
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenclast, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY

;;; === XOR with data
%ifnidn %%D0_3, no_data
%ifnidn %%D4_7, no_data
%ifnidn %%D8_11, no_data
%ifnidn %%D12_15, no_data
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%D0_3, %%D4_7, %%D8_11, %%D12_15
%endif ; !no_data
%endif ; !no_data
%endif ; !no_data
%endif ; !no_data

%endif ; The last round

%endmacro
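
;; Usage sketch (illustrative only): a full AES-128 (NROUNDS = 9) pass over
;; 16 blocks held in zmm0-zmm3, assuming the 11 round keys were broadcast
;; into zmm16-zmm26 beforehand (e.g. with vbroadcastf64x2); all register
;; choices are arbitrary. The no_data arguments skip the final XOR:
;;
;; %assign round 0
;; %rep 11
;; %assign kreg (16 + round)
;;      ZMM_AESENC_ROUND_BLOCKS_0_16 zmm0, zmm1, zmm2, zmm3, \
;;                      zmm %+ kreg, round, \
;;                      no_data, no_data, no_data, no_data, 16, 9
;; %assign round (round + 1)
;; %endrep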

;;; ===========================================================================
;;; Handles AES decryption rounds.
;;; It handles special cases: the first and the last rounds.
;;; Optionally, it performs XOR with data after the last AES round.
;;; Uses the NROUNDS parameter to check what needs to be done for the
;;; current round.
;;; If 3 blocks are trailing then the operation is performed on the whole ZMM
;;; register (4 blocks).
%macro ZMM_AESDEC_ROUND_BLOCKS_0_16 12
%define %%L0B0_3   %1   ; [in/out] zmm; blocks 0 to 3
%define %%L0B4_7   %2   ; [in/out] zmm; blocks 4 to 7
%define %%L0B8_11  %3   ; [in/out] zmm; blocks 8 to 11
%define %%L0B12_15 %4   ; [in/out] zmm; blocks 12 to 15
%define %%KEY      %5   ; [in] zmm containing round key
%define %%ROUND    %6   ; [in] round number
%define %%D0_3     %7   ; [in] zmm or no_data; cipher text blocks 0-3
%define %%D4_7     %8   ; [in] zmm or no_data; cipher text blocks 4-7
%define %%D8_11    %9   ; [in] zmm or no_data; cipher text blocks 8-11
%define %%D12_15   %10  ; [in] zmm or no_data; cipher text blocks 12-15
%define %%NUMBL    %11  ; [in] number of blocks; numerical value
%define %%NROUNDS  %12  ; [in] number of rounds; numerical value

;;; === first AES round
%if (%%ROUND < 1)
        ;; round 0
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif ; ROUND 0

;;; === middle AES rounds
%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
        ;; rounds 1 to 9/11/13
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdec, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY
%endif ; rounds 1 to 9/11/13

;;; === last AES round
%if (%%ROUND > %%NROUNDS)
        ;; the last round - mix AESDECLAST with the text XORs
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdeclast, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%KEY, %%KEY, %%KEY, %%KEY

;;; === XOR with data
%ifnidn %%D0_3, no_data
%ifnidn %%D4_7, no_data
%ifnidn %%D8_11, no_data
%ifnidn %%D12_15, no_data
        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
                        %%D0_3, %%D4_7, %%D8_11, %%D12_15
%endif ; !no_data
%endif ; !no_data
%endif ; !no_data
%endif ; !no_data

%endif ; The last round

%endmacro
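
;; Usage sketch (illustrative only; register choices are arbitrary): the
;; final round of an AES-256 (NROUNDS = 13) CBC-style decryption, XOR-ing
;; the output with the preceding ciphertext blocks kept in zmm4-zmm7:
;;
;;      ZMM_AESDEC_ROUND_BLOCKS_0_16 zmm0, zmm1, zmm2, zmm3, \
;;                      zmm30, 14, zmm4, zmm5, zmm6, zmm7, 16, 13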

%endif ;; _AES_COMMON_ASM_