git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/intel-ipsec-mb/sse/aes_cbc_enc_128_x4.asm
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / sse / aes_cbc_enc_128_x4.asm
1 ;;
2 ;; Copyright (c) 2012-2018, Intel Corporation
3 ;;
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
6 ;;
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
15 ;;
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ;;
27
28 ;;; Routine to do a 128 bit CBC AES encryption / CBC-MAC digest computation
29 ;;; processes 4 buffers at a time, single data structure as input
30 ;;; Updates In and Out pointers at end (CBC-MAC builds update the In pointers only)
31
32 %include "os.asm"
33 %include "mb_mgr_datastruct.asm"
34
;; MOVDQ is the move used for every access to the in/out data buffers.
35 %define MOVDQ movdqu ;; unaligned form: buffers are not assumed to be aligned
;; pxor2 dst, src
;;   dst ^= 16 bytes at src.
;;   %1 = destination XMM register, %2 = source operand (always a memory
;;        operand at the call sites in this file).
;;   The source is first loaded into XTMP with MOVDQ, so it may be unaligned.
;;   Clobbers XTMP.
36 %macro pxor2 2
37 MOVDQ XTMP, %2 ; XTMP = 16 bytes at src
38 pxor %1, XTMP ; dst ^= XTMP
39 %endm
40
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;; struct AES_ARGS_x8 {
43 ;; void* in[8];
44 ;; void* out[8];
45 ;; UINT128* keys[8];
46 ;; UINT128 IV[8];
47 ;; }
48 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49 ;; void aes_cbc_enc_128_x4(AES_ARGS_x8 *args, UINT64 len);
50 ;; arg 1: ARG : addr of AES_ARGS_x8 structure
51 ;; arg 2: LEN : len (in units of bytes); the code expects a non-zero multiple of 16
52
;; Stack frame reserved by the routine in this file. Slot usage, as written by
;; its prologue and read back by its epilogue:
;;   8*0        rbp          (every build)
;;   8*1        rbx          (CBC_MAC builds only)
;;   8*2 .. 8*5 r12 .. r15   (CBC_MAC builds only)
;;   8*6, 8*7   rsi, rdi     (CBC_MAC builds without LINUX only)
53 struc STACK
54 _gpr_save: resq 8 ; eight 8-byte save slots
55 endstruc
56
;; Register names for arg1..arg4, selected by the LINUX build define.
;; Only arg1 and arg2 carry parameters of the routine in this file (ARG, LEN);
;; arg3 and arg4 are used purely as the KEYS1 / KEYS2 pointer registers.
57 %ifdef LINUX
58 %define arg1 rdi
59 %define arg2 rsi
60 %define arg3 rdx
61 %define arg4 rcx
62 %else
63 %define arg1 rcx
64 %define arg2 rdx
65 %define arg3 rdi ;r8 - not used here: r8 is assigned to IN0 in this file
66 %define arg4 rsi ;r9 - not used here: r9 is assigned to OUT0 in this file
67 %endif
68
;; General-purpose register roles used by the routine in this file.
69 %define ARG arg1 ; address of the argument structure
70 %define LEN arg2 ; byte count; compared against IDX to end the loop
71
72 %define IDX rax ; byte offset added to the INn / OUTn pointers
73 ;; INn = input pointer of lane n, KEYSn = round-key table pointer of lane n
74 %define IN0 r8
75 %define KEYS0 rbx
76
77 %define IN1 r10
78 %define KEYS1 arg3
79
80 %define IN2 r12
81 %define KEYS2 arg4
82
83 %define IN3 r14
84 %define KEYS3 rbp
85
86 %ifndef CBC_MAC
87 ;; OUTn = output pointer of lane n; not defined for CBC-MAC (no cipher text write back)
88 %define OUT0 r9
89 %define OUT1 r11
90 %define OUT2 r13
91 %define OUT3 r15
92 %endif
93
;; XMM register roles.
;;   XDATAn  - the 16-byte block being encrypted for lane n
;;   XKEYn_r - round key r (3, 6 or 9) of lane n, kept for reuse across blocks
;;   XTMP    - scratch register used by the pxor2 macro
;; xmm0-xmm3 hold data and xmm5 is XTMP, which leaves 11 registers for the 12
;; cached round keys; XKEY0_6 is therefore a memory operand instead.
94 %define XDATA0 xmm0
95 %define XDATA1 xmm1
96 %define XDATA2 xmm2
97 %define XDATA3 xmm3
98
99 %define XKEY0_3 xmm4
100 %define XKEY0_6 [KEYS0 + 16*6] ; read from the key table on every use
101 %define XTMP xmm5
102 %define XKEY0_9 xmm6
103
104 %define XKEY1_3 xmm7
105 %define XKEY1_6 xmm8
106 %define XKEY1_9 xmm9
107
108 %define XKEY2_3 xmm10
109 %define XKEY2_6 xmm11
110 %define XKEY2_9 xmm12
111
112 %define XKEY3_3 xmm13
113 %define XKEY3_6 xmm14
114 %define XKEY3_9 xmm15
115
116 section .text
117
;;----------------------------------------------------------------------
;; aes_cbc_enc_128_x4(ARG, LEN)  - label emitted when CBC_MAC is not defined
;; aes128_cbc_mac_x4(ARG, LEN)   - label emitted when CBC_MAC is defined
;;
;; Runs AES-128 in CBC mode over four independent lanes (0..3), one 16-byte
;; block per lane per iteration, with the four lanes interleaved round by
;; round.
;;
;; In:
;;   ARG (arg1) - address of the argument structure. The entries at
;;                _aesarg_in, _aesarg_out, _aesarg_keys and _aesarg_IV are
;;                accessed for lanes 0..3 only.
;;   LEN (arg2) - bytes to process in every lane. The first block is
;;                processed before LEN is examined and the loop ends only
;;                when IDX == LEN, so LEN is expected to be a non-zero
;;                multiple of 16.
;; Out:
;;   _aesarg_IV[0..3]  - last cipher block of each lane (this is the digest
;;                       in CBC_MAC builds).
;;   _aesarg_in[0..3]  - advanced by LEN.
;;   _aesarg_out[0..3] - advanced by LEN, after cipher text has been written
;;                       through them. Neither happens in CBC_MAC builds.
;; Alignment:
;;   In/out buffers are accessed with MOVDQ (movdqu). Round keys and the IV
;;   slots are accessed with movdqa or as SSE memory operands, which need
;;   16-byte aligned addresses.
;; Registers:
;;   rax (IDX) is overwritten. rbp is saved and restored in every build.
;;   rbx, r12-r15 (plus rsi and rdi when LINUX is not defined) are saved and
;;   restored only in CBC_MAC builds.
;;   NOTE(review): the non-CBC_MAC build still writes rbx and r12-r15 (and
;;   rdi/rsi when LINUX is not defined), and xmm0-xmm15 are never saved -
;;   presumably the callers of this internal routine preserve them; confirm.
;;----------------------------------------------------------------------
118 %ifdef CBC_MAC
119 MKGLOBAL(aes128_cbc_mac_x4,function,internal)
120 aes128_cbc_mac_x4:
121 %else
122 MKGLOBAL(aes_cbc_enc_128_x4,function,internal)
123 aes_cbc_enc_128_x4:
124 %endif
125 sub rsp, STACK_size ; reserve the GPR save area
126 mov [rsp + _gpr_save + 8*0], rbp ; rbp is reused as KEYS3
127 %ifdef CBC_MAC
128 mov [rsp + _gpr_save + 8*1], rbx
129 mov [rsp + _gpr_save + 8*2], r12
130 mov [rsp + _gpr_save + 8*3], r13
131 mov [rsp + _gpr_save + 8*4], r14
132 mov [rsp + _gpr_save + 8*5], r15
133 %ifndef LINUX
134 mov [rsp + _gpr_save + 8*6], rsi
135 mov [rsp + _gpr_save + 8*7], rdi
136 %endif
137 %endif
138 mov IDX, 16 ; offset of the second block; the first block is handled at offset 0
139
140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
141 ;; first block of every lane
142 mov IN0, [ARG + _aesarg_in + 8*0]
143 mov IN1, [ARG + _aesarg_in + 8*1]
144 mov IN2, [ARG + _aesarg_in + 8*2]
145 mov IN3, [ARG + _aesarg_in + 8*3]
146
147 MOVDQ XDATA0, [IN0] ; load first block of plain text
148 MOVDQ XDATA1, [IN1] ; load first block of plain text
149 MOVDQ XDATA2, [IN2] ; load first block of plain text
150 MOVDQ XDATA3, [IN3] ; load first block of plain text
151
152 mov KEYS0, [ARG + _aesarg_keys + 8*0]
153 mov KEYS1, [ARG + _aesarg_keys + 8*1]
154 mov KEYS2, [ARG + _aesarg_keys + 8*2]
155 mov KEYS3, [ARG + _aesarg_keys + 8*3]
156
157 pxor XDATA0, [ARG + _aesarg_IV + 16*0] ; plaintext XOR IV
158 pxor XDATA1, [ARG + _aesarg_IV + 16*1] ; plaintext XOR IV
159 pxor XDATA2, [ARG + _aesarg_IV + 16*2] ; plaintext XOR IV
160 pxor XDATA3, [ARG + _aesarg_IV + 16*3] ; plaintext XOR IV
161
162 %ifndef CBC_MAC
163 mov OUT0, [ARG + _aesarg_out + 8*0]
164 mov OUT1, [ARG + _aesarg_out + 8*1]
165 mov OUT2, [ARG + _aesarg_out + 8*2]
166 mov OUT3, [ARG + _aesarg_out + 8*3]
167 %endif
168 ;; round 0 is a plain XOR with the first round key, rounds 1-9 use aesenc, round 10 uses aesenclast
169 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
170 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
171 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
172 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
173
174 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
175 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
176 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
177 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
178
179 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
180 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
181 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
182 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
183 ;; round-3 keys are loaded once here and reused from registers in main_loop
184 movdqa XKEY0_3, [KEYS0 + 16*3] ; load round 3 key
185 movdqa XKEY1_3, [KEYS1 + 16*3] ; load round 3 key
186 movdqa XKEY2_3, [KEYS2 + 16*3] ; load round 3 key
187 movdqa XKEY3_3, [KEYS3 + 16*3] ; load round 3 key
188
189 aesenc XDATA0, XKEY0_3 ; 3. ENC
190 aesenc XDATA1, XKEY1_3 ; 3. ENC
191 aesenc XDATA2, XKEY2_3 ; 3. ENC
192 aesenc XDATA3, XKEY3_3 ; 3. ENC
193
194 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
195 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
196 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
197 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
198
199 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
200 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
201 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
202 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
203 ;; round-6 keys of lanes 1-3 are cached in registers; lane 0 uses the memory operand XKEY0_6
204 movdqa XKEY1_6, [KEYS1 + 16*6] ; load round 6 key
205 movdqa XKEY2_6, [KEYS2 + 16*6] ; load round 6 key
206 movdqa XKEY3_6, [KEYS3 + 16*6] ; load round 6 key
207
208 aesenc XDATA0, XKEY0_6 ; 6. ENC
209 aesenc XDATA1, XKEY1_6 ; 6. ENC
210 aesenc XDATA2, XKEY2_6 ; 6. ENC
211 aesenc XDATA3, XKEY3_6 ; 6. ENC
212
213 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
214 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
215 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
216 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
217
218 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
219 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
220 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
221 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
222 ;; round-9 keys are loaded once here and reused from registers in main_loop
223 movdqa XKEY0_9, [KEYS0 + 16*9] ; load round 9 key
224 movdqa XKEY1_9, [KEYS1 + 16*9] ; load round 9 key
225 movdqa XKEY2_9, [KEYS2 + 16*9] ; load round 9 key
226 movdqa XKEY3_9, [KEYS3 + 16*9] ; load round 9 key
227
228 aesenc XDATA0, XKEY0_9 ; 9. ENC
229 aesenc XDATA1, XKEY1_9 ; 9. ENC
230 aesenc XDATA2, XKEY2_9 ; 9. ENC
231 aesenc XDATA3, XKEY3_9 ; 9. ENC
232
233 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC
234 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC
235 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC
236 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC
237
238 %ifndef CBC_MAC
239 MOVDQ [OUT0], XDATA0 ; write back ciphertext
240 MOVDQ [OUT1], XDATA1 ; write back ciphertext
241 MOVDQ [OUT2], XDATA2 ; write back ciphertext
242 MOVDQ [OUT3], XDATA3 ; write back ciphertext
243 %endif
244 cmp LEN, IDX ; IDX is 16 here
245 je done ; LEN == 16: one block per lane, nothing left to do
246 ;; on entry to each iteration XDATAn holds the previous cipher block of lane n
247 main_loop:
248 pxor2 XDATA0, [IN0 + IDX] ; plaintext XOR previous cipher block (CBC chaining)
249 pxor2 XDATA1, [IN1 + IDX] ; plaintext XOR previous cipher block (CBC chaining)
250 pxor2 XDATA2, [IN2 + IDX] ; plaintext XOR previous cipher block (CBC chaining)
251 pxor2 XDATA3, [IN3 + IDX] ; plaintext XOR previous cipher block (CBC chaining)
252
253 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
254 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
255 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
256 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
257
258 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
259 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
260 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
261 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
262
263 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
264 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
265 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
266 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
267 ;; round 3 uses the keys cached before the loop
268 aesenc XDATA0, XKEY0_3 ; 3. ENC
269 aesenc XDATA1, XKEY1_3 ; 3. ENC
270 aesenc XDATA2, XKEY2_3 ; 3. ENC
271 aesenc XDATA3, XKEY3_3 ; 3. ENC
272
273 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
274 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
275 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
276 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
277
278 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
279 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
280 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
281 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
282 ;; round 6: lane 0 reads its key from memory (XKEY0_6), lanes 1-3 from cached registers
283 aesenc XDATA0, XKEY0_6 ; 6. ENC
284 aesenc XDATA1, XKEY1_6 ; 6. ENC
285 aesenc XDATA2, XKEY2_6 ; 6. ENC
286 aesenc XDATA3, XKEY3_6 ; 6. ENC
287
288 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
289 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
290 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
291 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
292
293 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
294 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
295 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
296 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
297 ;; round 9 uses the keys cached before the loop
298 aesenc XDATA0, XKEY0_9 ; 9. ENC
299 aesenc XDATA1, XKEY1_9 ; 9. ENC
300 aesenc XDATA2, XKEY2_9 ; 9. ENC
301 aesenc XDATA3, XKEY3_9 ; 9. ENC
302
303 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC
304 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC
305 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC
306 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC
307
308 %ifndef CBC_MAC
309 ;; No cipher text write back for CBC-MAC
310 MOVDQ [OUT0 + IDX], XDATA0 ; write back ciphertext
311 MOVDQ [OUT1 + IDX], XDATA1 ; write back ciphertext
312 MOVDQ [OUT2 + IDX], XDATA2 ; write back ciphertext
313 MOVDQ [OUT3 + IDX], XDATA3 ; write back ciphertext
314 %endif
315
316 add IDX, 16 ; advance to the next block
317 cmp LEN, IDX
318 jne main_loop ; exits only on exact equality (LEN a multiple of 16)
319
320 done:
321 ;; update IV / store digest for CBC-MAC
322 movdqa [ARG + _aesarg_IV + 16*0], XDATA0 ; last cipher block of lane 0
323 movdqa [ARG + _aesarg_IV + 16*1], XDATA1 ; last cipher block of lane 1
324 movdqa [ARG + _aesarg_IV + 16*2], XDATA2 ; last cipher block of lane 2
325 movdqa [ARG + _aesarg_IV + 16*3], XDATA3 ; last cipher block of lane 3
326
327 ;; update IN and OUT: store each pointer back advanced by LEN
328 add IN0, LEN
329 mov [ARG + _aesarg_in + 8*0], IN0
330 add IN1, LEN
331 mov [ARG + _aesarg_in + 8*1], IN1
332 add IN2, LEN
333 mov [ARG + _aesarg_in + 8*2], IN2
334 add IN3, LEN
335 mov [ARG + _aesarg_in + 8*3], IN3
336
337 %ifndef CBC_MAC
338 ;; No OUT pointer updates for CBC-MAC
339 add OUT0, LEN
340 mov [ARG + _aesarg_out + 8*0], OUT0
341 add OUT1, LEN
342 mov [ARG + _aesarg_out + 8*1], OUT1
343 add OUT2, LEN
344 mov [ARG + _aesarg_out + 8*2], OUT2
345 add OUT3, LEN
346 mov [ARG + _aesarg_out + 8*3], OUT3
347 %endif
348 ;; restore the registers saved in the prologue (same build conditions)
349 %ifdef CBC_MAC
350 mov rbx, [rsp + _gpr_save + 8*1]
351 mov r12, [rsp + _gpr_save + 8*2]
352 mov r13, [rsp + _gpr_save + 8*3]
353 mov r14, [rsp + _gpr_save + 8*4]
354 mov r15, [rsp + _gpr_save + 8*5]
355 %ifndef LINUX
356 mov rsi, [rsp + _gpr_save + 8*6]
357 mov rdi, [rsp + _gpr_save + 8*7]
358 %endif
359 %endif
360 mov rbp, [rsp + _gpr_save + 8*0]
361 add rsp, STACK_size ; release the save area
362 ret
363
;; LINUX builds only: emit an empty .note.GNU-stack section, the marker the
;; GNU toolchain uses to record that this object does not need an executable
;; stack.
364 %ifdef LINUX
365 section .note.GNU-stack noalloc noexec nowrite progbits
366 %endif