]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/intel-ipsec-mb/sse/aes_cbc_enc_128_x4.asm
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / sse / aes_cbc_enc_128_x4.asm
1 ;;
2 ;; Copyright (c) 2012-2018, Intel Corporation
3 ;;
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
6 ;;
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
15 ;;
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ;;
27
28 ;;; Routine to do a 128-bit CBC AES encryption / CBC-MAC digest computation.
29 ;;; Processes 4 buffers at a time, with a single data structure as input.
30 ;;; Updates the In and Out pointers at the end (only In for CBC-MAC builds).
31
32 %include "os.asm"
33 %include "mb_mgr_datastruct.asm"
34
;; MOVDQ is the 128-bit move used for every plaintext load and ciphertext store.
35 %define MOVDQ movdqu ;; assume buffers not aligned
;; pxor2 dst, src
;;   XORs the 16 bytes at memory operand src into xmm register dst.
;;   The operand is first loaded into XTMP with MOVDQ (unaligned-safe) and
;;   then XORed register-to-register, because pxor with a memory operand
;;   requires a 16-byte aligned address.
;;   Clobbers: XTMP.
36 %macro pxor2 2
37 MOVDQ XTMP, %2
38 pxor %1, XTMP
39 %endm
40
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;; struct AES_ARGS_x8 {
43 ;; void* in[8];
44 ;; void* out[8];
45 ;; UINT128* keys[8];
46 ;; UINT128 IV[8];
47 ;; }
48 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49 ;; void aes_cbc_enc_128_x4(AES_ARGS_x8 *args, UINT64 len);
50 ;; arg 1: ARG : addr of AES_ARGS_x8 structure
51 ;; arg 2: LEN : len (in units of bytes)
52
;; Stack frame reserved by the routine: a save area of 8 quadwords for
;; general-purpose registers.
;;   slot 0     : rbp (saved in every build)
;;   slots 1-5  : rbx, r12, r13, r14, r15 (CBC_MAC builds only)
;;   slots 6-7  : rsi, rdi (CBC_MAC builds without LINUX defined only)
53 struc STACK
54 _gpr_save: resq 8
55 endstruc
56
;; Argument register aliases. With LINUX defined, arg1..arg4 are
;; rdi/rsi/rdx/rcx; otherwise arg1/arg2 are rcx/rdx (the Microsoft x64
;; argument registers). Only arg1 (ARG) and arg2 (LEN) carry real arguments;
;; arg3/arg4 are used purely as scratch names for KEYS1/KEYS2 below.
57 %ifdef LINUX
58 %define arg1 rdi
59 %define arg2 rsi
60 %define arg3 rdx
61 %define arg4 rcx
62 %else
63 %define arg1 rcx
64 %define arg2 rdx
;; r8/r9 are assigned to IN0/OUT0 below, so rdi/rsi are used here instead.
65 %define arg3 rdi ;r8
66 %define arg4 rsi ;r9
67 %endif
68
;; ARG = address of the AES_ARGS_x8 structure, LEN = length in bytes.
69 %define ARG arg1
70 %define LEN arg2
71
;; IDX = byte offset of the block currently being processed in every lane.
72 %define IDX rax
73
;; Per-lane general-purpose registers:
;;   INn   = input (plaintext) pointer of lane n
;;   KEYSn = pointer to the expanded round keys of lane n
74 %define IN0 r8
75 %define KEYS0 rbx
76
77 %define IN1 r10
78 %define KEYS1 arg3
79
80 %define IN2 r12
81 %define KEYS2 arg4
82
83 %define IN3 r14
84 %define KEYS3 rbp
85
;; OUTn = output (ciphertext) pointer of lane n; not defined in CBC_MAC builds.
86 %ifndef CBC_MAC
87 ;; No cipher text write back for CBC-MAC
88 %define OUT0 r9
89 %define OUT1 r11
90 %define OUT2 r13
91 %define OUT3 r15
92 %endif
93
;; XDATAn = current 128-bit state of lane n. After a block is finished it
;; holds that block's ciphertext, which is the chaining value for the next.
94 %define XDATA0 xmm0
95 %define XDATA1 xmm1
96 %define XDATA2 xmm2
97 %define XDATA3 xmm3
98
;; XKEYn_r = round key r (3, 6 or 9) of lane n, kept in a register.
;; xmm0-xmm3 hold data and xmm5 is XTMP, leaving 11 registers for the 12
;; cached keys: XKEY0_6 is therefore a memory operand, not a register.
99 %define XKEY0_3 xmm4
100 %define XKEY0_6 [KEYS0 + 16*6]
;; XTMP = scratch register clobbered by the pxor2 macro.
101 %define XTMP xmm5
102 %define XKEY0_9 xmm6
103
104 %define XKEY1_3 xmm7
105 %define XKEY1_6 xmm8
106 %define XKEY1_9 xmm9
107
108 %define XKEY2_3 xmm10
109 %define XKEY2_6 xmm11
110 %define XKEY2_9 xmm12
111
112 %define XKEY3_3 xmm13
113 %define XKEY3_6 xmm14
114 %define XKEY3_9 xmm15
115
116 section .text
117
;;-----------------------------------------------------------------------
;; aes_cbc_enc_128_x4 / aes128_cbc_mac_x4
;; (emitted as *_no_aesni when AES_CBC_ENC_X4 is already defined)
;;
;; AES-128 CBC encryption of four independent lanes, one 16-byte block per
;; lane per iteration. With CBC_MAC defined no ciphertext is written; only
;; the final block of each lane is kept.
;;
;; In:   ARG = address of AES_ARGS_x8; entries 0..3 of in/out/keys/IV are used
;;       LEN = number of bytes to process in each lane
;; Out:  IV[0..3]  <- last ciphertext block of each lane (digest for CBC-MAC)
;;       in[0..3]  += LEN
;;       out[0..3] += LEN (not in CBC_MAC builds)
;;
;; Constraints visible in the code:
;;  - The first block is processed unconditionally and the loop exits only
;;    when IDX == LEN, with IDX stepping by 16 from 16, so LEN must be a
;;    non-zero multiple of 16.
;;  - IV entries are accessed with pxor-from-memory and movdqa, and round
;;    keys with movdqa/pxor-from-memory, so both must be 16-byte aligned.
;;    Input/output buffers are accessed with MOVDQ and may be unaligned.
;;
;; Register usage:
;;  - rax, r8, r10, r12, r14, rbx, rbp, arg3, arg4 are overwritten, plus
;;    r9, r11, r13, r15 in non-CBC_MAC builds.
;;  - Non-CBC_MAC builds save/restore only rbp; CBC_MAC builds also
;;    save/restore rbx, r12-r15 (and rsi/rdi when LINUX is not defined).
;;    NOTE(review): presumably the callers of the non-CBC_MAC entry point
;;    preserve the remaining registers themselves - confirm against callers.
;;  - xmm0-xmm15 are overwritten and never saved.
;;    NOTE(review): confirm callers account for xmm6-xmm15 where the
;;    platform ABI treats them as callee-saved.
;;-----------------------------------------------------------------------
118 %ifndef AES_CBC_ENC_X4
119
120 %ifdef CBC_MAC
121 MKGLOBAL(aes128_cbc_mac_x4,function,internal)
122 aes128_cbc_mac_x4:
123 %else
124 MKGLOBAL(aes_cbc_enc_128_x4,function,internal)
125 aes_cbc_enc_128_x4:
126 %endif
127
128 %else ;; AES_CBC_ENC_X4 already defined
129
130 %ifdef CBC_MAC
131 MKGLOBAL(aes128_cbc_mac_x4_no_aesni,function,internal)
132 aes128_cbc_mac_x4_no_aesni:
133 %else
134 MKGLOBAL(aes_cbc_enc_128_x4_no_aesni,function,internal)
135 aes_cbc_enc_128_x4_no_aesni:
136 %endif
137
138 %endif
;; Prologue: reserve the save area and store the registers this build preserves.
139 sub rsp, STACK_size
140 mov [rsp + _gpr_save + 8*0], rbp ; rbp is reused as KEYS3
141 %ifdef CBC_MAC
142 mov [rsp + _gpr_save + 8*1], rbx
143 mov [rsp + _gpr_save + 8*2], r12
144 mov [rsp + _gpr_save + 8*3], r13
145 mov [rsp + _gpr_save + 8*4], r14
146 mov [rsp + _gpr_save + 8*5], r15
147 %ifndef LINUX
148 mov [rsp + _gpr_save + 8*6], rsi
149 mov [rsp + _gpr_save + 8*7], rdi
150 %endif
151 %endif
;; Block 0 is handled by the straight-line code below, so IDX starts at the
;; byte offset of block 1.
152 mov IDX, 16
153
154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
155
;; ---- First block of every lane ----
156 mov IN0, [ARG + _aesarg_in + 8*0]
157 mov IN1, [ARG + _aesarg_in + 8*1]
158 mov IN2, [ARG + _aesarg_in + 8*2]
159 mov IN3, [ARG + _aesarg_in + 8*3]
160
161 MOVDQ XDATA0, [IN0] ; load first block of plain text
162 MOVDQ XDATA1, [IN1] ; load first block of plain text
163 MOVDQ XDATA2, [IN2] ; load first block of plain text
164 MOVDQ XDATA3, [IN3] ; load first block of plain text
165
;; KEYS1/KEYS2 alias arg3/arg4; ARG and LEN (arg1/arg2) stay intact.
166 mov KEYS0, [ARG + _aesarg_keys + 8*0]
167 mov KEYS1, [ARG + _aesarg_keys + 8*1]
168 mov KEYS2, [ARG + _aesarg_keys + 8*2]
169 mov KEYS3, [ARG + _aesarg_keys + 8*3]
170
171 pxor XDATA0, [ARG + _aesarg_IV + 16*0] ; plaintext XOR IV
172 pxor XDATA1, [ARG + _aesarg_IV + 16*1] ; plaintext XOR IV
173 pxor XDATA2, [ARG + _aesarg_IV + 16*2] ; plaintext XOR IV
174 pxor XDATA3, [ARG + _aesarg_IV + 16*3] ; plaintext XOR IV
175
176 %ifndef CBC_MAC
177 mov OUT0, [ARG + _aesarg_out + 8*0]
178 mov OUT1, [ARG + _aesarg_out + 8*1]
179 mov OUT2, [ARG + _aesarg_out + 8*2]
180 mov OUT3, [ARG + _aesarg_out + 8*3]
181 %endif
182
;; Round 0 is the initial AddRoundKey; rounds 1-9 use aesenc and round 10
;; uses aesenclast. Each round is issued for all four lanes back to back.
183 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
184 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
185 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
186 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
187
188 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
189 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
190 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
191 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
192
193 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
194 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
195 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
196 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
197
;; Round keys 3, 6 and 9 are loaded into registers here, once, and reused
;; from those registers by main_loop.
198 movdqa XKEY0_3, [KEYS0 + 16*3] ; load round 3 key
199 movdqa XKEY1_3, [KEYS1 + 16*3] ; load round 3 key
200 movdqa XKEY2_3, [KEYS2 + 16*3] ; load round 3 key
201 movdqa XKEY3_3, [KEYS3 + 16*3] ; load round 3 key
202
203 aesenc XDATA0, XKEY0_3 ; 3. ENC
204 aesenc XDATA1, XKEY1_3 ; 3. ENC
205 aesenc XDATA2, XKEY2_3 ; 3. ENC
206 aesenc XDATA3, XKEY3_3 ; 3. ENC
207
208 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
209 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
210 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
211 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
212
213 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
214 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
215 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
216 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
217
;; Only lanes 1-3 are loaded: XKEY0_6 is defined as the memory operand
;; [KEYS0 + 16*6], so lane 0 reads its round 6 key from memory each time.
218 movdqa XKEY1_6, [KEYS1 + 16*6] ; load round 6 key
219 movdqa XKEY2_6, [KEYS2 + 16*6] ; load round 6 key
220 movdqa XKEY3_6, [KEYS3 + 16*6] ; load round 6 key
221
222 aesenc XDATA0, XKEY0_6 ; 6. ENC
223 aesenc XDATA1, XKEY1_6 ; 6. ENC
224 aesenc XDATA2, XKEY2_6 ; 6. ENC
225 aesenc XDATA3, XKEY3_6 ; 6. ENC
226
227 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
228 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
229 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
230 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
231
232 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
233 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
234 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
235 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
236
237 movdqa XKEY0_9, [KEYS0 + 16*9] ; load round 9 key
238 movdqa XKEY1_9, [KEYS1 + 16*9] ; load round 9 key
239 movdqa XKEY2_9, [KEYS2 + 16*9] ; load round 9 key
240 movdqa XKEY3_9, [KEYS3 + 16*9] ; load round 9 key
241
242 aesenc XDATA0, XKEY0_9 ; 9. ENC
243 aesenc XDATA1, XKEY1_9 ; 9. ENC
244 aesenc XDATA2, XKEY2_9 ; 9. ENC
245 aesenc XDATA3, XKEY3_9 ; 9. ENC
246
247 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC (final round)
248 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC (final round)
249 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC (final round)
250 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC (final round)
251
252 %ifndef CBC_MAC
253 MOVDQ [OUT0], XDATA0 ; write back ciphertext
254 MOVDQ [OUT1], XDATA1 ; write back ciphertext
255 MOVDQ [OUT2], XDATA2 ; write back ciphertext
256 MOVDQ [OUT3], XDATA3 ; write back ciphertext
257 %endif
;; IDX is still 16 here: LEN == 16 means there was exactly one block per lane.
258 cmp LEN, IDX
259 je done
260
;; ---- Remaining blocks ----
;; Invariant at loop entry: XDATAn holds the ciphertext of the block at
;; offset IDX-16 of lane n, which is the CBC chaining value for offset IDX.
261 main_loop:
262 pxor2 XDATA0, [IN0 + IDX] ; plaintext XOR previous ciphertext block
263 pxor2 XDATA1, [IN1 + IDX] ; plaintext XOR previous ciphertext block
264 pxor2 XDATA2, [IN2 + IDX] ; plaintext XOR previous ciphertext block
265 pxor2 XDATA3, [IN3 + IDX] ; plaintext XOR previous ciphertext block
266
267 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
268 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
269 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
270 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
271
272 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
273 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
274 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
275 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
276
277 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
278 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
279 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
280 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
281
282 aesenc XDATA0, XKEY0_3 ; 3. ENC
283 aesenc XDATA1, XKEY1_3 ; 3. ENC
284 aesenc XDATA2, XKEY2_3 ; 3. ENC
285 aesenc XDATA3, XKEY3_3 ; 3. ENC
286
287 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
288 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
289 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
290 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
291
292 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
293 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
294 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
295 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
296
297 aesenc XDATA0, XKEY0_6 ; 6. ENC
298 aesenc XDATA1, XKEY1_6 ; 6. ENC
299 aesenc XDATA2, XKEY2_6 ; 6. ENC
300 aesenc XDATA3, XKEY3_6 ; 6. ENC
301
302 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
303 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
304 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
305 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
306
307 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
308 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
309 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
310 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
311
312 aesenc XDATA0, XKEY0_9 ; 9. ENC
313 aesenc XDATA1, XKEY1_9 ; 9. ENC
314 aesenc XDATA2, XKEY2_9 ; 9. ENC
315 aesenc XDATA3, XKEY3_9 ; 9. ENC
316
317 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC (final round)
318 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC (final round)
319 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC (final round)
320 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC (final round)
321
322 %ifndef CBC_MAC
323 ;; No cipher text write back for CBC-MAC
324 MOVDQ [OUT0 + IDX], XDATA0 ; write back ciphertext
325 MOVDQ [OUT1 + IDX], XDATA1 ; write back ciphertext
326 MOVDQ [OUT2 + IDX], XDATA2 ; write back ciphertext
327 MOVDQ [OUT3 + IDX], XDATA3 ; write back ciphertext
328 %endif
329
;; Advance one block; the loop ends only on exact equality with LEN.
330 add IDX, 16
331 cmp LEN, IDX
332 jne main_loop
333
334 done:
335 ;; update IV / store digest for CBC-MAC
;; XDATAn holds the last ciphertext block of lane n at this point.
336 movdqa [ARG + _aesarg_IV + 16*0], XDATA0
337 movdqa [ARG + _aesarg_IV + 16*1], XDATA1
338 movdqa [ARG + _aesarg_IV + 16*2], XDATA2
339 movdqa [ARG + _aesarg_IV + 16*3], XDATA3
340
341 ;; update IN and OUT
;; INn/OUTn were never advanced inside the loop (IDX was the offset), so the
;; stored pointers become original pointer + LEN.
342 add IN0, LEN
343 mov [ARG + _aesarg_in + 8*0], IN0
344 add IN1, LEN
345 mov [ARG + _aesarg_in + 8*1], IN1
346 add IN2, LEN
347 mov [ARG + _aesarg_in + 8*2], IN2
348 add IN3, LEN
349 mov [ARG + _aesarg_in + 8*3], IN3
350
351 %ifndef CBC_MAC
352 ;; No OUT pointer updates for CBC-MAC
353 add OUT0, LEN
354 mov [ARG + _aesarg_out + 8*0], OUT0
355 add OUT1, LEN
356 mov [ARG + _aesarg_out + 8*1], OUT1
357 add OUT2, LEN
358 mov [ARG + _aesarg_out + 8*2], OUT2
359 add OUT3, LEN
360 mov [ARG + _aesarg_out + 8*3], OUT3
361 %endif
362
;; Epilogue: restore exactly the registers the prologue saved for this build.
363 %ifdef CBC_MAC
364 mov rbx, [rsp + _gpr_save + 8*1]
365 mov r12, [rsp + _gpr_save + 8*2]
366 mov r13, [rsp + _gpr_save + 8*3]
367 mov r14, [rsp + _gpr_save + 8*4]
368 mov r15, [rsp + _gpr_save + 8*5]
369 %ifndef LINUX
370 mov rsi, [rsp + _gpr_save + 8*6]
371 mov rdi, [rsp + _gpr_save + 8*7]
372 %endif
373 %endif
374 mov rbp, [rsp + _gpr_save + 8*0]
375 add rsp, STACK_size
376 ret
377
;; LINUX builds emit an empty .note.GNU-stack section flagged noexec; GNU
;; toolchains read this as "this object does not need an executable stack".
378 %ifdef LINUX
379 section .note.GNU-stack noalloc noexec nowrite progbits
380 %endif