]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/intel-ipsec-mb/sse/aes_cbc_enc_256_x4.asm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / sse / aes_cbc_enc_256_x4.asm
CommitLineData
11fdf7f2
TL
1;;
2;; Copyright (c) 2012-2018, Intel Corporation
3;;
4;; Redistribution and use in source and binary forms, with or without
5;; modification, are permitted provided that the following conditions are met:
6;;
7;; * Redistributions of source code must retain the above copyright notice,
8;; this list of conditions and the following disclaimer.
9;; * Redistributions in binary form must reproduce the above copyright
10;; notice, this list of conditions and the following disclaimer in the
11;; documentation and/or other materials provided with the distribution.
12;; * Neither the name of Intel Corporation nor the names of its contributors
13;; may be used to endorse or promote products derived from this software
14;; without specific prior written permission.
15;;
16;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26;;
27
28;;; Routine to perform 256-bit AES-CBC encryption.
29;;; Processes 4 buffers at a time; a single data structure is the input.
30;;; Updates the IV and the In and Out pointers in that structure at the end.
31
f67539c2 32%include "include/os.asm"
11fdf7f2
TL
33%include "mb_mgr_datastruct.asm"
34
;; MOVDQ is the 128-bit move used for the input and output buffers.
35%define MOVDQ movdqu ;; assume buffers not aligned
;; pxor2 dst, src
;;   XORs the 128-bit value at src into dst. src is first loaded into XTMP
;;   with MOVDQ, so src may be an unaligned memory operand (a legacy-SSE pxor
;;   with a memory operand would require 16-byte alignment).
;;   %1 = destination xmm register, %2 = source operand
;;   Clobbers: XTMP
36%macro pxor2 2
37 MOVDQ XTMP, %2
38 pxor %1, XTMP
39%endm
40
41;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
f67539c2 42;; struct AES_ARGS {
11fdf7f2
TL
43;; void* in[8];
44;; void* out[8];
45;; UINT128* keys[8];
46;; UINT128 IV[8];
47;; }
48;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
f67539c2
TL
49;; void aes_cbc_enc_256_x4(AES_ARGS *args, UINT64 len);
50;; arg 1: ARG : addr of AES_ARGS structure
11fdf7f2
TL
51;; arg 2: LEN : len (in units of bytes)
52
;; Argument registers and two scratch registers, selected per build target.
;; With LINUX defined the arguments are taken from rdi/rsi (the first two
;; integer argument registers of the System V AMD64 ABI); otherwise from
;; rcx/rdx (the first two of the Microsoft x64 ABI). REG3/REG4 name the
;; other pair and are used below as key-schedule pointers.
53%ifdef LINUX
54%define ARG rdi
55%define LEN rsi
56%define REG3 rcx
57%define REG4 rdx
58%else
59%define ARG rcx
60%define LEN rdx
61%define REG3 rsi
62%define REG4 rdi
63%endif
64
;; IDX: byte offset of the current block within each lane's buffers.
65%define IDX rax
66
;; Per-lane general-purpose registers: input pointer, key-schedule pointer
;; and output pointer for lanes 0..3.
67%define IN0 r8
68%define KEYS0 rbx
69%define OUT0 r9
70
71%define IN1 r10
72%define KEYS1 REG3
73%define OUT1 r11
74
75%define IN2 r12
76%define KEYS2 REG4
77%define OUT2 r13
78
79%define IN3 r14
80%define KEYS3 rbp
81%define OUT3 r15
82
83
;; Per-lane data registers: hold the block being encrypted and, between
;; blocks, the previous ciphertext block used for chaining.
84%define XDATA0 xmm0
85%define XDATA1 xmm1
86%define XDATA2 xmm2
87%define XDATA3 xmm3
88
;; Round keys 3, 6 and 9 of each lane are kept in xmm registers across the
;; main loop. xmm0-xmm15 are all allocated (4 data, XTMP, 11 keys), so the
;; lane 0 round 6 key has no register and XKEY0_6 is a memory operand.
89%define XKEY0_3 xmm4
90%define XKEY0_6 [KEYS0 + 16*6]
;; XTMP: scratch register used by the pxor2 macro.
91%define XTMP xmm5
92%define XKEY0_9 xmm6
93
94%define XKEY1_3 xmm7
95%define XKEY1_6 xmm8
96%define XKEY1_9 xmm9
97
98%define XKEY2_3 xmm10
99%define XKEY2_6 xmm11
100%define XKEY2_9 xmm12
101
102%define XKEY3_3 xmm13
103%define XKEY3_6 xmm14
104%define XKEY3_9 xmm15
105
9f95a23c
TL
;; Name of the exported routine. If AES_CBC_ENC_X4 is already defined when
;; this point is reached, that name is kept; otherwise it defaults to
;; aes_cbc_enc_256_x4.
106%ifndef AES_CBC_ENC_X4
107%define AES_CBC_ENC_X4 aes_cbc_enc_256_x4
108%endif
109
11fdf7f2
TL
110section .text
111
9f95a23c
TL
;;-----------------------------------------------------------------------
;; AES_CBC_ENC_X4 (default name: aes_cbc_enc_256_x4)
;;
;; CBC-encrypts LEN bytes on each of four independent lanes (entries 0..3 of
;; the in/out/keys/IV arrays in the argument structure). Each block goes
;; through an initial key XOR, 13 aesenc rounds and a final aesenclast with
;; round key 14. The four lanes are interleaved instruction by instruction.
;;
;; In:   ARG = address of the argument structure
;;       LEN = number of bytes to process per lane
;; Out:  ciphertext stored through out[0..3];
;;       IV[0..3] overwritten with each lane's last ciphertext block;
;;       in[0..3] and out[0..3] advanced by LEN.
;; Writes: IDX (rax), r8-r15, rbx, REG3, REG4, rbp (saved and restored),
;;         xmm0-xmm15, flags.
;;
;; Alignment: key schedules and IV slots are accessed with movdqa and with
;; memory-operand pxor/aesenc, which require 16-byte alignment. Input and
;; output buffers are accessed with MOVDQ (movdqu) and may be unaligned.
;;
;; NOTE(review): only rbp is saved here, although rbx, r12-r15 (and rsi/rdi
;; in the non-LINUX build) and xmm6-xmm15 are overwritten. Presumably the
;; caller preserves them as part of an internal calling convention - confirm.
;; NOTE(review): block 0 is processed unconditionally and the loop exits only
;; when IDX == LEN, so LEN is assumed to be a non-zero multiple of 16 -
;; confirm that every caller guarantees this.
;;-----------------------------------------------------------------------
112MKGLOBAL(AES_CBC_ENC_X4,function,internal)
113AES_CBC_ENC_X4:
11fdf7f2
TL
114
115 push rbp ; rbp is used as KEYS3 below
116
117 mov IDX, 16 ; offset of the next block; block 0 is handled before the loop
118
119 mov IN0, [ARG + _aesarg_in + 8*0]
120 mov IN1, [ARG + _aesarg_in + 8*1]
121 mov IN2, [ARG + _aesarg_in + 8*2]
122 mov IN3, [ARG + _aesarg_in + 8*3]
123
124 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; First block of every lane. It is handled outside the loop because the
;; chaining value comes from the IV in the argument structure and because
;; round keys 3, 6 and 9 are loaded into registers along the way.
125
126 MOVDQ XDATA0, [IN0] ; load first block of plain text
127 MOVDQ XDATA1, [IN1] ; load first block of plain text
128 MOVDQ XDATA2, [IN2] ; load first block of plain text
129 MOVDQ XDATA3, [IN3] ; load first block of plain text
130
131 mov KEYS0, [ARG + _aesarg_keys + 8*0]
132 mov KEYS1, [ARG + _aesarg_keys + 8*1]
133 mov KEYS2, [ARG + _aesarg_keys + 8*2]
134 mov KEYS3, [ARG + _aesarg_keys + 8*3]
135
136 pxor XDATA0, [ARG + _aesarg_IV + 16*0] ; plaintext XOR IV
137 pxor XDATA1, [ARG + _aesarg_IV + 16*1] ; plaintext XOR IV
138 pxor XDATA2, [ARG + _aesarg_IV + 16*2] ; plaintext XOR IV
139 pxor XDATA3, [ARG + _aesarg_IV + 16*3] ; plaintext XOR IV
140
141 mov OUT0, [ARG + _aesarg_out + 8*0]
142 mov OUT1, [ARG + _aesarg_out + 8*1]
143 mov OUT2, [ARG + _aesarg_out + 8*2]
144 mov OUT3, [ARG + _aesarg_out + 8*3]
145
146 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
147 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
148 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
149 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
150
151 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
152 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
153 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
154 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
155
156 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
157 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
158 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
159 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
160
;; Round 3 keys stay in registers and are reused by the main loop.
161 movdqa XKEY0_3, [KEYS0 + 16*3] ; load round 3 key
162 movdqa XKEY1_3, [KEYS1 + 16*3] ; load round 3 key
163 movdqa XKEY2_3, [KEYS2 + 16*3] ; load round 3 key
164 movdqa XKEY3_3, [KEYS3 + 16*3] ; load round 3 key
165
166 aesenc XDATA0, XKEY0_3 ; 3. ENC
167 aesenc XDATA1, XKEY1_3 ; 3. ENC
168 aesenc XDATA2, XKEY2_3 ; 3. ENC
169 aesenc XDATA3, XKEY3_3 ; 3. ENC
170
171 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
172 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
173 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
174 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
175
176 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
177 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
178 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
179 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
180
;; Round 6 keys for lanes 1-3 only; XKEY0_6 is a memory operand, so lane 0
;; has nothing to load here.
181 movdqa XKEY1_6, [KEYS1 + 16*6] ; load round 6 key
182 movdqa XKEY2_6, [KEYS2 + 16*6] ; load round 6 key
183 movdqa XKEY3_6, [KEYS3 + 16*6] ; load round 6 key
184
185 aesenc XDATA0, XKEY0_6 ; 6. ENC
186 aesenc XDATA1, XKEY1_6 ; 6. ENC
187 aesenc XDATA2, XKEY2_6 ; 6. ENC
188 aesenc XDATA3, XKEY3_6 ; 6. ENC
189
190 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
191 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
192 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
193 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
194
195 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
196 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
197 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
198 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
199
;; Round 9 keys stay in registers and are reused by the main loop.
200 movdqa XKEY0_9, [KEYS0 + 16*9] ; load round 9 key
201 movdqa XKEY1_9, [KEYS1 + 16*9] ; load round 9 key
202 movdqa XKEY2_9, [KEYS2 + 16*9] ; load round 9 key
203 movdqa XKEY3_9, [KEYS3 + 16*9] ; load round 9 key
204
205 aesenc XDATA0, XKEY0_9 ; 9. ENC
206 aesenc XDATA1, XKEY1_9 ; 9. ENC
207 aesenc XDATA2, XKEY2_9 ; 9. ENC
208 aesenc XDATA3, XKEY3_9 ; 9. ENC
209
210 aesenc XDATA0, [KEYS0 + 16*10] ; 10. ENC
211 aesenc XDATA1, [KEYS1 + 16*10] ; 10. ENC
212 aesenc XDATA2, [KEYS2 + 16*10] ; 10. ENC
213 aesenc XDATA3, [KEYS3 + 16*10] ; 10. ENC
214
215 aesenc XDATA0, [KEYS0 + 16*11] ; 11. ENC
216 aesenc XDATA1, [KEYS1 + 16*11] ; 11. ENC
217 aesenc XDATA2, [KEYS2 + 16*11] ; 11. ENC
218 aesenc XDATA3, [KEYS3 + 16*11] ; 11. ENC
219
220 aesenc XDATA0, [KEYS0 + 16*12] ; 12. ENC
221 aesenc XDATA1, [KEYS1 + 16*12] ; 12. ENC
222 aesenc XDATA2, [KEYS2 + 16*12] ; 12. ENC
223 aesenc XDATA3, [KEYS3 + 16*12] ; 12. ENC
224
225 aesenc XDATA0, [KEYS0 + 16*13] ; 13. ENC
226 aesenc XDATA1, [KEYS1 + 16*13] ; 13. ENC
227 aesenc XDATA2, [KEYS2 + 16*13] ; 13. ENC
228 aesenc XDATA3, [KEYS3 + 16*13] ; 13. ENC
229
230 aesenclast XDATA0, [KEYS0 + 16*14] ; 14. ENC
231 aesenclast XDATA1, [KEYS1 + 16*14] ; 14. ENC
232 aesenclast XDATA2, [KEYS2 + 16*14] ; 14. ENC
233 aesenclast XDATA3, [KEYS3 + 16*14] ; 14. ENC
234
235 MOVDQ [OUT0], XDATA0 ; write back ciphertext
236 MOVDQ [OUT1], XDATA1 ; write back ciphertext
237 MOVDQ [OUT2], XDATA2 ; write back ciphertext
238 MOVDQ [OUT3], XDATA3 ; write back ciphertext
239
240 cmp LEN, IDX ; IDX is 16 here: was that the only block?
241 je done
242
;; Remaining blocks. On entry XDATA0-3 hold the previous ciphertext block of
;; each lane, which is the CBC chaining value for the block at offset IDX.
243main_loop:
244 pxor2 XDATA0, [IN0 + IDX] ; plaintext XOR previous ciphertext
245 pxor2 XDATA1, [IN1 + IDX] ; plaintext XOR previous ciphertext
246 pxor2 XDATA2, [IN2 + IDX] ; plaintext XOR previous ciphertext
247 pxor2 XDATA3, [IN3 + IDX] ; plaintext XOR previous ciphertext
248
249
250 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
251 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
252 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
253 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
254
255 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
256 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
257 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
258 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
259
260 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
261 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
262 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
263 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
264
265 aesenc XDATA0, XKEY0_3 ; 3. ENC
266 aesenc XDATA1, XKEY1_3 ; 3. ENC
267 aesenc XDATA2, XKEY2_3 ; 3. ENC
268 aesenc XDATA3, XKEY3_3 ; 3. ENC
269
270 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
271 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
272 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
273 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
274
275 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
276 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
277 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
278 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
279
280 aesenc XDATA0, XKEY0_6 ; 6. ENC
281 aesenc XDATA1, XKEY1_6 ; 6. ENC
282 aesenc XDATA2, XKEY2_6 ; 6. ENC
283 aesenc XDATA3, XKEY3_6 ; 6. ENC
284
285 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
286 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
287 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
288 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
289
290 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
291 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
292 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
293 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
294
295 aesenc XDATA0, XKEY0_9 ; 9. ENC
296 aesenc XDATA1, XKEY1_9 ; 9. ENC
297 aesenc XDATA2, XKEY2_9 ; 9. ENC
298 aesenc XDATA3, XKEY3_9 ; 9. ENC
299
300 aesenc XDATA0, [KEYS0 + 16*10] ; 10. ENC
301 aesenc XDATA1, [KEYS1 + 16*10] ; 10. ENC
302 aesenc XDATA2, [KEYS2 + 16*10] ; 10. ENC
303 aesenc XDATA3, [KEYS3 + 16*10] ; 10. ENC
304
305 aesenc XDATA0, [KEYS0 + 16*11] ; 11. ENC
306 aesenc XDATA1, [KEYS1 + 16*11] ; 11. ENC
307 aesenc XDATA2, [KEYS2 + 16*11] ; 11. ENC
308 aesenc XDATA3, [KEYS3 + 16*11] ; 11. ENC
309
310 aesenc XDATA0, [KEYS0 + 16*12] ; 12. ENC
311 aesenc XDATA1, [KEYS1 + 16*12] ; 12. ENC
312 aesenc XDATA2, [KEYS2 + 16*12] ; 12. ENC
313 aesenc XDATA3, [KEYS3 + 16*12] ; 12. ENC
314
315 aesenc XDATA0, [KEYS0 + 16*13] ; 13. ENC
316 aesenc XDATA1, [KEYS1 + 16*13] ; 13. ENC
317 aesenc XDATA2, [KEYS2 + 16*13] ; 13. ENC
318 aesenc XDATA3, [KEYS3 + 16*13] ; 13. ENC
319
320 aesenclast XDATA0, [KEYS0 + 16*14] ; 14. ENC
321 aesenclast XDATA1, [KEYS1 + 16*14] ; 14. ENC
322 aesenclast XDATA2, [KEYS2 + 16*14] ; 14. ENC
323 aesenclast XDATA3, [KEYS3 + 16*14] ; 14. ENC
324
325
326 MOVDQ [OUT0 + IDX], XDATA0 ; write back ciphertext
327 MOVDQ [OUT1 + IDX], XDATA1 ; write back ciphertext
328 MOVDQ [OUT2 + IDX], XDATA2 ; write back ciphertext
329 MOVDQ [OUT3 + IDX], XDATA3 ; write back ciphertext
330
331
332 add IDX, 16 ; advance to the next block
333 cmp LEN, IDX ; loop until IDX reaches LEN exactly
334 jne main_loop
335
336done:
337 ;; update IV
;; XDATA0-3 hold the last ciphertext block of each lane; it is stored as the
;; lane's IV so that a later call continues the CBC chain.
338 movdqa [ARG + _aesarg_IV + 16*0], XDATA0
339 movdqa [ARG + _aesarg_IV + 16*1], XDATA1
340 movdqa [ARG + _aesarg_IV + 16*2], XDATA2
341 movdqa [ARG + _aesarg_IV + 16*3], XDATA3
342
343 ;; update IN and OUT
;; Each pointer is advanced by the LEN bytes just processed and written back
;; to the argument structure.
344 add IN0, LEN
345 mov [ARG + _aesarg_in + 8*0], IN0
346 add IN1, LEN
347 mov [ARG + _aesarg_in + 8*1], IN1
348 add IN2, LEN
349 mov [ARG + _aesarg_in + 8*2], IN2
350 add IN3, LEN
351 mov [ARG + _aesarg_in + 8*3], IN3
352
353 add OUT0, LEN
354 mov [ARG + _aesarg_out + 8*0], OUT0
355 add OUT1, LEN
356 mov [ARG + _aesarg_out + 8*1], OUT1
357 add OUT2, LEN
358 mov [ARG + _aesarg_out + 8*2], OUT2
359 add OUT3, LEN
360 mov [ARG + _aesarg_out + 8*3], OUT3
361
362 pop rbp ; restore the register saved at entry
363
364 ret
365
;; In LINUX builds, emit an empty .note.GNU-stack section; by ELF toolchain
;; convention this marks the object as not needing an executable stack.
366%ifdef LINUX
367section .note.GNU-stack noalloc noexec nowrite progbits
368%endif