]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/intel-ipsec-mb/sse/aes_cbc_enc_192_x4.asm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / sse / aes_cbc_enc_192_x4.asm
1 ;;
2 ;; Copyright (c) 2012-2018, Intel Corporation
3 ;;
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
6 ;;
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
15 ;;
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ;;
27
28 ;;; routine to do a 192 bit CBC AES encrypt
29 ;;; process 4 buffers at a time, single data structure as input
30 ;;; Updates the IV, In and Out fields (entries 0-3) of the data structure at end
31
32 %include "include/os.asm"
33 %include "mb_mgr_datastruct.asm"
34
;; MOVDQ is the move used for every access to the input and output buffers.
35 %define MOVDQ movdqu ;; assume buffers not aligned
;; pxor2 dst, src
;;   XOR src into dst by way of an explicit MOVDQ load.
;;   %1 - destination xmm register, updated in place
;;   %2 - source operand (a memory operand at every call site in this file)
;;   Clobbers XTMP.
;;   A direct legacy-SSE `pxor xmm, [mem]` requires a 16-byte aligned address,
;;   so the possibly unaligned source is first fetched into XTMP with MOVDQ.
36 %macro pxor2 2
37 MOVDQ XTMP, %2 ; XTMP = src (alignment-tolerant load)
38 pxor %1, XTMP ; dst ^= XTMP
39 %endm
40
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;; struct AES_ARGS {
43 ;; void* in[8];
44 ;; void* out[8];
45 ;; UINT128* keys[8];
46 ;; UINT128 IV[8];
47 ;; }
48 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49 ;; void aes_cbc_enc_192_x4(AES_ARGS *args, UINT64 len);
50 ;; arg 1: ARG : addr of AES_ARGS structure
51 ;; arg 2: LEN : len (in units of bytes; must be a non-zero multiple of 16)
52
;; ---------------------------------------------------------------------
;; Register aliases used by the routine below.
;;
;; ARG/LEN are the two incoming arguments. REG3/REG4 are two further
;; general-purpose registers, picked per build so that they never alias
;; ARG or LEN (LINUX: rcx/rdx, otherwise: rsi/rdi).
;; ---------------------------------------------------------------------
53 %ifdef LINUX
54 %define ARG rdi
55 %define LEN rsi
56 %define REG3 rcx
57 %define REG4 rdx
58 %else
59 %define ARG rcx
60 %define LEN rdx
61 %define REG3 rsi
62 %define REG4 rdi
63 %endif
64
;; IDX: byte offset of the current 16-byte block, added to each INn/OUTn.
65 %define IDX rax
66
;; Per-lane pointers (lane n = entry n of the in/out/keys arrays):
;;   INn   - input buffer pointer
;;   KEYSn - pointer to the lane's round keys (16 bytes per round)
;;   OUTn  - output buffer pointer
67 %define IN0 r8
68 %define KEYS0 rbx
69 %define OUT0 r9
70
71 %define IN1 r10
72 %define KEYS1 REG3
73 %define OUT1 r11
74
75 %define IN2 r12
76 %define KEYS2 REG4
77 %define OUT2 r13
78
79 %define IN3 r14
;; KEYS3 lives in rbp, which is why the routine pushes/pops rbp.
80 %define KEYS3 rbp
81 %define OUT3 r15
82
83
;; XDATAn: the block being encrypted for lane n. After a block has been
;; encrypted it still holds that ciphertext, which is what gets chained
;; into the next block.
84 %define XDATA0 xmm0
85 %define XDATA1 xmm1
86 %define XDATA2 xmm2
87 %define XDATA3 xmm3
88
;; XKEYn_r: round key r (r = 3, 6, 9) of lane n, held in a register.
;; XTMP is the scratch register used by the pxor2 macro.
;; XKEY0_6 is the one exception: it is a memory operand, not a register
;; (presumably because xmm0-xmm15 are all taken by the other aliases).
89 %define XKEY0_3 xmm4
90 %define XKEY0_6 [KEYS0 + 16*6]
91 %define XTMP xmm5
92 %define XKEY0_9 xmm6
93
94 %define XKEY1_3 xmm7
95 %define XKEY1_6 xmm8
96 %define XKEY1_9 xmm9
97
98 %define XKEY2_3 xmm10
99 %define XKEY2_6 xmm11
100 %define XKEY2_9 xmm12
101
102 %define XKEY3_3 xmm13
103 %define XKEY3_6 xmm14
104 %define XKEY3_9 xmm15
105
;; Name of the routine's entry label. It can be overridden by defining
;; AES_CBC_ENC_X4 before this point; the default is aes_cbc_enc_192_x4.
106 %ifndef AES_CBC_ENC_X4
107 %define AES_CBC_ENC_X4 aes_cbc_enc_192_x4
108 %endif
109
110 section .text
111
;; ---------------------------------------------------------------------
;; AES_CBC_ENC_X4 (default symbol: aes_cbc_enc_192_x4)
;;
;; CBC-encrypts four independent buffers ("lanes" 0-3) in lock step, one
;; 16-byte block per lane per iteration. Each block goes through an
;; initial key XOR (round key 0), eleven aesenc rounds (keys 1-11) and
;; one aesenclast (key 12), i.e. round keys at KEYSn + 16*0 .. 16*12.
;;
;; In:
;;   ARG - address of the args structure; only entries 0-3 of its
;;         in / out / keys / IV arrays are accessed (the _aesarg_*
;;         offsets are defined outside this file).
;;   LEN - number of bytes to process in every lane.
;;
;; Out (written back through ARG):
;;   IV[0-3]  - last ciphertext block produced for each lane
;;   in[0-3]  - advanced by LEN
;;   out[0-3] - advanced by LEN
;;
;; Constraints visible in the code:
;;   * LEN must be a non-zero multiple of 16: the first block is always
;;     processed, and the loop only exits when IDX (16, 32, ...) == LEN.
;;   * Round keys and the IV slots are accessed with movdqa or as memory
;;     operands of pxor/aesenc/aesenclast, so they must be 16-byte
;;     aligned. Input/output data is accessed with MOVDQ (movdqu) only
;;     and may be unaligned.
;;
;; Registers:
;;   Only rbp is saved and restored. rax, rbx, r8-r15, REG3, REG4,
;;   xmm0-xmm15 and the flags are overwritten. ARG and LEN are not
;;   modified.
;;   NOTE(review): rbx and r12-r15 (and rsi/rdi, xmm6-xmm15 on the
;;   non-LINUX build) are callee-saved in the standard ABIs; presumably
;;   every caller of this internal routine preserves them itself -
;;   confirm before calling it from new code.
;;
;; The four lanes are interleaved instruction by instruction. Within one
;; lane every round depends on the previous one, so the interleave is
;; presumably there to keep independent AES operations in flight; keep
;; the ordering when editing.
;; ---------------------------------------------------------------------
112 MKGLOBAL(AES_CBC_ENC_X4,function,internal)
113 AES_CBC_ENC_X4:
114
115 push rbp ; rbp is reused as KEYS3
116
117 mov IDX, 16 ; offset of the second block; block 0 is handled before the loop
118
119 mov IN0, [ARG + _aesarg_in + 8*0]
120 mov IN1, [ARG + _aesarg_in + 8*1]
121 mov IN2, [ARG + _aesarg_in + 8*2]
122 mov IN3, [ARG + _aesarg_in + 8*3]
123
124 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; First block of every lane: chained from the IV stored in the args
;; structure. This pass also loads the round 3/6/9 keys into registers;
;; main_loop reuses them without reloading.
125
126 MOVDQ XDATA0, [IN0] ; load first block of plain text
127 MOVDQ XDATA1, [IN1] ; load first block of plain text
128 MOVDQ XDATA2, [IN2] ; load first block of plain text
129 MOVDQ XDATA3, [IN3] ; load first block of plain text
130
131 mov KEYS0, [ARG + _aesarg_keys + 8*0]
132 mov KEYS1, [ARG + _aesarg_keys + 8*1]
133 mov KEYS2, [ARG + _aesarg_keys + 8*2]
134 mov KEYS3, [ARG + _aesarg_keys + 8*3]
135
136 pxor XDATA0, [ARG + _aesarg_IV + 16*0] ; plaintext XOR IV (IV slot must be 16-byte aligned)
137 pxor XDATA1, [ARG + _aesarg_IV + 16*1] ; plaintext XOR IV
138 pxor XDATA2, [ARG + _aesarg_IV + 16*2] ; plaintext XOR IV
139 pxor XDATA3, [ARG + _aesarg_IV + 16*3] ; plaintext XOR IV
140
141 mov OUT0, [ARG + _aesarg_out + 8*0]
142 mov OUT1, [ARG + _aesarg_out + 8*1]
143 mov OUT2, [ARG + _aesarg_out + 8*2]
144 mov OUT3, [ARG + _aesarg_out + 8*3]
145
146 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
147 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
148 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
149 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
150
151 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
152 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
153 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
154 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
155
156 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
157 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
158 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
159 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
160
161 movdqa XKEY0_3, [KEYS0 + 16*3] ; load round 3 key (kept in register for main_loop)
162 movdqa XKEY1_3, [KEYS1 + 16*3] ; load round 3 key
163 movdqa XKEY2_3, [KEYS2 + 16*3] ; load round 3 key
164 movdqa XKEY3_3, [KEYS3 + 16*3] ; load round 3 key
165
166 aesenc XDATA0, XKEY0_3 ; 3. ENC
167 aesenc XDATA1, XKEY1_3 ; 3. ENC
168 aesenc XDATA2, XKEY2_3 ; 3. ENC
169 aesenc XDATA3, XKEY3_3 ; 3. ENC
170
171 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
172 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
173 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
174 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
175
176 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
177 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
178 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
179 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
180
;; Lane 0 has no load here: XKEY0_6 is a memory operand, not a register.
181 movdqa XKEY1_6, [KEYS1 + 16*6] ; load round 6 key
182 movdqa XKEY2_6, [KEYS2 + 16*6] ; load round 6 key
183 movdqa XKEY3_6, [KEYS3 + 16*6] ; load round 6 key
184
185 aesenc XDATA0, XKEY0_6 ; 6. ENC (key read from memory)
186 aesenc XDATA1, XKEY1_6 ; 6. ENC
187 aesenc XDATA2, XKEY2_6 ; 6. ENC
188 aesenc XDATA3, XKEY3_6 ; 6. ENC
189
190 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
191 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
192 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
193 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
194
195 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
196 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
197 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
198 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
199
200 movdqa XKEY0_9, [KEYS0 + 16*9] ; load round 9 key (kept in register for main_loop)
201 movdqa XKEY1_9, [KEYS1 + 16*9] ; load round 9 key
202 movdqa XKEY2_9, [KEYS2 + 16*9] ; load round 9 key
203 movdqa XKEY3_9, [KEYS3 + 16*9] ; load round 9 key
204
205 aesenc XDATA0, XKEY0_9 ; 9. ENC
206 aesenc XDATA1, XKEY1_9 ; 9. ENC
207 aesenc XDATA2, XKEY2_9 ; 9. ENC
208 aesenc XDATA3, XKEY3_9 ; 9. ENC
209
210 aesenc XDATA0, [KEYS0 + 16*10] ; 10. ENC
211 aesenc XDATA1, [KEYS1 + 16*10] ; 10. ENC
212 aesenc XDATA2, [KEYS2 + 16*10] ; 10. ENC
213 aesenc XDATA3, [KEYS3 + 16*10] ; 10. ENC
214
215 aesenc XDATA0, [KEYS0 + 16*11] ; 11. ENC
216 aesenc XDATA1, [KEYS1 + 16*11] ; 11. ENC
217 aesenc XDATA2, [KEYS2 + 16*11] ; 11. ENC
218 aesenc XDATA3, [KEYS3 + 16*11] ; 11. ENC
219
220 aesenclast XDATA0, [KEYS0 + 16*12] ; 12. ENC (final round)
221 aesenclast XDATA1, [KEYS1 + 16*12] ; 12. ENC (final round)
222 aesenclast XDATA2, [KEYS2 + 16*12] ; 12. ENC (final round)
223 aesenclast XDATA3, [KEYS3 + 16*12] ; 12. ENC (final round)
224
225 MOVDQ [OUT0], XDATA0 ; write back ciphertext
226 MOVDQ [OUT1], XDATA1 ; write back ciphertext
227 MOVDQ [OUT2], XDATA2 ; write back ciphertext
228 MOVDQ [OUT3], XDATA3 ; write back ciphertext
229
230 cmp LEN, IDX ; LEN == 16: nothing left after the first block
231 je done
232
;; Remaining blocks. On entry to each iteration XDATAn still holds the
;; ciphertext of the previous block of lane n and IDX is the byte offset
;; of the block about to be encrypted.
233 main_loop:
234 pxor2 XDATA0, [IN0 + IDX] ; plaintext XOR previous ciphertext (CBC chaining)
235 pxor2 XDATA1, [IN1 + IDX] ; plaintext XOR previous ciphertext
236 pxor2 XDATA2, [IN2 + IDX] ; plaintext XOR previous ciphertext
237 pxor2 XDATA3, [IN3 + IDX] ; plaintext XOR previous ciphertext
238
239
240 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
241 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
242 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
243 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
244
245 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
246 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
247 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
248 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
249
250 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
251 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
252 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
253 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
254
255 aesenc XDATA0, XKEY0_3 ; 3. ENC (key loaded during the first block)
256 aesenc XDATA1, XKEY1_3 ; 3. ENC
257 aesenc XDATA2, XKEY2_3 ; 3. ENC
258 aesenc XDATA3, XKEY3_3 ; 3. ENC
259
260 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
261 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
262 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
263 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
264
265 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
266 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
267 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
268 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
269
270 aesenc XDATA0, XKEY0_6 ; 6. ENC (key read from memory)
271 aesenc XDATA1, XKEY1_6 ; 6. ENC
272 aesenc XDATA2, XKEY2_6 ; 6. ENC
273 aesenc XDATA3, XKEY3_6 ; 6. ENC
274
275 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
276 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
277 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
278 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
279
280 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
281 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
282 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
283 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
284
285 aesenc XDATA0, XKEY0_9 ; 9. ENC (key loaded during the first block)
286 aesenc XDATA1, XKEY1_9 ; 9. ENC
287 aesenc XDATA2, XKEY2_9 ; 9. ENC
288 aesenc XDATA3, XKEY3_9 ; 9. ENC
289
290 aesenc XDATA0, [KEYS0 + 16*10] ; 10. ENC
291 aesenc XDATA1, [KEYS1 + 16*10] ; 10. ENC
292 aesenc XDATA2, [KEYS2 + 16*10] ; 10. ENC
293 aesenc XDATA3, [KEYS3 + 16*10] ; 10. ENC
294
295 aesenc XDATA0, [KEYS0 + 16*11] ; 11. ENC
296 aesenc XDATA1, [KEYS1 + 16*11] ; 11. ENC
297 aesenc XDATA2, [KEYS2 + 16*11] ; 11. ENC
298 aesenc XDATA3, [KEYS3 + 16*11] ; 11. ENC
299
300 aesenclast XDATA0, [KEYS0 + 16*12] ; 12. ENC (final round)
301 aesenclast XDATA1, [KEYS1 + 16*12] ; 12. ENC (final round)
302 aesenclast XDATA2, [KEYS2 + 16*12] ; 12. ENC (final round)
303 aesenclast XDATA3, [KEYS3 + 16*12] ; 12. ENC (final round)
304
305
306
307 MOVDQ [OUT0 + IDX], XDATA0 ; write back ciphertext
308 MOVDQ [OUT1 + IDX], XDATA1 ; write back ciphertext
309 MOVDQ [OUT2 + IDX], XDATA2 ; write back ciphertext
310 MOVDQ [OUT3 + IDX], XDATA3 ; write back ciphertext
311
312
313 add IDX, 16 ; advance to the next block
314 cmp LEN, IDX ; equality test only: relies on LEN being a multiple of 16
315 jne main_loop
316
317 done:
318 ;; update IV: store each lane's last ciphertext block back into its IV slot
319 movdqa [ARG + _aesarg_IV + 16*0], XDATA0
320 movdqa [ARG + _aesarg_IV + 16*1], XDATA1
321 movdqa [ARG + _aesarg_IV + 16*2], XDATA2
322 movdqa [ARG + _aesarg_IV + 16*3], XDATA3
323
324 ;; update IN and OUT: advance the stored pointers past the LEN bytes just processed
325 add IN0, LEN
326 mov [ARG + _aesarg_in + 8*0], IN0
327 add IN1, LEN
328 mov [ARG + _aesarg_in + 8*1], IN1
329 add IN2, LEN
330 mov [ARG + _aesarg_in + 8*2], IN2
331 add IN3, LEN
332 mov [ARG + _aesarg_in + 8*3], IN3
333
334 add OUT0, LEN
335 mov [ARG + _aesarg_out + 8*0], OUT0
336 add OUT1, LEN
337 mov [ARG + _aesarg_out + 8*1], OUT1
338 add OUT2, LEN
339 mov [ARG + _aesarg_out + 8*2], OUT2
340 add OUT3, LEN
341 mov [ARG + _aesarg_out + 8*3], OUT3
342
343 pop rbp ; restore the only register this routine saves
344
345 ret
346
;; On LINUX builds, emit an empty .note.GNU-stack section with the noexec
;; attribute (the conventional marker that this object does not need an
;; executable stack).
347 %ifdef LINUX
348 section .note.GNU-stack noalloc noexec nowrite progbits
349 %endif