]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/intel-ipsec-mb/avx/aes256_cbc_dec_by8_avx.asm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / avx / aes256_cbc_dec_by8_avx.asm
CommitLineData
11fdf7f2
TL
1;;
2;; Copyright (c) 2012-2018, Intel Corporation
3;;
4;; Redistribution and use in source and binary forms, with or without
5;; modification, are permitted provided that the following conditions are met:
6;;
7;; * Redistributions of source code must retain the above copyright notice,
8;; this list of conditions and the following disclaimer.
9;; * Redistributions in binary form must reproduce the above copyright
10;; notice, this list of conditions and the following disclaimer in the
11;; documentation and/or other materials provided with the distribution.
12;; * Neither the name of Intel Corporation nor the names of its contributors
13;; may be used to endorse or promote products derived from this software
14;; without specific prior written permission.
15;;
16;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26;;
27
28; routine to do AES256 CBC decrypt "by8"
29
30; XMM registers are clobbered. Saving/restoring must be done at a higher level
f67539c2 31%include "include/os.asm"
11fdf7f2
TL
32
; CONCAT(a,b) pastes two tokens together with %+; the macros below use it to
; build the register alias xdata<i> from the assembly-time counter i.
; VMOVDQ is the move used for all block loads/stores on p_in / p_out; it is
; the unaligned form (vmovdqu).
33%define CONCAT(a,b) a %+ b
34%define VMOVDQ vmovdqu
35
; xdata0..xdata7: one register per 16-byte block being decrypted in parallel.
36%define xdata0 xmm0
37%define xdata1 xmm1
38%define xdata2 xmm2
39%define xdata3 xmm3
40%define xdata4 xmm4
41%define xdata5 xmm5
42%define xdata6 xmm6
43%define xdata7 xmm7
; xIV: value XORed into the first decrypted block of a group (the IV at entry,
; afterwards the last input block of the previous group).
44%define xIV xmm8
; xkey0/3/6/9/12: round keys at p_keys + {0,3,6,9,12}*16. do_aes only loads
; them when its load_keys argument is non-zero.
45%define xkey0 xmm9
46%define xkey3 xmm10
47%define xkey6 xmm11
48%define xkey9 xmm12
49%define xkey12 xmm13
; xkeyA/xkeyB: scratch key registers; do_aes reloads them alternately with the
; remaining round keys on every expansion.
50%define xkeyA xmm14
51%define xkeyB xmm15
52
; Argument registers. With LINUX defined the five arguments arrive in
; rdi, rsi, rdx, rcx, r8. Otherwise the first four arrive in rcx, rdx, r8, r9
; and num_bytes is kept in rax, which the entry point loads from [rsp + 8*5].
53%ifdef LINUX
54%define p_in rdi
55%define p_IV rsi
56%define p_keys rdx
57%define p_out rcx
58%define num_bytes r8
59%else
60%define p_in rcx
61%define p_IV rdx
62%define p_keys r8
63%define p_out r9
64%define num_bytes rax
65%endif
66
; tmp: scratch GPR; the entry point uses it for num_bytes & (7*16).
67%define tmp r10
68
; do_aes_load <by>: expand do_aes for <by> blocks with load_keys = 1, i.e. the
; expansion also loads xkey0, xkey3, xkey6, xkey9 and xkey12 from p_keys.
69%macro do_aes_load 1
70 do_aes %1, 1
71%endmacro
72
; do_aes_noload <by>: expand do_aes for <by> blocks with load_keys = 0, i.e.
; xkey0, xkey3, xkey6, xkey9 and xkey12 are used as they are and must already
; hold the round keys at p_keys + {0,3,6,9,12}*16.
73%macro do_aes_noload 1
74 do_aes %1, 0
75%endmacro
76
;; do_aes <by>, <load_keys>
;;   by        - number of 16-byte blocks handled by this expansion; block n
;;               lives in xdata<n>, so xdata0 .. xdata<by-1> are overwritten
;;   load_keys - non-zero: load xkey0/xkey3/xkey6/xkey9/xkey12 from p_keys
;;               zero:     use those five registers as they are
;;
;; Sequence: load <by> blocks from p_in, XOR with round key 0, 13 vaesdec
;; rounds (round keys 1..13), vaesdeclast (round key 14), CBC XOR with xIV /
;; the preceding input block, store <by> blocks to p_out.
;;
;; On exit: p_in has advanced by 16*by, p_out is unchanged, xIV holds the last
;; input block that was read, xkeyA/xkeyB hold round keys 13/14.
;; Round keys are read with vmovdqa, the aligned move, so p_keys has to be
;; 16-byte aligned. The assembly-time variable i is overwritten.
77; do_aes num_in_par load_keys
78; This increments p_in, but not p_out
79%macro do_aes 2
80%define %%by %1
81%define %%load_keys %2
82
83%if (%%load_keys)
84 vmovdqa xkey0, [p_keys + 0*16]
85%endif
86
; Load the <by> input blocks into xdata0 .. xdata<by-1>.
87%assign i 0
88%rep %%by
89 VMOVDQ CONCAT(xdata,i), [p_in + i*16]
90%assign i (i+1)
91%endrep
92
; Each key load below is placed ahead of the round that precedes its use.
93 vmovdqa xkeyA, [p_keys + 1*16]
94
; Initial AddRoundKey: XOR every block with round key 0.
95%assign i 0
96%rep %%by
97 vpxor CONCAT(xdata,i), CONCAT(xdata,i), xkey0
98%assign i (i+1)
99%endrep
100
101 vmovdqa xkeyB, [p_keys + 2*16]
102
; Round with key 1.
103%assign i 0
104%rep %%by
105 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyA
106%assign i (i+1)
107%endrep
108
; p_in is advanced here, before the CBC re-reads further down; those re-reads
; compensate with the "- 16*%%by" term in their addresses.
109 add p_in, 16*%%by
110
111%if (%%load_keys)
112 vmovdqa xkey3, [p_keys + 3*16]
113%endif
114
; Round with key 2.
115%assign i 0
116%rep %%by
117 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyB
118%assign i (i+1)
119%endrep
120
121 vmovdqa xkeyA, [p_keys + 4*16]
122
; Round with key 3.
123%assign i 0
124%rep %%by
125 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkey3
126%assign i (i+1)
127%endrep
128
129 vmovdqa xkeyB, [p_keys + 5*16]
130
; Round with key 4.
131%assign i 0
132%rep %%by
133 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyA
134%assign i (i+1)
135%endrep
136
137%if (%%load_keys)
138 vmovdqa xkey6, [p_keys + 6*16]
139%endif
140
; Round with key 5.
141%assign i 0
142%rep %%by
143 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyB
144%assign i (i+1)
145%endrep
146
147 vmovdqa xkeyA, [p_keys + 7*16]
148
; Round with key 6.
149%assign i 0
150%rep %%by
151 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkey6
152%assign i (i+1)
153%endrep
154
155 vmovdqa xkeyB, [p_keys + 8*16]
156
; Round with key 7.
157%assign i 0
158%rep %%by
159 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyA
160%assign i (i+1)
161%endrep
162
163%if (%%load_keys)
164 vmovdqa xkey9, [p_keys + 9*16]
165%endif
166
; Round with key 8.
167%assign i 0
168%rep %%by
169 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyB
170%assign i (i+1)
171%endrep
172
173 vmovdqa xkeyA, [p_keys + 10*16]
174
; Round with key 9.
175%assign i 0
176%rep %%by
177 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkey9
178%assign i (i+1)
179%endrep
180
181 vmovdqa xkeyB, [p_keys + 11*16]
182
; Round with key 10.
183%assign i 0
184%rep %%by
185 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyA
186%assign i (i+1)
187%endrep
188
189%if (%%load_keys)
190 vmovdqa xkey12, [p_keys + 12*16]
191%endif
192
; Round with key 11.
193%assign i 0
194%rep %%by
195 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyB
196%assign i (i+1)
197%endrep
198
199 vmovdqa xkeyA, [p_keys + 13*16]
200
; Round with key 12.
201%assign i 0
202%rep %%by
203 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkey12
204%assign i (i+1)
205%endrep
206
207 vmovdqa xkeyB, [p_keys + 14*16]
208
; Round with key 13.
209%assign i 0
210%rep %%by
211 vaesdec CONCAT(xdata,i), CONCAT(xdata,i), xkeyA
212%assign i (i+1)
213%endrep
214
; Final round with key 14.
215%assign i 0
216%rep %%by
217 vaesdeclast CONCAT(xdata,i), CONCAT(xdata,i), xkeyB
218%assign i (i+1)
219%endrep
220
; CBC chaining: block 0 is XORed with the incoming xIV; block i (i >= 1) is
; XORed with input block i-1, re-read from memory through xIV. Because p_in
; was already advanced, input block k is at [p_in + k*16 - 16*%%by].
221 vpxor xdata0, xdata0, xIV
222%assign i 1
223%if (%%by > 1)
224%rep (%%by - 1)
225 VMOVDQ xIV, [p_in + (i-1)*16 - 16*%%by]
226 vpxor CONCAT(xdata,i), CONCAT(xdata,i), xIV
227%assign i (i+1)
228%endrep
229%endif
; Here i == %%by, so this reloads the last input block of the group; it becomes
; the chaining value for the next expansion.
230 VMOVDQ xIV, [p_in + (i-1)*16 - 16*%%by]
231
; Stores come only after every read of the input above.
; NOTE(review): that ordering looks intended to allow p_out == p_in - confirm
; against the callers.
232%assign i 0
233%rep %%by
234 VMOVDQ [p_out + i*16], CONCAT(xdata,i)
235%assign i (i+1)
236%endrep
237%endmacro
238
239;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
240;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
241;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
242
section .text

;-----------------------------------------------------------------------
;; aes_cbc_dec_256_avx(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;
; AES-256 CBC decrypt of num_bytes bytes from in to out, using the 15 round
; keys at keys[0..14] and the 16-byte IV at IV.
;
; In (LINUX defined): rdi = in, rsi = IV, rdx = keys, rcx = out, r8 = num_bytes
; In (otherwise):     rcx = in, rdx = IV, r8 = keys, r9 = out,
;                     [rsp + 8*5] = num_bytes (loaded into rax)
;
; Only whole 16-byte blocks are processed; a trailing partial block and a
; length of zero are ignored.
; The (num_bytes / 16) % 8 leading blocks are decrypted first by one
; do_aes_load expansion, the rest in groups of 8 by main_loop2.
;
; keys is read with vmovdqa (aligned move) and must be 16-byte aligned.
; The IV is not written back to memory.
; Clobbers: p_in, p_out, num_bytes, tmp (r10), flags, and the XMM registers
;           used by do_aes (saving/restoring XMM registers is left to the
;           caller, as stated at the top of this file).
;-----------------------------------------------------------------------
MKGLOBAL(aes_cbc_dec_256_avx,function,internal)
aes_cbc_dec_256_avx:

%ifndef LINUX
        mov     num_bytes, [rsp + 8*5]
%endif

        vmovdqu xIV, [p_IV]

        ; tmp = 16 * (number of blocks modulo 8)
        mov     tmp, num_bytes
        and     tmp, 7*16
        jz      mult_of_8_blks

        ; 1*16 <= tmp <= 7*16: dispatch on the number of leading blocks
        cmp     tmp, 4*16
        jg      gt4
        je      eq4

lt4:
        cmp     tmp, 2*16
        jg      eq3
        je      eq2
eq1:
        do_aes_load     1
        add     p_out, 1*16
        and     num_bytes, ~7*16        ; ~7*16 == -128: keep whole 8-block groups
        jz      do_return2
        jmp     main_loop2

eq2:
        do_aes_load     2
        add     p_out, 2*16
        and     num_bytes, ~7*16
        jz      do_return2
        jmp     main_loop2

eq3:
        do_aes_load     3
        add     p_out, 3*16
        and     num_bytes, ~7*16
        jz      do_return2
        jmp     main_loop2

eq4:
        do_aes_load     4
        add     p_out, 4*16
        and     num_bytes, ~7*16
        jz      do_return2
        jmp     main_loop2

gt4:
        cmp     tmp, 6*16
        jg      eq7
        je      eq6

eq5:
        do_aes_load     5
        add     p_out, 5*16
        and     num_bytes, ~7*16
        jz      do_return2
        jmp     main_loop2

eq6:
        do_aes_load     6
        add     p_out, 6*16
        and     num_bytes, ~7*16
        jz      do_return2
        jmp     main_loop2

eq7:
        do_aes_load     7
        add     p_out, 7*16
        and     num_bytes, ~7*16
        jz      do_return2
        jmp     main_loop2

mult_of_8_blks:
        ; Same masking as the eqN paths: drop any sub-block remainder and
        ; return when no whole group of 8 blocks is left. Without this a
        ; length of 0 (or < 16, or 128*k + r with 0 < r < 16) would enter
        ; main_loop2 and its "sub / jne" exit test would never hit zero.
        and     num_bytes, ~7*16
        jz      do_return2

        ; No do_aes_load ran on this path, so load the cached round keys here.
        vmovdqa xkey0, [p_keys + 0*16]
        vmovdqa xkey3, [p_keys + 3*16]
        vmovdqa xkey6, [p_keys + 6*16]
        vmovdqa xkey9, [p_keys + 9*16]
        vmovdqa xkey12, [p_keys + 12*16]

main_loop2:
        ; num_bytes is a non-zero multiple of 8*16 bytes here
        do_aes_noload   8
        add     p_out, 8*16
        sub     num_bytes, 8*16
        jne     main_loop2

do_return2:
; Don't write back IV
;       vmovdqu [p_IV], xIV

        ret
341
; When LINUX is defined, emit an empty .note.GNU-stack section flagged noexec;
; GNU toolchains read this as "this object does not need an executable stack".
342%ifdef LINUX
343section .note.GNU-stack noalloc noexec nowrite progbits
344%endif