# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
#
# AES-NI-CTR+GHASH stitch.
#
# February 2013
#
# OpenSSL GCM implementation is organized in such way that its
# performance is rather close to the sum of its streamed components,
# in the context parallelized AES-NI CTR and modulo-scheduled
# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation
# was observed to perform significantly better than the sum of the
# components on contemporary CPUs, the effort was deemed impossible to
# justify. This module is based on combination of Intel submissions,
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
# pressure with notable relative improvement, achieving 1.0 cycle per
# byte processed with 128-bit key on Haswell processor, 0.74 - on
# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
# measurements for favourable packet size, one divisible by 96.
# Applications using the EVP interface will observe a few percent
# worse performance.]
#
# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP).
#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf

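# As a point of reference for the figures above: the stitched loops below
# handle 6 AES blocks, i.e. 6 x 16 = 96 bytes, per iteration, which is why
# the favourable packet sizes are the ones divisible by 96. At the quoted
# 1.0 cycle/byte (Haswell) one iteration thus amounts to roughly 96 cycles,
# at 0.63 cycle/byte (Skylake) to roughly 60 cycles.
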
# Generated once from
# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/aesni-gcm-x86_64.pl
# and modified for ICP. Modifications are kept at a bare minimum to ease later
# upstream merges.

#if defined(__x86_64__) && defined(HAVE_AVX) && \
    defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)

#define _ASM
#include <sys/asm_linkage.h>

.extern gcm_avx_can_use_movbe

.text

#ifdef HAVE_MOVBE
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
62 | vmovdqu 32(%r11),%xmm2 |
63 | subq $6,%rdx | |
64 | vpxor %xmm4,%xmm4,%xmm4 | |
65 | vmovdqu 0-128(%rcx),%xmm15 | |
66 | vpaddb %xmm2,%xmm1,%xmm10 | |
67 | vpaddb %xmm2,%xmm10,%xmm11 | |
68 | vpaddb %xmm2,%xmm11,%xmm12 | |
69 | vpaddb %xmm2,%xmm12,%xmm13 | |
70 | vpaddb %xmm2,%xmm13,%xmm14 | |
71 | vpxor %xmm15,%xmm1,%xmm9 | |
72 | vmovdqu %xmm4,16+8(%rsp) | |
73 | jmp .Loop6x | |
74 | ||
75 | .align 32 | |
76 | .Loop6x: | |
77 | addl $100663296,%ebx | |
78 | jc .Lhandle_ctr32 | |
79 | vmovdqu 0-32(%r9),%xmm3 | |
80 | vpaddb %xmm2,%xmm14,%xmm1 | |
81 | vpxor %xmm15,%xmm10,%xmm10 | |
82 | vpxor %xmm15,%xmm11,%xmm11 | |
83 | ||
84 | .Lresume_ctr32: | |
85 | vmovdqu %xmm1,(%r8) | |
86 | vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 | |
87 | vpxor %xmm15,%xmm12,%xmm12 | |
88 | vmovups 16-128(%rcx),%xmm2 | |
89 | vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 | |
90 | xorq %r12,%r12 | |
91 | cmpq %r14,%r15 | |
92 | ||
93 | vaesenc %xmm2,%xmm9,%xmm9 | |
94 | vmovdqu 48+8(%rsp),%xmm0 | |
95 | vpxor %xmm15,%xmm13,%xmm13 | |
96 | vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 | |
97 | vaesenc %xmm2,%xmm10,%xmm10 | |
98 | vpxor %xmm15,%xmm14,%xmm14 | |
99 | setnc %r12b | |
100 | vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 | |
101 | vaesenc %xmm2,%xmm11,%xmm11 | |
102 | vmovdqu 16-32(%r9),%xmm3 | |
103 | negq %r12 | |
104 | vaesenc %xmm2,%xmm12,%xmm12 | |
105 | vpxor %xmm5,%xmm6,%xmm6 | |
106 | vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 | |
107 | vpxor %xmm4,%xmm8,%xmm8 | |
108 | vaesenc %xmm2,%xmm13,%xmm13 | |
109 | vpxor %xmm5,%xmm1,%xmm4 | |
110 | andq $0x60,%r12 | |
111 | vmovups 32-128(%rcx),%xmm15 | |
112 | vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 | |
113 | vaesenc %xmm2,%xmm14,%xmm14 | |
114 | ||
115 | vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 | |
116 | leaq (%r14,%r12,1),%r14 | |
117 | vaesenc %xmm15,%xmm9,%xmm9 | |
118 | vpxor 16+8(%rsp),%xmm8,%xmm8 | |
119 | vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 | |
120 | vmovdqu 64+8(%rsp),%xmm0 | |
121 | vaesenc %xmm15,%xmm10,%xmm10 | |
122 | movbeq 88(%r14),%r13 | |
123 | vaesenc %xmm15,%xmm11,%xmm11 | |
124 | movbeq 80(%r14),%r12 | |
125 | vaesenc %xmm15,%xmm12,%xmm12 | |
126 | movq %r13,32+8(%rsp) | |
127 | vaesenc %xmm15,%xmm13,%xmm13 | |
128 | movq %r12,40+8(%rsp) | |
129 | vmovdqu 48-32(%r9),%xmm5 | |
130 | vaesenc %xmm15,%xmm14,%xmm14 | |
131 | ||
132 | vmovups 48-128(%rcx),%xmm15 | |
133 | vpxor %xmm1,%xmm6,%xmm6 | |
134 | vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 | |
135 | vaesenc %xmm15,%xmm9,%xmm9 | |
136 | vpxor %xmm2,%xmm6,%xmm6 | |
137 | vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 | |
138 | vaesenc %xmm15,%xmm10,%xmm10 | |
139 | vpxor %xmm3,%xmm7,%xmm7 | |
140 | vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 | |
141 | vaesenc %xmm15,%xmm11,%xmm11 | |
142 | vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 | |
143 | vmovdqu 80+8(%rsp),%xmm0 | |
144 | vaesenc %xmm15,%xmm12,%xmm12 | |
145 | vaesenc %xmm15,%xmm13,%xmm13 | |
146 | vpxor %xmm1,%xmm4,%xmm4 | |
147 | vmovdqu 64-32(%r9),%xmm1 | |
148 | vaesenc %xmm15,%xmm14,%xmm14 | |
149 | ||
150 | vmovups 64-128(%rcx),%xmm15 | |
151 | vpxor %xmm2,%xmm6,%xmm6 | |
152 | vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 | |
153 | vaesenc %xmm15,%xmm9,%xmm9 | |
154 | vpxor %xmm3,%xmm6,%xmm6 | |
155 | vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 | |
156 | vaesenc %xmm15,%xmm10,%xmm10 | |
157 | movbeq 72(%r14),%r13 | |
158 | vpxor %xmm5,%xmm7,%xmm7 | |
159 | vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 | |
160 | vaesenc %xmm15,%xmm11,%xmm11 | |
161 | movbeq 64(%r14),%r12 | |
162 | vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 | |
163 | vmovdqu 96+8(%rsp),%xmm0 | |
164 | vaesenc %xmm15,%xmm12,%xmm12 | |
165 | movq %r13,48+8(%rsp) | |
166 | vaesenc %xmm15,%xmm13,%xmm13 | |
167 | movq %r12,56+8(%rsp) | |
168 | vpxor %xmm2,%xmm4,%xmm4 | |
169 | vmovdqu 96-32(%r9),%xmm2 | |
170 | vaesenc %xmm15,%xmm14,%xmm14 | |
171 | ||
172 | vmovups 80-128(%rcx),%xmm15 | |
173 | vpxor %xmm3,%xmm6,%xmm6 | |
174 | vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 | |
175 | vaesenc %xmm15,%xmm9,%xmm9 | |
176 | vpxor %xmm5,%xmm6,%xmm6 | |
177 | vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 | |
178 | vaesenc %xmm15,%xmm10,%xmm10 | |
179 | movbeq 56(%r14),%r13 | |
180 | vpxor %xmm1,%xmm7,%xmm7 | |
181 | vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 | |
182 | vpxor 112+8(%rsp),%xmm8,%xmm8 | |
183 | vaesenc %xmm15,%xmm11,%xmm11 | |
184 | movbeq 48(%r14),%r12 | |
185 | vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 | |
186 | vaesenc %xmm15,%xmm12,%xmm12 | |
187 | movq %r13,64+8(%rsp) | |
188 | vaesenc %xmm15,%xmm13,%xmm13 | |
189 | movq %r12,72+8(%rsp) | |
190 | vpxor %xmm3,%xmm4,%xmm4 | |
191 | vmovdqu 112-32(%r9),%xmm3 | |
192 | vaesenc %xmm15,%xmm14,%xmm14 | |
193 | ||
194 | vmovups 96-128(%rcx),%xmm15 | |
195 | vpxor %xmm5,%xmm6,%xmm6 | |
196 | vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 | |
197 | vaesenc %xmm15,%xmm9,%xmm9 | |
198 | vpxor %xmm1,%xmm6,%xmm6 | |
199 | vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 | |
200 | vaesenc %xmm15,%xmm10,%xmm10 | |
201 | movbeq 40(%r14),%r13 | |
202 | vpxor %xmm2,%xmm7,%xmm7 | |
203 | vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 | |
204 | vaesenc %xmm15,%xmm11,%xmm11 | |
205 | movbeq 32(%r14),%r12 | |
206 | vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 | |
207 | vaesenc %xmm15,%xmm12,%xmm12 | |
208 | movq %r13,80+8(%rsp) | |
209 | vaesenc %xmm15,%xmm13,%xmm13 | |
210 | movq %r12,88+8(%rsp) | |
211 | vpxor %xmm5,%xmm6,%xmm6 | |
212 | vaesenc %xmm15,%xmm14,%xmm14 | |
213 | vpxor %xmm1,%xmm6,%xmm6 | |
214 | ||
215 | vmovups 112-128(%rcx),%xmm15 | |
216 | vpslldq $8,%xmm6,%xmm5 | |
217 | vpxor %xmm2,%xmm4,%xmm4 | |
218 | vmovdqu 16(%r11),%xmm3 | |
219 | ||
220 | vaesenc %xmm15,%xmm9,%xmm9 | |
221 | vpxor %xmm8,%xmm7,%xmm7 | |
222 | vaesenc %xmm15,%xmm10,%xmm10 | |
223 | vpxor %xmm5,%xmm4,%xmm4 | |
224 | movbeq 24(%r14),%r13 | |
225 | vaesenc %xmm15,%xmm11,%xmm11 | |
226 | movbeq 16(%r14),%r12 | |
227 | vpalignr $8,%xmm4,%xmm4,%xmm0 | |
228 | vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 | |
229 | movq %r13,96+8(%rsp) | |
230 | vaesenc %xmm15,%xmm12,%xmm12 | |
231 | movq %r12,104+8(%rsp) | |
232 | vaesenc %xmm15,%xmm13,%xmm13 | |
233 | vmovups 128-128(%rcx),%xmm1 | |
234 | vaesenc %xmm15,%xmm14,%xmm14 | |
235 | ||
236 | vaesenc %xmm1,%xmm9,%xmm9 | |
237 | vmovups 144-128(%rcx),%xmm15 | |
238 | vaesenc %xmm1,%xmm10,%xmm10 | |
239 | vpsrldq $8,%xmm6,%xmm6 | |
240 | vaesenc %xmm1,%xmm11,%xmm11 | |
241 | vpxor %xmm6,%xmm7,%xmm7 | |
242 | vaesenc %xmm1,%xmm12,%xmm12 | |
243 | vpxor %xmm0,%xmm4,%xmm4 | |
244 | movbeq 8(%r14),%r13 | |
245 | vaesenc %xmm1,%xmm13,%xmm13 | |
246 | movbeq 0(%r14),%r12 | |
247 | vaesenc %xmm1,%xmm14,%xmm14 | |
248 | vmovups 160-128(%rcx),%xmm1 | |
249 | cmpl $12,%ebp // ICP uses 10,12,14 not 9,11,13 for rounds. | |
250 | jb .Lenc_tail | |
251 | ||
252 | vaesenc %xmm15,%xmm9,%xmm9 | |
253 | vaesenc %xmm15,%xmm10,%xmm10 | |
254 | vaesenc %xmm15,%xmm11,%xmm11 | |
255 | vaesenc %xmm15,%xmm12,%xmm12 | |
256 | vaesenc %xmm15,%xmm13,%xmm13 | |
257 | vaesenc %xmm15,%xmm14,%xmm14 | |
258 | ||
259 | vaesenc %xmm1,%xmm9,%xmm9 | |
260 | vaesenc %xmm1,%xmm10,%xmm10 | |
261 | vaesenc %xmm1,%xmm11,%xmm11 | |
262 | vaesenc %xmm1,%xmm12,%xmm12 | |
263 | vaesenc %xmm1,%xmm13,%xmm13 | |
264 | vmovups 176-128(%rcx),%xmm15 | |
265 | vaesenc %xmm1,%xmm14,%xmm14 | |
266 | vmovups 192-128(%rcx),%xmm1 | |
267 | cmpl $14,%ebp // ICP does not zero key schedule. | |
268 | jb .Lenc_tail | |
269 | ||
270 | vaesenc %xmm15,%xmm9,%xmm9 | |
271 | vaesenc %xmm15,%xmm10,%xmm10 | |
272 | vaesenc %xmm15,%xmm11,%xmm11 | |
273 | vaesenc %xmm15,%xmm12,%xmm12 | |
274 | vaesenc %xmm15,%xmm13,%xmm13 | |
275 | vaesenc %xmm15,%xmm14,%xmm14 | |
276 | ||
277 | vaesenc %xmm1,%xmm9,%xmm9 | |
278 | vaesenc %xmm1,%xmm10,%xmm10 | |
279 | vaesenc %xmm1,%xmm11,%xmm11 | |
280 | vaesenc %xmm1,%xmm12,%xmm12 | |
281 | vaesenc %xmm1,%xmm13,%xmm13 | |
282 | vmovups 208-128(%rcx),%xmm15 | |
283 | vaesenc %xmm1,%xmm14,%xmm14 | |
284 | vmovups 224-128(%rcx),%xmm1 | |
285 | jmp .Lenc_tail | |
286 | ||
287 | .align 32 | |
288 | .Lhandle_ctr32: | |
289 | vmovdqu (%r11),%xmm0 | |
290 | vpshufb %xmm0,%xmm1,%xmm6 | |
291 | vmovdqu 48(%r11),%xmm5 | |
292 | vpaddd 64(%r11),%xmm6,%xmm10 | |
293 | vpaddd %xmm5,%xmm6,%xmm11 | |
294 | vmovdqu 0-32(%r9),%xmm3 | |
295 | vpaddd %xmm5,%xmm10,%xmm12 | |
296 | vpshufb %xmm0,%xmm10,%xmm10 | |
297 | vpaddd %xmm5,%xmm11,%xmm13 | |
298 | vpshufb %xmm0,%xmm11,%xmm11 | |
299 | vpxor %xmm15,%xmm10,%xmm10 | |
300 | vpaddd %xmm5,%xmm12,%xmm14 | |
301 | vpshufb %xmm0,%xmm12,%xmm12 | |
302 | vpxor %xmm15,%xmm11,%xmm11 | |
303 | vpaddd %xmm5,%xmm13,%xmm1 | |
304 | vpshufb %xmm0,%xmm13,%xmm13 | |
305 | vpshufb %xmm0,%xmm14,%xmm14 | |
306 | vpshufb %xmm0,%xmm1,%xmm1 | |
307 | jmp .Lresume_ctr32 | |
308 | ||
309 | .align 32 | |
310 | .Lenc_tail: | |
311 | vaesenc %xmm15,%xmm9,%xmm9 | |
312 | vmovdqu %xmm7,16+8(%rsp) | |
313 | vpalignr $8,%xmm4,%xmm4,%xmm8 | |
314 | vaesenc %xmm15,%xmm10,%xmm10 | |
315 | vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 | |
316 | vpxor 0(%rdi),%xmm1,%xmm2 | |
317 | vaesenc %xmm15,%xmm11,%xmm11 | |
318 | vpxor 16(%rdi),%xmm1,%xmm0 | |
319 | vaesenc %xmm15,%xmm12,%xmm12 | |
320 | vpxor 32(%rdi),%xmm1,%xmm5 | |
321 | vaesenc %xmm15,%xmm13,%xmm13 | |
322 | vpxor 48(%rdi),%xmm1,%xmm6 | |
323 | vaesenc %xmm15,%xmm14,%xmm14 | |
324 | vpxor 64(%rdi),%xmm1,%xmm7 | |
325 | vpxor 80(%rdi),%xmm1,%xmm3 | |
326 | vmovdqu (%r8),%xmm1 | |
327 | ||
328 | vaesenclast %xmm2,%xmm9,%xmm9 | |
329 | vmovdqu 32(%r11),%xmm2 | |
330 | vaesenclast %xmm0,%xmm10,%xmm10 | |
331 | vpaddb %xmm2,%xmm1,%xmm0 | |
332 | movq %r13,112+8(%rsp) | |
333 | leaq 96(%rdi),%rdi | |
334 | vaesenclast %xmm5,%xmm11,%xmm11 | |
335 | vpaddb %xmm2,%xmm0,%xmm5 | |
336 | movq %r12,120+8(%rsp) | |
337 | leaq 96(%rsi),%rsi | |
338 | vmovdqu 0-128(%rcx),%xmm15 | |
339 | vaesenclast %xmm6,%xmm12,%xmm12 | |
340 | vpaddb %xmm2,%xmm5,%xmm6 | |
341 | vaesenclast %xmm7,%xmm13,%xmm13 | |
342 | vpaddb %xmm2,%xmm6,%xmm7 | |
343 | vaesenclast %xmm3,%xmm14,%xmm14 | |
344 | vpaddb %xmm2,%xmm7,%xmm3 | |
345 | ||
346 | addq $0x60,%r10 | |
347 | subq $0x6,%rdx | |
348 | jc .L6x_done | |
349 | ||
350 | vmovups %xmm9,-96(%rsi) | |
351 | vpxor %xmm15,%xmm1,%xmm9 | |
352 | vmovups %xmm10,-80(%rsi) | |
353 | vmovdqa %xmm0,%xmm10 | |
354 | vmovups %xmm11,-64(%rsi) | |
355 | vmovdqa %xmm5,%xmm11 | |
356 | vmovups %xmm12,-48(%rsi) | |
357 | vmovdqa %xmm6,%xmm12 | |
358 | vmovups %xmm13,-32(%rsi) | |
359 | vmovdqa %xmm7,%xmm13 | |
360 | vmovups %xmm14,-16(%rsi) | |
361 | vmovdqa %xmm3,%xmm14 | |
362 | vmovdqu 32+8(%rsp),%xmm7 | |
363 | jmp .Loop6x | |
364 | ||
365 | .L6x_done: | |
366 | vpxor 16+8(%rsp),%xmm8,%xmm8 | |
367 | vpxor %xmm4,%xmm8,%xmm8 | |
368 | ||
369 | RET
370 | .cfi_endproc
371 | .size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
372 | #endif /* ifdef HAVE_MOVBE */
373 | ||
374 | .type _aesni_ctr32_ghash_no_movbe_6x,@function | |
375 | .align 32 | |
376 | _aesni_ctr32_ghash_no_movbe_6x: | |
377 | .cfi_startproc
378 | vmovdqu 32(%r11),%xmm2 |
379 | subq $6,%rdx | |
380 | vpxor %xmm4,%xmm4,%xmm4 | |
381 | vmovdqu 0-128(%rcx),%xmm15 | |
382 | vpaddb %xmm2,%xmm1,%xmm10 | |
383 | vpaddb %xmm2,%xmm10,%xmm11 | |
384 | vpaddb %xmm2,%xmm11,%xmm12 | |
385 | vpaddb %xmm2,%xmm12,%xmm13 | |
386 | vpaddb %xmm2,%xmm13,%xmm14 | |
387 | vpxor %xmm15,%xmm1,%xmm9 | |
388 | vmovdqu %xmm4,16+8(%rsp) | |
389 | jmp .Loop6x_nmb | |
390 | ||
391 | .align 32 | |
392 | .Loop6x_nmb: | |
393 | addl $100663296,%ebx | |
394 | jc .Lhandle_ctr32_nmb | |
395 | vmovdqu 0-32(%r9),%xmm3 | |
396 | vpaddb %xmm2,%xmm14,%xmm1 | |
397 | vpxor %xmm15,%xmm10,%xmm10 | |
398 | vpxor %xmm15,%xmm11,%xmm11 | |
399 | ||
400 | .Lresume_ctr32_nmb: | |
401 | vmovdqu %xmm1,(%r8) | |
402 | vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 | |
403 | vpxor %xmm15,%xmm12,%xmm12 | |
404 | vmovups 16-128(%rcx),%xmm2 | |
405 | vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 | |
406 | xorq %r12,%r12 | |
407 | cmpq %r14,%r15 | |
408 | ||
409 | vaesenc %xmm2,%xmm9,%xmm9 | |
410 | vmovdqu 48+8(%rsp),%xmm0 | |
411 | vpxor %xmm15,%xmm13,%xmm13 | |
412 | vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 | |
413 | vaesenc %xmm2,%xmm10,%xmm10 | |
414 | vpxor %xmm15,%xmm14,%xmm14 | |
415 | setnc %r12b | |
416 | vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 | |
417 | vaesenc %xmm2,%xmm11,%xmm11 | |
418 | vmovdqu 16-32(%r9),%xmm3 | |
419 | negq %r12 | |
420 | vaesenc %xmm2,%xmm12,%xmm12 | |
421 | vpxor %xmm5,%xmm6,%xmm6 | |
422 | vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 | |
423 | vpxor %xmm4,%xmm8,%xmm8 | |
424 | vaesenc %xmm2,%xmm13,%xmm13 | |
425 | vpxor %xmm5,%xmm1,%xmm4 | |
426 | andq $0x60,%r12 | |
427 | vmovups 32-128(%rcx),%xmm15 | |
428 | vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 | |
429 | vaesenc %xmm2,%xmm14,%xmm14 | |
430 | ||
431 | vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 | |
432 | leaq (%r14,%r12,1),%r14 | |
433 | vaesenc %xmm15,%xmm9,%xmm9 | |
434 | vpxor 16+8(%rsp),%xmm8,%xmm8 | |
435 | vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 | |
436 | vmovdqu 64+8(%rsp),%xmm0 | |
437 | vaesenc %xmm15,%xmm10,%xmm10 | |
438 | movq 88(%r14),%r13 | |
439 | bswapq %r13 | |
440 | vaesenc %xmm15,%xmm11,%xmm11 | |
441 | movq 80(%r14),%r12 | |
442 | bswapq %r12 | |
443 | vaesenc %xmm15,%xmm12,%xmm12 | |
444 | movq %r13,32+8(%rsp) | |
445 | vaesenc %xmm15,%xmm13,%xmm13 | |
446 | movq %r12,40+8(%rsp) | |
447 | vmovdqu 48-32(%r9),%xmm5 | |
448 | vaesenc %xmm15,%xmm14,%xmm14 | |
449 | ||
450 | vmovups 48-128(%rcx),%xmm15 | |
451 | vpxor %xmm1,%xmm6,%xmm6 | |
452 | vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 | |
453 | vaesenc %xmm15,%xmm9,%xmm9 | |
454 | vpxor %xmm2,%xmm6,%xmm6 | |
455 | vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 | |
456 | vaesenc %xmm15,%xmm10,%xmm10 | |
457 | vpxor %xmm3,%xmm7,%xmm7 | |
458 | vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 | |
459 | vaesenc %xmm15,%xmm11,%xmm11 | |
460 | vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 | |
461 | vmovdqu 80+8(%rsp),%xmm0 | |
462 | vaesenc %xmm15,%xmm12,%xmm12 | |
463 | vaesenc %xmm15,%xmm13,%xmm13 | |
464 | vpxor %xmm1,%xmm4,%xmm4 | |
465 | vmovdqu 64-32(%r9),%xmm1 | |
466 | vaesenc %xmm15,%xmm14,%xmm14 | |
467 | ||
468 | vmovups 64-128(%rcx),%xmm15 | |
469 | vpxor %xmm2,%xmm6,%xmm6 | |
470 | vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 | |
471 | vaesenc %xmm15,%xmm9,%xmm9 | |
472 | vpxor %xmm3,%xmm6,%xmm6 | |
473 | vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 | |
474 | vaesenc %xmm15,%xmm10,%xmm10 | |
475 | movq 72(%r14),%r13 | |
476 | bswapq %r13 | |
477 | vpxor %xmm5,%xmm7,%xmm7 | |
478 | vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 | |
479 | vaesenc %xmm15,%xmm11,%xmm11 | |
480 | movq 64(%r14),%r12 | |
481 | bswapq %r12 | |
482 | vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 | |
483 | vmovdqu 96+8(%rsp),%xmm0 | |
484 | vaesenc %xmm15,%xmm12,%xmm12 | |
485 | movq %r13,48+8(%rsp) | |
486 | vaesenc %xmm15,%xmm13,%xmm13 | |
487 | movq %r12,56+8(%rsp) | |
488 | vpxor %xmm2,%xmm4,%xmm4 | |
489 | vmovdqu 96-32(%r9),%xmm2 | |
490 | vaesenc %xmm15,%xmm14,%xmm14 | |
491 | ||
492 | vmovups 80-128(%rcx),%xmm15 | |
493 | vpxor %xmm3,%xmm6,%xmm6 | |
494 | vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 | |
495 | vaesenc %xmm15,%xmm9,%xmm9 | |
496 | vpxor %xmm5,%xmm6,%xmm6 | |
497 | vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 | |
498 | vaesenc %xmm15,%xmm10,%xmm10 | |
499 | movq 56(%r14),%r13 | |
500 | bswapq %r13 | |
501 | vpxor %xmm1,%xmm7,%xmm7 | |
502 | vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 | |
503 | vpxor 112+8(%rsp),%xmm8,%xmm8 | |
504 | vaesenc %xmm15,%xmm11,%xmm11 | |
505 | movq 48(%r14),%r12 | |
506 | bswapq %r12 | |
507 | vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 | |
508 | vaesenc %xmm15,%xmm12,%xmm12 | |
509 | movq %r13,64+8(%rsp) | |
510 | vaesenc %xmm15,%xmm13,%xmm13 | |
511 | movq %r12,72+8(%rsp) | |
512 | vpxor %xmm3,%xmm4,%xmm4 | |
513 | vmovdqu 112-32(%r9),%xmm3 | |
514 | vaesenc %xmm15,%xmm14,%xmm14 | |
515 | ||
516 | vmovups 96-128(%rcx),%xmm15 | |
517 | vpxor %xmm5,%xmm6,%xmm6 | |
518 | vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 | |
519 | vaesenc %xmm15,%xmm9,%xmm9 | |
520 | vpxor %xmm1,%xmm6,%xmm6 | |
521 | vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 | |
522 | vaesenc %xmm15,%xmm10,%xmm10 | |
523 | movq 40(%r14),%r13 | |
524 | bswapq %r13 | |
525 | vpxor %xmm2,%xmm7,%xmm7 | |
526 | vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 | |
527 | vaesenc %xmm15,%xmm11,%xmm11 | |
528 | movq 32(%r14),%r12 | |
529 | bswapq %r12 | |
530 | vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 | |
531 | vaesenc %xmm15,%xmm12,%xmm12 | |
532 | movq %r13,80+8(%rsp) | |
533 | vaesenc %xmm15,%xmm13,%xmm13 | |
534 | movq %r12,88+8(%rsp) | |
535 | vpxor %xmm5,%xmm6,%xmm6 | |
536 | vaesenc %xmm15,%xmm14,%xmm14 | |
537 | vpxor %xmm1,%xmm6,%xmm6 | |
538 | ||
539 | vmovups 112-128(%rcx),%xmm15 | |
540 | vpslldq $8,%xmm6,%xmm5 | |
541 | vpxor %xmm2,%xmm4,%xmm4 | |
542 | vmovdqu 16(%r11),%xmm3 | |
543 | ||
544 | vaesenc %xmm15,%xmm9,%xmm9 | |
545 | vpxor %xmm8,%xmm7,%xmm7 | |
546 | vaesenc %xmm15,%xmm10,%xmm10 | |
547 | vpxor %xmm5,%xmm4,%xmm4 | |
548 | movq 24(%r14),%r13 | |
549 | bswapq %r13 | |
550 | vaesenc %xmm15,%xmm11,%xmm11 | |
551 | movq 16(%r14),%r12 | |
552 | bswapq %r12 | |
553 | vpalignr $8,%xmm4,%xmm4,%xmm0 | |
554 | vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 | |
555 | movq %r13,96+8(%rsp) | |
556 | vaesenc %xmm15,%xmm12,%xmm12 | |
557 | movq %r12,104+8(%rsp) | |
558 | vaesenc %xmm15,%xmm13,%xmm13 | |
559 | vmovups 128-128(%rcx),%xmm1 | |
560 | vaesenc %xmm15,%xmm14,%xmm14 | |
561 | ||
562 | vaesenc %xmm1,%xmm9,%xmm9 | |
563 | vmovups 144-128(%rcx),%xmm15 | |
564 | vaesenc %xmm1,%xmm10,%xmm10 | |
565 | vpsrldq $8,%xmm6,%xmm6 | |
566 | vaesenc %xmm1,%xmm11,%xmm11 | |
567 | vpxor %xmm6,%xmm7,%xmm7 | |
568 | vaesenc %xmm1,%xmm12,%xmm12 | |
569 | vpxor %xmm0,%xmm4,%xmm4 | |
570 | movq 8(%r14),%r13 | |
571 | bswapq %r13 | |
572 | vaesenc %xmm1,%xmm13,%xmm13 | |
573 | movq 0(%r14),%r12 | |
574 | bswapq %r12 | |
575 | vaesenc %xmm1,%xmm14,%xmm14 | |
576 | vmovups 160-128(%rcx),%xmm1 | |
577 | cmpl $12,%ebp // ICP uses 10,12,14 not 9,11,13 for rounds. | |
578 | jb .Lenc_tail_nmb | |
579 | ||
580 | vaesenc %xmm15,%xmm9,%xmm9 | |
581 | vaesenc %xmm15,%xmm10,%xmm10 | |
582 | vaesenc %xmm15,%xmm11,%xmm11 | |
583 | vaesenc %xmm15,%xmm12,%xmm12 | |
584 | vaesenc %xmm15,%xmm13,%xmm13 | |
585 | vaesenc %xmm15,%xmm14,%xmm14 | |
586 | ||
587 | vaesenc %xmm1,%xmm9,%xmm9 | |
588 | vaesenc %xmm1,%xmm10,%xmm10 | |
589 | vaesenc %xmm1,%xmm11,%xmm11 | |
590 | vaesenc %xmm1,%xmm12,%xmm12 | |
591 | vaesenc %xmm1,%xmm13,%xmm13 | |
592 | vmovups 176-128(%rcx),%xmm15 | |
593 | vaesenc %xmm1,%xmm14,%xmm14 | |
594 | vmovups 192-128(%rcx),%xmm1 | |
595 | cmpl $14,%ebp // ICP does not zero key schedule. | |
596 | jb .Lenc_tail_nmb | |
597 | ||
598 | vaesenc %xmm15,%xmm9,%xmm9 | |
599 | vaesenc %xmm15,%xmm10,%xmm10 | |
600 | vaesenc %xmm15,%xmm11,%xmm11 | |
601 | vaesenc %xmm15,%xmm12,%xmm12 | |
602 | vaesenc %xmm15,%xmm13,%xmm13 | |
603 | vaesenc %xmm15,%xmm14,%xmm14 | |
604 | ||
605 | vaesenc %xmm1,%xmm9,%xmm9 | |
606 | vaesenc %xmm1,%xmm10,%xmm10 | |
607 | vaesenc %xmm1,%xmm11,%xmm11 | |
608 | vaesenc %xmm1,%xmm12,%xmm12 | |
609 | vaesenc %xmm1,%xmm13,%xmm13 | |
610 | vmovups 208-128(%rcx),%xmm15 | |
611 | vaesenc %xmm1,%xmm14,%xmm14 | |
612 | vmovups 224-128(%rcx),%xmm1 | |
613 | jmp .Lenc_tail_nmb | |
614 | ||
615 | .align 32 | |
616 | .Lhandle_ctr32_nmb: | |
617 | vmovdqu (%r11),%xmm0 | |
618 | vpshufb %xmm0,%xmm1,%xmm6 | |
619 | vmovdqu 48(%r11),%xmm5 | |
620 | vpaddd 64(%r11),%xmm6,%xmm10 | |
621 | vpaddd %xmm5,%xmm6,%xmm11 | |
622 | vmovdqu 0-32(%r9),%xmm3 | |
623 | vpaddd %xmm5,%xmm10,%xmm12 | |
624 | vpshufb %xmm0,%xmm10,%xmm10 | |
625 | vpaddd %xmm5,%xmm11,%xmm13 | |
626 | vpshufb %xmm0,%xmm11,%xmm11 | |
627 | vpxor %xmm15,%xmm10,%xmm10 | |
628 | vpaddd %xmm5,%xmm12,%xmm14 | |
629 | vpshufb %xmm0,%xmm12,%xmm12 | |
630 | vpxor %xmm15,%xmm11,%xmm11 | |
631 | vpaddd %xmm5,%xmm13,%xmm1 | |
632 | vpshufb %xmm0,%xmm13,%xmm13 | |
633 | vpshufb %xmm0,%xmm14,%xmm14 | |
634 | vpshufb %xmm0,%xmm1,%xmm1 | |
635 | jmp .Lresume_ctr32_nmb | |
636 | ||
637 | .align 32 | |
638 | .Lenc_tail_nmb: | |
639 | vaesenc %xmm15,%xmm9,%xmm9 | |
640 | vmovdqu %xmm7,16+8(%rsp) | |
641 | vpalignr $8,%xmm4,%xmm4,%xmm8 | |
642 | vaesenc %xmm15,%xmm10,%xmm10 | |
643 | vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 | |
644 | vpxor 0(%rdi),%xmm1,%xmm2 | |
645 | vaesenc %xmm15,%xmm11,%xmm11 | |
646 | vpxor 16(%rdi),%xmm1,%xmm0 | |
647 | vaesenc %xmm15,%xmm12,%xmm12 | |
648 | vpxor 32(%rdi),%xmm1,%xmm5 | |
649 | vaesenc %xmm15,%xmm13,%xmm13 | |
650 | vpxor 48(%rdi),%xmm1,%xmm6 | |
651 | vaesenc %xmm15,%xmm14,%xmm14 | |
652 | vpxor 64(%rdi),%xmm1,%xmm7 | |
653 | vpxor 80(%rdi),%xmm1,%xmm3 | |
654 | vmovdqu (%r8),%xmm1 | |
655 | ||
656 | vaesenclast %xmm2,%xmm9,%xmm9 | |
657 | vmovdqu 32(%r11),%xmm2 | |
658 | vaesenclast %xmm0,%xmm10,%xmm10 | |
659 | vpaddb %xmm2,%xmm1,%xmm0 | |
660 | movq %r13,112+8(%rsp) | |
661 | leaq 96(%rdi),%rdi | |
662 | vaesenclast %xmm5,%xmm11,%xmm11 | |
663 | vpaddb %xmm2,%xmm0,%xmm5 | |
664 | movq %r12,120+8(%rsp) | |
665 | leaq 96(%rsi),%rsi | |
666 | vmovdqu 0-128(%rcx),%xmm15 | |
667 | vaesenclast %xmm6,%xmm12,%xmm12 | |
668 | vpaddb %xmm2,%xmm5,%xmm6 | |
669 | vaesenclast %xmm7,%xmm13,%xmm13 | |
670 | vpaddb %xmm2,%xmm6,%xmm7 | |
671 | vaesenclast %xmm3,%xmm14,%xmm14 | |
672 | vpaddb %xmm2,%xmm7,%xmm3 | |
673 | ||
674 | addq $0x60,%r10 | |
675 | subq $0x6,%rdx | |
676 | jc .L6x_done_nmb | |
677 | ||
678 | vmovups %xmm9,-96(%rsi) | |
679 | vpxor %xmm15,%xmm1,%xmm9 | |
680 | vmovups %xmm10,-80(%rsi) | |
681 | vmovdqa %xmm0,%xmm10 | |
682 | vmovups %xmm11,-64(%rsi) | |
683 | vmovdqa %xmm5,%xmm11 | |
684 | vmovups %xmm12,-48(%rsi) | |
685 | vmovdqa %xmm6,%xmm12 | |
686 | vmovups %xmm13,-32(%rsi) | |
687 | vmovdqa %xmm7,%xmm13 | |
688 | vmovups %xmm14,-16(%rsi) | |
689 | vmovdqa %xmm3,%xmm14 | |
690 | vmovdqu 32+8(%rsp),%xmm7 | |
691 | jmp .Loop6x_nmb | |
692 | ||
693 | .L6x_done_nmb: | |
694 | vpxor 16+8(%rsp),%xmm8,%xmm8 | |
695 | vpxor %xmm4,%xmm8,%xmm8 | |
696 | ||
697 | RET
698 | .cfi_endproc
699 | .size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x
700 | ||
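/*
 * Note: aesni_gcm_decrypt and aesni_gcm_encrypt below only process whole
 * 96-byte (6 x 16-byte block) chunks and return the number of bytes
 * actually handled in %rax; any remainder is left to the caller.
 * Decryption bails out for inputs shorter than 0x60 (96) bytes,
 * encryption for inputs shorter than 288 bytes.
 */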
701 | .globl aesni_gcm_decrypt |
702 | .type aesni_gcm_decrypt,@function | |
703 | .align 32 | |
704 | aesni_gcm_decrypt: | |
705 | .cfi_startproc | |
706 | xorq %r10,%r10 | |
707 | cmpq $0x60,%rdx | |
708 | jb .Lgcm_dec_abort | |
709 | ||
710 | leaq (%rsp),%rax | |
711 | .cfi_def_cfa_register %rax | |
712 | pushq %rbx | |
713 | .cfi_offset %rbx,-16 | |
714 | pushq %rbp | |
715 | .cfi_offset %rbp,-24 | |
716 | pushq %r12 | |
717 | .cfi_offset %r12,-32 | |
718 | pushq %r13 | |
719 | .cfi_offset %r13,-40 | |
720 | pushq %r14 | |
721 | .cfi_offset %r14,-48 | |
722 | pushq %r15 | |
723 | .cfi_offset %r15,-56 | |
724 | pushq %r9 |
725 | .cfi_offset %r9,-64 | |
726 | vzeroupper |
727 | ||
728 | vmovdqu (%r8),%xmm1 | |
729 | addq $-128,%rsp | |
730 | movl 12(%r8),%ebx | |
731 | leaq .Lbswap_mask(%rip),%r11 | |
732 | leaq -128(%rcx),%r14 | |
733 | movq $0xf80,%r15 | |
734 | vmovdqu (%r9),%xmm8 | |
735 | andq $-128,%rsp | |
736 | vmovdqu (%r11),%xmm0 | |
737 | leaq 128(%rcx),%rcx | |
738 | movq 32(%r9),%r9 |
739 | leaq 32(%r9),%r9 | |
740 | movl 504-128(%rcx),%ebp // ICP has a larger offset for rounds. |
741 | vpshufb %xmm0,%xmm8,%xmm8 | |
742 | ||
743 | andq %r15,%r14 | |
744 | andq %rsp,%r15 | |
745 | subq %r14,%r15 | |
746 | jc .Ldec_no_key_aliasing | |
747 | cmpq $768,%r15 | |
748 | jnc .Ldec_no_key_aliasing | |
749 | subq %r15,%rsp | |
750 | .Ldec_no_key_aliasing: | |
751 | ||
752 | vmovdqu 80(%rdi),%xmm7 | |
753 | leaq (%rdi),%r14 | |
754 | vmovdqu 64(%rdi),%xmm4 | |
755 | leaq -192(%rdi,%rdx,1),%r15 | |
756 | vmovdqu 48(%rdi),%xmm5 | |
757 | shrq $4,%rdx | |
758 | xorq %r10,%r10 | |
759 | vmovdqu 32(%rdi),%xmm6 | |
760 | vpshufb %xmm0,%xmm7,%xmm7 | |
761 | vmovdqu 16(%rdi),%xmm2 | |
762 | vpshufb %xmm0,%xmm4,%xmm4 | |
763 | vmovdqu (%rdi),%xmm3 | |
764 | vpshufb %xmm0,%xmm5,%xmm5 | |
765 | vmovdqu %xmm4,48(%rsp) | |
766 | vpshufb %xmm0,%xmm6,%xmm6 | |
767 | vmovdqu %xmm5,64(%rsp) | |
768 | vpshufb %xmm0,%xmm2,%xmm2 | |
769 | vmovdqu %xmm6,80(%rsp) | |
770 | vpshufb %xmm0,%xmm3,%xmm3 | |
771 | vmovdqu %xmm2,96(%rsp) | |
772 | vmovdqu %xmm3,112(%rsp) | |
773 | ||
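/*
 * Runtime dispatch: when built with HAVE_MOVBE, the gcm_avx_can_use_movbe
 * flag (presumably set by the ICP's CPU feature detection) selects the
 * MOVBE-based 6x loop; otherwise the BSWAP-based variant is used.
 */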
774 | #ifdef HAVE_MOVBE |
775 | #ifdef _KERNEL | |
776 | testl $1,gcm_avx_can_use_movbe(%rip) | |
777 | #else | |
778 | testl $1,gcm_avx_can_use_movbe@GOTPCREL(%rip) | |
779 | #endif | |
780 | jz 1f | |
781 | call _aesni_ctr32_ghash_6x
782 | jmp 2f |
783 | 1: | |
784 | #endif | |
785 | call _aesni_ctr32_ghash_no_movbe_6x | |
786 | 2: | |
787 | vmovups %xmm9,-96(%rsi) |
788 | vmovups %xmm10,-80(%rsi) | |
789 | vmovups %xmm11,-64(%rsi) | |
790 | vmovups %xmm12,-48(%rsi) | |
791 | vmovups %xmm13,-32(%rsi) | |
792 | vmovups %xmm14,-16(%rsi) | |
793 | ||
794 | vpshufb (%r11),%xmm8,%xmm8 | |
795 | movq -56(%rax),%r9 |
796 | .cfi_restore %r9 | |
797 | vmovdqu %xmm8,(%r9) | |
798 | |
799 | vzeroupper | |
800 | movq -48(%rax),%r15 | |
801 | .cfi_restore %r15 | |
802 | movq -40(%rax),%r14 | |
803 | .cfi_restore %r14 | |
804 | movq -32(%rax),%r13 | |
805 | .cfi_restore %r13 | |
806 | movq -24(%rax),%r12 | |
807 | .cfi_restore %r12 | |
808 | movq -16(%rax),%rbp | |
809 | .cfi_restore %rbp | |
810 | movq -8(%rax),%rbx | |
811 | .cfi_restore %rbx | |
812 | leaq (%rax),%rsp | |
813 | .cfi_def_cfa_register %rsp | |
814 | .Lgcm_dec_abort: | |
815 | movq %r10,%rax | |
816 | RET
817 | .cfi_endproc |
818 | .size aesni_gcm_decrypt,.-aesni_gcm_decrypt | |
819 | .type _aesni_ctr32_6x,@function | |
820 | .align 32 | |
821 | _aesni_ctr32_6x: | |
822 | .cfi_startproc
823 | vmovdqu 0-128(%rcx),%xmm4 |
824 | vmovdqu 32(%r11),%xmm2 | |
825 | leaq -2(%rbp),%r13 // ICP uses 10,12,14 not 9,11,13 for rounds. | |
826 | vmovups 16-128(%rcx),%xmm15 | |
827 | leaq 32-128(%rcx),%r12 | |
828 | vpxor %xmm4,%xmm1,%xmm9 | |
829 | addl $100663296,%ebx | |
830 | jc .Lhandle_ctr32_2 | |
831 | vpaddb %xmm2,%xmm1,%xmm10 | |
832 | vpaddb %xmm2,%xmm10,%xmm11 | |
833 | vpxor %xmm4,%xmm10,%xmm10 | |
834 | vpaddb %xmm2,%xmm11,%xmm12 | |
835 | vpxor %xmm4,%xmm11,%xmm11 | |
836 | vpaddb %xmm2,%xmm12,%xmm13 | |
837 | vpxor %xmm4,%xmm12,%xmm12 | |
838 | vpaddb %xmm2,%xmm13,%xmm14 | |
839 | vpxor %xmm4,%xmm13,%xmm13 | |
840 | vpaddb %xmm2,%xmm14,%xmm1 | |
841 | vpxor %xmm4,%xmm14,%xmm14 | |
842 | jmp .Loop_ctr32 | |
843 | ||
844 | .align 16 | |
845 | .Loop_ctr32: | |
846 | vaesenc %xmm15,%xmm9,%xmm9 | |
847 | vaesenc %xmm15,%xmm10,%xmm10 | |
848 | vaesenc %xmm15,%xmm11,%xmm11 | |
849 | vaesenc %xmm15,%xmm12,%xmm12 | |
850 | vaesenc %xmm15,%xmm13,%xmm13 | |
851 | vaesenc %xmm15,%xmm14,%xmm14 | |
852 | vmovups (%r12),%xmm15 | |
853 | leaq 16(%r12),%r12 | |
854 | decl %r13d | |
855 | jnz .Loop_ctr32 | |
856 | ||
857 | vmovdqu (%r12),%xmm3 | |
858 | vaesenc %xmm15,%xmm9,%xmm9 | |
859 | vpxor 0(%rdi),%xmm3,%xmm4 | |
860 | vaesenc %xmm15,%xmm10,%xmm10 | |
861 | vpxor 16(%rdi),%xmm3,%xmm5 | |
862 | vaesenc %xmm15,%xmm11,%xmm11 | |
863 | vpxor 32(%rdi),%xmm3,%xmm6 | |
864 | vaesenc %xmm15,%xmm12,%xmm12 | |
865 | vpxor 48(%rdi),%xmm3,%xmm8 | |
866 | vaesenc %xmm15,%xmm13,%xmm13 | |
867 | vpxor 64(%rdi),%xmm3,%xmm2 | |
868 | vaesenc %xmm15,%xmm14,%xmm14 | |
869 | vpxor 80(%rdi),%xmm3,%xmm3 | |
870 | leaq 96(%rdi),%rdi | |
871 | ||
872 | vaesenclast %xmm4,%xmm9,%xmm9 | |
873 | vaesenclast %xmm5,%xmm10,%xmm10 | |
874 | vaesenclast %xmm6,%xmm11,%xmm11 | |
875 | vaesenclast %xmm8,%xmm12,%xmm12 | |
876 | vaesenclast %xmm2,%xmm13,%xmm13 | |
877 | vaesenclast %xmm3,%xmm14,%xmm14 | |
878 | vmovups %xmm9,0(%rsi) | |
879 | vmovups %xmm10,16(%rsi) | |
880 | vmovups %xmm11,32(%rsi) | |
881 | vmovups %xmm12,48(%rsi) | |
882 | vmovups %xmm13,64(%rsi) | |
883 | vmovups %xmm14,80(%rsi) | |
884 | leaq 96(%rsi),%rsi | |
885 | ||
886 | RET
887 | .align 32 |
888 | .Lhandle_ctr32_2: | |
889 | vpshufb %xmm0,%xmm1,%xmm6 | |
890 | vmovdqu 48(%r11),%xmm5 | |
891 | vpaddd 64(%r11),%xmm6,%xmm10 | |
892 | vpaddd %xmm5,%xmm6,%xmm11 | |
893 | vpaddd %xmm5,%xmm10,%xmm12 | |
894 | vpshufb %xmm0,%xmm10,%xmm10 | |
895 | vpaddd %xmm5,%xmm11,%xmm13 | |
896 | vpshufb %xmm0,%xmm11,%xmm11 | |
897 | vpxor %xmm4,%xmm10,%xmm10 | |
898 | vpaddd %xmm5,%xmm12,%xmm14 | |
899 | vpshufb %xmm0,%xmm12,%xmm12 | |
900 | vpxor %xmm4,%xmm11,%xmm11 | |
901 | vpaddd %xmm5,%xmm13,%xmm1 | |
902 | vpshufb %xmm0,%xmm13,%xmm13 | |
903 | vpxor %xmm4,%xmm12,%xmm12 | |
904 | vpshufb %xmm0,%xmm14,%xmm14 | |
905 | vpxor %xmm4,%xmm13,%xmm13 | |
906 | vpshufb %xmm0,%xmm1,%xmm1 | |
907 | vpxor %xmm4,%xmm14,%xmm14 | |
908 | jmp .Loop_ctr32 | |
909 | .cfi_endproc
910 | .size _aesni_ctr32_6x,.-_aesni_ctr32_6x |
911 | ||
912 | .globl aesni_gcm_encrypt | |
913 | .type aesni_gcm_encrypt,@function | |
914 | .align 32 | |
915 | aesni_gcm_encrypt: | |
916 | .cfi_startproc | |
917 | xorq %r10,%r10 | |
918 | cmpq $288,%rdx | |
919 | jb .Lgcm_enc_abort | |
920 | ||
921 | leaq (%rsp),%rax | |
922 | .cfi_def_cfa_register %rax | |
923 | pushq %rbx | |
924 | .cfi_offset %rbx,-16 | |
925 | pushq %rbp | |
926 | .cfi_offset %rbp,-24 | |
927 | pushq %r12 | |
928 | .cfi_offset %r12,-32 | |
929 | pushq %r13 | |
930 | .cfi_offset %r13,-40 | |
931 | pushq %r14 | |
932 | .cfi_offset %r14,-48 | |
933 | pushq %r15 | |
934 | .cfi_offset %r15,-56 | |
935 | pushq %r9 |
936 | .cfi_offset %r9,-64 | |
937 | vzeroupper |
938 | ||
939 | vmovdqu (%r8),%xmm1 | |
940 | addq $-128,%rsp | |
941 | movl 12(%r8),%ebx | |
942 | leaq .Lbswap_mask(%rip),%r11 | |
943 | leaq -128(%rcx),%r14 | |
944 | movq $0xf80,%r15 | |
945 | leaq 128(%rcx),%rcx | |
946 | vmovdqu (%r11),%xmm0 | |
947 | andq $-128,%rsp | |
948 | movl 504-128(%rcx),%ebp // ICP has a larger offset for rounds. |
949 | ||
950 | andq %r15,%r14 | |
951 | andq %rsp,%r15 | |
952 | subq %r14,%r15 | |
953 | jc .Lenc_no_key_aliasing | |
954 | cmpq $768,%r15 | |
955 | jnc .Lenc_no_key_aliasing | |
956 | subq %r15,%rsp | |
957 | .Lenc_no_key_aliasing: | |
958 | ||
959 | leaq (%rsi),%r14 | |
960 | leaq -192(%rsi,%rdx,1),%r15 | |
961 | shrq $4,%rdx | |
962 | ||
963 | call _aesni_ctr32_6x | |
964 | vpshufb %xmm0,%xmm9,%xmm8 | |
965 | vpshufb %xmm0,%xmm10,%xmm2 | |
966 | vmovdqu %xmm8,112(%rsp) | |
967 | vpshufb %xmm0,%xmm11,%xmm4 | |
968 | vmovdqu %xmm2,96(%rsp) | |
969 | vpshufb %xmm0,%xmm12,%xmm5 | |
970 | vmovdqu %xmm4,80(%rsp) | |
971 | vpshufb %xmm0,%xmm13,%xmm6 | |
972 | vmovdqu %xmm5,64(%rsp) | |
973 | vpshufb %xmm0,%xmm14,%xmm7 | |
974 | vmovdqu %xmm6,48(%rsp) | |
975 | ||
976 | call _aesni_ctr32_6x | |
977 | ||
978 | vmovdqu (%r9),%xmm8 | |
979 | movq 32(%r9),%r9 |
980 | leaq 32(%r9),%r9 | |
981 | subq $12,%rdx |
982 | movq $192,%r10 | |
983 | vpshufb %xmm0,%xmm8,%xmm8 | |
984 | ||
985 | #ifdef HAVE_MOVBE |
986 | #ifdef _KERNEL | |
987 | testl $1,gcm_avx_can_use_movbe(%rip) | |
988 | #else | |
989 | testl $1,gcm_avx_can_use_movbe@GOTPCREL(%rip) | |
990 | #endif | |
991 | jz 1f | |
992 | call _aesni_ctr32_ghash_6x
993 | jmp 2f |
994 | 1: | |
995 | #endif | |
996 | call _aesni_ctr32_ghash_no_movbe_6x | |
997 | 2: | |
998 | vmovdqu 32(%rsp),%xmm7 |
999 | vmovdqu (%r11),%xmm0 | |
1000 | vmovdqu 0-32(%r9),%xmm3 | |
1001 | vpunpckhqdq %xmm7,%xmm7,%xmm1 | |
1002 | vmovdqu 32-32(%r9),%xmm15 | |
1003 | vmovups %xmm9,-96(%rsi) | |
1004 | vpshufb %xmm0,%xmm9,%xmm9 | |
1005 | vpxor %xmm7,%xmm1,%xmm1 | |
1006 | vmovups %xmm10,-80(%rsi) | |
1007 | vpshufb %xmm0,%xmm10,%xmm10 | |
1008 | vmovups %xmm11,-64(%rsi) | |
1009 | vpshufb %xmm0,%xmm11,%xmm11 | |
1010 | vmovups %xmm12,-48(%rsi) | |
1011 | vpshufb %xmm0,%xmm12,%xmm12 | |
1012 | vmovups %xmm13,-32(%rsi) | |
1013 | vpshufb %xmm0,%xmm13,%xmm13 | |
1014 | vmovups %xmm14,-16(%rsi) | |
1015 | vpshufb %xmm0,%xmm14,%xmm14 | |
1016 | vmovdqu %xmm9,16(%rsp) | |
1017 | vmovdqu 48(%rsp),%xmm6 | |
1018 | vmovdqu 16-32(%r9),%xmm0 | |
1019 | vpunpckhqdq %xmm6,%xmm6,%xmm2 | |
1020 | vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 | |
1021 | vpxor %xmm6,%xmm2,%xmm2 | |
1022 | vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 | |
1023 | vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 | |
1024 | ||
1025 | vmovdqu 64(%rsp),%xmm9 | |
1026 | vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 | |
1027 | vmovdqu 48-32(%r9),%xmm3 | |
1028 | vpxor %xmm5,%xmm4,%xmm4 | |
1029 | vpunpckhqdq %xmm9,%xmm9,%xmm5 | |
1030 | vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 | |
1031 | vpxor %xmm9,%xmm5,%xmm5 | |
1032 | vpxor %xmm7,%xmm6,%xmm6 | |
1033 | vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 | |
1034 | vmovdqu 80-32(%r9),%xmm15 | |
1035 | vpxor %xmm1,%xmm2,%xmm2 | |
1036 | ||
1037 | vmovdqu 80(%rsp),%xmm1 | |
1038 | vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 | |
1039 | vmovdqu 64-32(%r9),%xmm0 | |
1040 | vpxor %xmm4,%xmm7,%xmm7 | |
1041 | vpunpckhqdq %xmm1,%xmm1,%xmm4 | |
1042 | vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 | |
1043 | vpxor %xmm1,%xmm4,%xmm4 | |
1044 | vpxor %xmm6,%xmm9,%xmm9 | |
1045 | vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 | |
1046 | vpxor %xmm2,%xmm5,%xmm5 | |
1047 | ||
1048 | vmovdqu 96(%rsp),%xmm2 | |
1049 | vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 | |
1050 | vmovdqu 96-32(%r9),%xmm3 | |
1051 | vpxor %xmm7,%xmm6,%xmm6 | |
1052 | vpunpckhqdq %xmm2,%xmm2,%xmm7 | |
1053 | vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 | |
1054 | vpxor %xmm2,%xmm7,%xmm7 | |
1055 | vpxor %xmm9,%xmm1,%xmm1 | |
1056 | vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 | |
1057 | vmovdqu 128-32(%r9),%xmm15 | |
1058 | vpxor %xmm5,%xmm4,%xmm4 | |
1059 | ||
1060 | vpxor 112(%rsp),%xmm8,%xmm8 | |
1061 | vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 | |
1062 | vmovdqu 112-32(%r9),%xmm0 | |
1063 | vpunpckhqdq %xmm8,%xmm8,%xmm9 | |
1064 | vpxor %xmm6,%xmm5,%xmm5 | |
1065 | vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 | |
1066 | vpxor %xmm8,%xmm9,%xmm9 | |
1067 | vpxor %xmm1,%xmm2,%xmm2 | |
1068 | vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 | |
1069 | vpxor %xmm4,%xmm7,%xmm4 | |
1070 | ||
1071 | vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 | |
1072 | vmovdqu 0-32(%r9),%xmm3 | |
1073 | vpunpckhqdq %xmm14,%xmm14,%xmm1 | |
1074 | vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 | |
1075 | vpxor %xmm14,%xmm1,%xmm1 | |
1076 | vpxor %xmm5,%xmm6,%xmm5 | |
1077 | vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 | |
1078 | vmovdqu 32-32(%r9),%xmm15 | |
1079 | vpxor %xmm2,%xmm8,%xmm7 | |
1080 | vpxor %xmm4,%xmm9,%xmm6 | |
1081 | ||
1082 | vmovdqu 16-32(%r9),%xmm0 | |
1083 | vpxor %xmm5,%xmm7,%xmm9 | |
1084 | vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 | |
1085 | vpxor %xmm9,%xmm6,%xmm6 | |
1086 | vpunpckhqdq %xmm13,%xmm13,%xmm2 | |
1087 | vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 | |
1088 | vpxor %xmm13,%xmm2,%xmm2 | |
1089 | vpslldq $8,%xmm6,%xmm9 | |
1090 | vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 | |
1091 | vpxor %xmm9,%xmm5,%xmm8 | |
1092 | vpsrldq $8,%xmm6,%xmm6 | |
1093 | vpxor %xmm6,%xmm7,%xmm7 | |
1094 | ||
1095 | vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 | |
1096 | vmovdqu 48-32(%r9),%xmm3 | |
1097 | vpxor %xmm4,%xmm5,%xmm5 | |
1098 | vpunpckhqdq %xmm12,%xmm12,%xmm9 | |
1099 | vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 | |
1100 | vpxor %xmm12,%xmm9,%xmm9 | |
1101 | vpxor %xmm14,%xmm13,%xmm13 | |
1102 | vpalignr $8,%xmm8,%xmm8,%xmm14 | |
1103 | vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 | |
1104 | vmovdqu 80-32(%r9),%xmm15 | |
1105 | vpxor %xmm1,%xmm2,%xmm2 | |
1106 | ||
1107 | vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 | |
1108 | vmovdqu 64-32(%r9),%xmm0 | |
1109 | vpxor %xmm5,%xmm4,%xmm4 | |
1110 | vpunpckhqdq %xmm11,%xmm11,%xmm1 | |
1111 | vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 | |
1112 | vpxor %xmm11,%xmm1,%xmm1 | |
1113 | vpxor %xmm13,%xmm12,%xmm12 | |
1114 | vxorps 16(%rsp),%xmm7,%xmm7 | |
1115 | vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 | |
1116 | vpxor %xmm2,%xmm9,%xmm9 | |
1117 | ||
1118 | vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 | |
1119 | vxorps %xmm14,%xmm8,%xmm8 | |
1120 | ||
1121 | vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 | |
1122 | vmovdqu 96-32(%r9),%xmm3 | |
1123 | vpxor %xmm4,%xmm5,%xmm5 | |
1124 | vpunpckhqdq %xmm10,%xmm10,%xmm2 | |
1125 | vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 | |
1126 | vpxor %xmm10,%xmm2,%xmm2 | |
1127 | vpalignr $8,%xmm8,%xmm8,%xmm14 | |
1128 | vpxor %xmm12,%xmm11,%xmm11 | |
1129 | vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 | |
1130 | vmovdqu 128-32(%r9),%xmm15 | |
1131 | vpxor %xmm9,%xmm1,%xmm1 | |
1132 | ||
1133 | vxorps %xmm7,%xmm14,%xmm14 | |
1134 | vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 | |
1135 | vxorps %xmm14,%xmm8,%xmm8 | |
1136 | ||
1137 | vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 | |
1138 | vmovdqu 112-32(%r9),%xmm0 | |
1139 | vpxor %xmm5,%xmm4,%xmm4 | |
1140 | vpunpckhqdq %xmm8,%xmm8,%xmm9 | |
1141 | vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 | |
1142 | vpxor %xmm8,%xmm9,%xmm9 | |
1143 | vpxor %xmm11,%xmm10,%xmm10 | |
1144 | vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 | |
1145 | vpxor %xmm1,%xmm2,%xmm2 | |
1146 | ||
1147 | vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 | |
1148 | vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 | |
1149 | vpxor %xmm4,%xmm5,%xmm5 | |
1150 | vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 | |
1151 | vpxor %xmm10,%xmm7,%xmm7 | |
1152 | vpxor %xmm2,%xmm6,%xmm6 | |
1153 | ||
1154 | vpxor %xmm5,%xmm7,%xmm4 | |
1155 | vpxor %xmm4,%xmm6,%xmm6 | |
1156 | vpslldq $8,%xmm6,%xmm1 | |
1157 | vmovdqu 16(%r11),%xmm3 | |
1158 | vpsrldq $8,%xmm6,%xmm6 | |
1159 | vpxor %xmm1,%xmm5,%xmm8 | |
1160 | vpxor %xmm6,%xmm7,%xmm7 | |
1161 | ||
1162 | vpalignr $8,%xmm8,%xmm8,%xmm2 | |
1163 | vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 | |
1164 | vpxor %xmm2,%xmm8,%xmm8 | |
1165 | ||
1166 | vpalignr $8,%xmm8,%xmm8,%xmm2 | |
1167 | vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 | |
1168 | vpxor %xmm7,%xmm2,%xmm2 | |
1169 | vpxor %xmm2,%xmm8,%xmm8 | |
1170 | vpshufb (%r11),%xmm8,%xmm8 | |
1171 | movq -56(%rax),%r9 |
1172 | .cfi_restore %r9 | |
1173 | vmovdqu %xmm8,(%r9) | |
1174 | |
1175 | vzeroupper | |
1176 | movq -48(%rax),%r15 | |
1177 | .cfi_restore %r15 | |
1178 | movq -40(%rax),%r14 | |
1179 | .cfi_restore %r14 | |
1180 | movq -32(%rax),%r13 | |
1181 | .cfi_restore %r13 | |
1182 | movq -24(%rax),%r12 | |
1183 | .cfi_restore %r12 | |
1184 | movq -16(%rax),%rbp | |
1185 | .cfi_restore %rbp | |
1186 | movq -8(%rax),%rbx | |
1187 | .cfi_restore %rbx | |
1188 | leaq (%rax),%rsp | |
1189 | .cfi_def_cfa_register %rsp | |
1190 | .Lgcm_enc_abort: | |
1191 | movq %r10,%rax | |
1192 | RET
1193 | .cfi_endproc |
1194 | .size aesni_gcm_encrypt,.-aesni_gcm_encrypt | |
1195 | ||
1196 | /* Some utility routines */ | |
1197 | ||
1198 | /* | |
1199 | * clear all fpu registers | |
1200 | * void clear_fpu_regs_avx(void); | |
1201 | */ | |
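/*
 * Minimal usage sketch (illustrative only; kfpu_begin()/kfpu_end() are the
 * kernel FPU save/restore helpers the ICP callers are assumed to use):
 *
 *	kfpu_begin();
 *	...call the AVX/AES-NI routines from this file...
 *	clear_fpu_regs_avx();	// scrub key material from the SIMD registers
 *	kfpu_end();
 */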
1202 | .globl clear_fpu_regs_avx | |
1203 | .type clear_fpu_regs_avx,@function | |
1204 | .align 32 | |
1205 | clear_fpu_regs_avx: | |
1206 | vzeroall | |
1207 | RET
1208 | .size clear_fpu_regs_avx,.-clear_fpu_regs_avx |
1209 | ||
1210 | /* | |
1211 | * void gcm_xor_avx(const uint8_t *src, uint8_t *dst); | |
1212 | * | |
1213 | * XORs one pair of unaligned 128-bit blocks from `src' and `dst' and | |
1214 | * stores the result at `dst'. The XOR is performed using FPU registers, | |
1215 | * so make sure FPU state is saved when running this in the kernel. | |
1216 | */ | |
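/*
 * Roughly equivalent C, for reference only (the real routine uses unaligned
 * 128-bit SSE loads/stores, so FPU state must be saved around it in kernel
 * context):
 *
 *	void gcm_xor_avx(const uint8_t *src, uint8_t *dst)
 *	{
 *		for (int i = 0; i < 16; i++)
 *			dst[i] ^= src[i];
 *	}
 */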
1217 | .globl gcm_xor_avx | |
1218 | .type gcm_xor_avx,@function | |
1219 | .align 32 | |
1220 | gcm_xor_avx: | |
1221 | movdqu (%rdi), %xmm0 | |
1222 | movdqu (%rsi), %xmm1 | |
1223 | pxor %xmm1, %xmm0 | |
1224 | movdqu %xmm0, (%rsi) | |
1225 | RET
1226 | .size gcm_xor_avx,.-gcm_xor_avx |
1227 | ||
1228 | /* | |
1229 | * Toggle a boolean_t value atomically and return the new value. | |
1230 | * boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); | |
1231 | */ | |
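/*
 * Roughly equivalent C, for reference only (assumes boolean_t is a 32-bit
 * 0/1 value, which is how the routine below treats it):
 *
 *	boolean_t atomic_toggle_boolean_nv(volatile boolean_t *ptr)
 *	{
 *		// LOCK XOR flips the value between 0 and 1; the new value
 *		// is returned.
 *		return (__sync_xor_and_fetch((volatile uint32_t *)ptr, 1));
 *	}
 */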
1232 | .globl atomic_toggle_boolean_nv | |
1233 | .type atomic_toggle_boolean_nv,@function | |
1234 | .align 32 | |
1235 | atomic_toggle_boolean_nv: | |
1236 | xorl %eax, %eax | |
1237 | lock | |
1238 | xorl $1, (%rdi) | |
1239 | jz 1f | |
1240 | movl $1, %eax | |
1241 | 1: | |
1242 | RET
1243 | .size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv |
1244 | ||
1245 | .pushsection .rodata
1246 | .align 64 |
1247 | .Lbswap_mask: | |
1248 | .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 | |
1249 | .Lpoly: | |
1250 | .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 | |
1251 | .Lone_msb: | |
1252 | .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 | |
1253 | .Ltwo_lsb: | |
1254 | .byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 | |
1255 | .Lone_lsb: | |
1256 | .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 | |
1257 | .byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | |
1258 | .align 64 | |
1259 | .popsection
1260 | |
1261 | /* Mark the stack non-executable. */ | |
1262 | #if defined(__linux__) && defined(__ELF__) | |
1263 | .section .note.GNU-stack,"",%progbits | |
1264 | #endif | |
1265 | ||
1266 | #endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */ |