]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - zfs/module/icp/asm-x86_64/aes/aes_intel.S
UBUNTU: SAUCE: Update zfs to e02aaf17f15ad274fa1f24c9c826f1477911ea3f
[mirror_ubuntu-zesty-kernel.git] / zfs / module / icp / asm-x86_64 / aes / aes_intel.S
1 /*
2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
6 * license.
7 *
8 * Author: Huang Ying <ying.huang at intel dot com>
9 * Vinodh Gopal <vinodh.gopal at intel dot com>
10 * Kahraman Akdemir
11 *
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
22 */
23
24 /*
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 *
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in
37 * the documentation and/or other materials provided with the
38 * distribution.
39 *
40 * 3. All advertising materials mentioning features or use of this
41 * software must display the following acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
44 *
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 * endorse or promote products derived from this software without
47 * prior written permission. For written permission, please contact
48 * openssl-core@openssl.org.
49 *
50 * 5. Products derived from this software may not be called "OpenSSL"
51 * nor may "OpenSSL" appear in their names without prior written
52 * permission of the OpenSSL Project.
53 *
54 * 6. Redistributions of any form whatsoever must retain the following
55 * acknowledgment:
56 * "This product includes software developed by the OpenSSL Project
57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
72 */
73
74 /*
75 * ====================================================================
76 * OpenSolaris OS modifications
77 *
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
79 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
82 *
83 * This OpenSolaris version has these major changes from the original source:
84 *
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
88 *
89 * 2. Formatted code, added comments, and added #includes and #defines.
90 *
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
93 * If the TS bit is not set, save and restore %xmm registers at the beginning
94 * and end of function calls (%xmm* registers are not saved and restored
95 * during kernel thread preemption).
96 *
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
99 *
100 * OpenSSL interface:
101 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 * const int bits, AES_KEY *key);
103 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 * const int bits, AES_KEY *key);
105 * Return values for above are non-zero on error, 0 on success.
106 *
107 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 * const AES_KEY *key);
109 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 * const AES_KEY *key);
111 * typedef struct aes_key_st {
112 * unsigned int rd_key[4 *(AES_MAXNR + 1)];
113 * int rounds;
114 * unsigned int pad[3];
115 * } AES_KEY;
116 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
117 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
119 *
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 * int rijndael_key_setup_dec_intel(uint32_t rk[],
122 * const uint32_t cipherKey[], uint64_t keyBits);
123 * int rijndael_key_setup_enc_intel(uint32_t rk[],
124 * const uint32_t cipherKey[], uint64_t keyBits);
125 * Return values for above are 0 on error, number of rounds on success.
126 *
127 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 * const uint32_t pt[4], uint32_t ct[4]);
129 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
130 * const uint32_t pt[4], uint32_t ct[4]);
131 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
133 *
134 * typedef union {
135 * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
136 * } aes_ks_t;
137 * typedef struct aes_key {
138 * aes_ks_t encr_ks, decr_ks;
139 * long double align128;
140 * int flags, nr, type;
141 * } aes_key_t;
142 *
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is crypto text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
146 *
147 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
148 *
149 * ====================================================================
150 */
151
152 #if defined(lint) || defined(__lint)
153
154 #include <sys/types.h>
155
/*
 * Lint-only stand-in for the assembly routine of the same name.
 * It gives lint a C definition to check callers against and does nothing.
 */
/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
}
/*
 * Lint-only stand-in for the assembly routine of the same name.
 * It gives lint a C definition to check callers against and does nothing.
 */
/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
}
/*
 * Lint-only stand-in for the assembly routine of the same name.
 * Always returns 0 and leaves its arguments untouched.
 */
/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	return (0);
}
/*
 * Lint-only stand-in for the assembly routine of the same name.
 * Always returns 0 and leaves its arguments untouched.
 */
/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	return (0);
}
178
179
180 #else /* lint */
181
182 #define _ASM
183 #include <sys/asm_linkage.h>
184
185 #ifdef _KERNEL
/*
 * PROTECTED_CLTS
 * Clear CR0.TS through the platform CLTS macro without losing P2 (%rsi).
 *
 * Under i86xpv the CLTS macro calls HYPERVISOR_fpu_taskswitch(), which
 * modifies %rsi when it uses it to pass P2 to syscall, so %rsi is saved
 * and restored around CLTS in that configuration only.
 * The STTS macro has the same side effect, but it is only used just
 * before function exit, where a modified P2 (%rsi) does not matter.
 * P1 (%rdi) needs no protection here: the CLTS and STTS macros already
 * push and pop it themselves.
 */
#ifndef	__xpv
#define	PROTECTED_CLTS \
	CLTS
#else	/* __xpv */
#define	PROTECTED_CLTS \
	pushq	%rsi; \
	CLTS; \
	popq	%rsi
#endif	/* __xpv */
203
/*
 * CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
 * Entry sequence for routines that use only %xmm0 and %xmm1.
 *
 * Sets up an %rbp frame and copies %cr0 into tmpreg, then:
 *   - CR0.TS clear: rounds %rsp down to an XMM_ALIGN boundary, reserves
 *     two XMM_SIZE slots and stores %xmm0 at 16(%rsp), %xmm1 at (%rsp).
 *   - CR0.TS set:   clears TS with PROTECTED_CLTS; nothing is stored.
 *
 * tmpreg must stay intact until the matching SET_TS_OR_POP_XMM0_XMM1,
 * which tests the same CR0_TS bit in it to pick the matching exit path.
 */
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
	pushq	%rbp; \
	movq	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	andq	$-XMM_ALIGN, %rsp; \
	subq	$[XMM_SIZE * 2], %rsp; \
	movaps	%xmm0, 16(%rsp); \
	movaps	%xmm1, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:
218
/*
 * SET_TS_OR_POP_XMM0_XMM1(tmpreg)
 * Exit sequence paired with CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg).
 *
 * tmpreg still holds the %cr0 value read on entry:
 *   - CR0.TS was clear: reload %xmm1 from (%rsp) and %xmm0 from 16(%rsp).
 *   - CR0.TS was set:   set TS again with STTS(tmpreg).
 * Either way the %rbp frame built on entry is then torn down, which also
 * releases the aligned save area.
 */
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm1; \
	movaps	16(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	movq	%rbp, %rsp; \
	popq	%rbp
234
/*
 * CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
 * Entry sequence for routines that use %xmm0 through %xmm6.
 *
 * Sets up an %rbp frame and copies %cr0 into tmpreg, then:
 *   - CR0.TS clear: rounds %rsp down to an XMM_ALIGN boundary, reserves
 *     seven XMM_SIZE slots and stores %xmm0 - %xmm6 there
 *     (%xmm0 at 96(%rsp) down to %xmm6 at (%rsp)).
 *   - CR0.TS set:   clears TS with PROTECTED_CLTS; nothing is stored.
 *
 * tmpreg must stay intact until the matching SET_TS_OR_POP_XMM0_TO_XMM6.
 */
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
	pushq	%rbp; \
	movq	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	andq	$-XMM_ALIGN, %rsp; \
	subq	$[XMM_SIZE * 7], %rsp; \
	movaps	%xmm0, 96(%rsp); \
	movaps	%xmm1, 80(%rsp); \
	movaps	%xmm2, 64(%rsp); \
	movaps	%xmm3, 48(%rsp); \
	movaps	%xmm4, 32(%rsp); \
	movaps	%xmm5, 16(%rsp); \
	movaps	%xmm6, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:
258
259
/*
 * SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
 * Exit sequence paired with CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg).
 *
 * tmpreg still holds the %cr0 value read on entry:
 *   - CR0.TS was clear: reload %xmm6 - %xmm0 from the save area
 *     (%xmm6 from (%rsp) up to %xmm0 from 96(%rsp)).
 *   - CR0.TS was set:   set TS again with STTS(tmpreg).
 * Either way the %rbp frame built on entry is then torn down, which also
 * releases the aligned save area.
 */
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm6; \
	movaps	16(%rsp), %xmm5; \
	movaps	32(%rsp), %xmm4; \
	movaps	48(%rsp), %xmm3; \
	movaps	64(%rsp), %xmm2; \
	movaps	80(%rsp), %xmm1; \
	movaps	96(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	movq	%rbp, %rsp; \
	popq	%rbp
280
281
282 #else
/*
 * Non-kernel build: all of the CR0.TS / %xmm save-restore macros expand
 * to nothing, so the routines below run without that bracketing.
 */
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)	/* nothing */
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg)		/* nothing */
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)	/* nothing */
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)	/* nothing */
#define	PROTECTED_CLTS				/* nothing */
288 #endif /* _KERNEL */
289
290
/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
 *
 * Input:
 *	%xmm0	Key material being expanded (starts as the first 16 bytes
 *		of the user-provided cipher key)
 *	%xmm1	Result of the aeskeygenassist issued by the caller with the
 *		round constant for this step
 *	%xmm2	Remaining key material (192- and 256-bit keys only)
 *	%xmm4	Low dword must be zero (the caller clears %xmm4 once; every
 *		helper leaves the low dword zero)
 *	%rcx	Address at which to store the next round key(s)
 * Output:
 *	(%rcx)	Newly generated round key(s); %rcx is advanced past them
 *
 * _key_expansion_128 and _key_expansion_256a are two names for the same
 * code: they derive the next 16-byte round key from %xmm0, store it at
 * (%rcx) and advance %rcx by 16.
 */

	.align	16
_key_expansion_128:
_key_expansion_256a:
	pshufd	$0xff, %xmm1, %xmm1	// all lanes = dword 3 of keygen result
	shufps	$0x10, %xmm0, %xmm4	// the two shufps/pxor pairs leave
	pxor	%xmm4, %xmm0		// dword i of %xmm0 equal to the XOR
	shufps	$0x8c, %xmm0, %xmm4	// of its original dwords 0..i
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0		// mix in the keygen word
	movaps	%xmm0, (%rcx)		// store the new round key
	addq	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_128)
	SET_SIZE(_key_expansion_256a)
319
320 .align 16
321 _key_expansion_192a:
322 pshufd $0b01010101, %xmm1, %xmm1
323 shufps $0b00010000, %xmm0, %xmm4
324 pxor %xmm4, %xmm0
325 shufps $0b10001100, %xmm0, %xmm4
326 pxor %xmm4, %xmm0
327 pxor %xmm1, %xmm0
328
329 movaps %xmm2, %xmm5
330 movaps %xmm2, %xmm6
331 pslldq $4, %xmm5
332 pshufd $0b11111111, %xmm0, %xmm3
333 pxor %xmm3, %xmm2
334 pxor %xmm5, %xmm2
335
336 movaps %xmm0, %xmm1
337 shufps $0b01000100, %xmm0, %xmm6
338 movaps %xmm6, (%rcx)
339 shufps $0b01001110, %xmm2, %xmm1
340 movaps %xmm1, 0x10(%rcx)
341 add $0x20, %rcx
342 ret
343 SET_SIZE(_key_expansion_192a)
344
345 .align 16
346 _key_expansion_192b:
347 pshufd $0b01010101, %xmm1, %xmm1
348 shufps $0b00010000, %xmm0, %xmm4
349 pxor %xmm4, %xmm0
350 shufps $0b10001100, %xmm0, %xmm4
351 pxor %xmm4, %xmm0
352 pxor %xmm1, %xmm0
353
354 movaps %xmm2, %xmm5
355 pslldq $4, %xmm5
356 pshufd $0b11111111, %xmm0, %xmm3
357 pxor %xmm3, %xmm2
358 pxor %xmm5, %xmm2
359
360 movaps %xmm0, (%rcx)
361 add $0x10, %rcx
362 ret
363 SET_SIZE(_key_expansion_192b)
364
365 .align 16
366 _key_expansion_256b:
367 pshufd $0b10101010, %xmm1, %xmm1
368 shufps $0b00010000, %xmm2, %xmm4
369 pxor %xmm4, %xmm2
370 shufps $0b10001100, %xmm2, %xmm4
371 pxor %xmm4, %xmm2
372 pxor %xmm1, %xmm2
373 movaps %xmm2, (%rcx)
374 add $0x10, %rcx
375 ret
376 SET_SIZE(_key_expansion_256b)
377
378
/*
 * rijndael_key_setup_enc_intel()
 * Expand the cipher key into the encryption key schedule.
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * If CR0.TS is set on entry it is cleared here and set again on exit;
 * otherwise %xmm0 - %xmm6 are saved on the stack and restored on exit.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error (a NULL pointer, or keyBits other than
 * 128, 192 or 256), number of rounds (10, 12 or 14) on success.
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is 0 on success, -1 if a pointer is NULL, -2 if the key
 * size is invalid.  The round count is also stored at offset 240 of key.
 *
 * Register usage:
 *	%rcx		address at which the next round key is stored
 *	%xmm0 - %xmm6	working registers of the _key_expansion_* helpers
 *	%r10		copy of %cr0 kept for the exit macro (kernel only)
 *
 * The local label rijndael_key_setup_enc_intel_local gives
 * rijndael_key_setup_dec_intel() a call target within this file.
 */

#ifdef	OPENSSL_INTERFACE
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY	rdi	/* P1, 64 bits */
#define	KEYSIZE32	esi	/* P2, 32 bits */
#define	KEYSIZE64	rsi	/* P2, 64 bits */
#define	AESKEY		rdx	/* P3, 64 bits */

#else	/* OpenSolaris Interface */
#define	AESKEY		rdi	/* P1, 64 bits */
#define	USERCIPHERKEY	rsi	/* P2, 64 bits */
#define	KEYSIZE32	edx	/* P3, 32 bits */
#define	KEYSIZE64	rdx	/* P3, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/* The key-size and user-key registers are reused once their input is consumed */
#define	ROUNDS32	KEYSIZE32	/* temp */
#define	ROUNDS64	KEYSIZE64	/* temp */
#define	ENDAESKEY	USERCIPHERKEY	/* temp */

ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)

	// Reject NULL key pointers before touching memory
	testq	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	testq	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	// Round key 0 is the first 16 bytes of the user key, copied verbatim
	movups	(%USERCIPHERKEY), %xmm0
	movaps	%xmm0, (%AESKEY)
	leaq	0x10(%AESKEY), %rcx		// %rcx = next round key slot
	pxor	%xmm4, %xmm4			// helpers require low dword of %xmm4 = 0

	cmpl	$256, %KEYSIZE32
	jnz	.Lenc_key192

	// ---- AES-256: 14 rounds ----
#ifdef	OPENSSL_INTERFACE
	movl	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	// Round key 1 is the second 16 bytes of the user key, copied verbatim
	movups	0x10(%USERCIPHERKEY), %xmm2
	movaps	%xmm2, (%rcx)
	addq	$0x10, %rcx

	// The immediate of each aeskeygenassist is the round constant
	aeskeygenassist	$0x1, %xmm2, %xmm1	// rcon 0x01
	call	_key_expansion_256a
	aeskeygenassist	$0x1, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist	$0x2, %xmm2, %xmm1	// rcon 0x02
	call	_key_expansion_256a
	aeskeygenassist	$0x2, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist	$0x4, %xmm2, %xmm1	// rcon 0x04
	call	_key_expansion_256a
	aeskeygenassist	$0x4, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist	$0x8, %xmm2, %xmm1	// rcon 0x08
	call	_key_expansion_256a
	aeskeygenassist	$0x8, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist	$0x10, %xmm2, %xmm1	// rcon 0x10
	call	_key_expansion_256a
	aeskeygenassist	$0x10, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist	$0x20, %xmm2, %xmm1	// rcon 0x20
	call	_key_expansion_256a
	aeskeygenassist	$0x20, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist	$0x40, %xmm2, %xmm1	// rcon 0x40, final round key
	call	_key_expansion_256a

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xorq	%rax, %rax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	movq	$14, %rax			// return # rounds = 14
#endif
	ret

	.align	4
.Lenc_key192:
	cmpl	$192, %KEYSIZE32
	jnz	.Lenc_key128

	// ---- AES-192: 12 rounds ----
#ifdef	OPENSSL_INTERFACE
	movl	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	// Remaining 8 bytes of the user key go into the low half of %xmm2
	movq	0x10(%USERCIPHERKEY), %xmm2
	aeskeygenassist	$0x1, %xmm2, %xmm1	// rcon 0x01
	call	_key_expansion_192a
	aeskeygenassist	$0x2, %xmm2, %xmm1	// rcon 0x02
	call	_key_expansion_192b
	aeskeygenassist	$0x4, %xmm2, %xmm1	// rcon 0x04
	call	_key_expansion_192a
	aeskeygenassist	$0x8, %xmm2, %xmm1	// rcon 0x08
	call	_key_expansion_192b
	aeskeygenassist	$0x10, %xmm2, %xmm1	// rcon 0x10
	call	_key_expansion_192a
	aeskeygenassist	$0x20, %xmm2, %xmm1	// rcon 0x20
	call	_key_expansion_192b
	aeskeygenassist	$0x40, %xmm2, %xmm1	// rcon 0x40
	call	_key_expansion_192a
	aeskeygenassist	$0x80, %xmm2, %xmm1	// rcon 0x80
	call	_key_expansion_192b

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xorq	%rax, %rax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	movq	$12, %rax			// return # rounds = 12
#endif
	ret

	.align	4
.Lenc_key128:
	cmpl	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits

	// ---- AES-128: 10 rounds ----
#ifdef	OPENSSL_INTERFACE
	movl	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	// One helper call per round key 1..10
	aeskeygenassist	$0x1, %xmm0, %xmm1	// rcon 0x01
	call	_key_expansion_128
	aeskeygenassist	$0x2, %xmm0, %xmm1	// rcon 0x02
	call	_key_expansion_128
	aeskeygenassist	$0x4, %xmm0, %xmm1	// rcon 0x04
	call	_key_expansion_128
	aeskeygenassist	$0x8, %xmm0, %xmm1	// rcon 0x08
	call	_key_expansion_128
	aeskeygenassist	$0x10, %xmm0, %xmm1	// rcon 0x10
	call	_key_expansion_128
	aeskeygenassist	$0x20, %xmm0, %xmm1	// rcon 0x20
	call	_key_expansion_128
	aeskeygenassist	$0x40, %xmm0, %xmm1	// rcon 0x40
	call	_key_expansion_128
	aeskeygenassist	$0x80, %xmm0, %xmm1	// rcon 0x80
	call	_key_expansion_128
	aeskeygenassist	$0x1b, %xmm0, %xmm1	// rcon 0x1b
	call	_key_expansion_128
	aeskeygenassist	$0x36, %xmm0, %xmm1	// rcon 0x36
	call	_key_expansion_128

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xorq	%rax, %rax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	movq	$10, %rax			// return # rounds = 10
#endif
	ret

	// ---- Error exits ----
.Lenc_key_invalid_param:
#ifdef	OPENSSL_INTERFACE
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
	movq	$-1, %rax	// user key or AES key pointer is NULL
	ret
#else
	/* FALLTHROUGH: OpenSolaris reports both error kinds as 0 */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	movq	$-2, %rax	// keysize is invalid
#else	/* Open Solaris Interface */
	xorq	%rax, %rax	// a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */

	ret
	SET_SIZE(rijndael_key_setup_enc_intel)
579
580
/*
 * rijndael_key_setup_dec_intel()
 * Expand the cipher key into the decryption key schedule.
 *
 * The encryption schedule is generated first (by calling into
 * rijndael_key_setup_enc_intel), then converted in place:
 *   1. the order of the round keys is reversed, and
 *   2. every round key except the first and last is passed through
 *      aesimc ("AES Inverse Mix Columns").
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * If CR0.TS is set on entry it is cleared here and set again on exit;
 * otherwise the %xmm registers used are saved on the stack and restored
 * on exit.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
 *
 * Parameter mapping, OpenSSL -> OpenSolaris: P1 (userKey) -> P2,
 * P2 (bits) -> P3, P3 (key) -> P1.
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	// Step 0: build the encryption schedule; bail out if that failed
	call	rijndael_key_setup_enc_intel_local
	testq	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_exit		// Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit		// Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	movq	%rax, %ROUNDS64		// # rounds (10, 12, or 14); under the
					// OpenSSL interface the register was
					// already loaded by the call above
#endif

	leaq	0x10(%AESKEY), %rcx	// %rcx = address of round key 1
	shll	$4, %ROUNDS32		// rounds * 16 = offset of last round key
	addq	%AESKEY, %ROUNDS64	// ROUNDS64 = address of last round key
	movq	%ROUNDS64, %ENDAESKEY	// remembered as the end of step 2

	// Step 1: reverse the schedule by swapping keys from both ends,
	// moving inward until the two pointers meet
	.align	4
.Ldec_key_reorder_loop:
	movaps	(%AESKEY), %xmm0
	movaps	(%ROUNDS64), %xmm1
	movaps	%xmm0, (%ROUNDS64)
	movaps	%xmm1, (%AESKEY)
	leaq	0x10(%AESKEY), %AESKEY
	leaq	-0x10(%ROUNDS64), %ROUNDS64
	cmpq	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop	// loop while high pointer > low pointer

	// Step 2: apply aesimc to round keys 1 .. last-1 so they can be
	// used with the aesdec instruction
	.align	4
.Ldec_key_inv_loop:
	movaps	(%rcx), %xmm0
	aesimc	%xmm0, %xmm1
	movaps	%xmm1, (%rcx)
	leaq	0x10(%rcx), %rcx
	cmpq	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop	// stop at (not on) the last round key

	SET_TS_OR_POP_XMM0_XMM1(%r10)

.Ldec_key_exit:
	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	// OpenSSL: rax = 0 for OK, or non-zero for error
	ret
	SET_SIZE(rijndael_key_setup_dec_intel)
657
658
/*
 * aes_encrypt_intel()
 * Encrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * If CR0.TS is set on entry it is cleared here and set again on exit;
 * otherwise %xmm0 and %xmm1 are saved on the stack and restored on exit.
 *
 * Temporary register usage:
 *	%xmm0	State
 *	%xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key)
 *
 * Layout of the round sequence: KEYP is advanced so that the last round
 * key always sits at 0x70(%KEYP), whatever the key size.  The three key
 * sizes then share one tail of aesenc instructions and differ only in
 * where they enter it (.Lenc128, .Lenc192, or the AES-256 preamble).
 * The input is read with movups and the output written with movups;
 * the key schedule is read with movaps.
 */

#ifdef	OPENSSL_INTERFACE
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary, 8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */

ENTRY_NP(aes_encrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			// load the input block
	movaps	(%KEYP), %KEY			// round key 0
#ifdef	OPENSSL_INTERFACE
	movl	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0: whitening
	leaq	0x30(%KEYP), %KEYP		// AES-128 bias
	cmp	$12, %NROUNDS
	jb	.Lenc128			// fewer than 12 rounds
	leaq	0x20(%KEYP), %KEYP		// lea leaves the cmp flags intact
	je	.Lenc192			// exactly 12 rounds

	// AES 256: two extra leading rounds
	leaq	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

	.align	4
.Lenc192:
	// AES 192 and 256: two more rounds
	movaps	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

	.align	4
.Lenc128:
	// AES 128, 192, and 256: the common final ten rounds
	movaps	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesenclast	%KEY, %STATE		// last round
	movups	%STATE, (%OUTP)			// store the output block

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_encrypt_intel)
765
766
/*
 * aes_decrypt_intel()
 * Decrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * If CR0.TS is set on entry it is cleared here and set again on exit;
 * otherwise %xmm0 and %xmm1 are saved on the stack and restored on exit.
 *
 * Temporary register usage:
 *	%xmm0	State
 *	%xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key);
 *
 * The key schedule passed in must be a decryption schedule, as built by
 * rijndael_key_setup_dec_intel().  KEYP is advanced so that the last
 * round key always sits at 0x70(%KEYP); the three key sizes share one
 * tail of aesdec instructions and differ only in where they enter it.
 */
ENTRY_NP(aes_decrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			// load the input block
	movaps	(%KEYP), %KEY			// round key 0
#ifdef	OPENSSL_INTERFACE
	movl	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0: whitening
	leaq	0x30(%KEYP), %KEYP		// AES-128 bias
	cmp	$12, %NROUNDS
	jb	.Ldec128			// fewer than 12 rounds
	leaq	0x20(%KEYP), %KEYP		// lea leaves the cmp flags intact
	je	.Ldec192			// exactly 12 rounds

	// AES 256: two extra leading rounds
	leaq	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

	.align	4
.Ldec192:
	// AES 192 and 256: two more rounds
	movaps	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

	.align	4
.Ldec128:
	// AES 128, 192, and 256: the common final ten rounds
	movaps	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		// last round
	movups	%STATE, (%OUTP)			// store the output block

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_decrypt_intel)
850
851 #endif /* lint || __lint */
852
/*
 * ELF objects only: emit an empty .note.GNU-stack section.
 * NOTE(review): by GNU toolchain convention this marks the object as not
 * needing an executable stack -- confirm against the linker in use.
 */
#ifdef	__ELF__
	.section	.note.GNU-stack, "", %progbits
#endif	/* __ELF__ */