]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
1e59de90 | 5 | ; modification, are permitted provided that the following conditions |
7c673cae FG |
6 | ; are met: |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ; routine to do AES128 CBC decrypt | |
30 | ;; clobbers xmm0-15 | |
31 | ||
32 | ||
33 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
34 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
35 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
36 | ||
37 | %include "reg_sizes.asm" | |
38 | ||
%ifidn __OUTPUT_FORMAT__, elf64
; ELF64 build: the five arguments (in, IV, keys, out, num_bytes) are bound,
; in that order, to rdi, rsi, rdx, rcx and r8.
 %define IN		rdi	; source buffer pointer
 %define IV		rsi	; initialisation vector pointer
 %define KEYS		rdx	; key schedule pointer
 %define OUT		rcx	; destination buffer pointer
 %define LEN		r8	; byte count
; func(x) only needs to emit the entry label on this target.
 %define func(x)	x:
; FUNC_SAVE / FUNC_RESTORE expand to nothing on this target.
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif
49 | ||
%ifidn __OUTPUT_FORMAT__, win64
; Win64 build: in, IV, keys and out arrive in rcx, rdx, r8 and r9.
; The fifth argument (num_bytes) is loaded from the stack into r10 by
; FUNC_SAVE.
 %define IN		rcx
 %define IV		rdx
 %define KEYS		r8
 %define OUT		r9
 %define LEN		r10
 %define PS		8	; bytes per stack slot
; Room for xmm6-xmm15 (10*16) plus one 8-byte pad.
 %define stack_size	10*16 + 1*8	; must be an odd multiple of 8
; arg(x): address of argument slot x, counted from 0, once stack_size bytes
; have been allocated.  The extra PS steps over the return address.
 %define arg(x)		[rsp + stack_size + PS + PS*x]

 %define func(x)	proc_frame x

; Prologue: allocate the frame, spill xmm6-xmm15 at 16-byte strides from
; rsp, close the prologue, then fetch num_bytes (argument slot 4) into LEN.
 %macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6,  0*16
	save_xmm128	xmm7,  1*16
	save_xmm128	xmm8,  2*16
	save_xmm128	xmm9,  3*16
	save_xmm128	xmm10, 4*16
	save_xmm128	xmm11, 5*16
	save_xmm128	xmm12, 6*16
	save_xmm128	xmm13, 7*16
	save_xmm128	xmm14, 8*16
	save_xmm128	xmm15, 9*16
	end_prolog
	mov	LEN, arg(4)
 %endmacro

; Epilogue: reload xmm6-xmm15 from the slots written by FUNC_SAVE and
; release the frame.  movdqa needs 16-byte aligned addresses, which is why
; stack_size has to be an odd multiple of 8.
 %macro FUNC_RESTORE 0
	movdqa	xmm6,  [rsp + 0*16]
	movdqa	xmm7,  [rsp + 1*16]
	movdqa	xmm8,  [rsp + 2*16]
	movdqa	xmm9,  [rsp + 3*16]
	movdqa	xmm10, [rsp + 4*16]
	movdqa	xmm11, [rsp + 5*16]
	movdqa	xmm12, [rsp + 6*16]
	movdqa	xmm13, [rsp + 7*16]
	movdqa	xmm14, [rsp + 8*16]
	movdqa	xmm15, [rsp + 9*16]
	add	rsp, stack_size
 %endmacro

%endif
92 | ||
; configuration parameters for AES-CBC
;
; KEY_ROUNDS, PARALLEL_BLOCKS and EARLY_BLOCKS are handed to
; CBC_DECRYPT_BLOCKS below.  XMM_USAGE and IV_CNT are not referenced again
; in this file; presumably cbc_common.asm reads them -- confirm there.

; Round-key count passed as the first CBC_DECRYPT_BLOCKS argument.
%define KEY_ROUNDS	11
%define XMM_USAGE	(16)
%define EARLY_BLOCKS	(4)
; Blocks handled per main_loop pass; the loop runs while at least
; PARALLEL_BLOCKS*16 bytes remain.
%define PARALLEL_BLOCKS	(11)
%define IV_CNT		(1)
99 | ||
; instruction set specific operation definitions
;
; These names are passed to the macros from cbc_common.asm; this file
; binds them to the VEX-encoded (AVX) instruction forms.

; MOVDQ: 128-bit move with no alignment requirement.
%define MOVDQ	vmovdqu

; PXOR dst, src: dst = dst XOR src
%macro PXOR 2
	vpxor	%1, %1, %2
%endmacro

; AES_DEC state, key: apply vaesdec to state in place with key
%macro AES_DEC 2
	vaesdec	%1, %1, %2
%endmacro

; AES_DEC_LAST state, key: apply vaesdeclast to state in place with key
%macro AES_DEC_LAST 2
	vaesdeclast	%1, %1, %2
%endmacro
113 | ||
114 | %include "cbc_common.asm" | |
115 | ||
116 | section .text | |
117 | ||
;; aes_cbc_dec_128_avx(void *in, void *IV, void *keys, void *out, UINT64 num_bytes)
;;
;; AES128 CBC decrypt, AVX instruction forms.
;;   in        -> IN    source buffer
;;   IV        -> IV    initialisation vector, loaded once before the loop
;;   keys      -> KEYS  key schedule, handed to FILL_KEY_CACHE and
;;                      CBC_DECRYPT_BLOCKS
;;   out       -> OUT   destination buffer
;;   num_bytes -> LEN   unsigned byte count
;; Clobbers xmm0-15 (see FUNC_SAVE / FUNC_RESTORE for what is preserved).
;;
;; Data is consumed in groups of PARALLEL_BLOCKS blocks while enough
;; remains, then in groups of 4, 2 or 1 blocks.  Any non-zero remainder
;; below 2*16 bytes takes the single-block path, so num_bytes is presumably
;; required to be a multiple of 16 -- confirm against the public header.
;;
;; LEN is an unsigned quantity, so every size comparison uses the unsigned
;; condition codes (jae / jb).
mk_global aes_cbc_dec_128_avx, function
func(aes_cbc_dec_128_avx)
	endbranch
	FUNC_SAVE

	FILL_KEY_CACHE CKEY_CNT, FIRST_CKEY, KEYS, MOVDQ

	MOVDQ	reg(IV_IDX), [IV]	; Load IV for next round of block decrypt
	mov	IDX, 0
	cmp	LEN, PARALLEL_BLOCKS*16
	jb	partials		; not enough for a full parallel set

main_loop:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, PARALLEL_BLOCKS, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	cmp	LEN, PARALLEL_BLOCKS*16
	jae	main_loop		; enough blocks to do another full parallel set
	; Fall through: LEN is now below PARALLEL_BLOCKS*16, and partials
	; handles the LEN == 0 case itself.

partials:	; fewer than 'PARALLEL_BLOCKS' left do in groups of 4, 2 or 1
	test	LEN, LEN
	jz	done
	cmp	LEN, 4*16
	jae	initial_4
	cmp	LEN, 2*16
	jae	initial_2

initial_1:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, 1, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	jmp	done

initial_2:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, 2, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	; NOTE(review): this branch consumes ZF as left by the macro above,
	; presumably from its final update of LEN -- confirm in cbc_common.asm.
	jz	done
	jmp	partials

initial_4:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, 4, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	; NOTE(review): same dependence on the macro's ZF as in initial_2.
	jnz	partials
done:
	FUNC_RESTORE
	ret

; NOTE(review): emitted for every output format, while func() only opens a
; proc_frame on win64; presumably a no-op elsewhere -- confirm in reg_sizes.asm.
endproc_frame