]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
1e59de90 | 5 | ; modification, are permitted provided that the following conditions |
7c673cae FG |
6 | ; are met: |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "reg_sizes.asm" | |
31 | ||
1e59de90 TL |
32 | [bits 64] |
33 | default rel | |
34 | section .text | |
35 | ||
7c673cae FG |
36 | %ifidn __OUTPUT_FORMAT__, elf64 |
37 | %define KEY rdi | |
38 | %define EXP_ENC_KEYS rsi | |
39 | %define EXP_DEC_KEYS rdx | |
40 | %else | |
41 | %define KEY rcx | |
42 | %define EXP_ENC_KEYS rdx | |
43 | %define EXP_DEC_KEYS r8 | |
44 | %endif | |
45 | ||
46 | ||
47 | ||
48 | ||
49 | %macro key_expansion_1_192_sse 1 | |
50 | ;; Assumes the low dword of xmm3 is zero at this point (callers clear all of xmm3; both shufps below keep that dword zero). | |
51 | pshufd xmm2, xmm2, 11111111b | |
52 | shufps xmm3, xmm1, 00010000b | |
53 | pxor xmm1, xmm3 | |
54 | shufps xmm3, xmm1, 10001100b | |
55 | pxor xmm1, xmm3 | |
56 | pxor xmm1, xmm2 | |
57 | movdqu [EXP_ENC_KEYS+%1], xmm1 | |
58 | %endmacro | |
59 | ||
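60 | ; Calculate the next two key words (e.g. w10 and w11) from the newest word in xmm1 (e.g. w9) and the previous pair kept in the upper half of xmm4 (e.g. w4-w5); relies on xmm6 having been zeroed by the caller before first use | |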
61 | %macro key_expansion_2_192_sse 1 | |
62 | movdqu xmm5, xmm4 | |
63 | pslldq xmm5, 4 | |
64 | shufps xmm6, xmm1, 11110000b | |
65 | pxor xmm6, xmm5 | |
66 | pxor xmm4, xmm6 | |
67 | pshufd xmm7, xmm4, 00001110b | |
68 | movdqu [EXP_ENC_KEYS+%1], xmm7 | |
69 | %endmacro | |
70 | ||
71 | %macro key_dec_192_sse 1 | |
72 | movdqu xmm0, [EXP_ENC_KEYS + 16 * %1] | |
73 | aesimc xmm1, xmm0 | |
74 | movdqu [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1 | |
75 | %endmacro | |
76 | ||
77 | ||
78 | ||
79 | ||
80 | ||
81 | %macro key_expansion_1_192_avx 1 | |
82 | ;; Assumes the low dword of xmm3 is zero at this point (callers clear all of xmm3; both vshufps below keep that dword zero). | |
83 | vpshufd xmm2, xmm2, 11111111b | |
84 | vshufps xmm3, xmm3, xmm1, 00010000b | |
85 | vpxor xmm1, xmm1, xmm3 | |
86 | vshufps xmm3, xmm3, xmm1, 10001100b | |
87 | vpxor xmm1, xmm1, xmm3 | |
88 | vpxor xmm1, xmm1, xmm2 | |
89 | vmovdqu [EXP_ENC_KEYS+%1], xmm1 | |
90 | %endmacro | |
91 | ||
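92 | ; Calculate the next two key words (e.g. w10 and w11) from the newest word in xmm1 (e.g. w9) and the previous pair kept in the upper half of xmm4 (e.g. w4-w5); relies on xmm6 having been zeroed by the caller before first use | |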
93 | %macro key_expansion_2_192_avx 1 | |
94 | vmovdqa xmm5, xmm4 | |
95 | vpslldq xmm5, xmm5, 4 | |
96 | vshufps xmm6, xmm6, xmm1, 11110000b | |
97 | vpxor xmm6, xmm6, xmm5 | |
98 | vpxor xmm4, xmm4, xmm6 | |
99 | vpshufd xmm7, xmm4, 00001110b | |
100 | vmovdqu [EXP_ENC_KEYS+%1], xmm7 | |
101 | %endmacro | |
102 | ||
103 | %macro key_dec_192_avx 1 | |
104 | vmovdqu xmm0, [EXP_ENC_KEYS + 16 * %1] | |
105 | vaesimc xmm1, xmm0 | |
106 | vmovdqu [EXP_DEC_KEYS + 16 * (12 - %1)], xmm1 | |
107 | %endmacro | |
108 | ||
109 | ||
110 | ||
111 | ||
112 | ; void aes_keyexp_192(UINT8 *key, | |
113 | ; UINT8 *enc_exp_keys, | |
114 | ; UINT8 *dec_exp_keys); | |
115 | ; | |
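116 | ; arg 1: rdi (elf64) or rcx (other output formats): pointer to key | |
117 | ; arg 2: rsi (elf64) or rdx (other output formats): pointer to expanded key array for encrypt | |
118 | ; arg 3: rdx (elf64) or r8 (other output formats): pointer to expanded key array for decrypt | |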
119 | ; | |
1e59de90 | 120 | mk_global aes_keyexp_192_sse, function |
7c673cae | 121 | aes_keyexp_192_sse: |
1e59de90 | 122 | endbranch |
7c673cae FG |
123 | |
124 | %ifnidn __OUTPUT_FORMAT__, elf64 | |
125 | sub rsp, 16*2 + 8 | |
126 | movdqu [rsp + 0*16], xmm6 | |
127 | movdqu [rsp + 1*16], xmm7 | |
128 | %endif | |
129 | ||
130 | movq xmm7, [KEY + 16] ; load key bytes 16-23 (the last 64 bits of the 192-bit key) | |
131 | movq [EXP_ENC_KEYS + 16], xmm7 ; store them at offset 16 of the expanded encrypt keys | |
132 | pshufd xmm4, xmm7, 01001111b | |
133 | movdqu xmm1, [KEY] ; load key bytes 0-15 (the first 128 bits of the key) | |
134 | movdqu [EXP_ENC_KEYS], xmm1 ; store them as encrypt round key 0 | |
135 | movdqu [EXP_DEC_KEYS + 16*0], xmm1 | |
136 | movdqu [EXP_DEC_KEYS + 16*12], xmm1 | |
137 | ||
138 | pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion. | |
139 | pxor xmm6, xmm6 ; Set xmm6 to be all zeros. Required for key_expansion_2_192_sse. | |
140 | ||
141 | aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2 | |
142 | key_expansion_1_192_sse 24 | |
143 | key_expansion_2_192_sse 40 | |
144 | ||
145 | aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4 | |
146 | key_expansion_1_192_sse 48 | |
147 | key_expansion_2_192_sse 64 | |
148 | ||
149 | aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5 | |
150 | key_expansion_1_192_sse 72 | |
151 | key_expansion_2_192_sse 88 | |
152 | ||
153 | aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7 | |
154 | key_expansion_1_192_sse 96 | |
155 | key_expansion_2_192_sse 112 | |
156 | ||
157 | aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8 | |
158 | key_expansion_1_192_sse 120 | |
159 | key_expansion_2_192_sse 136 | |
160 | ||
161 | aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10 | |
162 | key_expansion_1_192_sse 144 | |
163 | key_expansion_2_192_sse 160 | |
164 | ||
165 | aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11 | |
166 | key_expansion_1_192_sse 168 | |
167 | key_expansion_2_192_sse 184 | |
168 | ||
169 | aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12 | |
170 | key_expansion_1_192_sse 192 | |
171 | ||
172 | ;;; decrypt key 12 (the unmodified input key) was already stored above; | |
173 | ;;; decrypt key 0 is encrypt key 12, copied as-is without aesimc | |
174 | movdqu xmm0, [EXP_ENC_KEYS + 16 * 12] | |
175 | movdqu [EXP_DEC_KEYS + 16*0], xmm0 | |
176 | ;;; generate remaining decrypt keys | |
177 | key_dec_192_sse 1 | |
178 | key_dec_192_sse 2 | |
179 | key_dec_192_sse 3 | |
180 | key_dec_192_sse 4 | |
181 | key_dec_192_sse 5 | |
182 | key_dec_192_sse 6 | |
183 | key_dec_192_sse 7 | |
184 | key_dec_192_sse 8 | |
185 | key_dec_192_sse 9 | |
186 | key_dec_192_sse 10 | |
187 | key_dec_192_sse 11 | |
188 | ||
189 | %ifnidn __OUTPUT_FORMAT__, elf64 | |
190 | movdqu xmm6, [rsp + 0*16] | |
191 | movdqu xmm7, [rsp + 1*16] | |
192 | add rsp, 16*2 + 8 | |
193 | %endif | |
194 | ||
195 | ret | |
196 | ||
197 | ||
198 | ||
1e59de90 | 199 | mk_global aes_keyexp_192_avx, function |
7c673cae | 200 | aes_keyexp_192_avx: |
1e59de90 | 201 | endbranch |
7c673cae FG |
202 | |
203 | %ifnidn __OUTPUT_FORMAT__, elf64 | |
204 | sub rsp, 16*2 + 8 | |
205 | vmovdqu [rsp + 0*16], xmm6 | |
206 | vmovdqu [rsp + 1*16], xmm7 | |
207 | %endif | |
208 | ||
209 | vmovq xmm7, [KEY + 16] ; load key bytes 16-23 (the last 64 bits of the 192-bit key) | |
210 | vmovq [EXP_ENC_KEYS + 16], xmm7 ; store them at offset 16 of the expanded encrypt keys | |
211 | vpshufd xmm4, xmm7, 01001111b | |
212 | vmovdqu xmm1, [KEY] ; load key bytes 0-15 (the first 128 bits of the key) | |
213 | vmovdqu [EXP_ENC_KEYS], xmm1 ; store them as encrypt round key 0 | |
214 | vmovdqu [EXP_DEC_KEYS + 16*0], xmm1 | |
215 | vmovdqu [EXP_DEC_KEYS + 16*12], xmm1 | |
216 | ||
217 | vpxor xmm3, xmm3, xmm3 | |
218 | vpxor xmm6, xmm6, xmm6 | |
219 | ||
220 | vaeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2 | |
221 | key_expansion_1_192_avx 24 | |
222 | key_expansion_2_192_avx 40 | |
223 | ||
224 | vaeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4 | |
225 | key_expansion_1_192_avx 48 | |
226 | key_expansion_2_192_avx 64 | |
227 | ||
228 | vaeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5 | |
229 | key_expansion_1_192_avx 72 | |
230 | key_expansion_2_192_avx 88 | |
231 | ||
232 | vaeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7 | |
233 | key_expansion_1_192_avx 96 | |
234 | key_expansion_2_192_avx 112 | |
235 | ||
236 | vaeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8 | |
237 | key_expansion_1_192_avx 120 | |
238 | key_expansion_2_192_avx 136 | |
239 | ||
240 | vaeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10 | |
241 | key_expansion_1_192_avx 144 | |
242 | key_expansion_2_192_avx 160 | |
243 | ||
244 | vaeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11 | |
245 | key_expansion_1_192_avx 168 | |
246 | key_expansion_2_192_avx 184 | |
247 | ||
248 | vaeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12 | |
249 | key_expansion_1_192_avx 192 | |
250 | ||
251 | ;;; decrypt key 12 (the unmodified input key) was already stored above; | |
252 | ;;; decrypt key 0 is encrypt key 12, copied as-is without vaesimc | |
253 | vmovdqu xmm0, [EXP_ENC_KEYS + 16 * 12] | |
254 | vmovdqu [EXP_DEC_KEYS + 16*0], xmm0 | |
255 | ;;; generate remaining decrypt keys | |
256 | key_dec_192_avx 1 | |
257 | key_dec_192_avx 2 | |
258 | key_dec_192_avx 3 | |
259 | key_dec_192_avx 4 | |
260 | key_dec_192_avx 5 | |
261 | key_dec_192_avx 6 | |
262 | key_dec_192_avx 7 | |
263 | key_dec_192_avx 8 | |
264 | key_dec_192_avx 9 | |
265 | key_dec_192_avx 10 | |
266 | key_dec_192_avx 11 | |
267 | ||
268 | %ifnidn __OUTPUT_FORMAT__, elf64 | |
269 | vmovdqu xmm6, [rsp + 0*16] | |
270 | vmovdqu xmm7, [rsp + 1*16] | |
271 | add rsp, 16*2 + 8 | |
272 | %endif | |
273 | ||
274 | ret |