]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | ;; |
2 | ;; Copyright (c) 2012-2018, Intel Corporation | |
3 | ;; | |
4 | ;; Redistribution and use in source and binary forms, with or without | |
5 | ;; modification, are permitted provided that the following conditions are met: | |
6 | ;; | |
7 | ;; * Redistributions of source code must retain the above copyright notice, | |
8 | ;; this list of conditions and the following disclaimer. | |
9 | ;; * Redistributions in binary form must reproduce the above copyright | |
10 | ;; notice, this list of conditions and the following disclaimer in the | |
11 | ;; documentation and/or other materials provided with the distribution. | |
12 | ;; * Neither the name of Intel Corporation nor the names of its contributors | |
13 | ;; may be used to endorse or promote products derived from this software | |
14 | ;; without specific prior written permission. | |
15 | ;; | |
16 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
17 | ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
19 | ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | |
20 | ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
22 | ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
23 | ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
24 | ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
25 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 | ;; | |
27 | ||
28 | ;;; routine to do a 192 bit CBC AES encrypt | |
29 | ||
30 | ;; clobbers all registers except for ARG1 and rbp | |
31 | ||
f67539c2 | 32 | %include "include/os.asm" |
11fdf7f2 TL |
33 | %include "mb_mgr_datastruct.asm" |
34 | ||
;; Data buffers are not assumed to be 16-byte aligned, so all plaintext and
;; ciphertext moves go through the unaligned form.
%define VMOVDQ	vmovdqu

;; VPXOR2 dst, src
;;	dst = dst XOR src
;; Two-operand wrapper around the three-operand AVX vpxor; in this file
;; src is always a memory operand.
%macro VPXOR2 2
	vpxor	%1, %1, %2
%endmacro
40 | ||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; struct AES_ARGS {
;;     void*    in[8];
;;     void*    out[8];
;;     UINT128* keys[8];
;;     UINT128  IV[8];
;; }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; void aes_cbc_enc_192_x8(AES_ARGS *args, UINT64 len);
;; arg 1: ARG : addr of AES_ARGS structure
;; arg 2: LEN : len (in units of bytes)
52 | ||
;; Local stack frame, addressed relative to rsp once STACK_size bytes
;; have been reserved.
struc STACK
_gpr_save:	resq	1	; holds the caller's rbp (rbp is reused as KEYS2)
_len:		resq	1	; copy of LEN (the LEN register is reused as IN6)
endstruc

%define GPR_SAVE_AREA	rsp + _gpr_save
%define LEN_AREA	rsp + _len

;; Argument registers depend on the target ABI; REG3/REG4 are the two
;; remaining registers of the {rdi, rsi, rcx, rdx} group and serve as
;; scratch (they become KEYS0/KEYS1 below).
%ifdef LINUX
%define ARG	rdi
%define LEN	rsi
%define REG3	rcx
%define REG4	rdx
%else
%define ARG	rcx
%define LEN	rdx
%define REG3	rsi
%define REG4	rdi
%endif

;; IDX: byte offset of the current block within every lane.
;; TMP: scratch pointer for the in/out pointers that have no dedicated register.
%define IDX	rax
%define TMP	rbx

;; Per-lane pointers to the expanded key schedules.
%define KEYS0	REG3
%define KEYS1	REG4
%define KEYS2	rbp
%define KEYS3	r8
%define KEYS4	r9
%define KEYS5	r10
%define KEYS6	r11
%define KEYS7	r12

;; Input pointers of the even lanes live in registers; the odd lanes are
;; re-read from the AES_ARGS structure through TMP.  IN6 shares a register
;; with LEN, so LEN must be stored to LEN_AREA before IN6 is loaded.
%define IN0	r13
%define IN2	r14
%define IN4	r15
%define IN6	LEN

;; One 128-bit state register per lane.
%define XDATA0	xmm0
%define XDATA1	xmm1
%define XDATA2	xmm2
%define XDATA3	xmm3
%define XDATA4	xmm4
%define XDATA5	xmm5
%define XDATA6	xmm6
%define XDATA7	xmm7

;; Cached round keys: XKEYn_r holds round key r of lane n.
%define XKEY0_3	xmm8
%define XKEY1_4	xmm9
%define XKEY2_5	xmm10
%define XKEY3_6	xmm11
%define XKEY4_7	xmm12
%define XKEY5_8	xmm13
%define XKEY6_9	xmm14
%define XTMP	xmm15
108 | section .text | |
109 | ||
;-----------------------------------------------------------------------
; void aes_cbc_enc_192_x8(AES_ARGS *args, UINT64 len);
;
; AES-192 CBC encryption of 8 independent lanes, one 16-byte block per
; lane per iteration (round keys 0..12 of each lane's key schedule).
;
; In:    ARG = args, LEN = len in bytes (see the %ifdef LINUX register map)
; Out:   ciphertext stored through args->out[0..7];
;        args->IV[i]  = last ciphertext block of lane i;
;        args->in[i] and args->out[i] advanced by len.
; Notes: - len is not validated: the first block of every lane is always
;          processed and the loop stops only when IDX == len, so len is
;          expected to be a non-zero multiple of 16.
;        - vmovdqa is used on the key schedules and on the in/out/IV
;          fields of *args, so those must be 16-byte aligned.
; Clobb: IDX, TMP, REG3, REG4, r8-r15, LEN, xmm0-xmm14, flags.
;        rbp is saved and restored here; ARG is left untouched.
;-----------------------------------------------------------------------
MKGLOBAL(aes_cbc_enc_192_x8,function,internal)
aes_cbc_enc_192_x8:

	sub	rsp, STACK_size
	mov	[GPR_SAVE_AREA + 8*0], rbp	; rbp doubles as KEYS2

	mov	IDX, 16				; first block is handled before the loop
	mov	[LEN_AREA], LEN			; LEN's register is about to become IN6

	mov	IN0,	[ARG + _aesarg_in + 8*0]
	mov	IN2,	[ARG + _aesarg_in + 8*2]
	mov	IN4,	[ARG + _aesarg_in + 8*4]
	mov	IN6,	[ARG + _aesarg_in + 8*6]

	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	;; first block of every lane

	mov	TMP, [ARG + _aesarg_in + 8*1]
	VMOVDQ	XDATA0, [IN0]		; load first block of plain text
	VMOVDQ	XDATA1, [TMP]		; load first block of plain text
	mov	TMP, [ARG + _aesarg_in + 8*3]
	VMOVDQ	XDATA2, [IN2]		; load first block of plain text
	VMOVDQ	XDATA3, [TMP]		; load first block of plain text
	mov	TMP, [ARG + _aesarg_in + 8*5]
	VMOVDQ	XDATA4, [IN4]		; load first block of plain text
	VMOVDQ	XDATA5, [TMP]		; load first block of plain text
	mov	TMP, [ARG + _aesarg_in + 8*7]
	VMOVDQ	XDATA6, [IN6]		; load first block of plain text
	VMOVDQ	XDATA7, [TMP]		; load first block of plain text


	VPXOR2	XDATA0, [ARG + _aesarg_IV + 16*0]  ; plaintext XOR IV
	VPXOR2	XDATA1, [ARG + _aesarg_IV + 16*1]  ; plaintext XOR IV
	VPXOR2	XDATA2, [ARG + _aesarg_IV + 16*2]  ; plaintext XOR IV
	VPXOR2	XDATA3, [ARG + _aesarg_IV + 16*3]  ; plaintext XOR IV
	VPXOR2	XDATA4, [ARG + _aesarg_IV + 16*4]  ; plaintext XOR IV
	VPXOR2	XDATA5, [ARG + _aesarg_IV + 16*5]  ; plaintext XOR IV
	VPXOR2	XDATA6, [ARG + _aesarg_IV + 16*6]  ; plaintext XOR IV
	VPXOR2	XDATA7, [ARG + _aesarg_IV + 16*7]  ; plaintext XOR IV

	mov	KEYS0,	[ARG + _aesarg_keys + 8*0]
	mov	KEYS1,	[ARG + _aesarg_keys + 8*1]
	mov	KEYS2,	[ARG + _aesarg_keys + 8*2]
	mov	KEYS3,	[ARG + _aesarg_keys + 8*3]
	mov	KEYS4,	[ARG + _aesarg_keys + 8*4]
	mov	KEYS5,	[ARG + _aesarg_keys + 8*5]
	mov	KEYS6,	[ARG + _aesarg_keys + 8*6]
	mov	KEYS7,	[ARG + _aesarg_keys + 8*7]

	VPXOR2	XDATA0, [KEYS0 + 16*0]		; 0. ARK
	VPXOR2	XDATA1, [KEYS1 + 16*0]		; 0. ARK
	VPXOR2	XDATA2, [KEYS2 + 16*0]		; 0. ARK
	VPXOR2	XDATA3, [KEYS3 + 16*0]		; 0. ARK
	VPXOR2	XDATA4, [KEYS4 + 16*0]		; 0. ARK
	VPXOR2	XDATA5, [KEYS5 + 16*0]		; 0. ARK
	VPXOR2	XDATA6, [KEYS6 + 16*0]		; 0. ARK
	VPXOR2	XDATA7, [KEYS7 + 16*0]		; 0. ARK

	vaesenc	XDATA0, [KEYS0 + 16*1]		; 1. ENC
	vaesenc	XDATA1, [KEYS1 + 16*1]		; 1. ENC
	vaesenc	XDATA2, [KEYS2 + 16*1]		; 1. ENC
	vaesenc	XDATA3, [KEYS3 + 16*1]		; 1. ENC
	vaesenc	XDATA4, [KEYS4 + 16*1]		; 1. ENC
	vaesenc	XDATA5, [KEYS5 + 16*1]		; 1. ENC
	vaesenc	XDATA6, [KEYS6 + 16*1]		; 1. ENC
	vaesenc	XDATA7, [KEYS7 + 16*1]		; 1. ENC

	;; One round key per lane 0..6 is cached in XKEYn_r during this
	;; first pass and reused by every iteration of main_loop.
	vmovdqa	XKEY0_3, [KEYS0 + 16*3]		; load round 3 key

	vaesenc	XDATA0, [KEYS0 + 16*2]		; 2. ENC
	vaesenc	XDATA1, [KEYS1 + 16*2]		; 2. ENC
	vaesenc	XDATA2, [KEYS2 + 16*2]		; 2. ENC
	vaesenc	XDATA3, [KEYS3 + 16*2]		; 2. ENC
	vaesenc	XDATA4, [KEYS4 + 16*2]		; 2. ENC
	vaesenc	XDATA5, [KEYS5 + 16*2]		; 2. ENC
	vaesenc	XDATA6, [KEYS6 + 16*2]		; 2. ENC
	vaesenc	XDATA7, [KEYS7 + 16*2]		; 2. ENC

	vmovdqa	XKEY1_4, [KEYS1 + 16*4]		; load round 4 key

	vaesenc	XDATA0, XKEY0_3			; 3. ENC
	vaesenc	XDATA1, [KEYS1 + 16*3]		; 3. ENC
	vaesenc	XDATA2, [KEYS2 + 16*3]		; 3. ENC
	vaesenc	XDATA3, [KEYS3 + 16*3]		; 3. ENC
	vaesenc	XDATA4, [KEYS4 + 16*3]		; 3. ENC
	vaesenc	XDATA5, [KEYS5 + 16*3]		; 3. ENC
	vaesenc	XDATA6, [KEYS6 + 16*3]		; 3. ENC
	vaesenc	XDATA7, [KEYS7 + 16*3]		; 3. ENC

	vaesenc	XDATA0, [KEYS0 + 16*4]		; 4. ENC
	vmovdqa	XKEY2_5, [KEYS2 + 16*5]		; load round 5 key
	vaesenc	XDATA1, XKEY1_4			; 4. ENC
	vaesenc	XDATA2, [KEYS2 + 16*4]		; 4. ENC
	vaesenc	XDATA3, [KEYS3 + 16*4]		; 4. ENC
	vaesenc	XDATA4, [KEYS4 + 16*4]		; 4. ENC
	vaesenc	XDATA5, [KEYS5 + 16*4]		; 4. ENC
	vaesenc	XDATA6, [KEYS6 + 16*4]		; 4. ENC
	vaesenc	XDATA7, [KEYS7 + 16*4]		; 4. ENC

	vaesenc	XDATA0, [KEYS0 + 16*5]		; 5. ENC
	vaesenc	XDATA1, [KEYS1 + 16*5]		; 5. ENC
	vmovdqa	XKEY3_6, [KEYS3 + 16*6]		; load round 6 key
	vaesenc	XDATA2, XKEY2_5			; 5. ENC
	vaesenc	XDATA3, [KEYS3 + 16*5]		; 5. ENC
	vaesenc	XDATA4, [KEYS4 + 16*5]		; 5. ENC
	vaesenc	XDATA5, [KEYS5 + 16*5]		; 5. ENC
	vaesenc	XDATA6, [KEYS6 + 16*5]		; 5. ENC
	vaesenc	XDATA7, [KEYS7 + 16*5]		; 5. ENC

	vaesenc	XDATA0, [KEYS0 + 16*6]		; 6. ENC
	vaesenc	XDATA1, [KEYS1 + 16*6]		; 6. ENC
	vaesenc	XDATA2, [KEYS2 + 16*6]		; 6. ENC
	vmovdqa	XKEY4_7, [KEYS4 + 16*7]		; load round 7 key
	vaesenc	XDATA3, XKEY3_6			; 6. ENC
	vaesenc	XDATA4, [KEYS4 + 16*6]		; 6. ENC
	vaesenc	XDATA5, [KEYS5 + 16*6]		; 6. ENC
	vaesenc	XDATA6, [KEYS6 + 16*6]		; 6. ENC
	vaesenc	XDATA7, [KEYS7 + 16*6]		; 6. ENC

	vaesenc	XDATA0, [KEYS0 + 16*7]		; 7. ENC
	vaesenc	XDATA1, [KEYS1 + 16*7]		; 7. ENC
	vaesenc	XDATA2, [KEYS2 + 16*7]		; 7. ENC
	vaesenc	XDATA3, [KEYS3 + 16*7]		; 7. ENC
	vmovdqa	XKEY5_8, [KEYS5 + 16*8]		; load round 8 key
	vaesenc	XDATA4, XKEY4_7			; 7. ENC
	vaesenc	XDATA5, [KEYS5 + 16*7]		; 7. ENC
	vaesenc	XDATA6, [KEYS6 + 16*7]		; 7. ENC
	vaesenc	XDATA7, [KEYS7 + 16*7]		; 7. ENC

	vaesenc	XDATA0, [KEYS0 + 16*8]		; 8. ENC
	vaesenc	XDATA1, [KEYS1 + 16*8]		; 8. ENC
	vaesenc	XDATA2, [KEYS2 + 16*8]		; 8. ENC
	vaesenc	XDATA3, [KEYS3 + 16*8]		; 8. ENC
	vaesenc	XDATA4, [KEYS4 + 16*8]		; 8. ENC
	vmovdqa	XKEY6_9, [KEYS6 + 16*9]		; load round 9 key
	vaesenc	XDATA5, XKEY5_8			; 8. ENC
	vaesenc	XDATA6, [KEYS6 + 16*8]		; 8. ENC
	vaesenc	XDATA7, [KEYS7 + 16*8]		; 8. ENC

	vaesenc	XDATA0, [KEYS0 + 16*9]		; 9. ENC
	vaesenc	XDATA1, [KEYS1 + 16*9]		; 9. ENC
	vaesenc	XDATA2, [KEYS2 + 16*9]		; 9. ENC
	vaesenc	XDATA3, [KEYS3 + 16*9]		; 9. ENC
	vaesenc	XDATA4, [KEYS4 + 16*9]		; 9. ENC
	vaesenc	XDATA5, [KEYS5 + 16*9]		; 9. ENC
	mov	TMP, [ARG + _aesarg_out + 8*0]	; fetch lane 0 output pointer early
	vaesenc	XDATA6, XKEY6_9			; 9. ENC
	vaesenc	XDATA7, [KEYS7 + 16*9]		; 9. ENC


	vaesenc	XDATA0, [KEYS0 + 16*10]		; 10. ENC
	vaesenc	XDATA1, [KEYS1 + 16*10]		; 10. ENC
	vaesenc	XDATA2, [KEYS2 + 16*10]		; 10. ENC
	vaesenc	XDATA3, [KEYS3 + 16*10]		; 10. ENC
	vaesenc	XDATA4, [KEYS4 + 16*10]		; 10. ENC
	vaesenc	XDATA5, [KEYS5 + 16*10]		; 10. ENC
	vaesenc	XDATA6, [KEYS6 + 16*10]		; 10. ENC
	vaesenc	XDATA7, [KEYS7 + 16*10]		; 10. ENC

	vaesenc	XDATA0, [KEYS0 + 16*11]		; 11. ENC
	vaesenc	XDATA1, [KEYS1 + 16*11]		; 11. ENC
	vaesenc	XDATA2, [KEYS2 + 16*11]		; 11. ENC
	vaesenc	XDATA3, [KEYS3 + 16*11]		; 11. ENC
	vaesenc	XDATA4, [KEYS4 + 16*11]		; 11. ENC
	vaesenc	XDATA5, [KEYS5 + 16*11]		; 11. ENC
	vaesenc	XDATA6, [KEYS6 + 16*11]		; 11. ENC
	vaesenc	XDATA7, [KEYS7 + 16*11]		; 11. ENC


	vaesenclast	XDATA0, [KEYS0 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA1, [KEYS1 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA2, [KEYS2 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA3, [KEYS3 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA4, [KEYS4 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA5, [KEYS5 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA6, [KEYS6 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA7, [KEYS7 + 16*12]	; 12. ENC (final round)

	VMOVDQ	[TMP], XDATA0		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*1]
	VMOVDQ	[TMP], XDATA1		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*2]
	VMOVDQ	[TMP], XDATA2		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*3]
	VMOVDQ	[TMP], XDATA3		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*4]
	VMOVDQ	[TMP], XDATA4		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*5]
	VMOVDQ	[TMP], XDATA5		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*6]
	VMOVDQ	[TMP], XDATA6		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*7]
	VMOVDQ	[TMP], XDATA7		; write back ciphertext

	cmp	[LEN_AREA], IDX		; len == 16: only one block per lane
	je	done

main_loop:
	;; Invariant: XDATAn holds the previous ciphertext block of lane n,
	;; IDX is the byte offset of the block about to be encrypted.
	mov	TMP, [ARG + _aesarg_in + 8*1]
	VPXOR2	XDATA0, [IN0 + IDX]	; next plaintext XOR previous ciphertext
	VPXOR2	XDATA1, [TMP + IDX]	; next plaintext XOR previous ciphertext
	mov	TMP, [ARG + _aesarg_in + 8*3]
	VPXOR2	XDATA2, [IN2 + IDX]	; next plaintext XOR previous ciphertext
	VPXOR2	XDATA3, [TMP + IDX]	; next plaintext XOR previous ciphertext
	mov	TMP, [ARG + _aesarg_in + 8*5]
	VPXOR2	XDATA4, [IN4 + IDX]	; next plaintext XOR previous ciphertext
	VPXOR2	XDATA5, [TMP + IDX]	; next plaintext XOR previous ciphertext
	mov	TMP, [ARG + _aesarg_in + 8*7]
	VPXOR2	XDATA6, [IN6 + IDX]	; next plaintext XOR previous ciphertext
	VPXOR2	XDATA7, [TMP + IDX]	; next plaintext XOR previous ciphertext


	VPXOR2	XDATA0, [KEYS0 + 16*0]		; 0. ARK
	VPXOR2	XDATA1, [KEYS1 + 16*0]		; 0. ARK
	VPXOR2	XDATA2, [KEYS2 + 16*0]		; 0. ARK
	VPXOR2	XDATA3, [KEYS3 + 16*0]		; 0. ARK
	VPXOR2	XDATA4, [KEYS4 + 16*0]		; 0. ARK
	VPXOR2	XDATA5, [KEYS5 + 16*0]		; 0. ARK
	VPXOR2	XDATA6, [KEYS6 + 16*0]		; 0. ARK
	VPXOR2	XDATA7, [KEYS7 + 16*0]		; 0. ARK

	vaesenc	XDATA0, [KEYS0 + 16*1]		; 1. ENC
	vaesenc	XDATA1, [KEYS1 + 16*1]		; 1. ENC
	vaesenc	XDATA2, [KEYS2 + 16*1]		; 1. ENC
	vaesenc	XDATA3, [KEYS3 + 16*1]		; 1. ENC
	vaesenc	XDATA4, [KEYS4 + 16*1]		; 1. ENC
	vaesenc	XDATA5, [KEYS5 + 16*1]		; 1. ENC
	vaesenc	XDATA6, [KEYS6 + 16*1]		; 1. ENC
	vaesenc	XDATA7, [KEYS7 + 16*1]		; 1. ENC

	vaesenc	XDATA0, [KEYS0 + 16*2]		; 2. ENC
	vaesenc	XDATA1, [KEYS1 + 16*2]		; 2. ENC
	vaesenc	XDATA2, [KEYS2 + 16*2]		; 2. ENC
	vaesenc	XDATA3, [KEYS3 + 16*2]		; 2. ENC
	vaesenc	XDATA4, [KEYS4 + 16*2]		; 2. ENC
	vaesenc	XDATA5, [KEYS5 + 16*2]		; 2. ENC
	vaesenc	XDATA6, [KEYS6 + 16*2]		; 2. ENC
	vaesenc	XDATA7, [KEYS7 + 16*2]		; 2. ENC

	vaesenc	XDATA0, XKEY0_3			; 3. ENC
	vaesenc	XDATA1, [KEYS1 + 16*3]		; 3. ENC
	vaesenc	XDATA2, [KEYS2 + 16*3]		; 3. ENC
	vaesenc	XDATA3, [KEYS3 + 16*3]		; 3. ENC
	vaesenc	XDATA4, [KEYS4 + 16*3]		; 3. ENC
	vaesenc	XDATA5, [KEYS5 + 16*3]		; 3. ENC
	vaesenc	XDATA6, [KEYS6 + 16*3]		; 3. ENC
	vaesenc	XDATA7, [KEYS7 + 16*3]		; 3. ENC

	vaesenc	XDATA0, [KEYS0 + 16*4]		; 4. ENC
	vaesenc	XDATA1, XKEY1_4			; 4. ENC
	vaesenc	XDATA2, [KEYS2 + 16*4]		; 4. ENC
	vaesenc	XDATA3, [KEYS3 + 16*4]		; 4. ENC
	vaesenc	XDATA4, [KEYS4 + 16*4]		; 4. ENC
	vaesenc	XDATA5, [KEYS5 + 16*4]		; 4. ENC
	vaesenc	XDATA6, [KEYS6 + 16*4]		; 4. ENC
	vaesenc	XDATA7, [KEYS7 + 16*4]		; 4. ENC

	vaesenc	XDATA0, [KEYS0 + 16*5]		; 5. ENC
	vaesenc	XDATA1, [KEYS1 + 16*5]		; 5. ENC
	vaesenc	XDATA2, XKEY2_5			; 5. ENC
	vaesenc	XDATA3, [KEYS3 + 16*5]		; 5. ENC
	vaesenc	XDATA4, [KEYS4 + 16*5]		; 5. ENC
	vaesenc	XDATA5, [KEYS5 + 16*5]		; 5. ENC
	vaesenc	XDATA6, [KEYS6 + 16*5]		; 5. ENC
	vaesenc	XDATA7, [KEYS7 + 16*5]		; 5. ENC

	vaesenc	XDATA0, [KEYS0 + 16*6]		; 6. ENC
	vaesenc	XDATA1, [KEYS1 + 16*6]		; 6. ENC
	vaesenc	XDATA2, [KEYS2 + 16*6]		; 6. ENC
	vaesenc	XDATA3, XKEY3_6			; 6. ENC
	vaesenc	XDATA4, [KEYS4 + 16*6]		; 6. ENC
	vaesenc	XDATA5, [KEYS5 + 16*6]		; 6. ENC
	vaesenc	XDATA6, [KEYS6 + 16*6]		; 6. ENC
	vaesenc	XDATA7, [KEYS7 + 16*6]		; 6. ENC

	vaesenc	XDATA0, [KEYS0 + 16*7]		; 7. ENC
	vaesenc	XDATA1, [KEYS1 + 16*7]		; 7. ENC
	vaesenc	XDATA2, [KEYS2 + 16*7]		; 7. ENC
	vaesenc	XDATA3, [KEYS3 + 16*7]		; 7. ENC
	vaesenc	XDATA4, XKEY4_7			; 7. ENC
	vaesenc	XDATA5, [KEYS5 + 16*7]		; 7. ENC
	vaesenc	XDATA6, [KEYS6 + 16*7]		; 7. ENC
	vaesenc	XDATA7, [KEYS7 + 16*7]		; 7. ENC

	vaesenc	XDATA0, [KEYS0 + 16*8]		; 8. ENC
	vaesenc	XDATA1, [KEYS1 + 16*8]		; 8. ENC
	vaesenc	XDATA2, [KEYS2 + 16*8]		; 8. ENC
	vaesenc	XDATA3, [KEYS3 + 16*8]		; 8. ENC
	vaesenc	XDATA4, [KEYS4 + 16*8]		; 8. ENC
	vaesenc	XDATA5, XKEY5_8			; 8. ENC
	vaesenc	XDATA6, [KEYS6 + 16*8]		; 8. ENC
	vaesenc	XDATA7, [KEYS7 + 16*8]		; 8. ENC

	vaesenc	XDATA0, [KEYS0 + 16*9]		; 9. ENC
	vaesenc	XDATA1, [KEYS1 + 16*9]		; 9. ENC
	vaesenc	XDATA2, [KEYS2 + 16*9]		; 9. ENC
	vaesenc	XDATA3, [KEYS3 + 16*9]		; 9. ENC
	vaesenc	XDATA4, [KEYS4 + 16*9]		; 9. ENC
	vaesenc	XDATA5, [KEYS5 + 16*9]		; 9. ENC
	mov	TMP, [ARG + _aesarg_out + 8*0]	; fetch lane 0 output pointer early
	vaesenc	XDATA6, XKEY6_9			; 9. ENC
	vaesenc	XDATA7, [KEYS7 + 16*9]		; 9. ENC


	vaesenc	XDATA0, [KEYS0 + 16*10]		; 10. ENC
	vaesenc	XDATA1, [KEYS1 + 16*10]		; 10. ENC
	vaesenc	XDATA2, [KEYS2 + 16*10]		; 10. ENC
	vaesenc	XDATA3, [KEYS3 + 16*10]		; 10. ENC
	vaesenc	XDATA4, [KEYS4 + 16*10]		; 10. ENC
	vaesenc	XDATA5, [KEYS5 + 16*10]		; 10. ENC
	vaesenc	XDATA6, [KEYS6 + 16*10]		; 10. ENC
	vaesenc	XDATA7, [KEYS7 + 16*10]		; 10. ENC

	vaesenc	XDATA0, [KEYS0 + 16*11]		; 11. ENC
	vaesenc	XDATA1, [KEYS1 + 16*11]		; 11. ENC
	vaesenc	XDATA2, [KEYS2 + 16*11]		; 11. ENC
	vaesenc	XDATA3, [KEYS3 + 16*11]		; 11. ENC
	vaesenc	XDATA4, [KEYS4 + 16*11]		; 11. ENC
	vaesenc	XDATA5, [KEYS5 + 16*11]		; 11. ENC
	vaesenc	XDATA6, [KEYS6 + 16*11]		; 11. ENC
	vaesenc	XDATA7, [KEYS7 + 16*11]		; 11. ENC

	vaesenclast	XDATA0, [KEYS0 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA1, [KEYS1 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA2, [KEYS2 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA3, [KEYS3 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA4, [KEYS4 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA5, [KEYS5 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA6, [KEYS6 + 16*12]	; 12. ENC (final round)
	vaesenclast	XDATA7, [KEYS7 + 16*12]	; 12. ENC (final round)


	VMOVDQ	[TMP + IDX], XDATA0		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*1]
	VMOVDQ	[TMP + IDX], XDATA1		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*2]
	VMOVDQ	[TMP + IDX], XDATA2		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*3]
	VMOVDQ	[TMP + IDX], XDATA3		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*4]
	VMOVDQ	[TMP + IDX], XDATA4		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*5]
	VMOVDQ	[TMP + IDX], XDATA5		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*6]
	VMOVDQ	[TMP + IDX], XDATA6		; write back ciphertext
	mov	TMP, [ARG + _aesarg_out + 8*7]
	VMOVDQ	[TMP + IDX], XDATA7		; write back ciphertext

	add	IDX, 16
	cmp	[LEN_AREA], IDX			; exits only on exact equality with len
	jne	main_loop

done:
	;; update IV: the last ciphertext block of each lane becomes its next IV
	vmovdqa	[ARG + _aesarg_IV + 16*0], XDATA0
	vmovdqa	[ARG + _aesarg_IV + 16*1], XDATA1
	vmovdqa	[ARG + _aesarg_IV + 16*2], XDATA2
	vmovdqa	[ARG + _aesarg_IV + 16*3], XDATA3
	vmovdqa	[ARG + _aesarg_IV + 16*4], XDATA4
	vmovdqa	[ARG + _aesarg_IV + 16*5], XDATA5
	vmovdqa	[ARG + _aesarg_IV + 16*6], XDATA6
	vmovdqa	[ARG + _aesarg_IV + 16*7], XDATA7

	;; update IN and OUT: add len to all 8 pointers, two pointers per add.
	;; vmovq loads the full 64-bit len (vmovd would keep only its low
	;; 32 bits); vpshufd 0x44 then copies the low qword into both halves.
	vmovq	xmm0, [LEN_AREA]
	vpshufd	xmm0, xmm0, 0x44
	vpaddq	xmm1, xmm0, [ARG + _aesarg_in + 16*0]
	vpaddq	xmm2, xmm0, [ARG + _aesarg_in + 16*1]
	vpaddq	xmm3, xmm0, [ARG + _aesarg_in + 16*2]
	vpaddq	xmm4, xmm0, [ARG + _aesarg_in + 16*3]
	vmovdqa	[ARG + _aesarg_in + 16*0], xmm1
	vmovdqa	[ARG + _aesarg_in + 16*1], xmm2
	vmovdqa	[ARG + _aesarg_in + 16*2], xmm3
	vmovdqa	[ARG + _aesarg_in + 16*3], xmm4
	vpaddq	xmm5, xmm0, [ARG + _aesarg_out + 16*0]
	vpaddq	xmm6, xmm0, [ARG + _aesarg_out + 16*1]
	vpaddq	xmm7, xmm0, [ARG + _aesarg_out + 16*2]
	vpaddq	xmm8, xmm0, [ARG + _aesarg_out + 16*3]
	vmovdqa	[ARG + _aesarg_out + 16*0], xmm5
	vmovdqa	[ARG + _aesarg_out + 16*1], xmm6
	vmovdqa	[ARG + _aesarg_out + 16*2], xmm7
	vmovdqa	[ARG + _aesarg_out + 16*3], xmm8

	;; XMMs are saved at a higher level
	mov	rbp, [GPR_SAVE_AREA + 8*0]

	add	rsp, STACK_size

	ret
498 | ||
;; ELF builds only: emit an empty .note.GNU-stack section flagged noexec
;; so this object does not request an executable stack.
%ifdef LINUX
section .note.GNU-stack noalloc noexec nowrite progbits
%endif