]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | ;; |
2 | ;; Copyright (c) 2012-2018, Intel Corporation | |
3 | ;; | |
4 | ;; Redistribution and use in source and binary forms, with or without | |
5 | ;; modification, are permitted provided that the following conditions are met: | |
6 | ;; | |
7 | ;; * Redistributions of source code must retain the above copyright notice, | |
8 | ;; this list of conditions and the following disclaimer. | |
9 | ;; * Redistributions in binary form must reproduce the above copyright | |
10 | ;; notice, this list of conditions and the following disclaimer in the | |
11 | ;; documentation and/or other materials provided with the distribution. | |
12 | ;; * Neither the name of Intel Corporation nor the names of its contributors | |
13 | ;; may be used to endorse or promote products derived from this software | |
14 | ;; without specific prior written permission. | |
15 | ;; | |
16 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
17 | ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
19 | ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | |
20 | ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
22 | ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
23 | ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
24 | ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
25 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 | ;; | |
27 | ||
28 | ; Routine to do AES-256 CBC decryption on 16n bytes, processing up to 4 blocks at a time | |
29 | ||
30 | ; XMM registers are clobbered. Saving/restoring must be done at a higher level | |
31 | ||
32 | ; void aes_cbc_dec_256_sse(void *in, | |
33 | ; UINT128 *IV, | |
34 | ; UINT128 keys[15], | |
35 | ; void *out, | |
36 | ; UINT64 len_bytes); | |
37 | ; | |
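38 | ; arg 1: rcx (Windows) / rdi (Linux): pointer to input (cipher text) | |
39 | ; arg 2: rdx (Windows) / rsi (Linux): pointer to IV | |
40 | ; arg 3: r8 (Windows) / rdx (Linux): pointer to keys | |
41 | ; arg 4: r9 (Windows) / rcx (Linux): pointer to output (plain text) | |
42 | ; arg 5: stack [rsp + 8*5] (Windows) / r8 (Linux): length in bytes (multiple of 16) | |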
43 | ; | |
44 | ||
f67539c2 | 45 | %include "include/os.asm" |
11fdf7f2 | 46 | |
9f95a23c TL |
; Default exported symbol name; used only if AES_CBC_DEC_256 is not already defined.
47 | %ifndef AES_CBC_DEC_256 |
48 | %define AES_CBC_DEC_256 aes_cbc_dec_256_sse | |
49 | %endif | |
50 | ||
11fdf7f2 TL |
; MOVDQ is not referenced by the code visible in this file, which spells out
; movdqu / movdqa directly.
51 | %define MOVDQ movdqu |
52 | ||
; Argument registers. The LINUX branch takes all five arguments in registers
; (rdi, rsi, rdx, rcx, r8). The other branch takes four in rcx, rdx, r8, r9 and
; keeps LEN in r10, which the function entry loads from the stack.
53 | %ifdef LINUX | |
54 | %define IN rdi | |
55 | %define IV rsi | |
56 | %define KEYS rdx | |
57 | %define OUT rcx | |
58 | %define LEN r8 | |
59 | %else | |
60 | %define IN rcx | |
61 | %define IV rdx | |
62 | %define KEYS r8 | |
63 | %define OUT r9 | |
64 | %define LEN r10 | |
65 | %endif | |
66 | ||
; IDX is the running byte offset into IN/OUT; TMP is an alias of the same
; register (rax) and is only used for the entry dispatch.
67 | %define IDX rax | |
68 | %define TMP IDX | |
; XDATA0-3: the (up to) four blocks being decrypted.
69 | %define XDATA0 xmm0 | |
70 | %define XDATA1 xmm1 | |
71 | %define XDATA2 xmm2 | |
72 | %define XDATA3 xmm3 | |
; XKEY0/2/4/6/10: round keys 0, 2, 4, 6 and 10. They are loaded by the
; initial_* paths and read by main_loop without being reloaded.
73 | %define XKEY0 xmm4 | |
74 | %define XKEY2 xmm5 | |
75 | %define XKEY4 xmm6 | |
76 | %define XKEY6 xmm7 | |
77 | %define XKEY10 xmm8 | |
; XIV: chaining value, i.e. the last ciphertext block of the previous group.
78 | %define XIV xmm9 | |
; XSAVED0-3: copies of the ciphertext blocks, needed for the CBC XOR after decryption.
79 | %define XSAVED0 xmm10 | |
80 | %define XSAVED1 xmm11 | |
81 | %define XSAVED2 xmm12 | |
82 | %define XSAVED3 xmm13 | |
; XKEY_A / XKEY_B: scratch registers for the round keys that are not kept resident.
83 | %define XKEY_A xmm14 | |
84 | %define XKEY_B xmm15 | |
85 | ||
; IV_TMP shares xmm13 with XSAVED3; the initial_* paths use it to hold the IV read from [IV].
86 | %define IV_TMP XSAVED3 | |
87 | ||
88 | section .text | |
89 | ||
9f95a23c TL |
;-----------------------------------------------------------------------
; void AES_CBC_DEC_256(void *in, UINT128 *IV, UINT128 keys[15],
;                      void *out, UINT64 len_bytes)
;
; AES-256 CBC decryption of len_bytes (a multiple of 16) from IN to OUT.
; In:    IN, IV, KEYS, OUT, LEN as mapped by the %define block above.
;        KEYS is read with movdqa / aesdec memory operands, so it must be
;        16-byte aligned; IN, OUT and IV are accessed with movdqu.
; Out:   plaintext written to OUT; [IV] is not updated.
; Clobb: rax (IDX/TMP), r10 in the non-LINUX build, flags, and the XMM
;        registers used by the selected path. Nothing is saved or restored.
;
; Dispatch: the first (LEN / 16) mod 4 blocks (or the first 4 when that is 0)
; are handled by one of the initial_* paths, which also preload the
; resident round keys; the rest is processed 4 blocks at a time in main_loop.
;-----------------------------------------------------------------------
90 | MKGLOBAL(AES_CBC_DEC_256,function,internal) |
91 | AES_CBC_DEC_256: | |
11fdf7f2 TL |
92 | %ifndef LINUX |
93 | mov LEN, [rsp + 8*5] | |
94 | %endif | |
95 | ||
; LEN == 0: nothing to decrypt. Without this check a zero length gives
; (LEN & 3*16) == 0 and enters initial_4, which would read and write four
; blocks that were never supplied.
test LEN, LEN
jz done

; TMP = LEN & 48 = byte count of the leading partial group (0, 16, 32 or 48).
96 | mov TMP, LEN | |
97 | and TMP, 3*16 | |
98 | jz initial_4 | |
; 16 -> initial_1, 48 -> initial_3, 32 falls through into initial_2.
99 | cmp TMP, 2*16 | |
100 | jb initial_1 | |
101 | ja initial_3 | |
102 | ||
; Reached by fall-through from the entry dispatch when (LEN & 3*16) == 2*16.
; Decrypts the first two blocks, loads the resident round keys (XKEY0, XKEY2,
; XKEY4, XKEY6, XKEY10), leaves IDX = 2*16 and the last ciphertext block in
; XIV, then either finishes or continues in main_loop.
103 | initial_2: | |
104 | ; load the first two ciphertext blocks (unaligned loads) | |
105 | movdqu XDATA0, [IN + 0*16] | |
106 | movdqu XDATA1, [IN + 1*16] | |
107 | ||
108 | movdqa XKEY0, [KEYS + 0*16] | |
109 | ||
110 | ; keep ciphertext copies: block 0 is XORed into decrypted block 1, block 1 becomes the next chaining value | |
111 | movdqa XSAVED0, XDATA0 | |
112 | movdqa XIV, XDATA1 | |
113 | ||
114 | pxor XDATA0, XKEY0 ; 0. ARK (XOR with round key 0) | |
115 | pxor XDATA1, XKEY0 | |
116 | ||
117 | movdqa XKEY2, [KEYS + 2*16] | |
118 | ||
119 | aesdec XDATA0, [KEYS + 1*16] ; 1. DEC | |
120 | aesdec XDATA1, [KEYS + 1*16] | |
121 | ||
; IDX = byte offset of the first block not handled by this path.
122 | mov IDX, 2*16 | |
123 | ||
124 | aesdec XDATA0, XKEY2 ; 2. DEC | |
125 | aesdec XDATA1, XKEY2 | |
126 | ||
127 | movdqa XKEY4, [KEYS + 4*16] | |
128 | ||
129 | aesdec XDATA0, [KEYS + 3*16] ; 3. DEC | |
130 | aesdec XDATA1, [KEYS + 3*16] | |
131 | ||
; Caller's IV; XORed into block 0 after the final round.
132 | movdqu IV_TMP, [IV] | |
133 | ||
134 | aesdec XDATA0, XKEY4 ; 4. DEC | |
135 | aesdec XDATA1, XKEY4 | |
136 | ||
137 | movdqa XKEY6, [KEYS + 6*16] | |
138 | ||
139 | aesdec XDATA0, [KEYS + 5*16] ; 5. DEC | |
140 | aesdec XDATA1, [KEYS + 5*16] | |
141 | ||
142 | aesdec XDATA0, XKEY6 ; 6. DEC | |
143 | aesdec XDATA1, XKEY6 | |
144 | ||
145 | movdqa XKEY_B, [KEYS + 8*16] | |
146 | ||
147 | aesdec XDATA0, [KEYS + 7*16] ; 7. DEC | |
148 | aesdec XDATA1, [KEYS + 7*16] | |
149 | ||
150 | aesdec XDATA0, XKEY_B ; 8. DEC | |
151 | aesdec XDATA1, XKEY_B | |
152 | ||
153 | movdqa XKEY10, [KEYS + 10*16] | |
154 | ||
155 | aesdec XDATA0, [KEYS + 9*16] ; 9. DEC | |
156 | aesdec XDATA1, [KEYS + 9*16] | |
157 | ||
158 | aesdec XDATA0, XKEY10 ; 10. DEC | |
159 | aesdec XDATA1, XKEY10 | |
160 | ||
161 | aesdec XDATA0, [KEYS + 11*16] ; 11. DEC | |
162 | aesdec XDATA1, [KEYS + 11*16] | |
163 | ||
164 | aesdec XDATA0, [KEYS + 12*16] ; 12. DEC | |
165 | aesdec XDATA1, [KEYS + 12*16] | |
166 | ||
167 | aesdec XDATA0, [KEYS + 13*16] ; 13. DEC | |
168 | aesdec XDATA1, [KEYS + 13*16] | |
169 | ||
170 | aesdeclast XDATA0, [KEYS + 14*16] ; 14. DEC (final round, aesdeclast) | |
171 | aesdeclast XDATA1, [KEYS + 14*16] | |
172 | ||
; CBC chaining: block 0 is XORed with the IV, block 1 with ciphertext block 0.
173 | pxor XDATA0, IV_TMP | |
174 | pxor XDATA1, XSAVED0 | |
175 | ||
176 | movdqu [OUT + 0*16], XDATA0 | |
177 | movdqu [OUT + 1*16], XDATA1 | |
178 | ||
; Exactly two blocks in total -> done; otherwise main_loop handles the rest.
179 | cmp LEN, 2*16 | |
180 | je done | |
181 | jmp main_loop | |
182 | ||
183 | ||
; Entered from the entry dispatch when (LEN & 3*16) == 1*16.
; Decrypts the first block, loads the resident round keys (XKEY0, XKEY2,
; XKEY4, XKEY6, XKEY10), leaves IDX = 1*16 and the ciphertext block in XIV,
; then either finishes or continues in main_loop.
184 | align 16 | |
185 | initial_1: | |
186 | ; load the first ciphertext block (unaligned load) | |
187 | movdqu XDATA0, [IN + 0*16] | |
188 | ||
189 | movdqa XKEY0, [KEYS + 0*16] | |
190 | ||
191 | ; keep a ciphertext copy: it becomes the chaining value for the next block | |
192 | movdqa XIV, XDATA0 | |
193 | ||
194 | pxor XDATA0, XKEY0 ; 0. ARK (XOR with round key 0) | |
195 | ||
196 | movdqa XKEY2, [KEYS + 2*16] | |
197 | ||
198 | aesdec XDATA0, [KEYS + 1*16] ; 1. DEC | |
199 | ||
; IDX = byte offset of the first block not handled by this path.
200 | mov IDX, 1*16 | |
201 | ||
202 | aesdec XDATA0, XKEY2 ; 2. DEC | |
203 | ||
204 | movdqa XKEY4, [KEYS + 4*16] | |
205 | ||
206 | aesdec XDATA0, [KEYS + 3*16] ; 3. DEC | |
207 | ||
; Caller's IV; XORed into the block after the final round.
208 | movdqu IV_TMP, [IV] | |
209 | ||
210 | aesdec XDATA0, XKEY4 ; 4. DEC | |
211 | ||
212 | movdqa XKEY6, [KEYS + 6*16] | |
213 | ||
214 | aesdec XDATA0, [KEYS + 5*16] ; 5. DEC | |
215 | ||
216 | aesdec XDATA0, XKEY6 ; 6. DEC | |
217 | ||
218 | movdqa XKEY_B, [KEYS + 8*16] | |
219 | ||
220 | aesdec XDATA0, [KEYS + 7*16] ; 7. DEC | |
221 | ||
222 | aesdec XDATA0, XKEY_B ; 8. DEC | |
223 | ||
224 | movdqa XKEY10, [KEYS + 10*16] | |
225 | ||
226 | aesdec XDATA0, [KEYS + 9*16] ; 9. DEC | |
227 | ||
228 | aesdec XDATA0, XKEY10 ; 10. DEC | |
229 | ||
230 | aesdec XDATA0, [KEYS + 11*16] ; 11. DEC | |
231 | ||
232 | aesdec XDATA0, [KEYS + 12*16] ; 12. DEC | |
233 | ||
234 | aesdec XDATA0, [KEYS + 13*16] ; 13. DEC | |
235 | ||
236 | aesdeclast XDATA0, [KEYS + 14*16] ; 14. DEC (final round, aesdeclast) | |
237 | ||
; CBC chaining: the first block is XORed with the IV.
238 | pxor XDATA0, IV_TMP | |
239 | ||
240 | movdqu [OUT + 0*16], XDATA0 | |
241 | ||
; Exactly one block in total -> done; otherwise main_loop handles the rest.
242 | cmp LEN, 1*16 | |
243 | je done | |
244 | jmp main_loop | |
245 | ||
246 | ||
; Entered from the entry dispatch when (LEN & 3*16) == 3*16.
; Decrypts the first three blocks, loads the resident round keys (XKEY0,
; XKEY2, XKEY4, XKEY6, XKEY10), leaves IDX = 3*16 and the last ciphertext
; block in XIV, then either finishes or continues in main_loop.
; The remaining round keys are staged alternately in XKEY_A / XKEY_B, each
; loaded one step ahead of its use.
247 | initial_3: | |
248 | ; load the first three ciphertext blocks (unaligned loads) | |
249 | movdqu XDATA0, [IN + 0*16] | |
250 | movdqu XDATA1, [IN + 1*16] | |
251 | movdqu XDATA2, [IN + 2*16] | |
252 | ||
253 | movdqa XKEY0, [KEYS + 0*16] | |
254 | ||
255 | ; keep ciphertext copies: blocks 0-1 are XORed into decrypted blocks 1-2, block 2 becomes the next chaining value | |
256 | movdqa XSAVED0, XDATA0 | |
257 | movdqa XSAVED1, XDATA1 | |
258 | movdqa XIV, XDATA2 | |
259 | ||
260 | movdqa XKEY_A, [KEYS + 1*16] | |
261 | ||
262 | pxor XDATA0, XKEY0 ; 0. ARK (XOR with round key 0) | |
263 | pxor XDATA1, XKEY0 | |
264 | pxor XDATA2, XKEY0 | |
265 | ||
266 | movdqa XKEY2, [KEYS + 2*16] | |
267 | ||
268 | aesdec XDATA0, XKEY_A ; 1. DEC | |
269 | aesdec XDATA1, XKEY_A | |
270 | aesdec XDATA2, XKEY_A | |
271 | ||
272 | movdqa XKEY_A, [KEYS + 3*16] | |
; IDX = byte offset of the first block not handled by this path.
273 | mov IDX, 3*16 | |
274 | ||
275 | aesdec XDATA0, XKEY2 ; 2. DEC | |
276 | aesdec XDATA1, XKEY2 | |
277 | aesdec XDATA2, XKEY2 | |
278 | ||
279 | movdqa XKEY4, [KEYS + 4*16] | |
280 | ||
281 | aesdec XDATA0, XKEY_A ; 3. DEC | |
282 | aesdec XDATA1, XKEY_A | |
283 | aesdec XDATA2, XKEY_A | |
284 | ||
285 | movdqa XKEY_A, [KEYS + 5*16] | |
; Caller's IV; XORed into block 0 after the final round.
286 | movdqu IV_TMP, [IV] | |
287 | ||
288 | aesdec XDATA0, XKEY4 ; 4. DEC | |
289 | aesdec XDATA1, XKEY4 | |
290 | aesdec XDATA2, XKEY4 | |
291 | ||
292 | movdqa XKEY6, [KEYS + 6*16] | |
293 | ||
294 | aesdec XDATA0, XKEY_A ; 5. DEC | |
295 | aesdec XDATA1, XKEY_A | |
296 | aesdec XDATA2, XKEY_A | |
297 | ||
298 | movdqa XKEY_A, [KEYS + 7*16] | |
299 | ||
300 | aesdec XDATA0, XKEY6 ; 6. DEC | |
301 | aesdec XDATA1, XKEY6 | |
302 | aesdec XDATA2, XKEY6 | |
303 | ||
304 | movdqa XKEY_B, [KEYS + 8*16] | |
305 | ||
306 | aesdec XDATA0, XKEY_A ; 7. DEC | |
307 | aesdec XDATA1, XKEY_A | |
308 | aesdec XDATA2, XKEY_A | |
309 | ||
310 | movdqa XKEY_A, [KEYS + 9*16] | |
311 | ||
312 | aesdec XDATA0, XKEY_B ; 8. DEC | |
313 | aesdec XDATA1, XKEY_B | |
314 | aesdec XDATA2, XKEY_B | |
315 | ||
316 | movdqa XKEY10, [KEYS + 10*16] | |
317 | ||
318 | aesdec XDATA0, XKEY_A ; 9. DEC | |
319 | aesdec XDATA1, XKEY_A | |
320 | aesdec XDATA2, XKEY_A | |
321 | ||
322 | movdqa XKEY_A, [KEYS + 11*16] | |
323 | ||
324 | aesdec XDATA0, XKEY10 ; 10. DEC | |
325 | aesdec XDATA1, XKEY10 | |
326 | aesdec XDATA2, XKEY10 | |
327 | ||
328 | movdqa XKEY_B, [KEYS + 12*16] | |
329 | ||
330 | aesdec XDATA0, XKEY_A ; 11. DEC | |
331 | aesdec XDATA1, XKEY_A | |
332 | aesdec XDATA2, XKEY_A | |
333 | ||
334 | movdqa XKEY_A, [KEYS + 13*16] | |
335 | ||
336 | aesdec XDATA0, XKEY_B ; 12. DEC | |
337 | aesdec XDATA1, XKEY_B | |
338 | aesdec XDATA2, XKEY_B | |
339 | ||
340 | movdqa XKEY_B, [KEYS + 14*16] | |
341 | ||
342 | aesdec XDATA0, XKEY_A ; 13. DEC | |
343 | aesdec XDATA1, XKEY_A | |
344 | aesdec XDATA2, XKEY_A | |
345 | ||
346 | aesdeclast XDATA0, XKEY_B ; 14. DEC (final round, aesdeclast) | |
347 | aesdeclast XDATA1, XKEY_B | |
348 | aesdeclast XDATA2, XKEY_B | |
349 | ||
; CBC chaining: block 0 is XORed with the IV, blocks 1-2 with the preceding ciphertext block.
350 | pxor XDATA0, IV_TMP | |
351 | pxor XDATA1, XSAVED0 | |
352 | pxor XDATA2, XSAVED1 | |
353 | ||
354 | movdqu [OUT + 0*16], XDATA0 | |
355 | movdqu [OUT + 1*16], XDATA1 | |
356 | movdqu [OUT + 2*16], XDATA2 | |
357 | ||
; Exactly three blocks in total -> done; otherwise main_loop handles the rest.
358 | cmp LEN, 3*16 | |
359 | je done | |
360 | jmp main_loop | |
361 | ||
362 | ||
; Entered from the entry dispatch when (LEN & 3*16) == 0.
; Decrypts the first four blocks, loads the resident round keys (XKEY0,
; XKEY2, XKEY4, XKEY6, XKEY10), leaves IDX = 4*16 and the last ciphertext
; block in XIV, then either finishes or continues in main_loop.
; The remaining round keys are staged alternately in XKEY_A / XKEY_B, each
; loaded one step ahead of its use.
363 | align 16 | |
364 | initial_4: | |
365 | ; load the first four ciphertext blocks (unaligned loads) | |
366 | movdqu XDATA0, [IN + 0*16] | |
367 | movdqu XDATA1, [IN + 1*16] | |
368 | movdqu XDATA2, [IN + 2*16] | |
369 | movdqu XDATA3, [IN + 3*16] | |
370 | ||
371 | movdqa XKEY0, [KEYS + 0*16] | |
372 | ||
373 | ; keep ciphertext copies: blocks 0-2 are XORed into decrypted blocks 1-3, block 3 becomes the next chaining value | |
374 | movdqa XSAVED0, XDATA0 | |
375 | movdqa XSAVED1, XDATA1 | |
376 | movdqa XSAVED2, XDATA2 | |
377 | movdqa XIV, XDATA3 | |
378 | ||
379 | movdqa XKEY_A, [KEYS + 1*16] | |
380 | ||
381 | pxor XDATA0, XKEY0 ; 0. ARK (XOR with round key 0) | |
382 | pxor XDATA1, XKEY0 | |
383 | pxor XDATA2, XKEY0 | |
384 | pxor XDATA3, XKEY0 | |
385 | ||
386 | movdqa XKEY2, [KEYS + 2*16] | |
387 | ||
388 | aesdec XDATA0, XKEY_A ; 1. DEC | |
389 | aesdec XDATA1, XKEY_A | |
390 | aesdec XDATA2, XKEY_A | |
391 | aesdec XDATA3, XKEY_A | |
392 | ||
393 | movdqa XKEY_A, [KEYS + 3*16] | |
394 | ||
; IDX = byte offset of the first block not handled by this path.
395 | mov IDX, 4*16 | |
396 | ||
397 | aesdec XDATA0, XKEY2 ; 2. DEC | |
398 | aesdec XDATA1, XKEY2 | |
399 | aesdec XDATA2, XKEY2 | |
400 | aesdec XDATA3, XKEY2 | |
401 | ||
402 | movdqa XKEY4, [KEYS + 4*16] | |
403 | ||
404 | aesdec XDATA0, XKEY_A ; 3. DEC | |
405 | aesdec XDATA1, XKEY_A | |
406 | aesdec XDATA2, XKEY_A | |
407 | aesdec XDATA3, XKEY_A | |
408 | ||
409 | movdqa XKEY_A, [KEYS + 5*16] | |
410 | ||
; Caller's IV; XORed into block 0 after the final round.
411 | movdqu IV_TMP, [IV] | |
412 | ||
413 | aesdec XDATA0, XKEY4 ; 4. DEC | |
414 | aesdec XDATA1, XKEY4 | |
415 | aesdec XDATA2, XKEY4 | |
416 | aesdec XDATA3, XKEY4 | |
417 | ||
418 | movdqa XKEY6, [KEYS + 6*16] | |
419 | ||
420 | aesdec XDATA0, XKEY_A ; 5. DEC | |
421 | aesdec XDATA1, XKEY_A | |
422 | aesdec XDATA2, XKEY_A | |
423 | aesdec XDATA3, XKEY_A | |
424 | ||
425 | movdqa XKEY_A, [KEYS + 7*16] | |
426 | ||
427 | aesdec XDATA0, XKEY6 ; 6. DEC | |
428 | aesdec XDATA1, XKEY6 | |
429 | aesdec XDATA2, XKEY6 | |
430 | aesdec XDATA3, XKEY6 | |
431 | ||
432 | movdqa XKEY_B, [KEYS + 8*16] | |
433 | ||
434 | aesdec XDATA0, XKEY_A ; 7. DEC | |
435 | aesdec XDATA1, XKEY_A | |
436 | aesdec XDATA2, XKEY_A | |
437 | aesdec XDATA3, XKEY_A | |
438 | ||
439 | movdqa XKEY_A, [KEYS + 9*16] | |
440 | ||
441 | aesdec XDATA0, XKEY_B ; 8. DEC | |
442 | aesdec XDATA1, XKEY_B | |
443 | aesdec XDATA2, XKEY_B | |
444 | aesdec XDATA3, XKEY_B | |
445 | ||
446 | movdqa XKEY10, [KEYS + 10*16] | |
447 | ||
448 | aesdec XDATA0, XKEY_A ; 9. DEC | |
449 | aesdec XDATA1, XKEY_A | |
450 | aesdec XDATA2, XKEY_A | |
451 | aesdec XDATA3, XKEY_A | |
452 | ||
453 | movdqa XKEY_A, [KEYS + 11*16] | |
454 | ||
455 | aesdec XDATA0, XKEY10 ; 10. DEC | |
456 | aesdec XDATA1, XKEY10 | |
457 | aesdec XDATA2, XKEY10 | |
458 | aesdec XDATA3, XKEY10 | |
459 | ||
460 | movdqa XKEY_B, [KEYS + 12*16] | |
461 | ||
462 | aesdec XDATA0, XKEY_A ; 11. DEC | |
463 | aesdec XDATA1, XKEY_A | |
464 | aesdec XDATA2, XKEY_A | |
465 | aesdec XDATA3, XKEY_A | |
466 | ||
467 | movdqa XKEY_A, [KEYS + 13*16] | |
468 | ||
469 | aesdec XDATA0, XKEY_B ; 12. DEC | |
470 | aesdec XDATA1, XKEY_B | |
471 | aesdec XDATA2, XKEY_B | |
472 | aesdec XDATA3, XKEY_B | |
473 | ||
474 | movdqa XKEY_B, [KEYS + 14*16] | |
475 | ||
476 | aesdec XDATA0, XKEY_A ; 13. DEC | |
477 | aesdec XDATA1, XKEY_A | |
478 | aesdec XDATA2, XKEY_A | |
479 | aesdec XDATA3, XKEY_A | |
480 | ||
481 | aesdeclast XDATA0, XKEY_B ; 14. DEC (final round, aesdeclast) | |
482 | aesdeclast XDATA1, XKEY_B | |
483 | aesdeclast XDATA2, XKEY_B | |
484 | aesdeclast XDATA3, XKEY_B | |
485 | ||
; CBC chaining: block 0 is XORed with the IV, blocks 1-3 with the preceding ciphertext block.
486 | pxor XDATA0, IV_TMP | |
487 | pxor XDATA1, XSAVED0 | |
488 | pxor XDATA2, XSAVED1 | |
489 | pxor XDATA3, XSAVED2 | |
490 | ||
491 | movdqu [OUT + 0*16], XDATA0 | |
492 | movdqu [OUT + 1*16], XDATA1 | |
493 | movdqu [OUT + 2*16], XDATA2 | |
494 | movdqu [OUT + 3*16], XDATA3 | |
495 | ||
; Exactly four blocks in total -> done; otherwise main_loop handles the rest.
496 | cmp LEN, 4*16 | |
497 | jz done | |
498 | jmp main_loop | |
499 | ||
; Steady-state loop: decrypts four blocks per iteration starting at byte
; offset IDX, until IDX == LEN.
; On entry it expects, from one of the initial_* paths:
;   IDX   = number of bytes already processed
;   XIV   = last ciphertext block already processed (chaining value)
;   XKEY0, XKEY2, XKEY4, XKEY6, XKEY10 = round keys 0, 2, 4, 6, 10
; All other round keys are reloaded from KEYS into XKEY_A / XKEY_B on every
; iteration.
500 | align 16 | |
501 | main_loop: | |
502 | ; load the next four ciphertext blocks (unaligned loads) | |
503 | movdqu XDATA0, [IN + IDX + 0*16] | |
504 | movdqu XDATA1, [IN + IDX + 1*16] | |
505 | movdqu XDATA2, [IN + IDX + 2*16] | |
506 | movdqu XDATA3, [IN + IDX + 3*16] | |
507 | ||
508 | ; keep ciphertext copies: blocks 0-2 feed the CBC XOR of blocks 1-3, block 3 becomes the next XIV | |
509 | movdqa XSAVED0, XDATA0 | |
510 | movdqa XSAVED1, XDATA1 | |
511 | movdqa XSAVED2, XDATA2 | |
512 | movdqa XSAVED3, XDATA3 | |
513 | ||
514 | movdqa XKEY_A, [KEYS + 1*16] | |
515 | ||
516 | pxor XDATA0, XKEY0 ; 0. ARK (XOR with round key 0) | |
517 | pxor XDATA1, XKEY0 | |
518 | pxor XDATA2, XKEY0 | |
519 | pxor XDATA3, XKEY0 | |
520 | ||
; IDX is advanced here, before the stores; the stores below compensate with "- 4*16".
521 | add IDX, 4*16 | |
522 | ||
523 | aesdec XDATA0, XKEY_A ; 1. DEC | |
524 | aesdec XDATA1, XKEY_A | |
525 | aesdec XDATA2, XKEY_A | |
526 | aesdec XDATA3, XKEY_A | |
527 | ||
528 | movdqa XKEY_A, [KEYS + 3*16] | |
529 | ||
530 | aesdec XDATA0, XKEY2 ; 2. DEC | |
531 | aesdec XDATA1, XKEY2 | |
532 | aesdec XDATA2, XKEY2 | |
533 | aesdec XDATA3, XKEY2 | |
534 | ||
535 | aesdec XDATA0, XKEY_A ; 3. DEC | |
536 | aesdec XDATA1, XKEY_A | |
537 | aesdec XDATA2, XKEY_A | |
538 | aesdec XDATA3, XKEY_A | |
539 | ||
540 | movdqa XKEY_A, [KEYS + 5*16] | |
541 | ||
542 | aesdec XDATA0, XKEY4 ; 4. DEC | |
543 | aesdec XDATA1, XKEY4 | |
544 | aesdec XDATA2, XKEY4 | |
545 | aesdec XDATA3, XKEY4 | |
546 | ||
547 | aesdec XDATA0, XKEY_A ; 5. DEC | |
548 | aesdec XDATA1, XKEY_A | |
549 | aesdec XDATA2, XKEY_A | |
550 | aesdec XDATA3, XKEY_A | |
551 | ||
552 | movdqa XKEY_A, [KEYS + 7*16] | |
553 | ||
554 | aesdec XDATA0, XKEY6 ; 6. DEC | |
555 | aesdec XDATA1, XKEY6 | |
556 | aesdec XDATA2, XKEY6 | |
557 | aesdec XDATA3, XKEY6 | |
558 | ||
559 | movdqa XKEY_B, [KEYS + 8*16] | |
560 | ||
561 | aesdec XDATA0, XKEY_A ; 7. DEC | |
562 | aesdec XDATA1, XKEY_A | |
563 | aesdec XDATA2, XKEY_A | |
564 | aesdec XDATA3, XKEY_A | |
565 | ||
566 | movdqa XKEY_A, [KEYS + 9*16] | |
567 | ||
568 | aesdec XDATA0, XKEY_B ; 8. DEC | |
569 | aesdec XDATA1, XKEY_B | |
570 | aesdec XDATA2, XKEY_B | |
571 | aesdec XDATA3, XKEY_B | |
572 | ||
573 | aesdec XDATA0, XKEY_A ; 9. DEC | |
574 | aesdec XDATA1, XKEY_A | |
575 | aesdec XDATA2, XKEY_A | |
576 | aesdec XDATA3, XKEY_A | |
577 | ||
578 | movdqa XKEY_A, [KEYS + 11*16] | |
579 | ||
580 | aesdec XDATA0, XKEY10 ; 10. DEC | |
581 | aesdec XDATA1, XKEY10 | |
582 | aesdec XDATA2, XKEY10 | |
583 | aesdec XDATA3, XKEY10 | |
584 | ||
585 | movdqa XKEY_B, [KEYS + 12*16] | |
586 | ||
587 | aesdec XDATA0, XKEY_A ; 11. DEC | |
588 | aesdec XDATA1, XKEY_A | |
589 | aesdec XDATA2, XKEY_A | |
590 | aesdec XDATA3, XKEY_A | |
591 | ||
592 | movdqa XKEY_A, [KEYS + 13*16] | |
593 | ||
594 | aesdec XDATA0, XKEY_B ; 12. DEC | |
595 | aesdec XDATA1, XKEY_B | |
596 | aesdec XDATA2, XKEY_B | |
597 | aesdec XDATA3, XKEY_B | |
598 | ||
599 | movdqa XKEY_B, [KEYS + 14*16] | |
600 | ||
601 | aesdec XDATA0, XKEY_A ; 13. DEC | |
602 | aesdec XDATA1, XKEY_A | |
603 | aesdec XDATA2, XKEY_A | |
604 | aesdec XDATA3, XKEY_A | |
605 | ||
606 | aesdeclast XDATA0, XKEY_B ; 14. DEC (final round, aesdeclast) | |
607 | aesdeclast XDATA1, XKEY_B | |
608 | aesdeclast XDATA2, XKEY_B | |
609 | aesdeclast XDATA3, XKEY_B | |
610 | ||
; CBC chaining: block 0 is XORed with the previous group's last ciphertext
; block (XIV), blocks 1-3 with the preceding ciphertext block of this group.
611 | pxor XDATA0, XIV | |
612 | pxor XDATA1, XSAVED0 | |
613 | pxor XDATA2, XSAVED1 | |
614 | pxor XDATA3, XSAVED2 | |
615 | ||
616 | movdqu [OUT + IDX + 0*16 - 4*16], XDATA0 | |
617 | movdqu [OUT + IDX + 1*16 - 4*16], XDATA1 | |
618 | movdqu [OUT + IDX + 2*16 - 4*16], XDATA2 | |
619 | movdqu [OUT + IDX + 3*16 - 4*16], XDATA3 | |
620 | ||
; The last ciphertext block of this group is the chaining value for the next one.
621 | movdqa XIV, XSAVED3 | |
622 | ||
; Loop until all LEN bytes are consumed; the exit test is equality, so it
; relies on IDX reaching LEN exactly in steps of 4*16.
623 | CMP IDX, LEN | |
624 | jne main_loop | |
625 | ||
; Common exit for all paths.
626 | done: | |
627 | ; The final chaining value (XIV) is deliberately not written back to [IV]; | |
628 | ; the store is kept here, disabled, for reference: movdqu [IV], XIV | |
629 | ||
630 | ret | |
631 | ||
; LINUX builds only: emit an empty .note.GNU-stack section. By GNU toolchain
; convention this marks the object as not requiring an executable stack.
632 | %ifdef LINUX | |
633 | section .note.GNU-stack noalloc noexec nowrite progbits | |
634 | %endif |