]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | ;; |
2 | ;; Copyright (c) 2012-2018, Intel Corporation | |
3 | ;; | |
4 | ;; Redistribution and use in source and binary forms, with or without | |
5 | ;; modification, are permitted provided that the following conditions are met: | |
6 | ;; | |
7 | ;; * Redistributions of source code must retain the above copyright notice, | |
8 | ;; this list of conditions and the following disclaimer. | |
9 | ;; * Redistributions in binary form must reproduce the above copyright | |
10 | ;; notice, this list of conditions and the following disclaimer in the | |
11 | ;; documentation and/or other materials provided with the distribution. | |
12 | ;; * Neither the name of Intel Corporation nor the names of its contributors | |
13 | ;; may be used to endorse or promote products derived from this software | |
14 | ;; without specific prior written permission. | |
15 | ;; | |
16 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
17 | ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
19 | ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | |
20 | ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
22 | ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
23 | ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
24 | ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
25 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 | ;; | |
27 | ||
28 | ;;; Routine to do AES-256 CBC encryption | |
29 | ;;; Processes 4 buffers at a time, with a single data structure as input | |
30 | ;;; Updates the In and Out pointers and the stored IVs at the end | |
31 | ||
f67539c2 | 32 | %include "include/os.asm" |
11fdf7f2 TL |
33 | %include "mb_mgr_datastruct.asm" |
34 | ||
35 | %define MOVDQ movdqu ;; assume buffers not aligned | |
;; pxor2 dst, src : XOR src into dst.
;; src is first loaded into XTMP with MOVDQ (movdqu, the unaligned move
;; selected above) and the XOR is then done register-to-register, so src
;; may be an unaligned memory operand.
;; Overwrites XTMP (one of the register aliases defined further down).
36 | %macro pxor2 2 | |
37 | MOVDQ XTMP, %2 | |
38 | pxor %1, XTMP | |
39 | %endm | |
40 | ||
41 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
f67539c2 | 42 | ;; struct AES_ARGS { |
11fdf7f2 TL |
43 | ;; void* in[8]; |
44 | ;; void* out[8]; | |
45 | ;; UINT128* keys[8]; | |
46 | ;; UINT128 IV[8]; | |
47 | ;; } | |
48 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
f67539c2 TL |
49 | ;; void aes_cbc_enc_256_x4(AES_ARGS *args, UINT64 len); |
50 | ;; arg 1: ARG : addr of AES_ARGS structure | |
11fdf7f2 TL |
51 | ;; arg 2: LEN : len (in units of bytes; must be a non-zero multiple of 16) |
52 | ||
;; Argument registers. With LINUX defined, ARG/LEN are rdi/rsi (the first two
;; integer argument registers of the System V AMD64 convention); otherwise
;; they are rcx/rdx (the first two of the Microsoft x64 convention).
;; REG3/REG4 take whichever of the rcx/rdx or rsi/rdi pairs is not holding
;; the arguments; they are used below as KEYS1/KEYS2.
53 | %ifdef LINUX | |
54 | %define ARG rdi | |
55 | %define LEN rsi | |
56 | %define REG3 rcx | |
57 | %define REG4 rdx | |
58 | %else | |
59 | %define ARG rcx | |
60 | %define LEN rdx | |
61 | %define REG3 rsi | |
62 | %define REG4 rdi | |
63 | %endif | |
64 | ||
;; IDX: byte offset, within every buffer, of the next block to process.
65 | %define IDX rax | |
66 | ||
;; Per-lane pointers (lanes 0-3): INn = input buffer, KEYSn = round-key
;; array, OUTn = output buffer.
67 | %define IN0 r8 | |
68 | %define KEYS0 rbx | |
69 | %define OUT0 r9 | |
70 | ||
71 | %define IN1 r10 | |
72 | %define KEYS1 REG3 | |
73 | %define OUT1 r11 | |
74 | ||
75 | %define IN2 r12 | |
76 | %define KEYS2 REG4 | |
77 | %define OUT2 r13 | |
78 | ||
79 | %define IN3 r14 | |
80 | %define KEYS3 rbp | |
81 | %define OUT3 r15 | |
82 | ||
83 | ||
;; Per-lane working block: holds the plaintext XOR chaining value on entry to
;; the rounds and the ciphertext block after the final round.
84 | %define XDATA0 xmm0 | |
85 | %define XDATA1 xmm1 | |
86 | %define XDATA2 xmm2 | |
87 | %define XDATA3 xmm3 | |
88 | ||
;; Round keys 3, 6 and 9 of each lane are cached in xmm registers.
;; XKEY0_6 is the exception: it is a memory operand rather than a register,
;; because xmm5 is taken by XTMP and all sixteen xmm registers are assigned.
89 | %define XKEY0_3 xmm4 | |
90 | %define XKEY0_6 [KEYS0 + 16*6] | |
91 | %define XTMP xmm5 | |
92 | %define XKEY0_9 xmm6 | |
93 | ||
94 | %define XKEY1_3 xmm7 | |
95 | %define XKEY1_6 xmm8 | |
96 | %define XKEY1_9 xmm9 | |
97 | ||
98 | %define XKEY2_3 xmm10 | |
99 | %define XKEY2_6 xmm11 | |
100 | %define XKEY2_9 xmm12 | |
101 | ||
102 | %define XKEY3_3 xmm13 | |
103 | %define XKEY3_6 xmm14 | |
104 | %define XKEY3_9 xmm15 | |
105 | ||
9f95a23c TL |
;; The entry-point name can be overridden by defining AES_CBC_ENC_X4 before
;; this point; otherwise it defaults to aes_cbc_enc_256_x4.
106 | %ifndef AES_CBC_ENC_X4 |
107 | %define AES_CBC_ENC_X4 aes_cbc_enc_256_x4 | |
108 | %endif | |
109 | ||
11fdf7f2 TL |
110 | section .text |
111 | ||
9f95a23c TL |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES_CBC_ENC_X4
;; CBC-encrypts four independent buffers ("lanes") in lock step, one 16-byte
;; block per lane per iteration. Each block goes through an initial key XOR
;; and 14 rounds, with round keys read from [KEYSn + 16*0] .. [KEYSn + 16*14].
;;
;; In:   ARG = address of the args structure; only entries 0..3 of
;;             in / out / keys / IV are accessed here.
;;       LEN = number of bytes to process in every lane. The first block is
;;             always processed and the loop exits only when IDX == LEN with
;;             IDX stepping by 16, so LEN must be a non-zero multiple of 16.
;; Out:  in[0..3] and out[0..3] are advanced by LEN; IV[0..3] are replaced
;;       by the last ciphertext block of the corresponding lane.
;; Regs: uses rax, rbx, rbp, r8-r15, REG3, REG4 and xmm0-xmm15. Only rbp is
;;       saved and restored here.
;;       NOTE(review): presumably the callers preserve the remaining
;;       callee-saved registers they care about - confirm at the call sites.
;; Align: IV and round keys are accessed with movdqa or as memory operands
;;       of pxor/aesenc, which need 16-byte aligned addresses; input and
;;       output data are accessed with MOVDQ (movdqu) only.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
112 | MKGLOBAL(AES_CBC_ENC_X4,function,internal) |
113 | AES_CBC_ENC_X4: | |
11fdf7f2 TL |
114 |
;; rbp is saved because it serves as KEYS3 below
115 | push rbp | |
116 | ||
;; the first block (offset 0) is handled before the loop, so IDX starts at 16
117 | mov IDX, 16 | |
118 | ||
119 | mov IN0, [ARG + _aesarg_in + 8*0] | |
120 | mov IN1, [ARG + _aesarg_in + 8*1] | |
121 | mov IN2, [ARG + _aesarg_in + 8*2] | |
122 | mov IN3, [ARG + _aesarg_in + 8*3] | |
123 | ||
124 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
125 | ||
126 | MOVDQ XDATA0, [IN0] ; load first block of plain text | |
127 | MOVDQ XDATA1, [IN1] ; load first block of plain text | |
128 | MOVDQ XDATA2, [IN2] ; load first block of plain text | |
129 | MOVDQ XDATA3, [IN3] ; load first block of plain text | |
130 | ||
131 | mov KEYS0, [ARG + _aesarg_keys + 8*0] | |
132 | mov KEYS1, [ARG + _aesarg_keys + 8*1] | |
133 | mov KEYS2, [ARG + _aesarg_keys + 8*2] | |
134 | mov KEYS3, [ARG + _aesarg_keys + 8*3] | |
135 | ||
136 | pxor XDATA0, [ARG + _aesarg_IV + 16*0] ; plaintext XOR IV | |
137 | pxor XDATA1, [ARG + _aesarg_IV + 16*1] ; plaintext XOR IV | |
138 | pxor XDATA2, [ARG + _aesarg_IV + 16*2] ; plaintext XOR IV | |
139 | pxor XDATA3, [ARG + _aesarg_IV + 16*3] ; plaintext XOR IV | |
140 | ||
141 | mov OUT0, [ARG + _aesarg_out + 8*0] | |
142 | mov OUT1, [ARG + _aesarg_out + 8*1] | |
143 | mov OUT2, [ARG + _aesarg_out + 8*2] | |
144 | mov OUT3, [ARG + _aesarg_out + 8*3] | |
145 | ||
146 | pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK | |
147 | pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK | |
148 | pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK | |
149 | pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK | |
150 | ||
151 | aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC | |
152 | aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC | |
153 | aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC | |
154 | aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC | |
155 | ||
156 | aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC | |
157 | aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC | |
158 | aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC | |
159 | aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC | |
160 | ||
;; round keys 3, 6 and 9 are loaded into registers during this first block
;; and reused from those registers by every iteration of main_loop
161 | movdqa XKEY0_3, [KEYS0 + 16*3] ; load round 3 key | |
162 | movdqa XKEY1_3, [KEYS1 + 16*3] ; load round 3 key | |
163 | movdqa XKEY2_3, [KEYS2 + 16*3] ; load round 3 key | |
164 | movdqa XKEY3_3, [KEYS3 + 16*3] ; load round 3 key | |
165 | ||
166 | aesenc XDATA0, XKEY0_3 ; 3. ENC | |
167 | aesenc XDATA1, XKEY1_3 ; 3. ENC | |
168 | aesenc XDATA2, XKEY2_3 ; 3. ENC | |
169 | aesenc XDATA3, XKEY3_3 ; 3. ENC | |
170 | ||
171 | aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC | |
172 | aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC | |
173 | aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC | |
174 | aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC | |
175 | ||
176 | aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC | |
177 | aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC | |
178 | aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC | |
179 | aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC | |
180 | ||
;; no load for lane 0 here: XKEY0_6 is itself the memory operand [KEYS0 + 16*6]
181 | movdqa XKEY1_6, [KEYS1 + 16*6] ; load round 6 key | |
182 | movdqa XKEY2_6, [KEYS2 + 16*6] ; load round 6 key | |
183 | movdqa XKEY3_6, [KEYS3 + 16*6] ; load round 6 key | |
184 | ||
185 | aesenc XDATA0, XKEY0_6 ; 6. ENC | |
186 | aesenc XDATA1, XKEY1_6 ; 6. ENC | |
187 | aesenc XDATA2, XKEY2_6 ; 6. ENC | |
188 | aesenc XDATA3, XKEY3_6 ; 6. ENC | |
189 | ||
190 | aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC | |
191 | aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC | |
192 | aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC | |
193 | aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC | |
194 | ||
195 | aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC | |
196 | aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC | |
197 | aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC | |
198 | aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC | |
199 | ||
200 | movdqa XKEY0_9, [KEYS0 + 16*9] ; load round 9 key | |
201 | movdqa XKEY1_9, [KEYS1 + 16*9] ; load round 9 key | |
202 | movdqa XKEY2_9, [KEYS2 + 16*9] ; load round 9 key | |
203 | movdqa XKEY3_9, [KEYS3 + 16*9] ; load round 9 key | |
204 | ||
205 | aesenc XDATA0, XKEY0_9 ; 9. ENC | |
206 | aesenc XDATA1, XKEY1_9 ; 9. ENC | |
207 | aesenc XDATA2, XKEY2_9 ; 9. ENC | |
208 | aesenc XDATA3, XKEY3_9 ; 9. ENC | |
209 | ||
210 | aesenc XDATA0, [KEYS0 + 16*10] ; 10. ENC | |
211 | aesenc XDATA1, [KEYS1 + 16*10] ; 10. ENC | |
212 | aesenc XDATA2, [KEYS2 + 16*10] ; 10. ENC | |
213 | aesenc XDATA3, [KEYS3 + 16*10] ; 10. ENC | |
214 | ||
215 | aesenc XDATA0, [KEYS0 + 16*11] ; 11. ENC | |
216 | aesenc XDATA1, [KEYS1 + 16*11] ; 11. ENC | |
217 | aesenc XDATA2, [KEYS2 + 16*11] ; 11. ENC | |
218 | aesenc XDATA3, [KEYS3 + 16*11] ; 11. ENC | |
219 | ||
220 | aesenc XDATA0, [KEYS0 + 16*12] ; 12. ENC | |
221 | aesenc XDATA1, [KEYS1 + 16*12] ; 12. ENC | |
222 | aesenc XDATA2, [KEYS2 + 16*12] ; 12. ENC | |
223 | aesenc XDATA3, [KEYS3 + 16*12] ; 12. ENC | |
224 | ||
225 | aesenc XDATA0, [KEYS0 + 16*13] ; 13. ENC | |
226 | aesenc XDATA1, [KEYS1 + 16*13] ; 13. ENC | |
227 | aesenc XDATA2, [KEYS2 + 16*13] ; 13. ENC | |
228 | aesenc XDATA3, [KEYS3 + 16*13] ; 13. ENC | |
229 | ||
230 | aesenclast XDATA0, [KEYS0 + 16*14] ; 14. ENC (final round) | |
231 | aesenclast XDATA1, [KEYS1 + 16*14] ; 14. ENC (final round) | |
232 | aesenclast XDATA2, [KEYS2 + 16*14] ; 14. ENC (final round) | |
233 | aesenclast XDATA3, [KEYS3 + 16*14] ; 14. ENC (final round) | |
234 | ||
235 | MOVDQ [OUT0], XDATA0 ; write back ciphertext | |
236 | MOVDQ [OUT1], XDATA1 ; write back ciphertext | |
237 | MOVDQ [OUT2], XDATA2 ; write back ciphertext | |
238 | MOVDQ [OUT3], XDATA3 ; write back ciphertext | |
239 | ||
;; LEN == 16 means the single block just written was all there is
240 | cmp LEN, IDX | |
241 | je done | |
242 | ||
;; Loop invariant: XDATAn holds the ciphertext block just produced for lane n,
;; which is the CBC chaining value for the block at offset IDX.
243 | main_loop: | |
244 | pxor2 XDATA0, [IN0 + IDX] ; plaintext XOR previous ciphertext | |
245 | pxor2 XDATA1, [IN1 + IDX] ; plaintext XOR previous ciphertext | |
246 | pxor2 XDATA2, [IN2 + IDX] ; plaintext XOR previous ciphertext | |
247 | pxor2 XDATA3, [IN3 + IDX] ; plaintext XOR previous ciphertext | |
248 | ||
249 | ||
250 | pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK | |
251 | pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK | |
252 | pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK | |
253 | pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK | |
254 | ||
255 | aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC | |
256 | aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC | |
257 | aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC | |
258 | aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC | |
259 | ||
260 | aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC | |
261 | aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC | |
262 | aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC | |
263 | aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC | |
264 | ||
265 | aesenc XDATA0, XKEY0_3 ; 3. ENC | |
266 | aesenc XDATA1, XKEY1_3 ; 3. ENC | |
267 | aesenc XDATA2, XKEY2_3 ; 3. ENC | |
268 | aesenc XDATA3, XKEY3_3 ; 3. ENC | |
269 | ||
270 | aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC | |
271 | aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC | |
272 | aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC | |
273 | aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC | |
274 | ||
275 | aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC | |
276 | aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC | |
277 | aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC | |
278 | aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC | |
279 | ||
280 | aesenc XDATA0, XKEY0_6 ; 6. ENC | |
281 | aesenc XDATA1, XKEY1_6 ; 6. ENC | |
282 | aesenc XDATA2, XKEY2_6 ; 6. ENC | |
283 | aesenc XDATA3, XKEY3_6 ; 6. ENC | |
284 | ||
285 | aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC | |
286 | aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC | |
287 | aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC | |
288 | aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC | |
289 | ||
290 | aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC | |
291 | aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC | |
292 | aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC | |
293 | aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC | |
294 | ||
295 | aesenc XDATA0, XKEY0_9 ; 9. ENC | |
296 | aesenc XDATA1, XKEY1_9 ; 9. ENC | |
297 | aesenc XDATA2, XKEY2_9 ; 9. ENC | |
298 | aesenc XDATA3, XKEY3_9 ; 9. ENC | |
299 | ||
300 | aesenc XDATA0, [KEYS0 + 16*10] ; 10. ENC | |
301 | aesenc XDATA1, [KEYS1 + 16*10] ; 10. ENC | |
302 | aesenc XDATA2, [KEYS2 + 16*10] ; 10. ENC | |
303 | aesenc XDATA3, [KEYS3 + 16*10] ; 10. ENC | |
304 | ||
305 | aesenc XDATA0, [KEYS0 + 16*11] ; 11. ENC | |
306 | aesenc XDATA1, [KEYS1 + 16*11] ; 11. ENC | |
307 | aesenc XDATA2, [KEYS2 + 16*11] ; 11. ENC | |
308 | aesenc XDATA3, [KEYS3 + 16*11] ; 11. ENC | |
309 | ||
310 | aesenc XDATA0, [KEYS0 + 16*12] ; 12. ENC | |
311 | aesenc XDATA1, [KEYS1 + 16*12] ; 12. ENC | |
312 | aesenc XDATA2, [KEYS2 + 16*12] ; 12. ENC | |
313 | aesenc XDATA3, [KEYS3 + 16*12] ; 12. ENC | |
314 | ||
315 | aesenc XDATA0, [KEYS0 + 16*13] ; 13. ENC | |
316 | aesenc XDATA1, [KEYS1 + 16*13] ; 13. ENC | |
317 | aesenc XDATA2, [KEYS2 + 16*13] ; 13. ENC | |
318 | aesenc XDATA3, [KEYS3 + 16*13] ; 13. ENC | |
319 | ||
320 | aesenclast XDATA0, [KEYS0 + 16*14] ; 14. ENC (final round) | |
321 | aesenclast XDATA1, [KEYS1 + 16*14] ; 14. ENC (final round) | |
322 | aesenclast XDATA2, [KEYS2 + 16*14] ; 14. ENC (final round) | |
323 | aesenclast XDATA3, [KEYS3 + 16*14] ; 14. ENC (final round) | |
324 | ||
325 | ||
326 | MOVDQ [OUT0 + IDX], XDATA0 ; write back ciphertext | |
327 | MOVDQ [OUT1 + IDX], XDATA1 ; write back ciphertext | |
328 | MOVDQ [OUT2 + IDX], XDATA2 ; write back ciphertext | |
329 | MOVDQ [OUT3 + IDX], XDATA3 ; write back ciphertext | |
330 | ||
331 | ||
;; advance to the next block; the loop ends only on exact equality with LEN
332 | add IDX, 16 | |
333 | cmp LEN, IDX | |
334 | jne main_loop | |
335 | ||
336 | done: | |
337 | ;; update IV | |
;; XDATAn is the last ciphertext block of lane n; it is stored as that lane's new IV
338 | movdqa [ARG + _aesarg_IV + 16*0], XDATA0 | |
339 | movdqa [ARG + _aesarg_IV + 16*1], XDATA1 | |
340 | movdqa [ARG + _aesarg_IV + 16*2], XDATA2 | |
341 | movdqa [ARG + _aesarg_IV + 16*3], XDATA3 | |
342 | ||
343 | ;; update IN and OUT | |
;; INn/OUTn still hold the pointers loaded at entry, so adding LEN moves each
;; one past the data just processed
344 | add IN0, LEN | |
345 | mov [ARG + _aesarg_in + 8*0], IN0 | |
346 | add IN1, LEN | |
347 | mov [ARG + _aesarg_in + 8*1], IN1 | |
348 | add IN2, LEN | |
349 | mov [ARG + _aesarg_in + 8*2], IN2 | |
350 | add IN3, LEN | |
351 | mov [ARG + _aesarg_in + 8*3], IN3 | |
352 | ||
353 | add OUT0, LEN | |
354 | mov [ARG + _aesarg_out + 8*0], OUT0 | |
355 | add OUT1, LEN | |
356 | mov [ARG + _aesarg_out + 8*1], OUT1 | |
357 | add OUT2, LEN | |
358 | mov [ARG + _aesarg_out + 8*2], OUT2 | |
359 | add OUT3, LEN | |
360 | mov [ARG + _aesarg_out + 8*3], OUT3 | |
361 | ||
;; restore the rbp value saved at entry
362 | pop rbp | |
363 | ||
364 | ret | |
365 | ||
;; Emitted only when LINUX is defined.
;; NOTE(review): presumably this empty, non-executable .note.GNU-stack section
;; tells the linker the object does not need an executable stack - confirm.
366 | %ifdef LINUX | |
367 | section .note.GNU-stack noalloc noexec nowrite progbits | |
368 | %endif |