2 ;; Copyright (c) 2012-2018, Intel Corporation
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;; Routine to perform 128-bit AES-CBC encryption / CBC-MAC digest computation.
29 ;;; Processes 4 buffers at a time; takes a single data structure as input.
30 ;;; Updates the In and Out pointers at the end.
33 %include "mb_mgr_datastruct.asm"
;; MOVDQ is the move used for the data-buffer accesses through the INn and
;; OUTn pointers.  It maps to the unaligned form because those buffers carry
;; no alignment guarantee.  Round-key loads and the IV stores use movdqa
;; directly, so they do require 16-byte alignment.
35 %define MOVDQ movdqu ;; assume buffers not aligned
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;; struct AES_ARGS_x8 {
48 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49 ;; void aes_cbc_enc_128_x4(AES_ARGS_x8 *args, UINT64 len);
50 ;; arg 1: ARG : addr of AES_ARGS_x8 structure
51 ;; arg 2: LEN : len (in units of bytes)
87 ;; No cipher text write back for CBC-MAC
;; Cached round keys, named XKEY<lane>_<round>.
;; Lane 0's round-6 key is a memory operand (re-read from the key schedule on
;; every use); the lane 2 and lane 3 keys listed below live in xmm10-xmm15.
;; NOTE(review): the remaining XKEY0_* / XKEY1_* definitions are not visible
;; in this excerpt - confirm against the full file.
100 %define XKEY0_6 [KEYS0 + 16*6]
108 %define XKEY2_3 xmm10
109 %define XKEY2_6 xmm11
110 %define XKEY2_9 xmm12
112 %define XKEY3_3 xmm13
113 %define XKEY3_6 xmm14
114 %define XKEY3_9 xmm15
;; Entry-point symbols.  The exported names depend on AES_CBC_ENC_X4: when it
;; is not defined the plain names are exported, otherwise the *_no_aesni names
;; are.  Each branch lists both a CBC-MAC name and a CBC-encrypt name.
;; NOTE(review): the labels for the first branch, whatever conditional picks
;; between the MAC and encrypt names, and the closing %endif are not visible
;; in this excerpt - confirm against the full file.
118 %ifndef AES_CBC_ENC_X4
121 MKGLOBAL(aes128_cbc_mac_x4,function,internal)
124 MKGLOBAL(aes_cbc_enc_128_x4,function,internal)
128 %else ;; AES_CBC_ENC_X4 already defined
131 MKGLOBAL(aes128_cbc_mac_x4_no_aesni,function,internal)
132 aes128_cbc_mac_x4_no_aesni:
134 MKGLOBAL(aes_cbc_enc_128_x4_no_aesni,function,internal)
135 aes_cbc_enc_128_x4_no_aesni:
;; Save general-purpose registers into the _gpr_save area, one 8-byte slot
;; each; the same slots are read back before the function exits.
;; NOTE(review): the stack-frame allocation (rsp adjustment) is not visible
;; in this excerpt.
140 mov [rsp + _gpr_save + 8*0], rbp ; slot 0
142 mov [rsp + _gpr_save + 8*1], rbx ; slot 1
143 mov [rsp + _gpr_save + 8*2], r12 ; slot 2
144 mov [rsp + _gpr_save + 8*3], r13 ; slot 3
145 mov [rsp + _gpr_save + 8*4], r14 ; slot 4
146 mov [rsp + _gpr_save + 8*5], r15 ; slot 5
;; rsi/rdi are callee-saved under the Microsoft x64 ABI but volatile under
;; System V; presumably these two stores are assembled for Windows builds
;; only - confirm.
148 mov [rsp + _gpr_save + 8*6], rsi ; slot 6
149 mov [rsp + _gpr_save + 8*7], rdi ; slot 7
154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ---- First block of each of the four lanes ----
;; The four lanes are independent streams; every step below is issued once
;; per lane before moving on to the next step.

;; Fetch the four input pointers from the argument structure.
156 mov IN0, [ARG + _aesarg_in + 8*0]
157 mov IN1, [ARG + _aesarg_in + 8*1]
158 mov IN2, [ARG + _aesarg_in + 8*2]
159 mov IN3, [ARG + _aesarg_in + 8*3]
;; Load the first 16-byte block of each lane (unaligned load via MOVDQ).
161 MOVDQ XDATA0, [IN0] ; load first block of plain text
162 MOVDQ XDATA1, [IN1] ; load first block of plain text
163 MOVDQ XDATA2, [IN2] ; load first block of plain text
164 MOVDQ XDATA3, [IN3] ; load first block of plain text
;; Fetch the per-lane key schedule pointers.
166 mov KEYS0, [ARG + _aesarg_keys + 8*0]
167 mov KEYS1, [ARG + _aesarg_keys + 8*1]
168 mov KEYS2, [ARG + _aesarg_keys + 8*2]
169 mov KEYS3, [ARG + _aesarg_keys + 8*3]
;; CBC chaining for the first block: XOR with the per-lane IV held in the
;; argument structure (16 bytes per lane).
171 pxor XDATA0, [ARG + _aesarg_IV + 16*0] ; plaintext XOR IV
172 pxor XDATA1, [ARG + _aesarg_IV + 16*1] ; plaintext XOR IV
173 pxor XDATA2, [ARG + _aesarg_IV + 16*2] ; plaintext XOR IV
174 pxor XDATA3, [ARG + _aesarg_IV + 16*3] ; plaintext XOR IV
;; Fetch the four output pointers.
;; NOTE(review): presumably skipped in the CBC-MAC build, which per the
;; comments elsewhere in this file writes no cipher text - confirm.
177 mov OUT0, [ARG + _aesarg_out + 8*0]
178 mov OUT1, [ARG + _aesarg_out + 8*1]
179 mov OUT2, [ARG + _aesarg_out + 8*2]
180 mov OUT3, [ARG + _aesarg_out + 8*3]
;; AES-128: initial round-key XOR (round 0), nine aesenc rounds, then
;; aesenclast with round key 10.  Round keys sit 16 bytes apart in each
;; lane's key schedule.
183 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
184 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
185 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
186 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
188 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
189 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
190 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
191 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
193 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
194 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
195 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
196 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
;; Round-3 keys are loaded into registers here; the later per-block code
;; uses XKEYn_3 without reloading.  movdqa needs 16-byte aligned keys.
198 movdqa XKEY0_3, [KEYS0 + 16*3] ; load round 3 key
199 movdqa XKEY1_3, [KEYS1 + 16*3] ; load round 3 key
200 movdqa XKEY2_3, [KEYS2 + 16*3] ; load round 3 key
201 movdqa XKEY3_3, [KEYS3 + 16*3] ; load round 3 key
203 aesenc XDATA0, XKEY0_3 ; 3. ENC
204 aesenc XDATA1, XKEY1_3 ; 3. ENC
205 aesenc XDATA2, XKEY2_3 ; 3. ENC
206 aesenc XDATA3, XKEY3_3 ; 3. ENC
208 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
209 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
210 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
211 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
213 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
214 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
215 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
216 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
;; Round-6 keys are cached for lanes 1-3 only.  XKEY0_6 is defined as the
;; memory operand [KEYS0 + 16*6], so lane 0 has nothing to load.
218 movdqa XKEY1_6, [KEYS1 + 16*6] ; load round 6 key
219 movdqa XKEY2_6, [KEYS2 + 16*6] ; load round 6 key
220 movdqa XKEY3_6, [KEYS3 + 16*6] ; load round 6 key
222 aesenc XDATA0, XKEY0_6 ; 6. ENC (key read from memory)
223 aesenc XDATA1, XKEY1_6 ; 6. ENC
224 aesenc XDATA2, XKEY2_6 ; 6. ENC
225 aesenc XDATA3, XKEY3_6 ; 6. ENC
227 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
228 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
229 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
230 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
232 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
233 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
234 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
235 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
;; Round-9 keys are cached for all four lanes.
237 movdqa XKEY0_9, [KEYS0 + 16*9] ; load round 9 key
238 movdqa XKEY1_9, [KEYS1 + 16*9] ; load round 9 key
239 movdqa XKEY2_9, [KEYS2 + 16*9] ; load round 9 key
240 movdqa XKEY3_9, [KEYS3 + 16*9] ; load round 9 key
242 aesenc XDATA0, XKEY0_9 ; 9. ENC
243 aesenc XDATA1, XKEY1_9 ; 9. ENC
244 aesenc XDATA2, XKEY2_9 ; 9. ENC
245 aesenc XDATA3, XKEY3_9 ; 9. ENC
247 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC (final round)
248 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC (final round)
249 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC (final round)
250 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC (final round)
;; Store the first cipher block of each lane at offset 0 of its output.
;; XDATAn is left intact and is the chaining value for the next block.
253 MOVDQ [OUT0], XDATA0 ; write back ciphertext
254 MOVDQ [OUT1], XDATA1 ; write back ciphertext
255 MOVDQ [OUT2], XDATA2 ; write back ciphertext
256 MOVDQ [OUT3], XDATA3 ; write back ciphertext
;; NOTE(review): the loop-control code that follows in the full file (IDX
;; setup, length check, loop label) is not visible in this excerpt.
;; ---- Per-block body for the blocks after the first ----
;; IDX is the byte offset of the current block within each lane's buffers.
;; XDATAn still holds the previous cipher block of lane n, so XORing in the
;; next plaintext block performs the CBC chaining step (no IV involved here).
;; NOTE(review): pxor2 is a macro defined outside this excerpt; presumably an
;; XOR that tolerates an unaligned memory operand - confirm.
262 pxor2 XDATA0, [IN0 + IDX] ; plaintext XOR previous cipher block
263 pxor2 XDATA1, [IN1 + IDX] ; plaintext XOR previous cipher block
264 pxor2 XDATA2, [IN2 + IDX] ; plaintext XOR previous cipher block
265 pxor2 XDATA3, [IN3 + IDX] ; plaintext XOR previous cipher block
;; Same AES-128 round sequence as for the first block.  Rounds 3, 6 and 9
;; use the round keys cached in XKEYn_r (XKEY0_6 is a memory operand).
267 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
268 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
269 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
270 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
272 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
273 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
274 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
275 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
277 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
278 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
279 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
280 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
282 aesenc XDATA0, XKEY0_3 ; 3. ENC (cached key)
283 aesenc XDATA1, XKEY1_3 ; 3. ENC (cached key)
284 aesenc XDATA2, XKEY2_3 ; 3. ENC (cached key)
285 aesenc XDATA3, XKEY3_3 ; 3. ENC (cached key)
287 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
288 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
289 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
290 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
292 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
293 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
294 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
295 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
297 aesenc XDATA0, XKEY0_6 ; 6. ENC (key read from memory)
298 aesenc XDATA1, XKEY1_6 ; 6. ENC (cached key)
299 aesenc XDATA2, XKEY2_6 ; 6. ENC (cached key)
300 aesenc XDATA3, XKEY3_6 ; 6. ENC (cached key)
302 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
303 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
304 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
305 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
307 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
308 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
309 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
310 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
312 aesenc XDATA0, XKEY0_9 ; 9. ENC (cached key)
313 aesenc XDATA1, XKEY1_9 ; 9. ENC (cached key)
314 aesenc XDATA2, XKEY2_9 ; 9. ENC (cached key)
315 aesenc XDATA3, XKEY3_9 ; 9. ENC (cached key)
317 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC (final round)
318 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC (final round)
319 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC (final round)
320 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC (final round)
;; Store this cipher block at the same offset in each output buffer.
;; NOTE(review): the comment below says CBC-MAC does no write-back, so these
;; stores are presumably under a conditional that is not visible in this
;; excerpt - confirm.
323 ;; No cipher text write back for CBC-MAC
324 MOVDQ [OUT0 + IDX], XDATA0 ; write back ciphertext
325 MOVDQ [OUT1 + IDX], XDATA1 ; write back ciphertext
326 MOVDQ [OUT2 + IDX], XDATA2 ; write back ciphertext
327 MOVDQ [OUT3 + IDX], XDATA3 ; write back ciphertext
;; NOTE(review): the IDX increment, the comparison against LEN and the
;; backward branch are not visible in this excerpt.
;; ---- Write the per-lane state back into the argument structure ----
;; The last cipher block of each lane replaces that lane's IV slot.  It is
;; the chaining value for a following call and, per the comment below, the
;; digest for CBC-MAC.  movdqa requires the IV array to be 16-byte aligned.
335 ;; update IV / store digest for CBC-MAC
336 movdqa [ARG + _aesarg_IV + 16*0], XDATA0
337 movdqa [ARG + _aesarg_IV + 16*1], XDATA1
338 movdqa [ARG + _aesarg_IV + 16*2], XDATA2
339 movdqa [ARG + _aesarg_IV + 16*3], XDATA3
;; Store the input pointers back to the argument structure.
;; NOTE(review): any arithmetic that advances INn / OUTn before these stores
;; is not visible in this excerpt - confirm against the full file.
343 mov [ARG + _aesarg_in + 8*0], IN0
345 mov [ARG + _aesarg_in + 8*1], IN1
347 mov [ARG + _aesarg_in + 8*2], IN2
349 mov [ARG + _aesarg_in + 8*3], IN3
;; Store the output pointers back to the argument structure.
;; NOTE(review): the comment below says CBC-MAC performs no OUT update, so
;; these stores are presumably under a conditional that is not visible here.
352 ;; No OUT pointer updates for CBC-MAC
354 mov [ARG + _aesarg_out + 8*0], OUT0
356 mov [ARG + _aesarg_out + 8*1], OUT1
358 mov [ARG + _aesarg_out + 8*2], OUT2
360 mov [ARG + _aesarg_out + 8*3], OUT3
;; ---- Restore the general-purpose registers saved on entry ----
;; Each register is reloaded from the _gpr_save slot it was stored to.
;; NOTE(review): stack-frame release and the ret instruction are not visible
;; in this excerpt.
364 mov rbx, [rsp + _gpr_save + 8*1] ; slot 1
365 mov r12, [rsp + _gpr_save + 8*2] ; slot 2
366 mov r13, [rsp + _gpr_save + 8*3] ; slot 3
367 mov r14, [rsp + _gpr_save + 8*4] ; slot 4
368 mov r15, [rsp + _gpr_save + 8*5] ; slot 5
;; rsi/rdi are callee-saved under the Microsoft x64 ABI only; presumably
;; restored for Windows builds only - confirm.
370 mov rsi, [rsp + _gpr_save + 8*6] ; slot 6
371 mov rdi, [rsp + _gpr_save + 8*7] ; slot 7
374 mov rbp, [rsp + _gpr_save + 8*0] ; slot 0
;; Empty .note.GNU-stack section: tells GNU toolchains that this object does
;; not need an executable stack.
379 section .note.GNU-stack noalloc noexec nowrite progbits