2 ;; Copyright (c) 2012-2018, Intel Corporation
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;; Routine to perform 128-bit AES-CBC encryption / CBC-MAC digest computation.
29 ;;; Processes 4 buffers at a time; takes a single argument structure as input.
30 ;;; Updates the In and Out pointers at the end.
33 %include "mb_mgr_datastruct.asm"
;; MOVDQ is the 16-byte load/store mnemonic used for the caller's data
;; buffers. It maps to the unaligned form (movdqu) because no alignment is
;; assumed for those buffers.
35 %define MOVDQ movdqu ;; assume buffers not aligned
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;; struct AES_ARGS_x8 {
48 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49 ;; void aes_cbc_enc_128_x4(AES_ARGS_x8 *args, UINT64 len);
50 ;; arg 1: ARG : addr of AES_ARGS_x8 structure
51 ;; arg 2: LEN : len (in units of bytes)
87 ;; No cipher text write back for CBC-MAC
;; Round-key operand aliases, named XKEY<lane>_<round>.
;; Only part of the define group is visible in this excerpt.
;; XKEY0_6 is a memory operand (read straight from lane 0's key schedule),
;; so lane 0 needs no separate register load for round 6.
100 %define XKEY0_6 [KEYS0 + 16*6]
;; Lane 2: round 3 / 6 / 9 keys are held in xmm registers.
108 %define XKEY2_3 xmm10
109 %define XKEY2_6 xmm11
110 %define XKEY2_9 xmm12
;; Lane 3: round 3 / 6 / 9 keys are held in xmm registers.
112 %define XKEY3_3 xmm13
113 %define XKEY3_6 xmm14
114 %define XKEY3_9 xmm15
;; ---------------------------------------------------------------------
;; 4-lane AES-128 CBC encryption / CBC-MAC body.
;; In:  ARG = address of the args structure (in/out/keys pointers and IVs)
;;      LEN = length in bytes (its use is not visible in this excerpt)
;; Each lane n uses INn / OUTn / KEYSn / XDATAn. The four lanes are
;; interleaved round by round so that independent AES instructions can
;; overlap.
;; NOTE(review): two entry symbols are declared below; presumably a
;; conditional-assembly switch (not visible here) selects one of them -
;; confirm. The entry label, stack allocation, loop control and return
;; are likewise outside this excerpt.
;; ---------------------------------------------------------------------
119 MKGLOBAL(aes128_cbc_mac_x4,function,internal)
122 MKGLOBAL(aes_cbc_enc_128_x4,function,internal)
;; Save general-purpose registers into the _gpr_save area of the stack
;; frame (slot 0 = rbp, slots 1-7 = rbx, r12-r15, rsi, rdi).
;; NOTE(review): rsi/rdi are callee-saved only under the Microsoft x64
;; ABI; any guard around these saves is not visible here - confirm.
126 mov [rsp + _gpr_save + 8*0], rbp
128 mov [rsp + _gpr_save + 8*1], rbx
129 mov [rsp + _gpr_save + 8*2], r12
130 mov [rsp + _gpr_save + 8*3], r13
131 mov [rsp + _gpr_save + 8*4], r14
132 mov [rsp + _gpr_save + 8*5], r15
134 mov [rsp + _gpr_save + 8*6], rsi
135 mov [rsp + _gpr_save + 8*7], rdi
140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ---- First block of each lane ----
;; Fetch the four input pointers from the args structure.
142 mov IN0, [ARG + _aesarg_in + 8*0]
143 mov IN1, [ARG + _aesarg_in + 8*1]
144 mov IN2, [ARG + _aesarg_in + 8*2]
145 mov IN3, [ARG + _aesarg_in + 8*3]
147 MOVDQ XDATA0, [IN0] ; load first block of plain text
148 MOVDQ XDATA1, [IN1] ; load first block of plain text
149 MOVDQ XDATA2, [IN2] ; load first block of plain text
150 MOVDQ XDATA3, [IN3] ; load first block of plain text
;; Fetch the per-lane key schedule pointers (round key r is at KEYSn + 16*r).
152 mov KEYS0, [ARG + _aesarg_keys + 8*0]
153 mov KEYS1, [ARG + _aesarg_keys + 8*1]
154 mov KEYS2, [ARG + _aesarg_keys + 8*2]
155 mov KEYS3, [ARG + _aesarg_keys + 8*3]
;; pxor with a memory operand: the legacy SSE form requires the IV slots
;; to be 16-byte aligned.
157 pxor XDATA0, [ARG + _aesarg_IV + 16*0] ; plaintext XOR IV
158 pxor XDATA1, [ARG + _aesarg_IV + 16*1] ; plaintext XOR IV
159 pxor XDATA2, [ARG + _aesarg_IV + 16*2] ; plaintext XOR IV
160 pxor XDATA3, [ARG + _aesarg_IV + 16*3] ; plaintext XOR IV
;; Fetch the four output pointers.
163 mov OUT0, [ARG + _aesarg_out + 8*0]
164 mov OUT1, [ARG + _aesarg_out + 8*1]
165 mov OUT2, [ARG + _aesarg_out + 8*2]
166 mov OUT3, [ARG + _aesarg_out + 8*3]
;; Round 0: AddRoundKey (plain XOR with round key 0).
169 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
170 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
171 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
172 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
;; Rounds 1-9 use aesenc; round 10 uses aesenclast.
174 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
175 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
176 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
177 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
179 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
180 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
181 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
182 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
;; Load the round 3 keys into registers; the later block-processing code
;; below uses XKEYn_3 again without reloading them.
184 movdqa XKEY0_3, [KEYS0 + 16*3] ; load round 3 key
185 movdqa XKEY1_3, [KEYS1 + 16*3] ; load round 3 key
186 movdqa XKEY2_3, [KEYS2 + 16*3] ; load round 3 key
187 movdqa XKEY3_3, [KEYS3 + 16*3] ; load round 3 key
189 aesenc XDATA0, XKEY0_3 ; 3. ENC
190 aesenc XDATA1, XKEY1_3 ; 3. ENC
191 aesenc XDATA2, XKEY2_3 ; 3. ENC
192 aesenc XDATA3, XKEY3_3 ; 3. ENC
194 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
195 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
196 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
197 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
199 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
200 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
201 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
202 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
;; Load the round 6 keys for lanes 1-3 only: XKEY0_6 is defined as the
;; memory operand [KEYS0 + 16*6], so lane 0 reads it directly.
204 movdqa XKEY1_6, [KEYS1 + 16*6] ; load round 6 key
205 movdqa XKEY2_6, [KEYS2 + 16*6] ; load round 6 key
206 movdqa XKEY3_6, [KEYS3 + 16*6] ; load round 6 key
208 aesenc XDATA0, XKEY0_6 ; 6. ENC
209 aesenc XDATA1, XKEY1_6 ; 6. ENC
210 aesenc XDATA2, XKEY2_6 ; 6. ENC
211 aesenc XDATA3, XKEY3_6 ; 6. ENC
213 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
214 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
215 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
216 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
218 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
219 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
220 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
221 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
;; Load the round 9 keys into registers for reuse by the code below.
223 movdqa XKEY0_9, [KEYS0 + 16*9] ; load round 9 key
224 movdqa XKEY1_9, [KEYS1 + 16*9] ; load round 9 key
225 movdqa XKEY2_9, [KEYS2 + 16*9] ; load round 9 key
226 movdqa XKEY3_9, [KEYS3 + 16*9] ; load round 9 key
228 aesenc XDATA0, XKEY0_9 ; 9. ENC
229 aesenc XDATA1, XKEY1_9 ; 9. ENC
230 aesenc XDATA2, XKEY2_9 ; 9. ENC
231 aesenc XDATA3, XKEY3_9 ; 9. ENC
233 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC
234 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC
235 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC
236 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC
;; Store the first ciphertext block of each lane.
;; NOTE(review): a header comment says CBC-MAC performs no ciphertext write
;; back; any guard that skips these stores is not visible here - confirm.
239 MOVDQ [OUT0], XDATA0 ; write back ciphertext
240 MOVDQ [OUT1], XDATA1 ; write back ciphertext
241 MOVDQ [OUT2], XDATA2 ; write back ciphertext
242 MOVDQ [OUT3], XDATA3 ; write back ciphertext
;; ---- Block at byte offset IDX in each lane ----
;; XDATAn still holds the ciphertext just produced for lane n, so combining
;; it with the next plaintext block makes it the chaining value (the "IV"
;; in the comments below).
;; NOTE(review): pxor2 is a macro defined outside this excerpt (presumably
;; an XOR that tolerates an unaligned memory source - confirm). The
;; initialisation and update of IDX, and the loop label/branch, are also
;; not visible here.
248 pxor2 XDATA0, [IN0 + IDX] ; plaintext XOR IV
249 pxor2 XDATA1, [IN1 + IDX] ; plaintext XOR IV
250 pxor2 XDATA2, [IN2 + IDX] ; plaintext XOR IV
251 pxor2 XDATA3, [IN3 + IDX] ; plaintext XOR IV
253 pxor XDATA0, [KEYS0 + 16*0] ; 0. ARK
254 pxor XDATA1, [KEYS1 + 16*0] ; 0. ARK
255 pxor XDATA2, [KEYS2 + 16*0] ; 0. ARK
256 pxor XDATA3, [KEYS3 + 16*0] ; 0. ARK
258 aesenc XDATA0, [KEYS0 + 16*1] ; 1. ENC
259 aesenc XDATA1, [KEYS1 + 16*1] ; 1. ENC
260 aesenc XDATA2, [KEYS2 + 16*1] ; 1. ENC
261 aesenc XDATA3, [KEYS3 + 16*1] ; 1. ENC
263 aesenc XDATA0, [KEYS0 + 16*2] ; 2. ENC
264 aesenc XDATA1, [KEYS1 + 16*2] ; 2. ENC
265 aesenc XDATA2, [KEYS2 + 16*2] ; 2. ENC
266 aesenc XDATA3, [KEYS3 + 16*2] ; 2. ENC
;; Round 3 keys were loaded into XKEYn_3 during the first block.
268 aesenc XDATA0, XKEY0_3 ; 3. ENC
269 aesenc XDATA1, XKEY1_3 ; 3. ENC
270 aesenc XDATA2, XKEY2_3 ; 3. ENC
271 aesenc XDATA3, XKEY3_3 ; 3. ENC
273 aesenc XDATA0, [KEYS0 + 16*4] ; 4. ENC
274 aesenc XDATA1, [KEYS1 + 16*4] ; 4. ENC
275 aesenc XDATA2, [KEYS2 + 16*4] ; 4. ENC
276 aesenc XDATA3, [KEYS3 + 16*4] ; 4. ENC
278 aesenc XDATA0, [KEYS0 + 16*5] ; 5. ENC
279 aesenc XDATA1, [KEYS1 + 16*5] ; 5. ENC
280 aesenc XDATA2, [KEYS2 + 16*5] ; 5. ENC
281 aesenc XDATA3, [KEYS3 + 16*5] ; 5. ENC
;; Round 6: lane 0 reads its key from memory (XKEY0_6), lanes 1-3 from the
;; registers loaded during the first block.
283 aesenc XDATA0, XKEY0_6 ; 6. ENC
284 aesenc XDATA1, XKEY1_6 ; 6. ENC
285 aesenc XDATA2, XKEY2_6 ; 6. ENC
286 aesenc XDATA3, XKEY3_6 ; 6. ENC
288 aesenc XDATA0, [KEYS0 + 16*7] ; 7. ENC
289 aesenc XDATA1, [KEYS1 + 16*7] ; 7. ENC
290 aesenc XDATA2, [KEYS2 + 16*7] ; 7. ENC
291 aesenc XDATA3, [KEYS3 + 16*7] ; 7. ENC
293 aesenc XDATA0, [KEYS0 + 16*8] ; 8. ENC
294 aesenc XDATA1, [KEYS1 + 16*8] ; 8. ENC
295 aesenc XDATA2, [KEYS2 + 16*8] ; 8. ENC
296 aesenc XDATA3, [KEYS3 + 16*8] ; 8. ENC
;; Round 9 keys were loaded into XKEYn_9 during the first block.
298 aesenc XDATA0, XKEY0_9 ; 9. ENC
299 aesenc XDATA1, XKEY1_9 ; 9. ENC
300 aesenc XDATA2, XKEY2_9 ; 9. ENC
301 aesenc XDATA3, XKEY3_9 ; 9. ENC
303 aesenclast XDATA0, [KEYS0 + 16*10] ; 10. ENC
304 aesenclast XDATA1, [KEYS1 + 16*10] ; 10. ENC
305 aesenclast XDATA2, [KEYS2 + 16*10] ; 10. ENC
306 aesenclast XDATA3, [KEYS3 + 16*10] ; 10. ENC
;; NOTE(review): the comment below says CBC-MAC skips this write back; the
;; guard that would exclude these stores is not visible in this excerpt.
309 ;; No cipher text write back for CBC-MAC
310 MOVDQ [OUT0 + IDX], XDATA0 ; write back ciphertext
311 MOVDQ [OUT1 + IDX], XDATA1 ; write back ciphertext
312 MOVDQ [OUT2 + IDX], XDATA2 ; write back ciphertext
313 MOVDQ [OUT3 + IDX], XDATA3 ; write back ciphertext
;; ---- Finish: publish state back into the args structure ----
;; The last block produced by each lane is stored into that lane's IV slot
;; (aligned store), where a later call will read it as the IV.
321 ;; update IV / store digest for CBC-MAC
322 movdqa [ARG + _aesarg_IV + 16*0], XDATA0
323 movdqa [ARG + _aesarg_IV + 16*1], XDATA1
324 movdqa [ARG + _aesarg_IV + 16*2], XDATA2
325 movdqa [ARG + _aesarg_IV + 16*3], XDATA3
;; Write the input pointers back to the args structure.
;; NOTE(review): the instructions that advance INn/OUTn before these
;; stores are not visible in this excerpt - confirm.
329 mov [ARG + _aesarg_in + 8*0], IN0
331 mov [ARG + _aesarg_in + 8*1], IN1
333 mov [ARG + _aesarg_in + 8*2], IN2
335 mov [ARG + _aesarg_in + 8*3], IN3
338 ;; No OUT pointer updates for CBC-MAC
340 mov [ARG + _aesarg_out + 8*0], OUT0
342 mov [ARG + _aesarg_out + 8*1], OUT1
344 mov [ARG + _aesarg_out + 8*2], OUT2
346 mov [ARG + _aesarg_out + 8*3], OUT3
;; Restore the general-purpose registers saved on entry (same slots as the
;; saves above; rbp from slot 0 is restored last).
350 mov rbx, [rsp + _gpr_save + 8*1]
351 mov r12, [rsp + _gpr_save + 8*2]
352 mov r13, [rsp + _gpr_save + 8*3]
353 mov r14, [rsp + _gpr_save + 8*4]
354 mov r15, [rsp + _gpr_save + 8*5]
356 mov rsi, [rsp + _gpr_save + 8*6]
357 mov rdi, [rsp + _gpr_save + 8*7]
360 mov rbp, [rsp + _gpr_save + 8*0]
;; Empty .note.GNU-stack section: tells GNU toolchains that this object
;; does not need an executable stack.
365 section .note.GNU-stack noalloc noexec nowrite progbits