2 ;; Copyright (c) 2019, Intel Corporation
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 %include "include/os.asm"
30 %include "job_aes_hmac.asm"
31 %include "mb_mgr_datastruct.asm"
33 %include "include/reg_sizes.asm"
34 %include "include/const.inc"
35 %include "include/memcpy.asm"
37 %ifndef AES128_CBC_MAC
;; Default symbol names when the including file does not override them:
;; use the 8-lane AVX AES-128 CBC-MAC core and the AVX-suffixed
;; submit/flush entry point names defined at the bottom of this file.
39 %define AES128_CBC_MAC aes128_cbc_mac_x8
40 %define SUBMIT_JOB_AES_CCM_AUTH submit_job_aes_ccm_auth_avx
41 %define FLUSH_JOB_AES_CCM_AUTH flush_job_aes_ccm_auth_avx
;; Per-lane length masks (8 entries of 16 bytes): entry I has 0xFFFF in
;; 16-bit word I and zeros elsewhere. OR-ed into the lengths vector (see
;; the "vpor ... [rel len_masks + 16*I]" use below) so empty lanes read
;; as max length and are ignored by vphminposuw.
;; NOTE(review): the labels for these tables (presumably len_masks, dupw,
;; counter_mask, and the one..seven qword constants used by cmovne below)
;; fall on lines not visible in this chunk -- verify against the full file.
55 dq 0x000000000000FFFF, 0x0000000000000000
56 dq 0x00000000FFFF0000, 0x0000000000000000
57 dq 0x0000FFFF00000000, 0x0000000000000000
58 dq 0xFFFF000000000000, 0x0000000000000000
59 dq 0x0000000000000000, 0x000000000000FFFF
60 dq 0x0000000000000000, 0x00000000FFFF0000
61 dq 0x0000000000000000, 0x0000FFFF00000000
62 dq 0x0000000000000000, 0xFFFF000000000000
;; vpshufb control that replicates bytes 1:0 into every word slot --
;; used below to broadcast the minimum length across all 8 lane words.
64 dq 0x0100010001000100, 0x0100010001000100
;; AND-mask applied to Block 0 to derive counter block 0: keeps the nonce
;; bytes and only the low 3 bits (L') of flags byte 0, and zeroes the
;; 16-bit message-length field in bytes 14-15.
66 dq 0xFFFFFFFFFFFFFF07, 0x0000FFFFFFFFFFFF
;; Token-pasting helper used to build per-iteration labels such as
;; APPEND(skip_clear_,I) in the %rep loops below.
77 %define APPEND(a,b) a %+ b
;; 9 full AES rounds + 1 final round = AES-128 (consumed by
;; ENCRYPT_SINGLE_BLOCK's round loop).
79 %define NROUNDS 9 ; AES-CCM-128
;; GPR / XMM aliases used throughout the submit/flush macro.
;; NOTE(review): only a subset is visible in this chunk -- the aliases for
;; state, job, lane, tmp/tmp2/tmp3/tmp4, iv_len, aad_len, auth_len, flags,
;; len2, min_idx, min_job, job_rax, xtmp0-3 etc. are defined on lines not
;; shown here.
94 %define auth_len_aad rax
105 %define init_block_addr r11
107 %define unused_lanes rbx
114 %define good_lane r15
117 %define init_block0 xmm0
118 %define ccm_lens xmm1
119 %define min_len_idx xmm2
125 ; STACK_SPACE needs to be an odd multiple of 8
126 ; This routine and its callee clobbers all GPRs
132 ;;; ===========================================================================
133 ;;; ===========================================================================
135 ;;; ===========================================================================
136 ;;; ===========================================================================
;; ENCRYPT_SINGLE_BLOCK <key schedule ptr>, <xmm block>
;; AES-encrypts one 16-byte block in place using the expanded key schedule
;; (16 bytes per round key). Used below to encrypt counter block 0 when
;; producing the CCM authentication tag.
;; NOTE(review): the macro's parameter %defines, the %assign/%rep round
;; loop driving "i", and %endmacro are on lines not visible in this chunk.
138 %macro ENCRYPT_SINGLE_BLOCK 2
142 vpxor %%XMM0, [%%GDATA+16*0]
145 vaesenc %%XMM0, [%%GDATA+16*i]
148 vaesenclast %%XMM0, [%%GDATA+16*i]
151 ;;; ===========================================================================
152 ;;; AES CCM auth job submit & flush
153 ;;; ===========================================================================
154 ;;; SUBMIT_FLUSH [in] - SUBMIT, FLUSH job selection
;; ---------------------------------------------------------------------
;; GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX <SUBMIT|FLUSH>
;; Shared body for the submit/flush entry points: manages 8 CCM-auth
;; lanes in the MB_MGR_CCM_OOO state, returning a completed job (job_rax)
;; or NULL. Prologue: save callee-saved GPRs into the stack save area.
;; NOTE(review): the stack-frame allocation (mov rax, rsp / sub / and to
;; align) precedes these saves on lines not visible here; rsi/rdi saves
;; are presumably under a Windows-ABI conditional (gap at original
;; lines 168/171) -- verify against the full file.
;; ---------------------------------------------------------------------
155 %macro GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX 1
156 %define %%SUBMIT_FLUSH %1
162 mov [rsp + _gpr_save + 8*0], rbx
163 mov [rsp + _gpr_save + 8*1], rbp
164 mov [rsp + _gpr_save + 8*2], r12
165 mov [rsp + _gpr_save + 8*3], r13
166 mov [rsp + _gpr_save + 8*4], r14
167 mov [rsp + _gpr_save + 8*5], r15
169 mov [rsp + _gpr_save + 8*6], rsi
170 mov [rsp + _gpr_save + 8*7], rdi
172 mov [rsp + _rsp_save], rax ; original SP
;; unused_lanes is a packed nibble list of free lane indices
175 mov unused_lanes, [state + _aes_ccm_unused_lanes]
177 %ifidn %%SUBMIT_FLUSH, SUBMIT
;; -----------------------------------------------------------------
;; SUBMIT path: pop a free lane, record the job, then build CCM
;; Block 0 plus the padded AAD in the lane's init-block scratch area
;; and register that area as the lane's next input.
;; NOTE(review): several original lines are missing from this chunk
;; (the unused_lanes nibble shift that extracts "lane", the compares
;; paired with the je/cmovne below, IV pointer load, etc.).
;; -----------------------------------------------------------------
179 mov lane, unused_lanes
182 mov [state + _aes_ccm_unused_lanes], unused_lanes
184 ;; Copy job info into lane
185 mov [state + _aes_ccm_job_in_lane + lane*8], job
186 ;; Copy keys into lane args
187 mov tmp, [job + _aes_enc_key_expanded]
188 mov [state + _aes_ccm_args_keys + lane*8], tmp
;; init_done = 0: initial blocks (Block 0 + AAD) not yet hashed
190 mov word [state + _aes_ccm_init_done + lane*2], 0
;; Zero the lane's running CBC-MAC digest (stored in the IV slot)
193 vpxor init_block0, init_block0
;; NOTE(review): "tmp*2" assumes tmp = lane*8 here (set on a line not
;; visible in this chunk), giving a lane*16 offset -- verify
194 vmovdqa [state + _aes_ccm_args_IV + tmp*2], init_block0
196 ;; Prepare initial Block 0 for CBC-MAC-128
198 ;; Byte 0: flags with L' and M' (AAD later)
199 ;; Calculate L' = 15 - IV length - 1 = 14 - IV length
201 mov iv_len, [job + _iv_len_in_bytes]
203 ;; Calculate M' = (Digest length - 2) / 2
204 mov tmp, [job + _auth_tag_output_len_in_bytes]
207 shl tmp, 2 ; M' << 3 (combine 1xshr, to div by 2, and 3xshl)
210 ;; Bytes 1 - 13: Nonce (7 - 13 bytes long)
212 ;; Bytes 1 - 7 are always copied (first 7 bytes)
;; NOTE(review): tmp presumably holds the job IV pointer here (load not
;; visible). byte/word/dword inserts below cover Block 0 bytes 1..7.
214 vpinsrb init_block0, [tmp], 1
215 vpinsrw init_block0, [tmp + 1], 1
216 vpinsrd init_block0, [tmp + 3], 1
;; Branchy tail: copy remaining nonce bytes for IV lengths > 7
;; (the compares selecting each path are on lines not visible here)
219 je %%_finish_nonce_move
234 vpinsrb init_block0, [tmp + 12], 13
236 vpinsrb init_block0, [tmp + 11], 12
238 vpinsrd init_block0, [tmp + 7], 2
239 jmp %%_finish_nonce_move
241 vpinsrb init_block0, [tmp + 9], 10
243 vpinsrb init_block0, [tmp + 8], 9
245 vpinsrb init_block0, [tmp + 7], 8
247 %%_finish_nonce_move:
249 ;; Bytes 14 & 15 (message length), in Big Endian
250 mov ax, [job + _msg_len_to_hash_in_bytes]
;; (byte swap to big-endian presumably happens on a missing line)
252 vpinsrw init_block0, ax, 7
254 mov aad_len, [job + _cbcmac_aad_len]
255 ;; Initial length to authenticate (Block 0)
257 ;; Length to authenticate (Block 0 + len(AAD) (2B) + AAD padded,
258 ;; so length is multiple of 16B)
;; 2 bytes encode len(AAD); +15 then AND -16 rounds up to a 16B multiple;
;; +16 accounts for Block 0 itself
259 lea auth_len_aad, [aad_len + (2 + 15) + 16]
260 and auth_len_aad, -16
;; Use the AAD-inclusive length only when AAD is present
263 cmovne auth_len, auth_len_aad
264 ;; Update lengths to authenticate and find min length
265 vmovdqa ccm_lens, [state + _aes_ccm_lens]
266 XVPINSRW ccm_lens, xtmp0, tmp2, lane, auth_len, scale_x16
267 vmovdqa [state + _aes_ccm_lens], ccm_lens
268 vphminposuw min_len_idx, ccm_lens
;; NOTE(review): tmp presumably holds lane*64 here (init blocks are 64B
;; per lane, cf. the I*64 clears below) -- the shift is not visible
272 lea init_block_addr, [state + _aes_ccm_init_blocks + tmp]
276 or flags, (1 << 6) ; Set Adata bit in flags
279 ;; Set all 0s in last block (padding)
280 lea tmp, [init_block_addr + auth_len]
285 ;; Start copying from second block
286 lea tmp, [init_block_addr+16]
;; AAD layout: 2-byte big-endian length (insert not visible) then AAD
291 mov tmp2, [job + _cbcmac_aad]
292 memcpy_avx_64_1 tmp, tmp2, aad_len, tmp3, tmp4, xtmp0, xtmp1, xtmp2, xtmp3
296 ;; Finish Block 0 with Byte 0
297 vpinsrb init_block0, BYTE(flags), 0
298 vmovdqa [init_block_addr], init_block0
;; Lane input now points at the prepared init blocks
300 mov [state + _aes_ccm_args_in + lane * 8], init_block_addr
;; If not all lanes are busy yet, return NULL (the conditional return
;; is on lines not visible here). 0xf in the low nibble of unused_lanes
;; presumably marks the "no free lane consumed" sentinel -- verify.
302 cmp byte [state + _aes_ccm_unused_lanes], 0xf
;; -----------------------------------------------------------------
;; FLUSH path: pick any lane that still holds a job ("good lane"),
;; then clone its pointers/IV into the empty lanes so all 8 lanes can
;; run, while forcing empty lanes' lengths to 0xFFFF so they never win
;; the minimum search.
;; -----------------------------------------------------------------
307 ;; Check at least one job
311 ;; Find a lane with a non-null job
312 xor good_lane, good_lane
313 cmp QWORD [state + _aes_ccm_job_in_lane + 1*8], 0
314 cmovne good_lane, [rel one]
315 cmp QWORD [state + _aes_ccm_job_in_lane + 2*8], 0
316 cmovne good_lane, [rel two]
317 cmp QWORD [state + _aes_ccm_job_in_lane + 3*8], 0
318 cmovne good_lane, [rel three]
319 cmp qword [state + _aes_ccm_job_in_lane + 4*8], 0
320 cmovne good_lane, [rel four]
321 cmp qword [state + _aes_ccm_job_in_lane + 5*8], 0
322 cmovne good_lane, [rel five]
323 cmp qword [state + _aes_ccm_job_in_lane + 6*8], 0
324 cmovne good_lane, [rel six]
325 cmp qword [state + _aes_ccm_job_in_lane + 7*8], 0
326 cmovne good_lane, [rel seven]
328 ; Copy good_lane to empty lanes
329 movzx tmp, word [state + _aes_ccm_init_done + good_lane*2]
330 mov tmp2, [state + _aes_ccm_args_in + good_lane*8]
331 mov tmp3, [state + _aes_ccm_args_keys + good_lane*8]
332 shl good_lane, 4 ; multiply by 16
333 vmovdqa xtmp0, [state + _aes_ccm_args_IV + good_lane]
334 vmovdqa ccm_lens, [state + _aes_ccm_lens]
;; Per-lane copy; NOTE(review): presumably inside a %assign/%rep loop
;; over I = 0..7 (the %rep/%endrep and the jne skipping busy lanes are
;; on lines not visible in this chunk)
338 cmp qword [state + _aes_ccm_job_in_lane + I*8], 0
;; Force this empty lane's length word to 0xFFFF
340 vpor ccm_lens, [rel len_masks + 16*I]
341 mov [state + _aes_ccm_init_done + I*2], WORD(tmp)
342 mov [state + _aes_ccm_args_in + I*8], tmp2
343 mov [state + _aes_ccm_args_keys + I*8], tmp3
344 vmovdqa [state + _aes_ccm_args_IV + I*16], xtmp0
348 vmovdqa [state + _aes_ccm_lens], ccm_lens
350 vphminposuw min_len_idx, ccm_lens
;; Common SUBMIT/FLUSH continuation: run the shortest lane to completion
355 vpextrw len2, min_len_idx, 0 ; min value
356 vpextrw min_idx, min_len_idx, 1 ; min index (0...7)
358 mov min_job, [state + _aes_ccm_job_in_lane + min_idx*8]
362 ;; subtract min length from all lengths
363 vpshufb min_len_idx, min_len_idx, [rel dupw] ; broadcast min length
364 vpsubw ccm_lens, min_len_idx
365 vmovdqa [state + _aes_ccm_lens], ccm_lens
;; Call into AES128_CBC_MAC here (the call itself is on a line not
;; visible in this chunk); it hashes len2 bytes on all 8 lanes.
367 ; "state" and "args" are the same address, arg1
370 ; state and min_idx are intact
;; Dispatch on the finished lane's phase:
;;   0 -> hash the message's full blocks next
;;   1 -> hash the final partial block next
;;   2 -> message fully hashed; fall through to produce the tag
;; (the cmp instructions paired with these je's are not visible here)
374 movzx tmp, WORD [state + _aes_ccm_init_done + min_idx*2]
376 je %%_prepare_full_blocks_to_auth
378 je %%_prepare_partial_block_to_auth
382 ;; Set counter block 0 (reusing previous initial block 0)
;; NOTE(review): tmp presumably holds min_idx*8 from here on (shift not
;; visible), so tmp*8 = lane*64 (init blocks) and tmp*2 = lane*16 (IV)
385 vmovdqa init_block0, [state + _aes_ccm_init_blocks + tmp * 8]
;; Zero length field and M'/Adata bits -> CTR block 0 (flags = L' only)
387 vpand init_block0, [rel counter_mask]
389 mov tmp2, [state + _aes_ccm_args_keys + tmp]
390 ENCRYPT_SINGLE_BLOCK tmp2, init_block0
;; T = E(K, A0) XOR CBC-MAC digest (digest lives in the IV slot)
391 vpxor init_block0, [state + _aes_ccm_args_IV + tmp * 2]
393 ;; Copy Mlen bytes into auth_tag_output (Mlen = 4,6,8,10,12,14,16)
394 mov min_job, [state + _aes_ccm_job_in_lane + tmp]
395 mov tmp3, [min_job + _auth_tag_output_len_in_bytes]
396 mov tmp2, [min_job + _auth_tag_output]
398 simd_store_avx tmp2, init_block0, tmp3, tmp, tmp4
400 ; Update unused lanes
;; Push the freed lane index back onto the nibble list
;; (the "shl unused_lanes, 4" presumably precedes this -- not visible)
401 mov unused_lanes, [state + _aes_ccm_unused_lanes]
403 or unused_lanes, min_idx
404 mov [state + _aes_ccm_unused_lanes], unused_lanes
;; Mark job complete and detach it from the lane
409 mov qword [state + _aes_ccm_job_in_lane + min_idx*8], 0
410 or dword [job_rax + _status], STS_COMPLETED_HMAC
;; Scrub secrets of the returned job (presumably under a SAFE_DATA
;; build option; the %ifdef and the xtmp0 zeroing are not visible here)
414 %ifidn %%SUBMIT_FLUSH, SUBMIT
416 ;; Clear digest (in memory for CBC IV), counter block 0 and AAD of returned job
;; NOTE(review): min_idx*2 / min_idx*8 scaling assumes min_idx was
;; pre-shifted (by 3) on a line not visible here -- verify
417 vmovdqa [state + _aes_ccm_args_IV + min_idx * 2], xtmp0
418 vmovdqa [state + _aes_ccm_init_blocks + min_idx * 8], xtmp0
419 vmovdqa [state + _aes_ccm_init_blocks + min_idx * 8 + 16], xtmp0
420 vmovdqa [state + _aes_ccm_init_blocks + min_idx * 8 + 32], xtmp0
421 vmovdqa [state + _aes_ccm_init_blocks + min_idx * 8 + 48], xtmp0
422 mov qword [state + _aes_ccm_args_keys + min_idx], 0
;; FLUSH variant: scrub every lane that holds no job, per-lane loop
;; (presumably %assign/%rep over I -- loop directives not visible)
424 ;; Clear digest (in memory for CBC IV), counter block 0 and AAD
425 ;; of returned job and "NULL lanes"
428 cmp qword [state + _aes_ccm_job_in_lane + I*8], 0
429 jne APPEND(skip_clear_,I)
430 vmovdqa [state + _aes_ccm_args_IV + I*16], xtmp0
431 vmovdqa [state + _aes_ccm_init_blocks + I*64], xtmp0
432 vmovdqa [state + _aes_ccm_init_blocks + I*64 + 16], xtmp0
433 vmovdqa [state + _aes_ccm_init_blocks + I*64 + 32], xtmp0
434 vmovdqa [state + _aes_ccm_init_blocks + I*64 + 48], xtmp0
435 mov qword [state + _aes_ccm_args_keys + I*8], 0
436 APPEND(skip_clear_,I):
;; Epilogue: restore callee-saved GPRs and the caller's stack pointer
444 mov rbx, [rsp + _gpr_save + 8*0]
445 mov rbp, [rsp + _gpr_save + 8*1]
446 mov r12, [rsp + _gpr_save + 8*2]
447 mov r13, [rsp + _gpr_save + 8*3]
448 mov r14, [rsp + _gpr_save + 8*4]
449 mov r15, [rsp + _gpr_save + 8*5]
451 mov rsi, [rsp + _gpr_save + 8*6]
452 mov rdi, [rsp + _gpr_save + 8*7]
454 mov rsp, [rsp + _rsp_save] ; original SP
;; Phase 0 -> 1: initial blocks done; point the lane at the message
;; payload so its full 16B blocks get hashed next.
461 %%_prepare_full_blocks_to_auth:
;; For decrypt, authentication runs over the decrypted output (dst);
;; for encrypt, over the source plaintext
463 cmp dword [min_job + _cipher_direction], 2 ; DECRYPT
467 mov tmp, [min_job + _src]
468 add tmp, [min_job + _hash_start_src_offset_in_bytes]
469 jmp %%_set_init_done_1
472 mov tmp, [min_job + _dst]
475 mov [state + _aes_ccm_args_in + min_idx*8], tmp
476 mov word [state + _aes_ccm_init_done + min_idx*2], 1
478 ; Check if there are full blocks to hash
479 mov tmp, [min_job + _msg_len_to_hash_in_bytes]
;; (rounding of tmp down to a 16B multiple and the cmp feeding this je
;; are on lines not visible in this chunk)
481 je %%_prepare_partial_block_to_auth
483 ;; Update lengths to authenticate and find min length
484 vmovdqa ccm_lens, [state + _aes_ccm_lens]
485 XVPINSRW ccm_lens, xtmp0, tmp2, min_idx, tmp, scale_x16
486 vphminposuw min_len_idx, ccm_lens
487 vmovdqa [state + _aes_ccm_lens], ccm_lens
;; Phase 1 -> 2: stage the final (<16B) chunk, zero-padded, in the
;; lane's scratch area and hash it as one 16B block.
491 %%_prepare_partial_block_to_auth:
492 ; Check if partial block needs to be hashed
493 mov auth_len, [min_job + _msg_len_to_hash_in_bytes]
497 mov word [state + _aes_ccm_init_done + min_idx * 2], 2
498 ;; Update lengths to authenticate and find min length
499 vmovdqa ccm_lens, [state + _aes_ccm_lens]
;; Exactly one 16-byte block remains to hash for this lane
500 XVPINSRW ccm_lens, xtmp0, tmp2, min_idx, 16, scale_x16
501 vphminposuw min_len_idx, ccm_lens
502 vmovdqa [state + _aes_ccm_lens], ccm_lens
;; NOTE(review): tmp2 presumably holds min_idx*64 here (shift not
;; visible); +16 skips Block 0 to reach the partial-block slot
506 add tmp2, 16 ; pb[AES_BLOCK_SIZE]
507 lea init_block_addr, [state + _aes_ccm_init_blocks + tmp2]
508 mov tmp2, [state + _aes_ccm_args_in + min_idx * 8]
;; Load up to 15 remaining bytes (zero-extended) into xtmp0
510 simd_load_avx_15_1 xtmp0, tmp2, auth_len
512 %%_finish_partial_block_copy:
513 vmovdqa [init_block_addr], xtmp0
514 mov [state + _aes_ccm_args_in + min_idx * 8], init_block_addr
;; Public submit entry point: queues one CCM-auth job; returns a
;; completed job pointer or NULL (macro body handles the return value).
521 ; JOB_AES_HMAC * submit_job_aes_ccm_auth_avx(MB_MGR_CCM_OOO *state, JOB_AES_HMAC *job)
524 MKGLOBAL(SUBMIT_JOB_AES_CCM_AUTH,function,internal)
525 SUBMIT_JOB_AES_CCM_AUTH:
526 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX SUBMIT
;; Public flush entry point: forces progress when no new jobs arrive by
;; duplicating a busy lane into the idle ones; returns a completed job
;; pointer or NULL.
528 ; JOB_AES_HMAC * flush_job_aes_ccm_auth_avx(MB_MGR_CCM_OOO *state)
530 MKGLOBAL(FLUSH_JOB_AES_CCM_AUTH,function,internal)
531 FLUSH_JOB_AES_CCM_AUTH:
532 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX FLUSH
;; Mark the stack non-executable for GNU linkers (avoids execstack warning)
536 section .note.GNU-stack noalloc noexec nowrite progbits