; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
%include "reg_sizes.asm"
extern md5_mb_x4x2_sse
+
+[bits 64]
default rel
+section .text
; NOTE(review): the '+' / '-' line prefixes in this chunk look like
; unified-diff residue that leaked into the file; they are not valid NASM
; syntax but are preserved byte-for-byte here pending the clean upstream text.
%if 1
%ifidn __OUTPUT_FORMAT__, elf64
; Register aliases for the ELF64 (System V AMD64) build; the matching
; aliases for other output formats are presumably outside this chunk.
%define unused_lanes r9
%define lane_data r10
-
+
; job_rax carries the JOB* return value; tmp aliases the same register
; (rax), so the two names must not be live simultaneously.
%define job_rax rax
%define tmp rax
-
+
%endif ;; if 1
; STACK_SPACE needs to be an odd multiple of 8
; JOB* md5_mb_mgr_flush_sse(MB_MGR_HMAC_OOO *state)
; arg 1 : rcx : state
-global md5_mb_mgr_flush_sse:function
+mk_global md5_mb_mgr_flush_sse, function
;-----------------------------------------------------------------------
; JOB* md5_mb_mgr_flush_sse(MB_MGR_HMAC_OOO *state)
; Flushes the multi-buffer MD5 manager: finds the lane with the minimum
; remaining length and (in code not visible in this chunk) processes the
; in-flight lanes so a completed JOB* can be returned in job_rax (rax).
; NOTE(review): the comment above says "arg 1 : rcx", which is the Win64
; convention; under ELF64 the state pointer would arrive in rdi. The
; 'state' alias definition is outside this chunk -- confirm against the
; full file.
; NOTE(review): this body is fragmented diff residue -- the XMM spill
; code, the lane-copy loop, the call to md5_mb_x4x2_sse, and the
; len_is_0/return labels are missing from this view, and '+'/'-' diff
; markers remain on several lines. Code bytes are left untouched.
;-----------------------------------------------------------------------
md5_mb_mgr_flush_sse:
+ endbranch
	; Prologue: reserve stack space and spill callee-saved GPRs (only a
	; partial set of the saves is visible in this chunk).
	sub	rsp, STACK_SPACE
	mov	[rsp + _GPR_SAVE + 8*0], rbx
	mov	[rsp + _GPR_SAVE + 8*3], rbp
	; Find min length
	; Load the 8 per-lane length dwords (two xmm registers' worth).
	movdqa	xmm0, [state + _lens + 0*16]
	movdqa	xmm1, [state + _lens + 1*16]
-
+
	; pminud/palignr tournament: reduce the 8 length dwords to the
	; minimum value in the low dword of xmm2.
	movdqa	xmm2, xmm0
	pminud	xmm2, xmm1	; xmm2 has {D,C,B,A}
	palignr	xmm3, xmm2, 8	; xmm3 has {x,x,D,C}
	pminud	xmm2, xmm3	; xmm2 has {x,x,E,F}
	palignr	xmm3, xmm2, 4	; xmm3 has {x,x,x,E}
	pminud	xmm2, xmm3	; xmm2 has min value in low dword
-
+
	; The length dword presumably packs the lane index in its low nibble:
	; idx = winning lane (low 4 bits), len2 = length in blocks (>> 4).
	movd	DWORD(idx), xmm2
	mov	len2, idx
	and	idx, 0xF
	shr	len2, 4
	jz	len_is_0	; min length zero -> lane done (target outside this chunk)
-
+
	; Strip the lane-index nibble and broadcast the min length to all four
	; dwords of xmm2 (used by code not visible here to subtract from every
	; lane's length -- TODO confirm against the full file).
	pand	xmm2, [rel clear_low_nibble]
	pshufd	xmm2, xmm2, 0
	mov	[state + _unused_lanes], unused_lanes
	; Mark the flushed lane's length as max so it is never selected again,
	; and drop the in-use lane count.
	mov	dword [state + _lens + 4*idx], 0xFFFFFFFF
+	sub	dword [state + _num_lanes_inuse], 1
	; Begin gathering the winning lane's digest words (the remainder of
	; the gather/store sequence is not visible in this chunk).
	movd	xmm0, [state + _args_digest + 4*idx + 0*32]
	pinsrd	xmm0, [state + _args_digest + 4*idx + 1*32], 1
	; Epilogue (fragmented): restore registers saved in the prologue; the
	; corresponding xmm13-xmm15 saves are outside this view.
	movdqa	xmm13, [rsp + _XMM_SAVE + 16*7]
	movdqa	xmm14, [rsp + _XMM_SAVE + 16*8]
	movdqa	xmm15, [rsp + _XMM_SAVE + 16*9]
-	mov	rsi, [rsp + _GPR_SAVE + 8*1]
-	mov	rdi, [rsp + _GPR_SAVE + 8*2]
+	mov	rsi, [rsp + _GPR_SAVE + 8*1]
+	mov	rdi, [rsp + _GPR_SAVE + 8*2]
; rsi/rdi are callee-saved only on Windows x64, so this %endif presumably
; closes a win64-specific %ifidn whose opening line is outside this chunk.
%endif
-	mov	rbx, [rsp + _GPR_SAVE + 8*0]
-	mov	rbp, [rsp + _GPR_SAVE + 8*3]
-	mov	r12, [rsp + _GPR_SAVE + 8*4]
-	mov	r13, [rsp + _GPR_SAVE + 8*5]
-	mov	r14, [rsp + _GPR_SAVE + 8*6]
-	mov	r15, [rsp + _GPR_SAVE + 8*7]
+	mov	rbx, [rsp + _GPR_SAVE + 8*0]
+	mov	rbp, [rsp + _GPR_SAVE + 8*3]
+	mov	r12, [rsp + _GPR_SAVE + 8*4]
+	mov	r13, [rsp + _GPR_SAVE + 8*5]
+	mov	r14, [rsp + _GPR_SAVE + 8*6]
+	mov	r15, [rsp + _GPR_SAVE + 8*7]
	add	rsp, STACK_SPACE
	ret
; No completed job available: return NULL in job_rax (rax).
return_null:
	xor	job_rax, job_rax
	jmp	return	; 'return' label is outside this chunk
-
+
section .data align=16