]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "sha1_job.asm" | |
31 | %include "sha1_mb_mgr_datastruct.asm" | |
32 | ||
33 | %include "reg_sizes.asm" | |
34 | ||
35 | extern sha1_mb_x4_sse | |
36 | default rel | |
37 | ||
38 | %ifidn __OUTPUT_FORMAT__, elf64 | |
39 | ; LINUX register definitions (the trailing comment on each line is the Windows choice) | |
40 | %define arg1 rdi ; rcx | |
41 | %define arg2 rsi ; rdx | |
42 | ||
43 | ; idx needs to be other than ARG1, ARG2, rax, r8-r11 | |
; (all of those are given other roles further down, and the flush routine
; still reads idx after its call to sha1_mb_x4_sse)
44 | %define idx rdx ; rsi | |
45 | %else | |
46 | ; WINDOWS register definitions | |
; NOTE(review): this branch is taken for every output format other than
; elf64, whereas the register save/restore code in sha1_mb_mgr_flush_sse
; tests for win64 specifically. Presumably only elf64 and win64 are
; supported targets -- confirm before assembling for anything else.
47 | %define arg1 rcx | |
48 | %define arg2 rdx | |
49 | ||
50 | ; idx needs to be other than ARG1, ARG2, rax, r8-r11 | |
51 | %define idx rsi | |
52 | %endif | |
53 | ||
54 | ; Common definitions | |
; Several names below are aliases for the same physical register, so only
; one name per group can hold a live value at any point:
;   arg1 : state, tmp3
;   arg2 : job, len2, extra_blocks, p
;   rbx  : unused_lanes, lane_data, tmp2
;   rax  : job_rax, tmp1, size_offset, tmp, start_offset
;   r8   : tmp4, lens0
55 | %define state arg1 | |
56 | %define job arg2 | |
57 | %define len2 arg2 | |
58 | ||
59 | %define unused_lanes rbx | |
60 | %define lane_data rbx | |
61 | %define tmp2 rbx | |
62 | ||
63 | %define job_rax rax | |
64 | %define tmp1 rax | |
65 | %define size_offset rax | |
66 | %define tmp rax | |
67 | %define start_offset rax | |
68 | ||
69 | %define tmp3 arg1 | |
70 | ||
71 | %define extra_blocks arg2 | |
72 | %define p arg2 | |
73 | ||
74 | %define tmp4 r8 | |
75 | %define lens0 r8 | |
76 | ||
77 | %define lens1 r9 | |
78 | %define lens2 r10 | |
79 | %define lens3 r11 | |
80 | ||
81 | ||
82 | ; STACK_SPACE needs to be an odd multiple of 8 | |
; Why: the return address leaves rsp 8 bytes off a 16-byte boundary at
; function entry, so subtracting an odd multiple of 8 makes rsp 16-byte
; aligned again. The movdqa saves at [rsp + _XMM_SAVE + 16*k] need that.
;
; Frame layout, as offsets from rsp after the sub:
;   _XMM_SAVE (0)   : 10 slots of 16 bytes (xmm6-xmm15 are stored here for win64)
;   _GPR_SAVE (160) :  2 slots of  8 bytes (rbx, plus rsi for win64)
;   + _ALIGN_SIZE   :  8 bytes of padding
;   STACK_SPACE = 160 + 16 + 8 = 184 = 23 * 8
83 | _XMM_SAVE_SIZE equ 10*16 | |
84 | _GPR_SAVE_SIZE equ 8*2 | |
85 | _ALIGN_SIZE equ 8 | |
86 | ||
87 | _XMM_SAVE equ 0 | |
88 | _GPR_SAVE equ _XMM_SAVE + _XMM_SAVE_SIZE | |
89 | STACK_SPACE equ _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE | |
90 | ||
; APPEND(a,b) pastes its two arguments into a single token using %+; the
; flush routine uses it to build the per-lane labels skip_0 .. skip_3.
91 | %define APPEND(a,b) a %+ b | |
92 | ||
93 | ; SHA1_JOB* sha1_mb_mgr_flush_sse(SHA1_MB_JOB_MGR *state) | |
94 | ; arg 1 : state, passed in arg1 (rdi when assembling for elf64, rcx otherwise) | |
;
; Finds the smallest packed length among the four lanes, calls
; sha1_mb_x4_sse with that length (skipped when it is zero), then retires
; the job held by the lane that owned the minimum.
;
; Returns (rax): the job pointer read from that lane's _job_in_lane, after
;   its _status has been set to STS_COMPLETED and five digest dwords have
;   been copied to its _result_digest; or 0 when bit 16+3 of _unused_lanes
;   is set (presumably "all four lanes idle" -- confirm against the code
;   that initialises _unused_lanes).
; Saved and restored: rbx always; rsi and xmm6-xmm15 only for win64.
; Modified: rax, arg2, idx, r8-r11, xmm0 and flags, plus whatever
;   sha1_mb_x4_sse changes (its register contract is not visible here).
; NOTE(review): [rsp + 0] is the start of the XMM save area and no win64
;   shadow space is reserved before the call, so a callee that wrote to its
;   32-byte home area would overwrite the saved xmm6/xmm7. Presumably
;   sha1_mb_x4_sse never touches that area -- confirm.
95 | global sha1_mb_mgr_flush_sse:function | |
96 | sha1_mb_mgr_flush_sse: | |
97 | ||
; Prologue: reserve the frame (this also re-aligns rsp to 16 bytes, see
; STACK_SPACE) and save the registers the caller expects to be preserved.
98 | sub rsp, STACK_SPACE | |
99 | mov [rsp + _GPR_SAVE + 8*0], rbx | |
100 | %ifidn __OUTPUT_FORMAT__, win64 | |
101 | mov [rsp + _GPR_SAVE + 8*1], rsi | |
102 | movdqa [rsp + _XMM_SAVE + 16*0], xmm6 | |
103 | movdqa [rsp + _XMM_SAVE + 16*1], xmm7 | |
104 | movdqa [rsp + _XMM_SAVE + 16*2], xmm8 | |
105 | movdqa [rsp + _XMM_SAVE + 16*3], xmm9 | |
106 | movdqa [rsp + _XMM_SAVE + 16*4], xmm10 | |
107 | movdqa [rsp + _XMM_SAVE + 16*5], xmm11 | |
108 | movdqa [rsp + _XMM_SAVE + 16*6], xmm12 | |
109 | movdqa [rsp + _XMM_SAVE + 16*7], xmm13 | |
110 | movdqa [rsp + _XMM_SAVE + 16*8], xmm14 | |
111 | movdqa [rsp + _XMM_SAVE + 16*9], xmm15 | |
112 | %endif | |
113 | ||
; Early out: bt copies bit 19 of _unused_lanes into CF; when it is set
; there is nothing to flush and the routine returns 0.
114 | mov unused_lanes, [state + _unused_lanes] | |
115 | bt unused_lanes, 16+3 | |
116 | jc return_null | |
117 | ||
118 | ; find a lane with a non-null job: idx = highest of lanes 1..3 that has one, else 0 | |
; cmovcc has no immediate form, hence the loads from the constants
; one/two/three. Lane 0 is never tested; idx simply stays 0 when lanes 1-3
; are all idle.
; NOTE(review): that presumes lane 0 is busy whenever the early-out above
; was not taken -- confirm.
119 | xor idx, idx | |
120 | cmp qword [state + _ldata + 1 * _LANE_DATA_size + _job_in_lane], 0 | |
121 | cmovne idx, [one] | |
122 | cmp qword [state + _ldata + 2 * _LANE_DATA_size + _job_in_lane], 0 | |
123 | cmovne idx, [two] | |
124 | cmp qword [state + _ldata + 3 * _LANE_DATA_size + _job_in_lane], 0 | |
125 | cmovne idx, [three] | |
126 | ||
127 | ; copy lane idx's data pointer into every lane that has no job | |
; Each idle lane also gets lens = 0xFFFFFFFF, the largest unsigned 32-bit
; value, so it can never compare below another lane in the minimum search.
; Presumably the borrowed pointer gives sha1_mb_x4_sse valid memory to read
; for idle lanes -- confirm against that routine.
128 | copy_lane_data: | |
129 | mov tmp, [state + _args + _data_ptr + 8*idx] | |
130 | ||
; The %rep expands to four copies (I = 0..3), each with its own label
; skip_I built by APPEND.
131 | %assign I 0 | |
132 | %rep 4 | |
133 | cmp qword [state + _ldata + I * _LANE_DATA_size + _job_in_lane], 0 | |
134 | jne APPEND(skip_,I) | |
135 | mov [state + _args + _data_ptr + 8*I], tmp | |
136 | mov dword [state + _lens + 4*I], 0xFFFFFFFF | |
137 | APPEND(skip_,I): | |
138 | %assign I (I+1) | |
139 | %endrep | |
140 | ||
141 | ; Find min length: unsigned minimum (cmovb) of the four lens entries, left in idx | |
; Each 32-bit lens entry is used as a packed value: the low 4 bits are
; extracted as the lane index and the remaining bits, shifted right by 4,
; are the length handed to sha1_mb_x4_sse.
; DWORD() comes from reg_sizes.asm; presumably it names the 32-bit form of
; the register, in which case each load zero-extends into the full register.
142 | mov DWORD(lens0), [state + _lens + 0*4] | |
143 | mov idx, lens0 | |
144 | mov DWORD(lens1), [state + _lens + 1*4] | |
145 | cmp lens1, idx | |
146 | cmovb idx, lens1 | |
147 | mov DWORD(lens2), [state + _lens + 2*4] | |
148 | cmp lens2, idx | |
149 | cmovb idx, lens2 | |
150 | mov DWORD(lens3), [state + _lens + 3*4] | |
151 | cmp lens3, idx | |
152 | cmovb idx, lens3 | |
; Split the minimum: idx keeps the lane nibble, len2 keeps the length bits.
; The jz consumes ZF from the second and: a zero length means there is
; nothing to hash, so the call is skipped.
153 | mov len2, idx | |
154 | and idx, 0xF | |
155 | and len2, ~0xF | |
156 | jz len_is_0 | |
157 | ||
; Charge the minimum length to every lane. len2 still has a zero low
; nibble here, so the lane-index bits of each entry are left unchanged.
; Only the low 32 bits of each result are written back.
158 | sub lens0, len2 | |
159 | sub lens1, len2 | |
160 | sub lens2, len2 | |
161 | sub lens3, len2 | |
162 | shr len2, 4 | |
163 | mov [state + _lens + 0*4], DWORD(lens0) | |
164 | mov [state + _lens + 1*4], DWORD(lens1) | |
165 | mov [state + _lens + 2*4], DWORD(lens2) | |
166 | mov [state + _lens + 3*4], DWORD(lens3) | |
167 | ||
168 | ; "state" and "args" are the same address, arg1 | |
169 | ; len is arg2 (the unpacked length produced by the shr above) | |
170 | call sha1_mb_x4_sse | |
171 | ; state and idx are intact (relied upon below; the callee is not visible here) | |
172 | ||
173 | len_is_0: | |
174 | ; process completed job "idx" | |
; lane_data = state + _ldata + idx * _LANE_DATA_size
175 | imul lane_data, idx, _LANE_DATA_size | |
176 | lea lane_data, [state + _ldata + lane_data] | |
177 | ||
; Detach the job from its lane and mark it complete. From here on rbx is
; reused as unused_lanes; lane_data is no longer needed.
178 | mov job_rax, [lane_data + _job_in_lane] | |
179 | mov qword [lane_data + _job_in_lane], 0 | |
180 | mov dword [job_rax + _status], STS_COMPLETED | |
; Record the freed lane: shift _unused_lanes left by one nibble and put
; idx in the vacated low nibble.
181 | mov unused_lanes, [state + _unused_lanes] | |
182 | shl unused_lanes, 4 | |
183 | or unused_lanes, idx | |
184 | mov [state + _unused_lanes], unused_lanes | |
185 | ||
; Gather this lane's five digest dwords. _args_digest is addressed as
; [word*16 + lane*4], so consecutive words of one lane are 16 bytes apart.
; Words 0-3 are packed into xmm0 (pinsrd is an SSE4.1 instruction) and
; word 4 goes through the 32-bit form of rbx, which is free again now that
; _unused_lanes has been stored.
186 | movd xmm0, [state + _args_digest + 4*idx + 0*16] | |
187 | pinsrd xmm0, [state + _args_digest + 4*idx + 1*16], 1 | |
188 | pinsrd xmm0, [state + _args_digest + 4*idx + 2*16], 2 | |
189 | pinsrd xmm0, [state + _args_digest + 4*idx + 3*16], 3 | |
190 | mov DWORD(tmp2), [state + _args_digest + 4*idx + 4*16] | |
191 | ||
; NOTE(review): movdqa faults unless job + _result_digest is 16-byte
; aligned; presumably the job structure guarantees that -- confirm.
192 | movdqa [job_rax + _result_digest + 0*16], xmm0 | |
193 | mov [job_rax + _result_digest + 1*16], DWORD(tmp2) | |
194 | ||
; Shared epilogue: rax already holds the return value (job pointer or 0).
195 | return: | |
196 | ||
197 | %ifidn __OUTPUT_FORMAT__, win64 | |
198 | movdqa xmm6, [rsp + _XMM_SAVE + 16*0] | |
199 | movdqa xmm7, [rsp + _XMM_SAVE + 16*1] | |
200 | movdqa xmm8, [rsp + _XMM_SAVE + 16*2] | |
201 | movdqa xmm9, [rsp + _XMM_SAVE + 16*3] | |
202 | movdqa xmm10, [rsp + _XMM_SAVE + 16*4] | |
203 | movdqa xmm11, [rsp + _XMM_SAVE + 16*5] | |
204 | movdqa xmm12, [rsp + _XMM_SAVE + 16*6] | |
205 | movdqa xmm13, [rsp + _XMM_SAVE + 16*7] | |
206 | movdqa xmm14, [rsp + _XMM_SAVE + 16*8] | |
207 | movdqa xmm15, [rsp + _XMM_SAVE + 16*9] | |
208 | mov rsi, [rsp + _GPR_SAVE + 8*1] | |
209 | %endif | |
210 | mov rbx, [rsp + _GPR_SAVE + 8*0] | |
211 | add rsp, STACK_SPACE | |
212 | ||
213 | ret | |
214 | ||
; Nothing to flush: return 0 through the shared epilogue so the saved
; registers are still restored.
215 | return_null: | |
216 | xor job_rax, job_rax | |
217 | jmp return | |
218 | ||
219 | section .data align=16 | |
220 | ||
; Lane-index constants for the "cmovne idx, [one|two|three]" loads in
; sha1_mb_mgr_flush_sse. cmovcc accepts only a register or memory source,
; so the values 1, 2 and 3 have to come from memory; each is a qword to
; match the 64-bit idx register.
; NOTE(review): nothing in the code visible here writes to these, so a
; read-only section would presumably also work -- left unchanged.
221 | align 16 | |
222 | one: dq 1 | |
223 | two: dq 2 | |
224 | three: dq 3 | |
225 |