]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
1e59de90 | 5 | ; modification, are permitted provided that the following conditions |
7c673cae FG |
6 | ; are met: |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
7c673cae FG |
30 | %include "reg_sizes.asm" |
31 | %include "multibinary.asm" | |
32 | ||
;;;;;
; mbin_dispatch_init_avoton  name, sse_func, avx_func, avx2_func, avoton_func
;
; Emits the routine <name>_dispatch_init, which probes the CPU and writes
; the address of the chosen implementation into [<name>_dispatched].
; Use this macro when SSE (00/01) is the minimum requirement.
;
;  1-> function name
;  2-> SSE/00/01 optimized function, used as the baseline
;  3-> AVX or AVX/02 opt func
;  4-> AVX2 or AVX/04 opt func
;  5-> AVOTON opt func (takes the place of the SSE one on Avoton parts)
;
; Selection order:
;  - masked CPUID.1 EAX equals FLAG_CPUID1_EAX_AVOTON  -> %5, done
;  - AVX and OSXSAVE not both reported by CPUID.1 ECX  -> %2, done
;  - otherwise %3, upgraded to %4 if CPUID.7 EBX reports AVX2
;  - XGETBV(0) does not report both XMM and YMM state  -> back to %2
;
; Every register pushed on entry is popped again before returning; the
; arithmetic flags are not preserved.
;;;;;
%macro mbin_dispatch_init_avoton 5
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi

	lea	mbin_rsi, [%2 WRT_OPT]		; start from the SSE 00/01 baseline

	;; CPUID leaf 1: eax is matched against the Avoton signature,
	;; ecx carries the feature bits tested further down.
	mov	eax, 1
	cpuid
	and	eax, FLAG_CPUID1_EAX_STEP_MASK
	cmp	eax, FLAG_CPUID1_EAX_AVOTON
	jne	%%not_avoton
	lea	mbin_rsi, [%5 WRT_OPT]		; Avoton: take its dedicated function
	jmp	_%1_init_done

%%not_avoton:
	;; Both AVX and OSXSAVE must be set, otherwise stay on the baseline.
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jne	_%1_init_done
	lea	mbin_rsi, [%3 WRT_OPT]		; AVX (gen2) opt func

	;; CPUID leaf 7, sub-leaf 0: upgrade to the AVX2 function if reported.
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	lea	mbin_rdi, [%4 WRT_OPT]		; AVX2 (gen4) opt func
	test	ebx, FLAG_CPUID7_EBX_AVX2
	cmovnz	mbin_rsi, mbin_rdi

	;; XGETBV with ecx = 0: both the XMM and YMM bits must be set,
	;; otherwise drop back to the baseline.
	xor	ecx, ecx
	xgetbv
	lea	mbin_rdi, [%2 WRT_OPT]		; lea leaves the flags untouched
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmovne	mbin_rsi, mbin_rdi

_%1_init_done:
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	pop	mbin_rsi
	ret
%endmacro
95 | ||
;;;;;
; mbin_dispatch_init6_avoton  name, base, sse, avx, avx2, avx512, avoton
;
; Emits the routine <name>_dispatch_init, which probes the CPU and writes
; the address of the chosen implementation into [<name>_dispatched].
;
;  1-> function name
;  2-> base function
;  3-> SSE4_1 or 00/01 optimized function
;  4-> AVX/02 opt func
;  5-> AVX2/04 opt func
;  6-> AVX512/06 opt func
;  7-> AVOTON opt func (takes the place of the SSE one on Avoton parts)
;
; Selection order (the first failed test ends the search and keeps
; whatever has been selected so far):
;  - no SSE4_1 in CPUID.1 ECX                          -> %2
;  - masked CPUID.1 EAX equals FLAG_CPUID1_EAX_AVOTON  -> %7
;  - OSXSAVE set, XGETBV(0) reports XMM+YMM, AVX set   -> %4, else %3
;  - CPUID.7 EBX reports AVX2                          -> %5
;  - XGETBV(0) reports the ZMM/opmask bits and CPUID.7 EBX has every
;    FLAGS_CPUID7_EBX_AVX512_G1 bit                    -> %6
;
; Every register pushed on entry is popped again before returning; the
; arithmetic flags are not preserved.
;;;;;
%macro mbin_dispatch_init6_avoton 7
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi

	lea	mbin_rsi, [%2 WRT_OPT]		; start from the base function

	;; CPUID leaf 1
	mov	eax, 1
	cpuid
	mov	ebx, ecx			; keep cpuid1.ecx for the AVX test below
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	jz	_%1_init_done			; no SSE4_1: stay on the base function
	lea	mbin_rsi, [%3 WRT_OPT]		; SSE usable: 00/01 opt func

	;; Avoton parts get their own function and skip every later test.
	and	eax, FLAG_CPUID1_EAX_STEP_MASK
	cmp	eax, FLAG_CPUID1_EAX_AVOTON
	jne	%%not_avoton
	lea	mbin_rsi, [%7 WRT_OPT]
	jmp	_%1_init_done

%%not_avoton:
	;; AVX needs OSXSAVE, XMM+YMM state reported by XGETBV, and the AVX bit.
	test	ecx, FLAG_CPUID1_ECX_OSXSAVE
	jz	_%1_init_done
	xor	ecx, ecx
	xgetbv					; xcr -> edx:eax
	mov	edi, eax			; keep xgetbv.eax for the AVX512 test

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_%1_init_done
	test	ebx, FLAG_CPUID1_ECX_AVX
	jz	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT]		; AVX/02 opt func

	;; CPUID leaf 7, sub-leaf 0: AVX2
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	jz	_%1_init_done			; no AVX2 possible
	lea	mbin_rsi, [%5 WRT_OPT]		; AVX2/04 opt func

	;; AVX512: the saved XGETBV value must report the ZMM/opmask bits and
	;; CPUID.7 EBX must have every bit of the G1 feature group.
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	_%1_init_done			; no AVX512 possible
	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	jne	_%1_init_done
	lea	mbin_rsi, [%6 WRT_OPT]		; AVX512/06 opt func

_%1_init_done:
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	pop	mbin_rsi
	ret
%endmacro
174 | ||
default rel
[bits 64]

%define def_wrd		dq
%define wrd_sz		qword
%define arg1		rsi

;;; Candidate implementations of the three context-manager entry points
;;; (init / submit / flush). These are the L3 ctx level symbols; they in
;;; turn call the appropriate L2 symbols.

; Base versions
extern sha512_ctx_mgr_init_base
extern sha512_ctx_mgr_submit_base
extern sha512_ctx_mgr_flush_base

; SSE versions
extern sha512_ctx_mgr_init_sse
extern sha512_ctx_mgr_submit_sse
extern sha512_ctx_mgr_flush_sse

; sb_sse4 versions, passed below as the Avoton choice
extern sha512_ctx_mgr_init_sb_sse4
extern sha512_ctx_mgr_submit_sb_sse4
extern sha512_ctx_mgr_flush_sb_sse4

; AVX versions
extern sha512_ctx_mgr_init_avx
extern sha512_ctx_mgr_submit_avx
extern sha512_ctx_mgr_flush_avx

; AVX2 versions
extern sha512_ctx_mgr_init_avx2
extern sha512_ctx_mgr_submit_avx2
extern sha512_ctx_mgr_flush_avx2

; AVX512 versions, only declared when HAVE_AS_KNOWS_AVX512 is defined
%ifdef HAVE_AS_KNOWS_AVX512
extern sha512_ctx_mgr_init_avx512
extern sha512_ctx_mgr_submit_avx512
extern sha512_ctx_mgr_flush_avx512
%endif

;;; *_mbinit are the initial values of *_dispatched, which is updated on
;;; the first call. *_dispatch_init therefore only runs on the first call.

; Public multibinary entry points
mbin_interface sha512_ctx_mgr_init
mbin_interface sha512_ctx_mgr_submit
mbin_interface sha512_ctx_mgr_flush

%ifdef HAVE_AS_KNOWS_AVX512
	; Full ladder: base -> sse -> avx -> avx2 -> avx512, with the
	; sb_sse4 version passed as the Avoton choice.
	mbin_dispatch_init6_avoton sha512_ctx_mgr_init, \
		sha512_ctx_mgr_init_base, sha512_ctx_mgr_init_sse, \
		sha512_ctx_mgr_init_avx, sha512_ctx_mgr_init_avx2, \
		sha512_ctx_mgr_init_avx512, sha512_ctx_mgr_init_sb_sse4

	mbin_dispatch_init6_avoton sha512_ctx_mgr_submit, \
		sha512_ctx_mgr_submit_base, sha512_ctx_mgr_submit_sse, \
		sha512_ctx_mgr_submit_avx, sha512_ctx_mgr_submit_avx2, \
		sha512_ctx_mgr_submit_avx512, sha512_ctx_mgr_submit_sb_sse4

	mbin_dispatch_init6_avoton sha512_ctx_mgr_flush, \
		sha512_ctx_mgr_flush_base, sha512_ctx_mgr_flush_sse, \
		sha512_ctx_mgr_flush_avx, sha512_ctx_mgr_flush_avx2, \
		sha512_ctx_mgr_flush_avx512, sha512_ctx_mgr_flush_sb_sse4
%else
	; Without AVX512 support: sse -> avx -> avx2, with the sb_sse4
	; version passed as the Avoton choice.
	mbin_dispatch_init_avoton sha512_ctx_mgr_init, \
		sha512_ctx_mgr_init_sse, sha512_ctx_mgr_init_avx, \
		sha512_ctx_mgr_init_avx2, sha512_ctx_mgr_init_sb_sse4

	mbin_dispatch_init_avoton sha512_ctx_mgr_submit, \
		sha512_ctx_mgr_submit_sse, sha512_ctx_mgr_submit_avx, \
		sha512_ctx_mgr_submit_avx2, sha512_ctx_mgr_submit_sb_sse4

	mbin_dispatch_init_avoton sha512_ctx_mgr_flush, \
		sha512_ctx_mgr_flush_sse, sha512_ctx_mgr_flush_avx, \
		sha512_ctx_mgr_flush_avx2, sha512_ctx_mgr_flush_sb_sse4
%endif


;;;       func				core, ver, snum
slversion sha512_ctx_mgr_init,		00,   04,  0175
slversion sha512_ctx_mgr_submit,	00,   04,  0176
slversion sha512_ctx_mgr_flush,		00,   04,  0177