]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
f91f0fd5 | 5 | ; modification, are permitted provided that the following conditions |
7c673cae FG |
6 | ; are met: |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %ifndef _MULTIBINARY_ASM_ | |
31 | %define _MULTIBINARY_ASM_ | |
32 | ||
33 | %ifidn __OUTPUT_FORMAT__, elf32 | |
34 | %define mbin_def_ptr dd | |
35 | %define mbin_ptr_sz dword | |
36 | %define mbin_rdi edi | |
37 | %define mbin_rsi esi | |
38 | %define mbin_rax eax | |
39 | %define mbin_rbx ebx | |
40 | %define mbin_rcx ecx | |
41 | %define mbin_rdx edx | |
42 | %else | |
43 | %define mbin_def_ptr dq | |
44 | %define mbin_ptr_sz qword | |
45 | %define mbin_rdi rdi | |
46 | %define mbin_rsi rsi | |
47 | %define mbin_rax rax | |
48 | %define mbin_rbx rbx | |
49 | %define mbin_rcx rcx | |
50 | %define mbin_rdx rdx | |
51 | %endif | |
52 | ||
f91f0fd5 TL |
53 | %ifndef AS_FEATURE_LEVEL |
54 | %define AS_FEATURE_LEVEL 4 | |
55 | %endif | |
56 | ||
7c673cae FG |
57 | ;;;; |
58 | ; mbin_interface macro (1 parameter: function name): | |
59 | ; creates the visible entry point that jumps through the HW optimized call pointer | |
60 | ; creates the first-call stub that runs the init of the HW optimized call pointer | |
61 | ;;;; | |
62 | %macro mbin_interface 1 | |
63 | ;;;; | |
64 | ; *_dispatched is defaulted to *_mbinit and is overwritten by *_dispatch_init on first call. | |
65 | ; Therefore, *_dispatch_init is only executed on first call. | |
66 | ;;;; | |
67 | section .data | |
68 | %1_dispatched: | |
69 | mbin_def_ptr %1_mbinit | |
70 | ||
71 | section .text | |
20effc67 | 72 | mk_global %1, function |
7c673cae | 73 | %1_mbinit: |
20effc67 | 74 | endbranch |
7c673cae FG |
75 | ;;; only reached the first time: call the init routine to set up the hardware match |
76 | call %1_dispatch_init | |
77 | ;;; falls thru to the public entry, which jumps to the hw optimized code | |
78 | %1: | |
20effc67 | 79 | endbranch |
7c673cae FG |
80 | jmp mbin_ptr_sz [%1_dispatched] |
81 | %endmacro | |
82 | ||
83 | ;;;;; | |
84 | ; mbin_dispatch_init parameters (4) | |
85 | ; Use this macro when SSE/00/01 is a minimum requirement | |
86 | ; 1-> function name | |
87 | ; 2-> SSE/00/01 optimized function used as base | |
88 | ; 3-> AVX or AVX/02 opt func | |
89 | ; 4-> AVX2 or AVX/04 opt func | |
90 | ;;;;; | |
91 | %macro mbin_dispatch_init 4 | |
92 | section .text | |
93 | %1_dispatch_init: | |
94 | push mbin_rsi | |
95 | push mbin_rax | |
96 | push mbin_rbx | |
97 | push mbin_rcx | |
98 | push mbin_rdx | |
99 | lea mbin_rsi, [%2 WRT_OPT] ; rsi holds the selection; default to SSE 00/01 | |
100 | ||
101 | mov eax, 1 | |
102 | cpuid | |
103 | and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | |
104 | cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | |
105 | lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func; lea leaves the cmp flags intact for the jne | |
106 | jne _%1_init_done ; AVX or OSXSAVE is not available so end with the default | |
107 | mov mbin_rsi, mbin_rbx | |
108 | ||
109 | ;; Try for AVX2 (cpuid leaf 7, subleaf 0) | |
110 | xor ecx, ecx | |
111 | mov eax, 7 | |
112 | cpuid | |
113 | test ebx, FLAG_CPUID7_EBX_AVX2 | |
114 | lea mbin_rbx, [%4 WRT_OPT] ; AVX2 (gen4) opt func | |
115 | cmovne mbin_rsi, mbin_rbx | |
116 | ||
117 | ;; Does xgetbv report xmm and ymm support; if not, fall back to the SSE 00/01 base | |
118 | xor ecx, ecx | |
119 | xgetbv | |
120 | and eax, FLAG_XGETBV_EAX_XMM_YMM | |
121 | cmp eax, FLAG_XGETBV_EAX_XMM_YMM | |
122 | je _%1_init_done | |
123 | lea mbin_rsi, [%2 WRT_OPT] | |
124 | ||
125 | _%1_init_done: | |
126 | pop mbin_rdx | |
127 | pop mbin_rcx | |
128 | pop mbin_rbx | |
129 | pop mbin_rax | |
130 | mov [%1_dispatched], mbin_rsi | |
131 | pop mbin_rsi | |
132 | ret | |
133 | %endmacro | |
134 | ||
135 | ;;;;; | |
136 | ; mbin_dispatch_init2 parameters (2) | |
137 | ; Cases where only base functions are available; no CPU feature checks are made | |
138 | ; 1-> function name | |
139 | ; 2-> base function | |
140 | ;;;;; | |
141 | %macro mbin_dispatch_init2 2 | |
142 | section .text | |
143 | %1_dispatch_init: | |
144 | push mbin_rsi | |
145 | lea mbin_rsi, [%2 WRT_OPT] ; Always select the base function | |
146 | mov [%1_dispatched], mbin_rsi | |
147 | pop mbin_rsi | |
148 | ret | |
149 | %endmacro | |
150 | ||
224ce89b WB |
151 | ;;;;; |
152 | ; mbin_dispatch_init_clmul parameters (5) | |
153 | ; Use this case for CRC which needs both SSE4_1 and CLMUL | |
154 | ; 1-> function name | |
155 | ; 2-> base function | |
156 | ; 3-> SSE4_1 and CLMUL optimized function | |
f91f0fd5 TL |
157 | ; 4-> AVX/02 opt func |
158 | ; 5-> AVX512/10 opt func (only used when AS_FEATURE_LEVEL >= 10) | |
224ce89b | 159 | ;;;;; |
f91f0fd5 | 160 | %macro mbin_dispatch_init_clmul 5 |
224ce89b WB |
161 | section .text |
162 | %1_dispatch_init: | |
163 | push mbin_rsi | |
164 | push mbin_rax | |
165 | push mbin_rbx | |
166 | push mbin_rcx | |
167 | push mbin_rdx | |
f91f0fd5 | 168 | push mbin_rdi |
224ce89b WB |
169 | lea mbin_rsi, [%2 WRT_OPT] ; rsi holds the selection; default - use base function |
170 | ||
171 | mov eax, 1 | |
172 | cpuid | |
f91f0fd5 | 173 | mov ebx, ecx ; save cpuid1.ecx for the AVX test after xgetbv |
224ce89b WB |
174 | test ecx, FLAG_CPUID1_ECX_SSE4_1 |
175 | jz _%1_init_done | |
176 | test ecx, FLAG_CPUID1_ECX_CLMUL | |
f91f0fd5 TL |
177 | jz _%1_init_done |
178 | lea mbin_rsi, [%3 WRT_OPT] ; SSE4_1 and CLMUL present so use 00/01 opt | |
179 | ||
180 | ;; Test for XMM_YMM support/AVX (xgetbv is only executed when OSXSAVE is set) | |
181 | test ecx, FLAG_CPUID1_ECX_OSXSAVE | |
182 | je _%1_init_done | |
183 | xor ecx, ecx | |
184 | xgetbv ; xcr0 (ecx = 0) -> edx:eax | |
185 | mov edi, eax ; save xgetbv.eax for the AVX512 test below | |
186 | ||
187 | and eax, FLAG_XGETBV_EAX_XMM_YMM | |
188 | cmp eax, FLAG_XGETBV_EAX_XMM_YMM | |
189 | jne _%1_init_done | |
190 | test ebx, FLAG_CPUID1_ECX_AVX | |
191 | je _%1_init_done | |
192 | lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt | |
193 | ||
194 | %if AS_FEATURE_LEVEL >= 10 | |
195 | ;; Test for AVX2 (cpuid leaf 7, subleaf 0); required before AVX512 is considered | |
196 | xor ecx, ecx | |
197 | mov eax, 7 | |
198 | cpuid | |
199 | test ebx, FLAG_CPUID7_EBX_AVX2 | |
200 | je _%1_init_done ; No AVX2 possible, keep AVX/02 | |
201 | ||
202 | ;; Test for AVX512: xgetbv zmm/opmask bits, then cpuid7 ebx (G1) and ecx (G2) | |
203 | and edi, FLAG_XGETBV_EAX_ZMM_OPM | |
204 | cmp edi, FLAG_XGETBV_EAX_ZMM_OPM | |
205 | jne _%1_init_done ; No AVX512 possible, keep AVX/02 | |
206 | and ebx, FLAGS_CPUID7_EBX_AVX512_G1 | |
207 | cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 | |
208 | jne _%1_init_done | |
209 | ||
210 | and ecx, FLAGS_CPUID7_ECX_AVX512_G2 | |
211 | cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 | |
212 | lea mbin_rbx, [%5 WRT_OPT] ; AVX512/10 opt; lea leaves the cmp flags intact for the cmove | |
213 | cmove mbin_rsi, mbin_rbx | |
214 | %endif | |
224ce89b | 215 | _%1_init_done: |
f91f0fd5 | 216 | pop mbin_rdi |
224ce89b WB |
217 | pop mbin_rdx |
218 | pop mbin_rcx | |
219 | pop mbin_rbx | |
220 | pop mbin_rax | |
221 | mov [%1_dispatched], mbin_rsi | |
222 | pop mbin_rsi | |
223 | ret | |
224 | %endmacro | |
225 | ||
7c673cae FG |
226 | ;;;;; |
227 | ; mbin_dispatch_init5 parameters (5) | |
228 | ; 1-> function name | |
229 | ; 2-> base function | |
224ce89b | 230 | ; 3-> SSE4_2 or 00/01 optimized function |
7c673cae FG |
231 | ; 4-> AVX/02 opt func |
232 | ; 5-> AVX2/04 opt func | |
233 | ;;;;; | |
234 | %macro mbin_dispatch_init5 5 | |
235 | section .text | |
236 | %1_dispatch_init: | |
237 | push mbin_rsi | |
238 | push mbin_rax | |
239 | push mbin_rbx | |
240 | push mbin_rcx | |
241 | push mbin_rdx | |
242 | lea mbin_rsi, [%2 WRT_OPT] ; rsi holds the selection; default - use base function | |
243 | ||
244 | mov eax, 1 | |
245 | cpuid | |
224ce89b WB |
246 | ; Test for SSE4.2 (cpuid leaf 1 ecx) |
247 | test ecx, FLAG_CPUID1_ECX_SSE4_2 | |
7c673cae FG |
248 | lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func; lea leaves the test flags intact for the cmovne |
249 | cmovne mbin_rsi, mbin_rbx | |
250 | ||
251 | and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | |
252 | cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | |
253 | lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen2) opt func | |
254 | jne _%1_init_done ; AVX or OSXSAVE is not available so end | |
255 | mov mbin_rsi, mbin_rbx | |
256 | ||
257 | ;; Try for AVX2 (cpuid leaf 7, subleaf 0) | |
258 | xor ecx, ecx | |
259 | mov eax, 7 | |
260 | cpuid | |
261 | test ebx, FLAG_CPUID7_EBX_AVX2 | |
262 | lea mbin_rbx, [%5 WRT_OPT] ; AVX2 (gen4) opt func | |
263 | cmovne mbin_rsi, mbin_rbx | |
264 | ||
265 | ;; Does xgetbv report xmm and ymm support; if not, fall back to the SSE 00/01 func | |
266 | xor ecx, ecx | |
267 | xgetbv | |
268 | and eax, FLAG_XGETBV_EAX_XMM_YMM | |
269 | cmp eax, FLAG_XGETBV_EAX_XMM_YMM | |
270 | je _%1_init_done | |
271 | lea mbin_rsi, [%3 WRT_OPT] | |
272 | ||
273 | _%1_init_done: | |
274 | pop mbin_rdx | |
275 | pop mbin_rcx | |
276 | pop mbin_rbx | |
277 | pop mbin_rax | |
278 | mov [%1_dispatched], mbin_rsi | |
279 | pop mbin_rsi | |
280 | ret | |
281 | %endmacro | |
282 | ||
f91f0fd5 | 283 | %if AS_FEATURE_LEVEL >= 6 |
7c673cae FG |
284 | ;;;;; |
285 | ; mbin_dispatch_init6 parameters (6) | |
286 | ; 1-> function name | |
287 | ; 2-> base function | |
224ce89b | 288 | ; 3-> SSE4_2 or 00/01 optimized function |
7c673cae FG |
289 | ; 4-> AVX/02 opt func |
290 | ; 5-> AVX2/04 opt func | |
291 | ; 6-> AVX512/06 opt func (ignored when AS_FEATURE_LEVEL < 6, where mbin_dispatch_init5 is used) | |
292 | ;;;;; | |
293 | %macro mbin_dispatch_init6 6 | |
294 | section .text | |
295 | %1_dispatch_init: | |
296 | push mbin_rsi | |
297 | push mbin_rax | |
298 | push mbin_rbx | |
299 | push mbin_rcx | |
300 | push mbin_rdx | |
301 | push mbin_rdi | |
302 | lea mbin_rsi, [%2 WRT_OPT] ; rsi holds the selection; default - use base function | |
303 | ||
304 | mov eax, 1 | |
305 | cpuid | |
306 | mov ebx, ecx ; save cpuid1.ecx for the AVX test after xgetbv | |
224ce89b WB |
307 | test ecx, FLAG_CPUID1_ECX_SSE4_2 |
308 | je _%1_init_done ; Use base function if no SSE4_2 | |
7c673cae FG |
309 | lea mbin_rsi, [%3 WRT_OPT] ; SSE4_2 present so use 00/01 opt |
310 | ||
311 | ;; Test for XMM_YMM support/AVX (xgetbv is only executed when OSXSAVE is set) | |
312 | test ecx, FLAG_CPUID1_ECX_OSXSAVE | |
313 | je _%1_init_done | |
314 | xor ecx, ecx | |
315 | xgetbv ; xcr0 (ecx = 0) -> edx:eax | |
316 | mov edi, eax ; save xgetbv.eax for the AVX512 test below | |
317 | ||
318 | and eax, FLAG_XGETBV_EAX_XMM_YMM | |
319 | cmp eax, FLAG_XGETBV_EAX_XMM_YMM | |
320 | jne _%1_init_done | |
321 | test ebx, FLAG_CPUID1_ECX_AVX | |
322 | je _%1_init_done | |
323 | lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt | |
324 | ||
325 | ;; Test for AVX2 (cpuid leaf 7, subleaf 0) | |
326 | xor ecx, ecx | |
327 | mov eax, 7 | |
328 | cpuid | |
329 | test ebx, FLAG_CPUID7_EBX_AVX2 | |
330 | je _%1_init_done ; No AVX2 possible, keep AVX/02 | |
331 | lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func | |
332 | ||
333 | ;; Test for AVX512: xgetbv zmm/opmask bits, then cpuid7 ebx (G1) | |
334 | and edi, FLAG_XGETBV_EAX_ZMM_OPM | |
335 | cmp edi, FLAG_XGETBV_EAX_ZMM_OPM | |
336 | jne _%1_init_done ; No AVX512 possible, keep AVX2/04 | |
f91f0fd5 TL |
337 | and ebx, FLAGS_CPUID7_EBX_AVX512_G1 |
338 | cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 | |
7c673cae FG |
339 | lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt; lea leaves the cmp flags intact for the cmove |
340 | cmove mbin_rsi, mbin_rbx | |
341 | ||
342 | _%1_init_done: | |
343 | pop mbin_rdi | |
344 | pop mbin_rdx | |
345 | pop mbin_rcx | |
346 | pop mbin_rbx | |
347 | pop mbin_rax | |
348 | mov [%1_dispatched], mbin_rsi | |
349 | pop mbin_rsi | |
350 | ret | |
351 | %endmacro | |
352 | ||
f91f0fd5 TL |
353 | %else |
354 | %macro mbin_dispatch_init6 6 | |
355 | mbin_dispatch_init5 %1, %2, %3, %4, %5 | |
356 | %endmacro | |
357 | %endif | |
358 | ||
359 | %if AS_FEATURE_LEVEL >= 10 | |
360 | ;;;;; | |
361 | ; mbin_dispatch_init7 parameters (7) | |
362 | ; 1-> function name | |
363 | ; 2-> base function | |
364 | ; 3-> SSE4_2 or 00/01 optimized function | |
365 | ; 4-> AVX/02 opt func | |
366 | ; 5-> AVX2/04 opt func | |
367 | ; 6-> AVX512/06 opt func | |
368 | ; 7-> AVX512 Update/10 opt func (ignored when AS_FEATURE_LEVEL < 10, where mbin_dispatch_init6 is used) | |
369 | ;;;;; | |
370 | %macro mbin_dispatch_init7 7 | |
371 | section .text | |
372 | %1_dispatch_init: | |
373 | push mbin_rsi | |
374 | push mbin_rax | |
375 | push mbin_rbx | |
376 | push mbin_rcx | |
377 | push mbin_rdx | |
378 | push mbin_rdi | |
379 | lea mbin_rsi, [%2 WRT_OPT] ; rsi holds the selection; default - use base function | |
380 | ||
381 | mov eax, 1 | |
382 | cpuid | |
383 | mov ebx, ecx ; save cpuid1.ecx for the AVX test after xgetbv | |
384 | test ecx, FLAG_CPUID1_ECX_SSE4_2 | |
385 | je _%1_init_done ; Use base function if no SSE4_2 | |
386 | lea mbin_rsi, [%3 WRT_OPT] ; SSE4_2 present so use 00/01 opt | |
387 | ||
388 | ;; Test for XMM_YMM support/AVX (xgetbv is only executed when OSXSAVE is set) | |
389 | test ecx, FLAG_CPUID1_ECX_OSXSAVE | |
390 | je _%1_init_done | |
391 | xor ecx, ecx | |
392 | xgetbv ; xcr0 (ecx = 0) -> edx:eax | |
393 | mov edi, eax ; save xgetbv.eax for the AVX512 test below | |
394 | ||
395 | and eax, FLAG_XGETBV_EAX_XMM_YMM | |
396 | cmp eax, FLAG_XGETBV_EAX_XMM_YMM | |
397 | jne _%1_init_done | |
398 | test ebx, FLAG_CPUID1_ECX_AVX | |
399 | je _%1_init_done | |
400 | lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt | |
401 | ||
402 | ;; Test for AVX2 (cpuid leaf 7, subleaf 0) | |
403 | xor ecx, ecx | |
404 | mov eax, 7 | |
405 | cpuid | |
406 | test ebx, FLAG_CPUID7_EBX_AVX2 | |
407 | je _%1_init_done ; No AVX2 possible, keep AVX/02 | |
408 | lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func | |
409 | ||
410 | ;; Test for AVX512: xgetbv zmm/opmask bits, then cpuid7 ebx (G1), then cpuid7 ecx (G2) | |
411 | and edi, FLAG_XGETBV_EAX_ZMM_OPM | |
412 | cmp edi, FLAG_XGETBV_EAX_ZMM_OPM | |
413 | jne _%1_init_done ; No AVX512 possible, keep AVX2/04 | |
414 | and ebx, FLAGS_CPUID7_EBX_AVX512_G1 | |
415 | cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 | |
416 | jne _%1_init_done ; G1 incomplete: keep AVX2/04 and never consider the /10 func | |
417 | lea mbin_rsi, [%6 WRT_OPT] ; AVX512/06 opt | |
418 | ||
419 | and ecx, FLAGS_CPUID7_ECX_AVX512_G2 | |
420 | cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 | |
421 | lea mbin_rbx, [%7 WRT_OPT] ; AVX512 Update/10 opt; lea leaves the cmp flags intact for the cmove | |
422 | cmove mbin_rsi, mbin_rbx | |
423 | ||
424 | _%1_init_done: | |
425 | pop mbin_rdi | |
426 | pop mbin_rdx | |
427 | pop mbin_rcx | |
428 | pop mbin_rbx | |
429 | pop mbin_rax | |
430 | mov [%1_dispatched], mbin_rsi | |
431 | pop mbin_rsi | |
432 | ret | |
433 | %endmacro | |
434 | %else | |
435 | %macro mbin_dispatch_init7 7 | |
436 | mbin_dispatch_init6 %1, %2, %3, %4, %5, %6 | |
437 | %endmacro | |
438 | %endif | |
439 | ||
7c673cae | 440 | %endif ; ifndef _MULTIBINARY_ASM_ |