1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 %ifndef _MULTIBINARY_ASM_
31 %define _MULTIBINARY_ASM_
; Pointer-size abstraction so the dispatch macros below work unchanged for
; 32-bit and 64-bit output formats:
;   mbin_def_ptr - data directive that emits one pointer-sized value
;   mbin_ptr_sz  - size keyword for pointer-sized memory operands
33 %ifidn __OUTPUT_FORMAT__, elf32
34 %define mbin_def_ptr dd
35 %define mbin_ptr_sz dword
; NOTE(review): the %elifidn/%else line selecting the 64-bit case is elided
; from this excerpt; the two defines below are the 64-bit (qword) variants.
43 %define mbin_def_ptr dq
44 %define mbin_ptr_sz qword
55 ; creates the visible entry point that uses HW optimized call pointer
56 ; creates the init of the HW optimized call pointer
58 %macro mbin_interface 1
; Declares the dispatch plumbing for interface %1: a pointer-sized variable
; %1_dispatched, initialized to %1_mbinit, and an entry point that tail-jumps
; through that pointer. The first call therefore lands in the init code, which
; overwrites %1_dispatched with the HW-optimized implementation.
; NOTE(review): section/global directives, the %1 entry label, %1_mbinit, and
; %endmacro are on lines elided from this excerpt.
60 ; *_dispatched is defaulted to *_mbinit and replaced on first call.
61 ; Therefore, *_dispatch_init is only executed on first call.
65 mbin_def_ptr %1_mbinit
70 ;;; only called the first time to setup hardware match
72 ;;; falls thru to execute the hw optimized code
74 jmp mbin_ptr_sz [%1_dispatched]
78 ; mbin_dispatch_init parameters
79 ; Use this function when SSE/00/01 is a minimum requirement
81 ; 2-> SSE/00/01 optimized function used as base
82 ; 3-> AVX or AVX/02 opt func
83 ; 4-> AVX2 or AVX/04 opt func
85 %macro mbin_dispatch_init 4
; Picks the best of three implementations and stores it in %1_dispatched.
;   %1 = interface base name
;   %2 = SSE/base function (default), %3 = AVX function, %4 = AVX2 function
; Uses mbin_rsi as the running "selected function" and mbin_rbx as scratch.
; NOTE(review): the cpuid/xgetbv instructions, intervening conditional jumps,
; the _%1_init_done label, and %endmacro are on lines elided from this excerpt.
93 lea mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01
; AVX requires both the AVX and OSXSAVE bits in CPUID.1:ECX (presumably
; loaded by an elided cpuid); if either is clear, keep the default and end.
97 and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
98 cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
99 lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func
100 jne _%1_init_done ; AVX is not available so end
101 mov mbin_rsi, mbin_rbx
; Upgrade to the AVX2 function when CPUID.7:EBX reports AVX2.
107 test ebx, FLAG_CPUID7_EBX_AVX2
108 lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen4) opt func
109 cmovne mbin_rsi, mbin_rbx
111 ;; Does it have xmm and ymm support
; XCR0 (from an elided xgetbv) must show OS-enabled XMM+YMM state;
; the fall-through below reverts to the base %2 function when it does not.
114 and eax, FLAG_XGETBV_EAX_XMM_YMM
115 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
117 lea mbin_rsi, [%2 WRT_OPT]
124 mov [%1_dispatched], mbin_rsi ; publish the selected implementation
130 ; mbin_dispatch_init2 parameters
131 ; Cases where only base functions are available
135 %macro mbin_dispatch_init2 2
; Trivial dispatcher for interfaces with only a base implementation:
; unconditionally stores %2 into %1_dispatched. No CPU feature checks.
; NOTE(review): surrounding label/%endmacro lines are elided from this excerpt.
139 lea mbin_rsi, [%2 WRT_OPT] ; Default
140 mov [%1_dispatched], mbin_rsi
146 ; mbin_dispatch_init_clmul 3 parameters
147 ; Use this case for CRC which needs both SSE4_1 and CLMUL
150 ; 3-> SSE4_1 and CLMUL optimized function
152 %macro mbin_dispatch_init_clmul 3
; Dispatcher for CRC-style kernels needing both SSE4.1 and PCLMULQDQ:
;   %1 = interface base name, %2 = base function, %3 = SSE4.1+CLMUL function
; Selects %3 only when both CPUID.1:ECX feature bits are set.
; NOTE(review): the cpuid instruction and the conditional jump between the
; two tests (line 168) are on lines elided from this excerpt.
160 lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
164 lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func
167 test ecx, FLAG_CPUID1_ECX_SSE4_1
169 test ecx, FLAG_CPUID1_ECX_CLMUL
170 cmovne mbin_rsi, mbin_rbx
176 mov [%1_dispatched], mbin_rsi ; publish the selected implementation
182 ; mbin_dispatch_init5 parameters
185 ; 3-> SSE4_2 or 00/01 optimized function
186 ; 4-> AVX/02 opt func
187 ; 5-> AVX2/04 opt func
189 %macro mbin_dispatch_init5 5
; Four-tier dispatcher:
;   %1 = interface base name
;   %2 = base func, %3 = SSE4.2 func, %4 = AVX func, %5 = AVX2 func
; mbin_rsi carries the current selection; mbin_rbx is scratch.
; NOTE(review): cpuid/xgetbv, several conditional jumps, _%1_init_done and
; %endmacro are on lines elided from this excerpt.
197 lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
; Upgrade to %3 when CPUID.1:ECX reports SSE4.2.
202 test ecx, FLAG_CPUID1_ECX_SSE4_2
203 lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func
204 cmovne mbin_rsi, mbin_rbx
; AVX requires both AVX and OSXSAVE bits; otherwise keep selection and end.
206 and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
207 cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
208 lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen2) opt func
209 jne _%1_init_done ; AVX is not available so end
210 mov mbin_rsi, mbin_rbx
; Upgrade to the AVX2 function when CPUID.7:EBX reports AVX2.
216 test ebx, FLAG_CPUID7_EBX_AVX2
217 lea mbin_rbx, [%5 WRT_OPT] ; AVX (gen4) opt func
218 cmovne mbin_rsi, mbin_rbx
220 ;; Does it have xmm and ymm support
; XCR0 (from an elided xgetbv) must show OS-enabled XMM+YMM state;
; the fall-through below reverts to the SSE4.2 %3 function when it does not.
223 and eax, FLAG_XGETBV_EAX_XMM_YMM
224 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
226 lea mbin_rsi, [%3 WRT_OPT]
233 mov [%1_dispatched], mbin_rsi ; publish the selected implementation
239 ; mbin_dispatch_init6 parameters
242 ; 3-> SSE4_2 or 00/01 optimized function
243 ; 4-> AVX/02 opt func
244 ; 5-> AVX2/04 opt func
245 ; 6-> AVX512/06 opt func
247 %macro mbin_dispatch_init6 6
; Five-tier dispatcher adding an AVX512 level:
;   %1 = base name, %2 = base, %3 = SSE4.2, %4 = AVX, %5 = AVX2, %6 = AVX512
; Register roles on the visible lines: mbin_rsi = current selection,
; ebx = saved CPUID.1:ECX (later reloaded from CPUID.7 by elided code),
; edi = saved xgetbv result for the later AVX512 state check.
; NOTE(review): cpuid leaves, several conditional jumps, _%1_init_done and
; %endmacro are on lines elided from this excerpt.
256 lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
260 mov ebx, ecx ; save cpuid1.ecx
261 test ecx, FLAG_CPUID1_ECX_SSE4_2
262 je _%1_init_done ; Use base function if no SSE4_2
263 lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
265 ;; Test for XMM_YMM support/AVX
266 test ecx, FLAG_CPUID1_ECX_OSXSAVE
269 xgetbv ; xcr -> edx:eax
270 mov edi, eax ; save xgetbv.eax for the AVX512 ZMM/opmask check below
; OS-enabled XMM+YMM state in XCR0 is required before any AVX tier.
272 and eax, FLAG_XGETBV_EAX_XMM_YMM
273 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
275 test ebx, FLAG_CPUID1_ECX_AVX
277 lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
; AVX2 tier: ebx presumably holds CPUID.7:EBX here (reloaded by elided code).
283 test ebx, FLAG_CPUID7_EBX_AVX2
284 je _%1_init_done ; No AVX2 possible
285 lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
; AVX512 additionally needs ZMM + opmask state enabled in XCR0 (saved in edi)
288 and edi, FLAG_XGETBV_EAX_ZMM_OPM
289 cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
290 jne _%1_init_done ; No AVX512 possible
; ...and the full group-1 AVX512 CPUID feature-bit set.
291 and ebx, FLAGS_CPUID7_ECX_AVX512_G1
292 cmp ebx, FLAGS_CPUID7_ECX_AVX512_G1
293 lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
294 cmove mbin_rsi, mbin_rbx
302 mov [%1_dispatched], mbin_rsi ; publish the selected implementation
307 %endif ; ifndef _MULTIBINARY_ASM_