;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; NOTE(review): every code line in this chunk carries a stale decimal
; listing-number prefix (e.g. "30 "), and those numbers skip values, so
; interior lines of the original file were elided here.  Prefixes are kept
; byte-for-byte; they must be stripped before this file can assemble.
; Include guard plus per-object-format selection of the dispatch-pointer
; size used by the multibinary macros below.
30 %ifndef _MULTIBINARY_ASM_
31 %define _MULTIBINARY_ASM_
; 32-bit ELF output: dispatch pointers are 32 bits wide (dd/dword).
33 %ifidn __OUTPUT_FORMAT__, elf32
34 %define mbin_def_ptr dd
35 %define mbin_ptr_sz dword
; (listing gap: the alternate-format arm selecting these 64-bit defines is
;  not visible here — presumably an %else/%elifidn; confirm in full file)
; 64-bit output: dispatch pointers are 64 bits wide (dq/qword).
43 %define mbin_def_ptr dq
44 %define mbin_ptr_sz qword
; (listing gap: the %endif closing the format test is not visible here)
; Default assembler feature level when the build does not define one.
53 %ifndef AS_FEATURE_LEVEL
54 %define AS_FEATURE_LEVEL 4
; (listing gap: the %endif closing this guard is not visible here)
59 ; creates the visible entry point that uses HW optimized call pointer
60 ; creates the init of the HW optimized call pointer
; mbin_interface %1 — %1 is the public function name.  Emits the exported
; symbol %1, a function pointer %1_dispatched seeded with %1_mbinit so the
; very first call performs hardware detection, and the one-time init stub.
62 %macro mbin_interface 1
; (listing gap: section directives and the %1_dispatched label that this
;  pointer definition belongs to are not visible here)
64 ; *_dispatched is defaulted to *_mbinit and replaced on first call.
65 ; Therefore, *_dispatch_init is only executed on first call.
69 mbin_def_ptr %1_mbinit
; Export the public symbol; ISAL_SYM_TYPE_FUNCTION is a symbol-type
; annotation defined elsewhere in the project — TODO confirm its expansion.
72 global %1:ISAL_SYM_TYPE_FUNCTION
74 ;;; only called the first time to setup hardware match
; (listing gap: the call into the dispatch-init routine is not visible here)
76 ;;; falls thru to execute the hw optimized code
; Indirect tail-jump through the dispatch pointer; after first-call init
; this lands directly in the selected optimized implementation.
78 jmp mbin_ptr_sz [%1_dispatched]
; (listing gap: the %endmacro terminator is not visible here)
82 ; mbin_dispatch_init parameters
83 ; Use this function when SSE/00/01 is a minimum requirement
; 1-> function name (selects the %1_dispatched pointer to publish into)
85 ; 2-> SSE/00/01 optimized function used as base
86 ; 3-> AVX or AVX/02 opt func
87 ; 4-> AVX2 or AVX/04 opt func
89 %macro mbin_dispatch_init 4
; (listing gap: register saves and the cpuid leaf-1 call that populates
;  ecx are not visible here)
97 lea mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01
; Require both AVX and OSXSAVE in cpuid1.ecx before considering %3.
101 and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
102 cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
103 lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func
104 jne _%1_init_done ; AVX is not available so end
105 mov mbin_rsi, mbin_rbx
; (listing gap: the cpuid leaf-7 call that populates ebx is not visible;
;  below, ebx is treated as cpuid7.ebx)
; Upgrade the candidate to %4 when AVX2 is reported.
111 test ebx, FLAG_CPUID7_EBX_AVX2
112 lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen4) opt func
113 cmovne mbin_rsi, mbin_rbx
115 ;; Does it have xmm and ymm support
; (listing gap: the xgetbv instruction that populates eax is not visible)
118 and eax, FLAG_XGETBV_EAX_XMM_YMM
119 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
; OS does not save XMM/YMM state: fall back to the base function %2.
; (listing gap: the conditional jump guarding this fallback is not visible)
121 lea mbin_rsi, [%2 WRT_OPT]
; Publish the selected implementation; later calls jump straight to it.
128 mov [%1_dispatched], mbin_rsi
; (listing gap: register restores, ret and %endmacro are not visible here)
134 ; mbin_dispatch_init2 parameters
135 ; Cases where only base functions are available
; 1-> function name; 2-> the single (base) implementation.
; No CPU-feature probing: the dispatch pointer is set unconditionally.
139 %macro mbin_dispatch_init2 2
; (listing gap: register saves / init label are not visible here)
143 lea mbin_rsi, [%2 WRT_OPT] ; Default
; Publish the base implementation as the dispatched function.
144 mov [%1_dispatched], mbin_rsi
; (listing gap: register restores, ret and %endmacro are not visible here)
150 ; mbin_dispatch_init_clmul 3 parameters
151 ; Use this case for CRC which needs both SSE4_1 and CLMUL
; 1-> function name; 2-> base function
154 ; 3-> SSE4_1 and CLMUL optimized function
156 %macro mbin_dispatch_init_clmul 3
; (listing gap: register saves and the cpuid leaf-1 call that populates
;  ecx are not visible here)
164 lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
168 lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func
; Both SSE4.1 and PCLMULQDQ must be present to select %3.
171 test ecx, FLAG_CPUID1_ECX_SSE4_1
; (listing gap: the conditional jump taken when SSE4.1 is absent is not
;  visible between these two tests)
173 test ecx, FLAG_CPUID1_ECX_CLMUL
174 cmovne mbin_rsi, mbin_rbx
; Publish the selected implementation.
; (listing gap before this store: init-done label likely elided)
180 mov [%1_dispatched], mbin_rsi
; (listing gap: register restores, ret and %endmacro are not visible here)
186 ; mbin_dispatch_init5 parameters
; 1-> function name; 2-> base function used when no features match
189 ; 3-> SSE4_2 or 00/01 optimized function
190 ; 4-> AVX/02 opt func
191 ; 5-> AVX2/04 opt func
193 %macro mbin_dispatch_init5 5
; (listing gap: register saves and the cpuid leaf-1 call that populates
;  ecx are not visible here)
201 lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
; Step 1: upgrade to %3 when SSE4.2 is reported in cpuid1.ecx.
206 test ecx, FLAG_CPUID1_ECX_SSE4_2
207 lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func
208 cmovne mbin_rsi, mbin_rbx
; Step 2: require both AVX and OSXSAVE before considering %4.
210 and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
211 cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
212 lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen2) opt func
213 jne _%1_init_done ; AVX is not available so end
214 mov mbin_rsi, mbin_rbx
; (listing gap: the cpuid leaf-7 call that populates ebx is not visible;
;  below, ebx is treated as cpuid7.ebx)
; Step 3: upgrade to %5 when AVX2 is reported.
220 test ebx, FLAG_CPUID7_EBX_AVX2
221 lea mbin_rbx, [%5 WRT_OPT] ; AVX (gen4) opt func
222 cmovne mbin_rsi, mbin_rbx
224 ;; Does it have xmm and ymm support
; (listing gap: the xgetbv instruction that populates eax is not visible)
227 and eax, FLAG_XGETBV_EAX_XMM_YMM
228 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
; OS does not save XMM/YMM state: drop back to the SSE4.2 function %3.
; (listing gap: the conditional jump guarding this fallback is not visible)
230 lea mbin_rsi, [%3 WRT_OPT]
; Publish the selected implementation; later calls jump straight to it.
237 mov [%1_dispatched], mbin_rsi
; (listing gap: register restores, ret and %endmacro are not visible here)
; AVX512-capable variant of init6, compiled only when the assembler
; feature level permits AVX512 encodings.
242 %if AS_FEATURE_LEVEL >= 6
244 ; mbin_dispatch_init6 parameters
; 1-> function name; 2-> base function
247 ; 3-> SSE4_2 or 00/01 optimized function
248 ; 4-> AVX/02 opt func
249 ; 5-> AVX2/04 opt func
250 ; 6-> AVX512/06 opt func
252 %macro mbin_dispatch_init6 6
; (listing gap: register saves and the cpuid leaf-1 call that populates
;  ecx are not visible here)
261 lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
265 mov ebx, ecx ; save cpuid1.ecx
266 test ecx, FLAG_CPUID1_ECX_SSE4_2
267 je _%1_init_done ; Use base function if no SSE4_2
268 lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
270 ;; Test for XMM_YMM support/AVX
271 test ecx, FLAG_CPUID1_ECX_OSXSAVE
; (listing gap: the branch skipping xgetbv when OSXSAVE is absent is not
;  visible here — xgetbv faults without OSXSAVE)
274 xgetbv ; xcr -> edx:eax
275 mov edi, eax ; save xgetbv.eax
277 and eax, FLAG_XGETBV_EAX_XMM_YMM
278 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
; (listing gap: the branch taken when XMM/YMM state is unsaved is not
;  visible here)
280 test ebx, FLAG_CPUID1_ECX_AVX ; ebx still holds cpuid1.ecx (saved above)
282 lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
; (listing gap: the cpuid leaf-7 call that reloads ebx is not visible;
;  from here on ebx is treated as cpuid7.ebx)
288 test ebx, FLAG_CPUID7_EBX_AVX2
289 je _%1_init_done ; No AVX2 possible
290 lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
; OS must save ZMM/opmask state (XCR0 bits saved in edi) for AVX512.
293 and edi, FLAG_XGETBV_EAX_ZMM_OPM
294 cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
295 jne _%1_init_done ; No AVX512 possible
; Require the full AVX512 group-1 feature set in cpuid7.ebx.
296 and ebx, FLAGS_CPUID7_EBX_AVX512_G1
297 cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
298 lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
299 cmove mbin_rsi, mbin_rbx
; Publish the selected implementation pointer.
; (listing gap before this store: the _%1_init_done label is likely elided)
307 mov [%1_dispatched], mbin_rsi
; (listing gap: register restores, ret and %endmacro are not visible here)
; (listing gap: the %else arm for "AS_FEATURE_LEVEL >= 6" is not visible;
;  this redefinition presumably sits under it — confirm in full file)
; Fallback mbin_dispatch_init6: when the assembler cannot emit AVX512,
; the AVX512 candidate (%6) is dropped and dispatch defers to init5.
313 %macro mbin_dispatch_init6 6
314 mbin_dispatch_init5 %1, %2, %3, %4, %5
; (listing gap: %endmacro and the closing %endif are not visible here)
; Variant adding a second-generation AVX512 ("update/10") candidate,
; compiled only when the assembler feature level permits those encodings.
318 %if AS_FEATURE_LEVEL >= 10
320 ; mbin_dispatch_init7 parameters
; 1-> function name; 2-> base function
323 ; 3-> SSE4_2 or 00/01 optimized function
324 ; 4-> AVX/02 opt func
325 ; 5-> AVX2/04 opt func
326 ; 6-> AVX512/06 opt func
327 ; 7-> AVX512 Update/10 opt func
329 %macro mbin_dispatch_init7 7
; (listing gap: register saves and the cpuid leaf-1 call that populates
;  ecx are not visible here)
338 lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
342 mov ebx, ecx ; save cpuid1.ecx
343 test ecx, FLAG_CPUID1_ECX_SSE4_2
344 je _%1_init_done ; Use base function if no SSE4_2
345 lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
347 ;; Test for XMM_YMM support/AVX
348 test ecx, FLAG_CPUID1_ECX_OSXSAVE
; (listing gap: the branch skipping xgetbv when OSXSAVE is absent is not
;  visible here — xgetbv faults without OSXSAVE)
351 xgetbv ; xcr -> edx:eax
352 mov edi, eax ; save xgetbv.eax
354 and eax, FLAG_XGETBV_EAX_XMM_YMM
355 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
; (listing gap: the branch taken when XMM/YMM state is unsaved is not
;  visible here)
357 test ebx, FLAG_CPUID1_ECX_AVX ; ebx still holds cpuid1.ecx (saved above)
359 lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
; (listing gap: the cpuid leaf-7 call that reloads ebx/ecx is not visible;
;  from here on ebx is treated as cpuid7.ebx and ecx as cpuid7.ecx)
365 test ebx, FLAG_CPUID7_EBX_AVX2
366 je _%1_init_done ; No AVX2 possible
367 lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
; OS must save ZMM/opmask state (XCR0 bits saved in edi) for AVX512.
370 and edi, FLAG_XGETBV_EAX_ZMM_OPM
371 cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
372 jne _%1_init_done ; No AVX512 possible
; Require the full AVX512 group-1 feature set in cpuid7.ebx.
373 and ebx, FLAGS_CPUID7_EBX_AVX512_G1
374 cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
375 lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
376 cmove mbin_rsi, mbin_rbx
; Require the AVX512 group-2 feature set in cpuid7.ecx for %7.
378 and ecx, FLAGS_CPUID7_ECX_AVX512_G2
379 cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
380 lea mbin_rbx, [%7 WRT_OPT] ; AVX512 update/10 opt (comment was mislabeled "06")
381 cmove mbin_rsi, mbin_rbx
; Publish the selected implementation pointer.
; (listing gap before this store: the _%1_init_done label is likely elided)
389 mov [%1_dispatched], mbin_rsi
; (listing gap: register restores, ret and %endmacro are not visible here)
; (listing gap: the %else arm for "AS_FEATURE_LEVEL >= 10" is not visible;
;  this redefinition presumably sits under it — confirm in full file)
; Fallback mbin_dispatch_init7: when the assembler cannot emit the newer
; AVX512 encodings, the update/10 candidate (%7) is dropped and dispatch
; defers to init6.
394 %macro mbin_dispatch_init7 7
395 mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
; (listing gap: %endmacro and the %endif closing "AS_FEATURE_LEVEL >= 10"
;  are not visible here)
399 %endif ; ifndef _MULTIBINARY_ASM_