; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
-; modification, are permitted provided that the following conditions
+; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
%define mbin_rdx rdx
%endif
+%ifndef AS_FEATURE_LEVEL
+%define AS_FEATURE_LEVEL 4
+%endif
+
;;;;
; multibinary macro:
; creates the visable entry point that uses HW optimized call pointer
mbin_def_ptr %1_mbinit
section .text
- global %1:function
+ global %1:ISAL_SYM_TYPE_FUNCTION
%1_mbinit:
;;; only called the first time to setup hardware match
call %1_dispatch_init
; 1-> function name
; 2-> base function
; 3-> SSE4_1 and CLMUL optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX512/10 opt func
;;;;;
-%macro mbin_dispatch_init_clmul 3
+%macro mbin_dispatch_init_clmul 5
section .text
%1_dispatch_init:
push mbin_rsi
push mbin_rbx
push mbin_rcx
push mbin_rdx
+ push mbin_rdi
lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
mov eax, 1
cpuid
- lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func
-
- ; Test for SSE4.2
+ mov ebx, ecx ; save cpuid1.ecx
test ecx, FLAG_CPUID1_ECX_SSE4_1
jz _%1_init_done
test ecx, FLAG_CPUID1_ECX_CLMUL
- cmovne mbin_rsi, mbin_rbx
+ jz _%1_init_done
+ lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je _%1_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+ mov edi, eax ; save xgetvb.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne _%1_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je _%1_init_done
+ lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
+
+%if AS_FEATURE_LEVEL >= 10
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je _%1_init_done ; No AVX2 possible
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne _%1_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ jne _%1_init_done
+
+ and ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ lea mbin_rbx, [%5 WRT_OPT] ; AVX512/10 opt
+ cmove mbin_rsi, mbin_rbx
+%endif
_%1_init_done:
+ pop mbin_rdi
pop mbin_rdx
pop mbin_rcx
pop mbin_rbx
ret
%endmacro
+%if AS_FEATURE_LEVEL >= 6
;;;;;
; mbin_dispatch_init6 parameters
; 1-> function name
and edi, FLAG_XGETBV_EAX_ZMM_OPM
cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
jne _%1_init_done ; No AVX512 possible
- and ebx, FLAGS_CPUID7_ECX_AVX512_G1
- cmp ebx, FLAGS_CPUID7_ECX_AVX512_G1
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
cmove mbin_rsi, mbin_rbx
ret
%endmacro
+%else
+%macro mbin_dispatch_init6 6
+ mbin_dispatch_init5 %1, %2, %3, %4, %5
+%endmacro
+%endif
+
+%if AS_FEATURE_LEVEL >= 10
+;;;;;
+; mbin_dispatch_init7 parameters
+; 1-> function name
+; 2-> base function
+; 3-> SSE4_2 or 00/01 optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX2/04 opt func
+; 6-> AVX512/06 opt func
+; 7-> AVX512 Update/10 opt func
+;;;;;
+%macro mbin_dispatch_init7 7
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ push mbin_rdi
+ lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE4_2
+ je _%1_init_done ; Use base function if no SSE4_2
+ lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je _%1_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+ mov edi, eax ; save xgetvb.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne _%1_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je _%1_init_done
+ lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
+
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je _%1_init_done ; No AVX2 possible
+ lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne _%1_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
+ cmove mbin_rsi, mbin_rbx
+
+ and ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ lea mbin_rbx, [%7 WRT_OPT] ; AVX512/06 opt
+ cmove mbin_rsi, mbin_rbx
+
+ _%1_init_done:
+ pop mbin_rdi
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+%else
+%macro mbin_dispatch_init7 7
+ mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
+%endmacro
+%endif
+
%endif ; ifndef _MULTIBINARY_ASM_