]> git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/include/multibinary.asm
update sources to v12.1.1
[ceph.git] / ceph / src / isa-l / include / multibinary.asm
CommitLineData
7c673cae
FG
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3;
4; Redistribution and use in source and binary forms, with or without
5; modification, are permitted provided that the following conditions
6; are met:
7; * Redistributions of source code must retain the above copyright
8; notice, this list of conditions and the following disclaimer.
9; * Redistributions in binary form must reproduce the above copyright
10; notice, this list of conditions and the following disclaimer in
11; the documentation and/or other materials provided with the
12; distribution.
13; * Neither the name of Intel Corporation nor the names of its
14; contributors may be used to endorse or promote products derived
15; from this software without specific prior written permission.
16;
17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%ifndef _MULTIBINARY_ASM_
31%define _MULTIBINARY_ASM_
32
;;;;
; Pointer-width aliases used by the macros in this file, so that the same
; macro text can be assembled for 32-bit ELF and for the other output formats.
;   mbin_def_ptr  data directive used to emit one pointer-sized value
;   mbin_ptr_sz   operand-size keyword for a pointer-sized memory operand
;   mbin_r??      general purpose register of pointer width
;;;;
%ifidn __OUTPUT_FORMAT__, elf32
 ; elf32 output: 4-byte pointers, 32-bit registers
 %define mbin_ptr_sz    dword
 %define mbin_def_ptr   dd
 %define mbin_rax       eax
 %define mbin_rbx       ebx
 %define mbin_rcx       ecx
 %define mbin_rdx       edx
 %define mbin_rsi       esi
 %define mbin_rdi       edi
%else
 ; every other output format: 8-byte pointers, 64-bit registers
 %define mbin_ptr_sz    qword
 %define mbin_def_ptr   dq
 %define mbin_rax       rax
 %define mbin_rbx       rbx
 %define mbin_rcx       rcx
 %define mbin_rdx       rdx
 %define mbin_rsi       rsi
 %define mbin_rdi       rdi
%endif
52
;;;;
; mbin_interface: multibinary entry point
;   1-> function name
;
; Emits:
;   <name>_dispatched  pointer-sized variable in .data, initialised to
;                      <name>_mbinit
;   <name>_mbinit      first-call stub: calls <name>_dispatch_init and then
;                      falls through into <name>
;   <name>             the visible (global) entry point: an indirect jump
;                      through <name>_dispatched
;
; <name>_dispatch_init is not defined by this macro; the mbin_dispatch_init*
; macros in this file each define a label of that name which stores the
; selected implementation into <name>_dispatched.
;;;;
%macro mbin_interface 1
        section .data
        ;;; Starts out pointing at the init stub; *_dispatch_init overwrites
        ;;; it, so the stub only runs while the pointer still has this value.
%1_dispatched:
        mbin_def_ptr    %1_mbinit

        section .text
        global  %1:function
%1_mbinit:
        ;;; Select the implementation for this machine ...
        call    %1_dispatch_init
        ;;; ... then fall through and jump to whatever was selected.
%1:
        jmp     mbin_ptr_sz [%1_dispatched]
%endmacro
76
;;;;;
; mbin_dispatch_init parameters
; Use when an SSE/00/01 implementation is the minimum requirement.
;   1-> function name
;   2-> SSE/00/01 optimized function used as base
;   3-> AVX or AVX/02 opt func
;   4-> AVX2 or AVX/04 opt func
;
; Defines <name>_dispatch_init, which stores the address of the selected
; implementation into <name>_dispatched. Every general purpose register it
; touches is pushed on entry and popped before returning; the flags are
; modified.
;;;;;
%macro mbin_dispatch_init 4
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx

        ;; mbin_rsi holds the current choice; begin with the SSE 00/01 base
        lea     mbin_rsi, [%2 WRT_OPT]

        ;; CPUID leaf 1: both the AVX and the OSXSAVE bit must be set
        mov     eax, 1
        cpuid
        and     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        cmp     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        jne     _%1_init_done                   ; no AVX: keep the base
        lea     mbin_rsi, [%3 WRT_OPT]          ; AVX (gen2) opt func

        ;; CPUID leaf 7, subleaf 0: upgrade to AVX2 when its bit is set
        xor     ecx, ecx
        mov     eax, 7
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        lea     mbin_rbx, [%4 WRT_OPT]          ; AVX2 (gen4) opt func; lea leaves flags alone
        cmovnz  mbin_rsi, mbin_rbx

        ;; XGETBV with ecx = 0: without XMM and YMM state support go back
        ;; to the base function
        xor     ecx, ecx
        xgetbv
        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        lea     mbin_rbx, [%2 WRT_OPT]
        cmovne  mbin_rsi, mbin_rbx

_%1_init_done:
        mov     [%1_dispatched], mbin_rsi       ; publish the choice
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        pop     mbin_rsi
        ret
%endmacro
128
;;;;;
; mbin_dispatch_init2 parameters
; For functions that only have a base implementation.
;   1-> function name
;   2-> base function
;
; Defines <name>_dispatch_init, which stores the address of the base function
; into <name>_dispatched without any CPU feature detection. mbin_rsi is used
; as scratch and restored before returning.
;;;;;
%macro mbin_dispatch_init2 2
        section .text
%1_dispatch_init:
        push    mbin_rsi
        lea     mbin_rsi, [%2 WRT_OPT]          ; the one and only candidate
        mov     [%1_dispatched], mbin_rsi
        pop     mbin_rsi
        ret
%endmacro
144
224ce89b
WB
;;;;;
; mbin_dispatch_init_clmul 3 parameters
; Use this case for CRC which needs both SSE4_1 and CLMUL
;   1-> function name
;   2-> base function
;   3-> SSE4_1 and CLMUL optimized function
;
; Defines <name>_dispatch_init, which stores the address of the selected
; implementation into <name>_dispatched. Every general purpose register it
; touches is pushed on entry and popped before returning; the flags are
; modified.
;;;;;
%macro mbin_dispatch_init_clmul 3
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx

        ;; mbin_rsi holds the current choice; default to the base function
        lea     mbin_rsi, [%2 WRT_OPT]

        mov     eax, 1
        cpuid                                   ; feature bits -> ecx

        ;; The optimized function is taken only when both bits are set
        test    ecx, FLAG_CPUID1_ECX_SSE4_1
        jz      _%1_init_done                   ; no SSE4.1
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      _%1_init_done                   ; no CLMUL
        lea     mbin_rsi, [%3 WRT_OPT]          ; SSE4_1 + CLMUL opt func

_%1_init_done:
        mov     [%1_dispatched], mbin_rsi       ; publish the choice
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        pop     mbin_rsi
        ret
%endmacro
180
7c673cae
FG
;;;;;
; mbin_dispatch_init5 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;
; Defines <name>_dispatch_init, which stores the address of the selected
; implementation into <name>_dispatched. Every general purpose register it
; touches is pushed on entry and popped before returning; the flags are
; modified.
;
; Selection order:
;   - base function by default
;   - the SSE function if CPUID.1:ECX reports SSE4_2
;   - the AVX function if CPUID.1:ECX reports AVX and OSXSAVE and XGETBV
;     reports XMM and YMM state support
;   - the AVX2 function if, in addition, CPUID.7:EBX reports AVX2
;
; The XGETBV check is made before an AVX function is selected, so that a
; failed check keeps whatever was selected so far (base or SSE) rather than
; selecting the SSE function without regard to the SSE4_2 bit.
;;;;;
%macro mbin_dispatch_init5 5
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx

        ;; mbin_rsi holds the current choice; default to the base function
        lea     mbin_rsi, [%2 WRT_OPT]

        mov     eax, 1
        cpuid

        ;; Test for SSE4.2
        test    ecx, FLAG_CPUID1_ECX_SSE4_2
        lea     mbin_rbx, [%3 WRT_OPT]          ; SSE opt func; lea leaves flags alone
        cmovne  mbin_rsi, mbin_rbx

        ;; AVX needs both the AVX and the OSXSAVE bit; OSXSAVE is also what
        ;; makes the XGETBV below safe to execute
        and     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        cmp     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        jne     _%1_init_done                   ; AVX is not available so end

        ;; Does it have xmm and ymm support
        xor     ecx, ecx
        xgetbv
        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     _%1_init_done                   ; keep the base/SSE choice

        lea     mbin_rsi, [%4 WRT_OPT]          ; AVX (gen2) opt func

        ;; Try for AVX2
        xor     ecx, ecx
        mov     eax, 7
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        lea     mbin_rbx, [%5 WRT_OPT]          ; AVX2 (gen4) opt func
        cmovne  mbin_rsi, mbin_rbx

_%1_init_done:
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        mov     [%1_dispatched], mbin_rsi       ; publish the choice
        pop     mbin_rsi
        ret
%endmacro
237
;;;;;
; mbin_dispatch_init6 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;   6-> AVX512/06 opt func
;
; Defines <name>_dispatch_init, which stores the address of the selected
; implementation into <name>_dispatched. The checks form a ladder: each step
; that passes upgrades the choice held in mbin_rsi, and the first step that
; fails ends the selection with the choice made so far. Every general purpose
; register it touches is pushed on entry and popped before returning; the
; flags are modified.
;;;;;
%macro mbin_dispatch_init6 6
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx
        push    mbin_rdi

        ;; Step 0: base function
        lea     mbin_rsi, [%2 WRT_OPT]

        mov     eax, 1
        cpuid
        mov     ebx, ecx                        ; ebx = copy of cpuid1.ecx

        ;; Step 1: SSE4_2 -> 00/01 opt func
        test    ecx, FLAG_CPUID1_ECX_SSE4_2
        jz      _%1_init_done                   ; no SSE4_2: stay on the base
        lea     mbin_rsi, [%3 WRT_OPT]

        ;; Step 2: OSXSAVE, XMM/YMM state and AVX -> AVX/02 opt func
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      _%1_init_done
        xor     ecx, ecx
        xgetbv                                  ; xcr -> edx:eax
        mov     edi, eax                        ; edi = copy of xgetbv.eax for step 4

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     _%1_init_done
        test    ebx, FLAG_CPUID1_ECX_AVX        ; tested on the saved cpuid1.ecx
        jz      _%1_init_done
        lea     mbin_rsi, [%4 WRT_OPT]

        ;; Step 3: AVX2 -> AVX2/04 opt func
        xor     ecx, ecx
        mov     eax, 7
        cpuid                                   ; ebx now holds cpuid7.ebx
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      _%1_init_done                   ; No AVX2 possible
        lea     mbin_rsi, [%5 WRT_OPT]

        ;; Step 4: ZMM/opmask state and AVX512 feature group -> AVX512/06
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     _%1_init_done                   ; No AVX512 possible
        ;; NOTE(review): this mask has ECX in its name but is applied to the
        ;; EBX result of leaf 7 - confirm against the flag's definition.
        and     ebx, FLAGS_CPUID7_ECX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_ECX_AVX512_G1
        jne     _%1_init_done                   ; not every bit of the group is set
        lea     mbin_rsi, [%6 WRT_OPT]

_%1_init_done:
        mov     [%1_dispatched], mbin_rsi       ; publish the choice
        pop     mbin_rdi
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        pop     mbin_rsi
        ret
%endmacro
306
307%endif ; ifndef _MULTIBINARY_ASM_