]> git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/include/multibinary.asm
import quincy beta 17.1.0
[ceph.git] / ceph / src / isa-l / include / multibinary.asm
CommitLineData
7c673cae
FG
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3;
4; Redistribution and use in source and binary forms, with or without
f91f0fd5 5; modification, are permitted provided that the following conditions
7c673cae
FG
6; are met:
7; * Redistributions of source code must retain the above copyright
8; notice, this list of conditions and the following disclaimer.
9; * Redistributions in binary form must reproduce the above copyright
10; notice, this list of conditions and the following disclaimer in
11; the documentation and/or other materials provided with the
12; distribution.
13; * Neither the name of Intel Corporation nor the names of its
14; contributors may be used to endorse or promote products derived
15; from this software without specific prior written permission.
16;
17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%ifndef _MULTIBINARY_ASM_
31%define _MULTIBINARY_ASM_
32
;;;;
; Pointer-size and register aliases used by the dispatch macros below.
; elf32 objects get 32-bit pointers and registers; every other output
; format gets the 64-bit definitions.
;;;;
%ifnidn __OUTPUT_FORMAT__, elf32
 ; 64-bit pointers and registers
 %define mbin_def_ptr   dq
 %define mbin_ptr_sz    qword
 %define mbin_rax       rax
 %define mbin_rbx       rbx
 %define mbin_rcx       rcx
 %define mbin_rdx       rdx
 %define mbin_rsi       rsi
 %define mbin_rdi       rdi
%else
 ; 32-bit pointers and registers (elf32 only)
 %define mbin_def_ptr   dd
 %define mbin_ptr_sz    dword
 %define mbin_rax       eax
 %define mbin_rbx       ebx
 %define mbin_rcx       ecx
 %define mbin_rdx       edx
 %define mbin_rsi       esi
 %define mbin_rdi       edi
%endif

; AS_FEATURE_LEVEL gates the AVX512-aware dispatchers in this file
; (>= 6 and >= 10 are tested below). Use 4 unless it is already defined.
%ifndef AS_FEATURE_LEVEL
 %define AS_FEATURE_LEVEL 4
%endif
56
7c673cae
FG
;;;;
; mbin_interface <name>
;   Emits the visible entry point <name>, which jumps through the
;   pointer <name>_dispatched, together with the first-call stub
;   <name>_mbinit.
;
;   1-> function name
;
;   <name>_dispatch_init is called by the stub and must be provided
;   separately (one of the mbin_dispatch_init* macros in this file).
;;;;
%macro mbin_interface 1
        ;;;;
        ; <name>_dispatched starts out pointing at <name>_mbinit, so the
        ; very first call lands in the stub. <name>_dispatch_init then
        ; overwrites the pointer, which means the stub only runs once.
        ;;;;
        section .data
%1_dispatched:
        mbin_def_ptr    %1_mbinit

        section .text
        mk_global       %1, function
%1_mbinit:
        endbranch
        ;;; first call only: pick the implementation and store its address
        call    %1_dispatch_init
        ;;; no ret here: execution continues into the entry point below
%1:
        endbranch
        jmp     mbin_ptr_sz [%1_dispatched]
%endmacro
82
;;;;;
; mbin_dispatch_init <name>, <base>, <avx>, <avx2>
;   Defines <name>_dispatch_init for functions whose minimum
;   requirement is SSE/00/01.
;
;   1-> function name
;   2-> SSE/00/01 optimized function used as base
;   3-> AVX or AVX/02 opt func
;   4-> AVX2 or AVX/04 opt func
;
;   The chosen address is written to <name>_dispatched. Every general
;   purpose register used here is saved and restored; flags are not.
;;;;;
%macro mbin_dispatch_init 4
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx
        lea     mbin_rsi, [%2 WRT_OPT]  ; start with the SSE 00/01 base

        ;; CPUID leaf 1: both AVX and OSXSAVE are needed to go any further
        mov     eax, 1
        cpuid
        and     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        cmp     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        jne     _%1_init_done           ; no AVX: keep the base function
        lea     mbin_rsi, [%3 WRT_OPT]  ; AVX (gen2) opt func

        ;; CPUID leaf 7, subleaf 0: upgrade to AVX2 when it is reported
        xor     ecx, ecx
        mov     eax, 7
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        lea     mbin_rbx, [%4 WRT_OPT]  ; AVX2 (gen4) opt func; lea keeps flags
        cmovnz  mbin_rsi, mbin_rbx

        ;; XGETBV(0): XMM and YMM state must both be enabled,
        ;; otherwise go back to the base function
        xor     ecx, ecx
        xgetbv
        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        je      _%1_init_done
        lea     mbin_rsi, [%2 WRT_OPT]

_%1_init_done:
        mov     [%1_dispatched], mbin_rsi       ; publish the selection
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        pop     mbin_rsi
        ret
%endmacro
134
;;;;;
; mbin_dispatch_init2 <name>, <base>
;   Defines <name>_dispatch_init for the case where only a base
;   function exists: no CPU feature is probed, the base address is
;   always stored in <name>_dispatched.
;
;   1-> function name
;   2-> base function
;;;;;
%macro mbin_dispatch_init2 2
        section .text
%1_dispatch_init:
        push    mbin_rsi                        ; scratch, restored below
        lea     mbin_rsi, [%2 WRT_OPT]          ; the only candidate
        mov     [%1_dispatched], mbin_rsi
        pop     mbin_rsi
        ret
%endmacro
150
224ce89b
WB
;;;;;
; mbin_dispatch_init_clmul <name>, <base>, <sse>, <avx>, <avx512>
;   Defines <name>_dispatch_init for CRC-style functions that need
;   both SSE4_1 and CLMUL.
;
;   1-> function name
;   2-> base function
;   3-> SSE4_1 and CLMUL optimized function
;   4-> AVX/02 opt func
;   5-> AVX512/10 opt func (only considered when AS_FEATURE_LEVEL >= 10)
;
;   The chosen address is written to <name>_dispatched. Every general
;   purpose register used here is saved and restored; flags are not.
;;;;;
%macro mbin_dispatch_init_clmul 5
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx
        push    mbin_rdi
        lea     mbin_rsi, [%2 WRT_OPT]  ; start with the base function

        ;; CPUID leaf 1: require SSE4_1 and CLMUL for anything above base
        mov     eax, 1
        cpuid
        mov     ebx, ecx                ; keep cpuid1.ecx for the AVX test
        test    ecx, FLAG_CPUID1_ECX_SSE4_1
        jz      _%1_init_done
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      _%1_init_done
        lea     mbin_rsi, [%3 WRT_OPT]  ; SSE4_1 + CLMUL (00/01) opt

        ;; XGETBV is only attempted when OSXSAVE is reported
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      _%1_init_done
        xor     ecx, ecx
        xgetbv                          ; xcr -> edx:eax
        mov     edi, eax                ; keep xgetbv.eax for the ZMM test

        ;; AVX needs XMM/YMM state enabled and the CPUID AVX bit
        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     _%1_init_done
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      _%1_init_done
        lea     mbin_rsi, [%4 WRT_OPT]  ; AVX/02 opt

%if AS_FEATURE_LEVEL >= 10
        ;; CPUID leaf 7, subleaf 0: AVX2 must be present
        xor     ecx, ecx
        mov     eax, 7
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      _%1_init_done           ; no AVX2, stay on AVX/02

        ;; AVX512: ZMM/opmask state enabled plus the G1 and G2 feature sets
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     _%1_init_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     _%1_init_done
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        jne     _%1_init_done
        lea     mbin_rsi, [%5 WRT_OPT]  ; AVX512/10 opt
%endif
_%1_init_done:
        mov     [%1_dispatched], mbin_rsi       ; publish the selection
        pop     mbin_rdi
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        pop     mbin_rsi
        ret
%endmacro
225
7c673cae
FG
;;;;;
; mbin_dispatch_init5 <name>, <base>, <sse>, <avx>, <avx2>
;   Defines <name>_dispatch_init, choosing between a base function and
;   SSE4_2, AVX and AVX2 variants.
;
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;
;   The chosen address is written to <name>_dispatched. Every general
;   purpose register used here is saved and restored; flags are not.
;;;;;
%macro mbin_dispatch_init5 5
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx
        lea     mbin_rsi, [%2 WRT_OPT]  ; start with the base function

        ;; CPUID leaf 1: move up to the SSE variant when SSE4.2 is reported
        mov     eax, 1
        cpuid
        test    ecx, FLAG_CPUID1_ECX_SSE4_2
        lea     mbin_rbx, [%3 WRT_OPT]  ; SSE opt func; lea keeps flags
        cmovnz  mbin_rsi, mbin_rbx

        ;; both AVX and OSXSAVE are needed to go any further
        and     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        cmp     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
        jne     _%1_init_done           ; no AVX: keep what we have
        lea     mbin_rsi, [%4 WRT_OPT]  ; AVX (gen2) opt func

        ;; CPUID leaf 7, subleaf 0: upgrade to AVX2 when it is reported
        xor     ecx, ecx
        mov     eax, 7
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        lea     mbin_rbx, [%5 WRT_OPT]  ; AVX2 (gen4) opt func
        cmovnz  mbin_rsi, mbin_rbx

        ;; XGETBV(0): XMM and YMM state must both be enabled,
        ;; otherwise drop back to the SSE variant
        xor     ecx, ecx
        xgetbv
        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        je      _%1_init_done
        lea     mbin_rsi, [%3 WRT_OPT]

_%1_init_done:
        mov     [%1_dispatched], mbin_rsi       ; publish the selection
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        pop     mbin_rsi
        ret
%endmacro
282
%if AS_FEATURE_LEVEL >= 6
;;;;;
; mbin_dispatch_init6 <name>, <base>, <sse>, <avx>, <avx2>, <avx512>
;   Defines <name>_dispatch_init, choosing between a base function and
;   SSE4_2, AVX, AVX2 and AVX512 variants.
;
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;   6-> AVX512/06 opt func
;
;   The chosen address is written to <name>_dispatched. Every general
;   purpose register used here is saved and restored; flags are not.
;;;;;
%macro mbin_dispatch_init6 6
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx
        push    mbin_rdi
        lea     mbin_rsi, [%2 WRT_OPT]  ; start with the base function

        ;; CPUID leaf 1: without SSE4_2 the base function stays selected
        mov     eax, 1
        cpuid
        mov     ebx, ecx                ; keep cpuid1.ecx for the AVX test
        test    ecx, FLAG_CPUID1_ECX_SSE4_2
        jz      _%1_init_done
        lea     mbin_rsi, [%3 WRT_OPT]  ; SSE4_2 (00/01) opt

        ;; XGETBV is only attempted when OSXSAVE is reported
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      _%1_init_done
        xor     ecx, ecx
        xgetbv                          ; xcr -> edx:eax
        mov     edi, eax                ; keep xgetbv.eax for the ZMM test

        ;; AVX needs XMM/YMM state enabled and the CPUID AVX bit
        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     _%1_init_done
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      _%1_init_done
        lea     mbin_rsi, [%4 WRT_OPT]  ; AVX/02 opt

        ;; CPUID leaf 7, subleaf 0: AVX2
        xor     ecx, ecx
        mov     eax, 7
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      _%1_init_done           ; no AVX2, stay on AVX/02
        lea     mbin_rsi, [%5 WRT_OPT]  ; AVX2/04 opt func

        ;; AVX512: ZMM/opmask state enabled plus the G1 feature set
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     _%1_init_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     _%1_init_done
        lea     mbin_rsi, [%6 WRT_OPT]  ; AVX512/06 opt

_%1_init_done:
        mov     [%1_dispatched], mbin_rsi       ; publish the selection
        pop     mbin_rdi
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        pop     mbin_rsi
        ret
%endmacro

%else
; AS_FEATURE_LEVEL below 6: the AVX512 candidate (argument 6) is ignored
; and selection is delegated to mbin_dispatch_init5.
%macro mbin_dispatch_init6 6
        mbin_dispatch_init5 %1, %2, %3, %4, %5
%endmacro
%endif
358
%if AS_FEATURE_LEVEL >= 10
;;;;;
; mbin_dispatch_init7 <name>, <base>, <sse>, <avx>, <avx2>, <avx512>, <avx512u>
;   Defines <name>_dispatch_init, choosing between a base function and
;   SSE4_2, AVX, AVX2, AVX512 (G1) and AVX512 update (G2) variants.
;
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;   6-> AVX512/06 opt func
;   7-> AVX512 Update/10 opt func
;
;   Each level is only considered when every lower level passed, so the
;   /10 function is never selected unless the /06 requirements (G1) are
;   also met.
;
;   The chosen address is written to <name>_dispatched. Every general
;   purpose register used here is saved and restored; flags are not.
;;;;;
%macro mbin_dispatch_init7 7
        section .text
%1_dispatch_init:
        push    mbin_rsi
        push    mbin_rax
        push    mbin_rbx
        push    mbin_rcx
        push    mbin_rdx
        push    mbin_rdi
        lea     mbin_rsi, [%2 WRT_OPT]  ; Default - use base function

        mov     eax, 1
        cpuid
        mov     ebx, ecx                ; save cpuid1.ecx
        test    ecx, FLAG_CPUID1_ECX_SSE4_2
        je      _%1_init_done           ; Use base function if no SSE4_2
        lea     mbin_rsi, [%3 WRT_OPT]  ; SSE possible so use 00/01 opt

        ;; Test for XMM_YMM support/AVX
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        je      _%1_init_done
        xor     ecx, ecx
        xgetbv                          ; xcr -> edx:eax
        mov     edi, eax                ; save xgetbv.eax

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     _%1_init_done
        test    ebx, FLAG_CPUID1_ECX_AVX
        je      _%1_init_done
        lea     mbin_rsi, [%4 WRT_OPT]  ; AVX/02 opt

        ;; Test for AVX2
        xor     ecx, ecx
        mov     eax, 7
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        je      _%1_init_done           ; No AVX2 possible
        lea     mbin_rsi, [%5 WRT_OPT]  ; AVX2/04 opt func

        ;; Test for AVX512 (G1)
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     _%1_init_done           ; No AVX512 possible
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        ;; Stop here when G1 is incomplete. Without this branch a CPUID
        ;; report with all G2 bits but not all G1 bits would still pick
        ;; the /10 function below.
        jne     _%1_init_done
        lea     mbin_rsi, [%6 WRT_OPT]  ; AVX512/06 opt

        ;; Test for AVX512 update (G2); ecx still holds cpuid7.ecx
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        lea     mbin_rbx, [%7 WRT_OPT]  ; AVX512/10 opt
        cmove   mbin_rsi, mbin_rbx

_%1_init_done:
        pop     mbin_rdi
        pop     mbin_rdx
        pop     mbin_rcx
        pop     mbin_rbx
        pop     mbin_rax
        mov     [%1_dispatched], mbin_rsi
        pop     mbin_rsi
        ret
%endmacro
%else
; AS_FEATURE_LEVEL below 10: the /10 candidate (argument 7) is ignored
; and selection is delegated to mbin_dispatch_init6.
%macro mbin_dispatch_init7 7
        mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
%endmacro
%endif
439
7c673cae 440%endif ; ifndef _MULTIBINARY_ASM_