]> git.proxmox.com Git - ceph.git/blob - ceph/src/isa-l/include/multibinary.asm
8bc777c6ec0e35d750b4b6cd07ba2dab4e5e53e9
[ceph.git] / ceph / src / isa-l / include / multibinary.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %ifndef _MULTIBINARY_ASM_
31 %define _MULTIBINARY_ASM_
32
;;;;
; Pointer-width aliases used by the multibinary macros in this file.
; Only the elf32 output format selects the 32-bit spellings; every other
; output format gets the 64-bit spellings.
;;;;
%ifnidn __OUTPUT_FORMAT__, elf32
 ; 64-bit: pointer storage directive and memory operand size
 %define mbin_def_ptr	dq
 %define mbin_ptr_sz	qword
 ; 64-bit: general purpose registers
 %define mbin_rax	rax
 %define mbin_rbx	rbx
 %define mbin_rcx	rcx
 %define mbin_rdx	rdx
 %define mbin_rsi	rsi
 %define mbin_rdi	rdi
%else
 ; 32-bit (elf32): pointer storage directive and memory operand size
 %define mbin_def_ptr	dd
 %define mbin_ptr_sz	dword
 ; 32-bit (elf32): general purpose registers
 %define mbin_rax	eax
 %define mbin_rbx	ebx
 %define mbin_rcx	ecx
 %define mbin_rdx	edx
 %define mbin_rsi	esi
 %define mbin_rdi	edi
%endif
52
;;;;
; multibinary macro:
;   creates the visible entry point that uses HW optimized call pointer
;   creates the init of the HW optimized call pointer
;
; 1-> function name
;
; Symbols emitted:
;   %1_dispatched (.data) pointer-sized slot, initialised to %1_mbinit
;   %1            (.text) global entry point; jumps through %1_dispatched
;   %1_mbinit     (.text) calls %1_dispatch_init, then falls through to %1
;
; %1_dispatch_init is referenced but not defined by this macro; the
; mbin_dispatch_init* macros in this file define it.
;;;;
%macro mbin_interface 1
	;;;;
	; *_dispatched is defaulted to *_mbinit and replaced on first call.
	; Therefore, *_dispatch_init is only executed on first call.
	;;;;
	section .data
%1_dispatched:
	mbin_def_ptr	%1_mbinit

	section .text
	;; The ":function" symbol-type qualifier is an ELF extension to
	;; GLOBAL, so only emit it for ELF output formats. Other formats
	;; get a plain GLOBAL so the macro still assembles there.
%ifidn __OUTPUT_FORMAT__, elf64
	global	%1:function
%elifidn __OUTPUT_FORMAT__, elf32
	global	%1:function
%elifidn __OUTPUT_FORMAT__, elfx32
	global	%1:function
%elifidn __OUTPUT_FORMAT__, elf
	global	%1:function
%else
	global	%1
%endif
%1_mbinit:
	;;; only called the first time to setup hardware match
	call	%1_dispatch_init
	;;; falls thru to execute the hw optimized code
%1:
	jmp	mbin_ptr_sz [%1_dispatched]
%endmacro
76
;;;;;
; mbin_dispatch_init parameters
; Use this function when SSE/00/01 is a minimum requirement
; 1-> function name
; 2-> SSE/00/01 optimized function used as base
; 3-> AVX or AVX/02 opt func
; 4-> AVX2 or AVX/04 opt func
;
; Defines %1_dispatch_init, which stores the address of the selected
; implementation in %1_dispatched. All general purpose registers it
; touches are saved and restored; flags are not preserved.
;
; Selection:
;   CPUID.1:ECX lacks AVX or OSXSAVE		-> %2
;   XCR0 lacks XMM+YMM state			-> %2
;   CPUID.7:EBX reports AVX2			-> %4
;   otherwise					-> %3
;;;;;
%macro mbin_dispatch_init 4
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	lea	mbin_rsi, [%2 WRT_OPT]	; Default to SSE 00/01

	mov	eax, 1
	cpuid
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jne	_%1_init_done		; AVX is not available so end
	lea	mbin_rsi, [%3 WRT_OPT]	; AVX (gen2) opt func

	;; Leaf 7 is only meaningful when the maximum basic leaf reported
	;; by CPUID.0 is at least 7. Querying beyond the maximum returns
	;; data for a different leaf, which could fake the AVX2 bit.
	xor	eax, eax
	cpuid
	cmp	eax, 7
	jb	_%1_init_os_check	; no leaf 7: keep the AVX choice

	;; Try for AVX2
	xor	ecx, ecx		; subleaf 0
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	lea	mbin_rbx, [%4 WRT_OPT]	; AVX2 (gen4) opt func
	cmovne	mbin_rsi, mbin_rbx

_%1_init_os_check:
	;; Does it have xmm and ymm support
	xor	ecx, ecx		; select XCR0
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_%1_init_done
	lea	mbin_rsi, [%2 WRT_OPT]	; OS does not enable YMM: back to base

_%1_init_done:
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro
128
;;;;;
; mbin_dispatch_init2 parameters
; Cases where only base functions are available
; 1-> function name
; 2-> base function
;
; Defines %1_dispatch_init, which unconditionally stores the address of
; %2 in %1_dispatched. No CPU feature detection is performed and the
; scratch register is restored before returning.
;;;;;
%macro mbin_dispatch_init2 2
	section .text

%1_dispatch_init:
	push	mbin_rsi			; keep caller's value

	lea	mbin_rsi, [%2 WRT_OPT]		; the only candidate
	mov	[%1_dispatched], mbin_rsi	; publish it

	pop	mbin_rsi
	ret
%endmacro
144
;;;;;
; mbin_dispatch_init5 parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_1 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
;
; Defines %1_dispatch_init, which stores the address of the selected
; implementation in %1_dispatched. All general purpose registers it
; touches are saved and restored; flags are not preserved.
;
; Selection:
;   no SSE4_1, and AVX or OSXSAVE missing	-> %2
;   SSE4_1, but AVX or OSXSAVE missing		-> %3
;   AVX+OSXSAVE, XCR0 lacks XMM+YMM state	-> %3
;   CPUID.7:EBX reports AVX2			-> %5
;   otherwise					-> %4
;;;;;
%macro mbin_dispatch_init5 5
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	lea	mbin_rsi, [%2 WRT_OPT]	; Default - use base function

	mov	eax, 1
	cpuid
	; Test for SSE4.1
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	lea	mbin_rbx, [%3 WRT_OPT]	; SSE opt func
	cmovne	mbin_rsi, mbin_rbx

	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jne	_%1_init_done		; AVX is not available so end
	lea	mbin_rsi, [%4 WRT_OPT]	; AVX (gen2) opt func

	;; Leaf 7 is only meaningful when the maximum basic leaf reported
	;; by CPUID.0 is at least 7. Querying beyond the maximum returns
	;; data for a different leaf, which could fake the AVX2 bit.
	xor	eax, eax
	cpuid
	cmp	eax, 7
	jb	_%1_init_os_check	; no leaf 7: keep the AVX choice

	;; Try for AVX2
	xor	ecx, ecx		; subleaf 0
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	lea	mbin_rbx, [%5 WRT_OPT]	; AVX2 (gen4) opt func
	cmovne	mbin_rsi, mbin_rbx

_%1_init_os_check:
	;; Does it have xmm and ymm support
	xor	ecx, ecx		; select XCR0
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_%1_init_done
	;; OS does not enable YMM state: fall back to the SSE function.
	;; NOTE: the SSE4_1 bit is not re-tested on this path.
	lea	mbin_rsi, [%3 WRT_OPT]

_%1_init_done:
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro
201
;;;;;
; mbin_dispatch_init6 parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_1 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
;
; Defines %1_dispatch_init, which stores the address of the selected
; implementation in %1_dispatched. All general purpose registers it
; touches are saved and restored; flags are not preserved.
;
; Each step below upgrades the choice only if the previous step passed:
;   default					-> %2
;   SSE4_1					-> %3
;   OSXSAVE, XCR0 has XMM+YMM, AVX		-> %4
;   CPUID.7:EBX reports AVX2			-> %5
;   XCR0 has ZMM/opmask, AVX512 group 1 bits	-> %6
;;;;;
%macro mbin_dispatch_init6 6
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi
	lea	mbin_rsi, [%2 WRT_OPT]	; Default - use base function

	mov	eax, 1
	cpuid
	mov	ebx, ecx		; save cpuid1.ecx
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	je	_%1_init_done		; Use base function if no SSE4_1
	lea	mbin_rsi, [%3 WRT_OPT]	; SSE possible so use 00/01 opt

	;; Test for XMM_YMM support/AVX
	test	ecx, FLAG_CPUID1_ECX_OSXSAVE
	je	_%1_init_done		; xgetbv not usable without OSXSAVE
	xor	ecx, ecx		; select XCR0
	xgetbv				; xcr -> edx:eax
	mov	edi, eax		; save xgetbv.eax

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_%1_init_done
	test	ebx, FLAG_CPUID1_ECX_AVX
	je	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT]	; AVX/02 opt

	;; Leaf 7 is only meaningful when the maximum basic leaf reported
	;; by CPUID.0 is at least 7. Querying beyond the maximum returns
	;; data for a different leaf, which could fake the feature bits.
	;; The saved cpuid1.ecx in ebx is no longer needed here; edi is
	;; not written by cpuid and keeps the saved xgetbv value.
	xor	eax, eax
	cpuid
	cmp	eax, 7
	jb	_%1_init_done		; no leaf 7: keep the AVX choice

	;; Test for AVX2
	xor	ecx, ecx		; subleaf 0
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	je	_%1_init_done		; No AVX2 possible
	lea	mbin_rsi, [%5 WRT_OPT]	; AVX2/04 opt func

	;; Test for AVX512
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	_%1_init_done		; No AVX512 possible
	;; NOTE(review): the constant is named ..._ECX_... but is applied
	;; to the EBX output of leaf 7 here - confirm against its definition.
	and	ebx, FLAGS_CPUID7_ECX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_ECX_AVX512_G1
	lea	mbin_rbx, [%6 WRT_OPT]	; AVX512/06 opt
	cmove	mbin_rsi, mbin_rbx	; only if every group 1 bit is set

_%1_init_done:
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro
270
271 %endif ; ifndef _MULTIBINARY_ASM_