]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/isa-l/include/multibinary.asm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / isa-l / include / multibinary.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %ifndef _MULTIBINARY_ASM_
31 %define _MULTIBINARY_ASM_
32
;;;;
; Pointer-width aliases used by the macros in this file.
; For elf32 output the mbin_* names expand to the 32-bit registers and a
; dword-sized pointer; for every other output format they expand to the
; 64-bit registers and a qword-sized pointer.
;;;;
%ifnidn __OUTPUT_FORMAT__, elf32
 ; any output format other than elf32: 64-bit forms
 %define mbin_def_ptr	dq
 %define mbin_ptr_sz	qword
 %define mbin_rax	rax
 %define mbin_rbx	rbx
 %define mbin_rcx	rcx
 %define mbin_rdx	rdx
 %define mbin_rsi	rsi
 %define mbin_rdi	rdi
%else
 ; elf32: 32-bit forms
 %define mbin_def_ptr	dd
 %define mbin_ptr_sz	dword
 %define mbin_rax	eax
 %define mbin_rbx	ebx
 %define mbin_rcx	ecx
 %define mbin_rdx	edx
 %define mbin_rsi	esi
 %define mbin_rdi	edi
%endif

; Feature level assumed when the including file / build did not define one.
%ifndef AS_FEATURE_LEVEL
 %define AS_FEATURE_LEVEL 4
%endif
56
;;;;
; mbin_interface: multibinary entry-point macro
;   1-> function name
;
; Creates the visible entry point <name>, which jumps through the pointer
; <name>_dispatched, and the stub <name>_mbinit that the pointer initially
; refers to.
;
; <name>_dispatched defaults to <name>_mbinit and is replaced on the first
; call, so <name>_dispatch_init is only executed on that first call.
; <name>_dispatch_init itself is not defined here; it has to be provided
; separately (the mbin_dispatch_init* macros in this file define it).
;;;;
%macro mbin_interface 1
	section .data
%1_dispatched:
	mbin_def_ptr	%1_mbinit		; initial target: the init stub below

	section .text
	global %1:ISAL_SYM_TYPE_FUNCTION
%1_mbinit:
	;; reached only through the initial pointer value: pick the
	;; hardware-matched implementation and store it in the pointer
	call	%1_dispatch_init
	;; fall through and dispatch via the freshly written pointer
%1:
	jmp	mbin_ptr_sz [%1_dispatched]
%endmacro
80
;;;;;
; mbin_dispatch_init parameters
; Use this macro when SSE/00/01 is the minimum requirement.
;   1-> function name
;   2-> SSE/00/01 optimized function used as base
;   3-> AVX or AVX/02 opt func
;   4-> AVX2 or AVX/04 opt func
;
; Defines <name>_dispatch_init, which stores the selected function address
; in <name>_dispatched.  Every general-purpose register it writes is pushed
; on entry and popped before returning; flags are not preserved.
;;;;;
%macro mbin_dispatch_init 4
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx

	lea	mbin_rsi, [%2 WRT_OPT]		; candidate = SSE 00/01 base

	;; CPUID leaf 1: AVX and OSXSAVE must both be reported
	mov	eax, 1
	cpuid
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jne	_%1_init_done			; AVX is not available, keep the base
	lea	mbin_rsi, [%3 WRT_OPT]		; candidate = AVX (gen2) opt func

	;; CPUID leaf 7, subleaf 0: upgrade to AVX2 when its flag is set
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	lea	mbin_rbx, [%4 WRT_OPT]		; lea leaves the flags from test intact
	cmovne	mbin_rsi, mbin_rbx		; candidate = AVX2 (gen4) opt func

	;; XGETBV with ecx = 0: the XMM and YMM state bits must both be set,
	;; otherwise drop back to the SSE base
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_%1_init_done
	lea	mbin_rsi, [%2 WRT_OPT]

_%1_init_done:
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro
132
;;;;;
; mbin_dispatch_init2 parameters
; For cases where only a base function is available.
;   1-> function name
;   2-> base function
;
; Defines <name>_dispatch_init, which unconditionally stores the address of
; the base function in <name>_dispatched.  No CPU feature detection is done.
;;;;;
%macro mbin_dispatch_init2 2
	section .text
%1_dispatch_init:
	push	mbin_rsi			; rsi is only borrowed as scratch
	lea	mbin_rsi, [%2 WRT_OPT]		; the one and only choice
	mov	[%1_dispatched], mbin_rsi
	pop	mbin_rsi
	ret
%endmacro
148
;;;;;
; mbin_dispatch_init_clmul parameters
; Use this case for CRC, which needs both SSE4_1 and CLMUL.
;   1-> function name
;   2-> base function
;   3-> SSE4_1 and CLMUL optimized function
;
; Defines <name>_dispatch_init, which stores the optimized function in
; <name>_dispatched only when CPUID leaf 1 reports both features, and the
; base function otherwise.  Registers written here are saved and restored.
;;;;;
%macro mbin_dispatch_init_clmul 3
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx

	lea	mbin_rsi, [%2 WRT_OPT]		; candidate = base function

	mov	eax, 1
	cpuid

	;; both SSE4.1 and CLMUL have to be present
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	jz	_%1_init_done
	test	ecx, FLAG_CPUID1_ECX_CLMUL
	jz	_%1_init_done
	lea	mbin_rsi, [%3 WRT_OPT]		; candidate = SSE4.1 + CLMUL opt func

_%1_init_done:
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro
184
;;;;;
; mbin_dispatch_init5 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;
; Defines <name>_dispatch_init, which probes CPUID/XGETBV and stores the
; selected function address in <name>_dispatched.  Registers written here
; are saved on entry and restored before returning.
;;;;;
%macro mbin_dispatch_init5 5
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx

	lea	mbin_rsi, [%2 WRT_OPT]		; candidate = base function

	mov	eax, 1
	cpuid

	;; SSE4.2 reported -> candidate = SSE opt func
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	lea	mbin_rbx, [%3 WRT_OPT]		; lea leaves the flags from test intact
	cmovne	mbin_rsi, mbin_rbx

	;; AVX and OSXSAVE must both be reported to go further
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jne	_%1_init_done			; AVX is not available so end
	lea	mbin_rsi, [%4 WRT_OPT]		; candidate = AVX (gen2) opt func

	;; CPUID leaf 7, subleaf 0: upgrade to AVX2 when its flag is set
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	lea	mbin_rbx, [%5 WRT_OPT]
	cmovne	mbin_rsi, mbin_rbx		; candidate = AVX2 (gen4) opt func

	;; XGETBV with ecx = 0: without both XMM and YMM state bits set,
	;; fall back to the SSE opt func (%3)
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_%1_init_done
	lea	mbin_rsi, [%3 WRT_OPT]

_%1_init_done:
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro
241
%if AS_FEATURE_LEVEL >= 6
;;;;;
; mbin_dispatch_init6 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;   6-> AVX512/06 opt func
;
; Defines <name>_dispatch_init.  The checks form a ladder: each step that
; passes raises the candidate in rsi, the first step that fails jumps to
; the exit with the candidate reached so far.  Registers written here
; (including rdi, used to hold the XGETBV result) are saved and restored.
;;;;;
%macro mbin_dispatch_init6 6
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi

	lea	mbin_rsi, [%2 WRT_OPT]		; candidate = base function

	;; SSE4.2
	mov	eax, 1
	cpuid
	mov	ebx, ecx			; keep cpuid1.ecx for the AVX test below
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	jz	_%1_init_done			; no SSE4_2: stay on the base function
	lea	mbin_rsi, [%3 WRT_OPT]		; candidate = 00/01 opt

	;; XMM/YMM state enabled (XGETBV) and AVX reported
	test	ecx, FLAG_CPUID1_ECX_OSXSAVE
	jz	_%1_init_done
	xor	ecx, ecx
	xgetbv					; xcr -> edx:eax
	mov	edi, eax			; keep xgetbv.eax for the ZMM test below

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_%1_init_done
	test	ebx, FLAG_CPUID1_ECX_AVX
	jz	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT]		; candidate = AVX/02 opt

	;; AVX2 (CPUID leaf 7, subleaf 0)
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	jz	_%1_init_done			; no AVX2 possible
	lea	mbin_rsi, [%5 WRT_OPT]		; candidate = AVX2/04 opt

	;; AVX512: ZMM/opmask state enabled and all G1 feature bits set
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	_%1_init_done			; no AVX512 possible
	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	jne	_%1_init_done
	lea	mbin_rsi, [%6 WRT_OPT]		; candidate = AVX512/06 opt

_%1_init_done:
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro

%else
;; Feature level below 6: ignore the AVX512 argument and use the
;; five-parameter dispatcher.
%macro mbin_dispatch_init6 6
	mbin_dispatch_init5 %1, %2, %3, %4, %5
%endmacro
%endif
317
%if AS_FEATURE_LEVEL >= 10
;;;;;
; mbin_dispatch_init7 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;   6-> AVX512/06 opt func
;   7-> AVX512 Update/10 opt func
;
; Defines <name>_dispatch_init.  The checks form a ladder: each step that
; passes raises the candidate in rsi, the first step that fails jumps to
; the exit with the candidate reached so far.  In particular the /10
; function is only considered once the /06 (AVX512 G1) requirements have
; been met.  Registers written here (including rdi, used to hold the
; XGETBV result) are saved and restored.
;;;;;
%macro mbin_dispatch_init7 7
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi

	lea	mbin_rsi, [%2 WRT_OPT]		; candidate = base function

	;; SSE4.2
	mov	eax, 1
	cpuid
	mov	ebx, ecx			; keep cpuid1.ecx for the AVX test below
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	je	_%1_init_done			; no SSE4_2: stay on the base function
	lea	mbin_rsi, [%3 WRT_OPT]		; candidate = 00/01 opt

	;; XMM/YMM state enabled (XGETBV) and AVX reported
	test	ecx, FLAG_CPUID1_ECX_OSXSAVE
	je	_%1_init_done
	xor	ecx, ecx
	xgetbv					; xcr -> edx:eax
	mov	edi, eax			; keep xgetbv.eax for the ZMM test below

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_%1_init_done
	test	ebx, FLAG_CPUID1_ECX_AVX
	je	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT]		; candidate = AVX/02 opt

	;; AVX2 (CPUID leaf 7, subleaf 0); ecx from this cpuid is kept
	;; untouched until the G2 test at the end
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	je	_%1_init_done			; no AVX2 possible
	lea	mbin_rsi, [%5 WRT_OPT]		; candidate = AVX2/04 opt

	;; AVX512: ZMM/opmask state enabled and all G1 feature bits set
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	_%1_init_done			; no AVX512 possible
	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	;; Stop here when G1 is incomplete.  Without this branch the G2 test
	;; below could select the /10 function on a system that reports the
	;; G2 bits but not the full G1 set (e.g. with masked CPUID bits).
	jne	_%1_init_done
	lea	mbin_rsi, [%6 WRT_OPT]		; candidate = AVX512/06 opt

	;; AVX512 update: all G2 feature bits (CPUID leaf 7 ecx) set
	and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	jne	_%1_init_done
	lea	mbin_rsi, [%7 WRT_OPT]		; candidate = AVX512/10 opt

_%1_init_done:
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; publish the selected function
	pop	mbin_rsi
	ret
%endmacro
%else
;; Feature level below 10: ignore the /10 argument and use the
;; six-parameter dispatcher.
%macro mbin_dispatch_init7 7
	mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
%endmacro
%endif
398
399 %endif ; ifndef _MULTIBINARY_ASM_