]> git.proxmox.com Git - ceph.git/blob - ceph/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crypto / isa-l / isa-l_crypto / include / multibinary.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %ifndef _MULTIBINARY_ASM_
31 %define _MULTIBINARY_ASM_
32
;;;;
; Pointer-width abstraction used by every macro in this file.
;   mbin_def_ptr - data directive that reserves one code pointer
;   mbin_ptr_sz  - operand-size keyword for an indirect jump through it
;   mbin_r??     - pointer-sized view of the named general register
; Only the elf32 output format selects the 32-bit variants; every other
; output format is treated as 64-bit.
;;;;
%ifidn __OUTPUT_FORMAT__, elf32
 %define mbin_def_ptr	dd
 %define mbin_ptr_sz	dword
 %define mbin_rax	eax
 %define mbin_rbx	ebx
 %define mbin_rcx	ecx
 %define mbin_rdx	edx
 %define mbin_rsi	esi
 %define mbin_rdi	edi
%else
 %define mbin_def_ptr	dq
 %define mbin_ptr_sz	qword
 %define mbin_rax	rax
 %define mbin_rbx	rbx
 %define mbin_rcx	rcx
 %define mbin_rdx	rdx
 %define mbin_rsi	rsi
 %define mbin_rdi	rdi
%endif

;;;;
; AS_FEATURE_LEVEL may be set by the build before this file is included.
; When it is not, default to 4.  The value gates the full versions of
; mbin_dispatch_init6 (>= 6) and mbin_dispatch_init7 (>= 10) in this file.
;;;;
%ifndef AS_FEATURE_LEVEL
 %define AS_FEATURE_LEVEL 4
%endif
56
;;;;
; mbin_interface: multibinary entry-point macro
;   %1 -> function name
;
; Emits:
;   <name>_dispatched - pointer-sized slot in .data, initially holding the
;                       address of <name>_mbinit
;   <name>            - the visible entry point (exported through mk_global,
;                       which is defined outside this file); it jumps
;                       through <name>_dispatched
;   <name>_mbinit     - first-call stub: calls <name>_dispatch_init and then
;                       falls through into <name>
;
; <name>_dispatch_init is not emitted here; it comes from one of the
; mbin_dispatch_* macros and is expected to overwrite <name>_dispatched,
; so the stub is only reached on the first call.
; The macro leaves the assembler in section .text.
;;;;
%macro mbin_interface 1
	section .data
	%1_dispatched:
		mbin_def_ptr	%1_mbinit		; default target: the first-call stub

	section .text
	mk_global %1, function
	%1_mbinit:
		call	%1_dispatch_init		; select the hardware-matched routine once
		;; no ret: continue straight into the public entry point
	%1:
		jmp	mbin_ptr_sz [%1_dispatched]	; tail-jump to the selected routine
%endmacro
80
;;;;;
; mbin_dispatch_init: emit <name>_dispatch_init for functions whose minimum
; requirement is SSE/00/01.
;   %1 -> function name
;   %2 -> SSE/00/01 optimized function used as base
;   %3 -> AVX or AVX/02 opt func
;   %4 -> AVX2 or AVX/04 opt func
;
; The generated routine writes the selected address to <name>_dispatched.
; rsi, rax, rbx, rcx and rdx are saved and restored; flags are not.
; WRT_OPT and the FLAG_* constants are defined outside this file.
;;;;;
%macro mbin_dispatch_init 4
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx

		;; rsi carries the current selection; begin with the SSE base
		lea	mbin_rsi, [%2 WRT_OPT]

		;; CPUID.1:ECX must report both AVX and OSXSAVE to go further
		mov	eax, 1
		cpuid
		lea	mbin_rbx, [%3 WRT_OPT]		; candidate: AVX (gen2)
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		jne	_%1_init_done			; keep the base function
		mov	mbin_rsi, mbin_rbx

		;; CPUID.7.0:EBX - upgrade to the gen4 function when AVX2 is set
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		lea	mbin_rbx, [%4 WRT_OPT]		; lea leaves the flags from test intact
		cmovne	mbin_rsi, mbin_rbx

		;; XCR0 must show XMM and YMM state enabled, otherwise
		;; drop back to the base function
		xor	ecx, ecx
		xgetbv
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		je	_%1_init_done
		lea	mbin_rsi, [%2 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi	; publish the selection
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro
132
;;;;;
; mbin_dispatch_init2: emit <name>_dispatch_init for the case where only a
; base function is available.
;   %1 -> function name
;   %2 -> base function
;
; No CPU feature is probed: the generated routine unconditionally stores
; the address of %2 in <name>_dispatched.  rsi is saved and restored and
; no other register is written.
;;;;;
%macro mbin_dispatch_init2 2
	section .text
	%1_dispatch_init:
		push	mbin_rsi			; rsi is only borrowed as scratch
		lea	mbin_rsi, [%2 WRT_OPT]
		mov	[%1_dispatched], mbin_rsi	; the base function is the only choice
		pop	mbin_rsi
		ret
%endmacro
148
;;;;;
; mbin_dispatch_init5: emit <name>_dispatch_init choosing between a base
; function and three optimized versions.
;   %1 -> function name
;   %2 -> base function
;   %3 -> SSE4_1 or 00/01 optimized function
;   %4 -> AVX/02 opt func
;   %5 -> AVX2/04 opt func
;
; The generated routine writes the selected address to <name>_dispatched.
; rsi, rax, rbx, rcx and rdx are saved and restored; flags are not.
; WRT_OPT and the FLAG_* constants are defined outside this file.
;;;;;
%macro mbin_dispatch_init5 5
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx

		;; rsi carries the current selection; begin with the base function
		lea	mbin_rsi, [%2 WRT_OPT]

		mov	eax, 1
		cpuid

		;; CPUID.1:ECX SSE4.1 bit selects the 00/01 function
		test	ecx, FLAG_CPUID1_ECX_SSE4_1
		lea	mbin_rbx, [%3 WRT_OPT]		; lea leaves the flags from test intact
		cmovne	mbin_rsi, mbin_rbx

		;; both AVX and OSXSAVE are needed to go beyond that
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		lea	mbin_rbx, [%4 WRT_OPT]		; candidate: AVX (gen2)
		jne	_%1_init_done
		mov	mbin_rsi, mbin_rbx

		;; CPUID.7.0:EBX - upgrade to the gen4 function when AVX2 is set
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		lea	mbin_rbx, [%5 WRT_OPT]
		cmovne	mbin_rsi, mbin_rbx

		;; XCR0 must show XMM and YMM state enabled, otherwise
		;; drop back to the 00/01 function
		xor	ecx, ecx
		xgetbv
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		je	_%1_init_done
		lea	mbin_rsi, [%3 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi	; publish the selection
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro
205
%if AS_FEATURE_LEVEL >= 6
;;;;;
; mbin_dispatch_init6: emit <name>_dispatch_init choosing between a base
; function and four optimized versions, up to AVX512.
;   %1 -> function name
;   %2 -> base function
;   %3 -> SSE4_1 or 00/01 optimized function
;   %4 -> AVX/02 opt func
;   %5 -> AVX2/04 opt func
;   %6 -> AVX512/06 opt func
;
; The generated routine writes the selected address to <name>_dispatched.
; rsi, rax, rbx, rcx, rdx and rdi are saved and restored; flags are not.
; WRT_OPT and the FLAG_*/FLAGS_* constants are defined outside this file.
;;;;;
%macro mbin_dispatch_init6 6
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi

		;; rsi carries the current selection; begin with the base function
		lea	mbin_rsi, [%2 WRT_OPT]

		mov	eax, 1
		cpuid
		mov	ebx, ecx			; ebx = cpuid1.ecx, kept for the AVX test
		test	ecx, FLAG_CPUID1_ECX_SSE4_1
		je	_%1_init_done			; no SSE4_1: stay on the base function
		lea	mbin_rsi, [%3 WRT_OPT]		; 00/01 opt

		;; xgetbv is only executed once OSXSAVE is reported
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		je	_%1_init_done
		xor	ecx, ecx
		xgetbv					; xcr -> edx:eax
		mov	edi, eax			; edi = xgetbv.eax, kept for the ZMM test

		;; AVX needs XMM+YMM state enabled and the CPUID AVX bit
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_init_done
		test	ebx, FLAG_CPUID1_ECX_AVX
		je	_%1_init_done
		lea	mbin_rsi, [%4 WRT_OPT]		; AVX/02 opt

		;; CPUID.7.0:EBX - AVX2
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		je	_%1_init_done			; no AVX2
		lea	mbin_rsi, [%5 WRT_OPT]		; AVX2/04 opt

		;; AVX512 needs the ZMM/opmask state bits and every G1 feature bit
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_init_done
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		lea	mbin_rbx, [%6 WRT_OPT]		; AVX512/06 opt (lea keeps the flags)
		cmove	mbin_rsi, mbin_rbx

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi	; publish the selection
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro

%else
;;;;;
; AS_FEATURE_LEVEL below 6: keep the six-parameter interface, ignore %6
; and forward to mbin_dispatch_init5.
;;;;;
%macro mbin_dispatch_init6 6
	mbin_dispatch_init5 %1, %2, %3, %4, %5
%endmacro
%endif
281
%if AS_FEATURE_LEVEL >= 10
;;;;;
; mbin_dispatch_init7: emit <name>_dispatch_init choosing between a base
; function and five optimized versions, up to the AVX512 update level.
;   %1 -> function name
;   %2 -> base function
;   %3 -> SSE4_2 or 00/01 optimized function
;   %4 -> AVX/02 opt func
;   %5 -> AVX2/04 opt func
;   %6 -> AVX512/06 opt func
;   %7 -> AVX512 Update/10 opt func
;
; The generated routine writes the selected address to <name>_dispatched.
; rsi, rax, rbx, rcx, rdx and rdi are saved and restored; flags are not.
; WRT_OPT and the FLAG_*/FLAGS_* constants are defined outside this file.
;
; The /10 function is only considered after the complete AVX512_G1 set
; has been confirmed.  Without that gate a CPU (or a VM with a filtered
; CPUID) that reports the G2 bits but lacks part of G1 would be handed %7.
;;;;;
%macro mbin_dispatch_init7 7
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi
		lea	mbin_rsi, [%2 WRT_OPT]		; Default - use base function

		mov	eax, 1
		cpuid
		mov	ebx, ecx			; save cpuid1.ecx
		test	ecx, FLAG_CPUID1_ECX_SSE4_2
		je	_%1_init_done			; Use base function if no SSE4_2
		lea	mbin_rsi, [%3 WRT_OPT]		; SSE possible so use 00/01 opt

		;; Test for XMM_YMM support/AVX
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		je	_%1_init_done			; xgetbv must not run without OSXSAVE
		xor	ecx, ecx
		xgetbv					; xcr -> edx:eax
		mov	edi, eax			; save xgetbv.eax for the ZMM test

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_init_done
		test	ebx, FLAG_CPUID1_ECX_AVX
		je	_%1_init_done
		lea	mbin_rsi, [%4 WRT_OPT]		; AVX/02 opt

		;; Test for AVX2
		xor	ecx, ecx
		mov	eax, 7
		cpuid					; ebx/ecx = cpuid7.ebx/ecx from here on
		test	ebx, FLAG_CPUID7_EBX_AVX2
		je	_%1_init_done			; No AVX2 possible
		lea	mbin_rsi, [%5 WRT_OPT]		; AVX2/04 opt func

		;; Test for AVX512
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_init_done			; No AVX512 possible
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		jne	_%1_init_done			; incomplete G1: stay on AVX2, skip G2
		lea	mbin_rsi, [%6 WRT_OPT]		; AVX512/06 opt

		;; Test for the AVX512 update features (only reached with full G1);
		;; ecx still holds cpuid7.ecx, nothing above has written it
		and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		lea	mbin_rbx, [%7 WRT_OPT]		; AVX512 Update/10 opt (lea keeps the flags)
		cmove	mbin_rsi, mbin_rbx

	_%1_init_done:
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		mov	[%1_dispatched], mbin_rsi	; publish the selection
		pop	mbin_rsi
		ret
%endmacro
%else
;;;;;
; AS_FEATURE_LEVEL below 10: keep the seven-parameter interface, ignore %7
; and forward to mbin_dispatch_init6.
;;;;;
%macro mbin_dispatch_init7 7
	mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
%endmacro
%endif
362
;;;;;
; mbin_dispatch_sse_to_avx2_shani: variant of mbin_dispatch_init that also
; knows about a SHANI function.  Use it when SSE/00/01 is a minimum
; requirement.
;   %1 -> function name
;   %2 -> SSE/00/01 optimized function used as base
;   %3 -> AVX or AVX/02 opt func
;   %4 -> AVX2 or AVX/04 opt func
;   %5 -> SHANI opt for GLM
;
; The SHA bit is only examined when CPUID.1 does not report both AVX and
; OSXSAVE; on the AVX path %5 is never selected.
; The generated routine writes the selected address to <name>_dispatched.
; rsi, rax, rbx, rcx and rdx are saved and restored; flags are not.
; WRT_OPT and the FLAG_* constants are defined outside this file.
;;;;;
%macro mbin_dispatch_sse_to_avx2_shani 5
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx

		;; rsi carries the current selection; begin with the SSE base
		lea	mbin_rsi, [%2 WRT_OPT]

		;; CPUID.1:ECX must report both AVX and OSXSAVE for the AVX path
		mov	eax, 1
		cpuid
		lea	mbin_rbx, [%3 WRT_OPT]		; candidate: AVX (gen2)
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		jne	_%1_shani_check			; no AVX: look for SHANI instead
		mov	mbin_rsi, mbin_rbx

		;; CPUID.7.0:EBX - upgrade to the gen4 function when AVX2 is set
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		lea	mbin_rbx, [%4 WRT_OPT]		; lea leaves the flags from test intact
		cmovne	mbin_rsi, mbin_rbx

		;; XCR0 must show XMM and YMM state enabled, otherwise
		;; drop back to the base function
		xor	ecx, ecx
		xgetbv
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		je	_%1_init_done
		lea	mbin_rsi, [%2 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi	; publish the selection
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret

	;; Out-of-line path for CPUs without AVX: rsi still holds the base
	;; function and is replaced by %5 when CPUID.7.0:EBX reports SHA.
	_%1_shani_check:
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_SHA
		lea	mbin_rbx, [%5 WRT_OPT]		; SHANI opt func
		cmovne	mbin_rsi, mbin_rbx
		jmp	_%1_init_done
%endmacro
425
;;;;;
; mbin_dispatch_base_to_avx512_shani: variant of mbin_dispatch_init6 that
; also knows about two SHANI functions.
;   %1 -> function name
;   %2 -> base function
;   %3 -> SSE4_2 or 00/01 optimized function
;   %4 -> AVX/02 opt func
;   %5 -> AVX2/04 opt func
;   %6 -> AVX512/06 opt func
;   %7 -> SHANI opt for GLM
;   %8 -> SHANI opt for CNL
;
; %7 is considered only when the AVX path is not usable (no OSXSAVE, XMM/YMM
; state not enabled, or no AVX bit).  %8 is considered only on the AVX512
; path, after the ZMM/opmask state and the complete AVX512_G1 set have been
; confirmed.  Without the G1 gate, a CPU (or a VM with a filtered CPUID)
; that reports SHA but lacks part of G1 would be handed %8 in place of the
; AVX2 function.  NOTE(review): this assumes %8 relies on AVX512, as its
; placement behind the ZMM test suggests - confirm against callers.
;
; The generated routine writes the selected address to <name>_dispatched.
; rsi, rax, rbx, rcx, rdx and rdi are saved and restored; flags are not.
; WRT_OPT and the FLAG_*/FLAGS_* constants are defined outside this file.
;;;;;
%macro mbin_dispatch_base_to_avx512_shani 8
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi
		lea	mbin_rsi, [%2 WRT_OPT]		; Default - use base function

		mov	eax, 1
		cpuid
		mov	ebx, ecx			; save cpuid1.ecx
		test	ecx, FLAG_CPUID1_ECX_SSE4_2
		je	_%1_init_done			; Use base function if no SSE4_2
		lea	mbin_rsi, [%3 WRT_OPT]		; SSE possible so use 00/01 opt

		;; Test for XMM_YMM support/AVX
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		je	_%1_shani_check			; xgetbv must not run without OSXSAVE
		xor	ecx, ecx
		xgetbv					; xcr -> edx:eax
		mov	edi, eax			; save xgetbv.eax for the ZMM test

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_shani_check
		test	ebx, FLAG_CPUID1_ECX_AVX
		je	_%1_shani_check
		lea	mbin_rsi, [%4 WRT_OPT]		; AVX/02 opt

		;; Test for AVX2
		xor	ecx, ecx
		mov	eax, 7
		cpuid					; ebx = cpuid7.ebx from here on
		test	ebx, FLAG_CPUID7_EBX_AVX2
		je	_%1_init_done			; No AVX2 possible
		lea	mbin_rsi, [%5 WRT_OPT]		; AVX2/04 opt func

		;; Test for AVX512
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_init_done			; No AVX512 possible
		mov	eax, ebx			; mask a copy so ebx keeps the SHA bit
		and	eax, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	eax, FLAGS_CPUID7_EBX_AVX512_G1
		jne	_%1_init_done			; incomplete G1: stay on AVX2, skip %8
		lea	mbin_rsi, [%6 WRT_OPT]		; AVX512/06 opt

		;; Test for SHANI (only reached with full G1); ebx is still the
		;; unmodified cpuid7.ebx, so CPUID does not need to be re-run
		test	ebx, FLAG_CPUID7_EBX_SHA
		lea	mbin_rbx, [%8 WRT_OPT]		; SHANI opt for CNL (lea keeps the flags)
		cmovne	mbin_rsi, mbin_rbx

	_%1_init_done:
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		mov	[%1_dispatched], mbin_rsi	; publish the selection
		pop	mbin_rsi
		ret

	;; Out-of-line path for CPUs where the AVX path is not usable: rsi
	;; holds the 00/01 function and is replaced by %7 when CPUID.7.0:EBX
	;; reports SHA.
	_%1_shani_check:
		xor	ecx, ecx
		mov	eax, 7
		cpuid
		test	ebx, FLAG_CPUID7_EBX_SHA
		lea	mbin_rbx, [%7 WRT_OPT]		; SHANI opt for GLM
		cmovne	mbin_rsi, mbin_rbx
		jmp	_%1_init_done			; end
%endmacro
514
515
516
517 %endif ; ifndef _MULTIBINARY_ASM_