]> git.proxmox.com Git - ceph.git/blame - ceph/src/crypto/isa-l/isa-l_crypto/sha512_mb/sha512_multibinary.asm
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crypto / isa-l / isa-l_crypto / sha512_mb / sha512_multibinary.asm
CommitLineData
7c673cae
FG
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
7c673cae
FG
30%include "reg_sizes.asm"
31%include "multibinary.asm"
32
;;;;;
; mbin_dispatch_init_avoton parameters
; Use this function when SSE/00/01 is a minimum requirement
; if AVOTON is true, then use avoton_func instead of sse_func
; 1-> function name
; 2-> SSE/00/01 optimized function used as base
; 3-> AVX or AVX/02 opt func
; 4-> AVX2 or AVX/04 opt func
; 5-> AVOTON opt func
;
; Expands to the routine %1_dispatch_init, which selects one of %2..%5
; from CPUID/XGETBV results and stores its address in [%1_dispatched].
; The six registers it uses (mbin_rsi/rax/rbx/rcx/rdx/rdi) are pushed on
; entry and popped again before returning.
; NOTE(review): mbin_r*, WRT_OPT and the FLAG_* constants are not defined
; in this file - presumably they come from multibinary.asm; confirm there.
;;;;;
%macro mbin_dispatch_init_avoton 5
        section .text
        %1_dispatch_init:
                push    mbin_rsi
                push    mbin_rax
                push    mbin_rbx
                push    mbin_rcx
                push    mbin_rdx
                push    mbin_rdi
                lea     mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01

                mov     eax, 1
                cpuid
                lea     mbin_rdi, [%5 WRT_OPT]
                and     eax, FLAG_CPUID1_EAX_STEP_MASK
                cmp     eax, FLAG_CPUID1_EAX_AVOTON
                ; If Avoton, set Avoton symbol and exit
                cmove   mbin_rsi, mbin_rdi
                je      _%1_init_done

                ; ecx still holds CPUID.1:ECX here; both AVX and OSXSAVE
                ; must be reported before any AVX variant is considered.
                and     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
                cmp     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
                ; lea does not modify flags, so jne below still tests the cmp
                lea     mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func
                jne     _%1_init_done ; AVX is not available so end
                mov     mbin_rsi, mbin_rbx

                ;; Try for AVX2
                xor     ecx, ecx        ; CPUID leaf 7, sub-leaf 0
                mov     eax, 7
                cpuid
                test    ebx, FLAG_CPUID7_EBX_AVX2
                ; lea does not modify flags, so cmovne still tests the AVX2 bit
                lea     mbin_rbx, [%4 WRT_OPT] ; AVX2 (gen4) opt func
                cmovne  mbin_rsi, mbin_rbx

                ;; Does it have xmm and ymm support
                ; Only reached when CPUID.1 reported OSXSAVE (checked above).
                xor     ecx, ecx        ; read XCR0
                xgetbv
                and     eax, FLAG_XGETBV_EAX_XMM_YMM
                cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
                je      _%1_init_done
                ; XMM/YMM state not enabled: discard the AVX/AVX2 choice
                ; and fall back to the SSE 00/01 function.
                lea     mbin_rsi, [%2 WRT_OPT]

        _%1_init_done:
                pop     mbin_rdi
                pop     mbin_rdx
                pop     mbin_rcx
                pop     mbin_rbx
                pop     mbin_rax
                ; publish the selection before mbin_rsi itself is restored
                mov     [%1_dispatched], mbin_rsi
                pop     mbin_rsi
                ret
%endmacro
95
;;;;;
; mbin_dispatch_init6_avoton parameters
; if AVOTON is true, then use avoton_func instead of sse_func
; 1-> function name
; 2-> base function
; 3-> SSE4_1 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> AVOTON opt func
;
; Expands to the routine %1_dispatch_init, which selects one of %2..%7
; from CPUID/XGETBV results and stores its address in [%1_dispatched].
; Selection is progressive: each successful feature test upgrades the
; candidate in mbin_rsi, and the first failed test exits with the best
; candidate found so far. The six registers it uses are pushed on entry
; and popped again before returning.
; NOTE(review): mbin_r*, WRT_OPT and the FLAG_*/FLAGS_* constants are not
; defined in this file - presumably they come from multibinary.asm.
;;;;;
%macro mbin_dispatch_init6_avoton 7
        section .text
        %1_dispatch_init:
                push    mbin_rsi
                push    mbin_rax
                push    mbin_rbx
                push    mbin_rcx
                push    mbin_rdx
                push    mbin_rdi
                lea     mbin_rsi, [%2 WRT_OPT] ; Default - use base function

                mov     eax, 1
                cpuid
                mov     ebx, ecx ; save cpuid1.ecx
                test    ecx, FLAG_CPUID1_ECX_SSE4_1
                je      _%1_init_done     ; Use base function if no SSE4_1
                lea     mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt

                lea     mbin_rdi, [%7 WRT_OPT]
                and     eax, FLAG_CPUID1_EAX_STEP_MASK
                cmp     eax, FLAG_CPUID1_EAX_AVOTON
                ; If Avoton, set Avoton symbol and exit
                cmove   mbin_rsi, mbin_rdi
                je      _%1_init_done


                ;; Test for XMM_YMM support/AVX
                ; ecx still holds CPUID.1:ECX; xgetbv is only executed
                ; once OSXSAVE has been confirmed.
                test    ecx, FLAG_CPUID1_ECX_OSXSAVE
                je      _%1_init_done
                xor     ecx, ecx        ; read XCR0
                xgetbv  ; xcr -> edx:eax
                ; the Avoton pointer previously loaded into mbin_rdi is no
                ; longer needed, so edi is reused as scratch
                mov     edi, eax          ; save xgetbv.eax

                and     eax, FLAG_XGETBV_EAX_XMM_YMM
                cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
                jne     _%1_init_done
                test    ebx, FLAG_CPUID1_ECX_AVX ; ebx = saved cpuid1.ecx
                je      _%1_init_done
                lea     mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt

                ;; Test for AVX2
                xor     ecx, ecx        ; CPUID leaf 7, sub-leaf 0
                mov     eax, 7
                cpuid
                test    ebx, FLAG_CPUID7_EBX_AVX2
                je      _%1_init_done           ; No AVX2 possible
                lea     mbin_rsi, [%5 WRT_OPT]  ; AVX2/04 opt func

                ;; Test for AVX512
                ; edi = saved xgetbv.eax, ebx = CPUID.7:EBX from above
                and     edi, FLAG_XGETBV_EAX_ZMM_OPM
                cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
                jne     _%1_init_done     ; No AVX512 possible
                and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
                cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
                ; lea does not modify flags, so cmove still tests the cmp
                lea     mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
                cmove   mbin_rsi, mbin_rbx

        _%1_init_done:
                pop     mbin_rdi
                pop     mbin_rdx
                pop     mbin_rcx
                pop     mbin_rbx
                pop     mbin_rax
                ; publish the selection before mbin_rsi itself is restored
                mov     [%1_dispatched], mbin_rsi
                pop     mbin_rsi
                ret
%endmacro
174
default rel
[bits 64]

%define def_wrd         dq
%define wrd_sz          qword
%define arg1            rsi

; declare the L3 ctx level symbols (these will then call the appropriate
; L2 symbols)
extern sha512_ctx_mgr_init_sse
extern sha512_ctx_mgr_submit_sse
extern sha512_ctx_mgr_flush_sse

extern sha512_ctx_mgr_init_avx
extern sha512_ctx_mgr_submit_avx
extern sha512_ctx_mgr_flush_avx

extern sha512_ctx_mgr_init_avx2
extern sha512_ctx_mgr_submit_avx2
extern sha512_ctx_mgr_flush_avx2

extern sha512_ctx_mgr_init_base
extern sha512_ctx_mgr_submit_base
extern sha512_ctx_mgr_flush_base

%ifdef HAVE_AS_KNOWS_AVX512
 extern sha512_ctx_mgr_init_avx512
 extern sha512_ctx_mgr_submit_avx512
 extern sha512_ctx_mgr_flush_avx512
%endif

extern sha512_ctx_mgr_init_sb_sse4
extern sha512_ctx_mgr_submit_sb_sse4
extern sha512_ctx_mgr_flush_sb_sse4

;;; *_mbinit are initial values for *_dispatched, which is updated on the
;;; first call. Therefore, *_dispatch_init is only executed on first call.

; Initialise symbols
mbin_interface sha512_ctx_mgr_init
mbin_interface sha512_ctx_mgr_submit
mbin_interface sha512_ctx_mgr_flush

%ifdef HAVE_AS_KNOWS_AVX512
 ; Assembler knows AVX512: dispatch over base, SSE, AVX, AVX2 and AVX512
 ; variants, with the sb_sse4 variant substituted on Avoton.
 mbin_dispatch_init6_avoton sha512_ctx_mgr_init, sha512_ctx_mgr_init_base, \
        sha512_ctx_mgr_init_sse, sha512_ctx_mgr_init_avx, \
        sha512_ctx_mgr_init_avx2, sha512_ctx_mgr_init_avx512, \
        sha512_ctx_mgr_init_sb_sse4

 mbin_dispatch_init6_avoton sha512_ctx_mgr_submit, sha512_ctx_mgr_submit_base, \
        sha512_ctx_mgr_submit_sse, sha512_ctx_mgr_submit_avx, \
        sha512_ctx_mgr_submit_avx2, sha512_ctx_mgr_submit_avx512, \
        sha512_ctx_mgr_submit_sb_sse4

 mbin_dispatch_init6_avoton sha512_ctx_mgr_flush, sha512_ctx_mgr_flush_base, \
        sha512_ctx_mgr_flush_sse, sha512_ctx_mgr_flush_avx, \
        sha512_ctx_mgr_flush_avx2, sha512_ctx_mgr_flush_avx512, \
        sha512_ctx_mgr_flush_sb_sse4
%else
 ; No AVX512 in the assembler: the SSE variant is the default and the
 ; *_base variants are not referenced on this path.
 mbin_dispatch_init_avoton sha512_ctx_mgr_init, sha512_ctx_mgr_init_sse, \
        sha512_ctx_mgr_init_avx, sha512_ctx_mgr_init_avx2, \
        sha512_ctx_mgr_init_sb_sse4

 mbin_dispatch_init_avoton sha512_ctx_mgr_submit, sha512_ctx_mgr_submit_sse, \
        sha512_ctx_mgr_submit_avx, sha512_ctx_mgr_submit_avx2, \
        sha512_ctx_mgr_submit_sb_sse4

 mbin_dispatch_init_avoton sha512_ctx_mgr_flush, sha512_ctx_mgr_flush_sse, \
        sha512_ctx_mgr_flush_avx, sha512_ctx_mgr_flush_avx2, \
        sha512_ctx_mgr_flush_sb_sse4
%endif


;;;       func                          core, ver, snum
slversion sha512_ctx_mgr_init,          00,   04,  0175
slversion sha512_ctx_mgr_submit,        00,   04,  0176
slversion sha512_ctx_mgr_flush,         00,   04,  0177