git.proxmox.com Git - ceph.git/blob - ceph/src/isa-l/crc/crc_multibinary.asm
import quincy beta 17.1.0
[ceph.git] / ceph / src / isa-l / crc / crc_multibinary.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 default rel
31 [bits 64]
32
33 %include "reg_sizes.asm"
34
35 extern crc32_iscsi_00
36 extern crc32_iscsi_01
37 extern crc32_iscsi_base
38
39 extern crc32_ieee_01
40 extern crc32_ieee_by4 ;; Optimized for SLM
41 extern crc32_ieee_02
42 extern crc32_ieee_base
43
44 extern crc16_t10dif_01
45 extern crc16_t10dif_by4 ;; Optimized for SLM
46 extern crc16_t10dif_02
47 extern crc16_t10dif_base
48
49 extern crc32_gzip_refl_by8
50 extern crc32_gzip_refl_by8_02
51 extern crc32_gzip_refl_base
52
53 extern crc16_t10dif_copy_by4
54 extern crc16_t10dif_copy_by4_02
55 extern crc16_t10dif_copy_base
56
57 %if (AS_FEATURE_LEVEL) >= 10
58 extern crc32_gzip_refl_by16_10
59 extern crc32_ieee_by16_10
60 extern crc32_iscsi_by16_10
61 extern crc16_t10dif_by16_10
62 %endif
63
64 %include "multibinary.asm"
65
66 section .data
67 ;;; Each *_dispatched qword below starts out holding the address of its
;;; *_mbinit stub; the matching *_dispatch_init routine overwrites it with the
;;; address of the selected implementation on the first call.
68 ;;; Therefore, *_dispatch_init is normally only executed on the first call.
69
; Jump target used by crc32_iscsi; rewritten by crc32_iscsi_dispatch_init.
70 crc32_iscsi_dispatched:
71 dq crc32_iscsi_mbinit
72
; Jump target used by crc32_ieee; rewritten by crc32_ieee_dispatch_init.
73 crc32_ieee_dispatched:
74 dq crc32_ieee_mbinit
75
; Jump target used by crc16_t10dif; rewritten by crc16_t10dif_dispatch_init.
76 crc16_t10dif_dispatched:
77 dq crc16_t10dif_mbinit
78
79 section .text
80 ;;;;
81 ; crc32_iscsi multibinary function
82 ;;;;
; crc32_iscsi is the entry point named in the mk_global line; it jumps through
; the pointer crc32_iscsi_dispatched. That pointer is initialised (in .data)
; to crc32_iscsi_mbinit, so the first call lands on the stub below, which runs
; crc32_iscsi_dispatch_init and then falls through into crc32_iscsi to jump to
; the target that was just stored. The init routine pushes and pops every
; general-purpose register it uses, so the caller's register contents reach
; the selected implementation unchanged.
; mk_global and endbranch are macros defined outside this file
; (presumably in the %include'd files -- TODO confirm).
83 mk_global crc32_iscsi, function
84 crc32_iscsi_mbinit:
85 endbranch
86 call crc32_iscsi_dispatch_init ; stores the chosen target in crc32_iscsi_dispatched
; No ret/jmp here: execution continues into crc32_iscsi.
87 crc32_iscsi:
88 endbranch
89 jmp qword [crc32_iscsi_dispatched] ; jump (not call): the target returns directly to our caller
90
; crc32_iscsi_dispatch_init
;   Chooses a crc32_iscsi implementation for the running CPU and stores its
;   address in crc32_iscsi_dispatched. The candidate kept in rsi is upgraded
;   step by step; any failed feature test jumps to .crc_iscsi_init_done with
;   the best candidate found so far:
;     crc32_iscsi_base     default
;     crc32_iscsi_00       CPUID.1:ECX has FLAG_CPUID1_ECX_SSE4_2
;     crc32_iscsi_01       ... and FLAG_CPUID1_ECX_CLMUL
;     crc32_iscsi_by16_10  (only assembled when AS_FEATURE_LEVEL >= 10)
;                          OSXSAVE, XMM/YMM state, AVX, AVX2, ZMM/opmask state
;                          and both AVX512 flag groups (G1, G2) all present
;   rax, rbx, rcx, rdx, rsi and rdi are saved on entry and restored before
;   ret; the arithmetic flags are not preserved.
91 crc32_iscsi_dispatch_init:
92 push rax ; cpuid/xgetbv overwrite eax/ebx/ecx/edx; rsi/rdi are used as scratch
93 push rbx
94 push rcx
95 push rdx
96 push rsi
97 push rdi
98 lea rsi, [crc32_iscsi_base WRT_OPT] ; Default
99
100 mov eax, 1 ; CPUID leaf 1
101 cpuid
102 mov ebx, ecx ; save cpuid1.ecx (ecx is zeroed for xgetbv below)
103 test ecx, FLAG_CPUID1_ECX_SSE4_2
104 jz .crc_iscsi_init_done ; use iscsi_base
105 lea rsi, [crc32_iscsi_00 WRT_OPT]
106 test ecx, FLAG_CPUID1_ECX_CLMUL
107 jz .crc_iscsi_init_done ; use crc32_iscsi_00
108 lea rsi, [crc32_iscsi_01 WRT_OPT]
109
110 ;; Test for XMM_YMM support/AVX
111 test ecx, FLAG_CPUID1_ECX_OSXSAVE
112 je .crc_iscsi_init_done ; flag clear: keep crc32_iscsi_01
113 xor ecx, ecx ; ecx = 0 selects XCR0 for xgetbv
114 xgetbv ; xcr -> edx:eax
115 mov edi, eax ; save xgetbv.eax for the ZMM/opmask state test below
116
117 and eax, FLAG_XGETBV_EAX_XMM_YMM
118 cmp eax, FLAG_XGETBV_EAX_XMM_YMM ; every XMM_YMM bit must be set
119 jne .crc_iscsi_init_done
120 test ebx, FLAG_CPUID1_ECX_AVX ; ebx = saved cpuid1.ecx
121 je .crc_iscsi_init_done
122 ;; AVX/02 opt if available (none is selected here; rsi still holds crc32_iscsi_01)
123
124 %if AS_FEATURE_LEVEL >= 10
125 ;; Test for AVX2
126 xor ecx, ecx ; subleaf 0
127 mov eax, 7
128 cpuid ; CPUID leaf 7: ebx/ecx now hold the leaf-7 feature flags
129 test ebx, FLAG_CPUID7_EBX_AVX2
130 je .crc_iscsi_init_done ; No AVX2 possible
131
132 ;; Test for AVX512
133 and edi, FLAG_XGETBV_EAX_ZMM_OPM ; edi = xgetbv.eax saved above
134 cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
135 jne .crc_iscsi_init_done ; No AVX512 possible
136 and ebx, FLAGS_CPUID7_EBX_AVX512_G1
137 cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 ; every G1 flag must be set
138 jne .crc_iscsi_init_done
139
140 and ecx, FLAGS_CPUID7_ECX_AVX512_G2
141 cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 ; ZF = 1 only if every G2 flag is set
142 lea rbx, [crc32_iscsi_by16_10 WRT_OPT] ; AVX512/10 opt
143 cmove rsi, rbx ; lea leaves the flags untouched, so this still tests the cmp above
144 %endif
145
146 .crc_iscsi_init_done:
147 mov [crc32_iscsi_dispatched], rsi ; publish the chosen implementation
148 pop rdi
149 pop rsi
150 pop rdx
151 pop rcx
152 pop rbx
153 pop rax
154 ret
155
156 ;;;;
157 ; crc32_ieee multibinary function
158 ;;;;
; crc32_ieee is the entry point named in the mk_global line; it jumps through
; the pointer crc32_ieee_dispatched. That pointer is initialised (in .data) to
; crc32_ieee_mbinit, so the first call lands on the stub below, which runs
; crc32_ieee_dispatch_init and then falls through into crc32_ieee to jump to
; the target that was just stored. The init routine pushes and pops every
; general-purpose register it uses, so the caller's register contents reach
; the selected implementation unchanged.
; mk_global and endbranch are macros defined outside this file
; (presumably in the %include'd files -- TODO confirm).
159 mk_global crc32_ieee, function
160 crc32_ieee_mbinit:
161 endbranch
162 call crc32_ieee_dispatch_init ; stores the chosen target in crc32_ieee_dispatched
; No ret/jmp here: execution continues into crc32_ieee.
163 crc32_ieee:
164 endbranch
165 jmp qword [crc32_ieee_dispatched] ; jump (not call): the target returns directly to our caller
166
; crc32_ieee_dispatch_init
;   Chooses a crc32_ieee implementation for the running CPU and stores its
;   address in crc32_ieee_dispatched. The candidate kept in rsi is upgraded
;   step by step; any failed feature test jumps to .crc_ieee_init_done with
;   the best candidate found so far:
;     crc32_ieee_base     default
;     crc32_ieee_01       CPUID.1:ECX has FLAG_CPUID1_ECX_SSE3 and
;                         FLAG_CPUID1_ECX_CLMUL
;     crc32_ieee_by4      as above, and CPUID.1:EAX masked with
;                         FLAG_CPUID1_EAX_STEP_MASK equals FLAG_CPUID1_EAX_AVOTON
;     crc32_ieee_02       OSXSAVE, XMM/YMM state and AVX all present
;     crc32_ieee_by16_10  (only assembled when AS_FEATURE_LEVEL >= 10)
;                         additionally AVX2, ZMM/opmask state and both AVX512
;                         flag groups (G1, G2) present
;   rax, rbx, rcx, rdx, rsi and rdi are saved on entry and restored before
;   ret; the arithmetic flags are not preserved.
167 crc32_ieee_dispatch_init:
168 push rax ; cpuid/xgetbv overwrite eax/ebx/ecx/edx; rsi/rdi are used as scratch
169 push rbx
170 push rcx
171 push rdx
172 push rsi
173 push rdi
174 lea rsi, [crc32_ieee_base WRT_OPT] ; Default
175
176 mov eax, 1 ; CPUID leaf 1
177 cpuid
178 mov ebx, ecx ; save cpuid1.ecx (ecx is zeroed for xgetbv below)
179 test ecx, FLAG_CPUID1_ECX_SSE3
180 jz .crc_ieee_init_done ; use ieee_base
181 test ecx, FLAG_CPUID1_ECX_CLMUL
182 jz .crc_ieee_init_done ; use ieee_base
183 lea rsi, [crc32_ieee_01 WRT_OPT]
184
185 ;; Extra Avoton test
186 lea rdx, [crc32_ieee_by4 WRT_OPT]
187 and eax, FLAG_CPUID1_EAX_STEP_MASK ; eax is still CPUID.1:EAX from the cpuid above
188 cmp eax, FLAG_CPUID1_EAX_AVOTON
189 cmove rsi, rdx ; on a match prefer crc32_ieee_by4 over crc32_ieee_01
190
191 ;; Test for XMM_YMM support/AVX
192 test ecx, FLAG_CPUID1_ECX_OSXSAVE
193 je .crc_ieee_init_done ; flag clear: keep the candidate chosen so far
194 xor ecx, ecx ; ecx = 0 selects XCR0 for xgetbv
195 xgetbv ; xcr -> edx:eax
196 mov edi, eax ; save xgetbv.eax for the ZMM/opmask state test below
197
198 and eax, FLAG_XGETBV_EAX_XMM_YMM
199 cmp eax, FLAG_XGETBV_EAX_XMM_YMM ; every XMM_YMM bit must be set
200 jne .crc_ieee_init_done
201 test ebx, FLAG_CPUID1_ECX_AVX ; ebx = saved cpuid1.ecx
202 je .crc_ieee_init_done
203 lea rsi, [crc32_ieee_02 WRT_OPT] ; AVX/02 opt
204
205 %if AS_FEATURE_LEVEL >= 10
206 ;; Test for AVX2
207 xor ecx, ecx ; subleaf 0
208 mov eax, 7
209 cpuid ; CPUID leaf 7: ebx/ecx now hold the leaf-7 feature flags
210 test ebx, FLAG_CPUID7_EBX_AVX2
211 je .crc_ieee_init_done ; No AVX2 possible
212
213 ;; Test for AVX512
214 and edi, FLAG_XGETBV_EAX_ZMM_OPM ; edi = xgetbv.eax saved above
215 cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
216 jne .crc_ieee_init_done ; No AVX512 possible
217 and ebx, FLAGS_CPUID7_EBX_AVX512_G1
218 cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 ; every G1 flag must be set
219 jne .crc_ieee_init_done
220
221 and ecx, FLAGS_CPUID7_ECX_AVX512_G2
222 cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 ; ZF = 1 only if every G2 flag is set
223 lea rbx, [crc32_ieee_by16_10 WRT_OPT] ; AVX512/10 opt
224 cmove rsi, rbx ; lea leaves the flags untouched, so this still tests the cmp above
225 %endif
226
227 .crc_ieee_init_done:
228 mov [crc32_ieee_dispatched], rsi ; publish the chosen implementation
229 pop rdi
230 pop rsi
231 pop rdx
232 pop rcx
233 pop rbx
234 pop rax
235 ret
236
237 ;;;;
238 ; crc16_t10dif multibinary function
239 ;;;;
; crc16_t10dif is the entry point named in the mk_global line; it jumps
; through the pointer crc16_t10dif_dispatched. That pointer is initialised (in
; .data) to crc16_t10dif_mbinit, so the first call lands on the stub below,
; which runs crc16_t10dif_dispatch_init and then falls through into
; crc16_t10dif to jump to the target that was just stored. The init routine
; pushes and pops every general-purpose register it uses, so the caller's
; register contents reach the selected implementation unchanged.
; mk_global and endbranch are macros defined outside this file
; (presumably in the %include'd files -- TODO confirm).
240 mk_global crc16_t10dif, function
241 crc16_t10dif_mbinit:
242 endbranch
243 call crc16_t10dif_dispatch_init ; stores the chosen target in crc16_t10dif_dispatched
; No ret/jmp here: execution continues into crc16_t10dif.
244 crc16_t10dif:
245 endbranch
246 jmp qword [crc16_t10dif_dispatched] ; jump (not call): the target returns directly to our caller
247
; crc16_t10dif_dispatch_init
;   Chooses a crc16_t10dif implementation for the running CPU and stores its
;   address in crc16_t10dif_dispatched. The candidate kept in rsi is upgraded
;   step by step; any failed feature test jumps to .t10dif_init_done with the
;   best candidate found so far:
;     crc16_t10dif_base     default
;     crc16_t10dif_01       CPUID.1:ECX has FLAG_CPUID1_ECX_SSE3 and
;                           FLAG_CPUID1_ECX_CLMUL
;     crc16_t10dif_by4      as above, and CPUID.1:EAX masked with
;                           FLAG_CPUID1_EAX_STEP_MASK equals
;                           FLAG_CPUID1_EAX_AVOTON
;     crc16_t10dif_02       OSXSAVE, XMM/YMM state and AVX all present
;     crc16_t10dif_by16_10  (only assembled when AS_FEATURE_LEVEL >= 10)
;                           additionally AVX2, ZMM/opmask state and both
;                           AVX512 flag groups (G1, G2) present
;   rax, rbx, rcx, rdx, rsi and rdi are saved on entry and restored before
;   ret; the arithmetic flags are not preserved.
248 crc16_t10dif_dispatch_init:
249 push rax ; cpuid/xgetbv overwrite eax/ebx/ecx/edx; rsi/rdi are used as scratch
250 push rbx
251 push rcx
252 push rdx
253 push rsi
254 push rdi
255 lea rsi, [crc16_t10dif_base WRT_OPT] ; Default
256
257 mov eax, 1 ; CPUID leaf 1
258 cpuid
259 mov ebx, ecx ; save cpuid1.ecx (ecx is zeroed for xgetbv below)
260 test ecx, FLAG_CPUID1_ECX_SSE3
261 jz .t10dif_init_done ; use t10dif_base
262 test ecx, FLAG_CPUID1_ECX_CLMUL
263 jz .t10dif_init_done ; use t10dif_base
264 lea rsi, [crc16_t10dif_01 WRT_OPT]
265
266 ;; Extra Avoton test
267 lea rdx, [crc16_t10dif_by4 WRT_OPT]
268 and eax, FLAG_CPUID1_EAX_STEP_MASK ; eax is still CPUID.1:EAX from the cpuid above
269 cmp eax, FLAG_CPUID1_EAX_AVOTON
270 cmove rsi, rdx ; on a match prefer crc16_t10dif_by4 over crc16_t10dif_01
271
272 ;; Test for XMM_YMM support/AVX
273 test ecx, FLAG_CPUID1_ECX_OSXSAVE
274 je .t10dif_init_done ; flag clear: keep the candidate chosen so far
275 xor ecx, ecx ; ecx = 0 selects XCR0 for xgetbv
276 xgetbv ; xcr -> edx:eax
277 mov edi, eax ; save xgetbv.eax for the ZMM/opmask state test below
278
279 and eax, FLAG_XGETBV_EAX_XMM_YMM
280 cmp eax, FLAG_XGETBV_EAX_XMM_YMM ; every XMM_YMM bit must be set
281 jne .t10dif_init_done
282 test ebx, FLAG_CPUID1_ECX_AVX ; ebx = saved cpuid1.ecx
283 je .t10dif_init_done
284 lea rsi, [crc16_t10dif_02 WRT_OPT] ; AVX/02 opt
285
286 %if AS_FEATURE_LEVEL >= 10
287 ;; Test for AVX2
288 xor ecx, ecx ; subleaf 0
289 mov eax, 7
290 cpuid ; CPUID leaf 7: ebx/ecx now hold the leaf-7 feature flags
291 test ebx, FLAG_CPUID7_EBX_AVX2
292 je .t10dif_init_done ; No AVX2 possible
293
294 ;; Test for AVX512
295 and edi, FLAG_XGETBV_EAX_ZMM_OPM ; edi = xgetbv.eax saved above
296 cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
297 jne .t10dif_init_done ; No AVX512 possible
298 and ebx, FLAGS_CPUID7_EBX_AVX512_G1
299 cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 ; every G1 flag must be set
300 jne .t10dif_init_done
301
302 and ecx, FLAGS_CPUID7_ECX_AVX512_G2
303 cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 ; ZF = 1 only if every G2 flag is set
304 lea rbx, [crc16_t10dif_by16_10 WRT_OPT] ; AVX512/10 opt
305 cmove rsi, rbx ; lea leaves the flags untouched, so this still tests the cmp above
306 %endif
307
308 .t10dif_init_done:
309 mov [crc16_t10dif_dispatched], rsi ; publish the chosen implementation
310 pop rdi
311 pop rsi
312 pop rdx
313 pop rcx
314 pop rbx
315 pop rax
316 ret
317
; mbin_interface and mbin_dispatch_init_clmul are macros defined outside this
; file (presumably in multibinary.asm, included above -- TODO confirm).
; Presumably they expand to the same kind of entry stub / dispatch-init pair
; that is written out by hand above for crc32_iscsi, crc32_ieee and
; crc16_t10dif; verify against the macro definitions.
; NOTE(review): crc32_gzip_refl_by16_10 is declared extern only when
; AS_FEATURE_LEVEL >= 10; confirm the macro references its last argument only
; under that same condition.
318 mbin_interface crc32_gzip_refl
319 mbin_dispatch_init_clmul crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_by8, crc32_gzip_refl_by8_02, crc32_gzip_refl_by16_10
320
; crc16_t10dif_copy passes crc16_t10dif_copy_by4_02 for both of the last two
; arguments; no by16_10 symbol for crc16_t10dif_copy is declared in this file.
321 mbin_interface crc16_t10dif_copy
322 mbin_dispatch_init_clmul crc16_t10dif_copy, crc16_t10dif_copy_base, crc16_t10dif_copy_by4, crc16_t10dif_copy_by4_02, crc16_t10dif_copy_by4_02
323
; slversion is a macro defined outside this file; the column header below
; names its arguments (snum is presumably a serial number -- TODO confirm).
; NOTE(review): there is no slversion line for crc16_t10dif_copy -- confirm
; that this is intended.
324 ;;; func core, ver, snum
325 slversion crc16_t10dif, 00, 03, 011a
326 slversion crc32_ieee, 00, 03, 011b
327 slversion crc32_iscsi, 00, 03, 011c
328 slversion crc32_gzip_refl, 00, 00, 002a