]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/isa-l/igzip/igzip_compare_types.asm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / isa-l / igzip / igzip_compare_types.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "options.asm"
31 %include "stdmac.asm"
32
33 %ifndef UTILS_ASM
34 %define UTILS_ASM
35 ; compare macro
36
37 ;; sttni2 is faster, but it can't be debugged
38 ;; so following code is based on "mine5"
39
;; compare_r src1, src2, result, result_max, tmp
;;
;; Scalar match-length scan: XORs 8 bytes of src1 against 8 bytes of src2
;; per step and stops at the first non-zero XOR.
;;
;;   src1, src2  base registers of the two buffers
;;   result      in:  offset at which comparing starts
;;               out: offset of the first differing byte, or result_max
;;                    when no difference was found
;;   result_max  offset at which comparing stops; must be at least 8,
;;               because the tail is handled by re-reading the 8 bytes
;;               that end at result_max
;;   tmp         scratch register, loaded with 8 bytes at a time
;;
;; result_max is adjusted while the macro runs. It holds its entry value
;; again when no mismatch is found, but is left lowered (by 16 or 8) when
;; a mismatch is hit in the two-qword loop or the single-qword step.
%macro compare_r 5
%define %%src1          %1
%define %%src2          %2
%define %%result        %3
%define %%result_max    %4
%define %%tmp           %5

        sub     %%result_max, 16
        cmp     %%result, %%result_max
        jg      %%one_qword             ; fewer than 16 bytes left

        ;; Unrolled loop: two 8-byte compares per iteration.
%%two_qwords:
        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8

        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8
        cmp     %%result, %%result_max
        jle     %%two_qwords

        ;; At most one more whole 8-byte step fits.
%%one_qword:
        add     %%result_max, 8
        cmp     %%result, %%result_max
        jg      %%tail

        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8

        ;; Fewer than 8 bytes remain: step back and compare the 8 bytes
        ;; that end exactly at result_max (overlapping bytes already
        ;; known to match).
%%tail:
        add     %%result_max, 8         ; back to the entry value
        cmp     %%result, %%result_max
        je      %%done

        lea     %%result, [%%result_max - 8]

        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8
        jmp     %%done

        ;; tmp holds a non-zero XOR: lowest set bit / 8 is the index of
        ;; the first differing byte inside the qword.
%%diff_in_qword:
        bsf     %%tmp, %%tmp
        shr     %%tmp, 3
        add     %%result, %%tmp
%%done:
%endm
98
;; compare_x src1, src2, result, result_max, tmp, xtmp1, xtmp2
;;
;; Match-length scan that compares 16 bytes per step with
;; PCMPEQB/PMOVMSKB and finishes the tail with 8-byte XOR compares.
;;
;;   src1, src2    base registers of the two buffers
;;   result        accumulator for the match length
;;                 in:  offset at which comparing starts
;;                 out: offset of the first differing byte, or result_max
;;                      when no difference was found
;;   result_max    offset at which comparing stops; must be at least 8,
;;                 because the tail re-reads the 8 bytes ending there
;;   tmp           scratch GPR; its 32-bit view is spelled <tmp>d
;;   xtmp1, xtmp2  vector scratch registers
;;
;; result_max is adjusted while the macro runs; it is back at its entry
;; value only when no mismatch is found before the final tail compare.
;; MOVDQU, PCMPEQB and PMOVMSKB are upper-case wrappers that are not
;; defined in this file (presumably supplied by the included headers).
%macro compare_x 7
%define %%src1          %1
%define %%src2          %2
%define %%result        %3
%define %%result_max    %4
%define %%tmp           %5
%define %%tmp32         %5d             ; 32-bit view of tmp
%define %%xtmp          %6
%define %%xtmp2         %7

        sub     %%result_max, 32
        cmp     %%result, %%result_max
        jg      %%one_vector            ; fewer than 32 bytes left

        ;; Unrolled loop: two 16-byte compares per iteration.
        ;; PMOVMSKB yields one bit per byte, set where the bytes are
        ;; equal; XOR with 0xFFFF leaves set bits only where they differ.
%%two_vectors:
        MOVDQU          %%xtmp, [%%src1 + %%result]
        MOVDQU          %%xtmp2, [%%src2 + %%result]
        PCMPEQB         %%xtmp, %%xtmp, %%xtmp2
        PMOVMSKB        %%tmp32, %%xtmp
        xor     %%tmp, 0xFFFF
        jnz     %%diff_in_vector
        add     %%result, 16

        MOVDQU          %%xtmp, [%%src1 + %%result]
        MOVDQU          %%xtmp2, [%%src2 + %%result]
        PCMPEQB         %%xtmp, %%xtmp, %%xtmp2
        PMOVMSKB        %%tmp32, %%xtmp
        xor     %%tmp, 0xFFFF
        jnz     %%diff_in_vector
        add     %%result, 16

        cmp     %%result, %%result_max
        jle     %%two_vectors

        ;; At most one more whole 16-byte step fits.
%%one_vector:
        add     %%result_max, 16
        cmp     %%result, %%result_max
        jg      %%one_qword

        MOVDQU          %%xtmp, [%%src1 + %%result]
        MOVDQU          %%xtmp2, [%%src2 + %%result]
        PCMPEQB         %%xtmp, %%xtmp, %%xtmp2
        PMOVMSKB        %%tmp32, %%xtmp
        xor     %%tmp, 0xFFFF
        jnz     %%diff_in_vector
        add     %%result, 16

        ;; At most one more whole 8-byte step fits.
%%one_qword:
        add     %%result_max, 8
        cmp     %%result, %%result_max
        jg      %%tail

        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8

        ;; Fewer than 8 bytes remain: compare the 8 bytes that end
        ;; exactly at result_max (overlapping bytes already matched).
%%tail:
        add     %%result_max, 8         ; back to the entry value
        cmp     %%result, %%result_max
        je      %%done

        lea     %%result, [%%result_max - 8]

        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8
        jmp     %%done

        ;; tmp is a non-zero 64-bit XOR: bit index / 8 = byte index.
%%diff_in_qword:
        bsf     %%tmp, %%tmp
        shr     %%tmp, 3
        add     %%result, %%tmp
        jmp     %%done

        ;; tmp is a non-zero byte mask: bit index = byte index.
%%diff_in_vector:
        bsf     %%tmp, %%tmp
        add     %%result, %%tmp
%%done:
%endm
185
;; compare_y src1, src2, result, result_max, tmp, ytmp1, ytmp2
;;
;; Match-length scan that compares 32 bytes per step with
;; vpcmpeqb/vpmovmskb, then narrows to one 16-byte step and 8-byte XOR
;; compares for the tail.
;;
;;   src1, src2    base registers of the two buffers
;;   result        accumulator for the match length
;;                 in:  offset at which comparing starts
;;                 out: offset of the first differing byte, or result_max
;;                      when no difference was found
;;   result_max    offset at which comparing stops; must be at least 8,
;;                 because the tail re-reads the 8 bytes ending there
;;   tmp           scratch GPR; its 32-bit view is spelled <tmp>d
;;   ytmp1, ytmp2  vector scratch registers; the 16-byte step uses the
;;                 names <ytmpN>x, which are not defined in this file
;;
;; result_max is adjusted while the macro runs; it is back at its entry
;; value only when no mismatch is found before the final tail compare.
%macro compare_y 7
%define %%src1          %1
%define %%src2          %2
%define %%result        %3
%define %%result_max    %4
%define %%tmp           %5
%define %%tmp32         %5d             ; 32-bit view of tmp
%define %%ytmp          %6
%define %%ytmp2         %7

        sub     %%result_max, 64
        cmp     %%result, %%result_max
        jg      %%one_ymm               ; fewer than 64 bytes left

        ;; Unrolled loop: two 32-byte compares per iteration.
        ;; vpmovmskb yields one bit per byte, set where the bytes are
        ;; equal; XOR with all-ones leaves set bits only where they differ.
%%two_ymm:
        vmovdqu         %%ytmp, [%%src1 + %%result]
        vmovdqu         %%ytmp2, [%%src2 + %%result]
        vpcmpeqb        %%ytmp, %%ytmp, %%ytmp2
        vpmovmskb       %%tmp, %%ytmp
        xor     %%tmp32, 0xFFFFFFFF
        jnz     %%diff_in_vector
        add     %%result, 32

        vmovdqu         %%ytmp, [%%src1 + %%result]
        vmovdqu         %%ytmp2, [%%src2 + %%result]
        vpcmpeqb        %%ytmp, %%ytmp, %%ytmp2
        vpmovmskb       %%tmp, %%ytmp
        xor     %%tmp32, 0xFFFFFFFF
        jnz     %%diff_in_vector
        add     %%result, 32

        cmp     %%result, %%result_max
        jle     %%two_ymm

        ;; At most one more whole 32-byte step fits.
%%one_ymm:
        add     %%result_max, 32
        cmp     %%result, %%result_max
        jg      %%one_xmm

        vmovdqu         %%ytmp, [%%src1 + %%result]
        vmovdqu         %%ytmp2, [%%src2 + %%result]
        vpcmpeqb        %%ytmp, %%ytmp, %%ytmp2
        vpmovmskb       %%tmp, %%ytmp
        xor     %%tmp32, 0xFFFFFFFF
        jnz     %%diff_in_vector
        add     %%result, 32

        ;; At most one more whole 16-byte step fits; same scheme on the
        ;; 16-byte view of the vector registers, with a 16-bit mask.
%%one_xmm:
        add     %%result_max, 16
        cmp     %%result, %%result_max
        jg      %%one_qword

        vmovdqu         %%ytmp %+ x, [%%src1 + %%result]
        vmovdqu         %%ytmp2 %+ x, [%%src2 + %%result]
        vpcmpeqb        %%ytmp %+ x, %%ytmp %+ x, %%ytmp2 %+ x
        vpmovmskb       %%tmp, %%ytmp %+ x
        xor     %%tmp32, 0xFFFF
        jnz     %%diff_in_vector
        add     %%result, 16

        ;; At most one more whole 8-byte step fits.
%%one_qword:
        add     %%result_max, 8
        cmp     %%result, %%result_max
        jg      %%tail

        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8

        ;; Fewer than 8 bytes remain: compare the 8 bytes that end
        ;; exactly at result_max (overlapping bytes already matched).
%%tail:
        add     %%result_max, 8         ; back to the entry value
        cmp     %%result, %%result_max
        je      %%done

        lea     %%result, [%%result_max - 8]

        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8
        jmp     %%done

        ;; tmp is a non-zero 64-bit XOR: bit index / 8 = byte index.
%%diff_in_qword:
        bsf     %%tmp, %%tmp
        shr     %%tmp, 3
        add     %%result, %%tmp
        jmp     %%done

        ;; tmp is a non-zero byte mask: bit index = byte index.
%%diff_in_vector:
        tzcnt   %%tmp, %%tmp
        add     %%result, %%tmp
%%done:
%endm
285
;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
;;
;; Match-length scan that compares 64 bytes per step with vpcmpb into a
;; mask register, and handles the final partial block with a masked load
;; so that no bytes past result_max are compared.
;;
;;   src1, src2    base registers of the two buffers
;;   result        accumulator for the match length
;;                 in:  offset at which comparing starts
;;                 out: offset of the first differing byte, or the entry
;;                      value of result_max when no difference was found
;;   result_max    offset at which comparing stops
;;   tmp           64-bit scratch GPR (moved to and from ktmp with kmovq)
;;   ktmp          scratch mask register
;;   ztmp1, ztmp2  vector scratch registers
;;
;; Clobbers result_max: on the no-mismatch path it ends up holding the
;; number of bytes left for the partial block, not its entry value.
;; NEQ is the vpcmpb predicate operand; it is not defined in this file.
%macro compare_z 8
%define %%src1          %1
%define %%src2          %2
%define %%result        %3
%define %%result_max    %4
%define %%tmp           %5
%define %%ktmp          %6
%define %%ztmp          %7
%define %%ztmp2         %8

        sub     %%result_max, 128
        cmp     %%result, %%result_max
        jg      %%one_block             ; fewer than 128 bytes left

        ;; Unrolled loop: two 64-byte compares per iteration. ktmp gets
        ;; one bit per byte, set where the bytes differ.
%%two_blocks:
        vmovdqu8        %%ztmp, [%%src1 + %%result]
        vmovdqu8        %%ztmp2, [%%src2 + %%result]
        vpcmpb          %%ktmp, %%ztmp, %%ztmp2, NEQ
        ktestq          %%ktmp, %%ktmp
        jnz     %%diff_found
        add     %%result, 64

        vmovdqu8        %%ztmp, [%%src1 + %%result]
        vmovdqu8        %%ztmp2, [%%src2 + %%result]
        vpcmpb          %%ktmp, %%ztmp, %%ztmp2, NEQ
        ktestq          %%ktmp, %%ktmp
        jnz     %%diff_found
        add     %%result, 64

        cmp     %%result, %%result_max
        jle     %%two_blocks

        ;; At most one more whole 64-byte step fits.
%%one_block:
        add     %%result_max, 64
        cmp     %%result, %%result_max
        jg      %%partial_block

        vmovdqu8        %%ztmp, [%%src1 + %%result]
        vmovdqu8        %%ztmp2, [%%src2 + %%result]
        vpcmpb          %%ktmp, %%ztmp, %%ztmp2, NEQ
        ktestq          %%ktmp, %%ktmp
        jnz     %%diff_found
        add     %%result, 64

%%partial_block:
        add     %%result_max, 64        ; back to the entry value ...
        sub     %%result_max, %%result  ; ... then turned into bytes left
        jle     %%done

        ;; Build a mask with the low <bytes left> bits set and use it for
        ;; zero-masked loads; lanes outside the mask read as zero in both
        ;; registers and therefore compare equal.
        mov     %%tmp, -1
        bzhi    %%tmp, %%tmp, %%result_max
        kmovq   %%ktmp, %%tmp

        vmovdqu8        %%ztmp {%%ktmp}{z}, [%%src1 + %%result]
        vmovdqu8        %%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
        vpcmpb          %%ktmp, %%ztmp, %%ztmp2, NEQ
        ktestq          %%ktmp, %%ktmp
        jnz     %%diff_found
        add     %%result, %%result_max

        jmp     %%done

        ;; ktmp is a non-zero mask of differing bytes: the index of its
        ;; lowest set bit is the index of the first differing byte.
%%diff_found:
        kmovq   %%tmp, %%ktmp
        tzcnt   %%tmp, %%tmp
        add     %%result, %%tmp
%%done:
%endm
356
;; compare250 src1, src2, result, result_max, tmp, ytmp0, ytmp1
;;
;; Same as compare_large, except that result_max is first clamped to at
;; most 250 (signed comparison), so the reported match length never
;; exceeds 250.
;;
;;   src1, src2    base registers of the two buffers
;;   result        accumulator for the match length (in/out)
;;   result_max    offset at which comparing stops; overwritten with
;;                 min(result_max, 250) and then handed to compare_large,
;;                 which may adjust it further
;;   tmp           scratch GPR; loaded with the constant 250 for the
;;                 clamp, then reused as scratch by compare_large
;;   ytmp0, ytmp1  vector scratch registers, forwarded unchanged
;;
;; The COMPARE_TYPE dispatch lives only in compare_large; delegating to
;; it keeps the two macros from drifting apart.
%macro compare250 7
%define %%result_max    %4
%define %%tmp           %5

        mov     %%tmp, 250
        cmp     %%result_max, 250
        cmovg   %%result_max, %%tmp     ; result_max = min(result_max, 250)

        compare_large   %1, %2, %3, %4, %5, %6, %7
%endmacro
383
;; compare_large src1, src2, result, result_max, tmp, ytmp0, ytmp1
;;
;; Front end that expands to one of the compare_* implementations,
;; chosen at assembly time by COMPARE_TYPE (not defined in this file):
;;   1 -> compare_r  (8 bytes per step, general-purpose registers)
;;   2 -> compare_x  (16 bytes per step; receives <ytmp0>x / <ytmp1>x)
;;   3 -> compare_y  (32 bytes per step; receives ytmp0 / ytmp1)
;; Any other value is an assembly-time error.
;;
;; Assumes the buffer has at least 8 bytes.
;; Accumulates the match length onto result.
%macro compare_large 7
%if (COMPARE_TYPE == 1)
        compare_r       %1, %2, %3, %4, %5
%elif (COMPARE_TYPE == 2)
        compare_x       %1, %2, %3, %4, %5, %6x, %7x
%elif (COMPARE_TYPE == 3)
        compare_y       %1, %2, %3, %4, %5, %6, %7
%else
        ;; NOTE(review): the second, oddly spelled directive is kept
        ;; verbatim; presumably it forces a failure on assemblers where
        ;; %error alone is not fatal -- confirm.
%error Unknown Compare type COMPARE_TYPE
% error
%endif
%endmacro
408
409 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
411
;; compare size, src1, src2, result, tmp
;;
;; Counts how many leading bytes of src1 and src2 are equal, looking at
;; no more than size bytes.
;;
;;   size        number of bytes to compare; consumed (clobbered)
;;   src1, src2  base registers of the two buffers
;;   result      out: number of matching leading bytes (zeroed on entry)
;;   tmp         scratch GPR; its 8-bit view is spelled <tmp>b
;;
;; Whole 8-byte groups are compared with XOR; the last 0..7 bytes are
;; compared one at a time, so nothing past size bytes is read.
%macro compare 5
%define %%size          %1
%define %%src1          %2
%define %%src2          %3
%define %%result        %4
%define %%tmp           %5
%define %%tmp8          %5b             ; 8-bit view of tmp

        xor     %%result, %%result
        sub     %%size, 7               ; bias so "> 0" means >= 8 bytes left
        jle     %%byte_tail

%%by_qword:
        mov     %%tmp, [%%src1 + %%result]
        xor     %%tmp, [%%src2 + %%result]
        jnz     %%diff_in_qword
        add     %%result, 8
        sub     %%size, 8
        jg      %%by_qword

%%byte_tail:
        ;; No mismatch so far: size + 7 is the number of bytes still to
        ;; look at and result is the number of bytes that have matched.
        add     %%size, 7
        jle     %%done

%%by_byte:
        mov     %%tmp8, [%%src1 + %%result]
        cmp     %%tmp8, [%%src2 + %%result]
        jne     %%done
        inc     %%result
        dec     %%size
        jg      %%by_byte
        jmp     %%done

        ;; tmp is a non-zero 64-bit XOR: bit index / 8 = byte index.
%%diff_in_qword:
        bsf     %%tmp, %%tmp
        shr     %%tmp, 3
        add     %%result, %%tmp
%%done:
%endm
451
452 %endif ;UTILS_ASM