; Provenance (web-listing residue converted to comments):
; git.proxmox.com Git - ceph.git - ceph/src/isa-l/igzip/igzip_compare_types.asm
; commit: "update sources to v12.1.1"
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "options.asm"
31 %include "stdmac.asm"
32
33 %ifndef UTILS_ASM
34 %define UTILS_ASM
35 ; compare macro
36
37 ;; sttni2 is faster, but it can't be debugged
38 ;; so following code is based on "mine5"
39
40 ;; compare 258 bytes = 8 * 32 + 2
41 ;; tmp16 is a 16-bit version of tmp
42 ;; compare258 src1, src2, result, tmp
;; Scalar (GPR) version.  Returns in `result` the length (0..258) of the
;; common prefix of the byte strings at src1 and src2; 258 is the maximum
;; deflate match length.  `tmp` is a 64-bit scratch register and is
;; clobbered; flags are clobbered.
;; NOTE(review): all 258 bytes at src1/src2 are assumed readable — confirm
;; callers guarantee this.
43 %macro compare258 4
44 %define %%src1 %1
45 %define %%src2 %2
46 %define %%result %3
47 %define %%tmp %4
48 %define %%tmp16 %4w ; tmp as a 16-bit register
49
50 	xor	%%result, %%result
51 %%loop1:
; Two unrolled 8-byte compares per iteration.  `mov` leaves flags
; untouched, so `jnz` tests ZF from the `xor` (nonzero => mismatch).
52 	mov	%%tmp, [%%src1 + %%result]
53 	xor	%%tmp, [%%src2 + %%result]
54 	jnz	%%miscompare
55 	add	%%result, 8
56
57 	mov	%%tmp, [%%src1 + %%result]
58 	xor	%%tmp, [%%src2 + %%result]
59 	jnz	%%miscompare
60 	add	%%result, 8
61
; Loop until the first 256 bytes (32 qwords) have been verified equal.
62 	cmp	%%result, 256
63 	jb	%%loop1
64
65 ; compare last two bytes
; 16-bit mov leaves the upper 48 bits of tmp stale; only ZF from the
; 16-bit xor is meaningful here.
66 	mov	%%tmp16, [%%src1 + %%result]
67 	xor	%%tmp16, [%%src2 + %%result]
68 	jnz	%%miscompare16
69
70 ; no miscompares, return 258
71 	add	%%result, 2
72 	jmp	%%end
73
74 %%miscompare16:
; Mask off the stale upper bits so bsf below sees only the 2-byte tail.
75 	and	%%tmp, 0xFFFF
76 %%miscompare:
; bsf finds the lowest differing bit; shr 3 converts bit index to byte
; index, giving the number of matching bytes within this chunk.
77 	bsf	%%tmp, %%tmp
78 	shr	%%tmp, 3
79 	add	%%result, %%tmp
80 %%end:
81 %endm
82
83 ;; compare up to 258 bytes, assuming the caller already verified that
;; the first 8 bytes match (comparison starts at offset 8, so at most
;; 250 new bytes are examined)
84 ;; tmp16 is a 16-bit version of tmp
85 ;; compare250 src1, src2, result, tmp
;; Scalar (GPR) version.  Returns in `result` the total common-prefix
;; length including the pre-checked first 8 bytes (8..258).  `tmp` and
;; flags are clobbered.
86 %macro compare250 4
87 %define %%src1 %1
88 %define %%src2 %2
89 %define %%result %3
90 %define %%tmp %4
91 %define %%tmp16 %4w ; tmp as a 16-bit register
92
; Start at offset 8: bytes 0..7 are assumed equal by the caller.
93 	mov	%%result, 8
; Check bytes 8..15 once so the loop below stays aligned with the
; compare258 loop structure (two qwords per iteration up to 256).
94 	mov	%%tmp, [%%src1 + 8]
95 	xor	%%tmp, [%%src2 + 8]
96 	jnz	%%miscompare
97 	add	%%result, 8
98
99 %%loop1:
; Two unrolled 8-byte compares per iteration; jnz tests ZF from xor.
100 	mov	%%tmp, [%%src1 + %%result]
101 	xor	%%tmp, [%%src2 + %%result]
102 	jnz	%%miscompare
103 	add	%%result, 8
104
105 	mov	%%tmp, [%%src1 + %%result]
106 	xor	%%tmp, [%%src2 + %%result]
107 	jnz	%%miscompare
108 	add	%%result, 8
109
110 	cmp	%%result, 256
111 	jb	%%loop1
112
113 ; compare last two bytes
; 16-bit mov/xor: upper bits of tmp are stale; only ZF is meaningful.
114 	mov	%%tmp16, [%%src1 + %%result]
115 	xor	%%tmp16, [%%src2 + %%result]
116 	jnz	%%miscompare16
117
118 ; no miscompares, return 258
119 	add	%%result, 2
120 	jmp	%%end
121
122 %%miscompare16:
; Mask stale upper bits before bsf.
123 	and	%%tmp, 0xFFFF
124 %%miscompare:
; Bit index of first difference / 8 = count of matching bytes in chunk.
125 	bsf	%%tmp, %%tmp
126 	shr	%%tmp, 3
127 	add	%%result, %%tmp
128 %%end:
129 %endm
130
131 ;; compare 258 bytes = 8 * 32 + 2
132 ;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
133 ;; compare258_x src1, src2, result, tmp, xtmp1, xtmp2
;; SSE version.  Returns in `result` the common-prefix length (0..258).
;; `tmp` and both xmm temporaries are clobbered; flags are clobbered.
134 %macro compare258_x 6
135 %define %%src1 %1
136 %define %%src2 %2
137 %define %%result %3
138 %define %%tmp %4
139 %define %%tmp32 %4d
140 %define %%tmp16 %4w ; tmp as a 16-bit register
141 %define %%xtmp %5
142 %define %%xtmp2 %6
143
144 	xor	%%result, %%result
145 %%loop1:
; PCMPEQB sets each byte of xtmp to 0xFF where equal; PMOVMSKB packs the
; byte sign bits into a 16-bit mask in tmp32 (writing the 32-bit alias
; zero-extends into the full tmp register).  XOR with 0xFFFF inverts the
; mask, leaving 1-bits at the MISmatching byte positions.
146 	MOVDQU	%%xtmp, [%%src1 + %%result]
147 	MOVDQU	%%xtmp2, [%%src2 + %%result]
148 	PCMPEQB	%%xtmp, %%xtmp, %%xtmp2
149 	PMOVMSKB	%%tmp32, %%xtmp
150 	xor	%%tmp, 0xFFFF
151 	jnz	%%miscompare
152 	add	%%result, 16
153
154 	MOVDQU	%%xtmp, [%%src1 + %%result]
155 	MOVDQU	%%xtmp2, [%%src2 + %%result]
156 	PCMPEQB	%%xtmp, %%xtmp, %%xtmp2
157 	PMOVMSKB	%%tmp32, %%xtmp
158 	xor	%%tmp, 0xFFFF
159 	jnz	%%miscompare
160 	add	%%result, 16
161
; Loop until the first 256 bytes (16 xmm chunks) are verified equal.
162 	cmp	%%result, 256
163 	jb	%%loop1
164
165 ; compare last two bytes
; Scalar 2-byte tail: upper bits of tmp are stale after the 16-bit mov.
166 	mov	%%tmp16, [%%src1 + %%result]
167 	xor	%%tmp16, [%%src2 + %%result]
168 	jnz	%%miscompare16
169
170 ; no miscompares, return 258
171 	add	%%result, 2
172 	jmp	%%end
173
174 %%miscompare16:
; Scalar-tail mismatch: tmp holds an xor of BYTES, so bit index / 8 is
; the matching-byte count (mask stale bits first).
175 	and	%%tmp, 0xFFFF
176 	bsf	%%tmp, %%tmp
177 	shr	%%tmp, 3
178 	add	%%result, %%tmp
179 	jmp	%%end
180 %%miscompare:
; SIMD mismatch: tmp is a per-BYTE mask, so bsf directly yields the
; number of matching bytes in this chunk — no shr needed.
181 	bsf	%%tmp, %%tmp
182 	add	%%result, %%tmp
183 %%end:
184 %endm
185
186 ;; compare 258 bytes = 8 * 32 + 2, assuming first 8 bytes
187 ;; were already checked
188 ;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
189 ;; compare250_x src1, src2, result, tmp, xtmp1, xtmp2
;; SSE version.  Returns in `result` the total common-prefix length
;; including the pre-checked first 8 bytes (8..258, clamped to 258).
;; `tmp` and both xmm temporaries are clobbered; flags are clobbered.
;; NOTE(review): the final 16-byte loads can read up to 6 bytes past
;; offset 258 — confirm callers guarantee that slack is readable.
190 %macro compare250_x 6
191 %define %%src1 %1
192 %define %%src2 %2
193 %define %%result %3
194 %define %%tmp %4
195 %define %%tmp32 %4d ; tmp as a 32-bit register
196 %define %%xtmp %5
197 %define %%xtmp2 %6
198
; Start at offset 8 (bytes 0..7 assumed equal); first chunk covers 8..23.
199 	mov	%%result, 8
200 	MOVDQU	%%xtmp, [%%src1 + 8]
201 	MOVDQU	%%xtmp2, [%%src2 + 8]
; PCMPEQB marks equal bytes 0xFF; PMOVMSKB packs sign bits to a 16-bit
; mask (32-bit write zero-extends tmp); xor 0xFFFF flags mismatches.
202 	PCMPEQB	%%xtmp, %%xtmp, %%xtmp2
203 	PMOVMSKB	%%tmp32, %%xtmp
204 	xor	%%tmp, 0xFFFF
205 	jnz	%%miscompare
206 	add	%%result, 16
207 %%loop1:
; Two unrolled 16-byte compares per iteration.
208 	MOVDQU	%%xtmp, [%%src1 + %%result]
209 	MOVDQU	%%xtmp2, [%%src2 + %%result]
210 	PCMPEQB	%%xtmp, %%xtmp, %%xtmp2
211 	PMOVMSKB	%%tmp32, %%xtmp
212 	xor	%%tmp, 0xFFFF
213 	jnz	%%miscompare
214 	add	%%result, 16
215
216 	MOVDQU	%%xtmp, [%%src1 + %%result]
217 	MOVDQU	%%xtmp2, [%%src2 + %%result]
218 	PCMPEQB	%%xtmp, %%xtmp, %%xtmp2
219 	PMOVMSKB	%%tmp32, %%xtmp
220 	xor	%%tmp, 0xFFFF
221 	jnz	%%miscompare
222 	add	%%result, 16
223
; Loop while a full 16-byte chunk still fits below offset 258.
224 	cmp	%%result, 258 - 16
225 	jb	%%loop1
226
; One final (possibly overshooting) 16-byte compare covers the tail.
227 	MOVDQU	%%xtmp, [%%src1 + %%result]
228 	MOVDQU	%%xtmp2, [%%src2 + %%result]
229 	PCMPEQB	%%xtmp, %%xtmp, %%xtmp2
230 	PMOVMSKB	%%tmp32, %%xtmp
231 	xor	%%tmp, 0xFFFF
232 	jnz	%%miscompare_last
233 ; no miscompares, return 258
234 	mov	%%result, 258
235 	jmp	%%end
236
237 %%miscompare_last:
; Per-byte mask: bsf yields the matching-byte count in this chunk.
238 	bsf	%%tmp, %%tmp
239 	add	%%result, %%tmp
240
241 ;; Guarantee the result has length at most 258.
; The final chunk may extend past 258; clamp via cmova (branchless).
242 	mov	%%tmp, 258
243 	cmp	%%result, 258
244 	cmova	%%result, %%tmp
245 	jmp	%%end
246 %%miscompare:
247 	bsf	%%tmp, %%tmp
248 	add	%%result, %%tmp
249 %%end:
250 %endm
251
252 ;; compare 258 bytes = 8 * 32 + 2
253 ;; compares 32 bytes at a time, using pcmpeqb/pmovmskb
254 ;; compare258_y src1, src2, result, tmp, ytmp1, ytmp2
;; AVX2 version.  Returns in `result` the common-prefix length (0..258).
;; `tmp` and both ymm temporaries are clobbered; flags are clobbered.
255 %macro compare258_y 6
256 %define %%src1 %1
257 %define %%src2 %2
258 %define %%result %3
259 %define %%tmp %4
260 %define %%tmp16 %4w ; tmp as a 16-bit register
261 %define %%tmp32 %4d ; tmp as a 32-bit register
262 %define %%ytmp %5
263 %define %%ytmp2 %6
264
265 	xor	%%result, %%result
266 %%loop1:
; vpcmpeqb marks equal bytes 0xFF; vpmovmskb packs the 32 byte sign bits
; into a 32-bit mask; xor with 0xFFFFFFFF leaves 1-bits at mismatches
; (the 32-bit xor also zeroes the upper half of tmp).
267 	vmovdqu	%%ytmp, [%%src1 + %%result]
268 	vmovdqu	%%ytmp2, [%%src2 + %%result]
269 	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
270 	vpmovmskb	%%tmp, %%ytmp
271 	xor	%%tmp32, 0xFFFFFFFF
272 	jnz	%%miscompare
273 	add	%%result, 32
274
275 	vmovdqu	%%ytmp, [%%src1 + %%result]
276 	vmovdqu	%%ytmp2, [%%src2 + %%result]
277 	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
278 	vpmovmskb	%%tmp, %%ytmp
279 	xor	%%tmp32, 0xFFFFFFFF
280 	jnz	%%miscompare
281 	add	%%result, 32
282
; Loop until the first 256 bytes (8 ymm chunks) are verified equal.
283 	cmp	%%result, 256
284 	jb	%%loop1
285
286 ; compare last two bytes
; Scalar 2-byte tail: upper bits of tmp stale after the 16-bit mov.
287 	mov	%%tmp16, [%%src1 + %%result]
288 	xor	%%tmp16, [%%src2 + %%result]
289 	jnz	%%miscompare16
290
291 ; no miscompares, return 258
292 	add	%%result, 2
293 	jmp	%%end
294
295 %%miscompare16:
; Scalar-tail mismatch: xor of bytes, so mask stale bits then bit/8.
296 	and	%%tmp, 0xFFFF
297 	bsf	%%tmp, %%tmp
298 	shr	%%tmp, 3
299 	add	%%result, %%tmp
300 	jmp	%%end
301 %%miscompare:
; SIMD mismatch: per-byte mask, bsf gives matching-byte count directly.
302 	bsf	%%tmp, %%tmp
303 	add	%%result, %%tmp
304 %%end:
305 %endm
306
307
308 ;; compare 258 bytes = 8 * 32 + 2, assuming first 8 bytes
309 ;; were already checked
310 ;; compares 32 bytes at a time, using pcmpeqb/pmovmskb
311 ;; compare250_y src1, src2, result, tmp, ytmp1, ytmp2
;; AVX2 version.  Returns in `result` the total common-prefix length
;; including the pre-checked first 8 bytes (8..258, clamped to 258).
;; `tmp` and both ymm temporaries are clobbered; flags are clobbered.
;; NOTE(review): the final 32-byte loads can read up to 6 bytes past
;; offset 258 — confirm callers guarantee that slack is readable.
312 %macro compare250_y 6
313 %define %%src1 %1
314 %define %%src2 %2
315 %define %%result %3
316 %define %%tmp %4
317 %define %%tmp16 %4w ; tmp as a 16-bit register
318 %define %%tmp32 %4d ; tmp as a 32-bit register
319 %define %%ytmp %5
320 %define %%ytmp2 %6
321
; Start at offset 8 (bytes 0..7 assumed equal); first chunk covers 8..39.
322 	mov	%%result, 8
323 	vmovdqu	%%ytmp, [%%src1 + 8]
324 	vmovdqu	%%ytmp2, [%%src2 + 8]
; vpcmpeqb marks equal bytes 0xFF; vpmovmskb packs to a 32-bit mask;
; xor with all-ones leaves 1-bits at mismatching byte positions.
325 	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
326 	vpmovmskb	%%tmp, %%ytmp
327 	xor	%%tmp32, 0xFFFFFFFF
328 	jnz	%%miscompare
329 	add	%%result, 32
330 %%loop1:
; Two unrolled 32-byte compares per iteration.
331 	vmovdqu	%%ytmp, [%%src1 + %%result]
332 	vmovdqu	%%ytmp2, [%%src2 + %%result]
333 	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
334 	vpmovmskb	%%tmp, %%ytmp
335 	xor	%%tmp32, 0xFFFFFFFF
336 	jnz	%%miscompare
337 	add	%%result, 32
338
339 	vmovdqu	%%ytmp, [%%src1 + %%result]
340 	vmovdqu	%%ytmp2, [%%src2 + %%result]
341 	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
342 	vpmovmskb	%%tmp, %%ytmp
343 	xor	%%tmp32, 0xFFFFFFFF
344 	jnz	%%miscompare
345 	add	%%result, 32
346
; Loop while a full 32-byte chunk still fits below offset 258.
347 	cmp	%%result, 258 - 32
348 	jb	%%loop1
349
; One final (possibly overshooting) 32-byte compare covers the tail.
350 	vmovdqu	%%ytmp, [%%src1 + %%result]
351 	vmovdqu	%%ytmp2, [%%src2 + %%result]
352 	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
353 	vpmovmskb	%%tmp, %%ytmp
354 	xor	%%tmp32, 0xFFFFFFFF
355 	jnz	%%miscompare_last
; no miscompares, return 258
356 	mov	%%result, 258
357 	jmp	%%end
358
359 %%miscompare_last:
; Per-byte mask: bsf yields the matching-byte count in this chunk.
360 	bsf	%%tmp, %%tmp
361 	add	%%result, %%tmp
362
363 ;; Guarantee the result has length at most 258.
; The final chunk may extend past 258; clamp via cmova (branchless).
364 	mov	%%tmp, 258
365 	cmp	%%result, 258
366 	cmova	%%result, %%tmp
367 	jmp	%%end
368
369 %%miscompare:
370 	bsf	%%tmp, %%tmp
371 	add	%%result, %%tmp
372 %%end:
373 %endm
374
375 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
376 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
377
378 ;; compare size, src1, src2, result, tmp
;; Variable-length compare.  Returns in `result` the length of the
;; common prefix of src1/src2, up to `size` bytes.  `size` and `tmp`
;; are destroyed; flags are clobbered.
;; Strategy: qword compares while >= 8 bytes remain, then a scalar
;; byte-by-byte tail (so no load ever reads past `size` bytes).
379 %macro compare 5
380 %define %%size %1
381 %define %%src1 %2
382 %define %%src2 %3
383 %define %%result %4
384 %define %%tmp %5
385 %define %%tmp8 %5b ; tmp as a 8-bit register
386
387 	xor	%%result, %%result
; size-7 <= 0 means fewer than 8 bytes remain: skip the qword loop.
388 	sub	%%size, 7
389 	jle	%%lab2
390 %%loop1:
; 8-byte compare; xor sets ZF (mov leaves flags untouched).
391 	mov	%%tmp, [%%src1 + %%result]
392 	xor	%%tmp, [%%src2 + %%result]
393 	jnz	%%miscompare
394 	add	%%result, 8
395 	sub	%%size, 8
396 	jg	%%loop1
397 %%lab2:
398 ;; if we fall through from above, we have found no mismatches,
399 ;; %%size+7 is the number of bytes left to look at, and %%result is the
400 ;; number of bytes that have matched
401 	add	%%size, 7
402 	jle	%%end
403 %%loop3:
; Byte-by-byte tail: stop at the first differing byte.
404 	mov	%%tmp8, [%%src1 + %%result]
405 	cmp	%%tmp8, [%%src2 + %%result]
406 	jne	%%end
407 	inc	%%result
408 	dec	%%size
409 	jg	%%loop3
410 	jmp	%%end
411 %%miscompare:
; Bit index of first difference / 8 = matching bytes within the qword.
412 	bsf	%%tmp, %%tmp
413 	shr	%%tmp, 3
414 	add	%%result, %%tmp
415 %%end:
416 %endm
417
418 %endif ;UTILS_ASM