]>
Commit | Line | Data |
---|---|---|
f91f0fd5 TL |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2018 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "reg_sizes.asm" | |
31 | ||
;;; System V AMD64 (ELF64 output): argument registers, scratch registers
;;; and the function entry/exit helpers used by the code below.
%ifidn __OUTPUT_FORMAT__, elf64
 ; Integer argument registers, in call order.
 %define arg0           rdi
 %define arg1           rsi
 %define arg2           rdx
 %define arg3           rcx
 %define arg4           r8
 %define arg5           r9

 ; Scratch registers. tmp3 shares a register with arg4.
 %define tmp            r11
 %define tmpb           r11b
 %define tmp3           arg4

 ; Register carrying the function result.
 %define return         rax

 ; func(name) emits the entry label followed by `endbranch`
 ; (endbranch is not defined in this file).
 %define func(x)        x: endbranch

 ; No prologue/epilogue work for this output format: both expand to nothing.
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif
47 | ||
;;; Microsoft x64 (win64 output): argument registers, scratch registers
;;; and the function entry/exit helpers used by the code below.
%ifidn __OUTPUT_FORMAT__, win64
 ; Integer argument registers, in call order.
 %define arg0           rcx
 %define arg1           rdx
 %define arg2           r8
 %define arg3           r9

 ; Scratch registers.
 %define tmp            r11
 %define tmpb           r11b
 %define tmp3           r10

 ; Register carrying the function result.
 %define return         rax

 ; func(name) opens a procedure frame; it is closed by `endproc_frame`
 ; at the end of the function (neither is defined in this file).
 %define func(x)        proc_frame x

 ; Nothing is saved on entry, so FUNC_SAVE only marks the end of the prologue.
 %macro FUNC_SAVE 0
        end_prolog
 %endmacro

 ; Nothing to restore on exit.
 %macro FUNC_RESTORE 0
 %endmacro
%endif
64 | ||
;;; Symbolic names for the registers used by mem_zero_detect_avx.
%define src             arg0            ; base address of the region to test
%define len             arg1            ; byte count (modified by the function)
%define ptr             arg2            ; not referenced by the code in this file
%define pos             return          ; running byte offset; shares the result register
69 | ||
default rel                             ; memory operands without a base register are RIP-relative

[bits 64]                               ; assemble for 64-bit mode
section .text

align 16                                ; the function entry that follows starts on a 16-byte boundary
;-----------------------------------------------------------------------
; mem_zero_detect_avx - report whether a memory region contains only zero bytes
;
; In:    src (arg0) = address of the first byte of the region
;        len (arg1) = size of the region in bytes
; Out:   return (rax) = 0 if every byte is zero (or len is 0)
;                     = 1 if at least one byte is non-zero
; Clobb: rax, len (arg1), tmp (r11), ymm0-ymm3, flags.
;        vzeroupper on exit also clears the upper halves of all ymm registers.
;
; Strategy by length (all loads are unaligned, and the "first N / last N"
; windows are allowed to overlap):
;   len  > 128 : 128-byte steps from offset 0, then the final 128 bytes
;   64..128    : first 64 bytes and last 64 bytes
;   32..63     : first 32 bytes and last 32 bytes
;   16..31     : first 16 bytes and last 16 bytes
;   8..15      : first 8 bytes and last 8 bytes (general-purpose register)
;   1..7       : one byte at a time
;
; NOTE(review): every comparison on len uses signed branches (jle/jl), so a
; length with the top bit set is treated as negative and takes the small
; block path. Presumably the C prototype passes a size_t - confirm before
; changing these to unsigned branches.
;-----------------------------------------------------------------------
mk_global mem_zero_detect_avx, function
func(mem_zero_detect_avx)
        FUNC_SAVE
        xor     pos, pos                        ; pos = 0 (flags are re-set by the sub below)
        sub     len, 4*32                       ; len = offset of the final 128-byte window
        jle     .mem_z_small_block              ; original length <= 128 bytes

        ; Invariant: 0 <= pos < len, so [src+pos, src+pos+128) is inside the region.
.mem_z_loop:
        vmovdqu ymm0, [src+pos]
        vmovdqu ymm1, [src+pos+1*32]
        vmovdqu ymm2, [src+pos+2*32]
        vmovdqu ymm3, [src+pos+3*32]
        vptest  ymm0, ymm0                      ; ZF = 1 only when all 256 bits are zero
        jnz     .return_fail
        vptest  ymm1, ymm1
        jnz     .return_fail
        vptest  ymm2, ymm2
        jnz     .return_fail
        vptest  ymm3, ymm3
        jnz     .return_fail
        add     pos, 4*32
        cmp     pos, len
        jl      .mem_z_loop

        ; Final 128 bytes of the region; may overlap data the loop already tested.
.mem_z_last_block:
        vmovdqu ymm0, [src+len]
        vmovdqu ymm1, [src+len+1*32]
        vmovdqu ymm2, [src+len+2*32]
        vmovdqu ymm3, [src+len+3*32]
        vptest  ymm0, ymm0
        jnz     .return_fail
        vptest  ymm1, ymm1
        jnz     .return_fail
        vptest  ymm2, ymm2
        jnz     .return_fail
        vptest  ymm3, ymm3
        jnz     .return_fail

.return_pass:
        xor     return, return                  ; result 0: no non-zero byte found
        vzeroupper                              ; leave clean ymm upper halves for the caller
        FUNC_RESTORE
        ret


.mem_z_small_block:
        add     len, 4*32                       ; undo the bias: len is the byte count again
        cmp     len, 2*32
        jl      .mem_z_lt64
        ; 64..128 bytes: first 64 and last 64 bytes cover the whole region.
        vmovdqu ymm0, [src]
        vmovdqu ymm1, [src+32]
        vmovdqu ymm2, [src+len-2*32]
        vmovdqu ymm3, [src+len-1*32]
        vptest  ymm0, ymm0
        jnz     .return_fail
        vptest  ymm1, ymm1
        jnz     .return_fail
        vptest  ymm2, ymm2
        jnz     .return_fail
        vptest  ymm3, ymm3
        jnz     .return_fail
        jmp     .return_pass

.mem_z_lt64:
        cmp     len, 32
        jl      .mem_z_lt32
        ; 32..63 bytes: first 32 and last 32 bytes.
        vmovdqu ymm0, [src]
        vmovdqu ymm1, [src+len-32]
        vptest  ymm0, ymm0
        jnz     .return_fail
        vptest  ymm1, ymm1
        jnz     .return_fail
        jmp     .return_pass


.mem_z_lt32:
        cmp     len, 16
        jl      .mem_z_lt16
        ; 16..31 bytes: first 16 and last 16 bytes.
        vmovdqu xmm0, [src]
        vmovdqu xmm1, [src+len-16]
        vptest  xmm0, xmm0
        jnz     .return_fail
        vptest  xmm1, xmm1
        jnz     .return_fail
        jmp     .return_pass


.mem_z_lt16:
        cmp     len, 8
        jl      .mem_z_lt8
        ; 8..15 bytes: OR the first and last qword; `or` sets ZF itself.
        mov     tmp, [src]
        or      tmp, [src+len-8]
        jnz     .return_fail
        jmp     .return_pass

.mem_z_lt8:
        test    len, len
        jz      .return_pass                    ; empty region counts as all-zero
        ; pos is still 0 here: it has not been modified since function entry.
.mem_z_1byte_loop:
        cmp     byte [src+pos], 0
        jne     .return_fail
        add     pos, 1
        cmp     pos, len
        jl      .mem_z_1byte_loop
        jmp     .return_pass

.return_fail:
        mov     return, 1                       ; result 1: a non-zero byte was found
        vzeroupper                              ; leave clean ymm upper halves for the caller
        FUNC_RESTORE
        ret

endproc_frame