;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; Optimized xor parity check of N vectors using SSE
;;; int xor_check_sse(int vects, int len, void **array)

;;; Checks that the xor across all N (vects) vectors in the array of pointers
;;; (**array) is zero; the last pointer is conventionally the parity vector
;;; produced by xor_gen. Returns 0 on pass, non-zero on failure.
;;; Vectors must be aligned to 16 bytes. Length can be any value.

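;;; Minimal C usage sketch (illustrative only, not part of this source). It
;;; assumes the prototype above, as declared in isa-l's raid.h; the helper
;;; name check_parity and the caller-supplied buffers are hypothetical.
;;;
;;;     #include "raid.h"
;;;
;;;     /* array holds vects pointers: the source buffers followed by the
;;;      * parity buffer, each len bytes long and 16-byte aligned. */
;;;     int check_parity(void **array, int vects, int len)
;;;     {
;;;             return xor_check_sse(vects, len, array);   /* 0 = parity ok */
;;;     }
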
%include "reg_sizes.asm"

%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0   rdi
 %define arg1   rsi
 %define arg2   rdx
 %define arg3   rcx
 %define arg4   r8
 %define arg5   r9
 %define tmp    r11
 %define tmp2   rax
 %define tmp2.b al
 %define tmp3   arg4
 %define return rax
 %define PS     8
 %define func(x) x: endbranch
 %define FUNC_SAVE
 %define FUNC_RESTORE

%elifidn __OUTPUT_FORMAT__, win64
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9
 %define return rax
 %define tmp2   rax
 %define tmp2.b al
 %define PS     8
 %define tmp    r11
 %define tmp3   r10
 %define stack_size 2*16 + 8	; must be an odd multiple of 8
 %define func(x) proc_frame x

 %macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6, 0*16
	save_xmm128	xmm7, 1*16
	end_prolog
 %endmacro
 %macro FUNC_RESTORE 0
	movdqa	xmm6, [rsp + 0*16]
	movdqa	xmm7, [rsp + 1*16]
	add	rsp, stack_size
 %endmacro
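
 ;; xmm6 and xmm7 are callee-saved in the Windows x64 ABI, so they are
 ;; spilled to the local stack frame above; xmm0-xmm5 are volatile.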


%elifidn __OUTPUT_FORMAT__, elf32
 %define arg0   arg(0)
 %define arg1   ecx
 %define tmp2   eax
 %define tmp2.b al
 %define tmp3   edx
 %define return eax
 %define PS     4
 %define func(x) x: endbranch
 %define arg(x) [ebp+8+PS*x]
 %define arg2   edi	; must save/restore
 %define arg3   esi
 %define tmp    ebx

 %macro FUNC_SAVE 0
	push	ebp
	mov	ebp, esp
	push	esi
	push	edi
	push	ebx
	mov	arg1, arg(1)
	mov	arg2, arg(2)
 %endmacro

 %macro FUNC_RESTORE 0
	pop	ebx
	pop	edi
	pop	esi
	mov	esp, ebp	; restore esp (frame pointer in use)
	pop	ebp
 %endmacro

%endif	; output formats


%define vec	arg0
%define len	arg1
%define ptr	arg3
%define pos	tmp3

%ifidn PS,8			; 64-bit code
 default rel
 [bits 64]
%endif

;;; Use non-temporal load/store
%ifdef NO_NT_LDST
 %define XLDR movdqa
 %define XSTR movdqa
%else
 %define XLDR movntdqa
 %define XSTR movntdq
%endif

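;; Note: movntdqa (streaming load) and the ptest used below are SSE4.1
;; instructions; like movdqa, movntdqa requires 16-byte aligned sources.
;; Building with NO_NT_LDST defined (e.g. nasm -DNO_NT_LDST) selects ordinary
;; cached loads instead. XSTR is defined for symmetry with the xor_gen
;; routines but is not used by this check routine.
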
section .text

align 16
mk_global xor_check_sse, function
func(xor_check_sse)
	FUNC_SAVE
%ifidn PS,8				; 64-bit code
	sub	vec, 1			; Keep as offset to last source
%else					; 32-bit code
	mov	tmp, arg(0)		; Update vec length arg to last source
	sub	tmp, 1
	mov	arg(0), tmp
%endif

	jng	return_fail		; Must have at least 2 vectors
	cmp	len, 0
	je	return_pass
	test	len, (128-1)		; Check whether len is a multiple of 128 bytes
	jnz	len_not_aligned

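	;; Main path: len is (or has been trimmed by the tail loops below to)
	;; a multiple of 128 bytes; each loop128 iteration xor-accumulates one
	;; 128-byte block across all vectors into xmm0-xmm7.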
len_aligned_128bytes:
	sub	len, 128
	mov	pos, 0
	mov	tmp, vec		; Preset to last vector

loop128:
	mov	tmp2, [arg2+tmp*PS]	; Fetch last pointer in array
	sub	tmp, 1			; Next vect
	XLDR	xmm0, [tmp2+pos]	; Start with end of array in last vector
	XLDR	xmm1, [tmp2+pos+16]	; Keep xor parity in xmm0-7
	XLDR	xmm2, [tmp2+pos+(2*16)]
	XLDR	xmm3, [tmp2+pos+(3*16)]
	XLDR	xmm4, [tmp2+pos+(4*16)]
	XLDR	xmm5, [tmp2+pos+(5*16)]
	XLDR	xmm6, [tmp2+pos+(6*16)]
	XLDR	xmm7, [tmp2+pos+(7*16)]

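	;; tmp counts down through the remaining vector indices. xorpd does not
	;; modify EFLAGS, so the jge below still sees the flags from "sub tmp, 1"
	;; and the loop runs until vector index 0 has been folded in.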
next_vect:
	mov	ptr, [arg2+tmp*PS]
	sub	tmp, 1
	xorpd	xmm0, [ptr+pos]		; Get next vector (source)
	xorpd	xmm1, [ptr+pos+16]
	xorpd	xmm2, [ptr+pos+(2*16)]
	xorpd	xmm3, [ptr+pos+(3*16)]
	xorpd	xmm4, [ptr+pos+(4*16)]
	xorpd	xmm5, [ptr+pos+(5*16)]
	xorpd	xmm6, [ptr+pos+(6*16)]
	xorpd	xmm7, [ptr+pos+(7*16)]
;;;	prefetch [ptr+pos+(8*16)]
	jge	next_vect		; Loop for each vect

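	;; Every vector has now been xored into the accumulators for this
	;; 128-byte block; correct parity means xmm0-xmm7 are all zero.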
	;; End of vects, check that all parity regs are zero
	mov	tmp, vec		; Back to last vector
	por	xmm0, xmm1		; Collapse the eight accumulators into xmm0
	por	xmm0, xmm2
	por	xmm0, xmm3
	por	xmm0, xmm4
	por	xmm0, xmm5
	por	xmm0, xmm6
	por	xmm0, xmm7
	ptest	xmm0, xmm0		; ZF set only if xmm0 is all zero
	jnz	return_fail

	add	pos, 128
	cmp	pos, len		; len was biased down by 128 above
	jle	loop128

return_pass:
	FUNC_RESTORE
	mov	return, 0
	ret


;;; Do one byte at a time for the unaligned-length case

xor_gen_byte:
	mov	tmp, vec		; Preset to last vector

loop_1byte:
	mov	ptr, [arg2+tmp*PS]	; Fetch last pointer in array
	mov	tmp2.b, [ptr+len-1]	; Get last byte of array n
	sub	tmp, 1
nextvect_1byte:
	mov	ptr, [arg2+tmp*PS]
	xor	tmp2.b, [ptr+len-1]
	sub	tmp, 1
	jge	nextvect_1byte

	mov	tmp, vec		; Back to last vector
	cmp	tmp2.b, 0
	jne	return_fail
	sub	len, 1
	test	len, (8-1)
	jnz	loop_1byte

	cmp	len, 0
	je	return_pass
	test	len, (128-1)		; If len is now a non-zero multiple of 128 bytes
	jz	len_aligned_128bytes	;  then finish with the aligned case. len = y * 128

	;; else we are 8-byte aligned so fall through to recheck


	;; Unaligned length cases
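	;; If len is not even pointer-size aligned, fall back to the byte loop
	;; above. Otherwise trim the tail PS bytes at a time, working backwards
	;; from the end, until the remaining length is a multiple of 128, then
	;; finish in the aligned 128-byte loop.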
len_not_aligned:
	test	len, (PS-1)
	jne	xor_gen_byte
	mov	tmp3, len
	and	tmp3, (128-1)		; Do the unaligned tail 8 (4 for 32-bit) bytes at a time
	mov	tmp, vec		; Preset to last vector

	;; Run backwards 8 bytes (4B for 32bit) at a time for (tmp3) bytes
loopN_bytes:
	mov	ptr, [arg2+tmp*PS]	; Fetch last pointer in array
	mov	tmp2, [ptr+len-PS]	; Get last word of array n
	sub	tmp, 1
nextvect_Nbytes:
	mov	ptr, [arg2+tmp*PS]	; Get pointer to next vector
	xor	tmp2, [ptr+len-PS]
	sub	tmp, 1
	jge	nextvect_Nbytes		; Loop for each source

	mov	tmp, vec		; Back to last vector
	cmp	tmp2, 0
	jne	return_fail
	sub	len, PS
	sub	tmp3, PS
	jg	loopN_bytes

	cmp	len, 128		; Now len is aligned to 128B
	jge	len_aligned_128bytes	; We can do the rest aligned

	cmp	len, 0
	je	return_pass

return_fail:
	mov	return, 1
	FUNC_RESTORE
	ret

endproc_frame

section .data

;;; func               core, ver, snum
slversion xor_check_sse, 00, 03, 0031