/**************************************************************
  Copyright (c) 2019 Huawei Technologies Co., Ltd.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Huawei Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
.text

.global gf_5vect_mad_neon
.type gf_5vect_mad_neon, %function

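/*
 * Expected C prototype (mirrors the other isa-l gf_Nvect_mad routines;
 * the parameter names below are descriptive, not taken from a header):
 *
 *   void gf_5vect_mad_neon(int len, int vec, int vec_i,
 *                          unsigned char *gftbls, unsigned char *src,
 *                          unsigned char **dest);
 *
 *   len    (x0) - length of each vector in bytes
 *   vec    (x1) - number of source vectors
 *   vec_i  (x2) - index of this source vector within gftbls
 *   gftbls (x3) - concatenated 32-byte GF(2^8) nibble lookup tables
 *   src    (x4) - source data buffer
 *   dest   (x5) - array of five destination (parity) buffer pointers
 */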
/* arguments */
x_len        .req x0
x_vec        .req x1
x_vec_i      .req x2
x_tbl        .req x3
x_src        .req x4
x_dest       .req x5

/* returns */
w_ret        .req w0

/* local variables */
x_src_end    .req x6
x_dest1      .req x7
x_dest2      .req x8
x_dest3      .req x9
x_dest4      .req x10
x_dest5      .req x_dest
x_tmp        .req x11
x_tbl1       .req x12
x_tbl2       .req x13
x_tbl3       .req x14
x_tbl4       .req x15
x_tbl5       .req x16
x_const      .req x17

/* vectors */
v_mask0f     .req v0
v_tmp_lo     .req v1
v_tmp_hi     .req v2
v_tmp        .req v3
q_tmp        .req q3

v_gft1_lo    .req v4
v_gft1_hi    .req v5
v_gft2_lo    .req v6
v_gft2_hi    .req v7
v_gft3_lo    .req v16
v_gft3_hi    .req v17
q_gft1_lo    .req q4
q_gft1_hi    .req q5
q_gft2_lo    .req q6
q_gft2_hi    .req q7
q_gft3_lo    .req q16
q_gft3_hi    .req q17

v_gft4_lo    .req v18
v_gft4_hi    .req v19
q_gft4_lo    .req q18
q_gft4_hi    .req q19
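/* gft5 shares v6/v7 with gft2: only the gft1/gft3/gft4 tables stay resident
   in registers, so the gft2 and gft5 tables are reloaded from memory in
   every block below. */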
v_gft5_lo    .req v_gft2_lo
v_gft5_hi    .req v_gft2_hi
q_gft5_lo    .req q_gft2_lo
q_gft5_hi    .req q_gft2_hi

v_data_0     .req v8
v_data_1     .req v9
v_data_2     .req v10
v_data_3     .req v11
q_data_0     .req q8
q_data_1     .req q9
q_data_2     .req q10
q_data_3     .req q11

v_data_0_lo  .req v12
v_data_1_lo  .req v13
v_data_2_lo  .req v14
v_data_3_lo  .req v15
v_data_0_hi  .req v_data_0
v_data_1_hi  .req v_data_1
v_data_2_hi  .req v_data_2
v_data_3_hi  .req v_data_3

v_d1_0       .req v20
v_d1_1       .req v21
v_d1_2       .req v22
v_d1_3       .req v23
v_d2_0       .req v24
v_d2_1       .req v25
v_d2_2       .req v26
v_d2_3       .req v27
v_d3_0       .req v28
v_d3_1       .req v29
v_d3_2       .req v30
v_d3_3       .req v31
q_d1_0       .req q20
q_d1_1       .req q21
q_d1_2       .req q22
q_d1_3       .req q23
q_d2_0       .req q24
q_d2_1       .req q25
q_d2_2       .req q26
q_d2_3       .req q27
q_d3_0       .req q28
q_d3_1       .req q29
q_d3_2       .req q30
q_d3_3       .req q31

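/* There are not enough NEON registers to keep all five destinations live at
   once, so dest4/dest5 reuse the dest1/dest2 registers; the 64-byte loop
   therefore stores dest1..dest3 before it loads dest4/dest5. */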
v_d4_0       .req v_d1_0
v_d4_1       .req v_d1_1
v_d4_2       .req v_d1_2
v_d4_3       .req v_d1_3
q_d4_0       .req q_d1_0
q_d4_1       .req q_d1_1
q_d4_2       .req q_d1_2
q_d4_3       .req q_d1_3
v_d5_0       .req v_d2_0
v_d5_1       .req v_d2_1
v_d5_2       .req v_d2_2
v_d5_3       .req v_d2_3
q_d5_0       .req q_d2_0
q_d5_1       .req q_d2_1
q_d5_2       .req q_d2_2
q_d5_3       .req q_d2_3

v_data       .req v21
q_data       .req q21
v_data_lo    .req v22
v_data_hi    .req v23

gf_5vect_mad_neon:
        /* less than 16 bytes, return_fail */
        cmp x_len, #16
        blt .return_fail

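        /* Each (source, dest) pair uses a 32-byte nibble lookup table, so
           vec_i and vec are scaled by 32; x_tbl1..x_tbl5 then point at the
           five tables that multiply this source into dest1..dest5, and the
           five destination pointers are loaded from the dest array. */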
        movi v_mask0f.16b, #0x0f
        lsl x_vec_i, x_vec_i, #5
        lsl x_vec, x_vec, #5
        add x_tbl1, x_tbl, x_vec_i
        add x_tbl2, x_tbl1, x_vec
        add x_tbl3, x_tbl2, x_vec
        add x_tbl4, x_tbl3, x_vec
        add x_tbl5, x_tbl4, x_vec
        add x_src_end, x_src, x_len
        ldr x_dest1, [x_dest, #8*0]
        ldr x_dest2, [x_dest, #8*1]
        ldr x_dest3, [x_dest, #8*2]
        ldr x_dest4, [x_dest, #8*3]
        ldr x_dest5, [x_dest, #8*4]
        ldr q_gft1_lo, [x_tbl1]
        ldr q_gft1_hi, [x_tbl1, #16]
        ldr q_gft3_lo, [x_tbl3]
        ldr q_gft3_hi, [x_tbl3, #16]
        ldr q_gft4_lo, [x_tbl4]
        ldr q_gft4_hi, [x_tbl4, #16]

.Lloop64_init:
        /* less than 64 bytes, goto Lloop16_init */
        cmp x_len, #64
        blt .Lloop16_init

        /* save d8 ~ d15 to stack */
        sub sp, sp, #64
        stp d8, d9, [sp]
        stp d10, d11, [sp, #16]
        stp d12, d13, [sp, #32]
        stp d14, d15, [sp, #48]

        sub x_src_end, x_src_end, #64

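        /* Main loop: process 64 bytes (4 x 16-byte vectors) per iteration
           for all five destinations.  d8~d15 were saved above because the
           low 64 bits of v8..v15 are callee-saved in the AArch64 PCS;
           x_src_end is biased by -64 so the loop runs while a full 64-byte
           block remains. */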
.Lloop64:
        ldr q_data_0, [x_src, #16*0]
        ldr q_data_1, [x_src, #16*1]
        ldr q_data_2, [x_src, #16*2]
        ldr q_data_3, [x_src, #16*3]
        add x_src, x_src, #64

        ldr q_d1_0, [x_dest1, #16*0]
        ldr q_d1_1, [x_dest1, #16*1]
        ldr q_d1_2, [x_dest1, #16*2]
        ldr q_d1_3, [x_dest1, #16*3]

        ldr q_d2_0, [x_dest2, #16*0]
        ldr q_d2_1, [x_dest2, #16*1]
        ldr q_d2_2, [x_dest2, #16*2]
        ldr q_d2_3, [x_dest2, #16*3]

        ldr q_d3_0, [x_dest3, #16*0]
        ldr q_d3_1, [x_dest3, #16*1]
        ldr q_d3_2, [x_dest3, #16*2]
        ldr q_d3_3, [x_dest3, #16*3]

        ldr q_gft2_lo, [x_tbl2]
        ldr q_gft2_hi, [x_tbl2, #16]

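        /* GF(2^8) multiply-accumulate: split each data byte into its low
           and high nibble, look each nibble up in the matching 16-entry
           table with TBL, and XOR the two partial products into the
           existing destination bytes (dest ^= coeff * src). */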
        and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
        and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
        and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
        and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b

        ushr v_data_0_hi.16b, v_data_0.16b, #4
        ushr v_data_1_hi.16b, v_data_1.16b, #4
        ushr v_data_2_hi.16b, v_data_2.16b, #4
        ushr v_data_3_hi.16b, v_data_3.16b, #4

        /* dest1 */
        tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
        tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
        eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
        eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
        tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
        eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b
        eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
        tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
        eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b
        eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
        tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
        eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b
        eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b

        /* dest2 */
        tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
        tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
        eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
        eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
        tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
        eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b
        eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
        tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
        eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b
        eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
        tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
        eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b
        eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b

        /* dest3 */
        tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b
        tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b
        eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
        eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b
        tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b
        eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b
        eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b
        tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b
        eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b
        eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b
        tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b
        eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b
        eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b

        str q_d1_0, [x_dest1, #16*0]
        str q_d1_1, [x_dest1, #16*1]
        str q_d1_2, [x_dest1, #16*2]
        str q_d1_3, [x_dest1, #16*3]
        add x_dest1, x_dest1, #64

        str q_d2_0, [x_dest2, #16*0]
        str q_d2_1, [x_dest2, #16*1]
        str q_d2_2, [x_dest2, #16*2]
        str q_d2_3, [x_dest2, #16*3]
        add x_dest2, x_dest2, #64

        str q_d3_0, [x_dest3, #16*0]
        str q_d3_1, [x_dest3, #16*1]
        str q_d3_2, [x_dest3, #16*2]
        str q_d3_3, [x_dest3, #16*3]
        add x_dest3, x_dest3, #64

        ldr q_d4_0, [x_dest4, #16*0]
        ldr q_d4_1, [x_dest4, #16*1]
        ldr q_d4_2, [x_dest4, #16*2]
        ldr q_d4_3, [x_dest4, #16*3]

        ldr q_d5_0, [x_dest5, #16*0]
        ldr q_d5_1, [x_dest5, #16*1]
        ldr q_d5_2, [x_dest5, #16*2]
        ldr q_d5_3, [x_dest5, #16*3]

        ldr q_gft5_lo, [x_tbl5]
        ldr q_gft5_hi, [x_tbl5, #16]

        /* dest4 */
        tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b
        tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b
        eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
        eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b
        tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b
        eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b
        eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b
        tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b
        eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b
        eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b
        tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b
        eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b
        eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b

        /* dest5 */
        tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_0_lo.16b
        tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_0_hi.16b
        eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
        eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_1_lo.16b
        tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_1_hi.16b
        eor v_d5_1.16b, v_tmp_lo.16b, v_d5_1.16b
        eor v_d5_1.16b, v_d5_1.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_2_lo.16b
        tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_2_hi.16b
        eor v_d5_2.16b, v_tmp_lo.16b, v_d5_2.16b
        eor v_d5_2.16b, v_d5_2.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_3_lo.16b
        tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_3_hi.16b
        eor v_d5_3.16b, v_tmp_lo.16b, v_d5_3.16b
        eor v_d5_3.16b, v_d5_3.16b, v_tmp_hi.16b

        str q_d4_0, [x_dest4, #16*0]
        str q_d4_1, [x_dest4, #16*1]
        str q_d4_2, [x_dest4, #16*2]
        str q_d4_3, [x_dest4, #16*3]
        add x_dest4, x_dest4, #64

        str q_d5_0, [x_dest5, #16*0]
        str q_d5_1, [x_dest5, #16*1]
        str q_d5_2, [x_dest5, #16*2]
        str q_d5_3, [x_dest5, #16*3]
        add x_dest5, x_dest5, #64

        cmp x_src, x_src_end
        bls .Lloop64

.Lloop64_end:
        /* restore d8 ~ d15 */
        ldp d8, d9, [sp]
        ldp d10, d11, [sp, #16]
        ldp d12, d13, [sp, #32]
        ldp d14, d15, [sp, #48]
        add sp, sp, #64
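        /* undo the -64 bias so x_src_end is the true end of the source */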
        add x_src_end, x_src_end, #64

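/* Handle the remaining data 16 bytes at a time; once fewer than 16 bytes
   are left, fall through to the partial-block path. */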
.Lloop16_init:
        sub x_src_end, x_src_end, #16
        cmp x_src, x_src_end
        bhi .lessthan16_init

.Lloop16:
        ldr q_data, [x_src]

        ldr q_d1_0, [x_dest1]
        ldr q_d2_0, [x_dest2]
        ldr q_d3_0, [x_dest3]
        ldr q_gft2_lo, [x_tbl2]
        ldr q_gft2_hi, [x_tbl2, #16]

        and v_data_lo.16b, v_data.16b, v_mask0f.16b
        ushr v_data_hi.16b, v_data.16b, #4

        tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
        eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
        eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
        eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
        eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
        eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
        eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b

        str q_d1_0, [x_dest1]
        str q_d2_0, [x_dest2]
        str q_d3_0, [x_dest3]

        ldr q_d4_0, [x_dest4]
        ldr q_d5_0, [x_dest5]
        ldr q_gft5_lo, [x_tbl5]
        ldr q_gft5_hi, [x_tbl5, #16]

        tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
        eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
        eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
        eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
        eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b

        str q_d4_0, [x_dest4]
        str q_d5_0, [x_dest5]

        add x_src, x_src, #16
        add x_dest1, x_dest1, #16
        add x_dest2, x_dest2, #16
        add x_dest3, x_dest3, #16
        add x_dest4, x_dest4, #16
        add x_dest5, x_dest5, #16
        cmp x_src, x_src_end
        bls .Lloop16

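/* Partial tail (1..15 bytes left): step the pointers back so the final
   16-byte block ends exactly at the end of the buffers, then use a mask
   loaded from const_tbl to zero the leading bytes of the partial products
   that were already processed, so they are not XORed in twice. */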
.lessthan16_init:
        sub x_tmp, x_src, x_src_end
        cmp x_tmp, #16
        beq .return_pass

.lessthan16:
        mov x_src, x_src_end
        sub x_dest1, x_dest1, x_tmp
        sub x_dest2, x_dest2, x_tmp
        sub x_dest3, x_dest3, x_tmp
        sub x_dest4, x_dest4, x_tmp
        sub x_dest5, x_dest5, x_tmp

        ldr x_const, =const_tbl
        sub x_const, x_const, x_tmp
        ldr q_tmp, [x_const, #16]

        ldr q_data, [x_src]
        ldr q_d1_0, [x_dest1]
        ldr q_d2_0, [x_dest2]
        ldr q_d3_0, [x_dest3]
        ldr q_gft2_lo, [x_tbl2]
        ldr q_gft2_hi, [x_tbl2, #16]

        and v_data_lo.16b, v_data.16b, v_mask0f.16b
        ushr v_data_hi.16b, v_data.16b, #4

        tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
        eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
        and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
        eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
        eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
        and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
        eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
        eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
        and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
        eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b

        str q_d1_0, [x_dest1]
        str q_d2_0, [x_dest2]
        str q_d3_0, [x_dest3]

        ldr q_d4_0, [x_dest4]
        ldr q_d5_0, [x_dest5]
        ldr q_gft5_lo, [x_tbl5]
        ldr q_gft5_hi, [x_tbl5, #16]

        tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
        eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
        and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
        eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b

        tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
        tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
        eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
        and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
        eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b

        str q_d4_0, [x_dest4]
        str q_d5_0, [x_dest5]

.return_pass:
        mov w_ret, #0
        ret

.return_fail:
        mov w_ret, #1
        ret

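/* 16 zero bytes followed by 16 0xff bytes: .lessthan16 loads 16 bytes at
   offset (16 - x_tmp), which yields a mask whose first x_tmp bytes are zero
   and whose remaining bytes are 0xff, leaving already-updated destination
   bytes unchanged. */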
.section .data
.balign 8
const_tbl:
        .dword 0x0000000000000000, 0x0000000000000000
        .dword 0xffffffffffffffff, 0xffffffffffffffff