########################################################################
#  Copyright(c) 2019 Arm Corporation All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#    * Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in
#      the documentation and/or other materials provided with the
#      distribution.
#    * Neither the name of Arm Corporation nor the names of its
#      contributors may be used to endorse or promote products derived
#      from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################

        .arch armv8-a+crc+crypto
        .text
        .align 3
        .global crc32_gzip_refl_hw_fold
        .type crc32_gzip_refl_hw_fold, %function

/* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */

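/*
 * Reflected (gzip/zlib-style) CRC-32 over the polynomial 0xEDB88320,
 * computed with the ARMv8 CRC32 instructions plus PMULL folding:
 *
 *   - Each full 1024-byte block is split into three 336-byte lanes at
 *     offsets 0, 336 and 672, CRCed in parallel with crc32x so three
 *     dependency chains stay in flight.
 *   - The lane CRCs are merged by carry-less multiplying lanes 0 and 1
 *     with precomputed constants (pmull), reducing with crc32x, and
 *     XORing into lane 2; the block's last 16 bytes are then absorbed.
 *   - The remainder (< 1024 bytes) is consumed 8/4/2/1 bytes at a time.
 */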
/* AAPCS64 arguments; w0 carries the seed in and the CRC out. */
w_seed .req w0
w_crc .req w0
x_buf .req x1
x_len .req x2

/* x10 serves first as the block-loop bound, then as the tail iterator. */
x_buf_loop_end .req x10
x_buf_iter .req x10

x_tmp .req x15
w_tmp .req w15

/* Folding constants used to combine the three lane CRCs. */
d_c0 .req d3
d_c1 .req d1
v_c0 .req v3
v_c1 .req v1
crc32_gzip_refl_hw_fold:
        mvn     w_seed, w_seed                  // CRC-32 pre-inversion of the seed
        cmp     x_len, 1023
        mov     x_buf_iter, x_buf
        bls     .loop_fold_end                  // no full 1024-byte block: tail only

        /* x_buf_loop_end = buf + round_down(len, 1024) */
        sub     x_buf_loop_end, x_len, #1024
        and     x_buf_loop_end, x_buf_loop_end, -1024
        add     x_buf_loop_end, x_buf_loop_end, 1024
        add     x_buf_loop_end, x_buf, x_buf_loop_end

        /* Materialize the two 32-bit lane-folding constants. */
        mov     x_tmp, 0x819b
        movk    x_tmp, 0xb486, lsl 16           // c0 = 0xb486819b
        fmov    d_c0, x_tmp

        mov     x_tmp, 0x8617
        movk    x_tmp, 0x7627, lsl 16           // c1 = 0x76278617
        fmov    d_c1, x_tmp

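        /*
         * Background note on the pmull folding (the exact exponents are
         * inferred from the lane layout, not spelled out in the source):
         * for CRCs over GF(2),
         *
         *     CRC(A || B) = CRC(A) * x^(8*len(B)) mod P(x)  XOR  CRC(B)
         *
         * c0 and c1 are precomputed multipliers that advance the lane-0
         * and lane-1 CRCs over the bytes that follow their lanes (up to
         * the extra shift contributed by the reducing crc32x itself,
         * which the constants account for).  One carry-less multiply
         * (pmull) plus one crc32x against wzr, which folds the 64-bit
         * product back to a 32-bit CRC, replaces a byte-by-byte
         * catch-up loop.  P(x) is the gzip polynomial 0x04C11DB7,
         * i.e. 0xEDB88320 in reflected form.
         */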
/* Per-block state: three independent lane CRCs and pmull operands. */
x_in64 .req x3
w_crc0 .req w0
w_crc1 .req w4
w_crc2 .req w5

d_crc0 .req d4
d_crc1 .req d5
v_crc0 .req v4
v_crc1 .req v5
        .align 3
.loop_fold:
        /* Split each 1024-byte block into three 336-byte lanes at
           offsets 0, 336 and 672 and CRC them in parallel, so the
           three crc32x dependency chains can pipeline. */
        add     x9, x_buf, 336                  // end of lane 0
        mov     x_in64, x_buf
        mov     w_crc1, 0
        mov     w_crc2, 0

        .align 3
.loop_for:
        ldr     x8, [x_in64]                    // lane 0: bytes [0, 336)
        ldr     x7, [x_in64, 336]               // lane 1: bytes [336, 672)
        ldr     x6, [x_in64, 672]               // lane 2: bytes [672, 1008)

        add     x_in64, x_in64, 8
        cmp     x_in64, x9

        crc32x  w_crc0, w_crc0, x8
        crc32x  w_crc1, w_crc1, x7
        crc32x  w_crc2, w_crc2, x6
        bne     .loop_for
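        /*
         * Reference model of the lane loop above (hypothetical C, for
         * reading only; crc32x(c, v) stands for the hardware CRC32 of
         * the 8-byte value v accumulated into c):
         *
         *     for (i = 0; i < 336; i += 8) {
         *         crc0 = crc32x(crc0, load64(buf + i));
         *         crc1 = crc32x(crc1, load64(buf + i + 336));
         *         crc2 = crc32x(crc2, load64(buf + i + 672));
         *     }
         */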
101
102 uxtw x_tmp, w_crc0
103 fmov d_crc0, x_tmp
104 pmull v_crc0.1q, v_crc0.1d, v_c0.1d
105
106 uxtw x_tmp, w_crc1
107 fmov d_crc1, x_tmp
108 pmull v_crc1.1q, v_crc1.1d, v_c1.1d
109
110 ldr x_tmp, [x_buf, 1008]
111 crc32x w_crc2, w_crc2, x_tmp
112
113 fmov x_tmp, d_crc0
114 crc32x w_crc0, wzr, x_tmp
115
116 fmov x_tmp, d_crc1
117 crc32x w_crc1, wzr, x_tmp
118
119 eor w_crc0, w_crc0, w_crc1
120 eor w_crc0, w_crc0, w_crc2
121
122 ldr x_tmp, [x_buf, 1016]
123 crc32x w_crc0, w_crc0, x_tmp
124
125 add x_buf, x_buf, 1024
126 cmp x_buf_loop_end, x_buf
127 bne .loop_fold
128
129 and x_len, x_len, 1023

x_buf_loop_size8_end .req x3
.loop_fold_end:
        /* Tail: x_buf_iter (an alias of x_buf_loop_end) already points
           at the first unprocessed byte; consume it 8, 4, 2 and 1
           bytes at a time with the sized crc32 instructions. */
        cmp     x_len, 7
        bls     .size_4

        /* x_buf_loop_size8_end = iter + round_down(len, 8) */
        sub     x_buf_loop_size8_end, x_len, #8
        and     x_buf_loop_size8_end, x_buf_loop_size8_end, -8
        add     x_buf_loop_size8_end, x_buf_loop_size8_end, 8
        add     x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end

        .align 3
.loop_size_8:
        ldr     x_tmp, [x_buf_iter], 8
        crc32x  w_crc, w_crc, x_tmp

        cmp     x_buf_iter, x_buf_loop_size8_end
        bne     .loop_size_8

        and     x_len, x_len, 7
.size_4:
        cmp     x_len, 3
        bls     .size_2

        ldr     w_tmp, [x_buf_iter], 4
        crc32w  w_crc, w_crc, w_tmp

        sub     x_len, x_len, #4
.size_2:
        cmp     x_len, 1
        bls     .size_1

        ldrh    w_tmp, [x_buf_iter], 2
        crc32h  w_crc, w_crc, w_tmp

        sub     x_len, x_len, #2
.size_1:
        cbz     x_len, .done

        ldrb    w_tmp, [x_buf_iter]
        crc32b  w_crc, w_crc, w_tmp

.done:
        mvn     w_crc, w_crc                    // CRC-32 post-inversion
        ret

        .size crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold
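
/*
 * Usage sketch (illustrative only, not part of this file): because the
 * pre- and post-inversions happen inside the routine, it seeds like
 * zlib's crc32(), and calls can be chained across buffers:
 *
 *     uint32_t crc32_gzip_refl_hw_fold(uint32_t seed,
 *                                      const unsigned char *buf,
 *                                      uint64_t len);
 *
 *     uint32_t crc = 0;                      // initial seed
 *     crc = crc32_gzip_refl_hw_fold(crc, part1, len1);
 *     crc = crc32_gzip_refl_hw_fold(crc, part2, len2);
 */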