# NOTE: git-blame extraction artifact removed; all lines below originate
# from commit f67539c2.
########################################################################
# Copyright(c) 2019 Arm Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in
#     the documentation and/or other materials provided with the
#     distribution.
#   * Neither the name of Arm Corporation nor the names of its
#     contributors may be used to endorse or promote products derived
#     from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################

30 | .arch armv8-a+crc+crypto | |
31 | .text | |
32 | .align 3 | |
33 | .global crc32_gzip_refl_hw_fold | |
34 | .type crc32_gzip_refl_hw_fold, %function | |
35 | ||
36 | /* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */ | |
37 | ||
38 | w_seed .req w0 | |
39 | w_crc .req w0 | |
40 | x_buf .req x1 | |
41 | x_len .req x2 | |
42 | ||
43 | x_buf_loop_end .req x10 | |
44 | x_buf_iter .req x10 | |
45 | ||
46 | x_tmp .req x15 | |
47 | w_tmp .req w15 | |
48 | ||
49 | d_c0 .req d3 | |
50 | d_c1 .req d1 | |
51 | v_c0 .req v3 | |
52 | v_c1 .req v1 | |
53 | crc32_gzip_refl_hw_fold: | |
54 | mvn w_seed, w_seed | |
55 | cmp x_len, 1023 | |
56 | mov x_buf_iter, x_buf | |
57 | bls .loop_fold_end | |
58 | ||
59 | sub x_buf_loop_end, x_len, #1024 | |
60 | and x_buf_loop_end, x_buf_loop_end, -1024 | |
61 | add x_buf_loop_end, x_buf_loop_end, 1024 | |
62 | add x_buf_loop_end, x_buf, x_buf_loop_end | |
63 | ||
64 | mov x_tmp, 0x819b | |
65 | movk x_tmp, 0xb486, lsl 16 | |
66 | fmov d_c0, x_tmp | |
67 | ||
68 | mov x_tmp, 0x8617 | |
69 | movk x_tmp, 0x7627, lsl 16 | |
70 | fmov d_c1, x_tmp | |
71 | ||
72 | x_in64 .req x3 | |
73 | w_crc0 .req w0 | |
74 | w_crc1 .req w4 | |
75 | w_crc2 .req w5 | |
76 | ||
77 | d_crc0 .req d4 | |
78 | d_crc1 .req d5 | |
79 | v_crc0 .req v4 | |
80 | v_crc1 .req v5 | |
81 | .align 3 | |
82 | .loop_fold: | |
83 | add x9, x_buf, 336 | |
84 | mov x_in64, x_buf | |
85 | mov w_crc1, 0 | |
86 | mov w_crc2, 0 | |
87 | ||
88 | .align 3 | |
89 | .loop_for: | |
90 | ldr x8, [x_in64] | |
91 | ldr x7, [x_in64, 336] | |
92 | ldr x6, [x_in64, 672] | |
93 | ||
94 | add x_in64, x_in64, 8 | |
95 | cmp x_in64, x9 | |
96 | ||
97 | crc32x w_crc0, w_crc0, x8 | |
98 | crc32x w_crc1, w_crc1, x7 | |
99 | crc32x w_crc2, w_crc2, x6 | |
100 | bne .loop_for | |
101 | ||
102 | uxtw x_tmp, w_crc0 | |
103 | fmov d_crc0, x_tmp | |
104 | pmull v_crc0.1q, v_crc0.1d, v_c0.1d | |
105 | ||
106 | uxtw x_tmp, w_crc1 | |
107 | fmov d_crc1, x_tmp | |
108 | pmull v_crc1.1q, v_crc1.1d, v_c1.1d | |
109 | ||
110 | ldr x_tmp, [x_buf, 1008] | |
111 | crc32x w_crc2, w_crc2, x_tmp | |
112 | ||
113 | fmov x_tmp, d_crc0 | |
114 | crc32x w_crc0, wzr, x_tmp | |
115 | ||
116 | fmov x_tmp, d_crc1 | |
117 | crc32x w_crc1, wzr, x_tmp | |
118 | ||
119 | eor w_crc0, w_crc0, w_crc1 | |
120 | eor w_crc0, w_crc0, w_crc2 | |
121 | ||
122 | ldr x_tmp, [x_buf, 1016] | |
123 | crc32x w_crc0, w_crc0, x_tmp | |
124 | ||
125 | add x_buf, x_buf, 1024 | |
126 | cmp x_buf_loop_end, x_buf | |
127 | bne .loop_fold | |
128 | ||
129 | and x_len, x_len, 1023 | |
130 | ||
131 | x_buf_loop_size8_end .req x3 | |
132 | .loop_fold_end: | |
133 | cmp x_len, 7 | |
134 | bls .size_4 | |
135 | ||
136 | sub x_buf_loop_size8_end, x_len, #8 | |
137 | and x_buf_loop_size8_end, x_buf_loop_size8_end, -8 | |
138 | add x_buf_loop_size8_end, x_buf_loop_size8_end, 8 | |
139 | add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end | |
140 | ||
141 | .align 3 | |
142 | .loop_size_8: | |
143 | ldr x_tmp, [x_buf_iter], 8 | |
144 | crc32x w_crc, w_crc, x_tmp | |
145 | ||
146 | cmp x_buf_iter, x_buf_loop_size8_end | |
147 | bne .loop_size_8 | |
148 | ||
149 | and x_len, x_len, 7 | |
150 | .size_4: | |
151 | cmp x_len, 3 | |
152 | bls .size_2 | |
153 | ||
154 | ldr w_tmp, [x_buf_iter], 4 | |
155 | crc32w w_crc, w_crc, w_tmp | |
156 | ||
157 | sub x_len, x_len, #4 | |
158 | .size_2: | |
159 | cmp x_len, 1 | |
160 | bls .size_1 | |
161 | ||
162 | ldrh w_tmp, [x_buf_iter], 2 | |
163 | crc32h w_crc, w_crc, w_tmp | |
164 | ||
165 | sub x_len, x_len, #2 | |
166 | .size_1: | |
167 | cbz x_len, .done | |
168 | ||
169 | ldrb w_tmp, [x_buf_iter] | |
170 | crc32b w_crc, w_crc, w_tmp | |
171 | ||
172 | .done: | |
173 | mvn w_crc, w_crc | |
174 | ret | |
175 | ||
176 | .size crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold |