#include "acconfig.h"
#include "include/int_types.h"
#include "common/crc32c_aarch64.h"
#include "arch/arm.h"

#ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
/* Request crc extension capabilities from the assembler */
asm(".arch_extension crc");

#ifdef HAVE_ARMV8_CRYPTO
/* Request crypto extension capabilities from the assembler */
asm(".arch_extension crypto");
#endif

#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))

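/*
 * CRC32C3X8(ITR) folds the ITR-th 64-bit word of each of three consecutive
 * 42-word (336-byte) segments into three independent accumulators (crc1,
 * crc2, crc0), so the crc32cx chains for the three segments can proceed in
 * parallel in the pipeline.
 */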
#define CRC32C3X8(ITR) \
	__asm__("crc32cx %w[c1], %w[c1], %x[v]":[c1]"+r"(crc1):[v]"r"(*((const uint64_t *)buffer + 42*1 + (ITR))));\
	__asm__("crc32cx %w[c2], %w[c2], %x[v]":[c2]"+r"(crc2):[v]"r"(*((const uint64_t *)buffer + 42*2 + (ITR))));\
	__asm__("crc32cx %w[c0], %w[c0], %x[v]":[c0]"+r"(crc0):[v]"r"(*((const uint64_t *)buffer + 42*0 + (ITR))));

#define CRC32C3X8_ZERO \
	__asm__("crc32cx %w[c0], %w[c0], xzr":[c0]"+r"(crc0));

#else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */

#include <arm_acle.h>
#include <arm_neon.h>

#define CRC32CX(crc, value) (crc) = __crc32cd((crc), (value))
#define CRC32CW(crc, value) (crc) = __crc32cw((crc), (value))
#define CRC32CH(crc, value) (crc) = __crc32ch((crc), (value))
#define CRC32CB(crc, value) (crc) = __crc32cb((crc), (value))

#define CRC32C3X8(ITR) \
	crc1 = __crc32cd(crc1, *((const uint64_t *)buffer + 42*1 + (ITR)));\
	crc2 = __crc32cd(crc2, *((const uint64_t *)buffer + 42*2 + (ITR)));\
	crc0 = __crc32cd(crc0, *((const uint64_t *)buffer + 42*0 + (ITR)));

#define CRC32C3X8_ZERO \
	crc0 = __crc32cd(crc0, (const uint64_t)0);

#endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */

#define CRC32C7X3X8(ITR) do {\
	CRC32C3X8((ITR)*7+0) \
	CRC32C3X8((ITR)*7+1) \
	CRC32C3X8((ITR)*7+2) \
	CRC32C3X8((ITR)*7+3) \
	CRC32C3X8((ITR)*7+4) \
	CRC32C3X8((ITR)*7+5) \
	CRC32C3X8((ITR)*7+6) \
	} while(0)

#define CRC32C7X3X8_ZERO do {\
	CRC32C3X8_ZERO \
	CRC32C3X8_ZERO \
	CRC32C3X8_ZERO \
	CRC32C3X8_ZERO \
	CRC32C3X8_ZERO \
	CRC32C3X8_ZERO \
	CRC32C3X8_ZERO \
	} while(0)

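/*
 * PREF4X64L1/PREF4X64L2 issue PRFM prefetches for four 64-byte lines at the
 * given offset from `buffer`; PREF1KL1/PREF1KL2 cover a full 1 KB, targeting
 * the L1 and L2 caches respectively, so data for upcoming blocks is fetched
 * while the current block is being processed.
 */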
#define PREF4X64L1(PREF_OFFSET, ITR) \
	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));

#define PREF1KL1(PREF_OFFSET) \
	PREF4X64L1((PREF_OFFSET), 0) \
	PREF4X64L1((PREF_OFFSET), 4) \
	PREF4X64L1((PREF_OFFSET), 8) \
	PREF4X64L1((PREF_OFFSET), 12)

#define PREF4X64L2(PREF_OFFSET, ITR) \
	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));

#define PREF1KL2(PREF_OFFSET) \
	PREF4X64L2((PREF_OFFSET), 0) \
	PREF4X64L2((PREF_OFFSET), 4) \
	PREF4X64L2((PREF_OFFSET), 8) \
	PREF4X64L2((PREF_OFFSET), 12)

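/*
 * Compute the CRC-32C (Castagnoli) of `len` bytes at `buffer`, seeded with
 * `crc`, using the ARMv8 CRC32 extension and, when available, PMULL.  A NULL
 * `buffer` is treated as `len` zero bytes (no memory is read in that case).
 */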
uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
{
	int64_t length = len;
	uint32_t crc0, crc1, crc2;

	if (buffer) {
#ifdef HAVE_ARMV8_CRYPTO
		if (ceph_arch_aarch64_pmull) {
#ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
			/* Calculate the reflected crc using the PMULL instruction */
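			/*
			 * k1 and k2 are precomputed folding constants (effectively powers of x
			 * reduced modulo the CRC-32C polynomial).  Multiplying a partial CRC by
			 * them with PMULL and reducing the product with a further crc32 advances
			 * crc0 and crc1 over the data covered by the later segments, so the three
			 * partial CRCs can simply be XORed together.
			 */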
			const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
			uint64_t t0, t1;

			/* crc is computed "by 3" over a fixed input block size of 1024 bytes */
			while ((length -= 1024) >= 0) {
				/* Prefetch data for the following block to avoid cache misses */
				PREF1KL2(1024*3);
				/* Do the first 8 bytes here for better pipelining */
				crc0 = __crc32cd(crc, *(const uint64_t *)buffer);
				crc1 = 0;
				crc2 = 0;
				buffer += sizeof(uint64_t);

				/* Process the block inline;
				   process crc0 last to avoid a dependency on the value computed above */
				CRC32C7X3X8(0);
				CRC32C7X3X8(1);
				CRC32C7X3X8(2);
				CRC32C7X3X8(3);
				CRC32C7X3X8(4);
				CRC32C7X3X8(5);

				buffer += 42*3*sizeof(uint64_t);
				/* Prefetch data for the following block to avoid cache misses */
				PREF1KL1(1024);

				/* Merge crc0 and crc1 into crc2:
				   crc1 is multiplied by K2 and crc0 by K1, then each 64-bit
				   product is folded with __crc32cd and XORed into crc */

				t1 = (uint64_t)vmull_p64(crc1, k2);
				t0 = (uint64_t)vmull_p64(crc0, k1);
				crc = __crc32cd(crc2, *(const uint64_t *)buffer);
				crc1 = __crc32cd(0, t1);
				crc ^= crc1;
				crc0 = __crc32cd(0, t0);
				crc ^= crc0;

				buffer += sizeof(uint64_t);
			}
#else /* !HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
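			/*
			 * Without the intrinsics, load the same folding constants by hand:
			 * K1 (0xe417f38a) into v1 and K2 (0x8f158014) into v0, for use with
			 * pmull below.
			 */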
			__asm__("mov x16, #0xf38a \n\t"
				"movk x16, #0xe417, lsl 16 \n\t"
				"mov v1.2d[0], x16 \n\t"
				"mov x16, #0x8014 \n\t"
				"movk x16, #0x8f15, lsl 16 \n\t"
				"mov v0.2d[0], x16 \n\t"
				:::"x16");

			while ((length -= 1024) >= 0) {
				PREF1KL2(1024*3);
				__asm__("crc32cx %w[c0], %w[c], %x[v]\n\t"
					:[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):);
				crc1 = 0;
				crc2 = 0;
				buffer += sizeof(uint64_t);

				CRC32C7X3X8(0);
				CRC32C7X3X8(1);
				CRC32C7X3X8(2);
				CRC32C7X3X8(3);
				CRC32C7X3X8(4);
				CRC32C7X3X8(5);

				buffer += 42*3*sizeof(uint64_t);
				PREF1KL1(1024);
				__asm__("mov v2.2d[0], %x[c1] \n\t"
					"pmull v2.1q, v2.1d, v0.1d \n\t"
					"mov v3.2d[0], %x[c0] \n\t"
					"pmull v3.1q, v3.1d, v1.1d \n\t"
					"crc32cx %w[c], %w[c2], %x[v] \n\t"
					"mov %x[c1], v2.2d[0] \n\t"
					"crc32cx %w[c1], wzr, %x[c1] \n\t"
					"eor %w[c], %w[c], %w[c1] \n\t"
					"mov %x[c0], v3.2d[0] \n\t"
					"crc32cx %w[c0], wzr, %x[c0] \n\t"
					"eor %w[c], %w[c], %w[c0] \n\t"
					:[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc)
					:[v]"r"(*((const uint64_t *)buffer)));
				buffer += sizeof(uint64_t);
			}
#endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */

			if (!(length += 1024))
				return crc;
		}
#endif /* HAVE_ARMV8_CRYPTO */
		while ((length -= sizeof(uint64_t)) >= 0) {
			CRC32CX(crc, *(uint64_t *)buffer);
			buffer += sizeof(uint64_t);
		}

		/* Handle the remaining 0-7 bytes; this is more efficient than a straight byte loop */
		if (length & sizeof(uint32_t)) {
			CRC32CW(crc, *(uint32_t *)buffer);
			buffer += sizeof(uint32_t);
		}
		if (length & sizeof(uint16_t)) {
			CRC32CH(crc, *(uint16_t *)buffer);
			buffer += sizeof(uint16_t);
		}
		if (length & sizeof(uint8_t))
			CRC32CB(crc, *buffer);
	} else {
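		/*
		 * buffer == NULL: treat the input as `len` zero bytes and fold in
		 * zeros (xzr / constant 0) without reading any memory.
		 */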
#ifdef HAVE_ARMV8_CRYPTO
		if (ceph_arch_aarch64_pmull) {
#ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
			const poly64_t k1 = 0xe417f38a;
			uint64_t t0;

			while ((length -= 1024) >= 0) {
				crc0 = __crc32cd(crc, 0);

				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;

				/* Merge crc0 into crc: crc0 is multiplied by K1, then reduced with __crc32cd */

				t0 = (uint64_t)vmull_p64(crc0, k1);
				crc = __crc32cd(0, t0);
			}
#else /* !HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
			__asm__("mov x16, #0xf38a \n\t"
				"movk x16, #0xe417, lsl 16 \n\t"
				"mov v1.2d[0], x16 \n\t"
				:::"x16");

			while ((length -= 1024) >= 0) {
				__asm__("crc32cx %w[c0], %w[c], xzr\n\t"
					:[c0]"=r"(crc0):[c]"r"(crc));

				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;
				CRC32C7X3X8_ZERO;

				__asm__("mov v3.2d[0], %x[c0] \n\t"
					"pmull v3.1q, v3.1d, v1.1d \n\t"
					"mov %x[c0], v3.2d[0] \n\t"
					"crc32cx %w[c], wzr, %x[c0] \n\t"
					:[c]"=r"(crc)
					:[c0]"r"(crc0));
			}
#endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */

			if (!(length += 1024))
				return crc;
		}
#endif /* HAVE_ARMV8_CRYPTO */
		while ((length -= sizeof(uint64_t)) >= 0)
			CRC32CX(crc, 0);

		/* Handle the remaining 0-7 bytes; this is more efficient than a straight byte loop */
		if (length & sizeof(uint32_t))
			CRC32CW(crc, 0);

		if (length & sizeof(uint16_t))
			CRC32CH(crc, 0);

		if (length & sizeof(uint8_t))
			CRC32CB(crc, 0);
	}
	return crc;
}