]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/util/crc32c_arm64.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / util / crc32c_arm64.cc
1 // Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #include "util/crc32c_arm64.h"
7
8 #if defined(HAVE_ARM64_CRC)
9
10 #if defined(__linux__)
11 #include <asm/hwcap.h>
12 #endif
13 #ifdef ROCKSDB_AUXV_GETAUXVAL_PRESENT
14 #include <sys/auxv.h>
15 #endif
16 #ifndef HWCAP_CRC32
17 #define HWCAP_CRC32 (1 << 7)
18 #endif
19 #ifndef HWCAP_PMULL
20 #define HWCAP_PMULL (1 << 4)
21 #endif
22 #if defined(__APPLE__)
23 #include <sys/sysctl.h>
24 #endif
25 #if defined(__OpenBSD__)
26 #include <sys/types.h>
27 #include <sys/sysctl.h>
28 #include <machine/cpu.h>
29 #include <machine/armreg.h>
30 #endif
31
32 #ifdef HAVE_ARM64_CRYPTO
/*
 * One interleaved step over three streams: folds the 64-bit word at index
 * ITR of each BLK_LENGTH-word block into crc0 (first block), crc1 (second
 * block) and crc2 (third block), i.e. 8 * 3 = 24 bytes per expansion.
 *
 * Relies on buf64, crc0, crc1, crc2, BLK_LENGTH and crc32c_u64 being visible
 * at the expansion site. Wrapped in do/while(0) so that the expansion is a
 * single statement and stays intact under an unbraced if/else.
 */
#define CRC32C24BYTES(ITR)                                      \
  do {                                                          \
    crc1 = crc32c_u64(crc1, *(buf64 + BLK_LENGTH + (ITR)));     \
    crc2 = crc32c_u64(crc2, *(buf64 + BLK_LENGTH * 2 + (ITR))); \
    crc0 = crc32c_u64(crc0, *(buf64 + (ITR)));                  \
  } while (0)

/*
 * Seven consecutive CRC32C24BYTES steps (word indices ITR*7 .. ITR*7 + 6),
 * i.e. 24 * 7 = 168 bytes per expansion.
 */
#define CRC32C7X24BYTES(ITR)        \
  do {                              \
    CRC32C24BYTES((ITR) * 7 + 0);   \
    CRC32C24BYTES((ITR) * 7 + 1);   \
    CRC32C24BYTES((ITR) * 7 + 2);   \
    CRC32C24BYTES((ITR) * 7 + 3);   \
    CRC32C24BYTES((ITR) * 7 + 4);   \
    CRC32C24BYTES((ITR) * 7 + 5);   \
    CRC32C24BYTES((ITR) * 7 + 6);   \
  } while (0)
50 #endif
51
52 extern bool pmull_runtime_flag;
53
/*
 * Runtime probe for the CRC32 CPU feature.
 *
 * Returns 1 when the platform-specific query reports the feature and 0
 * otherwise (including when the query itself fails or no query is available
 * for the build target):
 *   - getauxval / FreeBSD elf_aux_info: HWCAP_CRC32 bit of AT_HWCAP
 *   - Apple: the "hw.optional.armv8_crc32" sysctl equals 1
 *   - OpenBSD: CRC32 field of the AA64ISAR0 sysctl value is at least
 *     ID_AA64ISAR0_CRC32_BASE
 */
uint32_t crc32c_runtime_check(void) {
#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT)
  const uint64_t hwcap = getauxval(AT_HWCAP);
  return (hwcap & HWCAP_CRC32) != 0;
#elif defined(__FreeBSD__)
  /* Stays 0 if elf_aux_info() does not fill it in. */
  uint64_t hwcap = 0;
  elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
  return (hwcap & HWCAP_CRC32) != 0;
#elif defined(__APPLE__)
  int has_crc32;
  size_t value_size = sizeof(has_crc32);
  const int rc = sysctlbyname("hw.optional.armv8_crc32", &has_crc32,
                              &value_size, NULL, 0);
  if (rc == -1) {
    return 0;
  }
  return has_crc32 == 1;
#elif defined(__OpenBSD__)
  const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 };
  uint64_t isar0;
  size_t isar0_size = sizeof(isar0);

  if (sysctl(isar0_mib, 2, &isar0, &isar0_size, NULL, 0) == -1) {
    return 0;
  }
  return (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE) ? 1 : 0;
#else
  return 0;
#endif
}
83
/*
 * Runtime probe for the PMULL (polynomial multiply) CPU feature.
 *
 * Returns true when the platform-specific query reports the feature and
 * false otherwise (including when the query fails or no query is available
 * for the build target):
 *   - getauxval / FreeBSD elf_aux_info: HWCAP_PMULL bit of AT_HWCAP
 *   - Apple: always true, no query is made
 *   - OpenBSD: AES field of the AA64ISAR0 sysctl value is at least
 *     ID_AA64ISAR0_AES_PMULL
 */
bool crc32c_pmull_runtime_check(void) {
#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT)
  const uint64_t hwcap = getauxval(AT_HWCAP);
  return (hwcap & HWCAP_PMULL) != 0;
#elif defined(__FreeBSD__)
  /* Stays 0 if elf_aux_info() does not fill it in. */
  uint64_t hwcap = 0;
  elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
  return (hwcap & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
  return true;
#elif defined(__OpenBSD__)
  const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 };
  uint64_t isar0;
  size_t isar0_size = sizeof(isar0);

  if (sysctl(isar0_mib, 2, &isar0, &isar0_size, NULL, 0) == -1) {
    return false;
  }
  return ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL;
#else
  return false;
#endif
}
110
111 #ifdef ROCKSDB_UBSAN_RUN
112 #if defined(__clang__)
113 __attribute__((__no_sanitize__("alignment")))
114 #elif defined(__GNUC__)
115 __attribute__((__no_sanitize_undefined__))
116 #endif
117 #endif
118 uint32_t
119 crc32c_arm64(uint32_t crc, unsigned char const *data, size_t len) {
120 const uint8_t *buf8;
121 const uint64_t *buf64 = (uint64_t *)data;
122 int length = (int)len;
123 crc ^= 0xffffffff;
124
125 /*
126 * Pmull runtime check here.
127 * Raspberry Pi supports crc32 but doesn't support pmull.
128 * Skip Crc32c Parallel computation if no crypto extension available.
129 */
130 if (pmull_runtime_flag) {
131 /* Macro (HAVE_ARM64_CRYPTO) is used for compiling check */
132 #ifdef HAVE_ARM64_CRYPTO
133 /* Crc32c Parallel computation
134 * Algorithm comes from Intel whitepaper:
135 * crc-iscsi-polynomial-crc32-instruction-paper
136 *
137 * Input data is divided into three equal-sized blocks
138 * Three parallel blocks (crc0, crc1, crc2) for 1024 Bytes
139 * One Block: 42(BLK_LENGTH) * 8(step length: crc32c_u64) bytes
140 */
141 #define BLK_LENGTH 42
142 while (length >= 1024) {
143 uint64_t t0, t1;
144 uint32_t crc0 = 0, crc1 = 0, crc2 = 0;
145
146 /* Parallel Param:
147 * k0 = CRC32(x ^ (42 * 8 * 8 * 2 - 1));
148 * k1 = CRC32(x ^ (42 * 8 * 8 - 1));
149 */
150 uint32_t k0 = 0xe417f38a, k1 = 0x8f158014;
151
152 /* Prefetch data for following block to avoid cache miss */
153 PREF1KL1((uint8_t *)buf64, 1024);
154
155 /* First 8 byte for better pipelining */
156 crc0 = crc32c_u64(crc, *buf64++);
157
158 /* 3 blocks crc32c parallel computation
159 * Macro unfolding to compute parallelly
160 * 168 * 6 = 1008 (bytes)
161 */
162 CRC32C7X24BYTES(0);
163 CRC32C7X24BYTES(1);
164 CRC32C7X24BYTES(2);
165 CRC32C7X24BYTES(3);
166 CRC32C7X24BYTES(4);
167 CRC32C7X24BYTES(5);
168 buf64 += (BLK_LENGTH * 3);
169
170 /* Last 8 bytes */
171 crc = crc32c_u64(crc2, *buf64++);
172
173 t0 = (uint64_t)vmull_p64(crc0, k0);
174 t1 = (uint64_t)vmull_p64(crc1, k1);
175
176 /* Merge (crc0, crc1, crc2) -> crc */
177 crc1 = crc32c_u64(0, t1);
178 crc ^= crc1;
179 crc0 = crc32c_u64(0, t0);
180 crc ^= crc0;
181
182 length -= 1024;
183 }
184
185 if (length == 0) return crc ^ (0xffffffffU);
186 #endif
187 } // if Pmull runtime check here
188
189 buf8 = (const uint8_t *)buf64;
190 while (length >= 8) {
191 crc = crc32c_u64(crc, *(const uint64_t *)buf8);
192 buf8 += 8;
193 length -= 8;
194 }
195
196 /* The following is more efficient than the straight loop */
197 if (length >= 4) {
198 crc = crc32c_u32(crc, *(const uint32_t *)buf8);
199 buf8 += 4;
200 length -= 4;
201 }
202
203 if (length >= 2) {
204 crc = crc32c_u16(crc, *(const uint16_t *)buf8);
205 buf8 += 2;
206 length -= 2;
207 }
208
209 if (length >= 1) crc = crc32c_u8(crc, *buf8);
210
211 crc ^= 0xffffffff;
212 return crc;
213 }
214
215 #endif