]>
Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
c9320b6d | 2 | /* |
8b65f34c EB |
3 | * x64 SIMD accelerated ChaCha and XChaCha stream ciphers, |
4 | * including ChaCha20 (RFC7539) | |
c9320b6d MW |
5 | * |
6 | * Copyright (C) 2015 Martin Willi | |
c9320b6d MW |
7 | */ |
8 | ||
9 | #include <crypto/algapi.h> | |
1ca1b917 | 10 | #include <crypto/chacha.h> |
f2abe0d7 | 11 | #include <crypto/internal/simd.h> |
9ae433bc | 12 | #include <crypto/internal/skcipher.h> |
c9320b6d MW |
13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> | |
c9320b6d MW |
15 | #include <asm/simd.h> |
16 | ||
8b65f34c | 17 | #define CHACHA_STATE_ALIGN 16 |
c9320b6d | 18 | |
8b65f34c EB |
19 | asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, |
20 | unsigned int len, int nrounds); | |
21 | asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, | |
22 | unsigned int len, int nrounds); | |
23 | asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); | |
3d1e93cd | 24 | #ifdef CONFIG_AS_AVX2 |
8b65f34c EB |
25 | asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, |
26 | unsigned int len, int nrounds); | |
27 | asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, | |
28 | unsigned int len, int nrounds); | |
29 | asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, | |
30 | unsigned int len, int nrounds); | |
31 | static bool chacha_use_avx2; | |
cee7a36e | 32 | #ifdef CONFIG_AS_AVX512 |
8b65f34c EB |
33 | asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, |
34 | unsigned int len, int nrounds); | |
35 | asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, | |
36 | unsigned int len, int nrounds); | |
37 | asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, | |
38 | unsigned int len, int nrounds); | |
39 | static bool chacha_use_avx512vl; | |
cee7a36e | 40 | #endif |
3d1e93cd | 41 | #endif |
c9320b6d | 42 | |
8b65f34c | 43 | static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) |
9b17608f | 44 | { |
1ca1b917 EB |
45 | len = min(len, maxblocks * CHACHA_BLOCK_SIZE); |
46 | return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; | |
9b17608f MW |
47 | } |
48 | ||
8b65f34c EB |
49 | static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, |
50 | unsigned int bytes, int nrounds) | |
c9320b6d | 51 | { |
3d1e93cd | 52 | #ifdef CONFIG_AS_AVX2 |
cee7a36e | 53 | #ifdef CONFIG_AS_AVX512 |
8b65f34c | 54 | if (chacha_use_avx512vl) { |
cee7a36e | 55 | while (bytes >= CHACHA_BLOCK_SIZE * 8) { |
8b65f34c EB |
56 | chacha_8block_xor_avx512vl(state, dst, src, bytes, |
57 | nrounds); | |
cee7a36e MW |
58 | bytes -= CHACHA_BLOCK_SIZE * 8; |
59 | src += CHACHA_BLOCK_SIZE * 8; | |
60 | dst += CHACHA_BLOCK_SIZE * 8; | |
61 | state[12] += 8; | |
62 | } | |
63 | if (bytes > CHACHA_BLOCK_SIZE * 4) { | |
8b65f34c EB |
64 | chacha_8block_xor_avx512vl(state, dst, src, bytes, |
65 | nrounds); | |
66 | state[12] += chacha_advance(bytes, 8); | |
cee7a36e MW |
67 | return; |
68 | } | |
180def6c | 69 | if (bytes > CHACHA_BLOCK_SIZE * 2) { |
8b65f34c EB |
70 | chacha_4block_xor_avx512vl(state, dst, src, bytes, |
71 | nrounds); | |
72 | state[12] += chacha_advance(bytes, 4); | |
180def6c MW |
73 | return; |
74 | } | |
29a47b54 | 75 | if (bytes) { |
8b65f34c EB |
76 | chacha_2block_xor_avx512vl(state, dst, src, bytes, |
77 | nrounds); | |
78 | state[12] += chacha_advance(bytes, 2); | |
29a47b54 MW |
79 | return; |
80 | } | |
cee7a36e MW |
81 | } |
82 | #endif | |
8b65f34c | 83 | if (chacha_use_avx2) { |
1ca1b917 | 84 | while (bytes >= CHACHA_BLOCK_SIZE * 8) { |
8b65f34c | 85 | chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); |
1ca1b917 EB |
86 | bytes -= CHACHA_BLOCK_SIZE * 8; |
87 | src += CHACHA_BLOCK_SIZE * 8; | |
88 | dst += CHACHA_BLOCK_SIZE * 8; | |
3d1e93cd MW |
89 | state[12] += 8; |
90 | } | |
1ca1b917 | 91 | if (bytes > CHACHA_BLOCK_SIZE * 4) { |
8b65f34c EB |
92 | chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); |
93 | state[12] += chacha_advance(bytes, 8); | |
9b17608f MW |
94 | return; |
95 | } | |
1ca1b917 | 96 | if (bytes > CHACHA_BLOCK_SIZE * 2) { |
8b65f34c EB |
97 | chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); |
98 | state[12] += chacha_advance(bytes, 4); | |
8a5a79d5 MW |
99 | return; |
100 | } | |
1ca1b917 | 101 | if (bytes > CHACHA_BLOCK_SIZE) { |
8b65f34c EB |
102 | chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); |
103 | state[12] += chacha_advance(bytes, 2); | |
a5dd97f8 MW |
104 | return; |
105 | } | |
3d1e93cd MW |
106 | } |
107 | #endif | |
1ca1b917 | 108 | while (bytes >= CHACHA_BLOCK_SIZE * 4) { |
8b65f34c | 109 | chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); |
1ca1b917 EB |
110 | bytes -= CHACHA_BLOCK_SIZE * 4; |
111 | src += CHACHA_BLOCK_SIZE * 4; | |
112 | dst += CHACHA_BLOCK_SIZE * 4; | |
274f938e MW |
113 | state[12] += 4; |
114 | } | |
1ca1b917 | 115 | if (bytes > CHACHA_BLOCK_SIZE) { |
8b65f34c EB |
116 | chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); |
117 | state[12] += chacha_advance(bytes, 4); | |
9b17608f | 118 | return; |
c9320b6d MW |
119 | } |
120 | if (bytes) { | |
8b65f34c | 121 | chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); |
9b17608f | 122 | state[12]++; |
c9320b6d MW |
123 | } |
124 | } | |
125 | ||
f9c9bdb5 | 126 | static int chacha_simd_stream_xor(struct skcipher_walk *walk, |
8b65f34c | 127 | struct chacha_ctx *ctx, u8 *iv) |
c9320b6d | 128 | { |
b8fbe71f | 129 | u32 *state, state_buf[16 + 2] __aligned(8); |
a033aed5 | 130 | int next_yield = 4096; /* bytes until next FPU yield */ |
f9c9bdb5 | 131 | int err = 0; |
c9320b6d | 132 | |
8b65f34c EB |
133 | BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); |
134 | state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); | |
b8fbe71f | 135 | |
4af78261 | 136 | crypto_chacha_init(state, ctx, iv); |
c9320b6d | 137 | |
f9c9bdb5 EB |
138 | while (walk->nbytes > 0) { |
139 | unsigned int nbytes = walk->nbytes; | |
9b17608f | 140 | |
f9c9bdb5 EB |
141 | if (nbytes < walk->total) { |
142 | nbytes = round_down(nbytes, walk->stride); | |
a033aed5 EB |
143 | next_yield -= nbytes; |
144 | } | |
c9320b6d | 145 | |
f9c9bdb5 | 146 | chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, |
8b65f34c | 147 | nbytes, ctx->nrounds); |
9b17608f | 148 | |
a033aed5 EB |
149 | if (next_yield <= 0) { |
150 | /* temporarily allow preemption */ | |
151 | kernel_fpu_end(); | |
152 | kernel_fpu_begin(); | |
153 | next_yield = 4096; | |
154 | } | |
155 | ||
f9c9bdb5 | 156 | err = skcipher_walk_done(walk, walk->nbytes - nbytes); |
c9320b6d MW |
157 | } |
158 | ||
4af78261 EB |
159 | return err; |
160 | } | |
161 | ||
8b65f34c | 162 | static int chacha_simd(struct skcipher_request *req) |
4af78261 EB |
163 | { |
164 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); | |
165 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); | |
f9c9bdb5 | 166 | struct skcipher_walk walk; |
4af78261 EB |
167 | int err; |
168 | ||
f2abe0d7 | 169 | if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
4af78261 EB |
170 | return crypto_chacha_crypt(req); |
171 | ||
f9c9bdb5 EB |
172 | err = skcipher_walk_virt(&walk, req, true); |
173 | if (err) | |
174 | return err; | |
175 | ||
4af78261 | 176 | kernel_fpu_begin(); |
f9c9bdb5 | 177 | err = chacha_simd_stream_xor(&walk, ctx, req->iv); |
4af78261 EB |
178 | kernel_fpu_end(); |
179 | return err; | |
180 | } | |
181 | ||
8b65f34c | 182 | static int xchacha_simd(struct skcipher_request *req) |
4af78261 EB |
183 | { |
184 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); | |
185 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); | |
f9c9bdb5 | 186 | struct skcipher_walk walk; |
4af78261 EB |
187 | struct chacha_ctx subctx; |
188 | u32 *state, state_buf[16 + 2] __aligned(8); | |
189 | u8 real_iv[16]; | |
190 | int err; | |
191 | ||
f2abe0d7 | 192 | if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
4af78261 EB |
193 | return crypto_xchacha_crypt(req); |
194 | ||
f9c9bdb5 EB |
195 | err = skcipher_walk_virt(&walk, req, true); |
196 | if (err) | |
197 | return err; | |
198 | ||
8b65f34c EB |
199 | BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); |
200 | state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); | |
4af78261 EB |
201 | crypto_chacha_init(state, ctx, req->iv); |
202 | ||
203 | kernel_fpu_begin(); | |
204 | ||
8b65f34c EB |
205 | hchacha_block_ssse3(state, subctx.key, ctx->nrounds); |
206 | subctx.nrounds = ctx->nrounds; | |
4af78261 EB |
207 | |
208 | memcpy(&real_iv[0], req->iv + 24, 8); | |
209 | memcpy(&real_iv[8], req->iv + 16, 8); | |
f9c9bdb5 | 210 | err = chacha_simd_stream_xor(&walk, &subctx, real_iv); |
4af78261 | 211 | |
c9320b6d MW |
212 | kernel_fpu_end(); |
213 | ||
214 | return err; | |
215 | } | |
216 | ||
4af78261 EB |
217 | static struct skcipher_alg algs[] = { |
218 | { | |
219 | .base.cra_name = "chacha20", | |
220 | .base.cra_driver_name = "chacha20-simd", | |
221 | .base.cra_priority = 300, | |
222 | .base.cra_blocksize = 1, | |
223 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
224 | .base.cra_module = THIS_MODULE, | |
225 | ||
226 | .min_keysize = CHACHA_KEY_SIZE, | |
227 | .max_keysize = CHACHA_KEY_SIZE, | |
228 | .ivsize = CHACHA_IV_SIZE, | |
229 | .chunksize = CHACHA_BLOCK_SIZE, | |
230 | .setkey = crypto_chacha20_setkey, | |
8b65f34c EB |
231 | .encrypt = chacha_simd, |
232 | .decrypt = chacha_simd, | |
4af78261 EB |
233 | }, { |
234 | .base.cra_name = "xchacha20", | |
235 | .base.cra_driver_name = "xchacha20-simd", | |
236 | .base.cra_priority = 300, | |
237 | .base.cra_blocksize = 1, | |
238 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
239 | .base.cra_module = THIS_MODULE, | |
240 | ||
241 | .min_keysize = CHACHA_KEY_SIZE, | |
242 | .max_keysize = CHACHA_KEY_SIZE, | |
243 | .ivsize = XCHACHA_IV_SIZE, | |
244 | .chunksize = CHACHA_BLOCK_SIZE, | |
245 | .setkey = crypto_chacha20_setkey, | |
8b65f34c EB |
246 | .encrypt = xchacha_simd, |
247 | .decrypt = xchacha_simd, | |
7a507d62 EB |
248 | }, { |
249 | .base.cra_name = "xchacha12", | |
250 | .base.cra_driver_name = "xchacha12-simd", | |
251 | .base.cra_priority = 300, | |
252 | .base.cra_blocksize = 1, | |
253 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
254 | .base.cra_module = THIS_MODULE, | |
255 | ||
256 | .min_keysize = CHACHA_KEY_SIZE, | |
257 | .max_keysize = CHACHA_KEY_SIZE, | |
258 | .ivsize = XCHACHA_IV_SIZE, | |
259 | .chunksize = CHACHA_BLOCK_SIZE, | |
260 | .setkey = crypto_chacha12_setkey, | |
261 | .encrypt = xchacha_simd, | |
262 | .decrypt = xchacha_simd, | |
4af78261 | 263 | }, |
c9320b6d MW |
264 | }; |
265 | ||
8b65f34c | 266 | static int __init chacha_simd_mod_init(void) |
c9320b6d | 267 | { |
362f924b | 268 | if (!boot_cpu_has(X86_FEATURE_SSSE3)) |
c9320b6d MW |
269 | return -ENODEV; |
270 | ||
3d1e93cd | 271 | #ifdef CONFIG_AS_AVX2 |
8b65f34c EB |
272 | chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && |
273 | boot_cpu_has(X86_FEATURE_AVX2) && | |
274 | cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); | |
cee7a36e | 275 | #ifdef CONFIG_AS_AVX512 |
8b65f34c EB |
276 | chacha_use_avx512vl = chacha_use_avx2 && |
277 | boot_cpu_has(X86_FEATURE_AVX512VL) && | |
278 | boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */ | |
cee7a36e | 279 | #endif |
3d1e93cd | 280 | #endif |
4af78261 | 281 | return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
c9320b6d MW |
282 | } |
283 | ||
8b65f34c | 284 | static void __exit chacha_simd_mod_fini(void) |
c9320b6d | 285 | { |
4af78261 | 286 | crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
c9320b6d MW |
287 | } |
288 | ||
8b65f34c EB |
289 | module_init(chacha_simd_mod_init); |
290 | module_exit(chacha_simd_mod_fini); | |
c9320b6d MW |
291 | |
292 | MODULE_LICENSE("GPL"); | |
293 | MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); | |
8b65f34c | 294 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)"); |
c9320b6d MW |
295 | MODULE_ALIAS_CRYPTO("chacha20"); |
296 | MODULE_ALIAS_CRYPTO("chacha20-simd"); | |
4af78261 EB |
297 | MODULE_ALIAS_CRYPTO("xchacha20"); |
298 | MODULE_ALIAS_CRYPTO("xchacha20-simd"); | |
7a507d62 EB |
299 | MODULE_ALIAS_CRYPTO("xchacha12"); |
300 | MODULE_ALIAS_CRYPTO("xchacha12-simd"); |