]> git.proxmox.com Git - mirror_zfs.git/blob - include/linux/simd_x86.h
Add support for selecting encryption backend
[mirror_zfs.git] / include / linux / simd_x86.h
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
23 */
24
25 /*
26 * USER API:
27 *
28 * Kernel fpu methods:
29 * kfpu_begin()
30 * kfpu_end()
31 *
32 * SIMD support:
33 *
34 * Following functions should be called to determine whether CPU feature
35 * is supported. All functions are usable in kernel and user space.
36 * If a SIMD algorithm is using more than one instruction set
37 * all relevant feature test functions should be called.
38 *
39 * Supported features:
40 * zfs_sse_available()
41 * zfs_sse2_available()
42 * zfs_sse3_available()
43 * zfs_ssse3_available()
44 * zfs_sse4_1_available()
45 * zfs_sse4_2_available()
46 *
47 * zfs_avx_available()
48 * zfs_avx2_available()
49 *
50 * zfs_bmi1_available()
51 * zfs_bmi2_available()
52 *
53 * zfs_avx512f_available()
54 * zfs_avx512cd_available()
55 * zfs_avx512er_available()
56 * zfs_avx512pf_available()
57 * zfs_avx512bw_available()
58 * zfs_avx512dq_available()
59 * zfs_avx512vl_available()
60 * zfs_avx512ifma_available()
61 * zfs_avx512vbmi_available()
62 *
63 * NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers
64 * also add zfs_avx512vl_available() to feature check.
65 */
66
67 #ifndef _SIMD_X86_H
68 #define _SIMD_X86_H
69
70 #include <sys/isa_defs.h>
71
72 /* only for __x86 */
73 #if defined(__x86)
74
75 #include <sys/types.h>
76
77 #if defined(_KERNEL)
78 #include <asm/cpufeature.h>
79 #else
80 #include <cpuid.h>
81 #endif
82
#if defined(_KERNEL)
#if defined(HAVE_FPU_API_H)
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
/*
 * With the split FPU API, preemption is disabled/enabled explicitly
 * around the __kernel_fpu_begin()/__kernel_fpu_end() pair.  The macros
 * are wrapped in do { } while (0) so they expand to a single statement
 * and remain safe in unbraced if/else bodies (a bare { } block here
 * would break "if (x) kfpu_begin(); else ...").
 */
#define	kfpu_begin()			\
do {					\
	preempt_disable();		\
	__kernel_fpu_begin();		\
} while (0)
#define	kfpu_end()			\
do {					\
	__kernel_fpu_end();		\
	preempt_enable();		\
} while (0)
#else
#include <asm/i387.h>
#include <asm/xcr.h>
/* Older kernels: kernel_fpu_begin()/end() handle preemption internally. */
#define	kfpu_begin()	kernel_fpu_begin()
#define	kfpu_end()	kernel_fpu_end()
#endif /* defined(HAVE_FPU_API_H) */
#else
/*
 * fpu dummy methods for userspace; SIMD registers are usable directly.
 */
#define	kfpu_begin()	do {} while (0)
#define	kfpu_end()	do {} while (0)
#endif /* defined(_KERNEL) */
110
111 /*
112 * CPUID feature tests for user-space. Linux kernel provides an interface for
113 * CPU feature testing.
114 */
115 #if !defined(_KERNEL)
116
/*
 * x86 registers used implicitly by CPUID.  The enumerator order matches
 * the order in which __cpuid_check_feature() stores the CPUID outputs
 * into its r[] array, so these values double as array indices.
 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX,
	ECX,
	EDX,
	CPUID_REG_CNT = 4	/* register count; used to size result arrays */
} cpuid_regs_t;
127
/*
 * List of instruction sets identified by CPUID.  Each enumerator is an
 * index into the cpuid_features[] descriptor table below.
 */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,	/* OS has enabled XSAVE/XGETBV; gate for AVX checks */
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL,
	AES,
	PCLMULQDQ
} cpuid_inst_sets_t;
155
/*
 * Instruction set descriptor: where to look in CPUID output for one
 * feature.  'flag' may contain several bits (e.g. the composite AVX-512
 * masks); all of them must be set for the feature to be reported, see
 * __cpuid_check_feature().
 */
typedef struct cpuid_feature_desc {
	uint32_t leaf;		/* CPUID leaf */
	uint32_t subleaf;	/* CPUID sub-leaf */
	uint32_t flag;		/* bit mask of the feature */
	cpuid_regs_t reg;	/* which CPUID return register to test */
} cpuid_feature_desc_t;
165
/*
 * CPUID leaf 7 feature-flag bit masks.  Most AVX-512 sub-feature masks
 * OR in _AVX512F_BIT so that a single table lookup also verifies the
 * required Foundation support (all bits in a mask must be set).
 */
#define	_AVX512F_BIT		(1U << 16)
#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
#define	_AVX512VBMI_BIT		(1U << 1) /* AVX512F_BIT is on another leaf */
#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
#define	_AVX512VL_BIT		(1U << 31) /* if used also check other levels */
#define	_AES_BIT		(1U << 25)
#define	_PCLMULQDQ_BIT		(1U << 1)
177
178 /*
179 * Descriptions of supported instruction sets
180 */
181 static const cpuid_feature_desc_t cpuid_features[] = {
182 [SSE] = {1U, 0U, 1U << 25, EDX },
183 [SSE2] = {1U, 0U, 1U << 26, EDX },
184 [SSE3] = {1U, 0U, 1U << 0, ECX },
185 [SSSE3] = {1U, 0U, 1U << 9, ECX },
186 [SSE4_1] = {1U, 0U, 1U << 19, ECX },
187 [SSE4_2] = {1U, 0U, 1U << 20, ECX },
188 [OSXSAVE] = {1U, 0U, 1U << 27, ECX },
189 [AVX] = {1U, 0U, 1U << 28, ECX },
190 [AVX2] = {7U, 0U, 1U << 5, EBX },
191 [BMI1] = {7U, 0U, 1U << 3, EBX },
192 [BMI2] = {7U, 0U, 1U << 8, EBX },
193 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
194 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
195 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
196 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
197 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
198 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
199 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
200 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
201 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX },
202 [AES] = {1U, 0U, _AES_BIT, ECX },
203 [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
204 };
205
/*
 * Read an extended control register (XCR) via XGETBV; index 0 selects
 * XCR0, which reports which register states the OS saves/restores.
 * Only call this function if CPUID indicates that the OSXSAVE feature is
 * supported by the CPU, otherwise it might be an illegal instruction.
 *
 * Returns the 64-bit XCR value assembled from EDX:EAX.
 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t eax, edx;
	/*
	 * xgetbv - raw instruction byte code (0F 01 D0); presumably
	 * encoded by hand for assemblers lacking the mnemonic — do not
	 * replace without confirming toolchain support.
	 */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (eax), "=d" (edx)
	    : "c" (index));

	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
222
223 /*
224 * Check if CPU supports a feature
225 */
226 static inline boolean_t
227 __cpuid_check_feature(const cpuid_feature_desc_t *desc)
228 {
229 uint32_t r[CPUID_REG_CNT];
230
231 if (__get_cpuid_max(0, NULL) >= desc->leaf) {
232 /*
233 * __cpuid_count is needed to properly check
234 * for AVX2. It is a macro, so return parameters
235 * are passed by value.
236 */
237 __cpuid_count(desc->leaf, desc->subleaf,
238 r[EAX], r[EBX], r[ECX], r[EDX]);
239 return ((r[desc->reg] & desc->flag) == desc->flag);
240 }
241 return (B_FALSE);
242 }
243
/*
 * Generate a user-space feature test: CPUID_FEATURE_CHECK(foo, FOO)
 * defines __cpuid_has_foo(), which checks cpuid_features[FOO].
 */
#define	CPUID_FEATURE_CHECK(name, id)				\
static inline boolean_t						\
__cpuid_has_ ## name(void)					\
{								\
	return (__cpuid_check_feature(&cpuid_features[id]));	\
}
250
/*
 * Define functions for user-space CPUID features testing.
 * Each line expands to a __cpuid_has_<name>() predicate.
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
276
277 #endif /* !defined(_KERNEL) */
278
279
280 /*
281 * Detect register set support
282 */
283 static inline boolean_t
284 __simd_state_enabled(const uint64_t state)
285 {
286 boolean_t has_osxsave;
287 uint64_t xcr0;
288
289 #if defined(_KERNEL) && defined(X86_FEATURE_OSXSAVE)
290 has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
291 #elif defined(_KERNEL) && !defined(X86_FEATURE_OSXSAVE)
292 has_osxsave = B_FALSE;
293 #else
294 has_osxsave = __cpuid_has_osxsave();
295 #endif
296
297 if (!has_osxsave)
298 return (B_FALSE);
299
300 xcr0 = xgetbv(0);
301 return ((xcr0 & state) == state);
302 }
303
/*
 * XCR0 state-component masks: 0x2 = SSE (XMM), 0x4 = AVX (upper YMM),
 * 0xE0 = the three AVX-512 components (opmask, upper ZMM, hi16 ZMM).
 * NOTE(review): bit assignments per the architectural XCR0 layout —
 * confirm against the Intel SDM XSAVE chapter.
 */
#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)

/* Has the OS enabled saving of YMM (AVX) / ZMM (AVX-512) state? */
#define	__ymm_enabled()		__simd_state_enabled(_XSTATE_SSE_AVX)
#define	__zmm_enabled()		__simd_state_enabled(_XSTATE_AVX512)
309
310
311 /*
312 * Check if SSE instruction set is available
313 */
314 static inline boolean_t
315 zfs_sse_available(void)
316 {
317 #if defined(_KERNEL)
318 return (!!boot_cpu_has(X86_FEATURE_XMM));
319 #else
320 return (__cpuid_has_sse());
321 #endif
322 }
323
324 /*
325 * Check if SSE2 instruction set is available
326 */
327 static inline boolean_t
328 zfs_sse2_available(void)
329 {
330 #if defined(_KERNEL)
331 return (!!boot_cpu_has(X86_FEATURE_XMM2));
332 #else
333 return (__cpuid_has_sse2());
334 #endif
335 }
336
337 /*
338 * Check if SSE3 instruction set is available
339 */
340 static inline boolean_t
341 zfs_sse3_available(void)
342 {
343 #if defined(_KERNEL)
344 return (!!boot_cpu_has(X86_FEATURE_XMM3));
345 #else
346 return (__cpuid_has_sse3());
347 #endif
348 }
349
350 /*
351 * Check if SSSE3 instruction set is available
352 */
353 static inline boolean_t
354 zfs_ssse3_available(void)
355 {
356 #if defined(_KERNEL)
357 return (!!boot_cpu_has(X86_FEATURE_SSSE3));
358 #else
359 return (__cpuid_has_ssse3());
360 #endif
361 }
362
363 /*
364 * Check if SSE4.1 instruction set is available
365 */
366 static inline boolean_t
367 zfs_sse4_1_available(void)
368 {
369 #if defined(_KERNEL)
370 return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
371 #else
372 return (__cpuid_has_sse4_1());
373 #endif
374 }
375
376 /*
377 * Check if SSE4.2 instruction set is available
378 */
379 static inline boolean_t
380 zfs_sse4_2_available(void)
381 {
382 #if defined(_KERNEL)
383 return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
384 #else
385 return (__cpuid_has_sse4_2());
386 #endif
387 }
388
389 /*
390 * Check if AVX instruction set is available
391 */
392 static inline boolean_t
393 zfs_avx_available(void)
394 {
395 boolean_t has_avx;
396 #if defined(_KERNEL)
397 has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
398 #else
399 has_avx = __cpuid_has_avx();
400 #endif
401
402 return (has_avx && __ymm_enabled());
403 }
404
405 /*
406 * Check if AVX2 instruction set is available
407 */
408 static inline boolean_t
409 zfs_avx2_available(void)
410 {
411 boolean_t has_avx2;
412 #if defined(_KERNEL) && defined(X86_FEATURE_AVX2)
413 has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
414 #elif defined(_KERNEL) && !defined(X86_FEATURE_AVX2)
415 has_avx2 = B_FALSE;
416 #else
417 has_avx2 = __cpuid_has_avx2();
418 #endif
419
420 return (has_avx2 && __ymm_enabled());
421 }
422
423 /*
424 * Check if BMI1 instruction set is available
425 */
426 static inline boolean_t
427 zfs_bmi1_available(void)
428 {
429 #if defined(_KERNEL) && defined(X86_FEATURE_BMI1)
430 return (!!boot_cpu_has(X86_FEATURE_BMI1));
431 #elif defined(_KERNEL) && !defined(X86_FEATURE_BMI1)
432 return (B_FALSE);
433 #else
434 return (__cpuid_has_bmi1());
435 #endif
436 }
437
438 /*
439 * Check if BMI2 instruction set is available
440 */
441 static inline boolean_t
442 zfs_bmi2_available(void)
443 {
444 #if defined(_KERNEL) && defined(X86_FEATURE_BMI2)
445 return (!!boot_cpu_has(X86_FEATURE_BMI2));
446 #elif defined(_KERNEL) && !defined(X86_FEATURE_BMI2)
447 return (B_FALSE);
448 #else
449 return (__cpuid_has_bmi2());
450 #endif
451 }
452
453 /*
454 * Check if AES instruction set is available
455 */
456 static inline boolean_t
457 zfs_aes_available(void)
458 {
459 #if defined(_KERNEL) && defined(X86_FEATURE_AES)
460 return (!!boot_cpu_has(X86_FEATURE_AES));
461 #elif defined(_KERNEL) && !defined(X86_FEATURE_AES)
462 return (B_FALSE);
463 #else
464 return (__cpuid_has_aes());
465 #endif
466 }
467
468 /*
469 * Check if PCLMULQDQ instruction set is available
470 */
471 static inline boolean_t
472 zfs_pclmulqdq_available(void)
473 {
474 #if defined(_KERNEL) && defined(X86_FEATURE_PCLMULQDQ)
475 return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ));
476 #elif defined(_KERNEL) && !defined(X86_FEATURE_PCLMULQDQ)
477 return (B_FALSE);
478 #else
479 return (__cpuid_has_pclmulqdq());
480 #endif
481 }
482
483 /*
484 * AVX-512 family of instruction sets:
485 *
486 * AVX512F Foundation
487 * AVX512CD Conflict Detection Instructions
488 * AVX512ER Exponential and Reciprocal Instructions
489 * AVX512PF Prefetch Instructions
490 *
491 * AVX512BW Byte and Word Instructions
492 * AVX512DQ Double-word and Quadword Instructions
493 * AVX512VL Vector Length Extensions
494 *
495 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
496 * AVX512VBMI Vector Byte Manipulation Instructions
497 */
498
499
500 /* Check if AVX512F instruction set is available */
501 static inline boolean_t
502 zfs_avx512f_available(void)
503 {
504 boolean_t has_avx512 = B_FALSE;
505
506 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512F)
507 has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
508 #elif !defined(_KERNEL)
509 has_avx512 = __cpuid_has_avx512f();
510 #endif
511
512 return (has_avx512 && __zmm_enabled());
513 }
514
515 /* Check if AVX512CD instruction set is available */
516 static inline boolean_t
517 zfs_avx512cd_available(void)
518 {
519 boolean_t has_avx512 = B_FALSE;
520
521 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512CD)
522 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
523 boot_cpu_has(X86_FEATURE_AVX512CD);
524 #elif !defined(_KERNEL)
525 has_avx512 = __cpuid_has_avx512cd();
526 #endif
527
528 return (has_avx512 && __zmm_enabled());
529 }
530
531 /* Check if AVX512ER instruction set is available */
532 static inline boolean_t
533 zfs_avx512er_available(void)
534 {
535 boolean_t has_avx512 = B_FALSE;
536
537 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512ER)
538 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
539 boot_cpu_has(X86_FEATURE_AVX512ER);
540 #elif !defined(_KERNEL)
541 has_avx512 = __cpuid_has_avx512er();
542 #endif
543
544 return (has_avx512 && __zmm_enabled());
545 }
546
547 /* Check if AVX512PF instruction set is available */
548 static inline boolean_t
549 zfs_avx512pf_available(void)
550 {
551 boolean_t has_avx512 = B_FALSE;
552
553 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512PF)
554 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
555 boot_cpu_has(X86_FEATURE_AVX512PF);
556 #elif !defined(_KERNEL)
557 has_avx512 = __cpuid_has_avx512pf();
558 #endif
559
560 return (has_avx512 && __zmm_enabled());
561 }
562
563 /* Check if AVX512BW instruction set is available */
564 static inline boolean_t
565 zfs_avx512bw_available(void)
566 {
567 boolean_t has_avx512 = B_FALSE;
568
569 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512BW)
570 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
571 boot_cpu_has(X86_FEATURE_AVX512BW);
572 #elif !defined(_KERNEL)
573 has_avx512 = __cpuid_has_avx512bw();
574 #endif
575
576 return (has_avx512 && __zmm_enabled());
577 }
578
579 /* Check if AVX512DQ instruction set is available */
580 static inline boolean_t
581 zfs_avx512dq_available(void)
582 {
583 boolean_t has_avx512 = B_FALSE;
584
585 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512DQ)
586 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
587 boot_cpu_has(X86_FEATURE_AVX512DQ);
588 #elif !defined(_KERNEL)
589 has_avx512 = __cpuid_has_avx512dq();
590 #endif
591
592 return (has_avx512 && __zmm_enabled());
593 }
594
595 /* Check if AVX512VL instruction set is available */
596 static inline boolean_t
597 zfs_avx512vl_available(void)
598 {
599 boolean_t has_avx512 = B_FALSE;
600
601 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512VL)
602 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
603 boot_cpu_has(X86_FEATURE_AVX512VL);
604 #elif !defined(_KERNEL)
605 has_avx512 = __cpuid_has_avx512vl();
606 #endif
607
608 return (has_avx512 && __zmm_enabled());
609 }
610
611 /* Check if AVX512IFMA instruction set is available */
612 static inline boolean_t
613 zfs_avx512ifma_available(void)
614 {
615 boolean_t has_avx512 = B_FALSE;
616
617 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512IFMA)
618 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
619 boot_cpu_has(X86_FEATURE_AVX512IFMA);
620 #elif !defined(_KERNEL)
621 has_avx512 = __cpuid_has_avx512ifma();
622 #endif
623
624 return (has_avx512 && __zmm_enabled());
625 }
626
627 /* Check if AVX512VBMI instruction set is available */
628 static inline boolean_t
629 zfs_avx512vbmi_available(void)
630 {
631 boolean_t has_avx512 = B_FALSE;
632
633 #if defined(_KERNEL) && defined(X86_FEATURE_AVX512VBMI)
634 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
635 boot_cpu_has(X86_FEATURE_AVX512VBMI);
636 #elif !defined(_KERNEL)
637 has_avx512 = __cpuid_has_avx512f() &&
638 __cpuid_has_avx512vbmi();
639 #endif
640
641 return (has_avx512 && __zmm_enabled());
642 }
643
644 #endif /* defined(__x86) */
645
646 #endif /* _SIMD_X86_H */