]> git.proxmox.com Git - mirror_zfs.git/blame - include/linux/simd_x86.h
OpenZFS 7304 - zfs filesystem/snapshot counts should be read-only
[mirror_zfs.git] / include / linux / simd_x86.h
CommitLineData
fc0c72b1
GN
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
23 */
24
25/*
26 * USER API:
27 *
28 * Kernel fpu methods:
29 * kfpu_begin()
30 * kfpu_end()
31 *
32 * SIMD support:
33 *
34 * The following functions should be called to determine whether a CPU
35 * feature is supported. All functions are usable in kernel and user space.
36 * If a SIMD algorithm uses more than one instruction set,
37 * all relevant feature test functions should be called.
38 *
39 * Supported features:
40 * zfs_sse_available()
41 * zfs_sse2_available()
42 * zfs_sse3_available()
43 * zfs_ssse3_available()
44 * zfs_sse4_1_available()
45 * zfs_sse4_2_available()
32ffaa3d 46 *
fc0c72b1
GN
47 * zfs_avx_available()
48 * zfs_avx2_available()
32ffaa3d 49 *
fc0c72b1
GN
50 * zfs_bmi1_available()
51 * zfs_bmi2_available()
32ffaa3d
GN
52 *
53 * zfs_avx512f_available()
54 * zfs_avx512cd_available()
55 * zfs_avx512er_available()
56 * zfs_avx512pf_available()
57 * zfs_avx512bw_available()
58 * zfs_avx512dq_available()
59 * zfs_avx512vl_available()
60 * zfs_avx512ifma_available()
61 * zfs_avx512vbmi_available()
62 *
63 * NOTE(AVX-512VL): When using AVX-512 instructions on 128-bit or 256-bit
64 * registers, also add zfs_avx512vl_available() to the feature check.
fc0c72b1
GN
65 */
66
67#ifndef _SIMD_X86_H
68#define _SIMD_X86_H
69
70#include <sys/isa_defs.h>
71
72/* only for __x86 */
73#if defined(__x86)
74
75#include <sys/types.h>
76
77#if defined(_KERNEL)
78#include <asm/cpufeature.h>
79#else
80#include <cpuid.h>
81#endif
82
83#if defined(_KERNEL)
84#if defined(HAVE_FPU_API_H)
85#include <asm/fpu/api.h>
86#include <asm/fpu/internal.h>
/*
 * Kernels that provide <asm/fpu/api.h>: bracket FPU/SIMD use with the
 * double-underscore begin/end primitives, disabling preemption around
 * them explicitly.
 *
 * Each macro expands to a single do { } while (0) statement so that
 * "kfpu_begin();" is safe inside an unbraced if/else, the same way the
 * other kfpu_*() variants in this file behave.
 */
#define	kfpu_begin()		\
do {				\
	preempt_disable();	\
	__kernel_fpu_begin();	\
} while (0)

#define	kfpu_end()		\
do {				\
	__kernel_fpu_end();	\
	preempt_enable();	\
} while (0)
97#else
98#include <asm/i387.h>
99#include <asm/xcr.h>
/* no <asm/fpu/api.h>: forward straight to kernel_fpu_begin()/kernel_fpu_end() */
#define	kfpu_begin()	kernel_fpu_begin()
#define	kfpu_end()	kernel_fpu_end()
102#endif /* defined(HAVE_FPU_API_H) */
103#else
/*
 * Dummy FPU methods for user space: both expand to an empty statement.
 */
#define	kfpu_begin()	do { } while (0)
#define	kfpu_end()	do { } while (0)
109#endif /* defined(_KERNEL) */
110
111/*
112 * CPUID feature tests for user-space. Linux kernel provides an interface for
113 * CPU feature testing.
114 */
115#if !defined(_KERNEL)
116
/*
 * Indices of the x86 registers used implicitly by CPUID. They address the
 * result array filled in by __cpuid_check_feature().
 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX = 1,
	ECX = 2,
	EDX = 3,
	CPUID_REG_CNT	/* number of registers above (4) */
} cpuid_regs_t;
127
/*
 * Instruction sets identified by CPUID. The values are used as indices
 * into cpuid_features[].
 */
typedef enum cpuid_inst_sets {
	/* SSE family */
	SSE		= 0,
	SSE2		= 1,
	SSE3		= 2,
	SSSE3		= 3,
	SSE4_1		= 4,
	SSE4_2		= 5,
	/* extended state and AVX */
	OSXSAVE		= 6,
	AVX		= 7,
	AVX2		= 8,
	/* bit manipulation */
	BMI1		= 9,
	BMI2		= 10,
	/* AVX-512 family */
	AVX512F		= 11,
	AVX512CD	= 12,
	AVX512DQ	= 13,
	AVX512BW	= 14,
	AVX512IFMA	= 15,
	AVX512VBMI	= 16,
	AVX512PF	= 17,
	AVX512ER	= 18,
	AVX512VL	= 19
} cpuid_inst_sets_t;
153
154/*
155 * Instruction set descriptor.
156 */
157typedef struct cpuid_feature_desc {
158 uint32_t leaf; /* CPUID leaf */
32ffaa3d 159 uint32_t subleaf; /* CPUID sub-leaf */
fc0c72b1
GN
160 uint32_t flag; /* bit mask of the feature */
161 cpuid_regs_t reg; /* which CPUID return register to test */
162} cpuid_feature_desc_t;
163
32ffaa3d
GN
/*
 * CPUID feature masks for the AVX-512 family. Masks that are tested in the
 * same register as the foundation bit also include _AVX512F_BIT, so a
 * single masked compare requires both.
 */
#define	_AVX512F_BIT		(1U << 16)
#define	_AVX512CD_BIT		((1U << 28) | _AVX512F_BIT)
#define	_AVX512DQ_BIT		((1U << 17) | _AVX512F_BIT)
#define	_AVX512BW_BIT		((1U << 30) | _AVX512F_BIT)
#define	_AVX512IFMA_BIT		((1U << 21) | _AVX512F_BIT)
/* AVX512F_BIT is on another leaf */
#define	_AVX512VBMI_BIT		(1U << 1)
#define	_AVX512PF_BIT		((1U << 26) | _AVX512F_BIT)
#define	_AVX512ER_BIT		((1U << 27) | _AVX512F_BIT)
/* if used also check other levels */
#define	_AVX512VL_BIT		(1U << 31)
173
fc0c72b1
GN
174/*
175 * Descriptions of supported instruction sets
176 */
177static const cpuid_feature_desc_t cpuid_features[] = {
178 [SSE] = {1U, 0U, 1U << 25, EDX },
179 [SSE2] = {1U, 0U, 1U << 26, EDX },
180 [SSE3] = {1U, 0U, 1U << 0, ECX },
181 [SSSE3] = {1U, 0U, 1U << 9, ECX },
182 [SSE4_1] = {1U, 0U, 1U << 19, ECX },
183 [SSE4_2] = {1U, 0U, 1U << 20, ECX },
184 [OSXSAVE] = {1U, 0U, 1U << 27, ECX },
185 [AVX] = {1U, 0U, 1U << 28, ECX },
186 [AVX2] = {7U, 0U, 1U << 5, EBX },
187 [BMI1] = {7U, 0U, 1U << 3, EBX },
32ffaa3d
GN
188 [BMI2] = {7U, 0U, 1U << 8, EBX },
189 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
190 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
191 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
192 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
193 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
194 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
195 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
196 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
197 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }
fc0c72b1
GN
198};
199
/*
 * Read the extended control register selected by index and return its
 * 64-bit value; index 0 is what this file uses to obtain XCR0.
 *
 * Only call this function once CPUID indicates the required support
 * (__simd_state_enabled() checks OSXSAVE first), otherwise it might be
 * an illegal instruction.
 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t lo, hi;

	/* xgetbv, emitted as its instruction byte code */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (lo), "=d" (hi)
	    : "c" (index));

	/* EDX carries the upper half, EAX the lower half */
	return ((uint64_t)lo | ((uint64_t)hi << 32));
}
216
217/*
218 * Check if CPU supports a feature
219 */
220static inline boolean_t
221__cpuid_check_feature(const cpuid_feature_desc_t *desc)
222{
223 uint32_t r[CPUID_REG_CNT];
224
225 if (__get_cpuid_max(0, NULL) >= desc->leaf) {
226 /*
227 * __cpuid_count is needed to properly check
228 * for AVX2. It is a macro, so return parameters
229 * are passed by value.
230 */
231 __cpuid_count(desc->leaf, desc->subleaf,
02730c33 232 r[EAX], r[EBX], r[ECX], r[EDX]);
32ffaa3d 233 return ((r[desc->reg] & desc->flag) == desc->flag);
fc0c72b1
GN
234 }
235 return (B_FALSE);
236}
237
/*
 * Generate __cpuid_has_<name>(), a predicate that runs
 * __cpuid_check_feature() on the cpuid_features[] entry selected by id.
 */
#define	CPUID_FEATURE_CHECK(name, id)					\
static inline boolean_t							\
__cpuid_has_ ## name(void)						\
{									\
	return (__cpuid_check_feature(&cpuid_features[(id)]));		\
}
244
/*
 * Instantiate the user-space __cpuid_has_<feature>() predicates, listed in
 * cpuid_inst_sets_t order.
 */

/* SSE family */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);

/* extended state and AVX */
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);

/* bit manipulation */
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);

/* AVX-512 family */
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
fc0c72b1
GN
268
269#endif /* !defined(_KERNEL) */
270
32ffaa3d 271
fc0c72b1 272/*
32ffaa3d 273 * Detect register set support
fc0c72b1
GN
274 */
275static inline boolean_t
32ffaa3d 276__simd_state_enabled(const uint64_t state)
fc0c72b1 277{
fc0c72b1
GN
278 boolean_t has_osxsave;
279 uint64_t xcr0;
280
281#if defined(_KERNEL) && defined(X86_FEATURE_OSXSAVE)
282 has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
283#elif defined(_KERNEL) && !defined(X86_FEATURE_OSXSAVE)
284 has_osxsave = B_FALSE;
285#else
286 has_osxsave = __cpuid_has_osxsave();
287#endif
288
289 if (!has_osxsave)
290 return (B_FALSE);
291
292 xcr0 = xgetbv(0);
32ffaa3d 293 return ((xcr0 & state) == state);
fc0c72b1
GN
294}
295
32ffaa3d
GN
/*
 * State masks compared against XCR0 by __simd_state_enabled().
 */
#define	_XSTATE_SSE_AVX		((1 << 1) | (1 << 2))
#define	_XSTATE_AVX512		\
	((1 << 5) | (1 << 6) | (1 << 7) | _XSTATE_SSE_AVX)

/* true when all bits of the corresponding mask are enabled */
#define	__ymm_enabled()		__simd_state_enabled(_XSTATE_SSE_AVX)
#define	__zmm_enabled()		__simd_state_enabled(_XSTATE_AVX512)
301
302
fc0c72b1
GN
303/*
304 * Check if SSE instruction set is available
305 */
306static inline boolean_t
307zfs_sse_available(void)
308{
309#if defined(_KERNEL)
310 return (!!boot_cpu_has(X86_FEATURE_XMM));
311#else
312 return (__cpuid_has_sse());
313#endif
314}
315
316/*
317 * Check if SSE2 instruction set is available
318 */
319static inline boolean_t
320zfs_sse2_available(void)
321{
322#if defined(_KERNEL)
323 return (!!boot_cpu_has(X86_FEATURE_XMM2));
324#else
325 return (__cpuid_has_sse2());
326#endif
327}
328
329/*
330 * Check if SSE3 instruction set is available
331 */
332static inline boolean_t
333zfs_sse3_available(void)
334{
335#if defined(_KERNEL)
336 return (!!boot_cpu_has(X86_FEATURE_XMM3));
337#else
338 return (__cpuid_has_sse3());
339#endif
340}
341
342/*
343 * Check if SSSE3 instruction set is available
344 */
345static inline boolean_t
346zfs_ssse3_available(void)
347{
348#if defined(_KERNEL)
349 return (!!boot_cpu_has(X86_FEATURE_SSSE3));
350#else
351 return (__cpuid_has_ssse3());
352#endif
353}
354
355/*
356 * Check if SSE4.1 instruction set is available
357 */
358static inline boolean_t
359zfs_sse4_1_available(void)
360{
361#if defined(_KERNEL)
362 return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
363#else
364 return (__cpuid_has_sse4_1());
365#endif
366}
367
368/*
369 * Check if SSE4.2 instruction set is available
370 */
371static inline boolean_t
372zfs_sse4_2_available(void)
373{
374#if defined(_KERNEL)
375 return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
376#else
377 return (__cpuid_has_sse4_2());
378#endif
379}
380
381/*
382 * Check if AVX instruction set is available
383 */
384static inline boolean_t
385zfs_avx_available(void)
386{
387 boolean_t has_avx;
388#if defined(_KERNEL)
389 has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
390#else
391 has_avx = __cpuid_has_avx();
392#endif
393
394 return (has_avx && __ymm_enabled());
395}
396
397/*
398 * Check if AVX2 instruction set is available
399 */
400static inline boolean_t
401zfs_avx2_available(void)
402{
403 boolean_t has_avx2;
404#if defined(_KERNEL) && defined(X86_FEATURE_AVX2)
405 has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
406#elif defined(_KERNEL) && !defined(X86_FEATURE_AVX2)
407 has_avx2 = B_FALSE;
408#else
409 has_avx2 = __cpuid_has_avx2();
410#endif
411
412 return (has_avx2 && __ymm_enabled());
413}
414
415/*
416 * Check if BMI1 instruction set is available
417 */
418static inline boolean_t
419zfs_bmi1_available(void)
420{
421#if defined(_KERNEL) && defined(X86_FEATURE_BMI1)
422 return (!!boot_cpu_has(X86_FEATURE_BMI1));
423#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI1)
424 return (B_FALSE);
425#else
426 return (__cpuid_has_bmi1());
427#endif
428}
429
430/*
431 * Check if BMI2 instruction set is available
432 */
433static inline boolean_t
434zfs_bmi2_available(void)
435{
436#if defined(_KERNEL) && defined(X86_FEATURE_BMI2)
437 return (!!boot_cpu_has(X86_FEATURE_BMI2));
438#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI2)
439 return (B_FALSE);
440#else
441 return (__cpuid_has_bmi2());
442#endif
443}
444
32ffaa3d
GN
445
446/*
447 * AVX-512 family of instruction sets:
448 *
449 * AVX512F Foundation
450 * AVX512CD Conflict Detection Instructions
451 * AVX512ER Exponential and Reciprocal Instructions
452 * AVX512PF Prefetch Instructions
453 *
454 * AVX512BW Byte and Word Instructions
455 * AVX512DQ Double-word and Quadword Instructions
456 * AVX512VL Vector Length Extensions
457 *
458 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
459 * AVX512VBMI Vector Byte Manipulation Instructions
460 */
461
462
463/* Check if AVX512F instruction set is available */
464static inline boolean_t
465zfs_avx512f_available(void)
466{
467 boolean_t has_avx512 = B_FALSE;
468
469#if defined(_KERNEL) && defined(X86_FEATURE_AVX512F)
470 has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
471#elif !defined(_KERNEL)
472 has_avx512 = __cpuid_has_avx512f();
473#endif
474
475 return (has_avx512 && __zmm_enabled());
476}
477
478/* Check if AVX512CD instruction set is available */
479static inline boolean_t
480zfs_avx512cd_available(void)
481{
482 boolean_t has_avx512 = B_FALSE;
483
484#if defined(_KERNEL) && defined(X86_FEATURE_AVX512CD)
485 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
486 boot_cpu_has(X86_FEATURE_AVX512CD);
487#elif !defined(_KERNEL)
488 has_avx512 = __cpuid_has_avx512cd();
489#endif
490
491 return (has_avx512 && __zmm_enabled());
492}
493
494/* Check if AVX512ER instruction set is available */
495static inline boolean_t
496zfs_avx512er_available(void)
497{
498 boolean_t has_avx512 = B_FALSE;
499
500#if defined(_KERNEL) && defined(X86_FEATURE_AVX512ER)
501 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
502 boot_cpu_has(X86_FEATURE_AVX512ER);
503#elif !defined(_KERNEL)
504 has_avx512 = __cpuid_has_avx512er();
505#endif
506
507 return (has_avx512 && __zmm_enabled());
508}
509
510/* Check if AVX512PF instruction set is available */
511static inline boolean_t
512zfs_avx512pf_available(void)
513{
514 boolean_t has_avx512 = B_FALSE;
515
516#if defined(_KERNEL) && defined(X86_FEATURE_AVX512PF)
517 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
518 boot_cpu_has(X86_FEATURE_AVX512PF);
519#elif !defined(_KERNEL)
520 has_avx512 = __cpuid_has_avx512pf();
521#endif
522
523 return (has_avx512 && __zmm_enabled());
524}
525
526/* Check if AVX512BW instruction set is available */
527static inline boolean_t
528zfs_avx512bw_available(void)
529{
530 boolean_t has_avx512 = B_FALSE;
531
532#if defined(_KERNEL) && defined(X86_FEATURE_AVX512BW)
533 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
534 boot_cpu_has(X86_FEATURE_AVX512BW);
535#elif !defined(_KERNEL)
536 has_avx512 = __cpuid_has_avx512bw();
537#endif
538
539 return (has_avx512 && __zmm_enabled());
540}
541
542/* Check if AVX512DQ instruction set is available */
543static inline boolean_t
544zfs_avx512dq_available(void)
545{
546 boolean_t has_avx512 = B_FALSE;
547
548#if defined(_KERNEL) && defined(X86_FEATURE_AVX512DQ)
549 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
550 boot_cpu_has(X86_FEATURE_AVX512DQ);
551#elif !defined(_KERNEL)
552 has_avx512 = __cpuid_has_avx512dq();
553#endif
554
555 return (has_avx512 && __zmm_enabled());
556}
557
558/* Check if AVX512VL instruction set is available */
559static inline boolean_t
560zfs_avx512vl_available(void)
561{
562 boolean_t has_avx512 = B_FALSE;
563
564#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VL)
565 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
566 boot_cpu_has(X86_FEATURE_AVX512VL);
567#elif !defined(_KERNEL)
568 has_avx512 = __cpuid_has_avx512vl();
569#endif
570
571 return (has_avx512 && __zmm_enabled());
572}
573
574/* Check if AVX512IFMA instruction set is available */
575static inline boolean_t
576zfs_avx512ifma_available(void)
577{
578 boolean_t has_avx512 = B_FALSE;
579
580#if defined(_KERNEL) && defined(X86_FEATURE_AVX512IFMA)
581 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
582 boot_cpu_has(X86_FEATURE_AVX512IFMA);
583#elif !defined(_KERNEL)
584 has_avx512 = __cpuid_has_avx512ifma();
585#endif
586
587 return (has_avx512 && __zmm_enabled());
588}
589
590/* Check if AVX512VBMI instruction set is available */
591static inline boolean_t
592zfs_avx512vbmi_available(void)
593{
594 boolean_t has_avx512 = B_FALSE;
595
596#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VBMI)
597 has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
598 boot_cpu_has(X86_FEATURE_AVX512VBMI);
599#elif !defined(_KERNEL)
600 has_avx512 = __cpuid_has_avx512f() &&
601 __cpuid_has_avx512vbmi();
602#endif
603
604 return (has_avx512 && __zmm_enabled());
605}
606
fc0c72b1
GN
607#endif /* defined(__x86) */
608
609#endif /* _SIMD_X86_H */