]> git.proxmox.com Git - mirror_zfs-debian.git/blob - include/linux/simd_x86.h
New upstream version 0.7.2
[mirror_zfs-debian.git] / include / linux / simd_x86.h
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
23 */
24
25 /*
26 * USER API:
27 *
28 * Kernel fpu methods:
29 * kfpu_begin()
30 * kfpu_end()
31 *
32 * SIMD support:
33 *
34 * Following functions should be called to determine whether CPU feature
35 * is supported. All functions are usable in kernel and user space.
36 * If a SIMD algorithm is using more than one instruction set
37 * all relevant feature test functions should be called.
38 *
39 * Supported features:
40 * zfs_sse_available()
41 * zfs_sse2_available()
42 * zfs_sse3_available()
43 * zfs_ssse3_available()
44 * zfs_sse4_1_available()
45 * zfs_sse4_2_available()
46 *
47 * zfs_avx_available()
48 * zfs_avx2_available()
49 *
50 * zfs_bmi1_available()
51 * zfs_bmi2_available()
52 *
53 * zfs_avx512f_available()
54 * zfs_avx512cd_available()
55 * zfs_avx512er_available()
56 * zfs_avx512pf_available()
57 * zfs_avx512bw_available()
58 * zfs_avx512dq_available()
59 * zfs_avx512vl_available()
60 * zfs_avx512ifma_available()
61 * zfs_avx512vbmi_available()
62 *
63 * NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers
64 * also add zfs_avx512vl_available() to feature check.
65 */
66
67 #ifndef _SIMD_X86_H
68 #define _SIMD_X86_H
69
70 #include <sys/isa_defs.h>
71
72 /* only for __x86 */
73 #if defined(__x86)
74
75 #include <sys/types.h>
76
77 #if defined(_KERNEL)
78 #include <asm/cpufeature.h>
79 #else
80 #include <cpuid.h>
81 #endif
82
#if defined(_KERNEL)
#if defined(HAVE_FPU_API_H)
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
/*
 * Newer FPU API: the explicit preempt_disable()/preempt_enable() pair
 * here brackets __kernel_fpu_begin()/__kernel_fpu_end(), so the FPU
 * state cannot be clobbered by a preempting task while in use.
 */
#define	kfpu_begin()		\
{				\
	preempt_disable();	\
	__kernel_fpu_begin();	\
}
#define	kfpu_end()		\
{				\
	__kernel_fpu_end();	\
	preempt_enable();	\
}
#else
/*
 * Legacy kernel headers: kernel_fpu_begin()/kernel_fpu_end() are used
 * directly (no explicit preemption handling added here).
 */
#include <asm/i387.h>
#include <asm/xcr.h>
#define	kfpu_begin()	kernel_fpu_begin()
#define	kfpu_end()	kernel_fpu_end()
#endif /* defined(HAVE_FPU_API_H) */
#else
/*
 * fpu dummy methods for userspace - the kernel FPU state does not need
 * protection outside the kernel, so these compile to nothing.
 */
#define	kfpu_begin()	do {} while (0)
#define	kfpu_end()	do {} while (0)
#endif /* defined(_KERNEL) */
110
111 /*
112 * CPUID feature tests for user-space. Linux kernel provides an interface for
113 * CPU feature testing.
114 */
115 #if !defined(_KERNEL)
116
/*
 * x86 registers used implicitly by CPUID; the enum values index the
 * result array filled in by __cpuid_count() (see __cpuid_check_feature).
 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX,
	ECX,
	EDX,
	CPUID_REG_CNT = 4	/* number of CPUID output registers */
} cpuid_regs_t;
127
/*
 * List of instruction sets identified by CPUID; each enumerator is an
 * index into the cpuid_features[] descriptor table below.
 */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,	/* OS has enabled XSAVE/XGETBV */
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL
} cpuid_inst_sets_t;
153
/*
 * Instruction set descriptor: identifies where a feature bit lives in
 * the CPUID output (leaf/sub-leaf to query, mask to test, and which
 * output register the mask applies to).
 */
typedef struct cpuid_feature_desc {
	uint32_t leaf;		/* CPUID leaf */
	uint32_t subleaf;	/* CPUID sub-leaf */
	uint32_t flag;		/* bit mask of the feature */
	cpuid_regs_t reg;	/* which CPUID return register to test */
} cpuid_feature_desc_t;
163
/*
 * CPUID leaf 7 (sub-leaf 0) EBX bit masks for the AVX-512 family.
 * Most sub-features fold in the Foundation (F) bit so a single mask
 * test verifies both; VBMI lives in ECX, so F must be checked
 * separately for it.
 */
#define	_AVX512F_BIT		(1U << 16)
#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
#define	_AVX512VBMI_BIT		(1U << 1) /* AVX512F_BIT is on another leaf */
#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
#define	_AVX512VL_BIT		(1U << 31) /* if used also check other levels */
173
174 /*
175 * Descriptions of supported instruction sets
176 */
177 static const cpuid_feature_desc_t cpuid_features[] = {
178 [SSE] = {1U, 0U, 1U << 25, EDX },
179 [SSE2] = {1U, 0U, 1U << 26, EDX },
180 [SSE3] = {1U, 0U, 1U << 0, ECX },
181 [SSSE3] = {1U, 0U, 1U << 9, ECX },
182 [SSE4_1] = {1U, 0U, 1U << 19, ECX },
183 [SSE4_2] = {1U, 0U, 1U << 20, ECX },
184 [OSXSAVE] = {1U, 0U, 1U << 27, ECX },
185 [AVX] = {1U, 0U, 1U << 28, ECX },
186 [AVX2] = {7U, 0U, 1U << 5, EBX },
187 [BMI1] = {7U, 0U, 1U << 3, EBX },
188 [BMI2] = {7U, 0U, 1U << 8, EBX },
189 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
190 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
191 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
192 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
193 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
194 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
195 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
196 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
197 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }
198 };
199
/*
 * Read an extended control register (XCR) via XGETBV.
 *
 * Used to check whether the OS has enabled AVX/AVX-512 state saving by
 * reading XCR0 (index 0). Only call this function if CPUID indicates
 * that the OSXSAVE/AVX feature is supported by the CPU, otherwise it
 * might be an illegal instruction.
 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t eax, edx;
	/*
	 * xgetbv - instruction byte code; hand-encoded so it assembles
	 * even with toolchains that do not know the mnemonic.
	 * Inputs: ECX = index.  Outputs: EDX:EAX = register value.
	 */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (eax), "=d" (edx)
	    : "c" (index));

	/* Combine the two 32-bit halves into the 64-bit result. */
	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
216
217 /*
218 * Check if CPU supports a feature
219 */
220 static inline boolean_t
221 __cpuid_check_feature(const cpuid_feature_desc_t *desc)
222 {
223 uint32_t r[CPUID_REG_CNT];
224
225 if (__get_cpuid_max(0, NULL) >= desc->leaf) {
226 /*
227 * __cpuid_count is needed to properly check
228 * for AVX2. It is a macro, so return parameters
229 * are passed by value.
230 */
231 __cpuid_count(desc->leaf, desc->subleaf,
232 r[EAX], r[EBX], r[ECX], r[EDX]);
233 return ((r[desc->reg] & desc->flag) == desc->flag);
234 }
235 return (B_FALSE);
236 }
237
/*
 * Generate a zero-argument predicate __cpuid_has_<name>() that tests
 * the cpuid_features[] entry selected by 'id'.
 */
#define	CPUID_FEATURE_CHECK(name, id)				\
static inline boolean_t						\
__cpuid_has_ ## name(void)					\
{								\
	return (__cpuid_check_feature(&cpuid_features[id]));	\
}
244
/*
 * Define functions for user-space CPUID features testing; each
 * expansion produces one __cpuid_has_<name>() predicate.
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
268
269 #endif /* !defined(_KERNEL) */
270
271
/*
 * Detect register set support: returns B_TRUE when the OS has enabled
 * saving/restoring of all XCR0 state components in 'state'.  Requires
 * OSXSAVE before executing XGETBV (otherwise XGETBV could fault).
 */
static inline boolean_t
__simd_state_enabled(const uint64_t state)
{
	boolean_t has_osxsave;
	uint64_t xcr0;

#if defined(_KERNEL) && defined(X86_FEATURE_OSXSAVE)
	has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
#elif defined(_KERNEL) && !defined(X86_FEATURE_OSXSAVE)
	/* Kernel headers predate the OSXSAVE feature flag. */
	has_osxsave = B_FALSE;
#else
	has_osxsave = __cpuid_has_osxsave();
#endif

	/* Without OSXSAVE, XGETBV is unavailable - report unsupported. */
	if (!has_osxsave)
		return (B_FALSE);

	xcr0 = xgetbv(0);
	/* Every requested state bit must be enabled in XCR0. */
	return ((xcr0 & state) == state);
}
295
/* XCR0 state-component masks: SSE(XMM)=bit1, AVX(YMM-hi)=bit2 */
#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
/* AVX-512 adds opmask/ZMM_Hi256/Hi16_ZMM = bits 5-7 (0xE0) */
#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)

/* OS saves YMM state (needed for AVX/AVX2) */
#define	__ymm_enabled()		__simd_state_enabled(_XSTATE_SSE_AVX)
/* OS saves ZMM/opmask state (needed for AVX-512) */
#define	__zmm_enabled()		__simd_state_enabled(_XSTATE_AVX512)
301
302
/*
 * Check if SSE instruction set is available
 */
static inline boolean_t
zfs_sse_available(void)
{
#if defined(_KERNEL)
	/* Kernel: use the feature bits detected by the kernel at boot. */
	return (!!boot_cpu_has(X86_FEATURE_XMM));
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_sse());
#endif
}
315
/*
 * Check if SSE2 instruction set is available
 */
static inline boolean_t
zfs_sse2_available(void)
{
#if defined(_KERNEL)
	/* Kernel: use the feature bits detected by the kernel at boot. */
	return (!!boot_cpu_has(X86_FEATURE_XMM2));
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_sse2());
#endif
}
328
/*
 * Check if SSE3 instruction set is available
 */
static inline boolean_t
zfs_sse3_available(void)
{
#if defined(_KERNEL)
	/* Kernel: use the feature bits detected by the kernel at boot. */
	return (!!boot_cpu_has(X86_FEATURE_XMM3));
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_sse3());
#endif
}
341
/*
 * Check if SSSE3 instruction set is available
 */
static inline boolean_t
zfs_ssse3_available(void)
{
#if defined(_KERNEL)
	/* Kernel: use the feature bits detected by the kernel at boot. */
	return (!!boot_cpu_has(X86_FEATURE_SSSE3));
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_ssse3());
#endif
}
354
/*
 * Check if SSE4.1 instruction set is available
 */
static inline boolean_t
zfs_sse4_1_available(void)
{
#if defined(_KERNEL)
	/* Kernel: use the feature bits detected by the kernel at boot. */
	return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_sse4_1());
#endif
}
367
/*
 * Check if SSE4.2 instruction set is available
 */
static inline boolean_t
zfs_sse4_2_available(void)
{
#if defined(_KERNEL)
	/* Kernel: use the feature bits detected by the kernel at boot. */
	return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_sse4_2());
#endif
}
380
/*
 * Check if AVX instruction set is available.  Requires both the CPU
 * feature bit and OS support for saving YMM state (XCR0 check).
 */
static inline boolean_t
zfs_avx_available(void)
{
	boolean_t has_avx;
#if defined(_KERNEL)
	has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
#else
	has_avx = __cpuid_has_avx();
#endif

	/* AVX is only usable when the OS saves/restores YMM registers. */
	return (has_avx && __ymm_enabled());
}
396
/*
 * Check if AVX2 instruction set is available.  Requires both the CPU
 * feature bit and OS support for saving YMM state (XCR0 check).
 */
static inline boolean_t
zfs_avx2_available(void)
{
	boolean_t has_avx2;
#if defined(_KERNEL) && defined(X86_FEATURE_AVX2)
	has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
#elif defined(_KERNEL) && !defined(X86_FEATURE_AVX2)
	/* Kernel headers predate the AVX2 feature flag. */
	has_avx2 = B_FALSE;
#else
	has_avx2 = __cpuid_has_avx2();
#endif

	/* AVX2 is only usable when the OS saves/restores YMM registers. */
	return (has_avx2 && __ymm_enabled());
}
414
/*
 * Check if BMI1 instruction set is available
 */
static inline boolean_t
zfs_bmi1_available(void)
{
#if defined(_KERNEL) && defined(X86_FEATURE_BMI1)
	return (!!boot_cpu_has(X86_FEATURE_BMI1));
#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI1)
	/* Kernel headers predate the BMI1 feature flag. */
	return (B_FALSE);
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_bmi1());
#endif
}
429
/*
 * Check if BMI2 instruction set is available
 */
static inline boolean_t
zfs_bmi2_available(void)
{
#if defined(_KERNEL) && defined(X86_FEATURE_BMI2)
	return (!!boot_cpu_has(X86_FEATURE_BMI2));
#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI2)
	/* Kernel headers predate the BMI2 feature flag. */
	return (B_FALSE);
#else
	/* Userspace: query CPUID directly. */
	return (__cpuid_has_bmi2());
#endif
}
444
445
446 /*
447 * AVX-512 family of instruction sets:
448 *
449 * AVX512F Foundation
450 * AVX512CD Conflict Detection Instructions
451 * AVX512ER Exponential and Reciprocal Instructions
452 * AVX512PF Prefetch Instructions
453 *
454 * AVX512BW Byte and Word Instructions
455 * AVX512DQ Double-word and Quadword Instructions
456 * AVX512VL Vector Length Extensions
457 *
458 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
459 * AVX512VBMI Vector Byte Manipulation Instructions
460 */
461
462
/*
 * Check if AVX512F instruction set is available.  All AVX-512 checks
 * also require the OS to save ZMM/opmask state (__zmm_enabled()).
 */
static inline boolean_t
zfs_avx512f_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512F)
	has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
#elif !defined(_KERNEL)
	has_avx512 = __cpuid_has_avx512f();
#endif
	/* Kernel without the flag defined: stays B_FALSE. */

	return (has_avx512 && __zmm_enabled());
}
477
/* Check if AVX512CD instruction set is available */
static inline boolean_t
zfs_avx512cd_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512CD)
	/* Kernel: require both the Foundation (F) and CD feature bits. */
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512CD);
#elif !defined(_KERNEL)
	/* Userspace: descriptor mask already folds in the F bit. */
	has_avx512 = __cpuid_has_avx512cd();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
493
/* Check if AVX512ER instruction set is available */
static inline boolean_t
zfs_avx512er_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512ER)
	/* Kernel: require both the Foundation (F) and ER feature bits. */
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512ER);
#elif !defined(_KERNEL)
	/* Userspace: descriptor mask already folds in the F bit. */
	has_avx512 = __cpuid_has_avx512er();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
509
/* Check if AVX512PF instruction set is available */
static inline boolean_t
zfs_avx512pf_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512PF)
	/* Kernel: require both the Foundation (F) and PF feature bits. */
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512PF);
#elif !defined(_KERNEL)
	/* Userspace: descriptor mask already folds in the F bit. */
	has_avx512 = __cpuid_has_avx512pf();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
525
/* Check if AVX512BW instruction set is available */
static inline boolean_t
zfs_avx512bw_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512BW)
	/* Kernel: require both the Foundation (F) and BW feature bits. */
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512BW);
#elif !defined(_KERNEL)
	/* Userspace: descriptor mask already folds in the F bit. */
	has_avx512 = __cpuid_has_avx512bw();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
541
/* Check if AVX512DQ instruction set is available */
static inline boolean_t
zfs_avx512dq_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512DQ)
	/* Kernel: require both the Foundation (F) and DQ feature bits. */
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512DQ);
#elif !defined(_KERNEL)
	/* Userspace: descriptor mask already folds in the F bit. */
	has_avx512 = __cpuid_has_avx512dq();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
557
/* Check if AVX512VL instruction set is available */
static inline boolean_t
zfs_avx512vl_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VL)
	/* Kernel: require both the Foundation (F) and VL feature bits. */
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512VL);
#elif !defined(_KERNEL)
	has_avx512 = __cpuid_has_avx512vl();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
573
/* Check if AVX512IFMA instruction set is available */
static inline boolean_t
zfs_avx512ifma_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512IFMA)
	/* Kernel: require both the Foundation (F) and IFMA feature bits. */
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512IFMA);
#elif !defined(_KERNEL)
	/* Userspace: descriptor mask already folds in the F bit. */
	has_avx512 = __cpuid_has_avx512ifma();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
589
/* Check if AVX512VBMI instruction set is available */
static inline boolean_t
zfs_avx512vbmi_available(void)
{
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VBMI)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512VBMI);
#elif !defined(_KERNEL)
	/*
	 * VBMI's CPUID bit is in ECX, so its descriptor cannot fold in
	 * the Foundation bit (EBX) - F must be checked separately here.
	 */
	has_avx512 = __cpuid_has_avx512f() &&
	    __cpuid_has_avx512vbmi();
#endif

	/* Also require the OS to save ZMM/opmask state. */
	return (has_avx512 && __zmm_enabled());
}
606
607 #endif /* defined(__x86) */
608
609 #endif /* _SIMD_X86_H */