1 /* -*- linux-c -*- --------------------------------------------------------
3 * Copyright (C) 2016 Intel Corporation
5 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
6 * Author: Megha Dey <megha.dey@linux.intel.com>
8 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
9 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
14 * Boston MA 02111-1307, USA; either version 2 of the License, or
15 * (at your option) any later version; incorporated herein by reference.
 * -----------------------------------------------------------------------
 */

/*
 * AVX512 implementation of RAID-6 syndrome functions
 *
 */
25 #ifdef CONFIG_AS_AVX512
#include <linux/raid/pq.h>
#include "x86.h"		/* kernel_fpu_begin()/kernel_fpu_end() */
30 static const struct raid6_avx512_constants
{
32 } raid6_avx512_constants
__aligned(512) = {
33 { 0x1d1d1d1d1d1d1d1dULL
, 0x1d1d1d1d1d1d1d1dULL
,
34 0x1d1d1d1d1d1d1d1dULL
, 0x1d1d1d1d1d1d1d1dULL
,
35 0x1d1d1d1d1d1d1d1dULL
, 0x1d1d1d1d1d1d1d1dULL
,
36 0x1d1d1d1d1d1d1d1dULL
, 0x1d1d1d1d1d1d1d1dULL
,},
39 static int raid6_have_avx512(void)
41 return boot_cpu_has(X86_FEATURE_AVX2
) &&
42 boot_cpu_has(X86_FEATURE_AVX
) &&
43 boot_cpu_has(X86_FEATURE_AVX512F
) &&
44 boot_cpu_has(X86_FEATURE_AVX512BW
) &&
45 boot_cpu_has(X86_FEATURE_AVX512VL
) &&
46 boot_cpu_has(X86_FEATURE_AVX512DQ
);
49 static void raid6_avx5121_gen_syndrome(int disks
, size_t bytes
, void **ptrs
)
51 u8
**dptr
= (u8
**)ptrs
;
55 z0
= disks
- 3; /* Highest data disk */
56 p
= dptr
[z0
+1]; /* XOR parity */
57 q
= dptr
[z0
+2]; /* RS syndrome */
61 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
62 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
64 : "m" (raid6_avx512_constants
.x1d
[0]));
66 for (d
= 0; d
< bytes
; d
+= 64) {
67 asm volatile("prefetchnta %0\n\t"
68 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
70 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
73 : "m" (dptr
[z0
][d
]), "m" (dptr
[z0
-1][d
]));
74 for (z
= z0
-2; z
>= 0; z
--) {
75 asm volatile("prefetchnta %0\n\t"
76 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
77 "vpmovm2b %%k1,%%zmm5\n\t"
78 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
79 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
80 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
81 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
82 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
87 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
88 "vpmovm2b %%k1,%%zmm5\n\t"
89 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
90 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
91 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
92 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
93 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
94 "vmovntdq %%zmm2,%0\n\t"
95 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
96 "vmovntdq %%zmm4,%1\n\t"
97 "vpxorq %%zmm4,%%zmm4,%%zmm4"
99 : "m" (p
[d
]), "m" (q
[d
]));
102 asm volatile("sfence" : : : "memory");
106 const struct raid6_calls raid6_avx512x1
= {
107 raid6_avx5121_gen_syndrome
,
108 NULL
, /* XOR not yet implemented */
111 1 /* Has cache hints */
/*
 * Unrolled-by-2 AVX512 implementation
 */
117 static void raid6_avx5122_gen_syndrome(int disks
, size_t bytes
, void **ptrs
)
119 u8
**dptr
= (u8
**)ptrs
;
123 z0
= disks
- 3; /* Highest data disk */
124 p
= dptr
[z0
+1]; /* XOR parity */
125 q
= dptr
[z0
+2]; /* RS syndrome */
129 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
130 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
132 : "m" (raid6_avx512_constants
.x1d
[0]));
134 /* We uniformly assume a single prefetch covers at least 64 bytes */
135 for (d
= 0; d
< bytes
; d
+= 128) {
136 asm volatile("prefetchnta %0\n\t"
138 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
139 "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */
140 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
141 "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */
143 : "m" (dptr
[z0
][d
]), "m" (dptr
[z0
][d
+64]));
144 for (z
= z0
-1; z
>= 0; z
--) {
145 asm volatile("prefetchnta %0\n\t"
147 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
148 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
149 "vpmovm2b %%k1,%%zmm5\n\t"
150 "vpmovm2b %%k2,%%zmm7\n\t"
151 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
152 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
153 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
154 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
155 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
156 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
157 "vmovdqa64 %0,%%zmm5\n\t"
158 "vmovdqa64 %1,%%zmm7\n\t"
159 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
160 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
161 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
162 "vpxorq %%zmm7,%%zmm6,%%zmm6"
164 : "m" (dptr
[z
][d
]), "m" (dptr
[z
][d
+64]));
166 asm volatile("vmovntdq %%zmm2,%0\n\t"
167 "vmovntdq %%zmm3,%1\n\t"
168 "vmovntdq %%zmm4,%2\n\t"
171 : "m" (p
[d
]), "m" (p
[d
+64]), "m" (q
[d
]),
175 asm volatile("sfence" : : : "memory");
179 const struct raid6_calls raid6_avx512x2
= {
180 raid6_avx5122_gen_syndrome
,
181 NULL
, /* XOR not yet implemented */
184 1 /* Has cache hints */
/*
 * Unrolled-by-4 AVX512 implementation
 */
192 static void raid6_avx5124_gen_syndrome(int disks
, size_t bytes
, void **ptrs
)
194 u8
**dptr
= (u8
**)ptrs
;
198 z0
= disks
- 3; /* Highest data disk */
199 p
= dptr
[z0
+1]; /* XOR parity */
200 q
= dptr
[z0
+2]; /* RS syndrome */
204 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
205 "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */
206 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */
207 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */
208 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */
209 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */
210 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
211 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
212 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
213 "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */
215 : "m" (raid6_avx512_constants
.x1d
[0]));
217 for (d
= 0; d
< bytes
; d
+= 256) {
218 for (z
= z0
; z
>= 0; z
--) {
219 asm volatile("prefetchnta %0\n\t"
223 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
224 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
225 "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
226 "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
227 "vpmovm2b %%k1,%%zmm5\n\t"
228 "vpmovm2b %%k2,%%zmm7\n\t"
229 "vpmovm2b %%k3,%%zmm13\n\t"
230 "vpmovm2b %%k4,%%zmm15\n\t"
231 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
232 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
233 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
234 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
235 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
236 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
237 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
238 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
239 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
240 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
241 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
242 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
243 "vmovdqa64 %0,%%zmm5\n\t"
244 "vmovdqa64 %1,%%zmm7\n\t"
245 "vmovdqa64 %2,%%zmm13\n\t"
246 "vmovdqa64 %3,%%zmm15\n\t"
247 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
248 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
249 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
250 "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
251 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
252 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
253 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
254 "vpxorq %%zmm15,%%zmm14,%%zmm14"
256 : "m" (dptr
[z
][d
]), "m" (dptr
[z
][d
+64]),
257 "m" (dptr
[z
][d
+128]), "m" (dptr
[z
][d
+192]));
259 asm volatile("vmovntdq %%zmm2,%0\n\t"
260 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
261 "vmovntdq %%zmm3,%1\n\t"
262 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
263 "vmovntdq %%zmm10,%2\n\t"
264 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
265 "vmovntdq %%zmm11,%3\n\t"
266 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
267 "vmovntdq %%zmm4,%4\n\t"
268 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
269 "vmovntdq %%zmm6,%5\n\t"
270 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
271 "vmovntdq %%zmm12,%6\n\t"
272 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
273 "vmovntdq %%zmm14,%7\n\t"
274 "vpxorq %%zmm14,%%zmm14,%%zmm14"
276 : "m" (p
[d
]), "m" (p
[d
+64]), "m" (p
[d
+128]),
277 "m" (p
[d
+192]), "m" (q
[d
]), "m" (q
[d
+64]),
278 "m" (q
[d
+128]), "m" (q
[d
+192]));
281 asm volatile("sfence" : : : "memory");
285 const struct raid6_calls raid6_avx512x4
= {
286 raid6_avx5124_gen_syndrome
,
287 NULL
, /* XOR not yet implemented */
290 1 /* Has cache hints */
294 #endif /* CONFIG_AS_AVX512 */