[mirror_zfs.git] / module / zfs / vdev_raidz_math_scalar.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
 */

#include <sys/vdev_raidz_impl.h>

/*
 * Provide native CPU scalar routines.
 * Support 32bit and 64bit CPUs.
 */
#if ((~(0x0ULL)) >> 24) == 0xffULL
#define	ELEM_SIZE	4
typedef uint32_t iv_t;
#elif ((~(0x0ULL)) >> 56) == 0xffULL
#define	ELEM_SIZE	8
typedef uint64_t iv_t;
#endif

/*
 * Vector type used in scalar implementation
 *
 * The union is expected to be of native CPU register size. Since addition
 * uses XOR operation, it can be performed an all byte elements at once.
 * Multiplication requires per byte access.
 */
typedef union {
	iv_t e;
	uint8_t b[ELEM_SIZE];
} v_t;

/*
 * Precomputed lookup tables for multiplication by a constant
 *
 * Reconstruction path requires multiplication by a constant factors. Instead of
 * performing two step lookup (log & exp tables), a direct lookup can be used
 * instead. Multiplication of element 'a' by a constant 'c' is obtained as:
 *
 * 	r = vdev_raidz_mul_lt[c_log][a];
 *
 * where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
 * they are faster to obtain while solving the syndrome equations.
 *
 * PERFORMANCE NOTE:
 * Even though the complete lookup table uses 64kiB, only relatively small
 * portion of it is used at the same time. Following shows number of accessed
 * bytes for different cases:
 * 	- 1 failed disk: 256B (1 mul. coefficient)
 * 	- 2 failed disks: 512B (2 mul. coefficients)
 * 	- 3 failed disks: 1536B (6 mul. coefficients)
 *
 * Size of actually accessed lookup table regions is only larger for
 * reconstruction of 3 failed disks, when compared to traditional log/exp
 * method. But since the result is obtained in one lookup step performance is
 * doubled.
 */
static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));

static void
raidz_init_scalar(void)
{
	int c, i;
	for (c = 0; c < 256; c++)
		for (i = 0; i < 256; i++)
			vdev_raidz_mul_lt[c][i] = gf_mul(c, i);

}

#define	PREFETCHNTA(ptr, offset)	{}
#define	PREFETCH(ptr, offset) 		{}

#define	XOR_ACC(src, acc)	acc.e ^= ((v_t *)src)[0].e
#define	XOR(src, acc)		acc.e ^= src.e
#define	ZERO(acc)		acc.e = 0
#define	COPY(src, dst)		dst = src
#define	LOAD(src, val) 		val = ((v_t *)src)[0]
#define	STORE(dst, val)		((v_t *)dst)[0] = val

/*
 * Constants used for optimized multiplication by 2.
 */
static const struct {
	iv_t mod;
	iv_t mask;
	iv_t msb;
} scalar_mul2_consts = {
#if ELEM_SIZE == 8
	.mod	= 0x1d1d1d1d1d1d1d1dULL,
	.mask	= 0xfefefefefefefefeULL,
	.msb	= 0x8080808080808080ULL,
#else
	.mod	= 0x1d1d1d1dULL,
	.mask	= 0xfefefefeULL,
	.msb	= 0x80808080ULL,
#endif
};

#define	MUL2_SETUP() {}

#define	MUL2(a)								\
{									\
	iv_t _mask;							\
									\
	_mask = (a).e & scalar_mul2_consts.msb;				\
	_mask = (_mask << 1) - (_mask >> 7);				\
	(a).e = ((a).e << 1) & scalar_mul2_consts.mask;			\
	(a).e = (a).e ^ (_mask & scalar_mul2_consts.mod);		\
}

#define	MUL4(a) 							\
{									\
	MUL2(a);							\
	MUL2(a);							\
}

#define	MUL(c, a)							\
{									\
	const uint8_t *mul_lt = vdev_raidz_mul_lt[c];			\
	switch (ELEM_SIZE) {						\
	case 8:								\
		a.b[7] = mul_lt[a.b[7]];				\
		a.b[6] = mul_lt[a.b[6]];				\
		a.b[5] = mul_lt[a.b[5]];				\
		a.b[4] = mul_lt[a.b[4]];				\
	case 4:								\
		a.b[3] = mul_lt[a.b[3]];				\
		a.b[2] = mul_lt[a.b[2]];				\
		a.b[1] = mul_lt[a.b[1]];				\
		a.b[0] = mul_lt[a.b[0]];				\
		break;							\
	}								\
}

#define	raidz_math_begin()	{}
#define	raidz_math_end()	{}

#define	GEN_P_DEFINE() v_t p0
#define	GEN_P_STRIDE	1
#define	GEN_P_P		p0

#define	GEN_PQ_DEFINE() v_t d0, p0, q0
#define	GEN_PQ_STRIDE	1
#define	GEN_PQ_D	d0
#define	GEN_PQ_P	p0
#define	GEN_PQ_Q	q0

#define	GEN_PQR_DEFINE() v_t d0, p0, q0, r0
#define	GEN_PQR_STRIDE	1
#define	GEN_PQR_D	d0
#define	GEN_PQR_P	p0
#define	GEN_PQR_Q	q0
#define	GEN_PQR_R	r0

#define	REC_P_DEFINE() 	v_t x0
#define	REC_P_STRIDE	1
#define	REC_P_X		x0

#define	REC_Q_DEFINE() 	v_t x0
#define	REC_Q_STRIDE	1
#define	REC_Q_X		x0

#define	REC_R_DEFINE() 	v_t x0
#define	REC_R_STRIDE	1
#define	REC_R_X		x0

#define	REC_PQ_DEFINE() v_t x0, y0, d0
#define	REC_PQ_STRIDE	1
#define	REC_PQ_X	x0
#define	REC_PQ_Y	y0
#define	REC_PQ_D	d0

#define	REC_PR_DEFINE() v_t x0, y0, d0
#define	REC_PR_STRIDE	1
#define	REC_PR_X	x0
#define	REC_PR_Y	y0
#define	REC_PR_D	d0

#define	REC_QR_DEFINE() v_t x0, y0, d0
#define	REC_QR_STRIDE	1
#define	REC_QR_X	x0
#define	REC_QR_Y	y0
#define	REC_QR_D	d0

#define	REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0
#define	REC_PQR_STRIDE	1
#define	REC_PQR_X	x0
#define	REC_PQR_Y	y0
#define	REC_PQR_Z	z0
#define	REC_PQR_D	d0
#define	REC_PQR_XS	d0
#define	REC_PQR_YS	t0

#include "vdev_raidz_math_impl.h"

/*
 * If compiled with -O0, gcc doesn't do any stack frame coalescing
 * and -Wframe-larger-than=1024 is triggered in debug mode.
 * Starting with gcc 4.8, new opt level -Og is introduced for debugging, which
 * does not trigger this warning.
 */
#pragma GCC diagnostic ignored "-Wframe-larger-than="

DEFINE_GEN_METHODS(scalar);
DEFINE_REC_METHODS(scalar);

boolean_t
raidz_will_scalar_work(void)
{
	return (B_TRUE); /* always */
}

const raidz_impl_ops_t vdev_raidz_scalar_impl = {
	.init = raidz_init_scalar,
	.fini = NULL,
	.gen = RAIDZ_GEN_METHODS(scalar),
	.rec = RAIDZ_REC_METHODS(scalar),
	.is_supported = &raidz_will_scalar_work,
	.name = "scalar"
};

/* Powers of 2 in the RAID-Z Galois field. */
const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
	0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
	0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
	0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
	0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
	0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
	0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
	0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
	0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
	0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
	0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
	0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
	0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
	0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
	0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
	0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
	0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
	0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
	0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
	0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
	0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
	0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
	0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
	0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
	0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
	0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
	0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
	0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
	0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
	0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
	0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
};

/* Logs of 2 in the RAID-Z Galois field. */
const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
	0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
	0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
	0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
	0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
	0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
	0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
	0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
	0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
	0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
	0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
	0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
	0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
	0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
	0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
	0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
	0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
	0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
	0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
	0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
	0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
	0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
	0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
	0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
	0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
	0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
	0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
	0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
	0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
	0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
	0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
	0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
	0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
};
Commit	Line	Data
ab9f4b0b GN	1	/*
	2	* CDDL HEADER START
	3	*
	4	* The contents of this file are subject to the terms of the
	5	* Common Development and Distribution License (the "License").
	6	* You may not use this file except in compliance with the License.
	7	*
	8	* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
	9	* or http://www.opensolaris.org/os/licensing.
	10	* See the License for the specific language governing permissions
	11	* and limitations under the License.
	12	*
	13	* When distributing Covered Code, include this CDDL HEADER in each
	14	* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
	15	* If applicable, add the following below this CDDL HEADER, with the
	16	* fields enclosed by brackets "[]" replaced with your own identifying
	17	* information: Portions Copyright [yyyy] [name of copyright owner]
	18	*
	19	* CDDL HEADER END
	20	*/
	21
	22	/*
	23	* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
	24	*/
	25
	26	#include <sys/vdev_raidz_impl.h>
	27
	28	/*
	29	* Provide native CPU scalar routines.
	30	* Support 32bit and 64bit CPUs.
	31	*/
	32	#if ((~(0x0ULL)) >> 24) == 0xffULL
	33	#define ELEM_SIZE 4
	34	typedef uint32_t iv_t;
	35	#elif ((~(0x0ULL)) >> 56) == 0xffULL
	36	#define ELEM_SIZE 8
	37	typedef uint64_t iv_t;
	38	#endif
	39
	40	/*
	41	* Vector type used in scalar implementation
	42	*
	43	* The union is expected to be of native CPU register size. Since addition
	44	* uses XOR operation, it can be performed an all byte elements at once.
	45	* Multiplication requires per byte access.
	46	*/
	47	typedef union {
	48	iv_t e;
	49	uint8_t b[ELEM_SIZE];
	50	} v_t;
	51
	52	/*
	53	* Precomputed lookup tables for multiplication by a constant
	54	*
	55	* Reconstruction path requires multiplication by a constant factors. Instead of
	56	* performing two step lookup (log & exp tables), a direct lookup can be used
	57	* instead. Multiplication of element 'a' by a constant 'c' is obtained as:
	58	*
	59	* r = vdev_raidz_mul_lt[c_log][a];
	60	*
	61	* where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
	62	* they are faster to obtain while solving the syndrome equations.
	63	*
	64	* PERFORMANCE NOTE:
65	* Even though the complete lookup table uses 64kiB, only relatively small
66	* portion of it is used at the same time. Following shows number of accessed
67	* bytes for different cases:
68	* - 1 failed disk: 256B (1 mul. coefficient)
69	* - 2 failed disks: 512B (2 mul. coefficients)
70	* - 3 failed disks: 1536B (6 mul. coefficients)
71	*
72	* Size of actually accessed lookup table regions is only larger for
73	* reconstruction of 3 failed disks, when compared to traditional log/exp
74	* method. But since the result is obtained in one lookup step performance is
75	* doubled.
76	*/
77	static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
78
79	static void
80	raidz_init_scalar(void)
81	{
82	int c, i;
83	for (c = 0; c < 256; c++)
84	for (i = 0; i < 256; i++)
85	vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
86
87	}
88
89	#define PREFETCHNTA(ptr, offset) {}
90	#define PREFETCH(ptr, offset) {}
91
92	#define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e
93	#define XOR(src, acc) acc.e ^= src.e
62a65a65	94	#define ZERO(acc) acc.e = 0
ab9f4b0b GN	95	#define COPY(src, dst) dst = src
	96	#define LOAD(src, val) val = ((v_t *)src)[0]
	97	#define STORE(dst, val) ((v_t *)dst)[0] = val
	98
	99	/*
	100	* Constants used for optimized multiplication by 2.
	101	*/
	102	static const struct {
	103	iv_t mod;
	104	iv_t mask;
	105	iv_t msb;
	106	} scalar_mul2_consts = {
	107	#if ELEM_SIZE == 8
	108	.mod = 0x1d1d1d1d1d1d1d1dULL,
	109	.mask = 0xfefefefefefefefeULL,
	110	.msb = 0x8080808080808080ULL,
	111	#else
	112	.mod = 0x1d1d1d1dULL,
	113	.mask = 0xfefefefeULL,
	114	.msb = 0x80808080ULL,
	115	#endif
	116	};
	117
	118	#define MUL2_SETUP() {}
	119
	120	#define MUL2(a) \
	121	{ \
	122	iv_t _mask; \
	123	\
	124	_mask = (a).e & scalar_mul2_consts.msb; \
	125	_mask = (_mask << 1) - (_mask >> 7); \
	126	(a).e = ((a).e << 1) & scalar_mul2_consts.mask; \
	127	(a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \
	128	}
	129
	130	#define MUL4(a) \
	131	{ \
	132	MUL2(a); \
	133	MUL2(a); \
	134	}
	135
	136	#define MUL(c, a) \
	137	{ \
	138	const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \
	139	switch (ELEM_SIZE) { \
	140	case 8: \
	141	a.b[7] = mul_lt[a.b[7]]; \
	142	a.b[6] = mul_lt[a.b[6]]; \
	143	a.b[5] = mul_lt[a.b[5]]; \
	144	a.b[4] = mul_lt[a.b[4]]; \
	145	case 4: \
	146	a.b[3] = mul_lt[a.b[3]]; \
	147	a.b[2] = mul_lt[a.b[2]]; \
	148	a.b[1] = mul_lt[a.b[1]]; \
	149	a.b[0] = mul_lt[a.b[0]]; \
	150	break; \
	151	} \
	152	}
	153
	154	#define raidz_math_begin() {}
	155	#define raidz_math_end() {}
	156
	157	#define GEN_P_DEFINE() v_t p0
	158	#define GEN_P_STRIDE 1
159	#define GEN_P_P p0
160
161	#define GEN_PQ_DEFINE() v_t d0, p0, q0
162	#define GEN_PQ_STRIDE 1
163	#define GEN_PQ_D d0
164	#define GEN_PQ_P p0
165	#define GEN_PQ_Q q0
166
167	#define GEN_PQR_DEFINE() v_t d0, p0, q0, r0
168	#define GEN_PQR_STRIDE 1
169	#define GEN_PQR_D d0
170	#define GEN_PQR_P p0
171	#define GEN_PQR_Q q0
172	#define GEN_PQR_R r0
173
174	#define REC_P_DEFINE() v_t x0
175	#define REC_P_STRIDE 1
176	#define REC_P_X x0
177
178	#define REC_Q_DEFINE() v_t x0
179	#define REC_Q_STRIDE 1
180	#define REC_Q_X x0
181
182	#define REC_R_DEFINE() v_t x0
183	#define REC_R_STRIDE 1
184	#define REC_R_X x0
185
186	#define REC_PQ_DEFINE() v_t x0, y0, d0
187	#define REC_PQ_STRIDE 1
188	#define REC_PQ_X x0
189	#define REC_PQ_Y y0
190	#define REC_PQ_D d0
191
192	#define REC_PR_DEFINE() v_t x0, y0, d0
193	#define REC_PR_STRIDE 1
194	#define REC_PR_X x0
195	#define REC_PR_Y y0
196	#define REC_PR_D d0
197
198	#define REC_QR_DEFINE() v_t x0, y0, d0
199	#define REC_QR_STRIDE 1
200	#define REC_QR_X x0
201	#define REC_QR_Y y0
202	#define REC_QR_D d0
203
204	#define REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0
205	#define REC_PQR_STRIDE 1
206	#define REC_PQR_X x0
207	#define REC_PQR_Y y0
208	#define REC_PQR_Z z0
209	#define REC_PQR_D d0
210	#define REC_PQR_XS d0
211	#define REC_PQR_YS t0
212
213	#include "vdev_raidz_math_impl.h"
214
590c9a09 GN	215	/*
	216	* If compiled with -O0, gcc doesn't do any stack frame coalescing
	217	* and -Wframe-larger-than=1024 is triggered in debug mode.
	218	* Starting with gcc 4.8, new opt level -Og is introduced for debugging, which
	219	* does not trigger this warning.
	220	*/
	221	#pragma GCC diagnostic ignored "-Wframe-larger-than="
	222
ab9f4b0b GN	223	DEFINE_GEN_METHODS(scalar);
	224	DEFINE_REC_METHODS(scalar);
	225
c9187d86	226	boolean_t
ab9f4b0b GN	227	raidz_will_scalar_work(void)
	228	{
	229	return (B_TRUE); /* always */
	230	}
	231
	232	const raidz_impl_ops_t vdev_raidz_scalar_impl = {
	233	.init = raidz_init_scalar,
	234	.fini = NULL,
	235	.gen = RAIDZ_GEN_METHODS(scalar),
	236	.rec = RAIDZ_REC_METHODS(scalar),
	237	.is_supported = &raidz_will_scalar_work,
	238	.name = "scalar"
	239	};
	240
	241	/* Powers of 2 in the RAID-Z Galois field. */
	242	const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
	243	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	244	0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
	245	0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
	246	0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
	247	0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
	248	0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
	249	0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
	250	0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
	251	0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
	252	0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
	253	0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
	254	0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
	255	0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
	256	0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
	257	0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
	258	0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
	259	0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
	260	0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
	261	0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
	262	0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
	263	0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
	264	0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
	265	0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
	266	0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
	267	0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
	268	0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
	269	0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
	270	0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
	271	0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
	272	0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
	273	0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
	274	0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
	275	};
	276
	277	/* Logs of 2 in the RAID-Z Galois field. */
	278	const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
	279	0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
	280	0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
	281	0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
	282	0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
	283	0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
	284	0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
	285	0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
	286	0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
	287	0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
	288	0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
	289	0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
	290	0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
291	0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
292	0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
293	0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
294	0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
295	0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
296	0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
297	0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
298	0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
299	0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
300	0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
301	0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
302	0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
303	0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
304	0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
305	0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
306	0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
307	0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
308	0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
309	0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
310	0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
311	};