]> git.proxmox.com Git - mirror_zfs.git/blame - cmd/raidz_test/raidz_test.c
Remove bcopy(), bzero(), bcmp()
[mirror_zfs.git] / cmd / raidz_test / raidz_test.c
CommitLineData
ab9f4b0b
GN
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
24 */
25
26#include <sys/zfs_context.h>
27#include <sys/time.h>
28#include <sys/wait.h>
29#include <sys/zio.h>
30#include <umem.h>
31#include <sys/vdev_raidz.h>
32#include <sys/vdev_raidz_impl.h>
33#include <assert.h>
34#include <stdio.h>
35#include "raidz_test.h"
36
37static int *rand_data;
38raidz_test_opts_t rto_opts;
39
e7238382 40static char pid_s[16];
ab9f4b0b
GN
41
42static void sig_handler(int signo)
43{
e7238382 44 int old_errno = errno;
ab9f4b0b
GN
45 struct sigaction action;
46 /*
47 * Restore default action and re-raise signal so SIGSEGV and
48 * SIGABRT can trigger a core dump.
49 */
50 action.sa_handler = SIG_DFL;
51 sigemptyset(&action.sa_mask);
52 action.sa_flags = 0;
53 (void) sigaction(signo, &action, NULL);
54
e7238382
AZ
55 if (rto_opts.rto_gdb) {
56 pid_t pid = fork();
57 if (pid == 0) {
58 execlp("gdb", "gdb", "-ex", "set pagination 0",
59 "-p", pid_s, NULL);
60 _exit(-1);
61 } else if (pid > 0)
62 while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
63 ;
64 }
ab9f4b0b
GN
65
66 raise(signo);
e7238382 67 errno = old_errno;
ab9f4b0b
GN
68}
69
70static void print_opts(raidz_test_opts_t *opts, boolean_t force)
71{
72 char *verbose;
73 switch (opts->rto_v) {
b7c42ce5 74 case D_ALL:
ab9f4b0b
GN
75 verbose = "no";
76 break;
b7c42ce5 77 case D_INFO:
ab9f4b0b
GN
78 verbose = "info";
79 break;
b7c42ce5 80 case D_DEBUG:
ab9f4b0b
GN
81 default:
82 verbose = "debug";
83 break;
84 }
85
86 if (force || opts->rto_v >= D_INFO) {
87 (void) fprintf(stdout, DBLSEP "Running with options:\n"
88 " (-a) zio ashift : %zu\n"
89 " (-o) zio offset : 1 << %zu\n"
b2255edc
BB
90 " (-e) expanded map : %s\n"
91 " (-r) reflow offset : %llx\n"
ab9f4b0b
GN
92 " (-d) number of raidz data columns : %zu\n"
93 " (-s) size of DATA : 1 << %zu\n"
94 " (-S) sweep parameters : %s \n"
95 " (-v) verbose : %s \n\n",
b2255edc
BB
96 opts->rto_ashift, /* -a */
97 ilog2(opts->rto_offset), /* -o */
98 opts->rto_expand ? "yes" : "no", /* -e */
99 (u_longlong_t)opts->rto_expand_offset, /* -r */
100 opts->rto_dcols, /* -d */
101 ilog2(opts->rto_dsize), /* -s */
102 opts->rto_sweep ? "yes" : "no", /* -S */
103 verbose); /* -v */
ab9f4b0b
GN
104 }
105}
106
107static void usage(boolean_t requested)
108{
109 const raidz_test_opts_t *o = &rto_opts_defaults;
110
111 FILE *fp = requested ? stdout : stderr;
112
113 (void) fprintf(fp, "Usage:\n"
02730c33
BB
114 "\t[-a zio ashift (default: %zu)]\n"
115 "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
116 "\t[-d number of raidz data columns (default: %zu)]\n"
117 "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
118 "\t[-S parameter sweep (default: %s)]\n"
119 "\t[-t timeout for parameter sweep test]\n"
120 "\t[-B benchmark all raidz implementations]\n"
b2255edc
BB
121 "\t[-e use expanded raidz map (default: %s)]\n"
122 "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
b7c42ce5 123 "\t[-v increase verbosity (default: %d)]\n"
02730c33
BB
124 "\t[-h (print help)]\n"
125 "\t[-T test the test, see if failure would be detected]\n"
126 "\t[-D debug (attach gdb on SIGSEGV)]\n"
127 "",
128 o->rto_ashift, /* -a */
129 ilog2(o->rto_offset), /* -o */
130 o->rto_dcols, /* -d */
131 ilog2(o->rto_dsize), /* -s */
132 rto_opts.rto_sweep ? "yes" : "no", /* -S */
b2255edc
BB
133 rto_opts.rto_expand ? "yes" : "no", /* -e */
134 (u_longlong_t)o->rto_expand_offset, /* -r */
b7c42ce5 135 o->rto_v); /* -v */
ab9f4b0b
GN
136
137 exit(requested ? 0 : 1);
138}
139
140static void process_options(int argc, char **argv)
141{
142 size_t value;
143 int opt;
ab9f4b0b
GN
144 raidz_test_opts_t *o = &rto_opts;
145
861166b0 146 memcpy(o, &rto_opts_defaults, sizeof (*o));
ab9f4b0b 147
b2255edc 148 while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
ab9f4b0b
GN
149 value = 0;
150
151 switch (opt) {
152 case 'a':
153 value = strtoull(optarg, NULL, 0);
154 o->rto_ashift = MIN(13, MAX(9, value));
155 break;
b2255edc
BB
156 case 'e':
157 o->rto_expand = 1;
158 break;
159 case 'r':
160 o->rto_expand_offset = strtoull(optarg, NULL, 0);
161 break;
ab9f4b0b
GN
162 case 'o':
163 value = strtoull(optarg, NULL, 0);
164 o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
165 break;
166 case 'd':
167 value = strtoull(optarg, NULL, 0);
168 o->rto_dcols = MIN(255, MAX(1, value));
169 break;
170 case 's':
171 value = strtoull(optarg, NULL, 0);
172 o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT,
173 MAX(SPA_MINBLOCKSHIFT, value));
174 break;
175 case 't':
176 value = strtoull(optarg, NULL, 0);
177 o->rto_sweep_timeout = value;
178 break;
179 case 'v':
180 o->rto_v++;
181 break;
182 case 'S':
183 o->rto_sweep = 1;
184 break;
185 case 'B':
186 o->rto_benchmark = 1;
187 break;
188 case 'D':
189 o->rto_gdb = 1;
190 break;
191 case 'T':
192 o->rto_sanity = 1;
193 break;
194 case 'h':
195 usage(B_TRUE);
196 break;
197 case '?':
198 default:
199 usage(B_FALSE);
200 break;
201 }
202 }
203}
204
b2255edc
BB
/*
 * Column accessors for a row pointer rr.
 *
 * DATA_COL*() index relative to the row's rr_firstdatacol, CODE_COL*()
 * index from column 0.  The plain variants yield the column's rc_abd,
 * the _SIZE variants its rc_size.  Every use of the rr argument is
 * parenthesised so that any pointer expression can be passed, not only
 * a variable that happens to be named rr.
 */
#define	DATA_COL(rr, i)		\
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i)	\
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i)		((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i)	((rr)->rr_col[(i)].rc_size)
ab9f4b0b
GN
210
211static int
212cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
213{
b2255edc 214 int r, i, ret = 0;
ab9f4b0b
GN
215
216 VERIFY(parity >= 1 && parity <= 3);
217
b2255edc
BB
218 for (r = 0; r < rm->rm_nrows; r++) {
219 raidz_row_t * const rr = rm->rm_row[r];
220 raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
221 for (i = 0; i < parity; i++) {
222 if (CODE_COL_SIZE(rrg, i) == 0) {
223 VERIFY0(CODE_COL_SIZE(rr, i));
224 continue;
225 }
226
227 if (abd_cmp(CODE_COL(rr, i),
228 CODE_COL(rrg, i)) != 0) {
229 ret++;
230 LOG_OPT(D_DEBUG, opts,
231 "\nParity block [%d] different!\n", i);
232 }
ab9f4b0b
GN
233 }
234 }
235 return (ret);
236}
237
238static int
239cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
240{
b2255edc
BB
241 int r, i, dcols, ret = 0;
242
243 for (r = 0; r < rm->rm_nrows; r++) {
244 raidz_row_t *rr = rm->rm_row[r];
245 raidz_row_t *rrg = opts->rm_golden->rm_row[r];
246 dcols = opts->rm_golden->rm_row[0]->rr_cols -
247 raidz_parity(opts->rm_golden);
248 for (i = 0; i < dcols; i++) {
249 if (DATA_COL_SIZE(rrg, i) == 0) {
250 VERIFY0(DATA_COL_SIZE(rr, i));
251 continue;
252 }
ab9f4b0b 253
b2255edc
BB
254 if (abd_cmp(DATA_COL(rrg, i),
255 DATA_COL(rr, i)) != 0) {
256 ret++;
ab9f4b0b 257
b2255edc
BB
258 LOG_OPT(D_DEBUG, opts,
259 "\nData block [%d] different!\n", i);
260 }
ab9f4b0b
GN
261 }
262 }
263 return (ret);
264}
265
cbf484f8
GN
266static int
267init_rand(void *data, size_t size, void *private)
268{
876b60dc
AZ
269 (void) private;
270 memcpy(data, rand_data, size);
cbf484f8
GN
271 return (0);
272}
273
ab9f4b0b
GN
274static void
275corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
276{
b2255edc
BB
277 for (int r = 0; r < rm->rm_nrows; r++) {
278 raidz_row_t *rr = rm->rm_row[r];
279 for (int i = 0; i < cnt; i++) {
280 raidz_col_t *col = &rr->rr_col[tgts[i]];
281 abd_iterate_func(col->rc_abd, 0, col->rc_size,
282 init_rand, NULL);
283 }
ab9f4b0b
GN
284 }
285}
286
287void
cbf484f8 288init_zio_abd(zio_t *zio)
ab9f4b0b 289{
cbf484f8 290 abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
ab9f4b0b
GN
291}
292
293static void
294fini_raidz_map(zio_t **zio, raidz_map_t **rm)
295{
296 vdev_raidz_map_free(*rm);
cbf484f8 297 raidz_free((*zio)->io_abd, (*zio)->io_size);
ab9f4b0b
GN
298 umem_free(*zio, sizeof (zio_t));
299
300 *zio = NULL;
301 *rm = NULL;
302}
303
304static int
305init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
306{
307 int err = 0;
308 zio_t *zio_test;
309 raidz_map_t *rm_test;
310 const size_t total_ncols = opts->rto_dcols + parity;
311
312 if (opts->rm_golden) {
313 fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
314 }
315
316 opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
317 zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
318
319 opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
320 opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
321
cbf484f8
GN
322 opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
323 zio_test->io_abd = raidz_alloc(opts->rto_dsize);
ab9f4b0b 324
cbf484f8
GN
325 init_zio_abd(opts->zio_golden);
326 init_zio_abd(zio_test);
ab9f4b0b
GN
327
328 VERIFY0(vdev_raidz_impl_set("original"));
329
b2255edc
BB
330 if (opts->rto_expand) {
331 opts->rm_golden =
332 vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
333 opts->zio_golden->io_size, opts->zio_golden->io_offset,
334 opts->rto_ashift, total_ncols+1, total_ncols,
335 parity, opts->rto_expand_offset);
336 rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
337 zio_test->io_size, zio_test->io_offset,
338 opts->rto_ashift, total_ncols+1, total_ncols,
339 parity, opts->rto_expand_offset);
340 } else {
341 opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
342 opts->rto_ashift, total_ncols, parity);
343 rm_test = vdev_raidz_map_alloc(zio_test,
344 opts->rto_ashift, total_ncols, parity);
345 }
ab9f4b0b
GN
346
347 VERIFY(opts->zio_golden);
348 VERIFY(opts->rm_golden);
349
350 vdev_raidz_generate_parity(opts->rm_golden);
351 vdev_raidz_generate_parity(rm_test);
352
353 /* sanity check */
354 err |= cmp_data(opts, rm_test);
355 err |= cmp_code(opts, rm_test, parity);
356
357 if (err)
358 ERR("initializing the golden copy ... [FAIL]!\n");
359
360 /* tear down raidz_map of test zio */
361 fini_raidz_map(&zio_test, &rm_test);
362
363 return (err);
364}
365
b2255edc
BB
366/*
367 * If reflow is not in progress, reflow_offset should be UINT64_MAX.
368 * For each row, if the row is entirely before reflow_offset, it will
369 * come from the new location. Otherwise this row will come from the
370 * old location. Therefore, rows that straddle the reflow_offset will
371 * come from the old location.
372 *
373 * NOTE: Until raidz expansion is implemented this function is only
374 * needed by raidz_test.c to the multi-row raid_map_t functionality.
375 */
376raidz_map_t *
377vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
378 uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
379 uint64_t nparity, uint64_t reflow_offset)
380{
381 /* The zio's size in units of the vdev's minimum sector size. */
382 uint64_t s = size >> ashift;
383 uint64_t q, r, bc, devidx, asize = 0, tot;
384
385 /*
386 * "Quotient": The number of data sectors for this stripe on all but
387 * the "big column" child vdevs that also contain "remainder" data.
388 * AKA "full rows"
389 */
390 q = s / (logical_cols - nparity);
391
392 /*
393 * "Remainder": The number of partial stripe data sectors in this I/O.
394 * This will add a sector to some, but not all, child vdevs.
395 */
396 r = s - q * (logical_cols - nparity);
397
398 /* The number of "big columns" - those which contain remainder data. */
399 bc = (r == 0 ? 0 : r + nparity);
400
401 /*
402 * The total number of data and parity sectors associated with
403 * this I/O.
404 */
405 tot = s + nparity * (q + (r == 0 ? 0 : 1));
406
407 /* How many rows contain data (not skip) */
408 uint64_t rows = howmany(tot, logical_cols);
409 int cols = MIN(tot, logical_cols);
410
411 raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
412 KM_SLEEP);
413 rm->rm_nrows = rows;
414
415 for (uint64_t row = 0; row < rows; row++) {
416 raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
417 rr_col[cols]), KM_SLEEP);
418 rm->rm_row[row] = rr;
419
420 /* The starting RAIDZ (parent) vdev sector of the row. */
421 uint64_t b = (offset >> ashift) + row * logical_cols;
422
423 /*
424 * If we are in the middle of a reflow, and any part of this
425 * row has not been copied, then use the old location of
426 * this row.
427 */
428 int row_phys_cols = physical_cols;
429 if (b + (logical_cols - nparity) > reflow_offset >> ashift)
430 row_phys_cols--;
431
432 /* starting child of this row */
433 uint64_t child_id = b % row_phys_cols;
434 /* The starting byte offset on each child vdev. */
435 uint64_t child_offset = (b / row_phys_cols) << ashift;
436
437 /*
438 * We set cols to the entire width of the block, even
439 * if this row is shorter. This is needed because parity
440 * generation (for Q and R) needs to know the entire width,
441 * because it treats the short row as though it was
442 * full-width (and the "phantom" sectors were zero-filled).
443 *
444 * Another approach to this would be to set cols shorter
445 * (to just the number of columns that we might do i/o to)
446 * and have another mechanism to tell the parity generation
447 * about the "entire width". Reconstruction (at least
448 * vdev_raidz_reconstruct_general()) would also need to
449 * know about the "entire width".
450 */
451 rr->rr_cols = cols;
452 rr->rr_bigcols = bc;
453 rr->rr_missingdata = 0;
454 rr->rr_missingparity = 0;
455 rr->rr_firstdatacol = nparity;
b2255edc
BB
456 rr->rr_abd_empty = NULL;
457 rr->rr_nempty = 0;
458
459 for (int c = 0; c < rr->rr_cols; c++, child_id++) {
460 if (child_id >= row_phys_cols) {
461 child_id -= row_phys_cols;
462 child_offset += 1ULL << ashift;
463 }
464 rr->rr_col[c].rc_devidx = child_id;
465 rr->rr_col[c].rc_offset = child_offset;
b2255edc
BB
466 rr->rr_col[c].rc_orig_data = NULL;
467 rr->rr_col[c].rc_error = 0;
468 rr->rr_col[c].rc_tried = 0;
469 rr->rr_col[c].rc_skipped = 0;
470 rr->rr_col[c].rc_need_orig_restore = B_FALSE;
471
472 uint64_t dc = c - rr->rr_firstdatacol;
473 if (c < rr->rr_firstdatacol) {
474 rr->rr_col[c].rc_size = 1ULL << ashift;
475 rr->rr_col[c].rc_abd =
476 abd_alloc_linear(rr->rr_col[c].rc_size,
477 B_TRUE);
478 } else if (row == rows - 1 && bc != 0 && c >= bc) {
479 /*
480 * Past the end, this for parity generation.
481 */
482 rr->rr_col[c].rc_size = 0;
483 rr->rr_col[c].rc_abd = NULL;
484 } else {
485 /*
486 * "data column" (col excluding parity)
487 * Add an ASCII art diagram here
488 */
489 uint64_t off;
490
491 if (c < bc || r == 0) {
492 off = dc * rows + row;
493 } else {
494 off = r * rows +
495 (dc - r) * (rows - 1) + row;
496 }
497 rr->rr_col[c].rc_size = 1ULL << ashift;
e2af2acc
MA
498 rr->rr_col[c].rc_abd = abd_get_offset_struct(
499 &rr->rr_col[c].rc_abdstruct,
500 abd, off << ashift, 1 << ashift);
b2255edc
BB
501 }
502
503 asize += rr->rr_col[c].rc_size;
504 }
505 /*
506 * If all data stored spans all columns, there's a danger that
507 * parity will always be on the same device and, since parity
508 * isn't read during normal operation, that that device's I/O
509 * bandwidth won't be used effectively. We therefore switch
510 * the parity every 1MB.
511 *
512 * ...at least that was, ostensibly, the theory. As a practical
513 * matter unless we juggle the parity between all devices
514 * evenly, we won't see any benefit. Further, occasional writes
515 * that aren't a multiple of the LCM of the number of children
516 * and the minimum stripe width are sufficient to avoid pessimal
517 * behavior. Unfortunately, this decision created an implicit
518 * on-disk format requirement that we need to support for all
519 * eternity, but only for single-parity RAID-Z.
520 *
521 * If we intend to skip a sector in the zeroth column for
522 * padding we must make sure to note this swap. We will never
523 * intend to skip the first column since at least one data and
524 * one parity column must appear in each row.
525 */
526 if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
527 (offset & (1ULL << 20))) {
528 ASSERT(rr->rr_cols >= 2);
529 ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
530 devidx = rr->rr_col[0].rc_devidx;
531 uint64_t o = rr->rr_col[0].rc_offset;
532 rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
533 rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
534 rr->rr_col[1].rc_devidx = devidx;
535 rr->rr_col[1].rc_offset = o;
536 }
537
538 }
539 ASSERT3U(asize, ==, tot << ashift);
540
541 /* init RAIDZ parity ops */
542 rm->rm_ops = vdev_raidz_math_get_ops();
543
544 return (rm);
545}
546
ab9f4b0b
GN
547static raidz_map_t *
548init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
549{
550 raidz_map_t *rm = NULL;
551 const size_t alloc_dsize = opts->rto_dsize;
552 const size_t total_ncols = opts->rto_dcols + parity;
553 const int ccols[] = { 0, 1, 2 };
554
555 VERIFY(zio);
556 VERIFY(parity <= 3 && parity >= 1);
557
558 *zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
559
560 (*zio)->io_offset = 0;
561 (*zio)->io_size = alloc_dsize;
cbf484f8
GN
562 (*zio)->io_abd = raidz_alloc(alloc_dsize);
563 init_zio_abd(*zio);
ab9f4b0b 564
b2255edc
BB
565 if (opts->rto_expand) {
566 rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
567 (*zio)->io_size, (*zio)->io_offset,
568 opts->rto_ashift, total_ncols+1, total_ncols,
569 parity, opts->rto_expand_offset);
570 } else {
571 rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
572 total_ncols, parity);
573 }
ab9f4b0b
GN
574 VERIFY(rm);
575
576 /* Make sure code columns are destroyed */
577 corrupt_colums(rm, ccols, parity);
578
579 return (rm);
580}
581
582static int
583run_gen_check(raidz_test_opts_t *opts)
584{
585 char **impl_name;
586 int fn, err = 0;
587 zio_t *zio_test;
588 raidz_map_t *rm_test;
589
590 err = init_raidz_golden_map(opts, PARITY_PQR);
591 if (0 != err)
592 return (err);
593
594 LOG(D_INFO, DBLSEP);
595 LOG(D_INFO, "Testing parity generation...\n");
596
597 for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
598 impl_name++) {
599
600 LOG(D_INFO, SEP);
601 LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
602
603 if (0 != vdev_raidz_impl_set(*impl_name)) {
604 LOG(D_INFO, "[SKIP]\n");
605 continue;
606 } else {
607 LOG(D_INFO, "[SUPPORTED]\n");
608 }
609
610 for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
611
292d573e
GN
612 /* Check if should stop */
613 if (rto_opts.rto_should_stop)
614 return (err);
615
ab9f4b0b
GN
616 /* create suitable raidz_map */
617 rm_test = init_raidz_map(opts, &zio_test, fn+1);
618 VERIFY(rm_test);
619
620 LOG(D_INFO, "\t\tTesting method [%s] ...",
621 raidz_gen_name[fn]);
622
623 if (!opts->rto_sanity)
624 vdev_raidz_generate_parity(rm_test);
625
626 if (cmp_code(opts, rm_test, fn+1) != 0) {
627 LOG(D_INFO, "[FAIL]\n");
628 err++;
629 } else
630 LOG(D_INFO, "[PASS]\n");
631
632 fini_raidz_map(&zio_test, &rm_test);
633 }
634 }
635
636 fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
637
638 return (err);
639}
640
641static int
642run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
643{
644 int x0, x1, x2;
645 int tgtidx[3];
646 int err = 0;
647 static const int rec_tgts[7][3] = {
648 {1, 2, 3}, /* rec_p: bad QR & D[0] */
649 {0, 2, 3}, /* rec_q: bad PR & D[0] */
650 {0, 1, 3}, /* rec_r: bad PQ & D[0] */
651 {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
652 {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
653 {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
654 {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
655 };
656
657 memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
658
659 if (fn < RAIDZ_REC_PQ) {
660 /* can reconstruct 1 failed data disk */
661 for (x0 = 0; x0 < opts->rto_dcols; x0++) {
b2255edc 662 if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
ab9f4b0b
GN
663 continue;
664
292d573e
GN
665 /* Check if should stop */
666 if (rto_opts.rto_should_stop)
667 return (err);
668
ab9f4b0b
GN
669 LOG(D_DEBUG, "[%d] ", x0);
670
671 tgtidx[2] = x0 + raidz_parity(rm);
672
673 corrupt_colums(rm, tgtidx+2, 1);
674
675 if (!opts->rto_sanity)
676 vdev_raidz_reconstruct(rm, tgtidx, 3);
677
678 if (cmp_data(opts, rm) != 0) {
679 err++;
680 LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
681 }
682 }
683
684 } else if (fn < RAIDZ_REC_PQR) {
685 /* can reconstruct 2 failed data disk */
686 for (x0 = 0; x0 < opts->rto_dcols; x0++) {
b2255edc 687 if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
ab9f4b0b
GN
688 continue;
689 for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
b2255edc
BB
690 if (x1 >= rm->rm_row[0]->rr_cols -
691 raidz_parity(rm))
ab9f4b0b
GN
692 continue;
693
292d573e
GN
694 /* Check if should stop */
695 if (rto_opts.rto_should_stop)
696 return (err);
697
ab9f4b0b
GN
698 LOG(D_DEBUG, "[%d %d] ", x0, x1);
699
700 tgtidx[1] = x0 + raidz_parity(rm);
701 tgtidx[2] = x1 + raidz_parity(rm);
702
703 corrupt_colums(rm, tgtidx+1, 2);
704
705 if (!opts->rto_sanity)
706 vdev_raidz_reconstruct(rm, tgtidx, 3);
707
708 if (cmp_data(opts, rm) != 0) {
709 err++;
710 LOG(D_DEBUG, "\nREC D[%d %d]... "
711 "[FAIL]\n", x0, x1);
712 }
713 }
714 }
715 } else {
716 /* can reconstruct 3 failed data disk */
02730c33 717 for (x0 = 0; x0 < opts->rto_dcols; x0++) {
b2255edc 718 if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
ab9f4b0b 719 continue;
02730c33 720 for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
b2255edc
BB
721 if (x1 >= rm->rm_row[0]->rr_cols -
722 raidz_parity(rm))
ab9f4b0b 723 continue;
02730c33 724 for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
b2255edc
BB
725 if (x2 >= rm->rm_row[0]->rr_cols -
726 raidz_parity(rm))
ab9f4b0b
GN
727 continue;
728
292d573e
GN
729 /* Check if should stop */
730 if (rto_opts.rto_should_stop)
731 return (err);
732
ab9f4b0b
GN
733 LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
734
735 tgtidx[0] = x0 + raidz_parity(rm);
736 tgtidx[1] = x1 + raidz_parity(rm);
737 tgtidx[2] = x2 + raidz_parity(rm);
738
739 corrupt_colums(rm, tgtidx, 3);
740
741 if (!opts->rto_sanity)
742 vdev_raidz_reconstruct(rm,
02730c33 743 tgtidx, 3);
ab9f4b0b
GN
744
745 if (cmp_data(opts, rm) != 0) {
746 err++;
747 LOG(D_DEBUG,
748 "\nREC D[%d %d %d]... "
749 "[FAIL]\n", x0, x1, x2);
750 }
751 }
752 }
753 }
754 }
755 return (err);
756}
757
758static int
759run_rec_check(raidz_test_opts_t *opts)
760{
761 char **impl_name;
762 unsigned fn, err = 0;
763 zio_t *zio_test;
764 raidz_map_t *rm_test;
765
766 err = init_raidz_golden_map(opts, PARITY_PQR);
767 if (0 != err)
768 return (err);
769
770 LOG(D_INFO, DBLSEP);
771 LOG(D_INFO, "Testing data reconstruction...\n");
772
773 for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
774 impl_name++) {
775
776 LOG(D_INFO, SEP);
777 LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
778
779 if (vdev_raidz_impl_set(*impl_name) != 0) {
780 LOG(D_INFO, "[SKIP]\n");
781 continue;
782 } else
783 LOG(D_INFO, "[SUPPORTED]\n");
784
785
786 /* create suitable raidz_map */
787 rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
788 /* generate parity */
789 vdev_raidz_generate_parity(rm_test);
790
791 for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
792
793 LOG(D_INFO, "\t\tTesting method [%s] ...",
02730c33 794 raidz_rec_name[fn]);
ab9f4b0b
GN
795
796 if (run_rec_check_impl(opts, rm_test, fn) != 0) {
797 LOG(D_INFO, "[FAIL]\n");
798 err++;
799
800 } else
801 LOG(D_INFO, "[PASS]\n");
802
803 }
804 /* tear down test raidz_map */
805 fini_raidz_map(&zio_test, &rm_test);
806 }
807
808 fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
809
810 return (err);
811}
812
813static int
814run_test(raidz_test_opts_t *opts)
815{
816 int err = 0;
817
818 if (opts == NULL)
819 opts = &rto_opts;
820
821 print_opts(opts, B_FALSE);
822
823 err |= run_gen_check(opts);
824 err |= run_rec_check(opts);
825
826 return (err);
827}
828
829#define SWEEP_RUNNING 0
830#define SWEEP_FINISHED 1
831#define SWEEP_ERROR 2
832#define SWEEP_TIMEOUT 3
833
834static int sweep_state = 0;
835static raidz_test_opts_t failed_opts;
836
837static kmutex_t sem_mtx;
838static kcondvar_t sem_cv;
839static int max_free_slots;
840static int free_slots;
841
db7f1a91 842static _Noreturn void
ab9f4b0b
GN
843sweep_thread(void *arg)
844{
845 int err = 0;
02730c33 846 raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
ab9f4b0b
GN
847 VERIFY(opts != NULL);
848
849 err = run_test(opts);
850
851 if (rto_opts.rto_sanity) {
852 /* 25% chance that a sweep test fails */
853 if (rand() < (RAND_MAX/4))
854 err = 1;
855 }
856
857 if (0 != err) {
858 mutex_enter(&sem_mtx);
859 memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
860 sweep_state = SWEEP_ERROR;
861 mutex_exit(&sem_mtx);
862 }
863
864 umem_free(opts, sizeof (raidz_test_opts_t));
865
866 /* signal the next thread */
867 mutex_enter(&sem_mtx);
868 free_slots++;
869 cv_signal(&sem_cv);
870 mutex_exit(&sem_mtx);
871
872 thread_exit();
873}
874
875static int
876run_sweep(void)
877{
292d573e
GN
878 static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
879 static const size_t ashift_v[] = { 9, 12, 14 };
ab9f4b0b
GN
880 static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
881 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
882
883 (void) setvbuf(stdout, NULL, _IONBF, 0);
884
885 ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
292d573e 886 ARRAY_SIZE(dcols_v);
ab9f4b0b
GN
887 ulong_t tried_comb = 0;
888 hrtime_t time_diff, start_time = gethrtime();
889 raidz_test_opts_t *opts;
292d573e 890 int a, d, s;
ab9f4b0b
GN
891
892 max_free_slots = free_slots = MAX(2, boot_ncpus);
893
894 mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
895 cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
896
897 for (s = 0; s < ARRAY_SIZE(size_v); s++)
898 for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
ab9f4b0b
GN
899 for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
900
292d573e 901 if (size_v[s] < (1 << ashift_v[a])) {
ab9f4b0b
GN
902 total_comb--;
903 continue;
904 }
905
906 if (++tried_comb % 20 == 0)
907 LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
908
909 /* wait for signal to start new thread */
910 mutex_enter(&sem_mtx);
911 while (cv_timedwait_sig(&sem_cv, &sem_mtx,
912 ddi_get_lbolt() + hz)) {
913
914 /* check if should stop the test (timeout) */
915 time_diff = (gethrtime() - start_time) / NANOSEC;
916 if (rto_opts.rto_sweep_timeout > 0 &&
917 time_diff >= rto_opts.rto_sweep_timeout) {
918 sweep_state = SWEEP_TIMEOUT;
292d573e 919 rto_opts.rto_should_stop = B_TRUE;
ab9f4b0b
GN
920 mutex_exit(&sem_mtx);
921 goto exit;
922 }
923
924 /* check if should stop the test (error) */
925 if (sweep_state != SWEEP_RUNNING) {
926 mutex_exit(&sem_mtx);
927 goto exit;
928 }
929
930 /* exit loop if a slot is available */
931 if (free_slots > 0) {
932 break;
933 }
934 }
935
936 free_slots--;
937 mutex_exit(&sem_mtx);
938
939 opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
940 opts->rto_ashift = ashift_v[a];
941 opts->rto_dcols = dcols_v[d];
292d573e 942 opts->rto_offset = (1 << ashift_v[a]) * rand();
ab9f4b0b 943 opts->rto_dsize = size_v[s];
b2255edc
BB
944 opts->rto_expand = rto_opts.rto_expand;
945 opts->rto_expand_offset = rto_opts.rto_expand_offset;
ab9f4b0b
GN
946 opts->rto_v = 0; /* be quiet */
947
c25b8f99
BB
948 VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
949 0, NULL, TS_RUN, defclsyspri), !=, NULL);
ab9f4b0b
GN
950 }
951
952exit:
953 LOG(D_ALL, "\nWaiting for test threads to finish...\n");
954 mutex_enter(&sem_mtx);
955 VERIFY(free_slots <= max_free_slots);
956 while (free_slots < max_free_slots) {
957 (void) cv_wait(&sem_cv, &sem_mtx);
958 }
959 mutex_exit(&sem_mtx);
960
961 if (sweep_state == SWEEP_ERROR) {
962 ERR("Sweep test failed! Failed option: \n");
963 print_opts(&failed_opts, B_TRUE);
964 } else {
965 if (sweep_state == SWEEP_TIMEOUT)
966 LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
967 (ulong_t)rto_opts.rto_sweep_timeout);
968
969 LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
970 (ulong_t)tried_comb);
971 }
972
c17486b2
GN
973 mutex_destroy(&sem_mtx);
974
ab9f4b0b
GN
975 return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
976}
977
b2255edc 978
ab9f4b0b
GN
979int
980main(int argc, char **argv)
981{
982 size_t i;
983 struct sigaction action;
984 int err = 0;
985
e7238382
AZ
986 /* init gdb pid string early */
987 (void) sprintf(pid_s, "%d", getpid());
ab9f4b0b
GN
988
989 action.sa_handler = sig_handler;
990 sigemptyset(&action.sa_mask);
991 action.sa_flags = 0;
992
993 if (sigaction(SIGSEGV, &action, NULL) < 0) {
994 ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
995 exit(EXIT_FAILURE);
996 }
997
998 (void) setvbuf(stdout, NULL, _IOLBF, 0);
999
1000 dprintf_setup(&argc, argv);
1001
1002 process_options(argc, argv);
1003
da92d5cb 1004 kernel_init(SPA_MODE_READ);
ab9f4b0b
GN
1005
1006 /* setup random data because rand() is not reentrant */
02730c33 1007 rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
ab9f4b0b
GN
1008 srand((unsigned)time(NULL) * getpid());
1009 for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
1010 rand_data[i] = rand();
1011
1012 mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
1013
1014 if (rto_opts.rto_benchmark) {
1015 run_raidz_benchmark();
1016 } else if (rto_opts.rto_sweep) {
1017 err = run_sweep();
1018 } else {
1019 err = run_test(NULL);
1020 }
1021
1022 umem_free(rand_data, SPA_MAXBLOCKSIZE);
1023 kernel_fini();
1024
1025 return (err);
1026}