4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
26 #include <sys/zfs_context.h>
31 #include <sys/vdev_raidz.h>
32 #include <sys/vdev_raidz_impl.h>
35 #include "raidz_test.h"
/*
 * File-scope state.
 * rand_data - buffer of pre-generated random ints; filled once in main()
 *             and copied from by init_rand().
 * rto_opts  - the global option set written by process_options().
 */
37 static int *rand_data
;
38 raidz_test_opts_t rto_opts
;
/* printf-style template; main() formats it with getpid() for the %d slot. */
41 static const char gdb_tmpl
[] = "gdb -ex \"set pagination 0\" -p %d";
/*
 * Signal handler installed by main() for SIGSEGV.
 *
 * The visible statements build a sigaction with the default handler and an
 * empty mask, then install it for `signo`.
 *
 * NOTE(review): this excerpt appears to be a lossy extraction. The re-raise
 * mentioned in the inline note, and any initialisation of action.sa_flags,
 * are not visible here -- confirm both against the real file.
 */
43 static void sig_handler(int signo
)
45 struct sigaction action
;
47 * Restore default action and re-raise signal so SIGSEGV and
48 * SIGABRT can trigger a core dump.
50 action
.sa_handler
= SIG_DFL
;
51 sigemptyset(&action
.sa_mask
);
/* Return value deliberately discarded via the (void) cast. */
53 (void) sigaction(signo
, &action
, NULL
);
/*
 * Print a summary of the options in `opts` to stdout.
 *
 * opts  - option set to print.
 * force - when true, print regardless of the verbosity level.
 *
 * Output is produced only if `force` is set or opts->rto_v >= D_INFO.
 * The offset and data size are printed as base-2 exponents (via ilog2()).
 *
 * NOTE(review): the body of the switch on opts->rto_v and the final
 * argument for the "(-v) verbose" %s slot are not visible in this excerpt.
 */
61 static void print_opts(raidz_test_opts_t
*opts
, boolean_t force
)
64 switch (opts
->rto_v
) {
76 if (force
|| opts
->rto_v
>= D_INFO
) {
77 (void) fprintf(stdout
, DBLSEP
"Running with options:\n"
78 " (-a) zio ashift : %zu\n"
79 " (-o) zio offset : 1 << %zu\n"
80 " (-d) number of raidz data columns : %zu\n"
81 " (-s) size of DATA : 1 << %zu\n"
82 " (-S) sweep parameters : %s \n"
83 " (-v) verbose : %s \n\n",
84 opts
->rto_ashift
, /* -a */
85 ilog2(opts
->rto_offset
), /* -o */
86 opts
->rto_dcols
, /* -d */
87 ilog2(opts
->rto_dsize
), /* -s */
88 opts
->rto_sweep
? "yes" : "no", /* -S */
/*
 * Print the command-line help text and terminate the process.
 *
 * requested - true when help was asked for explicitly: text goes to stdout
 *             and the exit status is 0. Otherwise text goes to stderr and
 *             the exit status is 1.
 *
 * Defaults shown for -a/-o/-d/-s come from rto_opts_defaults; the -S value
 * is read from the live rto_opts rather than from the defaults.
 *
 * NOTE(review): the argument for the "-v ... (default: %zu)" slot is not
 * visible in this excerpt.
 */
94 static void usage(boolean_t requested
)
96 const raidz_test_opts_t
*o
= &rto_opts_defaults
;
98 FILE *fp
= requested
? stdout
: stderr
;
100 (void) fprintf(fp
, "Usage:\n"
101 "\t[-a zio ashift (default: %zu)]\n"
102 "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
103 "\t[-d number of raidz data columns (default: %zu)]\n"
104 "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
105 "\t[-S parameter sweep (default: %s)]\n"
106 "\t[-t timeout for parameter sweep test]\n"
107 "\t[-B benchmark all raidz implementations]\n"
108 "\t[-v increase verbosity (default: %zu)]\n"
109 "\t[-h (print help)]\n"
110 "\t[-T test the test, see if failure would be detected]\n"
111 "\t[-D debug (attach gdb on SIGSEGV)]\n"
113 o
->rto_ashift
, /* -a */
114 ilog2(o
->rto_offset
), /* -o */
115 o
->rto_dcols
, /* -d */
116 ilog2(o
->rto_dsize
), /* -s */
117 rto_opts
.rto_sweep
? "yes" : "no", /* -S */
/* Does not return. */
121 exit(requested
? 0 : 1);
/*
 * Parse the command line into the global rto_opts.
 *
 * argc, argv - as received by main().
 *
 * rto_opts is first overwritten with rto_opts_defaults, then getopt() is
 * run with the option string "TDBSvha:o:d:s:t:".
 *
 * NOTE(review): the switch/case labels are not visible in this excerpt, so
 * the mapping from option letter to each assignment below is presumed from
 * the field names -- confirm against the real file. strtoull() is called
 * without an end pointer or errno check, so malformed numbers are not
 * detected.
 */
124 static void process_options(int argc
, char **argv
)
129 raidz_test_opts_t
*o
= &rto_opts
;
/* Start from the compiled-in defaults. */
131 bcopy(&rto_opts_defaults
, o
, sizeof (*o
));
133 while ((opt
= getopt(argc
, argv
, "TDBSvha:o:d:s:t:")) != -1) {
138 value
= strtoull(optarg
, NULL
, 0);
/* ashift is clamped to the range [9, 13]. */
139 o
->rto_ashift
= MIN(13, MAX(9, value
));
142 value
= strtoull(optarg
, NULL
, 0);
/*
 * The exponent is capped at 12; the >> 9 << 9 pair clears the low nine
 * bits, so an exponent below 9 yields an offset of 0.
 */
143 o
->rto_offset
= ((1ULL << MIN(12, value
)) >> 9) << 9;
146 value
= strtoull(optarg
, NULL
, 0);
/* Data column count is clamped to the range [1, 255]. */
147 o
->rto_dcols
= MIN(255, MAX(1, value
));
150 value
= strtoull(optarg
, NULL
, 0);
/* Size exponent is clamped to [SPA_MINBLOCKSHIFT, SPA_MAXBLOCKSHIFT]. */
151 o
->rto_dsize
= 1ULL << MIN(SPA_MAXBLOCKSHIFT
,
152 MAX(SPA_MINBLOCKSHIFT
, value
));
155 value
= strtoull(optarg
, NULL
, 0);
/* Timeout is stored unclamped. */
156 o
->rto_sweep_timeout
= value
;
165 o
->rto_benchmark
= 1;
/*
 * Column accessors for a raidz map.
 * In rm_col[] the code (parity) columns come first, at indices
 * [0, raidz_parity(rm)); data column i therefore lives at index
 * raidz_parity(rm) + i.
 */
184 #define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
185 #define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
/* Code columns are indexed directly. */
187 #define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
188 #define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
/*
 * Compare the first `parity` code columns of `rm` against the same columns
 * of the golden map opts->rm_golden, using abd_cmp().
 *
 * opts   - test options; supplies rm_golden and the log verbosity.
 * rm     - map under test.
 * parity - number of code columns to compare; asserted to be 1..3.
 *
 * A D_DEBUG message naming the column index is logged in the mismatch path.
 *
 * NOTE(review): the return type, the remainder of the abd_cmp() condition
 * and the return statement are not visible in this excerpt. Callers compare
 * the result against 0.
 */
191 cmp_code(raidz_test_opts_t
*opts
, const raidz_map_t
*rm
, const int parity
)
195 VERIFY(parity
>= 1 && parity
<= 3);
197 for (i
= 0; i
< parity
; i
++) {
198 if (abd_cmp(CODE_COL(rm
, i
), CODE_COL(opts
->rm_golden
, i
))
201 LOG_OPT(D_DEBUG
, opts
,
202 "\nParity block [%d] different!\n", i
);
/*
 * Compare every data column of `rm` against the golden map
 * opts->rm_golden, using abd_cmp().
 *
 * opts - test options; supplies rm_golden and the log verbosity.
 * rm   - map under test.
 *
 * The column count is taken from the golden map (rm_cols minus its parity
 * count), not from `rm`. A D_DEBUG message naming the column index is
 * logged in the mismatch path.
 *
 * NOTE(review): the return type, the remainder of the abd_cmp() condition
 * and the return statement are not visible in this excerpt. Callers compare
 * the result against 0.
 */
209 cmp_data(raidz_test_opts_t
*opts
, raidz_map_t
*rm
)
212 int dcols
= opts
->rm_golden
->rm_cols
- raidz_parity(opts
->rm_golden
);
214 for (i
= 0; i
< dcols
; i
++) {
215 if (abd_cmp(DATA_COL(opts
->rm_golden
, i
), DATA_COL(rm
, i
))
219 LOG_OPT(D_DEBUG
, opts
,
220 "\nData block [%d] different!\n", i
);
/*
 * Callback passed to abd_iterate_func(): fill a buffer with values copied
 * from the start of the global rand_data array.
 *
 * data    - destination buffer, treated as an array of int.
 * size    - destination size in bytes; only whole ints are written
 *           (size / sizeof (int) elements).
 * private - not referenced in the visible code.
 *
 * Every call copies from rand_data[0] onward, so separate calls with the
 * same size produce the same bytes.
 *
 * NOTE(review): the return type and return statement are not visible in
 * this excerpt.
 */
227 init_rand(void *data
, size_t size
, void *private)
230 int *dst
= (int *) data
;
232 for (i
= 0; i
< size
/ sizeof (int); i
++)
233 dst
[i
] = rand_data
[i
];
/*
 * Overwrite selected columns of a raidz map with the shared random data.
 *
 * rm   - map whose columns are overwritten.
 * tgts - array of column indices into rm->rm_col[].
 * cnt  - number of entries of `tgts` to process.
 *
 * Each target column's full rc_size bytes are rewritten through init_rand().
 */
239 corrupt_colums(raidz_map_t
*rm
, const int *tgts
, const int cnt
)
244 for (i
= 0; i
< cnt
; i
++) {
245 col
= &rm
->rm_col
[tgts
[i
]];
246 abd_iterate_func(col
->rc_abd
, 0, col
->rc_size
, init_rand
, NULL
);
/*
 * Fill the whole data buffer of `zio` (io_size bytes of io_abd) from the
 * shared random data, via init_rand().
 */
251 init_zio_abd(zio_t
*zio
)
253 abd_iterate_func(zio
->io_abd
, 0, zio
->io_size
, init_rand
, NULL
);
/*
 * Release a raidz map together with the zio it was built from.
 *
 * zio - address of the zio pointer; its io_abd buffer and the zio itself
 *       are freed.
 * rm  - address of the map pointer; the map is freed first.
 *
 * NOTE(review): whether *zio and *rm are reset to NULL afterwards is not
 * visible in this excerpt.
 */
257 fini_raidz_map(zio_t
**zio
, raidz_map_t
**rm
)
259 vdev_raidz_map_free(*rm
);
260 raidz_free((*zio
)->io_abd
, (*zio
)->io_size
);
261 umem_free(*zio
, sizeof (zio_t
));
/*
 * (Re)build the golden reference map stored in `opts` and sanity-check it
 * against a second, independently built map.
 *
 * opts   - test options; zio_golden and rm_golden are replaced.
 * parity - number of parity columns for the map.
 *
 * Visible steps: free any previous golden map; allocate a golden and a test
 * zio with the same offset and size; fill both from the shared random data;
 * select the "original" implementation; allocate both maps; generate parity
 * for both; compare the test map against the golden one; free the test map.
 *
 * NOTE(review): the return type, the declarations of `err` and `zio_test`,
 * the condition guarding the ERR() call and the return statement are not
 * visible in this excerpt.
 */
268 init_raidz_golden_map(raidz_test_opts_t
*opts
, const int parity
)
272 raidz_map_t
*rm_test
;
/* Total columns = data columns + parity columns. */
273 const size_t total_ncols
= opts
->rto_dcols
+ parity
;
/* Drop a golden map left over from a previous call. */
275 if (opts
->rm_golden
) {
276 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
279 opts
->zio_golden
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
280 zio_test
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
282 opts
->zio_golden
->io_offset
= zio_test
->io_offset
= opts
->rto_offset
;
283 opts
->zio_golden
->io_size
= zio_test
->io_size
= opts
->rto_dsize
;
285 opts
->zio_golden
->io_abd
= raidz_alloc(opts
->rto_dsize
);
286 zio_test
->io_abd
= raidz_alloc(opts
->rto_dsize
);
/* Both buffers receive the same bytes (init_rand copies from rand_data[0]). */
288 init_zio_abd(opts
->zio_golden
);
289 init_zio_abd(zio_test
);
/* The golden copy is computed with the implementation named "original". */
291 VERIFY0(vdev_raidz_impl_set("original"));
293 opts
->rm_golden
= vdev_raidz_map_alloc(opts
->zio_golden
,
294 opts
->rto_ashift
, total_ncols
, parity
);
295 rm_test
= vdev_raidz_map_alloc(zio_test
,
296 opts
->rto_ashift
, total_ncols
, parity
);
/*
 * NOTE(review): zio_golden was already dereferenced above, so the first of
 * these two checks cannot catch a NULL before it is used.
 */
298 VERIFY(opts
->zio_golden
);
299 VERIFY(opts
->rm_golden
);
301 vdev_raidz_generate_parity(opts
->rm_golden
);
302 vdev_raidz_generate_parity(rm_test
);
/* Any non-zero comparison result is accumulated into err. */
305 err
|= cmp_data(opts
, rm_test
);
306 err
|= cmp_code(opts
, rm_test
, parity
);
309 ERR("initializing the golden copy ... [FAIL]!\n");
311 /* tear down raidz_map of test zio */
312 fini_raidz_map(&zio_test
, &rm_test
);
/*
 * Allocate a fresh zio and raidz map for one test run.
 *
 * opts   - test options; supplies rto_dsize, rto_dcols and rto_ashift.
 * zio    - out parameter; receives the newly allocated zio.
 * parity - number of parity columns; asserted to be 1..3.
 *
 * The zio uses offset 0 and size rto_dsize. After the map is allocated its
 * first `parity` columns (the code columns) are overwritten with random
 * data, so stale parity cannot make a later comparison pass.
 *
 * NOTE(review): the return type, any step that fills the zio data buffer
 * and the return statement are not visible in this excerpt. Callers assign
 * the result to a raidz_map_t pointer.
 */
318 init_raidz_map(raidz_test_opts_t
*opts
, zio_t
**zio
, const int parity
)
320 raidz_map_t
*rm
= NULL
;
321 const size_t alloc_dsize
= opts
->rto_dsize
;
322 const size_t total_ncols
= opts
->rto_dcols
+ parity
;
/* Indices of the (up to three) code columns. */
323 const int ccols
[] = { 0, 1, 2 };
326 VERIFY(parity
<= 3 && parity
>= 1);
328 *zio
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
330 (*zio
)->io_offset
= 0;
331 (*zio
)->io_size
= alloc_dsize
;
332 (*zio
)->io_abd
= raidz_alloc(alloc_dsize
);
335 rm
= vdev_raidz_map_alloc(*zio
, opts
->rto_ashift
,
336 total_ncols
, parity
);
339 /* Make sure code columns are destroyed */
340 corrupt_colums(rm
, ccols
, parity
);
/*
 * Parity-generation test.
 *
 * opts - test options for this run.
 *
 * Builds the golden map with PARITY_PQR. Then, for each implementation name
 * in raidz_impl_names starting from the second entry, tries to select it;
 * "[SKIP]" is logged in the branch taken when vdev_raidz_impl_set() returns
 * non-zero. For each generation method fn in [0, RAIDZ_GEN_NUM) a map with
 * fn + 1 parity columns is built, parity is generated (skipped when
 * opts->rto_sanity is set) and the code columns are compared with the
 * golden map. The golden map is freed at the end.
 *
 * NOTE(review): the return type, several local declarations, the loop
 * increment, the break/continue statements and the return statement are not
 * visible in this excerpt.
 */
346 run_gen_check(raidz_test_opts_t
*opts
)
351 raidz_map_t
*rm_test
;
353 err
= init_raidz_golden_map(opts
, PARITY_PQR
);
358 LOG(D_INFO
, "Testing parity generation...\n");
/* Iteration starts at raidz_impl_names + 1, i.e. the first entry is skipped. */
360 for (impl_name
= (char **)raidz_impl_names
+1; *impl_name
!= NULL
;
364 LOG(D_INFO
, "\tTesting [%s] implementation...", *impl_name
);
366 if (0 != vdev_raidz_impl_set(*impl_name
)) {
367 LOG(D_INFO
, "[SKIP]\n");
370 LOG(D_INFO
, "[SUPPORTED]\n");
373 for (fn
= 0; fn
< RAIDZ_GEN_NUM
; fn
++) {
375 /* Check if should stop */
376 if (rto_opts
.rto_should_stop
)
379 /* create suitable raidz_map */
380 rm_test
= init_raidz_map(opts
, &zio_test
, fn
+1);
383 LOG(D_INFO
, "\t\tTesting method [%s] ...",
/* In sanity mode parity is left corrupted, so the comparison should fail. */
386 if (!opts
->rto_sanity
)
387 vdev_raidz_generate_parity(rm_test
);
389 if (cmp_code(opts
, rm_test
, fn
+1) != 0) {
390 LOG(D_INFO
, "[FAIL]\n");
393 LOG(D_INFO
, "[PASS]\n");
395 fini_raidz_map(&zio_test
, &rm_test
);
399 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
/*
 * Exercise one reconstruction method against every combination of failed
 * data columns it handles.
 *
 * opts - test options; supplies rto_dcols, rto_sanity and the golden map.
 * rm   - map under test; columns are corrupted and reconstructed in place.
 * fn   - reconstruction method index, used to select a row of rec_tgts.
 *
 * tgtidx starts as the rec_tgts[fn] row. Depending on fn, the last one, two
 * or all three entries are replaced by the data columns being failed
 * (data index + raidz_parity(rm)). Those columns are corrupted,
 * vdev_raidz_reconstruct() is run (skipped in sanity mode) and the data
 * columns are compared with the golden map.
 *
 * NOTE(review): the return type, local declarations, the statements guarded
 * by the bounds and should-stop checks, parts of the three-column loop
 * headers and the return statement are not visible in this excerpt.
 */
405 run_rec_check_impl(raidz_test_opts_t
*opts
, raidz_map_t
*rm
, const int fn
)
410 static const int rec_tgts
[7][3] = {
411 {1, 2, 3}, /* rec_p: bad QR & D[0] */
412 {0, 2, 3}, /* rec_q: bad PR & D[0] */
413 {0, 1, 3}, /* rec_r: bad PQ & D[0] */
414 {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
415 {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
416 {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
417 {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
/* Working copy of the target row; entries are overwritten per iteration. */
420 memcpy(tgtidx
, rec_tgts
[fn
], sizeof (tgtidx
));
422 if (fn
< RAIDZ_REC_PQ
) {
423 /* can reconstruct 1 failed data disk */
424 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
425 if (x0
>= rm
->rm_cols
- raidz_parity(rm
))
428 /* Check if should stop */
429 if (rto_opts
.rto_should_stop
)
432 LOG(D_DEBUG
, "[%d] ", x0
);
434 tgtidx
[2] = x0
+ raidz_parity(rm
);
/* Only the data column (last target) is corrupted here. */
436 corrupt_colums(rm
, tgtidx
+2, 1);
438 if (!opts
->rto_sanity
)
439 vdev_raidz_reconstruct(rm
, tgtidx
, 3);
441 if (cmp_data(opts
, rm
) != 0) {
443 LOG(D_DEBUG
, "\nREC D[%d]... [FAIL]\n", x0
);
447 } else if (fn
< RAIDZ_REC_PQR
) {
448 /* can reconstruct 2 failed data disk */
449 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
450 if (x0
>= rm
->rm_cols
- raidz_parity(rm
))
/* x1 starts after x0, so each unordered pair is visited once. */
452 for (x1
= x0
+ 1; x1
< opts
->rto_dcols
; x1
++) {
453 if (x1
>= rm
->rm_cols
- raidz_parity(rm
))
456 /* Check if should stop */
457 if (rto_opts
.rto_should_stop
)
460 LOG(D_DEBUG
, "[%d %d] ", x0
, x1
);
462 tgtidx
[1] = x0
+ raidz_parity(rm
);
463 tgtidx
[2] = x1
+ raidz_parity(rm
);
465 corrupt_colums(rm
, tgtidx
+1, 2);
467 if (!opts
->rto_sanity
)
468 vdev_raidz_reconstruct(rm
, tgtidx
, 3);
470 if (cmp_data(opts
, rm
) != 0) {
472 LOG(D_DEBUG
, "\nREC D[%d %d]... "
478 /* can reconstruct 3 failed data disk */
480 x0
< opts
->rto_dcols
; x0
++) {
481 if (x0
>= rm
->rm_cols
- raidz_parity(rm
))
484 x1
< opts
->rto_dcols
; x1
++) {
485 if (x1
>= rm
->rm_cols
- raidz_parity(rm
))
488 x2
< opts
->rto_dcols
; x2
++) {
490 rm
->rm_cols
- raidz_parity(rm
))
493 /* Check if should stop */
494 if (rto_opts
.rto_should_stop
)
497 LOG(D_DEBUG
, "[%d %d %d]", x0
, x1
, x2
);
/* All three targets are data columns in this case. */
499 tgtidx
[0] = x0
+ raidz_parity(rm
);
500 tgtidx
[1] = x1
+ raidz_parity(rm
);
501 tgtidx
[2] = x2
+ raidz_parity(rm
);
503 corrupt_colums(rm
, tgtidx
, 3);
505 if (!opts
->rto_sanity
)
506 vdev_raidz_reconstruct(rm
,
509 if (cmp_data(opts
, rm
) != 0) {
512 "\nREC D[%d %d %d]... "
513 "[FAIL]\n", x0
, x1
, x2
);
/*
 * Data-reconstruction test.
 *
 * opts - test options for this run.
 *
 * Builds the golden map with PARITY_PQR. Then, for each implementation name
 * in raidz_impl_names starting from the second entry, tries to select it;
 * "[SKIP]" is logged in the branch taken when vdev_raidz_impl_set() returns
 * non-zero. A PARITY_PQR test map is built, its parity is generated, and
 * run_rec_check_impl() is called for every method fn in
 * [0, RAIDZ_REC_NUM). The test map and finally the golden map are freed.
 *
 * NOTE(review): the return type, several local declarations, the loop
 * increment, the statements after the FAIL/SKIP logs and the return
 * statement are not visible in this excerpt.
 */
523 run_rec_check(raidz_test_opts_t
*opts
)
526 unsigned fn
, err
= 0;
528 raidz_map_t
*rm_test
;
530 err
= init_raidz_golden_map(opts
, PARITY_PQR
);
535 LOG(D_INFO
, "Testing data reconstruction...\n");
/* Iteration starts at raidz_impl_names + 1, i.e. the first entry is skipped. */
537 for (impl_name
= (char **)raidz_impl_names
+1; *impl_name
!= NULL
;
541 LOG(D_INFO
, "\tTesting [%s] implementation...", *impl_name
);
543 if (vdev_raidz_impl_set(*impl_name
) != 0) {
544 LOG(D_INFO
, "[SKIP]\n");
547 LOG(D_INFO
, "[SUPPORTED]\n");
550 /* create suitable raidz_map */
551 rm_test
= init_raidz_map(opts
, &zio_test
, PARITY_PQR
);
552 /* generate parity */
553 vdev_raidz_generate_parity(rm_test
);
555 for (fn
= 0; fn
< RAIDZ_REC_NUM
; fn
++) {
557 LOG(D_INFO
, "\t\tTesting method [%s] ...",
560 if (run_rec_check_impl(opts
, rm_test
, fn
) != 0) {
561 LOG(D_INFO
, "[FAIL]\n");
565 LOG(D_INFO
, "[PASS]\n");
568 /* tear down test raidz_map */
569 fini_raidz_map(&zio_test
, &rm_test
);
572 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
/*
 * Run one complete test pass for `opts`: print the options (not forced, so
 * subject to verbosity), then run the generation check followed by the
 * reconstruction check, OR-ing their results into err.
 *
 * NOTE(review): main() calls this with NULL. The handling of a NULL `opts`,
 * the return type, the declaration of `err` and the return statement are
 * not visible in this excerpt -- confirm against the real file.
 */
578 run_test(raidz_test_opts_t
*opts
)
585 print_opts(opts
, B_FALSE
);
587 err
|= run_gen_check(opts
);
588 err
|= run_rec_check(opts
);
/* Values taken by sweep_state during a parameter sweep. */
593 #define SWEEP_RUNNING 0
594 #define SWEEP_FINISHED 1
595 #define SWEEP_ERROR 2
596 #define SWEEP_TIMEOUT 3
/* Current sweep status; starts at 0, which equals SWEEP_RUNNING. */
598 static int sweep_state
= 0;
/* Copy of the option set recorded by a worker on the SWEEP_ERROR path. */
599 static raidz_test_opts_t failed_opts
;
/*
 * sem_mtx and sem_cv are the mutex/condvar pair used by the sweep code;
 * free_slots and max_free_slots count available and total worker slots.
 */
601 static kmutex_t sem_mtx
;
602 static kcondvar_t sem_cv
;
603 static int max_free_slots
;
604 static int free_slots
;
/*
 * Worker thread body for the parameter sweep.
 *
 * arg - a heap-allocated raidz_test_opts_t; must not be NULL. It is freed
 *       here, so ownership passes to this thread.
 *
 * Runs run_test() on the options. The visible error path copies the options
 * into failed_opts and sets sweep_state to SWEEP_ERROR while holding
 * sem_mtx. sem_mtx is taken again at the end, before the thread finishes.
 *
 * NOTE(review): the condition guarding the error path, the effect of the
 * sanity-mode rand() check, and the slot-release/signal statements between
 * the final mutex_enter/mutex_exit are not visible in this excerpt.
 */
607 sweep_thread(void *arg
)
610 raidz_test_opts_t
*opts
= (raidz_test_opts_t
*) arg
;
611 VERIFY(opts
!= NULL
);
613 err
= run_test(opts
);
615 if (rto_opts
.rto_sanity
) {
616 /* 25% chance that a sweep test fails */
617 if (rand() < (RAND_MAX
/4))
/* failed_opts and sweep_state are written while sem_mtx is held. */
622 mutex_enter(&sem_mtx
);
623 memcpy(&failed_opts
, opts
, sizeof (raidz_test_opts_t
));
624 sweep_state
= SWEEP_ERROR
;
625 mutex_exit(&sem_mtx
);
628 umem_free(opts
, sizeof (raidz_test_opts_t
));
630 /* signal the next thread */
631 mutex_enter(&sem_mtx
);
634 mutex_exit(&sem_mtx
);
/*
 * Parameter sweep driver. The function header is not visible in this
 * excerpt; the body starts with its static tables.
 *
 * Iterates over every (size, ashift, dcols) combination from the tables
 * below and starts one sweep_thread per combination through
 * zk_thread_create(). Before each start it waits on sem_cv and checks for
 * a timeout, an error state and a free slot. After the loops it waits until
 * all slots are free again, then reports the outcome.
 *
 * Returns SWEEP_ERROR when sweep_state is SWEEP_ERROR, otherwise 0. A
 * timeout is therefore not reported as a failure through the return value.
 *
 * NOTE(review): the statements taken on the skip, timeout, error and
 * slot-available paths, and the else-structure of the final report, are not
 * visible in this excerpt.
 */
642 static const size_t dcols_v
[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
643 static const size_t ashift_v
[] = { 9, 12, 14 };
644 static const size_t size_v
[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
645 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE
};
/* Progress output is written unbuffered. */
647 (void) setvbuf(stdout
, NULL
, _IONBF
, 0);
649 ulong_t total_comb
= ARRAY_SIZE(size_v
) * ARRAY_SIZE(ashift_v
) *
651 ulong_t tried_comb
= 0;
652 hrtime_t time_diff
, start_time
= gethrtime();
653 raidz_test_opts_t
*opts
;
/* Slot count is boot_ncpus, but never fewer than two. */
656 max_free_slots
= free_slots
= MAX(2, boot_ncpus
);
658 mutex_init(&sem_mtx
, NULL
, MUTEX_DEFAULT
, NULL
);
659 cv_init(&sem_cv
, NULL
, CV_DEFAULT
, NULL
);
661 for (s
= 0; s
< ARRAY_SIZE(size_v
); s
++)
662 for (a
= 0; a
< ARRAY_SIZE(ashift_v
); a
++)
663 for (d
= 0; d
< ARRAY_SIZE(dcols_v
); d
++) {
/* Branch for a block size smaller than one 1 << ashift unit. */
665 if (size_v
[s
] < (1 << ashift_v
[a
])) {
/* Progress line every 20 combinations. */
670 if (++tried_comb
% 20 == 0)
671 LOG(D_ALL
, "%lu/%lu... ", tried_comb
, total_comb
);
673 /* wait for signal to start new thread */
674 mutex_enter(&sem_mtx
);
/* The wait deadline is ddi_get_lbolt() + hz. */
675 while (cv_timedwait_sig(&sem_cv
, &sem_mtx
,
676 ddi_get_lbolt() + hz
)) {
678 /* check if should stop the test (timeout) */
679 time_diff
= (gethrtime() - start_time
) / NANOSEC
;
/* A timeout of 0 disables this check. */
680 if (rto_opts
.rto_sweep_timeout
> 0 &&
681 time_diff
>= rto_opts
.rto_sweep_timeout
) {
682 sweep_state
= SWEEP_TIMEOUT
;
683 rto_opts
.rto_should_stop
= B_TRUE
;
684 mutex_exit(&sem_mtx
);
688 /* check if should stop the test (error) */
689 if (sweep_state
!= SWEEP_RUNNING
) {
690 mutex_exit(&sem_mtx
);
694 /* exit loop if a slot is available */
695 if (free_slots
> 0) {
701 mutex_exit(&sem_mtx
);
/* This allocation is freed by sweep_thread(). */
703 opts
= umem_zalloc(sizeof (raidz_test_opts_t
), UMEM_NOFAIL
);
704 opts
->rto_ashift
= ashift_v
[a
];
705 opts
->rto_dcols
= dcols_v
[d
];
/*
 * NOTE(review): both `1 << ...` and rand() have type int, so this product
 * is evaluated in int and may overflow before it is stored -- consider
 * widening the left operand (e.g. 1ULL).
 */
706 opts
->rto_offset
= (1 << ashift_v
[a
]) * rand();
707 opts
->rto_dsize
= size_v
[s
];
708 opts
->rto_v
= 0; /* be quiet */
710 VERIFY3P(zk_thread_create(NULL
, 0,
711 (thread_func_t
) sweep_thread
,
712 (void *) opts
, TS_RUN
, NULL
, 0, 0,
713 PTHREAD_CREATE_JOINABLE
), !=, NULL
);
717 LOG(D_ALL
, "\nWaiting for test threads to finish...\n");
718 mutex_enter(&sem_mtx
);
719 VERIFY(free_slots
<= max_free_slots
);
/* Wait until free_slots is back up to max_free_slots. */
720 while (free_slots
< max_free_slots
) {
721 (void) cv_wait(&sem_cv
, &sem_mtx
);
723 mutex_exit(&sem_mtx
);
725 if (sweep_state
== SWEEP_ERROR
) {
726 ERR("Sweep test failed! Failed option: \n");
/* Forced print: shown regardless of verbosity. */
727 print_opts(&failed_opts
, B_TRUE
);
729 if (sweep_state
== SWEEP_TIMEOUT
)
730 LOG(D_ALL
, "Test timeout (%lus). Stopping...\n",
731 (ulong_t
)rto_opts
.rto_sweep_timeout
);
733 LOG(D_ALL
, "Sweep test succeeded on %lu raidz maps!\n",
734 (ulong_t
)tried_comb
);
737 return (sweep_state
== SWEEP_ERROR
? SWEEP_ERROR
: 0);
/*
 * Program entry point.
 *
 * Visible steps: format the gdb attach command with this process's pid;
 * install sig_handler for SIGSEGV; switch stdout to line buffering; run
 * dprintf_setup() and process_options(); fill rand_data with
 * SPA_MAXBLOCKSIZE bytes of rand() output and mark it read-only; then run
 * the benchmark, the sweep branch or a single run_test(NULL) depending on
 * rto_opts; finally free rand_data.
 *
 * NOTE(review): the return type, the declarations of `gdb`, `i` and `err`,
 * any sa_flags initialisation, the sweep call itself and the return
 * statement are not visible in this excerpt.
 */
741 main(int argc
, char **argv
)
744 struct sigaction action
;
747 /* init gdb string early */
/*
 * NOTE(review): sprintf() is unbounded and the size of `gdb` is not visible
 * here; snprintf() with the buffer size would be safer.
 */
748 (void) sprintf(gdb
, gdb_tmpl
, getpid());
750 action
.sa_handler
= sig_handler
;
751 sigemptyset(&action
.sa_mask
);
754 if (sigaction(SIGSEGV
, &action
, NULL
) < 0) {
755 ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno
));
759 (void) setvbuf(stdout
, NULL
, _IOLBF
, 0);
761 dprintf_setup(&argc
, argv
);
763 process_options(argc
, argv
);
767 /* setup random data because rand() is not reentrant */
768 rand_data
= (int *) umem_alloc(SPA_MAXBLOCKSIZE
, UMEM_NOFAIL
);
/* Seed is the current time multiplied by the pid. */
769 srand((unsigned)time(NULL
) * getpid());
770 for (i
= 0; i
< SPA_MAXBLOCKSIZE
/ sizeof (int); i
++)
771 rand_data
[i
] = rand();
/*
 * Make the buffer read-only so stray writes fault.
 * NOTE(review): the return value is ignored; mprotect() may fail if the
 * allocation is not page-aligned -- confirm the allocator's alignment.
 */
773 mprotect(rand_data
, SPA_MAXBLOCKSIZE
, PROT_READ
);
775 if (rto_opts
.rto_benchmark
) {
776 run_raidz_benchmark();
777 } else if (rto_opts
.rto_sweep
) {
780 err
= run_test(NULL
);
783 umem_free(rand_data
, SPA_MAXBLOCKSIZE
);