3 # ZPOOL fault verification test script.
# The current suite of fault tests should not be thought of as an exhaustive
# list of failure modes. Rather it is simply a starting point which tries
# to cover the bulk of the 'easy', and hopefully common, failure modes.
# Additional tests should be added to the current suite as new interesting
# failure modes are observed. Additional failure modes I'd like to see
# tests for include, but are not limited to:
13 # * Slow but successful IO.
14 # * SCSI sense codes generated as zevents.
# The current infrastructure uses the 'mdadm' faulty device and the
# 'scsi_debug' simulated scsi devices. The idea is to inject the error
# below the zfs stack to validate all the error paths. More targeted
# failure testing should be added using the 'zinject' command line util.
25 # Requires the following packages:
31 basedir
="$(dirname $0)"
33 SCRIPT_COMMON
=common.sh
34 if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then
35 .
"${basedir}/${SCRIPT_COMMON}"
37 echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
48 ZPOOL fault verification tests
53 -c Cleanup md+lo+file devices at start
54 -t <#> Run listed tests
55 -s <#> Skip listed tests
60 while getopts 'hvct:s:?' OPTION
; do
85 if [ $
(id
-u) != 0 ]; then
86 die
"Must run as root"
89 # Initialize the test suite
# Perform pre-cleanup if requested
93 if [ ${CLEANUP} ]; then
97 rm -f /tmp
/zpool.cache.
*
100 # Check if we need to skip all md based tests.
102 check_md_partitionable
&& MD_PARTITIONABLE
=1
103 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
104 echo "Skipping tests 1-7 which require partitionable md devices"
107 # Check if we need to skip all the scsi_debug tests.
109 ${INFOMOD} scsi_debug
&>/dev
/null
&& SCSI_DEBUG
=1
110 if [ ${SCSI_DEBUG} -eq 0 ]; then
111 echo "Skipping tests 8-9 which require the scsi_debug module"
114 if [ ${MD_PARTITIONABLE} -eq 0 ] ||
[ ${SCSI_DEBUG} -eq 0 ]; then
118 printf "%40s%s\t%s\t%s\t%s\t%s\n" "" "raid0" "raid10" "raidz" "raidz2" "raidz3"
121 echo -n -e "${COLOR_GREEN}Pass${COLOR_RESET}\t"
125 echo -n -e "${COLOR_BROWN}Skip${COLOR_RESET}\t"
133 ${ZPOOL} status ${POOL_NAME} | grep ${DEVICE_TYPE} ${TMP_STATUS} | \
134 head -n${DEVICE_NTH} | tail -n1 | ${AWK} "{ print \$1 }"
141 ${ZPOOL} status ${POOL_NAME} | ${AWK} "/${VDEV_NAME}/ { print \$2 }"
# Expand a count carrying a decimal SI suffix into a plain integer.
# Required format is x.yz[KMGTP]
#
# Arguments: $1 - value to expand, e.g. "12" or "1.23K"
# Outputs:   the integer part of the expanded value on stdout
# Requires:  bc(1)
expand_numeric_suffix() {
	local VALUE=$1

	# Rewrite a trailing suffix as a multiplication bc can evaluate, e.g.
	# "1.23K" becomes "1.23*1000".  The multipliers are powers of 1000.
	# At most one substitution applies: once a suffix has been replaced
	# the value ends in a digit and the remaining patterns cannot match.
	VALUE="${VALUE/%K/*1000}"
	VALUE="${VALUE/%M/*1000000}"
	VALUE="${VALUE/%G/*1000000000}"
	VALUE="${VALUE/%T/*1000000000000}"
	VALUE="${VALUE/%P/*1000000000000000}"

	# Evaluate the expression and keep only the digits before the '.'.
	VALUE=$(echo "${VALUE}" | bc | cut -d'.' -f1)

	echo "${VALUE}"
}
161 local VDEV_ERRORS
=`${ZPOOL} status ${POOL_NAME} |
162 ${AWK} "/${VDEV_NAME}/ { print \\$3 }"`
164 expand_numeric_suffix
${VDEV_ERRORS}
# Print the write error count 'zpool status' shows for a single vdev.
#
# Globals:   ZPOOL, AWK (read)
# Arguments: $1 - pool name
#            $2 - vdev name; interpolated into an awk regex and matched
#                 against every line of the status output
# Outputs:   field 4 of the first matching line, passed through
#            expand_numeric_suffix, on stdout
vdev_write_errors() {
	local POOL_NAME=$1
	local VDEV_NAME=$2
	local VDEV_ERRORS

	# Declared separately from the assignment so 'local' does not mask
	# the pipeline's exit status.  awk stops at the first match; this
	# yields the same single value the previous unquoted expansion did
	# when the name matched more than one line.
	VDEV_ERRORS=$(${ZPOOL} status "${POOL_NAME}" |
	    ${AWK} "/${VDEV_NAME}/ { print \$4; exit }")

	expand_numeric_suffix "${VDEV_ERRORS}"
}
# Print the checksum error count 'zpool status' shows for a single vdev.
#
# Globals:   ZPOOL, AWK (read)
# Arguments: $1 - pool name
#            $2 - vdev name; interpolated into an awk regex and matched
#                 against every line of the status output
# Outputs:   field 5 of the first matching line, passed through
#            expand_numeric_suffix, on stdout
vdev_cksum_errors() {
	local POOL_NAME=$1
	local VDEV_NAME=$2
	local VDEV_ERRORS

	# Declared separately from the assignment so 'local' does not mask
	# the pipeline's exit status.  awk stops at the first match; this
	# yields the same single value the previous unquoted expansion did
	# when the name matched more than one line.
	VDEV_ERRORS=$(${ZPOOL} status "${POOL_NAME}" |
	    ${AWK} "/${VDEV_NAME}/ { print \$5; exit }")

	expand_numeric_suffix "${VDEV_ERRORS}"
}
188 ${ZPOOL} status ${POOL_NAME} | ${AWK} "/state/ { print \$2; exit }"
195 SCRIPT1="BEGIN
{RS
=\"\"; FS
=\"\n\"} /${EVENT_NAME}/ { print \
$0; exit }"
196 SCRIPT2="BEGIN
{FS
=\"=\"} /${EVENT_KEY}/ { print \
$2; exit }"
198 ${ZPOOL} events -vH | ${AWK} "${SCRIPT1}" | ${AWK} "${SCRIPT2}"
# Print the error count from the pool's "scan:" status line.
#
# Globals:   ZPOOL, AWK (read)
# Arguments: $1 - pool name
# Outputs:   field 8 of a line matching "scan: scrub" and field 7 of a
#            line matching "scan: resilver", on stdout
zpool_scan_errors() {
	local POOL_NAME=$1

	# Query the status once and test both patterns against that single
	# snapshot rather than invoking 'zpool status' twice.
	${ZPOOL} status "${POOL_NAME}" | ${AWK} "
		/scan: scrub/    { print \$8 }
		/scan: resilver/ { print \$7 }"
}
209 local PATTERN_BLOCK_SIZE
=$1
210 local PATTERN_BLOCK_COUNT
=$2
211 local PATTERN_NAME
=`mktemp -p /tmp zpool.pattern.XXXXXXXX`
214 dd if=/dev
/urandom of
=${PATTERN_NAME} bs
=${PATTERN_BLOCK_SIZE} \
215 count
=${PATTERN_BLOCK_COUNT} &>/dev
/null
220 local PATTERN_NAME
=$1
221 local PATTERN_BLOCK_SIZE
=$2
222 local PATTERN_BLOCK_COUNT
=$3
225 dd if=${PATTERN_NAME} of=${DEVICE_NAME} bs=${PATTERN_BLOCK_SIZE} \
226 count
=${PATTERN_BLOCK_COUNT} oflag
=direct
&>/dev
/null
231 local PATTERN_NAME
=$1
232 local PATTERN_BLOCK_SIZE
=$2
233 local PATTERN_BLOCK_COUNT
=$3
236 dd if=${PATTERN_NAME} of=${DEVICE_NAME} bs=${PATTERN_BLOCK_SIZE} \
237 count
=${PATTERN_BLOCK_COUNT} oflag
=direct
&>/dev
/null
&
242 local PATTERN_NAME
=$1
243 local PATTERN_BLOCK_SIZE
=$2
244 local PATTERN_BLOCK_COUNT
=$3
246 local DEVICE_FILE
=`mktemp -p /tmp zpool.pattern.XXXXXXXX`
248 dd if=${DEVICE_NAME} of=${DEVICE_FILE} bs=${PATTERN_BLOCK_SIZE} \
249 count
=${PATTERN_BLOCK_COUNT} iflag
=direct
&>/dev
/null
250 cmp -s ${PATTERN_NAME} ${DEVICE_FILE}
258 local PATTERN_NAME
=$1
260 rm -f ${PATTERN_NAME}
268 ${MDADM} /dev
/${VDEV_FAULTY} --grow --level=faulty \
269 --layout=${FAULT_TYPE} >/dev
/null
276 # Clear all failure injection.
277 ${MDADM} /dev
/${VDEV_FAULTY} --grow --level=faulty \
278 --layout=clear >/dev
/null ||
return $?
279 ${MDADM} /dev
/${VDEV_FAULTY} --grow --level=faulty \
280 --layout=flush
>/dev
/null ||
return $?
288 echo ${OPTS} >/sys
/bus
/pseudo
/drivers
/scsi_debug
/opts
289 echo ${NTH} >/sys
/bus
/pseudo
/drivers
/scsi_debug
/every_nth
293 echo 0 >/sys
/bus
/pseudo
/drivers
/scsi_debug
/every_nth
294 echo 0 >/sys
/bus
/pseudo
/drivers
/scsi_debug
/opts
303 ${ZFS_SH} zfs
="spa_config_path=${TMP_CACHE}" || fail
1
304 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c ${POOL_CONFIG} || fail
2
305 ${ZFS} create -V 64M ${POOL_NAME}/${ZVOL_NAME} || fail
3
307 # Trigger udev and re-read the partition table to ensure all of
308 # this IO is out of the way before we begin injecting failures.
309 udev_trigger || fail
4
310 ${BLOCKDEV} --rereadpt /dev/${POOL_NAME}/${ZVOL_NAME} || fail
5
319 ${ZFS} destroy ${POOL_NAME}/${ZVOL_NAME} || fail
101
320 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c ${POOL_CONFIG} -d || fail
102
321 ${ZFS_SH} -u || fail
103
322 rm -f ${TMP_CACHE} || fail
104
328 local POOL_REDUNDANT
=$3
329 local ZVOL_NAME
="zvol"
330 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
332 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
337 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
338 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
340 # Set soft write failure for first vdev device.
341 local VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md 1`
342 fault_set_md
${VDEV_FAULTY} write-transient
344 # The application must not observe an error.
345 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
346 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
347 fault_clear_md
${VDEV_FAULTY}
349 # Soft errors will not be logged to 'zpool status'
350 local WRITE_ERRORS
=`vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY}`
351 test ${WRITE_ERRORS} -eq 0 || fail
13
353 # Soft errors will still generate an EIO (5) event.
354 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
14
356 # Verify the known pattern.
357 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
15
358 pattern_remove
${TMP_PATTERN} || fail
16
360 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
366 test_write_soft tank lo-faulty-raid0
0
367 test_write_soft tank lo-faulty-raid10
1
368 test_write_soft tank lo-faulty-raidz
1
369 test_write_soft tank lo-faulty-raidz2
1
370 test_write_soft tank lo-faulty-raidz3
1
373 run_test
1 "soft write error"
378 local POOL_REDUNDANT
=$3
379 local ZVOL_NAME
="zvol"
380 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
382 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
387 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
388 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
390 # Set hard write failure for first vdev device.
391 local VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md 1`
392 fault_set_md
${VDEV_FAULTY} write-persistent
394 # The application must not observe an error.
395 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
396 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
397 fault_clear_md
${VDEV_FAULTY}
399 local WRITE_ERRORS
=`vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY}`
400 if [ ${POOL_REDUNDANT} -eq 1 ]; then
401 # For redundant configurations hard errors will not be
402 # logged to 'zpool status' but will generate EIO events.
403 test ${WRITE_ERRORS} -eq 0 || fail
21
404 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
22
406 # For non-redundant configurations hard errors will be
407 # logged to 'zpool status' and generate EIO events. They
408 # will also trigger a scrub of the impacted sectors.
410 test ${WRITE_ERRORS} -gt 0 || fail
31
411 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
32
412 test `zpool_event "zfs.resilver.start" "ena"` != "" || fail
33
413 test `zpool_event "zfs.resilver.finish" "ena"` != "" || fail
34
414 test `zpool_scan_errors ${POOL_NAME}` -eq 0 || fail
35
417 # Verify the known pattern.
418 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
41
419 pattern_remove
${TMP_PATTERN} || fail
42
421 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
427 test_write_hard tank lo-faulty-raid0
0
428 test_write_hard tank lo-faulty-raid10
1
429 test_write_hard tank lo-faulty-raidz
1
430 test_write_hard tank lo-faulty-raidz2
1
431 test_write_hard tank lo-faulty-raidz3
1
434 run_test
2 "hard write error"
439 local POOL_REDUNDANT
=$3
440 local ZVOL_NAME
="zvol"
441 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
443 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
448 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
449 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
451 # Set all write failures for first vdev device.
452 local VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md 1`
453 fault_set_md
${VDEV_FAULTY} write-all
455 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
456 if [ ${POOL_REDUNDANT} -eq 1 ]; then
457 # The application must not observe an error.
458 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
460 # The application is expected to hang in the background until
461 # the faulty device is repaired and 'zpool clear' is run.
462 pattern_write_bg
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
13
465 fault_clear_md
${VDEV_FAULTY}
467 local WRITE_ERRORS
=`vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY}`
468 local VDEV_STATUS
=`vdev_status ${POOL_NAME} ${VDEV_FAULTY}`
469 local POOL_STATE
=`zpool_state ${POOL_NAME}`
470 # For all configurations write errors are logged to 'zpool status',
471 # and EIO events are generated. However, only a redundant config
472 # will cause the vdev to be FAULTED and pool DEGRADED. In a non-
473 # redundant config the IO will hang until 'zpool clear' is run.
474 test ${WRITE_ERRORS} -gt 0 || fail
14
475 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
15
477 if [ ${POOL_REDUNDANT} -eq 1 ]; then
478 test "${VDEV_STATUS}" = "FAULTED" || fail
21
479 test "${POOL_STATE}" = "DEGRADED" || fail
22
481 BLOCKED
=`ps a | grep "${ZVOL_DEVICE}" | grep -c -v "grep"`
482 ${ZPOOL} clear ${POOL_NAME} || fail
31
483 test ${BLOCKED} -eq 1 || fail
32
487 # Verify the known pattern.
488 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
41
489 pattern_remove
${TMP_PATTERN} || fail
42
491 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
497 test_write_all tank lo-faulty-raid0
0
498 test_write_all tank lo-faulty-raid10
1
499 test_write_all tank lo-faulty-raidz
1
500 test_write_all tank lo-faulty-raidz2
1
501 test_write_all tank lo-faulty-raidz3
1
504 run_test
3 "all write errors"
509 local POOL_REDUNDANT
=$3
510 local ZVOL_NAME
="zvol"
511 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
514 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
519 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
520 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
522 # Create a pattern to be verified during a read error.
523 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
524 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
526 # Set soft read failure for all the vdevs to ensure we hit it.
527 for (( i
=1; i
<=4; i
++ )); do
528 fault_set_md
`nth_zpool_vdev ${POOL_NAME} md $i` read-transient
531 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
13
532 pattern_remove
${TMP_PATTERN} || fail
14
534 # Clear all failure injection and sum read errors.
535 for (( i
=1; i
<=4; i
++ )); do
536 local VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md $i`
537 local VDEV_ERRORS
=`vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY}`
538 let READ_ERRORS
=${READ_ERRORS}+${VDEV_ERRORS}
539 fault_clear_md
${VDEV_FAULTY}
542 # Soft errors will not be logged to 'zpool status'.
543 test ${READ_ERRORS} -eq 0 || fail
15
545 # Soft errors will still generate an EIO (5) event.
546 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
16
548 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
554 test_read_soft tank lo-faulty-raid0
0
555 test_read_soft tank lo-faulty-raid10
1
556 test_read_soft tank lo-faulty-raidz
1
557 test_read_soft tank lo-faulty-raidz2
1
558 test_read_soft tank lo-faulty-raidz3
1
561 run_test
4 "soft read error"
566 local POOL_REDUNDANT
=$3
567 local ZVOL_NAME
="zvol"
568 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
571 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
576 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
577 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
579 # Create a pattern to be verified during a read error.
580 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
581 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
583 # Set hard read failure for the fourth vdev.
584 local VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md 4`
585 fault_set_md
${VDEV_FAULTY} read-persistent
587 # For a redundant pool there must be no IO error, for a non-redundant
588 # pool we expect permanent damage and an IO error during verify, unless
589 # we get exceptionally lucky and have just damaged redundant metadata.
590 if [ ${POOL_REDUNDANT} -eq 1 ]; then
591 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
21
592 local READ_ERRORS
=`vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY}`
593 test ${READ_ERRORS} -eq 0 || fail
22
595 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE}
596 ${ZPOOL} scrub
${POOL_NAME} || fail
32
597 local READ_ERRORS
=`vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY}`
598 test ${READ_ERRORS} -gt 0 || fail
33
599 ${ZPOOL} status
-v ${POOL_NAME} | \
600 grep -A8 "Permanent errors" | \
601 grep -q "${POOL_NAME}" || fail
34
603 pattern_remove
${TMP_PATTERN} || fail
41
# Clear all failure injection.
606 fault_clear_md
${VDEV_FAULTY}
608 # Hard errors will generate an EIO (5) event.
609 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
42
611 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
617 test_read_hard tank lo-faulty-raid0
0
618 test_read_hard tank lo-faulty-raid10
1
619 test_read_hard tank lo-faulty-raidz
1
620 test_read_hard tank lo-faulty-raidz2
1
621 test_read_hard tank lo-faulty-raidz3
1
624 run_test
5 "hard read error"
626 # Fixable read error.
627 test_read_fixable
() {
630 local POOL_REDUNDANT
=$3
631 local ZVOL_NAME
="zvol"
632 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
635 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
640 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
641 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
643 # Create a pattern to be verified during a read error.
644 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
645 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
# Set fixable read failure for the fourth vdev.
648 local VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md 4`
649 fault_set_md
${VDEV_FAULTY} read-fixable
651 # For a redundant pool there must be no IO error, for a non-redundant
652 # pool we expect permanent damage and an IO error during verify, unless
653 # we get exceptionally lucky and have just damaged redundant metadata.
654 if [ ${POOL_REDUNDANT} -eq 1 ]; then
655 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
21
656 local READ_ERRORS
=`vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY}`
657 test ${READ_ERRORS} -eq 0 || fail
22
659 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE}
660 ${ZPOOL} scrub
${POOL_NAME} || fail
32
661 local READ_ERRORS
=`vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY}`
662 test ${READ_ERRORS} -gt 0 || fail
33
663 ${ZPOOL} status
-v ${POOL_NAME} | \
664 grep -A8 "Permanent errors" | \
665 grep -q "${POOL_NAME}" || fail
34
667 pattern_remove
${TMP_PATTERN} || fail
41
# Clear all failure injection.
670 fault_clear_md
${VDEV_FAULTY}
672 # Hard errors will generate an EIO (5) event.
673 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
42
675 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
679 # Read errors fixable with a write.
681 test_read_fixable tank lo-faulty-raid0
0
682 test_read_fixable tank lo-faulty-raid10
1
683 test_read_fixable tank lo-faulty-raidz
1
684 test_read_fixable tank lo-faulty-raidz2
1
685 test_read_fixable tank lo-faulty-raidz3
1
688 run_test
6 "fixable read error"
693 local POOL_REDUNDANT
=$3
694 local VDEV_DAMAGE
="$4"
695 local ZVOL_NAME
="zvol"
696 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
698 if [ ${MD_PARTITIONABLE} -eq 0 ]; then
703 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
704 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
706 # Create a pattern to be verified.
707 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
708 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
710 # Verify the pattern and that no vdev has cksum errors.
711 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
13
712 for (( i
=1; i
<4; i
++ )); do
713 VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md ${i}`
714 CKSUM_ERRORS
=`vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY}`
715 test ${CKSUM_ERRORS} -eq 0 || fail
14
718 # Corrupt the bulk of a vdev with random garbage, we damage as many
719 # vdevs as we have levels of redundancy. For example for a raidz3
720 # configuration we can trash 3 vdevs and still expect correct data.
721 # This improves the odds that we read one of the damaged vdevs.
722 for VDEV
in ${VDEV_DAMAGE}; do
723 VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} md $VDEV`
724 pattern_write
/dev
/urandom
1M
64 /dev
/${VDEV_FAULTY}p1
727 # Verify the pattern is still correct. For non-redundant pools
728 # expect failure and for redundant pools success due to resilvering.
729 if [ ${POOL_REDUNDANT} -eq 1 ]; then
730 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
16
732 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} && fail
17
735 CKSUM_ERRORS
=`vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY}`
736 test ${CKSUM_ERRORS} -gt 0 || fail
18
737 STATUS
=`vdev_status ${POOL_NAME} ${VDEV_FAULTY}`
738 test "${STATUS}" = "ONLINE" || fail
19
740 # The checksum errors must be logged as an event.
741 local CKSUM_ERRORS
=`zpool_event "zfs.checksum" "zio_err"`
742 test ${CKSUM_ERRORS} = "0x34" ||
test ${CKSUM_ERRORS} = "0x0" || fail
20
# Verify permanent errors for non-redundant pools, and for redundant
# pools trigger a scrub and check that all checksums have been fixed.
746 if [ ${POOL_REDUNDANT} -eq 1 ]; then
747 # Scrub the checksum errors and clear the faults.
748 ${ZPOOL} scrub
${POOL_NAME} || fail
21
750 ${ZPOOL} clear ${POOL_NAME} || fail
22
752 # Re-verify the pattern for fixed checksums.
753 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
23
754 CKSUM_ERRORS
=`vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY}`
755 test ${CKSUM_ERRORS} -eq 0 || fail
24
757 # Re-verify the entire pool for fixed checksums.
758 ${ZPOOL} scrub
${POOL_NAME} || fail
25
759 CKSUM_ERRORS
=`vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY}`
760 test ${CKSUM_ERRORS} -eq 0 || fail
26
762 ${ZPOOL} status
-v ${POOL_NAME} | \
763 grep -A8 "Permanent errors" | \
764 grep -q "${POOL_NAME}/${ZVOL_NAME}" || fail
31
765 ${ZPOOL} clear ${POOL_NAME} || fail
32
767 pattern_remove
${TMP_PATTERN} || fail
41
769 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
773 # Silent data corruption
775 test_cksum tank lo-faulty-raid0
0 "1"
776 test_cksum tank lo-faulty-raid10
1 "1 3"
777 test_cksum tank lo-faulty-raidz
1 "4"
778 test_cksum tank lo-faulty-raidz2
1 "3 4"
779 test_cksum tank lo-faulty-raidz3
1 "2 3 4"
782 run_test
7 "silent data corruption"
784 # Soft write timeout at the scsi device layer.
785 test_write_timeout_soft
() {
788 local POOL_REDUNDANT
=$3
790 local ZVOL_NAME
="zvol"
791 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
793 if [ ${SCSI_DEBUG} -eq 0 ]; then
798 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
799 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
801 # Set timeout(0x4) for every nth command.
802 fault_set_sd
4 ${POOL_NTH}
804 # The application must not observe an error.
805 local TMP_PATTERN
=`pattern_create 1M 8` || fail
11
806 pattern_write
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
12
# Intermittent write timeouts even with FAILFAST set may not cause
# an EIO (5) event. This is because how FAILFAST is handled depends
# a lot on the low level driver and the exact nature of the failure.
# We will however see a 'zfs.delay' event logged due to the timeout.
813 VDEV_DELAY
=`zpool_event "zfs.delay" "zio_delay"`
814 test `printf "%d" ${VDEV_DELAY}` -ge 30000 || fail
13
816 # Verify the known pattern.
817 pattern_verify
${TMP_PATTERN} 1M
8 ${ZVOL_DEVICE} || fail
14
818 pattern_remove
${TMP_PATTERN} || fail
15
820 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
825 test_write_timeout_soft tank scsi_debug-raid0
0 50
826 test_write_timeout_soft tank scsi_debug-raid10
1 100
827 test_write_timeout_soft tank scsi_debug-raidz
1 75
828 test_write_timeout_soft tank scsi_debug-raidz2
1 150
829 test_write_timeout_soft tank scsi_debug-raidz3
1 300
832 run_test
8 "soft write timeout"
834 # Persistent write timeout at the scsi device layer.
835 test_write_timeout_hard
() {
838 local POOL_REDUNDANT
=$3
840 local ZVOL_NAME
="zvol"
841 local ZVOL_DEVICE
="/dev/${POOL_NAME}/${ZVOL_NAME}"
844 if [ ${SCSI_DEBUG} -eq 0 ]; then
849 local TMP_CACHE
=`mktemp -p /tmp zpool.cache.XXXXXXXX`
850 test_setup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
852 local TMP_PATTERN1
=`pattern_create 1M 8`
853 local TMP_PATTERN2
=`pattern_create 1M 8`
854 local TMP_PATTERN3
=`pattern_create 1M 8`
856 # Create three partitions each one gets a unique pattern. The first
857 # pattern is written before the failure, the second pattern during
858 # the failure, and the third pattern while the vdev is degraded.
859 # All three patterns are verified while the vdev is degraded and
860 # then again once it is brought back online.
861 ${PARTED} -s ${ZVOL_DEVICE} mklabel gpt || fail
11
862 ${PARTED} -s ${ZVOL_DEVICE} mkpart primary
1M
16M || fail
12
863 ${PARTED} -s ${ZVOL_DEVICE} mkpart primary
16M
32M || fail
13
864 ${PARTED} -s ${ZVOL_DEVICE} mkpart primary
32M
48M || fail
14
866 wait_udev
${ZVOL_DEVICE}1 30
867 wait_udev
${ZVOL_DEVICE}2 30
868 wait_udev
${ZVOL_DEVICE}3 30
870 # Before the failure.
871 pattern_write
${TMP_PATTERN1} 1M
8 ${ZVOL_DEVICE}1 || fail
15
873 # Get the faulty vdev name.
874 local VDEV_FAULTY
=`nth_zpool_vdev ${POOL_NAME} sd 1`
876 # Set timeout(0x4) for every nth command.
877 fault_set_sd
4 ${POOL_NTH}
879 # During the failure.
880 pattern_write
${TMP_PATTERN2} 1M
8 ${ZVOL_DEVICE}2 || fail
21
882 # Expect write errors to be logged to 'zpool status'
883 local WRITE_ERRORS
=`vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY}`
884 test ${WRITE_ERRORS} -gt 0 || fail
22
886 local VDEV_STATUS
=`vdev_status ${POOL_NAME} ${VDEV_FAULTY}`
887 test "${VDEV_STATUS}" = "UNAVAIL" || fail
23
889 # Clear the error and remove it from /dev/.
891 rm -f /dev
/${VDEV_FAULTY}[0-9]
893 # Verify the first two patterns and write out the third.
894 pattern_write
${TMP_PATTERN3} 1M
8 ${ZVOL_DEVICE}3 || fail
31
895 pattern_verify
${TMP_PATTERN1} 1M
8 ${ZVOL_DEVICE}1 || fail
32
896 pattern_verify
${TMP_PATTERN2} 1M
8 ${ZVOL_DEVICE}2 || fail
33
897 pattern_verify
${TMP_PATTERN3} 1M
8 ${ZVOL_DEVICE}3 || fail
34
899 # Bring the device back online by rescanning for it. It must appear
900 # in lsscsi and be available to dd before allowing ZFS to bring it
901 # online. This is not required but provides additional sanity.
902 while [ ${RESCAN} -eq 1 ]; do
904 wait_udev
/dev
/${VDEV_FAULTY} 30
906 if [ `${LSSCSI} | grep -c "/dev/${VDEV_FAULTY}"` -eq 0 ]; then
910 dd if=/dev
/${VDEV_FAULTY} of
=/dev
/null bs
=8M count
=1 &>/dev
/null
911 if [ $?
-ne 0 ]; then
918 # Bring the device back online. We expect it to be automatically
919 # resilvered without error and we should see minimally the zfs.io,
920 # zfs.statechange (VDEV_STATE_HEALTHY (0x7)), and zfs.resilver.*
922 ${ZPOOL} online ${POOL_NAME} ${VDEV_FAULTY} || fail
51
924 test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail
52
925 test `zpool_event "zfs.statechange" "vdev_state"` = "0x7" || fail
53
926 test `zpool_event "zfs.resilver.start" "ena"` != "" || fail
54
927 test `zpool_event "zfs.resilver.finish" "ena"` != "" || fail
55
928 test `zpool_scan_errors ${POOL_NAME}` -eq 0 || fail
56
930 local VDEV_STATUS
=`vdev_status ${POOL_NAME} ${VDEV_FAULTY}`
931 test "${VDEV_STATUS}" = "ONLINE" || fail
57
933 # Verify the known pattern.
934 pattern_verify
${TMP_PATTERN1} 1M
8 ${ZVOL_DEVICE}1 || fail
61
935 pattern_verify
${TMP_PATTERN2} 1M
8 ${ZVOL_DEVICE}2 || fail
62
936 pattern_verify
${TMP_PATTERN3} 1M
8 ${ZVOL_DEVICE}3 || fail
63
937 pattern_remove
${TMP_PATTERN1} || fail
64
938 pattern_remove
${TMP_PATTERN2} || fail
65
939 pattern_remove
${TMP_PATTERN3} || fail
66
941 test_cleanup
${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
946 skip_nonewline
# Skip non-redundant config
947 test_write_timeout_hard tank scsi_debug-raid10
1 -50
948 test_write_timeout_hard tank scsi_debug-raidz
1 -50
949 test_write_timeout_hard tank scsi_debug-raidz2
1 -50
950 test_write_timeout_hard tank scsi_debug-raidz3
1 -50
953 run_test
9 "hard write timeout"