]> git.proxmox.com Git - mirror_zfs.git/blame - tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
Fix typos in tests/
[mirror_zfs.git] / tests / zfs-tests / tests / functional / fault / auto_spare_multiple.ksh
CommitLineData
4e9b1569 1#!/bin/ksh -p
2
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source. A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright (c) 2017 by Intel Corporation. All rights reserved.
20# Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
21#
22
23. $STF_SUITE/include/libtest.shlib
24. $STF_SUITE/tests/functional/fault/fault.cfg
25
26#
27# DESCRIPTION:
28# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
29# multiple drives are faulted.
30#
31# STRATEGY:
32# 1. Create a pool with two hot spares
33# 2. Inject IO ERRORS with a zinject error handler on the first device
34# 3. Start a scrub
35# 4. Verify the ZED kicks in a hot spare and expected pool/device status
36# 5. Inject IO ERRORS on a second device
37# 6. Start a scrub
38# 7. Verify the ZED kicks in a second hot spare
39# 8. Clear the fault on both devices
40# 9. Verify the hot spares are available and expected pool/device status
41# 10. Rinse and repeat, this time faulting both devices at the same time
42#
43
verify_runnable "both"

#
# Tear down everything a single test iteration creates: remove every
# registered zinject handler, destroy the test pool and delete the
# file-backed data and spare vdevs. Registered via log_onexit and also
# invoked explicitly at the end of each loop iteration.
#
function cleanup {
	log_must zinject -c all
	destroy_pool $TESTPOOL
	# DATA_DEVS and SPARE_DEVS are space-separated lists of paths, so they
	# are intentionally left unquoted to split into one operand per file.
	rm -f $DATA_DEVS $SPARE_DEVS
}
52
log_assert "ZED should be able to handle multiple faulted devices"
log_onexit cleanup

# Clear events from previous runs
zed_events_drain

# Paths of the file-backed vdevs used by every pool created below.
# Two of the data devices receive injected I/O errors, two stay healthy.
FAULT_DEV1=${TEST_BASE_DIR}/fault-dev1
FAULT_DEV2=${TEST_BASE_DIR}/fault-dev2
SAFE_DEV1=${TEST_BASE_DIR}/safe-dev1
SAFE_DEV2=${TEST_BASE_DIR}/safe-dev2
SPARE_DEV1=${TEST_BASE_DIR}/spare-dev1
SPARE_DEV2=${TEST_BASE_DIR}/spare-dev2

# Space-separated lists, expanded unquoted where one word per device is needed.
DATA_DEVS="${FAULT_DEV1} ${FAULT_DEV2} ${SAFE_DEV1} ${SAFE_DEV2}"
SPARE_DEVS="${SPARE_DEV1} ${SPARE_DEV2}"
67
# Fault the two devices one after the other, once per redundancy type, and
# check that ZED attaches a hot spare for each faulted device.
for type in "mirror" "raidz" "raidz2" "raidz3"; do
	# 1. Create a pool with two hot spares
	# Creating the backing files is checked so that a failure here is
	# reported directly instead of surfacing later as a pool creation error.
	log_must truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
	log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS

	# 2. Inject IO ERRORS with a zinject error handler on the first device
	log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL

	# 3. Start a scrub
	log_must zpool scrub $TESTPOOL

	# 4. Verify the ZED kicks in a hot spare and expected pool/device status
	log_note "Wait for ZED to auto-spare"
	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
	log_must check_state $TESTPOOL "" "DEGRADED"

	# 5. Inject IO ERRORS on a second device
	log_must zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL

	# 6. Start a scrub
	# Wait until the scrub from step 3 and any resilver onto the first
	# spare are no longer running before requesting the next scrub.
	while is_pool_scrubbing $TESTPOOL || is_pool_resilvering $TESTPOOL; do
		sleep 1
	done
	log_must zpool scrub $TESTPOOL

	# 7. Verify the ZED kicks in a second hot spare
	log_note "Wait for ZED to auto-spare"
	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
	log_must check_state $TESTPOOL "" "DEGRADED"

	# 8. Clear the fault on both devices
	log_must zinject -c all
	log_must zpool clear $TESTPOOL $FAULT_DEV1
	log_must zpool clear $TESTPOOL $FAULT_DEV2

	# 9. Verify the hot spares are available and expected pool/device status
	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "ONLINE" 60
	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "AVAIL"
	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL"
	log_must check_state $TESTPOOL "" "ONLINE"

	# Cleanup
	cleanup
done
117
# Rinse and repeat, this time faulting both devices at the same time
# NOTE: "raidz" is excluded since it cannot survive 2 faulted devices
# NOTE: "mirror" is a 4-way mirror here and should survive this test
for type in "mirror" "raidz2" "raidz3"; do
	# 1. Create a pool with two hot spares
	# Creating the backing files is checked so that a failure here is
	# reported directly instead of surfacing later as a pool creation error.
	log_must truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
	log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS

	# 2. Inject IO ERRORS with a zinject error handler on two devices
	# Both handlers are launched as background jobs so neither waits on the
	# other. Because of the trailing "&", log_must only sees the status of
	# starting the job, not zinject's own exit status; a failed injection
	# shows up as a timeout in step 4.
	log_must eval "zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL &"
	log_must eval "zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL &"

	# 3. Start a scrub
	log_must zpool scrub $TESTPOOL

	# 4. Verify the ZED kicks in two hot spares and expected pool/device status
	log_note "Wait for ZED to auto-spare"
	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
	log_must check_state $TESTPOOL "" "DEGRADED"

	# 5. Clear the fault on both devices
	log_must zinject -c all
	log_must zpool clear $TESTPOOL $FAULT_DEV1
	log_must zpool clear $TESTPOOL $FAULT_DEV2

	# Cleanup
	cleanup
done

log_pass "ZED successfully handles multiple faulted devices"