]>
Commit | Line | Data |
---|---|---|
4e9b1569 | 1 | #!/bin/ksh -p |
2 | ||
3 | # | |
4 | # CDDL HEADER START | |
5 | # | |
6 | # This file and its contents are supplied under the terms of the | |
7 | # Common Development and Distribution License ("CDDL"), version 1.0. | |
8 | # You may only use this file in accordance with the terms of version | |
9 | # 1.0 of the CDDL. | |
10 | # | |
11 | # A full copy of the text of the CDDL should have accompanied this | |
12 | # source. A copy of the CDDL is also available via the Internet at | |
13 | # http://www.illumos.org/license/CDDL. | |
14 | # | |
15 | # CDDL HEADER END | |
16 | # | |
17 | ||
18 | # | |
19 | # Copyright (c) 2017 by Intel Corporation. All rights reserved. | |
20 | # Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. | |
21 | # | |
22 | ||
23 | . $STF_SUITE/include/libtest.shlib | |
24 | . $STF_SUITE/tests/functional/fault/fault.cfg | |
25 | ||
26 | # | |
27 | # DESCRIPTION: | |
28 | # Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when | |
29 | # multiple drives are faulted. | |
30 | # | |
31 | # STRATEGY: | |
32 | # 1. Create a pool with two hot spares | |
33 | # 2. Inject IO ERRORS with a zinject error handler on the first device | |
34 | # 3. Start a scrub | |
35 | # 4. Verify that ZED kicks in a hot spare and that the pool/device status is as expected | |
36 | # 5. Inject IO ERRORS on a second device | |
37 | # 6. Start a scrub | |
38 | # 7. Verify the ZED kicks in a second hot spare | |
39 | # 8. Clear the fault on both devices | |
40 | # 9. Verify that the hot spares are available again and that the pool/device status is as expected | |
41 | # 10. Rinse and repeat (mirror, raidz2 and raidz3 only), this time faulting both devices at the same time | |
42 | # | |
43 | ||
44 | verify_runnable "both" | |
45 | ||
46 | function cleanup | |
47 | { | |
48 | log_must zinject -c all # clear injected faults before tearing the pool down | |
49 | destroy_pool "${TESTPOOL}" | |
50 | rm -f ${DATA_DEVS} ${SPARE_DEVS} # backing files are recreated on every pass | |
51 | } | |
52 | ||
53 | log_assert "ZED should be able to handle multiple faulted devices" | |
54 | log_onexit cleanup | |
55 | ||
56 | # Discard ZED events left behind by previous runs | |
57 | zed_events_drain | |
58 | ||
59 | FAULT_DEV1="${TEST_BASE_DIR}/fault-dev1" | |
60 | FAULT_DEV2="${TEST_BASE_DIR}/fault-dev2" | |
61 | SAFE_DEV1="${TEST_BASE_DIR}/safe-dev1" | |
62 | SAFE_DEV2="${TEST_BASE_DIR}/safe-dev2" | |
63 | DATA_DEVS="${FAULT_DEV1} ${FAULT_DEV2} ${SAFE_DEV1} ${SAFE_DEV2}" | |
64 | SPARE_DEV1="${TEST_BASE_DIR}/spare-dev1" | |
65 | SPARE_DEV2="${TEST_BASE_DIR}/spare-dev2" | |
66 | SPARE_DEVS="${SPARE_DEV1} ${SPARE_DEV2}" | |
67 | ||
68 | for type in "mirror" "raidz" "raidz2" "raidz3"; do | |
69 | # 1. Create a pool with two hot spares (fail fast if the backing files cannot be created) | |
70 | log_must truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS | |
71 | log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS | |
72 | ||
73 | # 2. Inject IO ERRORS with a zinject error handler on the first device | |
74 | log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL | |
75 | ||
76 | # 3. Start a scrub | |
77 | log_must zpool scrub $TESTPOOL | |
78 | ||
79 | # 4. Verify that ZED kicks in a hot spare and that the pool/device status is as expected | |
80 | log_note "Wait for ZED to auto-spare" | |
81 | log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60 | |
82 | log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60 | |
83 | log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE" | |
84 | log_must check_state $TESTPOOL "" "DEGRADED" | |
85 | ||
86 | # 5. Inject IO ERRORS on a second device | |
87 | log_must zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL | |
88 | ||
89 | # 6. Start a scrub once any running scrub or resilver has finished | |
90 | while is_pool_scrubbing $TESTPOOL || is_pool_resilvering $TESTPOOL; do | |
91 | sleep 1 | |
92 | done | |
93 | log_must zpool scrub $TESTPOOL | |
94 | ||
95 | # 7. Verify that ZED kicks in a second hot spare | |
96 | log_note "Wait for ZED to auto-spare" | |
97 | log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60 | |
98 | log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60 | |
99 | log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE" | |
100 | log_must check_state $TESTPOOL "" "DEGRADED" | |
101 | ||
102 | # 8. Clear the fault on both devices | |
103 | log_must zinject -c all | |
104 | log_must zpool clear $TESTPOOL $FAULT_DEV1 | |
105 | log_must zpool clear $TESTPOOL $FAULT_DEV2 | |
106 | ||
107 | # 9. Verify that the hot spares are available again and that the pool/device status is as expected | |
108 | log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "ONLINE" 60 | |
109 | log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "ONLINE" 60 | |
110 | log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "AVAIL" | |
111 | log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL" | |
112 | log_must check_state $TESTPOOL "" "ONLINE" | |
113 | ||
114 | # Cleanup: destroy the pool and its backing files so the next vdev type starts clean | |
115 | cleanup | |
116 | done | |
117 | ||
118 | # Rinse and repeat, this time faulting both devices at the same time | |
220dd4ae | 119 | # NOTE: "raidz" is excluded since it cannot survive 2 faulted devices |
4e9b1569 | 120 | # NOTE: "mirror" is a 4-way mirror here and should survive this test |
121 | for type in "mirror" "raidz2" "raidz3"; do | |
122 | # 1. Create a pool with two hot spares (fail fast if the backing files cannot be created) | |
123 | log_must truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS | |
124 | log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS | |
125 | ||
126 | # 2. Register both zinject error handlers before the scrub; run in the foreground so log_must sees their exit status | |
127 | log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL | |
128 | log_must zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL | |
129 | ||
130 | # 3. Start a single scrub that hits both faulted devices | |
131 | log_must zpool scrub $TESTPOOL | |
132 | ||
133 | # 4. Verify that ZED kicks in two hot spares and that the pool/device status is as expected | |
134 | log_note "Wait for ZED to auto-spare" | |
135 | log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60 | |
136 | log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60 | |
137 | log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60 | |
138 | log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60 | |
139 | log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE" | |
140 | log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE" | |
141 | log_must check_state $TESTPOOL "" "DEGRADED" | |
142 | ||
143 | # 5. Clear the fault on both devices | |
144 | log_must zinject -c all | |
145 | log_must zpool clear $TESTPOOL $FAULT_DEV1 | |
146 | log_must zpool clear $TESTPOOL $FAULT_DEV2 | |
147 | ||
148 | # Cleanup: destroy the pool and its backing files so the next vdev type starts clean | |
149 | cleanup | |
150 | done | |
151 | ||
152 | log_pass "ZED successfully handles multiple faulted devices" |