]> git.proxmox.com Git - mirror_zfs-debian.git/blob - cmd/zed/zed.d/io-spare.sh
Imported Upstream version 0.6.4.2
[mirror_zfs-debian.git] / cmd / zed / zed.d / io-spare.sh
1 #!/bin/sh
2 #
3 # Replace a device with a hot spare in response to IO or checksum errors.
4 # The following actions will be performed automatically when the number
5 # of errors exceeds the limit set by ZED_SPARE_ON_IO_ERRORS or
6 # ZED_SPARE_ON_CHECKSUM_ERRORS.
7 #
8 # 1) FAULT the device on IO errors, no further IO will be attempted.
9 # DEGRADE the device on checksum errors, the device is still
10 # functional and can be used to service IO requests.
11 # 2) Set the SES fault beacon for the device.
12 # 3) Replace the device with a hot spare if any are available.
13 #
14 # Once the hot sparing operation is complete either the failed device or
15 # the hot spare must be manually retired using the 'zpool detach' command.
16 # The 'autoreplace' functionality which would normally take care of this
17 # under Illumos has not yet been implemented.
18 #
19 # Full support for autoreplace is planned, but it requires that the full
20 # ZFS Diagnosis Engine be ported. In the meantime this script provides
21 # the majority of the expected hot spare functionality.
22 #
23 # Exit codes:
24 # 0: replaced by hot spare
25 # 1: no hot spare device available
26 # 2: hot sparing disabled
27 # 3: already faulted or degraded
28 # 4: unsupported event class
29 # 5: internal error
30 #
# Pull in the zedlet configuration when one is installed.
if [ -f "${ZED_ZEDLET_DIR}/zed.rc" ]; then
    . "${ZED_ZEDLET_DIR}/zed.rc"
fi

# The event must identify the pool, subclass and vdev; anything less is
# reported as an internal error (exit code 5).
if [ -z "${ZEVENT_POOL}" ]; then
    exit 5
elif [ -z "${ZEVENT_SUBCLASS}" ]; then
    exit 5
elif [ -z "${ZEVENT_VDEV_PATH}" ]; then
    exit 5
elif [ -z "${ZEVENT_VDEV_GUID}" ]; then
    exit 5
fi

# Hot sparing is off unless a non-zero limit is configured in zed.rc.
: "${ZED_SPARE_ON_IO_ERRORS:=0}"
: "${ZED_SPARE_ON_CHECKSUM_ERRORS:=0}"

if [ "${ZED_SPARE_ON_IO_ERRORS}" -eq 0 ] &&
    [ "${ZED_SPARE_ON_CHECKSUM_ERRORS}" -eq 0 ]; then
    exit 2
fi

# Serialize execution: hold an exclusive lock on file descriptor 8, which
# stays open on the lock file for the remainder of the script.
: "${ZED_LOCKDIR:=/var/lock}"
LOCKFILE="${ZED_LOCKDIR}/zed.spare.lock"

exec 8> "${LOCKFILE}"
flock -x 8
53
54 # Given a <pool> and <device> return the status, (ONLINE, FAULTED, etc...).
55 vdev_status() {
56 local POOL=$1
57 local VDEV=`basename $2`
58 local T=' ' # tab character since '\t' isn't portable
59
60 ${ZPOOL} status ${POOL} | sed -n -e \
61 "s,^[ $T]*\(.*$VDEV\(-part[0-9]\+\)\?\)[ $T]*\([A-Z]\+\).*,\1 \3,p"
62 return 0
63 }
64
# Decide how the failing vdev should be marked for the current event.
#
# Globals read: ZEVENT_CLASS, ZEVENT_VDEV_READ_ERRORS,
#               ZEVENT_VDEV_WRITE_ERRORS, ZEVENT_VDEV_CKSUM_ERRORS,
#               ZED_SPARE_ON_IO_ERRORS, ZED_SPARE_ON_CHECKSUM_ERRORS
# Globals set:  ACTION - "fault", "degrade", or empty when nothing is to
#                        be done
#               ERRORS - the error count that was compared (only for the
#                        two handled event classes)
# Returns:      always 0
zed_spare_select_action() {
    local LIMIT

    # Always start from an empty ACTION so that a value inherited from the
    # environment or from zed.rc cannot trigger an action when the limit
    # has not been reached.
    ACTION=

    case "${ZEVENT_CLASS}" in
    ereport.fs.zfs.io)
        # Fault devices after N I/O errors (reads plus writes). Missing
        # counters are treated as zero.
        LIMIT=${ZED_SPARE_ON_IO_ERRORS:-0}
        ERRORS=$(( ${ZEVENT_VDEV_READ_ERRORS:-0} + ${ZEVENT_VDEV_WRITE_ERRORS:-0} ))
        if [ "${LIMIT}" -gt 0 ] && [ "${ERRORS}" -ge "${LIMIT}" ]; then
            ACTION="fault"
        fi
        ;;
    ereport.fs.zfs.checksum)
        # Degrade devices after N checksum errors.
        LIMIT=${ZED_SPARE_ON_CHECKSUM_ERRORS:-0}
        ERRORS=${ZEVENT_VDEV_CKSUM_ERRORS:-0}
        if [ "${LIMIT}" -gt 0 ] && [ "${ERRORS}" -ge "${LIMIT}" ]; then
            ACTION="degrade"
        fi
        ;;
    esac

    return 0
}

zed_spare_select_action
84
if [ -n "${ACTION}" ]; then

    # Nothing to do when the device is already FAULTED or DEGRADED.
    # The "<name> <STATUS>" output of vdev_status is intentionally left
    # unquoted so that it splits into $1 and $2.
    set -- $(vdev_status "${ZEVENT_POOL}" "${ZEVENT_VDEV_PATH}")
    ZEVENT_VDEV_PATH_FOUND=$1
    STATUS=$2
    case "${STATUS}" in
    FAULTED | DEGRADED)
        exit 3
        ;;
    esac

    # Step 1) FAULT or DEGRADE the device
    #
    # ZINJECT (and ZPOOL below) are left unquoted, as in the original
    # invocations, so a value carrying extra words still splits into
    # command and arguments. The result is not checked: sparing is still
    # attempted when this step fails.
    ${ZINJECT} -d "${ZEVENT_VDEV_GUID}" -A "${ACTION}" "${ZEVENT_POOL}"

    # Step 2) Set the SES fault beacon.
    #
    # XXX: Set the 'fault' or 'ident' beacon for the device.  This can
    # be done through the sg_ses utility, the only hard part is to map
    # the sd device to its corresponding enclosure and slot.  We may
    # be able to leverage the existing vdev_id scripts for this.
    #
    # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3
    # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3

    # Step 3) Replace the device with a hot spare.
    #
    # Walk the spares in the order given and stop at the first one that
    # is AVAIL and for which the replace succeeds.
    # ZEVENT_VDEV_SPARE_PATHS is a whitespace separated list, so it is
    # deliberately expanded unquoted.
    for SPARE in ${ZEVENT_VDEV_SPARE_PATHS}; do
        set -- $(vdev_status "${ZEVENT_POOL}" "${SPARE}")
        SPARE_VDEV_FOUND=$1
        STATUS=$2
        if [ "${STATUS}" = "AVAIL" ]; then
            ${ZPOOL} replace "${ZEVENT_POOL}" \
                "${ZEVENT_VDEV_GUID}" "${SPARE_VDEV_FOUND}" && exit 0
        fi
    done

    # No spare could be attached.
    exit 1
fi

# No action was selected for this event.
exit 4