]> git.proxmox.com Git - mirror_zfs-debian.git/blame - cmd/zpool/zpool.d/r_ucor
New upstream version 0.7.9
[mirror_zfs-debian.git] / cmd / zpool / zpool.d / r_ucor
CommitLineData
#!/bin/sh
#
# Show SMART stats
#

# Help text: one "name:<TAB>description" entry per column this script can
# report.  The -h handler selects the entry for the invoked script name and
# splits it on tabs, so the separator after each "name:" must be a tab
# character.  It is spelled via $tab so that it survives editors or tools
# that flatten whitespace.
tab=$(printf '\t')

helpstr="
smart:${tab}Show SMART temperature and error stats (specific to drive type)
smartx:${tab}Show SMART extended drive stats (specific to drive type).
temp:${tab}Show SMART drive temperature in celsius (all drives).
health:${tab}Show reported SMART status (all drives).
r_proc:${tab}Show SMART read GBytes processed over drive lifetime (SAS).
w_proc:${tab}Show SMART write GBytes processed over drive lifetime (SAS).
r_ucor:${tab}Show SMART read uncorrectable errors (SAS).
w_ucor:${tab}Show SMART write uncorrectable errors (SAS).
nonmed:${tab}Show SMART non-medium errors (SAS).
defect:${tab}Show SMART grown defect list (SAS).
hours_on:${tab}Show number of hours drive powered on (all drives).
realloc:${tab}Show SMART reallocated sectors count (ATA).
rep_ucor:${tab}Show SMART reported uncorrectable count (ATA).
cmd_to:${tab}Show SMART command timeout count (ATA).
pend_sec:${tab}Show SMART current pending sector count (ATA).
off_ucor:${tab}Show SMART offline uncorrectable errors (ATA).
ata_err:${tab}Show SMART ATA errors (ATA).
pwr_cyc:${tab}Show SMART power cycle count (ATA).
serial:${tab}Show disk serial number.
nvme_err:${tab}Show SMART NVMe errors (NVMe).
smart_test:${tab}Show SMART self-test results summary.
test_type:${tab}Show SMART self-test type (short, long... ).
test_status:${tab}Show SMART self-test status.
test_progress:${tab}Show SMART self-test percentage done.
test_ended:${tab}Show when the last SMART self-test ended (if supported).
"

# Hack for developer testing
#
# If you set $samples to a directory containing smartctl output text files,
# we will use them instead of running smartctl on the vdevs.  This can be
# useful if you want to test a bunch of different smartctl outputs.  Also, if
# $samples is set, an additional 'file' column is added to the zpool output
# showing the filename.
samples=

# get_filename_from_dir DIR
#
# Look in directory DIR and print the name of one regular file from it.  The
# file is chosen quasi-sequentially (index = our PID modulo the number of
# files).  This lets every invocation of this script (one per vdev) pick a
# different file without having to maintain state.
#
# Only files directly inside DIR are considered.  The file list is taken
# once, so the count and the list we index into always agree.
#
# Prints nothing and returns 1 if DIR contains no regular files; this avoids
# a modulo-by-zero.
get_filename_from_dir()
{
	dir=$1
	files=$(find "$dir" -maxdepth 1 -type f -printf "%f\n")
	[ -n "$files" ] || return 1

	num_files=$(printf '%s\n' "$files" | wc -l)
	mod=$(($$ % num_files))

	# sed line numbers are 1-based, $mod is 0-based.
	printf '%s\n' "$files" | sed -n "$((mod + 1))p"
}

# print_help_for NAME
#
# Print the description of column NAME from $helpstr, i.e. the text that
# follows "NAME:" on its line, with the separating whitespace removed.
# Prints nothing if $helpstr has no entry for NAME.  The match is anchored at
# the start of the line so NAME cannot match inside another entry.
print_help_for()
{
	echo "$helpstr" | sed -n "s/^$1:[[:space:]]*//p"
}

# The name we were invoked as selects which column(s) to report.
script=$(basename "$0")

if [ "$1" = "-h" ] ; then
	print_help_for "$script"
	exit
fi

# 'command -v' is the POSIX way to locate a program; it leaves
# $smartctl_path empty when smartctl is not installed.
smartctl_path=$(command -v smartctl)

# parse_smartctl_output
#
# Read "smartctl -a" text on stdin and print one "name=value" line for each
# statistic recognized in it.  The last lines printed are "type=TYPE" (TYPE
# is sas, sata or nvme, or empty if none of those words was seen) followed by
# an empty line.
#
# What kind of drive are we?  Look for the right line in smartctl:
#
# SAS:
#	Transport protocol:   SAS
#
# SATA:
#	ATA Version is:   8
#
# NVMe:
#	SMART/Health Information (NVMe Log 0xnn, NSID 0xnn)
#
parse_smartctl_output()
{
	awk '
# SAS specific
/read:/{print "rrd="$4"\nr_cor="$5"\nr_proc="$7"\nr_ucor="$8}
/write:/{print "rwr="$4"\nw_cor="$5"\nw_proc="$7"\nw_ucor="$8}
/Non-medium error count/{print "nonmed="$4}
/Elements in grown defect list/{print "defect="$6}

# SAS common
/SAS/{type="sas"}
/Drive Temperature:/{print "temp="$4}
# Status can be a long string, substitute spaces for underscores
/SMART Health Status:/{printf "health="; for(i=4;i<=NF-1;i++){printf "%s_", $i}; printf "%s\n", $i}
/number of hours powered up/{print "hours_on="$7; hours_on=int($7)}
/Serial number:/{print "serial="$3}

# SATA specific
/Reallocated_Sector_Ct/{print "realloc="$10}
/Reported_Uncorrect/{print "rep_ucor="$10}
/Command_Timeout/{print "cmd_to="$10}
/Current_Pending_Sector/{print "pend_sec="$10}
/Offline_Uncorrectable/{print "off_ucor="$10}
/ATA Error Count:/{print "ata_err="$4}
/Power_Cycle_Count/{print "pwr_cyc="$10}

# SATA common
/SATA/{type="sata"}
/Temperature_Celsius/{print "temp="$10}
/Airflow_Temperature_Cel/{print "temp="$10}
/Current Temperature:/{print "temp="$3}
/SMART overall-health self-assessment test result:/{print "health="$6}
/Power_On_Hours/{print "hours_on="$10; hours_on=int($10)}
/Serial Number:/{print "serial="$3}

# NVMe common
#
# The health and serial number lines have the same format as on SATA and are
# handled by the SATA rules above; repeating them here would print every
# value twice.  The temperature rule is anchored to the start of the line so
# it does not also fire on the "Drive Temperature:" and
# "Current Temperature:" lines matched by the rules above.
/NVMe/{type="nvme"}
/^Temperature:/{print "temp="$2}
/Power On Hours:/{gsub("[^0-9]","",$4); print "hours_on="$4}
/Power Cycles:/{print "pwr_cyc="$3}

# NVMe specific
/Media and Data Integrity Errors:/{print "nvme_err="$6}

# SMART self-test info
/Self-test execution status:/{progress=tolower($4)}	# SAS
/SMART Self-test log/{test_seen=1}			# SAS
/SMART Extended Self-test Log/{test_seen=1}		# SATA
/# 1/{
	test_type=tolower($3"_"$4);
	# Status could be one word ("Completed") or multiple ("Completed: read
	# failure").  Look for the ":" to see if we need to grab more words.

	if ($5 ~ ":")
		status=tolower($5""$6"_"$7)
	else
		status=tolower($5)
	if (status=="self")
		status="running";

	if (type == "sas") {
		hours=int($(NF-4))
	} else {
		hours=int($(NF-1))
		# SATA reports percent remaining, rather than percent done
		# Convert it to percent done.
		progress=(100-int($(NF-2)))"%"
	}
	# When we int()-ify "hours", it converts stuff like "NOW" and "-" into
	# 0.  In those cases, set it to hours_on, so they will cancel out in
	# the "hours_ago" calculation later on.
	if (hours == 0)
		hours=hours_on

	if (test_seen) {
		print "test="hours_on
		print "test_type="test_type
		print "test_status="status
		print "test_progress="progress
	}
	# Not all drives report hours_on
	if (hours_on && hours) {
		total_hours_ago=(hours_on-hours)
		days_ago=int(total_hours_ago/24)
		hours_ago=(total_hours_ago % 24)
		if (days_ago != 0)
			ago_str=days_ago"d"
		if (hours_ago !=0)
			ago_str=ago_str""hours_ago"h"
		print "test_ended="ago_str
	}
}

END {print "type="type; ORS="\n"; print ""}
'
}

# Collect SMART data when the vdev is a block device and smartctl is
# executable, or whenever $samples is set (developer testing).
if [ -b "$VDEV_UPATH" ] && [ -x "$smartctl_path" ] || [ -n "$samples" ] ; then
	if [ -n "$samples" ] ; then
		# cat a smartctl output text file instead of running smartctl
		# on a vdev (only used for developer testing).
		file=$(get_filename_from_dir "$samples")
		echo "file=$file"
		raw_out=$(cat "$samples/$file")
	else
		# Run smartctl directly with quoted arguments instead of
		# eval-ing a command string: a device path containing spaces
		# or shell metacharacters is then passed through verbatim
		# rather than being re-parsed by the shell.
		raw_out=$(sudo "$smartctl_path" -a "$VDEV_UPATH")
	fi

	out=$(echo "$raw_out" | parse_smartctl_output)
fi
# Pull the drive type out of the "type=" line of the parsed output.
type=$(echo "$out" | grep '^type=' | cut -d '=' -f 2)

# If type is not set by now, either we don't have a block device
# or smartctl failed.  Either way, default to ATA and set $out to
# nothing.
if [ -z "$type" ]; then
	type="sata"
	out=
fi

# select_columns SCRIPT TYPE
#
# Print the "|"-separated list of column names that the script named SCRIPT
# reports for a drive of type TYPE (sas, sata or nvme).  "smart" and "smartx"
# expand to a type-specific set, "smart_test" to the self-test columns, and
# any other name stands for the single column of that name.  Prints nothing
# for "smart"/"smartx" with an unrecognized TYPE.
select_columns()
{
	case "$1" in
	smart)
		# Print temperature plus common predictors of drive failure
		case "$2" in
		sas)
			echo "temp|health|r_ucor|w_ucor"
			;;
		sata)
			echo "temp|health|ata_err|realloc|rep_ucor|cmd_to|pend_sec|off_ucor"
			;;
		nvme)
			echo "temp|health|nvme_err"
			;;
		esac
		;;
	smartx)
		# Print some other interesting stats
		case "$2" in
		sas)
			echo "hours_on|defect|nonmed|r_proc|w_proc"
			;;
		sata|nvme)
			echo "hours_on|pwr_cyc"
			;;
		esac
		;;
	smart_test)
		echo "test_type|test_status|test_progress|test_ended"
		;;
	*)
		printf '%s\n' "$1"
		;;
	esac
}

scripts=$(select_columns "$script" "$type")

# print_columns OUT COLUMNS
#
# OUT is a newline-separated list of "name=value" lines and COLUMNS a
# "|"-separated list of column names.  Print the OUT lines whose name is in
# COLUMNS, followed by an empty "name=" line for every requested column that
# has no value, so that each requested column always appears in the output.
#
# Names are matched exactly against the text before the "=", so a column
# name that merely occurs inside some other column's value is not treated
# as a match.
print_columns()
{
	with_vals=$(echo "$1" | grep -E "^($2)=")
	if [ -n "$with_vals" ]; then
		echo "$with_vals"
		# Drop every requested name that already has a value: -F -x
		# compares whole names literally, one pattern per line.
		without_vals=$(echo "$2" | tr "|" "\n" |
		    grep -v -x -F "$(echo "$with_vals" |
		    awk -F "=" '{print $1}')" | awk '{print $0"="}')
	else
		without_vals=$(echo "$2" | tr "|" "\n" | awk '{print $0"="}')
	fi

	if [ -n "$without_vals" ]; then
		echo "$without_vals"
	fi
}

print_columns "$out" "$scripts"