// git.proxmox.com Git - proxmox-backup.git/blob - src/tools/disks/smart.rs
// disk: smartctl: ignore bit 2 of exitcode
// [proxmox-backup.git] / src / tools / disks / smart.rs
1 use std::collections::{HashMap, HashSet};
2
3 use ::serde::{Deserialize, Serialize};
4 use anyhow::{bail, Error};
5 use lazy_static::lazy_static;
6
7 use proxmox_schema::api;
8
9 #[api()]
10 #[derive(Debug, Serialize, Deserialize)]
11 #[serde(rename_all = "lowercase")]
12 /// SMART status
13 pub enum SmartStatus {
14 /// Smart tests passed - everything is OK
15 Passed,
16 /// Smart tests failed - disk has problems
17 Failed,
18 /// Unknown status
19 Unknown,
20 }
21
22 #[api()]
23 #[derive(Debug, Serialize, Deserialize)]
24 /// SMART Attribute
25 pub struct SmartAttribute {
26 /// Attribute name
27 name: String,
28 // FIXME: remove value with next major relase (PBS 3.0)
29 /// duplicate of raw - kept for API stability
30 value: String,
31 /// Attribute raw value
32 raw: String,
33 // the rest of the values is available for ATA type
34 /// ATA Attribute ID
35 #[serde(skip_serializing_if = "Option::is_none")]
36 id: Option<u64>,
37 /// ATA Flags
38 #[serde(skip_serializing_if = "Option::is_none")]
39 flags: Option<String>,
40 /// ATA normalized value (0..100)
41 #[serde(skip_serializing_if = "Option::is_none")]
42 normalized: Option<f64>,
43 /// ATA worst
44 #[serde(skip_serializing_if = "Option::is_none")]
45 worst: Option<f64>,
46 /// ATA threshold
47 #[serde(skip_serializing_if = "Option::is_none")]
48 threshold: Option<f64>,
49 }
50
51 #[api(
52 properties: {
53 status: {
54 type: SmartStatus,
55 },
56 wearout: {
57 description: "Wearout level.",
58 type: f64,
59 optional: true,
60 },
61 attributes: {
62 description: "SMART attributes.",
63 type: Array,
64 items: {
65 type: SmartAttribute,
66 },
67 },
68 },
69 )]
70 #[derive(Debug, Serialize, Deserialize)]
71 /// Data from smartctl
72 pub struct SmartData {
73 pub status: SmartStatus,
74 pub wearout: Option<f64>,
75 pub attributes: Vec<SmartAttribute>,
76 }
77
78 /// Read smartctl data for a disk (/dev/XXX).
79 pub fn get_smart_data(disk: &super::Disk, health_only: bool) -> Result<SmartData, Error> {
80 const SMARTCTL_BIN_PATH: &str = "smartctl";
81
82 let mut command = std::process::Command::new(SMARTCTL_BIN_PATH);
83 command.arg("-H");
84 if !health_only {
85 command.args(&["-A", "-j"]);
86 }
87
88 let disk_path = match disk.device_path() {
89 Some(path) => path,
90 None => bail!("disk {:?} has no node in /dev", disk.syspath()),
91 };
92 command.arg(disk_path);
93
94 let output = proxmox_sys::command::run_command(
95 command,
96 Some(
97 |exitcode| (exitcode & 0b0011) == 0, // only bits 0-1 are fatal errors
98 ),
99 )?;
100
101 let output: serde_json::Value = output.parse()?;
102
103 let mut wearout = None;
104
105 let mut attributes = Vec::new();
106 let mut wearout_candidates = HashMap::new();
107
108 // ATA devices
109 if let Some(list) = output["ata_smart_attributes"]["table"].as_array() {
110 for item in list {
111 let id = match item["id"].as_u64() {
112 Some(id) => id,
113 None => continue, // skip attributes without id
114 };
115
116 let name = match item["name"].as_str() {
117 Some(name) => name.to_string(),
118 None => continue, // skip attributes without name
119 };
120
121 let raw_value = match item["raw"]["string"].as_str() {
122 Some(value) => value.to_string(),
123 None => continue, // skip attributes without raw value
124 };
125
126 let flags = match item["flags"]["string"].as_str() {
127 Some(flags) => flags.to_string(),
128 None => continue, // skip attributes without flags
129 };
130
131 let normalized = match item["value"].as_f64() {
132 Some(v) => v,
133 None => continue, // skip attributes without normalize value
134 };
135
136 let worst = match item["worst"].as_f64() {
137 Some(v) => v,
138 None => continue, // skip attributes without worst entry
139 };
140
141 let threshold = match item["thresh"].as_f64() {
142 Some(v) => v,
143 None => continue, // skip attributes without threshold entry
144 };
145
146 if WEAROUT_FIELD_NAMES.contains(&name as &str) {
147 wearout_candidates.insert(name.clone(), normalized);
148 }
149
150 attributes.push(SmartAttribute {
151 name,
152 value: raw_value.clone(),
153 raw: raw_value,
154 id: Some(id),
155 flags: Some(flags),
156 normalized: Some(normalized),
157 worst: Some(worst),
158 threshold: Some(threshold),
159 });
160 }
161 }
162
163 if !wearout_candidates.is_empty() {
164 for field in WEAROUT_FIELD_ORDER {
165 if let Some(value) = wearout_candidates.get(field as &str) {
166 wearout = Some(*value);
167 break;
168 }
169 }
170 }
171
172 // NVME devices
173 if let Some(list) = output["nvme_smart_health_information_log"].as_object() {
174 for (name, value) in list {
175 if name == "percentage_used" {
176 // extract wearout from nvme text, allow for decimal values
177 if let Some(v) = value.as_f64() {
178 if v <= 100.0 {
179 wearout = Some(100.0 - v);
180 }
181 }
182 }
183 if let Some(value) = value.as_f64() {
184 attributes.push(SmartAttribute {
185 name: name.to_string(),
186 value: value.to_string(),
187 raw: value.to_string(),
188 id: None,
189 flags: None,
190 normalized: None,
191 worst: None,
192 threshold: None,
193 });
194 }
195 }
196 }
197
198 let status = match output["smart_status"]["passed"].as_bool() {
199 None => SmartStatus::Unknown,
200 Some(true) => SmartStatus::Passed,
201 Some(false) => SmartStatus::Failed,
202 };
203
204 Ok(SmartData {
205 status,
206 wearout,
207 attributes,
208 })
209 }
210
/// ATA attribute names that are considered as a source for the wearout value.
///
/// The order is significant: `get_smart_data` walks this list front to back and uses the
/// first attribute the disk actually reported.
static WEAROUT_FIELD_ORDER: &'static [&'static str] = &[
    "Media_Wearout_Indicator",
    "SSD_Life_Left",
    "Wear_Leveling_Count",
    "Perc_Write/Erase_Ct_BC",
    "Perc_Rated_Life_Remain",
    "Remaining_Lifetime_Perc",
    "Percent_Lifetime_Remain",
    "Lifetime_Left",
    "PCT_Life_Remaining",
    "Lifetime_Remaining",
    "Percent_Life_Remaining",
    "Percent_Lifetime_Used",
    "Perc_Rated_Life_Used",
];
226
227 lazy_static! {
228 static ref WEAROUT_FIELD_NAMES: HashSet<&'static str> =
229 WEAROUT_FIELD_ORDER.iter().cloned().collect();
230 }