]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
import smartmontools 7.0
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-18 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12 #include "config.h"
13 #define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14
15 // unconditionally included files
16 #include <inttypes.h>
17 #include <stdio.h>
18 #include <sys/types.h>
19 #include <sys/stat.h> // umask
20 #include <signal.h>
21 #include <fcntl.h>
22 #include <string.h>
23 #include <syslog.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <errno.h>
27 #include <time.h>
28 #include <limits.h>
29 #include <getopt.h>
30
31 #include <stdexcept>
32 #include <string>
33 #include <vector>
34 #include <algorithm> // std::replace()
35
36 // conditionally included files
37 #ifndef _WIN32
38 #include <sys/wait.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43
44 #ifdef _WIN32
45 #include "os_win32/popen.h" // popen/pclose()
46 #ifdef _MSC_VER
47 #pragma warning(disable:4761) // "conversion supplied"
48 typedef unsigned short mode_t;
49 typedef int pid_t;
50 #endif
51 #include <io.h> // umask()
52 #include <process.h> // getpid()
53 #endif // _WIN32
54
55 #ifdef __CYGWIN__
56 #include <io.h> // setmode()
57 #endif // __CYGWIN__
58
59 #ifdef HAVE_LIBCAP_NG
60 #include <cap-ng.h>
61 #endif // LIBCAP_NG
62
63 #ifdef HAVE_LIBSYSTEMD
64 #include <systemd/sd-daemon.h>
65 #endif // HAVE_LIBSYSTEMD
66
67 // locally included files
68 #include "atacmds.h"
69 #include "dev_interface.h"
70 #include "knowndrives.h"
71 #include "scsicmds.h"
72 #include "nvmecmds.h"
73 #include "utility.h"
74
75 #ifdef _WIN32
76 // fork()/signal()/initd simulation for native Windows
77 #include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
78 #define strsignal daemon_strsignal
79 #define sleep daemon_sleep
80 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
81 #define SIGQUIT SIGBREAK
82 #define SIGQUIT_KEYNAME "CONTROL-Break"
83 #else // _WIN32
84 #define SIGQUIT_KEYNAME "CONTROL-\\"
85 #endif // _WIN32
86
87 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 4864 2018-12-20 13:02:39Z chrfranke $"
88 CONFIG_H_CVSID;
89
extern "C" {
  // Signal handlers are called by the C runtime and need C linkage.
  typedef void (*signal_handler_type)(int);
}

// Install 'handler' for signal 'sig', but keep the signal ignored
// if its disposition was SIG_IGN before the call.
static void set_signal_if_not_ignored(int sig, signal_handler_type handler)
{
#if defined(_WIN32)
  // signal() emulation
  daemon_signal(sig, handler);

#elif defined(HAVE_SIGACTION)
  // SVr4, POSIX.1-2001, POSIX.1-2008
  // Query the current disposition first. The preset SIG_DFL is what
  // remains visible if the query itself fails.
  struct sigaction old_action;
  old_action.sa_handler = SIG_DFL;
  sigaction(sig, (struct sigaction *)0, &old_action);
  if (old_action.sa_handler == SIG_IGN)
    return;

  struct sigaction new_action;
  memset(&new_action, 0, sizeof(new_action));
  new_action.sa_handler = handler;
  new_action.sa_flags = SA_RESTART; // BSD signal() semantics
  sigaction(sig, &new_action, (struct sigaction *)0);

#elif defined(HAVE_SIGSET)
  // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
  // Install first, then restore SIG_IGN if that was the old disposition.
  const signal_handler_type previous = sigset(sig, handler);
  if (previous == SIG_IGN)
    sigset(sig, SIG_IGN);

#else
  // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
  // Important: BSD semantics is required. Traditional signal()
  // resets the handler to SIG_DFL after the first signal is caught.
  const signal_handler_type previous = signal(sig, handler);
  if (previous == SIG_IGN)
    signal(sig, SIG_IGN);
#endif
}
126
127 using namespace smartmontools;
128
129 // smartd exit codes
130 #define EXIT_BADCMD 1 // command line did not parse
131 #define EXIT_BADCONF 2 // syntax error in config file
132 #define EXIT_STARTUP 3 // problem forking daemon
133 #define EXIT_PID 4 // problem creating pid file
134 #define EXIT_NOCONF 5 // config file does not exist
135 #define EXIT_READCONF 6 // config file exists but cannot be read
136
137 #define EXIT_NOMEM 8 // out of memory
138 #define EXIT_BADCODE 10 // internal error - should NEVER happen
139
140 #define EXIT_BADDEV 16 // we can't monitor this device
141 #define EXIT_NODEV 17 // no devices to monitor
142
143 #define EXIT_SIGNAL 254 // abort on signal
144
145
146 // command-line: 1=debug mode, 2=print presets
147 static unsigned char debugmode = 0;
148
149 // command-line: how long to sleep between checks
150 #define CHECKTIME 1800
151 static int checktime=CHECKTIME;
152
153 // command-line: name of PID file (empty for no pid file)
154 static std::string pid_file;
155
156 // command-line: path prefix of persistent state file, empty if no persistence.
157 static std::string state_path_prefix
158 #ifdef SMARTMONTOOLS_SAVESTATES
159 = SMARTMONTOOLS_SAVESTATES
160 #endif
161 ;
162
163 // command-line: path prefix of attribute log file, empty if no logs.
164 static std::string attrlog_path_prefix
165 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
166 = SMARTMONTOOLS_ATTRIBUTELOG
167 #endif
168 ;
169
170 // configuration file name
171 static const char * configfile;
172 // configuration file "name" if read from stdin
173 static const char * const configfile_stdin = "<stdin>";
174 // path of alternate configuration file
175 static std::string configfile_alt;
176
177 // warning script file
178 static std::string warning_script;
179
// command-line: when should we exit?
// (enumerator values are implicit, starting at 0)
enum quit_t {
  QUIT_NODEV,
  QUIT_NODEVSTARTUP,
  QUIT_NEVER,
  QUIT_ONECHECK,
  QUIT_SHOWTESTS,
  QUIT_ERRORS
};

// Exit policy in effect, QUIT_NODEV unless changed elsewhere.
static quit_t quit = QUIT_NODEV;
186
187 // command-line; this is the default syslog(3) log facility to use.
188 static int facility=LOG_DAEMON;
189
190 #ifndef _WIN32
191 // command-line: fork into background?
192 static bool do_fork=true;
193 #endif
194
195 // TODO: This smartctl only variable is also used in some os_*.cpp
196 unsigned char failuretest_permissive = 0;
197
198 // set to one if we catch a USR1 (check devices now)
199 static volatile int caughtsigUSR1=0;
200
201 #ifdef _WIN32
202 // set to one if we catch a USR2 (toggle debug mode)
203 static volatile int caughtsigUSR2=0;
204 #endif
205
206 // set to one if we catch a HUP (reload config file). In debug mode,
207 // set to two, if we catch INT (also reload config file).
208 static volatile int caughtsigHUP=0;
209
210 // set to signal value if we catch INT, QUIT, or TERM
211 static volatile int caughtsigEXIT=0;
212
213 // This function prints either to stdout or to the syslog as needed.
214 static void PrintOut(int priority, const char *fmt, ...)
215 __attribute_format_printf(2, 3);
216
217 #ifdef HAVE_LIBSYSTEMD
218 // systemd notify support
219
220 static bool notify_enabled = false;
221
222 static inline void notify_init()
223 {
224 if (!getenv("NOTIFY_SOCKET"))
225 return;
226 notify_enabled = true;
227 }
228
229 static inline bool notify_post_init()
230 {
231 if (!notify_enabled)
232 return true;
233 if (do_fork) {
234 PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
235 return false;
236 }
237 return true;
238 }
239
240 static void notify_msg(const char * msg, bool ready = false)
241 {
242 if (!notify_enabled)
243 return;
244 if (debugmode) {
245 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
246 return;
247 }
248 sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
249 }
250
251 static void notify_check(int numdev)
252 {
253 if (!notify_enabled)
254 return;
255 char msg[32];
256 snprintf(msg, sizeof(msg), "Checking %d device%s ...",
257 numdev, (numdev != 1 ? "s" : ""));
258 notify_msg(msg);
259 }
260
261 static void notify_wait(time_t wakeuptime, int numdev)
262 {
263 if (!notify_enabled)
264 return;
265 char ts[16], msg[64];
266 strftime(ts, sizeof(ts), "%H:%M:%S", localtime(&wakeuptime));
267 snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
268 numdev, (numdev != 1 ? "s" : ""), ts);
269 static bool ready = true; // first call notifies READY=1
270 notify_msg(msg, ready);
271 ready = false;
272 }
273
274 static void notify_exit(int status)
275 {
276 if (!notify_enabled)
277 return;
278 const char * msg;
279 switch (status) {
280 case 0: msg = "Exiting ..."; break;
281 case EXIT_BADCMD: msg = "Error in command line (see SYSLOG)"; break;
282 case EXIT_BADCONF: case EXIT_NOCONF:
283 case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
284 case EXIT_BADDEV: msg = "Unable to register a device (see SYSLOG)"; break;
285 case EXIT_NODEV: msg = "No devices to monitor"; break;
286 default: msg = "Error (see SYSLOG)"; break;
287 }
288 notify_msg(msg);
289 }
290
291 #else // HAVE_LIBSYSTEMD
292 // No systemd notify support
293
294 static inline bool notify_post_init()
295 {
296 #ifdef __linux__
297 if (getenv("NOTIFY_SOCKET")) {
298 PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
299 return false;
300 }
301 #endif
302 return true;
303 }
304
// No-op replacements used when notify support is not compiled in.
static inline void notify_init()
{
}

static inline void notify_msg(const char *)
{
}

static inline void notify_check(int)
{
}

static inline void notify_wait(time_t, int)
{
}

static inline void notify_exit(int)
{
}
310
311 #endif // HAVE_LIBSYSTEMD
312
// Attribute monitoring flags, one bit each so they can be combined.
// See monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5,
};
323
// Array of flags for each attribute.
// Valid attribute ids are 1..255; id 0 and ids outside the array
// are never stored and always read as "not set".
class attribute_flags
{
public:
  // All flags start cleared.
  attribute_flags()
  {
    memset(m_flags, 0, sizeof(m_flags));
  }

  // Return true if at least one bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
  {
    if (!valid_id(id))
      return false;
    return (m_flags[id] & flag) != 0;
  }

  // Add the bits of 'flags' to attribute 'id', ignored if id is invalid.
  void set(int id, unsigned char flags)
  {
    if (!valid_id(id))
      return;
    m_flags[id] |= flags;
  }

private:
  unsigned char m_flags[256];

  // True if 'id' is a usable index into m_flags.
  bool valid_id(int id) const
  {
    return (0 < id && id < (int)sizeof(m_flags));
  }
};
343
344
345 /// Configuration data for a device. Read from smartd.conf.
346 /// Supports copy & assignment and is compatible with STL containers.
347 struct dev_config
348 {
349 int lineno; // Line number of entry in file
350 std::string name; // Device name (with optional extra info)
351 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
352 std::string dev_type; // Device type argument from -d directive, empty if none
353 std::string dev_idinfo; // Device identify info for warning emails
354 std::string state_file; // Path of the persistent state file, empty if none
355 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
356 bool ignore; // Ignore this entry
357 bool id_is_unique; // True if dev_idinfo is unique (includes S/N or WWN)
358 bool smartcheck; // Check SMART status
359 bool usagefailed; // Check for failed Usage Attributes
360 bool prefail; // Track changes in Prefail Attributes
361 bool usage; // Track changes in Usage Attributes
362 bool selftest; // Monitor number of selftest errors
363 bool errorlog; // Monitor number of ATA errors
364 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
365 bool offlinests; // Monitor changes in offline data collection status
366 bool offlinests_ns; // Disable auto standby if in progress
367 bool selfteststs; // Monitor changes in self-test execution status
368 bool selfteststs_ns; // Disable auto standby if in progress
369 bool permissive; // Ignore failed SMART commands
370 char autosave; // 1=disable, 2=enable Autosave Attributes
371 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
372 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
373 bool ignorepresets; // Ignore database of -v options
374 bool showpresets; // Show database entry for this device
375 bool removable; // Device may disappear (not be present)
376 char powermode; // skip check, if disk in idle or standby mode
377 bool powerquiet; // skip powermode 'skipping checks' message
378 int powerskipmax; // how many times can be check skipped
379 unsigned char tempdiff; // Track Temperature changes >= this limit
380 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
381 regular_expression test_regex; // Regex for scheduled testing
382
383 // Configuration of email warning messages
384 std::string emailcmdline; // script to execute, empty if no messages
385 std::string emailaddress; // email address, or empty
386 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
387 bool emailtest; // Send test email?
388
389 // ATA ONLY
390 int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
391 int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
392 int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
393 int set_lookahead; // disable(-1), enable(1) read look-ahead
394 int set_standby; // set(1..255->0..254) standby timer
395 bool set_security_freeze; // Freeze ATA security
396 int set_wcache; // disable(-1), enable(1) write cache
397 int set_dsn; // disable(0x2), enable(0x1) DSN
398
399 bool sct_erc_set; // set SCT ERC to:
400 unsigned short sct_erc_readtime; // ERC read time (deciseconds)
401 unsigned short sct_erc_writetime; // ERC write time (deciseconds)
402
403 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
404 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
405 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
406 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
407
408 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
409
410 ata_vendor_attr_defs attribute_defs; // -v options
411
412 dev_config();
413 };
414
415 dev_config::dev_config()
416 : lineno(0),
417 ignore(false),
418 id_is_unique(false),
419 smartcheck(false),
420 usagefailed(false),
421 prefail(false),
422 usage(false),
423 selftest(false),
424 errorlog(false),
425 xerrorlog(false),
426 offlinests(false), offlinests_ns(false),
427 selfteststs(false), selfteststs_ns(false),
428 permissive(false),
429 autosave(0),
430 autoofflinetest(0),
431 ignorepresets(false),
432 showpresets(false),
433 removable(false),
434 powermode(0),
435 powerquiet(false),
436 powerskipmax(0),
437 tempdiff(0),
438 tempinfo(0), tempcrit(0),
439 emailfreq(0),
440 emailtest(false),
441 dev_rpm(0),
442 set_aam(0), set_apm(0),
443 set_lookahead(0),
444 set_standby(0),
445 set_security_freeze(false),
446 set_wcache(0), set_dsn(0),
447 sct_erc_set(false),
448 sct_erc_readtime(0), sct_erc_writetime(0),
449 curr_pending_id(0), offl_pending_id(0),
450 curr_pending_incr(false), offl_pending_incr(false),
451 curr_pending_set(false), offl_pending_set(false)
452 {
453 }
454
455
456 // Number of allowed mail message types
457 static const int SMARTD_NMAIL = 13;
458 // Type for '-M test' mails (state not persistent)
459 static const int MAILTYPE_TEST = 0;
460 // TODO: Add const or enum for all mail types.
461
// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // Nothing sent yet.
  mailinfo()
  : logged(0),
    firstsent(0),
    lastsent(0)
  {
  }
};
470
471 /// Persistent state data for a device.
472 struct persistent_dev_state
473 {
474 unsigned char tempmin, tempmax; // Min/Max Temperatures
475
476 unsigned char selflogcount; // total number of self-test errors
477 unsigned short selfloghour; // lifetime hours of last self-test error
478
479 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
480
481 uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
482 uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
483
484 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
485
486 // ATA ONLY
487 int ataerrorcount; // Total number of ATA errors
488
489 // Persistent part of ata_smart_values:
490 struct ata_attribute {
491 unsigned char id;
492 unsigned char val;
493 unsigned char worst; // Byte needed for 'raw64' attribute only.
494 uint64_t raw;
495 unsigned char resvd;
496
497 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
498 };
499 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
500
501 // SCSI ONLY
502
503 struct scsi_error_counter_t {
504 struct scsiErrorCounter errCounter;
505 unsigned char found;
506 scsi_error_counter_t() : found(0)
507 { memset(&errCounter, 0, sizeof(errCounter)); }
508 };
509 scsi_error_counter_t scsi_error_counters[3];
510
511 struct scsi_nonmedium_error_t {
512 struct scsiNonMediumError nme;
513 unsigned char found;
514 scsi_nonmedium_error_t() : found(0)
515 { memset(&nme, 0, sizeof(nme)); }
516 };
517 scsi_nonmedium_error_t scsi_nonmedium_error;
518
519 // NVMe only
520 uint64_t nvme_err_log_entries;
521
522 persistent_dev_state();
523 };
524
525 persistent_dev_state::persistent_dev_state()
526 : tempmin(0), tempmax(0),
527 selflogcount(0),
528 selfloghour(0),
529 scheduled_test_next_check(0),
530 selective_test_last_start(0),
531 selective_test_last_end(0),
532 ataerrorcount(0),
533 nvme_err_log_entries(0)
534 {
535 }
536
537 /// Non-persistent state data for a device.
538 struct temp_dev_state
539 {
540 bool must_write; // true if persistent part should be written
541
542 bool not_cap_offline; // true == not capable of offline testing
543 bool not_cap_conveyance;
544 bool not_cap_short;
545 bool not_cap_long;
546 bool not_cap_selective;
547
548 unsigned char temperature; // last recorded Temperature (in Celsius)
549 time_t tempmin_delay; // time where Min Temperature tracking will start
550
551 bool removed; // true if open() failed for removable device
552
553 bool powermodefail; // true if power mode check failed
554 int powerskipcnt; // Number of checks skipped due to idle or standby mode
555 int lastpowermodeskipped; // the last power mode that was skipped
556
557 // SCSI ONLY
558 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
559 unsigned char TempPageSupported; // has log sense temperature page (0xd)
560 unsigned char ReadECounterPageSupported;
561 unsigned char WriteECounterPageSupported;
562 unsigned char VerifyECounterPageSupported;
563 unsigned char NonMediumErrorPageSupported;
564 unsigned char SuppressReport; // minimize nuisance reports
565 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
566 // know yet) 6 or 10
567 // ATA ONLY
568 uint64_t num_sectors; // Number of sectors
569 ata_smart_values smartval; // SMART data
570 ata_smart_thresholds_pvt smartthres; // SMART thresholds
571 bool offline_started; // true if offline data collection was started
572 bool selftest_started; // true if self-test was started
573
574 temp_dev_state();
575 };
576
577 temp_dev_state::temp_dev_state()
578 : must_write(false),
579 not_cap_offline(false),
580 not_cap_conveyance(false),
581 not_cap_short(false),
582 not_cap_long(false),
583 not_cap_selective(false),
584 temperature(0),
585 tempmin_delay(0),
586 removed(false),
587 powermodefail(false),
588 powerskipcnt(0),
589 lastpowermodeskipped(0),
590 SmartPageSupported(false),
591 TempPageSupported(false),
592 ReadECounterPageSupported(false),
593 WriteECounterPageSupported(false),
594 VerifyECounterPageSupported(false),
595 NonMediumErrorPageSupported(false),
596 SuppressReport(false),
597 modese_len(0),
598 num_sectors(0),
599 offline_started(false),
600 selftest_started(false)
601 {
602 memset(&smartval, 0, sizeof(smartval));
603 memset(&smartthres, 0, sizeof(smartthres));
604 }
605
606 /// Runtime state data for a device.
607 struct dev_state
608 : public persistent_dev_state,
609 public temp_dev_state
610 {
611 void update_persistent_state();
612 void update_temp_state();
613 };
614
615 /// Container for configuration info for each device.
616 typedef std::vector<dev_config> dev_config_vector;
617
618 /// Container for state info for each device.
619 typedef std::vector<dev_state> dev_state_vector;
620
621 // Copy ATA attributes to persistent state.
622 void dev_state::update_persistent_state()
623 {
624 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
625 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
626 ata_attribute & pa = ata_attributes[i];
627 pa.id = ta.id;
628 if (ta.id == 0) {
629 pa.val = pa.worst = 0; pa.raw = 0;
630 continue;
631 }
632 pa.val = ta.current;
633 pa.worst = ta.worst;
634 pa.raw = ta.raw[0]
635 | ( ta.raw[1] << 8)
636 | ( ta.raw[2] << 16)
637 | ((uint64_t)ta.raw[3] << 24)
638 | ((uint64_t)ta.raw[4] << 32)
639 | ((uint64_t)ta.raw[5] << 40);
640 pa.resvd = ta.reserv;
641 }
642 }
643
644 // Copy ATA from persistent to temp state.
645 void dev_state::update_temp_state()
646 {
647 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
648 const ata_attribute & pa = ata_attributes[i];
649 ata_smart_attribute & ta = smartval.vendor_attributes[i];
650 ta.id = pa.id;
651 if (pa.id == 0) {
652 ta.current = ta.worst = 0;
653 memset(ta.raw, 0, sizeof(ta.raw));
654 continue;
655 }
656 ta.current = pa.val;
657 ta.worst = pa.worst;
658 ta.raw[0] = (unsigned char) pa.raw;
659 ta.raw[1] = (unsigned char)(pa.raw >> 8);
660 ta.raw[2] = (unsigned char)(pa.raw >> 16);
661 ta.raw[3] = (unsigned char)(pa.raw >> 24);
662 ta.raw[4] = (unsigned char)(pa.raw >> 32);
663 ta.raw[5] = (unsigned char)(pa.raw >> 40);
664 ta.reserv = pa.resvd;
665 }
666 }
667
668 // Parse a line from a state file.
669 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
670 {
671 static const regular_expression regex(
672 "^ *"
673 "((temperature-min)" // (1 (2)
674 "|(temperature-max)" // (3)
675 "|(self-test-errors)" // (4)
676 "|(self-test-last-err-hour)" // (5)
677 "|(scheduled-test-next-check)" // (6)
678 "|(selective-test-last-start)" // (7)
679 "|(selective-test-last-end)" // (8)
680 "|(ata-error-count)" // (9)
681 "|(mail\\.([0-9]+)\\." // (10 (11)
682 "((count)" // (12 (13)
683 "|(first-sent-time)" // (14)
684 "|(last-sent-time)" // (15)
685 ")" // 12)
686 ")" // 10)
687 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
688 "((id)" // (18 (19)
689 "|(val)" // (20)
690 "|(worst)" // (21)
691 "|(raw)" // (22)
692 "|(resvd)" // (23)
693 ")" // 18)
694 ")" // 16)
695 "|(nvme-err-log-entries)" // (24)
696 ")" // 1)
697 " *= *([0-9]+)[ \n]*$" // (25)
698 );
699
700 const int nmatch = 1+25;
701 regular_expression::match_range match[nmatch];
702 if (!regex.execute(line, nmatch, match))
703 return false;
704 if (match[nmatch-1].rm_so < 0)
705 return false;
706
707 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
708
709 int m = 1;
710 if (match[++m].rm_so >= 0)
711 state.tempmin = (unsigned char)val;
712 else if (match[++m].rm_so >= 0)
713 state.tempmax = (unsigned char)val;
714 else if (match[++m].rm_so >= 0)
715 state.selflogcount = (unsigned char)val;
716 else if (match[++m].rm_so >= 0)
717 state.selfloghour = (unsigned short)val;
718 else if (match[++m].rm_so >= 0)
719 state.scheduled_test_next_check = (time_t)val;
720 else if (match[++m].rm_so >= 0)
721 state.selective_test_last_start = val;
722 else if (match[++m].rm_so >= 0)
723 state.selective_test_last_end = val;
724 else if (match[++m].rm_so >= 0)
725 state.ataerrorcount = (int)val;
726 else if (match[m+=2].rm_so >= 0) {
727 int i = atoi(line+match[m].rm_so);
728 if (!(0 <= i && i < SMARTD_NMAIL))
729 return false;
730 if (i == MAILTYPE_TEST) // Don't suppress test mails
731 return true;
732 if (match[m+=2].rm_so >= 0)
733 state.maillog[i].logged = (int)val;
734 else if (match[++m].rm_so >= 0)
735 state.maillog[i].firstsent = (time_t)val;
736 else if (match[++m].rm_so >= 0)
737 state.maillog[i].lastsent = (time_t)val;
738 else
739 return false;
740 }
741 else if (match[m+=5+1].rm_so >= 0) {
742 int i = atoi(line+match[m].rm_so);
743 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
744 return false;
745 if (match[m+=2].rm_so >= 0)
746 state.ata_attributes[i].id = (unsigned char)val;
747 else if (match[++m].rm_so >= 0)
748 state.ata_attributes[i].val = (unsigned char)val;
749 else if (match[++m].rm_so >= 0)
750 state.ata_attributes[i].worst = (unsigned char)val;
751 else if (match[++m].rm_so >= 0)
752 state.ata_attributes[i].raw = val;
753 else if (match[++m].rm_so >= 0)
754 state.ata_attributes[i].resvd = (unsigned char)val;
755 else
756 return false;
757 }
758 else if (match[m+7].rm_so >= 0)
759 state.nvme_err_log_entries = val;
760 else
761 return false;
762 return true;
763 }
764
765 // Read a state file.
766 static bool read_dev_state(const char * path, persistent_dev_state & state)
767 {
768 stdio_file f(path, "r");
769 if (!f) {
770 if (errno != ENOENT)
771 pout("Cannot read state file \"%s\"\n", path);
772 return false;
773 }
774 #ifdef __CYGWIN__
775 setmode(fileno(f), O_TEXT); // Allow files with \r\n
776 #endif
777
778 persistent_dev_state new_state;
779 int good = 0, bad = 0;
780 char line[256];
781 while (fgets(line, sizeof(line), f)) {
782 const char * s = line + strspn(line, " \t");
783 if (!*s || *s == '#')
784 continue;
785 if (!parse_dev_state_line(line, new_state))
786 bad++;
787 else
788 good++;
789 }
790
791 if (bad) {
792 if (!good) {
793 pout("%s: format error\n", path);
794 return false;
795 }
796 pout("%s: %d invalid line(s) ignored\n", path, bad);
797 }
798
799 // This sets the values missing in the file to 0.
800 state = new_state;
801 return true;
802 }
803
// Write a "name = val" line to 'f'.
// Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
809
// Write a "name1.id.name2 = val" line to 'f'.
// Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
815
816 // Write a state file
817 static bool write_dev_state(const char * path, const persistent_dev_state & state)
818 {
819 // Rename old "file" to "file~"
820 std::string pathbak = path; pathbak += '~';
821 unlink(pathbak.c_str());
822 rename(path, pathbak.c_str());
823
824 stdio_file f(path, "w");
825 if (!f) {
826 pout("Cannot create state file \"%s\"\n", path);
827 return false;
828 }
829
830 fprintf(f, "# smartd state file\n");
831 write_dev_state_line(f, "temperature-min", state.tempmin);
832 write_dev_state_line(f, "temperature-max", state.tempmax);
833 write_dev_state_line(f, "self-test-errors", state.selflogcount);
834 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
835 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
836 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
837 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
838
839 int i;
840 for (i = 0; i < SMARTD_NMAIL; i++) {
841 if (i == MAILTYPE_TEST) // Don't suppress test mails
842 continue;
843 const mailinfo & mi = state.maillog[i];
844 if (!mi.logged)
845 continue;
846 write_dev_state_line(f, "mail", i, "count", mi.logged);
847 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
848 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
849 }
850
851 // ATA ONLY
852 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
853
854 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
855 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
856 if (!pa.id)
857 continue;
858 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
859 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
860 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
861 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
862 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
863 }
864
865 // NVMe only
866 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
867
868 return true;
869 }
870
871 // Write to the attrlog file
872 static bool write_dev_attrlog(const char * path, const dev_state & state)
873 {
874 stdio_file f(path, "a");
875 if (!f) {
876 pout("Cannot create attribute log file \"%s\"\n", path);
877 return false;
878 }
879
880
881 time_t now = time(0);
882 struct tm * tms = gmtime(&now);
883 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
884 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
885 tms->tm_hour, tms->tm_min, tms->tm_sec);
886 // ATA ONLY
887 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
888 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
889 if (!pa.id)
890 continue;
891 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
892 }
893 // SCSI ONLY
894 const struct scsiErrorCounter * ecp;
895 const char * pageNames[3] = {"read", "write", "verify"};
896 for (int k = 0; k < 3; ++k) {
897 if ( !state.scsi_error_counters[k].found ) continue;
898 ecp = &state.scsi_error_counters[k].errCounter;
899 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
900 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
901 "\t%s-corr-by-retry;%" PRIu64 ";"
902 "\t%s-total-err-corrected;%" PRIu64 ";"
903 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
904 "\t%s-gb-processed;%.3f;"
905 "\t%s-total-unc-errors;%" PRIu64 ";",
906 pageNames[k], ecp->counter[0],
907 pageNames[k], ecp->counter[1],
908 pageNames[k], ecp->counter[2],
909 pageNames[k], ecp->counter[3],
910 pageNames[k], ecp->counter[4],
911 pageNames[k], (ecp->counter[5] / 1000000000.0),
912 pageNames[k], ecp->counter[6]);
913 }
914 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
915 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
916 }
917 // write SCSI current temperature if it is monitored
918 if (state.temperature)
919 fprintf(f, "\ttemperature;%d;", state.temperature);
920 // end of line
921 fprintf(f, "\n");
922 return true;
923 }
924
925 // Write all state files. If write_always is false, don't write
926 // unless must_write is set.
927 static void write_all_dev_states(const dev_config_vector & configs,
928 dev_state_vector & states,
929 bool write_always = true)
930 {
931 for (unsigned i = 0; i < states.size(); i++) {
932 const dev_config & cfg = configs.at(i);
933 if (cfg.state_file.empty())
934 continue;
935 dev_state & state = states[i];
936 if (!write_always && !state.must_write)
937 continue;
938 if (!write_dev_state(cfg.state_file.c_str(), state))
939 continue;
940 state.must_write = false;
941 if (write_always || debugmode)
942 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
943 cfg.name.c_str(), cfg.state_file.c_str());
944 }
945 }
946
947 // Write to all attrlog files
948 static void write_all_dev_attrlogs(const dev_config_vector & configs,
949 dev_state_vector & states)
950 {
951 for (unsigned i = 0; i < states.size(); i++) {
952 const dev_config & cfg = configs.at(i);
953 if (cfg.attrlog_file.empty())
954 continue;
955 dev_state & state = states[i];
956 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
957 }
958 }
959
960 extern "C" { // signal handlers require C-linkage
961
962 // Note if we catch a SIGUSR1
963 static void USR1handler(int sig)
964 {
965 if (SIGUSR1==sig)
966 caughtsigUSR1=1;
967 return;
968 }
969
970 #ifdef _WIN32
971 // Note if we catch a SIGUSR2
972 static void USR2handler(int sig)
973 {
974 if (SIGUSR2==sig)
975 caughtsigUSR2=1;
976 return;
977 }
978 #endif
979
980 // Note if we catch a HUP (or INT in debug mode)
981 static void HUPhandler(int sig)
982 {
983 if (sig==SIGHUP)
984 caughtsigHUP=1;
985 else
986 caughtsigHUP=2;
987 return;
988 }
989
990 // signal handler for TERM, QUIT, and INT (if not in debug mode)
991 static void sighandler(int sig)
992 {
993 if (!caughtsigEXIT)
994 caughtsigEXIT=sig;
995 return;
996 }
997
998 } // extern "C"
999
1000 #ifdef HAVE_LIBCAP_NG
1001 // capabilities(7) support
1002
1003 static bool capabilities_enabled = false;
1004
1005 static void capabilities_drop_now()
1006 {
1007 if (!capabilities_enabled)
1008 return;
1009 capng_clear(CAPNG_SELECT_BOTH);
1010 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
1011 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
1012 capng_apply(CAPNG_SELECT_BOTH);
1013 }
1014
1015 static void capabilities_check_config(dev_config_vector & configs)
1016 {
1017 if (!capabilities_enabled)
1018 return;
1019 for (unsigned i = 0; i < configs.size(); i++) {
1020 dev_config & cfg = configs[i];
1021 if (!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) {
1022 PrintOut(LOG_INFO, "Device: %s, --capabilites is set, mail will be suppressed.\n",
1023 cfg.name.c_str());
1024 cfg.emailaddress.clear(); cfg.emailcmdline.clear();
1025 }
1026 }
1027 }
1028
1029 #else // HAVE_LIBCAP_NG
1030 // No capabilities(7) support
1031
1032 static inline void capabilities_drop_now() { }
1033 static inline void capabilities_check_config(dev_config_vector &) { }
1034
1035 #endif // HAVE_LIBCAP_NG
1036
1037 // a replacement for setenv() which is not available on all platforms.
1038 // Note that the string passed to putenv must not be freed or made
1039 // invalid, since a pointer to it is kept by putenv(). This means that
1040 // it must either be a static buffer or allocated off the heap. The
1041 // string can be freed if the environment variable is redefined via
1042 // another call to putenv(). There is no portable way to unset a variable
1043 // with putenv(). So we manage the buffer in a static object.
1044 // Using setenv() if available is not considered because some
1045 // implementations may produce memory leaks.
1046
// Owner of the "NAME=VALUE" string which is handed to putenv().
// The string must stay valid while it is part of the environment, so
// there is intentionally no destructor which would free it.
class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Set environment variable NAME to VALUE.
  // Throws std::runtime_error if putenv() fails.
  void set(const char * name, const char * value);

private:
  char * m_buf; // string currently referenced by the environment, or 0

  // Not copyable
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "VALUE" + '\0'
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The new string did not become part of the environment,
    // so release it instead of leaking it
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // The environment now refers to newbuf, the previous string can be freed.
  // This assumes that the same NAME is passed on each call
  delete [] m_buf;
  m_buf = newbuf;
}
1075
1076 #define EBUFLEN 1024
1077
1078 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1079 __attribute_format_printf(4, 5);
1080
1081 // If either address or executable path is non-null then send and log
1082 // a warning email, or execute executable
1083 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1084 {
1085 static const char * const whichfail[] = {
1086 "EmailTest", // 0
1087 "Health", // 1
1088 "Usage", // 2
1089 "SelfTest", // 3
1090 "ErrorCount", // 4
1091 "FailedHealthCheck", // 5
1092 "FailedReadSmartData", // 6
1093 "FailedReadSmartErrorLog", // 7
1094 "FailedReadSmartSelfTestLog", // 8
1095 "FailedOpenDevice", // 9
1096 "CurrentPendingSector", // 10
1097 "OfflineUncorrectableSector", // 11
1098 "Temperature" // 12
1099 };
1100
1101 // See if user wants us to send mail
1102 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1103 return;
1104
1105 std::string address = cfg.emailaddress;
1106 const char * executable = cfg.emailcmdline.c_str();
1107
1108 // which type of mail are we sending?
1109 mailinfo * mail=(state.maillog)+which;
1110
1111 // checks for sanity
1112 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
1113 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
1114 return;
1115 }
1116 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
1117 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
1118 which, (int)sizeof(whichfail));
1119 return;
1120 }
1121
1122 // Return if a single warning mail has been sent.
1123 if ((cfg.emailfreq==1) && mail->logged)
1124 return;
1125
1126 // Return if this is an email test and one has already been sent.
1127 if (which == 0 && mail->logged)
1128 return;
1129
1130 // To decide if to send mail, we need to know what time it is.
1131 time_t epoch = time(0);
1132
1133 // Return if less than one day has gone by
1134 const int day = 24*3600;
1135 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1136 return;
1137
1138 // Return if less than 2^(logged-1) days have gone by
1139 if (cfg.emailfreq==3 && mail->logged) {
1140 int days = 0x01 << (mail->logged - 1);
1141 days*=day;
1142 if (epoch<(mail->lastsent+days))
1143 return;
1144 }
1145
1146 // record the time of this mail message, and the first mail message
1147 if (!mail->logged)
1148 mail->firstsent=epoch;
1149 mail->lastsent=epoch;
1150
1151 // print warning string into message
1152 char message[256];
1153 va_list ap;
1154 va_start(ap, fmt);
1155 vsnprintf(message, sizeof(message), fmt, ap);
1156 va_end(ap);
1157
1158 // replace commas by spaces to separate recipients
1159 std::replace(address.begin(), address.end(), ',', ' ');
1160
1161 // Export information in environment variables that will be useful
1162 // for user scripts
1163 static env_buffer env[12];
1164 env[0].set("SMARTD_MAILER", executable);
1165 env[1].set("SMARTD_MESSAGE", message);
1166 char dates[DATEANDEPOCHLEN];
1167 snprintf(dates, sizeof(dates), "%d", mail->logged);
1168 env[2].set("SMARTD_PREVCNT", dates);
1169 dateandtimezoneepoch(dates, mail->firstsent);
1170 env[3].set("SMARTD_TFIRST", dates);
1171 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1172 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1173 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1174 env[6].set("SMARTD_ADDRESS", address.c_str());
1175 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1176
1177 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1178 env[8].set("SMARTD_DEVICETYPE",
1179 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1180 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1181
1182 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1183 dates[0] = 0;
1184 if (which) switch (cfg.emailfreq) {
1185 case 2: dates[0] = '1'; dates[1] = 0; break;
1186 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1187 }
1188 env[11].set("SMARTD_NEXTDAYS", dates);
1189
1190 // now construct a command to send this as EMAIL
1191 if (!*executable)
1192 executable = "<mail>";
1193 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1194 const char * newwarn = (which? "Warning via" : "Test of");
1195
1196 char command[256];
1197 #ifdef _WIN32
1198 // Path may contain spaces
1199 snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1200 #else
1201 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1202 #endif
1203
1204 // tell SYSLOG what we are about to do...
1205 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1206 which?"Sending warning via":"Executing test of", executable, newadd);
1207
1208 // issue the command to send mail or to run the user's executable
1209 errno=0;
1210 FILE * pfp;
1211 if (!(pfp=popen(command, "r")))
1212 // failed to popen() mail process
1213 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1214 newwarn, executable, newadd, errno?strerror(errno):"");
1215 else {
1216 // pipe succeeded!
1217 int len, status;
1218 char buffer[EBUFLEN];
1219
1220 // if unexpected output on stdout/stderr, null terminate, print, and flush
1221 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1222 int count=0;
1223 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1224 buffer[newlen]='\0';
1225 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1226 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1227
1228 // flush pipe if needed
1229 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1230 count++;
1231
1232 // tell user that pipe was flushed, or that something is really wrong
1233 if (count && count<EBUFLEN)
1234 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1235 newwarn, executable, newadd);
1236 else if (count)
1237 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1238 newwarn, executable, newadd);
1239 }
1240
1241 // if something went wrong with mail process, print warning
1242 errno=0;
1243 if (-1==(status=pclose(pfp)))
1244 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1245 errno?strerror(errno):"");
1246 else {
1247 // mail process apparently succeeded. Check and report exit status
1248 if (WIFEXITED(status)) {
1249 // exited 'normally' (but perhaps with nonzero status)
1250 int status8 = WEXITSTATUS(status);
1251 if (status8>128)
1252 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1253 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1254 else if (status8)
1255 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1256 newwarn, executable, newadd, status, status8);
1257 else
1258 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1259 }
1260
1261 if (WIFSIGNALED(status))
1262 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1263 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1264
1265 // this branch is probably not possible. If subprocess is
1266 // stopped then pclose() should not return.
1267 if (WIFSTOPPED(status))
1268 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1269 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1270
1271 }
1272 }
1273
1274 // increment mail sent counter
1275 mail->logged++;
1276 }
1277
1278 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1279 __attribute_format_printf(4, 5);
1280
1281 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1282 {
1283 if (!(0 <= which && which < SMARTD_NMAIL))
1284 return;
1285
1286 // Return if no mail sent yet
1287 mailinfo & mi = state.maillog[which];
1288 if (!mi.logged)
1289 return;
1290
1291 // Format & print message
1292 char msg[256];
1293 va_list ap;
1294 va_start(ap, fmt);
1295 vsnprintf(msg, sizeof(msg), fmt, ap);
1296 va_end(ap);
1297
1298 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1299 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1300
1301 // Clear mail counter and timestamps
1302 mi = mailinfo();
1303 state.must_write = true;
1304 }
1305
1306 #ifndef _WIN32
1307
1308 // Output multiple lines via separate syslog(3) calls.
1309 __attribute_format_printf(2, 0)
1310 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1311 {
1312 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1313 vsnprintf(buf, sizeof(buf), fmt, ap);
1314
1315 for (char * p = buf, * q; p && *p; p = q) {
1316 if ((q = strchr(p, '\n')))
1317 *q++ = 0;
1318 if (*p)
1319 syslog(priority, "%s\n", p);
1320 }
1321 }
1322
1323 #else // _WIN32
1324 // os_win32/syslog_win32.cpp supports multiple lines.
1325 #define vsyslog_lines vsyslog
1326 #endif // _WIN32
1327
1328 // Printing function for watching ataprint commands, or losing them
1329 // [From GLIBC Manual: Since the prototype doesn't specify types for
1330 // optional arguments, in a call to a variadic function the default
1331 // argument promotions are performed on the optional argument
1332 // values. This means the objects of type char or short int (whether
1333 // signed or not) are promoted to either int or unsigned int, as
1334 // appropriate.]
1335 void pout(const char *fmt, ...){
1336 va_list ap;
1337
1338 // get the correct time in syslog()
1339 FixGlibcTimeZoneBug();
1340 // initialize variable argument list
1341 va_start(ap,fmt);
1342 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1343 if (debugmode && debugmode != 2) {
1344 FILE * f = stdout;
1345 #ifdef _WIN32
1346 if (facility == LOG_LOCAL1) // logging to stdout
1347 f = stderr;
1348 #endif
1349 vfprintf(f, fmt, ap);
1350 fflush(f);
1351 }
1352 // in debugmode==2 mode we print output from knowndrives.o functions
1353 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1354 openlog("smartd", LOG_PID, facility);
1355 vsyslog_lines(LOG_INFO, fmt, ap);
1356 closelog();
1357 }
1358 va_end(ap);
1359 return;
1360 }
1361
1362 // This function prints either to stdout or to the syslog as needed.
1363 static void PrintOut(int priority, const char *fmt, ...){
1364 va_list ap;
1365
1366 // get the correct time in syslog()
1367 FixGlibcTimeZoneBug();
1368 // initialize variable argument list
1369 va_start(ap,fmt);
1370 if (debugmode) {
1371 FILE * f = stdout;
1372 #ifdef _WIN32
1373 if (facility == LOG_LOCAL1) // logging to stdout
1374 f = stderr;
1375 #endif
1376 vfprintf(f, fmt, ap);
1377 fflush(f);
1378 }
1379 else {
1380 openlog("smartd", LOG_PID, facility);
1381 vsyslog_lines(priority, fmt, ap);
1382 closelog();
1383 }
1384 va_end(ap);
1385 return;
1386 }
1387
1388 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1389 void checksumwarning(const char * string)
1390 {
1391 pout("Warning! %s error: invalid SMART checksum.\n", string);
1392 }
1393
1394 #ifndef _WIN32
1395
1396 // Wait for the pid file to show up, this makes sure a calling program knows
1397 // that the daemon is really up and running and has a pid to kill it
1398 static bool WaitForPidFile()
1399 {
1400 int waited, max_wait = 10;
1401 struct stat stat_buf;
1402
1403 if (pid_file.empty() || debugmode)
1404 return true;
1405
1406 for(waited = 0; waited < max_wait; ++waited) {
1407 if (!stat(pid_file.c_str(), &stat_buf)) {
1408 return true;
1409 } else
1410 sleep(1);
1411 }
1412 return false;
1413 }
1414
1415 #endif // _WIN32
1416
1417 // Forks new process if needed, closes ALL file descriptors,
1418 // redirects stdin, stdout, and stderr. Not quite daemon().
1419 // See https://www.linuxjournal.com/article/2335
1420 // for a good description of why we do things this way.
1421 static int daemon_init()
1422 {
1423 #ifndef _WIN32
1424
1425 // flush all buffered streams. Else we might get two copies of open
1426 // streams since both parent and child get copies of the buffers.
1427 fflush(NULL);
1428
1429 if (do_fork) {
1430 pid_t pid;
1431 if ((pid=fork()) < 0) {
1432 // unable to fork!
1433 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1434 return EXIT_STARTUP;
1435 }
1436 if (pid) {
1437 // we are the parent process, wait for pid file, then exit cleanly
1438 if(!WaitForPidFile()) {
1439 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1440 return EXIT_STARTUP;
1441 }
1442 return 0;
1443 }
1444
1445 // from here on, we are the child process.
1446 setsid();
1447
1448 // Fork one more time to avoid any possibility of having terminals
1449 if ((pid=fork()) < 0) {
1450 // unable to fork!
1451 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1452 return EXIT_STARTUP;
1453 }
1454 if (pid)
1455 // we are the parent process -- exit cleanly
1456 return 0;
1457
1458 // Now we are the child's child...
1459 }
1460
1461 // close any open file descriptors
1462 for (int i = getdtablesize(); --i >= 0; )
1463 close(i);
1464
1465 // redirect any IO attempts to /dev/null and change to root directory
1466 int fd = open("/dev/null", O_RDWR);
1467 if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
1468 PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1469 return EXIT_STARTUP;
1470 }
1471 umask(0022);
1472
1473 if (do_fork)
1474 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1475
1476 #else // _WIN32
1477
1478 // No fork() on native Win32
1479 // Detach this process from console
1480 fflush(NULL);
1481 if (daemon_detach("smartd")) {
1482 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1483 return EXIT_STARTUP;
1484 }
1485 // stdin/out/err now closed if not redirected
1486
1487 #endif // _WIN32
1488
1489 // No error, continue in main_worker()
1490 return -1;
1491 }
1492
1493 // create a PID file containing the current process id
1494 static bool write_pid_file()
1495 {
1496 if (!pid_file.empty()) {
1497 pid_t pid = getpid();
1498 mode_t old_umask;
1499 #ifndef __CYGWIN__
1500 old_umask = umask(0077); // rwx------
1501 #else
1502 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1503 old_umask = umask(0033); // rwxr--r--
1504 #endif
1505
1506 stdio_file f(pid_file.c_str(), "w");
1507 umask(old_umask);
1508 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1509 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1510 return false;
1511 }
1512 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1513 }
1514 return true;
1515 }
1516
1517 // Prints header identifying version of code and home
1518 static void PrintHead()
1519 {
1520 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1521 }
1522
1523 // prints help info for configuration file Directives
1524 static void Directives()
1525 {
1526 PrintOut(LOG_INFO,
1527 "Configuration file (%s) Directives (after device name):\n"
1528 " -d TYPE Set the device type: auto, ignore, removable,\n"
1529 " %s\n"
1530 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1531 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1532 " -S VAL Enable/disable attribute autosave (on/off)\n"
1533 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1534 " -H Monitor SMART Health Status, report if failed\n"
1535 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1536 " -l TYPE Monitor SMART log or self-test status:\n"
1537 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1538 " -l scterc,R,W Set SCT Error Recovery Control\n"
1539 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1540 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1541 " -f Monitor 'Usage' Attributes, report failures\n"
1542 " -m ADD Send email warning to address ADD\n"
1543 " -M TYPE Modify email warning behavior (see man page)\n"
1544 " -p Report changes in 'Prefailure' Attributes\n"
1545 " -u Report changes in 'Usage' Attributes\n"
1546 " -t Equivalent to -p and -u Directives\n"
1547 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1548 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1549 " -i ID Ignore Attribute ID for -f Directive\n"
1550 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1551 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1552 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1553 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1554 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1555 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1556 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1557 " -F TYPE Use firmware bug workaround:\n"
1558 " %s\n"
1559 " # Comment: text after a hash sign is ignored\n"
1560 " \\ Line continuation character\n"
1561 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1562 "Use ID = 0 to turn off -C and/or -U Directives\n"
1563 "Example: /dev/sda -a\n",
1564 configfile,
1565 smi()->get_valid_dev_types_str().c_str(),
1566 get_valid_firmwarebug_args());
1567 }
1568
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or NULL if the option is not known here. */
static const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";
  if (opt == 'B')
    return "[+]<FILE_NAME>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
  if (opt == 'p' || opt == 'w')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";
  return NULL;
}
1596
1597 /* prints help information for command syntax */
1598 static void Usage()
1599 {
1600 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1601 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1602 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1603 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1604 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1605 #endif
1606 PrintOut(LOG_INFO,"\n");
1607 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1608 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1609 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1610 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1611 PrintOut(LOG_INFO,"\n");
1612 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1613 #endif
1614 PrintOut(LOG_INFO,"]\n\n");
1615 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1616 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1617 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1618 #ifdef HAVE_LIBCAP_NG
1619 PrintOut(LOG_INFO," -C, --capabilities\n");
1620 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1621 " Warning: Mail notification does not work when used.\n\n");
1622 #endif
1623 PrintOut(LOG_INFO," -d, --debug\n");
1624 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1625 PrintOut(LOG_INFO," -D, --showdirectives\n");
1626 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1627 PrintOut(LOG_INFO," -h, --help, --usage\n");
1628 PrintOut(LOG_INFO," Display this help and exit\n\n");
1629 PrintOut(LOG_INFO," -i N, --interval=N\n");
1630 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1631 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1632 #ifndef _WIN32
1633 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1634 #else
1635 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1636 #endif
1637 #ifndef _WIN32
1638 PrintOut(LOG_INFO," -n, --no-fork\n");
1639 PrintOut(LOG_INFO," Do not fork into background\n");
1640 #ifdef HAVE_LIBSYSTEMD
1641 PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1642 #endif // HAVE_LIBSYSTEMD
1643 PrintOut(LOG_INFO,"\n");
1644 #endif // WIN32
1645 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1646 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1647 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1648 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1649 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1650 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1651 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1652 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1653 #ifdef SMARTMONTOOLS_SAVESTATES
1654 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1655 #endif
1656 PrintOut(LOG_INFO,"\n");
1657 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1658 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1659 #ifndef _WIN32
1660 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1661 #else
1662 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1663 #endif
1664 #ifdef _WIN32
1665 PrintOut(LOG_INFO," --service\n");
1666 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1667 PrintOut(LOG_INFO," smartd install [options]\n");
1668 PrintOut(LOG_INFO," Remove service with:\n");
1669 PrintOut(LOG_INFO," smartd remove\n\n");
1670 #endif // _WIN32
1671 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1672 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1673 }
1674
1675 static int CloseDevice(smart_device * device, const char * name)
1676 {
1677 if (!device->close()){
1678 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1679 return 1;
1680 }
1681 // device successfully closed
1682 return 0;
1683 }
1684
// Return true if a char is not allowed in a state file name.
// Only the digits '0'-'9' and the letters 'A'-'Z' and 'a'-'z' are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1692
1693 // Read error count from Summary or Extended Comprehensive SMART error log
1694 // Return -1 on error
1695 static int read_ata_error_count(ata_device * device, const char * name,
1696 firmwarebug_defs firmwarebugs, bool extended)
1697 {
1698 if (!extended) {
1699 ata_smart_errorlog log;
1700 if (ataReadErrorLog(device, &log, firmwarebugs)){
1701 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1702 return -1;
1703 }
1704 return (log.error_log_pointer ? log.ata_error_count : 0);
1705 }
1706 else {
1707 ata_smart_exterrlog logx;
1708 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1709 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1710 return -1;
1711 }
1712 // Some disks use the reserved byte as index, see ataprint.cpp.
1713 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1714 }
1715 }
1716
1717 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1718 // error count, and top bits are the power-on hours of the last error.
1719 static int SelfTestErrorCount(ata_device * device, const char * name,
1720 firmwarebug_defs firmwarebugs)
1721 {
1722 struct ata_smart_selftestlog log;
1723
1724 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1725 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1726 return -1;
1727 }
1728
1729 if (!log.mostrecenttest)
1730 // No tests logged
1731 return 0;
1732
1733 // Count failed self-tests
1734 int errcnt = 0, hours = 0;
1735 for (int i = 20; i >= 0; i--) {
1736 int j = (i + log.mostrecenttest) % 21;
1737 const ata_smart_selftestlog_struct & entry = log.selftest_struct[j];
1738 if (!nonempty(&entry, sizeof(entry)))
1739 continue;
1740
1741 int status = entry.selfteststatus >> 4;
1742 if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1743 // First successful extended self-test, stop count
1744 break;
1745
1746 if (0x3 <= status && status <= 0x8) {
1747 // Self-test showed an error
1748 errcnt++;
1749 // Keep track of time of most recent error
1750 if (!hours)
1751 hours = entry.timestamp;
1752 }
1753 }
1754
1755 return ((hours << 8) | errcnt);
1756 }
1757
// Extract the error count (bottom 8 bits) or the hours (next 16 bits)
// from a value composed as ((hours << 8) | errcnt).
// The argument is parenthesised such that any expression can be passed.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1760
// Check offline data collection status:
// true if the status, with the top bit ignored, is 0x03
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned char code = status & 0x7f;
  return code == 0x03;
}
1766
// Check self-test execution status:
// true if the upper four bits of the status are all set
static inline bool is_self_test_in_progress(unsigned char status)
{
  return (status & 0xf0) == 0xf0;
}
1772
1773 // Log offline data collection status
1774 static void log_offline_data_coll_status(const char * name, unsigned char status)
1775 {
1776 const char * msg;
1777 switch (status & 0x7f) {
1778 case 0x00: msg = "was never started"; break;
1779 case 0x02: msg = "was completed without error"; break;
1780 case 0x03: msg = "is in progress"; break;
1781 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1782 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1783 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1784 default: msg = 0;
1785 }
1786
1787 if (msg)
1788 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1789 "Device: %s, offline data collection %s%s\n", name, msg,
1790 ((status & 0x80) ? " (auto:on)" : ""));
1791 else
1792 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1793 name, status);
1794 }
1795
1796 // Log self-test execution status
1797 static void log_self_test_exec_status(const char * name, unsigned char status)
1798 {
1799 const char * msg;
1800 switch (status >> 4) {
1801 case 0x0: msg = "completed without error"; break;
1802 case 0x1: msg = "was aborted by the host"; break;
1803 case 0x2: msg = "was interrupted by the host with a reset"; break;
1804 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1805 case 0x4: msg = "completed with error (unknown test element)"; break;
1806 case 0x5: msg = "completed with error (electrical test element)"; break;
1807 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1808 case 0x7: msg = "completed with error (read test element)"; break;
1809 case 0x8: msg = "completed with error (handling damage?)"; break;
1810 default: msg = 0;
1811 }
1812
1813 if (msg)
1814 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1815 "Device: %s, previous self-test %s\n", name, msg);
1816 else if ((status >> 4) == 0xf)
1817 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1818 name, status & 0x0f);
1819 else
1820 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1821 name, status);
1822 }
1823
1824 // Check pending sector count id (-C, -U directives).
1825 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1826 unsigned char id, const char * msg)
1827 {
1828 // Check attribute index
1829 int i = ata_find_attr_index(id, state.smartval);
1830 if (i < 0) {
1831 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1832 cfg.name.c_str(), msg, id);
1833 return false;
1834 }
1835
1836 // Check value
1837 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1838 cfg.attribute_defs);
1839 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1840 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1841 cfg.name.c_str(), msg, id, rawval, rawval);
1842 return false;
1843 }
1844
1845 return true;
1846 }
1847
1848 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1849 static void finish_device_scan(dev_config & cfg, dev_state & state)
1850 {
1851 // Set cfg.emailfreq if user hasn't set it
1852 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1853 // Avoid that emails are suppressed forever due to state persistence
1854 if (cfg.state_file.empty())
1855 cfg.emailfreq = 1; // '-M once'
1856 else
1857 cfg.emailfreq = 2; // '-M daily'
1858 }
1859
1860 // Start self-test regex check now if time was not read from state file
1861 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1862 state.scheduled_test_next_check = time(0);
1863 }
1864
1865 // Common function to format result message for ATA setting
1866 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1867 int set_option = 0, bool has_value = false)
1868 {
1869 if (!msg.empty())
1870 msg += ", ";
1871 msg += name;
1872 if (!ok)
1873 msg += ":--";
1874 else if (set_option < 0)
1875 msg += ":off";
1876 else if (has_value)
1877 msg += strprintf(":%d", set_option-1);
1878 else if (set_option > 0)
1879 msg += ":on";
1880 }
1881
1882 // Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1883 static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1884 {
1885 if (!cfg.id_is_unique)
1886 return false;
1887
1888 for (unsigned i = 0; i < prev_cfgs.size(); i++) {
1889 if (!prev_cfgs[i].id_is_unique)
1890 continue;
1891 if (cfg.dev_idinfo != prev_cfgs[i].dev_idinfo)
1892 continue;
1893
1894 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1895 cfg.dev_name.c_str(), prev_cfgs[i].dev_name.c_str());
1896 return true;
1897 }
1898
1899 return false;
1900 }
1901
1902 // TODO: Add '-F swapid' directive
// Passed by ATADeviceScan() as the third argument of ata_read_identity().
// Hard-wired to false until a directive exists to change it.
1903 const bool fix_swapped_id = false;
1904
1905 // scan to see what ata devices there are, and if they support SMART
//
// Probes the (already open) ATA device and adapts CFG and STATE to what the
// device can actually do: every directive whose capability check fails is
// switched off in CFG, and initial counters are stored in STATE.
//
// Parameters:
//   cfg       - device configuration, updated in place
//   state     - device state; receives sector count, SMART values/thresholds
//               and the initial self-test/error log counters
//   atadev    - device to query; closed with CloseDevice() before every return
//   prev_cfgs - devices registered so far, used for the duplicate check;
//               the check is skipped if this pointer is null
//
// Return values:
//   0 - device accepted for monitoring
//   1 - same identity as a device in PREV_CFGS
//   2 - IDENTIFY failed, packet device, or SMART missing/not enabled
//       (without '-T permissive')
//   3 - none of the selected checks is left after the capability checks
1906 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1907 const dev_config_vector * prev_cfgs)
1908 {
1909 int supported=0;
1910 struct ata_identify_device drive;
1911 const char *name = cfg.name.c_str();
1912 int retid;
1913
1914 // Device must be open
1915
1916 // Get drive identity structure
// As used below: retid < 0 means no IDENTIFY data, retid > 0 is the packet
// device type plus one, 0 is success.
1917 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1918 if (retid<0)
1919 // Unable to read Identity structure
1920 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1921 else
1922 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1923 name, packetdevicetype(retid-1));
1924 CloseDevice(atadev, name);
1925 return 2;
1926 }
1927
1928 // Get drive identity, size and rotation rate (HDD/SSD)
// model and serial are reused at the end of this function to build the
// state and attribute log file names.
1929 char model[40+1], serial[20+1], firmware[8+1];
1930 ata_format_id_string(model, drive.model, sizeof(model)-1);
1931 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1932 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1933
1934 ata_size_info sizes;
1935 ata_get_size_info(&drive, sizes);
1936 state.num_sectors = sizes.sectors;
1937 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1938
// The WWN part stays empty if ata_get_wwn() reports a negative value.
1939 char wwn[30]; wwn[0] = 0;
1940 unsigned oui = 0; uint64_t unique_id = 0;
1941 int naa = ata_get_wwn(&drive, oui, unique_id);
1942 if (naa >= 0)
1943 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1944
1945 // Format device id string for warning emails
1946 char cap[32];
1947 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1948 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1949 cfg.id_is_unique = true; // TODO: Check serial?
1950
1951 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1952
1953 // Check for duplicates
1954 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
1955 CloseDevice(atadev, name);
1956 return 1;
1957 }
1958
1959 // Show if device in database, and use preset vendor attribute
1960 // options unless user has requested otherwise.
1961 if (cfg.ignorepresets)
1962 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1963 else {
1964 // Apply vendor specific presets, print warning if present
1965 const drive_settings * dbentry = lookup_drive_apply_presets(
1966 &drive, cfg.attribute_defs, cfg.firmwarebugs);
1967 if (!dbentry)
1968 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1969 else {
1970 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1971 name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1972 if (*dbentry->warningmsg)
1973 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1974 }
1975 }
1976
1977 // Check for ATA Security LOCK
// 'locked' is consulted again further down to skip the SCT ERC setting.
1978 unsigned short word128 = drive.words088_255[128-88];
1979 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1980 if (locked)
1981 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
1982
1983 // Set default '-C 197[+]' if no '-C ID' is specified.
1984 if (!cfg.curr_pending_set)
1985 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1986 // Set default '-U 198[+]' if no '-U ID' is specified.
1987 if (!cfg.offl_pending_set)
1988 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1989
1990 // If requested, show which presets would be used for this drive
// debugmode is raised to 2 for the duration of show_presets() if it was 0
// and restored afterwards.
1991 if (cfg.showpresets) {
1992 int savedebugmode=debugmode;
1993 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1994 if (!debugmode)
1995 debugmode=2;
1996 show_presets(&drive);
1997 debugmode=savedebugmode;
1998 }
1999
2000 // see if drive supports SMART
2001 supported=ataSmartSupport(&drive);
2002 if (supported!=1) {
2003 if (supported==0)
2004 // drive does NOT support SMART
2005 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
2006 else
2007 // can't tell if drive supports SMART
2008 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
2009
2010 // should we proceed anyway?
2011 if (cfg.permissive) {
2012 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
2013 }
2014 else {
2015 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
2016 CloseDevice(atadev, name);
2017 return 2;
2018 }
2019 }
2020
// A failing enable command is tolerated if the IDENTIFY data says SMART is
// already enabled, or if '-T permissive' was given.
2021 if (ataEnableSmart(atadev)) {
2022 // Enable SMART command has failed
2023 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
2024
2025 if (ataIsSmartEnabled(&drive) <= 0) {
2026 if (!cfg.permissive) {
2027 PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
2028 CloseDevice(atadev, name);
2029 return 2;
2030 }
2031 PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
2032 }
2033 else {
2034 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
2035 }
2036 }
2037
// cfg.autosave: 1 = disable, 2 = enable, any other value leaves the
// device setting untouched.
2038 // disable device attribute autosave...
2039 if (cfg.autosave==1) {
2040 if (ataDisableAutoSave(atadev))
2041 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
2042 else
2043 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
2044 }
2045
2046 // or enable device attribute autosave
2047 if (cfg.autosave==2) {
2048 if (ataEnableAutoSave(atadev))
2049 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
2050 else
2051 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
2052 }
2053
2054 // capability check: SMART status
2055 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
2056 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
2057 cfg.smartcheck = false;
2058 }
2059
2060 // capability check: Read smart values and thresholds. Note that
2061 // smart values are ALSO needed even if we ONLY want to know if the
2062 // device is self-test log or error-log capable! After ATA-5, this
2063 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2064 // but sadly not for ATA-5. Sigh.
2065
2066 // do we need to get SMART data?
// smart_val_ok records whether state.smartval holds data read from the
// device; later capability checks depend on it.
2067 bool smart_val_ok = false;
2068 if ( cfg.autoofflinetest || cfg.selftest
2069 || cfg.errorlog || cfg.xerrorlog
2070 || cfg.offlinests || cfg.selfteststs
2071 || cfg.usagefailed || cfg.prefail || cfg.usage
2072 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
2073 || cfg.curr_pending_id || cfg.offl_pending_id ) {
2074
// Without SMART values all attribute, temperature and pending sector
// checks are turned off.
2075 if (ataReadSmartValues(atadev, &state.smartval)) {
2076 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
2077 cfg.usagefailed = cfg.prefail = cfg.usage = false;
2078 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2079 cfg.curr_pending_id = cfg.offl_pending_id = 0;
2080 }
2081 else {
2082 smart_val_ok = true;
2083 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
2084 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
2085 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
2086 cfg.usagefailed = false;
2087 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2088 memset(&state.smartthres, 0, sizeof(state.smartthres));
2089 }
2090 }
2091
2092 // see if the necessary Attribute is there to monitor offline or
2093 // current pending sectors or temperature
// An ID of 0 disables the corresponding pending sector check.
2094 if ( cfg.curr_pending_id
2095 && !check_pending_id(cfg, state, cfg.curr_pending_id,
2096 "Current_Pending_Sector"))
2097 cfg.curr_pending_id = 0;
2098
2099 if ( cfg.offl_pending_id
2100 && !check_pending_id(cfg, state, cfg.offl_pending_id,
2101 "Offline_Uncorrectable"))
2102 cfg.offl_pending_id = 0;
2103
2104 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2105 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
2106 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2107 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2108 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2109 }
2110
2111 // Report ignored '-r' or '-R' directives
// Only log messages are produced here; the monitor flags themselves are
// left unchanged.
2112 for (int id = 1; id <= 255; id++) {
2113 if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
2114 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
2115 const char * excl = (cfg.monitor_attr_flags.is_set(id,
2116 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
2117
2118 int idx = ata_find_attr_index(id, state.smartval);
2119 if (idx < 0)
2120 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
2121 else {
2122 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
2123 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
2124 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
2125 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
2126 }
2127 }
2128 }
2129 }
2130
2131 // enable/disable automatic on-line testing
// cfg.autoofflinetest: 1 = disable, any other non-zero value = enable.
2132 if (cfg.autoofflinetest) {
2133 // is this an enable or disable request?
2134 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2135 if (!smart_val_ok)
2136 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2137 else {
2138 // if command appears unsupported, issue a warning...
2139 if (!isSupportAutomaticTimer(&state.smartval))
2140 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2141 // ... but then try anyway
2142 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2143 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2144 else
2145 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2146 }
2147 }
2148
2149 // Read log directories if required for capability check
// Both reads are skipped if the BUG_NOLOGDIR firmware bug flag is set.
2150 ata_smart_log_directory smart_logdir, gp_logdir;
2151 bool smart_logdir_ok = false, gp_logdir_ok = false;
2152
2153 if ( isGeneralPurposeLoggingCapable(&drive)
2154 && (cfg.errorlog || cfg.selftest)
2155 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2156 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2157 smart_logdir_ok = true;
2158 }
2159
2160 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2161 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2162 gp_logdir_ok = true;
2163 }
2164
2165 // capability check: self-test-log
// The log is assumed present if '-T permissive' is set, if the SMART log
// directory entry [0x06-1] has sectors, or (without a directory) if the
// SMART values say so.  Even then the log must be readable.
2166 state.selflogcount = 0; state.selfloghour = 0;
2167 if (cfg.selftest) {
2168 int retval;
2169 if (!( cfg.permissive
2170 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2171 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2172 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2173 cfg.selftest = false;
2174 }
2175 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2176 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2177 cfg.selftest = false;
2178 }
2179 else {
2180 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2181 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2182 }
2183 }
2184
2185 // capability check: ATA error log
// Same scheme as for the self-test log, using directory entry [0x01-1].
2186 state.ataerrorcount = 0;
2187 if (cfg.errorlog) {
2188 int errcnt1;
2189 if (!( cfg.permissive
2190 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2191 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2192 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2193 cfg.errorlog = false;
2194 }
2195 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2196 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2197 cfg.errorlog = false;
2198 }
2199 else
2200 state.ataerrorcount = errcnt1;
2201 }
2202
// Extended error log: checked via GP log directory entry [0x03-1].  If both
// error logs are monitored and disagree, the larger count is kept.
2203 if (cfg.xerrorlog) {
2204 int errcnt2;
2205 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2206 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2207 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2208 name);
2209 cfg.xerrorlog = false;
2210 }
2211 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2212 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2213 cfg.xerrorlog = false;
2214 }
2215 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2216 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2217 name, state.ataerrorcount, errcnt2);
2218 // Record max error count
2219 if (errcnt2 > state.ataerrorcount)
2220 state.ataerrorcount = errcnt2;
2221 }
2222 else
2223 state.ataerrorcount = errcnt2;
2224 }
2225
2226 // capability check: self-test and offline data collection status
2227 if (cfg.offlinests || cfg.selfteststs) {
2228 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2229 if (cfg.offlinests)
2230 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2231 if (cfg.selfteststs)
2232 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2233 cfg.offlinests = cfg.selfteststs = false;
2234 }
2235 }
2236
2237 // capabilities check -- does it support powermode?
// Any result other than the values listed below makes the '-n' directive
// unusable, so it is switched off.
2238 if (cfg.powermode) {
2239 int powermode = ataCheckPowerMode(atadev);
2240
2241 if (-1 == powermode) {
2242 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2243 cfg.powermode=0;
2244 }
2245 else if (powermode!=0x00 && powermode!=0x01
2246 && powermode!=0x40 && powermode!=0x41
2247 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2248 && powermode!=0xff) {
2249 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2250 name, powermode);
2251 cfg.powermode=0;
2252 }
2253 }
2254
2255 // Apply ATA settings
// cfg.set_* encoding as used below: 0 = not requested, > 0 = enable,
// otherwise disable.  For AAM, APM and standby the value sent to the device
// is cfg.set_* minus one.
2256 std::string msg;
2257
2258 if (cfg.set_aam)
2259 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2260 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2261 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2262
2263 if (cfg.set_apm)
2264 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2265 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2266 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2267
2268 if (cfg.set_lookahead)
2269 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2270 (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2271 cfg.set_lookahead);
2272
2273 if (cfg.set_wcache)
2274 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2275 (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2276
2277 if (cfg.set_dsn)
2278 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2279 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2280
2281 if (cfg.set_security_freeze)
2282 format_set_result_msg(msg, "Security freeze",
2283 ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2284
2285 if (cfg.set_standby)
2286 format_set_result_msg(msg, "Standby",
2287 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2288
2289 // Report as one log entry
2290 if (!msg.empty())
2291 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2292
2293 // set SCT Error Recovery Control if requested
// Read and write timers are set with two separate commands; the second is
// not attempted if the first one fails.
2294 if (cfg.sct_erc_set) {
2295 if (!isSCTErrorRecoveryControlCapable(&drive))
2296 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2297 name);
2298 else if (locked)
2299 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2300 name);
2301 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2302 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2303 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2304 else
2305 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2306 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2307 }
2308
2309 // If no tests available or selected, return
// Evaluated after the capability checks above, so directives that were
// switched off there no longer count.
2310 if (!( cfg.smartcheck || cfg.selftest
2311 || cfg.errorlog || cfg.xerrorlog
2312 || cfg.offlinests || cfg.selfteststs
2313 || cfg.usagefailed || cfg.prefail || cfg.usage
2314 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2315 CloseDevice(atadev, name);
2316 return 3;
2317 }
2318
2319 // tell user we are registering device
2320 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2321
2322 // close file descriptor
2323 CloseDevice(atadev, name);
2324
// File names are "<prefix><model>-<serial>.ata.state" and ".ata.csv"; model
// and serial are first sanitized in place with not_allowed_in_filename.
2325 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2326 // Build file name for state file
2327 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2328 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2329 if (!state_path_prefix.empty()) {
2330 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2331 // Read previous state
2332 if (read_dev_state(cfg.state_file.c_str(), state)) {
2333 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2334 // Copy ATA attribute values to temp state
2335 state.update_temp_state();
2336 }
2337 }
2338 if (!attrlog_path_prefix.empty())
2339 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2340 }
2341
// Must run after cfg.state_file is set: the default email frequency chosen
// by finish_device_scan() depends on it.
2342 finish_device_scan(cfg, state);
2343
2344 return 0;
2345 }
2346
2347 // on success, return 0. On failure, return >0. Never return <0,
2348 // please.
2349 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2350 const dev_config_vector * prev_cfgs)
2351 {
2352 int err, req_len, avail_len, version, len;
2353 const char *device = cfg.name.c_str();
2354 struct scsi_iec_mode_page iec;
2355 uint8_t tBuf[64];
2356 uint8_t inqBuf[96];
2357 uint8_t vpdBuf[252];
2358 char lu_id[64], serial[256], vendor[40], model[40];
2359
2360 // Device must be open
2361 memset(inqBuf, 0, 96);
2362 req_len = 36;
2363 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2364 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2365 req_len = 64;
2366 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2367 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2368 "skip device\n", device);
2369 return 2;
2370 }
2371 }
2372 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2373
2374 avail_len = inqBuf[4] + 5;
2375 len = (avail_len < req_len) ? avail_len : req_len;
2376 if (len < 36) {
2377 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2378 "skip device\n", device);
2379 return 2;
2380 }
2381
2382 int pdt = inqBuf[0] & 0x1f;
2383
2384 if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2385 (0xe == pdt))) {
2386 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2387 "skip\n", device, pdt);
2388 return 2;
2389 }
2390
2391 if (supported_vpd_pages_p) {
2392 delete supported_vpd_pages_p;
2393 supported_vpd_pages_p = NULL;
2394 }
2395 supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2396
2397 lu_id[0] = '\0';
2398 if ((version >= 0x3) && (version < 0x8)) {
2399 /* SPC to SPC-5 */
2400 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2401 vpdBuf, sizeof(vpdBuf))) {
2402 len = vpdBuf[3];
2403 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2404 }
2405 }
2406 serial[0] = '\0';
2407 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2408 vpdBuf, sizeof(vpdBuf))) {
2409 len = vpdBuf[3];
2410 vpdBuf[4 + len] = '\0';
2411 scsi_format_id_string(serial, &vpdBuf[4], len);
2412 }
2413
2414 char si_str[64];
2415 struct scsi_readcap_resp srr;
2416 uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2417
2418 if (capacity)
2419 format_capacity(si_str, sizeof(si_str), capacity, ".");
2420 else
2421 si_str[0] = '\0';
2422
2423 // Format device id string for warning emails
2424 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2425 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2426 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2427 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2428 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2429 cfg.id_is_unique = (lu_id[0] || serial[0]);
2430
2431 // format "model" string
2432 scsi_format_id_string(vendor, &inqBuf[8], 8);
2433 scsi_format_id_string(model, &inqBuf[16], 16);
2434 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2435
2436 // Check for duplicates
2437 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2438 CloseDevice(scsidev, device);
2439 return 1;
2440 }
2441
2442 // check that device is ready for commands. IE stores its stuff on
2443 // the media.
2444 if ((err = scsiTestUnitReady(scsidev))) {
2445 if (SIMPLE_ERR_NOT_READY == err)
2446 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2447 else if (SIMPLE_ERR_NO_MEDIUM == err)
2448 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2449 else if (SIMPLE_ERR_BECOMING_READY == err)
2450 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2451 else
2452 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2453 CloseDevice(scsidev, device);
2454 return 2;
2455 }
2456
2457 // Badly-conforming USB storage devices may fail this check.
2458 // The response to the following IE mode page fetch (current and
2459 // changeable values) is carefully examined. It has been found
2460 // that various USB devices that malform the response will lock up
2461 // if asked for a log page (e.g. temperature) so it is best to
2462 // bail out now.
2463 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2464 state.modese_len = iec.modese_len;
2465 else if (SIMPLE_ERR_BAD_FIELD == err)
2466 ; /* continue since it is reasonable not to support IE mpage */
2467 else { /* any other error (including malformed response) unreasonable */
2468 PrintOut(LOG_INFO,
2469 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2470 device, err);
2471 CloseDevice(scsidev, device);
2472 return 3;
2473 }
2474
2475 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2476 // smart if it is off). This may change to be the same as the ATA side.
2477 if (!scsi_IsExceptionControlEnabled(&iec)) {
2478 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2479 "Try 'smartctl -s on %s' to turn on SMART features\n",
2480 device, device);
2481 CloseDevice(scsidev, device);
2482 return 3;
2483 }
2484
2485 // Flag that certain log pages are supported (information may be
2486 // available from other sources).
2487 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2488 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2489 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2490 {
2491 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2492 switch (tBuf[k]) {
2493 case TEMPERATURE_LPAGE:
2494 state.TempPageSupported = 1;
2495 break;
2496 case IE_LPAGE:
2497 state.SmartPageSupported = 1;
2498 break;
2499 case READ_ERROR_COUNTER_LPAGE:
2500 state.ReadECounterPageSupported = 1;
2501 break;
2502 case WRITE_ERROR_COUNTER_LPAGE:
2503 state.WriteECounterPageSupported = 1;
2504 break;
2505 case VERIFY_ERROR_COUNTER_LPAGE:
2506 state.VerifyECounterPageSupported = 1;
2507 break;
2508 case NON_MEDIUM_ERROR_LPAGE:
2509 state.NonMediumErrorPageSupported = 1;
2510 break;
2511 default:
2512 break;
2513 }
2514 }
2515 }
2516
2517 // Check if scsiCheckIE() is going to work
2518 {
2519 uint8_t asc = 0;
2520 uint8_t ascq = 0;
2521 uint8_t currenttemp = 0;
2522 uint8_t triptemp = 0;
2523
2524 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2525 &asc, &ascq, &currenttemp, &triptemp)) {
2526 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2527 state.SuppressReport = 1;
2528 }
2529 if ( (state.SuppressReport || !currenttemp)
2530 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2531 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2532 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2533 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2534 }
2535 }
2536
2537 // capability check: self-test-log
2538 if (cfg.selftest){
2539 int retval = scsiCountFailedSelfTests(scsidev, 0);
2540 if (retval<0) {
2541 // no self-test log, turn off monitoring
2542 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2543 cfg.selftest = false;
2544 state.selflogcount = 0;
2545 state.selfloghour = 0;
2546 }
2547 else {
2548 // register starting values to watch for changes
2549 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2550 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2551 }
2552 }
2553
2554 // disable autosave (set GLTSD bit)
2555 if (cfg.autosave==1){
2556 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2557 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2558 else
2559 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2560 }
2561
2562 // or enable autosave (clear GLTSD bit)
2563 if (cfg.autosave==2){
2564 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2565 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2566 else
2567 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2568 }
2569
2570 // tell user we are registering device
2571 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2572
2573 // Make sure that init_standby_check() ignores SCSI devices
2574 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2575
2576 // close file descriptor
2577 CloseDevice(scsidev, device);
2578
2579 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2580 // Build file name for state file
2581 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2582 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2583 if (!state_path_prefix.empty()) {
2584 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2585 // Read previous state
2586 if (read_dev_state(cfg.state_file.c_str(), state)) {
2587 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2588 // Copy ATA attribute values to temp state
2589 state.update_temp_state();
2590 }
2591 }
2592 if (!attrlog_path_prefix.empty())
2593 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2594 }
2595
2596 finish_device_scan(cfg, state);
2597
2598 return 0;
2599 }
2600
// Reduce a 128 bit little endian integer to uint64_t.
// If any of the upper 8 bytes is non-zero the value does not fit and
// the maximum uint64_t value is returned instead.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  uint64_t result = 0;
  for (int pos = 0; pos < 16; pos++) {
    if (pos < 8)
      // Byte POS contributes bits 8*POS .. 8*POS+7
      result |= (uint64_t)val[pos] << (8 * pos);
    else if (val[pos])
      // Overflow: saturate
      return ~(uint64_t)0;
  }
  return result;
}
2614
2615 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
2616 static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2617 {
2618 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2619 for (int i = 0; i < 8; i++) {
2620 if (smart_log.temp_sensor[i] > k)
2621 k = smart_log.temp_sensor[i];
2622 }
2623 return k;
2624 }
2625
2626 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2627 const dev_config_vector * prev_cfgs)
2628 {
2629 const char *name = cfg.name.c_str();
2630
2631 // Device must be open
2632
2633 // Get ID Controller
2634 nvme_id_ctrl id_ctrl;
2635 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2636 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2637 CloseDevice(nvmedev, name);
2638 return 2;
2639 }
2640
2641 // Get drive identity
2642 char model[40+1], serial[20+1], firmware[8+1];
2643 format_char_array(model, id_ctrl.mn);
2644 format_char_array(serial, id_ctrl.sn);
2645 format_char_array(firmware, id_ctrl.fr);
2646
2647 // Format device id string for warning emails
2648 char nsstr[32] = "", capstr[32] = "";
2649 unsigned nsid = nvmedev->get_nsid();
2650 if (nsid != 0xffffffff)
2651 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2652 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2653 if (capacity)
2654 format_capacity(capstr, sizeof(capstr), capacity, ".");
2655 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2656 nsstr, (capstr[0] ? ", " : ""), capstr);
2657 cfg.id_is_unique = true; // TODO: Check serial?
2658
2659 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2660
2661 // Check for duplicates
2662 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2663 CloseDevice(nvmedev, name);
2664 return 1;
2665 }
2666
2667 // Read SMART/Health log
2668 nvme_smart_log smart_log;
2669 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2670 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2671 CloseDevice(nvmedev, name);
2672 return 2;
2673 }
2674
2675 // Check temperature sensor support
2676 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2677 if (!nvme_get_max_temp_kelvin(smart_log)) {
2678 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2679 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2680 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2681 }
2682 }
2683
2684 // Init total error count
2685 if (cfg.errorlog || cfg.xerrorlog) {
2686 state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
2687 }
2688
2689 // If no supported tests selected, return
2690 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2691 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2692 CloseDevice(nvmedev, name);
2693 return 3;
2694 }
2695
2696 // Tell user we are registering device
2697 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2698
2699 // Make sure that init_standby_check() ignores NVMe devices
2700 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2701
2702 CloseDevice(nvmedev, name);
2703
2704 if (!state_path_prefix.empty()) {
2705 // Build file name for state file
2706 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2707 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2708 nsstr[0] = 0;
2709 if (nsid != 0xffffffff)
2710 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2711 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2712 // Read previous state
2713 if (read_dev_state(cfg.state_file.c_str(), state))
2714 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2715 }
2716
2717 finish_device_scan(cfg, state);
2718
2719 return 0;
2720 }
2721
2722 // Open device for next check, return false on error
2723 static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2724 const char * type)
2725 {
2726 const char * name = cfg.name.c_str();
2727
2728 // If user has asked, test the email warning system
2729 if (cfg.emailtest)
2730 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2731
2732 // User may have requested (with the -n Directive) to leave the disk
2733 // alone if it is in idle or standby mode. In this case check the
2734 // power mode first before opening the device for full access,
2735 // and exit without check if disk is reported in standby.
2736 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2737 // Note that 'is_powered_down()' handles opening the device itself, and
2738 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2739 if (device->is_powered_down())
2740 {
2741 // skip at most powerskipmax checks
2742 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2743 // report first only except if state has changed, avoid waking up system disk
2744 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2745 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2746 state.lastpowermodeskipped = -1;
2747 }
2748 state.powerskipcnt++;
2749 return false;
2750 }
2751 }
2752 }
2753
2754 // if we can't open device, fail gracefully rather than hard --
2755 // perhaps the next time around we'll be able to open it
2756 if (!device->open()) {
2757 // For removable devices, print error message only once and suppress email
2758 if (!cfg.removable) {
2759 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2760 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2761 }
2762 else if (!state.removed) {
2763 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2764 state.removed = true;
2765 }
2766 else if (debugmode)
2767 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2768 return false;
2769 }
2770
2771 if (debugmode)
2772 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2773
2774 if (!cfg.removable)
2775 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2776 else if (state.removed) {
2777 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2778 state.removed = false;
2779 }
2780
2781 return true;
2782 }
2783
2784 // If the self-test log has got more self-test errors (or more recent
2785 // self-test errors) recorded, then notify user.
2786 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2787 {
2788 const char * name = cfg.name.c_str();
2789
2790 if (newi<0)
2791 // command failed
2792 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2793 else {
2794 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2795
2796 // old and new error counts
2797 int oldc=state.selflogcount;
2798 int newc=SELFTEST_ERRORCOUNT(newi);
2799
2800 // old and new error timestamps in hours
2801 int oldh=state.selfloghour;
2802 int newh=SELFTEST_ERRORHOURS(newi);
2803
2804 if (oldc<newc) {
2805 // increase in error count
2806 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2807 name, oldc, newc);
2808 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2809 name, oldc, newc);
2810 state.must_write = true;
2811 }
2812 else if (newc > 0 && oldh != newh) {
2813 // more recent error
2814 // a 'more recent' error might actually be a smaller hour number,
2815 // if the hour number has wrapped.
2816 // There's still a bug here. You might just happen to run a new test
2817 // exactly 32768 hours after the previous failure, and have run exactly
2818 // 20 tests between the two, in which case smartd will miss the
2819 // new failure.
2820 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2821 name, newh);
2822 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2823 name, newh);
2824 state.must_write = true;
2825 }
2826
2827 // Print info if error entries have disappeared
2828 // or newer successful successful extended self-test exits
2829 if (oldc > newc) {
2830 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2831 name, oldc, newc);
2832 if (newc == 0)
2833 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2834 }
2835
2836 // Needed since self-test error count may DECREASE. Hour might
2837 // also have changed.
2838 state.selflogcount= newc;
2839 state.selfloghour = newh;
2840 }
2841 return;
2842 }
2843
// Self-test type letters, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (the terminating NUL is not counted).
static const unsigned num_test_types = sizeof(test_type_chars)/sizeof(test_type_chars[0]) - 1;
2847
2848 // returns test type if time to do test of type testtype,
2849 // 0 if not time to do test.
2850 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2851 {
2852 // check that self-testing has been requested
2853 if (cfg.test_regex.empty())
2854 return 0;
2855
2856 // Exit if drive not capable of any test
2857 if ( state.not_cap_long && state.not_cap_short &&
2858 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2859 return 0;
2860
2861 // since we are about to call localtime(), be sure glibc is informed
2862 // of any timezone changes we make.
2863 if (!usetime)
2864 FixGlibcTimeZoneBug();
2865
2866 // Is it time for next check?
2867 time_t now = (!usetime ? time(0) : usetime);
2868 if (now < state.scheduled_test_next_check)
2869 return 0;
2870
2871 // Limit time check interval to 90 days
2872 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2873 state.scheduled_test_next_check = now - (3600L*24*90);
2874
2875 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2876 char testtype = 0;
2877 time_t testtime = 0; int testhour = 0;
2878 int maxtest = num_test_types-1;
2879
2880 for (time_t t = state.scheduled_test_next_check; ; ) {
2881 struct tm * tms = localtime(&t);
2882 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2883 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2884 for (int i = 0; i <= maxtest; i++) {
2885 // Skip if drive not capable of this test
2886 switch (test_type_chars[i]) {
2887 case 'L': if (state.not_cap_long) continue; break;
2888 case 'S': if (state.not_cap_short) continue; break;
2889 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2890 case 'O': if (scsi || state.not_cap_offline) continue; break;
2891 case 'c': case 'n':
2892 case 'r': if (scsi || state.not_cap_selective) continue; break;
2893 default: continue;
2894 }
2895 // Try match of "T/MM/DD/d/HH"
2896 char pattern[16];
2897 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2898 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2899 if (cfg.test_regex.full_match(pattern)) {
2900 // Test found
2901 testtype = pattern[0];
2902 testtime = t; testhour = tms->tm_hour;
2903 // Limit further matches to higher priority self-tests
2904 maxtest = i-1;
2905 break;
2906 }
2907 }
2908 // Exit if no tests left or current time reached
2909 if (maxtest < 0)
2910 break;
2911 if (t >= now)
2912 break;
2913 // Check next hour
2914 if ((t += 3600) > now)
2915 t = now;
2916 }
2917
2918 // Do next check not before next hour.
2919 struct tm * tmnow = localtime(&now);
2920 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2921
2922 if (testtype) {
2923 state.must_write = true;
2924 // Tell user if an old test was found.
2925 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2926 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2927 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2928 cfg.name.c_str(), testtype, datebuf);
2929 }
2930 }
2931
2932 return testtype;
2933 }
2934
2935 // Print a list of future tests.
2936 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2937 {
2938 unsigned numdev = configs.size();
2939 if (!numdev)
2940 return;
2941 std::vector<int> testcnts(numdev * num_test_types, 0);
2942
2943 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2944
2945 // FixGlibcTimeZoneBug(); // done in PrintOut()
2946 time_t now = time(0);
2947 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2948 dateandtimezoneepoch(datenow, now);
2949
2950 long seconds;
2951 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2952 // Check for each device whether a test will be run
2953 time_t testtime = now + seconds;
2954 for (unsigned i = 0; i < numdev; i++) {
2955 const dev_config & cfg = configs.at(i);
2956 dev_state & state = states.at(i);
2957 const char * p;
2958 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2959 if (testtype && (p = strchr(test_type_chars, testtype))) {
2960 unsigned t = (p - test_type_chars);
2961 // Report at most 5 tests of each type
2962 if (++testcnts[i*num_test_types + t] <= 5) {
2963 dateandtimezoneepoch(date, testtime);
2964 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2965 testcnts[i*num_test_types + t], testtype, date);
2966 }
2967 }
2968 }
2969 }
2970
2971 // Report totals
2972 dateandtimezoneepoch(date, now+seconds);
2973 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2974 for (unsigned i = 0; i < numdev; i++) {
2975 const dev_config & cfg = configs.at(i);
2976 bool scsi = devices.at(i)->is_scsi();
2977 for (unsigned t = 0; t < num_test_types; t++) {
2978 int cnt = testcnts[i*num_test_types + t];
2979 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2980 continue;
2981 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2982 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2983 }
2984 }
2985
2986 }
2987
2988 // Return zero on success, nonzero on failure. Perform offline (background)
2989 // short or long (extended) self test on given scsi device.
2990 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2991 {
2992 int retval = 0;
2993 const char *testname = 0;
2994 const char *name = cfg.name.c_str();
2995 int inProgress;
2996
2997 if (scsiSelfTestInProgress(device, &inProgress)) {
2998 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2999 state.not_cap_short = state.not_cap_long = true;
3000 return 1;
3001 }
3002
3003 if (1 == inProgress) {
3004 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3005 "progress.\n", name);
3006 return 1;
3007 }
3008
3009 switch (testtype) {
3010 case 'S':
3011 testname = "Short Self";
3012 retval = scsiSmartShortSelfTest(device);
3013 break;
3014 case 'L':
3015 testname = "Long Self";
3016 retval = scsiSmartExtendSelfTest(device);
3017 break;
3018 }
3019 // If we can't do the test, exit
3020 if (NULL == testname) {
3021 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3022 testtype);
3023 return 1;
3024 }
3025 if (retval) {
3026 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3027 (SIMPLE_ERR_BAD_FIELD == retval)) {
3028 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3029 testname);
3030 if ('L'==testtype)
3031 state.not_cap_long = true;
3032 else
3033 state.not_cap_short = true;
3034
3035 return 1;
3036 }
3037 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3038 testname, retval);
3039 return 1;
3040 }
3041
3042 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3043
3044 return 0;
3045 }
3046
3047 // Do an offline immediate or self-test. Return zero on success,
3048 // nonzero on failure.
3049 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3050 {
3051 const char *name = cfg.name.c_str();
3052
3053 // Read current smart data and check status/capability
3054 struct ata_smart_values data;
3055 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3056 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3057 return 1;
3058 }
3059
3060 // Check for capability to do the test
3061 int dotest = -1, mode = 0;
3062 const char *testname = 0;
3063 switch (testtype) {
3064 case 'O':
3065 testname="Offline Immediate ";
3066 if (isSupportExecuteOfflineImmediate(&data))
3067 dotest=OFFLINE_FULL_SCAN;
3068 else
3069 state.not_cap_offline = true;
3070 break;
3071 case 'C':
3072 testname="Conveyance Self-";
3073 if (isSupportConveyanceSelfTest(&data))
3074 dotest=CONVEYANCE_SELF_TEST;
3075 else
3076 state.not_cap_conveyance = true;
3077 break;
3078 case 'S':
3079 testname="Short Self-";
3080 if (isSupportSelfTest(&data))
3081 dotest=SHORT_SELF_TEST;
3082 else
3083 state.not_cap_short = true;
3084 break;
3085 case 'L':
3086 testname="Long Self-";
3087 if (isSupportSelfTest(&data))
3088 dotest=EXTEND_SELF_TEST;
3089 else
3090 state.not_cap_long = true;
3091 break;
3092
3093 case 'c': case 'n': case 'r':
3094 testname = "Selective Self-";
3095 if (isSupportSelectiveSelfTest(&data)) {
3096 dotest = SELECTIVE_SELF_TEST;
3097 switch (testtype) {
3098 case 'c': mode = SEL_CONT; break;
3099 case 'n': mode = SEL_NEXT; break;
3100 case 'r': mode = SEL_REDO; break;
3101 }
3102 }
3103 else
3104 state.not_cap_selective = true;
3105 break;
3106 }
3107
3108 // If we can't do the test, exit
3109 if (dotest<0) {
3110 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3111 return 1;
3112 }
3113
3114 // If currently running a self-test, do not interrupt it to start another.
3115 if (15==(data.self_test_exec_status >> 4)) {
3116 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3117 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3118 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3119 } else {
3120 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3121 name, testname, (int)(data.self_test_exec_status & 0x0f));
3122 return 1;
3123 }
3124 }
3125
3126 if (dotest == SELECTIVE_SELF_TEST) {
3127 // Set test span
3128 ata_selective_selftest_args selargs, prev_args;
3129 selargs.num_spans = 1;
3130 selargs.span[0].mode = mode;
3131 prev_args.num_spans = 1;
3132 prev_args.span[0].start = state.selective_test_last_start;
3133 prev_args.span[0].end = state.selective_test_last_end;
3134 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3135 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3136 return 1;
3137 }
3138 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3139 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3140 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3141 start, end, end - start + 1,
3142 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3143 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3144 state.selective_test_last_start = start;
3145 state.selective_test_last_end = end;
3146 }
3147
3148 // execute the test, and return status
3149 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
3150 if (retval) {
3151 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3152 return retval;
3153 }
3154
3155 // Report recent test start to do_disable_standby_check()
3156 // and force log of next test status
3157 if (testtype == 'O')
3158 state.offline_started = true;
3159 else
3160 state.selftest_started = true;
3161
3162 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3163 return 0;
3164 }
3165
3166 // Check pending sector count attribute values (-C, -U directives).
3167 static void check_pending(const dev_config & cfg, dev_state & state,
3168 unsigned char id, bool increase_only,
3169 const ata_smart_values & smartval,
3170 int mailtype, const char * msg)
3171 {
3172 // Find attribute index
3173 int i = ata_find_attr_index(id, smartval);
3174 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3175 return;
3176
3177 // No report if no sectors pending.
3178 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3179 if (rawval == 0) {
3180 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3181 return;
3182 }
3183
3184 // If attribute is not reset, report only sector count increases.
3185 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3186 if (!(!increase_only || prev_rawval < rawval))
3187 return;
3188
3189 // Format message.
3190 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3191 if (prev_rawval > 0 && rawval != prev_rawval)
3192 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3193
3194 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3195 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3196 state.must_write = true;
3197 }
3198
// Format a temperature value for printing.
// Returns "??" if x is 0 (unset), otherwise the decimal value written
// into the caller provided buffer 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  const char * text = "??";
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    text = buf;
  }
  return text;
}
3207
3208 // Check Temperature limits
3209 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3210 {
3211 if (!(0 < currtemp && currtemp < 255)) {
3212 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3213 return;
3214 }
3215
3216 // Update Max Temperature
3217 const char * minchg = "", * maxchg = "";
3218 if (currtemp > state.tempmax) {
3219 if (state.tempmax)
3220 maxchg = "!";
3221 state.tempmax = currtemp;
3222 state.must_write = true;
3223 }
3224
3225 char buf[20];
3226 if (!state.temperature) {
3227 // First check
3228 if (!state.tempmin || currtemp < state.tempmin)
3229 // Delay Min Temperature update by ~ 30 minutes.
3230 state.tempmin_delay = time(0) + CHECKTIME - 60;
3231 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3232 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3233 if (triptemp)
3234 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3235 state.temperature = currtemp;
3236 }
3237 else {
3238 if (state.tempmin_delay) {
3239 // End Min Temperature update delay if ...
3240 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3241 || (state.tempmin_delay <= time(0))) { // or delay time is over.
3242 state.tempmin_delay = 0;
3243 if (!state.tempmin)
3244 state.tempmin = 255;
3245 }
3246 }
3247
3248 // Update Min Temperature
3249 if (!state.tempmin_delay && currtemp < state.tempmin) {
3250 state.tempmin = currtemp;
3251 state.must_write = true;
3252 if (currtemp != state.temperature)
3253 minchg = "!";
3254 }
3255
3256 // Track changes
3257 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3258 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3259 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3260 state.temperature = currtemp;
3261 }
3262 }
3263
3264 // Check limits
3265 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3266 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3267 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3268 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3269 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3270 }
3271 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3272 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3273 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3274 }
3275 else if (cfg.tempcrit) {
3276 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3277 if (currtemp < limit)
3278 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3279 }
3280 }
3281
3282 // Check normalized and raw attribute values.
3283 static void check_attribute(const dev_config & cfg, dev_state & state,
3284 const ata_smart_attribute & attr,
3285 const ata_smart_attribute & prev,
3286 int attridx,
3287 const ata_smart_threshold_entry * thresholds)
3288 {
3289 // Check attribute and threshold
3290 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3291 if (attrstate == ATTRSTATE_NON_EXISTING)
3292 return;
3293
3294 // If requested, check for usage attributes that have failed.
3295 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3296 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
3297 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3298 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3299 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3300 state.must_write = true;
3301 }
3302
3303 // Return if we're not tracking this type of attribute
3304 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3305 if (!( ( prefail && cfg.prefail)
3306 || (!prefail && cfg.usage )))
3307 return;
3308
3309 // Return if '-I ID' was specified
3310 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
3311 return;
3312
3313 // Issue warning if they don't have the same ID in all structures.
3314 if (attr.id != prev.id) {
3315 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3316 cfg.name.c_str(), attr.id, prev.id);
3317 return;
3318 }
3319
3320 // Compare normalized values if valid.
3321 bool valchanged = false;
3322 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3323 if (attr.current != prev.current)
3324 valchanged = true;
3325 }
3326
3327 // Compare raw values if requested.
3328 bool rawchanged = false;
3329 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3330 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
3331 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
3332 rawchanged = true;
3333 }
3334
3335 // Return if no change
3336 if (!(valchanged || rawchanged))
3337 return;
3338
3339 // Format value strings
3340 std::string currstr, prevstr;
3341 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3342 // Print raw values only
3343 currstr = strprintf("%s (Raw)",
3344 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3345 prevstr = strprintf("%s (Raw)",
3346 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3347 }
3348 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3349 // Print normalized and raw values
3350 currstr = strprintf("%d [Raw %s]", attr.current,
3351 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3352 prevstr = strprintf("%d [Raw %s]", prev.current,
3353 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3354 }
3355 else {
3356 // Print normalized values only
3357 currstr = strprintf("%d", attr.current);
3358 prevstr = strprintf("%d", prev.current);
3359 }
3360
3361 // Format message
3362 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3363 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3364 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3365 prevstr.c_str(), currstr.c_str());
3366
3367 // Report this change as critical ?
3368 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3369 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3370 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3371 MailWarning(cfg, state, 2, "%s", msg.c_str());
3372 }
3373 else {
3374 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3375 }
3376 state.must_write = true;
3377 }
3378
3379
3380 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3381 bool firstpass, bool allow_selftests)
3382 {
3383 if (!open_device(cfg, state, atadev, "ATA"))
3384 return 1;
3385
3386 const char * name = cfg.name.c_str();
3387
3388 // user may have requested (with the -n Directive) to leave the disk
3389 // alone if it is in idle or sleeping mode. In this case check the
3390 // power mode and exit without check if needed
3391 if (cfg.powermode && !state.powermodefail) {
3392 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3393 const char * mode = 0;
3394 if (0 <= powermode && powermode < 0xff) {
3395 // wait for possible spin up and check again
3396 int powermode2;
3397 sleep(5);
3398 powermode2 = ataCheckPowerMode(atadev);
3399 if (powermode2 > powermode)
3400 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3401 powermode = powermode2;
3402 }
3403
3404 switch (powermode){
3405 case -1:
3406 // SLEEP
3407 mode="SLEEP";
3408 if (cfg.powermode>=1)
3409 dontcheck=1;
3410 break;
3411 case 0x00:
3412 // STANDBY
3413 mode="STANDBY";
3414 if (cfg.powermode>=2)
3415 dontcheck=1;
3416 break;
3417 case 0x01:
3418 // STANDBY_Y
3419 mode="STANDBY_Y";
3420 if (cfg.powermode>=2)
3421 dontcheck=1;
3422 break;
3423 case 0x80:
3424 // IDLE
3425 mode="IDLE";
3426 if (cfg.powermode>=3)
3427 dontcheck=1;
3428 break;
3429 case 0x81:
3430 // IDLE_A
3431 mode="IDLE_A";
3432 if (cfg.powermode>=3)
3433 dontcheck=1;
3434 break;
3435 case 0x82:
3436 // IDLE_B
3437 mode="IDLE_B";
3438 if (cfg.powermode>=3)
3439 dontcheck=1;
3440 break;
3441 case 0x83:
3442 // IDLE_C
3443 mode="IDLE_C";
3444 if (cfg.powermode>=3)
3445 dontcheck=1;
3446 break;
3447 case 0xff:
3448 // ACTIVE/IDLE
3449 case 0x40:
3450 // ACTIVE
3451 case 0x41:
3452 // ACTIVE
3453 mode="ACTIVE or IDLE";
3454 break;
3455 default:
3456 // UNKNOWN
3457 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3458 name, powermode);
3459 state.powermodefail = true;
3460 break;
3461 }
3462
3463 // if we are going to skip a check, return now
3464 if (dontcheck){
3465 // skip at most powerskipmax checks
3466 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3467 CloseDevice(atadev, name);
3468 // report first only except if state has changed, avoid waking up system disk
3469 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3470 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3471 state.lastpowermodeskipped = powermode;
3472 }
3473 state.powerskipcnt++;
3474 return 0;
3475 }
3476 else {
3477 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3478 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3479 }
3480 state.powerskipcnt = 0;
3481 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3482 }
3483 else if (state.powerskipcnt) {
3484 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3485 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3486 state.powerskipcnt = 0;
3487 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3488 }
3489 }
3490
3491 // check smart status
3492 if (cfg.smartcheck) {
3493 int status=ataSmartStatus2(atadev);
3494 if (status==-1){
3495 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3496 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3497 state.must_write = true;
3498 }
3499 else if (status==1){
3500 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3501 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3502 state.must_write = true;
3503 }
3504 }
3505
3506 // Check everything that depends upon SMART Data (eg, Attribute values)
3507 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3508 || cfg.curr_pending_id || cfg.offl_pending_id
3509 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3510 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3511
3512 // Read current attribute values.
3513 ata_smart_values curval;
3514 if (ataReadSmartValues(atadev, &curval)){
3515 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3516 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3517 state.must_write = true;
3518 }
3519 else {
3520 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3521
3522 // look for current or offline pending sectors
3523 if (cfg.curr_pending_id)
3524 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3525 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3526 : "Total unreadable (pending) sectors" ));
3527
3528 if (cfg.offl_pending_id)
3529 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3530 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3531 : "Total offline uncorrectable sectors"));
3532
3533 // check temperature limits
3534 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3535 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3536
3537 // look for failed usage attributes, or track usage or prefail attributes
3538 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3539 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3540 check_attribute(cfg, state,
3541 curval.vendor_attributes[i],
3542 state.smartval.vendor_attributes[i],
3543 i, state.smartthres.thres_entries);
3544 }
3545 }
3546
3547 // Log changes of offline data collection status
3548 if (cfg.offlinests) {
3549 if ( curval.offline_data_collection_status
3550 != state.smartval.offline_data_collection_status
3551 || state.offline_started // test was started in previous call
3552 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3553 log_offline_data_coll_status(name, curval.offline_data_collection_status);
3554 }
3555
3556 // Log changes of self-test execution status
3557 if (cfg.selfteststs) {
3558 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
3559 || state.selftest_started // test was started in previous call
3560 || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3561 log_self_test_exec_status(name, curval.self_test_exec_status);
3562 }
3563
3564 // Save the new values for the next time around
3565 state.smartval = curval;
3566 }
3567 }
3568 state.offline_started = state.selftest_started = false;
3569
3570 // check if number of selftest errors has increased (note: may also DECREASE)
3571 if (cfg.selftest)
3572 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3573
3574 // check if number of ATA errors has increased
3575 if (cfg.errorlog || cfg.xerrorlog) {
3576
3577 int errcnt1 = -1, errcnt2 = -1;
3578 if (cfg.errorlog)
3579 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3580 if (cfg.xerrorlog)
3581 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3582
3583 // new number of errors is max of both logs
3584 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3585
3586 // did command fail?
3587 if (newc<0)
3588 // lack of PrintOut here is INTENTIONAL
3589 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3590
3591 // has error count increased?
3592 int oldc = state.ataerrorcount;
3593 if (newc>oldc){
3594 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3595 name, oldc, newc);
3596 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3597 name, oldc, newc);
3598 state.must_write = true;
3599 }
3600
3601 if (newc>=0)
3602 state.ataerrorcount=newc;
3603 }
3604
3605 // if the user has asked, and device is capable (or we're not yet
3606 // sure) check whether a self test should be done now.
3607 if (allow_selftests && !cfg.test_regex.empty()) {
3608 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3609 if (testtype)
3610 DoATASelfTest(cfg, state, atadev, testtype);
3611 }
3612
3613 // Don't leave device open -- the OS/user may want to access it
3614 // before the next smartd cycle!
3615 CloseDevice(atadev, name);
3616
3617 // Copy ATA attribute values to persistent state
3618 state.update_persistent_state();
3619
3620 return 0;
3621 }
3622
3623 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3624 {
3625 if (!open_device(cfg, state, scsidev, "SCSI"))
3626 return 1;
3627
3628 const char * name = cfg.name.c_str();
3629
3630 uint8_t asc = 0, ascq = 0;
3631 uint8_t currenttemp = 0, triptemp = 0;
3632 if (!state.SuppressReport) {
3633 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3634 &asc, &ascq, &currenttemp, &triptemp)) {
3635 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3636 name);
3637 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3638 state.SuppressReport = 1;
3639 }
3640 }
3641 if (asc > 0) {
3642 const char * cp = scsiGetIEString(asc, ascq);
3643 if (cp) {
3644 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3645 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3646 } else if (asc == 4 && ascq == 9) {
3647 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3648 } else if (debugmode)
3649 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3650 name, (int)asc, (int)ascq);
3651 } else if (debugmode)
3652 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3653
3654 // check temperature limits
3655 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3656 CheckTemperature(cfg, state, currenttemp, triptemp);
3657
3658 // check if number of selftest errors has increased (note: may also DECREASE)
3659 if (cfg.selftest)
3660 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3661
3662 if (allow_selftests && !cfg.test_regex.empty()) {
3663 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3664 if (testtype)
3665 DoSCSISelfTest(cfg, state, scsidev, testtype);
3666 }
3667 if (!cfg.attrlog_file.empty()){
3668 // saving error counters to state
3669 uint8_t tBuf[252];
3670 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3671 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3672 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3673 state.scsi_error_counters[0].found=1;
3674 }
3675 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3676 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3677 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3678 state.scsi_error_counters[1].found=1;
3679 }
3680 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3681 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3682 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3683 state.scsi_error_counters[2].found=1;
3684 }
3685 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3686 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3687 scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3688 state.scsi_nonmedium_error.found=1;
3689 }
3690 // store temperature if not done by CheckTemperature() above
3691 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3692 state.temperature = currenttemp;
3693 }
3694 CloseDevice(scsidev, name);
3695 return 0;
3696 }
3697
3698 static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3699 {
3700 if (!open_device(cfg, state, nvmedev, "NVMe"))
3701 return 1;
3702
3703 const char * name = cfg.name.c_str();
3704
3705 // Read SMART/Health log
3706 nvme_smart_log smart_log;
3707 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3708 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3709 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3710 state.must_write = true;
3711 return 0;
3712 }
3713
3714 // Check Critical Warning bits
3715 if (cfg.smartcheck && smart_log.critical_warning) {
3716 unsigned char w = smart_log.critical_warning;
3717 std::string msg;
3718 static const char * const wnames[] =
3719 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3720
3721 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3722 if (!(w & (1 << b)))
3723 continue;
3724 if (cnt)
3725 msg += ", ";
3726 if (++cnt > 3) {
3727 msg += "..."; break;
3728 }
3729 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3730 msg += "*Unknown*"; break;
3731 }
3732 msg += wnames[b];
3733 }
3734
3735 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3736 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3737 state.must_write = true;
3738 }
3739
3740 // Check temperature limits
3741 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3742 int k = nvme_get_max_temp_kelvin(smart_log);
3743 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3744 int c = k - 273;
3745 if (c < 1)
3746 c = 1;
3747 else if (c > 0xff)
3748 c = 0xff;
3749 CheckTemperature(cfg, state, c, 0);
3750 }
3751
3752 // Check if number of errors has increased
3753 if (cfg.errorlog || cfg.xerrorlog) {
3754 uint64_t oldcnt = state.nvme_err_log_entries;
3755 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3756 if (newcnt > oldcnt) {
3757 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3758 name, oldcnt, newcnt);
3759 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3760 name, oldcnt, newcnt);
3761 state.must_write = true;
3762 }
3763 state.nvme_err_log_entries = newcnt;
3764 }
3765
3766 CloseDevice(nvmedev, name);
3767 return 0;
3768 }
3769
// State of the system auto standby handling requested by the ',ns' variants
// of '-l offlinests' and '-l selfteststs':
// 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
// Set by init_disable_standby_check() and do_disable_standby_check().
static int standby_disable_state = 0;
3772
3773 static void init_disable_standby_check(dev_config_vector & configs)
3774 {
3775 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3776 bool sts1 = false, sts2 = false;
3777 for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3778 const dev_config & cfg = configs.at(i);
3779 if (cfg.offlinests_ns)
3780 sts1 = true;
3781 if (cfg.selfteststs_ns)
3782 sts2 = true;
3783 }
3784
3785 // Check for support of disable auto standby
3786 // Reenable standby if smartd.conf was reread
3787 if (sts1 || sts2 || standby_disable_state == 3) {
3788 if (!smi()->disable_system_auto_standby(false)) {
3789 if (standby_disable_state == 3)
3790 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3791 if (sts1 || sts2) {
3792 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3793 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3794 sts1 = sts2 = false;
3795 }
3796 }
3797 }
3798
3799 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3800 }
3801
3802 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3803 {
3804 if (!standby_disable_state)
3805 return;
3806
3807 // Check for just started or still running self-tests
3808 bool running = false;
3809 for (unsigned i = 0; i < configs.size() && !running; i++) {
3810 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3811
3812 if ( ( cfg.offlinests_ns
3813 && (state.offline_started ||
3814 is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3815 || ( cfg.selfteststs_ns
3816 && (state.selftest_started ||
3817 is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3818 running = true;
3819 // state.offline/selftest_started will be reset after next logging of test status
3820 }
3821
3822 // Disable/enable auto standby and log state changes
3823 if (!running) {
3824 if (standby_disable_state != 1) {
3825 if (!smi()->disable_system_auto_standby(false))
3826 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3827 smi()->get_errmsg());
3828 else
3829 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3830 standby_disable_state = 1;
3831 }
3832 }
3833 else if (!smi()->disable_system_auto_standby(true)) {
3834 if (standby_disable_state != 2) {
3835 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3836 smi()->get_errmsg());
3837 standby_disable_state = 2;
3838 }
3839 }
3840 else {
3841 if (standby_disable_state != 3) {
3842 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3843 standby_disable_state = 3;
3844 }
3845 }
3846 }
3847
3848 // Checks the SMART status of all ATA and SCSI devices
3849 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3850 smart_device_list & devices, bool firstpass, bool allow_selftests)
3851 {
3852 for (unsigned i = 0; i < configs.size(); i++) {
3853 const dev_config & cfg = configs.at(i);
3854 dev_state & state = states.at(i);
3855 smart_device * dev = devices.at(i);
3856 if (dev->is_ata())
3857 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3858 else if (dev->is_scsi())
3859 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3860 else if (dev->is_nvme())
3861 NVMeCheckDevice(cfg, state, dev->to_nvme());
3862 }
3863
3864 do_disable_standby_check(configs, states);
3865 }
3866
3867 // Install all signal handlers
3868 static void install_signal_handlers()
3869 {
3870 // normal and abnormal exit
3871 set_signal_if_not_ignored(SIGTERM, sighandler);
3872 set_signal_if_not_ignored(SIGQUIT, sighandler);
3873
3874 // in debug mode, <CONTROL-C> ==> HUP
3875 set_signal_if_not_ignored(SIGINT, (debugmode ? HUPhandler : sighandler));
3876
3877 // Catch HUP and USR1
3878 set_signal_if_not_ignored(SIGHUP, HUPhandler);
3879 set_signal_if_not_ignored(SIGUSR1, USR1handler);
3880 #ifdef _WIN32
3881 set_signal_if_not_ignored(SIGUSR2, USR2handler);
3882 #endif
3883 }
3884
3885 #ifdef _WIN32
3886 // Toggle debug mode implemented for native windows only
3887 // (there is no easy way to reopen tty on *nix)
3888 static void ToggleDebugMode()
3889 {
3890 if (!debugmode) {
3891 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3892 if (!daemon_enable_console("smartd [Debug]")) {
3893 debugmode = 1;
3894 daemon_signal(SIGINT, HUPhandler);
3895 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3896 }
3897 else
3898 PrintOut(LOG_INFO,"enable console failed\n");
3899 }
3900 else if (debugmode == 1) {
3901 daemon_disable_console();
3902 debugmode = 0;
3903 daemon_signal(SIGINT, sighandler);
3904 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3905 }
3906 else
3907 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3908 }
3909 #endif
3910
3911 static time_t dosleep(time_t wakeuptime, bool & sigwakeup, int numdev)
3912 {
3913 // If past wake-up-time, compute next wake-up-time
3914 time_t timenow=time(NULL);
3915 while (wakeuptime<=timenow){
3916 int intervals=1+(timenow-wakeuptime)/checktime;
3917 wakeuptime+=intervals*checktime;
3918 }
3919
3920 notify_wait(wakeuptime, numdev);
3921
3922 // sleep until we catch SIGUSR1 or have completed sleeping
3923 int addtime = 0;
3924 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3925
3926 // protect user again system clock being adjusted backwards
3927 if (wakeuptime>timenow+checktime){
3928 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3929 wakeuptime=timenow+checktime;
3930 }
3931
3932 // Exit sleep when time interval has expired or a signal is received
3933 sleep(wakeuptime+addtime-timenow);
3934
3935 #ifdef _WIN32
3936 // toggle debug mode?
3937 if (caughtsigUSR2) {
3938 ToggleDebugMode();
3939 caughtsigUSR2 = 0;
3940 }
3941 #endif
3942
3943 timenow=time(NULL);
3944
3945 // Actual sleep time too long?
3946 if (!addtime && timenow > wakeuptime+60) {
3947 if (debugmode)
3948 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3949 (int)(timenow-wakeuptime));
3950 // Wait another 20 seconds to avoid I/O errors during disk spin-up
3951 addtime = timenow-wakeuptime+20;
3952 // Use next wake-up-time if close
3953 int nextcheck = checktime - addtime % checktime;
3954 if (nextcheck <= 20)
3955 addtime += nextcheck;
3956 }
3957 }
3958
3959 // if we caught a SIGUSR1 then print message and clear signal
3960 if (caughtsigUSR1){
3961 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3962 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3963 caughtsigUSR1=0;
3964 sigwakeup = true;
3965 }
3966
3967 // return adjusted wakeuptime
3968 return wakeuptime;
3969 }
3970
3971 // Print out a list of valid arguments for the Directive d
3972 static void printoutvaliddirectiveargs(int priority, char d)
3973 {
3974 switch (d) {
3975 case 'n':
3976 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3977 break;
3978 case 's':
3979 PrintOut(priority, "valid_regular_expression");
3980 break;
3981 case 'd':
3982 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3983 break;
3984 case 'T':
3985 PrintOut(priority, "normal, permissive");
3986 break;
3987 case 'o':
3988 case 'S':
3989 PrintOut(priority, "on, off");
3990 break;
3991 case 'l':
3992 PrintOut(priority, "error, selftest");
3993 break;
3994 case 'M':
3995 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3996 break;
3997 case 'v':
3998 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3999 break;
4000 case 'P':
4001 PrintOut(priority, "use, ignore, show, showall");
4002 break;
4003 case 'F':
4004 PrintOut(priority, "%s", get_valid_firmwarebug_args());
4005 break;
4006 case 'e':
4007 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4008 "security-freeze, standby,[N|off], wcache,[on|off]");
4009 break;
4010 }
4011 }
4012
4013 // exits with an error message, or returns integer value of token
4014 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4015 int min, int max, char * suffix = 0)
4016 {
4017 // make sure argument is there
4018 if (!arg) {
4019 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4020 cfgfile, lineno, name, token, min, max);
4021 return -1;
4022 }
4023
4024 // get argument value (base 10), check that it's integer, and in-range
4025 char *endptr;
4026 int val = strtol(arg,&endptr,10);
4027
4028 // optional suffix present?
4029 if (suffix) {
4030 if (!strcmp(endptr, suffix))
4031 endptr += strlen(suffix);
4032 else
4033 *suffix = 0;
4034 }
4035
4036 if (!(!*endptr && min <= val && val <= max)) {
4037 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4038 cfgfile, lineno, name, token, arg, min, max);
4039 return -1;
4040 }
4041
4042 // all is well; return value
4043 return val;
4044 }
4045
4046
4047 // Get 1-3 small integer(s) for '-W' directive
4048 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4049 unsigned char *val1, unsigned char *val2, unsigned char *val3)
4050 {
4051 unsigned v1 = 0, v2 = 0, v3 = 0;
4052 int n1 = -1, n2 = -1, n3 = -1, len;
4053 if (!arg) {
4054 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4055 cfgfile, lineno, name, token);
4056 return -1;
4057 }
4058
4059 len = strlen(arg);
4060 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4061 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4062 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4063 cfgfile, lineno, name, token, arg);
4064 return -1;
4065 }
4066 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4067 return 0;
4068 }
4069
4070
4071 #ifdef _WIN32
4072
// Get the next strtok() token and concatenate strtok() results if quoted
// with "...".
// Returns the unmodified token (or 0 at end of input) if it does not start
// with '"'.  Otherwise returns the text between the quotes, with the tokens
// joined by single blanks.  Returns the string "\"" if the closing quote is
// missing.  A quoted result points into a static buffer which is
// overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Opening and closing quote in the same token ("C:\prog.bat"):
  // done, do not consume any further tokens.
  // (A lone '""' is still handled by the loop below as before.)
  if (token.size() >= 2 && token[token.size()-1] == '"') {
    token.erase(token.size()-1);
    return token.c_str();
  }

  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\""; // closing quote missing
    token += ' ';
    int len = strlen(t);
    if (t[len-1] == '"') {
      // last part, drop closing quote
      token += std::string(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4096
4097 #endif // _WIN32
4098
4099
4100 // This function returns 1 if it has correctly parsed one token (and
4101 // any arguments), else zero if no tokens remain. It returns -1 if an
4102 // error was encountered.
4103 static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4104 {
4105 char sym;
4106 const char * name = cfg.name.c_str();
4107 int lineno=cfg.lineno;
4108 const char *delim = " \n\t";
4109 int badarg = 0;
4110 int missingarg = 0;
4111 const char *arg = 0;
4112
4113 // is the rest of the line a comment
4114 if (*token=='#')
4115 return 1;
4116
4117 // is the token not recognized?
4118 if (*token!='-' || strlen(token)!=2) {
4119 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4120 configfile, lineno, name, token);
4121 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4122 return -1;
4123 }
4124
4125 // token we will be parsing:
4126 sym=token[1];
4127
4128 // parse the token and swallow its argument
4129 int val;
4130 char plus[] = "+", excl[] = "!";
4131
4132 switch (sym) {
4133 case 'C':
4134 // monitor current pending sector count (default 197)
4135 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4136 return -1;
4137 cfg.curr_pending_id = (unsigned char)val;
4138 cfg.curr_pending_incr = (*plus == '+');
4139 cfg.curr_pending_set = true;
4140 break;
4141 case 'U':
4142 // monitor offline uncorrectable sectors (default 198)
4143 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4144 return -1;
4145 cfg.offl_pending_id = (unsigned char)val;
4146 cfg.offl_pending_incr = (*plus == '+');
4147 cfg.offl_pending_set = true;
4148 break;
4149 case 'T':
4150 // Set tolerance level for SMART command failures
4151 if ((arg = strtok(NULL, delim)) == NULL) {
4152 missingarg = 1;
4153 } else if (!strcmp(arg, "normal")) {
4154 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4155 // not on failure of an optional S.M.A.R.T. command.
4156 // This is the default so we don't need to actually do anything here.
4157 cfg.permissive = false;
4158 } else if (!strcmp(arg, "permissive")) {
4159 // Permissive mode; ignore errors from Mandatory SMART commands
4160 cfg.permissive = true;
4161 } else {
4162 badarg = 1;
4163 }
4164 break;
4165 case 'd':
4166 // specify the device type
4167 if ((arg = strtok(NULL, delim)) == NULL) {
4168 missingarg = 1;
4169 } else if (!strcmp(arg, "ignore")) {
4170 cfg.ignore = true;
4171 } else if (!strcmp(arg, "removable")) {
4172 cfg.removable = true;
4173 } else if (!strcmp(arg, "auto")) {
4174 cfg.dev_type = "";
4175 scan_types.clear();
4176 } else {
4177 cfg.dev_type = arg;
4178 scan_types.push_back(arg);
4179 }
4180 break;
4181 case 'F':
4182 // fix firmware bug
4183 if (!(arg = strtok(0, delim)))
4184 missingarg = 1;
4185 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4186 badarg = 1;
4187 break;
4188 case 'H':
4189 // check SMART status
4190 cfg.smartcheck = true;
4191 break;
4192 case 'f':
4193 // check for failure of usage attributes
4194 cfg.usagefailed = true;
4195 break;
4196 case 't':
4197 // track changes in all vendor attributes
4198 cfg.prefail = true;
4199 cfg.usage = true;
4200 break;
4201 case 'p':
4202 // track changes in prefail vendor attributes
4203 cfg.prefail = true;
4204 break;
4205 case 'u':
4206 // track changes in usage vendor attributes
4207 cfg.usage = true;
4208 break;
4209 case 'l':
4210 // track changes in SMART logs
4211 if ((arg = strtok(NULL, delim)) == NULL) {
4212 missingarg = 1;
4213 } else if (!strcmp(arg, "selftest")) {
4214 // track changes in self-test log
4215 cfg.selftest = true;
4216 } else if (!strcmp(arg, "error")) {
4217 // track changes in ATA error log
4218 cfg.errorlog = true;
4219 } else if (!strcmp(arg, "xerror")) {
4220 // track changes in Extended Comprehensive SMART error log
4221 cfg.xerrorlog = true;
4222 } else if (!strcmp(arg, "offlinests")) {
4223 // track changes in offline data collection status
4224 cfg.offlinests = true;
4225 } else if (!strcmp(arg, "offlinests,ns")) {
4226 // track changes in offline data collection status, disable auto standby
4227 cfg.offlinests = cfg.offlinests_ns = true;
4228 } else if (!strcmp(arg, "selfteststs")) {
4229 // track changes in self-test execution status
4230 cfg.selfteststs = true;
4231 } else if (!strcmp(arg, "selfteststs,ns")) {
4232 // track changes in self-test execution status, disable auto standby
4233 cfg.selfteststs = cfg.selfteststs_ns = true;
4234 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4235 // set SCT Error Recovery Control
4236 unsigned rt = ~0, wt = ~0; int nc = -1;
4237 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4238 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4239 cfg.sct_erc_set = true;
4240 cfg.sct_erc_readtime = rt;
4241 cfg.sct_erc_writetime = wt;
4242 }
4243 else
4244 badarg = 1;
4245 } else {
4246 badarg = 1;
4247 }
4248 break;
4249 case 'a':
4250 // monitor everything
4251 cfg.smartcheck = true;
4252 cfg.prefail = true;
4253 cfg.usagefailed = true;
4254 cfg.usage = true;
4255 cfg.selftest = true;
4256 cfg.errorlog = true;
4257 cfg.selfteststs = true;
4258 break;
4259 case 'o':
4260 // automatic offline testing enable/disable
4261 if ((arg = strtok(NULL, delim)) == NULL) {
4262 missingarg = 1;
4263 } else if (!strcmp(arg, "on")) {
4264 cfg.autoofflinetest = 2;
4265 } else if (!strcmp(arg, "off")) {
4266 cfg.autoofflinetest = 1;
4267 } else {
4268 badarg = 1;
4269 }
4270 break;
4271 case 'n':
4272 // skip disk check if in idle or standby mode
4273 if (!(arg = strtok(NULL, delim)))
4274 missingarg = 1;
4275 else {
4276 char *endptr = NULL;
4277 char *next = strchr(const_cast<char*>(arg), ',');
4278
4279 cfg.powerquiet = false;
4280 cfg.powerskipmax = 0;
4281
4282 if (next!=NULL) *next='\0';
4283 if (!strcmp(arg, "never"))
4284 cfg.powermode = 0;
4285 else if (!strcmp(arg, "sleep"))
4286 cfg.powermode = 1;
4287 else if (!strcmp(arg, "standby"))
4288 cfg.powermode = 2;
4289 else if (!strcmp(arg, "idle"))
4290 cfg.powermode = 3;
4291 else
4292 badarg = 1;
4293
4294 // if optional arguments are present
4295 if (!badarg && next!=NULL) {
4296 next++;
4297 cfg.powerskipmax = strtol(next, &endptr, 10);
4298 if (endptr == next)
4299 cfg.powerskipmax = 0;
4300 else {
4301 next = endptr + (*endptr != '\0');
4302 if (cfg.powerskipmax <= 0)
4303 badarg = 1;
4304 }
4305 if (*next != '\0') {
4306 if (!strcmp("q", next))
4307 cfg.powerquiet = true;
4308 else {
4309 badarg = 1;
4310 }
4311 }
4312 }
4313 }
4314 break;
4315 case 'S':
4316 // automatic attribute autosave enable/disable
4317 if ((arg = strtok(NULL, delim)) == NULL) {
4318 missingarg = 1;
4319 } else if (!strcmp(arg, "on")) {
4320 cfg.autosave = 2;
4321 } else if (!strcmp(arg, "off")) {
4322 cfg.autosave = 1;
4323 } else {
4324 badarg = 1;
4325 }
4326 break;
4327 case 's':
4328 // warn user, and delete any previously given -s REGEXP Directives
4329 if (!cfg.test_regex.empty()){
4330 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4331 configfile, lineno, name, cfg.test_regex.get_pattern());
4332 cfg.test_regex = regular_expression();
4333 }
4334 // check for missing argument
4335 if (!(arg = strtok(NULL, delim))) {
4336 missingarg = 1;
4337 }
4338 // Compile regex
4339 else {
4340 if (!cfg.test_regex.compile(arg)) {
4341 // not a valid regular expression!
4342 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4343 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4344 return -1;
4345 }
4346 // Do a bit of sanity checking and warn user if we think that
4347 // their regexp is "strange". User probably confused about shell
4348 // glob(3) syntax versus regular expression syntax regexp(7).
4349 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
4350 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
4351 configfile, lineno, name, val+1, arg[val], arg);
4352 }
4353 break;
4354 case 'm':
4355 // send email to address that follows
4356 if (!(arg = strtok(NULL,delim)))
4357 missingarg = 1;
4358 else {
4359 if (!cfg.emailaddress.empty())
4360 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4361 configfile, lineno, name, cfg.emailaddress.c_str());
4362 cfg.emailaddress = arg;
4363 }
4364 break;
4365 case 'M':
4366 // email warning options
4367 if (!(arg = strtok(NULL, delim)))
4368 missingarg = 1;
4369 else if (!strcmp(arg, "once"))
4370 cfg.emailfreq = 1;
4371 else if (!strcmp(arg, "daily"))
4372 cfg.emailfreq = 2;
4373 else if (!strcmp(arg, "diminishing"))
4374 cfg.emailfreq = 3;
4375 else if (!strcmp(arg, "test"))
4376 cfg.emailtest = 1;
4377 else if (!strcmp(arg, "exec")) {
4378 // Get the next argument (the command line)
4379 #ifdef _WIN32
4380 // Allow "/path name/with spaces/..." on Windows
4381 arg = strtok_dequote(delim);
4382 if (arg && arg[0] == '"') {
4383 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4384 configfile, lineno, name, token);
4385 return -1;
4386 }
4387 #else
4388 arg = strtok(0, delim);
4389 #endif
4390 if (!arg) {
4391 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4392 configfile, lineno, name, token);
4393 return -1;
4394 }
4395 // Free the last cmd line given if any, and copy new one
4396 if (!cfg.emailcmdline.empty())
4397 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4398 configfile, lineno, name, cfg.emailcmdline.c_str());
4399 cfg.emailcmdline = arg;
4400 }
4401 else
4402 badarg = 1;
4403 break;
4404 case 'i':
4405 // ignore failure of usage attribute
4406 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4407 return -1;
4408 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
4409 break;
4410 case 'I':
4411 // ignore attribute for tracking purposes
4412 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4413 return -1;
4414 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
4415 break;
4416 case 'r':
4417 // print raw value when tracking
4418 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4419 return -1;
4420 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
4421 if (*excl == '!') // attribute change is critical
4422 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
4423 break;
4424 case 'R':
4425 // track changes in raw value (forces printing of raw value)
4426 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4427 return -1;
4428 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
4429 if (*excl == '!') // raw value change is critical
4430 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
4431 break;
4432 case 'W':
4433 // track Temperature
4434 if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4435 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4436 return -1;
4437 break;
4438 case 'v':
4439 // non-default vendor-specific attribute meaning
4440 if (!(arg=strtok(NULL,delim))) {
4441 missingarg = 1;
4442 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4443 badarg = 1;
4444 }
4445 break;
4446 case 'P':
4447 // Define use of drive-specific presets.
4448 if (!(arg = strtok(NULL, delim))) {
4449 missingarg = 1;
4450 } else if (!strcmp(arg, "use")) {
4451 cfg.ignorepresets = false;
4452 } else if (!strcmp(arg, "ignore")) {
4453 cfg.ignorepresets = true;
4454 } else if (!strcmp(arg, "show")) {
4455 cfg.showpresets = true;
4456 } else if (!strcmp(arg, "showall")) {
4457 showallpresets();
4458 } else {
4459 badarg = 1;
4460 }
4461 break;
4462
4463 case 'e':
4464 // Various ATA settings
4465 if (!(arg = strtok(NULL, delim))) {
4466 missingarg = true;
4467 }
4468 else {
4469 char arg2[16+1]; unsigned val;
4470 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4471 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4472 && (n1 == len || n2 > 0)) {
4473 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4474 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4475 if (n3 != len)
4476 val = ~0U;
4477
4478 if (!strcmp(arg2, "aam")) {
4479 if (off)
4480 cfg.set_aam = -1;
4481 else if (val <= 254)
4482 cfg.set_aam = val + 1;
4483 else
4484 badarg = true;
4485 }
4486 else if (!strcmp(arg2, "apm")) {
4487 if (off)
4488 cfg.set_apm = -1;
4489 else if (1 <= val && val <= 254)
4490 cfg.set_apm = val + 1;
4491 else
4492 badarg = true;
4493 }
4494 else if (!strcmp(arg2, "lookahead")) {
4495 if (off)
4496 cfg.set_lookahead = -1;
4497 else if (on)
4498 cfg.set_lookahead = 1;
4499 else
4500 badarg = true;
4501 }
4502 else if (!strcmp(arg, "security-freeze")) {
4503 cfg.set_security_freeze = true;
4504 }
4505 else if (!strcmp(arg2, "standby")) {
4506 if (off)
4507 cfg.set_standby = 0 + 1;
4508 else if (val <= 255)
4509 cfg.set_standby = val + 1;
4510 else
4511 badarg = true;
4512 }
4513 else if (!strcmp(arg2, "wcache")) {
4514 if (off)
4515 cfg.set_wcache = -1;
4516 else if (on)
4517 cfg.set_wcache = 1;
4518 else
4519 badarg = true;
4520 }
4521 else if (!strcmp(arg2, "dsn")) {
4522 if (off)
4523 cfg.set_dsn = -1;
4524 else if (on)
4525 cfg.set_dsn = 1;
4526 else
4527 badarg = true;
4528 }
4529 else
4530 badarg = true;
4531 }
4532 else
4533 badarg = true;
4534 }
4535 break;
4536
4537 default:
4538 // Directive not recognized
4539 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4540 configfile, lineno, name, token);
4541 Directives();
4542 return -1;
4543 }
4544 if (missingarg) {
4545 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4546 configfile, lineno, name, token);
4547 }
4548 if (badarg) {
4549 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4550 configfile, lineno, name, token, arg);
4551 }
4552 if (missingarg || badarg) {
4553 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4554 printoutvaliddirectiveargs(LOG_CRIT, sym);
4555 PrintOut(LOG_CRIT, "\n");
4556 return -1;
4557 }
4558
4559 return 1;
4560 }
4561
4562 // Scan directive for configuration file
4563 #define SCANDIRECTIVE "DEVICESCAN"
4564
4565 // This is the routine that adds things to the conf_entries list.
4566 //
4567 // Return values are:
4568 // 1: parsed a normal line
4569 // 0: found DEFAULT setting or comment or blank line
4570 // -1: found SCANDIRECTIVE line
4571 // -2: found an error
4572 //
4573 // Note: this routine modifies *line from the caller!
4574 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4575 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4576 {
4577 const char *delim = " \n\t";
4578
4579 // get first token: device name. If a comment, skip line
4580 const char * name = strtok(line, delim);
4581 if (!name || *name == '#')
4582 return 0;
4583
4584 // Check device name for DEFAULT or DEVICESCAN
4585 int retval;
4586 if (!strcmp("DEFAULT", name)) {
4587 retval = 0;
4588 // Restart with empty defaults
4589 default_conf = dev_config();
4590 }
4591 else {
4592 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4593 // Init new entry with current defaults
4594 conf_entries.push_back(default_conf);
4595 }
4596 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4597
4598 cfg.name = name; // Later replaced by dev->get_info().info_name
4599 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4600 cfg.lineno = lineno;
4601
4602 // parse tokens one at a time from the file.
4603 while (char * token = strtok(0, delim)) {
4604 int rc = ParseToken(token, cfg, scan_types);
4605 if (rc < 0)
4606 // error found on the line
4607 return -2;
4608
4609 if (rc == 0)
4610 // No tokens left
4611 break;
4612
4613 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4614 }
4615
4616 // Check for multiple -d TYPE directives
4617 if (retval != -1 && scan_types.size() > 1) {
4618 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4619 cfg.name.c_str(), cfg.lineno, configfile);
4620 return -2;
4621 }
4622
4623 // Don't perform checks below for DEFAULT entries
4624 if (retval == 0)
4625 return retval;
4626
4627 // If NO monitoring directives are set, then set all of them.
4628 if (!( cfg.smartcheck || cfg.selftest
4629 || cfg.errorlog || cfg.xerrorlog
4630 || cfg.offlinests || cfg.selfteststs
4631 || cfg.usagefailed || cfg.prefail || cfg.usage
4632 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4633
4634 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4635 cfg.name.c_str(), cfg.lineno, configfile);
4636
4637 cfg.smartcheck = true;
4638 cfg.usagefailed = true;
4639 cfg.prefail = true;
4640 cfg.usage = true;
4641 cfg.selftest = true;
4642 cfg.errorlog = true;
4643 cfg.selfteststs = true;
4644 }
4645
4646 // additional sanity check. Has user set -M options without -m?
4647 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4648 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4649 cfg.name.c_str(), cfg.lineno, configfile);
4650 return -2;
4651 }
4652
4653 // has the user has set <nomailer>?
4654 if (cfg.emailaddress == "<nomailer>") {
4655 // check that -M exec is also set
4656 if (cfg.emailcmdline.empty()){
4657 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4658 cfg.name.c_str(), cfg.lineno, configfile);
4659 return -2;
4660 }
4661 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4662 cfg.emailaddress.clear();
4663 }
4664
4665 return retval;
4666 }
4667
4668 // Parses a configuration file. Return values are:
4669 // N=>0: found N entries
4670 // -1: syntax error in config file
4671 // -2: config file does not exist
4672 // -3: config file exists but cannot be read
4673 //
4674 // In the case where the return value is 0, there are three
4675 // possibilities:
4676 // Empty configuration file ==> conf_entries.empty()
4677 // No configuration file ==> conf_entries[0].lineno == 0
4678 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4679 static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4680 {
4681 // maximum line length in configuration file
4682 const int MAXLINELEN = 256;
4683 // maximum length of a continued line in configuration file
4684 const int MAXCONTLINE = 1023;
4685
4686 stdio_file f;
4687 // Open config file, if it exists and is not <stdin>
4688 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4689 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4690 // file exists but we can't read it or it should exist due to '-c' option
4691 int ret = (errno!=ENOENT ? -3 : -2);
4692 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4693 strerror(errno),configfile);
4694 return ret;
4695 }
4696 }
4697 else // read from stdin ('-c -' option)
4698 f.open(stdin);
4699
4700 // Start with empty defaults
4701 dev_config default_conf;
4702
4703 // No configuration file found -- use fake one
4704 int entry = 0;
4705 if (!f) {
4706 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4707
4708 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4709 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4710 return 0;
4711 }
4712
4713 #ifdef __CYGWIN__
4714 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4715 #endif
4716
4717 // configuration file exists
4718 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4719
4720 // parse config file line by line
4721 int lineno = 1, cont = 0, contlineno = 0;
4722 char line[MAXLINELEN+2];
4723 char fullline[MAXCONTLINE+1];
4724
4725 for (;;) {
4726 int len=0,scandevice;
4727 char *lastslash;
4728 char *comment;
4729 char *code;
4730
4731 // make debugging simpler
4732 memset(line,0,sizeof(line));
4733
4734 // get a line
4735 code=fgets(line, MAXLINELEN+2, f);
4736
4737 // are we at the end of the file?
4738 if (!code){
4739 if (cont) {
4740 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4741 // See if we found a SCANDIRECTIVE directive
4742 if (scandevice==-1)
4743 return 0;
4744 // did we find a syntax error
4745 if (scandevice==-2)
4746 return -1;
4747 // the final line is part of a continuation line
4748 entry+=scandevice;
4749 }
4750 break;
4751 }
4752
4753 // input file line number
4754 contlineno++;
4755
4756 // See if line is too long
4757 len=strlen(line);
4758 if (len>MAXLINELEN){
4759 const char *warn;
4760 if (line[len-1]=='\n')
4761 warn="(including newline!) ";
4762 else
4763 warn="";
4764 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4765 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4766 return -1;
4767 }
4768
4769 // Ignore anything after comment symbol
4770 if ((comment=strchr(line,'#'))){
4771 *comment='\0';
4772 len=strlen(line);
4773 }
4774
4775 // is the total line (made of all continuation lines) too long?
4776 if (cont+len>MAXCONTLINE){
4777 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4778 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4779 return -1;
4780 }
4781
4782 // copy string so far into fullline, and increment length
4783 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4784 cont+=len;
4785
4786 // is this a continuation line. If so, replace \ by space and look at next line
4787 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4788 *(fullline+(cont-len)+(lastslash-line))=' ';
4789 continue;
4790 }
4791
4792 // Not a continuation line. Parse it
4793 scan_types.clear();
4794 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4795
4796 // did we find a scandevice directive?
4797 if (scandevice==-1)
4798 return 0;
4799 // did we find a syntax error
4800 if (scandevice==-2)
4801 return -1;
4802
4803 entry+=scandevice;
4804 lineno++;
4805 cont=0;
4806 }
4807
4808 // note -- may be zero if syntax of file OK, but no valid entries!
4809 return entry;
4810 }
4811
4812 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4813 <LIST> is the list of valid arguments for option opt. */
4814 static void PrintValidArgs(char opt)
4815 {
4816 const char *s;
4817
4818 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4819 if (!(s = GetValidArgList(opt)))
4820 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4821 else
4822 PrintOut(LOG_CRIT, "%s", (char *)s);
4823 PrintOut(LOG_CRIT, " <=======\n");
4824 }
4825
4826 #ifndef _WIN32
4827 // Report error and return false if specified path is not absolute.
4828 static bool check_abs_path(char option, const std::string & path)
4829 {
4830 if (path.empty() || path[0] == '/')
4831 return true;
4832
4833 debugmode = 1;
4834 PrintHead();
4835 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4836 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4837 return false;
4838 }
4839 #endif // !_WIN32
4840
4841 // Parses input line, prints usage message and
4842 // version/license/copyright messages
4843 static int parse_options(int argc, char **argv)
4844 {
4845 // Init default path names
4846 #ifndef _WIN32
4847 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4848 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4849 #else
4850 std::string exedir = get_exe_dir();
4851 static std::string configfile_str = exedir + "/smartd.conf";
4852 configfile = configfile_str.c_str();
4853 warning_script = exedir + "/smartd_warning.cmd";
4854 #endif
4855
4856 // Please update GetValidArgList() if you edit shortopts
4857 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4858 #ifdef HAVE_LIBCAP_NG
4859 "C"
4860 #endif
4861 ;
4862 // Please update GetValidArgList() if you edit longopts
4863 struct option longopts[] = {
4864 { "configfile", required_argument, 0, 'c' },
4865 { "logfacility", required_argument, 0, 'l' },
4866 { "quit", required_argument, 0, 'q' },
4867 { "debug", no_argument, 0, 'd' },
4868 { "showdirectives", no_argument, 0, 'D' },
4869 { "interval", required_argument, 0, 'i' },
4870 #ifndef _WIN32
4871 { "no-fork", no_argument, 0, 'n' },
4872 #else
4873 { "service", no_argument, 0, 'n' },
4874 #endif
4875 { "pidfile", required_argument, 0, 'p' },
4876 { "report", required_argument, 0, 'r' },
4877 { "savestates", required_argument, 0, 's' },
4878 { "attributelog", required_argument, 0, 'A' },
4879 { "drivedb", required_argument, 0, 'B' },
4880 { "warnexec", required_argument, 0, 'w' },
4881 { "version", no_argument, 0, 'V' },
4882 { "license", no_argument, 0, 'V' },
4883 { "copyright", no_argument, 0, 'V' },
4884 { "help", no_argument, 0, 'h' },
4885 { "usage", no_argument, 0, 'h' },
4886 #ifdef HAVE_LIBCAP_NG
4887 { "capabilities", no_argument, 0, 'C' },
4888 #endif
4889 { 0, 0, 0, 0 }
4890 };
4891
4892 opterr=optopt=0;
4893 bool badarg = false;
4894 bool use_default_db = true; // set false on '-B FILE'
4895
4896 // Parse input options.
4897 int optchar;
4898 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4899 char *arg;
4900 char *tailptr;
4901 long lchecktime;
4902
4903 switch(optchar) {
4904 case 'q':
4905 // when to quit
4906 if (!strcmp(optarg, "nodev"))
4907 quit = QUIT_NODEV;
4908 else if (!strcmp(optarg, "nodevstartup"))
4909 quit = QUIT_NODEVSTARTUP;
4910 else if (!strcmp(optarg, "never"))
4911 quit = QUIT_NEVER;
4912 else if (!strcmp(optarg, "onecheck")) {
4913 quit = QUIT_ONECHECK;
4914 debugmode = 1;
4915 }
4916 else if (!strcmp(optarg, "showtests")) {
4917 quit = QUIT_SHOWTESTS;
4918 debugmode = 1;
4919 }
4920 else if (!strcmp(optarg, "errors"))
4921 quit = QUIT_ERRORS;
4922 else
4923 badarg = true;
4924 break;
4925 case 'l':
4926 // set the log facility level
4927 if (!strcmp(optarg, "daemon"))
4928 facility=LOG_DAEMON;
4929 else if (!strcmp(optarg, "local0"))
4930 facility=LOG_LOCAL0;
4931 else if (!strcmp(optarg, "local1"))
4932 facility=LOG_LOCAL1;
4933 else if (!strcmp(optarg, "local2"))
4934 facility=LOG_LOCAL2;
4935 else if (!strcmp(optarg, "local3"))
4936 facility=LOG_LOCAL3;
4937 else if (!strcmp(optarg, "local4"))
4938 facility=LOG_LOCAL4;
4939 else if (!strcmp(optarg, "local5"))
4940 facility=LOG_LOCAL5;
4941 else if (!strcmp(optarg, "local6"))
4942 facility=LOG_LOCAL6;
4943 else if (!strcmp(optarg, "local7"))
4944 facility=LOG_LOCAL7;
4945 else
4946 badarg = true;
4947 break;
4948 case 'd':
4949 // enable debug mode
4950 debugmode = 1;
4951 break;
4952 case 'n':
4953 // don't fork()
4954 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4955 do_fork = false;
4956 #endif
4957 break;
4958 case 'D':
4959 // print summary of all valid directives
4960 debugmode = 1;
4961 Directives();
4962 return 0;
4963 case 'i':
4964 // Period (time interval) for checking
4965 // strtol will set errno in the event of overflow, so we'll check it.
4966 errno = 0;
4967 lchecktime = strtol(optarg, &tailptr, 10);
4968 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4969 debugmode=1;
4970 PrintHead();
4971 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4972 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4973 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4974 return EXIT_BADCMD;
4975 }
4976 checktime = (int)lchecktime;
4977 break;
4978 case 'r':
4979 // report IOCTL transactions
4980 {
4981 int n1 = -1, n2 = -1, len = strlen(optarg);
4982 char s[9+1]; unsigned i = 1;
4983 sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
4984 if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) {
4985 badarg = true;
4986 } else if (!strcmp(s,"ioctl")) {
4987 ata_debugmode = scsi_debugmode = nvme_debugmode = i;
4988 } else if (!strcmp(s,"ataioctl")) {
4989 ata_debugmode = i;
4990 } else if (!strcmp(s,"scsiioctl")) {
4991 scsi_debugmode = i;
4992 } else if (!strcmp(s,"nvmeioctl")) {
4993 nvme_debugmode = i;
4994 } else {
4995 badarg = true;
4996 }
4997 }
4998 break;
4999 case 'c':
5000 // alternate configuration file
5001 if (strcmp(optarg,"-"))
5002 configfile = (configfile_alt = optarg).c_str();
5003 else // read from stdin
5004 configfile=configfile_stdin;
5005 break;
5006 case 'p':
5007 // output file with PID number
5008 pid_file = optarg;
5009 break;
5010 case 's':
5011 // path prefix of persistent state file
5012 state_path_prefix = optarg;
5013 break;
5014 case 'A':
5015 // path prefix of attribute log file
5016 attrlog_path_prefix = optarg;
5017 break;
5018 case 'B':
5019 {
5020 const char * path = optarg;
5021 if (*path == '+' && path[1])
5022 path++;
5023 else
5024 use_default_db = false;
5025 unsigned char savedebug = debugmode; debugmode = 1;
5026 if (!read_drive_database(path))
5027 return EXIT_BADCMD;
5028 debugmode = savedebug;
5029 }
5030 break;
5031 case 'w':
5032 warning_script = optarg;
5033 break;
5034 case 'V':
5035 // print version and CVS info
5036 debugmode = 1;
5037 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
5038 return 0;
5039 #ifdef HAVE_LIBCAP_NG
5040 case 'C':
5041 // enable capabilities
5042 capabilities_enabled = true;
5043 break;
5044 #endif
5045 case 'h':
5046 // help: print summary of command-line options
5047 debugmode=1;
5048 PrintHead();
5049 Usage();
5050 return 0;
5051 case '?':
5052 default:
5053 // unrecognized option
5054 debugmode=1;
5055 PrintHead();
5056 // Point arg to the argument in which this option was found.
5057 arg = argv[optind-1];
5058 // Check whether the option is a long option that doesn't map to -h.
5059 if (arg[1] == '-' && optchar != 'h') {
5060 // Iff optopt holds a valid option then argument must be missing.
5061 if (optopt && (strchr(shortopts, optopt) != NULL)) {
5062 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
5063 PrintValidArgs(optopt);
5064 } else {
5065 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
5066 }
5067 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
5068 return EXIT_BADCMD;
5069 }
5070 if (optopt) {
5071 // Iff optopt holds a valid option then argument must be missing.
5072 if (strchr(shortopts, optopt) != NULL){
5073 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
5074 PrintValidArgs(optopt);
5075 } else {
5076 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
5077 }
5078 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5079 return EXIT_BADCMD;
5080 }
5081 Usage();
5082 return 0;
5083 }
5084
5085 // Check to see if option had an unrecognized or incorrect argument.
5086 if (badarg) {
5087 debugmode=1;
5088 PrintHead();
5089 // It would be nice to print the actual option name given by the user
5090 // here, but we just print the short form. Please fix this if you know
5091 // a clean way to do it.
5092 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
5093 PrintValidArgs(optchar);
5094 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5095 return EXIT_BADCMD;
5096 }
5097 }
5098
5099 // non-option arguments are not allowed
5100 if (argc > optind) {
5101 debugmode=1;
5102 PrintHead();
5103 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
5104 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5105 return EXIT_BADCMD;
5106 }
5107
5108 // no pidfile in debug mode
5109 if (debugmode && !pid_file.empty()) {
5110 debugmode=1;
5111 PrintHead();
5112 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
5113 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
5114 return EXIT_BADCMD;
5115 }
5116
5117 #ifndef _WIN32
5118 if (!debugmode) {
5119 // absolute path names are required due to chdir('/') in daemon_init()
5120 if (!( check_abs_path('p', pid_file)
5121 && check_abs_path('s', state_path_prefix)
5122 && check_abs_path('A', attrlog_path_prefix)))
5123 return EXIT_BADCMD;
5124 }
5125 #endif
5126
5127 // Read or init drive database
5128 {
5129 unsigned char savedebug = debugmode; debugmode = 1;
5130 if (!init_drive_database(use_default_db))
5131 return EXIT_BADCMD;
5132 debugmode = savedebug;
5133 }
5134
5135 // Check option compatibility of notify support
5136 if (!notify_post_init())
5137 return EXIT_BADCMD;
5138
5139 // print header
5140 PrintHead();
5141
5142 // No error, continue in main_worker()
5143 return -1;
5144 }
5145
5146 // Function we call if no configuration file was found or if the
5147 // SCANDIRECTIVE Directive was found. It makes entries for device
5148 // names returned by scan_smart_devices() in os_OSNAME.cpp
5149 static int MakeConfigEntries(const dev_config & base_cfg,
5150 dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5151 const smart_devtype_list & types)
5152 {
5153 // make list of devices
5154 smart_device_list devlist;
5155 if (!smi()->scan_smart_devices(devlist, types)) {
5156 PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
5157 return 0;
5158 }
5159
5160 // if no devices, return
5161 if (devlist.size() <= 0)
5162 return 0;
5163
5164 // add empty device slots for existing config entries
5165 while (scanned_devs.size() < conf_entries.size())
5166 scanned_devs.push_back((smart_device *)0);
5167
5168 // loop over entries to create
5169 for (unsigned i = 0; i < devlist.size(); i++) {
5170 // Move device pointer
5171 smart_device * dev = devlist.release(i);
5172 scanned_devs.push_back(dev);
5173
5174 // Copy configuration, update device and type name
5175 conf_entries.push_back(base_cfg);
5176 dev_config & cfg = conf_entries.back();
5177 cfg.name = dev->get_info().info_name;
5178 cfg.dev_name = dev->get_info().dev_name;
5179 cfg.dev_type = dev->get_info().dev_type;
5180 }
5181
5182 return devlist.size();
5183 }
5184
5185 // Returns negative value (see ParseConfigFile()) if config file
5186 // had errors, else number of entries which may be zero or positive.
5187 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
5188 {
5189 // parse configuration file configfile (normally /etc/smartd.conf)
5190 smart_devtype_list scan_types;
5191 int entries = ParseConfigFile(conf_entries, scan_types);
5192
5193 if (entries < 0) {
5194 // There was an error reading the configuration file.
5195 conf_entries.clear();
5196 if (entries == -1)
5197 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
5198 return entries;
5199 }
5200
5201 // no error parsing config file.
5202 if (entries) {
5203 // we did not find a SCANDIRECTIVE and did find valid entries
5204 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
5205 }
5206 else if (!conf_entries.empty()) {
5207 // we found a SCANDIRECTIVE or there was no configuration file so
5208 // scan. Configuration file's last entry contains all options
5209 // that were set
5210 dev_config first = conf_entries.back();
5211 conf_entries.pop_back();
5212
5213 if (first.lineno)
5214 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
5215 else
5216 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
5217
5218 // make config list of devices to search for
5219 MakeConfigEntries(first, conf_entries, scanned_devs, scan_types);
5220
5221 // warn user if scan table found no devices
5222 if (conf_entries.empty())
5223 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
5224 }
5225 else
5226 PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
5227
5228 return conf_entries.size();
5229 }
5230
5231 // Return true if TYPE contains a RAID drive number
5232 static bool is_raid_type(const char * type)
5233 {
5234 if (str_starts_with(type, "sat,"))
5235 return false;
5236 int i;
5237 if (sscanf(type, "%*[^,],%d", &i) != 1)
5238 return false;
5239 return true;
5240 }
5241
5242 // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
5243 static bool is_duplicate_device(const smart_device * dev,
5244 const smart_device_list & devices, unsigned numdevs,
5245 const dev_config_vector & ignored)
5246 {
5247 const smart_device::device_info & info1 = dev->get_info();
5248 bool is_raid1 = is_raid_type(info1.dev_type.c_str());
5249
5250 for (unsigned i = 0; i < numdevs; i++) {
5251 const smart_device::device_info & info2 = devices.at(i)->get_info();
5252 // -d TYPE options must match if RAID drive number is specified
5253 if ( info1.dev_name == info2.dev_name
5254 && ( info1.dev_type == info2.dev_type
5255 || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
5256 return true;
5257 }
5258
5259 for (unsigned i = 0; i < ignored.size(); i++) {
5260 const dev_config & cfg2 = ignored.at(i);
5261 if ( info1.dev_name == cfg2.dev_name
5262 && ( info1.dev_type == cfg2.dev_type
5263 || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
5264 return true;
5265 }
5266 return false;
5267 }
5268
5269 // Register one device, return false on error
5270 static bool register_device(dev_config & cfg, dev_state & state, smart_device_auto_ptr & dev,
5271 const dev_config_vector * prev_cfgs)
5272 {
5273 bool scanning;
5274 if (!dev) {
5275 // Get device of appropriate type
5276 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
5277 if (!dev) {
5278 if (cfg.dev_type.empty())
5279 PrintOut(LOG_INFO, "Device: %s, unable to autodetect device type\n", cfg.name.c_str());
5280 else
5281 PrintOut(LOG_INFO, "Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
5282 return false;
5283 }
5284 scanning = false;
5285 }
5286 else {
5287 // Use device from device scan
5288 scanning = true;
5289 }
5290
5291 // Save old info
5292 smart_device::device_info oldinfo = dev->get_info();
5293
5294 // Open with autodetect support, may return 'better' device
5295 dev.replace( dev->autodetect_open() );
5296
5297 // Report if type has changed
5298 if (oldinfo.dev_type != dev->get_dev_type())
5299 PrintOut(LOG_INFO, "Device: %s, type changed from '%s' to '%s'\n",
5300 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
5301
5302 // Return if autodetect_open() failed
5303 if (!dev->is_open()) {
5304 if (debugmode || !scanning)
5305 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
5306 return false;
5307 }
5308
5309 // Update informal name
5310 cfg.name = dev->get_info().info_name;
5311 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
5312
5313 int status;
5314 const char * typemsg;
5315 // register ATA device
5316 if (dev->is_ata()){
5317 typemsg = "ATA";
5318 status = ATADeviceScan(cfg, state, dev->to_ata(), prev_cfgs);
5319 }
5320 // or register SCSI device
5321 else if (dev->is_scsi()){
5322 typemsg = "SCSI";
5323 status = SCSIDeviceScan(cfg, state, dev->to_scsi(), prev_cfgs);
5324 }
5325 // or register NVMe device
5326 else if (dev->is_nvme()) {
5327 typemsg = "NVMe";
5328 status = NVMeDeviceScan(cfg, state, dev->to_nvme(), prev_cfgs);
5329 }
5330 else {
5331 PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
5332 return false;
5333 }
5334
5335 if (status) {
5336 if (!scanning || debugmode) {
5337 if (cfg.lineno)
5338 PrintOut(scanning ? LOG_INFO : LOG_CRIT,
5339 "Unable to register %s device %s at line %d of file %s\n",
5340 typemsg, cfg.name.c_str(), cfg.lineno, configfile);
5341 else
5342 PrintOut(LOG_INFO, "Unable to register %s device %s\n",
5343 typemsg, cfg.name.c_str());
5344 }
5345
5346 return false;
5347 }
5348
5349 return true;
5350 }
5351
5352 // This function tries devices from conf_entries. Each one that can be
5353 // registered is moved onto the [ata|scsi]devices lists and removed
5354 // from the conf_entries list.
5355 static bool register_devices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5356 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
5357 {
5358 // start by clearing lists/memory of ALL existing devices
5359 configs.clear();
5360 devices.clear();
5361 states.clear();
5362
5363 // Register entries
5364 dev_config_vector ignored_entries;
5365 unsigned numnoscan = 0;
5366 for (unsigned i = 0; i < conf_entries.size(); i++){
5367
5368 dev_config cfg = conf_entries[i];
5369
5370 if (cfg.ignore) {
5371 // Store for is_duplicate_device() check and ignore
5372 PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
5373 (!cfg.dev_type.empty() ? " [" : ""),
5374 cfg.dev_type.c_str(),
5375 (!cfg.dev_type.empty() ? "]" : ""));
5376 ignored_entries.push_back(cfg);
5377 continue;
5378 }
5379
5380 smart_device_auto_ptr dev;
5381
5382 // Device may already be detected during devicescan
5383 bool scanning = false;
5384 if (i < scanned_devs.size()) {
5385 dev = scanned_devs.release(i);
5386 if (dev) {
5387 // Check for a preceding non-DEVICESCAN entry for the same device
5388 if ( (numnoscan || !ignored_entries.empty())
5389 && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
5390 PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
5391 continue;
5392 }
5393 scanning = true;
5394 }
5395 }
5396
5397 // Register device
5398 // If scanning, pass dev_idinfo of previous devices for duplicate check
5399 dev_state state;
5400 if (!register_device(cfg, state, dev, (scanning ? &configs : 0))) {
5401 // if device is explicitly listed and we can't register it, then
5402 // exit unless the user has specified that the device is removable
5403 if (!scanning) {
5404 if (!(cfg.removable || quit == QUIT_NEVER)) {
5405 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
5406 return false;
5407 }
5408 PrintOut(LOG_INFO, "Device: %s, not available\n", cfg.name.c_str());
5409 // Prevent retry of registration
5410 ignored_entries.push_back(cfg);
5411 }
5412 continue;
5413 }
5414
5415 // move onto the list of devices
5416 configs.push_back(cfg);
5417 states.push_back(state);
5418 devices.push_back(dev);
5419 if (!scanning)
5420 numnoscan = devices.size();
5421 }
5422
5423 init_disable_standby_check(configs);
5424 return true;
5425 }
5426
5427
5428 // Main program without exception handling
5429 static int main_worker(int argc, char **argv)
5430 {
5431 // Initialize interface
5432 smart_interface::init();
5433 if (!smi())
5434 return 1;
5435
5436 // Check whether systemd notify is supported and enabled
5437 notify_init();
5438
5439 // parse input and print header and usage info if needed
5440 int status = parse_options(argc,argv);
5441 if (status >= 0)
5442 return status;
5443
5444 // Configuration for each device
5445 dev_config_vector configs;
5446 // Device states
5447 dev_state_vector states;
5448 // Devices to monitor
5449 smart_device_list devices;
5450
5451 // Drop capabilities if supported and enabled
5452 capabilities_drop_now();
5453
5454 notify_msg("Initializing ...");
5455
5456 // the main loop of the code
5457 bool firstpass = true, write_states_always = true;
5458 time_t wakeuptime = 0;
5459 // assert(status < 0);
5460 do {
5461 // Should we (re)read the config file?
5462 if (firstpass || caughtsigHUP){
5463 if (!firstpass) {
5464 // Write state files
5465 if (!state_path_prefix.empty())
5466 write_all_dev_states(configs, states);
5467
5468 PrintOut(LOG_INFO,
5469 caughtsigHUP==1?
5470 "Signal HUP - rereading configuration file %s\n":
5471 "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5472 configfile);
5473 notify_msg("Reloading ...");
5474 }
5475
5476 {
5477 dev_config_vector conf_entries; // Entries read from smartd.conf
5478 smart_device_list scanned_devs; // Devices found during scan
5479 // (re)reads config file, makes >=0 entries
5480 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5481
5482 if (entries>=0) {
5483 // checks devices, then moves onto ata/scsi list or deallocates.
5484 if (!register_devices(conf_entries, scanned_devs, configs, states, devices)) {
5485 status = EXIT_BADDEV;
5486 break;
5487 }
5488 if (!(configs.size() == devices.size() && configs.size() == states.size()))
5489 throw std::logic_error("Invalid result from RegisterDevices");
5490 // Handle limitations if capabilities are dropped
5491 capabilities_check_config(configs);
5492 }
5493 else if ( quit == QUIT_NEVER
5494 || ((quit == QUIT_NODEV || quit == QUIT_NODEVSTARTUP) && !firstpass)) {
5495 // user has asked to continue on error in configuration file
5496 if (!firstpass)
5497 PrintOut(LOG_INFO,"Reusing previous configuration\n");
5498 }
5499 else {
5500 // exit with configuration file error status
5501 status = (entries == -3 ? EXIT_READCONF : entries == -2 ? EXIT_NOCONF : EXIT_BADCONF);
5502 break;
5503 }
5504 }
5505
5506 if (!( devices.size() > 0 || quit == QUIT_NEVER
5507 || (quit == QUIT_NODEVSTARTUP && !firstpass))) {
5508 PrintOut(LOG_INFO, "Unable to monitor any SMART enabled devices. %sExiting...\n",
5509 (!debugmode ? "Try debug (-d) option. " : ""));
5510 status = EXIT_NODEV;
5511 break;
5512 }
5513
5514 // Log number of devices we are monitoring...
5515 int numata = 0, numscsi = 0;
5516 for (unsigned i = 0; i < devices.size(); i++) {
5517 const smart_device * dev = devices.at(i);
5518 if (dev->is_ata())
5519 numata++;
5520 else if (dev->is_scsi())
5521 numscsi++;
5522 }
5523 PrintOut(LOG_INFO, "Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5524 numata, numscsi, (int)devices.size() - numata - numscsi);
5525
5526 if (quit == QUIT_SHOWTESTS) {
5527 // user has asked to print test schedule
5528 PrintTestSchedule(configs, states, devices);
5529 // assert(firstpass);
5530 return 0;
5531 }
5532
5533 // reset signal
5534 caughtsigHUP=0;
5535
5536 // Always write state files after (re)configuration
5537 write_states_always = true;
5538 }
5539
5540 // check all devices once,
5541 // self tests are not started in first pass unless '-q onecheck' is specified
5542 notify_check((int)devices.size());
5543 CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit == QUIT_ONECHECK));
5544
5545 // Write state files
5546 if (!state_path_prefix.empty())
5547 write_all_dev_states(configs, states, write_states_always);
5548 write_states_always = false;
5549
5550 // Write attribute logs
5551 if (!attrlog_path_prefix.empty())
5552 write_all_dev_attrlogs(configs, states);
5553
5554 // user has asked us to exit after first check
5555 if (quit == QUIT_ONECHECK) {
5556 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices successfully checked once.\n"
5557 "smartd is exiting (exit status 0)\n");
5558 // assert(firstpass);
5559 return 0;
5560 }
5561
5562 if (firstpass) {
5563 if (!debugmode) {
5564 // fork() into background if needed, close ALL file descriptors,
5565 // redirect stdin, stdout, and stderr, chdir to "/".
5566 status = daemon_init();
5567 if (status >= 0)
5568 return status;
5569
5570 // Write PID file if configured
5571 if (!write_pid_file())
5572 return EXIT_PID;
5573 }
5574
5575 // Set exit and signal handlers
5576 install_signal_handlers();
5577
5578 // Initialize wakeup time to CURRENT time
5579 wakeuptime = time(0);
5580
5581 firstpass = false;
5582 }
5583
5584 // sleep until next check time, or a signal arrives
5585 wakeuptime = dosleep(wakeuptime, write_states_always, (int)devices.size());
5586
5587 } while (!caughtsigEXIT);
5588
5589 if (caughtsigEXIT && status < 0) {
5590 // Loop exited on signal
5591 if (caughtsigEXIT == SIGTERM || (debugmode && caughtsigEXIT == SIGQUIT)) {
5592 PrintOut(LOG_INFO, "smartd received signal %d: %s\n",
5593 caughtsigEXIT, strsignal(caughtsigEXIT));
5594 }
5595 else {
5596 // Unexpected SIGINT or SIGQUIT
5597 PrintOut(LOG_CRIT, "smartd received unexpected signal %d: %s\n",
5598 caughtsigEXIT, strsignal(caughtsigEXIT));
5599 status = EXIT_SIGNAL;
5600 }
5601 }
5602
5603 // Status unset above implies success
5604 if (status < 0)
5605 status = 0;
5606
5607 if (!firstpass) {
5608 // Loop exited after daemon_init() and write_pid_file()
5609
5610 // Write state files only on normal exit
5611 if (!status && !state_path_prefix.empty())
5612 write_all_dev_states(configs, states);
5613
5614 // Delete PID file, if one was created
5615 if (!pid_file.empty() && unlink(pid_file.c_str()))
5616 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
5617 pid_file.c_str(), strerror(errno));
5618
5619 // and this should be the final output from smartd before it exits
5620 PrintOut((status ? LOG_CRIT : LOG_INFO), "smartd is exiting (exit status %d)\n",
5621 status);
5622 }
5623
5624 return status;
5625 }
5626
5627
5628 #ifndef _WIN32
5629 // Main program
5630 int main(int argc, char **argv)
5631 #else
5632 // Windows: internal main function started direct or by service control manager
5633 static int smartd_main(int argc, char **argv)
5634 #endif
5635 {
5636 int status;
5637 try {
5638 // Do the real work ...
5639 status = main_worker(argc, argv);
5640 }
5641 catch (const std::bad_alloc & /*ex*/) {
5642 // Memory allocation failed (also thrown by std::operator new)
5643 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5644 status = EXIT_NOMEM;
5645 }
5646 catch (const std::exception & ex) {
5647 // Other fatal errors
5648 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5649 status = EXIT_BADCODE;
5650 }
5651
5652 // Check for remaining device objects
5653 if (smart_device::get_num_objects() != 0) {
5654 PrintOut(LOG_CRIT, "Smartd: Internal Error: %d device object(s) left at exit.\n",
5655 smart_device::get_num_objects());
5656 status = EXIT_BADCODE;
5657 }
5658
5659 if (status == EXIT_BADCODE)
5660 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
5661
5662 notify_exit(status);
5663 #ifdef _WIN32
5664 daemon_winsvc_exitcode = status;
5665 #endif
5666 return status;
5667 }
5668
5669
#ifdef _WIN32
// Entry point of the Windows build.
// Passes control to daemon_main() together with the service options below
// and smartd_main() as the function to run.
int main(int argc, char **argv)
{
  // Settings used when smartd runs as a Windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename
    "smartd",
    // displayname
    "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (SMART) built into "
    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
    "www.smartmontools.org"
  };

  // daemon_main() deals with the daemon and service specific commands.
  // It starts smartd_main() directly, from a new process,
  // or via the service control manager.
  const int exit_code = daemon_main("smartd", &service_options, smartd_main, argc, argv);
  return exit_code;
}
#endif