2 * Home page of code is: http://www.smartmontools.org
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-18 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
9 * SPDX-License-Identifier: GPL-2.0-or-later
13 #define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
15 // unconditionally included files
18 #include <sys/types.h>
19 #include <sys/stat.h> // umask
34 #include <algorithm> // std::replace()
36 // conditionally included files
45 #include "os_win32/popen.h" // popen/pclose()
47 #pragma warning(disable:4761) // "conversion supplied"
48 typedef unsigned short mode_t
;
51 #include <io.h> // umask()
52 #include <process.h> // getpid()
56 #include <io.h> // setmode()
63 #ifdef HAVE_LIBSYSTEMD
64 #include <systemd/sd-daemon.h>
65 #endif // HAVE_LIBSYSTEMD
67 // locally included files
69 #include "dev_interface.h"
70 #include "knowndrives.h"
76 // fork()/signal()/initd simulation for native Windows
77 #include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
78 #define strsignal daemon_strsignal
79 #define sleep daemon_sleep
80 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
81 #define SIGQUIT SIGBREAK
82 #define SIGQUIT_KEYNAME "CONTROL-Break"
84 #define SIGQUIT_KEYNAME "CONTROL-\\"
87 const char * smartd_cpp_cvsid
= "$Id: smartd.cpp 4864 2018-12-20 13:02:39Z chrfranke $"
91 typedef void (*signal_handler_type
)(int);
// Install `handler` for signal `sig`, using whichever signal API was
// selected at build time. Four alternative implementations follow.
// NOTE(review): the opening preprocessor conditional, the final #else/#endif
// and a few statements are not visible in this listing.
94 static void set_signal_if_not_ignored(int sig
, signal_handler_type handler
)
// Variant 1: hand the handler to daemon_signal().
// NOTE(review): presumably the native Windows build - confirm against the
// missing conditional.
98 daemon_signal(sig
, handler
);
100 #elif defined(HAVE_SIGACTION)
101 // SVr4, POSIX.1-2001, POSIX.1-2008
// Variant 2: read the current disposition into `sa` without changing it
// (the new-action argument is a null pointer).
103 sa
.sa_handler
= SIG_DFL
;
104 sigaction(sig
, (struct sigaction
*)0, &sa
);
// The signal is currently ignored.
// NOTE(review): the statement guarded by this test is not visible here;
// judging by the function name it presumably returns without installing
// the handler - confirm.
105 if (sa
.sa_handler
== SIG_IGN
)
// Start from a zeroed sigaction and install the handler with SA_RESTART.
108 memset(&sa
, 0, sizeof(sa
));
109 sa
.sa_handler
= handler
;
110 sa
.sa_flags
= SA_RESTART
; // BSD signal() semantics
111 sigaction(sig
, &sa
, (struct sigaction
*)0);
113 #elif defined(HAVE_SIGSET)
114 // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
// Variant 3: install the handler first; if the call reports SIG_IGN,
// set the signal back to SIG_IGN.
115 if (sigset(sig
, handler
) == SIG_IGN
)
116 sigset(sig
, SIG_IGN
);
119 // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
120 // Important: BSD semantics is required. Traditional signal()
121 // resets the handler to SIG_DFL after the first signal is caught.
// Variant 4: same install-then-restore sequence as the sigset() variant,
// using signal().
122 if (signal(sig
, handler
) == SIG_IGN
)
123 signal(sig
, SIG_IGN
);
127 using namespace smartmontools
;
130 #define EXIT_BADCMD 1 // command line did not parse
131 #define EXIT_BADCONF 2 // syntax error in config file
132 #define EXIT_STARTUP 3 // problem forking daemon
133 #define EXIT_PID 4 // problem creating pid file
134 #define EXIT_NOCONF 5 // config file does not exist
135 #define EXIT_READCONF 6 // config file exists but cannot be read
137 #define EXIT_NOMEM 8 // out of memory
138 #define EXIT_BADCODE 10 // internal error - should NEVER happen
140 #define EXIT_BADDEV 16 // we can't monitor this device
141 #define EXIT_NODEV 17 // no devices to monitor
143 #define EXIT_SIGNAL 254 // abort on signal
146 // command-line: 1=debug mode, 2=print presets
147 static unsigned char debugmode
= 0;
149 // command-line: how long to sleep between checks
150 #define CHECKTIME 1800
151 static int checktime
=CHECKTIME
;
153 // command-line: name of PID file (empty for no pid file)
154 static std::string pid_file
;
156 // command-line: path prefix of persistent state file, empty if no persistence.
157 static std::string state_path_prefix
158 #ifdef SMARTMONTOOLS_SAVESTATES
159 = SMARTMONTOOLS_SAVESTATES
163 // command-line: path prefix of attribute log file, empty if no logs.
164 static std::string attrlog_path_prefix
165 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
166 = SMARTMONTOOLS_ATTRIBUTELOG
170 // configuration file name
171 static const char * configfile
;
172 // configuration file "name" if read from stdin
173 static const char * const configfile_stdin
= "<stdin>";
174 // path of alternate configuration file
175 static std::string configfile_alt
;
177 // warning script file
178 static std::string warning_script
;
180 // command-line: when should we exit?
182 QUIT_NODEV
, QUIT_NODEVSTARTUP
, QUIT_NEVER
, QUIT_ONECHECK
,
183 QUIT_SHOWTESTS
, QUIT_ERRORS
185 static quit_t quit
= QUIT_NODEV
;
187 // command-line; this is the default syslog(3) log facility to use.
188 static int facility
=LOG_DAEMON
;
191 // command-line: fork into background?
192 static bool do_fork
=true;
195 // TODO: This smartctl-only variable is also used in some os_*.cpp files.
196 unsigned char failuretest_permissive
= 0;
198 // set to one if we catch a USR1 (check devices now)
199 static volatile int caughtsigUSR1
=0;
202 // set to one if we catch a USR2 (toggle debug mode)
203 static volatile int caughtsigUSR2
=0;
206 // set to one if we catch a HUP (reload config file). In debug mode,
207 // set to two, if we catch INT (also reload config file).
208 static volatile int caughtsigHUP
=0;
210 // set to signal value if we catch INT, QUIT, or TERM
211 static volatile int caughtsigEXIT
=0;
213 // This function prints either to stdout or to the syslog as needed.
214 static void PrintOut(int priority
, const char *fmt
, ...)
215 __attribute_format_printf(2, 3);
217 #ifdef HAVE_LIBSYSTEMD
218 // systemd notify support
220 static bool notify_enabled
= false;
222 static inline void notify_init()
224 if (!getenv("NOTIFY_SOCKET"))
226 notify_enabled
= true;
229 static inline bool notify_post_init()
234 PrintOut(LOG_CRIT
, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
240 static void notify_msg(const char * msg
, bool ready
= false)
245 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready
? "READY=1\\n" : ""), msg
);
248 sd_notifyf(0, "%sSTATUS=%s", (ready
? "READY=1\n" : ""), msg
);
251 static void notify_check(int numdev
)
256 snprintf(msg
, sizeof(msg
), "Checking %d device%s ...",
257 numdev
, (numdev
!= 1 ? "s" : ""));
261 static void notify_wait(time_t wakeuptime
, int numdev
)
265 char ts
[16], msg
[64];
266 strftime(ts
, sizeof(ts
), "%H:%M:%S", localtime(&wakeuptime
));
267 snprintf(msg
, sizeof(msg
), "Next check of %d device%s will start at %s",
268 numdev
, (numdev
!= 1 ? "s" : ""), ts
);
269 static bool ready
= true; // first call notifies READY=1
270 notify_msg(msg
, ready
);
274 static void notify_exit(int status
)
280 case 0: msg
= "Exiting ..."; break;
281 case EXIT_BADCMD
: msg
= "Error in command line (see SYSLOG)"; break;
282 case EXIT_BADCONF
: case EXIT_NOCONF
:
283 case EXIT_READCONF
: msg
= "Error in config file (see SYSLOG)"; break;
284 case EXIT_BADDEV
: msg
= "Unable to register a device (see SYSLOG)"; break;
285 case EXIT_NODEV
: msg
= "No devices to monitor"; break;
286 default: msg
= "Error (see SYSLOG)"; break;
291 #else // HAVE_LIBSYSTEMD
292 // No systemd notify support
294 static inline bool notify_post_init()
297 if (getenv("NOTIFY_SOCKET")) {
298 PrintOut(LOG_CRIT
, "This version of smartd was build without 'Type=notify' support.\n");
305 static inline void notify_init() { }
306 static inline void notify_msg(const char *) { }
307 static inline void notify_check(int) { }
308 static inline void notify_wait(time_t, int) { }
309 static inline void notify_exit(int) { }
311 #endif // HAVE_LIBSYSTEMD
313 // Attribute monitoring flags.
314 // See monitor_attr_flags below.
316 MONITOR_IGN_FAILUSE
= 0x01,
317 MONITOR_IGNORE
= 0x02,
318 MONITOR_RAW_PRINT
= 0x04,
320 MONITOR_AS_CRIT
= 0x10,
321 MONITOR_RAW_AS_CRIT
= 0x20,
324 // Array of flags for each attribute.
// Stores one flag byte per attribute id in a 256-entry array.
// Both accessors require 0 < id < 256: for id 0 or an out-of-range id,
// is_set() returns false and set() does nothing.
// NOTE(review): the access specifiers, the constructor header and the
// closing brace are not visible in this listing.
325 class attribute_flags
// Zero all flag bytes (presumably the constructor body - confirm).
329 { memset(m_flags
, 0, sizeof(m_flags
)); }
// Return true if at least one bit of `flag` is set for attribute `id`.
331 bool is_set(int id
, unsigned char flag
) const
332 { return (0 < id
&& id
< (int)sizeof(m_flags
) && (m_flags
[id
] & flag
)); }
// OR `flags` into the entry for attribute `id`; bits already set are kept.
334 void set(int id
, unsigned char flags
)
336 if (0 < id
&& id
< (int)sizeof(m_flags
))
337 m_flags
[id
] |= flags
;
// Flag storage, indexed by attribute id.
341 unsigned char m_flags
[256];
345 /// Configuration data for a device. Read from smartd.conf.
346 /// Supports copy & assignment and is compatible with STL containers.
349 int lineno
; // Line number of entry in file
350 std::string name
; // Device name (with optional extra info)
351 std::string dev_name
; // Device name (plain, for SMARTD_DEVICE variable)
352 std::string dev_type
; // Device type argument from -d directive, empty if none
353 std::string dev_idinfo
; // Device identify info for warning emails
354 std::string state_file
; // Path of the persistent state file, empty if none
355 std::string attrlog_file
; // Path of the persistent attrlog file, empty if none
356 bool ignore
; // Ignore this entry
357 bool id_is_unique
; // True if dev_idinfo is unique (includes S/N or WWN)
358 bool smartcheck
; // Check SMART status
359 bool usagefailed
; // Check for failed Usage Attributes
360 bool prefail
; // Track changes in Prefail Attributes
361 bool usage
; // Track changes in Usage Attributes
362 bool selftest
; // Monitor number of selftest errors
363 bool errorlog
; // Monitor number of ATA errors
364 bool xerrorlog
; // Monitor number of ATA errors (Extended Comprehensive error log)
365 bool offlinests
; // Monitor changes in offline data collection status
366 bool offlinests_ns
; // Disable auto standby if in progress
367 bool selfteststs
; // Monitor changes in self-test execution status
368 bool selfteststs_ns
; // Disable auto standby if in progress
369 bool permissive
; // Ignore failed SMART commands
370 char autosave
; // 1=disable, 2=enable Autosave Attributes
371 char autoofflinetest
; // 1=disable, 2=enable Auto Offline Test
372 firmwarebug_defs firmwarebugs
; // -F directives from drivedb or smartd.conf
373 bool ignorepresets
; // Ignore database of -v options
374 bool showpresets
; // Show database entry for this device
375 bool removable
; // Device may disappear (not be present)
376 char powermode
; // skip check, if disk in idle or standby mode
377 bool powerquiet
; // skip powermode 'skipping checks' message
378 int powerskipmax
; // how many times can be check skipped
379 unsigned char tempdiff
; // Track Temperature changes >= this limit
380 unsigned char tempinfo
, tempcrit
; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
381 regular_expression test_regex
; // Regex for scheduled testing
383 // Configuration of email warning messages
384 std::string emailcmdline
; // script to execute, empty if no messages
385 std::string emailaddress
; // email address, or empty
386 unsigned char emailfreq
; // Emails once (1) daily (2) diminishing (3)
387 bool emailtest
; // Send test email?
390 int dev_rpm
; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
391 int set_aam
; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
392 int set_apm
; // disable(-1), enable(2..255->1..254) Advanced Power Management
393 int set_lookahead
; // disable(-1), enable(1) read look-ahead
394 int set_standby
; // set(1..255->0..254) standby timer
395 bool set_security_freeze
; // Freeze ATA security
396 int set_wcache
; // disable(-1), enable(1) write cache
397 int set_dsn
; // disable(0x2), enable(0x1) DSN
399 bool sct_erc_set
; // set SCT ERC to:
400 unsigned short sct_erc_readtime
; // ERC read time (deciseconds)
401 unsigned short sct_erc_writetime
; // ERC write time (deciseconds)
403 unsigned char curr_pending_id
; // ID of current pending sector count, 0 if none
404 unsigned char offl_pending_id
; // ID of offline uncorrectable sector count, 0 if none
405 bool curr_pending_incr
, offl_pending_incr
; // True if current/offline pending values increase
406 bool curr_pending_set
, offl_pending_set
; // True if '-C', '-U' set in smartd.conf
408 attribute_flags monitor_attr_flags
; // MONITOR_* flags for each attribute
410 ata_vendor_attr_defs attribute_defs
; // -v options
415 dev_config::dev_config()
426 offlinests(false), offlinests_ns(false),
427 selfteststs(false), selfteststs_ns(false),
431 ignorepresets(false),
438 tempinfo(0), tempcrit(0),
442 set_aam(0), set_apm(0),
445 set_security_freeze(false),
446 set_wcache(0), set_dsn(0),
448 sct_erc_readtime(0), sct_erc_writetime(0),
449 curr_pending_id(0), offl_pending_id(0),
450 curr_pending_incr(false), offl_pending_incr(false),
451 curr_pending_set(false), offl_pending_set(false)
456 // Number of allowed mail message types
457 static const int SMARTD_NMAIL
= 13;
458 // Type for '-M test' mails (state not persistent)
459 static const int MAILTYPE_TEST
= 0;
460 // TODO: Add const or enum for all mail types.
463 int logged
;// number of times an email has been sent
464 time_t firstsent
;// time first email was sent, as defined by time(2)
465 time_t lastsent
; // time last email was sent, as defined by time(2)
468 : logged(0), firstsent(0), lastsent(0) { }
471 /// Persistent state data for a device.
472 struct persistent_dev_state
474 unsigned char tempmin
, tempmax
; // Min/Max Temperatures
476 unsigned char selflogcount
; // total number of self-test errors
477 unsigned short selfloghour
; // lifetime hours of last self-test error
479 time_t scheduled_test_next_check
; // Time of next check for scheduled self-tests
481 uint64_t selective_test_last_start
; // Start LBA of last scheduled selective self-test
482 uint64_t selective_test_last_end
; // End LBA of last scheduled selective self-test
484 mailinfo maillog
[SMARTD_NMAIL
]; // log info on when mail sent
487 int ataerrorcount
; // Total number of ATA errors
489 // Persistent part of ata_smart_values:
490 struct ata_attribute
{
493 unsigned char worst
; // Byte needed for 'raw64' attribute only.
497 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
499 ata_attribute ata_attributes
[NUMBER_ATA_SMART_ATTRIBUTES
];
503 struct scsi_error_counter_t
{
504 struct scsiErrorCounter errCounter
;
506 scsi_error_counter_t() : found(0)
507 { memset(&errCounter
, 0, sizeof(errCounter
)); }
509 scsi_error_counter_t scsi_error_counters
[3];
511 struct scsi_nonmedium_error_t
{
512 struct scsiNonMediumError nme
;
514 scsi_nonmedium_error_t() : found(0)
515 { memset(&nme
, 0, sizeof(nme
)); }
517 scsi_nonmedium_error_t scsi_nonmedium_error
;
520 uint64_t nvme_err_log_entries
;
522 persistent_dev_state();
525 persistent_dev_state::persistent_dev_state()
526 : tempmin(0), tempmax(0),
529 scheduled_test_next_check(0),
530 selective_test_last_start(0),
531 selective_test_last_end(0),
533 nvme_err_log_entries(0)
537 /// Non-persistent state data for a device.
538 struct temp_dev_state
540 bool must_write
; // true if persistent part should be written
542 bool not_cap_offline
; // true == not capable of offline testing
543 bool not_cap_conveyance
;
546 bool not_cap_selective
;
548 unsigned char temperature
; // last recorded Temperature (in Celsius)
549 time_t tempmin_delay
; // time where Min Temperature tracking will start
551 bool removed
; // true if open() failed for removable device
553 bool powermodefail
; // true if power mode check failed
554 int powerskipcnt
; // Number of checks skipped due to idle or standby mode
555 int lastpowermodeskipped
; // the last power mode that was skipped
558 unsigned char SmartPageSupported
; // has log sense IE page (0x2f)
559 unsigned char TempPageSupported
; // has log sense temperature page (0xd)
560 unsigned char ReadECounterPageSupported
;
561 unsigned char WriteECounterPageSupported
;
562 unsigned char VerifyECounterPageSupported
;
563 unsigned char NonMediumErrorPageSupported
;
564 unsigned char SuppressReport
; // minimize nuisance reports
565 unsigned char modese_len
; // mode sense/select cmd len: 0 (don't
568 uint64_t num_sectors
; // Number of sectors
569 ata_smart_values smartval
; // SMART data
570 ata_smart_thresholds_pvt smartthres
; // SMART thresholds
571 bool offline_started
; // true if offline data collection was started
572 bool selftest_started
; // true if self-test was started
577 temp_dev_state::temp_dev_state()
579 not_cap_offline(false),
580 not_cap_conveyance(false),
581 not_cap_short(false),
583 not_cap_selective(false),
587 powermodefail(false),
589 lastpowermodeskipped(0),
590 SmartPageSupported(false),
591 TempPageSupported(false),
592 ReadECounterPageSupported(false),
593 WriteECounterPageSupported(false),
594 VerifyECounterPageSupported(false),
595 NonMediumErrorPageSupported(false),
596 SuppressReport(false),
599 offline_started(false),
600 selftest_started(false)
602 memset(&smartval
, 0, sizeof(smartval
));
603 memset(&smartthres
, 0, sizeof(smartthres
));
606 /// Runtime state data for a device.
608 : public persistent_dev_state
,
609 public temp_dev_state
611 void update_persistent_state();
612 void update_temp_state();
615 /// Container for configuration info for each device.
616 typedef std::vector
<dev_config
> dev_config_vector
;
618 /// Container for state info for each device.
619 typedef std::vector
<dev_state
> dev_state_vector
;
621 // Copy ATA attributes to persistent state.
622 void dev_state::update_persistent_state()
624 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
625 const ata_smart_attribute
& ta
= smartval
.vendor_attributes
[i
];
626 ata_attribute
& pa
= ata_attributes
[i
];
629 pa
.val
= pa
.worst
= 0; pa
.raw
= 0;
637 | ((uint64_t)ta
.raw
[3] << 24)
638 | ((uint64_t)ta
.raw
[4] << 32)
639 | ((uint64_t)ta
.raw
[5] << 40);
640 pa
.resvd
= ta
.reserv
;
644 // Copy ATA attributes from persistent to temp state.
645 void dev_state::update_temp_state()
647 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
648 const ata_attribute
& pa
= ata_attributes
[i
];
649 ata_smart_attribute
& ta
= smartval
.vendor_attributes
[i
];
652 ta
.current
= ta
.worst
= 0;
653 memset(ta
.raw
, 0, sizeof(ta
.raw
));
658 ta
.raw
[0] = (unsigned char) pa
.raw
;
659 ta
.raw
[1] = (unsigned char)(pa
.raw
>> 8);
660 ta
.raw
[2] = (unsigned char)(pa
.raw
>> 16);
661 ta
.raw
[3] = (unsigned char)(pa
.raw
>> 24);
662 ta
.raw
[4] = (unsigned char)(pa
.raw
>> 32);
663 ta
.raw
[5] = (unsigned char)(pa
.raw
>> 40);
664 ta
.reserv
= pa
.resvd
;
668 // Parse a line from a state file.
// Matches `line` against a "key = number" pattern and stores the number in
// the member of `state` selected by the key. The capture-group numbers
// referred to below are the ones given in the comments beside the pattern
// pieces.
// NOTE(review): several original lines (return statements, the declaration
// of `m`, braces, part of the pattern) are not visible in this listing.
669 static bool parse_dev_state_line(const char * line
, persistent_dev_state
& state
)
671 static const regular_expression
regex(
673 "((temperature-min)" // (1 (2)
674 "|(temperature-max)" // (3)
675 "|(self-test-errors)" // (4)
676 "|(self-test-last-err-hour)" // (5)
677 "|(scheduled-test-next-check)" // (6)
678 "|(selective-test-last-start)" // (7)
679 "|(selective-test-last-end)" // (8)
680 "|(ata-error-count)" // (9)
681 "|(mail\\.([0-9]+)\\." // (10 (11)
682 "((count)" // (12 (13)
683 "|(first-sent-time)" // (14)
684 "|(last-sent-time)" // (15)
687 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
695 "|(nvme-err-log-entries)" // (24)
697 " *= *([0-9]+)[ \n]*$" // (25)
// 26 match slots: indices 1..25 are the capture groups above; index 0 is
// not read in the visible code.
700 const int nmatch
= 1+25;
701 regular_expression::match_range match
[nmatch
];
702 if (!regex
.execute(line
, nmatch
, match
))
// The last slot (group 25) is the numeric value; a negative rm_so is
// treated as "group did not match".
704 if (match
[nmatch
-1].rm_so
< 0)
// Convert the value as base 10. The narrowing casts used in the
// assignments below truncate it without any range check.
707 uint64_t val
= strtoull(line
+ match
[nmatch
-1].rm_so
, (char **)0, 10);
// Walk the key alternatives in group order. `m` is pre-incremented for
// each test, so this chain visits groups 2..9; that is consistent with `m`
// starting at 1 (its declaration is not visible here - confirm).
710 if (match
[++m
].rm_so
>= 0)
711 state
.tempmin
= (unsigned char)val
;
712 else if (match
[++m
].rm_so
>= 0)
713 state
.tempmax
= (unsigned char)val
;
714 else if (match
[++m
].rm_so
>= 0)
715 state
.selflogcount
= (unsigned char)val
;
716 else if (match
[++m
].rm_so
>= 0)
717 state
.selfloghour
= (unsigned short)val
;
718 else if (match
[++m
].rm_so
>= 0)
719 state
.scheduled_test_next_check
= (time_t)val
;
720 else if (match
[++m
].rm_so
>= 0)
721 state
.selective_test_last_start
= val
;
722 else if (match
[++m
].rm_so
>= 0)
723 state
.selective_test_last_end
= val
;
724 else if (match
[++m
].rm_so
>= 0)
725 state
.ataerrorcount
= (int)val
;
// m goes from 9 to 11, skipping group 10 (the whole "mail.N." alternative);
// group 11 is the mail type index N.
726 else if (match
[m
+=2].rm_so
>= 0) {
727 int i
= atoi(line
+match
[m
].rm_so
);
// Index must address an element of state.maillog[].
// NOTE(review): the statements guarded by this test and by the
// MAILTYPE_TEST test below are not visible in this listing.
728 if (!(0 <= i
&& i
< SMARTD_NMAIL
))
730 if (i
== MAILTYPE_TEST
) // Don't suppress test mails
// m goes from 11 to 13 (count); groups 14 and 15 are the two timestamps.
732 if (match
[m
+=2].rm_so
>= 0)
733 state
.maillog
[i
].logged
= (int)val
;
734 else if (match
[++m
].rm_so
>= 0)
735 state
.maillog
[i
].firstsent
= (time_t)val
;
736 else if (match
[++m
].rm_so
>= 0)
737 state
.maillog
[i
].lastsent
= (time_t)val
;
// Reached only when group 11 did not match, so m is still 11; adding 6
// gives group 17, the attribute table index.
741 else if (match
[m
+=5+1].rm_so
>= 0) {
742 int i
= atoi(line
+match
[m
].rm_so
);
// Index must address an element of state.ata_attributes[].
743 if (!(0 <= i
&& i
< NUMBER_ATA_SMART_ATTRIBUTES
))
// m goes from 17 to 19; the five tests below use groups 19..23, whose
// pattern lines are not visible in this listing.
745 if (match
[m
+=2].rm_so
>= 0)
746 state
.ata_attributes
[i
].id
= (unsigned char)val
;
747 else if (match
[++m
].rm_so
>= 0)
748 state
.ata_attributes
[i
].val
= (unsigned char)val
;
749 else if (match
[++m
].rm_so
>= 0)
750 state
.ata_attributes
[i
].worst
= (unsigned char)val
;
751 else if (match
[++m
].rm_so
>= 0)
752 state
.ata_attributes
[i
].raw
= val
;
753 else if (match
[++m
].rm_so
>= 0)
754 state
.ata_attributes
[i
].resvd
= (unsigned char)val
;
// m is still 17 here (the failed test above added 6 to 11), so m+7 is
// group 24, the nvme-err-log-entries key.
758 else if (match
[m
+7].rm_so
>= 0)
759 state
.nvme_err_log_entries
= val
;
765 // Read a state file.
766 static bool read_dev_state(const char * path
, persistent_dev_state
& state
)
768 stdio_file
f(path
, "r");
771 pout("Cannot read state file \"%s\"\n", path
);
775 setmode(fileno(f
), O_TEXT
); // Allow files with \r\n
778 persistent_dev_state new_state
;
779 int good
= 0, bad
= 0;
781 while (fgets(line
, sizeof(line
), f
)) {
782 const char * s
= line
+ strspn(line
, " \t");
783 if (!*s
|| *s
== '#')
785 if (!parse_dev_state_line(line
, new_state
))
793 pout("%s: format error\n", path
);
796 pout("%s: %d invalid line(s) ignored\n", path
, bad
);
799 // This sets the values missing in the file to 0.
// Write one "name = val" line to `f`, with the value printed as an
// unsigned 64-bit decimal number (PRIu64).
// NOTE(review): the original lines between the signature and the fprintf
// are not visible in this listing, so any condition guarding the write
// cannot be confirmed here.
804 static void write_dev_state_line(FILE * f
, const char * name
, uint64_t val
)
807 fprintf(f
, "%s = %" PRIu64
"\n", name
, val
);
// Overload for indexed keys: writes one "name1.id.name2 = val" line to `f`,
// with the value printed as an unsigned 64-bit decimal number (PRIu64).
// NOTE(review): the original lines between the signature and the fprintf
// are not visible in this listing, so any condition guarding the write
// cannot be confirmed here.
810 static void write_dev_state_line(FILE * f
, const char * name1
, int id
, const char * name2
, uint64_t val
)
813 fprintf(f
, "%s.%d.%s = %" PRIu64
"\n", name1
, id
, name2
, val
);
816 // Write a state file
817 static bool write_dev_state(const char * path
, const persistent_dev_state
& state
)
819 // Rename old "file" to "file~"
820 std::string pathbak
= path
; pathbak
+= '~';
821 unlink(pathbak
.c_str());
822 rename(path
, pathbak
.c_str());
824 stdio_file
f(path
, "w");
826 pout("Cannot create state file \"%s\"\n", path
);
830 fprintf(f
, "# smartd state file\n");
831 write_dev_state_line(f
, "temperature-min", state
.tempmin
);
832 write_dev_state_line(f
, "temperature-max", state
.tempmax
);
833 write_dev_state_line(f
, "self-test-errors", state
.selflogcount
);
834 write_dev_state_line(f
, "self-test-last-err-hour", state
.selfloghour
);
835 write_dev_state_line(f
, "scheduled-test-next-check", state
.scheduled_test_next_check
);
836 write_dev_state_line(f
, "selective-test-last-start", state
.selective_test_last_start
);
837 write_dev_state_line(f
, "selective-test-last-end", state
.selective_test_last_end
);
840 for (i
= 0; i
< SMARTD_NMAIL
; i
++) {
841 if (i
== MAILTYPE_TEST
) // Don't suppress test mails
843 const mailinfo
& mi
= state
.maillog
[i
];
846 write_dev_state_line(f
, "mail", i
, "count", mi
.logged
);
847 write_dev_state_line(f
, "mail", i
, "first-sent-time", mi
.firstsent
);
848 write_dev_state_line(f
, "mail", i
, "last-sent-time", mi
.lastsent
);
852 write_dev_state_line(f
, "ata-error-count", state
.ataerrorcount
);
854 for (i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
855 const persistent_dev_state::ata_attribute
& pa
= state
.ata_attributes
[i
];
858 write_dev_state_line(f
, "ata-smart-attribute", i
, "id", pa
.id
);
859 write_dev_state_line(f
, "ata-smart-attribute", i
, "val", pa
.val
);
860 write_dev_state_line(f
, "ata-smart-attribute", i
, "worst", pa
.worst
);
861 write_dev_state_line(f
, "ata-smart-attribute", i
, "raw", pa
.raw
);
862 write_dev_state_line(f
, "ata-smart-attribute", i
, "resvd", pa
.resvd
);
866 write_dev_state_line(f
, "nvme-err-log-entries", state
.nvme_err_log_entries
);
871 // Write to the attrlog file
872 static bool write_dev_attrlog(const char * path
, const dev_state
& state
)
874 stdio_file
f(path
, "a");
876 pout("Cannot create attribute log file \"%s\"\n", path
);
881 time_t now
= time(0);
882 struct tm
* tms
= gmtime(&now
);
883 fprintf(f
, "%d-%02d-%02d %02d:%02d:%02d;",
884 1900+tms
->tm_year
, 1+tms
->tm_mon
, tms
->tm_mday
,
885 tms
->tm_hour
, tms
->tm_min
, tms
->tm_sec
);
887 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
888 const persistent_dev_state::ata_attribute
& pa
= state
.ata_attributes
[i
];
891 fprintf(f
, "\t%d;%d;%" PRIu64
";", pa
.id
, pa
.val
, pa
.raw
);
894 const struct scsiErrorCounter
* ecp
;
895 const char * pageNames
[3] = {"read", "write", "verify"};
896 for (int k
= 0; k
< 3; ++k
) {
897 if ( !state
.scsi_error_counters
[k
].found
) continue;
898 ecp
= &state
.scsi_error_counters
[k
].errCounter
;
899 fprintf(f
, "\t%s-corr-by-ecc-fast;%" PRIu64
";"
900 "\t%s-corr-by-ecc-delayed;%" PRIu64
";"
901 "\t%s-corr-by-retry;%" PRIu64
";"
902 "\t%s-total-err-corrected;%" PRIu64
";"
903 "\t%s-corr-algorithm-invocations;%" PRIu64
";"
904 "\t%s-gb-processed;%.3f;"
905 "\t%s-total-unc-errors;%" PRIu64
";",
906 pageNames
[k
], ecp
->counter
[0],
907 pageNames
[k
], ecp
->counter
[1],
908 pageNames
[k
], ecp
->counter
[2],
909 pageNames
[k
], ecp
->counter
[3],
910 pageNames
[k
], ecp
->counter
[4],
911 pageNames
[k
], (ecp
->counter
[5] / 1000000000.0),
912 pageNames
[k
], ecp
->counter
[6]);
914 if(state
.scsi_nonmedium_error
.found
&& state
.scsi_nonmedium_error
.nme
.gotPC0
) {
915 fprintf(f
, "\tnon-medium-errors;%" PRIu64
";", state
.scsi_nonmedium_error
.nme
.counterPC0
);
917 // write SCSI current temperature if it is monitored
918 if (state
.temperature
)
919 fprintf(f
, "\ttemperature;%d;", state
.temperature
);
925 // Write all state files. If write_always is false, don't write
926 // unless must_write is set.
// `configs` and `states` are parallel containers: entry i of each describes
// the same device. The loop is bounded by states.size(); configs is read
// through the bounds-checked at().
// NOTE(review): the statements guarded by the three tests at original lines
// 933, 936 and 938 are not visible in this listing; presumably each skips to
// the next device - confirm.
927 static void write_all_dev_states(const dev_config_vector
& configs
,
928 dev_state_vector
& states
,
929 bool write_always
= true)
931 for (unsigned i
= 0; i
< states
.size(); i
++) {
932 const dev_config
& cfg
= configs
.at(i
);
// Device has no state file configured.
933 if (cfg
.state_file
.empty())
935 dev_state
& state
= states
[i
];
// Nothing changed for this device and a write was not forced.
936 if (!write_always
&& !state
.must_write
)
// Writing the state file failed.
938 if (!write_dev_state(cfg
.state_file
.c_str(), state
))
// Clear the pending-write flag.
940 state
.must_write
= false;
// Log the write when it was forced or when debug mode is on.
941 if (write_always
|| debugmode
)
942 PrintOut(LOG_INFO
, "Device: %s, state written to %s\n",
943 cfg
.name
.c_str(), cfg
.state_file
.c_str());
947 // Write to all attrlog files
// `configs` and `states` are parallel containers: entry i of each describes
// the same device. The loop is bounded by states.size(); configs is read
// through the bounds-checked at().
// The bool result of write_dev_attrlog() is not checked here.
948 static void write_all_dev_attrlogs(const dev_config_vector
& configs
,
949 dev_state_vector
& states
)
951 for (unsigned i
= 0; i
< states
.size(); i
++) {
952 const dev_config
& cfg
= configs
.at(i
);
// Device has no attribute log file configured.
// NOTE(review): the guarded statement is not visible in this listing;
// presumably it skips to the next device - confirm.
953 if (cfg
.attrlog_file
.empty())
955 dev_state
& state
= states
[i
];
956 write_dev_attrlog(cfg
.attrlog_file
.c_str(), state
);
960 extern "C" { // signal handlers require C-linkage
962 // Note if we catch a SIGUSR1
963 static void USR1handler(int sig
)
971 // Note if we catch a SIGUSR2
972 static void USR2handler(int sig
)
980 // Note if we catch a HUP (or INT in debug mode)
981 static void HUPhandler(int sig
)
990 // signal handler for TERM, QUIT, and INT (if not in debug mode)
991 static void sighandler(int sig
)
1000 #ifdef HAVE_LIBCAP_NG
1001 // capabilities(7) support
1003 static bool capabilities_enabled
= false;
1005 static void capabilities_drop_now()
1007 if (!capabilities_enabled
)
1009 capng_clear(CAPNG_SELECT_BOTH
);
1010 capng_updatev(CAPNG_ADD
, (capng_type_t
)(CAPNG_EFFECTIVE
|CAPNG_PERMITTED
),
1011 CAP_SYS_ADMIN
, CAP_MKNOD
, CAP_SYS_RAWIO
, -1);
1012 capng_apply(CAPNG_SELECT_BOTH
);
1015 static void capabilities_check_config(dev_config_vector
& configs
)
1017 if (!capabilities_enabled
)
1019 for (unsigned i
= 0; i
< configs
.size(); i
++) {
1020 dev_config
& cfg
= configs
[i
];
1021 if (!cfg
.emailaddress
.empty() || !cfg
.emailcmdline
.empty()) {
1022 PrintOut(LOG_INFO
, "Device: %s, --capabilites is set, mail will be suppressed.\n",
1024 cfg
.emailaddress
.clear(); cfg
.emailcmdline
.clear();
1029 #else // HAVE_LIBCAP_NG
1030 // No capabilities(7) support
1032 static inline void capabilities_drop_now() { }
1033 static inline void capabilities_check_config(dev_config_vector
&) { }
1035 #endif // HAVE_LIBCAP_NG
1037 // a replacement for setenv() which is not available on all platforms.
1038 // Note that the string passed to putenv must not be freed or made
1039 // invalid, since a pointer to it is kept by putenv(). This means that
1040 // it must either be a static buffer or allocated off the heap. The
1041 // string can be freed if the environment variable is redefined via
1042 // another call to putenv(). There is no portable way to unset a variable
1043 // with putenv(). So we manage the buffer in a static object.
1044 // Using setenv() if available is not considered because some
1045 // implementations may produce memory leaks.
1051 : m_buf((char *)0) { }
1053 void set(const char * name
, const char * value
);
1058 env_buffer(const env_buffer
&);
1059 void operator=(const env_buffer
&);
1062 void env_buffer::set(const char * name
, const char * value
)
1064 int size
= strlen(name
) + 1 + strlen(value
) + 1;
1065 char * newbuf
= new char[size
];
1066 snprintf(newbuf
, size
, "%s=%s", name
, value
);
1069 throw std::runtime_error("putenv() failed");
1071 // This assumes that the same NAME is passed on each call
1076 #define EBUFLEN 1024
1078 static void MailWarning(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
1079 __attribute_format_printf(4, 5);
1081 // If either the email address or the executable path is non-empty, then
1082 // send and log a warning email, or run the executable.
1083 static void MailWarning(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
1085 static const char * const whichfail
[] = {
1091 "FailedHealthCheck", // 5
1092 "FailedReadSmartData", // 6
1093 "FailedReadSmartErrorLog", // 7
1094 "FailedReadSmartSelfTestLog", // 8
1095 "FailedOpenDevice", // 9
1096 "CurrentPendingSector", // 10
1097 "OfflineUncorrectableSector", // 11
1101 // See if user wants us to send mail
1102 if (cfg
.emailaddress
.empty() && cfg
.emailcmdline
.empty())
1105 std::string address
= cfg
.emailaddress
;
1106 const char * executable
= cfg
.emailcmdline
.c_str();
1108 // which type of mail are we sending?
1109 mailinfo
* mail
=(state
.maillog
)+which
;
1111 // checks for sanity
1112 if (cfg
.emailfreq
<1 || cfg
.emailfreq
>3) {
1113 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg
.emailfreq
);
1116 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
1117 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
1118 which
, (int)sizeof(whichfail
));
1122 // Return if a single warning mail has been sent.
1123 if ((cfg
.emailfreq
==1) && mail
->logged
)
1126 // Return if this is an email test and one has already been sent.
1127 if (which
== 0 && mail
->logged
)
1130 // To decide whether to send mail, we need to know what time it is.
1131 time_t epoch
= time(0);
1133 // Return if less than one day has gone by
1134 const int day
= 24*3600;
1135 if (cfg
.emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
1138 // Return if less than 2^(logged-1) days have gone by
1139 if (cfg
.emailfreq
==3 && mail
->logged
) {
1140 int days
= 0x01 << (mail
->logged
- 1);
1142 if (epoch
<(mail
->lastsent
+days
))
1146 // record the time of this mail message, and the first mail message
1148 mail
->firstsent
=epoch
;
1149 mail
->lastsent
=epoch
;
1151 // print warning string into message
1155 vsnprintf(message
, sizeof(message
), fmt
, ap
);
1158 // replace commas by spaces to separate recipients
1159 std::replace(address
.begin(), address
.end(), ',', ' ');
1161 // Export information in environment variables that will be useful
1163 static env_buffer env
[12];
1164 env
[0].set("SMARTD_MAILER", executable
);
1165 env
[1].set("SMARTD_MESSAGE", message
);
1166 char dates
[DATEANDEPOCHLEN
];
1167 snprintf(dates
, sizeof(dates
), "%d", mail
->logged
);
1168 env
[2].set("SMARTD_PREVCNT", dates
);
1169 dateandtimezoneepoch(dates
, mail
->firstsent
);
1170 env
[3].set("SMARTD_TFIRST", dates
);
1171 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
1172 env
[4].set("SMARTD_TFIRSTEPOCH", dates
);
1173 env
[5].set("SMARTD_FAILTYPE", whichfail
[which
]);
1174 env
[6].set("SMARTD_ADDRESS", address
.c_str());
1175 env
[7].set("SMARTD_DEVICESTRING", cfg
.name
.c_str());
1177 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1178 env
[8].set("SMARTD_DEVICETYPE",
1179 (!cfg
.dev_type
.empty() ? cfg
.dev_type
.c_str() : "auto"));
1180 env
[9].set("SMARTD_DEVICE", cfg
.dev_name
.c_str());
1182 env
[10].set("SMARTD_DEVICEINFO", cfg
.dev_idinfo
.c_str());
1184 if (which
) switch (cfg
.emailfreq
) {
1185 case 2: dates
[0] = '1'; dates
[1] = 0; break;
1186 case 3: snprintf(dates
, sizeof(dates
), "%d", (0x01)<<mail
->logged
);
1188 env
[11].set("SMARTD_NEXTDAYS", dates
);
1190 // now construct a command to send this as EMAIL
1192 executable
= "<mail>";
1193 const char * newadd
= (!address
.empty()? address
.c_str() : "<nomailer>");
1194 const char * newwarn
= (which
? "Warning via" : "Test of");
1198 // Path may contain spaces
1199 snprintf(command
, sizeof(command
), "\"%s\" 2>&1", warning_script
.c_str());
1201 snprintf(command
, sizeof(command
), "%s 2>&1", warning_script
.c_str());
1204 // tell SYSLOG what we are about to do...
1205 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
1206 which
?"Sending warning via":"Executing test of", executable
, newadd
);
1208 // issue the command to send mail or to run the user's executable
1211 if (!(pfp
=popen(command
, "r")))
1212 // failed to popen() mail process
1213 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1214 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
1218 char buffer
[EBUFLEN
];
1220 // if unexpected output on stdout/stderr, null terminate, print, and flush
1221 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
1223 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
1224 buffer
[newlen
]='\0';
1225 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1226 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
1228 // flush pipe if needed
1229 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
1232 // tell user that pipe was flushed, or that something is really wrong
1233 if (count
&& count
<EBUFLEN
)
1234 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1235 newwarn
, executable
, newadd
);
1237 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1238 newwarn
, executable
, newadd
);
1241 // if something went wrong with mail process, print warning
1243 if (-1==(status
=pclose(pfp
)))
1244 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
1245 errno
?strerror(errno
):"");
1247 // mail process apparently succeeded. Check and report exit status
1248 if (WIFEXITED(status
)) {
1249 // exited 'normally' (but perhaps with nonzero status)
1250 int status8
= WEXITSTATUS(status
);
1252 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1253 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
1255 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1256 newwarn
, executable
, newadd
, status
, status8
);
1258 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
1261 if (WIFSIGNALED(status
))
1262 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1263 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
1265 // this branch is probably not possible. If subprocess is
1266 // stopped then pclose() should not return.
1267 if (WIFSTOPPED(status
))
1268 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1269 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
1274 // increment mail sent counter
1278 static void reset_warning_mail(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
1279 __attribute_format_printf(4, 5);
1281 static void reset_warning_mail(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
1283 if (!(0 <= which
&& which
< SMARTD_NMAIL
))
1286 // Return if no mail sent yet
1287 mailinfo
& mi
= state
.maillog
[which
];
1291 // Format & print message
1295 vsnprintf(msg
, sizeof(msg
), fmt
, ap
);
1298 PrintOut(LOG_INFO
, "Device: %s, %s, warning condition reset after %d email%s\n", cfg
.name
.c_str(),
1299 msg
, mi
.logged
, (mi
.logged
==1 ? "" : "s"));
1301 // Clear mail counter and timestamps
1303 state
.must_write
= true;
1308 // Output multiple lines via separate syslog(3) calls.
1309 __attribute_format_printf(2, 0)
1310 static void vsyslog_lines(int priority
, const char * fmt
, va_list ap
)
1312 char buf
[512+EBUFLEN
]; // enough space for exec cmd output in MailWarning()
1313 vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
1315 for (char * p
= buf
, * q
; p
&& *p
; p
= q
) {
1316 if ((q
= strchr(p
, '\n')))
1319 syslog(priority
, "%s\n", p
);
1324 // os_win32/syslog_win32.cpp supports multiple lines.
1325 #define vsyslog_lines vsyslog
1328 // Printing function for watching ataprint commands, or losing them
1329 // [From GLIBC Manual: Since the prototype doesn't specify types for
1330 // optional arguments, in a call to a variadic function the default
1331 // argument promotions are performed on the optional argument
1332 // values. This means the objects of type char or short int (whether
1333 // signed or not) are promoted to either int or unsigned int, as
1335 void pout(const char *fmt
, ...){
1338 // get the correct time in syslog()
1339 FixGlibcTimeZoneBug();
1340 // initialize variable argument list
1342 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1343 if (debugmode
&& debugmode
!= 2) {
1346 if (facility
== LOG_LOCAL1
) // logging to stdout
1349 vfprintf(f
, fmt
, ap
);
1352 // in debugmode==2 mode we print output from knowndrives.o functions
1353 else if (debugmode
==2 || ata_debugmode
|| scsi_debugmode
) {
1354 openlog("smartd", LOG_PID
, facility
);
1355 vsyslog_lines(LOG_INFO
, fmt
, ap
);
1362 // This function prints either to stdout or to the syslog as needed.
1363 static void PrintOut(int priority
, const char *fmt
, ...){
1366 // get the correct time in syslog()
1367 FixGlibcTimeZoneBug();
1368 // initialize variable argument list
1373 if (facility
== LOG_LOCAL1
) // logging to stdout
1376 vfprintf(f
, fmt
, ap
);
1380 openlog("smartd", LOG_PID
, facility
);
1381 vsyslog_lines(priority
, fmt
, ap
);
1388 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1389 void checksumwarning(const char * string
)
1391 pout("Warning! %s error: invalid SMART checksum.\n", string
);
1396 // Wait for the pid file to show up, this makes sure a calling program knows
1397 // that the daemon is really up and running and has a pid to kill it
1398 static bool WaitForPidFile()
1400 int waited
, max_wait
= 10;
1401 struct stat stat_buf
;
1403 if (pid_file
.empty() || debugmode
)
1406 for(waited
= 0; waited
< max_wait
; ++waited
) {
1407 if (!stat(pid_file
.c_str(), &stat_buf
)) {
1417 // Forks new process if needed, closes ALL file descriptors,
1418 // redirects stdin, stdout, and stderr. Not quite daemon().
1419 // See https://www.linuxjournal.com/article/2335
1420 // for a good description of why we do things this way.
1421 static int daemon_init()
1425 // flush all buffered streams. Else we might get two copies of open
1426 // streams since both parent and child get copies of the buffers.
1431 if ((pid
=fork()) < 0) {
1433 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1434 return EXIT_STARTUP
;
1437 // we are the parent process, wait for pid file, then exit cleanly
1438 if(!WaitForPidFile()) {
1439 PrintOut(LOG_CRIT
,"PID file %s didn't show up!\n", pid_file
.c_str());
1440 return EXIT_STARTUP
;
1445 // from here on, we are the child process.
1448 // Fork one more time to avoid any possibility of having terminals
1449 if ((pid
=fork()) < 0) {
1451 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1452 return EXIT_STARTUP
;
1455 // we are the parent process -- exit cleanly
1458 // Now we are the child's child...
1461 // close any open file descriptors
1462 for (int i
= getdtablesize(); --i
>= 0; )
1465 // redirect any IO attempts to /dev/null and change to root directory
1466 int fd
= open("/dev/null", O_RDWR
);
1467 if (!(fd
== 0 && dup(fd
) == 1 && dup(fd
) == 2 && !chdir("/"))) {
1468 PrintOut(LOG_CRIT
, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1469 return EXIT_STARTUP
;
1474 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1478 // No fork() on native Win32
1479 // Detach this process from console
1481 if (daemon_detach("smartd")) {
1482 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1483 return EXIT_STARTUP
;
1485 // stdin/out/err now closed if not redirected
1489 // No error, continue in main_worker()
1493 // create a PID file containing the current process id
1494 static bool write_pid_file()
1496 if (!pid_file
.empty()) {
1497 pid_t pid
= getpid();
1500 old_umask
= umask(0077); // rwx------
1502 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1503 old_umask
= umask(0033); // rwxr--r--
1506 stdio_file
f(pid_file
.c_str(), "w");
1508 if (!(f
&& fprintf(f
, "%d\n", (int)pid
) > 0 && f
.close())) {
1509 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
.c_str());
1512 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
.c_str(), (int)pid
);
1517 // Prints header identifying version of code and home
1518 static void PrintHead()
1520 PrintOut(LOG_INFO
, "%s\n", format_version_info("smartd").c_str());
1523 // prints help info for configuration file Directives
1524 static void Directives()
1527 "Configuration file (%s) Directives (after device name):\n"
1528 " -d TYPE Set the device type: auto, ignore, removable,\n"
1530 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1531 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1532 " -S VAL Enable/disable attribute autosave (on/off)\n"
1533 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1534 " -H Monitor SMART Health Status, report if failed\n"
1535 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1536 " -l TYPE Monitor SMART log or self-test status:\n"
1537 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1538 " -l scterc,R,W Set SCT Error Recovery Control\n"
1539 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1540 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1541 " -f Monitor 'Usage' Attributes, report failures\n"
1542 " -m ADD Send email warning to address ADD\n"
1543 " -M TYPE Modify email warning behavior (see man page)\n"
1544 " -p Report changes in 'Prefailure' Attributes\n"
1545 " -u Report changes in 'Usage' Attributes\n"
1546 " -t Equivalent to -p and -u Directives\n"
1547 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1548 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1549 " -i ID Ignore Attribute ID for -f Directive\n"
1550 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1551 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1552 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1553 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1554 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1555 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1556 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1557 " -F TYPE Use firmware bug workaround:\n"
1559 " # Comment: text after a hash sign is ignored\n"
1560 " \\ Line continuation character\n"
1561 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1562 "Use ID = 0 to turn off -C and/or -U Directives\n"
1563 "Example: /dev/sda -a\n",
1565 smi()->get_valid_dev_types_str().c_str(),
1566 get_valid_firmwarebug_args());
1569 /* Returns a pointer to a static string containing a formatted list of the valid
1570 arguments to the option opt or NULL on failure. */
1571 static const char *GetValidArgList(char opt
)
1576 return "<PATH_PREFIX>";
1578 return "[+]<FILE_NAME>";
1580 return "<FILE_NAME>, -";
1582 return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
1584 return "nodev, errors, nodevstartup, never, onecheck, showtests";
1586 return "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
1589 return "<FILE_NAME>";
1591 return "<INTEGER_SECONDS>";
1597 /* prints help information for command syntax */
1600 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1601 PrintOut(LOG_INFO
," -A PREFIX, --attributelog=PREFIX\n");
1602 PrintOut(LOG_INFO
," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1603 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1604 PrintOut(LOG_INFO
," [default is " SMARTMONTOOLS_ATTRIBUTELOG
"MODEL-SERIAL.ata.csv]\n");
1606 PrintOut(LOG_INFO
,"\n");
1607 PrintOut(LOG_INFO
," -B [+]FILE, --drivedb=[+]FILE\n");
1608 PrintOut(LOG_INFO
," Read and replace [add] drive database from FILE\n");
1609 PrintOut(LOG_INFO
," [default is +%s", get_drivedb_path_add());
1610 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1611 PrintOut(LOG_INFO
,"\n");
1612 PrintOut(LOG_INFO
," and then %s", get_drivedb_path_default());
1614 PrintOut(LOG_INFO
,"]\n\n");
1615 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1616 PrintOut(LOG_INFO
," Read configuration file NAME or stdin\n");
1617 PrintOut(LOG_INFO
," [default is %s]\n\n", configfile
);
1618 #ifdef HAVE_LIBCAP_NG
1619 PrintOut(LOG_INFO
," -C, --capabilities\n");
1620 PrintOut(LOG_INFO
," Drop unneeded Linux process capabilities.\n"
1621 " Warning: Mail notification does not work when used.\n\n");
1623 PrintOut(LOG_INFO
," -d, --debug\n");
1624 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1625 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1626 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1627 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1628 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1629 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1630 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1631 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
1633 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1635 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1638 PrintOut(LOG_INFO
," -n, --no-fork\n");
1639 PrintOut(LOG_INFO
," Do not fork into background\n");
1640 #ifdef HAVE_LIBSYSTEMD
1641 PrintOut(LOG_INFO
," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1642 #endif // HAVE_LIBSYSTEMD
1643 PrintOut(LOG_INFO
,"\n");
1645 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1646 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1647 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1648 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1649 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1650 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1651 PrintOut(LOG_INFO
," -s PREFIX, --savestates=PREFIX\n");
1652 PrintOut(LOG_INFO
," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1653 #ifdef SMARTMONTOOLS_SAVESTATES
1654 PrintOut(LOG_INFO
," [default is " SMARTMONTOOLS_SAVESTATES
"MODEL-SERIAL.TYPE.state]\n");
1656 PrintOut(LOG_INFO
,"\n");
1657 PrintOut(LOG_INFO
," -w NAME, --warnexec=NAME\n");
1658 PrintOut(LOG_INFO
," Run executable NAME on warnings\n");
1660 PrintOut(LOG_INFO
," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR
"/smartd_warning.sh]\n\n");
1662 PrintOut(LOG_INFO
," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1665 PrintOut(LOG_INFO
," --service\n");
1666 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1667 PrintOut(LOG_INFO
," smartd install [options]\n");
1668 PrintOut(LOG_INFO
," Remove service with:\n");
1669 PrintOut(LOG_INFO
," smartd remove\n\n");
1671 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1672 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
1675 static int CloseDevice(smart_device
* device
, const char * name
)
1677 if (!device
->close()){
1678 PrintOut(LOG_INFO
,"Device: %s, %s, close() failed\n", name
, device
->get_errmsg());
1681 // device successfully closed
// Return true if character C is not allowed in a state file name.
// Only the ASCII digits '0'-'9' and the ASCII letters 'A'-'Z' and
// 'a'-'z' are allowed; every other character yields true.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false; // digit
  if ('A' <= c && c <= 'Z')
    return false; // upper case letter
  if ('a' <= c && c <= 'z')
    return false; // lower case letter
  return true;
}
1693 // Read error count from Summary or Extended Comprehensive SMART error log
1694 // Return -1 on error
1695 static int read_ata_error_count(ata_device
* device
, const char * name
,
1696 firmwarebug_defs firmwarebugs
, bool extended
)
1699 ata_smart_errorlog log
;
1700 if (ataReadErrorLog(device
, &log
, firmwarebugs
)){
1701 PrintOut(LOG_INFO
,"Device: %s, Read Summary SMART Error Log failed\n",name
);
1704 return (log
.error_log_pointer
? log
.ata_error_count
: 0);
1707 ata_smart_exterrlog logx
;
1708 if (!ataReadExtErrorLog(device
, &logx
, 0, 1 /*first sector only*/, firmwarebugs
)) {
1709 PrintOut(LOG_INFO
,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name
);
1712 // Some disks use the reserved byte as index, see ataprint.cpp.
1713 return (logx
.error_log_index
|| logx
.reserved1
? logx
.device_error_count
: 0);
1717 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1718 // error count, and top bits are the power-on hours of the last error.
1719 static int SelfTestErrorCount(ata_device
* device
, const char * name
,
1720 firmwarebug_defs firmwarebugs
)
1722 struct ata_smart_selftestlog log
;
1724 if (ataReadSelfTestLog(device
, &log
, firmwarebugs
)){
1725 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1729 if (!log
.mostrecenttest
)
1733 // Count failed self-tests
1734 int errcnt
= 0, hours
= 0;
1735 for (int i
= 20; i
>= 0; i
--) {
1736 int j
= (i
+ log
.mostrecenttest
) % 21;
1737 const ata_smart_selftestlog_struct
& entry
= log
.selftest_struct
[j
];
1738 if (!nonempty(&entry
, sizeof(entry
)))
1741 int status
= entry
.selfteststatus
>> 4;
1742 if (status
== 0x0 && (entry
.selftestnumber
& 0x7f) == 0x02)
1743 // First successful extended self-test, stop count
1746 if (0x3 <= status
&& status
<= 0x8) {
1747 // Self-test showed an error
1749 // Keep track of time of most recent error
1751 hours
= entry
.timestamp
;
1755 return ((hours
<< 8) | errcnt
);
// Decode a packed self-test result word in which the bottom 8 bits
// hold the self-test error count and the next 16 bits hold the
// power-on hours of the most recent error.
// The argument is fully parenthesized so that expressions with
// lower-precedence operators (e.g. 'a | b') are decoded correctly.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
// Check offline data collection status.
// Returns true if the low 7 bits of STATUS equal 0x03; the top bit
// (0x80) is ignored for this comparison.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7f;
  return code == 0x03;
}
// Check self-test execution status.
// Returns true if the upper four bits of STATUS are all set (0xf);
// the lower four bits do not affect the result.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return (status & 0xf0) == 0xf0;
}
1773 // Log offline data collection status
1774 static void log_offline_data_coll_status(const char * name
, unsigned char status
)
1777 switch (status
& 0x7f) {
1778 case 0x00: msg
= "was never started"; break;
1779 case 0x02: msg
= "was completed without error"; break;
1780 case 0x03: msg
= "is in progress"; break;
1781 case 0x04: msg
= "was suspended by an interrupting command from host"; break;
1782 case 0x05: msg
= "was aborted by an interrupting command from host"; break;
1783 case 0x06: msg
= "was aborted by the device with a fatal error"; break;
1788 PrintOut(((status
& 0x7f) == 0x06 ? LOG_CRIT
: LOG_INFO
),
1789 "Device: %s, offline data collection %s%s\n", name
, msg
,
1790 ((status
& 0x80) ? " (auto:on)" : ""));
1792 PrintOut(LOG_INFO
, "Device: %s, unknown offline data collection status 0x%02x\n",
1796 // Log self-test execution status
1797 static void log_self_test_exec_status(const char * name
, unsigned char status
)
1800 switch (status
>> 4) {
1801 case 0x0: msg
= "completed without error"; break;
1802 case 0x1: msg
= "was aborted by the host"; break;
1803 case 0x2: msg
= "was interrupted by the host with a reset"; break;
1804 case 0x3: msg
= "could not complete due to a fatal or unknown error"; break;
1805 case 0x4: msg
= "completed with error (unknown test element)"; break;
1806 case 0x5: msg
= "completed with error (electrical test element)"; break;
1807 case 0x6: msg
= "completed with error (servo/seek test element)"; break;
1808 case 0x7: msg
= "completed with error (read test element)"; break;
1809 case 0x8: msg
= "completed with error (handling damage?)"; break;
1814 PrintOut(((status
>> 4) >= 0x4 ? LOG_CRIT
: LOG_INFO
),
1815 "Device: %s, previous self-test %s\n", name
, msg
);
1816 else if ((status
>> 4) == 0xf)
1817 PrintOut(LOG_INFO
, "Device: %s, self-test in progress, %u0%% remaining\n",
1818 name
, status
& 0x0f);
1820 PrintOut(LOG_INFO
, "Device: %s, unknown self-test status 0x%02x\n",
1824 // Check pending sector count id (-C, -U directives).
1825 static bool check_pending_id(const dev_config
& cfg
, const dev_state
& state
,
1826 unsigned char id
, const char * msg
)
1828 // Check attribute index
1829 int i
= ata_find_attr_index(id
, state
.smartval
);
1831 PrintOut(LOG_INFO
, "Device: %s, can't monitor %s count - no Attribute %d\n",
1832 cfg
.name
.c_str(), msg
, id
);
1837 uint64_t rawval
= ata_get_attr_raw_value(state
.smartval
.vendor_attributes
[i
],
1838 cfg
.attribute_defs
);
1839 if (rawval
>= (state
.num_sectors
? state
.num_sectors
: 0xffffffffULL
)) {
1840 PrintOut(LOG_INFO
, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64
" (0x%" PRIx64
")\n",
1841 cfg
.name
.c_str(), msg
, id
, rawval
, rawval
);
1848 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1849 static void finish_device_scan(dev_config
& cfg
, dev_state
& state
)
1851 // Set cfg.emailfreq if user hasn't set it
1852 if ((!cfg
.emailaddress
.empty() || !cfg
.emailcmdline
.empty()) && !cfg
.emailfreq
) {
1853 // Avoid that emails are suppressed forever due to state persistence
1854 if (cfg
.state_file
.empty())
1855 cfg
.emailfreq
= 1; // '-M once'
1857 cfg
.emailfreq
= 2; // '-M daily'
1860 // Start self-test regex check now if time was not read from state file
1861 if (!cfg
.test_regex
.empty() && !state
.scheduled_test_next_check
)
1862 state
.scheduled_test_next_check
= time(0);
1865 // Common function to format result message for ATA setting
1866 static void format_set_result_msg(std::string
& msg
, const char * name
, bool ok
,
1867 int set_option
= 0, bool has_value
= false)
1874 else if (set_option
< 0)
1877 msg
+= strprintf(":%d", set_option
-1);
1878 else if (set_option
> 0)
1882 // Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1883 static bool is_duplicate_dev_idinfo(const dev_config
& cfg
, const dev_config_vector
& prev_cfgs
)
1885 if (!cfg
.id_is_unique
)
1888 for (unsigned i
= 0; i
< prev_cfgs
.size(); i
++) {
1889 if (!prev_cfgs
[i
].id_is_unique
)
1891 if (cfg
.dev_idinfo
!= prev_cfgs
[i
].dev_idinfo
)
1894 PrintOut(LOG_INFO
, "Device: %s, same identity as %s, ignored\n",
1895 cfg
.dev_name
.c_str(), prev_cfgs
[i
].dev_name
.c_str());
1902 // TODO: Add '-F swapid' directive
1903 const bool fix_swapped_id
= false;
1905 // scan to see what ata devices there are, and if they support SMART
1906 static int ATADeviceScan(dev_config
& cfg
, dev_state
& state
, ata_device
* atadev
,
1907 const dev_config_vector
* prev_cfgs
)
1910 struct ata_identify_device drive
;
1911 const char *name
= cfg
.name
.c_str();
1914 // Device must be open
1916 // Get drive identity structure
1917 if ((retid
= ata_read_identity(atadev
, &drive
, fix_swapped_id
))) {
1919 // Unable to read Identity structure
1920 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1922 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1923 name
, packetdevicetype(retid
-1));
1924 CloseDevice(atadev
, name
);
1928 // Get drive identity, size and rotation rate (HDD/SSD)
1929 char model
[40+1], serial
[20+1], firmware
[8+1];
1930 ata_format_id_string(model
, drive
.model
, sizeof(model
)-1);
1931 ata_format_id_string(serial
, drive
.serial_no
, sizeof(serial
)-1);
1932 ata_format_id_string(firmware
, drive
.fw_rev
, sizeof(firmware
)-1);
1934 ata_size_info sizes
;
1935 ata_get_size_info(&drive
, sizes
);
1936 state
.num_sectors
= sizes
.sectors
;
1937 cfg
.dev_rpm
= ata_get_rotation_rate(&drive
);
1939 char wwn
[30]; wwn
[0] = 0;
1940 unsigned oui
= 0; uint64_t unique_id
= 0;
1941 int naa
= ata_get_wwn(&drive
, oui
, unique_id
);
1943 snprintf(wwn
, sizeof(wwn
), "WWN:%x-%06x-%09" PRIx64
", ", naa
, oui
, unique_id
);
1945 // Format device id string for warning emails
1947 cfg
.dev_idinfo
= strprintf("%s, S/N:%s, %sFW:%s, %s", model
, serial
, wwn
, firmware
,
1948 format_capacity(cap
, sizeof(cap
), sizes
.capacity
, "."));
1949 cfg
.id_is_unique
= true; // TODO: Check serial?
1951 PrintOut(LOG_INFO
, "Device: %s, %s\n", name
, cfg
.dev_idinfo
.c_str());
1953 // Check for duplicates
1954 if (prev_cfgs
&& is_duplicate_dev_idinfo(cfg
, *prev_cfgs
)) {
1955 CloseDevice(atadev
, name
);
1959 // Show if device in database, and use preset vendor attribute
1960 // options unless user has requested otherwise.
1961 if (cfg
.ignorepresets
)
1962 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1964 // Apply vendor specific presets, print warning if present
1965 const drive_settings
* dbentry
= lookup_drive_apply_presets(
1966 &drive
, cfg
.attribute_defs
, cfg
.firmwarebugs
);
1968 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1970 PrintOut(LOG_INFO
, "Device: %s, found in smartd database%s%s\n",
1971 name
, (*dbentry
->modelfamily
? ": " : "."), (*dbentry
->modelfamily
? dbentry
->modelfamily
: ""));
1972 if (*dbentry
->warningmsg
)
1973 PrintOut(LOG_CRIT
, "Device: %s, WARNING: %s\n", name
, dbentry
->warningmsg
);
1977 // Check for ATA Security LOCK
1978 unsigned short word128
= drive
.words088_255
[128-88];
1979 bool locked
= ((word128
& 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1981 PrintOut(LOG_INFO
, "Device: %s, ATA Security is **LOCKED**\n", name
);
1983 // Set default '-C 197[+]' if no '-C ID' is specified.
1984 if (!cfg
.curr_pending_set
)
1985 cfg
.curr_pending_id
= get_unc_attr_id(false, cfg
.attribute_defs
, cfg
.curr_pending_incr
);
1986 // Set default '-U 198[+]' if no '-U ID' is specified.
1987 if (!cfg
.offl_pending_set
)
1988 cfg
.offl_pending_id
= get_unc_attr_id(true, cfg
.attribute_defs
, cfg
.offl_pending_incr
);
1990 // If requested, show which presets would be used for this drive
1991 if (cfg
.showpresets
) {
1992 int savedebugmode
=debugmode
;
1993 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1996 show_presets(&drive
);
1997 debugmode
=savedebugmode
;
2000 // see if drive supports SMART
2001 supported
=ataSmartSupport(&drive
);
2004 // drive does NOT support SMART
2005 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
2007 // can't tell if drive supports SMART
2008 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
2010 // should we proceed anyway?
2011 if (cfg
.permissive
) {
2012 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
2015 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
2016 CloseDevice(atadev
, name
);
2021 if (ataEnableSmart(atadev
)) {
2022 // Enable SMART command has failed
2023 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
2025 if (ataIsSmartEnabled(&drive
) <= 0) {
2026 if (!cfg
.permissive
) {
2027 PrintOut(LOG_INFO
, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name
);
2028 CloseDevice(atadev
, name
);
2031 PrintOut(LOG_INFO
, "Device: %s, proceeding since '-T permissive' Directive given.\n", name
);
2034 PrintOut(LOG_INFO
, "Device: %s, proceeding since SMART is already enabled\n", name
);
2038 // disable device attribute autosave...
2039 if (cfg
.autosave
==1) {
2040 if (ataDisableAutoSave(atadev
))
2041 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
2043 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
2046 // or enable device attribute autosave
2047 if (cfg
.autosave
==2) {
2048 if (ataEnableAutoSave(atadev
))
2049 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
2051 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
2054 // capability check: SMART status
2055 if (cfg
.smartcheck
&& ataSmartStatus2(atadev
) == -1) {
2056 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
2057 cfg
.smartcheck
= false;
2060 // capability check: Read smart values and thresholds. Note that
2061 // smart values are ALSO needed even if we ONLY want to know if the
2062 // device is self-test log or error-log capable! After ATA-5, this
2063 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2064 // but sadly not for ATA-5. Sigh.
2066 // do we need to get SMART data?
2067 bool smart_val_ok
= false;
2068 if ( cfg
.autoofflinetest
|| cfg
.selftest
2069 || cfg
.errorlog
|| cfg
.xerrorlog
2070 || cfg
.offlinests
|| cfg
.selfteststs
2071 || cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
2072 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
2073 || cfg
.curr_pending_id
|| cfg
.offl_pending_id
) {
2075 if (ataReadSmartValues(atadev
, &state
.smartval
)) {
2076 PrintOut(LOG_INFO
, "Device: %s, Read SMART Values failed\n", name
);
2077 cfg
.usagefailed
= cfg
.prefail
= cfg
.usage
= false;
2078 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
2079 cfg
.curr_pending_id
= cfg
.offl_pending_id
= 0;
2082 smart_val_ok
= true;
2083 if (ataReadSmartThresholds(atadev
, &state
.smartthres
)) {
2084 PrintOut(LOG_INFO
, "Device: %s, Read SMART Thresholds failed%s\n",
2085 name
, (cfg
.usagefailed
? ", ignoring -f Directive" : ""));
2086 cfg
.usagefailed
= false;
2087 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2088 memset(&state
.smartthres
, 0, sizeof(state
.smartthres
));
2092 // see if the necessary Attribute is there to monitor offline or
2093 // current pending sectors or temperature
2094 if ( cfg
.curr_pending_id
2095 && !check_pending_id(cfg
, state
, cfg
.curr_pending_id
,
2096 "Current_Pending_Sector"))
2097 cfg
.curr_pending_id
= 0;
2099 if ( cfg
.offl_pending_id
2100 && !check_pending_id(cfg
, state
, cfg
.offl_pending_id
,
2101 "Offline_Uncorrectable"))
2102 cfg
.offl_pending_id
= 0;
2104 if ( (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
2105 && !ata_return_temperature_value(&state
.smartval
, cfg
.attribute_defs
)) {
2106 PrintOut(LOG_INFO
, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2107 name
, cfg
.tempdiff
, cfg
.tempinfo
, cfg
.tempcrit
);
2108 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
2111 // Report ignored '-r' or '-R' directives
2112 for (int id
= 1; id
<= 255; id
++) {
2113 if (cfg
.monitor_attr_flags
.is_set(id
, MONITOR_RAW_PRINT
)) {
2114 char opt
= (!cfg
.monitor_attr_flags
.is_set(id
, MONITOR_RAW
) ? 'r' : 'R');
2115 const char * excl
= (cfg
.monitor_attr_flags
.is_set(id
,
2116 (opt
== 'r' ? MONITOR_AS_CRIT
: MONITOR_RAW_AS_CRIT
)) ? "!" : "");
2118 int idx
= ata_find_attr_index(id
, state
.smartval
);
2120 PrintOut(LOG_INFO
,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name
, id
, opt
, id
, excl
);
2122 bool prefail
= !!ATTRIBUTE_FLAGS_PREFAILURE(state
.smartval
.vendor_attributes
[idx
].flags
);
2123 if (!((prefail
&& cfg
.prefail
) || (!prefail
&& cfg
.usage
)))
2124 PrintOut(LOG_INFO
,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name
,
2125 (prefail
? "Prefailure" : "Usage"), opt
, id
, excl
);
2131 // enable/disable automatic on-line testing
2132 if (cfg
.autoofflinetest
) {
2133 // is this an enable or disable request?
2134 const char *what
=(cfg
.autoofflinetest
==1)?"disable":"enable";
2136 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
2138 // if command appears unsupported, issue a warning...
2139 if (!isSupportAutomaticTimer(&state
.smartval
))
2140 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
2141 // ... but then try anyway
2142 if ((cfg
.autoofflinetest
==1)?ataDisableAutoOffline(atadev
):ataEnableAutoOffline(atadev
))
2143 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
2145 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
2149 // Read log directories if required for capability check
2150 ata_smart_log_directory smart_logdir
, gp_logdir
;
2151 bool smart_logdir_ok
= false, gp_logdir_ok
= false;
2153 if ( isGeneralPurposeLoggingCapable(&drive
)
2154 && (cfg
.errorlog
|| cfg
.selftest
)
2155 && !cfg
.firmwarebugs
.is_set(BUG_NOLOGDIR
)) {
2156 if (!ataReadLogDirectory(atadev
, &smart_logdir
, false))
2157 smart_logdir_ok
= true;
2160 if (cfg
.xerrorlog
&& !cfg
.firmwarebugs
.is_set(BUG_NOLOGDIR
)) {
2161 if (!ataReadLogDirectory(atadev
, &gp_logdir
, true))
2162 gp_logdir_ok
= true;
2165 // capability check: self-test-log
2166 state
.selflogcount
= 0; state
.selfloghour
= 0;
2169 if (!( cfg
.permissive
2170 || ( smart_logdir_ok
&& smart_logdir
.entry
[0x06-1].numsectors
)
2171 || (!smart_logdir_ok
&& smart_val_ok
&& isSmartTestLogCapable(&state
.smartval
, &drive
)))) {
2172 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name
);
2173 cfg
.selftest
= false;
2175 else if ((retval
= SelfTestErrorCount(atadev
, name
, cfg
.firmwarebugs
)) < 0) {
2176 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name
);
2177 cfg
.selftest
= false;
2180 state
.selflogcount
=SELFTEST_ERRORCOUNT(retval
);
2181 state
.selfloghour
=SELFTEST_ERRORHOURS(retval
);
2185 // capability check: ATA error log
2186 state
.ataerrorcount
= 0;
2189 if (!( cfg
.permissive
2190 || ( smart_logdir_ok
&& smart_logdir
.entry
[0x01-1].numsectors
)
2191 || (!smart_logdir_ok
&& smart_val_ok
&& isSmartErrorLogCapable(&state
.smartval
, &drive
)))) {
2192 PrintOut(LOG_INFO
, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name
);
2193 cfg
.errorlog
= false;
2195 else if ((errcnt1
= read_ata_error_count(atadev
, name
, cfg
.firmwarebugs
, false)) < 0) {
2196 PrintOut(LOG_INFO
, "Device: %s, no SMART Error Log, ignoring -l error\n", name
);
2197 cfg
.errorlog
= false;
2200 state
.ataerrorcount
= errcnt1
;
2203 if (cfg
.xerrorlog
) {
2205 if (!( cfg
.permissive
|| cfg
.firmwarebugs
.is_set(BUG_NOLOGDIR
)
2206 || (gp_logdir_ok
&& gp_logdir
.entry
[0x03-1].numsectors
) )) {
2207 PrintOut(LOG_INFO
, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2209 cfg
.xerrorlog
= false;
2211 else if ((errcnt2
= read_ata_error_count(atadev
, name
, cfg
.firmwarebugs
, true)) < 0) {
2212 PrintOut(LOG_INFO
, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name
);
2213 cfg
.xerrorlog
= false;
2215 else if (cfg
.errorlog
&& state
.ataerrorcount
!= errcnt2
) {
2216 PrintOut(LOG_INFO
, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2217 name
, state
.ataerrorcount
, errcnt2
);
2218 // Record max error count
2219 if (errcnt2
> state
.ataerrorcount
)
2220 state
.ataerrorcount
= errcnt2
;
2223 state
.ataerrorcount
= errcnt2
;
2226 // capability check: self-test and offline data collection status
2227 if (cfg
.offlinests
|| cfg
.selfteststs
) {
2228 if (!(cfg
.permissive
|| (smart_val_ok
&& state
.smartval
.offline_data_collection_capability
))) {
2230 PrintOut(LOG_INFO
, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name
);
2231 if (cfg
.selfteststs
)
2232 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name
);
2233 cfg
.offlinests
= cfg
.selfteststs
= false;
2237 // capabilities check -- does it support powermode?
2238 if (cfg
.powermode
) {
2239 int powermode
= ataCheckPowerMode(atadev
);
2241 if (-1 == powermode
) {
2242 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
2245 else if (powermode
!=0x00 && powermode
!=0x01
2246 && powermode
!=0x40 && powermode
!=0x41
2247 && powermode
!=0x80 && powermode
!=0x81 && powermode
!=0x82 && powermode
!=0x83
2248 && powermode
!=0xff) {
2249 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2255 // Apply ATA settings
2259 format_set_result_msg(msg
, "AAM", (cfg
.set_aam
> 0 ?
2260 ata_set_features(atadev
, ATA_ENABLE_AAM
, cfg
.set_aam
-1) :
2261 ata_set_features(atadev
, ATA_DISABLE_AAM
)), cfg
.set_aam
, true);
2264 format_set_result_msg(msg
, "APM", (cfg
.set_apm
> 0 ?
2265 ata_set_features(atadev
, ATA_ENABLE_APM
, cfg
.set_apm
-1) :
2266 ata_set_features(atadev
, ATA_DISABLE_APM
)), cfg
.set_apm
, true);
2268 if (cfg
.set_lookahead
)
2269 format_set_result_msg(msg
, "Rd-ahead", ata_set_features(atadev
,
2270 (cfg
.set_lookahead
> 0 ? ATA_ENABLE_READ_LOOK_AHEAD
: ATA_DISABLE_READ_LOOK_AHEAD
)),
2274 format_set_result_msg(msg
, "Wr-cache", ata_set_features(atadev
,
2275 (cfg
.set_wcache
> 0? ATA_ENABLE_WRITE_CACHE
: ATA_DISABLE_WRITE_CACHE
)), cfg
.set_wcache
);
2278 format_set_result_msg(msg
, "DSN", ata_set_features(atadev
,
2279 ATA_ENABLE_DISABLE_DSN
, (cfg
.set_dsn
> 0 ? 0x1 : 0x2)));
2281 if (cfg
.set_security_freeze
)
2282 format_set_result_msg(msg
, "Security freeze",
2283 ata_nodata_command(atadev
, ATA_SECURITY_FREEZE_LOCK
));
2285 if (cfg
.set_standby
)
2286 format_set_result_msg(msg
, "Standby",
2287 ata_nodata_command(atadev
, ATA_IDLE
, cfg
.set_standby
-1), cfg
.set_standby
, true);
2289 // Report as one log entry
2291 PrintOut(LOG_INFO
, "Device: %s, ATA settings applied: %s\n", name
, msg
.c_str());
2293 // set SCT Error Recovery Control if requested
2294 if (cfg
.sct_erc_set
) {
2295 if (!isSCTErrorRecoveryControlCapable(&drive
))
2296 PrintOut(LOG_INFO
, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2299 PrintOut(LOG_INFO
, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2301 else if ( ataSetSCTErrorRecoveryControltime(atadev
, 1, cfg
.sct_erc_readtime
)
2302 || ataSetSCTErrorRecoveryControltime(atadev
, 2, cfg
.sct_erc_writetime
))
2303 PrintOut(LOG_INFO
, "Device: %s, set of SCT Error Recovery Control failed\n", name
);
2305 PrintOut(LOG_INFO
, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2306 name
, cfg
.sct_erc_readtime
, cfg
.sct_erc_writetime
);
2309 // If no tests available or selected, return
2310 if (!( cfg
.smartcheck
|| cfg
.selftest
2311 || cfg
.errorlog
|| cfg
.xerrorlog
2312 || cfg
.offlinests
|| cfg
.selfteststs
2313 || cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
2314 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
2315 CloseDevice(atadev
, name
);
2319 // tell user we are registering device
2320 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
2322 // close file descriptor
2323 CloseDevice(atadev
, name
);
2325 if (!state_path_prefix
.empty() || !attrlog_path_prefix
.empty()) {
2326 // Build file name for state file
2327 std::replace_if(model
, model
+strlen(model
), not_allowed_in_filename
, '_');
2328 std::replace_if(serial
, serial
+strlen(serial
), not_allowed_in_filename
, '_');
2329 if (!state_path_prefix
.empty()) {
2330 cfg
.state_file
= strprintf("%s%s-%s.ata.state", state_path_prefix
.c_str(), model
, serial
);
2331 // Read previous state
2332 if (read_dev_state(cfg
.state_file
.c_str(), state
)) {
2333 PrintOut(LOG_INFO
, "Device: %s, state read from %s\n", name
, cfg
.state_file
.c_str());
2334 // Copy ATA attribute values to temp state
2335 state
.update_temp_state();
2338 if (!attrlog_path_prefix
.empty())
2339 cfg
.attrlog_file
= strprintf("%s%s-%s.ata.csv", attrlog_path_prefix
.c_str(), model
, serial
);
2342 finish_device_scan(cfg
, state
);
// SCSIDeviceScan(): probe one SCSI device while the monitor list is built.
//
// Visible sequence of work:
//  - standard INQUIRY (retried once with a second request length), with a
//    minimum-length check and a peripheral-device-type filter;
//  - VPD pages for the logical unit id and the unit serial number, read only
//    when the INQUIRY version is in the range 0x3..0x7;
//  - capacity via scsiGetSize(), then cfg.dev_idinfo / cfg.id_is_unique;
//  - duplicate check against prev_cfgs, TEST UNIT READY, IEC mode page fetch,
//    "exception control enabled" check;
//  - supported log pages are recorded in the state.*PageSupported flags;
//  - scsiCheckIE() trial run, temperature (-W) and self-test log capability
//    checks, optional GLTSD (autosave) change;
//  - the device is closed, state/attrlog file names are built and
//    finish_device_scan() is called.
//
// Parameters:
//   cfg       device configuration; directives the device cannot honour are
//             switched off here (tempdiff/tempinfo/tempcrit, selftest).
//   state     per-device state; filled with modese_len, page-support flags and
//             self-test log start values.
//   scsidev   device to probe (the existing comment below says it must be open).
//   prev_cfgs optional list of already-registered configs for duplicate
//             detection; may be null (it is tested before use).
//
// NOTE(review): this extract is missing several listing lines (e.g. the
// declarations of inqBuf/tBuf/si_str/asc/ascq, the req_len assignments, the
// switch header over tBuf[k] and every return statement).  The return contract
// is therefore taken from the existing comment only: 0 on success, >0 on
// failure.
2347 // on success, return 0. On failure, return >0. Never return <0,
2349 static int SCSIDeviceScan(dev_config
& cfg
, dev_state
& state
, scsi_device
* scsidev
,
2350 const dev_config_vector
* prev_cfgs
)
2352 int err
, req_len
, avail_len
, version
, len
;
2353 const char *device
= cfg
.name
.c_str();
2354 struct scsi_iec_mode_page iec
;
2357 uint8_t vpdBuf
[252];
2358 char lu_id
[64], serial
[256], vendor
[40], model
[40];
2360 // Device must be open
2361 memset(inqBuf
, 0, 96);
// First INQUIRY attempt; on failure a second attempt follows (the req_len
// values used are assigned on lines not present in this extract).
2363 if ((err
= scsiStdInquiry(scsidev
, inqBuf
, req_len
))) {
2364 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2366 if ((err
= scsiStdInquiry(scsidev
, inqBuf
, req_len
))) {
2367 PrintOut(LOG_INFO
, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2368 "skip device\n", device
);
2372 version
= (inqBuf
[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
// avail_len = byte 4 of the response plus 5; len is capped at the number of
// bytes actually requested.
2374 avail_len
= inqBuf
[4] + 5;
2375 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
2377 PrintOut(LOG_INFO
, "Device: %s, INQUIRY response less than 36 bytes; "
2378 "skip device\n", device
);
// Low five bits of byte 0; reported as "PDT" in the message below.
2382 int pdt
= inqBuf
[0] & 0x1f;
2384 if (! ((0 == pdt
) || (4 == pdt
) || (5 == pdt
) || (7 == pdt
) ||
2386 PrintOut(LOG_INFO
, "Device: %s, not a disk like device [PDT=0x%x], "
2387 "skip\n", device
, pdt
);
// Drop any supported_vpd_pages object left over from an earlier device and
// allocate a fresh one for this device.
2391 if (supported_vpd_pages_p
) {
2392 delete supported_vpd_pages_p
;
2393 supported_vpd_pages_p
= NULL
;
2395 supported_vpd_pages_p
= new supported_vpd_pages(scsidev
);
// VPD pages are queried only for INQUIRY versions 0x3..0x7.
2398 if ((version
>= 0x3) && (version
< 0x8)) {
2400 if (0 == scsiInquiryVpd(scsidev
, SCSI_VPD_DEVICE_IDENTIFICATION
,
2401 vpdBuf
, sizeof(vpdBuf
))) {
2403 scsi_decode_lu_dev_id(vpdBuf
+ 4, len
, lu_id
, sizeof(lu_id
), NULL
);
2407 if (0 == scsiInquiryVpd(scsidev
, SCSI_VPD_UNIT_SERIAL_NUMBER
,
2408 vpdBuf
, sizeof(vpdBuf
))) {
2410 vpdBuf
[4 + len
] = '\0';
2411 scsi_format_id_string(serial
, &vpdBuf
[4], len
);
2415 struct scsi_readcap_resp srr
;
2416 uint64_t capacity
= scsiGetSize(scsidev
, scsidev
->use_rcap16(), &srr
);
2419 format_capacity(si_str
, sizeof(si_str
), capacity
, ".");
2423 // Format device id string for warning emails
2424 cfg
.dev_idinfo
= strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2425 (char *)&inqBuf
[8], (char *)&inqBuf
[16], (char *)&inqBuf
[32],
2426 (lu_id
[0] ? ", lu id: " : ""), (lu_id
[0] ? lu_id
: ""),
2427 (serial
[0] ? ", S/N: " : ""), (serial
[0] ? serial
: ""),
2428 (si_str
[0] ? ", " : ""), (si_str
[0] ? si_str
: ""));
// The id counts as unique only if a logical unit id or a serial number was found.
2429 cfg
.id_is_unique
= (lu_id
[0] || serial
[0]);
2431 // format "model" string
2432 scsi_format_id_string(vendor
, &inqBuf
[8], 8);
2433 scsi_format_id_string(model
, &inqBuf
[16], 16);
2434 PrintOut(LOG_INFO
, "Device: %s, %s\n", device
, cfg
.dev_idinfo
.c_str());
2436 // Check for duplicates
2437 if (prev_cfgs
&& is_duplicate_dev_idinfo(cfg
, *prev_cfgs
)) {
2438 CloseDevice(scsidev
, device
);
2442 // check that device is ready for commands. IE stores its stuff on
2444 if ((err
= scsiTestUnitReady(scsidev
))) {
2445 if (SIMPLE_ERR_NOT_READY
== err
)
2446 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
2447 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
2448 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
2449 else if (SIMPLE_ERR_BECOMING_READY
== err
)
2450 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
2452 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
2453 CloseDevice(scsidev
, device
);
2457 // Badly-conforming USB storage devices may fail this check.
2458 // The response to the following IE mode page fetch (current and
2459 // changeable values) is carefully examined. It has been found
2460 // that various USB devices that malform the response will lock up
2461 // if asked for a log page (e.g. temperature) so it is best to
// On success the mode sense length reported in the IEC page is remembered in
// state.modese_len for later calls (it is passed to scsiSetControlGLTSD below).
2463 if (!(err
= scsiFetchIECmpage(scsidev
, &iec
, state
.modese_len
)))
2464 state
.modese_len
= iec
.modese_len
;
2465 else if (SIMPLE_ERR_BAD_FIELD
== err
)
2466 ; /* continue since it is reasonable not to support IE mpage */
2467 else { /* any other error (including malformed response) unreasonable */
2469 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2471 CloseDevice(scsidev
, device
);
2475 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2476 // smart if it is off). This may change to be the same as the ATA side.
2477 if (!scsi_IsExceptionControlEnabled(&iec
)) {
2478 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
2479 "Try 'smartctl -s on %s' to turn on SMART features\n",
2481 CloseDevice(scsidev
, device
);
2485 // Flag that certain log pages are supported (information may be
2486 // available from other sources).
// The second scsiLogSense() call is tried only if the first one fails; it
// differs from the first only in its last argument (68 instead of 0).
2487 if (0 == scsiLogSense(scsidev
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0) ||
2488 0 == scsiLogSense(scsidev
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 68))
2489 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
// Walk the page list that follows the header; tBuf[3] is used as its length.
2491 for (int k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
2493 case TEMPERATURE_LPAGE
:
2494 state
.TempPageSupported
= 1;
2497 state
.SmartPageSupported
= 1;
2499 case READ_ERROR_COUNTER_LPAGE
:
2500 state
.ReadECounterPageSupported
= 1;
2502 case WRITE_ERROR_COUNTER_LPAGE
:
2503 state
.WriteECounterPageSupported
= 1;
2505 case VERIFY_ERROR_COUNTER_LPAGE
:
2506 state
.VerifyECounterPageSupported
= 1;
2508 case NON_MEDIUM_ERROR_LPAGE
:
2509 state
.NonMediumErrorPageSupported
= 1;
2517 // Check if scsiCheckIE() is going to work
2521 uint8_t currenttemp
= 0;
2522 uint8_t triptemp
= 0;
// NOTE(review): the argument written as '¤ttemp' below looks like a
// mis-decoded '&currenttemp' (the sibling arguments are &asc, &ascq and
// &triptemp, and currenttemp is declared just above) -- confirm and repair.
2524 if (scsiCheckIE(scsidev
, state
.SmartPageSupported
, state
.TempPageSupported
,
2525 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
2526 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
2527 state
.SuppressReport
= 1;
// Temperature directives are dropped when reports are suppressed or the
// current temperature read back as 0.
2529 if ( (state
.SuppressReport
|| !currenttemp
)
2530 && (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
2531 PrintOut(LOG_INFO
, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2532 device
, cfg
.tempdiff
, cfg
.tempinfo
, cfg
.tempcrit
);
2533 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
2537 // capability check: self-test-log
2539 int retval
= scsiCountFailedSelfTests(scsidev
, 0);
2541 // no self-test log, turn off monitoring
2542 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
2543 cfg
.selftest
= false;
2544 state
.selflogcount
= 0;
2545 state
.selfloghour
= 0;
2548 // register starting values to watch for changes
2549 state
.selflogcount
=SELFTEST_ERRORCOUNT(retval
);
2550 state
.selfloghour
=SELFTEST_ERRORHOURS(retval
);
2554 // disable autosave (set GLTSD bit)
2555 if (cfg
.autosave
==1){
2556 if (scsiSetControlGLTSD(scsidev
, 1, state
.modese_len
))
2557 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
2559 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
2562 // or enable autosave (clear GLTSD bit)
2563 if (cfg
.autosave
==2){
2564 if (scsiSetControlGLTSD(scsidev
, 0, state
.modese_len
))
2565 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
2567 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
2570 // tell user we are registering device
2571 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
2573 // Make sure that init_standby_check() ignores SCSI devices
2574 cfg
.offlinests_ns
= cfg
.selfteststs_ns
= false;
2576 // close file descriptor
2577 CloseDevice(scsidev
, device
);
// File names are derived from vendor, model and serial; characters rejected by
// not_allowed_in_filename are replaced with '_' in model and serial first.
2579 if (!state_path_prefix
.empty() || !attrlog_path_prefix
.empty()) {
2580 // Build file name for state file
2581 std::replace_if(model
, model
+strlen(model
), not_allowed_in_filename
, '_');
2582 std::replace_if(serial
, serial
+strlen(serial
), not_allowed_in_filename
, '_');
2583 if (!state_path_prefix
.empty()) {
2584 cfg
.state_file
= strprintf("%s%s-%s-%s.scsi.state", state_path_prefix
.c_str(), vendor
, model
, serial
);
2585 // Read previous state
2586 if (read_dev_state(cfg
.state_file
.c_str(), state
)) {
2587 PrintOut(LOG_INFO
, "Device: %s, state read from %s\n", device
, cfg
.state_file
.c_str());
2588 // Copy ATA attribute values to temp state
2589 state
.update_temp_state();
2592 if (!attrlog_path_prefix
.empty())
2593 cfg
.attrlog_file
= strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix
.c_str(), vendor
, model
, serial
);
2596 finish_device_scan(cfg
, state
);
// Convert 128 bit LE integer to uint64_t or its max value on overflow.
//
// val holds the number least significant byte first.  If any of the upper
// eight bytes (val[8] .. val[15]) is nonzero, the number does not fit into
// 64 bits and the result saturates to the largest uint64_t value.
// Otherwise the low eight bytes are assembled into the result.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Anything set above bit 63 means overflow: saturate
  for (int i = 8; i < 16; i++) {
    if (val[i])
      return ~(uint64_t)0;
  }

  // Build the low 64 bits starting with the most significant byte val[7]
  uint64_t lo = val[7];
  for (int i = 7-1; i >= 0; i--) {
    lo <<= 8; lo += val[i];
  }
  return lo;
}
2615 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
2616 static int nvme_get_max_temp_kelvin(const nvme_smart_log
& smart_log
)
2618 int k
= (smart_log
.temperature
[1] << 8) | smart_log
.temperature
[0];
2619 for (int i
= 0; i
< 8; i
++) {
2620 if (smart_log
.temp_sensor
[i
] > k
)
2621 k
= smart_log
.temp_sensor
[i
];
// NVMeDeviceScan(): probe one NVMe device while the monitor list is built.
//
// Visible sequence of work:
//  - read Identify Controller data; format model, serial and firmware strings;
//  - build cfg.dev_idinfo (with ", NSID:<n>" unless the namespace id is
//    0xffffffff, and with the capacity taken from id_ctrl.tnvmcap);
//  - duplicate check against prev_cfgs (may be null; tested before use);
//  - read the SMART/Health log; drop the -W settings if
//    nvme_get_max_temp_kelvin() returns 0;
//  - remember the current error log entry count when -l error/xerror is set;
//  - stop if none of smartcheck/errorlog/xerrorlog/temperature is selected;
//  - close the device, build and read the state file, finish_device_scan().
// Unlike the SCSI scan, only state_path_prefix is consulted here; no attribute
// log file name is set in the visible code.
//
// NOTE(review): the return statements and some braces are on listing lines that
// are not present in this extract, so the return values are not documented here.
2626 static int NVMeDeviceScan(dev_config
& cfg
, dev_state
& state
, nvme_device
* nvmedev
,
2627 const dev_config_vector
* prev_cfgs
)
2629 const char *name
= cfg
.name
.c_str();
2631 // Device must be open
2633 // Get ID Controller
2634 nvme_id_ctrl id_ctrl
;
2635 if (!nvme_read_id_ctrl(nvmedev
, id_ctrl
)) {
2636 PrintOut(LOG_INFO
, "Device: %s, NVMe Identify Controller failed\n", name
);
2637 CloseDevice(nvmedev
, name
);
2641 // Get drive identity
2642 char model
[40+1], serial
[20+1], firmware
[8+1];
2643 format_char_array(model
, id_ctrl
.mn
);
2644 format_char_array(serial
, id_ctrl
.sn
);
2645 format_char_array(firmware
, id_ctrl
.fr
);
2647 // Format device id string for warning emails
2648 char nsstr
[32] = "", capstr
[32] = "";
2649 unsigned nsid
= nvmedev
->get_nsid();
// A namespace id of 0xffffffff is not printed; nsstr then stays empty.
2650 if (nsid
!= 0xffffffff)
2651 snprintf(nsstr
, sizeof(nsstr
), ", NSID:%u", nsid
);
2652 uint64_t capacity
= le128_to_uint64(id_ctrl
.tnvmcap
);
2654 format_capacity(capstr
, sizeof(capstr
), capacity
, ".");
2655 cfg
.dev_idinfo
= strprintf("%s, S/N:%s, FW:%s%s%s%s", model
, serial
, firmware
,
2656 nsstr
, (capstr
[0] ? ", " : ""), capstr
);
2657 cfg
.id_is_unique
= true; // TODO: Check serial?
2659 PrintOut(LOG_INFO
, "Device: %s, %s\n", name
, cfg
.dev_idinfo
.c_str());
2661 // Check for duplicates
2662 if (prev_cfgs
&& is_duplicate_dev_idinfo(cfg
, *prev_cfgs
)) {
2663 CloseDevice(nvmedev
, name
);
2667 // Read SMART/Health log
2668 nvme_smart_log smart_log
;
2669 if (!nvme_read_smart_log(nvmedev
, smart_log
)) {
2670 PrintOut(LOG_INFO
, "Device: %s, failed to read NVMe SMART/Health Information\n", name
);
2671 CloseDevice(nvmedev
, name
);
2675 // Check temperature sensor support
2676 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
) {
// A maximum of 0 is treated as "no sensor reports a temperature".
2677 if (!nvme_get_max_temp_kelvin(smart_log
)) {
2678 PrintOut(LOG_INFO
, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2679 name
, cfg
.tempdiff
, cfg
.tempinfo
, cfg
.tempcrit
);
2680 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
2684 // Init total error count
2685 if (cfg
.errorlog
|| cfg
.xerrorlog
) {
2686 state
.nvme_err_log_entries
= le128_to_uint64(smart_log
.num_err_log_entries
);
2689 // If no supported tests selected, return
2690 if (!( cfg
.smartcheck
|| cfg
.errorlog
|| cfg
.xerrorlog
2691 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
2692 CloseDevice(nvmedev
, name
);
2696 // Tell user we are registering device
2697 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name
);
2699 // Make sure that init_standby_check() ignores NVMe devices
2700 cfg
.offlinests_ns
= cfg
.selfteststs_ns
= false;
2702 CloseDevice(nvmedev
, name
);
2704 if (!state_path_prefix
.empty()) {
2705 // Build file name for state file
2706 std::replace_if(model
, model
+strlen(model
), not_allowed_in_filename
, '_');
2707 std::replace_if(serial
, serial
+strlen(serial
), not_allowed_in_filename
, '_');
// nsstr is reused here: it now carries the "-n<nsid>" file name suffix instead
// of the ", NSID:<n>" text used for dev_idinfo above.
2709 if (nsid
!= 0xffffffff)
2710 snprintf(nsstr
, sizeof(nsstr
), "-n%u", nsid
);
2711 cfg
.state_file
= strprintf("%s%s-%s%s.nvme.state", state_path_prefix
.c_str(), model
, serial
, nsstr
);
2712 // Read previous state
2713 if (read_dev_state(cfg
.state_file
.c_str(), state
))
2714 PrintOut(LOG_INFO
, "Device: %s, state read from %s\n", name
, cfg
.state_file
.c_str());
2717 finish_device_scan(cfg
, state
);
// open_device(): prepare a device for the next check cycle.
//
// Visible behaviour:
//  - may send a test mail via MailWarning(..., 0, ...) (the guarding condition
//    is on a line not present in this extract);
//  - for ATA devices with the -n directive active (cfg.powermode set, no earlier
//    power mode failure, device not marked removed) the check is skipped while
//    device->is_powered_down() is true, at most cfg.powerskipmax times in a row
//    (unlimited if powerskipmax is 0);
//  - otherwise device->open() is called.  On failure a message is logged and,
//    for non-removable devices, warning mail 9 is raised; removable devices are
//    marked state.removed instead;
//  - on success warning 9 is reset for non-removable devices, or the removed
//    flag is cleared with a "reconnected" message.
//
// NOTE(review): 'type' is used in the messages below, but the line that
// declares it (presumably the last parameter) is missing here, as are the
// return statements.  Per the existing comment, false is returned on error.
2722 // Open device for next check, return false on error
2723 static bool open_device(const dev_config
& cfg
, dev_state
& state
, smart_device
* device
,
2726 const char * name
= cfg
.name
.c_str();
2728 // If user has asked, test the email warning system
2730 MailWarning(cfg
, state
, 0, "TEST EMAIL from smartd for device: %s", name
);
2732 // User may have requested (with the -n Directive) to leave the disk
2733 // alone if it is in idle or standby mode. In this case check the
2734 // power mode first before opening the device for full access,
2735 // and exit without check if disk is reported in standby.
2736 if (device
->is_ata() && cfg
.powermode
&& !state
.powermodefail
&& !state
.removed
) {
2737 // Note that 'is_powered_down()' handles opening the device itself, and
2738 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2739 if (device
->is_powered_down())
2741 // skip at most powerskipmax checks
2742 if (!cfg
.powerskipmax
|| state
.powerskipcnt
<cfg
.powerskipmax
) {
2743 // report first only except if state has changed, avoid waking up system disk
2744 if ((!state
.powerskipcnt
|| state
.lastpowermodeskipped
!= -1) && !cfg
.powerquiet
) {
2745 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, "STANDBY (OS)");
// -1 records that the last skip was caused by this is_powered_down() path.
2746 state
.lastpowermodeskipped
= -1;
2748 state
.powerskipcnt
++;
2754 // if we can't open device, fail gracefully rather than hard --
2755 // perhaps the next time around we'll be able to open it
2756 if (!device
->open()) {
2757 // For removable devices, print error message only once and suppress email
2758 if (!cfg
.removable
) {
2759 PrintOut(LOG_INFO
, "Device: %s, open() of %s device failed: %s\n", name
, type
, device
->get_errmsg());
2760 MailWarning(cfg
, state
, 9, "Device: %s, unable to open %s device", name
, type
);
2762 else if (!state
.removed
) {
2763 PrintOut(LOG_INFO
, "Device: %s, removed %s device: %s\n", name
, type
, device
->get_errmsg());
2764 state
.removed
= true;
2767 PrintOut(LOG_INFO
, "Device: %s, %s device still removed: %s\n", name
, type
, device
->get_errmsg());
2772 PrintOut(LOG_INFO
,"Device: %s, opened %s device\n", name
, type
);
// Open succeeded: clear warning 9 or the removed marker set by an earlier failure.
2775 reset_warning_mail(cfg
, state
, 9, "open of %s device worked again", type
);
2776 else if (state
.removed
) {
2777 PrintOut(LOG_INFO
, "Device: %s, reconnected %s device\n", name
, type
);
2778 state
.removed
= false;
// CheckSelfTestLogs(): compare a freshly read self-test log summary with the
// values stored in 'state' and notify the user about changes.
//
// Parameters:
//   cfg    device configuration (name, mail settings).
//   state  per-device state; selflogcount/selfloghour are always updated to the
//          new values at the end, must_write is set when a new error is found.
//   newi   packed result; error count and hour timestamp are extracted with
//          SELFTEST_ERRORCOUNT() and SELFTEST_ERRORHOURS().
//
// Warning mail 8 covers "log could not be read", warning mail 3 covers new
// self-test errors; both are reset again when the condition clears.
//
// NOTE(review): several conditions (e.g. the check that triggers mail 8, the
// "count increased" and "count decreased" tests) and the trailing arguments of
// the PrintOut/MailWarning calls are on lines missing from this extract.
2784 // If the self-test log has got more self-test errors (or more recent
2785 // self-test errors) recorded, then notify user.
2786 static void CheckSelfTestLogs(const dev_config
& cfg
, dev_state
& state
, int newi
)
2788 const char * name
= cfg
.name
.c_str();
2792 MailWarning(cfg
, state
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
2794 reset_warning_mail(cfg
, state
, 8, "Read SMART Self-Test Log worked again");
2796 // old and new error counts
2797 int oldc
=state
.selflogcount
;
2798 int newc
=SELFTEST_ERRORCOUNT(newi
);
2800 // old and new error timestamps in hours
2801 int oldh
=state
.selfloghour
;
2802 int newh
=SELFTEST_ERRORHOURS(newi
);
2805 // increase in error count
2806 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2808 MailWarning(cfg
, state
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2810 state
.must_write
= true;
// Same number of errors but a different hour stamp: treat as a new error.
2812 else if (newc
> 0 && oldh
!= newh
) {
2813 // more recent error
2814 // a 'more recent' error might actually be a smaller hour number,
2815 // if the hour number has wrapped.
2816 // There's still a bug here. You might just happen to run a new test
2817 // exactly 32768 hours after the previous failure, and have run exactly
2818 // 20 tests between the two, in which case smartd will miss the
2820 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2822 MailWarning(cfg
, state
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2824 state
.must_write
= true;
2827 // Print info if error entries have disappeared
2828 // or a newer successful extended self-test exists
2830 PrintOut(LOG_INFO
, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2833 reset_warning_mail(cfg
, state
, 3, "Self-Test Log does no longer report errors");
2836 // Needed since self-test error count may DECREASE. Hour might
2837 // also have changed.
2838 state
.selflogcount
= newc
;
2839 state
.selfloghour
= newh
;
// Test types, ordered by priority.
// Each character is one self-test type letter; a lower index means a higher
// priority when several tests are scheduled for the same time.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (the terminating NUL of the array is not counted).
static const unsigned num_test_types = sizeof(test_type_chars)-1;
// next_scheduled_test(): decide whether a scheduled self-test is due.
//
// Parameters:
//   cfg      device configuration; cfg.test_regex holds the schedule pattern.
//   state    per-device state; scheduled_test_next_check is read and advanced,
//            the not_cap_* flags exclude test types the drive cannot run.
//   scsi     true for SCSI devices; the 'C', 'O' and 'r' types are skipped for
//            them in the visible switch.
//   usetime  time to evaluate; 0 (the default) means "use time(0)".  A nonzero
//            value also suppresses the "old test ... not run" message.
//
// Method: starting at state.scheduled_test_next_check (but at most 90 days in
// the past) the interval up to 'now' is walked in one hour steps.  For each
// hour and each test type a string "T/MM/DD/d/HH" is formatted (d = weekday,
// 1 = Monday .. 7 = Sunday) and matched against cfg.test_regex.  Afterwards the
// next check is scheduled for the start of the next full hour.
//
// NOTE(review): the declarations of 'testtype' and 'pattern', the early
// returns, the loop exit conditions and the final return are on lines missing
// from this extract; the return contract is taken from the existing comment.
2848 // returns test type if time to do test of type testtype,
2849 // 0 if not time to do test.
2850 static char next_scheduled_test(const dev_config
& cfg
, dev_state
& state
, bool scsi
, time_t usetime
= 0)
2852 // check that self-testing has been requested
2853 if (cfg
.test_regex
.empty())
2856 // Exit if drive not capable of any test
2857 if ( state
.not_cap_long
&& state
.not_cap_short
&&
2858 (scsi
|| (state
.not_cap_conveyance
&& state
.not_cap_offline
)))
2861 // since we are about to call localtime(), be sure glibc is informed
2862 // of any timezone changes we make.
2864 FixGlibcTimeZoneBug();
2866 // Is it time for next check?
2867 time_t now
= (!usetime
? time(0) : usetime
);
2868 if (now
< state
.scheduled_test_next_check
)
2871 // Limit time check interval to 90 days
2872 if (state
.scheduled_test_next_check
+ (3600L*24*90) < now
)
2873 state
.scheduled_test_next_check
= now
- (3600L*24*90);
2875 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2877 time_t testtime
= 0; int testhour
= 0;
// maxtest is the highest index into test_type_chars still considered.
2878 int maxtest
= num_test_types
-1;
2880 for (time_t t
= state
.scheduled_test_next_check
; ; ) {
2881 struct tm
* tms
= localtime(&t
);
2882 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2883 int weekday
= (tms
->tm_wday
? tms
->tm_wday
: 7);
2884 for (int i
= 0; i
<= maxtest
; i
++) {
2885 // Skip if drive not capable of this test
2886 switch (test_type_chars
[i
]) {
2887 case 'L': if (state
.not_cap_long
) continue; break;
2888 case 'S': if (state
.not_cap_short
) continue; break;
2889 case 'C': if (scsi
|| state
.not_cap_conveyance
) continue; break;
2890 case 'O': if (scsi
|| state
.not_cap_offline
) continue; break;
2892 case 'r': if (scsi
|| state
.not_cap_selective
) continue; break;
2895 // Try match of "T/MM/DD/d/HH"
2897 snprintf(pattern
, sizeof(pattern
), "%c/%02d/%02d/%1d/%02d",
2898 test_type_chars
[i
], tms
->tm_mon
+1, tms
->tm_mday
, weekday
, tms
->tm_hour
);
2899 if (cfg
.test_regex
.full_match(pattern
)) {
// Remember the matched type together with the time and hour it was due.
2901 testtype
= pattern
[0];
2902 testtime
= t
; testhour
= tms
->tm_hour
;
2903 // Limit further matches to higher priority self-tests
2908 // Exit if no tests left or current time reached
2914 if ((t
+= 3600) > now
)
2918 // Do next check not before next hour.
2919 struct tm
* tmnow
= localtime(&now
);
2920 state
.scheduled_test_next_check
= now
+ (3600 - tmnow
->tm_min
*60 - tmnow
->tm_sec
);
2923 state
.must_write
= true;
2924 // Tell user if an old test was found.
// "Old" means the match is not from the current hour (different hour of day,
// or more than an hour ago); only reported for real runs (usetime == 0).
2925 if (!usetime
&& !(testhour
== tmnow
->tm_hour
&& testtime
+ 3600 > now
)) {
2926 char datebuf
[DATEANDEPOCHLEN
]; dateandtimezoneepoch(datebuf
, testtime
);
2927 PrintOut(LOG_INFO
, "Device: %s, old test of type %c not run at %s, starting now.\n",
2928 cfg
.name
.c_str(), testtype
, datebuf
);
// PrintTestSchedule(): log which scheduled self-tests would be started in the
// future for every configured device.
//
// Parameters:
//   configs  device configurations (one per device).
//   states   matching device states; passed non-const to next_scheduled_test(),
//            which updates its scheduling fields.
//   devices  matching device objects; only is_scsi() is used here.
//
// Method: time is advanced from 'now' in steps of 'checktime' seconds for up to
// 90 days.  At each step next_scheduled_test() is asked, with the simulated
// time, whether a test would start.  Hits are counted per device and test type
// in 'testcnts' (indexed i*num_test_types + t); only the first five hits of
// each type are printed individually, followed by a totals section.
//
// NOTE(review): the declarations of 'seconds' and 'p' and some braces /
// continue statements are on lines missing from this extract.
2935 // Print a list of future tests.
2936 static void PrintTestSchedule(const dev_config_vector
& configs
, dev_state_vector
& states
, const smart_device_list
& devices
)
2938 unsigned numdev
= configs
.size();
// One counter per (device, test type) pair, all starting at 0.
2941 std::vector
<int> testcnts(numdev
* num_test_types
, 0);
2943 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2945 // FixGlibcTimeZoneBug(); // done in PrintOut()
2946 time_t now
= time(0);
2947 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2948 dateandtimezoneepoch(datenow
, now
);
2951 for (seconds
=checktime
; seconds
<3600L*24*90; seconds
+=checktime
) {
2952 // Check for each device whether a test will be run
2953 time_t testtime
= now
+ seconds
;
2954 for (unsigned i
= 0; i
< numdev
; i
++) {
2955 const dev_config
& cfg
= configs
.at(i
);
2956 dev_state
& state
= states
.at(i
);
2958 char testtype
= next_scheduled_test(cfg
, state
, devices
.at(i
)->is_scsi(), testtime
);
// Map the returned type letter to its index in test_type_chars.
2959 if (testtype
&& (p
= strchr(test_type_chars
, testtype
))) {
2960 unsigned t
= (p
- test_type_chars
);
2961 // Report at most 5 tests of each type
2962 if (++testcnts
[i
*num_test_types
+ t
] <= 5) {
2963 dateandtimezoneepoch(date
, testtime
);
2964 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
.name
.c_str(),
2965 testcnts
[i
*num_test_types
+ t
], testtype
, date
);
2972 dateandtimezoneepoch(date
, now
+seconds
);
2973 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2974 for (unsigned i
= 0; i
< numdev
; i
++) {
2975 const dev_config
& cfg
= configs
.at(i
);
2976 bool scsi
= devices
.at(i
)->is_scsi();
2977 for (unsigned t
= 0; t
< num_test_types
; t
++) {
2978 int cnt
= testcnts
[i
*num_test_types
+ t
];
// A zero count is singled out unless the type is one of "LS" (SCSI) or
// "LSCO" (other devices); the statement guarded by this test is missing here.
2979 if (cnt
== 0 && !strchr((scsi
? "LS" : "LSCO"), test_type_chars
[t
]))
2981 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
.name
.c_str(),
2982 cnt
, (cnt
==1?"":"s"), test_type_chars
[t
]);
// DoSCSISelfTest(): start a scheduled self-test on a SCSI device.
//
// Parameters:
//   cfg       device configuration (only the name is used in the visible code).
//   state     per-device state; not_cap_short / not_cap_long are set when the
//             device turns out not to support the corresponding test.
//   device    SCSI device to test.
//   testtype  requested test type letter.
//
// Visible behaviour:
//  - if scsiSelfTestInProgress() fails, both short and long tests are marked
//    as unsupported;
//  - if a self-test is already running (inProgress == 1) the request is skipped;
//  - a short test is started with scsiSmartShortSelfTest(), a long one with
//    scsiSmartExtendSelfTest();
//  - SIMPLE_ERR_BAD_OPCODE / SIMPLE_ERR_BAD_FIELD results mark the requested
//    test type as unsupported; other errors are only logged.
//
// NOTE(review): the switch on 'testtype', the declarations of 'retval' and
// 'inProgress', and all return statements are on lines missing from this
// extract; the return contract is taken from the existing comment.
2988 // Return zero on success, nonzero on failure. Perform offline (background)
2989 // short or long (extended) self test on given scsi device.
2990 static int DoSCSISelfTest(const dev_config
& cfg
, dev_state
& state
, scsi_device
* device
, char testtype
)
// testname stays null unless one of the supported test types is selected.
2993 const char *testname
= 0;
2994 const char *name
= cfg
.name
.c_str();
2997 if (scsiSelfTestInProgress(device
, &inProgress
)) {
2998 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2999 state
.not_cap_short
= state
.not_cap_long
= true;
3003 if (1 == inProgress
) {
3004 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
3005 "progress.\n", name
);
3011 testname
= "Short Self";
3012 retval
= scsiSmartShortSelfTest(device
);
3015 testname
= "Long Self";
3016 retval
= scsiSmartExtendSelfTest(device
);
3019 // If we can't do the test, exit
3020 if (NULL
== testname
) {
3021 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
// These two error codes are taken to mean the command itself is unsupported.
3026 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
3027 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
3028 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
3031 state
.not_cap_long
= true;
3033 state
.not_cap_short
= true;
3037 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
3042 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
3047 // Do an offline immediate or self-test. Return zero on success,
3048 // nonzero on failure.
// cfg supplies the device name and firmware bug flags, state receives the
// not_cap_* flags, the last selective span and the *_started flags, and
// testtype selects the test ('O' and 'c'/'n'/'r' are visible below; the other
// case labels are not visible in this listing).
3049 static int DoATASelfTest(const dev_config
& cfg
, dev_state
& state
, ata_device
* device
, char testtype
)
3051 const char *name
= cfg
.name
.c_str();
3053 // Read current smart data and check status/capability
3054 struct ata_smart_values data
;
// Both a failed read and a zero offline_data_collection_capability are
// reported as "not capable of Offline or Self-Testing".
3055 if (ataReadSmartValues(device
, &data
) || !(data
.offline_data_collection_capability
)) {
3056 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
3060 // Check for capability to do the test
// dotest starts at -1 and is only overwritten when the matching isSupport*()
// check succeeds; mode is only used for selective self-tests.
3061 int dotest
= -1, mode
= 0;
3062 const char *testname
= 0;
3065 testname
="Offline Immediate ";
3066 if (isSupportExecuteOfflineImmediate(&data
))
3067 dotest
=OFFLINE_FULL_SCAN
;
3069 state
.not_cap_offline
= true;
3072 testname
="Conveyance Self-";
3073 if (isSupportConveyanceSelfTest(&data
))
3074 dotest
=CONVEYANCE_SELF_TEST
;
3076 state
.not_cap_conveyance
= true;
3079 testname
="Short Self-";
3080 if (isSupportSelfTest(&data
))
3081 dotest
=SHORT_SELF_TEST
;
3083 state
.not_cap_short
= true;
3086 testname
="Long Self-";
3087 if (isSupportSelfTest(&data
))
3088 dotest
=EXTEND_SELF_TEST
;
3090 state
.not_cap_long
= true;
// 'c', 'n' and 'r' all request a selective self-test and differ only in the
// span mode (SEL_CONT, SEL_NEXT, SEL_REDO).
3093 case 'c': case 'n': case 'r':
3094 testname
= "Selective Self-";
3095 if (isSupportSelectiveSelfTest(&data
)) {
3096 dotest
= SELECTIVE_SELF_TEST
;
3098 case 'c': mode
= SEL_CONT
; break;
3099 case 'n': mode
= SEL_NEXT
; break;
3100 case 'r': mode
= SEL_REDO
; break;
3104 state
.not_cap_selective
= true;
3108 // If we can't do the test, exit
3110 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
3114 // If currently running a self-test, do not interrupt it to start another.
// The upper nibble of self_test_exec_status equal to 15 is treated as "test
// running"; the lower nibble is printed as tens of percent remaining.
3115 if (15==(data
.self_test_exec_status
>> 4)) {
// With BUG_SAMSUNG3 set, the exact value 0xf0 is not trusted and the
// scheduled test is started anyway.
3116 if (cfg
.firmwarebugs
.is_set(BUG_SAMSUNG3
) && data
.self_test_exec_status
== 0xf0) {
3117 PrintOut(LOG_INFO
, "Device: %s, will not skip scheduled %sTest "
3118 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name
, testname
);
3120 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3121 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
3126 if (dotest
== SELECTIVE_SELF_TEST
) {
// selargs requests a single span in the chosen mode; prev_args carries the
// span remembered in state from the previous selective test.
3128 ata_selective_selftest_args selargs
, prev_args
;
3129 selargs
.num_spans
= 1;
3130 selargs
.span
[0].mode
= mode
;
3131 prev_args
.num_spans
= 1;
3132 prev_args
.span
[0].start
= state
.selective_test_last_start
;
3133 prev_args
.span
[0].end
= state
.selective_test_last_end
;
3134 if (ataWriteSelectiveSelfTestLog(device
, selargs
, &data
, state
.num_sectors
, &prev_args
)) {
3135 PrintOut(LOG_CRIT
, "Device: %s, prepare %sTest failed\n", name
, testname
);
// start/end are read back from selargs after the log write (presumably
// filled in by ataWriteSelectiveSelfTestLog -- confirm). The percentages
// are rounded to nearest by adding num_sectors/2 before dividing.
3138 uint64_t start
= selargs
.span
[0].start
, end
= selargs
.span
[0].end
;
3139 PrintOut(LOG_INFO
, "Device: %s, %s test span at LBA %" PRIu64
" - %" PRIu64
" (%" PRIu64
" sectors, %u%% - %u%% of disk).\n",
3140 name
, (selargs
.span
[0].mode
== SEL_NEXT
? "next" : "redo"),
3141 start
, end
, end
- start
+ 1,
3142 (unsigned)((100 * start
+ state
.num_sectors
/2) / state
.num_sectors
),
3143 (unsigned)((100 * end
+ state
.num_sectors
/2) / state
.num_sectors
));
// Remember this span so that the next selective test can be based on it.
3144 state
.selective_test_last_start
= start
;
3145 state
.selective_test_last_end
= end
;
3148 // execute the test, and return status
3149 int retval
= smartcommandhandler(device
, IMMEDIATE_OFFLINE
, dotest
, NULL
);
3151 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
3155 // Report recent test start to do_disable_standby_check()
3156 // and force log of next test status
3157 if (testtype
== 'O')
3158 state
.offline_started
= true;
3160 state
.selftest_started
= true;
3162 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
3166 // Check pending sector count attribute values (-C, -U directives).
// id is the attribute ID to look up, increase_only restricts reports to
// growing counts, smartval holds the freshly read values (the previous ones
// are taken from state.smartval), mailtype is passed to MailWarning() and
// reset_warning_mail(), and msg is the text appended after the count.
3167 static void check_pending(const dev_config
& cfg
, dev_state
& state
,
3168 unsigned char id
, bool increase_only
,
3169 const ata_smart_values
& smartval
,
3170 int mailtype
, const char * msg
)
3172 // Find attribute index
3173 int i
= ata_find_attr_index(id
, smartval
);
// The attribute must exist and sit at the same index in the previously saved
// values, because index i is used for both arrays below.
3174 if (!(i
>= 0 && ata_find_attr_index(id
, state
.smartval
) == i
))
3177 // No report if no sectors pending.
3178 uint64_t rawval
= ata_get_attr_raw_value(smartval
.vendor_attributes
[i
], cfg
.attribute_defs
);
3180 reset_warning_mail(cfg
, state
, mailtype
, "No more %s", msg
);
3184 // If attribute is not reset, report only sector count increases.
3185 uint64_t prev_rawval
= ata_get_attr_raw_value(state
.smartval
.vendor_attributes
[i
], cfg
.attribute_defs
);
// Equivalent to: increase_only && prev_rawval >= rawval.
3186 if (!(!increase_only
|| prev_rawval
< rawval
))
// NOTE(review): rawval is uint64_t but is formatted with PRId64; PRIu64
// would match the declared type -- confirm whether this is intentional.
3190 std::string s
= strprintf("Device: %s, %" PRId64
" %s", cfg
.name
.c_str(), rawval
, msg
);
// The delta is only appended when a previous nonzero count exists. The
// unsigned difference is printed through a signed "%+" conversion, which
// presumably is how a decrease is shown with a minus sign -- confirm.
3191 if (prev_rawval
> 0 && rawval
!= prev_rawval
)
3192 s
+= strprintf(" (changed %+" PRId64
")", rawval
- prev_rawval
);
3194 PrintOut(LOG_CRIT
, "%s\n", s
.c_str());
3195 MailWarning(cfg
, state
, mailtype
, "%s", s
.c_str());
3196 state
.must_write
= true;
3199 // Format Temperature value
// Writes x as an unsigned decimal number into the caller-supplied 20 byte
// buffer, which is passed by reference so that sizeof(buf) is the array size.
// NOTE(review): the return statements and any special-casing of x are not
// visible in this listing.
3200 static const char * fmt_temp(unsigned char x
, char (& buf
)[20])
3204 snprintf(buf
, sizeof(buf
), "%u", x
);
3208 // Check Temperature limits
// Tracks min/max temperature in state, logs changes of at least cfg.tempdiff
// and reports crossing of cfg.tempinfo (LOG_INFO) and cfg.tempcrit (LOG_CRIT
// plus warning mail type 12). triptemp is only used for the initial log line.
3209 static void CheckTemperature(const dev_config
& cfg
, dev_state
& state
, unsigned char currtemp
, unsigned char triptemp
)
// Only values strictly between 0 and 255 are accepted as a valid reading.
3211 if (!(0 < currtemp
&& currtemp
< 255)) {
3212 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
.name
.c_str());
3216 // Update Max Temperature
// minchg / maxchg are suffix strings for the log messages below; empty means
// "unchanged" (their non-empty values are not visible in this listing).
3217 const char * minchg
= "", * maxchg
= "";
3218 if (currtemp
> state
.tempmax
) {
3221 state
.tempmax
= currtemp
;
3222 state
.must_write
= true;
// state.temperature == 0 means no temperature has been recorded yet, so this
// is the first reading for the device.
3226 if (!state
.temperature
) {
3228 if (!state
.tempmin
|| currtemp
< state
.tempmin
)
3229 // Delay Min Temperature update by ~ 30 minutes.
3230 state
.tempmin_delay
= time(0) + CHECKTIME
- 60;
3231 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3232 cfg
.name
.c_str(), (int)currtemp
, fmt_temp(state
.tempmin
, buf
), state
.tempmax
, maxchg
);
3234 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
3235 state
.temperature
= currtemp
;
3238 if (state
.tempmin_delay
) {
3239 // End Min Temperature update delay if ...
3240 if ( (state
.tempmin
&& currtemp
> state
.tempmin
) // current temp exceeds recorded min,
3241 || (state
.tempmin_delay
<= time(0))) { // or delay time is over.
3242 state
.tempmin_delay
= 0;
3244 state
.tempmin
= 255;
3248 // Update Min Temperature
// The minimum is only lowered while no update delay is pending.
3249 if (!state
.tempmin_delay
&& currtemp
< state
.tempmin
) {
3250 state
.tempmin
= currtemp
;
3251 state
.must_write
= true;
3252 if (currtemp
!= state
.temperature
)
// A change is logged when tempdiff is enabled and either a new min/max was
// recorded or the absolute difference reaches tempdiff.
3257 if (cfg
.tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)state
.temperature
) >= cfg
.tempdiff
)) {
3258 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3259 cfg
.name
.c_str(), (int)currtemp
-(int)state
.temperature
, currtemp
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
3260 state
.temperature
= currtemp
;
// Critical limit takes precedence over the informational limit.
3265 if (cfg
.tempcrit
&& currtemp
>= cfg
.tempcrit
) {
3266 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3267 cfg
.name
.c_str(), currtemp
, cfg
.tempcrit
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
3268 MailWarning(cfg
, state
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3269 cfg
.name
.c_str(), currtemp
, cfg
.tempcrit
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
3271 else if (cfg
.tempinfo
&& currtemp
>= cfg
.tempinfo
) {
3272 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3273 cfg
.name
.c_str(), currtemp
, cfg
.tempinfo
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
// The critical warning is reset once the temperature drops below tempinfo,
// or below tempcrit-5 when no tempinfo limit is configured.
3275 else if (cfg
.tempcrit
) {
3276 unsigned char limit
= (cfg
.tempinfo
? cfg
.tempinfo
: cfg
.tempcrit
-5);
3277 if (currtemp
< limit
)
3278 reset_warning_mail(cfg
, state
, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp
, limit
);
3282 // Check normalized and raw attribute values.
// attr is the freshly read attribute and prev the one saved from the previous
// check. Failed usage attributes are reported first, then changes of the
// normalized and/or raw value are logged according to cfg.monitor_attr_flags.
// NOTE(review): the attridx parameter declaration is not visible in this
// listing although attridx is used below.
3283 static void check_attribute(const dev_config
& cfg
, dev_state
& state
,
3284 const ata_smart_attribute
& attr
,
3285 const ata_smart_attribute
& prev
,
3287 const ata_smart_threshold_entry
* thresholds
)
3289 // Check attribute and threshold
3290 ata_attr_state attrstate
= ata_get_attr_state(attr
, attridx
, thresholds
, cfg
.attribute_defs
);
3291 if (attrstate
== ATTRSTATE_NON_EXISTING
)
3294 // If requested, check for usage attributes that have failed.
3295 if ( cfg
.usagefailed
&& attrstate
== ATTRSTATE_FAILED_NOW
3296 && !cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_IGN_FAILUSE
)) {
3297 std::string attrname
= ata_get_smart_attr_name(attr
.id
, cfg
.attribute_defs
, cfg
.dev_rpm
);
3298 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg
.name
.c_str(), attr
.id
, attrname
.c_str());
3299 MailWarning(cfg
, state
, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg
.name
.c_str(), attr
.id
, attrname
.c_str());
3300 state
.must_write
= true;
3303 // Return if we're not tracking this type of attribute
// Prefailure attributes are tracked via cfg.prefail, all others via cfg.usage.
3304 bool prefail
= !!ATTRIBUTE_FLAGS_PREFAILURE(attr
.flags
);
3305 if (!( ( prefail
&& cfg
.prefail
)
3306 || (!prefail
&& cfg
.usage
)))
3309 // Return if '-I ID' was specified
3310 if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_IGNORE
))
3313 // Issue warning if they don't have the same ID in all structures.
3314 if (attr
.id
!= prev
.id
) {
3315 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3316 cfg
.name
.c_str(), attr
.id
, prev
.id
);
3320 // Compare normalized values if valid.
3321 bool valchanged
= false;
3322 if (attrstate
> ATTRSTATE_NO_NORMVAL
) {
3323 if (attr
.current
!= prev
.current
)
3327 // Compare raw values if requested.
3328 bool rawchanged
= false;
3329 if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW
)) {
3330 if ( ata_get_attr_raw_value(attr
, cfg
.attribute_defs
)
3331 != ata_get_attr_raw_value(prev
, cfg
.attribute_defs
))
3335 // Return if no change
3336 if (!(valchanged
|| rawchanged
))
3339 // Format value strings
// Three formats: raw only (no normalized value available), normalized plus
// raw (MONITOR_RAW_PRINT), or normalized only.
3340 std::string currstr
, prevstr
;
3341 if (attrstate
== ATTRSTATE_NO_NORMVAL
) {
3342 // Print raw values only
3343 currstr
= strprintf("%s (Raw)",
3344 ata_format_attr_raw_value(attr
, cfg
.attribute_defs
).c_str());
3345 prevstr
= strprintf("%s (Raw)",
3346 ata_format_attr_raw_value(prev
, cfg
.attribute_defs
).c_str());
3348 else if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW_PRINT
)) {
3349 // Print normalized and raw values
3350 currstr
= strprintf("%d [Raw %s]", attr
.current
,
3351 ata_format_attr_raw_value(attr
, cfg
.attribute_defs
).c_str());
3352 prevstr
= strprintf("%d [Raw %s]", prev
.current
,
3353 ata_format_attr_raw_value(prev
, cfg
.attribute_defs
).c_str());
3356 // Print normalized values only
3357 currstr
= strprintf("%d", attr
.current
);
3358 prevstr
= strprintf("%d", prev
.current
);
// The message is built once and reused for the log and, if critical, the mail.
3362 std::string msg
= strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3363 cfg
.name
.c_str(), (prefail
? "Prefailure" : "Usage"), attr
.id
,
3364 ata_get_smart_attr_name(attr
.id
, cfg
.attribute_defs
, cfg
.dev_rpm
).c_str(),
3365 prevstr
.c_str(), currstr
.c_str());
3367 // Report this change as critical ?
3368 if ( (valchanged
&& cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_AS_CRIT
))
3369 || (rawchanged
&& cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW_AS_CRIT
))) {
3370 PrintOut(LOG_CRIT
, "%s\n", msg
.c_str());
3371 MailWarning(cfg
, state
, 2, "%s", msg
.c_str());
3374 PrintOut(LOG_INFO
, "%s\n", msg
.c_str());
3376 state
.must_write
= true;
// Run one monitoring pass on an ATA device: optional power mode check (-n),
// SMART status, attribute / pending sector / temperature checks, self-test and
// error log counts, and finally a scheduled self-test if one is due.
// firstpass widens the logging of the offline and self-test status;
// allow_selftests gates the scheduled self-test at the end.
// NOTE(review): the return statements are not visible in this listing.
3380 static int ATACheckDevice(const dev_config
& cfg
, dev_state
& state
, ata_device
* atadev
,
3381 bool firstpass
, bool allow_selftests
)
3383 if (!open_device(cfg
, state
, atadev
, "ATA"))
3386 const char * name
= cfg
.name
.c_str();
3388 // user may have requested (with the -n Directive) to leave the disk
3389 // alone if it is in idle or sleeping mode. In this case check the
3390 // power mode and exit without check if needed
// The power mode check is disabled for good once powermodefail has been set.
3391 if (cfg
.powermode
&& !state
.powermodefail
) {
3392 int dontcheck
=0, powermode
=ataCheckPowerMode(atadev
);
3393 const char * mode
= 0;
3394 if (0 <= powermode
&& powermode
< 0xff) {
3395 // wait for possible spin up and check again
// A larger second reading is taken as a sign that the query itself spun
// the disk up; the second reading is the one used from here on.
3398 powermode2
= ataCheckPowerMode(atadev
);
3399 if (powermode2
> powermode
)
3400 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
3401 powermode
= powermode2
;
// Each threshold below compares the configured -n level with the level of
// one reported power mode (the case labels are not visible in this listing).
3408 if (cfg
.powermode
>=1)
3414 if (cfg
.powermode
>=2)
3420 if (cfg
.powermode
>=2)
3426 if (cfg
.powermode
>=3)
3432 if (cfg
.powermode
>=3)
3438 if (cfg
.powermode
>=3)
3444 if (cfg
.powermode
>=3)
3453 mode
="ACTIVE or IDLE";
3457 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3459 state
.powermodefail
= true;
3463 // if we are going to skip a check, return now
3465 // skip at most powerskipmax checks
// powerskipmax == 0 means there is no limit on the number of skipped checks.
3466 if (!cfg
.powerskipmax
|| state
.powerskipcnt
<cfg
.powerskipmax
) {
3467 CloseDevice(atadev
, name
);
3468 // report first only except if state has changed, avoid waking up system disk
3469 if ((!state
.powerskipcnt
|| state
.lastpowermodeskipped
!= powermode
) && !cfg
.powerquiet
) {
3470 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
3471 state
.lastpowermodeskipped
= powermode
;
3473 state
.powerskipcnt
++;
3477 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3478 name
, mode
, state
.powerskipcnt
, (state
.powerskipcnt
==1?"":"s"));
3480 state
.powerskipcnt
= 0;
3481 state
.tempmin_delay
= time(0) + CHECKTIME
- 60; // Delay Min Temperature update
3483 else if (state
.powerskipcnt
) {
3484 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3485 name
, mode
, state
.powerskipcnt
, (state
.powerskipcnt
==1?"":"s"));
3486 state
.powerskipcnt
= 0;
3487 state
.tempmin_delay
= time(0) + CHECKTIME
- 60; // Delay Min Temperature update
3491 // check smart status
3492 if (cfg
.smartcheck
) {
3493 int status
=ataSmartStatus2(atadev
);
3495 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
3496 MailWarning(cfg
, state
, 5, "Device: %s, not capable of SMART self-check", name
);
3497 state
.must_write
= true;
// status == 1 is reported as a failed SMART self-check.
3499 else if (status
==1){
3500 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
3501 MailWarning(cfg
, state
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
3502 state
.must_write
= true;
3506 // Check everything that depends upon SMART Data (eg, Attribute values)
// The SMART data is only read when at least one option that needs it is set.
3507 if ( cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
3508 || cfg
.curr_pending_id
|| cfg
.offl_pending_id
3509 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
3510 || cfg
.selftest
|| cfg
.offlinests
|| cfg
.selfteststs
) {
3512 // Read current attribute values.
3513 ata_smart_values curval
;
3514 if (ataReadSmartValues(atadev
, &curval
)){
3515 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
3516 MailWarning(cfg
, state
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
3517 state
.must_write
= true;
3520 reset_warning_mail(cfg
, state
, 6, "read SMART Attribute Data worked again");
3522 // look for current or offline pending sectors
// The message wording depends on whether only increases are reported.
3523 if (cfg
.curr_pending_id
)
3524 check_pending(cfg
, state
, cfg
.curr_pending_id
, cfg
.curr_pending_incr
, curval
, 10,
3525 (!cfg
.curr_pending_incr
? "Currently unreadable (pending) sectors"
3526 : "Total unreadable (pending) sectors" ));
3528 if (cfg
.offl_pending_id
)
3529 check_pending(cfg
, state
, cfg
.offl_pending_id
, cfg
.offl_pending_incr
, curval
, 11,
3530 (!cfg
.offl_pending_incr
? "Offline uncorrectable sectors"
3531 : "Total offline uncorrectable sectors"));
3533 // check temperature limits
// No trip temperature is available here, so 0 is passed for triptemp.
3534 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
3535 CheckTemperature(cfg
, state
, ata_return_temperature_value(&curval
, cfg
.attribute_defs
), 0);
3537 // look for failed usage attributes, or track usage or prefail attributes
3538 if (cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
) {
3539 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
3540 check_attribute(cfg
, state
,
3541 curval
.vendor_attributes
[i
],
3542 state
.smartval
.vendor_attributes
[i
],
3543 i
, state
.smartthres
.thres_entries
);
3547 // Log changes of offline data collection status
3548 if (cfg
.offlinests
) {
3549 if ( curval
.offline_data_collection_status
3550 != state
.smartval
.offline_data_collection_status
3551 || state
.offline_started
// test was started in previous call
3552 || (firstpass
&& (debugmode
|| (curval
.offline_data_collection_status
& 0x7d))))
3553 log_offline_data_coll_status(name
, curval
.offline_data_collection_status
);
3556 // Log changes of self-test execution status
3557 if (cfg
.selfteststs
) {
3558 if ( curval
.self_test_exec_status
!= state
.smartval
.self_test_exec_status
3559 || state
.selftest_started
// test was started in previous call
3560 || (firstpass
&& (debugmode
|| (curval
.self_test_exec_status
& 0xf0))))
3561 log_self_test_exec_status(name
, curval
.self_test_exec_status
);
3564 // Save the new values for the next time around
3565 state
.smartval
= curval
;
// Both "started" flags are cleared here, after the status logging above
// had the chance to consume them.
3568 state
.offline_started
= state
.selftest_started
= false;
3570 // check if number of selftest errors has increased (note: may also DECREASE)
3572 CheckSelfTestLogs(cfg
, state
, SelfTestErrorCount(atadev
, name
, cfg
.firmwarebugs
));
3574 // check if number of ATA errors has increased
3575 if (cfg
.errorlog
|| cfg
.xerrorlog
) {
// -1 marks a log that was not read; the last argument of
// read_ata_error_count() is false for errcnt1 and true for errcnt2.
3577 int errcnt1
= -1, errcnt2
= -1;
3579 errcnt1
= read_ata_error_count(atadev
, name
, cfg
.firmwarebugs
, false);
3581 errcnt2
= read_ata_error_count(atadev
, name
, cfg
.firmwarebugs
, true);
3583 // new number of errors is max of both logs
3584 int newc
= (errcnt1
>= errcnt2
? errcnt1
: errcnt2
);
3586 // did command fail?
3588 // lack of PrintOut here is INTENTIONAL
3589 MailWarning(cfg
, state
, 7, "Device: %s, Read SMART Error Log Failed", name
);
3591 // has error count increased?
3592 int oldc
= state
.ataerrorcount
;
3594 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
3596 MailWarning(cfg
, state
, 4, "Device: %s, ATA error count increased from %d to %d",
3598 state
.must_write
= true;
3602 state
.ataerrorcount
=newc
;
3605 // if the user has asked, and device is capable (or we're not yet
3606 // sure) check whether a self test should be done now.
3607 if (allow_selftests
&& !cfg
.test_regex
.empty()) {
3608 char testtype
= next_scheduled_test(cfg
, state
, false/*!scsi*/);
3610 DoATASelfTest(cfg
, state
, atadev
, testtype
);
3613 // Don't leave device open -- the OS/user may want to access it
3614 // before the next smartd cycle!
3615 CloseDevice(atadev
, name
);
3617 // Copy ATA attribute values to persistent state
3618 state
.update_persistent_state();
// Run one monitoring pass on a SCSI device: read the Informational Exceptions
// data (asc/ascq plus current and trip temperature), report SMART failures,
// check the temperature limits and the self-test log, start a scheduled
// self-test if one is due and, when an attribute log file is configured,
// refresh the error counter pages kept in state.
// NOTE(review): the return statements are not visible in this listing.
3623 static int SCSICheckDevice(const dev_config
& cfg
, dev_state
& state
, scsi_device
* scsidev
, bool allow_selftests
)
3625 if (!open_device(cfg
, state
, scsidev
, "SCSI"))
3628 const char * name
= cfg
.name
.c_str();
3630 uint8_t asc
= 0, ascq
= 0;
3631 uint8_t currenttemp
= 0, triptemp
= 0;
// After the first read failure SuppressReport is set, so scsiCheckIE() is
// not called again and the failure is reported only once.
3632 if (!state
.SuppressReport
) {
3633 if (scsiCheckIE(scsidev
, state
.SmartPageSupported
, state
.TempPageSupported
,
3634 &asc
, &ascq
, &currenttemp
, &triptemp
)) {
3635 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
3637 MailWarning(cfg
, state
, 6, "Device: %s, failed to read SMART values", name
);
3638 state
.SuppressReport
= 1;
// scsiGetIEString() translates asc/ascq into a failure description; the
// test that decides whether cp denotes a failure is not visible here.
3642 const char * cp
= scsiGetIEString(asc
, ascq
);
3644 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
3645 MailWarning(cfg
, state
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
// asc/ascq 4/9 is logged as "self-test in progress" rather than a failure.
3646 } else if (asc
== 4 && ascq
== 9) {
3647 PrintOut(LOG_INFO
,"Device: %s, self-test in progress\n", name
);
3648 } else if (debugmode
)
3649 PrintOut(LOG_INFO
,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3650 name
, (int)asc
, (int)ascq
);
3651 } else if (debugmode
)
3652 PrintOut(LOG_INFO
,"Device: %s, SMART health: passed\n", name
);
3654 // check temperature limits
3655 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
3656 CheckTemperature(cfg
, state
, currenttemp
, triptemp
);
3658 // check if number of selftest errors has increased (note: may also DECREASE)
3660 CheckSelfTestLogs(cfg
, state
, scsiCountFailedSelfTests(scsidev
, 0));
3662 if (allow_selftests
&& !cfg
.test_regex
.empty()) {
3663 char testtype
= next_scheduled_test(cfg
, state
, true/*scsi*/);
3665 DoSCSISelfTest(cfg
, state
, scsidev
, testtype
);
3667 if (!cfg
.attrlog_file
.empty()){
3668 // saving error counters to state
// Slots 0, 1 and 2 of scsi_error_counters hold the read, write and verify
// counters; a page is only fetched when state marks it as supported, and
// "found" is only set after a successful scsiLogSense().
3670 if (state
.ReadECounterPageSupported
&& (0 == scsiLogSense(scsidev
,
3671 READ_ERROR_COUNTER_LPAGE
, 0, tBuf
, sizeof(tBuf
), 0))) {
3672 scsiDecodeErrCounterPage(tBuf
, &state
.scsi_error_counters
[0].errCounter
);
3673 state
.scsi_error_counters
[0].found
=1;
3675 if (state
.WriteECounterPageSupported
&& (0 == scsiLogSense(scsidev
,
3676 WRITE_ERROR_COUNTER_LPAGE
, 0, tBuf
, sizeof(tBuf
), 0))) {
3677 scsiDecodeErrCounterPage(tBuf
, &state
.scsi_error_counters
[1].errCounter
);
3678 state
.scsi_error_counters
[1].found
=1;
3680 if (state
.VerifyECounterPageSupported
&& (0 == scsiLogSense(scsidev
,
3681 VERIFY_ERROR_COUNTER_LPAGE
, 0, tBuf
, sizeof(tBuf
), 0))) {
3682 scsiDecodeErrCounterPage(tBuf
, &state
.scsi_error_counters
[2].errCounter
);
3683 state
.scsi_error_counters
[2].found
=1;
3685 if (state
.NonMediumErrorPageSupported
&& (0 == scsiLogSense(scsidev
,
3686 NON_MEDIUM_ERROR_LPAGE
, 0, tBuf
, sizeof(tBuf
), 0))) {
3687 scsiDecodeNonMediumErrPage(tBuf
, &state
.scsi_nonmedium_error
.nme
);
3688 state
.scsi_nonmedium_error
.found
=1;
3690 // store temperature if not done by CheckTemperature() above
3691 if (!(cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
))
3692 state
.temperature
= currenttemp
;
3694 CloseDevice(scsidev
, name
);
// Run one monitoring pass on an NVMe device: read the SMART/Health log, report
// set Critical Warning bits, check the temperature limits and report a grown
// number of Error Log entries.
// NOTE(review): the return statements are not visible in this listing.
3698 static int NVMeCheckDevice(const dev_config
& cfg
, dev_state
& state
, nvme_device
* nvmedev
)
3700 if (!open_device(cfg
, state
, nvmedev
, "NVMe"))
3703 const char * name
= cfg
.name
.c_str();
3705 // Read SMART/Health log
3706 nvme_smart_log smart_log
;
3707 if (!nvme_read_smart_log(nvmedev
, smart_log
)) {
3708 PrintOut(LOG_INFO
, "Device: %s, failed to read NVMe SMART/Health Information\n", name
);
3709 MailWarning(cfg
, state
, 6, "Device: %s, failed to read NVMe SMART/Health Information", name
);
3710 state
.must_write
= true;
3714 // Check Critical Warning bits
3715 if (cfg
.smartcheck
&& smart_log
.critical_warning
) {
3716 unsigned char w
= smart_log
.critical_warning
;
// wnames[b] is the display name of warning bit b; bits beyond the table are
// reported as "*Unknown*".
3718 static const char * const wnames
[] =
3719 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
// Walk all 8 bits of the warning byte; cnt presumably limits how many names
// are listed before "..." is appended -- its use is not visible here.
3721 for (unsigned b
= 0, cnt
= 0; b
< 8 ; b
++) {
3722 if (!(w
& (1 << b
)))
3727 msg
+= "..."; break;
3729 if (b
>= sizeof(wnames
)/sizeof(wnames
[0])) {
3730 msg
+= "*Unknown*"; break;
3735 PrintOut(LOG_CRIT
, "Device: %s, Critical Warning (0x%02x): %s\n", name
, w
, msg
.c_str());
3736 MailWarning(cfg
, state
, 1, "Device: %s, Critical Warning (0x%02x): %s", name
, w
, msg
.c_str());
3737 state
.must_write
= true;
3740 // Check temperature limits
3741 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
) {
3742 int k
= nvme_get_max_temp_kelvin(smart_log
);
3743 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
// No trip temperature is available here, so 0 is passed for triptemp.
3749 CheckTemperature(cfg
, state
, c
, 0);
3752 // Check if number of errors has increased
3753 if (cfg
.errorlog
|| cfg
.xerrorlog
) {
3754 uint64_t oldcnt
= state
.nvme_err_log_entries
;
// The log field is converted to a 64 bit count by le128_to_uint64().
3755 uint64_t newcnt
= le128_to_uint64(smart_log
.num_err_log_entries
);
3756 if (newcnt
> oldcnt
) {
3757 PrintOut(LOG_CRIT
, "Device: %s, number of Error Log entries increased from %" PRIu64
" to %" PRIu64
"\n",
3758 name
, oldcnt
, newcnt
);
3759 MailWarning(cfg
, state
, 4, "Device: %s, number of Error Log entries increased from %" PRIu64
" to %" PRIu64
,
3760 name
, oldcnt
, newcnt
);
3761 state
.must_write
= true;
// The stored count follows the device even when it did not increase.
3763 state
.nvme_err_log_entries
= newcnt
;
3766 CloseDevice(nvmedev
, name
);
// State of the "disable system auto standby while self-tests run" feature.
3770 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3771 static int standby_disable_state
= 0;
// Initialize standby_disable_state from the device configs: the feature is
// used (state 1) when any config has offlinests_ns or selfteststs_ns set and
// the platform accepts the disable_system_auto_standby(false) call.
3773 static void init_disable_standby_check(dev_config_vector
& configs
)
3775 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3776 bool sts1
= false, sts2
= false;
// The scan stops as soon as either flag has been found.
3777 for (unsigned i
= 0; i
< configs
.size() && !(sts1
|| sts2
); i
++) {
3778 const dev_config
& cfg
= configs
.at(i
);
3779 if (cfg
.offlinests_ns
)
3781 if (cfg
.selfteststs_ns
)
3785 // Check for support of disable auto standby
3786 // Reenable standby if smartd.conf was reread
3787 if (sts1
|| sts2
|| standby_disable_state
== 3) {
3788 if (!smi()->disable_system_auto_standby(false)) {
// A failure while standby was actually disabled (state 3) is critical;
// otherwise it only means the feature is unsupported and ',ns' is ignored.
3789 if (standby_disable_state
== 3)
3790 PrintOut(LOG_CRIT
, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3792 PrintOut(LOG_INFO
, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3793 (sts1
? "-l offlinests,ns" : ""), (sts1
&& sts2
? " and " : ""), (sts2
? "-l selfteststs,ns" : ""));
3794 sts1
= sts2
= false;
3799 standby_disable_state
= (sts1
|| sts2
? 1 : 0);
// Disable system auto standby while an offline or self-test is running on any
// device that requested it with ',ns', and re-enable it afterwards. State
// transitions of standby_disable_state are logged only when the state changes.
3802 static void do_disable_standby_check(const dev_config_vector
& configs
, const dev_state_vector
& states
)
// Nothing to do when the feature is not used (state 0).
3804 if (!standby_disable_state
)
3807 // Check for just started or still running self-tests
3808 bool running
= false;
3809 for (unsigned i
= 0; i
< configs
.size() && !running
; i
++) {
3810 const dev_config
& cfg
= configs
.at(i
); const dev_state
& state
= states
.at(i
);
// A test counts as running if it was just started in this cycle or the
// last saved status byte still reports it as in progress.
3812 if ( ( cfg
.offlinests_ns
3813 && (state
.offline_started
||
3814 is_offl_coll_in_progress(state
.smartval
.offline_data_collection_status
)))
3815 || ( cfg
.selfteststs_ns
3816 && (state
.selftest_started
||
3817 is_self_test_in_progress(state
.smartval
.self_test_exec_status
))) )
3819 // state.offline/selftest_started will be reset after next logging of test status
3822 // Disable/enable auto standby and log state changes
// Re-enable path: only acts when the state is not already 1.
3824 if (standby_disable_state
!= 1) {
3825 if (!smi()->disable_system_auto_standby(false))
3826 PrintOut(LOG_CRIT
, "Self-test(s) completed, system auto standby enable failed: %s\n",
3827 smi()->get_errmsg());
3829 PrintOut(LOG_INFO
, "Self-test(s) completed, system auto standby enabled\n");
3830 standby_disable_state
= 1;
// Disable path: a rejected request moves to state 2, an accepted one to 3.
3833 else if (!smi()->disable_system_auto_standby(true)) {
3834 if (standby_disable_state
!= 2) {
3835 PrintOut(LOG_INFO
, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3836 smi()->get_errmsg());
3837 standby_disable_state
= 2;
3841 if (standby_disable_state
!= 3) {
3842 PrintOut(LOG_INFO
, "Self-test(s) in progress, system auto standby disabled\n");
3843 standby_disable_state
= 3;
3848 // Checks the SMART status of all ATA, SCSI and NVMe devices
// configs, states and devices are parallel containers indexed by the same i.
// firstpass is only forwarded to the ATA check; allow_selftests is forwarded
// to the ATA and SCSI checks. The standby check runs once after all devices.
3849 static void CheckDevicesOnce(const dev_config_vector
& configs
, dev_state_vector
& states
,
3850 smart_device_list
& devices
, bool firstpass
, bool allow_selftests
)
3852 for (unsigned i
= 0; i
< configs
.size(); i
++) {
3853 const dev_config
& cfg
= configs
.at(i
);
3854 dev_state
& state
= states
.at(i
);
3855 smart_device
* dev
= devices
.at(i
);
// Dispatch on the device type (the ATA test itself is not visible here).
3857 ATACheckDevice(cfg
, state
, dev
->to_ata(), firstpass
, allow_selftests
);
3858 else if (dev
->is_scsi())
3859 SCSICheckDevice(cfg
, state
, dev
->to_scsi(), allow_selftests
);
3860 else if (dev
->is_nvme())
3861 NVMeCheckDevice(cfg
, state
, dev
->to_nvme());
3864 do_disable_standby_check(configs
, states
);
3867 // Install all signal handlers
// Every handler is installed through set_signal_if_not_ignored().
3868 static void install_signal_handlers()
3870 // normal and abnormal exit
3871 set_signal_if_not_ignored(SIGTERM
, sighandler
);
3872 set_signal_if_not_ignored(SIGQUIT
, sighandler
);
3874 // in debug mode, <CONTROL-C> ==> HUP
3875 set_signal_if_not_ignored(SIGINT
, (debugmode
? HUPhandler
: sighandler
));
3877 // Catch HUP and USR1
3878 set_signal_if_not_ignored(SIGHUP
, HUPhandler
);
3879 set_signal_if_not_ignored(SIGUSR1
, USR1handler
);
// NOTE(review): the condition guarding the USR2 handler is not visible in
// this listing.
3881 set_signal_if_not_ignored(SIGUSR2
, USR2handler
);
3886 // Toggle debug mode implemented for native windows only
3887 // (there is no easy way to reopen tty on *nix)
// Enabling opens a console and routes SIGINT to HUPhandler; disabling closes
// the console and routes SIGINT back to sighandler. Only debugmode == 1 is
// switched off again, any other value is reported as "not changed".
3888 static void ToggleDebugMode()
3891 PrintOut(LOG_INFO
,"Signal USR2 - enabling debug mode\n");
// The branch taken when daemon_enable_console() returns zero enables debug
// mode; the failure message below belongs to the other branch.
3892 if (!daemon_enable_console("smartd [Debug]")) {
3894 daemon_signal(SIGINT
, HUPhandler
);
3895 PrintOut(LOG_INFO
,"smartd debug mode enabled, PID=%d\n", getpid());
3898 PrintOut(LOG_INFO
,"enable console failed\n");
3900 else if (debugmode
== 1) {
3901 daemon_disable_console();
3903 daemon_signal(SIGINT
, sighandler
);
3904 PrintOut(LOG_INFO
,"Signal USR2 - debug mode disabled\n");
3907 PrintOut(LOG_INFO
,"Signal USR2 - debug mode %d not changed\n", debugmode
);
// Sleep until wakeuptime, or until a USR1, HUP or exit signal is caught.
// wakeuptime is first advanced in whole checktime steps if it already lies in
// the past; numdev is only passed on to notify_wait().
// NOTE(review): the assignments to sigwakeup and the return statement are not
// visible in this listing.
3911 static time_t dosleep(time_t wakeuptime
, bool & sigwakeup
, int numdev
)
3913 // If past wake-up-time, compute next wake-up-time
3914 time_t timenow
=time(NULL
);
3915 while (wakeuptime
<=timenow
){
// Number of whole check intervals needed to move wakeuptime past timenow.
3916 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
3917 wakeuptime
+=intervals
*checktime
;
3920 notify_wait(wakeuptime
, numdev
);
3922 // sleep until we catch SIGUSR1 or have completed sleeping
// addtime extends the sleep after a detected wakeup from standby (see below).
3924 while (timenow
< wakeuptime
+addtime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
) {
3926 // protect user against system clock being adjusted backwards
3927 if (wakeuptime
>timenow
+checktime
){
3928 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3929 wakeuptime
=timenow
+checktime
;
3932 // Exit sleep when time interval has expired or a signal is received
3933 sleep(wakeuptime
+addtime
-timenow
);
3936 // toggle debug mode?
3937 if (caughtsigUSR2
) {
3945 // Actual sleep time too long?
// More than 60 seconds of oversleep is taken as a resume from standby; this
// is only evaluated while no extra time has been added yet.
3946 if (!addtime
&& timenow
> wakeuptime
+60) {
3948 PrintOut(LOG_INFO
, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3949 (int)(timenow
-wakeuptime
));
3950 // Wait another 20 seconds to avoid I/O errors during disk spin-up
3951 addtime
= timenow
-wakeuptime
+20;
3952 // Use next wake-up-time if close
// nextcheck is the distance from the extended wakeup to the next regular
// check; if it is at most 20 seconds the two are merged.
3953 int nextcheck
= checktime
- addtime
% checktime
;
3954 if (nextcheck
<= 20)
3955 addtime
+= nextcheck
;
3959 // if we caught a SIGUSR1 then print message and clear signal
// The remaining time is clamped to 0 so that no negative value is printed.
3961 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3962 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
3967 // return adjusted wakeuptime
3971 // Print out a list of valid arguments for the Directive d
// priority is passed through to PrintOut() as the log level.
// NOTE(review): the switch on d and its case labels are not visible in this
// listing, so which text belongs to which Directive cannot be told from here.
3972 static void printoutvaliddirectiveargs(int priority
, char d
)
3976 PrintOut(priority
, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3979 PrintOut(priority
, "valid_regular_expression");
3982 PrintOut(priority
, "%s", smi()->get_valid_dev_types_str().c_str());
3985 PrintOut(priority
, "normal, permissive");
3989 PrintOut(priority
, "on, off");
3992 PrintOut(priority
, "error, selftest");
3995 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3998 PrintOut(priority
, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4001 PrintOut(priority
, "use, ignore, show, showall");
4004 PrintOut(priority
, "%s", get_valid_firmwarebug_args());
4007 PrintOut(priority
, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4008 "security-freeze, standby,[N|off], wcache,[on|off]");
4013 // exits with an error message, or returns integer value of token
// arg is the text to parse; name, token, lineno and cfgfile are only used to
// build the error messages; min and max are the inclusive valid range. If
// suffix is given, a trailing text equal to it is accepted after the number.
4014 static int GetInteger(const char *arg
, const char *name
, const char *token
, int lineno
, const char *cfgfile
,
4015 int min
, int max
, char * suffix
= 0)
4017 // make sure argument is there
4019 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4020 cfgfile
, lineno
, name
, token
, min
, max
);
4024 // get argument value (base 10), check that it's integer, and in-range
// NOTE(review): strtol() returns long; the result is narrowed to int here
// before the range check -- confirm this is acceptable for all callers.
4026 int val
= strtol(arg
,&endptr
,10);
4028 // optional suffix present?
// The suffix must match the whole remainder of the argument.
4030 if (!strcmp(endptr
, suffix
))
4031 endptr
+= strlen(suffix
);
// Valid only if nothing is left after the number (and suffix) and the value
// lies within [min, max].
4036 if (!(!*endptr
&& min
<= val
&& val
<= max
)) {
4037 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4038 cfgfile
, lineno
, name
, token
, arg
, min
, max
);
4042 // all is well; return value
4047 // Get 1-3 small integer(s) for '-W' directive
4048 static int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *cfgfile
,
4049 unsigned char *val1
, unsigned char *val2
, unsigned char *val3
)
4051 unsigned v1
= 0, v2
= 0, v3
= 0;
4052 int n1
= -1, n2
= -1, n3
= -1, len
;
4054 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4055 cfgfile
, lineno
, name
, token
);
4060 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
4061 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
4062 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4063 cfgfile
, lineno
, name
, token
, arg
);
4066 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
4073 // Concatenate strtok() results if quoted with "..."
4074 static const char * strtok_dequote(const char * delimiters
)
4076 const char * t
= strtok(0, delimiters
);
4077 if (!t
|| t
[0] != '"')
4080 static std::string token
;
4083 t
= strtok(0, delimiters
);
4087 int len
= strlen(t
);
4088 if (t
[len
-1] == '"') {
4089 token
+= std::string(t
, len
-1);
4094 return token
.c_str();
4100 // This function returns 1 if it has correctly parsed one token (and
4101 // any arguments), else zero if no tokens remain. It returns -1 if an
4102 // error was encountered.
4103 static int ParseToken(char * token
, dev_config
& cfg
, smart_devtype_list
& scan_types
)
4106 const char * name
= cfg
.name
.c_str();
4107 int lineno
=cfg
.lineno
;
4108 const char *delim
= " \n\t";
4111 const char *arg
= 0;
4113 // is the rest of the line a comment
4117 // is the token not recognized?
4118 if (*token
!='-' || strlen(token
)!=2) {
4119 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
4120 configfile
, lineno
, name
, token
);
4121 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
4125 // token we will be parsing:
4128 // parse the token and swallow its argument
4130 char plus
[] = "+", excl
[] = "!";
4134 // monitor current pending sector count (default 197)
4135 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255, plus
)) < 0)
4137 cfg
.curr_pending_id
= (unsigned char)val
;
4138 cfg
.curr_pending_incr
= (*plus
== '+');
4139 cfg
.curr_pending_set
= true;
4142 // monitor offline uncorrectable sectors (default 198)
4143 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255, plus
)) < 0)
4145 cfg
.offl_pending_id
= (unsigned char)val
;
4146 cfg
.offl_pending_incr
= (*plus
== '+');
4147 cfg
.offl_pending_set
= true;
4150 // Set tolerance level for SMART command failures
4151 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
4153 } else if (!strcmp(arg
, "normal")) {
4154 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4155 // not on failure of an optional S.M.A.R.T. command.
4156 // This is the default so we don't need to actually do anything here.
4157 cfg
.permissive
= false;
4158 } else if (!strcmp(arg
, "permissive")) {
4159 // Permissive mode; ignore errors from Mandatory SMART commands
4160 cfg
.permissive
= true;
4166 // specify the device type
4167 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
4169 } else if (!strcmp(arg
, "ignore")) {
4171 } else if (!strcmp(arg
, "removable")) {
4172 cfg
.removable
= true;
4173 } else if (!strcmp(arg
, "auto")) {
4178 scan_types
.push_back(arg
);
4183 if (!(arg
= strtok(0, delim
)))
4185 else if (!parse_firmwarebug_def(arg
, cfg
.firmwarebugs
))
4189 // check SMART status
4190 cfg
.smartcheck
= true;
4193 // check for failure of usage attributes
4194 cfg
.usagefailed
= true;
4197 // track changes in all vendor attributes
4202 // track changes in prefail vendor attributes
4206 // track changes in usage vendor attributes
4210 // track changes in SMART logs
4211 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
4213 } else if (!strcmp(arg
, "selftest")) {
4214 // track changes in self-test log
4215 cfg
.selftest
= true;
4216 } else if (!strcmp(arg
, "error")) {
4217 // track changes in ATA error log
4218 cfg
.errorlog
= true;
4219 } else if (!strcmp(arg
, "xerror")) {
4220 // track changes in Extended Comprehensive SMART error log
4221 cfg
.xerrorlog
= true;
4222 } else if (!strcmp(arg
, "offlinests")) {
4223 // track changes in offline data collection status
4224 cfg
.offlinests
= true;
4225 } else if (!strcmp(arg
, "offlinests,ns")) {
4226 // track changes in offline data collection status, disable auto standby
4227 cfg
.offlinests
= cfg
.offlinests_ns
= true;
4228 } else if (!strcmp(arg
, "selfteststs")) {
4229 // track changes in self-test execution status
4230 cfg
.selfteststs
= true;
4231 } else if (!strcmp(arg
, "selfteststs,ns")) {
4232 // track changes in self-test execution status, disable auto standby
4233 cfg
.selfteststs
= cfg
.selfteststs_ns
= true;
4234 } else if (!strncmp(arg
, "scterc,", sizeof("scterc,")-1)) {
4235 // set SCT Error Recovery Control
4236 unsigned rt
= ~0, wt
= ~0; int nc
= -1;
4237 sscanf(arg
,"scterc,%u,%u%n", &rt
, &wt
, &nc
);
4238 if (nc
== (int)strlen(arg
) && rt
<= 999 && wt
<= 999) {
4239 cfg
.sct_erc_set
= true;
4240 cfg
.sct_erc_readtime
= rt
;
4241 cfg
.sct_erc_writetime
= wt
;
4250 // monitor everything
4251 cfg
.smartcheck
= true;
4253 cfg
.usagefailed
= true;
4255 cfg
.selftest
= true;
4256 cfg
.errorlog
= true;
4257 cfg
.selfteststs
= true;
4260 // automatic offline testing enable/disable
4261 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
4263 } else if (!strcmp(arg
, "on")) {
4264 cfg
.autoofflinetest
= 2;
4265 } else if (!strcmp(arg
, "off")) {
4266 cfg
.autoofflinetest
= 1;
4272 // skip disk check if in idle or standby mode
4273 if (!(arg
= strtok(NULL
, delim
)))
4276 char *endptr
= NULL
;
4277 char *next
= strchr(const_cast<char*>(arg
), ',');
4279 cfg
.powerquiet
= false;
4280 cfg
.powerskipmax
= 0;
4282 if (next
!=NULL
) *next
='\0';
4283 if (!strcmp(arg
, "never"))
4285 else if (!strcmp(arg
, "sleep"))
4287 else if (!strcmp(arg
, "standby"))
4289 else if (!strcmp(arg
, "idle"))
4294 // if optional arguments are present
4295 if (!badarg
&& next
!=NULL
) {
4297 cfg
.powerskipmax
= strtol(next
, &endptr
, 10);
4299 cfg
.powerskipmax
= 0;
4301 next
= endptr
+ (*endptr
!= '\0');
4302 if (cfg
.powerskipmax
<= 0)
4305 if (*next
!= '\0') {
4306 if (!strcmp("q", next
))
4307 cfg
.powerquiet
= true;
4316 // automatic attribute autosave enable/disable
4317 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
4319 } else if (!strcmp(arg
, "on")) {
4321 } else if (!strcmp(arg
, "off")) {
4328 // warn user, and delete any previously given -s REGEXP Directives
4329 if (!cfg
.test_regex
.empty()){
4330 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4331 configfile
, lineno
, name
, cfg
.test_regex
.get_pattern());
4332 cfg
.test_regex
= regular_expression();
4334 // check for missing argument
4335 if (!(arg
= strtok(NULL
, delim
))) {
4340 if (!cfg
.test_regex
.compile(arg
)) {
4341 // not a valid regular expression!
4342 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4343 configfile
, lineno
, name
, arg
, cfg
.test_regex
.get_errmsg());
4346 // Do a bit of sanity checking and warn user if we think that
4347 // their regexp is "strange". User probably confused about shell
4348 // glob(3) syntax versus regular expression syntax regexp(7).
4349 if (arg
[(val
= strspn(arg
, "0123456789/.-+*|()?^$[]SLCOcnr"))])
4350 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
4351 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
4355 // send email to address that follows
4356 if (!(arg
= strtok(NULL
,delim
)))
4359 if (!cfg
.emailaddress
.empty())
4360 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4361 configfile
, lineno
, name
, cfg
.emailaddress
.c_str());
4362 cfg
.emailaddress
= arg
;
4366 // email warning options
4367 if (!(arg
= strtok(NULL
, delim
)))
4369 else if (!strcmp(arg
, "once"))
4371 else if (!strcmp(arg
, "daily"))
4373 else if (!strcmp(arg
, "diminishing"))
4375 else if (!strcmp(arg
, "test"))
4377 else if (!strcmp(arg
, "exec")) {
4378 // Get the next argument (the command line)
4380 // Allow "/path name/with spaces/..." on Windows
4381 arg
= strtok_dequote(delim
);
4382 if (arg
&& arg
[0] == '"') {
4383 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4384 configfile
, lineno
, name
, token
);
4388 arg
= strtok(0, delim
);
4391 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4392 configfile
, lineno
, name
, token
);
4395 // Free the last cmd line given if any, and copy new one
4396 if (!cfg
.emailcmdline
.empty())
4397 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4398 configfile
, lineno
, name
, cfg
.emailcmdline
.c_str());
4399 cfg
.emailcmdline
= arg
;
4405 // ignore failure of usage attribute
4406 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
4408 cfg
.monitor_attr_flags
.set(val
, MONITOR_IGN_FAILUSE
);
4411 // ignore attribute for tracking purposes
4412 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
4414 cfg
.monitor_attr_flags
.set(val
, MONITOR_IGNORE
);
4417 // print raw value when tracking
4418 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255, excl
)) < 0)
4420 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_PRINT
);
4421 if (*excl
== '!') // attribute change is critical
4422 cfg
.monitor_attr_flags
.set(val
, MONITOR_AS_CRIT
);
4425 // track changes in raw value (forces printing of raw value)
4426 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255, excl
)) < 0)
4428 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_PRINT
|MONITOR_RAW
);
4429 if (*excl
== '!') // raw value change is critical
4430 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_AS_CRIT
);
4433 // track Temperature
4434 if (Get3Integers(arg
=strtok(NULL
, delim
), name
, token
, lineno
, configfile
,
4435 &cfg
.tempdiff
, &cfg
.tempinfo
, &cfg
.tempcrit
) < 0)
4439 // non-default vendor-specific attribute meaning
4440 if (!(arg
=strtok(NULL
,delim
))) {
4442 } else if (!parse_attribute_def(arg
, cfg
.attribute_defs
, PRIOR_USER
)) {
4447 // Define use of drive-specific presets.
4448 if (!(arg
= strtok(NULL
, delim
))) {
4450 } else if (!strcmp(arg
, "use")) {
4451 cfg
.ignorepresets
= false;
4452 } else if (!strcmp(arg
, "ignore")) {
4453 cfg
.ignorepresets
= true;
4454 } else if (!strcmp(arg
, "show")) {
4455 cfg
.showpresets
= true;
4456 } else if (!strcmp(arg
, "showall")) {
4464 // Various ATA settings
4465 if (!(arg
= strtok(NULL
, delim
))) {
4469 char arg2
[16+1]; unsigned val
;
4470 int n1
= -1, n2
= -1, n3
= -1, len
= strlen(arg
);
4471 if (sscanf(arg
, "%16[^,=]%n%*[,=]%n%u%n", arg2
, &n1
, &n2
, &val
, &n3
) >= 1
4472 && (n1
== len
|| n2
> 0)) {
4473 bool on
= (n2
> 0 && !strcmp(arg
+n2
, "on"));
4474 bool off
= (n2
> 0 && !strcmp(arg
+n2
, "off"));
4478 if (!strcmp(arg2
, "aam")) {
4481 else if (val
<= 254)
4482 cfg
.set_aam
= val
+ 1;
4486 else if (!strcmp(arg2
, "apm")) {
4489 else if (1 <= val
&& val
<= 254)
4490 cfg
.set_apm
= val
+ 1;
4494 else if (!strcmp(arg2
, "lookahead")) {
4496 cfg
.set_lookahead
= -1;
4498 cfg
.set_lookahead
= 1;
4502 else if (!strcmp(arg
, "security-freeze")) {
4503 cfg
.set_security_freeze
= true;
4505 else if (!strcmp(arg2
, "standby")) {
4507 cfg
.set_standby
= 0 + 1;
4508 else if (val
<= 255)
4509 cfg
.set_standby
= val
+ 1;
4513 else if (!strcmp(arg2
, "wcache")) {
4515 cfg
.set_wcache
= -1;
4521 else if (!strcmp(arg2
, "dsn")) {
4538 // Directive not recognized
4539 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
4540 configfile
, lineno
, name
, token
);
4545 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4546 configfile
, lineno
, name
, token
);
4549 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4550 configfile
, lineno
, name
, token
, arg
);
4552 if (missingarg
|| badarg
) {
4553 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
4554 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
4555 PrintOut(LOG_CRIT
, "\n");
4562 // Scan directive for configuration file
4563 #define SCANDIRECTIVE "DEVICESCAN"
4565 // This is the routine that adds things to the conf_entries list.
4567 // Return values are:
4568 // 1: parsed a normal line
4569 // 0: found DEFAULT setting or comment or blank line
4570 // -1: found SCANDIRECTIVE line
4571 // -2: found an error
4573 // Note: this routine modifies *line from the caller!
4574 static int ParseConfigLine(dev_config_vector
& conf_entries
, dev_config
& default_conf
,
4575 smart_devtype_list
& scan_types
, int lineno
, /*const*/ char * line
)
4577 const char *delim
= " \n\t";
4579 // get first token: device name. If a comment, skip line
4580 const char * name
= strtok(line
, delim
);
4581 if (!name
|| *name
== '#')
4584 // Check device name for DEFAULT or DEVICESCAN
4586 if (!strcmp("DEFAULT", name
)) {
4588 // Restart with empty defaults
4589 default_conf
= dev_config();
4592 retval
= (!strcmp(SCANDIRECTIVE
, name
) ? -1 : 1);
4593 // Init new entry with current defaults
4594 conf_entries
.push_back(default_conf
);
4596 dev_config
& cfg
= (retval
? conf_entries
.back() : default_conf
);
4598 cfg
.name
= name
; // Later replaced by dev->get_info().info_name
4599 cfg
.dev_name
= name
; // If DEVICESCAN later replaced by get->dev_info().dev_name
4600 cfg
.lineno
= lineno
;
4602 // parse tokens one at a time from the file.
4603 while (char * token
= strtok(0, delim
)) {
4604 int rc
= ParseToken(token
, cfg
, scan_types
);
4606 // error found on the line
4613 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4616 // Check for multiple -d TYPE directives
4617 if (retval
!= -1 && scan_types
.size() > 1) {
4618 PrintOut(LOG_CRIT
, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4619 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
4623 // Don't perform checks below for DEFAULT entries
4627 // If NO monitoring directives are set, then set all of them.
4628 if (!( cfg
.smartcheck
|| cfg
.selftest
4629 || cfg
.errorlog
|| cfg
.xerrorlog
4630 || cfg
.offlinests
|| cfg
.selfteststs
4631 || cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
4632 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
4634 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4635 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
4637 cfg
.smartcheck
= true;
4638 cfg
.usagefailed
= true;
4641 cfg
.selftest
= true;
4642 cfg
.errorlog
= true;
4643 cfg
.selfteststs
= true;
4646 // additional sanity check. Has user set -M options without -m?
4647 if (cfg
.emailaddress
.empty() && (!cfg
.emailcmdline
.empty() || cfg
.emailfreq
|| cfg
.emailtest
)){
4648 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4649 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
4653 // has the user has set <nomailer>?
4654 if (cfg
.emailaddress
== "<nomailer>") {
4655 // check that -M exec is also set
4656 if (cfg
.emailcmdline
.empty()){
4657 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4658 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
4661 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4662 cfg
.emailaddress
.clear();
4668 // Parses a configuration file. Return values are:
4669 // N=>0: found N entries
4670 // -1: syntax error in config file
4671 // -2: config file does not exist
4672 // -3: config file exists but cannot be read
4674 // In the case where the return value is 0, there are three
4676 // Empty configuration file ==> conf_entries.empty()
4677 // No configuration file ==> conf_entries[0].lineno == 0
4678 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4679 static int ParseConfigFile(dev_config_vector
& conf_entries
, smart_devtype_list
& scan_types
)
4681 // maximum line length in configuration file
4682 const int MAXLINELEN
= 256;
4683 // maximum length of a continued line in configuration file
4684 const int MAXCONTLINE
= 1023;
4687 // Open config file, if it exists and is not <stdin>
4688 if (!(configfile
== configfile_stdin
)) { // pointer comparison ok here
4689 if (!f
.open(configfile
,"r") && (errno
!=ENOENT
|| !configfile_alt
.empty())) {
4690 // file exists but we can't read it or it should exist due to '-c' option
4691 int ret
= (errno
!=ENOENT
? -3 : -2);
4692 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
4693 strerror(errno
),configfile
);
4697 else // read from stdin ('-c -' option)
4700 // Start with empty defaults
4701 dev_config default_conf
;
4703 // No configuration file found -- use fake one
4706 char fakeconfig
[] = SCANDIRECTIVE
" -a"; // TODO: Remove this hack, build cfg_entry.
4708 if (ParseConfigLine(conf_entries
, default_conf
, scan_types
, 0, fakeconfig
) != -1)
4709 throw std::logic_error("Internal error parsing " SCANDIRECTIVE
);
4714 setmode(fileno(f
), O_TEXT
); // Allow files with \r\n
4717 // configuration file exists
4718 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
4720 // parse config file line by line
4721 int lineno
= 1, cont
= 0, contlineno
= 0;
4722 char line
[MAXLINELEN
+2];
4723 char fullline
[MAXCONTLINE
+1];
4726 int len
=0,scandevice
;
4731 // make debugging simpler
4732 memset(line
,0,sizeof(line
));
4735 code
=fgets(line
, MAXLINELEN
+2, f
);
4737 // are we at the end of the file?
4740 scandevice
= ParseConfigLine(conf_entries
, default_conf
, scan_types
, contlineno
, fullline
);
4741 // See if we found a SCANDIRECTIVE directive
4744 // did we find a syntax error
4747 // the final line is part of a continuation line
4753 // input file line number
4756 // See if line is too long
4758 if (len
>MAXLINELEN
){
4760 if (line
[len
-1]=='\n')
4761 warn
="(including newline!) ";
4764 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4765 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
4769 // Ignore anything after comment symbol
4770 if ((comment
=strchr(line
,'#'))){
4775 // is the total line (made of all continuation lines) too long?
4776 if (cont
+len
>MAXCONTLINE
){
4777 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4778 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
4782 // copy string so far into fullline, and increment length
4783 snprintf(fullline
+cont
, sizeof(fullline
)-cont
, "%s" ,line
);
4786 // is this a continuation line. If so, replace \ by space and look at next line
4787 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
4788 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
4792 // Not a continuation line. Parse it
4794 scandevice
= ParseConfigLine(conf_entries
, default_conf
, scan_types
, contlineno
, fullline
);
4796 // did we find a scandevice directive?
4799 // did we find a syntax error
4808 // note -- may be zero if syntax of file OK, but no valid entries!
4812 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4813 <LIST> is the list of valid arguments for option opt. */
4814 static void PrintValidArgs(char opt
)
4818 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
4819 if (!(s
= GetValidArgList(opt
)))
4820 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
4822 PrintOut(LOG_CRIT
, "%s", (char *)s
);
4823 PrintOut(LOG_CRIT
, " <=======\n");
4827 // Report error and return false if specified path is not absolute.
4828 static bool check_abs_path(char option
, const std::string
& path
)
4830 if (path
.empty() || path
[0] == '/')
4835 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option
, path
.c_str());
4836 PrintOut(LOG_CRIT
, "Error: relative path names are not allowed\n\n");
4841 // Parses input line, prints usage message and
4842 // version/license/copyright messages
4843 static int parse_options(int argc
, char **argv
)
4845 // Init default path names
4847 configfile
= SMARTMONTOOLS_SYSCONFDIR
"/smartd.conf";
4848 warning_script
= SMARTMONTOOLS_SMARTDSCRIPTDIR
"/smartd_warning.sh";
4850 std::string exedir
= get_exe_dir();
4851 static std::string configfile_str
= exedir
+ "/smartd.conf";
4852 configfile
= configfile_str
.c_str();
4853 warning_script
= exedir
+ "/smartd_warning.cmd";
4856 // Please update GetValidArgList() if you edit shortopts
4857 static const char shortopts
[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4858 #ifdef HAVE_LIBCAP_NG
4862 // Please update GetValidArgList() if you edit longopts
4863 struct option longopts
[] = {
4864 { "configfile", required_argument
, 0, 'c' },
4865 { "logfacility", required_argument
, 0, 'l' },
4866 { "quit", required_argument
, 0, 'q' },
4867 { "debug", no_argument
, 0, 'd' },
4868 { "showdirectives", no_argument
, 0, 'D' },
4869 { "interval", required_argument
, 0, 'i' },
4871 { "no-fork", no_argument
, 0, 'n' },
4873 { "service", no_argument
, 0, 'n' },
4875 { "pidfile", required_argument
, 0, 'p' },
4876 { "report", required_argument
, 0, 'r' },
4877 { "savestates", required_argument
, 0, 's' },
4878 { "attributelog", required_argument
, 0, 'A' },
4879 { "drivedb", required_argument
, 0, 'B' },
4880 { "warnexec", required_argument
, 0, 'w' },
4881 { "version", no_argument
, 0, 'V' },
4882 { "license", no_argument
, 0, 'V' },
4883 { "copyright", no_argument
, 0, 'V' },
4884 { "help", no_argument
, 0, 'h' },
4885 { "usage", no_argument
, 0, 'h' },
4886 #ifdef HAVE_LIBCAP_NG
4887 { "capabilities", no_argument
, 0, 'C' },
4893 bool badarg
= false;
4894 bool use_default_db
= true; // set false on '-B FILE'
4896 // Parse input options.
4898 while ((optchar
= getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)) != -1) {
4906 if (!strcmp(optarg
, "nodev"))
4908 else if (!strcmp(optarg
, "nodevstartup"))
4909 quit
= QUIT_NODEVSTARTUP
;
4910 else if (!strcmp(optarg
, "never"))
4912 else if (!strcmp(optarg
, "onecheck")) {
4913 quit
= QUIT_ONECHECK
;
4916 else if (!strcmp(optarg
, "showtests")) {
4917 quit
= QUIT_SHOWTESTS
;
4920 else if (!strcmp(optarg
, "errors"))
4926 // set the log facility level
4927 if (!strcmp(optarg
, "daemon"))
4928 facility
=LOG_DAEMON
;
4929 else if (!strcmp(optarg
, "local0"))
4930 facility
=LOG_LOCAL0
;
4931 else if (!strcmp(optarg
, "local1"))
4932 facility
=LOG_LOCAL1
;
4933 else if (!strcmp(optarg
, "local2"))
4934 facility
=LOG_LOCAL2
;
4935 else if (!strcmp(optarg
, "local3"))
4936 facility
=LOG_LOCAL3
;
4937 else if (!strcmp(optarg
, "local4"))
4938 facility
=LOG_LOCAL4
;
4939 else if (!strcmp(optarg
, "local5"))
4940 facility
=LOG_LOCAL5
;
4941 else if (!strcmp(optarg
, "local6"))
4942 facility
=LOG_LOCAL6
;
4943 else if (!strcmp(optarg
, "local7"))
4944 facility
=LOG_LOCAL7
;
4949 // enable debug mode
4954 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4959 // print summary of all valid directives
4964 // Period (time interval) for checking
4965 // strtol will set errno in the event of overflow, so we'll check it.
4967 lchecktime
= strtol(optarg
, &tailptr
, 10);
4968 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
4971 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
4972 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
4973 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4976 checktime
= (int)lchecktime
;
4979 // report IOCTL transactions
4981 int n1
= -1, n2
= -1, len
= strlen(optarg
);
4982 char s
[9+1]; unsigned i
= 1;
4983 sscanf(optarg
, "%9[a-z]%n,%u%n", s
, &n1
, &i
, &n2
);
4984 if (!((n1
== len
|| n2
== len
) && 1 <= i
&& i
<= 4)) {
4986 } else if (!strcmp(s
,"ioctl")) {
4987 ata_debugmode
= scsi_debugmode
= nvme_debugmode
= i
;
4988 } else if (!strcmp(s
,"ataioctl")) {
4990 } else if (!strcmp(s
,"scsiioctl")) {
4992 } else if (!strcmp(s
,"nvmeioctl")) {
5000 // alternate configuration file
5001 if (strcmp(optarg
,"-"))
5002 configfile
= (configfile_alt
= optarg
).c_str();
5003 else // read from stdin
5004 configfile
=configfile_stdin
;
5007 // output file with PID number
5011 // path prefix of persistent state file
5012 state_path_prefix
= optarg
;
5015 // path prefix of attribute log file
5016 attrlog_path_prefix
= optarg
;
5020 const char * path
= optarg
;
5021 if (*path
== '+' && path
[1])
5024 use_default_db
= false;
5025 unsigned char savedebug
= debugmode
; debugmode
= 1;
5026 if (!read_drive_database(path
))
5028 debugmode
= savedebug
;
5032 warning_script
= optarg
;
5035 // print version and CVS info
5037 PrintOut(LOG_INFO
, "%s", format_version_info("smartd", true /*full*/).c_str());
5039 #ifdef HAVE_LIBCAP_NG
5041 // enable capabilities
5042 capabilities_enabled
= true;
5046 // help: print summary of command-line options
5053 // unrecognized option
5056 // Point arg to the argument in which this option was found.
5057 arg
= argv
[optind
-1];
5058 // Check whether the option is a long option that doesn't map to -h.
5059 if (arg
[1] == '-' && optchar
!= 'h') {
5060 // Iff optopt holds a valid option then argument must be missing.
5061 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
5062 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
5063 PrintValidArgs(optopt
);
5065 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
5067 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
5071 // Iff optopt holds a valid option then argument must be missing.
5072 if (strchr(shortopts
, optopt
) != NULL
){
5073 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
5074 PrintValidArgs(optopt
);
5076 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
5078 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
5085 // Check to see if option had an unrecognized or incorrect argument.
5089 // It would be nice to print the actual option name given by the user
5090 // here, but we just print the short form. Please fix this if you know
5091 // a clean way to do it.
5092 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
5093 PrintValidArgs(optchar
);
5094 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
5099 // non-option arguments are not allowed
5100 if (argc
> optind
) {
5103 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
5104 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
5108 // no pidfile in debug mode
5109 if (debugmode
&& !pid_file
.empty()) {
5112 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
5113 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
.c_str());
5119 // absolute path names are required due to chdir('/') in daemon_init()
5120 if (!( check_abs_path('p', pid_file
)
5121 && check_abs_path('s', state_path_prefix
)
5122 && check_abs_path('A', attrlog_path_prefix
)))
5127 // Read or init drive database
5129 unsigned char savedebug
= debugmode
; debugmode
= 1;
5130 if (!init_drive_database(use_default_db
))
5132 debugmode
= savedebug
;
5135 // Check option compatibility of notify support
5136 if (!notify_post_init())
5142 // No error, continue in main_worker()
5146 // Function we call if no configuration file was found or if the
5147 // SCANDIRECTIVE Directive was found. It makes entries for device
5148 // names returned by scan_smart_devices() in os_OSNAME.cpp
5149 static int MakeConfigEntries(const dev_config
& base_cfg
,
5150 dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
,
5151 const smart_devtype_list
& types
)
5153 // make list of devices
5154 smart_device_list devlist
;
5155 if (!smi()->scan_smart_devices(devlist
, types
)) {
5156 PrintOut(LOG_CRIT
, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
5160 // if no devices, return
5161 if (devlist
.size() <= 0)
5164 // add empty device slots for existing config entries
5165 while (scanned_devs
.size() < conf_entries
.size())
5166 scanned_devs
.push_back((smart_device
*)0);
5168 // loop over entries to create
5169 for (unsigned i
= 0; i
< devlist
.size(); i
++) {
5170 // Move device pointer
5171 smart_device
* dev
= devlist
.release(i
);
5172 scanned_devs
.push_back(dev
);
5174 // Copy configuration, update device and type name
5175 conf_entries
.push_back(base_cfg
);
5176 dev_config
& cfg
= conf_entries
.back();
5177 cfg
.name
= dev
->get_info().info_name
;
5178 cfg
.dev_name
= dev
->get_info().dev_name
;
5179 cfg
.dev_type
= dev
->get_info().dev_type
;
5182 return devlist
.size();
5185 // Returns negative value (see ParseConfigFile()) if config file
5186 // had errors, else number of entries which may be zero or positive.
5187 static int ReadOrMakeConfigEntries(dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
)
5189 // parse configuration file configfile (normally /etc/smartd.conf)
5190 smart_devtype_list scan_types
;
5191 int entries
= ParseConfigFile(conf_entries
, scan_types
);
5194 // There was an error reading the configuration file.
5195 conf_entries
.clear();
5197 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
5201 // no error parsing config file.
5203 // we did not find a SCANDIRECTIVE and did find valid entries
5204 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
5206 else if (!conf_entries
.empty()) {
5207 // we found a SCANDIRECTIVE or there was no configuration file so
5208 // scan. Configuration file's last entry contains all options
5210 dev_config first
= conf_entries
.back();
5211 conf_entries
.pop_back();
5214 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
5216 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
5218 // make config list of devices to search for
5219 MakeConfigEntries(first
, conf_entries
, scanned_devs
, scan_types
);
5221 // warn user if scan table found no devices
5222 if (conf_entries
.empty())
5223 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
5226 PrintOut(LOG_CRIT
, "Configuration file %s parsed but has no entries\n", configfile
);
5228 return conf_entries
.size();
5231 // Return true if TYPE contains a RAID drive number
5232 static bool is_raid_type(const char * type
)
5234 if (str_starts_with(type
, "sat,"))
5237 if (sscanf(type
, "%*[^,],%d", &i
) != 1)
5242 // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
//
// Parameters (as used by the visible statements):
//   dev     - device to look for; its get_info() supplies dev_name/dev_type.
//   devices - list whose first NUMDEVS elements are compared against DEV.
//   numdevs - number of leading elements of DEVICES to examine.
//   ignored - configuration entries that are also compared against DEV.
// An element matches when its dev_name equals DEV's and, in addition, either
// the dev_type strings are equal or at least one of the two types is not a
// RAID type according to is_raid_type().
// NOTE(review): the statements executed on a match and the final return are
// on lines missing from this listing; confirm against the full source.
5243 static bool is_duplicate_device(const smart_device
* dev
,
5244 const smart_device_list
& devices
, unsigned numdevs
,
5245 const dev_config_vector
& ignored
)
5247 const smart_device::device_info
& info1
= dev
->get_info();
// Evaluated once, because DEV's type is the same for every comparison below.
5248 bool is_raid1
= is_raid_type(info1
.dev_type
.c_str());
// First pass: compare against the first NUMDEVS entries of DEVICES.
5250 for (unsigned i
= 0; i
< numdevs
; i
++) {
5251 const smart_device::device_info
& info2
= devices
.at(i
)->get_info();
5252 // -d TYPE options must match if RAID drive number is specified
5253 if ( info1
.dev_name
== info2
.dev_name
5254 && ( info1
.dev_type
== info2
.dev_type
5255 || !is_raid1
|| !is_raid_type(info2
.dev_type
.c_str())))
// Second pass: the same comparison against every entry of IGNORED.
5259 for (unsigned i
= 0; i
< ignored
.size(); i
++) {
5260 const dev_config
& cfg2
= ignored
.at(i
);
5261 if ( info1
.dev_name
== cfg2
.dev_name
5262 && ( info1
.dev_type
== cfg2
.dev_type
5263 || !is_raid1
|| !is_raid_type(cfg2
.dev_type
.c_str())))
5269 // Register one device, return false on error
//
// Parameters (as used by the visible statements):
//   cfg       - configuration entry; cfg.name and cfg.dev_type select the
//               device, and cfg.name is overwritten with the device's
//               info_name once the device has been opened.
//   state     - passed through to the ATA/SCSI/NVMe scan function.
//   dev       - device pointer; may be assigned from
//               smi()->get_smart_device() and is replaced by the result of
//               autodetect_open().
//   prev_cfgs - passed through to the ATA/SCSI/NVMe scan function.
// NOTE(review): this listing omits lines (the embedded numbers skip), among
// them the declarations/assignments of `scanning` and `status`, several
// conditions and all return statements; confirm against the full source.
5270 static bool register_device(dev_config
& cfg
, dev_state
& state
, smart_device_auto_ptr
& dev
,
5271 const dev_config_vector
* prev_cfgs
)
5275 // Get device of appropriate type
5276 dev
= smi()->get_smart_device(cfg
.name
.c_str(), cfg
.dev_type
.c_str());
// Two different messages: an empty dev_type means autodetection was
// requested, otherwise the user-supplied type was not accepted.
5278 if (cfg
.dev_type
.empty())
5279 PrintOut(LOG_INFO
, "Device: %s, unable to autodetect device type\n", cfg
.name
.c_str());
5281 PrintOut(LOG_INFO
, "Device: %s, unsupported device type '%s'\n", cfg
.name
.c_str(), cfg
.dev_type
.c_str());
5287 // Use device from device scan
// Copy of the device info taken before autodetect_open(), so that a type
// change can be detected afterwards.
5292 smart_device::device_info oldinfo
= dev
->get_info();
5294 // Open with autodetect support, may return 'better' device
5295 dev
.replace( dev
->autodetect_open() );
5297 // Report if type has changed
5298 if (oldinfo
.dev_type
!= dev
->get_dev_type())
5299 PrintOut(LOG_INFO
, "Device: %s, type changed from '%s' to '%s'\n",
5300 cfg
.name
.c_str(), oldinfo
.dev_type
.c_str(), dev
->get_dev_type());
5302 // Return if autodetect_open() failed
5303 if (!dev
->is_open()) {
// The open failure is logged only in debug mode or when `scanning` is false.
5304 if (debugmode
|| !scanning
)
5305 PrintOut(LOG_INFO
, "Device: %s, open() failed: %s\n", dev
->get_info_name(), dev
->get_errmsg());
5309 // Update informal name
5310 cfg
.name
= dev
->get_info().info_name
;
5311 PrintOut(LOG_INFO
, "Device: %s, opened\n", cfg
.name
.c_str());
// Protocol name used in the "Unable to register" messages below.
// NOTE(review): its assignments are on lines missing from this listing.
5314 const char * typemsg
;
5315 // register ATA device
5318 status
= ATADeviceScan(cfg
, state
, dev
->to_ata(), prev_cfgs
);
5320 // or register SCSI device
5321 else if (dev
->is_scsi()){
5323 status
= SCSIDeviceScan(cfg
, state
, dev
->to_scsi(), prev_cfgs
);
5325 // or register NVMe device
5326 else if (dev
->is_nvme()) {
5328 status
= NVMeDeviceScan(cfg
, state
, dev
->to_nvme(), prev_cfgs
);
5331 PrintOut(LOG_INFO
, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg
.name
.c_str());
// Registration failures are reported only when `scanning` is false or in
// debug mode.
5336 if (!scanning
|| debugmode
) {
// Message variant that names the config file line; logged at LOG_CRIT
// unless `scanning` is true.
5338 PrintOut(scanning
? LOG_INFO
: LOG_CRIT
,
5339 "Unable to register %s device %s at line %d of file %s\n",
5340 typemsg
, cfg
.name
.c_str(), cfg
.lineno
, configfile
);
// Message variant without a config file position.
// NOTE(review): the condition choosing between the two is not shown here.
5342 PrintOut(LOG_INFO
, "Unable to register %s device %s\n",
5343 typemsg
, cfg
.name
.c_str());
5352 // This function tries devices from conf_entries. Each one that can be
5353 // registered is moved onto the [ata|scsi]devices lists and removed
5354 // from the conf_entries list.
//
// Parameters (as used by the visible statements):
//   conf_entries - configuration entries to process, read by index.
//   scanned_devs - device objects indexed like conf_entries; an object is
//                  taken over with release(i) when i is within its size.
//   configs, states, devices - output lists; one element is appended to each
//                  for every device that register_device() accepts.
// NOTE(review): this listing omits lines (the embedded numbers skip), among
// them the clearing of the output lists, several conditions, the `continue`
// and `return` statements and the declaration of `state`; confirm against
// the full source.
5355 static bool register_devices(const dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
,
5356 dev_config_vector
& configs
, dev_state_vector
& states
, smart_device_list
& devices
)
5358 // start by clearing lists/memory of ALL existing devices
// Entries that were ignored or could not be registered; consulted by
// is_duplicate_device() for devices handled later in the loop.
5364 dev_config_vector ignored_entries
;
// Passed to is_duplicate_device() as the number of leading elements of
// `devices` to compare against.
5365 unsigned numnoscan
= 0;
5366 for (unsigned i
= 0; i
< conf_entries
.size(); i
++){
// Work on a copy: register_device() modifies the entry (e.g. its name).
5368 dev_config cfg
= conf_entries
[i
];
5371 // Store for is_duplicate_device() check and ignore
// The device type is printed in square brackets only if it is not empty.
5372 PrintOut(LOG_INFO
, "Device: %s%s%s%s, ignored\n", cfg
.name
.c_str(),
5373 (!cfg
.dev_type
.empty() ? " [" : ""),
5374 cfg
.dev_type
.c_str(),
5375 (!cfg
.dev_type
.empty() ? "]" : ""));
5376 ignored_entries
.push_back(cfg
);
5380 smart_device_auto_ptr dev
;
5382 // Device may already be detected during devicescan
5383 bool scanning
= false;
5384 if (i
< scanned_devs
.size()) {
5385 dev
= scanned_devs
.release(i
);
5387 // Check for a preceding non-DEVICESCAN entry for the same device
// The duplicate search is skipped when there is nothing to compare against.
5388 if ( (numnoscan
|| !ignored_entries
.empty())
5389 && is_duplicate_device(dev
.get(), devices
, numnoscan
, ignored_entries
)) {
5390 PrintOut(LOG_INFO
, "Device: %s, duplicate, ignored\n", dev
->get_info_name());
5398 // If scanning, pass dev_idinfo of previous devices for duplicate check
5400 if (!register_device(cfg
, state
, dev
, (scanning
? &configs
: 0))) {
5401 // if device is explicitly listed and we can't register it, then
5402 // exit unless the user has specified that the device is removable
5404 if (!(cfg
.removable
|| quit
== QUIT_NEVER
)) {
5405 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg
.name
.c_str());
5408 PrintOut(LOG_INFO
, "Device: %s, not available\n", cfg
.name
.c_str());
5409 // Prevent retry of registration
5410 ignored_entries
.push_back(cfg
);
5415 // move onto the list of devices
5416 configs
.push_back(cfg
);
5417 states
.push_back(state
);
5418 devices
.push_back(dev
);
// NOTE(review): the condition guarding this update is not shown here.
5420 numnoscan
= devices
.size();
5423 init_disable_standby_check(configs
);
5428 // Main program without exception handling
//
// Visible flow: initialize the smart_interface, parse the command line,
// then loop until caughtsigEXIT is set.  Each iteration (re)reads the
// configuration when needed, checks all devices once, writes state files
// and attribute logs, and sleeps in dosleep().  After the loop the exit
// signal is reported, state files are written on normal exit, the PID file
// is removed and the exit status is logged.
// NOTE(review): this listing omits lines (the embedded numbers skip), among
// them the loop head, several conditions and every return/break statement;
// confirm against the full source.
5429 static int main_worker(int argc
, char **argv
)
5431 // Initialize interface
5432 smart_interface::init();
5436 // Check whether systemd notify is supported and enabled
5439 // parse input and print header and usage info if needed
5440 int status
= parse_options(argc
,argv
);
5444 // Configuration for each device
5445 dev_config_vector configs
;
// Passed to every per-device function together with `configs` and `devices`.
5447 dev_state_vector states
;
5448 // Devices to monitor
5449 smart_device_list devices
;
5451 // Drop capabilities if supported and enabled
5452 capabilities_drop_now();
5454 notify_msg("Initializing ...");
5456 // the main loop of the code
// write_states_always is passed to write_all_dev_states() and to dosleep();
// it is set after each (re)configuration and cleared after the next write.
5457 bool firstpass
= true, write_states_always
= true;
5458 time_t wakeuptime
= 0;
5459 // assert(status < 0);
5461 // Should we (re)read the config file?
5462 if (firstpass
|| caughtsigHUP
){
5464 // Write state files
5465 if (!state_path_prefix
.empty())
5466 write_all_dev_states(configs
, states
);
// Two alternative log messages for a reload, one for HUP and one for INT.
// NOTE(review): the selecting expression and enclosing call are not shown.
5470 "Signal HUP - rereading configuration file %s\n":
5471 "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME
" quits)\n\n",
5473 notify_msg("Reloading ...");
5477 dev_config_vector conf_entries
; // Entries read from smartd.conf
5478 smart_device_list scanned_devs
; // Devices found during scan
5479 // (re)reads config file, makes >=0 entries
5480 int entries
= ReadOrMakeConfigEntries(conf_entries
, scanned_devs
);
5483 // checks devices, then moves onto ata/scsi list or deallocates.
5484 if (!register_devices(conf_entries
, scanned_devs
, configs
, states
, devices
)) {
5485 status
= EXIT_BADDEV
;
// Consistency check: the three lists must have the same number of elements.
5488 if (!(configs
.size() == devices
.size() && configs
.size() == states
.size()))
5489 throw std::logic_error("Invalid result from RegisterDevices");
5490 // Handle limitations if capabilities are dropped
5491 capabilities_check_config(configs
);
// Taken for QUIT_NEVER always, and for QUIT_NODEV / QUIT_NODEVSTARTUP on
// any pass after the first.
5493 else if ( quit
== QUIT_NEVER
5494 || ((quit
== QUIT_NODEV
|| quit
== QUIT_NODEVSTARTUP
) && !firstpass
)) {
5495 // user has asked to continue on error in configuration file
5497 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
5500 // exit with configuration file error status
// -3 maps to EXIT_READCONF, -2 to EXIT_NOCONF, anything else to EXIT_BADCONF.
5501 status
= (entries
== -3 ? EXIT_READCONF
: entries
== -2 ? EXIT_NOCONF
: EXIT_BADCONF
);
// Having no devices is an error unless quit is QUIT_NEVER, or
// QUIT_NODEVSTARTUP on a pass after the first.
5506 if (!( devices
.size() > 0 || quit
== QUIT_NEVER
5507 || (quit
== QUIT_NODEVSTARTUP
&& !firstpass
))) {
5508 PrintOut(LOG_INFO
, "Unable to monitor any SMART enabled devices. %sExiting...\n",
5509 (!debugmode
? "Try debug (-d) option. " : ""));
5510 status
= EXIT_NODEV
;
5514 // Log number of devices we are monitoring...
5515 int numata
= 0, numscsi
= 0;
5516 for (unsigned i
= 0; i
< devices
.size(); i
++) {
5517 const smart_device
* dev
= devices
.at(i
);
5520 else if (dev
->is_scsi())
// The NVMe count is derived as total minus the ATA and SCSI counts.
5523 PrintOut(LOG_INFO
, "Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5524 numata
, numscsi
, (int)devices
.size() - numata
- numscsi
);
5526 if (quit
== QUIT_SHOWTESTS
) {
5527 // user has asked to print test schedule
5528 PrintTestSchedule(configs
, states
, devices
);
5529 // assert(firstpass);
5536 // Always write state files after (re)configuration
5537 write_states_always
= true;
5540 // check all devices once,
5541 // self tests are not started in first pass unless '-q onecheck' is specified
5542 notify_check((int)devices
.size());
5543 CheckDevicesOnce(configs
, states
, devices
, firstpass
, (!firstpass
|| quit
== QUIT_ONECHECK
));
5545 // Write state files
5546 if (!state_path_prefix
.empty())
5547 write_all_dev_states(configs
, states
, write_states_always
);
5548 write_states_always
= false;
5550 // Write attribute logs
5551 if (!attrlog_path_prefix
.empty())
5552 write_all_dev_attrlogs(configs
, states
);
5554 // user has asked us to exit after first check
5555 if (quit
== QUIT_ONECHECK
) {
5556 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices successfully checked once.\n"
5557 "smartd is exiting (exit status 0)\n");
5558 // assert(firstpass);
5564 // fork() into background if needed, close ALL file descriptors,
5565 // redirect stdin, stdout, and stderr, chdir to "/".
5566 status
= daemon_init();
5570 // Write PID file if configured
5571 if (!write_pid_file())
5575 // Set exit and signal handlers
5576 install_signal_handlers();
5578 // Initialize wakeup time to CURRENT time
5579 wakeuptime
= time(0);
5584 // sleep until next check time, or a signal arrives
5585 wakeuptime
= dosleep(wakeuptime
, write_states_always
, (int)devices
.size());
5587 } while (!caughtsigEXIT
);
// Report the signal only if no exit status was set inside the loop.
5589 if (caughtsigEXIT
&& status
< 0) {
5590 // Loop exited on signal
// SIGTERM, and SIGQUIT in debug mode, are logged at LOG_INFO.
5591 if (caughtsigEXIT
== SIGTERM
|| (debugmode
&& caughtsigEXIT
== SIGQUIT
)) {
5592 PrintOut(LOG_INFO
, "smartd received signal %d: %s\n",
5593 caughtsigEXIT
, strsignal(caughtsigEXIT
));
5596 // Unexpected SIGINT or SIGQUIT
5597 PrintOut(LOG_CRIT
, "smartd received unexpected signal %d: %s\n",
5598 caughtsigEXIT
, strsignal(caughtsigEXIT
));
5599 status
= EXIT_SIGNAL
;
5603 // Status unset above implies success
5608 // Loop exited after daemon_init() and write_pid_file()
5610 // Write state files only on normal exit
5611 if (!status
&& !state_path_prefix
.empty())
5612 write_all_dev_states(configs
, states
);
5614 // Delete PID file, if one was created
5615 if (!pid_file
.empty() && unlink(pid_file
.c_str()))
5616 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
5617 pid_file
.c_str(), strerror(errno
));
5619 // and this should be the final output from smartd before it exits
// Logged at LOG_CRIT for a nonzero status, otherwise at LOG_INFO.
5620 PrintOut((status
? LOG_CRIT
: LOG_INFO
), "smartd is exiting (exit status %d)\n",
// Exception-handling wrapper around main_worker().  Two signatures are
// listed for the same body: `int main` and a static `smartd_main`.
// NOTE(review): presumably preprocessor conditionals on lines missing from
// this listing select one of them; confirm against the full source.
//
// Visible behaviour: calls main_worker(argc, argv); std::bad_alloc yields
// EXIT_NOMEM and any other std::exception yields EXIT_BADCODE; device
// objects still alive afterwards also force EXIT_BADCODE.  The status is
// then passed to notify_exit() and stored in daemon_winsvc_exitcode.
// NOTE(review): the declaration of `status`, the `try` line and the return
// statement are not shown here.
5630 int main(int argc
, char **argv
)
5632 // Windows: internal main function started direct or by service control manager
5633 static int smartd_main(int argc
, char **argv
)
5638 // Do the real work ...
5639 status
= main_worker(argc
, argv
);
5641 catch (const std::bad_alloc
& /*ex*/) {
5642 // Memory allocation failed (also thrown by std::operator new)
5643 PrintOut(LOG_CRIT
, "Smartd: Out of memory\n");
5644 status
= EXIT_NOMEM
;
5646 catch (const std::exception
& ex
) {
5647 // Other fatal errors
5648 PrintOut(LOG_CRIT
, "Smartd: Exception: %s\n", ex
.what());
5649 status
= EXIT_BADCODE
;
5652 // Check for remaining device objects
// A nonzero object count at this point is reported as an internal error.
5653 if (smart_device::get_num_objects() != 0) {
5654 PrintOut(LOG_CRIT
, "Smartd: Internal Error: %d device object(s) left at exit.\n",
5655 smart_device::get_num_objects());
5656 status
= EXIT_BADCODE
;
// EXIT_BADCODE is treated as a bug in smartd: ask for a bug report.
5659 if (status
== EXIT_BADCODE
)
5660 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
5662 notify_exit(status
);
// NOTE(review): presumably compiled for Windows only; the guarding
// preprocessor lines are not shown in this listing.
5664 daemon_winsvc_exitcode
= status
;
5671 // Main function for Windows
//
// Builds the static service option table and hands control to
// daemon_main(), passing smartd_main as the function to run.
// NOTE(review): the end of the initializer and the closing lines of the
// function are not shown in this listing; confirm against the full source.
5672 int main(int argc
, char **argv
){
5673 // Options for smartd windows service
5674 static const daemon_winsvc_options svc_opts
= {
5675 "--service", // cmd_opt
5676 "smartd", "SmartD Service", // servicename, displayname
// Adjacent string literals below are concatenated into one description text.
5678 "Controls and monitors storage devices using the Self-Monitoring, "
5679 "Analysis and Reporting Technology System (SMART) built into "
5680 "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
5681 "www.smartmontools.org"
5683 // daemon_main() handles daemon and service specific commands
5684 // and starts smartd_main() direct, from a new process,
5685 // or via service control manager
5686 return daemon_main("smartd", &svc_opts
, smartd_main
, argc
, argv
);