2 * Home page of code is: http://smartmontools.sourceforge.net
4 * Copyright (C) 2002-11 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7 * Copyright (C) 2008-12 Christian Franke <smartmontools-support@lists.sourceforge.net>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
25 // TODO: Why is this define necessary?
29 // unconditionally included files
31 #include <sys/types.h>
32 #include <sys/stat.h> // umask
47 #include <algorithm> // std::replace()
49 // see which system files to conditionally include
52 // conditionally included files
65 #pragma warning(disable:4761) // "conversion supplied"
66 typedef unsigned short mode_t
;
69 #include <io.h> // umask()
70 #include <process.h> // getpid()
74 #include <io.h> // setmode()
81 // locally included files
84 #include "dev_interface.h"
85 #include "knowndrives.h"
89 // This is for solaris, where signal() resets the handler to SIG_DFL
90 // after the first signal is caught.
92 #define SIGNALFN sigset
94 #define SIGNALFN signal
98 #include "hostname_win32.h" // gethost/domainname()
99 #define HAVE_GETHOSTNAME 1
100 #define HAVE_GETDOMAINNAME 1
101 // fork()/signal()/initd simulation for native Windows
102 #include "daemon_win32.h" // daemon_main/detach/signal()
104 #define SIGNALFN daemon_signal
105 #define strsignal daemon_strsignal
106 #define sleep daemon_sleep
107 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
108 #define SIGQUIT SIGBREAK
109 #define SIGQUIT_KEYNAME "CONTROL-Break"
111 #define SIGQUIT_KEYNAME "CONTROL-\\"
114 #if defined (__SVR4) && defined (__sun)
115 extern "C" int getdomainname(char *, int); // no declaration in header files!
118 #define ARGUSED(x) ((void)(x))
120 const char * smartd_cpp_cvsid
= "$Id: smartd.cpp 3513 2012-02-15 21:57:21Z chrfranke $"
124 #define EXIT_BADCMD 1 // command line did not parse
125 #define EXIT_BADCONF 2 // syntax error in config file
126 #define EXIT_STARTUP 3 // problem forking daemon
127 #define EXIT_PID 4 // problem creating pid file
128 #define EXIT_NOCONF 5 // config file does not exist
129 #define EXIT_READCONF 6 // config file exists but cannot be read
131 #define EXIT_NOMEM 8 // out of memory
132 #define EXIT_BADCODE 10 // internal error - should NEVER happen
134 #define EXIT_BADDEV 16 // we can't monitor this device
135 #define EXIT_NODEV 17 // no devices to monitor
137 #define EXIT_SIGNAL 254 // abort on signal
140 // command-line: 1=debug mode, 2=print presets
141 static unsigned char debugmode
= 0;
143 // command-line: how long to sleep between checks
144 #define CHECKTIME 1800
145 static int checktime
=CHECKTIME
;
147 // command-line: name of PID file (empty for no pid file)
148 static std::string pid_file
;
150 // command-line: path prefix of persistent state file, empty if no persistence.
151 static std::string state_path_prefix
152 #ifdef SMARTMONTOOLS_SAVESTATES
153 = SMARTMONTOOLS_SAVESTATES
157 // command-line: path prefix of attribute log file, empty if no logs.
158 static std::string attrlog_path_prefix
159 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
160 = SMARTMONTOOLS_ATTRIBUTELOG
164 // configuration file name
165 static const char * configfile
;
166 // configuration file "name" if read from stdin
167 static const char * const configfile_stdin
= "<stdin>";
168 // path of alternate configuration file
169 static std::string configfile_alt
;
171 // command-line: when should we exit?
174 // command-line; this is the default syslog(3) log facility to use.
175 static int facility
=LOG_DAEMON
;
178 // command-line: fork into background?
179 static bool do_fork
=true;
182 #ifdef HAVE_LIBCAP_NG
183 // command-line: enable capabilities?
184 static bool enable_capabilities
= false;
187 #if defined(_WIN32) || defined(__CYGWIN__)
188 // TODO: This smartctl only variable is also used in os_win32.cpp
189 unsigned char failuretest_permissive
= 0;
192 // set to one if we catch a USR1 (check devices now)
193 static volatile int caughtsigUSR1
=0;
196 // set to one if we catch a USR2 (toggle debug mode)
197 static volatile int caughtsigUSR2
=0;
200 // set to one if we catch a HUP (reload config file). In debug mode,
201 // set to two, if we catch INT (also reload config file).
202 static volatile int caughtsigHUP
=0;
204 // set to signal value if we catch INT, QUIT, or TERM
205 static volatile int caughtsigEXIT
=0;
207 // This function prints either to stdout or to the syslog as needed.
208 static void PrintOut(int priority
, const char *fmt
, ...)
209 __attribute_format_printf(2, 3);
// Attribute monitoring flags.
// Bit values that are OR-ed together per attribute, see monitor_attr_flags
// in dev_config and class attribute_flags below.
enum {
  MONITOR_IGN_FAILUSE = 0x01,
  MONITOR_IGNORE      = 0x02,
  MONITOR_RAW_PRINT   = 0x04,
  MONITOR_RAW         = 0x08,
  MONITOR_AS_CRIT     = 0x10,
  MONITOR_RAW_AS_CRIT = 0x20,
};
// Array of flags for each attribute.
// Holds one byte of MONITOR_* style bit flags per attribute id.
// Only ids in the range 1..255 are accepted; id 0 and anything outside
// the table are silently ignored by set() and reported as "not set".
class attribute_flags
{
public:
  // All flags start cleared.
  attribute_flags()
    { memset(m_flags, 0, sizeof(m_flags)); }

  // Return true if any bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    { return (0 < id && id < (int)sizeof(m_flags) && (m_flags[id] & flag)); }

  // OR the bits in 'flags' into the entry for attribute 'id'.
  void set(int id, unsigned char flags)
    {
      if (0 < id && id < (int)sizeof(m_flags))
        m_flags[id] |= flags;
    }

private:
  unsigned char m_flags[256];
};
243 /// Configuration data for a device. Read from smartd.conf.
244 /// Supports copy & assignment and is compatible with STL containers.
247 int lineno
; // Line number of entry in file
248 std::string name
; // Device name (with optional extra info)
249 std::string dev_name
; // Device name (plain, for SMARTD_DEVICE variable)
250 std::string dev_type
; // Device type argument from -d directive, empty if none
251 std::string state_file
; // Path of the persistent state file, empty if none
252 std::string attrlog_file
; // Path of the persistent attrlog file, empty if none
253 bool smartcheck
; // Check SMART status
254 bool usagefailed
; // Check for failed Usage Attributes
255 bool prefail
; // Track changes in Prefail Attributes
256 bool usage
; // Track changes in Usage Attributes
257 bool selftest
; // Monitor number of selftest errors
258 bool errorlog
; // Monitor number of ATA errors
259 bool xerrorlog
; // Monitor number of ATA errors (Extended Comprehensive error log)
260 bool offlinests
; // Monitor changes in offline data collection status
261 bool offlinests_ns
; // Disable auto standby if in progress
262 bool selfteststs
; // Monitor changes in self-test execution status
263 bool selfteststs_ns
; // Disable auto standby if in progress
264 bool permissive
; // Ignore failed SMART commands
265 char autosave
; // 1=disable, 2=enable Autosave Attributes
266 char autoofflinetest
; // 1=disable, 2=enable Auto Offline Test
267 unsigned char fix_firmwarebug
; // FIX_*, see atacmds.h
268 bool ignorepresets
; // Ignore database of -v options
269 bool showpresets
; // Show database entry for this device
270 bool removable
; // Device may disappear (not be present)
271 char powermode
; // skip check, if disk in idle or standby mode
272 bool powerquiet
; // skip powermode 'skipping checks' message
273 int powerskipmax
; // how many times can be check skipped
274 unsigned char tempdiff
; // Track Temperature changes >= this limit
275 unsigned char tempinfo
, tempcrit
; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
276 regular_expression test_regex
; // Regex for scheduled testing
278 // Configuration of email warning messages
279 std::string emailcmdline
; // script to execute, empty if no messages
280 std::string emailaddress
; // email address, or empty
281 unsigned char emailfreq
; // Emails once (1) daily (2) diminishing (3)
282 bool emailtest
; // Send test email?
285 int set_aam
; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
286 int set_apm
; // disable(-1), enable(2..255->1..254) Advanced Power Management
287 int set_lookahead
; // disable(-1), enable(1) read look-ahead
288 int set_standby
; // set(1..255->0..254) standby timer
289 bool set_security_freeze
; // Freeze ATA security
290 int set_wcache
; // disable(-1), enable(1) write cache
292 bool sct_erc_set
; // set SCT ERC to:
293 unsigned short sct_erc_readtime
; // ERC read time (deciseconds)
294 unsigned short sct_erc_writetime
; // ERC write time (deciseconds)
296 unsigned char curr_pending_id
; // ID of current pending sector count, 0 if none
297 unsigned char offl_pending_id
; // ID of offline uncorrectable sector count, 0 if none
298 bool curr_pending_incr
, offl_pending_incr
; // True if current/offline pending values increase
299 bool curr_pending_set
, offl_pending_set
; // True if '-C', '-U' set in smartd.conf
301 attribute_flags monitor_attr_flags
; // MONITOR_* flags for each attribute
303 ata_vendor_attr_defs attribute_defs
; // -v options
308 dev_config::dev_config()
317 offlinests(false), offlinests_ns(false),
318 selfteststs(false), selfteststs_ns(false),
322 fix_firmwarebug(FIX_NOTSPECIFIED
),
323 ignorepresets(false),
330 tempinfo(0), tempcrit(0),
333 set_aam(0), set_apm(0),
336 set_security_freeze(false),
339 sct_erc_readtime(0), sct_erc_writetime(0),
340 curr_pending_id(0), offl_pending_id(0),
341 curr_pending_incr(false), offl_pending_incr(false),
342 curr_pending_set(false), offl_pending_set(false)
// Number of allowed mail message types
// (size of the per-device maillog array).
static const int SMARTD_NMAIL = 13;
// Type for '-M test' mails (state not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.
// Bookkeeping for one type of warning mail: how often and when it was sent.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // Starts as "never sent".
  mailinfo()
    : logged(0), firstsent(0), lastsent(0) { }
};
362 /// Persistent state data for a device.
363 struct persistent_dev_state
365 unsigned char tempmin
, tempmax
; // Min/Max Temperatures
367 unsigned char selflogcount
; // total number of self-test errors
368 unsigned short selfloghour
; // lifetime hours of last self-test error
370 time_t scheduled_test_next_check
; // Time of next check for scheduled self-tests
372 uint64_t selective_test_last_start
; // Start LBA of last scheduled selective self-test
373 uint64_t selective_test_last_end
; // End LBA of last scheduled selective self-test
375 mailinfo maillog
[SMARTD_NMAIL
]; // log info on when mail sent
378 int ataerrorcount
; // Total number of ATA errors
380 // Persistent part of ata_smart_values:
381 struct ata_attribute
{
384 unsigned char worst
; // Byte needed for 'raw64' attribute only.
388 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
390 ata_attribute ata_attributes
[NUMBER_ATA_SMART_ATTRIBUTES
];
392 persistent_dev_state();
395 persistent_dev_state::persistent_dev_state()
396 : tempmin(0), tempmax(0),
399 scheduled_test_next_check(0),
400 selective_test_last_start(0),
401 selective_test_last_end(0),
406 /// Non-persistent state data for a device.
407 struct temp_dev_state
409 bool must_write
; // true if persistent part should be written
411 bool not_cap_offline
; // true == not capable of offline testing
412 bool not_cap_conveyance
;
415 bool not_cap_selective
;
417 unsigned char temperature
; // last recorded Temperature (in Celsius)
418 time_t tempmin_delay
; // time where Min Temperature tracking will start
420 bool powermodefail
; // true if power mode check failed
421 int powerskipcnt
; // Number of checks skipped due to idle or standby mode
424 unsigned char SmartPageSupported
; // has log sense IE page (0x2f)
425 unsigned char TempPageSupported
; // has log sense temperature page (0xd)
426 unsigned char SuppressReport
; // minimize nuisance reports
427 unsigned char modese_len
; // mode sense/select cmd len: 0 (don't
431 uint64_t num_sectors
; // Number of sectors
432 ata_smart_values smartval
; // SMART data
433 ata_smart_thresholds_pvt smartthres
; // SMART thresholds
434 bool offline_started
; // true if offline data collection was started
435 bool selftest_started
; // true if self-test was started
440 temp_dev_state::temp_dev_state()
442 not_cap_offline(false),
443 not_cap_conveyance(false),
444 not_cap_short(false),
446 not_cap_selective(false),
449 powermodefail(false),
451 SmartPageSupported(false),
452 TempPageSupported(false),
453 SuppressReport(false),
456 offline_started(false),
457 selftest_started(false)
459 memset(&smartval
, 0, sizeof(smartval
));
460 memset(&smartthres
, 0, sizeof(smartthres
));
463 /// Runtime state data for a device.
465 : public persistent_dev_state
,
466 public temp_dev_state
468 void update_persistent_state();
469 void update_temp_state();
472 /// Container for configuration info for each device.
473 typedef std::vector
<dev_config
> dev_config_vector
;
475 /// Container for state info for each device.
476 typedef std::vector
<dev_state
> dev_state_vector
;
478 // Copy ATA attributes to persistent state.
479 void dev_state::update_persistent_state()
481 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
482 const ata_smart_attribute
& ta
= smartval
.vendor_attributes
[i
];
483 ata_attribute
& pa
= ata_attributes
[i
];
486 pa
.val
= pa
.worst
= 0; pa
.raw
= 0;
494 | ((uint64_t)ta
.raw
[3] << 24)
495 | ((uint64_t)ta
.raw
[4] << 32)
496 | ((uint64_t)ta
.raw
[5] << 40);
497 pa
.resvd
= ta
.reserv
;
501 // Copy ATA from persistent to temp state.
502 void dev_state::update_temp_state()
504 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
505 const ata_attribute
& pa
= ata_attributes
[i
];
506 ata_smart_attribute
& ta
= smartval
.vendor_attributes
[i
];
509 ta
.current
= ta
.worst
= 0;
510 memset(ta
.raw
, 0, sizeof(ta
.raw
));
515 ta
.raw
[0] = (unsigned char) pa
.raw
;
516 ta
.raw
[1] = (unsigned char)(pa
.raw
>> 8);
517 ta
.raw
[2] = (unsigned char)(pa
.raw
>> 16);
518 ta
.raw
[3] = (unsigned char)(pa
.raw
>> 24);
519 ta
.raw
[4] = (unsigned char)(pa
.raw
>> 32);
520 ta
.raw
[5] = (unsigned char)(pa
.raw
>> 40);
521 ta
.reserv
= pa
.resvd
;
525 // Parse a line from a state file.
526 static bool parse_dev_state_line(const char * line
, persistent_dev_state
& state
)
528 static const regular_expression
regex(
530 "((temperature-min)" // (1 (2)
531 "|(temperature-max)" // (3)
532 "|(self-test-errors)" // (4)
533 "|(self-test-last-err-hour)" // (5)
534 "|(scheduled-test-next-check)" // (6)
535 "|(selective-test-last-start)" // (7)
536 "|(selective-test-last-end)" // (8)
537 "|(ata-error-count)" // (9)
538 "|(mail\\.([0-9]+)\\." // (10 (11)
539 "((count)" // (12 (13)
540 "|(first-sent-time)" // (14)
541 "|(last-sent-time)" // (15)
544 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
553 " *= *([0-9]+)[ \n]*$", // (24)
557 const int nmatch
= 1+24;
558 regmatch_t match
[nmatch
];
559 if (!regex
.execute(line
, nmatch
, match
))
561 if (match
[nmatch
-1].rm_so
< 0)
564 uint64_t val
= strtoull(line
+ match
[nmatch
-1].rm_so
, (char **)0, 10);
567 if (match
[++m
].rm_so
>= 0)
568 state
.tempmin
= (unsigned char)val
;
569 else if (match
[++m
].rm_so
>= 0)
570 state
.tempmax
= (unsigned char)val
;
571 else if (match
[++m
].rm_so
>= 0)
572 state
.selflogcount
= (unsigned char)val
;
573 else if (match
[++m
].rm_so
>= 0)
574 state
.selfloghour
= (unsigned short)val
;
575 else if (match
[++m
].rm_so
>= 0)
576 state
.scheduled_test_next_check
= (time_t)val
;
577 else if (match
[++m
].rm_so
>= 0)
578 state
.selective_test_last_start
= val
;
579 else if (match
[++m
].rm_so
>= 0)
580 state
.selective_test_last_end
= val
;
581 else if (match
[++m
].rm_so
>= 0)
582 state
.ataerrorcount
= (int)val
;
583 else if (match
[m
+=2].rm_so
>= 0) {
584 int i
= atoi(line
+match
[m
].rm_so
);
585 if (!(0 <= i
&& i
< SMARTD_NMAIL
))
587 if (i
== MAILTYPE_TEST
) // Don't suppress test mails
589 if (match
[m
+=2].rm_so
>= 0)
590 state
.maillog
[i
].logged
= (int)val
;
591 else if (match
[++m
].rm_so
>= 0)
592 state
.maillog
[i
].firstsent
= (time_t)val
;
593 else if (match
[++m
].rm_so
>= 0)
594 state
.maillog
[i
].lastsent
= (time_t)val
;
598 else if (match
[m
+=5+1].rm_so
>= 0) {
599 int i
= atoi(line
+match
[m
].rm_so
);
600 if (!(0 <= i
&& i
< NUMBER_ATA_SMART_ATTRIBUTES
))
602 if (match
[m
+=2].rm_so
>= 0)
603 state
.ata_attributes
[i
].id
= (unsigned char)val
;
604 else if (match
[++m
].rm_so
>= 0)
605 state
.ata_attributes
[i
].val
= (unsigned char)val
;
606 else if (match
[++m
].rm_so
>= 0)
607 state
.ata_attributes
[i
].worst
= (unsigned char)val
;
608 else if (match
[++m
].rm_so
>= 0)
609 state
.ata_attributes
[i
].raw
= val
;
610 else if (match
[++m
].rm_so
>= 0)
611 state
.ata_attributes
[i
].resvd
= (unsigned char)val
;
620 // Read a state file.
621 static bool read_dev_state(const char * path
, persistent_dev_state
& state
)
623 stdio_file
f(path
, "r");
626 pout("Cannot read state file \"%s\"\n", path
);
630 setmode(fileno(f
), O_TEXT
); // Allow files with \r\n
633 persistent_dev_state new_state
;
634 int good
= 0, bad
= 0;
636 while (fgets(line
, sizeof(line
), f
)) {
637 const char * s
= line
+ strspn(line
, " \t");
638 if (!*s
|| *s
== '#')
640 if (!parse_dev_state_line(line
, new_state
))
648 pout("%s: format error\n", path
);
651 pout("%s: %d invalid line(s) ignored\n", path
, bad
);
654 // This sets the values missing in the file to 0.
// Write "name = val" to a state file. Zero values are not written;
// read_dev_state() treats missing entries as 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val)
    // Space around PRIu64: "..."PRIu64 is parsed as a literal suffix in C++11
    fprintf(f, "%s = %" PRIu64 "\n", name, val);
}

// Write "name1.id.name2 = val" to a state file. Zero values are not written.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val)
    fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
671 // Write a state file
672 static bool write_dev_state(const char * path
, const persistent_dev_state
& state
)
674 // Rename old "file" to "file~"
675 std::string pathbak
= path
; pathbak
+= '~';
676 unlink(pathbak
.c_str());
677 rename(path
, pathbak
.c_str());
679 stdio_file
f(path
, "w");
681 pout("Cannot create state file \"%s\"\n", path
);
685 fprintf(f
, "# smartd state file\n");
686 write_dev_state_line(f
, "temperature-min", state
.tempmin
);
687 write_dev_state_line(f
, "temperature-max", state
.tempmax
);
688 write_dev_state_line(f
, "self-test-errors", state
.selflogcount
);
689 write_dev_state_line(f
, "self-test-last-err-hour", state
.selfloghour
);
690 write_dev_state_line(f
, "scheduled-test-next-check", state
.scheduled_test_next_check
);
691 write_dev_state_line(f
, "selective-test-last-start", state
.selective_test_last_start
);
692 write_dev_state_line(f
, "selective-test-last-end", state
.selective_test_last_end
);
695 for (i
= 0; i
< SMARTD_NMAIL
; i
++) {
696 if (i
== MAILTYPE_TEST
) // Don't suppress test mails
698 const mailinfo
& mi
= state
.maillog
[i
];
701 write_dev_state_line(f
, "mail", i
, "count", mi
.logged
);
702 write_dev_state_line(f
, "mail", i
, "first-sent-time", mi
.firstsent
);
703 write_dev_state_line(f
, "mail", i
, "last-sent-time", mi
.lastsent
);
707 write_dev_state_line(f
, "ata-error-count", state
.ataerrorcount
);
709 for (i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
710 const persistent_dev_state::ata_attribute
& pa
= state
.ata_attributes
[i
];
713 write_dev_state_line(f
, "ata-smart-attribute", i
, "id", pa
.id
);
714 write_dev_state_line(f
, "ata-smart-attribute", i
, "val", pa
.val
);
715 write_dev_state_line(f
, "ata-smart-attribute", i
, "worst", pa
.worst
);
716 write_dev_state_line(f
, "ata-smart-attribute", i
, "raw", pa
.raw
);
717 write_dev_state_line(f
, "ata-smart-attribute", i
, "resvd", pa
.resvd
);
723 // Write to the attrlog file
724 static bool write_dev_attrlog(const char * path
, const persistent_dev_state
& state
)
726 stdio_file
f(path
, "a");
728 pout("Cannot create attribute log file \"%s\"\n", path
);
733 time_t now
= time(0);
734 struct tm
* tms
= gmtime(&now
);
735 fprintf(f
, "%d-%02d-%02d %02d:%02d:%02d;",
736 1900+tms
->tm_year
, 1+tms
->tm_mon
, tms
->tm_mday
,
737 tms
->tm_hour
, tms
->tm_min
, tms
->tm_sec
);
738 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
739 const persistent_dev_state::ata_attribute
& pa
= state
.ata_attributes
[i
];
742 fprintf(f
, "\t%d;%d;%"PRIu64
";", pa
.id
, pa
.val
, pa
.raw
);
749 // Write all state files. If write_always is false, don't write
750 // unless must_write is set.
751 static void write_all_dev_states(const dev_config_vector
& configs
,
752 dev_state_vector
& states
,
753 bool write_always
= true)
755 for (unsigned i
= 0; i
< states
.size(); i
++) {
756 const dev_config
& cfg
= configs
.at(i
);
757 if (cfg
.state_file
.empty())
759 dev_state
& state
= states
[i
];
760 if (!write_always
&& !state
.must_write
)
762 if (!write_dev_state(cfg
.state_file
.c_str(), state
))
764 state
.must_write
= false;
765 if (write_always
|| debugmode
)
766 PrintOut(LOG_INFO
, "Device: %s, state written to %s\n",
767 cfg
.name
.c_str(), cfg
.state_file
.c_str());
771 // Write to all attrlog files
772 static void write_all_dev_attrlogs(const dev_config_vector
& configs
,
773 dev_state_vector
& states
)
775 for (unsigned i
= 0; i
< states
.size(); i
++) {
776 const dev_config
& cfg
= configs
.at(i
);
777 if (cfg
.attrlog_file
.empty())
779 dev_state
& state
= states
[i
];
780 write_dev_attrlog(cfg
.attrlog_file
.c_str(), state
);
784 // remove the PID file
785 static void RemovePidFile()
787 if (!pid_file
.empty()) {
788 if (unlink(pid_file
.c_str()))
789 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
790 pid_file
.c_str(), strerror(errno
));
796 extern "C" { // signal handlers require C-linkage
798 // Note if we catch a SIGUSR1
799 static void USR1handler(int sig
)
807 // Note if we catch a SIGUSR2
808 static void USR2handler(int sig
)
816 // Note if we catch a HUP (or INT in debug mode)
817 static void HUPhandler(int sig
)
826 // signal handler for TERM, QUIT, and INT (if not in debug mode)
827 static void sighandler(int sig
)
836 // Cleanup, print Goodbye message and remove pidfile
837 static int Goodbye(int status
)
839 // delete PID file, if one was created
842 // if we are exiting because of a code bug, tell user
843 if (status
==EXIT_BADCODE
)
844 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
846 // and this should be the final output from smartd before it exits
847 PrintOut(status
?LOG_CRIT
:LOG_INFO
, "smartd is exiting (exit status %d)\n", status
);
// Size of each buffer passed to exportenv()
#define ENVLENGTH 1024

// a replacement for setenv() which is not available on all platforms.
// Formats "name=value" into 'stackspace' (which must provide ENVLENGTH
// bytes; longer strings are truncated) and hands it to putenv().
// Note that the string passed to putenv must not be freed or made
// invalid, since a pointer to it is kept by putenv(). The caller therefore
// has to keep 'stackspace' alive for as long as the variable is needed,
// e.g. while a popen() call that relies on it is underway.
// Returns the result of putenv().
static int exportenv(char *stackspace, const char *name, const char *value)
{
  snprintf(stackspace, ENVLENGTH, "%s=%s", name, value);
  return putenv(stackspace);
}
867 static char *dnsdomain(const char *hostname
)
870 #ifdef HAVE_GETADDRINFO
871 static char canon_name
[NI_MAXHOST
];
872 struct addrinfo
*info
= NULL
;
873 struct addrinfo hints
;
876 memset(&hints
, 0, sizeof(hints
));
877 hints
.ai_flags
= AI_CANONNAME
;
878 if ((err
= getaddrinfo(hostname
, NULL
, &hints
, &info
)) || (!info
)) {
879 PrintOut(LOG_CRIT
, "Error retrieving getaddrinfo(%s): %s\n", hostname
, gai_strerror(err
));
882 if (info
->ai_canonname
) {
883 strncpy(canon_name
, info
->ai_canonname
, sizeof(canon_name
));
884 canon_name
[NI_MAXHOST
- 1] = '\0';
886 if ((p
= strchr(canon_name
, '.')))
890 #elif HAVE_GETHOSTBYNAME
892 if ((hp
= gethostbyname(hostname
))) {
893 // Does this work if gethostbyname() returns an IPv6 name in
894 // colon/dot notation? [BA]
895 if ((p
= strchr(hp
->h_name
, '.')))
906 static void MailWarning(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
907 __attribute_format_printf(4, 5);
909 // If either address or executable path is non-null then send and log
910 // a warning email, or execute executable
911 static void MailWarning(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...){
912 char command
[2048], message
[256], hostname
[256], domainname
[256], additional
[256],fullmessage
[1024];
913 char original
[256], further
[256], nisdomain
[256], subject
[256],dates
[DATEANDEPOCHLEN
];
914 char environ_strings
[11][ENVLENGTH
];
917 const int day
=24*3600;
919 const char * const whichfail
[]={
925 "FailedHealthCheck", // 5
926 "FailedReadSmartData", // 6
927 "FailedReadSmartErrorLog", // 7
928 "FailedReadSmartSelfTestLog", // 8
929 "FailedOpenDevice", // 9
930 "CurrentPendingSector", // 10
931 "OfflineUncorrectableSector", // 11
935 const char *unknown
="[Unknown]";
937 // See if user wants us to send mail
938 if (cfg
.emailaddress
.empty() && cfg
.emailcmdline
.empty())
941 std::string address
= cfg
.emailaddress
;
942 const char * executable
= cfg
.emailcmdline
.c_str();
944 // which type of mail are we sending?
945 mailinfo
* mail
=(state
.maillog
)+which
;
948 if (cfg
.emailfreq
<1 || cfg
.emailfreq
>3) {
949 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg
.emailfreq
);
952 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
953 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
954 which
, (int)sizeof(whichfail
));
958 // Return if a single warning mail has been sent.
959 if ((cfg
.emailfreq
==1) && mail
->logged
)
962 // Return if this is an email test and one has already been sent.
963 if (which
== 0 && mail
->logged
)
966 // To decide if to send mail, we need to know what time it is.
969 // Return if less than one day has gone by
970 if (cfg
.emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
973 // Return if less than 2^(logged-1) days have gone by
974 if (cfg
.emailfreq
==3 && mail
->logged
) {
975 days
=0x01<<(mail
->logged
-1);
977 if (epoch
<(mail
->lastsent
+days
))
981 #ifdef HAVE_LIBCAP_NG
982 if (enable_capabilities
) {
983 PrintOut(LOG_ERR
, "Sending a mail was supressed. "
984 "Mails can't be send when capabilites are enabled\n");
989 // record the time of this mail message, and the first mail message
991 mail
->firstsent
=epoch
;
992 mail
->lastsent
=epoch
;
994 // get system host & domain names (not null terminated if length=MAX)
995 #ifdef HAVE_GETHOSTNAME
996 if (gethostname(hostname
, 256))
997 strcpy(hostname
, unknown
);
1001 p
= dnsdomain(hostname
);
1003 strncpy(domainname
, p
, 255);
1004 domainname
[255]='\0';
1006 strcpy(domainname
, unknown
);
1009 strcpy(hostname
, unknown
);
1010 strcpy(domainname
, unknown
);
1013 #ifdef HAVE_GETDOMAINNAME
1014 if (getdomainname(nisdomain
, 256))
1015 strcpy(nisdomain
, unknown
);
1017 nisdomain
[255]='\0';
1019 strcpy(nisdomain
, unknown
);
1022 // print warning string into message
1024 vsnprintf(message
, 256, fmt
, ap
);
1027 // appropriate message about further information
1028 additional
[0]=original
[0]=further
[0]='\0';
1030 sprintf(further
,"You can also use the smartctl utility for further investigation.\n");
1032 switch (cfg
.emailfreq
) {
1034 sprintf(additional
,"No additional email messages about this problem will be sent.\n");
1037 sprintf(additional
,"Another email message will be sent in 24 hours if the problem persists.\n");
1040 sprintf(additional
,"Another email message will be sent in %d days if the problem persists\n",
1041 (0x01)<<mail
->logged
);
1044 if (cfg
.emailfreq
>1 && mail
->logged
) {
1045 dateandtimezoneepoch(dates
, mail
->firstsent
);
1046 sprintf(original
,"The original email about this issue was sent at %s\n", dates
);
1050 snprintf(subject
, 256,"SMART error (%s) detected on host: %s", whichfail
[which
], hostname
);
1052 // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
1054 #ifdef DEFAULT_MAILER
1055 executable
= DEFAULT_MAILER
;
1058 executable
= "mail";
1060 executable
= "blat"; // http://blat.sourceforge.net/
1064 #ifndef _WIN32 // blat mailer needs comma
1065 // replace commas by spaces to separate recipients
1066 std::replace(address
.begin(), address
.end(), ',', ' ');
1068 // Export information in environment variables that will be useful
1070 exportenv(environ_strings
[0], "SMARTD_MAILER", executable
);
1071 exportenv(environ_strings
[1], "SMARTD_MESSAGE", message
);
1072 exportenv(environ_strings
[2], "SMARTD_SUBJECT", subject
);
1073 dateandtimezoneepoch(dates
, mail
->firstsent
);
1074 exportenv(environ_strings
[3], "SMARTD_TFIRST", dates
);
1075 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
1076 exportenv(environ_strings
[4], "SMARTD_TFIRSTEPOCH", dates
);
1077 exportenv(environ_strings
[5], "SMARTD_FAILTYPE", whichfail
[which
]);
1078 if (!address
.empty())
1079 exportenv(environ_strings
[6], "SMARTD_ADDRESS", address
.c_str());
1080 exportenv(environ_strings
[7], "SMARTD_DEVICESTRING", cfg
.name
.c_str());
1082 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1083 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE",
1084 (!cfg
.dev_type
.empty() ? cfg
.dev_type
.c_str() : "auto"));
1085 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
.dev_name
.c_str());
1087 snprintf(fullmessage
, 1024,
1088 "This email was generated by the smartd daemon running on:\n\n"
1091 " NIS domain: %s\n\n"
1092 "The following warning/error was logged by the smartd daemon:\n\n"
1094 "For details see host's SYSLOG.\n\n"
1096 hostname
, domainname
, nisdomain
, message
, further
, original
, additional
);
1097 exportenv(environ_strings
[10], "SMARTD_FULLMESSAGE", fullmessage
);
1099 // now construct a command to send this as EMAIL
1101 if (!address
.empty())
1102 snprintf(command
, 2048,
1103 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1104 "%sENDMAIL\n", subject
, address
.c_str(), fullmessage
);
1106 snprintf(command
, 2048, "%s 2>&1", executable
);
1108 // tell SYSLOG what we are about to do...
1109 const char * newadd
= (!address
.empty()? address
.c_str() : "<nomailer>");
1110 const char * newwarn
= (which
? "Warning via" : "Test of");
1112 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
1113 which
?"Sending warning via":"Executing test of", executable
, newadd
);
1115 // issue the command to send mail or to run the user's executable
1118 if (!(pfp
=popen(command
, "r")))
1119 // failed to popen() mail process
1120 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1121 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
1125 char buffer
[EBUFLEN
];
1127 // if unexpected output on stdout/stderr, null terminate, print, and flush
1128 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
1130 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
1131 buffer
[newlen
]='\0';
1132 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1133 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
1135 // flush pipe if needed
1136 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
1139 // tell user that pipe was flushed, or that something is really wrong
1140 if (count
&& count
<EBUFLEN
)
1141 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1142 newwarn
, executable
, newadd
);
1144 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1145 newwarn
, executable
, newadd
);
1148 // if something went wrong with mail process, print warning
1150 if (-1==(status
=pclose(pfp
)))
1151 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
1152 errno
?strerror(errno
):"");
1154 // mail process apparently succeeded. Check and report exit status
1157 if (WIFEXITED(status
)) {
1158 // exited 'normally' (but perhaps with nonzero status)
1159 status8
=WEXITSTATUS(status
);
1162 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1163 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
1165 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1166 newwarn
, executable
, newadd
, status
, status8
);
1168 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
1171 if (WIFSIGNALED(status
))
1172 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1173 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
1175 // this branch is probably not possible. If subprocess is
1176 // stopped then pclose() should not return.
1177 if (WIFSTOPPED(status
))
1178 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1179 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
1186 // No "here-documents" on Windows, so must use separate commandline and stdin
1187 char stdinbuf
[1024];
1188 command
[0] = stdinbuf
[0] = 0;
1189 int boxtype
= -1, boxmsgoffs
= 0;
1190 const char * newadd
= "<nomailer>";
1191 if (!address
.empty()) {
1192 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1193 char addr1
[9+1+13] = ""; int n1
= -1, n2
= -1;
1194 if (sscanf(address
.c_str(), "%9[a-z]%n,%n", addr1
, &n1
, &n2
) == 1 && (n1
== (int)address
.size() || n2
> 0)) {
1195 if (!strcmp(addr1
, "msgbox"))
1197 else if (!strcmp(addr1
, "sysmsgbox"))
1200 address
.erase(0, (n2
> n1
? n2
: n1
));
1203 if (!address
.empty()) {
1204 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1205 snprintf(command
, sizeof(command
),
1206 "%s - -q -subject \"%s\" -to \"%s\"",
1207 executable
, subject
, address
.c_str());
1208 newadd
= address
.c_str();
1211 // Message for mail [0...] and messagebox [boxmsgoffs...]
1212 snprintf(stdinbuf
, sizeof(stdinbuf
),
1213 "This email was generated by the smartd daemon running on:\n\n"
1216 // " NIS domain: %s\n"
1218 hostname
, /*domainname, */ nisdomain
);
1219 boxmsgoffs
= strlen(stdinbuf
);
1220 snprintf(stdinbuf
+boxmsgoffs
, sizeof(stdinbuf
)-boxmsgoffs
,
1221 "The following warning/error was logged by the smartd daemon:\n\n"
1223 "For details see the event log or log file of smartd.\n\n"
1226 message
, further
, original
, additional
);
1229 snprintf(command
, sizeof(command
), "%s", executable
);
1231 const char * newwarn
= (which
? "Warning via" : "Test of");
1234 daemon_messagebox(boxtype
, subject
, stdinbuf
+boxmsgoffs
);
1235 PrintOut(LOG_INFO
,"%s message box\n", newwarn
);
1238 char stdoutbuf
[800]; // < buffer in syslog_win32::vsyslog()
1241 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
1242 (which
?"Sending warning via":"Executing test of"), executable
, newadd
);
1243 rc
= daemon_spawn(command
, stdinbuf
, strlen(stdinbuf
), stdoutbuf
, sizeof(stdoutbuf
));
1244 if (rc
>= 0 && stdoutbuf
[0])
1245 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1246 newwarn
, executable
, newadd
, (int)strlen(stdoutbuf
), stdoutbuf
);
1248 PrintOut(LOG_CRIT
,"%s %s to %s: failed, exit status %d\n",
1249 newwarn
, executable
, newadd
, rc
);
1251 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
1256 // increment mail sent counter
// Log that a previously reported warning condition has been reset.
// 'which' selects an entry of state.maillog and is range-checked against
// 0 <= which < SMARTD_NMAIL. 'fmt' plus the variadic arguments are
// formatted with vsnprintf() into the text embedded in the log line,
// which also reports mi.logged as the number of emails.
// Sets state.must_write.
// NOTE(review): this listing omits several original lines (presumably the
// early returns, the declarations of 'msg'/'ap' and the counter reset) -
// confirm against the complete file.
1260 static void reset_warning_mail(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
1261 __attribute_format_printf(4, 5);
1263 static void reset_warning_mail(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
1265 if (!(0 <= which
&& which
< SMARTD_NMAIL
))
1268 // Return if no mail sent yet
1269 mailinfo
& mi
= state
.maillog
[which
];
1273 // Format & print message
1277 vsnprintf(msg
, sizeof(msg
), fmt
, ap
);
1280 PrintOut(LOG_INFO
, "Device: %s, %s, warning condition reset after %d email%s\n", cfg
.name
.c_str(),
1281 msg
, mi
.logged
, (mi
.logged
==1 ? "" : "s"));
1283 // Clear mail counter and timestamps
1285 state
.must_write
= true;
// Format 'fmt'/'ap' into a local buffer of 512+EBUFLEN bytes (longer
// output is truncated by vsnprintf) and walk it line by line, using
// strchr() to find each '\n' and passing the pieces to syslog() at the
// given priority.
// NOTE(review): the statements between the strchr() test and the syslog()
// call are missing from this listing - confirm how the newline is
// terminated and whether empty lines are skipped.
1290 // Output multiple lines via separate syslog(3) calls.
1291 static void vsyslog_lines(int priority
, const char * fmt
, va_list ap
)
1293 char buf
[512+EBUFLEN
]; // enough space for exec cmd output in MailWarning()
1294 vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
1296 for (char * p
= buf
, * q
; p
&& *p
; p
= q
) {
1297 if ((q
= strchr(p
, '\n')))
1300 syslog(priority
, "%s\n", p
);
// Alternative definition: vsyslog_lines is simply an alias for vsyslog.
// NOTE(review): presumably selected by a Windows-only preprocessor branch
// whose #if/#else lines are not shown here.
1305 // os_win32/syslog_win32.cpp supports multiple lines.
1306 #define vsyslog_lines vsyslog
// printf-style output routine with external linkage.
// Depending on 'debugmode' the text is either printed directly (the
// visible branch writes to stderr when facility == LOG_LOCAL1) or, when
// debugmode==2 or ata_debugmode/scsi_debugmode is set, sent to syslog
// through openlog("smartd", ...) and vsyslog_lines() at LOG_INFO.
// NOTE(review): the va_start/va_end calls and the alternative print
// branch are missing from this listing - confirm against the full file.
1309 // Printing function for watching ataprint commands, or losing them
1310 // [From GLIBC Manual: Since the prototype doesn't specify types for
1311 // optional arguments, in a call to a variadic function the default
1312 // argument promotions are performed on the optional argument
1313 // values. This means the objects of type char or short int (whether
1314 // signed or not) are promoted to either int or unsigned int, as
1316 void pout(const char *fmt
, ...){
1319 // get the correct time in syslog()
1320 FixGlibcTimeZoneBug();
1321 // initialize variable argument list
1323 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1324 if (debugmode
&& debugmode
!=2)
1326 if (facility
== LOG_LOCAL1
) // logging to stdout
1327 vfprintf(stderr
,fmt
,ap
);
1331 // in debugmode==2 mode we print output from knowndrives.o functions
1332 else if (debugmode
==2 || ata_debugmode
|| scsi_debugmode
) {
1333 openlog("smartd", LOG_PID
, facility
);
1334 vsyslog_lines(LOG_INFO
, fmt
, ap
);
// Central printf-style logging routine used throughout this file.
// 'priority' is a syslog priority (callers pass LOG_INFO or LOG_CRIT);
// the syslog path opens the log as "smartd" with LOG_PID and the global
// 'facility', then forwards to vsyslog_lines().
// NOTE(review): the conditions choosing between direct printing and
// syslog, and the va_start/va_end calls, are missing from this listing.
1342 // This function prints either to stdout or to the syslog as needed.
1343 static void PrintOut(int priority
, const char *fmt
, ...){
1346 // get the correct time in syslog()
1347 FixGlibcTimeZoneBug();
1348 // initialize variable argument list
1352 if (facility
== LOG_LOCAL1
) // logging to stdout
1353 vfprintf(stderr
,fmt
,ap
);
1358 openlog("smartd", LOG_PID
, facility
);
1359 vsyslog_lines(priority
, fmt
, ap
);
1366 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1367 void checksumwarning(const char * string
)
1369 pout("Warning! %s error: invalid SMART checksum.\n", string
);
// Poll for the existence of the PID file with stat(), for at most
// max_wait (10) loop iterations. The check on pid_file.empty() or
// debugmode short-circuits the wait.
// NOTE(review): the return statements and the delay between iterations
// are missing from this listing - confirm return values and timing.
1374 // Wait for the pid file to show up, this makes sure a calling program knows
1375 // that the daemon is really up and running and has a pid to kill it
1376 static bool WaitForPidFile()
1378 int waited
, max_wait
= 10;
1379 struct stat stat_buf
;
1381 if (pid_file
.empty() || debugmode
)
1384 for(waited
= 0; waited
< max_wait
; ++waited
) {
1385 if (!stat(pid_file
.c_str(), &stat_buf
)) {
// Put the process into the background.
// Visible steps: fork(); the first parent waits for the PID file via
// WaitForPidFile(); fork() again; file descriptors from getdtablesize()
// down to 0 are iterated; "/dev/null" is opened O_RDWR and dup()ed
// twice; chdir("/"); finally the new PID is logged.
// A second variant detaches from the console with daemon_detach("smartd").
// NOTE(review): the exit/close calls, the preprocessor lines separating
// the two variants and the local declarations are missing from this
// listing - confirm against the complete file.
1395 // Forks new process, closes ALL file descriptors, redirects stdin,
1396 // stdout, and stderr. Not quite daemon(). See
1397 // http://www.linuxjournal.com/article/2335
1398 // for a good description of why we do things this way.
1399 static void DaemonInit()
1405 // flush all buffered streams. Else we might get two copies of open
1406 // streams since both parent and child get copies of the buffers.
1410 if ((pid
=fork()) < 0) {
1412 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1416 // we are the parent process, wait for pid file, then exit cleanly
1417 if(!WaitForPidFile()) {
1418 PrintOut(LOG_CRIT
,"PID file %s didn't show up!\n", pid_file
.c_str());
1424 // from here on, we are the child process.
1427 // Fork one more time to avoid any possibility of having terminals
1428 if ((pid
=fork()) < 0) {
1430 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1434 // we are the parent process -- exit cleanly
1437 // Now we are the child's child...
1440 // close any open file descriptors
1441 for (i
=getdtablesize();i
>=0;--i
)
// Evaluates 'cmd' inside an empty if so its result counts as used.
1444 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1446 // redirect any IO attempts to /dev/null for stdin
1447 i
=open("/dev/null",O_RDWR
);
1450 NO_warn_unused_result(dup(i
));
1452 NO_warn_unused_result(dup(i
));
1455 NO_warn_unused_result(chdir("/"));
1458 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1462 // No fork() on native Win32
1463 // Detach this process from console
1465 if (daemon_detach("smartd")) {
1466 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1469 // stdin/out/err now closed if not redirected
// Does nothing visible when pid_file is empty. Otherwise the file is
// opened for writing through stdio_file, the PID from getpid() is written
// as a decimal line, and the result of fprintf() and f.close() is checked.
// Failure is logged at LOG_CRIT ("... - exiting."), success at LOG_INFO.
// The umask is changed before the file is created; two alternative masks
// appear below (0077, and 0033 for Cygwin).
// NOTE(review): the preprocessor lines choosing the mask, the umask
// restore and the exit call are missing from this listing.
1475 // create a PID file containing the current process id
1476 static void WritePidFile()
1478 if (!pid_file
.empty()) {
1479 pid_t pid
= getpid();
1482 old_umask
= umask(0077); // rwx------
1484 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1485 old_umask
= umask(0033); // rwxr--r--
1488 stdio_file
f(pid_file
.c_str(), "w");
1490 if (!(f
&& fprintf(f
, "%d\n", (int)pid
) > 0 && f
.close())) {
1491 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
.c_str());
1494 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
.c_str(), (int)pid
);
1498 // Prints header identifying version of code and home
1499 static void PrintHead()
1501 PrintOut(LOG_INFO
, "%s\n", format_version_info("smartd").c_str());
// The help text is one long format string with two %s placeholders:
// the configuration file name ('configfile') and the list of valid
// device types from smi()->get_valid_dev_types_str().
// NOTE(review): the opening of the print call is missing from this
// listing - presumably PrintOut(LOG_INFO, ...); confirm.
1504 // prints help info for configuration file Directives
1505 static void Directives()
1508 "Configuration file (%s) Directives (after device name):\n"
1509 " -d TYPE Set the device type: %s, auto, removable\n"
1510 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1511 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1512 " -S VAL Enable/disable attribute autosave (on/off)\n"
1513 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1514 " -H Monitor SMART Health Status, report if failed\n"
1515 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1516 " -l TYPE Monitor SMART log or self-test status:\n"
1517 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1518 " -l scterc,R,W Set SCT Error Recovery Control\n"
1519 " -e Change device setting: aam,[N|off], apm,[N|off], lookahead,[on|off],\n"
1520 " security-freeze, standby,[N|off], wcache,[on|off]\n"
1521 " -f Monitor 'Usage' Attributes, report failures\n"
1522 " -m ADD Send email warning to address ADD\n"
1523 " -M TYPE Modify email warning behavior (see man page)\n"
1524 " -p Report changes in 'Prefailure' Attributes\n"
1525 " -u Report changes in 'Usage' Attributes\n"
1526 " -t Equivalent to -p and -u Directives\n"
1527 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1528 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1529 " -i ID Ignore Attribute ID for -f Directive\n"
1530 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1531 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1532 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1533 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1534 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1535 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1536 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1537 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1538 " # Comment: text after a hash sign is ignored\n"
1539 " \\ Line continuation character\n"
1540 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1541 "Use ID = 0 to turn off -C and/or -U Directives\n"
1542 "Example: /dev/hda -a\n",
1543 configfile
, smi()->get_valid_dev_types_str().c_str());
// Each return below yields a string literal describing the accepted
// arguments of one command-line option; the help output in this file
// calls it with 'q' and 'r'.
// NOTE(review): the switch/case labels mapping 'opt' to each string and
// the failure return are missing from this listing - confirm which
// option letter selects which text.
1547 /* Returns a pointer to a static string containing a formatted list of the valid
1548 arguments to the option opt or NULL on failure. */
1549 static const char *GetValidArgList(char opt
)
1554 return "<PATH_PREFIX>";
1556 return "<FILE_NAME>, -";
1558 return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
1560 return "nodev, errors, nodevstartup, never, onecheck, showtests";
1562 return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
1565 return "<FILE_NAME>";
1567 return "<INTEGER_SECONDS>";
1573 /* prints help information for command syntax */
1576 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1577 PrintOut(LOG_INFO
," -A PREFIX, --attributelog=PREFIX\n");
1578 PrintOut(LOG_INFO
," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1579 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1580 PrintOut(LOG_INFO
," [default is "SMARTMONTOOLS_ATTRIBUTELOG
"MODEL-SERIAL.ata.csv]\n");
1582 PrintOut(LOG_INFO
,"\n");
1583 PrintOut(LOG_INFO
," -B [+]FILE, --drivedb=[+]FILE\n");
1584 PrintOut(LOG_INFO
," Read and replace [add] drive database from FILE\n");
1585 PrintOut(LOG_INFO
," [default is +%s", get_drivedb_path_add());
1586 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1587 PrintOut(LOG_INFO
,"\n");
1588 PrintOut(LOG_INFO
," and then %s", get_drivedb_path_default());
1590 PrintOut(LOG_INFO
,"]\n\n");
1591 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1592 PrintOut(LOG_INFO
," Read configuration file NAME or stdin\n");
1593 PrintOut(LOG_INFO
," [default is %s]\n\n", configfile
);
1594 #ifdef HAVE_LIBCAP_NG
1595 PrintOut(LOG_INFO
," -C, --capabilities\n");
1596 PrintOut(LOG_INFO
," Use capabilities.\n"
1597 " Warning: Mail notification does not work when used.\n\n");
1599 PrintOut(LOG_INFO
," -d, --debug\n");
1600 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1601 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1602 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1603 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1604 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1605 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1606 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1607 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
1609 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1611 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1614 PrintOut(LOG_INFO
," -n, --no-fork\n");
1615 PrintOut(LOG_INFO
," Do not fork into background\n\n");
1617 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1618 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1619 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1620 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1621 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1622 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1623 PrintOut(LOG_INFO
," -s PREFIX, --savestates=PREFIX\n");
1624 PrintOut(LOG_INFO
," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1625 #ifdef SMARTMONTOOLS_SAVESTATES
1626 PrintOut(LOG_INFO
," [default is "SMARTMONTOOLS_SAVESTATES
"MODEL-SERIAL.TYPE.state]\n");
1628 PrintOut(LOG_INFO
,"\n");
1630 PrintOut(LOG_INFO
," --service\n");
1631 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1632 PrintOut(LOG_INFO
," smartd install [options]\n");
1633 PrintOut(LOG_INFO
," Remove service with:\n");
1634 PrintOut(LOG_INFO
," smartd remove\n\n");
1636 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1637 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
// Close 'device' via device->close(). When close() reports failure, the
// device's error message (device->get_errmsg()) is logged at LOG_INFO
// together with 'name', which is used only for log output here.
// NOTE(review): the return statements are missing from this listing -
// confirm the values returned on failure and on success.
1640 static int CloseDevice(smart_device
* device
, const char * name
)
1642 if (!device
->close()){
1643 PrintOut(LOG_INFO
,"Device: %s, %s, close() failed\n", name
, device
->get_errmsg());
1646 // device successfully closed
// Predicate for sanitizing state file names: only the ASCII digits
// '0'-'9' and the ASCII letters 'A'-'Z' / 'a'-'z' are accepted.
// Returns true for every other character, false for an accepted one.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
// Two paths are visible:
//  - Summary log: ataReadErrorLog() fills 'log'; a nonzero result is
//    logged as a read failure. The count returned is log.ata_error_count,
//    or 0 when log.error_log_pointer is 0.
//  - Extended log: ataReadExtErrorLog() reads the first sector only; a
//    false result is logged as a read failure. The count returned is
//    logx.device_error_count, or 0 when both logx.error_log_index and
//    logx.reserved1 are 0.
// 'name' is used only in the log messages.
// NOTE(review): the test on 'extended' selecting the path and the error
// returns are missing from this listing - confirm against the full file.
1658 // Read error count from Summary or Extended Comprehensive SMART error log
1659 // Return -1 on error
1660 static int read_ata_error_count(ata_device
* device
, const char * name
,
1661 unsigned char fix_firmwarebug
, bool extended
)
1664 ata_smart_errorlog log
;
1665 if (ataReadErrorLog(device
, &log
, fix_firmwarebug
)){
1666 PrintOut(LOG_INFO
,"Device: %s, Read Summary SMART Error Log failed\n",name
);
1669 return (log
.error_log_pointer
? log
.ata_error_count
: 0);
1672 ata_smart_exterrlog logx
;
1673 if (!ataReadExtErrorLog(device
, &logx
, 1 /*first sector only*/)) {
1674 PrintOut(LOG_INFO
,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name
);
1677 // Some disks use the reserved byte as index, see ataprint.cpp.
1678 return (logx
.error_log_index
|| logx
.reserved1
? logx
.device_error_count
: 0);
// Read the SMART self-test log with ataReadSelfTestLog(); a nonzero
// result is logged as a failure. On success the value returned is that
// of ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug).
// The packed result is decoded by the SELFTEST_ERRORCOUNT and
// SELFTEST_ERRORHOURS macros.
// NOTE(review): the error return after the failed read is missing from
// this listing - confirm the value returned.
1682 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1683 // error count, and top bits are the power-on hours of the last error.
1684 static int SelfTestErrorCount(ata_device
* device
, const char * name
,
1685 unsigned char fix_firmwarebug
)
1687 struct ata_smart_selftestlog log
;
1689 if (ataReadSelfTestLog(device
, &log
, fix_firmwarebug
)){
1690 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1694 // return current number of self-test errors
1695 return ataPrintSmartSelfTestlog(&log
, false, fix_firmwarebug
);
// Decode the packed value returned by SelfTestErrorCount():
// the bottom 8 bits are the self-test error count, the bits above them
// the power-on hours of the last error (masked here to 16 bits).
// The argument is parenthesized so that an expression argument such as
// `a | b` is evaluated as a whole before masking/shifting.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
// Tell whether an offline data collection status byte reports a
// collection in progress. Bit 7 is masked off first; the remaining
// 7-bit code must equal 0x03.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7f;
  return code == 0x03;
}
// Tell whether a self-test execution status byte reports a test in
// progress: true when the value above the low four bits equals 0xf.
static inline bool is_self_test_in_progress(unsigned char status)
{
  const unsigned upper = status >> 4;
  return upper == 0xf;
}
// Translate the offline data collection status byte into a log line.
// The low 7 bits select the message text; code 0x06 (aborted by the
// device with a fatal error) is logged at LOG_CRIT, the others at
// LOG_INFO. Bit 7 (0x80) appends " (auto:on)". Codes without a message
// are logged as an unknown status in hex.
// NOTE(review): the declaration of 'msg', the default case and the
// if/else choosing between the two PrintOut calls are missing from this
// listing - confirm against the complete file.
1713 // Log offline data collection status
1714 static void log_offline_data_coll_status(const char * name
, unsigned char status
)
1717 switch (status
& 0x7f) {
1718 case 0x00: msg
= "was never started"; break;
1719 case 0x02: msg
= "was completed without error"; break;
1720 case 0x03: msg
= "is in progress"; break;
1721 case 0x04: msg
= "was suspended by an interrupting command from host"; break;
1722 case 0x05: msg
= "was aborted by an interrupting command from host"; break;
1723 case 0x06: msg
= "was aborted by the device with a fatal error"; break;
1728 PrintOut(((status
& 0x7f) == 0x06 ? LOG_CRIT
: LOG_INFO
),
1729 "Device: %s, offline data collection %s%s\n", name
, msg
,
1730 ((status
& 0x80) ? " (auto:on)" : ""));
1732 PrintOut(LOG_INFO
, "Device: %s, unknown offline data collection status 0x%02x\n",
// Translate the self-test execution status byte into a log line.
// The upper nibble (status >> 4) selects the message text; results
// with code >= 0x4 are logged at LOG_CRIT, lower ones at LOG_INFO.
// Upper nibble 0xf means a test is in progress: the lower nibble is
// printed followed by a literal '0', i.e. as a multiple of ten percent
// remaining. Any other code is logged as an unknown status in hex.
// NOTE(review): the declaration of 'msg', the default case and the
// first 'if' of the if/else chain are missing from this listing.
1736 // Log self-test execution status
1737 static void log_self_test_exec_status(const char * name
, unsigned char status
)
1740 switch (status
>> 4) {
1741 case 0x0: msg
= "completed without error"; break;
1742 case 0x1: msg
= "was aborted by the host"; break;
1743 case 0x2: msg
= "was interrupted by the host with a reset"; break;
1744 case 0x3: msg
= "could not complete due to a fatal or unknown error"; break;
1745 case 0x4: msg
= "completed with error (unknown test element)"; break;
1746 case 0x5: msg
= "completed with error (electrical test element)"; break;
1747 case 0x6: msg
= "completed with error (servo/seek test element)"; break;
1748 case 0x7: msg
= "completed with error (read test element)"; break;
1749 case 0x8: msg
= "completed with error (handling damage?)"; break;
1754 PrintOut(((status
>> 4) >= 0x4 ? LOG_CRIT
: LOG_INFO
),
1755 "Device: %s, previous self-test %s\n", name
, msg
);
1756 else if ((status
>> 4) == 0xf)
1757 PrintOut(LOG_INFO
, "Device: %s, self-test in progress, %u0%% remaining\n",
1758 name
, status
& 0x0f);
1760 PrintOut(LOG_INFO
, "Device: %s, unknown self-test status 0x%02x\n",
// Validate that attribute 'id' can be used for pending-sector
// monitoring. 'msg' is the attribute's label used in log output.
// Step 1: look the attribute up in state.smartval with
//         ata_find_attr_index(); a "no Attribute" message exists for
//         the lookup-failure case.
// Step 2: read its raw value and treat it as bogus when it is >= the
//         device's sector count, or >= 0xffffffff if num_sectors is 0.
// NOTE(review): the test on the index 'i' and the return statements are
// missing from this listing - presumably false is returned in both
// rejection cases and true otherwise; confirm.
1764 // Check pending sector count id (-C, -U directives).
1765 static bool check_pending_id(const dev_config
& cfg
, const dev_state
& state
,
1766 unsigned char id
, const char * msg
)
1768 // Check attribute index
1769 int i
= ata_find_attr_index(id
, state
.smartval
);
1771 PrintOut(LOG_INFO
, "Device: %s, can't monitor %s count - no Attribute %d\n",
1772 cfg
.name
.c_str(), msg
, id
);
1777 uint64_t rawval
= ata_get_attr_raw_value(state
.smartval
.vendor_attributes
[i
],
1778 cfg
.attribute_defs
);
1779 if (rawval
>= (state
.num_sectors
? state
.num_sectors
: 0xffffffffULL
)) {
1780 PrintOut(LOG_INFO
, "Device: %s, ignoring %s count - bogus Attribute %d value %"PRIu64
" (0x%"PRIx64
")\n",
1781 cfg
.name
.c_str(), msg
, id
, rawval
, rawval
);
// Final per-device setup after a successful scan.
//  - When an email address or command line is configured but
//    cfg.emailfreq is still 0, a frequency is chosen: 1 ('-M once') or
//    2 ('-M daily'), depending on whether cfg.state_file is empty.
//  - When a self-test regex is configured and no next-check time is
//    set, scheduled_test_next_check is initialized to the current time.
// NOTE(review): the line between the two emailfreq assignments is
// missing from this listing - presumably 'else'; confirm.
1788 // Called by ATA/SCSIDeviceScan() after successful device check
1789 static void finish_device_scan(dev_config
& cfg
, dev_state
& state
)
1791 // Set cfg.emailfreq if user hasn't set it
1792 if ((!cfg
.emailaddress
.empty() || !cfg
.emailcmdline
.empty()) && !cfg
.emailfreq
) {
1793 // Avoid that emails are suppressed forever due to state persistence
1794 if (cfg
.state_file
.empty())
1795 cfg
.emailfreq
= 1; // '-M once'
1797 cfg
.emailfreq
= 2; // '-M daily'
1800 // Start self-test regex check now if time was not read from state file
1801 if (!cfg
.test_regex
.empty() && !state
.scheduled_test_next_check
)
1802 state
.scheduled_test_next_check
= time(0);
// Appends to 'msg' (passed by reference). 'set_option' and 'has_value'
// default to 0 and false. One visible branch appends ":%d" with
// set_option-1, which matches callers in this file that pass a
// cfg.set_* value and use value-1 as the feature argument.
// NOTE(review): most of the body (how 'name' and 'ok' are used and what
// the set_option < 0 / > 0 branches append) is missing from this
// listing - confirm against the complete file.
1805 // Common function to format result message for ATA setting
1806 static void format_set_result_msg(std::string
& msg
, const char * name
, bool ok
,
1807 int set_option
= 0, bool has_value
= false)
1814 else if (set_option
< 0)
1817 msg
+= strprintf(":%d", set_option
-1);
1818 else if (set_option
> 0)
// Constant flag passed as the third argument of ata_read_identity() in
// the ATA device scan; currently always false.
1823 // TODO: Add '-F swapid' directive
1824 const bool fix_swapped_id
= false;
1826 // scan to see what ata devices there are, and if they support SMART
1827 static int ATADeviceScan(dev_config
& cfg
, dev_state
& state
, ata_device
* atadev
)
1830 struct ata_identify_device drive
;
1831 const char *name
= cfg
.name
.c_str();
1834 // Device must be open
1836 // Get drive identity structure
1837 if ((retid
= ata_read_identity(atadev
, &drive
, fix_swapped_id
))) {
1839 // Unable to read Identity structure
1840 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1842 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1843 name
, packetdevicetype(retid
-1));
1844 CloseDevice(atadev
, name
);
1848 // Log drive identity and size
1849 char model
[40+1], serial
[20+1], firmware
[8+1];
1850 ata_format_id_string(model
, drive
.model
, sizeof(model
)-1);
1851 ata_format_id_string(serial
, drive
.serial_no
, sizeof(serial
)-1);
1852 ata_format_id_string(firmware
, drive
.fw_rev
, sizeof(firmware
)-1);
1854 ata_size_info sizes
;
1855 ata_get_size_info(&drive
, sizes
);
1856 state
.num_sectors
= sizes
.sectors
;
1858 char wwn
[30]; wwn
[0] = 0;
1859 unsigned oui
= 0; uint64_t unique_id
= 0;
1860 int naa
= ata_get_wwn(&drive
, oui
, unique_id
);
1862 snprintf(wwn
, sizeof(wwn
), "WWN:%x-%06x-%09"PRIx64
", ", naa
, oui
, unique_id
);
1865 PrintOut(LOG_INFO
, "Device: %s, %s, S/N:%s, %sFW:%s, %s\n", name
,
1866 model
, serial
, wwn
, firmware
,
1867 format_capacity(cap
, sizeof(cap
), sizes
.capacity
, "."));
1869 // Show if device in database, and use preset vendor attribute
1870 // options unless user has requested otherwise.
1871 if (cfg
.ignorepresets
)
1872 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1874 // Apply vendor specific presets, print warning if present
1875 const drive_settings
* dbentry
= lookup_drive_apply_presets(
1876 &drive
, cfg
.attribute_defs
, cfg
.fix_firmwarebug
);
1878 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1880 PrintOut(LOG_INFO
, "Device: %s, found in smartd database%s%s\n",
1881 name
, (*dbentry
->modelfamily
? ": " : "."), (*dbentry
->modelfamily
? dbentry
->modelfamily
: ""));
1882 if (*dbentry
->warningmsg
)
1883 PrintOut(LOG_CRIT
, "Device: %s, WARNING: %s\n", name
, dbentry
->warningmsg
);
1887 // Set default '-C 197[+]' if no '-C ID' is specified.
1888 if (!cfg
.curr_pending_set
)
1889 cfg
.curr_pending_id
= get_unc_attr_id(false, cfg
.attribute_defs
, cfg
.curr_pending_incr
);
1890 // Set default '-U 198[+]' if no '-U ID' is specified.
1891 if (!cfg
.offl_pending_set
)
1892 cfg
.offl_pending_id
= get_unc_attr_id(true, cfg
.attribute_defs
, cfg
.offl_pending_incr
);
1894 // If requested, show which presets would be used for this drive
1895 if (cfg
.showpresets
) {
1896 int savedebugmode
=debugmode
;
1897 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1900 show_presets(&drive
);
1901 debugmode
=savedebugmode
;
1904 // see if drive supports SMART
1905 supported
=ataSmartSupport(&drive
);
1908 // drive does NOT support SMART
1909 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
1911 // can't tell if drive supports SMART
1912 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
1914 // should we proceed anyway?
1915 if (cfg
.permissive
) {
1916 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
1919 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
1920 CloseDevice(atadev
, name
);
1925 if (ataEnableSmart(atadev
)) {
1926 // Enable SMART command has failed
1927 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
1928 CloseDevice(atadev
, name
);
1932 // disable device attribute autosave...
1933 if (cfg
.autosave
==1) {
1934 if (ataDisableAutoSave(atadev
))
1935 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
1937 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
1940 // or enable device attribute autosave
1941 if (cfg
.autosave
==2) {
1942 if (ataEnableAutoSave(atadev
))
1943 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
1945 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
1948 // capability check: SMART status
1949 if (cfg
.smartcheck
&& ataSmartStatus2(atadev
) == -1) {
1950 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
1951 cfg
.smartcheck
= false;
1954 // capability check: Read smart values and thresholds. Note that
1955 // smart values are ALSO needed even if we ONLY want to know if the
1956 // device is self-test log or error-log capable! After ATA-5, this
1957 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1958 // but sadly not for ATA-5. Sigh.
1960 // do we need to get SMART data?
1961 bool smart_val_ok
= false;
1962 if ( cfg
.autoofflinetest
|| cfg
.selftest
1963 || cfg
.errorlog
|| cfg
.xerrorlog
1964 || cfg
.offlinests
|| cfg
.selfteststs
1965 || cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
1966 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
1967 || cfg
.curr_pending_id
|| cfg
.offl_pending_id
) {
1969 if (ataReadSmartValues(atadev
, &state
.smartval
)) {
1970 PrintOut(LOG_INFO
, "Device: %s, Read SMART Values failed\n", name
);
1971 cfg
.usagefailed
= cfg
.prefail
= cfg
.usage
= false;
1972 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
1973 cfg
.curr_pending_id
= cfg
.offl_pending_id
= 0;
1976 smart_val_ok
= true;
1977 if (ataReadSmartThresholds(atadev
, &state
.smartthres
)) {
1978 PrintOut(LOG_INFO
, "Device: %s, Read SMART Thresholds failed%s\n",
1979 name
, (cfg
.usagefailed
? ", ignoring -f Directive" : ""));
1980 cfg
.usagefailed
= false;
1981 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1982 memset(&state
.smartthres
, 0, sizeof(state
.smartthres
));
1986 // see if the necessary Attribute is there to monitor offline or
1987 // current pending sectors or temperature
1988 if ( cfg
.curr_pending_id
1989 && !check_pending_id(cfg
, state
, cfg
.curr_pending_id
,
1990 "Current_Pending_Sector"))
1991 cfg
.curr_pending_id
= 0;
1993 if ( cfg
.offl_pending_id
1994 && !check_pending_id(cfg
, state
, cfg
.offl_pending_id
,
1995 "Offline_Uncorrectable"))
1996 cfg
.offl_pending_id
= 0;
1998 if ( (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
1999 && !ata_return_temperature_value(&state
.smartval
, cfg
.attribute_defs
)) {
2000 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name
);
2001 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
2004 // Report ignored '-r' or '-R' directives
2005 for (int id
= 1; id
<= 255; id
++) {
2006 if (cfg
.monitor_attr_flags
.is_set(id
, MONITOR_RAW_PRINT
)) {
2007 char opt
= (!cfg
.monitor_attr_flags
.is_set(id
, MONITOR_RAW
) ? 'r' : 'R');
2008 const char * excl
= (cfg
.monitor_attr_flags
.is_set(id
,
2009 (opt
== 'r' ? MONITOR_AS_CRIT
: MONITOR_RAW_AS_CRIT
)) ? "!" : "");
2011 int idx
= ata_find_attr_index(id
, state
.smartval
);
2013 PrintOut(LOG_INFO
,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name
, id
, opt
, id
, excl
);
2015 bool prefail
= !!ATTRIBUTE_FLAGS_PREFAILURE(state
.smartval
.vendor_attributes
[idx
].flags
);
2016 if (!((prefail
&& cfg
.prefail
) || (!prefail
&& cfg
.usage
)))
2017 PrintOut(LOG_INFO
,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name
,
2018 (prefail
? "Prefailure" : "Usage"), opt
, id
, excl
);
2024 // enable/disable automatic on-line testing
2025 if (cfg
.autoofflinetest
) {
2026 // is this an enable or disable request?
2027 const char *what
=(cfg
.autoofflinetest
==1)?"disable":"enable";
2029 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
2031 // if command appears unsupported, issue a warning...
2032 if (!isSupportAutomaticTimer(&state
.smartval
))
2033 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
2034 // ... but then try anyway
2035 if ((cfg
.autoofflinetest
==1)?ataDisableAutoOffline(atadev
):ataEnableAutoOffline(atadev
))
2036 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
2038 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
2042 // Read log directories if required for capability check
2043 ata_smart_log_directory smart_logdir
, gp_logdir
;
2044 bool smart_logdir_ok
= false, gp_logdir_ok
= false;
2046 if ( isGeneralPurposeLoggingCapable(&drive
)
2047 && (cfg
.errorlog
|| cfg
.selftest
) ) {
2048 if (!ataReadLogDirectory(atadev
, &smart_logdir
, false))
2049 smart_logdir_ok
= true;
2052 if (cfg
.xerrorlog
) {
2053 if (!ataReadLogDirectory(atadev
, &gp_logdir
, true))
2054 gp_logdir_ok
= true;
2057 // capability check: self-test-log
2058 state
.selflogcount
= 0; state
.selfloghour
= 0;
2061 if (!( cfg
.permissive
2062 || ( smart_logdir_ok
&& smart_logdir
.entry
[0x06-1].numsectors
)
2063 || (!smart_logdir_ok
&& smart_val_ok
&& isSmartTestLogCapable(&state
.smartval
, &drive
)))) {
2064 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name
);
2065 cfg
.selftest
= false;
2067 else if ((retval
= SelfTestErrorCount(atadev
, name
, cfg
.fix_firmwarebug
)) < 0) {
2068 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name
);
2069 cfg
.selftest
= false;
2072 state
.selflogcount
=SELFTEST_ERRORCOUNT(retval
);
2073 state
.selfloghour
=SELFTEST_ERRORHOURS(retval
);
2077 // capability check: ATA error log
2078 state
.ataerrorcount
= 0;
2081 if (!( cfg
.permissive
2082 || ( smart_logdir_ok
&& smart_logdir
.entry
[0x01-1].numsectors
)
2083 || (!smart_logdir_ok
&& smart_val_ok
&& isSmartErrorLogCapable(&state
.smartval
, &drive
)))) {
2084 PrintOut(LOG_INFO
, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name
);
2085 cfg
.errorlog
= false;
2087 else if ((errcnt1
= read_ata_error_count(atadev
, name
, cfg
.fix_firmwarebug
, false)) < 0) {
2088 PrintOut(LOG_INFO
, "Device: %s, no SMART Error Log, ignoring -l error\n", name
);
2089 cfg
.errorlog
= false;
2092 state
.ataerrorcount
= errcnt1
;
2095 if (cfg
.xerrorlog
) {
2097 if (!(cfg
.permissive
|| (gp_logdir_ok
&& gp_logdir
.entry
[0x03-1].numsectors
))) {
2098 PrintOut(LOG_INFO
, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2100 cfg
.xerrorlog
= false;
2102 else if ((errcnt2
= read_ata_error_count(atadev
, name
, cfg
.fix_firmwarebug
, true)) < 0) {
2103 PrintOut(LOG_INFO
, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name
);
2104 cfg
.xerrorlog
= false;
2106 else if (cfg
.errorlog
&& state
.ataerrorcount
!= errcnt2
) {
2107 PrintOut(LOG_INFO
, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2108 name
, state
.ataerrorcount
, errcnt2
);
2109 // Record max error count
2110 if (errcnt2
> state
.ataerrorcount
)
2111 state
.ataerrorcount
= errcnt2
;
2114 state
.ataerrorcount
= errcnt2
;
2117 // capability check: self-test and offline data collection status
2118 if (cfg
.offlinests
|| cfg
.selfteststs
) {
2119 if (!(cfg
.permissive
|| (smart_val_ok
&& state
.smartval
.offline_data_collection_capability
))) {
2121 PrintOut(LOG_INFO
, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name
);
2122 if (cfg
.selfteststs
)
2123 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name
);
2124 cfg
.offlinests
= cfg
.selfteststs
= false;
2128 // capabilities check -- does it support powermode?
2129 if (cfg
.powermode
) {
2130 int powermode
= ataCheckPowerMode(atadev
);
2132 if (-1 == powermode
) {
2133 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
2136 else if (powermode
!=0 && powermode
!=0x80 && powermode
!=0xff) {
2137 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2143 // Apply ATA settings
2147 format_set_result_msg(msg
, "AAM", (cfg
.set_aam
> 0 ?
2148 ata_set_features(atadev
, ATA_ENABLE_AAM
, cfg
.set_aam
-1) :
2149 ata_set_features(atadev
, ATA_DISABLE_AAM
)), cfg
.set_aam
, true);
2152 format_set_result_msg(msg
, "APM", (cfg
.set_apm
> 0 ?
2153 ata_set_features(atadev
, ATA_ENABLE_APM
, cfg
.set_apm
-1) :
2154 ata_set_features(atadev
, ATA_DISABLE_APM
)), cfg
.set_apm
, true);
2156 if (cfg
.set_lookahead
)
2157 format_set_result_msg(msg
, "Rd-ahead", ata_set_features(atadev
,
2158 (cfg
.set_lookahead
> 0 ? ATA_ENABLE_READ_LOOK_AHEAD
: ATA_DISABLE_READ_LOOK_AHEAD
)),
2162 format_set_result_msg(msg
, "Wr-cache", ata_set_features(atadev
,
2163 (cfg
.set_wcache
> 0? ATA_ENABLE_WRITE_CACHE
: ATA_DISABLE_WRITE_CACHE
)), cfg
.set_wcache
);
2165 if (cfg
.set_security_freeze
)
2166 format_set_result_msg(msg
, "Security freeze",
2167 ata_nodata_command(atadev
, ATA_SECURITY_FREEZE_LOCK
));
2169 if (cfg
.set_standby
)
2170 format_set_result_msg(msg
, "Standby",
2171 ata_nodata_command(atadev
, ATA_IDLE
, cfg
.set_standby
-1), cfg
.set_standby
, true);
2173 // Report as one log entry
2175 PrintOut(LOG_INFO
, "Device: %s, ATA settings applied: %s\n", name
, msg
.c_str());
2177 // set SCT Error Recovery Control if requested
2178 if (cfg
.sct_erc_set
) {
2179 if (!isSCTErrorRecoveryControlCapable(&drive
))
2180 PrintOut(LOG_INFO
, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2182 else if ( ataSetSCTErrorRecoveryControltime(atadev
, 1, cfg
.sct_erc_readtime
)
2183 || ataSetSCTErrorRecoveryControltime(atadev
, 2, cfg
.sct_erc_writetime
))
2184 PrintOut(LOG_INFO
, "Device: %s, set of SCT Error Recovery Control failed\n", name
);
2186 PrintOut(LOG_INFO
, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2187 name
, cfg
.sct_erc_readtime
, cfg
.sct_erc_writetime
);
2190 // If no tests available or selected, return
2191 if (!( cfg
.smartcheck
|| cfg
.selftest
2192 || cfg
.errorlog
|| cfg
.xerrorlog
2193 || cfg
.offlinests
|| cfg
.selfteststs
2194 || cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
2195 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
2196 CloseDevice(atadev
, name
);
2200 // tell user we are registering device
2201 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
2203 // close file descriptor
2204 CloseDevice(atadev
, name
);
2206 if (!state_path_prefix
.empty() || !attrlog_path_prefix
.empty()) {
2207 // Build file name for state file
2208 std::replace_if(model
, model
+strlen(model
), not_allowed_in_filename
, '_');
2209 std::replace_if(serial
, serial
+strlen(serial
), not_allowed_in_filename
, '_');
2210 if (!state_path_prefix
.empty()) {
2211 cfg
.state_file
= strprintf("%s%s-%s.ata.state", state_path_prefix
.c_str(), model
, serial
);
2212 // Read previous state
2213 if (read_dev_state(cfg
.state_file
.c_str(), state
)) {
2214 PrintOut(LOG_INFO
, "Device: %s, state read from %s\n", name
, cfg
.state_file
.c_str());
2215 // Copy ATA attribute values to temp state
2216 state
.update_temp_state();
2219 if (!attrlog_path_prefix
.empty())
2220 cfg
.attrlog_file
= strprintf("%s%s-%s.ata.csv", attrlog_path_prefix
.c_str(), model
, serial
);
2223 finish_device_scan(cfg
, state
);
// SCSIDeviceScan -- startup probe of one SCSI device (cfg.name) before it is
// added to the monitor list.
// Visible steps, in order: standard INQUIRY (the log message shows a 36 byte
// and a 64 byte attempt), peripheral device type check, optional VPD page
// 0x83 lookup for the logical unit id, capacity query, TEST UNIT READY, IEC
// mode page fetch, supported log page discovery, a trial scsiCheckIE() call,
// self-test log check and the optional GLTSD (autosave) setting.  The device
// is closed again and finish_device_scan() is called at the end.
// Parameters: cfg     - device configuration; options the device cannot
//                       support (temperature, self-test log) are cleared here.
//             state   - per-device state that receives the initial values.
//             scsidev - SCSI device to probe.
2228 // on success, return 0. On failure, return >0. Never return <0,
2230 static int SCSIDeviceScan(dev_config
& cfg
, dev_state
& state
, scsi_device
* scsidev
)
2232 int k
, err
, req_len
, avail_len
, version
, len
;
2233 const char *device
= cfg
.name
.c_str();
2234 struct scsi_iec_mode_page iec
;
2240 // Device must be open
2241 memset(inqBuf
, 0, 96);
2243 if ((err
= scsiStdInquiry(scsidev
, inqBuf
, req_len
))) {
2244 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2246 if ((err
= scsiStdInquiry(scsidev
, inqBuf
, req_len
))) {
2247 PrintOut(LOG_INFO
, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2248 "skip device\n", device
);
// Decode the INQUIRY header: byte 2 is used as the version, byte 4 plus 5 as
// the length reported by the device; len is capped at the requested length.
2252 version
= inqBuf
[2];
2253 avail_len
= inqBuf
[4] + 5;
2254 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
2256 PrintOut(LOG_INFO
, "Device: %s, INQUIRY response less than 36 bytes; "
2257 "skip device\n", device
);
// The peripheral device type is taken from the low 5 bits of INQUIRY byte 0.
2261 int pdt
= inqBuf
[0] & 0x1f;
2263 if (! ((0 == pdt
) || (4 == pdt
) || (5 == pdt
) || (7 == pdt
) ||
2265 PrintOut(LOG_INFO
, "Device: %s, not a disk like device [PDT=0x%x], "
2266 "skip\n", device
, pdt
);
// VPD page 0x83 is only requested for version values 4..7; its content is
// decoded into lu_id for the identification log line below.
2270 if ((version
>= 0x4) && (version
< 0x8)) {
2271 /* SPC-2 to SPC-5 */
2272 if (0 == (err
= scsiInquiryVpd(scsidev
, 0x83, vpdBuf
, sizeof(vpdBuf
)))) {
2274 scsi_decode_lu_dev_id(vpdBuf
+ 4, len
, lu_id
, sizeof(lu_id
), NULL
);
2278 unsigned int lb_size
;
2280 uint64_t capacity
= scsiGetSize(scsidev
, &lb_size
);
2283 format_capacity(si_str
, sizeof(si_str
), capacity
);
// Identification line: the %.8s / %.16s / %.4s fields are read straight from
// INQUIRY bytes 8, 16 and 32; lu id and capacity are appended only if set.
2286 PrintOut(LOG_INFO
, "Device: %s, [%.8s %.16s %.4s]%s%s%s%s\n",
2287 device
, (char *)&inqBuf
[8], (char *)&inqBuf
[16],
2288 (char *)&inqBuf
[32],
2289 (lu_id
[0] ? ", lu id: " : ""), (lu_id
[0] ? lu_id
: ""),
2290 (si_str
[0] ? ", " : ""), (si_str
[0] ? si_str
: ""));
2292 // check that device is ready for commands. IE stores its stuff on
2294 if ((err
= scsiTestUnitReady(scsidev
))) {
2295 if (SIMPLE_ERR_NOT_READY
== err
)
2296 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
2297 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
2298 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
2299 else if (SIMPLE_ERR_BECOMING_READY
== err
)
2300 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
2302 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
2303 CloseDevice(scsidev
, device
);
2307 // Badly-conforming USB storage devices may fail this check.
2308 // The response to the following IE mode page fetch (current and
2309 // changeable values) is carefully examined. It has been found
2310 // that various USB devices that malform the response will lock up
2311 // if asked for a log page (e.g. temperature) so it is best to
// On success the mode sense length found by the fetch is remembered in state.
2313 if (!(err
= scsiFetchIECmpage(scsidev
, &iec
, state
.modese_len
)))
2314 state
.modese_len
= iec
.modese_len
;
2315 else if (SIMPLE_ERR_BAD_FIELD
== err
)
2316 ; /* continue since it is reasonable not to support IE mpage */
2317 else { /* any other error (including malformed response) unreasonable */
2319 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2321 CloseDevice(scsidev
, device
);
2325 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2326 // smart if it is off). This may change to be the same as the ATA side.
2327 if (!scsi_IsExceptionControlEnabled(&iec
)) {
2328 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
2329 "Try 'smartctl -s on %s' to turn on SMART features\n",
2331 CloseDevice(scsidev
, device
);
2335 // Flag that certain log pages are supported (information may be
2336 // available from other sources).
// The page list is scanned from offset 4 up to tBuf[3] + LOGPAGEHDRSIZE.
2337 if (0 == scsiLogSense(scsidev
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0)) {
2338 for (k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
2340 case TEMPERATURE_LPAGE
:
2341 state
.TempPageSupported
= 1;
2344 state
.SmartPageSupported
= 1;
2352 // Check if scsiCheckIE() is going to work
2356 UINT8 currenttemp
= 0;
// NOTE(review): `¤ttemp` in the call below looks like a mis-encoded
// `&currenttemp` (HTML entity damage in this listing) -- confirm against the
// upstream file.
// If the trial read fails, reporting is suppressed and any configured
// temperature monitoring (-W) is switched off.
2359 if (scsiCheckIE(scsidev
, state
.SmartPageSupported
, state
.TempPageSupported
,
2360 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
2361 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
2362 state
.SuppressReport
= 1;
2363 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
) {
2364 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device
);
2365 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
2370 // capability check: self-test-log
2372 int retval
= scsiCountFailedSelfTests(scsidev
, 0);
2374 // no self-test log, turn off monitoring
2375 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
2376 cfg
.selftest
= false;
2377 state
.selflogcount
= 0;
2378 state
.selfloghour
= 0;
2381 // register starting values to watch for changes
2382 state
.selflogcount
=SELFTEST_ERRORCOUNT(retval
);
2383 state
.selfloghour
=SELFTEST_ERRORHOURS(retval
);
// cfg.autosave == 1 sets the GLTSD bit, cfg.autosave == 2 clears it; the
// outcome of each attempt is logged.
2387 // disable autosave (set GLTSD bit)
2388 if (cfg
.autosave
==1){
2389 if (scsiSetControlGLTSD(scsidev
, 1, state
.modese_len
))
2390 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
2392 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
2395 // or enable autosave (clear GLTSD bit)
2396 if (cfg
.autosave
==2){
2397 if (scsiSetControlGLTSD(scsidev
, 0, state
.modese_len
))
2398 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
2400 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
2403 // tell user we are registering device
2404 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
// State files and attribute logs are only announced as unsupported for SCSI;
// no file names are built here.
2406 // TODO: Build file name for state file
2407 if (!state_path_prefix
.empty()) {
2408 PrintOut(LOG_INFO
, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device
);
2410 // TODO: Build file name for attribute log file
2411 if (!attrlog_path_prefix
.empty()) {
2412 PrintOut(LOG_INFO
, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device
);
2415 // Make sure that init_standby_check() ignores SCSI devices
2416 cfg
.offlinests_ns
= cfg
.selfteststs_ns
= false;
2418 // close file descriptor
2419 CloseDevice(scsidev
, device
);
2421 finish_device_scan(cfg
, state
);
// CheckSelfTestLogs -- compare a freshly read self-test log summary with the
// values remembered in state and report any change.
// Parameters: cfg   - device configuration (supplies the device name and is
//                     handed to the mail helpers).
//             state - per-device state; selflogcount and selfloghour are
//                     overwritten with the new values, must_write is set
//                     after a new error is reported.
//             newi  - packed result, decoded with SELFTEST_ERRORCOUNT() and
//                     SELFTEST_ERRORHOURS().
// Warning mail type 8 is used for a failed log read, type 3 for new errors.
2426 // If the self-test log has got more self-test errors (or more recent
2427 // self-test errors) recorded, then notify user.
2428 static void CheckSelfTestLogs(const dev_config
& cfg
, dev_state
& state
, int newi
)
2430 const char * name
= cfg
.name
.c_str();
// NOTE(review): the condition guarding this failure mail is not visible in
// this listing -- presumably a negative newi; confirm against upstream.
2434 MailWarning(cfg
, state
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
2436 reset_warning_mail(cfg
, state
, 8, "Read SMART Self-Test Log worked again");
2438 // old and new error counts
2439 int oldc
=state
.selflogcount
;
2440 int newc
=SELFTEST_ERRORCOUNT(newi
);
2442 // old and new error timestamps in hours
2443 int oldh
=state
.selfloghour
;
2444 int newh
=SELFTEST_ERRORHOURS(newi
);
2447 // increase in error count
2448 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2450 MailWarning(cfg
, state
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2452 state
.must_write
= true;
2454 else if (newc
> 0 && oldh
!= newh
) {
2455 // more recent error
2456 // a 'more recent' error might actually be a smaller hour number,
2457 // if the hour number has wrapped.
2458 // There's still a bug here. You might just happen to run a new test
2459 // exactly 32768 hours after the previous failure, and have run exactly
2460 // 20 tests between the two, in which case smartd will miss the
2462 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2464 MailWarning(cfg
, state
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2466 state
.must_write
= true;
2469 // Print info if error entries have disappeared
2470 // or a newer successful extended self-test exists
2472 PrintOut(LOG_INFO
, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2475 reset_warning_mail(cfg
, state
, 3, "Self-Test Log does no longer report errors");
2478 // Needed since self-test error count may DECREASE. Hour might
2479 // also have changed.
2480 state
.selflogcount
= newc
;
2481 state
.selfloghour
= newh
;
// One character per self-test type.  next_scheduled_test() iterates over this
// string by index, and PrintTestSchedule() uses the index of a character as
// the column of its per-device counter table.
// num_test_types is the number of characters, excluding the terminating NUL
// of the string literal (hence sizeof - 1).
2486 // Test types, ordered by priority.
2487 static const char test_type_chars
[] = "LncrSCO";
2488 static const unsigned num_test_types
= sizeof(test_type_chars
)-1;
// next_scheduled_test -- decide whether a scheduled self-test is due.
// Parameters: cfg     - device configuration; cfg.test_regex holds the
//                       schedule expression.
//             state   - per-device state; scheduled_test_next_check and
//                       must_write are updated here, the not_cap_* flags
//                       exclude test types.
//             scsi    - true for SCSI devices; types 'C', 'O' and 'r' are
//                       skipped for them.
//             usetime - time to evaluate the schedule for; 0 selects time(0).
// Every hour in [state.scheduled_test_next_check, now] is formatted as
// "T/MM/DD/d/HH" for each candidate test type and matched against
// cfg.test_regex.
2490 // returns test type if time to do test of type testtype,
2491 // 0 if not time to do test.
2492 static char next_scheduled_test(const dev_config
& cfg
, dev_state
& state
, bool scsi
, time_t usetime
= 0)
2494 // check that self-testing has been requested
2495 if (cfg
.test_regex
.empty())
2498 // Exit if drive not capable of any test
2499 if ( state
.not_cap_long
&& state
.not_cap_short
&&
2500 (scsi
|| (state
.not_cap_conveyance
&& state
.not_cap_offline
)))
2503 // since we are about to call localtime(), be sure glibc is informed
2504 // of any timezone changes we make.
2506 FixGlibcTimeZoneBug();
2508 // Is it time for next check?
2509 time_t now
= (!usetime
? time(0) : usetime
);
2510 if (now
< state
.scheduled_test_next_check
)
2513 // Limit time check interval to 90 days
2514 if (state
.scheduled_test_next_check
+ (3600L*24*90) < now
)
2515 state
.scheduled_test_next_check
= now
- (3600L*24*90);
2517 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2519 time_t testtime
= 0; int testhour
= 0;
// maxtest is the highest index into test_type_chars that is still considered.
2520 int maxtest
= num_test_types
-1;
2522 for (time_t t
= state
.scheduled_test_next_check
; ; ) {
2523 struct tm
* tms
= localtime(&t
);
2524 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2525 int weekday
= (tms
->tm_wday
? tms
->tm_wday
: 7);
2526 for (int i
= 0; i
<= maxtest
; i
++) {
2527 // Skip if drive not capable of this test
2528 switch (test_type_chars
[i
]) {
2529 case 'L': if (state
.not_cap_long
) continue; break;
2530 case 'S': if (state
.not_cap_short
) continue; break;
2531 case 'C': if (scsi
|| state
.not_cap_conveyance
) continue; break;
2532 case 'O': if (scsi
|| state
.not_cap_offline
) continue; break;
2534 case 'r': if (scsi
|| state
.not_cap_selective
) continue; break;
2537 // Try match of "T/MM/DD/d/HH"
2539 snprintf(pattern
, sizeof(pattern
), "%c/%02d/%02d/%1d/%02d",
2540 test_type_chars
[i
], tms
->tm_mon
+1, tms
->tm_mday
, weekday
, tms
->tm_hour
);
2541 if (cfg
.test_regex
.full_match(pattern
)) {
2543 testtype
= pattern
[0];
2544 testtime
= t
; testhour
= tms
->tm_hour
;
2545 // Limit further matches to higher priority self-tests
2550 // Exit if no tests left or current time reached
// The scan advances in steps of one hour (3600 seconds).
2556 if ((t
+= 3600) > now
)
2560 // Do next check not before next hour.
// i.e. now plus the seconds remaining in the current local hour.
2561 struct tm
* tmnow
= localtime(&now
);
2562 state
.scheduled_test_next_check
= now
+ (3600 - tmnow
->tm_min
*60 - tmnow
->tm_sec
);
2565 state
.must_write
= true;
2566 // Tell user if an old test was found.
// Only done for real-time calls (usetime == 0) when the match is not within
// the current hour.
2567 if (!usetime
&& !(testhour
== tmnow
->tm_hour
&& testtime
+ 3600 > now
)) {
2568 char datebuf
[DATEANDEPOCHLEN
]; dateandtimezoneepoch(datebuf
, testtime
);
2569 PrintOut(LOG_INFO
, "Device: %s, old test of type %c not run at %s, starting now.\n",
2570 cfg
.name
.c_str(), testtype
, datebuf
);
// PrintTestSchedule -- log a preview of upcoming scheduled self-tests.
// Steps forward from the current time in increments of checktime for up to
// 90 days (3600L*24*90 seconds), asks next_scheduled_test() for every device
// at each step, logs the first 5 hits per device and test type, and finally
// logs per-type totals.
// Parameters: configs - device configurations, indexed by device.
//             states  - matching per-device states (passed on non-const to
//                       next_scheduled_test()).
//             devices - matching device objects, used for is_scsi().
2577 // Print a list of future tests.
2578 static void PrintTestSchedule(const dev_config_vector
& configs
, dev_state_vector
& states
, const smart_device_list
& devices
)
2580 unsigned numdev
= configs
.size();
// Flat counter table: entry [device * num_test_types + type index].
2583 std::vector
<int> testcnts(numdev
* num_test_types
, 0);
2585 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2587 // FixGlibcTimeZoneBug(); // done in PrintOut()
2588 time_t now
= time(0);
2589 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2590 dateandtimezoneepoch(datenow
, now
);
2593 for (seconds
=checktime
; seconds
<3600L*24*90; seconds
+=checktime
) {
2594 // Check for each device whether a test will be run
2595 time_t testtime
= now
+ seconds
;
2596 for (unsigned i
= 0; i
< numdev
; i
++) {
2597 const dev_config
& cfg
= configs
.at(i
);
2598 dev_state
& state
= states
.at(i
);
2600 char testtype
= next_scheduled_test(cfg
, state
, devices
.at(i
)->is_scsi(), testtime
);
// Map the returned type character back to its index in test_type_chars.
2601 if (testtype
&& (p
= strchr(test_type_chars
, testtype
))) {
2602 unsigned t
= (p
- test_type_chars
);
2603 // Report at most 5 tests of each type
2604 if (++testcnts
[i
*num_test_types
+ t
] <= 5) {
2605 dateandtimezoneepoch(date
, testtime
);
2606 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
.name
.c_str(),
2607 testcnts
[i
*num_test_types
+ t
], testtype
, date
);
2614 dateandtimezoneepoch(date
, now
+seconds
);
2615 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2616 for (unsigned i
= 0; i
< numdev
; i
++) {
2617 const dev_config
& cfg
= configs
.at(i
);
2618 bool scsi
= devices
.at(i
)->is_scsi();
2619 for (unsigned t
= 0; t
< num_test_types
; t
++) {
2620 int cnt
= testcnts
[i
*num_test_types
+ t
];
// The check below singles out zero counts for types outside "LS" (SCSI) or
// "LSCO" (otherwise).  NOTE(review): the statement it guards is not visible
// in this listing.
2621 if (cnt
== 0 && !strchr((scsi
? "LS" : "LSCO"), test_type_chars
[t
]))
2623 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
.name
.c_str(),
2624 cnt
, (cnt
==1?"":"s"), test_type_chars
[t
]);
// DoSCSISelfTest -- start a scheduled self-test on a SCSI device.
// Parameters: cfg      - device configuration (device name for messages).
//             state    - per-device state; not_cap_short / not_cap_long are
//                        set when the device turns out not to support a test.
//             device   - SCSI device to run the test on.
//             testtype - requested test type character.
2630 // Return zero on success, nonzero on failure. Perform offline (background)
2631 // short or long (extended) self test on given scsi device.
2632 static int DoSCSISelfTest(const dev_config
& cfg
, dev_state
& state
, scsi_device
* device
, char testtype
)
2635 const char *testname
= 0;
2636 const char *name
= cfg
.name
.c_str();
// A failing in-progress query is treated as "no self-test support at all":
// both the short and the long test are marked as unavailable.
2639 if (scsiSelfTestInProgress(device
, &inProgress
)) {
2640 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2641 state
.not_cap_short
= state
.not_cap_long
= true;
// Do not start a new test while one is already running.
2645 if (1 == inProgress
) {
2646 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
2647 "progress.\n", name
);
2653 testname
= "Short Self";
2654 retval
= scsiSmartShortSelfTest(device
);
2657 testname
= "Long Self";
2658 retval
= scsiSmartExtendSelfTest(device
);
2661 // If we can't do the test, exit
// testname is still NULL here if testtype selected neither test above.
2662 if (NULL
== testname
) {
2663 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
// "Bad opcode" / "bad field" results are reported as a missing capability and
// recorded in state; NOTE(review): the branch choosing between not_cap_long
// and not_cap_short is not visible in this listing.
2668 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
2669 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
2670 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
2673 state
.not_cap_long
= true;
2675 state
.not_cap_short
= true;
2679 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
2684 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
// DoATASelfTest -- start a scheduled offline or self-test on an ATA device.
// Parameters: cfg      - device configuration (device name, fix_firmwarebug).
//             state    - per-device state; not_cap_* flags are set for
//                        unsupported tests, the selective span and the
//                        *_started flags are updated on success.
//             device   - ATA device to run the test on.
//             testtype - requested test type character ('c', 'n', 'r' select
//                        a selective self-test mode, 'O' is handled as the
//                        offline test when the start is recorded).
2689 // Do an offline immediate or self-test. Return zero on success,
2690 // nonzero on failure.
2691 static int DoATASelfTest(const dev_config
& cfg
, dev_state
& state
, ata_device
* device
, char testtype
)
2693 const char *name
= cfg
.name
.c_str();
2695 // Read current smart data and check status/capability
2696 struct ata_smart_values data
;
2697 if (ataReadSmartValues(device
, &data
) || !(data
.offline_data_collection_capability
)) {
2698 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
2702 // Check for capability to do the test
// dotest stays -1 unless the device reports support for the requested test;
// mode is only used for selective self-tests.
2703 int dotest
= -1, mode
= 0;
2704 const char *testname
= 0;
2707 testname
="Offline Immediate ";
2708 if (isSupportExecuteOfflineImmediate(&data
))
2709 dotest
=OFFLINE_FULL_SCAN
;
2711 state
.not_cap_offline
= true;
2714 testname
="Conveyance Self-";
2715 if (isSupportConveyanceSelfTest(&data
))
2716 dotest
=CONVEYANCE_SELF_TEST
;
2718 state
.not_cap_conveyance
= true;
2721 testname
="Short Self-";
2722 if (isSupportSelfTest(&data
))
2723 dotest
=SHORT_SELF_TEST
;
2725 state
.not_cap_short
= true;
2728 testname
="Long Self-";
2729 if (isSupportSelfTest(&data
))
2730 dotest
=EXTEND_SELF_TEST
;
2732 state
.not_cap_long
= true;
2735 case 'c': case 'n': case 'r':
2736 testname
= "Selective Self-";
2737 if (isSupportSelectiveSelfTest(&data
)) {
2738 dotest
= SELECTIVE_SELF_TEST
;
2740 case 'c': mode
= SEL_CONT
; break;
2741 case 'n': mode
= SEL_NEXT
; break;
2742 case 'r': mode
= SEL_REDO
; break;
2746 state
.not_cap_selective
= true;
2750 // If we can't do the test, exit
2752 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
2756 // If currently running a self-test, do not interrupt it to start another.
// An upper nibble of 15 in self_test_exec_status is treated as "in progress";
// the lower nibble is printed as the remaining tens of percent.
2757 if (15==(data
.self_test_exec_status
>> 4)) {
2758 if (cfg
.fix_firmwarebug
== FIX_SAMSUNG3
&& data
.self_test_exec_status
== 0xf0) {
2759 PrintOut(LOG_INFO
, "Device: %s, will not skip scheduled %sTest "
2760 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name
, testname
);
2762 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2763 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
// Selective tests need the span written to the selective self-test log first;
// prev_args carries the span of the previous run kept in state.
2768 if (dotest
== SELECTIVE_SELF_TEST
) {
2770 ata_selective_selftest_args selargs
, prev_args
;
2771 selargs
.num_spans
= 1;
2772 selargs
.span
[0].mode
= mode
;
2773 prev_args
.num_spans
= 1;
2774 prev_args
.span
[0].start
= state
.selective_test_last_start
;
2775 prev_args
.span
[0].end
= state
.selective_test_last_end
;
2776 if (ataWriteSelectiveSelfTestLog(device
, selargs
, &data
, state
.num_sectors
, &prev_args
)) {
2777 PrintOut(LOG_CRIT
, "Device: %s, prepare %sTest failed\n", name
, testname
);
// Log the span actually chosen; (100*x + num_sectors/2) / num_sectors rounds
// the position to the nearest percent of the disk.
2780 uint64_t start
= selargs
.span
[0].start
, end
= selargs
.span
[0].end
;
2781 PrintOut(LOG_INFO
, "Device: %s, %s test span at LBA %"PRIu64
" - %"PRIu64
" (%"PRIu64
" sectors, %u%% - %u%% of disk).\n",
2782 name
, (selargs
.span
[0].mode
== SEL_NEXT
? "next" : "redo"),
2783 start
, end
, end
- start
+ 1,
2784 (unsigned)((100 * start
+ state
.num_sectors
/2) / state
.num_sectors
),
2785 (unsigned)((100 * end
+ state
.num_sectors
/2) / state
.num_sectors
));
// Remember the span so the next selective test can continue from it.
2786 state
.selective_test_last_start
= start
;
2787 state
.selective_test_last_end
= end
;
2790 // execute the test, and return status
2791 int retval
= smartcommandhandler(device
, IMMEDIATE_OFFLINE
, dotest
, NULL
);
2793 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
2797 // Report recent test start to do_disable_standby_check()
2798 // and force log of next test status
2799 if (testtype
== 'O')
2800 state
.offline_started
= true;
2802 state
.selftest_started
= true;
2804 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
// check_pending -- report the raw value of one sector-count attribute.
// Parameters: cfg           - device configuration (name, attribute_defs).
//             state         - per-device state; state.smartval supplies the
//                             previous attribute values, must_write is set
//                             after a report.
//             id            - attribute id to look up.
//             increase_only - when true, report only if the raw value grew.
//             smartval      - current SMART values.
//             mailtype      - warning mail type handed to the mail helpers.
//             msg           - text appended after the count in the messages.
2808 // Check pending sector count attribute values (-C, -U directives).
2809 static void check_pending(const dev_config
& cfg
, dev_state
& state
,
2810 unsigned char id
, bool increase_only
,
2811 const ata_smart_values
& smartval
,
2812 int mailtype
, const char * msg
)
2814 // Find attribute index
// The attribute must exist and sit at the same index in the previous values.
2815 int i
= ata_find_attr_index(id
, smartval
);
2816 if (!(i
>= 0 && ata_find_attr_index(id
, state
.smartval
) == i
))
2819 // No report if no sectors pending.
2820 uint64_t rawval
= ata_get_attr_raw_value(smartval
.vendor_attributes
[i
], cfg
.attribute_defs
);
2822 reset_warning_mail(cfg
, state
, mailtype
, "No more %s", msg
);
2826 // If attribute is not reset, report only sector count increases.
2827 uint64_t prev_rawval
= ata_get_attr_raw_value(state
.smartval
.vendor_attributes
[i
], cfg
.attribute_defs
);
2828 if (!(!increase_only
|| prev_rawval
< rawval
))
// NOTE(review): rawval is uint64_t but is formatted with PRId64 below;
// PRIu64 would match the declared type.  The "(changed ...)" suffix is only
// added when there was a non-zero previous value that differs.
2832 std::string s
= strprintf("Device: %s, %"PRId64
" %s", cfg
.name
.c_str(), rawval
, msg
);
2833 if (prev_rawval
> 0 && rawval
!= prev_rawval
)
2834 s
+= strprintf(" (changed %+"PRId64
")", rawval
- prev_rawval
);
2836 PrintOut(LOG_CRIT
, "%s\n", s
.c_str());
2837 MailWarning(cfg
, state
, mailtype
, "%s\n", s
.c_str());
2838 state
.must_write
= true;
// fmt_temp -- write temperature x as an unsigned decimal string into buf.
// Parameters: x   - temperature value to format.
//             buf - caller-supplied output buffer; no size is passed, so it
//                   must be large enough for the formatted value.
// NOTE(review): the lines between the signature and the sprintf() call, and
// the return statement, are not visible in this listing.
2841 // Format Temperature value
2842 static const char * fmt_temp(unsigned char x
, char * buf
)
2847 sprintf(buf
, "%u", x
);
// CheckTemperature -- track min/max temperature and report changes and
// limit violations.
// Parameters: cfg      - device configuration; tempdiff, tempinfo and
//                        tempcrit are the thresholds used below.
//             state    - per-device state; temperature, tempmin, tempmax,
//                        tempmin_delay and must_write are updated here.
//             currtemp - current temperature reading.
//             triptemp - trip temperature, only printed in the initial
//                        report.
// Warning mail type 12 is used for the critical limit.
2851 // Check Temperature limits
2852 static void CheckTemperature(const dev_config
& cfg
, dev_state
& state
, unsigned char currtemp
, unsigned char triptemp
)
// Readings of 0 and 255 are rejected as a failed read.
2854 if (!(0 < currtemp
&& currtemp
< 255)) {
2855 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
.name
.c_str());
2859 // Update Max Temperature
// minchg / maxchg are suffix strings appended to the Min/Max values in the
// log messages below; both start out empty.
2860 const char * minchg
= "", * maxchg
= "";
2861 if (currtemp
> state
.tempmax
) {
2864 state
.tempmax
= currtemp
;
2865 state
.must_write
= true;
// state.temperature == 0 means no temperature has been recorded yet, so this
// branch produces the "initial Temperature" report.
2869 if (!state
.temperature
) {
2871 if (!state
.tempmin
|| currtemp
< state
.tempmin
)
2872 // Delay Min Temperature update by ~ 30 minutes.
2873 state
.tempmin_delay
= time(0) + CHECKTIME
- 60;
2874 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2875 cfg
.name
.c_str(), (int)currtemp
, fmt_temp(state
.tempmin
, buf
), state
.tempmax
, maxchg
);
2877 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
2878 state
.temperature
= currtemp
;
2881 if (state
.tempmin_delay
) {
2882 // End Min Temperature update delay if ...
2883 if ( (state
.tempmin
&& currtemp
> state
.tempmin
) // current temp exceeds recorded min,
2884 || (state
.tempmin_delay
<= time(0))) { // or delay time is over.
2885 state
.tempmin_delay
= 0;
2887 state
.tempmin
= 255;
2891 // Update Min Temperature
2892 if (!state
.tempmin_delay
&& currtemp
< state
.tempmin
) {
2893 state
.tempmin
= currtemp
;
2894 state
.must_write
= true;
2895 if (currtemp
!= state
.temperature
)
// Report a change when a new min/max was flagged or the difference to the
// last reported temperature reaches cfg.tempdiff.
2900 if (cfg
.tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)state
.temperature
) >= cfg
.tempdiff
)) {
2901 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2902 cfg
.name
.c_str(), (int)currtemp
-(int)state
.temperature
, currtemp
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
2903 state
.temperature
= currtemp
;
// Limit checks: the critical limit is logged at LOG_CRIT and mailed, the
// informational limit is only logged.
2908 if (cfg
.tempcrit
&& currtemp
>= cfg
.tempcrit
) {
2909 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2910 cfg
.name
.c_str(), currtemp
, cfg
.tempcrit
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
2911 MailWarning(cfg
, state
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2912 cfg
.name
.c_str(), currtemp
, cfg
.tempcrit
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
2914 else if (cfg
.tempinfo
&& currtemp
>= cfg
.tempinfo
) {
2915 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2916 cfg
.name
.c_str(), currtemp
, cfg
.tempinfo
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
// The critical warning mail is reset once the temperature falls below
// tempinfo if that is set, otherwise below tempcrit - 5.
2918 else if (cfg
.tempcrit
) {
2919 unsigned char limit
= (cfg
.tempinfo
? cfg
.tempinfo
: cfg
.tempcrit
-5);
2920 if (currtemp
< limit
)
2921 reset_warning_mail(cfg
, state
, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp
, limit
);
// check_attribute -- report failed usage attributes and changes of one
// SMART attribute between two reads.
// Parameters: cfg        - device configuration (usagefailed, prefail, usage,
//                          monitor_attr_flags, attribute_defs).
//             state      - per-device state; must_write is set after a
//                          report.
//             attr       - attribute as read now.
//             prev       - the same attribute slot from the previous read.
//             thresholds - threshold table handed to ata_get_attr_state().
// NOTE(review): attridx is used below but its parameter line is not visible
// in this listing.
// Warning mail type 2 is used for failed and critical attribute reports.
2925 // Check normalized and raw attribute values.
2926 static void check_attribute(const dev_config
& cfg
, dev_state
& state
,
2927 const ata_smart_attribute
& attr
,
2928 const ata_smart_attribute
& prev
,
2930 const ata_smart_threshold_entry
* thresholds
)
2932 // Check attribute and threshold
2933 ata_attr_state attrstate
= ata_get_attr_state(attr
, attridx
, thresholds
, cfg
.attribute_defs
);
2934 if (attrstate
== ATTRSTATE_NON_EXISTING
)
2937 // If requested, check for usage attributes that have failed.
2938 if ( cfg
.usagefailed
&& attrstate
== ATTRSTATE_FAILED_NOW
2939 && !cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_IGN_FAILUSE
)) {
2940 std::string attrname
= ata_get_smart_attr_name(attr
.id
, cfg
.attribute_defs
);
2941 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg
.name
.c_str(), attr
.id
, attrname
.c_str());
2942 MailWarning(cfg
, state
, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg
.name
.c_str(), attr
.id
, attrname
.c_str());
2943 state
.must_write
= true;
2946 // Return if we're not tracking this type of attribute
// Prefailure attributes are tracked with cfg.prefail, all others with
// cfg.usage.
2947 bool prefail
= !!ATTRIBUTE_FLAGS_PREFAILURE(attr
.flags
);
2948 if (!( ( prefail
&& cfg
.prefail
)
2949 || (!prefail
&& cfg
.usage
)))
2952 // Return if '-I ID' was specified
2953 if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_IGNORE
))
2956 // Issue warning if they don't have the same ID in all structures.
2957 if (attr
.id
!= prev
.id
) {
2958 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2959 cfg
.name
.c_str(), attr
.id
, prev
.id
);
2963 // Compare normalized values if valid.
2964 bool valchanged
= false;
2965 if (attrstate
> ATTRSTATE_NO_NORMVAL
) {
2966 if (attr
.current
!= prev
.current
)
2970 // Compare raw values if requested.
2971 bool rawchanged
= false;
2972 if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW
)) {
2973 if ( ata_get_attr_raw_value(attr
, cfg
.attribute_defs
)
2974 != ata_get_attr_raw_value(prev
, cfg
.attribute_defs
))
2978 // Return if no change
2979 if (!(valchanged
|| rawchanged
))
2982 // Format value strings
// Three layouts: raw only (no normalized value), normalized plus raw
// (MONITOR_RAW_PRINT), or normalized only.
2983 std::string currstr
, prevstr
;
2984 if (attrstate
== ATTRSTATE_NO_NORMVAL
) {
2985 // Print raw values only
2986 currstr
= strprintf("%s (Raw)",
2987 ata_format_attr_raw_value(attr
, cfg
.attribute_defs
).c_str());
2988 prevstr
= strprintf("%s (Raw)",
2989 ata_format_attr_raw_value(prev
, cfg
.attribute_defs
).c_str());
2991 else if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW_PRINT
)) {
2992 // Print normalized and raw values
2993 currstr
= strprintf("%d [Raw %s]", attr
.current
,
2994 ata_format_attr_raw_value(attr
, cfg
.attribute_defs
).c_str());
2995 prevstr
= strprintf("%d [Raw %s]", prev
.current
,
2996 ata_format_attr_raw_value(prev
, cfg
.attribute_defs
).c_str());
2999 // Print normalized values only
3000 currstr
= strprintf("%d", attr
.current
);
3001 prevstr
= strprintf("%d", prev
.current
);
3005 std::string msg
= strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3006 cfg
.name
.c_str(), (prefail
? "Prefailure" : "Usage"), attr
.id
,
3007 ata_get_smart_attr_name(attr
.id
, cfg
.attribute_defs
).c_str(),
3008 prevstr
.c_str(), currstr
.c_str());
3010 // Report this change as critical ?
// Critical reports are logged at LOG_CRIT and mailed; all others are only
// logged at LOG_INFO.
3011 if ( (valchanged
&& cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_AS_CRIT
))
3012 || (rawchanged
&& cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW_AS_CRIT
))) {
3013 PrintOut(LOG_CRIT
, "%s\n", msg
.c_str());
3014 MailWarning(cfg
, state
, 2, "%s", msg
.c_str());
3017 PrintOut(LOG_INFO
, "%s\n", msg
.c_str());
3019 state
.must_write
= true;
3023 static int ATACheckDevice(const dev_config
& cfg
, dev_state
& state
, ata_device
* atadev
,
3024 bool firstpass
, bool allow_selftests
)
3026 const char * name
= cfg
.name
.c_str();
3028 // If user has asked, test the email warning system
3030 MailWarning(cfg
, state
, 0, "TEST EMAIL from smartd for device: %s", name
);
3032 // if we can't open device, fail gracefully rather than hard --
3033 // perhaps the next time around we'll be able to open it. ATAPI
3034 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
3035 // given (see linux cdrom driver).
3036 if (!atadev
->open()) {
3037 PrintOut(LOG_INFO
, "Device: %s, open() failed: %s\n", name
, atadev
->get_errmsg());
3038 MailWarning(cfg
, state
, 9, "Device: %s, unable to open device", name
);
3042 PrintOut(LOG_INFO
,"Device: %s, opened ATA device\n", name
);
3043 reset_warning_mail(cfg
, state
, 9, "open device worked again");
3045 // user may have requested (with the -n Directive) to leave the disk
3046 // alone if it is in idle or sleeping mode. In this case check the
3047 // power mode and exit without check if needed
3048 if (cfg
.powermode
&& !state
.powermodefail
) {
3049 int dontcheck
=0, powermode
=ataCheckPowerMode(atadev
);
3050 const char * mode
= 0;
3051 if (0 <= powermode
&& powermode
< 0xff) {
3052 // wait for possible spin up and check again
3055 powermode2
= ataCheckPowerMode(atadev
);
3056 if (powermode2
> powermode
)
3057 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
3058 powermode
= powermode2
;
3065 if (cfg
.powermode
>=1)
3071 if (cfg
.powermode
>=2)
3077 if (cfg
.powermode
>=3)
3082 mode
="ACTIVE or IDLE";
3086 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3088 state
.powermodefail
= true;
3092 // if we are going to skip a check, return now
3094 // skip at most powerskipmax checks
3095 if (!cfg
.powerskipmax
|| state
.powerskipcnt
<cfg
.powerskipmax
) {
3096 CloseDevice(atadev
, name
);
3097 if (!state
.powerskipcnt
&& !cfg
.powerquiet
) // report first only and avoid waking up system disk
3098 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
3099 state
.powerskipcnt
++;
3103 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3104 name
, mode
, state
.powerskipcnt
, (state
.powerskipcnt
==1?"":"s"));
3106 state
.powerskipcnt
= 0;
3107 state
.tempmin_delay
= time(0) + CHECKTIME
- 60; // Delay Min Temperature update
3109 else if (state
.powerskipcnt
) {
3110 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3111 name
, mode
, state
.powerskipcnt
, (state
.powerskipcnt
==1?"":"s"));
3112 state
.powerskipcnt
= 0;
3113 state
.tempmin_delay
= time(0) + CHECKTIME
- 60; // Delay Min Temperature update
3117 // check smart status
3118 if (cfg
.smartcheck
) {
3119 int status
=ataSmartStatus2(atadev
);
3121 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
3122 MailWarning(cfg
, state
, 5, "Device: %s, not capable of SMART self-check", name
);
3123 state
.must_write
= true;
3125 else if (status
==1){
3126 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
3127 MailWarning(cfg
, state
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
3128 state
.must_write
= true;
3132 // Check everything that depends upon SMART Data (eg, Attribute values)
3133 if ( cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
3134 || cfg
.curr_pending_id
|| cfg
.offl_pending_id
3135 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
3136 || cfg
.selftest
|| cfg
.offlinests
|| cfg
.selfteststs
) {
3138 // Read current attribute values.
3139 ata_smart_values curval
;
3140 if (ataReadSmartValues(atadev
, &curval
)){
3141 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
3142 MailWarning(cfg
, state
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
3143 state
.must_write
= true;
3146 reset_warning_mail(cfg
, state
, 6, "read SMART Attribute Data worked again");
3148 // look for current or offline pending sectors
3149 if (cfg
.curr_pending_id
)
3150 check_pending(cfg
, state
, cfg
.curr_pending_id
, cfg
.curr_pending_incr
, curval
, 10,
3151 (!cfg
.curr_pending_incr
? "Currently unreadable (pending) sectors"
3152 : "Total unreadable (pending) sectors" ));
3154 if (cfg
.offl_pending_id
)
3155 check_pending(cfg
, state
, cfg
.offl_pending_id
, cfg
.offl_pending_incr
, curval
, 11,
3156 (!cfg
.offl_pending_incr
? "Offline uncorrectable sectors"
3157 : "Total offline uncorrectable sectors"));
3159 // check temperature limits
3160 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
3161 CheckTemperature(cfg
, state
, ata_return_temperature_value(&curval
, cfg
.attribute_defs
), 0);
3163 // look for failed usage attributes, or track usage or prefail attributes
3164 if (cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
) {
3165 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
3166 check_attribute(cfg
, state
,
3167 curval
.vendor_attributes
[i
],
3168 state
.smartval
.vendor_attributes
[i
],
3169 i
, state
.smartthres
.thres_entries
);
3173 // Log changes of offline data collection status
3174 if (cfg
.offlinests
) {
3175 if ( curval
.offline_data_collection_status
3176 != state
.smartval
.offline_data_collection_status
3177 || state
.offline_started
// test was started in previous call
3178 || (firstpass
&& (debugmode
|| (curval
.offline_data_collection_status
& 0x7d))))
3179 log_offline_data_coll_status(name
, curval
.offline_data_collection_status
);
3182 // Log changes of self-test execution status
3183 if (cfg
.selfteststs
) {
3184 if ( curval
.self_test_exec_status
!= state
.smartval
.self_test_exec_status
3185 || state
.selftest_started
// test was started in previous call
3186 || (firstpass
&& (debugmode
|| curval
.self_test_exec_status
!= 0x00)))
3187 log_self_test_exec_status(name
, curval
.self_test_exec_status
);
3190 // Save the new values for the next time around
3191 state
.smartval
= curval
;
3194 state
.offline_started
= state
.selftest_started
= false;
3196 // check if number of selftest errors has increased (note: may also DECREASE)
3198 CheckSelfTestLogs(cfg
, state
, SelfTestErrorCount(atadev
, name
, cfg
.fix_firmwarebug
));
3200 // check if number of ATA errors has increased
3201 if (cfg
.errorlog
|| cfg
.xerrorlog
) {
3203 int errcnt1
= -1, errcnt2
= -1;
3205 errcnt1
= read_ata_error_count(atadev
, name
, cfg
.fix_firmwarebug
, false);
3207 errcnt2
= read_ata_error_count(atadev
, name
, cfg
.fix_firmwarebug
, true);
3209 // new number of errors is max of both logs
3210 int newc
= (errcnt1
>= errcnt2
? errcnt1
: errcnt2
);
3212 // did command fail?
3214 // lack of PrintOut here is INTENTIONAL
3215 MailWarning(cfg
, state
, 7, "Device: %s, Read SMART Error Log Failed", name
);
3217 // has error count increased?
3218 int oldc
= state
.ataerrorcount
;
3220 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
3222 MailWarning(cfg
, state
, 4, "Device: %s, ATA error count increased from %d to %d",
3224 state
.must_write
= true;
3228 state
.ataerrorcount
=newc
;
3231 // if the user has asked, and device is capable (or we're not yet
3232 // sure) check whether a self test should be done now.
3233 if (allow_selftests
&& !cfg
.test_regex
.empty()) {
3234 char testtype
= next_scheduled_test(cfg
, state
, false/*!scsi*/);
3236 DoATASelfTest(cfg
, state
, atadev
, testtype
);
3239 // Don't leave device open -- the OS/user may want to access it
3240 // before the next smartd cycle!
3241 CloseDevice(atadev
, name
);
3243 // Copy ATA attribute values to persistent state
3244 state
.update_persistent_state();
3249 static int SCSICheckDevice(const dev_config
& cfg
, dev_state
& state
, scsi_device
* scsidev
, bool allow_selftests
)
3254 const char * name
= cfg
.name
.c_str();
3257 // If the user has asked for it, test the email warning system
3259 MailWarning(cfg
, state
, 0, "TEST EMAIL from smartd for device: %s", name
);
3261 // if we can't open device, fail gracefully rather than hard --
3262 // perhaps the next time around we'll be able to open it
3263 if (!scsidev
->open()) {
3264 PrintOut(LOG_INFO
, "Device: %s, open() failed: %s\n", name
, scsidev
->get_errmsg());
3265 MailWarning(cfg
, state
, 9, "Device: %s, unable to open device", name
);
3267 } else if (debugmode
)
3268 PrintOut(LOG_INFO
,"Device: %s, opened SCSI device\n", name
);
3272 if (!state
.SuppressReport
) {
3273 if (scsiCheckIE(scsidev
, state
.SmartPageSupported
, state
.TempPageSupported
,
3274 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
3275 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
3277 MailWarning(cfg
, state
, 6, "Device: %s, failed to read SMART values", name
);
3278 state
.SuppressReport
= 1;
3282 cp
= scsiGetIEString(asc
, ascq
);
3284 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
3285 MailWarning(cfg
, state
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
3286 } else if (debugmode
)
3287 PrintOut(LOG_INFO
,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3288 name
, (int)asc
, (int)ascq
);
3289 } else if (debugmode
)
3290 PrintOut(LOG_INFO
,"Device: %s, SMART health: passed\n", name
);
3292 // check temperature limits
3293 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
3294 CheckTemperature(cfg
, state
, currenttemp
, triptemp
);
3296 // check if number of selftest errors has increased (note: may also DECREASE)
3298 CheckSelfTestLogs(cfg
, state
, scsiCountFailedSelfTests(scsidev
, 0));
3300 if (allow_selftests
&& !cfg
.test_regex
.empty()) {
3301 char testtype
= next_scheduled_test(cfg
, state
, true/*scsi*/);
3303 DoSCSISelfTest(cfg
, state
, scsidev
, testtype
);
3305 CloseDevice(scsidev
, name
);
3309 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3310 static int standby_disable_state
= 0;
3312 static void init_disable_standby_check(dev_config_vector
& configs
)
3314 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3315 bool sts1
= false, sts2
= false;
3316 for (unsigned i
= 0; i
< configs
.size() && !(sts1
|| sts2
); i
++) {
3317 const dev_config
& cfg
= configs
.at(i
);
3318 if (cfg
.offlinests_ns
)
3320 if (cfg
.selfteststs_ns
)
3324 // Check for support of disable auto standby
3325 // Reenable standby if smartd.conf was reread
3326 if (sts1
|| sts2
|| standby_disable_state
== 3) {
3327 if (!smi()->disable_system_auto_standby(false)) {
3328 if (standby_disable_state
== 3)
3329 PrintOut(LOG_CRIT
, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3331 PrintOut(LOG_INFO
, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3332 (sts1
? "-l offlinests,ns" : ""), (sts1
&& sts2
? " and " : ""), (sts2
? "-l selfteststs,ns" : ""));
3333 sts1
= sts2
= false;
3338 standby_disable_state
= (sts1
|| sts2
? 1 : 0);
3341 static void do_disable_standby_check(const dev_config_vector
& configs
, const dev_state_vector
& states
)
3343 if (!standby_disable_state
)
3346 // Check for just started or still running self-tests
3347 bool running
= false;
3348 for (unsigned i
= 0; i
< configs
.size() && !running
; i
++) {
3349 const dev_config
& cfg
= configs
.at(i
); const dev_state
& state
= states
.at(i
);
3351 if ( ( cfg
.offlinests_ns
3352 && (state
.offline_started
||
3353 is_offl_coll_in_progress(state
.smartval
.offline_data_collection_status
)))
3354 || ( cfg
.selfteststs_ns
3355 && (state
.selftest_started
||
3356 is_self_test_in_progress(state
.smartval
.self_test_exec_status
))) )
3358 // state.offline/selftest_started will be reset after next logging of test status
3361 // Disable/enable auto standby and log state changes
3363 if (standby_disable_state
!= 1) {
3364 if (!smi()->disable_system_auto_standby(false))
3365 PrintOut(LOG_CRIT
, "Self-test(s) completed, system auto standby enable failed: %s\n",
3366 smi()->get_errmsg());
3368 PrintOut(LOG_INFO
, "Self-test(s) completed, system auto standby enabled\n");
3369 standby_disable_state
= 1;
3372 else if (!smi()->disable_system_auto_standby(true)) {
3373 if (standby_disable_state
!= 2) {
3374 PrintOut(LOG_INFO
, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3375 smi()->get_errmsg());
3376 standby_disable_state
= 2;
3380 if (standby_disable_state
!= 3) {
3381 PrintOut(LOG_INFO
, "Self-test(s) in progress, system auto standby disabled\n");
3382 standby_disable_state
= 3;
3387 // Checks the SMART status of all ATA and SCSI devices
3388 static void CheckDevicesOnce(const dev_config_vector
& configs
, dev_state_vector
& states
,
3389 smart_device_list
& devices
, bool firstpass
, bool allow_selftests
)
3391 for (unsigned i
= 0; i
< configs
.size(); i
++) {
3392 const dev_config
& cfg
= configs
.at(i
);
3393 dev_state
& state
= states
.at(i
);
3394 smart_device
* dev
= devices
.at(i
);
3396 ATACheckDevice(cfg
, state
, dev
->to_ata(), firstpass
, allow_selftests
);
3397 else if (dev
->is_scsi())
3398 SCSICheckDevice(cfg
, state
, dev
->to_scsi(), allow_selftests
);
3401 do_disable_standby_check(configs
, states
);
3404 // Set if Initialize() was called
3405 static bool is_initialized
= false;
3407 // Does initialization right after fork to daemon mode
3408 static void Initialize(time_t *wakeuptime
)
3410 // Call Goodbye() on exit
3411 is_initialized
= true;
3417 // install signal handlers. On Solaris, can't use signal() because
3418 // it resets the handler to SIG_DFL after each call. So use sigset()
3419 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3421 // normal and abnormal exit
3422 if (SIGNALFN(SIGTERM
, sighandler
)==SIG_IGN
)
3423 SIGNALFN(SIGTERM
, SIG_IGN
);
3424 if (SIGNALFN(SIGQUIT
, sighandler
)==SIG_IGN
)
3425 SIGNALFN(SIGQUIT
, SIG_IGN
);
3427 // in debug mode, <CONTROL-C> ==> HUP
3428 if (SIGNALFN(SIGINT
, debugmode
?HUPhandler
:sighandler
)==SIG_IGN
)
3429 SIGNALFN(SIGINT
, SIG_IGN
);
3431 // Catch HUP and USR1
3432 if (SIGNALFN(SIGHUP
, HUPhandler
)==SIG_IGN
)
3433 SIGNALFN(SIGHUP
, SIG_IGN
);
3434 if (SIGNALFN(SIGUSR1
, USR1handler
)==SIG_IGN
)
3435 SIGNALFN(SIGUSR1
, SIG_IGN
);
3437 if (SIGNALFN(SIGUSR2
, USR2handler
)==SIG_IGN
)
3438 SIGNALFN(SIGUSR2
, SIG_IGN
);
3441 // initialize wakeup time to CURRENT time
3442 *wakeuptime
=time(NULL
);
#ifdef _WIN32
// Toggle debug mode implemented for native windows only
// (there is no easy way to reopen tty on *nix).
// debugmode 0 -> 1: open a console and map SIGINT to the HUP handler.
// debugmode 1 -> 0: close the console and restore the normal SIGINT handler.
// Any other debugmode value is left unchanged.
static void ToggleDebugMode()
{
  if (!debugmode) {
    PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
    if (!daemon_enable_console("smartd [Debug]")) {
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
    }
    else
      PrintOut(LOG_INFO,"enable console failed\n");
  }
  else if (debugmode == 1) {
    daemon_disable_console();
    debugmode = 0;
    daemon_signal(SIGINT, sighandler);
    PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
  }
  else
    PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
}
#endif
3473 static time_t dosleep(time_t wakeuptime
, bool & sigwakeup
)
3475 // If past wake-up-time, compute next wake-up-time
3476 time_t timenow
=time(NULL
);
3477 while (wakeuptime
<=timenow
){
3478 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
3479 wakeuptime
+=intervals
*checktime
;
3482 // sleep until we catch SIGUSR1 or have completed sleeping
3484 while (timenow
< wakeuptime
+addtime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
) {
3486 // protect user again system clock being adjusted backwards
3487 if (wakeuptime
>timenow
+checktime
){
3488 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3489 wakeuptime
=timenow
+checktime
;
3492 // Exit sleep when time interval has expired or a signal is received
3493 sleep(wakeuptime
+addtime
-timenow
);
3496 // toggle debug mode?
3497 if (caughtsigUSR2
) {
3505 // Actual sleep time too long?
3506 if (!addtime
&& timenow
> wakeuptime
+60) {
3508 PrintOut(LOG_INFO
, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3509 (int)(timenow
-wakeuptime
));
3510 // Wait another 20 seconds to avoid I/O errors during disk spin-up
3511 addtime
= timenow
-wakeuptime
+20;
3512 // Use next wake-up-time if close
3513 int nextcheck
= checktime
- addtime
% checktime
;
3514 if (nextcheck
<= 20)
3515 addtime
+= nextcheck
;
3519 // if we caught a SIGUSR1 then print message and clear signal
3521 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3522 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
3527 // return adjusted wakeuptime
3531 // Print out a list of valid arguments for the Directive d
3532 static void printoutvaliddirectiveargs(int priority
, char d
)
3536 PrintOut(priority
, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3539 PrintOut(priority
, "valid_regular_expression");
3542 PrintOut(priority
, "%s", smi()->get_valid_dev_types_str().c_str());
3545 PrintOut(priority
, "normal, permissive");
3549 PrintOut(priority
, "on, off");
3552 PrintOut(priority
, "error, selftest");
3555 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3558 PrintOut(priority
, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3561 PrintOut(priority
, "use, ignore, show, showall");
3564 PrintOut(priority
, "none, samsung, samsung2, samsung3");
3566 PrintOut(priority
, "aam,[N|off], apm,[N|off], lookahead,[on|off], "
3567 "security-freeze, standby,[N|off], wcache,[on|off]");
3572 // exits with an error message, or returns integer value of token
3573 static int GetInteger(const char *arg
, const char *name
, const char *token
, int lineno
, const char *cfgfile
,
3574 int min
, int max
, char * suffix
= 0)
3576 // make sure argument is there
3578 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3579 cfgfile
, lineno
, name
, token
, min
, max
);
3583 // get argument value (base 10), check that it's integer, and in-range
3585 int val
= strtol(arg
,&endptr
,10);
3587 // optional suffix present?
3589 if (!strcmp(endptr
, suffix
))
3590 endptr
+= strlen(suffix
);
3595 if (!(!*endptr
&& min
<= val
&& val
<= max
)) {
3596 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3597 cfgfile
, lineno
, name
, token
, arg
, min
, max
);
3601 // all is well; return value
3606 // Get 1-3 small integer(s) for '-W' directive
3607 static int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *cfgfile
,
3608 unsigned char *val1
, unsigned char *val2
, unsigned char *val3
)
3610 unsigned v1
= 0, v2
= 0, v3
= 0;
3611 int n1
= -1, n2
= -1, n3
= -1, len
;
3613 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3614 cfgfile
, lineno
, name
, token
);
3619 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
3620 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
3621 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3622 cfgfile
, lineno
, name
, token
, arg
);
3625 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
3630 // This function returns 1 if it has correctly parsed one token (and
3631 // any arguments), else zero if no tokens remain. It returns -1 if an
3632 // error was encountered.
3633 static int ParseToken(char * token
, dev_config
& cfg
)
3636 const char * name
= cfg
.name
.c_str();
3637 int lineno
=cfg
.lineno
;
3638 const char *delim
= " \n\t";
3641 const char *arg
= 0;
3643 // is the rest of the line a comment
3647 // is the token not recognized?
3648 if (*token
!='-' || strlen(token
)!=2) {
3649 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3650 configfile
, lineno
, name
, token
);
3651 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
3655 // token we will be parsing:
3658 // parse the token and swallow its argument
3660 char plus
[] = "+", excl
[] = "!";
3664 // monitor current pending sector count (default 197)
3665 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255, plus
)) < 0)
3667 cfg
.curr_pending_id
= (unsigned char)val
;
3668 cfg
.curr_pending_incr
= (*plus
== '+');
3669 cfg
.curr_pending_set
= true;
3672 // monitor offline uncorrectable sectors (default 198)
3673 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255, plus
)) < 0)
3675 cfg
.offl_pending_id
= (unsigned char)val
;
3676 cfg
.offl_pending_incr
= (*plus
== '+');
3677 cfg
.offl_pending_set
= true;
3680 // Set tolerance level for SMART command failures
3681 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3683 } else if (!strcmp(arg
, "normal")) {
3684 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3685 // not on failure of an optional S.M.A.R.T. command.
3686 // This is the default so we don't need to actually do anything here.
3687 cfg
.permissive
= false;
3688 } else if (!strcmp(arg
, "permissive")) {
3689 // Permissive mode; ignore errors from Mandatory SMART commands
3690 cfg
.permissive
= true;
3696 // specify the device type
3697 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3699 } else if (!strcmp(arg
, "removable")) {
3700 cfg
.removable
= true;
3701 } else if (!strcmp(arg
, "auto")) {
3709 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3711 } else if (!strcmp(arg
, "none")) {
3712 cfg
.fix_firmwarebug
= FIX_NONE
;
3713 } else if (!strcmp(arg
, "samsung")) {
3714 cfg
.fix_firmwarebug
= FIX_SAMSUNG
;
3715 } else if (!strcmp(arg
, "samsung2")) {
3716 cfg
.fix_firmwarebug
= FIX_SAMSUNG2
;
3717 } else if (!strcmp(arg
, "samsung3")) {
3718 cfg
.fix_firmwarebug
= FIX_SAMSUNG3
;
3724 // check SMART status
3725 cfg
.smartcheck
= true;
3728 // check for failure of usage attributes
3729 cfg
.usagefailed
= true;
3732 // track changes in all vendor attributes
3737 // track changes in prefail vendor attributes
3741 // track changes in usage vendor attributes
3745 // track changes in SMART logs
3746 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3748 } else if (!strcmp(arg
, "selftest")) {
3749 // track changes in self-test log
3750 cfg
.selftest
= true;
3751 } else if (!strcmp(arg
, "error")) {
3752 // track changes in ATA error log
3753 cfg
.errorlog
= true;
3754 } else if (!strcmp(arg
, "xerror")) {
3755 // track changes in Extended Comprehensive SMART error log
3756 cfg
.xerrorlog
= true;
3757 } else if (!strcmp(arg
, "offlinests")) {
3758 // track changes in offline data collection status
3759 cfg
.offlinests
= true;
3760 } else if (!strcmp(arg
, "offlinests,ns")) {
3761 // track changes in offline data collection status, disable auto standby
3762 cfg
.offlinests
= cfg
.offlinests_ns
= true;
3763 } else if (!strcmp(arg
, "selfteststs")) {
3764 // track changes in self-test execution status
3765 cfg
.selfteststs
= true;
3766 } else if (!strcmp(arg
, "selfteststs,ns")) {
3767 // track changes in self-test execution status, disable auto standby
3768 cfg
.selfteststs
= cfg
.selfteststs_ns
= true;
3769 } else if (!strncmp(arg
, "scterc,", sizeof("scterc,")-1)) {
3770 // set SCT Error Recovery Control
3771 unsigned rt
= ~0, wt
= ~0; int nc
= -1;
3772 sscanf(arg
,"scterc,%u,%u%n", &rt
, &wt
, &nc
);
3773 if (nc
== (int)strlen(arg
) && rt
<= 999 && wt
<= 999) {
3774 cfg
.sct_erc_set
= true;
3775 cfg
.sct_erc_readtime
= rt
;
3776 cfg
.sct_erc_writetime
= wt
;
3785 // monitor everything
3786 cfg
.smartcheck
= true;
3788 cfg
.usagefailed
= true;
3790 cfg
.selftest
= true;
3791 cfg
.errorlog
= true;
3792 cfg
.selfteststs
= true;
3795 // automatic offline testing enable/disable
3796 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3798 } else if (!strcmp(arg
, "on")) {
3799 cfg
.autoofflinetest
= 2;
3800 } else if (!strcmp(arg
, "off")) {
3801 cfg
.autoofflinetest
= 1;
3807 // skip disk check if in idle or standby mode
3808 if (!(arg
= strtok(NULL
, delim
)))
3811 char *endptr
= NULL
;
3812 char *next
= strchr(const_cast<char*>(arg
), ',');
3814 cfg
.powerquiet
= false;
3815 cfg
.powerskipmax
= 0;
3817 if (next
!=NULL
) *next
='\0';
3818 if (!strcmp(arg
, "never"))
3820 else if (!strcmp(arg
, "sleep"))
3822 else if (!strcmp(arg
, "standby"))
3824 else if (!strcmp(arg
, "idle"))
3829 // if optional arguments are present
3830 if (!badarg
&& next
!=NULL
) {
3832 cfg
.powerskipmax
= strtol(next
, &endptr
, 10);
3834 cfg
.powerskipmax
= 0;
3836 next
= endptr
+ (*endptr
!= '\0');
3837 if (cfg
.powerskipmax
<= 0)
3840 if (*next
!= '\0') {
3841 if (!strcmp("q", next
))
3842 cfg
.powerquiet
= true;
3851 // automatic attribute autosave enable/disable
3852 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3854 } else if (!strcmp(arg
, "on")) {
3856 } else if (!strcmp(arg
, "off")) {
3863 // warn user, and delete any previously given -s REGEXP Directives
3864 if (!cfg
.test_regex
.empty()){
3865 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3866 configfile
, lineno
, name
, cfg
.test_regex
.get_pattern());
3867 cfg
.test_regex
= regular_expression();
3869 // check for missing argument
3870 if (!(arg
= strtok(NULL
, delim
))) {
3875 if (!cfg
.test_regex
.compile(arg
, REG_EXTENDED
)) {
3876 // not a valid regular expression!
3877 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3878 configfile
, lineno
, name
, arg
, cfg
.test_regex
.get_errmsg());
3882 // Do a bit of sanity checking and warn user if we think that
3883 // their regexp is "strange". User probably confused about shell
3884 // glob(3) syntax versus regular expression syntax regexp(7).
3885 if (arg
[(val
= strspn(arg
, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3886 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3887 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
3890 // send email to address that follows
3891 if (!(arg
= strtok(NULL
,delim
)))
3894 if (!cfg
.emailaddress
.empty())
3895 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3896 configfile
, lineno
, name
, cfg
.emailaddress
.c_str());
3897 cfg
.emailaddress
= arg
;
3901 // email warning options
3902 if (!(arg
= strtok(NULL
, delim
)))
3904 else if (!strcmp(arg
, "once"))
3906 else if (!strcmp(arg
, "daily"))
3908 else if (!strcmp(arg
, "diminishing"))
3910 else if (!strcmp(arg
, "test"))
3912 else if (!strcmp(arg
, "exec")) {
3913 // Get the next argument (the command line)
3914 if (!(arg
= strtok(NULL
, delim
))) {
3915 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3916 configfile
, lineno
, name
, token
);
3919 // Free the last cmd line given if any, and copy new one
3920 if (!cfg
.emailcmdline
.empty())
3921 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3922 configfile
, lineno
, name
, cfg
.emailcmdline
.c_str());
3923 cfg
.emailcmdline
= arg
;
3929 // ignore failure of usage attribute
3930 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3932 cfg
.monitor_attr_flags
.set(val
, MONITOR_IGN_FAILUSE
);
3935 // ignore attribute for tracking purposes
3936 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3938 cfg
.monitor_attr_flags
.set(val
, MONITOR_IGNORE
);
3941 // print raw value when tracking
3942 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255, excl
)) < 0)
3944 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_PRINT
);
3945 if (*excl
== '!') // attribute change is critical
3946 cfg
.monitor_attr_flags
.set(val
, MONITOR_AS_CRIT
);
3949 // track changes in raw value (forces printing of raw value)
3950 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255, excl
)) < 0)
3952 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_PRINT
|MONITOR_RAW
);
3953 if (*excl
== '!') // raw value change is critical
3954 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_AS_CRIT
);
3957 // track Temperature
3958 if ((val
=Get3Integers(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
,
3959 &cfg
.tempdiff
, &cfg
.tempinfo
, &cfg
.tempcrit
))<0)
3963 // non-default vendor-specific attribute meaning
3964 if (!(arg
=strtok(NULL
,delim
))) {
3966 } else if (!parse_attribute_def(arg
, cfg
.attribute_defs
, PRIOR_USER
)) {
3971 // Define use of drive-specific presets.
3972 if (!(arg
= strtok(NULL
, delim
))) {
3974 } else if (!strcmp(arg
, "use")) {
3975 cfg
.ignorepresets
= false;
3976 } else if (!strcmp(arg
, "ignore")) {
3977 cfg
.ignorepresets
= true;
3978 } else if (!strcmp(arg
, "show")) {
3979 cfg
.showpresets
= true;
3980 } else if (!strcmp(arg
, "showall")) {
3988 // Various ATA settings
3989 if (!(arg
= strtok(NULL
, delim
))) {
3993 char arg2
[16+1]; unsigned val
;
3994 int n1
= -1, n2
= -1, n3
= -1, len
= strlen(arg
);
3995 if (sscanf(arg
, "%16[^,=]%n%*[,=]%n%u%n", arg2
, &n1
, &n2
, &val
, &n3
) >= 1
3996 && (n1
== len
|| n2
> 0)) {
3997 bool on
= (n2
> 0 && !strcmp(arg
+n2
, "on"));
3998 bool off
= (n2
> 0 && !strcmp(arg
+n2
, "off"));
4002 if (!strcmp(arg2
, "aam")) {
4005 else if (val
<= 254)
4006 cfg
.set_aam
= val
+ 1;
4010 else if (!strcmp(arg2
, "apm")) {
4013 else if (1 <= val
&& val
<= 254)
4014 cfg
.set_apm
= val
+ 1;
4018 else if (!strcmp(arg2
, "lookahead")) {
4020 cfg
.set_lookahead
= -1;
4022 cfg
.set_lookahead
= 1;
4026 else if (!strcmp(arg
, "security-freeze")) {
4027 cfg
.set_security_freeze
= true;
4029 else if (!strcmp(arg2
, "standby")) {
4031 cfg
.set_standby
= 0 + 1;
4032 else if (val
<= 255)
4033 cfg
.set_standby
= val
+ 1;
4037 else if (!strcmp(arg2
, "wcache")) {
4039 cfg
.set_wcache
= -1;
4054 // Directive not recognized
4055 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
4056 configfile
, lineno
, name
, token
);
4061 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4062 configfile
, lineno
, name
, token
);
4065 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4066 configfile
, lineno
, name
, token
, arg
);
4068 if (missingarg
|| badarg
) {
4069 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
4070 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
4071 PrintOut(LOG_CRIT
, "\n");
4078 // Scan directive for configuration file
4079 #define SCANDIRECTIVE "DEVICESCAN"
4081 // This is the routine that adds things to the conf_entries list.
4083 // Return values are:
4084 // 1: parsed a normal line
4085 // 0: found DEFAULT setting or comment or blank line
4086 // -1: found SCANDIRECTIVE line
4087 // -2: found an error
4089 // Note: this routine modifies *line from the caller!
4090 static int ParseConfigLine(dev_config_vector
& conf_entries
, dev_config
& default_conf
, int lineno
, /*const*/ char * line
)
4092 const char *delim
= " \n\t";
4094 // get first token: device name. If a comment, skip line
4095 const char * name
= strtok(line
, delim
);
4096 if (!name
|| *name
== '#')
4099 // Check device name for DEFAULT or DEVICESCAN
4101 if (!strcmp("DEFAULT", name
)) {
4103 // Restart with empty defaults
4104 default_conf
= dev_config();
4107 retval
= (!strcmp(SCANDIRECTIVE
, name
) ? -1 : 1);
4108 // Init new entry with current defaults
4109 conf_entries
.push_back(default_conf
);
4111 dev_config
& cfg
= (retval
? conf_entries
.back() : default_conf
);
4113 cfg
.name
= name
; // Later replaced by dev->get_info().info_name
4114 cfg
.dev_name
= name
; // If DEVICESCAN later replaced by get->dev_info().dev_name
4115 cfg
.lineno
= lineno
;
4117 // parse tokens one at a time from the file.
4118 while (char * token
= strtok(0, delim
)) {
4119 int rc
= ParseToken(token
, cfg
);
4121 // error found on the line
4128 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4131 // Don't perform checks below for DEFAULT entries
4135 // If NO monitoring directives are set, then set all of them.
4136 if (!( cfg
.smartcheck
|| cfg
.selftest
4137 || cfg
.errorlog
|| cfg
.xerrorlog
4138 || cfg
.offlinests
|| cfg
.selfteststs
4139 || cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
4140 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
4142 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4143 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
4145 cfg
.smartcheck
= true;
4146 cfg
.usagefailed
= true;
4149 cfg
.selftest
= true;
4150 cfg
.errorlog
= true;
4151 cfg
.selfteststs
= true;
4154 // additional sanity check. Has user set -M options without -m?
4155 if (cfg
.emailaddress
.empty() && (!cfg
.emailcmdline
.empty() || cfg
.emailfreq
|| cfg
.emailtest
)){
4156 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4157 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
4161 // has the user has set <nomailer>?
4162 if (cfg
.emailaddress
== "<nomailer>") {
4163 // check that -M exec is also set
4164 if (cfg
.emailcmdline
.empty()){
4165 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4166 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
4169 // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
4170 cfg
.emailaddress
.clear();
4176 // Parses a configuration file. Return values are:
4177 // N=>0: found N entries
4178 // -1: syntax error in config file
4179 // -2: config file does not exist
4180 // -3: config file exists but cannot be read
4182 // In the case where the return value is 0, there are three
4184 // Empty configuration file ==> conf_entries.empty()
4185 // No configuration file ==> conf_entries[0].lineno == 0
4186 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4187 static int ParseConfigFile(dev_config_vector
& conf_entries
)
4189 // maximum line length in configuration file
4190 const int MAXLINELEN
= 256;
4191 // maximum length of a continued line in configuration file
4192 const int MAXCONTLINE
= 1023;
4195 // Open config file, if it exists and is not <stdin>
4196 if (!(configfile
== configfile_stdin
)) { // pointer comparison ok here
4197 if (!f
.open(configfile
,"r") && (errno
!=ENOENT
|| !configfile_alt
.empty())) {
4198 // file exists but we can't read it or it should exist due to '-c' option
4199 int ret
= (errno
!=ENOENT
? -3 : -2);
4200 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
4201 strerror(errno
),configfile
);
4205 else // read from stdin ('-c -' option)
4208 // Start with empty defaults
4209 dev_config default_conf
;
4211 // No configuration file found -- use fake one
4214 char fakeconfig
[] = SCANDIRECTIVE
" -a"; // TODO: Remove this hack, build cfg_entry.
4216 if (ParseConfigLine(conf_entries
, default_conf
, 0, fakeconfig
) != -1)
4217 throw std::logic_error("Internal error parsing "SCANDIRECTIVE
);
4222 setmode(fileno(f
), O_TEXT
); // Allow files with \r\n
4225 // configuration file exists
4226 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
4228 // parse config file line by line
4229 int lineno
= 1, cont
= 0, contlineno
= 0;
4230 char line
[MAXLINELEN
+2];
4231 char fullline
[MAXCONTLINE
+1];
4234 int len
=0,scandevice
;
4239 // make debugging simpler
4240 memset(line
,0,sizeof(line
));
4243 code
=fgets(line
, MAXLINELEN
+2, f
);
4245 // are we at the end of the file?
4248 scandevice
= ParseConfigLine(conf_entries
, default_conf
, contlineno
, fullline
);
4249 // See if we found a SCANDIRECTIVE directive
4252 // did we find a syntax error
4255 // the final line is part of a continuation line
4262 // input file line number
4265 // See if line is too long
4267 if (len
>MAXLINELEN
){
4269 if (line
[len
-1]=='\n')
4270 warn
="(including newline!) ";
4273 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4274 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
4278 // Ignore anything after comment symbol
4279 if ((comment
=strchr(line
,'#'))){
4284 // is the total line (made of all continuation lines) too long?
4285 if (cont
+len
>MAXCONTLINE
){
4286 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4287 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
4291 // copy string so far into fullline, and increment length
4292 strcpy(fullline
+cont
,line
);
4295 // is this a continuation line. If so, replace \ by space and look at next line
4296 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
4297 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
4301 // Not a continuation line. Parse it
4302 scandevice
= ParseConfigLine(conf_entries
, default_conf
, contlineno
, fullline
);
4304 // did we find a scandevice directive?
4307 // did we find a syntax error
4316 // note -- may be zero if syntax of file OK, but no valid entries!
4320 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4321 <LIST> is the list of valid arguments for option opt. */
4322 static void PrintValidArgs(char opt
)
4326 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
4327 if (!(s
= GetValidArgList(opt
)))
4328 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
4330 PrintOut(LOG_CRIT
, "%s", (char *)s
);
4331 PrintOut(LOG_CRIT
, " <=======\n");
4335 // Report error and exit if specified path is not absolute.
4336 static void check_abs_path(char option
, const std::string
& path
)
4338 if (path
.empty() || path
[0] == '/')
4343 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option
, path
.c_str());
4344 PrintOut(LOG_CRIT
, "Error: relative path names are not allowed\n\n");
4349 // Parses input line, prints usage message and
4350 // version/license/copyright messages
4351 static void ParseOpts(int argc
, char **argv
)
4353 // Init default configfile path
4355 configfile
= SMARTMONTOOLS_SYSCONFDIR
"/smartd.conf";
4357 static std::string configfile_str
= get_exe_dir() + "/smartd.conf";
4358 configfile
= configfile_str
.c_str();
4361 // Please update GetValidArgList() if you edit shortopts
4362 static const char shortopts
[] = "c:l:q:dDni:p:r:s:A:B:Vh?"
4363 #ifdef HAVE_LIBCAP_NG
4367 // Please update GetValidArgList() if you edit longopts
4368 struct option longopts
[] = {
4369 { "configfile", required_argument
, 0, 'c' },
4370 { "logfacility", required_argument
, 0, 'l' },
4371 { "quit", required_argument
, 0, 'q' },
4372 { "debug", no_argument
, 0, 'd' },
4373 { "showdirectives", no_argument
, 0, 'D' },
4374 { "interval", required_argument
, 0, 'i' },
4376 { "no-fork", no_argument
, 0, 'n' },
4378 { "service", no_argument
, 0, 'n' },
4380 { "pidfile", required_argument
, 0, 'p' },
4381 { "report", required_argument
, 0, 'r' },
4382 { "savestates", required_argument
, 0, 's' },
4383 { "attributelog", required_argument
, 0, 'A' },
4384 { "drivedb", required_argument
, 0, 'B' },
4385 { "version", no_argument
, 0, 'V' },
4386 { "license", no_argument
, 0, 'V' },
4387 { "copyright", no_argument
, 0, 'V' },
4388 { "help", no_argument
, 0, 'h' },
4389 { "usage", no_argument
, 0, 'h' },
4390 #ifdef HAVE_LIBCAP_NG
4391 { "capabilities", no_argument
, 0, 'C' },
4397 bool badarg
= false;
4398 bool no_defaultdb
= false; // set true on '-B FILE'
4400 // Parse input options.
4402 while ((optchar
= getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)) != -1) {
4410 if (!(strcmp(optarg
,"nodev"))) {
4412 } else if (!(strcmp(optarg
,"nodevstartup"))) {
4414 } else if (!(strcmp(optarg
,"never"))) {
4416 } else if (!(strcmp(optarg
,"onecheck"))) {
4419 } else if (!(strcmp(optarg
,"showtests"))) {
4422 } else if (!(strcmp(optarg
,"errors"))) {
4429 // set the log facility level
4430 if (!strcmp(optarg
, "daemon"))
4431 facility
=LOG_DAEMON
;
4432 else if (!strcmp(optarg
, "local0"))
4433 facility
=LOG_LOCAL0
;
4434 else if (!strcmp(optarg
, "local1"))
4435 facility
=LOG_LOCAL1
;
4436 else if (!strcmp(optarg
, "local2"))
4437 facility
=LOG_LOCAL2
;
4438 else if (!strcmp(optarg
, "local3"))
4439 facility
=LOG_LOCAL3
;
4440 else if (!strcmp(optarg
, "local4"))
4441 facility
=LOG_LOCAL4
;
4442 else if (!strcmp(optarg
, "local5"))
4443 facility
=LOG_LOCAL5
;
4444 else if (!strcmp(optarg
, "local6"))
4445 facility
=LOG_LOCAL6
;
4446 else if (!strcmp(optarg
, "local7"))
4447 facility
=LOG_LOCAL7
;
4452 // enable debug mode
4457 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4462 // print summary of all valid directives
4468 // Period (time interval) for checking
4469 // strtol will set errno in the event of overflow, so we'll check it.
4471 lchecktime
= strtol(optarg
, &tailptr
, 10);
4472 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
4475 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
4476 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
4477 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4480 checktime
= (int)lchecktime
;
4483 // report IOCTL transactions
4488 // split_report_arg() may modify its first argument string, so use a
4489 // copy of optarg in case we want optarg for an error message.
4490 if (!(s
= strdup(optarg
))) {
4491 PrintOut(LOG_CRIT
, "No memory to process -r option - exiting\n");
4494 if (split_report_arg(s
, &i
)) {
4496 } else if (i
<1 || i
>3) {
4499 PrintOut(LOG_CRIT
, "======> INVALID REPORT LEVEL: %s <=======\n", optarg
);
4500 PrintOut(LOG_CRIT
, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4502 } else if (!strcmp(s
,"ioctl")) {
4503 ata_debugmode
= scsi_debugmode
= i
;
4504 } else if (!strcmp(s
,"ataioctl")) {
4506 } else if (!strcmp(s
,"scsiioctl")) {
4511 free(s
); // TODO: use std::string
4515 // alternate configuration file
4516 if (strcmp(optarg
,"-"))
4517 configfile
= (configfile_alt
= optarg
).c_str();
4518 else // read from stdin
4519 configfile
=configfile_stdin
;
4522 // output file with PID number
4526 // path prefix of persistent state file
4527 state_path_prefix
= optarg
;
4530 // path prefix of attribute log file
4531 attrlog_path_prefix
= optarg
;
4535 const char * path
= optarg
;
4536 if (*path
== '+' && path
[1])
4539 no_defaultdb
= true;
4540 unsigned char savedebug
= debugmode
; debugmode
= 1;
4541 if (!read_drive_database(path
))
4543 debugmode
= savedebug
;
4547 // print version and CVS info
4549 PrintOut(LOG_INFO
, "%s", format_version_info("smartd", true /*full*/).c_str());
4552 #ifdef HAVE_LIBCAP_NG
4554 // enable capabilities
4555 enable_capabilities
= true;
4559 // help: print summary of command-line options
4567 // unrecognized option
4570 // Point arg to the argument in which this option was found.
4571 arg
= argv
[optind
-1];
4572 // Check whether the option is a long option that doesn't map to -h.
4573 if (arg
[1] == '-' && optchar
!= 'h') {
4574 // Iff optopt holds a valid option then argument must be missing.
4575 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
4576 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
4577 PrintValidArgs(optopt
);
4579 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
4581 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
4585 // Iff optopt holds a valid option then argument must be missing.
4586 if (strchr(shortopts
, optopt
) != NULL
){
4587 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
4588 PrintValidArgs(optopt
);
4590 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
4592 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4599 // Check to see if option had an unrecognized or incorrect argument.
4603 // It would be nice to print the actual option name given by the user
4604 // here, but we just print the short form. Please fix this if you know
4605 // a clean way to do it.
4606 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
4607 PrintValidArgs(optchar
);
4608 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4613 // non-option arguments are not allowed
4614 if (argc
> optind
) {
4617 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
4618 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4622 // no pidfile in debug mode
4623 if (debugmode
&& !pid_file
.empty()) {
4626 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4627 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
.c_str());
4633 // absolute path names are required due to chdir('/') after fork().
4634 check_abs_path('p', pid_file
);
4635 check_abs_path('s', state_path_prefix
);
4636 check_abs_path('A', attrlog_path_prefix
);
4640 // Read or init drive database
4641 if (!no_defaultdb
) {
4642 unsigned char savedebug
= debugmode
; debugmode
= 1;
4643 if (!read_default_drive_databases())
4645 debugmode
= savedebug
;
4652 // Function we call if no configuration file was found or if the
4653 // SCANDIRECTIVE Directive was found. It makes entries for device
4654 // names returned by scan_smart_devices() in os_OSNAME.cpp
4655 static int MakeConfigEntries(const dev_config
& base_cfg
,
4656 dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
, const char * type
)
4658 // make list of devices
4659 smart_device_list devlist
;
4660 if (!smi()->scan_smart_devices(devlist
, (*type
? type
: 0)))
4661 PrintOut(LOG_CRIT
,"Problem creating device name scan list\n");
4663 // if no devices, or error constructing list, return
4664 if (devlist
.size() <= 0)
4667 // add empty device slots for existing config entries
4668 while (scanned_devs
.size() < conf_entries
.size())
4669 scanned_devs
.push_back((smart_device
*)0);
4671 // loop over entries to create
4672 for (unsigned i
= 0; i
< devlist
.size(); i
++) {
4673 // Move device pointer
4674 smart_device
* dev
= devlist
.release(i
);
4675 scanned_devs
.push_back(dev
);
4677 // Copy configuration, update device and type name
4678 conf_entries
.push_back(base_cfg
);
4679 dev_config
& cfg
= conf_entries
.back();
4680 cfg
.name
= dev
->get_info().info_name
;
4681 cfg
.dev_name
= dev
->get_info().dev_name
;
4682 cfg
.dev_type
= type
;
4685 return devlist
.size();
4688 static void CanNotRegister(const char *name
, const char *type
, int line
, bool scandirective
)
4690 if (!debugmode
&& scandirective
)
4693 PrintOut(scandirective
?LOG_INFO
:LOG_CRIT
,
4694 "Unable to register %s device %s at line %d of file %s\n",
4695 type
, name
, line
, configfile
);
4697 PrintOut(LOG_INFO
,"Unable to register %s device %s\n",
4702 // Returns negative value (see ParseConfigFile()) if config file
4703 // had errors, else number of entries which may be zero or positive.
4704 static int ReadOrMakeConfigEntries(dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
)
4706 // parse configuration file configfile (normally /etc/smartd.conf)
4707 int entries
= ParseConfigFile(conf_entries
);
4710 // There was an error reading the configuration file.
4711 conf_entries
.clear();
4713 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
4717 // no error parsing config file.
4719 // we did not find a SCANDIRECTIVE and did find valid entries
4720 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
4722 else if (!conf_entries
.empty()) {
4723 // we found a SCANDIRECTIVE or there was no configuration file so
4724 // scan. Configuration file's last entry contains all options
4726 dev_config first
= conf_entries
.back();
4727 conf_entries
.pop_back();
4730 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
4732 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
4734 // make config list of devices to search for
4735 MakeConfigEntries(first
, conf_entries
, scanned_devs
, first
.dev_type
.c_str());
4737 // warn user if scan table found no devices
4738 if (conf_entries
.empty())
4739 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
4742 PrintOut(LOG_CRIT
,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile
);
4744 return conf_entries
.size();
4748 // This function tries devices from conf_entries. Each one that can be
4749 // registered is moved onto the [ata|scsi]devices lists and removed
4750 // from the conf_entries list.
4751 static void RegisterDevices(const dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
,
4752 dev_config_vector
& configs
, dev_state_vector
& states
, smart_device_list
& devices
)
4754 // start by clearing lists/memory of ALL existing devices
4760 for (unsigned i
= 0; i
< conf_entries
.size(); i
++){
4762 dev_config cfg
= conf_entries
[i
];
4764 // get device of appropriate type
4765 smart_device_auto_ptr dev
;
4766 bool scanning
= false;
4768 // Device may already be detected during devicescan
4769 if (i
< scanned_devs
.size()) {
4770 dev
= scanned_devs
.release(i
);
4776 dev
= smi()->get_smart_device(cfg
.name
.c_str(), cfg
.dev_type
.c_str());
4778 if (cfg
.dev_type
.empty())
4779 PrintOut(LOG_INFO
,"Device: %s, unable to autodetect device type\n", cfg
.name
.c_str());
4781 PrintOut(LOG_INFO
,"Device: %s, unsupported device type '%s'\n", cfg
.name
.c_str(), cfg
.dev_type
.c_str());
4787 smart_device::device_info oldinfo
= dev
->get_info();
4789 // Open with autodetect support, may return 'better' device
4790 dev
.replace( dev
->autodetect_open() );
4792 // Report if type has changed
4793 if (oldinfo
.dev_type
!= dev
->get_dev_type())
4794 PrintOut(LOG_INFO
,"Device: %s, type changed from '%s' to '%s'\n",
4795 cfg
.name
.c_str(), oldinfo
.dev_type
.c_str(), dev
->get_dev_type());
4797 if (!dev
->is_open()) {
4798 // For linux+devfs, a nonexistent device gives a strange error
4799 // message. This makes the error message a bit more sensible.
4800 // If no debug and scanning - don't print errors
4801 if (debugmode
|| !scanning
)
4802 PrintOut(LOG_INFO
, "Device: %s, open() failed: %s\n", dev
->get_info_name(), dev
->get_errmsg());
4806 // Update informal name
4807 cfg
.name
= dev
->get_info().info_name
;
4808 PrintOut(LOG_INFO
, "Device: %s, opened\n", cfg
.name
.c_str());
4810 // Prepare initial state
4813 // register ATA devices
4815 if (ATADeviceScan(cfg
, state
, dev
->to_ata())) {
4816 CanNotRegister(cfg
.name
.c_str(), "ATA", cfg
.lineno
, scanning
);
4820 // or register SCSI devices
4821 else if (dev
->is_scsi()){
4822 if (SCSIDeviceScan(cfg
, state
, dev
->to_scsi())) {
4823 CanNotRegister(cfg
.name
.c_str(), "SCSI", cfg
.lineno
, scanning
);
4828 PrintOut(LOG_INFO
, "Device: %s, neither ATA nor SCSI device\n", cfg
.name
.c_str());
4833 // move onto the list of devices
4834 configs
.push_back(cfg
);
4835 states
.push_back(state
);
4836 devices
.push_back(dev
);
4838 // if device is explictly listed and we can't register it, then
4839 // exit unless the user has specified that the device is removable
4840 else if (!scanning
) {
4841 if (cfg
.removable
|| quit
==2)
4842 PrintOut(LOG_INFO
, "Device %s not available\n", cfg
.name
.c_str());
4844 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg
.name
.c_str());
4850 init_disable_standby_check(configs
);
4854 // Main program without exception handling
4855 static int main_worker(int argc
, char **argv
)
4857 // Initialize interface
4858 smart_interface::init();
4862 // is it our first pass through?
4863 bool firstpass
= true;
4865 // next time to wake up
4866 time_t wakeuptime
= 0;
4868 // parse input and print header and usage info if needed
4869 ParseOpts(argc
,argv
);
4871 // Configuration for each device
4872 dev_config_vector configs
;
4874 dev_state_vector states
;
4875 // Devices to monitor
4876 smart_device_list devices
;
4878 bool write_states_always
= true;
4880 #ifdef HAVE_LIBCAP_NG
4881 // Drop capabilities
4882 if (enable_capabilities
) {
4883 capng_clear(CAPNG_SELECT_BOTH
);
4884 capng_updatev(CAPNG_ADD
, (capng_type_t
)(CAPNG_EFFECTIVE
|CAPNG_PERMITTED
),
4885 CAP_SYS_ADMIN
, CAP_MKNOD
, CAP_SYS_RAWIO
, -1);
4886 capng_apply(CAPNG_SELECT_BOTH
);
4890 // the main loop of the code
4893 // are we exiting from a signal?
4894 if (caughtsigEXIT
) {
4895 // are we exiting with SIGTERM?
4896 int isterm
=(caughtsigEXIT
==SIGTERM
);
4897 int isquit
=(caughtsigEXIT
==SIGQUIT
);
4898 int isok
=debugmode
?isterm
|| isquit
:isterm
;
4900 PrintOut(isok
?LOG_INFO
:LOG_CRIT
, "smartd received signal %d: %s\n",
4901 caughtsigEXIT
, strsignal(caughtsigEXIT
));
4906 // Write state files
4907 if (!state_path_prefix
.empty())
4908 write_all_dev_states(configs
, states
);
4913 // Should we (re)read the config file?
4914 if (firstpass
|| caughtsigHUP
){
4916 // Write state files
4917 if (!state_path_prefix
.empty())
4918 write_all_dev_states(configs
, states
);
4922 "Signal HUP - rereading configuration file %s\n":
4923 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME
" quits)\n\n",
4928 dev_config_vector conf_entries
; // Entries read from smartd.conf
4929 smart_device_list scanned_devs
; // Devices found during scan
4930 // (re)reads config file, makes >=0 entries
4931 int entries
= ReadOrMakeConfigEntries(conf_entries
, scanned_devs
);
4934 // checks devices, then moves onto ata/scsi list or deallocates.
4935 RegisterDevices(conf_entries
, scanned_devs
, configs
, states
, devices
);
4936 if (!(configs
.size() == devices
.size() && configs
.size() == states
.size()))
4937 throw std::logic_error("Invalid result from RegisterDevices");
4939 else if (quit
==2 || ((quit
==0 || quit
==1) && !firstpass
)) {
4940 // user has asked to continue on error in configuration file
4942 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
4945 // exit with configuration file error status
4946 return (entries
==-3 ? EXIT_READCONF
: entries
==-2 ? EXIT_NOCONF
: EXIT_BADCONF
);
4950 // Log number of devices we are monitoring...
4951 if (devices
.size() > 0 || quit
==2 || (quit
==1 && !firstpass
)) {
4953 for (unsigned i
= 0; i
< devices
.size(); i
++) {
4954 if (devices
.at(i
)->is_ata())
4957 PrintOut(LOG_INFO
,"Monitoring %d ATA and %d SCSI devices\n",
4958 numata
, devices
.size() - numata
);
4961 PrintOut(LOG_INFO
,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4966 // user has asked to print test schedule
4967 PrintTestSchedule(configs
, states
, devices
);
4971 #ifdef HAVE_LIBCAP_NG
4972 if (enable_capabilities
) {
4973 for (unsigned i
= 0; i
< configs
.size(); i
++) {
4974 if (!configs
[i
].emailaddress
.empty() || !configs
[i
].emailcmdline
.empty()) {
4975 PrintOut(LOG_WARNING
, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
4985 // Always write state files after (re)configuration
4986 write_states_always
= true;
4989 // check all devices once,
4990 // self tests are not started in first pass unless '-q onecheck' is specified
4991 CheckDevicesOnce(configs
, states
, devices
, firstpass
, (!firstpass
|| quit
==3));
4993 // Write state files
4994 if (!state_path_prefix
.empty())
4995 write_all_dev_states(configs
, states
, write_states_always
);
4996 write_states_always
= false;
4998 // Write attribute logs
4999 if (!attrlog_path_prefix
.empty())
5000 write_all_dev_attrlogs(configs
, states
);
5002 // user has asked us to exit after first check
5004 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5005 "smartd is exiting (exit status 0)\n");
5009 // fork into background if needed
5010 if (firstpass
&& !debugmode
) {
5014 // set exit and signal handlers, write PID file, set wake-up time
5016 Initialize(&wakeuptime
);
5020 // sleep until next check time, or a signal arrives
5021 wakeuptime
= dosleep(wakeuptime
, write_states_always
);
5028 int main(int argc
, char **argv
)
5030 // Windows: internal main function started direct or by service control manager
5031 static int smartd_main(int argc
, char **argv
)
5036 // Do the real work ...
5037 status
= main_worker(argc
, argv
);
5040 // EXIT(status) arrives here
5043 catch (const std::bad_alloc
& /*ex*/) {
5044 // Memory allocation failed (also thrown by std::operator new)
5045 PrintOut(LOG_CRIT
, "Smartd: Out of memory\n");
5046 status
= EXIT_NOMEM
;
5048 catch (const std::exception
& ex
) {
5049 // Other fatal errors
5050 PrintOut(LOG_CRIT
, "Smartd: Exception: %s\n", ex
.what());
5051 status
= EXIT_BADCODE
;
5055 status
= Goodbye(status
);
5058 daemon_winsvc_exitcode
= status
;
#ifdef _WIN32
// Main function for Windows
int main(int argc, char **argv){
  // Options for smartd windows service
  static const daemon_winsvc_options svc_opts = {
    "--service", // cmd_opt
    "smartd", "SmartD Service", // servicename, displayname
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };
  // daemon_main() handles daemon and service specific commands
  // and starts smartd_main() direct, from a new process,
  // or via service control manager
  return daemon_main("smartd", &svc_opts , smartd_main, argc, argv);
}

#endif