2 * Home page of code is: http://smartmontools.sourceforge.net
4 * Copyright (C) 2002-10 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7 * Copyright (C) 2008-10 Christian Franke <smartmontools-support@lists.sourceforge.net>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
25 // TODO: Why is this define necessary?
29 // unconditionally included files
31 #include <sys/types.h>
32 #include <sys/stat.h> // umask
47 #include <algorithm> // std::replace()
49 // see which system files to conditionally include
52 // conditionally included files
65 #pragma warning(disable:4761) // "conversion supplied"
66 typedef unsigned short mode_t
;
69 #include <io.h> // umask()
70 #include <process.h> // getpid()
75 // BOOL WINAPI FreeConsole(void);
76 extern "C" int __stdcall
FreeConsole(void);
77 #include <io.h> // setmode()
84 // locally included files
87 #include "dev_interface.h"
89 #include "knowndrives.h"
93 // This is for Solaris, where signal() resets the handler to SIG_DFL
94 // after the first signal is caught.
96 #define SIGNALFN sigset
98 #define SIGNALFN signal
102 #include "hostname_win32.h" // gethost/domainname()
103 #define HAVE_GETHOSTNAME 1
104 #define HAVE_GETDOMAINNAME 1
105 // fork()/signal()/initd simulation for native Windows
106 #include "daemon_win32.h" // daemon_main/detach/signal()
108 #define SIGNALFN daemon_signal
109 #define strsignal daemon_strsignal
110 #define sleep daemon_sleep
111 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
112 #define SIGQUIT SIGBREAK
113 #define SIGQUIT_KEYNAME "CONTROL-Break"
116 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
117 #define SIGQUIT_KEYNAME "2x CONTROL-C"
119 #define SIGQUIT_KEYNAME "CONTROL-\\"
123 #if defined (__SVR4) && defined (__sun)
124 extern "C" int getdomainname(char *, int); // no declaration in header files!
127 #define ARGUSED(x) ((void)(x))
129 const char * smartd_cpp_cvsid
= "$Id: smartd.cpp 3075 2010-03-12 22:01:44Z chrfranke $"
130 CONFIG_H_CVSID EXTERN_H_CVSID
;
132 extern const char *reportbug
;
134 extern unsigned char debugmode
;
137 #define EXIT_BADCMD 1 // command line did not parse
138 #define EXIT_BADCONF 2 // syntax error in config file
139 #define EXIT_STARTUP 3 // problem forking daemon
140 #define EXIT_PID 4 // problem creating pid file
141 #define EXIT_NOCONF 5 // config file does not exist
142 #define EXIT_READCONF 6 // config file exists but cannot be read
144 #define EXIT_NOMEM 8 // out of memory
145 #define EXIT_BADCODE 10 // internal error - should NEVER happen
147 #define EXIT_BADDEV 16 // we can't monitor this device
148 #define EXIT_NODEV 17 // no devices to monitor
150 #define EXIT_SIGNAL 254 // abort on signal
152 // command-line: how long to sleep between checks
153 #define CHECKTIME 1800
154 static int checktime
=CHECKTIME
;
156 // command-line: name of PID file (empty for no pid file)
157 static std::string pid_file
;
159 // command-line: path prefix of persistent state file, empty if no persistence.
160 static std::string state_path_prefix
161 #ifdef SMARTMONTOOLS_SAVESTATES
162 = SMARTMONTOOLS_SAVESTATES
166 // command-line: path prefix of attribute log file, empty if no logs.
167 static std::string attrlog_path_prefix
168 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
169 = SMARTMONTOOLS_ATTRIBUTELOG
173 // configuration file name
174 #define CONFIGFILENAME "smartd.conf"
177 static const char *configfile
= SMARTMONTOOLS_SYSCONFDIR
"/" CONFIGFILENAME
;
179 static const char *configfile
= "./" CONFIGFILENAME
;
181 // configuration file "name" if read from stdin
182 static const char * const configfile_stdin
= "<stdin>";
183 // path of alternate configuration file
184 static std::string configfile_alt
;
186 // command-line: when should we exit?
189 // command-line; this is the default syslog(3) log facility to use.
190 static int facility
=LOG_DAEMON
;
193 // command-line: fork into background?
194 static bool do_fork
=true;
197 #ifdef HAVE_LIBCAP_NG
198 // command-line: enable capabilities?
199 static bool enable_capabilities
= false;
202 // used for control of printing, passing arguments to atacmds.c
203 smartmonctrl
*con
=NULL
;
205 // set to one if we catch a USR1 (check devices now)
206 volatile int caughtsigUSR1
=0;
209 // set to one if we catch a USR2 (toggle debug mode)
210 volatile int caughtsigUSR2
=0;
213 // set to one if we catch a HUP (reload config file). In debug mode,
214 // set to two, if we catch INT (also reload config file).
215 volatile int caughtsigHUP
=0;
217 // set to signal value if we catch INT, QUIT, or TERM
218 volatile int caughtsigEXIT
=0;
220 // Attribute monitoring flags.
221 // See monitor_attr_flags below.
223 MONITOR_IGN_FAILUSE
= 0x01,
224 MONITOR_IGNORE
= 0x02,
225 MONITOR_RAW_PRINT
= 0x04,
227 MONITOR_AS_CRIT
= 0x10,
228 MONITOR_RAW_AS_CRIT
= 0x20,
231 // Array of flags for each attribute.
/// Per-attribute flag storage: one byte of MONITOR_* style bit flags for
/// each possible attribute ID.  ID 0 and IDs outside the table are never
/// stored and always read back as "not set".
class attribute_flags
{
public:
  /// Start with every flag cleared.
  attribute_flags()
  {
    memset(m_flags, 0, sizeof(m_flags));
  }

  /// Return true if ID is a usable index (1 .. table size - 1) and at
  /// least one of the bits in FLAG is set for that ID.
  bool is_set(int id, unsigned char flag) const
  {
    if (!usable_id(id))
      return false;
    return ((m_flags[id] & flag) != 0);
  }

  /// OR the bits in FLAGS into the entry for ID.
  /// Silently does nothing if ID is 0 or outside the table.
  void set(int id, unsigned char flags)
  {
    if (!usable_id(id))
      return;
    m_flags[id] = (unsigned char)(m_flags[id] | flags);
  }

private:
  /// True for IDs strictly between 0 and the number of table entries.
  bool usable_id(int id) const
  {
    return (id > 0 && id < (int)sizeof(m_flags));
  }

  unsigned char m_flags[256]; // one flag byte per attribute ID
};
252 /// Configuration data for a device. Read from smartd.conf.
253 /// Supports copy & assignment and is compatible with STL containers.
256 int lineno
; // Line number of entry in file
257 std::string name
; // Device name
258 std::string dev_type
; // Device type argument from -d directive, empty if none
259 std::string state_file
; // Path of the persistent state file, empty if none
260 std::string attrlog_file
; // Path of the persistent attrlog file, empty if none
261 bool smartcheck
; // Check SMART status
262 bool usagefailed
; // Check for failed Usage Attributes
263 bool prefail
; // Track changes in Prefail Attributes
264 bool usage
; // Track changes in Usage Attributes
265 bool selftest
; // Monitor number of selftest errors
266 bool errorlog
; // Monitor number of ATA errors
267 bool permissive
; // Ignore failed SMART commands
268 char autosave
; // 1=disable, 2=enable Autosave Attributes
269 char autoofflinetest
; // 1=disable, 2=enable Auto Offline Test
270 unsigned char fix_firmwarebug
; // FIX_*, see atacmds.h
271 bool ignorepresets
; // Ignore database of -v options
272 bool showpresets
; // Show database entry for this device
273 bool removable
; // Device may disappear (not be present)
274 char powermode
; // skip check, if disk in idle or standby mode
275 bool powerquiet
; // skip powermode 'skipping checks' message
276 int powerskipmax
; // how many times a check can be skipped
277 unsigned char tempdiff
; // Track Temperature changes >= this limit
278 unsigned char tempinfo
, tempcrit
; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
279 regular_expression test_regex
; // Regex for scheduled testing
281 // Configuration of email warning messages
282 std::string emailcmdline
; // script to execute, empty if no messages
283 std::string emailaddress
; // email address, or empty
284 unsigned char emailfreq
; // Emails once (1) daily (2) diminishing (3)
285 bool emailtest
; // Send test email?
288 unsigned char curr_pending_id
; // ID of current pending sector count, 0 if none
289 unsigned char offl_pending_id
; // ID of offline uncorrectable sector count, 0 if none
290 bool curr_pending_incr
, offl_pending_incr
; // True if current/offline pending values increase
291 bool curr_pending_set
, offl_pending_set
; // True if '-C', '-U' set in smartd.conf
293 attribute_flags monitor_attr_flags
; // MONITOR_* flags for each attribute
295 ata_vendor_attr_defs attribute_defs
; // -v options
300 dev_config::dev_config()
311 fix_firmwarebug(FIX_NOTSPECIFIED
),
312 ignorepresets(false),
319 tempinfo(0), tempcrit(0),
322 curr_pending_id(0), offl_pending_id(0),
323 curr_pending_incr(false), offl_pending_incr(false),
324 curr_pending_set(false), offl_pending_set(false)
329 // Number of allowed mail message types
330 const int SMARTD_NMAIL
= 13;
331 // Type for '-M test' mails (state not persistent)
332 const int MAILTYPE_TEST
= 0;
333 // TODO: Add const or enum for all mail types.
336 int logged
;// number of times an email has been sent
337 time_t firstsent
;// time first email was sent, as defined by time(2)
338 time_t lastsent
; // time last email was sent, as defined by time(2)
341 : logged(0), firstsent(0), lastsent(0) { }
344 /// Persistent state data for a device.
345 struct persistent_dev_state
347 unsigned char tempmin
, tempmax
; // Min/Max Temperatures
349 unsigned char selflogcount
; // total number of self-test errors
350 unsigned short selfloghour
; // lifetime hours of last self-test error
352 time_t scheduled_test_next_check
; // Time of next check for scheduled self-tests
354 mailinfo maillog
[SMARTD_NMAIL
]; // log info on when mail sent
357 int ataerrorcount
; // Total number of ATA errors
359 // Persistent part of ata_smart_values:
360 struct ata_attribute
{
363 unsigned char worst
; // Byte needed for 'raw64' attribute only.
366 ata_attribute() : id(0), val(0), worst(0), raw(0) { }
368 ata_attribute ata_attributes
[NUMBER_ATA_SMART_ATTRIBUTES
];
370 persistent_dev_state();
373 persistent_dev_state::persistent_dev_state()
374 : tempmin(0), tempmax(0),
377 scheduled_test_next_check(0),
382 /// Non-persistent state data for a device.
383 struct temp_dev_state
385 bool must_write
; // true if persistent part should be written
387 bool not_cap_offline
; // true == not capable of offline testing
388 bool not_cap_conveyance
;
391 bool not_cap_selective
;
393 unsigned char temperature
; // last recorded Temperature (in Celsius)
394 time_t tempmin_delay
; // time where Min Temperature tracking will start
396 bool powermodefail
; // true if power mode check failed
397 int powerskipcnt
; // Number of checks skipped due to idle or standby mode
400 unsigned char SmartPageSupported
; // has log sense IE page (0x2f)
401 unsigned char TempPageSupported
; // has log sense temperature page (0xd)
402 unsigned char SuppressReport
; // minimize nuisance reports
403 unsigned char modese_len
; // mode sense/select cmd len: 0 (don't
407 uint64_t num_sectors
; // Number of sectors (for selective self-test only)
408 ata_smart_values smartval
; // SMART data
409 ata_smart_thresholds_pvt smartthres
; // SMART thresholds
414 temp_dev_state::temp_dev_state()
416 not_cap_offline(false),
417 not_cap_conveyance(false),
418 not_cap_short(false),
420 not_cap_selective(false),
423 powermodefail(false),
425 SmartPageSupported(false),
426 TempPageSupported(false),
427 SuppressReport(false),
431 memset(&smartval
, 0, sizeof(smartval
));
432 memset(&smartthres
, 0, sizeof(smartthres
));
435 /// Runtime state data for a device.
437 : public persistent_dev_state
,
438 public temp_dev_state
440 void update_persistent_state();
441 void update_temp_state();
444 /// Container for configuration info for each device.
445 typedef std::vector
<dev_config
> dev_config_vector
;
447 /// Container for state info for each device.
448 typedef std::vector
<dev_state
> dev_state_vector
;
450 // Copy ATA attributes to persistent state.
451 void dev_state::update_persistent_state()
453 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
454 const ata_smart_attribute
& ta
= smartval
.vendor_attributes
[i
];
455 ata_attribute
& pa
= ata_attributes
[i
];
458 pa
.val
= pa
.worst
= 0; pa
.raw
= 0;
466 | ((uint64_t)ta
.raw
[3] << 24)
467 | ((uint64_t)ta
.raw
[4] << 32)
468 | ((uint64_t)ta
.raw
[5] << 40);
472 // Copy ATA from persistent to temp state.
473 void dev_state::update_temp_state()
475 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
476 const ata_attribute
& pa
= ata_attributes
[i
];
477 ata_smart_attribute
& ta
= smartval
.vendor_attributes
[i
];
480 ta
.current
= ta
.worst
= 0;
481 memset(ta
.raw
, 0, sizeof(ta
.raw
));
486 ta
.raw
[0] = (unsigned char) pa
.raw
;
487 ta
.raw
[1] = (unsigned char)(pa
.raw
>> 8);
488 ta
.raw
[2] = (unsigned char)(pa
.raw
>> 16);
489 ta
.raw
[3] = (unsigned char)(pa
.raw
>> 24);
490 ta
.raw
[4] = (unsigned char)(pa
.raw
>> 32);
491 ta
.raw
[5] = (unsigned char)(pa
.raw
>> 40);
495 // Parse a line from a state file.
496 static bool parse_dev_state_line(const char * line
, persistent_dev_state
& state
)
498 static regular_expression
regex(
500 "((temperature-min)" // (1 (2)
501 "|(temperature-max)" // (3)
502 "|(self-test-errors)" // (4)
503 "|(self-test-last-err-hour)" // (5)
504 "|(scheduled-test-next-check)" // (6)
505 "|(ata-error-count)" // (7)
506 "|(mail\\.([0-9]+)\\." // (8 (9)
507 "((count)" // (10 (11)
508 "|(first-sent-time)" // (12)
509 "|(last-sent-time)" // (13)
512 "|(ata-smart-attribute\\.([0-9]+)\\." // (14 (15)
520 " *= *([0-9]+)[ \n]*$", // (21)
524 throw std::logic_error("parse_dev_state_line: invalid regex");
526 const int nmatch
= 1+21;
527 regmatch_t match
[nmatch
];
528 if (!regex
.execute(line
, nmatch
, match
))
530 if (match
[nmatch
-1].rm_so
< 0)
533 uint64_t val
= strtoull(line
+ match
[nmatch
-1].rm_so
, (char **)0, 10);
536 if (match
[++m
].rm_so
>= 0)
537 state
.tempmin
= (unsigned char)val
;
538 else if (match
[++m
].rm_so
>= 0)
539 state
.tempmax
= (unsigned char)val
;
540 else if (match
[++m
].rm_so
>= 0)
541 state
.selflogcount
= (unsigned char)val
;
542 else if (match
[++m
].rm_so
>= 0)
543 state
.selfloghour
= (unsigned short)val
;
544 else if (match
[++m
].rm_so
>= 0)
545 state
.scheduled_test_next_check
= (time_t)val
;
546 else if (match
[++m
].rm_so
>= 0)
547 state
.ataerrorcount
= (int)val
;
548 else if (match
[m
+=2].rm_so
>= 0) {
549 int i
= atoi(line
+match
[m
].rm_so
);
550 if (!(0 <= i
&& i
< SMARTD_NMAIL
))
552 if (i
== MAILTYPE_TEST
) // Don't suppress test mails
554 if (match
[m
+=2].rm_so
>= 0)
555 state
.maillog
[i
].logged
= (int)val
;
556 else if (match
[++m
].rm_so
>= 0)
557 state
.maillog
[i
].firstsent
= (time_t)val
;
558 else if (match
[++m
].rm_so
>= 0)
559 state
.maillog
[i
].lastsent
= (time_t)val
;
563 else if (match
[m
+=5+1].rm_so
>= 0) {
564 int i
= atoi(line
+match
[m
].rm_so
);
565 if (!(0 <= i
&& i
< NUMBER_ATA_SMART_ATTRIBUTES
))
567 if (match
[m
+=2].rm_so
>= 0)
568 state
.ata_attributes
[i
].id
= (unsigned char)val
;
569 else if (match
[++m
].rm_so
>= 0)
570 state
.ata_attributes
[i
].val
= (unsigned char)val
;
571 else if (match
[++m
].rm_so
>= 0)
572 state
.ata_attributes
[i
].worst
= (unsigned char)val
;
573 else if (match
[++m
].rm_so
>= 0)
574 state
.ata_attributes
[i
].raw
= val
;
583 // Read a state file.
584 static bool read_dev_state(const char * path
, persistent_dev_state
& state
)
586 stdio_file
f(path
, "r");
589 pout("Cannot read state file \"%s\"\n", path
);
593 setmode(fileno(f
), O_TEXT
); // Allow files with \r\n
596 int good
= 0, bad
= 0;
598 while (fgets(line
, sizeof(line
), f
)) {
599 const char * s
= line
+ strspn(line
, " \t");
600 if (!*s
|| *s
== '#')
602 if (!parse_dev_state_line(line
, state
))
610 pout("%s: format error\n", path
);
613 pout("%s: %d invalid line(s) ignored\n", path
, bad
);
618 static void write_dev_state_line(FILE * f
, const char * name
, uint64_t val
)
621 fprintf(f
, "%s = %"PRIu64
"\n", name
, val
);
624 static void write_dev_state_line(FILE * f
, const char * name1
, int id
, const char * name2
, uint64_t val
)
627 fprintf(f
, "%s.%d.%s = %"PRIu64
"\n", name1
, id
, name2
, val
);
630 // Write a state file
631 static bool write_dev_state(const char * path
, const persistent_dev_state
& state
)
633 // Rename old "file" to "file~"
634 std::string pathbak
= path
; pathbak
+= '~';
635 unlink(pathbak
.c_str());
636 rename(path
, pathbak
.c_str());
638 stdio_file
f(path
, "w");
640 pout("Cannot create state file \"%s\"\n", path
);
644 fprintf(f
, "# smartd state file\n");
645 write_dev_state_line(f
, "temperature-min", state
.tempmin
);
646 write_dev_state_line(f
, "temperature-max", state
.tempmax
);
647 write_dev_state_line(f
, "self-test-errors", state
.selflogcount
);
648 write_dev_state_line(f
, "self-test-last-err-hour", state
.selfloghour
);
649 write_dev_state_line(f
, "scheduled-test-next-check", state
.scheduled_test_next_check
);
652 for (i
= 0; i
< SMARTD_NMAIL
; i
++) {
653 if (i
== MAILTYPE_TEST
) // Don't suppress test mails
655 const mailinfo
& mi
= state
.maillog
[i
];
658 write_dev_state_line(f
, "mail", i
, "count", mi
.logged
);
659 write_dev_state_line(f
, "mail", i
, "first-sent-time", mi
.firstsent
);
660 write_dev_state_line(f
, "mail", i
, "last-sent-time", mi
.lastsent
);
664 write_dev_state_line(f
, "ata-error-count", state
.ataerrorcount
);
666 for (i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
667 const persistent_dev_state::ata_attribute
& pa
= state
.ata_attributes
[i
];
670 write_dev_state_line(f
, "ata-smart-attribute", i
, "id", pa
.id
);
671 write_dev_state_line(f
, "ata-smart-attribute", i
, "val", pa
.val
);
672 write_dev_state_line(f
, "ata-smart-attribute", i
, "worst", pa
.worst
);
673 write_dev_state_line(f
, "ata-smart-attribute", i
, "raw", pa
.raw
);
679 // Write to the attrlog file
680 static bool write_dev_attrlog(const char * path
, const persistent_dev_state
& state
)
682 stdio_file
f(path
, "a");
684 pout("Cannot create attribute log file \"%s\"\n", path
);
689 time_t now
= time(0);
690 struct tm
* tms
= gmtime(&now
);
691 fprintf(f
, "%d-%02d-%02d %02d:%02d:%02d;",
692 1900+tms
->tm_year
, 1+tms
->tm_mon
, tms
->tm_mday
,
693 tms
->tm_hour
, tms
->tm_min
, tms
->tm_sec
);
694 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
695 const persistent_dev_state::ata_attribute
& pa
= state
.ata_attributes
[i
];
698 fprintf(f
, "\t%d;%d;%"PRIu64
";", pa
.id
, pa
.val
, pa
.raw
);
705 // Write all state files. If write_always is false, don't write
706 // unless must_write is set.
707 static void write_all_dev_states(const dev_config_vector
& configs
,
708 dev_state_vector
& states
,
709 bool write_always
= true)
711 for (unsigned i
= 0; i
< states
.size(); i
++) {
712 const dev_config
& cfg
= configs
.at(i
);
713 if (cfg
.state_file
.empty())
715 dev_state
& state
= states
[i
];
716 if (!write_always
&& !state
.must_write
)
718 if (!write_dev_state(cfg
.state_file
.c_str(), state
))
720 state
.must_write
= false;
721 if (write_always
|| debugmode
)
722 PrintOut(LOG_INFO
, "Device: %s, state written to %s\n",
723 cfg
.name
.c_str(), cfg
.state_file
.c_str());
727 // Write to all attrlog files
728 static void write_all_dev_attrlogs(const dev_config_vector
& configs
,
729 dev_state_vector
& states
)
731 for (unsigned i
= 0; i
< states
.size(); i
++) {
732 const dev_config
& cfg
= configs
.at(i
);
733 if (cfg
.attrlog_file
.empty())
735 dev_state
& state
= states
[i
];
736 write_dev_attrlog(cfg
.attrlog_file
.c_str(), state
);
740 // remove the PID file
741 void RemovePidFile(){
742 if (!pid_file
.empty()) {
743 if (unlink(pid_file
.c_str()))
744 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
745 pid_file
.c_str(), strerror(errno
));
751 extern "C" { // signal handlers require C-linkage
753 // Note if we catch a SIGUSR1
754 void USR1handler(int sig
){
761 // Note if we catch a SIGUSR2
762 void USR2handler(int sig
){
769 // Note if we catch a HUP (or INT in debug mode)
770 void HUPhandler(int sig
){
778 // signal handler for TERM, QUIT, and INT (if not in debug mode)
779 void sighandler(int sig
){
787 // Cleanup, print Goodbye message and remove pidfile
788 static int Goodbye(int status
)
790 // delete PID file, if one was created
793 // if we are exiting because of a code bug, tell user
794 if (status
==EXIT_BADCODE
)
795 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
797 // and this should be the final output from smartd before it exits
798 PrintOut(status
?LOG_CRIT
:LOG_INFO
, "smartd is exiting (exit status %d)\n", status
);
803 #define ENVLENGTH 1024
805 // a replacement for setenv() which is not available on all platforms.
806 // Note that the string passed to putenv must not be freed or made
807 // invalid, since a pointer to it is kept by putenv(). This means that
808 // it must either be a static buffer or allocated off the heap. The
809 // string can be freed if the environment variable is redefined or
810 // deleted via another call to putenv(). So we keep these on the stack
811 // as long as the popen() call is underway.
812 int exportenv(char* stackspace
, const char *name
, const char *value
){
813 snprintf(stackspace
,ENVLENGTH
, "%s=%s", name
, value
);
814 return putenv(stackspace
);
817 char* dnsdomain(const char* hostname
) {
819 #ifdef HAVE_GETADDRINFO
820 static char canon_name
[NI_MAXHOST
];
821 struct addrinfo
*info
= NULL
;
822 struct addrinfo hints
;
825 memset(&hints
, 0, sizeof(hints
));
826 hints
.ai_flags
= AI_CANONNAME
;
827 if ((err
= getaddrinfo(hostname
, NULL
, &hints
, &info
)) || (!info
)) {
828 PrintOut(LOG_CRIT
, "Error retrieving getaddrinfo(%s): %s\n", hostname
, gai_strerror(err
));
831 if (info
->ai_canonname
) {
832 strncpy(canon_name
, info
->ai_canonname
, sizeof(canon_name
));
833 canon_name
[NI_MAXHOST
- 1] = '\0';
835 if ((p
= strchr(canon_name
, '.')))
839 #elif HAVE_GETHOSTBYNAME
841 if ((hp
= gethostbyname(hostname
))) {
842 // Does this work if gethostbyname() returns an IPv6 name in
843 // colon/dot notation? [BA]
844 if ((p
= strchr(hp
->h_name
, '.')))
855 static void MailWarning(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...)
856 __attribute__ ((format (printf
, 4, 5)));
858 // If either address or executable path is non-null then send and log
859 // a warning email, or execute executable
860 static void MailWarning(const dev_config
& cfg
, dev_state
& state
, int which
, const char *fmt
, ...){
861 char command
[2048], message
[256], hostname
[256], domainname
[256], additional
[256],fullmessage
[1024];
862 char original
[256], further
[256], nisdomain
[256], subject
[256],dates
[DATEANDEPOCHLEN
];
863 char environ_strings
[11][ENVLENGTH
];
866 const int day
=24*3600;
868 const char * const whichfail
[]={
874 "FailedHealthCheck", // 5
875 "FailedReadSmartData", // 6
876 "FailedReadSmartErrorLog", // 7
877 "FailedReadSmartSelfTestLog", // 8
878 "FailedOpenDevice", // 9
879 "CurrentPendingSector", // 10
880 "OfflineUncorrectableSector", // 11
884 const char *unknown
="[Unknown]";
886 // See if user wants us to send mail
887 if (cfg
.emailaddress
.empty() && cfg
.emailcmdline
.empty())
890 std::string address
= cfg
.emailaddress
;
891 const char * executable
= cfg
.emailcmdline
.c_str();
893 // which type of mail are we sending?
894 mailinfo
* mail
=(state
.maillog
)+which
;
897 if (cfg
.emailfreq
<1 || cfg
.emailfreq
>3) {
898 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg
.emailfreq
);
901 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
902 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
903 which
, (int)sizeof(whichfail
));
907 // Return if a single warning mail has been sent.
908 if ((cfg
.emailfreq
==1) && mail
->logged
)
911 // Return if this is an email test and one has already been sent.
912 if (which
== 0 && mail
->logged
)
915 // To decide whether to send mail, we need to know what time it is.
918 // Return if less than one day has gone by
919 if (cfg
.emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
922 // Return if less than 2^(logged-1) days have gone by
923 if (cfg
.emailfreq
==3 && mail
->logged
) {
924 days
=0x01<<(mail
->logged
-1);
926 if (epoch
<(mail
->lastsent
+days
))
930 #ifdef HAVE_LIBCAP_NG
931 if (enable_capabilities
) {
932 PrintOut(LOG_ERR
, "Sending a mail was supressed. "
933 "Mails can't be send when capabilites are enabled\n");
938 // record the time of this mail message, and the first mail message
940 mail
->firstsent
=epoch
;
941 mail
->lastsent
=epoch
;
943 // get system host & domain names (not null terminated if length=MAX)
944 #ifdef HAVE_GETHOSTNAME
945 if (gethostname(hostname
, 256))
946 strcpy(hostname
, unknown
);
950 p
= dnsdomain(hostname
);
952 strncpy(domainname
, p
, 255);
953 domainname
[255]='\0';
955 strcpy(domainname
, unknown
);
958 strcpy(hostname
, unknown
);
959 strcpy(domainname
, unknown
);
962 #ifdef HAVE_GETDOMAINNAME
963 if (getdomainname(nisdomain
, 256))
964 strcpy(nisdomain
, unknown
);
968 strcpy(nisdomain
, unknown
);
971 // print warning string into message
973 vsnprintf(message
, 256, fmt
, ap
);
976 // appropriate message about further information
977 additional
[0]=original
[0]=further
[0]='\0';
979 sprintf(further
,"You can also use the smartctl utility for further investigation.\n");
981 switch (cfg
.emailfreq
) {
983 sprintf(additional
,"No additional email messages about this problem will be sent.\n");
986 sprintf(additional
,"Another email message will be sent in 24 hours if the problem persists.\n");
989 sprintf(additional
,"Another email message will be sent in %d days if the problem persists\n",
990 (0x01)<<mail
->logged
);
993 if (cfg
.emailfreq
>1 && mail
->logged
) {
994 dateandtimezoneepoch(dates
, mail
->firstsent
);
995 sprintf(original
,"The original email about this issue was sent at %s\n", dates
);
999 snprintf(subject
, 256,"SMART error (%s) detected on host: %s", whichfail
[which
], hostname
);
1001 // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
1003 #ifdef DEFAULT_MAILER
1004 executable
= DEFAULT_MAILER
;
1007 executable
= "mail";
1009 executable
= "blat"; // http://blat.sourceforge.net/
1013 #ifndef _WIN32 // blat mailer needs comma
1014 // replace commas by spaces to separate recipients
1015 std::replace(address
.begin(), address
.end(), ',', ' ');
1017 // Export information in environment variables that will be useful
1019 exportenv(environ_strings
[0], "SMARTD_MAILER", executable
);
1020 exportenv(environ_strings
[1], "SMARTD_MESSAGE", message
);
1021 exportenv(environ_strings
[2], "SMARTD_SUBJECT", subject
);
1022 dateandtimezoneepoch(dates
, mail
->firstsent
);
1023 exportenv(environ_strings
[3], "SMARTD_TFIRST", dates
);
1024 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
1025 exportenv(environ_strings
[4], "SMARTD_TFIRSTEPOCH", dates
);
1026 exportenv(environ_strings
[5], "SMARTD_FAILTYPE", whichfail
[which
]);
1027 if (!address
.empty())
1028 exportenv(environ_strings
[6], "SMARTD_ADDRESS", address
.c_str());
1029 exportenv(environ_strings
[7], "SMARTD_DEVICESTRING", cfg
.name
.c_str());
1031 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", cfg
.dev_type
.c_str());
1032 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
.name
.c_str());
1034 snprintf(fullmessage
, 1024,
1035 "This email was generated by the smartd daemon running on:\n\n"
1038 " NIS domain: %s\n\n"
1039 "The following warning/error was logged by the smartd daemon:\n\n"
1041 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
1043 hostname
, domainname
, nisdomain
, message
, further
, original
, additional
);
1044 exportenv(environ_strings
[10], "SMARTD_FULLMESSAGE", fullmessage
);
1046 // now construct a command to send this as EMAIL
1048 if (!address
.empty())
1049 snprintf(command
, 2048,
1050 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1051 "%sENDMAIL\n", subject
, address
.c_str(), fullmessage
);
1053 snprintf(command
, 2048, "%s 2>&1", executable
);
1055 // tell SYSLOG what we are about to do...
1056 const char * newadd
= (!address
.empty()? address
.c_str() : "<nomailer>");
1057 const char * newwarn
= (which
? "Warning via" : "Test of");
1059 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
1060 which
?"Sending warning via":"Executing test of", executable
, newadd
);
1062 // issue the command to send mail or to run the user's executable
1065 if (!(pfp
=popen(command
, "r")))
1066 // failed to popen() mail process
1067 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1068 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
1072 char buffer
[EBUFLEN
];
1074 // if unexpected output on stdout/stderr, null terminate, print, and flush
1075 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
1077 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
1078 buffer
[newlen
]='\0';
1079 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1080 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
1082 // flush pipe if needed
1083 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
1086 // tell user that pipe was flushed, or that something is really wrong
1087 if (count
&& count
<EBUFLEN
)
1088 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1089 newwarn
, executable
, newadd
);
1091 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1092 newwarn
, executable
, newadd
);
1095 // if something went wrong with mail process, print warning
1097 if (-1==(status
=pclose(pfp
)))
1098 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
1099 errno
?strerror(errno
):"");
1101 // mail process apparently succeeded. Check and report exit status
1104 if (WIFEXITED(status
)) {
1105 // exited 'normally' (but perhaps with nonzero status)
1106 status8
=WEXITSTATUS(status
);
1109 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1110 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
1112 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1113 newwarn
, executable
, newadd
, status
, status8
);
1115 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
1118 if (WIFSIGNALED(status
))
1119 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1120 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
1122 // this branch is probably not possible. If subprocess is
1123 // stopped then pclose() should not return.
1124 if (WIFSTOPPED(status
))
1125 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1126 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
1133 // No "here-documents" on Windows, so must use separate commandline and stdin
1134 char stdinbuf
[1024];
1135 command
[0] = stdinbuf
[0] = 0;
1136 int boxtype
= -1, boxmsgoffs
= 0;
1137 const char * newadd
= "<nomailer>";
1138 if (!address
.empty()) {
1139 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1140 char addr1
[9+1+13] = ""; int n1
= -1, n2
= -1;
1141 if (sscanf(address
.c_str(), "%9[a-z]%n,%n", addr1
, &n1
, &n2
) == 1 && (n1
== (int)address
.size() || n2
> 0)) {
1142 if (!strcmp(addr1
, "msgbox"))
1144 else if (!strcmp(addr1
, "sysmsgbox"))
1147 address
.erase(0, (n2
> n1
? n2
: n1
));
1150 if (!address
.empty()) {
1151 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1152 snprintf(command
, sizeof(command
),
1153 "%s - -q -subject \"%s\" -to \"%s\"",
1154 executable
, subject
, address
.c_str());
1155 newadd
= address
.c_str();
1159 _set_printf_count_output(1); // "%n" disabled by default
1161 // Message for mail [0...] and messagebox [boxmsgoffs...]
1162 snprintf(stdinbuf
, sizeof(stdinbuf
),
1163 "This email was generated by the smartd daemon running on:\n\n"
1166 // " NIS domain: %s\n"
1168 "The following warning/error was logged by the smartd daemon:\n\n"
1170 "For details see the event log or log file of smartd.\n\n"
1173 hostname
, /*domainname, */ nisdomain
, &boxmsgoffs
, message
, further
, original
, additional
);
1176 snprintf(command
, sizeof(command
), "%s", executable
);
1178 const char * newwarn
= (which
? "Warning via" : "Test of");
1181 daemon_messagebox(boxtype
, subject
, stdinbuf
+boxmsgoffs
);
1182 PrintOut(LOG_INFO
,"%s message box\n", newwarn
);
1185 char stdoutbuf
[800]; // < buffer in syslog_win32::vsyslog()
1188 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
1189 (which
?"Sending warning via":"Executing test of"), executable
, newadd
);
1190 rc
= daemon_spawn(command
, stdinbuf
, strlen(stdinbuf
), stdoutbuf
, sizeof(stdoutbuf
));
1191 if (rc
>= 0 && stdoutbuf
[0])
1192 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1193 newwarn
, executable
, newadd
, strlen(stdoutbuf
), stdoutbuf
);
1195 PrintOut(LOG_CRIT
,"%s %s to %s: failed, exit status %d\n",
1196 newwarn
, executable
, newadd
, rc
);
1198 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
1203 // increment mail sent counter
1207 // Printing function for watching ataprint commands, or losing them
1208 // [From GLIBC Manual: Since the prototype doesn't specify types for
1209 // optional arguments, in a call to a variadic function the default
1210 // argument promotions are performed on the optional argument
1211 // values. This means the objects of type char or short int (whether
1212 // signed or not) are promoted to either int or unsigned int, as
1214 void pout(const char *fmt
, ...){
1217 // get the correct time in syslog()
1218 FixGlibcTimeZoneBug();
1219 // initialize variable argument list
1221 // in debug==1 mode we will print the output from the ataprint.o functions!
1222 if (debugmode
&& debugmode
!=2)
1224 if (facility
== LOG_LOCAL1
) // logging to stdout
1225 vfprintf(stderr
,fmt
,ap
);
1229 // in debug==2 mode we print output from knowndrives.o functions
1230 else if (debugmode
==2 || con
->reportataioctl
|| con
->reportscsiioctl
/*|| con->controller_port???*/) {
1231 openlog("smartd", LOG_PID
, facility
);
1232 vsyslog(LOG_INFO
, fmt
, ap
);
1240 // This function prints either to stdout or to the syslog as needed.
1241 // This function is also used by utility.cpp to report LOG_CRIT errors.
1242 void PrintOut(int priority
, const char *fmt
, ...){
1245 // get the correct time in syslog()
1246 FixGlibcTimeZoneBug();
1247 // initialize variable argument list
1251 if (facility
== LOG_LOCAL1
) // logging to stdout
1252 vfprintf(stderr
,fmt
,ap
);
1257 openlog("smartd", LOG_PID
, facility
);
1258 vsyslog(priority
,fmt
,ap
);
1265 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1266 void checksumwarning(const char * string
)
1268 pout("Warning! %s error: invalid SMART checksum.\n", string
);
1271 // Wait for the pid file to show up, this makes sure a calling program knows
1272 // that the daemon is really up and running and has a pid to kill it
1273 bool WaitForPidFile()
1275 int waited
, max_wait
= 10;
1276 struct stat stat_buf
;
1278 if (pid_file
.empty() || debugmode
)
1281 for(waited
= 0; waited
< max_wait
; ++waited
) {
1282 if (!stat(pid_file
.c_str(), &stat_buf
)) {
1291 // Forks new process, closes ALL file descriptors, redirects stdin,
1292 // stdout, and stderr. Not quite daemon(). See
1293 // http://www.linuxjournal.com/article/2335
1294 // for a good description of why we do things this way.
1300 // flush all buffered streams. Else we might get two copies of open
1301 // streams since both parent and child get copies of the buffers.
1305 if ((pid
=fork()) < 0) {
1307 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1311 // we are the parent process, wait for pid file, then exit cleanly
1312 if(!WaitForPidFile()) {
1313 PrintOut(LOG_CRIT
,"PID file %s didn't show up!\n", pid_file
.c_str());
1319 // from here on, we are the child process.
1322 // Fork one more time to avoid any possibility of having terminals
1323 if ((pid
=fork()) < 0) {
1325 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1329 // we are the parent process -- exit cleanly
1332 // Now we are the child's child...
1335 // close any open file descriptors
1336 for (i
=getdtablesize();i
>=0;--i
)
1340 // Cygwin's setsid() does not detach the process from Windows console
1342 #endif // __CYGWIN__
1344 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1346 // redirect any IO attempts to /dev/null for stdin
1347 i
=open("/dev/null",O_RDWR
);
1350 NO_warn_unused_result(dup(i
));
1352 NO_warn_unused_result(dup(i
));
1355 NO_warn_unused_result(chdir("/"));
1358 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1362 // No fork() on native Win32
1363 // Detach this process from console
1365 if (daemon_detach("smartd")) {
1366 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1369 // stdin/out/err now closed if not redirected
1375 // create a PID file containing the current process id
1376 static void WritePidFile()
1378 if (!pid_file
.empty()) {
1379 pid_t pid
= getpid();
1382 old_umask
= umask(0077); // rwx------
1384 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1385 old_umask
= umask(0033); // rwxr--r--
1388 stdio_file
f(pid_file
.c_str(), "w");
1390 if (!(f
&& fprintf(f
, "%d\n", (int)pid
) > 0 && f
.close())) {
1391 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
.c_str());
1394 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
.c_str(), (int)pid
);
1398 // Prints header identifying version of code and home
1399 static void PrintHead()
1401 PrintOut(LOG_INFO
, "%s\n", format_version_info("smartd").c_str());
1404 // prints help info for configuration file Directives
1407 "Configuration file (%s) Directives (after device name):\n"
1408 " -d TYPE Set the device type: %s\n"
1409 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1410 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1411 " -S VAL Enable/disable attribute autosave (on/off)\n"
1412 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1413 " -H Monitor SMART Health Status, report if failed\n"
1414 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1415 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1416 " -f Monitor 'Usage' Attributes, report failures\n"
1417 " -m ADD Send email warning to address ADD\n"
1418 " -M TYPE Modify email warning behavior (see man page)\n"
1419 " -p Report changes in 'Prefailure' Attributes\n"
1420 " -u Report changes in 'Usage' Attributes\n"
1421 " -t Equivalent to -p and -u Directives\n"
1422 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1423 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1424 " -i ID Ignore Attribute ID for -f Directive\n"
1425 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1426 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1427 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1428 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1429 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1430 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1431 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1432 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1433 " # Comment: text after a hash sign is ignored\n"
1434 " \\ Line continuation character\n"
1435 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1436 "Use ID = 0 to turn off -C and/or -U Directives\n"
1437 "Example: /dev/hda -a\n",
1438 configfile
, smi()->get_valid_dev_types_str().c_str());
1442 /* Returns a pointer to a static string containing a formatted list of the valid
1443 arguments to the option opt or NULL on failure. */
1444 const char *GetValidArgList(char opt
) {
1448 return "<PATH_PREFIX>";
1450 return "<FILE_NAME>, -";
1452 return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
1454 return "nodev, errors, nodevstartup, never, onecheck, showtests";
1456 return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
1459 return "<FILE_NAME>";
1461 return "<INTEGER_SECONDS>";
1467 /* prints help information for command syntax */
1469 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1470 PrintOut(LOG_INFO
," -A PREFIX, --attributelog=PREFIX\n");
1471 PrintOut(LOG_INFO
," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1472 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1473 PrintOut(LOG_INFO
," [default is "SMARTMONTOOLS_ATTRIBUTELOG
"MODEL-SERIAL.ata.csv]\n");
1475 PrintOut(LOG_INFO
,"\n");
1476 PrintOut(LOG_INFO
," -B [+]FILE, --drivedb=[+]FILE\n");
1477 PrintOut(LOG_INFO
," Read and replace [add] drive database from FILE\n");
1478 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1479 PrintOut(LOG_INFO
," [default is "SMARTMONTOOLS_DRIVEDBDIR
"/drivedb.h]\n");
1481 PrintOut(LOG_INFO
,"\n");
1482 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1483 PrintOut(LOG_INFO
," Read configuration file NAME or stdin [default is %s]\n\n", configfile
);
1484 #ifdef HAVE_LIBCAP_NG
1485 PrintOut(LOG_INFO
," -C, --capabilities\n");
1486 PrintOut(LOG_INFO
," Use capabilities (EXPERIMENTAL).\n"
1487 " Warning: Mail notification does not work when used.\n\n");
1489 PrintOut(LOG_INFO
," -d, --debug\n");
1490 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1491 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1492 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1493 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1494 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1495 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1496 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1497 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
1499 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1501 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1504 PrintOut(LOG_INFO
," -n, --no-fork\n");
1505 PrintOut(LOG_INFO
," Do not fork into background\n\n");
1507 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1508 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1509 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1510 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1511 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1512 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1513 PrintOut(LOG_INFO
," -s PREFIX, --savestates=PREFIX\n");
1514 PrintOut(LOG_INFO
," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1515 #ifdef SMARTMONTOOLS_SAVESTATES
1516 PrintOut(LOG_INFO
," [default is "SMARTMONTOOLS_SAVESTATES
"MODEL-SERIAL.TYPE.state]\n");
1518 PrintOut(LOG_INFO
,"\n");
1520 PrintOut(LOG_INFO
," --service\n");
1521 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1522 PrintOut(LOG_INFO
," smartd install [options]\n");
1523 PrintOut(LOG_INFO
," Remove service with:\n");
1524 PrintOut(LOG_INFO
," smartd remove\n\n");
1526 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1527 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
1530 static int CloseDevice(smart_device
* device
, const char * name
)
1532 if (!device
->close()){
1533 PrintOut(LOG_INFO
,"Device: %s, %s, close() failed\n", name
, device
->get_errmsg());
1536 // device successfully closed
// Return true if a char is not allowed in a state file name.
// Only the characters '0'-'9', 'A'-'Z' and 'a'-'z' are accepted;
// everything else (blanks, punctuation, path separators, ...) is rejected.
// Used as the predicate for std::replace_if() when model and serial
// strings are turned into file name components.
static bool not_allowed_in_filename(char c)
{
  return !(   ('0' <= c && c <= '9')
           || ('A' <= c && c <= 'Z')
           || ('a' <= c && c <= 'z'));
}
1548 // returns <0 on failure
1549 static int ATAErrorCount(ata_device
* device
, const char * name
,
1550 unsigned char fix_firmwarebug
)
1552 struct ata_smart_errorlog log
;
1554 if (ataReadErrorLog(device
, &log
, fix_firmwarebug
)){
1555 PrintOut(LOG_INFO
,"Device: %s, Read SMART Error Log Failed\n",name
);
1559 // return current number of ATA errors
1560 return log
.error_log_pointer
?log
.ata_error_count
:0;
1563 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1564 // error count, and top bits are the power-on hours of the last error.
1565 static int SelfTestErrorCount(ata_device
* device
, const char * name
,
1566 unsigned char fix_firmwarebug
)
1568 struct ata_smart_selftestlog log
;
1570 if (ataReadSelfTestLog(device
, &log
, fix_firmwarebug
)){
1571 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1575 // return current number of self-test errors
1576 return ataPrintSmartSelfTestlog(&log
, false, fix_firmwarebug
);
// Decode the packed value returned by the self-test error count routines:
// bits 0-7 hold the error count, bits 8-23 the hour timestamp of the
// last error. The argument is parenthesized so that expressions such as
// 'a | b' are decoded as a whole instead of being split by precedence.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1582 // Log self-test execution status
1583 static void log_self_test_exec_status(const char * name
, unsigned char status
)
1586 switch (status
>> 4) {
1587 case 0x0: msg
= "completed without error"; break;
1588 case 0x1: msg
= "was aborted by the host"; break;
1589 case 0x2: msg
= "was interrupted by the host with a reset"; break;
1590 case 0x3: msg
= "could not complete due to a fatal or unknown error"; break;
1591 case 0x4: msg
= "completed with error (unknown test element)"; break;
1592 case 0x5: msg
= "completed with error (electrical test element)"; break;
1593 case 0x6: msg
= "completed with error (servo/seek test element)"; break;
1594 case 0x7: msg
= "completed with error (read test element)"; break;
1595 case 0x8: msg
= "completed with error (handling damage?)"; break;
1600 PrintOut(((status
>> 4) >= 0x4 ? LOG_CRIT
: LOG_INFO
),
1601 "Device: %s, previous self-test %s\n", name
, msg
);
1602 else if ((status
>> 4) == 0xf)
1603 PrintOut(LOG_INFO
, "Device: %s, self-test in progress, %u0%% remaining\n",
1604 name
, status
& 0x0f);
1606 PrintOut(LOG_INFO
, "Device: %s, unknown self-test status 0x%02x\n",
// TODO: Add '-F swapid' directive
// Until then this flag is fixed to false. It is handed to apply_presets()
// and format_ata_string() when the IDENTIFY DEVICE strings are processed.
const bool fix_swapped_id = false;
1614 // scan to see what ata devices there are, and if they support SMART
1615 static int ATADeviceScan(dev_config
& cfg
, dev_state
& state
, ata_device
* atadev
)
1618 struct ata_identify_device drive
;
1619 const char *name
= cfg
.name
.c_str();
1622 // Device must be open
1624 // Get drive identity structure
1625 if ((retid
=ataReadHDIdentity (atadev
, &drive
))){
1627 // Unable to read Identity structure
1628 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1630 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1631 name
, packetdevicetype(retid
-1));
1632 CloseDevice(atadev
, name
);
1635 // Store drive size (for selective self-test only)
1636 state
.num_sectors
= get_num_sectors(&drive
);
1638 // Show if device in database, and use preset vendor attribute
1639 // options unless user has requested otherwise.
1640 if (cfg
.ignorepresets
)
1641 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1643 // do whatever applypresets decides to do.
1644 if (!apply_presets(&drive
, cfg
.attribute_defs
, cfg
.fix_firmwarebug
, fix_swapped_id
))
1645 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1647 PrintOut(LOG_INFO
, "Device: %s, found in smartd database.\n", name
);
1650 // Set default '-C 197[+]' if no '-C ID' is specified.
1651 if (!cfg
.curr_pending_set
)
1652 cfg
.curr_pending_id
= get_unc_attr_id(false, cfg
.attribute_defs
, cfg
.curr_pending_incr
);
1653 // Set default '-U 198[+]' if no '-U ID' is specified.
1654 if (!cfg
.offl_pending_set
)
1655 cfg
.offl_pending_id
= get_unc_attr_id(true, cfg
.attribute_defs
, cfg
.offl_pending_incr
);
1657 // If requested, show which presets would be used for this drive
1658 if (cfg
.showpresets
) {
1659 int savedebugmode
=debugmode
;
1660 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1663 show_presets(&drive
, false);
1664 debugmode
=savedebugmode
;
1667 // see if drive supports SMART
1668 supported
=ataSmartSupport(&drive
);
1671 // drive does NOT support SMART
1672 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
1674 // can't tell if drive supports SMART
1675 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
1677 // should we proceed anyway?
1678 if (cfg
.permissive
) {
1679 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
1682 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
1683 CloseDevice(atadev
, name
);
1688 if (ataEnableSmart(atadev
)) {
1689 // Enable SMART command has failed
1690 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
1691 CloseDevice(atadev
, name
);
1695 // disable device attribute autosave...
1696 if (cfg
.autosave
==1) {
1697 if (ataDisableAutoSave(atadev
))
1698 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
1700 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
1703 // or enable device attribute autosave
1704 if (cfg
.autosave
==2) {
1705 if (ataEnableAutoSave(atadev
))
1706 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
1708 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
1711 // capability check: SMART status
1712 if (cfg
.smartcheck
&& ataSmartStatus2(atadev
) == -1) {
1713 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
1714 cfg
.smartcheck
= false;
1717 // capability check: Read smart values and thresholds. Note that
1718 // smart values are ALSO needed even if we ONLY want to know if the
1719 // device is self-test log or error-log capable! After ATA-5, this
1720 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1721 // but sadly not for ATA-5. Sigh.
1723 // do we need to get SMART data?
1724 bool smart_val_ok
= false;
1725 if ( cfg
.autoofflinetest
|| cfg
.errorlog
|| cfg
.selftest
1726 || cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
1727 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
1728 || cfg
.curr_pending_id
|| cfg
.offl_pending_id
) {
1730 if (ataReadSmartValues(atadev
, &state
.smartval
)) {
1731 PrintOut(LOG_INFO
, "Device: %s, Read SMART Values failed\n", name
);
1732 cfg
.usagefailed
= cfg
.prefail
= cfg
.usage
= false;
1733 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
1734 cfg
.curr_pending_id
= cfg
.offl_pending_id
= 0;
1737 smart_val_ok
= true;
1738 if (ataReadSmartThresholds(atadev
, &state
.smartthres
)) {
1739 PrintOut(LOG_INFO
, "Device: %s, Read SMART Thresholds failed%s\n",
1740 name
, (cfg
.usagefailed
? ", ignoring -f Directive" : ""));
1741 cfg
.usagefailed
= false;
1742 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1743 memset(&state
.smartthres
, 0, sizeof(state
.smartthres
));
1747 // see if the necessary Attribute is there to monitor offline or
1748 // current pending sectors or temperature
1749 if (cfg
.curr_pending_id
&& ata_find_attr_index(cfg
.curr_pending_id
, state
.smartval
) < 0) {
1750 PrintOut(LOG_INFO
,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1751 name
, cfg
.curr_pending_id
);
1752 cfg
.curr_pending_id
= 0;
1755 if (cfg
.offl_pending_id
&& ata_find_attr_index(cfg
.offl_pending_id
, state
.smartval
) < 0) {
1756 PrintOut(LOG_INFO
,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1757 name
, cfg
.offl_pending_id
);
1758 cfg
.offl_pending_id
= 0;
1761 if ( (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
1762 && !ata_return_temperature_value(&state
.smartval
, cfg
.attribute_defs
)) {
1763 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name
);
1764 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
1768 // enable/disable automatic on-line testing
1769 if (cfg
.autoofflinetest
) {
1770 // is this an enable or disable request?
1771 const char *what
=(cfg
.autoofflinetest
==1)?"disable":"enable";
1773 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
1775 // if command appears unsupported, issue a warning...
1776 if (!isSupportAutomaticTimer(&state
.smartval
))
1777 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
1778 // ... but then try anyway
1779 if ((cfg
.autoofflinetest
==1)?ataDisableAutoOffline(atadev
):ataEnableAutoOffline(atadev
))
1780 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
1782 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
1786 // capability check: self-test-log
1790 // start with service disabled, and re-enable it if all works OK
1791 cfg
.selftest
= false;
1792 state
.selflogcount
= 0;
1793 state
.selfloghour
= 0;
1796 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name
);
1797 else if (!cfg
.permissive
&& !isSmartTestLogCapable(&state
.smartval
, &drive
))
1798 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name
);
1799 else if ((retval
= SelfTestErrorCount(atadev
, name
, cfg
.fix_firmwarebug
)) < 0)
1800 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name
);
1802 cfg
.selftest
= true;
1803 state
.selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1804 state
.selfloghour
=SELFTEST_ERRORHOURS(retval
);
1808 // capability check: ATA error log
1812 // start with service disabled, and re-enable it if all works OK
1813 cfg
.errorlog
= false;
1814 state
.ataerrorcount
=0;
1817 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name
);
1818 else if (!cfg
.permissive
&& !isSmartErrorLogCapable(&state
.smartval
, &drive
))
1819 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name
);
1820 else if ((val
= ATAErrorCount(atadev
, name
, cfg
.fix_firmwarebug
)) < 0)
1821 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name
);
1823 cfg
.errorlog
= true;
1824 state
.ataerrorcount
=val
;
1828 // capabilities check -- does it support powermode?
1829 if (cfg
.powermode
) {
1830 int powermode
= ataCheckPowerMode(atadev
);
1832 if (-1 == powermode
) {
1833 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
1836 else if (powermode
!=0 && powermode
!=0x80 && powermode
!=0xff) {
1837 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1843 // If no tests available or selected, return
1844 if (!(cfg
.errorlog
|| cfg
.selftest
|| cfg
.smartcheck
||
1845 cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
||
1846 cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
1847 CloseDevice(atadev
, name
);
1851 // tell user we are registering device
1852 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
1854 // record number of device, type of device, increment device count
1855 if (cfg
.dev_type
.empty())
1856 cfg
.dev_type
= "ata";
1858 // close file descriptor
1859 CloseDevice(atadev
, name
);
1861 if (!state_path_prefix
.empty() || !attrlog_path_prefix
.empty()) {
1862 // Build file name for state file
1863 char model
[40+1], serial
[20+1];
1864 format_ata_string(model
, drive
.model
, sizeof(model
)-1, fix_swapped_id
);
1865 format_ata_string(serial
, drive
.serial_no
, sizeof(serial
)-1, fix_swapped_id
);
1866 std::replace_if(model
, model
+strlen(model
), not_allowed_in_filename
, '_');
1867 std::replace_if(serial
, serial
+strlen(serial
), not_allowed_in_filename
, '_');
1868 if (!state_path_prefix
.empty()) {
1869 cfg
.state_file
= strprintf("%s%s-%s.ata.state", state_path_prefix
.c_str(), model
, serial
);
1870 // Read previous state
1871 if (read_dev_state(cfg
.state_file
.c_str(), state
)) {
1872 PrintOut(LOG_INFO
, "Device: %s, state read from %s\n", name
, cfg
.state_file
.c_str());
1873 // Copy ATA attribute values to temp state
1874 state
.update_temp_state();
1877 if (!attrlog_path_prefix
.empty())
1878 cfg
.attrlog_file
= strprintf("%s%s-%s.ata.csv", attrlog_path_prefix
.c_str(), model
, serial
);
1881 // Start self-test regex check now if time was not read from state file
1882 if (!cfg
.test_regex
.empty() && !state
.scheduled_test_next_check
)
1883 state
.scheduled_test_next_check
= time(0);
1888 // on success, return 0. On failure, return >0. Never return <0,
1890 static int SCSIDeviceScan(dev_config
& cfg
, dev_state
& state
, scsi_device
* scsidev
)
1893 const char *device
= cfg
.name
.c_str();
1894 struct scsi_iec_mode_page iec
;
1897 // Device must be open
1899 // check that device is ready for commands. IE stores its stuff on
1901 if ((err
= scsiTestUnitReady(scsidev
))) {
1902 if (SIMPLE_ERR_NOT_READY
== err
)
1903 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
1904 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
1905 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
1906 else if (SIMPLE_ERR_BECOMING_READY
== err
)
1907 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
1909 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
1910 CloseDevice(scsidev
, device
);
1914 // Badly-conforming USB storage devices may fail this check.
1915 // The response to the following IE mode page fetch (current and
1916 // changeable values) is carefully examined. It has been found
1917 // that various USB devices that malform the response will lock up
1918 // if asked for a log page (e.g. temperature) so it is best to
1920 if (!(err
= scsiFetchIECmpage(scsidev
, &iec
, state
.modese_len
)))
1921 state
.modese_len
= iec
.modese_len
;
1922 else if (SIMPLE_ERR_BAD_FIELD
== err
)
1923 ; /* continue since it is reasonable not to support IE mpage */
1924 else { /* any other error (including malformed response) unreasonable */
1926 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1928 CloseDevice(scsidev
, device
);
1932 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1933 // smart if it is off). This may change to be the same as the ATA side.
1934 if (!scsi_IsExceptionControlEnabled(&iec
)) {
1935 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
1936 "Try 'smartctl -s on %s' to turn on SMART features\n",
1938 CloseDevice(scsidev
, device
);
1942 // Flag that certain log pages are supported (information may be
1943 // available from other sources).
1944 if (0 == scsiLogSense(scsidev
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0)) {
1945 for (k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
1947 case TEMPERATURE_LPAGE
:
1948 state
.TempPageSupported
= 1;
1951 state
.SmartPageSupported
= 1;
1959 // record type of device
1960 if (cfg
.dev_type
.empty())
1961 cfg
.dev_type
= "scsi";
1963 // Check if scsiCheckIE() is going to work
1967 UINT8 currenttemp
= 0;
1970 if (scsiCheckIE(scsidev
, state
.SmartPageSupported
, state
.TempPageSupported
,
1971 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
1972 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
1973 state
.SuppressReport
= 1;
1974 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
) {
1975 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device
);
1976 cfg
.tempdiff
= cfg
.tempinfo
= cfg
.tempcrit
= 0;
1981 // capability check: self-test-log
1983 int retval
= scsiCountFailedSelfTests(scsidev
, 0);
1985 // no self-test log, turn off monitoring
1986 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
1987 cfg
.selftest
= false;
1988 state
.selflogcount
= 0;
1989 state
.selfloghour
= 0;
1992 // register starting values to watch for changes
1993 state
.selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1994 state
.selfloghour
=SELFTEST_ERRORHOURS(retval
);
1998 // disable autosave (set GLTSD bit)
1999 if (cfg
.autosave
==1){
2000 if (scsiSetControlGLTSD(scsidev
, 1, state
.modese_len
))
2001 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
2003 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
2006 // or enable autosave (clear GLTSD bit)
2007 if (cfg
.autosave
==2){
2008 if (scsiSetControlGLTSD(scsidev
, 0, state
.modese_len
))
2009 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
2011 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
2014 // tell user we are registering device
2015 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
2017 // TODO: Build file name for state file
2018 if (!state_path_prefix
.empty()) {
2019 PrintOut(LOG_INFO
, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device
);
2021 // TODO: Build file name for attribute log file
2022 if (!attrlog_path_prefix
.empty()) {
2023 PrintOut(LOG_INFO
, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device
);
2026 // close file descriptor
2027 CloseDevice(scsidev
, device
);
2029 // Start self-test regex check now if time was not read from state file
2030 if (!cfg
.test_regex
.empty() && !state
.scheduled_test_next_check
)
2031 state
.scheduled_test_next_check
= time(0);
2036 // If the self-test log has got more self-test errors (or more recent
2037 // self-test errors) recorded, then notify user.
2038 static void CheckSelfTestLogs(const dev_config
& cfg
, dev_state
& state
, int newi
)
2040 const char * name
= cfg
.name
.c_str();
2044 MailWarning(cfg
, state
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
2046 // old and new error counts
2047 int oldc
=state
.selflogcount
;
2048 int newc
=SELFTEST_ERRORCOUNT(newi
);
2050 // old and new error timestamps in hours
2051 int oldh
=state
.selfloghour
;
2052 int newh
=SELFTEST_ERRORHOURS(newi
);
2055 // increase in error count
2056 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2058 MailWarning(cfg
, state
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2060 state
.must_write
= true;
2061 } else if (oldh
!=newh
) {
2062 // more recent error
2063 // a 'more recent' error might actually be a smaller hour number,
2064 // if the hour number has wrapped.
2065 // There's still a bug here. You might just happen to run a new test
2066 // exactly 32768 hours after the previous failure, and have run exactly
2067 // 20 tests between the two, in which case smartd will miss the
2069 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2071 MailWarning(cfg
, state
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2073 state
.must_write
= true;
2076 // Needed since self-test error count may DECREASE. Hour might
2077 // also have changed.
2078 state
.selflogcount
= newc
;
2079 state
.selfloghour
= newh
;
// Test types, ordered by priority.
// Each character is the leading "T" of a "T/MM/DD/d/HH" schedule pattern.
// 'L', 'S', 'C' and 'O' are gated by the long, short, conveyance and
// offline capability flags, and 'r' by the selective one.
static const char test_type_chars[] = "LncrSCO";
// Number of test types, i.e. the string length without the terminating NUL.
const unsigned num_test_types = sizeof(test_type_chars)-1;
2088 // returns test type if time to do test of type testtype,
2089 // 0 if not time to do test.
2090 static char next_scheduled_test(const dev_config
& cfg
, dev_state
& state
, bool scsi
, time_t usetime
= 0)
2092 // check that self-testing has been requested
2093 if (cfg
.test_regex
.empty())
2096 // Exit if drive not capable of any test
2097 if ( state
.not_cap_long
&& state
.not_cap_short
&&
2098 (scsi
|| (state
.not_cap_conveyance
&& state
.not_cap_offline
)))
2101 // since we are about to call localtime(), be sure glibc is informed
2102 // of any timezone changes we make.
2104 FixGlibcTimeZoneBug();
2106 // Is it time for next check?
2107 time_t now
= (!usetime
? time(0) : usetime
);
2108 if (now
< state
.scheduled_test_next_check
)
2111 // Limit time check interval to 90 days
2112 if (state
.scheduled_test_next_check
+ (3600L*24*90) < now
)
2113 state
.scheduled_test_next_check
= now
- (3600L*24*90);
2115 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2117 time_t testtime
= 0; int testhour
= 0;
2118 int maxtest
= num_test_types
-1;
2120 for (time_t t
= state
.scheduled_test_next_check
; ; ) {
2121 struct tm
* tms
= localtime(&t
);
2122 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2123 int weekday
= (tms
->tm_wday
? tms
->tm_wday
: 7);
2124 for (int i
= 0; i
<= maxtest
; i
++) {
2125 // Skip if drive not capable of this test
2126 switch (test_type_chars
[i
]) {
2127 case 'L': if (state
.not_cap_long
) continue; break;
2128 case 'S': if (state
.not_cap_short
) continue; break;
2129 case 'C': if (scsi
|| state
.not_cap_conveyance
) continue; break;
2130 case 'O': if (scsi
|| state
.not_cap_offline
) continue; break;
2132 case 'r': if (scsi
|| state
.not_cap_selective
) continue; break;
2135 // Try match of "T/MM/DD/d/HH"
2137 snprintf(pattern
, sizeof(pattern
), "%c/%02d/%02d/%1d/%02d",
2138 test_type_chars
[i
], tms
->tm_mon
+1, tms
->tm_mday
, weekday
, tms
->tm_hour
);
2139 if (cfg
.test_regex
.full_match(pattern
)) {
2141 testtype
= pattern
[0];
2142 testtime
= t
; testhour
= tms
->tm_hour
;
2143 // Limit further matches to higher priority self-tests
2148 // Exit if no tests left or current time reached
2154 if ((t
+= 3600) > now
)
2158 // Do next check not before next hour.
2159 struct tm
* tmnow
= localtime(&now
);
2160 state
.scheduled_test_next_check
= now
+ (3600 - tmnow
->tm_min
*60 - tmnow
->tm_sec
);
2163 state
.must_write
= true;
2164 // Tell user if an old test was found.
2165 if (!usetime
&& !(testhour
== tmnow
->tm_hour
&& testtime
+ 3600 > now
)) {
2166 char datebuf
[DATEANDEPOCHLEN
]; dateandtimezoneepoch(datebuf
, testtime
);
2167 PrintOut(LOG_INFO
, "Device: %s, old test of type %c not run at %s, starting now.\n",
2168 cfg
.name
.c_str(), testtype
, datebuf
);
2175 // Print a list of future tests.
2176 static void PrintTestSchedule(const dev_config_vector
& configs
, dev_state_vector
& states
, const smart_device_list
& devices
)
2178 unsigned numdev
= configs
.size();
2181 std::vector
<int> testcnts(numdev
* num_test_types
, 0);
2183 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2185 // FixGlibcTimeZoneBug(); // done in PrintOut()
2186 time_t now
= time(0);
2187 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2188 dateandtimezoneepoch(datenow
, now
);
2191 for (seconds
=checktime
; seconds
<3600L*24*90; seconds
+=checktime
) {
2192 // Check for each device whether a test will be run
2193 time_t testtime
= now
+ seconds
;
2194 for (unsigned i
= 0; i
< numdev
; i
++) {
2195 const dev_config
& cfg
= configs
.at(i
);
2196 dev_state
& state
= states
.at(i
);
2198 char testtype
= next_scheduled_test(cfg
, state
, devices
.at(i
)->is_scsi(), testtime
);
2199 if (testtype
&& (p
= strchr(test_type_chars
, testtype
))) {
2200 unsigned t
= (p
- test_type_chars
);
2201 // Report at most 5 tests of each type
2202 if (++testcnts
[i
*num_test_types
+ t
] <= 5) {
2203 dateandtimezoneepoch(date
, testtime
);
2204 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
.name
.c_str(),
2205 testcnts
[i
*num_test_types
+ t
], testtype
, date
);
2212 dateandtimezoneepoch(date
, now
+seconds
);
2213 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2214 for (unsigned i
= 0; i
< numdev
; i
++) {
2215 const dev_config
& cfg
= configs
.at(i
);
2216 bool scsi
= devices
.at(i
)->is_scsi();
2217 for (unsigned t
= 0; t
< num_test_types
; t
++) {
2218 int cnt
= testcnts
[i
*num_test_types
+ t
];
2219 if (cnt
== 0 && !strchr((scsi
? "LS" : "LSCO"), test_type_chars
[t
]))
2221 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
.name
.c_str(),
2222 cnt
, (cnt
==1?"":"s"), test_type_chars
[t
]);
2228 // Return zero on success, nonzero on failure. Perform offline (background)
2229 // short or long (extended) self test on given scsi device.
2230 static int DoSCSISelfTest(const dev_config
& cfg
, dev_state
& state
, scsi_device
* device
, char testtype
)
2233 const char *testname
= 0;
2234 const char *name
= cfg
.name
.c_str();
2237 if (scsiSelfTestInProgress(device
, &inProgress
)) {
2238 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2239 state
.not_cap_short
= state
.not_cap_long
= true;
2243 if (1 == inProgress
) {
2244 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
2245 "progress.\n", name
);
2251 testname
= "Short Self";
2252 retval
= scsiSmartShortSelfTest(device
);
2255 testname
= "Long Self";
2256 retval
= scsiSmartExtendSelfTest(device
);
2259 // If we can't do the test, exit
2260 if (NULL
== testname
) {
2261 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
2266 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
2267 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
2268 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
2271 state
.not_cap_long
= true;
2273 state
.not_cap_short
= true;
2277 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
2282 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
2287 // Do an offline immediate or self-test. Return zero on success,
2288 // nonzero on failure.
2289 static int DoATASelfTest(const dev_config
& cfg
, dev_state
& state
, ata_device
* device
, char testtype
)
2291 const char *name
= cfg
.name
.c_str();
2293 // Read current smart data and check status/capability
2294 struct ata_smart_values data
;
2295 if (ataReadSmartValues(device
, &data
) || !(data
.offline_data_collection_capability
)) {
2296 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
2300 // Check for capability to do the test
2301 int dotest
= -1, mode
= 0;
2302 const char *testname
= 0;
2305 testname
="Offline Immediate ";
2306 if (isSupportExecuteOfflineImmediate(&data
))
2307 dotest
=OFFLINE_FULL_SCAN
;
2309 state
.not_cap_offline
= true;
2312 testname
="Conveyance Self-";
2313 if (isSupportConveyanceSelfTest(&data
))
2314 dotest
=CONVEYANCE_SELF_TEST
;
2316 state
.not_cap_conveyance
= true;
2319 testname
="Short Self-";
2320 if (isSupportSelfTest(&data
))
2321 dotest
=SHORT_SELF_TEST
;
2323 state
.not_cap_short
= true;
2326 testname
="Long Self-";
2327 if (isSupportSelfTest(&data
))
2328 dotest
=EXTEND_SELF_TEST
;
2330 state
.not_cap_long
= true;
2333 case 'c': case 'n': case 'r':
2334 testname
= "Selective Self-";
2335 if (isSupportSelectiveSelfTest(&data
)) {
2336 dotest
= SELECTIVE_SELF_TEST
;
2338 case 'c': mode
= SEL_CONT
; break;
2339 case 'n': mode
= SEL_NEXT
; break;
2340 case 'r': mode
= SEL_REDO
; break;
2344 state
.not_cap_selective
= true;
2348 // If we can't do the test, exit
2350 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
2354 // If currently running a self-test, do not interrupt it to start another.
2355 if (15==(data
.self_test_exec_status
>> 4)) {
2356 if (cfg
.fix_firmwarebug
== FIX_SAMSUNG3
&& data
.self_test_exec_status
== 0xf0) {
2357 PrintOut(LOG_INFO
, "Device: %s, will not skip scheduled %sTest "
2358 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name
, testname
);
2360 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2361 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
2366 if (dotest
== SELECTIVE_SELF_TEST
) {
2368 ata_selective_selftest_args selargs
;
2369 selargs
.num_spans
= 1;
2370 selargs
.span
[0].mode
= mode
;
2371 if (ataWriteSelectiveSelfTestLog(device
, selargs
, &data
, state
.num_sectors
)) {
2372 PrintOut(LOG_CRIT
, "Device: %s, prepare %sTest failed\n", name
, testname
);
2375 uint64_t start
= selargs
.span
[0].start
, end
= selargs
.span
[0].end
;
2376 PrintOut(LOG_INFO
, "Device: %s, %s test span at LBA %"PRIu64
" - %"PRIu64
" (%"PRIu64
" sectors, %u%% - %u%% of disk).\n",
2377 name
, (selargs
.span
[0].mode
== SEL_NEXT
? "next" : "redo"),
2378 start
, end
, end
- start
+ 1,
2379 (unsigned)((100 * start
+ state
.num_sectors
/2) / state
.num_sectors
),
2380 (unsigned)((100 * end
+ state
.num_sectors
/2) / state
.num_sectors
));
2383 // execute the test, and return status
2384 int retval
= smartcommandhandler(device
, IMMEDIATE_OFFLINE
, dotest
, NULL
);
2386 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
2390 if (testtype
!= 'O')
2391 // Log next self-test execution status
2392 state
.smartval
.self_test_exec_status
= 0xff;
2394 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
2398 // Check pending sector count attribute values (-C, -U directives).
2399 static void check_pending(const dev_config
& cfg
, dev_state
& state
,
2400 unsigned char id
, bool increase_only
,
2401 const ata_smart_values
& smartval
,
2402 int mailtype
, const char * msg
)
2404 // Find attribute index
2405 int i
= ata_find_attr_index(id
, smartval
);
2406 if (!(i
>= 0 && ata_find_attr_index(id
, state
.smartval
) == i
))
2409 // No report if no sectors pending.
2410 uint64_t rawval
= ata_get_attr_raw_value(smartval
.vendor_attributes
[i
], cfg
.attribute_defs
);
2414 // If attribute is not reset, report only sector count increases.
2415 uint64_t prev_rawval
= ata_get_attr_raw_value(state
.smartval
.vendor_attributes
[i
], cfg
.attribute_defs
);
2416 if (!(!increase_only
|| prev_rawval
< rawval
))
2420 std::string s
= strprintf("Device: %s, %"PRId64
" %s", cfg
.name
.c_str(), rawval
, msg
);
2421 if (prev_rawval
> 0 && rawval
!= prev_rawval
)
2422 s
+= strprintf(" (changed %+"PRId64
")", rawval
- prev_rawval
);
2424 PrintOut(LOG_CRIT
, "%s\n", s
.c_str());
2425 MailWarning(cfg
, state
, mailtype
, "%s\n", s
.c_str());
2426 state
.must_write
= true;
2429 // Format Temperature value
2430 static const char * fmt_temp(unsigned char x
, char * buf
)
2435 sprintf(buf
, "%u", x
);
2439 // Check Temperature limits
2440 static void CheckTemperature(const dev_config
& cfg
, dev_state
& state
, unsigned char currtemp
, unsigned char triptemp
)
2442 if (!(0 < currtemp
&& currtemp
< 255)) {
2443 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
.name
.c_str());
2447 // Update Max Temperature
2448 const char * minchg
= "", * maxchg
= "";
2449 if (currtemp
> state
.tempmax
) {
2452 state
.tempmax
= currtemp
;
2453 state
.must_write
= true;
2457 if (!state
.temperature
) {
2459 if (!state
.tempmin
|| currtemp
< state
.tempmin
)
2460 // Delay Min Temperature update by ~ 30 minutes.
2461 state
.tempmin_delay
= time(0) + CHECKTIME
- 60;
2462 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2463 cfg
.name
.c_str(), (int)currtemp
, fmt_temp(state
.tempmin
, buf
), state
.tempmax
, maxchg
);
2465 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
2466 state
.temperature
= currtemp
;
2469 if (state
.tempmin_delay
) {
2470 // End Min Temperature update delay if ...
2471 if ( (state
.tempmin
&& currtemp
> state
.tempmin
) // current temp exceeds recorded min,
2472 || (state
.tempmin_delay
<= time(0))) { // or delay time is over.
2473 state
.tempmin_delay
= 0;
2475 state
.tempmin
= 255;
2479 // Update Min Temperature
2480 if (!state
.tempmin_delay
&& currtemp
< state
.tempmin
) {
2481 state
.tempmin
= currtemp
;
2482 state
.must_write
= true;
2483 if (currtemp
!= state
.temperature
)
2488 if (cfg
.tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)state
.temperature
) >= cfg
.tempdiff
)) {
2489 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2490 cfg
.name
.c_str(), (int)currtemp
-(int)state
.temperature
, currtemp
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
2491 state
.temperature
= currtemp
;
2496 if (cfg
.tempcrit
&& currtemp
>= cfg
.tempcrit
) {
2497 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2498 cfg
.name
.c_str(), currtemp
, cfg
.tempcrit
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
2499 MailWarning(cfg
, state
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2500 cfg
.name
.c_str(), currtemp
, cfg
.tempcrit
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
2502 else if (cfg
.tempinfo
&& currtemp
>= cfg
.tempinfo
) {
2503 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2504 cfg
.name
.c_str(), currtemp
, cfg
.tempinfo
, fmt_temp(state
.tempmin
, buf
), minchg
, state
.tempmax
, maxchg
);
2508 // Check normalized and raw attribute values.
2509 static void check_attribute(const dev_config
& cfg
, dev_state
& state
,
2510 const ata_smart_attribute
& attr
,
2511 const ata_smart_attribute
& prev
,
2512 const ata_smart_threshold_entry
& thre
)
2514 // Check attribute and threshold
2515 ata_attr_state attrstate
= ata_get_attr_state(attr
, thre
, cfg
.attribute_defs
);
2516 if (attrstate
== ATTRSTATE_NON_EXISTING
)
2519 // If requested, check for usage attributes that have failed.
2520 if ( cfg
.usagefailed
&& attrstate
== ATTRSTATE_FAILED_NOW
2521 && !cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_IGN_FAILUSE
)) {
2522 std::string attrname
= ata_get_smart_attr_name(attr
.id
, cfg
.attribute_defs
);
2523 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg
.name
.c_str(), attr
.id
, attrname
.c_str());
2524 MailWarning(cfg
, state
, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg
.name
.c_str(), attr
.id
, attrname
.c_str());
2525 state
.must_write
= true;
2528 // Return if we're not tracking this type of attribute
2529 bool prefail
= !!ATTRIBUTE_FLAGS_PREFAILURE(attr
.flags
);
2530 if (!( ( prefail
&& cfg
.prefail
)
2531 || (!prefail
&& cfg
.usage
)))
2534 // Return if '-I ID' was specified
2535 if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_IGNORE
))
2538 // Issue warning if they don't have the same ID in all structures.
2539 if (attr
.id
!= prev
.id
|| attrstate
== ATTRSTATE_BAD_THRESHOLD
) {
2540 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
2541 cfg
.name
.c_str(), attr
.id
, prev
.id
, thre
.id
);
2545 // Compare normalized values if valid.
2546 bool valchanged
= false;
2547 if (attrstate
> ATTRSTATE_NO_NORMVAL
) {
2548 if (attr
.current
!= prev
.current
)
2552 // Compare raw values if requested.
2553 bool rawchanged
= false;
2554 if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW
)) {
2555 if ( ata_get_attr_raw_value(attr
, cfg
.attribute_defs
)
2556 != ata_get_attr_raw_value(prev
, cfg
.attribute_defs
))
2560 // Return if no change
2561 if (!(valchanged
|| rawchanged
))
2564 // Format value strings
2565 std::string currstr
, prevstr
;
2566 if (attrstate
== ATTRSTATE_NO_NORMVAL
) {
2567 // Print raw values only
2568 currstr
= strprintf("%s (Raw)",
2569 ata_format_attr_raw_value(attr
, cfg
.attribute_defs
).c_str());
2570 prevstr
= strprintf("%s (Raw)",
2571 ata_format_attr_raw_value(prev
, cfg
.attribute_defs
).c_str());
2573 else if (cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW_PRINT
)) {
2574 // Print normalized and raw values
2575 currstr
= strprintf("%d [Raw %s]", attr
.current
,
2576 ata_format_attr_raw_value(attr
, cfg
.attribute_defs
).c_str());
2577 prevstr
= strprintf("%d [Raw %s]", prev
.current
,
2578 ata_format_attr_raw_value(prev
, cfg
.attribute_defs
).c_str());
2581 // Print normalized values only
2582 currstr
= strprintf("%d", attr
.current
);
2583 prevstr
= strprintf("%d", prev
.current
);
2587 std::string msg
= strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2588 cfg
.name
.c_str(), (prefail
? "Prefailure" : "Usage"), attr
.id
,
2589 ata_get_smart_attr_name(attr
.id
, cfg
.attribute_defs
).c_str(),
2590 prevstr
.c_str(), currstr
.c_str());
2592 // Report this change as critical ?
2593 if ( (valchanged
&& cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_AS_CRIT
))
2594 || (rawchanged
&& cfg
.monitor_attr_flags
.is_set(attr
.id
, MONITOR_RAW_AS_CRIT
))) {
2595 PrintOut(LOG_CRIT
, "%s\n", msg
.c_str());
2596 MailWarning(cfg
, state
, 2, "%s", msg
.c_str());
2599 PrintOut(LOG_INFO
, "%s\n", msg
.c_str());
2601 state
.must_write
= true;
2605 static int ATACheckDevice(const dev_config
& cfg
, dev_state
& state
, ata_device
* atadev
, bool allow_selftests
)
2607 const char * name
= cfg
.name
.c_str();
2609 // If user has asked, test the email warning system
2611 MailWarning(cfg
, state
, 0, "TEST EMAIL from smartd for device: %s", name
);
2613 // if we can't open device, fail gracefully rather than hard --
2614 // perhaps the next time around we'll be able to open it. ATAPI
2615 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2616 // given (see linux cdrom driver).
2617 if (!atadev
->open()) {
2618 PrintOut(LOG_INFO
, "Device: %s, open() failed: %s\n", name
, atadev
->get_errmsg());
2619 MailWarning(cfg
, state
, 9, "Device: %s, unable to open device", name
);
2621 } else if (debugmode
)
2622 PrintOut(LOG_INFO
,"Device: %s, opened ATA device\n", name
);
2624 // user may have requested (with the -n Directive) to leave the disk
2625 // alone if it is in idle or sleeping mode. In this case check the
2626 // power mode and exit without check if needed
2627 if (cfg
.powermode
&& !state
.powermodefail
) {
2628 int dontcheck
=0, powermode
=ataCheckPowerMode(atadev
);
2629 const char * mode
= 0;
2630 if (0 <= powermode
&& powermode
< 0xff) {
2631 // wait for possible spin up and check again
2634 powermode2
= ataCheckPowerMode(atadev
);
2635 if (powermode2
> powermode
)
2636 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
2637 powermode
= powermode2
;
2644 if (cfg
.powermode
>=1)
2650 if (cfg
.powermode
>=2)
2656 if (cfg
.powermode
>=3)
2661 mode
="ACTIVE or IDLE";
2665 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2667 state
.powermodefail
= true;
2671 // if we are going to skip a check, return now
2673 // skip at most powerskipmax checks
2674 if (!cfg
.powerskipmax
|| state
.powerskipcnt
<cfg
.powerskipmax
) {
2675 CloseDevice(atadev
, name
);
2676 if (!state
.powerskipcnt
&& !cfg
.powerquiet
) // report first only and avoid waking up system disk
2677 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
2678 state
.powerskipcnt
++;
2682 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
2683 name
, mode
, state
.powerskipcnt
, (state
.powerskipcnt
==1?"":"s"));
2685 state
.powerskipcnt
= 0;
2686 state
.tempmin_delay
= time(0) + CHECKTIME
- 60; // Delay Min Temperature update
2688 else if (state
.powerskipcnt
) {
2689 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2690 name
, mode
, state
.powerskipcnt
, (state
.powerskipcnt
==1?"":"s"));
2691 state
.powerskipcnt
= 0;
2692 state
.tempmin_delay
= time(0) + CHECKTIME
- 60; // Delay Min Temperature update
2696 // check smart status
2697 if (cfg
.smartcheck
) {
2698 int status
=ataSmartStatus2(atadev
);
2700 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
2701 MailWarning(cfg
, state
, 5, "Device: %s, not capable of SMART self-check", name
);
2702 state
.must_write
= true;
2704 else if (status
==1){
2705 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
2706 MailWarning(cfg
, state
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
2707 state
.must_write
= true;
2711 // Check everything that depends upon SMART Data (eg, Attribute values)
2712 if ( cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
2713 || cfg
.curr_pending_id
|| cfg
.offl_pending_id
2714 || cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
|| cfg
.selftest
) {
2716 // Read current attribute values.
2717 ata_smart_values curval
;
2718 if (ataReadSmartValues(atadev
, &curval
)){
2719 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
2720 MailWarning(cfg
, state
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
2721 state
.must_write
= true;
2724 // look for current or offline pending sectors
2725 if (cfg
.curr_pending_id
)
2726 check_pending(cfg
, state
, cfg
.curr_pending_id
, cfg
.curr_pending_incr
, curval
, 10,
2727 (!cfg
.curr_pending_incr
? "Currently unreadable (pending) sectors"
2728 : "Total unreadable (pending) sectors" ));
2730 if (cfg
.offl_pending_id
)
2731 check_pending(cfg
, state
, cfg
.offl_pending_id
, cfg
.offl_pending_incr
, curval
, 11,
2732 (!cfg
.offl_pending_incr
? "Offline uncorrectable sectors"
2733 : "Total offline uncorrectable sectors"));
2735 // check temperature limits
2736 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
2737 CheckTemperature(cfg
, state
, ata_return_temperature_value(&curval
, cfg
.attribute_defs
), 0);
2739 if (cfg
.usagefailed
|| cfg
.prefail
|| cfg
.usage
) {
2741 // look for failed usage attributes, or track usage or prefail attributes
2742 for (int i
= 0; i
< NUMBER_ATA_SMART_ATTRIBUTES
; i
++) {
2743 check_attribute(cfg
, state
,
2744 curval
.vendor_attributes
[i
],
2745 state
.smartval
.vendor_attributes
[i
],
2746 state
.smartthres
.thres_entries
[i
]);
2750 // Log changes of self-test execution status
2751 if ( curval
.self_test_exec_status
!= state
.smartval
.self_test_exec_status
2752 || (!allow_selftests
&& curval
.self_test_exec_status
!= 0x00) )
2753 log_self_test_exec_status(name
, curval
.self_test_exec_status
);
2756 // Save the new values into state.smartval for the next time around
2757 state
.smartval
= curval
;
2762 // check if number of selftest errors has increased (note: may also DECREASE)
2764 CheckSelfTestLogs(cfg
, state
, SelfTestErrorCount(atadev
, name
, cfg
.fix_firmwarebug
));
2766 // check if number of ATA errors has increased
2769 int newc
, oldc
= state
.ataerrorcount
;
2771 // new number of errors
2772 newc
= ATAErrorCount(atadev
, name
, cfg
.fix_firmwarebug
);
2774 // did command fail?
2776 // lack of PrintOut here is INTENTIONAL
2777 MailWarning(cfg
, state
, 7, "Device: %s, Read SMART Error Log Failed", name
);
2779 // has error count increased?
2781 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
2783 MailWarning(cfg
, state
, 4, "Device: %s, ATA error count increased from %d to %d",
2785 state
.must_write
= true;
2788 // this last line is probably not needed, count always increases
2790 state
.ataerrorcount
=newc
;
2793 // if the user has asked, and device is capable (or we're not yet
2794 // sure) check whether a self test should be done now.
2795 if (allow_selftests
&& !cfg
.test_regex
.empty()) {
2796 char testtype
= next_scheduled_test(cfg
, state
, false/*!scsi*/);
2798 DoATASelfTest(cfg
, state
, atadev
, testtype
);
2801 // Don't leave device open -- the OS/user may want to access it
2802 // before the next smartd cycle!
2803 CloseDevice(atadev
, name
);
2805 // Copy ATA attribute values to persistent state
2806 state
.update_persistent_state();
2811 static int SCSICheckDevice(const dev_config
& cfg
, dev_state
& state
, scsi_device
* scsidev
, bool allow_selftests
)
2816 const char * name
= cfg
.name
.c_str();
2819 // If the user has asked for it, test the email warning system
2821 MailWarning(cfg
, state
, 0, "TEST EMAIL from smartd for device: %s", name
);
2823 // if we can't open device, fail gracefully rather than hard --
2824 // perhaps the next time around we'll be able to open it
2825 if (!scsidev
->open()) {
2826 PrintOut(LOG_INFO
, "Device: %s, open() failed: %s\n", name
, scsidev
->get_errmsg());
2827 MailWarning(cfg
, state
, 9, "Device: %s, unable to open device", name
);
2829 } else if (debugmode
)
2830 PrintOut(LOG_INFO
,"Device: %s, opened SCSI device\n", name
);
2834 if (!state
.SuppressReport
) {
2835 if (scsiCheckIE(scsidev
, state
.SmartPageSupported
, state
.TempPageSupported
,
2836 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
2837 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
2839 MailWarning(cfg
, state
, 6, "Device: %s, failed to read SMART values", name
);
2840 state
.SuppressReport
= 1;
2844 cp
= scsiGetIEString(asc
, ascq
);
2846 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
2847 MailWarning(cfg
, state
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
2848 } else if (debugmode
)
2849 PrintOut(LOG_INFO
,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2850 name
, (int)asc
, (int)ascq
);
2851 } else if (debugmode
)
2852 PrintOut(LOG_INFO
,"Device: %s, SMART health: passed\n", name
);
2854 // check temperature limits
2855 if (cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)
2856 CheckTemperature(cfg
, state
, currenttemp
, triptemp
);
2858 // check if number of selftest errors has increased (note: may also DECREASE)
2860 CheckSelfTestLogs(cfg
, state
, scsiCountFailedSelfTests(scsidev
, 0));
2862 if (allow_selftests
&& !cfg
.test_regex
.empty()) {
2863 char testtype
= next_scheduled_test(cfg
, state
, true/*scsi*/);
2865 DoSCSISelfTest(cfg
, state
, scsidev
, testtype
);
2867 CloseDevice(scsidev
, name
);
2871 // Checks the SMART status of all ATA and SCSI devices
2872 static void CheckDevicesOnce(const dev_config_vector
& configs
, dev_state_vector
& states
,
2873 smart_device_list
& devices
, bool allow_selftests
)
2875 for (unsigned i
= 0; i
< configs
.size(); i
++) {
2876 const dev_config
& cfg
= configs
.at(i
);
2877 dev_state
& state
= states
.at(i
);
2878 smart_device
* dev
= devices
.at(i
);
2880 ATACheckDevice(cfg
, state
, dev
->to_ata(), allow_selftests
);
2881 else if (dev
->is_scsi())
2882 SCSICheckDevice(cfg
, state
, dev
->to_scsi(), allow_selftests
);
2886 // Set if Initialize() was called
2887 static bool is_initialized
= false;
2889 // Does initialization right after fork to daemon mode
2890 void Initialize(time_t *wakeuptime
){
2892 // Call Goodbye() on exit
2893 is_initialized
= true;
2899 // install signal handlers. On Solaris, can't use signal() because
2900 // it resets the handler to SIG_DFL after each call. So use sigset()
2901 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2903 // normal and abnormal exit
2904 if (SIGNALFN(SIGTERM
, sighandler
)==SIG_IGN
)
2905 SIGNALFN(SIGTERM
, SIG_IGN
);
2906 if (SIGNALFN(SIGQUIT
, sighandler
)==SIG_IGN
)
2907 SIGNALFN(SIGQUIT
, SIG_IGN
);
2909 // in debug mode, <CONTROL-C> ==> HUP
2910 if (SIGNALFN(SIGINT
, debugmode
?HUPhandler
:sighandler
)==SIG_IGN
)
2911 SIGNALFN(SIGINT
, SIG_IGN
);
2913 // Catch HUP and USR1
2914 if (SIGNALFN(SIGHUP
, HUPhandler
)==SIG_IGN
)
2915 SIGNALFN(SIGHUP
, SIG_IGN
);
2916 if (SIGNALFN(SIGUSR1
, USR1handler
)==SIG_IGN
)
2917 SIGNALFN(SIGUSR1
, SIG_IGN
);
2919 if (SIGNALFN(SIGUSR2
, USR2handler
)==SIG_IGN
)
2920 SIGNALFN(SIGUSR2
, SIG_IGN
);
2923 // initialize wakeup time to CURRENT time
2924 *wakeuptime
=time(NULL
);
2930 // Toggle debug mode implemented for native windows only
2931 // (there is no easy way to reopen tty on *nix)
2932 static void ToggleDebugMode()
2935 PrintOut(LOG_INFO
,"Signal USR2 - enabling debug mode\n");
2936 if (!daemon_enable_console("smartd [Debug]")) {
2938 daemon_signal(SIGINT
, HUPhandler
);
2939 PrintOut(LOG_INFO
,"smartd debug mode enabled, PID=%d\n", getpid());
2942 PrintOut(LOG_INFO
,"enable console failed\n");
2944 else if (debugmode
== 1) {
2945 daemon_disable_console();
2947 daemon_signal(SIGINT
, sighandler
);
2948 PrintOut(LOG_INFO
,"Signal USR2 - debug mode disabled\n");
2951 PrintOut(LOG_INFO
,"Signal USR2 - debug mode %d not changed\n", debugmode
);
2955 static time_t dosleep(time_t wakeuptime
, bool & sigwakeup
)
2957 // If past wake-up-time, compute next wake-up-time
2958 time_t timenow
=time(NULL
);
2959 while (wakeuptime
<=timenow
){
2960 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
2961 wakeuptime
+=intervals
*checktime
;
2964 // sleep until a USR1, HUP or exit signal has been caught, or we have completed sleeping
2965 while (timenow
<wakeuptime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
){
2967 // protect user against the system clock being adjusted backwards
2968 if (wakeuptime
>timenow
+checktime
){
2969 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2970 wakeuptime
=timenow
+checktime
;
2973 // Exit sleep when time interval has expired or a signal is received
2974 sleep(wakeuptime
-timenow
);
2977 // toggle debug mode?
2978 if (caughtsigUSR2
) {
2987 // if we caught a SIGUSR1 then print message and clear signal
2989 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2990 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
2995 // return adjusted wakeuptime
2999 // Print out a list of valid arguments for the Directive d
3000 void printoutvaliddirectiveargs(int priority
, char d
) {
3004 PrintOut(priority
, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3007 PrintOut(priority
, "valid_regular_expression");
3010 PrintOut(priority
, "%s", smi()->get_valid_dev_types_str().c_str());
3013 PrintOut(priority
, "normal, permissive");
3017 PrintOut(priority
, "on, off");
3020 PrintOut(priority
, "error, selftest");
3023 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3026 PrintOut(priority
, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3029 PrintOut(priority
, "use, ignore, show, showall");
3032 PrintOut(priority
, "none, samsung, samsung2, samsung3");
3037 // prints an error message on a missing, malformed or out-of-range argument (callers test for a negative result), or returns integer value of arg
3038 int GetInteger(const char *arg
, const char *name
, const char *token
, int lineno
, const char *configfile
,
3039 int min
, int max
, char * suffix
= 0)
3041 // make sure argument is there
3043 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3044 configfile
, lineno
, name
, token
, min
, max
);
3048 // get argument value (base 10), check that it's integer, and in-range
3050 int val
= strtol(arg
,&endptr
,10);
3052 // optional suffix present?
3054 if (!strcmp(endptr
, suffix
))
3055 endptr
+= strlen(suffix
);
3060 if (!(!*endptr
&& min
<= val
&& val
<= max
)) {
3061 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3062 configfile
, lineno
, name
, token
, arg
, min
, max
);
3066 // all is well; return value
3071 // Get 1-3 small integer(s) for '-W' directive
3072 int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *configfile
,
3073 unsigned char * val1
, unsigned char * val2
, unsigned char * val3
){
3074 unsigned v1
= 0, v2
= 0, v3
= 0;
3075 int n1
= -1, n2
= -1, n3
= -1, len
;
3077 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3078 configfile
, lineno
, name
, token
);
3083 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
3084 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
3085 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3086 configfile
, lineno
, name
, token
, arg
);
3089 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
3094 // This function returns 1 if it has correctly parsed one token (and
3095 // any arguments), else zero if no tokens remain. It returns -1 if an
3096 // error was encountered.
3097 static int ParseToken(char * token
, dev_config
& cfg
)
3100 const char * name
= cfg
.name
.c_str();
3101 int lineno
=cfg
.lineno
;
3102 const char *delim
= " \n\t";
3105 const char *arg
= 0;
3107 // is the rest of the line a comment?
3111 // is the token not recognized?
3112 if (*token
!='-' || strlen(token
)!=2) {
3113 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3114 configfile
, lineno
, name
, token
);
3115 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
3119 // token we will be parsing:
3122 // parse the token and swallow its argument
3124 char plus
[] = "+", excl
[] = "!";
3128 // monitor current pending sector count (default 197)
3129 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255, plus
)) < 0)
3131 cfg
.curr_pending_id
= (unsigned char)val
;
3132 cfg
.curr_pending_incr
= (*plus
== '+');
3133 cfg
.curr_pending_set
= true;
3136 // monitor offline uncorrectable sectors (default 198)
3137 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255, plus
)) < 0)
3139 cfg
.offl_pending_id
= (unsigned char)val
;
3140 cfg
.offl_pending_incr
= (*plus
== '+');
3141 cfg
.offl_pending_set
= true;
3144 // Set tolerance level for SMART command failures
3145 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3147 } else if (!strcmp(arg
, "normal")) {
3148 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3149 // not on failure of an optional S.M.A.R.T. command.
3150 // This is the default so we don't need to actually do anything here.
3151 cfg
.permissive
= false;
3152 } else if (!strcmp(arg
, "permissive")) {
3153 // Permissive mode; ignore errors from Mandatory SMART commands
3154 cfg
.permissive
= true;
3160 // specify the device type
3161 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3163 } else if (!strcmp(arg
, "removable")) {
3164 cfg
.removable
= true;
3171 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3173 } else if (!strcmp(arg
, "none")) {
3174 cfg
.fix_firmwarebug
= FIX_NONE
;
3175 } else if (!strcmp(arg
, "samsung")) {
3176 cfg
.fix_firmwarebug
= FIX_SAMSUNG
;
3177 } else if (!strcmp(arg
, "samsung2")) {
3178 cfg
.fix_firmwarebug
= FIX_SAMSUNG2
;
3179 } else if (!strcmp(arg
, "samsung3")) {
3180 cfg
.fix_firmwarebug
= FIX_SAMSUNG3
;
3186 // check SMART status
3187 cfg
.smartcheck
= true;
3190 // check for failure of usage attributes
3191 cfg
.usagefailed
= true;
3194 // track changes in all vendor attributes
3199 // track changes in prefail vendor attributes
3203 // track changes in usage vendor attributes
3207 // track changes in SMART logs
3208 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3210 } else if (!strcmp(arg
, "selftest")) {
3211 // track changes in self-test log
3212 cfg
.selftest
= true;
3213 } else if (!strcmp(arg
, "error")) {
3214 // track changes in ATA error log
3215 cfg
.errorlog
= true;
3221 // monitor everything
3222 cfg
.smartcheck
= true;
3224 cfg
.usagefailed
= true;
3226 cfg
.selftest
= true;
3227 cfg
.errorlog
= true;
3230 // automatic offline testing enable/disable
3231 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3233 } else if (!strcmp(arg
, "on")) {
3234 cfg
.autoofflinetest
= 2;
3235 } else if (!strcmp(arg
, "off")) {
3236 cfg
.autoofflinetest
= 1;
3242 // skip disk check if in idle or standby mode
3243 if (!(arg
= strtok(NULL
, delim
)))
3246 char *endptr
= NULL
;
3247 char *next
= strchr(const_cast<char*>(arg
), ',');
3249 cfg
.powerquiet
= false;
3250 cfg
.powerskipmax
= 0;
3252 if (next
!=NULL
) *next
='\0';
3253 if (!strcmp(arg
, "never"))
3255 else if (!strcmp(arg
, "sleep"))
3257 else if (!strcmp(arg
, "standby"))
3259 else if (!strcmp(arg
, "idle"))
3264 // if optional arguments are present
3265 if (!badarg
&& next
!=NULL
) {
3267 cfg
.powerskipmax
= strtol(next
, &endptr
, 10);
3269 cfg
.powerskipmax
= 0;
3271 next
= endptr
+ (*endptr
!= '\0');
3272 if (cfg
.powerskipmax
<= 0)
3275 if (*next
!= '\0') {
3276 if (!strcmp("q", next
))
3277 cfg
.powerquiet
= true;
3286 // automatic attribute autosave enable/disable
3287 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3289 } else if (!strcmp(arg
, "on")) {
3291 } else if (!strcmp(arg
, "off")) {
3298 // warn user, and delete any previously given -s REGEXP Directives
3299 if (!cfg
.test_regex
.empty()){
3300 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3301 configfile
, lineno
, name
, cfg
.test_regex
.get_pattern());
3302 cfg
.test_regex
= regular_expression();
3304 // check for missing argument
3305 if (!(arg
= strtok(NULL
, delim
))) {
3310 if (!cfg
.test_regex
.compile(arg
, REG_EXTENDED
)) {
3311 // not a valid regular expression!
3312 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3313 configfile
, lineno
, name
, arg
, cfg
.test_regex
.get_errmsg());
3317 // Do a bit of sanity checking and warn user if we think that
3318 // their regexp is "strange". User probably confused about shell
3319 // glob(3) syntax versus regular expression syntax regexp(7).
3320 if (arg
[(val
= strspn(arg
, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3321 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3322 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
3325 // send email to address that follows
3326 if (!(arg
= strtok(NULL
,delim
)))
3329 if (!cfg
.emailaddress
.empty())
3330 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3331 configfile
, lineno
, name
, cfg
.emailaddress
.c_str());
3332 cfg
.emailaddress
= arg
;
3336 // email warning options
3337 if (!(arg
= strtok(NULL
, delim
)))
3339 else if (!strcmp(arg
, "once"))
3341 else if (!strcmp(arg
, "daily"))
3343 else if (!strcmp(arg
, "diminishing"))
3345 else if (!strcmp(arg
, "test"))
3347 else if (!strcmp(arg
, "exec")) {
3348 // Get the next argument (the command line)
3349 if (!(arg
= strtok(NULL
, delim
))) {
3350 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3351 configfile
, lineno
, name
, token
);
3354 // Free the last cmd line given if any, and copy new one
3355 if (!cfg
.emailcmdline
.empty())
3356 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3357 configfile
, lineno
, name
, cfg
.emailcmdline
.c_str());
3358 cfg
.emailcmdline
= arg
;
3364 // ignore failure of usage attribute
3365 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3367 cfg
.monitor_attr_flags
.set(val
, MONITOR_IGN_FAILUSE
);
3370 // ignore attribute for tracking purposes
3371 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3373 cfg
.monitor_attr_flags
.set(val
, MONITOR_IGNORE
);
3376 // print raw value when tracking
3377 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255, excl
)) < 0)
3379 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_PRINT
);
3380 if (*excl
== '!') // attribute change is critical
3381 cfg
.monitor_attr_flags
.set(val
, MONITOR_AS_CRIT
);
3384 // track changes in raw value (forces printing of raw value)
3385 if ((val
= GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255, excl
)) < 0)
3387 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_PRINT
|MONITOR_RAW
);
3388 if (*excl
== '!') // raw value change is critical
3389 cfg
.monitor_attr_flags
.set(val
, MONITOR_RAW_AS_CRIT
);
3392 // track Temperature
3393 if ((val
=Get3Integers(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
,
3394 &cfg
.tempdiff
, &cfg
.tempinfo
, &cfg
.tempcrit
))<0)
3398 // non-default vendor-specific attribute meaning
3399 if (!(arg
=strtok(NULL
,delim
))) {
3401 } else if (!parse_attribute_def(arg
, cfg
.attribute_defs
, PRIOR_USER
)) {
3406 // Define use of drive-specific presets.
3407 if (!(arg
= strtok(NULL
, delim
))) {
3409 } else if (!strcmp(arg
, "use")) {
3410 cfg
.ignorepresets
= false;
3411 } else if (!strcmp(arg
, "ignore")) {
3412 cfg
.ignorepresets
= true;
3413 } else if (!strcmp(arg
, "show")) {
3414 cfg
.showpresets
= true;
3415 } else if (!strcmp(arg
, "showall")) {
3422 // Directive not recognized
3423 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3424 configfile
, lineno
, name
, token
);
3429 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3430 configfile
, lineno
, name
, token
);
3433 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3434 configfile
, lineno
, name
, token
, arg
);
3436 if (missingarg
|| badarg
) {
3437 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
3438 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
3439 PrintOut(LOG_CRIT
, "\n");
3446 // Scan directive for configuration file
3447 #define SCANDIRECTIVE "DEVICESCAN"
3449 // This is the routine that adds things to the conf_entries list.
3451 // Return values are:
3452 // 1: parsed a normal line
3453 // 0: found comment or blank line
3454 // -1: found SCANDIRECTIVE line
3455 // -2: found an error
3457 // Note: this routine modifies *line from the caller!
3458 static int ParseConfigLine(dev_config_vector
& conf_entries
, int /*entry*/, int lineno
, /*const*/ char * line
)
3462 const char *delim
= " \n\t";
3465 // get first token: device name. If a comment, skip line
3466 if (!(name
=strtok(line
,delim
)) || *name
=='#') {
3470 // Have we detected the SCANDIRECTIVE directive?
3471 if (!strcmp(SCANDIRECTIVE
,name
)){
3475 // We've got a legit entry, make space to store it
3476 conf_entries
.push_back( dev_config() );
3477 dev_config
& cfg
= conf_entries
.back();
3481 // Store line number, and by default check for both device types.
3484 // parse tokens one at a time from the file.
3485 while ((token
=strtok(NULL
,delim
))){
3486 int retval
=ParseToken(token
,cfg
);
3495 PrintOut(LOG_INFO
,"Parsed token %s\n",token
);
3501 // error found on the line
3506 // If NO monitoring directives are set, then set all of them.
3507 if (!(cfg
.smartcheck
|| cfg
.usagefailed
|| cfg
.prefail
||
3508 cfg
.usage
|| cfg
.selftest
|| cfg
.errorlog
||
3509 cfg
.tempdiff
|| cfg
.tempinfo
|| cfg
.tempcrit
)) {
3511 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3512 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
3514 cfg
.smartcheck
= true;
3515 cfg
.usagefailed
= true;
3518 cfg
.selftest
= true;
3519 cfg
.errorlog
= true;
3522 // additional sanity check. Has user set -M options without -m?
3523 if (cfg
.emailaddress
.empty() && (!cfg
.emailcmdline
.empty() || cfg
.emailfreq
|| cfg
.emailtest
)){
3524 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3525 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
3529 // has the user set <nomailer>?
3530 if (cfg
.emailaddress
== "<nomailer>") {
3531 // check that -M exec is also set
3532 if (cfg
.emailcmdline
.empty()){
3533 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3534 cfg
.name
.c_str(), cfg
.lineno
, configfile
);
3537 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
3538 cfg
.emailaddress
.clear();
3541 // set cfg.emailfreq to 1 (once) if user hasn't set it
3542 if ((!cfg
.emailaddress
.empty() || !cfg
.emailcmdline
.empty()) && !cfg
.emailfreq
)
3551 // Parses a configuration file. Return values are:
3552 // N>=0: found N entries
3553 // -1: syntax error in config file
3554 // -2: config file does not exist
3555 // -3: config file exists but cannot be read
3557 // In the case where the return value is 0, there are three
3559 // Empty configuration file ==> conf_entries.empty()
3560 // No configuration file ==> conf_entries[0].lineno == 0
3561 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
3562 static int ParseConfigFile(dev_config_vector
& conf_entries
)
3564 // maximum line length in configuration file
3565 const int MAXLINELEN
= 256;
3566 // maximum length of a continued line in configuration file
3567 const int MAXCONTLINE
= 1023;
3570 // Open config file, if it exists and is not <stdin>
3571 if (!(configfile
== configfile_stdin
)) { // pointer comparison ok here
3572 if (!f
.open(configfile
,"r") && (errno
!=ENOENT
|| !configfile_alt
.empty())) {
3573 // file exists but we can't read it or it should exist due to '-c' option
3574 int ret
= (errno
!=ENOENT
? -3 : -2);
3575 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
3576 strerror(errno
),configfile
);
3580 else // read from stdin ('-c -' option)
3583 // No configuration file found -- use fake one
3586 char fakeconfig
[] = SCANDIRECTIVE
" -a"; // TODO: Remove this hack, build cfg_entry.
3588 if (ParseConfigLine(conf_entries
, entry
, 0, fakeconfig
) != -1)
3589 throw std::logic_error("Internal error parsing "SCANDIRECTIVE
);
3594 setmode(fileno(f
), O_TEXT
); // Allow files with \r\n
3597 // configuration file exists
3598 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
3600 // parse config file line by line
3601 int lineno
= 1, cont
= 0, contlineno
= 0;
3602 char line
[MAXLINELEN
+2];
3603 char fullline
[MAXCONTLINE
+1];
3606 int len
=0,scandevice
;
3611 // make debugging simpler
3612 memset(line
,0,sizeof(line
));
3615 code
=fgets(line
, MAXLINELEN
+2, f
);
3617 // are we at the end of the file?
3620 scandevice
= ParseConfigLine(conf_entries
, entry
, contlineno
, fullline
);
3621 // See if we found a SCANDIRECTIVE directive
3624 // did we find a syntax error?
3627 // the final line is part of a continuation line
3634 // input file line number
3637 // See if line is too long
3639 if (len
>MAXLINELEN
){
3641 if (line
[len
-1]=='\n')
3642 warn
="(including newline!) ";
3645 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3646 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
3650 // Ignore anything after comment symbol
3651 if ((comment
=strchr(line
,'#'))){
3656 // is the total line (made of all continuation lines) too long?
3657 if (cont
+len
>MAXCONTLINE
){
3658 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3659 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
3663 // copy string so far into fullline, and increment length
3664 strcpy(fullline
+cont
,line
);
3667 // is this a continuation line? If so, replace \ by space and look at next line
3668 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
3669 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
3673 // Not a continuation line. Parse it
3674 scandevice
= ParseConfigLine(conf_entries
, entry
, contlineno
, fullline
);
3676 // did we find a scandevice directive?
3679 // did we find a syntax error?
3688 // note -- may be zero if syntax of file OK, but no valid entries!
3692 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3693 <LIST> is the list of valid arguments for option opt. */
3694 void PrintValidArgs(char opt
) {
3697 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
3698 if (!(s
= GetValidArgList(opt
)))
3699 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
3701 PrintOut(LOG_CRIT
, "%s", (char *)s
);
3702 PrintOut(LOG_CRIT
, " <=======\n");
3705 // Return true if absolute path name
3706 static bool is_abs_path(const char * path
)
3710 #if defined(_WIN32) || defined(__CYGWIN__)
3714 sscanf(path
, "%*1[A-Za-z]:%*1[/\\]%n", &n
);
3721 // Parses input line, prints usage message and
3722 // version/license/copyright messages
3723 void ParseOpts(int argc
, char **argv
){
3727 // Please update GetValidArgList() if you edit shortopts
3728 static const char shortopts
[] = "c:l:q:dDni:p:r:s:A:B:Vh?"
3729 #ifdef HAVE_LIBCAP_NG
3734 // Please update GetValidArgList() if you edit longopts
3735 struct option longopts
[] = {
3736 { "configfile", required_argument
, 0, 'c' },
3737 { "logfacility", required_argument
, 0, 'l' },
3738 { "quit", required_argument
, 0, 'q' },
3739 { "debug", no_argument
, 0, 'd' },
3740 { "showdirectives", no_argument
, 0, 'D' },
3741 { "interval", required_argument
, 0, 'i' },
3743 { "no-fork", no_argument
, 0, 'n' },
3745 { "pidfile", required_argument
, 0, 'p' },
3746 { "report", required_argument
, 0, 'r' },
3747 { "savestates", required_argument
, 0, 's' },
3748 { "attributelog", required_argument
, 0, 'A' },
3749 { "drivedb", required_argument
, 0, 'B' },
3750 #if defined(_WIN32) || defined(__CYGWIN__)
3751 { "service", no_argument
, 0, 'n' },
3753 { "version", no_argument
, 0, 'V' },
3754 { "license", no_argument
, 0, 'V' },
3755 { "copyright", no_argument
, 0, 'V' },
3756 { "help", no_argument
, 0, 'h' },
3757 { "usage", no_argument
, 0, 'h' },
3758 #ifdef HAVE_LIBCAP_NG
3759 { "capabilities", no_argument
, 0, 'C' },
3765 bool badarg
= false;
3766 bool no_defaultdb
= false; // set true on '-B FILE'
3768 // Parse input options. This horrible construction is so that emacs
3769 // indents properly. Sorry.
3770 while (-1 != (optchar
=
3771 getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)
3777 if (!(strcmp(optarg
,"nodev"))) {
3779 } else if (!(strcmp(optarg
,"nodevstartup"))) {
3781 } else if (!(strcmp(optarg
,"never"))) {
3783 } else if (!(strcmp(optarg
,"onecheck"))) {
3786 } else if (!(strcmp(optarg
,"showtests"))) {
3789 } else if (!(strcmp(optarg
,"errors"))) {
3796 // set the log facility level
3797 if (!strcmp(optarg
, "daemon"))
3798 facility
=LOG_DAEMON
;
3799 else if (!strcmp(optarg
, "local0"))
3800 facility
=LOG_LOCAL0
;
3801 else if (!strcmp(optarg
, "local1"))
3802 facility
=LOG_LOCAL1
;
3803 else if (!strcmp(optarg
, "local2"))
3804 facility
=LOG_LOCAL2
;
3805 else if (!strcmp(optarg
, "local3"))
3806 facility
=LOG_LOCAL3
;
3807 else if (!strcmp(optarg
, "local4"))
3808 facility
=LOG_LOCAL4
;
3809 else if (!strcmp(optarg
, "local5"))
3810 facility
=LOG_LOCAL5
;
3811 else if (!strcmp(optarg
, "local6"))
3812 facility
=LOG_LOCAL6
;
3813 else if (!strcmp(optarg
, "local7"))
3814 facility
=LOG_LOCAL7
;
3819 // enable debug mode
3824 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
3829 // print summary of all valid directives
3835 // Period (time interval) for checking
3836 // strtol will set errno in the event of overflow, so we'll check it.
3838 lchecktime
= strtol(optarg
, &tailptr
, 10);
3839 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
3842 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
3843 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
3844 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3847 checktime
= (int)lchecktime
;
3850 // report IOCTL transactions
3855 // split_report_arg() may modify its first argument string, so use a
3856 // copy of optarg in case we want optarg for an error message.
3857 if (!(s
= strdup(optarg
))) {
3858 PrintOut(LOG_CRIT
, "No memory to process -r option - exiting\n");
3861 if (split_report_arg(s
, &i
)) {
3863 } else if (i
<1 || i
>3) {
3866 PrintOut(LOG_CRIT
, "======> INVALID REPORT LEVEL: %s <=======\n", optarg
);
3867 PrintOut(LOG_CRIT
, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3869 } else if (!strcmp(s
,"ioctl")) {
3870 con
->reportataioctl
= con
->reportscsiioctl
= i
;
3871 } else if (!strcmp(s
,"ataioctl")) {
3872 con
->reportataioctl
= i
;
3873 } else if (!strcmp(s
,"scsiioctl")) {
3874 con
->reportscsiioctl
= i
;
3878 free(s
); // TODO: use std::string
3882 // alternate configuration file
3883 if (strcmp(optarg
,"-"))
3884 configfile
= (configfile_alt
= optarg
).c_str();
3885 else // read from stdin
3886 configfile
=configfile_stdin
;
3889 // output file with PID number
3893 // path prefix of persistent state file
3894 state_path_prefix
= optarg
;
3897 // path prefix of attribute log file
3898 attrlog_path_prefix
= optarg
;
3902 const char * path
= optarg
;
3903 if (*path
== '+' && path
[1])
3906 no_defaultdb
= true;
3907 unsigned char savedebug
= debugmode
; debugmode
= 1;
3908 if (!read_drive_database(path
))
3910 debugmode
= savedebug
;
3914 // print version and CVS info
3916 PrintOut(LOG_INFO
, "%s", format_version_info("smartd", true /*full*/).c_str());
3919 #ifdef HAVE_LIBCAP_NG
3921 // enable capabilities
3922 enable_capabilities
= true;
3926 // help: print summary of command-line options
3934 // unrecognized option
3937 // Point arg to the argument in which this option was found.
3938 arg
= argv
[optind
-1];
3939 // Check whether the option is a long option that doesn't map to -h.
3940 if (arg
[1] == '-' && optchar
!= 'h') {
3941 // Iff optopt holds a valid option then argument must be missing.
3942 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
3943 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
3944 PrintValidArgs(optopt
);
3946 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
3948 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
3952 // Iff optopt holds a valid option then argument must be missing.
3953 if (strchr(shortopts
, optopt
) != NULL
){
3954 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
3955 PrintValidArgs(optopt
);
3957 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
3959 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3966 // Check to see if option had an unrecognized or incorrect argument.
3970 // It would be nice to print the actual option name given by the user
3971 // here, but we just print the short form. Please fix this if you know
3972 // a clean way to do it.
3973 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
3974 PrintValidArgs(optchar
);
3975 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3980 // non-option arguments are not allowed
3981 if (argc
> optind
) {
3984 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
3985 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3989 // no pidfile in debug mode
3990 if (debugmode
&& !pid_file
.empty()) {
3993 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
3994 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
.c_str());
3998 // absolute path is required due to chdir('/') after fork().
3999 if (!state_path_prefix
.empty() && !debugmode
&& !is_abs_path(state_path_prefix
.c_str())) {
4002 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4003 PrintOut(LOG_CRIT
, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4004 state_path_prefix
.c_str());
4008 // absolute path is required due to chdir('/') after fork().
4009 if (!attrlog_path_prefix
.empty() && !debugmode
&& !is_abs_path(attrlog_path_prefix
.c_str())) {
4012 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4013 PrintOut(LOG_CRIT
, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4014 attrlog_path_prefix
.c_str());
4018 // Read or init drive database
4019 if (!no_defaultdb
) {
4020 unsigned char savedebug
= debugmode
; debugmode
= 1;
4021 if (!read_default_drive_databases())
4023 debugmode
= savedebug
;
4030 // Function we call if no configuration file was found or if the
4031 // SCANDIRECTIVE Directive was found. It makes entries for device
4032 // names returned by scan_smart_devices() in os_OSNAME.cpp
4033 static int MakeConfigEntries(const dev_config
& base_cfg
,
4034 dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
, const char * type
)
4036 // make list of devices
4037 smart_device_list devlist
;
4038 if (!smi()->scan_smart_devices(devlist
, (*type
? type
: 0)))
4039 PrintOut(LOG_CRIT
,"Problem creating device name scan list\n");
4041 // if no devices, or error constructing list, return
4042 if (devlist
.size() <= 0)
4045 // add empty device slots for existing config entries
4046 while (scanned_devs
.size() < conf_entries
.size())
4047 scanned_devs
.push_back((smart_device
*)0);
4049 // loop over entries to create
4050 for (unsigned i
= 0; i
< devlist
.size(); i
++) {
4051 // Move device pointer
4052 smart_device
* dev
= devlist
.release(i
);
4053 scanned_devs
.push_back(dev
);
4055 // Copy configuration, update device and type name
4056 conf_entries
.push_back(base_cfg
);
4057 dev_config
& cfg
= conf_entries
.back();
4058 cfg
.name
= dev
->get_info().info_name
;
4059 cfg
.dev_type
= type
;
4062 return devlist
.size();
4065 static void CanNotRegister(const char *name
, const char *type
, int line
, bool scandirective
)
4067 if (!debugmode
&& scandirective
)
4070 PrintOut(scandirective
?LOG_INFO
:LOG_CRIT
,
4071 "Unable to register %s device %s at line %d of file %s\n",
4072 type
, name
, line
, configfile
);
4074 PrintOut(LOG_INFO
,"Unable to register %s device %s\n",
4079 // Returns negative value (see ParseConfigFile()) if config file
4080 // had errors, else number of entries which may be zero or positive.
4081 static int ReadOrMakeConfigEntries(dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
)
4083 // parse configuration file configfile (normally /etc/smartd.conf)
4084 int entries
= ParseConfigFile(conf_entries
);
4087 // There was an error reading the configuration file.
4088 conf_entries
.clear();
4090 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
4094 // no error parsing config file.
4096 // we did not find a SCANDIRECTIVE and did find valid entries
4097 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
4099 else if (!conf_entries
.empty()) {
4100 // we found a SCANDIRECTIVE or there was no configuration file so
4101 // scan. Configuration file's last entry contains all options
4103 dev_config first
= conf_entries
.back();
4104 conf_entries
.pop_back();
4107 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
4109 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
4111 // make config list of devices to search for
4112 MakeConfigEntries(first
, conf_entries
, scanned_devs
, first
.dev_type
.c_str());
4114 // warn user if scan table found no devices
4115 if (conf_entries
.empty())
4116 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
4119 PrintOut(LOG_CRIT
,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile
);
4121 return conf_entries
.size();
4125 // This function tries devices from conf_entries. Each one that can be
4126 // registered is appended to the configs, states and devices lists;
4127 // conf_entries itself is passed as const and is not modified.
4128 static void RegisterDevices(const dev_config_vector
& conf_entries
, smart_device_list
& scanned_devs
,
4129 dev_config_vector
& configs
, dev_state_vector
& states
, smart_device_list
& devices
)
4131 // start by clearing lists/memory of ALL existing devices
4137 for (unsigned i
= 0; i
< conf_entries
.size(); i
++){
4139 dev_config cfg
= conf_entries
[i
];
4141 // get device of appropriate type
4142 smart_device_auto_ptr dev
;
4143 bool scanning
= false;
4145 // Device may already be detected during devicescan
4146 if (i
< scanned_devs
.size()) {
4147 dev
= scanned_devs
.release(i
);
4153 dev
= smi()->get_smart_device(cfg
.name
.c_str(), cfg
.dev_type
.c_str());
4155 if (cfg
.dev_type
.empty())
4156 PrintOut(LOG_INFO
,"Device: %s, unable to autodetect device type\n", cfg
.name
.c_str());
4158 PrintOut(LOG_INFO
,"Device: %s, unsupported device type '%s'\n", cfg
.name
.c_str(), cfg
.dev_type
.c_str());
4164 smart_device::device_info oldinfo
= dev
->get_info();
4166 // Open with autodetect support, may return 'better' device
4167 dev
.replace( dev
->autodetect_open() );
4169 // Report if type has changed
4170 if (oldinfo
.dev_type
!= dev
->get_dev_type())
4171 PrintOut(LOG_INFO
,"Device: %s, type changed from '%s' to '%s'\n",
4172 cfg
.name
.c_str(), oldinfo
.dev_type
.c_str(), dev
->get_dev_type());
4174 if (!dev
->is_open()) {
4175 // For linux+devfs, a nonexistent device gives a strange error
4176 // message. This makes the error message a bit more sensible.
4177 // If no debug and scanning - don't print errors
4178 if (debugmode
|| !scanning
)
4179 PrintOut(LOG_INFO
, "Device: %s, open() failed: %s\n", dev
->get_info_name(), dev
->get_errmsg());
4183 // Update informal name
4184 cfg
.name
= dev
->get_info().info_name
;
4185 PrintOut(LOG_INFO
, "Device: %s, opened\n", cfg
.name
.c_str());
4187 // Prepare initial state
4190 // register ATA devices
4192 if (ATADeviceScan(cfg
, state
, dev
->to_ata())) {
4193 CanNotRegister(cfg
.name
.c_str(), "ATA", cfg
.lineno
, scanning
);
4197 // or register SCSI devices
4198 else if (dev
->is_scsi()){
4199 if (SCSIDeviceScan(cfg
, state
, dev
->to_scsi())) {
4200 CanNotRegister(cfg
.name
.c_str(), "SCSI", cfg
.lineno
, scanning
);
4205 PrintOut(LOG_INFO
, "Device: %s, neither ATA nor SCSI device\n", cfg
.name
.c_str());
4210 // move onto the list of devices
4211 configs
.push_back(cfg
);
4212 states
.push_back(state
);
4213 devices
.push_back(dev
);
4215 // if device is explicitly listed and we can't register it, then
4216 // exit unless the user has specified that the device is removable
4217 else if (!scanning
) {
4218 if (cfg
.removable
|| quit
==2)
4219 PrintOut(LOG_INFO
, "Device %s not available\n", cfg
.name
.c_str());
4221 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg
.name
.c_str());
4229 // Main program without exception handling
// main_worker: does the daemon's work for main()/smartd_main(), which hand
// over their argc/argv unchanged and use the return value as exit status.
//
// Steps, in order: initialize the smart_interface, zero the control
// structure, parse the command line with ParseOpts(), optionally restrict
// capabilities (HAVE_LIBCAP_NG builds), then run the main loop: handle exit
// signals, (re)read the configuration on the first pass or after a HUP
// signal, register devices, check them once per pass, write state files and
// attribute logs, and sleep until the next check time.
//
// On a configuration file error the return value is EXIT_READCONF,
// EXIT_NOCONF or EXIT_BADCONF.  Throws std::logic_error if RegisterDevices()
// leaves the config/state/device containers with differing sizes.
4230 int main_worker(int argc
, char **argv
)
4232 // Initialize interface
4233 smart_interface::init();
4237 // external control variables for ATA disks
4238 smartmonctrl control
;
4240 // is it our first pass through?
4241 bool firstpass
= true;
4243 // next time to wake up
4246 // for simplicity, null all global communications variables/lists
// NOTE(review): zeroing with memset() presumes smartmonctrl is plain data
// and that con refers to control at this point -- confirm.
4248 memset(con
, 0,sizeof(control
));
4250 // parse input and print header and usage info if needed
4251 ParseOpts(argc
,argv
);
4253 // do we mute printing from ataprint commands?
// Printing is suppressed (dont_print) whenever debugmode is off.
4254 con
->printing_switchable
= false;
4255 con
->dont_print
= !debugmode
;
4257 // Configuration for each device
4258 dev_config_vector configs
;
4260 dev_state_vector states
;
4261 // Devices to monitor
4262 smart_device_list devices
;
// Passed to write_all_dev_states() and dosleep() below; cleared after each
// state write and set again after every (re)configuration.
4264 bool write_states_always
= true;
4266 #ifdef HAVE_LIBCAP_NG
4267 // Drop capabilities
// Clear everything, then re-add only CAP_SYS_ADMIN, CAP_MKNOD and
// CAP_SYS_RAWIO to the effective and permitted sets (-1 ends the list).
4268 if (enable_capabilities
) {
4269 capng_clear(CAPNG_SELECT_BOTH
);
4270 capng_updatev(CAPNG_ADD
, (capng_type_t
)(CAPNG_EFFECTIVE
|CAPNG_PERMITTED
),
4271 CAP_SYS_ADMIN
, CAP_MKNOD
, CAP_SYS_RAWIO
, -1);
4272 capng_apply(CAPNG_SELECT_BOTH
);
4276 // the main loop of the code
4279 // are we exiting from a signal?
4280 if (caughtsigEXIT
) {
4281 // are we exiting with SIGTERM?
4282 int isterm
=(caughtsigEXIT
==SIGTERM
);
4283 int isquit
=(caughtsigEXIT
==SIGQUIT
);
// SIGTERM always counts as an expected exit; SIGQUIT only in debug mode.
// Expected exits are logged at LOG_INFO, anything else at LOG_CRIT.
4284 int isok
=debugmode
?isterm
|| isquit
:isterm
;
4286 PrintOut(isok
?LOG_INFO
:LOG_CRIT
, "smartd received signal %d: %s\n",
4287 caughtsigEXIT
, strsignal(caughtsigEXIT
));
4292 // Write state files
4293 if (!state_path_prefix
.empty())
4294 write_all_dev_states(configs
, states
);
4299 // Should we (re)read the config file?
4300 if (firstpass
|| caughtsigHUP
){
4303 // Workaround for missing SIGQUIT via keyboard on Cygwin
4304 if (caughtsigHUP
==2) {
4305 // Simulate SIGQUIT if another SIGINT arrives soon
4308 if (caughtsigHUP
==2) {
4309 caughtsigEXIT
=SIGQUIT
;
4315 // Write state files
4316 if (!state_path_prefix
.empty())
4317 write_all_dev_states(configs
, states
);
4321 "Signal HUP - rereading configuration file %s\n":
4322 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME
" quits)\n\n",
4327 dev_config_vector conf_entries
; // Entries read from smartd.conf
4328 smart_device_list scanned_devs
; // Devices found during scan
4329 // (re)reads config file, makes >=0 entries
4330 int entries
= ReadOrMakeConfigEntries(conf_entries
, scanned_devs
);
4333 // checks devices, then moves onto ata/scsi list or deallocates.
4334 RegisterDevices(conf_entries
, scanned_devs
, configs
, states
, devices
);
// Sanity check: the three containers must have the same number of
// elements after registration.
4335 if (!(configs
.size() == devices
.size() && configs
.size() == states
.size()))
4336 throw std::logic_error("Invalid result from RegisterDevices");
// Keep the previous configuration when quit==2, or when quit is 0 or 1 and
// this is a reread rather than the first pass.
4338 else if (quit
==2 || ((quit
==0 || quit
==1) && !firstpass
)) {
4339 // user has asked to continue on error in configuration file
4341 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
4344 // exit with configuration file error status
// entries -3 -> EXIT_READCONF, -2 -> EXIT_NOCONF, otherwise EXIT_BADCONF.
4345 return (entries
==-3 ? EXIT_READCONF
: entries
==-2 ? EXIT_NOCONF
: EXIT_BADCONF
);
4349 // Log number of devices we are monitoring...
4350 if (devices
.size() > 0 || quit
==2 || (quit
==1 && !firstpass
)) {
// ATA devices are identified with is_ata(); every other device is reported
// as SCSI in the message below.
4352 for (unsigned i
= 0; i
< devices
.size(); i
++) {
4353 if (devices
.at(i
)->is_ata())
// NOTE(review): "%d" is paired with devices.size() - numata; if size()
// is unsigned or size_t the specifier may not match -- confirm.
4356 PrintOut(LOG_INFO
,"Monitoring %d ATA and %d SCSI devices\n",
4357 numata
, devices
.size() - numata
);
4360 PrintOut(LOG_INFO
,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4365 // user has asked to print test schedule
4366 PrintTestSchedule(configs
, states
, devices
);
4370 #ifdef HAVE_LIBCAP_NG
// With capabilities enabled, a device configured with an email address or
// an email command line triggers the warning below.
4371 if (enable_capabilities
) {
4372 for (unsigned i
= 0; i
< configs
.size(); i
++) {
4373 if (!configs
[i
].emailaddress
.empty() || !configs
[i
].emailcmdline
.empty()) {
4374 PrintOut(LOG_WARNING
, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
4384 // Always write state files after (re)configuration
4385 write_states_always
= true;
4388 // check all devices once,
4389 // self tests are not started in first pass unless '-q onecheck' is specified
// The last argument is true on every pass after the first, or when quit==3.
4390 CheckDevicesOnce(configs
, states
, devices
, (!firstpass
|| quit
==3));
4392 // Write state files
4393 if (!state_path_prefix
.empty())
4394 write_all_dev_states(configs
, states
, write_states_always
);
4395 write_states_always
= false;
4397 // Write attribute logs
4398 if (!attrlog_path_prefix
.empty())
4399 write_all_dev_attrlogs(configs
, states
);
4401 // user has asked us to exit after first check
// NOTE(review): "sucessfully" in the message below is misspelled; correcting
// it changes the logged text, so it is left untouched here.
4403 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4404 "smartd is exiting (exit status 0)\n");
4408 // fork into background if needed
4409 if (firstpass
&& !debugmode
) {
4413 // set exit and signal handlers, write PID file, set wake-up time
4415 Initialize(&wakeuptime
);
4419 // sleep until next check time, or a signal arrives
// dosleep() returns the wake-up time used for the following pass.
4420 wakeuptime
= dosleep(wakeuptime
, write_states_always
);
// Program entry wrapper around main_worker().  Two alternative headers share
// the body below: main(), and smartd_main() for Windows, where it is started
// directly or by the service control manager.
//
// Runs main_worker(argc, argv) and converts failures into exit statuses:
// std::bad_alloc -> EXIT_NOMEM, any other std::exception -> EXIT_BADCODE.
// The resulting status is then passed through Goodbye() and also stored in
// daemon_winsvc_exitcode (presumably only in Windows service builds --
// confirm).
4427 int main(int argc
, char **argv
)
4429 // Windows: internal main function started direct or by service control manager
4430 static int smartd_main(int argc
, char **argv
)
4435 // Do the real work ...
4436 status
= main_worker(argc
, argv
);
4439 // EXIT(status) arrives here
4442 catch (const std::bad_alloc
& /*ex*/) {
4443 // Memory allocation failed (also thrown by std::operator new)
4444 PrintOut(LOG_CRIT
, "Smartd: Out of memory\n");
4445 status
= EXIT_NOMEM
;
4447 catch (const std::exception
& ex
) {
4448 // Other fatal errors
4449 PrintOut(LOG_CRIT
, "Smartd: Exception: %s\n", ex
.what());
4450 status
= EXIT_BADCODE
;
// Goodbye() receives the status and returns the value that is kept as the
// final status.
4454 status
= Goodbye(status
);
4457 daemon_winsvc_exitcode
= status
;
4464 // Main function for Windows
// Builds the static service option table (command-line switch "--service",
// service name "smartd", display name "SmartD Service", plus a description
// string) and delegates to daemon_main(), passing smartd_main as the function
// to run together with the unmodified argc/argv.  The value returned by
// daemon_main() becomes the process exit status.
4465 int main(int argc
, char **argv
){
4466 // Options for smartd windows service
4467 static const daemon_winsvc_options svc_opts
= {
4468 "--service", // cmd_opt
4469 "smartd", "SmartD Service", // servicename, displayname
4471 "Controls and monitors storage devices using the Self-Monitoring, "
4472 "Analysis and Reporting Technology System (S.M.A.R.T.) "
4473 "built into ATA and SCSI Hard Drives. "
4476 // daemon_main() handles daemon and service specific commands
4477 // and starts smartd_main() direct, from a new process,
4478 // or via service control manager
4479 return daemon_main("smartd", &svc_opts
, smartd_main
, argc
, argv
);