]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Imported Upstream version 5.38+svn2920
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://smartmontools.sourceforge.net
3 *
4 * Copyright (C) 2002-9 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7 * Copyright (C) 2008-9 Christian Franke <smartmontools-support@lists.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #ifndef _GNU_SOURCE
25 // TODO: Why is this define necessary?
26 #define _GNU_SOURCE
27 #endif
28
29 // unconditionally included files
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/stat.h> // umask
33 #include <signal.h>
34 #include <fcntl.h>
35 #include <string.h>
36 #include <syslog.h>
37 #include <stdarg.h>
38 #include <stdlib.h>
39 #include <errno.h>
40 #include <time.h>
41 #include <limits.h>
42 #include <getopt.h>
43
44 #include <stdexcept>
45 #include <string>
46 #include <vector>
47 #include <algorithm> // std::replace()
48
49 // see which system files to conditionally include
50 #include "config.h"
51
52 // conditionally included files
53 #ifndef _WIN32
54 #include <sys/wait.h>
55 #endif
56 #ifdef HAVE_UNISTD_H
57 #include <unistd.h>
58 #endif
59 #ifdef HAVE_NETDB_H
60 #include <netdb.h>
61 #endif
62
63 #ifdef _WIN32
64 #ifdef _MSC_VER
65 #pragma warning(disable:4761) // "conversion supplied"
66 typedef unsigned short mode_t;
67 typedef int pid_t;
68 #endif
69 #include <io.h> // umask()
70 #include <process.h> // getpid()
71 #endif // _WIN32
72
73 #ifdef __CYGWIN__
74 // From <windows.h>:
75 // BOOL WINAPI FreeConsole(void);
76 extern "C" int __stdcall FreeConsole(void);
77 #include <io.h> // setmode()
78 #endif // __CYGWIN__
79
80 // locally included files
81 #include "int64.h"
82 #include "atacmds.h"
83 #include "dev_interface.h"
84 #include "extern.h"
85 #include "knowndrives.h"
86 #include "scsicmds.h"
87 #include "utility.h"
88
89 // This is for solaris, where signal() resets the handler to SIG_DFL
90 // after the first signal is caught.
91 #ifdef HAVE_SIGSET
92 #define SIGNALFN sigset
93 #else
94 #define SIGNALFN signal
95 #endif
96
97 #ifdef _WIN32
98 #include "hostname_win32.h" // gethost/domainname()
99 #define HAVE_GETHOSTNAME 1
100 #define HAVE_GETDOMAINNAME 1
101 // fork()/signal()/initd simulation for native Windows
102 #include "daemon_win32.h" // daemon_main/detach/signal()
103 #undef SIGNALFN
104 #define SIGNALFN daemon_signal
105 #define strsignal daemon_strsignal
106 #define sleep daemon_sleep
107 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
108 #define SIGQUIT SIGBREAK
109 #define SIGQUIT_KEYNAME "CONTROL-Break"
110 #else // _WIN32
111 #ifdef __CYGWIN__
112 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
113 #define SIGQUIT_KEYNAME "2x CONTROL-C"
114 #else // __CYGWIN__
115 #define SIGQUIT_KEYNAME "CONTROL-\\"
116 #endif // __CYGWIN__
117 #endif // _WIN32
118
119 #if defined (__SVR4) && defined (__sun)
120 extern "C" int getdomainname(char *, int); // no declaration in header files!
121 #endif
122
123 #define ARGUSED(x) ((void)(x))
124
125 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 2915 2009-09-18 21:17:37Z chrfranke $"
126 CONFIG_H_CVSID EXTERN_H_CVSID;
127
128 extern const char *reportbug;
129
130 extern unsigned char debugmode;
131
132 // smartd exit codes
133 #define EXIT_BADCMD 1 // command line did not parse
134 #define EXIT_BADCONF 2 // syntax error in config file
135 #define EXIT_STARTUP 3 // problem forking daemon
136 #define EXIT_PID 4 // problem creating pid file
137 #define EXIT_NOCONF 5 // config file does not exist
138 #define EXIT_READCONF 6 // config file exists but cannot be read
139
140 #define EXIT_NOMEM 8 // out of memory
141 #define EXIT_BADCODE 10 // internal error - should NEVER happen
142
143 #define EXIT_BADDEV 16 // we can't monitor this device
144 #define EXIT_NODEV 17 // no devices to monitor
145
146 #define EXIT_SIGNAL 254 // abort on signal
147
148 // command-line: how long to sleep between checks
149 #define CHECKTIME 1800
150 static int checktime=CHECKTIME;
151
152 // command-line: name of PID file (empty for no pid file)
153 static std::string pid_file;
154
155 // command-line: path prefix of persistent state file, empty if no persistence.
156 static std::string state_path_prefix
157 #ifdef SMARTMONTOOLS_SAVESTATES
158 = SMARTMONTOOLS_SAVESTATES
159 #endif
160 ;
161
162 // command-line: path prefix of attribute log file, empty if no logs.
163 static std::string attrlog_path_prefix
164 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
165 = SMARTMONTOOLS_ATTRIBUTELOG
166 #endif
167 ;
168
169 // configuration file name
170 #define CONFIGFILENAME "smartd.conf"
171
172 #ifndef _WIN32
173 static const char *configfile = SMARTMONTOOLS_SYSCONFDIR "/" CONFIGFILENAME ;
174 #else
175 static const char *configfile = "./" CONFIGFILENAME ;
176 #endif
177 // configuration file "name" if read from stdin
178 static const char * const configfile_stdin = "<stdin>";
179 // path of alternate configuration file
180 static std::string configfile_alt;
181
182 // command-line: when should we exit?
183 static int quit=0;
184
185 // command-line; this is the default syslog(3) log facility to use.
186 static int facility=LOG_DAEMON;
187
188 #ifndef _WIN32
189 // command-line: fork into background?
190 static bool do_fork=true;
191 #endif
192
193 // used for control of printing, passing arguments to atacmds.c
194 smartmonctrl *con=NULL;
195
196 // set to one if we catch a USR1 (check devices now)
197 volatile int caughtsigUSR1=0;
198
199 #ifdef _WIN32
200 // set to one if we catch a USR2 (toggle debug mode)
201 volatile int caughtsigUSR2=0;
202 #endif
203
204 // set to one if we catch a HUP (reload config file). In debug mode,
205 // set to two, if we catch INT (also reload config file).
206 volatile int caughtsigHUP=0;
207
208 // set to signal value if we catch INT, QUIT, or TERM
209 volatile int caughtsigEXIT=0;
210
// Flags controlling how a single attribute is monitored.
// Each flag occupies its own bit so that flags can be or'ed together.
// See monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5,
};
221
/// Table of flag bits, one entry for each attribute ID.
/// ID 0 and IDs outside of the table are silently ignored by set()
/// and always reported as "not set" by is_set().
class attribute_flags
{
public:
  /// Start with all flags cleared.
  attribute_flags()
    { memset(m_flags, 0, sizeof(m_flags)); }

  /// Return true if at least one bit of FLAG is set for attribute ID.
  bool is_set(int id, unsigned char flag) const
    {
      if (!valid_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  /// Add the bits of FLAGS to the flags of attribute ID.
  void set(int id, unsigned char flags)
    {
      if (valid_id(id))
        m_flags[id] |= flags;
    }

private:
  /// Return true if ID is a usable index into the flag table.
  bool valid_id(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
241
242
243 /// Configuration data for a device. Read from smartd.conf.
244 /// Supports copy & assignment and is compatible with STL containers.
245 struct dev_config
246 {
247 int lineno; // Line number of entry in file
248 std::string name; // Device name
249 std::string dev_type; // Device type argument from -d directive, empty if none
250 std::string state_file; // Path of the persistent state file, empty if none
251 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
252 bool smartcheck; // Check SMART status
253 bool usagefailed; // Check for failed Usage Attributes
254 bool prefail; // Track changes in Prefail Attributes
255 bool usage; // Track changes in Usage Attributes
256 bool selftest; // Monitor number of selftest errors
257 bool errorlog; // Monitor number of ATA errors
258 bool permissive; // Ignore failed SMART commands
259 char autosave; // 1=disable, 2=enable Autosave Attributes
260 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
261 unsigned char fix_firmwarebug; // FIX_*, see atacmds.h
262 bool ignorepresets; // Ignore database of -v options
263 bool showpresets; // Show database entry for this device
264 bool removable; // Device may disappear (not be present)
265 char powermode; // skip check, if disk in idle or standby mode
266 bool powerquiet; // skip powermode 'skipping checks' message
267 int powerskipmax; // how many times can be check skipped
268 unsigned char tempdiff; // Track Temperature changes >= this limit
269 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
270 regular_expression test_regex; // Regex for scheduled testing
271
272 // Configuration of email warning messages
273 std::string emailcmdline; // script to execute, empty if no messages
274 std::string emailaddress; // email address, or empty
275 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
276 bool emailtest; // Send test email?
277
278 // ATA ONLY
279 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
280 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
281 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
282 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
283
284 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
285
286 // TODO: Encapsulate, add get/set functions
287 unsigned char attributedefs[256]; // -v options, see end of extern.h for def
288
289 dev_config();
290 };
291
292 dev_config::dev_config()
293 : lineno(0),
294 smartcheck(false),
295 usagefailed(false),
296 prefail(false),
297 usage(false),
298 selftest(false),
299 errorlog(false),
300 permissive(false),
301 autosave(0),
302 autoofflinetest(0),
303 fix_firmwarebug(FIX_NOTSPECIFIED),
304 ignorepresets(false),
305 showpresets(false),
306 removable(false),
307 powermode(0),
308 powerquiet(false),
309 powerskipmax(0),
310 tempdiff(0),
311 tempinfo(0), tempcrit(0),
312 emailfreq(0),
313 emailtest(false),
314 curr_pending_id(0), offl_pending_id(0),
315 curr_pending_incr(false), offl_pending_incr(false),
316 curr_pending_set(false), offl_pending_set(false)
317 {
318 memset(attributedefs, 0, sizeof(attributedefs));
319 }
320
321
322 // Number of allowed mail message types
323 const int SMARTD_NMAIL = 13;
324 // Type for '-M test' mails (state not persistent)
325 const int MAILTYPE_TEST = 0;
326 // TODO: Add const or enum for all mail types.
327
/// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;        // how many times an email has been sent
  time_t firstsent;  // time(2) value when the first email was sent
  time_t lastsent;   // time(2) value when the last email was sent

  /// Nothing sent yet.
  mailinfo()
    : logged(0),
      firstsent(0),
      lastsent(0)
    { }
};
336
337 /// Persistent state data for a device.
338 struct persistent_dev_state
339 {
340 unsigned char tempmin, tempmax; // Min/Max Temperatures
341
342 unsigned char selflogcount; // total number of self-test errors
343 unsigned short selfloghour; // lifetime hours of last self-test error
344
345 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
346
347 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
348
349 // ATA ONLY
350 int ataerrorcount; // Total number of ATA errors
351
352 // Persistent part of ata_smart_values:
353 struct ata_attribute {
354 unsigned char id;
355 unsigned char val;
356 uint64_t raw;
357
358 ata_attribute() : id(0), val(0), raw(0) { }
359 };
360 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
361
362 persistent_dev_state();
363 };
364
365 persistent_dev_state::persistent_dev_state()
366 : tempmin(0), tempmax(0),
367 selflogcount(0),
368 selfloghour(0),
369 scheduled_test_next_check(0),
370 ataerrorcount(0)
371 {
372 }
373
374 /// Non-persistent state data for a device.
375 struct temp_dev_state
376 {
377 bool must_write; // true if persistent part should be written
378
379 bool not_cap_offline; // true == not capable of offline testing
380 bool not_cap_conveyance;
381 bool not_cap_short;
382 bool not_cap_long;
383 bool not_cap_selective;
384
385 unsigned char temperature; // last recorded Temperature (in Celsius)
386 time_t tempmin_delay; // time where Min Temperature tracking will start
387
388 bool powermodefail; // true if power mode check failed
389 int powerskipcnt; // Number of checks skipped due to idle or standby mode
390
391 // SCSI ONLY
392 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
393 unsigned char TempPageSupported; // has log sense temperature page (0xd)
394 unsigned char SuppressReport; // minimize nuisance reports
395 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
396 // know yet) 6 or 10
397
398 // ATA ONLY
399 uint64_t num_sectors; // Number of sectors (for selective self-test only)
400 ata_smart_values smartval; // SMART data
401 ata_smart_thresholds_pvt smartthres; // SMART thresholds
402
403 temp_dev_state();
404 };
405
406 temp_dev_state::temp_dev_state()
407 : must_write(false),
408 not_cap_offline(false),
409 not_cap_conveyance(false),
410 not_cap_short(false),
411 not_cap_long(false),
412 not_cap_selective(false),
413 temperature(0),
414 tempmin_delay(0),
415 powermodefail(false),
416 powerskipcnt(0),
417 SmartPageSupported(false),
418 TempPageSupported(false),
419 SuppressReport(false),
420 modese_len(0),
421 num_sectors(0)
422 {
423 memset(&smartval, 0, sizeof(smartval));
424 memset(&smartthres, 0, sizeof(smartthres));
425 }
426
427 /// Runtime state data for a device.
428 struct dev_state
429 : public persistent_dev_state,
430 public temp_dev_state
431 {
432 void update_persistent_state();
433 void update_temp_state();
434 };
435
436 /// Container for configuration info for each device.
437 typedef std::vector<dev_config> dev_config_vector;
438
439 /// Container for state info for each device.
440 typedef std::vector<dev_state> dev_state_vector;
441
442 // Copy ATA attributes to persistent state.
443 void dev_state::update_persistent_state()
444 {
445 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
446 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
447 ata_attribute & pa = ata_attributes[i];
448 pa.id = ta.id;
449 if (ta.id == 0) {
450 pa.val = 0; pa.raw = 0;
451 continue;
452 }
453 pa.val = ta.current;
454 pa.raw = ta.raw[0]
455 | ( ta.raw[1] << 8)
456 | ( ta.raw[2] << 16)
457 | ((uint64_t)ta.raw[3] << 24)
458 | ((uint64_t)ta.raw[4] << 32)
459 | ((uint64_t)ta.raw[5] << 40);
460 }
461 }
462
463 // Copy ATA from persistent to temp state.
464 void dev_state::update_temp_state()
465 {
466 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
467 const ata_attribute & pa = ata_attributes[i];
468 ata_smart_attribute & ta = smartval.vendor_attributes[i];
469 ta.id = pa.id;
470 if (pa.id == 0) {
471 ta.current = 0; memset(ta.raw, 0, sizeof(ta.raw));
472 continue;
473 }
474 ta.current = pa.val;
475 ta.raw[0] = (unsigned char) pa.raw;
476 ta.raw[1] = (unsigned char)(pa.raw >> 8);
477 ta.raw[2] = (unsigned char)(pa.raw >> 16);
478 ta.raw[3] = (unsigned char)(pa.raw >> 24);
479 ta.raw[4] = (unsigned char)(pa.raw >> 32);
480 ta.raw[5] = (unsigned char)(pa.raw >> 40);
481 }
482 }
483
484 // Parse a line from a state file.
485 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
486 {
487 static regular_expression regex(
488 "^ *"
489 "((temperature-min)" // (1 (2)
490 "|(temperature-max)" // (3)
491 "|(self-test-errors)" // (4)
492 "|(self-test-last-err-hour)" // (5)
493 "|(scheduled-test-next-check)" // (6)
494 "|(ata-error-count)" // (7)
495 "|(mail\\.([0-9]+)\\." // (8 (9)
496 "((count)" // (10 (11)
497 "|(first-sent-time)" // (12)
498 "|(last-sent-time)" // (13)
499 ")" // 14)
500 ")" // 8)
501 "|(ata-smart-attribute\\.([0-9]+)\\." // (14 (15)
502 "((id)" // (16)
503 "|(val)" // (17)
504 "|(raw)" // (18)
505 ")" // 19)
506 ")" // 14)
507 ")" // 1)
508 " *= *([0-9]+)[ \n]*$", // (20)
509 REG_EXTENDED
510 );
511 if (regex.empty())
512 throw std::logic_error("parse_dev_state_line: invalid regex");
513
514 const int nmatch = 1+20;
515 regmatch_t match[nmatch];
516 if (!regex.execute(line, nmatch, match))
517 return false;
518 if (match[nmatch-1].rm_so < 0)
519 return false;
520
521 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
522
523 int m = 1;
524 if (match[++m].rm_so >= 0)
525 state.tempmin = (unsigned char)val;
526 else if (match[++m].rm_so >= 0)
527 state.tempmax = (unsigned char)val;
528 else if (match[++m].rm_so >= 0)
529 state.selflogcount = (unsigned char)val;
530 else if (match[++m].rm_so >= 0)
531 state.selfloghour = (unsigned short)val;
532 else if (match[++m].rm_so >= 0)
533 state.scheduled_test_next_check = (time_t)val;
534 else if (match[++m].rm_so >= 0)
535 state.ataerrorcount = (int)val;
536 else if (match[m+=2].rm_so >= 0) {
537 int i = atoi(line+match[m].rm_so);
538 if (!(0 <= i && i < SMARTD_NMAIL))
539 return false;
540 if (i == MAILTYPE_TEST) // Don't suppress test mails
541 return true;
542 if (match[m+=2].rm_so >= 0)
543 state.maillog[i].logged = (int)val;
544 else if (match[++m].rm_so >= 0)
545 state.maillog[i].firstsent = (time_t)val;
546 else if (match[++m].rm_so >= 0)
547 state.maillog[i].lastsent = (time_t)val;
548 else
549 return false;
550 }
551 else if (match[m+=5+1].rm_so >= 0) {
552 int i = atoi(line+match[m].rm_so);
553 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
554 return false;
555 if (match[m+=2].rm_so >= 0)
556 state.ata_attributes[i].id = (unsigned char)val;
557 else if (match[++m].rm_so >= 0)
558 state.ata_attributes[i].val = (unsigned char)val;
559 else if (match[++m].rm_so >= 0)
560 state.ata_attributes[i].raw = val;
561 else
562 return false;
563 }
564 else
565 return false;
566 return true;
567 }
568
569 // Read a state file.
570 static bool read_dev_state(const char * path, persistent_dev_state & state)
571 {
572 stdio_file f(path, "r");
573 if (!f) {
574 if (errno != ENOENT)
575 pout("Cannot read state file \"%s\"\n", path);
576 return false;
577 }
578 #ifdef __CYGWIN__
579 setmode(fileno(f), O_TEXT); // Allow files with \r\n
580 #endif
581
582 int good = 0, bad = 0;
583 char line[256];
584 while (fgets(line, sizeof(line), f)) {
585 const char * s = line + strspn(line, " \t");
586 if (!*s || *s == '#')
587 continue;
588 if (!parse_dev_state_line(line, state))
589 bad++;
590 else
591 good++;
592 }
593
594 if (bad) {
595 if (!good) {
596 pout("%s: format error\n", path);
597 return false;
598 }
599 pout("%s: %d invalid line(s) ignored\n", path, bad);
600 }
601 return true;
602 }
603
// Write the line "NAME = VAL" to state file F.
// Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (!val)
    return;
  // The blanks around PRIu64 are required: since C++11 a macro name which
  // directly follows a string literal is parsed as a literal suffix.
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
609
// Write the line "NAME1.ID.NAME2 = VAL" to state file F.
// Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (!val)
    return;
  // The blanks around PRIu64 are required: since C++11 a macro name which
  // directly follows a string literal is parsed as a literal suffix.
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
615
616 // Write a state file
617 static bool write_dev_state(const char * path, const persistent_dev_state & state)
618 {
619 // Rename old "file" to "file~"
620 std::string pathbak = path; pathbak += '~';
621 unlink(pathbak.c_str());
622 rename(path, pathbak.c_str());
623
624 stdio_file f(path, "w");
625 if (!f) {
626 pout("Cannot create state file \"%s\"\n", path);
627 return false;
628 }
629
630 fprintf(f, "# smartd state file\n");
631 write_dev_state_line(f, "temperature-min", state.tempmin);
632 write_dev_state_line(f, "temperature-max", state.tempmax);
633 write_dev_state_line(f, "self-test-errors", state.selflogcount);
634 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
635 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
636
637 int i;
638 for (i = 0; i < SMARTD_NMAIL; i++) {
639 if (i == MAILTYPE_TEST) // Don't suppress test mails
640 continue;
641 const mailinfo & mi = state.maillog[i];
642 if (!mi.logged)
643 continue;
644 write_dev_state_line(f, "mail", i, "count", mi.logged);
645 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
646 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
647 }
648
649 // ATA ONLY
650 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
651
652 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
653 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
654 if (!pa.id)
655 continue;
656 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
657 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
658 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
659 }
660
661 return true;
662 }
663
664 // Write to the attrlog file
665 static bool write_dev_attrlog(const char * path, const persistent_dev_state & state)
666 {
667 stdio_file f(path, "a");
668 if (!f) {
669 pout("Cannot create attribute log file \"%s\"\n", path);
670 return false;
671 }
672
673 // ATA ONLY
674 time_t now = time(0);
675 struct tm * tms = gmtime(&now);
676 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
677 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
678 tms->tm_hour, tms->tm_min, tms->tm_sec);
679 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
680 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
681 if (!pa.id)
682 continue;
683 fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
684 }
685 fprintf(f, "\n");
686
687 return true;
688 }
689
690 // Write all state files. If write_always is false, don't write
691 // unless must_write is set.
692 static void write_all_dev_states(const dev_config_vector & configs,
693 dev_state_vector & states,
694 bool write_always = true)
695 {
696 for (unsigned i = 0; i < states.size(); i++) {
697 const dev_config & cfg = configs.at(i);
698 if (cfg.state_file.empty())
699 continue;
700 dev_state & state = states[i];
701 if (!write_always && !state.must_write)
702 continue;
703 if (!write_dev_state(cfg.state_file.c_str(), state))
704 continue;
705 state.must_write = false;
706 if (write_always || debugmode)
707 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
708 cfg.name.c_str(), cfg.state_file.c_str());
709 }
710 }
711
712 // Write to all attrlog files
713 static void write_all_dev_attrlogs(const dev_config_vector & configs,
714 dev_state_vector & states)
715 {
716 for (unsigned i = 0; i < states.size(); i++) {
717 const dev_config & cfg = configs.at(i);
718 if (cfg.attrlog_file.empty())
719 continue;
720 dev_state & state = states[i];
721 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
722 }
723 }
724
725 // remove the PID file
726 void RemovePidFile(){
727 if (!pid_file.empty()) {
728 if (unlink(pid_file.c_str()))
729 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
730 pid_file.c_str(), strerror(errno));
731 pid_file.clear();
732 }
733 return;
734 }
735
736 extern "C" { // signal handlers require C-linkage
737
738 // Note if we catch a SIGUSR1
739 void USR1handler(int sig){
740 if (SIGUSR1==sig)
741 caughtsigUSR1=1;
742 return;
743 }
744
745 #ifdef _WIN32
746 // Note if we catch a SIGUSR2
747 void USR2handler(int sig){
748 if (SIGUSR2==sig)
749 caughtsigUSR2=1;
750 return;
751 }
752 #endif
753
754 // Note if we catch a HUP (or INT in debug mode)
755 void HUPhandler(int sig){
756 if (sig==SIGHUP)
757 caughtsigHUP=1;
758 else
759 caughtsigHUP=2;
760 return;
761 }
762
763 // signal handler for TERM, QUIT, and INT (if not in debug mode)
764 void sighandler(int sig){
765 if (!caughtsigEXIT)
766 caughtsigEXIT=sig;
767 return;
768 }
769
770 } // extern "C"
771
772 // Cleanup, print Goodbye message and remove pidfile
773 static int Goodbye(int status)
774 {
775 // delete PID file, if one was created
776 RemovePidFile();
777
778 // if we are exiting because of a code bug, tell user
779 if (status==EXIT_BADCODE)
780 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
781
782 // and this should be the final output from smartd before it exits
783 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
784
785 return status;
786 }
787
// Size of a buffer passed to exportenv()
#define ENVLENGTH 1024

// Replacement for setenv(), which does not exist on all platforms.
// The "NAME=VALUE" string is built in STACKSPACE (at least ENVLENGTH
// bytes) and then handed to putenv(). putenv() keeps a pointer to the
// string instead of a copy, so the buffer must stay valid and unchanged
// until the variable is redefined or deleted by another putenv() call.
// The caller therefore keeps these buffers on its stack as long as the
// popen() call is underway.
// Returns the result of putenv().
int exportenv(char* stackspace, const char *name, const char *value)
{
  snprintf(stackspace, ENVLENGTH, "%s=%s", name, value);
  const int rc = putenv(stackspace);
  return rc;
}
801
802 char* dnsdomain(const char* hostname) {
803 char *p = NULL;
804 #ifdef HAVE_GETADDRINFO
805 static char canon_name[NI_MAXHOST];
806 struct addrinfo *info = NULL;
807 struct addrinfo hints;
808 int err;
809
810 memset(&hints, 0, sizeof(hints));
811 hints.ai_flags = AI_CANONNAME;
812 if ((err = getaddrinfo(hostname, NULL, &hints, &info)) || (!info)) {
813 PrintOut(LOG_CRIT, "Error retrieving getaddrinfo(%s): %s\n", hostname, gai_strerror(err));
814 return NULL;
815 }
816 if (info->ai_canonname) {
817 strncpy(canon_name, info->ai_canonname, sizeof(canon_name));
818 canon_name[NI_MAXHOST - 1] = '\0';
819 p = canon_name;
820 if ((p = strchr(canon_name, '.')))
821 p++;
822 }
823 freeaddrinfo(info);
824 #elif HAVE_GETHOSTBYNAME
825 struct hostent *hp;
826 if ((hp = gethostbyname(hostname))) {
827 // Does this work if gethostbyname() returns an IPv6 name in
828 // colon/dot notation? [BA]
829 if ((p = strchr(hp->h_name, '.')))
830 p++; // skip "."
831 }
832 #else
833 ARGUSED(hostname);
834 #endif
835 return p;
836 }
837
838 #define EBUFLEN 1024
839
840 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
841 __attribute__ ((format (printf, 4, 5)));
842
843 // If either address or executable path is non-null then send and log
844 // a warning email, or execute executable
845 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...){
846 char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
847 char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
848 char environ_strings[11][ENVLENGTH];
849 time_t epoch;
850 va_list ap;
851 const int day=24*3600;
852 int days=0;
853 const char * const whichfail[]={
854 "EmailTest", // 0
855 "Health", // 1
856 "Usage", // 2
857 "SelfTest", // 3
858 "ErrorCount", // 4
859 "FailedHealthCheck", // 5
860 "FailedReadSmartData", // 6
861 "FailedReadSmartErrorLog", // 7
862 "FailedReadSmartSelfTestLog", // 8
863 "FailedOpenDevice", // 9
864 "CurrentPendingSector", // 10
865 "OfflineUncorrectableSector", // 11
866 "Temperature" // 12
867 };
868
869 const char *unknown="[Unknown]";
870
871 // See if user wants us to send mail
872 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
873 return;
874
875 std::string address = cfg.emailaddress;
876 const char * executable = cfg.emailcmdline.c_str();
877
878 // which type of mail are we sending?
879 mailinfo * mail=(state.maillog)+which;
880
881 // checks for sanity
882 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
883 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
884 return;
885 }
886 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
887 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
888 which, (int)sizeof(whichfail));
889 return;
890 }
891
892 // Return if a single warning mail has been sent.
893 if ((cfg.emailfreq==1) && mail->logged)
894 return;
895
896 // Return if this is an email test and one has already been sent.
897 if (which == 0 && mail->logged)
898 return;
899
900 // To decide if to send mail, we need to know what time it is.
901 epoch=time(NULL);
902
903 // Return if less than one day has gone by
904 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
905 return;
906
907 // Return if less than 2^(logged-1) days have gone by
908 if (cfg.emailfreq==3 && mail->logged) {
909 days=0x01<<(mail->logged-1);
910 days*=day;
911 if (epoch<(mail->lastsent+days))
912 return;
913 }
914
915 // record the time of this mail message, and the first mail message
916 if (!mail->logged)
917 mail->firstsent=epoch;
918 mail->lastsent=epoch;
919
920 // get system host & domain names (not null terminated if length=MAX)
921 #ifdef HAVE_GETHOSTNAME
922 if (gethostname(hostname, 256))
923 strcpy(hostname, unknown);
924 else {
925 char *p=NULL;
926 hostname[255]='\0';
927 p = dnsdomain(hostname);
928 if (p && *p) {
929 strncpy(domainname, p, 255);
930 domainname[255]='\0';
931 } else
932 strcpy(domainname, unknown);
933 }
934 #else
935 strcpy(hostname, unknown);
936 strcpy(domainname, unknown);
937 #endif
938
939 #ifdef HAVE_GETDOMAINNAME
940 if (getdomainname(nisdomain, 256))
941 strcpy(nisdomain, unknown);
942 else
943 nisdomain[255]='\0';
944 #else
945 strcpy(nisdomain, unknown);
946 #endif
947
948 // print warning string into message
949 va_start(ap, fmt);
950 vsnprintf(message, 256, fmt, ap);
951 va_end(ap);
952
953 // appropriate message about further information
954 additional[0]=original[0]=further[0]='\0';
955 if (which) {
956 sprintf(further,"You can also use the smartctl utility for further investigation.\n");
957
958 switch (cfg.emailfreq) {
959 case 1:
960 sprintf(additional,"No additional email messages about this problem will be sent.\n");
961 break;
962 case 2:
963 sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
964 break;
965 case 3:
966 sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
967 (0x01)<<mail->logged);
968 break;
969 }
970 if (cfg.emailfreq>1 && mail->logged) {
971 dateandtimezoneepoch(dates, mail->firstsent);
972 sprintf(original,"The original email about this issue was sent at %s\n", dates);
973 }
974 }
975
976 snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
977
978 // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
979 if (!*executable)
980 #ifdef DEFAULT_MAILER
981 executable = DEFAULT_MAILER ;
982 #else
983 #ifndef _WIN32
984 executable = "mail";
985 #else
986 executable = "blat"; // http://blat.sourceforge.net/
987 #endif
988 #endif
989
990 #ifndef _WIN32 // blat mailer needs comma
991 // replace commas by spaces to separate recipients
992 std::replace(address.begin(), address.end(), ',', ' ');
993 #endif
994 // Export information in environment variables that will be useful
995 // for user scripts
996 exportenv(environ_strings[0], "SMARTD_MAILER", executable);
997 exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
998 exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
999 dateandtimezoneepoch(dates, mail->firstsent);
1000 exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
1001 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1002 exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
1003 exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
1004 if (!address.empty())
1005 exportenv(environ_strings[6], "SMARTD_ADDRESS", address.c_str());
1006 exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg.name.c_str());
1007
1008 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", cfg.dev_type.c_str());
1009 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg.name.c_str());
1010
1011 snprintf(fullmessage, 1024,
1012 "This email was generated by the smartd daemon running on:\n\n"
1013 " host name: %s\n"
1014 " DNS domain: %s\n"
1015 " NIS domain: %s\n\n"
1016 "The following warning/error was logged by the smartd daemon:\n\n"
1017 "%s\n\n"
1018 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
1019 "%s%s%s",
1020 hostname, domainname, nisdomain, message, further, original, additional);
1021 exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
1022
1023 // now construct a command to send this as EMAIL
1024 #ifndef _WIN32
1025 if (!address.empty())
1026 snprintf(command, 2048,
1027 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1028 "%sENDMAIL\n", subject, address.c_str(), fullmessage);
1029 else
1030 snprintf(command, 2048, "%s 2>&1", executable);
1031
1032 // tell SYSLOG what we are about to do...
1033 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1034 const char * newwarn = (which? "Warning via" : "Test of");
1035
1036 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1037 which?"Sending warning via":"Executing test of", executable, newadd);
1038
1039 // issue the command to send mail or to run the user's executable
1040 errno=0;
1041 FILE * pfp;
1042 if (!(pfp=popen(command, "r")))
1043 // failed to popen() mail process
1044 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1045 newwarn, executable, newadd, errno?strerror(errno):"");
1046 else {
1047 // pipe suceeded!
1048 int len, status;
1049 char buffer[EBUFLEN];
1050
1051 // if unexpected output on stdout/stderr, null terminate, print, and flush
1052 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1053 int count=0;
1054 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1055 buffer[newlen]='\0';
1056 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1057 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1058
1059 // flush pipe if needed
1060 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1061 count++;
1062
1063 // tell user that pipe was flushed, or that something is really wrong
1064 if (count && count<EBUFLEN)
1065 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1066 newwarn, executable, newadd);
1067 else if (count)
1068 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1069 newwarn, executable, newadd);
1070 }
1071
1072 // if something went wrong with mail process, print warning
1073 errno=0;
1074 if (-1==(status=pclose(pfp)))
1075 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1076 errno?strerror(errno):"");
1077 else {
1078 // mail process apparently succeeded. Check and report exit status
1079 int status8;
1080
1081 if (WIFEXITED(status)) {
1082 // exited 'normally' (but perhaps with nonzero status)
1083 status8=WEXITSTATUS(status);
1084
1085 if (status8>128)
1086 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1087 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1088 else if (status8)
1089 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1090 newwarn, executable, newadd, status, status8);
1091 else
1092 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1093 }
1094
1095 if (WIFSIGNALED(status))
1096 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1097 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1098
1099 // this branch is probably not possible. If subprocess is
1100 // stopped then pclose() should not return.
1101 if (WIFSTOPPED(status))
1102 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1103 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1104
1105 }
1106 }
1107
1108 #else // _WIN32
1109
1110 // No "here-documents" on Windows, so must use separate commandline and stdin
1111 char stdinbuf[1024];
1112 command[0] = stdinbuf[0] = 0;
1113 int boxtype = -1, boxmsgoffs = 0;
1114 const char * newadd = "<nomailer>";
1115 if (!address.empty()) {
1116 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1117 char addr1[9+1+13] = ""; int n1 = -1, n2 = -1;
1118 if (sscanf(address.c_str(), "%9[a-z]%n,%n", addr1, &n1, &n2) == 1 && (n1 == (int)address.size() || n2 > 0)) {
1119 if (!strcmp(addr1, "msgbox"))
1120 boxtype = 0;
1121 else if (!strcmp(addr1, "sysmsgbox"))
1122 boxtype = 1;
1123 if (boxtype >= 0)
1124 address.erase(0, (n2 > n1 ? n2 : n1));
1125 }
1126
1127 if (!address.empty()) {
1128 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1129 snprintf(command, sizeof(command),
1130 "%s - -q -subject \"%s\" -to \"%s\"",
1131 executable, subject, address.c_str());
1132 newadd = address.c_str();
1133 }
1134
1135 #ifdef _MSC_VER
1136 _set_printf_count_output(1); // "%n" disabled by default
1137 #endif
1138 // Message for mail [0...] and messagebox [boxmsgoffs...]
1139 snprintf(stdinbuf, sizeof(stdinbuf),
1140 "This email was generated by the smartd daemon running on:\n\n"
1141 " host name: %s\n"
1142 " DNS domain: %s\n"
1143 // " NIS domain: %s\n"
1144 "\n%n"
1145 "The following warning/error was logged by the smartd daemon:\n\n"
1146 "%s\n\n"
1147 "For details see the event log or log file of smartd.\n\n"
1148 "%s%s%s"
1149 "\n",
1150 hostname, /*domainname, */ nisdomain, &boxmsgoffs, message, further, original, additional);
1151 }
1152 else
1153 snprintf(command, sizeof(command), "%s", executable);
1154
1155 const char * newwarn = (which ? "Warning via" : "Test of");
1156 if (boxtype >= 0) {
1157 // show message box
1158 daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
1159 PrintOut(LOG_INFO,"%s message box\n", newwarn);
1160 }
1161 if (command[0]) {
1162 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1163 int rc;
1164 // run command
1165 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1166 (which?"Sending warning via":"Executing test of"), executable, newadd);
1167 rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
1168 if (rc >= 0 && stdoutbuf[0])
1169 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1170 newwarn, executable, newadd, strlen(stdoutbuf), stdoutbuf);
1171 if (rc != 0)
1172 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1173 newwarn, executable, newadd, rc);
1174 else
1175 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1176 }
1177
1178 #endif // _WIN32
1179
1180 // increment mail sent counter
1181 mail->logged++;
1182 }
1183
1184 // Printing function for watching ataprint commands, or losing them
1185 // [From GLIBC Manual: Since the prototype doesn't specify types for
1186 // optional arguments, in a call to a variadic function the default
1187 // argument promotions are performed on the optional argument
1188 // values. This means the objects of type char or short int (whether
1189 // signed or not) are promoted to either int or unsigned int, as
1190 // appropriate.]
1191 void pout(const char *fmt, ...){
1192 va_list ap;
1193
1194 // get the correct time in syslog()
1195 FixGlibcTimeZoneBug();
1196 // initialize variable argument list
1197 va_start(ap,fmt);
1198 // in debug==1 mode we will print the output from the ataprint.o functions!
1199 if (debugmode && debugmode!=2)
1200 #ifdef _WIN32
1201 if (facility == LOG_LOCAL1) // logging to stdout
1202 vfprintf(stderr,fmt,ap);
1203 else
1204 #endif
1205 vprintf(fmt,ap);
1206 // in debug==2 mode we print output from knowndrives.o functions
1207 else if (debugmode==2 || con->reportataioctl || con->reportscsiioctl /*|| con->controller_port???*/) {
1208 openlog("smartd", LOG_PID, facility);
1209 vsyslog(LOG_INFO, fmt, ap);
1210 closelog();
1211 }
1212 va_end(ap);
1213 fflush(NULL);
1214 return;
1215 }
1216
1217 // This function prints either to stdout or to the syslog as needed.
1218 // This function is also used by utility.cpp to report LOG_CRIT errors.
1219 void PrintOut(int priority, const char *fmt, ...){
1220 va_list ap;
1221
1222 // get the correct time in syslog()
1223 FixGlibcTimeZoneBug();
1224 // initialize variable argument list
1225 va_start(ap,fmt);
1226 if (debugmode)
1227 #ifdef _WIN32
1228 if (facility == LOG_LOCAL1) // logging to stdout
1229 vfprintf(stderr,fmt,ap);
1230 else
1231 #endif
1232 vprintf(fmt,ap);
1233 else {
1234 openlog("smartd", LOG_PID, facility);
1235 vsyslog(priority,fmt,ap);
1236 closelog();
1237 }
1238 va_end(ap);
1239 return;
1240 }
1241
1242 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1243 void checksumwarning(const char * string)
1244 {
1245 pout("Warning! %s error: invalid SMART checksum.\n", string);
1246 }
1247
1248 // Wait for the pid file to show up, this makes sure a calling program knows
1249 // that the daemon is really up and running and has a pid to kill it
1250 bool WaitForPidFile()
1251 {
1252 int waited, max_wait = 10;
1253 struct stat stat_buf;
1254
1255 if (pid_file.empty() || debugmode)
1256 return true;
1257
1258 for(waited = 0; waited < max_wait; ++waited) {
1259 if (!stat(pid_file.c_str(), &stat_buf)) {
1260 return true;
1261 } else
1262 sleep(1);
1263 }
1264 return false;
1265 }
1266
1267
1268 // Forks new process, closes ALL file descriptors, redirects stdin,
1269 // stdout, and stderr. Not quite daemon(). See
1270 // http://www.linuxjournal.com/article/2335
1271 // for a good description of why we do things this way.
1272 void DaemonInit(){
1273 #ifndef _WIN32
1274 pid_t pid;
1275 int i;
1276
1277 // flush all buffered streams. Else we might get two copies of open
1278 // streams since both parent and child get copies of the buffers.
1279 fflush(NULL);
1280
1281 if (do_fork) {
1282 if ((pid=fork()) < 0) {
1283 // unable to fork!
1284 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1285 EXIT(EXIT_STARTUP);
1286 }
1287 else if (pid) {
1288 // we are the parent process, wait for pid file, then exit cleanly
1289 if(!WaitForPidFile()) {
1290 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1291 EXIT(EXIT_STARTUP);
1292 } else
1293 EXIT(0);
1294 }
1295
1296 // from here on, we are the child process.
1297 setsid();
1298
1299 // Fork one more time to avoid any possibility of having terminals
1300 if ((pid=fork()) < 0) {
1301 // unable to fork!
1302 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1303 EXIT(EXIT_STARTUP);
1304 }
1305 else if (pid)
1306 // we are the parent process -- exit cleanly
1307 EXIT(0);
1308
1309 // Now we are the child's child...
1310 }
1311
1312 // close any open file descriptors
1313 for (i=getdtablesize();i>=0;--i)
1314 close(i);
1315
1316 #ifdef __CYGWIN__
1317 // Cygwin's setsid() does not detach the process from Windows console
1318 FreeConsole();
1319 #endif // __CYGWIN__
1320
1321 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1322
1323 // redirect any IO attempts to /dev/null for stdin
1324 i=open("/dev/null",O_RDWR);
1325 if (i>=0) {
1326 // stdout
1327 NO_warn_unused_result(dup(i));
1328 // stderr
1329 NO_warn_unused_result(dup(i));
1330 };
1331 umask(0022);
1332 NO_warn_unused_result(chdir("/"));
1333
1334 if (do_fork)
1335 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1336
1337 #else // _WIN32
1338
1339 // No fork() on native Win32
1340 // Detach this process from console
1341 fflush(NULL);
1342 if (daemon_detach("smartd")) {
1343 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1344 EXIT(EXIT_STARTUP);
1345 }
1346 // stdin/out/err now closed if not redirected
1347
1348 #endif // _WIN32
1349 return;
1350 }
1351
1352 // create a PID file containing the current process id
1353 static void WritePidFile()
1354 {
1355 if (!pid_file.empty()) {
1356 pid_t pid = getpid();
1357 mode_t old_umask;
1358 #ifndef __CYGWIN__
1359 old_umask = umask(0077); // rwx------
1360 #else
1361 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1362 old_umask = umask(0033); // rwxr--r--
1363 #endif
1364
1365 stdio_file f(pid_file.c_str(), "w");
1366 umask(old_umask);
1367 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1368 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1369 EXIT(EXIT_PID);
1370 }
1371 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1372 }
1373 }
1374
1375 // Prints header identifying version of code and home
1376 static void PrintHead()
1377 {
1378 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1379 }
1380
1381 // prints help info for configuration file Directives
1382 void Directives() {
1383 PrintOut(LOG_INFO,
1384 "Configuration file (%s) Directives (after device name):\n"
1385 " -d TYPE Set the device type: %s\n"
1386 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1387 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1388 " -S VAL Enable/disable attribute autosave (on/off)\n"
1389 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1390 " -H Monitor SMART Health Status, report if failed\n"
1391 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1392 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1393 " -f Monitor 'Usage' Attributes, report failures\n"
1394 " -m ADD Send email warning to address ADD\n"
1395 " -M TYPE Modify email warning behavior (see man page)\n"
1396 " -p Report changes in 'Prefailure' Attributes\n"
1397 " -u Report changes in 'Usage' Attributes\n"
1398 " -t Equivalent to -p and -u Directives\n"
1399 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1400 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1401 " -i ID Ignore Attribute ID for -f Directive\n"
1402 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1403 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1404 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1405 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1406 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1407 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1408 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1409 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1410 " # Comment: text after a hash sign is ignored\n"
1411 " \\ Line continuation character\n"
1412 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1413 "Use ID = 0 to turn off -C and/or -U Directives\n"
1414 "Example: /dev/hda -a\n",
1415 configfile, smi()->get_valid_dev_types_str().c_str());
1416 return;
1417 }
1418
/* Looks up the formatted list of valid arguments for command line option
   opt.  Returns a pointer to a static string, or NULL if opt has no list. */
const char *GetValidArgList(char opt) {
  static const struct {
    char letter;
    const char * arguments;
  } arg_lists[] = {
    { 'c', "<FILE_NAME>, -" },
    { 's', "valid_regular_expression" },
    { 'l', "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { 'q', "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { 'r', "ioctl[,N], ataioctl[,N], scsiioctl[,N]" },
    { 'p', "<FILE_NAME>" },
    { 'i', "<INTEGER_SECONDS>" }
  };
  const unsigned entries = sizeof(arg_lists) / sizeof(arg_lists[0]);

  for (unsigned idx = 0; idx < entries; ++idx) {
    if (arg_lists[idx].letter == opt)
      return arg_lists[idx].arguments;
  }
  return NULL;
}
1441
1442 /* prints help information for command syntax */
1443 void Usage (void){
1444 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1445 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1446 PrintOut(LOG_INFO," Read configuration file NAME or stdin [default is %s]\n\n", configfile);
1447 PrintOut(LOG_INFO," -d, --debug\n");
1448 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1449 PrintOut(LOG_INFO," -D, --showdirectives\n");
1450 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1451 PrintOut(LOG_INFO," -h, --help, --usage\n");
1452 PrintOut(LOG_INFO," Display this help and exit\n\n");
1453 PrintOut(LOG_INFO," -i N, --interval=N\n");
1454 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1455 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1456 #ifndef _WIN32
1457 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1458 #else
1459 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1460 #endif
1461 #ifndef _WIN32
1462 PrintOut(LOG_INFO," -n, --no-fork\n");
1463 PrintOut(LOG_INFO," Do not fork into background\n\n");
1464 #endif // _WIN32
1465 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1466 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1467 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1468 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1469 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1470 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1471 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1472 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1473 #ifdef SMARTMONTOOLS_SAVESTATES
1474 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1475 #endif
1476 PrintOut(LOG_INFO,"\n");
1477 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1478 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1479 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1480 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_DRIVEDBDIR"/drivedb.h]\n");
1481 #endif
1482 PrintOut(LOG_INFO,"\n");
1483 #ifdef _WIN32
1484 PrintOut(LOG_INFO," --service\n");
1485 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1486 PrintOut(LOG_INFO," smartd install [options]\n");
1487 PrintOut(LOG_INFO," Remove service with:\n");
1488 PrintOut(LOG_INFO," smartd remove\n\n");
1489 #endif // _WIN32
1490 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1491 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1492 }
1493
1494 static int CloseDevice(smart_device * device, const char * name)
1495 {
1496 if (!device->close()){
1497 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1498 return 1;
1499 }
1500 // device sucessfully closed
1501 return 0;
1502 }
1503
// Predicate for state file names: returns false for the characters
// '0'-'9', 'A'-'Z' and 'a'-'z', and true for every other character.
static bool not_allowed_in_filename(char c)
{
  if (c >= '0' && c <= '9')
    return false;
  if (c >= 'A' && c <= 'Z')
    return false;
  if (c >= 'a' && c <= 'z')
    return false;
  return true;
}
1511
1512 // returns <0 on failure
1513 static int ATAErrorCount(ata_device * device, const char * name,
1514 unsigned char fix_firmwarebug)
1515 {
1516 struct ata_smart_errorlog log;
1517
1518 if (ataReadErrorLog(device, &log, fix_firmwarebug)){
1519 PrintOut(LOG_INFO,"Device: %s, Read SMART Error Log Failed\n",name);
1520 return -1;
1521 }
1522
1523 // return current number of ATA errors
1524 return log.error_log_pointer?log.ata_error_count:0;
1525 }
1526
1527 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1528 // error count, and top bits are the power-on hours of the last error.
1529 static int SelfTestErrorCount(ata_device * device, const char * name,
1530 unsigned char fix_firmwarebug)
1531 {
1532 struct ata_smart_selftestlog log;
1533
1534 if (ataReadSelfTestLog(device, &log, fix_firmwarebug)){
1535 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1536 return -1;
1537 }
1538
1539 // return current number of self-test errors
1540 return ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug);
1541 }
1542
// Unpack the value returned by SelfTestErrorCount(): the low 8 bits hold
// the self-test error count, the next 16 bits the power-on hours of the
// last error.  The argument is parenthesized so that any expression
// (e.g. 'a | b') is evaluated as a whole before shifting/masking.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1545
1546 // Log self-test execution status
1547 static void log_self_test_exec_status(const char * name, unsigned char status)
1548 {
1549 const char * msg;
1550 switch (status >> 4) {
1551 case 0x0: msg = "completed without error"; break;
1552 case 0x1: msg = "was aborted by the host"; break;
1553 case 0x2: msg = "was interrupted by the host with a reset"; break;
1554 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1555 case 0x4: msg = "completed with error (unknown test element)"; break;
1556 case 0x5: msg = "completed with error (electrical test element)"; break;
1557 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1558 case 0x7: msg = "completed with error (read test element)"; break;
1559 case 0x8: msg = "completed with error (handling damage?)"; break;
1560 default: msg = 0;
1561 }
1562
1563 if (msg)
1564 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1565 "Device: %s, previous self-test %s\n", name, msg);
1566 else if ((status >> 4) == 0xf)
1567 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1568 name, status & 0x0f);
1569 else
1570 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1571 name, status);
1572 }
1573
1574
// TODO: Add '-F swapid' directive
// Until then this flag is always false; it is passed to apply_presets()
// and format_ata_string() by ATADeviceScan().
const bool fix_swapped_id = false;
1577
1578 // scan to see what ata devices there are, and if they support SMART
1579 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1580 {
1581 int supported=0;
1582 struct ata_identify_device drive;
1583 const char *name = cfg.name.c_str();
1584 int retid;
1585
1586 // Device must be open
1587
1588 // Get drive identity structure
1589 if ((retid=ataReadHDIdentity (atadev, &drive))){
1590 if (retid<0)
1591 // Unable to read Identity structure
1592 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1593 else
1594 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1595 name, packetdevicetype(retid-1));
1596 CloseDevice(atadev, name);
1597 return 2;
1598 }
1599 // Store drive size (for selective self-test only)
1600 state.num_sectors = get_num_sectors(&drive);
1601
1602 // Show if device in database, and use preset vendor attribute
1603 // options unless user has requested otherwise.
1604 if (cfg.ignorepresets)
1605 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1606 else {
1607 // do whatever applypresets decides to do.
1608 if (!apply_presets(&drive, cfg.attributedefs, cfg.fix_firmwarebug, fix_swapped_id))
1609 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1610 else
1611 PrintOut(LOG_INFO, "Device: %s, found in smartd database.\n", name);
1612 }
1613
1614 // Set default '-C 197[+]' if no '-C ID' is specified.
1615 if (!cfg.curr_pending_set)
1616 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attributedefs, cfg.curr_pending_incr);
1617 // Set default '-U 198[+]' if no '-U ID' is specified.
1618 if (!cfg.offl_pending_set)
1619 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attributedefs, cfg.offl_pending_incr);
1620
1621 // If requested, show which presets would be used for this drive
1622 if (cfg.showpresets) {
1623 int savedebugmode=debugmode;
1624 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1625 if (!debugmode)
1626 debugmode=2;
1627 show_presets(&drive, false);
1628 debugmode=savedebugmode;
1629 }
1630
1631 // see if drive supports SMART
1632 supported=ataSmartSupport(&drive);
1633 if (supported!=1) {
1634 if (supported==0)
1635 // drive does NOT support SMART
1636 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1637 else
1638 // can't tell if drive supports SMART
1639 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1640
1641 // should we proceed anyway?
1642 if (cfg.permissive) {
1643 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1644 }
1645 else {
1646 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1647 CloseDevice(atadev, name);
1648 return 2;
1649 }
1650 }
1651
1652 if (ataEnableSmart(atadev)) {
1653 // Enable SMART command has failed
1654 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1655 CloseDevice(atadev, name);
1656 return 2;
1657 }
1658
1659 // disable device attribute autosave...
1660 if (cfg.autosave==1) {
1661 if (ataDisableAutoSave(atadev))
1662 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1663 else
1664 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1665 }
1666
1667 // or enable device attribute autosave
1668 if (cfg.autosave==2) {
1669 if (ataEnableAutoSave(atadev))
1670 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1671 else
1672 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1673 }
1674
1675 // capability check: SMART status
1676 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1677 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1678 cfg.smartcheck = false;
1679 }
1680
1681 // capability check: Read smart values and thresholds. Note that
1682 // smart values are ALSO needed even if we ONLY want to know if the
1683 // device is self-test log or error-log capable! After ATA-5, this
1684 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1685 // but sadly not for ATA-5. Sigh.
1686
1687 // do we need to retain SMART data after returning from this routine?
1688 bool retainsmartdata = (cfg.usagefailed || cfg.prefail || cfg.usage || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit);
1689
1690 // do we need to get SMART data?
1691 bool smart_val_ok = false;
1692 if ( retainsmartdata || cfg.autoofflinetest || cfg.selftest || cfg.errorlog
1693 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1694
1695 if (ataReadSmartValues(atadev, &state.smartval) ||
1696 ataReadSmartThresholds (atadev, &state.smartthres)) {
1697 PrintOut(LOG_INFO,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name);
1698 retainsmartdata = cfg.usagefailed = cfg.prefail = cfg.usage = false;
1699 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1700 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1701 }
1702 else
1703 smart_val_ok = true;
1704
1705 // see if the necessary Attribute is there to monitor offline or
1706 // current pending sectors or temperature
1707 if (cfg.curr_pending_id && ATAReturnAttributeRawValue(cfg.curr_pending_id, &state.smartval) < 0) {
1708 PrintOut(LOG_INFO,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1709 name, cfg.curr_pending_id);
1710 cfg.curr_pending_id = 0;
1711 }
1712
1713 if (cfg.offl_pending_id && ATAReturnAttributeRawValue(cfg.offl_pending_id, &state.smartval) < 0) {
1714 PrintOut(LOG_INFO,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1715 name, cfg.offl_pending_id);
1716 cfg.offl_pending_id = 0;
1717 }
1718
1719 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1720 && !ATAReturnTemperatureValue(&state.smartval, cfg.attributedefs)) {
1721 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
1722 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1723 }
1724 }
1725
1726 // enable/disable automatic on-line testing
1727 if (cfg.autoofflinetest) {
1728 // is this an enable or disable request?
1729 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1730 if (!smart_val_ok)
1731 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1732 else {
1733 // if command appears unsupported, issue a warning...
1734 if (!isSupportAutomaticTimer(&state.smartval))
1735 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1736 // ... but then try anyway
1737 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1738 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1739 else
1740 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1741 }
1742 }
1743
1744 // capability check: self-test-log
1745 if (cfg.selftest) {
1746 int retval;
1747
1748 // start with service disabled, and re-enable it if all works OK
1749 cfg.selftest = false;
1750 state.selflogcount = 0;
1751 state.selfloghour = 0;
1752
1753 if (!smart_val_ok)
1754 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name);
1755 else if (!cfg.permissive && !isSmartTestLogCapable(&state.smartval, &drive))
1756 PrintOut(LOG_INFO, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name);
1757 else if ((retval = SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0)
1758 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name);
1759 else {
1760 cfg.selftest = true;
1761 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1762 state.selfloghour =SELFTEST_ERRORHOURS(retval);
1763 }
1764 }
1765
1766 // capability check: ATA error log
1767 if (cfg.errorlog) {
1768 int val;
1769
1770 // start with service disabled, and re-enable it if all works OK
1771 cfg.errorlog = false;
1772 state.ataerrorcount=0;
1773
1774 if (!smart_val_ok)
1775 PrintOut(LOG_INFO, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name);
1776 else if (!cfg.permissive && !isSmartErrorLogCapable(&state.smartval, &drive))
1777 PrintOut(LOG_INFO, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name);
1778 else if ((val = ATAErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0)
1779 PrintOut(LOG_INFO, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name);
1780 else {
1781 cfg.errorlog = true;
1782 state.ataerrorcount=val;
1783 }
1784 }
1785
1786 // capabilities check -- does it support powermode?
1787 if (cfg.powermode) {
1788 int powermode = ataCheckPowerMode(atadev);
1789
1790 if (-1 == powermode) {
1791 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
1792 cfg.powermode=0;
1793 }
1794 else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
1795 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1796 name, powermode);
1797 cfg.powermode=0;
1798 }
1799 }
1800
1801 // If no tests available or selected, return
1802 if (!(cfg.errorlog || cfg.selftest || cfg.smartcheck ||
1803 cfg.usagefailed || cfg.prefail || cfg.usage ||
1804 cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
1805 CloseDevice(atadev, name);
1806 return 3;
1807 }
1808
1809 // tell user we are registering device
1810 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
1811
1812 // record number of device, type of device, increment device count
1813 if (cfg.dev_type.empty())
1814 cfg.dev_type = "ata";
1815
1816 // close file descriptor
1817 CloseDevice(atadev, name);
1818
1819 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
1820 // Build file name for state file
1821 char model[40+1], serial[20+1];
1822 format_ata_string(model, drive.model, sizeof(model)-1, fix_swapped_id);
1823 format_ata_string(serial, drive.serial_no, sizeof(serial)-1, fix_swapped_id);
1824 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
1825 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
1826 if (!state_path_prefix.empty()) {
1827 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
1828 // Read previous state
1829 if (read_dev_state(cfg.state_file.c_str(), state)) {
1830 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
1831 // Copy ATA attribute values to temp state
1832 state.update_temp_state();
1833 }
1834 }
1835 if (!attrlog_path_prefix.empty())
1836 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
1837 }
1838
1839 // Start self-test regex check now if time was not read from state file
1840 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1841 state.scheduled_test_next_check = time(0);
1842
1843 return 0;
1844 }
1845
1846 // on success, return 0. On failure, return >0. Never return <0,
1847 // please.
1848 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
1849 {
1850 int k, err;
1851 const char *device = cfg.name.c_str();
1852 struct scsi_iec_mode_page iec;
1853 UINT8 tBuf[64];
1854
1855 // Device must be open
1856
1857 // check that device is ready for commands. IE stores its stuff on
1858 // the media.
1859 if ((err = scsiTestUnitReady(scsidev))) {
1860 if (SIMPLE_ERR_NOT_READY == err)
1861 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
1862 else if (SIMPLE_ERR_NO_MEDIUM == err)
1863 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
1864 else if (SIMPLE_ERR_BECOMING_READY == err)
1865 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
1866 else
1867 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
1868 CloseDevice(scsidev, device);
1869 return 2;
1870 }
1871
1872 // Badly-conforming USB storage devices may fail this check.
1873 // The response to the following IE mode page fetch (current and
1874 // changeable values) is carefully examined. It has been found
1875 // that various USB devices that malform the response will lock up
1876 // if asked for a log page (e.g. temperature) so it is best to
1877 // bail out now.
1878 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
1879 state.modese_len = iec.modese_len;
1880 else if (SIMPLE_ERR_BAD_FIELD == err)
1881 ; /* continue since it is reasonable not to support IE mpage */
1882 else { /* any other error (including malformed response) unreasonable */
1883 PrintOut(LOG_INFO,
1884 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1885 device, err);
1886 CloseDevice(scsidev, device);
1887 return 3;
1888 }
1889
1890 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1891 // smart if it is off). This may change to be the same as the ATA side.
1892 if (!scsi_IsExceptionControlEnabled(&iec)) {
1893 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
1894 "Try 'smartctl -s on %s' to turn on SMART features\n",
1895 device, device);
1896 CloseDevice(scsidev, device);
1897 return 3;
1898 }
1899
1900 // Flag that certain log pages are supported (information may be
1901 // available from other sources).
1902 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
1903 for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
1904 switch (tBuf[k]) {
1905 case TEMPERATURE_LPAGE:
1906 state.TempPageSupported = 1;
1907 break;
1908 case IE_LPAGE:
1909 state.SmartPageSupported = 1;
1910 break;
1911 default:
1912 break;
1913 }
1914 }
1915 }
1916
1917 // record type of device
1918 if (cfg.dev_type.empty())
1919 cfg.dev_type = "scsi";
1920
1921 // Check if scsiCheckIE() is going to work
1922 {
1923 UINT8 asc = 0;
1924 UINT8 ascq = 0;
1925 UINT8 currenttemp = 0;
1926 UINT8 triptemp = 0;
1927
1928 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
1929 &asc, &ascq, &currenttemp, &triptemp)) {
1930 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
1931 state.SuppressReport = 1;
1932 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
1933 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
1934 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1935 }
1936 }
1937 }
1938
1939 // capability check: self-test-log
1940 if (cfg.selftest){
1941 int retval = scsiCountFailedSelfTests(scsidev, 0);
1942 if (retval<0) {
1943 // no self-test log, turn off monitoring
1944 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
1945 cfg.selftest = false;
1946 state.selflogcount = 0;
1947 state.selfloghour = 0;
1948 }
1949 else {
1950 // register starting values to watch for changes
1951 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1952 state.selfloghour =SELFTEST_ERRORHOURS(retval);
1953 }
1954 }
1955
1956 // disable autosave (set GLTSD bit)
1957 if (cfg.autosave==1){
1958 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
1959 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
1960 else
1961 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
1962 }
1963
1964 // or enable autosave (clear GLTSD bit)
1965 if (cfg.autosave==2){
1966 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
1967 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
1968 else
1969 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
1970 }
1971
1972 // tell user we are registering device
1973 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
1974
1975 // TODO: Build file name for state file
1976 if (!state_path_prefix.empty()) {
1977 PrintOut(LOG_INFO, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device);
1978 }
1979 // TODO: Build file name for attribute log file
1980 if (!attrlog_path_prefix.empty()) {
1981 PrintOut(LOG_INFO, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device);
1982 }
1983
1984 // close file descriptor
1985 CloseDevice(scsidev, device);
1986
1987 // Start self-test regex check now if time was not read from state file
1988 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1989 state.scheduled_test_next_check = time(0);
1990
1991 return 0;
1992 }
1993
1994
// Result record filled in by ATACompareValues() when an attribute differs
// between two SMART readings.
struct changedattribute_t {
  unsigned char newval;   // normalized value from the new reading
  unsigned char oldval;   // normalized value from the old reading
  unsigned char id;       // attribute ID number
  unsigned char prefail;  // nonzero if attribute flags mark it as prefailure
  unsigned char sameraw;  // 1 if raw value is unchanged, 0 otherwise
};
2002
2003 // We compare old and new values of the n'th attribute. Note that n
2004 // is NOT the attribute ID number.. If (Normalized & Raw) equal,
2005 // then return 0, else nonzero.
2006 static int ATACompareValues(changedattribute_t *delta,
2007 struct ata_smart_values *newv,
2008 struct ata_smart_values *oldv,
2009 struct ata_smart_thresholds_pvt *thresholds,
2010 int n, const char * name)
2011 {
2012 struct ata_smart_attribute *now,*was;
2013 struct ata_smart_threshold_entry *thre;
2014 unsigned char oldval,newval;
2015 int sameraw;
2016
2017 // check that attribute number in range, and no null pointers
2018 if (n<0 || n>=NUMBER_ATA_SMART_ATTRIBUTES || !newv || !oldv || !thresholds)
2019 return 0;
2020
2021 // pointers to disk's values and vendor's thresholds
2022 now=newv->vendor_attributes+n;
2023 was=oldv->vendor_attributes+n;
2024 thre=thresholds->thres_entries+n;
2025
2026 // consider only valid attributes
2027 if (!now->id || !was->id || !thre->id)
2028 return 0;
2029
2030
2031 // issue warning if they don't have the same ID in all structures:
2032 if ( (now->id != was->id) || (now->id != thre->id) ){
2033 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
2034 name, (int)now->id, (int)was->id, (int)thre->id);
2035 return 0;
2036 }
2037
2038 // new and old values of Normalized Attributes
2039 newval=now->current;
2040 oldval=was->current;
2041
2042 // See if the RAW values are unchanged (ie, the same)
2043 if (memcmp(now->raw, was->raw, 6))
2044 sameraw=0;
2045 else
2046 sameraw=1;
2047
2048 // if any values out of the allowed range, or if the values haven't
2049 // changed, return 0
2050 if (!newval || !oldval || newval>0xfe || oldval>0xfe || (oldval==newval && sameraw))
2051 return 0;
2052
2053 // values have changed. Construct output and return
2054 delta->newval=newval;
2055 delta->oldval=oldval;
2056 delta->id=now->id;
2057 delta->prefail=ATTRIBUTE_FLAGS_PREFAILURE(now->flags);
2058 delta->sameraw=sameraw;
2059
2060 return 1;
2061 }
2062
2063 // If the self-test log has got more self-test errors (or more recent
2064 // self-test errors) recorded, then notify user.
2065 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2066 {
2067 const char * name = cfg.name.c_str();
2068
2069 if (newi<0)
2070 // command failed
2071 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2072 else {
2073 // old and new error counts
2074 int oldc=state.selflogcount;
2075 int newc=SELFTEST_ERRORCOUNT(newi);
2076
2077 // old and new error timestamps in hours
2078 int oldh=state.selfloghour;
2079 int newh=SELFTEST_ERRORHOURS(newi);
2080
2081 if (oldc<newc) {
2082 // increase in error count
2083 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2084 name, oldc, newc);
2085 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2086 name, oldc, newc);
2087 state.must_write = true;
2088 } else if (oldh!=newh) {
2089 // more recent error
2090 // a 'more recent' error might actually be a smaller hour number,
2091 // if the hour number has wrapped.
2092 // There's still a bug here. You might just happen to run a new test
2093 // exactly 32768 hours after the previous failure, and have run exactly
2094 // 20 tests between the two, in which case smartd will miss the
2095 // new failure.
2096 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2097 name, newh);
2098 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2099 name, newh);
2100 state.must_write = true;
2101 }
2102
2103 // Needed since self-test error count may DECREASE. Hour might
2104 // also have changed.
2105 state.selflogcount= newc;
2106 state.selfloghour = newh;
2107 }
2108 return;
2109 }
2110
// Letters identifying the self-test types, listed by priority
// (first entry has the highest priority).
static const char test_type_chars[] = "LncrSCO";

// Number of test types; the terminating NUL of the string is not counted.
const unsigned num_test_types = (sizeof test_type_chars / sizeof test_type_chars[0]) - 1;
2114
2115 // returns test type if time to do test of type testtype,
2116 // 0 if not time to do test.
2117 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2118 {
2119 // check that self-testing has been requested
2120 if (cfg.test_regex.empty())
2121 return 0;
2122
2123 // Exit if drive not capable of any test
2124 if ( state.not_cap_long && state.not_cap_short &&
2125 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2126 return 0;
2127
2128 // since we are about to call localtime(), be sure glibc is informed
2129 // of any timezone changes we make.
2130 if (!usetime)
2131 FixGlibcTimeZoneBug();
2132
2133 // Is it time for next check?
2134 time_t now = (!usetime ? time(0) : usetime);
2135 if (now < state.scheduled_test_next_check)
2136 return 0;
2137
2138 // Limit time check interval to 90 days
2139 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2140 state.scheduled_test_next_check = now - (3600L*24*90);
2141
2142 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2143 char testtype = 0;
2144 time_t testtime = 0; int testhour = 0;
2145 int maxtest = num_test_types-1;
2146
2147 for (time_t t = state.scheduled_test_next_check; ; ) {
2148 struct tm * tms = localtime(&t);
2149 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2150 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2151 for (int i = 0; i <= maxtest; i++) {
2152 // Skip if drive not capable of this test
2153 switch (test_type_chars[i]) {
2154 case 'L': if (state.not_cap_long) continue; break;
2155 case 'S': if (state.not_cap_short) continue; break;
2156 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2157 case 'O': if (scsi || state.not_cap_offline) continue; break;
2158 case 'c': case 'n':
2159 case 'r': if (scsi || state.not_cap_selective) continue; break;
2160 default: continue;
2161 }
2162 // Try match of "T/MM/DD/d/HH"
2163 char pattern[16];
2164 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2165 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2166 if (cfg.test_regex.full_match(pattern)) {
2167 // Test found
2168 testtype = pattern[0];
2169 testtime = t; testhour = tms->tm_hour;
2170 // Limit further matches to higher priority self-tests
2171 maxtest = i-1;
2172 break;
2173 }
2174 }
2175 // Exit if no tests left or current time reached
2176 if (maxtest < 0)
2177 break;
2178 if (t >= now)
2179 break;
2180 // Check next hour
2181 if ((t += 3600) > now)
2182 t = now;
2183 }
2184
2185 // Do next check not before next hour.
2186 struct tm * tmnow = localtime(&now);
2187 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2188
2189 if (testtype) {
2190 state.must_write = true;
2191 // Tell user if an old test was found.
2192 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2193 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2194 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2195 cfg.name.c_str(), testtype, datebuf);
2196 }
2197 }
2198
2199 return testtype;
2200 }
2201
2202 // Print a list of future tests.
2203 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2204 {
2205 unsigned numdev = configs.size();
2206 if (!numdev)
2207 return;
2208 std::vector<int> testcnts(numdev * num_test_types, 0);
2209
2210 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2211
2212 // FixGlibcTimeZoneBug(); // done in PrintOut()
2213 time_t now = time(0);
2214 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2215 dateandtimezoneepoch(datenow, now);
2216
2217 long seconds;
2218 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2219 // Check for each device whether a test will be run
2220 time_t testtime = now + seconds;
2221 for (unsigned i = 0; i < numdev; i++) {
2222 const dev_config & cfg = configs.at(i);
2223 dev_state & state = states.at(i);
2224 const char * p;
2225 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2226 if (testtype && (p = strchr(test_type_chars, testtype))) {
2227 unsigned t = (p - test_type_chars);
2228 // Report at most 5 tests of each type
2229 if (++testcnts[i*num_test_types + t] <= 5) {
2230 dateandtimezoneepoch(date, testtime);
2231 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2232 testcnts[i*num_test_types + t], testtype, date);
2233 }
2234 }
2235 }
2236 }
2237
2238 // Report totals
2239 dateandtimezoneepoch(date, now+seconds);
2240 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2241 for (unsigned i = 0; i < numdev; i++) {
2242 const dev_config & cfg = configs.at(i);
2243 bool scsi = devices.at(i)->is_scsi();
2244 for (unsigned t = 0; t < num_test_types; t++) {
2245 int cnt = testcnts[i*num_test_types + t];
2246 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2247 continue;
2248 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2249 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2250 }
2251 }
2252
2253 }
2254
2255 // Return zero on success, nonzero on failure. Perform offline (background)
2256 // short or long (extended) self test on given scsi device.
2257 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2258 {
2259 int retval = 0;
2260 const char *testname = 0;
2261 const char *name = cfg.name.c_str();
2262 int inProgress;
2263
2264 if (scsiSelfTestInProgress(device, &inProgress)) {
2265 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2266 state.not_cap_short = state.not_cap_long = true;
2267 return 1;
2268 }
2269
2270 if (1 == inProgress) {
2271 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2272 "progress.\n", name);
2273 return 1;
2274 }
2275
2276 switch (testtype) {
2277 case 'S':
2278 testname = "Short Self";
2279 retval = scsiSmartShortSelfTest(device);
2280 break;
2281 case 'L':
2282 testname = "Long Self";
2283 retval = scsiSmartExtendSelfTest(device);
2284 break;
2285 }
2286 // If we can't do the test, exit
2287 if (NULL == testname) {
2288 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2289 testtype);
2290 return 1;
2291 }
2292 if (retval) {
2293 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2294 (SIMPLE_ERR_BAD_FIELD == retval)) {
2295 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2296 testname);
2297 if ('L'==testtype)
2298 state.not_cap_long = true;
2299 else
2300 state.not_cap_short = true;
2301
2302 return 1;
2303 }
2304 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2305 testname, retval);
2306 return 1;
2307 }
2308
2309 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2310
2311 return 0;
2312 }
2313
2314 // Do an offline immediate or self-test. Return zero on success,
2315 // nonzero on failure.
2316 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2317 {
2318 const char *name = cfg.name.c_str();
2319
2320 // Read current smart data and check status/capability
2321 struct ata_smart_values data;
2322 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2323 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2324 return 1;
2325 }
2326
2327 // Check for capability to do the test
2328 int dotest = -1, mode = 0;
2329 const char *testname = 0;
2330 switch (testtype) {
2331 case 'O':
2332 testname="Offline Immediate ";
2333 if (isSupportExecuteOfflineImmediate(&data))
2334 dotest=OFFLINE_FULL_SCAN;
2335 else
2336 state.not_cap_offline = true;
2337 break;
2338 case 'C':
2339 testname="Conveyance Self-";
2340 if (isSupportConveyanceSelfTest(&data))
2341 dotest=CONVEYANCE_SELF_TEST;
2342 else
2343 state.not_cap_conveyance = true;
2344 break;
2345 case 'S':
2346 testname="Short Self-";
2347 if (isSupportSelfTest(&data))
2348 dotest=SHORT_SELF_TEST;
2349 else
2350 state.not_cap_short = true;
2351 break;
2352 case 'L':
2353 testname="Long Self-";
2354 if (isSupportSelfTest(&data))
2355 dotest=EXTEND_SELF_TEST;
2356 else
2357 state.not_cap_long = true;
2358 break;
2359
2360 case 'c': case 'n': case 'r':
2361 testname = "Selective Self-";
2362 if (isSupportSelectiveSelfTest(&data)) {
2363 dotest = SELECTIVE_SELF_TEST;
2364 switch (testtype) {
2365 case 'c': mode = SEL_CONT; break;
2366 case 'n': mode = SEL_NEXT; break;
2367 case 'r': mode = SEL_REDO; break;
2368 }
2369 }
2370 else
2371 state.not_cap_selective = true;
2372 break;
2373 }
2374
2375 // If we can't do the test, exit
2376 if (dotest<0) {
2377 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2378 return 1;
2379 }
2380
2381 // If currently running a self-test, do not interrupt it to start another.
2382 if (15==(data.self_test_exec_status >> 4)) {
2383 if (cfg.fix_firmwarebug == FIX_SAMSUNG3 && data.self_test_exec_status == 0xf0) {
2384 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2385 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2386 } else {
2387 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2388 name, testname, (int)(data.self_test_exec_status & 0x0f));
2389 return 1;
2390 }
2391 }
2392
2393 if (dotest == SELECTIVE_SELF_TEST) {
2394 // Set test span
2395 ata_selective_selftest_args selargs;
2396 selargs.num_spans = 1;
2397 selargs.span[0].mode = mode;
2398 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors)) {
2399 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2400 return 1;
2401 }
2402 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2403 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2404 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2405 start, end, end - start + 1,
2406 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2407 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2408 }
2409
2410 // execute the test, and return status
2411 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2412 if (retval) {
2413 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2414 return retval;
2415 }
2416
2417 if (testtype != 'O')
2418 // Log next self-test execution status
2419 state.smartval.self_test_exec_status = 0xff;
2420
2421 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2422 return 0;
2423 }
2424
2425 // Check pending sector count attribute values (-C, -U directives).
2426 static void check_pending(const dev_config & cfg, dev_state & state,
2427 unsigned char id, bool increase_only,
2428 const ata_smart_values & smartval,
2429 int mailtype, const char * msg)
2430 {
2431 // No report if no sectors pending.
2432 int64_t rawval = ATAReturnAttributeRawValue(id, &smartval);
2433 if (rawval <= 0)
2434 return;
2435
2436 // If attribute is not reset, report only sector count increases.
2437 int64_t prev_rawval = ATAReturnAttributeRawValue(id, &state.smartval);
2438 if (!(!increase_only || prev_rawval < rawval))
2439 return;
2440
2441 // Format message.
2442 std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2443 if (prev_rawval > 0 && rawval != prev_rawval)
2444 s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2445
2446 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2447 MailWarning(cfg, state, mailtype, "%s\n", s.c_str());
2448 state.must_write = true;
2449 }
2450
// Format a Temperature value into the caller supplied buffer 'buf' and
// return 'buf'. The value 0 means "unset" and is printed as "??".
static const char * fmt_temp(unsigned char x, char * buf)
{
  if (x)
    sprintf(buf, "%u", x);
  else
    strcpy(buf, "??"); // unset
  return buf;
}
2460
2461 // Check Temperature limits
2462 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2463 {
2464 if (!(0 < currtemp && currtemp < 255)) {
2465 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2466 return;
2467 }
2468
2469 // Update Max Temperature
2470 const char * minchg = "", * maxchg = "";
2471 if (currtemp > state.tempmax) {
2472 if (state.tempmax)
2473 maxchg = "!";
2474 state.tempmax = currtemp;
2475 state.must_write = true;
2476 }
2477
2478 char buf[20];
2479 if (!state.temperature) {
2480 // First check
2481 if (!state.tempmin || currtemp < state.tempmin)
2482 // Delay Min Temperature update by ~ 30 minutes.
2483 state.tempmin_delay = time(0) + CHECKTIME - 60;
2484 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2485 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2486 if (triptemp)
2487 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2488 state.temperature = currtemp;
2489 }
2490 else {
2491 if (state.tempmin_delay) {
2492 // End Min Temperature update delay if ...
2493 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2494 || (state.tempmin_delay <= time(0))) { // or delay time is over.
2495 state.tempmin_delay = 0;
2496 if (!state.tempmin)
2497 state.tempmin = 255;
2498 }
2499 }
2500
2501 // Update Min Temperature
2502 if (!state.tempmin_delay && currtemp < state.tempmin) {
2503 state.tempmin = currtemp;
2504 state.must_write = true;
2505 if (currtemp != state.temperature)
2506 minchg = "!";
2507 }
2508
2509 // Track changes
2510 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2511 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2512 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2513 state.temperature = currtemp;
2514 }
2515 }
2516
2517 // Check limits
2518 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2519 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2520 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2521 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2522 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2523 }
2524 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2525 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2526 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2527 }
2528 }
2529
2530 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev, bool allow_selftests)
2531 {
2532 const char * name = cfg.name.c_str();
2533
2534 // If user has asked, test the email warning system
2535 if (cfg.emailtest)
2536 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2537
2538 // if we can't open device, fail gracefully rather than hard --
2539 // perhaps the next time around we'll be able to open it. ATAPI
2540 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2541 // given (see linux cdrom driver).
2542 if (!atadev->open()) {
2543 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
2544 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2545 return 1;
2546 } else if (debugmode)
2547 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
2548
2549 // user may have requested (with the -n Directive) to leave the disk
2550 // alone if it is in idle or sleeping mode. In this case check the
2551 // power mode and exit without check if needed
2552 if (cfg.powermode && !state.powermodefail) {
2553 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
2554 const char * mode = 0;
2555 if (0 <= powermode && powermode < 0xff) {
2556 // wait for possible spin up and check again
2557 int powermode2;
2558 sleep(5);
2559 powermode2 = ataCheckPowerMode(atadev);
2560 if (powermode2 > powermode)
2561 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
2562 powermode = powermode2;
2563 }
2564
2565 switch (powermode){
2566 case -1:
2567 // SLEEP
2568 mode="SLEEP";
2569 if (cfg.powermode>=1)
2570 dontcheck=1;
2571 break;
2572 case 0:
2573 // STANDBY
2574 mode="STANDBY";
2575 if (cfg.powermode>=2)
2576 dontcheck=1;
2577 break;
2578 case 0x80:
2579 // IDLE
2580 mode="IDLE";
2581 if (cfg.powermode>=3)
2582 dontcheck=1;
2583 break;
2584 case 0xff:
2585 // ACTIVE/IDLE
2586 mode="ACTIVE or IDLE";
2587 break;
2588 default:
2589 // UNKNOWN
2590 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2591 name, powermode);
2592 state.powermodefail = true;
2593 break;
2594 }
2595
2596 // if we are going to skip a check, return now
2597 if (dontcheck){
2598 // skip at most powerskipmax checks
2599 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2600 CloseDevice(atadev, name);
2601 if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
2602 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
2603 state.powerskipcnt++;
2604 return 0;
2605 }
2606 else {
2607 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
2608 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2609 }
2610 state.powerskipcnt = 0;
2611 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2612 }
2613 else if (state.powerskipcnt) {
2614 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2615 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2616 state.powerskipcnt = 0;
2617 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2618 }
2619 }
2620
2621 // check smart status
2622 if (cfg.smartcheck) {
2623 int status=ataSmartStatus2(atadev);
2624 if (status==-1){
2625 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
2626 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
2627 state.must_write = true;
2628 }
2629 else if (status==1){
2630 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
2631 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
2632 state.must_write = true;
2633 }
2634 }
2635
2636 // Check everything that depends upon SMART Data (eg, Attribute values)
2637 if ( cfg.usagefailed || cfg.prefail || cfg.usage
2638 || cfg.curr_pending_id || cfg.offl_pending_id
2639 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || cfg.selftest) {
2640 struct ata_smart_values curval;
2641 struct ata_smart_thresholds_pvt * thresh = &state.smartthres;
2642
2643 // Read current attribute values. *drive contains old values and thresholds
2644 if (ataReadSmartValues(atadev, &curval)){
2645 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
2646 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
2647 state.must_write = true;
2648 }
2649 else {
2650 // look for current or offline pending sectors
2651 if (cfg.curr_pending_id)
2652 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
2653 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
2654 : "Total unreadable (pending) sectors" ));
2655
2656 if (cfg.offl_pending_id)
2657 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
2658 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
2659 : "Total offline uncorrectable sectors"));
2660
2661 // check temperature limits
2662 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2663 CheckTemperature(cfg, state, ATAReturnTemperatureValue(&curval, cfg.attributedefs), 0);
2664
2665 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
2666
2667 // look for failed usage attributes, or track usage or prefail attributes
2668 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
2669
2670 // This block looks for usage attributes that have failed.
2671 // Prefail attributes that have failed are returned with a
2672 // positive sign. No failure returns 0. Usage attributes<0.
2673 int att;
2674 if (cfg.usagefailed && ((att=ataCheckAttribute(&curval, thresh, i))<0)){
2675
2676 // are we ignoring failures of this attribute?
2677 att *= -1;
2678 if (!cfg.monitor_attr_flags.is_set(att, MONITOR_IGN_FAILUSE)) {
2679 char attname[64], *loc=attname;
2680
2681 // get attribute name & skip white space
2682 ataPrintSmartAttribName(loc, att, cfg.attributedefs);
2683 while (*loc && *loc==' ') loc++;
2684
2685 // warning message
2686 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc);
2687 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %s.", name, loc);
2688 state.must_write = true;
2689 }
2690 }
2691
2692 // This block tracks usage or prefailure attributes to see if
2693 // they are changing. It also looks for changes in RAW values
2694 // if this has been requested by user.
2695 changedattribute_t delta;
2696 if ((cfg.usage || cfg.prefail) && ATACompareValues(&delta, &curval, &state.smartval, thresh, i, name)){
2697
2698 // Continue if we're not tracking this type of attribute
2699 if (!( ( delta.prefail && cfg.prefail)
2700 || (!delta.prefail && cfg.usage )))
2701 continue;
2702
2703 // Continue if '-I ID' was specified
2704 unsigned char id = delta.id;
2705 if (cfg.monitor_attr_flags.is_set(id, MONITOR_IGNORE))
2706 continue;
2707
2708 // if the only change is the raw value, and we're not
2709 // tracking raw value, then continue loop over attributes
2710 if ( !delta.sameraw && delta.newval == delta.oldval
2711 && !cfg.monitor_attr_flags.is_set(id, MONITOR_RAW))
2712 continue;
2713
2714 // get attribute name, skip spaces
2715 char attname[64], *loc = attname;
2716 ataPrintSmartAttribName(loc, id, cfg.attributedefs);
2717 while (*loc && *loc==' ')
2718 loc++;
2719
2720 // has the user asked for us to print raw values?
2721 char newrawstring[64], oldrawstring[64];
2722 if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
2723 // get raw values (as a string) and add to printout
2724 char rawstring[64];
2725 ataPrintSmartAttribRawValue(rawstring, curval.vendor_attributes+i, cfg.attributedefs);
2726 sprintf(newrawstring, " [Raw %s]", rawstring);
2727 ataPrintSmartAttribRawValue(rawstring, state.smartval.vendor_attributes+i, cfg.attributedefs);
2728 sprintf(oldrawstring, " [Raw %s]", rawstring);
2729 }
2730 else
2731 newrawstring[0]=oldrawstring[0]='\0';
2732
2733 // Format message
2734 std::string msg = strprintf("Device: %s, SMART %s Attribute: %s changed from %d%s to %d%s",
2735 name, (delta.prefail ? "Prefailure" : "Usage"), loc,
2736 delta.oldval, oldrawstring, delta.newval, newrawstring);
2737
2738 // Report this change as critical ?
2739 if ( (delta.newval != delta.oldval && cfg.monitor_attr_flags.is_set(id, MONITOR_AS_CRIT))
2740 || (!delta.sameraw && cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_AS_CRIT))) {
2741 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2742 MailWarning(cfg, state, 2, "%s", msg.c_str());
2743 }
2744 else {
2745 PrintOut(LOG_INFO, "%s\n", msg.c_str());
2746 }
2747 state.must_write = true;
2748 } // endof block tracking usage or prefailure
2749 } // end of loop over attributes
2750
2751 if (cfg.selftest) {
2752 // Log changes of self-test executions status
2753 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
2754 || (!allow_selftests && curval.self_test_exec_status != 0x00) )
2755 log_self_test_exec_status(name, curval.self_test_exec_status);
2756 }
2757
2758 // Save the new values into *drive for the next time around
2759 state.smartval = curval;
2760 }
2761 }
2762 }
2763
2764 // check if number of selftest errors has increased (note: may also DECREASE)
2765 if (cfg.selftest)
2766 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug));
2767
2768 // check if number of ATA errors has increased
2769 if (cfg.errorlog) {
2770
2771 int newc, oldc= state.ataerrorcount;
2772
2773 // new number of errors
2774 newc = ATAErrorCount(atadev, name, cfg.fix_firmwarebug);
2775
2776 // did command fail?
2777 if (newc<0)
2778 // lack of PrintOut here is INTENTIONAL
2779 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
2780
2781 // has error count increased?
2782 if (newc>oldc){
2783 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
2784 name, oldc, newc);
2785 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
2786 name, oldc, newc);
2787 state.must_write = true;
2788 }
2789
2790 // this last line is probably not needed, count always increases
2791 if (newc>=0)
2792 state.ataerrorcount=newc;
2793 }
2794
2795 // if the user has asked, and device is capable (or we're not yet
2796 // sure) check whether a self test should be done now.
2797 if (allow_selftests && !cfg.test_regex.empty()) {
2798 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
2799 if (testtype)
2800 DoATASelfTest(cfg, state, atadev, testtype);
2801 }
2802
2803 // Don't leave device open -- the OS/user may want to access it
2804 // before the next smartd cycle!
2805 CloseDevice(atadev, name);
2806
2807 // Copy ATA attribute values to persistent state
2808 state.update_persistent_state();
2809
2810 return 0;
2811 }
2812
2813 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
2814 {
2815 UINT8 asc, ascq;
2816 UINT8 currenttemp;
2817 UINT8 triptemp;
2818 const char * name = cfg.name.c_str();
2819 const char *cp;
2820
2821 // If the user has asked for it, test the email warning system
2822 if (cfg.emailtest)
2823 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2824
2825 // if we can't open device, fail gracefully rather than hard --
2826 // perhaps the next time around we'll be able to open it
2827 if (!scsidev->open()) {
2828 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
2829 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2830 return 1;
2831 } else if (debugmode)
2832 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
2833 currenttemp = 0;
2834 asc = 0;
2835 ascq = 0;
2836 if (!state.SuppressReport) {
2837 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2838 &asc, &ascq, &currenttemp, &triptemp)) {
2839 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
2840 name);
2841 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
2842 state.SuppressReport = 1;
2843 }
2844 }
2845 if (asc > 0) {
2846 cp = scsiGetIEString(asc, ascq);
2847 if (cp) {
2848 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
2849 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
2850 } else if (debugmode)
2851 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2852 name, (int)asc, (int)ascq);
2853 } else if (debugmode)
2854 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
2855
2856 // check temperature limits
2857 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2858 CheckTemperature(cfg, state, currenttemp, triptemp);
2859
2860 // check if number of selftest errors has increased (note: may also DECREASE)
2861 if (cfg.selftest)
2862 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
2863
2864 if (allow_selftests && !cfg.test_regex.empty()) {
2865 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
2866 if (testtype)
2867 DoSCSISelfTest(cfg, state, scsidev, testtype);
2868 }
2869 CloseDevice(scsidev, name);
2870 return 0;
2871 }
2872
2873 // Checks the SMART status of all ATA and SCSI devices
2874 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
2875 smart_device_list & devices, bool allow_selftests)
2876 {
2877 for (unsigned i = 0; i < configs.size(); i++) {
2878 const dev_config & cfg = configs.at(i);
2879 dev_state & state = states.at(i);
2880 smart_device * dev = devices.at(i);
2881 if (dev->is_ata())
2882 ATACheckDevice(cfg, state, dev->to_ata(), allow_selftests);
2883 else if (dev->is_scsi())
2884 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
2885 }
2886 }
2887
2888 // Set if Initialize() was called
2889 static bool is_initialized = false;
2890
2891 // Does initialization right after fork to daemon mode
2892 void Initialize(time_t *wakeuptime){
2893
2894 // Call Goodbye() on exit
2895 is_initialized = true;
2896
2897 // write PID file
2898 if (!debugmode)
2899 WritePidFile();
2900
2901 // install signal handlers. On Solaris, can't use signal() because
2902 // it resets the handler to SIG_DFL after each call. So use sigset()
2903 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2904
2905 // normal and abnormal exit
2906 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
2907 SIGNALFN(SIGTERM, SIG_IGN);
2908 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
2909 SIGNALFN(SIGQUIT, SIG_IGN);
2910
2911 // in debug mode, <CONTROL-C> ==> HUP
2912 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
2913 SIGNALFN(SIGINT, SIG_IGN);
2914
2915 // Catch HUP and USR1
2916 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
2917 SIGNALFN(SIGHUP, SIG_IGN);
2918 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
2919 SIGNALFN(SIGUSR1, SIG_IGN);
2920 #ifdef _WIN32
2921 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
2922 SIGNALFN(SIGUSR2, SIG_IGN);
2923 #endif
2924
2925 // initialize wakeup time to CURRENT time
2926 *wakeuptime=time(NULL);
2927
2928 return;
2929 }
2930
#ifdef _WIN32
// Toggles debug mode between 0 and 1; any other debug level is only
// reported and left unchanged.
// Implemented for native windows only
// (there is no easy way to reopen tty on *nix)
static void ToggleDebugMode()
{
  if (debugmode) {
    // debug levels other than 1 are not touched
    if (debugmode != 1) {
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      return;
    }

    // level 1 ==> switch debug mode off
    daemon_disable_console();
    debugmode = 0;
    daemon_signal(SIGINT, sighandler);
    PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
    return;
  }

  // currently off ==> try to switch debug mode on
  PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
  if (daemon_enable_console("smartd [Debug]")) {
    // nonzero return: no console, debug mode stays off
    PrintOut(LOG_INFO,"enable console failed\n");
    return;
  }

  debugmode = 1;
  daemon_signal(SIGINT, HUPhandler);
  PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
}
#endif
2956
2957 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
2958 {
2959 // If past wake-up-time, compute next wake-up-time
2960 time_t timenow=time(NULL);
2961 while (wakeuptime<=timenow){
2962 int intervals=1+(timenow-wakeuptime)/checktime;
2963 wakeuptime+=intervals*checktime;
2964 }
2965
2966 // sleep until we catch SIGUSR1 or have completed sleeping
2967 while (timenow<wakeuptime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT){
2968
2969 // protect user again system clock being adjusted backwards
2970 if (wakeuptime>timenow+checktime){
2971 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2972 wakeuptime=timenow+checktime;
2973 }
2974
2975 // Exit sleep when time interval has expired or a signal is received
2976 sleep(wakeuptime-timenow);
2977
2978 #ifdef _WIN32
2979 // toggle debug mode?
2980 if (caughtsigUSR2) {
2981 ToggleDebugMode();
2982 caughtsigUSR2 = 0;
2983 }
2984 #endif
2985
2986 timenow=time(NULL);
2987 }
2988
2989 // if we caught a SIGUSR1 then print message and clear signal
2990 if (caughtsigUSR1){
2991 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2992 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
2993 caughtsigUSR1=0;
2994 sigwakeup = true;
2995 }
2996
2997 // return adjusted wakeuptime
2998 return wakeuptime;
2999 }
3000
3001 // Print out a list of valid arguments for the Directive d
3002 void printoutvaliddirectiveargs(int priority, char d) {
3003
3004 switch (d) {
3005 case 'n':
3006 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3007 break;
3008 case 's':
3009 PrintOut(priority, "valid_regular_expression");
3010 break;
3011 case 'd':
3012 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3013 break;
3014 case 'T':
3015 PrintOut(priority, "normal, permissive");
3016 break;
3017 case 'o':
3018 case 'S':
3019 PrintOut(priority, "on, off");
3020 break;
3021 case 'l':
3022 PrintOut(priority, "error, selftest");
3023 break;
3024 case 'M':
3025 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3026 break;
3027 case 'v':
3028 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3029 break;
3030 case 'P':
3031 PrintOut(priority, "use, ignore, show, showall");
3032 break;
3033 case 'F':
3034 PrintOut(priority, "none, samsung, samsung2, samsung3");
3035 break;
3036 }
3037 }
3038
3039 // exits with an error message, or returns integer value of token
3040 int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *configfile,
3041 int min, int max, char * suffix = 0)
3042 {
3043 // make sure argument is there
3044 if (!arg) {
3045 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3046 configfile, lineno, name, token, min, max);
3047 return -1;
3048 }
3049
3050 // get argument value (base 10), check that it's integer, and in-range
3051 char *endptr;
3052 int val = strtol(arg,&endptr,10);
3053
3054 // optional suffix present?
3055 if (suffix) {
3056 if (!strcmp(endptr, suffix))
3057 endptr += strlen(suffix);
3058 else
3059 *suffix = 0;
3060 }
3061
3062 if (!(!*endptr && min <= val && val <= max)) {
3063 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3064 configfile, lineno, name, token, arg, min, max);
3065 return -1;
3066 }
3067
3068 // all is well; return value
3069 return val;
3070 }
3071
3072
3073 // Get 1-3 small integer(s) for '-W' directive
3074 int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *configfile,
3075 unsigned char * val1, unsigned char * val2, unsigned char * val3){
3076 unsigned v1 = 0, v2 = 0, v3 = 0;
3077 int n1 = -1, n2 = -1, n3 = -1, len;
3078 if (!arg) {
3079 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3080 configfile, lineno, name, token);
3081 return -1;
3082 }
3083
3084 len = strlen(arg);
3085 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3086 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3087 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3088 configfile, lineno, name, token, arg);
3089 return -1;
3090 }
3091 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3092 return 0;
3093 }
3094
3095
3096 // This function returns 1 if it has correctly parsed one token (and
3097 // any arguments), else zero if no tokens remain. It returns -1 if an
3098 // error was encountered.
3099 static int ParseToken(char * token, dev_config & cfg)
3100 {
3101 char sym;
3102 const char * name = cfg.name.c_str();
3103 int lineno=cfg.lineno;
3104 const char *delim = " \n\t";
3105 int badarg = 0;
3106 int missingarg = 0;
3107 const char *arg = 0;
3108
3109 // is the rest of the line a comment
3110 if (*token=='#')
3111 return 1;
3112
3113 // is the token not recognized?
3114 if (*token!='-' || strlen(token)!=2) {
3115 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3116 configfile, lineno, name, token);
3117 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3118 return -1;
3119 }
3120
3121 // token we will be parsing:
3122 sym=token[1];
3123
3124 // parse the token and swallow its argument
3125 int val;
3126 char plus[] = "+", excl[] = "!";
3127
3128 switch (sym) {
3129 case 'C':
3130 // monitor current pending sector count (default 197)
3131 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3132 return -1;
3133 cfg.curr_pending_id = (unsigned char)val;
3134 cfg.curr_pending_incr = (*plus == '+');
3135 cfg.curr_pending_set = true;
3136 break;
3137 case 'U':
3138 // monitor offline uncorrectable sectors (default 198)
3139 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3140 return -1;
3141 cfg.offl_pending_id = (unsigned char)val;
3142 cfg.offl_pending_incr = (*plus == '+');
3143 cfg.offl_pending_set = true;
3144 break;
3145 case 'T':
3146 // Set tolerance level for SMART command failures
3147 if ((arg = strtok(NULL, delim)) == NULL) {
3148 missingarg = 1;
3149 } else if (!strcmp(arg, "normal")) {
3150 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3151 // not on failure of an optional S.M.A.R.T. command.
3152 // This is the default so we don't need to actually do anything here.
3153 cfg.permissive = false;
3154 } else if (!strcmp(arg, "permissive")) {
3155 // Permissive mode; ignore errors from Mandatory SMART commands
3156 cfg.permissive = true;
3157 } else {
3158 badarg = 1;
3159 }
3160 break;
3161 case 'd':
3162 // specify the device type
3163 if ((arg = strtok(NULL, delim)) == NULL) {
3164 missingarg = 1;
3165 } else if (!strcmp(arg, "removable")) {
3166 cfg.removable = true;
3167 } else {
3168 cfg.dev_type = arg;
3169 }
3170 break;
3171 case 'F':
3172 // fix firmware bug
3173 if ((arg = strtok(NULL, delim)) == NULL) {
3174 missingarg = 1;
3175 } else if (!strcmp(arg, "none")) {
3176 cfg.fix_firmwarebug = FIX_NONE;
3177 } else if (!strcmp(arg, "samsung")) {
3178 cfg.fix_firmwarebug = FIX_SAMSUNG;
3179 } else if (!strcmp(arg, "samsung2")) {
3180 cfg.fix_firmwarebug = FIX_SAMSUNG2;
3181 } else if (!strcmp(arg, "samsung3")) {
3182 cfg.fix_firmwarebug = FIX_SAMSUNG3;
3183 } else {
3184 badarg = 1;
3185 }
3186 break;
3187 case 'H':
3188 // check SMART status
3189 cfg.smartcheck = true;
3190 break;
3191 case 'f':
3192 // check for failure of usage attributes
3193 cfg.usagefailed = true;
3194 break;
3195 case 't':
3196 // track changes in all vendor attributes
3197 cfg.prefail = true;
3198 cfg.usage = true;
3199 break;
3200 case 'p':
3201 // track changes in prefail vendor attributes
3202 cfg.prefail = true;
3203 break;
3204 case 'u':
3205 // track changes in usage vendor attributes
3206 cfg.usage = true;
3207 break;
3208 case 'l':
3209 // track changes in SMART logs
3210 if ((arg = strtok(NULL, delim)) == NULL) {
3211 missingarg = 1;
3212 } else if (!strcmp(arg, "selftest")) {
3213 // track changes in self-test log
3214 cfg.selftest = true;
3215 } else if (!strcmp(arg, "error")) {
3216 // track changes in ATA error log
3217 cfg.errorlog = true;
3218 } else {
3219 badarg = 1;
3220 }
3221 break;
3222 case 'a':
3223 // monitor everything
3224 cfg.smartcheck = true;
3225 cfg.prefail = true;
3226 cfg.usagefailed = true;
3227 cfg.usage = true;
3228 cfg.selftest = true;
3229 cfg.errorlog = true;
3230 break;
3231 case 'o':
3232 // automatic offline testing enable/disable
3233 if ((arg = strtok(NULL, delim)) == NULL) {
3234 missingarg = 1;
3235 } else if (!strcmp(arg, "on")) {
3236 cfg.autoofflinetest = 2;
3237 } else if (!strcmp(arg, "off")) {
3238 cfg.autoofflinetest = 1;
3239 } else {
3240 badarg = 1;
3241 }
3242 break;
3243 case 'n':
3244 // skip disk check if in idle or standby mode
3245 if (!(arg = strtok(NULL, delim)))
3246 missingarg = 1;
3247 else {
3248 char *endptr = NULL;
3249 char *next = strchr(const_cast<char*>(arg), ',');
3250
3251 cfg.powerquiet = false;
3252 cfg.powerskipmax = 0;
3253
3254 if (next!=NULL) *next='\0';
3255 if (!strcmp(arg, "never"))
3256 cfg.powermode = 0;
3257 else if (!strcmp(arg, "sleep"))
3258 cfg.powermode = 1;
3259 else if (!strcmp(arg, "standby"))
3260 cfg.powermode = 2;
3261 else if (!strcmp(arg, "idle"))
3262 cfg.powermode = 3;
3263 else
3264 badarg = 1;
3265
3266 // if optional arguments are present
3267 if (!badarg && next!=NULL) {
3268 next++;
3269 cfg.powerskipmax = strtol(next, &endptr, 10);
3270 if (endptr == next)
3271 cfg.powerskipmax = 0;
3272 else {
3273 next = endptr + (*endptr != '\0');
3274 if (cfg.powerskipmax <= 0)
3275 badarg = 1;
3276 }
3277 if (*next != '\0') {
3278 if (!strcmp("q", next))
3279 cfg.powerquiet = true;
3280 else {
3281 badarg = 1;
3282 }
3283 }
3284 }
3285 }
3286 break;
3287 case 'S':
3288 // automatic attribute autosave enable/disable
3289 if ((arg = strtok(NULL, delim)) == NULL) {
3290 missingarg = 1;
3291 } else if (!strcmp(arg, "on")) {
3292 cfg.autosave = 2;
3293 } else if (!strcmp(arg, "off")) {
3294 cfg.autosave = 1;
3295 } else {
3296 badarg = 1;
3297 }
3298 break;
3299 case 's':
3300 // warn user, and delete any previously given -s REGEXP Directives
3301 if (!cfg.test_regex.empty()){
3302 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3303 configfile, lineno, name, cfg.test_regex.get_pattern());
3304 cfg.test_regex = regular_expression();
3305 }
3306 // check for missing argument
3307 if (!(arg = strtok(NULL, delim))) {
3308 missingarg = 1;
3309 }
3310 // Compile regex
3311 else {
3312 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3313 // not a valid regular expression!
3314 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3315 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3316 return -1;
3317 }
3318 }
3319 // Do a bit of sanity checking and warn user if we think that
3320 // their regexp is "strange". User probably confused about shell
3321 // glob(3) syntax versus regular expression syntax regexp(7).
3322 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3323 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3324 configfile, lineno, name, val+1, arg[val], arg);
3325 break;
3326 case 'm':
3327 // send email to address that follows
3328 if (!(arg = strtok(NULL,delim)))
3329 missingarg = 1;
3330 else {
3331 if (!cfg.emailaddress.empty())
3332 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3333 configfile, lineno, name, cfg.emailaddress.c_str());
3334 cfg.emailaddress = arg;
3335 }
3336 break;
3337 case 'M':
3338 // email warning options
3339 if (!(arg = strtok(NULL, delim)))
3340 missingarg = 1;
3341 else if (!strcmp(arg, "once"))
3342 cfg.emailfreq = 1;
3343 else if (!strcmp(arg, "daily"))
3344 cfg.emailfreq = 2;
3345 else if (!strcmp(arg, "diminishing"))
3346 cfg.emailfreq = 3;
3347 else if (!strcmp(arg, "test"))
3348 cfg.emailtest = 1;
3349 else if (!strcmp(arg, "exec")) {
3350 // Get the next argument (the command line)
3351 if (!(arg = strtok(NULL, delim))) {
3352 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3353 configfile, lineno, name, token);
3354 return -1;
3355 }
3356 // Free the last cmd line given if any, and copy new one
3357 if (!cfg.emailcmdline.empty())
3358 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3359 configfile, lineno, name, cfg.emailcmdline.c_str());
3360 cfg.emailcmdline = arg;
3361 }
3362 else
3363 badarg = 1;
3364 break;
3365 case 'i':
3366 // ignore failure of usage attribute
3367 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3368 return -1;
3369 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3370 break;
3371 case 'I':
3372 // ignore attribute for tracking purposes
3373 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3374 return -1;
3375 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3376 break;
3377 case 'r':
3378 // print raw value when tracking
3379 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3380 return -1;
3381 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3382 if (*excl == '!') // attribute change is critical
3383 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3384 break;
3385 case 'R':
3386 // track changes in raw value (forces printing of raw value)
3387 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3388 return -1;
3389 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3390 if (*excl == '!') // raw value change is critical
3391 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3392 break;
3393 case 'W':
3394 // track Temperature
3395 if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3396 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
3397 return -1;
3398 break;
3399 case 'v':
3400 // non-default vendor-specific attribute meaning
3401 if (!(arg=strtok(NULL,delim))) {
3402 missingarg = 1;
3403 } else if (parse_attribute_def(arg, cfg.attributedefs)) {
3404 badarg = 1;
3405 }
3406 break;
3407 case 'P':
3408 // Define use of drive-specific presets.
3409 if (!(arg = strtok(NULL, delim))) {
3410 missingarg = 1;
3411 } else if (!strcmp(arg, "use")) {
3412 cfg.ignorepresets = false;
3413 } else if (!strcmp(arg, "ignore")) {
3414 cfg.ignorepresets = true;
3415 } else if (!strcmp(arg, "show")) {
3416 cfg.showpresets = true;
3417 } else if (!strcmp(arg, "showall")) {
3418 showallpresets();
3419 } else {
3420 badarg = 1;
3421 }
3422 break;
3423 default:
3424 // Directive not recognized
3425 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3426 configfile, lineno, name, token);
3427 Directives();
3428 return -1;
3429 }
3430 if (missingarg) {
3431 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3432 configfile, lineno, name, token);
3433 }
3434 if (badarg) {
3435 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3436 configfile, lineno, name, token, arg);
3437 }
3438 if (missingarg || badarg) {
3439 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
3440 printoutvaliddirectiveargs(LOG_CRIT, sym);
3441 PrintOut(LOG_CRIT, "\n");
3442 return -1;
3443 }
3444
3445 return 1;
3446 }
3447
3448 // Scan directive for configuration file
3449 #define SCANDIRECTIVE "DEVICESCAN"
3450
3451 // This is the routine that adds things to the conf_entries list.
3452 //
3453 // Return values are:
3454 // 1: parsed a normal line
3455 // 0: found comment or blank line
3456 // -1: found SCANDIRECTIVE line
3457 // -2: found an error
3458 //
3459 // Note: this routine modifies *line from the caller!
3460 static int ParseConfigLine(dev_config_vector & conf_entries, int entry, int lineno, /*const*/ char * line)
3461 {
3462 char *token=NULL;
3463 char *name=NULL;
3464 const char *delim = " \n\t";
3465 int devscan=0;
3466
3467 // get first token: device name. If a comment, skip line
3468 if (!(name=strtok(line,delim)) || *name=='#') {
3469 return 0;
3470 }
3471
3472 // Have we detected the SCANDIRECTIVE directive?
3473 if (!strcmp(SCANDIRECTIVE,name)){
3474 devscan=1;
3475 if (entry) {
3476 PrintOut(LOG_INFO,"Scan Directive %s (line %d) must be the first entry in %s\n",name, lineno, configfile);
3477 return -2;
3478 }
3479 }
3480
3481 // We've got a legit entry, make space to store it
3482 conf_entries.push_back( dev_config() );
3483 dev_config & cfg = conf_entries.back();
3484
3485 cfg.name = name;
3486
3487 // Store line number, and by default check for both device types.
3488 cfg.lineno=lineno;
3489
3490 // parse tokens one at a time from the file.
3491 while ((token=strtok(NULL,delim))){
3492 int retval=ParseToken(token,cfg);
3493
3494 if (retval==0)
3495 // No tokens left:
3496 break;
3497
3498 if (retval>0) {
3499 // Parsed token
3500 #if (0)
3501 PrintOut(LOG_INFO,"Parsed token %s\n",token);
3502 #endif
3503 continue;
3504 }
3505
3506 if (retval<0) {
3507 // error found on the line
3508 return -2;
3509 }
3510 }
3511
3512 // If NO monitoring directives are set, then set all of them.
3513 if (!(cfg.smartcheck || cfg.usagefailed || cfg.prefail ||
3514 cfg.usage || cfg.selftest || cfg.errorlog ||
3515 cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
3516
3517 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3518 cfg.name.c_str(), cfg.lineno, configfile);
3519
3520 cfg.smartcheck = true;
3521 cfg.usagefailed = true;
3522 cfg.prefail = true;
3523 cfg.usage = true;
3524 cfg.selftest = true;
3525 cfg.errorlog = true;
3526 }
3527
3528 // additional sanity check. Has user set -M options without -m?
3529 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
3530 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3531 cfg.name.c_str(), cfg.lineno, configfile);
3532 return -2;
3533 }
3534
3535 // has the user has set <nomailer>?
3536 if (cfg.emailaddress == "<nomailer>") {
3537 // check that -M exec is also set
3538 if (cfg.emailcmdline.empty()){
3539 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3540 cfg.name.c_str(), cfg.lineno, configfile);
3541 return -2;
3542 }
3543 // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
3544 cfg.emailaddress.clear();
3545 }
3546
3547 // set cfg.emailfreq to 1 (once) if user hasn't set it
3548 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq)
3549 cfg.emailfreq = 1;
3550
3551 if (devscan)
3552 return -1;
3553 else
3554 return 1;
3555 }
3556
3557 // Parses a configuration file. Return values are:
3558 // N=>0: found N entries
3559 // -1: syntax error in config file
3560 // -2: config file does not exist
3561 // -3: config file exists but cannot be read
3562 //
3563 // In the case where the return value is 0, there are three
3564 // possiblities:
3565 // Empty configuration file ==> conf_entries.empty()
3566 // No configuration file ==> conf_entries[0].lineno == 0
3567 // SCANDIRECTIVE found ==> conf_entries[0].lineno != 0
3568 static int ParseConfigFile(dev_config_vector & conf_entries)
3569 {
3570 // maximum line length in configuration file
3571 const int MAXLINELEN = 256;
3572 // maximum length of a continued line in configuration file
3573 const int MAXCONTLINE = 1023;
3574
3575 stdio_file f;
3576 // Open config file, if it exists and is not <stdin>
3577 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
3578 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
3579 // file exists but we can't read it or it should exist due to '-c' option
3580 int ret = (errno!=ENOENT ? -3 : -2);
3581 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
3582 strerror(errno),configfile);
3583 return ret;
3584 }
3585 }
3586 else // read from stdin ('-c -' option)
3587 f.open(stdin);
3588
3589 // No configuration file found -- use fake one
3590 int entry = 0;
3591 if (!f) {
3592 char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
3593
3594 if (ParseConfigLine(conf_entries, entry, 0, fakeconfig) != -1)
3595 throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
3596 return 0;
3597 }
3598
3599 #ifdef __CYGWIN__
3600 setmode(fileno(f), O_TEXT); // Allow files with \r\n
3601 #endif
3602
3603 // configuration file exists
3604 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
3605
3606 // parse config file line by line
3607 int lineno = 1, cont = 0, contlineno = 0;
3608 char line[MAXLINELEN+2];
3609 char fullline[MAXCONTLINE+1];
3610
3611 for (;;) {
3612 int len=0,scandevice;
3613 char *lastslash;
3614 char *comment;
3615 char *code;
3616
3617 // make debugging simpler
3618 memset(line,0,sizeof(line));
3619
3620 // get a line
3621 code=fgets(line, MAXLINELEN+2, f);
3622
3623 // are we at the end of the file?
3624 if (!code){
3625 if (cont) {
3626 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3627 // See if we found a SCANDIRECTIVE directive
3628 if (scandevice==-1)
3629 return 0;
3630 // did we find a syntax error
3631 if (scandevice==-2)
3632 return -1;
3633 // the final line is part of a continuation line
3634 cont=0;
3635 entry+=scandevice;
3636 }
3637 break;
3638 }
3639
3640 // input file line number
3641 contlineno++;
3642
3643 // See if line is too long
3644 len=strlen(line);
3645 if (len>MAXLINELEN){
3646 const char *warn;
3647 if (line[len-1]=='\n')
3648 warn="(including newline!) ";
3649 else
3650 warn="";
3651 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3652 (int)contlineno,configfile,warn,(int)MAXLINELEN);
3653 return -1;
3654 }
3655
3656 // Ignore anything after comment symbol
3657 if ((comment=strchr(line,'#'))){
3658 *comment='\0';
3659 len=strlen(line);
3660 }
3661
3662 // is the total line (made of all continuation lines) too long?
3663 if (cont+len>MAXCONTLINE){
3664 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3665 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
3666 return -1;
3667 }
3668
3669 // copy string so far into fullline, and increment length
3670 strcpy(fullline+cont,line);
3671 cont+=len;
3672
3673 // is this a continuation line. If so, replace \ by space and look at next line
3674 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
3675 *(fullline+(cont-len)+(lastslash-line))=' ';
3676 continue;
3677 }
3678
3679 // Not a continuation line. Parse it
3680 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3681
3682 // did we find a scandevice directive?
3683 if (scandevice==-1)
3684 return 0;
3685 // did we find a syntax error
3686 if (scandevice==-2)
3687 return -1;
3688
3689 entry+=scandevice;
3690 lineno++;
3691 cont=0;
3692 }
3693
3694 // note -- may be zero if syntax of file OK, but no valid entries!
3695 return entry;
3696 }
3697
3698 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3699 <LIST> is the list of valid arguments for option opt. */
3700 void PrintValidArgs(char opt) {
3701 const char *s;
3702
3703 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
3704 if (!(s = GetValidArgList(opt)))
3705 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
3706 else
3707 PrintOut(LOG_CRIT, "%s", (char *)s);
3708 PrintOut(LOG_CRIT, " <=======\n");
3709 }
3710
// Tells whether path names an absolute location.
// A leading '/' always qualifies. On Windows and Cygwin builds a leading
// backslash or a drive prefix (one letter, ':', then '/' or '\') qualifies too.
static bool is_abs_path(const char * path)
{
#if defined(_WIN32) || defined(__CYGWIN__)
  if (*path == '/' || *path == '\\')
    return true;
  // %n is only reached (and 'consumed' only written) if the whole
  // drive prefix pattern matched.
  int consumed = -1;
  sscanf(path, "%*1[A-Za-z]:%*1[/\\]%n", &consumed);
  return (consumed > 0);
#else
  return (*path == '/');
#endif
}
3726
3727 // Parses input line, prints usage message and
3728 // version/license/copyright messages
3729 void ParseOpts(int argc, char **argv){
3730 int optchar;
3731 char *tailptr;
3732 long lchecktime;
3733 // Please update GetValidArgList() if you edit shortopts
3734 const char *shortopts = "c:l:q:dDni:p:r:s:A:B:Vh?";
3735 char *arg;
3736 // Please update GetValidArgList() if you edit longopts
3737 struct option longopts[] = {
3738 { "configfile", required_argument, 0, 'c' },
3739 { "logfacility", required_argument, 0, 'l' },
3740 { "quit", required_argument, 0, 'q' },
3741 { "debug", no_argument, 0, 'd' },
3742 { "showdirectives", no_argument, 0, 'D' },
3743 { "interval", required_argument, 0, 'i' },
3744 #ifndef _WIN32
3745 { "no-fork", no_argument, 0, 'n' },
3746 #endif
3747 { "pidfile", required_argument, 0, 'p' },
3748 { "report", required_argument, 0, 'r' },
3749 { "savestates", required_argument, 0, 's' },
3750 { "attributelog", required_argument, 0, 'A' },
3751 { "drivedb", required_argument, 0, 'B' },
3752 #if defined(_WIN32) || defined(__CYGWIN__)
3753 { "service", no_argument, 0, 'n' },
3754 #endif
3755 { "version", no_argument, 0, 'V' },
3756 { "license", no_argument, 0, 'V' },
3757 { "copyright", no_argument, 0, 'V' },
3758 { "help", no_argument, 0, 'h' },
3759 { "usage", no_argument, 0, 'h' },
3760 { 0, 0, 0, 0 }
3761 };
3762
3763 opterr=optopt=0;
3764 bool badarg = false;
3765 bool no_defaultdb = false; // set true on '-B FILE'
3766
3767 // Parse input options. This horrible construction is so that emacs
3768 // indents properly. Sorry.
3769 while (-1 != (optchar =
3770 getopt_long(argc, argv, shortopts, longopts, NULL)
3771 )) {
3772
3773 switch(optchar) {
3774 case 'q':
3775 // when to quit
3776 if (!(strcmp(optarg,"nodev"))) {
3777 quit=0;
3778 } else if (!(strcmp(optarg,"nodevstartup"))) {
3779 quit=1;
3780 } else if (!(strcmp(optarg,"never"))) {
3781 quit=2;
3782 } else if (!(strcmp(optarg,"onecheck"))) {
3783 quit=3;
3784 debugmode=1;
3785 } else if (!(strcmp(optarg,"showtests"))) {
3786 quit=4;
3787 debugmode=1;
3788 } else if (!(strcmp(optarg,"errors"))) {
3789 quit=5;
3790 } else {
3791 badarg = true;
3792 }
3793 break;
3794 case 'l':
3795 // set the log facility level
3796 if (!strcmp(optarg, "daemon"))
3797 facility=LOG_DAEMON;
3798 else if (!strcmp(optarg, "local0"))
3799 facility=LOG_LOCAL0;
3800 else if (!strcmp(optarg, "local1"))
3801 facility=LOG_LOCAL1;
3802 else if (!strcmp(optarg, "local2"))
3803 facility=LOG_LOCAL2;
3804 else if (!strcmp(optarg, "local3"))
3805 facility=LOG_LOCAL3;
3806 else if (!strcmp(optarg, "local4"))
3807 facility=LOG_LOCAL4;
3808 else if (!strcmp(optarg, "local5"))
3809 facility=LOG_LOCAL5;
3810 else if (!strcmp(optarg, "local6"))
3811 facility=LOG_LOCAL6;
3812 else if (!strcmp(optarg, "local7"))
3813 facility=LOG_LOCAL7;
3814 else
3815 badarg = true;
3816 break;
3817 case 'd':
3818 // enable debug mode
3819 debugmode = 1;
3820 break;
3821 case 'n':
3822 // don't fork()
3823 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
3824 do_fork = false;
3825 #endif
3826 break;
3827 case 'D':
3828 // print summary of all valid directives
3829 debugmode = 1;
3830 Directives();
3831 EXIT(0);
3832 break;
3833 case 'i':
3834 // Period (time interval) for checking
3835 // strtol will set errno in the event of overflow, so we'll check it.
3836 errno = 0;
3837 lchecktime = strtol(optarg, &tailptr, 10);
3838 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
3839 debugmode=1;
3840 PrintHead();
3841 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
3842 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
3843 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3844 EXIT(EXIT_BADCMD);
3845 }
3846 checktime = (int)lchecktime;
3847 break;
3848 case 'r':
3849 // report IOCTL transactions
3850 {
3851 int i;
3852 char *s;
3853
3854 // split_report_arg() may modify its first argument string, so use a
3855 // copy of optarg in case we want optarg for an error message.
3856 if (!(s = strdup(optarg))) {
3857 PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
3858 EXIT(EXIT_NOMEM);
3859 }
3860 if (split_report_arg(s, &i)) {
3861 badarg = true;
3862 } else if (i<1 || i>3) {
3863 debugmode=1;
3864 PrintHead();
3865 PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
3866 PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3867 EXIT(EXIT_BADCMD);
3868 } else if (!strcmp(s,"ioctl")) {
3869 con->reportataioctl = con->reportscsiioctl = i;
3870 } else if (!strcmp(s,"ataioctl")) {
3871 con->reportataioctl = i;
3872 } else if (!strcmp(s,"scsiioctl")) {
3873 con->reportscsiioctl = i;
3874 } else {
3875 badarg = true;
3876 }
3877 free(s); // TODO: use std::string
3878 }
3879 break;
3880 case 'c':
3881 // alternate configuration file
3882 if (strcmp(optarg,"-"))
3883 configfile = (configfile_alt = optarg).c_str();
3884 else // read from stdin
3885 configfile=configfile_stdin;
3886 break;
3887 case 'p':
3888 // output file with PID number
3889 pid_file = optarg;
3890 break;
3891 case 's':
3892 // path prefix of persistent state file
3893 state_path_prefix = optarg;
3894 break;
3895 case 'A':
3896 // path prefix of attribute log file
3897 attrlog_path_prefix = optarg;
3898 break;
3899 case 'B':
3900 {
3901 const char * path = optarg;
3902 if (*path == '+' && path[1])
3903 path++;
3904 else
3905 no_defaultdb = true;
3906 unsigned char savedebug = debugmode; debugmode = 1;
3907 if (!read_drive_database(path))
3908 EXIT(EXIT_BADCMD);
3909 debugmode = savedebug;
3910 }
3911 break;
3912 case 'V':
3913 // print version and CVS info
3914 debugmode = 1;
3915 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
3916 EXIT(0);
3917 break;
3918 case 'h':
3919 // help: print summary of command-line options
3920 debugmode=1;
3921 PrintHead();
3922 Usage();
3923 EXIT(0);
3924 break;
3925 case '?':
3926 default:
3927 // unrecognized option
3928 debugmode=1;
3929 PrintHead();
3930 // Point arg to the argument in which this option was found.
3931 arg = argv[optind-1];
3932 // Check whether the option is a long option that doesn't map to -h.
3933 if (arg[1] == '-' && optchar != 'h') {
3934 // Iff optopt holds a valid option then argument must be missing.
3935 if (optopt && (strchr(shortopts, optopt) != NULL)) {
3936 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
3937 PrintValidArgs(optopt);
3938 } else {
3939 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
3940 }
3941 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
3942 EXIT(EXIT_BADCMD);
3943 }
3944 if (optopt) {
3945 // Iff optopt holds a valid option then argument must be missing.
3946 if (strchr(shortopts, optopt) != NULL){
3947 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
3948 PrintValidArgs(optopt);
3949 } else {
3950 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
3951 }
3952 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3953 EXIT(EXIT_BADCMD);
3954 }
3955 Usage();
3956 EXIT(0);
3957 }
3958
3959 // Check to see if option had an unrecognized or incorrect argument.
3960 if (badarg) {
3961 debugmode=1;
3962 PrintHead();
3963 // It would be nice to print the actual option name given by the user
3964 // here, but we just print the short form. Please fix this if you know
3965 // a clean way to do it.
3966 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
3967 PrintValidArgs(optchar);
3968 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3969 EXIT(EXIT_BADCMD);
3970 }
3971 }
3972
3973 // non-option arguments are not allowed
3974 if (argc > optind) {
3975 debugmode=1;
3976 PrintHead();
3977 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
3978 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3979 EXIT(EXIT_BADCMD);
3980 }
3981
3982 // no pidfile in debug mode
3983 if (debugmode && !pid_file.empty()) {
3984 debugmode=1;
3985 PrintHead();
3986 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
3987 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
3988 EXIT(EXIT_BADCMD);
3989 }
3990
3991 // absolute path is required due to chdir('/') after fork().
3992 if (!state_path_prefix.empty() && !debugmode && !is_abs_path(state_path_prefix.c_str())) {
3993 debugmode=1;
3994 PrintHead();
3995 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
3996 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
3997 state_path_prefix.c_str());
3998 EXIT(EXIT_BADCMD);
3999 }
4000
4001 // absolute path is required due to chdir('/') after fork().
4002 if (!attrlog_path_prefix.empty() && !debugmode && !is_abs_path(attrlog_path_prefix.c_str())) {
4003 debugmode=1;
4004 PrintHead();
4005 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4006 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4007 attrlog_path_prefix.c_str());
4008 EXIT(EXIT_BADCMD);
4009 }
4010
4011 // Read or init drive database
4012 if (!no_defaultdb) {
4013 unsigned char savedebug = debugmode; debugmode = 1;
4014 if (!read_default_drive_databases())
4015 EXIT(EXIT_BADCMD);
4016 debugmode = savedebug;
4017 }
4018
4019 // print header
4020 PrintHead();
4021 }
4022
4023 // Function we call if no configuration file was found or if the
4024 // SCANDIRECTIVE Directive was found. It makes entries for device
4025 // names returned by scan_smart_devices() in os_OSNAME.cpp
4026 static int MakeConfigEntries(const dev_config & base_cfg,
4027 dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4028 {
4029 // make list of devices
4030 smart_device_list devlist;
4031 if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4032 PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4033
4034 // if no devices, or error constructing list, return
4035 if (devlist.size() <= 0)
4036 return 0;
4037
4038 // loop over entries to create
4039 for (unsigned i = 0; i < devlist.size(); i++) {
4040 // Move device pointer
4041 smart_device * dev = devlist.release(i);
4042 scanned_devs.push_back(dev);
4043
4044 // Copy configuration, update device and type name
4045 conf_entries.push_back(base_cfg);
4046 dev_config & cfg = conf_entries.back();
4047 cfg.name = dev->get_info().info_name;
4048 cfg.dev_type = type;
4049 }
4050
4051 return devlist.size();
4052 }
4053
4054 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4055 {
4056 if (!debugmode && scandirective)
4057 return;
4058 if (line)
4059 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4060 "Unable to register %s device %s at line %d of file %s\n",
4061 type, name, line, configfile);
4062 else
4063 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4064 type, name);
4065 return;
4066 }
4067
4068 // Returns negative value (see ParseConfigFile()) if config file
4069 // had errors, else number of entries which may be zero or positive.
4070 // If we found no configuration file, or it contained SCANDIRECTIVE,
4071 // then *scanning is set to 1, else 0.
4072 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4073 {
4074 // parse configuration file configfile (normally /etc/smartd.conf)
4075 int entries = ParseConfigFile(conf_entries);
4076
4077 if (entries < 0) {
4078 // There was an error reading the configuration file.
4079 conf_entries.clear();
4080 if (entries == -1)
4081 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4082 return entries;
4083 }
4084
4085 // no error parsing config file.
4086 if (entries) {
4087 // we did not find a SCANDIRECTIVE and did find valid entries
4088 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4089 }
4090 else if (conf_entries.size() == 1) {
4091 // we found a SCANDIRECTIVE or there was no configuration file so
4092 // scan. Configuration file's first entry contains all options
4093 // that were set
4094 dev_config first = conf_entries.front();
4095 conf_entries.clear();
4096
4097 if (first.lineno)
4098 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4099 else
4100 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4101
4102 // make config list of devices to search for
4103 MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4104
4105 // warn user if scan table found no devices
4106 if (conf_entries.empty())
4107 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4108 }
4109 else
4110 PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4111
4112 return conf_entries.size();
4113 }
4114
4115
4116 // This function tries devices from conf_entries. Each one that can be
4117 // registered is moved onto the [ata|scsi]devices lists and removed
4118 // from the conf_entries list.
4119 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4120 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4121 {
4122 // start by clearing lists/memory of ALL existing devices
4123 configs.clear();
4124 devices.clear();
4125 states.clear();
4126
4127 // Register entries
4128 for (unsigned i = 0; i < conf_entries.size(); i++){
4129
4130 dev_config cfg = conf_entries[i];
4131
4132 // get device of appropriate type
4133 // TODO: exception handling
4134 smart_device * dev = 0;
4135 bool scanning = false;
4136
4137 // Device may already be detected during devicescan
4138 if (i < scanned_devs.size()) {
4139 dev = scanned_devs.release(i);
4140 if (dev)
4141 scanning = true;
4142 }
4143
4144 if (!dev) {
4145 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4146 if (!dev) {
4147 if (cfg.dev_type.empty())
4148 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4149 else
4150 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4151 continue;
4152 }
4153 }
4154
4155 // Save old info
4156 smart_device::device_info oldinfo = dev->get_info();
4157
4158 // Open with autodetect support, may return 'better' device
4159 dev = dev->autodetect_open();
4160
4161 // Report if type has changed
4162 if (/* ent->dev_type && */ oldinfo.dev_type != dev->get_dev_type())
4163 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4164 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4165
4166 if (!dev->is_open()) {
4167 // For linux+devfs, a nonexistent device gives a strange error
4168 // message. This makes the error message a bit more sensible.
4169 // If no debug and scanning - don't print errors
4170 if (debugmode || !scanning)
4171 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4172 delete dev;
4173 continue;
4174 }
4175
4176 // Update informal name
4177 cfg.name = dev->get_info().info_name;
4178 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4179
4180 // Prepare initial state
4181 dev_state state;
4182
4183 // register ATA devices
4184 if (dev->is_ata()){
4185 if (ATADeviceScan(cfg, state, dev->to_ata())) {
4186 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4187 delete dev; dev = 0;
4188 }
4189 else {
4190 // move onto the list of ata devices
4191 configs.push_back(cfg);
4192 states.push_back(state);
4193 devices.push_back(dev);
4194 }
4195 }
4196
4197 // or register SCSI devices
4198 else if (dev->is_scsi()){
4199 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4200 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4201 delete dev; dev = 0;
4202 }
4203 else {
4204 // move onto the list of scsi devices
4205 configs.push_back(cfg);
4206 states.push_back(state);
4207 devices.push_back(dev);
4208 }
4209 }
4210
4211 // if device is explictly listed and we can't register it, then
4212 // exit unless the user has specified that the device is removable
4213 if (!dev && !scanning) {
4214 if (cfg.removable || quit==2)
4215 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4216 else {
4217 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4218 EXIT(EXIT_BADDEV);
4219 }
4220 }
4221 }
4222 }
4223
4224
4225 // Main program without exception handling
4226 int main_worker(int argc, char **argv)
4227 {
4228 // Initialize interface
4229 smart_interface::init();
4230 if (!smi())
4231 return 1;
4232
4233 // external control variables for ATA disks
4234 smartmonctrl control;
4235
4236 // is it our first pass through?
4237 bool firstpass = true;
4238
4239 // next time to wake up
4240 time_t wakeuptime;
4241
4242 // for simplicity, null all global communications variables/lists
4243 con=&control;
4244 memset(con, 0,sizeof(control));
4245
4246 // parse input and print header and usage info if needed
4247 ParseOpts(argc,argv);
4248
4249 // do we mute printing from ataprint commands?
4250 con->printing_switchable = false;
4251 con->dont_print = !debugmode;
4252
4253 // Configuration for each device
4254 dev_config_vector configs;
4255 // Device states
4256 dev_state_vector states;
4257 // Devices to monitor
4258 smart_device_list devices;
4259
4260 bool write_states_always = true;
4261
4262 // the main loop of the code
4263 for (;;) {
4264
4265 // are we exiting from a signal?
4266 if (caughtsigEXIT) {
4267 // are we exiting with SIGTERM?
4268 int isterm=(caughtsigEXIT==SIGTERM);
4269 int isquit=(caughtsigEXIT==SIGQUIT);
4270 int isok=debugmode?isterm || isquit:isterm;
4271
4272 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4273 caughtsigEXIT, strsignal(caughtsigEXIT));
4274
4275 if (!isok)
4276 return EXIT_SIGNAL;
4277
4278 // Write state files
4279 if (!state_path_prefix.empty())
4280 write_all_dev_states(configs, states);
4281
4282 return 0;
4283 }
4284
4285 // Should we (re)read the config file?
4286 if (firstpass || caughtsigHUP){
4287 if (!firstpass) {
4288 #ifdef __CYGWIN__
4289 // Workaround for missing SIGQUIT via keyboard on Cygwin
4290 if (caughtsigHUP==2) {
4291 // Simulate SIGQUIT if another SIGINT arrives soon
4292 caughtsigHUP=0;
4293 sleep(1);
4294 if (caughtsigHUP==2) {
4295 caughtsigEXIT=SIGQUIT;
4296 continue;
4297 }
4298 caughtsigHUP=2;
4299 }
4300 #endif
4301 // Write state files
4302 if (!state_path_prefix.empty())
4303 write_all_dev_states(configs, states);
4304
4305 PrintOut(LOG_INFO,
4306 caughtsigHUP==1?
4307 "Signal HUP - rereading configuration file %s\n":
4308 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4309 configfile);
4310 }
4311
4312 {
4313 dev_config_vector conf_entries; // Entries read from smartd.conf
4314 smart_device_list scanned_devs; // Devices found during scan
4315 // (re)reads config file, makes >=0 entries
4316 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
4317
4318 if (entries>=0) {
4319 // checks devices, then moves onto ata/scsi list or deallocates.
4320 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
4321 if (!(configs.size() == devices.size() && configs.size() == states.size()))
4322 throw std::logic_error("Invalid result from RegisterDevices");
4323 }
4324 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4325 // user has asked to continue on error in configuration file
4326 if (!firstpass)
4327 PrintOut(LOG_INFO,"Reusing previous configuration\n");
4328 }
4329 else {
4330 // exit with configuration file error status
4331 int status = (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4332 EXIT(status);
4333 }
4334 }
4335
4336 // Log number of devices we are monitoring...
4337 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
4338 int numata = 0;
4339 for (unsigned i = 0; i < devices.size(); i++) {
4340 if (devices.at(i)->is_ata())
4341 numata++;
4342 }
4343 PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4344 numata, devices.size() - numata);
4345 }
4346 else {
4347 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4348 EXIT(EXIT_NODEV);
4349 }
4350
4351 if (quit==4) {
4352 // user has asked to print test schedule
4353 PrintTestSchedule(configs, states, devices);
4354 EXIT(0);
4355 }
4356
4357 // reset signal
4358 caughtsigHUP=0;
4359
4360 // Always write state files after (re)configuration
4361 write_states_always = true;
4362 }
4363
4364 // check all devices once,
4365 // self tests are not started in first pass unless '-q onecheck' is specified
4366 CheckDevicesOnce(configs, states, devices, (!firstpass || quit==3));
4367
4368 // Write state files
4369 if (!state_path_prefix.empty())
4370 write_all_dev_states(configs, states, write_states_always);
4371 write_states_always = false;
4372
4373 // Write attribute logs
4374 if (!attrlog_path_prefix.empty())
4375 write_all_dev_attrlogs(configs, states);
4376
4377 // user has asked us to exit after first check
4378 if (quit==3) {
4379 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4380 "smartd is exiting (exit status 0)\n");
4381 EXIT(0);
4382 }
4383
4384 // fork into background if needed
4385 if (firstpass && !debugmode) {
4386 DaemonInit();
4387 }
4388
4389 // set exit and signal handlers, write PID file, set wake-up time
4390 if (firstpass){
4391 Initialize(&wakeuptime);
4392 firstpass = false;
4393 }
4394
4395 // sleep until next check time, or a signal arrives
4396 wakeuptime = dosleep(wakeuptime, write_states_always);
4397 }
4398 }
4399
4400
4401 #ifndef _WIN32
4402 // Main program
4403 int main(int argc, char **argv)
4404 #else
4405 // Windows: internal main function started direct or by service control manager
4406 static int smartd_main(int argc, char **argv)
4407 #endif
4408 {
4409 int status;
4410 try {
4411 // Do the real work ...
4412 status = main_worker(argc, argv);
4413 }
4414 catch (int ex) {
4415 // EXIT(status) arrives here
4416 status = ex;
4417 }
4418 catch (const std::bad_alloc & /*ex*/) {
4419 // Memory allocation failed (also thrown by std::operator new)
4420 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
4421 status = EXIT_NOMEM;
4422 }
4423 catch (const std::exception & ex) {
4424 // Other fatal errors
4425 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
4426 status = EXIT_BADCODE;
4427 }
4428
4429 if (is_initialized)
4430 status = Goodbye(status);
4431
4432 #ifdef _WIN32
4433 daemon_winsvc_exitcode = status;
4434 #endif
4435 return status;
4436 }
4437
4438
#ifdef _WIN32
// Windows entry point.
// Hands control to daemon_main(), which deals with the daemon and service
// specific commands and runs smartd_main() directly, from a new process,
// or via the service control manager.
int main(int argc, char **argv){
  // Settings for the smartd Windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd", "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  return daemon_main("smartd", &service_options , smartd_main, argc, argv);
}
#endif