]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Imported Upstream version 5.38+svn2993
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://smartmontools.sourceforge.net
3 *
4 * Copyright (C) 2002-9 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7 * Copyright (C) 2008-9 Christian Franke <smartmontools-support@lists.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #ifndef _GNU_SOURCE
25 // TODO: Why is this define necessary?
26 #define _GNU_SOURCE
27 #endif
28
29 // unconditionally included files
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/stat.h> // umask
33 #include <signal.h>
34 #include <fcntl.h>
35 #include <string.h>
36 #include <syslog.h>
37 #include <stdarg.h>
38 #include <stdlib.h>
39 #include <errno.h>
40 #include <time.h>
41 #include <limits.h>
42 #include <getopt.h>
43
44 #include <stdexcept>
45 #include <string>
46 #include <vector>
47 #include <algorithm> // std::replace()
48
49 // see which system files to conditionally include
50 #include "config.h"
51
52 // conditionally included files
53 #ifndef _WIN32
54 #include <sys/wait.h>
55 #endif
56 #ifdef HAVE_UNISTD_H
57 #include <unistd.h>
58 #endif
59 #ifdef HAVE_NETDB_H
60 #include <netdb.h>
61 #endif
62
63 #ifdef _WIN32
64 #ifdef _MSC_VER
65 #pragma warning(disable:4761) // "conversion supplied"
66 typedef unsigned short mode_t;
67 typedef int pid_t;
68 #endif
69 #include <io.h> // umask()
70 #include <process.h> // getpid()
71 #endif // _WIN32
72
73 #ifdef __CYGWIN__
74 // From <windows.h>:
75 // BOOL WINAPI FreeConsole(void);
76 extern "C" int __stdcall FreeConsole(void);
77 #include <io.h> // setmode()
78 #endif // __CYGWIN__
79
80 // locally included files
81 #include "int64.h"
82 #include "atacmds.h"
83 #include "dev_interface.h"
84 #include "extern.h"
85 #include "knowndrives.h"
86 #include "scsicmds.h"
87 #include "utility.h"
88
89 // This is for solaris, where signal() resets the handler to SIG_DFL
90 // after the first signal is caught.
91 #ifdef HAVE_SIGSET
92 #define SIGNALFN sigset
93 #else
94 #define SIGNALFN signal
95 #endif
96
97 #ifdef _WIN32
98 #include "hostname_win32.h" // gethost/domainname()
99 #define HAVE_GETHOSTNAME 1
100 #define HAVE_GETDOMAINNAME 1
101 // fork()/signal()/initd simulation for native Windows
102 #include "daemon_win32.h" // daemon_main/detach/signal()
103 #undef SIGNALFN
104 #define SIGNALFN daemon_signal
105 #define strsignal daemon_strsignal
106 #define sleep daemon_sleep
107 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
108 #define SIGQUIT SIGBREAK
109 #define SIGQUIT_KEYNAME "CONTROL-Break"
110 #else // _WIN32
111 #ifdef __CYGWIN__
112 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
113 #define SIGQUIT_KEYNAME "2x CONTROL-C"
114 #else // __CYGWIN__
115 #define SIGQUIT_KEYNAME "CONTROL-\\"
116 #endif // __CYGWIN__
117 #endif // _WIN32
118
119 #if defined (__SVR4) && defined (__sun)
120 extern "C" int getdomainname(char *, int); // no declaration in header files!
121 #endif
122
123 #define ARGUSED(x) ((void)(x))
124
125 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 2984 2009-11-14 22:46:31Z chrfranke $"
126 CONFIG_H_CVSID EXTERN_H_CVSID;
127
128 extern const char *reportbug;
129
130 extern unsigned char debugmode;
131
132 // smartd exit codes
133 #define EXIT_BADCMD 1 // command line did not parse
134 #define EXIT_BADCONF 2 // syntax error in config file
135 #define EXIT_STARTUP 3 // problem forking daemon
136 #define EXIT_PID 4 // problem creating pid file
137 #define EXIT_NOCONF 5 // config file does not exist
138 #define EXIT_READCONF 6 // config file exists but cannot be read
139
140 #define EXIT_NOMEM 8 // out of memory
141 #define EXIT_BADCODE 10 // internal error - should NEVER happen
142
143 #define EXIT_BADDEV 16 // we can't monitor this device
144 #define EXIT_NODEV 17 // no devices to monitor
145
146 #define EXIT_SIGNAL 254 // abort on signal
147
148 // command-line: how long to sleep between checks
149 #define CHECKTIME 1800
150 static int checktime=CHECKTIME;
151
152 // command-line: name of PID file (empty for no pid file)
153 static std::string pid_file;
154
155 // command-line: path prefix of persistent state file, empty if no persistence.
156 static std::string state_path_prefix
157 #ifdef SMARTMONTOOLS_SAVESTATES
158 = SMARTMONTOOLS_SAVESTATES
159 #endif
160 ;
161
162 // command-line: path prefix of attribute log file, empty if no logs.
163 static std::string attrlog_path_prefix
164 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
165 = SMARTMONTOOLS_ATTRIBUTELOG
166 #endif
167 ;
168
169 // configuration file name
170 #define CONFIGFILENAME "smartd.conf"
171
172 #ifndef _WIN32
173 static const char *configfile = SMARTMONTOOLS_SYSCONFDIR "/" CONFIGFILENAME ;
174 #else
175 static const char *configfile = "./" CONFIGFILENAME ;
176 #endif
177 // configuration file "name" if read from stdin
178 static const char * const configfile_stdin = "<stdin>";
179 // path of alternate configuration file
180 static std::string configfile_alt;
181
182 // command-line: when should we exit?
183 static int quit=0;
184
185 // command-line; this is the default syslog(3) log facility to use.
186 static int facility=LOG_DAEMON;
187
188 #ifndef _WIN32
189 // command-line: fork into background?
190 static bool do_fork=true;
191 #endif
192
193 // used for control of printing, passing arguments to atacmds.c
194 smartmonctrl *con=NULL;
195
196 // set to one if we catch a USR1 (check devices now)
197 volatile int caughtsigUSR1=0;
198
199 #ifdef _WIN32
200 // set to one if we catch a USR2 (toggle debug mode)
201 volatile int caughtsigUSR2=0;
202 #endif
203
204 // set to one if we catch a HUP (reload config file). In debug mode,
205 // set to two, if we catch INT (also reload config file).
206 volatile int caughtsigHUP=0;
207
208 // set to signal value if we catch INT, QUIT, or TERM
209 volatile int caughtsigEXIT=0;
210
// Bit values of the per-attribute monitoring flags.
// These are combined in dev_config::monitor_attr_flags (see below).
enum {
  MONITOR_IGN_FAILUSE = 1 << 0, // 0x01
  MONITOR_IGNORE      = 1 << 1, // 0x02
  MONITOR_RAW_PRINT   = 1 << 2, // 0x04
  MONITOR_RAW         = 1 << 3, // 0x08
  MONITOR_AS_CRIT     = 1 << 4, // 0x10
  MONITOR_RAW_AS_CRIT = 1 << 5  // 0x20
};
221
/// Table with one byte of flag bits per attribute ID.
/// ID 0 and IDs outside of the table are never stored and
/// are always reported as "not set".
class attribute_flags
{
public:
  /// Create a table with all flags cleared.
  attribute_flags()
    : m_flags()
    { }

  /// Return true if any bit of FLAG is set for attribute ID.
  bool is_set(int id, unsigned char flag) const
    {
      if (!in_range(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  /// Add the bits of FLAGS to attribute ID, ignored if ID is out of range.
  void set(int id, unsigned char flags)
    {
      if (!in_range(id))
        return;
      m_flags[id] |= flags;
    }

private:
  /// True if ID is a usable table index (ID 0 is excluded).
  bool in_range(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
241
242
243 /// Configuration data for a device. Read from smartd.conf.
244 /// Supports copy & assignment and is compatible with STL containers.
245 struct dev_config
246 {
247 int lineno; // Line number of entry in file
248 std::string name; // Device name
249 std::string dev_type; // Device type argument from -d directive, empty if none
250 std::string state_file; // Path of the persistent state file, empty if none
251 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
252 bool smartcheck; // Check SMART status
253 bool usagefailed; // Check for failed Usage Attributes
254 bool prefail; // Track changes in Prefail Attributes
255 bool usage; // Track changes in Usage Attributes
256 bool selftest; // Monitor number of selftest errors
257 bool errorlog; // Monitor number of ATA errors
258 bool permissive; // Ignore failed SMART commands
259 char autosave; // 1=disable, 2=enable Autosave Attributes
260 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
261 unsigned char fix_firmwarebug; // FIX_*, see atacmds.h
262 bool ignorepresets; // Ignore database of -v options
263 bool showpresets; // Show database entry for this device
264 bool removable; // Device may disappear (not be present)
265 char powermode; // skip check, if disk in idle or standby mode
266 bool powerquiet; // skip powermode 'skipping checks' message
267 int powerskipmax; // how many times can be check skipped
268 unsigned char tempdiff; // Track Temperature changes >= this limit
269 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
270 regular_expression test_regex; // Regex for scheduled testing
271
272 // Configuration of email warning messages
273 std::string emailcmdline; // script to execute, empty if no messages
274 std::string emailaddress; // email address, or empty
275 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
276 bool emailtest; // Send test email?
277
278 // ATA ONLY
279 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
280 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
281 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
282 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
283
284 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
285
286 ata_vendor_attr_defs attribute_defs; // -v options
287
288 dev_config();
289 };
290
291 dev_config::dev_config()
292 : lineno(0),
293 smartcheck(false),
294 usagefailed(false),
295 prefail(false),
296 usage(false),
297 selftest(false),
298 errorlog(false),
299 permissive(false),
300 autosave(0),
301 autoofflinetest(0),
302 fix_firmwarebug(FIX_NOTSPECIFIED),
303 ignorepresets(false),
304 showpresets(false),
305 removable(false),
306 powermode(0),
307 powerquiet(false),
308 powerskipmax(0),
309 tempdiff(0),
310 tempinfo(0), tempcrit(0),
311 emailfreq(0),
312 emailtest(false),
313 curr_pending_id(0), offl_pending_id(0),
314 curr_pending_incr(false), offl_pending_incr(false),
315 curr_pending_set(false), offl_pending_set(false)
316 {
317 }
318
319
320 // Number of allowed mail message types
321 const int SMARTD_NMAIL = 13;
322 // Type for '-M test' mails (state not persistent)
323 const int MAILTYPE_TEST = 0;
324 // TODO: Add const or enum for all mail types.
325
// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // No mail sent so far.
  mailinfo()
    : logged(0)
    , firstsent(0)
    , lastsent(0)
    { }
};
334
335 /// Persistent state data for a device.
336 struct persistent_dev_state
337 {
338 unsigned char tempmin, tempmax; // Min/Max Temperatures
339
340 unsigned char selflogcount; // total number of self-test errors
341 unsigned short selfloghour; // lifetime hours of last self-test error
342
343 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
344
345 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
346
347 // ATA ONLY
348 int ataerrorcount; // Total number of ATA errors
349
350 // Persistent part of ata_smart_values:
351 struct ata_attribute {
352 unsigned char id;
353 unsigned char val;
354 unsigned char worst; // Byte needed for 'raw64' attribute only.
355 uint64_t raw;
356
357 ata_attribute() : id(0), val(0), worst(0), raw(0) { }
358 };
359 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
360
361 persistent_dev_state();
362 };
363
364 persistent_dev_state::persistent_dev_state()
365 : tempmin(0), tempmax(0),
366 selflogcount(0),
367 selfloghour(0),
368 scheduled_test_next_check(0),
369 ataerrorcount(0)
370 {
371 }
372
373 /// Non-persistent state data for a device.
374 struct temp_dev_state
375 {
376 bool must_write; // true if persistent part should be written
377
378 bool not_cap_offline; // true == not capable of offline testing
379 bool not_cap_conveyance;
380 bool not_cap_short;
381 bool not_cap_long;
382 bool not_cap_selective;
383
384 unsigned char temperature; // last recorded Temperature (in Celsius)
385 time_t tempmin_delay; // time where Min Temperature tracking will start
386
387 bool powermodefail; // true if power mode check failed
388 int powerskipcnt; // Number of checks skipped due to idle or standby mode
389
390 // SCSI ONLY
391 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
392 unsigned char TempPageSupported; // has log sense temperature page (0xd)
393 unsigned char SuppressReport; // minimize nuisance reports
394 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
395 // know yet) 6 or 10
396
397 // ATA ONLY
398 uint64_t num_sectors; // Number of sectors (for selective self-test only)
399 ata_smart_values smartval; // SMART data
400 ata_smart_thresholds_pvt smartthres; // SMART thresholds
401
402 temp_dev_state();
403 };
404
405 temp_dev_state::temp_dev_state()
406 : must_write(false),
407 not_cap_offline(false),
408 not_cap_conveyance(false),
409 not_cap_short(false),
410 not_cap_long(false),
411 not_cap_selective(false),
412 temperature(0),
413 tempmin_delay(0),
414 powermodefail(false),
415 powerskipcnt(0),
416 SmartPageSupported(false),
417 TempPageSupported(false),
418 SuppressReport(false),
419 modese_len(0),
420 num_sectors(0)
421 {
422 memset(&smartval, 0, sizeof(smartval));
423 memset(&smartthres, 0, sizeof(smartthres));
424 }
425
426 /// Runtime state data for a device.
427 struct dev_state
428 : public persistent_dev_state,
429 public temp_dev_state
430 {
431 void update_persistent_state();
432 void update_temp_state();
433 };
434
435 /// Container for configuration info for each device.
436 typedef std::vector<dev_config> dev_config_vector;
437
438 /// Container for state info for each device.
439 typedef std::vector<dev_state> dev_state_vector;
440
441 // Copy ATA attributes to persistent state.
442 void dev_state::update_persistent_state()
443 {
444 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
445 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
446 ata_attribute & pa = ata_attributes[i];
447 pa.id = ta.id;
448 if (ta.id == 0) {
449 pa.val = pa.worst = 0; pa.raw = 0;
450 continue;
451 }
452 pa.val = ta.current;
453 pa.worst = ta.worst;
454 pa.raw = ta.raw[0]
455 | ( ta.raw[1] << 8)
456 | ( ta.raw[2] << 16)
457 | ((uint64_t)ta.raw[3] << 24)
458 | ((uint64_t)ta.raw[4] << 32)
459 | ((uint64_t)ta.raw[5] << 40);
460 }
461 }
462
463 // Copy ATA from persistent to temp state.
464 void dev_state::update_temp_state()
465 {
466 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
467 const ata_attribute & pa = ata_attributes[i];
468 ata_smart_attribute & ta = smartval.vendor_attributes[i];
469 ta.id = pa.id;
470 if (pa.id == 0) {
471 ta.current = ta.worst = 0;
472 memset(ta.raw, 0, sizeof(ta.raw));
473 continue;
474 }
475 ta.current = pa.val;
476 ta.worst = pa.worst;
477 ta.raw[0] = (unsigned char) pa.raw;
478 ta.raw[1] = (unsigned char)(pa.raw >> 8);
479 ta.raw[2] = (unsigned char)(pa.raw >> 16);
480 ta.raw[3] = (unsigned char)(pa.raw >> 24);
481 ta.raw[4] = (unsigned char)(pa.raw >> 32);
482 ta.raw[5] = (unsigned char)(pa.raw >> 40);
483 }
484 }
485
486 // Parse a line from a state file.
487 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
488 {
489 static regular_expression regex(
490 "^ *"
491 "((temperature-min)" // (1 (2)
492 "|(temperature-max)" // (3)
493 "|(self-test-errors)" // (4)
494 "|(self-test-last-err-hour)" // (5)
495 "|(scheduled-test-next-check)" // (6)
496 "|(ata-error-count)" // (7)
497 "|(mail\\.([0-9]+)\\." // (8 (9)
498 "((count)" // (10 (11)
499 "|(first-sent-time)" // (12)
500 "|(last-sent-time)" // (13)
501 ")" // 10)
502 ")" // 8)
503 "|(ata-smart-attribute\\.([0-9]+)\\." // (14 (15)
504 "((id)" // (16 (17)
505 "|(val)" // (18)
506 "|(worst)" // (19)
507 "|(raw)" // (20)
508 ")" // 16)
509 ")" // 14)
510 ")" // 1)
511 " *= *([0-9]+)[ \n]*$", // (21)
512 REG_EXTENDED
513 );
514 if (regex.empty())
515 throw std::logic_error("parse_dev_state_line: invalid regex");
516
517 const int nmatch = 1+21;
518 regmatch_t match[nmatch];
519 if (!regex.execute(line, nmatch, match))
520 return false;
521 if (match[nmatch-1].rm_so < 0)
522 return false;
523
524 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
525
526 int m = 1;
527 if (match[++m].rm_so >= 0)
528 state.tempmin = (unsigned char)val;
529 else if (match[++m].rm_so >= 0)
530 state.tempmax = (unsigned char)val;
531 else if (match[++m].rm_so >= 0)
532 state.selflogcount = (unsigned char)val;
533 else if (match[++m].rm_so >= 0)
534 state.selfloghour = (unsigned short)val;
535 else if (match[++m].rm_so >= 0)
536 state.scheduled_test_next_check = (time_t)val;
537 else if (match[++m].rm_so >= 0)
538 state.ataerrorcount = (int)val;
539 else if (match[m+=2].rm_so >= 0) {
540 int i = atoi(line+match[m].rm_so);
541 if (!(0 <= i && i < SMARTD_NMAIL))
542 return false;
543 if (i == MAILTYPE_TEST) // Don't suppress test mails
544 return true;
545 if (match[m+=2].rm_so >= 0)
546 state.maillog[i].logged = (int)val;
547 else if (match[++m].rm_so >= 0)
548 state.maillog[i].firstsent = (time_t)val;
549 else if (match[++m].rm_so >= 0)
550 state.maillog[i].lastsent = (time_t)val;
551 else
552 return false;
553 }
554 else if (match[m+=5+1].rm_so >= 0) {
555 int i = atoi(line+match[m].rm_so);
556 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
557 return false;
558 if (match[m+=2].rm_so >= 0)
559 state.ata_attributes[i].id = (unsigned char)val;
560 else if (match[++m].rm_so >= 0)
561 state.ata_attributes[i].val = (unsigned char)val;
562 else if (match[++m].rm_so >= 0)
563 state.ata_attributes[i].worst = (unsigned char)val;
564 else if (match[++m].rm_so >= 0)
565 state.ata_attributes[i].raw = val;
566 else
567 return false;
568 }
569 else
570 return false;
571 return true;
572 }
573
574 // Read a state file.
575 static bool read_dev_state(const char * path, persistent_dev_state & state)
576 {
577 stdio_file f(path, "r");
578 if (!f) {
579 if (errno != ENOENT)
580 pout("Cannot read state file \"%s\"\n", path);
581 return false;
582 }
583 #ifdef __CYGWIN__
584 setmode(fileno(f), O_TEXT); // Allow files with \r\n
585 #endif
586
587 int good = 0, bad = 0;
588 char line[256];
589 while (fgets(line, sizeof(line), f)) {
590 const char * s = line + strspn(line, " \t");
591 if (!*s || *s == '#')
592 continue;
593 if (!parse_dev_state_line(line, state))
594 bad++;
595 else
596 good++;
597 }
598
599 if (bad) {
600 if (!good) {
601 pout("%s: format error\n", path);
602 return false;
603 }
604 pout("%s: %d invalid line(s) ignored\n", path, bad);
605 }
606 return true;
607 }
608
// Write a "name = value" line to a state file.
// Nothing is written if the value is zero.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (!val)
    return;
  // Whitespace around PRIu64 is required: since C++11 a macro name directly
  // after a string literal is parsed as a user-defined-literal suffix.
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
614
// Write a "name1.id.name2 = value" line to a state file.
// Nothing is written if the value is zero.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (!val)
    return;
  // Whitespace around PRIu64 is required: since C++11 a macro name directly
  // after a string literal is parsed as a user-defined-literal suffix.
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
620
621 // Write a state file
622 static bool write_dev_state(const char * path, const persistent_dev_state & state)
623 {
624 // Rename old "file" to "file~"
625 std::string pathbak = path; pathbak += '~';
626 unlink(pathbak.c_str());
627 rename(path, pathbak.c_str());
628
629 stdio_file f(path, "w");
630 if (!f) {
631 pout("Cannot create state file \"%s\"\n", path);
632 return false;
633 }
634
635 fprintf(f, "# smartd state file\n");
636 write_dev_state_line(f, "temperature-min", state.tempmin);
637 write_dev_state_line(f, "temperature-max", state.tempmax);
638 write_dev_state_line(f, "self-test-errors", state.selflogcount);
639 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
640 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
641
642 int i;
643 for (i = 0; i < SMARTD_NMAIL; i++) {
644 if (i == MAILTYPE_TEST) // Don't suppress test mails
645 continue;
646 const mailinfo & mi = state.maillog[i];
647 if (!mi.logged)
648 continue;
649 write_dev_state_line(f, "mail", i, "count", mi.logged);
650 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
651 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
652 }
653
654 // ATA ONLY
655 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
656
657 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
658 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
659 if (!pa.id)
660 continue;
661 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
662 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
663 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
664 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
665 }
666
667 return true;
668 }
669
670 // Write to the attrlog file
671 static bool write_dev_attrlog(const char * path, const persistent_dev_state & state)
672 {
673 stdio_file f(path, "a");
674 if (!f) {
675 pout("Cannot create attribute log file \"%s\"\n", path);
676 return false;
677 }
678
679 // ATA ONLY
680 time_t now = time(0);
681 struct tm * tms = gmtime(&now);
682 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
683 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
684 tms->tm_hour, tms->tm_min, tms->tm_sec);
685 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
686 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
687 if (!pa.id)
688 continue;
689 fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
690 }
691 fprintf(f, "\n");
692
693 return true;
694 }
695
696 // Write all state files. If write_always is false, don't write
697 // unless must_write is set.
698 static void write_all_dev_states(const dev_config_vector & configs,
699 dev_state_vector & states,
700 bool write_always = true)
701 {
702 for (unsigned i = 0; i < states.size(); i++) {
703 const dev_config & cfg = configs.at(i);
704 if (cfg.state_file.empty())
705 continue;
706 dev_state & state = states[i];
707 if (!write_always && !state.must_write)
708 continue;
709 if (!write_dev_state(cfg.state_file.c_str(), state))
710 continue;
711 state.must_write = false;
712 if (write_always || debugmode)
713 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
714 cfg.name.c_str(), cfg.state_file.c_str());
715 }
716 }
717
718 // Write to all attrlog files
719 static void write_all_dev_attrlogs(const dev_config_vector & configs,
720 dev_state_vector & states)
721 {
722 for (unsigned i = 0; i < states.size(); i++) {
723 const dev_config & cfg = configs.at(i);
724 if (cfg.attrlog_file.empty())
725 continue;
726 dev_state & state = states[i];
727 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
728 }
729 }
730
731 // remove the PID file
732 void RemovePidFile(){
733 if (!pid_file.empty()) {
734 if (unlink(pid_file.c_str()))
735 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
736 pid_file.c_str(), strerror(errno));
737 pid_file.clear();
738 }
739 return;
740 }
741
742 extern "C" { // signal handlers require C-linkage
743
744 // Note if we catch a SIGUSR1
745 void USR1handler(int sig){
746 if (SIGUSR1==sig)
747 caughtsigUSR1=1;
748 return;
749 }
750
751 #ifdef _WIN32
752 // Note if we catch a SIGUSR2
753 void USR2handler(int sig){
754 if (SIGUSR2==sig)
755 caughtsigUSR2=1;
756 return;
757 }
758 #endif
759
760 // Note if we catch a HUP (or INT in debug mode)
761 void HUPhandler(int sig){
762 if (sig==SIGHUP)
763 caughtsigHUP=1;
764 else
765 caughtsigHUP=2;
766 return;
767 }
768
769 // signal handler for TERM, QUIT, and INT (if not in debug mode)
770 void sighandler(int sig){
771 if (!caughtsigEXIT)
772 caughtsigEXIT=sig;
773 return;
774 }
775
776 } // extern "C"
777
778 // Cleanup, print Goodbye message and remove pidfile
779 static int Goodbye(int status)
780 {
781 // delete PID file, if one was created
782 RemovePidFile();
783
784 // if we are exiting because of a code bug, tell user
785 if (status==EXIT_BADCODE)
786 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
787
788 // and this should be the final output from smartd before it exits
789 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
790
791 return status;
792 }
793
#define ENVLENGTH 1024

// Substitute for setenv(), which does not exist on all platforms.
// The string "name=value" is built in the caller's buffer 'stackspace'
// (at most ENVLENGTH bytes, longer strings are truncated) and handed
// to putenv(). Because putenv() keeps the pointer instead of copying
// the string, the buffer must stay valid and unmodified until the
// variable is redefined or removed by another putenv() call. So the
// buffers are kept on the stack as long as the popen() call is underway.
// Returns the result of putenv().
int exportenv(char* stackspace, const char *name, const char *value){
  snprintf(stackspace, ENVLENGTH, "%s=%s", name, value);
  const int rc = putenv(stackspace);
  return rc;
}
807
808 char* dnsdomain(const char* hostname) {
809 char *p = NULL;
810 #ifdef HAVE_GETADDRINFO
811 static char canon_name[NI_MAXHOST];
812 struct addrinfo *info = NULL;
813 struct addrinfo hints;
814 int err;
815
816 memset(&hints, 0, sizeof(hints));
817 hints.ai_flags = AI_CANONNAME;
818 if ((err = getaddrinfo(hostname, NULL, &hints, &info)) || (!info)) {
819 PrintOut(LOG_CRIT, "Error retrieving getaddrinfo(%s): %s\n", hostname, gai_strerror(err));
820 return NULL;
821 }
822 if (info->ai_canonname) {
823 strncpy(canon_name, info->ai_canonname, sizeof(canon_name));
824 canon_name[NI_MAXHOST - 1] = '\0';
825 p = canon_name;
826 if ((p = strchr(canon_name, '.')))
827 p++;
828 }
829 freeaddrinfo(info);
830 #elif HAVE_GETHOSTBYNAME
831 struct hostent *hp;
832 if ((hp = gethostbyname(hostname))) {
833 // Does this work if gethostbyname() returns an IPv6 name in
834 // colon/dot notation? [BA]
835 if ((p = strchr(hp->h_name, '.')))
836 p++; // skip "."
837 }
838 #else
839 ARGUSED(hostname);
840 #endif
841 return p;
842 }
843
844 #define EBUFLEN 1024
845
846 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
847 __attribute__ ((format (printf, 4, 5)));
848
849 // If either address or executable path is non-null then send and log
850 // a warning email, or execute executable
851 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...){
852 char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
853 char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
854 char environ_strings[11][ENVLENGTH];
855 time_t epoch;
856 va_list ap;
857 const int day=24*3600;
858 int days=0;
859 const char * const whichfail[]={
860 "EmailTest", // 0
861 "Health", // 1
862 "Usage", // 2
863 "SelfTest", // 3
864 "ErrorCount", // 4
865 "FailedHealthCheck", // 5
866 "FailedReadSmartData", // 6
867 "FailedReadSmartErrorLog", // 7
868 "FailedReadSmartSelfTestLog", // 8
869 "FailedOpenDevice", // 9
870 "CurrentPendingSector", // 10
871 "OfflineUncorrectableSector", // 11
872 "Temperature" // 12
873 };
874
875 const char *unknown="[Unknown]";
876
877 // See if user wants us to send mail
878 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
879 return;
880
881 std::string address = cfg.emailaddress;
882 const char * executable = cfg.emailcmdline.c_str();
883
884 // which type of mail are we sending?
885 mailinfo * mail=(state.maillog)+which;
886
887 // checks for sanity
888 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
889 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
890 return;
891 }
892 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
893 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
894 which, (int)sizeof(whichfail));
895 return;
896 }
897
898 // Return if a single warning mail has been sent.
899 if ((cfg.emailfreq==1) && mail->logged)
900 return;
901
902 // Return if this is an email test and one has already been sent.
903 if (which == 0 && mail->logged)
904 return;
905
906 // To decide if to send mail, we need to know what time it is.
907 epoch=time(NULL);
908
909 // Return if less than one day has gone by
910 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
911 return;
912
913 // Return if less than 2^(logged-1) days have gone by
914 if (cfg.emailfreq==3 && mail->logged) {
915 days=0x01<<(mail->logged-1);
916 days*=day;
917 if (epoch<(mail->lastsent+days))
918 return;
919 }
920
921 // record the time of this mail message, and the first mail message
922 if (!mail->logged)
923 mail->firstsent=epoch;
924 mail->lastsent=epoch;
925
926 // get system host & domain names (not null terminated if length=MAX)
927 #ifdef HAVE_GETHOSTNAME
928 if (gethostname(hostname, 256))
929 strcpy(hostname, unknown);
930 else {
931 char *p=NULL;
932 hostname[255]='\0';
933 p = dnsdomain(hostname);
934 if (p && *p) {
935 strncpy(domainname, p, 255);
936 domainname[255]='\0';
937 } else
938 strcpy(domainname, unknown);
939 }
940 #else
941 strcpy(hostname, unknown);
942 strcpy(domainname, unknown);
943 #endif
944
945 #ifdef HAVE_GETDOMAINNAME
946 if (getdomainname(nisdomain, 256))
947 strcpy(nisdomain, unknown);
948 else
949 nisdomain[255]='\0';
950 #else
951 strcpy(nisdomain, unknown);
952 #endif
953
954 // print warning string into message
955 va_start(ap, fmt);
956 vsnprintf(message, 256, fmt, ap);
957 va_end(ap);
958
959 // appropriate message about further information
960 additional[0]=original[0]=further[0]='\0';
961 if (which) {
962 sprintf(further,"You can also use the smartctl utility for further investigation.\n");
963
964 switch (cfg.emailfreq) {
965 case 1:
966 sprintf(additional,"No additional email messages about this problem will be sent.\n");
967 break;
968 case 2:
969 sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
970 break;
971 case 3:
972 sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
973 (0x01)<<mail->logged);
974 break;
975 }
976 if (cfg.emailfreq>1 && mail->logged) {
977 dateandtimezoneepoch(dates, mail->firstsent);
978 sprintf(original,"The original email about this issue was sent at %s\n", dates);
979 }
980 }
981
982 snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
983
984 // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
985 if (!*executable)
986 #ifdef DEFAULT_MAILER
987 executable = DEFAULT_MAILER ;
988 #else
989 #ifndef _WIN32
990 executable = "mail";
991 #else
992 executable = "blat"; // http://blat.sourceforge.net/
993 #endif
994 #endif
995
996 #ifndef _WIN32 // blat mailer needs comma
997 // replace commas by spaces to separate recipients
998 std::replace(address.begin(), address.end(), ',', ' ');
999 #endif
1000 // Export information in environment variables that will be useful
1001 // for user scripts
1002 exportenv(environ_strings[0], "SMARTD_MAILER", executable);
1003 exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
1004 exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
1005 dateandtimezoneepoch(dates, mail->firstsent);
1006 exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
1007 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1008 exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
1009 exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
1010 if (!address.empty())
1011 exportenv(environ_strings[6], "SMARTD_ADDRESS", address.c_str());
1012 exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg.name.c_str());
1013
1014 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", cfg.dev_type.c_str());
1015 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg.name.c_str());
1016
1017 snprintf(fullmessage, 1024,
1018 "This email was generated by the smartd daemon running on:\n\n"
1019 " host name: %s\n"
1020 " DNS domain: %s\n"
1021 " NIS domain: %s\n\n"
1022 "The following warning/error was logged by the smartd daemon:\n\n"
1023 "%s\n\n"
1024 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
1025 "%s%s%s",
1026 hostname, domainname, nisdomain, message, further, original, additional);
1027 exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
1028
1029 // now construct a command to send this as EMAIL
1030 #ifndef _WIN32
1031 if (!address.empty())
1032 snprintf(command, 2048,
1033 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1034 "%sENDMAIL\n", subject, address.c_str(), fullmessage);
1035 else
1036 snprintf(command, 2048, "%s 2>&1", executable);
1037
1038 // tell SYSLOG what we are about to do...
1039 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1040 const char * newwarn = (which? "Warning via" : "Test of");
1041
1042 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1043 which?"Sending warning via":"Executing test of", executable, newadd);
1044
1045 // issue the command to send mail or to run the user's executable
1046 errno=0;
1047 FILE * pfp;
1048 if (!(pfp=popen(command, "r")))
1049 // failed to popen() mail process
1050 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1051 newwarn, executable, newadd, errno?strerror(errno):"");
1052 else {
1053 // pipe succeeded!
1054 int len, status;
1055 char buffer[EBUFLEN];
1056
1057 // if unexpected output on stdout/stderr, null terminate, print, and flush
1058 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1059 int count=0;
1060 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1061 buffer[newlen]='\0';
1062 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1063 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1064
1065 // flush pipe if needed
1066 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1067 count++;
1068
1069 // tell user that pipe was flushed, or that something is really wrong
1070 if (count && count<EBUFLEN)
1071 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1072 newwarn, executable, newadd);
1073 else if (count)
1074 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1075 newwarn, executable, newadd);
1076 }
1077
1078 // if something went wrong with mail process, print warning
1079 errno=0;
1080 if (-1==(status=pclose(pfp)))
1081 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1082 errno?strerror(errno):"");
1083 else {
1084 // mail process apparently succeeded. Check and report exit status
1085 int status8;
1086
1087 if (WIFEXITED(status)) {
1088 // exited 'normally' (but perhaps with nonzero status)
1089 status8=WEXITSTATUS(status);
1090
1091 if (status8>128)
1092 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1093 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1094 else if (status8)
1095 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1096 newwarn, executable, newadd, status, status8);
1097 else
1098 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1099 }
1100
1101 if (WIFSIGNALED(status))
1102 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1103 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1104
1105 // this branch is probably not possible. If subprocess is
1106 // stopped then pclose() should not return.
1107 if (WIFSTOPPED(status))
1108 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1109 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1110
1111 }
1112 }
1113
1114 #else // _WIN32
1115
1116 // No "here-documents" on Windows, so must use separate commandline and stdin
1117 char stdinbuf[1024];
1118 command[0] = stdinbuf[0] = 0;
1119 int boxtype = -1, boxmsgoffs = 0;
1120 const char * newadd = "<nomailer>";
1121 if (!address.empty()) {
1122 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1123 char addr1[9+1+13] = ""; int n1 = -1, n2 = -1;
1124 if (sscanf(address.c_str(), "%9[a-z]%n,%n", addr1, &n1, &n2) == 1 && (n1 == (int)address.size() || n2 > 0)) {
1125 if (!strcmp(addr1, "msgbox"))
1126 boxtype = 0;
1127 else if (!strcmp(addr1, "sysmsgbox"))
1128 boxtype = 1;
1129 if (boxtype >= 0)
1130 address.erase(0, (n2 > n1 ? n2 : n1));
1131 }
1132
1133 if (!address.empty()) {
1134 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1135 snprintf(command, sizeof(command),
1136 "%s - -q -subject \"%s\" -to \"%s\"",
1137 executable, subject, address.c_str());
1138 newadd = address.c_str();
1139 }
1140
1141 #ifdef _MSC_VER
1142 _set_printf_count_output(1); // "%n" disabled by default
1143 #endif
1144 // Message for mail [0...] and messagebox [boxmsgoffs...]
1145 snprintf(stdinbuf, sizeof(stdinbuf),
1146 "This email was generated by the smartd daemon running on:\n\n"
1147 " host name: %s\n"
1148 " DNS domain: %s\n"
1149 // " NIS domain: %s\n"
1150 "\n%n"
1151 "The following warning/error was logged by the smartd daemon:\n\n"
1152 "%s\n\n"
1153 "For details see the event log or log file of smartd.\n\n"
1154 "%s%s%s"
1155 "\n",
1156 hostname, /*domainname, */ nisdomain, &boxmsgoffs, message, further, original, additional);
1157 }
1158 else
1159 snprintf(command, sizeof(command), "%s", executable);
1160
1161 const char * newwarn = (which ? "Warning via" : "Test of");
1162 if (boxtype >= 0) {
1163 // show message box
1164 daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
1165 PrintOut(LOG_INFO,"%s message box\n", newwarn);
1166 }
1167 if (command[0]) {
1168 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1169 int rc;
1170 // run command
1171 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1172 (which?"Sending warning via":"Executing test of"), executable, newadd);
1173 rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
1174 if (rc >= 0 && stdoutbuf[0])
1175 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1176 newwarn, executable, newadd, strlen(stdoutbuf), stdoutbuf);
1177 if (rc != 0)
1178 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1179 newwarn, executable, newadd, rc);
1180 else
1181 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1182 }
1183
1184 #endif // _WIN32
1185
1186 // increment mail sent counter
1187 mail->logged++;
1188 }
1189
1190 // Printing function for watching ataprint commands, or losing them
1191 // [From GLIBC Manual: Since the prototype doesn't specify types for
1192 // optional arguments, in a call to a variadic function the default
1193 // argument promotions are performed on the optional argument
1194 // values. This means the objects of type char or short int (whether
1195 // signed or not) are promoted to either int or unsigned int, as
1196 // appropriate.]
1197 void pout(const char *fmt, ...){
1198 va_list ap;
1199
1200 // get the correct time in syslog()
1201 FixGlibcTimeZoneBug();
1202 // initialize variable argument list
1203 va_start(ap,fmt);
1204 // in debug==1 mode we will print the output from the ataprint.o functions!
1205 if (debugmode && debugmode!=2)
1206 #ifdef _WIN32
1207 if (facility == LOG_LOCAL1) // logging to stdout
1208 vfprintf(stderr,fmt,ap);
1209 else
1210 #endif
1211 vprintf(fmt,ap);
1212 // in debug==2 mode we print output from knowndrives.o functions
1213 else if (debugmode==2 || con->reportataioctl || con->reportscsiioctl /*|| con->controller_port???*/) {
1214 openlog("smartd", LOG_PID, facility);
1215 vsyslog(LOG_INFO, fmt, ap);
1216 closelog();
1217 }
1218 va_end(ap);
1219 fflush(NULL);
1220 return;
1221 }
1222
1223 // This function prints either to stdout or to the syslog as needed.
1224 // This function is also used by utility.cpp to report LOG_CRIT errors.
1225 void PrintOut(int priority, const char *fmt, ...){
1226 va_list ap;
1227
1228 // get the correct time in syslog()
1229 FixGlibcTimeZoneBug();
1230 // initialize variable argument list
1231 va_start(ap,fmt);
1232 if (debugmode)
1233 #ifdef _WIN32
1234 if (facility == LOG_LOCAL1) // logging to stdout
1235 vfprintf(stderr,fmt,ap);
1236 else
1237 #endif
1238 vprintf(fmt,ap);
1239 else {
1240 openlog("smartd", LOG_PID, facility);
1241 vsyslog(priority,fmt,ap);
1242 closelog();
1243 }
1244 va_end(ap);
1245 return;
1246 }
1247
1248 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1249 void checksumwarning(const char * string)
1250 {
1251 pout("Warning! %s error: invalid SMART checksum.\n", string);
1252 }
1253
1254 // Wait for the pid file to show up, this makes sure a calling program knows
1255 // that the daemon is really up and running and has a pid to kill it
1256 bool WaitForPidFile()
1257 {
1258 int waited, max_wait = 10;
1259 struct stat stat_buf;
1260
1261 if (pid_file.empty() || debugmode)
1262 return true;
1263
1264 for(waited = 0; waited < max_wait; ++waited) {
1265 if (!stat(pid_file.c_str(), &stat_buf)) {
1266 return true;
1267 } else
1268 sleep(1);
1269 }
1270 return false;
1271 }
1272
1273
1274 // Forks new process, closes ALL file descriptors, redirects stdin,
1275 // stdout, and stderr. Not quite daemon(). See
1276 // http://www.linuxjournal.com/article/2335
1277 // for a good description of why we do things this way.
1278 void DaemonInit(){
1279 #ifndef _WIN32
1280 pid_t pid;
1281 int i;
1282
1283 // flush all buffered streams. Else we might get two copies of open
1284 // streams since both parent and child get copies of the buffers.
1285 fflush(NULL);
1286
1287 if (do_fork) {
1288 if ((pid=fork()) < 0) {
1289 // unable to fork!
1290 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1291 EXIT(EXIT_STARTUP);
1292 }
1293 else if (pid) {
1294 // we are the parent process, wait for pid file, then exit cleanly
1295 if(!WaitForPidFile()) {
1296 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1297 EXIT(EXIT_STARTUP);
1298 } else
1299 EXIT(0);
1300 }
1301
1302 // from here on, we are the child process.
1303 setsid();
1304
1305 // Fork one more time to avoid any possibility of having terminals
1306 if ((pid=fork()) < 0) {
1307 // unable to fork!
1308 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1309 EXIT(EXIT_STARTUP);
1310 }
1311 else if (pid)
1312 // we are the parent process -- exit cleanly
1313 EXIT(0);
1314
1315 // Now we are the child's child...
1316 }
1317
1318 // close any open file descriptors
1319 for (i=getdtablesize();i>=0;--i)
1320 close(i);
1321
1322 #ifdef __CYGWIN__
1323 // Cygwin's setsid() does not detach the process from Windows console
1324 FreeConsole();
1325 #endif // __CYGWIN__
1326
1327 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1328
1329 // redirect any IO attempts to /dev/null for stdin
1330 i=open("/dev/null",O_RDWR);
1331 if (i>=0) {
1332 // stdout
1333 NO_warn_unused_result(dup(i));
1334 // stderr
1335 NO_warn_unused_result(dup(i));
1336 };
1337 umask(0022);
1338 NO_warn_unused_result(chdir("/"));
1339
1340 if (do_fork)
1341 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1342
1343 #else // _WIN32
1344
1345 // No fork() on native Win32
1346 // Detach this process from console
1347 fflush(NULL);
1348 if (daemon_detach("smartd")) {
1349 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1350 EXIT(EXIT_STARTUP);
1351 }
1352 // stdin/out/err now closed if not redirected
1353
1354 #endif // _WIN32
1355 return;
1356 }
1357
1358 // create a PID file containing the current process id
1359 static void WritePidFile()
1360 {
1361 if (!pid_file.empty()) {
1362 pid_t pid = getpid();
1363 mode_t old_umask;
1364 #ifndef __CYGWIN__
1365 old_umask = umask(0077); // rwx------
1366 #else
1367 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1368 old_umask = umask(0033); // rwxr--r--
1369 #endif
1370
1371 stdio_file f(pid_file.c_str(), "w");
1372 umask(old_umask);
1373 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1374 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1375 EXIT(EXIT_PID);
1376 }
1377 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1378 }
1379 }
1380
1381 // Prints header identifying version of code and home
1382 static void PrintHead()
1383 {
1384 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1385 }
1386
1387 // prints help info for configuration file Directives
1388 void Directives() {
1389 PrintOut(LOG_INFO,
1390 "Configuration file (%s) Directives (after device name):\n"
1391 " -d TYPE Set the device type: %s\n"
1392 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1393 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1394 " -S VAL Enable/disable attribute autosave (on/off)\n"
1395 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1396 " -H Monitor SMART Health Status, report if failed\n"
1397 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1398 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1399 " -f Monitor 'Usage' Attributes, report failures\n"
1400 " -m ADD Send email warning to address ADD\n"
1401 " -M TYPE Modify email warning behavior (see man page)\n"
1402 " -p Report changes in 'Prefailure' Attributes\n"
1403 " -u Report changes in 'Usage' Attributes\n"
1404 " -t Equivalent to -p and -u Directives\n"
1405 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1406 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1407 " -i ID Ignore Attribute ID for -f Directive\n"
1408 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1409 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1410 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1411 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1412 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1413 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1414 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1415 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1416 " # Comment: text after a hash sign is ignored\n"
1417 " \\ Line continuation character\n"
1418 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1419 "Use ID = 0 to turn off -C and/or -U Directives\n"
1420 "Example: /dev/hda -a\n",
1421 configfile, smi()->get_valid_dev_types_str().c_str());
1422 return;
1423 }
1424
/* Maps a command line option letter to a static string listing its valid
   arguments.  Returns NULL for an option letter not handled here. */
const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
  if (opt == 'B' || opt == 'p')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";

  // no argument list known for this option
  return NULL;
}
1449
1450 /* prints help information for command syntax */
1451 void Usage (void){
1452 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1453 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1454 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1455 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1456 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1457 #endif
1458 PrintOut(LOG_INFO,"\n");
1459 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1460 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1461 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1462 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_DRIVEDBDIR"/drivedb.h]\n");
1463 #endif
1464 PrintOut(LOG_INFO,"\n");
1465 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1466 PrintOut(LOG_INFO," Read configuration file NAME or stdin [default is %s]\n\n", configfile);
1467 PrintOut(LOG_INFO," -d, --debug\n");
1468 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1469 PrintOut(LOG_INFO," -D, --showdirectives\n");
1470 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1471 PrintOut(LOG_INFO," -h, --help, --usage\n");
1472 PrintOut(LOG_INFO," Display this help and exit\n\n");
1473 PrintOut(LOG_INFO," -i N, --interval=N\n");
1474 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1475 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1476 #ifndef _WIN32
1477 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1478 #else
1479 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1480 #endif
1481 #ifndef _WIN32
1482 PrintOut(LOG_INFO," -n, --no-fork\n");
1483 PrintOut(LOG_INFO," Do not fork into background\n\n");
1484 #endif // _WIN32
1485 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1486 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1487 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1488 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1489 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1490 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1491 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1492 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1493 #ifdef SMARTMONTOOLS_SAVESTATES
1494 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1495 #endif
1496 PrintOut(LOG_INFO,"\n");
1497 #ifdef _WIN32
1498 PrintOut(LOG_INFO," --service\n");
1499 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1500 PrintOut(LOG_INFO," smartd install [options]\n");
1501 PrintOut(LOG_INFO," Remove service with:\n");
1502 PrintOut(LOG_INFO," smartd remove\n\n");
1503 #endif // _WIN32
1504 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1505 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1506 }
1507
1508 static int CloseDevice(smart_device * device, const char * name)
1509 {
1510 if (!device->close()){
1511 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1512 return 1;
1513 }
1514 // device sucessfully closed
1515 return 0;
1516 }
1517
// Predicate for std::replace_if(): returns true for every character that
// is not an ASCII digit or ASCII letter, i.e. one that is not allowed in
// a state file name.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1525
1526 // returns <0 on failure
1527 static int ATAErrorCount(ata_device * device, const char * name,
1528 unsigned char fix_firmwarebug)
1529 {
1530 struct ata_smart_errorlog log;
1531
1532 if (ataReadErrorLog(device, &log, fix_firmwarebug)){
1533 PrintOut(LOG_INFO,"Device: %s, Read SMART Error Log Failed\n",name);
1534 return -1;
1535 }
1536
1537 // return current number of ATA errors
1538 return log.error_log_pointer?log.ata_error_count:0;
1539 }
1540
1541 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1542 // error count, and top bits are the power-on hours of the last error.
1543 static int SelfTestErrorCount(ata_device * device, const char * name,
1544 unsigned char fix_firmwarebug)
1545 {
1546 struct ata_smart_selftestlog log;
1547
1548 if (ataReadSelfTestLog(device, &log, fix_firmwarebug)){
1549 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1550 return -1;
1551 }
1552
1553 // return current number of self-test errors
1554 return ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug);
1555 }
1556
// Accessors for the value returned by SelfTestErrorCount(): the bottom
// 8 bits hold the self-test error count, the next 16 bits the power-on
// hours of the last error.  The argument is parenthesized so that
// expressions such as (a | b) are evaluated before the mask/shift.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1559
1560 // Log self-test execution status
1561 static void log_self_test_exec_status(const char * name, unsigned char status)
1562 {
1563 const char * msg;
1564 switch (status >> 4) {
1565 case 0x0: msg = "completed without error"; break;
1566 case 0x1: msg = "was aborted by the host"; break;
1567 case 0x2: msg = "was interrupted by the host with a reset"; break;
1568 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1569 case 0x4: msg = "completed with error (unknown test element)"; break;
1570 case 0x5: msg = "completed with error (electrical test element)"; break;
1571 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1572 case 0x7: msg = "completed with error (read test element)"; break;
1573 case 0x8: msg = "completed with error (handling damage?)"; break;
1574 default: msg = 0;
1575 }
1576
1577 if (msg)
1578 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1579 "Device: %s, previous self-test %s\n", name, msg);
1580 else if ((status >> 4) == 0xf)
1581 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1582 name, status & 0x0f);
1583 else
1584 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1585 name, status);
1586 }
1587
1588
// Flag handed to apply_presets() and format_ata_string() by
// ATADeviceScan(); it is currently fixed to false because no Directive
// exists to set it.
1589 // TODO: Add '-F swapid' directive
1590 const bool fix_swapped_id = false;
1591
1592 // scan to see what ata devices there are, and if they support SMART
//
// Probes one already opened ATA device and prepares cfg/state for
// monitoring it:
//  - reads the IDENTIFY DEVICE data and stores the drive size in state,
//  - applies drive database presets (unless cfg.ignorepresets is set),
//  - enables SMART and applies the autosave / automatic offline settings,
//  - switches off every requested check (health status, attributes,
//    temperature, self-test log, error log, power mode) that the device
//    turns out not to support,
//  - builds the state/attribute-log file names and reads a saved state.
//
// The device is closed through CloseDevice() on every return path.
//
// Returns 0 if the device was accepted for monitoring,
//         2 if the identity cannot be read, SMART is unavailable (and
//           '-T permissive' is not given) or SMART cannot be enabled,
//         3 if no check remains enabled after the capability tests.
1593 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1594 {
1595 int supported=0;
1596 struct ata_identify_device drive;
1597 const char *name = cfg.name.c_str();
1598 int retid;
1599
1600 // Device must be open
1601
1602 // Get drive identity structure
// nonzero result means failure: <0 = no identity data, >0 = packet
// device whose type is (retid-1)
1603 if ((retid=ataReadHDIdentity (atadev, &drive))){
1604 if (retid<0)
1605 // Unable to read Identity structure
1606 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1607 else
1608 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1609 name, packetdevicetype(retid-1));
1610 CloseDevice(atadev, name);
1611 return 2;
1612 }
1613 // Store drive size (for selective self-test only)
1614 state.num_sectors = get_num_sectors(&drive);
1615
1616 // Show if device in database, and use preset vendor attribute
1617 // options unless user has requested otherwise.
1618 if (cfg.ignorepresets)
1619 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1620 else {
1621 // do whatever applypresets decides to do.
1622 if (!apply_presets(&drive, cfg.attribute_defs, cfg.fix_firmwarebug, fix_swapped_id))
1623 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1624 else
1625 PrintOut(LOG_INFO, "Device: %s, found in smartd database.\n", name);
1626 }
1627
1628 // Set default '-C 197[+]' if no '-C ID' is specified.
1629 if (!cfg.curr_pending_set)
1630 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1631 // Set default '-U 198[+]' if no '-U ID' is specified.
1632 if (!cfg.offl_pending_set)
1633 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1634
1635 // If requested, show which presets would be used for this drive
1636 if (cfg.showpresets) {
1637 int savedebugmode=debugmode;
1638 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
// outside debug mode, temporarily switch to debugmode 2 (the mode in
// which pout() writes to syslog) -- presumably so that the output of
// show_presets() is not lost; the previous mode is restored afterwards
1639 if (!debugmode)
1640 debugmode=2;
1641 show_presets(&drive, false);
1642 debugmode=savedebugmode;
1643 }
1644
1645 // see if drive supports SMART
1646 supported=ataSmartSupport(&drive);
1647 if (supported!=1) {
1648 if (supported==0)
1649 // drive does NOT support SMART
1650 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1651 else
1652 // can't tell if drive supports SMART
1653 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1654
1655 // should we proceed anyway?
1656 if (cfg.permissive) {
1657 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1658 }
1659 else {
1660 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1661 CloseDevice(atadev, name);
1662 return 2;
1663 }
1664 }
1665
1666 if (ataEnableSmart(atadev)) {
1667 // Enable SMART command has failed
1668 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1669 CloseDevice(atadev, name);
1670 return 2;
1671 }
1672
// cfg.autosave: 1 requests disabling, 2 requests enabling; any other
// value leaves the drive's autosave setting untouched
1673 // disable device attribute autosave...
1674 if (cfg.autosave==1) {
1675 if (ataDisableAutoSave(atadev))
1676 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1677 else
1678 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1679 }
1680
1681 // or enable device attribute autosave
1682 if (cfg.autosave==2) {
1683 if (ataEnableAutoSave(atadev))
1684 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1685 else
1686 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1687 }
1688
1689 // capability check: SMART status
1690 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1691 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1692 cfg.smartcheck = false;
1693 }
1694
1695 // capability check: Read smart values and thresholds. Note that
1696 // smart values are ALSO needed even if we ONLY want to know if the
1697 // device is self-test log or error-log capable! After ATA-5, this
1698 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1699 // but sadly not for ATA-5. Sigh.
1700
1701 // do we need to retain SMART data after returning from this routine?
1702 bool retainsmartdata = (cfg.usagefailed || cfg.prefail || cfg.usage || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit);
1703
1704 // do we need to get SMART data?
1705 bool smart_val_ok = false;
1706 if ( retainsmartdata || cfg.autoofflinetest || cfg.selftest || cfg.errorlog
1707 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1708
1709 if (ataReadSmartValues(atadev, &state.smartval) ||
1710 ataReadSmartThresholds (atadev, &state.smartthres)) {
// without values/thresholds none of the attribute based checks can
// work, so switch all of them off
1711 PrintOut(LOG_INFO,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name);
1712 retainsmartdata = cfg.usagefailed = cfg.prefail = cfg.usage = false;
1713 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1714 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1715 }
1716 else
1717 smart_val_ok = true;
1718
1719 // see if the necessary Attribute is there to monitor offline or
1720 // current pending sectors or temperature
1721 if (cfg.curr_pending_id && ata_find_attr_index(cfg.curr_pending_id, state.smartval) < 0) {
1722 PrintOut(LOG_INFO,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1723 name, cfg.curr_pending_id);
1724 cfg.curr_pending_id = 0;
1725 }
1726
1727 if (cfg.offl_pending_id && ata_find_attr_index(cfg.offl_pending_id, state.smartval) < 0) {
1728 PrintOut(LOG_INFO,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1729 name, cfg.offl_pending_id);
1730 cfg.offl_pending_id = 0;
1731 }
1732
1733 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1734 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1735 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
1736 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1737 }
1738 }
1739
1740 // enable/disable automatic on-line testing
1741 if (cfg.autoofflinetest) {
1742 // is this an enable or disable request?
// (1 means disable, any other nonzero value means enable)
1743 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1744 if (!smart_val_ok)
1745 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1746 else {
1747 // if command appears unsupported, issue a warning...
1748 if (!isSupportAutomaticTimer(&state.smartval))
1749 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1750 // ... but then try anyway
1751 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1752 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1753 else
1754 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1755 }
1756 }
1757
1758 // capability check: self-test-log
1759 if (cfg.selftest) {
1760 int retval;
1761
1762 // start with service disabled, and re-enable it if all works OK
1763 cfg.selftest = false;
1764 state.selflogcount = 0;
1765 state.selfloghour = 0;
1766
1767 if (!smart_val_ok)
1768 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name);
1769 else if (!cfg.permissive && !isSmartTestLogCapable(&state.smartval, &drive))
1770 PrintOut(LOG_INFO, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name);
1771 else if ((retval = SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0)
1772 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name);
1773 else {
// remember the current error count and hours as the baseline
1774 cfg.selftest = true;
1775 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1776 state.selfloghour =SELFTEST_ERRORHOURS(retval);
1777 }
1778 }
1779
1780 // capability check: ATA error log
1781 if (cfg.errorlog) {
1782 int val;
1783
1784 // start with service disabled, and re-enable it if all works OK
1785 cfg.errorlog = false;
1786 state.ataerrorcount=0;
1787
1788 if (!smart_val_ok)
1789 PrintOut(LOG_INFO, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name);
1790 else if (!cfg.permissive && !isSmartErrorLogCapable(&state.smartval, &drive))
1791 PrintOut(LOG_INFO, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name);
1792 else if ((val = ATAErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0)
1793 PrintOut(LOG_INFO, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name);
1794 else {
1795 cfg.errorlog = true;
1796 state.ataerrorcount=val;
1797 }
1798 }
1799
1800 // capabilities check -- does it support powermode?
1801 if (cfg.powermode) {
1802 int powermode = ataCheckPowerMode(atadev);
1803
1804 if (-1 == powermode) {
1805 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
1806 cfg.powermode=0;
1807 }
// only the results 0, 0x80 and 0xff are accepted here
1808 else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
1809 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1810 name, powermode);
1811 cfg.powermode=0;
1812 }
1813 }
1814
1815 // If no tests available or selected, return
1816 if (!(cfg.errorlog || cfg.selftest || cfg.smartcheck ||
1817 cfg.usagefailed || cfg.prefail || cfg.usage ||
1818 cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
1819 CloseDevice(atadev, name);
1820 return 3;
1821 }
1822
1823 // tell user we are registering device
1824 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
1825
1826 // record number of device, type of device, increment device count
1827 if (cfg.dev_type.empty())
1828 cfg.dev_type = "ata";
1829
1830 // close file descriptor
1831 CloseDevice(atadev, name);
1832
1833 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
1834 // Build file name for state file
// model and serial number are taken from the identity data; every
// character other than an ASCII letter or digit is replaced by '_'
1835 char model[40+1], serial[20+1];
1836 format_ata_string(model, drive.model, sizeof(model)-1, fix_swapped_id);
1837 format_ata_string(serial, drive.serial_no, sizeof(serial)-1, fix_swapped_id);
1838 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
1839 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
1840 if (!state_path_prefix.empty()) {
1841 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
1842 // Read previous state
1843 if (read_dev_state(cfg.state_file.c_str(), state)) {
1844 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
1845 // Copy ATA attribute values to temp state
1846 state.update_temp_state();
1847 }
1848 }
1849 if (!attrlog_path_prefix.empty())
1850 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
1851 }
1852
1853 // Start self-test regex check now if time was not read from state file
1854 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1855 state.scheduled_test_next_check = time(0);
1856
1857 return 0;
1858 }
1859
1860 // on success, return 0. On failure, return >0. Never return <0,
1861 // please.
1862 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
1863 {
1864 int k, err;
1865 const char *device = cfg.name.c_str();
1866 struct scsi_iec_mode_page iec;
1867 UINT8 tBuf[64];
1868
1869 // Device must be open
1870
1871 // check that device is ready for commands. IE stores its stuff on
1872 // the media.
1873 if ((err = scsiTestUnitReady(scsidev))) {
1874 if (SIMPLE_ERR_NOT_READY == err)
1875 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
1876 else if (SIMPLE_ERR_NO_MEDIUM == err)
1877 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
1878 else if (SIMPLE_ERR_BECOMING_READY == err)
1879 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
1880 else
1881 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
1882 CloseDevice(scsidev, device);
1883 return 2;
1884 }
1885
1886 // Badly-conforming USB storage devices may fail this check.
1887 // The response to the following IE mode page fetch (current and
1888 // changeable values) is carefully examined. It has been found
1889 // that various USB devices that malform the response will lock up
1890 // if asked for a log page (e.g. temperature) so it is best to
1891 // bail out now.
1892 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
1893 state.modese_len = iec.modese_len;
1894 else if (SIMPLE_ERR_BAD_FIELD == err)
1895 ; /* continue since it is reasonable not to support IE mpage */
1896 else { /* any other error (including malformed response) unreasonable */
1897 PrintOut(LOG_INFO,
1898 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1899 device, err);
1900 CloseDevice(scsidev, device);
1901 return 3;
1902 }
1903
1904 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1905 // smart if it is off). This may change to be the same as the ATA side.
1906 if (!scsi_IsExceptionControlEnabled(&iec)) {
1907 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
1908 "Try 'smartctl -s on %s' to turn on SMART features\n",
1909 device, device);
1910 CloseDevice(scsidev, device);
1911 return 3;
1912 }
1913
1914 // Flag that certain log pages are supported (information may be
1915 // available from other sources).
1916 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
1917 for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
1918 switch (tBuf[k]) {
1919 case TEMPERATURE_LPAGE:
1920 state.TempPageSupported = 1;
1921 break;
1922 case IE_LPAGE:
1923 state.SmartPageSupported = 1;
1924 break;
1925 default:
1926 break;
1927 }
1928 }
1929 }
1930
1931 // record type of device
1932 if (cfg.dev_type.empty())
1933 cfg.dev_type = "scsi";
1934
1935 // Check if scsiCheckIE() is going to work
1936 {
1937 UINT8 asc = 0;
1938 UINT8 ascq = 0;
1939 UINT8 currenttemp = 0;
1940 UINT8 triptemp = 0;
1941
1942 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
1943 &asc, &ascq, &currenttemp, &triptemp)) {
1944 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
1945 state.SuppressReport = 1;
1946 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
1947 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
1948 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1949 }
1950 }
1951 }
1952
1953 // capability check: self-test-log
1954 if (cfg.selftest){
1955 int retval = scsiCountFailedSelfTests(scsidev, 0);
1956 if (retval<0) {
1957 // no self-test log, turn off monitoring
1958 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
1959 cfg.selftest = false;
1960 state.selflogcount = 0;
1961 state.selfloghour = 0;
1962 }
1963 else {
1964 // register starting values to watch for changes
1965 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1966 state.selfloghour =SELFTEST_ERRORHOURS(retval);
1967 }
1968 }
1969
1970 // disable autosave (set GLTSD bit)
1971 if (cfg.autosave==1){
1972 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
1973 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
1974 else
1975 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
1976 }
1977
1978 // or enable autosave (clear GLTSD bit)
1979 if (cfg.autosave==2){
1980 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
1981 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
1982 else
1983 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
1984 }
1985
1986 // tell user we are registering device
1987 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
1988
1989 // TODO: Build file name for state file
1990 if (!state_path_prefix.empty()) {
1991 PrintOut(LOG_INFO, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device);
1992 }
1993 // TODO: Build file name for attribute log file
1994 if (!attrlog_path_prefix.empty()) {
1995 PrintOut(LOG_INFO, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device);
1996 }
1997
1998 // close file descriptor
1999 CloseDevice(scsidev, device);
2000
2001 // Start self-test regex check now if time was not read from state file
2002 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
2003 state.scheduled_test_next_check = time(0);
2004
2005 return 0;
2006 }
2007
2008 // If the self-test log has got more self-test errors (or more recent
2009 // self-test errors) recorded, then notify user.
2010 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2011 {
2012 const char * name = cfg.name.c_str();
2013
2014 if (newi<0)
2015 // command failed
2016 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2017 else {
2018 // old and new error counts
2019 int oldc=state.selflogcount;
2020 int newc=SELFTEST_ERRORCOUNT(newi);
2021
2022 // old and new error timestamps in hours
2023 int oldh=state.selfloghour;
2024 int newh=SELFTEST_ERRORHOURS(newi);
2025
2026 if (oldc<newc) {
2027 // increase in error count
2028 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2029 name, oldc, newc);
2030 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2031 name, oldc, newc);
2032 state.must_write = true;
2033 } else if (oldh!=newh) {
2034 // more recent error
2035 // a 'more recent' error might actually be a smaller hour number,
2036 // if the hour number has wrapped.
2037 // There's still a bug here. You might just happen to run a new test
2038 // exactly 32768 hours after the previous failure, and have run exactly
2039 // 20 tests between the two, in which case smartd will miss the
2040 // new failure.
2041 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2042 name, newh);
2043 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2044 name, newh);
2045 state.must_write = true;
2046 }
2047
2048 // Needed since self-test error count may DECREASE. Hour might
2049 // also have changed.
2050 state.selflogcount= newc;
2051 state.selfloghour = newh;
2052 }
2053 return;
2054 }
2055
// Self-test type characters. The position in this string is the priority
// of the test type: a lower index means a higher priority.
static const char test_type_chars[] = "LncrSCO";

// Number of test types, i.e. the string length without the terminating NUL.
const unsigned num_test_types = sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
2059
2060 // returns test type if time to do test of type testtype,
2061 // 0 if not time to do test.
2062 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2063 {
2064 // check that self-testing has been requested
2065 if (cfg.test_regex.empty())
2066 return 0;
2067
2068 // Exit if drive not capable of any test
2069 if ( state.not_cap_long && state.not_cap_short &&
2070 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2071 return 0;
2072
2073 // since we are about to call localtime(), be sure glibc is informed
2074 // of any timezone changes we make.
2075 if (!usetime)
2076 FixGlibcTimeZoneBug();
2077
2078 // Is it time for next check?
2079 time_t now = (!usetime ? time(0) : usetime);
2080 if (now < state.scheduled_test_next_check)
2081 return 0;
2082
2083 // Limit time check interval to 90 days
2084 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2085 state.scheduled_test_next_check = now - (3600L*24*90);
2086
2087 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2088 char testtype = 0;
2089 time_t testtime = 0; int testhour = 0;
2090 int maxtest = num_test_types-1;
2091
2092 for (time_t t = state.scheduled_test_next_check; ; ) {
2093 struct tm * tms = localtime(&t);
2094 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2095 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2096 for (int i = 0; i <= maxtest; i++) {
2097 // Skip if drive not capable of this test
2098 switch (test_type_chars[i]) {
2099 case 'L': if (state.not_cap_long) continue; break;
2100 case 'S': if (state.not_cap_short) continue; break;
2101 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2102 case 'O': if (scsi || state.not_cap_offline) continue; break;
2103 case 'c': case 'n':
2104 case 'r': if (scsi || state.not_cap_selective) continue; break;
2105 default: continue;
2106 }
2107 // Try match of "T/MM/DD/d/HH"
2108 char pattern[16];
2109 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2110 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2111 if (cfg.test_regex.full_match(pattern)) {
2112 // Test found
2113 testtype = pattern[0];
2114 testtime = t; testhour = tms->tm_hour;
2115 // Limit further matches to higher priority self-tests
2116 maxtest = i-1;
2117 break;
2118 }
2119 }
2120 // Exit if no tests left or current time reached
2121 if (maxtest < 0)
2122 break;
2123 if (t >= now)
2124 break;
2125 // Check next hour
2126 if ((t += 3600) > now)
2127 t = now;
2128 }
2129
2130 // Do next check not before next hour.
2131 struct tm * tmnow = localtime(&now);
2132 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2133
2134 if (testtype) {
2135 state.must_write = true;
2136 // Tell user if an old test was found.
2137 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2138 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2139 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2140 cfg.name.c_str(), testtype, datebuf);
2141 }
2142 }
2143
2144 return testtype;
2145 }
2146
2147 // Print a list of future tests.
2148 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2149 {
2150 unsigned numdev = configs.size();
2151 if (!numdev)
2152 return;
2153 std::vector<int> testcnts(numdev * num_test_types, 0);
2154
2155 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2156
2157 // FixGlibcTimeZoneBug(); // done in PrintOut()
2158 time_t now = time(0);
2159 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2160 dateandtimezoneepoch(datenow, now);
2161
2162 long seconds;
2163 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2164 // Check for each device whether a test will be run
2165 time_t testtime = now + seconds;
2166 for (unsigned i = 0; i < numdev; i++) {
2167 const dev_config & cfg = configs.at(i);
2168 dev_state & state = states.at(i);
2169 const char * p;
2170 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2171 if (testtype && (p = strchr(test_type_chars, testtype))) {
2172 unsigned t = (p - test_type_chars);
2173 // Report at most 5 tests of each type
2174 if (++testcnts[i*num_test_types + t] <= 5) {
2175 dateandtimezoneepoch(date, testtime);
2176 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2177 testcnts[i*num_test_types + t], testtype, date);
2178 }
2179 }
2180 }
2181 }
2182
2183 // Report totals
2184 dateandtimezoneepoch(date, now+seconds);
2185 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2186 for (unsigned i = 0; i < numdev; i++) {
2187 const dev_config & cfg = configs.at(i);
2188 bool scsi = devices.at(i)->is_scsi();
2189 for (unsigned t = 0; t < num_test_types; t++) {
2190 int cnt = testcnts[i*num_test_types + t];
2191 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2192 continue;
2193 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2194 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2195 }
2196 }
2197
2198 }
2199
2200 // Return zero on success, nonzero on failure. Perform offline (background)
2201 // short or long (extended) self test on given scsi device.
2202 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2203 {
2204 int retval = 0;
2205 const char *testname = 0;
2206 const char *name = cfg.name.c_str();
2207 int inProgress;
2208
2209 if (scsiSelfTestInProgress(device, &inProgress)) {
2210 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2211 state.not_cap_short = state.not_cap_long = true;
2212 return 1;
2213 }
2214
2215 if (1 == inProgress) {
2216 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2217 "progress.\n", name);
2218 return 1;
2219 }
2220
2221 switch (testtype) {
2222 case 'S':
2223 testname = "Short Self";
2224 retval = scsiSmartShortSelfTest(device);
2225 break;
2226 case 'L':
2227 testname = "Long Self";
2228 retval = scsiSmartExtendSelfTest(device);
2229 break;
2230 }
2231 // If we can't do the test, exit
2232 if (NULL == testname) {
2233 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2234 testtype);
2235 return 1;
2236 }
2237 if (retval) {
2238 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2239 (SIMPLE_ERR_BAD_FIELD == retval)) {
2240 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2241 testname);
2242 if ('L'==testtype)
2243 state.not_cap_long = true;
2244 else
2245 state.not_cap_short = true;
2246
2247 return 1;
2248 }
2249 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2250 testname, retval);
2251 return 1;
2252 }
2253
2254 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2255
2256 return 0;
2257 }
2258
2259 // Do an offline immediate or self-test. Return zero on success,
2260 // nonzero on failure.
2261 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2262 {
2263 const char *name = cfg.name.c_str();
2264
2265 // Read current smart data and check status/capability
2266 struct ata_smart_values data;
2267 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2268 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2269 return 1;
2270 }
2271
2272 // Check for capability to do the test
2273 int dotest = -1, mode = 0;
2274 const char *testname = 0;
2275 switch (testtype) {
2276 case 'O':
2277 testname="Offline Immediate ";
2278 if (isSupportExecuteOfflineImmediate(&data))
2279 dotest=OFFLINE_FULL_SCAN;
2280 else
2281 state.not_cap_offline = true;
2282 break;
2283 case 'C':
2284 testname="Conveyance Self-";
2285 if (isSupportConveyanceSelfTest(&data))
2286 dotest=CONVEYANCE_SELF_TEST;
2287 else
2288 state.not_cap_conveyance = true;
2289 break;
2290 case 'S':
2291 testname="Short Self-";
2292 if (isSupportSelfTest(&data))
2293 dotest=SHORT_SELF_TEST;
2294 else
2295 state.not_cap_short = true;
2296 break;
2297 case 'L':
2298 testname="Long Self-";
2299 if (isSupportSelfTest(&data))
2300 dotest=EXTEND_SELF_TEST;
2301 else
2302 state.not_cap_long = true;
2303 break;
2304
2305 case 'c': case 'n': case 'r':
2306 testname = "Selective Self-";
2307 if (isSupportSelectiveSelfTest(&data)) {
2308 dotest = SELECTIVE_SELF_TEST;
2309 switch (testtype) {
2310 case 'c': mode = SEL_CONT; break;
2311 case 'n': mode = SEL_NEXT; break;
2312 case 'r': mode = SEL_REDO; break;
2313 }
2314 }
2315 else
2316 state.not_cap_selective = true;
2317 break;
2318 }
2319
2320 // If we can't do the test, exit
2321 if (dotest<0) {
2322 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2323 return 1;
2324 }
2325
2326 // If currently running a self-test, do not interrupt it to start another.
2327 if (15==(data.self_test_exec_status >> 4)) {
2328 if (cfg.fix_firmwarebug == FIX_SAMSUNG3 && data.self_test_exec_status == 0xf0) {
2329 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2330 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2331 } else {
2332 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2333 name, testname, (int)(data.self_test_exec_status & 0x0f));
2334 return 1;
2335 }
2336 }
2337
2338 if (dotest == SELECTIVE_SELF_TEST) {
2339 // Set test span
2340 ata_selective_selftest_args selargs;
2341 selargs.num_spans = 1;
2342 selargs.span[0].mode = mode;
2343 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors)) {
2344 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2345 return 1;
2346 }
2347 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2348 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2349 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2350 start, end, end - start + 1,
2351 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2352 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2353 }
2354
2355 // execute the test, and return status
2356 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2357 if (retval) {
2358 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2359 return retval;
2360 }
2361
2362 if (testtype != 'O')
2363 // Log next self-test execution status
2364 state.smartval.self_test_exec_status = 0xff;
2365
2366 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2367 return 0;
2368 }
2369
2370 // Check pending sector count attribute values (-C, -U directives).
2371 static void check_pending(const dev_config & cfg, dev_state & state,
2372 unsigned char id, bool increase_only,
2373 const ata_smart_values & smartval,
2374 int mailtype, const char * msg)
2375 {
2376 // Find attribute index
2377 int i = ata_find_attr_index(id, smartval);
2378 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2379 return;
2380
2381 // No report if no sectors pending.
2382 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2383 if (rawval == 0)
2384 return;
2385
2386 // If attribute is not reset, report only sector count increases.
2387 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2388 if (!(!increase_only || prev_rawval < rawval))
2389 return;
2390
2391 // Format message.
2392 std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2393 if (prev_rawval > 0 && rawval != prev_rawval)
2394 s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2395
2396 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2397 MailWarning(cfg, state, mailtype, "%s\n", s.c_str());
2398 state.must_write = true;
2399 }
2400
// Format Temperature value.
// Writes the decimal value of 'x' into 'buf', or "??" if 'x' is zero
// (unset). Returns 'buf' so that the call can be used as a printf argument.
static const char * fmt_temp(unsigned char x, char * buf)
{
  if (x)
    sprintf(buf, "%u", x);
  else
    strcpy(buf, "??"); // unset
  return buf;
}
2410
2411 // Check Temperature limits
2412 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2413 {
2414 if (!(0 < currtemp && currtemp < 255)) {
2415 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2416 return;
2417 }
2418
2419 // Update Max Temperature
2420 const char * minchg = "", * maxchg = "";
2421 if (currtemp > state.tempmax) {
2422 if (state.tempmax)
2423 maxchg = "!";
2424 state.tempmax = currtemp;
2425 state.must_write = true;
2426 }
2427
2428 char buf[20];
2429 if (!state.temperature) {
2430 // First check
2431 if (!state.tempmin || currtemp < state.tempmin)
2432 // Delay Min Temperature update by ~ 30 minutes.
2433 state.tempmin_delay = time(0) + CHECKTIME - 60;
2434 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2435 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2436 if (triptemp)
2437 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2438 state.temperature = currtemp;
2439 }
2440 else {
2441 if (state.tempmin_delay) {
2442 // End Min Temperature update delay if ...
2443 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2444 || (state.tempmin_delay <= time(0))) { // or delay time is over.
2445 state.tempmin_delay = 0;
2446 if (!state.tempmin)
2447 state.tempmin = 255;
2448 }
2449 }
2450
2451 // Update Min Temperature
2452 if (!state.tempmin_delay && currtemp < state.tempmin) {
2453 state.tempmin = currtemp;
2454 state.must_write = true;
2455 if (currtemp != state.temperature)
2456 minchg = "!";
2457 }
2458
2459 // Track changes
2460 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2461 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2462 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2463 state.temperature = currtemp;
2464 }
2465 }
2466
2467 // Check limits
2468 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2469 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2470 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2471 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2472 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2473 }
2474 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2475 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2476 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2477 }
2478 }
2479
2480 // Check normalized and raw attribute values.
2481 static void check_attribute(const dev_config & cfg, dev_state & state,
2482 const ata_smart_attribute & attr,
2483 const ata_smart_attribute & prev,
2484 const ata_smart_threshold_entry & thre)
2485 {
2486 // Check attribute and threshold
2487 ata_attr_state attrstate = ata_get_attr_state(attr, thre, cfg.attribute_defs);
2488 if (attrstate == ATTRSTATE_NON_EXISTING)
2489 return;
2490
2491 // If requested, check for usage attributes that have failed.
2492 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2493 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
2494 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs);
2495 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2496 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2497 state.must_write = true;
2498 }
2499
2500 // Return if we're not tracking this type of attribute
2501 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2502 if (!( ( prefail && cfg.prefail)
2503 || (!prefail && cfg.usage )))
2504 return;
2505
2506 // Return if '-I ID' was specified
2507 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2508 return;
2509
2510 // Issue warning if they don't have the same ID in all structures.
2511 if (attr.id != prev.id || attrstate == ATTRSTATE_BAD_THRESHOLD) {
2512 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
2513 cfg.name.c_str(), attr.id, prev.id, thre.id);
2514 return;
2515 }
2516
2517 // Compare normalized values if valid.
2518 bool valchanged = false;
2519 if (attrstate > ATTRSTATE_NO_NORMVAL) {
2520 if (attr.current != prev.current)
2521 valchanged = true;
2522 }
2523
2524 // Compare raw values if requested.
2525 bool rawchanged = false;
2526 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2527 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2528 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2529 rawchanged = true;
2530 }
2531
2532 // Return if no change
2533 if (!(valchanged || rawchanged))
2534 return;
2535
2536 // Format value strings
2537 std::string currstr, prevstr;
2538 if (attrstate == ATTRSTATE_NO_NORMVAL) {
2539 // Print raw values only
2540 currstr = strprintf("%s (Raw)",
2541 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2542 prevstr = strprintf("%s (Raw)",
2543 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2544 }
2545 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2546 // Print normalized and raw values
2547 currstr = strprintf("%d [Raw %s]", attr.current,
2548 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2549 prevstr = strprintf("%d [Raw %s]", prev.current,
2550 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2551 }
2552 else {
2553 // Print normalized values only
2554 currstr = strprintf("%d", attr.current);
2555 prevstr = strprintf("%d", prev.current);
2556 }
2557
2558 // Format message
2559 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2560 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
2561 ata_get_smart_attr_name(attr.id, cfg.attribute_defs).c_str(),
2562 prevstr.c_str(), currstr.c_str());
2563
2564 // Report this change as critical ?
2565 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2566 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2567 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2568 MailWarning(cfg, state, 2, "%s", msg.c_str());
2569 }
2570 else {
2571 PrintOut(LOG_INFO, "%s\n", msg.c_str());
2572 }
2573 state.must_write = true;
2574 }
2575
2576
2577 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev, bool allow_selftests)
2578 {
2579 const char * name = cfg.name.c_str();
2580
2581 // If user has asked, test the email warning system
2582 if (cfg.emailtest)
2583 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2584
2585 // if we can't open device, fail gracefully rather than hard --
2586 // perhaps the next time around we'll be able to open it. ATAPI
2587 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2588 // given (see linux cdrom driver).
2589 if (!atadev->open()) {
2590 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
2591 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2592 return 1;
2593 } else if (debugmode)
2594 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
2595
2596 // user may have requested (with the -n Directive) to leave the disk
2597 // alone if it is in idle or sleeping mode. In this case check the
2598 // power mode and exit without check if needed
2599 if (cfg.powermode && !state.powermodefail) {
2600 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
2601 const char * mode = 0;
2602 if (0 <= powermode && powermode < 0xff) {
2603 // wait for possible spin up and check again
2604 int powermode2;
2605 sleep(5);
2606 powermode2 = ataCheckPowerMode(atadev);
2607 if (powermode2 > powermode)
2608 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
2609 powermode = powermode2;
2610 }
2611
2612 switch (powermode){
2613 case -1:
2614 // SLEEP
2615 mode="SLEEP";
2616 if (cfg.powermode>=1)
2617 dontcheck=1;
2618 break;
2619 case 0:
2620 // STANDBY
2621 mode="STANDBY";
2622 if (cfg.powermode>=2)
2623 dontcheck=1;
2624 break;
2625 case 0x80:
2626 // IDLE
2627 mode="IDLE";
2628 if (cfg.powermode>=3)
2629 dontcheck=1;
2630 break;
2631 case 0xff:
2632 // ACTIVE/IDLE
2633 mode="ACTIVE or IDLE";
2634 break;
2635 default:
2636 // UNKNOWN
2637 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2638 name, powermode);
2639 state.powermodefail = true;
2640 break;
2641 }
2642
2643 // if we are going to skip a check, return now
2644 if (dontcheck){
2645 // skip at most powerskipmax checks
2646 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2647 CloseDevice(atadev, name);
2648 if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
2649 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
2650 state.powerskipcnt++;
2651 return 0;
2652 }
2653 else {
2654 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
2655 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2656 }
2657 state.powerskipcnt = 0;
2658 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2659 }
2660 else if (state.powerskipcnt) {
2661 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2662 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2663 state.powerskipcnt = 0;
2664 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2665 }
2666 }
2667
2668 // check smart status
2669 if (cfg.smartcheck) {
2670 int status=ataSmartStatus2(atadev);
2671 if (status==-1){
2672 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
2673 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
2674 state.must_write = true;
2675 }
2676 else if (status==1){
2677 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
2678 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
2679 state.must_write = true;
2680 }
2681 }
2682
2683 // Check everything that depends upon SMART Data (eg, Attribute values)
2684 if ( cfg.usagefailed || cfg.prefail || cfg.usage
2685 || cfg.curr_pending_id || cfg.offl_pending_id
2686 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || cfg.selftest) {
2687
2688 // Read current attribute values.
2689 ata_smart_values curval;
2690 if (ataReadSmartValues(atadev, &curval)){
2691 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
2692 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
2693 state.must_write = true;
2694 }
2695 else {
2696 // look for current or offline pending sectors
2697 if (cfg.curr_pending_id)
2698 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
2699 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
2700 : "Total unreadable (pending) sectors" ));
2701
2702 if (cfg.offl_pending_id)
2703 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
2704 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
2705 : "Total offline uncorrectable sectors"));
2706
2707 // check temperature limits
2708 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2709 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
2710
2711 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
2712
2713 // look for failed usage attributes, or track usage or prefail attributes
2714 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
2715 check_attribute(cfg, state,
2716 curval.vendor_attributes[i],
2717 state.smartval.vendor_attributes[i],
2718 state.smartthres.thres_entries[i]);
2719 }
2720
2721 if (cfg.selftest) {
2722 // Log changes of self-test execution status
2723 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
2724 || (!allow_selftests && curval.self_test_exec_status != 0x00) )
2725 log_self_test_exec_status(name, curval.self_test_exec_status);
2726 }
2727
2728 // Save the new values into *drive for the next time around
2729 state.smartval = curval;
2730 }
2731 }
2732 }
2733
2734 // check if number of selftest errors has increased (note: may also DECREASE)
2735 if (cfg.selftest)
2736 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug));
2737
2738 // check if number of ATA errors has increased
2739 if (cfg.errorlog) {
2740
2741 int newc, oldc= state.ataerrorcount;
2742
2743 // new number of errors
2744 newc = ATAErrorCount(atadev, name, cfg.fix_firmwarebug);
2745
2746 // did command fail?
2747 if (newc<0)
2748 // lack of PrintOut here is INTENTIONAL
2749 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
2750
2751 // has error count increased?
2752 if (newc>oldc){
2753 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
2754 name, oldc, newc);
2755 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
2756 name, oldc, newc);
2757 state.must_write = true;
2758 }
2759
2760 // this last line is probably not needed, count always increases
2761 if (newc>=0)
2762 state.ataerrorcount=newc;
2763 }
2764
2765 // if the user has asked, and device is capable (or we're not yet
2766 // sure) check whether a self test should be done now.
2767 if (allow_selftests && !cfg.test_regex.empty()) {
2768 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
2769 if (testtype)
2770 DoATASelfTest(cfg, state, atadev, testtype);
2771 }
2772
2773 // Don't leave device open -- the OS/user may want to access it
2774 // before the next smartd cycle!
2775 CloseDevice(atadev, name);
2776
2777 // Copy ATA attribute values to persistent state
2778 state.update_persistent_state();
2779
2780 return 0;
2781 }
2782
2783 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
2784 {
2785 UINT8 asc, ascq;
2786 UINT8 currenttemp;
2787 UINT8 triptemp;
2788 const char * name = cfg.name.c_str();
2789 const char *cp;
2790
2791 // If the user has asked for it, test the email warning system
2792 if (cfg.emailtest)
2793 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2794
2795 // if we can't open device, fail gracefully rather than hard --
2796 // perhaps the next time around we'll be able to open it
2797 if (!scsidev->open()) {
2798 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
2799 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2800 return 1;
2801 } else if (debugmode)
2802 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
2803 currenttemp = 0;
2804 asc = 0;
2805 ascq = 0;
2806 if (!state.SuppressReport) {
2807 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2808 &asc, &ascq, &currenttemp, &triptemp)) {
2809 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
2810 name);
2811 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
2812 state.SuppressReport = 1;
2813 }
2814 }
2815 if (asc > 0) {
2816 cp = scsiGetIEString(asc, ascq);
2817 if (cp) {
2818 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
2819 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
2820 } else if (debugmode)
2821 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2822 name, (int)asc, (int)ascq);
2823 } else if (debugmode)
2824 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
2825
2826 // check temperature limits
2827 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2828 CheckTemperature(cfg, state, currenttemp, triptemp);
2829
2830 // check if number of selftest errors has increased (note: may also DECREASE)
2831 if (cfg.selftest)
2832 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
2833
2834 if (allow_selftests && !cfg.test_regex.empty()) {
2835 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
2836 if (testtype)
2837 DoSCSISelfTest(cfg, state, scsidev, testtype);
2838 }
2839 CloseDevice(scsidev, name);
2840 return 0;
2841 }
2842
2843 // Checks the SMART status of all ATA and SCSI devices
2844 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
2845 smart_device_list & devices, bool allow_selftests)
2846 {
2847 for (unsigned i = 0; i < configs.size(); i++) {
2848 const dev_config & cfg = configs.at(i);
2849 dev_state & state = states.at(i);
2850 smart_device * dev = devices.at(i);
2851 if (dev->is_ata())
2852 ATACheckDevice(cfg, state, dev->to_ata(), allow_selftests);
2853 else if (dev->is_scsi())
2854 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
2855 }
2856 }
2857
2858 // Set if Initialize() was called
2859 static bool is_initialized = false;
2860
2861 // Does initialization right after fork to daemon mode
2862 void Initialize(time_t *wakeuptime){
2863
2864 // Call Goodbye() on exit
2865 is_initialized = true;
2866
2867 // write PID file
2868 if (!debugmode)
2869 WritePidFile();
2870
2871 // install signal handlers. On Solaris, can't use signal() because
2872 // it resets the handler to SIG_DFL after each call. So use sigset()
2873 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2874
2875 // normal and abnormal exit
2876 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
2877 SIGNALFN(SIGTERM, SIG_IGN);
2878 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
2879 SIGNALFN(SIGQUIT, SIG_IGN);
2880
2881 // in debug mode, <CONTROL-C> ==> HUP
2882 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
2883 SIGNALFN(SIGINT, SIG_IGN);
2884
2885 // Catch HUP and USR1
2886 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
2887 SIGNALFN(SIGHUP, SIG_IGN);
2888 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
2889 SIGNALFN(SIGUSR1, SIG_IGN);
2890 #ifdef _WIN32
2891 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
2892 SIGNALFN(SIGUSR2, SIG_IGN);
2893 #endif
2894
2895 // initialize wakeup time to CURRENT time
2896 *wakeuptime=time(NULL);
2897
2898 return;
2899 }
2900
#ifdef _WIN32
// Switches debug mode on or off in response to signal USR2.
// Implemented for native windows only
// (there is no easy way to reopen tty on *nix).
// Only the values 0 and 1 of debugmode are toggled; any other value is
// reported and left unchanged.
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      // currently off: try to attach a console and switch on
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
      }
      else {
        debugmode = 1;
        daemon_signal(SIGINT, HUPhandler);
        PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      }
      break;

    case 1:
      // currently on: detach console and restore normal SIGINT handling
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
2926
2927 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
2928 {
2929 // If past wake-up-time, compute next wake-up-time
2930 time_t timenow=time(NULL);
2931 while (wakeuptime<=timenow){
2932 int intervals=1+(timenow-wakeuptime)/checktime;
2933 wakeuptime+=intervals*checktime;
2934 }
2935
2936 // sleep until we catch SIGUSR1 or have completed sleeping
2937 while (timenow<wakeuptime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT){
2938
2939 // protect user again system clock being adjusted backwards
2940 if (wakeuptime>timenow+checktime){
2941 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2942 wakeuptime=timenow+checktime;
2943 }
2944
2945 // Exit sleep when time interval has expired or a signal is received
2946 sleep(wakeuptime-timenow);
2947
2948 #ifdef _WIN32
2949 // toggle debug mode?
2950 if (caughtsigUSR2) {
2951 ToggleDebugMode();
2952 caughtsigUSR2 = 0;
2953 }
2954 #endif
2955
2956 timenow=time(NULL);
2957 }
2958
2959 // if we caught a SIGUSR1 then print message and clear signal
2960 if (caughtsigUSR1){
2961 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2962 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
2963 caughtsigUSR1=0;
2964 sigwakeup = true;
2965 }
2966
2967 // return adjusted wakeuptime
2968 return wakeuptime;
2969 }
2970
2971 // Print out a list of valid arguments for the Directive d
2972 void printoutvaliddirectiveargs(int priority, char d) {
2973
2974 switch (d) {
2975 case 'n':
2976 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
2977 break;
2978 case 's':
2979 PrintOut(priority, "valid_regular_expression");
2980 break;
2981 case 'd':
2982 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
2983 break;
2984 case 'T':
2985 PrintOut(priority, "normal, permissive");
2986 break;
2987 case 'o':
2988 case 'S':
2989 PrintOut(priority, "on, off");
2990 break;
2991 case 'l':
2992 PrintOut(priority, "error, selftest");
2993 break;
2994 case 'M':
2995 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
2996 break;
2997 case 'v':
2998 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
2999 break;
3000 case 'P':
3001 PrintOut(priority, "use, ignore, show, showall");
3002 break;
3003 case 'F':
3004 PrintOut(priority, "none, samsung, samsung2, samsung3");
3005 break;
3006 }
3007 }
3008
3009 // exits with an error message, or returns integer value of token
3010 int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *configfile,
3011 int min, int max, char * suffix = 0)
3012 {
3013 // make sure argument is there
3014 if (!arg) {
3015 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3016 configfile, lineno, name, token, min, max);
3017 return -1;
3018 }
3019
3020 // get argument value (base 10), check that it's integer, and in-range
3021 char *endptr;
3022 int val = strtol(arg,&endptr,10);
3023
3024 // optional suffix present?
3025 if (suffix) {
3026 if (!strcmp(endptr, suffix))
3027 endptr += strlen(suffix);
3028 else
3029 *suffix = 0;
3030 }
3031
3032 if (!(!*endptr && min <= val && val <= max)) {
3033 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3034 configfile, lineno, name, token, arg, min, max);
3035 return -1;
3036 }
3037
3038 // all is well; return value
3039 return val;
3040 }
3041
3042
3043 // Get 1-3 small integer(s) for '-W' directive
3044 int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *configfile,
3045 unsigned char * val1, unsigned char * val2, unsigned char * val3){
3046 unsigned v1 = 0, v2 = 0, v3 = 0;
3047 int n1 = -1, n2 = -1, n3 = -1, len;
3048 if (!arg) {
3049 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3050 configfile, lineno, name, token);
3051 return -1;
3052 }
3053
3054 len = strlen(arg);
3055 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3056 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3057 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3058 configfile, lineno, name, token, arg);
3059 return -1;
3060 }
3061 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3062 return 0;
3063 }
3064
3065
3066 // This function returns 1 if it has correctly parsed one token (and
3067 // any arguments), else zero if no tokens remain. It returns -1 if an
3068 // error was encountered.
3069 static int ParseToken(char * token, dev_config & cfg)
3070 {
3071 char sym;
3072 const char * name = cfg.name.c_str();
3073 int lineno=cfg.lineno;
3074 const char *delim = " \n\t";
3075 int badarg = 0;
3076 int missingarg = 0;
3077 const char *arg = 0;
3078
3079 // is the rest of the line a comment
3080 if (*token=='#')
3081 return 1;
3082
3083 // is the token not recognized?
3084 if (*token!='-' || strlen(token)!=2) {
3085 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3086 configfile, lineno, name, token);
3087 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3088 return -1;
3089 }
3090
3091 // token we will be parsing:
3092 sym=token[1];
3093
3094 // parse the token and swallow its argument
3095 int val;
3096 char plus[] = "+", excl[] = "!";
3097
3098 switch (sym) {
3099 case 'C':
3100 // monitor current pending sector count (default 197)
3101 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3102 return -1;
3103 cfg.curr_pending_id = (unsigned char)val;
3104 cfg.curr_pending_incr = (*plus == '+');
3105 cfg.curr_pending_set = true;
3106 break;
3107 case 'U':
3108 // monitor offline uncorrectable sectors (default 198)
3109 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3110 return -1;
3111 cfg.offl_pending_id = (unsigned char)val;
3112 cfg.offl_pending_incr = (*plus == '+');
3113 cfg.offl_pending_set = true;
3114 break;
3115 case 'T':
3116 // Set tolerance level for SMART command failures
3117 if ((arg = strtok(NULL, delim)) == NULL) {
3118 missingarg = 1;
3119 } else if (!strcmp(arg, "normal")) {
3120 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3121 // not on failure of an optional S.M.A.R.T. command.
3122 // This is the default so we don't need to actually do anything here.
3123 cfg.permissive = false;
3124 } else if (!strcmp(arg, "permissive")) {
3125 // Permissive mode; ignore errors from Mandatory SMART commands
3126 cfg.permissive = true;
3127 } else {
3128 badarg = 1;
3129 }
3130 break;
3131 case 'd':
3132 // specify the device type
3133 if ((arg = strtok(NULL, delim)) == NULL) {
3134 missingarg = 1;
3135 } else if (!strcmp(arg, "removable")) {
3136 cfg.removable = true;
3137 } else {
3138 cfg.dev_type = arg;
3139 }
3140 break;
3141 case 'F':
3142 // fix firmware bug
3143 if ((arg = strtok(NULL, delim)) == NULL) {
3144 missingarg = 1;
3145 } else if (!strcmp(arg, "none")) {
3146 cfg.fix_firmwarebug = FIX_NONE;
3147 } else if (!strcmp(arg, "samsung")) {
3148 cfg.fix_firmwarebug = FIX_SAMSUNG;
3149 } else if (!strcmp(arg, "samsung2")) {
3150 cfg.fix_firmwarebug = FIX_SAMSUNG2;
3151 } else if (!strcmp(arg, "samsung3")) {
3152 cfg.fix_firmwarebug = FIX_SAMSUNG3;
3153 } else {
3154 badarg = 1;
3155 }
3156 break;
3157 case 'H':
3158 // check SMART status
3159 cfg.smartcheck = true;
3160 break;
3161 case 'f':
3162 // check for failure of usage attributes
3163 cfg.usagefailed = true;
3164 break;
3165 case 't':
3166 // track changes in all vendor attributes
3167 cfg.prefail = true;
3168 cfg.usage = true;
3169 break;
3170 case 'p':
3171 // track changes in prefail vendor attributes
3172 cfg.prefail = true;
3173 break;
3174 case 'u':
3175 // track changes in usage vendor attributes
3176 cfg.usage = true;
3177 break;
3178 case 'l':
3179 // track changes in SMART logs
3180 if ((arg = strtok(NULL, delim)) == NULL) {
3181 missingarg = 1;
3182 } else if (!strcmp(arg, "selftest")) {
3183 // track changes in self-test log
3184 cfg.selftest = true;
3185 } else if (!strcmp(arg, "error")) {
3186 // track changes in ATA error log
3187 cfg.errorlog = true;
3188 } else {
3189 badarg = 1;
3190 }
3191 break;
3192 case 'a':
3193 // monitor everything
3194 cfg.smartcheck = true;
3195 cfg.prefail = true;
3196 cfg.usagefailed = true;
3197 cfg.usage = true;
3198 cfg.selftest = true;
3199 cfg.errorlog = true;
3200 break;
3201 case 'o':
3202 // automatic offline testing enable/disable
3203 if ((arg = strtok(NULL, delim)) == NULL) {
3204 missingarg = 1;
3205 } else if (!strcmp(arg, "on")) {
3206 cfg.autoofflinetest = 2;
3207 } else if (!strcmp(arg, "off")) {
3208 cfg.autoofflinetest = 1;
3209 } else {
3210 badarg = 1;
3211 }
3212 break;
3213 case 'n':
3214 // skip disk check if in idle or standby mode
3215 if (!(arg = strtok(NULL, delim)))
3216 missingarg = 1;
3217 else {
3218 char *endptr = NULL;
3219 char *next = strchr(const_cast<char*>(arg), ',');
3220
3221 cfg.powerquiet = false;
3222 cfg.powerskipmax = 0;
3223
3224 if (next!=NULL) *next='\0';
3225 if (!strcmp(arg, "never"))
3226 cfg.powermode = 0;
3227 else if (!strcmp(arg, "sleep"))
3228 cfg.powermode = 1;
3229 else if (!strcmp(arg, "standby"))
3230 cfg.powermode = 2;
3231 else if (!strcmp(arg, "idle"))
3232 cfg.powermode = 3;
3233 else
3234 badarg = 1;
3235
3236 // if optional arguments are present
3237 if (!badarg && next!=NULL) {
3238 next++;
3239 cfg.powerskipmax = strtol(next, &endptr, 10);
3240 if (endptr == next)
3241 cfg.powerskipmax = 0;
3242 else {
3243 next = endptr + (*endptr != '\0');
3244 if (cfg.powerskipmax <= 0)
3245 badarg = 1;
3246 }
3247 if (*next != '\0') {
3248 if (!strcmp("q", next))
3249 cfg.powerquiet = true;
3250 else {
3251 badarg = 1;
3252 }
3253 }
3254 }
3255 }
3256 break;
3257 case 'S':
3258 // automatic attribute autosave enable/disable
3259 if ((arg = strtok(NULL, delim)) == NULL) {
3260 missingarg = 1;
3261 } else if (!strcmp(arg, "on")) {
3262 cfg.autosave = 2;
3263 } else if (!strcmp(arg, "off")) {
3264 cfg.autosave = 1;
3265 } else {
3266 badarg = 1;
3267 }
3268 break;
3269 case 's':
3270 // warn user, and delete any previously given -s REGEXP Directives
3271 if (!cfg.test_regex.empty()){
3272 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3273 configfile, lineno, name, cfg.test_regex.get_pattern());
3274 cfg.test_regex = regular_expression();
3275 }
3276 // check for missing argument
3277 if (!(arg = strtok(NULL, delim))) {
3278 missingarg = 1;
3279 }
3280 // Compile regex
3281 else {
3282 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3283 // not a valid regular expression!
3284 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3285 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3286 return -1;
3287 }
3288 }
3289 // Do a bit of sanity checking and warn user if we think that
3290 // their regexp is "strange". User probably confused about shell
3291 // glob(3) syntax versus regular expression syntax regexp(7).
3292 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3293 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3294 configfile, lineno, name, val+1, arg[val], arg);
3295 break;
3296 case 'm':
3297 // send email to address that follows
3298 if (!(arg = strtok(NULL,delim)))
3299 missingarg = 1;
3300 else {
3301 if (!cfg.emailaddress.empty())
3302 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3303 configfile, lineno, name, cfg.emailaddress.c_str());
3304 cfg.emailaddress = arg;
3305 }
3306 break;
3307 case 'M':
3308 // email warning options
3309 if (!(arg = strtok(NULL, delim)))
3310 missingarg = 1;
3311 else if (!strcmp(arg, "once"))
3312 cfg.emailfreq = 1;
3313 else if (!strcmp(arg, "daily"))
3314 cfg.emailfreq = 2;
3315 else if (!strcmp(arg, "diminishing"))
3316 cfg.emailfreq = 3;
3317 else if (!strcmp(arg, "test"))
3318 cfg.emailtest = 1;
3319 else if (!strcmp(arg, "exec")) {
3320 // Get the next argument (the command line)
3321 if (!(arg = strtok(NULL, delim))) {
3322 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3323 configfile, lineno, name, token);
3324 return -1;
3325 }
3326 // Free the last cmd line given if any, and copy new one
3327 if (!cfg.emailcmdline.empty())
3328 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3329 configfile, lineno, name, cfg.emailcmdline.c_str());
3330 cfg.emailcmdline = arg;
3331 }
3332 else
3333 badarg = 1;
3334 break;
3335 case 'i':
3336 // ignore failure of usage attribute
3337 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3338 return -1;
3339 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3340 break;
3341 case 'I':
3342 // ignore attribute for tracking purposes
3343 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3344 return -1;
3345 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3346 break;
3347 case 'r':
3348 // print raw value when tracking
3349 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3350 return -1;
3351 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3352 if (*excl == '!') // attribute change is critical
3353 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3354 break;
3355 case 'R':
3356 // track changes in raw value (forces printing of raw value)
3357 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3358 return -1;
3359 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3360 if (*excl == '!') // raw value change is critical
3361 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3362 break;
3363 case 'W':
3364 // track Temperature
3365 if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3366 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
3367 return -1;
3368 break;
3369 case 'v':
3370 // non-default vendor-specific attribute meaning
3371 if (!(arg=strtok(NULL,delim))) {
3372 missingarg = 1;
3373 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
3374 badarg = 1;
3375 }
3376 break;
3377 case 'P':
3378 // Define use of drive-specific presets.
3379 if (!(arg = strtok(NULL, delim))) {
3380 missingarg = 1;
3381 } else if (!strcmp(arg, "use")) {
3382 cfg.ignorepresets = false;
3383 } else if (!strcmp(arg, "ignore")) {
3384 cfg.ignorepresets = true;
3385 } else if (!strcmp(arg, "show")) {
3386 cfg.showpresets = true;
3387 } else if (!strcmp(arg, "showall")) {
3388 showallpresets();
3389 } else {
3390 badarg = 1;
3391 }
3392 break;
3393 default:
3394 // Directive not recognized
3395 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3396 configfile, lineno, name, token);
3397 Directives();
3398 return -1;
3399 }
3400 if (missingarg) {
3401 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3402 configfile, lineno, name, token);
3403 }
3404 if (badarg) {
3405 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3406 configfile, lineno, name, token, arg);
3407 }
3408 if (missingarg || badarg) {
3409 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
3410 printoutvaliddirectiveargs(LOG_CRIT, sym);
3411 PrintOut(LOG_CRIT, "\n");
3412 return -1;
3413 }
3414
3415 return 1;
3416 }
3417
3418 // Scan directive for configuration file
3419 #define SCANDIRECTIVE "DEVICESCAN"
3420
3421 // This is the routine that adds things to the conf_entries list.
3422 //
3423 // Return values are:
3424 // 1: parsed a normal line
3425 // 0: found comment or blank line
3426 // -1: found SCANDIRECTIVE line
3427 // -2: found an error
3428 //
3429 // Note: this routine modifies *line from the caller!
3430 static int ParseConfigLine(dev_config_vector & conf_entries, int entry, int lineno, /*const*/ char * line)
3431 {
3432 char *token=NULL;
3433 char *name=NULL;
3434 const char *delim = " \n\t";
3435 int devscan=0;
3436
3437 // get first token: device name. If a comment, skip line
3438 if (!(name=strtok(line,delim)) || *name=='#') {
3439 return 0;
3440 }
3441
3442 // Have we detected the SCANDIRECTIVE directive?
3443 if (!strcmp(SCANDIRECTIVE,name)){
3444 devscan=1;
3445 if (entry) {
3446 PrintOut(LOG_INFO,"Scan Directive %s (line %d) must be the first entry in %s\n",name, lineno, configfile);
3447 return -2;
3448 }
3449 }
3450
3451 // We've got a legit entry, make space to store it
3452 conf_entries.push_back( dev_config() );
3453 dev_config & cfg = conf_entries.back();
3454
3455 cfg.name = name;
3456
3457 // Store line number, and by default check for both device types.
3458 cfg.lineno=lineno;
3459
3460 // parse tokens one at a time from the file.
3461 while ((token=strtok(NULL,delim))){
3462 int retval=ParseToken(token,cfg);
3463
3464 if (retval==0)
3465 // No tokens left:
3466 break;
3467
3468 if (retval>0) {
3469 // Parsed token
3470 #if (0)
3471 PrintOut(LOG_INFO,"Parsed token %s\n",token);
3472 #endif
3473 continue;
3474 }
3475
3476 if (retval<0) {
3477 // error found on the line
3478 return -2;
3479 }
3480 }
3481
3482 // If NO monitoring directives are set, then set all of them.
3483 if (!(cfg.smartcheck || cfg.usagefailed || cfg.prefail ||
3484 cfg.usage || cfg.selftest || cfg.errorlog ||
3485 cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
3486
3487 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3488 cfg.name.c_str(), cfg.lineno, configfile);
3489
3490 cfg.smartcheck = true;
3491 cfg.usagefailed = true;
3492 cfg.prefail = true;
3493 cfg.usage = true;
3494 cfg.selftest = true;
3495 cfg.errorlog = true;
3496 }
3497
3498 // additional sanity check. Has user set -M options without -m?
3499 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
3500 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3501 cfg.name.c_str(), cfg.lineno, configfile);
3502 return -2;
3503 }
3504
3505 // has the user has set <nomailer>?
3506 if (cfg.emailaddress == "<nomailer>") {
3507 // check that -M exec is also set
3508 if (cfg.emailcmdline.empty()){
3509 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3510 cfg.name.c_str(), cfg.lineno, configfile);
3511 return -2;
3512 }
3513 // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
3514 cfg.emailaddress.clear();
3515 }
3516
3517 // set cfg.emailfreq to 1 (once) if user hasn't set it
3518 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq)
3519 cfg.emailfreq = 1;
3520
3521 if (devscan)
3522 return -1;
3523 else
3524 return 1;
3525 }
3526
3527 // Parses a configuration file. Return values are:
3528 // N=>0: found N entries
3529 // -1: syntax error in config file
3530 // -2: config file does not exist
3531 // -3: config file exists but cannot be read
3532 //
3533 // In the case where the return value is 0, there are three
3534 // possiblities:
3535 // Empty configuration file ==> conf_entries.empty()
3536 // No configuration file ==> conf_entries[0].lineno == 0
3537 // SCANDIRECTIVE found ==> conf_entries[0].lineno != 0
3538 static int ParseConfigFile(dev_config_vector & conf_entries)
3539 {
3540 // maximum line length in configuration file
3541 const int MAXLINELEN = 256;
3542 // maximum length of a continued line in configuration file
3543 const int MAXCONTLINE = 1023;
3544
3545 stdio_file f;
3546 // Open config file, if it exists and is not <stdin>
3547 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
3548 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
3549 // file exists but we can't read it or it should exist due to '-c' option
3550 int ret = (errno!=ENOENT ? -3 : -2);
3551 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
3552 strerror(errno),configfile);
3553 return ret;
3554 }
3555 }
3556 else // read from stdin ('-c -' option)
3557 f.open(stdin);
3558
3559 // No configuration file found -- use fake one
3560 int entry = 0;
3561 if (!f) {
3562 char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
3563
3564 if (ParseConfigLine(conf_entries, entry, 0, fakeconfig) != -1)
3565 throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
3566 return 0;
3567 }
3568
3569 #ifdef __CYGWIN__
3570 setmode(fileno(f), O_TEXT); // Allow files with \r\n
3571 #endif
3572
3573 // configuration file exists
3574 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
3575
3576 // parse config file line by line
3577 int lineno = 1, cont = 0, contlineno = 0;
3578 char line[MAXLINELEN+2];
3579 char fullline[MAXCONTLINE+1];
3580
3581 for (;;) {
3582 int len=0,scandevice;
3583 char *lastslash;
3584 char *comment;
3585 char *code;
3586
3587 // make debugging simpler
3588 memset(line,0,sizeof(line));
3589
3590 // get a line
3591 code=fgets(line, MAXLINELEN+2, f);
3592
3593 // are we at the end of the file?
3594 if (!code){
3595 if (cont) {
3596 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3597 // See if we found a SCANDIRECTIVE directive
3598 if (scandevice==-1)
3599 return 0;
3600 // did we find a syntax error
3601 if (scandevice==-2)
3602 return -1;
3603 // the final line is part of a continuation line
3604 cont=0;
3605 entry+=scandevice;
3606 }
3607 break;
3608 }
3609
3610 // input file line number
3611 contlineno++;
3612
3613 // See if line is too long
3614 len=strlen(line);
3615 if (len>MAXLINELEN){
3616 const char *warn;
3617 if (line[len-1]=='\n')
3618 warn="(including newline!) ";
3619 else
3620 warn="";
3621 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3622 (int)contlineno,configfile,warn,(int)MAXLINELEN);
3623 return -1;
3624 }
3625
3626 // Ignore anything after comment symbol
3627 if ((comment=strchr(line,'#'))){
3628 *comment='\0';
3629 len=strlen(line);
3630 }
3631
3632 // is the total line (made of all continuation lines) too long?
3633 if (cont+len>MAXCONTLINE){
3634 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3635 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
3636 return -1;
3637 }
3638
3639 // copy string so far into fullline, and increment length
3640 strcpy(fullline+cont,line);
3641 cont+=len;
3642
3643 // is this a continuation line. If so, replace \ by space and look at next line
3644 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
3645 *(fullline+(cont-len)+(lastslash-line))=' ';
3646 continue;
3647 }
3648
3649 // Not a continuation line. Parse it
3650 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3651
3652 // did we find a scandevice directive?
3653 if (scandevice==-1)
3654 return 0;
3655 // did we find a syntax error
3656 if (scandevice==-2)
3657 return -1;
3658
3659 entry+=scandevice;
3660 lineno++;
3661 cont=0;
3662 }
3663
3664 // note -- may be zero if syntax of file OK, but no valid entries!
3665 return entry;
3666 }
3667
3668 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3669 <LIST> is the list of valid arguments for option opt. */
3670 void PrintValidArgs(char opt) {
3671 const char *s;
3672
3673 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
3674 if (!(s = GetValidArgList(opt)))
3675 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
3676 else
3677 PrintOut(LOG_CRIT, "%s", (char *)s);
3678 PrintOut(LOG_CRIT, " <=======\n");
3679 }
3680
// Checks whether path names an absolute location.
// A leading '/' always qualifies.  On Windows and Cygwin builds a leading
// '\\' or a drive prefix such as "C:/" or "C:\" qualifies as well.
// path must point to a NUL-terminated string; an empty string is not absolute.
static bool is_abs_path(const char * path)
{
#if defined(_WIN32) || defined(__CYGWIN__)
  if (path[0] == '/' || path[0] == '\\')
    return true;
  // %n is only stored if letter, colon and (back)slash all matched
  int consumed = -1;
  sscanf(path, "%*1[A-Za-z]:%*1[/\\]%n", &consumed);
  return (consumed > 0);
#else
  return (path[0] == '/');
#endif
}
3696
3697 // Parses input line, prints usage message and
3698 // version/license/copyright messages
3699 void ParseOpts(int argc, char **argv){
3700 int optchar;
3701 char *tailptr;
3702 long lchecktime;
3703 // Please update GetValidArgList() if you edit shortopts
3704 const char *shortopts = "c:l:q:dDni:p:r:s:A:B:Vh?";
3705 char *arg;
3706 // Please update GetValidArgList() if you edit longopts
3707 struct option longopts[] = {
3708 { "configfile", required_argument, 0, 'c' },
3709 { "logfacility", required_argument, 0, 'l' },
3710 { "quit", required_argument, 0, 'q' },
3711 { "debug", no_argument, 0, 'd' },
3712 { "showdirectives", no_argument, 0, 'D' },
3713 { "interval", required_argument, 0, 'i' },
3714 #ifndef _WIN32
3715 { "no-fork", no_argument, 0, 'n' },
3716 #endif
3717 { "pidfile", required_argument, 0, 'p' },
3718 { "report", required_argument, 0, 'r' },
3719 { "savestates", required_argument, 0, 's' },
3720 { "attributelog", required_argument, 0, 'A' },
3721 { "drivedb", required_argument, 0, 'B' },
3722 #if defined(_WIN32) || defined(__CYGWIN__)
3723 { "service", no_argument, 0, 'n' },
3724 #endif
3725 { "version", no_argument, 0, 'V' },
3726 { "license", no_argument, 0, 'V' },
3727 { "copyright", no_argument, 0, 'V' },
3728 { "help", no_argument, 0, 'h' },
3729 { "usage", no_argument, 0, 'h' },
3730 { 0, 0, 0, 0 }
3731 };
3732
3733 opterr=optopt=0;
3734 bool badarg = false;
3735 bool no_defaultdb = false; // set true on '-B FILE'
3736
3737 // Parse input options. This horrible construction is so that emacs
3738 // indents properly. Sorry.
3739 while (-1 != (optchar =
3740 getopt_long(argc, argv, shortopts, longopts, NULL)
3741 )) {
3742
3743 switch(optchar) {
3744 case 'q':
3745 // when to quit
3746 if (!(strcmp(optarg,"nodev"))) {
3747 quit=0;
3748 } else if (!(strcmp(optarg,"nodevstartup"))) {
3749 quit=1;
3750 } else if (!(strcmp(optarg,"never"))) {
3751 quit=2;
3752 } else if (!(strcmp(optarg,"onecheck"))) {
3753 quit=3;
3754 debugmode=1;
3755 } else if (!(strcmp(optarg,"showtests"))) {
3756 quit=4;
3757 debugmode=1;
3758 } else if (!(strcmp(optarg,"errors"))) {
3759 quit=5;
3760 } else {
3761 badarg = true;
3762 }
3763 break;
3764 case 'l':
3765 // set the log facility level
3766 if (!strcmp(optarg, "daemon"))
3767 facility=LOG_DAEMON;
3768 else if (!strcmp(optarg, "local0"))
3769 facility=LOG_LOCAL0;
3770 else if (!strcmp(optarg, "local1"))
3771 facility=LOG_LOCAL1;
3772 else if (!strcmp(optarg, "local2"))
3773 facility=LOG_LOCAL2;
3774 else if (!strcmp(optarg, "local3"))
3775 facility=LOG_LOCAL3;
3776 else if (!strcmp(optarg, "local4"))
3777 facility=LOG_LOCAL4;
3778 else if (!strcmp(optarg, "local5"))
3779 facility=LOG_LOCAL5;
3780 else if (!strcmp(optarg, "local6"))
3781 facility=LOG_LOCAL6;
3782 else if (!strcmp(optarg, "local7"))
3783 facility=LOG_LOCAL7;
3784 else
3785 badarg = true;
3786 break;
3787 case 'd':
3788 // enable debug mode
3789 debugmode = 1;
3790 break;
3791 case 'n':
3792 // don't fork()
3793 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
3794 do_fork = false;
3795 #endif
3796 break;
3797 case 'D':
3798 // print summary of all valid directives
3799 debugmode = 1;
3800 Directives();
3801 EXIT(0);
3802 break;
3803 case 'i':
3804 // Period (time interval) for checking
3805 // strtol will set errno in the event of overflow, so we'll check it.
3806 errno = 0;
3807 lchecktime = strtol(optarg, &tailptr, 10);
3808 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
3809 debugmode=1;
3810 PrintHead();
3811 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
3812 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
3813 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3814 EXIT(EXIT_BADCMD);
3815 }
3816 checktime = (int)lchecktime;
3817 break;
3818 case 'r':
3819 // report IOCTL transactions
3820 {
3821 int i;
3822 char *s;
3823
3824 // split_report_arg() may modify its first argument string, so use a
3825 // copy of optarg in case we want optarg for an error message.
3826 if (!(s = strdup(optarg))) {
3827 PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
3828 EXIT(EXIT_NOMEM);
3829 }
3830 if (split_report_arg(s, &i)) {
3831 badarg = true;
3832 } else if (i<1 || i>3) {
3833 debugmode=1;
3834 PrintHead();
3835 PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
3836 PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3837 EXIT(EXIT_BADCMD);
3838 } else if (!strcmp(s,"ioctl")) {
3839 con->reportataioctl = con->reportscsiioctl = i;
3840 } else if (!strcmp(s,"ataioctl")) {
3841 con->reportataioctl = i;
3842 } else if (!strcmp(s,"scsiioctl")) {
3843 con->reportscsiioctl = i;
3844 } else {
3845 badarg = true;
3846 }
3847 free(s); // TODO: use std::string
3848 }
3849 break;
3850 case 'c':
3851 // alternate configuration file
3852 if (strcmp(optarg,"-"))
3853 configfile = (configfile_alt = optarg).c_str();
3854 else // read from stdin
3855 configfile=configfile_stdin;
3856 break;
3857 case 'p':
3858 // output file with PID number
3859 pid_file = optarg;
3860 break;
3861 case 's':
3862 // path prefix of persistent state file
3863 state_path_prefix = optarg;
3864 break;
3865 case 'A':
3866 // path prefix of attribute log file
3867 attrlog_path_prefix = optarg;
3868 break;
3869 case 'B':
3870 {
3871 const char * path = optarg;
3872 if (*path == '+' && path[1])
3873 path++;
3874 else
3875 no_defaultdb = true;
3876 unsigned char savedebug = debugmode; debugmode = 1;
3877 if (!read_drive_database(path))
3878 EXIT(EXIT_BADCMD);
3879 debugmode = savedebug;
3880 }
3881 break;
3882 case 'V':
3883 // print version and CVS info
3884 debugmode = 1;
3885 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
3886 EXIT(0);
3887 break;
3888 case 'h':
3889 // help: print summary of command-line options
3890 debugmode=1;
3891 PrintHead();
3892 Usage();
3893 EXIT(0);
3894 break;
3895 case '?':
3896 default:
3897 // unrecognized option
3898 debugmode=1;
3899 PrintHead();
3900 // Point arg to the argument in which this option was found.
3901 arg = argv[optind-1];
3902 // Check whether the option is a long option that doesn't map to -h.
3903 if (arg[1] == '-' && optchar != 'h') {
3904 // Iff optopt holds a valid option then argument must be missing.
3905 if (optopt && (strchr(shortopts, optopt) != NULL)) {
3906 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
3907 PrintValidArgs(optopt);
3908 } else {
3909 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
3910 }
3911 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
3912 EXIT(EXIT_BADCMD);
3913 }
3914 if (optopt) {
3915 // Iff optopt holds a valid option then argument must be missing.
3916 if (strchr(shortopts, optopt) != NULL){
3917 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
3918 PrintValidArgs(optopt);
3919 } else {
3920 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
3921 }
3922 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3923 EXIT(EXIT_BADCMD);
3924 }
3925 Usage();
3926 EXIT(0);
3927 }
3928
3929 // Check to see if option had an unrecognized or incorrect argument.
3930 if (badarg) {
3931 debugmode=1;
3932 PrintHead();
3933 // It would be nice to print the actual option name given by the user
3934 // here, but we just print the short form. Please fix this if you know
3935 // a clean way to do it.
3936 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
3937 PrintValidArgs(optchar);
3938 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3939 EXIT(EXIT_BADCMD);
3940 }
3941 }
3942
3943 // non-option arguments are not allowed
3944 if (argc > optind) {
3945 debugmode=1;
3946 PrintHead();
3947 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
3948 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3949 EXIT(EXIT_BADCMD);
3950 }
3951
3952 // no pidfile in debug mode
3953 if (debugmode && !pid_file.empty()) {
3954 debugmode=1;
3955 PrintHead();
3956 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
3957 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
3958 EXIT(EXIT_BADCMD);
3959 }
3960
3961 // absolute path is required due to chdir('/') after fork().
3962 if (!state_path_prefix.empty() && !debugmode && !is_abs_path(state_path_prefix.c_str())) {
3963 debugmode=1;
3964 PrintHead();
3965 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
3966 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
3967 state_path_prefix.c_str());
3968 EXIT(EXIT_BADCMD);
3969 }
3970
3971 // absolute path is required due to chdir('/') after fork().
3972 if (!attrlog_path_prefix.empty() && !debugmode && !is_abs_path(attrlog_path_prefix.c_str())) {
3973 debugmode=1;
3974 PrintHead();
3975 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
3976 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
3977 attrlog_path_prefix.c_str());
3978 EXIT(EXIT_BADCMD);
3979 }
3980
3981 // Read or init drive database
3982 if (!no_defaultdb) {
3983 unsigned char savedebug = debugmode; debugmode = 1;
3984 if (!read_default_drive_databases())
3985 EXIT(EXIT_BADCMD);
3986 debugmode = savedebug;
3987 }
3988
3989 // print header
3990 PrintHead();
3991 }
3992
3993 // Function we call if no configuration file was found or if the
3994 // SCANDIRECTIVE Directive was found. It makes entries for device
3995 // names returned by scan_smart_devices() in os_OSNAME.cpp
3996 static int MakeConfigEntries(const dev_config & base_cfg,
3997 dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
3998 {
3999 // make list of devices
4000 smart_device_list devlist;
4001 if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4002 PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4003
4004 // if no devices, or error constructing list, return
4005 if (devlist.size() <= 0)
4006 return 0;
4007
4008 // loop over entries to create
4009 for (unsigned i = 0; i < devlist.size(); i++) {
4010 // Move device pointer
4011 smart_device * dev = devlist.release(i);
4012 scanned_devs.push_back(dev);
4013
4014 // Copy configuration, update device and type name
4015 conf_entries.push_back(base_cfg);
4016 dev_config & cfg = conf_entries.back();
4017 cfg.name = dev->get_info().info_name;
4018 cfg.dev_type = type;
4019 }
4020
4021 return devlist.size();
4022 }
4023
4024 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4025 {
4026 if (!debugmode && scandirective)
4027 return;
4028 if (line)
4029 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4030 "Unable to register %s device %s at line %d of file %s\n",
4031 type, name, line, configfile);
4032 else
4033 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4034 type, name);
4035 return;
4036 }
4037
4038 // Returns negative value (see ParseConfigFile()) if config file
4039 // had errors, else number of entries which may be zero or positive.
4040 // If we found no configuration file, or it contained SCANDIRECTIVE,
4041 // then *scanning is set to 1, else 0.
4042 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4043 {
4044 // parse configuration file configfile (normally /etc/smartd.conf)
4045 int entries = ParseConfigFile(conf_entries);
4046
4047 if (entries < 0) {
4048 // There was an error reading the configuration file.
4049 conf_entries.clear();
4050 if (entries == -1)
4051 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4052 return entries;
4053 }
4054
4055 // no error parsing config file.
4056 if (entries) {
4057 // we did not find a SCANDIRECTIVE and did find valid entries
4058 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4059 }
4060 else if (conf_entries.size() == 1) {
4061 // we found a SCANDIRECTIVE or there was no configuration file so
4062 // scan. Configuration file's first entry contains all options
4063 // that were set
4064 dev_config first = conf_entries.front();
4065 conf_entries.clear();
4066
4067 if (first.lineno)
4068 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4069 else
4070 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4071
4072 // make config list of devices to search for
4073 MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4074
4075 // warn user if scan table found no devices
4076 if (conf_entries.empty())
4077 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4078 }
4079 else
4080 PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4081
4082 return conf_entries.size();
4083 }
4084
4085
4086 // This function tries devices from conf_entries. Each one that can be
4087 // registered is moved onto the [ata|scsi]devices lists and removed
4088 // from the conf_entries list.
4089 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4090 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4091 {
4092 // start by clearing lists/memory of ALL existing devices
4093 configs.clear();
4094 devices.clear();
4095 states.clear();
4096
4097 // Register entries
4098 for (unsigned i = 0; i < conf_entries.size(); i++){
4099
4100 dev_config cfg = conf_entries[i];
4101
4102 // get device of appropriate type
4103 smart_device_auto_ptr dev;
4104 bool scanning = false;
4105
4106 // Device may already be detected during devicescan
4107 if (i < scanned_devs.size()) {
4108 dev = scanned_devs.release(i);
4109 if (dev)
4110 scanning = true;
4111 }
4112
4113 if (!dev) {
4114 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4115 if (!dev) {
4116 if (cfg.dev_type.empty())
4117 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4118 else
4119 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4120 continue;
4121 }
4122 }
4123
4124 // Save old info
4125 smart_device::device_info oldinfo = dev->get_info();
4126
4127 // Open with autodetect support, may return 'better' device
4128 dev.replace( dev->autodetect_open() );
4129
4130 // Report if type has changed
4131 if (oldinfo.dev_type != dev->get_dev_type())
4132 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4133 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4134
4135 if (!dev->is_open()) {
4136 // For linux+devfs, a nonexistent device gives a strange error
4137 // message. This makes the error message a bit more sensible.
4138 // If no debug and scanning - don't print errors
4139 if (debugmode || !scanning)
4140 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4141 continue;
4142 }
4143
4144 // Update informal name
4145 cfg.name = dev->get_info().info_name;
4146 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4147
4148 // Prepare initial state
4149 dev_state state;
4150
4151 // register ATA devices
4152 if (dev->is_ata()){
4153 if (ATADeviceScan(cfg, state, dev->to_ata())) {
4154 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4155 dev.reset();
4156 }
4157 }
4158 // or register SCSI devices
4159 else if (dev->is_scsi()){
4160 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4161 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4162 dev.reset();
4163 }
4164 }
4165 else {
4166 PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4167 dev.reset();
4168 }
4169
4170 if (dev) {
4171 // move onto the list of devices
4172 configs.push_back(cfg);
4173 states.push_back(state);
4174 devices.push_back(dev);
4175 }
4176 // if device is explictly listed and we can't register it, then
4177 // exit unless the user has specified that the device is removable
4178 else if (!scanning) {
4179 if (cfg.removable || quit==2)
4180 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4181 else {
4182 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4183 EXIT(EXIT_BADDEV);
4184 }
4185 }
4186 }
4187 }
4188
4189
4190 // Main program without exception handling
4191 int main_worker(int argc, char **argv)
4192 {
4193 // Initialize interface
4194 smart_interface::init();
4195 if (!smi())
4196 return 1;
4197
4198 // external control variables for ATA disks
4199 smartmonctrl control;
4200
4201 // is it our first pass through?
4202 bool firstpass = true;
4203
4204 // next time to wake up
4205 time_t wakeuptime;
4206
4207 // for simplicity, null all global communications variables/lists
4208 con=&control;
4209 memset(con, 0,sizeof(control));
4210
4211 // parse input and print header and usage info if needed
4212 ParseOpts(argc,argv);
4213
4214 // do we mute printing from ataprint commands?
4215 con->printing_switchable = false;
4216 con->dont_print = !debugmode;
4217
4218 // Configuration for each device
4219 dev_config_vector configs;
4220 // Device states
4221 dev_state_vector states;
4222 // Devices to monitor
4223 smart_device_list devices;
4224
4225 bool write_states_always = true;
4226
4227 // the main loop of the code
4228 for (;;) {
4229
4230 // are we exiting from a signal?
4231 if (caughtsigEXIT) {
4232 // are we exiting with SIGTERM?
4233 int isterm=(caughtsigEXIT==SIGTERM);
4234 int isquit=(caughtsigEXIT==SIGQUIT);
4235 int isok=debugmode?isterm || isquit:isterm;
4236
4237 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4238 caughtsigEXIT, strsignal(caughtsigEXIT));
4239
4240 if (!isok)
4241 return EXIT_SIGNAL;
4242
4243 // Write state files
4244 if (!state_path_prefix.empty())
4245 write_all_dev_states(configs, states);
4246
4247 return 0;
4248 }
4249
4250 // Should we (re)read the config file?
4251 if (firstpass || caughtsigHUP){
4252 if (!firstpass) {
4253 #ifdef __CYGWIN__
4254 // Workaround for missing SIGQUIT via keyboard on Cygwin
4255 if (caughtsigHUP==2) {
4256 // Simulate SIGQUIT if another SIGINT arrives soon
4257 caughtsigHUP=0;
4258 sleep(1);
4259 if (caughtsigHUP==2) {
4260 caughtsigEXIT=SIGQUIT;
4261 continue;
4262 }
4263 caughtsigHUP=2;
4264 }
4265 #endif
4266 // Write state files
4267 if (!state_path_prefix.empty())
4268 write_all_dev_states(configs, states);
4269
4270 PrintOut(LOG_INFO,
4271 caughtsigHUP==1?
4272 "Signal HUP - rereading configuration file %s\n":
4273 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4274 configfile);
4275 }
4276
4277 {
4278 dev_config_vector conf_entries; // Entries read from smartd.conf
4279 smart_device_list scanned_devs; // Devices found during scan
4280 // (re)reads config file, makes >=0 entries
4281 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
4282
4283 if (entries>=0) {
4284 // checks devices, then moves onto ata/scsi list or deallocates.
4285 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
4286 if (!(configs.size() == devices.size() && configs.size() == states.size()))
4287 throw std::logic_error("Invalid result from RegisterDevices");
4288 }
4289 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4290 // user has asked to continue on error in configuration file
4291 if (!firstpass)
4292 PrintOut(LOG_INFO,"Reusing previous configuration\n");
4293 }
4294 else {
4295 // exit with configuration file error status
4296 int status = (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4297 EXIT(status);
4298 }
4299 }
4300
4301 // Log number of devices we are monitoring...
4302 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
4303 int numata = 0;
4304 for (unsigned i = 0; i < devices.size(); i++) {
4305 if (devices.at(i)->is_ata())
4306 numata++;
4307 }
4308 PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4309 numata, devices.size() - numata);
4310 }
4311 else {
4312 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4313 EXIT(EXIT_NODEV);
4314 }
4315
4316 if (quit==4) {
4317 // user has asked to print test schedule
4318 PrintTestSchedule(configs, states, devices);
4319 EXIT(0);
4320 }
4321
4322 // reset signal
4323 caughtsigHUP=0;
4324
4325 // Always write state files after (re)configuration
4326 write_states_always = true;
4327 }
4328
4329 // check all devices once,
4330 // self tests are not started in first pass unless '-q onecheck' is specified
4331 CheckDevicesOnce(configs, states, devices, (!firstpass || quit==3));
4332
4333 // Write state files
4334 if (!state_path_prefix.empty())
4335 write_all_dev_states(configs, states, write_states_always);
4336 write_states_always = false;
4337
4338 // Write attribute logs
4339 if (!attrlog_path_prefix.empty())
4340 write_all_dev_attrlogs(configs, states);
4341
4342 // user has asked us to exit after first check
4343 if (quit==3) {
4344 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4345 "smartd is exiting (exit status 0)\n");
4346 EXIT(0);
4347 }
4348
4349 // fork into background if needed
4350 if (firstpass && !debugmode) {
4351 DaemonInit();
4352 }
4353
4354 // set exit and signal handlers, write PID file, set wake-up time
4355 if (firstpass){
4356 Initialize(&wakeuptime);
4357 firstpass = false;
4358 }
4359
4360 // sleep until next check time, or a signal arrives
4361 wakeuptime = dosleep(wakeuptime, write_states_always);
4362 }
4363 }
4364
4365
4366 #ifndef _WIN32
4367 // Main program
4368 int main(int argc, char **argv)
4369 #else
4370 // Windows: internal main function started direct or by service control manager
4371 static int smartd_main(int argc, char **argv)
4372 #endif
4373 {
4374 int status;
4375 try {
4376 // Do the real work ...
4377 status = main_worker(argc, argv);
4378 }
4379 catch (int ex) {
4380 // EXIT(status) arrives here
4381 status = ex;
4382 }
4383 catch (const std::bad_alloc & /*ex*/) {
4384 // Memory allocation failed (also thrown by std::operator new)
4385 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
4386 status = EXIT_NOMEM;
4387 }
4388 catch (const std::exception & ex) {
4389 // Other fatal errors
4390 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
4391 status = EXIT_BADCODE;
4392 }
4393
4394 if (is_initialized)
4395 status = Goodbye(status);
4396
4397 #ifdef _WIN32
4398 daemon_winsvc_exitcode = status;
4399 #endif
4400 return status;
4401 }
4402
4403
#ifdef _WIN32
// Main function for Windows.
// Passes the smartd service options and smartd_main() to daemon_main()
// and returns its result.
int main(int argc, char **argv){
  // Options for smartd windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd", "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  // daemon_main() handles daemon and service specific commands
  // and starts smartd_main() direct, from a new process,
  // or via service control manager
  const int result = daemon_main("smartd", &service_options, smartd_main, argc, argv);
  return result;
}
#endif