]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Updated changelog
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://smartmontools.sourceforge.net
3 *
4 * Copyright (C) 2002-11 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7 * Copyright (C) 2008-11 Christian Franke <smartmontools-support@lists.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #ifndef _GNU_SOURCE
25 // TODO: Why is this define necessary?
26 #define _GNU_SOURCE
27 #endif
28
29 // unconditionally included files
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/stat.h> // umask
33 #include <signal.h>
34 #include <fcntl.h>
35 #include <string.h>
36 #include <syslog.h>
37 #include <stdarg.h>
38 #include <stdlib.h>
39 #include <errno.h>
40 #include <time.h>
41 #include <limits.h>
42 #include <getopt.h>
43
44 #include <stdexcept>
45 #include <string>
46 #include <vector>
47 #include <algorithm> // std::replace()
48
49 // see which system files to conditionally include
50 #include "config.h"
51
52 // conditionally included files
53 #ifndef _WIN32
54 #include <sys/wait.h>
55 #endif
56 #ifdef HAVE_UNISTD_H
57 #include <unistd.h>
58 #endif
59 #ifdef HAVE_NETDB_H
60 #include <netdb.h>
61 #endif
62
63 #ifdef _WIN32
64 #ifdef _MSC_VER
65 #pragma warning(disable:4761) // "conversion supplied"
66 typedef unsigned short mode_t;
67 typedef int pid_t;
68 #endif
69 #include <io.h> // umask()
70 #include <process.h> // getpid()
71 #endif // _WIN32
72
73 #ifdef __CYGWIN__
74 // From <windows.h>:
75 // BOOL WINAPI FreeConsole(void);
76 extern "C" int __stdcall FreeConsole(void);
77 #include <io.h> // setmode()
78 #endif // __CYGWIN__
79
80 #ifdef HAVE_LIBCAP_NG
81 #include <cap-ng.h>
82 #endif // LIBCAP_NG
83
84 // locally included files
85 #include "int64.h"
86 #include "atacmds.h"
87 #include "dev_interface.h"
88 #include "knowndrives.h"
89 #include "scsicmds.h"
90 #include "utility.h"
91
92 // This is for solaris, where signal() resets the handler to SIG_DFL
93 // after the first signal is caught.
94 #ifdef HAVE_SIGSET
95 #define SIGNALFN sigset
96 #else
97 #define SIGNALFN signal
98 #endif
99
100 #ifdef _WIN32
101 #include "hostname_win32.h" // gethost/domainname()
102 #define HAVE_GETHOSTNAME 1
103 #define HAVE_GETDOMAINNAME 1
104 // fork()/signal()/initd simulation for native Windows
105 #include "daemon_win32.h" // daemon_main/detach/signal()
106 #undef SIGNALFN
107 #define SIGNALFN daemon_signal
108 #define strsignal daemon_strsignal
109 #define sleep daemon_sleep
110 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
111 #define SIGQUIT SIGBREAK
112 #define SIGQUIT_KEYNAME "CONTROL-Break"
113 #else // _WIN32
114 #ifdef __CYGWIN__
115 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
116 #define SIGQUIT_KEYNAME "2x CONTROL-C"
117 #else // __CYGWIN__
118 #define SIGQUIT_KEYNAME "CONTROL-\\"
119 #endif // __CYGWIN__
120 #endif // _WIN32
121
122 #if defined (__SVR4) && defined (__sun)
123 extern "C" int getdomainname(char *, int); // no declaration in header files!
124 #endif
125
126 #define ARGUSED(x) ((void)(x))
127
128 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 3349 2011-05-27 21:46:31Z chrfranke $"
129 CONFIG_H_CVSID;
130
131 // smartd exit codes
132 #define EXIT_BADCMD 1 // command line did not parse
133 #define EXIT_BADCONF 2 // syntax error in config file
134 #define EXIT_STARTUP 3 // problem forking daemon
135 #define EXIT_PID 4 // problem creating pid file
136 #define EXIT_NOCONF 5 // config file does not exist
137 #define EXIT_READCONF 6 // config file exists but cannot be read
138
139 #define EXIT_NOMEM 8 // out of memory
140 #define EXIT_BADCODE 10 // internal error - should NEVER happen
141
142 #define EXIT_BADDEV 16 // we can't monitor this device
143 #define EXIT_NODEV 17 // no devices to monitor
144
145 #define EXIT_SIGNAL 254 // abort on signal
146
147
// command-line: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;

// command-line: how long to sleep between checks
#define CHECKTIME 1800
static int checktime = CHECKTIME;

// command-line: name of PID file (empty for no pid file)
static std::string pid_file;

// command-line: path prefix of persistent state file, empty if no persistence.
// The build may provide a default via SMARTMONTOOLS_SAVESTATES.
#ifdef SMARTMONTOOLS_SAVESTATES
static std::string state_path_prefix = SMARTMONTOOLS_SAVESTATES;
#else
static std::string state_path_prefix;
#endif

// command-line: path prefix of attribute log file, empty if no logs.
// The build may provide a default via SMARTMONTOOLS_ATTRIBUTELOG.
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
static std::string attrlog_path_prefix = SMARTMONTOOLS_ATTRIBUTELOG;
#else
static std::string attrlog_path_prefix;
#endif

// configuration file name
static const char * configfile;
// configuration file "name" if read from stdin
static const char * const configfile_stdin = "<stdin>";
// path of alternate configuration file
static std::string configfile_alt;

// command-line: when should we exit?
static int quit = 0;

// command-line: default syslog(3) log facility to use
static int facility = LOG_DAEMON;

#ifndef _WIN32
// command-line: fork into background?
static bool do_fork = true;
#endif

#ifdef HAVE_LIBCAP_NG
// command-line: enable capabilities?
static bool enable_capabilities = false;
#endif

#if defined(_WIN32) || defined(__CYGWIN__)
// TODO: This smartctl-only variable is also used in os_win32.cpp
unsigned char failuretest_permissive = 0;
#endif

// Flags set by the signal handlers below.

// set to one if we catch a USR1 (check devices now)
static volatile int caughtsigUSR1 = 0;

#ifdef _WIN32
// set to one if we catch a USR2 (toggle debug mode)
static volatile int caughtsigUSR2 = 0;
#endif

// set to one if we catch a HUP (reload config file). In debug mode,
// set to two, if we catch INT (also reload config file).
static volatile int caughtsigHUP = 0;

// set to signal value if we catch INT, QUIT, or TERM
static volatile int caughtsigEXIT = 0;

// Prints either to stdout or to the syslog as needed.
// The attribute enables printf-style format checking by the compiler.
static void PrintOut(int priority, const char *fmt, ...)
  __attribute__ ((format(printf, 2, 3)));
218
// Attribute monitoring flags (bit mask values).
// A combination of these is kept for each attribute,
// see monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5
};
229
/// One byte of flags for each attribute, indexed by attribute ID.
/// Only IDs 1..255 are stored; any other ID is ignored by set()
/// and reported as "not set" by is_set().
class attribute_flags
{
public:
  /// All flags start cleared.
  attribute_flags()
    {
      for (unsigned i = 0; i < sizeof(m_flags); i++)
        m_flags[i] = 0;
    }

  /// Return true if at least one bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!valid_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  /// Add (bitwise OR) 'flags' to the flags of attribute 'id'.
  void set(int id, unsigned char flags)
    {
      if (!valid_id(id))
        return;
      m_flags[id] |= flags;
    }

private:
  /// ID 0 and IDs beyond the array are not valid.
  bool valid_id(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
249
250
251 /// Configuration data for a device. Read from smartd.conf.
252 /// Supports copy & assignment and is compatible with STL containers.
253 struct dev_config
254 {
255 int lineno; // Line number of entry in file
256 std::string name; // Device name (with optional extra info)
257 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
258 std::string dev_type; // Device type argument from -d directive, empty if none
259 std::string state_file; // Path of the persistent state file, empty if none
260 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
261 bool smartcheck; // Check SMART status
262 bool usagefailed; // Check for failed Usage Attributes
263 bool prefail; // Track changes in Prefail Attributes
264 bool usage; // Track changes in Usage Attributes
265 bool selftest; // Monitor number of selftest errors
266 bool errorlog; // Monitor number of ATA errors
267 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
268 bool permissive; // Ignore failed SMART commands
269 char autosave; // 1=disable, 2=enable Autosave Attributes
270 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
271 unsigned char fix_firmwarebug; // FIX_*, see atacmds.h
272 bool ignorepresets; // Ignore database of -v options
273 bool showpresets; // Show database entry for this device
274 bool removable; // Device may disappear (not be present)
275 char powermode; // skip check, if disk in idle or standby mode
276 bool powerquiet; // skip powermode 'skipping checks' message
277 int powerskipmax; // how many times can be check skipped
278 unsigned char tempdiff; // Track Temperature changes >= this limit
279 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
280 regular_expression test_regex; // Regex for scheduled testing
281
282 // Configuration of email warning messages
283 std::string emailcmdline; // script to execute, empty if no messages
284 std::string emailaddress; // email address, or empty
285 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
286 bool emailtest; // Send test email?
287
288 // ATA ONLY
289 bool sct_erc_set; // set SCT ERC to:
290 unsigned short sct_erc_readtime; // ERC read time (deciseconds)
291 unsigned short sct_erc_writetime; // ERC write time (deciseconds)
292
293 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
294 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
295 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
296 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
297
298 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
299
300 ata_vendor_attr_defs attribute_defs; // -v options
301
302 dev_config();
303 };
304
305 dev_config::dev_config()
306 : lineno(0),
307 smartcheck(false),
308 usagefailed(false),
309 prefail(false),
310 usage(false),
311 selftest(false),
312 errorlog(false),
313 xerrorlog(false),
314 permissive(false),
315 autosave(0),
316 autoofflinetest(0),
317 fix_firmwarebug(FIX_NOTSPECIFIED),
318 ignorepresets(false),
319 showpresets(false),
320 removable(false),
321 powermode(0),
322 powerquiet(false),
323 powerskipmax(0),
324 tempdiff(0),
325 tempinfo(0), tempcrit(0),
326 emailfreq(0),
327 emailtest(false),
328 sct_erc_set(false),
329 sct_erc_readtime(0), sct_erc_writetime(0),
330 curr_pending_id(0), offl_pending_id(0),
331 curr_pending_incr(false), offl_pending_incr(false),
332 curr_pending_set(false), offl_pending_set(false)
333 {
334 }
335
336
// Number of allowed mail message types
static const int SMARTD_NMAIL = 13;
// Type for '-M test' mails (state not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.

/// Bookkeeping for the mails sent for one mail message type.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // Nothing sent yet.
  mailinfo()
    {
      logged = 0;
      firstsent = 0;
      lastsent = 0;
    }
};
351
352 /// Persistent state data for a device.
353 struct persistent_dev_state
354 {
355 unsigned char tempmin, tempmax; // Min/Max Temperatures
356
357 unsigned char selflogcount; // total number of self-test errors
358 unsigned short selfloghour; // lifetime hours of last self-test error
359
360 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
361
362 uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
363 uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
364
365 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
366
367 // ATA ONLY
368 int ataerrorcount; // Total number of ATA errors
369
370 // Persistent part of ata_smart_values:
371 struct ata_attribute {
372 unsigned char id;
373 unsigned char val;
374 unsigned char worst; // Byte needed for 'raw64' attribute only.
375 uint64_t raw;
376 unsigned char resvd;
377
378 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
379 };
380 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
381
382 persistent_dev_state();
383 };
384
385 persistent_dev_state::persistent_dev_state()
386 : tempmin(0), tempmax(0),
387 selflogcount(0),
388 selfloghour(0),
389 scheduled_test_next_check(0),
390 selective_test_last_start(0),
391 selective_test_last_end(0),
392 ataerrorcount(0)
393 {
394 }
395
396 /// Non-persistent state data for a device.
397 struct temp_dev_state
398 {
399 bool must_write; // true if persistent part should be written
400
401 bool not_cap_offline; // true == not capable of offline testing
402 bool not_cap_conveyance;
403 bool not_cap_short;
404 bool not_cap_long;
405 bool not_cap_selective;
406
407 unsigned char temperature; // last recorded Temperature (in Celsius)
408 time_t tempmin_delay; // time where Min Temperature tracking will start
409
410 bool powermodefail; // true if power mode check failed
411 int powerskipcnt; // Number of checks skipped due to idle or standby mode
412
413 // SCSI ONLY
414 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
415 unsigned char TempPageSupported; // has log sense temperature page (0xd)
416 unsigned char SuppressReport; // minimize nuisance reports
417 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
418 // know yet) 6 or 10
419
420 // ATA ONLY
421 uint64_t num_sectors; // Number of sectors
422 ata_smart_values smartval; // SMART data
423 ata_smart_thresholds_pvt smartthres; // SMART thresholds
424
425 temp_dev_state();
426 };
427
428 temp_dev_state::temp_dev_state()
429 : must_write(false),
430 not_cap_offline(false),
431 not_cap_conveyance(false),
432 not_cap_short(false),
433 not_cap_long(false),
434 not_cap_selective(false),
435 temperature(0),
436 tempmin_delay(0),
437 powermodefail(false),
438 powerskipcnt(0),
439 SmartPageSupported(false),
440 TempPageSupported(false),
441 SuppressReport(false),
442 modese_len(0),
443 num_sectors(0)
444 {
445 memset(&smartval, 0, sizeof(smartval));
446 memset(&smartthres, 0, sizeof(smartthres));
447 }
448
449 /// Runtime state data for a device.
450 struct dev_state
451 : public persistent_dev_state,
452 public temp_dev_state
453 {
454 void update_persistent_state();
455 void update_temp_state();
456 };
457
458 /// Container for configuration info for each device.
459 typedef std::vector<dev_config> dev_config_vector;
460
461 /// Container for state info for each device.
462 typedef std::vector<dev_state> dev_state_vector;
463
464 // Copy ATA attributes to persistent state.
465 void dev_state::update_persistent_state()
466 {
467 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
468 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
469 ata_attribute & pa = ata_attributes[i];
470 pa.id = ta.id;
471 if (ta.id == 0) {
472 pa.val = pa.worst = 0; pa.raw = 0;
473 continue;
474 }
475 pa.val = ta.current;
476 pa.worst = ta.worst;
477 pa.raw = ta.raw[0]
478 | ( ta.raw[1] << 8)
479 | ( ta.raw[2] << 16)
480 | ((uint64_t)ta.raw[3] << 24)
481 | ((uint64_t)ta.raw[4] << 32)
482 | ((uint64_t)ta.raw[5] << 40);
483 pa.resvd = ta.reserv;
484 }
485 }
486
487 // Copy ATA from persistent to temp state.
488 void dev_state::update_temp_state()
489 {
490 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
491 const ata_attribute & pa = ata_attributes[i];
492 ata_smart_attribute & ta = smartval.vendor_attributes[i];
493 ta.id = pa.id;
494 if (pa.id == 0) {
495 ta.current = ta.worst = 0;
496 memset(ta.raw, 0, sizeof(ta.raw));
497 continue;
498 }
499 ta.current = pa.val;
500 ta.worst = pa.worst;
501 ta.raw[0] = (unsigned char) pa.raw;
502 ta.raw[1] = (unsigned char)(pa.raw >> 8);
503 ta.raw[2] = (unsigned char)(pa.raw >> 16);
504 ta.raw[3] = (unsigned char)(pa.raw >> 24);
505 ta.raw[4] = (unsigned char)(pa.raw >> 32);
506 ta.raw[5] = (unsigned char)(pa.raw >> 40);
507 ta.reserv = pa.resvd;
508 }
509 }
510
511 // Parse a line from a state file.
512 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
513 {
514 static const regular_expression regex(
515 "^ *"
516 "((temperature-min)" // (1 (2)
517 "|(temperature-max)" // (3)
518 "|(self-test-errors)" // (4)
519 "|(self-test-last-err-hour)" // (5)
520 "|(scheduled-test-next-check)" // (6)
521 "|(selective-test-last-start)" // (7)
522 "|(selective-test-last-end)" // (8)
523 "|(ata-error-count)" // (9)
524 "|(mail\\.([0-9]+)\\." // (10 (11)
525 "((count)" // (12 (13)
526 "|(first-sent-time)" // (14)
527 "|(last-sent-time)" // (15)
528 ")" // 12)
529 ")" // 10)
530 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
531 "((id)" // (18 (19)
532 "|(val)" // (20)
533 "|(worst)" // (21)
534 "|(raw)" // (22)
535 "|(resvd)" // (23)
536 ")" // 18)
537 ")" // 16)
538 ")" // 1)
539 " *= *([0-9]+)[ \n]*$", // (24)
540 REG_EXTENDED
541 );
542
543 const int nmatch = 1+24;
544 regmatch_t match[nmatch];
545 if (!regex.execute(line, nmatch, match))
546 return false;
547 if (match[nmatch-1].rm_so < 0)
548 return false;
549
550 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
551
552 int m = 1;
553 if (match[++m].rm_so >= 0)
554 state.tempmin = (unsigned char)val;
555 else if (match[++m].rm_so >= 0)
556 state.tempmax = (unsigned char)val;
557 else if (match[++m].rm_so >= 0)
558 state.selflogcount = (unsigned char)val;
559 else if (match[++m].rm_so >= 0)
560 state.selfloghour = (unsigned short)val;
561 else if (match[++m].rm_so >= 0)
562 state.scheduled_test_next_check = (time_t)val;
563 else if (match[++m].rm_so >= 0)
564 state.selective_test_last_start = val;
565 else if (match[++m].rm_so >= 0)
566 state.selective_test_last_end = val;
567 else if (match[++m].rm_so >= 0)
568 state.ataerrorcount = (int)val;
569 else if (match[m+=2].rm_so >= 0) {
570 int i = atoi(line+match[m].rm_so);
571 if (!(0 <= i && i < SMARTD_NMAIL))
572 return false;
573 if (i == MAILTYPE_TEST) // Don't suppress test mails
574 return true;
575 if (match[m+=2].rm_so >= 0)
576 state.maillog[i].logged = (int)val;
577 else if (match[++m].rm_so >= 0)
578 state.maillog[i].firstsent = (time_t)val;
579 else if (match[++m].rm_so >= 0)
580 state.maillog[i].lastsent = (time_t)val;
581 else
582 return false;
583 }
584 else if (match[m+=5+1].rm_so >= 0) {
585 int i = atoi(line+match[m].rm_so);
586 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
587 return false;
588 if (match[m+=2].rm_so >= 0)
589 state.ata_attributes[i].id = (unsigned char)val;
590 else if (match[++m].rm_so >= 0)
591 state.ata_attributes[i].val = (unsigned char)val;
592 else if (match[++m].rm_so >= 0)
593 state.ata_attributes[i].worst = (unsigned char)val;
594 else if (match[++m].rm_so >= 0)
595 state.ata_attributes[i].raw = val;
596 else if (match[++m].rm_so >= 0)
597 state.ata_attributes[i].resvd = (unsigned char)val;
598 else
599 return false;
600 }
601 else
602 return false;
603 return true;
604 }
605
606 // Read a state file.
607 static bool read_dev_state(const char * path, persistent_dev_state & state)
608 {
609 stdio_file f(path, "r");
610 if (!f) {
611 if (errno != ENOENT)
612 pout("Cannot read state file \"%s\"\n", path);
613 return false;
614 }
615 #ifdef __CYGWIN__
616 setmode(fileno(f), O_TEXT); // Allow files with \r\n
617 #endif
618
619 persistent_dev_state new_state;
620 int good = 0, bad = 0;
621 char line[256];
622 while (fgets(line, sizeof(line), f)) {
623 const char * s = line + strspn(line, " \t");
624 if (!*s || *s == '#')
625 continue;
626 if (!parse_dev_state_line(line, new_state))
627 bad++;
628 else
629 good++;
630 }
631
632 if (bad) {
633 if (!good) {
634 pout("%s: format error\n", path);
635 return false;
636 }
637 pout("%s: %d invalid line(s) ignored\n", path, bad);
638 }
639
640 // This sets the values missing in the file to 0.
641 state = new_state;
642 return true;
643 }
644
// Write the line "NAME = VAL" to a state file.
// Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (!val)
    return;
  // The spaces around PRIu64 are required: Since C++11 an identifier
  // directly following a string literal is parsed as a literal suffix.
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
650
// Write the line "NAME1.ID.NAME2 = VAL" to a state file.
// Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (!val)
    return;
  // The spaces around PRIu64 are required: Since C++11 an identifier
  // directly following a string literal is parsed as a literal suffix.
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
656
657 // Write a state file
658 static bool write_dev_state(const char * path, const persistent_dev_state & state)
659 {
660 // Rename old "file" to "file~"
661 std::string pathbak = path; pathbak += '~';
662 unlink(pathbak.c_str());
663 rename(path, pathbak.c_str());
664
665 stdio_file f(path, "w");
666 if (!f) {
667 pout("Cannot create state file \"%s\"\n", path);
668 return false;
669 }
670
671 fprintf(f, "# smartd state file\n");
672 write_dev_state_line(f, "temperature-min", state.tempmin);
673 write_dev_state_line(f, "temperature-max", state.tempmax);
674 write_dev_state_line(f, "self-test-errors", state.selflogcount);
675 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
676 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
677 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
678 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
679
680 int i;
681 for (i = 0; i < SMARTD_NMAIL; i++) {
682 if (i == MAILTYPE_TEST) // Don't suppress test mails
683 continue;
684 const mailinfo & mi = state.maillog[i];
685 if (!mi.logged)
686 continue;
687 write_dev_state_line(f, "mail", i, "count", mi.logged);
688 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
689 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
690 }
691
692 // ATA ONLY
693 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
694
695 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
696 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
697 if (!pa.id)
698 continue;
699 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
700 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
701 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
702 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
703 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
704 }
705
706 return true;
707 }
708
709 // Write to the attrlog file
710 static bool write_dev_attrlog(const char * path, const persistent_dev_state & state)
711 {
712 stdio_file f(path, "a");
713 if (!f) {
714 pout("Cannot create attribute log file \"%s\"\n", path);
715 return false;
716 }
717
718 // ATA ONLY
719 time_t now = time(0);
720 struct tm * tms = gmtime(&now);
721 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
722 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
723 tms->tm_hour, tms->tm_min, tms->tm_sec);
724 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
725 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
726 if (!pa.id)
727 continue;
728 fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
729 }
730 fprintf(f, "\n");
731
732 return true;
733 }
734
735 // Write all state files. If write_always is false, don't write
736 // unless must_write is set.
737 static void write_all_dev_states(const dev_config_vector & configs,
738 dev_state_vector & states,
739 bool write_always = true)
740 {
741 for (unsigned i = 0; i < states.size(); i++) {
742 const dev_config & cfg = configs.at(i);
743 if (cfg.state_file.empty())
744 continue;
745 dev_state & state = states[i];
746 if (!write_always && !state.must_write)
747 continue;
748 if (!write_dev_state(cfg.state_file.c_str(), state))
749 continue;
750 state.must_write = false;
751 if (write_always || debugmode)
752 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
753 cfg.name.c_str(), cfg.state_file.c_str());
754 }
755 }
756
757 // Write to all attrlog files
758 static void write_all_dev_attrlogs(const dev_config_vector & configs,
759 dev_state_vector & states)
760 {
761 for (unsigned i = 0; i < states.size(); i++) {
762 const dev_config & cfg = configs.at(i);
763 if (cfg.attrlog_file.empty())
764 continue;
765 dev_state & state = states[i];
766 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
767 }
768 }
769
770 // remove the PID file
771 static void RemovePidFile()
772 {
773 if (!pid_file.empty()) {
774 if (unlink(pid_file.c_str()))
775 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
776 pid_file.c_str(), strerror(errno));
777 pid_file.clear();
778 }
779 return;
780 }
781
782 extern "C" { // signal handlers require C-linkage
783
784 // Note if we catch a SIGUSR1
785 static void USR1handler(int sig)
786 {
787 if (SIGUSR1==sig)
788 caughtsigUSR1=1;
789 return;
790 }
791
#ifdef _WIN32
// Signal handler: set caughtsigUSR2 if the signal is SIGUSR2,
// do nothing for any other signal.
static void USR2handler(int sig)
{
  if (sig != SIGUSR2)
    return;
  caughtsigUSR2 = 1;
}
#endif
801
802 // Note if we catch a HUP (or INT in debug mode)
803 static void HUPhandler(int sig)
804 {
805 if (sig==SIGHUP)
806 caughtsigHUP=1;
807 else
808 caughtsigHUP=2;
809 return;
810 }
811
812 // signal handler for TERM, QUIT, and INT (if not in debug mode)
813 static void sighandler(int sig)
814 {
815 if (!caughtsigEXIT)
816 caughtsigEXIT=sig;
817 return;
818 }
819
820 } // extern "C"
821
822 // Cleanup, print Goodbye message and remove pidfile
823 static int Goodbye(int status)
824 {
825 // delete PID file, if one was created
826 RemovePidFile();
827
828 // if we are exiting because of a code bug, tell user
829 if (status==EXIT_BADCODE)
830 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
831
832 // and this should be the final output from smartd before it exits
833 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
834
835 return status;
836 }
837
#define ENVLENGTH 1024

// Replacement for setenv(), which is not available on all platforms.
// Formats "NAME=VALUE" into 'stackspace' (at most ENVLENGTH bytes
// including the terminating 0) and passes this buffer to putenv().
// putenv() keeps a pointer to the string, so the buffer must not be
// freed or made invalid while the variable is in use: it must be a
// static buffer, heap memory, or stack space which stays alive as long
// as the popen() call is underway. The buffer can be released after the
// variable is redefined or deleted via another call to putenv().
// Returns the result of putenv().
static int exportenv(char *stackspace, const char *name, const char *value)
{
  const size_t maxlen = ENVLENGTH;
  snprintf(stackspace, maxlen, "%s=%s", name, value);
  const int rc = putenv(stackspace);
  return rc;
}
852
853 static char *dnsdomain(const char *hostname)
854 {
855 char *p = NULL;
856 #ifdef HAVE_GETADDRINFO
857 static char canon_name[NI_MAXHOST];
858 struct addrinfo *info = NULL;
859 struct addrinfo hints;
860 int err;
861
862 memset(&hints, 0, sizeof(hints));
863 hints.ai_flags = AI_CANONNAME;
864 if ((err = getaddrinfo(hostname, NULL, &hints, &info)) || (!info)) {
865 PrintOut(LOG_CRIT, "Error retrieving getaddrinfo(%s): %s\n", hostname, gai_strerror(err));
866 return NULL;
867 }
868 if (info->ai_canonname) {
869 strncpy(canon_name, info->ai_canonname, sizeof(canon_name));
870 canon_name[NI_MAXHOST - 1] = '\0';
871 p = canon_name;
872 if ((p = strchr(canon_name, '.')))
873 p++;
874 }
875 freeaddrinfo(info);
876 #elif HAVE_GETHOSTBYNAME
877 struct hostent *hp;
878 if ((hp = gethostbyname(hostname))) {
879 // Does this work if gethostbyname() returns an IPv6 name in
880 // colon/dot notation? [BA]
881 if ((p = strchr(hp->h_name, '.')))
882 p++; // skip "."
883 }
884 #else
885 ARGUSED(hostname);
886 #endif
887 return p;
888 }
889
890 #define EBUFLEN 1024
891
892 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
893 __attribute__ ((format (printf, 4, 5)));
894
895 // If either address or executable path is non-null then send and log
896 // a warning email, or execute executable
897 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...){
898 char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
899 char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
900 char environ_strings[11][ENVLENGTH];
901 time_t epoch;
902 va_list ap;
903 const int day=24*3600;
904 int days=0;
905 const char * const whichfail[]={
906 "EmailTest", // 0
907 "Health", // 1
908 "Usage", // 2
909 "SelfTest", // 3
910 "ErrorCount", // 4
911 "FailedHealthCheck", // 5
912 "FailedReadSmartData", // 6
913 "FailedReadSmartErrorLog", // 7
914 "FailedReadSmartSelfTestLog", // 8
915 "FailedOpenDevice", // 9
916 "CurrentPendingSector", // 10
917 "OfflineUncorrectableSector", // 11
918 "Temperature" // 12
919 };
920
921 const char *unknown="[Unknown]";
922
923 // See if user wants us to send mail
924 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
925 return;
926
927 std::string address = cfg.emailaddress;
928 const char * executable = cfg.emailcmdline.c_str();
929
930 // which type of mail are we sending?
931 mailinfo * mail=(state.maillog)+which;
932
933 // checks for sanity
934 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
935 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
936 return;
937 }
938 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
939 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
940 which, (int)sizeof(whichfail));
941 return;
942 }
943
944 // Return if a single warning mail has been sent.
945 if ((cfg.emailfreq==1) && mail->logged)
946 return;
947
948 // Return if this is an email test and one has already been sent.
949 if (which == 0 && mail->logged)
950 return;
951
952 // To decide if to send mail, we need to know what time it is.
953 epoch=time(NULL);
954
955 // Return if less than one day has gone by
956 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
957 return;
958
959 // Return if less than 2^(logged-1) days have gone by
960 if (cfg.emailfreq==3 && mail->logged) {
961 days=0x01<<(mail->logged-1);
962 days*=day;
963 if (epoch<(mail->lastsent+days))
964 return;
965 }
966
967 #ifdef HAVE_LIBCAP_NG
968 if (enable_capabilities) {
969 PrintOut(LOG_ERR, "Sending a mail was supressed. "
970 "Mails can't be send when capabilites are enabled\n");
971 return;
972 }
973 #endif
974
975 // record the time of this mail message, and the first mail message
976 if (!mail->logged)
977 mail->firstsent=epoch;
978 mail->lastsent=epoch;
979
980 // get system host & domain names (not null terminated if length=MAX)
981 #ifdef HAVE_GETHOSTNAME
982 if (gethostname(hostname, 256))
983 strcpy(hostname, unknown);
984 else {
985 char *p=NULL;
986 hostname[255]='\0';
987 p = dnsdomain(hostname);
988 if (p && *p) {
989 strncpy(domainname, p, 255);
990 domainname[255]='\0';
991 } else
992 strcpy(domainname, unknown);
993 }
994 #else
995 strcpy(hostname, unknown);
996 strcpy(domainname, unknown);
997 #endif
998
999 #ifdef HAVE_GETDOMAINNAME
1000 if (getdomainname(nisdomain, 256))
1001 strcpy(nisdomain, unknown);
1002 else
1003 nisdomain[255]='\0';
1004 #else
1005 strcpy(nisdomain, unknown);
1006 #endif
1007
1008 // print warning string into message
1009 va_start(ap, fmt);
1010 vsnprintf(message, 256, fmt, ap);
1011 va_end(ap);
1012
1013 // appropriate message about further information
1014 additional[0]=original[0]=further[0]='\0';
1015 if (which) {
1016 sprintf(further,"You can also use the smartctl utility for further investigation.\n");
1017
1018 switch (cfg.emailfreq) {
1019 case 1:
1020 sprintf(additional,"No additional email messages about this problem will be sent.\n");
1021 break;
1022 case 2:
1023 sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
1024 break;
1025 case 3:
1026 sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
1027 (0x01)<<mail->logged);
1028 break;
1029 }
1030 if (cfg.emailfreq>1 && mail->logged) {
1031 dateandtimezoneepoch(dates, mail->firstsent);
1032 sprintf(original,"The original email about this issue was sent at %s\n", dates);
1033 }
1034 }
1035
1036 snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
1037
1038 // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
1039 if (!*executable)
1040 #ifdef DEFAULT_MAILER
1041 executable = DEFAULT_MAILER ;
1042 #else
1043 #ifndef _WIN32
1044 executable = "mail";
1045 #else
1046 executable = "blat"; // http://blat.sourceforge.net/
1047 #endif
1048 #endif
1049
1050 #ifndef _WIN32 // blat mailer needs comma
1051 // replace commas by spaces to separate recipients
1052 std::replace(address.begin(), address.end(), ',', ' ');
1053 #endif
1054 // Export information in environment variables that will be useful
1055 // for user scripts
1056 exportenv(environ_strings[0], "SMARTD_MAILER", executable);
1057 exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
1058 exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
1059 dateandtimezoneepoch(dates, mail->firstsent);
1060 exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
1061 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1062 exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
1063 exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
1064 if (!address.empty())
1065 exportenv(environ_strings[6], "SMARTD_ADDRESS", address.c_str());
1066 exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg.name.c_str());
1067
1068 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1069 exportenv(environ_strings[8], "SMARTD_DEVICETYPE",
1070 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1071 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg.dev_name.c_str());
1072
1073 snprintf(fullmessage, 1024,
1074 "This email was generated by the smartd daemon running on:\n\n"
1075 " host name: %s\n"
1076 " DNS domain: %s\n"
1077 " NIS domain: %s\n\n"
1078 "The following warning/error was logged by the smartd daemon:\n\n"
1079 "%s\n\n"
1080 "For details see host's SYSLOG.\n\n"
1081 "%s%s%s",
1082 hostname, domainname, nisdomain, message, further, original, additional);
1083 exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
1084
1085 // now construct a command to send this as EMAIL
1086 #ifndef _WIN32
1087 if (!address.empty())
1088 snprintf(command, 2048,
1089 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1090 "%sENDMAIL\n", subject, address.c_str(), fullmessage);
1091 else
1092 snprintf(command, 2048, "%s 2>&1", executable);
1093
1094 // tell SYSLOG what we are about to do...
1095 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1096 const char * newwarn = (which? "Warning via" : "Test of");
1097
1098 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1099 which?"Sending warning via":"Executing test of", executable, newadd);
1100
1101 // issue the command to send mail or to run the user's executable
1102 errno=0;
1103 FILE * pfp;
1104 if (!(pfp=popen(command, "r")))
1105 // failed to popen() mail process
1106 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1107 newwarn, executable, newadd, errno?strerror(errno):"");
1108 else {
1109 // pipe succeeded!
1110 int len, status;
1111 char buffer[EBUFLEN];
1112
1113 // if unexpected output on stdout/stderr, null terminate, print, and flush
1114 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1115 int count=0;
1116 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1117 buffer[newlen]='\0';
1118 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1119 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1120
1121 // flush pipe if needed
1122 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1123 count++;
1124
1125 // tell user that pipe was flushed, or that something is really wrong
1126 if (count && count<EBUFLEN)
1127 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1128 newwarn, executable, newadd);
1129 else if (count)
1130 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1131 newwarn, executable, newadd);
1132 }
1133
1134 // if something went wrong with mail process, print warning
1135 errno=0;
1136 if (-1==(status=pclose(pfp)))
1137 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1138 errno?strerror(errno):"");
1139 else {
1140 // mail process apparently succeeded. Check and report exit status
1141 int status8;
1142
1143 if (WIFEXITED(status)) {
1144 // exited 'normally' (but perhaps with nonzero status)
1145 status8=WEXITSTATUS(status);
1146
1147 if (status8>128)
1148 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1149 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1150 else if (status8)
1151 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1152 newwarn, executable, newadd, status, status8);
1153 else
1154 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1155 }
1156
1157 if (WIFSIGNALED(status))
1158 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1159 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1160
1161 // this branch is probably not possible. If subprocess is
1162 // stopped then pclose() should not return.
1163 if (WIFSTOPPED(status))
1164 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1165 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1166
1167 }
1168 }
1169
1170 #else // _WIN32
1171
1172 // No "here-documents" on Windows, so must use separate commandline and stdin
1173 char stdinbuf[1024];
1174 command[0] = stdinbuf[0] = 0;
1175 int boxtype = -1, boxmsgoffs = 0;
1176 const char * newadd = "<nomailer>";
1177 if (!address.empty()) {
1178 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1179 char addr1[9+1+13] = ""; int n1 = -1, n2 = -1;
1180 if (sscanf(address.c_str(), "%9[a-z]%n,%n", addr1, &n1, &n2) == 1 && (n1 == (int)address.size() || n2 > 0)) {
1181 if (!strcmp(addr1, "msgbox"))
1182 boxtype = 0;
1183 else if (!strcmp(addr1, "sysmsgbox"))
1184 boxtype = 1;
1185 if (boxtype >= 0)
1186 address.erase(0, (n2 > n1 ? n2 : n1));
1187 }
1188
1189 if (!address.empty()) {
1190 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1191 snprintf(command, sizeof(command),
1192 "%s - -q -subject \"%s\" -to \"%s\"",
1193 executable, subject, address.c_str());
1194 newadd = address.c_str();
1195 }
1196
1197 // Message for mail [0...] and messagebox [boxmsgoffs...]
1198 snprintf(stdinbuf, sizeof(stdinbuf),
1199 "This email was generated by the smartd daemon running on:\n\n"
1200 " host name: %s\n"
1201 " DNS domain: %s\n"
1202 // " NIS domain: %s\n"
1203 "\n",
1204 hostname, /*domainname, */ nisdomain);
1205 boxmsgoffs = strlen(stdinbuf);
1206 snprintf(stdinbuf+boxmsgoffs, sizeof(stdinbuf)-boxmsgoffs,
1207 "The following warning/error was logged by the smartd daemon:\n\n"
1208 "%s\n\n"
1209 "For details see the event log or log file of smartd.\n\n"
1210 "%s%s%s"
1211 "\n",
1212 message, further, original, additional);
1213 }
1214 else
1215 snprintf(command, sizeof(command), "%s", executable);
1216
1217 const char * newwarn = (which ? "Warning via" : "Test of");
1218 if (boxtype >= 0) {
1219 // show message box
1220 daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
1221 PrintOut(LOG_INFO,"%s message box\n", newwarn);
1222 }
1223 if (command[0]) {
1224 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1225 int rc;
1226 // run command
1227 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1228 (which?"Sending warning via":"Executing test of"), executable, newadd);
1229 rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
1230 if (rc >= 0 && stdoutbuf[0])
1231 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1232 newwarn, executable, newadd, strlen(stdoutbuf), stdoutbuf);
1233 if (rc != 0)
1234 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1235 newwarn, executable, newadd, rc);
1236 else
1237 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1238 }
1239
1240 #endif // _WIN32
1241
1242 // increment mail sent counter
1243 mail->logged++;
1244 }
1245
1246 #ifndef _WIN32
1247
1248 // Output multiple lines via separate syslog(3) calls.
1249 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1250 {
1251 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1252 vsnprintf(buf, sizeof(buf), fmt, ap);
1253
1254 for (char * p = buf, * q; p && *p; p = q) {
1255 if ((q = strchr(p, '\n')))
1256 *q++ = 0;
1257 if (*p)
1258 syslog(priority, "%s\n", p);
1259 }
1260 }
1261
1262 #else // _WIN32
1263 // os_win32/syslog_win32.cpp supports multiple lines.
1264 #define vsyslog_lines vsyslog
1265 #endif // _WIN32
1266
1267 // Printing function for watching ataprint commands, or losing them
1268 // [From GLIBC Manual: Since the prototype doesn't specify types for
1269 // optional arguments, in a call to a variadic function the default
1270 // argument promotions are performed on the optional argument
1271 // values. This means the objects of type char or short int (whether
1272 // signed or not) are promoted to either int or unsigned int, as
1273 // appropriate.]
1274 void pout(const char *fmt, ...){
1275 va_list ap;
1276
1277 // get the correct time in syslog()
1278 FixGlibcTimeZoneBug();
1279 // initialize variable argument list
1280 va_start(ap,fmt);
1281 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1282 if (debugmode && debugmode!=2)
1283 #ifdef _WIN32
1284 if (facility == LOG_LOCAL1) // logging to stdout
1285 vfprintf(stderr,fmt,ap);
1286 else
1287 #endif
1288 vprintf(fmt,ap);
1289 // in debugmode==2 mode we print output from knowndrives.o functions
1290 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1291 openlog("smartd", LOG_PID, facility);
1292 vsyslog_lines(LOG_INFO, fmt, ap);
1293 closelog();
1294 }
1295 va_end(ap);
1296 fflush(NULL);
1297 return;
1298 }
1299
1300 // This function prints either to stdout or to the syslog as needed.
1301 static void PrintOut(int priority, const char *fmt, ...){
1302 va_list ap;
1303
1304 // get the correct time in syslog()
1305 FixGlibcTimeZoneBug();
1306 // initialize variable argument list
1307 va_start(ap,fmt);
1308 if (debugmode)
1309 #ifdef _WIN32
1310 if (facility == LOG_LOCAL1) // logging to stdout
1311 vfprintf(stderr,fmt,ap);
1312 else
1313 #endif
1314 vprintf(fmt,ap);
1315 else {
1316 openlog("smartd", LOG_PID, facility);
1317 vsyslog_lines(priority, fmt, ap);
1318 closelog();
1319 }
1320 va_end(ap);
1321 return;
1322 }
1323
1324 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1325 void checksumwarning(const char * string)
1326 {
1327 pout("Warning! %s error: invalid SMART checksum.\n", string);
1328 }
1329
1330 #ifndef _WIN32
1331
1332 // Wait for the pid file to show up, this makes sure a calling program knows
1333 // that the daemon is really up and running and has a pid to kill it
1334 static bool WaitForPidFile()
1335 {
1336 int waited, max_wait = 10;
1337 struct stat stat_buf;
1338
1339 if (pid_file.empty() || debugmode)
1340 return true;
1341
1342 for(waited = 0; waited < max_wait; ++waited) {
1343 if (!stat(pid_file.c_str(), &stat_buf)) {
1344 return true;
1345 } else
1346 sleep(1);
1347 }
1348 return false;
1349 }
1350
1351 #endif // _WIN32
1352
1353 // Forks new process, closes ALL file descriptors, redirects stdin,
1354 // stdout, and stderr. Not quite daemon(). See
1355 // http://www.linuxjournal.com/article/2335
1356 // for a good description of why we do things this way.
1357 static void DaemonInit()
1358 {
1359 #ifndef _WIN32
1360 pid_t pid;
1361 int i;
1362
1363 // flush all buffered streams. Else we might get two copies of open
1364 // streams since both parent and child get copies of the buffers.
1365 fflush(NULL);
1366
1367 if (do_fork) {
1368 if ((pid=fork()) < 0) {
1369 // unable to fork!
1370 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1371 EXIT(EXIT_STARTUP);
1372 }
1373 else if (pid) {
1374 // we are the parent process, wait for pid file, then exit cleanly
1375 if(!WaitForPidFile()) {
1376 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1377 EXIT(EXIT_STARTUP);
1378 } else
1379 EXIT(0);
1380 }
1381
1382 // from here on, we are the child process.
1383 setsid();
1384
1385 // Fork one more time to avoid any possibility of having terminals
1386 if ((pid=fork()) < 0) {
1387 // unable to fork!
1388 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1389 EXIT(EXIT_STARTUP);
1390 }
1391 else if (pid)
1392 // we are the parent process -- exit cleanly
1393 EXIT(0);
1394
1395 // Now we are the child's child...
1396 }
1397
1398 // close any open file descriptors
1399 for (i=getdtablesize();i>=0;--i)
1400 close(i);
1401
1402 #ifdef __CYGWIN__
1403 // Cygwin's setsid() does not detach the process from Windows console
1404 FreeConsole();
1405 #endif // __CYGWIN__
1406
1407 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1408
1409 // redirect any IO attempts to /dev/null for stdin
1410 i=open("/dev/null",O_RDWR);
1411 if (i>=0) {
1412 // stdout
1413 NO_warn_unused_result(dup(i));
1414 // stderr
1415 NO_warn_unused_result(dup(i));
1416 };
1417 umask(0022);
1418 NO_warn_unused_result(chdir("/"));
1419
1420 if (do_fork)
1421 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1422
1423 #else // _WIN32
1424
1425 // No fork() on native Win32
1426 // Detach this process from console
1427 fflush(NULL);
1428 if (daemon_detach("smartd")) {
1429 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1430 EXIT(EXIT_STARTUP);
1431 }
1432 // stdin/out/err now closed if not redirected
1433
1434 #endif // _WIN32
1435 return;
1436 }
1437
1438 // create a PID file containing the current process id
1439 static void WritePidFile()
1440 {
1441 if (!pid_file.empty()) {
1442 pid_t pid = getpid();
1443 mode_t old_umask;
1444 #ifndef __CYGWIN__
1445 old_umask = umask(0077); // rwx------
1446 #else
1447 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1448 old_umask = umask(0033); // rwxr--r--
1449 #endif
1450
1451 stdio_file f(pid_file.c_str(), "w");
1452 umask(old_umask);
1453 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1454 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1455 EXIT(EXIT_PID);
1456 }
1457 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1458 }
1459 }
1460
1461 // Prints header identifying version of code and home
1462 static void PrintHead()
1463 {
1464 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1465 }
1466
1467 // prints help info for configuration file Directives
1468 static void Directives()
1469 {
1470 PrintOut(LOG_INFO,
1471 "Configuration file (%s) Directives (after device name):\n"
1472 " -d TYPE Set the device type: %s, auto, removable\n"
1473 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1474 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1475 " -S VAL Enable/disable attribute autosave (on/off)\n"
1476 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1477 " -H Monitor SMART Health Status, report if failed\n"
1478 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1479 " -l TYPE Monitor SMART log. Type is one of: error, selftest, xerror\n"
1480 " -l scterc,R,W Set SCT Error Recovery Control\n"
1481 " -f Monitor 'Usage' Attributes, report failures\n"
1482 " -m ADD Send email warning to address ADD\n"
1483 " -M TYPE Modify email warning behavior (see man page)\n"
1484 " -p Report changes in 'Prefailure' Attributes\n"
1485 " -u Report changes in 'Usage' Attributes\n"
1486 " -t Equivalent to -p and -u Directives\n"
1487 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1488 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1489 " -i ID Ignore Attribute ID for -f Directive\n"
1490 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1491 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1492 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1493 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1494 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1495 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1496 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1497 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1498 " # Comment: text after a hash sign is ignored\n"
1499 " \\ Line continuation character\n"
1500 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1501 "Use ID = 0 to turn off -C and/or -U Directives\n"
1502 "Example: /dev/hda -a\n",
1503 configfile, smi()->get_valid_dev_types_str().c_str());
1504 return;
1505 }
1506
/* Returns a pointer to a string literal listing the valid arguments
   of the command line option opt, or NULL if opt is not known. */
static const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
  if (opt == 'B' || opt == 'p')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";

  // unknown option
  return NULL;
}
1532
1533 /* prints help information for command syntax */
1534 static void Usage()
1535 {
1536 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1537 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1538 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1539 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1540 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1541 #endif
1542 PrintOut(LOG_INFO,"\n");
1543 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1544 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1545 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1546 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1547 PrintOut(LOG_INFO,"\n");
1548 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1549 #endif
1550 PrintOut(LOG_INFO,"]\n\n");
1551 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1552 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1553 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1554 #ifdef HAVE_LIBCAP_NG
1555 PrintOut(LOG_INFO," -C, --capabilities\n");
1556 PrintOut(LOG_INFO," Use capabilities (EXPERIMENTAL).\n"
1557 " Warning: Mail notification does not work when used.\n\n");
1558 #endif
1559 PrintOut(LOG_INFO," -d, --debug\n");
1560 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1561 PrintOut(LOG_INFO," -D, --showdirectives\n");
1562 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1563 PrintOut(LOG_INFO," -h, --help, --usage\n");
1564 PrintOut(LOG_INFO," Display this help and exit\n\n");
1565 PrintOut(LOG_INFO," -i N, --interval=N\n");
1566 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1567 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1568 #ifndef _WIN32
1569 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1570 #else
1571 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1572 #endif
1573 #ifndef _WIN32
1574 PrintOut(LOG_INFO," -n, --no-fork\n");
1575 PrintOut(LOG_INFO," Do not fork into background\n\n");
1576 #endif // _WIN32
1577 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1578 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1579 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1580 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1581 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1582 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1583 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1584 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1585 #ifdef SMARTMONTOOLS_SAVESTATES
1586 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1587 #endif
1588 PrintOut(LOG_INFO,"\n");
1589 #ifdef _WIN32
1590 PrintOut(LOG_INFO," --service\n");
1591 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1592 PrintOut(LOG_INFO," smartd install [options]\n");
1593 PrintOut(LOG_INFO," Remove service with:\n");
1594 PrintOut(LOG_INFO," smartd remove\n\n");
1595 #endif // _WIN32
1596 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1597 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1598 }
1599
1600 static int CloseDevice(smart_device * device, const char * name)
1601 {
1602 if (!device->close()){
1603 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1604 return 1;
1605 }
1606 // device sucessfully closed
1607 return 0;
1608 }
1609
// Return true if c may not appear in a state file name, i.e. if it
// is anything other than an ASCII digit or ASCII letter.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1617
1618 // Read error count from Summary or Extended Comprehensive SMART error log
1619 // Return -1 on error
1620 static int read_ata_error_count(ata_device * device, const char * name,
1621 unsigned char fix_firmwarebug, bool extended)
1622 {
1623 if (!extended) {
1624 ata_smart_errorlog log;
1625 if (ataReadErrorLog(device, &log, fix_firmwarebug)){
1626 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1627 return -1;
1628 }
1629 return (log.error_log_pointer ? log.ata_error_count : 0);
1630 }
1631 else {
1632 ata_smart_exterrlog logx;
1633 if (!ataReadExtErrorLog(device, &logx, 1 /*first sector only*/)) {
1634 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1635 return -1;
1636 }
1637 // Some disks use the reserved byte as index, see ataprint.cpp.
1638 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1639 }
1640 }
1641
1642 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1643 // error count, and top bits are the power-on hours of the last error.
1644 static int SelfTestErrorCount(ata_device * device, const char * name,
1645 unsigned char fix_firmwarebug)
1646 {
1647 struct ata_smart_selftestlog log;
1648
1649 if (ataReadSelfTestLog(device, &log, fix_firmwarebug)){
1650 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1651 return -1;
1652 }
1653
1654 // return current number of self-test errors
1655 return ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug);
1656 }
1657
// Split the value returned by SelfTestErrorCount() into its parts:
// the low 8 bits (error count) and the 16 bits above them (hours).
// The argument is parenthesized so that expressions containing
// operators of lower precedence than '&' or '>>' are handled correctly.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1660
1661 // Log offline data collection status
1662 static void log_offline_data_coll_status(const char * name, unsigned char status)
1663 {
1664 const char * msg;
1665 switch (status & 0x7f) {
1666 case 0x00: msg = "was never started"; break;
1667 case 0x02: msg = "was completed without error"; break;
1668 case 0x03: msg = (status == 0x03 ? "is in progress" : 0); break;
1669 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1670 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1671 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1672 default: msg = 0;
1673 }
1674
1675 if (msg)
1676 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1677 "Device: %s, offline data collection %s%s\n", name, msg,
1678 ((status & 0x80) ? " (auto:on)" : ""));
1679 else
1680 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1681 name, status);
1682 }
1683
1684 // Log self-test execution status
1685 static void log_self_test_exec_status(const char * name, unsigned char status)
1686 {
1687 const char * msg;
1688 switch (status >> 4) {
1689 case 0x0: msg = "completed without error"; break;
1690 case 0x1: msg = "was aborted by the host"; break;
1691 case 0x2: msg = "was interrupted by the host with a reset"; break;
1692 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1693 case 0x4: msg = "completed with error (unknown test element)"; break;
1694 case 0x5: msg = "completed with error (electrical test element)"; break;
1695 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1696 case 0x7: msg = "completed with error (read test element)"; break;
1697 case 0x8: msg = "completed with error (handling damage?)"; break;
1698 default: msg = 0;
1699 }
1700
1701 if (msg)
1702 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1703 "Device: %s, previous self-test %s\n", name, msg);
1704 else if ((status >> 4) == 0xf)
1705 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1706 name, status & 0x0f);
1707 else
1708 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1709 name, status);
1710 }
1711
1712 // Check pending sector count id (-C, -U directives).
1713 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1714 unsigned char id, const char * msg)
1715 {
1716 // Check attribute index
1717 int i = ata_find_attr_index(id, state.smartval);
1718 if (i < 0) {
1719 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1720 cfg.name.c_str(), msg, id);
1721 return false;
1722 }
1723
1724 // Check value
1725 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1726 cfg.attribute_defs);
1727 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1728 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %"PRIu64" (0x%"PRIx64")\n",
1729 cfg.name.c_str(), msg, id, rawval, rawval);
1730 return false;
1731 }
1732
1733 return true;
1734 }
1735
1736 // Called by ATA/SCSIDeviceScan() after successful device check
1737 static void finish_device_scan(dev_config & cfg, dev_state & state)
1738 {
1739 // Set cfg.emailfreq if user hasn't set it
1740 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1741 // Avoid that emails are suppressed forever due to state persistence
1742 if (cfg.state_file.empty())
1743 cfg.emailfreq = 1; // '-M once'
1744 else
1745 cfg.emailfreq = 2; // '-M daily'
1746 }
1747
1748 // Start self-test regex check now if time was not read from state file
1749 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1750 state.scheduled_test_next_check = time(0);
1751 }
1752
1753
// Passed to ata_read_identity() by ATADeviceScan(). Always off for now.
// TODO: Add '-F swapid' directive
const bool fix_swapped_id = false;
1756
1757 // scan to see what ata devices there are, and if they support SMART
1758 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1759 {
1760 int supported=0;
1761 struct ata_identify_device drive;
1762 const char *name = cfg.name.c_str();
1763 int retid;
1764
1765 // Device must be open
1766
1767 // Get drive identity structure
1768 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1769 if (retid<0)
1770 // Unable to read Identity structure
1771 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1772 else
1773 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1774 name, packetdevicetype(retid-1));
1775 CloseDevice(atadev, name);
1776 return 2;
1777 }
1778
1779 // Log drive identity and size
1780 char model[40+1], serial[20+1], firmware[8+1];
1781 ata_format_id_string(model, drive.model, sizeof(model)-1);
1782 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1783 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1784
1785 ata_size_info sizes;
1786 ata_get_size_info(&drive, sizes);
1787 state.num_sectors = sizes.sectors;
1788
1789 char wwn[30]; wwn[0] = 0;
1790 unsigned oui = 0; uint64_t unique_id = 0;
1791 int naa = ata_get_wwn(&drive, oui, unique_id);
1792 if (naa >= 0)
1793 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09"PRIx64", ", naa, oui, unique_id);
1794
1795 char cap[32];
1796 PrintOut(LOG_INFO, "Device: %s, %s, S/N:%s, %sFW:%s, %s\n", name,
1797 model, serial, wwn, firmware,
1798 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1799
1800 // Show if device in database, and use preset vendor attribute
1801 // options unless user has requested otherwise.
1802 if (cfg.ignorepresets)
1803 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1804 else {
1805 // Apply vendor specific presets, print warning if present
1806 const drive_settings * dbentry = lookup_drive_apply_presets(
1807 &drive, cfg.attribute_defs, cfg.fix_firmwarebug);
1808 if (!dbentry)
1809 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1810 else {
1811 PrintOut(LOG_INFO, "Device: %s, found in smartd database.\n", name);
1812 if (*dbentry->warningmsg)
1813 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1814 }
1815 }
1816
1817 // Set default '-C 197[+]' if no '-C ID' is specified.
1818 if (!cfg.curr_pending_set)
1819 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1820 // Set default '-U 198[+]' if no '-U ID' is specified.
1821 if (!cfg.offl_pending_set)
1822 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1823
1824 // If requested, show which presets would be used for this drive
1825 if (cfg.showpresets) {
1826 int savedebugmode=debugmode;
1827 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1828 if (!debugmode)
1829 debugmode=2;
1830 show_presets(&drive);
1831 debugmode=savedebugmode;
1832 }
1833
1834 // see if drive supports SMART
1835 supported=ataSmartSupport(&drive);
1836 if (supported!=1) {
1837 if (supported==0)
1838 // drive does NOT support SMART
1839 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1840 else
1841 // can't tell if drive supports SMART
1842 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1843
1844 // should we proceed anyway?
1845 if (cfg.permissive) {
1846 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1847 }
1848 else {
1849 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1850 CloseDevice(atadev, name);
1851 return 2;
1852 }
1853 }
1854
1855 if (ataEnableSmart(atadev)) {
1856 // Enable SMART command has failed
1857 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1858 CloseDevice(atadev, name);
1859 return 2;
1860 }
1861
1862 // disable device attribute autosave...
1863 if (cfg.autosave==1) {
1864 if (ataDisableAutoSave(atadev))
1865 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1866 else
1867 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1868 }
1869
1870 // or enable device attribute autosave
1871 if (cfg.autosave==2) {
1872 if (ataEnableAutoSave(atadev))
1873 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1874 else
1875 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1876 }
1877
1878 // capability check: SMART status
1879 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1880 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1881 cfg.smartcheck = false;
1882 }
1883
1884 // capability check: Read smart values and thresholds. Note that
1885 // smart values are ALSO needed even if we ONLY want to know if the
1886 // device is self-test log or error-log capable! After ATA-5, this
1887 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1888 // but sadly not for ATA-5. Sigh.
1889
1890 // do we need to get SMART data?
1891 bool smart_val_ok = false;
1892 if ( cfg.autoofflinetest || cfg.selftest
1893 || cfg.errorlog || cfg.xerrorlog
1894 || cfg.usagefailed || cfg.prefail || cfg.usage
1895 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1896 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1897
1898 if (ataReadSmartValues(atadev, &state.smartval)) {
1899 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1900 cfg.usagefailed = cfg.prefail = cfg.usage = false;
1901 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1902 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1903 }
1904 else {
1905 smart_val_ok = true;
1906 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1907 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1908 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1909 cfg.usagefailed = false;
1910 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1911 memset(&state.smartthres, 0, sizeof(state.smartthres));
1912 }
1913 }
1914
1915 // see if the necessary Attribute is there to monitor offline or
1916 // current pending sectors or temperature
1917 if ( cfg.curr_pending_id
1918 && !check_pending_id(cfg, state, cfg.curr_pending_id,
1919 "Current_Pending_Sector"))
1920 cfg.curr_pending_id = 0;
1921
1922 if ( cfg.offl_pending_id
1923 && !check_pending_id(cfg, state, cfg.offl_pending_id,
1924 "Offline_Uncorrectable"))
1925 cfg.offl_pending_id = 0;
1926
1927 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1928 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1929 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
1930 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1931 }
1932 }
1933
1934 // enable/disable automatic on-line testing
1935 if (cfg.autoofflinetest) {
1936 // is this an enable or disable request?
1937 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1938 if (!smart_val_ok)
1939 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1940 else {
1941 // if command appears unsupported, issue a warning...
1942 if (!isSupportAutomaticTimer(&state.smartval))
1943 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1944 // ... but then try anyway
1945 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1946 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1947 else
1948 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1949 }
1950 }
1951
1952 // Read log directories if required for capability check
1953 ata_smart_log_directory smart_logdir, gp_logdir;
1954 bool smart_logdir_ok = false, gp_logdir_ok = false;
1955
1956 if ( isGeneralPurposeLoggingCapable(&drive)
1957 && (cfg.errorlog || cfg.selftest) ) {
1958 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1959 smart_logdir_ok = true;
1960 }
1961
1962 if (cfg.xerrorlog) {
1963 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
1964 gp_logdir_ok = true;
1965 }
1966
1967 // capability check: self-test-log
1968 state.selflogcount = 0; state.selfloghour = 0;
1969 if (cfg.selftest) {
1970 int retval;
1971 if (!( cfg.permissive
1972 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
1973 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
1974 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
1975 cfg.selftest = false;
1976 }
1977 else if ((retval = SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0) {
1978 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
1979 cfg.selftest = false;
1980 }
1981 else {
1982 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1983 state.selfloghour =SELFTEST_ERRORHOURS(retval);
1984 }
1985 }
1986
1987 // capability check: ATA error log
1988 state.ataerrorcount = 0;
1989 if (cfg.errorlog) {
1990 int errcnt1;
1991 if (!( cfg.permissive
1992 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
1993 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
1994 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
1995 cfg.errorlog = false;
1996 }
1997 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false)) < 0) {
1998 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
1999 cfg.errorlog = false;
2000 }
2001 else
2002 state.ataerrorcount = errcnt1;
2003 }
2004
2005 if (cfg.xerrorlog) {
2006 int errcnt2;
2007 if (!(cfg.permissive || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors))) {
2008 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2009 name);
2010 cfg.xerrorlog = false;
2011 }
2012 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true)) < 0) {
2013 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2014 cfg.xerrorlog = false;
2015 }
2016 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2017 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2018 name, state.ataerrorcount, errcnt2);
2019 // Record max error count
2020 if (errcnt2 > state.ataerrorcount)
2021 state.ataerrorcount = errcnt2;
2022 }
2023 else
2024 state.ataerrorcount = errcnt2;
2025 }
2026
2027 // capabilities check -- does it support powermode?
2028 if (cfg.powermode) {
2029 int powermode = ataCheckPowerMode(atadev);
2030
2031 if (-1 == powermode) {
2032 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2033 cfg.powermode=0;
2034 }
2035 else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
2036 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2037 name, powermode);
2038 cfg.powermode=0;
2039 }
2040 }
2041
2042 // set SCT Error Recovery Control if requested
2043 if (cfg.sct_erc_set) {
2044 if (!isSCTErrorRecoveryControlCapable(&drive))
2045 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2046 name);
2047 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2048 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2049 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2050 else
2051 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2052 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2053 }
2054
2055 // If no tests available or selected, return
2056 if (!( cfg.smartcheck || cfg.selftest
2057 || cfg.errorlog || cfg.xerrorlog
2058 || cfg.usagefailed || cfg.prefail || cfg.usage
2059 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2060 CloseDevice(atadev, name);
2061 return 3;
2062 }
2063
2064 // tell user we are registering device
2065 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2066
2067 // close file descriptor
2068 CloseDevice(atadev, name);
2069
2070 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2071 // Build file name for state file
2072 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2073 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2074 if (!state_path_prefix.empty()) {
2075 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2076 // Read previous state
2077 if (read_dev_state(cfg.state_file.c_str(), state)) {
2078 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2079 // Copy ATA attribute values to temp state
2080 state.update_temp_state();
2081 }
2082 }
2083 if (!attrlog_path_prefix.empty())
2084 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2085 }
2086
2087 finish_device_scan(cfg, state);
2088
2089 return 0;
2090 }
2091
2092 // on success, return 0. On failure, return >0. Never return <0,
2093 // please.
2094 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2095 {
2096 int k, err;
2097 const char *device = cfg.name.c_str();
2098 struct scsi_iec_mode_page iec;
2099 UINT8 tBuf[64];
2100
2101 // Device must be open
2102
2103 // check that device is ready for commands. IE stores its stuff on
2104 // the media.
2105 if ((err = scsiTestUnitReady(scsidev))) {
2106 if (SIMPLE_ERR_NOT_READY == err)
2107 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2108 else if (SIMPLE_ERR_NO_MEDIUM == err)
2109 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2110 else if (SIMPLE_ERR_BECOMING_READY == err)
2111 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2112 else
2113 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2114 CloseDevice(scsidev, device);
2115 return 2;
2116 }
2117
2118 // Badly-conforming USB storage devices may fail this check.
2119 // The response to the following IE mode page fetch (current and
2120 // changeable values) is carefully examined. It has been found
2121 // that various USB devices that malform the response will lock up
2122 // if asked for a log page (e.g. temperature) so it is best to
2123 // bail out now.
2124 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2125 state.modese_len = iec.modese_len;
2126 else if (SIMPLE_ERR_BAD_FIELD == err)
2127 ; /* continue since it is reasonable not to support IE mpage */
2128 else { /* any other error (including malformed response) unreasonable */
2129 PrintOut(LOG_INFO,
2130 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2131 device, err);
2132 CloseDevice(scsidev, device);
2133 return 3;
2134 }
2135
2136 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2137 // smart if it is off). This may change to be the same as the ATA side.
2138 if (!scsi_IsExceptionControlEnabled(&iec)) {
2139 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2140 "Try 'smartctl -s on %s' to turn on SMART features\n",
2141 device, device);
2142 CloseDevice(scsidev, device);
2143 return 3;
2144 }
2145
2146 // Flag that certain log pages are supported (information may be
2147 // available from other sources).
2148 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
2149 for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2150 switch (tBuf[k]) {
2151 case TEMPERATURE_LPAGE:
2152 state.TempPageSupported = 1;
2153 break;
2154 case IE_LPAGE:
2155 state.SmartPageSupported = 1;
2156 break;
2157 default:
2158 break;
2159 }
2160 }
2161 }
2162
2163 // Check if scsiCheckIE() is going to work
2164 {
2165 UINT8 asc = 0;
2166 UINT8 ascq = 0;
2167 UINT8 currenttemp = 0;
2168 UINT8 triptemp = 0;
2169
2170 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2171 &asc, &ascq, &currenttemp, &triptemp)) {
2172 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2173 state.SuppressReport = 1;
2174 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2175 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
2176 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2177 }
2178 }
2179 }
2180
2181 // capability check: self-test-log
2182 if (cfg.selftest){
2183 int retval = scsiCountFailedSelfTests(scsidev, 0);
2184 if (retval<0) {
2185 // no self-test log, turn off monitoring
2186 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2187 cfg.selftest = false;
2188 state.selflogcount = 0;
2189 state.selfloghour = 0;
2190 }
2191 else {
2192 // register starting values to watch for changes
2193 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2194 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2195 }
2196 }
2197
2198 // disable autosave (set GLTSD bit)
2199 if (cfg.autosave==1){
2200 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2201 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2202 else
2203 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2204 }
2205
2206 // or enable autosave (clear GLTSD bit)
2207 if (cfg.autosave==2){
2208 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2209 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2210 else
2211 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2212 }
2213
2214 // tell user we are registering device
2215 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2216
2217 // TODO: Build file name for state file
2218 if (!state_path_prefix.empty()) {
2219 PrintOut(LOG_INFO, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device);
2220 }
2221 // TODO: Build file name for attribute log file
2222 if (!attrlog_path_prefix.empty()) {
2223 PrintOut(LOG_INFO, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device);
2224 }
2225
2226 // close file descriptor
2227 CloseDevice(scsidev, device);
2228
2229 finish_device_scan(cfg, state);
2230
2231 return 0;
2232 }
2233
2234 // If the self-test log has got more self-test errors (or more recent
2235 // self-test errors) recorded, then notify user.
2236 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2237 {
2238 const char * name = cfg.name.c_str();
2239
2240 if (newi<0)
2241 // command failed
2242 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2243 else {
2244 // old and new error counts
2245 int oldc=state.selflogcount;
2246 int newc=SELFTEST_ERRORCOUNT(newi);
2247
2248 // old and new error timestamps in hours
2249 int oldh=state.selfloghour;
2250 int newh=SELFTEST_ERRORHOURS(newi);
2251
2252 if (oldc<newc) {
2253 // increase in error count
2254 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2255 name, oldc, newc);
2256 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2257 name, oldc, newc);
2258 state.must_write = true;
2259 }
2260 else if (newc > 0 && oldh != newh) {
2261 // more recent error
2262 // a 'more recent' error might actually be a smaller hour number,
2263 // if the hour number has wrapped.
2264 // There's still a bug here. You might just happen to run a new test
2265 // exactly 32768 hours after the previous failure, and have run exactly
2266 // 20 tests between the two, in which case smartd will miss the
2267 // new failure.
2268 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2269 name, newh);
2270 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2271 name, newh);
2272 state.must_write = true;
2273 }
2274
2275 // Print info if error entries have disappeared
2276 if (oldc > newc)
2277 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2278 name, oldc, newc);
2279
2280 // Needed since self-test error count may DECREASE. Hour might
2281 // also have changed.
2282 state.selflogcount= newc;
2283 state.selfloghour = newh;
2284 }
2285 return;
2286 }
2287
// Letters identifying the schedulable self-test types.  The order defines
// the priority: an entry at a lower index wins over one at a higher index.
static const char test_type_chars[] = "LncrSCO";
// Number of test types, i.e. the array length without the terminating NUL.
static const unsigned num_test_types = sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
2291
2292 // returns test type if time to do test of type testtype,
2293 // 0 if not time to do test.
2294 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2295 {
2296 // check that self-testing has been requested
2297 if (cfg.test_regex.empty())
2298 return 0;
2299
2300 // Exit if drive not capable of any test
2301 if ( state.not_cap_long && state.not_cap_short &&
2302 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2303 return 0;
2304
2305 // since we are about to call localtime(), be sure glibc is informed
2306 // of any timezone changes we make.
2307 if (!usetime)
2308 FixGlibcTimeZoneBug();
2309
2310 // Is it time for next check?
2311 time_t now = (!usetime ? time(0) : usetime);
2312 if (now < state.scheduled_test_next_check)
2313 return 0;
2314
2315 // Limit time check interval to 90 days
2316 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2317 state.scheduled_test_next_check = now - (3600L*24*90);
2318
2319 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2320 char testtype = 0;
2321 time_t testtime = 0; int testhour = 0;
2322 int maxtest = num_test_types-1;
2323
2324 for (time_t t = state.scheduled_test_next_check; ; ) {
2325 struct tm * tms = localtime(&t);
2326 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2327 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2328 for (int i = 0; i <= maxtest; i++) {
2329 // Skip if drive not capable of this test
2330 switch (test_type_chars[i]) {
2331 case 'L': if (state.not_cap_long) continue; break;
2332 case 'S': if (state.not_cap_short) continue; break;
2333 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2334 case 'O': if (scsi || state.not_cap_offline) continue; break;
2335 case 'c': case 'n':
2336 case 'r': if (scsi || state.not_cap_selective) continue; break;
2337 default: continue;
2338 }
2339 // Try match of "T/MM/DD/d/HH"
2340 char pattern[16];
2341 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2342 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2343 if (cfg.test_regex.full_match(pattern)) {
2344 // Test found
2345 testtype = pattern[0];
2346 testtime = t; testhour = tms->tm_hour;
2347 // Limit further matches to higher priority self-tests
2348 maxtest = i-1;
2349 break;
2350 }
2351 }
2352 // Exit if no tests left or current time reached
2353 if (maxtest < 0)
2354 break;
2355 if (t >= now)
2356 break;
2357 // Check next hour
2358 if ((t += 3600) > now)
2359 t = now;
2360 }
2361
2362 // Do next check not before next hour.
2363 struct tm * tmnow = localtime(&now);
2364 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2365
2366 if (testtype) {
2367 state.must_write = true;
2368 // Tell user if an old test was found.
2369 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2370 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2371 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2372 cfg.name.c_str(), testtype, datebuf);
2373 }
2374 }
2375
2376 return testtype;
2377 }
2378
2379 // Print a list of future tests.
2380 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2381 {
2382 unsigned numdev = configs.size();
2383 if (!numdev)
2384 return;
2385 std::vector<int> testcnts(numdev * num_test_types, 0);
2386
2387 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2388
2389 // FixGlibcTimeZoneBug(); // done in PrintOut()
2390 time_t now = time(0);
2391 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2392 dateandtimezoneepoch(datenow, now);
2393
2394 long seconds;
2395 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2396 // Check for each device whether a test will be run
2397 time_t testtime = now + seconds;
2398 for (unsigned i = 0; i < numdev; i++) {
2399 const dev_config & cfg = configs.at(i);
2400 dev_state & state = states.at(i);
2401 const char * p;
2402 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2403 if (testtype && (p = strchr(test_type_chars, testtype))) {
2404 unsigned t = (p - test_type_chars);
2405 // Report at most 5 tests of each type
2406 if (++testcnts[i*num_test_types + t] <= 5) {
2407 dateandtimezoneepoch(date, testtime);
2408 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2409 testcnts[i*num_test_types + t], testtype, date);
2410 }
2411 }
2412 }
2413 }
2414
2415 // Report totals
2416 dateandtimezoneepoch(date, now+seconds);
2417 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2418 for (unsigned i = 0; i < numdev; i++) {
2419 const dev_config & cfg = configs.at(i);
2420 bool scsi = devices.at(i)->is_scsi();
2421 for (unsigned t = 0; t < num_test_types; t++) {
2422 int cnt = testcnts[i*num_test_types + t];
2423 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2424 continue;
2425 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2426 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2427 }
2428 }
2429
2430 }
2431
2432 // Return zero on success, nonzero on failure. Perform offline (background)
2433 // short or long (extended) self test on given scsi device.
2434 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2435 {
2436 int retval = 0;
2437 const char *testname = 0;
2438 const char *name = cfg.name.c_str();
2439 int inProgress;
2440
2441 if (scsiSelfTestInProgress(device, &inProgress)) {
2442 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2443 state.not_cap_short = state.not_cap_long = true;
2444 return 1;
2445 }
2446
2447 if (1 == inProgress) {
2448 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2449 "progress.\n", name);
2450 return 1;
2451 }
2452
2453 switch (testtype) {
2454 case 'S':
2455 testname = "Short Self";
2456 retval = scsiSmartShortSelfTest(device);
2457 break;
2458 case 'L':
2459 testname = "Long Self";
2460 retval = scsiSmartExtendSelfTest(device);
2461 break;
2462 }
2463 // If we can't do the test, exit
2464 if (NULL == testname) {
2465 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2466 testtype);
2467 return 1;
2468 }
2469 if (retval) {
2470 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2471 (SIMPLE_ERR_BAD_FIELD == retval)) {
2472 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2473 testname);
2474 if ('L'==testtype)
2475 state.not_cap_long = true;
2476 else
2477 state.not_cap_short = true;
2478
2479 return 1;
2480 }
2481 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2482 testname, retval);
2483 return 1;
2484 }
2485
2486 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2487
2488 return 0;
2489 }
2490
2491 // Do an offline immediate or self-test. Return zero on success,
2492 // nonzero on failure.
2493 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2494 {
2495 const char *name = cfg.name.c_str();
2496
2497 // Read current smart data and check status/capability
2498 struct ata_smart_values data;
2499 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2500 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2501 return 1;
2502 }
2503
2504 // Check for capability to do the test
2505 int dotest = -1, mode = 0;
2506 const char *testname = 0;
2507 switch (testtype) {
2508 case 'O':
2509 testname="Offline Immediate ";
2510 if (isSupportExecuteOfflineImmediate(&data))
2511 dotest=OFFLINE_FULL_SCAN;
2512 else
2513 state.not_cap_offline = true;
2514 break;
2515 case 'C':
2516 testname="Conveyance Self-";
2517 if (isSupportConveyanceSelfTest(&data))
2518 dotest=CONVEYANCE_SELF_TEST;
2519 else
2520 state.not_cap_conveyance = true;
2521 break;
2522 case 'S':
2523 testname="Short Self-";
2524 if (isSupportSelfTest(&data))
2525 dotest=SHORT_SELF_TEST;
2526 else
2527 state.not_cap_short = true;
2528 break;
2529 case 'L':
2530 testname="Long Self-";
2531 if (isSupportSelfTest(&data))
2532 dotest=EXTEND_SELF_TEST;
2533 else
2534 state.not_cap_long = true;
2535 break;
2536
2537 case 'c': case 'n': case 'r':
2538 testname = "Selective Self-";
2539 if (isSupportSelectiveSelfTest(&data)) {
2540 dotest = SELECTIVE_SELF_TEST;
2541 switch (testtype) {
2542 case 'c': mode = SEL_CONT; break;
2543 case 'n': mode = SEL_NEXT; break;
2544 case 'r': mode = SEL_REDO; break;
2545 }
2546 }
2547 else
2548 state.not_cap_selective = true;
2549 break;
2550 }
2551
2552 // If we can't do the test, exit
2553 if (dotest<0) {
2554 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2555 return 1;
2556 }
2557
2558 // If currently running a self-test, do not interrupt it to start another.
2559 if (15==(data.self_test_exec_status >> 4)) {
2560 if (cfg.fix_firmwarebug == FIX_SAMSUNG3 && data.self_test_exec_status == 0xf0) {
2561 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2562 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2563 } else {
2564 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2565 name, testname, (int)(data.self_test_exec_status & 0x0f));
2566 return 1;
2567 }
2568 }
2569
2570 if (dotest == SELECTIVE_SELF_TEST) {
2571 // Set test span
2572 ata_selective_selftest_args selargs, prev_args;
2573 selargs.num_spans = 1;
2574 selargs.span[0].mode = mode;
2575 prev_args.num_spans = 1;
2576 prev_args.span[0].start = state.selective_test_last_start;
2577 prev_args.span[0].end = state.selective_test_last_end;
2578 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2579 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2580 return 1;
2581 }
2582 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2583 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2584 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2585 start, end, end - start + 1,
2586 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2587 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2588 state.selective_test_last_start = start;
2589 state.selective_test_last_end = end;
2590 }
2591
2592 // execute the test, and return status
2593 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2594 if (retval) {
2595 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2596 return retval;
2597 }
2598
2599 if (testtype != 'O')
2600 // Log next self-test execution status
2601 state.smartval.self_test_exec_status = 0xff;
2602
2603 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2604 return 0;
2605 }
2606
2607 // Check pending sector count attribute values (-C, -U directives).
2608 static void check_pending(const dev_config & cfg, dev_state & state,
2609 unsigned char id, bool increase_only,
2610 const ata_smart_values & smartval,
2611 int mailtype, const char * msg)
2612 {
2613 // Find attribute index
2614 int i = ata_find_attr_index(id, smartval);
2615 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2616 return;
2617
2618 // No report if no sectors pending.
2619 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2620 if (rawval == 0)
2621 return;
2622
2623 // If attribute is not reset, report only sector count increases.
2624 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2625 if (!(!increase_only || prev_rawval < rawval))
2626 return;
2627
2628 // Format message.
2629 std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2630 if (prev_rawval > 0 && rawval != prev_rawval)
2631 s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2632
2633 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2634 MailWarning(cfg, state, mailtype, "%s\n", s.c_str());
2635 state.must_write = true;
2636 }
2637
// Render a temperature value as text into the caller supplied buffer.
// A value of 0 means "not set" and is shown as "??".
// Returns 'buf' so the call can be used directly as a printf() argument.
static const char * fmt_temp(unsigned char x, char * buf)
{
  if (x != 0)
    sprintf(buf, "%u", (unsigned)x);
  else
    strcpy(buf, "??");
  return buf;
}
2647
2648 // Check Temperature limits
2649 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2650 {
2651 if (!(0 < currtemp && currtemp < 255)) {
2652 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2653 return;
2654 }
2655
2656 // Update Max Temperature
2657 const char * minchg = "", * maxchg = "";
2658 if (currtemp > state.tempmax) {
2659 if (state.tempmax)
2660 maxchg = "!";
2661 state.tempmax = currtemp;
2662 state.must_write = true;
2663 }
2664
2665 char buf[20];
2666 if (!state.temperature) {
2667 // First check
2668 if (!state.tempmin || currtemp < state.tempmin)
2669 // Delay Min Temperature update by ~ 30 minutes.
2670 state.tempmin_delay = time(0) + CHECKTIME - 60;
2671 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2672 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2673 if (triptemp)
2674 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2675 state.temperature = currtemp;
2676 }
2677 else {
2678 if (state.tempmin_delay) {
2679 // End Min Temperature update delay if ...
2680 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2681 || (state.tempmin_delay <= time(0))) { // or delay time is over.
2682 state.tempmin_delay = 0;
2683 if (!state.tempmin)
2684 state.tempmin = 255;
2685 }
2686 }
2687
2688 // Update Min Temperature
2689 if (!state.tempmin_delay && currtemp < state.tempmin) {
2690 state.tempmin = currtemp;
2691 state.must_write = true;
2692 if (currtemp != state.temperature)
2693 minchg = "!";
2694 }
2695
2696 // Track changes
2697 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2698 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2699 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2700 state.temperature = currtemp;
2701 }
2702 }
2703
2704 // Check limits
2705 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2706 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2707 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2708 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2709 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2710 }
2711 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2712 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2713 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2714 }
2715 }
2716
2717 // Check normalized and raw attribute values.
2718 static void check_attribute(const dev_config & cfg, dev_state & state,
2719 const ata_smart_attribute & attr,
2720 const ata_smart_attribute & prev,
2721 int attridx,
2722 const ata_smart_threshold_entry * thresholds)
2723 {
2724 // Check attribute and threshold
2725 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
2726 if (attrstate == ATTRSTATE_NON_EXISTING)
2727 return;
2728
2729 // If requested, check for usage attributes that have failed.
2730 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2731 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
2732 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs);
2733 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2734 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2735 state.must_write = true;
2736 }
2737
2738 // Return if we're not tracking this type of attribute
2739 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2740 if (!( ( prefail && cfg.prefail)
2741 || (!prefail && cfg.usage )))
2742 return;
2743
2744 // Return if '-I ID' was specified
2745 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2746 return;
2747
2748 // Issue warning if they don't have the same ID in all structures.
2749 if (attr.id != prev.id) {
2750 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2751 cfg.name.c_str(), attr.id, prev.id);
2752 return;
2753 }
2754
2755 // Compare normalized values if valid.
2756 bool valchanged = false;
2757 if (attrstate > ATTRSTATE_NO_NORMVAL) {
2758 if (attr.current != prev.current)
2759 valchanged = true;
2760 }
2761
2762 // Compare raw values if requested.
2763 bool rawchanged = false;
2764 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2765 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2766 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2767 rawchanged = true;
2768 }
2769
2770 // Return if no change
2771 if (!(valchanged || rawchanged))
2772 return;
2773
2774 // Format value strings
2775 std::string currstr, prevstr;
2776 if (attrstate == ATTRSTATE_NO_NORMVAL) {
2777 // Print raw values only
2778 currstr = strprintf("%s (Raw)",
2779 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2780 prevstr = strprintf("%s (Raw)",
2781 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2782 }
2783 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2784 // Print normalized and raw values
2785 currstr = strprintf("%d [Raw %s]", attr.current,
2786 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2787 prevstr = strprintf("%d [Raw %s]", prev.current,
2788 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2789 }
2790 else {
2791 // Print normalized values only
2792 currstr = strprintf("%d", attr.current);
2793 prevstr = strprintf("%d", prev.current);
2794 }
2795
2796 // Format message
2797 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2798 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
2799 ata_get_smart_attr_name(attr.id, cfg.attribute_defs).c_str(),
2800 prevstr.c_str(), currstr.c_str());
2801
2802 // Report this change as critical ?
2803 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2804 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2805 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2806 MailWarning(cfg, state, 2, "%s", msg.c_str());
2807 }
2808 else {
2809 PrintOut(LOG_INFO, "%s\n", msg.c_str());
2810 }
2811 state.must_write = true;
2812 }
2813
2814
2815 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
2816 bool firstpass, bool allow_selftests)
2817 {
2818 const char * name = cfg.name.c_str();
2819
2820 // If user has asked, test the email warning system
2821 if (cfg.emailtest)
2822 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2823
2824 // if we can't open device, fail gracefully rather than hard --
2825 // perhaps the next time around we'll be able to open it. ATAPI
2826 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2827 // given (see linux cdrom driver).
2828 if (!atadev->open()) {
2829 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
2830 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2831 return 1;
2832 } else if (debugmode)
2833 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
2834
2835 // user may have requested (with the -n Directive) to leave the disk
2836 // alone if it is in idle or sleeping mode. In this case check the
2837 // power mode and exit without check if needed
2838 if (cfg.powermode && !state.powermodefail) {
2839 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
2840 const char * mode = 0;
2841 if (0 <= powermode && powermode < 0xff) {
2842 // wait for possible spin up and check again
2843 int powermode2;
2844 sleep(5);
2845 powermode2 = ataCheckPowerMode(atadev);
2846 if (powermode2 > powermode)
2847 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
2848 powermode = powermode2;
2849 }
2850
2851 switch (powermode){
2852 case -1:
2853 // SLEEP
2854 mode="SLEEP";
2855 if (cfg.powermode>=1)
2856 dontcheck=1;
2857 break;
2858 case 0:
2859 // STANDBY
2860 mode="STANDBY";
2861 if (cfg.powermode>=2)
2862 dontcheck=1;
2863 break;
2864 case 0x80:
2865 // IDLE
2866 mode="IDLE";
2867 if (cfg.powermode>=3)
2868 dontcheck=1;
2869 break;
2870 case 0xff:
2871 // ACTIVE/IDLE
2872 mode="ACTIVE or IDLE";
2873 break;
2874 default:
2875 // UNKNOWN
2876 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2877 name, powermode);
2878 state.powermodefail = true;
2879 break;
2880 }
2881
2882 // if we are going to skip a check, return now
2883 if (dontcheck){
2884 // skip at most powerskipmax checks
2885 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2886 CloseDevice(atadev, name);
2887 if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
2888 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
2889 state.powerskipcnt++;
2890 return 0;
2891 }
2892 else {
2893 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
2894 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2895 }
2896 state.powerskipcnt = 0;
2897 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2898 }
2899 else if (state.powerskipcnt) {
2900 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2901 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2902 state.powerskipcnt = 0;
2903 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2904 }
2905 }
2906
2907 // check smart status
2908 if (cfg.smartcheck) {
2909 int status=ataSmartStatus2(atadev);
2910 if (status==-1){
2911 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
2912 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
2913 state.must_write = true;
2914 }
2915 else if (status==1){
2916 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
2917 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
2918 state.must_write = true;
2919 }
2920 }
2921
2922 // Check everything that depends upon SMART Data (eg, Attribute values)
2923 if ( cfg.usagefailed || cfg.prefail || cfg.usage
2924 || cfg.curr_pending_id || cfg.offl_pending_id
2925 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || cfg.selftest) {
2926
2927 // Read current attribute values.
2928 ata_smart_values curval;
2929 if (ataReadSmartValues(atadev, &curval)){
2930 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
2931 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
2932 state.must_write = true;
2933 }
2934 else {
2935 // look for current or offline pending sectors
2936 if (cfg.curr_pending_id)
2937 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
2938 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
2939 : "Total unreadable (pending) sectors" ));
2940
2941 if (cfg.offl_pending_id)
2942 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
2943 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
2944 : "Total offline uncorrectable sectors"));
2945
2946 // check temperature limits
2947 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2948 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
2949
2950 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
2951
2952 // look for failed usage attributes, or track usage or prefail attributes
2953 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
2954 check_attribute(cfg, state,
2955 curval.vendor_attributes[i],
2956 state.smartval.vendor_attributes[i],
2957 i, state.smartthres.thres_entries);
2958 }
2959
2960 if (cfg.selftest) {
2961 // Log changes of offline data collection and self-test execution status
2962 if ( curval.offline_data_collection_status
2963 != state.smartval.offline_data_collection_status
2964 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
2965 log_offline_data_coll_status(name, curval.offline_data_collection_status);
2966
2967 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
2968 || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
2969 log_self_test_exec_status(name, curval.self_test_exec_status);
2970 }
2971
2972 // Save the new values into *drive for the next time around
2973 state.smartval = curval;
2974 }
2975 }
2976 }
2977
2978 // check if number of selftest errors has increased (note: may also DECREASE)
2979 if (cfg.selftest)
2980 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug));
2981
2982 // check if number of ATA errors has increased
2983 if (cfg.errorlog || cfg.xerrorlog) {
2984
2985 int errcnt1 = -1, errcnt2 = -1;
2986 if (cfg.errorlog)
2987 errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false);
2988 if (cfg.xerrorlog)
2989 errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true);
2990
2991 // new number of errors is max of both logs
2992 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
2993
2994 // did command fail?
2995 if (newc<0)
2996 // lack of PrintOut here is INTENTIONAL
2997 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
2998
2999 // has error count increased?
3000 int oldc = state.ataerrorcount;
3001 if (newc>oldc){
3002 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3003 name, oldc, newc);
3004 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3005 name, oldc, newc);
3006 state.must_write = true;
3007 }
3008
3009 if (newc>=0)
3010 state.ataerrorcount=newc;
3011 }
3012
3013 // if the user has asked, and device is capable (or we're not yet
3014 // sure) check whether a self test should be done now.
3015 if (allow_selftests && !cfg.test_regex.empty()) {
3016 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3017 if (testtype)
3018 DoATASelfTest(cfg, state, atadev, testtype);
3019 }
3020
3021 // Don't leave device open -- the OS/user may want to access it
3022 // before the next smartd cycle!
3023 CloseDevice(atadev, name);
3024
3025 // Copy ATA attribute values to persistent state
3026 state.update_persistent_state();
3027
3028 return 0;
3029 }
3030
3031 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3032 {
3033 UINT8 asc, ascq;
3034 UINT8 currenttemp;
3035 UINT8 triptemp;
3036 const char * name = cfg.name.c_str();
3037 const char *cp;
3038
3039 // If the user has asked for it, test the email warning system
3040 if (cfg.emailtest)
3041 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3042
3043 // if we can't open device, fail gracefully rather than hard --
3044 // perhaps the next time around we'll be able to open it
3045 if (!scsidev->open()) {
3046 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3047 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3048 return 1;
3049 } else if (debugmode)
3050 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
3051 currenttemp = 0;
3052 asc = 0;
3053 ascq = 0;
3054 if (!state.SuppressReport) {
3055 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3056 &asc, &ascq, &currenttemp, &triptemp)) {
3057 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3058 name);
3059 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3060 state.SuppressReport = 1;
3061 }
3062 }
3063 if (asc > 0) {
3064 cp = scsiGetIEString(asc, ascq);
3065 if (cp) {
3066 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3067 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3068 } else if (debugmode)
3069 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3070 name, (int)asc, (int)ascq);
3071 } else if (debugmode)
3072 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3073
3074 // check temperature limits
3075 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3076 CheckTemperature(cfg, state, currenttemp, triptemp);
3077
3078 // check if number of selftest errors has increased (note: may also DECREASE)
3079 if (cfg.selftest)
3080 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3081
3082 if (allow_selftests && !cfg.test_regex.empty()) {
3083 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3084 if (testtype)
3085 DoSCSISelfTest(cfg, state, scsidev, testtype);
3086 }
3087 CloseDevice(scsidev, name);
3088 return 0;
3089 }
3090
3091 // Checks the SMART status of all ATA and SCSI devices
3092 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3093 smart_device_list & devices, bool firstpass, bool allow_selftests)
3094 {
3095 for (unsigned i = 0; i < configs.size(); i++) {
3096 const dev_config & cfg = configs.at(i);
3097 dev_state & state = states.at(i);
3098 smart_device * dev = devices.at(i);
3099 if (dev->is_ata())
3100 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3101 else if (dev->is_scsi())
3102 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3103 }
3104 }
3105
// Records whether Initialize() has been called: starts out false and is
// set to true at the very beginning of Initialize().
static bool is_initialized = false;
3108
3109 // Does initialization right after fork to daemon mode
3110 static void Initialize(time_t *wakeuptime)
3111 {
3112 // Call Goodbye() on exit
3113 is_initialized = true;
3114
3115 // write PID file
3116 if (!debugmode)
3117 WritePidFile();
3118
3119 // install signal handlers. On Solaris, can't use signal() because
3120 // it resets the handler to SIG_DFL after each call. So use sigset()
3121 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3122
3123 // normal and abnormal exit
3124 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3125 SIGNALFN(SIGTERM, SIG_IGN);
3126 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3127 SIGNALFN(SIGQUIT, SIG_IGN);
3128
3129 // in debug mode, <CONTROL-C> ==> HUP
3130 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3131 SIGNALFN(SIGINT, SIG_IGN);
3132
3133 // Catch HUP and USR1
3134 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3135 SIGNALFN(SIGHUP, SIG_IGN);
3136 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3137 SIGNALFN(SIGUSR1, SIG_IGN);
3138 #ifdef _WIN32
3139 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3140 SIGNALFN(SIGUSR2, SIG_IGN);
3141 #endif
3142
3143 // initialize wakeup time to CURRENT time
3144 *wakeuptime=time(NULL);
3145
3146 return;
3147 }
3148
#ifdef _WIN32
// Toggle debug mode implemented for native windows only
// (there is no easy way to reopen tty on *nix).
// Switches between debugmode 0 and 1; any other debug mode is left
// unchanged and only reported.
static void ToggleDebugMode()
{
  // Debug mode 1 => back to normal mode
  if (debugmode == 1) {
    daemon_disable_console();
    debugmode = 0;
    daemon_signal(SIGINT, sighandler);
    PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
    return;
  }

  // Any other nonzero debug mode is not toggled
  if (debugmode) {
    PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
    return;
  }

  // Normal mode => debug mode 1, provided a console can be opened
  PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
  if (daemon_enable_console("smartd [Debug]")) {
    PrintOut(LOG_INFO,"enable console failed\n");
    return;
  }
  debugmode = 1;
  daemon_signal(SIGINT, HUPhandler);
  PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
}
#endif
3174
3175 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3176 {
3177 // If past wake-up-time, compute next wake-up-time
3178 time_t timenow=time(NULL);
3179 while (wakeuptime<=timenow){
3180 int intervals=1+(timenow-wakeuptime)/checktime;
3181 wakeuptime+=intervals*checktime;
3182 }
3183
3184 // sleep until we catch SIGUSR1 or have completed sleeping
3185 while (timenow<wakeuptime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT){
3186
3187 // protect user again system clock being adjusted backwards
3188 if (wakeuptime>timenow+checktime){
3189 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3190 wakeuptime=timenow+checktime;
3191 }
3192
3193 // Exit sleep when time interval has expired or a signal is received
3194 sleep(wakeuptime-timenow);
3195
3196 #ifdef _WIN32
3197 // toggle debug mode?
3198 if (caughtsigUSR2) {
3199 ToggleDebugMode();
3200 caughtsigUSR2 = 0;
3201 }
3202 #endif
3203
3204 timenow=time(NULL);
3205 }
3206
3207 // if we caught a SIGUSR1 then print message and clear signal
3208 if (caughtsigUSR1){
3209 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3210 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3211 caughtsigUSR1=0;
3212 sigwakeup = true;
3213 }
3214
3215 // return adjusted wakeuptime
3216 return wakeuptime;
3217 }
3218
3219 // Print out a list of valid arguments for the Directive d
3220 static void printoutvaliddirectiveargs(int priority, char d)
3221 {
3222 switch (d) {
3223 case 'n':
3224 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3225 break;
3226 case 's':
3227 PrintOut(priority, "valid_regular_expression");
3228 break;
3229 case 'd':
3230 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3231 break;
3232 case 'T':
3233 PrintOut(priority, "normal, permissive");
3234 break;
3235 case 'o':
3236 case 'S':
3237 PrintOut(priority, "on, off");
3238 break;
3239 case 'l':
3240 PrintOut(priority, "error, selftest");
3241 break;
3242 case 'M':
3243 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3244 break;
3245 case 'v':
3246 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3247 break;
3248 case 'P':
3249 PrintOut(priority, "use, ignore, show, showall");
3250 break;
3251 case 'F':
3252 PrintOut(priority, "none, samsung, samsung2, samsung3");
3253 break;
3254 }
3255 }
3256
3257 // exits with an error message, or returns integer value of token
3258 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3259 int min, int max, char * suffix = 0)
3260 {
3261 // make sure argument is there
3262 if (!arg) {
3263 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3264 cfgfile, lineno, name, token, min, max);
3265 return -1;
3266 }
3267
3268 // get argument value (base 10), check that it's integer, and in-range
3269 char *endptr;
3270 int val = strtol(arg,&endptr,10);
3271
3272 // optional suffix present?
3273 if (suffix) {
3274 if (!strcmp(endptr, suffix))
3275 endptr += strlen(suffix);
3276 else
3277 *suffix = 0;
3278 }
3279
3280 if (!(!*endptr && min <= val && val <= max)) {
3281 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3282 cfgfile, lineno, name, token, arg, min, max);
3283 return -1;
3284 }
3285
3286 // all is well; return value
3287 return val;
3288 }
3289
3290
3291 // Get 1-3 small integer(s) for '-W' directive
3292 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3293 unsigned char *val1, unsigned char *val2, unsigned char *val3)
3294 {
3295 unsigned v1 = 0, v2 = 0, v3 = 0;
3296 int n1 = -1, n2 = -1, n3 = -1, len;
3297 if (!arg) {
3298 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3299 cfgfile, lineno, name, token);
3300 return -1;
3301 }
3302
3303 len = strlen(arg);
3304 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3305 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3306 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3307 cfgfile, lineno, name, token, arg);
3308 return -1;
3309 }
3310 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3311 return 0;
3312 }
3313
3314
3315 // This function returns 1 if it has correctly parsed one token (and
3316 // any arguments), else zero if no tokens remain. It returns -1 if an
3317 // error was encountered.
3318 static int ParseToken(char * token, dev_config & cfg)
3319 {
3320 char sym;
3321 const char * name = cfg.name.c_str();
3322 int lineno=cfg.lineno;
3323 const char *delim = " \n\t";
3324 int badarg = 0;
3325 int missingarg = 0;
3326 const char *arg = 0;
3327
3328 // is the rest of the line a comment
3329 if (*token=='#')
3330 return 1;
3331
3332 // is the token not recognized?
3333 if (*token!='-' || strlen(token)!=2) {
3334 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3335 configfile, lineno, name, token);
3336 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3337 return -1;
3338 }
3339
3340 // token we will be parsing:
3341 sym=token[1];
3342
3343 // parse the token and swallow its argument
3344 int val;
3345 char plus[] = "+", excl[] = "!";
3346
3347 switch (sym) {
3348 case 'C':
3349 // monitor current pending sector count (default 197)
3350 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3351 return -1;
3352 cfg.curr_pending_id = (unsigned char)val;
3353 cfg.curr_pending_incr = (*plus == '+');
3354 cfg.curr_pending_set = true;
3355 break;
3356 case 'U':
3357 // monitor offline uncorrectable sectors (default 198)
3358 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3359 return -1;
3360 cfg.offl_pending_id = (unsigned char)val;
3361 cfg.offl_pending_incr = (*plus == '+');
3362 cfg.offl_pending_set = true;
3363 break;
3364 case 'T':
3365 // Set tolerance level for SMART command failures
3366 if ((arg = strtok(NULL, delim)) == NULL) {
3367 missingarg = 1;
3368 } else if (!strcmp(arg, "normal")) {
3369 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3370 // not on failure of an optional S.M.A.R.T. command.
3371 // This is the default so we don't need to actually do anything here.
3372 cfg.permissive = false;
3373 } else if (!strcmp(arg, "permissive")) {
3374 // Permissive mode; ignore errors from Mandatory SMART commands
3375 cfg.permissive = true;
3376 } else {
3377 badarg = 1;
3378 }
3379 break;
3380 case 'd':
3381 // specify the device type
3382 if ((arg = strtok(NULL, delim)) == NULL) {
3383 missingarg = 1;
3384 } else if (!strcmp(arg, "removable")) {
3385 cfg.removable = true;
3386 } else if (!strcmp(arg, "auto")) {
3387 cfg.dev_type = "";
3388 } else {
3389 cfg.dev_type = arg;
3390 }
3391 break;
3392 case 'F':
3393 // fix firmware bug
3394 if ((arg = strtok(NULL, delim)) == NULL) {
3395 missingarg = 1;
3396 } else if (!strcmp(arg, "none")) {
3397 cfg.fix_firmwarebug = FIX_NONE;
3398 } else if (!strcmp(arg, "samsung")) {
3399 cfg.fix_firmwarebug = FIX_SAMSUNG;
3400 } else if (!strcmp(arg, "samsung2")) {
3401 cfg.fix_firmwarebug = FIX_SAMSUNG2;
3402 } else if (!strcmp(arg, "samsung3")) {
3403 cfg.fix_firmwarebug = FIX_SAMSUNG3;
3404 } else {
3405 badarg = 1;
3406 }
3407 break;
3408 case 'H':
3409 // check SMART status
3410 cfg.smartcheck = true;
3411 break;
3412 case 'f':
3413 // check for failure of usage attributes
3414 cfg.usagefailed = true;
3415 break;
3416 case 't':
3417 // track changes in all vendor attributes
3418 cfg.prefail = true;
3419 cfg.usage = true;
3420 break;
3421 case 'p':
3422 // track changes in prefail vendor attributes
3423 cfg.prefail = true;
3424 break;
3425 case 'u':
3426 // track changes in usage vendor attributes
3427 cfg.usage = true;
3428 break;
3429 case 'l':
3430 // track changes in SMART logs
3431 if ((arg = strtok(NULL, delim)) == NULL) {
3432 missingarg = 1;
3433 } else if (!strcmp(arg, "selftest")) {
3434 // track changes in self-test log
3435 cfg.selftest = true;
3436 } else if (!strcmp(arg, "error")) {
3437 // track changes in ATA error log
3438 cfg.errorlog = true;
3439 } else if (!strcmp(arg, "xerror")) {
3440 // track changes in Extended Comprehensive SMART error log
3441 cfg.xerrorlog = true;
3442 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
3443 // set SCT Error Recovery Control
3444 unsigned rt = ~0, wt = ~0; int nc = -1;
3445 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
3446 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
3447 cfg.sct_erc_set = true;
3448 cfg.sct_erc_readtime = rt;
3449 cfg.sct_erc_writetime = wt;
3450 }
3451 else
3452 badarg = 1;
3453 } else {
3454 badarg = 1;
3455 }
3456 break;
3457 case 'a':
3458 // monitor everything
3459 cfg.smartcheck = true;
3460 cfg.prefail = true;
3461 cfg.usagefailed = true;
3462 cfg.usage = true;
3463 cfg.selftest = true;
3464 cfg.errorlog = true;
3465 break;
3466 case 'o':
3467 // automatic offline testing enable/disable
3468 if ((arg = strtok(NULL, delim)) == NULL) {
3469 missingarg = 1;
3470 } else if (!strcmp(arg, "on")) {
3471 cfg.autoofflinetest = 2;
3472 } else if (!strcmp(arg, "off")) {
3473 cfg.autoofflinetest = 1;
3474 } else {
3475 badarg = 1;
3476 }
3477 break;
3478 case 'n':
3479 // skip disk check if in idle or standby mode
3480 if (!(arg = strtok(NULL, delim)))
3481 missingarg = 1;
3482 else {
3483 char *endptr = NULL;
3484 char *next = strchr(const_cast<char*>(arg), ',');
3485
3486 cfg.powerquiet = false;
3487 cfg.powerskipmax = 0;
3488
3489 if (next!=NULL) *next='\0';
3490 if (!strcmp(arg, "never"))
3491 cfg.powermode = 0;
3492 else if (!strcmp(arg, "sleep"))
3493 cfg.powermode = 1;
3494 else if (!strcmp(arg, "standby"))
3495 cfg.powermode = 2;
3496 else if (!strcmp(arg, "idle"))
3497 cfg.powermode = 3;
3498 else
3499 badarg = 1;
3500
3501 // if optional arguments are present
3502 if (!badarg && next!=NULL) {
3503 next++;
3504 cfg.powerskipmax = strtol(next, &endptr, 10);
3505 if (endptr == next)
3506 cfg.powerskipmax = 0;
3507 else {
3508 next = endptr + (*endptr != '\0');
3509 if (cfg.powerskipmax <= 0)
3510 badarg = 1;
3511 }
3512 if (*next != '\0') {
3513 if (!strcmp("q", next))
3514 cfg.powerquiet = true;
3515 else {
3516 badarg = 1;
3517 }
3518 }
3519 }
3520 }
3521 break;
3522 case 'S':
3523 // automatic attribute autosave enable/disable
3524 if ((arg = strtok(NULL, delim)) == NULL) {
3525 missingarg = 1;
3526 } else if (!strcmp(arg, "on")) {
3527 cfg.autosave = 2;
3528 } else if (!strcmp(arg, "off")) {
3529 cfg.autosave = 1;
3530 } else {
3531 badarg = 1;
3532 }
3533 break;
3534 case 's':
3535 // warn user, and delete any previously given -s REGEXP Directives
3536 if (!cfg.test_regex.empty()){
3537 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3538 configfile, lineno, name, cfg.test_regex.get_pattern());
3539 cfg.test_regex = regular_expression();
3540 }
3541 // check for missing argument
3542 if (!(arg = strtok(NULL, delim))) {
3543 missingarg = 1;
3544 }
3545 // Compile regex
3546 else {
3547 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3548 // not a valid regular expression!
3549 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3550 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3551 return -1;
3552 }
3553 }
3554 // Do a bit of sanity checking and warn user if we think that
3555 // their regexp is "strange". User probably confused about shell
3556 // glob(3) syntax versus regular expression syntax regexp(7).
3557 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3558 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3559 configfile, lineno, name, val+1, arg[val], arg);
3560 break;
3561 case 'm':
3562 // send email to address that follows
3563 if (!(arg = strtok(NULL,delim)))
3564 missingarg = 1;
3565 else {
3566 if (!cfg.emailaddress.empty())
3567 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3568 configfile, lineno, name, cfg.emailaddress.c_str());
3569 cfg.emailaddress = arg;
3570 }
3571 break;
3572 case 'M':
3573 // email warning options
3574 if (!(arg = strtok(NULL, delim)))
3575 missingarg = 1;
3576 else if (!strcmp(arg, "once"))
3577 cfg.emailfreq = 1;
3578 else if (!strcmp(arg, "daily"))
3579 cfg.emailfreq = 2;
3580 else if (!strcmp(arg, "diminishing"))
3581 cfg.emailfreq = 3;
3582 else if (!strcmp(arg, "test"))
3583 cfg.emailtest = 1;
3584 else if (!strcmp(arg, "exec")) {
3585 // Get the next argument (the command line)
3586 if (!(arg = strtok(NULL, delim))) {
3587 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3588 configfile, lineno, name, token);
3589 return -1;
3590 }
3591 // Free the last cmd line given if any, and copy new one
3592 if (!cfg.emailcmdline.empty())
3593 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3594 configfile, lineno, name, cfg.emailcmdline.c_str());
3595 cfg.emailcmdline = arg;
3596 }
3597 else
3598 badarg = 1;
3599 break;
3600 case 'i':
3601 // ignore failure of usage attribute
3602 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3603 return -1;
3604 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3605 break;
3606 case 'I':
3607 // ignore attribute for tracking purposes
3608 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3609 return -1;
3610 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3611 break;
3612 case 'r':
3613 // print raw value when tracking
3614 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3615 return -1;
3616 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3617 if (*excl == '!') // attribute change is critical
3618 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3619 break;
3620 case 'R':
3621 // track changes in raw value (forces printing of raw value)
3622 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3623 return -1;
3624 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3625 if (*excl == '!') // raw value change is critical
3626 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3627 break;
3628 case 'W':
3629 // track Temperature
3630 if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3631 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
3632 return -1;
3633 break;
3634 case 'v':
3635 // non-default vendor-specific attribute meaning
3636 if (!(arg=strtok(NULL,delim))) {
3637 missingarg = 1;
3638 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
3639 badarg = 1;
3640 }
3641 break;
3642 case 'P':
3643 // Define use of drive-specific presets.
3644 if (!(arg = strtok(NULL, delim))) {
3645 missingarg = 1;
3646 } else if (!strcmp(arg, "use")) {
3647 cfg.ignorepresets = false;
3648 } else if (!strcmp(arg, "ignore")) {
3649 cfg.ignorepresets = true;
3650 } else if (!strcmp(arg, "show")) {
3651 cfg.showpresets = true;
3652 } else if (!strcmp(arg, "showall")) {
3653 showallpresets();
3654 } else {
3655 badarg = 1;
3656 }
3657 break;
3658 default:
3659 // Directive not recognized
3660 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3661 configfile, lineno, name, token);
3662 Directives();
3663 return -1;
3664 }
3665 if (missingarg) {
3666 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3667 configfile, lineno, name, token);
3668 }
3669 if (badarg) {
3670 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3671 configfile, lineno, name, token, arg);
3672 }
3673 if (missingarg || badarg) {
3674 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
3675 printoutvaliddirectiveargs(LOG_CRIT, sym);
3676 PrintOut(LOG_CRIT, "\n");
3677 return -1;
3678 }
3679
3680 return 1;
3681 }
3682
3683 // Scan directive for configuration file
3684 #define SCANDIRECTIVE "DEVICESCAN"
3685
3686 // This is the routine that adds things to the conf_entries list.
3687 //
3688 // Return values are:
3689 // 1: parsed a normal line
3690 // 0: found comment or blank line
3691 // -1: found SCANDIRECTIVE line
3692 // -2: found an error
3693 //
3694 // Note: this routine modifies *line from the caller!
3695 static int ParseConfigLine(dev_config_vector & conf_entries, int /*entry*/, int lineno, /*const*/ char * line)
3696 {
3697 char *token=NULL;
3698 char *name=NULL;
3699 const char *delim = " \n\t";
3700 int devscan=0;
3701
3702 // get first token: device name. If a comment, skip line
3703 if (!(name=strtok(line,delim)) || *name=='#') {
3704 return 0;
3705 }
3706
3707 // Have we detected the SCANDIRECTIVE directive?
3708 if (!strcmp(SCANDIRECTIVE,name)){
3709 devscan=1;
3710 }
3711
3712 // We've got a legit entry, make space to store it
3713 conf_entries.push_back( dev_config() );
3714 dev_config & cfg = conf_entries.back();
3715
3716 cfg.name = name; // Later replaced by dev->get_info().info_name
3717 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
3718
3719 // Store line number, and by default check for both device types.
3720 cfg.lineno=lineno;
3721
3722 // parse tokens one at a time from the file.
3723 while ((token=strtok(NULL,delim))){
3724 int retval=ParseToken(token,cfg);
3725
3726 if (retval==0)
3727 // No tokens left:
3728 break;
3729
3730 if (retval>0) {
3731 // Parsed token
3732 #if (0)
3733 PrintOut(LOG_INFO,"Parsed token %s\n",token);
3734 #endif
3735 continue;
3736 }
3737
3738 if (retval<0) {
3739 // error found on the line
3740 return -2;
3741 }
3742 }
3743
3744 // If NO monitoring directives are set, then set all of them.
3745 if (!( cfg.smartcheck || cfg.selftest
3746 || cfg.errorlog || cfg.xerrorlog
3747 || cfg.usagefailed || cfg.prefail || cfg.usage
3748 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
3749
3750 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3751 cfg.name.c_str(), cfg.lineno, configfile);
3752
3753 cfg.smartcheck = true;
3754 cfg.usagefailed = true;
3755 cfg.prefail = true;
3756 cfg.usage = true;
3757 cfg.selftest = true;
3758 cfg.errorlog = true;
3759 }
3760
3761 // additional sanity check. Has user set -M options without -m?
3762 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
3763 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3764 cfg.name.c_str(), cfg.lineno, configfile);
3765 return -2;
3766 }
3767
3768 // has the user has set <nomailer>?
3769 if (cfg.emailaddress == "<nomailer>") {
3770 // check that -M exec is also set
3771 if (cfg.emailcmdline.empty()){
3772 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3773 cfg.name.c_str(), cfg.lineno, configfile);
3774 return -2;
3775 }
3776 // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
3777 cfg.emailaddress.clear();
3778 }
3779
3780 if (devscan)
3781 return -1;
3782 else
3783 return 1;
3784 }
3785
3786 // Parses a configuration file. Return values are:
3787 // N=>0: found N entries
3788 // -1: syntax error in config file
3789 // -2: config file does not exist
3790 // -3: config file exists but cannot be read
3791 //
3792 // In the case where the return value is 0, there are three
3793 // possiblities:
3794 // Empty configuration file ==> conf_entries.empty()
3795 // No configuration file ==> conf_entries[0].lineno == 0
3796 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
3797 static int ParseConfigFile(dev_config_vector & conf_entries)
3798 {
3799 // maximum line length in configuration file
3800 const int MAXLINELEN = 256;
3801 // maximum length of a continued line in configuration file
3802 const int MAXCONTLINE = 1023;
3803
3804 stdio_file f;
3805 // Open config file, if it exists and is not <stdin>
3806 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
3807 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
3808 // file exists but we can't read it or it should exist due to '-c' option
3809 int ret = (errno!=ENOENT ? -3 : -2);
3810 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
3811 strerror(errno),configfile);
3812 return ret;
3813 }
3814 }
3815 else // read from stdin ('-c -' option)
3816 f.open(stdin);
3817
3818 // No configuration file found -- use fake one
3819 int entry = 0;
3820 if (!f) {
3821 char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
3822
3823 if (ParseConfigLine(conf_entries, entry, 0, fakeconfig) != -1)
3824 throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
3825 return 0;
3826 }
3827
3828 #ifdef __CYGWIN__
3829 setmode(fileno(f), O_TEXT); // Allow files with \r\n
3830 #endif
3831
3832 // configuration file exists
3833 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
3834
3835 // parse config file line by line
3836 int lineno = 1, cont = 0, contlineno = 0;
3837 char line[MAXLINELEN+2];
3838 char fullline[MAXCONTLINE+1];
3839
3840 for (;;) {
3841 int len=0,scandevice;
3842 char *lastslash;
3843 char *comment;
3844 char *code;
3845
3846 // make debugging simpler
3847 memset(line,0,sizeof(line));
3848
3849 // get a line
3850 code=fgets(line, MAXLINELEN+2, f);
3851
3852 // are we at the end of the file?
3853 if (!code){
3854 if (cont) {
3855 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3856 // See if we found a SCANDIRECTIVE directive
3857 if (scandevice==-1)
3858 return 0;
3859 // did we find a syntax error
3860 if (scandevice==-2)
3861 return -1;
3862 // the final line is part of a continuation line
3863 cont=0;
3864 entry+=scandevice;
3865 }
3866 break;
3867 }
3868
3869 // input file line number
3870 contlineno++;
3871
3872 // See if line is too long
3873 len=strlen(line);
3874 if (len>MAXLINELEN){
3875 const char *warn;
3876 if (line[len-1]=='\n')
3877 warn="(including newline!) ";
3878 else
3879 warn="";
3880 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3881 (int)contlineno,configfile,warn,(int)MAXLINELEN);
3882 return -1;
3883 }
3884
3885 // Ignore anything after comment symbol
3886 if ((comment=strchr(line,'#'))){
3887 *comment='\0';
3888 len=strlen(line);
3889 }
3890
3891 // is the total line (made of all continuation lines) too long?
3892 if (cont+len>MAXCONTLINE){
3893 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3894 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
3895 return -1;
3896 }
3897
3898 // copy string so far into fullline, and increment length
3899 strcpy(fullline+cont,line);
3900 cont+=len;
3901
3902 // is this a continuation line. If so, replace \ by space and look at next line
3903 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
3904 *(fullline+(cont-len)+(lastslash-line))=' ';
3905 continue;
3906 }
3907
3908 // Not a continuation line. Parse it
3909 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3910
3911 // did we find a scandevice directive?
3912 if (scandevice==-1)
3913 return 0;
3914 // did we find a syntax error
3915 if (scandevice==-2)
3916 return -1;
3917
3918 entry+=scandevice;
3919 lineno++;
3920 cont=0;
3921 }
3922
3923 // note -- may be zero if syntax of file OK, but no valid entries!
3924 return entry;
3925 }
3926
3927 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3928 <LIST> is the list of valid arguments for option opt. */
3929 static void PrintValidArgs(char opt)
3930 {
3931 const char *s;
3932
3933 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
3934 if (!(s = GetValidArgList(opt)))
3935 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
3936 else
3937 PrintOut(LOG_CRIT, "%s", (char *)s);
3938 PrintOut(LOG_CRIT, " <=======\n");
3939 }
3940
// Return true if 'path' is an absolute path name.
// A leading '/' always qualifies.  On Windows and Cygwin builds a leading
// '\' or a drive letter followed by ':' and a slash or backslash
// qualifies as well.
static bool is_abs_path(const char * path)
{
  const char first = path[0];
  bool absolute = (first == '/');

#if defined(_WIN32) || defined(__CYGWIN__)
  if (!absolute) {
    if (first == '\\')
      absolute = true;
    else {
      // %n is only stored if the whole "X:/" or "X:\" prefix matched
      int consumed = -1;
      sscanf(path, "%*1[A-Za-z]:%*1[/\\]%n", &consumed);
      absolute = (consumed > 0);
    }
  }
#endif

  return absolute;
}
3956
3957 // Parses input line, prints usage message and
3958 // version/license/copyright messages
3959 static void ParseOpts(int argc, char **argv)
3960 {
3961 // Init default configfile path
3962 #ifndef _WIN32
3963 configfile = SMARTMONTOOLS_SYSCONFDIR"/smartd.conf";
3964 #else
3965 static std::string configfile_str = get_exe_dir() + "/smartd.conf";
3966 configfile = configfile_str.c_str();
3967 #endif
3968
3969 // Please update GetValidArgList() if you edit shortopts
3970 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:Vh?"
3971 #ifdef HAVE_LIBCAP_NG
3972 "C"
3973 #endif
3974 ;
3975 // Please update GetValidArgList() if you edit longopts
3976 struct option longopts[] = {
3977 { "configfile", required_argument, 0, 'c' },
3978 { "logfacility", required_argument, 0, 'l' },
3979 { "quit", required_argument, 0, 'q' },
3980 { "debug", no_argument, 0, 'd' },
3981 { "showdirectives", no_argument, 0, 'D' },
3982 { "interval", required_argument, 0, 'i' },
3983 #ifndef _WIN32
3984 { "no-fork", no_argument, 0, 'n' },
3985 #endif
3986 { "pidfile", required_argument, 0, 'p' },
3987 { "report", required_argument, 0, 'r' },
3988 { "savestates", required_argument, 0, 's' },
3989 { "attributelog", required_argument, 0, 'A' },
3990 { "drivedb", required_argument, 0, 'B' },
3991 #if defined(_WIN32) || defined(__CYGWIN__)
3992 { "service", no_argument, 0, 'n' },
3993 #endif
3994 { "version", no_argument, 0, 'V' },
3995 { "license", no_argument, 0, 'V' },
3996 { "copyright", no_argument, 0, 'V' },
3997 { "help", no_argument, 0, 'h' },
3998 { "usage", no_argument, 0, 'h' },
3999 #ifdef HAVE_LIBCAP_NG
4000 { "capabilities", no_argument, 0, 'C' },
4001 #endif
4002 { 0, 0, 0, 0 }
4003 };
4004
4005 opterr=optopt=0;
4006 bool badarg = false;
4007 bool no_defaultdb = false; // set true on '-B FILE'
4008
4009 // Parse input options.
4010 int optchar;
4011 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4012 char *arg;
4013 char *tailptr;
4014 long lchecktime;
4015
4016 switch(optchar) {
4017 case 'q':
4018 // when to quit
4019 if (!(strcmp(optarg,"nodev"))) {
4020 quit=0;
4021 } else if (!(strcmp(optarg,"nodevstartup"))) {
4022 quit=1;
4023 } else if (!(strcmp(optarg,"never"))) {
4024 quit=2;
4025 } else if (!(strcmp(optarg,"onecheck"))) {
4026 quit=3;
4027 debugmode=1;
4028 } else if (!(strcmp(optarg,"showtests"))) {
4029 quit=4;
4030 debugmode=1;
4031 } else if (!(strcmp(optarg,"errors"))) {
4032 quit=5;
4033 } else {
4034 badarg = true;
4035 }
4036 break;
4037 case 'l':
4038 // set the log facility level
4039 if (!strcmp(optarg, "daemon"))
4040 facility=LOG_DAEMON;
4041 else if (!strcmp(optarg, "local0"))
4042 facility=LOG_LOCAL0;
4043 else if (!strcmp(optarg, "local1"))
4044 facility=LOG_LOCAL1;
4045 else if (!strcmp(optarg, "local2"))
4046 facility=LOG_LOCAL2;
4047 else if (!strcmp(optarg, "local3"))
4048 facility=LOG_LOCAL3;
4049 else if (!strcmp(optarg, "local4"))
4050 facility=LOG_LOCAL4;
4051 else if (!strcmp(optarg, "local5"))
4052 facility=LOG_LOCAL5;
4053 else if (!strcmp(optarg, "local6"))
4054 facility=LOG_LOCAL6;
4055 else if (!strcmp(optarg, "local7"))
4056 facility=LOG_LOCAL7;
4057 else
4058 badarg = true;
4059 break;
4060 case 'd':
4061 // enable debug mode
4062 debugmode = 1;
4063 break;
4064 case 'n':
4065 // don't fork()
4066 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4067 do_fork = false;
4068 #endif
4069 break;
4070 case 'D':
4071 // print summary of all valid directives
4072 debugmode = 1;
4073 Directives();
4074 EXIT(0);
4075 break;
4076 case 'i':
4077 // Period (time interval) for checking
4078 // strtol will set errno in the event of overflow, so we'll check it.
4079 errno = 0;
4080 lchecktime = strtol(optarg, &tailptr, 10);
4081 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4082 debugmode=1;
4083 PrintHead();
4084 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4085 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4086 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4087 EXIT(EXIT_BADCMD);
4088 }
4089 checktime = (int)lchecktime;
4090 break;
4091 case 'r':
4092 // report IOCTL transactions
4093 {
4094 int i;
4095 char *s;
4096
4097 // split_report_arg() may modify its first argument string, so use a
4098 // copy of optarg in case we want optarg for an error message.
4099 if (!(s = strdup(optarg))) {
4100 PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
4101 EXIT(EXIT_NOMEM);
4102 }
4103 if (split_report_arg(s, &i)) {
4104 badarg = true;
4105 } else if (i<1 || i>3) {
4106 debugmode=1;
4107 PrintHead();
4108 PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
4109 PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4110 EXIT(EXIT_BADCMD);
4111 } else if (!strcmp(s,"ioctl")) {
4112 ata_debugmode = scsi_debugmode = i;
4113 } else if (!strcmp(s,"ataioctl")) {
4114 ata_debugmode = i;
4115 } else if (!strcmp(s,"scsiioctl")) {
4116 scsi_debugmode = i;
4117 } else {
4118 badarg = true;
4119 }
4120 free(s); // TODO: use std::string
4121 }
4122 break;
4123 case 'c':
4124 // alternate configuration file
4125 if (strcmp(optarg,"-"))
4126 configfile = (configfile_alt = optarg).c_str();
4127 else // read from stdin
4128 configfile=configfile_stdin;
4129 break;
4130 case 'p':
4131 // output file with PID number
4132 pid_file = optarg;
4133 break;
4134 case 's':
4135 // path prefix of persistent state file
4136 state_path_prefix = optarg;
4137 break;
4138 case 'A':
4139 // path prefix of attribute log file
4140 attrlog_path_prefix = optarg;
4141 break;
4142 case 'B':
4143 {
4144 const char * path = optarg;
4145 if (*path == '+' && path[1])
4146 path++;
4147 else
4148 no_defaultdb = true;
4149 unsigned char savedebug = debugmode; debugmode = 1;
4150 if (!read_drive_database(path))
4151 EXIT(EXIT_BADCMD);
4152 debugmode = savedebug;
4153 }
4154 break;
4155 case 'V':
4156 // print version and CVS info
4157 debugmode = 1;
4158 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4159 EXIT(0);
4160 break;
4161 #ifdef HAVE_LIBCAP_NG
4162 case 'C':
4163 // enable capabilities
4164 enable_capabilities = true;
4165 break;
4166 #endif
4167 case 'h':
4168 // help: print summary of command-line options
4169 debugmode=1;
4170 PrintHead();
4171 Usage();
4172 EXIT(0);
4173 break;
4174 case '?':
4175 default:
4176 // unrecognized option
4177 debugmode=1;
4178 PrintHead();
4179 // Point arg to the argument in which this option was found.
4180 arg = argv[optind-1];
4181 // Check whether the option is a long option that doesn't map to -h.
4182 if (arg[1] == '-' && optchar != 'h') {
4183 // Iff optopt holds a valid option then argument must be missing.
4184 if (optopt && (strchr(shortopts, optopt) != NULL)) {
4185 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4186 PrintValidArgs(optopt);
4187 } else {
4188 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4189 }
4190 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4191 EXIT(EXIT_BADCMD);
4192 }
4193 if (optopt) {
4194 // Iff optopt holds a valid option then argument must be missing.
4195 if (strchr(shortopts, optopt) != NULL){
4196 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4197 PrintValidArgs(optopt);
4198 } else {
4199 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4200 }
4201 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4202 EXIT(EXIT_BADCMD);
4203 }
4204 Usage();
4205 EXIT(0);
4206 }
4207
4208 // Check to see if option had an unrecognized or incorrect argument.
4209 if (badarg) {
4210 debugmode=1;
4211 PrintHead();
4212 // It would be nice to print the actual option name given by the user
4213 // here, but we just print the short form. Please fix this if you know
4214 // a clean way to do it.
4215 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4216 PrintValidArgs(optchar);
4217 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4218 EXIT(EXIT_BADCMD);
4219 }
4220 }
4221
4222 // non-option arguments are not allowed
4223 if (argc > optind) {
4224 debugmode=1;
4225 PrintHead();
4226 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4227 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4228 EXIT(EXIT_BADCMD);
4229 }
4230
4231 // no pidfile in debug mode
4232 if (debugmode && !pid_file.empty()) {
4233 debugmode=1;
4234 PrintHead();
4235 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4236 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4237 EXIT(EXIT_BADCMD);
4238 }
4239
4240 // absolute path is required due to chdir('/') after fork().
4241 if (!state_path_prefix.empty() && !debugmode && !is_abs_path(state_path_prefix.c_str())) {
4242 debugmode=1;
4243 PrintHead();
4244 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4245 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4246 state_path_prefix.c_str());
4247 EXIT(EXIT_BADCMD);
4248 }
4249
4250 // absolute path is required due to chdir('/') after fork().
4251 if (!attrlog_path_prefix.empty() && !debugmode && !is_abs_path(attrlog_path_prefix.c_str())) {
4252 debugmode=1;
4253 PrintHead();
4254 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4255 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4256 attrlog_path_prefix.c_str());
4257 EXIT(EXIT_BADCMD);
4258 }
4259
4260 // Read or init drive database
4261 if (!no_defaultdb) {
4262 unsigned char savedebug = debugmode; debugmode = 1;
4263 if (!read_default_drive_databases())
4264 EXIT(EXIT_BADCMD);
4265 debugmode = savedebug;
4266 }
4267
4268 // print header
4269 PrintHead();
4270 }
4271
4272 // Function we call if no configuration file was found or if the
4273 // SCANDIRECTIVE Directive was found. It makes entries for device
4274 // names returned by scan_smart_devices() in os_OSNAME.cpp
4275 static int MakeConfigEntries(const dev_config & base_cfg,
4276 dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4277 {
4278 // make list of devices
4279 smart_device_list devlist;
4280 if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4281 PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4282
4283 // if no devices, or error constructing list, return
4284 if (devlist.size() <= 0)
4285 return 0;
4286
4287 // add empty device slots for existing config entries
4288 while (scanned_devs.size() < conf_entries.size())
4289 scanned_devs.push_back((smart_device *)0);
4290
4291 // loop over entries to create
4292 for (unsigned i = 0; i < devlist.size(); i++) {
4293 // Move device pointer
4294 smart_device * dev = devlist.release(i);
4295 scanned_devs.push_back(dev);
4296
4297 // Copy configuration, update device and type name
4298 conf_entries.push_back(base_cfg);
4299 dev_config & cfg = conf_entries.back();
4300 cfg.name = dev->get_info().info_name;
4301 cfg.dev_name = dev->get_info().dev_name;
4302 cfg.dev_type = type;
4303 }
4304
4305 return devlist.size();
4306 }
4307
4308 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4309 {
4310 if (!debugmode && scandirective)
4311 return;
4312 if (line)
4313 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4314 "Unable to register %s device %s at line %d of file %s\n",
4315 type, name, line, configfile);
4316 else
4317 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4318 type, name);
4319 return;
4320 }
4321
4322 // Returns negative value (see ParseConfigFile()) if config file
4323 // had errors, else number of entries which may be zero or positive.
4324 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4325 {
4326 // parse configuration file configfile (normally /etc/smartd.conf)
4327 int entries = ParseConfigFile(conf_entries);
4328
4329 if (entries < 0) {
4330 // There was an error reading the configuration file.
4331 conf_entries.clear();
4332 if (entries == -1)
4333 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4334 return entries;
4335 }
4336
4337 // no error parsing config file.
4338 if (entries) {
4339 // we did not find a SCANDIRECTIVE and did find valid entries
4340 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4341 }
4342 else if (!conf_entries.empty()) {
4343 // we found a SCANDIRECTIVE or there was no configuration file so
4344 // scan. Configuration file's last entry contains all options
4345 // that were set
4346 dev_config first = conf_entries.back();
4347 conf_entries.pop_back();
4348
4349 if (first.lineno)
4350 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4351 else
4352 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4353
4354 // make config list of devices to search for
4355 MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4356
4357 // warn user if scan table found no devices
4358 if (conf_entries.empty())
4359 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4360 }
4361 else
4362 PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4363
4364 return conf_entries.size();
4365 }
4366
4367
4368 // This function tries devices from conf_entries. Each one that can be
4369 // registered is moved onto the [ata|scsi]devices lists and removed
4370 // from the conf_entries list.
4371 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4372 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4373 {
4374 // start by clearing lists/memory of ALL existing devices
4375 configs.clear();
4376 devices.clear();
4377 states.clear();
4378
4379 // Register entries
4380 for (unsigned i = 0; i < conf_entries.size(); i++){
4381
4382 dev_config cfg = conf_entries[i];
4383
4384 // get device of appropriate type
4385 smart_device_auto_ptr dev;
4386 bool scanning = false;
4387
4388 // Device may already be detected during devicescan
4389 if (i < scanned_devs.size()) {
4390 dev = scanned_devs.release(i);
4391 if (dev)
4392 scanning = true;
4393 }
4394
4395 if (!dev) {
4396 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4397 if (!dev) {
4398 if (cfg.dev_type.empty())
4399 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4400 else
4401 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4402 continue;
4403 }
4404 }
4405
4406 // Save old info
4407 smart_device::device_info oldinfo = dev->get_info();
4408
4409 // Open with autodetect support, may return 'better' device
4410 dev.replace( dev->autodetect_open() );
4411
4412 // Report if type has changed
4413 if (oldinfo.dev_type != dev->get_dev_type())
4414 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4415 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4416
4417 if (!dev->is_open()) {
4418 // For linux+devfs, a nonexistent device gives a strange error
4419 // message. This makes the error message a bit more sensible.
4420 // If no debug and scanning - don't print errors
4421 if (debugmode || !scanning)
4422 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4423 continue;
4424 }
4425
4426 // Update informal name
4427 cfg.name = dev->get_info().info_name;
4428 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4429
4430 // Prepare initial state
4431 dev_state state;
4432
4433 // register ATA devices
4434 if (dev->is_ata()){
4435 if (ATADeviceScan(cfg, state, dev->to_ata())) {
4436 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4437 dev.reset();
4438 }
4439 }
4440 // or register SCSI devices
4441 else if (dev->is_scsi()){
4442 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4443 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4444 dev.reset();
4445 }
4446 }
4447 else {
4448 PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4449 dev.reset();
4450 }
4451
4452 if (dev) {
4453 // move onto the list of devices
4454 configs.push_back(cfg);
4455 states.push_back(state);
4456 devices.push_back(dev);
4457 }
4458 // if device is explictly listed and we can't register it, then
4459 // exit unless the user has specified that the device is removable
4460 else if (!scanning) {
4461 if (cfg.removable || quit==2)
4462 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4463 else {
4464 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4465 EXIT(EXIT_BADDEV);
4466 }
4467 }
4468 }
4469 }
4470
4471
4472 // Main program without exception handling
4473 static int main_worker(int argc, char **argv)
4474 {
4475 // Initialize interface
4476 smart_interface::init();
4477 if (!smi())
4478 return 1;
4479
4480 // is it our first pass through?
4481 bool firstpass = true;
4482
4483 // next time to wake up
4484 time_t wakeuptime = 0;
4485
4486 // parse input and print header and usage info if needed
4487 ParseOpts(argc,argv);
4488
4489 // Configuration for each device
4490 dev_config_vector configs;
4491 // Device states
4492 dev_state_vector states;
4493 // Devices to monitor
4494 smart_device_list devices;
4495
4496 bool write_states_always = true;
4497
4498 #ifdef HAVE_LIBCAP_NG
4499 // Drop capabilities
4500 if (enable_capabilities) {
4501 capng_clear(CAPNG_SELECT_BOTH);
4502 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
4503 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
4504 capng_apply(CAPNG_SELECT_BOTH);
4505 }
4506 #endif
4507
4508 // the main loop of the code
4509 for (;;) {
4510
4511 // are we exiting from a signal?
4512 if (caughtsigEXIT) {
4513 // are we exiting with SIGTERM?
4514 int isterm=(caughtsigEXIT==SIGTERM);
4515 int isquit=(caughtsigEXIT==SIGQUIT);
4516 int isok=debugmode?isterm || isquit:isterm;
4517
4518 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4519 caughtsigEXIT, strsignal(caughtsigEXIT));
4520
4521 if (!isok)
4522 return EXIT_SIGNAL;
4523
4524 // Write state files
4525 if (!state_path_prefix.empty())
4526 write_all_dev_states(configs, states);
4527
4528 return 0;
4529 }
4530
4531 // Should we (re)read the config file?
4532 if (firstpass || caughtsigHUP){
4533 if (!firstpass) {
4534 #ifdef __CYGWIN__
4535 // Workaround for missing SIGQUIT via keyboard on Cygwin
4536 if (caughtsigHUP==2) {
4537 // Simulate SIGQUIT if another SIGINT arrives soon
4538 caughtsigHUP=0;
4539 sleep(1);
4540 if (caughtsigHUP==2) {
4541 caughtsigEXIT=SIGQUIT;
4542 continue;
4543 }
4544 caughtsigHUP=2;
4545 }
4546 #endif
4547 // Write state files
4548 if (!state_path_prefix.empty())
4549 write_all_dev_states(configs, states);
4550
4551 PrintOut(LOG_INFO,
4552 caughtsigHUP==1?
4553 "Signal HUP - rereading configuration file %s\n":
4554 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4555 configfile);
4556 }
4557
4558 {
4559 dev_config_vector conf_entries; // Entries read from smartd.conf
4560 smart_device_list scanned_devs; // Devices found during scan
4561 // (re)reads config file, makes >=0 entries
4562 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
4563
4564 if (entries>=0) {
4565 // checks devices, then moves onto ata/scsi list or deallocates.
4566 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
4567 if (!(configs.size() == devices.size() && configs.size() == states.size()))
4568 throw std::logic_error("Invalid result from RegisterDevices");
4569 }
4570 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4571 // user has asked to continue on error in configuration file
4572 if (!firstpass)
4573 PrintOut(LOG_INFO,"Reusing previous configuration\n");
4574 }
4575 else {
4576 // exit with configuration file error status
4577 return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4578 }
4579 }
4580
4581 // Log number of devices we are monitoring...
4582 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
4583 int numata = 0;
4584 for (unsigned i = 0; i < devices.size(); i++) {
4585 if (devices.at(i)->is_ata())
4586 numata++;
4587 }
4588 PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4589 numata, devices.size() - numata);
4590 }
4591 else {
4592 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4593 return EXIT_NODEV;
4594 }
4595
4596 if (quit==4) {
4597 // user has asked to print test schedule
4598 PrintTestSchedule(configs, states, devices);
4599 return 0;
4600 }
4601
4602 #ifdef HAVE_LIBCAP_NG
4603 if (enable_capabilities) {
4604 for (unsigned i = 0; i < configs.size(); i++) {
4605 if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
4606 PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
4607 break;
4608 }
4609 }
4610 }
4611 #endif
4612
4613 // reset signal
4614 caughtsigHUP=0;
4615
4616 // Always write state files after (re)configuration
4617 write_states_always = true;
4618 }
4619
4620 // check all devices once,
4621 // self tests are not started in first pass unless '-q onecheck' is specified
4622 CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
4623
4624 // Write state files
4625 if (!state_path_prefix.empty())
4626 write_all_dev_states(configs, states, write_states_always);
4627 write_states_always = false;
4628
4629 // Write attribute logs
4630 if (!attrlog_path_prefix.empty())
4631 write_all_dev_attrlogs(configs, states);
4632
4633 // user has asked us to exit after first check
4634 if (quit==3) {
4635 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4636 "smartd is exiting (exit status 0)\n");
4637 return 0;
4638 }
4639
4640 // fork into background if needed
4641 if (firstpass && !debugmode) {
4642 DaemonInit();
4643 }
4644
4645 // set exit and signal handlers, write PID file, set wake-up time
4646 if (firstpass){
4647 Initialize(&wakeuptime);
4648 firstpass = false;
4649 }
4650
4651 // sleep until next check time, or a signal arrives
4652 wakeuptime = dosleep(wakeuptime, write_states_always);
4653 }
4654 }
4655
4656
4657 #ifndef _WIN32
4658 // Main program
4659 int main(int argc, char **argv)
4660 #else
4661 // Windows: internal main function started direct or by service control manager
4662 static int smartd_main(int argc, char **argv)
4663 #endif
4664 {
4665 int status;
4666 try {
4667 // Do the real work ...
4668 status = main_worker(argc, argv);
4669 }
4670 catch (int ex) {
4671 // EXIT(status) arrives here
4672 status = ex;
4673 }
4674 catch (const std::bad_alloc & /*ex*/) {
4675 // Memory allocation failed (also thrown by std::operator new)
4676 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
4677 status = EXIT_NOMEM;
4678 }
4679 catch (const std::exception & ex) {
4680 // Other fatal errors
4681 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
4682 status = EXIT_BADCODE;
4683 }
4684
4685 if (is_initialized)
4686 status = Goodbye(status);
4687
4688 #ifdef _WIN32
4689 daemon_winsvc_exitcode = status;
4690 #endif
4691 return status;
4692 }
4693
4694
#ifdef _WIN32
// Windows program entry point.
// Builds the service option table and passes control to daemon_main(),
// which handles daemon and service specific commands and then starts
// smartd_main() directly, from a new process, or via the service control
// manager.
int main(int argc, char **argv)
{
  // Options for the smartd Windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename
    "smartd",
    // displayname
    "SmartD Service",
    // description (adjacent literals are concatenated by the compiler)
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  const int exit_code =
    daemon_main("smartd", &service_options, smartd_main, argc, argv);
  return exit_code;
}
#endif