]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Merge commit 'upstream/5.40+svn3296'
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://smartmontools.sourceforge.net
3 *
4 * Copyright (C) 2002-11 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7 * Copyright (C) 2008-11 Christian Franke <smartmontools-support@lists.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #ifndef _GNU_SOURCE
25 // TODO: Why is this define necessary?
26 #define _GNU_SOURCE
27 #endif
28
29 // unconditionally included files
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/stat.h> // umask
33 #include <signal.h>
34 #include <fcntl.h>
35 #include <string.h>
36 #include <syslog.h>
37 #include <stdarg.h>
38 #include <stdlib.h>
39 #include <errno.h>
40 #include <time.h>
41 #include <limits.h>
42 #include <getopt.h>
43
44 #include <stdexcept>
45 #include <string>
46 #include <vector>
47 #include <algorithm> // std::replace()
48
49 // see which system files to conditionally include
50 #include "config.h"
51
52 // conditionally included files
53 #ifndef _WIN32
54 #include <sys/wait.h>
55 #endif
56 #ifdef HAVE_UNISTD_H
57 #include <unistd.h>
58 #endif
59 #ifdef HAVE_NETDB_H
60 #include <netdb.h>
61 #endif
62
63 #ifdef _WIN32
64 #ifdef _MSC_VER
65 #pragma warning(disable:4761) // "conversion supplied"
66 typedef unsigned short mode_t;
67 typedef int pid_t;
68 #endif
69 #include <io.h> // umask()
70 #include <process.h> // getpid()
71 #endif // _WIN32
72
73 #ifdef __CYGWIN__
74 // From <windows.h>:
75 // BOOL WINAPI FreeConsole(void);
76 extern "C" int __stdcall FreeConsole(void);
77 #include <io.h> // setmode()
78 #endif // __CYGWIN__
79
80 #ifdef HAVE_LIBCAP_NG
81 #include <cap-ng.h>
82 #endif // LIBCAP_NG
83
84 // locally included files
85 #include "int64.h"
86 #include "atacmds.h"
87 #include "dev_interface.h"
88 #include "knowndrives.h"
89 #include "scsicmds.h"
90 #include "utility.h"
91
92 // This is for solaris, where signal() resets the handler to SIG_DFL
93 // after the first signal is caught.
94 #ifdef HAVE_SIGSET
95 #define SIGNALFN sigset
96 #else
97 #define SIGNALFN signal
98 #endif
99
100 #ifdef _WIN32
101 #include "hostname_win32.h" // gethost/domainname()
102 #define HAVE_GETHOSTNAME 1
103 #define HAVE_GETDOMAINNAME 1
104 // fork()/signal()/initd simulation for native Windows
105 #include "daemon_win32.h" // daemon_main/detach/signal()
106 #undef SIGNALFN
107 #define SIGNALFN daemon_signal
108 #define strsignal daemon_strsignal
109 #define sleep daemon_sleep
110 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
111 #define SIGQUIT SIGBREAK
112 #define SIGQUIT_KEYNAME "CONTROL-Break"
113 #else // _WIN32
114 #ifdef __CYGWIN__
115 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
116 #define SIGQUIT_KEYNAME "2x CONTROL-C"
117 #else // __CYGWIN__
118 #define SIGQUIT_KEYNAME "CONTROL-\\"
119 #endif // __CYGWIN__
120 #endif // _WIN32
121
122 #if defined (__SVR4) && defined (__sun)
123 extern "C" int getdomainname(char *, int); // no declaration in header files!
124 #endif
125
126 #define ARGUSED(x) ((void)(x))
127
128 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 3288 2011-03-09 18:40:36Z chrfranke $"
129 CONFIG_H_CVSID;
130
131 // smartd exit codes
132 #define EXIT_BADCMD 1 // command line did not parse
133 #define EXIT_BADCONF 2 // syntax error in config file
134 #define EXIT_STARTUP 3 // problem forking daemon
135 #define EXIT_PID 4 // problem creating pid file
136 #define EXIT_NOCONF 5 // config file does not exist
137 #define EXIT_READCONF 6 // config file exists but cannot be read
138
139 #define EXIT_NOMEM 8 // out of memory
140 #define EXIT_BADCODE 10 // internal error - should NEVER happen
141
142 #define EXIT_BADDEV 16 // we can't monitor this device
143 #define EXIT_NODEV 17 // no devices to monitor
144
145 #define EXIT_SIGNAL 254 // abort on signal
146
147
148 // command-line: 1=debug mode, 2=print presets
149 static unsigned char debugmode = 0;
150
151 // command-line: how long to sleep between checks
152 #define CHECKTIME 1800
153 static int checktime=CHECKTIME;
154
155 // command-line: name of PID file (empty for no pid file)
156 static std::string pid_file;
157
158 // command-line: path prefix of persistent state file, empty if no persistence.
159 static std::string state_path_prefix
160 #ifdef SMARTMONTOOLS_SAVESTATES
161 = SMARTMONTOOLS_SAVESTATES
162 #endif
163 ;
164
165 // command-line: path prefix of attribute log file, empty if no logs.
166 static std::string attrlog_path_prefix
167 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
168 = SMARTMONTOOLS_ATTRIBUTELOG
169 #endif
170 ;
171
172 // configuration file name
173 static const char * configfile;
174 // configuration file "name" if read from stdin
175 static const char * const configfile_stdin = "<stdin>";
176 // path of alternate configuration file
177 static std::string configfile_alt;
178
179 // command-line: when should we exit?
180 static int quit=0;
181
182 // command-line; this is the default syslog(3) log facility to use.
183 static int facility=LOG_DAEMON;
184
185 #ifndef _WIN32
186 // command-line: fork into background?
187 static bool do_fork=true;
188 #endif
189
190 #ifdef HAVE_LIBCAP_NG
191 // command-line: enable capabilities?
192 static bool enable_capabilities = false;
193 #endif
194
195 #if defined(_WIN32) || defined(__CYGWIN__)
196 // TODO: This smartctl only variable is also used in os_win32.cpp
197 unsigned char failuretest_permissive = 0;
198 #endif
199
200 // set to one if we catch a USR1 (check devices now)
201 static volatile int caughtsigUSR1=0;
202
203 #ifdef _WIN32
204 // set to one if we catch a USR2 (toggle debug mode)
205 static volatile int caughtsigUSR2=0;
206 #endif
207
208 // set to one if we catch a HUP (reload config file). In debug mode,
209 // set to two, if we catch INT (also reload config file).
210 static volatile int caughtsigHUP=0;
211
212 // set to signal value if we catch INT, QUIT, or TERM
213 static volatile int caughtsigEXIT=0;
214
215 // This function prints either to stdout or to the syslog as needed.
216 static void PrintOut(int priority, const char *fmt, ...)
217 __attribute__ ((format(printf, 2, 3)));
218
// Attribute monitoring flags (one bit each, may be OR'ed together).
// Stored per attribute in dev_config::monitor_attr_flags.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5,
};
229
// Table of flag bytes, one per attribute ID.
// ID 0 and IDs outside the table are never stored and never report a flag.
class attribute_flags
{
public:
  // Start with all flags cleared.
  attribute_flags()
    {
      for (unsigned i = 0; i < sizeof(m_flags); i++)
        m_flags[i] = 0;
    }

  // Return true if at least one bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!in_range(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  // OR 'flags' into the entry of attribute 'id'; out-of-range IDs are ignored.
  void set(int id, unsigned char flags)
    {
      if (!in_range(id))
        return;
      m_flags[id] |= flags;
    }

private:
  // Valid IDs are 1 .. sizeof(m_flags)-1.
  bool in_range(int id) const
    { return (id > 0 && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
249
250
251 /// Configuration data for a device. Read from smartd.conf.
252 /// Supports copy & assignment and is compatible with STL containers.
253 struct dev_config
254 {
255 int lineno; // Line number of entry in file
256 std::string name; // Device name (with optional extra info)
257 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
258 std::string dev_type; // Device type argument from -d directive, empty if none
259 std::string state_file; // Path of the persistent state file, empty if none
260 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
261 bool smartcheck; // Check SMART status
262 bool usagefailed; // Check for failed Usage Attributes
263 bool prefail; // Track changes in Prefail Attributes
264 bool usage; // Track changes in Usage Attributes
265 bool selftest; // Monitor number of selftest errors
266 bool errorlog; // Monitor number of ATA errors
267 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
268 bool permissive; // Ignore failed SMART commands
269 char autosave; // 1=disable, 2=enable Autosave Attributes
270 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
271 unsigned char fix_firmwarebug; // FIX_*, see atacmds.h
272 bool ignorepresets; // Ignore database of -v options
273 bool showpresets; // Show database entry for this device
274 bool removable; // Device may disappear (not be present)
275 char powermode; // skip check, if disk in idle or standby mode
276 bool powerquiet; // skip powermode 'skipping checks' message
277 int powerskipmax; // how many times can be check skipped
278 unsigned char tempdiff; // Track Temperature changes >= this limit
279 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
280 regular_expression test_regex; // Regex for scheduled testing
281
282 // Configuration of email warning messages
283 std::string emailcmdline; // script to execute, empty if no messages
284 std::string emailaddress; // email address, or empty
285 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
286 bool emailtest; // Send test email?
287
288 // ATA ONLY
289 bool sct_erc_set; // set SCT ERC to:
290 unsigned short sct_erc_readtime; // ERC read time (deciseconds)
291 unsigned short sct_erc_writetime; // ERC write time (deciseconds)
292
293 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
294 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
295 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
296 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
297
298 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
299
300 ata_vendor_attr_defs attribute_defs; // -v options
301
302 dev_config();
303 };
304
305 dev_config::dev_config()
306 : lineno(0),
307 smartcheck(false),
308 usagefailed(false),
309 prefail(false),
310 usage(false),
311 selftest(false),
312 errorlog(false),
313 xerrorlog(false),
314 permissive(false),
315 autosave(0),
316 autoofflinetest(0),
317 fix_firmwarebug(FIX_NOTSPECIFIED),
318 ignorepresets(false),
319 showpresets(false),
320 removable(false),
321 powermode(0),
322 powerquiet(false),
323 powerskipmax(0),
324 tempdiff(0),
325 tempinfo(0), tempcrit(0),
326 emailfreq(0),
327 emailtest(false),
328 sct_erc_set(false),
329 sct_erc_readtime(0), sct_erc_writetime(0),
330 curr_pending_id(0), offl_pending_id(0),
331 curr_pending_incr(false), offl_pending_incr(false),
332 curr_pending_set(false), offl_pending_set(false)
333 {
334 }
335
336
337 // Number of allowed mail message types
338 static const int SMARTD_NMAIL = 13;
339 // Type for '-M test' mails (state not persistent)
340 static const int MAILTYPE_TEST = 0;
341 // TODO: Add const or enum for all mail types.
342
// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;        // number of times an email has been sent
  time_t firstsent;  // time first email was sent, as defined by time(2)
  time_t lastsent;   // time last email was sent, as defined by time(2)

  // Nothing sent yet.
  mailinfo()
    {
      logged = 0;
      firstsent = 0;
      lastsent = 0;
    }
};
351
352 /// Persistent state data for a device.
353 struct persistent_dev_state
354 {
355 unsigned char tempmin, tempmax; // Min/Max Temperatures
356
357 unsigned char selflogcount; // total number of self-test errors
358 unsigned short selfloghour; // lifetime hours of last self-test error
359
360 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
361
362 uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
363 uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
364
365 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
366
367 // ATA ONLY
368 int ataerrorcount; // Total number of ATA errors
369
370 // Persistent part of ata_smart_values:
371 struct ata_attribute {
372 unsigned char id;
373 unsigned char val;
374 unsigned char worst; // Byte needed for 'raw64' attribute only.
375 uint64_t raw;
376 unsigned char resvd;
377
378 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
379 };
380 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
381
382 persistent_dev_state();
383 };
384
385 persistent_dev_state::persistent_dev_state()
386 : tempmin(0), tempmax(0),
387 selflogcount(0),
388 selfloghour(0),
389 scheduled_test_next_check(0),
390 selective_test_last_start(0),
391 selective_test_last_end(0),
392 ataerrorcount(0)
393 {
394 }
395
396 /// Non-persistent state data for a device.
397 struct temp_dev_state
398 {
399 bool must_write; // true if persistent part should be written
400
401 bool not_cap_offline; // true == not capable of offline testing
402 bool not_cap_conveyance;
403 bool not_cap_short;
404 bool not_cap_long;
405 bool not_cap_selective;
406
407 unsigned char temperature; // last recorded Temperature (in Celsius)
408 time_t tempmin_delay; // time where Min Temperature tracking will start
409
410 bool powermodefail; // true if power mode check failed
411 int powerskipcnt; // Number of checks skipped due to idle or standby mode
412
413 // SCSI ONLY
414 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
415 unsigned char TempPageSupported; // has log sense temperature page (0xd)
416 unsigned char SuppressReport; // minimize nuisance reports
417 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
418 // know yet) 6 or 10
419
420 // ATA ONLY
421 uint64_t num_sectors; // Number of sectors
422 ata_smart_values smartval; // SMART data
423 ata_smart_thresholds_pvt smartthres; // SMART thresholds
424
425 temp_dev_state();
426 };
427
428 temp_dev_state::temp_dev_state()
429 : must_write(false),
430 not_cap_offline(false),
431 not_cap_conveyance(false),
432 not_cap_short(false),
433 not_cap_long(false),
434 not_cap_selective(false),
435 temperature(0),
436 tempmin_delay(0),
437 powermodefail(false),
438 powerskipcnt(0),
439 SmartPageSupported(false),
440 TempPageSupported(false),
441 SuppressReport(false),
442 modese_len(0),
443 num_sectors(0)
444 {
445 memset(&smartval, 0, sizeof(smartval));
446 memset(&smartthres, 0, sizeof(smartthres));
447 }
448
449 /// Runtime state data for a device.
450 struct dev_state
451 : public persistent_dev_state,
452 public temp_dev_state
453 {
454 void update_persistent_state();
455 void update_temp_state();
456 };
457
458 /// Container for configuration info for each device.
459 typedef std::vector<dev_config> dev_config_vector;
460
461 /// Container for state info for each device.
462 typedef std::vector<dev_state> dev_state_vector;
463
464 // Copy ATA attributes to persistent state.
465 void dev_state::update_persistent_state()
466 {
467 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
468 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
469 ata_attribute & pa = ata_attributes[i];
470 pa.id = ta.id;
471 if (ta.id == 0) {
472 pa.val = pa.worst = 0; pa.raw = 0;
473 continue;
474 }
475 pa.val = ta.current;
476 pa.worst = ta.worst;
477 pa.raw = ta.raw[0]
478 | ( ta.raw[1] << 8)
479 | ( ta.raw[2] << 16)
480 | ((uint64_t)ta.raw[3] << 24)
481 | ((uint64_t)ta.raw[4] << 32)
482 | ((uint64_t)ta.raw[5] << 40);
483 pa.resvd = ta.reserv;
484 }
485 }
486
487 // Copy ATA from persistent to temp state.
488 void dev_state::update_temp_state()
489 {
490 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
491 const ata_attribute & pa = ata_attributes[i];
492 ata_smart_attribute & ta = smartval.vendor_attributes[i];
493 ta.id = pa.id;
494 if (pa.id == 0) {
495 ta.current = ta.worst = 0;
496 memset(ta.raw, 0, sizeof(ta.raw));
497 continue;
498 }
499 ta.current = pa.val;
500 ta.worst = pa.worst;
501 ta.raw[0] = (unsigned char) pa.raw;
502 ta.raw[1] = (unsigned char)(pa.raw >> 8);
503 ta.raw[2] = (unsigned char)(pa.raw >> 16);
504 ta.raw[3] = (unsigned char)(pa.raw >> 24);
505 ta.raw[4] = (unsigned char)(pa.raw >> 32);
506 ta.raw[5] = (unsigned char)(pa.raw >> 40);
507 ta.reserv = pa.resvd;
508 }
509 }
510
511 // Parse a line from a state file.
512 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
513 {
514 static const regular_expression regex(
515 "^ *"
516 "((temperature-min)" // (1 (2)
517 "|(temperature-max)" // (3)
518 "|(self-test-errors)" // (4)
519 "|(self-test-last-err-hour)" // (5)
520 "|(scheduled-test-next-check)" // (6)
521 "|(selective-test-last-start)" // (7)
522 "|(selective-test-last-end)" // (8)
523 "|(ata-error-count)" // (9)
524 "|(mail\\.([0-9]+)\\." // (10 (11)
525 "((count)" // (12 (13)
526 "|(first-sent-time)" // (14)
527 "|(last-sent-time)" // (15)
528 ")" // 12)
529 ")" // 10)
530 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
531 "((id)" // (18 (19)
532 "|(val)" // (20)
533 "|(worst)" // (21)
534 "|(raw)" // (22)
535 "|(resvd)" // (23)
536 ")" // 18)
537 ")" // 16)
538 ")" // 1)
539 " *= *([0-9]+)[ \n]*$", // (24)
540 REG_EXTENDED
541 );
542
543 const int nmatch = 1+24;
544 regmatch_t match[nmatch];
545 if (!regex.execute(line, nmatch, match))
546 return false;
547 if (match[nmatch-1].rm_so < 0)
548 return false;
549
550 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
551
552 int m = 1;
553 if (match[++m].rm_so >= 0)
554 state.tempmin = (unsigned char)val;
555 else if (match[++m].rm_so >= 0)
556 state.tempmax = (unsigned char)val;
557 else if (match[++m].rm_so >= 0)
558 state.selflogcount = (unsigned char)val;
559 else if (match[++m].rm_so >= 0)
560 state.selfloghour = (unsigned short)val;
561 else if (match[++m].rm_so >= 0)
562 state.scheduled_test_next_check = (time_t)val;
563 else if (match[++m].rm_so >= 0)
564 state.selective_test_last_start = val;
565 else if (match[++m].rm_so >= 0)
566 state.selective_test_last_end = val;
567 else if (match[++m].rm_so >= 0)
568 state.ataerrorcount = (int)val;
569 else if (match[m+=2].rm_so >= 0) {
570 int i = atoi(line+match[m].rm_so);
571 if (!(0 <= i && i < SMARTD_NMAIL))
572 return false;
573 if (i == MAILTYPE_TEST) // Don't suppress test mails
574 return true;
575 if (match[m+=2].rm_so >= 0)
576 state.maillog[i].logged = (int)val;
577 else if (match[++m].rm_so >= 0)
578 state.maillog[i].firstsent = (time_t)val;
579 else if (match[++m].rm_so >= 0)
580 state.maillog[i].lastsent = (time_t)val;
581 else
582 return false;
583 }
584 else if (match[m+=5+1].rm_so >= 0) {
585 int i = atoi(line+match[m].rm_so);
586 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
587 return false;
588 if (match[m+=2].rm_so >= 0)
589 state.ata_attributes[i].id = (unsigned char)val;
590 else if (match[++m].rm_so >= 0)
591 state.ata_attributes[i].val = (unsigned char)val;
592 else if (match[++m].rm_so >= 0)
593 state.ata_attributes[i].worst = (unsigned char)val;
594 else if (match[++m].rm_so >= 0)
595 state.ata_attributes[i].raw = val;
596 else if (match[++m].rm_so >= 0)
597 state.ata_attributes[i].resvd = (unsigned char)val;
598 else
599 return false;
600 }
601 else
602 return false;
603 return true;
604 }
605
606 // Read a state file.
607 static bool read_dev_state(const char * path, persistent_dev_state & state)
608 {
609 stdio_file f(path, "r");
610 if (!f) {
611 if (errno != ENOENT)
612 pout("Cannot read state file \"%s\"\n", path);
613 return false;
614 }
615 #ifdef __CYGWIN__
616 setmode(fileno(f), O_TEXT); // Allow files with \r\n
617 #endif
618
619 persistent_dev_state new_state;
620 int good = 0, bad = 0;
621 char line[256];
622 while (fgets(line, sizeof(line), f)) {
623 const char * s = line + strspn(line, " \t");
624 if (!*s || *s == '#')
625 continue;
626 if (!parse_dev_state_line(line, new_state))
627 bad++;
628 else
629 good++;
630 }
631
632 if (bad) {
633 if (!good) {
634 pout("%s: format error\n", path);
635 return false;
636 }
637 pout("%s: %d invalid line(s) ignored\n", path, bad);
638 }
639
640 // This sets the values missing in the file to 0.
641 state = new_state;
642 return true;
643 }
644
// Write "name = val" to the state file. Zero values are not written,
// because missing entries are read back as 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (!val)
    return;
  // Note: the spaces around PRIu64 are required since C++11, otherwise
  // the macro name is parsed as a user-defined literal suffix.
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
650
// Write "name1.id.name2 = val" to the state file. Zero values are not
// written, because missing entries are read back as 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (!val)
    return;
  // Note: the spaces around PRIu64 are required since C++11, otherwise
  // the macro name is parsed as a user-defined literal suffix.
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
656
657 // Write a state file
658 static bool write_dev_state(const char * path, const persistent_dev_state & state)
659 {
660 // Rename old "file" to "file~"
661 std::string pathbak = path; pathbak += '~';
662 unlink(pathbak.c_str());
663 rename(path, pathbak.c_str());
664
665 stdio_file f(path, "w");
666 if (!f) {
667 pout("Cannot create state file \"%s\"\n", path);
668 return false;
669 }
670
671 fprintf(f, "# smartd state file\n");
672 write_dev_state_line(f, "temperature-min", state.tempmin);
673 write_dev_state_line(f, "temperature-max", state.tempmax);
674 write_dev_state_line(f, "self-test-errors", state.selflogcount);
675 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
676 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
677 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
678 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
679
680 int i;
681 for (i = 0; i < SMARTD_NMAIL; i++) {
682 if (i == MAILTYPE_TEST) // Don't suppress test mails
683 continue;
684 const mailinfo & mi = state.maillog[i];
685 if (!mi.logged)
686 continue;
687 write_dev_state_line(f, "mail", i, "count", mi.logged);
688 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
689 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
690 }
691
692 // ATA ONLY
693 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
694
695 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
696 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
697 if (!pa.id)
698 continue;
699 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
700 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
701 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
702 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
703 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
704 }
705
706 return true;
707 }
708
709 // Write to the attrlog file
710 static bool write_dev_attrlog(const char * path, const persistent_dev_state & state)
711 {
712 stdio_file f(path, "a");
713 if (!f) {
714 pout("Cannot create attribute log file \"%s\"\n", path);
715 return false;
716 }
717
718 // ATA ONLY
719 time_t now = time(0);
720 struct tm * tms = gmtime(&now);
721 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
722 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
723 tms->tm_hour, tms->tm_min, tms->tm_sec);
724 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
725 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
726 if (!pa.id)
727 continue;
728 fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
729 }
730 fprintf(f, "\n");
731
732 return true;
733 }
734
735 // Write all state files. If write_always is false, don't write
736 // unless must_write is set.
737 static void write_all_dev_states(const dev_config_vector & configs,
738 dev_state_vector & states,
739 bool write_always = true)
740 {
741 for (unsigned i = 0; i < states.size(); i++) {
742 const dev_config & cfg = configs.at(i);
743 if (cfg.state_file.empty())
744 continue;
745 dev_state & state = states[i];
746 if (!write_always && !state.must_write)
747 continue;
748 if (!write_dev_state(cfg.state_file.c_str(), state))
749 continue;
750 state.must_write = false;
751 if (write_always || debugmode)
752 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
753 cfg.name.c_str(), cfg.state_file.c_str());
754 }
755 }
756
757 // Write to all attrlog files
758 static void write_all_dev_attrlogs(const dev_config_vector & configs,
759 dev_state_vector & states)
760 {
761 for (unsigned i = 0; i < states.size(); i++) {
762 const dev_config & cfg = configs.at(i);
763 if (cfg.attrlog_file.empty())
764 continue;
765 dev_state & state = states[i];
766 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
767 }
768 }
769
770 // remove the PID file
771 static void RemovePidFile()
772 {
773 if (!pid_file.empty()) {
774 if (unlink(pid_file.c_str()))
775 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
776 pid_file.c_str(), strerror(errno));
777 pid_file.clear();
778 }
779 return;
780 }
781
782 extern "C" { // signal handlers require C-linkage
783
784 // Note if we catch a SIGUSR1
785 static void USR1handler(int sig)
786 {
787 if (SIGUSR1==sig)
788 caughtsigUSR1=1;
789 return;
790 }
791
#ifdef _WIN32
// Signal handler: record that a SIGUSR2 arrived (other signals are ignored)
static void USR2handler(int sig)
{
  if (sig != SIGUSR2)
    return;
  caughtsigUSR2 = 1;
}
#endif
801
802 // Note if we catch a HUP (or INT in debug mode)
803 static void HUPhandler(int sig)
804 {
805 if (sig==SIGHUP)
806 caughtsigHUP=1;
807 else
808 caughtsigHUP=2;
809 return;
810 }
811
812 // signal handler for TERM, QUIT, and INT (if not in debug mode)
813 static void sighandler(int sig)
814 {
815 if (!caughtsigEXIT)
816 caughtsigEXIT=sig;
817 return;
818 }
819
820 } // extern "C"
821
822 // Cleanup, print Goodbye message and remove pidfile
823 static int Goodbye(int status)
824 {
825 // delete PID file, if one was created
826 RemovePidFile();
827
828 // if we are exiting because of a code bug, tell user
829 if (status==EXIT_BADCODE)
830 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
831
832 // and this should be the final output from smartd before it exits
833 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
834
835 return status;
836 }
837
#define ENVLENGTH 1024

// Replacement for setenv(), which is not available on all platforms.
// putenv() keeps a pointer to the string it is given, so that string
// must stay valid and unmodified while the variable is in use: it has
// to live in a static buffer or on the heap, or - as done by the
// caller here - in a stack buffer that outlives the popen() call.
// The buffer may be reused once the variable has been redefined or
// deleted by another putenv() call.
//
// 'stackspace' must provide at least ENVLENGTH bytes; longer
// "name=value" strings are truncated. Returns the result of putenv().
static int exportenv(char *stackspace, const char *name, const char *value)
{
  snprintf(stackspace, ENVLENGTH, "%s=%s", name, value);
  const int rc = putenv(stackspace);
  return rc;
}
852
853 static char *dnsdomain(const char *hostname)
854 {
855 char *p = NULL;
856 #ifdef HAVE_GETADDRINFO
857 static char canon_name[NI_MAXHOST];
858 struct addrinfo *info = NULL;
859 struct addrinfo hints;
860 int err;
861
862 memset(&hints, 0, sizeof(hints));
863 hints.ai_flags = AI_CANONNAME;
864 if ((err = getaddrinfo(hostname, NULL, &hints, &info)) || (!info)) {
865 PrintOut(LOG_CRIT, "Error retrieving getaddrinfo(%s): %s\n", hostname, gai_strerror(err));
866 return NULL;
867 }
868 if (info->ai_canonname) {
869 strncpy(canon_name, info->ai_canonname, sizeof(canon_name));
870 canon_name[NI_MAXHOST - 1] = '\0';
871 p = canon_name;
872 if ((p = strchr(canon_name, '.')))
873 p++;
874 }
875 freeaddrinfo(info);
876 #elif HAVE_GETHOSTBYNAME
877 struct hostent *hp;
878 if ((hp = gethostbyname(hostname))) {
879 // Does this work if gethostbyname() returns an IPv6 name in
880 // colon/dot notation? [BA]
881 if ((p = strchr(hp->h_name, '.')))
882 p++; // skip "."
883 }
884 #else
885 ARGUSED(hostname);
886 #endif
887 return p;
888 }
889
890 #define EBUFLEN 1024
891
892 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
893 __attribute__ ((format (printf, 4, 5)));
894
895 // If either address or executable path is non-null then send and log
896 // a warning email, or execute executable
897 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...){
898 char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
899 char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
900 char environ_strings[11][ENVLENGTH];
901 time_t epoch;
902 va_list ap;
903 const int day=24*3600;
904 int days=0;
905 const char * const whichfail[]={
906 "EmailTest", // 0
907 "Health", // 1
908 "Usage", // 2
909 "SelfTest", // 3
910 "ErrorCount", // 4
911 "FailedHealthCheck", // 5
912 "FailedReadSmartData", // 6
913 "FailedReadSmartErrorLog", // 7
914 "FailedReadSmartSelfTestLog", // 8
915 "FailedOpenDevice", // 9
916 "CurrentPendingSector", // 10
917 "OfflineUncorrectableSector", // 11
918 "Temperature" // 12
919 };
920
921 const char *unknown="[Unknown]";
922
923 // See if user wants us to send mail
924 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
925 return;
926
927 std::string address = cfg.emailaddress;
928 const char * executable = cfg.emailcmdline.c_str();
929
930 // which type of mail are we sending?
931 mailinfo * mail=(state.maillog)+which;
932
933 // checks for sanity
934 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
935 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
936 return;
937 }
938 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
939 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
940 which, (int)sizeof(whichfail));
941 return;
942 }
943
944 // Return if a single warning mail has been sent.
945 if ((cfg.emailfreq==1) && mail->logged)
946 return;
947
948 // Return if this is an email test and one has already been sent.
949 if (which == 0 && mail->logged)
950 return;
951
952 // To decide if to send mail, we need to know what time it is.
953 epoch=time(NULL);
954
955 // Return if less than one day has gone by
956 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
957 return;
958
959 // Return if less than 2^(logged-1) days have gone by
960 if (cfg.emailfreq==3 && mail->logged) {
961 days=0x01<<(mail->logged-1);
962 days*=day;
963 if (epoch<(mail->lastsent+days))
964 return;
965 }
966
967 #ifdef HAVE_LIBCAP_NG
968 if (enable_capabilities) {
969 PrintOut(LOG_ERR, "Sending a mail was supressed. "
970 "Mails can't be send when capabilites are enabled\n");
971 return;
972 }
973 #endif
974
975 // record the time of this mail message, and the first mail message
976 if (!mail->logged)
977 mail->firstsent=epoch;
978 mail->lastsent=epoch;
979
980 // get system host & domain names (not null terminated if length=MAX)
981 #ifdef HAVE_GETHOSTNAME
982 if (gethostname(hostname, 256))
983 strcpy(hostname, unknown);
984 else {
985 char *p=NULL;
986 hostname[255]='\0';
987 p = dnsdomain(hostname);
988 if (p && *p) {
989 strncpy(domainname, p, 255);
990 domainname[255]='\0';
991 } else
992 strcpy(domainname, unknown);
993 }
994 #else
995 strcpy(hostname, unknown);
996 strcpy(domainname, unknown);
997 #endif
998
999 #ifdef HAVE_GETDOMAINNAME
1000 if (getdomainname(nisdomain, 256))
1001 strcpy(nisdomain, unknown);
1002 else
1003 nisdomain[255]='\0';
1004 #else
1005 strcpy(nisdomain, unknown);
1006 #endif
1007
1008 // print warning string into message
1009 va_start(ap, fmt);
1010 vsnprintf(message, 256, fmt, ap);
1011 va_end(ap);
1012
1013 // appropriate message about further information
1014 additional[0]=original[0]=further[0]='\0';
1015 if (which) {
1016 sprintf(further,"You can also use the smartctl utility for further investigation.\n");
1017
1018 switch (cfg.emailfreq) {
1019 case 1:
1020 sprintf(additional,"No additional email messages about this problem will be sent.\n");
1021 break;
1022 case 2:
1023 sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
1024 break;
1025 case 3:
1026 sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
1027 (0x01)<<mail->logged);
1028 break;
1029 }
1030 if (cfg.emailfreq>1 && mail->logged) {
1031 dateandtimezoneepoch(dates, mail->firstsent);
1032 sprintf(original,"The original email about this issue was sent at %s\n", dates);
1033 }
1034 }
1035
1036 snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
1037
1038 // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
1039 if (!*executable)
1040 #ifdef DEFAULT_MAILER
1041 executable = DEFAULT_MAILER ;
1042 #else
1043 #ifndef _WIN32
1044 executable = "mail";
1045 #else
1046 executable = "blat"; // http://blat.sourceforge.net/
1047 #endif
1048 #endif
1049
1050 #ifndef _WIN32 // blat mailer needs comma
1051 // replace commas by spaces to separate recipients
1052 std::replace(address.begin(), address.end(), ',', ' ');
1053 #endif
1054 // Export information in environment variables that will be useful
1055 // for user scripts
1056 exportenv(environ_strings[0], "SMARTD_MAILER", executable);
1057 exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
1058 exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
1059 dateandtimezoneepoch(dates, mail->firstsent);
1060 exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
1061 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1062 exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
1063 exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
1064 if (!address.empty())
1065 exportenv(environ_strings[6], "SMARTD_ADDRESS", address.c_str());
1066 exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg.name.c_str());
1067
1068 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1069 exportenv(environ_strings[8], "SMARTD_DEVICETYPE",
1070 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1071 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg.dev_name.c_str());
1072
1073 snprintf(fullmessage, 1024,
1074 "This email was generated by the smartd daemon running on:\n\n"
1075 " host name: %s\n"
1076 " DNS domain: %s\n"
1077 " NIS domain: %s\n\n"
1078 "The following warning/error was logged by the smartd daemon:\n\n"
1079 "%s\n\n"
1080 "For details see host's SYSLOG.\n\n"
1081 "%s%s%s",
1082 hostname, domainname, nisdomain, message, further, original, additional);
1083 exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
1084
1085 // now construct a command to send this as EMAIL
1086 #ifndef _WIN32
1087 if (!address.empty())
1088 snprintf(command, 2048,
1089 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1090 "%sENDMAIL\n", subject, address.c_str(), fullmessage);
1091 else
1092 snprintf(command, 2048, "%s 2>&1", executable);
1093
1094 // tell SYSLOG what we are about to do...
1095 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1096 const char * newwarn = (which? "Warning via" : "Test of");
1097
1098 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1099 which?"Sending warning via":"Executing test of", executable, newadd);
1100
1101 // issue the command to send mail or to run the user's executable
1102 errno=0;
1103 FILE * pfp;
1104 if (!(pfp=popen(command, "r")))
1105 // failed to popen() mail process
1106 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1107 newwarn, executable, newadd, errno?strerror(errno):"");
1108 else {
1109 // pipe suceeded!
1110 int len, status;
1111 char buffer[EBUFLEN];
1112
1113 // if unexpected output on stdout/stderr, null terminate, print, and flush
1114 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1115 int count=0;
1116 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1117 buffer[newlen]='\0';
1118 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1119 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1120
1121 // flush pipe if needed
1122 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1123 count++;
1124
1125 // tell user that pipe was flushed, or that something is really wrong
1126 if (count && count<EBUFLEN)
1127 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1128 newwarn, executable, newadd);
1129 else if (count)
1130 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1131 newwarn, executable, newadd);
1132 }
1133
1134 // if something went wrong with mail process, print warning
1135 errno=0;
1136 if (-1==(status=pclose(pfp)))
1137 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1138 errno?strerror(errno):"");
1139 else {
1140 // mail process apparently succeeded. Check and report exit status
1141 int status8;
1142
1143 if (WIFEXITED(status)) {
1144 // exited 'normally' (but perhaps with nonzero status)
1145 status8=WEXITSTATUS(status);
1146
1147 if (status8>128)
1148 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1149 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1150 else if (status8)
1151 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1152 newwarn, executable, newadd, status, status8);
1153 else
1154 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1155 }
1156
1157 if (WIFSIGNALED(status))
1158 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1159 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1160
1161 // this branch is probably not possible. If subprocess is
1162 // stopped then pclose() should not return.
1163 if (WIFSTOPPED(status))
1164 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1165 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1166
1167 }
1168 }
1169
1170 #else // _WIN32
1171
1172 // No "here-documents" on Windows, so must use separate commandline and stdin
1173 char stdinbuf[1024];
1174 command[0] = stdinbuf[0] = 0;
1175 int boxtype = -1, boxmsgoffs = 0;
1176 const char * newadd = "<nomailer>";
1177 if (!address.empty()) {
1178 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1179 char addr1[9+1+13] = ""; int n1 = -1, n2 = -1;
1180 if (sscanf(address.c_str(), "%9[a-z]%n,%n", addr1, &n1, &n2) == 1 && (n1 == (int)address.size() || n2 > 0)) {
1181 if (!strcmp(addr1, "msgbox"))
1182 boxtype = 0;
1183 else if (!strcmp(addr1, "sysmsgbox"))
1184 boxtype = 1;
1185 if (boxtype >= 0)
1186 address.erase(0, (n2 > n1 ? n2 : n1));
1187 }
1188
1189 if (!address.empty()) {
1190 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1191 snprintf(command, sizeof(command),
1192 "%s - -q -subject \"%s\" -to \"%s\"",
1193 executable, subject, address.c_str());
1194 newadd = address.c_str();
1195 }
1196
1197 #ifdef _MSC_VER
1198 _set_printf_count_output(1); // "%n" disabled by default
1199 #endif
1200 // Message for mail [0...] and messagebox [boxmsgoffs...]
1201 snprintf(stdinbuf, sizeof(stdinbuf),
1202 "This email was generated by the smartd daemon running on:\n\n"
1203 " host name: %s\n"
1204 " DNS domain: %s\n"
1205 // " NIS domain: %s\n"
1206 "\n%n"
1207 "The following warning/error was logged by the smartd daemon:\n\n"
1208 "%s\n\n"
1209 "For details see the event log or log file of smartd.\n\n"
1210 "%s%s%s"
1211 "\n",
1212 hostname, /*domainname, */ nisdomain, &boxmsgoffs, message, further, original, additional);
1213 }
1214 else
1215 snprintf(command, sizeof(command), "%s", executable);
1216
1217 const char * newwarn = (which ? "Warning via" : "Test of");
1218 if (boxtype >= 0) {
1219 // show message box
1220 daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
1221 PrintOut(LOG_INFO,"%s message box\n", newwarn);
1222 }
1223 if (command[0]) {
1224 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1225 int rc;
1226 // run command
1227 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1228 (which?"Sending warning via":"Executing test of"), executable, newadd);
1229 rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
1230 if (rc >= 0 && stdoutbuf[0])
1231 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1232 newwarn, executable, newadd, strlen(stdoutbuf), stdoutbuf);
1233 if (rc != 0)
1234 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1235 newwarn, executable, newadd, rc);
1236 else
1237 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1238 }
1239
1240 #endif // _WIN32
1241
1242 // increment mail sent counter
1243 mail->logged++;
1244 }
1245
1246 #ifndef _WIN32
1247
1248 // Output multiple lines via separate syslog(3) calls.
1249 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1250 {
1251 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1252 vsnprintf(buf, sizeof(buf), fmt, ap);
1253
1254 for (char * p = buf, * q; p && *p; p = q) {
1255 if ((q = strchr(p, '\n')))
1256 *q++ = 0;
1257 if (*p)
1258 syslog(priority, "%s\n", p);
1259 }
1260 }
1261
1262 #else // _WIN32
1263 // os_win32/syslog_win32.cpp supports multiple lines.
1264 #define vsyslog_lines vsyslog
1265 #endif // _WIN32
1266
1267 // Printing function for watching ataprint commands, or losing them
1268 // [From GLIBC Manual: Since the prototype doesn't specify types for
1269 // optional arguments, in a call to a variadic function the default
1270 // argument promotions are performed on the optional argument
1271 // values. This means the objects of type char or short int (whether
1272 // signed or not) are promoted to either int or unsigned int, as
1273 // appropriate.]
1274 void pout(const char *fmt, ...){
1275 va_list ap;
1276
1277 // get the correct time in syslog()
1278 FixGlibcTimeZoneBug();
1279 // initialize variable argument list
1280 va_start(ap,fmt);
1281 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1282 if (debugmode && debugmode!=2)
1283 #ifdef _WIN32
1284 if (facility == LOG_LOCAL1) // logging to stdout
1285 vfprintf(stderr,fmt,ap);
1286 else
1287 #endif
1288 vprintf(fmt,ap);
1289 // in debugmode==2 mode we print output from knowndrives.o functions
1290 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1291 openlog("smartd", LOG_PID, facility);
1292 vsyslog_lines(LOG_INFO, fmt, ap);
1293 closelog();
1294 }
1295 va_end(ap);
1296 fflush(NULL);
1297 return;
1298 }
1299
1300 // This function prints either to stdout or to the syslog as needed.
1301 static void PrintOut(int priority, const char *fmt, ...){
1302 va_list ap;
1303
1304 // get the correct time in syslog()
1305 FixGlibcTimeZoneBug();
1306 // initialize variable argument list
1307 va_start(ap,fmt);
1308 if (debugmode)
1309 #ifdef _WIN32
1310 if (facility == LOG_LOCAL1) // logging to stdout
1311 vfprintf(stderr,fmt,ap);
1312 else
1313 #endif
1314 vprintf(fmt,ap);
1315 else {
1316 openlog("smartd", LOG_PID, facility);
1317 vsyslog_lines(priority, fmt, ap);
1318 closelog();
1319 }
1320 va_end(ap);
1321 return;
1322 }
1323
1324 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1325 void checksumwarning(const char * string)
1326 {
1327 pout("Warning! %s error: invalid SMART checksum.\n", string);
1328 }
1329
1330 #ifndef _WIN32
1331
1332 // Wait for the pid file to show up, this makes sure a calling program knows
1333 // that the daemon is really up and running and has a pid to kill it
1334 static bool WaitForPidFile()
1335 {
1336 int waited, max_wait = 10;
1337 struct stat stat_buf;
1338
1339 if (pid_file.empty() || debugmode)
1340 return true;
1341
1342 for(waited = 0; waited < max_wait; ++waited) {
1343 if (!stat(pid_file.c_str(), &stat_buf)) {
1344 return true;
1345 } else
1346 sleep(1);
1347 }
1348 return false;
1349 }
1350
1351 #endif // _WIN32
1352
1353 // Forks new process, closes ALL file descriptors, redirects stdin,
1354 // stdout, and stderr. Not quite daemon(). See
1355 // http://www.linuxjournal.com/article/2335
1356 // for a good description of why we do things this way.
1357 static void DaemonInit()
1358 {
1359 #ifndef _WIN32
1360 pid_t pid;
1361 int i;
1362
1363 // flush all buffered streams. Else we might get two copies of open
1364 // streams since both parent and child get copies of the buffers.
1365 fflush(NULL);
1366
1367 if (do_fork) {
1368 if ((pid=fork()) < 0) {
1369 // unable to fork!
1370 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1371 EXIT(EXIT_STARTUP);
1372 }
1373 else if (pid) {
1374 // we are the parent process, wait for pid file, then exit cleanly
1375 if(!WaitForPidFile()) {
1376 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1377 EXIT(EXIT_STARTUP);
1378 } else
1379 EXIT(0);
1380 }
1381
1382 // from here on, we are the child process.
1383 setsid();
1384
1385 // Fork one more time to avoid any possibility of having terminals
1386 if ((pid=fork()) < 0) {
1387 // unable to fork!
1388 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1389 EXIT(EXIT_STARTUP);
1390 }
1391 else if (pid)
1392 // we are the parent process -- exit cleanly
1393 EXIT(0);
1394
1395 // Now we are the child's child...
1396 }
1397
1398 // close any open file descriptors
1399 for (i=getdtablesize();i>=0;--i)
1400 close(i);
1401
1402 #ifdef __CYGWIN__
1403 // Cygwin's setsid() does not detach the process from Windows console
1404 FreeConsole();
1405 #endif // __CYGWIN__
1406
1407 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1408
1409 // redirect any IO attempts to /dev/null for stdin
1410 i=open("/dev/null",O_RDWR);
1411 if (i>=0) {
1412 // stdout
1413 NO_warn_unused_result(dup(i));
1414 // stderr
1415 NO_warn_unused_result(dup(i));
1416 };
1417 umask(0022);
1418 NO_warn_unused_result(chdir("/"));
1419
1420 if (do_fork)
1421 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1422
1423 #else // _WIN32
1424
1425 // No fork() on native Win32
1426 // Detach this process from console
1427 fflush(NULL);
1428 if (daemon_detach("smartd")) {
1429 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1430 EXIT(EXIT_STARTUP);
1431 }
1432 // stdin/out/err now closed if not redirected
1433
1434 #endif // _WIN32
1435 return;
1436 }
1437
1438 // create a PID file containing the current process id
1439 static void WritePidFile()
1440 {
1441 if (!pid_file.empty()) {
1442 pid_t pid = getpid();
1443 mode_t old_umask;
1444 #ifndef __CYGWIN__
1445 old_umask = umask(0077); // rwx------
1446 #else
1447 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1448 old_umask = umask(0033); // rwxr--r--
1449 #endif
1450
1451 stdio_file f(pid_file.c_str(), "w");
1452 umask(old_umask);
1453 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1454 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1455 EXIT(EXIT_PID);
1456 }
1457 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1458 }
1459 }
1460
1461 // Prints header identifying version of code and home
1462 static void PrintHead()
1463 {
1464 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1465 }
1466
1467 // prints help info for configuration file Directives
1468 static void Directives()
1469 {
1470 PrintOut(LOG_INFO,
1471 "Configuration file (%s) Directives (after device name):\n"
1472 " -d TYPE Set the device type: %s, auto, removable\n"
1473 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1474 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1475 " -S VAL Enable/disable attribute autosave (on/off)\n"
1476 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1477 " -H Monitor SMART Health Status, report if failed\n"
1478 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1479 " -l TYPE Monitor SMART log. Type is one of: error, selftest, xerror\n"
1480 " -l scterc,R,W Set SCT Error Recovery Control\n"
1481 " -f Monitor 'Usage' Attributes, report failures\n"
1482 " -m ADD Send email warning to address ADD\n"
1483 " -M TYPE Modify email warning behavior (see man page)\n"
1484 " -p Report changes in 'Prefailure' Attributes\n"
1485 " -u Report changes in 'Usage' Attributes\n"
1486 " -t Equivalent to -p and -u Directives\n"
1487 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1488 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1489 " -i ID Ignore Attribute ID for -f Directive\n"
1490 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1491 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1492 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1493 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1494 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1495 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1496 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1497 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1498 " # Comment: text after a hash sign is ignored\n"
1499 " \\ Line continuation character\n"
1500 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1501 "Use ID = 0 to turn off -C and/or -U Directives\n"
1502 "Example: /dev/hda -a\n",
1503 configfile, smi()->get_valid_dev_types_str().c_str());
1504 return;
1505 }
1506
/* Maps a command line option character to a constant string listing its
   valid arguments. Returns NULL for options without such a list. */
static const char *GetValidArgList(char opt)
{
  // Options taking a path prefix
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";

  // Configuration file name or stdin
  if (opt == 'c')
    return "<FILE_NAME>, -";

  // Log facility
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";

  // Quit behavior
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";

  // Transaction reporting
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";

  // Options taking a plain file name
  if (opt == 'B' || opt == 'p')
    return "<FILE_NAME>";

  // Check interval
  if (opt == 'i')
    return "<INTEGER_SECONDS>";

  return NULL;
}
1532
1533 /* prints help information for command syntax */
1534 static void Usage()
1535 {
1536 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1537 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1538 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1539 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1540 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1541 #endif
1542 PrintOut(LOG_INFO,"\n");
1543 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1544 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1545 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1546 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1547 PrintOut(LOG_INFO,"\n");
1548 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1549 #endif
1550 PrintOut(LOG_INFO,"]\n\n");
1551 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1552 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1553 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1554 #ifdef HAVE_LIBCAP_NG
1555 PrintOut(LOG_INFO," -C, --capabilities\n");
1556 PrintOut(LOG_INFO," Use capabilities (EXPERIMENTAL).\n"
1557 " Warning: Mail notification does not work when used.\n\n");
1558 #endif
1559 PrintOut(LOG_INFO," -d, --debug\n");
1560 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1561 PrintOut(LOG_INFO," -D, --showdirectives\n");
1562 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1563 PrintOut(LOG_INFO," -h, --help, --usage\n");
1564 PrintOut(LOG_INFO," Display this help and exit\n\n");
1565 PrintOut(LOG_INFO," -i N, --interval=N\n");
1566 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1567 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1568 #ifndef _WIN32
1569 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1570 #else
1571 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1572 #endif
1573 #ifndef _WIN32
1574 PrintOut(LOG_INFO," -n, --no-fork\n");
1575 PrintOut(LOG_INFO," Do not fork into background\n\n");
1576 #endif // _WIN32
1577 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1578 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1579 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1580 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1581 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1582 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1583 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1584 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1585 #ifdef SMARTMONTOOLS_SAVESTATES
1586 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1587 #endif
1588 PrintOut(LOG_INFO,"\n");
1589 #ifdef _WIN32
1590 PrintOut(LOG_INFO," --service\n");
1591 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1592 PrintOut(LOG_INFO," smartd install [options]\n");
1593 PrintOut(LOG_INFO," Remove service with:\n");
1594 PrintOut(LOG_INFO," smartd remove\n\n");
1595 #endif // _WIN32
1596 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1597 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1598 }
1599
1600 static int CloseDevice(smart_device * device, const char * name)
1601 {
1602 if (!device->close()){
1603 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1604 return 1;
1605 }
1606 // device sucessfully closed
1607 return 0;
1608 }
1609
// Predicate for state file names: only ASCII digits and ASCII letters
// are allowed. Returns true for every other char.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1617
1618 // Read error count from Summary or Extended Comprehensive SMART error log
1619 // Return -1 on error
1620 static int read_ata_error_count(ata_device * device, const char * name,
1621 unsigned char fix_firmwarebug, bool extended)
1622 {
1623 if (!extended) {
1624 ata_smart_errorlog log;
1625 if (ataReadErrorLog(device, &log, fix_firmwarebug)){
1626 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1627 return -1;
1628 }
1629 return (log.error_log_pointer ? log.ata_error_count : 0);
1630 }
1631 else {
1632 ata_smart_exterrlog logx;
1633 if (!ataReadExtErrorLog(device, &logx, 1 /*first sector only*/)) {
1634 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1635 return -1;
1636 }
1637 // Some disks use the reserved byte as index, see ataprint.cpp.
1638 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1639 }
1640 }
1641
1642 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1643 // error count, and top bits are the power-on hours of the last error.
1644 static int SelfTestErrorCount(ata_device * device, const char * name,
1645 unsigned char fix_firmwarebug)
1646 {
1647 struct ata_smart_selftestlog log;
1648
1649 if (ataReadSelfTestLog(device, &log, fix_firmwarebug)){
1650 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1651 return -1;
1652 }
1653
1654 // return current number of self-test errors
1655 return ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug);
1656 }
1657
// Decode the packed self-test value: the low 8 bits hold the error count,
// the next 16 bits the power-on hours of the last error.
// The argument is parenthesized so that expressions with low-precedence
// operators (e.g. 'a | b') are evaluated as a whole.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1660
1661 // Log self-test execution status
1662 static void log_self_test_exec_status(const char * name, unsigned char status)
1663 {
1664 const char * msg;
1665 switch (status >> 4) {
1666 case 0x0: msg = "completed without error"; break;
1667 case 0x1: msg = "was aborted by the host"; break;
1668 case 0x2: msg = "was interrupted by the host with a reset"; break;
1669 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1670 case 0x4: msg = "completed with error (unknown test element)"; break;
1671 case 0x5: msg = "completed with error (electrical test element)"; break;
1672 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1673 case 0x7: msg = "completed with error (read test element)"; break;
1674 case 0x8: msg = "completed with error (handling damage?)"; break;
1675 default: msg = 0;
1676 }
1677
1678 if (msg)
1679 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1680 "Device: %s, previous self-test %s\n", name, msg);
1681 else if ((status >> 4) == 0xf)
1682 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1683 name, status & 0x0f);
1684 else
1685 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1686 name, status);
1687 }
1688
1689 // Check pending sector count id (-C, -U directives).
1690 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1691 unsigned char id, const char * msg)
1692 {
1693 // Check attribute index
1694 int i = ata_find_attr_index(id, state.smartval);
1695 if (i < 0) {
1696 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1697 cfg.name.c_str(), msg, id);
1698 return false;
1699 }
1700
1701 // Check value
1702 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1703 cfg.attribute_defs);
1704 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1705 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %"PRIu64" (0x%"PRIx64")\n",
1706 cfg.name.c_str(), msg, id, rawval, rawval);
1707 return false;
1708 }
1709
1710 return true;
1711 }
1712
1713 // Called by ATA/SCSIDeviceScan() after successful device check
1714 static void finish_device_scan(dev_config & cfg, dev_state & state)
1715 {
1716 // Set cfg.emailfreq if user hasn't set it
1717 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1718 // Avoid that emails are suppressed forever due to state persistence
1719 if (cfg.state_file.empty())
1720 cfg.emailfreq = 1; // '-M once'
1721 else
1722 cfg.emailfreq = 2; // '-M daily'
1723 }
1724
1725 // Start self-test regex check now if time was not read from state file
1726 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1727 state.scheduled_test_next_check = time(0);
1728 }
1729
1730
// Passed to ata_read_identity(); fixed to false for now.
// TODO: Add '-F swapid' directive
const bool fix_swapped_id(false);
1733
1734 // scan to see what ata devices there are, and if they support SMART
1735 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1736 {
1737 int supported=0;
1738 struct ata_identify_device drive;
1739 const char *name = cfg.name.c_str();
1740 int retid;
1741
1742 // Device must be open
1743
1744 // Get drive identity structure
1745 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1746 if (retid<0)
1747 // Unable to read Identity structure
1748 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1749 else
1750 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1751 name, packetdevicetype(retid-1));
1752 CloseDevice(atadev, name);
1753 return 2;
1754 }
1755
1756 // Log drive identity and size
1757 char model[40+1], serial[20+1], firmware[8+1];
1758 ata_format_id_string(model, drive.model, sizeof(model)-1);
1759 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1760 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1761 state.num_sectors = get_num_sectors(&drive);
1762 PrintOut(LOG_INFO, "Device: %s, %s, S/N:%s, FW:%s, %"PRIu64" sectors\n", name,
1763 model, serial, firmware, state.num_sectors);
1764
1765 // Show if device in database, and use preset vendor attribute
1766 // options unless user has requested otherwise.
1767 if (cfg.ignorepresets)
1768 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1769 else {
1770 // Apply vendor specific presets, print warning if present
1771 const drive_settings * dbentry = lookup_drive_apply_presets(
1772 &drive, cfg.attribute_defs, cfg.fix_firmwarebug);
1773 if (!dbentry)
1774 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1775 else {
1776 PrintOut(LOG_INFO, "Device: %s, found in smartd database.\n", name);
1777 if (*dbentry->warningmsg)
1778 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1779 }
1780 }
1781
1782 // Set default '-C 197[+]' if no '-C ID' is specified.
1783 if (!cfg.curr_pending_set)
1784 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1785 // Set default '-U 198[+]' if no '-U ID' is specified.
1786 if (!cfg.offl_pending_set)
1787 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1788
1789 // If requested, show which presets would be used for this drive
1790 if (cfg.showpresets) {
1791 int savedebugmode=debugmode;
1792 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1793 if (!debugmode)
1794 debugmode=2;
1795 show_presets(&drive);
1796 debugmode=savedebugmode;
1797 }
1798
1799 // see if drive supports SMART
1800 supported=ataSmartSupport(&drive);
1801 if (supported!=1) {
1802 if (supported==0)
1803 // drive does NOT support SMART
1804 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1805 else
1806 // can't tell if drive supports SMART
1807 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1808
1809 // should we proceed anyway?
1810 if (cfg.permissive) {
1811 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1812 }
1813 else {
1814 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1815 CloseDevice(atadev, name);
1816 return 2;
1817 }
1818 }
1819
1820 if (ataEnableSmart(atadev)) {
1821 // Enable SMART command has failed
1822 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1823 CloseDevice(atadev, name);
1824 return 2;
1825 }
1826
1827 // disable device attribute autosave...
1828 if (cfg.autosave==1) {
1829 if (ataDisableAutoSave(atadev))
1830 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1831 else
1832 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1833 }
1834
1835 // or enable device attribute autosave
1836 if (cfg.autosave==2) {
1837 if (ataEnableAutoSave(atadev))
1838 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1839 else
1840 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1841 }
1842
1843 // capability check: SMART status
1844 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1845 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1846 cfg.smartcheck = false;
1847 }
1848
1849 // capability check: Read smart values and thresholds. Note that
1850 // smart values are ALSO needed even if we ONLY want to know if the
1851 // device is self-test log or error-log capable! After ATA-5, this
1852 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1853 // but sadly not for ATA-5. Sigh.
1854
1855 // do we need to get SMART data?
1856 bool smart_val_ok = false;
1857 if ( cfg.autoofflinetest || cfg.selftest
1858 || cfg.errorlog || cfg.xerrorlog
1859 || cfg.usagefailed || cfg.prefail || cfg.usage
1860 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1861 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1862
1863 if (ataReadSmartValues(atadev, &state.smartval)) {
1864 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1865 cfg.usagefailed = cfg.prefail = cfg.usage = false;
1866 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1867 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1868 }
1869 else {
1870 smart_val_ok = true;
1871 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1872 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1873 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1874 cfg.usagefailed = false;
1875 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1876 memset(&state.smartthres, 0, sizeof(state.smartthres));
1877 }
1878 }
1879
1880 // see if the necessary Attribute is there to monitor offline or
1881 // current pending sectors or temperature
1882 if ( cfg.curr_pending_id
1883 && !check_pending_id(cfg, state, cfg.curr_pending_id,
1884 "Current_Pending_Sector"))
1885 cfg.curr_pending_id = 0;
1886
1887 if ( cfg.offl_pending_id
1888 && !check_pending_id(cfg, state, cfg.offl_pending_id,
1889 "Offline_Uncorrectable"))
1890 cfg.offl_pending_id = 0;
1891
1892 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1893 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1894 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
1895 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1896 }
1897 }
1898
1899 // enable/disable automatic on-line testing
1900 if (cfg.autoofflinetest) {
1901 // is this an enable or disable request?
1902 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1903 if (!smart_val_ok)
1904 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1905 else {
1906 // if command appears unsupported, issue a warning...
1907 if (!isSupportAutomaticTimer(&state.smartval))
1908 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1909 // ... but then try anyway
1910 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1911 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1912 else
1913 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1914 }
1915 }
1916
1917 // Read log directories if required for capability check
1918 ata_smart_log_directory smart_logdir, gp_logdir;
1919 bool smart_logdir_ok = false, gp_logdir_ok = false;
1920
1921 if ( isGeneralPurposeLoggingCapable(&drive)
1922 && (cfg.errorlog || cfg.selftest) ) {
1923 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1924 smart_logdir_ok = true;
1925 }
1926
1927 if (cfg.xerrorlog) {
1928 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
1929 gp_logdir_ok = true;
1930 }
1931
1932 // capability check: self-test-log
1933 state.selflogcount = 0; state.selfloghour = 0;
1934 if (cfg.selftest) {
1935 int retval;
1936 if (!( cfg.permissive
1937 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
1938 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
1939 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
1940 cfg.selftest = false;
1941 }
1942 else if ((retval = SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0) {
1943 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
1944 cfg.selftest = false;
1945 }
1946 else {
1947 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1948 state.selfloghour =SELFTEST_ERRORHOURS(retval);
1949 }
1950 }
1951
1952 // capability check: ATA error log
1953 state.ataerrorcount = 0;
1954 if (cfg.errorlog) {
1955 int errcnt1;
1956 if (!( cfg.permissive
1957 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
1958 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
1959 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
1960 cfg.errorlog = false;
1961 }
1962 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false)) < 0) {
1963 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
1964 cfg.errorlog = false;
1965 }
1966 else
1967 state.ataerrorcount = errcnt1;
1968 }
1969
1970 if (cfg.xerrorlog) {
1971 int errcnt2;
1972 if (!(cfg.permissive || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors))) {
1973 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
1974 name);
1975 cfg.xerrorlog = false;
1976 }
1977 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true)) < 0) {
1978 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
1979 cfg.xerrorlog = false;
1980 }
1981 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
1982 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
1983 name, state.ataerrorcount, errcnt2);
1984 // Record max error count
1985 if (errcnt2 > state.ataerrorcount)
1986 state.ataerrorcount = errcnt2;
1987 }
1988 else
1989 state.ataerrorcount = errcnt2;
1990 }
1991
1992 // capabilities check -- does it support powermode?
1993 if (cfg.powermode) {
1994 int powermode = ataCheckPowerMode(atadev);
1995
1996 if (-1 == powermode) {
1997 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
1998 cfg.powermode=0;
1999 }
2000 else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
2001 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2002 name, powermode);
2003 cfg.powermode=0;
2004 }
2005 }
2006
2007 // set SCT Error Recovery Control if requested
2008 if (cfg.sct_erc_set) {
2009 if (!isSCTErrorRecoveryControlCapable(&drive))
2010 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2011 name);
2012 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2013 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2014 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2015 else
2016 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2017 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2018 }
2019
2020 // If no tests available or selected, return
2021 if (!( cfg.smartcheck || cfg.selftest
2022 || cfg.errorlog || cfg.xerrorlog
2023 || cfg.usagefailed || cfg.prefail || cfg.usage
2024 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2025 CloseDevice(atadev, name);
2026 return 3;
2027 }
2028
2029 // tell user we are registering device
2030 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2031
2032 // close file descriptor
2033 CloseDevice(atadev, name);
2034
2035 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2036 // Build file name for state file
2037 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2038 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2039 if (!state_path_prefix.empty()) {
2040 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2041 // Read previous state
2042 if (read_dev_state(cfg.state_file.c_str(), state)) {
2043 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2044 // Copy ATA attribute values to temp state
2045 state.update_temp_state();
2046 }
2047 }
2048 if (!attrlog_path_prefix.empty())
2049 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2050 }
2051
2052 finish_device_scan(cfg, state);
2053
2054 return 0;
2055 }
2056
2057 // on success, return 0. On failure, return >0. Never return <0,
2058 // please.
2059 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2060 {
2061 int k, err;
2062 const char *device = cfg.name.c_str();
2063 struct scsi_iec_mode_page iec;
2064 UINT8 tBuf[64];
2065
2066 // Device must be open
2067
2068 // check that device is ready for commands. IE stores its stuff on
2069 // the media.
2070 if ((err = scsiTestUnitReady(scsidev))) {
2071 if (SIMPLE_ERR_NOT_READY == err)
2072 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2073 else if (SIMPLE_ERR_NO_MEDIUM == err)
2074 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2075 else if (SIMPLE_ERR_BECOMING_READY == err)
2076 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2077 else
2078 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2079 CloseDevice(scsidev, device);
2080 return 2;
2081 }
2082
2083 // Badly-conforming USB storage devices may fail this check.
2084 // The response to the following IE mode page fetch (current and
2085 // changeable values) is carefully examined. It has been found
2086 // that various USB devices that malform the response will lock up
2087 // if asked for a log page (e.g. temperature) so it is best to
2088 // bail out now.
2089 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2090 state.modese_len = iec.modese_len;
2091 else if (SIMPLE_ERR_BAD_FIELD == err)
2092 ; /* continue since it is reasonable not to support IE mpage */
2093 else { /* any other error (including malformed response) unreasonable */
2094 PrintOut(LOG_INFO,
2095 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2096 device, err);
2097 CloseDevice(scsidev, device);
2098 return 3;
2099 }
2100
2101 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2102 // smart if it is off). This may change to be the same as the ATA side.
2103 if (!scsi_IsExceptionControlEnabled(&iec)) {
2104 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2105 "Try 'smartctl -s on %s' to turn on SMART features\n",
2106 device, device);
2107 CloseDevice(scsidev, device);
2108 return 3;
2109 }
2110
2111 // Flag that certain log pages are supported (information may be
2112 // available from other sources).
2113 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
2114 for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2115 switch (tBuf[k]) {
2116 case TEMPERATURE_LPAGE:
2117 state.TempPageSupported = 1;
2118 break;
2119 case IE_LPAGE:
2120 state.SmartPageSupported = 1;
2121 break;
2122 default:
2123 break;
2124 }
2125 }
2126 }
2127
2128 // Check if scsiCheckIE() is going to work
2129 {
2130 UINT8 asc = 0;
2131 UINT8 ascq = 0;
2132 UINT8 currenttemp = 0;
2133 UINT8 triptemp = 0;
2134
2135 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2136 &asc, &ascq, &currenttemp, &triptemp)) {
2137 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2138 state.SuppressReport = 1;
2139 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2140 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
2141 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2142 }
2143 }
2144 }
2145
2146 // capability check: self-test-log
2147 if (cfg.selftest){
2148 int retval = scsiCountFailedSelfTests(scsidev, 0);
2149 if (retval<0) {
2150 // no self-test log, turn off monitoring
2151 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2152 cfg.selftest = false;
2153 state.selflogcount = 0;
2154 state.selfloghour = 0;
2155 }
2156 else {
2157 // register starting values to watch for changes
2158 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2159 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2160 }
2161 }
2162
2163 // disable autosave (set GLTSD bit)
2164 if (cfg.autosave==1){
2165 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2166 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2167 else
2168 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2169 }
2170
2171 // or enable autosave (clear GLTSD bit)
2172 if (cfg.autosave==2){
2173 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2174 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2175 else
2176 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2177 }
2178
2179 // tell user we are registering device
2180 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2181
2182 // TODO: Build file name for state file
2183 if (!state_path_prefix.empty()) {
2184 PrintOut(LOG_INFO, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device);
2185 }
2186 // TODO: Build file name for attribute log file
2187 if (!attrlog_path_prefix.empty()) {
2188 PrintOut(LOG_INFO, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device);
2189 }
2190
2191 // close file descriptor
2192 CloseDevice(scsidev, device);
2193
2194 finish_device_scan(cfg, state);
2195
2196 return 0;
2197 }
2198
2199 // If the self-test log has got more self-test errors (or more recent
2200 // self-test errors) recorded, then notify user.
2201 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2202 {
2203 const char * name = cfg.name.c_str();
2204
2205 if (newi<0)
2206 // command failed
2207 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2208 else {
2209 // old and new error counts
2210 int oldc=state.selflogcount;
2211 int newc=SELFTEST_ERRORCOUNT(newi);
2212
2213 // old and new error timestamps in hours
2214 int oldh=state.selfloghour;
2215 int newh=SELFTEST_ERRORHOURS(newi);
2216
2217 if (oldc<newc) {
2218 // increase in error count
2219 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2220 name, oldc, newc);
2221 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2222 name, oldc, newc);
2223 state.must_write = true;
2224 }
2225 else if (newc > 0 && oldh != newh) {
2226 // more recent error
2227 // a 'more recent' error might actually be a smaller hour number,
2228 // if the hour number has wrapped.
2229 // There's still a bug here. You might just happen to run a new test
2230 // exactly 32768 hours after the previous failure, and have run exactly
2231 // 20 tests between the two, in which case smartd will miss the
2232 // new failure.
2233 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2234 name, newh);
2235 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2236 name, newh);
2237 state.must_write = true;
2238 }
2239
2240 // Print info if error entries have disappeared
2241 if (oldc > newc)
2242 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2243 name, oldc, newc);
2244
2245 // Needed since self-test error count may DECREASE. Hour might
2246 // also have changed.
2247 state.selflogcount= newc;
2248 state.selfloghour = newh;
2249 }
2250 return;
2251 }
2252
// Self-test type letters, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types: array length without the terminating NUL.
static const unsigned num_test_types = sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
2256
2257 // returns test type if time to do test of type testtype,
2258 // 0 if not time to do test.
2259 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2260 {
2261 // check that self-testing has been requested
2262 if (cfg.test_regex.empty())
2263 return 0;
2264
2265 // Exit if drive not capable of any test
2266 if ( state.not_cap_long && state.not_cap_short &&
2267 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2268 return 0;
2269
2270 // since we are about to call localtime(), be sure glibc is informed
2271 // of any timezone changes we make.
2272 if (!usetime)
2273 FixGlibcTimeZoneBug();
2274
2275 // Is it time for next check?
2276 time_t now = (!usetime ? time(0) : usetime);
2277 if (now < state.scheduled_test_next_check)
2278 return 0;
2279
2280 // Limit time check interval to 90 days
2281 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2282 state.scheduled_test_next_check = now - (3600L*24*90);
2283
2284 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2285 char testtype = 0;
2286 time_t testtime = 0; int testhour = 0;
2287 int maxtest = num_test_types-1;
2288
2289 for (time_t t = state.scheduled_test_next_check; ; ) {
2290 struct tm * tms = localtime(&t);
2291 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2292 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2293 for (int i = 0; i <= maxtest; i++) {
2294 // Skip if drive not capable of this test
2295 switch (test_type_chars[i]) {
2296 case 'L': if (state.not_cap_long) continue; break;
2297 case 'S': if (state.not_cap_short) continue; break;
2298 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2299 case 'O': if (scsi || state.not_cap_offline) continue; break;
2300 case 'c': case 'n':
2301 case 'r': if (scsi || state.not_cap_selective) continue; break;
2302 default: continue;
2303 }
2304 // Try match of "T/MM/DD/d/HH"
2305 char pattern[16];
2306 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2307 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2308 if (cfg.test_regex.full_match(pattern)) {
2309 // Test found
2310 testtype = pattern[0];
2311 testtime = t; testhour = tms->tm_hour;
2312 // Limit further matches to higher priority self-tests
2313 maxtest = i-1;
2314 break;
2315 }
2316 }
2317 // Exit if no tests left or current time reached
2318 if (maxtest < 0)
2319 break;
2320 if (t >= now)
2321 break;
2322 // Check next hour
2323 if ((t += 3600) > now)
2324 t = now;
2325 }
2326
2327 // Do next check not before next hour.
2328 struct tm * tmnow = localtime(&now);
2329 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2330
2331 if (testtype) {
2332 state.must_write = true;
2333 // Tell user if an old test was found.
2334 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2335 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2336 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2337 cfg.name.c_str(), testtype, datebuf);
2338 }
2339 }
2340
2341 return testtype;
2342 }
2343
2344 // Print a list of future tests.
2345 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2346 {
2347 unsigned numdev = configs.size();
2348 if (!numdev)
2349 return;
2350 std::vector<int> testcnts(numdev * num_test_types, 0);
2351
2352 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2353
2354 // FixGlibcTimeZoneBug(); // done in PrintOut()
2355 time_t now = time(0);
2356 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2357 dateandtimezoneepoch(datenow, now);
2358
2359 long seconds;
2360 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2361 // Check for each device whether a test will be run
2362 time_t testtime = now + seconds;
2363 for (unsigned i = 0; i < numdev; i++) {
2364 const dev_config & cfg = configs.at(i);
2365 dev_state & state = states.at(i);
2366 const char * p;
2367 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2368 if (testtype && (p = strchr(test_type_chars, testtype))) {
2369 unsigned t = (p - test_type_chars);
2370 // Report at most 5 tests of each type
2371 if (++testcnts[i*num_test_types + t] <= 5) {
2372 dateandtimezoneepoch(date, testtime);
2373 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2374 testcnts[i*num_test_types + t], testtype, date);
2375 }
2376 }
2377 }
2378 }
2379
2380 // Report totals
2381 dateandtimezoneepoch(date, now+seconds);
2382 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2383 for (unsigned i = 0; i < numdev; i++) {
2384 const dev_config & cfg = configs.at(i);
2385 bool scsi = devices.at(i)->is_scsi();
2386 for (unsigned t = 0; t < num_test_types; t++) {
2387 int cnt = testcnts[i*num_test_types + t];
2388 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2389 continue;
2390 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2391 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2392 }
2393 }
2394
2395 }
2396
2397 // Return zero on success, nonzero on failure. Perform offline (background)
2398 // short or long (extended) self test on given scsi device.
2399 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2400 {
2401 int retval = 0;
2402 const char *testname = 0;
2403 const char *name = cfg.name.c_str();
2404 int inProgress;
2405
2406 if (scsiSelfTestInProgress(device, &inProgress)) {
2407 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2408 state.not_cap_short = state.not_cap_long = true;
2409 return 1;
2410 }
2411
2412 if (1 == inProgress) {
2413 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2414 "progress.\n", name);
2415 return 1;
2416 }
2417
2418 switch (testtype) {
2419 case 'S':
2420 testname = "Short Self";
2421 retval = scsiSmartShortSelfTest(device);
2422 break;
2423 case 'L':
2424 testname = "Long Self";
2425 retval = scsiSmartExtendSelfTest(device);
2426 break;
2427 }
2428 // If we can't do the test, exit
2429 if (NULL == testname) {
2430 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2431 testtype);
2432 return 1;
2433 }
2434 if (retval) {
2435 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2436 (SIMPLE_ERR_BAD_FIELD == retval)) {
2437 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2438 testname);
2439 if ('L'==testtype)
2440 state.not_cap_long = true;
2441 else
2442 state.not_cap_short = true;
2443
2444 return 1;
2445 }
2446 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2447 testname, retval);
2448 return 1;
2449 }
2450
2451 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2452
2453 return 0;
2454 }
2455
2456 // Do an offline immediate or self-test. Return zero on success,
2457 // nonzero on failure.
2458 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2459 {
2460 const char *name = cfg.name.c_str();
2461
2462 // Read current smart data and check status/capability
2463 struct ata_smart_values data;
2464 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2465 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2466 return 1;
2467 }
2468
2469 // Check for capability to do the test
2470 int dotest = -1, mode = 0;
2471 const char *testname = 0;
2472 switch (testtype) {
2473 case 'O':
2474 testname="Offline Immediate ";
2475 if (isSupportExecuteOfflineImmediate(&data))
2476 dotest=OFFLINE_FULL_SCAN;
2477 else
2478 state.not_cap_offline = true;
2479 break;
2480 case 'C':
2481 testname="Conveyance Self-";
2482 if (isSupportConveyanceSelfTest(&data))
2483 dotest=CONVEYANCE_SELF_TEST;
2484 else
2485 state.not_cap_conveyance = true;
2486 break;
2487 case 'S':
2488 testname="Short Self-";
2489 if (isSupportSelfTest(&data))
2490 dotest=SHORT_SELF_TEST;
2491 else
2492 state.not_cap_short = true;
2493 break;
2494 case 'L':
2495 testname="Long Self-";
2496 if (isSupportSelfTest(&data))
2497 dotest=EXTEND_SELF_TEST;
2498 else
2499 state.not_cap_long = true;
2500 break;
2501
2502 case 'c': case 'n': case 'r':
2503 testname = "Selective Self-";
2504 if (isSupportSelectiveSelfTest(&data)) {
2505 dotest = SELECTIVE_SELF_TEST;
2506 switch (testtype) {
2507 case 'c': mode = SEL_CONT; break;
2508 case 'n': mode = SEL_NEXT; break;
2509 case 'r': mode = SEL_REDO; break;
2510 }
2511 }
2512 else
2513 state.not_cap_selective = true;
2514 break;
2515 }
2516
2517 // If we can't do the test, exit
2518 if (dotest<0) {
2519 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2520 return 1;
2521 }
2522
2523 // If currently running a self-test, do not interrupt it to start another.
2524 if (15==(data.self_test_exec_status >> 4)) {
2525 if (cfg.fix_firmwarebug == FIX_SAMSUNG3 && data.self_test_exec_status == 0xf0) {
2526 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2527 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2528 } else {
2529 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2530 name, testname, (int)(data.self_test_exec_status & 0x0f));
2531 return 1;
2532 }
2533 }
2534
2535 if (dotest == SELECTIVE_SELF_TEST) {
2536 // Set test span
2537 ata_selective_selftest_args selargs, prev_args;
2538 selargs.num_spans = 1;
2539 selargs.span[0].mode = mode;
2540 prev_args.num_spans = 1;
2541 prev_args.span[0].start = state.selective_test_last_start;
2542 prev_args.span[0].end = state.selective_test_last_end;
2543 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2544 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2545 return 1;
2546 }
2547 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2548 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2549 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2550 start, end, end - start + 1,
2551 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2552 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2553 state.selective_test_last_start = start;
2554 state.selective_test_last_end = end;
2555 }
2556
2557 // execute the test, and return status
2558 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2559 if (retval) {
2560 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2561 return retval;
2562 }
2563
2564 if (testtype != 'O')
2565 // Log next self-test execution status
2566 state.smartval.self_test_exec_status = 0xff;
2567
2568 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2569 return 0;
2570 }
2571
2572 // Check pending sector count attribute values (-C, -U directives).
2573 static void check_pending(const dev_config & cfg, dev_state & state,
2574 unsigned char id, bool increase_only,
2575 const ata_smart_values & smartval,
2576 int mailtype, const char * msg)
2577 {
2578 // Find attribute index
2579 int i = ata_find_attr_index(id, smartval);
2580 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2581 return;
2582
2583 // No report if no sectors pending.
2584 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2585 if (rawval == 0)
2586 return;
2587
2588 // If attribute is not reset, report only sector count increases.
2589 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2590 if (!(!increase_only || prev_rawval < rawval))
2591 return;
2592
2593 // Format message.
2594 std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2595 if (prev_rawval > 0 && rawval != prev_rawval)
2596 s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2597
2598 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2599 MailWarning(cfg, state, mailtype, "%s\n", s.c_str());
2600 state.must_write = true;
2601 }
2602
// Render a temperature value as text into the caller-supplied buffer:
// zero means "not set" and yields "??", any other value its decimal
// representation. Returns buf.
static const char * fmt_temp(unsigned char x, char * buf)
{
  if (x != 0)
    sprintf(buf, "%u", x);
  else
    strcpy(buf, "??"); // unset
  return buf;
}
2612
2613 // Check Temperature limits
2614 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2615 {
2616 if (!(0 < currtemp && currtemp < 255)) {
2617 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2618 return;
2619 }
2620
2621 // Update Max Temperature
2622 const char * minchg = "", * maxchg = "";
2623 if (currtemp > state.tempmax) {
2624 if (state.tempmax)
2625 maxchg = "!";
2626 state.tempmax = currtemp;
2627 state.must_write = true;
2628 }
2629
2630 char buf[20];
2631 if (!state.temperature) {
2632 // First check
2633 if (!state.tempmin || currtemp < state.tempmin)
2634 // Delay Min Temperature update by ~ 30 minutes.
2635 state.tempmin_delay = time(0) + CHECKTIME - 60;
2636 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2637 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2638 if (triptemp)
2639 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2640 state.temperature = currtemp;
2641 }
2642 else {
2643 if (state.tempmin_delay) {
2644 // End Min Temperature update delay if ...
2645 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2646 || (state.tempmin_delay <= time(0))) { // or delay time is over.
2647 state.tempmin_delay = 0;
2648 if (!state.tempmin)
2649 state.tempmin = 255;
2650 }
2651 }
2652
2653 // Update Min Temperature
2654 if (!state.tempmin_delay && currtemp < state.tempmin) {
2655 state.tempmin = currtemp;
2656 state.must_write = true;
2657 if (currtemp != state.temperature)
2658 minchg = "!";
2659 }
2660
2661 // Track changes
2662 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2663 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2664 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2665 state.temperature = currtemp;
2666 }
2667 }
2668
2669 // Check limits
2670 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2671 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2672 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2673 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2674 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2675 }
2676 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2677 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2678 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2679 }
2680 }
2681
2682 // Check normalized and raw attribute values.
2683 static void check_attribute(const dev_config & cfg, dev_state & state,
2684 const ata_smart_attribute & attr,
2685 const ata_smart_attribute & prev,
2686 int attridx,
2687 const ata_smart_threshold_entry * thresholds)
2688 {
2689 // Check attribute and threshold
2690 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
2691 if (attrstate == ATTRSTATE_NON_EXISTING)
2692 return;
2693
2694 // If requested, check for usage attributes that have failed.
2695 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2696 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
2697 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs);
2698 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2699 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2700 state.must_write = true;
2701 }
2702
2703 // Return if we're not tracking this type of attribute
2704 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2705 if (!( ( prefail && cfg.prefail)
2706 || (!prefail && cfg.usage )))
2707 return;
2708
2709 // Return if '-I ID' was specified
2710 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2711 return;
2712
2713 // Issue warning if they don't have the same ID in all structures.
2714 if (attr.id != prev.id) {
2715 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2716 cfg.name.c_str(), attr.id, prev.id);
2717 return;
2718 }
2719
2720 // Compare normalized values if valid.
2721 bool valchanged = false;
2722 if (attrstate > ATTRSTATE_NO_NORMVAL) {
2723 if (attr.current != prev.current)
2724 valchanged = true;
2725 }
2726
2727 // Compare raw values if requested.
2728 bool rawchanged = false;
2729 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2730 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2731 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2732 rawchanged = true;
2733 }
2734
2735 // Return if no change
2736 if (!(valchanged || rawchanged))
2737 return;
2738
2739 // Format value strings
2740 std::string currstr, prevstr;
2741 if (attrstate == ATTRSTATE_NO_NORMVAL) {
2742 // Print raw values only
2743 currstr = strprintf("%s (Raw)",
2744 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2745 prevstr = strprintf("%s (Raw)",
2746 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2747 }
2748 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2749 // Print normalized and raw values
2750 currstr = strprintf("%d [Raw %s]", attr.current,
2751 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2752 prevstr = strprintf("%d [Raw %s]", prev.current,
2753 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2754 }
2755 else {
2756 // Print normalized values only
2757 currstr = strprintf("%d", attr.current);
2758 prevstr = strprintf("%d", prev.current);
2759 }
2760
2761 // Format message
2762 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2763 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
2764 ata_get_smart_attr_name(attr.id, cfg.attribute_defs).c_str(),
2765 prevstr.c_str(), currstr.c_str());
2766
2767 // Report this change as critical ?
2768 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2769 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2770 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2771 MailWarning(cfg, state, 2, "%s", msg.c_str());
2772 }
2773 else {
2774 PrintOut(LOG_INFO, "%s\n", msg.c_str());
2775 }
2776 state.must_write = true;
2777 }
2778
2779
2780 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev, bool allow_selftests)
2781 {
2782 const char * name = cfg.name.c_str();
2783
2784 // If user has asked, test the email warning system
2785 if (cfg.emailtest)
2786 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2787
2788 // if we can't open device, fail gracefully rather than hard --
2789 // perhaps the next time around we'll be able to open it. ATAPI
2790 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2791 // given (see linux cdrom driver).
2792 if (!atadev->open()) {
2793 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
2794 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2795 return 1;
2796 } else if (debugmode)
2797 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
2798
2799 // user may have requested (with the -n Directive) to leave the disk
2800 // alone if it is in idle or sleeping mode. In this case check the
2801 // power mode and exit without check if needed
2802 if (cfg.powermode && !state.powermodefail) {
2803 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
2804 const char * mode = 0;
2805 if (0 <= powermode && powermode < 0xff) {
2806 // wait for possible spin up and check again
2807 int powermode2;
2808 sleep(5);
2809 powermode2 = ataCheckPowerMode(atadev);
2810 if (powermode2 > powermode)
2811 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
2812 powermode = powermode2;
2813 }
2814
2815 switch (powermode){
2816 case -1:
2817 // SLEEP
2818 mode="SLEEP";
2819 if (cfg.powermode>=1)
2820 dontcheck=1;
2821 break;
2822 case 0:
2823 // STANDBY
2824 mode="STANDBY";
2825 if (cfg.powermode>=2)
2826 dontcheck=1;
2827 break;
2828 case 0x80:
2829 // IDLE
2830 mode="IDLE";
2831 if (cfg.powermode>=3)
2832 dontcheck=1;
2833 break;
2834 case 0xff:
2835 // ACTIVE/IDLE
2836 mode="ACTIVE or IDLE";
2837 break;
2838 default:
2839 // UNKNOWN
2840 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2841 name, powermode);
2842 state.powermodefail = true;
2843 break;
2844 }
2845
2846 // if we are going to skip a check, return now
2847 if (dontcheck){
2848 // skip at most powerskipmax checks
2849 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2850 CloseDevice(atadev, name);
2851 if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
2852 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
2853 state.powerskipcnt++;
2854 return 0;
2855 }
2856 else {
2857 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
2858 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2859 }
2860 state.powerskipcnt = 0;
2861 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2862 }
2863 else if (state.powerskipcnt) {
2864 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2865 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2866 state.powerskipcnt = 0;
2867 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2868 }
2869 }
2870
2871 // check smart status
2872 if (cfg.smartcheck) {
2873 int status=ataSmartStatus2(atadev);
2874 if (status==-1){
2875 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
2876 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
2877 state.must_write = true;
2878 }
2879 else if (status==1){
2880 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
2881 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
2882 state.must_write = true;
2883 }
2884 }
2885
2886 // Check everything that depends upon SMART Data (eg, Attribute values)
2887 if ( cfg.usagefailed || cfg.prefail || cfg.usage
2888 || cfg.curr_pending_id || cfg.offl_pending_id
2889 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || cfg.selftest) {
2890
2891 // Read current attribute values.
2892 ata_smart_values curval;
2893 if (ataReadSmartValues(atadev, &curval)){
2894 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
2895 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
2896 state.must_write = true;
2897 }
2898 else {
2899 // look for current or offline pending sectors
2900 if (cfg.curr_pending_id)
2901 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
2902 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
2903 : "Total unreadable (pending) sectors" ));
2904
2905 if (cfg.offl_pending_id)
2906 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
2907 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
2908 : "Total offline uncorrectable sectors"));
2909
2910 // check temperature limits
2911 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2912 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
2913
2914 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
2915
2916 // look for failed usage attributes, or track usage or prefail attributes
2917 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
2918 check_attribute(cfg, state,
2919 curval.vendor_attributes[i],
2920 state.smartval.vendor_attributes[i],
2921 i, state.smartthres.thres_entries);
2922 }
2923
2924 if (cfg.selftest) {
2925 // Log changes of self-test execution status
2926 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
2927 || (!allow_selftests && curval.self_test_exec_status != 0x00) )
2928 log_self_test_exec_status(name, curval.self_test_exec_status);
2929 }
2930
2931 // Save the new values into *drive for the next time around
2932 state.smartval = curval;
2933 }
2934 }
2935 }
2936
2937 // check if number of selftest errors has increased (note: may also DECREASE)
2938 if (cfg.selftest)
2939 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug));
2940
2941 // check if number of ATA errors has increased
2942 if (cfg.errorlog || cfg.xerrorlog) {
2943
2944 int errcnt1 = -1, errcnt2 = -1;
2945 if (cfg.errorlog)
2946 errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false);
2947 if (cfg.xerrorlog)
2948 errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true);
2949
2950 // new number of errors is max of both logs
2951 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
2952
2953 // did command fail?
2954 if (newc<0)
2955 // lack of PrintOut here is INTENTIONAL
2956 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
2957
2958 // has error count increased?
2959 int oldc = state.ataerrorcount;
2960 if (newc>oldc){
2961 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
2962 name, oldc, newc);
2963 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
2964 name, oldc, newc);
2965 state.must_write = true;
2966 }
2967
2968 if (newc>=0)
2969 state.ataerrorcount=newc;
2970 }
2971
2972 // if the user has asked, and device is capable (or we're not yet
2973 // sure) check whether a self test should be done now.
2974 if (allow_selftests && !cfg.test_regex.empty()) {
2975 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
2976 if (testtype)
2977 DoATASelfTest(cfg, state, atadev, testtype);
2978 }
2979
2980 // Don't leave device open -- the OS/user may want to access it
2981 // before the next smartd cycle!
2982 CloseDevice(atadev, name);
2983
2984 // Copy ATA attribute values to persistent state
2985 state.update_persistent_state();
2986
2987 return 0;
2988 }
2989
2990 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
2991 {
2992 UINT8 asc, ascq;
2993 UINT8 currenttemp;
2994 UINT8 triptemp;
2995 const char * name = cfg.name.c_str();
2996 const char *cp;
2997
2998 // If the user has asked for it, test the email warning system
2999 if (cfg.emailtest)
3000 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3001
3002 // if we can't open device, fail gracefully rather than hard --
3003 // perhaps the next time around we'll be able to open it
3004 if (!scsidev->open()) {
3005 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3006 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3007 return 1;
3008 } else if (debugmode)
3009 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
3010 currenttemp = 0;
3011 asc = 0;
3012 ascq = 0;
3013 if (!state.SuppressReport) {
3014 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3015 &asc, &ascq, &currenttemp, &triptemp)) {
3016 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3017 name);
3018 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3019 state.SuppressReport = 1;
3020 }
3021 }
3022 if (asc > 0) {
3023 cp = scsiGetIEString(asc, ascq);
3024 if (cp) {
3025 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3026 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3027 } else if (debugmode)
3028 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3029 name, (int)asc, (int)ascq);
3030 } else if (debugmode)
3031 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3032
3033 // check temperature limits
3034 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3035 CheckTemperature(cfg, state, currenttemp, triptemp);
3036
3037 // check if number of selftest errors has increased (note: may also DECREASE)
3038 if (cfg.selftest)
3039 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3040
3041 if (allow_selftests && !cfg.test_regex.empty()) {
3042 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3043 if (testtype)
3044 DoSCSISelfTest(cfg, state, scsidev, testtype);
3045 }
3046 CloseDevice(scsidev, name);
3047 return 0;
3048 }
3049
3050 // Checks the SMART status of all ATA and SCSI devices
3051 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3052 smart_device_list & devices, bool allow_selftests)
3053 {
3054 for (unsigned i = 0; i < configs.size(); i++) {
3055 const dev_config & cfg = configs.at(i);
3056 dev_state & state = states.at(i);
3057 smart_device * dev = devices.at(i);
3058 if (dev->is_ata())
3059 ATACheckDevice(cfg, state, dev->to_ata(), allow_selftests);
3060 else if (dev->is_scsi())
3061 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3062 }
3063 }
3064
3065 // Set if Initialize() was called
3066 static bool is_initialized = false;
3067
3068 // Does initialization right after fork to daemon mode
3069 static void Initialize(time_t *wakeuptime)
3070 {
3071 // Call Goodbye() on exit
3072 is_initialized = true;
3073
3074 // write PID file
3075 if (!debugmode)
3076 WritePidFile();
3077
3078 // install signal handlers. On Solaris, can't use signal() because
3079 // it resets the handler to SIG_DFL after each call. So use sigset()
3080 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3081
3082 // normal and abnormal exit
3083 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3084 SIGNALFN(SIGTERM, SIG_IGN);
3085 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3086 SIGNALFN(SIGQUIT, SIG_IGN);
3087
3088 // in debug mode, <CONTROL-C> ==> HUP
3089 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3090 SIGNALFN(SIGINT, SIG_IGN);
3091
3092 // Catch HUP and USR1
3093 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3094 SIGNALFN(SIGHUP, SIG_IGN);
3095 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3096 SIGNALFN(SIGUSR1, SIG_IGN);
3097 #ifdef _WIN32
3098 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3099 SIGNALFN(SIGUSR2, SIG_IGN);
3100 #endif
3101
3102 // initialize wakeup time to CURRENT time
3103 *wakeuptime=time(NULL);
3104
3105 return;
3106 }
3107
#ifdef _WIN32
// Switches debug mode on or off at run time.
// Only implemented for native windows
// (there is no easy way to reopen tty on *nix).
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      // Currently off: try to attach a console and turn debug mode on
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
        break;
      }
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      break;

    case 1:
      // Currently on at level 1: detach the console and turn it off
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      // Any other debug level is left untouched
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
3133
3134 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3135 {
3136 // If past wake-up-time, compute next wake-up-time
3137 time_t timenow=time(NULL);
3138 while (wakeuptime<=timenow){
3139 int intervals=1+(timenow-wakeuptime)/checktime;
3140 wakeuptime+=intervals*checktime;
3141 }
3142
3143 // sleep until we catch SIGUSR1 or have completed sleeping
3144 while (timenow<wakeuptime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT){
3145
3146 // protect user again system clock being adjusted backwards
3147 if (wakeuptime>timenow+checktime){
3148 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3149 wakeuptime=timenow+checktime;
3150 }
3151
3152 // Exit sleep when time interval has expired or a signal is received
3153 sleep(wakeuptime-timenow);
3154
3155 #ifdef _WIN32
3156 // toggle debug mode?
3157 if (caughtsigUSR2) {
3158 ToggleDebugMode();
3159 caughtsigUSR2 = 0;
3160 }
3161 #endif
3162
3163 timenow=time(NULL);
3164 }
3165
3166 // if we caught a SIGUSR1 then print message and clear signal
3167 if (caughtsigUSR1){
3168 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3169 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3170 caughtsigUSR1=0;
3171 sigwakeup = true;
3172 }
3173
3174 // return adjusted wakeuptime
3175 return wakeuptime;
3176 }
3177
3178 // Print out a list of valid arguments for the Directive d
3179 static void printoutvaliddirectiveargs(int priority, char d)
3180 {
3181 switch (d) {
3182 case 'n':
3183 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3184 break;
3185 case 's':
3186 PrintOut(priority, "valid_regular_expression");
3187 break;
3188 case 'd':
3189 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3190 break;
3191 case 'T':
3192 PrintOut(priority, "normal, permissive");
3193 break;
3194 case 'o':
3195 case 'S':
3196 PrintOut(priority, "on, off");
3197 break;
3198 case 'l':
3199 PrintOut(priority, "error, selftest");
3200 break;
3201 case 'M':
3202 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3203 break;
3204 case 'v':
3205 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3206 break;
3207 case 'P':
3208 PrintOut(priority, "use, ignore, show, showall");
3209 break;
3210 case 'F':
3211 PrintOut(priority, "none, samsung, samsung2, samsung3");
3212 break;
3213 }
3214 }
3215
3216 // exits with an error message, or returns integer value of token
3217 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3218 int min, int max, char * suffix = 0)
3219 {
3220 // make sure argument is there
3221 if (!arg) {
3222 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3223 cfgfile, lineno, name, token, min, max);
3224 return -1;
3225 }
3226
3227 // get argument value (base 10), check that it's integer, and in-range
3228 char *endptr;
3229 int val = strtol(arg,&endptr,10);
3230
3231 // optional suffix present?
3232 if (suffix) {
3233 if (!strcmp(endptr, suffix))
3234 endptr += strlen(suffix);
3235 else
3236 *suffix = 0;
3237 }
3238
3239 if (!(!*endptr && min <= val && val <= max)) {
3240 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3241 cfgfile, lineno, name, token, arg, min, max);
3242 return -1;
3243 }
3244
3245 // all is well; return value
3246 return val;
3247 }
3248
3249
3250 // Get 1-3 small integer(s) for '-W' directive
3251 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3252 unsigned char *val1, unsigned char *val2, unsigned char *val3)
3253 {
3254 unsigned v1 = 0, v2 = 0, v3 = 0;
3255 int n1 = -1, n2 = -1, n3 = -1, len;
3256 if (!arg) {
3257 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3258 cfgfile, lineno, name, token);
3259 return -1;
3260 }
3261
3262 len = strlen(arg);
3263 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3264 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3265 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3266 cfgfile, lineno, name, token, arg);
3267 return -1;
3268 }
3269 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3270 return 0;
3271 }
3272
3273
3274 // This function returns 1 if it has correctly parsed one token (and
3275 // any arguments), else zero if no tokens remain. It returns -1 if an
3276 // error was encountered.
3277 static int ParseToken(char * token, dev_config & cfg)
3278 {
3279 char sym;
3280 const char * name = cfg.name.c_str();
3281 int lineno=cfg.lineno;
3282 const char *delim = " \n\t";
3283 int badarg = 0;
3284 int missingarg = 0;
3285 const char *arg = 0;
3286
3287 // is the rest of the line a comment
3288 if (*token=='#')
3289 return 1;
3290
3291 // is the token not recognized?
3292 if (*token!='-' || strlen(token)!=2) {
3293 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3294 configfile, lineno, name, token);
3295 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3296 return -1;
3297 }
3298
3299 // token we will be parsing:
3300 sym=token[1];
3301
3302 // parse the token and swallow its argument
3303 int val;
3304 char plus[] = "+", excl[] = "!";
3305
3306 switch (sym) {
3307 case 'C':
3308 // monitor current pending sector count (default 197)
3309 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3310 return -1;
3311 cfg.curr_pending_id = (unsigned char)val;
3312 cfg.curr_pending_incr = (*plus == '+');
3313 cfg.curr_pending_set = true;
3314 break;
3315 case 'U':
3316 // monitor offline uncorrectable sectors (default 198)
3317 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3318 return -1;
3319 cfg.offl_pending_id = (unsigned char)val;
3320 cfg.offl_pending_incr = (*plus == '+');
3321 cfg.offl_pending_set = true;
3322 break;
3323 case 'T':
3324 // Set tolerance level for SMART command failures
3325 if ((arg = strtok(NULL, delim)) == NULL) {
3326 missingarg = 1;
3327 } else if (!strcmp(arg, "normal")) {
3328 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3329 // not on failure of an optional S.M.A.R.T. command.
3330 // This is the default so we don't need to actually do anything here.
3331 cfg.permissive = false;
3332 } else if (!strcmp(arg, "permissive")) {
3333 // Permissive mode; ignore errors from Mandatory SMART commands
3334 cfg.permissive = true;
3335 } else {
3336 badarg = 1;
3337 }
3338 break;
3339 case 'd':
3340 // specify the device type
3341 if ((arg = strtok(NULL, delim)) == NULL) {
3342 missingarg = 1;
3343 } else if (!strcmp(arg, "removable")) {
3344 cfg.removable = true;
3345 } else if (!strcmp(arg, "auto")) {
3346 cfg.dev_type = "";
3347 } else {
3348 cfg.dev_type = arg;
3349 }
3350 break;
3351 case 'F':
3352 // fix firmware bug
3353 if ((arg = strtok(NULL, delim)) == NULL) {
3354 missingarg = 1;
3355 } else if (!strcmp(arg, "none")) {
3356 cfg.fix_firmwarebug = FIX_NONE;
3357 } else if (!strcmp(arg, "samsung")) {
3358 cfg.fix_firmwarebug = FIX_SAMSUNG;
3359 } else if (!strcmp(arg, "samsung2")) {
3360 cfg.fix_firmwarebug = FIX_SAMSUNG2;
3361 } else if (!strcmp(arg, "samsung3")) {
3362 cfg.fix_firmwarebug = FIX_SAMSUNG3;
3363 } else {
3364 badarg = 1;
3365 }
3366 break;
3367 case 'H':
3368 // check SMART status
3369 cfg.smartcheck = true;
3370 break;
3371 case 'f':
3372 // check for failure of usage attributes
3373 cfg.usagefailed = true;
3374 break;
3375 case 't':
3376 // track changes in all vendor attributes
3377 cfg.prefail = true;
3378 cfg.usage = true;
3379 break;
3380 case 'p':
3381 // track changes in prefail vendor attributes
3382 cfg.prefail = true;
3383 break;
3384 case 'u':
3385 // track changes in usage vendor attributes
3386 cfg.usage = true;
3387 break;
3388 case 'l':
3389 // track changes in SMART logs
3390 if ((arg = strtok(NULL, delim)) == NULL) {
3391 missingarg = 1;
3392 } else if (!strcmp(arg, "selftest")) {
3393 // track changes in self-test log
3394 cfg.selftest = true;
3395 } else if (!strcmp(arg, "error")) {
3396 // track changes in ATA error log
3397 cfg.errorlog = true;
3398 } else if (!strcmp(arg, "xerror")) {
3399 // track changes in Extended Comprehensive SMART error log
3400 cfg.xerrorlog = true;
3401 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
3402 // set SCT Error Recovery Control
3403 unsigned rt = ~0, wt = ~0; int nc = -1;
3404 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
3405 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
3406 cfg.sct_erc_set = true;
3407 cfg.sct_erc_readtime = rt;
3408 cfg.sct_erc_writetime = wt;
3409 }
3410 else
3411 badarg = 1;
3412 } else {
3413 badarg = 1;
3414 }
3415 break;
3416 case 'a':
3417 // monitor everything
3418 cfg.smartcheck = true;
3419 cfg.prefail = true;
3420 cfg.usagefailed = true;
3421 cfg.usage = true;
3422 cfg.selftest = true;
3423 cfg.errorlog = true;
3424 break;
3425 case 'o':
3426 // automatic offline testing enable/disable
3427 if ((arg = strtok(NULL, delim)) == NULL) {
3428 missingarg = 1;
3429 } else if (!strcmp(arg, "on")) {
3430 cfg.autoofflinetest = 2;
3431 } else if (!strcmp(arg, "off")) {
3432 cfg.autoofflinetest = 1;
3433 } else {
3434 badarg = 1;
3435 }
3436 break;
3437 case 'n':
3438 // skip disk check if in idle or standby mode
3439 if (!(arg = strtok(NULL, delim)))
3440 missingarg = 1;
3441 else {
3442 char *endptr = NULL;
3443 char *next = strchr(const_cast<char*>(arg), ',');
3444
3445 cfg.powerquiet = false;
3446 cfg.powerskipmax = 0;
3447
3448 if (next!=NULL) *next='\0';
3449 if (!strcmp(arg, "never"))
3450 cfg.powermode = 0;
3451 else if (!strcmp(arg, "sleep"))
3452 cfg.powermode = 1;
3453 else if (!strcmp(arg, "standby"))
3454 cfg.powermode = 2;
3455 else if (!strcmp(arg, "idle"))
3456 cfg.powermode = 3;
3457 else
3458 badarg = 1;
3459
3460 // if optional arguments are present
3461 if (!badarg && next!=NULL) {
3462 next++;
3463 cfg.powerskipmax = strtol(next, &endptr, 10);
3464 if (endptr == next)
3465 cfg.powerskipmax = 0;
3466 else {
3467 next = endptr + (*endptr != '\0');
3468 if (cfg.powerskipmax <= 0)
3469 badarg = 1;
3470 }
3471 if (*next != '\0') {
3472 if (!strcmp("q", next))
3473 cfg.powerquiet = true;
3474 else {
3475 badarg = 1;
3476 }
3477 }
3478 }
3479 }
3480 break;
3481 case 'S':
3482 // automatic attribute autosave enable/disable
3483 if ((arg = strtok(NULL, delim)) == NULL) {
3484 missingarg = 1;
3485 } else if (!strcmp(arg, "on")) {
3486 cfg.autosave = 2;
3487 } else if (!strcmp(arg, "off")) {
3488 cfg.autosave = 1;
3489 } else {
3490 badarg = 1;
3491 }
3492 break;
3493 case 's':
3494 // warn user, and delete any previously given -s REGEXP Directives
3495 if (!cfg.test_regex.empty()){
3496 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3497 configfile, lineno, name, cfg.test_regex.get_pattern());
3498 cfg.test_regex = regular_expression();
3499 }
3500 // check for missing argument
3501 if (!(arg = strtok(NULL, delim))) {
3502 missingarg = 1;
3503 }
3504 // Compile regex
3505 else {
3506 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3507 // not a valid regular expression!
3508 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3509 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3510 return -1;
3511 }
3512 }
3513 // Do a bit of sanity checking and warn user if we think that
3514 // their regexp is "strange". User probably confused about shell
3515 // glob(3) syntax versus regular expression syntax regexp(7).
3516 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3517 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3518 configfile, lineno, name, val+1, arg[val], arg);
3519 break;
3520 case 'm':
3521 // send email to address that follows
3522 if (!(arg = strtok(NULL,delim)))
3523 missingarg = 1;
3524 else {
3525 if (!cfg.emailaddress.empty())
3526 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3527 configfile, lineno, name, cfg.emailaddress.c_str());
3528 cfg.emailaddress = arg;
3529 }
3530 break;
3531 case 'M':
3532 // email warning options
3533 if (!(arg = strtok(NULL, delim)))
3534 missingarg = 1;
3535 else if (!strcmp(arg, "once"))
3536 cfg.emailfreq = 1;
3537 else if (!strcmp(arg, "daily"))
3538 cfg.emailfreq = 2;
3539 else if (!strcmp(arg, "diminishing"))
3540 cfg.emailfreq = 3;
3541 else if (!strcmp(arg, "test"))
3542 cfg.emailtest = 1;
3543 else if (!strcmp(arg, "exec")) {
3544 // Get the next argument (the command line)
3545 if (!(arg = strtok(NULL, delim))) {
3546 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3547 configfile, lineno, name, token);
3548 return -1;
3549 }
3550 // Free the last cmd line given if any, and copy new one
3551 if (!cfg.emailcmdline.empty())
3552 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3553 configfile, lineno, name, cfg.emailcmdline.c_str());
3554 cfg.emailcmdline = arg;
3555 }
3556 else
3557 badarg = 1;
3558 break;
3559 case 'i':
3560 // ignore failure of usage attribute
3561 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3562 return -1;
3563 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3564 break;
3565 case 'I':
3566 // ignore attribute for tracking purposes
3567 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3568 return -1;
3569 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3570 break;
3571 case 'r':
3572 // print raw value when tracking
3573 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3574 return -1;
3575 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3576 if (*excl == '!') // attribute change is critical
3577 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3578 break;
3579 case 'R':
3580 // track changes in raw value (forces printing of raw value)
3581 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3582 return -1;
3583 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3584 if (*excl == '!') // raw value change is critical
3585 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3586 break;
3587 case 'W':
3588 // track Temperature
3589 if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3590 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
3591 return -1;
3592 break;
3593 case 'v':
3594 // non-default vendor-specific attribute meaning
3595 if (!(arg=strtok(NULL,delim))) {
3596 missingarg = 1;
3597 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
3598 badarg = 1;
3599 }
3600 break;
3601 case 'P':
3602 // Define use of drive-specific presets.
3603 if (!(arg = strtok(NULL, delim))) {
3604 missingarg = 1;
3605 } else if (!strcmp(arg, "use")) {
3606 cfg.ignorepresets = false;
3607 } else if (!strcmp(arg, "ignore")) {
3608 cfg.ignorepresets = true;
3609 } else if (!strcmp(arg, "show")) {
3610 cfg.showpresets = true;
3611 } else if (!strcmp(arg, "showall")) {
3612 showallpresets();
3613 } else {
3614 badarg = 1;
3615 }
3616 break;
3617 default:
3618 // Directive not recognized
3619 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3620 configfile, lineno, name, token);
3621 Directives();
3622 return -1;
3623 }
3624 if (missingarg) {
3625 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3626 configfile, lineno, name, token);
3627 }
3628 if (badarg) {
3629 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3630 configfile, lineno, name, token, arg);
3631 }
3632 if (missingarg || badarg) {
3633 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
3634 printoutvaliddirectiveargs(LOG_CRIT, sym);
3635 PrintOut(LOG_CRIT, "\n");
3636 return -1;
3637 }
3638
3639 return 1;
3640 }
3641
3642 // Scan directive for configuration file
3643 #define SCANDIRECTIVE "DEVICESCAN"
3644
3645 // This is the routine that adds things to the conf_entries list.
3646 //
3647 // Return values are:
3648 // 1: parsed a normal line
3649 // 0: found comment or blank line
3650 // -1: found SCANDIRECTIVE line
3651 // -2: found an error
3652 //
3653 // Note: this routine modifies *line from the caller!
3654 static int ParseConfigLine(dev_config_vector & conf_entries, int /*entry*/, int lineno, /*const*/ char * line)
3655 {
3656 char *token=NULL;
3657 char *name=NULL;
3658 const char *delim = " \n\t";
3659 int devscan=0;
3660
3661 // get first token: device name. If a comment, skip line
3662 if (!(name=strtok(line,delim)) || *name=='#') {
3663 return 0;
3664 }
3665
3666 // Have we detected the SCANDIRECTIVE directive?
3667 if (!strcmp(SCANDIRECTIVE,name)){
3668 devscan=1;
3669 }
3670
3671 // We've got a legit entry, make space to store it
3672 conf_entries.push_back( dev_config() );
3673 dev_config & cfg = conf_entries.back();
3674
3675 cfg.name = name; // Later replaced by dev->get_info().info_name
3676 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
3677
3678 // Store line number, and by default check for both device types.
3679 cfg.lineno=lineno;
3680
3681 // parse tokens one at a time from the file.
3682 while ((token=strtok(NULL,delim))){
3683 int retval=ParseToken(token,cfg);
3684
3685 if (retval==0)
3686 // No tokens left:
3687 break;
3688
3689 if (retval>0) {
3690 // Parsed token
3691 #if (0)
3692 PrintOut(LOG_INFO,"Parsed token %s\n",token);
3693 #endif
3694 continue;
3695 }
3696
3697 if (retval<0) {
3698 // error found on the line
3699 return -2;
3700 }
3701 }
3702
3703 // If NO monitoring directives are set, then set all of them.
3704 if (!( cfg.smartcheck || cfg.selftest
3705 || cfg.errorlog || cfg.xerrorlog
3706 || cfg.usagefailed || cfg.prefail || cfg.usage
3707 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
3708
3709 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3710 cfg.name.c_str(), cfg.lineno, configfile);
3711
3712 cfg.smartcheck = true;
3713 cfg.usagefailed = true;
3714 cfg.prefail = true;
3715 cfg.usage = true;
3716 cfg.selftest = true;
3717 cfg.errorlog = true;
3718 }
3719
3720 // additional sanity check. Has user set -M options without -m?
3721 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
3722 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3723 cfg.name.c_str(), cfg.lineno, configfile);
3724 return -2;
3725 }
3726
3727 // has the user has set <nomailer>?
3728 if (cfg.emailaddress == "<nomailer>") {
3729 // check that -M exec is also set
3730 if (cfg.emailcmdline.empty()){
3731 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3732 cfg.name.c_str(), cfg.lineno, configfile);
3733 return -2;
3734 }
3735 // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
3736 cfg.emailaddress.clear();
3737 }
3738
3739 if (devscan)
3740 return -1;
3741 else
3742 return 1;
3743 }
3744
3745 // Parses a configuration file. Return values are:
3746 // N=>0: found N entries
3747 // -1: syntax error in config file
3748 // -2: config file does not exist
3749 // -3: config file exists but cannot be read
3750 //
3751 // In the case where the return value is 0, there are three
3752 // possiblities:
3753 // Empty configuration file ==> conf_entries.empty()
3754 // No configuration file ==> conf_entries[0].lineno == 0
3755 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
3756 static int ParseConfigFile(dev_config_vector & conf_entries)
3757 {
3758 // maximum line length in configuration file
3759 const int MAXLINELEN = 256;
3760 // maximum length of a continued line in configuration file
3761 const int MAXCONTLINE = 1023;
3762
3763 stdio_file f;
3764 // Open config file, if it exists and is not <stdin>
3765 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
3766 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
3767 // file exists but we can't read it or it should exist due to '-c' option
3768 int ret = (errno!=ENOENT ? -3 : -2);
3769 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
3770 strerror(errno),configfile);
3771 return ret;
3772 }
3773 }
3774 else // read from stdin ('-c -' option)
3775 f.open(stdin);
3776
3777 // No configuration file found -- use fake one
3778 int entry = 0;
3779 if (!f) {
3780 char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
3781
3782 if (ParseConfigLine(conf_entries, entry, 0, fakeconfig) != -1)
3783 throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
3784 return 0;
3785 }
3786
3787 #ifdef __CYGWIN__
3788 setmode(fileno(f), O_TEXT); // Allow files with \r\n
3789 #endif
3790
3791 // configuration file exists
3792 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
3793
3794 // parse config file line by line
3795 int lineno = 1, cont = 0, contlineno = 0;
3796 char line[MAXLINELEN+2];
3797 char fullline[MAXCONTLINE+1];
3798
3799 for (;;) {
3800 int len=0,scandevice;
3801 char *lastslash;
3802 char *comment;
3803 char *code;
3804
3805 // make debugging simpler
3806 memset(line,0,sizeof(line));
3807
3808 // get a line
3809 code=fgets(line, MAXLINELEN+2, f);
3810
3811 // are we at the end of the file?
3812 if (!code){
3813 if (cont) {
3814 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3815 // See if we found a SCANDIRECTIVE directive
3816 if (scandevice==-1)
3817 return 0;
3818 // did we find a syntax error
3819 if (scandevice==-2)
3820 return -1;
3821 // the final line is part of a continuation line
3822 cont=0;
3823 entry+=scandevice;
3824 }
3825 break;
3826 }
3827
3828 // input file line number
3829 contlineno++;
3830
3831 // See if line is too long
3832 len=strlen(line);
3833 if (len>MAXLINELEN){
3834 const char *warn;
3835 if (line[len-1]=='\n')
3836 warn="(including newline!) ";
3837 else
3838 warn="";
3839 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3840 (int)contlineno,configfile,warn,(int)MAXLINELEN);
3841 return -1;
3842 }
3843
3844 // Ignore anything after comment symbol
3845 if ((comment=strchr(line,'#'))){
3846 *comment='\0';
3847 len=strlen(line);
3848 }
3849
3850 // is the total line (made of all continuation lines) too long?
3851 if (cont+len>MAXCONTLINE){
3852 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3853 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
3854 return -1;
3855 }
3856
3857 // copy string so far into fullline, and increment length
3858 strcpy(fullline+cont,line);
3859 cont+=len;
3860
3861 // is this a continuation line. If so, replace \ by space and look at next line
3862 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
3863 *(fullline+(cont-len)+(lastslash-line))=' ';
3864 continue;
3865 }
3866
3867 // Not a continuation line. Parse it
3868 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3869
3870 // did we find a scandevice directive?
3871 if (scandevice==-1)
3872 return 0;
3873 // did we find a syntax error
3874 if (scandevice==-2)
3875 return -1;
3876
3877 entry+=scandevice;
3878 lineno++;
3879 cont=0;
3880 }
3881
3882 // note -- may be zero if syntax of file OK, but no valid entries!
3883 return entry;
3884 }
3885
3886 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3887 <LIST> is the list of valid arguments for option opt. */
3888 static void PrintValidArgs(char opt)
3889 {
3890 const char *s;
3891
3892 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
3893 if (!(s = GetValidArgList(opt)))
3894 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
3895 else
3896 PrintOut(LOG_CRIT, "%s", (char *)s);
3897 PrintOut(LOG_CRIT, " <=======\n");
3898 }
3899
// Returns true if 'path' is an absolute path name.
// A leading '/' always qualifies.  On Windows and Cygwin builds a leading
// '\' and a "X:/" or "X:\" drive prefix qualify as well.
static bool is_abs_path(const char * path)
{
  const char first = *path;
#if defined(_WIN32) || defined(__CYGWIN__)
  if (first == '/' || first == '\\')
    return true;
  // %n is only stored if letter, colon and separator all matched
  int consumed = -1;
  sscanf(path, "%*1[A-Za-z]:%*1[/\\]%n", &consumed);
  return (consumed > 0);
#else
  return (first == '/');
#endif
}
3915
3916 // Parses input line, prints usage message and
3917 // version/license/copyright messages
3918 static void ParseOpts(int argc, char **argv)
3919 {
3920 // Init default configfile path
3921 #ifndef _WIN32
3922 configfile = SMARTMONTOOLS_SYSCONFDIR"/smartd.conf";
3923 #else
3924 static std::string configfile_str = get_exe_dir() + "/smartd.conf";
3925 configfile = configfile_str.c_str();
3926 #endif
3927
3928 // Please update GetValidArgList() if you edit shortopts
3929 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:Vh?"
3930 #ifdef HAVE_LIBCAP_NG
3931 "C"
3932 #endif
3933 ;
3934 // Please update GetValidArgList() if you edit longopts
3935 struct option longopts[] = {
3936 { "configfile", required_argument, 0, 'c' },
3937 { "logfacility", required_argument, 0, 'l' },
3938 { "quit", required_argument, 0, 'q' },
3939 { "debug", no_argument, 0, 'd' },
3940 { "showdirectives", no_argument, 0, 'D' },
3941 { "interval", required_argument, 0, 'i' },
3942 #ifndef _WIN32
3943 { "no-fork", no_argument, 0, 'n' },
3944 #endif
3945 { "pidfile", required_argument, 0, 'p' },
3946 { "report", required_argument, 0, 'r' },
3947 { "savestates", required_argument, 0, 's' },
3948 { "attributelog", required_argument, 0, 'A' },
3949 { "drivedb", required_argument, 0, 'B' },
3950 #if defined(_WIN32) || defined(__CYGWIN__)
3951 { "service", no_argument, 0, 'n' },
3952 #endif
3953 { "version", no_argument, 0, 'V' },
3954 { "license", no_argument, 0, 'V' },
3955 { "copyright", no_argument, 0, 'V' },
3956 { "help", no_argument, 0, 'h' },
3957 { "usage", no_argument, 0, 'h' },
3958 #ifdef HAVE_LIBCAP_NG
3959 { "capabilities", no_argument, 0, 'C' },
3960 #endif
3961 { 0, 0, 0, 0 }
3962 };
3963
3964 opterr=optopt=0;
3965 bool badarg = false;
3966 bool no_defaultdb = false; // set true on '-B FILE'
3967
3968 // Parse input options.
3969 int optchar;
3970 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
3971 char *arg;
3972 char *tailptr;
3973 long lchecktime;
3974
3975 switch(optchar) {
3976 case 'q':
3977 // when to quit
3978 if (!(strcmp(optarg,"nodev"))) {
3979 quit=0;
3980 } else if (!(strcmp(optarg,"nodevstartup"))) {
3981 quit=1;
3982 } else if (!(strcmp(optarg,"never"))) {
3983 quit=2;
3984 } else if (!(strcmp(optarg,"onecheck"))) {
3985 quit=3;
3986 debugmode=1;
3987 } else if (!(strcmp(optarg,"showtests"))) {
3988 quit=4;
3989 debugmode=1;
3990 } else if (!(strcmp(optarg,"errors"))) {
3991 quit=5;
3992 } else {
3993 badarg = true;
3994 }
3995 break;
3996 case 'l':
3997 // set the log facility level
3998 if (!strcmp(optarg, "daemon"))
3999 facility=LOG_DAEMON;
4000 else if (!strcmp(optarg, "local0"))
4001 facility=LOG_LOCAL0;
4002 else if (!strcmp(optarg, "local1"))
4003 facility=LOG_LOCAL1;
4004 else if (!strcmp(optarg, "local2"))
4005 facility=LOG_LOCAL2;
4006 else if (!strcmp(optarg, "local3"))
4007 facility=LOG_LOCAL3;
4008 else if (!strcmp(optarg, "local4"))
4009 facility=LOG_LOCAL4;
4010 else if (!strcmp(optarg, "local5"))
4011 facility=LOG_LOCAL5;
4012 else if (!strcmp(optarg, "local6"))
4013 facility=LOG_LOCAL6;
4014 else if (!strcmp(optarg, "local7"))
4015 facility=LOG_LOCAL7;
4016 else
4017 badarg = true;
4018 break;
4019 case 'd':
4020 // enable debug mode
4021 debugmode = 1;
4022 break;
4023 case 'n':
4024 // don't fork()
4025 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4026 do_fork = false;
4027 #endif
4028 break;
4029 case 'D':
4030 // print summary of all valid directives
4031 debugmode = 1;
4032 Directives();
4033 EXIT(0);
4034 break;
4035 case 'i':
4036 // Period (time interval) for checking
4037 // strtol will set errno in the event of overflow, so we'll check it.
4038 errno = 0;
4039 lchecktime = strtol(optarg, &tailptr, 10);
4040 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4041 debugmode=1;
4042 PrintHead();
4043 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4044 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4045 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4046 EXIT(EXIT_BADCMD);
4047 }
4048 checktime = (int)lchecktime;
4049 break;
4050 case 'r':
4051 // report IOCTL transactions
4052 {
4053 int i;
4054 char *s;
4055
4056 // split_report_arg() may modify its first argument string, so use a
4057 // copy of optarg in case we want optarg for an error message.
4058 if (!(s = strdup(optarg))) {
4059 PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
4060 EXIT(EXIT_NOMEM);
4061 }
4062 if (split_report_arg(s, &i)) {
4063 badarg = true;
4064 } else if (i<1 || i>3) {
4065 debugmode=1;
4066 PrintHead();
4067 PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
4068 PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4069 EXIT(EXIT_BADCMD);
4070 } else if (!strcmp(s,"ioctl")) {
4071 ata_debugmode = scsi_debugmode = i;
4072 } else if (!strcmp(s,"ataioctl")) {
4073 ata_debugmode = i;
4074 } else if (!strcmp(s,"scsiioctl")) {
4075 scsi_debugmode = i;
4076 } else {
4077 badarg = true;
4078 }
4079 free(s); // TODO: use std::string
4080 }
4081 break;
4082 case 'c':
4083 // alternate configuration file
4084 if (strcmp(optarg,"-"))
4085 configfile = (configfile_alt = optarg).c_str();
4086 else // read from stdin
4087 configfile=configfile_stdin;
4088 break;
4089 case 'p':
4090 // output file with PID number
4091 pid_file = optarg;
4092 break;
4093 case 's':
4094 // path prefix of persistent state file
4095 state_path_prefix = optarg;
4096 break;
4097 case 'A':
4098 // path prefix of attribute log file
4099 attrlog_path_prefix = optarg;
4100 break;
4101 case 'B':
4102 {
4103 const char * path = optarg;
4104 if (*path == '+' && path[1])
4105 path++;
4106 else
4107 no_defaultdb = true;
4108 unsigned char savedebug = debugmode; debugmode = 1;
4109 if (!read_drive_database(path))
4110 EXIT(EXIT_BADCMD);
4111 debugmode = savedebug;
4112 }
4113 break;
4114 case 'V':
4115 // print version and CVS info
4116 debugmode = 1;
4117 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4118 EXIT(0);
4119 break;
4120 #ifdef HAVE_LIBCAP_NG
4121 case 'C':
4122 // enable capabilities
4123 enable_capabilities = true;
4124 break;
4125 #endif
4126 case 'h':
4127 // help: print summary of command-line options
4128 debugmode=1;
4129 PrintHead();
4130 Usage();
4131 EXIT(0);
4132 break;
4133 case '?':
4134 default:
4135 // unrecognized option
4136 debugmode=1;
4137 PrintHead();
4138 // Point arg to the argument in which this option was found.
4139 arg = argv[optind-1];
4140 // Check whether the option is a long option that doesn't map to -h.
4141 if (arg[1] == '-' && optchar != 'h') {
4142 // Iff optopt holds a valid option then argument must be missing.
4143 if (optopt && (strchr(shortopts, optopt) != NULL)) {
4144 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4145 PrintValidArgs(optopt);
4146 } else {
4147 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4148 }
4149 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4150 EXIT(EXIT_BADCMD);
4151 }
4152 if (optopt) {
4153 // Iff optopt holds a valid option then argument must be missing.
4154 if (strchr(shortopts, optopt) != NULL){
4155 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4156 PrintValidArgs(optopt);
4157 } else {
4158 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4159 }
4160 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4161 EXIT(EXIT_BADCMD);
4162 }
4163 Usage();
4164 EXIT(0);
4165 }
4166
4167 // Check to see if option had an unrecognized or incorrect argument.
4168 if (badarg) {
4169 debugmode=1;
4170 PrintHead();
4171 // It would be nice to print the actual option name given by the user
4172 // here, but we just print the short form. Please fix this if you know
4173 // a clean way to do it.
4174 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4175 PrintValidArgs(optchar);
4176 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4177 EXIT(EXIT_BADCMD);
4178 }
4179 }
4180
4181 // non-option arguments are not allowed
4182 if (argc > optind) {
4183 debugmode=1;
4184 PrintHead();
4185 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4186 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4187 EXIT(EXIT_BADCMD);
4188 }
4189
4190 // no pidfile in debug mode
4191 if (debugmode && !pid_file.empty()) {
4192 debugmode=1;
4193 PrintHead();
4194 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4195 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4196 EXIT(EXIT_BADCMD);
4197 }
4198
4199 // absolute path is required due to chdir('/') after fork().
4200 if (!state_path_prefix.empty() && !debugmode && !is_abs_path(state_path_prefix.c_str())) {
4201 debugmode=1;
4202 PrintHead();
4203 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4204 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4205 state_path_prefix.c_str());
4206 EXIT(EXIT_BADCMD);
4207 }
4208
4209 // absolute path is required due to chdir('/') after fork().
4210 if (!attrlog_path_prefix.empty() && !debugmode && !is_abs_path(attrlog_path_prefix.c_str())) {
4211 debugmode=1;
4212 PrintHead();
4213 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4214 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4215 attrlog_path_prefix.c_str());
4216 EXIT(EXIT_BADCMD);
4217 }
4218
4219 // Read or init drive database
4220 if (!no_defaultdb) {
4221 unsigned char savedebug = debugmode; debugmode = 1;
4222 if (!read_default_drive_databases())
4223 EXIT(EXIT_BADCMD);
4224 debugmode = savedebug;
4225 }
4226
4227 // print header
4228 PrintHead();
4229 }
4230
4231 // Function we call if no configuration file was found or if the
4232 // SCANDIRECTIVE Directive was found. It makes entries for device
4233 // names returned by scan_smart_devices() in os_OSNAME.cpp
4234 static int MakeConfigEntries(const dev_config & base_cfg,
4235 dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4236 {
4237 // make list of devices
4238 smart_device_list devlist;
4239 if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4240 PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4241
4242 // if no devices, or error constructing list, return
4243 if (devlist.size() <= 0)
4244 return 0;
4245
4246 // add empty device slots for existing config entries
4247 while (scanned_devs.size() < conf_entries.size())
4248 scanned_devs.push_back((smart_device *)0);
4249
4250 // loop over entries to create
4251 for (unsigned i = 0; i < devlist.size(); i++) {
4252 // Move device pointer
4253 smart_device * dev = devlist.release(i);
4254 scanned_devs.push_back(dev);
4255
4256 // Copy configuration, update device and type name
4257 conf_entries.push_back(base_cfg);
4258 dev_config & cfg = conf_entries.back();
4259 cfg.name = dev->get_info().info_name;
4260 cfg.dev_name = dev->get_info().dev_name;
4261 cfg.dev_type = type;
4262 }
4263
4264 return devlist.size();
4265 }
4266
4267 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4268 {
4269 if (!debugmode && scandirective)
4270 return;
4271 if (line)
4272 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4273 "Unable to register %s device %s at line %d of file %s\n",
4274 type, name, line, configfile);
4275 else
4276 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4277 type, name);
4278 return;
4279 }
4280
4281 // Returns negative value (see ParseConfigFile()) if config file
4282 // had errors, else number of entries which may be zero or positive.
4283 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4284 {
4285 // parse configuration file configfile (normally /etc/smartd.conf)
4286 int entries = ParseConfigFile(conf_entries);
4287
4288 if (entries < 0) {
4289 // There was an error reading the configuration file.
4290 conf_entries.clear();
4291 if (entries == -1)
4292 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4293 return entries;
4294 }
4295
4296 // no error parsing config file.
4297 if (entries) {
4298 // we did not find a SCANDIRECTIVE and did find valid entries
4299 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4300 }
4301 else if (!conf_entries.empty()) {
4302 // we found a SCANDIRECTIVE or there was no configuration file so
4303 // scan. Configuration file's last entry contains all options
4304 // that were set
4305 dev_config first = conf_entries.back();
4306 conf_entries.pop_back();
4307
4308 if (first.lineno)
4309 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4310 else
4311 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4312
4313 // make config list of devices to search for
4314 MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4315
4316 // warn user if scan table found no devices
4317 if (conf_entries.empty())
4318 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4319 }
4320 else
4321 PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4322
4323 return conf_entries.size();
4324 }
4325
4326
4327 // This function tries devices from conf_entries. Each one that can be
4328 // registered is moved onto the [ata|scsi]devices lists and removed
4329 // from the conf_entries list.
4330 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4331 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4332 {
4333 // start by clearing lists/memory of ALL existing devices
4334 configs.clear();
4335 devices.clear();
4336 states.clear();
4337
4338 // Register entries
4339 for (unsigned i = 0; i < conf_entries.size(); i++){
4340
4341 dev_config cfg = conf_entries[i];
4342
4343 // get device of appropriate type
4344 smart_device_auto_ptr dev;
4345 bool scanning = false;
4346
4347 // Device may already be detected during devicescan
4348 if (i < scanned_devs.size()) {
4349 dev = scanned_devs.release(i);
4350 if (dev)
4351 scanning = true;
4352 }
4353
4354 if (!dev) {
4355 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4356 if (!dev) {
4357 if (cfg.dev_type.empty())
4358 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4359 else
4360 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4361 continue;
4362 }
4363 }
4364
4365 // Save old info
4366 smart_device::device_info oldinfo = dev->get_info();
4367
4368 // Open with autodetect support, may return 'better' device
4369 dev.replace( dev->autodetect_open() );
4370
4371 // Report if type has changed
4372 if (oldinfo.dev_type != dev->get_dev_type())
4373 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4374 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4375
4376 if (!dev->is_open()) {
4377 // For linux+devfs, a nonexistent device gives a strange error
4378 // message. This makes the error message a bit more sensible.
4379 // If no debug and scanning - don't print errors
4380 if (debugmode || !scanning)
4381 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4382 continue;
4383 }
4384
4385 // Update informal name
4386 cfg.name = dev->get_info().info_name;
4387 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4388
4389 // Prepare initial state
4390 dev_state state;
4391
4392 // register ATA devices
4393 if (dev->is_ata()){
4394 if (ATADeviceScan(cfg, state, dev->to_ata())) {
4395 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4396 dev.reset();
4397 }
4398 }
4399 // or register SCSI devices
4400 else if (dev->is_scsi()){
4401 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4402 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4403 dev.reset();
4404 }
4405 }
4406 else {
4407 PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4408 dev.reset();
4409 }
4410
4411 if (dev) {
4412 // move onto the list of devices
4413 configs.push_back(cfg);
4414 states.push_back(state);
4415 devices.push_back(dev);
4416 }
4417 // if device is explictly listed and we can't register it, then
4418 // exit unless the user has specified that the device is removable
4419 else if (!scanning) {
4420 if (cfg.removable || quit==2)
4421 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4422 else {
4423 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4424 EXIT(EXIT_BADDEV);
4425 }
4426 }
4427 }
4428 }
4429
4430
4431 // Main program without exception handling
4432 static int main_worker(int argc, char **argv)
4433 {
4434 // Initialize interface
4435 smart_interface::init();
4436 if (!smi())
4437 return 1;
4438
4439 // is it our first pass through?
4440 bool firstpass = true;
4441
4442 // next time to wake up
4443 time_t wakeuptime = 0;
4444
4445 // parse input and print header and usage info if needed
4446 ParseOpts(argc,argv);
4447
4448 // Configuration for each device
4449 dev_config_vector configs;
4450 // Device states
4451 dev_state_vector states;
4452 // Devices to monitor
4453 smart_device_list devices;
4454
4455 bool write_states_always = true;
4456
4457 #ifdef HAVE_LIBCAP_NG
4458 // Drop capabilities
4459 if (enable_capabilities) {
4460 capng_clear(CAPNG_SELECT_BOTH);
4461 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
4462 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
4463 capng_apply(CAPNG_SELECT_BOTH);
4464 }
4465 #endif
4466
4467 // the main loop of the code
4468 for (;;) {
4469
4470 // are we exiting from a signal?
4471 if (caughtsigEXIT) {
4472 // are we exiting with SIGTERM?
4473 int isterm=(caughtsigEXIT==SIGTERM);
4474 int isquit=(caughtsigEXIT==SIGQUIT);
4475 int isok=debugmode?isterm || isquit:isterm;
4476
4477 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4478 caughtsigEXIT, strsignal(caughtsigEXIT));
4479
4480 if (!isok)
4481 return EXIT_SIGNAL;
4482
4483 // Write state files
4484 if (!state_path_prefix.empty())
4485 write_all_dev_states(configs, states);
4486
4487 return 0;
4488 }
4489
4490 // Should we (re)read the config file?
4491 if (firstpass || caughtsigHUP){
4492 if (!firstpass) {
4493 #ifdef __CYGWIN__
4494 // Workaround for missing SIGQUIT via keyboard on Cygwin
4495 if (caughtsigHUP==2) {
4496 // Simulate SIGQUIT if another SIGINT arrives soon
4497 caughtsigHUP=0;
4498 sleep(1);
4499 if (caughtsigHUP==2) {
4500 caughtsigEXIT=SIGQUIT;
4501 continue;
4502 }
4503 caughtsigHUP=2;
4504 }
4505 #endif
4506 // Write state files
4507 if (!state_path_prefix.empty())
4508 write_all_dev_states(configs, states);
4509
4510 PrintOut(LOG_INFO,
4511 caughtsigHUP==1?
4512 "Signal HUP - rereading configuration file %s\n":
4513 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4514 configfile);
4515 }
4516
4517 {
4518 dev_config_vector conf_entries; // Entries read from smartd.conf
4519 smart_device_list scanned_devs; // Devices found during scan
4520 // (re)reads config file, makes >=0 entries
4521 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
4522
4523 if (entries>=0) {
4524 // checks devices, then moves onto ata/scsi list or deallocates.
4525 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
4526 if (!(configs.size() == devices.size() && configs.size() == states.size()))
4527 throw std::logic_error("Invalid result from RegisterDevices");
4528 }
4529 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4530 // user has asked to continue on error in configuration file
4531 if (!firstpass)
4532 PrintOut(LOG_INFO,"Reusing previous configuration\n");
4533 }
4534 else {
4535 // exit with configuration file error status
4536 return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4537 }
4538 }
4539
4540 // Log number of devices we are monitoring...
4541 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
4542 int numata = 0;
4543 for (unsigned i = 0; i < devices.size(); i++) {
4544 if (devices.at(i)->is_ata())
4545 numata++;
4546 }
4547 PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4548 numata, devices.size() - numata);
4549 }
4550 else {
4551 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4552 return EXIT_NODEV;
4553 }
4554
4555 if (quit==4) {
4556 // user has asked to print test schedule
4557 PrintTestSchedule(configs, states, devices);
4558 return 0;
4559 }
4560
4561 #ifdef HAVE_LIBCAP_NG
4562 if (enable_capabilities) {
4563 for (unsigned i = 0; i < configs.size(); i++) {
4564 if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
4565 PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
4566 break;
4567 }
4568 }
4569 }
4570 #endif
4571
4572 // reset signal
4573 caughtsigHUP=0;
4574
4575 // Always write state files after (re)configuration
4576 write_states_always = true;
4577 }
4578
4579 // check all devices once,
4580 // self tests are not started in first pass unless '-q onecheck' is specified
4581 CheckDevicesOnce(configs, states, devices, (!firstpass || quit==3));
4582
4583 // Write state files
4584 if (!state_path_prefix.empty())
4585 write_all_dev_states(configs, states, write_states_always);
4586 write_states_always = false;
4587
4588 // Write attribute logs
4589 if (!attrlog_path_prefix.empty())
4590 write_all_dev_attrlogs(configs, states);
4591
4592 // user has asked us to exit after first check
4593 if (quit==3) {
4594 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4595 "smartd is exiting (exit status 0)\n");
4596 return 0;
4597 }
4598
4599 // fork into background if needed
4600 if (firstpass && !debugmode) {
4601 DaemonInit();
4602 }
4603
4604 // set exit and signal handlers, write PID file, set wake-up time
4605 if (firstpass){
4606 Initialize(&wakeuptime);
4607 firstpass = false;
4608 }
4609
4610 // sleep until next check time, or a signal arrives
4611 wakeuptime = dosleep(wakeuptime, write_states_always);
4612 }
4613 }
4614
4615
4616 #ifndef _WIN32
4617 // Main program
4618 int main(int argc, char **argv)
4619 #else
4620 // Windows: internal main function started direct or by service control manager
4621 static int smartd_main(int argc, char **argv)
4622 #endif
4623 {
4624 int status;
4625 try {
4626 // Do the real work ...
4627 status = main_worker(argc, argv);
4628 }
4629 catch (int ex) {
4630 // EXIT(status) arrives here
4631 status = ex;
4632 }
4633 catch (const std::bad_alloc & /*ex*/) {
4634 // Memory allocation failed (also thrown by std::operator new)
4635 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
4636 status = EXIT_NOMEM;
4637 }
4638 catch (const std::exception & ex) {
4639 // Other fatal errors
4640 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
4641 status = EXIT_BADCODE;
4642 }
4643
4644 if (is_initialized)
4645 status = Goodbye(status);
4646
4647 #ifdef _WIN32
4648 daemon_winsvc_exitcode = status;
4649 #endif
4650 return status;
4651 }
4652
4653
#ifdef _WIN32
// Program entry point for Windows.
// Passes control to daemon_main(), which handles the daemon and service
// specific commands and runs smartd_main() direct, from a new process,
// or via service control manager.
int main(int argc, char **argv)
{
  // Settings used when smartd runs as a windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd", "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  return daemon_main("smartd", &service_options, smartd_main, argc, argv);
}
#endif