]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Imported Upstream version 5.39.1+svn3124
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://smartmontools.sourceforge.net
3 *
4 * Copyright (C) 2002-10 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7 * Copyright (C) 2008-10 Christian Franke <smartmontools-support@lists.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #ifndef _GNU_SOURCE
25 // TODO: Why is this define necessary?
26 #define _GNU_SOURCE
27 #endif
28
29 // unconditionally included files
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/stat.h> // umask
33 #include <signal.h>
34 #include <fcntl.h>
35 #include <string.h>
36 #include <syslog.h>
37 #include <stdarg.h>
38 #include <stdlib.h>
39 #include <errno.h>
40 #include <time.h>
41 #include <limits.h>
42 #include <getopt.h>
43
44 #include <stdexcept>
45 #include <string>
46 #include <vector>
47 #include <algorithm> // std::replace()
48
49 // see which system files to conditionally include
50 #include "config.h"
51
52 // conditionally included files
53 #ifndef _WIN32
54 #include <sys/wait.h>
55 #endif
56 #ifdef HAVE_UNISTD_H
57 #include <unistd.h>
58 #endif
59 #ifdef HAVE_NETDB_H
60 #include <netdb.h>
61 #endif
62
63 #ifdef _WIN32
64 #ifdef _MSC_VER
65 #pragma warning(disable:4761) // "conversion supplied"
66 typedef unsigned short mode_t;
67 typedef int pid_t;
68 #endif
69 #include <io.h> // umask()
70 #include <process.h> // getpid()
71 #endif // _WIN32
72
73 #ifdef __CYGWIN__
74 // From <windows.h>:
75 // BOOL WINAPI FreeConsole(void);
76 extern "C" int __stdcall FreeConsole(void);
77 #include <io.h> // setmode()
78 #endif // __CYGWIN__
79
80 #ifdef HAVE_LIBCAP_NG
81 #include <cap-ng.h>
82 #endif // LIBCAP_NG
83
84 // locally included files
85 #include "int64.h"
86 #include "atacmds.h"
87 #include "dev_interface.h"
88 #include "extern.h"
89 #include "knowndrives.h"
90 #include "scsicmds.h"
91 #include "utility.h"
92
93 // This is for solaris, where signal() resets the handler to SIG_DFL
94 // after the first signal is caught.
95 #ifdef HAVE_SIGSET
96 #define SIGNALFN sigset
97 #else
98 #define SIGNALFN signal
99 #endif
100
101 #ifdef _WIN32
102 #include "hostname_win32.h" // gethost/domainname()
103 #define HAVE_GETHOSTNAME 1
104 #define HAVE_GETDOMAINNAME 1
105 // fork()/signal()/initd simulation for native Windows
106 #include "daemon_win32.h" // daemon_main/detach/signal()
107 #undef SIGNALFN
108 #define SIGNALFN daemon_signal
109 #define strsignal daemon_strsignal
110 #define sleep daemon_sleep
111 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
112 #define SIGQUIT SIGBREAK
113 #define SIGQUIT_KEYNAME "CONTROL-Break"
114 #else // _WIN32
115 #ifdef __CYGWIN__
116 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
117 #define SIGQUIT_KEYNAME "2x CONTROL-C"
118 #else // __CYGWIN__
119 #define SIGQUIT_KEYNAME "CONTROL-\\"
120 #endif // __CYGWIN__
121 #endif // _WIN32
122
123 #if defined (__SVR4) && defined (__sun)
124 extern "C" int getdomainname(char *, int); // no declaration in header files!
125 #endif
126
127 #define ARGUSED(x) ((void)(x))
128
129 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 3101 2010-05-04 16:03:18Z chrfranke $"
130 CONFIG_H_CVSID EXTERN_H_CVSID;
131
132 extern const char *reportbug;
133
134 extern unsigned char debugmode;
135
136 // smartd exit codes
137 #define EXIT_BADCMD 1 // command line did not parse
138 #define EXIT_BADCONF 2 // syntax error in config file
139 #define EXIT_STARTUP 3 // problem forking daemon
140 #define EXIT_PID 4 // problem creating pid file
141 #define EXIT_NOCONF 5 // config file does not exist
142 #define EXIT_READCONF 6 // config file exists but cannot be read
143
144 #define EXIT_NOMEM 8 // out of memory
145 #define EXIT_BADCODE 10 // internal error - should NEVER happen
146
147 #define EXIT_BADDEV 16 // we can't monitor this device
148 #define EXIT_NODEV 17 // no devices to monitor
149
150 #define EXIT_SIGNAL 254 // abort on signal
151
152 // command-line: how long to sleep between checks
153 #define CHECKTIME 1800
154 static int checktime=CHECKTIME;
155
156 // command-line: name of PID file (empty for no pid file)
157 static std::string pid_file;
158
159 // command-line: path prefix of persistent state file, empty if no persistence.
160 static std::string state_path_prefix
161 #ifdef SMARTMONTOOLS_SAVESTATES
162 = SMARTMONTOOLS_SAVESTATES
163 #endif
164 ;
165
166 // command-line: path prefix of attribute log file, empty if no logs.
167 static std::string attrlog_path_prefix
168 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
169 = SMARTMONTOOLS_ATTRIBUTELOG
170 #endif
171 ;
172
173 // configuration file name
174 static const char * configfile;
175 // configuration file "name" if read from stdin
176 static const char * const configfile_stdin = "<stdin>";
177 // path of alternate configuration file
178 static std::string configfile_alt;
179
180 // command-line: when should we exit?
181 static int quit=0;
182
183 // command-line; this is the default syslog(3) log facility to use.
184 static int facility=LOG_DAEMON;
185
186 #ifndef _WIN32
187 // command-line: fork into background?
188 static bool do_fork=true;
189 #endif
190
191 #ifdef HAVE_LIBCAP_NG
192 // command-line: enable capabilities?
193 static bool enable_capabilities = false;
194 #endif
195
196 // used for control of printing, passing arguments to atacmds.c
197 smartmonctrl *con=NULL;
198
199 // set to one if we catch a USR1 (check devices now)
200 volatile int caughtsigUSR1=0;
201
202 #ifdef _WIN32
203 // set to one if we catch a USR2 (toggle debug mode)
204 volatile int caughtsigUSR2=0;
205 #endif
206
207 // set to one if we catch a HUP (reload config file). In debug mode,
208 // set to two, if we catch INT (also reload config file).
209 volatile int caughtsigHUP=0;
210
211 // set to signal value if we catch INT, QUIT, or TERM
212 volatile int caughtsigEXIT=0;
213
// Flag bits controlling how a single attribute is monitored.
// See monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5
};
224
// Table with one byte of flag bits per attribute id.
// Id 0 and ids outside of the table are never stored and always
// read back as "not set".
class attribute_flags
{
public:
  // All flags start out cleared.
  attribute_flags()
    { memset(m_flags, 0, sizeof(m_flags)); }

  // True if at least one bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!valid_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  // Add the bits of 'flags' to the ones stored for attribute 'id'.
  void set(int id, unsigned char flags)
    {
      if (!valid_id(id))
        return;
      m_flags[id] |= flags;
    }

private:
  // Ids that own a slot in the table: 1 .. sizeof(m_flags)-1.
  bool valid_id(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
244
245
246 /// Configuration data for a device. Read from smartd.conf.
247 /// Supports copy & assignment and is compatible with STL containers.
248 struct dev_config
249 {
250 int lineno; // Line number of entry in file
251 std::string name; // Device name
252 std::string dev_type; // Device type argument from -d directive, empty if none
253 std::string state_file; // Path of the persistent state file, empty if none
254 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
255 bool smartcheck; // Check SMART status
256 bool usagefailed; // Check for failed Usage Attributes
257 bool prefail; // Track changes in Prefail Attributes
258 bool usage; // Track changes in Usage Attributes
259 bool selftest; // Monitor number of selftest errors
260 bool errorlog; // Monitor number of ATA errors
261 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
262 bool permissive; // Ignore failed SMART commands
263 char autosave; // 1=disable, 2=enable Autosave Attributes
264 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
265 unsigned char fix_firmwarebug; // FIX_*, see atacmds.h
266 bool ignorepresets; // Ignore database of -v options
267 bool showpresets; // Show database entry for this device
268 bool removable; // Device may disappear (not be present)
269 char powermode; // skip check, if disk in idle or standby mode
270 bool powerquiet; // skip powermode 'skipping checks' message
271 int powerskipmax; // how many times can be check skipped
272 unsigned char tempdiff; // Track Temperature changes >= this limit
273 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
274 regular_expression test_regex; // Regex for scheduled testing
275
276 // Configuration of email warning messages
277 std::string emailcmdline; // script to execute, empty if no messages
278 std::string emailaddress; // email address, or empty
279 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
280 bool emailtest; // Send test email?
281
282 // ATA ONLY
283 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
284 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
285 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
286 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
287
288 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
289
290 ata_vendor_attr_defs attribute_defs; // -v options
291
292 dev_config();
293 };
294
295 dev_config::dev_config()
296 : lineno(0),
297 smartcheck(false),
298 usagefailed(false),
299 prefail(false),
300 usage(false),
301 selftest(false),
302 errorlog(false),
303 xerrorlog(false),
304 permissive(false),
305 autosave(0),
306 autoofflinetest(0),
307 fix_firmwarebug(FIX_NOTSPECIFIED),
308 ignorepresets(false),
309 showpresets(false),
310 removable(false),
311 powermode(0),
312 powerquiet(false),
313 powerskipmax(0),
314 tempdiff(0),
315 tempinfo(0), tempcrit(0),
316 emailfreq(0),
317 emailtest(false),
318 curr_pending_id(0), offl_pending_id(0),
319 curr_pending_incr(false), offl_pending_incr(false),
320 curr_pending_set(false), offl_pending_set(false)
321 {
322 }
323
324
// Type used for '-M test' mails (its state is not persistent)
const int MAILTYPE_TEST = 0;
// How many mail message types are allowed
const int SMARTD_NMAIL = 13;
// TODO: Add const or enum for all mail types.
330
// Bookkeeping for one type of mail message.
struct mailinfo {
  int logged;        // how many times an email has been sent
  time_t firstsent;  // when the first email was sent, as defined by time(2)
  time_t lastsent;   // when the last email was sent, as defined by time(2)

  // A new record starts with all fields zero.
  mailinfo()
    : logged(0),
      firstsent(0),
      lastsent(0)
    { }
};
339
340 /// Persistent state data for a device.
341 struct persistent_dev_state
342 {
343 unsigned char tempmin, tempmax; // Min/Max Temperatures
344
345 unsigned char selflogcount; // total number of self-test errors
346 unsigned short selfloghour; // lifetime hours of last self-test error
347
348 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
349
350 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
351
352 // ATA ONLY
353 int ataerrorcount; // Total number of ATA errors
354
355 // Persistent part of ata_smart_values:
356 struct ata_attribute {
357 unsigned char id;
358 unsigned char val;
359 unsigned char worst; // Byte needed for 'raw64' attribute only.
360 uint64_t raw;
361
362 ata_attribute() : id(0), val(0), worst(0), raw(0) { }
363 };
364 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
365
366 persistent_dev_state();
367 };
368
369 persistent_dev_state::persistent_dev_state()
370 : tempmin(0), tempmax(0),
371 selflogcount(0),
372 selfloghour(0),
373 scheduled_test_next_check(0),
374 ataerrorcount(0)
375 {
376 }
377
378 /// Non-persistent state data for a device.
379 struct temp_dev_state
380 {
381 bool must_write; // true if persistent part should be written
382
383 bool not_cap_offline; // true == not capable of offline testing
384 bool not_cap_conveyance;
385 bool not_cap_short;
386 bool not_cap_long;
387 bool not_cap_selective;
388
389 unsigned char temperature; // last recorded Temperature (in Celsius)
390 time_t tempmin_delay; // time where Min Temperature tracking will start
391
392 bool powermodefail; // true if power mode check failed
393 int powerskipcnt; // Number of checks skipped due to idle or standby mode
394
395 // SCSI ONLY
396 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
397 unsigned char TempPageSupported; // has log sense temperature page (0xd)
398 unsigned char SuppressReport; // minimize nuisance reports
399 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
400 // know yet) 6 or 10
401
402 // ATA ONLY
403 uint64_t num_sectors; // Number of sectors (for selective self-test only)
404 ata_smart_values smartval; // SMART data
405 ata_smart_thresholds_pvt smartthres; // SMART thresholds
406
407 temp_dev_state();
408 };
409
410 temp_dev_state::temp_dev_state()
411 : must_write(false),
412 not_cap_offline(false),
413 not_cap_conveyance(false),
414 not_cap_short(false),
415 not_cap_long(false),
416 not_cap_selective(false),
417 temperature(0),
418 tempmin_delay(0),
419 powermodefail(false),
420 powerskipcnt(0),
421 SmartPageSupported(false),
422 TempPageSupported(false),
423 SuppressReport(false),
424 modese_len(0),
425 num_sectors(0)
426 {
427 memset(&smartval, 0, sizeof(smartval));
428 memset(&smartthres, 0, sizeof(smartthres));
429 }
430
431 /// Runtime state data for a device.
432 struct dev_state
433 : public persistent_dev_state,
434 public temp_dev_state
435 {
436 void update_persistent_state();
437 void update_temp_state();
438 };
439
440 /// Container for configuration info for each device.
441 typedef std::vector<dev_config> dev_config_vector;
442
443 /// Container for state info for each device.
444 typedef std::vector<dev_state> dev_state_vector;
445
446 // Copy ATA attributes to persistent state.
447 void dev_state::update_persistent_state()
448 {
449 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
450 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
451 ata_attribute & pa = ata_attributes[i];
452 pa.id = ta.id;
453 if (ta.id == 0) {
454 pa.val = pa.worst = 0; pa.raw = 0;
455 continue;
456 }
457 pa.val = ta.current;
458 pa.worst = ta.worst;
459 pa.raw = ta.raw[0]
460 | ( ta.raw[1] << 8)
461 | ( ta.raw[2] << 16)
462 | ((uint64_t)ta.raw[3] << 24)
463 | ((uint64_t)ta.raw[4] << 32)
464 | ((uint64_t)ta.raw[5] << 40);
465 }
466 }
467
468 // Copy ATA from persistent to temp state.
469 void dev_state::update_temp_state()
470 {
471 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
472 const ata_attribute & pa = ata_attributes[i];
473 ata_smart_attribute & ta = smartval.vendor_attributes[i];
474 ta.id = pa.id;
475 if (pa.id == 0) {
476 ta.current = ta.worst = 0;
477 memset(ta.raw, 0, sizeof(ta.raw));
478 continue;
479 }
480 ta.current = pa.val;
481 ta.worst = pa.worst;
482 ta.raw[0] = (unsigned char) pa.raw;
483 ta.raw[1] = (unsigned char)(pa.raw >> 8);
484 ta.raw[2] = (unsigned char)(pa.raw >> 16);
485 ta.raw[3] = (unsigned char)(pa.raw >> 24);
486 ta.raw[4] = (unsigned char)(pa.raw >> 32);
487 ta.raw[5] = (unsigned char)(pa.raw >> 40);
488 }
489 }
490
491 // Parse a line from a state file.
492 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
493 {
494 static regular_expression regex(
495 "^ *"
496 "((temperature-min)" // (1 (2)
497 "|(temperature-max)" // (3)
498 "|(self-test-errors)" // (4)
499 "|(self-test-last-err-hour)" // (5)
500 "|(scheduled-test-next-check)" // (6)
501 "|(ata-error-count)" // (7)
502 "|(mail\\.([0-9]+)\\." // (8 (9)
503 "((count)" // (10 (11)
504 "|(first-sent-time)" // (12)
505 "|(last-sent-time)" // (13)
506 ")" // 10)
507 ")" // 8)
508 "|(ata-smart-attribute\\.([0-9]+)\\." // (14 (15)
509 "((id)" // (16 (17)
510 "|(val)" // (18)
511 "|(worst)" // (19)
512 "|(raw)" // (20)
513 ")" // 16)
514 ")" // 14)
515 ")" // 1)
516 " *= *([0-9]+)[ \n]*$", // (21)
517 REG_EXTENDED
518 );
519 if (regex.empty())
520 throw std::logic_error("parse_dev_state_line: invalid regex");
521
522 const int nmatch = 1+21;
523 regmatch_t match[nmatch];
524 if (!regex.execute(line, nmatch, match))
525 return false;
526 if (match[nmatch-1].rm_so < 0)
527 return false;
528
529 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
530
531 int m = 1;
532 if (match[++m].rm_so >= 0)
533 state.tempmin = (unsigned char)val;
534 else if (match[++m].rm_so >= 0)
535 state.tempmax = (unsigned char)val;
536 else if (match[++m].rm_so >= 0)
537 state.selflogcount = (unsigned char)val;
538 else if (match[++m].rm_so >= 0)
539 state.selfloghour = (unsigned short)val;
540 else if (match[++m].rm_so >= 0)
541 state.scheduled_test_next_check = (time_t)val;
542 else if (match[++m].rm_so >= 0)
543 state.ataerrorcount = (int)val;
544 else if (match[m+=2].rm_so >= 0) {
545 int i = atoi(line+match[m].rm_so);
546 if (!(0 <= i && i < SMARTD_NMAIL))
547 return false;
548 if (i == MAILTYPE_TEST) // Don't suppress test mails
549 return true;
550 if (match[m+=2].rm_so >= 0)
551 state.maillog[i].logged = (int)val;
552 else if (match[++m].rm_so >= 0)
553 state.maillog[i].firstsent = (time_t)val;
554 else if (match[++m].rm_so >= 0)
555 state.maillog[i].lastsent = (time_t)val;
556 else
557 return false;
558 }
559 else if (match[m+=5+1].rm_so >= 0) {
560 int i = atoi(line+match[m].rm_so);
561 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
562 return false;
563 if (match[m+=2].rm_so >= 0)
564 state.ata_attributes[i].id = (unsigned char)val;
565 else if (match[++m].rm_so >= 0)
566 state.ata_attributes[i].val = (unsigned char)val;
567 else if (match[++m].rm_so >= 0)
568 state.ata_attributes[i].worst = (unsigned char)val;
569 else if (match[++m].rm_so >= 0)
570 state.ata_attributes[i].raw = val;
571 else
572 return false;
573 }
574 else
575 return false;
576 return true;
577 }
578
579 // Read a state file.
580 static bool read_dev_state(const char * path, persistent_dev_state & state)
581 {
582 stdio_file f(path, "r");
583 if (!f) {
584 if (errno != ENOENT)
585 pout("Cannot read state file \"%s\"\n", path);
586 return false;
587 }
588 #ifdef __CYGWIN__
589 setmode(fileno(f), O_TEXT); // Allow files with \r\n
590 #endif
591
592 persistent_dev_state new_state;
593 int good = 0, bad = 0;
594 char line[256];
595 while (fgets(line, sizeof(line), f)) {
596 const char * s = line + strspn(line, " \t");
597 if (!*s || *s == '#')
598 continue;
599 if (!parse_dev_state_line(line, new_state))
600 bad++;
601 else
602 good++;
603 }
604
605 if (bad) {
606 if (!good) {
607 pout("%s: format error\n", path);
608 return false;
609 }
610 pout("%s: %d invalid line(s) ignored\n", path, bad);
611 }
612
613 // This sets the values missing in the file to 0.
614 state = new_state;
615 return true;
616 }
617
// Write the state file line "<name> = <val>".
// Nothing is written if 'val' is 0, because values missing in the
// file are read back as 0 anyway.
// Note: the spaces around PRIu64 are required, C++11 and later would
// else treat the macro name as a suffix of the string literal.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val)
    fprintf(f, "%s = %" PRIu64 "\n", name, val);
}

// Write the state file line "<name1>.<id>.<name2> = <val>".
// Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val)
    fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
629
630 // Write a state file
631 static bool write_dev_state(const char * path, const persistent_dev_state & state)
632 {
633 // Rename old "file" to "file~"
634 std::string pathbak = path; pathbak += '~';
635 unlink(pathbak.c_str());
636 rename(path, pathbak.c_str());
637
638 stdio_file f(path, "w");
639 if (!f) {
640 pout("Cannot create state file \"%s\"\n", path);
641 return false;
642 }
643
644 fprintf(f, "# smartd state file\n");
645 write_dev_state_line(f, "temperature-min", state.tempmin);
646 write_dev_state_line(f, "temperature-max", state.tempmax);
647 write_dev_state_line(f, "self-test-errors", state.selflogcount);
648 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
649 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
650
651 int i;
652 for (i = 0; i < SMARTD_NMAIL; i++) {
653 if (i == MAILTYPE_TEST) // Don't suppress test mails
654 continue;
655 const mailinfo & mi = state.maillog[i];
656 if (!mi.logged)
657 continue;
658 write_dev_state_line(f, "mail", i, "count", mi.logged);
659 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
660 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
661 }
662
663 // ATA ONLY
664 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
665
666 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
667 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
668 if (!pa.id)
669 continue;
670 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
671 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
672 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
673 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
674 }
675
676 return true;
677 }
678
679 // Write to the attrlog file
680 static bool write_dev_attrlog(const char * path, const persistent_dev_state & state)
681 {
682 stdio_file f(path, "a");
683 if (!f) {
684 pout("Cannot create attribute log file \"%s\"\n", path);
685 return false;
686 }
687
688 // ATA ONLY
689 time_t now = time(0);
690 struct tm * tms = gmtime(&now);
691 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
692 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
693 tms->tm_hour, tms->tm_min, tms->tm_sec);
694 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
695 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
696 if (!pa.id)
697 continue;
698 fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
699 }
700 fprintf(f, "\n");
701
702 return true;
703 }
704
705 // Write all state files. If write_always is false, don't write
706 // unless must_write is set.
707 static void write_all_dev_states(const dev_config_vector & configs,
708 dev_state_vector & states,
709 bool write_always = true)
710 {
711 for (unsigned i = 0; i < states.size(); i++) {
712 const dev_config & cfg = configs.at(i);
713 if (cfg.state_file.empty())
714 continue;
715 dev_state & state = states[i];
716 if (!write_always && !state.must_write)
717 continue;
718 if (!write_dev_state(cfg.state_file.c_str(), state))
719 continue;
720 state.must_write = false;
721 if (write_always || debugmode)
722 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
723 cfg.name.c_str(), cfg.state_file.c_str());
724 }
725 }
726
727 // Write to all attrlog files
728 static void write_all_dev_attrlogs(const dev_config_vector & configs,
729 dev_state_vector & states)
730 {
731 for (unsigned i = 0; i < states.size(); i++) {
732 const dev_config & cfg = configs.at(i);
733 if (cfg.attrlog_file.empty())
734 continue;
735 dev_state & state = states[i];
736 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
737 }
738 }
739
740 // remove the PID file
741 void RemovePidFile(){
742 if (!pid_file.empty()) {
743 if (unlink(pid_file.c_str()))
744 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
745 pid_file.c_str(), strerror(errno));
746 pid_file.clear();
747 }
748 return;
749 }
750
751 extern "C" { // signal handlers require C-linkage
752
753 // Note if we catch a SIGUSR1
754 void USR1handler(int sig){
755 if (SIGUSR1==sig)
756 caughtsigUSR1=1;
757 return;
758 }
759
760 #ifdef _WIN32
761 // Note if we catch a SIGUSR2
762 void USR2handler(int sig){
763 if (SIGUSR2==sig)
764 caughtsigUSR2=1;
765 return;
766 }
767 #endif
768
769 // Note if we catch a HUP (or INT in debug mode)
770 void HUPhandler(int sig){
771 if (sig==SIGHUP)
772 caughtsigHUP=1;
773 else
774 caughtsigHUP=2;
775 return;
776 }
777
778 // signal handler for TERM, QUIT, and INT (if not in debug mode)
779 void sighandler(int sig){
780 if (!caughtsigEXIT)
781 caughtsigEXIT=sig;
782 return;
783 }
784
785 } // extern "C"
786
787 // Cleanup, print Goodbye message and remove pidfile
788 static int Goodbye(int status)
789 {
790 // delete PID file, if one was created
791 RemovePidFile();
792
793 // if we are exiting because of a code bug, tell user
794 if (status==EXIT_BADCODE)
795 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
796
797 // and this should be the final output from smartd before it exits
798 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
799
800 return status;
801 }
802
#define ENVLENGTH 1024

// Replacement for setenv(), which is not available on all platforms.
// "name=value" is formatted into 'stackspace' (ENVLENGTH bytes are
// used at most) and then passed to putenv().  Since putenv() keeps a
// pointer to the string, the buffer must stay valid until the
// variable is redefined or deleted by another putenv() call.  The
// callers keep the buffers on the stack for as long as the popen()
// call is underway.  Returns the result of putenv().
int exportenv(char* stackspace, const char *name, const char *value){
  snprintf(stackspace, ENVLENGTH, "%s=%s", name, value);
  const int result = putenv(stackspace);
  return result;
}
816
817 char* dnsdomain(const char* hostname) {
818 char *p = NULL;
819 #ifdef HAVE_GETADDRINFO
820 static char canon_name[NI_MAXHOST];
821 struct addrinfo *info = NULL;
822 struct addrinfo hints;
823 int err;
824
825 memset(&hints, 0, sizeof(hints));
826 hints.ai_flags = AI_CANONNAME;
827 if ((err = getaddrinfo(hostname, NULL, &hints, &info)) || (!info)) {
828 PrintOut(LOG_CRIT, "Error retrieving getaddrinfo(%s): %s\n", hostname, gai_strerror(err));
829 return NULL;
830 }
831 if (info->ai_canonname) {
832 strncpy(canon_name, info->ai_canonname, sizeof(canon_name));
833 canon_name[NI_MAXHOST - 1] = '\0';
834 p = canon_name;
835 if ((p = strchr(canon_name, '.')))
836 p++;
837 }
838 freeaddrinfo(info);
839 #elif HAVE_GETHOSTBYNAME
840 struct hostent *hp;
841 if ((hp = gethostbyname(hostname))) {
842 // Does this work if gethostbyname() returns an IPv6 name in
843 // colon/dot notation? [BA]
844 if ((p = strchr(hp->h_name, '.')))
845 p++; // skip "."
846 }
847 #else
848 ARGUSED(hostname);
849 #endif
850 return p;
851 }
852
853 #define EBUFLEN 1024
854
855 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
856 __attribute__ ((format (printf, 4, 5)));
857
858 // If either address or executable path is non-null then send and log
859 // a warning email, or execute executable
860 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...){
861 char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
862 char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
863 char environ_strings[11][ENVLENGTH];
864 time_t epoch;
865 va_list ap;
866 const int day=24*3600;
867 int days=0;
868 const char * const whichfail[]={
869 "EmailTest", // 0
870 "Health", // 1
871 "Usage", // 2
872 "SelfTest", // 3
873 "ErrorCount", // 4
874 "FailedHealthCheck", // 5
875 "FailedReadSmartData", // 6
876 "FailedReadSmartErrorLog", // 7
877 "FailedReadSmartSelfTestLog", // 8
878 "FailedOpenDevice", // 9
879 "CurrentPendingSector", // 10
880 "OfflineUncorrectableSector", // 11
881 "Temperature" // 12
882 };
883
884 const char *unknown="[Unknown]";
885
886 // See if user wants us to send mail
887 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
888 return;
889
890 std::string address = cfg.emailaddress;
891 const char * executable = cfg.emailcmdline.c_str();
892
893 // which type of mail are we sending?
894 mailinfo * mail=(state.maillog)+which;
895
896 // checks for sanity
897 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
898 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
899 return;
900 }
901 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
902 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
903 which, (int)sizeof(whichfail));
904 return;
905 }
906
907 // Return if a single warning mail has been sent.
908 if ((cfg.emailfreq==1) && mail->logged)
909 return;
910
911 // Return if this is an email test and one has already been sent.
912 if (which == 0 && mail->logged)
913 return;
914
915 // To decide if to send mail, we need to know what time it is.
916 epoch=time(NULL);
917
918 // Return if less than one day has gone by
919 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
920 return;
921
922 // Return if less than 2^(logged-1) days have gone by
923 if (cfg.emailfreq==3 && mail->logged) {
924 days=0x01<<(mail->logged-1);
925 days*=day;
926 if (epoch<(mail->lastsent+days))
927 return;
928 }
929
930 #ifdef HAVE_LIBCAP_NG
931 if (enable_capabilities) {
932 PrintOut(LOG_ERR, "Sending a mail was supressed. "
933 "Mails can't be send when capabilites are enabled\n");
934 return;
935 }
936 #endif
937
938 // record the time of this mail message, and the first mail message
939 if (!mail->logged)
940 mail->firstsent=epoch;
941 mail->lastsent=epoch;
942
943 // get system host & domain names (not null terminated if length=MAX)
944 #ifdef HAVE_GETHOSTNAME
945 if (gethostname(hostname, 256))
946 strcpy(hostname, unknown);
947 else {
948 char *p=NULL;
949 hostname[255]='\0';
950 p = dnsdomain(hostname);
951 if (p && *p) {
952 strncpy(domainname, p, 255);
953 domainname[255]='\0';
954 } else
955 strcpy(domainname, unknown);
956 }
957 #else
958 strcpy(hostname, unknown);
959 strcpy(domainname, unknown);
960 #endif
961
962 #ifdef HAVE_GETDOMAINNAME
963 if (getdomainname(nisdomain, 256))
964 strcpy(nisdomain, unknown);
965 else
966 nisdomain[255]='\0';
967 #else
968 strcpy(nisdomain, unknown);
969 #endif
970
971 // print warning string into message
972 va_start(ap, fmt);
973 vsnprintf(message, 256, fmt, ap);
974 va_end(ap);
975
976 // appropriate message about further information
977 additional[0]=original[0]=further[0]='\0';
978 if (which) {
979 sprintf(further,"You can also use the smartctl utility for further investigation.\n");
980
981 switch (cfg.emailfreq) {
982 case 1:
983 sprintf(additional,"No additional email messages about this problem will be sent.\n");
984 break;
985 case 2:
986 sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
987 break;
988 case 3:
989 sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
990 (0x01)<<mail->logged);
991 break;
992 }
993 if (cfg.emailfreq>1 && mail->logged) {
994 dateandtimezoneepoch(dates, mail->firstsent);
995 sprintf(original,"The original email about this issue was sent at %s\n", dates);
996 }
997 }
998
999 snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
1000
1001 // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
1002 if (!*executable)
1003 #ifdef DEFAULT_MAILER
1004 executable = DEFAULT_MAILER ;
1005 #else
1006 #ifndef _WIN32
1007 executable = "mail";
1008 #else
1009 executable = "blat"; // http://blat.sourceforge.net/
1010 #endif
1011 #endif
1012
1013 #ifndef _WIN32 // blat mailer needs comma
1014 // replace commas by spaces to separate recipients
1015 std::replace(address.begin(), address.end(), ',', ' ');
1016 #endif
1017 // Export information in environment variables that will be useful
1018 // for user scripts
1019 exportenv(environ_strings[0], "SMARTD_MAILER", executable);
1020 exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
1021 exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
1022 dateandtimezoneepoch(dates, mail->firstsent);
1023 exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
1024 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1025 exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
1026 exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
1027 if (!address.empty())
1028 exportenv(environ_strings[6], "SMARTD_ADDRESS", address.c_str());
1029 exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg.name.c_str());
1030
1031 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", cfg.dev_type.c_str());
1032 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg.name.c_str());
1033
1034 snprintf(fullmessage, 1024,
1035 "This email was generated by the smartd daemon running on:\n\n"
1036 " host name: %s\n"
1037 " DNS domain: %s\n"
1038 " NIS domain: %s\n\n"
1039 "The following warning/error was logged by the smartd daemon:\n\n"
1040 "%s\n\n"
1041 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
1042 "%s%s%s",
1043 hostname, domainname, nisdomain, message, further, original, additional);
1044 exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
1045
1046 // now construct a command to send this as EMAIL
1047 #ifndef _WIN32
1048 if (!address.empty())
1049 snprintf(command, 2048,
1050 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1051 "%sENDMAIL\n", subject, address.c_str(), fullmessage);
1052 else
1053 snprintf(command, 2048, "%s 2>&1", executable);
1054
1055 // tell SYSLOG what we are about to do...
1056 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1057 const char * newwarn = (which? "Warning via" : "Test of");
1058
1059 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1060 which?"Sending warning via":"Executing test of", executable, newadd);
1061
1062 // issue the command to send mail or to run the user's executable
1063 errno=0;
1064 FILE * pfp;
1065 if (!(pfp=popen(command, "r")))
1066 // failed to popen() mail process
1067 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1068 newwarn, executable, newadd, errno?strerror(errno):"");
1069 else {
1070 // pipe suceeded!
1071 int len, status;
1072 char buffer[EBUFLEN];
1073
1074 // if unexpected output on stdout/stderr, null terminate, print, and flush
1075 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1076 int count=0;
1077 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1078 buffer[newlen]='\0';
1079 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1080 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1081
1082 // flush pipe if needed
1083 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1084 count++;
1085
1086 // tell user that pipe was flushed, or that something is really wrong
1087 if (count && count<EBUFLEN)
1088 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1089 newwarn, executable, newadd);
1090 else if (count)
1091 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1092 newwarn, executable, newadd);
1093 }
1094
1095 // if something went wrong with mail process, print warning
1096 errno=0;
1097 if (-1==(status=pclose(pfp)))
1098 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1099 errno?strerror(errno):"");
1100 else {
1101 // mail process apparently succeeded. Check and report exit status
1102 int status8;
1103
1104 if (WIFEXITED(status)) {
1105 // exited 'normally' (but perhaps with nonzero status)
1106 status8=WEXITSTATUS(status);
1107
1108 if (status8>128)
1109 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1110 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1111 else if (status8)
1112 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1113 newwarn, executable, newadd, status, status8);
1114 else
1115 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1116 }
1117
1118 if (WIFSIGNALED(status))
1119 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1120 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1121
1122 // this branch is probably not possible. If subprocess is
1123 // stopped then pclose() should not return.
1124 if (WIFSTOPPED(status))
1125 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1126 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1127
1128 }
1129 }
1130
1131 #else // _WIN32
1132
1133 // No "here-documents" on Windows, so must use separate commandline and stdin
1134 char stdinbuf[1024];
1135 command[0] = stdinbuf[0] = 0;
1136 int boxtype = -1, boxmsgoffs = 0;
1137 const char * newadd = "<nomailer>";
1138 if (!address.empty()) {
1139 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1140 char addr1[9+1+13] = ""; int n1 = -1, n2 = -1;
1141 if (sscanf(address.c_str(), "%9[a-z]%n,%n", addr1, &n1, &n2) == 1 && (n1 == (int)address.size() || n2 > 0)) {
1142 if (!strcmp(addr1, "msgbox"))
1143 boxtype = 0;
1144 else if (!strcmp(addr1, "sysmsgbox"))
1145 boxtype = 1;
1146 if (boxtype >= 0)
1147 address.erase(0, (n2 > n1 ? n2 : n1));
1148 }
1149
1150 if (!address.empty()) {
1151 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1152 snprintf(command, sizeof(command),
1153 "%s - -q -subject \"%s\" -to \"%s\"",
1154 executable, subject, address.c_str());
1155 newadd = address.c_str();
1156 }
1157
1158 #ifdef _MSC_VER
1159 _set_printf_count_output(1); // "%n" disabled by default
1160 #endif
1161 // Message for mail [0...] and messagebox [boxmsgoffs...]
1162 snprintf(stdinbuf, sizeof(stdinbuf),
1163 "This email was generated by the smartd daemon running on:\n\n"
1164 " host name: %s\n"
1165 " DNS domain: %s\n"
1166 // " NIS domain: %s\n"
1167 "\n%n"
1168 "The following warning/error was logged by the smartd daemon:\n\n"
1169 "%s\n\n"
1170 "For details see the event log or log file of smartd.\n\n"
1171 "%s%s%s"
1172 "\n",
1173 hostname, /*domainname, */ nisdomain, &boxmsgoffs, message, further, original, additional);
1174 }
1175 else
1176 snprintf(command, sizeof(command), "%s", executable);
1177
1178 const char * newwarn = (which ? "Warning via" : "Test of");
1179 if (boxtype >= 0) {
1180 // show message box
1181 daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
1182 PrintOut(LOG_INFO,"%s message box\n", newwarn);
1183 }
1184 if (command[0]) {
1185 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1186 int rc;
1187 // run command
1188 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1189 (which?"Sending warning via":"Executing test of"), executable, newadd);
1190 rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
1191 if (rc >= 0 && stdoutbuf[0])
1192 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1193 newwarn, executable, newadd, strlen(stdoutbuf), stdoutbuf);
1194 if (rc != 0)
1195 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1196 newwarn, executable, newadd, rc);
1197 else
1198 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1199 }
1200
1201 #endif // _WIN32
1202
1203 // increment mail sent counter
1204 mail->logged++;
1205 }
1206
1207 // Printing function for watching ataprint commands, or losing them
1208 // [From GLIBC Manual: Since the prototype doesn't specify types for
1209 // optional arguments, in a call to a variadic function the default
1210 // argument promotions are performed on the optional argument
1211 // values. This means the objects of type char or short int (whether
1212 // signed or not) are promoted to either int or unsigned int, as
1213 // appropriate.]
1214 void pout(const char *fmt, ...){
1215 va_list ap;
1216
1217 // get the correct time in syslog()
1218 FixGlibcTimeZoneBug();
1219 // initialize variable argument list
1220 va_start(ap,fmt);
1221 // in debug==1 mode we will print the output from the ataprint.o functions!
1222 if (debugmode && debugmode!=2)
1223 #ifdef _WIN32
1224 if (facility == LOG_LOCAL1) // logging to stdout
1225 vfprintf(stderr,fmt,ap);
1226 else
1227 #endif
1228 vprintf(fmt,ap);
1229 // in debug==2 mode we print output from knowndrives.o functions
1230 else if (debugmode==2 || con->reportataioctl || con->reportscsiioctl /*|| con->controller_port???*/) {
1231 openlog("smartd", LOG_PID, facility);
1232 vsyslog(LOG_INFO, fmt, ap);
1233 closelog();
1234 }
1235 va_end(ap);
1236 fflush(NULL);
1237 return;
1238 }
1239
1240 // This function prints either to stdout or to the syslog as needed.
1241 // This function is also used by utility.cpp to report LOG_CRIT errors.
1242 void PrintOut(int priority, const char *fmt, ...){
1243 va_list ap;
1244
1245 // get the correct time in syslog()
1246 FixGlibcTimeZoneBug();
1247 // initialize variable argument list
1248 va_start(ap,fmt);
1249 if (debugmode)
1250 #ifdef _WIN32
1251 if (facility == LOG_LOCAL1) // logging to stdout
1252 vfprintf(stderr,fmt,ap);
1253 else
1254 #endif
1255 vprintf(fmt,ap);
1256 else {
1257 openlog("smartd", LOG_PID, facility);
1258 vsyslog(priority,fmt,ap);
1259 closelog();
1260 }
1261 va_end(ap);
1262 return;
1263 }
1264
1265 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1266 void checksumwarning(const char * string)
1267 {
1268 pout("Warning! %s error: invalid SMART checksum.\n", string);
1269 }
1270
1271 // Wait for the pid file to show up, this makes sure a calling program knows
1272 // that the daemon is really up and running and has a pid to kill it
1273 bool WaitForPidFile()
1274 {
1275 int waited, max_wait = 10;
1276 struct stat stat_buf;
1277
1278 if (pid_file.empty() || debugmode)
1279 return true;
1280
1281 for(waited = 0; waited < max_wait; ++waited) {
1282 if (!stat(pid_file.c_str(), &stat_buf)) {
1283 return true;
1284 } else
1285 sleep(1);
1286 }
1287 return false;
1288 }
1289
1290
1291 // Forks new process, closes ALL file descriptors, redirects stdin,
1292 // stdout, and stderr. Not quite daemon(). See
1293 // http://www.linuxjournal.com/article/2335
1294 // for a good description of why we do things this way.
1295 void DaemonInit(){
1296 #ifndef _WIN32
1297 pid_t pid;
1298 int i;
1299
1300 // flush all buffered streams. Else we might get two copies of open
1301 // streams since both parent and child get copies of the buffers.
1302 fflush(NULL);
1303
1304 if (do_fork) {
1305 if ((pid=fork()) < 0) {
1306 // unable to fork!
1307 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1308 EXIT(EXIT_STARTUP);
1309 }
1310 else if (pid) {
1311 // we are the parent process, wait for pid file, then exit cleanly
1312 if(!WaitForPidFile()) {
1313 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1314 EXIT(EXIT_STARTUP);
1315 } else
1316 EXIT(0);
1317 }
1318
1319 // from here on, we are the child process.
1320 setsid();
1321
1322 // Fork one more time to avoid any possibility of having terminals
1323 if ((pid=fork()) < 0) {
1324 // unable to fork!
1325 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1326 EXIT(EXIT_STARTUP);
1327 }
1328 else if (pid)
1329 // we are the parent process -- exit cleanly
1330 EXIT(0);
1331
1332 // Now we are the child's child...
1333 }
1334
1335 // close any open file descriptors
1336 for (i=getdtablesize();i>=0;--i)
1337 close(i);
1338
1339 #ifdef __CYGWIN__
1340 // Cygwin's setsid() does not detach the process from Windows console
1341 FreeConsole();
1342 #endif // __CYGWIN__
1343
1344 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1345
1346 // redirect any IO attempts to /dev/null for stdin
1347 i=open("/dev/null",O_RDWR);
1348 if (i>=0) {
1349 // stdout
1350 NO_warn_unused_result(dup(i));
1351 // stderr
1352 NO_warn_unused_result(dup(i));
1353 };
1354 umask(0022);
1355 NO_warn_unused_result(chdir("/"));
1356
1357 if (do_fork)
1358 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1359
1360 #else // _WIN32
1361
1362 // No fork() on native Win32
1363 // Detach this process from console
1364 fflush(NULL);
1365 if (daemon_detach("smartd")) {
1366 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1367 EXIT(EXIT_STARTUP);
1368 }
1369 // stdin/out/err now closed if not redirected
1370
1371 #endif // _WIN32
1372 return;
1373 }
1374
1375 // create a PID file containing the current process id
1376 static void WritePidFile()
1377 {
1378 if (!pid_file.empty()) {
1379 pid_t pid = getpid();
1380 mode_t old_umask;
1381 #ifndef __CYGWIN__
1382 old_umask = umask(0077); // rwx------
1383 #else
1384 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1385 old_umask = umask(0033); // rwxr--r--
1386 #endif
1387
1388 stdio_file f(pid_file.c_str(), "w");
1389 umask(old_umask);
1390 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1391 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1392 EXIT(EXIT_PID);
1393 }
1394 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1395 }
1396 }
1397
1398 // Prints header identifying version of code and home
1399 static void PrintHead()
1400 {
1401 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1402 }
1403
1404 // prints help info for configuration file Directives
1405 void Directives() {
1406 PrintOut(LOG_INFO,
1407 "Configuration file (%s) Directives (after device name):\n"
1408 " -d TYPE Set the device type: %s\n"
1409 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1410 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1411 " -S VAL Enable/disable attribute autosave (on/off)\n"
1412 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1413 " -H Monitor SMART Health Status, report if failed\n"
1414 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1415 " -l TYPE Monitor SMART log. Type is one of: error, selftest, xerror\n"
1416 " -f Monitor 'Usage' Attributes, report failures\n"
1417 " -m ADD Send email warning to address ADD\n"
1418 " -M TYPE Modify email warning behavior (see man page)\n"
1419 " -p Report changes in 'Prefailure' Attributes\n"
1420 " -u Report changes in 'Usage' Attributes\n"
1421 " -t Equivalent to -p and -u Directives\n"
1422 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1423 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1424 " -i ID Ignore Attribute ID for -f Directive\n"
1425 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1426 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1427 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1428 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1429 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1430 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1431 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1432 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1433 " # Comment: text after a hash sign is ignored\n"
1434 " \\ Line continuation character\n"
1435 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1436 "Use ID = 0 to turn off -C and/or -U Directives\n"
1437 "Example: /dev/hda -a\n",
1438 configfile, smi()->get_valid_dev_types_str().c_str());
1439 return;
1440 }
1441
/* Returns a pointer to a static string describing the valid arguments of
   the command line option opt, or NULL if opt is not one of the options
   handled here. */
const char *GetValidArgList(char opt) {
  // options taking a path prefix
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";

  if (opt == 'c')
    return "<FILE_NAME>, -";

  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";

  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";

  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";

  // options taking a plain file name
  if (opt == 'B' || opt == 'p')
    return "<FILE_NAME>";

  if (opt == 'i')
    return "<INTEGER_SECONDS>";

  return NULL;
}
1466
1467 /* prints help information for command syntax */
1468 void Usage (void){
1469 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1470 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1471 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1472 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1473 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1474 #endif
1475 PrintOut(LOG_INFO,"\n");
1476 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1477 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1478 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1479 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1480 PrintOut(LOG_INFO,"\n");
1481 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1482 #endif
1483 PrintOut(LOG_INFO,"]\n\n");
1484 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1485 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1486 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1487 #ifdef HAVE_LIBCAP_NG
1488 PrintOut(LOG_INFO," -C, --capabilities\n");
1489 PrintOut(LOG_INFO," Use capabilities (EXPERIMENTAL).\n"
1490 " Warning: Mail notification does not work when used.\n\n");
1491 #endif
1492 PrintOut(LOG_INFO," -d, --debug\n");
1493 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1494 PrintOut(LOG_INFO," -D, --showdirectives\n");
1495 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1496 PrintOut(LOG_INFO," -h, --help, --usage\n");
1497 PrintOut(LOG_INFO," Display this help and exit\n\n");
1498 PrintOut(LOG_INFO," -i N, --interval=N\n");
1499 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1500 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1501 #ifndef _WIN32
1502 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1503 #else
1504 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1505 #endif
1506 #ifndef _WIN32
1507 PrintOut(LOG_INFO," -n, --no-fork\n");
1508 PrintOut(LOG_INFO," Do not fork into background\n\n");
1509 #endif // _WIN32
1510 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1511 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1512 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1513 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1514 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1515 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1516 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1517 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1518 #ifdef SMARTMONTOOLS_SAVESTATES
1519 PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1520 #endif
1521 PrintOut(LOG_INFO,"\n");
1522 #ifdef _WIN32
1523 PrintOut(LOG_INFO," --service\n");
1524 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1525 PrintOut(LOG_INFO," smartd install [options]\n");
1526 PrintOut(LOG_INFO," Remove service with:\n");
1527 PrintOut(LOG_INFO," smartd remove\n\n");
1528 #endif // _WIN32
1529 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1530 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1531 }
1532
1533 static int CloseDevice(smart_device * device, const char * name)
1534 {
1535 if (!device->close()){
1536 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1537 return 1;
1538 }
1539 // device sucessfully closed
1540 return 0;
1541 }
1542
// Tells whether a char must not appear in a state file name.
// Only the ASCII digits and the ASCII letters A-Z and a-z are allowed;
// for every other char the result is true.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false; // digit
  if ('A' <= c && c <= 'Z')
    return false; // upper case letter
  if ('a' <= c && c <= 'z')
    return false; // lower case letter
  return true;
}
1550
1551 // Read error count from Summary or Extended Comprehensive SMART error log
1552 // Return -1 on error
1553 static int read_ata_error_count(ata_device * device, const char * name,
1554 unsigned char fix_firmwarebug, bool extended)
1555 {
1556 if (!extended) {
1557 ata_smart_errorlog log;
1558 if (ataReadErrorLog(device, &log, fix_firmwarebug)){
1559 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1560 return -1;
1561 }
1562 return (log.error_log_pointer ? log.ata_error_count : 0);
1563 }
1564 else {
1565 ata_smart_exterrlog logx;
1566 if (!ataReadExtErrorLog(device, &logx, 1 /*first sector only*/)) {
1567 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1568 return -1;
1569 }
1570 // Some disks use the reserved byte as index, see ataprint.cpp.
1571 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1572 }
1573 }
1574
1575 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1576 // error count, and top bits are the power-on hours of the last error.
1577 static int SelfTestErrorCount(ata_device * device, const char * name,
1578 unsigned char fix_firmwarebug)
1579 {
1580 struct ata_smart_selftestlog log;
1581
1582 if (ataReadSelfTestLog(device, &log, fix_firmwarebug)){
1583 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1584 return -1;
1585 }
1586
1587 // return current number of self-test errors
1588 return ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug);
1589 }
1590
// Unpack a combined self-test result value: the low 8 bits hold the error
// count, the 16 bits above them the hours.  The argument is parenthesized
// so that compound expressions (e.g. 'a | b') are evaluated as a whole
// before masking/shifting.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1593
1594 // Log self-test execution status
1595 static void log_self_test_exec_status(const char * name, unsigned char status)
1596 {
1597 const char * msg;
1598 switch (status >> 4) {
1599 case 0x0: msg = "completed without error"; break;
1600 case 0x1: msg = "was aborted by the host"; break;
1601 case 0x2: msg = "was interrupted by the host with a reset"; break;
1602 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1603 case 0x4: msg = "completed with error (unknown test element)"; break;
1604 case 0x5: msg = "completed with error (electrical test element)"; break;
1605 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1606 case 0x7: msg = "completed with error (read test element)"; break;
1607 case 0x8: msg = "completed with error (handling damage?)"; break;
1608 default: msg = 0;
1609 }
1610
1611 if (msg)
1612 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1613 "Device: %s, previous self-test %s\n", name, msg);
1614 else if ((status >> 4) == 0xf)
1615 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1616 name, status & 0x0f);
1617 else
1618 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1619 name, status);
1620 }
1621
1622
// Value handed to apply_presets() as its 'fix_swapped_id' argument.
// Fixed to false for now.
// TODO: Add '-F swapid' directive
const bool fix_swapped_id = false;
1625
1626 // scan to see what ata devices there are, and if they support SMART
1627 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1628 {
1629 int supported=0;
1630 struct ata_identify_device drive;
1631 const char *name = cfg.name.c_str();
1632 int retid;
1633
1634 // Device must be open
1635
1636 // Get drive identity structure
1637 if ((retid=ataReadHDIdentity (atadev, &drive))){
1638 if (retid<0)
1639 // Unable to read Identity structure
1640 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1641 else
1642 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1643 name, packetdevicetype(retid-1));
1644 CloseDevice(atadev, name);
1645 return 2;
1646 }
1647 // Store drive size (for selective self-test only)
1648 state.num_sectors = get_num_sectors(&drive);
1649
1650 // Show if device in database, and use preset vendor attribute
1651 // options unless user has requested otherwise.
1652 if (cfg.ignorepresets)
1653 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1654 else {
1655 // do whatever applypresets decides to do.
1656 if (!apply_presets(&drive, cfg.attribute_defs, cfg.fix_firmwarebug, fix_swapped_id))
1657 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1658 else
1659 PrintOut(LOG_INFO, "Device: %s, found in smartd database.\n", name);
1660 }
1661
1662 // Set default '-C 197[+]' if no '-C ID' is specified.
1663 if (!cfg.curr_pending_set)
1664 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1665 // Set default '-U 198[+]' if no '-U ID' is specified.
1666 if (!cfg.offl_pending_set)
1667 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1668
1669 // If requested, show which presets would be used for this drive
1670 if (cfg.showpresets) {
1671 int savedebugmode=debugmode;
1672 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1673 if (!debugmode)
1674 debugmode=2;
1675 show_presets(&drive, false);
1676 debugmode=savedebugmode;
1677 }
1678
1679 // see if drive supports SMART
1680 supported=ataSmartSupport(&drive);
1681 if (supported!=1) {
1682 if (supported==0)
1683 // drive does NOT support SMART
1684 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1685 else
1686 // can't tell if drive supports SMART
1687 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1688
1689 // should we proceed anyway?
1690 if (cfg.permissive) {
1691 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1692 }
1693 else {
1694 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1695 CloseDevice(atadev, name);
1696 return 2;
1697 }
1698 }
1699
1700 if (ataEnableSmart(atadev)) {
1701 // Enable SMART command has failed
1702 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1703 CloseDevice(atadev, name);
1704 return 2;
1705 }
1706
1707 // disable device attribute autosave...
1708 if (cfg.autosave==1) {
1709 if (ataDisableAutoSave(atadev))
1710 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1711 else
1712 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1713 }
1714
1715 // or enable device attribute autosave
1716 if (cfg.autosave==2) {
1717 if (ataEnableAutoSave(atadev))
1718 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1719 else
1720 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1721 }
1722
1723 // capability check: SMART status
1724 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1725 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1726 cfg.smartcheck = false;
1727 }
1728
1729 // capability check: Read smart values and thresholds. Note that
1730 // smart values are ALSO needed even if we ONLY want to know if the
1731 // device is self-test log or error-log capable! After ATA-5, this
1732 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1733 // but sadly not for ATA-5. Sigh.
1734
1735 // do we need to get SMART data?
1736 bool smart_val_ok = false;
1737 if ( cfg.autoofflinetest || cfg.selftest
1738 || cfg.errorlog || cfg.xerrorlog
1739 || cfg.usagefailed || cfg.prefail || cfg.usage
1740 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1741 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1742
1743 if (ataReadSmartValues(atadev, &state.smartval)) {
1744 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1745 cfg.usagefailed = cfg.prefail = cfg.usage = false;
1746 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1747 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1748 }
1749 else {
1750 smart_val_ok = true;
1751 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1752 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1753 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1754 cfg.usagefailed = false;
1755 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1756 memset(&state.smartthres, 0, sizeof(state.smartthres));
1757 }
1758 }
1759
1760 // see if the necessary Attribute is there to monitor offline or
1761 // current pending sectors or temperature
1762 if (cfg.curr_pending_id && ata_find_attr_index(cfg.curr_pending_id, state.smartval) < 0) {
1763 PrintOut(LOG_INFO,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1764 name, cfg.curr_pending_id);
1765 cfg.curr_pending_id = 0;
1766 }
1767
1768 if (cfg.offl_pending_id && ata_find_attr_index(cfg.offl_pending_id, state.smartval) < 0) {
1769 PrintOut(LOG_INFO,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1770 name, cfg.offl_pending_id);
1771 cfg.offl_pending_id = 0;
1772 }
1773
1774 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1775 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1776 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
1777 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1778 }
1779 }
1780
1781 // enable/disable automatic on-line testing
1782 if (cfg.autoofflinetest) {
1783 // is this an enable or disable request?
1784 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1785 if (!smart_val_ok)
1786 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1787 else {
1788 // if command appears unsupported, issue a warning...
1789 if (!isSupportAutomaticTimer(&state.smartval))
1790 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1791 // ... but then try anyway
1792 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1793 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1794 else
1795 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1796 }
1797 }
1798
1799 // capability check: self-test-log
1800 if (cfg.selftest) {
1801 int retval;
1802
1803 // start with service disabled, and re-enable it if all works OK
1804 cfg.selftest = false;
1805 state.selflogcount = 0;
1806 state.selfloghour = 0;
1807
1808 if (!smart_val_ok)
1809 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name);
1810 else if (!cfg.permissive && !isSmartTestLogCapable(&state.smartval, &drive))
1811 PrintOut(LOG_INFO, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name);
1812 else if ((retval = SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0)
1813 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name);
1814 else {
1815 cfg.selftest = true;
1816 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1817 state.selfloghour =SELFTEST_ERRORHOURS(retval);
1818 }
1819 }
1820
1821 // capability check: ATA error log
1822 if (cfg.errorlog || cfg.xerrorlog) {
1823
1824 state.ataerrorcount=0;
1825 if (!(cfg.permissive || (smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
1826 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log (%s), ignoring -l [x]error (override with -T permissive)\n",
1827 name, (!smart_val_ok ? "SMART READ DATA failed" : "capability missing"));
1828 cfg.errorlog = cfg.xerrorlog = false;
1829 }
1830 else {
1831 int errcnt1 = -1, errcnt2 = -1;
1832 if (cfg.errorlog && (errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false)) < 0) {
1833 PrintOut(LOG_INFO, "Device: %s, no Summary SMART Error Log, ignoring -l error\n", name);
1834 cfg.errorlog = false;
1835 }
1836 if (cfg.xerrorlog && (errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true)) < 0) {
1837 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
1838 cfg.xerrorlog = false;
1839 }
1840 if (cfg.errorlog || cfg.xerrorlog) {
1841 if (cfg.errorlog && cfg.xerrorlog && errcnt1 != errcnt2) {
1842 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
1843 name, errcnt1, errcnt2);
1844 }
1845 // Record max error count
1846 state.ataerrorcount = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
1847 }
1848 }
1849 }
1850
1851 // capabilities check -- does it support powermode?
1852 if (cfg.powermode) {
1853 int powermode = ataCheckPowerMode(atadev);
1854
1855 if (-1 == powermode) {
1856 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
1857 cfg.powermode=0;
1858 }
1859 else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
1860 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1861 name, powermode);
1862 cfg.powermode=0;
1863 }
1864 }
1865
1866 // If no tests available or selected, return
1867 if (!( cfg.smartcheck || cfg.selftest
1868 || cfg.errorlog || cfg.xerrorlog
1869 || cfg.usagefailed || cfg.prefail || cfg.usage
1870 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
1871 CloseDevice(atadev, name);
1872 return 3;
1873 }
1874
1875 // tell user we are registering device
1876 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
1877
1878 // record number of device, type of device, increment device count
1879 if (cfg.dev_type.empty())
1880 cfg.dev_type = "ata";
1881
1882 // close file descriptor
1883 CloseDevice(atadev, name);
1884
1885 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
1886 // Build file name for state file
1887 char model[40+1], serial[20+1];
1888 format_ata_string(model, drive.model, sizeof(model)-1, fix_swapped_id);
1889 format_ata_string(serial, drive.serial_no, sizeof(serial)-1, fix_swapped_id);
1890 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
1891 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
1892 if (!state_path_prefix.empty()) {
1893 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
1894 // Read previous state
1895 if (read_dev_state(cfg.state_file.c_str(), state)) {
1896 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
1897 // Copy ATA attribute values to temp state
1898 state.update_temp_state();
1899 }
1900 }
1901 if (!attrlog_path_prefix.empty())
1902 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
1903 }
1904
1905 // Start self-test regex check now if time was not read from state file
1906 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1907 state.scheduled_test_next_check = time(0);
1908
1909 return 0;
1910 }
1911
1912 // on success, return 0. On failure, return >0. Never return <0,
1913 // please.
1914 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
1915 {
1916 int k, err;
1917 const char *device = cfg.name.c_str();
1918 struct scsi_iec_mode_page iec;
1919 UINT8 tBuf[64];
1920
1921 // Device must be open
1922
1923 // check that device is ready for commands. IE stores its stuff on
1924 // the media.
1925 if ((err = scsiTestUnitReady(scsidev))) {
1926 if (SIMPLE_ERR_NOT_READY == err)
1927 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
1928 else if (SIMPLE_ERR_NO_MEDIUM == err)
1929 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
1930 else if (SIMPLE_ERR_BECOMING_READY == err)
1931 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
1932 else
1933 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
1934 CloseDevice(scsidev, device);
1935 return 2;
1936 }
1937
1938 // Badly-conforming USB storage devices may fail this check.
1939 // The response to the following IE mode page fetch (current and
1940 // changeable values) is carefully examined. It has been found
1941 // that various USB devices that malform the response will lock up
1942 // if asked for a log page (e.g. temperature) so it is best to
1943 // bail out now.
1944 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
1945 state.modese_len = iec.modese_len;
1946 else if (SIMPLE_ERR_BAD_FIELD == err)
1947 ; /* continue since it is reasonable not to support IE mpage */
1948 else { /* any other error (including malformed response) unreasonable */
1949 PrintOut(LOG_INFO,
1950 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1951 device, err);
1952 CloseDevice(scsidev, device);
1953 return 3;
1954 }
1955
1956 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1957 // smart if it is off). This may change to be the same as the ATA side.
1958 if (!scsi_IsExceptionControlEnabled(&iec)) {
1959 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
1960 "Try 'smartctl -s on %s' to turn on SMART features\n",
1961 device, device);
1962 CloseDevice(scsidev, device);
1963 return 3;
1964 }
1965
1966 // Flag that certain log pages are supported (information may be
1967 // available from other sources).
1968 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
1969 for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
1970 switch (tBuf[k]) {
1971 case TEMPERATURE_LPAGE:
1972 state.TempPageSupported = 1;
1973 break;
1974 case IE_LPAGE:
1975 state.SmartPageSupported = 1;
1976 break;
1977 default:
1978 break;
1979 }
1980 }
1981 }
1982
1983 // record type of device
1984 if (cfg.dev_type.empty())
1985 cfg.dev_type = "scsi";
1986
1987 // Check if scsiCheckIE() is going to work
1988 {
1989 UINT8 asc = 0;
1990 UINT8 ascq = 0;
1991 UINT8 currenttemp = 0;
1992 UINT8 triptemp = 0;
1993
1994 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
1995 &asc, &ascq, &currenttemp, &triptemp)) {
1996 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
1997 state.SuppressReport = 1;
1998 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
1999 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
2000 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2001 }
2002 }
2003 }
2004
2005 // capability check: self-test-log
2006 if (cfg.selftest){
2007 int retval = scsiCountFailedSelfTests(scsidev, 0);
2008 if (retval<0) {
2009 // no self-test log, turn off monitoring
2010 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2011 cfg.selftest = false;
2012 state.selflogcount = 0;
2013 state.selfloghour = 0;
2014 }
2015 else {
2016 // register starting values to watch for changes
2017 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2018 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2019 }
2020 }
2021
2022 // disable autosave (set GLTSD bit)
2023 if (cfg.autosave==1){
2024 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2025 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2026 else
2027 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2028 }
2029
2030 // or enable autosave (clear GLTSD bit)
2031 if (cfg.autosave==2){
2032 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2033 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2034 else
2035 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2036 }
2037
2038 // tell user we are registering device
2039 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2040
2041 // TODO: Build file name for state file
2042 if (!state_path_prefix.empty()) {
2043 PrintOut(LOG_INFO, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device);
2044 }
2045 // TODO: Build file name for attribute log file
2046 if (!attrlog_path_prefix.empty()) {
2047 PrintOut(LOG_INFO, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device);
2048 }
2049
2050 // close file descriptor
2051 CloseDevice(scsidev, device);
2052
2053 // Start self-test regex check now if time was not read from state file
2054 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
2055 state.scheduled_test_next_check = time(0);
2056
2057 return 0;
2058 }
2059
2060 // If the self-test log has got more self-test errors (or more recent
2061 // self-test errors) recorded, then notify user.
2062 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2063 {
2064 const char * name = cfg.name.c_str();
2065
2066 if (newi<0)
2067 // command failed
2068 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2069 else {
2070 // old and new error counts
2071 int oldc=state.selflogcount;
2072 int newc=SELFTEST_ERRORCOUNT(newi);
2073
2074 // old and new error timestamps in hours
2075 int oldh=state.selfloghour;
2076 int newh=SELFTEST_ERRORHOURS(newi);
2077
2078 if (oldc<newc) {
2079 // increase in error count
2080 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2081 name, oldc, newc);
2082 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2083 name, oldc, newc);
2084 state.must_write = true;
2085 }
2086 else if (newc > 0 && oldh != newh) {
2087 // more recent error
2088 // a 'more recent' error might actually be a smaller hour number,
2089 // if the hour number has wrapped.
2090 // There's still a bug here. You might just happen to run a new test
2091 // exactly 32768 hours after the previous failure, and have run exactly
2092 // 20 tests between the two, in which case smartd will miss the
2093 // new failure.
2094 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2095 name, newh);
2096 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2097 name, newh);
2098 state.must_write = true;
2099 }
2100
2101 // Print info if error entries have disappeared
2102 if (oldc > newc)
2103 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2104 name, oldc, newc);
2105
2106 // Needed since self-test error count may DECREASE. Hour might
2107 // also have changed.
2108 state.selflogcount= newc;
2109 state.selfloghour = newh;
2110 }
2111 return;
2112 }
2113
// Characters identifying the schedulable self-test types.  The order
// defines the priority: an earlier character wins over a later one.
static const char test_type_chars[] = "LncrSCO";

// Number of test types (the terminating NUL of the string is not counted).
const unsigned num_test_types = sizeof(test_type_chars)/sizeof(test_type_chars[0]) - 1;
2117
2118 // returns test type if time to do test of type testtype,
2119 // 0 if not time to do test.
2120 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2121 {
2122 // check that self-testing has been requested
2123 if (cfg.test_regex.empty())
2124 return 0;
2125
2126 // Exit if drive not capable of any test
2127 if ( state.not_cap_long && state.not_cap_short &&
2128 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2129 return 0;
2130
2131 // since we are about to call localtime(), be sure glibc is informed
2132 // of any timezone changes we make.
2133 if (!usetime)
2134 FixGlibcTimeZoneBug();
2135
2136 // Is it time for next check?
2137 time_t now = (!usetime ? time(0) : usetime);
2138 if (now < state.scheduled_test_next_check)
2139 return 0;
2140
2141 // Limit time check interval to 90 days
2142 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2143 state.scheduled_test_next_check = now - (3600L*24*90);
2144
2145 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2146 char testtype = 0;
2147 time_t testtime = 0; int testhour = 0;
2148 int maxtest = num_test_types-1;
2149
2150 for (time_t t = state.scheduled_test_next_check; ; ) {
2151 struct tm * tms = localtime(&t);
2152 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2153 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2154 for (int i = 0; i <= maxtest; i++) {
2155 // Skip if drive not capable of this test
2156 switch (test_type_chars[i]) {
2157 case 'L': if (state.not_cap_long) continue; break;
2158 case 'S': if (state.not_cap_short) continue; break;
2159 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2160 case 'O': if (scsi || state.not_cap_offline) continue; break;
2161 case 'c': case 'n':
2162 case 'r': if (scsi || state.not_cap_selective) continue; break;
2163 default: continue;
2164 }
2165 // Try match of "T/MM/DD/d/HH"
2166 char pattern[16];
2167 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2168 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2169 if (cfg.test_regex.full_match(pattern)) {
2170 // Test found
2171 testtype = pattern[0];
2172 testtime = t; testhour = tms->tm_hour;
2173 // Limit further matches to higher priority self-tests
2174 maxtest = i-1;
2175 break;
2176 }
2177 }
2178 // Exit if no tests left or current time reached
2179 if (maxtest < 0)
2180 break;
2181 if (t >= now)
2182 break;
2183 // Check next hour
2184 if ((t += 3600) > now)
2185 t = now;
2186 }
2187
2188 // Do next check not before next hour.
2189 struct tm * tmnow = localtime(&now);
2190 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2191
2192 if (testtype) {
2193 state.must_write = true;
2194 // Tell user if an old test was found.
2195 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2196 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2197 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2198 cfg.name.c_str(), testtype, datebuf);
2199 }
2200 }
2201
2202 return testtype;
2203 }
2204
2205 // Print a list of future tests.
2206 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2207 {
2208 unsigned numdev = configs.size();
2209 if (!numdev)
2210 return;
2211 std::vector<int> testcnts(numdev * num_test_types, 0);
2212
2213 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2214
2215 // FixGlibcTimeZoneBug(); // done in PrintOut()
2216 time_t now = time(0);
2217 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2218 dateandtimezoneepoch(datenow, now);
2219
2220 long seconds;
2221 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2222 // Check for each device whether a test will be run
2223 time_t testtime = now + seconds;
2224 for (unsigned i = 0; i < numdev; i++) {
2225 const dev_config & cfg = configs.at(i);
2226 dev_state & state = states.at(i);
2227 const char * p;
2228 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2229 if (testtype && (p = strchr(test_type_chars, testtype))) {
2230 unsigned t = (p - test_type_chars);
2231 // Report at most 5 tests of each type
2232 if (++testcnts[i*num_test_types + t] <= 5) {
2233 dateandtimezoneepoch(date, testtime);
2234 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2235 testcnts[i*num_test_types + t], testtype, date);
2236 }
2237 }
2238 }
2239 }
2240
2241 // Report totals
2242 dateandtimezoneepoch(date, now+seconds);
2243 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2244 for (unsigned i = 0; i < numdev; i++) {
2245 const dev_config & cfg = configs.at(i);
2246 bool scsi = devices.at(i)->is_scsi();
2247 for (unsigned t = 0; t < num_test_types; t++) {
2248 int cnt = testcnts[i*num_test_types + t];
2249 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2250 continue;
2251 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2252 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2253 }
2254 }
2255
2256 }
2257
2258 // Return zero on success, nonzero on failure. Perform offline (background)
2259 // short or long (extended) self test on given scsi device.
2260 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2261 {
2262 int retval = 0;
2263 const char *testname = 0;
2264 const char *name = cfg.name.c_str();
2265 int inProgress;
2266
2267 if (scsiSelfTestInProgress(device, &inProgress)) {
2268 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2269 state.not_cap_short = state.not_cap_long = true;
2270 return 1;
2271 }
2272
2273 if (1 == inProgress) {
2274 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2275 "progress.\n", name);
2276 return 1;
2277 }
2278
2279 switch (testtype) {
2280 case 'S':
2281 testname = "Short Self";
2282 retval = scsiSmartShortSelfTest(device);
2283 break;
2284 case 'L':
2285 testname = "Long Self";
2286 retval = scsiSmartExtendSelfTest(device);
2287 break;
2288 }
2289 // If we can't do the test, exit
2290 if (NULL == testname) {
2291 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2292 testtype);
2293 return 1;
2294 }
2295 if (retval) {
2296 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2297 (SIMPLE_ERR_BAD_FIELD == retval)) {
2298 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2299 testname);
2300 if ('L'==testtype)
2301 state.not_cap_long = true;
2302 else
2303 state.not_cap_short = true;
2304
2305 return 1;
2306 }
2307 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2308 testname, retval);
2309 return 1;
2310 }
2311
2312 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2313
2314 return 0;
2315 }
2316
2317 // Do an offline immediate or self-test. Return zero on success,
2318 // nonzero on failure.
2319 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2320 {
2321 const char *name = cfg.name.c_str();
2322
2323 // Read current smart data and check status/capability
2324 struct ata_smart_values data;
2325 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2326 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2327 return 1;
2328 }
2329
2330 // Check for capability to do the test
2331 int dotest = -1, mode = 0;
2332 const char *testname = 0;
2333 switch (testtype) {
2334 case 'O':
2335 testname="Offline Immediate ";
2336 if (isSupportExecuteOfflineImmediate(&data))
2337 dotest=OFFLINE_FULL_SCAN;
2338 else
2339 state.not_cap_offline = true;
2340 break;
2341 case 'C':
2342 testname="Conveyance Self-";
2343 if (isSupportConveyanceSelfTest(&data))
2344 dotest=CONVEYANCE_SELF_TEST;
2345 else
2346 state.not_cap_conveyance = true;
2347 break;
2348 case 'S':
2349 testname="Short Self-";
2350 if (isSupportSelfTest(&data))
2351 dotest=SHORT_SELF_TEST;
2352 else
2353 state.not_cap_short = true;
2354 break;
2355 case 'L':
2356 testname="Long Self-";
2357 if (isSupportSelfTest(&data))
2358 dotest=EXTEND_SELF_TEST;
2359 else
2360 state.not_cap_long = true;
2361 break;
2362
2363 case 'c': case 'n': case 'r':
2364 testname = "Selective Self-";
2365 if (isSupportSelectiveSelfTest(&data)) {
2366 dotest = SELECTIVE_SELF_TEST;
2367 switch (testtype) {
2368 case 'c': mode = SEL_CONT; break;
2369 case 'n': mode = SEL_NEXT; break;
2370 case 'r': mode = SEL_REDO; break;
2371 }
2372 }
2373 else
2374 state.not_cap_selective = true;
2375 break;
2376 }
2377
2378 // If we can't do the test, exit
2379 if (dotest<0) {
2380 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2381 return 1;
2382 }
2383
2384 // If currently running a self-test, do not interrupt it to start another.
2385 if (15==(data.self_test_exec_status >> 4)) {
2386 if (cfg.fix_firmwarebug == FIX_SAMSUNG3 && data.self_test_exec_status == 0xf0) {
2387 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2388 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2389 } else {
2390 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2391 name, testname, (int)(data.self_test_exec_status & 0x0f));
2392 return 1;
2393 }
2394 }
2395
2396 if (dotest == SELECTIVE_SELF_TEST) {
2397 // Set test span
2398 ata_selective_selftest_args selargs;
2399 selargs.num_spans = 1;
2400 selargs.span[0].mode = mode;
2401 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors)) {
2402 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2403 return 1;
2404 }
2405 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2406 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2407 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2408 start, end, end - start + 1,
2409 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2410 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2411 }
2412
2413 // execute the test, and return status
2414 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2415 if (retval) {
2416 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2417 return retval;
2418 }
2419
2420 if (testtype != 'O')
2421 // Log next self-test execution status
2422 state.smartval.self_test_exec_status = 0xff;
2423
2424 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2425 return 0;
2426 }
2427
2428 // Check pending sector count attribute values (-C, -U directives).
2429 static void check_pending(const dev_config & cfg, dev_state & state,
2430 unsigned char id, bool increase_only,
2431 const ata_smart_values & smartval,
2432 int mailtype, const char * msg)
2433 {
2434 // Find attribute index
2435 int i = ata_find_attr_index(id, smartval);
2436 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2437 return;
2438
2439 // No report if no sectors pending.
2440 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2441 if (rawval == 0)
2442 return;
2443
2444 // If attribute is not reset, report only sector count increases.
2445 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2446 if (!(!increase_only || prev_rawval < rawval))
2447 return;
2448
2449 // Format message.
2450 std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2451 if (prev_rawval > 0 && rawval != prev_rawval)
2452 s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2453
2454 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2455 MailWarning(cfg, state, mailtype, "%s\n", s.c_str());
2456 state.must_write = true;
2457 }
2458
// Format a temperature value as decimal text into 'buf' and return 'buf'.
// A value of zero means "unset" and is shown as "??".
// 'buf' must provide room for at least 4 bytes ("255" plus NUL).
static const char * fmt_temp(unsigned char x, char * buf)
{
  if (x)
    sprintf(buf, "%u", x);
  else
    strcpy(buf, "??"); // unset
  return buf;
}
2468
2469 // Check Temperature limits
2470 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2471 {
2472 if (!(0 < currtemp && currtemp < 255)) {
2473 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2474 return;
2475 }
2476
2477 // Update Max Temperature
2478 const char * minchg = "", * maxchg = "";
2479 if (currtemp > state.tempmax) {
2480 if (state.tempmax)
2481 maxchg = "!";
2482 state.tempmax = currtemp;
2483 state.must_write = true;
2484 }
2485
2486 char buf[20];
2487 if (!state.temperature) {
2488 // First check
2489 if (!state.tempmin || currtemp < state.tempmin)
2490 // Delay Min Temperature update by ~ 30 minutes.
2491 state.tempmin_delay = time(0) + CHECKTIME - 60;
2492 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2493 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2494 if (triptemp)
2495 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2496 state.temperature = currtemp;
2497 }
2498 else {
2499 if (state.tempmin_delay) {
2500 // End Min Temperature update delay if ...
2501 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2502 || (state.tempmin_delay <= time(0))) { // or delay time is over.
2503 state.tempmin_delay = 0;
2504 if (!state.tempmin)
2505 state.tempmin = 255;
2506 }
2507 }
2508
2509 // Update Min Temperature
2510 if (!state.tempmin_delay && currtemp < state.tempmin) {
2511 state.tempmin = currtemp;
2512 state.must_write = true;
2513 if (currtemp != state.temperature)
2514 minchg = "!";
2515 }
2516
2517 // Track changes
2518 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2519 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2520 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2521 state.temperature = currtemp;
2522 }
2523 }
2524
2525 // Check limits
2526 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2527 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2528 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2529 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2530 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2531 }
2532 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2533 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2534 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2535 }
2536 }
2537
2538 // Check normalized and raw attribute values.
2539 static void check_attribute(const dev_config & cfg, dev_state & state,
2540 const ata_smart_attribute & attr,
2541 const ata_smart_attribute & prev,
2542 const ata_smart_threshold_entry & thre)
2543 {
2544 // Check attribute and threshold
2545 ata_attr_state attrstate = ata_get_attr_state(attr, thre, cfg.attribute_defs);
2546 if (attrstate == ATTRSTATE_NON_EXISTING)
2547 return;
2548
2549 // If requested, check for usage attributes that have failed.
2550 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2551 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
2552 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs);
2553 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2554 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2555 state.must_write = true;
2556 }
2557
2558 // Return if we're not tracking this type of attribute
2559 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2560 if (!( ( prefail && cfg.prefail)
2561 || (!prefail && cfg.usage )))
2562 return;
2563
2564 // Return if '-I ID' was specified
2565 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2566 return;
2567
2568 // Issue warning if they don't have the same ID in all structures.
2569 if (attr.id != prev.id || attrstate == ATTRSTATE_BAD_THRESHOLD) {
2570 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
2571 cfg.name.c_str(), attr.id, prev.id, thre.id);
2572 return;
2573 }
2574
2575 // Compare normalized values if valid.
2576 bool valchanged = false;
2577 if (attrstate > ATTRSTATE_NO_NORMVAL) {
2578 if (attr.current != prev.current)
2579 valchanged = true;
2580 }
2581
2582 // Compare raw values if requested.
2583 bool rawchanged = false;
2584 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2585 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2586 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2587 rawchanged = true;
2588 }
2589
2590 // Return if no change
2591 if (!(valchanged || rawchanged))
2592 return;
2593
2594 // Format value strings
2595 std::string currstr, prevstr;
2596 if (attrstate == ATTRSTATE_NO_NORMVAL) {
2597 // Print raw values only
2598 currstr = strprintf("%s (Raw)",
2599 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2600 prevstr = strprintf("%s (Raw)",
2601 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2602 }
2603 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2604 // Print normalized and raw values
2605 currstr = strprintf("%d [Raw %s]", attr.current,
2606 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2607 prevstr = strprintf("%d [Raw %s]", prev.current,
2608 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2609 }
2610 else {
2611 // Print normalized values only
2612 currstr = strprintf("%d", attr.current);
2613 prevstr = strprintf("%d", prev.current);
2614 }
2615
2616 // Format message
2617 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2618 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
2619 ata_get_smart_attr_name(attr.id, cfg.attribute_defs).c_str(),
2620 prevstr.c_str(), currstr.c_str());
2621
2622 // Report this change as critical ?
2623 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2624 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2625 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2626 MailWarning(cfg, state, 2, "%s", msg.c_str());
2627 }
2628 else {
2629 PrintOut(LOG_INFO, "%s\n", msg.c_str());
2630 }
2631 state.must_write = true;
2632 }
2633
2634
2635 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev, bool allow_selftests)
2636 {
2637 const char * name = cfg.name.c_str();
2638
2639 // If user has asked, test the email warning system
2640 if (cfg.emailtest)
2641 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2642
2643 // if we can't open device, fail gracefully rather than hard --
2644 // perhaps the next time around we'll be able to open it. ATAPI
2645 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2646 // given (see linux cdrom driver).
2647 if (!atadev->open()) {
2648 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
2649 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2650 return 1;
2651 } else if (debugmode)
2652 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
2653
2654 // user may have requested (with the -n Directive) to leave the disk
2655 // alone if it is in idle or sleeping mode. In this case check the
2656 // power mode and exit without check if needed
2657 if (cfg.powermode && !state.powermodefail) {
2658 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
2659 const char * mode = 0;
2660 if (0 <= powermode && powermode < 0xff) {
2661 // wait for possible spin up and check again
2662 int powermode2;
2663 sleep(5);
2664 powermode2 = ataCheckPowerMode(atadev);
2665 if (powermode2 > powermode)
2666 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
2667 powermode = powermode2;
2668 }
2669
2670 switch (powermode){
2671 case -1:
2672 // SLEEP
2673 mode="SLEEP";
2674 if (cfg.powermode>=1)
2675 dontcheck=1;
2676 break;
2677 case 0:
2678 // STANDBY
2679 mode="STANDBY";
2680 if (cfg.powermode>=2)
2681 dontcheck=1;
2682 break;
2683 case 0x80:
2684 // IDLE
2685 mode="IDLE";
2686 if (cfg.powermode>=3)
2687 dontcheck=1;
2688 break;
2689 case 0xff:
2690 // ACTIVE/IDLE
2691 mode="ACTIVE or IDLE";
2692 break;
2693 default:
2694 // UNKNOWN
2695 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2696 name, powermode);
2697 state.powermodefail = true;
2698 break;
2699 }
2700
2701 // if we are going to skip a check, return now
2702 if (dontcheck){
2703 // skip at most powerskipmax checks
2704 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2705 CloseDevice(atadev, name);
2706 if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
2707 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
2708 state.powerskipcnt++;
2709 return 0;
2710 }
2711 else {
2712 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
2713 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2714 }
2715 state.powerskipcnt = 0;
2716 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2717 }
2718 else if (state.powerskipcnt) {
2719 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2720 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2721 state.powerskipcnt = 0;
2722 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2723 }
2724 }
2725
2726 // check smart status
2727 if (cfg.smartcheck) {
2728 int status=ataSmartStatus2(atadev);
2729 if (status==-1){
2730 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
2731 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
2732 state.must_write = true;
2733 }
2734 else if (status==1){
2735 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
2736 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
2737 state.must_write = true;
2738 }
2739 }
2740
2741 // Check everything that depends upon SMART Data (eg, Attribute values)
2742 if ( cfg.usagefailed || cfg.prefail || cfg.usage
2743 || cfg.curr_pending_id || cfg.offl_pending_id
2744 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || cfg.selftest) {
2745
2746 // Read current attribute values.
2747 ata_smart_values curval;
2748 if (ataReadSmartValues(atadev, &curval)){
2749 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
2750 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
2751 state.must_write = true;
2752 }
2753 else {
2754 // look for current or offline pending sectors
2755 if (cfg.curr_pending_id)
2756 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
2757 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
2758 : "Total unreadable (pending) sectors" ));
2759
2760 if (cfg.offl_pending_id)
2761 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
2762 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
2763 : "Total offline uncorrectable sectors"));
2764
2765 // check temperature limits
2766 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2767 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
2768
2769 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
2770
2771 // look for failed usage attributes, or track usage or prefail attributes
2772 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
2773 check_attribute(cfg, state,
2774 curval.vendor_attributes[i],
2775 state.smartval.vendor_attributes[i],
2776 state.smartthres.thres_entries[i]);
2777 }
2778
2779 if (cfg.selftest) {
2780 // Log changes of self-test execution status
2781 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
2782 || (!allow_selftests && curval.self_test_exec_status != 0x00) )
2783 log_self_test_exec_status(name, curval.self_test_exec_status);
2784 }
2785
2786 // Save the new values into *drive for the next time around
2787 state.smartval = curval;
2788 }
2789 }
2790 }
2791
2792 // check if number of selftest errors has increased (note: may also DECREASE)
2793 if (cfg.selftest)
2794 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug));
2795
2796 // check if number of ATA errors has increased
2797 if (cfg.errorlog || cfg.xerrorlog) {
2798
2799 int errcnt1 = -1, errcnt2 = -1;
2800 if (cfg.errorlog)
2801 errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false);
2802 if (cfg.xerrorlog)
2803 errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true);
2804
2805 // new number of errors is max of both logs
2806 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
2807
2808 // did command fail?
2809 if (newc<0)
2810 // lack of PrintOut here is INTENTIONAL
2811 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
2812
2813 // has error count increased?
2814 int oldc = state.ataerrorcount;
2815 if (newc>oldc){
2816 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
2817 name, oldc, newc);
2818 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
2819 name, oldc, newc);
2820 state.must_write = true;
2821 }
2822
2823 if (newc>=0)
2824 state.ataerrorcount=newc;
2825 }
2826
2827 // if the user has asked, and device is capable (or we're not yet
2828 // sure) check whether a self test should be done now.
2829 if (allow_selftests && !cfg.test_regex.empty()) {
2830 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
2831 if (testtype)
2832 DoATASelfTest(cfg, state, atadev, testtype);
2833 }
2834
2835 // Don't leave device open -- the OS/user may want to access it
2836 // before the next smartd cycle!
2837 CloseDevice(atadev, name);
2838
2839 // Copy ATA attribute values to persistent state
2840 state.update_persistent_state();
2841
2842 return 0;
2843 }
2844
2845 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
2846 {
2847 UINT8 asc, ascq;
2848 UINT8 currenttemp;
2849 UINT8 triptemp;
2850 const char * name = cfg.name.c_str();
2851 const char *cp;
2852
2853 // If the user has asked for it, test the email warning system
2854 if (cfg.emailtest)
2855 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2856
2857 // if we can't open device, fail gracefully rather than hard --
2858 // perhaps the next time around we'll be able to open it
2859 if (!scsidev->open()) {
2860 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
2861 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2862 return 1;
2863 } else if (debugmode)
2864 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
2865 currenttemp = 0;
2866 asc = 0;
2867 ascq = 0;
2868 if (!state.SuppressReport) {
2869 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2870 &asc, &ascq, &currenttemp, &triptemp)) {
2871 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
2872 name);
2873 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
2874 state.SuppressReport = 1;
2875 }
2876 }
2877 if (asc > 0) {
2878 cp = scsiGetIEString(asc, ascq);
2879 if (cp) {
2880 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
2881 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
2882 } else if (debugmode)
2883 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2884 name, (int)asc, (int)ascq);
2885 } else if (debugmode)
2886 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
2887
2888 // check temperature limits
2889 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2890 CheckTemperature(cfg, state, currenttemp, triptemp);
2891
2892 // check if number of selftest errors has increased (note: may also DECREASE)
2893 if (cfg.selftest)
2894 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
2895
2896 if (allow_selftests && !cfg.test_regex.empty()) {
2897 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
2898 if (testtype)
2899 DoSCSISelfTest(cfg, state, scsidev, testtype);
2900 }
2901 CloseDevice(scsidev, name);
2902 return 0;
2903 }
2904
2905 // Checks the SMART status of all ATA and SCSI devices
2906 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
2907 smart_device_list & devices, bool allow_selftests)
2908 {
2909 for (unsigned i = 0; i < configs.size(); i++) {
2910 const dev_config & cfg = configs.at(i);
2911 dev_state & state = states.at(i);
2912 smart_device * dev = devices.at(i);
2913 if (dev->is_ata())
2914 ATACheckDevice(cfg, state, dev->to_ata(), allow_selftests);
2915 else if (dev->is_scsi())
2916 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
2917 }
2918 }
2919
2920 // Set if Initialize() was called
2921 static bool is_initialized = false;
2922
2923 // Does initialization right after fork to daemon mode
2924 void Initialize(time_t *wakeuptime){
2925
2926 // Call Goodbye() on exit
2927 is_initialized = true;
2928
2929 // write PID file
2930 if (!debugmode)
2931 WritePidFile();
2932
2933 // install signal handlers. On Solaris, can't use signal() because
2934 // it resets the handler to SIG_DFL after each call. So use sigset()
2935 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2936
2937 // normal and abnormal exit
2938 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
2939 SIGNALFN(SIGTERM, SIG_IGN);
2940 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
2941 SIGNALFN(SIGQUIT, SIG_IGN);
2942
2943 // in debug mode, <CONTROL-C> ==> HUP
2944 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
2945 SIGNALFN(SIGINT, SIG_IGN);
2946
2947 // Catch HUP and USR1
2948 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
2949 SIGNALFN(SIGHUP, SIG_IGN);
2950 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
2951 SIGNALFN(SIGUSR1, SIG_IGN);
2952 #ifdef _WIN32
2953 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
2954 SIGNALFN(SIGUSR2, SIG_IGN);
2955 #endif
2956
2957 // initialize wakeup time to CURRENT time
2958 *wakeuptime=time(NULL);
2959
2960 return;
2961 }
2962
#ifdef _WIN32
// Switches debug mode on or off at runtime.  Implemented for native
// windows only (there is no easy way to reopen tty on *nix).
// Only the values 0 and 1 of debugmode are toggled; any other value
// is reported and left unchanged.
static void ToggleDebugMode()
{
  if (debugmode) {
    if (debugmode == 1) {
      // currently on: drop the console and restore the normal SIGINT handler
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
    }
    else {
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
    }
    return;
  }

  // currently off: try to attach a console
  PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
  if (daemon_enable_console("smartd [Debug]")) {
    PrintOut(LOG_INFO,"enable console failed\n");
    return;
  }

  debugmode = 1;
  // same mapping as at startup in debug mode: SIGINT is handled like HUP
  daemon_signal(SIGINT, HUPhandler);
  PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
}
#endif
2988
2989 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
2990 {
2991 // If past wake-up-time, compute next wake-up-time
2992 time_t timenow=time(NULL);
2993 while (wakeuptime<=timenow){
2994 int intervals=1+(timenow-wakeuptime)/checktime;
2995 wakeuptime+=intervals*checktime;
2996 }
2997
2998 // sleep until we catch SIGUSR1 or have completed sleeping
2999 while (timenow<wakeuptime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT){
3000
3001 // protect user again system clock being adjusted backwards
3002 if (wakeuptime>timenow+checktime){
3003 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3004 wakeuptime=timenow+checktime;
3005 }
3006
3007 // Exit sleep when time interval has expired or a signal is received
3008 sleep(wakeuptime-timenow);
3009
3010 #ifdef _WIN32
3011 // toggle debug mode?
3012 if (caughtsigUSR2) {
3013 ToggleDebugMode();
3014 caughtsigUSR2 = 0;
3015 }
3016 #endif
3017
3018 timenow=time(NULL);
3019 }
3020
3021 // if we caught a SIGUSR1 then print message and clear signal
3022 if (caughtsigUSR1){
3023 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3024 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3025 caughtsigUSR1=0;
3026 sigwakeup = true;
3027 }
3028
3029 // return adjusted wakeuptime
3030 return wakeuptime;
3031 }
3032
3033 // Print out a list of valid arguments for the Directive d
3034 void printoutvaliddirectiveargs(int priority, char d) {
3035
3036 switch (d) {
3037 case 'n':
3038 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3039 break;
3040 case 's':
3041 PrintOut(priority, "valid_regular_expression");
3042 break;
3043 case 'd':
3044 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3045 break;
3046 case 'T':
3047 PrintOut(priority, "normal, permissive");
3048 break;
3049 case 'o':
3050 case 'S':
3051 PrintOut(priority, "on, off");
3052 break;
3053 case 'l':
3054 PrintOut(priority, "error, selftest");
3055 break;
3056 case 'M':
3057 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3058 break;
3059 case 'v':
3060 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3061 break;
3062 case 'P':
3063 PrintOut(priority, "use, ignore, show, showall");
3064 break;
3065 case 'F':
3066 PrintOut(priority, "none, samsung, samsung2, samsung3");
3067 break;
3068 }
3069 }
3070
3071 // exits with an error message, or returns integer value of token
3072 int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *configfile,
3073 int min, int max, char * suffix = 0)
3074 {
3075 // make sure argument is there
3076 if (!arg) {
3077 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3078 configfile, lineno, name, token, min, max);
3079 return -1;
3080 }
3081
3082 // get argument value (base 10), check that it's integer, and in-range
3083 char *endptr;
3084 int val = strtol(arg,&endptr,10);
3085
3086 // optional suffix present?
3087 if (suffix) {
3088 if (!strcmp(endptr, suffix))
3089 endptr += strlen(suffix);
3090 else
3091 *suffix = 0;
3092 }
3093
3094 if (!(!*endptr && min <= val && val <= max)) {
3095 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3096 configfile, lineno, name, token, arg, min, max);
3097 return -1;
3098 }
3099
3100 // all is well; return value
3101 return val;
3102 }
3103
3104
3105 // Get 1-3 small integer(s) for '-W' directive
3106 int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *configfile,
3107 unsigned char * val1, unsigned char * val2, unsigned char * val3){
3108 unsigned v1 = 0, v2 = 0, v3 = 0;
3109 int n1 = -1, n2 = -1, n3 = -1, len;
3110 if (!arg) {
3111 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3112 configfile, lineno, name, token);
3113 return -1;
3114 }
3115
3116 len = strlen(arg);
3117 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3118 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3119 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3120 configfile, lineno, name, token, arg);
3121 return -1;
3122 }
3123 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3124 return 0;
3125 }
3126
3127
3128 // This function returns 1 if it has correctly parsed one token (and
3129 // any arguments), else zero if no tokens remain. It returns -1 if an
3130 // error was encountered.
3131 static int ParseToken(char * token, dev_config & cfg)
3132 {
3133 char sym;
3134 const char * name = cfg.name.c_str();
3135 int lineno=cfg.lineno;
3136 const char *delim = " \n\t";
3137 int badarg = 0;
3138 int missingarg = 0;
3139 const char *arg = 0;
3140
3141 // is the rest of the line a comment
3142 if (*token=='#')
3143 return 1;
3144
3145 // is the token not recognized?
3146 if (*token!='-' || strlen(token)!=2) {
3147 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3148 configfile, lineno, name, token);
3149 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3150 return -1;
3151 }
3152
3153 // token we will be parsing:
3154 sym=token[1];
3155
3156 // parse the token and swallow its argument
3157 int val;
3158 char plus[] = "+", excl[] = "!";
3159
3160 switch (sym) {
3161 case 'C':
3162 // monitor current pending sector count (default 197)
3163 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3164 return -1;
3165 cfg.curr_pending_id = (unsigned char)val;
3166 cfg.curr_pending_incr = (*plus == '+');
3167 cfg.curr_pending_set = true;
3168 break;
3169 case 'U':
3170 // monitor offline uncorrectable sectors (default 198)
3171 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3172 return -1;
3173 cfg.offl_pending_id = (unsigned char)val;
3174 cfg.offl_pending_incr = (*plus == '+');
3175 cfg.offl_pending_set = true;
3176 break;
3177 case 'T':
3178 // Set tolerance level for SMART command failures
3179 if ((arg = strtok(NULL, delim)) == NULL) {
3180 missingarg = 1;
3181 } else if (!strcmp(arg, "normal")) {
3182 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3183 // not on failure of an optional S.M.A.R.T. command.
3184 // This is the default so we don't need to actually do anything here.
3185 cfg.permissive = false;
3186 } else if (!strcmp(arg, "permissive")) {
3187 // Permissive mode; ignore errors from Mandatory SMART commands
3188 cfg.permissive = true;
3189 } else {
3190 badarg = 1;
3191 }
3192 break;
3193 case 'd':
3194 // specify the device type
3195 if ((arg = strtok(NULL, delim)) == NULL) {
3196 missingarg = 1;
3197 } else if (!strcmp(arg, "removable")) {
3198 cfg.removable = true;
3199 } else {
3200 cfg.dev_type = arg;
3201 }
3202 break;
3203 case 'F':
3204 // fix firmware bug
3205 if ((arg = strtok(NULL, delim)) == NULL) {
3206 missingarg = 1;
3207 } else if (!strcmp(arg, "none")) {
3208 cfg.fix_firmwarebug = FIX_NONE;
3209 } else if (!strcmp(arg, "samsung")) {
3210 cfg.fix_firmwarebug = FIX_SAMSUNG;
3211 } else if (!strcmp(arg, "samsung2")) {
3212 cfg.fix_firmwarebug = FIX_SAMSUNG2;
3213 } else if (!strcmp(arg, "samsung3")) {
3214 cfg.fix_firmwarebug = FIX_SAMSUNG3;
3215 } else {
3216 badarg = 1;
3217 }
3218 break;
3219 case 'H':
3220 // check SMART status
3221 cfg.smartcheck = true;
3222 break;
3223 case 'f':
3224 // check for failure of usage attributes
3225 cfg.usagefailed = true;
3226 break;
3227 case 't':
3228 // track changes in all vendor attributes
3229 cfg.prefail = true;
3230 cfg.usage = true;
3231 break;
3232 case 'p':
3233 // track changes in prefail vendor attributes
3234 cfg.prefail = true;
3235 break;
3236 case 'u':
3237 // track changes in usage vendor attributes
3238 cfg.usage = true;
3239 break;
3240 case 'l':
3241 // track changes in SMART logs
3242 if ((arg = strtok(NULL, delim)) == NULL) {
3243 missingarg = 1;
3244 } else if (!strcmp(arg, "selftest")) {
3245 // track changes in self-test log
3246 cfg.selftest = true;
3247 } else if (!strcmp(arg, "error")) {
3248 // track changes in ATA error log
3249 cfg.errorlog = true;
3250 } else if (!strcmp(arg, "xerror")) {
3251 // track changes in Extended Comprehensive SMART error log
3252 cfg.xerrorlog = true;
3253 } else {
3254 badarg = 1;
3255 }
3256 break;
3257 case 'a':
3258 // monitor everything
3259 cfg.smartcheck = true;
3260 cfg.prefail = true;
3261 cfg.usagefailed = true;
3262 cfg.usage = true;
3263 cfg.selftest = true;
3264 cfg.errorlog = true;
3265 break;
3266 case 'o':
3267 // automatic offline testing enable/disable
3268 if ((arg = strtok(NULL, delim)) == NULL) {
3269 missingarg = 1;
3270 } else if (!strcmp(arg, "on")) {
3271 cfg.autoofflinetest = 2;
3272 } else if (!strcmp(arg, "off")) {
3273 cfg.autoofflinetest = 1;
3274 } else {
3275 badarg = 1;
3276 }
3277 break;
3278 case 'n':
3279 // skip disk check if in idle or standby mode
3280 if (!(arg = strtok(NULL, delim)))
3281 missingarg = 1;
3282 else {
3283 char *endptr = NULL;
3284 char *next = strchr(const_cast<char*>(arg), ',');
3285
3286 cfg.powerquiet = false;
3287 cfg.powerskipmax = 0;
3288
3289 if (next!=NULL) *next='\0';
3290 if (!strcmp(arg, "never"))
3291 cfg.powermode = 0;
3292 else if (!strcmp(arg, "sleep"))
3293 cfg.powermode = 1;
3294 else if (!strcmp(arg, "standby"))
3295 cfg.powermode = 2;
3296 else if (!strcmp(arg, "idle"))
3297 cfg.powermode = 3;
3298 else
3299 badarg = 1;
3300
3301 // if optional arguments are present
3302 if (!badarg && next!=NULL) {
3303 next++;
3304 cfg.powerskipmax = strtol(next, &endptr, 10);
3305 if (endptr == next)
3306 cfg.powerskipmax = 0;
3307 else {
3308 next = endptr + (*endptr != '\0');
3309 if (cfg.powerskipmax <= 0)
3310 badarg = 1;
3311 }
3312 if (*next != '\0') {
3313 if (!strcmp("q", next))
3314 cfg.powerquiet = true;
3315 else {
3316 badarg = 1;
3317 }
3318 }
3319 }
3320 }
3321 break;
3322 case 'S':
3323 // automatic attribute autosave enable/disable
3324 if ((arg = strtok(NULL, delim)) == NULL) {
3325 missingarg = 1;
3326 } else if (!strcmp(arg, "on")) {
3327 cfg.autosave = 2;
3328 } else if (!strcmp(arg, "off")) {
3329 cfg.autosave = 1;
3330 } else {
3331 badarg = 1;
3332 }
3333 break;
3334 case 's':
3335 // warn user, and delete any previously given -s REGEXP Directives
3336 if (!cfg.test_regex.empty()){
3337 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3338 configfile, lineno, name, cfg.test_regex.get_pattern());
3339 cfg.test_regex = regular_expression();
3340 }
3341 // check for missing argument
3342 if (!(arg = strtok(NULL, delim))) {
3343 missingarg = 1;
3344 }
3345 // Compile regex
3346 else {
3347 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3348 // not a valid regular expression!
3349 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3350 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3351 return -1;
3352 }
3353 }
3354 // Do a bit of sanity checking and warn user if we think that
3355 // their regexp is "strange". User probably confused about shell
3356 // glob(3) syntax versus regular expression syntax regexp(7).
3357 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3358 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3359 configfile, lineno, name, val+1, arg[val], arg);
3360 break;
3361 case 'm':
3362 // send email to address that follows
3363 if (!(arg = strtok(NULL,delim)))
3364 missingarg = 1;
3365 else {
3366 if (!cfg.emailaddress.empty())
3367 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3368 configfile, lineno, name, cfg.emailaddress.c_str());
3369 cfg.emailaddress = arg;
3370 }
3371 break;
3372 case 'M':
3373 // email warning options
3374 if (!(arg = strtok(NULL, delim)))
3375 missingarg = 1;
3376 else if (!strcmp(arg, "once"))
3377 cfg.emailfreq = 1;
3378 else if (!strcmp(arg, "daily"))
3379 cfg.emailfreq = 2;
3380 else if (!strcmp(arg, "diminishing"))
3381 cfg.emailfreq = 3;
3382 else if (!strcmp(arg, "test"))
3383 cfg.emailtest = 1;
3384 else if (!strcmp(arg, "exec")) {
3385 // Get the next argument (the command line)
3386 if (!(arg = strtok(NULL, delim))) {
3387 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3388 configfile, lineno, name, token);
3389 return -1;
3390 }
3391 // Free the last cmd line given if any, and copy new one
3392 if (!cfg.emailcmdline.empty())
3393 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3394 configfile, lineno, name, cfg.emailcmdline.c_str());
3395 cfg.emailcmdline = arg;
3396 }
3397 else
3398 badarg = 1;
3399 break;
3400 case 'i':
3401 // ignore failure of usage attribute
3402 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3403 return -1;
3404 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3405 break;
3406 case 'I':
3407 // ignore attribute for tracking purposes
3408 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3409 return -1;
3410 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3411 break;
3412 case 'r':
3413 // print raw value when tracking
3414 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3415 return -1;
3416 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3417 if (*excl == '!') // attribute change is critical
3418 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3419 break;
3420 case 'R':
3421 // track changes in raw value (forces printing of raw value)
3422 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3423 return -1;
3424 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3425 if (*excl == '!') // raw value change is critical
3426 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3427 break;
3428 case 'W':
3429 // track Temperature
3430 if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3431 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
3432 return -1;
3433 break;
3434 case 'v':
3435 // non-default vendor-specific attribute meaning
3436 if (!(arg=strtok(NULL,delim))) {
3437 missingarg = 1;
3438 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
3439 badarg = 1;
3440 }
3441 break;
3442 case 'P':
3443 // Define use of drive-specific presets.
3444 if (!(arg = strtok(NULL, delim))) {
3445 missingarg = 1;
3446 } else if (!strcmp(arg, "use")) {
3447 cfg.ignorepresets = false;
3448 } else if (!strcmp(arg, "ignore")) {
3449 cfg.ignorepresets = true;
3450 } else if (!strcmp(arg, "show")) {
3451 cfg.showpresets = true;
3452 } else if (!strcmp(arg, "showall")) {
3453 showallpresets();
3454 } else {
3455 badarg = 1;
3456 }
3457 break;
3458 default:
3459 // Directive not recognized
3460 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3461 configfile, lineno, name, token);
3462 Directives();
3463 return -1;
3464 }
3465 if (missingarg) {
3466 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3467 configfile, lineno, name, token);
3468 }
3469 if (badarg) {
3470 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3471 configfile, lineno, name, token, arg);
3472 }
3473 if (missingarg || badarg) {
3474 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
3475 printoutvaliddirectiveargs(LOG_CRIT, sym);
3476 PrintOut(LOG_CRIT, "\n");
3477 return -1;
3478 }
3479
3480 return 1;
3481 }
3482
3483 // Scan directive for configuration file
3484 #define SCANDIRECTIVE "DEVICESCAN"
3485
3486 // This is the routine that adds things to the conf_entries list.
3487 //
3488 // Return values are:
3489 // 1: parsed a normal line
3490 // 0: found comment or blank line
3491 // -1: found SCANDIRECTIVE line
3492 // -2: found an error
3493 //
3494 // Note: this routine modifies *line from the caller!
3495 static int ParseConfigLine(dev_config_vector & conf_entries, int /*entry*/, int lineno, /*const*/ char * line)
3496 {
3497 char *token=NULL;
3498 char *name=NULL;
3499 const char *delim = " \n\t";
3500 int devscan=0;
3501
3502 // get first token: device name. If a comment, skip line
3503 if (!(name=strtok(line,delim)) || *name=='#') {
3504 return 0;
3505 }
3506
3507 // Have we detected the SCANDIRECTIVE directive?
3508 if (!strcmp(SCANDIRECTIVE,name)){
3509 devscan=1;
3510 }
3511
3512 // We've got a legit entry, make space to store it
3513 conf_entries.push_back( dev_config() );
3514 dev_config & cfg = conf_entries.back();
3515
3516 cfg.name = name;
3517
3518 // Store line number, and by default check for both device types.
3519 cfg.lineno=lineno;
3520
3521 // parse tokens one at a time from the file.
3522 while ((token=strtok(NULL,delim))){
3523 int retval=ParseToken(token,cfg);
3524
3525 if (retval==0)
3526 // No tokens left:
3527 break;
3528
3529 if (retval>0) {
3530 // Parsed token
3531 #if (0)
3532 PrintOut(LOG_INFO,"Parsed token %s\n",token);
3533 #endif
3534 continue;
3535 }
3536
3537 if (retval<0) {
3538 // error found on the line
3539 return -2;
3540 }
3541 }
3542
3543 // If NO monitoring directives are set, then set all of them.
3544 if (!( cfg.smartcheck || cfg.selftest
3545 || cfg.errorlog || cfg.xerrorlog
3546 || cfg.usagefailed || cfg.prefail || cfg.usage
3547 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
3548
3549 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3550 cfg.name.c_str(), cfg.lineno, configfile);
3551
3552 cfg.smartcheck = true;
3553 cfg.usagefailed = true;
3554 cfg.prefail = true;
3555 cfg.usage = true;
3556 cfg.selftest = true;
3557 cfg.errorlog = true;
3558 }
3559
3560 // additional sanity check. Has user set -M options without -m?
3561 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
3562 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3563 cfg.name.c_str(), cfg.lineno, configfile);
3564 return -2;
3565 }
3566
3567 // has the user has set <nomailer>?
3568 if (cfg.emailaddress == "<nomailer>") {
3569 // check that -M exec is also set
3570 if (cfg.emailcmdline.empty()){
3571 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3572 cfg.name.c_str(), cfg.lineno, configfile);
3573 return -2;
3574 }
3575 // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
3576 cfg.emailaddress.clear();
3577 }
3578
3579 // set cfg.emailfreq to 1 (once) if user hasn't set it
3580 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq)
3581 cfg.emailfreq = 1;
3582
3583 if (devscan)
3584 return -1;
3585 else
3586 return 1;
3587 }
3588
3589 // Parses a configuration file. Return values are:
3590 // N=>0: found N entries
3591 // -1: syntax error in config file
3592 // -2: config file does not exist
3593 // -3: config file exists but cannot be read
3594 //
// In the case where the return value is 0, there are three
// possibilities:
3597 // Empty configuration file ==> conf_entries.empty()
3598 // No configuration file ==> conf_entries[0].lineno == 0
3599 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
3600 static int ParseConfigFile(dev_config_vector & conf_entries)
3601 {
3602 // maximum line length in configuration file
3603 const int MAXLINELEN = 256;
3604 // maximum length of a continued line in configuration file
3605 const int MAXCONTLINE = 1023;
3606
3607 stdio_file f;
3608 // Open config file, if it exists and is not <stdin>
3609 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
3610 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
3611 // file exists but we can't read it or it should exist due to '-c' option
3612 int ret = (errno!=ENOENT ? -3 : -2);
3613 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
3614 strerror(errno),configfile);
3615 return ret;
3616 }
3617 }
3618 else // read from stdin ('-c -' option)
3619 f.open(stdin);
3620
3621 // No configuration file found -- use fake one
3622 int entry = 0;
3623 if (!f) {
3624 char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
3625
3626 if (ParseConfigLine(conf_entries, entry, 0, fakeconfig) != -1)
3627 throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
3628 return 0;
3629 }
3630
3631 #ifdef __CYGWIN__
3632 setmode(fileno(f), O_TEXT); // Allow files with \r\n
3633 #endif
3634
3635 // configuration file exists
3636 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
3637
3638 // parse config file line by line
3639 int lineno = 1, cont = 0, contlineno = 0;
3640 char line[MAXLINELEN+2];
3641 char fullline[MAXCONTLINE+1];
3642
3643 for (;;) {
3644 int len=0,scandevice;
3645 char *lastslash;
3646 char *comment;
3647 char *code;
3648
3649 // make debugging simpler
3650 memset(line,0,sizeof(line));
3651
3652 // get a line
3653 code=fgets(line, MAXLINELEN+2, f);
3654
3655 // are we at the end of the file?
3656 if (!code){
3657 if (cont) {
3658 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3659 // See if we found a SCANDIRECTIVE directive
3660 if (scandevice==-1)
3661 return 0;
3662 // did we find a syntax error
3663 if (scandevice==-2)
3664 return -1;
3665 // the final line is part of a continuation line
3666 cont=0;
3667 entry+=scandevice;
3668 }
3669 break;
3670 }
3671
3672 // input file line number
3673 contlineno++;
3674
3675 // See if line is too long
3676 len=strlen(line);
3677 if (len>MAXLINELEN){
3678 const char *warn;
3679 if (line[len-1]=='\n')
3680 warn="(including newline!) ";
3681 else
3682 warn="";
3683 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3684 (int)contlineno,configfile,warn,(int)MAXLINELEN);
3685 return -1;
3686 }
3687
3688 // Ignore anything after comment symbol
3689 if ((comment=strchr(line,'#'))){
3690 *comment='\0';
3691 len=strlen(line);
3692 }
3693
3694 // is the total line (made of all continuation lines) too long?
3695 if (cont+len>MAXCONTLINE){
3696 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3697 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
3698 return -1;
3699 }
3700
3701 // copy string so far into fullline, and increment length
3702 strcpy(fullline+cont,line);
3703 cont+=len;
3704
3705 // is this a continuation line. If so, replace \ by space and look at next line
3706 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
3707 *(fullline+(cont-len)+(lastslash-line))=' ';
3708 continue;
3709 }
3710
3711 // Not a continuation line. Parse it
3712 scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3713
3714 // did we find a scandevice directive?
3715 if (scandevice==-1)
3716 return 0;
3717 // did we find a syntax error
3718 if (scandevice==-2)
3719 return -1;
3720
3721 entry+=scandevice;
3722 lineno++;
3723 cont=0;
3724 }
3725
3726 // note -- may be zero if syntax of file OK, but no valid entries!
3727 return entry;
3728 }
3729
3730 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3731 <LIST> is the list of valid arguments for option opt. */
3732 void PrintValidArgs(char opt) {
3733 const char *s;
3734
3735 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
3736 if (!(s = GetValidArgList(opt)))
3737 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
3738 else
3739 PrintOut(LOG_CRIT, "%s", (char *)s);
3740 PrintOut(LOG_CRIT, " <=======\n");
3741 }
3742
// Tells whether 'path' is an absolute path name.
// A leading '/' always qualifies.  On Windows and Cygwin builds a
// leading '\' or a "<letter>:/" or "<letter>:\" prefix qualifies as well.
static bool is_abs_path(const char * path)
{
#if defined(_WIN32) || defined(__CYGWIN__)
  if (path[0] == '\\')
    return true;
  // %n is only stored if drive letter, colon and separator all matched
  int matched = -1;
  sscanf(path, "%*1[A-Za-z]:%*1[/\\]%n", &matched);
  if (matched > 0)
    return true;
#endif
  return (path[0] == '/');
}
3758
3759 // Parses input line, prints usage message and
3760 // version/license/copyright messages
3761 void ParseOpts(int argc, char **argv)
3762 {
3763 // Init default configfile path
3764 #ifndef _WIN32
3765 configfile = SMARTMONTOOLS_SYSCONFDIR"/smartd.conf";
3766 #else
3767 static std::string configfile_str = get_exe_dir() + "/smartd.conf";
3768 configfile = configfile_str.c_str();
3769 #endif
3770
3771 // Please update GetValidArgList() if you edit shortopts
3772 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:Vh?"
3773 #ifdef HAVE_LIBCAP_NG
3774 "C"
3775 #endif
3776 ;
3777 // Please update GetValidArgList() if you edit longopts
3778 struct option longopts[] = {
3779 { "configfile", required_argument, 0, 'c' },
3780 { "logfacility", required_argument, 0, 'l' },
3781 { "quit", required_argument, 0, 'q' },
3782 { "debug", no_argument, 0, 'd' },
3783 { "showdirectives", no_argument, 0, 'D' },
3784 { "interval", required_argument, 0, 'i' },
3785 #ifndef _WIN32
3786 { "no-fork", no_argument, 0, 'n' },
3787 #endif
3788 { "pidfile", required_argument, 0, 'p' },
3789 { "report", required_argument, 0, 'r' },
3790 { "savestates", required_argument, 0, 's' },
3791 { "attributelog", required_argument, 0, 'A' },
3792 { "drivedb", required_argument, 0, 'B' },
3793 #if defined(_WIN32) || defined(__CYGWIN__)
3794 { "service", no_argument, 0, 'n' },
3795 #endif
3796 { "version", no_argument, 0, 'V' },
3797 { "license", no_argument, 0, 'V' },
3798 { "copyright", no_argument, 0, 'V' },
3799 { "help", no_argument, 0, 'h' },
3800 { "usage", no_argument, 0, 'h' },
3801 #ifdef HAVE_LIBCAP_NG
3802 { "capabilities", no_argument, 0, 'C' },
3803 #endif
3804 { 0, 0, 0, 0 }
3805 };
3806
3807 opterr=optopt=0;
3808 bool badarg = false;
3809 bool no_defaultdb = false; // set true on '-B FILE'
3810
3811 // Parse input options.
3812 int optchar;
3813 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
3814 char *arg;
3815 char *tailptr;
3816 long lchecktime;
3817
3818 switch(optchar) {
3819 case 'q':
3820 // when to quit
3821 if (!(strcmp(optarg,"nodev"))) {
3822 quit=0;
3823 } else if (!(strcmp(optarg,"nodevstartup"))) {
3824 quit=1;
3825 } else if (!(strcmp(optarg,"never"))) {
3826 quit=2;
3827 } else if (!(strcmp(optarg,"onecheck"))) {
3828 quit=3;
3829 debugmode=1;
3830 } else if (!(strcmp(optarg,"showtests"))) {
3831 quit=4;
3832 debugmode=1;
3833 } else if (!(strcmp(optarg,"errors"))) {
3834 quit=5;
3835 } else {
3836 badarg = true;
3837 }
3838 break;
3839 case 'l':
3840 // set the log facility level
3841 if (!strcmp(optarg, "daemon"))
3842 facility=LOG_DAEMON;
3843 else if (!strcmp(optarg, "local0"))
3844 facility=LOG_LOCAL0;
3845 else if (!strcmp(optarg, "local1"))
3846 facility=LOG_LOCAL1;
3847 else if (!strcmp(optarg, "local2"))
3848 facility=LOG_LOCAL2;
3849 else if (!strcmp(optarg, "local3"))
3850 facility=LOG_LOCAL3;
3851 else if (!strcmp(optarg, "local4"))
3852 facility=LOG_LOCAL4;
3853 else if (!strcmp(optarg, "local5"))
3854 facility=LOG_LOCAL5;
3855 else if (!strcmp(optarg, "local6"))
3856 facility=LOG_LOCAL6;
3857 else if (!strcmp(optarg, "local7"))
3858 facility=LOG_LOCAL7;
3859 else
3860 badarg = true;
3861 break;
3862 case 'd':
3863 // enable debug mode
3864 debugmode = 1;
3865 break;
3866 case 'n':
3867 // don't fork()
3868 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
3869 do_fork = false;
3870 #endif
3871 break;
3872 case 'D':
3873 // print summary of all valid directives
3874 debugmode = 1;
3875 Directives();
3876 EXIT(0);
3877 break;
3878 case 'i':
3879 // Period (time interval) for checking
3880 // strtol will set errno in the event of overflow, so we'll check it.
3881 errno = 0;
3882 lchecktime = strtol(optarg, &tailptr, 10);
3883 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
3884 debugmode=1;
3885 PrintHead();
3886 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
3887 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
3888 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3889 EXIT(EXIT_BADCMD);
3890 }
3891 checktime = (int)lchecktime;
3892 break;
3893 case 'r':
3894 // report IOCTL transactions
3895 {
3896 int i;
3897 char *s;
3898
3899 // split_report_arg() may modify its first argument string, so use a
3900 // copy of optarg in case we want optarg for an error message.
3901 if (!(s = strdup(optarg))) {
3902 PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
3903 EXIT(EXIT_NOMEM);
3904 }
3905 if (split_report_arg(s, &i)) {
3906 badarg = true;
3907 } else if (i<1 || i>3) {
3908 debugmode=1;
3909 PrintHead();
3910 PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
3911 PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3912 EXIT(EXIT_BADCMD);
3913 } else if (!strcmp(s,"ioctl")) {
3914 con->reportataioctl = con->reportscsiioctl = i;
3915 } else if (!strcmp(s,"ataioctl")) {
3916 con->reportataioctl = i;
3917 } else if (!strcmp(s,"scsiioctl")) {
3918 con->reportscsiioctl = i;
3919 } else {
3920 badarg = true;
3921 }
3922 free(s); // TODO: use std::string
3923 }
3924 break;
3925 case 'c':
3926 // alternate configuration file
3927 if (strcmp(optarg,"-"))
3928 configfile = (configfile_alt = optarg).c_str();
3929 else // read from stdin
3930 configfile=configfile_stdin;
3931 break;
3932 case 'p':
3933 // output file with PID number
3934 pid_file = optarg;
3935 break;
3936 case 's':
3937 // path prefix of persistent state file
3938 state_path_prefix = optarg;
3939 break;
3940 case 'A':
3941 // path prefix of attribute log file
3942 attrlog_path_prefix = optarg;
3943 break;
3944 case 'B':
3945 {
3946 const char * path = optarg;
3947 if (*path == '+' && path[1])
3948 path++;
3949 else
3950 no_defaultdb = true;
3951 unsigned char savedebug = debugmode; debugmode = 1;
3952 if (!read_drive_database(path))
3953 EXIT(EXIT_BADCMD);
3954 debugmode = savedebug;
3955 }
3956 break;
3957 case 'V':
3958 // print version and CVS info
3959 debugmode = 1;
3960 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
3961 EXIT(0);
3962 break;
3963 #ifdef HAVE_LIBCAP_NG
3964 case 'C':
3965 // enable capabilities
3966 enable_capabilities = true;
3967 break;
3968 #endif
3969 case 'h':
3970 // help: print summary of command-line options
3971 debugmode=1;
3972 PrintHead();
3973 Usage();
3974 EXIT(0);
3975 break;
3976 case '?':
3977 default:
3978 // unrecognized option
3979 debugmode=1;
3980 PrintHead();
3981 // Point arg to the argument in which this option was found.
3982 arg = argv[optind-1];
3983 // Check whether the option is a long option that doesn't map to -h.
3984 if (arg[1] == '-' && optchar != 'h') {
3985 // Iff optopt holds a valid option then argument must be missing.
3986 if (optopt && (strchr(shortopts, optopt) != NULL)) {
3987 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
3988 PrintValidArgs(optopt);
3989 } else {
3990 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
3991 }
3992 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
3993 EXIT(EXIT_BADCMD);
3994 }
3995 if (optopt) {
3996 // Iff optopt holds a valid option then argument must be missing.
3997 if (strchr(shortopts, optopt) != NULL){
3998 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
3999 PrintValidArgs(optopt);
4000 } else {
4001 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4002 }
4003 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4004 EXIT(EXIT_BADCMD);
4005 }
4006 Usage();
4007 EXIT(0);
4008 }
4009
4010 // Check to see if option had an unrecognized or incorrect argument.
4011 if (badarg) {
4012 debugmode=1;
4013 PrintHead();
4014 // It would be nice to print the actual option name given by the user
4015 // here, but we just print the short form. Please fix this if you know
4016 // a clean way to do it.
4017 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4018 PrintValidArgs(optchar);
4019 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4020 EXIT(EXIT_BADCMD);
4021 }
4022 }
4023
4024 // non-option arguments are not allowed
4025 if (argc > optind) {
4026 debugmode=1;
4027 PrintHead();
4028 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4029 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4030 EXIT(EXIT_BADCMD);
4031 }
4032
4033 // no pidfile in debug mode
4034 if (debugmode && !pid_file.empty()) {
4035 debugmode=1;
4036 PrintHead();
4037 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4038 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4039 EXIT(EXIT_BADCMD);
4040 }
4041
4042 // absolute path is required due to chdir('/') after fork().
4043 if (!state_path_prefix.empty() && !debugmode && !is_abs_path(state_path_prefix.c_str())) {
4044 debugmode=1;
4045 PrintHead();
4046 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4047 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4048 state_path_prefix.c_str());
4049 EXIT(EXIT_BADCMD);
4050 }
4051
4052 // absolute path is required due to chdir('/') after fork().
4053 if (!attrlog_path_prefix.empty() && !debugmode && !is_abs_path(attrlog_path_prefix.c_str())) {
4054 debugmode=1;
4055 PrintHead();
4056 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -s <======= \n\n");
4057 PrintOut(LOG_CRIT, "Error: relative path %s is only allowed in debug (-d) mode\n\n",
4058 attrlog_path_prefix.c_str());
4059 EXIT(EXIT_BADCMD);
4060 }
4061
4062 // Read or init drive database
4063 if (!no_defaultdb) {
4064 unsigned char savedebug = debugmode; debugmode = 1;
4065 if (!read_default_drive_databases())
4066 EXIT(EXIT_BADCMD);
4067 debugmode = savedebug;
4068 }
4069
4070 // print header
4071 PrintHead();
4072 }
4073
4074 // Function we call if no configuration file was found or if the
4075 // SCANDIRECTIVE Directive was found. It makes entries for device
4076 // names returned by scan_smart_devices() in os_OSNAME.cpp
4077 static int MakeConfigEntries(const dev_config & base_cfg,
4078 dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4079 {
4080 // make list of devices
4081 smart_device_list devlist;
4082 if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4083 PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4084
4085 // if no devices, or error constructing list, return
4086 if (devlist.size() <= 0)
4087 return 0;
4088
4089 // add empty device slots for existing config entries
4090 while (scanned_devs.size() < conf_entries.size())
4091 scanned_devs.push_back((smart_device *)0);
4092
4093 // loop over entries to create
4094 for (unsigned i = 0; i < devlist.size(); i++) {
4095 // Move device pointer
4096 smart_device * dev = devlist.release(i);
4097 scanned_devs.push_back(dev);
4098
4099 // Copy configuration, update device and type name
4100 conf_entries.push_back(base_cfg);
4101 dev_config & cfg = conf_entries.back();
4102 cfg.name = dev->get_info().info_name;
4103 cfg.dev_type = type;
4104 }
4105
4106 return devlist.size();
4107 }
4108
4109 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4110 {
4111 if (!debugmode && scandirective)
4112 return;
4113 if (line)
4114 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4115 "Unable to register %s device %s at line %d of file %s\n",
4116 type, name, line, configfile);
4117 else
4118 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4119 type, name);
4120 return;
4121 }
4122
4123 // Returns negative value (see ParseConfigFile()) if config file
4124 // had errors, else number of entries which may be zero or positive.
4125 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4126 {
4127 // parse configuration file configfile (normally /etc/smartd.conf)
4128 int entries = ParseConfigFile(conf_entries);
4129
4130 if (entries < 0) {
4131 // There was an error reading the configuration file.
4132 conf_entries.clear();
4133 if (entries == -1)
4134 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4135 return entries;
4136 }
4137
4138 // no error parsing config file.
4139 if (entries) {
4140 // we did not find a SCANDIRECTIVE and did find valid entries
4141 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4142 }
4143 else if (!conf_entries.empty()) {
4144 // we found a SCANDIRECTIVE or there was no configuration file so
4145 // scan. Configuration file's last entry contains all options
4146 // that were set
4147 dev_config first = conf_entries.back();
4148 conf_entries.pop_back();
4149
4150 if (first.lineno)
4151 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4152 else
4153 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4154
4155 // make config list of devices to search for
4156 MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4157
4158 // warn user if scan table found no devices
4159 if (conf_entries.empty())
4160 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4161 }
4162 else
4163 PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4164
4165 return conf_entries.size();
4166 }
4167
4168
4169 // This function tries devices from conf_entries. Each one that can be
4170 // registered is moved onto the [ata|scsi]devices lists and removed
4171 // from the conf_entries list.
4172 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4173 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4174 {
4175 // start by clearing lists/memory of ALL existing devices
4176 configs.clear();
4177 devices.clear();
4178 states.clear();
4179
4180 // Register entries
4181 for (unsigned i = 0; i < conf_entries.size(); i++){
4182
4183 dev_config cfg = conf_entries[i];
4184
4185 // get device of appropriate type
4186 smart_device_auto_ptr dev;
4187 bool scanning = false;
4188
4189 // Device may already be detected during devicescan
4190 if (i < scanned_devs.size()) {
4191 dev = scanned_devs.release(i);
4192 if (dev)
4193 scanning = true;
4194 }
4195
4196 if (!dev) {
4197 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4198 if (!dev) {
4199 if (cfg.dev_type.empty())
4200 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4201 else
4202 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4203 continue;
4204 }
4205 }
4206
4207 // Save old info
4208 smart_device::device_info oldinfo = dev->get_info();
4209
4210 // Open with autodetect support, may return 'better' device
4211 dev.replace( dev->autodetect_open() );
4212
4213 // Report if type has changed
4214 if (oldinfo.dev_type != dev->get_dev_type())
4215 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4216 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4217
4218 if (!dev->is_open()) {
4219 // For linux+devfs, a nonexistent device gives a strange error
4220 // message. This makes the error message a bit more sensible.
4221 // If no debug and scanning - don't print errors
4222 if (debugmode || !scanning)
4223 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4224 continue;
4225 }
4226
4227 // Update informal name
4228 cfg.name = dev->get_info().info_name;
4229 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4230
4231 // Prepare initial state
4232 dev_state state;
4233
4234 // register ATA devices
4235 if (dev->is_ata()){
4236 if (ATADeviceScan(cfg, state, dev->to_ata())) {
4237 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4238 dev.reset();
4239 }
4240 }
4241 // or register SCSI devices
4242 else if (dev->is_scsi()){
4243 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4244 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4245 dev.reset();
4246 }
4247 }
4248 else {
4249 PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4250 dev.reset();
4251 }
4252
4253 if (dev) {
4254 // move onto the list of devices
4255 configs.push_back(cfg);
4256 states.push_back(state);
4257 devices.push_back(dev);
4258 }
4259 // if device is explictly listed and we can't register it, then
4260 // exit unless the user has specified that the device is removable
4261 else if (!scanning) {
4262 if (cfg.removable || quit==2)
4263 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4264 else {
4265 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4266 EXIT(EXIT_BADDEV);
4267 }
4268 }
4269 }
4270 }
4271
4272
4273 // Main program without exception handling
4274 int main_worker(int argc, char **argv)
4275 {
4276 // Initialize interface
4277 smart_interface::init();
4278 if (!smi())
4279 return 1;
4280
4281 // external control variables for ATA disks
4282 smartmonctrl control;
4283
4284 // is it our first pass through?
4285 bool firstpass = true;
4286
4287 // next time to wake up
4288 time_t wakeuptime;
4289
4290 // for simplicity, null all global communications variables/lists
4291 con=&control;
4292 memset(con, 0,sizeof(control));
4293
4294 // parse input and print header and usage info if needed
4295 ParseOpts(argc,argv);
4296
4297 // do we mute printing from ataprint commands?
4298 con->printing_switchable = false;
4299 con->dont_print = !debugmode;
4300
4301 // Configuration for each device
4302 dev_config_vector configs;
4303 // Device states
4304 dev_state_vector states;
4305 // Devices to monitor
4306 smart_device_list devices;
4307
4308 bool write_states_always = true;
4309
4310 #ifdef HAVE_LIBCAP_NG
4311 // Drop capabilities
4312 if (enable_capabilities) {
4313 capng_clear(CAPNG_SELECT_BOTH);
4314 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
4315 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
4316 capng_apply(CAPNG_SELECT_BOTH);
4317 }
4318 #endif
4319
4320 // the main loop of the code
4321 for (;;) {
4322
4323 // are we exiting from a signal?
4324 if (caughtsigEXIT) {
4325 // are we exiting with SIGTERM?
4326 int isterm=(caughtsigEXIT==SIGTERM);
4327 int isquit=(caughtsigEXIT==SIGQUIT);
4328 int isok=debugmode?isterm || isquit:isterm;
4329
4330 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4331 caughtsigEXIT, strsignal(caughtsigEXIT));
4332
4333 if (!isok)
4334 return EXIT_SIGNAL;
4335
4336 // Write state files
4337 if (!state_path_prefix.empty())
4338 write_all_dev_states(configs, states);
4339
4340 return 0;
4341 }
4342
4343 // Should we (re)read the config file?
4344 if (firstpass || caughtsigHUP){
4345 if (!firstpass) {
4346 #ifdef __CYGWIN__
4347 // Workaround for missing SIGQUIT via keyboard on Cygwin
4348 if (caughtsigHUP==2) {
4349 // Simulate SIGQUIT if another SIGINT arrives soon
4350 caughtsigHUP=0;
4351 sleep(1);
4352 if (caughtsigHUP==2) {
4353 caughtsigEXIT=SIGQUIT;
4354 continue;
4355 }
4356 caughtsigHUP=2;
4357 }
4358 #endif
4359 // Write state files
4360 if (!state_path_prefix.empty())
4361 write_all_dev_states(configs, states);
4362
4363 PrintOut(LOG_INFO,
4364 caughtsigHUP==1?
4365 "Signal HUP - rereading configuration file %s\n":
4366 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4367 configfile);
4368 }
4369
4370 {
4371 dev_config_vector conf_entries; // Entries read from smartd.conf
4372 smart_device_list scanned_devs; // Devices found during scan
4373 // (re)reads config file, makes >=0 entries
4374 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
4375
4376 if (entries>=0) {
4377 // checks devices, then moves onto ata/scsi list or deallocates.
4378 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
4379 if (!(configs.size() == devices.size() && configs.size() == states.size()))
4380 throw std::logic_error("Invalid result from RegisterDevices");
4381 }
4382 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4383 // user has asked to continue on error in configuration file
4384 if (!firstpass)
4385 PrintOut(LOG_INFO,"Reusing previous configuration\n");
4386 }
4387 else {
4388 // exit with configuration file error status
4389 return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4390 }
4391 }
4392
4393 // Log number of devices we are monitoring...
4394 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
4395 int numata = 0;
4396 for (unsigned i = 0; i < devices.size(); i++) {
4397 if (devices.at(i)->is_ata())
4398 numata++;
4399 }
4400 PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4401 numata, devices.size() - numata);
4402 }
4403 else {
4404 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4405 return EXIT_NODEV;
4406 }
4407
4408 if (quit==4) {
4409 // user has asked to print test schedule
4410 PrintTestSchedule(configs, states, devices);
4411 return 0;
4412 }
4413
4414 #ifdef HAVE_LIBCAP_NG
4415 if (enable_capabilities) {
4416 for (unsigned i = 0; i < configs.size(); i++) {
4417 if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
4418 PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
4419 break;
4420 }
4421 }
4422 }
4423 #endif
4424
4425 // reset signal
4426 caughtsigHUP=0;
4427
4428 // Always write state files after (re)configuration
4429 write_states_always = true;
4430 }
4431
4432 // check all devices once,
4433 // self tests are not started in first pass unless '-q onecheck' is specified
4434 CheckDevicesOnce(configs, states, devices, (!firstpass || quit==3));
4435
4436 // Write state files
4437 if (!state_path_prefix.empty())
4438 write_all_dev_states(configs, states, write_states_always);
4439 write_states_always = false;
4440
4441 // Write attribute logs
4442 if (!attrlog_path_prefix.empty())
4443 write_all_dev_attrlogs(configs, states);
4444
4445 // user has asked us to exit after first check
4446 if (quit==3) {
4447 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4448 "smartd is exiting (exit status 0)\n");
4449 return 0;
4450 }
4451
4452 // fork into background if needed
4453 if (firstpass && !debugmode) {
4454 DaemonInit();
4455 }
4456
4457 // set exit and signal handlers, write PID file, set wake-up time
4458 if (firstpass){
4459 Initialize(&wakeuptime);
4460 firstpass = false;
4461 }
4462
4463 // sleep until next check time, or a signal arrives
4464 wakeuptime = dosleep(wakeuptime, write_states_always);
4465 }
4466 }
4467
4468
4469 #ifndef _WIN32
4470 // Main program
4471 int main(int argc, char **argv)
4472 #else
4473 // Windows: internal main function started direct or by service control manager
4474 static int smartd_main(int argc, char **argv)
4475 #endif
4476 {
4477 int status;
4478 try {
4479 // Do the real work ...
4480 status = main_worker(argc, argv);
4481 }
4482 catch (int ex) {
4483 // EXIT(status) arrives here
4484 status = ex;
4485 }
4486 catch (const std::bad_alloc & /*ex*/) {
4487 // Memory allocation failed (also thrown by std::operator new)
4488 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
4489 status = EXIT_NOMEM;
4490 }
4491 catch (const std::exception & ex) {
4492 // Other fatal errors
4493 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
4494 status = EXIT_BADCODE;
4495 }
4496
4497 if (is_initialized)
4498 status = Goodbye(status);
4499
4500 #ifdef _WIN32
4501 daemon_winsvc_exitcode = status;
4502 #endif
4503 return status;
4504 }
4505
4506
#ifdef _WIN32
// Program entry point on Windows.
// Hands control to daemon_main(), which handles daemon and service
// specific commands and starts smartd_main() direct, from a new
// process, or via service control manager.
int main(int argc, char **argv){
  // Settings for running smartd as a windows service
  static const daemon_winsvc_options service_options = {
    "--service", // cmd_opt
    "smartd", "SmartD Service", // servicename, displayname
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  return daemon_main("smartd", &service_options , smartd_main, argc, argv);
}
#endif