]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Stop passing arguments to dh_installinit
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-16 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #include "config.h"
25 #include "int64.h"
26
27 // unconditionally included files
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
31 #include <signal.h>
32 #include <fcntl.h>
33 #include <string.h>
34 #include <syslog.h>
35 #include <stdarg.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <time.h>
39 #include <limits.h>
40 #include <getopt.h>
41
42 #include <stdexcept>
43 #include <string>
44 #include <vector>
45 #include <algorithm> // std::replace()
46
47 // conditionally included files
48 #ifndef _WIN32
49 #include <sys/wait.h>
50 #endif
51 #ifdef HAVE_UNISTD_H
52 #include <unistd.h>
53 #endif
54 #ifdef HAVE_NETDB_H
55 #include <netdb.h>
56 #endif
57
58 #ifdef _WIN32
59 #ifdef _MSC_VER
60 #pragma warning(disable:4761) // "conversion supplied"
61 typedef unsigned short mode_t;
62 typedef int pid_t;
63 #endif
64 #include <io.h> // umask()
65 #include <process.h> // getpid()
66 #endif // _WIN32
67
68 #ifdef __CYGWIN__
69 #include <io.h> // setmode()
70 #endif // __CYGWIN__
71
72 #ifdef HAVE_LIBCAP_NG
73 #include <cap-ng.h>
74 #endif // LIBCAP_NG
75
76 // locally included files
77 #include "atacmds.h"
78 #include "dev_interface.h"
79 #include "knowndrives.h"
80 #include "scsicmds.h"
81 #include "utility.h"
82
83 // This is for solaris, where signal() resets the handler to SIG_DFL
84 // after the first signal is caught.
85 #ifdef HAVE_SIGSET
86 #define SIGNALFN sigset
87 #else
88 #define SIGNALFN signal
89 #endif
90
91 #ifdef _WIN32
92 // fork()/signal()/initd simulation for native Windows
93 #include "daemon_win32.h" // daemon_main/detach/signal()
94 #undef SIGNALFN
95 #define SIGNALFN daemon_signal
96 #define strsignal daemon_strsignal
97 #define sleep daemon_sleep
98 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
99 #define SIGQUIT SIGBREAK
100 #define SIGQUIT_KEYNAME "CONTROL-Break"
101 #else // _WIN32
102 #define SIGQUIT_KEYNAME "CONTROL-\\"
103 #endif // _WIN32
104
105 #if defined (__SVR4) && defined (__sun)
106 extern "C" int getdomainname(char *, int); // no declaration in header files!
107 #endif
108
109 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 4207 2016-01-22 19:35:10Z chrfranke $"
110 CONFIG_H_CVSID;
111
// Exit status values of smartd

// Startup and configuration problems
#define EXIT_BADCMD   1    // command line could not be parsed
#define EXIT_BADCONF  2    // config file contains a syntax error
#define EXIT_STARTUP  3    // forking the daemon failed
#define EXIT_PID      4    // PID file could not be created
#define EXIT_NOCONF   5    // config file is missing
#define EXIT_READCONF 6    // config file is present but unreadable

// Resource and internal problems
#define EXIT_NOMEM    8    // memory exhausted
#define EXIT_BADCODE  10   // internal error - must NEVER occur

// Device problems
#define EXIT_BADDEV   16   // this device cannot be monitored
#define EXIT_NODEV    17   // there are no devices to monitor

// Termination by signal
#define EXIT_SIGNAL   254  // aborted on signal
127
128
// Command line: debug level (1 = debug mode, 2 = print presets)
static unsigned char debugmode = 0;

// Command line: interval to sleep between two checks
#define CHECKTIME 1800
static int checktime = CHECKTIME;

// Command line: path of the PID file, empty string means "no PID file"
static std::string pid_file;

// Command line: path prefix of the persistent state files,
// empty string disables persistence.
#ifdef SMARTMONTOOLS_SAVESTATES
static std::string state_path_prefix = SMARTMONTOOLS_SAVESTATES;
#else
static std::string state_path_prefix;
#endif

// Command line: path prefix of the attribute log files,
// empty string disables attribute logging.
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
static std::string attrlog_path_prefix = SMARTMONTOOLS_ATTRIBUTELOG;
#else
static std::string attrlog_path_prefix;
#endif

// Name of the configuration file
static const char * configfile;
// Pseudo "name" used when the configuration is read from stdin
static const char * const configfile_stdin = "<stdin>";
// Path of an alternate configuration file
static std::string configfile_alt;

// Path of the warning script
static std::string warning_script;

// Command line: when should smartd exit?
static int quit = 0;

// Command line: default syslog(3) facility used for logging
static int facility = LOG_DAEMON;
168
#ifndef _WIN32
// Command line: detach and run in the background?
static bool do_fork = true;
#endif

#ifdef HAVE_LIBCAP_NG
// Command line: use capabilities?
static bool enable_capabilities = false;
#endif

// TODO: This variable is only meaningful for smartctl,
// but os_win32.cpp references it as well.
unsigned char failuretest_permissive = 0;

// Becomes 1 when USR1 was caught (request: check devices now)
static volatile int caughtsigUSR1 = 0;

#ifdef _WIN32
// Becomes 1 when USR2 was caught (request: toggle debug mode)
static volatile int caughtsigUSR2 = 0;
#endif

// Becomes 1 when HUP was caught (request: reload config file).
// Becomes 2 when INT was caught in debug mode (also reloads config file).
static volatile int caughtsigHUP = 0;

// Holds the signal number once INT, QUIT, or TERM was caught
static volatile int caughtsigEXIT = 0;
196
197 // This function prints either to stdout or to the syslog as needed.
198 static void PrintOut(int priority, const char *fmt, ...)
199 __attribute_format_printf(2, 3);
200
// Bit flags controlling how an attribute is monitored.
// Each flag occupies its own bit so several may be combined.
// See monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5
};
211
// Flag byte for each attribute ID.
// IDs outside 1..255 are never stored: set() ignores them and
// is_set() reports false for them.
class attribute_flags
{
public:
  // All flags start cleared.
  attribute_flags()
  {
    for (unsigned i = 0; i < sizeof(m_flags); i++)
      m_flags[i] = 0;
  }

  // Return true if any bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
  {
    if (id <= 0 || id >= (int)sizeof(m_flags))
      return false;
    return ((m_flags[id] & flag) != 0);
  }

  // Add the bits of 'flags' to attribute 'id'.
  void set(int id, unsigned char flags)
  {
    if (id <= 0 || id >= (int)sizeof(m_flags))
      return;
    m_flags[id] |= flags;
  }

private:
  unsigned char m_flags[256];
};
231
232
233 /// Configuration data for a device. Read from smartd.conf.
234 /// Supports copy & assignment and is compatible with STL containers.
235 struct dev_config
236 {
237 int lineno; // Line number of entry in file
238 std::string name; // Device name (with optional extra info)
239 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
240 std::string dev_type; // Device type argument from -d directive, empty if none
241 std::string dev_idinfo; // Device identify info for warning emails
242 std::string state_file; // Path of the persistent state file, empty if none
243 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
244 bool ignore; // Ignore this entry
245 bool smartcheck; // Check SMART status
246 bool usagefailed; // Check for failed Usage Attributes
247 bool prefail; // Track changes in Prefail Attributes
248 bool usage; // Track changes in Usage Attributes
249 bool selftest; // Monitor number of selftest errors
250 bool errorlog; // Monitor number of ATA errors
251 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
252 bool offlinests; // Monitor changes in offline data collection status
253 bool offlinests_ns; // Disable auto standby if in progress
254 bool selfteststs; // Monitor changes in self-test execution status
255 bool selfteststs_ns; // Disable auto standby if in progress
256 bool permissive; // Ignore failed SMART commands
257 char autosave; // 1=disable, 2=enable Autosave Attributes
258 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
259 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
260 bool ignorepresets; // Ignore database of -v options
261 bool showpresets; // Show database entry for this device
262 bool removable; // Device may disappear (not be present)
263 char powermode; // skip check, if disk in idle or standby mode
264 bool powerquiet; // skip powermode 'skipping checks' message
265 int powerskipmax; // how many times can be check skipped
266 unsigned char tempdiff; // Track Temperature changes >= this limit
267 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
268 regular_expression test_regex; // Regex for scheduled testing
269
270 // Configuration of email warning messages
271 std::string emailcmdline; // script to execute, empty if no messages
272 std::string emailaddress; // email address, or empty
273 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
274 bool emailtest; // Send test email?
275
276 // ATA ONLY
277 int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
278 int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
279 int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
280 int set_lookahead; // disable(-1), enable(1) read look-ahead
281 int set_standby; // set(1..255->0..254) standby timer
282 bool set_security_freeze; // Freeze ATA security
283 int set_wcache; // disable(-1), enable(1) write cache
284
285 bool sct_erc_set; // set SCT ERC to:
286 unsigned short sct_erc_readtime; // ERC read time (deciseconds)
287 unsigned short sct_erc_writetime; // ERC write time (deciseconds)
288
289 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
290 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
291 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
292 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
293
294 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
295
296 ata_vendor_attr_defs attribute_defs; // -v options
297
298 dev_config();
299 };
300
301 dev_config::dev_config()
302 : lineno(0),
303 ignore(false),
304 smartcheck(false),
305 usagefailed(false),
306 prefail(false),
307 usage(false),
308 selftest(false),
309 errorlog(false),
310 xerrorlog(false),
311 offlinests(false), offlinests_ns(false),
312 selfteststs(false), selfteststs_ns(false),
313 permissive(false),
314 autosave(0),
315 autoofflinetest(0),
316 ignorepresets(false),
317 showpresets(false),
318 removable(false),
319 powermode(0),
320 powerquiet(false),
321 powerskipmax(0),
322 tempdiff(0),
323 tempinfo(0), tempcrit(0),
324 emailfreq(0),
325 emailtest(false),
326 dev_rpm(0),
327 set_aam(0), set_apm(0),
328 set_lookahead(0),
329 set_standby(0),
330 set_security_freeze(false),
331 set_wcache(0),
332 sct_erc_set(false),
333 sct_erc_readtime(0), sct_erc_writetime(0),
334 curr_pending_id(0), offl_pending_id(0),
335 curr_pending_incr(false), offl_pending_incr(false),
336 curr_pending_set(false), offl_pending_set(false)
337 {
338 }
339
340
// How many different mail message types exist
static const int SMARTD_NMAIL = 13;
// Mail type used for '-M test' mails (its state is not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.

// Bookkeeping for one mail message type.
struct mailinfo {
  int logged;        // how many times an email has been sent
  time_t firstsent;  // time(2) value when the first email was sent
  time_t lastsent;   // time(2) value when the last email was sent

  mailinfo()
  : logged(0),
    firstsent(0),
    lastsent(0)
  {
  }
};
355
356 /// Persistent state data for a device.
357 struct persistent_dev_state
358 {
359 unsigned char tempmin, tempmax; // Min/Max Temperatures
360
361 unsigned char selflogcount; // total number of self-test errors
362 unsigned short selfloghour; // lifetime hours of last self-test error
363
364 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
365
366 uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
367 uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
368
369 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
370
371 // ATA ONLY
372 int ataerrorcount; // Total number of ATA errors
373
374 // Persistent part of ata_smart_values:
375 struct ata_attribute {
376 unsigned char id;
377 unsigned char val;
378 unsigned char worst; // Byte needed for 'raw64' attribute only.
379 uint64_t raw;
380 unsigned char resvd;
381
382 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
383 };
384 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
385
386 // SCSI ONLY
387
388 struct scsi_error_counter_t {
389 struct scsiErrorCounter errCounter;
390 unsigned char found;
391 scsi_error_counter_t() : found(0)
392 { memset(&errCounter, 0, sizeof(errCounter)); }
393 };
394 scsi_error_counter_t scsi_error_counters[3];
395
396 struct scsi_nonmedium_error_t {
397 struct scsiNonMediumError nme;
398 unsigned char found;
399 scsi_nonmedium_error_t() : found(0)
400 { memset(&nme, 0, sizeof(nme)); }
401 };
402 scsi_nonmedium_error_t scsi_nonmedium_error;
403
404 persistent_dev_state();
405 };
406
407 persistent_dev_state::persistent_dev_state()
408 : tempmin(0), tempmax(0),
409 selflogcount(0),
410 selfloghour(0),
411 scheduled_test_next_check(0),
412 selective_test_last_start(0),
413 selective_test_last_end(0),
414 ataerrorcount(0)
415 {
416 }
417
418 /// Non-persistent state data for a device.
419 struct temp_dev_state
420 {
421 bool must_write; // true if persistent part should be written
422
423 bool not_cap_offline; // true == not capable of offline testing
424 bool not_cap_conveyance;
425 bool not_cap_short;
426 bool not_cap_long;
427 bool not_cap_selective;
428
429 unsigned char temperature; // last recorded Temperature (in Celsius)
430 time_t tempmin_delay; // time where Min Temperature tracking will start
431
432 bool powermodefail; // true if power mode check failed
433 int powerskipcnt; // Number of checks skipped due to idle or standby mode
434
435 // SCSI ONLY
436 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
437 unsigned char TempPageSupported; // has log sense temperature page (0xd)
438 unsigned char ReadECounterPageSupported;
439 unsigned char WriteECounterPageSupported;
440 unsigned char VerifyECounterPageSupported;
441 unsigned char NonMediumErrorPageSupported;
442 unsigned char SuppressReport; // minimize nuisance reports
443 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
444 // know yet) 6 or 10
445 // ATA ONLY
446 uint64_t num_sectors; // Number of sectors
447 ata_smart_values smartval; // SMART data
448 ata_smart_thresholds_pvt smartthres; // SMART thresholds
449 bool offline_started; // true if offline data collection was started
450 bool selftest_started; // true if self-test was started
451
452 temp_dev_state();
453 };
454
455 temp_dev_state::temp_dev_state()
456 : must_write(false),
457 not_cap_offline(false),
458 not_cap_conveyance(false),
459 not_cap_short(false),
460 not_cap_long(false),
461 not_cap_selective(false),
462 temperature(0),
463 tempmin_delay(0),
464 powermodefail(false),
465 powerskipcnt(0),
466 SmartPageSupported(false),
467 TempPageSupported(false),
468 ReadECounterPageSupported(false),
469 WriteECounterPageSupported(false),
470 VerifyECounterPageSupported(false),
471 NonMediumErrorPageSupported(false),
472 SuppressReport(false),
473 modese_len(0),
474 num_sectors(0),
475 offline_started(false),
476 selftest_started(false)
477 {
478 memset(&smartval, 0, sizeof(smartval));
479 memset(&smartthres, 0, sizeof(smartthres));
480 }
481
482 /// Runtime state data for a device.
483 struct dev_state
484 : public persistent_dev_state,
485 public temp_dev_state
486 {
487 void update_persistent_state();
488 void update_temp_state();
489 };
490
491 /// Container for configuration info for each device.
492 typedef std::vector<dev_config> dev_config_vector;
493
494 /// Container for state info for each device.
495 typedef std::vector<dev_state> dev_state_vector;
496
497 // Copy ATA attributes to persistent state.
498 void dev_state::update_persistent_state()
499 {
500 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
501 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
502 ata_attribute & pa = ata_attributes[i];
503 pa.id = ta.id;
504 if (ta.id == 0) {
505 pa.val = pa.worst = 0; pa.raw = 0;
506 continue;
507 }
508 pa.val = ta.current;
509 pa.worst = ta.worst;
510 pa.raw = ta.raw[0]
511 | ( ta.raw[1] << 8)
512 | ( ta.raw[2] << 16)
513 | ((uint64_t)ta.raw[3] << 24)
514 | ((uint64_t)ta.raw[4] << 32)
515 | ((uint64_t)ta.raw[5] << 40);
516 pa.resvd = ta.reserv;
517 }
518 }
519
520 // Copy ATA from persistent to temp state.
521 void dev_state::update_temp_state()
522 {
523 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
524 const ata_attribute & pa = ata_attributes[i];
525 ata_smart_attribute & ta = smartval.vendor_attributes[i];
526 ta.id = pa.id;
527 if (pa.id == 0) {
528 ta.current = ta.worst = 0;
529 memset(ta.raw, 0, sizeof(ta.raw));
530 continue;
531 }
532 ta.current = pa.val;
533 ta.worst = pa.worst;
534 ta.raw[0] = (unsigned char) pa.raw;
535 ta.raw[1] = (unsigned char)(pa.raw >> 8);
536 ta.raw[2] = (unsigned char)(pa.raw >> 16);
537 ta.raw[3] = (unsigned char)(pa.raw >> 24);
538 ta.raw[4] = (unsigned char)(pa.raw >> 32);
539 ta.raw[5] = (unsigned char)(pa.raw >> 40);
540 ta.reserv = pa.resvd;
541 }
542 }
543
544 // Parse a line from a state file.
545 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
546 {
547 static const regular_expression regex(
548 "^ *"
549 "((temperature-min)" // (1 (2)
550 "|(temperature-max)" // (3)
551 "|(self-test-errors)" // (4)
552 "|(self-test-last-err-hour)" // (5)
553 "|(scheduled-test-next-check)" // (6)
554 "|(selective-test-last-start)" // (7)
555 "|(selective-test-last-end)" // (8)
556 "|(ata-error-count)" // (9)
557 "|(mail\\.([0-9]+)\\." // (10 (11)
558 "((count)" // (12 (13)
559 "|(first-sent-time)" // (14)
560 "|(last-sent-time)" // (15)
561 ")" // 12)
562 ")" // 10)
563 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
564 "((id)" // (18 (19)
565 "|(val)" // (20)
566 "|(worst)" // (21)
567 "|(raw)" // (22)
568 "|(resvd)" // (23)
569 ")" // 18)
570 ")" // 16)
571 ")" // 1)
572 " *= *([0-9]+)[ \n]*$", // (24)
573 REG_EXTENDED
574 );
575
576 const int nmatch = 1+24;
577 regmatch_t match[nmatch];
578 if (!regex.execute(line, nmatch, match))
579 return false;
580 if (match[nmatch-1].rm_so < 0)
581 return false;
582
583 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
584
585 int m = 1;
586 if (match[++m].rm_so >= 0)
587 state.tempmin = (unsigned char)val;
588 else if (match[++m].rm_so >= 0)
589 state.tempmax = (unsigned char)val;
590 else if (match[++m].rm_so >= 0)
591 state.selflogcount = (unsigned char)val;
592 else if (match[++m].rm_so >= 0)
593 state.selfloghour = (unsigned short)val;
594 else if (match[++m].rm_so >= 0)
595 state.scheduled_test_next_check = (time_t)val;
596 else if (match[++m].rm_so >= 0)
597 state.selective_test_last_start = val;
598 else if (match[++m].rm_so >= 0)
599 state.selective_test_last_end = val;
600 else if (match[++m].rm_so >= 0)
601 state.ataerrorcount = (int)val;
602 else if (match[m+=2].rm_so >= 0) {
603 int i = atoi(line+match[m].rm_so);
604 if (!(0 <= i && i < SMARTD_NMAIL))
605 return false;
606 if (i == MAILTYPE_TEST) // Don't suppress test mails
607 return true;
608 if (match[m+=2].rm_so >= 0)
609 state.maillog[i].logged = (int)val;
610 else if (match[++m].rm_so >= 0)
611 state.maillog[i].firstsent = (time_t)val;
612 else if (match[++m].rm_so >= 0)
613 state.maillog[i].lastsent = (time_t)val;
614 else
615 return false;
616 }
617 else if (match[m+=5+1].rm_so >= 0) {
618 int i = atoi(line+match[m].rm_so);
619 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
620 return false;
621 if (match[m+=2].rm_so >= 0)
622 state.ata_attributes[i].id = (unsigned char)val;
623 else if (match[++m].rm_so >= 0)
624 state.ata_attributes[i].val = (unsigned char)val;
625 else if (match[++m].rm_so >= 0)
626 state.ata_attributes[i].worst = (unsigned char)val;
627 else if (match[++m].rm_so >= 0)
628 state.ata_attributes[i].raw = val;
629 else if (match[++m].rm_so >= 0)
630 state.ata_attributes[i].resvd = (unsigned char)val;
631 else
632 return false;
633 }
634 else
635 return false;
636 return true;
637 }
638
639 // Read a state file.
640 static bool read_dev_state(const char * path, persistent_dev_state & state)
641 {
642 stdio_file f(path, "r");
643 if (!f) {
644 if (errno != ENOENT)
645 pout("Cannot read state file \"%s\"\n", path);
646 return false;
647 }
648 #ifdef __CYGWIN__
649 setmode(fileno(f), O_TEXT); // Allow files with \r\n
650 #endif
651
652 persistent_dev_state new_state;
653 int good = 0, bad = 0;
654 char line[256];
655 while (fgets(line, sizeof(line), f)) {
656 const char * s = line + strspn(line, " \t");
657 if (!*s || *s == '#')
658 continue;
659 if (!parse_dev_state_line(line, new_state))
660 bad++;
661 else
662 good++;
663 }
664
665 if (bad) {
666 if (!good) {
667 pout("%s: format error\n", path);
668 return false;
669 }
670 pout("%s: %d invalid line(s) ignored\n", path, bad);
671 }
672
673 // This sets the values missing in the file to 0.
674 state = new_state;
675 return true;
676 }
677
// Write a "name = value" line to a state file.
// Nothing is written if the value is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}

// Write a "name1.id.name2 = value" line to a state file.
// Nothing is written if the value is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
689
690 // Write a state file
691 static bool write_dev_state(const char * path, const persistent_dev_state & state)
692 {
693 // Rename old "file" to "file~"
694 std::string pathbak = path; pathbak += '~';
695 unlink(pathbak.c_str());
696 rename(path, pathbak.c_str());
697
698 stdio_file f(path, "w");
699 if (!f) {
700 pout("Cannot create state file \"%s\"\n", path);
701 return false;
702 }
703
704 fprintf(f, "# smartd state file\n");
705 write_dev_state_line(f, "temperature-min", state.tempmin);
706 write_dev_state_line(f, "temperature-max", state.tempmax);
707 write_dev_state_line(f, "self-test-errors", state.selflogcount);
708 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
709 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
710 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
711 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
712
713 int i;
714 for (i = 0; i < SMARTD_NMAIL; i++) {
715 if (i == MAILTYPE_TEST) // Don't suppress test mails
716 continue;
717 const mailinfo & mi = state.maillog[i];
718 if (!mi.logged)
719 continue;
720 write_dev_state_line(f, "mail", i, "count", mi.logged);
721 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
722 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
723 }
724
725 // ATA ONLY
726 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
727
728 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
729 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
730 if (!pa.id)
731 continue;
732 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
733 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
734 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
735 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
736 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
737 }
738
739 return true;
740 }
741
742 // Write to the attrlog file
743 static bool write_dev_attrlog(const char * path, const dev_state & state)
744 {
745 stdio_file f(path, "a");
746 if (!f) {
747 pout("Cannot create attribute log file \"%s\"\n", path);
748 return false;
749 }
750
751
752 time_t now = time(0);
753 struct tm * tms = gmtime(&now);
754 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
755 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
756 tms->tm_hour, tms->tm_min, tms->tm_sec);
757 // ATA ONLY
758 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
759 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
760 if (!pa.id)
761 continue;
762 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
763 }
764 // SCSI ONLY
765 const struct scsiErrorCounter * ecp;
766 const char * pageNames[3] = {"read", "write", "verify"};
767 for (int k = 0; k < 3; ++k) {
768 if ( !state.scsi_error_counters[k].found ) continue;
769 ecp = &state.scsi_error_counters[k].errCounter;
770 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
771 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
772 "\t%s-corr-by-retry;%" PRIu64 ";"
773 "\t%s-total-err-corrected;%" PRIu64 ";"
774 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
775 "\t%s-gb-processed;%.3f;"
776 "\t%s-total-unc-errors;%" PRIu64 ";",
777 pageNames[k], ecp->counter[0],
778 pageNames[k], ecp->counter[1],
779 pageNames[k], ecp->counter[2],
780 pageNames[k], ecp->counter[3],
781 pageNames[k], ecp->counter[4],
782 pageNames[k], (ecp->counter[5] / 1000000000.0),
783 pageNames[k], ecp->counter[6]);
784 }
785 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
786 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
787 }
788 // write SCSI current temperature if it is monitored
789 if(state.TempPageSupported && state.temperature)
790 fprintf(f, "\ttemperature;%d;", state.temperature);
791 // end of line
792 fprintf(f, "\n");
793 return true;
794 }
795
796 // Write all state files. If write_always is false, don't write
797 // unless must_write is set.
798 static void write_all_dev_states(const dev_config_vector & configs,
799 dev_state_vector & states,
800 bool write_always = true)
801 {
802 for (unsigned i = 0; i < states.size(); i++) {
803 const dev_config & cfg = configs.at(i);
804 if (cfg.state_file.empty())
805 continue;
806 dev_state & state = states[i];
807 if (!write_always && !state.must_write)
808 continue;
809 if (!write_dev_state(cfg.state_file.c_str(), state))
810 continue;
811 state.must_write = false;
812 if (write_always || debugmode)
813 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
814 cfg.name.c_str(), cfg.state_file.c_str());
815 }
816 }
817
818 // Write to all attrlog files
819 static void write_all_dev_attrlogs(const dev_config_vector & configs,
820 dev_state_vector & states)
821 {
822 for (unsigned i = 0; i < states.size(); i++) {
823 const dev_config & cfg = configs.at(i);
824 if (cfg.attrlog_file.empty())
825 continue;
826 dev_state & state = states[i];
827 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
828 }
829 }
830
831 // remove the PID file
832 static void RemovePidFile()
833 {
834 if (!pid_file.empty()) {
835 if (unlink(pid_file.c_str()))
836 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
837 pid_file.c_str(), strerror(errno));
838 pid_file.clear();
839 }
840 return;
841 }
842
843 extern "C" { // signal handlers require C-linkage
844
845 // Note if we catch a SIGUSR1
846 static void USR1handler(int sig)
847 {
848 if (SIGUSR1==sig)
849 caughtsigUSR1=1;
850 return;
851 }
852
853 #ifdef _WIN32
854 // Note if we catch a SIGUSR2
855 static void USR2handler(int sig)
856 {
857 if (SIGUSR2==sig)
858 caughtsigUSR2=1;
859 return;
860 }
861 #endif
862
863 // Note if we catch a HUP (or INT in debug mode)
864 static void HUPhandler(int sig)
865 {
866 if (sig==SIGHUP)
867 caughtsigHUP=1;
868 else
869 caughtsigHUP=2;
870 return;
871 }
872
873 // signal handler for TERM, QUIT, and INT (if not in debug mode)
874 static void sighandler(int sig)
875 {
876 if (!caughtsigEXIT)
877 caughtsigEXIT=sig;
878 return;
879 }
880
881 } // extern "C"
882
883 // Cleanup, print Goodbye message and remove pidfile
884 static int Goodbye(int status)
885 {
886 // delete PID file, if one was created
887 RemovePidFile();
888
889 // if we are exiting because of a code bug, tell user
890 if (status==EXIT_BADCODE)
891 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
892
893 // and this should be the final output from smartd before it exits
894 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
895
896 return status;
897 }
898
899 // a replacement for setenv() which is not available on all platforms.
900 // Note that the string passed to putenv must not be freed or made
901 // invalid, since a pointer to it is kept by putenv(). This means that
902 // it must either be a static buffer or allocated off the heap. The
903 // string can be freed if the environment variable is redefined via
904 // another call to putenv(). There is no portable way to unset a variable
905 // with putenv(). So we manage the buffer in a static object.
906 // Using setenv() if available is not considered because some
907 // implementations may produce memory leaks.
908
// Owner of the "NAME=value" string of one environment variable.
// putenv() keeps a pointer to the string passed to it, so the buffer
// must stay valid while the variable is set. For this reason there is
// intentionally no destructor which frees the current buffer.
// Objects are not copyable.
class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Set environment variable 'name' to 'value'.
  // Throws std::runtime_error if putenv() fails.
  void set(const char * name, const char * value);

private:
  char * m_buf; // String currently referenced by the environment, 0 if none

  // Copying is disabled (declared, but not implemented)
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "value" + '\0'
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The environment was not changed, so the new string is
    // not referenced and must be released here to avoid a leak.
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // The environment now refers to newbuf, the previous string can go.
  // This assumes that the same NAME is passed on each call
  delete [] m_buf;
  m_buf = newbuf;
}
937
938 #define EBUFLEN 1024
939
940 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
941 __attribute_format_printf(4, 5);
942
943 // If either address or executable path is non-null then send and log
944 // a warning email, or execute executable
945 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
946 {
947 static const char * const whichfail[] = {
948 "EmailTest", // 0
949 "Health", // 1
950 "Usage", // 2
951 "SelfTest", // 3
952 "ErrorCount", // 4
953 "FailedHealthCheck", // 5
954 "FailedReadSmartData", // 6
955 "FailedReadSmartErrorLog", // 7
956 "FailedReadSmartSelfTestLog", // 8
957 "FailedOpenDevice", // 9
958 "CurrentPendingSector", // 10
959 "OfflineUncorrectableSector", // 11
960 "Temperature" // 12
961 };
962
963 // See if user wants us to send mail
964 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
965 return;
966
967 std::string address = cfg.emailaddress;
968 const char * executable = cfg.emailcmdline.c_str();
969
970 // which type of mail are we sending?
971 mailinfo * mail=(state.maillog)+which;
972
973 // checks for sanity
974 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
975 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
976 return;
977 }
978 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
979 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
980 which, (int)sizeof(whichfail));
981 return;
982 }
983
984 // Return if a single warning mail has been sent.
985 if ((cfg.emailfreq==1) && mail->logged)
986 return;
987
988 // Return if this is an email test and one has already been sent.
989 if (which == 0 && mail->logged)
990 return;
991
992 // To decide if to send mail, we need to know what time it is.
993 time_t epoch = time(0);
994
995 // Return if less than one day has gone by
996 const int day = 24*3600;
997 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
998 return;
999
1000 // Return if less than 2^(logged-1) days have gone by
1001 if (cfg.emailfreq==3 && mail->logged) {
1002 int days = 0x01 << (mail->logged - 1);
1003 days*=day;
1004 if (epoch<(mail->lastsent+days))
1005 return;
1006 }
1007
1008 #ifdef HAVE_LIBCAP_NG
1009 if (enable_capabilities) {
1010 PrintOut(LOG_ERR, "Sending a mail was supressed. "
1011 "Mails can't be send when capabilites are enabled\n");
1012 return;
1013 }
1014 #endif
1015
1016 // record the time of this mail message, and the first mail message
1017 if (!mail->logged)
1018 mail->firstsent=epoch;
1019 mail->lastsent=epoch;
1020
1021 // print warning string into message
1022 char message[256];
1023 va_list ap;
1024 va_start(ap, fmt);
1025 vsnprintf(message, sizeof(message), fmt, ap);
1026 va_end(ap);
1027
1028 // replace commas by spaces to separate recipients
1029 std::replace(address.begin(), address.end(), ',', ' ');
1030
1031 // Export information in environment variables that will be useful
1032 // for user scripts
1033 static env_buffer env[12];
1034 env[0].set("SMARTD_MAILER", executable);
1035 env[1].set("SMARTD_MESSAGE", message);
1036 char dates[DATEANDEPOCHLEN];
1037 snprintf(dates, sizeof(dates), "%d", mail->logged);
1038 env[2].set("SMARTD_PREVCNT", dates);
1039 dateandtimezoneepoch(dates, mail->firstsent);
1040 env[3].set("SMARTD_TFIRST", dates);
1041 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1042 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1043 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1044 env[6].set("SMARTD_ADDRESS", address.c_str());
1045 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1046
1047 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1048 env[8].set("SMARTD_DEVICETYPE",
1049 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1050 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1051
1052 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1053 dates[0] = 0;
1054 if (which) switch (cfg.emailfreq) {
1055 case 2: dates[0] = '1'; dates[1] = 0; break;
1056 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1057 }
1058 env[11].set("SMARTD_NEXTDAYS", dates);
1059
1060 // now construct a command to send this as EMAIL
1061 if (!*executable)
1062 executable = "<mail>";
1063 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1064 const char * newwarn = (which? "Warning via" : "Test of");
1065
1066 #ifndef _WIN32
1067 char command[2048];
1068 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1069
1070 // tell SYSLOG what we are about to do...
1071 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1072 which?"Sending warning via":"Executing test of", executable, newadd);
1073
1074 // issue the command to send mail or to run the user's executable
1075 errno=0;
1076 FILE * pfp;
1077 if (!(pfp=popen(command, "r")))
1078 // failed to popen() mail process
1079 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1080 newwarn, executable, newadd, errno?strerror(errno):"");
1081 else {
1082 // pipe suceeded!
1083 int len, status;
1084 char buffer[EBUFLEN];
1085
1086 // if unexpected output on stdout/stderr, null terminate, print, and flush
1087 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1088 int count=0;
1089 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1090 buffer[newlen]='\0';
1091 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1092 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1093
1094 // flush pipe if needed
1095 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1096 count++;
1097
1098 // tell user that pipe was flushed, or that something is really wrong
1099 if (count && count<EBUFLEN)
1100 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1101 newwarn, executable, newadd);
1102 else if (count)
1103 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1104 newwarn, executable, newadd);
1105 }
1106
1107 // if something went wrong with mail process, print warning
1108 errno=0;
1109 if (-1==(status=pclose(pfp)))
1110 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1111 errno?strerror(errno):"");
1112 else {
1113 // mail process apparently succeeded. Check and report exit status
1114 if (WIFEXITED(status)) {
1115 // exited 'normally' (but perhaps with nonzero status)
1116 int status8 = WEXITSTATUS(status);
1117 if (status8>128)
1118 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1119 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1120 else if (status8)
1121 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1122 newwarn, executable, newadd, status, status8);
1123 else
1124 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1125 }
1126
1127 if (WIFSIGNALED(status))
1128 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1129 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1130
1131 // this branch is probably not possible. If subprocess is
1132 // stopped then pclose() should not return.
1133 if (WIFSTOPPED(status))
1134 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1135 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1136
1137 }
1138 }
1139
1140 #else // _WIN32
1141 {
1142 char command[2048];
1143 snprintf(command, sizeof(command), "cmd /c \"%s\"", warning_script.c_str());
1144
1145 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1146 int rc;
1147 // run command
1148 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1149 (which?"Sending warning via":"Executing test of"), executable, newadd);
1150 rc = daemon_spawn(command, "", 0, stdoutbuf, sizeof(stdoutbuf));
1151 if (rc >= 0 && stdoutbuf[0])
1152 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1153 newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1154 if (rc != 0)
1155 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1156 newwarn, executable, newadd, rc);
1157 else
1158 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1159 }
1160
1161 #endif // _WIN32
1162
1163 // increment mail sent counter
1164 mail->logged++;
1165 }
1166
1167 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1168 __attribute_format_printf(4, 5);
1169
1170 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1171 {
1172 if (!(0 <= which && which < SMARTD_NMAIL))
1173 return;
1174
1175 // Return if no mail sent yet
1176 mailinfo & mi = state.maillog[which];
1177 if (!mi.logged)
1178 return;
1179
1180 // Format & print message
1181 char msg[256];
1182 va_list ap;
1183 va_start(ap, fmt);
1184 vsnprintf(msg, sizeof(msg), fmt, ap);
1185 va_end(ap);
1186
1187 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1188 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1189
1190 // Clear mail counter and timestamps
1191 mi = mailinfo();
1192 state.must_write = true;
1193 }
1194
1195 #ifndef _WIN32
1196
1197 // Output multiple lines via separate syslog(3) calls.
1198 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1199 {
1200 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1201 vsnprintf(buf, sizeof(buf), fmt, ap);
1202
1203 for (char * p = buf, * q; p && *p; p = q) {
1204 if ((q = strchr(p, '\n')))
1205 *q++ = 0;
1206 if (*p)
1207 syslog(priority, "%s\n", p);
1208 }
1209 }
1210
1211 #else // _WIN32
1212 // os_win32/syslog_win32.cpp supports multiple lines.
1213 #define vsyslog_lines vsyslog
1214 #endif // _WIN32
1215
1216 // Printing function for watching ataprint commands, or losing them
1217 // [From GLIBC Manual: Since the prototype doesn't specify types for
1218 // optional arguments, in a call to a variadic function the default
1219 // argument promotions are performed on the optional argument
1220 // values. This means the objects of type char or short int (whether
1221 // signed or not) are promoted to either int or unsigned int, as
1222 // appropriate.]
1223 void pout(const char *fmt, ...){
1224 va_list ap;
1225
1226 // get the correct time in syslog()
1227 FixGlibcTimeZoneBug();
1228 // initialize variable argument list
1229 va_start(ap,fmt);
1230 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1231 if (debugmode && debugmode != 2) {
1232 FILE * f = stdout;
1233 #ifdef _WIN32
1234 if (facility == LOG_LOCAL1) // logging to stdout
1235 f = stderr;
1236 #endif
1237 vfprintf(f, fmt, ap);
1238 fflush(f);
1239 }
1240 // in debugmode==2 mode we print output from knowndrives.o functions
1241 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1242 openlog("smartd", LOG_PID, facility);
1243 vsyslog_lines(LOG_INFO, fmt, ap);
1244 closelog();
1245 }
1246 va_end(ap);
1247 return;
1248 }
1249
1250 // This function prints either to stdout or to the syslog as needed.
1251 static void PrintOut(int priority, const char *fmt, ...){
1252 va_list ap;
1253
1254 // get the correct time in syslog()
1255 FixGlibcTimeZoneBug();
1256 // initialize variable argument list
1257 va_start(ap,fmt);
1258 if (debugmode) {
1259 FILE * f = stdout;
1260 #ifdef _WIN32
1261 if (facility == LOG_LOCAL1) // logging to stdout
1262 f = stderr;
1263 #endif
1264 vfprintf(f, fmt, ap);
1265 fflush(f);
1266 }
1267 else {
1268 openlog("smartd", LOG_PID, facility);
1269 vsyslog_lines(priority, fmt, ap);
1270 closelog();
1271 }
1272 va_end(ap);
1273 return;
1274 }
1275
1276 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1277 void checksumwarning(const char * string)
1278 {
1279 pout("Warning! %s error: invalid SMART checksum.\n", string);
1280 }
1281
1282 #ifndef _WIN32
1283
1284 // Wait for the pid file to show up, this makes sure a calling program knows
1285 // that the daemon is really up and running and has a pid to kill it
1286 static bool WaitForPidFile()
1287 {
1288 int waited, max_wait = 10;
1289 struct stat stat_buf;
1290
1291 if (pid_file.empty() || debugmode)
1292 return true;
1293
1294 for(waited = 0; waited < max_wait; ++waited) {
1295 if (!stat(pid_file.c_str(), &stat_buf)) {
1296 return true;
1297 } else
1298 sleep(1);
1299 }
1300 return false;
1301 }
1302
1303 #endif // _WIN32
1304
1305 // Forks new process, closes ALL file descriptors, redirects stdin,
1306 // stdout, and stderr. Not quite daemon(). See
1307 // http://www.linuxjournal.com/article/2335
1308 // for a good description of why we do things this way.
1309 static void DaemonInit()
1310 {
1311 #ifndef _WIN32
1312 pid_t pid;
1313 int i;
1314
1315 // flush all buffered streams. Else we might get two copies of open
1316 // streams since both parent and child get copies of the buffers.
1317 fflush(NULL);
1318
1319 if (do_fork) {
1320 if ((pid=fork()) < 0) {
1321 // unable to fork!
1322 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1323 EXIT(EXIT_STARTUP);
1324 }
1325 else if (pid) {
1326 // we are the parent process, wait for pid file, then exit cleanly
1327 if(!WaitForPidFile()) {
1328 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1329 EXIT(EXIT_STARTUP);
1330 } else
1331 EXIT(0);
1332 }
1333
1334 // from here on, we are the child process.
1335 setsid();
1336
1337 // Fork one more time to avoid any possibility of having terminals
1338 if ((pid=fork()) < 0) {
1339 // unable to fork!
1340 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1341 EXIT(EXIT_STARTUP);
1342 }
1343 else if (pid)
1344 // we are the parent process -- exit cleanly
1345 EXIT(0);
1346
1347 // Now we are the child's child...
1348 }
1349
1350 // close any open file descriptors
1351 for (i=getdtablesize();i>=0;--i)
1352 close(i);
1353
1354 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1355
1356 // redirect any IO attempts to /dev/null for stdin
1357 i=open("/dev/null",O_RDWR);
1358 if (i>=0) {
1359 // stdout
1360 NO_warn_unused_result(dup(i));
1361 // stderr
1362 NO_warn_unused_result(dup(i));
1363 };
1364 umask(0022);
1365 NO_warn_unused_result(chdir("/"));
1366
1367 if (do_fork)
1368 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1369
1370 #else // _WIN32
1371
1372 // No fork() on native Win32
1373 // Detach this process from console
1374 fflush(NULL);
1375 if (daemon_detach("smartd")) {
1376 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1377 EXIT(EXIT_STARTUP);
1378 }
1379 // stdin/out/err now closed if not redirected
1380
1381 #endif // _WIN32
1382 return;
1383 }
1384
1385 // create a PID file containing the current process id
1386 static void WritePidFile()
1387 {
1388 if (!pid_file.empty()) {
1389 pid_t pid = getpid();
1390 mode_t old_umask;
1391 #ifndef __CYGWIN__
1392 old_umask = umask(0077); // rwx------
1393 #else
1394 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1395 old_umask = umask(0033); // rwxr--r--
1396 #endif
1397
1398 stdio_file f(pid_file.c_str(), "w");
1399 umask(old_umask);
1400 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1401 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1402 EXIT(EXIT_PID);
1403 }
1404 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1405 }
1406 }
1407
1408 // Prints header identifying version of code and home
1409 static void PrintHead()
1410 {
1411 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1412 }
1413
1414 // prints help info for configuration file Directives
// The whole help text is emitted with a single PrintOut(LOG_INFO, ...)
// call.  The three format arguments are, in order: the configuration
// file name, the list of valid device types for '-d', and the list of
// valid arguments for '-F'.
1415 static void Directives()
1416 {
1417 PrintOut(LOG_INFO,
1418 "Configuration file (%s) Directives (after device name):\n"
1419 " -d TYPE Set the device type: auto, ignore, removable,\n"
1420 " %s\n"
1421 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1422 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1423 " -S VAL Enable/disable attribute autosave (on/off)\n"
1424 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1425 " -H Monitor SMART Health Status, report if failed\n"
1426 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1427 " -l TYPE Monitor SMART log or self-test status:\n"
1428 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1429 " -l scterc,R,W Set SCT Error Recovery Control\n"
1430 " -e Change device setting: aam,[N|off], apm,[N|off], lookahead,[on|off],\n"
1431 " security-freeze, standby,[N|off], wcache,[on|off]\n"
1432 " -f Monitor 'Usage' Attributes, report failures\n"
1433 " -m ADD Send email warning to address ADD\n"
1434 " -M TYPE Modify email warning behavior (see man page)\n"
1435 " -p Report changes in 'Prefailure' Attributes\n"
1436 " -u Report changes in 'Usage' Attributes\n"
1437 " -t Equivalent to -p and -u Directives\n"
1438 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1439 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1440 " -i ID Ignore Attribute ID for -f Directive\n"
1441 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1442 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1443 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1444 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1445 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1446 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1447 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1448 " -F TYPE Use firmware bug workaround:\n"
1449 " %s\n"
1450 " # Comment: text after a hash sign is ignored\n"
1451 " \\ Line continuation character\n"
1452 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1453 "Use ID = 0 to turn off -C and/or -U Directives\n"
1454 "Example: /dev/sda -a\n",
1455 configfile,
1456 smi()->get_valid_dev_types_str().c_str(),
1457 get_valid_firmwarebug_args());
1458 }
1459
/* Returns a pointer to a static string describing the valid arguments
   of the command line option opt, or NULL if opt is not an option
   known to this function. */
static const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
  if (opt == 'B' || opt == 'p' || opt == 'w')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";
  return NULL;
}
1486
1487 /* prints help information for command syntax */
// Every line of the help text is emitted with its own PrintOut(LOG_INFO, ...)
// call, in the order in which the options are shown.  Some entries depend
// on the build: defaults for -A and -s are only shown if the corresponding
// SMARTMONTOOLS_* macro is defined, -C only with HAVE_LIBCAP_NG, -n only
// on non-Windows builds and --service only on Windows builds.
1488 static void Usage()
1489 {
1490 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1491 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1492 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1493 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1494 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1495 #endif
1496 PrintOut(LOG_INFO,"\n");
1497 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1498 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1499 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1500 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1501 PrintOut(LOG_INFO,"\n");
1502 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1503 #endif
1504 PrintOut(LOG_INFO,"]\n\n");
1505 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1506 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1507 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1508 #ifdef HAVE_LIBCAP_NG
1509 PrintOut(LOG_INFO," -C, --capabilities\n");
1510 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1511 " Warning: Mail notification does not work when used.\n\n");
1512 #endif
1513 PrintOut(LOG_INFO," -d, --debug\n");
1514 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1515 PrintOut(LOG_INFO," -D, --showdirectives\n");
1516 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1517 PrintOut(LOG_INFO," -h, --help, --usage\n");
1518 PrintOut(LOG_INFO," Display this help and exit\n\n");
1519 PrintOut(LOG_INFO," -i N, --interval=N\n");
1520 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1521 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1522 #ifndef _WIN32
1523 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1524 #else
1525 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1526 #endif
1527 #ifndef _WIN32
1528 PrintOut(LOG_INFO," -n, --no-fork\n");
1529 PrintOut(LOG_INFO," Do not fork into background\n\n");
1530 #endif // _WIN32
1531 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1532 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1533 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
// The lists of valid -q and -r arguments come from GetValidArgList()
1534 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1535 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1536 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1537 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1538 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1539 #ifdef SMARTMONTOOLS_SAVESTATES
1540 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1541 #endif
1542 PrintOut(LOG_INFO,"\n");
1543 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1544 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1545 #ifndef _WIN32
1546 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1547 #else
1548 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1549 #endif
1550 #ifdef _WIN32
1551 PrintOut(LOG_INFO," --service\n");
1552 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1553 PrintOut(LOG_INFO," smartd install [options]\n");
1554 PrintOut(LOG_INFO," Remove service with:\n");
1555 PrintOut(LOG_INFO," smartd remove\n\n");
1556 #endif // _WIN32
1557 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1558 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1559 }
1560
1561 static int CloseDevice(smart_device * device, const char * name)
1562 {
1563 if (!device->close()){
1564 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1565 return 1;
1566 }
1567 // device sucessfully closed
1568 return 0;
1569 }
1570
// Tell whether a char must not appear in a state file name.
// Only ASCII digits and ASCII letters are allowed; returns true for
// everything else.
static bool not_allowed_in_filename(char c)
{
  if (c >= '0' && c <= '9')
    return false;
  if (c >= 'A' && c <= 'Z')
    return false;
  if (c >= 'a' && c <= 'z')
    return false;
  return true;
}
1578
1579 // Read error count from Summary or Extended Comprehensive SMART error log
1580 // Return -1 on error
1581 static int read_ata_error_count(ata_device * device, const char * name,
1582 firmwarebug_defs firmwarebugs, bool extended)
1583 {
1584 if (!extended) {
1585 ata_smart_errorlog log;
1586 if (ataReadErrorLog(device, &log, firmwarebugs)){
1587 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1588 return -1;
1589 }
1590 return (log.error_log_pointer ? log.ata_error_count : 0);
1591 }
1592 else {
1593 ata_smart_exterrlog logx;
1594 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1595 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1596 return -1;
1597 }
1598 // Some disks use the reserved byte as index, see ataprint.cpp.
1599 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1600 }
1601 }
1602
1603 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1604 // error count, and top bits are the power-on hours of the last error.
1605 static int SelfTestErrorCount(ata_device * device, const char * name,
1606 firmwarebug_defs firmwarebugs)
1607 {
1608 struct ata_smart_selftestlog log;
1609
1610 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1611 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1612 return -1;
1613 }
1614
1615 // return current number of self-test errors
1616 return ataPrintSmartSelfTestlog(&log, false, firmwarebugs);
1617 }
1618
// Decode the value returned by SelfTestErrorCount(): the bottom 8 bits
// are the self-test error count, the 16 bits above them the power-on
// hours of the last error.  The argument is parenthesized so that
// expressions such as 'a | b' are decoded as a whole.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1621
// Return true if the offline data collection status byte, with its top
// bit ignored, has the value 0x03.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const int code = status & 0x7f;
  return code == 0x03;
}
1627
// Return true if the upper four bits of the self-test execution status
// byte are all set.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return (status & 0xf0) == 0xf0;
}
1633
1634 // Log offline data collection status
1635 static void log_offline_data_coll_status(const char * name, unsigned char status)
1636 {
1637 const char * msg;
1638 switch (status & 0x7f) {
1639 case 0x00: msg = "was never started"; break;
1640 case 0x02: msg = "was completed without error"; break;
1641 case 0x03: msg = "is in progress"; break;
1642 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1643 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1644 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1645 default: msg = 0;
1646 }
1647
1648 if (msg)
1649 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1650 "Device: %s, offline data collection %s%s\n", name, msg,
1651 ((status & 0x80) ? " (auto:on)" : ""));
1652 else
1653 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1654 name, status);
1655 }
1656
1657 // Log self-test execution status
1658 static void log_self_test_exec_status(const char * name, unsigned char status)
1659 {
1660 const char * msg;
1661 switch (status >> 4) {
1662 case 0x0: msg = "completed without error"; break;
1663 case 0x1: msg = "was aborted by the host"; break;
1664 case 0x2: msg = "was interrupted by the host with a reset"; break;
1665 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1666 case 0x4: msg = "completed with error (unknown test element)"; break;
1667 case 0x5: msg = "completed with error (electrical test element)"; break;
1668 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1669 case 0x7: msg = "completed with error (read test element)"; break;
1670 case 0x8: msg = "completed with error (handling damage?)"; break;
1671 default: msg = 0;
1672 }
1673
1674 if (msg)
1675 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1676 "Device: %s, previous self-test %s\n", name, msg);
1677 else if ((status >> 4) == 0xf)
1678 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1679 name, status & 0x0f);
1680 else
1681 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1682 name, status);
1683 }
1684
1685 // Check pending sector count id (-C, -U directives).
1686 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1687 unsigned char id, const char * msg)
1688 {
1689 // Check attribute index
1690 int i = ata_find_attr_index(id, state.smartval);
1691 if (i < 0) {
1692 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1693 cfg.name.c_str(), msg, id);
1694 return false;
1695 }
1696
1697 // Check value
1698 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1699 cfg.attribute_defs);
1700 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1701 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1702 cfg.name.c_str(), msg, id, rawval, rawval);
1703 return false;
1704 }
1705
1706 return true;
1707 }
1708
1709 // Called by ATA/SCSIDeviceScan() after successful device check
1710 static void finish_device_scan(dev_config & cfg, dev_state & state)
1711 {
1712 // Set cfg.emailfreq if user hasn't set it
1713 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1714 // Avoid that emails are suppressed forever due to state persistence
1715 if (cfg.state_file.empty())
1716 cfg.emailfreq = 1; // '-M once'
1717 else
1718 cfg.emailfreq = 2; // '-M daily'
1719 }
1720
1721 // Start self-test regex check now if time was not read from state file
1722 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1723 state.scheduled_test_next_check = time(0);
1724 }
1725
1726 // Common function to format result message for ATA setting
1727 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1728 int set_option = 0, bool has_value = false)
1729 {
1730 if (!msg.empty())
1731 msg += ", ";
1732 msg += name;
1733 if (!ok)
1734 msg += ":--";
1735 else if (set_option < 0)
1736 msg += ":off";
1737 else if (has_value)
1738 msg += strprintf(":%d", set_option-1);
1739 else if (set_option > 0)
1740 msg += ":on";
1741 }
1742
1743
1744 // TODO: Add '-F swapid' directive
// Passed to ata_read_identity() by ATADeviceScan(); always false until
// the directive above exists.
1745 const bool fix_swapped_id = false;
1746
1747 // scan to see what ata devices there are, and if they support SMART
1748 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1749 {
1750 int supported=0;
1751 struct ata_identify_device drive;
1752 const char *name = cfg.name.c_str();
1753 int retid;
1754
1755 // Device must be open
1756
1757 // Get drive identity structure
1758 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1759 if (retid<0)
1760 // Unable to read Identity structure
1761 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1762 else
1763 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1764 name, packetdevicetype(retid-1));
1765 CloseDevice(atadev, name);
1766 return 2;
1767 }
1768
1769 // Get drive identity, size and rotation rate (HDD/SSD)
1770 char model[40+1], serial[20+1], firmware[8+1];
1771 ata_format_id_string(model, drive.model, sizeof(model)-1);
1772 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1773 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1774
1775 ata_size_info sizes;
1776 ata_get_size_info(&drive, sizes);
1777 state.num_sectors = sizes.sectors;
1778 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1779
1780 char wwn[30]; wwn[0] = 0;
1781 unsigned oui = 0; uint64_t unique_id = 0;
1782 int naa = ata_get_wwn(&drive, oui, unique_id);
1783 if (naa >= 0)
1784 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1785
1786 // Format device id string for warning emails
1787 char cap[32];
1788 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1789 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1790
1791 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1792
1793 // Show if device in database, and use preset vendor attribute
1794 // options unless user has requested otherwise.
1795 if (cfg.ignorepresets)
1796 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1797 else {
1798 // Apply vendor specific presets, print warning if present
1799 const drive_settings * dbentry = lookup_drive_apply_presets(
1800 &drive, cfg.attribute_defs, cfg.firmwarebugs);
1801 if (!dbentry)
1802 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1803 else {
1804 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1805 name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1806 if (*dbentry->warningmsg)
1807 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1808 }
1809 }
1810
1811 // Check for ATA Security LOCK
1812 unsigned short word128 = drive.words088_255[128-88];
1813 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1814 if (locked)
1815 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
1816
1817 // Set default '-C 197[+]' if no '-C ID' is specified.
1818 if (!cfg.curr_pending_set)
1819 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1820 // Set default '-U 198[+]' if no '-U ID' is specified.
1821 if (!cfg.offl_pending_set)
1822 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1823
1824 // If requested, show which presets would be used for this drive
1825 if (cfg.showpresets) {
1826 int savedebugmode=debugmode;
1827 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1828 if (!debugmode)
1829 debugmode=2;
1830 show_presets(&drive);
1831 debugmode=savedebugmode;
1832 }
1833
1834 // see if drive supports SMART
1835 supported=ataSmartSupport(&drive);
1836 if (supported!=1) {
1837 if (supported==0)
1838 // drive does NOT support SMART
1839 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1840 else
1841 // can't tell if drive supports SMART
1842 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1843
1844 // should we proceed anyway?
1845 if (cfg.permissive) {
1846 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1847 }
1848 else {
1849 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1850 CloseDevice(atadev, name);
1851 return 2;
1852 }
1853 }
1854
1855 if (ataEnableSmart(atadev)) {
1856 // Enable SMART command has failed
1857 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1858
1859 if (ataIsSmartEnabled(&drive) <= 0) {
1860 CloseDevice(atadev, name);
1861 return 2;
1862 }
1863 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
1864 }
1865
1866 // disable device attribute autosave...
1867 if (cfg.autosave==1) {
1868 if (ataDisableAutoSave(atadev))
1869 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1870 else
1871 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1872 }
1873
1874 // or enable device attribute autosave
1875 if (cfg.autosave==2) {
1876 if (ataEnableAutoSave(atadev))
1877 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1878 else
1879 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1880 }
1881
1882 // capability check: SMART status
1883 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1884 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1885 cfg.smartcheck = false;
1886 }
1887
1888 // capability check: Read smart values and thresholds. Note that
1889 // smart values are ALSO needed even if we ONLY want to know if the
1890 // device is self-test log or error-log capable! After ATA-5, this
1891 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1892 // but sadly not for ATA-5. Sigh.
1893
1894 // do we need to get SMART data?
1895 bool smart_val_ok = false;
1896 if ( cfg.autoofflinetest || cfg.selftest
1897 || cfg.errorlog || cfg.xerrorlog
1898 || cfg.offlinests || cfg.selfteststs
1899 || cfg.usagefailed || cfg.prefail || cfg.usage
1900 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1901 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1902
1903 if (ataReadSmartValues(atadev, &state.smartval)) {
1904 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1905 cfg.usagefailed = cfg.prefail = cfg.usage = false;
1906 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1907 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1908 }
1909 else {
1910 smart_val_ok = true;
1911 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1912 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1913 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1914 cfg.usagefailed = false;
1915 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1916 memset(&state.smartthres, 0, sizeof(state.smartthres));
1917 }
1918 }
1919
1920 // see if the necessary Attribute is there to monitor offline or
1921 // current pending sectors or temperature
1922 if ( cfg.curr_pending_id
1923 && !check_pending_id(cfg, state, cfg.curr_pending_id,
1924 "Current_Pending_Sector"))
1925 cfg.curr_pending_id = 0;
1926
1927 if ( cfg.offl_pending_id
1928 && !check_pending_id(cfg, state, cfg.offl_pending_id,
1929 "Offline_Uncorrectable"))
1930 cfg.offl_pending_id = 0;
1931
1932 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1933 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1934 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
1935 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1936 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1937 }
1938
1939 // Report ignored '-r' or '-R' directives
1940 for (int id = 1; id <= 255; id++) {
1941 if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
1942 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
1943 const char * excl = (cfg.monitor_attr_flags.is_set(id,
1944 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
1945
1946 int idx = ata_find_attr_index(id, state.smartval);
1947 if (idx < 0)
1948 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
1949 else {
1950 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
1951 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
1952 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
1953 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
1954 }
1955 }
1956 }
1957 }
1958
1959 // enable/disable automatic on-line testing
1960 if (cfg.autoofflinetest) {
1961 // is this an enable or disable request?
1962 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1963 if (!smart_val_ok)
1964 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1965 else {
1966 // if command appears unsupported, issue a warning...
1967 if (!isSupportAutomaticTimer(&state.smartval))
1968 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1969 // ... but then try anyway
1970 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1971 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1972 else
1973 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1974 }
1975 }
1976
1977 // Read log directories if required for capability check
1978 ata_smart_log_directory smart_logdir, gp_logdir;
1979 bool smart_logdir_ok = false, gp_logdir_ok = false;
1980
1981 if ( isGeneralPurposeLoggingCapable(&drive)
1982 && (cfg.errorlog || cfg.selftest)
1983 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1984 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1985 smart_logdir_ok = true;
1986 }
1987
1988 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1989 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
1990 gp_logdir_ok = true;
1991 }
1992
1993 // capability check: self-test-log
1994 state.selflogcount = 0; state.selfloghour = 0;
1995 if (cfg.selftest) {
1996 int retval;
1997 if (!( cfg.permissive
1998 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
1999 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2000 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2001 cfg.selftest = false;
2002 }
2003 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2004 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2005 cfg.selftest = false;
2006 }
2007 else {
2008 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2009 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2010 }
2011 }
2012
2013 // capability check: ATA error log
2014 state.ataerrorcount = 0;
2015 if (cfg.errorlog) {
2016 int errcnt1;
2017 if (!( cfg.permissive
2018 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2019 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2020 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2021 cfg.errorlog = false;
2022 }
2023 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2024 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2025 cfg.errorlog = false;
2026 }
2027 else
2028 state.ataerrorcount = errcnt1;
2029 }
2030
2031 if (cfg.xerrorlog) {
2032 int errcnt2;
2033 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2034 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2035 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2036 name);
2037 cfg.xerrorlog = false;
2038 }
2039 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2040 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2041 cfg.xerrorlog = false;
2042 }
2043 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2044 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2045 name, state.ataerrorcount, errcnt2);
2046 // Record max error count
2047 if (errcnt2 > state.ataerrorcount)
2048 state.ataerrorcount = errcnt2;
2049 }
2050 else
2051 state.ataerrorcount = errcnt2;
2052 }
2053
2054 // capability check: self-test and offline data collection status
2055 if (cfg.offlinests || cfg.selfteststs) {
2056 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2057 if (cfg.offlinests)
2058 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2059 if (cfg.selfteststs)
2060 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2061 cfg.offlinests = cfg.selfteststs = false;
2062 }
2063 }
2064
2065 // capabilities check -- does it support powermode?
2066 if (cfg.powermode) {
2067 int powermode = ataCheckPowerMode(atadev);
2068
2069 if (-1 == powermode) {
2070 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2071 cfg.powermode=0;
2072 }
2073 else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
2074 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2075 name, powermode);
2076 cfg.powermode=0;
2077 }
2078 }
2079
2080 // Apply ATA settings
2081 std::string msg;
2082
2083 if (cfg.set_aam)
2084 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2085 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2086 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2087
2088 if (cfg.set_apm)
2089 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2090 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2091 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2092
2093 if (cfg.set_lookahead)
2094 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2095 (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2096 cfg.set_lookahead);
2097
2098 if (cfg.set_wcache)
2099 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2100 (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2101
2102 if (cfg.set_security_freeze)
2103 format_set_result_msg(msg, "Security freeze",
2104 ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2105
2106 if (cfg.set_standby)
2107 format_set_result_msg(msg, "Standby",
2108 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2109
2110 // Report as one log entry
2111 if (!msg.empty())
2112 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2113
2114 // set SCT Error Recovery Control if requested
2115 if (cfg.sct_erc_set) {
2116 if (!isSCTErrorRecoveryControlCapable(&drive))
2117 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2118 name);
2119 else if (locked)
2120 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2121 name);
2122 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2123 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2124 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2125 else
2126 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2127 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2128 }
2129
2130 // If no tests available or selected, return
2131 if (!( cfg.smartcheck || cfg.selftest
2132 || cfg.errorlog || cfg.xerrorlog
2133 || cfg.offlinests || cfg.selfteststs
2134 || cfg.usagefailed || cfg.prefail || cfg.usage
2135 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2136 CloseDevice(atadev, name);
2137 return 3;
2138 }
2139
2140 // tell user we are registering device
2141 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2142
2143 // close file descriptor
2144 CloseDevice(atadev, name);
2145
2146 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2147 // Build file name for state file
2148 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2149 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2150 if (!state_path_prefix.empty()) {
2151 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2152 // Read previous state
2153 if (read_dev_state(cfg.state_file.c_str(), state)) {
2154 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2155 // Copy ATA attribute values to temp state
2156 state.update_temp_state();
2157 }
2158 }
2159 if (!attrlog_path_prefix.empty())
2160 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2161 }
2162
2163 finish_device_scan(cfg, state);
2164
2165 return 0;
2166 }
2167
2168 // on success, return 0. On failure, return >0. Never return <0,
2169 // please.
2170 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2171 {
2172 int err, req_len, avail_len, version, len;
2173 const char *device = cfg.name.c_str();
2174 struct scsi_iec_mode_page iec;
2175 UINT8 tBuf[64];
2176 UINT8 inqBuf[96];
2177 UINT8 vpdBuf[252];
2178 char lu_id[64], serial[256], vendor[40], model[40];
2179
2180 // Device must be open
2181 memset(inqBuf, 0, 96);
2182 req_len = 36;
2183 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2184 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2185 req_len = 64;
2186 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2187 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2188 "skip device\n", device);
2189 return 2;
2190 }
2191 }
2192 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2193
2194 avail_len = inqBuf[4] + 5;
2195 len = (avail_len < req_len) ? avail_len : req_len;
2196 if (len < 36) {
2197 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2198 "skip device\n", device);
2199 return 2;
2200 }
2201
2202 int pdt = inqBuf[0] & 0x1f;
2203
2204 if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2205 (0xe == pdt))) {
2206 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2207 "skip\n", device, pdt);
2208 return 2;
2209 }
2210
2211 if (supported_vpd_pages_p) {
2212 delete supported_vpd_pages_p;
2213 supported_vpd_pages_p = NULL;
2214 }
2215 supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2216
2217 lu_id[0] = '\0';
2218 if ((version >= 0x3) && (version < 0x8)) {
2219 /* SPC to SPC-5 */
2220 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2221 vpdBuf, sizeof(vpdBuf))) {
2222 len = vpdBuf[3];
2223 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2224 }
2225 }
2226 serial[0] = '\0';
2227 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2228 vpdBuf, sizeof(vpdBuf))) {
2229 len = vpdBuf[3];
2230 vpdBuf[4 + len] = '\0';
2231 scsi_format_id_string(serial, (const unsigned char *)&vpdBuf[4], len);
2232 }
2233
2234 unsigned int lb_size;
2235 char si_str[64];
2236 uint64_t capacity = scsiGetSize(scsidev, &lb_size, NULL);
2237
2238 if (capacity)
2239 format_capacity(si_str, sizeof(si_str), capacity);
2240 else
2241 si_str[0] = '\0';
2242
2243 // Format device id string for warning emails
2244 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2245 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2246 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2247 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2248 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2249
2250 // format "model" string
2251 scsi_format_id_string(vendor, (const unsigned char *)&inqBuf[8], 8);
2252 scsi_format_id_string(model, (const unsigned char *)&inqBuf[16], 16);
2253 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2254
2255 // check that device is ready for commands. IE stores its stuff on
2256 // the media.
2257 if ((err = scsiTestUnitReady(scsidev))) {
2258 if (SIMPLE_ERR_NOT_READY == err)
2259 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2260 else if (SIMPLE_ERR_NO_MEDIUM == err)
2261 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2262 else if (SIMPLE_ERR_BECOMING_READY == err)
2263 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2264 else
2265 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2266 CloseDevice(scsidev, device);
2267 return 2;
2268 }
2269
2270 // Badly-conforming USB storage devices may fail this check.
2271 // The response to the following IE mode page fetch (current and
2272 // changeable values) is carefully examined. It has been found
2273 // that various USB devices that malform the response will lock up
2274 // if asked for a log page (e.g. temperature) so it is best to
2275 // bail out now.
2276 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2277 state.modese_len = iec.modese_len;
2278 else if (SIMPLE_ERR_BAD_FIELD == err)
2279 ; /* continue since it is reasonable not to support IE mpage */
2280 else { /* any other error (including malformed response) unreasonable */
2281 PrintOut(LOG_INFO,
2282 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2283 device, err);
2284 CloseDevice(scsidev, device);
2285 return 3;
2286 }
2287
2288 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2289 // smart if it is off). This may change to be the same as the ATA side.
2290 if (!scsi_IsExceptionControlEnabled(&iec)) {
2291 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2292 "Try 'smartctl -s on %s' to turn on SMART features\n",
2293 device, device);
2294 CloseDevice(scsidev, device);
2295 return 3;
2296 }
2297
2298 // Flag that certain log pages are supported (information may be
2299 // available from other sources).
2300 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
2301 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2302 switch (tBuf[k]) {
2303 case TEMPERATURE_LPAGE:
2304 state.TempPageSupported = 1;
2305 break;
2306 case IE_LPAGE:
2307 state.SmartPageSupported = 1;
2308 break;
2309 case READ_ERROR_COUNTER_LPAGE:
2310 state.ReadECounterPageSupported = 1;
2311 break;
2312 case WRITE_ERROR_COUNTER_LPAGE:
2313 state.WriteECounterPageSupported = 1;
2314 break;
2315 case VERIFY_ERROR_COUNTER_LPAGE:
2316 state.VerifyECounterPageSupported = 1;
2317 break;
2318 case NON_MEDIUM_ERROR_LPAGE:
2319 state.NonMediumErrorPageSupported = 1;
2320 break;
2321 default:
2322 break;
2323 }
2324 }
2325 }
2326
2327 // Check if scsiCheckIE() is going to work
2328 {
2329 UINT8 asc = 0;
2330 UINT8 ascq = 0;
2331 UINT8 currenttemp = 0;
2332 UINT8 triptemp = 0;
2333
2334 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2335 &asc, &ascq, &currenttemp, &triptemp)) {
2336 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2337 state.SuppressReport = 1;
2338 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2339 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2340 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2341 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2342 }
2343 }
2344 }
2345
2346 // capability check: self-test-log
2347 if (cfg.selftest){
2348 int retval = scsiCountFailedSelfTests(scsidev, 0);
2349 if (retval<0) {
2350 // no self-test log, turn off monitoring
2351 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2352 cfg.selftest = false;
2353 state.selflogcount = 0;
2354 state.selfloghour = 0;
2355 }
2356 else {
2357 // register starting values to watch for changes
2358 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2359 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2360 }
2361 }
2362
2363 // disable autosave (set GLTSD bit)
2364 if (cfg.autosave==1){
2365 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2366 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2367 else
2368 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2369 }
2370
2371 // or enable autosave (clear GLTSD bit)
2372 if (cfg.autosave==2){
2373 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2374 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2375 else
2376 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2377 }
2378
2379 // tell user we are registering device
2380 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2381
2382 // Make sure that init_standby_check() ignores SCSI devices
2383 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2384
2385 // close file descriptor
2386 CloseDevice(scsidev, device);
2387
2388 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2389 // Build file name for state file
2390 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2391 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2392 if (!state_path_prefix.empty()) {
2393 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2394 // Read previous state
2395 if (read_dev_state(cfg.state_file.c_str(), state)) {
2396 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2397 // Copy ATA attribute values to temp state
2398 state.update_temp_state();
2399 }
2400 }
2401 if (!attrlog_path_prefix.empty())
2402 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2403 }
2404
2405 finish_device_scan(cfg, state);
2406
2407 return 0;
2408 }
2409
2410 // If the self-test log has got more self-test errors (or more recent
2411 // self-test errors) recorded, then notify user.
2412 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2413 {
2414 const char * name = cfg.name.c_str();
2415
2416 if (newi<0)
2417 // command failed
2418 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2419 else {
2420 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2421
2422 // old and new error counts
2423 int oldc=state.selflogcount;
2424 int newc=SELFTEST_ERRORCOUNT(newi);
2425
2426 // old and new error timestamps in hours
2427 int oldh=state.selfloghour;
2428 int newh=SELFTEST_ERRORHOURS(newi);
2429
2430 if (oldc<newc) {
2431 // increase in error count
2432 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2433 name, oldc, newc);
2434 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2435 name, oldc, newc);
2436 state.must_write = true;
2437 }
2438 else if (newc > 0 && oldh != newh) {
2439 // more recent error
2440 // a 'more recent' error might actually be a smaller hour number,
2441 // if the hour number has wrapped.
2442 // There's still a bug here. You might just happen to run a new test
2443 // exactly 32768 hours after the previous failure, and have run exactly
2444 // 20 tests between the two, in which case smartd will miss the
2445 // new failure.
2446 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2447 name, newh);
2448 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2449 name, newh);
2450 state.must_write = true;
2451 }
2452
2453 // Print info if error entries have disappeared
2454 // or newer successful successful extended self-test exits
2455 if (oldc > newc) {
2456 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2457 name, oldc, newc);
2458 if (newc == 0)
2459 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2460 }
2461
2462 // Needed since self-test error count may DECREASE. Hour might
2463 // also have changed.
2464 state.selflogcount= newc;
2465 state.selfloghour = newh;
2466 }
2467 return;
2468 }
2469
// Self-test type characters, highest priority first.  The scheduling
// code in this file uses 'L' (long), 'S' (short), 'C' (conveyance),
// 'O' (offline immediate) and 'c'/'n'/'r' (selective).
static const char test_type_chars[] = "LncrSCO";
// Number of test types, i.e. array elements without the terminating NUL.
static const unsigned num_test_types =
  (sizeof(test_type_chars) / sizeof(test_type_chars[0])) - 1;
2473
2474 // returns test type if time to do test of type testtype,
2475 // 0 if not time to do test.
2476 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2477 {
2478 // check that self-testing has been requested
2479 if (cfg.test_regex.empty())
2480 return 0;
2481
2482 // Exit if drive not capable of any test
2483 if ( state.not_cap_long && state.not_cap_short &&
2484 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2485 return 0;
2486
2487 // since we are about to call localtime(), be sure glibc is informed
2488 // of any timezone changes we make.
2489 if (!usetime)
2490 FixGlibcTimeZoneBug();
2491
2492 // Is it time for next check?
2493 time_t now = (!usetime ? time(0) : usetime);
2494 if (now < state.scheduled_test_next_check)
2495 return 0;
2496
2497 // Limit time check interval to 90 days
2498 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2499 state.scheduled_test_next_check = now - (3600L*24*90);
2500
2501 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2502 char testtype = 0;
2503 time_t testtime = 0; int testhour = 0;
2504 int maxtest = num_test_types-1;
2505
2506 for (time_t t = state.scheduled_test_next_check; ; ) {
2507 struct tm * tms = localtime(&t);
2508 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2509 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2510 for (int i = 0; i <= maxtest; i++) {
2511 // Skip if drive not capable of this test
2512 switch (test_type_chars[i]) {
2513 case 'L': if (state.not_cap_long) continue; break;
2514 case 'S': if (state.not_cap_short) continue; break;
2515 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2516 case 'O': if (scsi || state.not_cap_offline) continue; break;
2517 case 'c': case 'n':
2518 case 'r': if (scsi || state.not_cap_selective) continue; break;
2519 default: continue;
2520 }
2521 // Try match of "T/MM/DD/d/HH"
2522 char pattern[16];
2523 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2524 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2525 if (cfg.test_regex.full_match(pattern)) {
2526 // Test found
2527 testtype = pattern[0];
2528 testtime = t; testhour = tms->tm_hour;
2529 // Limit further matches to higher priority self-tests
2530 maxtest = i-1;
2531 break;
2532 }
2533 }
2534 // Exit if no tests left or current time reached
2535 if (maxtest < 0)
2536 break;
2537 if (t >= now)
2538 break;
2539 // Check next hour
2540 if ((t += 3600) > now)
2541 t = now;
2542 }
2543
2544 // Do next check not before next hour.
2545 struct tm * tmnow = localtime(&now);
2546 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2547
2548 if (testtype) {
2549 state.must_write = true;
2550 // Tell user if an old test was found.
2551 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2552 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2553 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2554 cfg.name.c_str(), testtype, datebuf);
2555 }
2556 }
2557
2558 return testtype;
2559 }
2560
2561 // Print a list of future tests.
2562 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2563 {
2564 unsigned numdev = configs.size();
2565 if (!numdev)
2566 return;
2567 std::vector<int> testcnts(numdev * num_test_types, 0);
2568
2569 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2570
2571 // FixGlibcTimeZoneBug(); // done in PrintOut()
2572 time_t now = time(0);
2573 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2574 dateandtimezoneepoch(datenow, now);
2575
2576 long seconds;
2577 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2578 // Check for each device whether a test will be run
2579 time_t testtime = now + seconds;
2580 for (unsigned i = 0; i < numdev; i++) {
2581 const dev_config & cfg = configs.at(i);
2582 dev_state & state = states.at(i);
2583 const char * p;
2584 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2585 if (testtype && (p = strchr(test_type_chars, testtype))) {
2586 unsigned t = (p - test_type_chars);
2587 // Report at most 5 tests of each type
2588 if (++testcnts[i*num_test_types + t] <= 5) {
2589 dateandtimezoneepoch(date, testtime);
2590 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2591 testcnts[i*num_test_types + t], testtype, date);
2592 }
2593 }
2594 }
2595 }
2596
2597 // Report totals
2598 dateandtimezoneepoch(date, now+seconds);
2599 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2600 for (unsigned i = 0; i < numdev; i++) {
2601 const dev_config & cfg = configs.at(i);
2602 bool scsi = devices.at(i)->is_scsi();
2603 for (unsigned t = 0; t < num_test_types; t++) {
2604 int cnt = testcnts[i*num_test_types + t];
2605 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2606 continue;
2607 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2608 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2609 }
2610 }
2611
2612 }
2613
2614 // Return zero on success, nonzero on failure. Perform offline (background)
2615 // short or long (extended) self test on given scsi device.
2616 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2617 {
2618 int retval = 0;
2619 const char *testname = 0;
2620 const char *name = cfg.name.c_str();
2621 int inProgress;
2622
2623 if (scsiSelfTestInProgress(device, &inProgress)) {
2624 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2625 state.not_cap_short = state.not_cap_long = true;
2626 return 1;
2627 }
2628
2629 if (1 == inProgress) {
2630 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2631 "progress.\n", name);
2632 return 1;
2633 }
2634
2635 switch (testtype) {
2636 case 'S':
2637 testname = "Short Self";
2638 retval = scsiSmartShortSelfTest(device);
2639 break;
2640 case 'L':
2641 testname = "Long Self";
2642 retval = scsiSmartExtendSelfTest(device);
2643 break;
2644 }
2645 // If we can't do the test, exit
2646 if (NULL == testname) {
2647 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2648 testtype);
2649 return 1;
2650 }
2651 if (retval) {
2652 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2653 (SIMPLE_ERR_BAD_FIELD == retval)) {
2654 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2655 testname);
2656 if ('L'==testtype)
2657 state.not_cap_long = true;
2658 else
2659 state.not_cap_short = true;
2660
2661 return 1;
2662 }
2663 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2664 testname, retval);
2665 return 1;
2666 }
2667
2668 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2669
2670 return 0;
2671 }
2672
2673 // Do an offline immediate or self-test. Return zero on success,
2674 // nonzero on failure.
2675 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2676 {
2677 const char *name = cfg.name.c_str();
2678
2679 // Read current smart data and check status/capability
2680 struct ata_smart_values data;
2681 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2682 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2683 return 1;
2684 }
2685
2686 // Check for capability to do the test
2687 int dotest = -1, mode = 0;
2688 const char *testname = 0;
2689 switch (testtype) {
2690 case 'O':
2691 testname="Offline Immediate ";
2692 if (isSupportExecuteOfflineImmediate(&data))
2693 dotest=OFFLINE_FULL_SCAN;
2694 else
2695 state.not_cap_offline = true;
2696 break;
2697 case 'C':
2698 testname="Conveyance Self-";
2699 if (isSupportConveyanceSelfTest(&data))
2700 dotest=CONVEYANCE_SELF_TEST;
2701 else
2702 state.not_cap_conveyance = true;
2703 break;
2704 case 'S':
2705 testname="Short Self-";
2706 if (isSupportSelfTest(&data))
2707 dotest=SHORT_SELF_TEST;
2708 else
2709 state.not_cap_short = true;
2710 break;
2711 case 'L':
2712 testname="Long Self-";
2713 if (isSupportSelfTest(&data))
2714 dotest=EXTEND_SELF_TEST;
2715 else
2716 state.not_cap_long = true;
2717 break;
2718
2719 case 'c': case 'n': case 'r':
2720 testname = "Selective Self-";
2721 if (isSupportSelectiveSelfTest(&data)) {
2722 dotest = SELECTIVE_SELF_TEST;
2723 switch (testtype) {
2724 case 'c': mode = SEL_CONT; break;
2725 case 'n': mode = SEL_NEXT; break;
2726 case 'r': mode = SEL_REDO; break;
2727 }
2728 }
2729 else
2730 state.not_cap_selective = true;
2731 break;
2732 }
2733
2734 // If we can't do the test, exit
2735 if (dotest<0) {
2736 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2737 return 1;
2738 }
2739
2740 // If currently running a self-test, do not interrupt it to start another.
2741 if (15==(data.self_test_exec_status >> 4)) {
2742 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
2743 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2744 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2745 } else {
2746 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2747 name, testname, (int)(data.self_test_exec_status & 0x0f));
2748 return 1;
2749 }
2750 }
2751
2752 if (dotest == SELECTIVE_SELF_TEST) {
2753 // Set test span
2754 ata_selective_selftest_args selargs, prev_args;
2755 selargs.num_spans = 1;
2756 selargs.span[0].mode = mode;
2757 prev_args.num_spans = 1;
2758 prev_args.span[0].start = state.selective_test_last_start;
2759 prev_args.span[0].end = state.selective_test_last_end;
2760 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2761 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2762 return 1;
2763 }
2764 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2765 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
2766 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2767 start, end, end - start + 1,
2768 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2769 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2770 state.selective_test_last_start = start;
2771 state.selective_test_last_end = end;
2772 }
2773
2774 // execute the test, and return status
2775 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2776 if (retval) {
2777 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2778 return retval;
2779 }
2780
2781 // Report recent test start to do_disable_standby_check()
2782 // and force log of next test status
2783 if (testtype == 'O')
2784 state.offline_started = true;
2785 else
2786 state.selftest_started = true;
2787
2788 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2789 return 0;
2790 }
2791
2792 // Check pending sector count attribute values (-C, -U directives).
2793 static void check_pending(const dev_config & cfg, dev_state & state,
2794 unsigned char id, bool increase_only,
2795 const ata_smart_values & smartval,
2796 int mailtype, const char * msg)
2797 {
2798 // Find attribute index
2799 int i = ata_find_attr_index(id, smartval);
2800 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2801 return;
2802
2803 // No report if no sectors pending.
2804 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2805 if (rawval == 0) {
2806 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
2807 return;
2808 }
2809
2810 // If attribute is not reset, report only sector count increases.
2811 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2812 if (!(!increase_only || prev_rawval < rawval))
2813 return;
2814
2815 // Format message.
2816 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
2817 if (prev_rawval > 0 && rawval != prev_rawval)
2818 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
2819
2820 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2821 MailWarning(cfg, state, mailtype, "%s", s.c_str());
2822 state.must_write = true;
2823 }
2824
// Format a temperature value for log output.
// Returns "??" if the value is unset (zero), otherwise the decimal
// representation written to 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  return "??";
}
2833
2834 // Check Temperature limits
2835 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2836 {
2837 if (!(0 < currtemp && currtemp < 255)) {
2838 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2839 return;
2840 }
2841
2842 // Update Max Temperature
2843 const char * minchg = "", * maxchg = "";
2844 if (currtemp > state.tempmax) {
2845 if (state.tempmax)
2846 maxchg = "!";
2847 state.tempmax = currtemp;
2848 state.must_write = true;
2849 }
2850
2851 char buf[20];
2852 if (!state.temperature) {
2853 // First check
2854 if (!state.tempmin || currtemp < state.tempmin)
2855 // Delay Min Temperature update by ~ 30 minutes.
2856 state.tempmin_delay = time(0) + CHECKTIME - 60;
2857 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2858 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2859 if (triptemp)
2860 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2861 state.temperature = currtemp;
2862 }
2863 else {
2864 if (state.tempmin_delay) {
2865 // End Min Temperature update delay if ...
2866 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2867 || (state.tempmin_delay <= time(0))) { // or delay time is over.
2868 state.tempmin_delay = 0;
2869 if (!state.tempmin)
2870 state.tempmin = 255;
2871 }
2872 }
2873
2874 // Update Min Temperature
2875 if (!state.tempmin_delay && currtemp < state.tempmin) {
2876 state.tempmin = currtemp;
2877 state.must_write = true;
2878 if (currtemp != state.temperature)
2879 minchg = "!";
2880 }
2881
2882 // Track changes
2883 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2884 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2885 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2886 state.temperature = currtemp;
2887 }
2888 }
2889
2890 // Check limits
2891 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2892 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2893 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2894 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
2895 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2896 }
2897 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2898 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2899 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2900 }
2901 else if (cfg.tempcrit) {
2902 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
2903 if (currtemp < limit)
2904 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
2905 }
2906 }
2907
2908 // Check normalized and raw attribute values.
2909 static void check_attribute(const dev_config & cfg, dev_state & state,
2910 const ata_smart_attribute & attr,
2911 const ata_smart_attribute & prev,
2912 int attridx,
2913 const ata_smart_threshold_entry * thresholds)
2914 {
2915 // Check attribute and threshold
2916 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
2917 if (attrstate == ATTRSTATE_NON_EXISTING)
2918 return;
2919
2920 // If requested, check for usage attributes that have failed.
2921 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2922 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
2923 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
2924 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2925 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2926 state.must_write = true;
2927 }
2928
2929 // Return if we're not tracking this type of attribute
2930 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2931 if (!( ( prefail && cfg.prefail)
2932 || (!prefail && cfg.usage )))
2933 return;
2934
2935 // Return if '-I ID' was specified
2936 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2937 return;
2938
2939 // Issue warning if they don't have the same ID in all structures.
2940 if (attr.id != prev.id) {
2941 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2942 cfg.name.c_str(), attr.id, prev.id);
2943 return;
2944 }
2945
2946 // Compare normalized values if valid.
2947 bool valchanged = false;
2948 if (attrstate > ATTRSTATE_NO_NORMVAL) {
2949 if (attr.current != prev.current)
2950 valchanged = true;
2951 }
2952
2953 // Compare raw values if requested.
2954 bool rawchanged = false;
2955 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2956 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2957 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2958 rawchanged = true;
2959 }
2960
2961 // Return if no change
2962 if (!(valchanged || rawchanged))
2963 return;
2964
2965 // Format value strings
2966 std::string currstr, prevstr;
2967 if (attrstate == ATTRSTATE_NO_NORMVAL) {
2968 // Print raw values only
2969 currstr = strprintf("%s (Raw)",
2970 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2971 prevstr = strprintf("%s (Raw)",
2972 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2973 }
2974 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2975 // Print normalized and raw values
2976 currstr = strprintf("%d [Raw %s]", attr.current,
2977 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2978 prevstr = strprintf("%d [Raw %s]", prev.current,
2979 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2980 }
2981 else {
2982 // Print normalized values only
2983 currstr = strprintf("%d", attr.current);
2984 prevstr = strprintf("%d", prev.current);
2985 }
2986
2987 // Format message
2988 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2989 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
2990 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
2991 prevstr.c_str(), currstr.c_str());
2992
2993 // Report this change as critical ?
2994 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2995 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2996 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2997 MailWarning(cfg, state, 2, "%s", msg.c_str());
2998 }
2999 else {
3000 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3001 }
3002 state.must_write = true;
3003 }
3004
3005
3006 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3007 bool firstpass, bool allow_selftests)
3008 {
3009 const char * name = cfg.name.c_str();
3010
3011 // If user has asked, test the email warning system
3012 if (cfg.emailtest)
3013 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3014
3015 // if we can't open device, fail gracefully rather than hard --
3016 // perhaps the next time around we'll be able to open it. ATAPI
3017 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
3018 // given (see linux cdrom driver).
3019 if (!atadev->open()) {
3020 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
3021 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3022 return 1;
3023 }
3024 if (debugmode)
3025 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
3026 reset_warning_mail(cfg, state, 9, "open device worked again");
3027
3028 // user may have requested (with the -n Directive) to leave the disk
3029 // alone if it is in idle or sleeping mode. In this case check the
3030 // power mode and exit without check if needed
3031 if (cfg.powermode && !state.powermodefail) {
3032 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3033 const char * mode = 0;
3034 if (0 <= powermode && powermode < 0xff) {
3035 // wait for possible spin up and check again
3036 int powermode2;
3037 sleep(5);
3038 powermode2 = ataCheckPowerMode(atadev);
3039 if (powermode2 > powermode)
3040 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3041 powermode = powermode2;
3042 }
3043
3044 switch (powermode){
3045 case -1:
3046 // SLEEP
3047 mode="SLEEP";
3048 if (cfg.powermode>=1)
3049 dontcheck=1;
3050 break;
3051 case 0:
3052 // STANDBY
3053 mode="STANDBY";
3054 if (cfg.powermode>=2)
3055 dontcheck=1;
3056 break;
3057 case 0x80:
3058 // IDLE
3059 mode="IDLE";
3060 if (cfg.powermode>=3)
3061 dontcheck=1;
3062 break;
3063 case 0xff:
3064 // ACTIVE/IDLE
3065 mode="ACTIVE or IDLE";
3066 break;
3067 default:
3068 // UNKNOWN
3069 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3070 name, powermode);
3071 state.powermodefail = true;
3072 break;
3073 }
3074
3075 // if we are going to skip a check, return now
3076 if (dontcheck){
3077 // skip at most powerskipmax checks
3078 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3079 CloseDevice(atadev, name);
3080 if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
3081 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3082 state.powerskipcnt++;
3083 return 0;
3084 }
3085 else {
3086 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3087 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3088 }
3089 state.powerskipcnt = 0;
3090 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3091 }
3092 else if (state.powerskipcnt) {
3093 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3094 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3095 state.powerskipcnt = 0;
3096 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3097 }
3098 }
3099
3100 // check smart status
3101 if (cfg.smartcheck) {
3102 int status=ataSmartStatus2(atadev);
3103 if (status==-1){
3104 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3105 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3106 state.must_write = true;
3107 }
3108 else if (status==1){
3109 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3110 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3111 state.must_write = true;
3112 }
3113 }
3114
3115 // Check everything that depends upon SMART Data (eg, Attribute values)
3116 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3117 || cfg.curr_pending_id || cfg.offl_pending_id
3118 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3119 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3120
3121 // Read current attribute values.
3122 ata_smart_values curval;
3123 if (ataReadSmartValues(atadev, &curval)){
3124 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3125 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3126 state.must_write = true;
3127 }
3128 else {
3129 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3130
3131 // look for current or offline pending sectors
3132 if (cfg.curr_pending_id)
3133 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3134 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3135 : "Total unreadable (pending) sectors" ));
3136
3137 if (cfg.offl_pending_id)
3138 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3139 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3140 : "Total offline uncorrectable sectors"));
3141
3142 // check temperature limits
3143 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3144 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3145
3146 // look for failed usage attributes, or track usage or prefail attributes
3147 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3148 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3149 check_attribute(cfg, state,
3150 curval.vendor_attributes[i],
3151 state.smartval.vendor_attributes[i],
3152 i, state.smartthres.thres_entries);
3153 }
3154 }
3155
3156 // Log changes of offline data collection status
3157 if (cfg.offlinests) {
3158 if ( curval.offline_data_collection_status
3159 != state.smartval.offline_data_collection_status
3160 || state.offline_started // test was started in previous call
3161 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3162 log_offline_data_coll_status(name, curval.offline_data_collection_status);
3163 }
3164
3165 // Log changes of self-test execution status
3166 if (cfg.selfteststs) {
3167 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
3168 || state.selftest_started // test was started in previous call
3169 || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3170 log_self_test_exec_status(name, curval.self_test_exec_status);
3171 }
3172
3173 // Save the new values for the next time around
3174 state.smartval = curval;
3175 }
3176 }
3177 state.offline_started = state.selftest_started = false;
3178
3179 // check if number of selftest errors has increased (note: may also DECREASE)
3180 if (cfg.selftest)
3181 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3182
3183 // check if number of ATA errors has increased
3184 if (cfg.errorlog || cfg.xerrorlog) {
3185
3186 int errcnt1 = -1, errcnt2 = -1;
3187 if (cfg.errorlog)
3188 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3189 if (cfg.xerrorlog)
3190 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3191
3192 // new number of errors is max of both logs
3193 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3194
3195 // did command fail?
3196 if (newc<0)
3197 // lack of PrintOut here is INTENTIONAL
3198 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3199
3200 // has error count increased?
3201 int oldc = state.ataerrorcount;
3202 if (newc>oldc){
3203 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3204 name, oldc, newc);
3205 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3206 name, oldc, newc);
3207 state.must_write = true;
3208 }
3209
3210 if (newc>=0)
3211 state.ataerrorcount=newc;
3212 }
3213
3214 // if the user has asked, and device is capable (or we're not yet
3215 // sure) check whether a self test should be done now.
3216 if (allow_selftests && !cfg.test_regex.empty()) {
3217 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3218 if (testtype)
3219 DoATASelfTest(cfg, state, atadev, testtype);
3220 }
3221
3222 // Don't leave device open -- the OS/user may want to access it
3223 // before the next smartd cycle!
3224 CloseDevice(atadev, name);
3225
3226 // Copy ATA attribute values to persistent state
3227 state.update_persistent_state();
3228
3229 return 0;
3230 }
3231
3232 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3233 {
3234 const char * name = cfg.name.c_str();
3235
3236 // If the user has asked for it, test the email warning system
3237 if (cfg.emailtest)
3238 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3239
3240 // if we can't open device, fail gracefully rather than hard --
3241 // perhaps the next time around we'll be able to open it
3242 if (!scsidev->open()) {
3243 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3244 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3245 return 1;
3246 } else if (debugmode)
3247 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
3248 reset_warning_mail(cfg, state, 9, "open device worked again");
3249
3250 UINT8 asc = 0, ascq = 0;
3251 UINT8 currenttemp = 0, triptemp = 0;
3252 if (!state.SuppressReport) {
3253 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3254 &asc, &ascq, &currenttemp, &triptemp)) {
3255 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3256 name);
3257 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3258 state.SuppressReport = 1;
3259 }
3260 }
3261 if (asc > 0) {
3262 const char * cp = scsiGetIEString(asc, ascq);
3263 if (cp) {
3264 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3265 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3266 } else if (asc == 4 && ascq == 9) {
3267 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3268 } else if (debugmode)
3269 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3270 name, (int)asc, (int)ascq);
3271 } else if (debugmode)
3272 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3273
3274 // check temperature limits
3275 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || !cfg.attrlog_file.empty())
3276 CheckTemperature(cfg, state, currenttemp, triptemp);
3277
3278 // check if number of selftest errors has increased (note: may also DECREASE)
3279 if (cfg.selftest)
3280 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3281
3282 if (allow_selftests && !cfg.test_regex.empty()) {
3283 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3284 if (testtype)
3285 DoSCSISelfTest(cfg, state, scsidev, testtype);
3286 }
3287 if (!cfg.attrlog_file.empty()){
3288 // saving error counters to state
3289 UINT8 tBuf[252];
3290 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3291 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3292 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3293 state.scsi_error_counters[0].found=1;
3294 }
3295 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3296 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3297 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3298 state.scsi_error_counters[1].found=1;
3299 }
3300 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3301 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3302 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3303 state.scsi_error_counters[2].found=1;
3304 }
3305 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3306 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3307 scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3308 state.scsi_nonmedium_error.found=1;
3309 }
3310 }
3311 CloseDevice(scsidev, name);
3312 return 0;
3313 }
3314
3315 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3316 static int standby_disable_state = 0;
3317
3318 static void init_disable_standby_check(dev_config_vector & configs)
3319 {
3320 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3321 bool sts1 = false, sts2 = false;
3322 for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3323 const dev_config & cfg = configs.at(i);
3324 if (cfg.offlinests_ns)
3325 sts1 = true;
3326 if (cfg.selfteststs_ns)
3327 sts2 = true;
3328 }
3329
3330 // Check for support of disable auto standby
3331 // Reenable standby if smartd.conf was reread
3332 if (sts1 || sts2 || standby_disable_state == 3) {
3333 if (!smi()->disable_system_auto_standby(false)) {
3334 if (standby_disable_state == 3)
3335 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3336 if (sts1 || sts2) {
3337 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3338 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3339 sts1 = sts2 = false;
3340 }
3341 }
3342 }
3343
3344 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3345 }
3346
3347 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3348 {
3349 if (!standby_disable_state)
3350 return;
3351
3352 // Check for just started or still running self-tests
3353 bool running = false;
3354 for (unsigned i = 0; i < configs.size() && !running; i++) {
3355 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3356
3357 if ( ( cfg.offlinests_ns
3358 && (state.offline_started ||
3359 is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3360 || ( cfg.selfteststs_ns
3361 && (state.selftest_started ||
3362 is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3363 running = true;
3364 // state.offline/selftest_started will be reset after next logging of test status
3365 }
3366
3367 // Disable/enable auto standby and log state changes
3368 if (!running) {
3369 if (standby_disable_state != 1) {
3370 if (!smi()->disable_system_auto_standby(false))
3371 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3372 smi()->get_errmsg());
3373 else
3374 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3375 standby_disable_state = 1;
3376 }
3377 }
3378 else if (!smi()->disable_system_auto_standby(true)) {
3379 if (standby_disable_state != 2) {
3380 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3381 smi()->get_errmsg());
3382 standby_disable_state = 2;
3383 }
3384 }
3385 else {
3386 if (standby_disable_state != 3) {
3387 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3388 standby_disable_state = 3;
3389 }
3390 }
3391 }
3392
3393 // Checks the SMART status of all ATA and SCSI devices
3394 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3395 smart_device_list & devices, bool firstpass, bool allow_selftests)
3396 {
3397 for (unsigned i = 0; i < configs.size(); i++) {
3398 const dev_config & cfg = configs.at(i);
3399 dev_state & state = states.at(i);
3400 smart_device * dev = devices.at(i);
3401 if (dev->is_ata())
3402 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3403 else if (dev->is_scsi())
3404 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3405 }
3406
3407 do_disable_standby_check(configs, states);
3408 }
3409
3410 // Set if Initialize() was called
3411 static bool is_initialized = false;
3412
3413 // Does initialization right after fork to daemon mode
3414 static void Initialize(time_t *wakeuptime)
3415 {
3416 // Call Goodbye() on exit
3417 is_initialized = true;
3418
3419 // write PID file
3420 if (!debugmode)
3421 WritePidFile();
3422
3423 // install signal handlers. On Solaris, can't use signal() because
3424 // it resets the handler to SIG_DFL after each call. So use sigset()
3425 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3426
3427 // normal and abnormal exit
3428 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3429 SIGNALFN(SIGTERM, SIG_IGN);
3430 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3431 SIGNALFN(SIGQUIT, SIG_IGN);
3432
3433 // in debug mode, <CONTROL-C> ==> HUP
3434 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3435 SIGNALFN(SIGINT, SIG_IGN);
3436
3437 // Catch HUP and USR1
3438 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3439 SIGNALFN(SIGHUP, SIG_IGN);
3440 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3441 SIGNALFN(SIGUSR1, SIG_IGN);
3442 #ifdef _WIN32
3443 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3444 SIGNALFN(SIGUSR2, SIG_IGN);
3445 #endif
3446
3447 // initialize wakeup time to CURRENT time
3448 *wakeuptime=time(NULL);
3449
3450 return;
3451 }
3452
#ifdef _WIN32
// Switch debug mode on or off at run time.
// Only implemented for native windows
// (there is no easy way to reopen tty on *nix)
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      // Off -> on: requires a console
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
        break;
      }
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      break;

    case 1:
      // On -> off
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      // Other debug levels are left alone
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
3478
3479 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3480 {
3481 // If past wake-up-time, compute next wake-up-time
3482 time_t timenow=time(NULL);
3483 while (wakeuptime<=timenow){
3484 int intervals=1+(timenow-wakeuptime)/checktime;
3485 wakeuptime+=intervals*checktime;
3486 }
3487
3488 // sleep until we catch SIGUSR1 or have completed sleeping
3489 int addtime = 0;
3490 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3491
3492 // protect user again system clock being adjusted backwards
3493 if (wakeuptime>timenow+checktime){
3494 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3495 wakeuptime=timenow+checktime;
3496 }
3497
3498 // Exit sleep when time interval has expired or a signal is received
3499 sleep(wakeuptime+addtime-timenow);
3500
3501 #ifdef _WIN32
3502 // toggle debug mode?
3503 if (caughtsigUSR2) {
3504 ToggleDebugMode();
3505 caughtsigUSR2 = 0;
3506 }
3507 #endif
3508
3509 timenow=time(NULL);
3510
3511 // Actual sleep time too long?
3512 if (!addtime && timenow > wakeuptime+60) {
3513 if (debugmode)
3514 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3515 (int)(timenow-wakeuptime));
3516 // Wait another 20 seconds to avoid I/O errors during disk spin-up
3517 addtime = timenow-wakeuptime+20;
3518 // Use next wake-up-time if close
3519 int nextcheck = checktime - addtime % checktime;
3520 if (nextcheck <= 20)
3521 addtime += nextcheck;
3522 }
3523 }
3524
3525 // if we caught a SIGUSR1 then print message and clear signal
3526 if (caughtsigUSR1){
3527 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3528 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3529 caughtsigUSR1=0;
3530 sigwakeup = true;
3531 }
3532
3533 // return adjusted wakeuptime
3534 return wakeuptime;
3535 }
3536
3537 // Print out a list of valid arguments for the Directive d
3538 static void printoutvaliddirectiveargs(int priority, char d)
3539 {
3540 switch (d) {
3541 case 'n':
3542 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3543 break;
3544 case 's':
3545 PrintOut(priority, "valid_regular_expression");
3546 break;
3547 case 'd':
3548 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3549 break;
3550 case 'T':
3551 PrintOut(priority, "normal, permissive");
3552 break;
3553 case 'o':
3554 case 'S':
3555 PrintOut(priority, "on, off");
3556 break;
3557 case 'l':
3558 PrintOut(priority, "error, selftest");
3559 break;
3560 case 'M':
3561 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3562 break;
3563 case 'v':
3564 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3565 break;
3566 case 'P':
3567 PrintOut(priority, "use, ignore, show, showall");
3568 break;
3569 case 'F':
3570 PrintOut(priority, "%s", get_valid_firmwarebug_args());
3571 break;
3572 case 'e':
3573 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], "
3574 "security-freeze, standby,[N|off], wcache,[on|off]");
3575 break;
3576 }
3577 }
3578
3579 // exits with an error message, or returns integer value of token
3580 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3581 int min, int max, char * suffix = 0)
3582 {
3583 // make sure argument is there
3584 if (!arg) {
3585 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3586 cfgfile, lineno, name, token, min, max);
3587 return -1;
3588 }
3589
3590 // get argument value (base 10), check that it's integer, and in-range
3591 char *endptr;
3592 int val = strtol(arg,&endptr,10);
3593
3594 // optional suffix present?
3595 if (suffix) {
3596 if (!strcmp(endptr, suffix))
3597 endptr += strlen(suffix);
3598 else
3599 *suffix = 0;
3600 }
3601
3602 if (!(!*endptr && min <= val && val <= max)) {
3603 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3604 cfgfile, lineno, name, token, arg, min, max);
3605 return -1;
3606 }
3607
3608 // all is well; return value
3609 return val;
3610 }
3611
3612
3613 // Get 1-3 small integer(s) for '-W' directive
3614 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3615 unsigned char *val1, unsigned char *val2, unsigned char *val3)
3616 {
3617 unsigned v1 = 0, v2 = 0, v3 = 0;
3618 int n1 = -1, n2 = -1, n3 = -1, len;
3619 if (!arg) {
3620 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3621 cfgfile, lineno, name, token);
3622 return -1;
3623 }
3624
3625 len = strlen(arg);
3626 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3627 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3628 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3629 cfgfile, lineno, name, token, arg);
3630 return -1;
3631 }
3632 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3633 return 0;
3634 }
3635
3636
3637 #ifdef _WIN32
3638
// Concatenate strtok() results if quoted with "..."
//
// Fetches the next token of the string currently being tokenized with
// strtok(). Returns:
// - 0 if there is no further token,
// - the token itself if it does not start with a double quote,
// - the text between the quotes if the token starts a quoted string.
//   Tokens of the quoted string are joined with single spaces. The
//   result is kept in a static buffer which is overwritten by the
//   next call,
// - the string "\"" if the closing quote is missing.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  size_t len = strlen(t);

  // Quoted string without embedded delimiters: the first token also
  // contains the closing quote (a lone '"' only opens the string)
  if (len >= 2 && t[len-1] == '"') {
    token.assign(t+1, len-2);
    return token.c_str();
  }

  token = t+1;
  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\"";
    token += ' ';
    len = strlen(t);
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
3662
3663 #endif // _WIN32
3664
3665
3666 // This function returns 1 if it has correctly parsed one token (and
3667 // any arguments), else zero if no tokens remain. It returns -1 if an
3668 // error was encountered.
3669 static int ParseToken(char * token, dev_config & cfg)
3670 {
3671 char sym;
3672 const char * name = cfg.name.c_str();
3673 int lineno=cfg.lineno;
3674 const char *delim = " \n\t";
3675 int badarg = 0;
3676 int missingarg = 0;
3677 const char *arg = 0;
3678
3679 // is the rest of the line a comment
3680 if (*token=='#')
3681 return 1;
3682
3683 // is the token not recognized?
3684 if (*token!='-' || strlen(token)!=2) {
3685 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3686 configfile, lineno, name, token);
3687 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3688 return -1;
3689 }
3690
3691 // token we will be parsing:
3692 sym=token[1];
3693
3694 // parse the token and swallow its argument
3695 int val;
3696 char plus[] = "+", excl[] = "!";
3697
3698 switch (sym) {
3699 case 'C':
3700 // monitor current pending sector count (default 197)
3701 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3702 return -1;
3703 cfg.curr_pending_id = (unsigned char)val;
3704 cfg.curr_pending_incr = (*plus == '+');
3705 cfg.curr_pending_set = true;
3706 break;
3707 case 'U':
3708 // monitor offline uncorrectable sectors (default 198)
3709 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3710 return -1;
3711 cfg.offl_pending_id = (unsigned char)val;
3712 cfg.offl_pending_incr = (*plus == '+');
3713 cfg.offl_pending_set = true;
3714 break;
3715 case 'T':
3716 // Set tolerance level for SMART command failures
3717 if ((arg = strtok(NULL, delim)) == NULL) {
3718 missingarg = 1;
3719 } else if (!strcmp(arg, "normal")) {
3720 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3721 // not on failure of an optional S.M.A.R.T. command.
3722 // This is the default so we don't need to actually do anything here.
3723 cfg.permissive = false;
3724 } else if (!strcmp(arg, "permissive")) {
3725 // Permissive mode; ignore errors from Mandatory SMART commands
3726 cfg.permissive = true;
3727 } else {
3728 badarg = 1;
3729 }
3730 break;
3731 case 'd':
3732 // specify the device type
3733 if ((arg = strtok(NULL, delim)) == NULL) {
3734 missingarg = 1;
3735 } else if (!strcmp(arg, "ignore")) {
3736 cfg.ignore = true;
3737 } else if (!strcmp(arg, "removable")) {
3738 cfg.removable = true;
3739 } else if (!strcmp(arg, "auto")) {
3740 cfg.dev_type = "";
3741 } else {
3742 cfg.dev_type = arg;
3743 }
3744 break;
3745 case 'F':
3746 // fix firmware bug
3747 if (!(arg = strtok(0, delim)))
3748 missingarg = 1;
3749 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
3750 badarg = 1;
3751 break;
3752 case 'H':
3753 // check SMART status
3754 cfg.smartcheck = true;
3755 break;
3756 case 'f':
3757 // check for failure of usage attributes
3758 cfg.usagefailed = true;
3759 break;
3760 case 't':
3761 // track changes in all vendor attributes
3762 cfg.prefail = true;
3763 cfg.usage = true;
3764 break;
3765 case 'p':
3766 // track changes in prefail vendor attributes
3767 cfg.prefail = true;
3768 break;
3769 case 'u':
3770 // track changes in usage vendor attributes
3771 cfg.usage = true;
3772 break;
3773 case 'l':
3774 // track changes in SMART logs
3775 if ((arg = strtok(NULL, delim)) == NULL) {
3776 missingarg = 1;
3777 } else if (!strcmp(arg, "selftest")) {
3778 // track changes in self-test log
3779 cfg.selftest = true;
3780 } else if (!strcmp(arg, "error")) {
3781 // track changes in ATA error log
3782 cfg.errorlog = true;
3783 } else if (!strcmp(arg, "xerror")) {
3784 // track changes in Extended Comprehensive SMART error log
3785 cfg.xerrorlog = true;
3786 } else if (!strcmp(arg, "offlinests")) {
3787 // track changes in offline data collection status
3788 cfg.offlinests = true;
3789 } else if (!strcmp(arg, "offlinests,ns")) {
3790 // track changes in offline data collection status, disable auto standby
3791 cfg.offlinests = cfg.offlinests_ns = true;
3792 } else if (!strcmp(arg, "selfteststs")) {
3793 // track changes in self-test execution status
3794 cfg.selfteststs = true;
3795 } else if (!strcmp(arg, "selfteststs,ns")) {
3796 // track changes in self-test execution status, disable auto standby
3797 cfg.selfteststs = cfg.selfteststs_ns = true;
3798 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
3799 // set SCT Error Recovery Control
3800 unsigned rt = ~0, wt = ~0; int nc = -1;
3801 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
3802 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
3803 cfg.sct_erc_set = true;
3804 cfg.sct_erc_readtime = rt;
3805 cfg.sct_erc_writetime = wt;
3806 }
3807 else
3808 badarg = 1;
3809 } else {
3810 badarg = 1;
3811 }
3812 break;
3813 case 'a':
3814 // monitor everything
3815 cfg.smartcheck = true;
3816 cfg.prefail = true;
3817 cfg.usagefailed = true;
3818 cfg.usage = true;
3819 cfg.selftest = true;
3820 cfg.errorlog = true;
3821 cfg.selfteststs = true;
3822 break;
3823 case 'o':
3824 // automatic offline testing enable/disable
3825 if ((arg = strtok(NULL, delim)) == NULL) {
3826 missingarg = 1;
3827 } else if (!strcmp(arg, "on")) {
3828 cfg.autoofflinetest = 2;
3829 } else if (!strcmp(arg, "off")) {
3830 cfg.autoofflinetest = 1;
3831 } else {
3832 badarg = 1;
3833 }
3834 break;
3835 case 'n':
3836 // skip disk check if in idle or standby mode
3837 if (!(arg = strtok(NULL, delim)))
3838 missingarg = 1;
3839 else {
3840 char *endptr = NULL;
3841 char *next = strchr(const_cast<char*>(arg), ',');
3842
3843 cfg.powerquiet = false;
3844 cfg.powerskipmax = 0;
3845
3846 if (next!=NULL) *next='\0';
3847 if (!strcmp(arg, "never"))
3848 cfg.powermode = 0;
3849 else if (!strcmp(arg, "sleep"))
3850 cfg.powermode = 1;
3851 else if (!strcmp(arg, "standby"))
3852 cfg.powermode = 2;
3853 else if (!strcmp(arg, "idle"))
3854 cfg.powermode = 3;
3855 else
3856 badarg = 1;
3857
3858 // if optional arguments are present
3859 if (!badarg && next!=NULL) {
3860 next++;
3861 cfg.powerskipmax = strtol(next, &endptr, 10);
3862 if (endptr == next)
3863 cfg.powerskipmax = 0;
3864 else {
3865 next = endptr + (*endptr != '\0');
3866 if (cfg.powerskipmax <= 0)
3867 badarg = 1;
3868 }
3869 if (*next != '\0') {
3870 if (!strcmp("q", next))
3871 cfg.powerquiet = true;
3872 else {
3873 badarg = 1;
3874 }
3875 }
3876 }
3877 }
3878 break;
3879 case 'S':
3880 // automatic attribute autosave enable/disable
3881 if ((arg = strtok(NULL, delim)) == NULL) {
3882 missingarg = 1;
3883 } else if (!strcmp(arg, "on")) {
3884 cfg.autosave = 2;
3885 } else if (!strcmp(arg, "off")) {
3886 cfg.autosave = 1;
3887 } else {
3888 badarg = 1;
3889 }
3890 break;
3891 case 's':
3892 // warn user, and delete any previously given -s REGEXP Directives
3893 if (!cfg.test_regex.empty()){
3894 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3895 configfile, lineno, name, cfg.test_regex.get_pattern());
3896 cfg.test_regex = regular_expression();
3897 }
3898 // check for missing argument
3899 if (!(arg = strtok(NULL, delim))) {
3900 missingarg = 1;
3901 }
3902 // Compile regex
3903 else {
3904 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3905 // not a valid regular expression!
3906 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3907 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3908 return -1;
3909 }
3910 // Do a bit of sanity checking and warn user if we think that
3911 // their regexp is "strange". User probably confused about shell
3912 // glob(3) syntax versus regular expression syntax regexp(7).
3913 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3914 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3915 configfile, lineno, name, val+1, arg[val], arg);
3916 }
3917 break;
3918 case 'm':
3919 // send email to address that follows
3920 if (!(arg = strtok(NULL,delim)))
3921 missingarg = 1;
3922 else {
3923 if (!cfg.emailaddress.empty())
3924 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3925 configfile, lineno, name, cfg.emailaddress.c_str());
3926 #ifdef _WIN32
3927 if ( !strcmp(arg, "msgbox") || !strcmp(arg, "sysmsgbox")
3928 || str_starts_with(arg, "msgbox,") || str_starts_with(arg, "sysmsgbox,")) {
3929 cfg.emailaddress = "console";
3930 const char * arg2 = strchr(arg, ',');
3931 if (arg2)
3932 cfg.emailaddress += arg2;
3933 PrintOut(LOG_INFO, "File %s line %d (drive %s): Deprecated -m %s changed to -m %s\n",
3934 configfile, lineno, name, arg, cfg.emailaddress.c_str());
3935 }
3936 else
3937 #endif
3938 cfg.emailaddress = arg;
3939 }
3940 break;
3941 case 'M':
3942 // email warning options
3943 if (!(arg = strtok(NULL, delim)))
3944 missingarg = 1;
3945 else if (!strcmp(arg, "once"))
3946 cfg.emailfreq = 1;
3947 else if (!strcmp(arg, "daily"))
3948 cfg.emailfreq = 2;
3949 else if (!strcmp(arg, "diminishing"))
3950 cfg.emailfreq = 3;
3951 else if (!strcmp(arg, "test"))
3952 cfg.emailtest = 1;
3953 else if (!strcmp(arg, "exec")) {
3954 // Get the next argument (the command line)
3955 #ifdef _WIN32
3956 // Allow "/path name/with spaces/..." on Windows
3957 arg = strtok_dequote(delim);
3958 if (arg && arg[0] == '"') {
3959 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
3960 configfile, lineno, name, token);
3961 return -1;
3962 }
3963 #else
3964 arg = strtok(0, delim);
3965 #endif
3966 if (!arg) {
3967 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3968 configfile, lineno, name, token);
3969 return -1;
3970 }
3971 // Free the last cmd line given if any, and copy new one
3972 if (!cfg.emailcmdline.empty())
3973 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3974 configfile, lineno, name, cfg.emailcmdline.c_str());
3975 cfg.emailcmdline = arg;
3976 }
3977 else
3978 badarg = 1;
3979 break;
3980 case 'i':
3981 // ignore failure of usage attribute
3982 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3983 return -1;
3984 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3985 break;
3986 case 'I':
3987 // ignore attribute for tracking purposes
3988 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3989 return -1;
3990 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3991 break;
3992 case 'r':
3993 // print raw value when tracking
3994 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3995 return -1;
3996 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3997 if (*excl == '!') // attribute change is critical
3998 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3999 break;
4000 case 'R':
4001 // track changes in raw value (forces printing of raw value)
4002 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4003 return -1;
4004 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
4005 if (*excl == '!') // raw value change is critical
4006 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
4007 break;
4008 case 'W':
4009 // track Temperature
4010 if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4011 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4012 return -1;
4013 break;
4014 case 'v':
4015 // non-default vendor-specific attribute meaning
4016 if (!(arg=strtok(NULL,delim))) {
4017 missingarg = 1;
4018 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4019 badarg = 1;
4020 }
4021 break;
4022 case 'P':
4023 // Define use of drive-specific presets.
4024 if (!(arg = strtok(NULL, delim))) {
4025 missingarg = 1;
4026 } else if (!strcmp(arg, "use")) {
4027 cfg.ignorepresets = false;
4028 } else if (!strcmp(arg, "ignore")) {
4029 cfg.ignorepresets = true;
4030 } else if (!strcmp(arg, "show")) {
4031 cfg.showpresets = true;
4032 } else if (!strcmp(arg, "showall")) {
4033 showallpresets();
4034 } else {
4035 badarg = 1;
4036 }
4037 break;
4038
4039 case 'e':
4040 // Various ATA settings
4041 if (!(arg = strtok(NULL, delim))) {
4042 missingarg = true;
4043 }
4044 else {
4045 char arg2[16+1]; unsigned val;
4046 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4047 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4048 && (n1 == len || n2 > 0)) {
4049 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4050 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4051 if (n3 != len)
4052 val = ~0U;
4053
4054 if (!strcmp(arg2, "aam")) {
4055 if (off)
4056 cfg.set_aam = -1;
4057 else if (val <= 254)
4058 cfg.set_aam = val + 1;
4059 else
4060 badarg = true;
4061 }
4062 else if (!strcmp(arg2, "apm")) {
4063 if (off)
4064 cfg.set_apm = -1;
4065 else if (1 <= val && val <= 254)
4066 cfg.set_apm = val + 1;
4067 else
4068 badarg = true;
4069 }
4070 else if (!strcmp(arg2, "lookahead")) {
4071 if (off)
4072 cfg.set_lookahead = -1;
4073 else if (on)
4074 cfg.set_lookahead = 1;
4075 else
4076 badarg = true;
4077 }
4078 else if (!strcmp(arg, "security-freeze")) {
4079 cfg.set_security_freeze = true;
4080 }
4081 else if (!strcmp(arg2, "standby")) {
4082 if (off)
4083 cfg.set_standby = 0 + 1;
4084 else if (val <= 255)
4085 cfg.set_standby = val + 1;
4086 else
4087 badarg = true;
4088 }
4089 else if (!strcmp(arg2, "wcache")) {
4090 if (off)
4091 cfg.set_wcache = -1;
4092 else if (on)
4093 cfg.set_wcache = 1;
4094 else
4095 badarg = true;
4096 }
4097 else
4098 badarg = true;
4099 }
4100 else
4101 badarg = true;
4102 }
4103 break;
4104
4105 default:
4106 // Directive not recognized
4107 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4108 configfile, lineno, name, token);
4109 Directives();
4110 return -1;
4111 }
4112 if (missingarg) {
4113 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4114 configfile, lineno, name, token);
4115 }
4116 if (badarg) {
4117 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4118 configfile, lineno, name, token, arg);
4119 }
4120 if (missingarg || badarg) {
4121 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4122 printoutvaliddirectiveargs(LOG_CRIT, sym);
4123 PrintOut(LOG_CRIT, "\n");
4124 return -1;
4125 }
4126
4127 return 1;
4128 }
4129
4130 // Scan directive for configuration file
4131 #define SCANDIRECTIVE "DEVICESCAN"
4132
4133 // This is the routine that adds things to the conf_entries list.
4134 //
4135 // Return values are:
4136 // 1: parsed a normal line
4137 // 0: found DEFAULT setting or comment or blank line
4138 // -1: found SCANDIRECTIVE line
4139 // -2: found an error
4140 //
4141 // Note: this routine modifies *line from the caller!
4142 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf, int lineno, /*const*/ char * line)
4143 {
4144 const char *delim = " \n\t";
4145
4146 // get first token: device name. If a comment, skip line
4147 const char * name = strtok(line, delim);
4148 if (!name || *name == '#')
4149 return 0;
4150
4151 // Check device name for DEFAULT or DEVICESCAN
4152 int retval;
4153 if (!strcmp("DEFAULT", name)) {
4154 retval = 0;
4155 // Restart with empty defaults
4156 default_conf = dev_config();
4157 }
4158 else {
4159 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4160 // Init new entry with current defaults
4161 conf_entries.push_back(default_conf);
4162 }
4163 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4164
4165 cfg.name = name; // Later replaced by dev->get_info().info_name
4166 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4167 cfg.lineno = lineno;
4168
4169 // parse tokens one at a time from the file.
4170 while (char * token = strtok(0, delim)) {
4171 int rc = ParseToken(token, cfg);
4172 if (rc < 0)
4173 // error found on the line
4174 return -2;
4175
4176 if (rc == 0)
4177 // No tokens left
4178 break;
4179
4180 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4181 }
4182
4183 // Don't perform checks below for DEFAULT entries
4184 if (retval == 0)
4185 return retval;
4186
4187 // If NO monitoring directives are set, then set all of them.
4188 if (!( cfg.smartcheck || cfg.selftest
4189 || cfg.errorlog || cfg.xerrorlog
4190 || cfg.offlinests || cfg.selfteststs
4191 || cfg.usagefailed || cfg.prefail || cfg.usage
4192 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4193
4194 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4195 cfg.name.c_str(), cfg.lineno, configfile);
4196
4197 cfg.smartcheck = true;
4198 cfg.usagefailed = true;
4199 cfg.prefail = true;
4200 cfg.usage = true;
4201 cfg.selftest = true;
4202 cfg.errorlog = true;
4203 cfg.selfteststs = true;
4204 }
4205
4206 // additional sanity check. Has user set -M options without -m?
4207 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4208 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4209 cfg.name.c_str(), cfg.lineno, configfile);
4210 return -2;
4211 }
4212
4213 // has the user has set <nomailer>?
4214 if (cfg.emailaddress == "<nomailer>") {
4215 // check that -M exec is also set
4216 if (cfg.emailcmdline.empty()){
4217 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4218 cfg.name.c_str(), cfg.lineno, configfile);
4219 return -2;
4220 }
4221 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4222 cfg.emailaddress.clear();
4223 }
4224
4225 return retval;
4226 }
4227
4228 // Parses a configuration file. Return values are:
4229 // N=>0: found N entries
4230 // -1: syntax error in config file
4231 // -2: config file does not exist
4232 // -3: config file exists but cannot be read
4233 //
4234 // In the case where the return value is 0, there are three
4235 // possiblities:
4236 // Empty configuration file ==> conf_entries.empty()
4237 // No configuration file ==> conf_entries[0].lineno == 0
4238 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4239 static int ParseConfigFile(dev_config_vector & conf_entries)
4240 {
4241 // maximum line length in configuration file
4242 const int MAXLINELEN = 256;
4243 // maximum length of a continued line in configuration file
4244 const int MAXCONTLINE = 1023;
4245
4246 stdio_file f;
4247 // Open config file, if it exists and is not <stdin>
4248 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4249 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4250 // file exists but we can't read it or it should exist due to '-c' option
4251 int ret = (errno!=ENOENT ? -3 : -2);
4252 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4253 strerror(errno),configfile);
4254 return ret;
4255 }
4256 }
4257 else // read from stdin ('-c -' option)
4258 f.open(stdin);
4259
4260 // Start with empty defaults
4261 dev_config default_conf;
4262
4263 // No configuration file found -- use fake one
4264 int entry = 0;
4265 if (!f) {
4266 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4267
4268 if (ParseConfigLine(conf_entries, default_conf, 0, fakeconfig) != -1)
4269 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4270 return 0;
4271 }
4272
4273 #ifdef __CYGWIN__
4274 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4275 #endif
4276
4277 // configuration file exists
4278 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4279
4280 // parse config file line by line
4281 int lineno = 1, cont = 0, contlineno = 0;
4282 char line[MAXLINELEN+2];
4283 char fullline[MAXCONTLINE+1];
4284
4285 for (;;) {
4286 int len=0,scandevice;
4287 char *lastslash;
4288 char *comment;
4289 char *code;
4290
4291 // make debugging simpler
4292 memset(line,0,sizeof(line));
4293
4294 // get a line
4295 code=fgets(line, MAXLINELEN+2, f);
4296
4297 // are we at the end of the file?
4298 if (!code){
4299 if (cont) {
4300 scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
4301 // See if we found a SCANDIRECTIVE directive
4302 if (scandevice==-1)
4303 return 0;
4304 // did we find a syntax error
4305 if (scandevice==-2)
4306 return -1;
4307 // the final line is part of a continuation line
4308 entry+=scandevice;
4309 }
4310 break;
4311 }
4312
4313 // input file line number
4314 contlineno++;
4315
4316 // See if line is too long
4317 len=strlen(line);
4318 if (len>MAXLINELEN){
4319 const char *warn;
4320 if (line[len-1]=='\n')
4321 warn="(including newline!) ";
4322 else
4323 warn="";
4324 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4325 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4326 return -1;
4327 }
4328
4329 // Ignore anything after comment symbol
4330 if ((comment=strchr(line,'#'))){
4331 *comment='\0';
4332 len=strlen(line);
4333 }
4334
4335 // is the total line (made of all continuation lines) too long?
4336 if (cont+len>MAXCONTLINE){
4337 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4338 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4339 return -1;
4340 }
4341
4342 // copy string so far into fullline, and increment length
4343 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4344 cont+=len;
4345
4346 // is this a continuation line. If so, replace \ by space and look at next line
4347 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4348 *(fullline+(cont-len)+(lastslash-line))=' ';
4349 continue;
4350 }
4351
4352 // Not a continuation line. Parse it
4353 scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
4354
4355 // did we find a scandevice directive?
4356 if (scandevice==-1)
4357 return 0;
4358 // did we find a syntax error
4359 if (scandevice==-2)
4360 return -1;
4361
4362 entry+=scandevice;
4363 lineno++;
4364 cont=0;
4365 }
4366
4367 // note -- may be zero if syntax of file OK, but no valid entries!
4368 return entry;
4369 }
4370
4371 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4372 <LIST> is the list of valid arguments for option opt. */
4373 static void PrintValidArgs(char opt)
4374 {
4375 const char *s;
4376
4377 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4378 if (!(s = GetValidArgList(opt)))
4379 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4380 else
4381 PrintOut(LOG_CRIT, "%s", (char *)s);
4382 PrintOut(LOG_CRIT, " <=======\n");
4383 }
4384
4385 #ifndef _WIN32
4386 // Report error and exit if specified path is not absolute.
4387 static void check_abs_path(char option, const std::string & path)
4388 {
4389 if (path.empty() || path[0] == '/')
4390 return;
4391
4392 debugmode = 1;
4393 PrintHead();
4394 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4395 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4396 EXIT(EXIT_BADCMD);
4397 }
4398 #endif // !_WIN32
4399
4400 // Parses input line, prints usage message and
4401 // version/license/copyright messages
4402 static void ParseOpts(int argc, char **argv)
4403 {
4404 // Init default path names
4405 #ifndef _WIN32
4406 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4407 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4408 #else
4409 std::string exedir = get_exe_dir();
4410 static std::string configfile_str = exedir + "/smartd.conf";
4411 configfile = configfile_str.c_str();
4412 warning_script = exedir + "/smartd_warning.cmd";
4413 #endif
4414
4415 // Please update GetValidArgList() if you edit shortopts
4416 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4417 #ifdef HAVE_LIBCAP_NG
4418 "C"
4419 #endif
4420 ;
4421 // Please update GetValidArgList() if you edit longopts
4422 struct option longopts[] = {
4423 { "configfile", required_argument, 0, 'c' },
4424 { "logfacility", required_argument, 0, 'l' },
4425 { "quit", required_argument, 0, 'q' },
4426 { "debug", no_argument, 0, 'd' },
4427 { "showdirectives", no_argument, 0, 'D' },
4428 { "interval", required_argument, 0, 'i' },
4429 #ifndef _WIN32
4430 { "no-fork", no_argument, 0, 'n' },
4431 #else
4432 { "service", no_argument, 0, 'n' },
4433 #endif
4434 { "pidfile", required_argument, 0, 'p' },
4435 { "report", required_argument, 0, 'r' },
4436 { "savestates", required_argument, 0, 's' },
4437 { "attributelog", required_argument, 0, 'A' },
4438 { "drivedb", required_argument, 0, 'B' },
4439 { "warnexec", required_argument, 0, 'w' },
4440 { "version", no_argument, 0, 'V' },
4441 { "license", no_argument, 0, 'V' },
4442 { "copyright", no_argument, 0, 'V' },
4443 { "help", no_argument, 0, 'h' },
4444 { "usage", no_argument, 0, 'h' },
4445 #ifdef HAVE_LIBCAP_NG
4446 { "capabilities", no_argument, 0, 'C' },
4447 #endif
4448 { 0, 0, 0, 0 }
4449 };
4450
4451 opterr=optopt=0;
4452 bool badarg = false;
4453 bool use_default_db = true; // set false on '-B FILE'
4454
4455 // Parse input options.
4456 int optchar;
4457 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4458 char *arg;
4459 char *tailptr;
4460 long lchecktime;
4461
4462 switch(optchar) {
4463 case 'q':
4464 // when to quit
4465 if (!(strcmp(optarg,"nodev"))) {
4466 quit=0;
4467 } else if (!(strcmp(optarg,"nodevstartup"))) {
4468 quit=1;
4469 } else if (!(strcmp(optarg,"never"))) {
4470 quit=2;
4471 } else if (!(strcmp(optarg,"onecheck"))) {
4472 quit=3;
4473 debugmode=1;
4474 } else if (!(strcmp(optarg,"showtests"))) {
4475 quit=4;
4476 debugmode=1;
4477 } else if (!(strcmp(optarg,"errors"))) {
4478 quit=5;
4479 } else {
4480 badarg = true;
4481 }
4482 break;
4483 case 'l':
4484 // set the log facility level
4485 if (!strcmp(optarg, "daemon"))
4486 facility=LOG_DAEMON;
4487 else if (!strcmp(optarg, "local0"))
4488 facility=LOG_LOCAL0;
4489 else if (!strcmp(optarg, "local1"))
4490 facility=LOG_LOCAL1;
4491 else if (!strcmp(optarg, "local2"))
4492 facility=LOG_LOCAL2;
4493 else if (!strcmp(optarg, "local3"))
4494 facility=LOG_LOCAL3;
4495 else if (!strcmp(optarg, "local4"))
4496 facility=LOG_LOCAL4;
4497 else if (!strcmp(optarg, "local5"))
4498 facility=LOG_LOCAL5;
4499 else if (!strcmp(optarg, "local6"))
4500 facility=LOG_LOCAL6;
4501 else if (!strcmp(optarg, "local7"))
4502 facility=LOG_LOCAL7;
4503 else
4504 badarg = true;
4505 break;
4506 case 'd':
4507 // enable debug mode
4508 debugmode = 1;
4509 break;
4510 case 'n':
4511 // don't fork()
4512 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4513 do_fork = false;
4514 #endif
4515 break;
4516 case 'D':
4517 // print summary of all valid directives
4518 debugmode = 1;
4519 Directives();
4520 EXIT(0);
4521 break;
4522 case 'i':
4523 // Period (time interval) for checking
4524 // strtol will set errno in the event of overflow, so we'll check it.
4525 errno = 0;
4526 lchecktime = strtol(optarg, &tailptr, 10);
4527 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4528 debugmode=1;
4529 PrintHead();
4530 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4531 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4532 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4533 EXIT(EXIT_BADCMD);
4534 }
4535 checktime = (int)lchecktime;
4536 break;
4537 case 'r':
4538 // report IOCTL transactions
4539 {
4540 int i;
4541 char *s;
4542
4543 // split_report_arg() may modify its first argument string, so use a
4544 // copy of optarg in case we want optarg for an error message.
4545 if (!(s = strdup(optarg))) {
4546 PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
4547 EXIT(EXIT_NOMEM);
4548 }
4549 if (split_report_arg(s, &i)) {
4550 badarg = true;
4551 } else if (i<1 || i>3) {
4552 debugmode=1;
4553 PrintHead();
4554 PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
4555 PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4556 EXIT(EXIT_BADCMD);
4557 } else if (!strcmp(s,"ioctl")) {
4558 ata_debugmode = scsi_debugmode = i;
4559 } else if (!strcmp(s,"ataioctl")) {
4560 ata_debugmode = i;
4561 } else if (!strcmp(s,"scsiioctl")) {
4562 scsi_debugmode = i;
4563 } else {
4564 badarg = true;
4565 }
4566 free(s); // TODO: use std::string
4567 }
4568 break;
4569 case 'c':
4570 // alternate configuration file
4571 if (strcmp(optarg,"-"))
4572 configfile = (configfile_alt = optarg).c_str();
4573 else // read from stdin
4574 configfile=configfile_stdin;
4575 break;
4576 case 'p':
4577 // output file with PID number
4578 pid_file = optarg;
4579 break;
4580 case 's':
4581 // path prefix of persistent state file
4582 state_path_prefix = optarg;
4583 break;
4584 case 'A':
4585 // path prefix of attribute log file
4586 attrlog_path_prefix = optarg;
4587 break;
4588 case 'B':
4589 {
4590 const char * path = optarg;
4591 if (*path == '+' && path[1])
4592 path++;
4593 else
4594 use_default_db = false;
4595 unsigned char savedebug = debugmode; debugmode = 1;
4596 if (!read_drive_database(path))
4597 EXIT(EXIT_BADCMD);
4598 debugmode = savedebug;
4599 }
4600 break;
4601 case 'w':
4602 warning_script = optarg;
4603 break;
4604 case 'V':
4605 // print version and CVS info
4606 debugmode = 1;
4607 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4608 EXIT(0);
4609 break;
4610 #ifdef HAVE_LIBCAP_NG
4611 case 'C':
4612 // enable capabilities
4613 enable_capabilities = true;
4614 break;
4615 #endif
4616 case 'h':
4617 // help: print summary of command-line options
4618 debugmode=1;
4619 PrintHead();
4620 Usage();
4621 EXIT(0);
4622 break;
4623 case '?':
4624 default:
4625 // unrecognized option
4626 debugmode=1;
4627 PrintHead();
4628 // Point arg to the argument in which this option was found.
4629 arg = argv[optind-1];
4630 // Check whether the option is a long option that doesn't map to -h.
4631 if (arg[1] == '-' && optchar != 'h') {
4632 // Iff optopt holds a valid option then argument must be missing.
4633 if (optopt && (strchr(shortopts, optopt) != NULL)) {
4634 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4635 PrintValidArgs(optopt);
4636 } else {
4637 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4638 }
4639 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4640 EXIT(EXIT_BADCMD);
4641 }
4642 if (optopt) {
4643 // Iff optopt holds a valid option then argument must be missing.
4644 if (strchr(shortopts, optopt) != NULL){
4645 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4646 PrintValidArgs(optopt);
4647 } else {
4648 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4649 }
4650 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4651 EXIT(EXIT_BADCMD);
4652 }
4653 Usage();
4654 EXIT(0);
4655 }
4656
4657 // Check to see if option had an unrecognized or incorrect argument.
4658 if (badarg) {
4659 debugmode=1;
4660 PrintHead();
4661 // It would be nice to print the actual option name given by the user
4662 // here, but we just print the short form. Please fix this if you know
4663 // a clean way to do it.
4664 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4665 PrintValidArgs(optchar);
4666 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4667 EXIT(EXIT_BADCMD);
4668 }
4669 }
4670
4671 // non-option arguments are not allowed
4672 if (argc > optind) {
4673 debugmode=1;
4674 PrintHead();
4675 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4676 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4677 EXIT(EXIT_BADCMD);
4678 }
4679
4680 // no pidfile in debug mode
4681 if (debugmode && !pid_file.empty()) {
4682 debugmode=1;
4683 PrintHead();
4684 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4685 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4686 EXIT(EXIT_BADCMD);
4687 }
4688
4689 #ifndef _WIN32
4690 if (!debugmode) {
4691 // absolute path names are required due to chdir('/') after fork().
4692 check_abs_path('p', pid_file);
4693 check_abs_path('s', state_path_prefix);
4694 check_abs_path('A', attrlog_path_prefix);
4695 }
4696 #endif
4697
4698 // Read or init drive database
4699 {
4700 unsigned char savedebug = debugmode; debugmode = 1;
4701 if (!init_drive_database(use_default_db))
4702 EXIT(EXIT_BADCMD);
4703 debugmode = savedebug;
4704 }
4705
4706 // print header
4707 PrintHead();
4708 }
4709
4710 // Function we call if no configuration file was found or if the
4711 // SCANDIRECTIVE Directive was found. It makes entries for device
4712 // names returned by scan_smart_devices() in os_OSNAME.cpp
4713 static int MakeConfigEntries(const dev_config & base_cfg,
4714 dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4715 {
4716 // make list of devices
4717 smart_device_list devlist;
4718 if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4719 PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4720
4721 // if no devices, or error constructing list, return
4722 if (devlist.size() <= 0)
4723 return 0;
4724
4725 // add empty device slots for existing config entries
4726 while (scanned_devs.size() < conf_entries.size())
4727 scanned_devs.push_back((smart_device *)0);
4728
4729 // loop over entries to create
4730 for (unsigned i = 0; i < devlist.size(); i++) {
4731 // Move device pointer
4732 smart_device * dev = devlist.release(i);
4733 scanned_devs.push_back(dev);
4734
4735 // Copy configuration, update device and type name
4736 conf_entries.push_back(base_cfg);
4737 dev_config & cfg = conf_entries.back();
4738 cfg.name = dev->get_info().info_name;
4739 cfg.dev_name = dev->get_info().dev_name;
4740 cfg.dev_type = type;
4741 }
4742
4743 return devlist.size();
4744 }
4745
4746 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4747 {
4748 if (!debugmode && scandirective)
4749 return;
4750 if (line)
4751 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4752 "Unable to register %s device %s at line %d of file %s\n",
4753 type, name, line, configfile);
4754 else
4755 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4756 type, name);
4757 return;
4758 }
4759
4760 // Returns negative value (see ParseConfigFile()) if config file
4761 // had errors, else number of entries which may be zero or positive.
4762 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4763 {
4764 // parse configuration file configfile (normally /etc/smartd.conf)
4765 int entries = ParseConfigFile(conf_entries);
4766
4767 if (entries < 0) {
4768 // There was an error reading the configuration file.
4769 conf_entries.clear();
4770 if (entries == -1)
4771 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4772 return entries;
4773 }
4774
4775 // no error parsing config file.
4776 if (entries) {
4777 // we did not find a SCANDIRECTIVE and did find valid entries
4778 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4779 }
4780 else if (!conf_entries.empty()) {
4781 // we found a SCANDIRECTIVE or there was no configuration file so
4782 // scan. Configuration file's last entry contains all options
4783 // that were set
4784 dev_config first = conf_entries.back();
4785 conf_entries.pop_back();
4786
4787 if (first.lineno)
4788 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4789 else
4790 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4791
4792 // make config list of devices to search for
4793 MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4794
4795 // warn user if scan table found no devices
4796 if (conf_entries.empty())
4797 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4798 }
4799 else
4800 PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
4801
4802 return conf_entries.size();
4803 }
4804
4805 // Return true if TYPE contains a RAID drive number
4806 static bool is_raid_type(const char * type)
4807 {
4808 if (str_starts_with(type, "sat,"))
4809 return false;
4810 int i;
4811 if (sscanf(type, "%*[^,],%d", &i) != 1)
4812 return false;
4813 return true;
4814 }
4815
4816 // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
4817 static bool is_duplicate_device(const smart_device * dev,
4818 const smart_device_list & devices, unsigned numdevs,
4819 const dev_config_vector & ignored)
4820 {
4821 const smart_device::device_info & info1 = dev->get_info();
4822 bool is_raid1 = is_raid_type(info1.dev_type.c_str());
4823
4824 for (unsigned i = 0; i < numdevs; i++) {
4825 const smart_device::device_info & info2 = devices.at(i)->get_info();
4826 // -d TYPE options must match if RAID drive number is specified
4827 if ( info1.dev_name == info2.dev_name
4828 && ( info1.dev_type == info2.dev_type
4829 || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
4830 return true;
4831 }
4832
4833 for (unsigned i = 0; i < ignored.size(); i++) {
4834 const dev_config & cfg2 = ignored.at(i);
4835 if ( info1.dev_name == cfg2.dev_name
4836 && ( info1.dev_type == cfg2.dev_type
4837 || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
4838 return true;
4839 }
4840 return false;
4841 }
4842
4843 // This function tries devices from conf_entries. Each one that can be
4844 // registered is moved onto the [ata|scsi]devices lists and removed
4845 // from the conf_entries list.
4846 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4847 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4848 {
4849 // start by clearing lists/memory of ALL existing devices
4850 configs.clear();
4851 devices.clear();
4852 states.clear();
4853
4854 // Register entries
4855 dev_config_vector ignored_entries;
4856 unsigned numnoscan = 0;
4857 for (unsigned i = 0; i < conf_entries.size(); i++){
4858
4859 dev_config cfg = conf_entries[i];
4860
4861 if (cfg.ignore) {
4862 // Store for is_duplicate_device() check and ignore
4863 PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
4864 (!cfg.dev_type.empty() ? " [" : ""),
4865 cfg.dev_type.c_str(),
4866 (!cfg.dev_type.empty() ? "]" : ""));
4867 ignored_entries.push_back(cfg);
4868 continue;
4869 }
4870
4871 // get device of appropriate type
4872 smart_device_auto_ptr dev;
4873 bool scanning = false;
4874
4875 // Device may already be detected during devicescan
4876 if (i < scanned_devs.size()) {
4877 dev = scanned_devs.release(i);
4878 if (dev) {
4879 // Check for a preceding non-DEVICESCAN entry for the same device
4880 if ( (numnoscan || !ignored_entries.empty())
4881 && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
4882 PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
4883 continue;
4884 }
4885 scanning = true;
4886 }
4887 }
4888
4889 if (!dev) {
4890 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4891 if (!dev) {
4892 if (cfg.dev_type.empty())
4893 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4894 else
4895 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4896 continue;
4897 }
4898 }
4899
4900 // Save old info
4901 smart_device::device_info oldinfo = dev->get_info();
4902
4903 // Open with autodetect support, may return 'better' device
4904 dev.replace( dev->autodetect_open() );
4905
4906 // Report if type has changed
4907 if (oldinfo.dev_type != dev->get_dev_type())
4908 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4909 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4910
4911 if (!dev->is_open()) {
4912 // For linux+devfs, a nonexistent device gives a strange error
4913 // message. This makes the error message a bit more sensible.
4914 // If no debug and scanning - don't print errors
4915 if (debugmode || !scanning)
4916 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4917 continue;
4918 }
4919
4920 // Update informal name
4921 cfg.name = dev->get_info().info_name;
4922 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4923
4924 // Prepare initial state
4925 dev_state state;
4926
4927 // register ATA devices
4928 if (dev->is_ata()){
4929 if (ATADeviceScan(cfg, state, dev->to_ata())) {
4930 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4931 dev.reset();
4932 }
4933 }
4934 // or register SCSI devices
4935 else if (dev->is_scsi()){
4936 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4937 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4938 dev.reset();
4939 }
4940 }
4941 else {
4942 PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4943 dev.reset();
4944 }
4945
4946 if (dev) {
4947 // move onto the list of devices
4948 configs.push_back(cfg);
4949 states.push_back(state);
4950 devices.push_back(dev);
4951 if (!scanning)
4952 numnoscan = devices.size();
4953 }
4954 // if device is explictly listed and we can't register it, then
4955 // exit unless the user has specified that the device is removable
4956 else if (!scanning) {
4957 if (cfg.removable || quit==2)
4958 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4959 else {
4960 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4961 EXIT(EXIT_BADDEV);
4962 }
4963 }
4964 }
4965
4966 init_disable_standby_check(configs);
4967 }
4968
4969
4970 // Main program without exception handling
4971 static int main_worker(int argc, char **argv)
4972 {
4973 // Initialize interface
4974 smart_interface::init();
4975 if (!smi())
4976 return 1;
4977
4978 // is it our first pass through?
4979 bool firstpass = true;
4980
4981 // next time to wake up
4982 time_t wakeuptime = 0;
4983
4984 // parse input and print header and usage info if needed
4985 ParseOpts(argc,argv);
4986
4987 // Configuration for each device
4988 dev_config_vector configs;
4989 // Device states
4990 dev_state_vector states;
4991 // Devices to monitor
4992 smart_device_list devices;
4993
4994 bool write_states_always = true;
4995
4996 #ifdef HAVE_LIBCAP_NG
4997 // Drop capabilities
4998 if (enable_capabilities) {
4999 capng_clear(CAPNG_SELECT_BOTH);
5000 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
5001 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
5002 capng_apply(CAPNG_SELECT_BOTH);
5003 }
5004 #endif
5005
5006 // the main loop of the code
5007 for (;;) {
5008
5009 // are we exiting from a signal?
5010 if (caughtsigEXIT) {
5011 // are we exiting with SIGTERM?
5012 int isterm=(caughtsigEXIT==SIGTERM);
5013 int isquit=(caughtsigEXIT==SIGQUIT);
5014 int isok=debugmode?isterm || isquit:isterm;
5015
5016 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
5017 caughtsigEXIT, strsignal(caughtsigEXIT));
5018
5019 if (!isok)
5020 return EXIT_SIGNAL;
5021
5022 // Write state files
5023 if (!state_path_prefix.empty())
5024 write_all_dev_states(configs, states);
5025
5026 return 0;
5027 }
5028
5029 // Should we (re)read the config file?
5030 if (firstpass || caughtsigHUP){
5031 if (!firstpass) {
5032 // Write state files
5033 if (!state_path_prefix.empty())
5034 write_all_dev_states(configs, states);
5035
5036 PrintOut(LOG_INFO,
5037 caughtsigHUP==1?
5038 "Signal HUP - rereading configuration file %s\n":
5039 "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5040 configfile);
5041 }
5042
5043 {
5044 dev_config_vector conf_entries; // Entries read from smartd.conf
5045 smart_device_list scanned_devs; // Devices found during scan
5046 // (re)reads config file, makes >=0 entries
5047 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5048
5049 if (entries>=0) {
5050 // checks devices, then moves onto ata/scsi list or deallocates.
5051 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
5052 if (!(configs.size() == devices.size() && configs.size() == states.size()))
5053 throw std::logic_error("Invalid result from RegisterDevices");
5054 }
5055 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
5056 // user has asked to continue on error in configuration file
5057 if (!firstpass)
5058 PrintOut(LOG_INFO,"Reusing previous configuration\n");
5059 }
5060 else {
5061 // exit with configuration file error status
5062 return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
5063 }
5064 }
5065
5066 // Log number of devices we are monitoring...
5067 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
5068 int numata = 0;
5069 for (unsigned i = 0; i < devices.size(); i++) {
5070 if (devices.at(i)->is_ata())
5071 numata++;
5072 }
5073 PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
5074 numata, devices.size() - numata);
5075 }
5076 else {
5077 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
5078 return EXIT_NODEV;
5079 }
5080
5081 if (quit==4) {
5082 // user has asked to print test schedule
5083 PrintTestSchedule(configs, states, devices);
5084 return 0;
5085 }
5086
5087 #ifdef HAVE_LIBCAP_NG
5088 if (enable_capabilities) {
5089 for (unsigned i = 0; i < configs.size(); i++) {
5090 if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
5091 PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
5092 break;
5093 }
5094 }
5095 }
5096 #endif
5097
5098 // reset signal
5099 caughtsigHUP=0;
5100
5101 // Always write state files after (re)configuration
5102 write_states_always = true;
5103 }
5104
5105 // check all devices once,
5106 // self tests are not started in first pass unless '-q onecheck' is specified
5107 CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
5108
5109 // Write state files
5110 if (!state_path_prefix.empty())
5111 write_all_dev_states(configs, states, write_states_always);
5112 write_states_always = false;
5113
5114 // Write attribute logs
5115 if (!attrlog_path_prefix.empty())
5116 write_all_dev_attrlogs(configs, states);
5117
5118 // user has asked us to exit after first check
5119 if (quit==3) {
5120 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5121 "smartd is exiting (exit status 0)\n");
5122 return 0;
5123 }
5124
5125 // fork into background if needed
5126 if (firstpass && !debugmode) {
5127 DaemonInit();
5128 }
5129
5130 // set exit and signal handlers, write PID file, set wake-up time
5131 if (firstpass){
5132 Initialize(&wakeuptime);
5133 firstpass = false;
5134 }
5135
5136 // sleep until next check time, or a signal arrives
5137 wakeuptime = dosleep(wakeuptime, write_states_always);
5138 }
5139 }
5140
5141
5142 #ifndef _WIN32
5143 // Main program
5144 int main(int argc, char **argv)
5145 #else
5146 // Windows: internal main function started direct or by service control manager
5147 static int smartd_main(int argc, char **argv)
5148 #endif
5149 {
5150 int status;
5151 try {
5152 // Do the real work ...
5153 status = main_worker(argc, argv);
5154 }
5155 catch (int ex) {
5156 // EXIT(status) arrives here
5157 status = ex;
5158 }
5159 catch (const std::bad_alloc & /*ex*/) {
5160 // Memory allocation failed (also thrown by std::operator new)
5161 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5162 status = EXIT_NOMEM;
5163 }
5164 catch (const std::exception & ex) {
5165 // Other fatal errors
5166 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5167 status = EXIT_BADCODE;
5168 }
5169
5170 if (is_initialized)
5171 status = Goodbye(status);
5172
5173 #ifdef _WIN32
5174 daemon_winsvc_exitcode = status;
5175 #endif
5176 return status;
5177 }
5178
5179
#ifdef _WIN32
// Program entry point on Windows.
// Passes control to daemon_main(), which handles daemon and service
// specific commands and starts smartd_main() direct, from a new process,
// or via service control manager.
int main(int argc, char **argv)
{
  // Options for smartd windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd", "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (SMART) built into "
    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
    "www.smartmontools.org"
  };

  return daemon_main("smartd", &service_options , smartd_main, argc, argv);
}
#endif