]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Closes #831504
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-16 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #include "config.h"
25 #include "int64.h"
26
27 // unconditionally included files
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
31 #include <signal.h>
32 #include <fcntl.h>
33 #include <string.h>
34 #include <syslog.h>
35 #include <stdarg.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <time.h>
39 #include <limits.h>
40 #include <getopt.h>
41
42 #include <stdexcept>
43 #include <string>
44 #include <vector>
45 #include <algorithm> // std::replace()
46
47 // conditionally included files
48 #ifndef _WIN32
49 #include <sys/wait.h>
50 #endif
51 #ifdef HAVE_UNISTD_H
52 #include <unistd.h>
53 #endif
54
55 #ifdef _WIN32
56 #ifdef _MSC_VER
57 #pragma warning(disable:4761) // "conversion supplied"
58 typedef unsigned short mode_t;
59 typedef int pid_t;
60 #endif
61 #include <io.h> // umask()
62 #include <process.h> // getpid()
63 #endif // _WIN32
64
65 #ifdef __CYGWIN__
66 #include <io.h> // setmode()
67 #endif // __CYGWIN__
68
69 #ifdef HAVE_LIBCAP_NG
70 #include <cap-ng.h>
71 #endif // LIBCAP_NG
72
73 // locally included files
74 #include "atacmds.h"
75 #include "dev_interface.h"
76 #include "knowndrives.h"
77 #include "scsicmds.h"
78 #include "nvmecmds.h"
79 #include "utility.h"
80
81 // This is for solaris, where signal() resets the handler to SIG_DFL
82 // after the first signal is caught.
83 #ifdef HAVE_SIGSET
84 #define SIGNALFN sigset
85 #else
86 #define SIGNALFN signal
87 #endif
88
89 #ifdef _WIN32
90 // fork()/signal()/initd simulation for native Windows
91 #include "daemon_win32.h" // daemon_main/detach/signal()
92 #undef SIGNALFN
93 #define SIGNALFN daemon_signal
94 #define strsignal daemon_strsignal
95 #define sleep daemon_sleep
96 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
97 #define SIGQUIT SIGBREAK
98 #define SIGQUIT_KEYNAME "CONTROL-Break"
99 #else // _WIN32
100 #define SIGQUIT_KEYNAME "CONTROL-\\"
101 #endif // _WIN32
102
103 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 4308 2016-04-24 13:36:10Z chrfranke $"
104 CONFIG_H_CVSID;
105
106 using namespace smartmontools;
107
108 // smartd exit codes
109 #define EXIT_BADCMD 1 // command line did not parse
110 #define EXIT_BADCONF 2 // syntax error in config file
111 #define EXIT_STARTUP 3 // problem forking daemon
112 #define EXIT_PID 4 // problem creating pid file
113 #define EXIT_NOCONF 5 // config file does not exist
114 #define EXIT_READCONF 6 // config file exists but cannot be read
115
116 #define EXIT_NOMEM 8 // out of memory
117 #define EXIT_BADCODE 10 // internal error - should NEVER happen
118
119 #define EXIT_BADDEV 16 // we can't monitor this device
120 #define EXIT_NODEV 17 // no devices to monitor
121
122 #define EXIT_SIGNAL 254 // abort on signal
123
124
125 // command-line: 1=debug mode, 2=print presets
126 static unsigned char debugmode = 0;
127
128 // command-line: how long to sleep between checks
129 #define CHECKTIME 1800
130 static int checktime=CHECKTIME;
131
132 // command-line: name of PID file (empty for no pid file)
133 static std::string pid_file;
134
135 // command-line: path prefix of persistent state file, empty if no persistence.
136 static std::string state_path_prefix
137 #ifdef SMARTMONTOOLS_SAVESTATES
138 = SMARTMONTOOLS_SAVESTATES
139 #endif
140 ;
141
142 // command-line: path prefix of attribute log file, empty if no logs.
143 static std::string attrlog_path_prefix
144 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
145 = SMARTMONTOOLS_ATTRIBUTELOG
146 #endif
147 ;
148
149 // configuration file name
150 static const char * configfile;
151 // configuration file "name" if read from stdin
152 static const char * const configfile_stdin = "<stdin>";
153 // path of alternate configuration file
154 static std::string configfile_alt;
155
156 // warning script file
157 static std::string warning_script;
158
159 // command-line: when should we exit?
160 static int quit=0;
161
162 // command-line; this is the default syslog(3) log facility to use.
163 static int facility=LOG_DAEMON;
164
165 #ifndef _WIN32
166 // command-line: fork into background?
167 static bool do_fork=true;
168 #endif
169
170 #ifdef HAVE_LIBCAP_NG
171 // command-line: enable capabilities?
172 static bool enable_capabilities = false;
173 #endif
174
175 // TODO: This smartctl only variable is also used in os_win32.cpp
176 unsigned char failuretest_permissive = 0;
177
178 // set to one if we catch a USR1 (check devices now)
179 static volatile int caughtsigUSR1=0;
180
181 #ifdef _WIN32
182 // set to one if we catch a USR2 (toggle debug mode)
183 static volatile int caughtsigUSR2=0;
184 #endif
185
186 // set to one if we catch a HUP (reload config file). In debug mode,
187 // set to two, if we catch INT (also reload config file).
188 static volatile int caughtsigHUP=0;
189
190 // set to signal value if we catch INT, QUIT, or TERM
191 static volatile int caughtsigEXIT=0;
192
193 // This function prints either to stdout or to the syslog as needed.
194 static void PrintOut(int priority, const char *fmt, ...)
195 __attribute_format_printf(2, 3);
196
// Bit flags describing how an attribute is monitored.
// They are kept per attribute ID in dev_config::monitor_attr_flags.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,  // 0x01
  MONITOR_IGNORE      = 1 << 1,  // 0x02
  MONITOR_RAW_PRINT   = 1 << 2,  // 0x04
  MONITOR_RAW         = 1 << 3,  // 0x08
  MONITOR_AS_CRIT     = 1 << 4,  // 0x10
  MONITOR_RAW_AS_CRIT = 1 << 5,  // 0x20
};
207
// Flag storage with one byte of flag bits per attribute ID.
// Only IDs 1..255 are usable: requests for ID 0 or for IDs outside the
// table are ignored by set() and reported as "not set" by is_set().
class attribute_flags
{
public:
  // All flags start out cleared.
  attribute_flags()
    {
      for (unsigned n = 0; n < sizeof(m_flags); n++)
        m_flags[n] = 0;
    }

  // Returns true if any bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!usable_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  // Adds the bits in 'flags' to the flags of attribute 'id'.
  void set(int id, unsigned char flags)
    {
      if (!usable_id(id))
        return;
      m_flags[id] |= flags;
    }

private:
  unsigned char m_flags[256];

  // True if 'id' addresses a usable table slot (slot 0 is excluded).
  static bool usable_id(int id)
    { return (id > 0 && id < (int)sizeof(m_flags)); }
};
227
228
229 /// Configuration data for a device. Read from smartd.conf.
230 /// Supports copy & assignment and is compatible with STL containers.
231 struct dev_config
232 {
233 int lineno; // Line number of entry in file
234 std::string name; // Device name (with optional extra info)
235 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
236 std::string dev_type; // Device type argument from -d directive, empty if none
237 std::string dev_idinfo; // Device identify info for warning emails
238 std::string state_file; // Path of the persistent state file, empty if none
239 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
240 bool ignore; // Ignore this entry
241 bool smartcheck; // Check SMART status
242 bool usagefailed; // Check for failed Usage Attributes
243 bool prefail; // Track changes in Prefail Attributes
244 bool usage; // Track changes in Usage Attributes
245 bool selftest; // Monitor number of selftest errors
246 bool errorlog; // Monitor number of ATA errors
247 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
248 bool offlinests; // Monitor changes in offline data collection status
249 bool offlinests_ns; // Disable auto standby if in progress
250 bool selfteststs; // Monitor changes in self-test execution status
251 bool selfteststs_ns; // Disable auto standby if in progress
252 bool permissive; // Ignore failed SMART commands
253 char autosave; // 1=disable, 2=enable Autosave Attributes
254 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
255 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
256 bool ignorepresets; // Ignore database of -v options
257 bool showpresets; // Show database entry for this device
258 bool removable; // Device may disappear (not be present)
259 char powermode; // skip check, if disk in idle or standby mode
260 bool powerquiet; // skip powermode 'skipping checks' message
261 int powerskipmax; // how many times can be check skipped
262 unsigned char tempdiff; // Track Temperature changes >= this limit
263 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
264 regular_expression test_regex; // Regex for scheduled testing
265
266 // Configuration of email warning messages
267 std::string emailcmdline; // script to execute, empty if no messages
268 std::string emailaddress; // email address, or empty
269 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
270 bool emailtest; // Send test email?
271
272 // ATA ONLY
273 int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
274 int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
275 int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
276 int set_lookahead; // disable(-1), enable(1) read look-ahead
277 int set_standby; // set(1..255->0..254) standby timer
278 bool set_security_freeze; // Freeze ATA security
279 int set_wcache; // disable(-1), enable(1) write cache
280
281 bool sct_erc_set; // set SCT ERC to:
282 unsigned short sct_erc_readtime; // ERC read time (deciseconds)
283 unsigned short sct_erc_writetime; // ERC write time (deciseconds)
284
285 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
286 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
287 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
288 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
289
290 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
291
292 ata_vendor_attr_defs attribute_defs; // -v options
293
294 dev_config();
295 };
296
297 dev_config::dev_config()
298 : lineno(0),
299 ignore(false),
300 smartcheck(false),
301 usagefailed(false),
302 prefail(false),
303 usage(false),
304 selftest(false),
305 errorlog(false),
306 xerrorlog(false),
307 offlinests(false), offlinests_ns(false),
308 selfteststs(false), selfteststs_ns(false),
309 permissive(false),
310 autosave(0),
311 autoofflinetest(0),
312 ignorepresets(false),
313 showpresets(false),
314 removable(false),
315 powermode(0),
316 powerquiet(false),
317 powerskipmax(0),
318 tempdiff(0),
319 tempinfo(0), tempcrit(0),
320 emailfreq(0),
321 emailtest(false),
322 dev_rpm(0),
323 set_aam(0), set_apm(0),
324 set_lookahead(0),
325 set_standby(0),
326 set_security_freeze(false),
327 set_wcache(0),
328 sct_erc_set(false),
329 sct_erc_readtime(0), sct_erc_writetime(0),
330 curr_pending_id(0), offl_pending_id(0),
331 curr_pending_incr(false), offl_pending_incr(false),
332 curr_pending_set(false), offl_pending_set(false)
333 {
334 }
335
336
// Number of allowed mail message types
static const int SMARTD_NMAIL = 13;
// Type index of '-M test' mails (their state is not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.

// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time the first email was sent, as defined by time(2)
  time_t lastsent;  // time the last email was sent, as defined by time(2)

  // Starts with nothing sent and both timestamps zero.
  mailinfo()
    {
      logged = 0;
      firstsent = 0;
      lastsent = 0;
    }
};
351
352 /// Persistent state data for a device.
353 struct persistent_dev_state
354 {
355 unsigned char tempmin, tempmax; // Min/Max Temperatures
356
357 unsigned char selflogcount; // total number of self-test errors
358 unsigned short selfloghour; // lifetime hours of last self-test error
359
360 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
361
362 uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
363 uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
364
365 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
366
367 // ATA ONLY
368 int ataerrorcount; // Total number of ATA errors
369
370 // Persistent part of ata_smart_values:
371 struct ata_attribute {
372 unsigned char id;
373 unsigned char val;
374 unsigned char worst; // Byte needed for 'raw64' attribute only.
375 uint64_t raw;
376 unsigned char resvd;
377
378 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
379 };
380 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
381
382 // SCSI ONLY
383
384 struct scsi_error_counter_t {
385 struct scsiErrorCounter errCounter;
386 unsigned char found;
387 scsi_error_counter_t() : found(0)
388 { memset(&errCounter, 0, sizeof(errCounter)); }
389 };
390 scsi_error_counter_t scsi_error_counters[3];
391
392 struct scsi_nonmedium_error_t {
393 struct scsiNonMediumError nme;
394 unsigned char found;
395 scsi_nonmedium_error_t() : found(0)
396 { memset(&nme, 0, sizeof(nme)); }
397 };
398 scsi_nonmedium_error_t scsi_nonmedium_error;
399
400 // NVMe only
401 uint64_t nvme_err_log_entries;
402
403 persistent_dev_state();
404 };
405
406 persistent_dev_state::persistent_dev_state()
407 : tempmin(0), tempmax(0),
408 selflogcount(0),
409 selfloghour(0),
410 scheduled_test_next_check(0),
411 selective_test_last_start(0),
412 selective_test_last_end(0),
413 ataerrorcount(0),
414 nvme_err_log_entries(0)
415 {
416 }
417
418 /// Non-persistent state data for a device.
419 struct temp_dev_state
420 {
421 bool must_write; // true if persistent part should be written
422
423 bool not_cap_offline; // true == not capable of offline testing
424 bool not_cap_conveyance;
425 bool not_cap_short;
426 bool not_cap_long;
427 bool not_cap_selective;
428
429 unsigned char temperature; // last recorded Temperature (in Celsius)
430 time_t tempmin_delay; // time where Min Temperature tracking will start
431
432 bool powermodefail; // true if power mode check failed
433 int powerskipcnt; // Number of checks skipped due to idle or standby mode
434 int lastpowermodeskipped; // the last power mode that was skipped
435
436 // SCSI ONLY
437 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
438 unsigned char TempPageSupported; // has log sense temperature page (0xd)
439 unsigned char ReadECounterPageSupported;
440 unsigned char WriteECounterPageSupported;
441 unsigned char VerifyECounterPageSupported;
442 unsigned char NonMediumErrorPageSupported;
443 unsigned char SuppressReport; // minimize nuisance reports
444 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
445 // know yet) 6 or 10
446 // ATA ONLY
447 uint64_t num_sectors; // Number of sectors
448 ata_smart_values smartval; // SMART data
449 ata_smart_thresholds_pvt smartthres; // SMART thresholds
450 bool offline_started; // true if offline data collection was started
451 bool selftest_started; // true if self-test was started
452
453 temp_dev_state();
454 };
455
456 temp_dev_state::temp_dev_state()
457 : must_write(false),
458 not_cap_offline(false),
459 not_cap_conveyance(false),
460 not_cap_short(false),
461 not_cap_long(false),
462 not_cap_selective(false),
463 temperature(0),
464 tempmin_delay(0),
465 powermodefail(false),
466 powerskipcnt(0),
467 lastpowermodeskipped(0),
468 SmartPageSupported(false),
469 TempPageSupported(false),
470 ReadECounterPageSupported(false),
471 WriteECounterPageSupported(false),
472 VerifyECounterPageSupported(false),
473 NonMediumErrorPageSupported(false),
474 SuppressReport(false),
475 modese_len(0),
476 num_sectors(0),
477 offline_started(false),
478 selftest_started(false)
479 {
480 memset(&smartval, 0, sizeof(smartval));
481 memset(&smartthres, 0, sizeof(smartthres));
482 }
483
484 /// Runtime state data for a device.
485 struct dev_state
486 : public persistent_dev_state,
487 public temp_dev_state
488 {
489 void update_persistent_state();
490 void update_temp_state();
491 };
492
493 /// Container for configuration info for each device.
494 typedef std::vector<dev_config> dev_config_vector;
495
496 /// Container for state info for each device.
497 typedef std::vector<dev_state> dev_state_vector;
498
499 // Copy ATA attributes to persistent state.
500 void dev_state::update_persistent_state()
501 {
502 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
503 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
504 ata_attribute & pa = ata_attributes[i];
505 pa.id = ta.id;
506 if (ta.id == 0) {
507 pa.val = pa.worst = 0; pa.raw = 0;
508 continue;
509 }
510 pa.val = ta.current;
511 pa.worst = ta.worst;
512 pa.raw = ta.raw[0]
513 | ( ta.raw[1] << 8)
514 | ( ta.raw[2] << 16)
515 | ((uint64_t)ta.raw[3] << 24)
516 | ((uint64_t)ta.raw[4] << 32)
517 | ((uint64_t)ta.raw[5] << 40);
518 pa.resvd = ta.reserv;
519 }
520 }
521
522 // Copy ATA from persistent to temp state.
523 void dev_state::update_temp_state()
524 {
525 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
526 const ata_attribute & pa = ata_attributes[i];
527 ata_smart_attribute & ta = smartval.vendor_attributes[i];
528 ta.id = pa.id;
529 if (pa.id == 0) {
530 ta.current = ta.worst = 0;
531 memset(ta.raw, 0, sizeof(ta.raw));
532 continue;
533 }
534 ta.current = pa.val;
535 ta.worst = pa.worst;
536 ta.raw[0] = (unsigned char) pa.raw;
537 ta.raw[1] = (unsigned char)(pa.raw >> 8);
538 ta.raw[2] = (unsigned char)(pa.raw >> 16);
539 ta.raw[3] = (unsigned char)(pa.raw >> 24);
540 ta.raw[4] = (unsigned char)(pa.raw >> 32);
541 ta.raw[5] = (unsigned char)(pa.raw >> 40);
542 ta.reserv = pa.resvd;
543 }
544 }
545
546 // Parse a line from a state file.
547 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
548 {
549 static const regular_expression regex(
550 "^ *"
551 "((temperature-min)" // (1 (2)
552 "|(temperature-max)" // (3)
553 "|(self-test-errors)" // (4)
554 "|(self-test-last-err-hour)" // (5)
555 "|(scheduled-test-next-check)" // (6)
556 "|(selective-test-last-start)" // (7)
557 "|(selective-test-last-end)" // (8)
558 "|(ata-error-count)" // (9)
559 "|(mail\\.([0-9]+)\\." // (10 (11)
560 "((count)" // (12 (13)
561 "|(first-sent-time)" // (14)
562 "|(last-sent-time)" // (15)
563 ")" // 12)
564 ")" // 10)
565 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
566 "((id)" // (18 (19)
567 "|(val)" // (20)
568 "|(worst)" // (21)
569 "|(raw)" // (22)
570 "|(resvd)" // (23)
571 ")" // 18)
572 ")" // 16)
573 "|(nvme-err-log-entries)" // (24)
574 ")" // 1)
575 " *= *([0-9]+)[ \n]*$", // (25)
576 REG_EXTENDED
577 );
578
579 const int nmatch = 1+25;
580 regmatch_t match[nmatch];
581 if (!regex.execute(line, nmatch, match))
582 return false;
583 if (match[nmatch-1].rm_so < 0)
584 return false;
585
586 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
587
588 int m = 1;
589 if (match[++m].rm_so >= 0)
590 state.tempmin = (unsigned char)val;
591 else if (match[++m].rm_so >= 0)
592 state.tempmax = (unsigned char)val;
593 else if (match[++m].rm_so >= 0)
594 state.selflogcount = (unsigned char)val;
595 else if (match[++m].rm_so >= 0)
596 state.selfloghour = (unsigned short)val;
597 else if (match[++m].rm_so >= 0)
598 state.scheduled_test_next_check = (time_t)val;
599 else if (match[++m].rm_so >= 0)
600 state.selective_test_last_start = val;
601 else if (match[++m].rm_so >= 0)
602 state.selective_test_last_end = val;
603 else if (match[++m].rm_so >= 0)
604 state.ataerrorcount = (int)val;
605 else if (match[m+=2].rm_so >= 0) {
606 int i = atoi(line+match[m].rm_so);
607 if (!(0 <= i && i < SMARTD_NMAIL))
608 return false;
609 if (i == MAILTYPE_TEST) // Don't suppress test mails
610 return true;
611 if (match[m+=2].rm_so >= 0)
612 state.maillog[i].logged = (int)val;
613 else if (match[++m].rm_so >= 0)
614 state.maillog[i].firstsent = (time_t)val;
615 else if (match[++m].rm_so >= 0)
616 state.maillog[i].lastsent = (time_t)val;
617 else
618 return false;
619 }
620 else if (match[m+=5+1].rm_so >= 0) {
621 int i = atoi(line+match[m].rm_so);
622 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
623 return false;
624 if (match[m+=2].rm_so >= 0)
625 state.ata_attributes[i].id = (unsigned char)val;
626 else if (match[++m].rm_so >= 0)
627 state.ata_attributes[i].val = (unsigned char)val;
628 else if (match[++m].rm_so >= 0)
629 state.ata_attributes[i].worst = (unsigned char)val;
630 else if (match[++m].rm_so >= 0)
631 state.ata_attributes[i].raw = val;
632 else if (match[++m].rm_so >= 0)
633 state.ata_attributes[i].resvd = (unsigned char)val;
634 else
635 return false;
636 }
637 else if (match[m+7].rm_so >= 0)
638 state.nvme_err_log_entries = val;
639 else
640 return false;
641 return true;
642 }
643
644 // Read a state file.
645 static bool read_dev_state(const char * path, persistent_dev_state & state)
646 {
647 stdio_file f(path, "r");
648 if (!f) {
649 if (errno != ENOENT)
650 pout("Cannot read state file \"%s\"\n", path);
651 return false;
652 }
653 #ifdef __CYGWIN__
654 setmode(fileno(f), O_TEXT); // Allow files with \r\n
655 #endif
656
657 persistent_dev_state new_state;
658 int good = 0, bad = 0;
659 char line[256];
660 while (fgets(line, sizeof(line), f)) {
661 const char * s = line + strspn(line, " \t");
662 if (!*s || *s == '#')
663 continue;
664 if (!parse_dev_state_line(line, new_state))
665 bad++;
666 else
667 good++;
668 }
669
670 if (bad) {
671 if (!good) {
672 pout("%s: format error\n", path);
673 return false;
674 }
675 pout("%s: %d invalid line(s) ignored\n", path, bad);
676 }
677
678 // This sets the values missing in the file to 0.
679 state = new_state;
680 return true;
681 }
682
// Write "name = val" to the state file. Zero values are not written.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}

// Write "name1.id.name2 = val" to the state file. Zero values are not written.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
694
695 // Write a state file
696 static bool write_dev_state(const char * path, const persistent_dev_state & state)
697 {
698 // Rename old "file" to "file~"
699 std::string pathbak = path; pathbak += '~';
700 unlink(pathbak.c_str());
701 rename(path, pathbak.c_str());
702
703 stdio_file f(path, "w");
704 if (!f) {
705 pout("Cannot create state file \"%s\"\n", path);
706 return false;
707 }
708
709 fprintf(f, "# smartd state file\n");
710 write_dev_state_line(f, "temperature-min", state.tempmin);
711 write_dev_state_line(f, "temperature-max", state.tempmax);
712 write_dev_state_line(f, "self-test-errors", state.selflogcount);
713 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
714 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
715 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
716 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
717
718 int i;
719 for (i = 0; i < SMARTD_NMAIL; i++) {
720 if (i == MAILTYPE_TEST) // Don't suppress test mails
721 continue;
722 const mailinfo & mi = state.maillog[i];
723 if (!mi.logged)
724 continue;
725 write_dev_state_line(f, "mail", i, "count", mi.logged);
726 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
727 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
728 }
729
730 // ATA ONLY
731 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
732
733 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
734 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
735 if (!pa.id)
736 continue;
737 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
738 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
739 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
740 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
741 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
742 }
743
744 // NVMe only
745 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
746
747 return true;
748 }
749
750 // Write to the attrlog file
751 static bool write_dev_attrlog(const char * path, const dev_state & state)
752 {
753 stdio_file f(path, "a");
754 if (!f) {
755 pout("Cannot create attribute log file \"%s\"\n", path);
756 return false;
757 }
758
759
760 time_t now = time(0);
761 struct tm * tms = gmtime(&now);
762 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
763 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
764 tms->tm_hour, tms->tm_min, tms->tm_sec);
765 // ATA ONLY
766 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
767 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
768 if (!pa.id)
769 continue;
770 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
771 }
772 // SCSI ONLY
773 const struct scsiErrorCounter * ecp;
774 const char * pageNames[3] = {"read", "write", "verify"};
775 for (int k = 0; k < 3; ++k) {
776 if ( !state.scsi_error_counters[k].found ) continue;
777 ecp = &state.scsi_error_counters[k].errCounter;
778 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
779 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
780 "\t%s-corr-by-retry;%" PRIu64 ";"
781 "\t%s-total-err-corrected;%" PRIu64 ";"
782 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
783 "\t%s-gb-processed;%.3f;"
784 "\t%s-total-unc-errors;%" PRIu64 ";",
785 pageNames[k], ecp->counter[0],
786 pageNames[k], ecp->counter[1],
787 pageNames[k], ecp->counter[2],
788 pageNames[k], ecp->counter[3],
789 pageNames[k], ecp->counter[4],
790 pageNames[k], (ecp->counter[5] / 1000000000.0),
791 pageNames[k], ecp->counter[6]);
792 }
793 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
794 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
795 }
796 // write SCSI current temperature if it is monitored
797 if(state.TempPageSupported && state.temperature)
798 fprintf(f, "\ttemperature;%d;", state.temperature);
799 // end of line
800 fprintf(f, "\n");
801 return true;
802 }
803
804 // Write all state files. If write_always is false, don't write
805 // unless must_write is set.
806 static void write_all_dev_states(const dev_config_vector & configs,
807 dev_state_vector & states,
808 bool write_always = true)
809 {
810 for (unsigned i = 0; i < states.size(); i++) {
811 const dev_config & cfg = configs.at(i);
812 if (cfg.state_file.empty())
813 continue;
814 dev_state & state = states[i];
815 if (!write_always && !state.must_write)
816 continue;
817 if (!write_dev_state(cfg.state_file.c_str(), state))
818 continue;
819 state.must_write = false;
820 if (write_always || debugmode)
821 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
822 cfg.name.c_str(), cfg.state_file.c_str());
823 }
824 }
825
826 // Write to all attrlog files
827 static void write_all_dev_attrlogs(const dev_config_vector & configs,
828 dev_state_vector & states)
829 {
830 for (unsigned i = 0; i < states.size(); i++) {
831 const dev_config & cfg = configs.at(i);
832 if (cfg.attrlog_file.empty())
833 continue;
834 dev_state & state = states[i];
835 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
836 }
837 }
838
839 // remove the PID file
840 static void RemovePidFile()
841 {
842 if (!pid_file.empty()) {
843 if (unlink(pid_file.c_str()))
844 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
845 pid_file.c_str(), strerror(errno));
846 pid_file.clear();
847 }
848 return;
849 }
850
851 extern "C" { // signal handlers require C-linkage
852
853 // Note if we catch a SIGUSR1
854 static void USR1handler(int sig)
855 {
856 if (SIGUSR1==sig)
857 caughtsigUSR1=1;
858 return;
859 }
860
861 #ifdef _WIN32
862 // Note if we catch a SIGUSR2
863 static void USR2handler(int sig)
864 {
865 if (SIGUSR2==sig)
866 caughtsigUSR2=1;
867 return;
868 }
869 #endif
870
871 // Note if we catch a HUP (or INT in debug mode)
872 static void HUPhandler(int sig)
873 {
874 if (sig==SIGHUP)
875 caughtsigHUP=1;
876 else
877 caughtsigHUP=2;
878 return;
879 }
880
881 // signal handler for TERM, QUIT, and INT (if not in debug mode)
882 static void sighandler(int sig)
883 {
884 if (!caughtsigEXIT)
885 caughtsigEXIT=sig;
886 return;
887 }
888
889 } // extern "C"
890
891 // Cleanup, print Goodbye message and remove pidfile
892 static int Goodbye(int status)
893 {
894 // delete PID file, if one was created
895 RemovePidFile();
896
897 // and this should be the final output from smartd before it exits
898 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
899
900 return status;
901 }
902
// A replacement for setenv() which is not available on all platforms.
// putenv() keeps a pointer to the string it is given, so that string
// must stay valid while it is part of the environment. It may only be
// freed after the variable was redefined by another putenv() call.
// There is no portable way to unset a variable with putenv(), so each
// variable's buffer is owned by an env_buffer object that is expected
// to live as long as the variable does (e.g. a static object); for the
// same reason the class has no destructor releasing the buffer.
// Using setenv() if available is not considered because some
// implementations may produce memory leaks.

class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Sets environment variable NAME to VALUE.
  // Throws std::runtime_error if putenv() fails.
  void set(const char * name, const char * value);

private:
  char * m_buf; // "NAME=VALUE" string currently referenced by the environment

  // Not copyable: two objects must never own the same buffer
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "VALUE" + '\0'
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The environment was not changed, so nothing refers to the new
    // string. Release it here, otherwise it would leak.
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // The environment now refers to newbuf, the previous string is unused.
  // This assumes that the same NAME is passed on each call
  delete [] m_buf;
  m_buf = newbuf;
}
941
942 #define EBUFLEN 1024
943
944 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
945 __attribute_format_printf(4, 5);
946
947 // If either address or executable path is non-null then send and log
948 // a warning email, or execute executable
949 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
950 {
951 static const char * const whichfail[] = {
952 "EmailTest", // 0
953 "Health", // 1
954 "Usage", // 2
955 "SelfTest", // 3
956 "ErrorCount", // 4
957 "FailedHealthCheck", // 5
958 "FailedReadSmartData", // 6
959 "FailedReadSmartErrorLog", // 7
960 "FailedReadSmartSelfTestLog", // 8
961 "FailedOpenDevice", // 9
962 "CurrentPendingSector", // 10
963 "OfflineUncorrectableSector", // 11
964 "Temperature" // 12
965 };
966
967 // See if user wants us to send mail
968 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
969 return;
970
971 std::string address = cfg.emailaddress;
972 const char * executable = cfg.emailcmdline.c_str();
973
974 // which type of mail are we sending?
975 mailinfo * mail=(state.maillog)+which;
976
977 // checks for sanity
978 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
979 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
980 return;
981 }
982 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
983 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
984 which, (int)sizeof(whichfail));
985 return;
986 }
987
988 // Return if a single warning mail has been sent.
989 if ((cfg.emailfreq==1) && mail->logged)
990 return;
991
992 // Return if this is an email test and one has already been sent.
993 if (which == 0 && mail->logged)
994 return;
995
996 // To decide if to send mail, we need to know what time it is.
997 time_t epoch = time(0);
998
999 // Return if less than one day has gone by
1000 const int day = 24*3600;
1001 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1002 return;
1003
1004 // Return if less than 2^(logged-1) days have gone by
1005 if (cfg.emailfreq==3 && mail->logged) {
1006 int days = 0x01 << (mail->logged - 1);
1007 days*=day;
1008 if (epoch<(mail->lastsent+days))
1009 return;
1010 }
1011
1012 #ifdef HAVE_LIBCAP_NG
1013 if (enable_capabilities) {
1014 PrintOut(LOG_ERR, "Sending a mail was supressed. "
1015 "Mails can't be send when capabilites are enabled\n");
1016 return;
1017 }
1018 #endif
1019
1020 // record the time of this mail message, and the first mail message
1021 if (!mail->logged)
1022 mail->firstsent=epoch;
1023 mail->lastsent=epoch;
1024
1025 // print warning string into message
1026 char message[256];
1027 va_list ap;
1028 va_start(ap, fmt);
1029 vsnprintf(message, sizeof(message), fmt, ap);
1030 va_end(ap);
1031
1032 // replace commas by spaces to separate recipients
1033 std::replace(address.begin(), address.end(), ',', ' ');
1034
1035 // Export information in environment variables that will be useful
1036 // for user scripts
1037 static env_buffer env[12];
1038 env[0].set("SMARTD_MAILER", executable);
1039 env[1].set("SMARTD_MESSAGE", message);
1040 char dates[DATEANDEPOCHLEN];
1041 snprintf(dates, sizeof(dates), "%d", mail->logged);
1042 env[2].set("SMARTD_PREVCNT", dates);
1043 dateandtimezoneepoch(dates, mail->firstsent);
1044 env[3].set("SMARTD_TFIRST", dates);
1045 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1046 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1047 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1048 env[6].set("SMARTD_ADDRESS", address.c_str());
1049 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1050
1051 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1052 env[8].set("SMARTD_DEVICETYPE",
1053 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1054 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1055
1056 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1057 dates[0] = 0;
1058 if (which) switch (cfg.emailfreq) {
1059 case 2: dates[0] = '1'; dates[1] = 0; break;
1060 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1061 }
1062 env[11].set("SMARTD_NEXTDAYS", dates);
1063
1064 // now construct a command to send this as EMAIL
1065 if (!*executable)
1066 executable = "<mail>";
1067 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1068 const char * newwarn = (which? "Warning via" : "Test of");
1069
1070 #ifndef _WIN32
1071 char command[2048];
1072 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1073
1074 // tell SYSLOG what we are about to do...
1075 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1076 which?"Sending warning via":"Executing test of", executable, newadd);
1077
1078 // issue the command to send mail or to run the user's executable
1079 errno=0;
1080 FILE * pfp;
1081 if (!(pfp=popen(command, "r")))
1082 // failed to popen() mail process
1083 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1084 newwarn, executable, newadd, errno?strerror(errno):"");
1085 else {
1086 // pipe suceeded!
1087 int len, status;
1088 char buffer[EBUFLEN];
1089
1090 // if unexpected output on stdout/stderr, null terminate, print, and flush
1091 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1092 int count=0;
1093 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1094 buffer[newlen]='\0';
1095 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1096 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1097
1098 // flush pipe if needed
1099 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1100 count++;
1101
1102 // tell user that pipe was flushed, or that something is really wrong
1103 if (count && count<EBUFLEN)
1104 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1105 newwarn, executable, newadd);
1106 else if (count)
1107 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1108 newwarn, executable, newadd);
1109 }
1110
1111 // if something went wrong with mail process, print warning
1112 errno=0;
1113 if (-1==(status=pclose(pfp)))
1114 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1115 errno?strerror(errno):"");
1116 else {
1117 // mail process apparently succeeded. Check and report exit status
1118 if (WIFEXITED(status)) {
1119 // exited 'normally' (but perhaps with nonzero status)
1120 int status8 = WEXITSTATUS(status);
1121 if (status8>128)
1122 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1123 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1124 else if (status8)
1125 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1126 newwarn, executable, newadd, status, status8);
1127 else
1128 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1129 }
1130
1131 if (WIFSIGNALED(status))
1132 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1133 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1134
1135 // this branch is probably not possible. If subprocess is
1136 // stopped then pclose() should not return.
1137 if (WIFSTOPPED(status))
1138 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1139 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1140
1141 }
1142 }
1143
1144 #else // _WIN32
1145 {
1146 char command[2048];
1147 snprintf(command, sizeof(command), "cmd /c \"%s\"", warning_script.c_str());
1148
1149 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1150 int rc;
1151 // run command
1152 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1153 (which?"Sending warning via":"Executing test of"), executable, newadd);
1154 rc = daemon_spawn(command, "", 0, stdoutbuf, sizeof(stdoutbuf));
1155 if (rc >= 0 && stdoutbuf[0])
1156 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1157 newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1158 if (rc != 0)
1159 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1160 newwarn, executable, newadd, rc);
1161 else
1162 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1163 }
1164
1165 #endif // _WIN32
1166
1167 // increment mail sent counter
1168 mail->logged++;
1169 }
1170
1171 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1172 __attribute_format_printf(4, 5);
1173
1174 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1175 {
1176 if (!(0 <= which && which < SMARTD_NMAIL))
1177 return;
1178
1179 // Return if no mail sent yet
1180 mailinfo & mi = state.maillog[which];
1181 if (!mi.logged)
1182 return;
1183
1184 // Format & print message
1185 char msg[256];
1186 va_list ap;
1187 va_start(ap, fmt);
1188 vsnprintf(msg, sizeof(msg), fmt, ap);
1189 va_end(ap);
1190
1191 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1192 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1193
1194 // Clear mail counter and timestamps
1195 mi = mailinfo();
1196 state.must_write = true;
1197 }
1198
1199 #ifndef _WIN32
1200
1201 // Output multiple lines via separate syslog(3) calls.
1202 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1203 {
1204 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1205 vsnprintf(buf, sizeof(buf), fmt, ap);
1206
1207 for (char * p = buf, * q; p && *p; p = q) {
1208 if ((q = strchr(p, '\n')))
1209 *q++ = 0;
1210 if (*p)
1211 syslog(priority, "%s\n", p);
1212 }
1213 }
1214
1215 #else // _WIN32
1216 // os_win32/syslog_win32.cpp supports multiple lines.
1217 #define vsyslog_lines vsyslog
1218 #endif // _WIN32
1219
1220 // Printing function for watching ataprint commands, or losing them
1221 // [From GLIBC Manual: Since the prototype doesn't specify types for
1222 // optional arguments, in a call to a variadic function the default
1223 // argument promotions are performed on the optional argument
1224 // values. This means the objects of type char or short int (whether
1225 // signed or not) are promoted to either int or unsigned int, as
1226 // appropriate.]
1227 void pout(const char *fmt, ...){
1228 va_list ap;
1229
1230 // get the correct time in syslog()
1231 FixGlibcTimeZoneBug();
1232 // initialize variable argument list
1233 va_start(ap,fmt);
1234 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1235 if (debugmode && debugmode != 2) {
1236 FILE * f = stdout;
1237 #ifdef _WIN32
1238 if (facility == LOG_LOCAL1) // logging to stdout
1239 f = stderr;
1240 #endif
1241 vfprintf(f, fmt, ap);
1242 fflush(f);
1243 }
1244 // in debugmode==2 mode we print output from knowndrives.o functions
1245 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1246 openlog("smartd", LOG_PID, facility);
1247 vsyslog_lines(LOG_INFO, fmt, ap);
1248 closelog();
1249 }
1250 va_end(ap);
1251 return;
1252 }
1253
1254 // This function prints either to stdout or to the syslog as needed.
1255 static void PrintOut(int priority, const char *fmt, ...){
1256 va_list ap;
1257
1258 // get the correct time in syslog()
1259 FixGlibcTimeZoneBug();
1260 // initialize variable argument list
1261 va_start(ap,fmt);
1262 if (debugmode) {
1263 FILE * f = stdout;
1264 #ifdef _WIN32
1265 if (facility == LOG_LOCAL1) // logging to stdout
1266 f = stderr;
1267 #endif
1268 vfprintf(f, fmt, ap);
1269 fflush(f);
1270 }
1271 else {
1272 openlog("smartd", LOG_PID, facility);
1273 vsyslog_lines(priority, fmt, ap);
1274 closelog();
1275 }
1276 va_end(ap);
1277 return;
1278 }
1279
1280 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1281 void checksumwarning(const char * string)
1282 {
1283 pout("Warning! %s error: invalid SMART checksum.\n", string);
1284 }
1285
1286 #ifndef _WIN32
1287
1288 // Wait for the pid file to show up, this makes sure a calling program knows
1289 // that the daemon is really up and running and has a pid to kill it
1290 static bool WaitForPidFile()
1291 {
1292 int waited, max_wait = 10;
1293 struct stat stat_buf;
1294
1295 if (pid_file.empty() || debugmode)
1296 return true;
1297
1298 for(waited = 0; waited < max_wait; ++waited) {
1299 if (!stat(pid_file.c_str(), &stat_buf)) {
1300 return true;
1301 } else
1302 sleep(1);
1303 }
1304 return false;
1305 }
1306
1307 #endif // _WIN32
1308
1309 // Forks new process, closes ALL file descriptors, redirects stdin,
1310 // stdout, and stderr. Not quite daemon(). See
1311 // http://www.linuxjournal.com/article/2335
1312 // for a good description of why we do things this way.
1313 static void DaemonInit()
1314 {
1315 #ifndef _WIN32
1316 pid_t pid;
1317 int i;
1318
1319 // flush all buffered streams. Else we might get two copies of open
1320 // streams since both parent and child get copies of the buffers.
1321 fflush(NULL);
1322
1323 if (do_fork) {
1324 if ((pid=fork()) < 0) {
1325 // unable to fork!
1326 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1327 EXIT(EXIT_STARTUP);
1328 }
1329 else if (pid) {
1330 // we are the parent process, wait for pid file, then exit cleanly
1331 if(!WaitForPidFile()) {
1332 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1333 EXIT(EXIT_STARTUP);
1334 } else
1335 EXIT(0);
1336 }
1337
1338 // from here on, we are the child process.
1339 setsid();
1340
1341 // Fork one more time to avoid any possibility of having terminals
1342 if ((pid=fork()) < 0) {
1343 // unable to fork!
1344 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1345 EXIT(EXIT_STARTUP);
1346 }
1347 else if (pid)
1348 // we are the parent process -- exit cleanly
1349 EXIT(0);
1350
1351 // Now we are the child's child...
1352 }
1353
1354 // close any open file descriptors
1355 for (i=getdtablesize();i>=0;--i)
1356 close(i);
1357
1358 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1359
1360 // redirect any IO attempts to /dev/null for stdin
1361 i=open("/dev/null",O_RDWR);
1362 if (i>=0) {
1363 // stdout
1364 NO_warn_unused_result(dup(i));
1365 // stderr
1366 NO_warn_unused_result(dup(i));
1367 };
1368 umask(0022);
1369 NO_warn_unused_result(chdir("/"));
1370
1371 if (do_fork)
1372 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1373
1374 #else // _WIN32
1375
1376 // No fork() on native Win32
1377 // Detach this process from console
1378 fflush(NULL);
1379 if (daemon_detach("smartd")) {
1380 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1381 EXIT(EXIT_STARTUP);
1382 }
1383 // stdin/out/err now closed if not redirected
1384
1385 #endif // _WIN32
1386 return;
1387 }
1388
1389 // create a PID file containing the current process id
1390 static void WritePidFile()
1391 {
1392 if (!pid_file.empty()) {
1393 pid_t pid = getpid();
1394 mode_t old_umask;
1395 #ifndef __CYGWIN__
1396 old_umask = umask(0077); // rwx------
1397 #else
1398 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1399 old_umask = umask(0033); // rwxr--r--
1400 #endif
1401
1402 stdio_file f(pid_file.c_str(), "w");
1403 umask(old_umask);
1404 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1405 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1406 EXIT(EXIT_PID);
1407 }
1408 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1409 }
1410 }
1411
1412 // Prints header identifying version of code and home
1413 static void PrintHead()
1414 {
1415 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1416 }
1417
1418 // prints help info for configuration file Directives
1419 static void Directives()
1420 {
1421 PrintOut(LOG_INFO,
1422 "Configuration file (%s) Directives (after device name):\n"
1423 " -d TYPE Set the device type: auto, ignore, removable,\n"
1424 " %s\n"
1425 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1426 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1427 " -S VAL Enable/disable attribute autosave (on/off)\n"
1428 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1429 " -H Monitor SMART Health Status, report if failed\n"
1430 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1431 " -l TYPE Monitor SMART log or self-test status:\n"
1432 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1433 " -l scterc,R,W Set SCT Error Recovery Control\n"
1434 " -e Change device setting: aam,[N|off], apm,[N|off], lookahead,[on|off],\n"
1435 " security-freeze, standby,[N|off], wcache,[on|off]\n"
1436 " -f Monitor 'Usage' Attributes, report failures\n"
1437 " -m ADD Send email warning to address ADD\n"
1438 " -M TYPE Modify email warning behavior (see man page)\n"
1439 " -p Report changes in 'Prefailure' Attributes\n"
1440 " -u Report changes in 'Usage' Attributes\n"
1441 " -t Equivalent to -p and -u Directives\n"
1442 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1443 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1444 " -i ID Ignore Attribute ID for -f Directive\n"
1445 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1446 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1447 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1448 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1449 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1450 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1451 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1452 " -F TYPE Use firmware bug workaround:\n"
1453 " %s\n"
1454 " # Comment: text after a hash sign is ignored\n"
1455 " \\ Line continuation character\n"
1456 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1457 "Use ID = 0 to turn off -C and/or -U Directives\n"
1458 "Example: /dev/sda -a\n",
1459 configfile,
1460 smi()->get_valid_dev_types_str().c_str(),
1461 get_valid_firmwarebug_args());
1462 }
1463
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the command line option opt, or NULL if opt is not one of
   the options listed below. */
static const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
  if (opt == 'B' || opt == 'p' || opt == 'w')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";
  return NULL;
}
1490
1491 /* prints help information for command syntax */
1492 static void Usage()
1493 {
1494 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1495 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1496 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1497 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1498 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1499 #endif
1500 PrintOut(LOG_INFO,"\n");
1501 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1502 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1503 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1504 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1505 PrintOut(LOG_INFO,"\n");
1506 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1507 #endif
1508 PrintOut(LOG_INFO,"]\n\n");
1509 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1510 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1511 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1512 #ifdef HAVE_LIBCAP_NG
1513 PrintOut(LOG_INFO," -C, --capabilities\n");
1514 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1515 " Warning: Mail notification does not work when used.\n\n");
1516 #endif
1517 PrintOut(LOG_INFO," -d, --debug\n");
1518 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1519 PrintOut(LOG_INFO," -D, --showdirectives\n");
1520 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1521 PrintOut(LOG_INFO," -h, --help, --usage\n");
1522 PrintOut(LOG_INFO," Display this help and exit\n\n");
1523 PrintOut(LOG_INFO," -i N, --interval=N\n");
1524 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1525 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1526 #ifndef _WIN32
1527 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1528 #else
1529 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1530 #endif
1531 #ifndef _WIN32
1532 PrintOut(LOG_INFO," -n, --no-fork\n");
1533 PrintOut(LOG_INFO," Do not fork into background\n\n");
1534 #endif // _WIN32
1535 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1536 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1537 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1538 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1539 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1540 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1541 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1542 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1543 #ifdef SMARTMONTOOLS_SAVESTATES
1544 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1545 #endif
1546 PrintOut(LOG_INFO,"\n");
1547 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1548 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1549 #ifndef _WIN32
1550 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1551 #else
1552 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1553 #endif
1554 #ifdef _WIN32
1555 PrintOut(LOG_INFO," --service\n");
1556 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1557 PrintOut(LOG_INFO," smartd install [options]\n");
1558 PrintOut(LOG_INFO," Remove service with:\n");
1559 PrintOut(LOG_INFO," smartd remove\n\n");
1560 #endif // _WIN32
1561 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1562 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1563 }
1564
1565 static int CloseDevice(smart_device * device, const char * name)
1566 {
1567 if (!device->close()){
1568 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1569 return 1;
1570 }
1571 // device sucessfully closed
1572 return 0;
1573 }
1574
// return true if a char is not allowed in a state file name,
// i.e. if it is neither a digit '0'-'9' nor a letter 'A'-'Z' or 'a'-'z'
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1582
1583 // Read error count from Summary or Extended Comprehensive SMART error log
1584 // Return -1 on error
1585 static int read_ata_error_count(ata_device * device, const char * name,
1586 firmwarebug_defs firmwarebugs, bool extended)
1587 {
1588 if (!extended) {
1589 ata_smart_errorlog log;
1590 if (ataReadErrorLog(device, &log, firmwarebugs)){
1591 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1592 return -1;
1593 }
1594 return (log.error_log_pointer ? log.ata_error_count : 0);
1595 }
1596 else {
1597 ata_smart_exterrlog logx;
1598 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1599 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1600 return -1;
1601 }
1602 // Some disks use the reserved byte as index, see ataprint.cpp.
1603 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1604 }
1605 }
1606
1607 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1608 // error count, and top bits are the power-on hours of the last error.
1609 static int SelfTestErrorCount(ata_device * device, const char * name,
1610 firmwarebug_defs firmwarebugs)
1611 {
1612 struct ata_smart_selftestlog log;
1613
1614 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1615 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1616 return -1;
1617 }
1618
1619 // return current number of self-test errors
1620 return ataPrintSmartSelfTestlog(&log, false, firmwarebugs);
1621 }
1622
// Extract the self-test error count (bits 0-7) and the power-on hours of
// the last error (bits 8-23) from a packed self-test result value.
// The argument is parenthesized so that expressions like 'a | b' are
// evaluated as a whole before masking and shifting.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1625
// Check offline data collection status:
// returns true if the lower 7 bits of STATUS are 0x03, bit 7 is ignored
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned char code = status & 0x7f;
  return (code == 0x03);
}
1631
// Check self-test execution status:
// returns true if the upper 4 bits of STATUS are all set
static inline bool is_self_test_in_progress(unsigned char status)
{
  return ((status & 0xf0) == 0xf0);
}
1637
1638 // Log offline data collection status
1639 static void log_offline_data_coll_status(const char * name, unsigned char status)
1640 {
1641 const char * msg;
1642 switch (status & 0x7f) {
1643 case 0x00: msg = "was never started"; break;
1644 case 0x02: msg = "was completed without error"; break;
1645 case 0x03: msg = "is in progress"; break;
1646 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1647 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1648 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1649 default: msg = 0;
1650 }
1651
1652 if (msg)
1653 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1654 "Device: %s, offline data collection %s%s\n", name, msg,
1655 ((status & 0x80) ? " (auto:on)" : ""));
1656 else
1657 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1658 name, status);
1659 }
1660
1661 // Log self-test execution status
1662 static void log_self_test_exec_status(const char * name, unsigned char status)
1663 {
1664 const char * msg;
1665 switch (status >> 4) {
1666 case 0x0: msg = "completed without error"; break;
1667 case 0x1: msg = "was aborted by the host"; break;
1668 case 0x2: msg = "was interrupted by the host with a reset"; break;
1669 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1670 case 0x4: msg = "completed with error (unknown test element)"; break;
1671 case 0x5: msg = "completed with error (electrical test element)"; break;
1672 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1673 case 0x7: msg = "completed with error (read test element)"; break;
1674 case 0x8: msg = "completed with error (handling damage?)"; break;
1675 default: msg = 0;
1676 }
1677
1678 if (msg)
1679 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1680 "Device: %s, previous self-test %s\n", name, msg);
1681 else if ((status >> 4) == 0xf)
1682 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1683 name, status & 0x0f);
1684 else
1685 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1686 name, status);
1687 }
1688
1689 // Check pending sector count id (-C, -U directives).
1690 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1691 unsigned char id, const char * msg)
1692 {
1693 // Check attribute index
1694 int i = ata_find_attr_index(id, state.smartval);
1695 if (i < 0) {
1696 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1697 cfg.name.c_str(), msg, id);
1698 return false;
1699 }
1700
1701 // Check value
1702 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1703 cfg.attribute_defs);
1704 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1705 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1706 cfg.name.c_str(), msg, id, rawval, rawval);
1707 return false;
1708 }
1709
1710 return true;
1711 }
1712
1713 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1714 static void finish_device_scan(dev_config & cfg, dev_state & state)
1715 {
1716 // Set cfg.emailfreq if user hasn't set it
1717 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1718 // Avoid that emails are suppressed forever due to state persistence
1719 if (cfg.state_file.empty())
1720 cfg.emailfreq = 1; // '-M once'
1721 else
1722 cfg.emailfreq = 2; // '-M daily'
1723 }
1724
1725 // Start self-test regex check now if time was not read from state file
1726 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1727 state.scheduled_test_next_check = time(0);
1728 }
1729
1730 // Common function to format result message for ATA setting
1731 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1732 int set_option = 0, bool has_value = false)
1733 {
1734 if (!msg.empty())
1735 msg += ", ";
1736 msg += name;
1737 if (!ok)
1738 msg += ":--";
1739 else if (set_option < 0)
1740 msg += ":off";
1741 else if (has_value)
1742 msg += strprintf(":%d", set_option-1);
1743 else if (set_option > 0)
1744 msg += ":on";
1745 }
1746
1747
1748 // TODO: Add '-F swapid' directive
1749 const bool fix_swapped_id = false;
1750
1751 // scan to see what ata devices there are, and if they support SMART
1752 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1753 {
1754 int supported=0;
1755 struct ata_identify_device drive;
1756 const char *name = cfg.name.c_str();
1757 int retid;
1758
1759 // Device must be open
1760
1761 // Get drive identity structure
1762 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1763 if (retid<0)
1764 // Unable to read Identity structure
1765 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1766 else
1767 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1768 name, packetdevicetype(retid-1));
1769 CloseDevice(atadev, name);
1770 return 2;
1771 }
1772
1773 // Get drive identity, size and rotation rate (HDD/SSD)
1774 char model[40+1], serial[20+1], firmware[8+1];
1775 ata_format_id_string(model, drive.model, sizeof(model)-1);
1776 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1777 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1778
1779 ata_size_info sizes;
1780 ata_get_size_info(&drive, sizes);
1781 state.num_sectors = sizes.sectors;
1782 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1783
1784 char wwn[30]; wwn[0] = 0;
1785 unsigned oui = 0; uint64_t unique_id = 0;
1786 int naa = ata_get_wwn(&drive, oui, unique_id);
1787 if (naa >= 0)
1788 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1789
1790 // Format device id string for warning emails
1791 char cap[32];
1792 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1793 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1794
1795 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1796
1797 // Show if device in database, and use preset vendor attribute
1798 // options unless user has requested otherwise.
1799 if (cfg.ignorepresets)
1800 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1801 else {
1802 // Apply vendor specific presets, print warning if present
1803 const drive_settings * dbentry = lookup_drive_apply_presets(
1804 &drive, cfg.attribute_defs, cfg.firmwarebugs);
1805 if (!dbentry)
1806 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1807 else {
1808 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1809 name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1810 if (*dbentry->warningmsg)
1811 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1812 }
1813 }
1814
1815 // Check for ATA Security LOCK
1816 unsigned short word128 = drive.words088_255[128-88];
1817 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1818 if (locked)
1819 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
1820
1821 // Set default '-C 197[+]' if no '-C ID' is specified.
1822 if (!cfg.curr_pending_set)
1823 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1824 // Set default '-U 198[+]' if no '-U ID' is specified.
1825 if (!cfg.offl_pending_set)
1826 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1827
1828 // If requested, show which presets would be used for this drive
1829 if (cfg.showpresets) {
1830 int savedebugmode=debugmode;
1831 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1832 if (!debugmode)
1833 debugmode=2;
1834 show_presets(&drive);
1835 debugmode=savedebugmode;
1836 }
1837
1838 // see if drive supports SMART
1839 supported=ataSmartSupport(&drive);
1840 if (supported!=1) {
1841 if (supported==0)
1842 // drive does NOT support SMART
1843 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1844 else
1845 // can't tell if drive supports SMART
1846 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1847
1848 // should we proceed anyway?
1849 if (cfg.permissive) {
1850 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1851 }
1852 else {
1853 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1854 CloseDevice(atadev, name);
1855 return 2;
1856 }
1857 }
1858
1859 if (ataEnableSmart(atadev)) {
1860 // Enable SMART command has failed
1861 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1862
1863 if (ataIsSmartEnabled(&drive) <= 0) {
1864 CloseDevice(atadev, name);
1865 return 2;
1866 }
1867 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
1868 }
1869
1870 // disable device attribute autosave...
1871 if (cfg.autosave==1) {
1872 if (ataDisableAutoSave(atadev))
1873 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1874 else
1875 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1876 }
1877
1878 // or enable device attribute autosave
1879 if (cfg.autosave==2) {
1880 if (ataEnableAutoSave(atadev))
1881 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1882 else
1883 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1884 }
1885
1886 // capability check: SMART status
1887 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1888 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1889 cfg.smartcheck = false;
1890 }
1891
1892 // capability check: Read smart values and thresholds. Note that
1893 // smart values are ALSO needed even if we ONLY want to know if the
1894 // device is self-test log or error-log capable! After ATA-5, this
1895 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1896 // but sadly not for ATA-5. Sigh.
1897
1898 // do we need to get SMART data?
1899 bool smart_val_ok = false;
1900 if ( cfg.autoofflinetest || cfg.selftest
1901 || cfg.errorlog || cfg.xerrorlog
1902 || cfg.offlinests || cfg.selfteststs
1903 || cfg.usagefailed || cfg.prefail || cfg.usage
1904 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1905 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1906
1907 if (ataReadSmartValues(atadev, &state.smartval)) {
1908 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1909 cfg.usagefailed = cfg.prefail = cfg.usage = false;
1910 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1911 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1912 }
1913 else {
1914 smart_val_ok = true;
1915 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1916 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1917 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1918 cfg.usagefailed = false;
1919 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1920 memset(&state.smartthres, 0, sizeof(state.smartthres));
1921 }
1922 }
1923
1924 // see if the necessary Attribute is there to monitor offline or
1925 // current pending sectors or temperature
1926 if ( cfg.curr_pending_id
1927 && !check_pending_id(cfg, state, cfg.curr_pending_id,
1928 "Current_Pending_Sector"))
1929 cfg.curr_pending_id = 0;
1930
1931 if ( cfg.offl_pending_id
1932 && !check_pending_id(cfg, state, cfg.offl_pending_id,
1933 "Offline_Uncorrectable"))
1934 cfg.offl_pending_id = 0;
1935
1936 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1937 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1938 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
1939 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1940 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1941 }
1942
1943 // Report ignored '-r' or '-R' directives
1944 for (int id = 1; id <= 255; id++) {
1945 if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
1946 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
1947 const char * excl = (cfg.monitor_attr_flags.is_set(id,
1948 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
1949
1950 int idx = ata_find_attr_index(id, state.smartval);
1951 if (idx < 0)
1952 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
1953 else {
1954 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
1955 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
1956 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
1957 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
1958 }
1959 }
1960 }
1961 }
1962
1963 // enable/disable automatic on-line testing
1964 if (cfg.autoofflinetest) {
1965 // is this an enable or disable request?
1966 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1967 if (!smart_val_ok)
1968 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1969 else {
1970 // if command appears unsupported, issue a warning...
1971 if (!isSupportAutomaticTimer(&state.smartval))
1972 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1973 // ... but then try anyway
1974 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1975 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1976 else
1977 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1978 }
1979 }
1980
1981 // Read log directories if required for capability check
1982 ata_smart_log_directory smart_logdir, gp_logdir;
1983 bool smart_logdir_ok = false, gp_logdir_ok = false;
1984
1985 if ( isGeneralPurposeLoggingCapable(&drive)
1986 && (cfg.errorlog || cfg.selftest)
1987 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1988 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1989 smart_logdir_ok = true;
1990 }
1991
1992 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1993 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
1994 gp_logdir_ok = true;
1995 }
1996
1997 // capability check: self-test-log
1998 state.selflogcount = 0; state.selfloghour = 0;
1999 if (cfg.selftest) {
2000 int retval;
2001 if (!( cfg.permissive
2002 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2003 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2004 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2005 cfg.selftest = false;
2006 }
2007 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2008 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2009 cfg.selftest = false;
2010 }
2011 else {
2012 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2013 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2014 }
2015 }
2016
2017 // capability check: ATA error log
2018 state.ataerrorcount = 0;
2019 if (cfg.errorlog) {
2020 int errcnt1;
2021 if (!( cfg.permissive
2022 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2023 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2024 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2025 cfg.errorlog = false;
2026 }
2027 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2028 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2029 cfg.errorlog = false;
2030 }
2031 else
2032 state.ataerrorcount = errcnt1;
2033 }
2034
2035 if (cfg.xerrorlog) {
2036 int errcnt2;
2037 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2038 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2039 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2040 name);
2041 cfg.xerrorlog = false;
2042 }
2043 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2044 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2045 cfg.xerrorlog = false;
2046 }
2047 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2048 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2049 name, state.ataerrorcount, errcnt2);
2050 // Record max error count
2051 if (errcnt2 > state.ataerrorcount)
2052 state.ataerrorcount = errcnt2;
2053 }
2054 else
2055 state.ataerrorcount = errcnt2;
2056 }
2057
2058 // capability check: self-test and offline data collection status
2059 if (cfg.offlinests || cfg.selfteststs) {
2060 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2061 if (cfg.offlinests)
2062 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2063 if (cfg.selfteststs)
2064 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2065 cfg.offlinests = cfg.selfteststs = false;
2066 }
2067 }
2068
2069 // capabilities check -- does it support powermode?
2070 if (cfg.powermode) {
2071 int powermode = ataCheckPowerMode(atadev);
2072
2073 if (-1 == powermode) {
2074 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2075 cfg.powermode=0;
2076 }
2077 else if (powermode!=0x00 && powermode!=0x01
2078 && powermode!=0x40 && powermode!=0x41
2079 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2080 && powermode!=0xff) {
2081 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2082 name, powermode);
2083 cfg.powermode=0;
2084 }
2085 }
2086
2087 // Apply ATA settings
2088 std::string msg;
2089
2090 if (cfg.set_aam)
2091 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2092 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2093 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2094
2095 if (cfg.set_apm)
2096 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2097 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2098 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2099
2100 if (cfg.set_lookahead)
2101 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2102 (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2103 cfg.set_lookahead);
2104
2105 if (cfg.set_wcache)
2106 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2107 (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2108
2109 if (cfg.set_security_freeze)
2110 format_set_result_msg(msg, "Security freeze",
2111 ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2112
2113 if (cfg.set_standby)
2114 format_set_result_msg(msg, "Standby",
2115 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2116
2117 // Report as one log entry
2118 if (!msg.empty())
2119 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2120
2121 // set SCT Error Recovery Control if requested
2122 if (cfg.sct_erc_set) {
2123 if (!isSCTErrorRecoveryControlCapable(&drive))
2124 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2125 name);
2126 else if (locked)
2127 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2128 name);
2129 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2130 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2131 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2132 else
2133 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2134 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2135 }
2136
2137 // If no tests available or selected, return
2138 if (!( cfg.smartcheck || cfg.selftest
2139 || cfg.errorlog || cfg.xerrorlog
2140 || cfg.offlinests || cfg.selfteststs
2141 || cfg.usagefailed || cfg.prefail || cfg.usage
2142 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2143 CloseDevice(atadev, name);
2144 return 3;
2145 }
2146
2147 // tell user we are registering device
2148 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2149
2150 // close file descriptor
2151 CloseDevice(atadev, name);
2152
2153 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2154 // Build file name for state file
2155 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2156 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2157 if (!state_path_prefix.empty()) {
2158 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2159 // Read previous state
2160 if (read_dev_state(cfg.state_file.c_str(), state)) {
2161 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2162 // Copy ATA attribute values to temp state
2163 state.update_temp_state();
2164 }
2165 }
2166 if (!attrlog_path_prefix.empty())
2167 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2168 }
2169
2170 finish_device_scan(cfg, state);
2171
2172 return 0;
2173 }
2174
2175 // on success, return 0. On failure, return >0. Never return <0,
2176 // please.
2177 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2178 {
2179 int err, req_len, avail_len, version, len;
2180 const char *device = cfg.name.c_str();
2181 struct scsi_iec_mode_page iec;
2182 UINT8 tBuf[64];
2183 UINT8 inqBuf[96];
2184 UINT8 vpdBuf[252];
2185 char lu_id[64], serial[256], vendor[40], model[40];
2186
2187 // Device must be open
2188 memset(inqBuf, 0, 96);
2189 req_len = 36;
2190 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2191 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2192 req_len = 64;
2193 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2194 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2195 "skip device\n", device);
2196 return 2;
2197 }
2198 }
2199 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2200
2201 avail_len = inqBuf[4] + 5;
2202 len = (avail_len < req_len) ? avail_len : req_len;
2203 if (len < 36) {
2204 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2205 "skip device\n", device);
2206 return 2;
2207 }
2208
2209 int pdt = inqBuf[0] & 0x1f;
2210
2211 if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2212 (0xe == pdt))) {
2213 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2214 "skip\n", device, pdt);
2215 return 2;
2216 }
2217
2218 if (supported_vpd_pages_p) {
2219 delete supported_vpd_pages_p;
2220 supported_vpd_pages_p = NULL;
2221 }
2222 supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2223
2224 lu_id[0] = '\0';
2225 if ((version >= 0x3) && (version < 0x8)) {
2226 /* SPC to SPC-5 */
2227 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2228 vpdBuf, sizeof(vpdBuf))) {
2229 len = vpdBuf[3];
2230 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2231 }
2232 }
2233 serial[0] = '\0';
2234 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2235 vpdBuf, sizeof(vpdBuf))) {
2236 len = vpdBuf[3];
2237 vpdBuf[4 + len] = '\0';
2238 scsi_format_id_string(serial, (const unsigned char *)&vpdBuf[4], len);
2239 }
2240
2241 unsigned int lb_size;
2242 char si_str[64];
2243 uint64_t capacity = scsiGetSize(scsidev, &lb_size, NULL);
2244
2245 if (capacity)
2246 format_capacity(si_str, sizeof(si_str), capacity, ".");
2247 else
2248 si_str[0] = '\0';
2249
2250 // Format device id string for warning emails
2251 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2252 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2253 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2254 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2255 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2256
2257 // format "model" string
2258 scsi_format_id_string(vendor, (const unsigned char *)&inqBuf[8], 8);
2259 scsi_format_id_string(model, (const unsigned char *)&inqBuf[16], 16);
2260 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2261
2262 // check that device is ready for commands. IE stores its stuff on
2263 // the media.
2264 if ((err = scsiTestUnitReady(scsidev))) {
2265 if (SIMPLE_ERR_NOT_READY == err)
2266 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2267 else if (SIMPLE_ERR_NO_MEDIUM == err)
2268 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2269 else if (SIMPLE_ERR_BECOMING_READY == err)
2270 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2271 else
2272 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2273 CloseDevice(scsidev, device);
2274 return 2;
2275 }
2276
2277 // Badly-conforming USB storage devices may fail this check.
2278 // The response to the following IE mode page fetch (current and
2279 // changeable values) is carefully examined. It has been found
2280 // that various USB devices that malform the response will lock up
2281 // if asked for a log page (e.g. temperature) so it is best to
2282 // bail out now.
2283 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2284 state.modese_len = iec.modese_len;
2285 else if (SIMPLE_ERR_BAD_FIELD == err)
2286 ; /* continue since it is reasonable not to support IE mpage */
2287 else { /* any other error (including malformed response) unreasonable */
2288 PrintOut(LOG_INFO,
2289 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2290 device, err);
2291 CloseDevice(scsidev, device);
2292 return 3;
2293 }
2294
2295 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2296 // smart if it is off). This may change to be the same as the ATA side.
2297 if (!scsi_IsExceptionControlEnabled(&iec)) {
2298 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2299 "Try 'smartctl -s on %s' to turn on SMART features\n",
2300 device, device);
2301 CloseDevice(scsidev, device);
2302 return 3;
2303 }
2304
2305 // Flag that certain log pages are supported (information may be
2306 // available from other sources).
2307 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2308 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2309 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2310 {
2311 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2312 switch (tBuf[k]) {
2313 case TEMPERATURE_LPAGE:
2314 state.TempPageSupported = 1;
2315 break;
2316 case IE_LPAGE:
2317 state.SmartPageSupported = 1;
2318 break;
2319 case READ_ERROR_COUNTER_LPAGE:
2320 state.ReadECounterPageSupported = 1;
2321 break;
2322 case WRITE_ERROR_COUNTER_LPAGE:
2323 state.WriteECounterPageSupported = 1;
2324 break;
2325 case VERIFY_ERROR_COUNTER_LPAGE:
2326 state.VerifyECounterPageSupported = 1;
2327 break;
2328 case NON_MEDIUM_ERROR_LPAGE:
2329 state.NonMediumErrorPageSupported = 1;
2330 break;
2331 default:
2332 break;
2333 }
2334 }
2335 }
2336
2337 // Check if scsiCheckIE() is going to work
2338 {
2339 UINT8 asc = 0;
2340 UINT8 ascq = 0;
2341 UINT8 currenttemp = 0;
2342 UINT8 triptemp = 0;
2343
2344 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2345 &asc, &ascq, &currenttemp, &triptemp)) {
2346 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2347 state.SuppressReport = 1;
2348 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2349 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2350 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2351 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2352 }
2353 }
2354 }
2355
2356 // capability check: self-test-log
2357 if (cfg.selftest){
2358 int retval = scsiCountFailedSelfTests(scsidev, 0);
2359 if (retval<0) {
2360 // no self-test log, turn off monitoring
2361 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2362 cfg.selftest = false;
2363 state.selflogcount = 0;
2364 state.selfloghour = 0;
2365 }
2366 else {
2367 // register starting values to watch for changes
2368 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2369 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2370 }
2371 }
2372
2373 // disable autosave (set GLTSD bit)
2374 if (cfg.autosave==1){
2375 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2376 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2377 else
2378 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2379 }
2380
2381 // or enable autosave (clear GLTSD bit)
2382 if (cfg.autosave==2){
2383 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2384 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2385 else
2386 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2387 }
2388
2389 // tell user we are registering device
2390 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2391
2392 // Make sure that init_standby_check() ignores SCSI devices
2393 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2394
2395 // close file descriptor
2396 CloseDevice(scsidev, device);
2397
2398 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2399 // Build file name for state file
2400 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2401 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2402 if (!state_path_prefix.empty()) {
2403 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2404 // Read previous state
2405 if (read_dev_state(cfg.state_file.c_str(), state)) {
2406 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2407 // Copy ATA attribute values to temp state
2408 state.update_temp_state();
2409 }
2410 }
2411 if (!attrlog_path_prefix.empty())
2412 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2413 }
2414
2415 finish_device_scan(cfg, state);
2416
2417 return 0;
2418 }
2419
// Convert a 128 bit little-endian integer to uint64_t.
// If the value does not fit (any of the upper 8 bytes is nonzero),
// the maximum uint64_t value is returned instead.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Collect the upper 64 bits; any set bit there means overflow.
  unsigned char upper_bits = 0;
  for (int pos = 8; pos < 16; pos++)
    upper_bits |= val[pos];
  if (upper_bits)
    return ~(uint64_t)0;

  // Assemble the lower 64 bits, most significant byte (index 7) first.
  uint64_t result = 0;
  for (int pos = 8; pos-- > 0; )
    result = (result << 8) | val[pos];
  return result;
}
2433
2434 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
2435 static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2436 {
2437 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2438 for (int i = 0; i < 8; i++) {
2439 if (smart_log.temp_sensor[i] > k)
2440 k = smart_log.temp_sensor[i];
2441 }
2442 return k;
2443 }
2444
2445 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev)
2446 {
2447 const char *name = cfg.name.c_str();
2448
2449 // Device must be open
2450
2451 // Get ID Controller
2452 nvme_id_ctrl id_ctrl;
2453 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2454 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2455 CloseDevice(nvmedev, name);
2456 return 2;
2457 }
2458
2459 // Get drive identity
2460 char model[40+1], serial[20+1], firmware[8+1];
2461 format_char_array(model, id_ctrl.mn);
2462 format_char_array(serial, id_ctrl.sn);
2463 format_char_array(firmware, id_ctrl.fr);
2464
2465 // Format device id string for warning emails
2466 char nsstr[32] = "", capstr[32] = "";
2467 unsigned nsid = nvmedev->get_nsid();
2468 if (nsid != 0xffffffff)
2469 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2470 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2471 if (capacity)
2472 format_capacity(capstr, sizeof(capstr), capacity, ".");
2473 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2474 nsstr, (capstr[0] ? ", " : ""), capstr);
2475
2476 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2477
2478 // Read SMART/Health log
2479 nvme_smart_log smart_log;
2480 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2481 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2482 CloseDevice(nvmedev, name);
2483 return 2;
2484 }
2485
2486 // Check temperature sensor support
2487 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2488 if (!nvme_get_max_temp_kelvin(smart_log)) {
2489 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2490 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2491 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2492 }
2493 }
2494
2495 // Init total error count
2496 if (cfg.errorlog || cfg.xerrorlog) {
2497 state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
2498 }
2499
2500 // If no supported tests selected, return
2501 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2502 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2503 CloseDevice(nvmedev, name);
2504 return 3;
2505 }
2506
2507 // Tell user we are registering device
2508 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2509
2510 // Make sure that init_standby_check() ignores NVMe devices
2511 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2512
2513 CloseDevice(nvmedev, name);
2514
2515 if (!state_path_prefix.empty()) {
2516 // Build file name for state file
2517 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2518 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2519 nsstr[0] = 0;
2520 if (nsid != 0xffffffff)
2521 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2522 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2523 // Read previous state
2524 if (read_dev_state(cfg.state_file.c_str(), state))
2525 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2526 }
2527
2528 finish_device_scan(cfg, state);
2529
2530 return 0;
2531 }
2532
2533 // If the self-test log has got more self-test errors (or more recent
2534 // self-test errors) recorded, then notify user.
2535 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2536 {
2537 const char * name = cfg.name.c_str();
2538
2539 if (newi<0)
2540 // command failed
2541 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2542 else {
2543 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2544
2545 // old and new error counts
2546 int oldc=state.selflogcount;
2547 int newc=SELFTEST_ERRORCOUNT(newi);
2548
2549 // old and new error timestamps in hours
2550 int oldh=state.selfloghour;
2551 int newh=SELFTEST_ERRORHOURS(newi);
2552
2553 if (oldc<newc) {
2554 // increase in error count
2555 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2556 name, oldc, newc);
2557 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2558 name, oldc, newc);
2559 state.must_write = true;
2560 }
2561 else if (newc > 0 && oldh != newh) {
2562 // more recent error
2563 // a 'more recent' error might actually be a smaller hour number,
2564 // if the hour number has wrapped.
2565 // There's still a bug here. You might just happen to run a new test
2566 // exactly 32768 hours after the previous failure, and have run exactly
2567 // 20 tests between the two, in which case smartd will miss the
2568 // new failure.
2569 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2570 name, newh);
2571 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2572 name, newh);
2573 state.must_write = true;
2574 }
2575
2576 // Print info if error entries have disappeared
2577 // or newer successful successful extended self-test exits
2578 if (oldc > newc) {
2579 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2580 name, oldc, newc);
2581 if (newc == 0)
2582 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2583 }
2584
2585 // Needed since self-test error count may DECREASE. Hour might
2586 // also have changed.
2587 state.selflogcount= newc;
2588 state.selfloghour = newh;
2589 }
2590 return;
2591 }
2592
// Characters identifying the self-test types, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (the terminating NUL of the string is not counted).
static const unsigned num_test_types = sizeof(test_type_chars)/sizeof(test_type_chars[0]) - 1;
2596
2597 // returns test type if time to do test of type testtype,
2598 // 0 if not time to do test.
2599 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2600 {
2601 // check that self-testing has been requested
2602 if (cfg.test_regex.empty())
2603 return 0;
2604
2605 // Exit if drive not capable of any test
2606 if ( state.not_cap_long && state.not_cap_short &&
2607 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2608 return 0;
2609
2610 // since we are about to call localtime(), be sure glibc is informed
2611 // of any timezone changes we make.
2612 if (!usetime)
2613 FixGlibcTimeZoneBug();
2614
2615 // Is it time for next check?
2616 time_t now = (!usetime ? time(0) : usetime);
2617 if (now < state.scheduled_test_next_check)
2618 return 0;
2619
2620 // Limit time check interval to 90 days
2621 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2622 state.scheduled_test_next_check = now - (3600L*24*90);
2623
2624 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2625 char testtype = 0;
2626 time_t testtime = 0; int testhour = 0;
2627 int maxtest = num_test_types-1;
2628
2629 for (time_t t = state.scheduled_test_next_check; ; ) {
2630 struct tm * tms = localtime(&t);
2631 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2632 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2633 for (int i = 0; i <= maxtest; i++) {
2634 // Skip if drive not capable of this test
2635 switch (test_type_chars[i]) {
2636 case 'L': if (state.not_cap_long) continue; break;
2637 case 'S': if (state.not_cap_short) continue; break;
2638 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2639 case 'O': if (scsi || state.not_cap_offline) continue; break;
2640 case 'c': case 'n':
2641 case 'r': if (scsi || state.not_cap_selective) continue; break;
2642 default: continue;
2643 }
2644 // Try match of "T/MM/DD/d/HH"
2645 char pattern[16];
2646 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2647 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2648 if (cfg.test_regex.full_match(pattern)) {
2649 // Test found
2650 testtype = pattern[0];
2651 testtime = t; testhour = tms->tm_hour;
2652 // Limit further matches to higher priority self-tests
2653 maxtest = i-1;
2654 break;
2655 }
2656 }
2657 // Exit if no tests left or current time reached
2658 if (maxtest < 0)
2659 break;
2660 if (t >= now)
2661 break;
2662 // Check next hour
2663 if ((t += 3600) > now)
2664 t = now;
2665 }
2666
2667 // Do next check not before next hour.
2668 struct tm * tmnow = localtime(&now);
2669 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2670
2671 if (testtype) {
2672 state.must_write = true;
2673 // Tell user if an old test was found.
2674 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2675 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2676 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2677 cfg.name.c_str(), testtype, datebuf);
2678 }
2679 }
2680
2681 return testtype;
2682 }
2683
2684 // Print a list of future tests.
2685 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2686 {
2687 unsigned numdev = configs.size();
2688 if (!numdev)
2689 return;
2690 std::vector<int> testcnts(numdev * num_test_types, 0);
2691
2692 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2693
2694 // FixGlibcTimeZoneBug(); // done in PrintOut()
2695 time_t now = time(0);
2696 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2697 dateandtimezoneepoch(datenow, now);
2698
2699 long seconds;
2700 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2701 // Check for each device whether a test will be run
2702 time_t testtime = now + seconds;
2703 for (unsigned i = 0; i < numdev; i++) {
2704 const dev_config & cfg = configs.at(i);
2705 dev_state & state = states.at(i);
2706 const char * p;
2707 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2708 if (testtype && (p = strchr(test_type_chars, testtype))) {
2709 unsigned t = (p - test_type_chars);
2710 // Report at most 5 tests of each type
2711 if (++testcnts[i*num_test_types + t] <= 5) {
2712 dateandtimezoneepoch(date, testtime);
2713 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2714 testcnts[i*num_test_types + t], testtype, date);
2715 }
2716 }
2717 }
2718 }
2719
2720 // Report totals
2721 dateandtimezoneepoch(date, now+seconds);
2722 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2723 for (unsigned i = 0; i < numdev; i++) {
2724 const dev_config & cfg = configs.at(i);
2725 bool scsi = devices.at(i)->is_scsi();
2726 for (unsigned t = 0; t < num_test_types; t++) {
2727 int cnt = testcnts[i*num_test_types + t];
2728 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2729 continue;
2730 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2731 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2732 }
2733 }
2734
2735 }
2736
2737 // Return zero on success, nonzero on failure. Perform offline (background)
2738 // short or long (extended) self test on given scsi device.
2739 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2740 {
2741 int retval = 0;
2742 const char *testname = 0;
2743 const char *name = cfg.name.c_str();
2744 int inProgress;
2745
2746 if (scsiSelfTestInProgress(device, &inProgress)) {
2747 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2748 state.not_cap_short = state.not_cap_long = true;
2749 return 1;
2750 }
2751
2752 if (1 == inProgress) {
2753 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2754 "progress.\n", name);
2755 return 1;
2756 }
2757
2758 switch (testtype) {
2759 case 'S':
2760 testname = "Short Self";
2761 retval = scsiSmartShortSelfTest(device);
2762 break;
2763 case 'L':
2764 testname = "Long Self";
2765 retval = scsiSmartExtendSelfTest(device);
2766 break;
2767 }
2768 // If we can't do the test, exit
2769 if (NULL == testname) {
2770 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2771 testtype);
2772 return 1;
2773 }
2774 if (retval) {
2775 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2776 (SIMPLE_ERR_BAD_FIELD == retval)) {
2777 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2778 testname);
2779 if ('L'==testtype)
2780 state.not_cap_long = true;
2781 else
2782 state.not_cap_short = true;
2783
2784 return 1;
2785 }
2786 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2787 testname, retval);
2788 return 1;
2789 }
2790
2791 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2792
2793 return 0;
2794 }
2795
2796 // Do an offline immediate or self-test. Return zero on success,
2797 // nonzero on failure.
2798 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2799 {
2800 const char *name = cfg.name.c_str();
2801
2802 // Read current smart data and check status/capability
2803 struct ata_smart_values data;
2804 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2805 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2806 return 1;
2807 }
2808
2809 // Check for capability to do the test
2810 int dotest = -1, mode = 0;
2811 const char *testname = 0;
2812 switch (testtype) {
2813 case 'O':
2814 testname="Offline Immediate ";
2815 if (isSupportExecuteOfflineImmediate(&data))
2816 dotest=OFFLINE_FULL_SCAN;
2817 else
2818 state.not_cap_offline = true;
2819 break;
2820 case 'C':
2821 testname="Conveyance Self-";
2822 if (isSupportConveyanceSelfTest(&data))
2823 dotest=CONVEYANCE_SELF_TEST;
2824 else
2825 state.not_cap_conveyance = true;
2826 break;
2827 case 'S':
2828 testname="Short Self-";
2829 if (isSupportSelfTest(&data))
2830 dotest=SHORT_SELF_TEST;
2831 else
2832 state.not_cap_short = true;
2833 break;
2834 case 'L':
2835 testname="Long Self-";
2836 if (isSupportSelfTest(&data))
2837 dotest=EXTEND_SELF_TEST;
2838 else
2839 state.not_cap_long = true;
2840 break;
2841
2842 case 'c': case 'n': case 'r':
2843 testname = "Selective Self-";
2844 if (isSupportSelectiveSelfTest(&data)) {
2845 dotest = SELECTIVE_SELF_TEST;
2846 switch (testtype) {
2847 case 'c': mode = SEL_CONT; break;
2848 case 'n': mode = SEL_NEXT; break;
2849 case 'r': mode = SEL_REDO; break;
2850 }
2851 }
2852 else
2853 state.not_cap_selective = true;
2854 break;
2855 }
2856
2857 // If we can't do the test, exit
2858 if (dotest<0) {
2859 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2860 return 1;
2861 }
2862
2863 // If currently running a self-test, do not interrupt it to start another.
2864 if (15==(data.self_test_exec_status >> 4)) {
2865 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
2866 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2867 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2868 } else {
2869 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2870 name, testname, (int)(data.self_test_exec_status & 0x0f));
2871 return 1;
2872 }
2873 }
2874
2875 if (dotest == SELECTIVE_SELF_TEST) {
2876 // Set test span
2877 ata_selective_selftest_args selargs, prev_args;
2878 selargs.num_spans = 1;
2879 selargs.span[0].mode = mode;
2880 prev_args.num_spans = 1;
2881 prev_args.span[0].start = state.selective_test_last_start;
2882 prev_args.span[0].end = state.selective_test_last_end;
2883 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2884 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2885 return 1;
2886 }
2887 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2888 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
2889 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2890 start, end, end - start + 1,
2891 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2892 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2893 state.selective_test_last_start = start;
2894 state.selective_test_last_end = end;
2895 }
2896
2897 // execute the test, and return status
2898 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2899 if (retval) {
2900 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2901 return retval;
2902 }
2903
2904 // Report recent test start to do_disable_standby_check()
2905 // and force log of next test status
2906 if (testtype == 'O')
2907 state.offline_started = true;
2908 else
2909 state.selftest_started = true;
2910
2911 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2912 return 0;
2913 }
2914
2915 // Check pending sector count attribute values (-C, -U directives).
2916 static void check_pending(const dev_config & cfg, dev_state & state,
2917 unsigned char id, bool increase_only,
2918 const ata_smart_values & smartval,
2919 int mailtype, const char * msg)
2920 {
2921 // Find attribute index
2922 int i = ata_find_attr_index(id, smartval);
2923 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2924 return;
2925
2926 // No report if no sectors pending.
2927 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2928 if (rawval == 0) {
2929 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
2930 return;
2931 }
2932
2933 // If attribute is not reset, report only sector count increases.
2934 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2935 if (!(!increase_only || prev_rawval < rawval))
2936 return;
2937
2938 // Format message.
2939 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
2940 if (prev_rawval > 0 && rawval != prev_rawval)
2941 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
2942
2943 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2944 MailWarning(cfg, state, mailtype, "%s", s.c_str());
2945 state.must_write = true;
2946 }
2947
// Format a temperature value for log messages.
// A value of 0 means "unset" and yields the literal "??"; any other value
// is written in decimal to 'buf', which is then returned.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  // unset
  return "??";
}
2956
2957 // Check Temperature limits
2958 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2959 {
2960 if (!(0 < currtemp && currtemp < 255)) {
2961 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2962 return;
2963 }
2964
2965 // Update Max Temperature
2966 const char * minchg = "", * maxchg = "";
2967 if (currtemp > state.tempmax) {
2968 if (state.tempmax)
2969 maxchg = "!";
2970 state.tempmax = currtemp;
2971 state.must_write = true;
2972 }
2973
2974 char buf[20];
2975 if (!state.temperature) {
2976 // First check
2977 if (!state.tempmin || currtemp < state.tempmin)
2978 // Delay Min Temperature update by ~ 30 minutes.
2979 state.tempmin_delay = time(0) + CHECKTIME - 60;
2980 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2981 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2982 if (triptemp)
2983 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2984 state.temperature = currtemp;
2985 }
2986 else {
2987 if (state.tempmin_delay) {
2988 // End Min Temperature update delay if ...
2989 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2990 || (state.tempmin_delay <= time(0))) { // or delay time is over.
2991 state.tempmin_delay = 0;
2992 if (!state.tempmin)
2993 state.tempmin = 255;
2994 }
2995 }
2996
2997 // Update Min Temperature
2998 if (!state.tempmin_delay && currtemp < state.tempmin) {
2999 state.tempmin = currtemp;
3000 state.must_write = true;
3001 if (currtemp != state.temperature)
3002 minchg = "!";
3003 }
3004
3005 // Track changes
3006 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3007 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3008 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3009 state.temperature = currtemp;
3010 }
3011 }
3012
3013 // Check limits
3014 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3015 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3016 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3017 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3018 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3019 }
3020 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3021 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3022 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3023 }
3024 else if (cfg.tempcrit) {
3025 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3026 if (currtemp < limit)
3027 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3028 }
3029 }
3030
3031 // Check normalized and raw attribute values.
3032 static void check_attribute(const dev_config & cfg, dev_state & state,
3033 const ata_smart_attribute & attr,
3034 const ata_smart_attribute & prev,
3035 int attridx,
3036 const ata_smart_threshold_entry * thresholds)
3037 {
3038 // Check attribute and threshold
3039 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3040 if (attrstate == ATTRSTATE_NON_EXISTING)
3041 return;
3042
3043 // If requested, check for usage attributes that have failed.
3044 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3045 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
3046 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3047 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3048 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3049 state.must_write = true;
3050 }
3051
3052 // Return if we're not tracking this type of attribute
3053 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3054 if (!( ( prefail && cfg.prefail)
3055 || (!prefail && cfg.usage )))
3056 return;
3057
3058 // Return if '-I ID' was specified
3059 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
3060 return;
3061
3062 // Issue warning if they don't have the same ID in all structures.
3063 if (attr.id != prev.id) {
3064 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3065 cfg.name.c_str(), attr.id, prev.id);
3066 return;
3067 }
3068
3069 // Compare normalized values if valid.
3070 bool valchanged = false;
3071 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3072 if (attr.current != prev.current)
3073 valchanged = true;
3074 }
3075
3076 // Compare raw values if requested.
3077 bool rawchanged = false;
3078 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3079 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
3080 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
3081 rawchanged = true;
3082 }
3083
3084 // Return if no change
3085 if (!(valchanged || rawchanged))
3086 return;
3087
3088 // Format value strings
3089 std::string currstr, prevstr;
3090 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3091 // Print raw values only
3092 currstr = strprintf("%s (Raw)",
3093 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3094 prevstr = strprintf("%s (Raw)",
3095 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3096 }
3097 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3098 // Print normalized and raw values
3099 currstr = strprintf("%d [Raw %s]", attr.current,
3100 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3101 prevstr = strprintf("%d [Raw %s]", prev.current,
3102 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3103 }
3104 else {
3105 // Print normalized values only
3106 currstr = strprintf("%d", attr.current);
3107 prevstr = strprintf("%d", prev.current);
3108 }
3109
3110 // Format message
3111 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3112 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3113 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3114 prevstr.c_str(), currstr.c_str());
3115
3116 // Report this change as critical ?
3117 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3118 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3119 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3120 MailWarning(cfg, state, 2, "%s", msg.c_str());
3121 }
3122 else {
3123 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3124 }
3125 state.must_write = true;
3126 }
3127
3128
3129 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3130 bool firstpass, bool allow_selftests)
3131 {
3132 const char * name = cfg.name.c_str();
3133
3134 // If user has asked, test the email warning system
3135 if (cfg.emailtest)
3136 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3137
3138 // User may have requested (with the -n Directive) to leave the disk
3139 // alone if it is in idle or standby mode. In this case check the
3140 // power mode first before opening the device for full access,
3141 // and exit without check if disk is reported in standby.
3142 if (cfg.powermode && !state.powermodefail) {
3143 // Note that 'is_powered_down()' handles opening the device itself, and
3144 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
3145 if (atadev->is_powered_down())
3146 {
3147 // skip at most powerskipmax checks
3148 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3149 // report first only except if state has changed, avoid waking up system disk
3150 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
3151 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
3152 state.lastpowermodeskipped = -1;
3153 }
3154 state.powerskipcnt++;
3155 return 0;
3156 }
3157 }
3158 }
3159
3160 // if we can't open device, fail gracefully rather than hard --
3161 // perhaps the next time around we'll be able to open it. ATAPI
3162 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
3163 // given (see linux cdrom driver).
3164 if (!atadev->open()) {
3165 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
3166 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3167 return 1;
3168 }
3169 if (debugmode)
3170 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
3171 reset_warning_mail(cfg, state, 9, "open device worked again");
3172
3173 // user may have requested (with the -n Directive) to leave the disk
3174 // alone if it is in idle or sleeping mode. In this case check the
3175 // power mode and exit without check if needed
3176 if (cfg.powermode && !state.powermodefail) {
3177 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3178 const char * mode = 0;
3179 if (0 <= powermode && powermode < 0xff) {
3180 // wait for possible spin up and check again
3181 int powermode2;
3182 sleep(5);
3183 powermode2 = ataCheckPowerMode(atadev);
3184 if (powermode2 > powermode)
3185 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3186 powermode = powermode2;
3187 }
3188
3189 switch (powermode){
3190 case -1:
3191 // SLEEP
3192 mode="SLEEP";
3193 if (cfg.powermode>=1)
3194 dontcheck=1;
3195 break;
3196 case 0x00:
3197 // STANDBY
3198 mode="STANDBY";
3199 if (cfg.powermode>=2)
3200 dontcheck=1;
3201 break;
3202 case 0x01:
3203 // STANDBY_Y
3204 mode="STANDBY_Y";
3205 if (cfg.powermode>=2)
3206 dontcheck=1;
3207 break;
3208 case 0x80:
3209 // IDLE
3210 mode="IDLE";
3211 if (cfg.powermode>=3)
3212 dontcheck=1;
3213 break;
3214 case 0x81:
3215 // IDLE_A
3216 mode="IDLE_A";
3217 if (cfg.powermode>=3)
3218 dontcheck=1;
3219 break;
3220 case 0x82:
3221 // IDLE_B
3222 mode="IDLE_B";
3223 if (cfg.powermode>=3)
3224 dontcheck=1;
3225 break;
3226 case 0x83:
3227 // IDLE_C
3228 mode="IDLE_C";
3229 if (cfg.powermode>=3)
3230 dontcheck=1;
3231 break;
3232 case 0xff:
3233 // ACTIVE/IDLE
3234 case 0x40:
3235 // ACTIVE
3236 case 0x41:
3237 // ACTIVE
3238 mode="ACTIVE or IDLE";
3239 break;
3240 default:
3241 // UNKNOWN
3242 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3243 name, powermode);
3244 state.powermodefail = true;
3245 break;
3246 }
3247
3248 // if we are going to skip a check, return now
3249 if (dontcheck){
3250 // skip at most powerskipmax checks
3251 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3252 CloseDevice(atadev, name);
3253 // report first only except if state has changed, avoid waking up system disk
3254 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3255 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3256 state.lastpowermodeskipped = powermode;
3257 }
3258 state.powerskipcnt++;
3259 return 0;
3260 }
3261 else {
3262 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3263 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3264 }
3265 state.powerskipcnt = 0;
3266 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3267 }
3268 else if (state.powerskipcnt) {
3269 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3270 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3271 state.powerskipcnt = 0;
3272 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3273 }
3274 }
3275
3276 // check smart status
3277 if (cfg.smartcheck) {
3278 int status=ataSmartStatus2(atadev);
3279 if (status==-1){
3280 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3281 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3282 state.must_write = true;
3283 }
3284 else if (status==1){
3285 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3286 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3287 state.must_write = true;
3288 }
3289 }
3290
3291 // Check everything that depends upon SMART Data (eg, Attribute values)
3292 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3293 || cfg.curr_pending_id || cfg.offl_pending_id
3294 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3295 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3296
3297 // Read current attribute values.
3298 ata_smart_values curval;
3299 if (ataReadSmartValues(atadev, &curval)){
3300 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3301 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3302 state.must_write = true;
3303 }
3304 else {
3305 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3306
3307 // look for current or offline pending sectors
3308 if (cfg.curr_pending_id)
3309 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3310 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3311 : "Total unreadable (pending) sectors" ));
3312
3313 if (cfg.offl_pending_id)
3314 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3315 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3316 : "Total offline uncorrectable sectors"));
3317
3318 // check temperature limits
3319 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3320 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3321
3322 // look for failed usage attributes, or track usage or prefail attributes
3323 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3324 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3325 check_attribute(cfg, state,
3326 curval.vendor_attributes[i],
3327 state.smartval.vendor_attributes[i],
3328 i, state.smartthres.thres_entries);
3329 }
3330 }
3331
3332 // Log changes of offline data collection status
3333 if (cfg.offlinests) {
3334 if ( curval.offline_data_collection_status
3335 != state.smartval.offline_data_collection_status
3336 || state.offline_started // test was started in previous call
3337 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3338 log_offline_data_coll_status(name, curval.offline_data_collection_status);
3339 }
3340
3341 // Log changes of self-test execution status
3342 if (cfg.selfteststs) {
3343 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
3344 || state.selftest_started // test was started in previous call
3345 || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3346 log_self_test_exec_status(name, curval.self_test_exec_status);
3347 }
3348
3349 // Save the new values for the next time around
3350 state.smartval = curval;
3351 }
3352 }
3353 state.offline_started = state.selftest_started = false;
3354
3355 // check if number of selftest errors has increased (note: may also DECREASE)
3356 if (cfg.selftest)
3357 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3358
3359 // check if number of ATA errors has increased
3360 if (cfg.errorlog || cfg.xerrorlog) {
3361
3362 int errcnt1 = -1, errcnt2 = -1;
3363 if (cfg.errorlog)
3364 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3365 if (cfg.xerrorlog)
3366 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3367
3368 // new number of errors is max of both logs
3369 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3370
3371 // did command fail?
3372 if (newc<0)
3373 // lack of PrintOut here is INTENTIONAL
3374 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3375
3376 // has error count increased?
3377 int oldc = state.ataerrorcount;
3378 if (newc>oldc){
3379 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3380 name, oldc, newc);
3381 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3382 name, oldc, newc);
3383 state.must_write = true;
3384 }
3385
3386 if (newc>=0)
3387 state.ataerrorcount=newc;
3388 }
3389
3390 // if the user has asked, and device is capable (or we're not yet
3391 // sure) check whether a self test should be done now.
3392 if (allow_selftests && !cfg.test_regex.empty()) {
3393 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3394 if (testtype)
3395 DoATASelfTest(cfg, state, atadev, testtype);
3396 }
3397
3398 // Don't leave device open -- the OS/user may want to access it
3399 // before the next smartd cycle!
3400 CloseDevice(atadev, name);
3401
3402 // Copy ATA attribute values to persistent state
3403 state.update_persistent_state();
3404
3405 return 0;
3406 }
3407
3408 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3409 {
3410 const char * name = cfg.name.c_str();
3411
3412 // If the user has asked for it, test the email warning system
3413 if (cfg.emailtest)
3414 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3415
3416 // if we can't open device, fail gracefully rather than hard --
3417 // perhaps the next time around we'll be able to open it
3418 if (!scsidev->open()) {
3419 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3420 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3421 return 1;
3422 } else if (debugmode)
3423 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
3424 reset_warning_mail(cfg, state, 9, "open device worked again");
3425
3426 UINT8 asc = 0, ascq = 0;
3427 UINT8 currenttemp = 0, triptemp = 0;
3428 if (!state.SuppressReport) {
3429 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3430 &asc, &ascq, &currenttemp, &triptemp)) {
3431 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3432 name);
3433 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3434 state.SuppressReport = 1;
3435 }
3436 }
3437 if (asc > 0) {
3438 const char * cp = scsiGetIEString(asc, ascq);
3439 if (cp) {
3440 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3441 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3442 } else if (asc == 4 && ascq == 9) {
3443 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3444 } else if (debugmode)
3445 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3446 name, (int)asc, (int)ascq);
3447 } else if (debugmode)
3448 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3449
3450 // check temperature limits
3451 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || !cfg.attrlog_file.empty())
3452 CheckTemperature(cfg, state, currenttemp, triptemp);
3453
3454 // check if number of selftest errors has increased (note: may also DECREASE)
3455 if (cfg.selftest)
3456 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3457
3458 if (allow_selftests && !cfg.test_regex.empty()) {
3459 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3460 if (testtype)
3461 DoSCSISelfTest(cfg, state, scsidev, testtype);
3462 }
3463 if (!cfg.attrlog_file.empty()){
3464 // saving error counters to state
3465 UINT8 tBuf[252];
3466 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3467 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3468 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3469 state.scsi_error_counters[0].found=1;
3470 }
3471 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3472 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3473 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3474 state.scsi_error_counters[1].found=1;
3475 }
3476 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3477 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3478 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3479 state.scsi_error_counters[2].found=1;
3480 }
3481 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3482 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3483 scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3484 state.scsi_nonmedium_error.found=1;
3485 }
3486 }
3487 CloseDevice(scsidev, name);
3488 return 0;
3489 }
3490
3491 static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3492 {
3493 const char * name = cfg.name.c_str();
3494
3495 // TODO: Use common open function for ATA/SCSI/NVMe
3496 // If user has asked, test the email warning system
3497 if (cfg.emailtest)
3498 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3499
3500 if (!nvmedev->open()) {
3501 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, nvmedev->get_errmsg());
3502 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3503 return 1;
3504 }
3505 if (debugmode)
3506 PrintOut(LOG_INFO,"Device: %s, opened NVMe device\n", name);
3507 reset_warning_mail(cfg, state, 9, "open device worked again");
3508
3509 // Read SMART/Health log
3510 nvme_smart_log smart_log;
3511 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3512 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3513 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3514 state.must_write = true;
3515 return 0;
3516 }
3517
3518 // Check Critical Warning bits
3519 if (cfg.smartcheck && smart_log.critical_warning) {
3520 unsigned char w = smart_log.critical_warning;
3521 std::string msg;
3522 static const char * const wnames[] =
3523 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3524
3525 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3526 if (!(w & (1 << b)))
3527 continue;
3528 if (cnt)
3529 msg += ", ";
3530 if (++cnt > 3) {
3531 msg += "..."; break;
3532 }
3533 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3534 msg += "*Unknown*"; break;
3535 }
3536 msg += wnames[b];
3537 }
3538
3539 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3540 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3541 state.must_write = true;
3542 }
3543
3544 // Check temperature limits
3545 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3546 int k = nvme_get_max_temp_kelvin(smart_log);
3547 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3548 int c = k - 273;
3549 if (c < 1)
3550 c = 1;
3551 else if (c > 0xff)
3552 c = 0xff;
3553 CheckTemperature(cfg, state, c, 0);
3554 }
3555
3556 // Check if number of errors has increased
3557 if (cfg.errorlog || cfg.xerrorlog) {
3558 uint64_t oldcnt = state.nvme_err_log_entries;
3559 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3560 if (newcnt > oldcnt) {
3561 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3562 name, oldcnt, newcnt);
3563 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3564 name, oldcnt, newcnt);
3565 state.must_write = true;
3566 }
3567 state.nvme_err_log_entries = newcnt;
3568 }
3569
3570 CloseDevice(nvmedev, name);
3571 return 0;
3572 }
3573
3574 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3575 static int standby_disable_state = 0;
3576
3577 static void init_disable_standby_check(dev_config_vector & configs)
3578 {
3579 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3580 bool sts1 = false, sts2 = false;
3581 for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3582 const dev_config & cfg = configs.at(i);
3583 if (cfg.offlinests_ns)
3584 sts1 = true;
3585 if (cfg.selfteststs_ns)
3586 sts2 = true;
3587 }
3588
3589 // Check for support of disable auto standby
3590 // Reenable standby if smartd.conf was reread
3591 if (sts1 || sts2 || standby_disable_state == 3) {
3592 if (!smi()->disable_system_auto_standby(false)) {
3593 if (standby_disable_state == 3)
3594 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3595 if (sts1 || sts2) {
3596 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3597 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3598 sts1 = sts2 = false;
3599 }
3600 }
3601 }
3602
3603 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3604 }
3605
3606 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3607 {
3608 if (!standby_disable_state)
3609 return;
3610
3611 // Check for just started or still running self-tests
3612 bool running = false;
3613 for (unsigned i = 0; i < configs.size() && !running; i++) {
3614 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3615
3616 if ( ( cfg.offlinests_ns
3617 && (state.offline_started ||
3618 is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3619 || ( cfg.selfteststs_ns
3620 && (state.selftest_started ||
3621 is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3622 running = true;
3623 // state.offline/selftest_started will be reset after next logging of test status
3624 }
3625
3626 // Disable/enable auto standby and log state changes
3627 if (!running) {
3628 if (standby_disable_state != 1) {
3629 if (!smi()->disable_system_auto_standby(false))
3630 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3631 smi()->get_errmsg());
3632 else
3633 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3634 standby_disable_state = 1;
3635 }
3636 }
3637 else if (!smi()->disable_system_auto_standby(true)) {
3638 if (standby_disable_state != 2) {
3639 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3640 smi()->get_errmsg());
3641 standby_disable_state = 2;
3642 }
3643 }
3644 else {
3645 if (standby_disable_state != 3) {
3646 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3647 standby_disable_state = 3;
3648 }
3649 }
3650 }
3651
3652 // Checks the SMART status of all ATA and SCSI devices
3653 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3654 smart_device_list & devices, bool firstpass, bool allow_selftests)
3655 {
3656 for (unsigned i = 0; i < configs.size(); i++) {
3657 const dev_config & cfg = configs.at(i);
3658 dev_state & state = states.at(i);
3659 smart_device * dev = devices.at(i);
3660 if (dev->is_ata())
3661 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3662 else if (dev->is_scsi())
3663 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3664 else if (dev->is_nvme())
3665 NVMeCheckDevice(cfg, state, dev->to_nvme());
3666 }
3667
3668 do_disable_standby_check(configs, states);
3669 }
3670
3671 // Set if Initialize() was called
3672 static bool is_initialized = false;
3673
3674 // Does initialization right after fork to daemon mode
3675 static void Initialize(time_t *wakeuptime)
3676 {
3677 // Call Goodbye() on exit
3678 is_initialized = true;
3679
3680 // write PID file
3681 if (!debugmode)
3682 WritePidFile();
3683
3684 // install signal handlers. On Solaris, can't use signal() because
3685 // it resets the handler to SIG_DFL after each call. So use sigset()
3686 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3687
3688 // normal and abnormal exit
3689 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3690 SIGNALFN(SIGTERM, SIG_IGN);
3691 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3692 SIGNALFN(SIGQUIT, SIG_IGN);
3693
3694 // in debug mode, <CONTROL-C> ==> HUP
3695 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3696 SIGNALFN(SIGINT, SIG_IGN);
3697
3698 // Catch HUP and USR1
3699 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3700 SIGNALFN(SIGHUP, SIG_IGN);
3701 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3702 SIGNALFN(SIGUSR1, SIG_IGN);
3703 #ifdef _WIN32
3704 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3705 SIGNALFN(SIGUSR2, SIG_IGN);
3706 #endif
3707
3708 // initialize wakeup time to CURRENT time
3709 *wakeuptime=time(NULL);
3710
3711 return;
3712 }
3713
3714 #ifdef _WIN32
3715 // Toggle debug mode implemented for native windows only
3716 // (there is no easy way to reopen tty on *nix)
3717 static void ToggleDebugMode()
3718 {
3719 if (!debugmode) {
3720 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3721 if (!daemon_enable_console("smartd [Debug]")) {
3722 debugmode = 1;
3723 daemon_signal(SIGINT, HUPhandler);
3724 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3725 }
3726 else
3727 PrintOut(LOG_INFO,"enable console failed\n");
3728 }
3729 else if (debugmode == 1) {
3730 daemon_disable_console();
3731 debugmode = 0;
3732 daemon_signal(SIGINT, sighandler);
3733 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3734 }
3735 else
3736 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3737 }
3738 #endif
3739
3740 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3741 {
3742 // If past wake-up-time, compute next wake-up-time
3743 time_t timenow=time(NULL);
3744 while (wakeuptime<=timenow){
3745 int intervals=1+(timenow-wakeuptime)/checktime;
3746 wakeuptime+=intervals*checktime;
3747 }
3748
3749 // sleep until we catch SIGUSR1 or have completed sleeping
3750 int addtime = 0;
3751 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3752
3753 // protect user again system clock being adjusted backwards
3754 if (wakeuptime>timenow+checktime){
3755 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3756 wakeuptime=timenow+checktime;
3757 }
3758
3759 // Exit sleep when time interval has expired or a signal is received
3760 sleep(wakeuptime+addtime-timenow);
3761
3762 #ifdef _WIN32
3763 // toggle debug mode?
3764 if (caughtsigUSR2) {
3765 ToggleDebugMode();
3766 caughtsigUSR2 = 0;
3767 }
3768 #endif
3769
3770 timenow=time(NULL);
3771
3772 // Actual sleep time too long?
3773 if (!addtime && timenow > wakeuptime+60) {
3774 if (debugmode)
3775 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3776 (int)(timenow-wakeuptime));
3777 // Wait another 20 seconds to avoid I/O errors during disk spin-up
3778 addtime = timenow-wakeuptime+20;
3779 // Use next wake-up-time if close
3780 int nextcheck = checktime - addtime % checktime;
3781 if (nextcheck <= 20)
3782 addtime += nextcheck;
3783 }
3784 }
3785
3786 // if we caught a SIGUSR1 then print message and clear signal
3787 if (caughtsigUSR1){
3788 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3789 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3790 caughtsigUSR1=0;
3791 sigwakeup = true;
3792 }
3793
3794 // return adjusted wakeuptime
3795 return wakeuptime;
3796 }
3797
3798 // Print out a list of valid arguments for the Directive d
3799 static void printoutvaliddirectiveargs(int priority, char d)
3800 {
3801 switch (d) {
3802 case 'n':
3803 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3804 break;
3805 case 's':
3806 PrintOut(priority, "valid_regular_expression");
3807 break;
3808 case 'd':
3809 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3810 break;
3811 case 'T':
3812 PrintOut(priority, "normal, permissive");
3813 break;
3814 case 'o':
3815 case 'S':
3816 PrintOut(priority, "on, off");
3817 break;
3818 case 'l':
3819 PrintOut(priority, "error, selftest");
3820 break;
3821 case 'M':
3822 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3823 break;
3824 case 'v':
3825 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3826 break;
3827 case 'P':
3828 PrintOut(priority, "use, ignore, show, showall");
3829 break;
3830 case 'F':
3831 PrintOut(priority, "%s", get_valid_firmwarebug_args());
3832 break;
3833 case 'e':
3834 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], "
3835 "security-freeze, standby,[N|off], wcache,[on|off]");
3836 break;
3837 }
3838 }
3839
3840 // exits with an error message, or returns integer value of token
3841 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3842 int min, int max, char * suffix = 0)
3843 {
3844 // make sure argument is there
3845 if (!arg) {
3846 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3847 cfgfile, lineno, name, token, min, max);
3848 return -1;
3849 }
3850
3851 // get argument value (base 10), check that it's integer, and in-range
3852 char *endptr;
3853 int val = strtol(arg,&endptr,10);
3854
3855 // optional suffix present?
3856 if (suffix) {
3857 if (!strcmp(endptr, suffix))
3858 endptr += strlen(suffix);
3859 else
3860 *suffix = 0;
3861 }
3862
3863 if (!(!*endptr && min <= val && val <= max)) {
3864 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3865 cfgfile, lineno, name, token, arg, min, max);
3866 return -1;
3867 }
3868
3869 // all is well; return value
3870 return val;
3871 }
3872
3873
3874 // Get 1-3 small integer(s) for '-W' directive
3875 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3876 unsigned char *val1, unsigned char *val2, unsigned char *val3)
3877 {
3878 unsigned v1 = 0, v2 = 0, v3 = 0;
3879 int n1 = -1, n2 = -1, n3 = -1, len;
3880 if (!arg) {
3881 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3882 cfgfile, lineno, name, token);
3883 return -1;
3884 }
3885
3886 len = strlen(arg);
3887 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3888 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3889 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3890 cfgfile, lineno, name, token, arg);
3891 return -1;
3892 }
3893 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3894 return 0;
3895 }
3896
3897
3898 #ifdef _WIN32
3899
// Concatenate strtok() results if quoted with "..."
// Fetches the next token of the line currently processed by strtok().
// If the token does not start with '"' it is returned unchanged
// (0 if no token remains). Otherwise tokens are joined with single
// spaces until a token ending with '"' is found and the result is
// returned without the quotes. The string "\"" is returned if the
// closing quote is missing.
// The returned pointer refers to a static buffer which is overwritten
// by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Opening and closing quote in the same token ("word"):
  // no further tokens belong to this argument
  if (!token.empty() && token[token.size()-1] == '"') {
    token.erase(token.size()-1);
    return token.c_str();
  }

  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\""; // closing quote missing
    token += ' ';
    size_t len = strlen(t);
    if (t[len-1] == '"') {
      // Last part, append without the closing quote
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
3923
3924 #endif // _WIN32
3925
3926
3927 // This function returns 1 if it has correctly parsed one token (and
3928 // any arguments), else zero if no tokens remain. It returns -1 if an
3929 // error was encountered.
3930 static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
3931 {
3932 char sym;
3933 const char * name = cfg.name.c_str();
3934 int lineno=cfg.lineno;
3935 const char *delim = " \n\t";
3936 int badarg = 0;
3937 int missingarg = 0;
3938 const char *arg = 0;
3939
3940 // is the rest of the line a comment
3941 if (*token=='#')
3942 return 1;
3943
3944 // is the token not recognized?
3945 if (*token!='-' || strlen(token)!=2) {
3946 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3947 configfile, lineno, name, token);
3948 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3949 return -1;
3950 }
3951
3952 // token we will be parsing:
3953 sym=token[1];
3954
3955 // parse the token and swallow its argument
3956 int val;
3957 char plus[] = "+", excl[] = "!";
3958
3959 switch (sym) {
3960 case 'C':
3961 // monitor current pending sector count (default 197)
3962 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3963 return -1;
3964 cfg.curr_pending_id = (unsigned char)val;
3965 cfg.curr_pending_incr = (*plus == '+');
3966 cfg.curr_pending_set = true;
3967 break;
3968 case 'U':
3969 // monitor offline uncorrectable sectors (default 198)
3970 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3971 return -1;
3972 cfg.offl_pending_id = (unsigned char)val;
3973 cfg.offl_pending_incr = (*plus == '+');
3974 cfg.offl_pending_set = true;
3975 break;
3976 case 'T':
3977 // Set tolerance level for SMART command failures
3978 if ((arg = strtok(NULL, delim)) == NULL) {
3979 missingarg = 1;
3980 } else if (!strcmp(arg, "normal")) {
3981 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3982 // not on failure of an optional S.M.A.R.T. command.
3983 // This is the default so we don't need to actually do anything here.
3984 cfg.permissive = false;
3985 } else if (!strcmp(arg, "permissive")) {
3986 // Permissive mode; ignore errors from Mandatory SMART commands
3987 cfg.permissive = true;
3988 } else {
3989 badarg = 1;
3990 }
3991 break;
3992 case 'd':
3993 // specify the device type
3994 if ((arg = strtok(NULL, delim)) == NULL) {
3995 missingarg = 1;
3996 } else if (!strcmp(arg, "ignore")) {
3997 cfg.ignore = true;
3998 } else if (!strcmp(arg, "removable")) {
3999 cfg.removable = true;
4000 } else if (!strcmp(arg, "auto")) {
4001 cfg.dev_type = "";
4002 scan_types.clear();
4003 } else {
4004 cfg.dev_type = arg;
4005 scan_types.push_back(arg);
4006 }
4007 break;
4008 case 'F':
4009 // fix firmware bug
4010 if (!(arg = strtok(0, delim)))
4011 missingarg = 1;
4012 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4013 badarg = 1;
4014 break;
4015 case 'H':
4016 // check SMART status
4017 cfg.smartcheck = true;
4018 break;
4019 case 'f':
4020 // check for failure of usage attributes
4021 cfg.usagefailed = true;
4022 break;
4023 case 't':
4024 // track changes in all vendor attributes
4025 cfg.prefail = true;
4026 cfg.usage = true;
4027 break;
4028 case 'p':
4029 // track changes in prefail vendor attributes
4030 cfg.prefail = true;
4031 break;
4032 case 'u':
4033 // track changes in usage vendor attributes
4034 cfg.usage = true;
4035 break;
4036 case 'l':
4037 // track changes in SMART logs
4038 if ((arg = strtok(NULL, delim)) == NULL) {
4039 missingarg = 1;
4040 } else if (!strcmp(arg, "selftest")) {
4041 // track changes in self-test log
4042 cfg.selftest = true;
4043 } else if (!strcmp(arg, "error")) {
4044 // track changes in ATA error log
4045 cfg.errorlog = true;
4046 } else if (!strcmp(arg, "xerror")) {
4047 // track changes in Extended Comprehensive SMART error log
4048 cfg.xerrorlog = true;
4049 } else if (!strcmp(arg, "offlinests")) {
4050 // track changes in offline data collection status
4051 cfg.offlinests = true;
4052 } else if (!strcmp(arg, "offlinests,ns")) {
4053 // track changes in offline data collection status, disable auto standby
4054 cfg.offlinests = cfg.offlinests_ns = true;
4055 } else if (!strcmp(arg, "selfteststs")) {
4056 // track changes in self-test execution status
4057 cfg.selfteststs = true;
4058 } else if (!strcmp(arg, "selfteststs,ns")) {
4059 // track changes in self-test execution status, disable auto standby
4060 cfg.selfteststs = cfg.selfteststs_ns = true;
4061 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4062 // set SCT Error Recovery Control
4063 unsigned rt = ~0, wt = ~0; int nc = -1;
4064 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4065 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4066 cfg.sct_erc_set = true;
4067 cfg.sct_erc_readtime = rt;
4068 cfg.sct_erc_writetime = wt;
4069 }
4070 else
4071 badarg = 1;
4072 } else {
4073 badarg = 1;
4074 }
4075 break;
4076 case 'a':
4077 // monitor everything
4078 cfg.smartcheck = true;
4079 cfg.prefail = true;
4080 cfg.usagefailed = true;
4081 cfg.usage = true;
4082 cfg.selftest = true;
4083 cfg.errorlog = true;
4084 cfg.selfteststs = true;
4085 break;
4086 case 'o':
4087 // automatic offline testing enable/disable
4088 if ((arg = strtok(NULL, delim)) == NULL) {
4089 missingarg = 1;
4090 } else if (!strcmp(arg, "on")) {
4091 cfg.autoofflinetest = 2;
4092 } else if (!strcmp(arg, "off")) {
4093 cfg.autoofflinetest = 1;
4094 } else {
4095 badarg = 1;
4096 }
4097 break;
4098 case 'n':
4099 // skip disk check if in idle or standby mode
4100 if (!(arg = strtok(NULL, delim)))
4101 missingarg = 1;
4102 else {
4103 char *endptr = NULL;
4104 char *next = strchr(const_cast<char*>(arg), ',');
4105
4106 cfg.powerquiet = false;
4107 cfg.powerskipmax = 0;
4108
4109 if (next!=NULL) *next='\0';
4110 if (!strcmp(arg, "never"))
4111 cfg.powermode = 0;
4112 else if (!strcmp(arg, "sleep"))
4113 cfg.powermode = 1;
4114 else if (!strcmp(arg, "standby"))
4115 cfg.powermode = 2;
4116 else if (!strcmp(arg, "idle"))
4117 cfg.powermode = 3;
4118 else
4119 badarg = 1;
4120
4121 // if optional arguments are present
4122 if (!badarg && next!=NULL) {
4123 next++;
4124 cfg.powerskipmax = strtol(next, &endptr, 10);
4125 if (endptr == next)
4126 cfg.powerskipmax = 0;
4127 else {
4128 next = endptr + (*endptr != '\0');
4129 if (cfg.powerskipmax <= 0)
4130 badarg = 1;
4131 }
4132 if (*next != '\0') {
4133 if (!strcmp("q", next))
4134 cfg.powerquiet = true;
4135 else {
4136 badarg = 1;
4137 }
4138 }
4139 }
4140 }
4141 break;
4142 case 'S':
4143 // automatic attribute autosave enable/disable
4144 if ((arg = strtok(NULL, delim)) == NULL) {
4145 missingarg = 1;
4146 } else if (!strcmp(arg, "on")) {
4147 cfg.autosave = 2;
4148 } else if (!strcmp(arg, "off")) {
4149 cfg.autosave = 1;
4150 } else {
4151 badarg = 1;
4152 }
4153 break;
4154 case 's':
4155 // warn user, and delete any previously given -s REGEXP Directives
4156 if (!cfg.test_regex.empty()){
4157 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4158 configfile, lineno, name, cfg.test_regex.get_pattern());
4159 cfg.test_regex = regular_expression();
4160 }
4161 // check for missing argument
4162 if (!(arg = strtok(NULL, delim))) {
4163 missingarg = 1;
4164 }
4165 // Compile regex
4166 else {
4167 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
4168 // not a valid regular expression!
4169 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4170 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4171 return -1;
4172 }
4173 // Do a bit of sanity checking and warn user if we think that
4174 // their regexp is "strange". User probably confused about shell
4175 // glob(3) syntax versus regular expression syntax regexp(7).
4176 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
4177 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
4178 configfile, lineno, name, val+1, arg[val], arg);
4179 }
4180 break;
4181 case 'm':
4182 // send email to address that follows
4183 if (!(arg = strtok(NULL,delim)))
4184 missingarg = 1;
4185 else {
4186 if (!cfg.emailaddress.empty())
4187 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4188 configfile, lineno, name, cfg.emailaddress.c_str());
4189 #ifdef _WIN32 // TODO: Remove after smartmontools 6.5
4190 if ( !strcmp(arg, "msgbox") || !strcmp(arg, "sysmsgbox")
4191 || str_starts_with(arg, "msgbox,") || str_starts_with(arg, "sysmsgbox,")) {
4192 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -m %s is no longer supported, use -m console[,...] instead\n",
4193 configfile, lineno, name, arg);
4194 return -1;
4195 }
4196 #endif
4197 cfg.emailaddress = arg;
4198 }
4199 break;
4200 case 'M':
4201 // email warning options
4202 if (!(arg = strtok(NULL, delim)))
4203 missingarg = 1;
4204 else if (!strcmp(arg, "once"))
4205 cfg.emailfreq = 1;
4206 else if (!strcmp(arg, "daily"))
4207 cfg.emailfreq = 2;
4208 else if (!strcmp(arg, "diminishing"))
4209 cfg.emailfreq = 3;
4210 else if (!strcmp(arg, "test"))
4211 cfg.emailtest = 1;
4212 else if (!strcmp(arg, "exec")) {
4213 // Get the next argument (the command line)
4214 #ifdef _WIN32
4215 // Allow "/path name/with spaces/..." on Windows
4216 arg = strtok_dequote(delim);
4217 if (arg && arg[0] == '"') {
4218 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4219 configfile, lineno, name, token);
4220 return -1;
4221 }
4222 #else
4223 arg = strtok(0, delim);
4224 #endif
4225 if (!arg) {
4226 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4227 configfile, lineno, name, token);
4228 return -1;
4229 }
4230 // Free the last cmd line given if any, and copy new one
4231 if (!cfg.emailcmdline.empty())
4232 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4233 configfile, lineno, name, cfg.emailcmdline.c_str());
4234 cfg.emailcmdline = arg;
4235 }
4236 else
4237 badarg = 1;
4238 break;
4239 case 'i':
4240 // ignore failure of usage attribute
4241 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4242 return -1;
4243 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
4244 break;
4245 case 'I':
4246 // ignore attribute for tracking purposes
4247 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4248 return -1;
4249 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
4250 break;
4251 case 'r':
4252 // print raw value when tracking
4253 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4254 return -1;
4255 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
4256 if (*excl == '!') // attribute change is critical
4257 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
4258 break;
4259 case 'R':
4260 // track changes in raw value (forces printing of raw value)
4261 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4262 return -1;
4263 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
4264 if (*excl == '!') // raw value change is critical
4265 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
4266 break;
4267 case 'W':
4268 // track Temperature
4269 if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4270 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4271 return -1;
4272 break;
4273 case 'v':
4274 // non-default vendor-specific attribute meaning
4275 if (!(arg=strtok(NULL,delim))) {
4276 missingarg = 1;
4277 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4278 badarg = 1;
4279 }
4280 break;
4281 case 'P':
4282 // Define use of drive-specific presets.
4283 if (!(arg = strtok(NULL, delim))) {
4284 missingarg = 1;
4285 } else if (!strcmp(arg, "use")) {
4286 cfg.ignorepresets = false;
4287 } else if (!strcmp(arg, "ignore")) {
4288 cfg.ignorepresets = true;
4289 } else if (!strcmp(arg, "show")) {
4290 cfg.showpresets = true;
4291 } else if (!strcmp(arg, "showall")) {
4292 showallpresets();
4293 } else {
4294 badarg = 1;
4295 }
4296 break;
4297
4298 case 'e':
4299 // Various ATA settings
4300 if (!(arg = strtok(NULL, delim))) {
4301 missingarg = true;
4302 }
4303 else {
4304 char arg2[16+1]; unsigned val;
4305 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4306 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4307 && (n1 == len || n2 > 0)) {
4308 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4309 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4310 if (n3 != len)
4311 val = ~0U;
4312
4313 if (!strcmp(arg2, "aam")) {
4314 if (off)
4315 cfg.set_aam = -1;
4316 else if (val <= 254)
4317 cfg.set_aam = val + 1;
4318 else
4319 badarg = true;
4320 }
4321 else if (!strcmp(arg2, "apm")) {
4322 if (off)
4323 cfg.set_apm = -1;
4324 else if (1 <= val && val <= 254)
4325 cfg.set_apm = val + 1;
4326 else
4327 badarg = true;
4328 }
4329 else if (!strcmp(arg2, "lookahead")) {
4330 if (off)
4331 cfg.set_lookahead = -1;
4332 else if (on)
4333 cfg.set_lookahead = 1;
4334 else
4335 badarg = true;
4336 }
4337 else if (!strcmp(arg, "security-freeze")) {
4338 cfg.set_security_freeze = true;
4339 }
4340 else if (!strcmp(arg2, "standby")) {
4341 if (off)
4342 cfg.set_standby = 0 + 1;
4343 else if (val <= 255)
4344 cfg.set_standby = val + 1;
4345 else
4346 badarg = true;
4347 }
4348 else if (!strcmp(arg2, "wcache")) {
4349 if (off)
4350 cfg.set_wcache = -1;
4351 else if (on)
4352 cfg.set_wcache = 1;
4353 else
4354 badarg = true;
4355 }
4356 else
4357 badarg = true;
4358 }
4359 else
4360 badarg = true;
4361 }
4362 break;
4363
4364 default:
4365 // Directive not recognized
4366 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4367 configfile, lineno, name, token);
4368 Directives();
4369 return -1;
4370 }
4371 if (missingarg) {
4372 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4373 configfile, lineno, name, token);
4374 }
4375 if (badarg) {
4376 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4377 configfile, lineno, name, token, arg);
4378 }
4379 if (missingarg || badarg) {
4380 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4381 printoutvaliddirectiveargs(LOG_CRIT, sym);
4382 PrintOut(LOG_CRIT, "\n");
4383 return -1;
4384 }
4385
4386 return 1;
4387 }
4388
4389 // Scan directive for configuration file
4390 #define SCANDIRECTIVE "DEVICESCAN"
4391
4392 // This is the routine that adds things to the conf_entries list.
4393 //
4394 // Return values are:
4395 // 1: parsed a normal line
4396 // 0: found DEFAULT setting or comment or blank line
4397 // -1: found SCANDIRECTIVE line
4398 // -2: found an error
4399 //
4400 // Note: this routine modifies *line from the caller!
4401 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4402 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4403 {
4404 const char *delim = " \n\t";
4405
4406 // get first token: device name. If a comment, skip line
4407 const char * name = strtok(line, delim);
4408 if (!name || *name == '#')
4409 return 0;
4410
4411 // Check device name for DEFAULT or DEVICESCAN
4412 int retval;
4413 if (!strcmp("DEFAULT", name)) {
4414 retval = 0;
4415 // Restart with empty defaults
4416 default_conf = dev_config();
4417 }
4418 else {
4419 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4420 // Init new entry with current defaults
4421 conf_entries.push_back(default_conf);
4422 }
4423 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4424
4425 cfg.name = name; // Later replaced by dev->get_info().info_name
4426 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4427 cfg.lineno = lineno;
4428
4429 // parse tokens one at a time from the file.
4430 while (char * token = strtok(0, delim)) {
4431 int rc = ParseToken(token, cfg, scan_types);
4432 if (rc < 0)
4433 // error found on the line
4434 return -2;
4435
4436 if (rc == 0)
4437 // No tokens left
4438 break;
4439
4440 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4441 }
4442
4443 // Check for multiple -d TYPE directives
4444 if (retval != -1 && scan_types.size() > 1) {
4445 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4446 cfg.name.c_str(), cfg.lineno, configfile);
4447 return -2;
4448 }
4449
4450 // Don't perform checks below for DEFAULT entries
4451 if (retval == 0)
4452 return retval;
4453
4454 // If NO monitoring directives are set, then set all of them.
4455 if (!( cfg.smartcheck || cfg.selftest
4456 || cfg.errorlog || cfg.xerrorlog
4457 || cfg.offlinests || cfg.selfteststs
4458 || cfg.usagefailed || cfg.prefail || cfg.usage
4459 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4460
4461 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4462 cfg.name.c_str(), cfg.lineno, configfile);
4463
4464 cfg.smartcheck = true;
4465 cfg.usagefailed = true;
4466 cfg.prefail = true;
4467 cfg.usage = true;
4468 cfg.selftest = true;
4469 cfg.errorlog = true;
4470 cfg.selfteststs = true;
4471 }
4472
4473 // additional sanity check. Has user set -M options without -m?
4474 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4475 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4476 cfg.name.c_str(), cfg.lineno, configfile);
4477 return -2;
4478 }
4479
4480 // has the user has set <nomailer>?
4481 if (cfg.emailaddress == "<nomailer>") {
4482 // check that -M exec is also set
4483 if (cfg.emailcmdline.empty()){
4484 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4485 cfg.name.c_str(), cfg.lineno, configfile);
4486 return -2;
4487 }
4488 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4489 cfg.emailaddress.clear();
4490 }
4491
4492 return retval;
4493 }
4494
4495 // Parses a configuration file. Return values are:
4496 // N=>0: found N entries
4497 // -1: syntax error in config file
4498 // -2: config file does not exist
4499 // -3: config file exists but cannot be read
4500 //
4501 // In the case where the return value is 0, there are three
4502 // possiblities:
4503 // Empty configuration file ==> conf_entries.empty()
4504 // No configuration file ==> conf_entries[0].lineno == 0
4505 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4506 static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4507 {
4508 // maximum line length in configuration file
4509 const int MAXLINELEN = 256;
4510 // maximum length of a continued line in configuration file
4511 const int MAXCONTLINE = 1023;
4512
4513 stdio_file f;
4514 // Open config file, if it exists and is not <stdin>
4515 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4516 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4517 // file exists but we can't read it or it should exist due to '-c' option
4518 int ret = (errno!=ENOENT ? -3 : -2);
4519 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4520 strerror(errno),configfile);
4521 return ret;
4522 }
4523 }
4524 else // read from stdin ('-c -' option)
4525 f.open(stdin);
4526
4527 // Start with empty defaults
4528 dev_config default_conf;
4529
4530 // No configuration file found -- use fake one
4531 int entry = 0;
4532 if (!f) {
4533 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4534
4535 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4536 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4537 return 0;
4538 }
4539
4540 #ifdef __CYGWIN__
4541 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4542 #endif
4543
4544 // configuration file exists
4545 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4546
4547 // parse config file line by line
4548 int lineno = 1, cont = 0, contlineno = 0;
4549 char line[MAXLINELEN+2];
4550 char fullline[MAXCONTLINE+1];
4551
4552 for (;;) {
4553 int len=0,scandevice;
4554 char *lastslash;
4555 char *comment;
4556 char *code;
4557
4558 // make debugging simpler
4559 memset(line,0,sizeof(line));
4560
4561 // get a line
4562 code=fgets(line, MAXLINELEN+2, f);
4563
4564 // are we at the end of the file?
4565 if (!code){
4566 if (cont) {
4567 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4568 // See if we found a SCANDIRECTIVE directive
4569 if (scandevice==-1)
4570 return 0;
4571 // did we find a syntax error
4572 if (scandevice==-2)
4573 return -1;
4574 // the final line is part of a continuation line
4575 entry+=scandevice;
4576 }
4577 break;
4578 }
4579
4580 // input file line number
4581 contlineno++;
4582
4583 // See if line is too long
4584 len=strlen(line);
4585 if (len>MAXLINELEN){
4586 const char *warn;
4587 if (line[len-1]=='\n')
4588 warn="(including newline!) ";
4589 else
4590 warn="";
4591 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4592 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4593 return -1;
4594 }
4595
4596 // Ignore anything after comment symbol
4597 if ((comment=strchr(line,'#'))){
4598 *comment='\0';
4599 len=strlen(line);
4600 }
4601
4602 // is the total line (made of all continuation lines) too long?
4603 if (cont+len>MAXCONTLINE){
4604 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4605 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4606 return -1;
4607 }
4608
4609 // copy string so far into fullline, and increment length
4610 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4611 cont+=len;
4612
4613 // is this a continuation line. If so, replace \ by space and look at next line
4614 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4615 *(fullline+(cont-len)+(lastslash-line))=' ';
4616 continue;
4617 }
4618
4619 // Not a continuation line. Parse it
4620 scan_types.clear();
4621 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4622
4623 // did we find a scandevice directive?
4624 if (scandevice==-1)
4625 return 0;
4626 // did we find a syntax error
4627 if (scandevice==-2)
4628 return -1;
4629
4630 entry+=scandevice;
4631 lineno++;
4632 cont=0;
4633 }
4634
4635 // note -- may be zero if syntax of file OK, but no valid entries!
4636 return entry;
4637 }
4638
4639 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4640 <LIST> is the list of valid arguments for option opt. */
4641 static void PrintValidArgs(char opt)
4642 {
4643 const char *s;
4644
4645 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4646 if (!(s = GetValidArgList(opt)))
4647 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4648 else
4649 PrintOut(LOG_CRIT, "%s", (char *)s);
4650 PrintOut(LOG_CRIT, " <=======\n");
4651 }
4652
4653 #ifndef _WIN32
4654 // Report error and exit if specified path is not absolute.
4655 static void check_abs_path(char option, const std::string & path)
4656 {
4657 if (path.empty() || path[0] == '/')
4658 return;
4659
4660 debugmode = 1;
4661 PrintHead();
4662 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4663 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4664 EXIT(EXIT_BADCMD);
4665 }
4666 #endif // !_WIN32
4667
4668 // Parses input line, prints usage message and
4669 // version/license/copyright messages
4670 static void ParseOpts(int argc, char **argv)
4671 {
4672 // Init default path names
4673 #ifndef _WIN32
4674 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4675 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4676 #else
4677 std::string exedir = get_exe_dir();
4678 static std::string configfile_str = exedir + "/smartd.conf";
4679 configfile = configfile_str.c_str();
4680 warning_script = exedir + "/smartd_warning.cmd";
4681 #endif
4682
4683 // Please update GetValidArgList() if you edit shortopts
4684 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4685 #ifdef HAVE_LIBCAP_NG
4686 "C"
4687 #endif
4688 ;
4689 // Please update GetValidArgList() if you edit longopts
4690 struct option longopts[] = {
4691 { "configfile", required_argument, 0, 'c' },
4692 { "logfacility", required_argument, 0, 'l' },
4693 { "quit", required_argument, 0, 'q' },
4694 { "debug", no_argument, 0, 'd' },
4695 { "showdirectives", no_argument, 0, 'D' },
4696 { "interval", required_argument, 0, 'i' },
4697 #ifndef _WIN32
4698 { "no-fork", no_argument, 0, 'n' },
4699 #else
4700 { "service", no_argument, 0, 'n' },
4701 #endif
4702 { "pidfile", required_argument, 0, 'p' },
4703 { "report", required_argument, 0, 'r' },
4704 { "savestates", required_argument, 0, 's' },
4705 { "attributelog", required_argument, 0, 'A' },
4706 { "drivedb", required_argument, 0, 'B' },
4707 { "warnexec", required_argument, 0, 'w' },
4708 { "version", no_argument, 0, 'V' },
4709 { "license", no_argument, 0, 'V' },
4710 { "copyright", no_argument, 0, 'V' },
4711 { "help", no_argument, 0, 'h' },
4712 { "usage", no_argument, 0, 'h' },
4713 #ifdef HAVE_LIBCAP_NG
4714 { "capabilities", no_argument, 0, 'C' },
4715 #endif
4716 { 0, 0, 0, 0 }
4717 };
4718
4719 opterr=optopt=0;
4720 bool badarg = false;
4721 bool use_default_db = true; // set false on '-B FILE'
4722
4723 // Parse input options.
4724 int optchar;
4725 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4726 char *arg;
4727 char *tailptr;
4728 long lchecktime;
4729
4730 switch(optchar) {
4731 case 'q':
4732 // when to quit
4733 if (!(strcmp(optarg,"nodev"))) {
4734 quit=0;
4735 } else if (!(strcmp(optarg,"nodevstartup"))) {
4736 quit=1;
4737 } else if (!(strcmp(optarg,"never"))) {
4738 quit=2;
4739 } else if (!(strcmp(optarg,"onecheck"))) {
4740 quit=3;
4741 debugmode=1;
4742 } else if (!(strcmp(optarg,"showtests"))) {
4743 quit=4;
4744 debugmode=1;
4745 } else if (!(strcmp(optarg,"errors"))) {
4746 quit=5;
4747 } else {
4748 badarg = true;
4749 }
4750 break;
4751 case 'l':
4752 // set the log facility level
4753 if (!strcmp(optarg, "daemon"))
4754 facility=LOG_DAEMON;
4755 else if (!strcmp(optarg, "local0"))
4756 facility=LOG_LOCAL0;
4757 else if (!strcmp(optarg, "local1"))
4758 facility=LOG_LOCAL1;
4759 else if (!strcmp(optarg, "local2"))
4760 facility=LOG_LOCAL2;
4761 else if (!strcmp(optarg, "local3"))
4762 facility=LOG_LOCAL3;
4763 else if (!strcmp(optarg, "local4"))
4764 facility=LOG_LOCAL4;
4765 else if (!strcmp(optarg, "local5"))
4766 facility=LOG_LOCAL5;
4767 else if (!strcmp(optarg, "local6"))
4768 facility=LOG_LOCAL6;
4769 else if (!strcmp(optarg, "local7"))
4770 facility=LOG_LOCAL7;
4771 else
4772 badarg = true;
4773 break;
4774 case 'd':
4775 // enable debug mode
4776 debugmode = 1;
4777 break;
4778 case 'n':
4779 // don't fork()
4780 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4781 do_fork = false;
4782 #endif
4783 break;
4784 case 'D':
4785 // print summary of all valid directives
4786 debugmode = 1;
4787 Directives();
4788 EXIT(0);
4789 break;
4790 case 'i':
4791 // Period (time interval) for checking
4792 // strtol will set errno in the event of overflow, so we'll check it.
4793 errno = 0;
4794 lchecktime = strtol(optarg, &tailptr, 10);
4795 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4796 debugmode=1;
4797 PrintHead();
4798 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4799 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4800 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4801 EXIT(EXIT_BADCMD);
4802 }
4803 checktime = (int)lchecktime;
4804 break;
4805 case 'r':
4806 // report IOCTL transactions
4807 {
4808 int n1 = -1, n2 = -1, len = strlen(optarg);
4809 char s[9+1]; unsigned i = 1;
4810 sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
4811 if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) {
4812 badarg = true;
4813 } else if (!strcmp(s,"ioctl")) {
4814 ata_debugmode = scsi_debugmode = nvme_debugmode = i;
4815 } else if (!strcmp(s,"ataioctl")) {
4816 ata_debugmode = i;
4817 } else if (!strcmp(s,"scsiioctl")) {
4818 scsi_debugmode = i;
4819 } else if (!strcmp(s,"nvmeioctl")) {
4820 nvme_debugmode = i;
4821 } else {
4822 badarg = true;
4823 }
4824 }
4825 break;
4826 case 'c':
4827 // alternate configuration file
4828 if (strcmp(optarg,"-"))
4829 configfile = (configfile_alt = optarg).c_str();
4830 else // read from stdin
4831 configfile=configfile_stdin;
4832 break;
4833 case 'p':
4834 // output file with PID number
4835 pid_file = optarg;
4836 break;
4837 case 's':
4838 // path prefix of persistent state file
4839 state_path_prefix = optarg;
4840 break;
4841 case 'A':
4842 // path prefix of attribute log file
4843 attrlog_path_prefix = optarg;
4844 break;
4845 case 'B':
4846 {
4847 const char * path = optarg;
4848 if (*path == '+' && path[1])
4849 path++;
4850 else
4851 use_default_db = false;
4852 unsigned char savedebug = debugmode; debugmode = 1;
4853 if (!read_drive_database(path))
4854 EXIT(EXIT_BADCMD);
4855 debugmode = savedebug;
4856 }
4857 break;
4858 case 'w':
4859 warning_script = optarg;
4860 break;
4861 case 'V':
4862 // print version and CVS info
4863 debugmode = 1;
4864 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4865 EXIT(0);
4866 break;
4867 #ifdef HAVE_LIBCAP_NG
4868 case 'C':
4869 // enable capabilities
4870 enable_capabilities = true;
4871 break;
4872 #endif
4873 case 'h':
4874 // help: print summary of command-line options
4875 debugmode=1;
4876 PrintHead();
4877 Usage();
4878 EXIT(0);
4879 break;
4880 case '?':
4881 default:
4882 // unrecognized option
4883 debugmode=1;
4884 PrintHead();
4885 // Point arg to the argument in which this option was found.
4886 arg = argv[optind-1];
4887 // Check whether the option is a long option that doesn't map to -h.
4888 if (arg[1] == '-' && optchar != 'h') {
4889 // Iff optopt holds a valid option then argument must be missing.
4890 if (optopt && (strchr(shortopts, optopt) != NULL)) {
4891 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4892 PrintValidArgs(optopt);
4893 } else {
4894 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4895 }
4896 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4897 EXIT(EXIT_BADCMD);
4898 }
4899 if (optopt) {
4900 // Iff optopt holds a valid option then argument must be missing.
4901 if (strchr(shortopts, optopt) != NULL){
4902 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4903 PrintValidArgs(optopt);
4904 } else {
4905 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4906 }
4907 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4908 EXIT(EXIT_BADCMD);
4909 }
4910 Usage();
4911 EXIT(0);
4912 }
4913
4914 // Check to see if option had an unrecognized or incorrect argument.
4915 if (badarg) {
4916 debugmode=1;
4917 PrintHead();
4918 // It would be nice to print the actual option name given by the user
4919 // here, but we just print the short form. Please fix this if you know
4920 // a clean way to do it.
4921 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4922 PrintValidArgs(optchar);
4923 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4924 EXIT(EXIT_BADCMD);
4925 }
4926 }
4927
4928 // non-option arguments are not allowed
4929 if (argc > optind) {
4930 debugmode=1;
4931 PrintHead();
4932 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4933 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4934 EXIT(EXIT_BADCMD);
4935 }
4936
4937 // no pidfile in debug mode
4938 if (debugmode && !pid_file.empty()) {
4939 debugmode=1;
4940 PrintHead();
4941 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4942 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4943 EXIT(EXIT_BADCMD);
4944 }
4945
4946 #ifndef _WIN32
4947 if (!debugmode) {
4948 // absolute path names are required due to chdir('/') after fork().
4949 check_abs_path('p', pid_file);
4950 check_abs_path('s', state_path_prefix);
4951 check_abs_path('A', attrlog_path_prefix);
4952 }
4953 #endif
4954
4955 // Read or init drive database
4956 {
4957 unsigned char savedebug = debugmode; debugmode = 1;
4958 if (!init_drive_database(use_default_db))
4959 EXIT(EXIT_BADCMD);
4960 debugmode = savedebug;
4961 }
4962
4963 // print header
4964 PrintHead();
4965 }
4966
4967 // Function we call if no configuration file was found or if the
4968 // SCANDIRECTIVE Directive was found. It makes entries for device
4969 // names returned by scan_smart_devices() in os_OSNAME.cpp
4970 static int MakeConfigEntries(const dev_config & base_cfg,
4971 dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4972 const smart_devtype_list & types)
4973 {
4974 // make list of devices
4975 smart_device_list devlist;
4976 if (!smi()->scan_smart_devices(devlist, types)) {
4977 PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
4978 return 0;
4979 }
4980
4981 // if no devices, return
4982 if (devlist.size() <= 0)
4983 return 0;
4984
4985 // add empty device slots for existing config entries
4986 while (scanned_devs.size() < conf_entries.size())
4987 scanned_devs.push_back((smart_device *)0);
4988
4989 // loop over entries to create
4990 for (unsigned i = 0; i < devlist.size(); i++) {
4991 // Move device pointer
4992 smart_device * dev = devlist.release(i);
4993 scanned_devs.push_back(dev);
4994
4995 // Copy configuration, update device and type name
4996 conf_entries.push_back(base_cfg);
4997 dev_config & cfg = conf_entries.back();
4998 cfg.name = dev->get_info().info_name;
4999 cfg.dev_name = dev->get_info().dev_name;
5000 cfg.dev_type = dev->get_info().dev_type;
5001 }
5002
5003 return devlist.size();
5004 }
5005
5006 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
5007 {
5008 if (!debugmode && scandirective)
5009 return;
5010 if (line)
5011 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
5012 "Unable to register %s device %s at line %d of file %s\n",
5013 type, name, line, configfile);
5014 else
5015 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
5016 type, name);
5017 return;
5018 }
5019
5020 // Returns negative value (see ParseConfigFile()) if config file
5021 // had errors, else number of entries which may be zero or positive.
5022 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
5023 {
5024 // parse configuration file configfile (normally /etc/smartd.conf)
5025 smart_devtype_list scan_types;
5026 int entries = ParseConfigFile(conf_entries, scan_types);
5027
5028 if (entries < 0) {
5029 // There was an error reading the configuration file.
5030 conf_entries.clear();
5031 if (entries == -1)
5032 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
5033 return entries;
5034 }
5035
5036 // no error parsing config file.
5037 if (entries) {
5038 // we did not find a SCANDIRECTIVE and did find valid entries
5039 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
5040 }
5041 else if (!conf_entries.empty()) {
5042 // we found a SCANDIRECTIVE or there was no configuration file so
5043 // scan. Configuration file's last entry contains all options
5044 // that were set
5045 dev_config first = conf_entries.back();
5046 conf_entries.pop_back();
5047
5048 if (first.lineno)
5049 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
5050 else
5051 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
5052
5053 // make config list of devices to search for
5054 MakeConfigEntries(first, conf_entries, scanned_devs, scan_types);
5055
5056 // warn user if scan table found no devices
5057 if (conf_entries.empty())
5058 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
5059 }
5060 else
5061 PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
5062
5063 return conf_entries.size();
5064 }
5065
5066 // Return true if TYPE contains a RAID drive number
5067 static bool is_raid_type(const char * type)
5068 {
5069 if (str_starts_with(type, "sat,"))
5070 return false;
5071 int i;
5072 if (sscanf(type, "%*[^,],%d", &i) != 1)
5073 return false;
5074 return true;
5075 }
5076
5077 // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
5078 static bool is_duplicate_device(const smart_device * dev,
5079 const smart_device_list & devices, unsigned numdevs,
5080 const dev_config_vector & ignored)
5081 {
5082 const smart_device::device_info & info1 = dev->get_info();
5083 bool is_raid1 = is_raid_type(info1.dev_type.c_str());
5084
5085 for (unsigned i = 0; i < numdevs; i++) {
5086 const smart_device::device_info & info2 = devices.at(i)->get_info();
5087 // -d TYPE options must match if RAID drive number is specified
5088 if ( info1.dev_name == info2.dev_name
5089 && ( info1.dev_type == info2.dev_type
5090 || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
5091 return true;
5092 }
5093
5094 for (unsigned i = 0; i < ignored.size(); i++) {
5095 const dev_config & cfg2 = ignored.at(i);
5096 if ( info1.dev_name == cfg2.dev_name
5097 && ( info1.dev_type == cfg2.dev_type
5098 || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
5099 return true;
5100 }
5101 return false;
5102 }
5103
5104 // This function tries devices from conf_entries. Each one that can be
5105 // registered is moved onto the [ata|scsi]devices lists and removed
5106 // from the conf_entries list.
5107 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5108 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
5109 {
5110 // start by clearing lists/memory of ALL existing devices
5111 configs.clear();
5112 devices.clear();
5113 states.clear();
5114
5115 // Register entries
5116 dev_config_vector ignored_entries;
5117 unsigned numnoscan = 0;
5118 for (unsigned i = 0; i < conf_entries.size(); i++){
5119
5120 dev_config cfg = conf_entries[i];
5121
5122 if (cfg.ignore) {
5123 // Store for is_duplicate_device() check and ignore
5124 PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
5125 (!cfg.dev_type.empty() ? " [" : ""),
5126 cfg.dev_type.c_str(),
5127 (!cfg.dev_type.empty() ? "]" : ""));
5128 ignored_entries.push_back(cfg);
5129 continue;
5130 }
5131
5132 // get device of appropriate type
5133 smart_device_auto_ptr dev;
5134 bool scanning = false;
5135
5136 // Device may already be detected during devicescan
5137 if (i < scanned_devs.size()) {
5138 dev = scanned_devs.release(i);
5139 if (dev) {
5140 // Check for a preceding non-DEVICESCAN entry for the same device
5141 if ( (numnoscan || !ignored_entries.empty())
5142 && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
5143 PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
5144 continue;
5145 }
5146 scanning = true;
5147 }
5148 }
5149
5150 if (!dev) {
5151 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
5152 if (!dev) {
5153 if (cfg.dev_type.empty())
5154 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
5155 else
5156 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
5157 continue;
5158 }
5159 }
5160
5161 // Save old info
5162 smart_device::device_info oldinfo = dev->get_info();
5163
5164 // Open with autodetect support, may return 'better' device
5165 dev.replace( dev->autodetect_open() );
5166
5167 // Report if type has changed
5168 if (oldinfo.dev_type != dev->get_dev_type())
5169 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
5170 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
5171
5172 if (!dev->is_open()) {
5173 // For linux+devfs, a nonexistent device gives a strange error
5174 // message. This makes the error message a bit more sensible.
5175 // If no debug and scanning - don't print errors
5176 if (debugmode || !scanning)
5177 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
5178 continue;
5179 }
5180
5181 // Update informal name
5182 cfg.name = dev->get_info().info_name;
5183 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
5184
5185 // Prepare initial state
5186 dev_state state;
5187
5188 // register ATA devices
5189 if (dev->is_ata()){
5190 if (ATADeviceScan(cfg, state, dev->to_ata())) {
5191 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
5192 dev.reset();
5193 }
5194 }
5195 // or register SCSI devices
5196 else if (dev->is_scsi()){
5197 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
5198 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
5199 dev.reset();
5200 }
5201 }
5202 // or register NVMe devices
5203 else if (dev->is_nvme()) {
5204 if (NVMeDeviceScan(cfg, state, dev->to_nvme())) {
5205 CanNotRegister(cfg.name.c_str(), "NVMe", cfg.lineno, scanning);
5206 dev.reset();
5207 }
5208 }
5209 else {
5210 PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
5211 dev.reset();
5212 }
5213
5214 if (dev) {
5215 // move onto the list of devices
5216 configs.push_back(cfg);
5217 states.push_back(state);
5218 devices.push_back(dev);
5219 if (!scanning)
5220 numnoscan = devices.size();
5221 }
5222 // if device is explictly listed and we can't register it, then
5223 // exit unless the user has specified that the device is removable
5224 else if (!scanning) {
5225 if (cfg.removable || quit==2)
5226 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
5227 else {
5228 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
5229 EXIT(EXIT_BADDEV);
5230 }
5231 }
5232 }
5233
5234 init_disable_standby_check(configs);
5235 }
5236
5237
5238 // Main program without exception handling
5239 static int main_worker(int argc, char **argv)
5240 {
5241 // Initialize interface
5242 smart_interface::init();
5243 if (!smi())
5244 return 1;
5245
5246 // is it our first pass through?
5247 bool firstpass = true;
5248
5249 // next time to wake up
5250 time_t wakeuptime = 0;
5251
5252 // parse input and print header and usage info if needed
5253 ParseOpts(argc,argv);
5254
5255 // Configuration for each device
5256 dev_config_vector configs;
5257 // Device states
5258 dev_state_vector states;
5259 // Devices to monitor
5260 smart_device_list devices;
5261
5262 bool write_states_always = true;
5263
5264 #ifdef HAVE_LIBCAP_NG
5265 // Drop capabilities
5266 if (enable_capabilities) {
5267 capng_clear(CAPNG_SELECT_BOTH);
5268 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
5269 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
5270 capng_apply(CAPNG_SELECT_BOTH);
5271 }
5272 #endif
5273
5274 // the main loop of the code
5275 for (;;) {
5276
5277 // are we exiting from a signal?
5278 if (caughtsigEXIT) {
5279 // are we exiting with SIGTERM?
5280 int isterm=(caughtsigEXIT==SIGTERM);
5281 int isquit=(caughtsigEXIT==SIGQUIT);
5282 int isok=debugmode?isterm || isquit:isterm;
5283
5284 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
5285 caughtsigEXIT, strsignal(caughtsigEXIT));
5286
5287 if (!isok)
5288 return EXIT_SIGNAL;
5289
5290 // Write state files
5291 if (!state_path_prefix.empty())
5292 write_all_dev_states(configs, states);
5293
5294 return 0;
5295 }
5296
5297 // Should we (re)read the config file?
5298 if (firstpass || caughtsigHUP){
5299 if (!firstpass) {
5300 // Write state files
5301 if (!state_path_prefix.empty())
5302 write_all_dev_states(configs, states);
5303
5304 PrintOut(LOG_INFO,
5305 caughtsigHUP==1?
5306 "Signal HUP - rereading configuration file %s\n":
5307 "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5308 configfile);
5309 }
5310
5311 {
5312 dev_config_vector conf_entries; // Entries read from smartd.conf
5313 smart_device_list scanned_devs; // Devices found during scan
5314 // (re)reads config file, makes >=0 entries
5315 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5316
5317 if (entries>=0) {
5318 // checks devices, then moves onto ata/scsi list or deallocates.
5319 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
5320 if (!(configs.size() == devices.size() && configs.size() == states.size()))
5321 throw std::logic_error("Invalid result from RegisterDevices");
5322 }
5323 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
5324 // user has asked to continue on error in configuration file
5325 if (!firstpass)
5326 PrintOut(LOG_INFO,"Reusing previous configuration\n");
5327 }
5328 else {
5329 // exit with configuration file error status
5330 return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
5331 }
5332 }
5333
5334 // Log number of devices we are monitoring...
5335 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
5336 int numata = 0, numscsi = 0;
5337 for (unsigned i = 0; i < devices.size(); i++) {
5338 const smart_device * dev = devices.at(i);
5339 if (dev->is_ata())
5340 numata++;
5341 else if (dev->is_scsi())
5342 numscsi++;
5343 }
5344 PrintOut(LOG_INFO,"Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5345 numata, numscsi, (int)devices.size() - numata - numscsi);
5346 }
5347 else {
5348 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
5349 return EXIT_NODEV;
5350 }
5351
5352 if (quit==4) {
5353 // user has asked to print test schedule
5354 PrintTestSchedule(configs, states, devices);
5355 return 0;
5356 }
5357
5358 #ifdef HAVE_LIBCAP_NG
5359 if (enable_capabilities) {
5360 for (unsigned i = 0; i < configs.size(); i++) {
5361 if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
5362 PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
5363 break;
5364 }
5365 }
5366 }
5367 #endif
5368
5369 // reset signal
5370 caughtsigHUP=0;
5371
5372 // Always write state files after (re)configuration
5373 write_states_always = true;
5374 }
5375
5376 // check all devices once,
5377 // self tests are not started in first pass unless '-q onecheck' is specified
5378 CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
5379
5380 // Write state files
5381 if (!state_path_prefix.empty())
5382 write_all_dev_states(configs, states, write_states_always);
5383 write_states_always = false;
5384
5385 // Write attribute logs
5386 if (!attrlog_path_prefix.empty())
5387 write_all_dev_attrlogs(configs, states);
5388
5389 // user has asked us to exit after first check
5390 if (quit==3) {
5391 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5392 "smartd is exiting (exit status 0)\n");
5393 return 0;
5394 }
5395
5396 // fork into background if needed
5397 if (firstpass && !debugmode) {
5398 DaemonInit();
5399 }
5400
5401 // set exit and signal handlers, write PID file, set wake-up time
5402 if (firstpass){
5403 Initialize(&wakeuptime);
5404 firstpass = false;
5405 }
5406
5407 // sleep until next check time, or a signal arrives
5408 wakeuptime = dosleep(wakeuptime, write_states_always);
5409 }
5410 }
5411
5412
5413 #ifndef _WIN32
5414 // Main program
5415 int main(int argc, char **argv)
5416 #else
5417 // Windows: internal main function started direct or by service control manager
5418 static int smartd_main(int argc, char **argv)
5419 #endif
5420 {
5421 int status;
5422 try {
5423 // Do the real work ...
5424 status = main_worker(argc, argv);
5425 }
5426 catch (int ex) {
5427 // EXIT(status) arrives here
5428 status = ex;
5429 }
5430 catch (const std::bad_alloc & /*ex*/) {
5431 // Memory allocation failed (also thrown by std::operator new)
5432 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5433 status = EXIT_NOMEM;
5434 }
5435 catch (const std::exception & ex) {
5436 // Other fatal errors
5437 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5438 status = EXIT_BADCODE;
5439 }
5440
5441 // Check for remaining device objects
5442 if (smart_device::get_num_objects() != 0) {
5443 PrintOut(LOG_CRIT, "Smartd: Internal Error: %d device object(s) left at exit.\n",
5444 smart_device::get_num_objects());
5445 status = EXIT_BADCODE;
5446 }
5447
5448 if (status == EXIT_BADCODE)
5449 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
5450
5451 if (is_initialized)
5452 status = Goodbye(status);
5453
5454 #ifdef _WIN32
5455 daemon_winsvc_exitcode = status;
5456 #endif
5457 return status;
5458 }
5459
5460
#ifdef _WIN32
// Main function for Windows.
// Passes the service options and smartd_main() to daemon_main() and
// returns its result.
int main(int argc, char **argv)
{
  // Options for smartd windows service
  static const daemon_winsvc_options service_options = {
    "--service", // cmd_opt
    "smartd", "SmartD Service", // servicename, displayname
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (SMART) built into "
    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
    "www.smartmontools.org"
  };

  // daemon_main() handles daemon and service specific commands
  // and starts smartd_main() direct, from a new process,
  // or via service control manager
  return daemon_main("smartd", &service_options, smartd_main, argc, argv);
}
#endif