]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
2f1cdc86bd97ea3b2ecb61d91b22b52b7efc0f40
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-17 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16 *
17 * This code was originally developed as a Senior Thesis by Michael Cornwell
18 * at the Concurrent Systems Laboratory (now part of the Storage Systems
19 * Research Center), Jack Baskin School of Engineering, University of
20 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21 *
22 */
23
24 #include "config.h"
25 #include "int64.h"
26
27 // unconditionally included files
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
31 #include <signal.h>
32 #include <fcntl.h>
33 #include <string.h>
34 #include <syslog.h>
35 #include <stdarg.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <time.h>
39 #include <limits.h>
40 #include <getopt.h>
41
42 #include <stdexcept>
43 #include <string>
44 #include <vector>
45 #include <algorithm> // std::replace()
46
47 // conditionally included files
48 #ifndef _WIN32
49 #include <sys/wait.h>
50 #endif
51 #ifdef HAVE_UNISTD_H
52 #include <unistd.h>
53 #endif
54
55 #ifdef _WIN32
56 #ifdef _MSC_VER
57 #pragma warning(disable:4761) // "conversion supplied"
58 typedef unsigned short mode_t;
59 typedef int pid_t;
60 #endif
61 #include <io.h> // umask()
62 #include <process.h> // getpid()
63 #endif // _WIN32
64
65 #ifdef __CYGWIN__
66 #include <io.h> // setmode()
67 #endif // __CYGWIN__
68
69 #ifdef HAVE_LIBCAP_NG
70 #include <cap-ng.h>
71 #endif // LIBCAP_NG
72
73 // locally included files
74 #include "atacmds.h"
75 #include "dev_interface.h"
76 #include "knowndrives.h"
77 #include "scsicmds.h"
78 #include "nvmecmds.h"
79 #include "utility.h"
80
81 // This is for solaris, where signal() resets the handler to SIG_DFL
82 // after the first signal is caught.
83 #ifdef HAVE_SIGSET
84 #define SIGNALFN sigset
85 #else
86 #define SIGNALFN signal
87 #endif
88
89 #ifdef _WIN32
90 // fork()/signal()/initd simulation for native Windows
91 #include "daemon_win32.h" // daemon_main/detach/signal()
92 #undef SIGNALFN
93 #define SIGNALFN daemon_signal
94 #define strsignal daemon_strsignal
95 #define sleep daemon_sleep
96 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
97 #define SIGQUIT SIGBREAK
98 #define SIGQUIT_KEYNAME "CONTROL-Break"
99 #else // _WIN32
100 #define SIGQUIT_KEYNAME "CONTROL-\\"
101 #endif // _WIN32
102
// Version control identification string of this source file.
// CONFIG_H_CVSID follows the literal directly, so it is presumably a
// string literal macro (from config.h) that gets concatenated - confirm.
const char * smartd_cpp_cvsid = "$Id: smartd.cpp 4556 2017-10-15 17:31:30Z chrfranke $"
  CONFIG_H_CVSID;
105
106 using namespace smartmontools;
107
// Exit status values used by smartd

// Problems with command line or configuration file
#define EXIT_BADCMD    1   // command line could not be parsed
#define EXIT_BADCONF   2   // config file contains a syntax error
#define EXIT_STARTUP   3   // forking the daemon failed
#define EXIT_PID       4   // PID file could not be created
#define EXIT_NOCONF    5   // config file is missing
#define EXIT_READCONF  6   // config file is present but unreadable

// Internal problems
#define EXIT_NOMEM     8   // memory exhausted
#define EXIT_BADCODE   10  // internal error - must NEVER happen

// Problems with devices
#define EXIT_BADDEV    16  // this device cannot be monitored
#define EXIT_NODEV     17  // there is no device to monitor

// Termination by signal
#define EXIT_SIGNAL    254 // aborted due to a signal
123
124
// ----- Settings taken from the command line -----

// Debug level: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;

// Time to sleep between two checks, and its default
#define CHECKTIME 1800
static int checktime = CHECKTIME;

// Name of the PID file; no PID file is used if empty
static std::string pid_file;

// Path prefix of the persistent state files; no persistence if empty.
// The build may provide a default via SMARTMONTOOLS_SAVESTATES.
#ifdef SMARTMONTOOLS_SAVESTATES
static std::string state_path_prefix = SMARTMONTOOLS_SAVESTATES;
#else
static std::string state_path_prefix;
#endif

// Path prefix of the attribute log files; no logs if empty.
// The build may provide a default via SMARTMONTOOLS_ATTRIBUTELOG.
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
static std::string attrlog_path_prefix = SMARTMONTOOLS_ATTRIBUTELOG;
#else
static std::string attrlog_path_prefix;
#endif

// Name of the configuration file
static const char * configfile;
// Pseudo "name" of the configuration file if it is read from stdin
static const char * const configfile_stdin = "<stdin>";
// Path of an alternate configuration file
static std::string configfile_alt;

// Warning script file
static std::string warning_script;

// Conditions under which smartd should exit
enum quit_t {
  QUIT_NODEV,
  QUIT_NODEVSTARTUP,
  QUIT_NEVER,
  QUIT_ONECHECK,
  QUIT_SHOWTESTS,
  QUIT_ERRORS
};
static quit_t quit = QUIT_NODEV;

// Default syslog(3) log facility
static int facility = LOG_DAEMON;

#ifndef _WIN32
// Fork into the background?
static bool do_fork = true;
#endif

#ifdef HAVE_LIBCAP_NG
// Enable capabilities?
static bool enable_capabilities = false;
#endif

// TODO: This smartctl only variable is also used in os_win32.cpp
unsigned char failuretest_permissive = 0;

// ----- Flags set by the signal handlers -----

// One if USR1 was caught (check devices now)
static volatile int caughtsigUSR1 = 0;

#ifdef _WIN32
// One if USR2 was caught (toggle debug mode)
static volatile int caughtsigUSR2 = 0;
#endif

// One if HUP was caught (reload config file).
// Two if INT was caught in debug mode (also reload config file).
static volatile int caughtsigHUP = 0;

// Signal value if INT, QUIT, or TERM was caught
static volatile int caughtsigEXIT = 0;
196
// This function prints either to stdout or to the syslog as needed.
// 'priority' is a syslog(3) style priority (callers pass LOG_INFO, LOG_CRIT, ...),
// 'fmt' and the following arguments are printf-style.
// The declaration carries __attribute_format_printf(2, 3), presumably to
// let the compiler check the variadic arguments against 'fmt' - confirm
// against the macro definition.
static void PrintOut(int priority, const char *fmt, ...)
  __attribute_format_printf(2, 3);
200
// Bit flags which select how an attribute is monitored.
// The flags of each attribute are kept in monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5
};
211
// Flag byte for each attribute ID.
// Only IDs 1..255 are stored: ID 0 and IDs outside of the table are
// ignored by set() and always reported as "not set" by is_set().
class attribute_flags
{
public:
  // All flags are initially cleared
  attribute_flags()
    { memset(m_flags, 0, sizeof(m_flags)); }

  // Return true if at least one bit of 'flag' is set for attribute 'id'
  bool is_set(int id, unsigned char flag) const
    {
      if (!has_slot(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  // Add the bits of 'flags' to the flags of attribute 'id'
  void set(int id, unsigned char flags)
    {
      if (has_slot(id))
        m_flags[id] |= flags;
    }

private:
  unsigned char m_flags[256];

  // Return true if 'id' refers to a usable table entry
  bool has_slot(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }
};
231
232
233 /// Configuration data for a device. Read from smartd.conf.
234 /// Supports copy & assignment and is compatible with STL containers.
235 struct dev_config
236 {
237 int lineno; // Line number of entry in file
238 std::string name; // Device name (with optional extra info)
239 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
240 std::string dev_type; // Device type argument from -d directive, empty if none
241 std::string dev_idinfo; // Device identify info for warning emails
242 std::string state_file; // Path of the persistent state file, empty if none
243 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
244 bool ignore; // Ignore this entry
245 bool id_is_unique; // True if dev_idinfo is unique (includes S/N or WWN)
246 bool smartcheck; // Check SMART status
247 bool usagefailed; // Check for failed Usage Attributes
248 bool prefail; // Track changes in Prefail Attributes
249 bool usage; // Track changes in Usage Attributes
250 bool selftest; // Monitor number of selftest errors
251 bool errorlog; // Monitor number of ATA errors
252 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
253 bool offlinests; // Monitor changes in offline data collection status
254 bool offlinests_ns; // Disable auto standby if in progress
255 bool selfteststs; // Monitor changes in self-test execution status
256 bool selfteststs_ns; // Disable auto standby if in progress
257 bool permissive; // Ignore failed SMART commands
258 char autosave; // 1=disable, 2=enable Autosave Attributes
259 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
260 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
261 bool ignorepresets; // Ignore database of -v options
262 bool showpresets; // Show database entry for this device
263 bool removable; // Device may disappear (not be present)
264 char powermode; // skip check, if disk in idle or standby mode
265 bool powerquiet; // skip powermode 'skipping checks' message
266 int powerskipmax; // how many times can be check skipped
267 unsigned char tempdiff; // Track Temperature changes >= this limit
268 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
269 regular_expression test_regex; // Regex for scheduled testing
270
271 // Configuration of email warning messages
272 std::string emailcmdline; // script to execute, empty if no messages
273 std::string emailaddress; // email address, or empty
274 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
275 bool emailtest; // Send test email?
276
277 // ATA ONLY
278 int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
279 int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
280 int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
281 int set_lookahead; // disable(-1), enable(1) read look-ahead
282 int set_standby; // set(1..255->0..254) standby timer
283 bool set_security_freeze; // Freeze ATA security
284 int set_wcache; // disable(-1), enable(1) write cache
285 int set_dsn; // disable(0x2), enable(0x1) DSN
286
287 bool sct_erc_set; // set SCT ERC to:
288 unsigned short sct_erc_readtime; // ERC read time (deciseconds)
289 unsigned short sct_erc_writetime; // ERC write time (deciseconds)
290
291 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
292 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
293 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
294 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
295
296 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
297
298 ata_vendor_attr_defs attribute_defs; // -v options
299
300 dev_config();
301 };
302
303 dev_config::dev_config()
304 : lineno(0),
305 ignore(false),
306 id_is_unique(false),
307 smartcheck(false),
308 usagefailed(false),
309 prefail(false),
310 usage(false),
311 selftest(false),
312 errorlog(false),
313 xerrorlog(false),
314 offlinests(false), offlinests_ns(false),
315 selfteststs(false), selfteststs_ns(false),
316 permissive(false),
317 autosave(0),
318 autoofflinetest(0),
319 ignorepresets(false),
320 showpresets(false),
321 removable(false),
322 powermode(0),
323 powerquiet(false),
324 powerskipmax(0),
325 tempdiff(0),
326 tempinfo(0), tempcrit(0),
327 emailfreq(0),
328 emailtest(false),
329 dev_rpm(0),
330 set_aam(0), set_apm(0),
331 set_lookahead(0),
332 set_standby(0),
333 set_security_freeze(false),
334 set_wcache(0), set_dsn(0),
335 sct_erc_set(false),
336 sct_erc_readtime(0), sct_erc_writetime(0),
337 curr_pending_id(0), offl_pending_id(0),
338 curr_pending_incr(false), offl_pending_incr(false),
339 curr_pending_set(false), offl_pending_set(false)
340 {
341 }
342
343
// Number of allowed mail message types.
// Valid mail type indexes are 0 .. SMARTD_NMAIL-1.
static const int SMARTD_NMAIL = 13;
// Type for '-M test' mails (state not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.
349
// Bookkeeping for the warning emails of one mail type
struct mailinfo {
  int logged;        // how many times an email has been sent
  time_t firstsent;  // when the first email was sent, as defined by time(2)
  time_t lastsent;   // when the last email was sent, as defined by time(2)

  // No email sent yet
  mailinfo()
    : logged(0),
      firstsent(0),
      lastsent(0)
    { }
};
358
359 /// Persistent state data for a device.
360 struct persistent_dev_state
361 {
362 unsigned char tempmin, tempmax; // Min/Max Temperatures
363
364 unsigned char selflogcount; // total number of self-test errors
365 unsigned short selfloghour; // lifetime hours of last self-test error
366
367 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
368
369 uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
370 uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
371
372 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
373
374 // ATA ONLY
375 int ataerrorcount; // Total number of ATA errors
376
377 // Persistent part of ata_smart_values:
378 struct ata_attribute {
379 unsigned char id;
380 unsigned char val;
381 unsigned char worst; // Byte needed for 'raw64' attribute only.
382 uint64_t raw;
383 unsigned char resvd;
384
385 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
386 };
387 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
388
389 // SCSI ONLY
390
391 struct scsi_error_counter_t {
392 struct scsiErrorCounter errCounter;
393 unsigned char found;
394 scsi_error_counter_t() : found(0)
395 { memset(&errCounter, 0, sizeof(errCounter)); }
396 };
397 scsi_error_counter_t scsi_error_counters[3];
398
399 struct scsi_nonmedium_error_t {
400 struct scsiNonMediumError nme;
401 unsigned char found;
402 scsi_nonmedium_error_t() : found(0)
403 { memset(&nme, 0, sizeof(nme)); }
404 };
405 scsi_nonmedium_error_t scsi_nonmedium_error;
406
407 // NVMe only
408 uint64_t nvme_err_log_entries;
409
410 persistent_dev_state();
411 };
412
413 persistent_dev_state::persistent_dev_state()
414 : tempmin(0), tempmax(0),
415 selflogcount(0),
416 selfloghour(0),
417 scheduled_test_next_check(0),
418 selective_test_last_start(0),
419 selective_test_last_end(0),
420 ataerrorcount(0),
421 nvme_err_log_entries(0)
422 {
423 }
424
425 /// Non-persistent state data for a device.
426 struct temp_dev_state
427 {
428 bool must_write; // true if persistent part should be written
429
430 bool not_cap_offline; // true == not capable of offline testing
431 bool not_cap_conveyance;
432 bool not_cap_short;
433 bool not_cap_long;
434 bool not_cap_selective;
435
436 unsigned char temperature; // last recorded Temperature (in Celsius)
437 time_t tempmin_delay; // time where Min Temperature tracking will start
438
439 bool removed; // true if open() failed for removable device
440
441 bool powermodefail; // true if power mode check failed
442 int powerskipcnt; // Number of checks skipped due to idle or standby mode
443 int lastpowermodeskipped; // the last power mode that was skipped
444
445 // SCSI ONLY
446 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
447 unsigned char TempPageSupported; // has log sense temperature page (0xd)
448 unsigned char ReadECounterPageSupported;
449 unsigned char WriteECounterPageSupported;
450 unsigned char VerifyECounterPageSupported;
451 unsigned char NonMediumErrorPageSupported;
452 unsigned char SuppressReport; // minimize nuisance reports
453 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
454 // know yet) 6 or 10
455 // ATA ONLY
456 uint64_t num_sectors; // Number of sectors
457 ata_smart_values smartval; // SMART data
458 ata_smart_thresholds_pvt smartthres; // SMART thresholds
459 bool offline_started; // true if offline data collection was started
460 bool selftest_started; // true if self-test was started
461
462 temp_dev_state();
463 };
464
465 temp_dev_state::temp_dev_state()
466 : must_write(false),
467 not_cap_offline(false),
468 not_cap_conveyance(false),
469 not_cap_short(false),
470 not_cap_long(false),
471 not_cap_selective(false),
472 temperature(0),
473 tempmin_delay(0),
474 removed(false),
475 powermodefail(false),
476 powerskipcnt(0),
477 lastpowermodeskipped(0),
478 SmartPageSupported(false),
479 TempPageSupported(false),
480 ReadECounterPageSupported(false),
481 WriteECounterPageSupported(false),
482 VerifyECounterPageSupported(false),
483 NonMediumErrorPageSupported(false),
484 SuppressReport(false),
485 modese_len(0),
486 num_sectors(0),
487 offline_started(false),
488 selftest_started(false)
489 {
490 memset(&smartval, 0, sizeof(smartval));
491 memset(&smartthres, 0, sizeof(smartthres));
492 }
493
494 /// Runtime state data for a device.
495 struct dev_state
496 : public persistent_dev_state,
497 public temp_dev_state
498 {
499 void update_persistent_state();
500 void update_temp_state();
501 };
502
/// Container for configuration info for each device.
typedef std::vector<dev_config> dev_config_vector;

/// Container for state info for each device.
/// The functions below which take both containers use the same index
/// for the configuration and the state of a device.
typedef std::vector<dev_state> dev_state_vector;
508
509 // Copy ATA attributes to persistent state.
510 void dev_state::update_persistent_state()
511 {
512 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
513 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
514 ata_attribute & pa = ata_attributes[i];
515 pa.id = ta.id;
516 if (ta.id == 0) {
517 pa.val = pa.worst = 0; pa.raw = 0;
518 continue;
519 }
520 pa.val = ta.current;
521 pa.worst = ta.worst;
522 pa.raw = ta.raw[0]
523 | ( ta.raw[1] << 8)
524 | ( ta.raw[2] << 16)
525 | ((uint64_t)ta.raw[3] << 24)
526 | ((uint64_t)ta.raw[4] << 32)
527 | ((uint64_t)ta.raw[5] << 40);
528 pa.resvd = ta.reserv;
529 }
530 }
531
532 // Copy ATA from persistent to temp state.
533 void dev_state::update_temp_state()
534 {
535 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
536 const ata_attribute & pa = ata_attributes[i];
537 ata_smart_attribute & ta = smartval.vendor_attributes[i];
538 ta.id = pa.id;
539 if (pa.id == 0) {
540 ta.current = ta.worst = 0;
541 memset(ta.raw, 0, sizeof(ta.raw));
542 continue;
543 }
544 ta.current = pa.val;
545 ta.worst = pa.worst;
546 ta.raw[0] = (unsigned char) pa.raw;
547 ta.raw[1] = (unsigned char)(pa.raw >> 8);
548 ta.raw[2] = (unsigned char)(pa.raw >> 16);
549 ta.raw[3] = (unsigned char)(pa.raw >> 24);
550 ta.raw[4] = (unsigned char)(pa.raw >> 32);
551 ta.raw[5] = (unsigned char)(pa.raw >> 40);
552 ta.reserv = pa.resvd;
553 }
554 }
555
556 // Parse a line from a state file.
557 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
558 {
559 static const regular_expression regex(
560 "^ *"
561 "((temperature-min)" // (1 (2)
562 "|(temperature-max)" // (3)
563 "|(self-test-errors)" // (4)
564 "|(self-test-last-err-hour)" // (5)
565 "|(scheduled-test-next-check)" // (6)
566 "|(selective-test-last-start)" // (7)
567 "|(selective-test-last-end)" // (8)
568 "|(ata-error-count)" // (9)
569 "|(mail\\.([0-9]+)\\." // (10 (11)
570 "((count)" // (12 (13)
571 "|(first-sent-time)" // (14)
572 "|(last-sent-time)" // (15)
573 ")" // 12)
574 ")" // 10)
575 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
576 "((id)" // (18 (19)
577 "|(val)" // (20)
578 "|(worst)" // (21)
579 "|(raw)" // (22)
580 "|(resvd)" // (23)
581 ")" // 18)
582 ")" // 16)
583 "|(nvme-err-log-entries)" // (24)
584 ")" // 1)
585 " *= *([0-9]+)[ \n]*$", // (25)
586 REG_EXTENDED
587 );
588
589 const int nmatch = 1+25;
590 regmatch_t match[nmatch];
591 if (!regex.execute(line, nmatch, match))
592 return false;
593 if (match[nmatch-1].rm_so < 0)
594 return false;
595
596 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
597
598 int m = 1;
599 if (match[++m].rm_so >= 0)
600 state.tempmin = (unsigned char)val;
601 else if (match[++m].rm_so >= 0)
602 state.tempmax = (unsigned char)val;
603 else if (match[++m].rm_so >= 0)
604 state.selflogcount = (unsigned char)val;
605 else if (match[++m].rm_so >= 0)
606 state.selfloghour = (unsigned short)val;
607 else if (match[++m].rm_so >= 0)
608 state.scheduled_test_next_check = (time_t)val;
609 else if (match[++m].rm_so >= 0)
610 state.selective_test_last_start = val;
611 else if (match[++m].rm_so >= 0)
612 state.selective_test_last_end = val;
613 else if (match[++m].rm_so >= 0)
614 state.ataerrorcount = (int)val;
615 else if (match[m+=2].rm_so >= 0) {
616 int i = atoi(line+match[m].rm_so);
617 if (!(0 <= i && i < SMARTD_NMAIL))
618 return false;
619 if (i == MAILTYPE_TEST) // Don't suppress test mails
620 return true;
621 if (match[m+=2].rm_so >= 0)
622 state.maillog[i].logged = (int)val;
623 else if (match[++m].rm_so >= 0)
624 state.maillog[i].firstsent = (time_t)val;
625 else if (match[++m].rm_so >= 0)
626 state.maillog[i].lastsent = (time_t)val;
627 else
628 return false;
629 }
630 else if (match[m+=5+1].rm_so >= 0) {
631 int i = atoi(line+match[m].rm_so);
632 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
633 return false;
634 if (match[m+=2].rm_so >= 0)
635 state.ata_attributes[i].id = (unsigned char)val;
636 else if (match[++m].rm_so >= 0)
637 state.ata_attributes[i].val = (unsigned char)val;
638 else if (match[++m].rm_so >= 0)
639 state.ata_attributes[i].worst = (unsigned char)val;
640 else if (match[++m].rm_so >= 0)
641 state.ata_attributes[i].raw = val;
642 else if (match[++m].rm_so >= 0)
643 state.ata_attributes[i].resvd = (unsigned char)val;
644 else
645 return false;
646 }
647 else if (match[m+7].rm_so >= 0)
648 state.nvme_err_log_entries = val;
649 else
650 return false;
651 return true;
652 }
653
654 // Read a state file.
655 static bool read_dev_state(const char * path, persistent_dev_state & state)
656 {
657 stdio_file f(path, "r");
658 if (!f) {
659 if (errno != ENOENT)
660 pout("Cannot read state file \"%s\"\n", path);
661 return false;
662 }
663 #ifdef __CYGWIN__
664 setmode(fileno(f), O_TEXT); // Allow files with \r\n
665 #endif
666
667 persistent_dev_state new_state;
668 int good = 0, bad = 0;
669 char line[256];
670 while (fgets(line, sizeof(line), f)) {
671 const char * s = line + strspn(line, " \t");
672 if (!*s || *s == '#')
673 continue;
674 if (!parse_dev_state_line(line, new_state))
675 bad++;
676 else
677 good++;
678 }
679
680 if (bad) {
681 if (!good) {
682 pout("%s: format error\n", path);
683 return false;
684 }
685 pout("%s: %d invalid line(s) ignored\n", path, bad);
686 }
687
688 // This sets the values missing in the file to 0.
689 state = new_state;
690 return true;
691 }
692
// Write the line "NAME = VAL" to a state file.
// Nothing is written if VAL is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
698
// Write the line "NAME1.ID.NAME2 = VAL" to a state file.
// Nothing is written if VAL is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
704
705 // Write a state file
706 static bool write_dev_state(const char * path, const persistent_dev_state & state)
707 {
708 // Rename old "file" to "file~"
709 std::string pathbak = path; pathbak += '~';
710 unlink(pathbak.c_str());
711 rename(path, pathbak.c_str());
712
713 stdio_file f(path, "w");
714 if (!f) {
715 pout("Cannot create state file \"%s\"\n", path);
716 return false;
717 }
718
719 fprintf(f, "# smartd state file\n");
720 write_dev_state_line(f, "temperature-min", state.tempmin);
721 write_dev_state_line(f, "temperature-max", state.tempmax);
722 write_dev_state_line(f, "self-test-errors", state.selflogcount);
723 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
724 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
725 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
726 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
727
728 int i;
729 for (i = 0; i < SMARTD_NMAIL; i++) {
730 if (i == MAILTYPE_TEST) // Don't suppress test mails
731 continue;
732 const mailinfo & mi = state.maillog[i];
733 if (!mi.logged)
734 continue;
735 write_dev_state_line(f, "mail", i, "count", mi.logged);
736 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
737 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
738 }
739
740 // ATA ONLY
741 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
742
743 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
744 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
745 if (!pa.id)
746 continue;
747 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
748 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
749 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
750 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
751 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
752 }
753
754 // NVMe only
755 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
756
757 return true;
758 }
759
760 // Write to the attrlog file
761 static bool write_dev_attrlog(const char * path, const dev_state & state)
762 {
763 stdio_file f(path, "a");
764 if (!f) {
765 pout("Cannot create attribute log file \"%s\"\n", path);
766 return false;
767 }
768
769
770 time_t now = time(0);
771 struct tm * tms = gmtime(&now);
772 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
773 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
774 tms->tm_hour, tms->tm_min, tms->tm_sec);
775 // ATA ONLY
776 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
777 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
778 if (!pa.id)
779 continue;
780 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
781 }
782 // SCSI ONLY
783 const struct scsiErrorCounter * ecp;
784 const char * pageNames[3] = {"read", "write", "verify"};
785 for (int k = 0; k < 3; ++k) {
786 if ( !state.scsi_error_counters[k].found ) continue;
787 ecp = &state.scsi_error_counters[k].errCounter;
788 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
789 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
790 "\t%s-corr-by-retry;%" PRIu64 ";"
791 "\t%s-total-err-corrected;%" PRIu64 ";"
792 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
793 "\t%s-gb-processed;%.3f;"
794 "\t%s-total-unc-errors;%" PRIu64 ";",
795 pageNames[k], ecp->counter[0],
796 pageNames[k], ecp->counter[1],
797 pageNames[k], ecp->counter[2],
798 pageNames[k], ecp->counter[3],
799 pageNames[k], ecp->counter[4],
800 pageNames[k], (ecp->counter[5] / 1000000000.0),
801 pageNames[k], ecp->counter[6]);
802 }
803 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
804 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
805 }
806 // write SCSI current temperature if it is monitored
807 if (state.temperature)
808 fprintf(f, "\ttemperature;%d;", state.temperature);
809 // end of line
810 fprintf(f, "\n");
811 return true;
812 }
813
814 // Write all state files. If write_always is false, don't write
815 // unless must_write is set.
816 static void write_all_dev_states(const dev_config_vector & configs,
817 dev_state_vector & states,
818 bool write_always = true)
819 {
820 for (unsigned i = 0; i < states.size(); i++) {
821 const dev_config & cfg = configs.at(i);
822 if (cfg.state_file.empty())
823 continue;
824 dev_state & state = states[i];
825 if (!write_always && !state.must_write)
826 continue;
827 if (!write_dev_state(cfg.state_file.c_str(), state))
828 continue;
829 state.must_write = false;
830 if (write_always || debugmode)
831 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
832 cfg.name.c_str(), cfg.state_file.c_str());
833 }
834 }
835
836 // Write to all attrlog files
837 static void write_all_dev_attrlogs(const dev_config_vector & configs,
838 dev_state_vector & states)
839 {
840 for (unsigned i = 0; i < states.size(); i++) {
841 const dev_config & cfg = configs.at(i);
842 if (cfg.attrlog_file.empty())
843 continue;
844 dev_state & state = states[i];
845 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
846 }
847 }
848
849 // remove the PID file
850 static void RemovePidFile()
851 {
852 if (!pid_file.empty()) {
853 if (unlink(pid_file.c_str()))
854 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
855 pid_file.c_str(), strerror(errno));
856 pid_file.clear();
857 }
858 return;
859 }
860
861 extern "C" { // signal handlers require C-linkage
862
863 // Note if we catch a SIGUSR1
864 static void USR1handler(int sig)
865 {
866 if (SIGUSR1==sig)
867 caughtsigUSR1=1;
868 return;
869 }
870
871 #ifdef _WIN32
872 // Note if we catch a SIGUSR2
873 static void USR2handler(int sig)
874 {
875 if (SIGUSR2==sig)
876 caughtsigUSR2=1;
877 return;
878 }
879 #endif
880
881 // Note if we catch a HUP (or INT in debug mode)
882 static void HUPhandler(int sig)
883 {
884 if (sig==SIGHUP)
885 caughtsigHUP=1;
886 else
887 caughtsigHUP=2;
888 return;
889 }
890
891 // signal handler for TERM, QUIT, and INT (if not in debug mode)
892 static void sighandler(int sig)
893 {
894 if (!caughtsigEXIT)
895 caughtsigEXIT=sig;
896 return;
897 }
898
899 } // extern "C"
900
901 // Cleanup, print Goodbye message and remove pidfile
902 static int Goodbye(int status)
903 {
904 // delete PID file, if one was created
905 RemovePidFile();
906
907 // and this should be the final output from smartd before it exits
908 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
909
910 return status;
911 }
912
913 // a replacement for setenv() which is not available on all platforms.
914 // Note that the string passed to putenv must not be freed or made
915 // invalid, since a pointer to it is kept by putenv(). This means that
916 // it must either be a static buffer or allocated off the heap. The
917 // string can be freed if the environment variable is redefined via
918 // another call to putenv(). There is no portable way to unset a variable
919 // with putenv(). So we manage the buffer in a static object.
920 // Using setenv() if available is not considered because some
921 // implementations may produce memory leaks.
922
// Owner of the "NAME=value" string which was passed to putenv().
// putenv() keeps a pointer to the string, so the string must stay valid
// until the variable is redefined.  There is intentionally no
// destructor: the last string remains allocated.
// Objects cannot be copied.
class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Set environment variable NAME to VALUE.
  // The same NAME must be passed on each call for the same object.
  // Throws std::runtime_error if putenv() fails.
  void set(const char * name, const char * value);

private:
  char * m_buf; // String currently referenced by the environment, or 0

  // Not implemented, copying is not allowed
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "value" + '\0'
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The environment does not reference the new string, don't leak it
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // This assumes that the same NAME is passed on each call:
  // the environment now refers to newbuf, so the old string is unused.
  delete [] m_buf;
  m_buf = newbuf;
}
951
// NOTE(review): presumably the length of a buffer used further down in
// this file; no use is visible next to this definition - confirm.
#define EBUFLEN 1024

// Forward declaration of MailWarning() which carries
// __attribute_format_printf(4, 5), presumably to let the compiler check
// the variadic arguments against 'fmt' (argument 4) - confirm against
// the macro definition.
static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
  __attribute_format_printf(4, 5);
956
957 // If either address or executable path is non-null then send and log
958 // a warning email, or execute executable
959 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
960 {
961 static const char * const whichfail[] = {
962 "EmailTest", // 0
963 "Health", // 1
964 "Usage", // 2
965 "SelfTest", // 3
966 "ErrorCount", // 4
967 "FailedHealthCheck", // 5
968 "FailedReadSmartData", // 6
969 "FailedReadSmartErrorLog", // 7
970 "FailedReadSmartSelfTestLog", // 8
971 "FailedOpenDevice", // 9
972 "CurrentPendingSector", // 10
973 "OfflineUncorrectableSector", // 11
974 "Temperature" // 12
975 };
976
977 // See if user wants us to send mail
978 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
979 return;
980
981 std::string address = cfg.emailaddress;
982 const char * executable = cfg.emailcmdline.c_str();
983
984 // which type of mail are we sending?
985 mailinfo * mail=(state.maillog)+which;
986
987 // checks for sanity
988 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
989 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
990 return;
991 }
992 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
993 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
994 which, (int)sizeof(whichfail));
995 return;
996 }
997
998 // Return if a single warning mail has been sent.
999 if ((cfg.emailfreq==1) && mail->logged)
1000 return;
1001
1002 // Return if this is an email test and one has already been sent.
1003 if (which == 0 && mail->logged)
1004 return;
1005
1006 // To decide if to send mail, we need to know what time it is.
1007 time_t epoch = time(0);
1008
1009 // Return if less than one day has gone by
1010 const int day = 24*3600;
1011 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1012 return;
1013
1014 // Return if less than 2^(logged-1) days have gone by
1015 if (cfg.emailfreq==3 && mail->logged) {
1016 int days = 0x01 << (mail->logged - 1);
1017 days*=day;
1018 if (epoch<(mail->lastsent+days))
1019 return;
1020 }
1021
1022 #ifdef HAVE_LIBCAP_NG
1023 if (enable_capabilities) {
1024 PrintOut(LOG_ERR, "Sending a mail was supressed. "
1025 "Mails can't be send when capabilites are enabled\n");
1026 return;
1027 }
1028 #endif
1029
1030 // record the time of this mail message, and the first mail message
1031 if (!mail->logged)
1032 mail->firstsent=epoch;
1033 mail->lastsent=epoch;
1034
1035 // print warning string into message
1036 char message[256];
1037 va_list ap;
1038 va_start(ap, fmt);
1039 vsnprintf(message, sizeof(message), fmt, ap);
1040 va_end(ap);
1041
1042 // replace commas by spaces to separate recipients
1043 std::replace(address.begin(), address.end(), ',', ' ');
1044
1045 // Export information in environment variables that will be useful
1046 // for user scripts
1047 static env_buffer env[12];
1048 env[0].set("SMARTD_MAILER", executable);
1049 env[1].set("SMARTD_MESSAGE", message);
1050 char dates[DATEANDEPOCHLEN];
1051 snprintf(dates, sizeof(dates), "%d", mail->logged);
1052 env[2].set("SMARTD_PREVCNT", dates);
1053 dateandtimezoneepoch(dates, mail->firstsent);
1054 env[3].set("SMARTD_TFIRST", dates);
1055 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1056 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1057 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1058 env[6].set("SMARTD_ADDRESS", address.c_str());
1059 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1060
1061 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1062 env[8].set("SMARTD_DEVICETYPE",
1063 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1064 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1065
1066 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1067 dates[0] = 0;
1068 if (which) switch (cfg.emailfreq) {
1069 case 2: dates[0] = '1'; dates[1] = 0; break;
1070 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1071 }
1072 env[11].set("SMARTD_NEXTDAYS", dates);
1073
1074 // now construct a command to send this as EMAIL
1075 if (!*executable)
1076 executable = "<mail>";
1077 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1078 const char * newwarn = (which? "Warning via" : "Test of");
1079
1080 #ifndef _WIN32
1081 char command[2048];
1082 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1083
1084 // tell SYSLOG what we are about to do...
1085 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1086 which?"Sending warning via":"Executing test of", executable, newadd);
1087
1088 // issue the command to send mail or to run the user's executable
1089 errno=0;
1090 FILE * pfp;
1091 if (!(pfp=popen(command, "r")))
1092 // failed to popen() mail process
1093 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1094 newwarn, executable, newadd, errno?strerror(errno):"");
1095 else {
1096 // pipe suceeded!
1097 int len, status;
1098 char buffer[EBUFLEN];
1099
1100 // if unexpected output on stdout/stderr, null terminate, print, and flush
1101 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1102 int count=0;
1103 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1104 buffer[newlen]='\0';
1105 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1106 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1107
1108 // flush pipe if needed
1109 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1110 count++;
1111
1112 // tell user that pipe was flushed, or that something is really wrong
1113 if (count && count<EBUFLEN)
1114 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1115 newwarn, executable, newadd);
1116 else if (count)
1117 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1118 newwarn, executable, newadd);
1119 }
1120
1121 // if something went wrong with mail process, print warning
1122 errno=0;
1123 if (-1==(status=pclose(pfp)))
1124 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1125 errno?strerror(errno):"");
1126 else {
1127 // mail process apparently succeeded. Check and report exit status
1128 if (WIFEXITED(status)) {
1129 // exited 'normally' (but perhaps with nonzero status)
1130 int status8 = WEXITSTATUS(status);
1131 if (status8>128)
1132 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1133 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1134 else if (status8)
1135 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1136 newwarn, executable, newadd, status, status8);
1137 else
1138 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1139 }
1140
1141 if (WIFSIGNALED(status))
1142 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1143 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1144
1145 // this branch is probably not possible. If subprocess is
1146 // stopped then pclose() should not return.
1147 if (WIFSTOPPED(status))
1148 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1149 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1150
1151 }
1152 }
1153
1154 #else // _WIN32
1155 {
1156 char command[2048];
1157 snprintf(command, sizeof(command), "cmd /c \"%s\"", warning_script.c_str());
1158
1159 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1160 int rc;
1161 // run command
1162 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1163 (which?"Sending warning via":"Executing test of"), executable, newadd);
1164 rc = daemon_spawn(command, "", 0, stdoutbuf, sizeof(stdoutbuf));
1165 if (rc >= 0 && stdoutbuf[0])
1166 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1167 newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1168 if (rc != 0)
1169 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1170 newwarn, executable, newadd, rc);
1171 else
1172 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1173 }
1174
1175 #endif // _WIN32
1176
1177 // increment mail sent counter
1178 mail->logged++;
1179 }
1180
1181 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1182 __attribute_format_printf(4, 5);
1183
1184 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1185 {
1186 if (!(0 <= which && which < SMARTD_NMAIL))
1187 return;
1188
1189 // Return if no mail sent yet
1190 mailinfo & mi = state.maillog[which];
1191 if (!mi.logged)
1192 return;
1193
1194 // Format & print message
1195 char msg[256];
1196 va_list ap;
1197 va_start(ap, fmt);
1198 vsnprintf(msg, sizeof(msg), fmt, ap);
1199 va_end(ap);
1200
1201 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1202 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1203
1204 // Clear mail counter and timestamps
1205 mi = mailinfo();
1206 state.must_write = true;
1207 }
1208
1209 #ifndef _WIN32
1210
1211 // Output multiple lines via separate syslog(3) calls.
1212 __attribute_format_printf(2, 0)
1213 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1214 {
1215 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1216 vsnprintf(buf, sizeof(buf), fmt, ap);
1217
1218 for (char * p = buf, * q; p && *p; p = q) {
1219 if ((q = strchr(p, '\n')))
1220 *q++ = 0;
1221 if (*p)
1222 syslog(priority, "%s\n", p);
1223 }
1224 }
1225
1226 #else // _WIN32
1227 // os_win32/syslog_win32.cpp supports multiple lines.
1228 #define vsyslog_lines vsyslog
1229 #endif // _WIN32
1230
1231 // Printing function for watching ataprint commands, or losing them
1232 // [From GLIBC Manual: Since the prototype doesn't specify types for
1233 // optional arguments, in a call to a variadic function the default
1234 // argument promotions are performed on the optional argument
1235 // values. This means the objects of type char or short int (whether
1236 // signed or not) are promoted to either int or unsigned int, as
1237 // appropriate.]
1238 void pout(const char *fmt, ...){
1239 va_list ap;
1240
1241 // get the correct time in syslog()
1242 FixGlibcTimeZoneBug();
1243 // initialize variable argument list
1244 va_start(ap,fmt);
1245 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1246 if (debugmode && debugmode != 2) {
1247 FILE * f = stdout;
1248 #ifdef _WIN32
1249 if (facility == LOG_LOCAL1) // logging to stdout
1250 f = stderr;
1251 #endif
1252 vfprintf(f, fmt, ap);
1253 fflush(f);
1254 }
1255 // in debugmode==2 mode we print output from knowndrives.o functions
1256 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1257 openlog("smartd", LOG_PID, facility);
1258 vsyslog_lines(LOG_INFO, fmt, ap);
1259 closelog();
1260 }
1261 va_end(ap);
1262 return;
1263 }
1264
1265 // This function prints either to stdout or to the syslog as needed.
1266 static void PrintOut(int priority, const char *fmt, ...){
1267 va_list ap;
1268
1269 // get the correct time in syslog()
1270 FixGlibcTimeZoneBug();
1271 // initialize variable argument list
1272 va_start(ap,fmt);
1273 if (debugmode) {
1274 FILE * f = stdout;
1275 #ifdef _WIN32
1276 if (facility == LOG_LOCAL1) // logging to stdout
1277 f = stderr;
1278 #endif
1279 vfprintf(f, fmt, ap);
1280 fflush(f);
1281 }
1282 else {
1283 openlog("smartd", LOG_PID, facility);
1284 vsyslog_lines(priority, fmt, ap);
1285 closelog();
1286 }
1287 va_end(ap);
1288 return;
1289 }
1290
1291 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1292 void checksumwarning(const char * string)
1293 {
1294 pout("Warning! %s error: invalid SMART checksum.\n", string);
1295 }
1296
1297 #ifndef _WIN32
1298
1299 // Wait for the pid file to show up, this makes sure a calling program knows
1300 // that the daemon is really up and running and has a pid to kill it
1301 static bool WaitForPidFile()
1302 {
1303 int waited, max_wait = 10;
1304 struct stat stat_buf;
1305
1306 if (pid_file.empty() || debugmode)
1307 return true;
1308
1309 for(waited = 0; waited < max_wait; ++waited) {
1310 if (!stat(pid_file.c_str(), &stat_buf)) {
1311 return true;
1312 } else
1313 sleep(1);
1314 }
1315 return false;
1316 }
1317
1318 #endif // _WIN32
1319
1320 // Forks new process, closes ALL file descriptors, redirects stdin,
1321 // stdout, and stderr. Not quite daemon(). See
1322 // http://www.linuxjournal.com/article/2335
1323 // for a good description of why we do things this way.
1324 static void DaemonInit()
1325 {
1326 #ifndef _WIN32
1327
1328 // flush all buffered streams. Else we might get two copies of open
1329 // streams since both parent and child get copies of the buffers.
1330 fflush(NULL);
1331
1332 if (do_fork) {
1333 pid_t pid;
1334 if ((pid=fork()) < 0) {
1335 // unable to fork!
1336 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1337 EXIT(EXIT_STARTUP);
1338 }
1339 else if (pid) {
1340 // we are the parent process, wait for pid file, then exit cleanly
1341 if(!WaitForPidFile()) {
1342 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1343 EXIT(EXIT_STARTUP);
1344 } else
1345 EXIT(0);
1346 }
1347
1348 // from here on, we are the child process.
1349 setsid();
1350
1351 // Fork one more time to avoid any possibility of having terminals
1352 if ((pid=fork()) < 0) {
1353 // unable to fork!
1354 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1355 EXIT(EXIT_STARTUP);
1356 }
1357 else if (pid)
1358 // we are the parent process -- exit cleanly
1359 EXIT(0);
1360
1361 // Now we are the child's child...
1362 }
1363
1364 // close any open file descriptors
1365 for (int i = getdtablesize(); --i >= 0; )
1366 close(i);
1367
1368 // redirect any IO attempts to /dev/null and change to root directory
1369 int fd = open("/dev/null", O_RDWR);
1370 if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
1371 PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1372 EXIT(EXIT_STARTUP);
1373 }
1374 umask(0022);
1375
1376 if (do_fork)
1377 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1378
1379 #else // _WIN32
1380
1381 // No fork() on native Win32
1382 // Detach this process from console
1383 fflush(NULL);
1384 if (daemon_detach("smartd")) {
1385 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1386 EXIT(EXIT_STARTUP);
1387 }
1388 // stdin/out/err now closed if not redirected
1389
1390 #endif // _WIN32
1391 return;
1392 }
1393
1394 // create a PID file containing the current process id
1395 static void WritePidFile()
1396 {
1397 if (!pid_file.empty()) {
1398 pid_t pid = getpid();
1399 mode_t old_umask;
1400 #ifndef __CYGWIN__
1401 old_umask = umask(0077); // rwx------
1402 #else
1403 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1404 old_umask = umask(0033); // rwxr--r--
1405 #endif
1406
1407 stdio_file f(pid_file.c_str(), "w");
1408 umask(old_umask);
1409 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1410 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1411 EXIT(EXIT_PID);
1412 }
1413 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1414 }
1415 }
1416
1417 // Prints header identifying version of code and home
1418 static void PrintHead()
1419 {
1420 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1421 }
1422
1423 // prints help info for configuration file Directives
1424 static void Directives()
1425 {
1426 PrintOut(LOG_INFO,
1427 "Configuration file (%s) Directives (after device name):\n"
1428 " -d TYPE Set the device type: auto, ignore, removable,\n"
1429 " %s\n"
1430 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1431 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1432 " -S VAL Enable/disable attribute autosave (on/off)\n"
1433 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1434 " -H Monitor SMART Health Status, report if failed\n"
1435 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1436 " -l TYPE Monitor SMART log or self-test status:\n"
1437 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1438 " -l scterc,R,W Set SCT Error Recovery Control\n"
1439 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1440 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1441 " -f Monitor 'Usage' Attributes, report failures\n"
1442 " -m ADD Send email warning to address ADD\n"
1443 " -M TYPE Modify email warning behavior (see man page)\n"
1444 " -p Report changes in 'Prefailure' Attributes\n"
1445 " -u Report changes in 'Usage' Attributes\n"
1446 " -t Equivalent to -p and -u Directives\n"
1447 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1448 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1449 " -i ID Ignore Attribute ID for -f Directive\n"
1450 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1451 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1452 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1453 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1454 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1455 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1456 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1457 " -F TYPE Use firmware bug workaround:\n"
1458 " %s\n"
1459 " # Comment: text after a hash sign is ignored\n"
1460 " \\ Line continuation character\n"
1461 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1462 "Use ID = 0 to turn off -C and/or -U Directives\n"
1463 "Example: /dev/sda -a\n",
1464 configfile,
1465 smi()->get_valid_dev_types_str().c_str(),
1466 get_valid_firmwarebug_args());
1467 }
1468
/* Returns a pointer to a static string describing the valid arguments
   of the command line option opt, or NULL if opt is not known here. */
static const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";
  if (opt == 'B')
    return "[+]<FILE_NAME>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
  if (opt == 'p' || opt == 'w')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";
  return NULL;
}
1496
1497 /* prints help information for command syntax */
1498 static void Usage()
1499 {
1500 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1501 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1502 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1503 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1504 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1505 #endif
1506 PrintOut(LOG_INFO,"\n");
1507 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1508 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1509 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1510 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1511 PrintOut(LOG_INFO,"\n");
1512 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1513 #endif
1514 PrintOut(LOG_INFO,"]\n\n");
1515 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1516 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1517 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1518 #ifdef HAVE_LIBCAP_NG
1519 PrintOut(LOG_INFO," -C, --capabilities\n");
1520 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1521 " Warning: Mail notification does not work when used.\n\n");
1522 #endif
1523 PrintOut(LOG_INFO," -d, --debug\n");
1524 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1525 PrintOut(LOG_INFO," -D, --showdirectives\n");
1526 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1527 PrintOut(LOG_INFO," -h, --help, --usage\n");
1528 PrintOut(LOG_INFO," Display this help and exit\n\n");
1529 PrintOut(LOG_INFO," -i N, --interval=N\n");
1530 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1531 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1532 #ifndef _WIN32
1533 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1534 #else
1535 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1536 #endif
1537 #ifndef _WIN32
1538 PrintOut(LOG_INFO," -n, --no-fork\n");
1539 PrintOut(LOG_INFO," Do not fork into background\n\n");
1540 #endif // _WIN32
1541 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1542 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1543 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1544 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1545 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1546 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1547 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1548 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1549 #ifdef SMARTMONTOOLS_SAVESTATES
1550 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1551 #endif
1552 PrintOut(LOG_INFO,"\n");
1553 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1554 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1555 #ifndef _WIN32
1556 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1557 #else
1558 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1559 #endif
1560 #ifdef _WIN32
1561 PrintOut(LOG_INFO," --service\n");
1562 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1563 PrintOut(LOG_INFO," smartd install [options]\n");
1564 PrintOut(LOG_INFO," Remove service with:\n");
1565 PrintOut(LOG_INFO," smartd remove\n\n");
1566 #endif // _WIN32
1567 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1568 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1569 }
1570
1571 static int CloseDevice(smart_device * device, const char * name)
1572 {
1573 if (!device->close()){
1574 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1575 return 1;
1576 }
1577 // device sucessfully closed
1578 return 0;
1579 }
1580
// Return true if C must not appear in a state file name.
// Only the characters 0-9, A-Z and a-z are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1588
1589 // Read error count from Summary or Extended Comprehensive SMART error log
1590 // Return -1 on error
1591 static int read_ata_error_count(ata_device * device, const char * name,
1592 firmwarebug_defs firmwarebugs, bool extended)
1593 {
1594 if (!extended) {
1595 ata_smart_errorlog log;
1596 if (ataReadErrorLog(device, &log, firmwarebugs)){
1597 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1598 return -1;
1599 }
1600 return (log.error_log_pointer ? log.ata_error_count : 0);
1601 }
1602 else {
1603 ata_smart_exterrlog logx;
1604 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1605 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1606 return -1;
1607 }
1608 // Some disks use the reserved byte as index, see ataprint.cpp.
1609 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1610 }
1611 }
1612
1613 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1614 // error count, and top bits are the power-on hours of the last error.
1615 static int SelfTestErrorCount(ata_device * device, const char * name,
1616 firmwarebug_defs firmwarebugs)
1617 {
1618 struct ata_smart_selftestlog log;
1619
1620 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1621 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1622 return -1;
1623 }
1624
1625 // return current number of self-test errors
1626 return ataPrintSmartSelfTestlog(&log, false, firmwarebugs);
1627 }
1628
// Extract the two fields of a value returned by SelfTestErrorCount():
// the error count (bits 0-7) and the power-on hours (bits 8-23).
// The argument is parenthesized so that any expression can be passed.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1631
// Return true if the offline data collection status (with bit 7 masked
// out) has the value 0x03.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7f;
  return code == 0x03;
}
1637
// Return true if the upper four bits of the self-test execution status
// are all set.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return (status & 0xf0) == 0xf0;
}
1643
1644 // Log offline data collection status
1645 static void log_offline_data_coll_status(const char * name, unsigned char status)
1646 {
1647 const char * msg;
1648 switch (status & 0x7f) {
1649 case 0x00: msg = "was never started"; break;
1650 case 0x02: msg = "was completed without error"; break;
1651 case 0x03: msg = "is in progress"; break;
1652 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1653 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1654 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1655 default: msg = 0;
1656 }
1657
1658 if (msg)
1659 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1660 "Device: %s, offline data collection %s%s\n", name, msg,
1661 ((status & 0x80) ? " (auto:on)" : ""));
1662 else
1663 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1664 name, status);
1665 }
1666
1667 // Log self-test execution status
1668 static void log_self_test_exec_status(const char * name, unsigned char status)
1669 {
1670 const char * msg;
1671 switch (status >> 4) {
1672 case 0x0: msg = "completed without error"; break;
1673 case 0x1: msg = "was aborted by the host"; break;
1674 case 0x2: msg = "was interrupted by the host with a reset"; break;
1675 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1676 case 0x4: msg = "completed with error (unknown test element)"; break;
1677 case 0x5: msg = "completed with error (electrical test element)"; break;
1678 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1679 case 0x7: msg = "completed with error (read test element)"; break;
1680 case 0x8: msg = "completed with error (handling damage?)"; break;
1681 default: msg = 0;
1682 }
1683
1684 if (msg)
1685 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1686 "Device: %s, previous self-test %s\n", name, msg);
1687 else if ((status >> 4) == 0xf)
1688 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1689 name, status & 0x0f);
1690 else
1691 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1692 name, status);
1693 }
1694
1695 // Check pending sector count id (-C, -U directives).
1696 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1697 unsigned char id, const char * msg)
1698 {
1699 // Check attribute index
1700 int i = ata_find_attr_index(id, state.smartval);
1701 if (i < 0) {
1702 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1703 cfg.name.c_str(), msg, id);
1704 return false;
1705 }
1706
1707 // Check value
1708 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1709 cfg.attribute_defs);
1710 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1711 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1712 cfg.name.c_str(), msg, id, rawval, rawval);
1713 return false;
1714 }
1715
1716 return true;
1717 }
1718
1719 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1720 static void finish_device_scan(dev_config & cfg, dev_state & state)
1721 {
1722 // Set cfg.emailfreq if user hasn't set it
1723 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1724 // Avoid that emails are suppressed forever due to state persistence
1725 if (cfg.state_file.empty())
1726 cfg.emailfreq = 1; // '-M once'
1727 else
1728 cfg.emailfreq = 2; // '-M daily'
1729 }
1730
1731 // Start self-test regex check now if time was not read from state file
1732 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1733 state.scheduled_test_next_check = time(0);
1734 }
1735
1736 // Common function to format result message for ATA setting
1737 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1738 int set_option = 0, bool has_value = false)
1739 {
1740 if (!msg.empty())
1741 msg += ", ";
1742 msg += name;
1743 if (!ok)
1744 msg += ":--";
1745 else if (set_option < 0)
1746 msg += ":off";
1747 else if (has_value)
1748 msg += strprintf(":%d", set_option-1);
1749 else if (set_option > 0)
1750 msg += ":on";
1751 }
1752
1753 // Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1754 static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1755 {
1756 if (!cfg.id_is_unique)
1757 return false;
1758
1759 for (unsigned i = 0; i < prev_cfgs.size(); i++) {
1760 if (!prev_cfgs[i].id_is_unique)
1761 continue;
1762 if (cfg.dev_idinfo != prev_cfgs[i].dev_idinfo.c_str())
1763 continue;
1764
1765 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1766 cfg.dev_name.c_str(), prev_cfgs[i].dev_name.c_str());
1767 return true;
1768 }
1769
1770 return false;
1771 }
1772
1773 // TODO: Add '-F swapid' directive
1774 const bool fix_swapped_id = false;
1775
1776 // scan to see what ata devices there are, and if they support SMART
1777 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1778 const dev_config_vector * prev_cfgs)
1779 {
1780 int supported=0;
1781 struct ata_identify_device drive;
1782 const char *name = cfg.name.c_str();
1783 int retid;
1784
1785 // Device must be open
1786
1787 // Get drive identity structure
1788 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1789 if (retid<0)
1790 // Unable to read Identity structure
1791 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1792 else
1793 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1794 name, packetdevicetype(retid-1));
1795 CloseDevice(atadev, name);
1796 return 2;
1797 }
1798
1799 // Get drive identity, size and rotation rate (HDD/SSD)
1800 char model[40+1], serial[20+1], firmware[8+1];
1801 ata_format_id_string(model, drive.model, sizeof(model)-1);
1802 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1803 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1804
1805 ata_size_info sizes;
1806 ata_get_size_info(&drive, sizes);
1807 state.num_sectors = sizes.sectors;
1808 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1809
1810 char wwn[30]; wwn[0] = 0;
1811 unsigned oui = 0; uint64_t unique_id = 0;
1812 int naa = ata_get_wwn(&drive, oui, unique_id);
1813 if (naa >= 0)
1814 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1815
1816 // Format device id string for warning emails
1817 char cap[32];
1818 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1819 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1820 cfg.id_is_unique = true; // TODO: Check serial?
1821
1822 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1823
1824 // Check for duplicates
1825 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
1826 CloseDevice(atadev, name);
1827 return 1;
1828 }
1829
1830 // Show if device in database, and use preset vendor attribute
1831 // options unless user has requested otherwise.
1832 if (cfg.ignorepresets)
1833 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1834 else {
1835 // Apply vendor specific presets, print warning if present
1836 const drive_settings * dbentry = lookup_drive_apply_presets(
1837 &drive, cfg.attribute_defs, cfg.firmwarebugs);
1838 if (!dbentry)
1839 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1840 else {
1841 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1842 name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1843 if (*dbentry->warningmsg)
1844 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1845 }
1846 }
1847
1848 // Check for ATA Security LOCK
1849 unsigned short word128 = drive.words088_255[128-88];
1850 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1851 if (locked)
1852 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
1853
1854 // Set default '-C 197[+]' if no '-C ID' is specified.
1855 if (!cfg.curr_pending_set)
1856 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1857 // Set default '-U 198[+]' if no '-U ID' is specified.
1858 if (!cfg.offl_pending_set)
1859 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1860
1861 // If requested, show which presets would be used for this drive
1862 if (cfg.showpresets) {
1863 int savedebugmode=debugmode;
1864 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1865 if (!debugmode)
1866 debugmode=2;
1867 show_presets(&drive);
1868 debugmode=savedebugmode;
1869 }
1870
1871 // see if drive supports SMART
1872 supported=ataSmartSupport(&drive);
1873 if (supported!=1) {
1874 if (supported==0)
1875 // drive does NOT support SMART
1876 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1877 else
1878 // can't tell if drive supports SMART
1879 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1880
1881 // should we proceed anyway?
1882 if (cfg.permissive) {
1883 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1884 }
1885 else {
1886 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1887 CloseDevice(atadev, name);
1888 return 2;
1889 }
1890 }
1891
1892 if (ataEnableSmart(atadev)) {
1893 // Enable SMART command has failed
1894 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1895
1896 if (ataIsSmartEnabled(&drive) <= 0) {
1897 CloseDevice(atadev, name);
1898 return 2;
1899 }
1900 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
1901 }
1902
1903 // disable device attribute autosave...
1904 if (cfg.autosave==1) {
1905 if (ataDisableAutoSave(atadev))
1906 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1907 else
1908 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1909 }
1910
1911 // or enable device attribute autosave
1912 if (cfg.autosave==2) {
1913 if (ataEnableAutoSave(atadev))
1914 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1915 else
1916 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1917 }
1918
1919 // capability check: SMART status
1920 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1921 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1922 cfg.smartcheck = false;
1923 }
1924
1925 // capability check: Read smart values and thresholds. Note that
1926 // smart values are ALSO needed even if we ONLY want to know if the
1927 // device is self-test log or error-log capable! After ATA-5, this
1928 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1929 // but sadly not for ATA-5. Sigh.
1930
1931 // do we need to get SMART data?
1932 bool smart_val_ok = false;
1933 if ( cfg.autoofflinetest || cfg.selftest
1934 || cfg.errorlog || cfg.xerrorlog
1935 || cfg.offlinests || cfg.selfteststs
1936 || cfg.usagefailed || cfg.prefail || cfg.usage
1937 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1938 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1939
1940 if (ataReadSmartValues(atadev, &state.smartval)) {
1941 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1942 cfg.usagefailed = cfg.prefail = cfg.usage = false;
1943 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1944 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1945 }
1946 else {
1947 smart_val_ok = true;
1948 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1949 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1950 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1951 cfg.usagefailed = false;
1952 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1953 memset(&state.smartthres, 0, sizeof(state.smartthres));
1954 }
1955 }
1956
1957 // see if the necessary Attribute is there to monitor offline or
1958 // current pending sectors or temperature
1959 if ( cfg.curr_pending_id
1960 && !check_pending_id(cfg, state, cfg.curr_pending_id,
1961 "Current_Pending_Sector"))
1962 cfg.curr_pending_id = 0;
1963
1964 if ( cfg.offl_pending_id
1965 && !check_pending_id(cfg, state, cfg.offl_pending_id,
1966 "Offline_Uncorrectable"))
1967 cfg.offl_pending_id = 0;
1968
1969 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1970 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1971 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
1972 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1973 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1974 }
1975
1976 // Report ignored '-r' or '-R' directives
1977 for (int id = 1; id <= 255; id++) {
1978 if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
1979 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
1980 const char * excl = (cfg.monitor_attr_flags.is_set(id,
1981 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
1982
1983 int idx = ata_find_attr_index(id, state.smartval);
1984 if (idx < 0)
1985 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
1986 else {
1987 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
1988 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
1989 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
1990 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
1991 }
1992 }
1993 }
1994 }
1995
1996 // enable/disable automatic on-line testing
1997 if (cfg.autoofflinetest) {
1998 // is this an enable or disable request?
1999 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2000 if (!smart_val_ok)
2001 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2002 else {
2003 // if command appears unsupported, issue a warning...
2004 if (!isSupportAutomaticTimer(&state.smartval))
2005 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2006 // ... but then try anyway
2007 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2008 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2009 else
2010 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2011 }
2012 }
2013
2014 // Read log directories if required for capability check
2015 ata_smart_log_directory smart_logdir, gp_logdir;
2016 bool smart_logdir_ok = false, gp_logdir_ok = false;
2017
2018 if ( isGeneralPurposeLoggingCapable(&drive)
2019 && (cfg.errorlog || cfg.selftest)
2020 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2021 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2022 smart_logdir_ok = true;
2023 }
2024
2025 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2026 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2027 gp_logdir_ok = true;
2028 }
2029
2030 // capability check: self-test-log
2031 state.selflogcount = 0; state.selfloghour = 0;
2032 if (cfg.selftest) {
2033 int retval;
2034 if (!( cfg.permissive
2035 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2036 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2037 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2038 cfg.selftest = false;
2039 }
2040 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2041 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2042 cfg.selftest = false;
2043 }
2044 else {
2045 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2046 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2047 }
2048 }
2049
2050 // capability check: ATA error log
2051 state.ataerrorcount = 0;
2052 if (cfg.errorlog) {
2053 int errcnt1;
2054 if (!( cfg.permissive
2055 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2056 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2057 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2058 cfg.errorlog = false;
2059 }
2060 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2061 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2062 cfg.errorlog = false;
2063 }
2064 else
2065 state.ataerrorcount = errcnt1;
2066 }
2067
2068 if (cfg.xerrorlog) {
2069 int errcnt2;
2070 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2071 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2072 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2073 name);
2074 cfg.xerrorlog = false;
2075 }
2076 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2077 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2078 cfg.xerrorlog = false;
2079 }
2080 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2081 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2082 name, state.ataerrorcount, errcnt2);
2083 // Record max error count
2084 if (errcnt2 > state.ataerrorcount)
2085 state.ataerrorcount = errcnt2;
2086 }
2087 else
2088 state.ataerrorcount = errcnt2;
2089 }
2090
2091 // capability check: self-test and offline data collection status
2092 if (cfg.offlinests || cfg.selfteststs) {
2093 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2094 if (cfg.offlinests)
2095 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2096 if (cfg.selfteststs)
2097 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2098 cfg.offlinests = cfg.selfteststs = false;
2099 }
2100 }
2101
2102 // capabilities check -- does it support powermode?
2103 if (cfg.powermode) {
2104 int powermode = ataCheckPowerMode(atadev);
2105
2106 if (-1 == powermode) {
2107 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2108 cfg.powermode=0;
2109 }
2110 else if (powermode!=0x00 && powermode!=0x01
2111 && powermode!=0x40 && powermode!=0x41
2112 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2113 && powermode!=0xff) {
2114 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2115 name, powermode);
2116 cfg.powermode=0;
2117 }
2118 }
2119
2120 // Apply ATA settings
2121 std::string msg;
2122
2123 if (cfg.set_aam)
2124 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2125 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2126 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2127
2128 if (cfg.set_apm)
2129 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2130 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2131 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2132
2133 if (cfg.set_lookahead)
2134 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2135 (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2136 cfg.set_lookahead);
2137
2138 if (cfg.set_wcache)
2139 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2140 (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2141
2142 if (cfg.set_dsn)
2143 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2144 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2145
2146 if (cfg.set_security_freeze)
2147 format_set_result_msg(msg, "Security freeze",
2148 ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2149
2150 if (cfg.set_standby)
2151 format_set_result_msg(msg, "Standby",
2152 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2153
2154 // Report as one log entry
2155 if (!msg.empty())
2156 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2157
2158 // set SCT Error Recovery Control if requested
2159 if (cfg.sct_erc_set) {
2160 if (!isSCTErrorRecoveryControlCapable(&drive))
2161 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2162 name);
2163 else if (locked)
2164 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2165 name);
2166 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2167 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2168 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2169 else
2170 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2171 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2172 }
2173
2174 // If no tests available or selected, return
2175 if (!( cfg.smartcheck || cfg.selftest
2176 || cfg.errorlog || cfg.xerrorlog
2177 || cfg.offlinests || cfg.selfteststs
2178 || cfg.usagefailed || cfg.prefail || cfg.usage
2179 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2180 CloseDevice(atadev, name);
2181 return 3;
2182 }
2183
2184 // tell user we are registering device
2185 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2186
2187 // close file descriptor
2188 CloseDevice(atadev, name);
2189
2190 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2191 // Build file name for state file
2192 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2193 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2194 if (!state_path_prefix.empty()) {
2195 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2196 // Read previous state
2197 if (read_dev_state(cfg.state_file.c_str(), state)) {
2198 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2199 // Copy ATA attribute values to temp state
2200 state.update_temp_state();
2201 }
2202 }
2203 if (!attrlog_path_prefix.empty())
2204 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2205 }
2206
2207 finish_device_scan(cfg, state);
2208
2209 return 0;
2210 }
2211
2212 // on success, return 0. On failure, return >0. Never return <0,
2213 // please.
2214 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2215 const dev_config_vector * prev_cfgs)
2216 {
2217 int err, req_len, avail_len, version, len;
2218 const char *device = cfg.name.c_str();
2219 struct scsi_iec_mode_page iec;
2220 UINT8 tBuf[64];
2221 UINT8 inqBuf[96];
2222 UINT8 vpdBuf[252];
2223 char lu_id[64], serial[256], vendor[40], model[40];
2224
2225 // Device must be open
2226 memset(inqBuf, 0, 96);
2227 req_len = 36;
2228 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2229 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2230 req_len = 64;
2231 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2232 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2233 "skip device\n", device);
2234 return 2;
2235 }
2236 }
2237 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2238
2239 avail_len = inqBuf[4] + 5;
2240 len = (avail_len < req_len) ? avail_len : req_len;
2241 if (len < 36) {
2242 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2243 "skip device\n", device);
2244 return 2;
2245 }
2246
2247 int pdt = inqBuf[0] & 0x1f;
2248
2249 if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2250 (0xe == pdt))) {
2251 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2252 "skip\n", device, pdt);
2253 return 2;
2254 }
2255
2256 if (supported_vpd_pages_p) {
2257 delete supported_vpd_pages_p;
2258 supported_vpd_pages_p = NULL;
2259 }
2260 supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2261
2262 lu_id[0] = '\0';
2263 if ((version >= 0x3) && (version < 0x8)) {
2264 /* SPC to SPC-5 */
2265 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2266 vpdBuf, sizeof(vpdBuf))) {
2267 len = vpdBuf[3];
2268 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2269 }
2270 }
2271 serial[0] = '\0';
2272 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2273 vpdBuf, sizeof(vpdBuf))) {
2274 len = vpdBuf[3];
2275 vpdBuf[4 + len] = '\0';
2276 scsi_format_id_string(serial, (const unsigned char *)&vpdBuf[4], len);
2277 }
2278
2279 unsigned int lb_size;
2280 char si_str[64];
2281 uint64_t capacity = scsiGetSize(scsidev, &lb_size, NULL);
2282
2283 if (capacity)
2284 format_capacity(si_str, sizeof(si_str), capacity, ".");
2285 else
2286 si_str[0] = '\0';
2287
2288 // Format device id string for warning emails
2289 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2290 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2291 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2292 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2293 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2294 cfg.id_is_unique = (lu_id[0] || serial[0]);
2295
2296 // format "model" string
2297 scsi_format_id_string(vendor, (const unsigned char *)&inqBuf[8], 8);
2298 scsi_format_id_string(model, (const unsigned char *)&inqBuf[16], 16);
2299 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2300
2301 // Check for duplicates
2302 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2303 CloseDevice(scsidev, device);
2304 return 1;
2305 }
2306
2307 // check that device is ready for commands. IE stores its stuff on
2308 // the media.
2309 if ((err = scsiTestUnitReady(scsidev))) {
2310 if (SIMPLE_ERR_NOT_READY == err)
2311 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2312 else if (SIMPLE_ERR_NO_MEDIUM == err)
2313 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2314 else if (SIMPLE_ERR_BECOMING_READY == err)
2315 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2316 else
2317 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2318 CloseDevice(scsidev, device);
2319 return 2;
2320 }
2321
2322 // Badly-conforming USB storage devices may fail this check.
2323 // The response to the following IE mode page fetch (current and
2324 // changeable values) is carefully examined. It has been found
2325 // that various USB devices that malform the response will lock up
2326 // if asked for a log page (e.g. temperature) so it is best to
2327 // bail out now.
2328 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2329 state.modese_len = iec.modese_len;
2330 else if (SIMPLE_ERR_BAD_FIELD == err)
2331 ; /* continue since it is reasonable not to support IE mpage */
2332 else { /* any other error (including malformed response) unreasonable */
2333 PrintOut(LOG_INFO,
2334 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2335 device, err);
2336 CloseDevice(scsidev, device);
2337 return 3;
2338 }
2339
2340 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2341 // smart if it is off). This may change to be the same as the ATA side.
2342 if (!scsi_IsExceptionControlEnabled(&iec)) {
2343 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2344 "Try 'smartctl -s on %s' to turn on SMART features\n",
2345 device, device);
2346 CloseDevice(scsidev, device);
2347 return 3;
2348 }
2349
2350 // Flag that certain log pages are supported (information may be
2351 // available from other sources).
2352 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2353 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2354 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2355 {
2356 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2357 switch (tBuf[k]) {
2358 case TEMPERATURE_LPAGE:
2359 state.TempPageSupported = 1;
2360 break;
2361 case IE_LPAGE:
2362 state.SmartPageSupported = 1;
2363 break;
2364 case READ_ERROR_COUNTER_LPAGE:
2365 state.ReadECounterPageSupported = 1;
2366 break;
2367 case WRITE_ERROR_COUNTER_LPAGE:
2368 state.WriteECounterPageSupported = 1;
2369 break;
2370 case VERIFY_ERROR_COUNTER_LPAGE:
2371 state.VerifyECounterPageSupported = 1;
2372 break;
2373 case NON_MEDIUM_ERROR_LPAGE:
2374 state.NonMediumErrorPageSupported = 1;
2375 break;
2376 default:
2377 break;
2378 }
2379 }
2380 }
2381
2382 // Check if scsiCheckIE() is going to work
2383 {
2384 UINT8 asc = 0;
2385 UINT8 ascq = 0;
2386 UINT8 currenttemp = 0;
2387 UINT8 triptemp = 0;
2388
2389 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2390 &asc, &ascq, &currenttemp, &triptemp)) {
2391 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2392 state.SuppressReport = 1;
2393 }
2394 if ( (state.SuppressReport || !currenttemp)
2395 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2396 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2397 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2398 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2399 }
2400 }
2401
2402 // capability check: self-test-log
2403 if (cfg.selftest){
2404 int retval = scsiCountFailedSelfTests(scsidev, 0);
2405 if (retval<0) {
2406 // no self-test log, turn off monitoring
2407 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2408 cfg.selftest = false;
2409 state.selflogcount = 0;
2410 state.selfloghour = 0;
2411 }
2412 else {
2413 // register starting values to watch for changes
2414 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2415 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2416 }
2417 }
2418
2419 // disable autosave (set GLTSD bit)
2420 if (cfg.autosave==1){
2421 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2422 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2423 else
2424 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2425 }
2426
2427 // or enable autosave (clear GLTSD bit)
2428 if (cfg.autosave==2){
2429 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2430 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2431 else
2432 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2433 }
2434
2435 // tell user we are registering device
2436 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2437
2438 // Make sure that init_standby_check() ignores SCSI devices
2439 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2440
2441 // close file descriptor
2442 CloseDevice(scsidev, device);
2443
2444 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2445 // Build file name for state file
2446 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2447 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2448 if (!state_path_prefix.empty()) {
2449 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2450 // Read previous state
2451 if (read_dev_state(cfg.state_file.c_str(), state)) {
2452 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2453 // Copy ATA attribute values to temp state
2454 state.update_temp_state();
2455 }
2456 }
2457 if (!attrlog_path_prefix.empty())
2458 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2459 }
2460
2461 finish_device_scan(cfg, state);
2462
2463 return 0;
2464 }
2465
// Reduce a 128 bit little endian integer to uint64_t.
// Values which do not fit are reported as the largest uint64_t value.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Any nonzero byte in the upper 64 bits means overflow
  for (int pos = 15; pos >= 8; pos--) {
    if (val[pos] != 0)
      return ~(uint64_t)0;
  }

  // Assemble the lower 64 bits, most significant byte first
  uint64_t result = 0;
  for (int pos = 7; pos >= 0; pos--)
    result = (result << 8) | val[pos];
  return result;
}
2479
2480 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
2481 static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2482 {
2483 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2484 for (int i = 0; i < 8; i++) {
2485 if (smart_log.temp_sensor[i] > k)
2486 k = smart_log.temp_sensor[i];
2487 }
2488 return k;
2489 }
2490
2491 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2492 const dev_config_vector * prev_cfgs)
2493 {
2494 const char *name = cfg.name.c_str();
2495
2496 // Device must be open
2497
2498 // Get ID Controller
2499 nvme_id_ctrl id_ctrl;
2500 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2501 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2502 CloseDevice(nvmedev, name);
2503 return 2;
2504 }
2505
2506 // Get drive identity
2507 char model[40+1], serial[20+1], firmware[8+1];
2508 format_char_array(model, id_ctrl.mn);
2509 format_char_array(serial, id_ctrl.sn);
2510 format_char_array(firmware, id_ctrl.fr);
2511
2512 // Format device id string for warning emails
2513 char nsstr[32] = "", capstr[32] = "";
2514 unsigned nsid = nvmedev->get_nsid();
2515 if (nsid != 0xffffffff)
2516 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2517 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2518 if (capacity)
2519 format_capacity(capstr, sizeof(capstr), capacity, ".");
2520 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2521 nsstr, (capstr[0] ? ", " : ""), capstr);
2522 cfg.id_is_unique = true; // TODO: Check serial?
2523
2524 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2525
2526 // Check for duplicates
2527 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2528 CloseDevice(nvmedev, name);
2529 return 1;
2530 }
2531
2532 // Read SMART/Health log
2533 nvme_smart_log smart_log;
2534 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2535 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2536 CloseDevice(nvmedev, name);
2537 return 2;
2538 }
2539
2540 // Check temperature sensor support
2541 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2542 if (!nvme_get_max_temp_kelvin(smart_log)) {
2543 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2544 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2545 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2546 }
2547 }
2548
2549 // Init total error count
2550 if (cfg.errorlog || cfg.xerrorlog) {
2551 state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
2552 }
2553
2554 // If no supported tests selected, return
2555 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2556 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2557 CloseDevice(nvmedev, name);
2558 return 3;
2559 }
2560
2561 // Tell user we are registering device
2562 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2563
2564 // Make sure that init_standby_check() ignores NVMe devices
2565 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2566
2567 CloseDevice(nvmedev, name);
2568
2569 if (!state_path_prefix.empty()) {
2570 // Build file name for state file
2571 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2572 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2573 nsstr[0] = 0;
2574 if (nsid != 0xffffffff)
2575 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2576 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2577 // Read previous state
2578 if (read_dev_state(cfg.state_file.c_str(), state))
2579 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2580 }
2581
2582 finish_device_scan(cfg, state);
2583
2584 return 0;
2585 }
2586
2587 // Open device for next check, return false on error
2588 static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2589 const char * type)
2590 {
2591 const char * name = cfg.name.c_str();
2592
2593 // If user has asked, test the email warning system
2594 if (cfg.emailtest)
2595 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2596
2597 // User may have requested (with the -n Directive) to leave the disk
2598 // alone if it is in idle or standby mode. In this case check the
2599 // power mode first before opening the device for full access,
2600 // and exit without check if disk is reported in standby.
2601 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2602 // Note that 'is_powered_down()' handles opening the device itself, and
2603 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2604 if (device->is_powered_down())
2605 {
2606 // skip at most powerskipmax checks
2607 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2608 // report first only except if state has changed, avoid waking up system disk
2609 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2610 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2611 state.lastpowermodeskipped = -1;
2612 }
2613 state.powerskipcnt++;
2614 return false;
2615 }
2616 }
2617 }
2618
2619 // if we can't open device, fail gracefully rather than hard --
2620 // perhaps the next time around we'll be able to open it
2621 if (!device->open()) {
2622 // For removable devices, print error message only once and suppress email
2623 if (!cfg.removable) {
2624 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2625 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2626 }
2627 else if (!state.removed) {
2628 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2629 state.removed = true;
2630 }
2631 else if (debugmode)
2632 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2633 return false;
2634 }
2635
2636 if (debugmode)
2637 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2638
2639 if (!cfg.removable)
2640 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2641 else if (state.removed) {
2642 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2643 state.removed = false;
2644 }
2645
2646 return true;
2647 }
2648
2649 // If the self-test log has got more self-test errors (or more recent
2650 // self-test errors) recorded, then notify user.
2651 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2652 {
2653 const char * name = cfg.name.c_str();
2654
2655 if (newi<0)
2656 // command failed
2657 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2658 else {
2659 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2660
2661 // old and new error counts
2662 int oldc=state.selflogcount;
2663 int newc=SELFTEST_ERRORCOUNT(newi);
2664
2665 // old and new error timestamps in hours
2666 int oldh=state.selfloghour;
2667 int newh=SELFTEST_ERRORHOURS(newi);
2668
2669 if (oldc<newc) {
2670 // increase in error count
2671 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2672 name, oldc, newc);
2673 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2674 name, oldc, newc);
2675 state.must_write = true;
2676 }
2677 else if (newc > 0 && oldh != newh) {
2678 // more recent error
2679 // a 'more recent' error might actually be a smaller hour number,
2680 // if the hour number has wrapped.
2681 // There's still a bug here. You might just happen to run a new test
2682 // exactly 32768 hours after the previous failure, and have run exactly
2683 // 20 tests between the two, in which case smartd will miss the
2684 // new failure.
2685 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2686 name, newh);
2687 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2688 name, newh);
2689 state.must_write = true;
2690 }
2691
2692 // Print info if error entries have disappeared
2693 // or newer successful successful extended self-test exits
2694 if (oldc > newc) {
2695 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2696 name, oldc, newc);
2697 if (newc == 0)
2698 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2699 }
2700
2701 // Needed since self-test error count may DECREASE. Hour might
2702 // also have changed.
2703 state.selflogcount= newc;
2704 state.selfloghour = newh;
2705 }
2706 return;
2707 }
2708
// Characters identifying the self-test types, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (the terminating NUL of the string is not counted).
static const unsigned num_test_types =
  sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
2712
2713 // returns test type if time to do test of type testtype,
2714 // 0 if not time to do test.
2715 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2716 {
2717 // check that self-testing has been requested
2718 if (cfg.test_regex.empty())
2719 return 0;
2720
2721 // Exit if drive not capable of any test
2722 if ( state.not_cap_long && state.not_cap_short &&
2723 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2724 return 0;
2725
2726 // since we are about to call localtime(), be sure glibc is informed
2727 // of any timezone changes we make.
2728 if (!usetime)
2729 FixGlibcTimeZoneBug();
2730
2731 // Is it time for next check?
2732 time_t now = (!usetime ? time(0) : usetime);
2733 if (now < state.scheduled_test_next_check)
2734 return 0;
2735
2736 // Limit time check interval to 90 days
2737 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2738 state.scheduled_test_next_check = now - (3600L*24*90);
2739
2740 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2741 char testtype = 0;
2742 time_t testtime = 0; int testhour = 0;
2743 int maxtest = num_test_types-1;
2744
2745 for (time_t t = state.scheduled_test_next_check; ; ) {
2746 struct tm * tms = localtime(&t);
2747 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2748 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2749 for (int i = 0; i <= maxtest; i++) {
2750 // Skip if drive not capable of this test
2751 switch (test_type_chars[i]) {
2752 case 'L': if (state.not_cap_long) continue; break;
2753 case 'S': if (state.not_cap_short) continue; break;
2754 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2755 case 'O': if (scsi || state.not_cap_offline) continue; break;
2756 case 'c': case 'n':
2757 case 'r': if (scsi || state.not_cap_selective) continue; break;
2758 default: continue;
2759 }
2760 // Try match of "T/MM/DD/d/HH"
2761 char pattern[16];
2762 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2763 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2764 if (cfg.test_regex.full_match(pattern)) {
2765 // Test found
2766 testtype = pattern[0];
2767 testtime = t; testhour = tms->tm_hour;
2768 // Limit further matches to higher priority self-tests
2769 maxtest = i-1;
2770 break;
2771 }
2772 }
2773 // Exit if no tests left or current time reached
2774 if (maxtest < 0)
2775 break;
2776 if (t >= now)
2777 break;
2778 // Check next hour
2779 if ((t += 3600) > now)
2780 t = now;
2781 }
2782
2783 // Do next check not before next hour.
2784 struct tm * tmnow = localtime(&now);
2785 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2786
2787 if (testtype) {
2788 state.must_write = true;
2789 // Tell user if an old test was found.
2790 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2791 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2792 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2793 cfg.name.c_str(), testtype, datebuf);
2794 }
2795 }
2796
2797 return testtype;
2798 }
2799
2800 // Print a list of future tests.
2801 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2802 {
2803 unsigned numdev = configs.size();
2804 if (!numdev)
2805 return;
2806 std::vector<int> testcnts(numdev * num_test_types, 0);
2807
2808 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2809
2810 // FixGlibcTimeZoneBug(); // done in PrintOut()
2811 time_t now = time(0);
2812 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2813 dateandtimezoneepoch(datenow, now);
2814
2815 long seconds;
2816 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2817 // Check for each device whether a test will be run
2818 time_t testtime = now + seconds;
2819 for (unsigned i = 0; i < numdev; i++) {
2820 const dev_config & cfg = configs.at(i);
2821 dev_state & state = states.at(i);
2822 const char * p;
2823 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2824 if (testtype && (p = strchr(test_type_chars, testtype))) {
2825 unsigned t = (p - test_type_chars);
2826 // Report at most 5 tests of each type
2827 if (++testcnts[i*num_test_types + t] <= 5) {
2828 dateandtimezoneepoch(date, testtime);
2829 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2830 testcnts[i*num_test_types + t], testtype, date);
2831 }
2832 }
2833 }
2834 }
2835
2836 // Report totals
2837 dateandtimezoneepoch(date, now+seconds);
2838 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2839 for (unsigned i = 0; i < numdev; i++) {
2840 const dev_config & cfg = configs.at(i);
2841 bool scsi = devices.at(i)->is_scsi();
2842 for (unsigned t = 0; t < num_test_types; t++) {
2843 int cnt = testcnts[i*num_test_types + t];
2844 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2845 continue;
2846 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2847 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2848 }
2849 }
2850
2851 }
2852
2853 // Return zero on success, nonzero on failure. Perform offline (background)
2854 // short or long (extended) self test on given scsi device.
2855 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2856 {
2857 int retval = 0;
2858 const char *testname = 0;
2859 const char *name = cfg.name.c_str();
2860 int inProgress;
2861
2862 if (scsiSelfTestInProgress(device, &inProgress)) {
2863 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2864 state.not_cap_short = state.not_cap_long = true;
2865 return 1;
2866 }
2867
2868 if (1 == inProgress) {
2869 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2870 "progress.\n", name);
2871 return 1;
2872 }
2873
2874 switch (testtype) {
2875 case 'S':
2876 testname = "Short Self";
2877 retval = scsiSmartShortSelfTest(device);
2878 break;
2879 case 'L':
2880 testname = "Long Self";
2881 retval = scsiSmartExtendSelfTest(device);
2882 break;
2883 }
2884 // If we can't do the test, exit
2885 if (NULL == testname) {
2886 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2887 testtype);
2888 return 1;
2889 }
2890 if (retval) {
2891 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2892 (SIMPLE_ERR_BAD_FIELD == retval)) {
2893 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2894 testname);
2895 if ('L'==testtype)
2896 state.not_cap_long = true;
2897 else
2898 state.not_cap_short = true;
2899
2900 return 1;
2901 }
2902 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2903 testname, retval);
2904 return 1;
2905 }
2906
2907 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2908
2909 return 0;
2910 }
2911
2912 // Do an offline immediate or self-test. Return zero on success,
2913 // nonzero on failure.
2914 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2915 {
2916 const char *name = cfg.name.c_str();
2917
2918 // Read current smart data and check status/capability
2919 struct ata_smart_values data;
2920 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2921 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2922 return 1;
2923 }
2924
2925 // Check for capability to do the test
2926 int dotest = -1, mode = 0;
2927 const char *testname = 0;
2928 switch (testtype) {
2929 case 'O':
2930 testname="Offline Immediate ";
2931 if (isSupportExecuteOfflineImmediate(&data))
2932 dotest=OFFLINE_FULL_SCAN;
2933 else
2934 state.not_cap_offline = true;
2935 break;
2936 case 'C':
2937 testname="Conveyance Self-";
2938 if (isSupportConveyanceSelfTest(&data))
2939 dotest=CONVEYANCE_SELF_TEST;
2940 else
2941 state.not_cap_conveyance = true;
2942 break;
2943 case 'S':
2944 testname="Short Self-";
2945 if (isSupportSelfTest(&data))
2946 dotest=SHORT_SELF_TEST;
2947 else
2948 state.not_cap_short = true;
2949 break;
2950 case 'L':
2951 testname="Long Self-";
2952 if (isSupportSelfTest(&data))
2953 dotest=EXTEND_SELF_TEST;
2954 else
2955 state.not_cap_long = true;
2956 break;
2957
2958 case 'c': case 'n': case 'r':
2959 testname = "Selective Self-";
2960 if (isSupportSelectiveSelfTest(&data)) {
2961 dotest = SELECTIVE_SELF_TEST;
2962 switch (testtype) {
2963 case 'c': mode = SEL_CONT; break;
2964 case 'n': mode = SEL_NEXT; break;
2965 case 'r': mode = SEL_REDO; break;
2966 }
2967 }
2968 else
2969 state.not_cap_selective = true;
2970 break;
2971 }
2972
2973 // If we can't do the test, exit
2974 if (dotest<0) {
2975 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2976 return 1;
2977 }
2978
2979 // If currently running a self-test, do not interrupt it to start another.
2980 if (15==(data.self_test_exec_status >> 4)) {
2981 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
2982 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2983 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2984 } else {
2985 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2986 name, testname, (int)(data.self_test_exec_status & 0x0f));
2987 return 1;
2988 }
2989 }
2990
2991 if (dotest == SELECTIVE_SELF_TEST) {
2992 // Set test span
2993 ata_selective_selftest_args selargs, prev_args;
2994 selargs.num_spans = 1;
2995 selargs.span[0].mode = mode;
2996 prev_args.num_spans = 1;
2997 prev_args.span[0].start = state.selective_test_last_start;
2998 prev_args.span[0].end = state.selective_test_last_end;
2999 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3000 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3001 return 1;
3002 }
3003 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3004 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3005 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3006 start, end, end - start + 1,
3007 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3008 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3009 state.selective_test_last_start = start;
3010 state.selective_test_last_end = end;
3011 }
3012
3013 // execute the test, and return status
3014 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
3015 if (retval) {
3016 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3017 return retval;
3018 }
3019
3020 // Report recent test start to do_disable_standby_check()
3021 // and force log of next test status
3022 if (testtype == 'O')
3023 state.offline_started = true;
3024 else
3025 state.selftest_started = true;
3026
3027 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3028 return 0;
3029 }
3030
3031 // Check pending sector count attribute values (-C, -U directives).
3032 static void check_pending(const dev_config & cfg, dev_state & state,
3033 unsigned char id, bool increase_only,
3034 const ata_smart_values & smartval,
3035 int mailtype, const char * msg)
3036 {
3037 // Find attribute index
3038 int i = ata_find_attr_index(id, smartval);
3039 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3040 return;
3041
3042 // No report if no sectors pending.
3043 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3044 if (rawval == 0) {
3045 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3046 return;
3047 }
3048
3049 // If attribute is not reset, report only sector count increases.
3050 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3051 if (!(!increase_only || prev_rawval < rawval))
3052 return;
3053
3054 // Format message.
3055 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3056 if (prev_rawval > 0 && rawval != prev_rawval)
3057 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3058
3059 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3060 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3061 state.must_write = true;
3062 }
3063
// Format a temperature value for log output.
// Returns the literal "??" if 'x' is zero (value not set), otherwise the
// decimal representation of 'x' written into 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  // unset
  return "??";
}
3072
3073 // Check Temperature limits
3074 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3075 {
3076 if (!(0 < currtemp && currtemp < 255)) {
3077 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3078 return;
3079 }
3080
3081 // Update Max Temperature
3082 const char * minchg = "", * maxchg = "";
3083 if (currtemp > state.tempmax) {
3084 if (state.tempmax)
3085 maxchg = "!";
3086 state.tempmax = currtemp;
3087 state.must_write = true;
3088 }
3089
3090 char buf[20];
3091 if (!state.temperature) {
3092 // First check
3093 if (!state.tempmin || currtemp < state.tempmin)
3094 // Delay Min Temperature update by ~ 30 minutes.
3095 state.tempmin_delay = time(0) + CHECKTIME - 60;
3096 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3097 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3098 if (triptemp)
3099 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3100 state.temperature = currtemp;
3101 }
3102 else {
3103 if (state.tempmin_delay) {
3104 // End Min Temperature update delay if ...
3105 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3106 || (state.tempmin_delay <= time(0))) { // or delay time is over.
3107 state.tempmin_delay = 0;
3108 if (!state.tempmin)
3109 state.tempmin = 255;
3110 }
3111 }
3112
3113 // Update Min Temperature
3114 if (!state.tempmin_delay && currtemp < state.tempmin) {
3115 state.tempmin = currtemp;
3116 state.must_write = true;
3117 if (currtemp != state.temperature)
3118 minchg = "!";
3119 }
3120
3121 // Track changes
3122 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3123 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3124 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3125 state.temperature = currtemp;
3126 }
3127 }
3128
3129 // Check limits
3130 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3131 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3132 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3133 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3134 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3135 }
3136 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3137 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3138 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3139 }
3140 else if (cfg.tempcrit) {
3141 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3142 if (currtemp < limit)
3143 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3144 }
3145 }
3146
3147 // Check normalized and raw attribute values.
3148 static void check_attribute(const dev_config & cfg, dev_state & state,
3149 const ata_smart_attribute & attr,
3150 const ata_smart_attribute & prev,
3151 int attridx,
3152 const ata_smart_threshold_entry * thresholds)
3153 {
3154 // Check attribute and threshold
3155 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3156 if (attrstate == ATTRSTATE_NON_EXISTING)
3157 return;
3158
3159 // If requested, check for usage attributes that have failed.
3160 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3161 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
3162 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3163 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3164 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3165 state.must_write = true;
3166 }
3167
3168 // Return if we're not tracking this type of attribute
3169 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3170 if (!( ( prefail && cfg.prefail)
3171 || (!prefail && cfg.usage )))
3172 return;
3173
3174 // Return if '-I ID' was specified
3175 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
3176 return;
3177
3178 // Issue warning if they don't have the same ID in all structures.
3179 if (attr.id != prev.id) {
3180 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3181 cfg.name.c_str(), attr.id, prev.id);
3182 return;
3183 }
3184
3185 // Compare normalized values if valid.
3186 bool valchanged = false;
3187 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3188 if (attr.current != prev.current)
3189 valchanged = true;
3190 }
3191
3192 // Compare raw values if requested.
3193 bool rawchanged = false;
3194 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3195 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
3196 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
3197 rawchanged = true;
3198 }
3199
3200 // Return if no change
3201 if (!(valchanged || rawchanged))
3202 return;
3203
3204 // Format value strings
3205 std::string currstr, prevstr;
3206 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3207 // Print raw values only
3208 currstr = strprintf("%s (Raw)",
3209 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3210 prevstr = strprintf("%s (Raw)",
3211 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3212 }
3213 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3214 // Print normalized and raw values
3215 currstr = strprintf("%d [Raw %s]", attr.current,
3216 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3217 prevstr = strprintf("%d [Raw %s]", prev.current,
3218 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3219 }
3220 else {
3221 // Print normalized values only
3222 currstr = strprintf("%d", attr.current);
3223 prevstr = strprintf("%d", prev.current);
3224 }
3225
3226 // Format message
3227 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3228 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3229 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3230 prevstr.c_str(), currstr.c_str());
3231
3232 // Report this change as critical ?
3233 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3234 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3235 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3236 MailWarning(cfg, state, 2, "%s", msg.c_str());
3237 }
3238 else {
3239 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3240 }
3241 state.must_write = true;
3242 }
3243
3244
3245 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3246 bool firstpass, bool allow_selftests)
3247 {
3248 if (!open_device(cfg, state, atadev, "ATA"))
3249 return 1;
3250
3251 const char * name = cfg.name.c_str();
3252
3253 // user may have requested (with the -n Directive) to leave the disk
3254 // alone if it is in idle or sleeping mode. In this case check the
3255 // power mode and exit without check if needed
3256 if (cfg.powermode && !state.powermodefail) {
3257 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3258 const char * mode = 0;
3259 if (0 <= powermode && powermode < 0xff) {
3260 // wait for possible spin up and check again
3261 int powermode2;
3262 sleep(5);
3263 powermode2 = ataCheckPowerMode(atadev);
3264 if (powermode2 > powermode)
3265 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3266 powermode = powermode2;
3267 }
3268
3269 switch (powermode){
3270 case -1:
3271 // SLEEP
3272 mode="SLEEP";
3273 if (cfg.powermode>=1)
3274 dontcheck=1;
3275 break;
3276 case 0x00:
3277 // STANDBY
3278 mode="STANDBY";
3279 if (cfg.powermode>=2)
3280 dontcheck=1;
3281 break;
3282 case 0x01:
3283 // STANDBY_Y
3284 mode="STANDBY_Y";
3285 if (cfg.powermode>=2)
3286 dontcheck=1;
3287 break;
3288 case 0x80:
3289 // IDLE
3290 mode="IDLE";
3291 if (cfg.powermode>=3)
3292 dontcheck=1;
3293 break;
3294 case 0x81:
3295 // IDLE_A
3296 mode="IDLE_A";
3297 if (cfg.powermode>=3)
3298 dontcheck=1;
3299 break;
3300 case 0x82:
3301 // IDLE_B
3302 mode="IDLE_B";
3303 if (cfg.powermode>=3)
3304 dontcheck=1;
3305 break;
3306 case 0x83:
3307 // IDLE_C
3308 mode="IDLE_C";
3309 if (cfg.powermode>=3)
3310 dontcheck=1;
3311 break;
3312 case 0xff:
3313 // ACTIVE/IDLE
3314 case 0x40:
3315 // ACTIVE
3316 case 0x41:
3317 // ACTIVE
3318 mode="ACTIVE or IDLE";
3319 break;
3320 default:
3321 // UNKNOWN
3322 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3323 name, powermode);
3324 state.powermodefail = true;
3325 break;
3326 }
3327
3328 // if we are going to skip a check, return now
3329 if (dontcheck){
3330 // skip at most powerskipmax checks
3331 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3332 CloseDevice(atadev, name);
3333 // report first only except if state has changed, avoid waking up system disk
3334 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3335 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3336 state.lastpowermodeskipped = powermode;
3337 }
3338 state.powerskipcnt++;
3339 return 0;
3340 }
3341 else {
3342 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3343 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3344 }
3345 state.powerskipcnt = 0;
3346 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3347 }
3348 else if (state.powerskipcnt) {
3349 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3350 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3351 state.powerskipcnt = 0;
3352 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3353 }
3354 }
3355
3356 // check smart status
3357 if (cfg.smartcheck) {
3358 int status=ataSmartStatus2(atadev);
3359 if (status==-1){
3360 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3361 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3362 state.must_write = true;
3363 }
3364 else if (status==1){
3365 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3366 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3367 state.must_write = true;
3368 }
3369 }
3370
3371 // Check everything that depends upon SMART Data (eg, Attribute values)
3372 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3373 || cfg.curr_pending_id || cfg.offl_pending_id
3374 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3375 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3376
3377 // Read current attribute values.
3378 ata_smart_values curval;
3379 if (ataReadSmartValues(atadev, &curval)){
3380 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3381 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3382 state.must_write = true;
3383 }
3384 else {
3385 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3386
3387 // look for current or offline pending sectors
3388 if (cfg.curr_pending_id)
3389 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3390 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3391 : "Total unreadable (pending) sectors" ));
3392
3393 if (cfg.offl_pending_id)
3394 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3395 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3396 : "Total offline uncorrectable sectors"));
3397
3398 // check temperature limits
3399 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3400 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3401
3402 // look for failed usage attributes, or track usage or prefail attributes
3403 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3404 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3405 check_attribute(cfg, state,
3406 curval.vendor_attributes[i],
3407 state.smartval.vendor_attributes[i],
3408 i, state.smartthres.thres_entries);
3409 }
3410 }
3411
3412 // Log changes of offline data collection status
3413 if (cfg.offlinests) {
3414 if ( curval.offline_data_collection_status
3415 != state.smartval.offline_data_collection_status
3416 || state.offline_started // test was started in previous call
3417 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3418 log_offline_data_coll_status(name, curval.offline_data_collection_status);
3419 }
3420
3421 // Log changes of self-test execution status
3422 if (cfg.selfteststs) {
3423 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
3424 || state.selftest_started // test was started in previous call
3425 || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3426 log_self_test_exec_status(name, curval.self_test_exec_status);
3427 }
3428
3429 // Save the new values for the next time around
3430 state.smartval = curval;
3431 }
3432 }
3433 state.offline_started = state.selftest_started = false;
3434
3435 // check if number of selftest errors has increased (note: may also DECREASE)
3436 if (cfg.selftest)
3437 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3438
3439 // check if number of ATA errors has increased
3440 if (cfg.errorlog || cfg.xerrorlog) {
3441
3442 int errcnt1 = -1, errcnt2 = -1;
3443 if (cfg.errorlog)
3444 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3445 if (cfg.xerrorlog)
3446 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3447
3448 // new number of errors is max of both logs
3449 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3450
3451 // did command fail?
3452 if (newc<0)
3453 // lack of PrintOut here is INTENTIONAL
3454 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3455
3456 // has error count increased?
3457 int oldc = state.ataerrorcount;
3458 if (newc>oldc){
3459 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3460 name, oldc, newc);
3461 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3462 name, oldc, newc);
3463 state.must_write = true;
3464 }
3465
3466 if (newc>=0)
3467 state.ataerrorcount=newc;
3468 }
3469
3470 // if the user has asked, and device is capable (or we're not yet
3471 // sure) check whether a self test should be done now.
3472 if (allow_selftests && !cfg.test_regex.empty()) {
3473 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3474 if (testtype)
3475 DoATASelfTest(cfg, state, atadev, testtype);
3476 }
3477
3478 // Don't leave device open -- the OS/user may want to access it
3479 // before the next smartd cycle!
3480 CloseDevice(atadev, name);
3481
3482 // Copy ATA attribute values to persistent state
3483 state.update_persistent_state();
3484
3485 return 0;
3486 }
3487
3488 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3489 {
3490 if (!open_device(cfg, state, scsidev, "SCSI"))
3491 return 1;
3492
3493 const char * name = cfg.name.c_str();
3494
3495 UINT8 asc = 0, ascq = 0;
3496 UINT8 currenttemp = 0, triptemp = 0;
3497 if (!state.SuppressReport) {
3498 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3499 &asc, &ascq, &currenttemp, &triptemp)) {
3500 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3501 name);
3502 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3503 state.SuppressReport = 1;
3504 }
3505 }
3506 if (asc > 0) {
3507 const char * cp = scsiGetIEString(asc, ascq);
3508 if (cp) {
3509 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3510 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3511 } else if (asc == 4 && ascq == 9) {
3512 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3513 } else if (debugmode)
3514 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3515 name, (int)asc, (int)ascq);
3516 } else if (debugmode)
3517 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3518
3519 // check temperature limits
3520 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3521 CheckTemperature(cfg, state, currenttemp, triptemp);
3522
3523 // check if number of selftest errors has increased (note: may also DECREASE)
3524 if (cfg.selftest)
3525 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3526
3527 if (allow_selftests && !cfg.test_regex.empty()) {
3528 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3529 if (testtype)
3530 DoSCSISelfTest(cfg, state, scsidev, testtype);
3531 }
3532 if (!cfg.attrlog_file.empty()){
3533 // saving error counters to state
3534 UINT8 tBuf[252];
3535 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3536 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3537 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3538 state.scsi_error_counters[0].found=1;
3539 }
3540 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3541 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3542 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3543 state.scsi_error_counters[1].found=1;
3544 }
3545 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3546 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3547 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3548 state.scsi_error_counters[2].found=1;
3549 }
3550 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3551 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3552 scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3553 state.scsi_nonmedium_error.found=1;
3554 }
3555 // store temperature if not done by CheckTemperature() above
3556 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3557 state.temperature = currenttemp;
3558 }
3559 CloseDevice(scsidev, name);
3560 return 0;
3561 }
3562
3563 static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3564 {
3565 if (!open_device(cfg, state, nvmedev, "NVMe"))
3566 return 1;
3567
3568 const char * name = cfg.name.c_str();
3569
3570 // Read SMART/Health log
3571 nvme_smart_log smart_log;
3572 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3573 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3574 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3575 state.must_write = true;
3576 return 0;
3577 }
3578
3579 // Check Critical Warning bits
3580 if (cfg.smartcheck && smart_log.critical_warning) {
3581 unsigned char w = smart_log.critical_warning;
3582 std::string msg;
3583 static const char * const wnames[] =
3584 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3585
3586 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3587 if (!(w & (1 << b)))
3588 continue;
3589 if (cnt)
3590 msg += ", ";
3591 if (++cnt > 3) {
3592 msg += "..."; break;
3593 }
3594 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3595 msg += "*Unknown*"; break;
3596 }
3597 msg += wnames[b];
3598 }
3599
3600 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3601 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3602 state.must_write = true;
3603 }
3604
3605 // Check temperature limits
3606 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3607 int k = nvme_get_max_temp_kelvin(smart_log);
3608 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3609 int c = k - 273;
3610 if (c < 1)
3611 c = 1;
3612 else if (c > 0xff)
3613 c = 0xff;
3614 CheckTemperature(cfg, state, c, 0);
3615 }
3616
3617 // Check if number of errors has increased
3618 if (cfg.errorlog || cfg.xerrorlog) {
3619 uint64_t oldcnt = state.nvme_err_log_entries;
3620 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3621 if (newcnt > oldcnt) {
3622 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3623 name, oldcnt, newcnt);
3624 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3625 name, oldcnt, newcnt);
3626 state.must_write = true;
3627 }
3628 state.nvme_err_log_entries = newcnt;
3629 }
3630
3631 CloseDevice(nvmedev, name);
3632 return 0;
3633 }
3634
3635 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3636 static int standby_disable_state = 0;
3637
3638 static void init_disable_standby_check(dev_config_vector & configs)
3639 {
3640 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3641 bool sts1 = false, sts2 = false;
3642 for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3643 const dev_config & cfg = configs.at(i);
3644 if (cfg.offlinests_ns)
3645 sts1 = true;
3646 if (cfg.selfteststs_ns)
3647 sts2 = true;
3648 }
3649
3650 // Check for support of disable auto standby
3651 // Reenable standby if smartd.conf was reread
3652 if (sts1 || sts2 || standby_disable_state == 3) {
3653 if (!smi()->disable_system_auto_standby(false)) {
3654 if (standby_disable_state == 3)
3655 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3656 if (sts1 || sts2) {
3657 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3658 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3659 sts1 = sts2 = false;
3660 }
3661 }
3662 }
3663
3664 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3665 }
3666
3667 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3668 {
3669 if (!standby_disable_state)
3670 return;
3671
3672 // Check for just started or still running self-tests
3673 bool running = false;
3674 for (unsigned i = 0; i < configs.size() && !running; i++) {
3675 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3676
3677 if ( ( cfg.offlinests_ns
3678 && (state.offline_started ||
3679 is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3680 || ( cfg.selfteststs_ns
3681 && (state.selftest_started ||
3682 is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3683 running = true;
3684 // state.offline/selftest_started will be reset after next logging of test status
3685 }
3686
3687 // Disable/enable auto standby and log state changes
3688 if (!running) {
3689 if (standby_disable_state != 1) {
3690 if (!smi()->disable_system_auto_standby(false))
3691 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3692 smi()->get_errmsg());
3693 else
3694 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3695 standby_disable_state = 1;
3696 }
3697 }
3698 else if (!smi()->disable_system_auto_standby(true)) {
3699 if (standby_disable_state != 2) {
3700 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3701 smi()->get_errmsg());
3702 standby_disable_state = 2;
3703 }
3704 }
3705 else {
3706 if (standby_disable_state != 3) {
3707 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3708 standby_disable_state = 3;
3709 }
3710 }
3711 }
3712
3713 // Checks the SMART status of all ATA and SCSI devices
3714 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3715 smart_device_list & devices, bool firstpass, bool allow_selftests)
3716 {
3717 for (unsigned i = 0; i < configs.size(); i++) {
3718 const dev_config & cfg = configs.at(i);
3719 dev_state & state = states.at(i);
3720 smart_device * dev = devices.at(i);
3721 if (dev->is_ata())
3722 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3723 else if (dev->is_scsi())
3724 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3725 else if (dev->is_nvme())
3726 NVMeCheckDevice(cfg, state, dev->to_nvme());
3727 }
3728
3729 do_disable_standby_check(configs, states);
3730 }
3731
3732 // Set if Initialize() was called
3733 static bool is_initialized = false;
3734
3735 // Does initialization right after fork to daemon mode
3736 static void Initialize(time_t *wakeuptime)
3737 {
3738 // Call Goodbye() on exit
3739 is_initialized = true;
3740
3741 // write PID file
3742 if (!debugmode)
3743 WritePidFile();
3744
3745 // install signal handlers. On Solaris, can't use signal() because
3746 // it resets the handler to SIG_DFL after each call. So use sigset()
3747 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3748
3749 // normal and abnormal exit
3750 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3751 SIGNALFN(SIGTERM, SIG_IGN);
3752 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3753 SIGNALFN(SIGQUIT, SIG_IGN);
3754
3755 // in debug mode, <CONTROL-C> ==> HUP
3756 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3757 SIGNALFN(SIGINT, SIG_IGN);
3758
3759 // Catch HUP and USR1
3760 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3761 SIGNALFN(SIGHUP, SIG_IGN);
3762 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3763 SIGNALFN(SIGUSR1, SIG_IGN);
3764 #ifdef _WIN32
3765 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3766 SIGNALFN(SIGUSR2, SIG_IGN);
3767 #endif
3768
3769 // initialize wakeup time to CURRENT time
3770 *wakeuptime=time(NULL);
3771
3772 return;
3773 }
3774
3775 #ifdef _WIN32
3776 // Toggle debug mode implemented for native windows only
3777 // (there is no easy way to reopen tty on *nix)
3778 static void ToggleDebugMode()
3779 {
3780 if (!debugmode) {
3781 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3782 if (!daemon_enable_console("smartd [Debug]")) {
3783 debugmode = 1;
3784 daemon_signal(SIGINT, HUPhandler);
3785 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3786 }
3787 else
3788 PrintOut(LOG_INFO,"enable console failed\n");
3789 }
3790 else if (debugmode == 1) {
3791 daemon_disable_console();
3792 debugmode = 0;
3793 daemon_signal(SIGINT, sighandler);
3794 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3795 }
3796 else
3797 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3798 }
3799 #endif
3800
3801 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3802 {
3803 // If past wake-up-time, compute next wake-up-time
3804 time_t timenow=time(NULL);
3805 while (wakeuptime<=timenow){
3806 int intervals=1+(timenow-wakeuptime)/checktime;
3807 wakeuptime+=intervals*checktime;
3808 }
3809
3810 // sleep until we catch SIGUSR1 or have completed sleeping
3811 int addtime = 0;
3812 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3813
3814 // protect user again system clock being adjusted backwards
3815 if (wakeuptime>timenow+checktime){
3816 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3817 wakeuptime=timenow+checktime;
3818 }
3819
3820 // Exit sleep when time interval has expired or a signal is received
3821 sleep(wakeuptime+addtime-timenow);
3822
3823 #ifdef _WIN32
3824 // toggle debug mode?
3825 if (caughtsigUSR2) {
3826 ToggleDebugMode();
3827 caughtsigUSR2 = 0;
3828 }
3829 #endif
3830
3831 timenow=time(NULL);
3832
3833 // Actual sleep time too long?
3834 if (!addtime && timenow > wakeuptime+60) {
3835 if (debugmode)
3836 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3837 (int)(timenow-wakeuptime));
3838 // Wait another 20 seconds to avoid I/O errors during disk spin-up
3839 addtime = timenow-wakeuptime+20;
3840 // Use next wake-up-time if close
3841 int nextcheck = checktime - addtime % checktime;
3842 if (nextcheck <= 20)
3843 addtime += nextcheck;
3844 }
3845 }
3846
3847 // if we caught a SIGUSR1 then print message and clear signal
3848 if (caughtsigUSR1){
3849 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3850 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3851 caughtsigUSR1=0;
3852 sigwakeup = true;
3853 }
3854
3855 // return adjusted wakeuptime
3856 return wakeuptime;
3857 }
3858
3859 // Print out a list of valid arguments for the Directive d
3860 static void printoutvaliddirectiveargs(int priority, char d)
3861 {
3862 switch (d) {
3863 case 'n':
3864 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3865 break;
3866 case 's':
3867 PrintOut(priority, "valid_regular_expression");
3868 break;
3869 case 'd':
3870 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3871 break;
3872 case 'T':
3873 PrintOut(priority, "normal, permissive");
3874 break;
3875 case 'o':
3876 case 'S':
3877 PrintOut(priority, "on, off");
3878 break;
3879 case 'l':
3880 PrintOut(priority, "error, selftest");
3881 break;
3882 case 'M':
3883 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3884 break;
3885 case 'v':
3886 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3887 break;
3888 case 'P':
3889 PrintOut(priority, "use, ignore, show, showall");
3890 break;
3891 case 'F':
3892 PrintOut(priority, "%s", get_valid_firmwarebug_args());
3893 break;
3894 case 'e':
3895 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
3896 "security-freeze, standby,[N|off], wcache,[on|off]");
3897 break;
3898 }
3899 }
3900
3901 // exits with an error message, or returns integer value of token
3902 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3903 int min, int max, char * suffix = 0)
3904 {
3905 // make sure argument is there
3906 if (!arg) {
3907 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3908 cfgfile, lineno, name, token, min, max);
3909 return -1;
3910 }
3911
3912 // get argument value (base 10), check that it's integer, and in-range
3913 char *endptr;
3914 int val = strtol(arg,&endptr,10);
3915
3916 // optional suffix present?
3917 if (suffix) {
3918 if (!strcmp(endptr, suffix))
3919 endptr += strlen(suffix);
3920 else
3921 *suffix = 0;
3922 }
3923
3924 if (!(!*endptr && min <= val && val <= max)) {
3925 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3926 cfgfile, lineno, name, token, arg, min, max);
3927 return -1;
3928 }
3929
3930 // all is well; return value
3931 return val;
3932 }
3933
3934
3935 // Get 1-3 small integer(s) for '-W' directive
3936 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3937 unsigned char *val1, unsigned char *val2, unsigned char *val3)
3938 {
3939 unsigned v1 = 0, v2 = 0, v3 = 0;
3940 int n1 = -1, n2 = -1, n3 = -1, len;
3941 if (!arg) {
3942 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3943 cfgfile, lineno, name, token);
3944 return -1;
3945 }
3946
3947 len = strlen(arg);
3948 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3949 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3950 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3951 cfgfile, lineno, name, token, arg);
3952 return -1;
3953 }
3954 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3955 return 0;
3956 }
3957
3958
3959 #ifdef _WIN32
3960
// Returns the next strtok() token of the current tokenizer sequence.
// If the token starts with '"', the following tokens up to the one
// ending with '"' are concatenated, separated by a single space, and the
// quotes are removed.  This allows arguments with embedded spaces.
//
// Returns null if no token is left, and the string "\"" (a lone quote)
// if the closing quote is missing.  A dequoted result points into a
// static buffer which is overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Opening and closing quote in the same token ("no_space_inside"):
  // done, do not swallow the tokens which follow
  if (!token.empty() && token[token.size()-1] == '"') {
    token.erase(token.size()-1);
    return token.c_str();
  }

  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\""; // closing quote missing
    token += ' ';
    int len = strlen(t);
    if (t[len-1] == '"') {
      // Last part, append without the closing quote
      token += std::string(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
3984
3985 #endif // _WIN32
3986
3987
3988 // This function returns 1 if it has correctly parsed one token (and
3989 // any arguments), else zero if no tokens remain. It returns -1 if an
3990 // error was encountered.
3991 static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
3992 {
3993 char sym;
3994 const char * name = cfg.name.c_str();
3995 int lineno=cfg.lineno;
3996 const char *delim = " \n\t";
3997 int badarg = 0;
3998 int missingarg = 0;
3999 const char *arg = 0;
4000
4001 // is the rest of the line a comment
4002 if (*token=='#')
4003 return 1;
4004
4005 // is the token not recognized?
4006 if (*token!='-' || strlen(token)!=2) {
4007 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4008 configfile, lineno, name, token);
4009 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4010 return -1;
4011 }
4012
4013 // token we will be parsing:
4014 sym=token[1];
4015
4016 // parse the token and swallow its argument
4017 int val;
4018 char plus[] = "+", excl[] = "!";
4019
4020 switch (sym) {
4021 case 'C':
4022 // monitor current pending sector count (default 197)
4023 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4024 return -1;
4025 cfg.curr_pending_id = (unsigned char)val;
4026 cfg.curr_pending_incr = (*plus == '+');
4027 cfg.curr_pending_set = true;
4028 break;
4029 case 'U':
4030 // monitor offline uncorrectable sectors (default 198)
4031 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4032 return -1;
4033 cfg.offl_pending_id = (unsigned char)val;
4034 cfg.offl_pending_incr = (*plus == '+');
4035 cfg.offl_pending_set = true;
4036 break;
4037 case 'T':
4038 // Set tolerance level for SMART command failures
4039 if ((arg = strtok(NULL, delim)) == NULL) {
4040 missingarg = 1;
4041 } else if (!strcmp(arg, "normal")) {
4042 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4043 // not on failure of an optional S.M.A.R.T. command.
4044 // This is the default so we don't need to actually do anything here.
4045 cfg.permissive = false;
4046 } else if (!strcmp(arg, "permissive")) {
4047 // Permissive mode; ignore errors from Mandatory SMART commands
4048 cfg.permissive = true;
4049 } else {
4050 badarg = 1;
4051 }
4052 break;
4053 case 'd':
4054 // specify the device type
4055 if ((arg = strtok(NULL, delim)) == NULL) {
4056 missingarg = 1;
4057 } else if (!strcmp(arg, "ignore")) {
4058 cfg.ignore = true;
4059 } else if (!strcmp(arg, "removable")) {
4060 cfg.removable = true;
4061 } else if (!strcmp(arg, "auto")) {
4062 cfg.dev_type = "";
4063 scan_types.clear();
4064 } else {
4065 cfg.dev_type = arg;
4066 scan_types.push_back(arg);
4067 }
4068 break;
4069 case 'F':
4070 // fix firmware bug
4071 if (!(arg = strtok(0, delim)))
4072 missingarg = 1;
4073 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4074 badarg = 1;
4075 break;
4076 case 'H':
4077 // check SMART status
4078 cfg.smartcheck = true;
4079 break;
4080 case 'f':
4081 // check for failure of usage attributes
4082 cfg.usagefailed = true;
4083 break;
4084 case 't':
4085 // track changes in all vendor attributes
4086 cfg.prefail = true;
4087 cfg.usage = true;
4088 break;
4089 case 'p':
4090 // track changes in prefail vendor attributes
4091 cfg.prefail = true;
4092 break;
4093 case 'u':
4094 // track changes in usage vendor attributes
4095 cfg.usage = true;
4096 break;
4097 case 'l':
4098 // track changes in SMART logs
4099 if ((arg = strtok(NULL, delim)) == NULL) {
4100 missingarg = 1;
4101 } else if (!strcmp(arg, "selftest")) {
4102 // track changes in self-test log
4103 cfg.selftest = true;
4104 } else if (!strcmp(arg, "error")) {
4105 // track changes in ATA error log
4106 cfg.errorlog = true;
4107 } else if (!strcmp(arg, "xerror")) {
4108 // track changes in Extended Comprehensive SMART error log
4109 cfg.xerrorlog = true;
4110 } else if (!strcmp(arg, "offlinests")) {
4111 // track changes in offline data collection status
4112 cfg.offlinests = true;
4113 } else if (!strcmp(arg, "offlinests,ns")) {
4114 // track changes in offline data collection status, disable auto standby
4115 cfg.offlinests = cfg.offlinests_ns = true;
4116 } else if (!strcmp(arg, "selfteststs")) {
4117 // track changes in self-test execution status
4118 cfg.selfteststs = true;
4119 } else if (!strcmp(arg, "selfteststs,ns")) {
4120 // track changes in self-test execution status, disable auto standby
4121 cfg.selfteststs = cfg.selfteststs_ns = true;
4122 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4123 // set SCT Error Recovery Control
4124 unsigned rt = ~0, wt = ~0; int nc = -1;
4125 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4126 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4127 cfg.sct_erc_set = true;
4128 cfg.sct_erc_readtime = rt;
4129 cfg.sct_erc_writetime = wt;
4130 }
4131 else
4132 badarg = 1;
4133 } else {
4134 badarg = 1;
4135 }
4136 break;
4137 case 'a':
4138 // monitor everything
4139 cfg.smartcheck = true;
4140 cfg.prefail = true;
4141 cfg.usagefailed = true;
4142 cfg.usage = true;
4143 cfg.selftest = true;
4144 cfg.errorlog = true;
4145 cfg.selfteststs = true;
4146 break;
4147 case 'o':
4148 // automatic offline testing enable/disable
4149 if ((arg = strtok(NULL, delim)) == NULL) {
4150 missingarg = 1;
4151 } else if (!strcmp(arg, "on")) {
4152 cfg.autoofflinetest = 2;
4153 } else if (!strcmp(arg, "off")) {
4154 cfg.autoofflinetest = 1;
4155 } else {
4156 badarg = 1;
4157 }
4158 break;
4159 case 'n':
4160 // skip disk check if in idle or standby mode
4161 if (!(arg = strtok(NULL, delim)))
4162 missingarg = 1;
4163 else {
4164 char *endptr = NULL;
4165 char *next = strchr(const_cast<char*>(arg), ',');
4166
4167 cfg.powerquiet = false;
4168 cfg.powerskipmax = 0;
4169
4170 if (next!=NULL) *next='\0';
4171 if (!strcmp(arg, "never"))
4172 cfg.powermode = 0;
4173 else if (!strcmp(arg, "sleep"))
4174 cfg.powermode = 1;
4175 else if (!strcmp(arg, "standby"))
4176 cfg.powermode = 2;
4177 else if (!strcmp(arg, "idle"))
4178 cfg.powermode = 3;
4179 else
4180 badarg = 1;
4181
4182 // if optional arguments are present
4183 if (!badarg && next!=NULL) {
4184 next++;
4185 cfg.powerskipmax = strtol(next, &endptr, 10);
4186 if (endptr == next)
4187 cfg.powerskipmax = 0;
4188 else {
4189 next = endptr + (*endptr != '\0');
4190 if (cfg.powerskipmax <= 0)
4191 badarg = 1;
4192 }
4193 if (*next != '\0') {
4194 if (!strcmp("q", next))
4195 cfg.powerquiet = true;
4196 else {
4197 badarg = 1;
4198 }
4199 }
4200 }
4201 }
4202 break;
4203 case 'S':
4204 // automatic attribute autosave enable/disable
4205 if ((arg = strtok(NULL, delim)) == NULL) {
4206 missingarg = 1;
4207 } else if (!strcmp(arg, "on")) {
4208 cfg.autosave = 2;
4209 } else if (!strcmp(arg, "off")) {
4210 cfg.autosave = 1;
4211 } else {
4212 badarg = 1;
4213 }
4214 break;
4215 case 's':
4216 // warn user, and delete any previously given -s REGEXP Directives
4217 if (!cfg.test_regex.empty()){
4218 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4219 configfile, lineno, name, cfg.test_regex.get_pattern());
4220 cfg.test_regex = regular_expression();
4221 }
4222 // check for missing argument
4223 if (!(arg = strtok(NULL, delim))) {
4224 missingarg = 1;
4225 }
4226 // Compile regex
4227 else {
4228 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
4229 // not a valid regular expression!
4230 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4231 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4232 return -1;
4233 }
4234 // Do a bit of sanity checking and warn user if we think that
4235 // their regexp is "strange". User probably confused about shell
4236 // glob(3) syntax versus regular expression syntax regexp(7).
4237 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
4238 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
4239 configfile, lineno, name, val+1, arg[val], arg);
4240 }
4241 break;
4242 case 'm':
4243 // send email to address that follows
4244 if (!(arg = strtok(NULL,delim)))
4245 missingarg = 1;
4246 else {
4247 if (!cfg.emailaddress.empty())
4248 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4249 configfile, lineno, name, cfg.emailaddress.c_str());
4250 #ifdef _WIN32 // TODO: Remove after smartmontools 6.5
4251 if ( !strcmp(arg, "msgbox") || !strcmp(arg, "sysmsgbox")
4252 || str_starts_with(arg, "msgbox,") || str_starts_with(arg, "sysmsgbox,")) {
4253 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -m %s is no longer supported, use -m console[,...] instead\n",
4254 configfile, lineno, name, arg);
4255 return -1;
4256 }
4257 #endif
4258 cfg.emailaddress = arg;
4259 }
4260 break;
4261 case 'M':
4262 // email warning options
4263 if (!(arg = strtok(NULL, delim)))
4264 missingarg = 1;
4265 else if (!strcmp(arg, "once"))
4266 cfg.emailfreq = 1;
4267 else if (!strcmp(arg, "daily"))
4268 cfg.emailfreq = 2;
4269 else if (!strcmp(arg, "diminishing"))
4270 cfg.emailfreq = 3;
4271 else if (!strcmp(arg, "test"))
4272 cfg.emailtest = 1;
4273 else if (!strcmp(arg, "exec")) {
4274 // Get the next argument (the command line)
4275 #ifdef _WIN32
4276 // Allow "/path name/with spaces/..." on Windows
4277 arg = strtok_dequote(delim);
4278 if (arg && arg[0] == '"') {
4279 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4280 configfile, lineno, name, token);
4281 return -1;
4282 }
4283 #else
4284 arg = strtok(0, delim);
4285 #endif
4286 if (!arg) {
4287 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4288 configfile, lineno, name, token);
4289 return -1;
4290 }
4291 // Free the last cmd line given if any, and copy new one
4292 if (!cfg.emailcmdline.empty())
4293 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4294 configfile, lineno, name, cfg.emailcmdline.c_str());
4295 cfg.emailcmdline = arg;
4296 }
4297 else
4298 badarg = 1;
4299 break;
4300 case 'i':
4301 // ignore failure of usage attribute
4302 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4303 return -1;
4304 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
4305 break;
4306 case 'I':
4307 // ignore attribute for tracking purposes
4308 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4309 return -1;
4310 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
4311 break;
4312 case 'r':
4313 // print raw value when tracking
4314 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4315 return -1;
4316 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
4317 if (*excl == '!') // attribute change is critical
4318 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
4319 break;
4320 case 'R':
4321 // track changes in raw value (forces printing of raw value)
4322 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4323 return -1;
4324 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
4325 if (*excl == '!') // raw value change is critical
4326 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
4327 break;
4328 case 'W':
4329 // track Temperature
4330 if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4331 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4332 return -1;
4333 break;
4334 case 'v':
4335 // non-default vendor-specific attribute meaning
4336 if (!(arg=strtok(NULL,delim))) {
4337 missingarg = 1;
4338 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4339 badarg = 1;
4340 }
4341 break;
4342 case 'P':
4343 // Define use of drive-specific presets.
4344 if (!(arg = strtok(NULL, delim))) {
4345 missingarg = 1;
4346 } else if (!strcmp(arg, "use")) {
4347 cfg.ignorepresets = false;
4348 } else if (!strcmp(arg, "ignore")) {
4349 cfg.ignorepresets = true;
4350 } else if (!strcmp(arg, "show")) {
4351 cfg.showpresets = true;
4352 } else if (!strcmp(arg, "showall")) {
4353 showallpresets();
4354 } else {
4355 badarg = 1;
4356 }
4357 break;
4358
4359 case 'e':
4360 // Various ATA settings
4361 if (!(arg = strtok(NULL, delim))) {
4362 missingarg = true;
4363 }
4364 else {
4365 char arg2[16+1]; unsigned val;
4366 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4367 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4368 && (n1 == len || n2 > 0)) {
4369 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4370 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4371 if (n3 != len)
4372 val = ~0U;
4373
4374 if (!strcmp(arg2, "aam")) {
4375 if (off)
4376 cfg.set_aam = -1;
4377 else if (val <= 254)
4378 cfg.set_aam = val + 1;
4379 else
4380 badarg = true;
4381 }
4382 else if (!strcmp(arg2, "apm")) {
4383 if (off)
4384 cfg.set_apm = -1;
4385 else if (1 <= val && val <= 254)
4386 cfg.set_apm = val + 1;
4387 else
4388 badarg = true;
4389 }
4390 else if (!strcmp(arg2, "lookahead")) {
4391 if (off)
4392 cfg.set_lookahead = -1;
4393 else if (on)
4394 cfg.set_lookahead = 1;
4395 else
4396 badarg = true;
4397 }
4398 else if (!strcmp(arg, "security-freeze")) {
4399 cfg.set_security_freeze = true;
4400 }
4401 else if (!strcmp(arg2, "standby")) {
4402 if (off)
4403 cfg.set_standby = 0 + 1;
4404 else if (val <= 255)
4405 cfg.set_standby = val + 1;
4406 else
4407 badarg = true;
4408 }
4409 else if (!strcmp(arg2, "wcache")) {
4410 if (off)
4411 cfg.set_wcache = -1;
4412 else if (on)
4413 cfg.set_wcache = 1;
4414 else
4415 badarg = true;
4416 }
4417 else if (!strcmp(arg2, "dsn")) {
4418 if (off)
4419 cfg.set_dsn = -1;
4420 else if (on)
4421 cfg.set_dsn = 1;
4422 else
4423 badarg = true;
4424 }
4425 else
4426 badarg = true;
4427 }
4428 else
4429 badarg = true;
4430 }
4431 break;
4432
4433 default:
4434 // Directive not recognized
4435 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4436 configfile, lineno, name, token);
4437 Directives();
4438 return -1;
4439 }
4440 if (missingarg) {
4441 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4442 configfile, lineno, name, token);
4443 }
4444 if (badarg) {
4445 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4446 configfile, lineno, name, token, arg);
4447 }
4448 if (missingarg || badarg) {
4449 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4450 printoutvaliddirectiveargs(LOG_CRIT, sym);
4451 PrintOut(LOG_CRIT, "\n");
4452 return -1;
4453 }
4454
4455 return 1;
4456 }
4457
4458 // Scan directive for configuration file
4459 #define SCANDIRECTIVE "DEVICESCAN"
4460
4461 // This is the routine that adds things to the conf_entries list.
4462 //
4463 // Return values are:
4464 // 1: parsed a normal line
4465 // 0: found DEFAULT setting or comment or blank line
4466 // -1: found SCANDIRECTIVE line
4467 // -2: found an error
4468 //
4469 // Note: this routine modifies *line from the caller!
4470 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4471 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4472 {
4473 const char *delim = " \n\t";
4474
4475 // get first token: device name. If a comment, skip line
4476 const char * name = strtok(line, delim);
4477 if (!name || *name == '#')
4478 return 0;
4479
4480 // Check device name for DEFAULT or DEVICESCAN
4481 int retval;
4482 if (!strcmp("DEFAULT", name)) {
4483 retval = 0;
4484 // Restart with empty defaults
4485 default_conf = dev_config();
4486 }
4487 else {
4488 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4489 // Init new entry with current defaults
4490 conf_entries.push_back(default_conf);
4491 }
4492 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4493
4494 cfg.name = name; // Later replaced by dev->get_info().info_name
4495 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4496 cfg.lineno = lineno;
4497
4498 // parse tokens one at a time from the file.
4499 while (char * token = strtok(0, delim)) {
4500 int rc = ParseToken(token, cfg, scan_types);
4501 if (rc < 0)
4502 // error found on the line
4503 return -2;
4504
4505 if (rc == 0)
4506 // No tokens left
4507 break;
4508
4509 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4510 }
4511
4512 // Check for multiple -d TYPE directives
4513 if (retval != -1 && scan_types.size() > 1) {
4514 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4515 cfg.name.c_str(), cfg.lineno, configfile);
4516 return -2;
4517 }
4518
4519 // Don't perform checks below for DEFAULT entries
4520 if (retval == 0)
4521 return retval;
4522
4523 // If NO monitoring directives are set, then set all of them.
4524 if (!( cfg.smartcheck || cfg.selftest
4525 || cfg.errorlog || cfg.xerrorlog
4526 || cfg.offlinests || cfg.selfteststs
4527 || cfg.usagefailed || cfg.prefail || cfg.usage
4528 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4529
4530 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4531 cfg.name.c_str(), cfg.lineno, configfile);
4532
4533 cfg.smartcheck = true;
4534 cfg.usagefailed = true;
4535 cfg.prefail = true;
4536 cfg.usage = true;
4537 cfg.selftest = true;
4538 cfg.errorlog = true;
4539 cfg.selfteststs = true;
4540 }
4541
4542 // additional sanity check. Has user set -M options without -m?
4543 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4544 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4545 cfg.name.c_str(), cfg.lineno, configfile);
4546 return -2;
4547 }
4548
4549 // has the user has set <nomailer>?
4550 if (cfg.emailaddress == "<nomailer>") {
4551 // check that -M exec is also set
4552 if (cfg.emailcmdline.empty()){
4553 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4554 cfg.name.c_str(), cfg.lineno, configfile);
4555 return -2;
4556 }
4557 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4558 cfg.emailaddress.clear();
4559 }
4560
4561 return retval;
4562 }
4563
4564 // Parses a configuration file. Return values are:
4565 // N=>0: found N entries
4566 // -1: syntax error in config file
4567 // -2: config file does not exist
4568 // -3: config file exists but cannot be read
4569 //
4570 // In the case where the return value is 0, there are three
4571 // possiblities:
4572 // Empty configuration file ==> conf_entries.empty()
4573 // No configuration file ==> conf_entries[0].lineno == 0
4574 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4575 static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4576 {
4577 // maximum line length in configuration file
4578 const int MAXLINELEN = 256;
4579 // maximum length of a continued line in configuration file
4580 const int MAXCONTLINE = 1023;
4581
4582 stdio_file f;
4583 // Open config file, if it exists and is not <stdin>
4584 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4585 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4586 // file exists but we can't read it or it should exist due to '-c' option
4587 int ret = (errno!=ENOENT ? -3 : -2);
4588 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4589 strerror(errno),configfile);
4590 return ret;
4591 }
4592 }
4593 else // read from stdin ('-c -' option)
4594 f.open(stdin);
4595
4596 // Start with empty defaults
4597 dev_config default_conf;
4598
4599 // No configuration file found -- use fake one
4600 int entry = 0;
4601 if (!f) {
4602 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4603
4604 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4605 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4606 return 0;
4607 }
4608
4609 #ifdef __CYGWIN__
4610 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4611 #endif
4612
4613 // configuration file exists
4614 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4615
4616 // parse config file line by line
4617 int lineno = 1, cont = 0, contlineno = 0;
4618 char line[MAXLINELEN+2];
4619 char fullline[MAXCONTLINE+1];
4620
4621 for (;;) {
4622 int len=0,scandevice;
4623 char *lastslash;
4624 char *comment;
4625 char *code;
4626
4627 // make debugging simpler
4628 memset(line,0,sizeof(line));
4629
4630 // get a line
4631 code=fgets(line, MAXLINELEN+2, f);
4632
4633 // are we at the end of the file?
4634 if (!code){
4635 if (cont) {
4636 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4637 // See if we found a SCANDIRECTIVE directive
4638 if (scandevice==-1)
4639 return 0;
4640 // did we find a syntax error
4641 if (scandevice==-2)
4642 return -1;
4643 // the final line is part of a continuation line
4644 entry+=scandevice;
4645 }
4646 break;
4647 }
4648
4649 // input file line number
4650 contlineno++;
4651
4652 // See if line is too long
4653 len=strlen(line);
4654 if (len>MAXLINELEN){
4655 const char *warn;
4656 if (line[len-1]=='\n')
4657 warn="(including newline!) ";
4658 else
4659 warn="";
4660 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4661 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4662 return -1;
4663 }
4664
4665 // Ignore anything after comment symbol
4666 if ((comment=strchr(line,'#'))){
4667 *comment='\0';
4668 len=strlen(line);
4669 }
4670
4671 // is the total line (made of all continuation lines) too long?
4672 if (cont+len>MAXCONTLINE){
4673 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4674 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4675 return -1;
4676 }
4677
4678 // copy string so far into fullline, and increment length
4679 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4680 cont+=len;
4681
4682 // is this a continuation line. If so, replace \ by space and look at next line
4683 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4684 *(fullline+(cont-len)+(lastslash-line))=' ';
4685 continue;
4686 }
4687
4688 // Not a continuation line. Parse it
4689 scan_types.clear();
4690 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4691
4692 // did we find a scandevice directive?
4693 if (scandevice==-1)
4694 return 0;
4695 // did we find a syntax error
4696 if (scandevice==-2)
4697 return -1;
4698
4699 entry+=scandevice;
4700 lineno++;
4701 cont=0;
4702 }
4703
4704 // note -- may be zero if syntax of file OK, but no valid entries!
4705 return entry;
4706 }
4707
4708 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4709 <LIST> is the list of valid arguments for option opt. */
4710 static void PrintValidArgs(char opt)
4711 {
4712 const char *s;
4713
4714 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4715 if (!(s = GetValidArgList(opt)))
4716 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4717 else
4718 PrintOut(LOG_CRIT, "%s", (char *)s);
4719 PrintOut(LOG_CRIT, " <=======\n");
4720 }
4721
4722 #ifndef _WIN32
4723 // Report error and exit if specified path is not absolute.
4724 static void check_abs_path(char option, const std::string & path)
4725 {
4726 if (path.empty() || path[0] == '/')
4727 return;
4728
4729 debugmode = 1;
4730 PrintHead();
4731 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4732 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4733 EXIT(EXIT_BADCMD);
4734 }
4735 #endif // !_WIN32
4736
4737 // Parses input line, prints usage message and
4738 // version/license/copyright messages
4739 static void ParseOpts(int argc, char **argv)
4740 {
4741 // Init default path names
4742 #ifndef _WIN32
4743 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4744 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4745 #else
4746 std::string exedir = get_exe_dir();
4747 static std::string configfile_str = exedir + "/smartd.conf";
4748 configfile = configfile_str.c_str();
4749 warning_script = exedir + "/smartd_warning.cmd";
4750 #endif
4751
4752 // Please update GetValidArgList() if you edit shortopts
4753 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4754 #ifdef HAVE_LIBCAP_NG
4755 "C"
4756 #endif
4757 ;
4758 // Please update GetValidArgList() if you edit longopts
4759 struct option longopts[] = {
4760 { "configfile", required_argument, 0, 'c' },
4761 { "logfacility", required_argument, 0, 'l' },
4762 { "quit", required_argument, 0, 'q' },
4763 { "debug", no_argument, 0, 'd' },
4764 { "showdirectives", no_argument, 0, 'D' },
4765 { "interval", required_argument, 0, 'i' },
4766 #ifndef _WIN32
4767 { "no-fork", no_argument, 0, 'n' },
4768 #else
4769 { "service", no_argument, 0, 'n' },
4770 #endif
4771 { "pidfile", required_argument, 0, 'p' },
4772 { "report", required_argument, 0, 'r' },
4773 { "savestates", required_argument, 0, 's' },
4774 { "attributelog", required_argument, 0, 'A' },
4775 { "drivedb", required_argument, 0, 'B' },
4776 { "warnexec", required_argument, 0, 'w' },
4777 { "version", no_argument, 0, 'V' },
4778 { "license", no_argument, 0, 'V' },
4779 { "copyright", no_argument, 0, 'V' },
4780 { "help", no_argument, 0, 'h' },
4781 { "usage", no_argument, 0, 'h' },
4782 #ifdef HAVE_LIBCAP_NG
4783 { "capabilities", no_argument, 0, 'C' },
4784 #endif
4785 { 0, 0, 0, 0 }
4786 };
4787
4788 opterr=optopt=0;
4789 bool badarg = false;
4790 bool use_default_db = true; // set false on '-B FILE'
4791
4792 // Parse input options.
4793 int optchar;
4794 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4795 char *arg;
4796 char *tailptr;
4797 long lchecktime;
4798
4799 switch(optchar) {
4800 case 'q':
4801 // when to quit
4802 if (!strcmp(optarg, "nodev"))
4803 quit = QUIT_NODEV;
4804 else if (!strcmp(optarg, "nodevstartup"))
4805 quit = QUIT_NODEVSTARTUP;
4806 else if (!strcmp(optarg, "never"))
4807 quit = QUIT_NEVER;
4808 else if (!strcmp(optarg, "onecheck")) {
4809 quit = QUIT_ONECHECK;
4810 debugmode = 1;
4811 }
4812 else if (!strcmp(optarg, "showtests")) {
4813 quit = QUIT_SHOWTESTS;
4814 debugmode = 1;
4815 }
4816 else if (!strcmp(optarg, "errors"))
4817 quit = QUIT_ERRORS;
4818 else
4819 badarg = true;
4820 break;
4821 case 'l':
4822 // set the log facility level
4823 if (!strcmp(optarg, "daemon"))
4824 facility=LOG_DAEMON;
4825 else if (!strcmp(optarg, "local0"))
4826 facility=LOG_LOCAL0;
4827 else if (!strcmp(optarg, "local1"))
4828 facility=LOG_LOCAL1;
4829 else if (!strcmp(optarg, "local2"))
4830 facility=LOG_LOCAL2;
4831 else if (!strcmp(optarg, "local3"))
4832 facility=LOG_LOCAL3;
4833 else if (!strcmp(optarg, "local4"))
4834 facility=LOG_LOCAL4;
4835 else if (!strcmp(optarg, "local5"))
4836 facility=LOG_LOCAL5;
4837 else if (!strcmp(optarg, "local6"))
4838 facility=LOG_LOCAL6;
4839 else if (!strcmp(optarg, "local7"))
4840 facility=LOG_LOCAL7;
4841 else
4842 badarg = true;
4843 break;
4844 case 'd':
4845 // enable debug mode
4846 debugmode = 1;
4847 break;
4848 case 'n':
4849 // don't fork()
4850 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4851 do_fork = false;
4852 #endif
4853 break;
4854 case 'D':
4855 // print summary of all valid directives
4856 debugmode = 1;
4857 Directives();
4858 EXIT(0);
4859 break;
4860 case 'i':
4861 // Period (time interval) for checking
4862 // strtol will set errno in the event of overflow, so we'll check it.
4863 errno = 0;
4864 lchecktime = strtol(optarg, &tailptr, 10);
4865 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4866 debugmode=1;
4867 PrintHead();
4868 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4869 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4870 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4871 EXIT(EXIT_BADCMD);
4872 }
4873 checktime = (int)lchecktime;
4874 break;
4875 case 'r':
4876 // report IOCTL transactions
4877 {
4878 int n1 = -1, n2 = -1, len = strlen(optarg);
4879 char s[9+1]; unsigned i = 1;
4880 sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
4881 if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) {
4882 badarg = true;
4883 } else if (!strcmp(s,"ioctl")) {
4884 ata_debugmode = scsi_debugmode = nvme_debugmode = i;
4885 } else if (!strcmp(s,"ataioctl")) {
4886 ata_debugmode = i;
4887 } else if (!strcmp(s,"scsiioctl")) {
4888 scsi_debugmode = i;
4889 } else if (!strcmp(s,"nvmeioctl")) {
4890 nvme_debugmode = i;
4891 } else {
4892 badarg = true;
4893 }
4894 }
4895 break;
4896 case 'c':
4897 // alternate configuration file
4898 if (strcmp(optarg,"-"))
4899 configfile = (configfile_alt = optarg).c_str();
4900 else // read from stdin
4901 configfile=configfile_stdin;
4902 break;
4903 case 'p':
4904 // output file with PID number
4905 pid_file = optarg;
4906 break;
4907 case 's':
4908 // path prefix of persistent state file
4909 state_path_prefix = optarg;
4910 break;
4911 case 'A':
4912 // path prefix of attribute log file
4913 attrlog_path_prefix = optarg;
4914 break;
4915 case 'B':
4916 {
4917 const char * path = optarg;
4918 if (*path == '+' && path[1])
4919 path++;
4920 else
4921 use_default_db = false;
4922 unsigned char savedebug = debugmode; debugmode = 1;
4923 if (!read_drive_database(path))
4924 EXIT(EXIT_BADCMD);
4925 debugmode = savedebug;
4926 }
4927 break;
4928 case 'w':
4929 warning_script = optarg;
4930 break;
4931 case 'V':
4932 // print version and CVS info
4933 debugmode = 1;
4934 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4935 EXIT(0);
4936 break;
4937 #ifdef HAVE_LIBCAP_NG
4938 case 'C':
4939 // enable capabilities
4940 enable_capabilities = true;
4941 break;
4942 #endif
4943 case 'h':
4944 // help: print summary of command-line options
4945 debugmode=1;
4946 PrintHead();
4947 Usage();
4948 EXIT(0);
4949 break;
4950 case '?':
4951 default:
4952 // unrecognized option
4953 debugmode=1;
4954 PrintHead();
4955 // Point arg to the argument in which this option was found.
4956 arg = argv[optind-1];
4957 // Check whether the option is a long option that doesn't map to -h.
4958 if (arg[1] == '-' && optchar != 'h') {
4959 // Iff optopt holds a valid option then argument must be missing.
4960 if (optopt && (strchr(shortopts, optopt) != NULL)) {
4961 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4962 PrintValidArgs(optopt);
4963 } else {
4964 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4965 }
4966 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4967 EXIT(EXIT_BADCMD);
4968 }
4969 if (optopt) {
4970 // Iff optopt holds a valid option then argument must be missing.
4971 if (strchr(shortopts, optopt) != NULL){
4972 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4973 PrintValidArgs(optopt);
4974 } else {
4975 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4976 }
4977 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4978 EXIT(EXIT_BADCMD);
4979 }
4980 Usage();
4981 EXIT(0);
4982 }
4983
4984 // Check to see if option had an unrecognized or incorrect argument.
4985 if (badarg) {
4986 debugmode=1;
4987 PrintHead();
4988 // It would be nice to print the actual option name given by the user
4989 // here, but we just print the short form. Please fix this if you know
4990 // a clean way to do it.
4991 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4992 PrintValidArgs(optchar);
4993 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4994 EXIT(EXIT_BADCMD);
4995 }
4996 }
4997
4998 // non-option arguments are not allowed
4999 if (argc > optind) {
5000 debugmode=1;
5001 PrintHead();
5002 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
5003 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5004 EXIT(EXIT_BADCMD);
5005 }
5006
5007 // no pidfile in debug mode
5008 if (debugmode && !pid_file.empty()) {
5009 debugmode=1;
5010 PrintHead();
5011 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
5012 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
5013 EXIT(EXIT_BADCMD);
5014 }
5015
5016 #ifndef _WIN32
5017 if (!debugmode) {
5018 // absolute path names are required due to chdir('/') after fork().
5019 check_abs_path('p', pid_file);
5020 check_abs_path('s', state_path_prefix);
5021 check_abs_path('A', attrlog_path_prefix);
5022 }
5023 #endif
5024
5025 // Read or init drive database
5026 {
5027 unsigned char savedebug = debugmode; debugmode = 1;
5028 if (!init_drive_database(use_default_db))
5029 EXIT(EXIT_BADCMD);
5030 debugmode = savedebug;
5031 }
5032
5033 // print header
5034 PrintHead();
5035 }
5036
5037 // Function we call if no configuration file was found or if the
5038 // SCANDIRECTIVE Directive was found. It makes entries for device
5039 // names returned by scan_smart_devices() in os_OSNAME.cpp
5040 static int MakeConfigEntries(const dev_config & base_cfg,
5041 dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5042 const smart_devtype_list & types)
5043 {
5044 // make list of devices
5045 smart_device_list devlist;
5046 if (!smi()->scan_smart_devices(devlist, types)) {
5047 PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
5048 return 0;
5049 }
5050
5051 // if no devices, return
5052 if (devlist.size() <= 0)
5053 return 0;
5054
5055 // add empty device slots for existing config entries
5056 while (scanned_devs.size() < conf_entries.size())
5057 scanned_devs.push_back((smart_device *)0);
5058
5059 // loop over entries to create
5060 for (unsigned i = 0; i < devlist.size(); i++) {
5061 // Move device pointer
5062 smart_device * dev = devlist.release(i);
5063 scanned_devs.push_back(dev);
5064
5065 // Copy configuration, update device and type name
5066 conf_entries.push_back(base_cfg);
5067 dev_config & cfg = conf_entries.back();
5068 cfg.name = dev->get_info().info_name;
5069 cfg.dev_name = dev->get_info().dev_name;
5070 cfg.dev_type = dev->get_info().dev_type;
5071 }
5072
5073 return devlist.size();
5074 }
5075
5076 // Returns negative value (see ParseConfigFile()) if config file
5077 // had errors, else number of entries which may be zero or positive.
5078 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
5079 {
5080 // parse configuration file configfile (normally /etc/smartd.conf)
5081 smart_devtype_list scan_types;
5082 int entries = ParseConfigFile(conf_entries, scan_types);
5083
5084 if (entries < 0) {
5085 // There was an error reading the configuration file.
5086 conf_entries.clear();
5087 if (entries == -1)
5088 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
5089 return entries;
5090 }
5091
5092 // no error parsing config file.
5093 if (entries) {
5094 // we did not find a SCANDIRECTIVE and did find valid entries
5095 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
5096 }
5097 else if (!conf_entries.empty()) {
5098 // we found a SCANDIRECTIVE or there was no configuration file so
5099 // scan. Configuration file's last entry contains all options
5100 // that were set
5101 dev_config first = conf_entries.back();
5102 conf_entries.pop_back();
5103
5104 if (first.lineno)
5105 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
5106 else
5107 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
5108
5109 // make config list of devices to search for
5110 MakeConfigEntries(first, conf_entries, scanned_devs, scan_types);
5111
5112 // warn user if scan table found no devices
5113 if (conf_entries.empty())
5114 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
5115 }
5116 else
5117 PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
5118
5119 return conf_entries.size();
5120 }
5121
5122 // Return true if TYPE contains a RAID drive number
5123 static bool is_raid_type(const char * type)
5124 {
5125 if (str_starts_with(type, "sat,"))
5126 return false;
5127 int i;
5128 if (sscanf(type, "%*[^,],%d", &i) != 1)
5129 return false;
5130 return true;
5131 }
5132
5133 // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
5134 static bool is_duplicate_device(const smart_device * dev,
5135 const smart_device_list & devices, unsigned numdevs,
5136 const dev_config_vector & ignored)
5137 {
5138 const smart_device::device_info & info1 = dev->get_info();
5139 bool is_raid1 = is_raid_type(info1.dev_type.c_str());
5140
5141 for (unsigned i = 0; i < numdevs; i++) {
5142 const smart_device::device_info & info2 = devices.at(i)->get_info();
5143 // -d TYPE options must match if RAID drive number is specified
5144 if ( info1.dev_name == info2.dev_name
5145 && ( info1.dev_type == info2.dev_type
5146 || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
5147 return true;
5148 }
5149
5150 for (unsigned i = 0; i < ignored.size(); i++) {
5151 const dev_config & cfg2 = ignored.at(i);
5152 if ( info1.dev_name == cfg2.dev_name
5153 && ( info1.dev_type == cfg2.dev_type
5154 || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
5155 return true;
5156 }
5157 return false;
5158 }
5159
5160 // Register one device, return false on error
5161 static bool register_device(dev_config & cfg, dev_state & state, smart_device_auto_ptr & dev,
5162 const dev_config_vector * prev_cfgs)
5163 {
5164 bool scanning;
5165 if (!dev) {
5166 // Get device of appropriate type
5167 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
5168 if (!dev) {
5169 if (cfg.dev_type.empty())
5170 PrintOut(LOG_INFO, "Device: %s, unable to autodetect device type\n", cfg.name.c_str());
5171 else
5172 PrintOut(LOG_INFO, "Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
5173 return false;
5174 }
5175 scanning = false;
5176 }
5177 else {
5178 // Use device from device scan
5179 scanning = true;
5180 }
5181
5182 // Save old info
5183 smart_device::device_info oldinfo = dev->get_info();
5184
5185 // Open with autodetect support, may return 'better' device
5186 dev.replace( dev->autodetect_open() );
5187
5188 // Report if type has changed
5189 if (oldinfo.dev_type != dev->get_dev_type())
5190 PrintOut(LOG_INFO, "Device: %s, type changed from '%s' to '%s'\n",
5191 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
5192
5193 // Return if autodetect_open() failed
5194 if (!dev->is_open()) {
5195 if (debugmode || !scanning)
5196 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
5197 return false;
5198 }
5199
5200 // Update informal name
5201 cfg.name = dev->get_info().info_name;
5202 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
5203
5204 int status;
5205 const char * typemsg;
5206 // register ATA device
5207 if (dev->is_ata()){
5208 typemsg = "ATA";
5209 status = ATADeviceScan(cfg, state, dev->to_ata(), prev_cfgs);
5210 }
5211 // or register SCSI device
5212 else if (dev->is_scsi()){
5213 typemsg = "SCSI";
5214 status = SCSIDeviceScan(cfg, state, dev->to_scsi(), prev_cfgs);
5215 }
5216 // or register NVMe device
5217 else if (dev->is_nvme()) {
5218 typemsg = "NVMe";
5219 status = NVMeDeviceScan(cfg, state, dev->to_nvme(), prev_cfgs);
5220 }
5221 else {
5222 PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
5223 return false;
5224 }
5225
5226 if (status) {
5227 if (!scanning || debugmode) {
5228 if (cfg.lineno)
5229 PrintOut(scanning ? LOG_INFO : LOG_CRIT,
5230 "Unable to register %s device %s at line %d of file %s\n",
5231 typemsg, cfg.name.c_str(), cfg.lineno, configfile);
5232 else
5233 PrintOut(LOG_INFO, "Unable to register %s device %s\n",
5234 typemsg, cfg.name.c_str());
5235 }
5236
5237 return false;
5238 }
5239
5240 return true;
5241 }
5242
5243 // This function tries devices from conf_entries. Each one that can be
5244 // registered is moved onto the [ata|scsi]devices lists and removed
5245 // from the conf_entries list.
5246 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5247 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
5248 {
5249 // start by clearing lists/memory of ALL existing devices
5250 configs.clear();
5251 devices.clear();
5252 states.clear();
5253
5254 // Register entries
5255 dev_config_vector ignored_entries;
5256 unsigned numnoscan = 0;
5257 for (unsigned i = 0; i < conf_entries.size(); i++){
5258
5259 dev_config cfg = conf_entries[i];
5260
5261 if (cfg.ignore) {
5262 // Store for is_duplicate_device() check and ignore
5263 PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
5264 (!cfg.dev_type.empty() ? " [" : ""),
5265 cfg.dev_type.c_str(),
5266 (!cfg.dev_type.empty() ? "]" : ""));
5267 ignored_entries.push_back(cfg);
5268 continue;
5269 }
5270
5271 smart_device_auto_ptr dev;
5272
5273 // Device may already be detected during devicescan
5274 bool scanning = false;
5275 if (i < scanned_devs.size()) {
5276 dev = scanned_devs.release(i);
5277 if (dev) {
5278 // Check for a preceding non-DEVICESCAN entry for the same device
5279 if ( (numnoscan || !ignored_entries.empty())
5280 && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
5281 PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
5282 continue;
5283 }
5284 scanning = true;
5285 }
5286 }
5287
5288 // Register device
5289 // If scanning, pass dev_idinfo of previous devices for duplicate check
5290 dev_state state;
5291 if (!register_device(cfg, state, dev, (scanning ? &configs : 0))) {
5292 // if device is explictly listed and we can't register it, then
5293 // exit unless the user has specified that the device is removable
5294 if (!scanning) {
5295 if (!(cfg.removable || quit == QUIT_NEVER)) {
5296 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
5297 EXIT(EXIT_BADDEV);
5298 }
5299 PrintOut(LOG_INFO, "Device: %s, not available\n", cfg.name.c_str());
5300 // Prevent retry of registration
5301 ignored_entries.push_back(cfg);
5302 }
5303 continue;
5304 }
5305
5306 // move onto the list of devices
5307 configs.push_back(cfg);
5308 states.push_back(state);
5309 devices.push_back(dev);
5310 if (!scanning)
5311 numnoscan = devices.size();
5312 }
5313
5314 init_disable_standby_check(configs);
5315 }
5316
5317
5318 // Main program without exception handling
5319 static int main_worker(int argc, char **argv)
5320 {
5321 // Initialize interface
5322 smart_interface::init();
5323 if (!smi())
5324 return 1;
5325
5326 // is it our first pass through?
5327 bool firstpass = true;
5328
5329 // next time to wake up
5330 time_t wakeuptime = 0;
5331
5332 // parse input and print header and usage info if needed
5333 ParseOpts(argc,argv);
5334
5335 // Configuration for each device
5336 dev_config_vector configs;
5337 // Device states
5338 dev_state_vector states;
5339 // Devices to monitor
5340 smart_device_list devices;
5341
5342 bool write_states_always = true;
5343
5344 #ifdef HAVE_LIBCAP_NG
5345 // Drop capabilities
5346 if (enable_capabilities) {
5347 capng_clear(CAPNG_SELECT_BOTH);
5348 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
5349 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
5350 capng_apply(CAPNG_SELECT_BOTH);
5351 }
5352 #endif
5353
5354 // the main loop of the code
5355 for (;;) {
5356
5357 // are we exiting from a signal?
5358 if (caughtsigEXIT) {
5359 // are we exiting with SIGTERM?
5360 int isterm=(caughtsigEXIT==SIGTERM);
5361 int isquit=(caughtsigEXIT==SIGQUIT);
5362 int isok=debugmode?isterm || isquit:isterm;
5363
5364 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
5365 caughtsigEXIT, strsignal(caughtsigEXIT));
5366
5367 if (!isok)
5368 return EXIT_SIGNAL;
5369
5370 // Write state files
5371 if (!state_path_prefix.empty())
5372 write_all_dev_states(configs, states);
5373
5374 return 0;
5375 }
5376
5377 // Should we (re)read the config file?
5378 if (firstpass || caughtsigHUP){
5379 if (!firstpass) {
5380 // Write state files
5381 if (!state_path_prefix.empty())
5382 write_all_dev_states(configs, states);
5383
5384 PrintOut(LOG_INFO,
5385 caughtsigHUP==1?
5386 "Signal HUP - rereading configuration file %s\n":
5387 "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5388 configfile);
5389 }
5390
5391 {
5392 dev_config_vector conf_entries; // Entries read from smartd.conf
5393 smart_device_list scanned_devs; // Devices found during scan
5394 // (re)reads config file, makes >=0 entries
5395 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5396
5397 if (entries>=0) {
5398 // checks devices, then moves onto ata/scsi list or deallocates.
5399 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
5400 if (!(configs.size() == devices.size() && configs.size() == states.size()))
5401 throw std::logic_error("Invalid result from RegisterDevices");
5402 }
5403 else if ( quit == QUIT_NEVER
5404 || ((quit == QUIT_NODEV || quit == QUIT_NODEVSTARTUP) && !firstpass)) {
5405 // user has asked to continue on error in configuration file
5406 if (!firstpass)
5407 PrintOut(LOG_INFO,"Reusing previous configuration\n");
5408 }
5409 else {
5410 // exit with configuration file error status
5411 return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
5412 }
5413 }
5414
5415 // Log number of devices we are monitoring...
5416 if (devices.size() > 0 || quit == QUIT_NEVER || (quit == QUIT_NODEVSTARTUP && !firstpass)) {
5417 int numata = 0, numscsi = 0;
5418 for (unsigned i = 0; i < devices.size(); i++) {
5419 const smart_device * dev = devices.at(i);
5420 if (dev->is_ata())
5421 numata++;
5422 else if (dev->is_scsi())
5423 numscsi++;
5424 }
5425 PrintOut(LOG_INFO,"Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5426 numata, numscsi, (int)devices.size() - numata - numscsi);
5427 }
5428 else {
5429 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
5430 return EXIT_NODEV;
5431 }
5432
5433 if (quit == QUIT_SHOWTESTS) {
5434 // user has asked to print test schedule
5435 PrintTestSchedule(configs, states, devices);
5436 return 0;
5437 }
5438
5439 #ifdef HAVE_LIBCAP_NG
5440 if (enable_capabilities) {
5441 for (unsigned i = 0; i < configs.size(); i++) {
5442 if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
5443 PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
5444 break;
5445 }
5446 }
5447 }
5448 #endif
5449
5450 // reset signal
5451 caughtsigHUP=0;
5452
5453 // Always write state files after (re)configuration
5454 write_states_always = true;
5455 }
5456
5457 // check all devices once,
5458 // self tests are not started in first pass unless '-q onecheck' is specified
5459 CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit == QUIT_ONECHECK));
5460
5461 // Write state files
5462 if (!state_path_prefix.empty())
5463 write_all_dev_states(configs, states, write_states_always);
5464 write_states_always = false;
5465
5466 // Write attribute logs
5467 if (!attrlog_path_prefix.empty())
5468 write_all_dev_attrlogs(configs, states);
5469
5470 // user has asked us to exit after first check
5471 if (quit == QUIT_ONECHECK) {
5472 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5473 "smartd is exiting (exit status 0)\n");
5474 return 0;
5475 }
5476
5477 // fork into background if needed
5478 if (firstpass && !debugmode) {
5479 DaemonInit();
5480 }
5481
5482 // set exit and signal handlers, write PID file, set wake-up time
5483 if (firstpass){
5484 Initialize(&wakeuptime);
5485 firstpass = false;
5486 }
5487
5488 // sleep until next check time, or a signal arrives
5489 wakeuptime = dosleep(wakeuptime, write_states_always);
5490 }
5491 }
5492
5493
5494 #ifndef _WIN32
5495 // Main program
5496 int main(int argc, char **argv)
5497 #else
5498 // Windows: internal main function started direct or by service control manager
5499 static int smartd_main(int argc, char **argv)
5500 #endif
5501 {
5502 int status;
5503 try {
5504 // Do the real work ...
5505 status = main_worker(argc, argv);
5506 }
5507 catch (int ex) {
5508 // EXIT(status) arrives here
5509 status = ex;
5510 }
5511 catch (const std::bad_alloc & /*ex*/) {
5512 // Memory allocation failed (also thrown by std::operator new)
5513 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5514 status = EXIT_NOMEM;
5515 }
5516 catch (const std::exception & ex) {
5517 // Other fatal errors
5518 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5519 status = EXIT_BADCODE;
5520 }
5521
5522 // Check for remaining device objects
5523 if (smart_device::get_num_objects() != 0) {
5524 PrintOut(LOG_CRIT, "Smartd: Internal Error: %d device object(s) left at exit.\n",
5525 smart_device::get_num_objects());
5526 status = EXIT_BADCODE;
5527 }
5528
5529 if (status == EXIT_BADCODE)
5530 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
5531
5532 if (is_initialized)
5533 status = Goodbye(status);
5534
5535 #ifdef _WIN32
5536 daemon_winsvc_exitcode = status;
5537 #endif
5538 return status;
5539 }
5540
5541
#ifdef _WIN32
// Entry point of the Windows build
int main(int argc, char **argv)
{
  // Settings for running smartd as a Windows service
  static const daemon_winsvc_options service_options = {
    "--service",        // cmd_opt
    "smartd",           // servicename
    "SmartD Service",   // displayname
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (SMART) built into "
    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
    "www.smartmontools.org"
  };

  // daemon_main() deals with the daemon and service specific commands and
  // then runs smartd_main() either directly, from a new process, or via
  // the service control manager.
  return daemon_main("smartd", &service_options, smartd_main, argc, argv);
}
#endif