]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
Merge branch 'upstream' of git.debian.org:/git/collab-maint/smartmontools into upstream
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-16 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
16 * any later version.
17 *
18 * You should have received a copy of the GNU General Public License
19 * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
20 *
21 * This code was originally developed as a Senior Thesis by Michael Cornwell
22 * at the Concurrent Systems Laboratory (now part of the Storage Systems
23 * Research Center), Jack Baskin School of Engineering, University of
24 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
25 *
26 */
27
28 #include "config.h"
29 #include "int64.h"
30
31 // unconditionally included files
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <sys/stat.h> // umask
35 #include <signal.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <syslog.h>
39 #include <stdarg.h>
40 #include <stdlib.h>
41 #include <errno.h>
42 #include <time.h>
43 #include <limits.h>
44 #include <getopt.h>
45
46 #include <stdexcept>
47 #include <string>
48 #include <vector>
49 #include <algorithm> // std::replace()
50
51 // conditionally included files
52 #ifndef _WIN32
53 #include <sys/wait.h>
54 #endif
55 #ifdef HAVE_UNISTD_H
56 #include <unistd.h>
57 #endif
58
59 #ifdef _WIN32
60 #ifdef _MSC_VER
61 #pragma warning(disable:4761) // "conversion supplied"
62 typedef unsigned short mode_t;
63 typedef int pid_t;
64 #endif
65 #include <io.h> // umask()
66 #include <process.h> // getpid()
67 #endif // _WIN32
68
69 #ifdef __CYGWIN__
70 #include <io.h> // setmode()
71 #endif // __CYGWIN__
72
73 #ifdef HAVE_LIBCAP_NG
74 #include <cap-ng.h>
75 #endif // LIBCAP_NG
76
77 // locally included files
78 #include "atacmds.h"
79 #include "dev_interface.h"
80 #include "knowndrives.h"
81 #include "scsicmds.h"
82 #include "nvmecmds.h"
83 #include "utility.h"
84
85 // This is for solaris, where signal() resets the handler to SIG_DFL
86 // after the first signal is caught.
87 #ifdef HAVE_SIGSET
88 #define SIGNALFN sigset
89 #else
90 #define SIGNALFN signal
91 #endif
92
93 #ifdef _WIN32
94 // fork()/signal()/initd simulation for native Windows
95 #include "daemon_win32.h" // daemon_main/detach/signal()
96 #undef SIGNALFN
97 #define SIGNALFN daemon_signal
98 #define strsignal daemon_strsignal
99 #define sleep daemon_sleep
100 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
101 #define SIGQUIT SIGBREAK
102 #define SIGQUIT_KEYNAME "CONTROL-Break"
103 #else // _WIN32
104 #define SIGQUIT_KEYNAME "CONTROL-\\"
105 #endif // _WIN32
106
107 <<<<<<< HEAD
108 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 4308 2016-04-24 13:36:10Z chrfranke $"
109 =======
110 #if defined (__SVR4) && defined (__sun)
111 extern "C" int getdomainname(char *, int); // no declaration in header files!
112 #endif
113
114 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 4059 2015-04-18 17:01:31Z chrfranke $"
115 >>>>>>> 3d8ad6fa4529eb02ae1391a1e937bf57aad3fb74
116 CONFIG_H_CVSID;
117
118 using namespace smartmontools;
119
// smartd exit codes
#define EXIT_BADCMD 1 // command line did not parse
#define EXIT_BADCONF 2 // syntax error in config file
#define EXIT_STARTUP 3 // problem forking daemon
#define EXIT_PID 4 // problem creating pid file
#define EXIT_NOCONF 5 // config file does not exist
#define EXIT_READCONF 6 // config file exists but cannot be read

#define EXIT_NOMEM 8 // out of memory
#define EXIT_BADCODE 10 // internal error - should NEVER happen

#define EXIT_BADDEV 16 // we can't monitor this device
#define EXIT_NODEV 17 // no devices to monitor

#define EXIT_SIGNAL 254 // abort on signal


// command-line: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;

// command-line: how long to sleep between checks
// (CHECKTIME is the default; presumably in seconds - TODO confirm)
#define CHECKTIME 1800
static int checktime=CHECKTIME;

// command-line: name of PID file (empty for no pid file)
static std::string pid_file;

// command-line: path prefix of persistent state file, empty if no persistence.
// The build-time default SMARTMONTOOLS_SAVESTATES is used if it is defined.
static std::string state_path_prefix
#ifdef SMARTMONTOOLS_SAVESTATES
  = SMARTMONTOOLS_SAVESTATES
#endif
  ;

// command-line: path prefix of attribute log file, empty if no logs.
// The build-time default SMARTMONTOOLS_ATTRIBUTELOG is used if it is defined.
static std::string attrlog_path_prefix
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
  = SMARTMONTOOLS_ATTRIBUTELOG
#endif
  ;

// configuration file name
static const char * configfile;
// configuration file "name" if read from stdin
static const char * const configfile_stdin = "<stdin>";
// path of alternate configuration file
static std::string configfile_alt;

// warning script file
static std::string warning_script;

// command-line: when should we exit?
static int quit=0;

// command-line; this is the default syslog(3) log facility to use.
static int facility=LOG_DAEMON;

#ifndef _WIN32
// command-line: fork into background?
static bool do_fork=true;
#endif

#ifdef HAVE_LIBCAP_NG
// command-line: enable capabilities?
static bool enable_capabilities = false;
#endif

// TODO: This smartctl only variable is also used in os_win32.cpp
unsigned char failuretest_permissive = 0;

// The following flags are written by the signal handlers of this file.

// set to one if we catch a USR1 (check devices now)
static volatile int caughtsigUSR1=0;

#ifdef _WIN32
// set to one if we catch a USR2 (toggle debug mode)
static volatile int caughtsigUSR2=0;
#endif

// set to one if we catch a HUP (reload config file). In debug mode,
// set to two, if we catch INT (also reload config file).
static volatile int caughtsigHUP=0;

// set to signal value if we catch INT, QUIT, or TERM
static volatile int caughtsigEXIT=0;

// This function prints either to stdout or to the syslog as needed.
// 'fmt' is a printf-style format string (argument 2), the variable
// arguments start at argument 3.
static void PrintOut(int priority, const char *fmt, ...)
  __attribute_format_printf(2, 3);
208
// Bit flags controlling how a single attribute is monitored.
// Each flag occupies its own bit so that flags can be OR-ed together.
// See monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0, // 0x01
  MONITOR_IGNORE      = 1 << 1, // 0x02
  MONITOR_RAW_PRINT   = 1 << 2, // 0x04
  MONITOR_RAW         = 1 << 3, // 0x08
  MONITOR_AS_CRIT     = 1 << 4, // 0x10
  MONITOR_RAW_AS_CRIT = 1 << 5  // 0x20
};
219
// Flag byte for each attribute ID.
// IDs 1..255 are stored; ID 0 and any ID outside the table are
// silently ignored by set() and always reported as "not set" by is_set().
class attribute_flags
{
public:
  /// Start with all flags cleared.
  attribute_flags()
    : m_flags() { }

  /// Return true if any bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!valid_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  /// Add the bits of 'flags' to attribute 'id' (no-op for invalid IDs).
  void set(int id, unsigned char flags)
    {
      if (!valid_id(id))
        return;
      m_flags[id] |= flags;
    }

private:
  enum { num_ids = 256 };

  unsigned char m_flags[num_ids];

  // ID 0 is deliberately excluded.
  static bool valid_id(int id)
    { return (id > 0 && id < (int)num_ids); }
};
239
240
241 /// Configuration data for a device. Read from smartd.conf.
242 /// Supports copy & assignment and is compatible with STL containers.
243 struct dev_config
244 {
245 int lineno; // Line number of entry in file
246 std::string name; // Device name (with optional extra info)
247 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
248 std::string dev_type; // Device type argument from -d directive, empty if none
249 std::string dev_idinfo; // Device identify info for warning emails
250 std::string state_file; // Path of the persistent state file, empty if none
251 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
252 bool ignore; // Ignore this entry
253 bool smartcheck; // Check SMART status
254 bool usagefailed; // Check for failed Usage Attributes
255 bool prefail; // Track changes in Prefail Attributes
256 bool usage; // Track changes in Usage Attributes
257 bool selftest; // Monitor number of selftest errors
258 bool errorlog; // Monitor number of ATA errors
259 bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
260 bool offlinests; // Monitor changes in offline data collection status
261 bool offlinests_ns; // Disable auto standby if in progress
262 bool selfteststs; // Monitor changes in self-test execution status
263 bool selfteststs_ns; // Disable auto standby if in progress
264 bool permissive; // Ignore failed SMART commands
265 char autosave; // 1=disable, 2=enable Autosave Attributes
266 char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
267 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
268 bool ignorepresets; // Ignore database of -v options
269 bool showpresets; // Show database entry for this device
270 bool removable; // Device may disappear (not be present)
271 char powermode; // skip check, if disk in idle or standby mode
272 bool powerquiet; // skip powermode 'skipping checks' message
273 int powerskipmax; // how many times can be check skipped
274 unsigned char tempdiff; // Track Temperature changes >= this limit
275 unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
276 regular_expression test_regex; // Regex for scheduled testing
277
278 // Configuration of email warning messages
279 std::string emailcmdline; // script to execute, empty if no messages
280 std::string emailaddress; // email address, or empty
281 unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
282 bool emailtest; // Send test email?
283
284 // ATA ONLY
285 int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
286 int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
287 int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
288 int set_lookahead; // disable(-1), enable(1) read look-ahead
289 int set_standby; // set(1..255->0..254) standby timer
290 bool set_security_freeze; // Freeze ATA security
291 int set_wcache; // disable(-1), enable(1) write cache
292
293 bool sct_erc_set; // set SCT ERC to:
294 unsigned short sct_erc_readtime; // ERC read time (deciseconds)
295 unsigned short sct_erc_writetime; // ERC write time (deciseconds)
296
297 unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
298 unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
299 bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
300 bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
301
302 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
303
304 ata_vendor_attr_defs attribute_defs; // -v options
305
306 dev_config();
307 };
308
309 dev_config::dev_config()
310 : lineno(0),
311 ignore(false),
312 smartcheck(false),
313 usagefailed(false),
314 prefail(false),
315 usage(false),
316 selftest(false),
317 errorlog(false),
318 xerrorlog(false),
319 offlinests(false), offlinests_ns(false),
320 selfteststs(false), selfteststs_ns(false),
321 permissive(false),
322 autosave(0),
323 autoofflinetest(0),
324 ignorepresets(false),
325 showpresets(false),
326 removable(false),
327 powermode(0),
328 powerquiet(false),
329 powerskipmax(0),
330 tempdiff(0),
331 tempinfo(0), tempcrit(0),
332 emailfreq(0),
333 emailtest(false),
334 dev_rpm(0),
335 set_aam(0), set_apm(0),
336 set_lookahead(0),
337 set_standby(0),
338 set_security_freeze(false),
339 set_wcache(0),
340 sct_erc_set(false),
341 sct_erc_readtime(0), sct_erc_writetime(0),
342 curr_pending_id(0), offl_pending_id(0),
343 curr_pending_incr(false), offl_pending_incr(false),
344 curr_pending_set(false), offl_pending_set(false)
345 {
346 }
347
348
// Number of distinct mail message types
static const int SMARTD_NMAIL = 13;
// Index of the '-M test' mail type (its state is not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.

// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // Nothing sent yet.
  mailinfo()
    {
      logged = 0;
      firstsent = 0;
      lastsent = 0;
    }
};
363
364 /// Persistent state data for a device.
365 struct persistent_dev_state
366 {
367 unsigned char tempmin, tempmax; // Min/Max Temperatures
368
369 unsigned char selflogcount; // total number of self-test errors
370 unsigned short selfloghour; // lifetime hours of last self-test error
371
372 time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
373
374 uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
375 uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
376
377 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
378
379 // ATA ONLY
380 int ataerrorcount; // Total number of ATA errors
381
382 // Persistent part of ata_smart_values:
383 struct ata_attribute {
384 unsigned char id;
385 unsigned char val;
386 unsigned char worst; // Byte needed for 'raw64' attribute only.
387 uint64_t raw;
388 unsigned char resvd;
389
390 ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
391 };
392 ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
393
394 // SCSI ONLY
395
396 struct scsi_error_counter_t {
397 struct scsiErrorCounter errCounter;
398 unsigned char found;
399 scsi_error_counter_t() : found(0)
400 { memset(&errCounter, 0, sizeof(errCounter)); }
401 };
402 scsi_error_counter_t scsi_error_counters[3];
403
404 struct scsi_nonmedium_error_t {
405 struct scsiNonMediumError nme;
406 unsigned char found;
407 scsi_nonmedium_error_t() : found(0)
408 { memset(&nme, 0, sizeof(nme)); }
409 };
410 scsi_nonmedium_error_t scsi_nonmedium_error;
411
412 // NVMe only
413 uint64_t nvme_err_log_entries;
414
415 persistent_dev_state();
416 };
417
418 persistent_dev_state::persistent_dev_state()
419 : tempmin(0), tempmax(0),
420 selflogcount(0),
421 selfloghour(0),
422 scheduled_test_next_check(0),
423 selective_test_last_start(0),
424 selective_test_last_end(0),
425 ataerrorcount(0),
426 nvme_err_log_entries(0)
427 {
428 }
429
430 /// Non-persistent state data for a device.
431 struct temp_dev_state
432 {
433 bool must_write; // true if persistent part should be written
434
435 bool not_cap_offline; // true == not capable of offline testing
436 bool not_cap_conveyance;
437 bool not_cap_short;
438 bool not_cap_long;
439 bool not_cap_selective;
440
441 unsigned char temperature; // last recorded Temperature (in Celsius)
442 time_t tempmin_delay; // time where Min Temperature tracking will start
443
444 bool powermodefail; // true if power mode check failed
445 int powerskipcnt; // Number of checks skipped due to idle or standby mode
446 int lastpowermodeskipped; // the last power mode that was skipped
447
448 // SCSI ONLY
449 unsigned char SmartPageSupported; // has log sense IE page (0x2f)
450 unsigned char TempPageSupported; // has log sense temperature page (0xd)
451 unsigned char ReadECounterPageSupported;
452 unsigned char WriteECounterPageSupported;
453 unsigned char VerifyECounterPageSupported;
454 unsigned char NonMediumErrorPageSupported;
455 unsigned char SuppressReport; // minimize nuisance reports
456 unsigned char modese_len; // mode sense/select cmd len: 0 (don't
457 // know yet) 6 or 10
458 // ATA ONLY
459 uint64_t num_sectors; // Number of sectors
460 ata_smart_values smartval; // SMART data
461 ata_smart_thresholds_pvt smartthres; // SMART thresholds
462 bool offline_started; // true if offline data collection was started
463 bool selftest_started; // true if self-test was started
464
465 temp_dev_state();
466 };
467
468 temp_dev_state::temp_dev_state()
469 : must_write(false),
470 not_cap_offline(false),
471 not_cap_conveyance(false),
472 not_cap_short(false),
473 not_cap_long(false),
474 not_cap_selective(false),
475 temperature(0),
476 tempmin_delay(0),
477 powermodefail(false),
478 powerskipcnt(0),
479 lastpowermodeskipped(0),
480 SmartPageSupported(false),
481 TempPageSupported(false),
482 ReadECounterPageSupported(false),
483 WriteECounterPageSupported(false),
484 VerifyECounterPageSupported(false),
485 NonMediumErrorPageSupported(false),
486 SuppressReport(false),
487 modese_len(0),
488 num_sectors(0),
489 offline_started(false),
490 selftest_started(false)
491 {
492 memset(&smartval, 0, sizeof(smartval));
493 memset(&smartthres, 0, sizeof(smartthres));
494 }
495
496 /// Runtime state data for a device.
497 struct dev_state
498 : public persistent_dev_state,
499 public temp_dev_state
500 {
501 void update_persistent_state();
502 void update_temp_state();
503 };
504
505 /// Container for configuration info for each device.
506 typedef std::vector<dev_config> dev_config_vector;
507
508 /// Container for state info for each device.
509 typedef std::vector<dev_state> dev_state_vector;
510
511 // Copy ATA attributes to persistent state.
512 void dev_state::update_persistent_state()
513 {
514 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
515 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
516 ata_attribute & pa = ata_attributes[i];
517 pa.id = ta.id;
518 if (ta.id == 0) {
519 pa.val = pa.worst = 0; pa.raw = 0;
520 continue;
521 }
522 pa.val = ta.current;
523 pa.worst = ta.worst;
524 pa.raw = ta.raw[0]
525 | ( ta.raw[1] << 8)
526 | ( ta.raw[2] << 16)
527 | ((uint64_t)ta.raw[3] << 24)
528 | ((uint64_t)ta.raw[4] << 32)
529 | ((uint64_t)ta.raw[5] << 40);
530 pa.resvd = ta.reserv;
531 }
532 }
533
534 // Copy ATA from persistent to temp state.
535 void dev_state::update_temp_state()
536 {
537 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
538 const ata_attribute & pa = ata_attributes[i];
539 ata_smart_attribute & ta = smartval.vendor_attributes[i];
540 ta.id = pa.id;
541 if (pa.id == 0) {
542 ta.current = ta.worst = 0;
543 memset(ta.raw, 0, sizeof(ta.raw));
544 continue;
545 }
546 ta.current = pa.val;
547 ta.worst = pa.worst;
548 ta.raw[0] = (unsigned char) pa.raw;
549 ta.raw[1] = (unsigned char)(pa.raw >> 8);
550 ta.raw[2] = (unsigned char)(pa.raw >> 16);
551 ta.raw[3] = (unsigned char)(pa.raw >> 24);
552 ta.raw[4] = (unsigned char)(pa.raw >> 32);
553 ta.raw[5] = (unsigned char)(pa.raw >> 40);
554 ta.reserv = pa.resvd;
555 }
556 }
557
// Parse a line from a state file.
//
// A line has the form "KEY = NUMBER" where KEY is one of the names in the
// regular expression below.  On success the number is stored in the
// matching member of 'state' (narrowed by a cast where the member is
// smaller than 64 bit) and true is returned.
//
// Returns false if the line does not match, if a mail or attribute index
// is out of range, or if no known sub-key matched.
// Lines for mail type MAILTYPE_TEST are accepted (true) but not stored.
static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
{
  // The trailing comments give the capture group numbers; the index
  // arithmetic on 'm' below relies on exactly this numbering.
  static const regular_expression regex(
    "^ *"
    "((temperature-min)" // (1 (2)
    "|(temperature-max)" // (3)
    "|(self-test-errors)" // (4)
    "|(self-test-last-err-hour)" // (5)
    "|(scheduled-test-next-check)" // (6)
    "|(selective-test-last-start)" // (7)
    "|(selective-test-last-end)" // (8)
    "|(ata-error-count)" // (9)
    "|(mail\\.([0-9]+)\\." // (10 (11)
    "((count)" // (12 (13)
    "|(first-sent-time)" // (14)
    "|(last-sent-time)" // (15)
    ")" // 12)
    ")" // 10)
    "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
    "((id)" // (18 (19)
    "|(val)" // (20)
    "|(worst)" // (21)
    "|(raw)" // (22)
    "|(resvd)" // (23)
    ")" // 18)
    ")" // 16)
    "|(nvme-err-log-entries)" // (24)
    ")" // 1)
    " *= *([0-9]+)[ \n]*$", // (25)
    REG_EXTENDED
  );

  // match[0] is the whole match, match[1..25] are the groups above
  const int nmatch = 1+25;
  regmatch_t match[nmatch];
  if (!regex.execute(line, nmatch, match))
    return false;
  // Group 25 (the number) must be present
  if (match[nmatch-1].rm_so < 0)
    return false;

  uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);

  // 'm' walks through the capture groups.  Each condition below advances
  // 'm' as a side effect, so the order of the else-if chain must not change.
  int m = 1;
  if (match[++m].rm_so >= 0) // (2) temperature-min
    state.tempmin = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // (3) temperature-max
    state.tempmax = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // (4) self-test-errors
    state.selflogcount = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // (5) self-test-last-err-hour
    state.selfloghour = (unsigned short)val;
  else if (match[++m].rm_so >= 0) // (6) scheduled-test-next-check
    state.scheduled_test_next_check = (time_t)val;
  else if (match[++m].rm_so >= 0) // (7) selective-test-last-start
    state.selective_test_last_start = val;
  else if (match[++m].rm_so >= 0) // (8) selective-test-last-end
    state.selective_test_last_end = val;
  else if (match[++m].rm_so >= 0) // (9) ata-error-count
    state.ataerrorcount = (int)val;
  else if (match[m+=2].rm_so >= 0) { // 9 -> (11): index of "mail.N."
    int i = atoi(line+match[m].rm_so);
    if (!(0 <= i && i < SMARTD_NMAIL))
      return false;
    if (i == MAILTYPE_TEST) // Don't suppress test mails
      return true;
    if (match[m+=2].rm_so >= 0) // 11 -> (13) count
      state.maillog[i].logged = (int)val;
    else if (match[++m].rm_so >= 0) // (14) first-sent-time
      state.maillog[i].firstsent = (time_t)val;
    else if (match[++m].rm_so >= 0) // (15) last-sent-time
      state.maillog[i].lastsent = (time_t)val;
    else
      return false;
  }
  else if (match[m+=5+1].rm_so >= 0) { // 11 -> (17): index of "ata-smart-attribute.N."
    int i = atoi(line+match[m].rm_so);
    if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
      return false;
    if (match[m+=2].rm_so >= 0) // 17 -> (19) id
      state.ata_attributes[i].id = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // (20) val
      state.ata_attributes[i].val = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // (21) worst
      state.ata_attributes[i].worst = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // (22) raw
      state.ata_attributes[i].raw = val;
    else if (match[++m].rm_so >= 0) // (23) resvd
      state.ata_attributes[i].resvd = (unsigned char)val;
    else
      return false;
  }
  else if (match[m+7].rm_so >= 0) // 17+7 = (24) nvme-err-log-entries
    state.nvme_err_log_entries = val;
  else
    return false;
  return true;
}
655
656 // Read a state file.
657 static bool read_dev_state(const char * path, persistent_dev_state & state)
658 {
659 stdio_file f(path, "r");
660 if (!f) {
661 if (errno != ENOENT)
662 pout("Cannot read state file \"%s\"\n", path);
663 return false;
664 }
665 #ifdef __CYGWIN__
666 setmode(fileno(f), O_TEXT); // Allow files with \r\n
667 #endif
668
669 persistent_dev_state new_state;
670 int good = 0, bad = 0;
671 char line[256];
672 while (fgets(line, sizeof(line), f)) {
673 const char * s = line + strspn(line, " \t");
674 if (!*s || *s == '#')
675 continue;
676 if (!parse_dev_state_line(line, new_state))
677 bad++;
678 else
679 good++;
680 }
681
682 if (bad) {
683 if (!good) {
684 pout("%s: format error\n", path);
685 return false;
686 }
687 pout("%s: %d invalid line(s) ignored\n", path, bad);
688 }
689
690 // This sets the values missing in the file to 0.
691 state = new_state;
692 return true;
693 }
694
// Write "NAME = VAL" to 'f'.  Zero values are not written at all.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}

// Write "NAME1.ID.NAME2 = VAL" to 'f'.  Zero values are not written at all.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
706
707 // Write a state file
708 static bool write_dev_state(const char * path, const persistent_dev_state & state)
709 {
710 // Rename old "file" to "file~"
711 std::string pathbak = path; pathbak += '~';
712 unlink(pathbak.c_str());
713 rename(path, pathbak.c_str());
714
715 stdio_file f(path, "w");
716 if (!f) {
717 pout("Cannot create state file \"%s\"\n", path);
718 return false;
719 }
720
721 fprintf(f, "# smartd state file\n");
722 write_dev_state_line(f, "temperature-min", state.tempmin);
723 write_dev_state_line(f, "temperature-max", state.tempmax);
724 write_dev_state_line(f, "self-test-errors", state.selflogcount);
725 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
726 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
727 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
728 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
729
730 int i;
731 for (i = 0; i < SMARTD_NMAIL; i++) {
732 if (i == MAILTYPE_TEST) // Don't suppress test mails
733 continue;
734 const mailinfo & mi = state.maillog[i];
735 if (!mi.logged)
736 continue;
737 write_dev_state_line(f, "mail", i, "count", mi.logged);
738 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
739 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
740 }
741
742 // ATA ONLY
743 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
744
745 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
746 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
747 if (!pa.id)
748 continue;
749 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
750 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
751 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
752 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
753 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
754 }
755
756 // NVMe only
757 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
758
759 return true;
760 }
761
762 // Write to the attrlog file
763 static bool write_dev_attrlog(const char * path, const dev_state & state)
764 {
765 stdio_file f(path, "a");
766 if (!f) {
767 pout("Cannot create attribute log file \"%s\"\n", path);
768 return false;
769 }
770
771
772 time_t now = time(0);
773 struct tm * tms = gmtime(&now);
774 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
775 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
776 tms->tm_hour, tms->tm_min, tms->tm_sec);
777 // ATA ONLY
778 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
779 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
780 if (!pa.id)
781 continue;
782 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
783 }
784 // SCSI ONLY
785 const struct scsiErrorCounter * ecp;
786 const char * pageNames[3] = {"read", "write", "verify"};
787 for (int k = 0; k < 3; ++k) {
788 if ( !state.scsi_error_counters[k].found ) continue;
789 ecp = &state.scsi_error_counters[k].errCounter;
790 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
791 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
792 "\t%s-corr-by-retry;%" PRIu64 ";"
793 "\t%s-total-err-corrected;%" PRIu64 ";"
794 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
795 "\t%s-gb-processed;%.3f;"
796 "\t%s-total-unc-errors;%" PRIu64 ";",
797 pageNames[k], ecp->counter[0],
798 pageNames[k], ecp->counter[1],
799 pageNames[k], ecp->counter[2],
800 pageNames[k], ecp->counter[3],
801 pageNames[k], ecp->counter[4],
802 pageNames[k], (ecp->counter[5] / 1000000000.0),
803 pageNames[k], ecp->counter[6]);
804 }
805 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
806 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
807 }
808 // write SCSI current temperature if it is monitored
809 if(state.TempPageSupported && state.temperature)
810 fprintf(f, "\ttemperature;%d;", state.temperature);
811 // end of line
812 fprintf(f, "\n");
813 return true;
814 }
815
816 // Write all state files. If write_always is false, don't write
817 // unless must_write is set.
818 static void write_all_dev_states(const dev_config_vector & configs,
819 dev_state_vector & states,
820 bool write_always = true)
821 {
822 for (unsigned i = 0; i < states.size(); i++) {
823 const dev_config & cfg = configs.at(i);
824 if (cfg.state_file.empty())
825 continue;
826 dev_state & state = states[i];
827 if (!write_always && !state.must_write)
828 continue;
829 if (!write_dev_state(cfg.state_file.c_str(), state))
830 continue;
831 state.must_write = false;
832 if (write_always || debugmode)
833 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
834 cfg.name.c_str(), cfg.state_file.c_str());
835 }
836 }
837
838 // Write to all attrlog files
839 static void write_all_dev_attrlogs(const dev_config_vector & configs,
840 dev_state_vector & states)
841 {
842 for (unsigned i = 0; i < states.size(); i++) {
843 const dev_config & cfg = configs.at(i);
844 if (cfg.attrlog_file.empty())
845 continue;
846 dev_state & state = states[i];
847 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
848 }
849 }
850
851 // remove the PID file
852 static void RemovePidFile()
853 {
854 if (!pid_file.empty()) {
855 if (unlink(pid_file.c_str()))
856 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
857 pid_file.c_str(), strerror(errno));
858 pid_file.clear();
859 }
860 return;
861 }
862
863 extern "C" { // signal handlers require C-linkage
864
865 // Note if we catch a SIGUSR1
866 static void USR1handler(int sig)
867 {
868 if (SIGUSR1==sig)
869 caughtsigUSR1=1;
870 return;
871 }
872
873 #ifdef _WIN32
874 // Note if we catch a SIGUSR2
875 static void USR2handler(int sig)
876 {
877 if (SIGUSR2==sig)
878 caughtsigUSR2=1;
879 return;
880 }
881 #endif
882
883 // Note if we catch a HUP (or INT in debug mode)
884 static void HUPhandler(int sig)
885 {
886 if (sig==SIGHUP)
887 caughtsigHUP=1;
888 else
889 caughtsigHUP=2;
890 return;
891 }
892
893 // signal handler for TERM, QUIT, and INT (if not in debug mode)
894 static void sighandler(int sig)
895 {
896 if (!caughtsigEXIT)
897 caughtsigEXIT=sig;
898 return;
899 }
900
901 } // extern "C"
902
903 // Cleanup, print Goodbye message and remove pidfile
904 static int Goodbye(int status)
905 {
906 // delete PID file, if one was created
907 RemovePidFile();
908
909 // and this should be the final output from smartd before it exits
910 PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
911
912 return status;
913 }
914
// a replacement for setenv() which is not available on all platforms.
// Note that the string passed to putenv must not be freed or made
// invalid, since a pointer to it is kept by putenv(). This means that
// it must either be a static buffer or allocated off the heap. The
// string can be freed if the environment variable is redefined via
// another call to putenv(). There is no portable way to unset a variable
// with putenv(). So we manage the buffer in a static object.
// Using setenv() if available is not considered because some
// implementations may produce memory leaks.

/// Owner of the "NAME=VALUE" string of one environment variable.
/// There is intentionally no destructor: the environment may still
/// refer to the buffer, so it must stay valid.
class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  /// Set environment variable 'name' to 'value'.
  /// The same 'name' must be passed on each call for a given object.
  /// Throws std::runtime_error if putenv() fails; in this case the
  /// previous setting (and its buffer) is kept.
  void set(const char * name, const char * value);

private:
  char * m_buf; // string currently passed to putenv(), 0 if none

  // Not copyable: two objects must not own the same buffer
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "VALUE" + '\0'
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // On failure the new string was not added to the environment,
    // so release it here instead of leaking it.
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // The environment now refers to newbuf, so the old string can be freed.
  // This assumes that the same NAME is passed on each call
  delete [] m_buf;
  m_buf = newbuf;
}
953
954 #define EBUFLEN 1024
955
956 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
957 __attribute_format_printf(4, 5);
958
959 // If either address or executable path is non-null then send and log
960 // a warning email, or execute executable
961 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
962 {
963 static const char * const whichfail[] = {
964 "EmailTest", // 0
965 "Health", // 1
966 "Usage", // 2
967 "SelfTest", // 3
968 "ErrorCount", // 4
969 "FailedHealthCheck", // 5
970 "FailedReadSmartData", // 6
971 "FailedReadSmartErrorLog", // 7
972 "FailedReadSmartSelfTestLog", // 8
973 "FailedOpenDevice", // 9
974 "CurrentPendingSector", // 10
975 "OfflineUncorrectableSector", // 11
976 "Temperature" // 12
977 };
978
979 // See if user wants us to send mail
980 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
981 return;
982
983 std::string address = cfg.emailaddress;
984 const char * executable = cfg.emailcmdline.c_str();
985
986 // which type of mail are we sending?
987 mailinfo * mail=(state.maillog)+which;
988
989 // checks for sanity
990 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
991 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
992 return;
993 }
994 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
995 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
996 which, (int)sizeof(whichfail));
997 return;
998 }
999
1000 // Return if a single warning mail has been sent.
1001 if ((cfg.emailfreq==1) && mail->logged)
1002 return;
1003
1004 // Return if this is an email test and one has already been sent.
1005 if (which == 0 && mail->logged)
1006 return;
1007
1008 // To decide if to send mail, we need to know what time it is.
1009 time_t epoch = time(0);
1010
1011 // Return if less than one day has gone by
1012 const int day = 24*3600;
1013 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1014 return;
1015
1016 // Return if less than 2^(logged-1) days have gone by
1017 if (cfg.emailfreq==3 && mail->logged) {
1018 int days = 0x01 << (mail->logged - 1);
1019 days*=day;
1020 if (epoch<(mail->lastsent+days))
1021 return;
1022 }
1023
1024 #ifdef HAVE_LIBCAP_NG
1025 if (enable_capabilities) {
1026 PrintOut(LOG_ERR, "Sending a mail was supressed. "
1027 "Mails can't be send when capabilites are enabled\n");
1028 return;
1029 }
1030 #endif
1031
1032 // record the time of this mail message, and the first mail message
1033 if (!mail->logged)
1034 mail->firstsent=epoch;
1035 mail->lastsent=epoch;
1036
1037 // print warning string into message
1038 char message[256];
1039 va_list ap;
1040 va_start(ap, fmt);
1041 vsnprintf(message, sizeof(message), fmt, ap);
1042 va_end(ap);
1043
1044 // replace commas by spaces to separate recipients
1045 std::replace(address.begin(), address.end(), ',', ' ');
1046
1047 // Export information in environment variables that will be useful
1048 // for user scripts
1049 static env_buffer env[12];
1050 env[0].set("SMARTD_MAILER", executable);
1051 env[1].set("SMARTD_MESSAGE", message);
1052 char dates[DATEANDEPOCHLEN];
1053 snprintf(dates, sizeof(dates), "%d", mail->logged);
1054 env[2].set("SMARTD_PREVCNT", dates);
1055 dateandtimezoneepoch(dates, mail->firstsent);
1056 env[3].set("SMARTD_TFIRST", dates);
1057 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1058 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1059 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1060 env[6].set("SMARTD_ADDRESS", address.c_str());
1061 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1062
1063 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1064 env[8].set("SMARTD_DEVICETYPE",
1065 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1066 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1067
1068 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1069 dates[0] = 0;
1070 if (which) switch (cfg.emailfreq) {
1071 case 2: dates[0] = '1'; dates[1] = 0; break;
1072 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1073 }
1074 env[11].set("SMARTD_NEXTDAYS", dates);
1075
1076 // now construct a command to send this as EMAIL
1077 if (!*executable)
1078 executable = "<mail>";
1079 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1080 const char * newwarn = (which? "Warning via" : "Test of");
1081
1082 #ifndef _WIN32
1083 char command[2048];
1084 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1085
1086 // tell SYSLOG what we are about to do...
1087 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1088 which?"Sending warning via":"Executing test of", executable, newadd);
1089
1090 // issue the command to send mail or to run the user's executable
1091 errno=0;
1092 FILE * pfp;
1093 if (!(pfp=popen(command, "r")))
1094 // failed to popen() mail process
1095 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1096 newwarn, executable, newadd, errno?strerror(errno):"");
1097 else {
1098 // pipe suceeded!
1099 int len, status;
1100 char buffer[EBUFLEN];
1101
1102 // if unexpected output on stdout/stderr, null terminate, print, and flush
1103 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1104 int count=0;
1105 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1106 buffer[newlen]='\0';
1107 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1108 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1109
1110 // flush pipe if needed
1111 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1112 count++;
1113
1114 // tell user that pipe was flushed, or that something is really wrong
1115 if (count && count<EBUFLEN)
1116 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1117 newwarn, executable, newadd);
1118 else if (count)
1119 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1120 newwarn, executable, newadd);
1121 }
1122
1123 // if something went wrong with mail process, print warning
1124 errno=0;
1125 if (-1==(status=pclose(pfp)))
1126 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1127 errno?strerror(errno):"");
1128 else {
1129 // mail process apparently succeeded. Check and report exit status
1130 if (WIFEXITED(status)) {
1131 // exited 'normally' (but perhaps with nonzero status)
1132 int status8 = WEXITSTATUS(status);
1133 if (status8>128)
1134 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1135 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1136 else if (status8)
1137 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1138 newwarn, executable, newadd, status, status8);
1139 else
1140 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1141 }
1142
1143 if (WIFSIGNALED(status))
1144 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1145 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1146
1147 // this branch is probably not possible. If subprocess is
1148 // stopped then pclose() should not return.
1149 if (WIFSTOPPED(status))
1150 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1151 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1152
1153 }
1154 }
1155
1156 #else // _WIN32
1157 {
1158 char command[2048];
1159 snprintf(command, sizeof(command), "cmd /c \"%s\"", warning_script.c_str());
1160
1161 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1162 int rc;
1163 // run command
1164 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1165 (which?"Sending warning via":"Executing test of"), executable, newadd);
1166 rc = daemon_spawn(command, "", 0, stdoutbuf, sizeof(stdoutbuf));
1167 if (rc >= 0 && stdoutbuf[0])
1168 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1169 newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1170 if (rc != 0)
1171 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1172 newwarn, executable, newadd, rc);
1173 else
1174 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1175 }
1176
1177 #endif // _WIN32
1178
1179 // increment mail sent counter
1180 mail->logged++;
1181 }
1182
1183 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1184 __attribute_format_printf(4, 5);
1185
1186 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1187 {
1188 if (!(0 <= which && which < SMARTD_NMAIL))
1189 return;
1190
1191 // Return if no mail sent yet
1192 mailinfo & mi = state.maillog[which];
1193 if (!mi.logged)
1194 return;
1195
1196 // Format & print message
1197 char msg[256];
1198 va_list ap;
1199 va_start(ap, fmt);
1200 vsnprintf(msg, sizeof(msg), fmt, ap);
1201 va_end(ap);
1202
1203 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1204 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1205
1206 // Clear mail counter and timestamps
1207 mi = mailinfo();
1208 state.must_write = true;
1209 }
1210
1211 #ifndef _WIN32
1212
1213 // Output multiple lines via separate syslog(3) calls.
1214 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1215 {
1216 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1217 vsnprintf(buf, sizeof(buf), fmt, ap);
1218
1219 for (char * p = buf, * q; p && *p; p = q) {
1220 if ((q = strchr(p, '\n')))
1221 *q++ = 0;
1222 if (*p)
1223 syslog(priority, "%s\n", p);
1224 }
1225 }
1226
1227 #else // _WIN32
1228 // os_win32/syslog_win32.cpp supports multiple lines.
1229 #define vsyslog_lines vsyslog
1230 #endif // _WIN32
1231
1232 // Printing function for watching ataprint commands, or losing them
1233 // [From GLIBC Manual: Since the prototype doesn't specify types for
1234 // optional arguments, in a call to a variadic function the default
1235 // argument promotions are performed on the optional argument
1236 // values. This means the objects of type char or short int (whether
1237 // signed or not) are promoted to either int or unsigned int, as
1238 // appropriate.]
1239 void pout(const char *fmt, ...){
1240 va_list ap;
1241
1242 // get the correct time in syslog()
1243 FixGlibcTimeZoneBug();
1244 // initialize variable argument list
1245 va_start(ap,fmt);
1246 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1247 if (debugmode && debugmode != 2) {
1248 FILE * f = stdout;
1249 #ifdef _WIN32
1250 if (facility == LOG_LOCAL1) // logging to stdout
1251 f = stderr;
1252 #endif
1253 vfprintf(f, fmt, ap);
1254 fflush(f);
1255 }
1256 // in debugmode==2 mode we print output from knowndrives.o functions
1257 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1258 openlog("smartd", LOG_PID, facility);
1259 vsyslog_lines(LOG_INFO, fmt, ap);
1260 closelog();
1261 }
1262 va_end(ap);
1263 return;
1264 }
1265
1266 // This function prints either to stdout or to the syslog as needed.
1267 static void PrintOut(int priority, const char *fmt, ...){
1268 va_list ap;
1269
1270 // get the correct time in syslog()
1271 FixGlibcTimeZoneBug();
1272 // initialize variable argument list
1273 va_start(ap,fmt);
1274 if (debugmode) {
1275 FILE * f = stdout;
1276 #ifdef _WIN32
1277 if (facility == LOG_LOCAL1) // logging to stdout
1278 f = stderr;
1279 #endif
1280 vfprintf(f, fmt, ap);
1281 fflush(f);
1282 }
1283 else {
1284 openlog("smartd", LOG_PID, facility);
1285 vsyslog_lines(priority, fmt, ap);
1286 closelog();
1287 }
1288 va_end(ap);
1289 return;
1290 }
1291
1292 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1293 void checksumwarning(const char * string)
1294 {
1295 pout("Warning! %s error: invalid SMART checksum.\n", string);
1296 }
1297
1298 #ifndef _WIN32
1299
1300 // Wait for the pid file to show up, this makes sure a calling program knows
1301 // that the daemon is really up and running and has a pid to kill it
1302 static bool WaitForPidFile()
1303 {
1304 int waited, max_wait = 10;
1305 struct stat stat_buf;
1306
1307 if (pid_file.empty() || debugmode)
1308 return true;
1309
1310 for(waited = 0; waited < max_wait; ++waited) {
1311 if (!stat(pid_file.c_str(), &stat_buf)) {
1312 return true;
1313 } else
1314 sleep(1);
1315 }
1316 return false;
1317 }
1318
1319 #endif // _WIN32
1320
1321 // Forks new process, closes ALL file descriptors, redirects stdin,
1322 // stdout, and stderr. Not quite daemon(). See
1323 // http://www.linuxjournal.com/article/2335
1324 // for a good description of why we do things this way.
1325 static void DaemonInit()
1326 {
1327 #ifndef _WIN32
1328 pid_t pid;
1329 int i;
1330
1331 // flush all buffered streams. Else we might get two copies of open
1332 // streams since both parent and child get copies of the buffers.
1333 fflush(NULL);
1334
1335 if (do_fork) {
1336 if ((pid=fork()) < 0) {
1337 // unable to fork!
1338 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1339 EXIT(EXIT_STARTUP);
1340 }
1341 else if (pid) {
1342 // we are the parent process, wait for pid file, then exit cleanly
1343 if(!WaitForPidFile()) {
1344 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1345 EXIT(EXIT_STARTUP);
1346 } else
1347 EXIT(0);
1348 }
1349
1350 // from here on, we are the child process.
1351 setsid();
1352
1353 // Fork one more time to avoid any possibility of having terminals
1354 if ((pid=fork()) < 0) {
1355 // unable to fork!
1356 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1357 EXIT(EXIT_STARTUP);
1358 }
1359 else if (pid)
1360 // we are the parent process -- exit cleanly
1361 EXIT(0);
1362
1363 // Now we are the child's child...
1364 }
1365
1366 // close any open file descriptors
1367 for (i=getdtablesize();i>=0;--i)
1368 close(i);
1369
1370 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1371
1372 // redirect any IO attempts to /dev/null for stdin
1373 i=open("/dev/null",O_RDWR);
1374 if (i>=0) {
1375 // stdout
1376 NO_warn_unused_result(dup(i));
1377 // stderr
1378 NO_warn_unused_result(dup(i));
1379 };
1380 umask(0022);
1381 NO_warn_unused_result(chdir("/"));
1382
1383 if (do_fork)
1384 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1385
1386 #else // _WIN32
1387
1388 // No fork() on native Win32
1389 // Detach this process from console
1390 fflush(NULL);
1391 if (daemon_detach("smartd")) {
1392 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1393 EXIT(EXIT_STARTUP);
1394 }
1395 // stdin/out/err now closed if not redirected
1396
1397 #endif // _WIN32
1398 return;
1399 }
1400
1401 // create a PID file containing the current process id
1402 static void WritePidFile()
1403 {
1404 if (!pid_file.empty()) {
1405 pid_t pid = getpid();
1406 mode_t old_umask;
1407 #ifndef __CYGWIN__
1408 old_umask = umask(0077); // rwx------
1409 #else
1410 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1411 old_umask = umask(0033); // rwxr--r--
1412 #endif
1413
1414 stdio_file f(pid_file.c_str(), "w");
1415 umask(old_umask);
1416 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1417 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1418 EXIT(EXIT_PID);
1419 }
1420 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1421 }
1422 }
1423
1424 // Prints header identifying version of code and home
1425 static void PrintHead()
1426 {
1427 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1428 }
1429
1430 // prints help info for configuration file Directives
1431 static void Directives()
1432 {
1433 PrintOut(LOG_INFO,
1434 "Configuration file (%s) Directives (after device name):\n"
1435 " -d TYPE Set the device type: auto, ignore, removable,\n"
1436 " %s\n"
1437 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1438 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1439 " -S VAL Enable/disable attribute autosave (on/off)\n"
1440 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1441 " -H Monitor SMART Health Status, report if failed\n"
1442 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1443 " -l TYPE Monitor SMART log or self-test status:\n"
1444 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1445 " -l scterc,R,W Set SCT Error Recovery Control\n"
1446 " -e Change device setting: aam,[N|off], apm,[N|off], lookahead,[on|off],\n"
1447 " security-freeze, standby,[N|off], wcache,[on|off]\n"
1448 " -f Monitor 'Usage' Attributes, report failures\n"
1449 " -m ADD Send email warning to address ADD\n"
1450 " -M TYPE Modify email warning behavior (see man page)\n"
1451 " -p Report changes in 'Prefailure' Attributes\n"
1452 " -u Report changes in 'Usage' Attributes\n"
1453 " -t Equivalent to -p and -u Directives\n"
1454 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1455 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1456 " -i ID Ignore Attribute ID for -f Directive\n"
1457 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1458 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1459 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1460 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1461 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1462 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1463 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1464 " -F TYPE Use firmware bug workaround:\n"
1465 " %s\n"
1466 " # Comment: text after a hash sign is ignored\n"
1467 " \\ Line continuation character\n"
1468 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1469 "Use ID = 0 to turn off -C and/or -U Directives\n"
1470 "Example: /dev/sda -a\n",
1471 configfile,
1472 smi()->get_valid_dev_types_str().c_str(),
1473 get_valid_firmwarebug_args());
1474 }
1475
/* Maps command line option OPT to a string literal listing its valid
   arguments.  Returns NULL for options without such a list. */
static const char *GetValidArgList(char opt)
{
  // Options taking a path prefix
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";

  if (opt == 'c')
    return "<FILE_NAME>, -";

  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";

  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";

  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";

  // Options taking a plain file name
  if (opt == 'B' || opt == 'p' || opt == 'w')
    return "<FILE_NAME>";

  if (opt == 'i')
    return "<INTEGER_SECONDS>";

  return NULL;
}
1502
1503 /* prints help information for command syntax */
1504 static void Usage()
1505 {
1506 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1507 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1508 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1509 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1510 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1511 #endif
1512 PrintOut(LOG_INFO,"\n");
1513 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1514 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1515 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1516 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1517 PrintOut(LOG_INFO,"\n");
1518 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1519 #endif
1520 PrintOut(LOG_INFO,"]\n\n");
1521 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1522 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1523 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1524 #ifdef HAVE_LIBCAP_NG
1525 PrintOut(LOG_INFO," -C, --capabilities\n");
1526 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1527 " Warning: Mail notification does not work when used.\n\n");
1528 #endif
1529 PrintOut(LOG_INFO," -d, --debug\n");
1530 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1531 PrintOut(LOG_INFO," -D, --showdirectives\n");
1532 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1533 PrintOut(LOG_INFO," -h, --help, --usage\n");
1534 PrintOut(LOG_INFO," Display this help and exit\n\n");
1535 PrintOut(LOG_INFO," -i N, --interval=N\n");
1536 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1537 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1538 #ifndef _WIN32
1539 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1540 #else
1541 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1542 #endif
1543 #ifndef _WIN32
1544 PrintOut(LOG_INFO," -n, --no-fork\n");
1545 PrintOut(LOG_INFO," Do not fork into background\n\n");
1546 #endif // _WIN32
1547 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1548 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1549 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1550 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1551 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1552 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1553 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1554 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1555 #ifdef SMARTMONTOOLS_SAVESTATES
1556 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1557 #endif
1558 PrintOut(LOG_INFO,"\n");
1559 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1560 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1561 #ifndef _WIN32
1562 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1563 #else
1564 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1565 #endif
1566 #ifdef _WIN32
1567 PrintOut(LOG_INFO," --service\n");
1568 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1569 PrintOut(LOG_INFO," smartd install [options]\n");
1570 PrintOut(LOG_INFO," Remove service with:\n");
1571 PrintOut(LOG_INFO," smartd remove\n\n");
1572 #endif // _WIN32
1573 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1574 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1575 }
1576
1577 static int CloseDevice(smart_device * device, const char * name)
1578 {
1579 if (!device->close()){
1580 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1581 return 1;
1582 }
1583 // device sucessfully closed
1584 return 0;
1585 }
1586
// Return true if C is not allowed in a state file name.
// Only the ASCII digits and letters are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1594
1595 // Read error count from Summary or Extended Comprehensive SMART error log
1596 // Return -1 on error
1597 static int read_ata_error_count(ata_device * device, const char * name,
1598 firmwarebug_defs firmwarebugs, bool extended)
1599 {
1600 if (!extended) {
1601 ata_smart_errorlog log;
1602 if (ataReadErrorLog(device, &log, firmwarebugs)){
1603 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1604 return -1;
1605 }
1606 return (log.error_log_pointer ? log.ata_error_count : 0);
1607 }
1608 else {
1609 ata_smart_exterrlog logx;
1610 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1611 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1612 return -1;
1613 }
1614 // Some disks use the reserved byte as index, see ataprint.cpp.
1615 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1616 }
1617 }
1618
1619 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1620 // error count, and top bits are the power-on hours of the last error.
1621 static int SelfTestErrorCount(ata_device * device, const char * name,
1622 firmwarebug_defs firmwarebugs)
1623 {
1624 struct ata_smart_selftestlog log;
1625
1626 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1627 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1628 return -1;
1629 }
1630
1631 // return current number of self-test errors
1632 return ataPrintSmartSelfTestlog(&log, false, firmwarebugs);
1633 }
1634
// Unpack a packed self-test result value: bits 0-7 hold the error count,
// bits 8-23 the hours.  The argument is parenthesized so that an
// expression such as 'a | b' is unpacked as a whole.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1637
// Return true if the offline data collection status byte reports
// "in progress" (0x03).  Bit 7 of STATUS is ignored.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned char code = status & 0x7f;
  return code == 0x03;
}
1643
// Return true if the upper four bits of the self-test execution status
// byte are all set, which marks a self-test in progress.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return (status & 0xf0) == 0xf0;
}
1649
1650 // Log offline data collection status
1651 static void log_offline_data_coll_status(const char * name, unsigned char status)
1652 {
1653 const char * msg;
1654 switch (status & 0x7f) {
1655 case 0x00: msg = "was never started"; break;
1656 case 0x02: msg = "was completed without error"; break;
1657 case 0x03: msg = "is in progress"; break;
1658 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1659 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1660 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1661 default: msg = 0;
1662 }
1663
1664 if (msg)
1665 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1666 "Device: %s, offline data collection %s%s\n", name, msg,
1667 ((status & 0x80) ? " (auto:on)" : ""));
1668 else
1669 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1670 name, status);
1671 }
1672
1673 // Log self-test execution status
1674 static void log_self_test_exec_status(const char * name, unsigned char status)
1675 {
1676 const char * msg;
1677 switch (status >> 4) {
1678 case 0x0: msg = "completed without error"; break;
1679 case 0x1: msg = "was aborted by the host"; break;
1680 case 0x2: msg = "was interrupted by the host with a reset"; break;
1681 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1682 case 0x4: msg = "completed with error (unknown test element)"; break;
1683 case 0x5: msg = "completed with error (electrical test element)"; break;
1684 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1685 case 0x7: msg = "completed with error (read test element)"; break;
1686 case 0x8: msg = "completed with error (handling damage?)"; break;
1687 default: msg = 0;
1688 }
1689
1690 if (msg)
1691 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1692 "Device: %s, previous self-test %s\n", name, msg);
1693 else if ((status >> 4) == 0xf)
1694 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1695 name, status & 0x0f);
1696 else
1697 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1698 name, status);
1699 }
1700
1701 // Check pending sector count id (-C, -U directives).
1702 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1703 unsigned char id, const char * msg)
1704 {
1705 // Check attribute index
1706 int i = ata_find_attr_index(id, state.smartval);
1707 if (i < 0) {
1708 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1709 cfg.name.c_str(), msg, id);
1710 return false;
1711 }
1712
1713 // Check value
1714 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1715 cfg.attribute_defs);
1716 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1717 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1718 cfg.name.c_str(), msg, id, rawval, rawval);
1719 return false;
1720 }
1721
1722 return true;
1723 }
1724
1725 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1726 static void finish_device_scan(dev_config & cfg, dev_state & state)
1727 {
1728 // Set cfg.emailfreq if user hasn't set it
1729 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1730 // Avoid that emails are suppressed forever due to state persistence
1731 if (cfg.state_file.empty())
1732 cfg.emailfreq = 1; // '-M once'
1733 else
1734 cfg.emailfreq = 2; // '-M daily'
1735 }
1736
1737 // Start self-test regex check now if time was not read from state file
1738 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1739 state.scheduled_test_next_check = time(0);
1740 }
1741
1742 // Common function to format result message for ATA setting
1743 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1744 int set_option = 0, bool has_value = false)
1745 {
1746 if (!msg.empty())
1747 msg += ", ";
1748 msg += name;
1749 if (!ok)
1750 msg += ":--";
1751 else if (set_option < 0)
1752 msg += ":off";
1753 else if (has_value)
1754 msg += strprintf(":%d", set_option-1);
1755 else if (set_option > 0)
1756 msg += ":on";
1757 }
1758
1759
1760 // TODO: Add '-F swapid' directive
1761 const bool fix_swapped_id = false;
1762
1763 // scan to see what ata devices there are, and if they support SMART
1764 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1765 {
1766 int supported=0;
1767 struct ata_identify_device drive;
1768 const char *name = cfg.name.c_str();
1769 int retid;
1770
1771 // Device must be open
1772
1773 // Get drive identity structure
1774 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1775 if (retid<0)
1776 // Unable to read Identity structure
1777 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1778 else
1779 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1780 name, packetdevicetype(retid-1));
1781 CloseDevice(atadev, name);
1782 return 2;
1783 }
1784
1785 // Get drive identity, size and rotation rate (HDD/SSD)
1786 char model[40+1], serial[20+1], firmware[8+1];
1787 ata_format_id_string(model, drive.model, sizeof(model)-1);
1788 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1789 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1790
1791 ata_size_info sizes;
1792 ata_get_size_info(&drive, sizes);
1793 state.num_sectors = sizes.sectors;
1794 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1795
1796 char wwn[30]; wwn[0] = 0;
1797 unsigned oui = 0; uint64_t unique_id = 0;
1798 int naa = ata_get_wwn(&drive, oui, unique_id);
1799 if (naa >= 0)
1800 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1801
1802 // Format device id string for warning emails
1803 char cap[32];
1804 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1805 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1806
1807 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1808
1809 // Show if device in database, and use preset vendor attribute
1810 // options unless user has requested otherwise.
1811 if (cfg.ignorepresets)
1812 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1813 else {
1814 // Apply vendor specific presets, print warning if present
1815 const drive_settings * dbentry = lookup_drive_apply_presets(
1816 &drive, cfg.attribute_defs, cfg.firmwarebugs);
1817 if (!dbentry)
1818 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1819 else {
1820 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1821 name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1822 if (*dbentry->warningmsg)
1823 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1824 }
1825 }
1826
1827 // Check for ATA Security LOCK
1828 unsigned short word128 = drive.words088_255[128-88];
1829 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1830 if (locked)
1831 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
1832
1833 // Set default '-C 197[+]' if no '-C ID' is specified.
1834 if (!cfg.curr_pending_set)
1835 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1836 // Set default '-U 198[+]' if no '-U ID' is specified.
1837 if (!cfg.offl_pending_set)
1838 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1839
1840 // If requested, show which presets would be used for this drive
1841 if (cfg.showpresets) {
1842 int savedebugmode=debugmode;
1843 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1844 if (!debugmode)
1845 debugmode=2;
1846 show_presets(&drive);
1847 debugmode=savedebugmode;
1848 }
1849
1850 // see if drive supports SMART
1851 supported=ataSmartSupport(&drive);
1852 if (supported!=1) {
1853 if (supported==0)
1854 // drive does NOT support SMART
1855 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1856 else
1857 // can't tell if drive supports SMART
1858 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1859
1860 // should we proceed anyway?
1861 if (cfg.permissive) {
1862 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1863 }
1864 else {
1865 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1866 CloseDevice(atadev, name);
1867 return 2;
1868 }
1869 }
1870
1871 if (ataEnableSmart(atadev)) {
1872 // Enable SMART command has failed
1873 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1874
1875 if (ataIsSmartEnabled(&drive) <= 0) {
1876 CloseDevice(atadev, name);
1877 return 2;
1878 }
1879 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
1880 }
1881
1882 // disable device attribute autosave...
1883 if (cfg.autosave==1) {
1884 if (ataDisableAutoSave(atadev))
1885 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1886 else
1887 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1888 }
1889
1890 // or enable device attribute autosave
1891 if (cfg.autosave==2) {
1892 if (ataEnableAutoSave(atadev))
1893 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1894 else
1895 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1896 }
1897
1898 // capability check: SMART status
1899 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1900 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1901 cfg.smartcheck = false;
1902 }
1903
1904 // capability check: Read smart values and thresholds. Note that
1905 // smart values are ALSO needed even if we ONLY want to know if the
1906 // device is self-test log or error-log capable! After ATA-5, this
1907 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1908 // but sadly not for ATA-5. Sigh.
1909
1910 // do we need to get SMART data?
1911 bool smart_val_ok = false;
1912 if ( cfg.autoofflinetest || cfg.selftest
1913 || cfg.errorlog || cfg.xerrorlog
1914 || cfg.offlinests || cfg.selfteststs
1915 || cfg.usagefailed || cfg.prefail || cfg.usage
1916 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1917 || cfg.curr_pending_id || cfg.offl_pending_id ) {
1918
1919 if (ataReadSmartValues(atadev, &state.smartval)) {
1920 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1921 cfg.usagefailed = cfg.prefail = cfg.usage = false;
1922 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1923 cfg.curr_pending_id = cfg.offl_pending_id = 0;
1924 }
1925 else {
1926 smart_val_ok = true;
1927 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1928 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1929 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1930 cfg.usagefailed = false;
1931 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1932 memset(&state.smartthres, 0, sizeof(state.smartthres));
1933 }
1934 }
1935
1936 // see if the necessary Attribute is there to monitor offline or
1937 // current pending sectors or temperature
1938 if ( cfg.curr_pending_id
1939 && !check_pending_id(cfg, state, cfg.curr_pending_id,
1940 "Current_Pending_Sector"))
1941 cfg.curr_pending_id = 0;
1942
1943 if ( cfg.offl_pending_id
1944 && !check_pending_id(cfg, state, cfg.offl_pending_id,
1945 "Offline_Uncorrectable"))
1946 cfg.offl_pending_id = 0;
1947
1948 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1949 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1950 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
1951 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1952 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1953 }
1954
1955 // Report ignored '-r' or '-R' directives
1956 for (int id = 1; id <= 255; id++) {
1957 if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
1958 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
1959 const char * excl = (cfg.monitor_attr_flags.is_set(id,
1960 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
1961
1962 int idx = ata_find_attr_index(id, state.smartval);
1963 if (idx < 0)
1964 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
1965 else {
1966 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
1967 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
1968 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
1969 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
1970 }
1971 }
1972 }
1973 }
1974
1975 // enable/disable automatic on-line testing
1976 if (cfg.autoofflinetest) {
1977 // is this an enable or disable request?
1978 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1979 if (!smart_val_ok)
1980 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1981 else {
1982 // if command appears unsupported, issue a warning...
1983 if (!isSupportAutomaticTimer(&state.smartval))
1984 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1985 // ... but then try anyway
1986 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1987 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1988 else
1989 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1990 }
1991 }
1992
1993 // Read log directories if required for capability check
1994 ata_smart_log_directory smart_logdir, gp_logdir;
1995 bool smart_logdir_ok = false, gp_logdir_ok = false;
1996
1997 if ( isGeneralPurposeLoggingCapable(&drive)
1998 && (cfg.errorlog || cfg.selftest)
1999 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2000 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2001 smart_logdir_ok = true;
2002 }
2003
2004 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2005 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2006 gp_logdir_ok = true;
2007 }
2008
2009 // capability check: self-test-log
2010 state.selflogcount = 0; state.selfloghour = 0;
2011 if (cfg.selftest) {
2012 int retval;
2013 if (!( cfg.permissive
2014 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2015 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2016 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2017 cfg.selftest = false;
2018 }
2019 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2020 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2021 cfg.selftest = false;
2022 }
2023 else {
2024 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2025 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2026 }
2027 }
2028
2029 // capability check: ATA error log
2030 state.ataerrorcount = 0;
2031 if (cfg.errorlog) {
2032 int errcnt1;
2033 if (!( cfg.permissive
2034 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2035 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2036 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2037 cfg.errorlog = false;
2038 }
2039 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2040 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2041 cfg.errorlog = false;
2042 }
2043 else
2044 state.ataerrorcount = errcnt1;
2045 }
2046
2047 if (cfg.xerrorlog) {
2048 int errcnt2;
2049 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2050 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2051 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2052 name);
2053 cfg.xerrorlog = false;
2054 }
2055 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2056 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2057 cfg.xerrorlog = false;
2058 }
2059 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2060 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2061 name, state.ataerrorcount, errcnt2);
2062 // Record max error count
2063 if (errcnt2 > state.ataerrorcount)
2064 state.ataerrorcount = errcnt2;
2065 }
2066 else
2067 state.ataerrorcount = errcnt2;
2068 }
2069
2070 // capability check: self-test and offline data collection status
2071 if (cfg.offlinests || cfg.selfteststs) {
2072 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2073 if (cfg.offlinests)
2074 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2075 if (cfg.selfteststs)
2076 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2077 cfg.offlinests = cfg.selfteststs = false;
2078 }
2079 }
2080
2081 // capabilities check -- does it support powermode?
2082 if (cfg.powermode) {
2083 int powermode = ataCheckPowerMode(atadev);
2084
2085 if (-1 == powermode) {
2086 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2087 cfg.powermode=0;
2088 }
2089 else if (powermode!=0x00 && powermode!=0x01
2090 && powermode!=0x40 && powermode!=0x41
2091 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2092 && powermode!=0xff) {
2093 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2094 name, powermode);
2095 cfg.powermode=0;
2096 }
2097 }
2098
2099 // Apply ATA settings
2100 std::string msg;
2101
2102 if (cfg.set_aam)
2103 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2104 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2105 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2106
2107 if (cfg.set_apm)
2108 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2109 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2110 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2111
2112 if (cfg.set_lookahead)
2113 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2114 (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2115 cfg.set_lookahead);
2116
2117 if (cfg.set_wcache)
2118 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2119 (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2120
2121 if (cfg.set_security_freeze)
2122 format_set_result_msg(msg, "Security freeze",
2123 ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2124
2125 if (cfg.set_standby)
2126 format_set_result_msg(msg, "Standby",
2127 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2128
2129 // Report as one log entry
2130 if (!msg.empty())
2131 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2132
2133 // set SCT Error Recovery Control if requested
2134 if (cfg.sct_erc_set) {
2135 if (!isSCTErrorRecoveryControlCapable(&drive))
2136 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2137 name);
2138 else if (locked)
2139 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2140 name);
2141 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2142 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2143 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2144 else
2145 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2146 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2147 }
2148
2149 // If no tests available or selected, return
2150 if (!( cfg.smartcheck || cfg.selftest
2151 || cfg.errorlog || cfg.xerrorlog
2152 || cfg.offlinests || cfg.selfteststs
2153 || cfg.usagefailed || cfg.prefail || cfg.usage
2154 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2155 CloseDevice(atadev, name);
2156 return 3;
2157 }
2158
2159 // tell user we are registering device
2160 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2161
2162 // close file descriptor
2163 CloseDevice(atadev, name);
2164
2165 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2166 // Build file name for state file
2167 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2168 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2169 if (!state_path_prefix.empty()) {
2170 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2171 // Read previous state
2172 if (read_dev_state(cfg.state_file.c_str(), state)) {
2173 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2174 // Copy ATA attribute values to temp state
2175 state.update_temp_state();
2176 }
2177 }
2178 if (!attrlog_path_prefix.empty())
2179 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2180 }
2181
2182 finish_device_scan(cfg, state);
2183
2184 return 0;
2185 }
2186
2187 // on success, return 0. On failure, return >0. Never return <0,
2188 // please.
2189 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2190 {
2191 int err, req_len, avail_len, version, len;
2192 const char *device = cfg.name.c_str();
2193 struct scsi_iec_mode_page iec;
2194 UINT8 tBuf[64];
2195 UINT8 inqBuf[96];
2196 UINT8 vpdBuf[252];
2197 char lu_id[64], serial[256], vendor[40], model[40];
2198
2199 // Device must be open
2200 memset(inqBuf, 0, 96);
2201 req_len = 36;
2202 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2203 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2204 req_len = 64;
2205 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2206 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2207 "skip device\n", device);
2208 return 2;
2209 }
2210 }
2211 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2212
2213 avail_len = inqBuf[4] + 5;
2214 len = (avail_len < req_len) ? avail_len : req_len;
2215 if (len < 36) {
2216 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2217 "skip device\n", device);
2218 return 2;
2219 }
2220
2221 int pdt = inqBuf[0] & 0x1f;
2222
2223 if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2224 (0xe == pdt))) {
2225 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2226 "skip\n", device, pdt);
2227 return 2;
2228 }
2229
2230 if (supported_vpd_pages_p) {
2231 delete supported_vpd_pages_p;
2232 supported_vpd_pages_p = NULL;
2233 }
2234 supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2235
2236 lu_id[0] = '\0';
2237 if ((version >= 0x3) && (version < 0x8)) {
2238 /* SPC to SPC-5 */
2239 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2240 vpdBuf, sizeof(vpdBuf))) {
2241 len = vpdBuf[3];
2242 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2243 }
2244 }
2245 serial[0] = '\0';
2246 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2247 vpdBuf, sizeof(vpdBuf))) {
2248 len = vpdBuf[3];
2249 vpdBuf[4 + len] = '\0';
2250 scsi_format_id_string(serial, (const unsigned char *)&vpdBuf[4], len);
2251 }
2252
2253 unsigned int lb_size;
2254 char si_str[64];
2255 uint64_t capacity = scsiGetSize(scsidev, &lb_size, NULL);
2256
2257 if (capacity)
2258 format_capacity(si_str, sizeof(si_str), capacity, ".");
2259 else
2260 si_str[0] = '\0';
2261
2262 // Format device id string for warning emails
2263 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2264 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2265 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2266 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2267 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2268
2269 // format "model" string
2270 scsi_format_id_string(vendor, (const unsigned char *)&inqBuf[8], 8);
2271 scsi_format_id_string(model, (const unsigned char *)&inqBuf[16], 16);
2272 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2273
2274 // check that device is ready for commands. IE stores its stuff on
2275 // the media.
2276 if ((err = scsiTestUnitReady(scsidev))) {
2277 if (SIMPLE_ERR_NOT_READY == err)
2278 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2279 else if (SIMPLE_ERR_NO_MEDIUM == err)
2280 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2281 else if (SIMPLE_ERR_BECOMING_READY == err)
2282 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2283 else
2284 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2285 CloseDevice(scsidev, device);
2286 return 2;
2287 }
2288
2289 // Badly-conforming USB storage devices may fail this check.
2290 // The response to the following IE mode page fetch (current and
2291 // changeable values) is carefully examined. It has been found
2292 // that various USB devices that malform the response will lock up
2293 // if asked for a log page (e.g. temperature) so it is best to
2294 // bail out now.
2295 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2296 state.modese_len = iec.modese_len;
2297 else if (SIMPLE_ERR_BAD_FIELD == err)
2298 ; /* continue since it is reasonable not to support IE mpage */
2299 else { /* any other error (including malformed response) unreasonable */
2300 PrintOut(LOG_INFO,
2301 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2302 device, err);
2303 CloseDevice(scsidev, device);
2304 return 3;
2305 }
2306
2307 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2308 // smart if it is off). This may change to be the same as the ATA side.
2309 if (!scsi_IsExceptionControlEnabled(&iec)) {
2310 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2311 "Try 'smartctl -s on %s' to turn on SMART features\n",
2312 device, device);
2313 CloseDevice(scsidev, device);
2314 return 3;
2315 }
2316
2317 // Flag that certain log pages are supported (information may be
2318 // available from other sources).
2319 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2320 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2321 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2322 {
2323 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2324 switch (tBuf[k]) {
2325 case TEMPERATURE_LPAGE:
2326 state.TempPageSupported = 1;
2327 break;
2328 case IE_LPAGE:
2329 state.SmartPageSupported = 1;
2330 break;
2331 case READ_ERROR_COUNTER_LPAGE:
2332 state.ReadECounterPageSupported = 1;
2333 break;
2334 case WRITE_ERROR_COUNTER_LPAGE:
2335 state.WriteECounterPageSupported = 1;
2336 break;
2337 case VERIFY_ERROR_COUNTER_LPAGE:
2338 state.VerifyECounterPageSupported = 1;
2339 break;
2340 case NON_MEDIUM_ERROR_LPAGE:
2341 state.NonMediumErrorPageSupported = 1;
2342 break;
2343 default:
2344 break;
2345 }
2346 }
2347 }
2348
2349 // Check if scsiCheckIE() is going to work
2350 {
2351 UINT8 asc = 0;
2352 UINT8 ascq = 0;
2353 UINT8 currenttemp = 0;
2354 UINT8 triptemp = 0;
2355
2356 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2357 &asc, &ascq, &currenttemp, &triptemp)) {
2358 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2359 state.SuppressReport = 1;
2360 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2361 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2362 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2363 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2364 }
2365 }
2366 }
2367
2368 // capability check: self-test-log
2369 if (cfg.selftest){
2370 int retval = scsiCountFailedSelfTests(scsidev, 0);
2371 if (retval<0) {
2372 // no self-test log, turn off monitoring
2373 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2374 cfg.selftest = false;
2375 state.selflogcount = 0;
2376 state.selfloghour = 0;
2377 }
2378 else {
2379 // register starting values to watch for changes
2380 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2381 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2382 }
2383 }
2384
2385 // disable autosave (set GLTSD bit)
2386 if (cfg.autosave==1){
2387 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2388 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2389 else
2390 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2391 }
2392
2393 // or enable autosave (clear GLTSD bit)
2394 if (cfg.autosave==2){
2395 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2396 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2397 else
2398 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2399 }
2400
2401 // tell user we are registering device
2402 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2403
2404 // Make sure that init_standby_check() ignores SCSI devices
2405 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2406
2407 // close file descriptor
2408 CloseDevice(scsidev, device);
2409
2410 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2411 // Build file name for state file
2412 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2413 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2414 if (!state_path_prefix.empty()) {
2415 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2416 // Read previous state
2417 if (read_dev_state(cfg.state_file.c_str(), state)) {
2418 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2419 // Copy ATA attribute values to temp state
2420 state.update_temp_state();
2421 }
2422 }
2423 if (!attrlog_path_prefix.empty())
2424 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2425 }
2426
2427 finish_device_scan(cfg, state);
2428
2429 return 0;
2430 }
2431
// Convert a 128 bit little endian integer to uint64_t.
// If the value does not fit into 64 bits (any of bytes 8..15 is nonzero),
// the maximum uint64_t value is returned instead.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Scan the upper half from the top; saturate on the first nonzero byte
  int pos = 15;
  while (pos >= 8) {
    if (val[pos] != 0)
      return ~(uint64_t)0;
    pos--;
  }

  // pos is now 7: assemble the lower half, most significant byte first
  uint64_t result = 0;
  for ( ; pos >= 0; pos--)
    result = (result << 8) | val[pos];
  return result;
}
2445
2446 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
2447 static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2448 {
2449 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2450 for (int i = 0; i < 8; i++) {
2451 if (smart_log.temp_sensor[i] > k)
2452 k = smart_log.temp_sensor[i];
2453 }
2454 return k;
2455 }
2456
2457 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev)
2458 {
2459 const char *name = cfg.name.c_str();
2460
2461 // Device must be open
2462
2463 // Get ID Controller
2464 nvme_id_ctrl id_ctrl;
2465 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2466 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2467 CloseDevice(nvmedev, name);
2468 return 2;
2469 }
2470
2471 // Get drive identity
2472 char model[40+1], serial[20+1], firmware[8+1];
2473 format_char_array(model, id_ctrl.mn);
2474 format_char_array(serial, id_ctrl.sn);
2475 format_char_array(firmware, id_ctrl.fr);
2476
2477 // Format device id string for warning emails
2478 char nsstr[32] = "", capstr[32] = "";
2479 unsigned nsid = nvmedev->get_nsid();
2480 if (nsid != 0xffffffff)
2481 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2482 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2483 if (capacity)
2484 format_capacity(capstr, sizeof(capstr), capacity, ".");
2485 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2486 nsstr, (capstr[0] ? ", " : ""), capstr);
2487
2488 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2489
2490 // Read SMART/Health log
2491 nvme_smart_log smart_log;
2492 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2493 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2494 CloseDevice(nvmedev, name);
2495 return 2;
2496 }
2497
2498 // Check temperature sensor support
2499 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2500 if (!nvme_get_max_temp_kelvin(smart_log)) {
2501 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2502 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2503 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2504 }
2505 }
2506
2507 // Init total error count
2508 if (cfg.errorlog || cfg.xerrorlog) {
2509 state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
2510 }
2511
2512 // If no supported tests selected, return
2513 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2514 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2515 CloseDevice(nvmedev, name);
2516 return 3;
2517 }
2518
2519 // Tell user we are registering device
2520 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2521
2522 // Make sure that init_standby_check() ignores NVMe devices
2523 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2524
2525 CloseDevice(nvmedev, name);
2526
2527 if (!state_path_prefix.empty()) {
2528 // Build file name for state file
2529 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2530 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2531 nsstr[0] = 0;
2532 if (nsid != 0xffffffff)
2533 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2534 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2535 // Read previous state
2536 if (read_dev_state(cfg.state_file.c_str(), state))
2537 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2538 }
2539
2540 finish_device_scan(cfg, state);
2541
2542 return 0;
2543 }
2544
2545 // If the self-test log has got more self-test errors (or more recent
2546 // self-test errors) recorded, then notify user.
2547 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2548 {
2549 const char * name = cfg.name.c_str();
2550
2551 if (newi<0)
2552 // command failed
2553 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2554 else {
2555 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2556
2557 // old and new error counts
2558 int oldc=state.selflogcount;
2559 int newc=SELFTEST_ERRORCOUNT(newi);
2560
2561 // old and new error timestamps in hours
2562 int oldh=state.selfloghour;
2563 int newh=SELFTEST_ERRORHOURS(newi);
2564
2565 if (oldc<newc) {
2566 // increase in error count
2567 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2568 name, oldc, newc);
2569 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2570 name, oldc, newc);
2571 state.must_write = true;
2572 }
2573 else if (newc > 0 && oldh != newh) {
2574 // more recent error
2575 // a 'more recent' error might actually be a smaller hour number,
2576 // if the hour number has wrapped.
2577 // There's still a bug here. You might just happen to run a new test
2578 // exactly 32768 hours after the previous failure, and have run exactly
2579 // 20 tests between the two, in which case smartd will miss the
2580 // new failure.
2581 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2582 name, newh);
2583 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2584 name, newh);
2585 state.must_write = true;
2586 }
2587
2588 // Print info if error entries have disappeared
2589 // or newer successful successful extended self-test exits
2590 if (oldc > newc) {
2591 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2592 name, oldc, newc);
2593 if (newc == 0)
2594 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2595 }
2596
2597 // Needed since self-test error count may DECREASE. Hour might
2598 // also have changed.
2599 state.selflogcount= newc;
2600 state.selfloghour = newh;
2601 }
2602 return;
2603 }
2604
// Letters identifying the self-test types, ordered by priority
// (highest priority first).
static const char test_type_chars[] = "LncrSCO";
// Number of test types (array length without the terminating '\0').
static const unsigned num_test_types =
  sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
2608
2609 // returns test type if time to do test of type testtype,
2610 // 0 if not time to do test.
2611 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2612 {
2613 // check that self-testing has been requested
2614 if (cfg.test_regex.empty())
2615 return 0;
2616
2617 // Exit if drive not capable of any test
2618 if ( state.not_cap_long && state.not_cap_short &&
2619 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2620 return 0;
2621
2622 // since we are about to call localtime(), be sure glibc is informed
2623 // of any timezone changes we make.
2624 if (!usetime)
2625 FixGlibcTimeZoneBug();
2626
2627 // Is it time for next check?
2628 time_t now = (!usetime ? time(0) : usetime);
2629 if (now < state.scheduled_test_next_check)
2630 return 0;
2631
2632 // Limit time check interval to 90 days
2633 if (state.scheduled_test_next_check + (3600L*24*90) < now)
2634 state.scheduled_test_next_check = now - (3600L*24*90);
2635
2636 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2637 char testtype = 0;
2638 time_t testtime = 0; int testhour = 0;
2639 int maxtest = num_test_types-1;
2640
2641 for (time_t t = state.scheduled_test_next_check; ; ) {
2642 struct tm * tms = localtime(&t);
2643 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2644 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2645 for (int i = 0; i <= maxtest; i++) {
2646 // Skip if drive not capable of this test
2647 switch (test_type_chars[i]) {
2648 case 'L': if (state.not_cap_long) continue; break;
2649 case 'S': if (state.not_cap_short) continue; break;
2650 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2651 case 'O': if (scsi || state.not_cap_offline) continue; break;
2652 case 'c': case 'n':
2653 case 'r': if (scsi || state.not_cap_selective) continue; break;
2654 default: continue;
2655 }
2656 // Try match of "T/MM/DD/d/HH"
2657 char pattern[16];
2658 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2659 test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2660 if (cfg.test_regex.full_match(pattern)) {
2661 // Test found
2662 testtype = pattern[0];
2663 testtime = t; testhour = tms->tm_hour;
2664 // Limit further matches to higher priority self-tests
2665 maxtest = i-1;
2666 break;
2667 }
2668 }
2669 // Exit if no tests left or current time reached
2670 if (maxtest < 0)
2671 break;
2672 if (t >= now)
2673 break;
2674 // Check next hour
2675 if ((t += 3600) > now)
2676 t = now;
2677 }
2678
2679 // Do next check not before next hour.
2680 struct tm * tmnow = localtime(&now);
2681 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2682
2683 if (testtype) {
2684 state.must_write = true;
2685 // Tell user if an old test was found.
2686 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2687 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2688 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2689 cfg.name.c_str(), testtype, datebuf);
2690 }
2691 }
2692
2693 return testtype;
2694 }
2695
2696 // Print a list of future tests.
2697 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2698 {
2699 unsigned numdev = configs.size();
2700 if (!numdev)
2701 return;
2702 std::vector<int> testcnts(numdev * num_test_types, 0);
2703
2704 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2705
2706 // FixGlibcTimeZoneBug(); // done in PrintOut()
2707 time_t now = time(0);
2708 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2709 dateandtimezoneepoch(datenow, now);
2710
2711 long seconds;
2712 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2713 // Check for each device whether a test will be run
2714 time_t testtime = now + seconds;
2715 for (unsigned i = 0; i < numdev; i++) {
2716 const dev_config & cfg = configs.at(i);
2717 dev_state & state = states.at(i);
2718 const char * p;
2719 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2720 if (testtype && (p = strchr(test_type_chars, testtype))) {
2721 unsigned t = (p - test_type_chars);
2722 // Report at most 5 tests of each type
2723 if (++testcnts[i*num_test_types + t] <= 5) {
2724 dateandtimezoneepoch(date, testtime);
2725 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2726 testcnts[i*num_test_types + t], testtype, date);
2727 }
2728 }
2729 }
2730 }
2731
2732 // Report totals
2733 dateandtimezoneepoch(date, now+seconds);
2734 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2735 for (unsigned i = 0; i < numdev; i++) {
2736 const dev_config & cfg = configs.at(i);
2737 bool scsi = devices.at(i)->is_scsi();
2738 for (unsigned t = 0; t < num_test_types; t++) {
2739 int cnt = testcnts[i*num_test_types + t];
2740 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2741 continue;
2742 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2743 cnt, (cnt==1?"":"s"), test_type_chars[t]);
2744 }
2745 }
2746
2747 }
2748
2749 // Return zero on success, nonzero on failure. Perform offline (background)
2750 // short or long (extended) self test on given scsi device.
2751 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2752 {
2753 int retval = 0;
2754 const char *testname = 0;
2755 const char *name = cfg.name.c_str();
2756 int inProgress;
2757
2758 if (scsiSelfTestInProgress(device, &inProgress)) {
2759 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2760 state.not_cap_short = state.not_cap_long = true;
2761 return 1;
2762 }
2763
2764 if (1 == inProgress) {
2765 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2766 "progress.\n", name);
2767 return 1;
2768 }
2769
2770 switch (testtype) {
2771 case 'S':
2772 testname = "Short Self";
2773 retval = scsiSmartShortSelfTest(device);
2774 break;
2775 case 'L':
2776 testname = "Long Self";
2777 retval = scsiSmartExtendSelfTest(device);
2778 break;
2779 }
2780 // If we can't do the test, exit
2781 if (NULL == testname) {
2782 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2783 testtype);
2784 return 1;
2785 }
2786 if (retval) {
2787 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2788 (SIMPLE_ERR_BAD_FIELD == retval)) {
2789 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2790 testname);
2791 if ('L'==testtype)
2792 state.not_cap_long = true;
2793 else
2794 state.not_cap_short = true;
2795
2796 return 1;
2797 }
2798 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2799 testname, retval);
2800 return 1;
2801 }
2802
2803 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2804
2805 return 0;
2806 }
2807
2808 // Do an offline immediate or self-test. Return zero on success,
2809 // nonzero on failure.
2810 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2811 {
2812 const char *name = cfg.name.c_str();
2813
2814 // Read current smart data and check status/capability
2815 struct ata_smart_values data;
2816 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2817 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2818 return 1;
2819 }
2820
2821 // Check for capability to do the test
2822 int dotest = -1, mode = 0;
2823 const char *testname = 0;
2824 switch (testtype) {
2825 case 'O':
2826 testname="Offline Immediate ";
2827 if (isSupportExecuteOfflineImmediate(&data))
2828 dotest=OFFLINE_FULL_SCAN;
2829 else
2830 state.not_cap_offline = true;
2831 break;
2832 case 'C':
2833 testname="Conveyance Self-";
2834 if (isSupportConveyanceSelfTest(&data))
2835 dotest=CONVEYANCE_SELF_TEST;
2836 else
2837 state.not_cap_conveyance = true;
2838 break;
2839 case 'S':
2840 testname="Short Self-";
2841 if (isSupportSelfTest(&data))
2842 dotest=SHORT_SELF_TEST;
2843 else
2844 state.not_cap_short = true;
2845 break;
2846 case 'L':
2847 testname="Long Self-";
2848 if (isSupportSelfTest(&data))
2849 dotest=EXTEND_SELF_TEST;
2850 else
2851 state.not_cap_long = true;
2852 break;
2853
2854 case 'c': case 'n': case 'r':
2855 testname = "Selective Self-";
2856 if (isSupportSelectiveSelfTest(&data)) {
2857 dotest = SELECTIVE_SELF_TEST;
2858 switch (testtype) {
2859 case 'c': mode = SEL_CONT; break;
2860 case 'n': mode = SEL_NEXT; break;
2861 case 'r': mode = SEL_REDO; break;
2862 }
2863 }
2864 else
2865 state.not_cap_selective = true;
2866 break;
2867 }
2868
2869 // If we can't do the test, exit
2870 if (dotest<0) {
2871 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2872 return 1;
2873 }
2874
2875 // If currently running a self-test, do not interrupt it to start another.
2876 if (15==(data.self_test_exec_status >> 4)) {
2877 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
2878 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2879 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2880 } else {
2881 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2882 name, testname, (int)(data.self_test_exec_status & 0x0f));
2883 return 1;
2884 }
2885 }
2886
2887 if (dotest == SELECTIVE_SELF_TEST) {
2888 // Set test span
2889 ata_selective_selftest_args selargs, prev_args;
2890 selargs.num_spans = 1;
2891 selargs.span[0].mode = mode;
2892 prev_args.num_spans = 1;
2893 prev_args.span[0].start = state.selective_test_last_start;
2894 prev_args.span[0].end = state.selective_test_last_end;
2895 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2896 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2897 return 1;
2898 }
2899 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2900 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
2901 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2902 start, end, end - start + 1,
2903 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2904 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2905 state.selective_test_last_start = start;
2906 state.selective_test_last_end = end;
2907 }
2908
2909 // execute the test, and return status
2910 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2911 if (retval) {
2912 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2913 return retval;
2914 }
2915
2916 // Report recent test start to do_disable_standby_check()
2917 // and force log of next test status
2918 if (testtype == 'O')
2919 state.offline_started = true;
2920 else
2921 state.selftest_started = true;
2922
2923 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2924 return 0;
2925 }
2926
2927 // Check pending sector count attribute values (-C, -U directives).
2928 static void check_pending(const dev_config & cfg, dev_state & state,
2929 unsigned char id, bool increase_only,
2930 const ata_smart_values & smartval,
2931 int mailtype, const char * msg)
2932 {
2933 // Find attribute index
2934 int i = ata_find_attr_index(id, smartval);
2935 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2936 return;
2937
2938 // No report if no sectors pending.
2939 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2940 if (rawval == 0) {
2941 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
2942 return;
2943 }
2944
2945 // If attribute is not reset, report only sector count increases.
2946 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2947 if (!(!increase_only || prev_rawval < rawval))
2948 return;
2949
2950 // Format message.
2951 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
2952 if (prev_rawval > 0 && rawval != prev_rawval)
2953 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
2954
2955 PrintOut(LOG_CRIT, "%s\n", s.c_str());
2956 MailWarning(cfg, state, mailtype, "%s", s.c_str());
2957 state.must_write = true;
2958 }
2959
// Format a temperature value for log messages.
// A zero value means "unset" and yields the constant string "??"; any other
// value is written in decimal to the caller's buffer, which is returned.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  return "??";
}
2968
2969 // Check Temperature limits
2970 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2971 {
2972 if (!(0 < currtemp && currtemp < 255)) {
2973 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2974 return;
2975 }
2976
2977 // Update Max Temperature
2978 const char * minchg = "", * maxchg = "";
2979 if (currtemp > state.tempmax) {
2980 if (state.tempmax)
2981 maxchg = "!";
2982 state.tempmax = currtemp;
2983 state.must_write = true;
2984 }
2985
2986 char buf[20];
2987 if (!state.temperature) {
2988 // First check
2989 if (!state.tempmin || currtemp < state.tempmin)
2990 // Delay Min Temperature update by ~ 30 minutes.
2991 state.tempmin_delay = time(0) + CHECKTIME - 60;
2992 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2993 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2994 if (triptemp)
2995 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2996 state.temperature = currtemp;
2997 }
2998 else {
2999 if (state.tempmin_delay) {
3000 // End Min Temperature update delay if ...
3001 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3002 || (state.tempmin_delay <= time(0))) { // or delay time is over.
3003 state.tempmin_delay = 0;
3004 if (!state.tempmin)
3005 state.tempmin = 255;
3006 }
3007 }
3008
3009 // Update Min Temperature
3010 if (!state.tempmin_delay && currtemp < state.tempmin) {
3011 state.tempmin = currtemp;
3012 state.must_write = true;
3013 if (currtemp != state.temperature)
3014 minchg = "!";
3015 }
3016
3017 // Track changes
3018 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3019 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3020 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3021 state.temperature = currtemp;
3022 }
3023 }
3024
3025 // Check limits
3026 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3027 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3028 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3029 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3030 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3031 }
3032 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3033 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3034 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3035 }
3036 else if (cfg.tempcrit) {
3037 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3038 if (currtemp < limit)
3039 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3040 }
3041 }
3042
3043 // Check normalized and raw attribute values.
3044 static void check_attribute(const dev_config & cfg, dev_state & state,
3045 const ata_smart_attribute & attr,
3046 const ata_smart_attribute & prev,
3047 int attridx,
3048 const ata_smart_threshold_entry * thresholds)
3049 {
3050 // Check attribute and threshold
3051 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3052 if (attrstate == ATTRSTATE_NON_EXISTING)
3053 return;
3054
3055 // If requested, check for usage attributes that have failed.
3056 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3057 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
3058 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3059 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3060 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3061 state.must_write = true;
3062 }
3063
3064 // Return if we're not tracking this type of attribute
3065 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3066 if (!( ( prefail && cfg.prefail)
3067 || (!prefail && cfg.usage )))
3068 return;
3069
3070 // Return if '-I ID' was specified
3071 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
3072 return;
3073
3074 // Issue warning if they don't have the same ID in all structures.
3075 if (attr.id != prev.id) {
3076 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3077 cfg.name.c_str(), attr.id, prev.id);
3078 return;
3079 }
3080
3081 // Compare normalized values if valid.
3082 bool valchanged = false;
3083 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3084 if (attr.current != prev.current)
3085 valchanged = true;
3086 }
3087
3088 // Compare raw values if requested.
3089 bool rawchanged = false;
3090 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3091 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
3092 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
3093 rawchanged = true;
3094 }
3095
3096 // Return if no change
3097 if (!(valchanged || rawchanged))
3098 return;
3099
3100 // Format value strings
3101 std::string currstr, prevstr;
3102 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3103 // Print raw values only
3104 currstr = strprintf("%s (Raw)",
3105 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3106 prevstr = strprintf("%s (Raw)",
3107 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3108 }
3109 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3110 // Print normalized and raw values
3111 currstr = strprintf("%d [Raw %s]", attr.current,
3112 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3113 prevstr = strprintf("%d [Raw %s]", prev.current,
3114 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3115 }
3116 else {
3117 // Print normalized values only
3118 currstr = strprintf("%d", attr.current);
3119 prevstr = strprintf("%d", prev.current);
3120 }
3121
3122 // Format message
3123 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3124 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3125 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3126 prevstr.c_str(), currstr.c_str());
3127
3128 // Report this change as critical ?
3129 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3130 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3131 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3132 MailWarning(cfg, state, 2, "%s", msg.c_str());
3133 }
3134 else {
3135 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3136 }
3137 state.must_write = true;
3138 }
3139
3140
3141 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3142 bool firstpass, bool allow_selftests)
3143 {
3144 const char * name = cfg.name.c_str();
3145
3146 // If user has asked, test the email warning system
3147 if (cfg.emailtest)
3148 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3149
3150 // User may have requested (with the -n Directive) to leave the disk
3151 // alone if it is in idle or standby mode. In this case check the
3152 // power mode first before opening the device for full access,
3153 // and exit without check if disk is reported in standby.
3154 if (cfg.powermode && !state.powermodefail) {
3155 // Note that 'is_powered_down()' handles opening the device itself, and
3156 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
3157 if (atadev->is_powered_down())
3158 {
3159 // skip at most powerskipmax checks
3160 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3161 // report first only except if state has changed, avoid waking up system disk
3162 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
3163 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
3164 state.lastpowermodeskipped = -1;
3165 }
3166 state.powerskipcnt++;
3167 return 0;
3168 }
3169 }
3170 }
3171
3172 // if we can't open device, fail gracefully rather than hard --
3173 // perhaps the next time around we'll be able to open it. ATAPI
3174 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
3175 // given (see linux cdrom driver).
3176 if (!atadev->open()) {
3177 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
3178 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3179 return 1;
3180 }
3181 if (debugmode)
3182 PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
3183 reset_warning_mail(cfg, state, 9, "open device worked again");
3184
3185 // user may have requested (with the -n Directive) to leave the disk
3186 // alone if it is in idle or sleeping mode. In this case check the
3187 // power mode and exit without check if needed
3188 if (cfg.powermode && !state.powermodefail) {
3189 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3190 const char * mode = 0;
3191 if (0 <= powermode && powermode < 0xff) {
3192 // wait for possible spin up and check again
3193 int powermode2;
3194 sleep(5);
3195 powermode2 = ataCheckPowerMode(atadev);
3196 if (powermode2 > powermode)
3197 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3198 powermode = powermode2;
3199 }
3200
3201 switch (powermode){
3202 case -1:
3203 // SLEEP
3204 mode="SLEEP";
3205 if (cfg.powermode>=1)
3206 dontcheck=1;
3207 break;
3208 case 0x00:
3209 // STANDBY
3210 mode="STANDBY";
3211 if (cfg.powermode>=2)
3212 dontcheck=1;
3213 break;
3214 case 0x01:
3215 // STANDBY_Y
3216 mode="STANDBY_Y";
3217 if (cfg.powermode>=2)
3218 dontcheck=1;
3219 break;
3220 case 0x80:
3221 // IDLE
3222 mode="IDLE";
3223 if (cfg.powermode>=3)
3224 dontcheck=1;
3225 break;
3226 case 0x81:
3227 // IDLE_A
3228 mode="IDLE_A";
3229 if (cfg.powermode>=3)
3230 dontcheck=1;
3231 break;
3232 case 0x82:
3233 // IDLE_B
3234 mode="IDLE_B";
3235 if (cfg.powermode>=3)
3236 dontcheck=1;
3237 break;
3238 case 0x83:
3239 // IDLE_C
3240 mode="IDLE_C";
3241 if (cfg.powermode>=3)
3242 dontcheck=1;
3243 break;
3244 case 0xff:
3245 // ACTIVE/IDLE
3246 case 0x40:
3247 // ACTIVE
3248 case 0x41:
3249 // ACTIVE
3250 mode="ACTIVE or IDLE";
3251 break;
3252 default:
3253 // UNKNOWN
3254 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3255 name, powermode);
3256 state.powermodefail = true;
3257 break;
3258 }
3259
3260 // if we are going to skip a check, return now
3261 if (dontcheck){
3262 // skip at most powerskipmax checks
3263 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3264 CloseDevice(atadev, name);
3265 // report first only except if state has changed, avoid waking up system disk
3266 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3267 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3268 state.lastpowermodeskipped = powermode;
3269 }
3270 state.powerskipcnt++;
3271 return 0;
3272 }
3273 else {
3274 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3275 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3276 }
3277 state.powerskipcnt = 0;
3278 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3279 }
3280 else if (state.powerskipcnt) {
3281 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3282 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3283 state.powerskipcnt = 0;
3284 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3285 }
3286 }
3287
3288 // check smart status
3289 if (cfg.smartcheck) {
3290 int status=ataSmartStatus2(atadev);
3291 if (status==-1){
3292 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3293 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3294 state.must_write = true;
3295 }
3296 else if (status==1){
3297 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3298 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3299 state.must_write = true;
3300 }
3301 }
3302
3303 // Check everything that depends upon SMART Data (eg, Attribute values)
3304 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3305 || cfg.curr_pending_id || cfg.offl_pending_id
3306 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3307 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3308
3309 // Read current attribute values.
3310 ata_smart_values curval;
3311 if (ataReadSmartValues(atadev, &curval)){
3312 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3313 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3314 state.must_write = true;
3315 }
3316 else {
3317 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3318
3319 // look for current or offline pending sectors
3320 if (cfg.curr_pending_id)
3321 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3322 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3323 : "Total unreadable (pending) sectors" ));
3324
3325 if (cfg.offl_pending_id)
3326 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3327 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3328 : "Total offline uncorrectable sectors"));
3329
3330 // check temperature limits
3331 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3332 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3333
3334 // look for failed usage attributes, or track usage or prefail attributes
3335 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3336 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3337 check_attribute(cfg, state,
3338 curval.vendor_attributes[i],
3339 state.smartval.vendor_attributes[i],
3340 i, state.smartthres.thres_entries);
3341 }
3342 }
3343
3344 // Log changes of offline data collection status
3345 if (cfg.offlinests) {
3346 if ( curval.offline_data_collection_status
3347 != state.smartval.offline_data_collection_status
3348 || state.offline_started // test was started in previous call
3349 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3350 log_offline_data_coll_status(name, curval.offline_data_collection_status);
3351 }
3352
3353 // Log changes of self-test execution status
3354 if (cfg.selfteststs) {
3355 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
3356 || state.selftest_started // test was started in previous call
3357 || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3358 log_self_test_exec_status(name, curval.self_test_exec_status);
3359 }
3360
3361 // Save the new values for the next time around
3362 state.smartval = curval;
3363 }
3364 }
3365 state.offline_started = state.selftest_started = false;
3366
3367 // check if number of selftest errors has increased (note: may also DECREASE)
3368 if (cfg.selftest)
3369 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3370
3371 // check if number of ATA errors has increased
3372 if (cfg.errorlog || cfg.xerrorlog) {
3373
3374 int errcnt1 = -1, errcnt2 = -1;
3375 if (cfg.errorlog)
3376 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3377 if (cfg.xerrorlog)
3378 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3379
3380 // new number of errors is max of both logs
3381 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3382
3383 // did command fail?
3384 if (newc<0)
3385 // lack of PrintOut here is INTENTIONAL
3386 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3387
3388 // has error count increased?
3389 int oldc = state.ataerrorcount;
3390 if (newc>oldc){
3391 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3392 name, oldc, newc);
3393 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3394 name, oldc, newc);
3395 state.must_write = true;
3396 }
3397
3398 if (newc>=0)
3399 state.ataerrorcount=newc;
3400 }
3401
3402 // if the user has asked, and device is capable (or we're not yet
3403 // sure) check whether a self test should be done now.
3404 if (allow_selftests && !cfg.test_regex.empty()) {
3405 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3406 if (testtype)
3407 DoATASelfTest(cfg, state, atadev, testtype);
3408 }
3409
3410 // Don't leave device open -- the OS/user may want to access it
3411 // before the next smartd cycle!
3412 CloseDevice(atadev, name);
3413
3414 // Copy ATA attribute values to persistent state
3415 state.update_persistent_state();
3416
3417 return 0;
3418 }
3419
3420 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3421 {
3422 const char * name = cfg.name.c_str();
3423
3424 // If the user has asked for it, test the email warning system
3425 if (cfg.emailtest)
3426 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3427
3428 // if we can't open device, fail gracefully rather than hard --
3429 // perhaps the next time around we'll be able to open it
3430 if (!scsidev->open()) {
3431 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3432 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3433 return 1;
3434 } else if (debugmode)
3435 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
3436 reset_warning_mail(cfg, state, 9, "open device worked again");
3437
3438 UINT8 asc = 0, ascq = 0;
3439 UINT8 currenttemp = 0, triptemp = 0;
3440 if (!state.SuppressReport) {
3441 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3442 &asc, &ascq, &currenttemp, &triptemp)) {
3443 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3444 name);
3445 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3446 state.SuppressReport = 1;
3447 }
3448 }
3449 if (asc > 0) {
3450 const char * cp = scsiGetIEString(asc, ascq);
3451 if (cp) {
3452 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3453 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3454 } else if (asc == 4 && ascq == 9) {
3455 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3456 } else if (debugmode)
3457 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3458 name, (int)asc, (int)ascq);
3459 } else if (debugmode)
3460 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3461
3462 // check temperature limits
3463 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || !cfg.attrlog_file.empty())
3464 CheckTemperature(cfg, state, currenttemp, triptemp);
3465
3466 // check if number of selftest errors has increased (note: may also DECREASE)
3467 if (cfg.selftest)
3468 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3469
3470 if (allow_selftests && !cfg.test_regex.empty()) {
3471 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3472 if (testtype)
3473 DoSCSISelfTest(cfg, state, scsidev, testtype);
3474 }
3475 if (!cfg.attrlog_file.empty()){
3476 // saving error counters to state
3477 UINT8 tBuf[252];
3478 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3479 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3480 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3481 state.scsi_error_counters[0].found=1;
3482 }
3483 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3484 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3485 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3486 state.scsi_error_counters[1].found=1;
3487 }
3488 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3489 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3490 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3491 state.scsi_error_counters[2].found=1;
3492 }
3493 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3494 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3495 scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3496 state.scsi_nonmedium_error.found=1;
3497 }
3498 }
3499 CloseDevice(scsidev, name);
3500 return 0;
3501 }
3502
3503 static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3504 {
3505 const char * name = cfg.name.c_str();
3506
3507 // TODO: Use common open function for ATA/SCSI/NVMe
3508 // If user has asked, test the email warning system
3509 if (cfg.emailtest)
3510 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3511
3512 if (!nvmedev->open()) {
3513 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, nvmedev->get_errmsg());
3514 MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3515 return 1;
3516 }
3517 if (debugmode)
3518 PrintOut(LOG_INFO,"Device: %s, opened NVMe device\n", name);
3519 reset_warning_mail(cfg, state, 9, "open device worked again");
3520
3521 // Read SMART/Health log
3522 nvme_smart_log smart_log;
3523 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3524 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3525 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3526 state.must_write = true;
3527 return 0;
3528 }
3529
3530 // Check Critical Warning bits
3531 if (cfg.smartcheck && smart_log.critical_warning) {
3532 unsigned char w = smart_log.critical_warning;
3533 std::string msg;
3534 static const char * const wnames[] =
3535 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3536
3537 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3538 if (!(w & (1 << b)))
3539 continue;
3540 if (cnt)
3541 msg += ", ";
3542 if (++cnt > 3) {
3543 msg += "..."; break;
3544 }
3545 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3546 msg += "*Unknown*"; break;
3547 }
3548 msg += wnames[b];
3549 }
3550
3551 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3552 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3553 state.must_write = true;
3554 }
3555
3556 // Check temperature limits
3557 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3558 int k = nvme_get_max_temp_kelvin(smart_log);
3559 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3560 int c = k - 273;
3561 if (c < 1)
3562 c = 1;
3563 else if (c > 0xff)
3564 c = 0xff;
3565 CheckTemperature(cfg, state, c, 0);
3566 }
3567
3568 // Check if number of errors has increased
3569 if (cfg.errorlog || cfg.xerrorlog) {
3570 uint64_t oldcnt = state.nvme_err_log_entries;
3571 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3572 if (newcnt > oldcnt) {
3573 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3574 name, oldcnt, newcnt);
3575 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3576 name, oldcnt, newcnt);
3577 state.must_write = true;
3578 }
3579 state.nvme_err_log_entries = newcnt;
3580 }
3581
3582 CloseDevice(nvmedev, name);
3583 return 0;
3584 }
3585
3586 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3587 static int standby_disable_state = 0;
3588
3589 static void init_disable_standby_check(dev_config_vector & configs)
3590 {
3591 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3592 bool sts1 = false, sts2 = false;
3593 for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3594 const dev_config & cfg = configs.at(i);
3595 if (cfg.offlinests_ns)
3596 sts1 = true;
3597 if (cfg.selfteststs_ns)
3598 sts2 = true;
3599 }
3600
3601 // Check for support of disable auto standby
3602 // Reenable standby if smartd.conf was reread
3603 if (sts1 || sts2 || standby_disable_state == 3) {
3604 if (!smi()->disable_system_auto_standby(false)) {
3605 if (standby_disable_state == 3)
3606 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3607 if (sts1 || sts2) {
3608 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3609 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3610 sts1 = sts2 = false;
3611 }
3612 }
3613 }
3614
3615 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3616 }
3617
3618 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3619 {
3620 if (!standby_disable_state)
3621 return;
3622
3623 // Check for just started or still running self-tests
3624 bool running = false;
3625 for (unsigned i = 0; i < configs.size() && !running; i++) {
3626 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3627
3628 if ( ( cfg.offlinests_ns
3629 && (state.offline_started ||
3630 is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3631 || ( cfg.selfteststs_ns
3632 && (state.selftest_started ||
3633 is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3634 running = true;
3635 // state.offline/selftest_started will be reset after next logging of test status
3636 }
3637
3638 // Disable/enable auto standby and log state changes
3639 if (!running) {
3640 if (standby_disable_state != 1) {
3641 if (!smi()->disable_system_auto_standby(false))
3642 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3643 smi()->get_errmsg());
3644 else
3645 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3646 standby_disable_state = 1;
3647 }
3648 }
3649 else if (!smi()->disable_system_auto_standby(true)) {
3650 if (standby_disable_state != 2) {
3651 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3652 smi()->get_errmsg());
3653 standby_disable_state = 2;
3654 }
3655 }
3656 else {
3657 if (standby_disable_state != 3) {
3658 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3659 standby_disable_state = 3;
3660 }
3661 }
3662 }
3663
3664 // Checks the SMART status of all ATA and SCSI devices
3665 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3666 smart_device_list & devices, bool firstpass, bool allow_selftests)
3667 {
3668 for (unsigned i = 0; i < configs.size(); i++) {
3669 const dev_config & cfg = configs.at(i);
3670 dev_state & state = states.at(i);
3671 smart_device * dev = devices.at(i);
3672 if (dev->is_ata())
3673 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3674 else if (dev->is_scsi())
3675 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3676 else if (dev->is_nvme())
3677 NVMeCheckDevice(cfg, state, dev->to_nvme());
3678 }
3679
3680 do_disable_standby_check(configs, states);
3681 }
3682
3683 // Set if Initialize() was called
3684 static bool is_initialized = false;
3685
3686 // Does initialization right after fork to daemon mode
3687 static void Initialize(time_t *wakeuptime)
3688 {
3689 // Call Goodbye() on exit
3690 is_initialized = true;
3691
3692 // write PID file
3693 if (!debugmode)
3694 WritePidFile();
3695
3696 // install signal handlers. On Solaris, can't use signal() because
3697 // it resets the handler to SIG_DFL after each call. So use sigset()
3698 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3699
3700 // normal and abnormal exit
3701 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3702 SIGNALFN(SIGTERM, SIG_IGN);
3703 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3704 SIGNALFN(SIGQUIT, SIG_IGN);
3705
3706 // in debug mode, <CONTROL-C> ==> HUP
3707 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3708 SIGNALFN(SIGINT, SIG_IGN);
3709
3710 // Catch HUP and USR1
3711 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3712 SIGNALFN(SIGHUP, SIG_IGN);
3713 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3714 SIGNALFN(SIGUSR1, SIG_IGN);
3715 #ifdef _WIN32
3716 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3717 SIGNALFN(SIGUSR2, SIG_IGN);
3718 #endif
3719
3720 // initialize wakeup time to CURRENT time
3721 *wakeuptime=time(NULL);
3722
3723 return;
3724 }
3725
3726 #ifdef _WIN32
3727 // Toggle debug mode implemented for native windows only
3728 // (there is no easy way to reopen tty on *nix)
3729 static void ToggleDebugMode()
3730 {
3731 if (!debugmode) {
3732 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3733 if (!daemon_enable_console("smartd [Debug]")) {
3734 debugmode = 1;
3735 daemon_signal(SIGINT, HUPhandler);
3736 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3737 }
3738 else
3739 PrintOut(LOG_INFO,"enable console failed\n");
3740 }
3741 else if (debugmode == 1) {
3742 daemon_disable_console();
3743 debugmode = 0;
3744 daemon_signal(SIGINT, sighandler);
3745 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3746 }
3747 else
3748 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3749 }
3750 #endif
3751
3752 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3753 {
3754 // If past wake-up-time, compute next wake-up-time
3755 time_t timenow=time(NULL);
3756 while (wakeuptime<=timenow){
3757 int intervals=1+(timenow-wakeuptime)/checktime;
3758 wakeuptime+=intervals*checktime;
3759 }
3760
3761 // sleep until we catch SIGUSR1 or have completed sleeping
3762 int addtime = 0;
3763 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3764
3765 // protect user again system clock being adjusted backwards
3766 if (wakeuptime>timenow+checktime){
3767 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3768 wakeuptime=timenow+checktime;
3769 }
3770
3771 // Exit sleep when time interval has expired or a signal is received
3772 sleep(wakeuptime+addtime-timenow);
3773
3774 #ifdef _WIN32
3775 // toggle debug mode?
3776 if (caughtsigUSR2) {
3777 ToggleDebugMode();
3778 caughtsigUSR2 = 0;
3779 }
3780 #endif
3781
3782 timenow=time(NULL);
3783
3784 // Actual sleep time too long?
3785 if (!addtime && timenow > wakeuptime+60) {
3786 if (debugmode)
3787 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3788 (int)(timenow-wakeuptime));
3789 // Wait another 20 seconds to avoid I/O errors during disk spin-up
3790 addtime = timenow-wakeuptime+20;
3791 // Use next wake-up-time if close
3792 int nextcheck = checktime - addtime % checktime;
3793 if (nextcheck <= 20)
3794 addtime += nextcheck;
3795 }
3796 }
3797
3798 // if we caught a SIGUSR1 then print message and clear signal
3799 if (caughtsigUSR1){
3800 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3801 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3802 caughtsigUSR1=0;
3803 sigwakeup = true;
3804 }
3805
3806 // return adjusted wakeuptime
3807 return wakeuptime;
3808 }
3809
3810 // Print out a list of valid arguments for the Directive d
3811 static void printoutvaliddirectiveargs(int priority, char d)
3812 {
3813 switch (d) {
3814 case 'n':
3815 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3816 break;
3817 case 's':
3818 PrintOut(priority, "valid_regular_expression");
3819 break;
3820 case 'd':
3821 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3822 break;
3823 case 'T':
3824 PrintOut(priority, "normal, permissive");
3825 break;
3826 case 'o':
3827 case 'S':
3828 PrintOut(priority, "on, off");
3829 break;
3830 case 'l':
3831 PrintOut(priority, "error, selftest");
3832 break;
3833 case 'M':
3834 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3835 break;
3836 case 'v':
3837 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3838 break;
3839 case 'P':
3840 PrintOut(priority, "use, ignore, show, showall");
3841 break;
3842 case 'F':
3843 PrintOut(priority, "%s", get_valid_firmwarebug_args());
3844 break;
3845 case 'e':
3846 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], "
3847 "security-freeze, standby,[N|off], wcache,[on|off]");
3848 break;
3849 }
3850 }
3851
3852 // exits with an error message, or returns integer value of token
3853 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3854 int min, int max, char * suffix = 0)
3855 {
3856 // make sure argument is there
3857 if (!arg) {
3858 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3859 cfgfile, lineno, name, token, min, max);
3860 return -1;
3861 }
3862
3863 // get argument value (base 10), check that it's integer, and in-range
3864 char *endptr;
3865 int val = strtol(arg,&endptr,10);
3866
3867 // optional suffix present?
3868 if (suffix) {
3869 if (!strcmp(endptr, suffix))
3870 endptr += strlen(suffix);
3871 else
3872 *suffix = 0;
3873 }
3874
3875 if (!(!*endptr && min <= val && val <= max)) {
3876 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3877 cfgfile, lineno, name, token, arg, min, max);
3878 return -1;
3879 }
3880
3881 // all is well; return value
3882 return val;
3883 }
3884
3885
3886 // Get 1-3 small integer(s) for '-W' directive
3887 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3888 unsigned char *val1, unsigned char *val2, unsigned char *val3)
3889 {
3890 unsigned v1 = 0, v2 = 0, v3 = 0;
3891 int n1 = -1, n2 = -1, n3 = -1, len;
3892 if (!arg) {
3893 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3894 cfgfile, lineno, name, token);
3895 return -1;
3896 }
3897
3898 len = strlen(arg);
3899 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3900 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3901 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3902 cfgfile, lineno, name, token, arg);
3903 return -1;
3904 }
3905 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3906 return 0;
3907 }
3908
3909
3910 #ifdef _WIN32
3911
// Returns the next strtok() token. If that token starts with '"', the
// following tokens are concatenated (separated by a single space each)
// up to and including the token which ends with '"'; both quotes are
// removed from the result.
//
// Returns NULL if no token is left, and the string "\"" if the closing
// quote is missing. A dequoted result points into a function-local static
// buffer which is overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Opening and closing quote in the same token (e.g. "word"):
  // strip the closing quote and stop here, otherwise the loop below would
  // keep looking for a closing quote in the following tokens
  if (!token.empty() && token[token.size()-1] == '"') {
    token.erase(token.size()-1);
    return token.c_str();
  }

  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\"";   // ran out of tokens before the closing quote
    token += ' ';
    int len = strlen(t);
    if (t[len-1] == '"') {
      token += std::string(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
3935
3936 #endif // _WIN32
3937
3938
3939 // This function returns 1 if it has correctly parsed one token (and
3940 // any arguments), else zero if no tokens remain. It returns -1 if an
3941 // error was encountered.
//
// token:      the Directive to parse (expected form: '-' plus one letter)
// cfg:        device configuration entry which receives the settings
// scan_types: list to which '-d TYPE' arguments are appended
//             ('-d auto' clears it)
//
// Arguments of a Directive are fetched with strtok(NULL, ...), so this
// function relies on the strtok() state set up by the caller.
// NOTE(review): none of the return statements below returns 0; the function
// returns 1 (Directive or comment parsed) or -1 (error).
3942 static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
3943 {
3944 char sym;
3945 const char * name = cfg.name.c_str();
3946 int lineno=cfg.lineno;
3947 const char *delim = " \n\t";
3948 int badarg = 0;
3949 int missingarg = 0;
3950 const char *arg = 0;
3951
3952 // is the rest of the line a comment
3953 if (*token=='#')
3954 return 1;
3955
3956 // is the token not recognized?
3957 if (*token!='-' || strlen(token)!=2) {
3958 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3959 configfile, lineno, name, token);
3960 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3961 return -1;
3962 }
3963
3964 // token we will be parsing:
3965 sym=token[1];
3966
3967 // parse the token and swallow its argument
3968 int val;
// Writable suffix strings for GetInteger(): it sets the first character
// to 0 if the argument does not end with the suffix.
3969 char plus[] = "+", excl[] = "!";
3970
3971 switch (sym) {
3972 case 'C':
3973 // monitor current pending sector count (default 197)
3974 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3975 return -1;
3976 cfg.curr_pending_id = (unsigned char)val;
// A trailing '+' on the argument sets the 'incr' flag
3977 cfg.curr_pending_incr = (*plus == '+');
3978 cfg.curr_pending_set = true;
3979 break;
3980 case 'U':
3981 // monitor offline uncorrectable sectors (default 198)
3982 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3983 return -1;
3984 cfg.offl_pending_id = (unsigned char)val;
3985 cfg.offl_pending_incr = (*plus == '+');
3986 cfg.offl_pending_set = true;
3987 break;
3988 case 'T':
3989 // Set tolerance level for SMART command failures
3990 if ((arg = strtok(NULL, delim)) == NULL) {
3991 missingarg = 1;
3992 } else if (!strcmp(arg, "normal")) {
3993 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3994 // not on failure of an optional S.M.A.R.T. command.
3995 // This is the default so we don't need to actually do anything here.
3996 cfg.permissive = false;
3997 } else if (!strcmp(arg, "permissive")) {
3998 // Permissive mode; ignore errors from Mandatory SMART commands
3999 cfg.permissive = true;
4000 } else {
4001 badarg = 1;
4002 }
4003 break;
4004 case 'd':
4005 // specify the device type
4006 if ((arg = strtok(NULL, delim)) == NULL) {
4007 missingarg = 1;
4008 } else if (!strcmp(arg, "ignore")) {
4009 cfg.ignore = true;
4010 } else if (!strcmp(arg, "removable")) {
4011 cfg.removable = true;
4012 } else if (!strcmp(arg, "auto")) {
4013 cfg.dev_type = "";
4014 scan_types.clear();
4015 } else {
// Any other argument is stored as device type; it is not validated here
4016 cfg.dev_type = arg;
4017 scan_types.push_back(arg);
4018 }
4019 break;
4020 case 'F':
4021 // fix firmware bug
4022 if (!(arg = strtok(0, delim)))
4023 missingarg = 1;
4024 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4025 badarg = 1;
4026 break;
4027 case 'H':
4028 // check SMART status
4029 cfg.smartcheck = true;
4030 break;
4031 case 'f':
4032 // check for failure of usage attributes
4033 cfg.usagefailed = true;
4034 break;
4035 case 't':
4036 // track changes in all vendor attributes
4037 cfg.prefail = true;
4038 cfg.usage = true;
4039 break;
4040 case 'p':
4041 // track changes in prefail vendor attributes
4042 cfg.prefail = true;
4043 break;
4044 case 'u':
4045 // track changes in usage vendor attributes
4046 cfg.usage = true;
4047 break;
4048 case 'l':
4049 // track changes in SMART logs
4050 if ((arg = strtok(NULL, delim)) == NULL) {
4051 missingarg = 1;
4052 } else if (!strcmp(arg, "selftest")) {
4053 // track changes in self-test log
4054 cfg.selftest = true;
4055 } else if (!strcmp(arg, "error")) {
4056 // track changes in ATA error log
4057 cfg.errorlog = true;
4058 } else if (!strcmp(arg, "xerror")) {
4059 // track changes in Extended Comprehensive SMART error log
4060 cfg.xerrorlog = true;
4061 } else if (!strcmp(arg, "offlinests")) {
4062 // track changes in offline data collection status
4063 cfg.offlinests = true;
4064 } else if (!strcmp(arg, "offlinests,ns")) {
4065 // track changes in offline data collection status, disable auto standby
4066 cfg.offlinests = cfg.offlinests_ns = true;
4067 } else if (!strcmp(arg, "selfteststs")) {
4068 // track changes in self-test execution status
4069 cfg.selfteststs = true;
4070 } else if (!strcmp(arg, "selfteststs,ns")) {
4071 // track changes in self-test execution status, disable auto standby
4072 cfg.selfteststs = cfg.selfteststs_ns = true;
4073 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4074 // set SCT Error Recovery Control
// Expected form: scterc,READTIME,WRITETIME with both values <= 999.
// nc stays -1 unless the whole pattern matched; it must also equal the
// argument length so that trailing characters are rejected.
4075 unsigned rt = ~0, wt = ~0; int nc = -1;
4076 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4077 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4078 cfg.sct_erc_set = true;
4079 cfg.sct_erc_readtime = rt;
4080 cfg.sct_erc_writetime = wt;
4081 }
4082 else
4083 badarg = 1;
4084 } else {
4085 badarg = 1;
4086 }
4087 break;
4088 case 'a':
4089 // monitor everything
4090 cfg.smartcheck = true;
4091 cfg.prefail = true;
4092 cfg.usagefailed = true;
4093 cfg.usage = true;
4094 cfg.selftest = true;
4095 cfg.errorlog = true;
4096 cfg.selfteststs = true;
4097 break;
4098 case 'o':
4099 // automatic offline testing enable/disable
4100 if ((arg = strtok(NULL, delim)) == NULL) {
4101 missingarg = 1;
4102 } else if (!strcmp(arg, "on")) {
4103 cfg.autoofflinetest = 2;
4104 } else if (!strcmp(arg, "off")) {
4105 cfg.autoofflinetest = 1;
4106 } else {
4107 badarg = 1;
4108 }
4109 break;
4110 case 'n':
4111 // skip disk check if in idle or standby mode
// Argument form: MODE[,N][,q] (see printoutvaliddirectiveargs())
4112 if (!(arg = strtok(NULL, delim)))
4113 missingarg = 1;
4114 else {
4115 char *endptr = NULL;
4116 char *next = strchr(const_cast<char*>(arg), ',');
4117
4118 cfg.powerquiet = false;
4119 cfg.powerskipmax = 0;
4120
// Terminate the MODE part at the first ','. This modifies the argument
// in place, so a later "Invalid argument" message shows only MODE.
4121 if (next!=NULL) *next='\0';
4122 if (!strcmp(arg, "never"))
4123 cfg.powermode = 0;
4124 else if (!strcmp(arg, "sleep"))
4125 cfg.powermode = 1;
4126 else if (!strcmp(arg, "standby"))
4127 cfg.powermode = 2;
4128 else if (!strcmp(arg, "idle"))
4129 cfg.powermode = 3;
4130 else
4131 badarg = 1;
4132
4133 // if optional arguments are present
4134 if (!badarg && next!=NULL) {
4135 next++;
4136 cfg.powerskipmax = strtol(next, &endptr, 10);
4137 if (endptr == next)
4138 cfg.powerskipmax = 0;
4139 else {
// Skip one character after the number (if any) without checking
// that it is a ','
4140 next = endptr + (*endptr != '\0');
4141 if (cfg.powerskipmax <= 0)
4142 badarg = 1;
4143 }
4144 if (*next != '\0') {
4145 if (!strcmp("q", next))
4146 cfg.powerquiet = true;
4147 else {
4148 badarg = 1;
4149 }
4150 }
4151 }
4152 }
4153 break;
4154 case 'S':
4155 // automatic attribute autosave enable/disable
4156 if ((arg = strtok(NULL, delim)) == NULL) {
4157 missingarg = 1;
4158 } else if (!strcmp(arg, "on")) {
4159 cfg.autosave = 2;
4160 } else if (!strcmp(arg, "off")) {
4161 cfg.autosave = 1;
4162 } else {
4163 badarg = 1;
4164 }
4165 break;
4166 case 's':
4167 // warn user, and delete any previously given -s REGEXP Directives
4168 if (!cfg.test_regex.empty()){
4169 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4170 configfile, lineno, name, cfg.test_regex.get_pattern());
4171 cfg.test_regex = regular_expression();
4172 }
4173 // check for missing argument
4174 if (!(arg = strtok(NULL, delim))) {
4175 missingarg = 1;
4176 }
4177 // Compile regex
4178 else {
4179 if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
4180 // not a valid regular expression!
4181 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4182 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4183 return -1;
4184 }
4185 // Do a bit of sanity checking and warn user if we think that
4186 // their regexp is "strange". User probably confused about shell
4187 // glob(3) syntax versus regular expression syntax regexp(7).
// val becomes the index of the first character which is not in the
// listed set; a warning is logged if such a character exists.
4188 if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
4189 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
4190 configfile, lineno, name, val+1, arg[val], arg);
4191 }
4192 break;
4193 case 'm':
4194 // send email to address that follows
4195 if (!(arg = strtok(NULL,delim)))
4196 missingarg = 1;
4197 else {
4198 if (!cfg.emailaddress.empty())
4199 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4200 configfile, lineno, name, cfg.emailaddress.c_str());
4201 #ifdef _WIN32 // TODO: Remove after smartmontools 6.5
4202 if ( !strcmp(arg, "msgbox") || !strcmp(arg, "sysmsgbox")
4203 || str_starts_with(arg, "msgbox,") || str_starts_with(arg, "sysmsgbox,")) {
4204 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -m %s is no longer supported, use -m console[,...] instead\n",
4205 configfile, lineno, name, arg);
4206 return -1;
4207 }
4208 #endif
4209 cfg.emailaddress = arg;
4210 }
4211 break;
4212 case 'M':
4213 // email warning options
4214 if (!(arg = strtok(NULL, delim)))
4215 missingarg = 1;
4216 else if (!strcmp(arg, "once"))
4217 cfg.emailfreq = 1;
4218 else if (!strcmp(arg, "daily"))
4219 cfg.emailfreq = 2;
4220 else if (!strcmp(arg, "diminishing"))
4221 cfg.emailfreq = 3;
4222 else if (!strcmp(arg, "test"))
4223 cfg.emailtest = 1;
4224 else if (!strcmp(arg, "exec")) {
4225 // Get the next argument (the command line)
4226 #ifdef _WIN32
4227 // Allow "/path name/with spaces/..." on Windows
4228 arg = strtok_dequote(delim);
// strtok_dequote() returns a string starting with '"' if the closing
// quote is missing
4229 if (arg && arg[0] == '"') {
4230 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4231 configfile, lineno, name, token);
4232 return -1;
4233 }
4234 #else
4235 arg = strtok(0, delim);
4236 #endif
4237 if (!arg) {
4238 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4239 configfile, lineno, name, token);
4240 return -1;
4241 }
4242 // Free the last cmd line given if any, and copy new one
4243 if (!cfg.emailcmdline.empty())
4244 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4245 configfile, lineno, name, cfg.emailcmdline.c_str());
4246 cfg.emailcmdline = arg;
4247 }
4248 else
4249 badarg = 1;
4250 break;
4251 case 'i':
4252 // ignore failure of usage attribute
4253 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4254 return -1;
4255 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
4256 break;
4257 case 'I':
4258 // ignore attribute for tracking purposes
4259 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4260 return -1;
4261 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
4262 break;
4263 case 'r':
4264 // print raw value when tracking
4265 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4266 return -1;
4267 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
4268 if (*excl == '!') // attribute change is critical
4269 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
4270 break;
4271 case 'R':
4272 // track changes in raw value (forces printing of raw value)
4273 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4274 return -1;
4275 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
4276 if (*excl == '!') // raw value change is critical
4277 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
4278 break;
4279 case 'W':
4280 // track Temperature
4281 if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4282 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4283 return -1;
4284 break;
4285 case 'v':
4286 // non-default vendor-specific attribute meaning
4287 if (!(arg=strtok(NULL,delim))) {
4288 missingarg = 1;
4289 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4290 badarg = 1;
4291 }
4292 break;
4293 case 'P':
4294 // Define use of drive-specific presets.
4295 if (!(arg = strtok(NULL, delim))) {
4296 missingarg = 1;
4297 } else if (!strcmp(arg, "use")) {
4298 cfg.ignorepresets = false;
4299 } else if (!strcmp(arg, "ignore")) {
4300 cfg.ignorepresets = true;
4301 } else if (!strcmp(arg, "show")) {
4302 cfg.showpresets = true;
4303 } else if (!strcmp(arg, "showall")) {
4304 showallpresets();
4305 } else {
4306 badarg = 1;
4307 }
4308 break;
4309
4310 case 'e':
4311 // Various ATA settings
4312 if (!(arg = strtok(NULL, delim))) {
4313 missingarg = true;
4314 }
4315 else {
// Split "NAME[,VALUE]" (',' or '=' accepted as separator):
// arg2 = NAME (at most 16 chars), n1 = offset after NAME,
// n2 = offset after the separator(s), n3 = offset after a numeric VALUE.
// Offsets stay -1 if the corresponding part was not reached.
// Note: this 'val' (unsigned) hides the 'int val' declared above.
4316 char arg2[16+1]; unsigned val;
4317 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4318 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4319 && (n1 == len || n2 > 0)) {
4320 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4321 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
// No number which extends to the end of the argument: use an
// out-of-range value so that the range checks below fail
4322 if (n3 != len)
4323 val = ~0U;
4324
4325 if (!strcmp(arg2, "aam")) {
4326 if (off)
4327 cfg.set_aam = -1;
4328 else if (val <= 254)
4329 cfg.set_aam = val + 1;
4330 else
4331 badarg = true;
4332 }
4333 else if (!strcmp(arg2, "apm")) {
4334 if (off)
4335 cfg.set_apm = -1;
4336 else if (1 <= val && val <= 254)
4337 cfg.set_apm = val + 1;
4338 else
4339 badarg = true;
4340 }
4341 else if (!strcmp(arg2, "lookahead")) {
4342 if (off)
4343 cfg.set_lookahead = -1;
4344 else if (on)
4345 cfg.set_lookahead = 1;
4346 else
4347 badarg = true;
4348 }
// Compares the complete argument (not arg2), so "security-freeze"
// followed by a separator or value is rejected
4349 else if (!strcmp(arg, "security-freeze")) {
4350 cfg.set_security_freeze = true;
4351 }
4352 else if (!strcmp(arg2, "standby")) {
4353 if (off)
4354 cfg.set_standby = 0 + 1;
4355 else if (val <= 255)
4356 cfg.set_standby = val + 1;
4357 else
4358 badarg = true;
4359 }
4360 else if (!strcmp(arg2, "wcache")) {
4361 if (off)
4362 cfg.set_wcache = -1;
4363 else if (on)
4364 cfg.set_wcache = 1;
4365 else
4366 badarg = true;
4367 }
4368 else
4369 badarg = true;
4370 }
4371 else
4372 badarg = true;
4373 }
4374 break;
4375
4376 default:
4377 // Directive not recognized
4378 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4379 configfile, lineno, name, token);
4380 Directives();
4381 return -1;
4382 }
// Common error reporting for the cases which only set missingarg/badarg
4383 if (missingarg) {
4384 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4385 configfile, lineno, name, token);
4386 }
4387 if (badarg) {
4388 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4389 configfile, lineno, name, token, arg);
4390 }
4391 if (missingarg || badarg) {
4392 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4393 printoutvaliddirectiveargs(LOG_CRIT, sym);
4394 PrintOut(LOG_CRIT, "\n");
4395 return -1;
4396 }
4397
4398 return 1;
4399 }
4400
4401 // Scan directive for configuration file
4402 #define SCANDIRECTIVE "DEVICESCAN"
4403
4404 // This is the routine that adds things to the conf_entries list.
4405 //
4406 // Return values are:
4407 // 1: parsed a normal line
4408 // 0: found DEFAULT setting or comment or blank line
4409 // -1: found SCANDIRECTIVE line
4410 // -2: found an error
4411 //
4412 // Note: this routine modifies *line from the caller!
4413 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4414 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4415 {
4416 const char *delim = " \n\t";
4417
4418 // get first token: device name. If a comment, skip line
4419 const char * name = strtok(line, delim);
4420 if (!name || *name == '#')
4421 return 0;
4422
4423 // Check device name for DEFAULT or DEVICESCAN
4424 int retval;
4425 if (!strcmp("DEFAULT", name)) {
4426 retval = 0;
4427 // Restart with empty defaults
4428 default_conf = dev_config();
4429 }
4430 else {
4431 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4432 // Init new entry with current defaults
4433 conf_entries.push_back(default_conf);
4434 }
4435 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4436
4437 cfg.name = name; // Later replaced by dev->get_info().info_name
4438 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4439 cfg.lineno = lineno;
4440
4441 // parse tokens one at a time from the file.
4442 while (char * token = strtok(0, delim)) {
4443 int rc = ParseToken(token, cfg, scan_types);
4444 if (rc < 0)
4445 // error found on the line
4446 return -2;
4447
4448 if (rc == 0)
4449 // No tokens left
4450 break;
4451
4452 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4453 }
4454
4455 // Check for multiple -d TYPE directives
4456 if (retval != -1 && scan_types.size() > 1) {
4457 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4458 cfg.name.c_str(), cfg.lineno, configfile);
4459 return -2;
4460 }
4461
4462 // Don't perform checks below for DEFAULT entries
4463 if (retval == 0)
4464 return retval;
4465
4466 // If NO monitoring directives are set, then set all of them.
4467 if (!( cfg.smartcheck || cfg.selftest
4468 || cfg.errorlog || cfg.xerrorlog
4469 || cfg.offlinests || cfg.selfteststs
4470 || cfg.usagefailed || cfg.prefail || cfg.usage
4471 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4472
4473 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4474 cfg.name.c_str(), cfg.lineno, configfile);
4475
4476 cfg.smartcheck = true;
4477 cfg.usagefailed = true;
4478 cfg.prefail = true;
4479 cfg.usage = true;
4480 cfg.selftest = true;
4481 cfg.errorlog = true;
4482 cfg.selfteststs = true;
4483 }
4484
4485 // additional sanity check. Has user set -M options without -m?
4486 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4487 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4488 cfg.name.c_str(), cfg.lineno, configfile);
4489 return -2;
4490 }
4491
4492 // has the user has set <nomailer>?
4493 if (cfg.emailaddress == "<nomailer>") {
4494 // check that -M exec is also set
4495 if (cfg.emailcmdline.empty()){
4496 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4497 cfg.name.c_str(), cfg.lineno, configfile);
4498 return -2;
4499 }
4500 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4501 cfg.emailaddress.clear();
4502 }
4503
4504 return retval;
4505 }
4506
4507 // Parses a configuration file. Return values are:
4508 // N=>0: found N entries
4509 // -1: syntax error in config file
4510 // -2: config file does not exist
4511 // -3: config file exists but cannot be read
4512 //
4513 // In the case where the return value is 0, there are three
4514 // possiblities:
4515 // Empty configuration file ==> conf_entries.empty()
4516 // No configuration file ==> conf_entries[0].lineno == 0
4517 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4518 static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4519 {
4520 // maximum line length in configuration file
4521 const int MAXLINELEN = 256;
4522 // maximum length of a continued line in configuration file
4523 const int MAXCONTLINE = 1023;
4524
4525 stdio_file f;
4526 // Open config file, if it exists and is not <stdin>
4527 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4528 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4529 // file exists but we can't read it or it should exist due to '-c' option
4530 int ret = (errno!=ENOENT ? -3 : -2);
4531 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4532 strerror(errno),configfile);
4533 return ret;
4534 }
4535 }
4536 else // read from stdin ('-c -' option)
4537 f.open(stdin);
4538
4539 // Start with empty defaults
4540 dev_config default_conf;
4541
4542 // No configuration file found -- use fake one
4543 int entry = 0;
4544 if (!f) {
4545 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4546
4547 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4548 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4549 return 0;
4550 }
4551
4552 #ifdef __CYGWIN__
4553 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4554 #endif
4555
4556 // configuration file exists
4557 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4558
4559 // parse config file line by line
4560 int lineno = 1, cont = 0, contlineno = 0;
4561 char line[MAXLINELEN+2];
4562 char fullline[MAXCONTLINE+1];
4563
4564 for (;;) {
4565 int len=0,scandevice;
4566 char *lastslash;
4567 char *comment;
4568 char *code;
4569
4570 // make debugging simpler
4571 memset(line,0,sizeof(line));
4572
4573 // get a line
4574 code=fgets(line, MAXLINELEN+2, f);
4575
4576 // are we at the end of the file?
4577 if (!code){
4578 if (cont) {
4579 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4580 // See if we found a SCANDIRECTIVE directive
4581 if (scandevice==-1)
4582 return 0;
4583 // did we find a syntax error
4584 if (scandevice==-2)
4585 return -1;
4586 // the final line is part of a continuation line
4587 entry+=scandevice;
4588 }
4589 break;
4590 }
4591
4592 // input file line number
4593 contlineno++;
4594
4595 // See if line is too long
4596 len=strlen(line);
4597 if (len>MAXLINELEN){
4598 const char *warn;
4599 if (line[len-1]=='\n')
4600 warn="(including newline!) ";
4601 else
4602 warn="";
4603 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4604 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4605 return -1;
4606 }
4607
4608 // Ignore anything after comment symbol
4609 if ((comment=strchr(line,'#'))){
4610 *comment='\0';
4611 len=strlen(line);
4612 }
4613
4614 // is the total line (made of all continuation lines) too long?
4615 if (cont+len>MAXCONTLINE){
4616 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4617 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4618 return -1;
4619 }
4620
4621 // copy string so far into fullline, and increment length
4622 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4623 cont+=len;
4624
4625 // is this a continuation line. If so, replace \ by space and look at next line
4626 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4627 *(fullline+(cont-len)+(lastslash-line))=' ';
4628 continue;
4629 }
4630
4631 // Not a continuation line. Parse it
4632 scan_types.clear();
4633 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4634
4635 // did we find a scandevice directive?
4636 if (scandevice==-1)
4637 return 0;
4638 // did we find a syntax error
4639 if (scandevice==-2)
4640 return -1;
4641
4642 entry+=scandevice;
4643 lineno++;
4644 cont=0;
4645 }
4646
4647 // note -- may be zero if syntax of file OK, but no valid entries!
4648 return entry;
4649 }
4650
4651 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4652 <LIST> is the list of valid arguments for option opt. */
4653 static void PrintValidArgs(char opt)
4654 {
4655 const char *s;
4656
4657 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4658 if (!(s = GetValidArgList(opt)))
4659 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4660 else
4661 PrintOut(LOG_CRIT, "%s", (char *)s);
4662 PrintOut(LOG_CRIT, " <=======\n");
4663 }
4664
4665 #ifndef _WIN32
4666 // Report error and exit if specified path is not absolute.
4667 static void check_abs_path(char option, const std::string & path)
4668 {
4669 if (path.empty() || path[0] == '/')
4670 return;
4671
4672 debugmode = 1;
4673 PrintHead();
4674 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4675 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4676 EXIT(EXIT_BADCMD);
4677 }
4678 #endif // !_WIN32
4679
4680 // Parses input line, prints usage message and
4681 // version/license/copyright messages
4682 static void ParseOpts(int argc, char **argv)
4683 {
4684 // Init default path names
4685 #ifndef _WIN32
4686 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4687 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4688 #else
4689 std::string exedir = get_exe_dir();
4690 static std::string configfile_str = exedir + "/smartd.conf";
4691 configfile = configfile_str.c_str();
4692 warning_script = exedir + "/smartd_warning.cmd";
4693 #endif
4694
4695 // Please update GetValidArgList() if you edit shortopts
4696 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4697 #ifdef HAVE_LIBCAP_NG
4698 "C"
4699 #endif
4700 ;
4701 // Please update GetValidArgList() if you edit longopts
4702 struct option longopts[] = {
4703 { "configfile", required_argument, 0, 'c' },
4704 { "logfacility", required_argument, 0, 'l' },
4705 { "quit", required_argument, 0, 'q' },
4706 { "debug", no_argument, 0, 'd' },
4707 { "showdirectives", no_argument, 0, 'D' },
4708 { "interval", required_argument, 0, 'i' },
4709 #ifndef _WIN32
4710 { "no-fork", no_argument, 0, 'n' },
4711 #else
4712 { "service", no_argument, 0, 'n' },
4713 #endif
4714 { "pidfile", required_argument, 0, 'p' },
4715 { "report", required_argument, 0, 'r' },
4716 { "savestates", required_argument, 0, 's' },
4717 { "attributelog", required_argument, 0, 'A' },
4718 { "drivedb", required_argument, 0, 'B' },
4719 { "warnexec", required_argument, 0, 'w' },
4720 { "version", no_argument, 0, 'V' },
4721 { "license", no_argument, 0, 'V' },
4722 { "copyright", no_argument, 0, 'V' },
4723 { "help", no_argument, 0, 'h' },
4724 { "usage", no_argument, 0, 'h' },
4725 #ifdef HAVE_LIBCAP_NG
4726 { "capabilities", no_argument, 0, 'C' },
4727 #endif
4728 { 0, 0, 0, 0 }
4729 };
4730
4731 opterr=optopt=0;
4732 bool badarg = false;
4733 bool use_default_db = true; // set false on '-B FILE'
4734
4735 // Parse input options.
4736 int optchar;
4737 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4738 char *arg;
4739 char *tailptr;
4740 long lchecktime;
4741
4742 switch(optchar) {
4743 case 'q':
4744 // when to quit
4745 if (!(strcmp(optarg,"nodev"))) {
4746 quit=0;
4747 } else if (!(strcmp(optarg,"nodevstartup"))) {
4748 quit=1;
4749 } else if (!(strcmp(optarg,"never"))) {
4750 quit=2;
4751 } else if (!(strcmp(optarg,"onecheck"))) {
4752 quit=3;
4753 debugmode=1;
4754 } else if (!(strcmp(optarg,"showtests"))) {
4755 quit=4;
4756 debugmode=1;
4757 } else if (!(strcmp(optarg,"errors"))) {
4758 quit=5;
4759 } else {
4760 badarg = true;
4761 }
4762 break;
4763 case 'l':
4764 // set the log facility level
4765 if (!strcmp(optarg, "daemon"))
4766 facility=LOG_DAEMON;
4767 else if (!strcmp(optarg, "local0"))
4768 facility=LOG_LOCAL0;
4769 else if (!strcmp(optarg, "local1"))
4770 facility=LOG_LOCAL1;
4771 else if (!strcmp(optarg, "local2"))
4772 facility=LOG_LOCAL2;
4773 else if (!strcmp(optarg, "local3"))
4774 facility=LOG_LOCAL3;
4775 else if (!strcmp(optarg, "local4"))
4776 facility=LOG_LOCAL4;
4777 else if (!strcmp(optarg, "local5"))
4778 facility=LOG_LOCAL5;
4779 else if (!strcmp(optarg, "local6"))
4780 facility=LOG_LOCAL6;
4781 else if (!strcmp(optarg, "local7"))
4782 facility=LOG_LOCAL7;
4783 else
4784 badarg = true;
4785 break;
4786 case 'd':
4787 // enable debug mode
4788 debugmode = 1;
4789 break;
4790 case 'n':
4791 // don't fork()
4792 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4793 do_fork = false;
4794 #endif
4795 break;
4796 case 'D':
4797 // print summary of all valid directives
4798 debugmode = 1;
4799 Directives();
4800 EXIT(0);
4801 break;
4802 case 'i':
4803 // Period (time interval) for checking
4804 // strtol will set errno in the event of overflow, so we'll check it.
4805 errno = 0;
4806 lchecktime = strtol(optarg, &tailptr, 10);
4807 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4808 debugmode=1;
4809 PrintHead();
4810 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4811 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4812 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4813 EXIT(EXIT_BADCMD);
4814 }
4815 checktime = (int)lchecktime;
4816 break;
4817 case 'r':
4818 // report IOCTL transactions
4819 {
4820 int n1 = -1, n2 = -1, len = strlen(optarg);
4821 char s[9+1]; unsigned i = 1;
4822 sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
4823 if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) {
4824 badarg = true;
4825 } else if (!strcmp(s,"ioctl")) {
4826 ata_debugmode = scsi_debugmode = nvme_debugmode = i;
4827 } else if (!strcmp(s,"ataioctl")) {
4828 ata_debugmode = i;
4829 } else if (!strcmp(s,"scsiioctl")) {
4830 scsi_debugmode = i;
4831 } else if (!strcmp(s,"nvmeioctl")) {
4832 nvme_debugmode = i;
4833 } else {
4834 badarg = true;
4835 }
4836 }
4837 break;
4838 case 'c':
4839 // alternate configuration file
4840 if (strcmp(optarg,"-"))
4841 configfile = (configfile_alt = optarg).c_str();
4842 else // read from stdin
4843 configfile=configfile_stdin;
4844 break;
4845 case 'p':
4846 // output file with PID number
4847 pid_file = optarg;
4848 break;
4849 case 's':
4850 // path prefix of persistent state file
4851 state_path_prefix = optarg;
4852 break;
4853 case 'A':
4854 // path prefix of attribute log file
4855 attrlog_path_prefix = optarg;
4856 break;
4857 case 'B':
4858 {
4859 const char * path = optarg;
4860 if (*path == '+' && path[1])
4861 path++;
4862 else
4863 use_default_db = false;
4864 unsigned char savedebug = debugmode; debugmode = 1;
4865 if (!read_drive_database(path))
4866 EXIT(EXIT_BADCMD);
4867 debugmode = savedebug;
4868 }
4869 break;
4870 case 'w':
4871 warning_script = optarg;
4872 break;
4873 case 'V':
4874 // print version and CVS info
4875 debugmode = 1;
4876 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4877 EXIT(0);
4878 break;
4879 #ifdef HAVE_LIBCAP_NG
4880 case 'C':
4881 // enable capabilities
4882 enable_capabilities = true;
4883 break;
4884 #endif
4885 case 'h':
4886 // help: print summary of command-line options
4887 debugmode=1;
4888 PrintHead();
4889 Usage();
4890 EXIT(0);
4891 break;
4892 case '?':
4893 default:
4894 // unrecognized option
4895 debugmode=1;
4896 PrintHead();
4897 // Point arg to the argument in which this option was found.
4898 arg = argv[optind-1];
4899 // Check whether the option is a long option that doesn't map to -h.
4900 if (arg[1] == '-' && optchar != 'h') {
4901 // Iff optopt holds a valid option then argument must be missing.
4902 if (optopt && (strchr(shortopts, optopt) != NULL)) {
4903 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4904 PrintValidArgs(optopt);
4905 } else {
4906 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4907 }
4908 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4909 EXIT(EXIT_BADCMD);
4910 }
4911 if (optopt) {
4912 // Iff optopt holds a valid option then argument must be missing.
4913 if (strchr(shortopts, optopt) != NULL){
4914 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4915 PrintValidArgs(optopt);
4916 } else {
4917 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4918 }
4919 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4920 EXIT(EXIT_BADCMD);
4921 }
4922 Usage();
4923 EXIT(0);
4924 }
4925
4926 // Check to see if option had an unrecognized or incorrect argument.
4927 if (badarg) {
4928 debugmode=1;
4929 PrintHead();
4930 // It would be nice to print the actual option name given by the user
4931 // here, but we just print the short form. Please fix this if you know
4932 // a clean way to do it.
4933 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4934 PrintValidArgs(optchar);
4935 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4936 EXIT(EXIT_BADCMD);
4937 }
4938 }
4939
4940 // non-option arguments are not allowed
4941 if (argc > optind) {
4942 debugmode=1;
4943 PrintHead();
4944 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4945 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4946 EXIT(EXIT_BADCMD);
4947 }
4948
4949 // no pidfile in debug mode
4950 if (debugmode && !pid_file.empty()) {
4951 debugmode=1;
4952 PrintHead();
4953 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4954 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4955 EXIT(EXIT_BADCMD);
4956 }
4957
4958 #ifndef _WIN32
4959 if (!debugmode) {
4960 // absolute path names are required due to chdir('/') after fork().
4961 check_abs_path('p', pid_file);
4962 check_abs_path('s', state_path_prefix);
4963 check_abs_path('A', attrlog_path_prefix);
4964 }
4965 #endif
4966
4967 // Read or init drive database
4968 {
4969 unsigned char savedebug = debugmode; debugmode = 1;
4970 if (!init_drive_database(use_default_db))
4971 EXIT(EXIT_BADCMD);
4972 debugmode = savedebug;
4973 }
4974
4975 // print header
4976 PrintHead();
4977 }
4978
4979 // Function we call if no configuration file was found or if the
4980 // SCANDIRECTIVE Directive was found. It makes entries for device
4981 // names returned by scan_smart_devices() in os_OSNAME.cpp
4982 static int MakeConfigEntries(const dev_config & base_cfg,
4983 dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4984 const smart_devtype_list & types)
4985 {
4986 // make list of devices
4987 smart_device_list devlist;
4988 if (!smi()->scan_smart_devices(devlist, types)) {
4989 PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
4990 return 0;
4991 }
4992
4993 // if no devices, return
4994 if (devlist.size() <= 0)
4995 return 0;
4996
4997 // add empty device slots for existing config entries
4998 while (scanned_devs.size() < conf_entries.size())
4999 scanned_devs.push_back((smart_device *)0);
5000
5001 // loop over entries to create
5002 for (unsigned i = 0; i < devlist.size(); i++) {
5003 // Move device pointer
5004 smart_device * dev = devlist.release(i);
5005 scanned_devs.push_back(dev);
5006
5007 // Copy configuration, update device and type name
5008 conf_entries.push_back(base_cfg);
5009 dev_config & cfg = conf_entries.back();
5010 cfg.name = dev->get_info().info_name;
5011 cfg.dev_name = dev->get_info().dev_name;
5012 cfg.dev_type = dev->get_info().dev_type;
5013 }
5014
5015 return devlist.size();
5016 }
5017
5018 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
5019 {
5020 if (!debugmode && scandirective)
5021 return;
5022 if (line)
5023 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
5024 "Unable to register %s device %s at line %d of file %s\n",
5025 type, name, line, configfile);
5026 else
5027 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
5028 type, name);
5029 return;
5030 }
5031
5032 // Returns negative value (see ParseConfigFile()) if config file
5033 // had errors, else number of entries which may be zero or positive.
5034 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
5035 {
5036 // parse configuration file configfile (normally /etc/smartd.conf)
5037 smart_devtype_list scan_types;
5038 int entries = ParseConfigFile(conf_entries, scan_types);
5039
5040 if (entries < 0) {
5041 // There was an error reading the configuration file.
5042 conf_entries.clear();
5043 if (entries == -1)
5044 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
5045 return entries;
5046 }
5047
5048 // no error parsing config file.
5049 if (entries) {
5050 // we did not find a SCANDIRECTIVE and did find valid entries
5051 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
5052 }
5053 else if (!conf_entries.empty()) {
5054 // we found a SCANDIRECTIVE or there was no configuration file so
5055 // scan. Configuration file's last entry contains all options
5056 // that were set
5057 dev_config first = conf_entries.back();
5058 conf_entries.pop_back();
5059
5060 if (first.lineno)
5061 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
5062 else
5063 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
5064
5065 // make config list of devices to search for
5066 MakeConfigEntries(first, conf_entries, scanned_devs, scan_types);
5067
5068 // warn user if scan table found no devices
5069 if (conf_entries.empty())
5070 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
5071 }
5072 else
5073 PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
5074
5075 return conf_entries.size();
5076 }
5077
5078 // Return true if TYPE contains a RAID drive number
5079 static bool is_raid_type(const char * type)
5080 {
5081 if (str_starts_with(type, "sat,"))
5082 return false;
5083 int i;
5084 if (sscanf(type, "%*[^,],%d", &i) != 1)
5085 return false;
5086 return true;
5087 }
5088
5089 // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
5090 static bool is_duplicate_device(const smart_device * dev,
5091 const smart_device_list & devices, unsigned numdevs,
5092 const dev_config_vector & ignored)
5093 {
5094 const smart_device::device_info & info1 = dev->get_info();
5095 bool is_raid1 = is_raid_type(info1.dev_type.c_str());
5096
5097 for (unsigned i = 0; i < numdevs; i++) {
5098 const smart_device::device_info & info2 = devices.at(i)->get_info();
5099 // -d TYPE options must match if RAID drive number is specified
5100 if ( info1.dev_name == info2.dev_name
5101 && ( info1.dev_type == info2.dev_type
5102 || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
5103 return true;
5104 }
5105
5106 for (unsigned i = 0; i < ignored.size(); i++) {
5107 const dev_config & cfg2 = ignored.at(i);
5108 if ( info1.dev_name == cfg2.dev_name
5109 && ( info1.dev_type == cfg2.dev_type
5110 || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
5111 return true;
5112 }
5113 return false;
5114 }
5115
5116 // This function tries devices from conf_entries. Each one that can be
5117 // registered is moved onto the [ata|scsi]devices lists and removed
5118 // from the conf_entries list.
5119 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5120 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
5121 {
5122 // start by clearing lists/memory of ALL existing devices
5123 configs.clear();
5124 devices.clear();
5125 states.clear();
5126
5127 // Register entries
5128 dev_config_vector ignored_entries;
5129 unsigned numnoscan = 0;
5130 for (unsigned i = 0; i < conf_entries.size(); i++){
5131
5132 dev_config cfg = conf_entries[i];
5133
5134 if (cfg.ignore) {
5135 // Store for is_duplicate_device() check and ignore
5136 PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
5137 (!cfg.dev_type.empty() ? " [" : ""),
5138 cfg.dev_type.c_str(),
5139 (!cfg.dev_type.empty() ? "]" : ""));
5140 ignored_entries.push_back(cfg);
5141 continue;
5142 }
5143
5144 // get device of appropriate type
5145 smart_device_auto_ptr dev;
5146 bool scanning = false;
5147
5148 // Device may already be detected during devicescan
5149 if (i < scanned_devs.size()) {
5150 dev = scanned_devs.release(i);
5151 if (dev) {
5152 // Check for a preceding non-DEVICESCAN entry for the same device
5153 if ( (numnoscan || !ignored_entries.empty())
5154 && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
5155 PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
5156 continue;
5157 }
5158 scanning = true;
5159 }
5160 }
5161
5162 if (!dev) {
5163 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
5164 if (!dev) {
5165 if (cfg.dev_type.empty())
5166 PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
5167 else
5168 PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
5169 continue;
5170 }
5171 }
5172
5173 // Save old info
5174 smart_device::device_info oldinfo = dev->get_info();
5175
5176 // Open with autodetect support, may return 'better' device
5177 dev.replace( dev->autodetect_open() );
5178
5179 // Report if type has changed
5180 if (oldinfo.dev_type != dev->get_dev_type())
5181 PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
5182 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
5183
5184 if (!dev->is_open()) {
5185 // For linux+devfs, a nonexistent device gives a strange error
5186 // message. This makes the error message a bit more sensible.
5187 // If no debug and scanning - don't print errors
5188 if (debugmode || !scanning)
5189 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
5190 continue;
5191 }
5192
5193 // Update informal name
5194 cfg.name = dev->get_info().info_name;
5195 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
5196
5197 // Prepare initial state
5198 dev_state state;
5199
5200 // register ATA devices
5201 if (dev->is_ata()){
5202 if (ATADeviceScan(cfg, state, dev->to_ata())) {
5203 CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
5204 dev.reset();
5205 }
5206 }
5207 // or register SCSI devices
5208 else if (dev->is_scsi()){
5209 if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
5210 CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
5211 dev.reset();
5212 }
5213 }
5214 // or register NVMe devices
5215 else if (dev->is_nvme()) {
5216 if (NVMeDeviceScan(cfg, state, dev->to_nvme())) {
5217 CanNotRegister(cfg.name.c_str(), "NVMe", cfg.lineno, scanning);
5218 dev.reset();
5219 }
5220 }
5221 else {
5222 PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
5223 dev.reset();
5224 }
5225
5226 if (dev) {
5227 // move onto the list of devices
5228 configs.push_back(cfg);
5229 states.push_back(state);
5230 devices.push_back(dev);
5231 if (!scanning)
5232 numnoscan = devices.size();
5233 }
5234 // if device is explictly listed and we can't register it, then
5235 // exit unless the user has specified that the device is removable
5236 else if (!scanning) {
5237 if (cfg.removable || quit==2)
5238 PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
5239 else {
5240 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
5241 EXIT(EXIT_BADDEV);
5242 }
5243 }
5244 }
5245
5246 init_disable_standby_check(configs);
5247 }
5248
5249
5250 // Main program without exception handling
5251 static int main_worker(int argc, char **argv)
5252 {
5253 // Initialize interface
5254 smart_interface::init();
5255 if (!smi())
5256 return 1;
5257
5258 // is it our first pass through?
5259 bool firstpass = true;
5260
5261 // next time to wake up
5262 time_t wakeuptime = 0;
5263
5264 // parse input and print header and usage info if needed
5265 ParseOpts(argc,argv);
5266
5267 // Configuration for each device
5268 dev_config_vector configs;
5269 // Device states
5270 dev_state_vector states;
5271 // Devices to monitor
5272 smart_device_list devices;
5273
5274 bool write_states_always = true;
5275
5276 #ifdef HAVE_LIBCAP_NG
5277 // Drop capabilities
5278 if (enable_capabilities) {
5279 capng_clear(CAPNG_SELECT_BOTH);
5280 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
5281 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
5282 capng_apply(CAPNG_SELECT_BOTH);
5283 }
5284 #endif
5285
5286 // the main loop of the code
5287 for (;;) {
5288
5289 // are we exiting from a signal?
5290 if (caughtsigEXIT) {
5291 // are we exiting with SIGTERM?
5292 int isterm=(caughtsigEXIT==SIGTERM);
5293 int isquit=(caughtsigEXIT==SIGQUIT);
5294 int isok=debugmode?isterm || isquit:isterm;
5295
5296 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
5297 caughtsigEXIT, strsignal(caughtsigEXIT));
5298
5299 if (!isok)
5300 return EXIT_SIGNAL;
5301
5302 // Write state files
5303 if (!state_path_prefix.empty())
5304 write_all_dev_states(configs, states);
5305
5306 return 0;
5307 }
5308
5309 // Should we (re)read the config file?
5310 if (firstpass || caughtsigHUP){
5311 if (!firstpass) {
5312 // Write state files
5313 if (!state_path_prefix.empty())
5314 write_all_dev_states(configs, states);
5315
5316 PrintOut(LOG_INFO,
5317 caughtsigHUP==1?
5318 "Signal HUP - rereading configuration file %s\n":
5319 "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5320 configfile);
5321 }
5322
5323 {
5324 dev_config_vector conf_entries; // Entries read from smartd.conf
5325 smart_device_list scanned_devs; // Devices found during scan
5326 // (re)reads config file, makes >=0 entries
5327 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5328
5329 if (entries>=0) {
5330 // checks devices, then moves onto ata/scsi list or deallocates.
5331 RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
5332 if (!(configs.size() == devices.size() && configs.size() == states.size()))
5333 throw std::logic_error("Invalid result from RegisterDevices");
5334 }
5335 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
5336 // user has asked to continue on error in configuration file
5337 if (!firstpass)
5338 PrintOut(LOG_INFO,"Reusing previous configuration\n");
5339 }
5340 else {
5341 // exit with configuration file error status
5342 return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
5343 }
5344 }
5345
5346 // Log number of devices we are monitoring...
5347 if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
5348 int numata = 0, numscsi = 0;
5349 for (unsigned i = 0; i < devices.size(); i++) {
5350 const smart_device * dev = devices.at(i);
5351 if (dev->is_ata())
5352 numata++;
5353 else if (dev->is_scsi())
5354 numscsi++;
5355 }
5356 PrintOut(LOG_INFO,"Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5357 numata, numscsi, (int)devices.size() - numata - numscsi);
5358 }
5359 else {
5360 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
5361 return EXIT_NODEV;
5362 }
5363
5364 if (quit==4) {
5365 // user has asked to print test schedule
5366 PrintTestSchedule(configs, states, devices);
5367 return 0;
5368 }
5369
5370 #ifdef HAVE_LIBCAP_NG
5371 if (enable_capabilities) {
5372 for (unsigned i = 0; i < configs.size(); i++) {
5373 if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
5374 PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
5375 break;
5376 }
5377 }
5378 }
5379 #endif
5380
5381 // reset signal
5382 caughtsigHUP=0;
5383
5384 // Always write state files after (re)configuration
5385 write_states_always = true;
5386 }
5387
5388 // check all devices once,
5389 // self tests are not started in first pass unless '-q onecheck' is specified
5390 CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
5391
5392 // Write state files
5393 if (!state_path_prefix.empty())
5394 write_all_dev_states(configs, states, write_states_always);
5395 write_states_always = false;
5396
5397 // Write attribute logs
5398 if (!attrlog_path_prefix.empty())
5399 write_all_dev_attrlogs(configs, states);
5400
5401 // user has asked us to exit after first check
5402 if (quit==3) {
5403 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5404 "smartd is exiting (exit status 0)\n");
5405 return 0;
5406 }
5407
5408 // fork into background if needed
5409 if (firstpass && !debugmode) {
5410 DaemonInit();
5411 }
5412
5413 // set exit and signal handlers, write PID file, set wake-up time
5414 if (firstpass){
5415 Initialize(&wakeuptime);
5416 firstpass = false;
5417 }
5418
5419 // sleep until next check time, or a signal arrives
5420 wakeuptime = dosleep(wakeuptime, write_states_always);
5421 }
5422 }
5423
5424
5425 #ifndef _WIN32
5426 // Main program
5427 int main(int argc, char **argv)
5428 #else
5429 // Windows: internal main function started direct or by service control manager
5430 static int smartd_main(int argc, char **argv)
5431 #endif
5432 {
5433 int status;
5434 try {
5435 // Do the real work ...
5436 status = main_worker(argc, argv);
5437 }
5438 catch (int ex) {
5439 // EXIT(status) arrives here
5440 status = ex;
5441 }
5442 catch (const std::bad_alloc & /*ex*/) {
5443 // Memory allocation failed (also thrown by std::operator new)
5444 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5445 status = EXIT_NOMEM;
5446 }
5447 catch (const std::exception & ex) {
5448 // Other fatal errors
5449 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5450 status = EXIT_BADCODE;
5451 }
5452
5453 // Check for remaining device objects
5454 if (smart_device::get_num_objects() != 0) {
5455 PrintOut(LOG_CRIT, "Smartd: Internal Error: %d device object(s) left at exit.\n",
5456 smart_device::get_num_objects());
5457 status = EXIT_BADCODE;
5458 }
5459
5460 if (status == EXIT_BADCODE)
5461 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
5462
5463 if (is_initialized)
5464 status = Goodbye(status);
5465
5466 #ifdef _WIN32
5467 daemon_winsvc_exitcode = status;
5468 #endif
5469 return status;
5470 }
5471
5472
#ifdef _WIN32
// Main function for Windows.
// Passes the service options and smartd_main() to daemon_main().
int main(int argc, char **argv)
{
  // Description text of the smartd windows service
  static const char svc_description[] =
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (SMART) built into "
    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
    "www.smartmontools.org";

  // Options for smartd windows service
  static const daemon_winsvc_options svc_opts = {
    "--service",                 // cmd_opt
    "smartd", "SmartD Service",  // servicename, displayname
    svc_description              // description
  };

  // daemon_main() handles daemon and service specific commands
  // and starts smartd_main() direct, from a new process,
  // or via service control manager
  return daemon_main("smartd", &svc_opts , smartd_main, argc, argv);
}
#endif