2 * Home page of code is: http://smartmontools.sourceforge.net
4 * Copyright (C) 2002-7 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * You should have received a copy of the GNU General Public License
13 * (for example COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16 * This code was originally developed as a Senior Thesis by Michael Cornwell
17 * at the Concurrent Systems Laboratory (now part of the Storage Systems
18 * Research Center), Jack Baskin School of Engineering, University of
19 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
27 // unconditionally included files
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
49 // see which system files to conditionally include
52 // conditionally included files
53 #ifdef HAVE_GETOPT_LONG
62 #pragma warning(disable:4761) // "conversion supplied"
63 typedef unsigned short mode_t
;
66 #include <io.h> // umask()
67 #include <process.h> // getpid()
72 // BOOL WINAPI FreeConsole(void);
73 extern "C" int __stdcall
FreeConsole(void);
74 #include <io.h> // setmode()
77 // locally included files
82 #include "knowndrives.h"
89 #include "hostname_win32.h" // gethost/domainname()
90 #define HAVE_GETHOSTNAME 1
91 #define HAVE_GETDOMAINNAME 1
92 // fork()/signal()/initd simulation for native Windows
93 #include "daemon_win32.h" // daemon_main/detach/signal()
95 #define SIGNALFN daemon_signal
96 #define strsignal daemon_strsignal
97 #define sleep daemon_sleep
98 #undef EXIT // see utility.h
99 #define EXIT(x) { exitstatus = daemon_winsvc_exitcode = (x); exit((x)); }
100 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
101 #define SIGQUIT SIGBREAK
102 #define SIGQUIT_KEYNAME "CONTROL-Break"
105 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
106 #define SIGQUIT_KEYNAME "2x CONTROL-C"
108 #define SIGQUIT_KEYNAME "CONTROL-\\"
112 #if defined (__SVR4) && defined (__sun)
113 extern "C" int getdomainname(char *, int); // no declaration in header files!
116 #define ARGUSED(x) ((void)(x))
118 // These are CVS identification information for *.cpp and *.h files
119 extern const char *atacmdnames_c_cvsid
, *atacmds_c_cvsid
, *ataprint_c_cvsid
, *escalade_c_cvsid
,
120 *knowndrives_c_cvsid
, *os_XXXX_c_cvsid
, *scsicmds_c_cvsid
, *utility_c_cvsid
;
122 static const char *filenameandversion
="$Id: smartd.cpp,v 1.395 2007/11/26 18:11:32 guidog Exp $";
123 #ifdef NEED_SOLARIS_ATA_CODE
124 extern const char *os_solaris_ata_s_cvsid
;
127 extern const char *daemon_win32_c_cvsid
, *hostname_win32_c_cvsid
, *syslog_win32_c_cvsid
;
129 const char *smartd_c_cvsid
="$Id: smartd.cpp,v 1.395 2007/11/26 18:11:32 guidog Exp $"
130 ATACMDS_H_CVSID ATAPRINT_H_CVSID CONFIG_H_CVSID
131 #ifdef DAEMON_WIN32_H_CVSID
134 EXTERN_H_CVSID INT64_H_CVSID
135 #ifdef HOSTNAME_WIN32_H_CVSID
136 HOSTNAME_WIN32_H_CVSID
138 KNOWNDRIVES_H_CVSID SCSICMDS_H_CVSID SMARTD_H_CVSID
139 #ifdef SYSLOG_H_CVSID
144 extern const char *reportbug
;
146 // GNU copyleft statement. Needed for GPL purposes.
147 const char *copyleftstring
="smartd comes with ABSOLUTELY NO WARRANTY. This is\n"
148 "free software, and you are welcome to redistribute it\n"
149 "under the terms of the GNU General Public License\n"
150 "Version 2. See http://www.gnu.org for further details.\n\n";
152 extern unsigned char debugmode
;
154 // command-line: how long to sleep between checks
155 static int checktime
=CHECKTIME
;
157 // command-line: name of PID file (NULL for no pid file)
158 static char* pid_file
=NULL
;
160 // configuration file name
162 static char* configfile
= SMARTMONTOOLS_SYSCONFDIR
"/" CONFIGFILENAME
;
164 static char* configfile
= "./" CONFIGFILENAME
;
166 // configuration file "name" if read from stdin
167 static /*const*/ char * const configfile_stdin
= "<stdin>";
168 // allocated memory for alternate configuration file name
169 static char* configfile_alt
= NULL
;
171 // command-line: when should we exit?
174 // command-line; this is the default syslog(3) log facility to use.
175 static int facility
=LOG_DAEMON
;
178 // command-line: fork into background?
179 static bool do_fork
=true;
182 // used for control of printing, passing arguments to atacmds.c
183 smartmonctrl
*con
=NULL
;
185 // pointers to (real or simulated) entries in configuration file, and
186 // maximum space currently allocated for these entries.
187 cfgfile
**cfgentries
=NULL
;
188 int cfgentries_max
=0;
190 // pointers to ATA and SCSI devices being monitored, maximum and
192 cfgfile
**atadevlist
=NULL
, **scsidevlist
=NULL
;
193 int atadevlist_max
=0, scsidevlist_max
=0;
194 int numdevata
=0, numdevscsi
=0;
196 // track memory usage
197 extern int64_t bytes
;
200 extern int exitstatus
;
202 // set to one if we catch a USR1 (check devices now)
203 volatile int caughtsigUSR1
=0;
206 // set to one if we catch a USR2 (toggle debug mode)
207 volatile int caughtsigUSR2
=0;
210 // set to one if we catch a HUP (reload config file). In debug mode,
211 // set to two, if we catch INT (also reload config file).
212 volatile int caughtsigHUP
=0;
214 // set to signal value if we catch INT, QUIT, or TERM
215 volatile int caughtsigEXIT
=0;
218 // stack environment if we time out during SCSI access (USB devices)
219 jmp_buf registerscsienv
;
222 // translate cfg->pending into the correct Attribute numbers
223 void TranslatePending(unsigned short pending
, unsigned char *current
, unsigned char *offline
) {
225 unsigned char curr
= CURR_PEND(pending
);
226 unsigned char off
= OFF_PEND(pending
);
228 // look for special value of CUR_UNC_DEFAULT that means DONT
229 // monitor. 0 means DO test.
230 if (curr
==CUR_UNC_DEFAULT
)
233 curr
=CUR_UNC_DEFAULT
;
235 // look for special value of OFF_UNC_DEFAULT that means DONT
236 // monitor. 0 means DO TEST.
237 if (off
==OFF_UNC_DEFAULT
)
249 // free all memory associated with selftest part of configfile entry. Return NULL
250 testinfo
* FreeTestData(testinfo
*data
){
252 // make sure we have something to do.
256 // free space for text pattern
257 data
->regex
=FreeNonZero(data
->regex
, -1, __LINE__
, filenameandversion
);
259 // free compiled expression
260 regfree(&(data
->cregex
));
262 // make sure that no sign of the compiled expression is left behind
263 // (just in case, to help detect bugs if we ever try and refer to
265 memset(&(data
->cregex
), '0', sizeof(regex_t
));
267 // free remaining memory space
268 data
=FreeNonZero(data
, sizeof(testinfo
), __LINE__
, filenameandversion
);
273 cfgfile
**AllocateMoreSpace(cfgfile
**oldarray
, int *oldsize
, char *listname
){
274 // for now keep BLOCKSIZE small to help detect coding problems.
275 // Perhaps increase in the future.
276 const int BLOCKSIZE
=8;
279 int news
= olds
+ BLOCKSIZE
;
280 cfgfile
**newptr
=(cfgfile
**)realloc(oldarray
, news
*sizeof(cfgfile
*));
282 // did we get more space?
285 // clear remaining entries ala calloc()
286 for (i
=olds
; i
<news
; i
++)
289 bytes
+= BLOCKSIZE
*sizeof(cfgfile
*);
294 PrintOut(LOG_INFO
, "allocating %d slots for %s\n", BLOCKSIZE
, listname
);
300 PrintOut(LOG_CRIT
, "out of memory for allocating %s list\n", listname
);
304 void PrintOneCVS(const char *a_cvs_id
){
306 printone(out
,a_cvs_id
);
307 PrintOut(LOG_INFO
,"%s",out
);
311 // prints CVS identity information for the executable
313 const char *configargs
=strlen(SMARTMONTOOLS_CONFIGURE_ARGS
)?SMARTMONTOOLS_CONFIGURE_ARGS
:"[no arguments given]";
315 PrintOut(LOG_INFO
,(char *)copyleftstring
);
316 PrintOut(LOG_INFO
,"CVS version IDs of files used to build this code are:\n");
317 PrintOneCVS(atacmdnames_c_cvsid
);
318 PrintOneCVS(atacmds_c_cvsid
);
319 PrintOneCVS(ataprint_c_cvsid
);
321 PrintOneCVS(daemon_win32_c_cvsid
);
324 PrintOneCVS(hostname_win32_c_cvsid
);
326 PrintOneCVS(knowndrives_c_cvsid
);
327 PrintOneCVS(os_XXXX_c_cvsid
);
328 #ifdef NEED_SOLARIS_ATA_CODE
329 PrintOneCVS( os_solaris_ata_s_cvsid
);
331 PrintOneCVS(scsicmds_c_cvsid
);
332 PrintOneCVS(smartd_c_cvsid
);
334 PrintOneCVS(syslog_win32_c_cvsid
);
336 PrintOneCVS(utility_c_cvsid
);
337 PrintOut(LOG_INFO
, "\nsmartmontools release " PACKAGE_VERSION
" dated " SMARTMONTOOLS_RELEASE_DATE
" at " SMARTMONTOOLS_RELEASE_TIME
"\n");
338 PrintOut(LOG_INFO
, "smartmontools build host: " SMARTMONTOOLS_BUILD_HOST
"\n");
339 PrintOut(LOG_INFO
, "smartmontools build configured: " SMARTMONTOOLS_CONFIGURE_DATE
"\n");
340 PrintOut(LOG_INFO
, "smartd compile dated " __DATE__
" at "__TIME__
"\n");
341 PrintOut(LOG_INFO
, "smartmontools configure arguments: %s\n", configargs
);
345 // Removes config file entry, freeing all memory
346 void RmConfigEntry(cfgfile
**anentry
, int whatline
){
350 // pointer should never be null!
352 PrintOut(LOG_CRIT
,"Internal error in RmConfigEntry() at line %d of file %s\n%s",
353 whatline
, filenameandversion
, reportbug
);
357 // only remove entries that exist!
361 // entry exists -- free all of its memory
362 cfg
->name
= FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
363 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
364 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
365 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
366 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
368 cfg
->mailwarn
->address
= FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
369 cfg
->mailwarn
->emailcmdline
= FreeNonZero(cfg
->mailwarn
->emailcmdline
, -1,__LINE__
,filenameandversion
);
370 cfg
->mailwarn
= FreeNonZero(cfg
->mailwarn
, sizeof(maildata
),__LINE__
,filenameandversion
);
372 cfg
->testdata
= FreeTestData(cfg
->testdata
);
373 *anentry
= FreeNonZero(cfg
, sizeof(cfgfile
),__LINE__
,filenameandversion
);
378 // deallocates all memory associated with cfgentries list
379 void RmAllConfigEntries(){
382 for (i
=0; i
<cfgentries_max
; i
++)
383 RmConfigEntry(cfgentries
+i
, __LINE__
);
385 cfgentries
=FreeNonZero(cfgentries
, sizeof(cfgfile
*)*cfgentries_max
, __LINE__
, filenameandversion
);
391 // deallocates all memory associated with ATA/SCSI device lists
392 void RmAllDevEntries(){
395 for (i
=0; i
<atadevlist_max
; i
++)
396 RmConfigEntry(atadevlist
+i
, __LINE__
);
398 atadevlist
=FreeNonZero(atadevlist
, sizeof(cfgfile
*)*atadevlist_max
, __LINE__
, filenameandversion
);
401 for (i
=0; i
<scsidevlist_max
; i
++)
402 RmConfigEntry(scsidevlist
+i
, __LINE__
);
404 scsidevlist
=FreeNonZero(scsidevlist
, sizeof(cfgfile
*)*scsidevlist_max
, __LINE__
, filenameandversion
);
410 // remove the PID file
411 void RemovePidFile(){
413 if ( -1==unlink(pid_file
) )
414 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
415 pid_file
, strerror(errno
));
416 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
422 // Note if we catch a SIGUSR1
423 void USR1handler(int sig
){
430 // Note if we catch a SIGUSR2
431 void USR2handler(int sig
){
438 // Note if we catch a HUP (or INT in debug mode)
439 void HUPhandler(int sig
){
447 // signal handler for TERM, QUIT, and INT (if not in debug mode)
448 void sighandler(int sig
){
455 // signal handler that prints Goodbye message and removes pidfile
458 // clean up memory -- useful for debugging
459 RmAllConfigEntries();
462 // delete PID file, if one was created
465 // remove alternate configfile name
466 configfile_alt
=FreeNonZero(configfile_alt
, -1,__LINE__
,filenameandversion
);
468 // useful for debugging -- have we managed memory correctly?
469 if (debugmode
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
470 PrintOut(LOG_INFO
, "Memory still allocated for devices at exit is %" PRId64
" bytes.\n", bytes
);
472 // if we are exiting because of a code bug, tell user
473 if (exitstatus
==EXIT_BADCODE
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
474 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
476 if (exitstatus
==0 && bytes
)
477 exitstatus
=EXIT_BADCODE
;
479 // and this should be the final output from smartd before it exits
480 PrintOut(exitstatus
?LOG_CRIT
:LOG_INFO
, "smartd is exiting (exit status %d)\n", exitstatus
);
485 #define ENVLENGTH 1024
487 // a replacement for setenv() which is not available on all platforms.
488 // Note that the string passed to putenv must not be freed or made
489 // invalid, since a pointer to it is kept by putenv(). This means that
490 // it must either be a static buffer or allocated off the heap. The
491 // string can be freed if the environment variable is redefined or
492 // deleted via another call to putenv(). So we keep these on the stack
493 // as long as the popen() call is underway.
494 int exportenv(char* stackspace
, const char *name
, const char *value
){
495 snprintf(stackspace
,ENVLENGTH
, "%s=%s", name
, value
);
496 return putenv(stackspace
);
499 char* dnsdomain(const char* hostname
) {
501 #ifdef HAVE_GETHOSTBYNAME
504 if ((hp
= gethostbyname(hostname
))) {
505 // Does this work if gethostbyname() returns an IPv6 name in
506 // colon/dot notation? [BA]
507 if ((p
= strchr(hp
->h_name
, '.')))
518 // If either address or executable path is non-null then send and log
519 // a warning email, or execute executable
520 void MailWarning(cfgfile
*cfg
, int which
, char *fmt
, ...){
521 char command
[2048], message
[256], hostname
[256], domainname
[256], additional
[256],fullmessage
[1024];
522 char original
[256], further
[256], nisdomain
[256], subject
[256],dates
[DATEANDEPOCHLEN
];
523 char environ_strings
[11][ENVLENGTH
];
526 const int day
=24*3600;
534 "FailedHealthCheck", // 5
535 "FailedReadSmartData", // 6
536 "FailedReadSmartErrorLog", // 7
537 "FailedReadSmartSelfTestLog", // 8
538 "FailedOpenDevice", // 9
539 "CurrentPendingSector", // 10
540 "OfflineUncorrectableSector", // 11
544 char *address
, *executable
;
546 maildata
* data
=cfg
->mailwarn
;
550 char stdinbuf
[1024]; int boxmsgoffs
, boxtype
;
552 const char *newadd
=NULL
, *newwarn
=NULL
;
553 const char *unknown
="[Unknown]";
555 // See if user wants us to send mail
559 address
=data
->address
;
560 executable
=data
->emailcmdline
;
562 if (!address
&& !executable
)
565 // which type of mail are we sending?
566 mail
=(data
->maillog
)+which
;
569 if (data
->emailfreq
<1 || data
->emailfreq
>3) {
570 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg->mailwarn->emailfreq=%d\n",data
->emailfreq
);
573 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
574 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
575 which
, (int)sizeof(whichfail
));
579 // Return if a single warning mail has been sent.
580 if ((data
->emailfreq
==1) && mail
->logged
)
583 // Return if this is an email test and one has already been sent.
584 if (which
== 0 && mail
->logged
)
587 // To decide if to send mail, we need to know what time it is.
590 // Return if less than one day has gone by
591 if (data
->emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
594 // Return if less than 2^(logged-1) days have gone by
595 if (data
->emailfreq
==3 && mail
->logged
){
596 days
=0x01<<(mail
->logged
-1);
598 if (epoch
<(mail
->lastsent
+days
))
602 // record the time of this mail message, and the first mail message
604 mail
->firstsent
=epoch
;
605 mail
->lastsent
=epoch
;
607 // get system host & domain names (not null terminated if length=MAX)
608 #ifdef HAVE_GETHOSTNAME
609 if (gethostname(hostname
, 256))
610 strcpy(hostname
, unknown
);
614 p
= dnsdomain(hostname
);
616 strncpy(domainname
, p
, 255);
617 domainname
[255]='\0';
619 strcpy(domainname
, unknown
);
622 strcpy(hostname
, unknown
);
623 strcpy(domainname
, unknown
);
626 #ifdef HAVE_GETDOMAINNAME
627 if (getdomainname(nisdomain
, 256))
628 strcpy(nisdomain
, unknown
);
632 strcpy(nisdomain
, unknown
);
635 // print warning string into message
637 vsnprintf(message
, 256, fmt
, ap
);
640 // appropriate message about further information
641 additional
[0]=original
[0]=further
[0]='\0';
643 sprintf(further
,"You can also use the smartctl utility for further investigation.\n");
645 switch (data
->emailfreq
){
647 sprintf(additional
,"No additional email messages about this problem will be sent.\n");
650 sprintf(additional
,"Another email message will be sent in 24 hours if the problem persists.\n");
653 sprintf(additional
,"Another email message will be sent in %d days if the problem persists\n",
654 (0x01)<<mail
->logged
);
657 if (data
->emailfreq
>1 && mail
->logged
){
658 dateandtimezoneepoch(dates
, mail
->firstsent
);
659 sprintf(original
,"The original email about this issue was sent at %s\n", dates
);
663 snprintf(subject
, 256,"SMART error (%s) detected on host: %s", whichfail
[which
], hostname
);
665 // If the user has set cfg->emailcmdline, use that as mailer, else "mail" or "mailx".
667 #ifdef DEFAULT_MAILER
668 executable
= DEFAULT_MAILER
;
673 executable
= "blat"; // http://blat.sourceforge.net/
677 // make a private copy of address with commas replaced by spaces
678 // to separate recipients
680 address
=CustomStrDup(data
->address
, 1, __LINE__
, filenameandversion
);
681 #ifndef _WIN32 // blat mailer needs comma
684 while ((comma
=strchr(comma
, ',')))
690 // Export information in environment variables that will be useful
692 exportenv(environ_strings
[0], "SMARTD_MAILER", executable
);
693 exportenv(environ_strings
[1], "SMARTD_MESSAGE", message
);
694 exportenv(environ_strings
[2], "SMARTD_SUBJECT", subject
);
695 dateandtimezoneepoch(dates
, mail
->firstsent
);
696 exportenv(environ_strings
[3], "SMARTD_TFIRST", dates
);
697 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
698 exportenv(environ_strings
[4], "SMARTD_TFIRSTEPOCH", dates
);
699 exportenv(environ_strings
[5], "SMARTD_FAILTYPE", whichfail
[which
]);
701 exportenv(environ_strings
[6], "SMARTD_ADDRESS", address
);
702 exportenv(environ_strings
[7], "SMARTD_DEVICESTRING", cfg
->name
);
704 switch (cfg
->controller_type
) {
705 case CONTROLLER_3WARE_678K
:
706 case CONTROLLER_3WARE_9000_CHAR
:
707 case CONTROLLER_3WARE_678K_CHAR
:
709 char *s
,devicetype
[16];
710 sprintf(devicetype
, "3ware,%d", cfg
->controller_port
-1);
711 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
712 if ((s
=strchr(cfg
->name
, ' ')))
714 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
719 case CONTROLLER_CCISS
:
721 char *s
,devicetype
[16];
722 sprintf(devicetype
, "cciss,%d", cfg
->controller_port
-1);
723 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
724 if ((s
=strchr(cfg
->name
, ' ')))
726 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
732 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "ata");
733 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
735 case CONTROLLER_MARVELL_SATA
:
736 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "marvell");
737 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
739 case CONTROLLER_SCSI
:
740 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "scsi");
741 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
744 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "sat");
745 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
749 char *s
,devicetype
[16];
750 sprintf(devicetype
, "hpt,%d/%d/%d", cfg
->hpt_data
[0],
751 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
752 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
753 if ((s
=strchr(cfg
->name
, ' ')))
755 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
762 snprintf(fullmessage
, 1024,
763 "This email was generated by the smartd daemon running on:\n\n"
766 " NIS domain: %s\n\n"
767 "The following warning/error was logged by the smartd daemon:\n\n"
769 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
771 hostname
, domainname
, nisdomain
, message
, further
, original
, additional
);
772 exportenv(environ_strings
[10], "SMARTD_FULLMESSAGE", fullmessage
);
774 // now construct a command to send this as EMAIL
777 snprintf(command
, 2048,
778 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
779 "%sENDMAIL\n", subject
, address
, fullmessage
);
781 snprintf(command
, 2048, "%s 2>&1", executable
);
783 // tell SYSLOG what we are about to do...
784 newadd
=address
?address
:"<nomailer>";
785 newwarn
=which
?"Warning via":"Test of";
787 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
788 which
?"Sending warning via":"Executing test of", executable
, newadd
);
790 // issue the command to send mail or to run the user's executable
792 if (!(pfp
=popen(command
, "r")))
793 // failed to popen() mail process
794 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
795 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
799 char buffer
[EBUFLEN
];
801 // if unexpected output on stdout/stderr, null terminate, print, and flush
802 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
804 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
806 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
807 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
809 // flush pipe if needed
810 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
813 // tell user that pipe was flushed, or that something is really wrong
814 if (count
&& count
<EBUFLEN
)
815 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
816 newwarn
, executable
, newadd
);
818 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
819 newwarn
, executable
, newadd
);
822 // if something went wrong with mail process, print warning
824 if (-1==(status
=pclose(pfp
)))
825 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
826 errno
?strerror(errno
):"");
828 // mail process apparently succeeded. Check and report exit status
831 if (WIFEXITED(status
)) {
832 // exited 'normally' (but perhaps with nonzero status)
833 status8
=WEXITSTATUS(status
);
836 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
837 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
839 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
840 newwarn
, executable
, newadd
, status
, status8
);
842 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
845 if (WIFSIGNALED(status
))
846 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
847 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
849 // this branch is probably not possible. If subprocess is
850 // stopped then pclose() should not return.
851 if (WIFSTOPPED(status
))
852 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
853 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
860 // No "here-documents" on Windows, so must use separate commandline and stdin
861 command
[0] = stdinbuf
[0] = 0;
862 boxtype
= -1; boxmsgoffs
= 0;
863 newadd
= "<nomailer>";
865 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
866 int addroffs
= (!strncmp(address
, "sys", 3) ? 3 : 0);
867 if (!strncmp(address
+addroffs
, "msgbox", 6) && (!address
[addroffs
+6] || address
[addroffs
+6] == ',')) {
868 boxtype
= (addroffs
> 0 ? 1 : 0);
870 if (address
[addroffs
])
876 if (address
[addroffs
]) {
877 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
878 snprintf(command
, sizeof(command
),
879 "%s - -q -subject \"%s\" -to \"%s\"",
880 executable
, subject
, address
+addroffs
);
881 newadd
= address
+addroffs
;
883 // Message for mail [0...] and messagebox [boxmsgoffs...]
884 snprintf(stdinbuf
, sizeof(stdinbuf
),
885 "This email was generated by the smartd daemon running on:\n\n"
888 // " NIS domain: %s\n"
890 "The following warning/error was logged by the smartd daemon:\n\n"
892 "For details see the event log or log file of smartd.\n\n"
895 hostname
, /*domainname, */ nisdomain
, &boxmsgoffs
, message
, further
, original
, additional
);
898 snprintf(command
, sizeof(command
), "%s", executable
);
900 newwarn
=which
?"Warning via":"Test of";
903 daemon_messagebox(boxtype
, subject
, stdinbuf
+boxmsgoffs
);
904 PrintOut(LOG_INFO
,"%s message box\n", newwarn
);
907 char stdoutbuf
[800]; // < buffer in syslog_win32::vsyslog()
910 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
911 (which
?"Sending warning via":"Executing test of"), executable
, newadd
);
912 rc
= daemon_spawn(command
, stdinbuf
, strlen(stdinbuf
), stdoutbuf
, sizeof(stdoutbuf
));
913 if (rc
>= 0 && stdoutbuf
[0])
914 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
915 newwarn
, executable
, newadd
, strlen(stdoutbuf
), stdoutbuf
);
917 PrintOut(LOG_CRIT
,"%s %s to %s: failed, exit status %d\n",
918 newwarn
, executable
, newadd
, rc
);
920 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
925 // increment mail sent counter
928 // free copy of address (without commas)
929 address
=FreeNonZero(address
, -1, __LINE__
, filenameandversion
);
934 // Printing function for watching ataprint commands, or losing them
935 // [From GLIBC Manual: Since the prototype doesn't specify types for
936 // optional arguments, in a call to a variadic function the default
937 // argument promotions are performed on the optional argument
938 // values. This means the objects of type char or short int (whether
939 // signed or not) are promoted to either int or unsigned int, as
941 void pout(const char *fmt
, ...){
944 // get the correct time in syslog()
945 FixGlibcTimeZoneBug();
946 // initialize variable argument list
948 // in debug==1 mode we will print the output from the ataprint.o functions!
949 if (debugmode
&& debugmode
!=2)
951 if (facility
== LOG_LOCAL1
) // logging to stdout
952 vfprintf(stderr
,fmt
,ap
);
956 // in debug==2 mode we print output from knowndrives.o functions
957 else if (debugmode
==2 || con
->reportataioctl
|| con
->reportscsiioctl
|| con
->controller_port
) {
958 openlog("smartd", LOG_PID
, facility
);
959 vsyslog(LOG_INFO
, fmt
, ap
);
967 // This function prints either to stdout or to the syslog as needed.
968 // This function is also used by utility.cpp to report LOG_CRIT errors.
969 void PrintOut(int priority
, const char *fmt
, ...){
972 // get the correct time in syslog()
973 FixGlibcTimeZoneBug();
974 // initialize variable argument list
978 if (facility
== LOG_LOCAL1
) // logging to stdout
979 vfprintf(stderr
,fmt
,ap
);
984 openlog("smartd", LOG_PID
, facility
);
985 vsyslog(priority
,fmt
,ap
);
993 // Wait for the pid file to show up. This makes sure a calling program knows
994 // that the daemon is really up and running and has a pid with which to kill it
995 bool WaitForPidFile()
997 int waited
, max_wait
= 10;
998 struct stat stat_buf
;
1000 if(!pid_file
|| debugmode
)
1003 for(waited
= 0; waited
< max_wait
; ++waited
) {
1004 if(stat(pid_file
, &stat_buf
) == 0) {
1013 // Forks new process, closes ALL file descriptors, redirects stdin,
1014 // stdout, and stderr. Not quite daemon(). See
1015 // http://www.iar.unlp.edu.ar/~fede/revistas/lj/Magazines/LJ47/2335.html
1016 // for a good description of why we do things this way.
1022 // flush all buffered streams. Else we might get two copies of open
1023 // streams since both parent and child get copies of the buffers.
1027 if ((pid
=fork()) < 0) {
1029 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1033 // we are the parent process, wait for pid file, then exit cleanly
1034 if(!WaitForPidFile()) {
1035 PrintOut(LOG_CRIT
,"PID file %s didn't show up!\n", pid_file
);
1040 // from here on, we are the child process.
1043 // Fork one more time to avoid any possibility of having terminals
1044 if ((pid
=fork()) < 0) {
1046 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1050 // we are the parent process -- exit cleanly
1053 // Now we are the child's child...
1056 // close any open file descriptors
1057 for (i
=getdtablesize();i
>=0;--i
)
1061 // Cygwin's setsid() does not detach the process from Windows console
1063 #endif // __CYGWIN__
1065 // redirect any IO attempts to /dev/null for stdin
1066 i
=open("/dev/null",O_RDWR
);
1075 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1079 // No fork() on native Win32
1080 // Detach this process from console
1082 if (daemon_detach("smartd")) {
1083 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1086 // stdin/out/err now closed if not redirected
1092 // create a PID file containing the current process id
1093 void WritePidFile() {
1096 pid_t pid
= getpid();
1101 old_umask
= umask(0077); // rwx------
1103 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1104 old_umask
= umask(0033); // rwxr--r--
1106 fp
= fopen(pid_file
, "w");
1110 } else if (fprintf(fp
, "%d\n", (int)pid
) <= 0) {
1112 } else if (fclose(fp
) != 0) {
1116 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
);
1119 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
, (int)pid
);
1124 // Prints header identifying version of code and home
1126 #ifdef HAVE_GET_OS_VERSION_STR
1127 const char * ver
= get_os_version_str();
1129 const char * ver
= SMARTMONTOOLS_BUILD_HOST
;
1131 PrintOut(LOG_INFO
,"smartd version %s [%s] Copyright (C) 2002-7 Bruce Allen\n", PACKAGE_VERSION
, ver
);
1132 PrintOut(LOG_INFO
,"Home page is " PACKAGE_HOMEPAGE
"\n\n");
1136 // prints help info for configuration file Directives
1139 "Configuration file (%s) Directives (after device name):\n"
1140 " -d TYPE Set the device type: ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N, cciss,N\n"
1141 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1142 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1143 " -S VAL Enable/disable attribute autosave (on/off)\n"
1144 " -n MODE No check if: never[,q], sleep[,q], standby[,q], idle[,q]\n"
1145 " -H Monitor SMART Health Status, report if failed\n"
1146 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1147 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1148 " -f Monitor 'Usage' Attributes, report failures\n"
1149 " -m ADD Send email warning to address ADD\n"
1150 " -M TYPE Modify email warning behavior (see man page)\n"
1151 " -p Report changes in 'Prefailure' Attributes\n"
1152 " -u Report changes in 'Usage' Attributes\n"
1153 " -t Equivalent to -p and -u Directives\n"
1154 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1155 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1156 " -i ID Ignore Attribute ID for -f Directive\n"
1157 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1158 " -C ID Monitor Current Pending Sectors in Attribute ID\n"
1159 " -U ID Monitor Offline Uncorrectable Sectors in Attribute ID\n"
1160 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1161 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1162 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1163 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1164 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1165 " # Comment: text after a hash sign is ignored\n"
1166 " \\ Line continuation character\n"
1167 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1168 "Use ID = 0 to turn off -C and/or -U Directives\n"
1169 "Example: /dev/hda -a\n",
// GetValidArgList: help-text lookup for command-line option arguments.
// Every visible return yields a string literal that lists the accepted
// arguments for one option.
// NOTE(review): the code that selects a string from opt (presumably a switch
// with one case per option letter, plus the NULL fallback described below) is
// not visible in this view -- confirm against the complete file.
1174 /* Returns a pointer to a static string containing a formatted list of the valid
1175 arguments to the option opt or NULL on failure. */
1176 const char *GetValidArgList(char opt
) {
1179 return "<FILE_NAME>, -";
1181 return "valid_regular_expression";
1183 return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
1185 return "nodev, errors, nodevstartup, never, onecheck, showtests";
1187 return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
1189 return "<FILE_NAME>";
1191 return "<INTEGER_SECONDS>";
1197 /* prints help information for command syntax */
1199 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1200 #ifdef HAVE_GETOPT_LONG
1201 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1202 PrintOut(LOG_INFO
," Read configuration file NAME or stdin [default is %s]\n\n", configfile
);
1203 PrintOut(LOG_INFO
," -d, --debug\n");
1204 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1205 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1206 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1207 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1208 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1209 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1210 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1211 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
1213 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1215 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1218 PrintOut(LOG_INFO
," -n, --no-fork\n");
1219 PrintOut(LOG_INFO
," Do not fork into background\n\n");
1221 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1222 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1223 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1224 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1225 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1226 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1228 PrintOut(LOG_INFO
," --service\n");
1229 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1230 PrintOut(LOG_INFO
," smartd install [options]\n");
1231 PrintOut(LOG_INFO
," Remove service with:\n");
1232 PrintOut(LOG_INFO
," smartd remove\n\n");
1234 #endif // _WIN32 || __CYGWIN__
1235 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1236 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
1238 PrintOut(LOG_INFO
," -c NAME|- Read configuration file NAME or stdin [default is %s]\n", configfile
);
1239 PrintOut(LOG_INFO
," -d Start smartd in debug mode\n");
1240 PrintOut(LOG_INFO
," -D Print the configuration file Directives and exit\n");
1241 PrintOut(LOG_INFO
," -h Display this help and exit\n");
1242 PrintOut(LOG_INFO
," -i N Set interval between disk checks to N seconds, where N >= 10\n");
1243 PrintOut(LOG_INFO
," -l local? Use syslog facility local0 - local7, or daemon\n");
1244 PrintOut(LOG_INFO
," -n Do not fork into background\n");
1245 PrintOut(LOG_INFO
," -p NAME Write PID file NAME\n");
1246 PrintOut(LOG_INFO
," -q WHEN Quit on one of: %s\n", GetValidArgList('q'));
1247 PrintOut(LOG_INFO
," -r TYPE Report transactions for one of: %s\n", GetValidArgList('r'));
1248 PrintOut(LOG_INFO
," -V Print License, Copyright, and version information\n");
// OpenDevice: open a device through deviceopen() and log a message if the
// open fails.
//   device   - device name; searched for a ' ' character before opening
//   mode     - interface string handed unchanged to deviceopen()
//   scanning - when nonzero (and debugmode is zero) open failures are not
//              printed
// NOTE(review): the declarations of s and fd, the code that cuts and restores
// the string at the space, and the return statements are not visible in this
// view.
1252 // returns negative if problem, else fd>=0
1253 static int OpenDevice(char *device
, char *mode
, int scanning
) {
1257 // If there is an ASCII "space" character in the device name,
1258 // terminate string there. This is for 3ware and highpoint devices only.
1259 if ((s
=strchr(device
,' ')))
1263 fd
= deviceopen(device
, mode
);
1265 // if we removed a space, put it back in please
1269 // if we failed to open the device, complain!
1272 // For linux+devfs, a nonexistent device gives a strange error
1273 // message. This makes the error message a bit more sensible.
1274 // If no debug and scanning - don't print errors
1275 if (debugmode
|| !scanning
) {
1276 if (errno
==ENOENT
|| errno
==ENOTDIR
)
1279 PrintOut(LOG_INFO
,"Device: %s, %s, open() failed\n",
1280 device
, strerror(errno
));
1284 // device opened successfully
// CloseDevice: close fd with deviceclose(). If deviceclose() returns nonzero,
// log the device name, strerror(errno) and the descriptor number.
//   fd   - descriptor to close
//   name - device name, used only in the log message
// NOTE(review): the return statements are not visible in this view.
1288 int CloseDevice(int fd
, char *name
){
1289 if (deviceclose(fd
)){
1290 PrintOut(LOG_INFO
,"Device: %s, %s, close(%d) failed\n", name
, strerror(errno
), fd
);
1293 // device successfully closed
// ATAErrorCount: read the SMART error log of an open ATA device and return its
// error count.
//   fd   - open device descriptor, passed to ataReadErrorLog()
//   name - device name, used only in the log message
// NOTE(review): the statement executed after the failure message (presumably
// the negative return) is not visible in this view.
1297 // returns <0 on failure
1298 int ATAErrorCount(int fd
, char *name
){
1299 struct ata_smart_errorlog log
;
1301 if (-1==ataReadErrorLog(fd
,&log
)){
1302 PrintOut(LOG_INFO
,"Device: %s, Read SMART Error Log Failed\n",name
);
1306 // return current number of ATA errors
// ata_error_count is reported only when error_log_pointer is nonzero;
// otherwise the result is 0.
1307 return log
.error_log_pointer
?log
.ata_error_count
:0;
// SelfTestErrorCount: read the SMART self-test log of an open ATA device and
// return the value produced by ataPrintSmartSelfTestlog(&log,0).
//   fd   - open device descriptor, passed to ataReadSelfTestLog()
//   name - device name, used only in the log message
// NOTE(review): the statement executed after the failure message (presumably
// the negative return) is not visible in this view.
1310 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1311 // error count, and top bits are the power-on hours of the last error.
1312 int SelfTestErrorCount(int fd
, char *name
){
1313 struct ata_smart_selftestlog log
;
1315 if (-1==ataReadSelfTestLog(fd
,&log
)){
1316 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1320 // return current number of self-test errors
1321 return ataPrintSmartSelfTestlog(&log
,0);
// ATADeviceScan: probe one configuration entry as an ATA device and decide
// whether it can be monitored.
//   cfg      - configuration entry; several of its fields are updated here
//              (monitoring flags, smartval/smartthres buffers, log counters,
//              controller_type)
//   scanning - forwarded to OpenDevice()
// Visible steps, in order: open the device, copy the user settings from cfg to
// con, read the IDENTIFY DEVICE data, apply or skip the drive presets, check
// and enable SMART, apply the autosave / automatic offline test settings, run
// the capability checks (health status, attribute values, self-test log, error
// log, power mode), grow atadevlist if needed and finally close the device.
// NOTE(review): many statements of this function are not visible in this view
// (declarations of mode/retid/retval/val, the return statements, several
// if/else headers and the closing braces), so the exact return values cannot
// be documented from here.
1324 // scan to see what ata devices there are, and if they support SMART
1325 int ATADeviceScan(cfgfile
*cfg
, int scanning
){
1326 int fd
, supported
=0;
1327 struct ata_identify_device drive
;
1328 char *name
=cfg
->name
;
1329 int retainsmartdata
=0;
1333 // should we try to register this as an ATA device?
1334 switch (cfg
->controller_type
) {
1335 case CONTROLLER_ATA
:
1336 case CONTROLLER_3WARE_678K
:
1337 case CONTROLLER_MARVELL_SATA
:
1338 case CONTROLLER_HPT
:
1339 case CONTROLLER_UNKNOWN
:
1342 case CONTROLLER_3WARE_678K_CHAR
:
1343 mode
="ATA_3WARE_678K";
1345 case CONTROLLER_3WARE_9000_CHAR
:
1346 mode
="ATA_3WARE_9000";
1348 case CONTROLLER_SAT
:
1352 // not a recognized ATA or SATA device. We should never enter
1358 if ((fd
=OpenDevice(name
, mode
, scanning
))<0)
1359 // device open failed
1361 PrintOut(LOG_INFO
,"Device: %s, opened\n", name
);
1363 // pass user settings on to low-level ATA commands
1364 con
->controller_port
=cfg
->controller_port
;
1365 con
->hpt_data
[0]=cfg
->hpt_data
[0];
1366 con
->hpt_data
[1]=cfg
->hpt_data
[1];
1367 con
->hpt_data
[2]=cfg
->hpt_data
[2];
1368 con
->controller_type
=cfg
->controller_type
;
1369 con
->controller_explicit
=cfg
->controller_explicit
;
1370 con
->fixfirmwarebug
= cfg
->fixfirmwarebug
;
1371 con
->satpassthrulen
= cfg
->satpassthrulen
;
1373 // Get drive identity structure
1374 if ((retid
=ataReadHDIdentity (fd
,&drive
))){
1376 // Unable to read Identity structure
1377 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1379 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1380 name
, packetdevicetype(retid
-1));
1381 CloseDevice(fd
, name
);
1385 // Show if device in database, and use preset vendor attribute
1386 // options unless user has requested otherwise.
1387 if (cfg
->ignorepresets
)
1388 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1390 // do whatever applypresets decides to do. Will allocate memory if
1391 // cfg->attributedefs is needed.
1392 if (applypresets(&drive
, &cfg
->attributedefs
, con
)<0)
1393 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1395 PrintOut(LOG_INFO
, "Device: %s, found in smartd database.\n", name
);
1397 // then save the correct state of the flag (applypresets may have changed it)
1398 cfg
->fixfirmwarebug
= con
->fixfirmwarebug
;
1401 // If requested, show which presets would be used for this drive
1402 if (cfg
->showpresets
) {
// debugmode is saved here and restored after showpresets() returns
1403 int savedebugmode
=debugmode
;
1404 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1407 showpresets(&drive
);
1408 debugmode
=savedebugmode
;
1411 // see if drive supports SMART
1412 supported
=ataSmartSupport(&drive
);
1415 // drive does NOT support SMART
1416 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
1418 // can't tell if drive supports SMART
1419 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
1421 // should we proceed anyway?
1422 if (cfg
->permissive
){
1423 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
1426 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
1427 CloseDevice(fd
, name
);
1432 if (ataEnableSmart(fd
)){
1433 // Enable SMART command has failed
1434 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
1435 CloseDevice(fd
, name
);
1439 // disable device attribute autosave...
1440 if (cfg
->autosave
==1){
1441 if (ataDisableAutoSave(fd
))
1442 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
1444 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
1447 // or enable device attribute autosave
1448 if (cfg
->autosave
==2){
1449 if (ataEnableAutoSave(fd
))
1450 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
1452 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
1455 // capability check: SMART status
1456 if (cfg
->smartcheck
&& ataSmartStatus2(fd
)==-1){
1457 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
1461 // capability check: Read smart values and thresholds. Note that
1462 // smart values are ALSO needed even if we ONLY want to know if the
1463 // device is self-test log or error-log capable! After ATA-5, this
1464 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1465 // but sadly not for ATA-5. Sigh.
1467 // do we need to retain SMART data after returning from this routine?
1468 retainsmartdata
=cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
;
1470 // do we need to get SMART data?
1471 if (retainsmartdata
|| cfg
->autoofflinetest
|| cfg
->selftest
|| cfg
->errorlog
|| cfg
->pending
!=DONT_MONITOR_UNC
) {
1473 unsigned char currentpending
, offlinepending
;
1475 cfg
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
));
1476 cfg
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
));
1478 if (!cfg
->smartval
|| !cfg
->smartthres
){
1479 PrintOut(LOG_CRIT
,"Not enough memory to obtain SMART data\n");
// If either read fails, all attribute-, temperature- and pending-sector-based
// monitoring is switched off for this device.
1483 if (ataReadSmartValues(fd
,cfg
->smartval
) ||
1484 ataReadSmartThresholds (fd
,cfg
->smartthres
)){
1485 PrintOut(LOG_INFO
,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name
);
1486 retainsmartdata
=cfg
->usagefailed
=cfg
->prefail
=cfg
->usage
=0;
1487 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1488 cfg
->pending
=DONT_MONITOR_UNC
;
1491 // see if the necessary Attribute is there to monitor offline or
1492 // current pending sectors or temperature
// NOTE(review): "¤tpending" below looks like a mis-decoded "&currentpending"
// (the text "&curren" rendered as the HTML entity for the currency sign) --
// confirm against the upstream source.
1493 TranslatePending(cfg
->pending
, ¤tpending
, &offlinepending
);
1495 if (currentpending
&& ATAReturnAttributeRawValue(currentpending
, cfg
->smartval
)<0) {
1496 PrintOut(LOG_INFO
,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1497 name
, (int)currentpending
);
// replace the low byte of cfg->pending with CUR_UNC_DEFAULT
1498 cfg
->pending
&= 0xff00;
1499 cfg
->pending
|= CUR_UNC_DEFAULT
;
1502 if (offlinepending
&& ATAReturnAttributeRawValue(offlinepending
, cfg
->smartval
)<0) {
1503 PrintOut(LOG_INFO
,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1504 name
, (int)offlinepending
);
// replace the high byte of cfg->pending with OFF_UNC_DEFAULT
1505 cfg
->pending
&= 0x00ff;
1506 cfg
->pending
|= OFF_UNC_DEFAULT
<<8;
1509 if ( (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
1510 && !ATAReturnTemperatureValue(cfg
->smartval
, cfg
->attributedefs
)) {
1511 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name
);
1512 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1516 // enable/disable automatic offline testing
1517 if (cfg
->autoofflinetest
){
1518 // is this an enable or disable request?
// autoofflinetest==1 selects "disable"; any other nonzero value selects "enable"
1519 const char *what
=(cfg
->autoofflinetest
==1)?"disable":"enable";
1521 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
1523 // if command appears unsupported, issue a warning...
1524 if (!isSupportAutomaticTimer(cfg
->smartval
))
1525 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
1526 // ... but then try anyway
1527 if ((cfg
->autoofflinetest
==1)?ataDisableAutoOffline(fd
):ataEnableAutoOffline(fd
))
1528 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
1530 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
1534 // capability check: self-test-log
1538 // start with service disabled, and re-enable it if all works OK
1540 cfg
->selflogcount
=0;
1544 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name
);
1545 else if (!cfg
->permissive
&& !isSmartTestLogCapable(cfg
->smartval
, &drive
))
1546 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name
);
1547 else if ((retval
=SelfTestErrorCount(fd
, name
))<0)
1548 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name
);
// record the starting self-test error count and hour stamp from retval
1551 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1552 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1556 // capability check: ATA error log
1560 // start with service disabled, and re-enable it if all works OK
1562 cfg
->ataerrorcount
=0;
1565 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name
);
1566 else if (!cfg
->permissive
&& !isSmartErrorLogCapable(cfg
->smartval
, &drive
))
1567 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name
);
1568 else if ((val
=ATAErrorCount(fd
, name
))<0)
1569 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name
);
1572 cfg
->ataerrorcount
=val
;
1576 // If we don't need to save SMART data, get rid of it now
1577 if (!retainsmartdata
) {
1578 if (cfg
->smartval
) {
1579 cfg
->smartval
=CheckFree(cfg
->smartval
, __LINE__
,filenameandversion
);
1580 bytes
-=sizeof(struct ata_smart_values
);
1582 if (cfg
->smartthres
) {
1583 cfg
->smartthres
=CheckFree(cfg
->smartthres
, __LINE__
,filenameandversion
);
1584 bytes
-=sizeof(struct ata_smart_thresholds_pvt
);
1588 // capabilities check -- does it support powermode?
1589 if (cfg
->powermode
) {
1590 int powermode
=ataCheckPowerMode(fd
);
1592 if (-1 == powermode
) {
1593 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
// any result other than 0, 0x80 or 0xff is reported as not ATA compliant
1596 else if (powermode
!=0 && powermode
!=0x80 && powermode
!=0xff) {
1597 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1603 // If no tests available or selected, return
1604 if (!(cfg
->errorlog
|| cfg
->selftest
|| cfg
->smartcheck
||
1605 cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
||
1606 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
1607 CloseDevice(fd
, name
);
1611 // Do we still have entries available?
1612 while (numdevata
>=atadevlist_max
)
1613 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
1616 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
1618 // record number of device, type of device, increment device count
1619 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1620 cfg
->controller_type
=CONTROLLER_ATA
;
1622 // close file descriptor
1623 CloseDevice(fd
, name
);
// SCSIFilterKnown: issue a standard INQUIRY to an open device and recognise
// devices that should be configured with a different '-d' type.
//   fd     - open device descriptor
//   device - device name, used only in log messages
// The vendor field at offset 8 of the INQUIRY response is compared with
// "3ware", "AMCC" and "ATA     "; offset 36 is compared with "MVSATA".
// NOTE(review): the declaration of req_buff, the assignments to req_len and
// the return statements are not visible in this view.
1627 // Returns 0 if normal SCSI device. Returns -1 if INQUIRY fails.
1628 // Returns 2 if ATA device detected behind SAT layer.
1629 // Returns 1 if other device detected that we don't want to treat
1630 // as a normal SCSI device.
1631 static int SCSIFilterKnown(int fd
, char * device
)
1634 int req_len
, avail_len
, len
;
1636 memset(req_buff
, 0, 96);
1638 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1639 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
1640 /* watch this spot ... other devices could lock up here */
1642 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1643 PrintOut(LOG_INFO
, "Device: %s, failed on INQUIRY; skip device\n", device
);
1644 // device doesn't like INQUIRY commands
// avail_len is byte 4 of the response plus 5; len is the smaller of
// avail_len and req_len
1648 avail_len
= req_buff
[4] + 5;
1649 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
1651 if (0 == strncmp(req_buff
+ 8, "3ware", 5) || 0 == strncmp(req_buff
+ 8, "AMCC", 4) ) {
1652 PrintOut(LOG_INFO
, "Device %s, please try adding '-d 3ware,N'\n", device
);
1653 PrintOut(LOG_INFO
, "Device %s, you may need to replace %s with /dev/twaN or /dev/tweN\n", device
, device
);
1655 } else if ((len
>= 42) && (0 == strncmp(req_buff
+ 36, "MVSATA", 6))) {
1656 PrintOut(LOG_INFO
, "Device %s, please try '-d marvell'\n", device
);
1658 } else if ((avail_len
>= 36) &&
1659 (0 == strncmp(req_buff
+ 8, "ATA ", 8)) &&
1660 has_sat_pass_through(fd
, 0 /* non-packet dev */)) {
1662 PrintOut(LOG_INFO
, "Device %s: ATA disk detected behind SAT layer\n",
1664 PrintOut(LOG_INFO
, " Try adding '-d sat' to the device line in the "
1665 "smartd.conf file.\n");
1666 PrintOut(LOG_INFO
, " For example: '%s -a -d sat'\n", device
);
// SCSIDeviceScan: probe one configuration entry as a SCSI device and decide
// whether it can be monitored.
//   cfg      - configuration entry; fields such as modese_len,
//              TempPageSupported, SmartPageSupported, SuppressReport,
//              selflogcount/selfloghour and controller_type are updated here
//   scanning - forwarded to OpenDevice()
// Visible steps, in order: copy user settings to con, open the device, filter
// known non-SCSI devices with SCSIFilterKnown(), TEST UNIT READY, fetch the IE
// mode page, check that IE (SMART) is enabled, grow scsidevlist if needed,
// detect supported log pages, free ATA-only buffers, trial scsiCheckIE(),
// self-test log check, autosave (GLTSD) handling, close the device.
// NOTE(review): several statements are not visible in this view (declarations
// of fd/err/mode/tBuf/k/asc/ascq/triptemp, return statements, some if/else
// headers), so the exact return values cannot be documented from here.
1673 // on success, return 0. On failure, return >0. Never return <0,
1675 static int SCSIDeviceScan(cfgfile
*cfg
, int scanning
) {
1677 char *device
= cfg
->name
;
1678 struct scsi_iec_mode_page iec
;
1682 // should we try to register this as a SCSI device?
1683 switch (cfg
->controller_type
) {
1684 case CONTROLLER_SCSI
:
1685 case CONTROLLER_UNKNOWN
:
1688 case CONTROLLER_CCISS
:
1694 // pass user settings on to low-level SCSI commands
1695 con
->controller_port
=cfg
->controller_port
;
1696 con
->controller_type
=cfg
->controller_type
;
1699 if ((fd
= OpenDevice(device
, mode
, scanning
)) < 0)
1701 PrintOut(LOG_INFO
,"Device: %s, opened\n", device
);
1703 // early skip if device known and needs to be handled by some other
1704 // device type (e.g. '-d 3ware,<n>')
1705 if (SCSIFilterKnown(fd
, device
)) {
1706 CloseDevice(fd
, device
);
1710 // check that device is ready for commands. IE stores its stuff on
1712 if ((err
= scsiTestUnitReady(fd
))) {
1713 if (SIMPLE_ERR_NOT_READY
== err
)
1714 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
1715 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
1716 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
1717 else if (SIMPLE_ERR_BECOMING_READY
== err
)
1718 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
1720 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
1721 CloseDevice(fd
, device
);
1725 // Badly-conforming USB storage devices may fail this check.
1726 // The response to the following IE mode page fetch (current and
1727 // changeable values) is carefully examined. It has been found
1728 // that various USB devices that malform the response will lock up
1729 // if asked for a log page (e.g. temperature) so it is best to
// on success the mode sense length reported in iec is stored back into cfg
1731 if (!(err
= scsiFetchIECmpage(fd
, &iec
, cfg
->modese_len
)))
1732 cfg
->modese_len
= iec
.modese_len
;
1733 else if (SIMPLE_ERR_BAD_FIELD
== err
)
1734 ; /* continue since it is reasonable not to support IE mpage */
1735 else { /* any other error (including malformed response) unreasonable */
1737 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1739 CloseDevice(fd
, device
);
1743 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1744 // smart if it is off). This may change to be the same as the ATA side.
1745 if (!scsi_IsExceptionControlEnabled(&iec
)) {
1746 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
1747 "Try 'smartctl -s on %s' to turn on SMART features\n",
1749 CloseDevice(fd
, device
);
1753 // Device exists, and does SMART. Add to list (allocating more space if needed)
1754 while (numdevscsi
>= scsidevlist_max
)
1755 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
1757 // Flag that certain log pages are supported (information may be
1758 // available from other sources).
1759 if (0 == scsiLogSense(fd
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0)) {
// walk the response from index 4 up to tBuf[3] + LOGPAGEHDRSIZE
1760 for (k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
1762 case TEMPERATURE_LPAGE
:
1763 cfg
->TempPageSupported
= 1;
1766 cfg
->SmartPageSupported
= 1;
1774 // record type of device
1775 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1776 cfg
->controller_type
= CONTROLLER_SCSI
;
1778 // get rid of allocated memory only needed for ATA devices. These
1779 // might have been allocated if the user specified Ignore options or
1780 // other ATA-only Attribute-specific options on the DEVICESCAN line.
1781 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
1782 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
1783 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
1784 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
1786 // Check if scsiCheckIE() is going to work
1790 UINT8 currenttemp
= 0;
// NOTE(review): "¤ttemp" below looks like a mis-decoded "&currenttemp"
// (the text "&curren" rendered as the HTML entity for the currency sign) --
// confirm against the upstream source.
1793 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
1794 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
1795 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
1796 cfg
->SuppressReport
= 1;
1797 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
) {
1798 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device
);
1799 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1804 // capability check: self-test-log
1806 int retval
=scsiCountFailedSelfTests(fd
, 0);
1808 // no self-test log, turn off monitoring
1809 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
1811 cfg
->selflogcount
=0;
1815 // register starting values to watch for changes
1816 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1817 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1821 // disable autosave (set GLTSD bit)
1822 if (cfg
->autosave
==1){
1823 if (scsiSetControlGLTSD(fd
, 1, cfg
->modese_len
))
1824 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
1826 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
1829 // or enable autosave (clear GLTSD bit)
1830 if (cfg
->autosave
==2){
1831 if (scsiSetControlGLTSD(fd
, 0, cfg
->modese_len
))
1832 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
1834 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
1837 // tell user we are registering device
1838 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
1840 // close file descriptor
1841 CloseDevice(fd
, device
);
// ATACompareValues: compare slot n of two SMART attribute tables and, if the
// attribute changed, describe the change in *delta.
//   delta      - output; newval, oldval, prefail and sameraw are filled in
//   newv, oldv - current and previous attribute tables
//   thresholds - threshold table, indexed with the same n
// NOTE(review): the remaining parameters (n and name are used in the body),
// the declaration and assignment of sameraw, and the return statements are not
// visible in this view.
1845 // We compare old and new values of the n'th attribute. Note that n
1846 // is NOT the attribute ID number. If (Normalized & Raw) equal,
1847 // then return 0, else nonzero.
1848 int ATACompareValues(changedattribute_t
*delta
,
1849 struct ata_smart_values
*newv
,
1850 struct ata_smart_values
*oldv
,
1851 struct ata_smart_thresholds_pvt
*thresholds
,
1853 struct ata_smart_attribute
*now
,*was
;
1854 struct ata_smart_threshold_entry
*thre
;
1855 unsigned char oldval
,newval
;
1858 // check that attribute number in range, and no null pointers
1859 if (n
<0 || n
>=NUMBER_ATA_SMART_ATTRIBUTES
|| !newv
|| !oldv
|| !thresholds
)
1862 // pointers to disk's values and vendor's thresholds
1863 now
=newv
->vendor_attributes
+n
;
1864 was
=oldv
->vendor_attributes
+n
;
1865 thre
=thresholds
->thres_entries
+n
;
1867 // consider only valid attributes
// an id of zero in any of the three entries marks the slot as not valid
1868 if (!now
->id
|| !was
->id
|| !thre
->id
)
1872 // issue warning if they don't have the same ID in all structures:
1873 if ( (now
->id
!= was
->id
) || (now
->id
!= thre
->id
) ){
1874 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
1875 name
, (int)now
->id
, (int)was
->id
, (int)thre
->id
);
1879 // new and old values of Normalized Attributes
1880 newval
=now
->current
;
1881 oldval
=was
->current
;
1883 // See if the RAW values are unchanged (ie, the same)
// the first 6 bytes of the raw fields are compared; memcmp() is nonzero when
// they differ
1884 if (memcmp(now
->raw
, was
->raw
, 6))
1889 // if any values out of the allowed range, or if the values haven't
1890 // changed, return 0
// normalized values of 0 or above 0xfe are treated as out of range
1891 if (!newval
|| !oldval
|| newval
>0xfe || oldval
>0xfe || (oldval
==newval
&& sameraw
))
1894 // values have changed. Construct output and return
1895 delta
->newval
=newval
;
1896 delta
->oldval
=oldval
;
1898 delta
->prefail
=ATTRIBUTE_FLAGS_PREFAILURE(now
->flags
);
1899 delta
->sameraw
=sameraw
;
// IsAttributeOff: query or set the per-attribute "off" bit in a lazily
// allocated bit table.
//   attr     - attribute number; its low three bits select the bit within a
//              byte (mask = 1 << (attr & 7))
//   datap    - address of the table pointer; the table holds NMONITOR blocks
//              of 32 bytes and is allocated with Calloc() on demand
//   set      - 0 to query, 1 to turn the attribute off (see comment below)
//   which    - block index, validated against 0 <= which < NMONITOR
//   whatline - caller's line number, reported in the internal-error message
// NOTE(review): the declaration of loc (the byte index into the 32-byte
// block), the code that sets the bit, and most return statements are not
// visible in this view.
1904 // This looks to see if the corresponding bit of the 32 bytes is set.
1905 // This wastes a few bytes of storage but eliminates all searching and
1906 // sorting functions! Entry is ZERO <==> the attribute ON. Calling
1907 // with set=0 tells you if the attribute is being tracked or not.
1908 // Calling with set=1 turns the attribute OFF.
1909 int IsAttributeOff(unsigned char attr
, unsigned char **datap
, int set
, int which
, int whatline
){
1910 unsigned char *data
;
1912 int bit
=attr
& 0x07;
1913 unsigned char mask
=0x01<<bit
;
1915 if (which
>=NMONITOR
|| which
< 0){
1916 PrintOut(LOG_CRIT
, "Internal error in IsAttributeOff() at line %d of file %s (which=%d)\n%s",
1917 whatline
, filenameandversion
, which
, reportbug
);
1921 if (*datap
== NULL
){
1922 // NULL data implies Attributes are ON...
1927 if (!(*datap
=(unsigned char *)Calloc(NMONITOR
*32, 1))){
1928 PrintOut(LOG_CRIT
,"No memory to create monattflags\n");
1933 // pointer to the 256 bits that we need
1934 data
=*datap
+which
*32;
1936 // attribute zero is always OFF
1941 return (data
[loc
] & mask
);
1945 // return value when setting has no sense
// CheckSelfTestLogs: compare a freshly read self-test summary with the values
// stored in cfg and warn the user about new errors.
//   cfg  - device entry; selflogcount and selfloghour are updated at the end
//   newi - packed summary, decoded with SELFTEST_ERRORCOUNT() and
//          SELFTEST_ERRORHOURS()
// NOTE(review): the conditions guarding the read-failure warning and the
// "count increased" branch, and the argument lists of several calls, are not
// visible in this view.
1949 // If the self-test log has got more self-test errors (or more recent
1950 // self-test errors) recorded, then notify user.
1951 void CheckSelfTestLogs(cfgfile
*cfg
, int newi
){
1952 char *name
=cfg
->name
;
1956 MailWarning(cfg
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
1958 // old and new error counts
1959 int oldc
=cfg
->selflogcount
;
1960 int newc
=SELFTEST_ERRORCOUNT(newi
);
1962 // old and new error timestamps in hours
1963 int oldh
=cfg
->selfloghour
;
1964 int newh
=SELFTEST_ERRORHOURS(newi
);
1967 // increase in error count
1968 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
1970 MailWarning(cfg
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
1972 } else if (oldh
!=newh
) {
1973 // more recent error
1974 // a 'more recent' error might actually be a smaller hour number,
1975 // if the hour number has wrapped.
1976 // There's still a bug here. You might just happen to run a new test
1977 // exactly 32768 hours after the previous failure, and have run exactly
1978 // 20 tests between the two, in which case smartd will miss the
1980 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1982 MailWarning(cfg
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1986 // Needed since self-test error count may DECREASE. Hour might
1987 // also have changed.
1988 cfg
->selflogcount
= newc
;
1989 cfg
->selfloghour
= newh
;
// DoTestNow: decide whether a scheduled self-test of the given type is due.
//   cfg      - device entry; cfg->testdata holds the compiled schedule regex
//              (cregex) and the hour/type of the last accepted test
//   testtype - test type character, placed first in the match pattern
//   testtime - time to evaluate; 0 means "now" (time(NULL) is used)
// The pattern matched against the regex has the form
// "T/MM/DD/d/HH" (type, month 1-12, day of month, weekday 1-7, hour).
// NOTE(review): the declarations of epochnow/timenow, the early-return
// statements and the assignment to dat->hour are not visible in this view.
1994 // returns 1 if time to do test of type testtype, 0 if not time to do
1995 // test, < 0 if error
1996 int DoTestNow(cfgfile
*cfg
, char testtype
, time_t testtime
) {
1997 // start by finding out the time:
2000 char matchpattern
[16];
2001 regmatch_t substring
;
2002 int weekday
, length
;
2003 unsigned short hours
;
2004 testinfo
*dat
=cfg
->testdata
;
2006 // check that self-testing has been requested
2010 // since we are about to call localtime(), be sure glibc is informed
2011 // of any timezone changes we make.
2013 FixGlibcTimeZoneBug();
2015 // construct pattern containing the month, day of month, day of
2017 epochnow
= (!testtime
? time(NULL
) : testtime
);
2018 timenow
=localtime(&epochnow
);
2020 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7
2022 weekday
=timenow
->tm_wday
?timenow
->tm_wday
:7;
2023 sprintf(matchpattern
, "%c/%02d/%02d/%1d/%02d", testtype
, timenow
->tm_mon
+1,
2024 timenow
->tm_mday
, weekday
, timenow
->tm_hour
);
2026 // if no match, we are done
2027 if (regexec(&(dat
->cregex
), matchpattern
, 1, &substring
, 0))
2030 // must match the ENTIRE type/date/time string
2031 length
=strlen(matchpattern
);
2032 if (substring
.rm_so
!=0 || substring
.rm_eo
!=length
)
2035 // never do a second test in the same hour as another test (the % 7 ensures
2036 // that the RHS will never be greater than 65535 and so will always fit into
2037 // an unsigned short)
2038 hours
=1+timenow
->tm_hour
+24*(timenow
->tm_yday
+366*(timenow
->tm_year
% 7));
2039 if (hours
==dat
->hour
) {
// the skip message is printed only for real-time checks (testtime == 0) and
// only when the requested type differs from the one already recorded
2040 if (!testtime
&& testtype
!=dat
->testtype
)
2041 PrintOut(LOG_INFO
, "Device: %s, did test of type %c in current hour, skipping test of type %c\n",
2042 cfg
->name
, dat
->testtype
, testtype
);
2046 // save time and type of the current test; we are ready to do a test
2048 dat
->testtype
=testtype
;
// PrintTestSchedule: simulate the self-test schedule and log which tests
// would run.
//   atadevices  - array of numdevata ATA device entries
//   scsidevices - array of numdevscsi SCSI device entries
// Time is stepped in increments of checktime seconds, up to 90 days ahead.
// At each step DoTestNow() is asked about every test type ("LSCO": all four
// for ATA devices, only the first two for SCSI devices). The first five hits
// per device and type are printed, followed by per-device totals.
// NOTE(review): the declarations of i, t and cfg, the initialisation of now,
// the check of the calloc() result and the release of testcnts are not visible
// in this view.
2052 // Print a list of future tests.
2053 void PrintTestSchedule(cfgfile
**atadevices
, cfgfile
**scsidevices
){
2056 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2057 time_t now
; long seconds
;
2058 int numdev
= numdevata
+numdevscsi
;
2059 typedef int cnt_t
[4];
2060 cnt_t
* testcnts
; // testcnts[numdev][4]
2063 testcnts
= (cnt_t
*)calloc(numdev
, sizeof(testcnts
[0]));
2067 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2069 // FixGlibcTimeZoneBug(); // done in PrintOut()
2071 dateandtimezoneepoch(datenow
, now
);
2072 for (seconds
=checktime
; seconds
<3600L*24*90; seconds
+=checktime
) {
2073 // Check for each device whether a test will be run
2074 time_t testtime
= now
+ seconds
;
2075 for (i
=0; i
<numdev
; i
++) {
// indices below numdevata address ATA devices, the rest address SCSI devices
2076 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
2077 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
2078 char testtype
= "LSCO"[t
];
2079 if (DoTestNow(cfg
, testtype
, testtime
)) {
2080 // Report at most 5 tests of each type
2081 if (++testcnts
[i
][t
] <= 5) {
2082 dateandtimezoneepoch(date
, testtime
);
2083 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
->name
,
2084 testcnts
[i
][t
], testtype
, date
);
2092 dateandtimezoneepoch(date
, now
+seconds
);
2093 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2094 for (i
=0; i
<numdev
; i
++) {
2095 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
2096 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
2097 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
->name
, testcnts
[i
][t
],
2098 (testcnts
[i
][t
]==1?"":"s"), "LSCO"[t
]);
// DoSCSISelfTest: start a scheduled background self-test on a SCSI device.
//   fd       - open device descriptor
//   cfg      - device entry; cfg->testdata->not_cap_short / not_cap_long are
//              set when the device turns out not to support a test
//   testtype - requested test type character
// A test is not started when scsiSelfTestInProgress() fails or reports that a
// self-test is already running.
// NOTE(review): the declarations of retval and inProgress, the dispatch on
// testtype that chooses between the short and the extended test, and the
// return statements are not visible in this view.
2105 // Return zero on success, nonzero on failure. Perform offline (background)
2106 // short or long (extended) self test on given scsi device.
2107 int DoSCSISelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2109 char *testname
= NULL
;
2110 char *name
= cfg
->name
;
2113 if (scsiSelfTestInProgress(fd
, &inProgress
)) {
2114 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2115 cfg
->testdata
->not_cap_short
=cfg
->testdata
->not_cap_long
=1;
2119 if (1 == inProgress
) {
2120 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
2121 "progress.\n", name
);
2127 testname
= "Short Self";
2128 retval
= scsiSmartShortSelfTest(fd
);
2131 testname
= "Long Self";
2132 retval
= scsiSmartExtendSelfTest(fd
);
2135 // If we can't do the test, exit
// testname stays NULL when neither assignment above was executed
2136 if (NULL
== testname
) {
2137 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
2142 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
2143 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
2144 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
2147 cfg
->testdata
->not_cap_long
=1;
2149 cfg
->testdata
->not_cap_short
=1;
2153 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
2158 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
2163 // Do an offline immediate or self-test. Return zero on success,
2164 // nonzero on failure.
// fd is the open device handle, cfg supplies the device name, the firmware
// bug setting and the testdata capability flags, and testtype selects which
// of the four tests handled below is wanted.
2165 int DoATASelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2167 struct ata_smart_values data
;
2168 char *testname
=NULL
;
// dotest stays at -1 unless a capability check below selects a subcommand.
2169 int retval
, dotest
=-1;
2170 char *name
=cfg
->name
;
2172 // Read current smart data and check status/capability
2173 if (ataReadSmartValues(fd
, &data
) || !(data
.offline_data_collection_capability
)) {
2174 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
2178 // Check for capability to do the test
// Each test type sets a display name, then either picks the subcommand for
// smartcommandhandler or records the missing capability in cfg->testdata
// (presumably in an else branch; the branch keywords are not visible here).
2181 testname
="Offline Immediate ";
2182 if (isSupportExecuteOfflineImmediate(&data
))
2183 dotest
=OFFLINE_FULL_SCAN
;
2185 cfg
->testdata
->not_cap_offline
=1;
2188 testname
="Conveyance Self-";
2189 if (isSupportConveyanceSelfTest(&data
))
2190 dotest
=CONVEYANCE_SELF_TEST
;
2192 cfg
->testdata
->not_cap_conveyance
=1;
2195 testname
="Short Self-";
2196 if (isSupportSelfTest(&data
))
2197 dotest
=SHORT_SELF_TEST
;
2199 cfg
->testdata
->not_cap_short
=1;
// Short and long tests are gated by the same isSupportSelfTest check.
2202 testname
="Long Self-";
2203 if (isSupportSelfTest(&data
))
2204 dotest
=EXTEND_SELF_TEST
;
2206 cfg
->testdata
->not_cap_long
=1;
2210 // If we can't do the test, exit
2212 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
2216 // If currently running a self-test, do not interrupt it to start another.
// A high nibble of 15 in self_test_exec_status is taken as test in progress;
// the low nibble is printed below as tens of percent remaining.
2217 if (15==(data
.self_test_exec_status
>> 4)) {
// With the SAMSUNG3 firmware fix selected, a status byte of exactly 0xf0 is
// logged as unclear and the scheduled test is not skipped.
2218 if (cfg
->fixfirmwarebug
== FIX_SAMSUNG3
&& data
.self_test_exec_status
== 0xf0) {
2219 PrintOut(LOG_INFO
, "Device: %s, will not skip scheduled %sTest "
2220 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name
, testname
);
2222 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2223 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
2228 // else execute the test, and return status
2229 if ((retval
=smartcommandhandler(fd
, IMMEDIATE_OFFLINE
, dotest
, NULL
)))
2230 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
2232 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
2237 // Check Temperature limits
// Logs temperature changes and limit violations for one device and keeps
// the running temperature, tempmin and tempmax fields of cfg up to date.
// currtemp is the current reading and triptemp the trip temperature; both
// are printed as Celsius values.
2238 static void CheckTemperature(cfgfile
* cfg
, unsigned char currtemp
, unsigned char triptemp
)
// minchg and maxchg become "!" when this reading sets a new minimum or
// maximum, and are appended to the Min/Max values in the messages below.
2240 const char *minchg
= "", *maxchg
= "";
// Readings of 0 and 255 are treated as a failed read.
2241 if (!(0 < currtemp
&& currtemp
< 255)) {
2242 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
->name
);
// A stored temperature of zero means there is no previous reading yet, so
// this reading seeds the current, minimum and maximum values.
2246 if (!cfg
->temperature
) {
2247 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius\n",
2248 cfg
->name
, (int)currtemp
);
2250 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
2251 cfg
->temperature
= cfg
->tempmin
= cfg
->tempmax
= currtemp
;
2255 if (currtemp
< cfg
->tempmin
) {
2256 cfg
->tempmin
= currtemp
; minchg
= "!";
2257 cfg
->tempmininc
= 0;
2259 else if (cfg
->tempmininc
) {
2260 // increase min Temperature during first 30 minutes
2261 cfg
->tempmin
= currtemp
;
2264 if (currtemp
> cfg
->tempmax
) {
2265 cfg
->tempmax
= currtemp
; maxchg
= "!";
// With tempdiff set, report when a new min or max was recorded or when the
// reading moved by at least tempdiff since the last stored temperature.
2269 if (cfg
->tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)cfg
->temperature
) >= cfg
->tempdiff
)) {
2270 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %u%s/%u%s)\n",
2271 cfg
->name
, (int)currtemp
-(int)cfg
->temperature
, currtemp
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2272 cfg
->temperature
= currtemp
;
// The critical limit is tested first; the informational limit is only
// considered when the critical branch was not taken.
2277 if (cfg
->tempcrit
&& currtemp
>= cfg
->tempcrit
) {
2278 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2279 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2280 MailWarning(cfg
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2281 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2283 else if (cfg
->tempinfo
&& currtemp
>= cfg
->tempinfo
) {
2284 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2285 cfg
->name
, currtemp
, cfg
->tempinfo
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
// Run one monitoring pass over the ATA device described by cfg: optional
// test email, open the device, look up a scheduled self-test, honour the
// power mode setting, then check SMART status, attribute data, the
// self-test and error logs, start the scheduled test and close the device.
// allow_selftests gates the scheduled self-test lookup.
2289 int ATACheckDevice(cfgfile
*cfg
, bool allow_selftests
){
2291 char *name
=cfg
->name
;
2295 // fix firmware bug if requested
// Copy the per-device controller settings from cfg into con.
2296 con
->fixfirmwarebug
=cfg
->fixfirmwarebug
;
2297 con
->controller_port
=cfg
->controller_port
;
2298 con
->controller_type
=cfg
->controller_type
;
2299 con
->controller_explicit
=cfg
->controller_explicit
;
2300 // Highpoint-specific data
2301 con
->hpt_data
[0]=cfg
->hpt_data
[0];
2302 con
->hpt_data
[1]=cfg
->hpt_data
[1];
2303 con
->hpt_data
[2]=cfg
->hpt_data
[2];
2305 // If user has asked, test the email warning system
2306 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2307 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
// The 3ware character-device controller types get their own open mode string.
2309 if (cfg
->controller_type
== CONTROLLER_3WARE_9000_CHAR
)
2310 mode
="ATA_3WARE_9000";
2312 if (cfg
->controller_type
== CONTROLLER_3WARE_678K_CHAR
)
2313 mode
="ATA_3WARE_678K";
2315 // if we can't open device, fail gracefully rather than hard --
2316 // perhaps the next time around we'll be able to open it. ATAPI
2317 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2318 // given (see linux cdrom driver).
2319 if ((fd
=OpenDevice(name
, mode
, 0))<0){
2320 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2324 // if the user has asked, and device is capable (or we're not yet
2325 // sure) check whether a self test should be done now.
2326 // This check is done before powermode check to avoid missing self
2327 // tests on idle or sleeping disks.
// The else-if chain gives the test types a fixed priority: long, short,
// conveyance, then offline immediate.
2328 if (allow_selftests
&& cfg
->testdata
) {
2330 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2333 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2336 else if (!cfg
->testdata
->not_cap_conveyance
&& DoTestNow(cfg
, 'C', 0)>0)
2338 // offline immediate
2339 else if (!cfg
->testdata
->not_cap_offline
&& DoTestNow(cfg
, 'O', 0)>0)
2343 // user may have requested (with the -n Directive) to leave the disk
2344 // alone if it is in idle or sleeping mode. In this case check the
2345 // power mode and exit without check if needed
2346 if (cfg
->powermode
){
2347 int dontcheck
=0, powermode
=ataCheckPowerMode(fd
);
2349 if (0 <= powermode
&& powermode
< 0xff) {
2350 // wait for possible spin up and check again
2353 powermode2
= ataCheckPowerMode(fd
);
// A higher value on the second query is logged as the status command itself
// having spun the disk up; the second value is the one used from here on.
2354 if (powermode2
> powermode
)
2355 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
2356 powermode
= powermode2
;
// cfg->powermode acts as a threshold with levels 1, 2 and 3 (the statements
// guarded by these tests are not visible here).
2363 if (cfg
->powermode
>=1)
2369 if (cfg
->powermode
>=2)
2375 if (cfg
->powermode
>=3)
2380 mode
="ACTIVE or IDLE";
2384 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2390 // if we are going to skip a check, return now
2392 // but ignore powermode on scheduled selftest
2394 CloseDevice(fd
, name
);
// powerskipcnt counts consecutive skipped checks; it is reported and reset
// to zero once checks resume.
2395 if (!cfg
->powerskipcnt
&& !cfg
->powerquiet
) // report first only and avoid waking up system disk
2396 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
2397 cfg
->powerskipcnt
++;
2400 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to scheduled self test (%d check%s skipped)\n",
2401 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2402 cfg
->powerskipcnt
= 0;
2404 else if (cfg
->powerskipcnt
) {
2405 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2406 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2407 cfg
->powerskipcnt
= 0;
2411 // check smart status
2412 if (cfg
->smartcheck
){
2413 int status
=ataSmartStatus2(fd
);
2415 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
2416 MailWarning(cfg
, 5, "Device: %s, not capable of SMART self-check", name
);
// A status of exactly 1 is reported as a failed SMART self-check.
2418 else if (status
==1){
2419 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
2420 MailWarning(cfg
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
2424 // Check everything that depends upon SMART Data (eg, Attribute values)
2425 if ( cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->pending
!=DONT_MONITOR_UNC
2426 || cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
){
2427 struct ata_smart_values curval
;
2428 struct ata_smart_thresholds_pvt
*thresh
=cfg
->smartthres
;
2430 // Read current attribute values. cfg->smartval and cfg->smartthres
// hold the old values and the thresholds.
2431 if (ataReadSmartValues(fd
,&curval
)){
2432 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
2433 MailWarning(cfg
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
2436 // look for current or offline pending sectors
2437 if (cfg
->pending
!= DONT_MONITOR_UNC
) {
2439 unsigned char currentpending
, offlinepending
;
// NOTE(review): the second argument below looks like a mis-decoded
// address-of currentpending -- confirm against the upstream source.
2441 TranslatePending(cfg
->pending
, ¤tpending
, &offlinepending
);
// An attribute id of zero disables the corresponding check.
2443 if (currentpending
&& (rawval
=ATAReturnAttributeRawValue(currentpending
, &curval
))>0) {
2444 // Unreadable pending sectors!!
2445 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors\n", name
, rawval
);
2446 MailWarning(cfg
, 10, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors", name
, rawval
);
2449 if (offlinepending
&& (rawval
=ATAReturnAttributeRawValue(offlinepending
, &curval
))>0) {
2450 // Unreadable offline sectors!!
2451 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Offline uncorrectable sectors\n", name
, rawval
);
2452 MailWarning(cfg
, 11, "Device: %s, %"PRId64
" Offline uncorrectable sectors", name
, rawval
);
2456 // check temperature limits
// No trip temperature is passed for ATA devices: the third argument is 0.
2457 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2458 CheckTemperature(cfg
, ATAReturnTemperatureValue(&curval
, cfg
->attributedefs
), 0);
2460 if (cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
) {
2462 // look for failed usage attributes, or track usage or prefail attributes
2463 for (i
=0; i
<NUMBER_ATA_SMART_ATTRIBUTES
; i
++){
2465 changedattribute_t delta
;
2467 // This block looks for usage attributes that have failed.
2468 // Prefail attributes that have failed are returned with a
2469 // positive sign. No failure returns 0. Usage attributes<0.
2470 if (cfg
->usagefailed
&& ((att
=ataCheckAttribute(&curval
, thresh
, i
))<0)){
2472 // are we ignoring failures of this attribute?
2474 if (!IsAttributeOff(att
, &cfg
->monitorattflags
, 0, MONITOR_FAILUSE
, __LINE__
)){
2475 char attname
[64], *loc
=attname
;
2477 // get attribute name & skip white space
2478 ataPrintSmartAttribName(loc
, att
, cfg
->attributedefs
);
2479 while (*loc
&& *loc
==' ') loc
++;
2482 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %s.\n", name
, loc
);
2483 MailWarning(cfg
, 2, "Device: %s, Failed SMART usage Attribute: %s.", name
, loc
);
2487 // This block tracks usage or prefailure attributes to see if
2488 // they are changing. It also looks for changes in RAW values
2489 // if this has been requested by user.
2490 if ((cfg
->usage
|| cfg
->prefail
) && ATACompareValues(&delta
, &curval
, cfg
->smartval
, thresh
, i
, name
)){
2491 unsigned char id
=delta
.id
;
2493 // if the only change is the raw value, and we're not
2494 // tracking raw value, then continue loop over attributes
2495 if (!delta
.sameraw
&& delta
.newval
==delta
.oldval
&& !IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAW
, __LINE__
))
2498 // are we tracking this attribute?
2499 if (!IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_IGNORE
, __LINE__
)){
2500 char newrawstring
[64], oldrawstring
[64], attname
[64], *loc
=attname
;
2502 // get attribute name, skip spaces
2503 ataPrintSmartAttribName(loc
, id
, cfg
->attributedefs
);
2504 while (*loc
&& *loc
==' ') loc
++;
2506 // has the user asked for us to print raw values?
2507 if (IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAWPRINT
, __LINE__
)) {
2508 // get raw values (as a string) and add to printout
// NOTE(review): both sprintf calls are unbounded and write into 64-byte
// buffers; confirm that rawstring (declared outside this view) is short
// enough for the added " [Raw ]" decoration to fit.
2510 ataPrintSmartAttribRawValue(rawstring
, curval
.vendor_attributes
+i
, cfg
->attributedefs
);
2511 sprintf(newrawstring
, " [Raw %s]", rawstring
);
2512 ataPrintSmartAttribRawValue(rawstring
, cfg
->smartval
->vendor_attributes
+i
, cfg
->attributedefs
);
2513 sprintf(oldrawstring
, " [Raw %s]", rawstring
);
// Empty strings make the raw-value suffix disappear from the messages below.
2516 newrawstring
[0]=oldrawstring
[0]='\0';
2518 // prefailure attribute
2519 if (cfg
->prefail
&& delta
.prefail
)
2520 PrintOut(LOG_INFO
, "Device: %s, SMART Prefailure Attribute: %s changed from %d%s to %d%s\n",
2521 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2524 if (cfg
->usage
&& !delta
.prefail
)
2525 PrintOut(LOG_INFO
, "Device: %s, SMART Usage Attribute: %s changed from %d%s to %d%s\n",
2526 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2528 } // endof block tracking usage or prefailure
2529 } // end of loop over attributes
2531 // Save the new values into cfg->smartval for the next time around
2532 *(cfg
->smartval
)=curval
;
2537 // check if number of selftest errors has increased (note: may also DECREASE)
2539 CheckSelfTestLogs(cfg
, SelfTestErrorCount(fd
, name
));
2541 // check if number of ATA errors has increased
2544 int newc
,oldc
=cfg
->ataerrorcount
;
2546 // new number of errors
2547 newc
=ATAErrorCount(fd
, name
);
2549 // did command fail?
2551 // lack of PrintOut here is INTENTIONAL
2552 MailWarning(cfg
, 7, "Device: %s, Read SMART Error Log Failed", name
);
2554 // has error count increased?
2556 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
2558 MailWarning(cfg
, 4, "Device: %s, ATA error count increased from %d to %d",
2562 // this last line is probably not needed, count always increases
2564 cfg
->ataerrorcount
=newc
;
2567 // carry out scheduled self-test
2569 DoATASelfTest(fd
, cfg
, testtype
);
2571 // Don't leave device open -- the OS/user may want to access it
2572 // before the next smartd cycle!
2573 CloseDevice(fd
, name
);
// Run one monitoring pass over the SCSI device described by cfg: optional
// test email, open the device, read the health and temperature data, check
// temperature limits and the self-test log, start a scheduled self-test if
// one is due, and close the device again.
// allow_selftests gates the scheduled self-test lookup.
2577 int SCSICheckDevice(cfgfile
*cfg
, bool allow_selftests
)
2583 char *name
=cfg
->name
;
2587 // should we try to register this as a SCSI device?
2588 switch (cfg
->controller_type
) {
2589 case CONTROLLER_CCISS
:
2592 case CONTROLLER_SCSI
:
2593 case CONTROLLER_UNKNOWN
:
2600 // pass user settings on to low-level SCSI commands
2601 con
->controller_port
=cfg
->controller_port
;
2602 con
->controller_type
=cfg
->controller_type
;
2604 // If the user has asked for it, test the email warning system
2605 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2606 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
2608 // if we can't open device, fail gracefully rather than hard --
2609 // perhaps the next time around we'll be able to open it
2610 if ((fd
=OpenDevice(name
, mode
, 0))<0) {
2611 // Lack of PrintOut() here is intentional!
2612 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2614 } else if (debugmode
)
2615 PrintOut(LOG_INFO
,"Device: %s, opened SCSI device\n", name
);
// SuppressReport is set after the first failed read below, so the failure
// is reported only once.
2619 if (! cfg
->SuppressReport
) {
// NOTE(review): the sixth argument below looks like a mis-decoded
// address-of currenttemp -- confirm against the upstream source.
2620 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
2621 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
2622 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
2624 MailWarning(cfg
, 6, "Device: %s, failed to read SMART values", name
);
2625 cfg
->SuppressReport
= 1;
// Translate the asc/ascq pair into a message string for the failure report.
2629 cp
= scsiGetIEString(asc
, ascq
);
2631 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
2632 MailWarning(cfg
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
2633 } else if (debugmode
)
2634 PrintOut(LOG_INFO
,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2635 name
, (int)asc
, (int)ascq
);
2636 } else if (debugmode
)
2637 PrintOut(LOG_INFO
,"Device: %s, SMART health: passed\n", name
);
2639 // check temperature limits
2640 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2641 CheckTemperature(cfg
, currenttemp
, triptemp
);
2643 // check if number of selftest errors has increased (note: may also DECREASE)
2645 CheckSelfTestLogs(cfg
, scsiCountFailedSelfTests(fd
, 0));
// A due long test takes precedence over a due short test.
2647 if (allow_selftests
&& cfg
->testdata
) {
2648 // long (extended) background test
2649 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2650 DoSCSISelfTest(fd
, cfg
, 'L');
2651 // short background test
2652 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2653 DoSCSISelfTest(fd
, cfg
, 'S');
2655 CloseDevice(fd
, name
);
2659 // Checks the SMART status of all ATA and SCSI devices
// Calls ATACheckDevice on the first numdevata entries of atadevices, then
// SCSICheckDevice on the first numdevscsi entries of scsidevices.
// allow_selftests is forwarded unchanged to every per-device check, and the
// return values of those checks are not used here.
2660 void CheckDevicesOnce(cfgfile
**atadevices
, cfgfile
**scsidevices
, bool allow_selftests
){
2663 for (i
=0; i
<numdevata
; i
++)
2664 ATACheckDevice(atadevices
[i
], allow_selftests
);
2666 for (i
=0; i
<numdevscsi
; i
++)
2667 SCSICheckDevice(scsidevices
[i
], allow_selftests
);
2673 // This alarm means that a SCSI USB device was hanging
// Signal handler: abandons the interrupted call by jumping back to the
// context saved in registerscsienv, where the matching setjmp then sees the
// value 1. The signal argument is not used in the visible body.
2674 void AlarmHandler(int signal
) {
2675 longjmp(registerscsienv
, 1);
2679 // Does initialization right after fork to daemon mode
// Installs the signal handlers and stores the current time in *wakeuptime.
2680 void Initialize(time_t *wakeuptime
){
2682 // install goodbye message and remove pidfile handler
2685 // write PID file only after installing exit handler
2689 // install signal handlers. On Solaris, can't use signal() because
2690 // it resets the handler to SIG_DFL after each call. So use sigset()
2691 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
// Pattern used for every signal below: install the handler, and if the
// value returned by SIGNALFN was SIG_IGN, put SIG_IGN back so that a signal
// ignored before smartd started stays ignored.
2693 // normal and abnormal exit
2694 if (SIGNALFN(SIGTERM
, sighandler
)==SIG_IGN
)
2695 SIGNALFN(SIGTERM
, SIG_IGN
);
2696 if (SIGNALFN(SIGQUIT
, sighandler
)==SIG_IGN
)
2697 SIGNALFN(SIGQUIT
, SIG_IGN
);
2699 // in debug mode, <CONTROL-C> ==> HUP
2700 if (SIGNALFN(SIGINT
, debugmode
?HUPhandler
:sighandler
)==SIG_IGN
)
2701 SIGNALFN(SIGINT
, SIG_IGN
);
2703 // Catch HUP and USR1
2704 if (SIGNALFN(SIGHUP
, HUPhandler
)==SIG_IGN
)
2705 SIGNALFN(SIGHUP
, SIG_IGN
);
2706 if (SIGNALFN(SIGUSR1
, USR1handler
)==SIG_IGN
)
2707 SIGNALFN(SIGUSR1
, SIG_IGN
);
2709 if (SIGNALFN(SIGUSR2
, USR2handler
)==SIG_IGN
)
2710 SIGNALFN(SIGUSR2
, SIG_IGN
);
2713 // initialize wakeup time to CURRENT time
2714 *wakeuptime
=time(NULL
);
2720 // Toggle debug mode implemented for native windows only
2721 // (there is no easy way to reopen tty on *nix)
// Triggered by signal USR2 according to the log messages below. Enabling
// opens a console and routes SIGINT to HUPhandler; disabling closes the
// console and routes SIGINT back to sighandler. Only debug mode 1 is ever
// switched off here; any other nonzero mode is reported as not changed.
2722 static void ToggleDebugMode()
2725 PrintOut(LOG_INFO
,"Signal USR2 - enabling debug mode\n");
// A zero return from daemon_enable_console is the success case.
2726 if (!daemon_enable_console("smartd [Debug]")) {
2728 daemon_signal(SIGINT
, HUPhandler
);
2729 PrintOut(LOG_INFO
,"smartd debug mode enabled, PID=%d\n", getpid());
2732 PrintOut(LOG_INFO
,"enable console failed\n");
2734 else if (debugmode
== 1) {
2735 daemon_disable_console();
2737 daemon_signal(SIGINT
, sighandler
);
2738 PrintOut(LOG_INFO
,"Signal USR2 - debug mode disabled\n");
2741 PrintOut(LOG_INFO
,"Signal USR2 - debug mode %d not changed\n", debugmode
);
// Sleep until wakeuptime or until one of the caught-signal flags is set.
// A wakeuptime that has already passed is first advanced by whole checktime
// intervals. Returns the (possibly adjusted) wakeup time.
2745 time_t dosleep(time_t wakeuptime
){
2748 // If past wake-up-time, compute next wake-up-time
2750 while (wakeuptime
<=timenow
){
// One more than the number of whole intervals already missed, so the new
// wakeuptime lands strictly after timenow.
2751 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
2752 wakeuptime
+=intervals
*checktime
;
2755 // sleep until we catch SIGUSR1 or have completed sleeping
2756 while (timenow
<wakeuptime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
){
2758 // protect user against system clock being adjusted backwards
// The wakeup time should never be more than one interval ahead; if it is,
// the clock must have moved back, so restart the interval from now.
2759 if (wakeuptime
>timenow
+checktime
){
2760 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2761 wakeuptime
=timenow
+checktime
;
2764 // Exit sleep when time interval has expired or a signal is received
2765 sleep(wakeuptime
-timenow
);
2768 // toggle debug mode?
2769 if (caughtsigUSR2
) {
2778 // if we caught a SIGUSR1 then print message and clear signal
// The remaining time is clamped to zero so a negative value is never printed.
2780 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2781 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
2785 // return adjusted wakeuptime
2789 // Print out a list of valid arguments for the Directive d
// priority is the log level handed to PrintOut for the list itself; d is the
// Directive letter (presumably matched in a switch whose case labels are not
// visible here -- verify which letter maps to which list).
2790 void printoutvaliddirectiveargs(int priority
, char d
) {
2795 PrintOut(priority
, "never[,q], sleep[,q], standby[,q], idle[,q]");
2798 PrintOut(priority
, "valid_regular_expression");
2801 PrintOut(priority
, "ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N");
2804 PrintOut(priority
, "normal, permissive");
2808 PrintOut(priority
, "on, off");
2811 PrintOut(priority
, "error, selftest");
2814 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
// The vendor attribute list is built at run time and released with CheckFree
// after printing; a failure to build it is always logged at LOG_CRIT,
// regardless of priority.
2817 if (!(s
= create_vendor_attribute_arg_list())) {
2818 PrintOut(LOG_CRIT
,"Insufficient memory to construct argument list\n");
2821 PrintOut(priority
, "\n%s\n", s
);
2822 s
=CheckFree(s
, __LINE__
,filenameandversion
);
2825 PrintOut(priority
, "use, ignore, show, showall");
2828 PrintOut(priority
, "none, samsung, samsung2, samsung3");
2833 // exits with an error message, or returns integer value of token
// arg is the argument text to convert; name, token, lineno and configfile
// are only used to build the error messages; min and max bound the accepted
// value (inclusive). Callers in this file test the result for being
// negative, which is why min itself must not be negative.
2834 int GetInteger(char *arg
, char *name
, char *token
, int lineno
, char *configfile
, int min
, int max
){
2838 // check input range
2840 PrintOut(LOG_CRIT
, "min =%d passed to GetInteger() must be >=0\n", min
);
2844 // make sure argument is there
2846 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
2847 configfile
, lineno
, name
, token
, min
, max
);
2851 // get argument value (base 10), check that it's integer, and in-range
// Any character left over after the number makes the argument invalid.
2852 val
=strtol(arg
,&endptr
,10);
2853 if (*endptr
!='\0' || val
<min
|| val
>max
) {
2854 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
2855 configfile
, lineno
, name
, token
, arg
, min
, max
);
2859 // all is well; return value
2864 // Get 1-3 small integer(s) for '-W' directive
// arg holds one to three comma-separated integers in the range 0 to 255.
// name, token, lineno and configfile are only used in the error messages.
// On success the parsed values are stored through val1, val2 and val3.
2865 int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *configfile
,
2866 unsigned char * val1
, unsigned char * val2
, unsigned char * val3
){
// Values that are not supplied keep their initial 0, so they pass the range
// check and are stored as 0.
2867 unsigned v1
= 0, v2
= 0, v3
= 0;
// Each n stays at -1 unless sscanf reaches the matching %n.
2868 int n1
= -1, n2
= -1, n3
= -1, len
;
2870 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
2871 configfile
, lineno
, name
, token
);
// Each %n records how many characters were consumed up to that point. The
// argument is accepted only when one of those offsets equals len, i.e. the
// parse stopped exactly after the first, second or third integer (len is
// presumably the length of arg; its assignment is not visible here).
2876 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
2877 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
2878 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
2879 configfile
, lineno
, name
, token
, arg
);
2882 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
2887 // This function returns 1 if it has correctly parsed one token (and
2888 // any arguments), else zero if no tokens remain. It returns -1 if an
2889 // error was encountered.
2890 int ParseToken(char *token
,cfgfile
*cfg
){
2892 char *name
=cfg
->name
;
2893 int lineno
=cfg
->lineno
;
2894 char *delim
= " \n\t";
2899 maildata
*mdat
=NULL
, tempmail
;
2901 // is the rest of the line a comment
2905 // is the token not recognized?
2906 if (*token
!='-' || strlen(token
)!=2) {
2907 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
2908 configfile
, lineno
, name
, token
);
2909 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
2913 // token we will be parsing:
2916 // create temporary maildata structure. This means we can postpone
2917 // allocating space in the data segment until we are sure there are
2919 if ('m'==sym
|| 'M'==sym
){
2920 if (!cfg
->mailwarn
){
2921 memset(&tempmail
, 0, sizeof(maildata
));
2929 // parse the token and swallow its argument
2934 // monitor current pending sector count (default 197)
2935 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2937 if (val
==CUR_UNC_DEFAULT
)
2940 val
=CUR_UNC_DEFAULT
;
2941 // set bottom 8 bits to correct value
2942 cfg
->pending
&= 0xff00;
2943 cfg
->pending
|= val
;
2946 // monitor offline uncorrectable sectors (default 198)
2947 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2949 if (val
==OFF_UNC_DEFAULT
)
2952 val
=OFF_UNC_DEFAULT
;
2953 // turn off top 8 bits, then set to correct value
2954 cfg
->pending
&= 0xff;
2955 cfg
->pending
|= (val
<<8);
2958 // Set tolerance level for SMART command failures
2959 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2961 } else if (!strcmp(arg
, "normal")) {
2962 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
2963 // not on failure of an optional S.M.A.R.T. command.
2964 // This is the default so we don't need to actually do anything here.
2966 } else if (!strcmp(arg
, "permissive")) {
2967 // Permissive mode; ignore errors from Mandatory SMART commands
2974 // specify the device type
2975 cfg
->controller_explicit
= 1;
2976 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2978 } else if (!strcmp(arg
, "ata")) {
2979 cfg
->controller_port
= 0;
2980 cfg
->controller_type
= CONTROLLER_ATA
;
2981 } else if (!strcmp(arg
, "scsi")) {
2982 cfg
->controller_port
=0;
2983 cfg
->controller_type
= CONTROLLER_SCSI
;
2984 } else if (!strcmp(arg
, "marvell")) {
2985 cfg
->controller_port
=0;
2986 cfg
->controller_type
= CONTROLLER_MARVELL_SATA
;
2987 } else if (!strncmp(arg
, "sat", 3)) {
2988 cfg
->controller_type
= CONTROLLER_SAT
;
2989 cfg
->controller_port
= 0;
2990 cfg
->satpassthrulen
= 0;
2991 if (strlen(arg
) > 3) {
2995 cp
= strchr(arg
, ',');
2996 if (cp
&& (1 == sscanf(cp
+ 1, "%d", &k
)) &&
2997 ((0 == k
) || (12 == k
) || (16 == k
)))
2998 cfg
->satpassthrulen
= k
;
3000 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3001 "'-d sat,<n>' requires <n> to be 0, 12 or 16\n",
3002 configfile
, lineno
, name
);
3006 } else if (!strncmp(arg
, "hpt", 3)){
3007 unsigned char i
, slash
= 0;
3008 cfg
->hpt_data
[0] = 0;
3009 cfg
->hpt_data
[1] = 0;
3010 cfg
->hpt_data
[2] = 0;
3011 cfg
->controller_type
= CONTROLLER_HPT
;
3012 for (i
=4; i
< strlen(arg
); i
++) {
3016 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3017 "'-d hpt,L/M/N' supports 2-3 items\n",
3018 configfile
, lineno
, name
);
3023 else if ((arg
[i
])>='0' && (arg
[i
])<='9') {
3024 if (cfg
->hpt_data
[slash
]>1) { /* hpt_data[x] max 19 */
3028 cfg
->hpt_data
[slash
] = cfg
->hpt_data
[slash
]*10 + arg
[i
] - '0';
3037 } else if (badarg
!= TRUE
) {
3038 if (cfg
->hpt_data
[0]==0 || cfg
->hpt_data
[0]>8){
3039 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3040 "'-d hpt,L/M/N' no/invalid controller id L supplied\n",
3041 configfile
, lineno
, name
);
3044 if (cfg
->hpt_data
[1]==0 || cfg
->hpt_data
[1]>8){
3045 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3046 "'-d hpt,L/M/N' no/invalid channel number M supplied\n",
3047 configfile
, lineno
, name
);
3051 if (cfg
->hpt_data
[2]==0 || cfg
->hpt_data
[2]>15){
3052 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3053 "'-d hpt,L/M/N' no/invalid pmport number N supplied\n",
3054 configfile
, lineno
, name
);
3057 } else { /* no pmport device */
3061 } else if (!strcmp(arg
, "removable")) {
3064 // look for a 3ware,N RAID device
3068 // make a copy of the string to mess with
3069 if (!(s
= strdup(arg
))) {
3071 "No memory to copy argument to -d option - exiting\n");
3073 } else if (!strncmp(s
,"3ware,",6)) {
3074 if (split_report_arg2(s
, &i
)){
3075 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N requires N integer\n",
3076 configfile
, lineno
, name
);
3078 } else if ( i
<0 || i
>31) {
3079 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N (N=%d) must have 0 <= N <= 31\n",
3080 configfile
, lineno
, name
, i
);
3083 // determine type of escalade device from name of device
3084 cfg
->controller_type
= guess_device_type(name
);
3085 if (cfg
->controller_type
!=CONTROLLER_3WARE_9000_CHAR
&& cfg
->controller_type
!=CONTROLLER_3WARE_678K_CHAR
)
3086 cfg
->controller_type
=CONTROLLER_3WARE_678K
;
3088 // NOTE: controller_port == disk number + 1
3089 cfg
->controller_port
= i
+1;
3091 } else if (!strncmp(s
,"cciss,",6)) {
3092 if (split_report_arg2(s
, &i
)){
3093 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N requires N integer\n",
3094 configfile
, lineno
, name
);
3096 } else if ( i
<0 || i
>127) {
3097 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N (N=%d) must have 0 <= N <= 127\n",
3098 configfile
, lineno
, name
, i
);
3101 // NOTE: controller_port == disk number + 1
3102 cfg
->controller_type
= CONTROLLER_CCISS
;
3103 cfg
->controller_port
= i
+1;
3108 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3113 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3115 } else if (!strcmp(arg
, "none")) {
3116 cfg
->fixfirmwarebug
= FIX_NONE
;
3117 } else if (!strcmp(arg
, "samsung")) {
3118 cfg
->fixfirmwarebug
= FIX_SAMSUNG
;
3119 } else if (!strcmp(arg
, "samsung2")) {
3120 cfg
->fixfirmwarebug
= FIX_SAMSUNG2
;
3121 } else if (!strcmp(arg
, "samsung3")) {
3122 cfg
->fixfirmwarebug
= FIX_SAMSUNG3
;
3128 // check SMART status
3132 // check for failure of usage attributes
3136 // track changes in all vendor attributes
3141 // track changes in prefail vendor attributes
3145 // track changes in usage vendor attributes
3149 // track changes in SMART logs
3150 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3152 } else if (!strcmp(arg
, "selftest")) {
3153 // track changes in self-test log
3155 } else if (!strcmp(arg
, "error")) {
3156 // track changes in ATA error log
3163 // monitor everything
3172 // automatic offline testing enable/disable
3173 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3175 } else if (!strcmp(arg
, "on")) {
3176 cfg
->autoofflinetest
= 2;
3177 } else if (!strcmp(arg
, "off")) {
3178 cfg
->autoofflinetest
= 1;
3184 // skip disk check if in idle or standby mode
3185 if (!(arg
= strtok(NULL
, delim
)))
3187 else if (!strcmp(arg
, "never") || !strcmp(arg
, "never,q"))
3189 else if (!strcmp(arg
, "sleep") || !strcmp(arg
, "sleep,q"))
3191 else if (!strcmp(arg
, "standby") || !strcmp(arg
, "standby,q"))
3193 else if (!strcmp(arg
, "idle") || !strcmp(arg
, "idle,q"))
3197 cfg
->powerquiet
= !!strchr(arg
, ',');
3200 // automatic attribute autosave enable/disable
3201 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3203 } else if (!strcmp(arg
, "on")) {
3205 } else if (!strcmp(arg
, "off")) {
3212 // warn user, and delete any previously given -s REGEXP Directives
3214 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3215 configfile
, lineno
, name
, cfg
->testdata
->regex
);
3216 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3218 // check for missing argument
3219 if (!(arg
= strtok(NULL
, delim
))) {
3222 // allocate space for structure and string
3223 else if (!(cfg
->testdata
=(testinfo
*)Calloc(1, sizeof(testinfo
))) || !(cfg
->testdata
->regex
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
))) {
3224 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create Test Directive -s %s!\n",
3225 configfile
, lineno
, name
, arg
);
3228 else if ((val
=regcomp(&(cfg
->testdata
->cregex
), arg
, REG_EXTENDED
))) {
3230 // not a valid regular expression!
3231 regerror(val
, &(cfg
->testdata
->cregex
), errormsg
, 512);
3232 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3233 configfile
, lineno
, name
, arg
, errormsg
);
3234 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3237 // Do a bit of sanity checking and warn user if we think that
3238 // their regexp is "strange". User probably confused about shell
3239 // glob(3) syntax versus regular expression syntax regexp(7).
3240 else if ((int)strlen(arg
) != (val
=strspn(arg
,"0123456789/.-+*|()?^$[]SLCO")))
3241 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3242 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
3245 // send email to address that follows
3246 if (!(arg
= strtok(NULL
,delim
)))
3249 if (mdat
->address
) {
3250 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3251 configfile
, lineno
, name
, mdat
->address
);
3252 mdat
->address
=FreeNonZero(mdat
->address
, -1,__LINE__
,filenameandversion
);
3254 mdat
->address
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3258 // email warning options
3259 if (!(arg
= strtok(NULL
, delim
)))
3261 else if (!strcmp(arg
, "once"))
3262 mdat
->emailfreq
= 1;
3263 else if (!strcmp(arg
, "daily"))
3264 mdat
->emailfreq
= 2;
3265 else if (!strcmp(arg
, "diminishing"))
3266 mdat
->emailfreq
= 3;
3267 else if (!strcmp(arg
, "test"))
3268 mdat
->emailtest
= 1;
3269 else if (!strcmp(arg
, "exec")) {
3270 // Get the next argument (the command line)
3271 if (!(arg
= strtok(NULL
, delim
))) {
3272 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3273 configfile
, lineno
, name
, token
);
3276 // Free the last cmd line given if any, and copy new one
3277 if (mdat
->emailcmdline
) {
3278 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3279 configfile
, lineno
, name
, mdat
->emailcmdline
);
3280 mdat
->emailcmdline
=FreeNonZero(mdat
->emailcmdline
, -1,__LINE__
,filenameandversion
);
3282 mdat
->emailcmdline
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3288 // ignore failure of usage attribute
3289 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3291 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_FAILUSE
, __LINE__
);
3294 // ignore attribute for tracking purposes
3295 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3297 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_IGNORE
, __LINE__
);
3300 // print raw value when tracking
3301 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3303 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3306 // track changes in raw value (forces printing of raw value)
3307 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3309 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3310 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAW
, __LINE__
);
3313 // track Temperature
3314 if ((val
=Get3Integers(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
,
3315 &cfg
->tempdiff
, &cfg
->tempinfo
, &cfg
->tempcrit
))<0)
3317 // increase min Temperature during first 30 minutes
3318 if (!(cfg
->tempmininc
= (unsigned char)(CHECKTIME
/ checktime
)))
3319 cfg
->tempmininc
= 1;
3322 // non-default vendor-specific attribute meaning
3323 if (!(arg
=strtok(NULL
,delim
))) {
3325 } else if (parse_attribute_def(arg
, &cfg
->attributedefs
)){
3330 // Define use of drive-specific presets.
3331 if (!(arg
= strtok(NULL
, delim
))) {
3333 } else if (!strcmp(arg
, "use")) {
3334 cfg
->ignorepresets
= FALSE
;
3335 } else if (!strcmp(arg
, "ignore")) {
3336 cfg
->ignorepresets
= TRUE
;
3337 } else if (!strcmp(arg
, "show")) {
3338 cfg
->showpresets
= TRUE
;
3339 } else if (!strcmp(arg
, "showall")) {
3346 // Directive not recognized
3347 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3348 configfile
, lineno
, name
, token
);
3353 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3354 configfile
, lineno
, name
, token
);
3357 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3358 configfile
, lineno
, name
, token
, arg
);
3360 if (missingarg
|| badarg
) {
3361 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
3362 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
3363 PrintOut(LOG_CRIT
, "\n");
3367 // If this did something to fill the mail structure, and that didn't
3368 // already exist, create it and copy.
3370 if (!(cfg
->mailwarn
=(maildata
*)Calloc(1, sizeof(maildata
)))) {
3371 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create mail warning entry!\n",
3372 configfile
, lineno
, name
);
3375 memcpy(cfg
->mailwarn
, mdat
, sizeof(maildata
));
3381 // Allocate storage for a new cfgfile entry. If original!=NULL, it's
3382 // a copy of the original, but with private data storage. Else all is
3383 // zeroed. Returns address, and fails if no memory is available.
3385 cfgfile
*CreateConfigEntry(cfgfile
*original
){
3388 // allocate memory for new structure
3389 if (!(add
=(cfgfile
*)Calloc(1,sizeof(cfgfile
))))
3392 // if old structure was pointed to, copy it
3394 memcpy(add
, original
, sizeof(cfgfile
));
3396 // make private copies of data items ONLY if they are in use (non
3398 add
->name
= CustomStrDup(add
->name
, 0, __LINE__
,filenameandversion
);
3400 if (add
->testdata
) {
3402 if (!(add
->testdata
=(testinfo
*)Calloc(1,sizeof(testinfo
))))
3404 memcpy(add
->testdata
, original
->testdata
, sizeof(testinfo
));
3405 add
->testdata
->regex
= CustomStrDup(add
->testdata
->regex
, 1, __LINE__
,filenameandversion
);
3406 // only POSIX-portable way to make fresh copy of compiled regex is
3407 // to recompile it completely. There is no POSIX
3408 // compiled-regex-copy command.
3409 if ((val
=regcomp(&(add
->testdata
->cregex
), add
->testdata
->regex
, REG_EXTENDED
))) {
3411 regerror(val
, &(add
->testdata
->cregex
), errormsg
, 512);
3412 PrintOut(LOG_CRIT
, "unable to recompile regular expression %s. %s\n", add
->testdata
->regex
, errormsg
);
3417 if (add
->mailwarn
) {
3418 if (!(add
->mailwarn
=(maildata
*)Calloc(1,sizeof(maildata
))))
3420 memcpy(add
->mailwarn
, original
->mailwarn
, sizeof(maildata
));
3421 add
->mailwarn
->address
= CustomStrDup(add
->mailwarn
->address
, 0, __LINE__
,filenameandversion
);
3422 add
->mailwarn
->emailcmdline
= CustomStrDup(add
->mailwarn
->emailcmdline
, 0, __LINE__
,filenameandversion
);
3425 if (add
->attributedefs
) {
3426 if (!(add
->attributedefs
=(unsigned char *)Calloc(MAX_ATTRIBUTE_NUM
,1)))
3428 memcpy(add
->attributedefs
, original
->attributedefs
, MAX_ATTRIBUTE_NUM
);
3431 if (add
->monitorattflags
) {
3432 if (!(add
->monitorattflags
=(unsigned char *)Calloc(NMONITOR
*32, 1)))
3434 memcpy(add
->monitorattflags
, original
->monitorattflags
, NMONITOR
*32);
3437 if (add
->smartval
) {
3438 if (!(add
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
))))
3442 if (add
->smartthres
) {
3443 if (!(add
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
))))
3450 PrintOut(LOG_CRIT
, "No memory to create entry from configuration file\n");
3456 // This is the routine that adds things to the cfgentries list. To
3457 // prevent memory leaks when re-reading the configuration file many
3458 // times, this routine MUST deallocate any memory other than that
3459 // pointed to within cfg-> before it returns.
3461 // Return values are:
3462 // 1: parsed a normal line
3463 // 0: found comment or blank line
3464 // -1: found SCANDIRECTIVE line
3465 // -2: found an error
3467 // Note: this routine modifies *line from the caller!
3468 int ParseConfigLine(int entry
, int lineno
,char *line
){
3471 char *delim
= " \n\t";
3475 // get first token: device name. If a comment, skip line
3476 if (!(name
=strtok(line
,delim
)) || *name
=='#') {
3480 // Have we detected the SCANDIRECTIVE directive?
3481 if (!strcmp(SCANDIRECTIVE
,name
)){
3484 PrintOut(LOG_INFO
,"Scan Directive %s (line %d) must be the first entry in %s\n",name
, lineno
, configfile
);
3489 // Is there space for another entry? If not, allocate more
3490 while (entry
>=cfgentries_max
)
3491 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "configuration file device");
3493 // We've got a legit entry, make space to store it
3494 cfg
=cfgentries
[entry
]=CreateConfigEntry(NULL
);
3495 cfg
->name
= CustomStrDup(name
, 1, __LINE__
,filenameandversion
);
3497 // Store line number, and by default check for both device types.
3500 // Try to recognize whether this is an IDE or SCSI device. These can be
3501 // overwritten by configuration file directives.
3502 if (cfg
->controller_type
==CONTROLLER_UNKNOWN
)
3503 cfg
->controller_type
= guess_device_type(cfg
->name
);
3505 // parse tokens one at a time from the file.
3506 while ((token
=strtok(NULL
,delim
))){
3507 int retval
=ParseToken(token
,cfg
);
3516 PrintOut(LOG_INFO
,"Parsed token %s\n",token
);
3522 // error found on the line
3527 // If we found 3ware/cciss controller, then modify device name by adding a SPACE
3528 if (cfg
->controller_port
) {
3529 int len
=17+strlen(cfg
->name
);
3533 PrintOut(LOG_CRIT
, "smartd: can not scan for 3ware/cciss devices (line %d of file %s)\n",
3534 lineno
, configfile
);
3538 if (!(newname
=(char *)calloc(len
,1))) {
3539 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3543 // Make new device name by adding a space then RAID disk number
3544 snprintf(newname
, len
, "%s [%s_disk_%02d]", cfg
->name
, (cfg
->controller_type
== CONTROLLER_CCISS
) ? "cciss" : "3ware",
3545 cfg
->controller_port
-1);
3546 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3551 if (cfg
->hpt_data
[0]) {
3552 int len
=17+strlen(cfg
->name
);
3556 PrintOut(LOG_CRIT
, "smartd: can not scan for highpoint devices (line %d of file %s)\n",
3557 lineno
, configfile
);
3561 if (!(newname
=(char *)calloc(len
,1))) {
3562 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3566 // Make new device name by adding a space then RAID disk number
3567 snprintf(newname
, len
, "%s [hpt_%d/%d/%d]", cfg
->name
, cfg
->hpt_data
[0],
3568 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
3569 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3574 // If NO monitoring directives are set, then set all of them.
3575 if (!(cfg
->smartcheck
|| cfg
->usagefailed
|| cfg
->prefail
||
3576 cfg
->usage
|| cfg
->selftest
|| cfg
->errorlog
||
3577 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
3579 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3580 cfg
->name
, cfg
->lineno
, configfile
);
3590 // additional sanity check. Has user set -M options without -m?
3591 if (cfg
->mailwarn
&& !cfg
->mailwarn
->address
&& (cfg
->mailwarn
->emailcmdline
|| cfg
->mailwarn
->emailfreq
|| cfg
->mailwarn
->emailtest
)){
3592 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3593 cfg
->name
, cfg
->lineno
, configfile
);
3597 // has the user set <nomailer>?
3598 if (cfg
->mailwarn
&& cfg
->mailwarn
->address
&& !strcmp(cfg
->mailwarn
->address
,"<nomailer>")){
3599 // check that -M exec is also set
3600 if (!cfg
->mailwarn
->emailcmdline
){
3601 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3602 cfg
->name
, cfg
->lineno
, configfile
);
3605 // now free memory. From here on the sign of <nomailer> is
3606 // address==NULL and cfg->emailcmdline!=NULL
3607 cfg
->mailwarn
->address
=FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
3610 // set cfg->emailfreq to 1 (once) if user hasn't set it
3611 if (cfg
->mailwarn
&& !cfg
->mailwarn
->emailfreq
)
3612 cfg
->mailwarn
->emailfreq
= 1;
3622 // clean up utility for ParseConfigFile()
3623 void cleanup(FILE **fpp
, int is_stdin
){
3625 // (*fpp != stdin) does not work here if stdin has been closed & reopened
3635 // Parses a configuration file. Return values are:
3636 // N>=0: found N entries
3637 // -1: syntax error in config file
3638 // -2: config file does not exist
3639 // -3: config file exists but cannot be read
3641 // In the case where the return value is 0, there are three
3643 // Empty configuration file ==> cfgentries==NULL
3644 // No configuration file ==> cfgentries[0]->lineno == 0
3645 // SCANDIRECTIVE found ==> cfgentries[0]->lineno != 0
3646 int ParseConfigFile(){
3648 int entry
=0,lineno
=1,cont
=0,contlineno
=0;
3649 char line
[MAXLINELEN
+2];
3650 char fullline
[MAXCONTLINE
+1];
3652 int is_stdin
= (configfile
== configfile_stdin
); // pointer comparison ok here
3654 // Open config file, if it exists and is not <stdin>
3656 fp
=fopen(configfile
,"r");
3657 if (fp
==NULL
&& (errno
!=ENOENT
|| configfile_alt
)) {
3658 // file exists but we can't read it or it should exist due to '-c' option
3659 int ret
= (errno
!=ENOENT
? -3 : -2);
3660 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
3661 strerror(errno
),configfile
);
3665 else // read from stdin ('-c -' option)
3668 // No configuration file found -- use fake one
3670 int len
=strlen(SCANDIRECTIVE
)+4;
3671 char *fakeconfig
=(char *)calloc(len
,1);
3674 (len
-1) != snprintf(fakeconfig
, len
, "%s -a", SCANDIRECTIVE
) ||
3675 -1 != ParseConfigLine(entry
, 0, fakeconfig
)
3677 PrintOut(LOG_CRIT
,"Internal error in ParseConfigFile() at line %d of file %s\n%s",
3678 __LINE__
, filenameandversion
, reportbug
);
3681 fakeconfig
=CheckFree(fakeconfig
, __LINE__
,filenameandversion
);
3686 setmode(fileno(fp
), O_TEXT
); // Allow files with \r\n
3689 // configuration file exists
3690 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
3692 // parse config file line by line
3694 int len
=0,scandevice
;
3699 // make debugging simpler
3700 memset(line
,0,sizeof(line
));
3703 code
=fgets(line
,MAXLINELEN
+2,fp
);
3705 // are we at the end of the file?
3708 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3709 // See if we found a SCANDIRECTIVE directive
3710 if (scandevice
==-1) {
3711 cleanup(&fp
, is_stdin
);
3714 // did we find a syntax error
3715 if (scandevice
==-2) {
3716 cleanup(&fp
, is_stdin
);
3719 // the final line is part of a continuation line
3726 // input file line number
3729 // See if line is too long
3731 if (len
>MAXLINELEN
){
3733 if (line
[len
-1]=='\n')
3734 warn
="(including newline!) ";
3737 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3738 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
3739 cleanup(&fp
, is_stdin
);
3743 // Ignore anything after comment symbol
3744 if ((comment
=strchr(line
,'#'))){
3749 // is the total line (made of all continuation lines) too long?
3750 if (cont
+len
>MAXCONTLINE
){
3751 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3752 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
3753 cleanup(&fp
, is_stdin
);
3757 // copy string so far into fullline, and increment length
3758 strcpy(fullline
+cont
,line
);
3761 // is this a continuation line. If so, replace \ by space and look at next line
3762 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
3763 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
3767 // Not a continuation line. Parse it
3768 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3770 // did we find a scandevice directive?
3771 if (scandevice
==-1) {
3772 cleanup(&fp
, is_stdin
);
3775 // did we find a syntax error
3776 if (scandevice
==-2) {
3777 cleanup(&fp
, is_stdin
);
3785 cleanup(&fp
, is_stdin
);
3787 // note -- may be zero if syntax of file OK, but no valid entries!
3792 // Prints copyright, license and version information
3793 void PrintCopyleft(void){
3800 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3801 <LIST> is the list of valid arguments for option opt. */
3802 void PrintValidArgs(char opt
) {
3805 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
3806 if (!(s
= GetValidArgList(opt
)))
3807 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
3809 PrintOut(LOG_CRIT
, (char *)s
);
3810 PrintOut(LOG_CRIT
, " <=======\n");
3813 // Parses input line, prints usage message and
3814 // version/license/copyright messages
3815 void ParseOpts(int argc
, char **argv
){
3816 extern char *optarg
;
3817 extern int optopt
, optind
, opterr
;
3822 // Please update GetValidArgList() if you edit shortopts
3823 const char *shortopts
= "c:l:q:dDni:p:r:Vh?";
3824 #ifdef HAVE_GETOPT_LONG
3826 // Please update GetValidArgList() if you edit longopts
3827 struct option longopts
[] = {
3828 { "configfile", required_argument
, 0, 'c' },
3829 { "logfacility", required_argument
, 0, 'l' },
3830 { "quit", required_argument
, 0, 'q' },
3831 { "debug", no_argument
, 0, 'd' },
3832 { "showdirectives", no_argument
, 0, 'D' },
3833 { "interval", required_argument
, 0, 'i' },
3835 { "no-fork", no_argument
, 0, 'n' },
3837 { "pidfile", required_argument
, 0, 'p' },
3838 { "report", required_argument
, 0, 'r' },
3839 #if defined(_WIN32) || defined(__CYGWIN__)
3840 { "service", no_argument
, 0, 'n' },
3842 { "version", no_argument
, 0, 'V' },
3843 { "license", no_argument
, 0, 'V' },
3844 { "copyright", no_argument
, 0, 'V' },
3845 { "help", no_argument
, 0, 'h' },
3846 { "usage", no_argument
, 0, 'h' },
3854 // Parse input options. This horrible construction is so that emacs
3855 // indents properly. Sorry.
3856 while (-1 != (optchar
=
3857 #ifdef HAVE_GETOPT_LONG
3858 getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)
3860 getopt(argc
, argv
, shortopts
)
3867 if (!(strcmp(optarg
,"nodev"))) {
3869 } else if (!(strcmp(optarg
,"nodevstartup"))) {
3871 } else if (!(strcmp(optarg
,"never"))) {
3873 } else if (!(strcmp(optarg
,"onecheck"))) {
3876 } else if (!(strcmp(optarg
,"showtests"))) {
3879 } else if (!(strcmp(optarg
,"errors"))) {
3886 // set the log facility level
3887 if (!strcmp(optarg
, "daemon"))
3888 facility
=LOG_DAEMON
;
3889 else if (!strcmp(optarg
, "local0"))
3890 facility
=LOG_LOCAL0
;
3891 else if (!strcmp(optarg
, "local1"))
3892 facility
=LOG_LOCAL1
;
3893 else if (!strcmp(optarg
, "local2"))
3894 facility
=LOG_LOCAL2
;
3895 else if (!strcmp(optarg
, "local3"))
3896 facility
=LOG_LOCAL3
;
3897 else if (!strcmp(optarg
, "local4"))
3898 facility
=LOG_LOCAL4
;
3899 else if (!strcmp(optarg
, "local5"))
3900 facility
=LOG_LOCAL5
;
3901 else if (!strcmp(optarg
, "local6"))
3902 facility
=LOG_LOCAL6
;
3903 else if (!strcmp(optarg
, "local7"))
3904 facility
=LOG_LOCAL7
;
3909 // enable debug mode
3914 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
3919 // print summary of all valid directives
3925 // Period (time interval) for checking
3926 // strtol will set errno in the event of overflow, so we'll check it.
3928 lchecktime
= strtol(optarg
, &tailptr
, 10);
3929 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
3932 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
3933 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
3934 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3937 checktime
= (int)lchecktime
;
3940 // report IOCTL transactions
3945 // split_report_arg() may modify its first argument string, so use a
3946 // copy of optarg in case we want optarg for an error message.
3947 if (!(s
= strdup(optarg
))) {
3948 PrintOut(LOG_CRIT
, "No memory to process -r option - exiting\n");
3951 if (split_report_arg(s
, &i
)) {
3953 } else if (i
<1 || i
>3) {
3956 PrintOut(LOG_CRIT
, "======> INVALID REPORT LEVEL: %s <=======\n", optarg
);
3957 PrintOut(LOG_CRIT
, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3959 } else if (!strcmp(s
,"ioctl")) {
3960 con
->reportataioctl
= con
->reportscsiioctl
= i
;
3961 } else if (!strcmp(s
,"ataioctl")) {
3962 con
->reportataioctl
= i
;
3963 } else if (!strcmp(s
,"scsiioctl")) {
3964 con
->reportscsiioctl
= i
;
3968 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3972 // alternate configuration file
3973 if (strcmp(optarg
,"-"))
3974 configfile
=configfile_alt
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3975 else // read from stdin
3976 configfile
=configfile_stdin
;
3979 // output file with PID number
3980 pid_file
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3983 // print version and CVS info
3988 // help: print summary of command-line options
3996 // unrecognized option
3999 #ifdef HAVE_GETOPT_LONG
4000 // Point arg to the argument in which this option was found.
4001 arg
= argv
[optind
-1];
4002 // Check whether the option is a long option that doesn't map to -h.
4003 if (arg
[1] == '-' && optchar
!= 'h') {
4004 // Iff optopt holds a valid option then argument must be missing.
4005 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
4006 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
4007 PrintValidArgs(optopt
);
4009 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
4011 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
4016 // Iff optopt holds a valid option then argument must be missing.
4017 if (strchr(shortopts
, optopt
) != NULL
){
4018 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
4019 PrintValidArgs(optopt
);
4021 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
4023 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4030 // Check to see if option had an unrecognized or incorrect argument.
4034 // It would be nice to print the actual option name given by the user
4035 // here, but we just print the short form. Please fix this if you know
4036 // a clean way to do it.
4037 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
4038 PrintValidArgs(optchar
);
4039 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4044 // non-option arguments are not allowed
4045 if (argc
> optind
) {
4048 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
4049 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4053 // no pidfile in debug mode
4054 if (debugmode
&& pid_file
) {
4057 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4058 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
);
4059 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
4069 // Function we call if no configuration file was found or if the
4070 // SCANDIRECTIVE Directive was found. It makes entries for device
4071 // names returned by make_device_names() in os_OSNAME.c
4072 int MakeConfigEntries(const char *type
, int start
){
4075 char** devlist
= NULL
;
4076 cfgfile
*first
=cfgentries
[0],*cfg
=first
;
4078 // Hack! This is to make DEVICESCAN work on ATA devices behind
4079 // a SCSI to ATA Translation (SAT) Layer.
4080 // This will work on a general OS if the way that SAT devices are
4081 // named is the same as SCSI devices.
4082 // The BETTER solution is to modify make_device_names to recognize
4083 // the additional type "SAT". This requires changing os_*.cpp.
4085 const char *basetype
= type
;
4086 if (!strcmp(type
,"SAT") )
4089 // make list of devices
4090 if ((num
=make_device_names(&devlist
,basetype
))<0)
4091 PrintOut(LOG_CRIT
,"Problem creating device name scan list\n");
4093 // if no devices, or error constructing list, return
4097 // loop over entries to create
4098 for (i
=0; i
<num
; i
++){
4100 // make storage and copy for all but first entry
4102 // allocate more storage if needed
4103 while (cfgentries_max
<=start
+i
)
4104 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "simulated configuration file device");
4105 cfg
=cfgentries
[start
+i
]=CreateConfigEntry(first
);
4109 if (!strcmp(type
,"ATA") )
4110 cfg
->controller_type
= CONTROLLER_ATA
;
4111 if (!strcmp(type
,"SCSI") )
4112 cfg
->controller_type
= CONTROLLER_SCSI
;
4113 if (!strcmp(type
,"SAT") )
4114 cfg
->controller_type
= CONTROLLER_SAT
;
4116 // remove device name, if it's there, and put in correct one
4117 cfg
->name
=FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
4118 // save pointer to the device name created within
4119 // make_device_names
4120 cfg
->name
=devlist
[i
];
4123 // If needed, free memory used for devlist: pointers now in
4124 // cfgentries[]->names. If num==0 we never get to this point, but
4125 // that's OK. If we realloc()d the array length in
4126 // make_device_names() that was ALREADY equivalent to calling
4128 devlist
= FreeNonZero(devlist
,(sizeof (char*) * num
),__LINE__
, filenameandversion
);
4133 void CanNotRegister(char *name
, char *type
, int line
, int scandirective
){
4134 if( !debugmode
&& scandirective
== 1 ) { return; }
4136 PrintOut(scandirective
?LOG_INFO
:LOG_CRIT
,
4137 "Unable to register %s device %s at line %d of file %s\n",
4138 type
, name
, line
, configfile
);
4140 PrintOut(LOG_INFO
,"Unable to register %s device %s\n",
4145 // Returns negative value (see ParseConfigFile()) if config file
4146 // had errors, else number of entries which may be zero or positive.
4147 // If we found no configuration file, or it contained SCANDIRECTIVE,
4148 // then *scanning is set to 1, else 0.
4149 int ReadOrMakeConfigEntries(int *scanning
){
4152 // deallocate any cfgfile data structures in memory
4153 RmAllConfigEntries();
4155 // parse configuration file configfile (normally /etc/smartd.conf)
4156 if ((entries
=ParseConfigFile())<0) {
4158 // There was an error reading the configuration file.
4159 RmAllConfigEntries();
4161 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
4165 // did we find entries or scan?
4168 // no error parsing config file.
4170 // we did not find a SCANDIRECTIVE and did find valid entries
4171 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
4173 else if (cfgentries
&& cfgentries
[0]) {
4174 // we found a SCANDIRECTIVE or there was no configuration file so
4175 // scan. Configuration file's first entry contains all options
4177 cfgfile
*first
=cfgentries
[0];
4179 // By default scan for ATA, SCSI and SAT devices
4180 int doata
=1, doscsi
=1, dosat
=1;
4182 if (first
->controller_type
==CONTROLLER_SCSI
) {
4185 } else if (first
->controller_type
==CONTROLLER_ATA
) {
4188 } else if (first
->controller_type
==CONTROLLER_SAT
) {
4193 // The code in this block has been neutered by D. Gilbert
4194 // on 20070226. smartd can't cope with an ATA disk behind a SAT
4195 // transport seamlessly _without_ a bigger restructuring
4196 // of smartd than this code tried. It made ATA disks
4197 // behind a SAT interface automatically detected only by
4198 // killing support for real SCSI disks. Sorry, no.
4203 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
4205 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
4207 // make config list of ATA devices to search for
4209 entries
+=MakeConfigEntries("ATA", entries
);
4210 // make config list of SCSI devices to search for
4212 entries
+=MakeConfigEntries("SCSI", entries
);
4214 entries
+=MakeConfigEntries("SAT", entries
);
4216 // warn user if scan table found no devices
4218 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
4219 // get rid of fake entry with SCANDIRECTIVE as name
4220 RmConfigEntry(cfgentries
, __LINE__
);
4224 PrintOut(LOG_CRIT
,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile
);
4230 // This function tries devices from cfgentries. Each one that can be
4231 // registered is moved onto the [ata|scsi]devices lists and removed
4232 // from the cfgentries list, else its memory is deallocated.
4233 void RegisterDevices(int scanning
){
4236 // start by clearing lists/memory of ALL existing devices
4238 numdevata
=numdevscsi
=0;
4241 for (i
=0; i
<cfgentries_max
; i
++){
4243 cfgfile
*ent
=cfgentries
[i
];
4245 // skip any NULL entries (holes)
4249 // register ATA devices
4250 if (ent
->controller_type
!=CONTROLLER_SCSI
&& ent
->controller_type
!=CONTROLLER_CCISS
){
4251 if (ATADeviceScan(ent
, scanning
))
4252 CanNotRegister(ent
->name
, "ATA", ent
->lineno
, scanning
);
4254 // move onto the list of ata devices
4256 while (numdevata
>=atadevlist_max
)
4257 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
4258 atadevlist
[numdevata
++]=ent
;
4262 // then register SCSI devices
4263 if (ent
->controller_type
==CONTROLLER_SCSI
|| ent
->controller_type
==CONTROLLER_CCISS
||
4264 ent
->controller_type
==CONTROLLER_UNKNOWN
){
4268 struct sigaction alarmAction
, defaultaction
;
4270 // Set up an alarm handler to catch USB devices that hang on
4272 alarmAction
.sa_handler
= AlarmHandler
;
4273 alarmAction
.sa_flags
= SA_RESTART
;
4274 if (sigaction(SIGALRM
, &alarmAction
, &defaultaction
)) {
4275 // if we can't set timeout, just scan device
4276 PrintOut(LOG_CRIT
, "Unable to initialize SCSI timeout mechanism.\n");
4277 retscsi
=SCSIDeviceScan(ent
, scanning
);
4280 // prepare return point in case of bad SCSI device
4281 if (setjmp(registerscsienv
))
4282 // SCSI device timed out!
4285 // Set alarm, make SCSI call, reset alarm
4287 retscsi
=SCSIDeviceScan(ent
, scanning
);
4290 if (sigaction(SIGALRM
, &defaultaction
, NULL
)){
4291 PrintOut(LOG_CRIT
, "Unable to clear SCSI timeout mechanism.\n");
4295 retscsi
=SCSIDeviceScan(ent
, scanning
);
4298 // Now scan SCSI device...
4301 PrintOut(LOG_CRIT
, "Device %s timed out (poorly-implemented USB device?)\n", ent
->name
);
4302 CanNotRegister(ent
->name
, "SCSI", ent
->lineno
, scanning
);
4305 // move onto the list of scsi devices
4307 while (numdevscsi
>=scsidevlist_max
)
4308 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
4309 scsidevlist
[numdevscsi
++]=ent
;
4313 // if device is explicitly listed and we can't register it, then
4314 // exit unless the user has specified that the device is removable
4315 if (cfgentries
[i
] && !scanning
){
4316 if (ent
->removable
|| quit
==2)
4317 PrintOut(LOG_INFO
, "Device %s not available\n", ent
->name
);
4319 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", ent
->name
);
4324 // free up memory if device could not be registered
4325 RmConfigEntry(cfgentries
+i
, __LINE__
);
4334 int main(int argc
, char **argv
)
4336 // Windows: internal main function started direct or by service control manager
4337 static int smartd_main(int argc
, char **argv
)
4340 // external control variables for ATA disks
4341 smartmonctrl control
;
4343 // is it our first pass through?
4346 // next time to wake up
4349 // for simplicity, null all global communications variables/lists
4351 memset(con
, 0,sizeof(control
));
4353 // parse input and print header and usage info if needed
4354 ParseOpts(argc
,argv
);
4356 // do we mute printing from ataprint commands?
4357 con
->printing_switchable
=0;
4358 con
->dont_print
=debugmode
?0:1;
4360 // don't exit on bad checksums
4361 con
->checksumfail
=0;
4363 // the main loop of the code
4366 // are we exiting from a signal?
4367 if (caughtsigEXIT
) {
4368 // are we exiting with SIGTERM?
4369 int isterm
=(caughtsigEXIT
==SIGTERM
);
4370 int isquit
=(caughtsigEXIT
==SIGQUIT
);
4371 int isok
=debugmode
?isterm
|| isquit
:isterm
;
4373 PrintOut(isok
?LOG_INFO
:LOG_CRIT
, "smartd received signal %d: %s\n",
4374 caughtsigEXIT
, strsignal(caughtsigEXIT
));
4376 EXIT(isok
?0:EXIT_SIGNAL
);
4379 // Should we (re)read the config file?
4380 if (firstpass
|| caughtsigHUP
){
4381 int entries
, scanning
=0;
4385 // Workaround for missing SIGQUIT via keyboard on Cygwin
4386 if (caughtsigHUP
==2) {
4387 // Simulate SIGQUIT if another SIGINT arrives soon
4390 if (caughtsigHUP
==2) {
4391 caughtsigEXIT
=SIGQUIT
;
4399 "Signal HUP - rereading configuration file %s\n":
4400 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME
" quits)\n\n",
4404 // clears cfgentries, (re)reads config file, makes >=0 entries
4405 entries
=ReadOrMakeConfigEntries(&scanning
);
4408 // checks devices, then moves onto ata/scsi list or deallocates.
4409 RegisterDevices(scanning
);
4411 else if (quit
==2 || ((quit
==0 || quit
==1) && !firstpass
)) {
4412 // user has asked to continue on error in configuration file
4414 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
4417 // exit with configuration file error status
4418 int status
= (entries
==-3 ? EXIT_READCONF
: entries
==-2 ? EXIT_NOCONF
: EXIT_BADCONF
);
4422 // Log number of devices we are monitoring...
4423 if (numdevata
+numdevscsi
|| quit
==2 || (quit
==1 && !firstpass
))
4424 PrintOut(LOG_INFO
,"Monitoring %d ATA and %d SCSI devices\n",
4425 numdevata
, numdevscsi
);
4427 PrintOut(LOG_INFO
,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4432 // user has asked to print test schedule
4433 PrintTestSchedule(atadevlist
, scsidevlist
);
4441 // check all devices once,
4442 // self tests are not started in first pass unless '-q onecheck' is specified
4443 CheckDevicesOnce(atadevlist
, scsidevlist
, (!firstpass
|| quit
==3));
4445 // user has asked us to exit after first check
4447 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4448 "smartd is exiting (exit status 0)\n");
4452 // fork into background if needed
4453 if (firstpass
&& !debugmode
) {
4457 // set exit and signal handlers, write PID file, set wake-up time
4459 Initialize(&wakeuptime
);
4463 // sleep until next check time, or a signal arrives
4464 wakeuptime
=dosleep(wakeuptime
);
4470 // Main function for Windows
4471 int main(int argc
, char **argv
){
4472 // Options for smartd windows service
4473 static const daemon_winsvc_options svc_opts
= {
4474 "--service", // cmd_opt
4475 "smartd", "SmartD Service", // servicename, displayname
4477 "Controls and monitors storage devices using the Self-Monitoring, "
4478 "Analysis and Reporting Technology System (S.M.A.R.T.) "
4479 "built into ATA and SCSI Hard Drives. "
4482 // daemon_main() handles daemon and service specific commands
4483 // and starts smartd_main() directly, from a new process,
4484 // or via service control manager
4485 return daemon_main("smartd", &svc_opts
, smartd_main
, argc
, argv
);