2 * Home page of code is: http://smartmontools.sourceforge.net
4 * Copyright (C) 2002-6 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * You should have received a copy of the GNU General Public License
13 * (for example COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16 * This code was originally developed as a Senior Thesis by Michael Cornwell
17 * at the Concurrent Systems Laboratory (now part of the Storage Systems
18 * Research Center), Jack Baskin School of Engineering, University of
19 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
27 // unconditionally included files
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
49 // see which system files to conditionally include
52 // conditionally included files
53 #ifdef HAVE_GETOPT_LONG
62 #pragma warning(disable:4761) // "conversion supplied"
63 typedef unsigned short mode_t
;
66 #include <io.h> // umask()
67 #include <process.h> // getpid()
72 // BOOL WINAPI FreeConsole(void);
73 extern "C" int __stdcall
FreeConsole(void);
74 #include <io.h> // setmode()
77 // locally included files
82 #include "knowndrives.h"
88 #include "hostname_win32.h" // gethost/domainname()
89 #define HAVE_GETHOSTNAME 1
90 #define HAVE_GETDOMAINNAME 1
91 // fork()/signal()/initd simulation for native Windows
92 #include "daemon_win32.h" // daemon_main/detach/signal()
94 #define SIGNALFN daemon_signal
95 #define strsignal daemon_strsignal
96 #define sleep daemon_sleep
97 #undef EXIT // see utility.h
98 #define EXIT(x) { exitstatus = daemon_winsvc_exitcode = (x); exit((x)); }
99 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
100 #define SIGQUIT SIGBREAK
101 #define SIGQUIT_KEYNAME "CONTROL-Break"
104 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
105 #define SIGQUIT_KEYNAME "2x CONTROL-C"
107 #define SIGQUIT_KEYNAME "CONTROL-\\"
111 #if defined (__SVR4) && defined (__sun)
112 extern "C" int getdomainname(char *, int); // no declaration in header files!
115 #define ARGUSED(x) ((void)(x))
117 // CVS identification information for the *.cpp and *.h files
118 extern const char *atacmdnames_c_cvsid
, *atacmds_c_cvsid
, *ataprint_c_cvsid
, *escalade_c_cvsid
,
119 *knowndrives_c_cvsid
, *os_XXXX_c_cvsid
, *scsicmds_c_cvsid
, *utility_c_cvsid
;
121 static const char *filenameandversion
="$Id: smartd.cpp,v 1.383 2006/11/10 04:59:02 dpgilbert Exp $";
122 #ifdef NEED_SOLARIS_ATA_CODE
123 extern const char *os_solaris_ata_s_cvsid
;
126 extern const char *daemon_win32_c_cvsid
, *hostname_win32_c_cvsid
, *syslog_win32_c_cvsid
;
128 const char *smartd_c_cvsid
="$Id: smartd.cpp,v 1.383 2006/11/10 04:59:02 dpgilbert Exp $"
129 ATACMDS_H_CVSID ATAPRINT_H_CVSID CONFIG_H_CVSID
130 #ifdef DAEMON_WIN32_H_CVSID
133 EXTERN_H_CVSID INT64_H_CVSID
134 #ifdef HOSTNAME_WIN32_H_CVSID
135 HOSTNAME_WIN32_H_CVSID
137 KNOWNDRIVES_H_CVSID SCSICMDS_H_CVSID SMARTD_H_CVSID
138 #ifdef SYSLOG_H_CVSID
143 extern const char *reportbug
;
145 // GNU copyleft statement. Needed for GPL purposes.
146 const char *copyleftstring
="smartd comes with ABSOLUTELY NO WARRANTY. This is\n"
147 "free software, and you are welcome to redistribute it\n"
148 "under the terms of the GNU General Public License\n"
149 "Version 2. See http://www.gnu.org for further details.\n\n";
151 extern unsigned char debugmode
;
153 // command-line: how long to sleep between checks
154 static int checktime
=CHECKTIME
;
156 // command-line: name of PID file (NULL for no pid file)
157 static char* pid_file
=NULL
;
159 // configuration file name
161 static char* configfile
= SMARTMONTOOLS_SYSCONFDIR
"/" CONFIGFILENAME
;
163 static char* configfile
= "./" CONFIGFILENAME
;
165 // configuration file "name" if read from stdin
166 static /*const*/ char * const configfile_stdin
= "<stdin>";
167 // allocated memory for alternate configuration file name
168 static char* configfile_alt
= NULL
;
170 // command-line: when should we exit?
173 // command-line; this is the default syslog(3) log facility to use.
174 static int facility
=LOG_DAEMON
;
177 // command-line: running as service, so don't fork()
178 static int is_service
=0;
181 // used for control of printing, passing arguments to atacmds.c
182 smartmonctrl
*con
=NULL
;
184 // pointers to (real or simulated) entries in configuration file, and
185 // maximum space currently allocated for these entries.
186 cfgfile
**cfgentries
=NULL
;
187 int cfgentries_max
=0;
189 // pointers to ATA and SCSI devices being monitored, maximum and
191 cfgfile
**atadevlist
=NULL
, **scsidevlist
=NULL
;
192 int atadevlist_max
=0, scsidevlist_max
=0;
193 int numdevata
=0, numdevscsi
=0;
195 // track memory usage
196 extern int64_t bytes
;
199 extern int exitstatus
;
201 // set to one if we catch a USR1 (check devices now)
202 volatile int caughtsigUSR1
=0;
205 // set to one if we catch a USR2 (toggle debug mode)
206 volatile int caughtsigUSR2
=0;
209 // set to one if we catch a HUP (reload config file). In debug mode,
210 // set to two, if we catch INT (also reload config file).
211 volatile int caughtsigHUP
=0;
213 // set to signal value if we catch INT, QUIT, or TERM
214 volatile int caughtsigEXIT
=0;
217 // stack environment if we time out during SCSI access (USB devices)
218 jmp_buf registerscsienv
;
221 // translate cfg->pending into the correct Attribute numbers
222 void TranslatePending(unsigned short pending
, unsigned char *current
, unsigned char *offline
) {
224 unsigned char curr
= CURR_PEND(pending
);
225 unsigned char off
= OFF_PEND(pending
);
227 // look for special value of CUR_UNC_DEFAULT that means DONT
228 // monitor. 0 means DO test.
229 if (curr
==CUR_UNC_DEFAULT
)
232 curr
=CUR_UNC_DEFAULT
;
234 // look for special value of OFF_UNC_DEFAULT that means DONT
235 // monitor. 0 means DO TEST.
236 if (off
==OFF_UNC_DEFAULT
)
248 // free all memory associated with selftest part of configfile entry. Return NULL
249 testinfo
* FreeTestData(testinfo
*data
){
251 // make sure we have something to do.
255 // free space for text pattern
256 data
->regex
=FreeNonZero(data
->regex
, -1, __LINE__
, filenameandversion
);
258 // free compiled expression
259 regfree(&(data
->cregex
));
261 // make sure that no sign of the compiled expression is left behind
262 // (just in case, to help detect bugs if we ever try and refer to
264 memset(&(data
->cregex
), '0', sizeof(regex_t
));
266 // free remaining memory space
267 data
=FreeNonZero(data
, sizeof(testinfo
), __LINE__
, filenameandversion
);
272 cfgfile
**AllocateMoreSpace(cfgfile
**oldarray
, int *oldsize
, char *listname
){
273 // for now keep BLOCKSIZE small to help detect coding problems.
274 // Perhaps increase in the future.
275 const int BLOCKSIZE
=8;
278 int news
= olds
+ BLOCKSIZE
;
279 cfgfile
**newptr
=(cfgfile
**)realloc(oldarray
, news
*sizeof(cfgfile
*));
281 // did we get more space?
284 // clear remaining entries ala calloc()
285 for (i
=olds
; i
<news
; i
++)
288 bytes
+= BLOCKSIZE
*sizeof(cfgfile
*);
293 PrintOut(LOG_INFO
, "allocating %d slots for %s\n", BLOCKSIZE
, listname
);
299 PrintOut(LOG_CRIT
, "out of memory for allocating %s list\n", listname
);
303 void PrintOneCVS(const char *a_cvs_id
){
305 printone(out
,a_cvs_id
);
306 PrintOut(LOG_INFO
,"%s",out
);
310 // prints CVS identity information for the executable
312 char *configargs
=strlen(SMARTMONTOOLS_CONFIGURE_ARGS
)?SMARTMONTOOLS_CONFIGURE_ARGS
:"[no arguments given]";
314 PrintOut(LOG_INFO
,(char *)copyleftstring
);
315 PrintOut(LOG_INFO
,"CVS version IDs of files used to build this code are:\n");
316 PrintOneCVS(atacmdnames_c_cvsid
);
317 PrintOneCVS(atacmds_c_cvsid
);
318 PrintOneCVS(ataprint_c_cvsid
);
320 PrintOneCVS(daemon_win32_c_cvsid
);
323 PrintOneCVS(hostname_win32_c_cvsid
);
325 PrintOneCVS(knowndrives_c_cvsid
);
326 PrintOneCVS(os_XXXX_c_cvsid
);
327 #ifdef NEED_SOLARIS_ATA_CODE
328 PrintOneCVS( os_solaris_ata_s_cvsid
);
330 PrintOneCVS(scsicmds_c_cvsid
);
331 PrintOneCVS(smartd_c_cvsid
);
333 PrintOneCVS(syslog_win32_c_cvsid
);
335 PrintOneCVS(utility_c_cvsid
);
336 PrintOut(LOG_INFO
, "\nsmartmontools release " PACKAGE_VERSION
" dated " SMARTMONTOOLS_RELEASE_DATE
" at " SMARTMONTOOLS_RELEASE_TIME
"\n");
337 PrintOut(LOG_INFO
, "smartmontools build host: " SMARTMONTOOLS_BUILD_HOST
"\n");
338 PrintOut(LOG_INFO
, "smartmontools build configured: " SMARTMONTOOLS_CONFIGURE_DATE
"\n");
339 PrintOut(LOG_INFO
, "smartd compile dated " __DATE__
" at "__TIME__
"\n");
340 PrintOut(LOG_INFO
, "smartmontools configure arguments: %s\n", configargs
);
344 // Removes config file entry, freeing all memory
345 void RmConfigEntry(cfgfile
**anentry
, int whatline
){
349 // pointer should never be null!
351 PrintOut(LOG_CRIT
,"Internal error in RmConfigEntry() at line %d of file %s\n%s",
352 whatline
, filenameandversion
, reportbug
);
356 // only remove entries that exist!
360 // entry exists -- free all of its memory
361 cfg
->name
= FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
362 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
363 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
364 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
365 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
367 cfg
->mailwarn
->address
= FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
368 cfg
->mailwarn
->emailcmdline
= FreeNonZero(cfg
->mailwarn
->emailcmdline
, -1,__LINE__
,filenameandversion
);
369 cfg
->mailwarn
= FreeNonZero(cfg
->mailwarn
, sizeof(maildata
),__LINE__
,filenameandversion
);
371 cfg
->testdata
= FreeTestData(cfg
->testdata
);
372 *anentry
= FreeNonZero(cfg
, sizeof(cfgfile
),__LINE__
,filenameandversion
);
377 // deallocates all memory associated with cfgentries list
378 void RmAllConfigEntries(){
381 for (i
=0; i
<cfgentries_max
; i
++)
382 RmConfigEntry(cfgentries
+i
, __LINE__
);
384 cfgentries
=FreeNonZero(cfgentries
, sizeof(cfgfile
*)*cfgentries_max
, __LINE__
, filenameandversion
);
390 // deallocates all memory associated with ATA/SCSI device lists
391 void RmAllDevEntries(){
394 for (i
=0; i
<atadevlist_max
; i
++)
395 RmConfigEntry(atadevlist
+i
, __LINE__
);
397 atadevlist
=FreeNonZero(atadevlist
, sizeof(cfgfile
*)*atadevlist_max
, __LINE__
, filenameandversion
);
400 for (i
=0; i
<scsidevlist_max
; i
++)
401 RmConfigEntry(scsidevlist
+i
, __LINE__
);
403 scsidevlist
=FreeNonZero(scsidevlist
, sizeof(cfgfile
*)*scsidevlist_max
, __LINE__
, filenameandversion
);
409 // remove the PID file
410 void RemovePidFile(){
412 if ( -1==unlink(pid_file
) )
413 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
414 pid_file
, strerror(errno
));
415 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
421 // Note if we catch a SIGUSR1
422 void USR1handler(int sig
){
429 // Note if we catch a SIGUSR2
430 void USR2handler(int sig
){
437 // Note if we catch a HUP (or INT in debug mode)
438 void HUPhandler(int sig
){
446 // signal handler for TERM, QUIT, and INT (if not in debug mode)
447 void sighandler(int sig
){
454 // signal handler that prints Goodbye message and removes pidfile
457 // clean up memory -- useful for debugging
458 RmAllConfigEntries();
461 // delete PID file, if one was created
464 // remove alternate configfile name
465 configfile_alt
=FreeNonZero(configfile_alt
, -1,__LINE__
,filenameandversion
);
467 // useful for debugging -- have we managed memory correctly?
468 if (debugmode
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
469 PrintOut(LOG_INFO
, "Memory still allocated for devices at exit is %" PRId64
" bytes.\n", bytes
);
471 // if we are exiting because of a code bug, tell user
472 if (exitstatus
==EXIT_BADCODE
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
473 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
475 if (exitstatus
==0 && bytes
)
476 exitstatus
=EXIT_BADCODE
;
478 // and this should be the final output from smartd before it exits
479 PrintOut(exitstatus
?LOG_CRIT
:LOG_INFO
, "smartd is exiting (exit status %d)\n", exitstatus
);
484 #define ENVLENGTH 1024
486 // a replacement for setenv() which is not available on all platforms.
487 // Note that the string passed to putenv must not be freed or made
488 // invalid, since a pointer to it is kept by putenv(). This means that
489 // it must either be a static buffer or allocated off the heap. The
490 // string can be freed if the environment variable is redefined or
491 // deleted via another call to putenv(). So we keep these on the stack
492 // as long as the popen() call is underway.
493 int exportenv(char* stackspace
, const char *name
, const char *value
){
494 snprintf(stackspace
,ENVLENGTH
, "%s=%s", name
, value
);
495 return putenv(stackspace
);
498 char* dnsdomain(const char* hostname
) {
500 #ifdef HAVE_GETHOSTBYNAME
503 if ((hp
= gethostbyname(hostname
))) {
504 // Does this work if gethostbyname() returns an IPv6 name in
505 // colon/dot notation? [BA]
506 if ((p
= strchr(hp
->h_name
, '.')))
517 // If either address or executable path is non-null then send and log
518 // a warning email, or execute executable
519 void MailWarning(cfgfile
*cfg
, int which
, char *fmt
, ...){
520 char command
[2048], message
[256], hostname
[256], domainname
[256], additional
[256],fullmessage
[1024];
521 char original
[256], further
[256], nisdomain
[256], subject
[256],dates
[DATEANDEPOCHLEN
];
522 char environ_strings
[11][ENVLENGTH
];
525 const int day
=24*3600;
533 "FailedHealthCheck", // 5
534 "FailedReadSmartData", // 6
535 "FailedReadSmartErrorLog", // 7
536 "FailedReadSmartSelfTestLog", // 8
537 "FailedOpenDevice", // 9
538 "CurrentPendingSector", // 10
539 "OfflineUncorrectableSector", // 11
543 char *address
, *executable
;
545 maildata
* data
=cfg
->mailwarn
;
549 char stdinbuf
[1024]; int boxmsgoffs
, boxtype
;
551 const char *newadd
=NULL
, *newwarn
=NULL
;
552 const char *unknown
="[Unknown]";
554 // See if user wants us to send mail
558 address
=data
->address
;
559 executable
=data
->emailcmdline
;
561 if (!address
&& !executable
)
564 // which type of mail are we sending?
565 mail
=(data
->maillog
)+which
;
568 if (data
->emailfreq
<1 || data
->emailfreq
>3) {
569 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg->mailwarn->emailfreq=%d\n",data
->emailfreq
);
572 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
573 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
574 which
, (int)sizeof(whichfail
));
578 // Return if a single warning mail has been sent.
579 if ((data
->emailfreq
==1) && mail
->logged
)
582 // Return if this is an email test and one has already been sent.
583 if (which
== 0 && mail
->logged
)
586 // To decide whether to send mail, we need to know what time it is.
589 // Return if less than one day has gone by
590 if (data
->emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
593 // Return if less than 2^(logged-1) days have gone by
594 if (data
->emailfreq
==3 && mail
->logged
){
595 days
=0x01<<(mail
->logged
-1);
597 if (epoch
<(mail
->lastsent
+days
))
601 // record the time of this mail message, and the first mail message
603 mail
->firstsent
=epoch
;
604 mail
->lastsent
=epoch
;
606 // get system host & domain names (not null terminated if length=MAX)
607 #ifdef HAVE_GETHOSTNAME
608 if (gethostname(hostname
, 256))
609 strcpy(hostname
, unknown
);
613 p
= dnsdomain(hostname
);
615 strncpy(domainname
, p
, 255);
616 domainname
[255]='\0';
618 strcpy(domainname
, unknown
);
621 strcpy(hostname
, unknown
);
622 strcpy(domainname
, unknown
);
625 #ifdef HAVE_GETDOMAINNAME
626 if (getdomainname(nisdomain
, 256))
627 strcpy(nisdomain
, unknown
);
631 strcpy(nisdomain
, unknown
);
634 // print warning string into message
636 vsnprintf(message
, 256, fmt
, ap
);
639 // appropriate message about further information
640 additional
[0]=original
[0]=further
[0]='\0';
642 sprintf(further
,"You can also use the smartctl utility for further investigation.\n");
644 switch (data
->emailfreq
){
646 sprintf(additional
,"No additional email messages about this problem will be sent.\n");
649 sprintf(additional
,"Another email message will be sent in 24 hours if the problem persists.\n");
652 sprintf(additional
,"Another email message will be sent in %d days if the problem persists\n",
653 (0x01)<<mail
->logged
);
656 if (data
->emailfreq
>1 && mail
->logged
){
657 dateandtimezoneepoch(dates
, mail
->firstsent
);
658 sprintf(original
,"The original email about this issue was sent at %s\n", dates
);
662 snprintf(subject
, 256,"SMART error (%s) detected on host: %s", whichfail
[which
], hostname
);
664 // If the user has set cfg->emailcmdline, use that as mailer, else "mail" or "mailx".
666 #ifdef DEFAULT_MAILER
667 executable
= DEFAULT_MAILER
;
672 executable
= "blat"; // http://blat.sourceforge.net/
676 // make a private copy of address with commas replaced by spaces
677 // to separate recipients
679 address
=CustomStrDup(data
->address
, 1, __LINE__
, filenameandversion
);
680 #ifndef _WIN32 // blat mailer needs comma
683 while ((comma
=strchr(comma
, ',')))
689 // Export information in environment variables that will be useful
691 exportenv(environ_strings
[0], "SMARTD_MAILER", executable
);
692 exportenv(environ_strings
[1], "SMARTD_MESSAGE", message
);
693 exportenv(environ_strings
[2], "SMARTD_SUBJECT", subject
);
694 dateandtimezoneepoch(dates
, mail
->firstsent
);
695 exportenv(environ_strings
[3], "SMARTD_TFIRST", dates
);
696 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
697 exportenv(environ_strings
[4], "SMARTD_TFIRSTEPOCH", dates
);
698 exportenv(environ_strings
[5], "SMARTD_FAILTYPE", whichfail
[which
]);
700 exportenv(environ_strings
[6], "SMARTD_ADDRESS", address
);
701 exportenv(environ_strings
[7], "SMARTD_DEVICESTRING", cfg
->name
);
703 switch (cfg
->controller_type
) {
704 case CONTROLLER_3WARE_678K
:
705 case CONTROLLER_3WARE_9000_CHAR
:
706 case CONTROLLER_3WARE_678K_CHAR
:
708 char *s
,devicetype
[16];
709 sprintf(devicetype
, "3ware,%d", cfg
->controller_port
-1);
710 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
711 if ((s
=strchr(cfg
->name
, ' ')))
713 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
718 case CONTROLLER_CCISS
:
720 char *s
,devicetype
[16];
721 sprintf(devicetype
, "cciss,%d", cfg
->controller_port
-1);
722 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
723 if ((s
=strchr(cfg
->name
, ' ')))
725 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
731 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "ata");
732 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
734 case CONTROLLER_MARVELL_SATA
:
735 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "marvell");
736 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
738 case CONTROLLER_SCSI
:
739 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "scsi");
740 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
743 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "sat");
744 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
748 char *s
,devicetype
[16];
749 sprintf(devicetype
, "hpt,%d/%d/%d", cfg
->hpt_data
[0],
750 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
751 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
752 if ((s
=strchr(cfg
->name
, ' ')))
754 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
761 snprintf(fullmessage
, 1024,
762 "This email was generated by the smartd daemon running on:\n\n"
765 " NIS domain: %s\n\n"
766 "The following warning/error was logged by the smartd daemon:\n\n"
768 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
770 hostname
, domainname
, nisdomain
, message
, further
, original
, additional
);
771 exportenv(environ_strings
[10], "SMARTD_FULLMESSAGE", fullmessage
);
773 // now construct a command to send this as EMAIL
776 snprintf(command
, 2048,
777 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
778 "%sENDMAIL\n", subject
, address
, fullmessage
);
780 snprintf(command
, 2048, "%s 2>&1", executable
);
782 // tell SYSLOG what we are about to do...
783 newadd
=address
?address
:"<nomailer>";
784 newwarn
=which
?"Warning via":"Test of";
786 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
787 which
?"Sending warning via":"Executing test of", executable
, newadd
);
789 // issue the command to send mail or to run the user's executable
791 if (!(pfp
=popen(command
, "r")))
792 // failed to popen() mail process
793 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
794 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
798 char buffer
[EBUFLEN
];
800 // if unexpected output on stdout/stderr, null terminate, print, and flush
801 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
803 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
805 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
806 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
808 // flush pipe if needed
809 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
812 // tell user that pipe was flushed, or that something is really wrong
813 if (count
&& count
<EBUFLEN
)
814 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
815 newwarn
, executable
, newadd
);
817 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
818 newwarn
, executable
, newadd
);
821 // if something went wrong with mail process, print warning
823 if (-1==(status
=pclose(pfp
)))
824 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
825 errno
?strerror(errno
):"");
827 // mail process apparently succeeded. Check and report exit status
830 if (WIFEXITED(status
)) {
831 // exited 'normally' (but perhaps with nonzero status)
832 status8
=WEXITSTATUS(status
);
835 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
836 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
838 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
839 newwarn
, executable
, newadd
, status
, status8
);
841 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
844 if (WIFSIGNALED(status
))
845 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
846 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
848 // this branch is probably not possible. If subprocess is
849 // stopped then pclose() should not return.
850 if (WIFSTOPPED(status
))
851 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
852 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
859 // No "here-documents" on Windows, so must use separate commandline and stdin
860 command
[0] = stdinbuf
[0] = 0;
861 boxtype
= -1; boxmsgoffs
= 0;
862 newadd
= "<nomailer>";
864 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
865 int addroffs
= (!strncmp(address
, "sys", 3) ? 3 : 0);
866 if (!strncmp(address
+addroffs
, "msgbox", 6) && (!address
[addroffs
+6] || address
[addroffs
+6] == ',')) {
867 boxtype
= (addroffs
> 0 ? 1 : 0);
869 if (address
[addroffs
])
875 if (address
[addroffs
]) {
876 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
877 snprintf(command
, sizeof(command
),
878 "%s - -q -subject \"%s\" -to \"%s\"",
879 executable
, subject
, address
+addroffs
);
880 newadd
= address
+addroffs
;
882 // Message for mail [0...] and messagebox [boxmsgoffs...]
883 snprintf(stdinbuf
, sizeof(stdinbuf
),
884 "This email was generated by the smartd daemon running on:\n\n"
887 // " NIS domain: %s\n"
889 "The following warning/error was logged by the smartd daemon:\n\n"
891 "For details see the event log or log file of smartd.\n\n"
894 hostname
, /*domainname, */ nisdomain
, &boxmsgoffs
, message
, further
, original
, additional
);
897 snprintf(command
, sizeof(command
), "%s", executable
);
899 newwarn
=which
?"Warning via":"Test of";
902 daemon_messagebox(boxtype
, subject
, stdinbuf
+boxmsgoffs
);
903 PrintOut(LOG_INFO
,"%s message box\n", newwarn
);
906 char stdoutbuf
[800]; // < buffer in syslog_win32::vsyslog()
909 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
910 (which
?"Sending warning via":"Executing test of"), executable
, newadd
);
911 rc
= daemon_spawn(command
, stdinbuf
, strlen(stdinbuf
), stdoutbuf
, sizeof(stdoutbuf
));
912 if (rc
>= 0 && stdoutbuf
[0])
913 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
914 newwarn
, executable
, newadd
, strlen(stdoutbuf
), stdoutbuf
);
916 PrintOut(LOG_CRIT
,"%s %s to %s: failed, exit status %d\n",
917 newwarn
, executable
, newadd
, rc
);
919 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
924 // increment mail sent counter
927 // free copy of address (without commas)
928 address
=FreeNonZero(address
, -1, __LINE__
, filenameandversion
);
933 // Printing function for watching ataprint commands, or losing them
934 // [From GLIBC Manual: Since the prototype doesn't specify types for
935 // optional arguments, in a call to a variadic function the default
936 // argument promotions are performed on the optional argument
937 // values. This means the objects of type char or short int (whether
938 // signed or not) are promoted to either int or unsigned int, as
940 void pout(const char *fmt
, ...){
943 // get the correct time in syslog()
944 FixGlibcTimeZoneBug();
945 // initialize variable argument list
947 // in debug==1 mode we will print the output from the ataprint.o functions!
948 if (debugmode
&& debugmode
!=2)
950 if (facility
== LOG_LOCAL1
) // logging to stdout
951 vfprintf(stderr
,fmt
,ap
);
955 // in debug==2 mode we print output from knowndrives.o functions
956 else if (debugmode
==2 || con
->reportataioctl
|| con
->reportscsiioctl
|| con
->controller_port
) {
957 openlog("smartd", LOG_PID
, facility
);
958 vsyslog(LOG_INFO
, fmt
, ap
);
966 // This function prints either to stdout or to the syslog as needed.
967 // This function is also used by utility.cpp to report LOG_CRIT errors.
968 void PrintOut(int priority
, const char *fmt
, ...){
971 // get the correct time in syslog()
972 FixGlibcTimeZoneBug();
973 // initialize variable argument list
977 if (facility
== LOG_LOCAL1
) // logging to stdout
978 vfprintf(stderr
,fmt
,ap
);
983 openlog("smartd", LOG_PID
, facility
);
984 vsyslog(priority
,fmt
,ap
);
991 // Forks new process, closes ALL file descriptors, redirects stdin,
992 // stdout, and stderr. Not quite daemon(). See
993 // http://www.iar.unlp.edu.ar/~fede/revistas/lj/Magazines/LJ47/2335.html
994 // for a good description of why we do things this way.
1000 // flush all buffered streams. Else we might get two copies of open
1001 // streams since both parent and child get copies of the buffers.
1004 if ((pid
=fork()) < 0) {
1006 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1010 // we are the parent process -- exit cleanly
1013 // from here on, we are the child process.
1016 // Fork one more time to avoid any possibility of having terminals
1017 if ((pid
=fork()) < 0) {
1019 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1023 // we are the parent process -- exit cleanly
1026 // Now we are the child's child...
1028 // close any open file descriptors
1029 for (i
=getdtablesize();i
>=0;--i
)
1033 // Cygwin's setsid() does not detach the process from Windows console
1035 #endif // __CYGWIN__
1037 // redirect any IO attempts to /dev/null for stdin
1038 i
=open("/dev/null",O_RDWR
);
1046 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1050 // No fork() on native Win32
1051 // Detach this process from console
1053 if (daemon_detach("smartd")) {
1054 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1057 // stdin/out/err now closed if not redirected
1063 // create a PID file containing the current process id
1064 void WritePidFile() {
1067 pid_t pid
= getpid();
1072 old_umask
= umask(0077); // rwx------
1074 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1075 old_umask
= umask(0033); // rwxr--r--
1077 fp
= fopen(pid_file
, "w");
1081 } else if (fprintf(fp
, "%d\n", (int)pid
) <= 0) {
1083 } else if (fclose(fp
) != 0) {
1087 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
);
1090 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
, (int)pid
);
1095 // Prints header identifying version of code and home
1097 #ifdef HAVE_GET_OS_VERSION_STR
1098 const char * ver
= get_os_version_str();
1100 const char * ver
= SMARTMONTOOLS_BUILD_HOST
;
1102 PrintOut(LOG_INFO
,"smartd version %s [%s] Copyright (C) 2002-6 Bruce Allen\n", PACKAGE_VERSION
, ver
);
1103 PrintOut(LOG_INFO
,"Home page is " PACKAGE_HOMEPAGE
"\n\n");
1107 // prints help info for configuration file Directives
1110 "Configuration file (%s) Directives (after device name):\n"
1111 " -d TYPE Set the device type: ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N, cciss,N\n"
1112 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1113 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1114 " -S VAL Enable/disable attribute autosave (on/off)\n"
1115 " -n MODE No check if: never[,q], sleep[,q], standby[,q], idle[,q]\n"
1116 " -H Monitor SMART Health Status, report if failed\n"
1117 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1118 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1119 " -f Monitor 'Usage' Attributes, report failures\n"
1120 " -m ADD Send email warning to address ADD\n"
1121 " -M TYPE Modify email warning behavior (see man page)\n"
1122 " -p Report changes in 'Prefailure' Attributes\n"
1123 " -u Report changes in 'Usage' Attributes\n"
1124 " -t Equivalent to -p and -u Directives\n"
1125 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1126 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1127 " -i ID Ignore Attribute ID for -f Directive\n"
1128 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1129 " -C ID Monitor Current Pending Sectors in Attribute ID\n"
1130 " -U ID Monitor Offline Uncorrectable Sectors in Attribute ID\n"
1131 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1132 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1133 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1134 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1135 " -F TYPE Firmware bug workaround: none, samsung, samsung2\n"
1136 " # Comment: text after a hash sign is ignored\n"
1137 " \\ Line continuation character\n"
1138 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1139 "Use ID = 0 to turn off -C and/or -U Directives\n"
1140 "Example: /dev/hda -a\n",
1145 /* Returns a pointer to a static string containing a formatted list of the valid
1146 arguments to the option opt or NULL on failure. */
// Every string returned below is a literal, so callers must not free or
// modify the result.  The usage text in this file prints the lists for the
// 'q' and 'r' options by calling GetValidArgList('q') / GetValidArgList('r').
// NOTE(review): the lines that select between the returns below are not
// present in this listing, so the option-letter -> string mapping cannot be
// read off here; confirm against the complete file before relying on it.
1147 const char *GetValidArgList(char opt
) {
1150 return "<FILE_NAME>, -";
1152 return "valid_regular_expression";
1154 return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
1156 return "nodev, errors, nodevstartup, never, onecheck, showtests";
1158 return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
1160 return "<FILE_NAME>";
1162 return "<INTEGER_SECONDS>";
1168 /* prints help information for command syntax */
// All output goes through PrintOut() at LOG_INFO level, one call per line of
// help text.  The text exists in two layouts:
//   * inside "#ifdef HAVE_GETOPT_LONG": short and long option spellings, with
//     the description on a separate indented line;
//   * the compact one-line-per-option layout further down (short options only).
// The default configuration file name (configfile) and the valid argument
// lists for -q and -r (GetValidArgList) are substituted at run time.
// NOTE(review): the function header and the #else/#endif lines that pair
// with the conditionals below are not present in this listing.
1170 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1171 #ifdef HAVE_GETOPT_LONG
1172 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1173 PrintOut(LOG_INFO
," Read configuration file NAME or stdin [default is %s]\n\n", configfile
);
1174 PrintOut(LOG_INFO
," -d, --debug\n");
1175 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1176 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1177 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1178 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1179 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1180 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1181 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1182 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
// Two alternative descriptions of -l follow (syslog facility vs. log file /
// event log); the conditional that chooses between them is not visible here.
1184 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1186 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1188 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1189 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1190 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1191 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1192 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1193 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
// The --service option is only described on Windows and Cygwin builds.  Two
// spellings of the install/remove commands are listed ("smartd ..." and
// "/etc/rc.d/init.d/smartd ..."); the directive selecting one of them is not
// visible in this listing.
1194 #if defined(_WIN32) || defined(__CYGWIN__)
1195 PrintOut(LOG_INFO
," --service\n");
1196 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1198 PrintOut(LOG_INFO
," smartd install [options]\n");
1199 PrintOut(LOG_INFO
," Remove service with:\n");
1200 PrintOut(LOG_INFO
," smartd remove\n\n");
1202 PrintOut(LOG_INFO
," /etc/rc.d/init.d/smartd install [options]\n");
1203 PrintOut(LOG_INFO
," Remove service with:\n");
1204 PrintOut(LOG_INFO
," /etc/rc.d/init.d/smartd remove\n\n");
1206 #endif // _WIN32 || __CYGWIN__
1207 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1208 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
// Compact layout: one line per option, short spellings only.
1210 PrintOut(LOG_INFO
," -c NAME|- Read configuration file NAME or stdin [default is %s]\n", configfile
);
1211 PrintOut(LOG_INFO
," -d Start smartd in debug mode\n");
1212 PrintOut(LOG_INFO
," -D Print the configuration file Directives and exit\n");
1213 PrintOut(LOG_INFO
," -h Display this help and exit\n");
1214 PrintOut(LOG_INFO
," -i N Set interval between disk checks to N seconds, where N >= 10\n");
1215 PrintOut(LOG_INFO
," -l local? Use syslog facility local0 - local7, or daemon\n");
1216 PrintOut(LOG_INFO
," -p NAME Write PID file NAME\n");
1217 PrintOut(LOG_INFO
," -q WHEN Quit on one of: %s\n", GetValidArgList('q'));
1218 PrintOut(LOG_INFO
," -r TYPE Report transactions for one of: %s\n", GetValidArgList('r'));
1219 PrintOut(LOG_INFO
," -V Print License, Copyright, and version information\n");
// Open a device for SMART access.
//   device   - device name; a trailing " ..." part (used for 3ware and
//              highpoint entries) is cut off at the first space for the
//              duration of the deviceopen() call
//   mode     - passed through unchanged to deviceopen()
//   scanning - nonzero suppresses the "open() failed" message unless
//              debugmode is set
1223 // returns negative if problem, else fd>=0
1224 static int OpenDevice(char *device
, char *mode
, int scanning
) {
1228 // If there is an ASCII "space" character in the device name,
1229 // terminate string there. This is for 3ware and highpoint devices only.
1230 if ((s
=strchr(device
,' ')))
1234 fd
= deviceopen(device
, mode
);
1236 // if we removed a space, put it back in please
1240 // if we failed to open the device, complain!
1243 // For linux+devfs, a nonexistent device gives a strange error
1244 // message. This makes the error message a bit more sensible.
1245 // If no debug and scanning - don't print errors
1246 if (debugmode
|| !scanning
) {
// ENOENT and ENOTDIR are singled out before the message is printed with
// strerror(errno); the statement they guard is not visible in this listing.
1247 if (errno
==ENOENT
|| errno
==ENOTDIR
)
1250 PrintOut(LOG_INFO
,"Device: %s, %s, open() failed\n",
1251 device
, strerror(errno
));
1255 // device opened successfully
// Close a descriptor obtained from OpenDevice().
//   fd   - descriptor handed to deviceclose()
//   name - device name, used only in the failure message
// A nonzero result from deviceclose() is logged together with
// strerror(errno) and the descriptor number.
// NOTE(review): the return statements are not present in this listing.
1259 int CloseDevice(int fd
, char *name
){
1260 if (deviceclose(fd
)){
1261 PrintOut(LOG_INFO
,"Device: %s, %s, close(%d) failed\n", name
, strerror(errno
), fd
);
1264 // device successfully closed
// Read the SMART error log of an open ATA device and report how many errors
// it records.
//   fd   - open device descriptor
//   name - device name, used only in the failure message
1268 // returns <0 on failure
1269 int ATAErrorCount(int fd
, char *name
){
1270 struct ata_smart_errorlog log
;
1272 if (-1==ataReadErrorLog(fd
,&log
)){
1273 PrintOut(LOG_INFO
,"Device: %s, Read SMART Error Log Failed\n",name
);
1277 // return current number of ATA errors
// A zero error_log_pointer is reported as a count of 0, whatever
// ata_error_count holds.
1278 return log
.error_log_pointer
?log
.ata_error_count
:0;
// Read the SMART self-test log of an open ATA device.
//   fd   - open device descriptor
//   name - device name, used only in the failure message
// Elsewhere in this file the packed result is split with the
// SELFTEST_ERRORCOUNT() and SELFTEST_ERRORHOURS() macros.
1281 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1282 // error count, and top bits are the power-on hours of the last error.
1283 int SelfTestErrorCount(int fd
, char *name
){
1284 struct ata_smart_selftestlog log
;
1286 if (-1==ataReadSelfTestLog(fd
,&log
)){
1287 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1291 // return current number of self-test errors
// The value is produced by ataPrintSmartSelfTestlog(), called with 0 as its
// second argument.
1292 return ataPrintSmartSelfTestlog(&log
,0);
// Probe the device described by cfg as an ATA device and decide whether it
// should be monitored.  In order, the visible code:
//   1. picks an open mode from cfg->controller_type and opens the device;
//   2. copies the user's controller settings into the global 'con';
//   3. reads the IDENTIFY DEVICE data and consults the drive database;
//   4. checks for SMART support, enables SMART and applies autosave settings;
//   5. runs one capability check per requested Directive, switching off the
//      ones the drive cannot serve;
//   6. grows atadevlist if needed, announces the device and closes it again.
// Parameters:
//   cfg      - configuration entry; many of its fields are updated in place
//   scanning - passed on to OpenDevice()
// NOTE(review): the return statements and several declarations are not
// present in this listing, so the return convention is not documented here.
1295 // scan to see what ata devices there are, and if they support SMART
1296 int ATADeviceScan(cfgfile
*cfg
, int scanning
){
1297 int fd
, supported
=0;
1298 struct ata_identify_device drive
;
1299 char *name
=cfg
->name
;
1300 int retainsmartdata
=0;
1304 // should we try to register this as an ATA device?
1305 switch (cfg
->controller_type
) {
1306 case CONTROLLER_ATA
:
1307 case CONTROLLER_3WARE_678K
:
1308 case CONTROLLER_MARVELL_SATA
:
1309 case CONTROLLER_HPT
:
1310 case CONTROLLER_UNKNOWN
:
1313 case CONTROLLER_3WARE_678K_CHAR
:
1314 mode
="ATA_3WARE_678K";
1316 case CONTROLLER_3WARE_9000_CHAR
:
1317 mode
="ATA_3WARE_9000";
1319 case CONTROLLER_SAT
:
1323 // not a recognized ATA or SATA device. We should never enter
1329 if ((fd
=OpenDevice(name
, mode
, scanning
))<0)
1330 // device open failed
1332 PrintOut(LOG_INFO
,"Device: %s, opened\n", name
);
1334 // pass user settings on to low-level ATA commands
1335 con
->controller_port
=cfg
->controller_port
;
1336 con
->hpt_data
[0]=cfg
->hpt_data
[0];
1337 con
->hpt_data
[1]=cfg
->hpt_data
[1];
1338 con
->hpt_data
[2]=cfg
->hpt_data
[2];
1339 con
->controller_type
=cfg
->controller_type
;
1340 con
->controller_explicit
=cfg
->controller_explicit
;
1341 con
->fixfirmwarebug
= cfg
->fixfirmwarebug
;
1342 con
->satpassthrulen
= cfg
->satpassthrulen
;
1344 // Get drive identity structure
// A nonzero result means failure; the value minus one is handed to
// packetdevicetype() for the "packet devices" message below.
1345 if ((retid
=ataReadHDIdentity (fd
,&drive
))){
1347 // Unable to read Identity structure
1348 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1350 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1351 name
, packetdevicetype(retid
-1));
1352 CloseDevice(fd
, name
);
1356 // Show if device in database, and use preset vendor attribute
1357 // options unless user has requested otherwise.
1358 if (cfg
->ignorepresets
)
1359 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1361 // do whatever applypresets decides to do. Will allocate memory if
1362 // cfg->attributedefs is needed.
1363 if (applypresets(&drive
, &cfg
->attributedefs
, con
)<0)
1364 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1366 PrintOut(LOG_INFO
, "Device: %s, found in smartd database.\n", name
);
1368 // then save the correct state of the flag (applypresets may have changed it)
1369 cfg
->fixfirmwarebug
= con
->fixfirmwarebug
;
1372 // If requested, show which presets would be used for this drive
1373 if (cfg
->showpresets
) {
// debugmode is saved here and restored after showpresets(); the lines in
// between that change it are not visible in this listing.
1374 int savedebugmode
=debugmode
;
1375 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1378 showpresets(&drive
);
1379 debugmode
=savedebugmode
;
1382 // see if drive supports SMART
1383 supported
=ataSmartSupport(&drive
);
1386 // drive does NOT support SMART
1387 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
1389 // can't tell if drive supports SMART
1390 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
1392 // should we proceed anyway?
1393 if (cfg
->permissive
){
1394 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
1397 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
1398 CloseDevice(fd
, name
);
1403 if (ataEnableSmart(fd
)){
1404 // Enable SMART command has failed
1405 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
1406 CloseDevice(fd
, name
);
1410 // disable device attribute autosave...
// cfg->autosave: 1 requests disabling, 2 requests enabling (see both tests).
1411 if (cfg
->autosave
==1){
1412 if (ataDisableAutoSave(fd
))
1413 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
1415 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
1418 // or enable device attribute autosave
1419 if (cfg
->autosave
==2){
1420 if (ataEnableAutoSave(fd
))
1421 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
1423 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
1426 // capability check: SMART status
1427 if (cfg
->smartcheck
&& ataSmartStatus2(fd
)==-1){
1428 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
1432 // capability check: Read smart values and thresholds. Note that
1433 // smart values are ALSO needed even if we ONLY want to know if the
1434 // device is self-test log or error-log capable! After ATA-5, this
1435 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1436 // but sadly not for ATA-5. Sigh.
1438 // do we need to retain SMART data after returning from this routine?
1439 retainsmartdata
=cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
;
1441 // do we need to get SMART data?
1442 if (retainsmartdata
|| cfg
->autoofflinetest
|| cfg
->selftest
|| cfg
->errorlog
|| cfg
->pending
!=DONT_MONITOR_UNC
) {
1444 unsigned char currentpending
, offlinepending
;
1446 cfg
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
));
1447 cfg
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
));
1449 if (!cfg
->smartval
|| !cfg
->smartthres
){
1450 PrintOut(LOG_CRIT
,"Not enough memory to obtain SMART data\n");
// If either read fails, every Directive that depends on attribute data
// (-f, -p, -u, -W, -C/-U) is switched off below.
1454 if (ataReadSmartValues(fd
,cfg
->smartval
) ||
1455 ataReadSmartThresholds (fd
,cfg
->smartthres
)){
1456 PrintOut(LOG_INFO
,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name
);
1457 retainsmartdata
=cfg
->usagefailed
=cfg
->prefail
=cfg
->usage
=0;
1458 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1459 cfg
->pending
=DONT_MONITOR_UNC
;
1462 // see if the necessary Attribute is there to monitor offline or
1463 // current pending sectors or temperature
// NOTE(review): the second argument below reads "¤tpending" in this listing.
// That looks like a mis-encoded "&currentpending" (an HTML "&curren" entity
// swallowed the start of the token) - confirm against the original file.
1464 TranslatePending(cfg
->pending
, ¤tpending
, &offlinepending
);
// cfg->pending packs two attribute IDs: the low byte is reset to
// CUR_UNC_DEFAULT and the high byte to OFF_UNC_DEFAULT when the matching
// attribute cannot be read from the drive.
1466 if (currentpending
&& ATAReturnAttributeRawValue(currentpending
, cfg
->smartval
)<0) {
1467 PrintOut(LOG_INFO
,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1468 name
, (int)currentpending
);
1469 cfg
->pending
&= 0xff00;
1470 cfg
->pending
|= CUR_UNC_DEFAULT
;
1473 if (offlinepending
&& ATAReturnAttributeRawValue(offlinepending
, cfg
->smartval
)<0) {
1474 PrintOut(LOG_INFO
,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1475 name
, (int)offlinepending
);
1476 cfg
->pending
&= 0x00ff;
1477 cfg
->pending
|= OFF_UNC_DEFAULT
<<8;
1480 if ( (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
1481 && !ATAReturnTemperatureValue(cfg
->smartval
, cfg
->attributedefs
)) {
1482 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name
);
1483 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1487 // enable/disable automatic on-line testing
1488 if (cfg
->autoofflinetest
){
1489 // is this an enable or disable request?
// 'what' is also used as "%sd" further down, giving "disabled"/"enabled".
1490 const char *what
=(cfg
->autoofflinetest
==1)?"disable":"enable";
1492 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
1494 // if command appears unsupported, issue a warning...
1495 if (!isSupportAutomaticTimer(cfg
->smartval
))
1496 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
1497 // ... but then try anyway
1498 if ((cfg
->autoofflinetest
==1)?ataDisableAutoOffline(fd
):ataEnableAutoOffline(fd
))
1499 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
1501 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
1505 // capability check: self-test-log
1509 // start with service disabled, and re-enable it if all works OK
1511 cfg
->selflogcount
=0;
1515 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name
);
1516 else if (!cfg
->permissive
&& !isSmartTestLogCapable(cfg
->smartval
, &drive
))
1517 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name
);
1518 else if ((retval
=SelfTestErrorCount(fd
, name
))<0)
1519 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name
);
// Baseline values that later self-test log checks are compared against.
1522 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1523 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1527 // capability check: ATA error log
1531 // start with service disabled, and re-enable it if all works OK
1533 cfg
->ataerrorcount
=0;
1536 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name
);
1537 else if (!cfg
->permissive
&& !isSmartErrorLogCapable(cfg
->smartval
, &drive
))
1538 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name
);
1539 else if ((val
=ATAErrorCount(fd
, name
))<0)
1540 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name
);
1543 cfg
->ataerrorcount
=val
;
1547 // If we don't need to save SMART data, get rid of it now
// The global 'bytes' counter is reduced by the size of each freed structure.
1548 if (!retainsmartdata
) {
1549 if (cfg
->smartval
) {
1550 cfg
->smartval
=CheckFree(cfg
->smartval
, __LINE__
,filenameandversion
);
1551 bytes
-=sizeof(struct ata_smart_values
);
1553 if (cfg
->smartthres
) {
1554 cfg
->smartthres
=CheckFree(cfg
->smartthres
, __LINE__
,filenameandversion
);
1555 bytes
-=sizeof(struct ata_smart_thresholds_pvt
);
1559 // capabilities check -- does it support powermode?
1560 if (cfg
->powermode
) {
1561 int powermode
=ataCheckPowerMode(fd
);
1563 if (-1 == powermode
) {
1564 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
// Only 0, 0x80 and 0xff are accepted as results; anything else is reported
// as not ATA compliant.
1567 else if (powermode
!=0 && powermode
!=0x80 && powermode
!=0xff) {
1568 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1574 // If no tests available or selected, return
1575 if (!(cfg
->errorlog
|| cfg
->selftest
|| cfg
->smartcheck
||
1576 cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
||
1577 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
1578 CloseDevice(fd
, name
);
1582 // Do we still have entries available?
1583 while (numdevata
>=atadevlist_max
)
1584 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
1587 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
1589 // record number of device, type of device, increment device count
1590 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1591 cfg
->controller_type
=CONTROLLER_ATA
;
1593 // close file descriptor
1594 CloseDevice(fd
, name
);
// Inspect the standard INQUIRY response of an open device and recognise
// hardware that should be configured with a different '-d' type.
//   fd     - open device descriptor
//   device - device name, used only in the log messages
// The checks compare fixed offsets of the response against literal strings:
// offset 8 against "3ware", "AMCC" and "ATA", offset 36 against "MVSATA".
1598 // Returns 1 if device recognised as one we do not want to treat as a general
1599 // SCSI device. Also returns 1 if INQUIRY fails (all "SCSI" devices should
1600 // respond to INQUIRY). Otherwise returns 0 (i.e. normal SCSI device).
1601 static int SCSIFilterKnown(int fd
, char * device
)
1605 int req_len
, avail_len
, len
;
1607 memset(req_buff
, 0, 96);
// INQUIRY is attempted twice; the assignments to req_len before each attempt
// are not visible in this listing.
1609 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1610 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
1611 /* watch this spot ... other devices could lock up here */
1613 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1614 PrintOut(LOG_INFO
, "Device: %s, failed on INQUIRY; skip device\n", device
);
1615 // device doesn't like INQUIRY commands
// avail_len is derived from byte 4 of the response; len is clamped to the
// number of bytes actually requested.
1619 avail_len
= req_buff
[4] + 5;
1620 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
1622 if (0 == strncmp(req_buff
+ 8, "3ware", 5) || 0 == strncmp(req_buff
+ 8, "AMCC", 4) ) {
1623 PrintOut(LOG_INFO
, "Device %s, please try adding '-d 3ware,N'\n", device
);
1624 PrintOut(LOG_INFO
, "Device %s, you may need to replace %s with /dev/twaN or /dev/tweN\n", device
, device
);
// The "MVSATA" marker spans bytes 36..41, hence the len >= 42 guard.
1626 } else if ((len
>= 42) && (0 == strncmp(req_buff
+ 36, "MVSATA", 6))) {
1627 PrintOut(LOG_INFO
, "Device %s, please try '-d marvell'\n", device
);
1629 } else if ((avail_len
>= 96) && (0 == strncmp(req_buff
+ 8, "ATA", 3))) {
1630 /* <<<< This is Linux specific code to detect SATA disks using a
1631 SCSI-ATA command translation layer. This may be generalized
1632 later when the t10.org SAT project matures. >>>> */
1634 memset(di_buff
, 0, req_len
);
// VPD page 0x83 is fetched into di_buff; a failure here is not treated as
// an error.
1635 if (scsiInquiryVpd(fd
, 0x83, (unsigned char *)di_buff
, req_len
)) {
1636 return 0; // guess it is normal device
// Bytes 2 and 3 of the page are combined into a 16-bit value, plus 4.
1638 avail_len
= ((di_buff
[2] << 8) + di_buff
[3]) + 4;
1639 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
1640 if (isLinuxLibAta((unsigned char *)di_buff
, len
)) {
1641 PrintOut(LOG_INFO
, "Device %s: SATA disks accessed via libata are "
1642 "supported by Linux\nkernel versions 2.6.15-rc1 and above. "
1643 "Try adding '-d ata' or\n'-d sat' to the smartd.conf "
1644 "config file line.\n", device
);
// Probe the device described by cfg as a SCSI device and decide whether it
// should be monitored.  In order, the visible code:
//   1. copies controller settings into the global 'con' and opens the device;
//   2. rejects devices recognised by SCSIFilterKnown();
//   3. requires TEST UNIT READY to succeed and the IE (SMART) mode page to be
//      either well formed or cleanly unsupported;
//   4. requires exception control to be enabled;
//   5. grows scsidevlist if needed, records which log pages are supported and
//      frees cfg memory that is only meaningful for ATA devices;
//   6. runs the scsiCheckIE, self-test log and autosave checks, announces the
//      device and closes it again.
// Parameters:
//   cfg      - configuration entry; several of its fields are updated in place
//   scanning - passed on to OpenDevice()
// NOTE(review): the return statements and several declarations are not
// present in this listing.
1652 // on success, return 0. On failure, return >0. Never return <0,
1654 static int SCSIDeviceScan(cfgfile
*cfg
, int scanning
) {
1656 char *device
= cfg
->name
;
1657 struct scsi_iec_mode_page iec
;
1661 // should we try to register this as a SCSI device?
1662 switch (cfg
->controller_type
) {
1663 case CONTROLLER_SCSI
:
1664 case CONTROLLER_UNKNOWN
:
1667 case CONTROLLER_CCISS
:
1673 // pass user settings on to low-level SCSI commands
1674 con
->controller_port
=cfg
->controller_port
;
1675 con
->controller_type
=cfg
->controller_type
;
1678 if ((fd
= OpenDevice(device
, mode
, scanning
)) < 0)
1680 PrintOut(LOG_INFO
,"Device: %s, opened\n", device
);
1682 // early skip if device known and needs to be handled by some other
1683 // device type (e.g. '-d 3ware,<n>')
1684 if (SCSIFilterKnown(fd
, device
)) {
1685 CloseDevice(fd
, device
);
1689 // check that device is ready for commands. IE stores its stuff on
// The three "not ready" style results are logged at LOG_INFO; any other
// error is logged at LOG_CRIT.  The device is closed in every failing case.
1691 if ((err
= scsiTestUnitReady(fd
))) {
1692 if (SIMPLE_ERR_NOT_READY
== err
)
1693 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
1694 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
1695 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
1696 else if (SIMPLE_ERR_BECOMING_READY
== err
)
1697 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
1699 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
1700 CloseDevice(fd
, device
);
1704 // Badly-conforming USB storage devices may fail this check.
1705 // The response to the following IE mode page fetch (current and
1706 // changeable values) is carefully examined. It has been found
1707 // that various USB devices that malform the response will lock up
1708 // if asked for a log page (e.g. temperature) so it is best to
// On success the mode sense length chosen by scsiFetchIECmpage() is stored
// back into cfg->modese_len for later commands.
1710 if (!(err
= scsiFetchIECmpage(fd
, &iec
, cfg
->modese_len
)))
1711 cfg
->modese_len
= iec
.modese_len
;
1712 else if (SIMPLE_ERR_BAD_FIELD
== err
)
1713 ; /* continue since it is reasonable not to support IE mpage */
1714 else { /* any other error (including malformed response) unreasonable */
1716 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1718 CloseDevice(fd
, device
);
1722 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1723 // smart if it is off). This may change to be the same as the ATA side.
1724 if (!scsi_IsExceptionControlEnabled(&iec
)) {
1725 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
1726 "Try 'smartctl -s on %s' to turn on SMART features\n",
1728 CloseDevice(fd
, device
);
1732 // Device exists, and does SMART. Add to list (allocating more space if needed)
1733 while (numdevscsi
>= scsidevlist_max
)
1734 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
1736 // Flag that certain log pages are supported (information may be
1737 // available from other sources).
// The page list is walked from offset 4 up to tBuf[3] + LOGPAGEHDRSIZE.
1738 if (0 == scsiLogSense(fd
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0)) {
1739 for (k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
1741 case TEMPERATURE_LPAGE
:
1742 cfg
->TempPageSupported
= 1;
1745 cfg
->SmartPageSupported
= 1;
1753 // record type of device
1754 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1755 cfg
->controller_type
= CONTROLLER_SCSI
;
1757 // get rid of allocated memory only needed for ATA devices. These
1758 // might have been allocated if the user specified Ignore options or
1759 // other ATA-only Attribute-specific options on the DEVICESCAN line.
1760 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
1761 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
1762 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
1763 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
1765 // Check if scsiCheckIE() is going to work
1769 UINT8 currenttemp
= 0;
// NOTE(review): the third pointer argument below reads "¤ttemp" in this
// listing.  That looks like a mis-encoded "&currenttemp" (an HTML "&curren"
// entity swallowed the start of the token) - confirm against the original.
1772 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
1773 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
1774 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
// A failure here also switches off temperature monitoring (-W).
1775 cfg
->SuppressReport
= 1;
1776 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
) {
1777 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device
);
1778 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1783 // capability check: self-test-log
1785 int retval
=scsiCountFailedSelfTests(fd
, 0);
1787 // no self-test log, turn off monitoring
1788 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
1790 cfg
->selflogcount
=0;
1794 // register starting values to watch for changes
1795 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1796 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1800 // disable autosave (set GLTSD bit)
// cfg->autosave: 1 sets the GLTSD bit, 2 clears it (see both tests).
1801 if (cfg
->autosave
==1){
1802 if (scsiSetControlGLTSD(fd
, 1, cfg
->modese_len
))
1803 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
1805 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
1808 // or enable autosave (clear GLTSD bit)
1809 if (cfg
->autosave
==2){
1810 if (scsiSetControlGLTSD(fd
, 0, cfg
->modese_len
))
1811 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
1813 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
1816 // tell user we are registering device
1817 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
1819 // close file descriptor
1820 CloseDevice(fd
, device
);
// Compare one attribute slot between two SMART value snapshots and describe
// the change in *delta.
//   delta      - output; newval, oldval, prefail and sameraw are filled in
//                when a change is reported
//   newv, oldv - current and previous attribute tables
//   thresholds - vendor threshold table; its entry must carry the same ID
//   n          - slot index into the tables, 0 <= n < NUMBER_ATA_SMART_ATTRIBUTES
//   name       - device name, used only in the ID-mismatch message
// NOTE(review): the last line of the parameter list and the return
// statements are not present in this listing.
1824 // We compare old and new values of the n'th attribute. Note that n
1825 // is NOT the attribute ID number.. If (Normalized & Raw) equal,
1826 // then return 0, else nonzero.
1827 int ATACompareValues(changedattribute_t
*delta
,
1828 struct ata_smart_values
*newv
,
1829 struct ata_smart_values
*oldv
,
1830 struct ata_smart_thresholds_pvt
*thresholds
,
1832 struct ata_smart_attribute
*now
,*was
;
1833 struct ata_smart_threshold_entry
*thre
;
1834 unsigned char oldval
,newval
;
1837 // check that attribute number in range, and no null pointers
1838 if (n
<0 || n
>=NUMBER_ATA_SMART_ATTRIBUTES
|| !newv
|| !oldv
|| !thresholds
)
1841 // pointers to disk's values and vendor's thresholds
1842 now
=newv
->vendor_attributes
+n
;
1843 was
=oldv
->vendor_attributes
+n
;
1844 thre
=thresholds
->thres_entries
+n
;
1846 // consider only valid attributes
// An ID of zero in any of the three tables marks the slot as unused.
1847 if (!now
->id
|| !was
->id
|| !thre
->id
)
1851 // issue warning if they don't have the same ID in all structures:
1852 if ( (now
->id
!= was
->id
) || (now
->id
!= thre
->id
) ){
1853 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
1854 name
, (int)now
->id
, (int)was
->id
, (int)thre
->id
);
1858 // new and old values of Normalized Attributes
1859 newval
=now
->current
;
1860 oldval
=was
->current
;
1862 // See if the RAW values are unchanged (ie, the same)
// Only the first 6 bytes of the raw field take part in the comparison.
1863 if (memcmp(now
->raw
, was
->raw
, 6))
1868 // if any values out of the allowed range, or if the values haven't
1869 // changed, return 0
// Accepted normalized values are 1..0xfe on both sides.
1870 if (!newval
|| !oldval
|| newval
>0xfe || oldval
>0xfe || (oldval
==newval
&& sameraw
))
1873 // values have changed. Construct output and return
1874 delta
->newval
=newval
;
1875 delta
->oldval
=oldval
;
1877 delta
->prefail
=ATTRIBUTE_FLAGS_PREFAILURE(now
->flags
);
1878 delta
->sameraw
=sameraw
;
// Query or update one bit in a per-device attribute flag table.
//   attr     - attribute ID whose bit is addressed
//   datap    - address of the table pointer; the table is allocated on
//              demand with Calloc(NMONITOR*32, 1)
//   set      - 0 to query, 1 to switch the attribute off (see below)
//   which    - selects one 32-byte (256-bit) slice; must satisfy
//              0 <= which < NMONITOR
//   whatline - caller's line number, printed in the internal-error message
1883 // This looks to see if the corresponding bit of the 32 bytes is set.
1884 // This wastes a few bytes of storage but eliminates all searching and
1885 // sorting functions! Entry is ZERO <==> the attribute ON. Calling
1886 // with set=0 tells you if the attribute is being tracked or not.
1887 // Calling with set=1 turns the attribute OFF.
1888 int IsAttributeOff(unsigned char attr
, unsigned char **datap
, int set
, int which
, int whatline
){
1889 unsigned char *data
;
// The low three bits of attr select the bit inside a byte; the computation
// of the byte index 'loc' is not visible in this listing.
1891 int bit
=attr
& 0x07;
1892 unsigned char mask
=0x01<<bit
;
1894 if (which
>=NMONITOR
|| which
< 0){
1895 PrintOut(LOG_CRIT
, "Internal error in IsAttributeOff() at line %d of file %s (which=%d)\n%s",
1896 whatline
, filenameandversion
, which
, reportbug
);
1900 if (*datap
== NULL
){
1901 // NULL data implies Attributes are ON...
1906 if (!(*datap
=(unsigned char *)Calloc(NMONITOR
*32, 1))){
1907 PrintOut(LOG_CRIT
,"No memory to create monattflags\n");
1912 // pointer to the 256 bits that we need
1913 data
=*datap
+which
*32;
1915 // attribute zero is always OFF
// Query path: nonzero result means the bit is set, i.e. the attribute is OFF.
1920 return (data
[loc
] & mask
);
1924 // return value when setting has no sense
// Compare a fresh self-test log summary against the values remembered in cfg
// and warn about new failures.
//   cfg  - device entry; selflogcount and selfloghour are read and then
//          overwritten with the new values
//   newi - packed result in the format returned by SelfTestErrorCount();
//          split here with SELFTEST_ERRORCOUNT() / SELFTEST_ERRORHOURS()
// Warnings are both logged (PrintOut at LOG_CRIT) and mailed (MailWarning).
1928 // If the self-test log has got more self-test errors (or more recent
1929 // self-test errors) recorded, then notify user.
1930 void CheckSelfTestLogs(cfgfile
*cfg
, int newi
){
1931 char *name
=cfg
->name
;
1935 MailWarning(cfg
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
1937 // old and new error counts
1938 int oldc
=cfg
->selflogcount
;
1939 int newc
=SELFTEST_ERRORCOUNT(newi
);
1941 // old and new error timestamps in hours
1942 int oldh
=cfg
->selfloghour
;
1943 int newh
=SELFTEST_ERRORHOURS(newi
);
1946 // increase in error count
1947 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
1949 MailWarning(cfg
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
1951 } else if (oldh
!=newh
) {
1952 // more recent error
1953 // a 'more recent' error might actually be a smaller hour number,
1954 // if the hour number has wrapped.
1955 // There's still a bug here. You might just happen to run a new test
1956 // exactly 32768 hours after the previous failure, and have run exactly
1957 // 20 tests between the two, in which case smartd will miss the
1959 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
// NOTE(review): unlike the other MailWarning() messages in this function,
// the format string below ends in '\n' - confirm whether that is intended.
1961 MailWarning(cfg
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1965 // Needed since self-test error count may DECREASE. Hour might
1966 // also have changed.
1967 cfg
->selflogcount
= newc
;
1968 cfg
->selfloghour
= newh
;
// Decide whether a scheduled self-test of the given type is due.
//   cfg      - device entry; cfg->testdata holds the compiled regular
//              expression and the record of the last test
//   testtype - single character naming the test type; it becomes the first
//              character of the string matched against the regex
//   testtime - time to evaluate; zero means "now" (time(NULL) is used)
// The string matched has the form "T/MM/DD/d/HH": type, month (1-12), day of
// month, weekday (1 = Monday .. 7 = Sunday) and hour.
1973 // returns 1 if time to do test of type testtype, 0 if not time to do
1974 // test, < 0 if error
1975 int DoTestNow(cfgfile
*cfg
, char testtype
, time_t testtime
) {
1976 // start by finding out the time:
1979 char matchpattern
[16];
1980 regmatch_t substring
;
1981 int weekday
, length
;
1982 unsigned short hours
;
1983 testinfo
*dat
=cfg
->testdata
;
1985 // check that self-testing has been requested
1989 // since we are about to call localtime(), be sure glibc is informed
1990 // of any timezone changes we make.
1992 FixGlibcTimeZoneBug();
1994 // construct pattern containing the month, day of month, day of
1996 epochnow
= (!testtime
? time(NULL
) : testtime
);
1997 timenow
=localtime(&epochnow
);
1999 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7
2001 weekday
=timenow
->tm_wday
?timenow
->tm_wday
:7;
// For in-range tm fields the formatted string is 12 characters long, which
// fits the 16-byte matchpattern buffer.
2002 sprintf(matchpattern
, "%c/%02d/%02d/%1d/%02d", testtype
, timenow
->tm_mon
+1,
2003 timenow
->tm_mday
, weekday
, timenow
->tm_hour
);
2005 // if no match, we are done
2006 if (regexec(&(dat
->cregex
), matchpattern
, 1, &substring
, 0))
2009 // must match the ENTIRE type/date/time string
2010 length
=strlen(matchpattern
);
2011 if (substring
.rm_so
!=0 || substring
.rm_eo
!=length
)
2014 // never do a second test in the same hour as another test (the % 7 ensures
2015 // that the RHS will never be greater than 65535 and so will always fit into
2016 // an unsigned short)
// 'hours' is an hour stamp compared with dat->hour, the stamp of the last
// test; the leading 1+ keeps it nonzero.
2017 hours
=1+timenow
->tm_hour
+24*(timenow
->tm_yday
+366*(timenow
->tm_year
% 7));
2018 if (hours
==dat
->hour
) {
// The "skipping" message is only printed for real runs (testtime == 0) and
// only when the earlier test was of a different type.
2019 if (!testtime
&& testtype
!=dat
->testtype
)
2020 PrintOut(LOG_INFO
, "Device: %s, did test of type %c in current hour, skipping test of type %c\n",
2021 cfg
->name
, dat
->testtype
, testtype
);
2025 // save time and type of the current test; we are ready to do a test
2027 dat
->testtype
=testtype
;
// Simulate the next 90 days in steps of 'checktime' seconds and report which
// self-tests DoTestNow() would trigger for every registered device.
//   atadevices  - ATA device entries; numdevata of them are used
//   scsidevices - SCSI device entries; numdevscsi of them are used
// Test types are taken from the string "LSCO": all four characters are tried
// for ATA devices, only the first two ('L', 'S') for SCSI devices.  Per
// device and type, the first 5 matches are printed individually and all
// matches are counted for the totals at the end.
2031 // Print a list of future tests.
2032 void PrintTestSchedule(cfgfile
**atadevices
, cfgfile
**scsidevices
){
2035 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2036 time_t now
; long seconds
;
2037 int numdev
= numdevata
+numdevscsi
;
2038 typedef int cnt_t
[4];
2039 cnt_t
* testcnts
; // testcnts[numdev][4]
// One zero-initialised row of four counters per device.
2042 testcnts
= (cnt_t
*)calloc(numdev
, sizeof(testcnts
[0]));
2046 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2048 // FixGlibcTimeZoneBug(); // done in PrintOut()
2050 dateandtimezoneepoch(datenow
, now
);
2051 for (seconds
=0; seconds
<3600L*24*90; seconds
+=checktime
) {
2052 // Check for each device whether a test will be run
2053 time_t testtime
= now
+ seconds
;
2054 for (i
=0; i
<numdev
; i
++) {
// Indices below numdevata address the ATA list, the rest the SCSI list.
2055 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
2056 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
2057 char testtype
= "LSCO"[t
];
2058 if (DoTestNow(cfg
, testtype
, testtime
)) {
2059 // Report at most 5 tests of each type
2060 if (++testcnts
[i
][t
] <= 5) {
2061 dateandtimezoneepoch(date
, testtime
);
2062 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
->name
,
2063 testcnts
[i
][t
], testtype
, date
);
// Summary: the simulated time window followed by one line per device and
// test type.
2071 dateandtimezoneepoch(date
, now
+seconds
);
2072 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2073 for (i
=0; i
<numdev
; i
++) {
2074 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
2075 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
2076 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
->name
, testcnts
[i
][t
],
2077 (testcnts
[i
][t
]==1?"":"s"), "LSCO"[t
]);
// Start a scheduled self-test on an open SCSI device.
//   fd       - open device descriptor
//   cfg      - device entry; cfg->testdata->not_cap_short / not_cap_long are
//              set when the device turns out not to support a test
//   testtype - single character selecting the test; the visible code handles
//              a short test (scsiSmartShortSelfTest) and a long test
//              (scsiSmartExtendSelfTest)
// NOTE(review): the case labels mapping testtype to a test, and the return
// statements, are not present in this listing.
2084 // Return zero on success, nonzero on failure. Perform offline (background)
2085 // short or long (extended) self test on given scsi device.
2086 int DoSCSISelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2088 char *testname
= NULL
;
2089 char *name
= cfg
->name
;
// If the in-progress query itself fails, both test kinds are marked as
// unsupported.
2092 if (scsiSelfTestInProgress(fd
, &inProgress
)) {
2093 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2094 cfg
->testdata
->not_cap_short
=cfg
->testdata
->not_cap_long
=1;
2098 if (1 == inProgress
) {
2099 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
2100 "progress.\n", name
);
2106 testname
= "Short Self";
2107 retval
= scsiSmartShortSelfTest(fd
);
2110 testname
= "Long Self";
2111 retval
= scsiSmartExtendSelfTest(fd
);
2114 // If we can't do the test, exit
// testname still being NULL means no test was selected above.
2115 if (NULL
== testname
) {
2116 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
// "Bad opcode" and "bad field" results are reported as "not capable" and
// recorded in the not_cap_* flags; other failures only produce the
// "execute ... failed" message.
2121 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
2122 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
2123 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
2126 cfg
->testdata
->not_cap_long
=1;
2128 cfg
->testdata
->not_cap_short
=1;
2132 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
2137 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
2142 // Do an offline immediate or self-test. Return zero on success,
2143 // nonzero on failure.
2144 int DoATASelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2146 struct ata_smart_values data
;
2147 char *testname
=NULL
;
2148 int retval
, dotest
=-1;
2149 char *name
=cfg
->name
;
2151 // Read current smart data and check status/capability
2152 if (ataReadSmartValues(fd
, &data
) || !(data
.offline_data_collection_capability
)) {
2153 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
2157 // Check for capability to do the test
2160 testname
="Offline Immediate ";
2161 if (isSupportExecuteOfflineImmediate(&data
))
2162 dotest
=OFFLINE_FULL_SCAN
;
2164 cfg
->testdata
->not_cap_offline
=1;
2167 testname
="Conveyance Self-";
2168 if (isSupportConveyanceSelfTest(&data
))
2169 dotest
=CONVEYANCE_SELF_TEST
;
2171 cfg
->testdata
->not_cap_conveyance
=1;
2174 testname
="Short Self-";
2175 if (isSupportSelfTest(&data
))
2176 dotest
=SHORT_SELF_TEST
;
2178 cfg
->testdata
->not_cap_short
=1;
2181 testname
="Long Self-";
2182 if (isSupportSelfTest(&data
))
2183 dotest
=EXTEND_SELF_TEST
;
2185 cfg
->testdata
->not_cap_long
=1;
2189 // If we can't do the test, exit
2191 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
2195 // If currently running a self-test, do not interrupt it to start another.
2196 if (15==(data
.self_test_exec_status
>> 4)) {
2197 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2198 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
2202 // else execute the test, and return status
2203 if ((retval
=smartcommandhandler(fd
, IMMEDIATE_OFFLINE
, dotest
, NULL
)))
2204 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
2206 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
2211 // Check Temperature limits
2212 static void CheckTemperature(cfgfile
* cfg
, unsigned char currtemp
, unsigned char triptemp
)
2214 const char *minchg
= "", *maxchg
= "";
2215 if (!(0 < currtemp
&& currtemp
< 255)) {
2216 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
->name
);
2220 if (!cfg
->temperature
) {
2221 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius\n",
2222 cfg
->name
, (int)currtemp
);
2224 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
2225 cfg
->temperature
= cfg
->tempmin
= cfg
->tempmax
= currtemp
;
2229 if (currtemp
< cfg
->tempmin
) {
2230 cfg
->tempmin
= currtemp
; minchg
= "!";
2231 cfg
->tempmininc
= 0;
2233 else if (cfg
->tempmininc
) {
2234 // increase min Temperature during first 30 minutes
2235 cfg
->tempmin
= currtemp
;
2238 if (currtemp
> cfg
->tempmax
) {
2239 cfg
->tempmax
= currtemp
; maxchg
= "!";
2243 if (cfg
->tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)cfg
->temperature
) >= cfg
->tempdiff
)) {
2244 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %u%s/%u%s)\n",
2245 cfg
->name
, (int)currtemp
-(int)cfg
->temperature
, currtemp
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2246 cfg
->temperature
= currtemp
;
2251 if (cfg
->tempcrit
&& currtemp
>= cfg
->tempcrit
) {
2252 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2253 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2254 MailWarning(cfg
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2255 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2257 else if (cfg
->tempinfo
&& currtemp
>= cfg
->tempinfo
) {
2258 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2259 cfg
->name
, currtemp
, cfg
->tempinfo
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2263 int ATACheckDevice(cfgfile
*cfg
){
2265 char *name
=cfg
->name
;
2269 // fix firmware bug if requested
2270 con
->fixfirmwarebug
=cfg
->fixfirmwarebug
;
2271 con
->controller_port
=cfg
->controller_port
;
2272 con
->controller_type
=cfg
->controller_type
;
2273 con
->controller_explicit
=cfg
->controller_explicit
;
2275 // If user has asked, test the email warning system
2276 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2277 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
2279 if (cfg
->controller_type
== CONTROLLER_3WARE_9000_CHAR
)
2280 mode
="ATA_3WARE_9000";
2282 if (cfg
->controller_type
== CONTROLLER_3WARE_678K_CHAR
)
2283 mode
="ATA_3WARE_678K";
2285 // if we can't open device, fail gracefully rather than hard --
2286 // perhaps the next time around we'll be able to open it. ATAPI
2287 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2288 // given (see linux cdrom driver).
2289 if ((fd
=OpenDevice(name
, mode
, 0))<0){
2290 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2294 // if the user has asked, and device is capable (or we're not yet
2295 // sure) check whether a self test should be done now.
2296 // This check is done before powermode check to avoid missing self
2297 // tests on idle or sleeping disks.
2298 if (cfg
->testdata
) {
2300 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2303 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2306 else if (!cfg
->testdata
->not_cap_conveyance
&& DoTestNow(cfg
, 'C', 0)>0)
2308 // offline immediate
2309 else if (!cfg
->testdata
->not_cap_offline
&& DoTestNow(cfg
, 'O', 0)>0)
2313 // user may have requested (with the -n Directive) to leave the disk
2314 // alone if it is in idle or sleeping mode. In this case check the
2315 // power mode and exit without check if needed
2316 if (cfg
->powermode
){
2317 int dontcheck
=0, powermode
=ataCheckPowerMode(fd
);
2319 if (0 <= powermode
&& powermode
< 0xff) {
2320 // wait for possible spin up and check again
2323 powermode2
= ataCheckPowerMode(fd
);
2324 if (powermode2
> powermode
)
2325 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
2326 powermode
= powermode2
;
2333 if (cfg
->powermode
>=1)
2339 if (cfg
->powermode
>=2)
2345 if (cfg
->powermode
>=3)
2350 mode
="ACTIVE or IDLE";
2354 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2360 // if we are going to skip a check, return now
2362 // but ignore powermode on scheduled selftest
2364 CloseDevice(fd
, name
);
2365 if (!cfg
->powerskipcnt
&& !cfg
->powerquiet
) // report first only and avoid waking up system disk
2366 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
2367 cfg
->powerskipcnt
++;
2370 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to scheduled self test (%d check%s skipped)\n",
2371 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2372 cfg
->powerskipcnt
= 0;
2374 else if (cfg
->powerskipcnt
) {
2375 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2376 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2377 cfg
->powerskipcnt
= 0;
2381 // check smart status
2382 if (cfg
->smartcheck
){
2383 int status
=ataSmartStatus2(fd
);
2385 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
2386 MailWarning(cfg
, 5, "Device: %s, not capable of SMART self-check", name
);
2388 else if (status
==1){
2389 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
2390 MailWarning(cfg
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
2394 // Check everything that depends upon SMART Data (eg, Attribute values)
2395 if ( cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->pending
!=DONT_MONITOR_UNC
2396 || cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
){
2397 struct ata_smart_values curval
;
2398 struct ata_smart_thresholds_pvt
*thresh
=cfg
->smartthres
;
2400 // Read current attribute values. *drive contains old values and thresholds
2401 if (ataReadSmartValues(fd
,&curval
)){
2402 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
2403 MailWarning(cfg
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
2406 // look for current or offline pending sectors
2407 if (cfg
->pending
!= DONT_MONITOR_UNC
) {
2409 unsigned char currentpending
, offlinepending
;
2411 TranslatePending(cfg
->pending
, &currentpending
, &offlinepending
);
2413 if (currentpending
&& (rawval
=ATAReturnAttributeRawValue(currentpending
, &curval
))>0) {
2414 // Unreadable pending sectors!!
2415 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors\n", name
, rawval
);
2416 MailWarning(cfg
, 10, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors", name
, rawval
);
2419 if (offlinepending
&& (rawval
=ATAReturnAttributeRawValue(offlinepending
, &curval
))>0) {
2420 // Unreadable offline sectors!!
2421 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Offline uncorrectable sectors\n", name
, rawval
);
2422 MailWarning(cfg
, 11, "Device: %s, %"PRId64
" Offline uncorrectable sectors", name
, rawval
);
2426 // check temperature limits
2427 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2428 CheckTemperature(cfg
, ATAReturnTemperatureValue(&curval
, cfg
->attributedefs
), 0);
2430 if (cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
) {
2432 // look for failed usage attributes, or track usage or prefail attributes
2433 for (i
=0; i
<NUMBER_ATA_SMART_ATTRIBUTES
; i
++){
2435 changedattribute_t delta
;
2437 // This block looks for usage attributes that have failed.
2438 // Prefail attributes that have failed are returned with a
2439 // positive sign. No failure returns 0. Usage attributes<0.
2440 if (cfg
->usagefailed
&& ((att
=ataCheckAttribute(&curval
, thresh
, i
))<0)){
2442 // are we ignoring failures of this attribute?
2444 if (!IsAttributeOff(att
, &cfg
->monitorattflags
, 0, MONITOR_FAILUSE
, __LINE__
)){
2445 char attname
[64], *loc
=attname
;
2447 // get attribute name & skip white space
2448 ataPrintSmartAttribName(loc
, att
, cfg
->attributedefs
);
2449 while (*loc
&& *loc
==' ') loc
++;
2452 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %s.\n", name
, loc
);
2453 MailWarning(cfg
, 2, "Device: %s, Failed SMART usage Attribute: %s.", name
, loc
);
2457 // This block tracks usage or prefailure attributes to see if
2458 // they are changing. It also looks for changes in RAW values
2459 // if this has been requested by user.
2460 if ((cfg
->usage
|| cfg
->prefail
) && ATACompareValues(&delta
, &curval
, cfg
->smartval
, thresh
, i
, name
)){
2461 unsigned char id
=delta
.id
;
2463 // if the only change is the raw value, and we're not
2464 // tracking raw value, then continue loop over attributes
2465 if (!delta
.sameraw
&& delta
.newval
==delta
.oldval
&& !IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAW
, __LINE__
))
2468 // are we tracking this attribute?
2469 if (!IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_IGNORE
, __LINE__
)){
2470 char newrawstring
[64], oldrawstring
[64], attname
[64], *loc
=attname
;
2472 // get attribute name, skip spaces
2473 ataPrintSmartAttribName(loc
, id
, cfg
->attributedefs
);
2474 while (*loc
&& *loc
==' ') loc
++;
2476 // has the user asked for us to print raw values?
2477 if (IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAWPRINT
, __LINE__
)) {
2478 // get raw values (as a string) and add to printout
2480 ataPrintSmartAttribRawValue(rawstring
, curval
.vendor_attributes
+i
, cfg
->attributedefs
);
2481 sprintf(newrawstring
, " [Raw %s]", rawstring
);
2482 ataPrintSmartAttribRawValue(rawstring
, cfg
->smartval
->vendor_attributes
+i
, cfg
->attributedefs
);
2483 sprintf(oldrawstring
, " [Raw %s]", rawstring
);
2486 newrawstring
[0]=oldrawstring
[0]='\0';
2488 // prefailure attribute
2489 if (cfg
->prefail
&& delta
.prefail
)
2490 PrintOut(LOG_INFO
, "Device: %s, SMART Prefailure Attribute: %s changed from %d%s to %d%s\n",
2491 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2494 if (cfg
->usage
&& !delta
.prefail
)
2495 PrintOut(LOG_INFO
, "Device: %s, SMART Usage Attribute: %s changed from %d%s to %d%s\n",
2496 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2498 } // endof block tracking usage or prefailure
2499 } // end of loop over attributes
2501 // Save the new values into *drive for the next time around
2502 *(cfg
->smartval
)=curval
;
2507 // check if number of selftest errors has increased (note: may also DECREASE)
2509 CheckSelfTestLogs(cfg
, SelfTestErrorCount(fd
, name
));
2511 // check if number of ATA errors has increased
2514 int newc
,oldc
=cfg
->ataerrorcount
;
2516 // new number of errors
2517 newc
=ATAErrorCount(fd
, name
);
2519 // did command fail?
2521 // lack of PrintOut here is INTENTIONAL
2522 MailWarning(cfg
, 7, "Device: %s, Read SMART Error Log Failed", name
);
2524 // has error count increased?
2526 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
2528 MailWarning(cfg
, 4, "Device: %s, ATA error count increased from %d to %d",
2532 // this last line is probably not needed, count always increases
2534 cfg
->ataerrorcount
=newc
;
2537 // carry out scheduled self-test
2539 DoATASelfTest(fd
, cfg
, testtype
);
2541 // Don't leave device open -- the OS/user may want to access it
2542 // before the next smartd cycle!
2543 CloseDevice(fd
, name
);
2547 int SCSICheckDevice(cfgfile
*cfg
)
2553 char *name
=cfg
->name
;
2557 // should we try to register this as a SCSI device?
2558 switch (cfg
->controller_type
) {
2559 case CONTROLLER_CCISS
:
2562 case CONTROLLER_SCSI
:
2563 case CONTROLLER_UNKNOWN
:
2570 // pass user settings on to low-level SCSI commands
2571 con
->controller_port
=cfg
->controller_port
;
2572 con
->controller_type
=cfg
->controller_type
;
2574 // If the user has asked for it, test the email warning system
2575 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2576 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
2578 // if we can't open device, fail gracefully rather than hard --
2579 // perhaps the next time around we'll be able to open it
2580 if ((fd
=OpenDevice(name
, mode
, 0))<0) {
2581 // Lack of PrintOut() here is intentional!
2582 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2584 } else if (debugmode
)
2585 PrintOut(LOG_INFO
,"Device: %s, opened SCSI device\n", name
);
2589 if (! cfg
->SuppressReport
) {
2590 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
2591 &asc
, &ascq
, &currenttemp
, &triptemp
)) {
2592 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
2594 MailWarning(cfg
, 6, "Device: %s, failed to read SMART values", name
);
2595 cfg
->SuppressReport
= 1;
2599 cp
= scsiGetIEString(asc
, ascq
);
2601 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
2602 MailWarning(cfg
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
2603 } else if (debugmode
)
2604 PrintOut(LOG_INFO
,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2605 name
, (int)asc
, (int)ascq
);
2606 } else if (debugmode
)
2607 PrintOut(LOG_INFO
,"Device: %s, SMART health: passed\n", name
);
2609 // check temperature limits
2610 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2611 CheckTemperature(cfg
, currenttemp
, triptemp
);
2613 // check if number of selftest errors has increased (note: may also DECREASE)
2615 CheckSelfTestLogs(cfg
, scsiCountFailedSelfTests(fd
, 0));
2617 if (cfg
->testdata
) {
2618 // long (extended) background test
2619 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2620 DoSCSISelfTest(fd
, cfg
, 'L');
2621 // short background test
2622 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2623 DoSCSISelfTest(fd
, cfg
, 'S');
2625 CloseDevice(fd
, name
);
2629 // Checks the SMART status of all ATA and SCSI devices
2630 void CheckDevicesOnce(cfgfile
**atadevices
, cfgfile
**scsidevices
){
2633 for (i
=0; i
<numdevata
; i
++)
2634 ATACheckDevice(atadevices
[i
]);
2636 for (i
=0; i
<numdevscsi
; i
++)
2637 SCSICheckDevice(scsidevices
[i
]);
2643 // This alarm means that a SCSI USB device was hanging
2644 void AlarmHandler(int signal
) {
2645 longjmp(registerscsienv
, 1);
2649 // Does initialization right after fork to daemon mode
2650 void Initialize(time_t *wakeuptime
){
2652 // install goodbye message and remove pidfile handler
2655 // write PID file only after installing exit handler
2659 // install signal handlers. On Solaris, can't use signal() because
2660 // it resets the handler to SIG_DFL after each call. So use sigset()
2661 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2663 // normal and abnormal exit
2664 if (SIGNALFN(SIGTERM
, sighandler
)==SIG_IGN
)
2665 SIGNALFN(SIGTERM
, SIG_IGN
);
2666 if (SIGNALFN(SIGQUIT
, sighandler
)==SIG_IGN
)
2667 SIGNALFN(SIGQUIT
, SIG_IGN
);
2669 // in debug mode, <CONTROL-C> ==> HUP
2670 if (SIGNALFN(SIGINT
, debugmode
?HUPhandler
:sighandler
)==SIG_IGN
)
2671 SIGNALFN(SIGINT
, SIG_IGN
);
2673 // Catch HUP and USR1
2674 if (SIGNALFN(SIGHUP
, HUPhandler
)==SIG_IGN
)
2675 SIGNALFN(SIGHUP
, SIG_IGN
);
2676 if (SIGNALFN(SIGUSR1
, USR1handler
)==SIG_IGN
)
2677 SIGNALFN(SIGUSR1
, SIG_IGN
);
2679 if (SIGNALFN(SIGUSR2
, USR2handler
)==SIG_IGN
)
2680 SIGNALFN(SIGUSR2
, SIG_IGN
);
2683 // initialize wakeup time to CURRENT time
2684 *wakeuptime
=time(NULL
);
2690 // Toggle debug mode implemented for native windows only
2691 // (there is no easy way to reopen tty on *nix)
2692 static void ToggleDebugMode()
2695 PrintOut(LOG_INFO
,"Signal USR2 - enabling debug mode\n");
2696 if (!daemon_enable_console("smartd [Debug]")) {
2698 daemon_signal(SIGINT
, HUPhandler
);
2699 PrintOut(LOG_INFO
,"smartd debug mode enabled, PID=%d\n", getpid());
2702 PrintOut(LOG_INFO
,"enable console failed\n");
2704 else if (debugmode
== 1) {
2705 daemon_disable_console();
2707 daemon_signal(SIGINT
, sighandler
);
2708 PrintOut(LOG_INFO
,"Signal USR2 - debug mode disabled\n");
2711 PrintOut(LOG_INFO
,"Signal USR2 - debug mode %d not changed\n", debugmode
);
2715 time_t dosleep(time_t wakeuptime
){
2718 // If past wake-up-time, compute next wake-up-time
2720 while (wakeuptime
<=timenow
){
2721 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
2722 wakeuptime
+=intervals
*checktime
;
2725 // sleep until we catch SIGUSR1 or have completed sleeping
2726 while (timenow
<wakeuptime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
){
2728 // protect user against system clock being adjusted backwards
2729 if (wakeuptime
>timenow
+checktime
){
2730 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2731 wakeuptime
=timenow
+checktime
;
2734 // Exit sleep when time interval has expired or a signal is received
2735 sleep(wakeuptime
-timenow
);
2738 // toggle debug mode?
2739 if (caughtsigUSR2
) {
2748 // if we caught a SIGUSR1 then print message and clear signal
2750 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2751 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
2755 // return adjusted wakeuptime
2759 // Print out a list of valid arguments for the Directive d
2760 void printoutvaliddirectiveargs(int priority
, char d
) {
2765 PrintOut(priority
, "never[,q], sleep[,q], standby[,q], idle[,q]");
2768 PrintOut(priority
, "valid_regular_expression");
2771 PrintOut(priority
, "ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N");
2774 PrintOut(priority
, "normal, permissive");
2778 PrintOut(priority
, "on, off");
2781 PrintOut(priority
, "error, selftest");
2784 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
2787 if (!(s
= create_vendor_attribute_arg_list())) {
2788 PrintOut(LOG_CRIT
,"Insufficient memory to construct argument list\n");
2791 PrintOut(priority
, "\n%s\n", s
);
2792 s
=CheckFree(s
, __LINE__
,filenameandversion
);
2795 PrintOut(priority
, "use, ignore, show, showall");
2798 PrintOut(priority
, "none, samsung, samsung2");
2803 // exits with an error message, or returns integer value of token
2804 int GetInteger(char *arg
, char *name
, char *token
, int lineno
, char *configfile
, int min
, int max
){
2808 // check input range
2810 PrintOut(LOG_CRIT
, "min =%d passed to GetInteger() must be >=0\n", min
);
2814 // make sure argument is there
2816 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
2817 configfile
, lineno
, name
, token
, min
, max
);
2821 // get argument value (base 10), check that it's integer, and in-range
2822 val
=strtol(arg
,&endptr
,10);
2823 if (*endptr
!='\0' || val
<min
|| val
>max
) {
2824 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
2825 configfile
, lineno
, name
, token
, arg
, min
, max
);
2829 // all is well; return value
2834 // Get 1-3 small integer(s) for '-W' directive
2835 int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *configfile
,
2836 unsigned char * val1
, unsigned char * val2
, unsigned char * val3
){
2837 unsigned v1
= 0, v2
= 0, v3
= 0;
2838 int n1
= -1, n2
= -1, n3
= -1, len
;
2840 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
2841 configfile
, lineno
, name
, token
);
2846 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
2847 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
2848 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
2849 configfile
, lineno
, name
, token
, arg
);
2852 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
2857 // This function returns 1 if it has correctly parsed one token (and
2858 // any arguments), else zero if no tokens remain. It returns -1 if an
2859 // error was encountered.
2860 int ParseToken(char *token
,cfgfile
*cfg
){
2862 char *name
=cfg
->name
;
2863 int lineno
=cfg
->lineno
;
2864 char *delim
= " \n\t";
2869 maildata
*mdat
=NULL
, tempmail
;
2871 // is the rest of the line a comment
2875 // is the token not recognized?
2876 if (*token
!='-' || strlen(token
)!=2) {
2877 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
2878 configfile
, lineno
, name
, token
);
2879 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
2883 // token we will be parsing:
2886 // create temporary maildata structure. This means we can postpone
2887 // allocating space in the data segment until we are sure there are
2889 if ('m'==sym
|| 'M'==sym
){
2890 if (!cfg
->mailwarn
){
2891 memset(&tempmail
, 0, sizeof(maildata
));
2899 // parse the token and swallow its argument
2904 // monitor current pending sector count (default 197)
2905 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2907 if (val
==CUR_UNC_DEFAULT
)
2910 val
=CUR_UNC_DEFAULT
;
2911 // set bottom 8 bits to correct value
2912 cfg
->pending
&= 0xff00;
2913 cfg
->pending
|= val
;
2916 // monitor offline uncorrectable sectors (default 198)
2917 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2919 if (val
==OFF_UNC_DEFAULT
)
2922 val
=OFF_UNC_DEFAULT
;
2923 // turn off top 8 bits, then set to correct value
2924 cfg
->pending
&= 0xff;
2925 cfg
->pending
|= (val
<<8);
2928 // Set tolerance level for SMART command failures
2929 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2931 } else if (!strcmp(arg
, "normal")) {
2932 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
2933 // not on failure of an optional S.M.A.R.T. command.
2934 // This is the default so we don't need to actually do anything here.
2936 } else if (!strcmp(arg
, "permissive")) {
2937 // Permissive mode; ignore errors from Mandatory SMART commands
2944 // specify the device type
2945 cfg
->controller_explicit
= 1;
2946 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2948 } else if (!strcmp(arg
, "ata")) {
2949 cfg
->controller_port
= 0;
2950 cfg
->controller_type
= CONTROLLER_ATA
;
2951 } else if (!strcmp(arg
, "scsi")) {
2952 cfg
->controller_port
=0;
2953 cfg
->controller_type
= CONTROLLER_SCSI
;
2954 } else if (!strcmp(arg
, "marvell")) {
2955 cfg
->controller_port
=0;
2956 cfg
->controller_type
= CONTROLLER_MARVELL_SATA
;
2957 } else if (!strncmp(arg
, "sat", 3)) {
2958 cfg
->controller_type
= CONTROLLER_SAT
;
2959 cfg
->controller_port
= 0;
2960 cfg
->satpassthrulen
= 0;
2961 if (strlen(arg
) > 3) {
2965 cp
= strchr(arg
, ',');
2966 if (cp
&& (1 == sscanf(cp
+ 1, "%d", &k
)) &&
2967 ((0 == k
) || (12 == k
) || (16 == k
)))
2968 cfg
->satpassthrulen
= k
;
2970 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2971 "'-d sat,<n>' requires <n> to be 0, 12 or 16\n",
2972 configfile
, lineno
, name
);
2976 } else if (!strncmp(arg
, "hpt", 3)){
2977 unsigned char i
, slash
= 0;
2978 cfg
->hpt_data
[0] = 0;
2979 cfg
->hpt_data
[1] = 0;
2980 cfg
->hpt_data
[2] = 0;
2981 cfg
->controller_type
= CONTROLLER_HPT
;
2982 for (i
=4; i
< strlen(arg
); i
++) {
2986 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2987 "'-d hpt,L/M/N' supports 2-3 items\n",
2988 configfile
, lineno
, name
);
2993 else if ((arg
[i
])>='0' && (arg
[i
])<='9') {
2994 if (cfg
->hpt_data
[slash
]>1) { /* hpt_data[x] max 19 */
2998 cfg
->hpt_data
[slash
] = cfg
->hpt_data
[slash
]*10 + arg
[i
] - '0';
3007 } else if (badarg
!= TRUE
) {
3008 if (cfg
->hpt_data
[0]==0 || cfg
->hpt_data
[0]>8){
3009 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3010 "'-d hpt,L/M/N' no/invalid controller id L supplied\n",
3011 configfile
, lineno
, name
);
3014 if (cfg
->hpt_data
[1]==0 || cfg
->hpt_data
[1]>8){
3015 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3016 "'-d hpt,L/M/N' no/invalid channel number M supplied\n",
3017 configfile
, lineno
, name
);
3021 if (cfg
->hpt_data
[2]==0 || cfg
->hpt_data
[2]>15){
3022 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3023 "'-d hpt,L/M/N' no/invalid pmport number N supplied\n",
3024 configfile
, lineno
, name
);
3027 } else { /* no pmport device */
3031 } else if (!strcmp(arg
, "removable")) {
3034 // look 3ware,N RAID device
3038 // make a copy of the string to mess with
3039 if (!(s
= strdup(arg
))) {
3041 "No memory to copy argument to -d option - exiting\n");
3043 } else if (!strncmp(s
,"3ware,",6)) {
3044 if (split_report_arg2(s
, &i
)){
3045 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N requires N integer\n",
3046 configfile
, lineno
, name
);
3048 } else if ( i
<0 || i
>15) {
3049 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N (N=%d) must have 0 <= N <= 15\n",
3050 configfile
, lineno
, name
, i
);
3053 // determine type of escalade device from name of device
3054 cfg
->controller_type
= guess_device_type(name
);
3055 if (cfg
->controller_type
!=CONTROLLER_3WARE_9000_CHAR
&& cfg
->controller_type
!=CONTROLLER_3WARE_678K_CHAR
)
3056 cfg
->controller_type
=CONTROLLER_3WARE_678K
;
3058 // NOTE: controller_port == disk number + 1
3059 cfg
->controller_port
= i
+1;
3061 } else if (!strncmp(s
,"cciss,",6)) {
3062 if (split_report_arg2(s
, &i
)){
3063 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N requires N integer\n",
3064 configfile
, lineno
, name
);
3066 } else if ( i
<0 || i
>15) {
3067 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N (N=%d) must have 0 <= N <= 15\n",
3068 configfile
, lineno
, name
, i
);
3071 // NOTE: controller_port == disk number + 1
3072 cfg
->controller_type
= CONTROLLER_CCISS
;
3073 cfg
->controller_port
= i
+1;
3078 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3083 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3085 } else if (!strcmp(arg
, "none")) {
3086 cfg
->fixfirmwarebug
= FIX_NONE
;
3087 } else if (!strcmp(arg
, "samsung")) {
3088 cfg
->fixfirmwarebug
= FIX_SAMSUNG
;
3089 } else if (!strcmp(arg
, "samsung2")) {
3090 cfg
->fixfirmwarebug
= FIX_SAMSUNG2
;
3096 // check SMART status
3100 // check for failure of usage attributes
3104 // track changes in all vendor attributes
3109 // track changes in prefail vendor attributes
3113 // track changes in usage vendor attributes
3117 // track changes in SMART logs
3118 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3120 } else if (!strcmp(arg
, "selftest")) {
3121 // track changes in self-test log
3123 } else if (!strcmp(arg
, "error")) {
3124 // track changes in ATA error log
3131 // monitor everything
3140 // automatic offline testing enable/disable
3141 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3143 } else if (!strcmp(arg
, "on")) {
3144 cfg
->autoofflinetest
= 2;
3145 } else if (!strcmp(arg
, "off")) {
3146 cfg
->autoofflinetest
= 1;
3152 // skip disk check if in idle or standby mode
3153 if (!(arg
= strtok(NULL
, delim
)))
3155 else if (!strcmp(arg
, "never") || !strcmp(arg
, "never,q"))
3157 else if (!strcmp(arg
, "sleep") || !strcmp(arg
, "sleep,q"))
3159 else if (!strcmp(arg
, "standby") || !strcmp(arg
, "standby,q"))
3161 else if (!strcmp(arg
, "idle") || !strcmp(arg
, "idle,q"))
3165 cfg
->powerquiet
= !!strchr(arg
, ',');
3168 // automatic attribute autosave enable/disable
3169 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3171 } else if (!strcmp(arg
, "on")) {
3173 } else if (!strcmp(arg
, "off")) {
3180 // warn user, and delete any previously given -s REGEXP Directives
3182 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3183 configfile
, lineno
, name
, cfg
->testdata
->regex
);
3184 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3186 // check for missing argument
3187 if (!(arg
= strtok(NULL
, delim
))) {
3190 // allocate space for structure and string
3191 else if (!(cfg
->testdata
=(testinfo
*)Calloc(1, sizeof(testinfo
))) || !(cfg
->testdata
->regex
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
))) {
3192 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create Test Directive -s %s!\n",
3193 configfile
, lineno
, name
, arg
);
3196 else if ((val
=regcomp(&(cfg
->testdata
->cregex
), arg
, REG_EXTENDED
))) {
3198 // not a valid regular expression!
3199 regerror(val
, &(cfg
->testdata
->cregex
), errormsg
, 512);
3200 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3201 configfile
, lineno
, name
, arg
, errormsg
);
3202 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3205 // Do a bit of sanity checking and warn user if we think that
3206 // their regexp is "strange". User probably confused about shell
3207 // glob(3) syntax versus regular expression syntax regexp(7).
3208 if ((int)strlen(arg
) != (val
=strspn(arg
,"0123456789/.-+*|()?^$[]SLCO")))
3209 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3210 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
3213 // send email to address that follows
3214 if (!(arg
= strtok(NULL
,delim
)))
3217 if (mdat
->address
) {
3218 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3219 configfile
, lineno
, name
, mdat
->address
);
3220 mdat
->address
=FreeNonZero(mdat
->address
, -1,__LINE__
,filenameandversion
);
3222 mdat
->address
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3226 // email warning options
3227 if (!(arg
= strtok(NULL
, delim
)))
3229 else if (!strcmp(arg
, "once"))
3230 mdat
->emailfreq
= 1;
3231 else if (!strcmp(arg
, "daily"))
3232 mdat
->emailfreq
= 2;
3233 else if (!strcmp(arg
, "diminishing"))
3234 mdat
->emailfreq
= 3;
3235 else if (!strcmp(arg
, "test"))
3236 mdat
->emailtest
= 1;
3237 else if (!strcmp(arg
, "exec")) {
3238 // Get the next argument (the command line)
3239 if (!(arg
= strtok(NULL
, delim
))) {
3240 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3241 configfile
, lineno
, name
, token
);
3244 // Free the last cmd line given if any, and copy new one
3245 if (mdat
->emailcmdline
) {
3246 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3247 configfile
, lineno
, name
, mdat
->emailcmdline
);
3248 mdat
->emailcmdline
=FreeNonZero(mdat
->emailcmdline
, -1,__LINE__
,filenameandversion
);
3250 mdat
->emailcmdline
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3256 // ignore failure of usage attribute
3257 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3259 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_FAILUSE
, __LINE__
);
3262 // ignore attribute for tracking purposes
3263 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3265 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_IGNORE
, __LINE__
);
3268 // print raw value when tracking
3269 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3271 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3274 // track changes in raw value (forces printing of raw value)
3275 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3277 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3278 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAW
, __LINE__
);
3281 // track Temperature
3282 if ((val
=Get3Integers(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
,
3283 &cfg
->tempdiff
, &cfg
->tempinfo
, &cfg
->tempcrit
))<0)
3285 // increase min Temperature during first 30 minutes
3286 if (!(cfg
->tempmininc
= (unsigned char)(CHECKTIME
/ checktime
)))
3287 cfg
->tempmininc
= 1;
3290 // non-default vendor-specific attribute meaning
3291 if (!(arg
=strtok(NULL
,delim
))) {
3293 } else if (parse_attribute_def(arg
, &cfg
->attributedefs
)){
3298 // Define use of drive-specific presets.
3299 if (!(arg
= strtok(NULL
, delim
))) {
3301 } else if (!strcmp(arg
, "use")) {
3302 cfg
->ignorepresets
= FALSE
;
3303 } else if (!strcmp(arg
, "ignore")) {
3304 cfg
->ignorepresets
= TRUE
;
3305 } else if (!strcmp(arg
, "show")) {
3306 cfg
->showpresets
= TRUE
;
3307 } else if (!strcmp(arg
, "showall")) {
3314 // Directive not recognized
3315 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3316 configfile
, lineno
, name
, token
);
3321 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3322 configfile
, lineno
, name
, token
);
3325 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3326 configfile
, lineno
, name
, token
, arg
);
3328 if (missingarg
|| badarg
) {
3329 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
3330 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
3331 PrintOut(LOG_CRIT
, "\n");
3335 // If this did something to fill the mail structure, and that didn't
3336 // already exist, create it and copy.
3338 if (!(cfg
->mailwarn
=(maildata
*)Calloc(1, sizeof(maildata
)))) {
3339 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create mail warning entry!\n",
3340 configfile
, lineno
, name
);
3343 memcpy(cfg
->mailwarn
, mdat
, sizeof(maildata
));
3349 // Allocate storage for a new cfgfile entry. If original!=NULL, it's
3350 // a copy of the original, but with private data storage. Else all is
3351 // zeroed. Returns address, and fails if no memory is available.
3353 cfgfile
*CreateConfigEntry(cfgfile
*original
){
3356 // allocate memory for new structure
3357 if (!(add
=(cfgfile
*)Calloc(1,sizeof(cfgfile
))))
3360 // if old structure was pointed to, copy it
3362 memcpy(add
, original
, sizeof(cfgfile
));
3364 // make private copies of data items ONLY if they are in use (non
3366 add
->name
= CustomStrDup(add
->name
, 0, __LINE__
,filenameandversion
);
3368 if (add
->testdata
) {
3370 if (!(add
->testdata
=(testinfo
*)Calloc(1,sizeof(testinfo
))))
3372 memcpy(add
->testdata
, original
->testdata
, sizeof(testinfo
));
3373 add
->testdata
->regex
= CustomStrDup(add
->testdata
->regex
, 1, __LINE__
,filenameandversion
);
3374 // only POSIX-portable way to make fresh copy of compiled regex is
3375 // to recompile it completely. There is no POSIX
3376 // compiled-regex-copy command.
3377 if ((val
=regcomp(&(add
->testdata
->cregex
), add
->testdata
->regex
, REG_EXTENDED
))) {
3379 regerror(val
, &(add
->testdata
->cregex
), errormsg
, 512);
3380 PrintOut(LOG_CRIT
, "unable to recompile regular expression %s. %s\n", add
->testdata
->regex
, errormsg
);
3385 if (add
->mailwarn
) {
3386 if (!(add
->mailwarn
=(maildata
*)Calloc(1,sizeof(maildata
))))
3388 memcpy(add
->mailwarn
, original
->mailwarn
, sizeof(maildata
));
3389 add
->mailwarn
->address
= CustomStrDup(add
->mailwarn
->address
, 0, __LINE__
,filenameandversion
);
3390 add
->mailwarn
->emailcmdline
= CustomStrDup(add
->mailwarn
->emailcmdline
, 0, __LINE__
,filenameandversion
);
3393 if (add
->attributedefs
) {
3394 if (!(add
->attributedefs
=(unsigned char *)Calloc(MAX_ATTRIBUTE_NUM
,1)))
3396 memcpy(add
->attributedefs
, original
->attributedefs
, MAX_ATTRIBUTE_NUM
);
3399 if (add
->monitorattflags
) {
3400 if (!(add
->monitorattflags
=(unsigned char *)Calloc(NMONITOR
*32, 1)))
3402 memcpy(add
->monitorattflags
, original
->monitorattflags
, NMONITOR
*32);
3405 if (add
->smartval
) {
3406 if (!(add
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
))))
3410 if (add
->smartthres
) {
3411 if (!(add
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
))))
3418 PrintOut(LOG_CRIT
, "No memory to create entry from configuration file\n");
3424 // This is the routine that adds things to the cfgentries list. To
3425 // prevent memory leaks when re-reading the configuration file many
3426 // times, this routine MUST deallocate any memory other than that
3427 // pointed to within cfg-> before it returns.
3429 // Return values are:
3430 // 1: parsed a normal line
3431 // 0: found comment or blank line
3432 // -1: found SCANDIRECTIVE line
3433 // -2: found an error
3435 // Note: this routine modifies *line from the caller!
3436 int ParseConfigLine(int entry
, int lineno
,char *line
){
3439 char *delim
= " \n\t";
3443 // get first token: device name. If a comment, skip line
3444 if (!(name
=strtok(line
,delim
)) || *name
=='#') {
3448 // Have we detected the SCANDIRECTIVE directive?
3449 if (!strcmp(SCANDIRECTIVE
,name
)){
3452 PrintOut(LOG_INFO
,"Scan Directive %s (line %d) must be the first entry in %s\n",name
, lineno
, configfile
);
3457 // Is there space for another entry? If not, allocate more
3458 while (entry
>=cfgentries_max
)
3459 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "configuration file device");
3461 // We've got a legit entry, make space to store it
3462 cfg
=cfgentries
[entry
]=CreateConfigEntry(NULL
);
3463 cfg
->name
= CustomStrDup(name
, 1, __LINE__
,filenameandversion
);
3465 // Store line number, and by default check for both device types.
3468 // Try and recognize if a IDE or SCSI device. These can be
3469 // overwritten by configuration file directives.
3470 if (cfg
->controller_type
==CONTROLLER_UNKNOWN
)
3471 cfg
->controller_type
= guess_device_type(cfg
->name
);
3473 // parse tokens one at a time from the file.
3474 while ((token
=strtok(NULL
,delim
))){
3475 int retval
=ParseToken(token
,cfg
);
3484 PrintOut(LOG_INFO
,"Parsed token %s\n",token
);
3490 // error found on the line
3495 // If we found 3ware/cciss controller, then modify device name by adding a SPACE
3496 if (cfg
->controller_port
) {
3497 int len
=17+strlen(cfg
->name
);
3501 PrintOut(LOG_CRIT
, "smartd: can not scan for 3ware/cciss devices (line %d of file %s)\n",
3502 lineno
, configfile
);
3506 if (!(newname
=(char *)calloc(len
,1))) {
3507 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3511 // Make new device name by adding a space then RAID disk number
3512 snprintf(newname
, len
, "%s [%s_disk_%02d]", cfg
->name
, (cfg
->controller_type
== CONTROLLER_CCISS
) ? "cciss" : "3ware",
3513 cfg
->controller_port
-1);
3514 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3519 if (cfg
->hpt_data
[0]) {
3520 int len
=17+strlen(cfg
->name
);
3524 PrintOut(LOG_CRIT
, "smartd: can not scan for highpoint devices (line %d of file %s)\n",
3525 lineno
, configfile
);
3529 if (!(newname
=(char *)calloc(len
,1))) {
3530 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3534 // Make new device name by adding a space then RAID disk number
3535 snprintf(newname
, len
, "%s [hpt_%d/%d/%d]", cfg
->name
, cfg
->hpt_data
[0],
3536 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
3537 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3542 // If NO monitoring directives are set, then set all of them.
3543 if (!(cfg
->smartcheck
|| cfg
->usagefailed
|| cfg
->prefail
||
3544 cfg
->usage
|| cfg
->selftest
|| cfg
->errorlog
||
3545 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
3547 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3548 cfg
->name
, cfg
->lineno
, configfile
);
3558 // additional sanity check. Has user set -M options without -m?
3559 if (cfg
->mailwarn
&& !cfg
->mailwarn
->address
&& (cfg
->mailwarn
->emailcmdline
|| cfg
->mailwarn
->emailfreq
|| cfg
->mailwarn
->emailtest
)){
3560 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3561 cfg
->name
, cfg
->lineno
, configfile
);
3565 // has the user set <nomailer>?
3566 if (cfg
->mailwarn
&& cfg
->mailwarn
->address
&& !strcmp(cfg
->mailwarn
->address
,"<nomailer>")){
3567 // check that -M exec is also set
3568 if (!cfg
->mailwarn
->emailcmdline
){
3569 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3570 cfg
->name
, cfg
->lineno
, configfile
);
3573 // now free memory. From here on the sign of <nomailer> is
3574 // address==NULL and cfg->emailcmdline!=NULL
3575 cfg
->mailwarn
->address
=FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
3578 // set cfg->emailfreq to 1 (once) if user hasn't set it
3579 if (cfg
->mailwarn
&& !cfg
->mailwarn
->emailfreq
)
3580 cfg
->mailwarn
->emailfreq
= 1;
3590 // clean up utility for ParseConfigFile()
3591 void cleanup(FILE **fpp
, int is_stdin
){
3593 // (*fpp != stdin) does not work here if stdin has been closed & reopened
3603 // Parses a configuration file. Return values are:
3604 // N>=0: found N entries
3605 // -1: syntax error in config file
3606 // -2: config file does not exist
3607 // -3: config file exists but cannot be read
3609 // In the case where the return value is 0, there are three
3611 // Empty configuration file ==> cfgentries==NULL
3612 // No configuration file ==> cfgentries[0]->lineno == 0
3613 // SCANDIRECTIVE found ==> cfgentries[0]->lineno != 0
3614 int ParseConfigFile(){
3616 int entry
=0,lineno
=1,cont
=0,contlineno
=0;
3617 char line
[MAXLINELEN
+2];
3618 char fullline
[MAXCONTLINE
+1];
3620 int is_stdin
= (configfile
== configfile_stdin
); // pointer comparison ok here
3622 // Open config file, if it exists and is not <stdin>
3624 fp
=fopen(configfile
,"r");
3625 if (fp
==NULL
&& (errno
!=ENOENT
|| configfile_alt
)) {
3626 // file exists but we can't read it or it should exist due to '-c' option
3627 int ret
= (errno
!=ENOENT
? -3 : -2);
3628 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
3629 strerror(errno
),configfile
);
3633 else // read from stdin ('-c -' option)
3636 // No configuration file found -- use fake one
3638 int len
=strlen(SCANDIRECTIVE
)+4;
3639 char *fakeconfig
=(char *)calloc(len
,1);
3642 (len
-1) != snprintf(fakeconfig
, len
, "%s -a", SCANDIRECTIVE
) ||
3643 -1 != ParseConfigLine(entry
, 0, fakeconfig
)
3645 PrintOut(LOG_CRIT
,"Internal error in ParseConfigFile() at line %d of file %s\n%s",
3646 __LINE__
, filenameandversion
, reportbug
);
3649 fakeconfig
=CheckFree(fakeconfig
, __LINE__
,filenameandversion
);
3654 setmode(fileno(fp
), O_TEXT
); // Allow files with \r\n
3657 // configuration file exists
3658 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
3660 // parse config file line by line
3662 int len
=0,scandevice
;
3667 // make debugging simpler
3668 memset(line
,0,sizeof(line
));
3671 code
=fgets(line
,MAXLINELEN
+2,fp
);
3673 // are we at the end of the file?
3676 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3677 // See if we found a SCANDIRECTIVE directive
3678 if (scandevice
==-1) {
3679 cleanup(&fp
, is_stdin
);
3682 // did we find a syntax error
3683 if (scandevice
==-2) {
3684 cleanup(&fp
, is_stdin
);
3687 // the final line is part of a continuation line
3694 // input file line number
3697 // See if line is too long
3699 if (len
>MAXLINELEN
){
3701 if (line
[len
-1]=='\n')
3702 warn
="(including newline!) ";
3705 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3706 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
3707 cleanup(&fp
, is_stdin
);
3711 // Ignore anything after comment symbol
3712 if ((comment
=strchr(line
,'#'))){
3717 // is the total line (made of all continuation lines) too long?
3718 if (cont
+len
>MAXCONTLINE
){
3719 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3720 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
3721 cleanup(&fp
, is_stdin
);
3725 // copy string so far into fullline, and increment length
3726 strcpy(fullline
+cont
,line
);
3729 // is this a continuation line. If so, replace \ by space and look at next line
3730 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
3731 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
3735 // Not a continuation line. Parse it
3736 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3738 // did we find a scandevice directive?
3739 if (scandevice
==-1) {
3740 cleanup(&fp
, is_stdin
);
3743 // did we find a syntax error
3744 if (scandevice
==-2) {
3745 cleanup(&fp
, is_stdin
);
3753 cleanup(&fp
, is_stdin
);
3755 // note -- may be zero if syntax of file OK, but no valid entries!
3760 // Prints copyright, license and version information
3761 void PrintCopyleft(void){
3768 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3769 <LIST> is the list of valid arguments for option opt. */
3770 void PrintValidArgs(char opt
) {
3773 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
3774 if (!(s
= GetValidArgList(opt
)))
3775 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
3777 PrintOut(LOG_CRIT
, (char *)s
);
3778 PrintOut(LOG_CRIT
, " <=======\n");
3781 // Parses input line, prints usage message and
3782 // version/license/copyright messages
3783 void ParseOpts(int argc
, char **argv
){
3784 extern char *optarg
;
3785 extern int optopt
, optind
, opterr
;
3790 // Please update GetValidArgList() if you edit shortopts
3791 const char *shortopts
= "c:l:q:dDi:p:r:Vh?";
3792 #ifdef HAVE_GETOPT_LONG
3794 // Please update GetValidArgList() if you edit longopts
3795 struct option longopts
[] = {
3796 { "configfile", required_argument
, 0, 'c' },
3797 { "logfacility", required_argument
, 0, 'l' },
3798 { "quit", required_argument
, 0, 'q' },
3799 { "debug", no_argument
, 0, 'd' },
3800 { "showdirectives", no_argument
, 0, 'D' },
3801 { "interval", required_argument
, 0, 'i' },
3802 { "pidfile", required_argument
, 0, 'p' },
3803 { "report", required_argument
, 0, 'r' },
3804 #if defined(_WIN32) || defined(__CYGWIN__)
3805 { "service", no_argument
, 0, 'S' },
3807 { "version", no_argument
, 0, 'V' },
3808 { "license", no_argument
, 0, 'V' },
3809 { "copyright", no_argument
, 0, 'V' },
3810 { "help", no_argument
, 0, 'h' },
3811 { "usage", no_argument
, 0, 'h' },
3819 // Parse input options. This horrible construction is so that emacs
3820 // indents properly. Sorry.
3821 while (-1 != (optchar
=
3822 #ifdef HAVE_GETOPT_LONG
3823 getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)
3825 getopt(argc
, argv
, shortopts
)
3832 if (!(strcmp(optarg
,"nodev"))) {
3834 } else if (!(strcmp(optarg
,"nodevstartup"))) {
3836 } else if (!(strcmp(optarg
,"never"))) {
3838 } else if (!(strcmp(optarg
,"onecheck"))) {
3841 } else if (!(strcmp(optarg
,"showtests"))) {
3844 } else if (!(strcmp(optarg
,"errors"))) {
3851 // set the log facility level
3852 if (!strcmp(optarg
, "daemon"))
3853 facility
=LOG_DAEMON
;
3854 else if (!strcmp(optarg
, "local0"))
3855 facility
=LOG_LOCAL0
;
3856 else if (!strcmp(optarg
, "local1"))
3857 facility
=LOG_LOCAL1
;
3858 else if (!strcmp(optarg
, "local2"))
3859 facility
=LOG_LOCAL2
;
3860 else if (!strcmp(optarg
, "local3"))
3861 facility
=LOG_LOCAL3
;
3862 else if (!strcmp(optarg
, "local4"))
3863 facility
=LOG_LOCAL4
;
3864 else if (!strcmp(optarg
, "local5"))
3865 facility
=LOG_LOCAL5
;
3866 else if (!strcmp(optarg
, "local6"))
3867 facility
=LOG_LOCAL6
;
3868 else if (!strcmp(optarg
, "local7"))
3869 facility
=LOG_LOCAL7
;
3874 // enable debug mode
3878 // print summary of all valid directives
3884 // Period (time interval) for checking
3885 // strtol will set errno in the event of overflow, so we'll check it.
3887 lchecktime
= strtol(optarg
, &tailptr
, 10);
3888 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
3891 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
3892 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
3893 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3896 checktime
= (int)lchecktime
;
3899 // report IOCTL transactions
3904 // split_report_arg() may modify its first argument string, so use a
3905 // copy of optarg in case we want optarg for an error message.
3906 if (!(s
= strdup(optarg
))) {
3907 PrintOut(LOG_CRIT
, "No memory to process -r option - exiting\n");
3910 if (split_report_arg(s
, &i
)) {
3912 } else if (i
<1 || i
>3) {
3915 PrintOut(LOG_CRIT
, "======> INVALID REPORT LEVEL: %s <=======\n", optarg
);
3916 PrintOut(LOG_CRIT
, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3918 } else if (!strcmp(s
,"ioctl")) {
3919 con
->reportataioctl
= con
->reportscsiioctl
= i
;
3920 } else if (!strcmp(s
,"ataioctl")) {
3921 con
->reportataioctl
= i
;
3922 } else if (!strcmp(s
,"scsiioctl")) {
3923 con
->reportscsiioctl
= i
;
3927 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3931 // alternate configuration file
3932 if (strcmp(optarg
,"-"))
3933 configfile
=configfile_alt
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3934 else // read from stdin
3935 configfile
=configfile_stdin
;
3938 // output file with PID number
3939 pid_file
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3941 #if defined(_WIN32) || defined(__CYGWIN__)
3943 // running as service
3944 #ifdef __CYGWIN__ // On Windows, option is already handled by daemon_main(), so ignore it
3948 #endif // _WIN32 || __CYGWIN__
3950 // print version and CVS info
3955 // help: print summary of command-line options
3963 // unrecognized option
3966 #ifdef HAVE_GETOPT_LONG
3967 // Point arg to the argument in which this option was found.
3968 arg
= argv
[optind
-1];
3969 // Check whether the option is a long option that doesn't map to -h.
3970 if (arg
[1] == '-' && optchar
!= 'h') {
3971 // Iff optopt holds a valid option then argument must be missing.
3972 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
3973 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
3974 PrintValidArgs(optopt
);
3976 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
3978 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
3983 // Iff optopt holds a valid option then argument must be missing.
3984 if (strchr(shortopts
, optopt
) != NULL
){
3985 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
3986 PrintValidArgs(optopt
);
3988 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
3990 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3997 // Check to see if option had an unrecognized or incorrect argument.
4001 // It would be nice to print the actual option name given by the user
4002 // here, but we just print the short form. Please fix this if you know
4003 // a clean way to do it.
4004 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
4005 PrintValidArgs(optchar
);
4006 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4011 // non-option arguments are not allowed
4012 if (argc
> optind
) {
4015 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
4016 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4020 // no pidfile in debug mode
4021 if (debugmode
&& pid_file
) {
4024 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4025 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
);
4026 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
4036 // Function we call if no configuration file was found or if the
4037 // SCANDIRECTIVE Directive was found. It makes entries for device
4038 // names returned by make_device_names() in os_OSNAME.c
4039 int MakeConfigEntries(const char *type
, int start
){
4042 char** devlist
= NULL
;
4043 cfgfile
*first
=cfgentries
[0],*cfg
=first
;
4045 // Hack! This is to make DEVICESCAN work on Linux libata devices.
4046 // This will work on a general OS if the way that SAT devices are
4047 // named is the same as SCSI devices.
4048 // The BETTER solution is to modify make_device_names to recognize
4049 // the additional type "SAT". This requires changing os_*.cpp.
4051 const char *basetype
= type
;
4052 if (!strcmp(type
,"SAT") )
4055 // make list of devices
4056 if ((num
=make_device_names(&devlist
,basetype
))<0)
4057 PrintOut(LOG_CRIT
,"Problem creating device name scan list\n");
4059 // if no devices, or error constructing list, return
4063 // loop over entries to create
4064 for (i
=0; i
<num
; i
++){
4066 // make storage and copy for all but first entry
4068 // allocate more storage if needed
4069 while (cfgentries_max
<=start
+i
)
4070 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "simulated configuration file device");
4071 cfg
=cfgentries
[start
+i
]=CreateConfigEntry(first
);
4075 if (!strcmp(type
,"ATA") )
4076 cfg
->controller_type
= CONTROLLER_ATA
;
4077 if (!strcmp(type
,"SCSI") )
4078 cfg
->controller_type
= CONTROLLER_SCSI
;
4079 if (!strcmp(type
,"SAT") )
4080 cfg
->controller_type
= CONTROLLER_SAT
;
4082 // remove device name, if it's there, and put in correct one
4083 cfg
->name
=FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
4084 // save pointer to the device name created within
4085 // make_device_names
4086 cfg
->name
=devlist
[i
];
4089 // If needed, free memory used for devlist: pointers now in
4090 // cfgentries[]->names. If num==0 we never get to this point, but
4091 // that's OK. If we realloc()d the array length in
4092 // make_device_names() that was ALREADY equivalent to calling
4094 devlist
= FreeNonZero(devlist
,(sizeof (char*) * num
),__LINE__
, filenameandversion
);
4099 void CanNotRegister(char *name
, char *type
, int line
, int scandirective
){
4100 if( !debugmode
&& scandirective
== 1 ) { return; }
4102 PrintOut(scandirective
?LOG_INFO
:LOG_CRIT
,
4103 "Unable to register %s device %s at line %d of file %s\n",
4104 type
, name
, line
, configfile
);
4106 PrintOut(LOG_INFO
,"Unable to register %s device %s\n",
4111 // Returns negative value (see ParseConfigFile()) if config file
4112 // had errors, else number of entries which may be zero or positive.
4113 // If we found no configuration file, or it contained SCANDIRECTIVE,
4114 // then *scanning is set to 1, else 0.
4115 int ReadOrMakeConfigEntries(int *scanning
){
4118 // deallocate any cfgfile data structures in memory
4119 RmAllConfigEntries();
4121 // parse configuration file configfile (normally /etc/smartd.conf)
4122 if ((entries
=ParseConfigFile())<0) {
4124 // There was an error reading the configuration file.
4125 RmAllConfigEntries();
4127 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
4131 // did we find entries or scan?
4134 // no error parsing config file.
4136 // we did not find a SCANDIRECTIVE and did find valid entries
4137 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
4139 else if (cfgentries
&& cfgentries
[0]) {
4140 // we found a SCANDIRECTIVE or there was no configuration file so
4141 // scan. Configuration file's first entry contains all options
4143 cfgfile
*first
=cfgentries
[0];
4145 // By default scan for ATA, SCSI and SAT devices
4146 int doata
=1, doscsi
=1, dosat
=1;
4148 if (first
->controller_type
==CONTROLLER_SCSI
) {
4151 } else if (first
->controller_type
==CONTROLLER_ATA
) {
4154 } else if (first
->controller_type
==CONTROLLER_SAT
) {
4162 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
4164 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
4166 // make config list of ATA devices to search for
4168 entries
+=MakeConfigEntries("ATA", entries
);
4169 // make config list of SCSI devices to search for
4171 entries
+=MakeConfigEntries("SCSI", entries
);
4173 entries
+=MakeConfigEntries("SAT", entries
);
4175 // warn user if scan table found no devices
4177 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
4178 // get rid of fake entry with SCANDIRECTIVE as name
4179 RmConfigEntry(cfgentries
, __LINE__
);
4183 PrintOut(LOG_CRIT
,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile
);
4189 // This function tries devices from cfgentries. Each one that can be
4190 // registered is moved onto the [ata|scsi]devices lists and removed
4191 // from the cfgentries list, else its memory is deallocated.
4192 void RegisterDevices(int scanning
){
4195 // start by clearing lists/memory of ALL existing devices
4197 numdevata
=numdevscsi
=0;
4200 for (i
=0; i
<cfgentries_max
; i
++){
4202 cfgfile
*ent
=cfgentries
[i
];
4204 // skip any NULL entries (holes)
4208 // register ATA devices
4209 if (ent
->controller_type
!=CONTROLLER_SCSI
&& ent
->controller_type
!=CONTROLLER_CCISS
){
4210 if (ATADeviceScan(ent
, scanning
))
4211 CanNotRegister(ent
->name
, "ATA", ent
->lineno
, scanning
);
4213 // move onto the list of ata devices
4215 while (numdevata
>=atadevlist_max
)
4216 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
4217 atadevlist
[numdevata
++]=ent
;
4221 // then register SCSI devices
4222 if (ent
->controller_type
==CONTROLLER_SCSI
|| ent
->controller_type
==CONTROLLER_CCISS
||
4223 ent
->controller_type
==CONTROLLER_UNKNOWN
){
4227 struct sigaction alarmAction
, defaultaction
;
4229 // Set up an alarm handler to catch USB devices that hang on
4231 alarmAction
.sa_handler
= AlarmHandler
;
4232 alarmAction
.sa_flags
= SA_RESTART
;
4233 if (sigaction(SIGALRM
, &alarmAction
, &defaultaction
)) {
4234 // if we can't set timeout, just scan device
4235 PrintOut(LOG_CRIT
, "Unable to initialize SCSI timeout mechanism.\n");
4236 retscsi
=SCSIDeviceScan(ent
, scanning
);
4239 // prepare return point in case of bad SCSI device
4240 if (setjmp(registerscsienv
))
4241 // SCSI device timed out!
4244 // Set alarm, make SCSI call, reset alarm
4246 retscsi
=SCSIDeviceScan(ent
, scanning
);
4249 if (sigaction(SIGALRM
, &defaultaction
, NULL
)){
4250 PrintOut(LOG_CRIT
, "Unable to clear SCSI timeout mechanism.\n");
4254 retscsi
=SCSIDeviceScan(ent
, scanning
);
4257 // Now scan SCSI device...
4260 PrintOut(LOG_CRIT
, "Device %s timed out (poorly-implemented USB device?)\n", ent
->name
);
4261 CanNotRegister(ent
->name
, "SCSI", ent
->lineno
, scanning
);
4264 // move onto the list of scsi devices
4266 while (numdevscsi
>=scsidevlist_max
)
4267 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
4268 scsidevlist
[numdevscsi
++]=ent
;
4272 // if device is explicitly listed and we can't register it, then
4273 // exit unless the user has specified that the device is removable
4274 if (cfgentries
[i
] && !scanning
){
4275 if (ent
->removable
|| quit
==2)
4276 PrintOut(LOG_INFO
, "Device %s not available\n", ent
->name
);
4278 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", ent
->name
);
4283 // free up memory if device could not be registered
4284 RmConfigEntry(cfgentries
+i
, __LINE__
);
4293 int main(int argc
, char **argv
)
4295 // Windows: internal main function started direct or by service control manager
4296 static int smartd_main(int argc
, char **argv
)
4299 // external control variables for ATA disks
4300 smartmonctrl control
;
4302 // is it our first pass through?
4305 // next time to wake up
4308 // for simplicity, null all global communications variables/lists
4310 memset(con
, 0,sizeof(control
));
4312 // parse input and print header and usage info if needed
4313 ParseOpts(argc
,argv
);
4315 // do we mute printing from ataprint commands?
4316 con
->printing_switchable
=0;
4317 con
->dont_print
=debugmode
?0:1;
4319 // don't exit on bad checksums
4320 con
->checksumfail
=0;
4322 // the main loop of the code
4325 // are we exiting from a signal?
4326 if (caughtsigEXIT
) {
4327 // are we exiting with SIGTERM?
4328 int isterm
=(caughtsigEXIT
==SIGTERM
);
4329 int isquit
=(caughtsigEXIT
==SIGQUIT
);
4330 int isok
=debugmode
?isterm
|| isquit
:isterm
;
4332 PrintOut(isok
?LOG_INFO
:LOG_CRIT
, "smartd received signal %d: %s\n",
4333 caughtsigEXIT
, strsignal(caughtsigEXIT
));
4335 EXIT(isok
?0:EXIT_SIGNAL
);
4338 // Should we (re)read the config file?
4339 if (firstpass
|| caughtsigHUP
){
4340 int entries
, scanning
=0;
4344 // Workaround for missing SIGQUIT via keyboard on Cygwin
4345 if (caughtsigHUP
==2) {
4346 // Simulate SIGQUIT if another SIGINT arrives soon
4349 if (caughtsigHUP
==2) {
4350 caughtsigEXIT
=SIGQUIT
;
4358 "Signal HUP - rereading configuration file %s\n":
4359 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME
" quits)\n\n",
4363 // clears cfgentries, (re)reads config file, makes >=0 entries
4364 entries
=ReadOrMakeConfigEntries(&scanning
);
4367 // checks devices, then moves onto ata/scsi list or deallocates.
4368 RegisterDevices(scanning
);
4370 else if (quit
==2 || ((quit
==0 || quit
==1) && !firstpass
)) {
4371 // user has asked to continue on error in configuration file
4373 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
4376 // exit with configuration file error status
4377 int status
= (entries
==-3 ? EXIT_READCONF
: entries
==-2 ? EXIT_NOCONF
: EXIT_BADCONF
);
4381 // Log number of devices we are monitoring...
4382 if (numdevata
+numdevscsi
|| quit
==2 || (quit
==1 && !firstpass
))
4383 PrintOut(LOG_INFO
,"Monitoring %d ATA and %d SCSI devices\n",
4384 numdevata
, numdevscsi
);
4386 PrintOut(LOG_INFO
,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4391 // user has asked to print test schedule
4392 PrintTestSchedule(atadevlist
, scsidevlist
);
4400 // check all devices once
4401 CheckDevicesOnce(atadevlist
, scsidevlist
);
4403 // user has asked us to exit after first check
4405 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4406 "smartd is exiting (exit status 0)\n");
4410 // fork into background if needed
4411 if (firstpass
&& !debugmode
) {
4413 if (!is_service
) // don't fork() if running as service via cygrunsrv
4418 // set exit and signal handlers, write PID file, set wake-up time
4420 Initialize(&wakeuptime
);
4424 // sleep until next check time, or a signal arrives
4425 wakeuptime
=dosleep(wakeuptime
);
4431 // Main function for Windows
4432 int main(int argc
, char **argv
){
4433 // Options for smartd windows service
4434 static const daemon_winsvc_options svc_opts
= {
4435 "--service", // cmd_opt
4436 "smartd", "SmartD Service", // servicename, displayname
4438 "Controls and monitors storage devices using the Self-Monitoring, "
4439 "Analysis and Reporting Technology System (S.M.A.R.T.) "
4440 "built into ATA and SCSI Hard Drives. "
4443 // daemon_main() handles daemon and service specific commands
4444 // and starts smartd_main() direct, from a new process,
4445 // or via service control manager
4446 return daemon_main("smartd", &svc_opts
, smartd_main
, argc
, argv
);