2 * Home page of code is: http://smartmontools.sourceforge.net
4 * Copyright (C) 2002-7 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * You should have received a copy of the GNU General Public License
13 * (for example COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16 * This code was originally developed as a Senior Thesis by Michael Cornwell
17 * at the Concurrent Systems Laboratory (now part of the Storage Systems
18 * Research Center), Jack Baskin School of Engineering, University of
19 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
27 // unconditionally included files
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
49 // see which system files to conditionally include
52 // conditionally included files
53 #ifdef HAVE_GETOPT_LONG
62 #pragma warning(disable:4761) // "conversion supplied"
63 typedef unsigned short mode_t
;
66 #include <io.h> // umask()
67 #include <process.h> // getpid()
72 // BOOL WINAPI FreeConsole(void);
73 extern "C" int __stdcall
FreeConsole(void);
74 #include <io.h> // setmode()
77 // locally included files
82 #include "knowndrives.h"
89 #include "hostname_win32.h" // gethost/domainname()
90 #define HAVE_GETHOSTNAME 1
91 #define HAVE_GETDOMAINNAME 1
92 // fork()/signal()/initd simulation for native Windows
93 #include "daemon_win32.h" // daemon_main/detach/signal()
95 #define SIGNALFN daemon_signal
96 #define strsignal daemon_strsignal
97 #define sleep daemon_sleep
98 #undef EXIT // see utility.h
99 #define EXIT(x) { exitstatus = daemon_winsvc_exitcode = (x); exit((x)); }
100 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
101 #define SIGQUIT SIGBREAK
102 #define SIGQUIT_KEYNAME "CONTROL-Break"
105 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
106 #define SIGQUIT_KEYNAME "2x CONTROL-C"
108 #define SIGQUIT_KEYNAME "CONTROL-\\"
112 #if defined (__SVR4) && defined (__sun)
113 extern "C" int getdomainname(char *, int); // no declaration in header files!
116 #define ARGUSED(x) ((void)(x))
118 // These are CVS identification information for *.cpp and *.h files
119 extern const char *atacmdnames_c_cvsid
, *atacmds_c_cvsid
, *ataprint_c_cvsid
, *escalade_c_cvsid
,
120 *knowndrives_c_cvsid
, *os_XXXX_c_cvsid
, *scsicmds_c_cvsid
, *utility_c_cvsid
;
122 static const char *filenameandversion
="$Id: smartd.cpp,v 1.394 2007/11/01 20:53:30 chrfranke Exp $";
123 #ifdef NEED_SOLARIS_ATA_CODE
124 extern const char *os_solaris_ata_s_cvsid
;
127 extern const char *daemon_win32_c_cvsid
, *hostname_win32_c_cvsid
, *syslog_win32_c_cvsid
;
129 const char *smartd_c_cvsid
="$Id: smartd.cpp,v 1.394 2007/11/01 20:53:30 chrfranke Exp $"
130 ATACMDS_H_CVSID ATAPRINT_H_CVSID CONFIG_H_CVSID
131 #ifdef DAEMON_WIN32_H_CVSID
134 EXTERN_H_CVSID INT64_H_CVSID
135 #ifdef HOSTNAME_WIN32_H_CVSID
136 HOSTNAME_WIN32_H_CVSID
138 KNOWNDRIVES_H_CVSID SCSICMDS_H_CVSID SMARTD_H_CVSID
139 #ifdef SYSLOG_H_CVSID
144 extern const char *reportbug
;
146 // GNU copyleft statement. Needed for GPL purposes.
147 const char *copyleftstring
="smartd comes with ABSOLUTELY NO WARRANTY. This is\n"
148 "free software, and you are welcome to redistribute it\n"
149 "under the terms of the GNU General Public License\n"
150 "Version 2. See http://www.gnu.org for further details.\n\n";
152 extern unsigned char debugmode
;
154 // command-line: how long to sleep between checks
155 static int checktime
=CHECKTIME
;
157 // command-line: name of PID file (NULL for no pid file)
158 static char* pid_file
=NULL
;
160 // configuration file name
162 static char* configfile
= SMARTMONTOOLS_SYSCONFDIR
"/" CONFIGFILENAME
;
164 static char* configfile
= "./" CONFIGFILENAME
;
166 // configuration file "name" if read from stdin
167 static /*const*/ char * const configfile_stdin
= "<stdin>";
168 // allocated memory for alternate configuration file name
169 static char* configfile_alt
= NULL
;
171 // command-line: when should we exit?
174 // command-line; this is the default syslog(3) log facility to use.
175 static int facility
=LOG_DAEMON
;
178 // command-line: fork into background?
179 static bool do_fork
=true;
182 // used for control of printing, passing arguments to atacmds.c
183 smartmonctrl
*con
=NULL
;
185 // pointers to (real or simulated) entries in configuration file, and
186 // maximum space currently allocated for these entries.
187 cfgfile
**cfgentries
=NULL
;
188 int cfgentries_max
=0;
190 // pointers to ATA and SCSI devices being monitored, maximum and
192 cfgfile
**atadevlist
=NULL
, **scsidevlist
=NULL
;
193 int atadevlist_max
=0, scsidevlist_max
=0;
194 int numdevata
=0, numdevscsi
=0;
196 // track memory usage
197 extern int64_t bytes
;
200 extern int exitstatus
;
202 // set to one if we catch a USR1 (check devices now)
203 volatile int caughtsigUSR1
=0;
206 // set to one if we catch a USR2 (toggle debug mode)
207 volatile int caughtsigUSR2
=0;
210 // set to one if we catch a HUP (reload config file). In debug mode,
211 // set to two, if we catch INT (also reload config file).
212 volatile int caughtsigHUP
=0;
214 // set to signal value if we catch INT, QUIT, or TERM
215 volatile int caughtsigEXIT
=0;
218 // stack environment if we time out during SCSI access (USB devices)
219 jmp_buf registerscsienv
;
222 // translate cfg->pending into the correct Attribute numbers
223 void TranslatePending(unsigned short pending
, unsigned char *current
, unsigned char *offline
) {
225 unsigned char curr
= CURR_PEND(pending
);
226 unsigned char off
= OFF_PEND(pending
);
228 // look for special value of CUR_UNC_DEFAULT that means DONT
229 // monitor. 0 means DO test.
230 if (curr
==CUR_UNC_DEFAULT
)
233 curr
=CUR_UNC_DEFAULT
;
235 // look for special value of OFF_UNC_DEFAULT that means DONT
236 // monitor. 0 means DO TEST.
237 if (off
==OFF_UNC_DEFAULT
)
249 // Free all memory associated with the selftest part of a configfile entry. Return NULL.
// The visible steps are: release the regex text, release the compiled
// regex, overwrite the compiled-regex storage, then release the testinfo
// structure itself. Each release goes through FreeNonZero(), tagged with
// the current source line and filenameandversion.
250 testinfo
* FreeTestData(testinfo
*data
){
252 // make sure we have something to do.
256 // free space for text pattern
// (size argument is -1 here; presumably that tells FreeNonZero() the block
// is a string of unknown length -- confirm against its definition)
257 data
->regex
=FreeNonZero(data
->regex
, -1, __LINE__
, filenameandversion
);
259 // free compiled expression
260 regfree(&(data
->cregex
));
262 // make sure that no sign of the compiled expression is left behind
263 // (just in case, to help detect bugs if we ever try and refer to
// NOTE(review): the fill byte below is the character '0' (0x30), not the
// value 0 -- confirm which was intended.
265 memset(&(data
->cregex
), '0', sizeof(regex_t
));
267 // free remaining memory space
// (the result of FreeNonZero() is stored back into data)
268 data
=FreeNonZero(data
, sizeof(testinfo
), __LINE__
, filenameandversion
);
273 cfgfile
**AllocateMoreSpace(cfgfile
**oldarray
, int *oldsize
, char *listname
){
274 // for now keep BLOCKSIZE small to help detect coding problems.
275 // Perhaps increase in the future.
276 const int BLOCKSIZE
=8;
279 int news
= olds
+ BLOCKSIZE
;
280 cfgfile
**newptr
=(cfgfile
**)realloc(oldarray
, news
*sizeof(cfgfile
*));
282 // did we get more space?
285 // clear remaining entries ala calloc()
286 for (i
=olds
; i
<news
; i
++)
289 bytes
+= BLOCKSIZE
*sizeof(cfgfile
*);
294 PrintOut(LOG_INFO
, "allocating %d slots for %s\n", BLOCKSIZE
, listname
);
300 PrintOut(LOG_CRIT
, "out of memory for allocating %s list\n", listname
);
304 void PrintOneCVS(const char *a_cvs_id
){
306 printone(out
,a_cvs_id
);
307 PrintOut(LOG_INFO
,"%s",out
);
311 // prints CVS identity information for the executable
313 const char *configargs
=strlen(SMARTMONTOOLS_CONFIGURE_ARGS
)?SMARTMONTOOLS_CONFIGURE_ARGS
:"[no arguments given]";
315 PrintOut(LOG_INFO
,(char *)copyleftstring
);
316 PrintOut(LOG_INFO
,"CVS version IDs of files used to build this code are:\n");
317 PrintOneCVS(atacmdnames_c_cvsid
);
318 PrintOneCVS(atacmds_c_cvsid
);
319 PrintOneCVS(ataprint_c_cvsid
);
321 PrintOneCVS(daemon_win32_c_cvsid
);
324 PrintOneCVS(hostname_win32_c_cvsid
);
326 PrintOneCVS(knowndrives_c_cvsid
);
327 PrintOneCVS(os_XXXX_c_cvsid
);
328 #ifdef NEED_SOLARIS_ATA_CODE
329 PrintOneCVS( os_solaris_ata_s_cvsid
);
331 PrintOneCVS(scsicmds_c_cvsid
);
332 PrintOneCVS(smartd_c_cvsid
);
334 PrintOneCVS(syslog_win32_c_cvsid
);
336 PrintOneCVS(utility_c_cvsid
);
337 PrintOut(LOG_INFO
, "\nsmartmontools release " PACKAGE_VERSION
" dated " SMARTMONTOOLS_RELEASE_DATE
" at " SMARTMONTOOLS_RELEASE_TIME
"\n");
338 PrintOut(LOG_INFO
, "smartmontools build host: " SMARTMONTOOLS_BUILD_HOST
"\n");
339 PrintOut(LOG_INFO
, "smartmontools build configured: " SMARTMONTOOLS_CONFIGURE_DATE
"\n");
340 PrintOut(LOG_INFO
, "smartd compile dated " __DATE__
" at "__TIME__
"\n");
341 PrintOut(LOG_INFO
, "smartmontools configure arguments: %s\n", configargs
);
345 // Removes config file entry, freeing all memory
346 void RmConfigEntry(cfgfile
**anentry
, int whatline
){
350 // pointer should never be null!
352 PrintOut(LOG_CRIT
,"Internal error in RmConfigEntry() at line %d of file %s\n%s",
353 whatline
, filenameandversion
, reportbug
);
357 // only remove entries that exist!
361 // entry exists -- free all of its memory
362 cfg
->name
= FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
363 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
364 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
365 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
366 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
368 cfg
->mailwarn
->address
= FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
369 cfg
->mailwarn
->emailcmdline
= FreeNonZero(cfg
->mailwarn
->emailcmdline
, -1,__LINE__
,filenameandversion
);
370 cfg
->mailwarn
= FreeNonZero(cfg
->mailwarn
, sizeof(maildata
),__LINE__
,filenameandversion
);
372 cfg
->testdata
= FreeTestData(cfg
->testdata
);
373 *anentry
= FreeNonZero(cfg
, sizeof(cfgfile
),__LINE__
,filenameandversion
);
378 // Deallocates all memory associated with the cfgentries list.
// Every one of the cfgentries_max slots is handed to RmConfigEntry(), then
// the pointer array itself is released.
379 void RmAllConfigEntries(){
// pass the address of each slot to RmConfigEntry(), tagged with the
// current source line
382 for (i
=0; i
<cfgentries_max
; i
++)
383 RmConfigEntry(cfgentries
+i
, __LINE__
);
// release the array of cfgentries_max pointers; cfgentries takes whatever
// FreeNonZero() returns
385 cfgentries
=FreeNonZero(cfgentries
, sizeof(cfgfile
*)*cfgentries_max
, __LINE__
, filenameandversion
);
391 // Deallocates all memory associated with the ATA/SCSI device lists.
// The same two steps are applied first to atadevlist and then to
// scsidevlist: each slot is handed to RmConfigEntry(), then the pointer
// array is released through FreeNonZero().
392 void RmAllDevEntries(){
// ATA list: pass the address of each of the atadevlist_max slots to
// RmConfigEntry(), tagged with the current source line
395 for (i
=0; i
<atadevlist_max
; i
++)
396 RmConfigEntry(atadevlist
+i
, __LINE__
);
// release the ATA pointer array; atadevlist takes FreeNonZero()'s result
398 atadevlist
=FreeNonZero(atadevlist
, sizeof(cfgfile
*)*atadevlist_max
, __LINE__
, filenameandversion
);
// SCSI list: same treatment for each of the scsidevlist_max slots
401 for (i
=0; i
<scsidevlist_max
; i
++)
402 RmConfigEntry(scsidevlist
+i
, __LINE__
);
// release the SCSI pointer array; scsidevlist takes FreeNonZero()'s result
404 scsidevlist
=FreeNonZero(scsidevlist
, sizeof(cfgfile
*)*scsidevlist_max
, __LINE__
, filenameandversion
);
410 // Remove the PID file named by pid_file and release the name string.
411 void RemovePidFile(){
// unlink() reports failure with -1; in that case log the file name and the
// errno text at LOG_CRIT
413 if ( -1==unlink(pid_file
) )
414 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
415 pid_file
, strerror(errno
));
// release the file-name string; pid_file takes FreeNonZero()'s result
// (size argument is -1; presumably that marks a string of unknown length
// -- confirm against FreeNonZero()'s definition)
416 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
422 // Note if we catch a SIGUSR1
423 void USR1handler(int sig
){
430 // Note if we catch a SIGUSR2
431 void USR2handler(int sig
){
438 // Note if we catch a HUP (or INT in debug mode)
439 void HUPhandler(int sig
){
447 // signal handler for TERM, QUIT, and INT (if not in debug mode)
448 void sighandler(int sig
){
455 // signal handler that prints Goodbye message and removes pidfile
458 // clean up memory -- useful for debugging
459 RmAllConfigEntries();
462 // delete PID file, if one was created
465 // remove alternate configfile name
466 configfile_alt
=FreeNonZero(configfile_alt
, -1,__LINE__
,filenameandversion
);
468 // useful for debugging -- have we managed memory correctly?
469 if (debugmode
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
470 PrintOut(LOG_INFO
, "Memory still allocated for devices at exit is %" PRId64
" bytes.\n", bytes
);
472 // if we are exiting because of a code bug, tell user
473 if (exitstatus
==EXIT_BADCODE
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
474 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
476 if (exitstatus
==0 && bytes
)
477 exitstatus
=EXIT_BADCODE
;
479 // and this should be the final output from smartd before it exits
480 PrintOut(exitstatus
?LOG_CRIT
:LOG_INFO
, "smartd is exiting (exit status %d)\n", exitstatus
);
#define ENVLENGTH 1024

// A replacement for setenv(), which is not available on all platforms.
//
// Formats "name=value" into the caller-supplied buffer and hands that
// buffer to putenv(). putenv() keeps a pointer to the string instead of
// copying it, so the buffer must not be freed or otherwise invalidated
// until the variable is redefined or deleted by another putenv() call.
// Callers therefore keep these buffers on the stack for as long as the
// popen() call that consumes the environment is underway.
//
// stackspace: buffer of at least ENVLENGTH bytes that receives the string
// name:       environment variable name
// value:      value to export; NULL is exported as an empty string
//
// Text that does not fit in ENVLENGTH bytes is truncated; snprintf()
// always NUL-terminates the result.
//
// Returns the result of putenv() (0 on success). Returns -1 without
// touching the environment if stackspace or name is NULL, or if
// snprintf() reports a formatting error.
int exportenv(char* stackspace, const char *name, const char *value){
  // passing NULL for a %s conversion, or as the destination, is undefined
  if (!stackspace || !name)
    return -1;

  // do not export a buffer whose content snprintf() could not produce
  if (snprintf(stackspace, ENVLENGTH, "%s=%s", name, value ? value : "") < 0)
    return -1;

  return putenv(stackspace);
}
499 char* dnsdomain(const char* hostname
) {
501 #ifdef HAVE_GETHOSTBYNAME
504 if ((hp
= gethostbyname(hostname
))) {
505 // Does this work if gethostbyname() returns an IPv6 name in
506 // colon/dot notation? [BA]
507 if ((p
= strchr(hp
->h_name
, '.')))
518 // If either address or executable path is non-null then send and log
519 // a warning email, or execute executable
520 void MailWarning(cfgfile
*cfg
, int which
, char *fmt
, ...){
521 char command
[2048], message
[256], hostname
[256], domainname
[256], additional
[256],fullmessage
[1024];
522 char original
[256], further
[256], nisdomain
[256], subject
[256],dates
[DATEANDEPOCHLEN
];
523 char environ_strings
[11][ENVLENGTH
];
526 const int day
=24*3600;
534 "FailedHealthCheck", // 5
535 "FailedReadSmartData", // 6
536 "FailedReadSmartErrorLog", // 7
537 "FailedReadSmartSelfTestLog", // 8
538 "FailedOpenDevice", // 9
539 "CurrentPendingSector", // 10
540 "OfflineUncorrectableSector", // 11
544 char *address
, *executable
;
546 maildata
* data
=cfg
->mailwarn
;
550 char stdinbuf
[1024]; int boxmsgoffs
, boxtype
;
552 const char *newadd
=NULL
, *newwarn
=NULL
;
553 const char *unknown
="[Unknown]";
555 // See if user wants us to send mail
559 address
=data
->address
;
560 executable
=data
->emailcmdline
;
562 if (!address
&& !executable
)
565 // which type of mail are we sending?
566 mail
=(data
->maillog
)+which
;
569 if (data
->emailfreq
<1 || data
->emailfreq
>3) {
570 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg->mailwarn->emailfreq=%d\n",data
->emailfreq
);
573 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
574 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
575 which
, (int)sizeof(whichfail
));
579 // Return if a single warning mail has been sent.
580 if ((data
->emailfreq
==1) && mail
->logged
)
583 // Return if this is an email test and one has already been sent.
584 if (which
== 0 && mail
->logged
)
587 // To decide whether to send mail, we need to know what time it is.
590 // Return if less than one day has gone by
591 if (data
->emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
594 // Return if less than 2^(logged-1) days have gone by
595 if (data
->emailfreq
==3 && mail
->logged
){
596 days
=0x01<<(mail
->logged
-1);
598 if (epoch
<(mail
->lastsent
+days
))
602 // record the time of this mail message, and the first mail message
604 mail
->firstsent
=epoch
;
605 mail
->lastsent
=epoch
;
607 // get system host & domain names (not null terminated if length=MAX)
608 #ifdef HAVE_GETHOSTNAME
609 if (gethostname(hostname
, 256))
610 strcpy(hostname
, unknown
);
614 p
= dnsdomain(hostname
);
616 strncpy(domainname
, p
, 255);
617 domainname
[255]='\0';
619 strcpy(domainname
, unknown
);
622 strcpy(hostname
, unknown
);
623 strcpy(domainname
, unknown
);
626 #ifdef HAVE_GETDOMAINNAME
627 if (getdomainname(nisdomain
, 256))
628 strcpy(nisdomain
, unknown
);
632 strcpy(nisdomain
, unknown
);
635 // print warning string into message
637 vsnprintf(message
, 256, fmt
, ap
);
640 // appropriate message about further information
641 additional
[0]=original
[0]=further
[0]='\0';
643 sprintf(further
,"You can also use the smartctl utility for further investigation.\n");
645 switch (data
->emailfreq
){
647 sprintf(additional
,"No additional email messages about this problem will be sent.\n");
650 sprintf(additional
,"Another email message will be sent in 24 hours if the problem persists.\n");
653 sprintf(additional
,"Another email message will be sent in %d days if the problem persists\n",
654 (0x01)<<mail
->logged
);
657 if (data
->emailfreq
>1 && mail
->logged
){
658 dateandtimezoneepoch(dates
, mail
->firstsent
);
659 sprintf(original
,"The original email about this issue was sent at %s\n", dates
);
663 snprintf(subject
, 256,"SMART error (%s) detected on host: %s", whichfail
[which
], hostname
);
665 // If the user has set cfg->emailcmdline, use that as mailer, else "mail" or "mailx".
667 #ifdef DEFAULT_MAILER
668 executable
= DEFAULT_MAILER
;
673 executable
= "blat"; // http://blat.sourceforge.net/
677 // make a private copy of address with commas replaced by spaces
678 // to separate recipients
680 address
=CustomStrDup(data
->address
, 1, __LINE__
, filenameandversion
);
681 #ifndef _WIN32 // blat mailer needs comma
684 while ((comma
=strchr(comma
, ',')))
690 // Export information in environment variables that will be useful
692 exportenv(environ_strings
[0], "SMARTD_MAILER", executable
);
693 exportenv(environ_strings
[1], "SMARTD_MESSAGE", message
);
694 exportenv(environ_strings
[2], "SMARTD_SUBJECT", subject
);
695 dateandtimezoneepoch(dates
, mail
->firstsent
);
696 exportenv(environ_strings
[3], "SMARTD_TFIRST", dates
);
697 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
698 exportenv(environ_strings
[4], "SMARTD_TFIRSTEPOCH", dates
);
699 exportenv(environ_strings
[5], "SMARTD_FAILTYPE", whichfail
[which
]);
701 exportenv(environ_strings
[6], "SMARTD_ADDRESS", address
);
702 exportenv(environ_strings
[7], "SMARTD_DEVICESTRING", cfg
->name
);
704 switch (cfg
->controller_type
) {
705 case CONTROLLER_3WARE_678K
:
706 case CONTROLLER_3WARE_9000_CHAR
:
707 case CONTROLLER_3WARE_678K_CHAR
:
709 char *s
,devicetype
[16];
710 sprintf(devicetype
, "3ware,%d", cfg
->controller_port
-1);
711 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
712 if ((s
=strchr(cfg
->name
, ' ')))
714 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
719 case CONTROLLER_CCISS
:
721 char *s
,devicetype
[16];
722 sprintf(devicetype
, "cciss,%d", cfg
->controller_port
-1);
723 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
724 if ((s
=strchr(cfg
->name
, ' ')))
726 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
732 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "ata");
733 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
735 case CONTROLLER_MARVELL_SATA
:
736 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "marvell");
737 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
739 case CONTROLLER_SCSI
:
740 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "scsi");
741 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
744 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "sat");
745 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
749 char *s
,devicetype
[16];
750 sprintf(devicetype
, "hpt,%d/%d/%d", cfg
->hpt_data
[0],
751 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
752 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
753 if ((s
=strchr(cfg
->name
, ' ')))
755 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
762 snprintf(fullmessage
, 1024,
763 "This email was generated by the smartd daemon running on:\n\n"
766 " NIS domain: %s\n\n"
767 "The following warning/error was logged by the smartd daemon:\n\n"
769 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
771 hostname
, domainname
, nisdomain
, message
, further
, original
, additional
);
772 exportenv(environ_strings
[10], "SMARTD_FULLMESSAGE", fullmessage
);
774 // now construct a command to send this as EMAIL
777 snprintf(command
, 2048,
778 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
779 "%sENDMAIL\n", subject
, address
, fullmessage
);
781 snprintf(command
, 2048, "%s 2>&1", executable
);
783 // tell SYSLOG what we are about to do...
784 newadd
=address
?address
:"<nomailer>";
785 newwarn
=which
?"Warning via":"Test of";
787 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
788 which
?"Sending warning via":"Executing test of", executable
, newadd
);
790 // issue the command to send mail or to run the user's executable
792 if (!(pfp
=popen(command
, "r")))
793 // failed to popen() mail process
794 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
795 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
799 char buffer
[EBUFLEN
];
801 // if unexpected output on stdout/stderr, null terminate, print, and flush
802 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
804 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
806 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
807 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
809 // flush pipe if needed
810 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
813 // tell user that pipe was flushed, or that something is really wrong
814 if (count
&& count
<EBUFLEN
)
815 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
816 newwarn
, executable
, newadd
);
818 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
819 newwarn
, executable
, newadd
);
822 // if something went wrong with mail process, print warning
824 if (-1==(status
=pclose(pfp
)))
825 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
826 errno
?strerror(errno
):"");
828 // mail process apparently succeeded. Check and report exit status
831 if (WIFEXITED(status
)) {
832 // exited 'normally' (but perhaps with nonzero status)
833 status8
=WEXITSTATUS(status
);
836 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
837 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
839 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
840 newwarn
, executable
, newadd
, status
, status8
);
842 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
845 if (WIFSIGNALED(status
))
846 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
847 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
849 // this branch is probably not possible. If subprocess is
850 // stopped then pclose() should not return.
851 if (WIFSTOPPED(status
))
852 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
853 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
860 // No "here-documents" on Windows, so must use separate commandline and stdin
861 command
[0] = stdinbuf
[0] = 0;
862 boxtype
= -1; boxmsgoffs
= 0;
863 newadd
= "<nomailer>";
865 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
866 int addroffs
= (!strncmp(address
, "sys", 3) ? 3 : 0);
867 if (!strncmp(address
+addroffs
, "msgbox", 6) && (!address
[addroffs
+6] || address
[addroffs
+6] == ',')) {
868 boxtype
= (addroffs
> 0 ? 1 : 0);
870 if (address
[addroffs
])
876 if (address
[addroffs
]) {
877 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
878 snprintf(command
, sizeof(command
),
879 "%s - -q -subject \"%s\" -to \"%s\"",
880 executable
, subject
, address
+addroffs
);
881 newadd
= address
+addroffs
;
883 // Message for mail [0...] and messagebox [boxmsgoffs...]
884 snprintf(stdinbuf
, sizeof(stdinbuf
),
885 "This email was generated by the smartd daemon running on:\n\n"
888 // " NIS domain: %s\n"
890 "The following warning/error was logged by the smartd daemon:\n\n"
892 "For details see the event log or log file of smartd.\n\n"
895 hostname
, /*domainname, */ nisdomain
, &boxmsgoffs
, message
, further
, original
, additional
);
898 snprintf(command
, sizeof(command
), "%s", executable
);
900 newwarn
=which
?"Warning via":"Test of";
903 daemon_messagebox(boxtype
, subject
, stdinbuf
+boxmsgoffs
);
904 PrintOut(LOG_INFO
,"%s message box\n", newwarn
);
907 char stdoutbuf
[800]; // < buffer in syslog_win32::vsyslog()
910 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
911 (which
?"Sending warning via":"Executing test of"), executable
, newadd
);
912 rc
= daemon_spawn(command
, stdinbuf
, strlen(stdinbuf
), stdoutbuf
, sizeof(stdoutbuf
));
913 if (rc
>= 0 && stdoutbuf
[0])
914 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
915 newwarn
, executable
, newadd
, strlen(stdoutbuf
), stdoutbuf
);
917 PrintOut(LOG_CRIT
,"%s %s to %s: failed, exit status %d\n",
918 newwarn
, executable
, newadd
, rc
);
920 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
925 // increment mail sent counter
928 // free copy of address (without commas)
929 address
=FreeNonZero(address
, -1, __LINE__
, filenameandversion
);
934 // Printing function for watching ataprint commands, or losing them
935 // [From GLIBC Manual: Since the prototype doesn't specify types for
936 // optional arguments, in a call to a variadic function the default
937 // argument promotions are performed on the optional argument
938 // values. This means the objects of type char or short int (whether
939 // signed or not) are promoted to either int or unsigned int, as
941 void pout(const char *fmt
, ...){
944 // get the correct time in syslog()
945 FixGlibcTimeZoneBug();
946 // initialize variable argument list
948 // in debug==1 mode we will print the output from the ataprint.o functions!
949 if (debugmode
&& debugmode
!=2)
951 if (facility
== LOG_LOCAL1
) // logging to stdout
952 vfprintf(stderr
,fmt
,ap
);
956 // in debug==2 mode we print output from knowndrives.o functions
957 else if (debugmode
==2 || con
->reportataioctl
|| con
->reportscsiioctl
|| con
->controller_port
) {
958 openlog("smartd", LOG_PID
, facility
);
959 vsyslog(LOG_INFO
, fmt
, ap
);
967 // This function prints either to stdout or to the syslog as needed.
968 // This function is also used by utility.cpp to report LOG_CRIT errors.
969 void PrintOut(int priority
, const char *fmt
, ...){
972 // get the correct time in syslog()
973 FixGlibcTimeZoneBug();
974 // initialize variable argument list
978 if (facility
== LOG_LOCAL1
) // logging to stdout
979 vfprintf(stderr
,fmt
,ap
);
984 openlog("smartd", LOG_PID
, facility
);
985 vsyslog(priority
,fmt
,ap
);
992 // Forks new process, closes ALL file descriptors, redirects stdin,
993 // stdout, and stderr. Not quite daemon(). See
994 // http://www.iar.unlp.edu.ar/~fede/revistas/lj/Magazines/LJ47/2335.html
995 // for a good description of why we do things this way.
1001 // flush all buffered streams. Else we might get two copies of open
1002 // streams since both parent and child get copies of the buffers.
1006 if ((pid
=fork()) < 0) {
1008 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1012 // we are the parent process -- exit cleanly
1015 // from here on, we are the child process.
1018 // Fork one more time to avoid any possibility of having terminals
1019 if ((pid
=fork()) < 0) {
1021 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1025 // we are the parent process -- exit cleanly
1028 // Now we are the child's child...
1031 // close any open file descriptors
1032 for (i
=getdtablesize();i
>=0;--i
)
1036 // Cygwin's setsid() does not detach the process from Windows console
1038 #endif // __CYGWIN__
1040 // redirect any IO attempts to /dev/null for stdin
1041 i
=open("/dev/null",O_RDWR
);
1050 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1054 // No fork() on native Win32
1055 // Detach this process from console
1057 if (daemon_detach("smartd")) {
1058 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1061 // stdin/out/err now closed if not redirected
1067 // create a PID file containing the current process id
1068 void WritePidFile() {
1071 pid_t pid
= getpid();
1076 old_umask
= umask(0077); // rwx------
1078 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1079 old_umask
= umask(0033); // rwxr--r--
1081 fp
= fopen(pid_file
, "w");
1085 } else if (fprintf(fp
, "%d\n", (int)pid
) <= 0) {
1087 } else if (fclose(fp
) != 0) {
1091 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
);
1094 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
, (int)pid
);
1099 // Prints header identifying version of code and home
1101 #ifdef HAVE_GET_OS_VERSION_STR
1102 const char * ver
= get_os_version_str();
1104 const char * ver
= SMARTMONTOOLS_BUILD_HOST
;
1106 PrintOut(LOG_INFO
,"smartd version %s [%s] Copyright (C) 2002-7 Bruce Allen\n", PACKAGE_VERSION
, ver
);
1107 PrintOut(LOG_INFO
,"Home page is " PACKAGE_HOMEPAGE
"\n\n");
1111 // prints help info for configuration file Directives
1114 "Configuration file (%s) Directives (after device name):\n"
1115 " -d TYPE Set the device type: ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N, cciss,N\n"
1116 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1117 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1118 " -S VAL Enable/disable attribute autosave (on/off)\n"
1119 " -n MODE No check if: never[,q], sleep[,q], standby[,q], idle[,q]\n"
1120 " -H Monitor SMART Health Status, report if failed\n"
1121 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1122 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1123 " -f Monitor 'Usage' Attributes, report failures\n"
1124 " -m ADD Send email warning to address ADD\n"
1125 " -M TYPE Modify email warning behavior (see man page)\n"
1126 " -p Report changes in 'Prefailure' Attributes\n"
1127 " -u Report changes in 'Usage' Attributes\n"
1128 " -t Equivalent to -p and -u Directives\n"
1129 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1130 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1131 " -i ID Ignore Attribute ID for -f Directive\n"
1132 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1133 " -C ID Monitor Current Pending Sectors in Attribute ID\n"
1134 " -U ID Monitor Offline Uncorrectable Sectors in Attribute ID\n"
1135 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1136 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1137 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1138 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1139 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1140 " # Comment: text after a hash sign is ignored\n"
1141 " \\ Line continuation character\n"
1142 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1143 "Use ID = 0 to turn off -C and/or -U Directives\n"
1144 "Example: /dev/hda -a\n",
1149 /* Returns a pointer to a static string containing a formatted list of the valid
1150 arguments to the option opt or NULL on failure. */
// opt is the option letter whose accepted argument values are wanted; the
// help text in this file asks for the q and r lists. Every result visible
// here is a string literal, so callers only read it and never free it.
// NOTE(review): the option letter attached to each string below is assumed
// from the wording of the string - confirm against the case labels.
1151 const char *GetValidArgList(char opt
) {
// configuration file name, or a lone dash (presumably option c)
1154 return "<FILE_NAME>, -";
// a regular expression argument
1156 return "valid_regular_expression";
// syslog facility names (presumably option l)
1158 return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
// quit conditions (presumably option q)
1160 return "nodev, errors, nodevstartup, never, onecheck, showtests";
// transaction report types (presumably option r)
1162 return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
// a file name (presumably option p, the PID file)
1164 return "<FILE_NAME>";
// a number of seconds (presumably option i, the check interval)
1166 return "<INTEGER_SECONDS>";
1172 /* prints help information for command syntax */
1174 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1175 #ifdef HAVE_GETOPT_LONG
1176 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1177 PrintOut(LOG_INFO
," Read configuration file NAME or stdin [default is %s]\n\n", configfile
);
1178 PrintOut(LOG_INFO
," -d, --debug\n");
1179 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1180 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1181 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1182 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1183 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1184 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1185 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1186 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
1188 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1190 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1193 PrintOut(LOG_INFO
," -n, --no-fork\n");
1194 PrintOut(LOG_INFO
," Do not fork into background\n\n");
1196 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1197 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1198 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1199 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1200 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1201 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1203 PrintOut(LOG_INFO
," --service\n");
1204 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1205 PrintOut(LOG_INFO
," smartd install [options]\n");
1206 PrintOut(LOG_INFO
," Remove service with:\n");
1207 PrintOut(LOG_INFO
," smartd remove\n\n");
1209 #endif // _WIN32 || __CYGWIN__
1210 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1211 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
1213 PrintOut(LOG_INFO
," -c NAME|- Read configuration file NAME or stdin [default is %s]\n", configfile
);
1214 PrintOut(LOG_INFO
," -d Start smartd in debug mode\n");
1215 PrintOut(LOG_INFO
," -D Print the configuration file Directives and exit\n");
1216 PrintOut(LOG_INFO
," -h Display this help and exit\n");
1217 PrintOut(LOG_INFO
," -i N Set interval between disk checks to N seconds, where N >= 10\n");
1218 PrintOut(LOG_INFO
," -l local? Use syslog facility local0 - local7, or daemon\n");
1219 PrintOut(LOG_INFO
," -n Do not fork into background\n");
1220 PrintOut(LOG_INFO
," -p NAME Write PID file NAME\n");
1221 PrintOut(LOG_INFO
," -q WHEN Quit on one of: %s\n", GetValidArgList('q'));
1222 PrintOut(LOG_INFO
," -r TYPE Report transactions for one of: %s\n", GetValidArgList('r'));
1223 PrintOut(LOG_INFO
," -V Print License, Copyright, and version information\n");
1227 // returns negative if problem, else fd>=0
// Opens a device through deviceopen(device, mode) and reports open failures.
//   device   - device name; when it contains a space, the comments below
//              describe cutting the name at that space for the open call
//              and restoring the space afterwards
//   mode     - passed unchanged to deviceopen()
//   scanning - when nonzero, open failures are only reported if debugmode
//              is also set
1228 static int OpenDevice(char *device
, char *mode
, int scanning
) {
1232 // If there is an ASCII "space" character in the device name,
1233 // terminate string there. This is for 3ware and highpoint devices only.
1234 if ((s
=strchr(device
,' ')))
1238 fd
= deviceopen(device
, mode
);
1240 // if we removed a space, put it back in please
1244 // if we failed to open the device, complain!
1247 // For linux+devfs, a nonexistent device gives a strange error
1248 // message. This makes the error message a bit more sensible.
1249 // If no debug and scanning - don't print errors
1250 if (debugmode
|| !scanning
) {
// ENOENT and ENOTDIR are singled out as the nonexistent-device case
1251 if (errno
==ENOENT
|| errno
==ENOTDIR
)
// the message carries the device name and the strerror() text for errno
1254 PrintOut(LOG_INFO
,"Device: %s, %s, open() failed\n",
1255 device
, strerror(errno
));
1259 // device opened successfully
// Closes the descriptor fd with deviceclose(). When deviceclose() returns
// nonzero, a message with the device name, the strerror() text for errno
// and the descriptor number is logged.
//   fd   - descriptor to close
//   name - device name, used only in the log message
1263 int CloseDevice(int fd
, char *name
){
1264 if (deviceclose(fd
)){
1265 PrintOut(LOG_INFO
,"Device: %s, %s, close(%d) failed\n", name
, strerror(errno
), fd
);
1268 // device successfully closed
1272 // returns <0 on failure
// Reads the SMART error log of the device open on fd with ataReadErrorLog().
// When that call yields -1, a message naming the device is logged.
// Otherwise the result is the log field ata_error_count, or 0 while the
// log field error_log_pointer is still zero.
//   fd   - descriptor of the opened device
//   name - device name, used only in the log message
1273 int ATAErrorCount(int fd
, char *name
){
1274 struct ata_smart_errorlog log
;
1276 if (-1==ataReadErrorLog(fd
,&log
)){
1277 PrintOut(LOG_INFO
,"Device: %s, Read SMART Error Log Failed\n",name
);
1281 // return current number of ATA errors
// a zero error_log_pointer is reported as a count of 0
1282 return log
.error_log_pointer
?log
.ata_error_count
:0;
1285 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1286 // error count, and top bits are the power-on hours of the last error.
// Reads the SMART self-test log with ataReadSelfTestLog() and passes it to
// ataPrintSmartSelfTestlog(&log, 0), whose result is returned unchanged.
// Callers in this file split that result with SELFTEST_ERRORCOUNT() and
// SELFTEST_ERRORHOURS().
//   fd   - descriptor of the opened device
//   name - device name, used only in the log message
1287 int SelfTestErrorCount(int fd
, char *name
){
1288 struct ata_smart_selftestlog log
;
1290 if (-1==ataReadSelfTestLog(fd
,&log
)){
1291 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1295 // return current number of self-test errors
1296 return ataPrintSmartSelfTestlog(&log
,0);
1299 // scan to see what ata devices there are, and if they support SMART
1300 int ATADeviceScan(cfgfile
*cfg
, int scanning
){
1301 int fd
, supported
=0;
1302 struct ata_identify_device drive
;
1303 char *name
=cfg
->name
;
1304 int retainsmartdata
=0;
1308 // should we try to register this as an ATA device?
1309 switch (cfg
->controller_type
) {
1310 case CONTROLLER_ATA
:
1311 case CONTROLLER_3WARE_678K
:
1312 case CONTROLLER_MARVELL_SATA
:
1313 case CONTROLLER_HPT
:
1314 case CONTROLLER_UNKNOWN
:
1317 case CONTROLLER_3WARE_678K_CHAR
:
1318 mode
="ATA_3WARE_678K";
1320 case CONTROLLER_3WARE_9000_CHAR
:
1321 mode
="ATA_3WARE_9000";
1323 case CONTROLLER_SAT
:
1327 // not a recognized ATA or SATA device. We should never enter
1333 if ((fd
=OpenDevice(name
, mode
, scanning
))<0)
1334 // device open failed
1336 PrintOut(LOG_INFO
,"Device: %s, opened\n", name
);
1338 // pass user settings on to low-level ATA commands
1339 con
->controller_port
=cfg
->controller_port
;
1340 con
->hpt_data
[0]=cfg
->hpt_data
[0];
1341 con
->hpt_data
[1]=cfg
->hpt_data
[1];
1342 con
->hpt_data
[2]=cfg
->hpt_data
[2];
1343 con
->controller_type
=cfg
->controller_type
;
1344 con
->controller_explicit
=cfg
->controller_explicit
;
1345 con
->fixfirmwarebug
= cfg
->fixfirmwarebug
;
1346 con
->satpassthrulen
= cfg
->satpassthrulen
;
1348 // Get drive identity structure
1349 if ((retid
=ataReadHDIdentity (fd
,&drive
))){
1351 // Unable to read Identity structure
1352 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1354 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1355 name
, packetdevicetype(retid
-1));
1356 CloseDevice(fd
, name
);
1360 // Show if device in database, and use preset vendor attribute
1361 // options unless user has requested otherwise.
1362 if (cfg
->ignorepresets
)
1363 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1365 // do whatever applypresets decides to do. Will allocate memory if
1366 // cfg->attributedefs is needed.
1367 if (applypresets(&drive
, &cfg
->attributedefs
, con
)<0)
1368 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1370 PrintOut(LOG_INFO
, "Device: %s, found in smartd database.\n", name
);
1372 // then save the correct state of the flag (applypresets may have changed it)
1373 cfg
->fixfirmwarebug
= con
->fixfirmwarebug
;
1376 // If requested, show which presets would be used for this drive
1377 if (cfg
->showpresets
) {
1378 int savedebugmode
=debugmode
;
1379 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1382 showpresets(&drive
);
1383 debugmode
=savedebugmode
;
1386 // see if drive supports SMART
1387 supported
=ataSmartSupport(&drive
);
1390 // drive does NOT support SMART
1391 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
1393 // can't tell if drive supports SMART
1394 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
1396 // should we proceed anyway?
1397 if (cfg
->permissive
){
1398 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
1401 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
1402 CloseDevice(fd
, name
);
1407 if (ataEnableSmart(fd
)){
1408 // Enable SMART command has failed
1409 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
1410 CloseDevice(fd
, name
);
1414 // disable device attribute autosave...
1415 if (cfg
->autosave
==1){
1416 if (ataDisableAutoSave(fd
))
1417 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
1419 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
1422 // or enable device attribute autosave
1423 if (cfg
->autosave
==2){
1424 if (ataEnableAutoSave(fd
))
1425 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
1427 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
1430 // capability check: SMART status
1431 if (cfg
->smartcheck
&& ataSmartStatus2(fd
)==-1){
1432 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
1436 // capability check: Read smart values and thresholds. Note that
1437 // smart values are ALSO needed even if we ONLY want to know if the
1438 // device is self-test log or error-log capable! After ATA-5, this
1439 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1440 // but sadly not for ATA-5. Sigh.
1442 // do we need to retain SMART data after returning from this routine?
1443 retainsmartdata
=cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
;
1445 // do we need to get SMART data?
1446 if (retainsmartdata
|| cfg
->autoofflinetest
|| cfg
->selftest
|| cfg
->errorlog
|| cfg
->pending
!=DONT_MONITOR_UNC
) {
1448 unsigned char currentpending
, offlinepending
;
1450 cfg
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
));
1451 cfg
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
));
1453 if (!cfg
->smartval
|| !cfg
->smartthres
){
1454 PrintOut(LOG_CRIT
,"Not enough memory to obtain SMART data\n");
1458 if (ataReadSmartValues(fd
,cfg
->smartval
) ||
1459 ataReadSmartThresholds (fd
,cfg
->smartthres
)){
1460 PrintOut(LOG_INFO
,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name
);
1461 retainsmartdata
=cfg
->usagefailed
=cfg
->prefail
=cfg
->usage
=0;
1462 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1463 cfg
->pending
=DONT_MONITOR_UNC
;
1466 // see if the necessary Attribute is there to monitor offline or
1467 // current pending sectors or temperature
1468 TranslatePending(cfg
->pending
, &currentpending
, &offlinepending
);
1470 if (currentpending
&& ATAReturnAttributeRawValue(currentpending
, cfg
->smartval
)<0) {
1471 PrintOut(LOG_INFO
,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1472 name
, (int)currentpending
);
1473 cfg
->pending
&= 0xff00;
1474 cfg
->pending
|= CUR_UNC_DEFAULT
;
1477 if (offlinepending
&& ATAReturnAttributeRawValue(offlinepending
, cfg
->smartval
)<0) {
1478 PrintOut(LOG_INFO
,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1479 name
, (int)offlinepending
);
1480 cfg
->pending
&= 0x00ff;
1481 cfg
->pending
|= OFF_UNC_DEFAULT
<<8;
1484 if ( (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
1485 && !ATAReturnTemperatureValue(cfg
->smartval
, cfg
->attributedefs
)) {
1486 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name
);
1487 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1491 // enable/disable automatic on-line testing
1492 if (cfg
->autoofflinetest
){
1493 // is this an enable or disable request?
1494 const char *what
=(cfg
->autoofflinetest
==1)?"disable":"enable";
1496 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
1498 // if command appears unsupported, issue a warning...
1499 if (!isSupportAutomaticTimer(cfg
->smartval
))
1500 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
1501 // ... but then try anyway
1502 if ((cfg
->autoofflinetest
==1)?ataDisableAutoOffline(fd
):ataEnableAutoOffline(fd
))
1503 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
1505 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
1509 // capability check: self-test-log
1513 // start with service disabled, and re-enable it if all works OK
1515 cfg
->selflogcount
=0;
1519 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name
);
1520 else if (!cfg
->permissive
&& !isSmartTestLogCapable(cfg
->smartval
, &drive
))
1521 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name
);
1522 else if ((retval
=SelfTestErrorCount(fd
, name
))<0)
1523 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name
);
1526 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1527 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1531 // capability check: ATA error log
1535 // start with service disabled, and re-enable it if all works OK
1537 cfg
->ataerrorcount
=0;
1540 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name
);
1541 else if (!cfg
->permissive
&& !isSmartErrorLogCapable(cfg
->smartval
, &drive
))
1542 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name
);
1543 else if ((val
=ATAErrorCount(fd
, name
))<0)
1544 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name
);
1547 cfg
->ataerrorcount
=val
;
1551 // If we don't need to save SMART data, get rid of it now
1552 if (!retainsmartdata
) {
1553 if (cfg
->smartval
) {
1554 cfg
->smartval
=CheckFree(cfg
->smartval
, __LINE__
,filenameandversion
);
1555 bytes
-=sizeof(struct ata_smart_values
);
1557 if (cfg
->smartthres
) {
1558 cfg
->smartthres
=CheckFree(cfg
->smartthres
, __LINE__
,filenameandversion
);
1559 bytes
-=sizeof(struct ata_smart_thresholds_pvt
);
1563 // capabilities check -- does it support powermode?
1564 if (cfg
->powermode
) {
1565 int powermode
=ataCheckPowerMode(fd
);
1567 if (-1 == powermode
) {
1568 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
1571 else if (powermode
!=0 && powermode
!=0x80 && powermode
!=0xff) {
1572 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1578 // If no tests available or selected, return
1579 if (!(cfg
->errorlog
|| cfg
->selftest
|| cfg
->smartcheck
||
1580 cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
||
1581 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
1582 CloseDevice(fd
, name
);
1586 // Do we still have entries available?
1587 while (numdevata
>=atadevlist_max
)
1588 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
1591 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
1593 // record number of device, type of device, increment device count
1594 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1595 cfg
->controller_type
=CONTROLLER_ATA
;
1597 // close file descriptor
1598 CloseDevice(fd
, name
);
1602 // Returns 0 if normal SCSI device. Returns -1 if INQUIRY fails.
1603 // Returns 2 if ATA device detected behind SAT layer.
1604 // Returns 1 if other device detected that we don't want to treat
1605 // as a normal SCSI device.
// Sends a standard INQUIRY to the device open on fd and inspects the text
// in the reply to recognise devices that should be configured with another
// device type. For each recognised case a hint is logged.
//   fd     - descriptor of the opened device
//   device - device name, used only in the log messages
1606 static int SCSIFilterKnown(int fd
, char * device
)
1609 int req_len
, avail_len
, len
;
// start from a zeroed reply buffer (first 96 bytes)
1611 memset(req_buff
, 0, 96);
1613 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1614 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
1615 /* watch this spot ... other devices could lock up here */
// second attempt after the first INQUIRY failed
1617 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1618 PrintOut(LOG_INFO
, "Device: %s, failed on INQUIRY; skip device\n", device
);
1619 // device doesn't like INQUIRY commands
// byte 4 of the reply plus 5 is taken as the length the device has
// available; len is the smaller of that and the requested length
1623 avail_len
= req_buff
[4] + 5;
1624 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
// reply text at offset 8 starts with 3ware or AMCC: hint at -d 3ware,N
1626 if (0 == strncmp(req_buff
+ 8, "3ware", 5) || 0 == strncmp(req_buff
+ 8, "AMCC", 4) ) {
1627 PrintOut(LOG_INFO
, "Device %s, please try adding '-d 3ware,N'\n", device
);
1628 PrintOut(LOG_INFO
, "Device %s, you may need to replace %s with /dev/twaN or /dev/tweN\n", device
, device
);
// MVSATA at offset 36, only trusted when at least 42 bytes are valid:
// hint at -d marvell
1630 } else if ((len
>= 42) && (0 == strncmp(req_buff
+ 36, "MVSATA", 6))) {
1631 PrintOut(LOG_INFO
, "Device %s, please try '-d marvell'\n", device
);
// ATA text at offset 8 plus a positive has_sat_pass_through() probe:
// hint at -d sat
1633 } else if ((avail_len
>= 36) &&
1634 (0 == strncmp(req_buff
+ 8, "ATA ", 8)) &&
1635 has_sat_pass_through(fd
, 0 /* non-packet dev */)) {
1637 PrintOut(LOG_INFO
, "Device %s: ATA disk detected behind SAT layer\n",
1639 PrintOut(LOG_INFO
, " Try adding '-d sat' to the device line in the "
1640 "smartd.conf file.\n");
1641 PrintOut(LOG_INFO
, " For example: '%s -a -d sat'\n", device
);
1648 // on success, return 0. On failure, return >0. Never return <0,
1650 static int SCSIDeviceScan(cfgfile
*cfg
, int scanning
) {
1652 char *device
= cfg
->name
;
1653 struct scsi_iec_mode_page iec
;
1657 // should we try to register this as a SCSI device?
1658 switch (cfg
->controller_type
) {
1659 case CONTROLLER_SCSI
:
1660 case CONTROLLER_UNKNOWN
:
1663 case CONTROLLER_CCISS
:
1669 // pass user settings on to low-level SCSI commands
1670 con
->controller_port
=cfg
->controller_port
;
1671 con
->controller_type
=cfg
->controller_type
;
1674 if ((fd
= OpenDevice(device
, mode
, scanning
)) < 0)
1676 PrintOut(LOG_INFO
,"Device: %s, opened\n", device
);
1678 // early skip if device known and needs to be handled by some other
1679 // device type (e.g. '-d 3ware,<n>')
1680 if (SCSIFilterKnown(fd
, device
)) {
1681 CloseDevice(fd
, device
);
1685 // check that device is ready for commands. IE stores its stuff on
1687 if ((err
= scsiTestUnitReady(fd
))) {
1688 if (SIMPLE_ERR_NOT_READY
== err
)
1689 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
1690 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
1691 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
1692 else if (SIMPLE_ERR_BECOMING_READY
== err
)
1693 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
1695 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
1696 CloseDevice(fd
, device
);
1700 // Badly-conforming USB storage devices may fail this check.
1701 // The response to the following IE mode page fetch (current and
1702 // changeable values) is carefully examined. It has been found
1703 // that various USB devices that malform the response will lock up
1704 // if asked for a log page (e.g. temperature) so it is best to
1706 if (!(err
= scsiFetchIECmpage(fd
, &iec
, cfg
->modese_len
)))
1707 cfg
->modese_len
= iec
.modese_len
;
1708 else if (SIMPLE_ERR_BAD_FIELD
== err
)
1709 ; /* continue since it is reasonable not to support IE mpage */
1710 else { /* any other error (including malformed response) unreasonable */
1712 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1714 CloseDevice(fd
, device
);
1718 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1719 // smart if it is off). This may change to be the same as the ATA side.
1720 if (!scsi_IsExceptionControlEnabled(&iec
)) {
1721 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
1722 "Try 'smartctl -s on %s' to turn on SMART features\n",
1724 CloseDevice(fd
, device
);
1728 // Device exists, and does SMART. Add to list (allocating more space if needed)
1729 while (numdevscsi
>= scsidevlist_max
)
1730 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
1732 // Flag that certain log pages are supported (information may be
1733 // available from other sources).
1734 if (0 == scsiLogSense(fd
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0)) {
1735 for (k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
1737 case TEMPERATURE_LPAGE
:
1738 cfg
->TempPageSupported
= 1;
1741 cfg
->SmartPageSupported
= 1;
1749 // record type of device
1750 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1751 cfg
->controller_type
= CONTROLLER_SCSI
;
1753 // get rid of allocated memory only needed for ATA devices. These
1754 // might have been allocated if the user specified Ignore options or
1755 // other ATA-only Attribute-specific options on the DEVICESCAN line.
1756 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
1757 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
1758 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
1759 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
1761 // Check if scsiCheckIE() is going to work
1765 UINT8 currenttemp
= 0;
1768 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
1769 &asc
, &ascq
, &currenttemp
, &triptemp
)) {
1770 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
1771 cfg
->SuppressReport
= 1;
1772 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
) {
1773 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device
);
1774 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1779 // capability check: self-test-log
1781 int retval
=scsiCountFailedSelfTests(fd
, 0);
1783 // no self-test log, turn off monitoring
1784 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
1786 cfg
->selflogcount
=0;
1790 // register starting values to watch for changes
1791 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1792 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1796 // disable autosave (set GLTSD bit)
1797 if (cfg
->autosave
==1){
1798 if (scsiSetControlGLTSD(fd
, 1, cfg
->modese_len
))
1799 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
1801 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
1804 // or enable autosave (clear GLTSD bit)
1805 if (cfg
->autosave
==2){
1806 if (scsiSetControlGLTSD(fd
, 0, cfg
->modese_len
))
1807 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
1809 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
1812 // tell user we are registering device
1813 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
1815 // close file descriptor
1816 CloseDevice(fd
, device
);
1820 // We compare old and new values of the n'th attribute. Note that n
1821 // is NOT the attribute ID number. If (Normalized & Raw) equal,
1822 // then return 0, else nonzero.
//   delta      - receives the new and old normalized values, the prefail
//                flag and the sameraw flag when a change is reported
//   newv, oldv - current and previous SMART value tables
//   thresholds - threshold table; its entry ID is cross-checked against the
//                IDs found in the two value tables
// The code also uses n (table index, checked against
// NUMBER_ATA_SMART_ATTRIBUTES) and name (device name for the log message).
1823 int ATACompareValues(changedattribute_t
*delta
,
1824 struct ata_smart_values
*newv
,
1825 struct ata_smart_values
*oldv
,
1826 struct ata_smart_thresholds_pvt
*thresholds
,
1828 struct ata_smart_attribute
*now
,*was
;
1829 struct ata_smart_threshold_entry
*thre
;
1830 unsigned char oldval
,newval
;
1833 // check that attribute number in range, and no null pointers
1834 if (n
<0 || n
>=NUMBER_ATA_SMART_ATTRIBUTES
|| !newv
|| !oldv
|| !thresholds
)
1837 // pointers to disk's values and vendor's thresholds
1838 now
=newv
->vendor_attributes
+n
;
1839 was
=oldv
->vendor_attributes
+n
;
1840 thre
=thresholds
->thres_entries
+n
;
1842 // consider only valid attributes
// an ID of zero in any of the three entries marks the slot as unused
1843 if (!now
->id
|| !was
->id
|| !thre
->id
)
1847 // issue warning if they don't have the same ID in all structures:
1848 if ( (now
->id
!= was
->id
) || (now
->id
!= thre
->id
) ){
1849 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
1850 name
, (int)now
->id
, (int)was
->id
, (int)thre
->id
);
1854 // new and old values of Normalized Attributes
1855 newval
=now
->current
;
1856 oldval
=was
->current
;
1858 // See if the RAW values are unchanged (ie, the same)
// the comparison covers the 6 raw bytes of the attribute
1859 if (memcmp(now
->raw
, was
->raw
, 6))
1864 // if any values out of the allowed range, or if the values haven't
1865 // changed, return 0
// normalized values of 0 and 0xff count as out of range here
1866 if (!newval
|| !oldval
|| newval
>0xfe || oldval
>0xfe || (oldval
==newval
&& sameraw
))
1869 // values have changed. Construct output and return
1870 delta
->newval
=newval
;
1871 delta
->oldval
=oldval
;
1873 delta
->prefail
=ATTRIBUTE_FLAGS_PREFAILURE(now
->flags
);
1874 delta
->sameraw
=sameraw
;
1879 // This looks to see if the corresponding bit of the 32 bytes is set.
1880 // This wastes a few bytes of storage but eliminates all searching and
1881 // sorting functions! Entry is ZERO <==> the attribute ON. Calling
1882 // with set=0 tells you if the attribute is being tracked or not.
1883 // Calling with set=1 turns the attribute OFF.
//   attr     - attribute number; its low three bits pick the bit inside a
//              byte of the bitmap
//   datap    - address of the bitmap pointer; the storage holds NMONITOR
//              bitmaps of 32 bytes each and is created with Calloc() when
//              the pointer is still NULL and storage is needed
//   set      - 0 to query, 1 to switch the attribute off (see above)
//   which    - index of the bitmap to use, must be 0 .. NMONITOR-1
//   whatline - caller line number, reported in the internal-error message
1884 int IsAttributeOff(unsigned char attr
, unsigned char **datap
, int set
, int which
, int whatline
){
1885 unsigned char *data
;
// bit position inside the byte, and the mask that selects it
1887 int bit
=attr
& 0x07;
1888 unsigned char mask
=0x01<<bit
;
// an out-of-range bitmap index is an internal error
1890 if (which
>=NMONITOR
|| which
< 0){
1891 PrintOut(LOG_CRIT
, "Internal error in IsAttributeOff() at line %d of file %s (which=%d)\n%s",
1892 whatline
, filenameandversion
, which
, reportbug
);
1896 if (*datap
== NULL
){
1897 // NULL data implies Attributes are ON...
// zero-filled storage for all NMONITOR bitmaps
1902 if (!(*datap
=(unsigned char *)Calloc(NMONITOR
*32, 1))){
1903 PrintOut(LOG_CRIT
,"No memory to create monattflags\n");
1908 // pointer to the 256 bits that we need
1909 data
=*datap
+which
*32;
1911 // attribute zero is always OFF
// NOTE(review): loc is presumably the byte index of attr inside the 32-byte
// bitmap (attr divided by 8) - confirm against its declaration
1916 return (data
[loc
] & mask
);
1920 // return value when setting has no sense
1924 // If the self-test log has got more self-test errors (or more recent
1925 // self-test errors) recorded, then notify user.
//   cfg  - device entry; its selflogcount and selfloghour fields hold the
//          previously seen values and are overwritten with the new ones
//   newi - freshly read packed value, decoded below with
//          SELFTEST_ERRORCOUNT() and SELFTEST_ERRORHOURS()
1926 void CheckSelfTestLogs(cfgfile
*cfg
, int newi
){
1927 char *name
=cfg
->name
;
// NOTE(review): the second MailWarning argument (8 here, 3 below) looks like
// a warning category number - confirm its meaning at MailWarning
1931 MailWarning(cfg
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
1933 // old and new error counts
1934 int oldc
=cfg
->selflogcount
;
1935 int newc
=SELFTEST_ERRORCOUNT(newi
);
1937 // old and new error timestamps in hours
1938 int oldh
=cfg
->selfloghour
;
1939 int newh
=SELFTEST_ERRORHOURS(newi
);
1942 // increase in error count
1943 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
1945 MailWarning(cfg
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
1947 } else if (oldh
!=newh
) {
1948 // more recent error
1949 // a 'more recent' error might actually be a smaller hour number,
1950 // if the hour number has wrapped.
1951 // There's still a bug here. You might just happen to run a new test
1952 // exactly 32768 hours after the previous failure, and have run exactly
1953 // 20 tests between the two, in which case smartd will miss the
1955 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
// NOTE(review): unlike the other MailWarning formats in this function, this
// one ends with a newline - confirm that this is intended
1957 MailWarning(cfg
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1961 // Needed since self-test error count may DECREASE. Hour might
1962 // also have changed.
1963 cfg
->selflogcount
= newc
;
1964 cfg
->selfloghour
= newh
;
1969 // returns 1 if time to do test of type testtype, 0 if not time to do
1970 // test, < 0 if error
//   cfg      - device entry; cfg->testdata holds the compiled schedule
//              regular expression plus the hour and type of the last test
//   testtype - test type letter, the first field of the matched pattern
//   testtime - time to evaluate the schedule for; 0 means the current time
//              from time(NULL)
1971 int DoTestNow(cfgfile
*cfg
, char testtype
, time_t testtime
) {
1972 // start by finding out the time:
// the pattern built below is 12 characters plus the terminator
1975 char matchpattern
[16];
1976 regmatch_t substring
;
1977 int weekday
, length
;
1978 unsigned short hours
;
1979 testinfo
*dat
=cfg
->testdata
;
1981 // check that self-testing has been requested
1985 // since we are about to call localtime(), be sure glibc is informed
1986 // of any timezone changes we make.
1988 FixGlibcTimeZoneBug();
1990 // construct pattern containing the month, day of month, day of
1992 epochnow
= (!testtime
? time(NULL
) : testtime
);
1993 timenow
=localtime(&epochnow
);
1995 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7
1997 weekday
=timenow
->tm_wday
?timenow
->tm_wday
:7;
// layout: type / two-digit month / two-digit day / weekday / two-digit hour
1998 sprintf(matchpattern
, "%c/%02d/%02d/%1d/%02d", testtype
, timenow
->tm_mon
+1,
1999 timenow
->tm_mday
, weekday
, timenow
->tm_hour
);
2001 // if no match, we are done
2002 if (regexec(&(dat
->cregex
), matchpattern
, 1, &substring
, 0))
2005 // must match the ENTIRE type/date/time string
2006 length
=strlen(matchpattern
);
2007 if (substring
.rm_so
!=0 || substring
.rm_eo
!=length
)
2010 // never do a second test in the same hour as another test (the % 7 ensures
2011 // that the RHS will never be greater than 65535 and so will always fit into
2012 // an unsigned short)
// largest value: 1 + 23 + 24*(365 + 366*6) = 61488
2013 hours
=1+timenow
->tm_hour
+24*(timenow
->tm_yday
+366*(timenow
->tm_year
% 7));
2014 if (hours
==dat
->hour
) {
// the skip message is only logged for real checks (testtime of 0) and only
// when the earlier test in this hour was of a different type
2015 if (!testtime
&& testtype
!=dat
->testtype
)
2016 PrintOut(LOG_INFO
, "Device: %s, did test of type %c in current hour, skipping test of type %c\n",
2017 cfg
->name
, dat
->testtype
, testtype
);
2021 // save time and type of the current test; we are ready to do a test
2023 dat
->testtype
=testtype
;
2027 // Print a list of future tests.
// Steps forward from now in increments of checktime seconds, for a span of
// 90 days, and asks DoTestNow() for every device and test type whether a
// test would start at that time. The first 5 hits per device and type are
// logged individually, followed by the totals per device and type.
//   atadevices  - ATA device entries, numdevata of them
//   scsidevices - SCSI device entries, numdevscsi of them
2028 void PrintTestSchedule(cfgfile
**atadevices
, cfgfile
**scsidevices
){
2031 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2032 time_t now
; long seconds
;
2033 int numdev
= numdevata
+numdevscsi
;
// one row of four counters per device, one counter per test type letter
2034 typedef int cnt_t
[4];
2035 cnt_t
* testcnts
; // testcnts[numdev][4]
// calloc gives zeroed counters
2038 testcnts
= (cnt_t
*)calloc(numdev
, sizeof(testcnts
[0]));
2042 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2044 // FixGlibcTimeZoneBug(); // done in PrintOut()
2046 dateandtimezoneepoch(datenow
, now
);
2047 for (seconds
=checktime
; seconds
<3600L*24*90; seconds
+=checktime
) {
2048 // Check for each device whether a test will be run
2049 time_t testtime
= now
+ seconds
;
2050 for (i
=0; i
<numdev
; i
++) {
// indices below numdevata are ATA devices, the rest are SCSI devices
2051 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
// ATA devices are checked for all four letters of LSCO, SCSI devices only
// for the first two
2052 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
2053 char testtype
= "LSCO"[t
];
// NOTE(review): DoTestNow documents a negative result on error, which this
// condition would also count as a scheduled test - confirm
2054 if (DoTestNow(cfg
, testtype
, testtime
)) {
2055 // Report at most 5 tests of each type
2056 if (++testcnts
[i
][t
] <= 5) {
2057 dateandtimezoneepoch(date
, testtime
);
2058 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
->name
,
2059 testcnts
[i
][t
], testtype
, date
);
// summary: the simulated time span, then one total per device and type
2067 dateandtimezoneepoch(date
, now
+seconds
);
2068 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2069 for (i
=0; i
<numdev
; i
++) {
2070 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
2071 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
// the plural s is omitted for a total of exactly one
2072 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
->name
, testcnts
[i
][t
],
2073 (testcnts
[i
][t
]==1?"":"s"), "LSCO"[t
]);
2080 // Return zero on success, nonzero on failure. Perform offline (background)
2081 // short or long (extended) self test on given scsi device.
//   fd       - descriptor of the opened SCSI device
//   cfg      - device entry; the not_cap_short and not_cap_long flags in
//              cfg->testdata are set when the device proves unable to run
//              the corresponding test
//   testtype - test type letter
// NOTE(review): presumably S selects the short and L the long test, matching
// the first two letters PrintTestSchedule uses for SCSI devices - confirm
2082 int DoSCSISelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
// stays NULL unless one of the supported test types is selected
2084 char *testname
= NULL
;
2085 char *name
= cfg
->name
;
// a failing progress query is taken to mean that self-tests are unsupported,
// so both capability flags are set
2088 if (scsiSelfTestInProgress(fd
, &inProgress
)) {
2089 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2090 cfg
->testdata
->not_cap_short
=cfg
->testdata
->not_cap_long
=1;
// a test that is already running is left undisturbed
2094 if (1 == inProgress
) {
2095 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
2096 "progress.\n", name
);
2102 testname
= "Short Self";
2103 retval
= scsiSmartShortSelfTest(fd
);
2106 testname
= "Long Self";
2107 retval
= scsiSmartExtendSelfTest(fd
);
2110 // If we can't do the test, exit
2111 if (NULL
== testname
) {
2112 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
// a bad opcode or bad field reply means the device lacks this test; the
// matching capability flag is recorded in testdata
2117 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
2118 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
2119 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
2122 cfg
->testdata
->not_cap_long
=1;
2124 cfg
->testdata
->not_cap_short
=1;
2128 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
2133 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
2138 // Do an offline immediate or self-test. Return zero on success,
2139 // nonzero on failure.
2140 int DoATASelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2142 struct ata_smart_values data
;
2143 char *testname
=NULL
;
2144 int retval
, dotest
=-1;
2145 char *name
=cfg
->name
;
2147 // Read current smart data and check status/capability
2148 if (ataReadSmartValues(fd
, &data
) || !(data
.offline_data_collection_capability
)) {
2149 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
2153 // Check for capability to do the test
2156 testname
="Offline Immediate ";
2157 if (isSupportExecuteOfflineImmediate(&data
))
2158 dotest
=OFFLINE_FULL_SCAN
;
2160 cfg
->testdata
->not_cap_offline
=1;
2163 testname
="Conveyance Self-";
2164 if (isSupportConveyanceSelfTest(&data
))
2165 dotest
=CONVEYANCE_SELF_TEST
;
2167 cfg
->testdata
->not_cap_conveyance
=1;
2170 testname
="Short Self-";
2171 if (isSupportSelfTest(&data
))
2172 dotest
=SHORT_SELF_TEST
;
2174 cfg
->testdata
->not_cap_short
=1;
2177 testname
="Long Self-";
2178 if (isSupportSelfTest(&data
))
2179 dotest
=EXTEND_SELF_TEST
;
2181 cfg
->testdata
->not_cap_long
=1;
2185 // If we can't do the test, exit
2187 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
2191 // If currently running a self-test, do not interrupt it to start another.
2192 if (15==(data
.self_test_exec_status
>> 4)) {
2193 if (cfg
->fixfirmwarebug
== FIX_SAMSUNG3
&& data
.self_test_exec_status
== 0xf0) {
2194 PrintOut(LOG_INFO
, "Device: %s, will not skip scheduled %sTest "
2195 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name
, testname
);
2197 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2198 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
2203 // else execute the test, and return status
2204 if ((retval
=smartcommandhandler(fd
, IMMEDIATE_OFFLINE
, dotest
, NULL
)))
2205 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
2207 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
2212 // Check Temperature limits
// Logs temperature events for the device described by cfg:
//  - a reading of 0 or of 255 and above is reported as "failed to read Temperature";
//  - while cfg->temperature is still zero the reading is logged as the initial
//    temperature and copied into cfg->temperature, cfg->tempmin and cfg->tempmax;
//  - a new minimum or maximum is stored and marked with "!" in later log lines;
//  - with cfg->tempdiff set, a change is logged when min/max changed or the reading
//    differs from cfg->temperature by at least cfg->tempdiff;
//  - reaching cfg->tempcrit is logged at LOG_CRIT and mailed as warning type 12,
//    otherwise reaching cfg->tempinfo is logged at LOG_INFO.
// triptemp is only used for the "[trip Temperature ...]" log line.
// NOTE(review): short lines (braces, else/return, the tempmininc update) are missing
// from this listing, so the exact nesting of the steps above cannot be confirmed here.
2213 static void CheckTemperature(cfgfile
* cfg
, unsigned char currtemp
, unsigned char triptemp
)
// "" or "!" -- appended to the min/max values in the log messages below
2215 const char *minchg
= "", *maxchg
= "";
2216 if (!(0 < currtemp
&& currtemp
< 255)) {
2217 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
->name
);
2221 if (!cfg
->temperature
) {
2222 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius\n",
2223 cfg
->name
, (int)currtemp
);
2225 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
2226 cfg
->temperature
= cfg
->tempmin
= cfg
->tempmax
= currtemp
;
2230 if (currtemp
< cfg
->tempmin
) {
2231 cfg
->tempmin
= currtemp
; minchg
= "!";
2232 cfg
->tempmininc
= 0;
2234 else if (cfg
->tempmininc
) {
2235 // increase min Temperature during first 30 minutes
2236 cfg
->tempmin
= currtemp
;
2239 if (currtemp
> cfg
->tempmax
) {
2240 cfg
->tempmax
= currtemp
; maxchg
= "!";
2244 if (cfg
->tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)cfg
->temperature
) >= cfg
->tempdiff
)) {
2245 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %u%s/%u%s)\n",
2246 cfg
->name
, (int)currtemp
-(int)cfg
->temperature
, currtemp
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2247 cfg
->temperature
= currtemp
;
2252 if (cfg
->tempcrit
&& currtemp
>= cfg
->tempcrit
) {
2253 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2254 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
// NOTE(review): the mail text prints the temperature with %d while the log line above uses %u
2255 MailWarning(cfg
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2256 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2258 else if (cfg
->tempinfo
&& currtemp
>= cfg
->tempinfo
) {
2259 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2260 cfg
->name
, currtemp
, cfg
->tempinfo
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
// Run one monitoring pass over the ATA device described by cfg.
// Steps visible in this listing, in order:
//   1. copy firmware-bug and controller settings from cfg into `con`
//   2. send a test e-mail if cfg->mailwarn->emailtest is set
//   3. open the device; a failure is mailed as warning type 9
//   4. if allow_selftests and cfg->testdata are set, ask DoTestNow() whether a
//      test of type L, S, C or O is due (checked in that order)
//   5. honour the -n (power mode) Directive; checks may be suspended and are
//      counted in cfg->powerskipcnt
//   6. SMART status check (cfg->smartcheck)
//   7. attribute based checks: pending/offline-uncorrectable sectors,
//      temperature, failed usage attributes, changed prefail/usage attributes;
//      afterwards the new values are saved into *cfg->smartval
//   8. self-test log and ATA error log counters
//   9. DoATASelfTest() for the scheduled test, then CloseDevice()
// NOTE(review): short lines (declarations, braces, return statements, the
// power-mode switch labels) are missing from this listing; the return values
// are therefore not documented here.
2264 int ATACheckDevice(cfgfile
*cfg
, bool allow_selftests
){
2266 char *name
=cfg
->name
;
2270 // fix firmware bug if requested
2271 con
->fixfirmwarebug
=cfg
->fixfirmwarebug
;
2272 con
->controller_port
=cfg
->controller_port
;
2273 con
->controller_type
=cfg
->controller_type
;
2274 con
->controller_explicit
=cfg
->controller_explicit
;
2275 // Highpoint-specific data
2276 con
->hpt_data
[0]=cfg
->hpt_data
[0];
2277 con
->hpt_data
[1]=cfg
->hpt_data
[1];
2278 con
->hpt_data
[2]=cfg
->hpt_data
[2];
2280 // If user has asked, test the email warning system
2281 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2282 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
// the 3ware character-device controller types get their own open mode string
2284 if (cfg
->controller_type
== CONTROLLER_3WARE_9000_CHAR
)
2285 mode
="ATA_3WARE_9000";
2287 if (cfg
->controller_type
== CONTROLLER_3WARE_678K_CHAR
)
2288 mode
="ATA_3WARE_678K";
2290 // if we can't open device, fail gracefully rather than hard --
2291 // perhaps the next time around we'll be able to open it. ATAPI
2292 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2293 // given (see linux cdrom driver).
2294 if ((fd
=OpenDevice(name
, mode
, 0))<0){
2295 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2299 // if the user has asked, and device is capable (or we're not yet
2300 // sure) check whether a self test should be done now.
2301 // This check is done before powermode check to avoid missing self
2302 // tests on idle or sleeping disks.
2303 if (allow_selftests
&& cfg
->testdata
) {
2305 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2308 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2311 else if (!cfg
->testdata
->not_cap_conveyance
&& DoTestNow(cfg
, 'C', 0)>0)
2313 // offline immediate
2314 else if (!cfg
->testdata
->not_cap_offline
&& DoTestNow(cfg
, 'O', 0)>0)
2318 // user may have requested (with the -n Directive) to leave the disk
2319 // alone if it is in idle or sleeping mode. In this case check the
2320 // power mode and exit without check if needed
2321 if (cfg
->powermode
){
2322 int dontcheck
=0, powermode
=ataCheckPowerMode(fd
);
2324 if (0 <= powermode
&& powermode
< 0xff) {
2325 // wait for possible spin up and check again
2328 powermode2
= ataCheckPowerMode(fd
);
2329 if (powermode2
> powermode
)
2330 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
2331 powermode
= powermode2
;
// cfg->powermode is compared against thresholds 1, 2 and 3 below; the power
// states these belong to are on lines missing from this listing
2338 if (cfg
->powermode
>=1)
2344 if (cfg
->powermode
>=2)
2350 if (cfg
->powermode
>=3)
2355 mode
="ACTIVE or IDLE";
2359 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2365 // if we are going to skip a check, return now
2367 // but ignore powermode on scheduled selftest
2369 CloseDevice(fd
, name
);
2370 if (!cfg
->powerskipcnt
&& !cfg
->powerquiet
) // report first only and avoid waking up system disk
2371 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
2372 cfg
->powerskipcnt
++;
2375 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to scheduled self test (%d check%s skipped)\n",
2376 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2377 cfg
->powerskipcnt
= 0;
2379 else if (cfg
->powerskipcnt
) {
2380 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2381 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2382 cfg
->powerskipcnt
= 0;
2386 // check smart status
2387 if (cfg
->smartcheck
){
2388 int status
=ataSmartStatus2(fd
);
2390 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
2391 MailWarning(cfg
, 5, "Device: %s, not capable of SMART self-check", name
);
2393 else if (status
==1){
2394 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
2395 MailWarning(cfg
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
2399 // Check everything that depends upon SMART Data (eg, Attribute values)
2400 if ( cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->pending
!=DONT_MONITOR_UNC
2401 || cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
){
2402 struct ata_smart_values curval
;
2403 struct ata_smart_thresholds_pvt
*thresh
=cfg
->smartthres
;
2405 // Read current attribute values. *drive contains old values and thresholds
2406 if (ataReadSmartValues(fd
,&curval
)){
2407 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
2408 MailWarning(cfg
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
2411 // look for current or offline pending sectors
2412 if (cfg
->pending
!= DONT_MONITOR_UNC
) {
2414 unsigned char currentpending
, offlinepending
;
// NOTE(review): "¤tpending" below looks like a mis-encoded "&currentpending"
// ("&curren" decoded as an HTML entity) -- confirm against the upstream file
2416 TranslatePending(cfg
->pending
, ¤tpending
, &offlinepending
);
2418 if (currentpending
&& (rawval
=ATAReturnAttributeRawValue(currentpending
, &curval
))>0) {
2419 // Unreadable pending sectors!!
2420 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors\n", name
, rawval
);
2421 MailWarning(cfg
, 10, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors", name
, rawval
);
2424 if (offlinepending
&& (rawval
=ATAReturnAttributeRawValue(offlinepending
, &curval
))>0) {
2425 // Unreadable offline sectors!!
2426 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Offline uncorrectable sectors\n", name
, rawval
);
2427 MailWarning(cfg
, 11, "Device: %s, %"PRId64
" Offline uncorrectable sectors", name
, rawval
);
2431 // check temperature limits
// (the trip temperature argument is passed as 0 for ATA devices)
2432 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2433 CheckTemperature(cfg
, ATAReturnTemperatureValue(&curval
, cfg
->attributedefs
), 0);
2435 if (cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
) {
2437 // look for failed usage attributes, or track usage or prefail attributes
2438 for (i
=0; i
<NUMBER_ATA_SMART_ATTRIBUTES
; i
++){
2440 changedattribute_t delta
;
2442 // This block looks for usage attributes that have failed.
2443 // Prefail attributes that have failed are returned with a
2444 // positive sign. No failure returns 0. Usage attributes<0.
2445 if (cfg
->usagefailed
&& ((att
=ataCheckAttribute(&curval
, thresh
, i
))<0)){
2447 // are we ignoring failures of this attribute?
2449 if (!IsAttributeOff(att
, &cfg
->monitorattflags
, 0, MONITOR_FAILUSE
, __LINE__
)){
2450 char attname
[64], *loc
=attname
;
2452 // get attribute name & skip white space
2453 ataPrintSmartAttribName(loc
, att
, cfg
->attributedefs
);
2454 while (*loc
&& *loc
==' ') loc
++;
2457 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %s.\n", name
, loc
);
2458 MailWarning(cfg
, 2, "Device: %s, Failed SMART usage Attribute: %s.", name
, loc
);
2462 // This block tracks usage or prefailure attributes to see if
2463 // they are changing. It also looks for changes in RAW values
2464 // if this has been requested by user.
2465 if ((cfg
->usage
|| cfg
->prefail
) && ATACompareValues(&delta
, &curval
, cfg
->smartval
, thresh
, i
, name
)){
2466 unsigned char id
=delta
.id
;
2468 // if the only change is the raw value, and we're not
2469 // tracking raw value, then continue loop over attributes
2470 if (!delta
.sameraw
&& delta
.newval
==delta
.oldval
&& !IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAW
, __LINE__
))
2473 // are we tracking this attribute?
2474 if (!IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_IGNORE
, __LINE__
)){
2475 char newrawstring
[64], oldrawstring
[64], attname
[64], *loc
=attname
;
2477 // get attribute name, skip spaces
2478 ataPrintSmartAttribName(loc
, id
, cfg
->attributedefs
);
2479 while (*loc
&& *loc
==' ') loc
++;
2481 // has the user asked for us to print raw values?
2482 if (IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAWPRINT
, __LINE__
)) {
2483 // get raw values (as a string) and add to printout
// NOTE(review): sprintf() into the 64-byte buffers below is unbounded; the size
// of rawstring is not visible in this listing -- confirm it cannot overflow them
2485 ataPrintSmartAttribRawValue(rawstring
, curval
.vendor_attributes
+i
, cfg
->attributedefs
);
2486 sprintf(newrawstring
, " [Raw %s]", rawstring
);
2487 ataPrintSmartAttribRawValue(rawstring
, cfg
->smartval
->vendor_attributes
+i
, cfg
->attributedefs
);
2488 sprintf(oldrawstring
, " [Raw %s]", rawstring
);
2491 newrawstring
[0]=oldrawstring
[0]='\0';
2493 // prefailure attribute
2494 if (cfg
->prefail
&& delta
.prefail
)
2495 PrintOut(LOG_INFO
, "Device: %s, SMART Prefailure Attribute: %s changed from %d%s to %d%s\n",
2496 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2499 if (cfg
->usage
&& !delta
.prefail
)
2500 PrintOut(LOG_INFO
, "Device: %s, SMART Usage Attribute: %s changed from %d%s to %d%s\n",
2501 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2503 } // endof block tracking usage or prefailure
2504 } // end of loop over attributes
2506 // Save the new values into *drive for the next time around
2507 *(cfg
->smartval
)=curval
;
2512 // check if number of selftest errors has increased (note: may also DECREASE)
2514 CheckSelfTestLogs(cfg
, SelfTestErrorCount(fd
, name
));
2516 // check if number of ATA errors has increased
2519 int newc
,oldc
=cfg
->ataerrorcount
;
2521 // new number of errors
2522 newc
=ATAErrorCount(fd
, name
);
2524 // did command fail?
2526 // lack of PrintOut here is INTENTIONAL
2527 MailWarning(cfg
, 7, "Device: %s, Read SMART Error Log Failed", name
);
2529 // has error count increased?
2531 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
2533 MailWarning(cfg
, 4, "Device: %s, ATA error count increased from %d to %d",
2537 // this last line is probably not needed, count always increases
2539 cfg
->ataerrorcount
=newc
;
2542 // carry out scheduled self-test
2544 DoATASelfTest(fd
, cfg
, testtype
);
2546 // Don't leave device open -- the OS/user may want to access it
2547 // before the next smartd cycle!
2548 CloseDevice(fd
, name
);
// Run one monitoring pass over the SCSI device described by cfg.
// Steps visible in this listing, in order:
//   1. select handling by cfg->controller_type (CCISS, SCSI and UNKNOWN have cases)
//   2. copy controller_port/controller_type from cfg into `con`
//   3. send a test e-mail if cfg->mailwarn->emailtest is set
//   4. open the device; a failure is mailed as warning type 9 without a log line
//   5. unless cfg->SuppressReport is set, query scsiCheckIE(); a failure is logged,
//      mailed as warning type 6 and sets cfg->SuppressReport
//   6. report a SMART failure string from scsiGetIEString(asc, ascq), if any
//   7. temperature limits, failed self-test count
//   8. if allow_selftests and cfg->testdata are set, start a due 'L' or 'S' test
//      with DoSCSISelfTest(), then CloseDevice()
// NOTE(review): short lines (declarations, the mode strings, braces, return
// statements) are missing from this listing; return values are not documented here.
2552 int SCSICheckDevice(cfgfile
*cfg
, bool allow_selftests
)
2558 char *name
=cfg
->name
;
2562 // should we try to register this as a SCSI device?
2563 switch (cfg
->controller_type
) {
2564 case CONTROLLER_CCISS
:
2567 case CONTROLLER_SCSI
:
2568 case CONTROLLER_UNKNOWN
:
2575 // pass user settings on to low-level SCSI commands
2576 con
->controller_port
=cfg
->controller_port
;
2577 con
->controller_type
=cfg
->controller_type
;
2579 // If the user has asked for it, test the email warning system
2580 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2581 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
2583 // if we can't open device, fail gracefully rather than hard --
2584 // perhaps the next time around we'll be able to open it
2585 if ((fd
=OpenDevice(name
, mode
, 0))<0) {
2586 // Lack of PrintOut() here is intentional!
2587 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2589 } else if (debugmode
)
2590 PrintOut(LOG_INFO
,"Device: %s, opened SCSI device\n", name
);
2594 if (! cfg
->SuppressReport
) {
// NOTE(review): "¤ttemp" below looks like a mis-encoded "&currenttemp"
// ("&curren" decoded as an HTML entity) -- confirm against the upstream file
2595 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
2596 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
2597 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
2599 MailWarning(cfg
, 6, "Device: %s, failed to read SMART values", name
);
2600 cfg
->SuppressReport
= 1;
2604 cp
= scsiGetIEString(asc
, ascq
);
2606 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
2607 MailWarning(cfg
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
2608 } else if (debugmode
)
2609 PrintOut(LOG_INFO
,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2610 name
, (int)asc
, (int)ascq
);
2611 } else if (debugmode
)
2612 PrintOut(LOG_INFO
,"Device: %s, SMART health: passed\n", name
);
2614 // check temperature limits
2615 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2616 CheckTemperature(cfg
, currenttemp
, triptemp
);
2618 // check if number of selftest errors has increased (note: may also DECREASE)
2620 CheckSelfTestLogs(cfg
, scsiCountFailedSelfTests(fd
, 0));
// only one test is started per pass: the long test takes precedence over the short one
2622 if (allow_selftests
&& cfg
->testdata
) {
2623 // long (extended) background test
2624 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2625 DoSCSISelfTest(fd
, cfg
, 'L');
2626 // short background test
2627 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2628 DoSCSISelfTest(fd
, cfg
, 'S');
2630 CloseDevice(fd
, name
);
2634 // Checks the SMART status of all ATA and SCSI devices
2635 void CheckDevicesOnce(cfgfile
**atadevices
, cfgfile
**scsidevices
, bool allow_selftests
){
2638 for (i
=0; i
<numdevata
; i
++)
2639 ATACheckDevice(atadevices
[i
], allow_selftests
);
2641 for (i
=0; i
<numdevscsi
; i
++)
2642 SCSICheckDevice(scsidevices
[i
], allow_selftests
);
2648 // This alarm means that a SCSI USB device was hanging
2649 void AlarmHandler(int signal
) {
2650 longjmp(registerscsienv
, 1);
2654 // Does initialization right after fork to daemon mode
// Installs the handlers for SIGTERM, SIGQUIT, SIGINT, SIGHUP, SIGUSR1 and SIGUSR2
// through SIGNALFN and stores the current time in *wakeuptime.
// Pattern used for every signal: if SIGNALFN() returns SIG_IGN, SIG_IGN is
// installed again -- presumably so that a signal that was being ignored stays ignored.
// NOTE(review): the statements belonging to the first two comments below, and any
// conditional-compilation lines, are missing from this listing.
2655 void Initialize(time_t *wakeuptime
){
2657 // install goodbye message and remove pidfile handler
2660 // write PID file only after installing exit handler
2664 // install signal handlers. On Solaris, can't use signal() because
2665 // it resets the handler to SIG_DFL after each call. So use sigset()
2666 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2668 // normal and abnormal exit
2669 if (SIGNALFN(SIGTERM
, sighandler
)==SIG_IGN
)
2670 SIGNALFN(SIGTERM
, SIG_IGN
);
2671 if (SIGNALFN(SIGQUIT
, sighandler
)==SIG_IGN
)
2672 SIGNALFN(SIGQUIT
, SIG_IGN
);
2674 // in debug mode, <CONTROL-C> ==> HUP
2675 if (SIGNALFN(SIGINT
, debugmode
?HUPhandler
:sighandler
)==SIG_IGN
)
2676 SIGNALFN(SIGINT
, SIG_IGN
);
2678 // Catch HUP and USR1
2679 if (SIGNALFN(SIGHUP
, HUPhandler
)==SIG_IGN
)
2680 SIGNALFN(SIGHUP
, SIG_IGN
);
2681 if (SIGNALFN(SIGUSR1
, USR1handler
)==SIG_IGN
)
2682 SIGNALFN(SIGUSR1
, SIG_IGN
);
2684 if (SIGNALFN(SIGUSR2
, USR2handler
)==SIG_IGN
)
2685 SIGNALFN(SIGUSR2
, SIG_IGN
);
2688 // initialize wakeup time to CURRENT time
2689 *wakeuptime
=time(NULL
);
2695 // Toggle debug mode implemented for native windows only
2696 // (there is no easy way to reopen tty on *nix)
// Three outcomes are visible below:
//  - enabling: a console titled "smartd [Debug]" is requested; on success SIGINT is
//    routed to HUPhandler and the PID is logged, otherwise "enable console failed";
//  - disabling (debugmode == 1): the console is released and SIGINT goes back to sighandler;
//  - any other debugmode value is reported as "not changed".
// NOTE(review): the condition of the first branch and the assignments to debugmode
// are on lines missing from this listing.
2697 static void ToggleDebugMode()
2700 PrintOut(LOG_INFO
,"Signal USR2 - enabling debug mode\n");
2701 if (!daemon_enable_console("smartd [Debug]")) {
2703 daemon_signal(SIGINT
, HUPhandler
);
2704 PrintOut(LOG_INFO
,"smartd debug mode enabled, PID=%d\n", getpid());
2707 PrintOut(LOG_INFO
,"enable console failed\n");
2709 else if (debugmode
== 1) {
2710 daemon_disable_console();
2712 daemon_signal(SIGINT
, sighandler
);
2713 PrintOut(LOG_INFO
,"Signal USR2 - debug mode disabled\n");
2716 PrintOut(LOG_INFO
,"Signal USR2 - debug mode %d not changed\n", debugmode
);
// Sleep until the next device check is due and return the (possibly adjusted)
// wake-up time.
//   wakeuptime - time at which the next check was scheduled
// A wake-up time that already lies in the past is advanced by whole multiples
// of checktime. Sleeping ends early when caughtsigUSR1, caughtsigHUP or
// caughtsigEXIT is set.
// NOTE(review): short lines (the declaration and refresh of timenow, the body of
// the USR2 branch, braces, the final return) are missing from this listing.
2720 time_t dosleep(time_t wakeuptime
){
2723 // If past wake-up-time, compute next wake-up-time
2725 while (wakeuptime
<=timenow
){
// skip all intervals that have already elapsed in one step
2726 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
2727 wakeuptime
+=intervals
*checktime
;
2730 // sleep until we catch SIGUSR1 or have completed sleeping
2731 while (timenow
<wakeuptime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
){
2733 // protect user against the system clock being adjusted backwards
2734 if (wakeuptime
>timenow
+checktime
){
2735 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2736 wakeuptime
=timenow
+checktime
;
2739 // Exit sleep when time interval has expired or a signal is received
2740 sleep(wakeuptime
-timenow
);
2743 // toggle debug mode?
2744 if (caughtsigUSR2
) {
2753 // if we caught a SIGUSR1 then print message and clear signal
2755 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2756 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
2760 // return adjusted wakeuptime
2764 // Print out a list of valid arguments for the Directive d
//   priority - passed to PrintOut() for the argument list itself
//   d        - the Directive to describe
// The vendor-attribute list is built by create_vendor_attribute_arg_list() and
// released again with CheckFree(); if it cannot be built, a LOG_CRIT message is
// printed instead.
// NOTE(review): the lines that select on d (switch/case labels) are missing from
// this listing, so the mapping from Directive to text is not documented here.
2765 void printoutvaliddirectiveargs(int priority
, char d
) {
2770 PrintOut(priority
, "never[,q], sleep[,q], standby[,q], idle[,q]");
2773 PrintOut(priority
, "valid_regular_expression");
2776 PrintOut(priority
, "ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N");
2779 PrintOut(priority
, "normal, permissive");
2783 PrintOut(priority
, "on, off");
2786 PrintOut(priority
, "error, selftest");
2789 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
2792 if (!(s
= create_vendor_attribute_arg_list())) {
2793 PrintOut(LOG_CRIT
,"Insufficient memory to construct argument list\n");
2796 PrintOut(priority
, "\n%s\n", s
);
2797 s
=CheckFree(s
, __LINE__
,filenameandversion
);
2800 PrintOut(priority
, "use, ignore, show, showall");
2803 PrintOut(priority
, "none, samsung, samsung2, samsung3");
2808 // exits with an error message, or returns integer value of token
2809 int GetInteger(char *arg
, char *name
, char *token
, int lineno
, char *configfile
, int min
, int max
){
2813 // check input range
2815 PrintOut(LOG_CRIT
, "min =%d passed to GetInteger() must be >=0\n", min
);
2819 // make sure argument is there
2821 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
2822 configfile
, lineno
, name
, token
, min
, max
);
2826 // get argument value (base 10), check that it's integer, and in-range
2827 val
=strtol(arg
,&endptr
,10);
2828 if (*endptr
!='\0' || val
<min
|| val
>max
) {
2829 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
2830 configfile
, lineno
, name
, token
, arg
, min
, max
);
2834 // all is well; return value
2839 // Get 1-3 small integer(s) for '-W' directive
2840 int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *configfile
,
2841 unsigned char * val1
, unsigned char * val2
, unsigned char * val3
){
2842 unsigned v1
= 0, v2
= 0, v3
= 0;
2843 int n1
= -1, n2
= -1, n3
= -1, len
;
2845 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
2846 configfile
, lineno
, name
, token
);
2851 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
2852 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
2853 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
2854 configfile
, lineno
, name
, token
, arg
);
2857 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
2862 // This function returns 1 if it has correctly parsed one token (and
2863 // any arguments), else zero if no tokens remain. It returns -1 if an
2864 // error was encountered.
2865 int ParseToken(char *token
,cfgfile
*cfg
){
2867 char *name
=cfg
->name
;
2868 int lineno
=cfg
->lineno
;
2869 char *delim
= " \n\t";
2874 maildata
*mdat
=NULL
, tempmail
;
2876 // is the rest of the line a comment
2880 // is the token not recognized?
2881 if (*token
!='-' || strlen(token
)!=2) {
2882 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
2883 configfile
, lineno
, name
, token
);
2884 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
2888 // token we will be parsing:
2891 // create temporary maildata structure. This means we can postpone
2892 // allocating space in the data segment until we are sure there are
2894 if ('m'==sym
|| 'M'==sym
){
2895 if (!cfg
->mailwarn
){
2896 memset(&tempmail
, 0, sizeof(maildata
));
2904 // parse the token and swallow its argument
2909 // monitor current pending sector count (default 197)
2910 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2912 if (val
==CUR_UNC_DEFAULT
)
2915 val
=CUR_UNC_DEFAULT
;
2916 // set bottom 8 bits to correct value
2917 cfg
->pending
&= 0xff00;
2918 cfg
->pending
|= val
;
2921 // monitor offline uncorrectable sectors (default 198)
2922 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2924 if (val
==OFF_UNC_DEFAULT
)
2927 val
=OFF_UNC_DEFAULT
;
2928 // turn off top 8 bits, then set to correct value
2929 cfg
->pending
&= 0xff;
2930 cfg
->pending
|= (val
<<8);
2933 // Set tolerance level for SMART command failures
2934 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2936 } else if (!strcmp(arg
, "normal")) {
2937 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
2938 // not on failure of an optional S.M.A.R.T. command.
2939 // This is the default so we don't need to actually do anything here.
2941 } else if (!strcmp(arg
, "permissive")) {
2942 // Permissive mode; ignore errors from Mandatory SMART commands
2949 // specify the device type
2950 cfg
->controller_explicit
= 1;
2951 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2953 } else if (!strcmp(arg
, "ata")) {
2954 cfg
->controller_port
= 0;
2955 cfg
->controller_type
= CONTROLLER_ATA
;
2956 } else if (!strcmp(arg
, "scsi")) {
2957 cfg
->controller_port
=0;
2958 cfg
->controller_type
= CONTROLLER_SCSI
;
2959 } else if (!strcmp(arg
, "marvell")) {
2960 cfg
->controller_port
=0;
2961 cfg
->controller_type
= CONTROLLER_MARVELL_SATA
;
2962 } else if (!strncmp(arg
, "sat", 3)) {
2963 cfg
->controller_type
= CONTROLLER_SAT
;
2964 cfg
->controller_port
= 0;
2965 cfg
->satpassthrulen
= 0;
2966 if (strlen(arg
) > 3) {
2970 cp
= strchr(arg
, ',');
2971 if (cp
&& (1 == sscanf(cp
+ 1, "%d", &k
)) &&
2972 ((0 == k
) || (12 == k
) || (16 == k
)))
2973 cfg
->satpassthrulen
= k
;
2975 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2976 "'-d sat,<n>' requires <n> to be 0, 12 or 16\n",
2977 configfile
, lineno
, name
);
2981 } else if (!strncmp(arg
, "hpt", 3)){
2982 unsigned char i
, slash
= 0;
2983 cfg
->hpt_data
[0] = 0;
2984 cfg
->hpt_data
[1] = 0;
2985 cfg
->hpt_data
[2] = 0;
2986 cfg
->controller_type
= CONTROLLER_HPT
;
2987 for (i
=4; i
< strlen(arg
); i
++) {
2991 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2992 "'-d hpt,L/M/N' supports 2-3 items\n",
2993 configfile
, lineno
, name
);
2998 else if ((arg
[i
])>='0' && (arg
[i
])<='9') {
2999 if (cfg
->hpt_data
[slash
]>1) { /* hpt_data[x] max 19 */
3003 cfg
->hpt_data
[slash
] = cfg
->hpt_data
[slash
]*10 + arg
[i
] - '0';
3012 } else if (badarg
!= TRUE
) {
3013 if (cfg
->hpt_data
[0]==0 || cfg
->hpt_data
[0]>8){
3014 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3015 "'-d hpt,L/M/N' no/invalid controller id L supplied\n",
3016 configfile
, lineno
, name
);
3019 if (cfg
->hpt_data
[1]==0 || cfg
->hpt_data
[1]>8){
3020 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3021 "'-d hpt,L/M/N' no/invalid channel number M supplied\n",
3022 configfile
, lineno
, name
);
3026 if (cfg
->hpt_data
[2]==0 || cfg
->hpt_data
[2]>15){
3027 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3028 "'-d hpt,L/M/N' no/invalid pmport number N supplied\n",
3029 configfile
, lineno
, name
);
3032 } else { /* no pmport device */
3036 } else if (!strcmp(arg
, "removable")) {
3039 // look 3ware,N RAID device
3043 // make a copy of the string to mess with
3044 if (!(s
= strdup(arg
))) {
3046 "No memory to copy argument to -d option - exiting\n");
3048 } else if (!strncmp(s
,"3ware,",6)) {
3049 if (split_report_arg2(s
, &i
)){
3050 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N requires N integer\n",
3051 configfile
, lineno
, name
);
3053 } else if ( i
<0 || i
>31) {
3054 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N (N=%d) must have 0 <= N <= 31\n",
3055 configfile
, lineno
, name
, i
);
3058 // determine type of escalade device from name of device
3059 cfg
->controller_type
= guess_device_type(name
);
3060 if (cfg
->controller_type
!=CONTROLLER_3WARE_9000_CHAR
&& cfg
->controller_type
!=CONTROLLER_3WARE_678K_CHAR
)
3061 cfg
->controller_type
=CONTROLLER_3WARE_678K
;
3063 // NOTE: controller_port == disk number + 1
3064 cfg
->controller_port
= i
+1;
3066 } else if (!strncmp(s
,"cciss,",6)) {
3067 if (split_report_arg2(s
, &i
)){
3068 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N requires N integer\n",
3069 configfile
, lineno
, name
);
3071 } else if ( i
<0 || i
>127) {
3072 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N (N=%d) must have 0 <= N <= 127\n",
3073 configfile
, lineno
, name
, i
);
3076 // NOTE: controller_port == disk number + 1
3077 cfg
->controller_type
= CONTROLLER_CCISS
;
3078 cfg
->controller_port
= i
+1;
3083 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3088 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3090 } else if (!strcmp(arg
, "none")) {
3091 cfg
->fixfirmwarebug
= FIX_NONE
;
3092 } else if (!strcmp(arg
, "samsung")) {
3093 cfg
->fixfirmwarebug
= FIX_SAMSUNG
;
3094 } else if (!strcmp(arg
, "samsung2")) {
3095 cfg
->fixfirmwarebug
= FIX_SAMSUNG2
;
3096 } else if (!strcmp(arg
, "samsung3")) {
3097 cfg
->fixfirmwarebug
= FIX_SAMSUNG3
;
3103 // check SMART status
3107 // check for failure of usage attributes
3111 // track changes in all vendor attributes
3116 // track changes in prefail vendor attributes
3120 // track changes in usage vendor attributes
3124 // track changes in SMART logs
3125 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3127 } else if (!strcmp(arg
, "selftest")) {
3128 // track changes in self-test log
3130 } else if (!strcmp(arg
, "error")) {
3131 // track changes in ATA error log
3138 // monitor everything
3147 // automatic offline testing enable/disable
3148 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3150 } else if (!strcmp(arg
, "on")) {
3151 cfg
->autoofflinetest
= 2;
3152 } else if (!strcmp(arg
, "off")) {
3153 cfg
->autoofflinetest
= 1;
3159 // skip disk check if in idle or standby mode
3160 if (!(arg
= strtok(NULL
, delim
)))
3162 else if (!strcmp(arg
, "never") || !strcmp(arg
, "never,q"))
3164 else if (!strcmp(arg
, "sleep") || !strcmp(arg
, "sleep,q"))
3166 else if (!strcmp(arg
, "standby") || !strcmp(arg
, "standby,q"))
3168 else if (!strcmp(arg
, "idle") || !strcmp(arg
, "idle,q"))
3172 cfg
->powerquiet
= !!strchr(arg
, ',');
3175 // automatic attribute autosave enable/disable
3176 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3178 } else if (!strcmp(arg
, "on")) {
3180 } else if (!strcmp(arg
, "off")) {
3187 // warn user, and delete any previously given -s REGEXP Directives
3189 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3190 configfile
, lineno
, name
, cfg
->testdata
->regex
);
3191 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3193 // check for missing argument
3194 if (!(arg
= strtok(NULL
, delim
))) {
3197 // allocate space for structure and string
3198 else if (!(cfg
->testdata
=(testinfo
*)Calloc(1, sizeof(testinfo
))) || !(cfg
->testdata
->regex
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
))) {
3199 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create Test Directive -s %s!\n",
3200 configfile
, lineno
, name
, arg
);
3203 else if ((val
=regcomp(&(cfg
->testdata
->cregex
), arg
, REG_EXTENDED
))) {
3205 // not a valid regular expression!
3206 regerror(val
, &(cfg
->testdata
->cregex
), errormsg
, 512);
3207 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3208 configfile
, lineno
, name
, arg
, errormsg
);
3209 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3212 // Do a bit of sanity checking and warn user if we think that
3213 // their regexp is "strange". User probably confused about shell
3214 // glob(3) syntax versus regular expression syntax regexp(7).
3215 else if ((int)strlen(arg
) != (val
=strspn(arg
,"0123456789/.-+*|()?^$[]SLCO")))
3216 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3217 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
3220 // send email to address that follows
3221 if (!(arg
= strtok(NULL
,delim
)))
3224 if (mdat
->address
) {
3225 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3226 configfile
, lineno
, name
, mdat
->address
);
3227 mdat
->address
=FreeNonZero(mdat
->address
, -1,__LINE__
,filenameandversion
);
3229 mdat
->address
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3233 // email warning options
3234 if (!(arg
= strtok(NULL
, delim
)))
3236 else if (!strcmp(arg
, "once"))
3237 mdat
->emailfreq
= 1;
3238 else if (!strcmp(arg
, "daily"))
3239 mdat
->emailfreq
= 2;
3240 else if (!strcmp(arg
, "diminishing"))
3241 mdat
->emailfreq
= 3;
3242 else if (!strcmp(arg
, "test"))
3243 mdat
->emailtest
= 1;
3244 else if (!strcmp(arg
, "exec")) {
3245 // Get the next argument (the command line)
3246 if (!(arg
= strtok(NULL
, delim
))) {
3247 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3248 configfile
, lineno
, name
, token
);
3251 // Free the last cmd line given if any, and copy new one
3252 if (mdat
->emailcmdline
) {
3253 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3254 configfile
, lineno
, name
, mdat
->emailcmdline
);
3255 mdat
->emailcmdline
=FreeNonZero(mdat
->emailcmdline
, -1,__LINE__
,filenameandversion
);
3257 mdat
->emailcmdline
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3263 // ignore failure of usage attribute
3264 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3266 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_FAILUSE
, __LINE__
);
3269 // ignore attribute for tracking purposes
3270 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3272 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_IGNORE
, __LINE__
);
3275 // print raw value when tracking
3276 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3278 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3281 // track changes in raw value (forces printing of raw value)
3282 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3284 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3285 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAW
, __LINE__
);
3288 // track Temperature
3289 if ((val
=Get3Integers(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
,
3290 &cfg
->tempdiff
, &cfg
->tempinfo
, &cfg
->tempcrit
))<0)
3292 // increase min Temperature during first 30 minutes
3293 if (!(cfg
->tempmininc
= (unsigned char)(CHECKTIME
/ checktime
)))
3294 cfg
->tempmininc
= 1;
3297 // non-default vendor-specific attribute meaning
3298 if (!(arg
=strtok(NULL
,delim
))) {
3300 } else if (parse_attribute_def(arg
, &cfg
->attributedefs
)){
3305 // Define use of drive-specific presets.
3306 if (!(arg
= strtok(NULL
, delim
))) {
3308 } else if (!strcmp(arg
, "use")) {
3309 cfg
->ignorepresets
= FALSE
;
3310 } else if (!strcmp(arg
, "ignore")) {
3311 cfg
->ignorepresets
= TRUE
;
3312 } else if (!strcmp(arg
, "show")) {
3313 cfg
->showpresets
= TRUE
;
3314 } else if (!strcmp(arg
, "showall")) {
3321 // Directive not recognized
3322 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3323 configfile
, lineno
, name
, token
);
3328 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3329 configfile
, lineno
, name
, token
);
3332 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3333 configfile
, lineno
, name
, token
, arg
);
3335 if (missingarg
|| badarg
) {
3336 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
3337 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
3338 PrintOut(LOG_CRIT
, "\n");
3342 // If this did something to fill the mail structure, and that didn't
3343 // already exist, create it and copy.
3345 if (!(cfg
->mailwarn
=(maildata
*)Calloc(1, sizeof(maildata
)))) {
3346 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create mail warning entry!\n",
3347 configfile
, lineno
, name
);
3350 memcpy(cfg
->mailwarn
, mdat
, sizeof(maildata
));
3356 // Allocate storage for a new cfgfile entry. If original!=NULL, it's
3357 // a copy of the original, but with private data storage. Else all is
3358 // zeroed. Returns address, and fails if no memory is available.
3360 cfgfile
*CreateConfigEntry(cfgfile
*original
){
3363 // allocate memory for new structure
3364 if (!(add
=(cfgfile
*)Calloc(1,sizeof(cfgfile
))))
3367 // if old structure was pointed to, copy it
3369 memcpy(add
, original
, sizeof(cfgfile
));
3371 // make private copies of data items ONLY if they are in use (non
3373 add
->name
= CustomStrDup(add
->name
, 0, __LINE__
,filenameandversion
);
3375 if (add
->testdata
) {
3377 if (!(add
->testdata
=(testinfo
*)Calloc(1,sizeof(testinfo
))))
3379 memcpy(add
->testdata
, original
->testdata
, sizeof(testinfo
));
3380 add
->testdata
->regex
= CustomStrDup(add
->testdata
->regex
, 1, __LINE__
,filenameandversion
);
3381 // only POSIX-portable way to make fresh copy of compiled regex is
3382 // to recompile it completely. There is no POSIX
3383 // compiled-regex-copy command.
3384 if ((val
=regcomp(&(add
->testdata
->cregex
), add
->testdata
->regex
, REG_EXTENDED
))) {
3386 regerror(val
, &(add
->testdata
->cregex
), errormsg
, 512);
3387 PrintOut(LOG_CRIT
, "unable to recompile regular expression %s. %s\n", add
->testdata
->regex
, errormsg
);
3392 if (add
->mailwarn
) {
3393 if (!(add
->mailwarn
=(maildata
*)Calloc(1,sizeof(maildata
))))
3395 memcpy(add
->mailwarn
, original
->mailwarn
, sizeof(maildata
));
3396 add
->mailwarn
->address
= CustomStrDup(add
->mailwarn
->address
, 0, __LINE__
,filenameandversion
);
3397 add
->mailwarn
->emailcmdline
= CustomStrDup(add
->mailwarn
->emailcmdline
, 0, __LINE__
,filenameandversion
);
3400 if (add
->attributedefs
) {
3401 if (!(add
->attributedefs
=(unsigned char *)Calloc(MAX_ATTRIBUTE_NUM
,1)))
3403 memcpy(add
->attributedefs
, original
->attributedefs
, MAX_ATTRIBUTE_NUM
);
3406 if (add
->monitorattflags
) {
3407 if (!(add
->monitorattflags
=(unsigned char *)Calloc(NMONITOR
*32, 1)))
3409 memcpy(add
->monitorattflags
, original
->monitorattflags
, NMONITOR
*32);
3412 if (add
->smartval
) {
3413 if (!(add
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
))))
3417 if (add
->smartthres
) {
3418 if (!(add
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
))))
3425 PrintOut(LOG_CRIT
, "No memory to create entry from configuration file\n");
3431 // This is the routine that adds things to the cfgentries list. To
3432 // prevent memory leaks when re-reading the configuration file many
3433 // times, this routine MUST deallocate any memory other than that
3434 // pointed to within cfg-> before it returns.
3436 // Return values are:
3437 // 1: parsed a normal line
3438 // 0: found comment or blank line
3439 // -1: found SCANDIRECTIVE line
3440 // -2: found an error
3442 // Note: this routine modifies *line from the caller!
3443 int ParseConfigLine(int entry
, int lineno
,char *line
){
3446 char *delim
= " \n\t";
3450 // get first token: device name. If a comment, skip line
3451 if (!(name
=strtok(line
,delim
)) || *name
=='#') {
3455 // Have we detected the SCANDIRECTIVE directive?
3456 if (!strcmp(SCANDIRECTIVE
,name
)){
3459 PrintOut(LOG_INFO
,"Scan Directive %s (line %d) must be the first entry in %s\n",name
, lineno
, configfile
);
3464 // Is there space for another entry? If not, allocate more
3465 while (entry
>=cfgentries_max
)
3466 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "configuration file device");
3468 // We've got a legit entry, make space to store it
3469 cfg
=cfgentries
[entry
]=CreateConfigEntry(NULL
);
3470 cfg
->name
= CustomStrDup(name
, 1, __LINE__
,filenameandversion
);
3472 // Store line number, and by default check for both device types.
3475 // Try to recognize whether this is an IDE or SCSI device. These can be
3476 // overwritten by configuration file directives.
3477 if (cfg
->controller_type
==CONTROLLER_UNKNOWN
)
3478 cfg
->controller_type
= guess_device_type(cfg
->name
);
3480 // parse tokens one at a time from the file.
3481 while ((token
=strtok(NULL
,delim
))){
3482 int retval
=ParseToken(token
,cfg
);
3491 PrintOut(LOG_INFO
,"Parsed token %s\n",token
);
3497 // error found on the line
3502 // If we found 3ware/cciss controller, then modify device name by adding a SPACE
3503 if (cfg
->controller_port
) {
3504 int len
=17+strlen(cfg
->name
);
3508 PrintOut(LOG_CRIT
, "smartd: can not scan for 3ware/cciss devices (line %d of file %s)\n",
3509 lineno
, configfile
);
3513 if (!(newname
=(char *)calloc(len
,1))) {
3514 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3518 // Make new device name by adding a space then RAID disk number
3519 snprintf(newname
, len
, "%s [%s_disk_%02d]", cfg
->name
, (cfg
->controller_type
== CONTROLLER_CCISS
) ? "cciss" : "3ware",
3520 cfg
->controller_port
-1);
3521 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3526 if (cfg
->hpt_data
[0]) {
3527 int len
=17+strlen(cfg
->name
);
3531 PrintOut(LOG_CRIT
, "smartd: can not scan for highpoint devices (line %d of file %s)\n",
3532 lineno
, configfile
);
3536 if (!(newname
=(char *)calloc(len
,1))) {
3537 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3541 // Make new device name by adding a space then RAID disk number
3542 snprintf(newname
, len
, "%s [hpt_%d/%d/%d]", cfg
->name
, cfg
->hpt_data
[0],
3543 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
3544 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3549 // If NO monitoring directives are set, then set all of them.
3550 if (!(cfg
->smartcheck
|| cfg
->usagefailed
|| cfg
->prefail
||
3551 cfg
->usage
|| cfg
->selftest
|| cfg
->errorlog
||
3552 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
3554 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3555 cfg
->name
, cfg
->lineno
, configfile
);
3565 // additional sanity check. Has user set -M options without -m?
3566 if (cfg
->mailwarn
&& !cfg
->mailwarn
->address
&& (cfg
->mailwarn
->emailcmdline
|| cfg
->mailwarn
->emailfreq
|| cfg
->mailwarn
->emailtest
)){
3567 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3568 cfg
->name
, cfg
->lineno
, configfile
);
3572 // has the user set <nomailer>?
3573 if (cfg
->mailwarn
&& cfg
->mailwarn
->address
&& !strcmp(cfg
->mailwarn
->address
,"<nomailer>")){
3574 // check that -M exec is also set
3575 if (!cfg
->mailwarn
->emailcmdline
){
3576 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3577 cfg
->name
, cfg
->lineno
, configfile
);
3580 // now free memory. From here on the sign of <nomailer> is
3581 // address==NULL and cfg->emailcmdline!=NULL
3582 cfg
->mailwarn
->address
=FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
3585 // set cfg->emailfreq to 1 (once) if user hasn't set it
3586 if (cfg
->mailwarn
&& !cfg
->mailwarn
->emailfreq
)
3587 cfg
->mailwarn
->emailfreq
= 1;
3597 // clean up utility for ParseConfigFile()
3598 void cleanup(FILE **fpp
, int is_stdin
){
3600 // (*fpp != stdin) does not work here if stdin has been closed & reopened
3610 // Parses a configuration file. Return values are:
3611 // N>=0: found N entries
3612 // -1: syntax error in config file
3613 // -2: config file does not exist
3614 // -3: config file exists but cannot be read
3616 // In the case where the return value is 0, there are three
3618 // Empty configuration file ==> cfgentries==NULL
3619 // No configuration file ==> cfgentries[0]->lineno == 0
3620 // SCANDIRECTIVE found ==> cfgentries[0]->lineno != 0
3621 int ParseConfigFile(){
3623 int entry
=0,lineno
=1,cont
=0,contlineno
=0;
3624 char line
[MAXLINELEN
+2];
3625 char fullline
[MAXCONTLINE
+1];
3627 int is_stdin
= (configfile
== configfile_stdin
); // pointer comparison ok here
3629 // Open config file, if it exists and is not <stdin>
3631 fp
=fopen(configfile
,"r");
3632 if (fp
==NULL
&& (errno
!=ENOENT
|| configfile_alt
)) {
3633 // file exists but we can't read it or it should exist due to '-c' option
3634 int ret
= (errno
!=ENOENT
? -3 : -2);
3635 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
3636 strerror(errno
),configfile
);
3640 else // read from stdin ('-c -' option)
3643 // No configuration file found -- use fake one
3645 int len
=strlen(SCANDIRECTIVE
)+4;
3646 char *fakeconfig
=(char *)calloc(len
,1);
3649 (len
-1) != snprintf(fakeconfig
, len
, "%s -a", SCANDIRECTIVE
) ||
3650 -1 != ParseConfigLine(entry
, 0, fakeconfig
)
3652 PrintOut(LOG_CRIT
,"Internal error in ParseConfigFile() at line %d of file %s\n%s",
3653 __LINE__
, filenameandversion
, reportbug
);
3656 fakeconfig
=CheckFree(fakeconfig
, __LINE__
,filenameandversion
);
3661 setmode(fileno(fp
), O_TEXT
); // Allow files with \r\n
3664 // configuration file exists
3665 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
3667 // parse config file line by line
3669 int len
=0,scandevice
;
3674 // make debugging simpler
3675 memset(line
,0,sizeof(line
));
3678 code
=fgets(line
,MAXLINELEN
+2,fp
);
3680 // are we at the end of the file?
3683 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3684 // See if we found a SCANDIRECTIVE directive
3685 if (scandevice
==-1) {
3686 cleanup(&fp
, is_stdin
);
3689 // did we find a syntax error
3690 if (scandevice
==-2) {
3691 cleanup(&fp
, is_stdin
);
3694 // the final line is part of a continuation line
3701 // input file line number
3704 // See if line is too long
3706 if (len
>MAXLINELEN
){
3708 if (line
[len
-1]=='\n')
3709 warn
="(including newline!) ";
3712 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3713 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
3714 cleanup(&fp
, is_stdin
);
3718 // Ignore anything after comment symbol
3719 if ((comment
=strchr(line
,'#'))){
3724 // is the total line (made of all continuation lines) too long?
3725 if (cont
+len
>MAXCONTLINE
){
3726 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3727 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
3728 cleanup(&fp
, is_stdin
);
3732 // copy string so far into fullline, and increment length
3733 strcpy(fullline
+cont
,line
);
3736 // Is this a continuation line? If so, replace \ by space and look at next line
3737 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
3738 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
3742 // Not a continuation line. Parse it
3743 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3745 // did we find a scandevice directive?
3746 if (scandevice
==-1) {
3747 cleanup(&fp
, is_stdin
);
3750 // did we find a syntax error
3751 if (scandevice
==-2) {
3752 cleanup(&fp
, is_stdin
);
3760 cleanup(&fp
, is_stdin
);
3762 // note -- may be zero if syntax of file OK, but no valid entries!
3767 // Prints copyright, license and version information
3768 void PrintCopyleft(void){
3775 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3776 <LIST> is the list of valid arguments for option opt. */
// Logs, at LOG_CRIT, the list of valid arguments for command-line option opt,
// wrapped in a VALID ARGUMENTS ARE banner.
// NOTE(review): this listing has gaps -- the declaration of s and whatever
// sits between the statements below are not visible here.
3777 void PrintValidArgs(char opt
) {
// opening half of the banner
3780 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
// ask GetValidArgList() for the list belonging to opt; a zero (NULL) result
// is reported as an error instead of a list
3781 if (!(s
= GetValidArgList(opt
)))
3782 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
// NOTE(review): s is handed to PrintOut() as the format string itself.
// Other calls in this file pass a literal format first; if the list could
// ever contain a percent sign, passing s as the argument of a %s format
// would be safer -- confirm.
3784 PrintOut(LOG_CRIT
, (char *)s
);
// closing half of the banner
3785 PrintOut(LOG_CRIT
, " <=======\n");
3788 // Parses input line, prints usage message and
3789 // version/license/copyright messages
3790 void ParseOpts(int argc
, char **argv
){
3791 extern char *optarg
;
3792 extern int optopt
, optind
, opterr
;
3797 // Please update GetValidArgList() if you edit shortopts
3798 const char *shortopts
= "c:l:q:dDni:p:r:Vh?";
3799 #ifdef HAVE_GETOPT_LONG
3801 // Please update GetValidArgList() if you edit longopts
3802 struct option longopts
[] = {
3803 { "configfile", required_argument
, 0, 'c' },
3804 { "logfacility", required_argument
, 0, 'l' },
3805 { "quit", required_argument
, 0, 'q' },
3806 { "debug", no_argument
, 0, 'd' },
3807 { "showdirectives", no_argument
, 0, 'D' },
3808 { "interval", required_argument
, 0, 'i' },
3810 { "no-fork", no_argument
, 0, 'n' },
3812 { "pidfile", required_argument
, 0, 'p' },
3813 { "report", required_argument
, 0, 'r' },
3814 #if defined(_WIN32) || defined(__CYGWIN__)
3815 { "service", no_argument
, 0, 'n' },
3817 { "version", no_argument
, 0, 'V' },
3818 { "license", no_argument
, 0, 'V' },
3819 { "copyright", no_argument
, 0, 'V' },
3820 { "help", no_argument
, 0, 'h' },
3821 { "usage", no_argument
, 0, 'h' },
3829 // Parse input options. This horrible construction is so that emacs
3830 // indents properly. Sorry.
3831 while (-1 != (optchar
=
3832 #ifdef HAVE_GETOPT_LONG
3833 getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)
3835 getopt(argc
, argv
, shortopts
)
3842 if (!(strcmp(optarg
,"nodev"))) {
3844 } else if (!(strcmp(optarg
,"nodevstartup"))) {
3846 } else if (!(strcmp(optarg
,"never"))) {
3848 } else if (!(strcmp(optarg
,"onecheck"))) {
3851 } else if (!(strcmp(optarg
,"showtests"))) {
3854 } else if (!(strcmp(optarg
,"errors"))) {
3861 // set the log facility level
3862 if (!strcmp(optarg
, "daemon"))
3863 facility
=LOG_DAEMON
;
3864 else if (!strcmp(optarg
, "local0"))
3865 facility
=LOG_LOCAL0
;
3866 else if (!strcmp(optarg
, "local1"))
3867 facility
=LOG_LOCAL1
;
3868 else if (!strcmp(optarg
, "local2"))
3869 facility
=LOG_LOCAL2
;
3870 else if (!strcmp(optarg
, "local3"))
3871 facility
=LOG_LOCAL3
;
3872 else if (!strcmp(optarg
, "local4"))
3873 facility
=LOG_LOCAL4
;
3874 else if (!strcmp(optarg
, "local5"))
3875 facility
=LOG_LOCAL5
;
3876 else if (!strcmp(optarg
, "local6"))
3877 facility
=LOG_LOCAL6
;
3878 else if (!strcmp(optarg
, "local7"))
3879 facility
=LOG_LOCAL7
;
3884 // enable debug mode
3889 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
3894 // print summary of all valid directives
3900 // Period (time interval) for checking
3901 // strtol will set errno in the event of overflow, so we'll check it.
3903 lchecktime
= strtol(optarg
, &tailptr
, 10);
3904 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
3907 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
3908 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
3909 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3912 checktime
= (int)lchecktime
;
3915 // report IOCTL transactions
3920 // split_report_arg() may modify its first argument string, so use a
3921 // copy of optarg in case we want optarg for an error message.
3922 if (!(s
= strdup(optarg
))) {
3923 PrintOut(LOG_CRIT
, "No memory to process -r option - exiting\n");
3926 if (split_report_arg(s
, &i
)) {
3928 } else if (i
<1 || i
>3) {
3931 PrintOut(LOG_CRIT
, "======> INVALID REPORT LEVEL: %s <=======\n", optarg
);
3932 PrintOut(LOG_CRIT
, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3934 } else if (!strcmp(s
,"ioctl")) {
3935 con
->reportataioctl
= con
->reportscsiioctl
= i
;
3936 } else if (!strcmp(s
,"ataioctl")) {
3937 con
->reportataioctl
= i
;
3938 } else if (!strcmp(s
,"scsiioctl")) {
3939 con
->reportscsiioctl
= i
;
3943 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3947 // alternate configuration file
3948 if (strcmp(optarg
,"-"))
3949 configfile
=configfile_alt
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3950 else // read from stdin
3951 configfile
=configfile_stdin
;
3954 // output file with PID number
3955 pid_file
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3958 // print version and CVS info
3963 // help: print summary of command-line options
3971 // unrecognized option
3974 #ifdef HAVE_GETOPT_LONG
3975 // Point arg to the argument in which this option was found.
3976 arg
= argv
[optind
-1];
3977 // Check whether the option is a long option that doesn't map to -h.
3978 if (arg
[1] == '-' && optchar
!= 'h') {
3979 // Iff optopt holds a valid option then argument must be missing.
3980 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
3981 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
3982 PrintValidArgs(optopt
);
3984 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
3986 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
3991 // Iff optopt holds a valid option then argument must be missing.
3992 if (strchr(shortopts
, optopt
) != NULL
){
3993 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
3994 PrintValidArgs(optopt
);
3996 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
3998 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4005 // Check to see if option had an unrecognized or incorrect argument.
4009 // It would be nice to print the actual option name given by the user
4010 // here, but we just print the short form. Please fix this if you know
4011 // a clean way to do it.
4012 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
4013 PrintValidArgs(optchar
);
4014 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4019 // non-option arguments are not allowed
4020 if (argc
> optind
) {
4023 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
4024 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4028 // no pidfile in debug mode
4029 if (debugmode
&& pid_file
) {
4032 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4033 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
);
4034 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
4044 // Function we call if no configuration file was found or if the
4045 // SCANDIRECTIVE Directive was found. It makes entries for device
4046 // names returned by make_device_names() in os_OSNAME.c
// Creates one config entry per device name that make_device_names() returns
// for the requested kind of device, storing them in cfgentries[] at index
// start and upwards.
//   type  - compared below against ATA, SCSI and SAT to set controller_type
//   start - index in cfgentries[] of the first entry to fill
// NOTE(review): this listing has gaps; the return statements are not visible.
// Callers in this file add the return value to their running entry count, so
// it is presumably the number of entries made -- confirm.
4047 int MakeConfigEntries(const char *type
, int start
){
4050 char** devlist
= NULL
;
// cfgentries[0] is the template that later entries are copied from; cfg
// starts out pointing at that same entry
4051 cfgfile
*first
=cfgentries
[0],*cfg
=first
;
4053 // Hack! This is to make DEVICESCAN work on ATA devices behind
4054 // a SCSI to ATA Translation (SAT) Layer.
4055 // This will work on a general OS if the way that SAT devices are
4056 // named is the same as SCSI devices.
4057 // The BETTER solution is to modify make_device_names to recognize
4058 // the additional type "SAT". This requires changing os_*.cpp.
// basetype is the value actually passed to make_device_names()
4060 const char *basetype
= type
;
// NOTE(review): the statement guarded by this SAT test is missing from this
// listing; per the comment above it presumably switches basetype to the SCSI
// naming -- confirm.
4061 if (!strcmp(type
,"SAT") )
4064 // make list of devices
// a negative count from make_device_names() is logged as a problem
4065 if ((num
=make_device_names(&devlist
,basetype
))<0)
4066 PrintOut(LOG_CRIT
,"Problem creating device name scan list\n");
4068 // if no devices, or error constructing list, return
4072 // loop over entries to create
4073 for (i
=0; i
<num
; i
++){
4075 // make storage and copy for all but first entry
4077 // allocate more storage if needed
// grow cfgentries[] until index start+i is inside it
4078 while (cfgentries_max
<=start
+i
)
4079 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "simulated configuration file device");
// new entry is created from the template entry
4080 cfg
=cfgentries
[start
+i
]=CreateConfigEntry(first
);
// record the controller type that matches the requested kind of device
4084 if (!strcmp(type
,"ATA") )
4085 cfg
->controller_type
= CONTROLLER_ATA
;
4086 if (!strcmp(type
,"SCSI") )
4087 cfg
->controller_type
= CONTROLLER_SCSI
;
4088 if (!strcmp(type
,"SAT") )
4089 cfg
->controller_type
= CONTROLLER_SAT
;
4091 // remove device name, if it's there, and put in correct one
4092 cfg
->name
=FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
4093 // save pointer to the device name created within
4094 // make_device_names
// the entry now refers to the string itself, not to a copy of it
4095 cfg
->name
=devlist
[i
];
4098 // If needed, free memory used for devlist: pointers now in
4099 // cfgentries[]->names. If num==0 we never get to this point, but
4100 // that's OK. If we realloc()d the array length in
4101 // make_device_names() that was ALREADY equivalent to calling
// only the array of pointers is released here; the size passed is that of
// num pointers
4103 devlist
= FreeNonZero(devlist
,(sizeof (char*) * num
),__LINE__
, filenameandversion
);
// Reports that device name, of kind type (callers in this file pass ATA or
// SCSI), could not be registered. line is the config-file line number used in
// the message; scandirective selects the severity and whether anything is
// logged at all.
// NOTE(review): this listing has gaps; the test that chooses between the two
// PrintOut() calls below is not visible here.
4108 void CanNotRegister(char *name
, char *type
, int line
, int scandirective
){
// stay silent when scandirective is exactly 1 and debugmode is off
4109 if( !debugmode
&& scandirective
== 1 ) { return; }
// message with file and line: LOG_INFO when scandirective is nonzero,
// LOG_CRIT otherwise
4111 PrintOut(scandirective
?LOG_INFO
:LOG_CRIT
,
4112 "Unable to register %s device %s at line %d of file %s\n",
4113 type
, name
, line
, configfile
);
// message without file and line, always LOG_INFO; its arguments are on a
// line missing from this listing
4115 PrintOut(LOG_INFO
,"Unable to register %s device %s\n",
4120 // Returns negative value (see ParseConfigFile()) if config file
4121 // had errors, else number of entries which may be zero or positive.
4122 // If we found no configuration file, or it contained SCANDIRECTIVE,
4123 // then *scanning is set to 1, else 0.
4124 int ReadOrMakeConfigEntries(int *scanning
){
4127 // deallocate any cfgfile data structures in memory
4128 RmAllConfigEntries();
4130 // parse configuration file configfile (normally /etc/smartd.conf)
4131 if ((entries
=ParseConfigFile())<0) {
4133 // There was an error reading the configuration file.
4134 RmAllConfigEntries();
4136 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
4140 // did we find entries or scan?
4143 // no error parsing config file.
4145 // we did not find a SCANDIRECTIVE and did find valid entries
4146 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
4148 else if (cfgentries
&& cfgentries
[0]) {
4149 // we found a SCANDIRECTIVE or there was no configuration file so
4150 // scan. Configuration file's first entry contains all options
4152 cfgfile
*first
=cfgentries
[0];
4154 // By default scan for ATA, SCSI and SAT devices
4155 int doata
=1, doscsi
=1, dosat
=1;
4157 if (first
->controller_type
==CONTROLLER_SCSI
) {
4160 } else if (first
->controller_type
==CONTROLLER_ATA
) {
4163 } else if (first
->controller_type
==CONTROLLER_SAT
) {
4168 // The code in this block has been neutered by D. Gilbert
4169 // on 20070226. smartd can't cope with an ATA disk behind a SAT
4170 // transport seamlessly _without_ a bigger restructuring
4171 // of smartd than this code tried. It made ATA disks
4172 // behind a SAT interface automatically detected only by
4173 // killing support for real SCSI disks. Sorry, no.
4178 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
4180 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
4182 // make config list of ATA devices to search for
4184 entries
+=MakeConfigEntries("ATA", entries
);
4185 // make config list of SCSI devices to search for
4187 entries
+=MakeConfigEntries("SCSI", entries
);
4189 entries
+=MakeConfigEntries("SAT", entries
);
4191 // warn user if scan table found no devices
4193 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
4194 // get rid of fake entry with SCANDIRECTIVE as name
4195 RmConfigEntry(cfgentries
, __LINE__
);
4199 PrintOut(LOG_CRIT
,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile
);
4205 // This function tries devices from cfgentries. Each one that can be
4206 // registered is moved onto the [ata|scsi]devices lists and removed
4207 // from the cfgentries list, else its memory is deallocated.
4208 void RegisterDevices(int scanning
){
4211 // start by clearing lists/memory of ALL existing devices
4213 numdevata
=numdevscsi
=0;
4216 for (i
=0; i
<cfgentries_max
; i
++){
4218 cfgfile
*ent
=cfgentries
[i
];
4220 // skip any NULL entries (holes)
4224 // register ATA devices
4225 if (ent
->controller_type
!=CONTROLLER_SCSI
&& ent
->controller_type
!=CONTROLLER_CCISS
){
4226 if (ATADeviceScan(ent
, scanning
))
4227 CanNotRegister(ent
->name
, "ATA", ent
->lineno
, scanning
);
4229 // move onto the list of ata devices
4231 while (numdevata
>=atadevlist_max
)
4232 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
4233 atadevlist
[numdevata
++]=ent
;
4237 // then register SCSI devices
4238 if (ent
->controller_type
==CONTROLLER_SCSI
|| ent
->controller_type
==CONTROLLER_CCISS
||
4239 ent
->controller_type
==CONTROLLER_UNKNOWN
){
4243 struct sigaction alarmAction
, defaultaction
;
4245 // Set up an alarm handler to catch USB devices that hang on
4247 alarmAction
.sa_handler
= AlarmHandler
;
4248 alarmAction
.sa_flags
= SA_RESTART
;
4249 if (sigaction(SIGALRM
, &alarmAction
, &defaultaction
)) {
4250 // if we can't set timeout, just scan device
4251 PrintOut(LOG_CRIT
, "Unable to initialize SCSI timeout mechanism.\n");
4252 retscsi
=SCSIDeviceScan(ent
, scanning
);
4255 // prepare return point in case of bad SCSI device
4256 if (setjmp(registerscsienv
))
4257 // SCSI device timed out!
4260 // Set alarm, make SCSI call, reset alarm
4262 retscsi
=SCSIDeviceScan(ent
, scanning
);
4265 if (sigaction(SIGALRM
, &defaultaction
, NULL
)){
4266 PrintOut(LOG_CRIT
, "Unable to clear SCSI timeout mechanism.\n");
4270 retscsi
=SCSIDeviceScan(ent
, scanning
);
4273 // Now scan SCSI device...
4276 PrintOut(LOG_CRIT
, "Device %s timed out (poorly-implemented USB device?)\n", ent
->name
);
4277 CanNotRegister(ent
->name
, "SCSI", ent
->lineno
, scanning
);
4280 // move onto the list of scsi devices
4282 while (numdevscsi
>=scsidevlist_max
)
4283 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
4284 scsidevlist
[numdevscsi
++]=ent
;
4288 // if device is explicitly listed and we can't register it, then
4289 // exit unless the user has specified that the device is removable
4290 if (cfgentries
[i
] && !scanning
){
4291 if (ent
->removable
|| quit
==2)
4292 PrintOut(LOG_INFO
, "Device %s not available\n", ent
->name
);
4294 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", ent
->name
);
4299 // free up memory if device could not be registered
4300 RmConfigEntry(cfgentries
+i
, __LINE__
);
// Daemon entry point: parses options, then runs the main monitoring loop.
// Two signatures are given for this body: main(), and the static
// smartd_main() that the Windows main() passes to daemon_main().
// NOTE(review): presumably a preprocessor conditional selects one of them -- confirm.
// argc/argv are forwarded unchanged to ParseOpts().
4309 int main(int argc
, char **argv
)
4311 // Windows: internal main function, started directly or by the service control manager
4312 static int smartd_main(int argc
, char **argv
)
4315 // external control variables for ATA disks
4316 smartmonctrl control
;
4318 // is it our first pass through?
4321 // next time to wake up
4324 // for simplicity, null all global communications variables/lists
// NOTE(review): zeroes sizeof(control) bytes at con; presumably con points at control -- confirm
4326 memset(con
, 0,sizeof(control
));
4328 // parse input and print header and usage info if needed
4329 ParseOpts(argc
,argv
);
4331 // do we mute printing from ataprint commands?
4332 con
->printing_switchable
=0;
// dont_print is set to 1 unless debugmode is nonzero
4333 con
->dont_print
=debugmode
?0:1;
4335 // don't exit on bad checksums
4336 con
->checksumfail
=0;
4338 // the main loop of the code
4341 // are we exiting from a signal?
4342 if (caughtsigEXIT
) {
4343 // are we exiting with SIGTERM?
4344 int isterm
=(caughtsigEXIT
==SIGTERM
);
4345 int isquit
=(caughtsigEXIT
==SIGQUIT
);
// SIGTERM always counts as a normal exit; SIGQUIT counts only when debugmode is set
4346 int isok
=debugmode
?isterm
|| isquit
:isterm
;
// a normal exit is logged at LOG_INFO, any other signal at LOG_CRIT
4348 PrintOut(isok
?LOG_INFO
:LOG_CRIT
, "smartd received signal %d: %s\n",
4349 caughtsigEXIT
, strsignal(caughtsigEXIT
));
// exit status is 0 for a normal exit, EXIT_SIGNAL otherwise
4351 EXIT(isok
?0:EXIT_SIGNAL
);
4354 // Should we (re)read the config file?
4355 if (firstpass
|| caughtsigHUP
){
// scanning is handed by address to ReadOrMakeConfigEntries() and then by value to RegisterDevices()
4356 int entries
, scanning
=0;
4360 // Workaround for missing SIGQUIT via keyboard on Cygwin
4361 if (caughtsigHUP
==2) {
4362 // Simulate SIGQUIT if another SIGINT arrives soon
4365 if (caughtsigHUP
==2) {
4366 caughtsigEXIT
=SIGQUIT
;
4374 "Signal HUP - rereading configuration file %s\n":
4375 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME
" quits)\n\n",
4379 // clears cfgentries, (re)reads config file, makes >=0 entries
4380 entries
=ReadOrMakeConfigEntries(&scanning
);
4383 // checks devices, then moves onto ata/scsi list or deallocates.
4384 RegisterDevices(scanning
);
// taken when quit==2, or when quit is 0 or 1 and this is not the first pass
4386 else if (quit
==2 || ((quit
==0 || quit
==1) && !firstpass
)) {
4387 // user has asked to continue on error in configuration file
4389 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
4392 // exit with configuration file error status
// entries==-3 selects EXIT_READCONF, entries==-2 selects EXIT_NOCONF, anything else EXIT_BADCONF
4393 int status
= (entries
==-3 ? EXIT_READCONF
: entries
==-2 ? EXIT_NOCONF
: EXIT_BADCONF
);
4397 // Log number of devices we are monitoring...
// condition holds if any device is registered, or quit==2, or quit==1 on a later pass
4398 if (numdevata
+numdevscsi
|| quit
==2 || (quit
==1 && !firstpass
))
4399 PrintOut(LOG_INFO
,"Monitoring %d ATA and %d SCSI devices\n",
4400 numdevata
, numdevscsi
);
4402 PrintOut(LOG_INFO
,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4407 // user has asked to print test schedule
4408 PrintTestSchedule(atadevlist
, scsidevlist
);
4416 // check all devices once,
4417 // self tests are not started in first pass unless '-q onecheck' is specified
// third argument is nonzero on every pass after the first, or when quit==3
4418 CheckDevicesOnce(atadevlist
, scsidevlist
, (!firstpass
|| quit
==3));
4420 // user has asked us to exit after first check
4422 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4423 "smartd is exiting (exit status 0)\n");
4427 // fork into background if needed
// only considered on the first pass and when not in debug mode
4428 if (firstpass
&& !debugmode
) {
4432 // set exit and signal handlers, write PID file, set wake-up time
4434 Initialize(&wakeuptime
);
4438 // sleep until next check time, or a signal arrives
// dosleep() takes the current wake-up time and its return value becomes the next one
4439 wakeuptime
=dosleep(wakeuptime
);
4445 // Main function for Windows
// Builds the static service options and delegates everything to daemon_main(),
// returning daemon_main()'s result as the process exit code.
4446 int main(int argc
, char **argv
){
4447 // Options for smartd windows service
4448 static const daemon_winsvc_options svc_opts
= {
4449 "--service", // cmd_opt
4450 "smartd", "SmartD Service", // servicename, displayname
// adjacent string literals below are concatenated into one initializer string
// NOTE(review): presumably the service description field -- confirm against daemon_winsvc_options
4452 "Controls and monitors storage devices using the Self-Monitoring, "
4453 "Analysis and Reporting Technology System (S.M.A.R.T.) "
4454 "built into ATA and SCSI Hard Drives. "
4457 // daemon_main() handles daemon and service specific commands
4458 // and starts smartd_main() direct, from a new process,
4459 // or via service control manager
// arguments: identifier string "smartd", service options, the internal main function, and the original argc/argv
4460 return daemon_main("smartd", &svc_opts
, smartd_main
, argc
, argv
);