2 * Home page of code is: http://smartmontools.sourceforge.net
4 * Copyright (C) 2002-8 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * You should have received a copy of the GNU General Public License
13 * (for example COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16 * This code was originally developed as a Senior Thesis by Michael Cornwell
17 * at the Concurrent Systems Laboratory (now part of the Storage Systems
18 * Research Center), Jack Baskin School of Engineering, University of
19 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
27 // unconditionally included files
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
49 // see which system files to conditionally include
52 // conditionally included files
53 #ifdef HAVE_GETOPT_LONG
62 #pragma warning(disable:4761) // "conversion supplied"
63 typedef unsigned short mode_t
;
66 #include <io.h> // umask()
67 #include <process.h> // getpid()
72 // BOOL WINAPI FreeConsole(void);
73 extern "C" int __stdcall
FreeConsole(void);
74 #include <io.h> // setmode()
77 // locally included files
82 #include "knowndrives.h"
89 #include "hostname_win32.h" // gethost/domainname()
90 #define HAVE_GETHOSTNAME 1
91 #define HAVE_GETDOMAINNAME 1
92 // fork()/signal()/initd simulation for native Windows
93 #include "daemon_win32.h" // daemon_main/detach/signal()
95 #define SIGNALFN daemon_signal
96 #define strsignal daemon_strsignal
97 #define sleep daemon_sleep
98 #undef EXIT // see utility.h
99 #define EXIT(x) { exitstatus = daemon_winsvc_exitcode = (x); exit((x)); }
100 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
101 #define SIGQUIT SIGBREAK
102 #define SIGQUIT_KEYNAME "CONTROL-Break"
105 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
106 #define SIGQUIT_KEYNAME "2x CONTROL-C"
108 #define SIGQUIT_KEYNAME "CONTROL-\\"
112 #if defined (__SVR4) && defined (__sun)
113 extern "C" int getdomainname(char *, int); // no declaration in header files!
116 #define ARGUSED(x) ((void)(x))
118 // These are CVS identification information for *.cpp and *.h files
119 extern const char *atacmdnames_c_cvsid
, *atacmds_c_cvsid
, *ataprint_c_cvsid
, *escalade_c_cvsid
,
120 *knowndrives_c_cvsid
, *os_XXXX_c_cvsid
, *scsicmds_c_cvsid
, *utility_c_cvsid
;
122 static const char *filenameandversion
="$Id: smartd.cpp,v 1.397 2008/03/04 22:09:47 ballen4705 Exp $";
123 #ifdef NEED_SOLARIS_ATA_CODE
124 extern const char *os_solaris_ata_s_cvsid
;
127 extern const char *daemon_win32_c_cvsid
, *hostname_win32_c_cvsid
, *syslog_win32_c_cvsid
;
129 const char *smartd_c_cvsid
="$Id: smartd.cpp,v 1.397 2008/03/04 22:09:47 ballen4705 Exp $"
130 ATACMDS_H_CVSID ATAPRINT_H_CVSID CONFIG_H_CVSID
131 #ifdef DAEMON_WIN32_H_CVSID
134 EXTERN_H_CVSID INT64_H_CVSID
135 #ifdef HOSTNAME_WIN32_H_CVSID
136 HOSTNAME_WIN32_H_CVSID
138 KNOWNDRIVES_H_CVSID SCSICMDS_H_CVSID SMARTD_H_CVSID
139 #ifdef SYSLOG_H_CVSID
144 extern const char *reportbug
;
146 // GNU copyleft statement. Needed for GPL purposes.
147 const char *copyleftstring
="smartd comes with ABSOLUTELY NO WARRANTY. This is\n"
148 "free software, and you are welcome to redistribute it\n"
149 "under the terms of the GNU General Public License\n"
150 "Version 2. See http://www.gnu.org for further details.\n\n";
152 extern unsigned char debugmode
;
154 // command-line: how long to sleep between checks
155 static int checktime
=CHECKTIME
;
157 // command-line: name of PID file (NULL for no pid file)
158 static char* pid_file
=NULL
;
160 // configuration file name
162 static char* configfile
= SMARTMONTOOLS_SYSCONFDIR
"/" CONFIGFILENAME
;
164 static char* configfile
= "./" CONFIGFILENAME
;
166 // configuration file "name" if read from stdin
167 static /*const*/ char * const configfile_stdin
= "<stdin>";
168 // allocated memory for alternate configuration file name
169 static char* configfile_alt
= NULL
;
171 // command-line: when should we exit?
174 // command-line; this is the default syslog(3) log facility to use.
175 static int facility
=LOG_DAEMON
;
178 // command-line: fork into background?
179 static bool do_fork
=true;
182 // used for control of printing, passing arguments to atacmds.c
183 smartmonctrl
*con
=NULL
;
185 // pointers to (real or simulated) entries in configuration file, and
186 // maximum space currently allocated for these entries.
187 cfgfile
**cfgentries
=NULL
;
188 int cfgentries_max
=0;
190 // pointers to ATA and SCSI devices being monitored, maximum and
192 cfgfile
**ATAandSCSIdevlist
=NULL
;
193 int ATAandSCSIdevlist_max
=0;
194 int numdevata
=0, numdevscsi
=0;
196 // track memory usage
197 extern int64_t bytes
;
200 extern int exitstatus
;
202 // set to one if we catch a USR1 (check devices now)
203 volatile int caughtsigUSR1
=0;
206 // set to one if we catch a USR2 (toggle debug mode)
207 volatile int caughtsigUSR2
=0;
210 // set to one if we catch a HUP (reload config file). In debug mode,
211 // set to two, if we catch INT (also reload config file).
212 volatile int caughtsigHUP
=0;
214 // set to signal value if we catch INT, QUIT, or TERM
215 volatile int caughtsigEXIT
=0;
218 // stack environment if we time out during SCSI access (USB devices)
219 jmp_buf registerscsienv
;
222 // translate cfg->pending into the correct Attribute numbers
// TranslatePending: derives two unsigned char values from `pending` using the
// CURR_PEND() and OFF_PEND() macros and compares each against its
// *_UNC_DEFAULT sentinel (CUR_UNC_DEFAULT / OFF_UNC_DEFAULT).
// NOTE(review): the statements that store results through `current` and
// `offline`, and the bodies of both if-tests, are not present in this
// listing -- confirm the output contract against the complete source.
223 void TranslatePending(unsigned short pending
, unsigned char *current
, unsigned char *offline
) {
// extract the "current pending" and "offline uncorrectable" parts of pending
225 unsigned char curr
= CURR_PEND(pending
);
226 unsigned char off
= OFF_PEND(pending
);
228 // look for special value of CUR_UNC_DEFAULT that means DONT
229 // monitor. 0 means DO test.
230 if (curr
==CUR_UNC_DEFAULT
)
233 curr
=CUR_UNC_DEFAULT
;
235 // look for special value of OFF_UNC_DEFAULT that means DONT
236 // monitor. 0 means DO TEST.
237 if (off
==OFF_UNC_DEFAULT
)
249 // free all memory associated with selftest part of configfile entry. Return NULL
// FreeTestData: releases the resources held by a testinfo record -- the
// regex text (data->regex), the compiled expression (data->cregex) and then
// the record itself. Each FreeNonZero() result is stored back into the
// pointer that was freed.
// NOTE(review): the guard for a NULL `data` and the return statement are not
// visible in this listing; the preceding comment says the function returns NULL.
250 testinfo
* FreeTestData(testinfo
*data
){
252 // make sure we have something to do.
256 // free space for text pattern
257 data
->regex
=FreeNonZero(data
->regex
, -1, __LINE__
, filenameandversion
);
259 // free compiled expression
260 regfree(&(data
->cregex
));
262 // make sure that no sign of the compiled expression is left behind
263 // (just in case, to help detect bugs if we ever try and refer to
// NOTE(review): the fill byte below is the character '0' (0x30), not a zero
// byte -- confirm that this is the intended poison value.
265 memset(&(data
->cregex
), '0', sizeof(regex_t
));
267 // free remaining memory space
268 data
=FreeNonZero(data
, sizeof(testinfo
), __LINE__
, filenameandversion
);
// AllocateMoreSpace: grows an array of cfgfile pointers by BLOCKSIZE (8)
// slots using realloc(). The result of realloc() is kept in a separate
// pointer (newptr), so `oldarray` is not overwritten by the call itself.
// The global `bytes` counter is increased by the size of the added slots,
// and `listname` is used only in the log messages.
// NOTE(review): the declaration of `olds`, the success/failure test, the
// loop body, the update of *oldsize and the return statements are not
// visible in this listing.
273 cfgfile
**AllocateMoreSpace(cfgfile
**oldarray
, int *oldsize
, char *listname
){
274 // for now keep BLOCKSIZE small to help detect coding problems.
275 // Perhaps increase in the future.
276 const int BLOCKSIZE
=8;
// new slot count = previous count plus one block
279 int news
= olds
+ BLOCKSIZE
;
280 cfgfile
**newptr
=(cfgfile
**)realloc(oldarray
, news
*sizeof(cfgfile
*));
282 // did we get more space?
285 // clear remaining entries ala calloc()
286 for (i
=olds
; i
<news
; i
++)
// account for the newly added slots in the memory-usage counter
289 bytes
+= BLOCKSIZE
*sizeof(cfgfile
*);
294 PrintOut(LOG_INFO
, "allocating %d slots for %s\n", BLOCKSIZE
, listname
);
// reported when no additional space could be obtained
300 PrintOut(LOG_CRIT
, "out of memory for allocating %s list\n", listname
);
// PrintOneCVS: passes one CVS identification string through printone() into
// the buffer `out` and logs that buffer at LOG_INFO. The buffer is printed
// via a "%s" format, so its contents are never interpreted as a format string.
// NOTE(review): the declaration of `out` is not visible in this listing.
304 void PrintOneCVS(const char *a_cvs_id
){
306 printone(out
,a_cvs_id
);
307 PrintOut(LOG_INFO
,"%s",out
);
311 // prints CVS identity information for the executable
313 const char *configargs
=strlen(SMARTMONTOOLS_CONFIGURE_ARGS
)?SMARTMONTOOLS_CONFIGURE_ARGS
:"[no arguments given]";
315 PrintOut(LOG_INFO
,(char *)copyleftstring
);
316 PrintOut(LOG_INFO
,"CVS version IDs of files used to build this code are:\n");
317 PrintOneCVS(atacmdnames_c_cvsid
);
318 PrintOneCVS(atacmds_c_cvsid
);
319 PrintOneCVS(ataprint_c_cvsid
);
321 PrintOneCVS(daemon_win32_c_cvsid
);
324 PrintOneCVS(hostname_win32_c_cvsid
);
326 PrintOneCVS(knowndrives_c_cvsid
);
327 PrintOneCVS(os_XXXX_c_cvsid
);
328 #ifdef NEED_SOLARIS_ATA_CODE
329 PrintOneCVS( os_solaris_ata_s_cvsid
);
331 PrintOneCVS(scsicmds_c_cvsid
);
332 PrintOneCVS(smartd_c_cvsid
);
334 PrintOneCVS(syslog_win32_c_cvsid
);
336 PrintOneCVS(utility_c_cvsid
);
337 PrintOut(LOG_INFO
, "\nsmartmontools release " PACKAGE_VERSION
" dated " SMARTMONTOOLS_RELEASE_DATE
" at " SMARTMONTOOLS_RELEASE_TIME
"\n");
338 PrintOut(LOG_INFO
, "smartmontools build host: " SMARTMONTOOLS_BUILD_HOST
"\n");
339 PrintOut(LOG_INFO
, "smartmontools build configured: " SMARTMONTOOLS_CONFIGURE_DATE
"\n");
340 PrintOut(LOG_INFO
, "smartd compile dated " __DATE__
" at "__TIME__
"\n");
341 PrintOut(LOG_INFO
, "smartmontools configure arguments: %s\n", configargs
);
345 // Removes config file entry, freeing all memory
// RmConfigEntry: releases one configuration entry and everything it owns.
// Each owned member is handed to FreeNonZero() (or FreeTestData() for
// testdata) and the returned value is stored back into that member; finally
// the entry itself is freed and the result is written to *anentry.
//   anentry  - address of the entry pointer to release
//   whatline - caller's line number, reported in the internal-error message
// The size passed to FreeNonZero() is -1 for the char* members (name,
// address, emailcmdline) and an explicit byte count for the others; the
// meaning of -1 is defined by FreeNonZero(), which is not shown here.
// NOTE(review): the definition of `cfg`, the NULL checks described by the
// comments, and any guard around the cfg->mailwarn accesses are not visible
// in this listing.
346 void RmConfigEntry(cfgfile
**anentry
, int whatline
){
350 // pointer should never be null!
352 PrintOut(LOG_CRIT
,"Internal error in RmConfigEntry() at line %d of file %s\n%s",
353 whatline
, filenameandversion
, reportbug
);
357 // only remove entries that exist!
361 // entry exists -- free all of its memory
362 cfg
->name
= FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
363 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
364 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
365 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
366 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
// mail-warning data: free the strings it owns first, then the record itself
368 cfg
->mailwarn
->address
= FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
369 cfg
->mailwarn
->emailcmdline
= FreeNonZero(cfg
->mailwarn
->emailcmdline
, -1,__LINE__
,filenameandversion
);
370 cfg
->mailwarn
= FreeNonZero(cfg
->mailwarn
, sizeof(maildata
),__LINE__
,filenameandversion
);
// self-test data has its own release routine
372 cfg
->testdata
= FreeTestData(cfg
->testdata
);
// finally free the entry and store the result in the caller's slot
373 *anentry
= FreeNonZero(cfg
, sizeof(cfgfile
),__LINE__
,filenameandversion
);
378 // deallocates all memory associated with cfgentries list
// RmAllConfigEntries: calls RmConfigEntry() on every one of the
// cfgentries_max slots of the global cfgentries array, then frees the array
// itself through FreeNonZero() and stores the result back in cfgentries.
// NOTE(review): the declaration of `i` and any reset of cfgentries_max are
// not visible in this listing.
379 void RmAllConfigEntries(){
382 for (i
=0; i
<cfgentries_max
; i
++)
383 RmConfigEntry(cfgentries
+i
, __LINE__
);
385 cfgentries
=FreeNonZero(cfgentries
, sizeof(cfgfile
*)*cfgentries_max
, __LINE__
, filenameandversion
);
391 // deallocates all memory associated with ATA/SCSI device lists
// RmAllDevEntries: calls RmConfigEntry() on every one of the
// ATAandSCSIdevlist_max slots of the global ATAandSCSIdevlist array, frees
// the array itself through FreeNonZero(), and resets the slot count to 0.
// NOTE(review): the declaration of `i` is not visible in this listing.
392 void RmAllDevEntries(){
395 for (i
=0; i
<ATAandSCSIdevlist_max
; i
++)
396 RmConfigEntry(ATAandSCSIdevlist
+i
, __LINE__
);
398 ATAandSCSIdevlist
=FreeNonZero(ATAandSCSIdevlist
, sizeof(cfgfile
*)*ATAandSCSIdevlist_max
, __LINE__
, filenameandversion
);
399 ATAandSCSIdevlist_max
=0;
404 // remove the PID file
// RemovePidFile: unlinks the file named by the global pid_file, logging a
// LOG_CRIT message with strerror(errno) if unlink() returns -1, and then
// frees the pid_file string through FreeNonZero().
// NOTE(review): any guard for pid_file being NULL is not visible in this
// listing.
405 void RemovePidFile(){
407 if ( -1==unlink(pid_file
) )
408 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
409 pid_file
, strerror(errno
));
410 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
416 // Note if we catch a SIGUSR1
417 void USR1handler(int sig
){
424 // Note if we catch a SIGUSR2
425 void USR2handler(int sig
){
432 // Note if we catch a HUP (or INT in debug mode)
433 void HUPhandler(int sig
){
441 // signal handler for TERM, QUIT, and INT (if not in debug mode)
442 void sighandler(int sig
){
449 // signal handler that prints Goodbye message and removes pidfile
452 // clean up memory -- useful for debugging
453 RmAllConfigEntries();
456 // delete PID file, if one was created
459 // remove alternate configfile name
460 configfile_alt
=FreeNonZero(configfile_alt
, -1,__LINE__
,filenameandversion
);
462 // useful for debugging -- have we managed memory correctly?
463 if (debugmode
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
464 PrintOut(LOG_INFO
, "Memory still allocated for devices at exit is %" PRId64
" bytes.\n", bytes
);
466 // if we are exiting because of a code bug, tell user
467 if (exitstatus
==EXIT_BADCODE
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
468 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
470 if (exitstatus
==0 && bytes
)
471 exitstatus
=EXIT_BADCODE
;
473 // and this should be the final output from smartd before it exits
474 PrintOut(exitstatus
?LOG_CRIT
:LOG_INFO
, "smartd is exiting (exit status %d)\n", exitstatus
);
479 #define ENVLENGTH 1024
481 // a replacement for setenv() which is not available on all platforms.
482 // Note that the string passed to putenv must not be freed or made
483 // invalid, since a pointer to it is kept by putenv(). This means that
484 // it must either be a static buffer or allocated off the heap. The
485 // string can be freed if the environment variable is redefined or
486 // deleted via another call to putenv(). So we keep these on the stack
487 // as long as the popen() call is underway.
// exportenv: writes "name=value" into the caller-supplied buffer
// `stackspace` and passes that buffer to putenv().
//   stackspace - buffer that receives the string; snprintf() is told it is
//                ENVLENGTH bytes long, so the caller must provide at least that
//   name/value - the two halves of the environment assignment
// Returns whatever putenv() returns.
// The result of snprintf() is not checked, so a name/value pair that does
// not fit in ENVLENGTH bytes is truncated without any report.
488 int exportenv(char* stackspace
, const char *name
, const char *value
){
489 snprintf(stackspace
,ENVLENGTH
, "%s=%s", name
, value
);
490 return putenv(stackspace
);
// dnsdomain: when HAVE_GETHOSTBYNAME is defined, looks `hostname` up with
// gethostbyname() and searches the returned h_name for its first '.'.
// NOTE(review): the declarations of `hp` and `p`, the value returned when a
// '.' is found, and the fallback return are not visible in this listing --
// presumably the text after the first '.' is the domain; confirm.
493 char* dnsdomain(const char* hostname
) {
495 #ifdef HAVE_GETHOSTBYNAME
498 if ((hp
= gethostbyname(hostname
))) {
499 // Does this work if gethostbyname() returns an IPv6 name in
500 // colon/dot notation? [BA]
501 if ((p
= strchr(hp
->h_name
, '.')))
512 // If either address or executable path is non-null then send and log
513 // a warning email, or execute executable
514 void MailWarning(cfgfile
*cfg
, int which
, char *fmt
, ...){
515 char command
[2048], message
[256], hostname
[256], domainname
[256], additional
[256],fullmessage
[1024];
516 char original
[256], further
[256], nisdomain
[256], subject
[256],dates
[DATEANDEPOCHLEN
];
517 char environ_strings
[11][ENVLENGTH
];
520 const int day
=24*3600;
528 "FailedHealthCheck", // 5
529 "FailedReadSmartData", // 6
530 "FailedReadSmartErrorLog", // 7
531 "FailedReadSmartSelfTestLog", // 8
532 "FailedOpenDevice", // 9
533 "CurrentPendingSector", // 10
534 "OfflineUncorrectableSector", // 11
538 char *address
, *executable
;
540 maildata
* data
=cfg
->mailwarn
;
544 char stdinbuf
[1024]; int boxmsgoffs
, boxtype
;
546 const char *newadd
=NULL
, *newwarn
=NULL
;
547 const char *unknown
="[Unknown]";
549 // See if user wants us to send mail
553 address
=data
->address
;
554 executable
=data
->emailcmdline
;
556 if (!address
&& !executable
)
559 // which type of mail are we sending?
560 mail
=(data
->maillog
)+which
;
563 if (data
->emailfreq
<1 || data
->emailfreq
>3) {
564 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg->mailwarn->emailfreq=%d\n",data
->emailfreq
);
567 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
568 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
569 which
, (int)sizeof(whichfail
));
573 // Return if a single warning mail has been sent.
574 if ((data
->emailfreq
==1) && mail
->logged
)
577 // Return if this is an email test and one has already been sent.
578 if (which
== 0 && mail
->logged
)
581 // To decide if to send mail, we need to know what time it is.
584 // Return if less than one day has gone by
585 if (data
->emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
588 // Return if less than 2^(logged-1) days have gone by
589 if (data
->emailfreq
==3 && mail
->logged
){
590 days
=0x01<<(mail
->logged
-1);
592 if (epoch
<(mail
->lastsent
+days
))
596 // record the time of this mail message, and the first mail message
598 mail
->firstsent
=epoch
;
599 mail
->lastsent
=epoch
;
601 // get system host & domain names (not null terminated if length=MAX)
602 #ifdef HAVE_GETHOSTNAME
603 if (gethostname(hostname
, 256))
604 strcpy(hostname
, unknown
);
608 p
= dnsdomain(hostname
);
610 strncpy(domainname
, p
, 255);
611 domainname
[255]='\0';
613 strcpy(domainname
, unknown
);
616 strcpy(hostname
, unknown
);
617 strcpy(domainname
, unknown
);
620 #ifdef HAVE_GETDOMAINNAME
621 if (getdomainname(nisdomain
, 256))
622 strcpy(nisdomain
, unknown
);
626 strcpy(nisdomain
, unknown
);
629 // print warning string into message
631 vsnprintf(message
, 256, fmt
, ap
);
634 // appropriate message about further information
635 additional
[0]=original
[0]=further
[0]='\0';
637 sprintf(further
,"You can also use the smartctl utility for further investigation.\n");
639 switch (data
->emailfreq
){
641 sprintf(additional
,"No additional email messages about this problem will be sent.\n");
644 sprintf(additional
,"Another email message will be sent in 24 hours if the problem persists.\n");
647 sprintf(additional
,"Another email message will be sent in %d days if the problem persists\n",
648 (0x01)<<mail
->logged
);
651 if (data
->emailfreq
>1 && mail
->logged
){
652 dateandtimezoneepoch(dates
, mail
->firstsent
);
653 sprintf(original
,"The original email about this issue was sent at %s\n", dates
);
657 snprintf(subject
, 256,"SMART error (%s) detected on host: %s", whichfail
[which
], hostname
);
659 // If the user has set cfg->emailcmdline, use that as mailer, else "mail" or "mailx".
661 #ifdef DEFAULT_MAILER
662 executable
= DEFAULT_MAILER
;
667 executable
= "blat"; // http://blat.sourceforge.net/
671 // make a private copy of address with commas replaced by spaces
672 // to separate recipients
674 address
=CustomStrDup(data
->address
, 1, __LINE__
, filenameandversion
);
675 #ifndef _WIN32 // blat mailer needs comma
678 while ((comma
=strchr(comma
, ',')))
684 // Export information in environment variables that will be useful
686 exportenv(environ_strings
[0], "SMARTD_MAILER", executable
);
687 exportenv(environ_strings
[1], "SMARTD_MESSAGE", message
);
688 exportenv(environ_strings
[2], "SMARTD_SUBJECT", subject
);
689 dateandtimezoneepoch(dates
, mail
->firstsent
);
690 exportenv(environ_strings
[3], "SMARTD_TFIRST", dates
);
691 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
692 exportenv(environ_strings
[4], "SMARTD_TFIRSTEPOCH", dates
);
693 exportenv(environ_strings
[5], "SMARTD_FAILTYPE", whichfail
[which
]);
695 exportenv(environ_strings
[6], "SMARTD_ADDRESS", address
);
696 exportenv(environ_strings
[7], "SMARTD_DEVICESTRING", cfg
->name
);
698 switch (cfg
->controller_type
) {
699 case CONTROLLER_3WARE_678K
:
700 case CONTROLLER_3WARE_9000_CHAR
:
701 case CONTROLLER_3WARE_678K_CHAR
:
703 char *s
,devicetype
[16];
704 sprintf(devicetype
, "3ware,%d", cfg
->controller_port
-1);
705 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
706 if ((s
=strchr(cfg
->name
, ' ')))
708 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
713 case CONTROLLER_CCISS
:
715 char *s
,devicetype
[16];
716 sprintf(devicetype
, "cciss,%d", cfg
->controller_port
-1);
717 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
718 if ((s
=strchr(cfg
->name
, ' ')))
720 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
726 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "ata");
727 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
729 case CONTROLLER_MARVELL_SATA
:
730 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "marvell");
731 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
733 case CONTROLLER_SCSI
:
734 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "scsi");
735 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
738 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "sat");
739 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
743 char *s
,devicetype
[16];
744 sprintf(devicetype
, "hpt,%d/%d/%d", cfg
->hpt_data
[0],
745 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
746 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
747 if ((s
=strchr(cfg
->name
, ' ')))
749 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
756 snprintf(fullmessage
, 1024,
757 "This email was generated by the smartd daemon running on:\n\n"
760 " NIS domain: %s\n\n"
761 "The following warning/error was logged by the smartd daemon:\n\n"
763 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
765 hostname
, domainname
, nisdomain
, message
, further
, original
, additional
);
766 exportenv(environ_strings
[10], "SMARTD_FULLMESSAGE", fullmessage
);
768 // now construct a command to send this as EMAIL
771 snprintf(command
, 2048,
772 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
773 "%sENDMAIL\n", subject
, address
, fullmessage
);
775 snprintf(command
, 2048, "%s 2>&1", executable
);
777 // tell SYSLOG what we are about to do...
778 newadd
=address
?address
:"<nomailer>";
779 newwarn
=which
?"Warning via":"Test of";
781 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
782 which
?"Sending warning via":"Executing test of", executable
, newadd
);
784 // issue the command to send mail or to run the user's executable
786 if (!(pfp
=popen(command
, "r")))
787 // failed to popen() mail process
788 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
789 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
793 char buffer
[EBUFLEN
];
795 // if unexpected output on stdout/stderr, null terminate, print, and flush
796 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
798 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
800 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
801 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
803 // flush pipe if needed
804 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
807 // tell user that pipe was flushed, or that something is really wrong
808 if (count
&& count
<EBUFLEN
)
809 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
810 newwarn
, executable
, newadd
);
812 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
813 newwarn
, executable
, newadd
);
816 // if something went wrong with mail process, print warning
818 if (-1==(status
=pclose(pfp
)))
819 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
820 errno
?strerror(errno
):"");
822 // mail process apparently succeeded. Check and report exit status
825 if (WIFEXITED(status
)) {
826 // exited 'normally' (but perhaps with nonzero status)
827 status8
=WEXITSTATUS(status
);
830 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
831 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
833 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
834 newwarn
, executable
, newadd
, status
, status8
);
836 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
839 if (WIFSIGNALED(status
))
840 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
841 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
843 // this branch is probably not possible. If subprocess is
844 // stopped then pclose() should not return.
845 if (WIFSTOPPED(status
))
846 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
847 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
854 // No "here-documents" on Windows, so must use separate commandline and stdin
855 command
[0] = stdinbuf
[0] = 0;
856 boxtype
= -1; boxmsgoffs
= 0;
857 newadd
= "<nomailer>";
859 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
860 int addroffs
= (!strncmp(address
, "sys", 3) ? 3 : 0);
861 if (!strncmp(address
+addroffs
, "msgbox", 6) && (!address
[addroffs
+6] || address
[addroffs
+6] == ',')) {
862 boxtype
= (addroffs
> 0 ? 1 : 0);
864 if (address
[addroffs
])
870 if (address
[addroffs
]) {
871 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
872 snprintf(command
, sizeof(command
),
873 "%s - -q -subject \"%s\" -to \"%s\"",
874 executable
, subject
, address
+addroffs
);
875 newadd
= address
+addroffs
;
877 // Message for mail [0...] and messagebox [boxmsgoffs...]
878 snprintf(stdinbuf
, sizeof(stdinbuf
),
879 "This email was generated by the smartd daemon running on:\n\n"
882 // " NIS domain: %s\n"
884 "The following warning/error was logged by the smartd daemon:\n\n"
886 "For details see the event log or log file of smartd.\n\n"
889 hostname
, /*domainname, */ nisdomain
, &boxmsgoffs
, message
, further
, original
, additional
);
892 snprintf(command
, sizeof(command
), "%s", executable
);
894 newwarn
=which
?"Warning via":"Test of";
897 daemon_messagebox(boxtype
, subject
, stdinbuf
+boxmsgoffs
);
898 PrintOut(LOG_INFO
,"%s message box\n", newwarn
);
901 char stdoutbuf
[800]; // < buffer in syslog_win32::vsyslog()
904 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
905 (which
?"Sending warning via":"Executing test of"), executable
, newadd
);
906 rc
= daemon_spawn(command
, stdinbuf
, strlen(stdinbuf
), stdoutbuf
, sizeof(stdoutbuf
));
907 if (rc
>= 0 && stdoutbuf
[0])
908 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
909 newwarn
, executable
, newadd
, strlen(stdoutbuf
), stdoutbuf
);
911 PrintOut(LOG_CRIT
,"%s %s to %s: failed, exit status %d\n",
912 newwarn
, executable
, newadd
, rc
);
914 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
919 // increment mail sent counter
922 // free copy of address (without commas)
923 address
=FreeNonZero(address
, -1, __LINE__
, filenameandversion
);
928 // Printing function for watching ataprint commands, or losing them
929 // [From GLIBC Manual: Since the prototype doesn't specify types for
930 // optional arguments, in a call to a variadic function the default
931 // argument promotions are performed on the optional argument
932 // values. This means the objects of type char or short int (whether
933 // signed or not) are promoted to either int or unsigned int, as
// pout: printf-style output routine. Which sink is used depends on the
// global debugmode, the global `facility`, and flags in the global `con`:
//  - debugmode nonzero and not 2: the branch visible here writes the
//    formatted text with vfprintf() when facility == LOG_LOCAL1;
//  - debugmode == 2, or any of con->reportataioctl, con->reportscsiioctl,
//    con->controller_port nonzero: the text goes to syslog at LOG_INFO.
// NOTE(review): the va_list setup/teardown and the alternative output calls
// are not visible in this listing. The visible vfprintf() targets stderr
// although its comment says "logging to stdout" -- likely a platform
// conditional whose other lines were dropped; confirm.
935 void pout(const char *fmt
, ...){
938 // get the correct time in syslog()
939 FixGlibcTimeZoneBug();
940 // initialize variable argument list
942 // in debug==1 mode we will print the output from the ataprint.o functions!
943 if (debugmode
&& debugmode
!=2)
945 if (facility
== LOG_LOCAL1
) // logging to stdout
946 vfprintf(stderr
,fmt
,ap
);
950 // in debug==2 mode we print output from knowndrives.o functions
951 else if (debugmode
==2 || con
->reportataioctl
|| con
->reportscsiioctl
|| con
->controller_port
) {
952 openlog("smartd", LOG_PID
, facility
);
953 vsyslog(LOG_INFO
, fmt
, ap
);
961 // This function prints either to stdout or to the syslog as needed.
962 // This function is also used by utility.cpp to report LOG_CRIT errors.
// PrintOut: printf-style logging routine taking a syslog priority.
//   priority - syslog(3) priority passed to vsyslog()
//   fmt, ... - printf-style format and arguments
// The visible code opens the log as "smartd" with LOG_PID and the global
// `facility`, then logs through vsyslog(); a vfprintf() path is also
// present for facility == LOG_LOCAL1.
// NOTE(review): the va_list setup/teardown and the condition selecting
// between the direct-print and syslog paths are not visible in this listing.
963 void PrintOut(int priority
, const char *fmt
, ...){
966 // get the correct time in syslog()
967 FixGlibcTimeZoneBug();
968 // initialize variable argument list
972 if (facility
== LOG_LOCAL1
) // logging to stdout
973 vfprintf(stderr
,fmt
,ap
);
978 openlog("smartd", LOG_PID
, facility
);
979 vsyslog(priority
,fmt
,ap
);
987 // Wait for the pid file to show up, this makes sure a calling program knows
988 // that the daemon is really up and running and has a pid to kill it
// WaitForPidFile: checks with stat() whether the file named by the global
// pid_file exists, trying up to max_wait (10) times. The wait is bypassed
// when no pid_file is configured or when debugmode is set.
// NOTE(review): the return values, the delay between attempts and the
// closing braces are not visible in this listing.
989 bool WaitForPidFile()
991 int waited
, max_wait
= 10;
992 struct stat stat_buf
;
// nothing to wait for without a PID file, or in debug mode
994 if(!pid_file
|| debugmode
)
997 for(waited
= 0; waited
< max_wait
; ++waited
) {
// stat() returning 0 means the file now exists
998 if(stat(pid_file
, &stat_buf
) == 0) {
1007 // Forks new process, closes ALL file descriptors, redirects stdin,
1008 // stdout, and stderr. Not quite daemon(). See
1009 // http://www.iar.unlp.edu.ar/~fede/revistas/lj/Magazines/LJ47/2335.html
1010 // for a good description of why we do things this way.
1016 // flush all buffered streams. Else we might get two copies of open
1017 // streams since both parent and child get copies of the buffers.
1021 if ((pid
=fork()) < 0) {
1023 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1027 // we are the parent process, wait for pid file, then exit cleanly
1028 if(!WaitForPidFile()) {
1029 PrintOut(LOG_CRIT
,"PID file %s didn't show up!\n", pid_file
);
1034 // from here on, we are the child process.
1037 // Fork one more time to avoid any possibility of having terminals
1038 if ((pid
=fork()) < 0) {
1040 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1044 // we are the parent process -- exit cleanly
1047 // Now we are the child's child...
1050 // close any open file descriptors
1051 for (i
=getdtablesize();i
>=0;--i
)
1055 // Cygwin's setsid() does not detach the process from Windows console
1057 #endif // __CYGWIN__
1059 // redirect any IO attempts to /dev/null for stdin
1060 i
=open("/dev/null",O_RDWR
);
1069 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1073 // No fork() on native Win32
1074 // Detach this process from console
1076 if (daemon_detach("smartd")) {
1077 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1080 // stdin/out/err now closed if not redirected
1086 // create a PID file containing the current process id
// WritePidFile: writes the current process id, as a decimal number followed
// by a newline, to the file named by the global pid_file. The file is opened
// with mode "w" after the umask has been changed; failures of fprintf() and
// fclose() are both checked. A LOG_CRIT message is logged on failure and a
// LOG_INFO message on success.
// NOTE(review): two umask() calls (0077 and 0033) are visible; the second is
// commented as Cygwin-specific, so these are presumably alternatives under a
// preprocessor conditional that is missing from this listing. The
// declarations of `fp` and `old_umask`, the restore of the umask, the branch
// bodies and the exit call are likewise not visible.
1087 void WritePidFile() {
1090 pid_t pid
= getpid();
1095 old_umask
= umask(0077); // rwx------
1097 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1098 old_umask
= umask(0033); // rwxr--r--
1100 fp
= fopen(pid_file
, "w");
// a return value <= 0 from fprintf() is treated as a write failure
1104 } else if (fprintf(fp
, "%d\n", (int)pid
) <= 0) {
// fclose() is checked as well
1106 } else if (fclose(fp
) != 0) {
1110 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
);
1113 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
, (int)pid
);
1118 // Prints header identifying version of code and home
1120 #ifdef HAVE_GET_OS_VERSION_STR
1121 const char * ver
= get_os_version_str();
1123 const char * ver
= SMARTMONTOOLS_BUILD_HOST
;
1125 PrintOut(LOG_INFO
,"smartd version %s [%s] Copyright (C) 2002-8 Bruce Allen\n", PACKAGE_VERSION
, ver
);
1126 PrintOut(LOG_INFO
,"Home page is " PACKAGE_HOMEPAGE
"\n\n");
1130 // prints help info for configuration file Directives
1133 "Configuration file (%s) Directives (after device name):\n"
1134 " -d TYPE Set the device type: ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N, cciss,N\n"
1135 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1136 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1137 " -S VAL Enable/disable attribute autosave (on/off)\n"
1138 " -n MODE No check if: never[,q], sleep[,q], standby[,q], idle[,q]\n"
1139 " -H Monitor SMART Health Status, report if failed\n"
1140 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1141 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1142 " -f Monitor 'Usage' Attributes, report failures\n"
1143 " -m ADD Send email warning to address ADD\n"
1144 " -M TYPE Modify email warning behavior (see man page)\n"
1145 " -p Report changes in 'Prefailure' Attributes\n"
1146 " -u Report changes in 'Usage' Attributes\n"
1147 " -t Equivalent to -p and -u Directives\n"
1148 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1149 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1150 " -i ID Ignore Attribute ID for -f Directive\n"
1151 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1152 " -C ID Monitor Current Pending Sectors in Attribute ID\n"
1153 " -U ID Monitor Offline Uncorrectable Sectors in Attribute ID\n"
1154 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1155 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1156 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1157 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1158 " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1159 " # Comment: text after a hash sign is ignored\n"
1160 " \\ Line continuation character\n"
1161 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1162 "Use ID = 0 to turn off -C and/or -U Directives\n"
1163 "Example: /dev/hda -a\n",
/*
 * Returns a pointer to a static string containing a formatted list of the
 * valid arguments to the option opt, or NULL if opt has no such list.
 *
 * The returned pointer refers to a string literal: callers must neither
 * modify nor free it.
 *
 * NOTE(review): 'q' and 'r' are the letters the usage text passes in; the
 * remaining letters were matched to the option/Directive help text
 * (-c, -s, -l, -p, -i) -- confirm against the command-line parser.
 */
const char *GetValidArgList(char opt) {
  switch (opt) {
  case 'c':
    return "<FILE_NAME>, -";
  case 's':
    return "valid_regular_expression";
  case 'l':
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  case 'q':
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  case 'r':
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
  case 'p':
    return "<FILE_NAME>";
  case 'i':
    return "<INTEGER_SECONDS>";
  default:
    // unknown option: no argument list available
    return NULL;
  }
}
1191 /* prints help information for command syntax */
1193 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1194 #ifdef HAVE_GETOPT_LONG
1195 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1196 PrintOut(LOG_INFO
," Read configuration file NAME or stdin [default is %s]\n\n", configfile
);
1197 PrintOut(LOG_INFO
," -d, --debug\n");
1198 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1199 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1200 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1201 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1202 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1203 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1204 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1205 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
1207 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1209 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1212 PrintOut(LOG_INFO
," -n, --no-fork\n");
1213 PrintOut(LOG_INFO
," Do not fork into background\n\n");
1215 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1216 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1217 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1218 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1219 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1220 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1222 PrintOut(LOG_INFO
," --service\n");
1223 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1224 PrintOut(LOG_INFO
," smartd install [options]\n");
1225 PrintOut(LOG_INFO
," Remove service with:\n");
1226 PrintOut(LOG_INFO
," smartd remove\n\n");
1228 #endif // _WIN32 || __CYGWIN__
1229 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1230 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
1232 PrintOut(LOG_INFO
," -c NAME|- Read configuration file NAME or stdin [default is %s]\n", configfile
);
1233 PrintOut(LOG_INFO
," -d Start smartd in debug mode\n");
1234 PrintOut(LOG_INFO
," -D Print the configuration file Directives and exit\n");
1235 PrintOut(LOG_INFO
," -h Display this help and exit\n");
1236 PrintOut(LOG_INFO
," -i N Set interval between disk checks to N seconds, where N >= 10\n");
1237 PrintOut(LOG_INFO
," -l local? Use syslog facility local0 - local7, or daemon\n");
1238 PrintOut(LOG_INFO
," -n Do not fork into background\n");
1239 PrintOut(LOG_INFO
," -p NAME Write PID file NAME\n");
1240 PrintOut(LOG_INFO
," -q WHEN Quit on one of: %s\n", GetValidArgList('q'));
1241 PrintOut(LOG_INFO
," -r TYPE Report transactions for one of: %s\n", GetValidArgList('r'));
1242 PrintOut(LOG_INFO
," -V Print License, Copyright, and version information\n");
1246 // returns negative if problem, else fd>=0
1247 static int OpenDevice(char *device
, char *mode
, int scanning
) {
1251 // If there is an ASCII "space" character in the device name,
1252 // terminate string there. This is for 3ware and highpoint devices only.
1253 if ((s
=strchr(device
,' ')))
1257 fd
= deviceopen(device
, mode
);
1259 // if we removed a space, put it back in please
1263 // if we failed to open the device, complain!
1266 // For linux+devfs, a nonexistent device gives a strange error
1267 // message. This makes the error message a bit more sensible.
1268 // If no debug and scanning - don't print errors
1269 if (debugmode
|| !scanning
) {
1270 if (errno
==ENOENT
|| errno
==ENOTDIR
)
1273 PrintOut(LOG_INFO
,"Device: %s, %s, open() failed\n",
1274 device
, strerror(errno
));
1278 // device opened successfully
1282 int CloseDevice(int fd
, char *name
){
1283 if (deviceclose(fd
)){
1284 PrintOut(LOG_INFO
,"Device: %s, %s, close(%d) failed\n", name
, strerror(errno
), fd
);
1287 // device successfully closed
1291 // returns <0 on failure
1292 int ATAErrorCount(int fd
, char *name
){
1293 struct ata_smart_errorlog log
;
1295 if (-1==ataReadErrorLog(fd
,&log
)){
1296 PrintOut(LOG_INFO
,"Device: %s, Read SMART Error Log Failed\n",name
);
1300 // return current number of ATA errors
1301 return log
.error_log_pointer
?log
.ata_error_count
:0;
1304 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1305 // error count, and top bits are the power-on hours of the last error.
1306 int SelfTestErrorCount(int fd
, char *name
){
1307 struct ata_smart_selftestlog log
;
1309 if (-1==ataReadSelfTestLog(fd
,&log
)){
1310 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1314 // return current number of self-test errors
1315 return ataPrintSmartSelfTestlog(&log
,0);
1318 // scan to see what ata devices there are, and if they support SMART
1319 int ATADeviceScan(cfgfile
*cfg
, int scanning
){
1320 int fd
, supported
=0;
1321 struct ata_identify_device drive
;
1322 char *name
=cfg
->name
;
1323 int retainsmartdata
=0;
1327 // should we try to register this as an ATA device?
1328 switch (cfg
->controller_type
) {
1329 case CONTROLLER_ATA
:
1330 case CONTROLLER_3WARE_678K
:
1331 case CONTROLLER_MARVELL_SATA
:
1332 case CONTROLLER_HPT
:
1333 case CONTROLLER_UNKNOWN
:
1336 case CONTROLLER_3WARE_678K_CHAR
:
1337 mode
="ATA_3WARE_678K";
1339 case CONTROLLER_3WARE_9000_CHAR
:
1340 mode
="ATA_3WARE_9000";
1342 case CONTROLLER_SAT
:
1346 // not a recognized ATA or SATA device. We should never enter
1352 if ((fd
=OpenDevice(name
, mode
, scanning
))<0)
1353 // device open failed
1355 PrintOut(LOG_INFO
,"Device: %s, opened\n", name
);
1357 // pass user settings on to low-level ATA commands
1358 con
->controller_port
=cfg
->controller_port
;
1359 con
->hpt_data
[0]=cfg
->hpt_data
[0];
1360 con
->hpt_data
[1]=cfg
->hpt_data
[1];
1361 con
->hpt_data
[2]=cfg
->hpt_data
[2];
1362 con
->controller_type
=cfg
->controller_type
;
1363 con
->controller_explicit
=cfg
->controller_explicit
;
1364 con
->fixfirmwarebug
= cfg
->fixfirmwarebug
;
1365 con
->satpassthrulen
= cfg
->satpassthrulen
;
1367 // Get drive identity structure
1368 if ((retid
=ataReadHDIdentity (fd
,&drive
))){
1370 // Unable to read Identity structure
1371 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1373 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1374 name
, packetdevicetype(retid
-1));
1375 CloseDevice(fd
, name
);
1379 // Show if device in database, and use preset vendor attribute
1380 // options unless user has requested otherwise.
1381 if (cfg
->ignorepresets
)
1382 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1384 // do whatever applypresets decides to do. Will allocate memory if
1385 // cfg->attributedefs is needed.
1386 if (applypresets(&drive
, &cfg
->attributedefs
, con
)<0)
1387 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1389 PrintOut(LOG_INFO
, "Device: %s, found in smartd database.\n", name
);
1391 // then save the correct state of the flag (applypresets may have changed it)
1392 cfg
->fixfirmwarebug
= con
->fixfirmwarebug
;
1395 // If requested, show which presets would be used for this drive
1396 if (cfg
->showpresets
) {
1397 int savedebugmode
=debugmode
;
1398 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1401 showpresets(&drive
);
1402 debugmode
=savedebugmode
;
1405 // see if drive supports SMART
1406 supported
=ataSmartSupport(&drive
);
1409 // drive does NOT support SMART
1410 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
1412 // can't tell if drive supports SMART
1413 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
1415 // should we proceed anyway?
1416 if (cfg
->permissive
){
1417 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
1420 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
1421 CloseDevice(fd
, name
);
1426 if (ataEnableSmart(fd
)){
1427 // Enable SMART command has failed
1428 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
1429 CloseDevice(fd
, name
);
1433 // disable device attribute autosave...
1434 if (cfg
->autosave
==1){
1435 if (ataDisableAutoSave(fd
))
1436 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
1438 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
1441 // or enable device attribute autosave
1442 if (cfg
->autosave
==2){
1443 if (ataEnableAutoSave(fd
))
1444 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
1446 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
1449 // capability check: SMART status
1450 if (cfg
->smartcheck
&& ataSmartStatus2(fd
)==-1){
1451 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
1455 // capability check: Read smart values and thresholds. Note that
1456 // smart values are ALSO needed even if we ONLY want to know if the
1457 // device is self-test log or error-log capable! After ATA-5, this
1458 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1459 // but sadly not for ATA-5. Sigh.
1461 // do we need to retain SMART data after returning from this routine?
1462 retainsmartdata
=cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
;
1464 // do we need to get SMART data?
1465 if (retainsmartdata
|| cfg
->autoofflinetest
|| cfg
->selftest
|| cfg
->errorlog
|| cfg
->pending
!=DONT_MONITOR_UNC
) {
1467 unsigned char currentpending
, offlinepending
;
1469 cfg
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
));
1470 cfg
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
));
1472 if (!cfg
->smartval
|| !cfg
->smartthres
){
1473 PrintOut(LOG_CRIT
,"Not enough memory to obtain SMART data\n");
1477 if (ataReadSmartValues(fd
,cfg
->smartval
) ||
1478 ataReadSmartThresholds (fd
,cfg
->smartthres
)){
1479 PrintOut(LOG_INFO
,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name
);
1480 retainsmartdata
=cfg
->usagefailed
=cfg
->prefail
=cfg
->usage
=0;
1481 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1482 cfg
->pending
=DONT_MONITOR_UNC
;
1485 // see if the necessary Attribute is there to monitor offline or
1486 // current pending sectors or temperature
1487 TranslatePending(cfg
->pending
, &currentpending
, &offlinepending
);
1489 if (currentpending
&& ATAReturnAttributeRawValue(currentpending
, cfg
->smartval
)<0) {
1490 PrintOut(LOG_INFO
,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1491 name
, (int)currentpending
);
1492 cfg
->pending
&= 0xff00;
1493 cfg
->pending
|= CUR_UNC_DEFAULT
;
1496 if (offlinepending
&& ATAReturnAttributeRawValue(offlinepending
, cfg
->smartval
)<0) {
1497 PrintOut(LOG_INFO
,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1498 name
, (int)offlinepending
);
1499 cfg
->pending
&= 0x00ff;
1500 cfg
->pending
|= OFF_UNC_DEFAULT
<<8;
1503 if ( (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
1504 && !ATAReturnTemperatureValue(cfg
->smartval
, cfg
->attributedefs
)) {
1505 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name
);
1506 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1510 // enable/disable automatic offline testing
1511 if (cfg
->autoofflinetest
){
1512 // is this an enable or disable request?
1513 const char *what
=(cfg
->autoofflinetest
==1)?"disable":"enable";
1515 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
1517 // if command appears unsupported, issue a warning...
1518 if (!isSupportAutomaticTimer(cfg
->smartval
))
1519 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
1520 // ... but then try anyway
1521 if ((cfg
->autoofflinetest
==1)?ataDisableAutoOffline(fd
):ataEnableAutoOffline(fd
))
1522 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
1524 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
1528 // capability check: self-test-log
1532 // start with service disabled, and re-enable it if all works OK
1534 cfg
->selflogcount
=0;
1538 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name
);
1539 else if (!cfg
->permissive
&& !isSmartTestLogCapable(cfg
->smartval
, &drive
))
1540 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name
);
1541 else if ((retval
=SelfTestErrorCount(fd
, name
))<0)
1542 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name
);
1545 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1546 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1550 // capability check: ATA error log
1554 // start with service disabled, and re-enable it if all works OK
1556 cfg
->ataerrorcount
=0;
1559 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name
);
1560 else if (!cfg
->permissive
&& !isSmartErrorLogCapable(cfg
->smartval
, &drive
))
1561 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name
);
1562 else if ((val
=ATAErrorCount(fd
, name
))<0)
1563 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name
);
1566 cfg
->ataerrorcount
=val
;
1570 // If we don't need to save SMART data, get rid of it now
1571 if (!retainsmartdata
) {
1572 if (cfg
->smartval
) {
1573 cfg
->smartval
=CheckFree(cfg
->smartval
, __LINE__
,filenameandversion
);
1574 bytes
-=sizeof(struct ata_smart_values
);
1576 if (cfg
->smartthres
) {
1577 cfg
->smartthres
=CheckFree(cfg
->smartthres
, __LINE__
,filenameandversion
);
1578 bytes
-=sizeof(struct ata_smart_thresholds_pvt
);
1582 // capabilities check -- does it support powermode?
1583 if (cfg
->powermode
) {
1584 int powermode
=ataCheckPowerMode(fd
);
1586 if (-1 == powermode
) {
1587 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
1590 else if (powermode
!=0 && powermode
!=0x80 && powermode
!=0xff) {
1591 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1597 // If no tests available or selected, return
1598 if (!(cfg
->errorlog
|| cfg
->selftest
|| cfg
->smartcheck
||
1599 cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
||
1600 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
1601 CloseDevice(fd
, name
);
1605 // Do we still have entries available?
1606 while (numdevata
+numdevscsi
>=ATAandSCSIdevlist_max
)
1607 ATAandSCSIdevlist
=AllocateMoreSpace(ATAandSCSIdevlist
, &ATAandSCSIdevlist_max
, "ATA and SCSI devices");
1610 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
1612 // record number of device, type of device, increment device count
1613 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1614 cfg
->controller_type
=CONTROLLER_ATA
;
1616 // close file descriptor
1617 CloseDevice(fd
, name
);
1621 // Returns 0 if normal SCSI device.
1622 // Returns -1 if INQUIRY fails.
1623 // Returns 2 if ATA device detected behind SAT layer.
1624 // Returns 3 if ATA device detected behind Marvell controller.
1625 // Returns 1 if other device detected that we don't want to treat
1626 // as a normal SCSI device.
1627 static int SCSIFilterKnown(int fd
, char * device
)
1630 int req_len
, avail_len
, len
;
1632 memset(req_buff
, 0, 96);
1634 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1635 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
1636 /* watch this spot ... other devices could lock up here */
1638 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1639 PrintOut(LOG_INFO
, "Device: %s, failed on INQUIRY; skip device\n", device
);
1640 // device doesn't like INQUIRY commands
1641 return SCSIFK_FAILED
;
1644 avail_len
= req_buff
[4] + 5;
1645 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
1647 if (0 == strncmp(req_buff
+ 8, "3ware", 5) || 0 == strncmp(req_buff
+ 8, "AMCC", 4) ) {
1648 PrintOut(LOG_INFO
, "Device %s, please try adding '-d 3ware,N'\n", device
);
1649 PrintOut(LOG_INFO
, "Device %s, you may need to replace %s with /dev/twaN or /dev/tweN\n", device
, device
);
1650 return SCSIFK_3WARE
;
1651 } else if ((len
>= 42) && (0 == strncmp(req_buff
+ 36, "MVSATA", 6))) {
1652 PrintOut(LOG_INFO
, "Device %s: using '-d marvell' for ATA disk with Marvell driver\n", device
);
1653 return SCSIFK_MARVELL
;
1654 } else if ((avail_len
>= 36) &&
1655 (0 == strncmp(req_buff
+ 8, "ATA ", 8)) &&
1656 has_sat_pass_through(fd
, 0 /* non-packet dev */)) {
1657 PrintOut(LOG_INFO
, "Device %s: using '-d sat' for ATA disk behind SAT layer.\n",
1662 return SCSIFK_NORMAL
;
1665 // on success, return 0. On failure, return >0. Never return <0,
1667 static int SCSIDeviceScan(cfgfile
*cfg
, int scanning
) {
1668 int k
, fd
, err
, retval
;
1669 char *device
= cfg
->name
;
1670 struct scsi_iec_mode_page iec
;
1674 // should we try to register this as a SCSI device?
1675 switch (cfg
->controller_type
) {
1676 case CONTROLLER_SCSI
:
1677 case CONTROLLER_UNKNOWN
:
1680 case CONTROLLER_CCISS
:
1686 // pass user settings on to low-level SCSI commands
1687 con
->controller_port
=cfg
->controller_port
;
1688 con
->controller_type
=cfg
->controller_type
;
1691 if ((fd
= OpenDevice(device
, mode
, scanning
)) < 0)
1693 PrintOut(LOG_INFO
,"Device: %s, opened\n", device
);
1695 // early skip if device known and needs to be handled by some other
1696 // device type (e.g. '-d 3ware,<n>')
1697 if ((retval
= SCSIFilterKnown(fd
, device
))) {
1698 CloseDevice(fd
, device
);
1700 if (retval
==SCSIFK_SAT
)
1701 // SATA Device behind SAT layer
1704 if (retval
==SCSIFK_MARVELL
)
1705 // ATA/SATA device behind Marvell driver
1706 return SCSIFK_MARVELL
;
1711 // check that device is ready for commands. IE stores its stuff on
1713 if ((err
= scsiTestUnitReady(fd
))) {
1714 if (SIMPLE_ERR_NOT_READY
== err
)
1715 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
1716 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
1717 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
1718 else if (SIMPLE_ERR_BECOMING_READY
== err
)
1719 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
1721 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
1722 CloseDevice(fd
, device
);
1726 // Badly-conforming USB storage devices may fail this check.
1727 // The response to the following IE mode page fetch (current and
1728 // changeable values) is carefully examined. It has been found
1729 // that various USB devices that malform the response will lock up
1730 // if asked for a log page (e.g. temperature) so it is best to
1732 if (!(err
= scsiFetchIECmpage(fd
, &iec
, cfg
->modese_len
)))
1733 cfg
->modese_len
= iec
.modese_len
;
1734 else if (SIMPLE_ERR_BAD_FIELD
== err
)
1735 ; /* continue since it is reasonable not to support IE mpage */
1736 else { /* any other error (including malformed response) unreasonable */
1738 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1740 CloseDevice(fd
, device
);
1744 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1745 // smart if it is off). This may change to be the same as the ATA side.
1746 if (!scsi_IsExceptionControlEnabled(&iec
)) {
1747 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
1748 "Try 'smartctl -s on %s' to turn on SMART features\n",
1750 CloseDevice(fd
, device
);
1754 // Device exists, and does SMART. Add to list (allocating more space if needed)
1755 while (numdevscsi
+numdevata
>= ATAandSCSIdevlist_max
)
1756 ATAandSCSIdevlist
=AllocateMoreSpace(ATAandSCSIdevlist
, &ATAandSCSIdevlist_max
, "ATA and SCSI devices");
1758 // Flag that certain log pages are supported (information may be
1759 // available from other sources).
1760 if (0 == scsiLogSense(fd
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0)) {
1761 for (k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
1763 case TEMPERATURE_LPAGE
:
1764 cfg
->TempPageSupported
= 1;
1767 cfg
->SmartPageSupported
= 1;
1775 // record type of device
1776 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1777 cfg
->controller_type
= CONTROLLER_SCSI
;
1779 // get rid of allocated memory only needed for ATA devices. These
1780 // might have been allocated if the user specified Ignore options or
1781 // other ATA-only Attribute-specific options on the DEVICESCAN line.
1782 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
1783 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
1784 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
1785 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
1787 // Check if scsiCheckIE() is going to work
1791 UINT8 currenttemp
= 0;
1794 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
1795 &asc
, &ascq
, &currenttemp
, &triptemp
)) {
1796 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
1797 cfg
->SuppressReport
= 1;
1798 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
) {
1799 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device
);
1800 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1805 // capability check: self-test-log
1807 int retval
=scsiCountFailedSelfTests(fd
, 0);
1809 // no self-test log, turn off monitoring
1810 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
1812 cfg
->selflogcount
=0;
1816 // register starting values to watch for changes
1817 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1818 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1822 // disable autosave (set GLTSD bit)
1823 if (cfg
->autosave
==1){
1824 if (scsiSetControlGLTSD(fd
, 1, cfg
->modese_len
))
1825 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
1827 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
1830 // or enable autosave (clear GLTSD bit)
1831 if (cfg
->autosave
==2){
1832 if (scsiSetControlGLTSD(fd
, 0, cfg
->modese_len
))
1833 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
1835 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
1838 // tell user we are registering device
1839 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
1841 // close file descriptor
1842 CloseDevice(fd
, device
);
1846 // modified treatment of SCSI device behind SAT layer
1847 static int SCSIandSATDeviceScan(cfgfile
*cfg
, int scanning
) {
1848 int retval
= SCSIDeviceScan(cfg
, scanning
);
1849 cfg
->WhichCheckDevice
=1; // default SCSI device
1851 if (retval
==SCSIFK_SAT
) {
1852 // found SATA device behind SAT translation layer
1853 cfg
->controller_type
=CONTROLLER_SAT
;
1854 cfg
->WhichCheckDevice
=0; // actually SATA device!
1855 return ATADeviceScan(cfg
, scanning
);
1858 if (retval
==SCSIFK_MARVELL
) {
1859 // found SATA device behind Marvell controller
1860 cfg
->controller_type
=CONTROLLER_MARVELL_SATA
;
1861 cfg
->WhichCheckDevice
=0; // actually SATA device!
1862 return ATADeviceScan(cfg
, scanning
);
1869 // We compare old and new values of the n'th attribute. Note that n
1870 // is NOT the attribute ID number.. If (Normalized & Raw) equal,
1871 // then return 0, else nonzero.
1872 int ATACompareValues(changedattribute_t
*delta
,
1873 struct ata_smart_values
*newv
,
1874 struct ata_smart_values
*oldv
,
1875 struct ata_smart_thresholds_pvt
*thresholds
,
1877 struct ata_smart_attribute
*now
,*was
;
1878 struct ata_smart_threshold_entry
*thre
;
1879 unsigned char oldval
,newval
;
1882 // check that attribute number in range, and no null pointers
1883 if (n
<0 || n
>=NUMBER_ATA_SMART_ATTRIBUTES
|| !newv
|| !oldv
|| !thresholds
)
1886 // pointers to disk's values and vendor's thresholds
1887 now
=newv
->vendor_attributes
+n
;
1888 was
=oldv
->vendor_attributes
+n
;
1889 thre
=thresholds
->thres_entries
+n
;
1891 // consider only valid attributes
1892 if (!now
->id
|| !was
->id
|| !thre
->id
)
1896 // issue warning if they don't have the same ID in all structures:
1897 if ( (now
->id
!= was
->id
) || (now
->id
!= thre
->id
) ){
1898 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
1899 name
, (int)now
->id
, (int)was
->id
, (int)thre
->id
);
1903 // new and old values of Normalized Attributes
1904 newval
=now
->current
;
1905 oldval
=was
->current
;
1907 // See if the RAW values are unchanged (ie, the same)
1908 if (memcmp(now
->raw
, was
->raw
, 6))
1913 // if any values out of the allowed range, or if the values haven't
1914 // changed, return 0
1915 if (!newval
|| !oldval
|| newval
>0xfe || oldval
>0xfe || (oldval
==newval
&& sameraw
))
1918 // values have changed. Construct output and return
1919 delta
->newval
=newval
;
1920 delta
->oldval
=oldval
;
1922 delta
->prefail
=ATTRIBUTE_FLAGS_PREFAILURE(now
->flags
);
1923 delta
->sameraw
=sameraw
;
1928 // This looks to see if the corresponding bit of the 32 bytes is set.
1929 // This wastes a few bytes of storage but eliminates all searching and
1930 // sorting functions! Entry is ZERO <==> the attribute ON. Calling
1931 // with set=0 tells you if the attribute is being tracked or not.
1932 // Calling with set=1 turns the attribute OFF.
1933 int IsAttributeOff(unsigned char attr
, unsigned char **datap
, int set
, int which
, int whatline
){
1934 unsigned char *data
;
1936 int bit
=attr
& 0x07;
1937 unsigned char mask
=0x01<<bit
;
1939 if (which
>=NMONITOR
|| which
< 0){
1940 PrintOut(LOG_CRIT
, "Internal error in IsAttributeOff() at line %d of file %s (which=%d)\n%s",
1941 whatline
, filenameandversion
, which
, reportbug
);
1945 if (*datap
== NULL
){
1946 // NULL data implies Attributes are ON...
1951 if (!(*datap
=(unsigned char *)Calloc(NMONITOR
*32, 1))){
1952 PrintOut(LOG_CRIT
,"No memory to create monattflags\n");
1957 // pointer to the 256 bits that we need
1958 data
=*datap
+which
*32;
1960 // attribute zero is always OFF
1965 return (data
[loc
] & mask
);
1969 // return value when setting has no sense
1973 // If the self-test log has got more self-test errors (or more recent
1974 // self-test errors) recorded, then notify user.
1975 void CheckSelfTestLogs(cfgfile
*cfg
, int newi
){
1976 char *name
=cfg
->name
;
1980 MailWarning(cfg
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
1982 // old and new error counts
1983 int oldc
=cfg
->selflogcount
;
1984 int newc
=SELFTEST_ERRORCOUNT(newi
);
1986 // old and new error timestamps in hours
1987 int oldh
=cfg
->selfloghour
;
1988 int newh
=SELFTEST_ERRORHOURS(newi
);
1991 // increase in error count
1992 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
1994 MailWarning(cfg
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
1996 } else if (oldh
!=newh
) {
1997 // more recent error
1998 // a 'more recent' error might actually be a smaller hour number,
1999 // if the hour number has wrapped.
2000 // There's still a bug here. You might just happen to run a new test
2001 // exactly 32768 hours after the previous failure, and have run exactly
2002 // 20 tests between the two, in which case smartd will miss the
2004 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2006 MailWarning(cfg
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2010 // Needed since self-test error count may DECREASE. Hour might
2011 // also have changed.
2012 cfg
->selflogcount
= newc
;
2013 cfg
->selfloghour
= newh
;
2018 // returns 1 if time to do test of type testtype, 0 if not time to do
2019 // test, < 0 if error
2020 int DoTestNow(cfgfile
*cfg
, char testtype
, time_t testtime
) {
2021 // start by finding out the time:
2024 char matchpattern
[16];
2025 regmatch_t substring
;
2026 int weekday
, length
;
2027 unsigned short hours
;
2028 testinfo
*dat
=cfg
->testdata
;
2030 // check that self-testing has been requested
2034 // since we are about to call localtime(), be sure glibc is informed
2035 // of any timezone changes we make.
2037 FixGlibcTimeZoneBug();
2039 // construct pattern containing the month, day of month, day of
2041 epochnow
= (!testtime
? time(NULL
) : testtime
);
2042 timenow
=localtime(&epochnow
);
2044 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7
2046 weekday
=timenow
->tm_wday
?timenow
->tm_wday
:7;
2047 sprintf(matchpattern
, "%c/%02d/%02d/%1d/%02d", testtype
, timenow
->tm_mon
+1,
2048 timenow
->tm_mday
, weekday
, timenow
->tm_hour
);
2050 // if no match, we are done
2051 if (regexec(&(dat
->cregex
), matchpattern
, 1, &substring
, 0))
2054 // must match the ENTIRE type/date/time string
2055 length
=strlen(matchpattern
);
2056 if (substring
.rm_so
!=0 || substring
.rm_eo
!=length
)
2059 // never do a second test in the same hour as another test (the % 7 ensures
2060 // that the RHS will never be greater than 65535 and so will always fit into
2061 // an unsigned short)
2062 hours
=1+timenow
->tm_hour
+24*(timenow
->tm_yday
+366*(timenow
->tm_year
% 7));
2063 if (hours
==dat
->hour
) {
2064 if (!testtime
&& testtype
!=dat
->testtype
)
2065 PrintOut(LOG_INFO
, "Device: %s, did test of type %c in current hour, skipping test of type %c\n",
2066 cfg
->name
, dat
->testtype
, testtype
);
2070 // save time and type of the current test; we are ready to do a test
2072 dat
->testtype
=testtype
;
2076 // Print a list of future tests.
// Steps forward from 'now' in increments of 'checktime' seconds while the
// offset stays below 90 days (3600L*24*90 seconds) and asks DoTestNow() for
// every device and test type whether a test would start at that time.
// Test types come from "LSCO": all four are tried when WhichCheckDevice==0,
// otherwise only the first two ('L' and 'S').  The first 5 hits per device
// and type are logged individually; afterwards per-device totals are logged.
//   ATAandSCSIdevices: array of numdevata+numdevscsi device entries.
2077 void PrintTestSchedule(cfgfile
**ATAandSCSIdevices
){
2080 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2081 time_t now
; long seconds
;
2082 int numdev
= numdevata
+numdevscsi
;
2083 typedef int cnt_t
[4];
2084 cnt_t
* testcnts
; // testcnts[numdev][4]
// calloc() gives one zero-initialized row of 4 counters per device
2087 testcnts
= (cnt_t
*)calloc(numdev
, sizeof(testcnts
[0]));
2091 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2093 // FixGlibcTimeZoneBug(); // done in PrintOut()
2095 dateandtimezoneepoch(datenow
, now
);
2096 for (seconds
=checktime
; seconds
<3600L*24*90; seconds
+=checktime
) {
2097 // Check for each device whether a test will be run
2098 time_t testtime
= now
+ seconds
;
2099 for (i
=0; i
<numdev
; i
++) {
2100 cfg
= ATAandSCSIdevices
[i
];
2101 for (t
=0; t
<(cfg
->WhichCheckDevice
==0?4:2); t
++) {
2102 char testtype
= "LSCO"[t
];
2103 if (DoTestNow(cfg
, testtype
, testtime
)) {
2104 // Report at most 5 tests of each type
// (the counter keeps incrementing past 5 so the totals below stay exact)
2105 if (++testcnts
[i
][t
] <= 5) {
2106 dateandtimezoneepoch(date
, testtime
);
2107 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
->name
,
2108 testcnts
[i
][t
], testtype
, date
);
// Totals: 'date' is formatted from now+seconds, the end of the scanned range
2116 dateandtimezoneepoch(date
, now
+seconds
);
2117 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2118 for (i
=0; i
<numdev
; i
++) {
2119 cfg
= ATAandSCSIdevices
[i
];
2120 for (t
=0; t
<(cfg
->WhichCheckDevice
==0?4:2); t
++) {
2121 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
->name
, testcnts
[i
][t
],
2122 (testcnts
[i
][t
]==1?"":"s"), "LSCO"[t
]);
2129 // Return zero on success, nonzero on failure. Perform offline (background)
2130 // short or long (extended) self test on given scsi device.
//   fd:       open descriptor passed to the scsi* helper calls
//   cfg:      device entry; cfg->name is used in messages and
//             cfg->testdata->not_cap_short / not_cap_long are set when the
//             device turns out not to support a test
//   testtype: requested test; selects scsiSmartShortSelfTest() or
//             scsiSmartExtendSelfTest()
// NOTE(review): the lines that select on testtype and the return statements
// are not visible here -- confirm the exact mapping against the full file.
2131 int DoSCSISelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2133 char *testname
= NULL
;
2134 char *name
= cfg
->name
;
// A nonzero result from scsiSelfTestInProgress() is treated as "no self-test
// support at all": both the short and the long capability are switched off.
2137 if (scsiSelfTestInProgress(fd
, &inProgress
)) {
2138 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2139 cfg
->testdata
->not_cap_short
=cfg
->testdata
->not_cap_long
=1;
// Do not start a new test while one is already running
2143 if (1 == inProgress
) {
2144 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
2145 "progress.\n", name
);
2151 testname
= "Short Self";
2152 retval
= scsiSmartShortSelfTest(fd
);
2155 testname
= "Long Self";
2156 retval
= scsiSmartExtendSelfTest(fd
);
2159 // If we can't do the test, exit
// (testname is still NULL when no test was selected above)
2160 if (NULL
== testname
) {
2161 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
// BAD_OPCODE / BAD_FIELD are reported as "not capable" and recorded in the
// not_cap_* flags; other nonzero results get the "execute ... failed" message.
2166 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
2167 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
2168 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
2171 cfg
->testdata
->not_cap_long
=1;
2173 cfg
->testdata
->not_cap_short
=1;
2177 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
2182 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
2187 // Do an offline immediate or self-test. Return zero on success,
2188 // nonzero on failure.
//   fd:       open descriptor passed to ataReadSmartValues() and
//             smartcommandhandler()
//   cfg:      device entry; cfg->testdata->not_cap_* flags are set when the
//             drive does not advertise the requested test
//   testtype: requested test (offline immediate, conveyance, short or long)
// NOTE(review): the lines selecting on testtype and the return statements are
// not visible here -- confirm the exact mapping against the full file.
2189 int DoATASelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2191 struct ata_smart_values data
;
2192 char *testname
=NULL
;
// dotest starts at -1; it is only overwritten when the capability check for
// the requested test succeeds
2193 int retval
, dotest
=-1;
2194 char *name
=cfg
->name
;
2196 // Read current smart data and check status/capability
2197 if (ataReadSmartValues(fd
, &data
) || !(data
.offline_data_collection_capability
)) {
2198 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
2202 // Check for capability to do the test
// Each variant sets testname for the messages, then either picks the command
// code for smartcommandhandler() or records the missing capability.
2205 testname
="Offline Immediate ";
2206 if (isSupportExecuteOfflineImmediate(&data
))
2207 dotest
=OFFLINE_FULL_SCAN
;
2209 cfg
->testdata
->not_cap_offline
=1;
2212 testname
="Conveyance Self-";
2213 if (isSupportConveyanceSelfTest(&data
))
2214 dotest
=CONVEYANCE_SELF_TEST
;
2216 cfg
->testdata
->not_cap_conveyance
=1;
2219 testname
="Short Self-";
2220 if (isSupportSelfTest(&data
))
2221 dotest
=SHORT_SELF_TEST
;
2223 cfg
->testdata
->not_cap_short
=1;
// Short and long tests share the same capability check (isSupportSelfTest)
2226 testname
="Long Self-";
2227 if (isSupportSelfTest(&data
))
2228 dotest
=EXTEND_SELF_TEST
;
2230 cfg
->testdata
->not_cap_long
=1;
2234 // If we can't do the test, exit
2236 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
2240 // If currently running a self-test, do not interrupt it to start another.
// High nibble 15 of self_test_exec_status marks a test in progress; the low
// nibble is printed as the remaining percentage in tens.
2241 if (15==(data
.self_test_exec_status
>> 4)) {
// With FIX_SAMSUNG3 a status byte of exactly 0xf0 is not trusted
2242 if (cfg
->fixfirmwarebug
== FIX_SAMSUNG3
&& data
.self_test_exec_status
== 0xf0) {
2243 PrintOut(LOG_INFO
, "Device: %s, will not skip scheduled %sTest "
2244 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name
, testname
);
2246 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2247 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
2252 // else execute the test, and return status
2253 if ((retval
=smartcommandhandler(fd
, IMMEDIATE_OFFLINE
, dotest
, NULL
)))
2254 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
2256 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
2261 // Check Temperature limits
//   cfg:      device entry holding the tracked state (temperature, tempmin,
//             tempmax, tempmininc) and the configured limits (tempdiff,
//             tempinfo, tempcrit)
//   currtemp: current reading in Celsius; values outside 1..254 are reported
//             as a failed read
//   triptemp: trip temperature, only used in the "initial Temperature" report
// A changed minimum or maximum is flagged with "!" in the log messages.
2262 static void CheckTemperature(cfgfile
* cfg
, unsigned char currtemp
, unsigned char triptemp
)
2264 const char *minchg
= "", *maxchg
= "";
2265 if (!(0 < currtemp
&& currtemp
< 255)) {
2266 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
->name
);
// cfg->temperature == 0 means no reading has been stored yet: report the
// initial value and seed current/min/max with it
2270 if (!cfg
->temperature
) {
2271 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius\n",
2272 cfg
->name
, (int)currtemp
);
2274 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
2275 cfg
->temperature
= cfg
->tempmin
= cfg
->tempmax
= currtemp
;
// New minimum: record it, mark it changed, and stop the tempmininc phase
2279 if (currtemp
< cfg
->tempmin
) {
2280 cfg
->tempmin
= currtemp
; minchg
= "!";
2281 cfg
->tempmininc
= 0;
2283 else if (cfg
->tempmininc
) {
2284 // increase min Temperature during first 30 minutes
2285 cfg
->tempmin
= currtemp
;
2288 if (currtemp
> cfg
->tempmax
) {
2289 cfg
->tempmax
= currtemp
; maxchg
= "!";
// Report a change when tracking is enabled (tempdiff != 0) and either min/max
// moved or the reading differs from the stored one by at least tempdiff
2293 if (cfg
->tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)cfg
->temperature
) >= cfg
->tempdiff
)) {
2294 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %u%s/%u%s)\n",
2295 cfg
->name
, (int)currtemp
-(int)cfg
->temperature
, currtemp
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2296 cfg
->temperature
= currtemp
;
// Critical limit: logged at LOG_CRIT and mailed (warning type 12); the
// informational limit is only checked when the critical one did not trigger
2301 if (cfg
->tempcrit
&& currtemp
>= cfg
->tempcrit
) {
2302 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2303 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2304 MailWarning(cfg
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2305 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2307 else if (cfg
->tempinfo
&& currtemp
>= cfg
->tempinfo
) {
2308 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2309 cfg
->name
, currtemp
, cfg
->tempinfo
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
// Run one monitoring pass on an ATA device.
//   cfg:             device entry with the user's Directives and saved state
//   allow_selftests: when false, no scheduled self-test is looked up
// Visible steps, in order: copy controller settings into 'con', optionally
// send the test email, open the device, decide whether a scheduled self-test
// is due (priority 'L', 'S', 'C', 'O'), honor the power-mode setting
// (cfg->powermode), check SMART status, check attributes / pending sectors /
// temperature, check self-test and error logs, start the scheduled self-test,
// and close the device again.
// NOTE(review): the return statements and several conditions are not visible
// here -- confirm the return value convention against the full file.
2313 int ATACheckDevice(cfgfile
*cfg
, bool allow_selftests
){
2315 char *name
=cfg
->name
;
2319 // fix firmware bug if requested
2320 con
->fixfirmwarebug
=cfg
->fixfirmwarebug
;
2321 con
->controller_port
=cfg
->controller_port
;
2322 con
->controller_type
=cfg
->controller_type
;
2323 con
->controller_explicit
=cfg
->controller_explicit
;
2324 // Highpoint-specific data
2325 con
->hpt_data
[0]=cfg
->hpt_data
[0];
2326 con
->hpt_data
[1]=cfg
->hpt_data
[1];
2327 con
->hpt_data
[2]=cfg
->hpt_data
[2];
2329 // If user has asked, test the email warning system
2330 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2331 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
// 3ware character devices are opened with their own mode string
2333 if (cfg
->controller_type
== CONTROLLER_3WARE_9000_CHAR
)
2334 mode
="ATA_3WARE_9000";
2336 if (cfg
->controller_type
== CONTROLLER_3WARE_678K_CHAR
)
2337 mode
="ATA_3WARE_678K";
2339 // if we can't open device, fail gracefully rather than hard --
2340 // perhaps the next time around we'll be able to open it. ATAPI
2341 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2342 // given (see linux cdrom driver).
2343 if ((fd
=OpenDevice(name
, mode
, 0))<0){
2344 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2348 // if the user has asked, and device is capable (or we're not yet
2349 // sure) check whether a self test should be done now.
2350 // This check is done before powermode check to avoid missing self
2351 // tests on idle or sleeping disks.
2352 if (allow_selftests
&& cfg
->testdata
) {
// else-if chain: at most one test type is chosen per pass
2354 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2357 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2360 else if (!cfg
->testdata
->not_cap_conveyance
&& DoTestNow(cfg
, 'C', 0)>0)
2362 // offline immediate
2363 else if (!cfg
->testdata
->not_cap_offline
&& DoTestNow(cfg
, 'O', 0)>0)
2367 // user may have requested (with the -n Directive) to leave the disk
2368 // alone if it is in idle or sleeping mode. In this case check the
2369 // power mode and exit without check if needed
2370 if (cfg
->powermode
){
2371 int dontcheck
=0, powermode
=ataCheckPowerMode(fd
);
2373 if (0 <= powermode
&& powermode
< 0xff) {
2374 // wait for possible spin up and check again
2377 powermode2
= ataCheckPowerMode(fd
);
// a higher value on the second read is reported as a spin-up caused by the
// CHECK POWER STATUS command itself
2378 if (powermode2
> powermode
)
2379 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
2380 powermode
= powermode2
;
2387 if (cfg
->powermode
>=1)
2393 if (cfg
->powermode
>=2)
2399 if (cfg
->powermode
>=3)
2404 mode
="ACTIVE or IDLE";
2408 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2414 // if we are going to skip a check, return now
2416 // but ignore powermode on scheduled selftest
2418 CloseDevice(fd
, name
);
2419 if (!cfg
->powerskipcnt
&& !cfg
->powerquiet
) // report first only and avoid waking up system disk
2420 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
// powerskipcnt counts consecutive skipped checks; it is reported and reset
// when checking resumes
2421 cfg
->powerskipcnt
++;
2424 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to scheduled self test (%d check%s skipped)\n",
2425 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2426 cfg
->powerskipcnt
= 0;
2428 else if (cfg
->powerskipcnt
) {
2429 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2430 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2431 cfg
->powerskipcnt
= 0;
2435 // check smart status
2436 if (cfg
->smartcheck
){
2437 int status
=ataSmartStatus2(fd
);
2439 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
2440 MailWarning(cfg
, 5, "Device: %s, not capable of SMART self-check", name
);
// status 1 is handled as a failed SMART self-check
2442 else if (status
==1){
2443 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
2444 MailWarning(cfg
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
2448 // Check everything that depends upon SMART Data (eg, Attribute values)
2449 if ( cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->pending
!=DONT_MONITOR_UNC
2450 || cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
){
2451 struct ata_smart_values curval
;
2452 struct ata_smart_thresholds_pvt
*thresh
=cfg
->smartthres
;
2454 // Read current attribute values. *drive contains old values and thresholds
2455 if (ataReadSmartValues(fd
,&curval
)){
2456 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
2457 MailWarning(cfg
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
2460 // look for current or offline pending sectors
2461 if (cfg
->pending
!= DONT_MONITOR_UNC
) {
2463 unsigned char currentpending
, offlinepending
;
// cfg->pending packs two attribute ids; TranslatePending() splits them
2465 TranslatePending(cfg
->pending
, ¤tpending
, &offlinepending
);
2467 if (currentpending
&& (rawval
=ATAReturnAttributeRawValue(currentpending
, &curval
))>0) {
2468 // Unreadable pending sectors!!
2469 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors\n", name
, rawval
);
2470 MailWarning(cfg
, 10, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors", name
, rawval
);
2473 if (offlinepending
&& (rawval
=ATAReturnAttributeRawValue(offlinepending
, &curval
))>0) {
2474 // Unreadable offline sectors!!
2475 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Offline uncorrectable sectors\n", name
, rawval
);
2476 MailWarning(cfg
, 11, "Device: %s, %"PRId64
" Offline uncorrectable sectors", name
, rawval
);
2480 // check temperature limits
// (0 is passed as the trip temperature for ATA devices)
2481 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2482 CheckTemperature(cfg
, ATAReturnTemperatureValue(&curval
, cfg
->attributedefs
), 0);
2484 if (cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
) {
2486 // look for failed usage attributes, or track usage or prefail attributes
2487 for (i
=0; i
<NUMBER_ATA_SMART_ATTRIBUTES
; i
++){
2489 changedattribute_t delta
;
2491 // This block looks for usage attributes that have failed.
2492 // Prefail attributes that have failed are returned with a
2493 // positive sign. No failure returns 0. Usage attributes<0.
2494 if (cfg
->usagefailed
&& ((att
=ataCheckAttribute(&curval
, thresh
, i
))<0)){
2496 // are we ignoring failures of this attribute?
2498 if (!IsAttributeOff(att
, &cfg
->monitorattflags
, 0, MONITOR_FAILUSE
, __LINE__
)){
2499 char attname
[64], *loc
=attname
;
2501 // get attribute name & skip white space
2502 ataPrintSmartAttribName(loc
, att
, cfg
->attributedefs
);
2503 while (*loc
&& *loc
==' ') loc
++;
2506 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %s.\n", name
, loc
);
2507 MailWarning(cfg
, 2, "Device: %s, Failed SMART usage Attribute: %s.", name
, loc
);
2511 // This block tracks usage or prefailure attributes to see if
2512 // they are changing. It also looks for changes in RAW values
2513 // if this has been requested by user.
2514 if ((cfg
->usage
|| cfg
->prefail
) && ATACompareValues(&delta
, &curval
, cfg
->smartval
, thresh
, i
, name
)){
2515 unsigned char id
=delta
.id
;
2517 // if the only change is the raw value, and we're not
2518 // tracking raw value, then continue loop over attributes
2519 if (!delta
.sameraw
&& delta
.newval
==delta
.oldval
&& !IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAW
, __LINE__
))
2522 // are we tracking this attribute?
2523 if (!IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_IGNORE
, __LINE__
)){
2524 char newrawstring
[64], oldrawstring
[64], attname
[64], *loc
=attname
;
2526 // get attribute name, skip spaces
2527 ataPrintSmartAttribName(loc
, id
, cfg
->attributedefs
);
2528 while (*loc
&& *loc
==' ') loc
++;
2530 // has the user asked for us to print raw values?
2531 if (IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAWPRINT
, __LINE__
)) {
2532 // get raw values (as a string) and add to printout
2534 ataPrintSmartAttribRawValue(rawstring
, curval
.vendor_attributes
+i
, cfg
->attributedefs
);
2535 sprintf(newrawstring
, " [Raw %s]", rawstring
);
2536 ataPrintSmartAttribRawValue(rawstring
, cfg
->smartval
->vendor_attributes
+i
, cfg
->attributedefs
);
2537 sprintf(oldrawstring
, " [Raw %s]", rawstring
);
// otherwise both raw strings are left empty so the %s in the messages below
// prints nothing
2540 newrawstring
[0]=oldrawstring
[0]='\0';
2542 // prefailure attribute
2543 if (cfg
->prefail
&& delta
.prefail
)
2544 PrintOut(LOG_INFO
, "Device: %s, SMART Prefailure Attribute: %s changed from %d%s to %d%s\n",
2545 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2548 if (cfg
->usage
&& !delta
.prefail
)
2549 PrintOut(LOG_INFO
, "Device: %s, SMART Usage Attribute: %s changed from %d%s to %d%s\n",
2550 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2552 } // endof block tracking usage or prefailure
2553 } // end of loop over attributes
2555 // Save the new values into *drive for the next time around
2556 *(cfg
->smartval
)=curval
;
2561 // check if number of selftest errors has increased (note: may also DECREASE)
2563 CheckSelfTestLogs(cfg
, SelfTestErrorCount(fd
, name
));
2565 // check if number of ATA errors has increased
2568 int newc
,oldc
=cfg
->ataerrorcount
;
2570 // new number of errors
2571 newc
=ATAErrorCount(fd
, name
);
2573 // did command fail?
2575 // lack of PrintOut here is INTENTIONAL
2576 MailWarning(cfg
, 7, "Device: %s, Read SMART Error Log Failed", name
);
2578 // has error count increased?
2580 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
2582 MailWarning(cfg
, 4, "Device: %s, ATA error count increased from %d to %d",
2586 // this last line is probably not needed, count always increases
2588 cfg
->ataerrorcount
=newc
;
2591 // carry out scheduled self-test
2593 DoATASelfTest(fd
, cfg
, testtype
);
2595 // Don't leave device open -- the OS/user may want to access it
2596 // before the next smartd cycle!
2597 CloseDevice(fd
, name
);
// Run one monitoring pass on a SCSI device.
//   cfg:             device entry with the user's Directives and saved state
//   allow_selftests: when false, no scheduled self-test is started
// Visible steps, in order: select on the controller type, copy controller
// settings into 'con', optionally send the test email, open the device, read
// the health status with scsiCheckIE() unless reporting is suppressed, check
// temperature limits, check the self-test log, start a due long or short
// self-test, and close the device again.
// NOTE(review): the return statements are not visible here -- confirm the
// return value convention against the full file.
2601 int SCSICheckDevice(cfgfile
*cfg
, bool allow_selftests
)
2607 char *name
=cfg
->name
;
2611 // should we try to register this as a SCSI device?
2612 switch (cfg
->controller_type
) {
2613 case CONTROLLER_CCISS
:
2616 case CONTROLLER_SCSI
:
2617 case CONTROLLER_UNKNOWN
:
2624 // pass user settings on to low-level SCSI commands
2625 con
->controller_port
=cfg
->controller_port
;
2626 con
->controller_type
=cfg
->controller_type
;
2628 // If the user has asked for it, test the email warning system
2629 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2630 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
2632 // if we can't open device, fail gracefully rather than hard --
2633 // perhaps the next time around we'll be able to open it
2634 if ((fd
=OpenDevice(name
, mode
, 0))<0) {
2635 // Lack of PrintOut() here is intentional!
2636 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2638 } else if (debugmode
)
2639 PrintOut(LOG_INFO
,"Device: %s, opened SCSI device\n", name
);
// Health check is skipped once SuppressReport has been set by an earlier
// failed read (see the assignment below)
2643 if (! cfg
->SuppressReport
) {
2644 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
2645 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
2646 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
2648 MailWarning(cfg
, 6, "Device: %s, failed to read SMART values", name
);
2649 cfg
->SuppressReport
= 1;
// Translate the asc/ascq pair into a message, if one is defined for it
2653 cp
= scsiGetIEString(asc
, ascq
);
2655 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
2656 MailWarning(cfg
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
2657 } else if (debugmode
)
2658 PrintOut(LOG_INFO
,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2659 name
, (int)asc
, (int)ascq
);
2660 } else if (debugmode
)
2661 PrintOut(LOG_INFO
,"Device: %s, SMART health: passed\n", name
);
2663 // check temperature limits
2664 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2665 CheckTemperature(cfg
, currenttemp
, triptemp
);
2667 // check if number of selftest errors has increased (note: may also DECREASE)
2669 CheckSelfTestLogs(cfg
, scsiCountFailedSelfTests(fd
, 0));
// Scheduled self-tests: a due long test takes precedence over a short one
2671 if (allow_selftests
&& cfg
->testdata
) {
2672 // long (extended) background test
2673 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2674 DoSCSISelfTest(fd
, cfg
, 'L');
2675 // short background test
2676 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2677 DoSCSISelfTest(fd
, cfg
, 'S');
2679 CloseDevice(fd
, name
);
2683 // Checks the SMART status of all ATA and SCSI devices
//   ATAandSCSIdevices: array of numdevata+numdevscsi device entries
//   allow_selftests:   passed through unchanged to the per-device check
// Entries with WhichCheckDevice==1 go to SCSICheckDevice(); ATACheckDevice()
// is presumably the else branch (the line between the two calls is not
// visible here -- confirm).  The return values of both calls are not used.
2684 void CheckDevicesOnce(cfgfile
**ATAandSCSIdevices
, bool allow_selftests
){
2687 for (i
=0; i
<numdevata
+numdevscsi
; i
++) {
2688 if (ATAandSCSIdevices
[i
]->WhichCheckDevice
==1)
2689 SCSICheckDevice(ATAandSCSIdevices
[i
], allow_selftests
);
2691 ATACheckDevice(ATAandSCSIdevices
[i
], allow_selftests
);
2698 // This alarm means that a SCSI USB device was hanging
// Signal handler: the signal number is not examined.  Control jumps back to
// the context saved in registerscsienv, where setjmp() then yields 1.
2699 void AlarmHandler(int signal
) {
2700 longjmp(registerscsienv
, 1);
2704 // Does initialization right after fork to daemon mode
//   wakeuptime: out parameter, set to the current time (time(NULL)).
// Every handler below is installed with the same pattern: if SIGNALFN()
// returns SIG_IGN (for signal()/sigset() that is the previous disposition),
// the signal is switched back to SIG_IGN so an inherited "ignore" is kept.
2705 void Initialize(time_t *wakeuptime
){
2707 // install goodbye message and remove pidfile handler
2710 // write PID file only after installing exit handler
2714 // install signal handlers. On Solaris, can't use signal() because
2715 // it resets the handler to SIG_DFL after each call. So use sigset()
2716 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2718 // normal and abnormal exit
2719 if (SIGNALFN(SIGTERM
, sighandler
)==SIG_IGN
)
2720 SIGNALFN(SIGTERM
, SIG_IGN
);
2721 if (SIGNALFN(SIGQUIT
, sighandler
)==SIG_IGN
)
2722 SIGNALFN(SIGQUIT
, SIG_IGN
);
2724 // in debug mode, <CONTROL-C> ==> HUP
2725 if (SIGNALFN(SIGINT
, debugmode
?HUPhandler
:sighandler
)==SIG_IGN
)
2726 SIGNALFN(SIGINT
, SIG_IGN
);
2728 // Catch HUP and USR1
2729 if (SIGNALFN(SIGHUP
, HUPhandler
)==SIG_IGN
)
2730 SIGNALFN(SIGHUP
, SIG_IGN
);
2731 if (SIGNALFN(SIGUSR1
, USR1handler
)==SIG_IGN
)
2732 SIGNALFN(SIGUSR1
, SIG_IGN
);
// USR2 is used to toggle debug mode (see the "Signal USR2" messages)
2734 if (SIGNALFN(SIGUSR2
, USR2handler
)==SIG_IGN
)
2735 SIGNALFN(SIGUSR2
, SIG_IGN
);
2738 // initialize wakeup time to CURRENT time
2739 *wakeuptime
=time(NULL
);
2745 // Toggle debug mode implemented for native windows only
2746 // (there is no easy way to reopen tty on *nix)
// Enabling: a console titled "smartd [Debug]" is requested with
// daemon_enable_console(); a zero result is treated as success and SIGINT is
// routed to HUPhandler.  Disabling (debugmode == 1): the console is released
// and SIGINT goes back to sighandler.  Any other debugmode value is left
// unchanged and only logged.
// NOTE(review): the condition guarding the enable branch and the assignments
// to debugmode are not visible here -- confirm against the full file.
2747 static void ToggleDebugMode()
2750 PrintOut(LOG_INFO
,"Signal USR2 - enabling debug mode\n");
2751 if (!daemon_enable_console("smartd [Debug]")) {
2753 daemon_signal(SIGINT
, HUPhandler
);
2754 PrintOut(LOG_INFO
,"smartd debug mode enabled, PID=%d\n", getpid());
2757 PrintOut(LOG_INFO
,"enable console failed\n");
2759 else if (debugmode
== 1) {
2760 daemon_disable_console();
2762 daemon_signal(SIGINT
, sighandler
);
2763 PrintOut(LOG_INFO
,"Signal USR2 - debug mode disabled\n");
2766 PrintOut(LOG_INFO
,"Signal USR2 - debug mode %d not changed\n", debugmode
);
// Sleep until the next scheduled check or until a signal flag is set.
//   wakeuptime: time at which the next check was due
//   returns:    the adjusted wakeuptime (see the comment at the end)
2770 time_t dosleep(time_t wakeuptime
){
2773 // If past wake-up-time, compute next wake-up-time
// advance by whole multiples of checktime so the schedule stays on its grid
2775 while (wakeuptime
<=timenow
){
2776 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
2777 wakeuptime
+=intervals
*checktime
;
2780 // sleep until a USR1, HUP or EXIT signal flag is set or we have completed sleeping
2781 while (timenow
<wakeuptime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
){
2783 // protect user against the system clock being adjusted backwards
// (the wakeup can never legitimately be more than one checktime away)
2784 if (wakeuptime
>timenow
+checktime
){
2785 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2786 wakeuptime
=timenow
+checktime
;
2789 // Exit sleep when time interval has expired or a signal is received
2790 sleep(wakeuptime
-timenow
);
2793 // toggle debug mode?
2794 if (caughtsigUSR2
) {
2803 // if we caught a SIGUSR1 then print message and clear signal
// the remaining time is clamped at 0 for the message
2805 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2806 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
2810 // return adjusted wakeuptime
2814 // Print out a list of valid arguments for the Directive d
//   priority: log priority handed to PrintOut() for the argument list
//   d:        Directive letter whose arguments are listed
// NOTE(review): the lines that select on d are not visible here, so which
// list belongs to which Directive letter must be confirmed in the full file.
2815 void printoutvaliddirectiveargs(int priority
, char d
) {
2820 PrintOut(priority
, "never[,q], sleep[,q], standby[,q], idle[,q]");
2823 PrintOut(priority
, "valid_regular_expression");
2826 PrintOut(priority
, "ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N");
2829 PrintOut(priority
, "normal, permissive");
2833 PrintOut(priority
, "on, off");
2836 PrintOut(priority
, "error, selftest");
2839 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
// This list is built at run time; the string is released again via CheckFree()
2842 if (!(s
= create_vendor_attribute_arg_list())) {
2843 PrintOut(LOG_CRIT
,"Insufficient memory to construct argument list\n");
2846 PrintOut(priority
, "\n%s\n", s
);
2847 s
=CheckFree(s
, __LINE__
,filenameandversion
);
2850 PrintOut(priority
, "use, ignore, show, showall");
2853 PrintOut(priority
, "none, samsung, samsung2, samsung3");
2858 // exits with an error message, or returns integer value of token
//   arg:        argument text of the Directive; parsed as a base-10 integer
//   name:       drive name, used in messages
//   token:      Directive being parsed, used in messages
//   lineno:     config file line number, used in messages
//   configfile: config file name, used in messages
//   min, max:   inclusive range accepted for the value; min must be >= 0
2859 int GetInteger(char *arg
, char *name
, char *token
, int lineno
, char *configfile
, int min
, int max
){
2863 // check input range
2865 PrintOut(LOG_CRIT
, "min =%d passed to GetInteger() must be >=0\n", min
);
2869 // make sure argument is there
2871 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
2872 configfile
, lineno
, name
, token
, min
, max
);
2876 // get argument value (base 10), check that it's integer, and in-range
// *endptr must be the terminating NUL, i.e. no trailing characters allowed
2877 val
=strtol(arg
,&endptr
,10);
2878 if (*endptr
!='\0' || val
<min
|| val
>max
) {
2879 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
2880 configfile
, lineno
, name
, token
, arg
, min
, max
);
2884 // all is well; return value
2889 // Get 1-3 small integer(s) for '-W' directive
//   arg:        argument text of the form "N", "N,N" or "N,N,N"
//   name, token, lineno, configfile: used in error messages only
//   val1..val3: out parameters; values that are not given in arg are stored
//               as 0 (v1..v3 start at 0)
// Each value must be in 0..255.
// NOTE(review): the assignment of 'len' and the return statements are not
// visible here; len is presumably strlen(arg) -- confirm.
2890 int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *configfile
,
2891 unsigned char * val1
, unsigned char * val2
, unsigned char * val3
){
2892 unsigned v1
= 0, v2
= 0, v3
= 0;
// n1..n3 stay -1 unless sscanf() reaches the corresponding %n
2893 int n1
= -1, n2
= -1, n3
= -1, len
;
2895 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
2896 configfile
, lineno
, name
, token
);
// Accept only if at least one number was read, one of the %n offsets equals
// len (nothing is left over after the last number), and all values fit
2901 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
2902 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
2903 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
2904 configfile
, lineno
, name
, token
, arg
);
2907 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
2912 // This function returns 1 if it has correctly parsed one token (and
2913 // any arguments), else zero if no tokens remain. It returns -1 if an
2914 // error was encountered.
2915 int ParseToken(char *token
,cfgfile
*cfg
){
2917 char *name
=cfg
->name
;
2918 int lineno
=cfg
->lineno
;
2919 char *delim
= " \n\t";
2924 maildata
*mdat
=NULL
, tempmail
;
2926 // is the rest of the line a comment
2930 // is the token not recognized?
2931 if (*token
!='-' || strlen(token
)!=2) {
2932 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
2933 configfile
, lineno
, name
, token
);
2934 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
2938 // token we will be parsing:
2941 // create temporary maildata structure. This means we can postpone
2942 // allocating space in the data segment until we are sure there are
2944 if ('m'==sym
|| 'M'==sym
){
2945 if (!cfg
->mailwarn
){
2946 memset(&tempmail
, 0, sizeof(maildata
));
2954 // parse the token and swallow its argument
2959 // monitor current pending sector count (default 197)
2960 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2962 if (val
==CUR_UNC_DEFAULT
)
2965 val
=CUR_UNC_DEFAULT
;
2966 // set bottom 8 bits to correct value
2967 cfg
->pending
&= 0xff00;
2968 cfg
->pending
|= val
;
2971 // monitor offline uncorrectable sectors (default 198)
2972 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2974 if (val
==OFF_UNC_DEFAULT
)
2977 val
=OFF_UNC_DEFAULT
;
2978 // turn off top 8 bits, then set to correct value
2979 cfg
->pending
&= 0xff;
2980 cfg
->pending
|= (val
<<8);
2983 // Set tolerance level for SMART command failures
2984 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2986 } else if (!strcmp(arg
, "normal")) {
2987 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
2988 // not on failure of an optional S.M.A.R.T. command.
2989 // This is the default so we don't need to actually do anything here.
2991 } else if (!strcmp(arg
, "permissive")) {
2992 // Permissive mode; ignore errors from Mandatory SMART commands
2999 // specify the device type
3000 cfg
->controller_explicit
= 1;
3001 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3003 } else if (!strcmp(arg
, "ata")) {
3004 cfg
->controller_port
= 0;
3005 cfg
->controller_type
= CONTROLLER_ATA
;
3006 } else if (!strcmp(arg
, "scsi")) {
3007 cfg
->controller_port
=0;
3008 cfg
->controller_type
= CONTROLLER_SCSI
;
3009 } else if (!strcmp(arg
, "marvell")) {
3010 cfg
->controller_port
=0;
3011 cfg
->controller_type
= CONTROLLER_MARVELL_SATA
;
3012 } else if (!strncmp(arg
, "sat", 3)) {
3013 cfg
->controller_type
= CONTROLLER_SAT
;
3014 cfg
->controller_port
= 0;
3015 cfg
->satpassthrulen
= 0;
3016 if (strlen(arg
) > 3) {
3020 cp
= strchr(arg
, ',');
3021 if (cp
&& (1 == sscanf(cp
+ 1, "%d", &k
)) &&
3022 ((0 == k
) || (12 == k
) || (16 == k
)))
3023 cfg
->satpassthrulen
= k
;
3025 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3026 "'-d sat,<n>' requires <n> to be 0, 12 or 16\n",
3027 configfile
, lineno
, name
);
3031 } else if (!strncmp(arg
, "hpt", 3)){
3032 unsigned char i
, slash
= 0;
3033 cfg
->hpt_data
[0] = 0;
3034 cfg
->hpt_data
[1] = 0;
3035 cfg
->hpt_data
[2] = 0;
3036 cfg
->controller_type
= CONTROLLER_HPT
;
3037 for (i
=4; i
< strlen(arg
); i
++) {
3041 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3042 "'-d hpt,L/M/N' supports 2-3 items\n",
3043 configfile
, lineno
, name
);
3048 else if ((arg
[i
])>='0' && (arg
[i
])<='9') {
3049 if (cfg
->hpt_data
[slash
]>1) { /* hpt_data[x] max 19 */
3053 cfg
->hpt_data
[slash
] = cfg
->hpt_data
[slash
]*10 + arg
[i
] - '0';
3062 } else if (badarg
!= TRUE
) {
3063 if (cfg
->hpt_data
[0]==0 || cfg
->hpt_data
[0]>8){
3064 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3065 "'-d hpt,L/M/N' no/invalid controller id L supplied\n",
3066 configfile
, lineno
, name
);
3069 if (cfg
->hpt_data
[1]==0 || cfg
->hpt_data
[1]>8){
3070 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3071 "'-d hpt,L/M/N' no/invalid channel number M supplied\n",
3072 configfile
, lineno
, name
);
3076 if (cfg
->hpt_data
[2]==0 || cfg
->hpt_data
[2]>15){
3077 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
3078 "'-d hpt,L/M/N' no/invalid pmport number N supplied\n",
3079 configfile
, lineno
, name
);
3082 } else { /* no pmport device */
3086 } else if (!strcmp(arg
, "removable")) {
3089 // look 3ware,N RAID device
3093 // make a copy of the string to mess with
3094 if (!(s
= strdup(arg
))) {
3096 "No memory to copy argument to -d option - exiting\n");
3098 } else if (!strncmp(s
,"3ware,",6)) {
3099 if (split_report_arg2(s
, &i
)){
3100 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N requires N integer\n",
3101 configfile
, lineno
, name
);
3103 } else if ( i
<0 || i
>31) {
3104 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N (N=%d) must have 0 <= N <= 31\n",
3105 configfile
, lineno
, name
, i
);
3108 // determine type of escalade device from name of device
3109 cfg
->controller_type
= guess_device_type(name
);
3110 if (cfg
->controller_type
!=CONTROLLER_3WARE_9000_CHAR
&& cfg
->controller_type
!=CONTROLLER_3WARE_678K_CHAR
)
3111 cfg
->controller_type
=CONTROLLER_3WARE_678K
;
3113 // NOTE: controller_port == disk number + 1
3114 cfg
->controller_port
= i
+1;
3116 } else if (!strncmp(s
,"cciss,",6)) {
3117 if (split_report_arg2(s
, &i
)){
3118 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N requires N integer\n",
3119 configfile
, lineno
, name
);
3121 } else if ( i
<0 || i
>127) {
3122 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d cciss,N (N=%d) must have 0 <= N <= 127\n",
3123 configfile
, lineno
, name
, i
);
3126 // NOTE: controller_port == disk number + 1
3127 cfg
->controller_type
= CONTROLLER_CCISS
;
3128 cfg
->controller_port
= i
+1;
3133 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3138 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3140 } else if (!strcmp(arg
, "none")) {
3141 cfg
->fixfirmwarebug
= FIX_NONE
;
3142 } else if (!strcmp(arg
, "samsung")) {
3143 cfg
->fixfirmwarebug
= FIX_SAMSUNG
;
3144 } else if (!strcmp(arg
, "samsung2")) {
3145 cfg
->fixfirmwarebug
= FIX_SAMSUNG2
;
3146 } else if (!strcmp(arg
, "samsung3")) {
3147 cfg
->fixfirmwarebug
= FIX_SAMSUNG3
;
3153 // check SMART status
3157 // check for failure of usage attributes
3161 // track changes in all vendor attributes
3166 // track changes in prefail vendor attributes
3170 // track changes in usage vendor attributes
3174 // track changes in SMART logs
3175 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3177 } else if (!strcmp(arg
, "selftest")) {
3178 // track changes in self-test log
3180 } else if (!strcmp(arg
, "error")) {
3181 // track changes in ATA error log
3188 // monitor everything
3197 // automatic offline testing enable/disable
3198 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3200 } else if (!strcmp(arg
, "on")) {
3201 cfg
->autoofflinetest
= 2;
3202 } else if (!strcmp(arg
, "off")) {
3203 cfg
->autoofflinetest
= 1;
3209 // skip disk check if in idle or standby mode
3210 if (!(arg
= strtok(NULL
, delim
)))
3212 else if (!strcmp(arg
, "never") || !strcmp(arg
, "never,q"))
3214 else if (!strcmp(arg
, "sleep") || !strcmp(arg
, "sleep,q"))
3216 else if (!strcmp(arg
, "standby") || !strcmp(arg
, "standby,q"))
3218 else if (!strcmp(arg
, "idle") || !strcmp(arg
, "idle,q"))
3222 cfg
->powerquiet
= !!strchr(arg
, ',');
3225 // automatic attribute autosave enable/disable
3226 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3228 } else if (!strcmp(arg
, "on")) {
3230 } else if (!strcmp(arg
, "off")) {
3237 // warn user, and delete any previously given -s REGEXP Directives
3239 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3240 configfile
, lineno
, name
, cfg
->testdata
->regex
);
3241 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3243 // check for missing argument
3244 if (!(arg
= strtok(NULL
, delim
))) {
3247 // allocate space for structure and string
3248 else if (!(cfg
->testdata
=(testinfo
*)Calloc(1, sizeof(testinfo
))) || !(cfg
->testdata
->regex
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
))) {
3249 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create Test Directive -s %s!\n",
3250 configfile
, lineno
, name
, arg
);
3253 else if ((val
=regcomp(&(cfg
->testdata
->cregex
), arg
, REG_EXTENDED
))) {
3255 // not a valid regular expression!
3256 regerror(val
, &(cfg
->testdata
->cregex
), errormsg
, 512);
3257 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3258 configfile
, lineno
, name
, arg
, errormsg
);
3259 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3262 // Do a bit of sanity checking and warn user if we think that
3263 // their regexp is "strange". User probably confused about shell
3264 // glob(3) syntax versus regular expression syntax regexp(7).
3265 else if ((int)strlen(arg
) != (val
=strspn(arg
,"0123456789/.-+*|()?^$[]SLCO")))
3266 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3267 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
3270 // send email to address that follows
3271 if (!(arg
= strtok(NULL
,delim
)))
3274 if (mdat
->address
) {
3275 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3276 configfile
, lineno
, name
, mdat
->address
);
3277 mdat
->address
=FreeNonZero(mdat
->address
, -1,__LINE__
,filenameandversion
);
3279 mdat
->address
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3283 // email warning options
3284 if (!(arg
= strtok(NULL
, delim
)))
3286 else if (!strcmp(arg
, "once"))
3287 mdat
->emailfreq
= 1;
3288 else if (!strcmp(arg
, "daily"))
3289 mdat
->emailfreq
= 2;
3290 else if (!strcmp(arg
, "diminishing"))
3291 mdat
->emailfreq
= 3;
3292 else if (!strcmp(arg
, "test"))
3293 mdat
->emailtest
= 1;
3294 else if (!strcmp(arg
, "exec")) {
3295 // Get the next argument (the command line)
3296 if (!(arg
= strtok(NULL
, delim
))) {
3297 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3298 configfile
, lineno
, name
, token
);
3301 // Free the last cmd line given if any, and copy new one
3302 if (mdat
->emailcmdline
) {
3303 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3304 configfile
, lineno
, name
, mdat
->emailcmdline
);
3305 mdat
->emailcmdline
=FreeNonZero(mdat
->emailcmdline
, -1,__LINE__
,filenameandversion
);
3307 mdat
->emailcmdline
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3313 // ignore failure of usage attribute
3314 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3316 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_FAILUSE
, __LINE__
);
3319 // ignore attribute for tracking purposes
3320 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3322 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_IGNORE
, __LINE__
);
3325 // print raw value when tracking
3326 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3328 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3331 // track changes in raw value (forces printing of raw value)
3332 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3334 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3335 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAW
, __LINE__
);
3338 // track Temperature
3339 if ((val
=Get3Integers(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
,
3340 &cfg
->tempdiff
, &cfg
->tempinfo
, &cfg
->tempcrit
))<0)
3342 // increase min Temperature during first 30 minutes
3343 if (!(cfg
->tempmininc
= (unsigned char)(CHECKTIME
/ checktime
)))
3344 cfg
->tempmininc
= 1;
3347 // non-default vendor-specific attribute meaning
3348 if (!(arg
=strtok(NULL
,delim
))) {
3350 } else if (parse_attribute_def(arg
, &cfg
->attributedefs
)){
3355 // Define use of drive-specific presets.
3356 if (!(arg
= strtok(NULL
, delim
))) {
3358 } else if (!strcmp(arg
, "use")) {
3359 cfg
->ignorepresets
= FALSE
;
3360 } else if (!strcmp(arg
, "ignore")) {
3361 cfg
->ignorepresets
= TRUE
;
3362 } else if (!strcmp(arg
, "show")) {
3363 cfg
->showpresets
= TRUE
;
3364 } else if (!strcmp(arg
, "showall")) {
3371 // Directive not recognized
3372 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3373 configfile
, lineno
, name
, token
);
3378 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3379 configfile
, lineno
, name
, token
);
3382 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3383 configfile
, lineno
, name
, token
, arg
);
3385 if (missingarg
|| badarg
) {
3386 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
3387 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
3388 PrintOut(LOG_CRIT
, "\n");
3392 // If this did something to fill the mail structure, and that didn't
3393 // already exist, create it and copy.
3395 if (!(cfg
->mailwarn
=(maildata
*)Calloc(1, sizeof(maildata
)))) {
3396 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create mail warning entry!\n",
3397 configfile
, lineno
, name
);
3400 memcpy(cfg
->mailwarn
, mdat
, sizeof(maildata
));
3406 // Allocate storage for a new cfgfile entry. If original!=NULL, it's
3407 // a copy of the original, but with private data storage. Else all is
3408 // zeroed. Returns address, and fails if no memory is available.
3410 cfgfile
*CreateConfigEntry(cfgfile
*original
){
3413 // allocate memory for new structure
3414 if (!(add
=(cfgfile
*)Calloc(1,sizeof(cfgfile
))))
3417 // if old structure was pointed to, copy it
3419 memcpy(add
, original
, sizeof(cfgfile
));
3421 // make private copies of data items ONLY if they are in use (non
3423 add
->name
= CustomStrDup(add
->name
, 0, __LINE__
,filenameandversion
);
3425 if (add
->testdata
) {
3427 if (!(add
->testdata
=(testinfo
*)Calloc(1,sizeof(testinfo
))))
3429 memcpy(add
->testdata
, original
->testdata
, sizeof(testinfo
));
3430 add
->testdata
->regex
= CustomStrDup(add
->testdata
->regex
, 1, __LINE__
,filenameandversion
);
3431 // only POSIX-portable way to make fresh copy of compiled regex is
3432 // to recompile it completely. There is no POSIX
3433 // compiled-regex-copy command.
3434 if ((val
=regcomp(&(add
->testdata
->cregex
), add
->testdata
->regex
, REG_EXTENDED
))) {
3436 regerror(val
, &(add
->testdata
->cregex
), errormsg
, 512);
3437 PrintOut(LOG_CRIT
, "unable to recompile regular expression %s. %s\n", add
->testdata
->regex
, errormsg
);
3442 if (add
->mailwarn
) {
3443 if (!(add
->mailwarn
=(maildata
*)Calloc(1,sizeof(maildata
))))
3445 memcpy(add
->mailwarn
, original
->mailwarn
, sizeof(maildata
));
3446 add
->mailwarn
->address
= CustomStrDup(add
->mailwarn
->address
, 0, __LINE__
,filenameandversion
);
3447 add
->mailwarn
->emailcmdline
= CustomStrDup(add
->mailwarn
->emailcmdline
, 0, __LINE__
,filenameandversion
);
3450 if (add
->attributedefs
) {
3451 if (!(add
->attributedefs
=(unsigned char *)Calloc(MAX_ATTRIBUTE_NUM
,1)))
3453 memcpy(add
->attributedefs
, original
->attributedefs
, MAX_ATTRIBUTE_NUM
);
3456 if (add
->monitorattflags
) {
3457 if (!(add
->monitorattflags
=(unsigned char *)Calloc(NMONITOR
*32, 1)))
3459 memcpy(add
->monitorattflags
, original
->monitorattflags
, NMONITOR
*32);
3462 if (add
->smartval
) {
3463 if (!(add
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
))))
3467 if (add
->smartthres
) {
3468 if (!(add
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
))))
3475 PrintOut(LOG_CRIT
, "No memory to create entry from configuration file\n");
3481 // This is the routine that adds things to the cfgentries list. To
3482 // prevent memory leaks when re-reading the configuration file many
3483 // times, this routine MUST deallocate any memory other than that
3484 // pointed to within cfg-> before it returns.
3486 // Return values are:
3487 // 1: parsed a normal line
3488 // 0: found comment or blank line
3489 // -1: found SCANDIRECTIVE line
3490 // -2: found an error
3492 // Note: this routine modifies *line from the caller!
3493 int ParseConfigLine(int entry
, int lineno
,char *line
){
3496 char *delim
= " \n\t";
3500 // get first token: device name. If a comment, skip line
3501 if (!(name
=strtok(line
,delim
)) || *name
=='#') {
3505 // Have we detected the SCANDIRECTIVE directive?
3506 if (!strcmp(SCANDIRECTIVE
,name
)){
3509 PrintOut(LOG_INFO
,"Scan Directive %s (line %d) must be the first entry in %s\n",name
, lineno
, configfile
);
3514 // Is there space for another entry? If not, allocate more
3515 while (entry
>=cfgentries_max
)
3516 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "configuration file device");
3518 // We've got a legit entry, make space to store it
3519 cfg
=cfgentries
[entry
]=CreateConfigEntry(NULL
);
3520 cfg
->name
= CustomStrDup(name
, 1, __LINE__
,filenameandversion
);
3522 // Store line number, and by default check for both device types.
3525 // Try to recognize whether this is an IDE or SCSI device. These can be
3526 // overwritten by configuration file directives.
3527 if (cfg
->controller_type
==CONTROLLER_UNKNOWN
)
3528 cfg
->controller_type
= guess_device_type(cfg
->name
);
3530 // parse tokens one at a time from the file.
3531 while ((token
=strtok(NULL
,delim
))){
3532 int retval
=ParseToken(token
,cfg
);
3541 PrintOut(LOG_INFO
,"Parsed token %s\n",token
);
3547 // error found on the line
3552 // If we found 3ware/cciss controller, then modify device name by adding a SPACE
3553 if (cfg
->controller_port
) {
3554 int len
=17+strlen(cfg
->name
);
3558 PrintOut(LOG_CRIT
, "smartd: can not scan for 3ware/cciss devices (line %d of file %s)\n",
3559 lineno
, configfile
);
3563 if (!(newname
=(char *)calloc(len
,1))) {
3564 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3568 // Make new device name by adding a space then RAID disk number
3569 snprintf(newname
, len
, "%s [%s_disk_%02d]", cfg
->name
, (cfg
->controller_type
== CONTROLLER_CCISS
) ? "cciss" : "3ware",
3570 cfg
->controller_port
-1);
3571 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3576 if (cfg
->hpt_data
[0]) {
3577 int len
=17+strlen(cfg
->name
);
3581 PrintOut(LOG_CRIT
, "smartd: can not scan for highpoint devices (line %d of file %s)\n",
3582 lineno
, configfile
);
3586 if (!(newname
=(char *)calloc(len
,1))) {
3587 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3591 // Make new device name by adding a space then RAID disk number
3592 snprintf(newname
, len
, "%s [hpt_%d/%d/%d]", cfg
->name
, cfg
->hpt_data
[0],
3593 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
3594 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3599 // If NO monitoring directives are set, then set all of them.
3600 if (!(cfg
->smartcheck
|| cfg
->usagefailed
|| cfg
->prefail
||
3601 cfg
->usage
|| cfg
->selftest
|| cfg
->errorlog
||
3602 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
3604 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3605 cfg
->name
, cfg
->lineno
, configfile
);
3615 // additional sanity check. Has user set -M options without -m?
3616 if (cfg
->mailwarn
&& !cfg
->mailwarn
->address
&& (cfg
->mailwarn
->emailcmdline
|| cfg
->mailwarn
->emailfreq
|| cfg
->mailwarn
->emailtest
)){
3617 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3618 cfg
->name
, cfg
->lineno
, configfile
);
3622 // has the user set <nomailer>?
3623 if (cfg
->mailwarn
&& cfg
->mailwarn
->address
&& !strcmp(cfg
->mailwarn
->address
,"<nomailer>")){
3624 // check that -M exec is also set
3625 if (!cfg
->mailwarn
->emailcmdline
){
3626 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3627 cfg
->name
, cfg
->lineno
, configfile
);
3630 // now free memory. From here on the sign of <nomailer> is
3631 // address==NULL and cfg->emailcmdline!=NULL
3632 cfg
->mailwarn
->address
=FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
3635 // set cfg->emailfreq to 1 (once) if user hasn't set it
3636 if (cfg
->mailwarn
&& !cfg
->mailwarn
->emailfreq
)
3637 cfg
->mailwarn
->emailfreq
= 1;
3647 // clean up utility for ParseConfigFile()
3648 void cleanup(FILE **fpp
, int is_stdin
){
3650 // (*fpp != stdin) does not work here if stdin has been closed & reopened
3660 // Parses a configuration file. Return values are:
3661 // N>=0: found N entries
3662 // -1: syntax error in config file
3663 // -2: config file does not exist
3664 // -3: config file exists but cannot be read
3666 // In the case where the return value is 0, there are three
3668 // Empty configuration file ==> cfgentries==NULL
3669 // No configuration file ==> cfgentries[0]->lineno == 0
3670 // SCANDIRECTIVE found ==> cfgentries[0]->lineno != 0
3671 int ParseConfigFile(){
3673 int entry
=0,lineno
=1,cont
=0,contlineno
=0;
3674 char line
[MAXLINELEN
+2];
3675 char fullline
[MAXCONTLINE
+1];
3677 int is_stdin
= (configfile
== configfile_stdin
); // pointer comparison ok here
3679 // Open config file, if it exists and is not <stdin>
3681 fp
=fopen(configfile
,"r");
3682 if (fp
==NULL
&& (errno
!=ENOENT
|| configfile_alt
)) {
3683 // file exists but we can't read it or it should exist due to '-c' option
3684 int ret
= (errno
!=ENOENT
? -3 : -2);
3685 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
3686 strerror(errno
),configfile
);
3690 else // read from stdin ('-c -' option)
3693 // No configuration file found -- use fake one
3695 int len
=strlen(SCANDIRECTIVE
)+4;
3696 char *fakeconfig
=(char *)calloc(len
,1);
3699 (len
-1) != snprintf(fakeconfig
, len
, "%s -a", SCANDIRECTIVE
) ||
3700 -1 != ParseConfigLine(entry
, 0, fakeconfig
)
3702 PrintOut(LOG_CRIT
,"Internal error in ParseConfigFile() at line %d of file %s\n%s",
3703 __LINE__
, filenameandversion
, reportbug
);
3706 fakeconfig
=CheckFree(fakeconfig
, __LINE__
,filenameandversion
);
3711 setmode(fileno(fp
), O_TEXT
); // Allow files with \r\n
3714 // configuration file exists
3715 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
3717 // parse config file line by line
3719 int len
=0,scandevice
;
3724 // make debugging simpler
3725 memset(line
,0,sizeof(line
));
3728 code
=fgets(line
,MAXLINELEN
+2,fp
);
3730 // are we at the end of the file?
3733 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3734 // See if we found a SCANDIRECTIVE directive
3735 if (scandevice
==-1) {
3736 cleanup(&fp
, is_stdin
);
3739 // did we find a syntax error
3740 if (scandevice
==-2) {
3741 cleanup(&fp
, is_stdin
);
3744 // the final line is part of a continuation line
3751 // input file line number
3754 // See if line is too long
3756 if (len
>MAXLINELEN
){
3758 if (line
[len
-1]=='\n')
3759 warn
="(including newline!) ";
3762 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3763 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
3764 cleanup(&fp
, is_stdin
);
3768 // Ignore anything after comment symbol
3769 if ((comment
=strchr(line
,'#'))){
3774 // is the total line (made of all continuation lines) too long?
3775 if (cont
+len
>MAXCONTLINE
){
3776 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3777 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
3778 cleanup(&fp
, is_stdin
);
3782 // copy string so far into fullline, and increment length
3783 strcpy(fullline
+cont
,line
);
3786 // is this a continuation line. If so, replace \ by space and look at next line
3787 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
3788 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
3792 // Not a continuation line. Parse it
3793 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3795 // did we find a scandevice directive?
3796 if (scandevice
==-1) {
3797 cleanup(&fp
, is_stdin
);
3800 // did we find a syntax error
3801 if (scandevice
==-2) {
3802 cleanup(&fp
, is_stdin
);
3810 cleanup(&fp
, is_stdin
);
3812 // note -- may be zero if syntax of file OK, but no valid entries!
3817 // Prints copyright, license and version information
3818 void PrintCopyleft(void){
3825 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3826 <LIST> is the list of valid arguments for option opt. */
3827 void PrintValidArgs(char opt
) {
3830 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
3831 if (!(s
= GetValidArgList(opt
)))
3832 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
3834 PrintOut(LOG_CRIT
, (char *)s
);
3835 PrintOut(LOG_CRIT
, " <=======\n");
3838 // Parses input line, prints usage message and
3839 // version/license/copyright messages
3840 void ParseOpts(int argc
, char **argv
){
3841 extern char *optarg
;
3842 extern int optopt
, optind
, opterr
;
3847 // Please update GetValidArgList() if you edit shortopts
3848 const char *shortopts
= "c:l:q:dDni:p:r:Vh?";
3849 #ifdef HAVE_GETOPT_LONG
3851 // Please update GetValidArgList() if you edit longopts
3852 struct option longopts
[] = {
3853 { "configfile", required_argument
, 0, 'c' },
3854 { "logfacility", required_argument
, 0, 'l' },
3855 { "quit", required_argument
, 0, 'q' },
3856 { "debug", no_argument
, 0, 'd' },
3857 { "showdirectives", no_argument
, 0, 'D' },
3858 { "interval", required_argument
, 0, 'i' },
3860 { "no-fork", no_argument
, 0, 'n' },
3862 { "pidfile", required_argument
, 0, 'p' },
3863 { "report", required_argument
, 0, 'r' },
3864 #if defined(_WIN32) || defined(__CYGWIN__)
3865 { "service", no_argument
, 0, 'n' },
3867 { "version", no_argument
, 0, 'V' },
3868 { "license", no_argument
, 0, 'V' },
3869 { "copyright", no_argument
, 0, 'V' },
3870 { "help", no_argument
, 0, 'h' },
3871 { "usage", no_argument
, 0, 'h' },
3879 // Parse input options. This horrible construction is so that emacs
3880 // indents properly. Sorry.
3881 while (-1 != (optchar
=
3882 #ifdef HAVE_GETOPT_LONG
3883 getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)
3885 getopt(argc
, argv
, shortopts
)
3892 if (!(strcmp(optarg
,"nodev"))) {
3894 } else if (!(strcmp(optarg
,"nodevstartup"))) {
3896 } else if (!(strcmp(optarg
,"never"))) {
3898 } else if (!(strcmp(optarg
,"onecheck"))) {
3901 } else if (!(strcmp(optarg
,"showtests"))) {
3904 } else if (!(strcmp(optarg
,"errors"))) {
3911 // set the log facility level
3912 if (!strcmp(optarg
, "daemon"))
3913 facility
=LOG_DAEMON
;
3914 else if (!strcmp(optarg
, "local0"))
3915 facility
=LOG_LOCAL0
;
3916 else if (!strcmp(optarg
, "local1"))
3917 facility
=LOG_LOCAL1
;
3918 else if (!strcmp(optarg
, "local2"))
3919 facility
=LOG_LOCAL2
;
3920 else if (!strcmp(optarg
, "local3"))
3921 facility
=LOG_LOCAL3
;
3922 else if (!strcmp(optarg
, "local4"))
3923 facility
=LOG_LOCAL4
;
3924 else if (!strcmp(optarg
, "local5"))
3925 facility
=LOG_LOCAL5
;
3926 else if (!strcmp(optarg
, "local6"))
3927 facility
=LOG_LOCAL6
;
3928 else if (!strcmp(optarg
, "local7"))
3929 facility
=LOG_LOCAL7
;
3934 // enable debug mode
3939 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
3944 // print summary of all valid directives
3950 // Period (time interval) for checking
3951 // strtol will set errno in the event of overflow, so we'll check it.
3953 lchecktime
= strtol(optarg
, &tailptr
, 10);
3954 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
3957 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
3958 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
3959 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3962 checktime
= (int)lchecktime
;
3965 // report IOCTL transactions
3970 // split_report_arg() may modify its first argument string, so use a
3971 // copy of optarg in case we want optarg for an error message.
3972 if (!(s
= strdup(optarg
))) {
3973 PrintOut(LOG_CRIT
, "No memory to process -r option - exiting\n");
3976 if (split_report_arg(s
, &i
)) {
3978 } else if (i
<1 || i
>3) {
3981 PrintOut(LOG_CRIT
, "======> INVALID REPORT LEVEL: %s <=======\n", optarg
);
3982 PrintOut(LOG_CRIT
, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3984 } else if (!strcmp(s
,"ioctl")) {
3985 con
->reportataioctl
= con
->reportscsiioctl
= i
;
3986 } else if (!strcmp(s
,"ataioctl")) {
3987 con
->reportataioctl
= i
;
3988 } else if (!strcmp(s
,"scsiioctl")) {
3989 con
->reportscsiioctl
= i
;
3993 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3997 // alternate configuration file
3998 if (strcmp(optarg
,"-"))
3999 configfile
=configfile_alt
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
4000 else // read from stdin
4001 configfile
=configfile_stdin
;
4004 // output file with PID number
4005 pid_file
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
4008 // print version and CVS info
4013 // help: print summary of command-line options
4021 // unrecognized option
4024 #ifdef HAVE_GETOPT_LONG
4025 // Point arg to the argument in which this option was found.
4026 arg
= argv
[optind
-1];
4027 // Check whether the option is a long option that doesn't map to -h.
4028 if (arg
[1] == '-' && optchar
!= 'h') {
4029 // Iff optopt holds a valid option then argument must be missing.
4030 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
4031 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
4032 PrintValidArgs(optopt
);
4034 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
4036 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
4041 // Iff optopt holds a valid option then argument must be missing.
4042 if (strchr(shortopts
, optopt
) != NULL
){
4043 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
4044 PrintValidArgs(optopt
);
4046 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
4048 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4055 // Check to see if option had an unrecognized or incorrect argument.
4059 // It would be nice to print the actual option name given by the user
4060 // here, but we just print the short form. Please fix this if you know
4061 // a clean way to do it.
4062 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
4063 PrintValidArgs(optchar
);
4064 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4069 // non-option arguments are not allowed
4070 if (argc
> optind
) {
4073 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
4074 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
4078 // no pidfile in debug mode
4079 if (debugmode
&& pid_file
) {
4082 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4083 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
);
4084 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
4094 // Function we call if no configuration file was found or if the
4095 // SCANDIRECTIVE Directive was found. It makes entries for device
4096 // names returned by make_device_names() in os_OSNAME.c
4097 int MakeConfigEntries(const char *type
, int start
){
4100 char** devlist
= NULL
;
4101 cfgfile
*first
=cfgentries
[0],*cfg
=first
;
4103 // Hack! This is to make DEVICESCAN work on ATA devices behind
4104 // a SCSI to ATA Translation (SAT) Layer.
4105 // This will work on a general OS if the way that SAT devices are
4106 // named is the same as SCSI devices.
4107 // The BETTER solution is to modify make_device_names to recognize
4108 // the additional type "SAT". This requires changing os_*.cpp.
4110 const char *basetype
= type
;
4111 if (!strcmp(type
,"SAT") )
4114 // make list of devices
4115 if ((num
=make_device_names(&devlist
,basetype
))<0)
4116 PrintOut(LOG_CRIT
,"Problem creating device name scan list\n");
4118 // if no devices, or error constructing list, return
4122 // loop over entries to create
4123 for (i
=0; i
<num
; i
++){
4125 // make storage and copy for all but first entry
4127 // allocate more storage if needed
4128 while (cfgentries_max
<=start
+i
)
4129 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "simulated configuration file device");
4130 cfg
=cfgentries
[start
+i
]=CreateConfigEntry(first
);
4134 if (!strcmp(type
,"ATA") )
4135 cfg
->controller_type
= CONTROLLER_ATA
;
4136 if (!strcmp(type
,"SCSI") )
4137 cfg
->controller_type
= CONTROLLER_SCSI
;
4138 if (!strcmp(type
,"SAT") )
4139 cfg
->controller_type
= CONTROLLER_SAT
;
4141 // remove device name, if it's there, and put in correct one
4142 cfg
->name
=FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
4143 // save pointer to the device name created within
4144 // make_device_names
4145 cfg
->name
=devlist
[i
];
4148 // If needed, free memory used for devlist: pointers now in
4149 // cfgentries[]->names. If num==0 we never get to this point, but
4150 // that's OK. If we realloc()d the array length in
4151 // make_device_names() that was ALREADY equivalent to calling
4153 devlist
= FreeNonZero(devlist
,(sizeof (char*) * num
),__LINE__
, filenameandversion
);
4158 void CanNotRegister(char *name
, char *type
, int line
, int scandirective
){
4159 if( !debugmode
&& scandirective
== 1 ) { return; }
4161 PrintOut(scandirective
?LOG_INFO
:LOG_CRIT
,
4162 "Unable to register %s device %s at line %d of file %s\n",
4163 type
, name
, line
, configfile
);
4165 PrintOut(LOG_INFO
,"Unable to register %s device %s\n",
4170 // Returns negative value (see ParseConfigFile()) if config file
4171 // had errors, else number of entries which may be zero or positive.
4172 // If we found no configuration file, or it contained SCANDIRECTIVE,
4173 // then *scanning is set to 1, else 0.
4174 int ReadOrMakeConfigEntries(int *scanning
){
4177 // deallocate any cfgfile data structures in memory
4178 RmAllConfigEntries();
4180 // parse configuration file configfile (normally /etc/smartd.conf)
4181 if ((entries
=ParseConfigFile())<0) {
4183 // There was an error reading the configuration file.
4184 RmAllConfigEntries();
4186 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
4190 // did we find entries or scan?
4193 // no error parsing config file.
4195 // we did not find a SCANDIRECTIVE and did find valid entries
4196 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
4198 else if (cfgentries
&& cfgentries
[0]) {
4199 // we found a SCANDIRECTIVE or there was no configuration file so
4200 // scan. Configuration file's first entry contains all options
4202 cfgfile
*first
=cfgentries
[0];
4204 // By default scan for ATA, SCSI and SAT devices
4205 int doata
=1, doscsi
=1, dosat
=1;
4207 if (first
->controller_type
==CONTROLLER_SCSI
) {
4210 } else if (first
->controller_type
==CONTROLLER_ATA
) {
4213 } else if (first
->controller_type
==CONTROLLER_SAT
) {
4218 // The code in this block has been neutered by D. Gilbert
4219 // on 20070226. smartd can't cope ATA disk behind a SAT
4220 // transport seamlessly _without_ a bigger restructuring
4221 // of smartd than this code tried. It made ATA disks
4222 // behind a SAT interface automatically detected only by
4223 // killing support for real SCSI disks. Sorry, no.
4228 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
4230 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
4232 // make config list of ATA devices to search for
4234 entries
+=MakeConfigEntries("ATA", entries
);
4235 // make config list of SCSI devices to search for
4237 entries
+=MakeConfigEntries("SCSI", entries
);
4239 entries
+=MakeConfigEntries("SAT", entries
);
4241 // warn user if scan table found no devices
4243 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
4244 // get rid of fake entry with SCANDIRECTIVE as name
4245 RmConfigEntry(cfgentries
, __LINE__
);
4249 PrintOut(LOG_CRIT
,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile
);
4255 // This function tries devices from cfgentries. Each one that can be
4256 // registered is moved onto the [ata|scsi]devices lists and removed
4257 // from the cfgentries list, else its memory is deallocated.
4258 void RegisterDevices(int scanning
){
4261 // start by clearing lists/memory of ALL existing devices
4263 numdevata
=numdevscsi
=0;
4266 for (i
=0; i
<cfgentries_max
; i
++){
4268 cfgfile
*ent
=cfgentries
[i
];
4270 // skip any NULL entries (holes)
4274 // register ATA devices
4275 if (ent
->controller_type
!=CONTROLLER_SCSI
&& ent
->controller_type
!=CONTROLLER_CCISS
){
4276 if (ATADeviceScan(ent
, scanning
))
4277 CanNotRegister(ent
->name
, "ATA", ent
->lineno
, scanning
);
4279 // move onto the list of ata devices
4281 while (numdevata
+numdevscsi
>=ATAandSCSIdevlist_max
)
4282 ATAandSCSIdevlist
=AllocateMoreSpace(ATAandSCSIdevlist
, &ATAandSCSIdevlist_max
, "ATA and SCSI devices");
4283 ent
->WhichCheckDevice
=0;
4284 ATAandSCSIdevlist
[numdevscsi
+numdevata
]=ent
;
4289 // then register SCSI devices
4290 if (ent
->controller_type
==CONTROLLER_SCSI
|| ent
->controller_type
==CONTROLLER_CCISS
||
4291 ent
->controller_type
==CONTROLLER_UNKNOWN
){
4295 struct sigaction alarmAction
, defaultaction
;
4297 // Set up an alarm handler to catch USB devices that hang on
4299 alarmAction
.sa_handler
= AlarmHandler
;
4300 alarmAction
.sa_flags
= SA_RESTART
;
4301 if (sigaction(SIGALRM
, &alarmAction
, &defaultaction
)) {
4302 // if we can't set timeout, just scan device
4303 PrintOut(LOG_CRIT
, "Unable to initialize SCSI timeout mechanism.\n");
4304 retscsi
=SCSIandSATDeviceScan(ent
, scanning
);
4307 // prepare return point in case of bad SCSI device
4308 if (setjmp(registerscsienv
))
4309 // SCSI device timed out!
4312 // Set alarm, make SCSI call, reset alarm
4314 retscsi
=SCSIandSATDeviceScan(ent
, scanning
);
4317 if (sigaction(SIGALRM
, &defaultaction
, NULL
)){
4318 PrintOut(LOG_CRIT
, "Unable to clear SCSI timeout mechanism.\n");
4322 retscsi
=SCSIandSATDeviceScan(ent
, scanning
);
4325 // Now scan SCSI device...
4328 PrintOut(LOG_CRIT
, "Device %s timed out (poorly-implemented USB device?)\n", ent
->name
);
4329 CanNotRegister(ent
->name
, "SCSI", ent
->lineno
, scanning
);
4332 // move onto the list of scsi devices
4334 while (numdevscsi
+numdevata
>=ATAandSCSIdevlist_max
)
4335 ATAandSCSIdevlist
=AllocateMoreSpace(ATAandSCSIdevlist
, &ATAandSCSIdevlist_max
, "ATA and SCSI devices");
4336 ATAandSCSIdevlist
[numdevata
+numdevscsi
]=ent
;
4341 // if device is explicitly listed and we can't register it, then
4342 // exit unless the user has specified that the device is removable
4343 if (cfgentries
[i
] && !scanning
){
4344 if (ent
->removable
|| quit
==2)
4345 PrintOut(LOG_INFO
, "Device %s not available\n", ent
->name
);
4347 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", ent
->name
);
4352 // free up memory if device could not be registered
4353 RmConfigEntry(cfgentries
+i
, __LINE__
);
// Daemon entry point. Two alternative headers follow (plain main(), and the
// Windows-internal smartd_main()); the preprocessor conditionals that select
// between them are not visible in this listing.
// Visible flow: parse options, then loop -- exit on a caught exit signal,
// (re)read the configuration and register devices on the first pass or after
// a HUP, check all devices, detach into the background on the first pass
// (non-debug mode), and sleep until the next check time.
// NOTE(review): this listing is missing lines (braces, else branches, some
// statements); comments below describe only what is visible.
4362 int main(int argc
, char **argv
)
4364 // Windows: internal main function started directly or by the service control manager
4365 static int smartd_main(int argc
, char **argv
)
4368 // external control variables for ATA disks
4369 smartmonctrl control
;
4371 // is it our first pass through?
4374 // next time to wake up
4377 // for simplicity, null all global communications variables/lists
// NOTE(review): `con` presumably points at `control`; the assignment is not
// visible in this listing -- confirm.
4379 memset(con
, 0,sizeof(control
));
4381 // parse input and print header and usage info if needed
4382 ParseOpts(argc
,argv
);
4384 // do we mute printing from ataprint commands?
4385 con
->printing_switchable
=0;
// printing is suppressed unless debugmode is set
4386 con
->dont_print
=debugmode
?0:1;
4388 // don't exit on bad checksums
4389 con
->checksumfail
=0;
4391 // the main loop of the code
4394 // are we exiting from a signal?
4395 if (caughtsigEXIT
) {
4396 // are we exiting with SIGTERM?
4397 int isterm
=(caughtsigEXIT
==SIGTERM
);
4398 int isquit
=(caughtsigEXIT
==SIGQUIT
);
// In debug mode both SIGTERM and SIGQUIT count as a normal exit;
// otherwise only SIGTERM does.
4399 int isok
=debugmode
?isterm
|| isquit
:isterm
;
// A normal exit is logged at LOG_INFO and exits with status 0; any other
// signal is logged at LOG_CRIT and exits with EXIT_SIGNAL.
4401 PrintOut(isok
?LOG_INFO
:LOG_CRIT
, "smartd received signal %d: %s\n",
4402 caughtsigEXIT
, strsignal(caughtsigEXIT
));
4404 EXIT(isok
?0:EXIT_SIGNAL
);
4407 // Should we (re)read the config file?
4408 if (firstpass
|| caughtsigHUP
){
4409 int entries
, scanning
=0;
4413 // Workaround for missing SIGQUIT via keyboard on Cygwin
4414 if (caughtsigHUP
==2) {
4415 // Simulate SIGQUIT if another SIGINT arrives soon
4418 if (caughtsigHUP
==2) {
4419 caughtsigEXIT
=SIGQUIT
;
4427 "Signal HUP - rereading configuration file %s\n":
4428 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME
" quits)\n\n",
4432 // clears cfgentries, (re)reads config file, makes >=0 entries
4433 entries
=ReadOrMakeConfigEntries(&scanning
);
4436 // checks devices, then moves onto ata/scsi list or deallocates.
4437 RegisterDevices(scanning
);
4439 else if (quit
==2 || ((quit
==0 || quit
==1) && !firstpass
)) {
4440 // user has asked to continue on error in configuration file
4442 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
4445 // exit with configuration file error status
// entries==-3 maps to EXIT_READCONF, entries==-2 to EXIT_NOCONF, and any
// other value reaching this point to EXIT_BADCONF.
4446 int status
= (entries
==-3 ? EXIT_READCONF
: entries
==-2 ? EXIT_NOCONF
: EXIT_BADCONF
);
4450 // Log number of devices we are monitoring...
4451 if (numdevata
+numdevscsi
|| quit
==2 || (quit
==1 && !firstpass
))
4452 PrintOut(LOG_INFO
,"Monitoring %d ATA and %d SCSI devices\n",
4453 numdevata
, numdevscsi
);
4455 PrintOut(LOG_INFO
,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4460 // user has asked to print test schedule
4461 PrintTestSchedule(ATAandSCSIdevlist
);
4469 // check all devices once,
4470 // self tests are not started in first pass unless '-q onecheck' is specified
4471 CheckDevicesOnce(ATAandSCSIdevlist
, (!firstpass
|| quit
==3));
4473 // user has asked us to exit after first check
4475 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4476 "smartd is exiting (exit status 0)\n");
4480 // fork into background if needed
4481 if (firstpass
&& !debugmode
) {
4485 // set exit and signal handlers, write PID file, set wake-up time
4487 Initialize(&wakeuptime
);
4491 // sleep until next check time, or a signal arrives
// dosleep() returns the value stored as the next wake-up time
4492 wakeuptime
=dosleep(wakeuptime
);
4498 // Main function for Windows
// Windows entry point: builds the static service options and hands control
// to daemon_main(), passing smartd_main as the function to run. Its return
// value becomes the return value of main().
// NOTE(review): parts of the svc_opts initializer and the closing braces are
// not visible in this listing.
4499 int main(int argc
, char **argv
){
4500 // Options for smartd windows service
4501 static const daemon_winsvc_options svc_opts
= {
4502 "--service", // cmd_opt
4503 "smartd", "SmartD Service", // servicename, displayname
// presumably the service description; adjacent string literals are
// concatenated into a single string by the compiler
4505 "Controls and monitors storage devices using the Self-Monitoring, "
4506 "Analysis and Reporting Technology System (S.M.A.R.T.) "
4507 "built into ATA and SCSI Hard Drives. "
4510 // daemon_main() handles daemon and service specific commands
4511 // and starts smartd_main() directly, from a new process,
4512 // or via service control manager
4513 return daemon_main("smartd", &svc_opts
, smartd_main
, argc
, argv
);