2 * Home page of code is: http://smartmontools.sourceforge.net
4 * Copyright (C) 2002-6 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * You should have received a copy of the GNU General Public License
13 * (for example COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16 * This code was originally developed as a Senior Thesis by Michael Cornwell
17 * at the Concurrent Systems Laboratory (now part of the Storage Systems
18 * Research Center), Jack Baskin School of Engineering, University of
19 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
23 // unconditionally included files
26 #include <sys/types.h>
27 #include <sys/stat.h> // umask
46 // see which system files to conditionally include
49 // conditionally included files
50 #ifdef HAVE_GETOPT_LONG
59 #pragma warning(disable:4761) // "conversion supplied"
60 typedef unsigned short mode_t
;
63 #include <io.h> // umask()
64 #include <process.h> // getpid()
69 // BOOL WINAPI FreeConsole(void);
70 extern "C" int __stdcall
FreeConsole(void);
71 #include <io.h> // setmode()
74 // locally included files
79 #include "knowndrives.h"
85 #include "hostname_win32.h" // gethost/domainname()
86 #define HAVE_GETHOSTNAME 1
87 #define HAVE_GETDOMAINNAME 1
88 // fork()/signal()/initd simulation for native Windows
89 #include "daemon_win32.h" // daemon_main/detach/signal()
91 #define SIGNALFN daemon_signal
92 #define strsignal daemon_strsignal
93 #define sleep daemon_sleep
94 #undef EXIT // see utility.h
95 #define EXIT(x) { exitstatus = daemon_winsvc_exitcode = (x); exit((x)); }
96 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
97 #define SIGQUIT SIGBREAK
98 #define SIGQUIT_KEYNAME "CONTROL-Break"
101 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
102 #define SIGQUIT_KEYNAME "2x CONTROL-C"
104 #define SIGQUIT_KEYNAME "CONTROL-\\"
108 #if defined (__SVR4) && defined (__sun)
109 extern "C" int getdomainname(char *, int); // no declaration in header files!
112 #define ARGUSED(x) ((void)(x))
114 // These are CVS identification information for *.cpp and *.h files
115 extern const char *atacmdnames_c_cvsid
, *atacmds_c_cvsid
, *ataprint_c_cvsid
, *escalade_c_cvsid
,
116 *knowndrives_c_cvsid
, *os_XXXX_c_cvsid
, *scsicmds_c_cvsid
, *utility_c_cvsid
;
118 static const char *filenameandversion
="$Id: smartd.cpp,v 1.378 2006/09/20 16:17:31 shattered Exp $";
119 #ifdef NEED_SOLARIS_ATA_CODE
120 extern const char *os_solaris_ata_s_cvsid
;
123 extern const char *daemon_win32_c_cvsid
, *hostname_win32_c_cvsid
, *syslog_win32_c_cvsid
;
125 const char *smartd_c_cvsid
="$Id: smartd.cpp,v 1.378 2006/09/20 16:17:31 shattered Exp $"
126 ATACMDS_H_CVSID ATAPRINT_H_CVSID CONFIG_H_CVSID
127 #ifdef DAEMON_WIN32_H_CVSID
130 EXTERN_H_CVSID INT64_H_CVSID
131 #ifdef HOSTNAME_WIN32_H_CVSID
132 HOSTNAME_WIN32_H_CVSID
134 KNOWNDRIVES_H_CVSID SCSICMDS_H_CVSID SMARTD_H_CVSID
135 #ifdef SYSLOG_H_CVSID
140 extern const char *reportbug
;
142 // GNU copyleft statement. Needed for GPL purposes.
143 const char *copyleftstring
="smartd comes with ABSOLUTELY NO WARRANTY. This is\n"
144 "free software, and you are welcome to redistribute it\n"
145 "under the terms of the GNU General Public License\n"
146 "Version 2. See http://www.gnu.org for further details.\n\n";
148 extern unsigned char debugmode
;
150 // command-line: how long to sleep between checks
151 static int checktime
=CHECKTIME
;
153 // command-line: name of PID file (NULL for no pid file)
154 static char* pid_file
=NULL
;
156 // configuration file name
158 static char* configfile
= SMARTMONTOOLS_SYSCONFDIR
"/" CONFIGFILENAME
;
160 static char* configfile
= "./" CONFIGFILENAME
;
162 // configuration file "name" if read from stdin
163 static /*const*/ char * const configfile_stdin
= "<stdin>";
164 // allocated memory for alternate configuration file name
165 static char* configfile_alt
= NULL
;
167 // command-line: when should we exit?
170 // command-line; this is the default syslog(3) log facility to use.
171 static int facility
=LOG_DAEMON
;
174 // command-line: running as service, so don't fork()
175 static int is_service
=0;
178 // used for control of printing, passing arguments to atacmds.c
179 smartmonctrl
*con
=NULL
;
181 // pointers to (real or simulated) entries in configuration file, and
182 // maximum space currently allocated for these entries.
183 cfgfile
**cfgentries
=NULL
;
184 int cfgentries_max
=0;
186 // pointers to ATA and SCSI devices being monitored, maximum and actual numbers
188 cfgfile
**atadevlist
=NULL
, **scsidevlist
=NULL
;
189 int atadevlist_max
=0, scsidevlist_max
=0;
190 int numdevata
=0, numdevscsi
=0;
192 // track memory usage
193 extern int64_t bytes
;
196 extern int exitstatus
;
198 // set to one if we catch a USR1 (check devices now)
199 volatile int caughtsigUSR1
=0;
202 // set to one if we catch a USR2 (toggle debug mode)
203 volatile int caughtsigUSR2
=0;
206 // set to one if we catch a HUP (reload config file). In debug mode,
207 // set to two, if we catch INT (also reload config file).
208 volatile int caughtsigHUP
=0;
210 // set to signal value if we catch INT, QUIT, or TERM
211 volatile int caughtsigEXIT
=0;
214 // stack environment if we time out during SCSI access (USB devices)
215 jmp_buf registerscsienv
;
218 // translate cfg->pending into the correct Attribute numbers
219 void TranslatePending(unsigned short pending
, unsigned char *current
, unsigned char *offline
) {
221 unsigned char curr
= CURR_PEND(pending
);
222 unsigned char off
= OFF_PEND(pending
);
224 // look for special value of CUR_UNC_DEFAULT that means DONT
225 // monitor. 0 means DO test.
226 if (curr
==CUR_UNC_DEFAULT
)
229 curr
=CUR_UNC_DEFAULT
;
231 // look for special value of OFF_UNC_DEFAULT that means DONT
232 // monitor. 0 means DO TEST.
233 if (off
==OFF_UNC_DEFAULT
)
245 // free all memory associated with selftest part of configfile entry. Return NULL
246 testinfo
* FreeTestData(testinfo
*data
){
248 // make sure we have something to do.
252 // free space for text pattern
253 data
->regex
=FreeNonZero(data
->regex
, -1, __LINE__
, filenameandversion
);
255 // free compiled expression
256 regfree(&(data
->cregex
));
258 // make sure that no sign of the compiled expression is left behind
259 // (just in case, to help detect bugs if we ever try and refer to it again)
261 memset(&(data
->cregex
), '0', sizeof(regex_t
));
263 // free remaining memory space
264 data
=FreeNonZero(data
, sizeof(testinfo
), __LINE__
, filenameandversion
);
269 cfgfile
**AllocateMoreSpace(cfgfile
**oldarray
, int *oldsize
, char *listname
){
270 // for now keep BLOCKSIZE small to help detect coding problems.
271 // Perhaps increase in the future.
272 const int BLOCKSIZE
=8;
275 int news
= olds
+ BLOCKSIZE
;
276 cfgfile
**newptr
=(cfgfile
**)realloc(oldarray
, news
*sizeof(cfgfile
*));
278 // did we get more space?
281 // clear remaining entries ala calloc()
282 for (i
=olds
; i
<news
; i
++)
285 bytes
+= BLOCKSIZE
*sizeof(cfgfile
*);
290 PrintOut(LOG_INFO
, "allocating %d slots for %s\n", BLOCKSIZE
, listname
);
296 PrintOut(LOG_CRIT
, "out of memory for allocating %s list\n", listname
);
300 void PrintOneCVS(const char *a_cvs_id
){
302 printone(out
,a_cvs_id
);
303 PrintOut(LOG_INFO
,"%s",out
);
307 // prints CVS identity information for the executable
309 char *configargs
=strlen(SMARTMONTOOLS_CONFIGURE_ARGS
)?SMARTMONTOOLS_CONFIGURE_ARGS
:"[no arguments given]";
311 PrintOut(LOG_INFO
,(char *)copyleftstring
);
312 PrintOut(LOG_INFO
,"CVS version IDs of files used to build this code are:\n");
313 PrintOneCVS(atacmdnames_c_cvsid
);
314 PrintOneCVS(atacmds_c_cvsid
);
315 PrintOneCVS(ataprint_c_cvsid
);
317 PrintOneCVS(daemon_win32_c_cvsid
);
320 PrintOneCVS(hostname_win32_c_cvsid
);
322 PrintOneCVS(knowndrives_c_cvsid
);
323 PrintOneCVS(os_XXXX_c_cvsid
);
324 #ifdef NEED_SOLARIS_ATA_CODE
325 PrintOneCVS( os_solaris_ata_s_cvsid
);
327 PrintOneCVS(scsicmds_c_cvsid
);
328 PrintOneCVS(smartd_c_cvsid
);
330 PrintOneCVS(syslog_win32_c_cvsid
);
332 PrintOneCVS(utility_c_cvsid
);
333 PrintOut(LOG_INFO
, "\nsmartmontools release " PACKAGE_VERSION
" dated " SMARTMONTOOLS_RELEASE_DATE
" at " SMARTMONTOOLS_RELEASE_TIME
"\n");
334 PrintOut(LOG_INFO
, "smartmontools build host: " SMARTMONTOOLS_BUILD_HOST
"\n");
335 PrintOut(LOG_INFO
, "smartmontools build configured: " SMARTMONTOOLS_CONFIGURE_DATE
"\n");
336 PrintOut(LOG_INFO
, "smartd compile dated " __DATE__
" at "__TIME__
"\n");
337 PrintOut(LOG_INFO
, "smartmontools configure arguments: %s\n", configargs
);
341 // Removes config file entry, freeing all memory
342 void RmConfigEntry(cfgfile
**anentry
, int whatline
){
346 // pointer should never be null!
348 PrintOut(LOG_CRIT
,"Internal error in RmConfigEntry() at line %d of file %s\n%s",
349 whatline
, filenameandversion
, reportbug
);
353 // only remove entries that exist!
357 // entry exists -- free all of its memory
358 cfg
->name
= FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
359 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
360 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
361 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
362 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
364 cfg
->mailwarn
->address
= FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
365 cfg
->mailwarn
->emailcmdline
= FreeNonZero(cfg
->mailwarn
->emailcmdline
, -1,__LINE__
,filenameandversion
);
366 cfg
->mailwarn
= FreeNonZero(cfg
->mailwarn
, sizeof(maildata
),__LINE__
,filenameandversion
);
368 cfg
->testdata
= FreeTestData(cfg
->testdata
);
369 *anentry
= FreeNonZero(cfg
, sizeof(cfgfile
),__LINE__
,filenameandversion
);
374 // deallocates all memory associated with cfgentries list
375 void RmAllConfigEntries(){
378 for (i
=0; i
<cfgentries_max
; i
++)
379 RmConfigEntry(cfgentries
+i
, __LINE__
);
381 cfgentries
=FreeNonZero(cfgentries
, sizeof(cfgfile
*)*cfgentries_max
, __LINE__
, filenameandversion
);
387 // deallocates all memory associated with ATA/SCSI device lists
388 void RmAllDevEntries(){
391 for (i
=0; i
<atadevlist_max
; i
++)
392 RmConfigEntry(atadevlist
+i
, __LINE__
);
394 atadevlist
=FreeNonZero(atadevlist
, sizeof(cfgfile
*)*atadevlist_max
, __LINE__
, filenameandversion
);
397 for (i
=0; i
<scsidevlist_max
; i
++)
398 RmConfigEntry(scsidevlist
+i
, __LINE__
);
400 scsidevlist
=FreeNonZero(scsidevlist
, sizeof(cfgfile
*)*scsidevlist_max
, __LINE__
, filenameandversion
);
406 // remove the PID file
407 void RemovePidFile(){
409 if ( -1==unlink(pid_file
) )
410 PrintOut(LOG_CRIT
,"Can't unlink PID file %s (%s).\n",
411 pid_file
, strerror(errno
));
412 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
418 // Note if we catch a SIGUSR1
419 void USR1handler(int sig
){
426 // Note if we catch a SIGUSR2
427 void USR2handler(int sig
){
434 // Note if we catch a HUP (or INT in debug mode)
435 void HUPhandler(int sig
){
443 // signal handler for TERM, QUIT, and INT (if not in debug mode)
444 void sighandler(int sig
){
451 // signal handler that prints Goodbye message and removes pidfile
454 // clean up memory -- useful for debugging
455 RmAllConfigEntries();
458 // delete PID file, if one was created
461 // remove alternate configfile name
462 configfile_alt
=FreeNonZero(configfile_alt
, -1,__LINE__
,filenameandversion
);
464 // useful for debugging -- have we managed memory correctly?
465 if (debugmode
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
466 PrintOut(LOG_INFO
, "Memory still allocated for devices at exit is %" PRId64
" bytes.\n", bytes
);
468 // if we are exiting because of a code bug, tell user
469 if (exitstatus
==EXIT_BADCODE
|| (bytes
&& exitstatus
!=EXIT_NOMEM
))
470 PrintOut(LOG_CRIT
, "Please inform " PACKAGE_BUGREPORT
", including output of smartd -V.\n");
472 if (exitstatus
==0 && bytes
)
473 exitstatus
=EXIT_BADCODE
;
475 // and this should be the final output from smartd before it exits
476 PrintOut(exitstatus
?LOG_CRIT
:LOG_INFO
, "smartd is exiting (exit status %d)\n", exitstatus
);
481 #define ENVLENGTH 1024
483 // a replacement for setenv() which is not available on all platforms.
484 // Note that the string passed to putenv must not be freed or made
485 // invalid, since a pointer to it is kept by putenv(). This means that
486 // it must either be a static buffer or allocated off the heap. The
487 // string can be freed if the environment variable is redefined or
488 // deleted via another call to putenv(). So we keep these on the stack
489 // as long as the popen() call is underway.
490 int exportenv(char* stackspace
, const char *name
, const char *value
){
491 snprintf(stackspace
,ENVLENGTH
, "%s=%s", name
, value
);
492 return putenv(stackspace
);
495 char* dnsdomain(const char* hostname
) {
497 #ifdef HAVE_GETHOSTBYNAME
500 if ((hp
= gethostbyname(hostname
))) {
501 // Does this work if gethostbyname() returns an IPv6 name in
502 // colon/dot notation? [BA]
503 if ((p
= strchr(hp
->h_name
, '.')))
514 // If either address or executable path is non-null then send and log
515 // a warning email, or execute executable
516 void MailWarning(cfgfile
*cfg
, int which
, char *fmt
, ...){
517 char command
[2048], message
[256], hostname
[256], domainname
[256], additional
[256],fullmessage
[1024];
518 char original
[256], further
[256], nisdomain
[256], subject
[256],dates
[DATEANDEPOCHLEN
];
519 char environ_strings
[11][ENVLENGTH
];
522 const int day
=24*3600;
530 "FailedHealthCheck", // 5
531 "FailedReadSmartData", // 6
532 "FailedReadSmartErrorLog", // 7
533 "FailedReadSmartSelfTestLog", // 8
534 "FailedOpenDevice", // 9
535 "CurrentPendingSector", // 10
536 "OfflineUncorrectableSector", // 11
540 char *address
, *executable
;
542 maildata
* data
=cfg
->mailwarn
;
546 char stdinbuf
[1024]; int boxmsgoffs
, boxtype
;
548 const char *newadd
=NULL
, *newwarn
=NULL
;
549 const char *unknown
="[Unknown]";
551 // See if user wants us to send mail
555 address
=data
->address
;
556 executable
=data
->emailcmdline
;
558 if (!address
&& !executable
)
561 // which type of mail are we sending?
562 mail
=(data
->maillog
)+which
;
565 if (data
->emailfreq
<1 || data
->emailfreq
>3) {
566 PrintOut(LOG_CRIT
,"internal error in MailWarning(): cfg->mailwarn->emailfreq=%d\n",data
->emailfreq
);
569 if (which
<0 || which
>=SMARTD_NMAIL
|| sizeof(whichfail
)!=SMARTD_NMAIL
*sizeof(char *)) {
570 PrintOut(LOG_CRIT
,"Contact " PACKAGE_BUGREPORT
"; internal error in MailWarning(): which=%d, size=%d\n",
571 which
, (int)sizeof(whichfail
));
575 // Return if a single warning mail has been sent.
576 if ((data
->emailfreq
==1) && mail
->logged
)
579 // Return if this is an email test and one has already been sent.
580 if (which
== 0 && mail
->logged
)
583 // To decide whether to send mail, we need to know what time it is.
586 // Return if less than one day has gone by
587 if (data
->emailfreq
==2 && mail
->logged
&& epoch
<(mail
->lastsent
+day
))
590 // Return if less than 2^(logged-1) days have gone by
591 if (data
->emailfreq
==3 && mail
->logged
){
592 days
=0x01<<(mail
->logged
-1);
594 if (epoch
<(mail
->lastsent
+days
))
598 // record the time of this mail message, and the first mail message
600 mail
->firstsent
=epoch
;
601 mail
->lastsent
=epoch
;
603 // get system host & domain names (not null terminated if length=MAX)
604 #ifdef HAVE_GETHOSTNAME
605 if (gethostname(hostname
, 256))
606 strcpy(hostname
, unknown
);
610 p
= dnsdomain(hostname
);
612 strncpy(domainname
, p
, 255);
613 domainname
[255]='\0';
615 strcpy(domainname
, unknown
);
618 strcpy(hostname
, unknown
);
619 strcpy(domainname
, unknown
);
622 #ifdef HAVE_GETDOMAINNAME
623 if (getdomainname(nisdomain
, 256))
624 strcpy(nisdomain
, unknown
);
628 strcpy(nisdomain
, unknown
);
631 // print warning string into message
633 vsnprintf(message
, 256, fmt
, ap
);
636 // appropriate message about further information
637 additional
[0]=original
[0]=further
[0]='\0';
639 sprintf(further
,"You can also use the smartctl utility for further investigation.\n");
641 switch (data
->emailfreq
){
643 sprintf(additional
,"No additional email messages about this problem will be sent.\n");
646 sprintf(additional
,"Another email message will be sent in 24 hours if the problem persists.\n");
649 sprintf(additional
,"Another email message will be sent in %d days if the problem persists\n",
650 (0x01)<<mail
->logged
);
653 if (data
->emailfreq
>1 && mail
->logged
){
654 dateandtimezoneepoch(dates
, mail
->firstsent
);
655 sprintf(original
,"The original email about this issue was sent at %s\n", dates
);
659 snprintf(subject
, 256,"SMART error (%s) detected on host: %s", whichfail
[which
], hostname
);
661 // If the user has set cfg->emailcmdline, use that as mailer, else "mail" or "mailx".
663 #ifdef DEFAULT_MAILER
664 executable
= DEFAULT_MAILER
;
669 executable
= "blat"; // http://blat.sourceforge.net/
673 // make a private copy of address with commas replaced by spaces
674 // to separate recipients
676 address
=CustomStrDup(data
->address
, 1, __LINE__
, filenameandversion
);
677 #ifndef _WIN32 // blat mailer needs comma
680 while ((comma
=strchr(comma
, ',')))
686 // Export information in environment variables that will be useful
688 exportenv(environ_strings
[0], "SMARTD_MAILER", executable
);
689 exportenv(environ_strings
[1], "SMARTD_MESSAGE", message
);
690 exportenv(environ_strings
[2], "SMARTD_SUBJECT", subject
);
691 dateandtimezoneepoch(dates
, mail
->firstsent
);
692 exportenv(environ_strings
[3], "SMARTD_TFIRST", dates
);
693 snprintf(dates
, DATEANDEPOCHLEN
,"%d", (int)mail
->firstsent
);
694 exportenv(environ_strings
[4], "SMARTD_TFIRSTEPOCH", dates
);
695 exportenv(environ_strings
[5], "SMARTD_FAILTYPE", whichfail
[which
]);
697 exportenv(environ_strings
[6], "SMARTD_ADDRESS", address
);
698 exportenv(environ_strings
[7], "SMARTD_DEVICESTRING", cfg
->name
);
700 switch (cfg
->controller_type
) {
701 case CONTROLLER_3WARE_678K
:
702 case CONTROLLER_3WARE_9000_CHAR
:
703 case CONTROLLER_3WARE_678K_CHAR
:
705 char *s
,devicetype
[16];
706 sprintf(devicetype
, "3ware,%d", cfg
->controller_port
-1);
707 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
708 if ((s
=strchr(cfg
->name
, ' ')))
710 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
716 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "ata");
717 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
719 case CONTROLLER_MARVELL_SATA
:
720 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "marvell");
721 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
723 case CONTROLLER_SCSI
:
724 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "scsi");
725 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
727 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", "sat");
728 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
731 char *s
,devicetype
[16];
732 sprintf(devicetype
, "hpt,%d/%d/%d", cfg
->hpt_data
[0],
733 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
734 exportenv(environ_strings
[8], "SMARTD_DEVICETYPE", devicetype
);
735 if ((s
=strchr(cfg
->name
, ' ')))
737 exportenv(environ_strings
[9], "SMARTD_DEVICE", cfg
->name
);
743 snprintf(fullmessage
, 1024,
744 "This email was generated by the smartd daemon running on:\n\n"
747 " NIS domain: %s\n\n"
748 "The following warning/error was logged by the smartd daemon:\n\n"
750 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
752 hostname
, domainname
, nisdomain
, message
, further
, original
, additional
);
753 exportenv(environ_strings
[10], "SMARTD_FULLMESSAGE", fullmessage
);
755 // now construct a command to send this as EMAIL
758 snprintf(command
, 2048,
759 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
760 "%sENDMAIL\n", subject
, address
, fullmessage
);
762 snprintf(command
, 2048, "%s 2>&1", executable
);
764 // tell SYSLOG what we are about to do...
765 newadd
=address
?address
:"<nomailer>";
766 newwarn
=which
?"Warning via":"Test of";
768 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
769 which
?"Sending warning via":"Executing test of", executable
, newadd
);
771 // issue the command to send mail or to run the user's executable
773 if (!(pfp
=popen(command
, "r")))
774 // failed to popen() mail process
775 PrintOut(LOG_CRIT
,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
776 newwarn
, executable
, newadd
, errno
?strerror(errno
):"");
780 char buffer
[EBUFLEN
];
782 // if unexpected output on stdout/stderr, null terminate, print, and flush
783 if ((len
=fread(buffer
, 1, EBUFLEN
, pfp
))) {
785 int newlen
= len
<EBUFLEN
? len
: EBUFLEN
-1;
787 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
788 newwarn
, executable
, newadd
, len
!=newlen
?"here truncated to ":"", newlen
, buffer
);
790 // flush pipe if needed
791 while (fread(buffer
, 1, EBUFLEN
, pfp
) && count
<EBUFLEN
)
794 // tell user that pipe was flushed, or that something is really wrong
795 if (count
&& count
<EBUFLEN
)
796 PrintOut(LOG_CRIT
,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
797 newwarn
, executable
, newadd
);
799 PrintOut(LOG_CRIT
,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
800 newwarn
, executable
, newadd
);
803 // if something went wrong with mail process, print warning
805 if (-1==(status
=pclose(pfp
)))
806 PrintOut(LOG_CRIT
,"%s %s to %s: pclose(3) failed %s\n", newwarn
, executable
, newadd
,
807 errno
?strerror(errno
):"");
809 // mail process apparently succeeded. Check and report exit status
812 if (WIFEXITED(status
)) {
813 // exited 'normally' (but perhaps with nonzero status)
814 status8
=WEXITSTATUS(status
);
817 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
818 newwarn
, executable
, newadd
, status
, status8
, status8
-128, strsignal(status8
-128));
820 PrintOut(LOG_CRIT
,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
821 newwarn
, executable
, newadd
, status
, status8
);
823 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
826 if (WIFSIGNALED(status
))
827 PrintOut(LOG_INFO
,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
828 newwarn
, executable
, newadd
, WTERMSIG(status
), strsignal(WTERMSIG(status
)));
830 // this branch is probably not possible. If subprocess is
831 // stopped then pclose() should not return.
832 if (WIFSTOPPED(status
))
833 PrintOut(LOG_CRIT
,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
834 newwarn
, executable
, newadd
, WSTOPSIG(status
), strsignal(WSTOPSIG(status
)));
841 // No "here-documents" on Windows, so must use separate commandline and stdin
842 command
[0] = stdinbuf
[0] = 0;
843 boxtype
= -1; boxmsgoffs
= 0;
844 newadd
= "<nomailer>";
846 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
847 int addroffs
= (!strncmp(address
, "sys", 3) ? 3 : 0);
848 if (!strncmp(address
+addroffs
, "msgbox", 6) && (!address
[addroffs
+6] || address
[addroffs
+6] == ',')) {
849 boxtype
= (addroffs
> 0 ? 1 : 0);
851 if (address
[addroffs
])
857 if (address
[addroffs
]) {
858 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
859 snprintf(command
, sizeof(command
),
860 "%s - -q -subject \"%s\" -to \"%s\"",
861 executable
, subject
, address
+addroffs
);
862 newadd
= address
+addroffs
;
864 // Message for mail [0...] and messagebox [boxmsgoffs...]
865 snprintf(stdinbuf
, sizeof(stdinbuf
),
866 "This email was generated by the smartd daemon running on:\n\n"
869 // " NIS domain: %s\n"
871 "The following warning/error was logged by the smartd daemon:\n\n"
873 "For details see the event log or log file of smartd.\n\n"
876 hostname
, /*domainname, */ nisdomain
, &boxmsgoffs
, message
, further
, original
, additional
);
879 snprintf(command
, sizeof(command
), "%s", executable
);
881 newwarn
=which
?"Warning via":"Test of";
884 daemon_messagebox(boxtype
, subject
, stdinbuf
+boxmsgoffs
);
885 PrintOut(LOG_INFO
,"%s message box\n", newwarn
);
888 char stdoutbuf
[800]; // < buffer in syslog_win32::vsyslog()
891 PrintOut(LOG_INFO
,"%s %s to %s ...\n",
892 (which
?"Sending warning via":"Executing test of"), executable
, newadd
);
893 rc
= daemon_spawn(command
, stdinbuf
, strlen(stdinbuf
), stdoutbuf
, sizeof(stdoutbuf
));
894 if (rc
>= 0 && stdoutbuf
[0])
895 PrintOut(LOG_CRIT
,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
896 newwarn
, executable
, newadd
, strlen(stdoutbuf
), stdoutbuf
);
898 PrintOut(LOG_CRIT
,"%s %s to %s: failed, exit status %d\n",
899 newwarn
, executable
, newadd
, rc
);
901 PrintOut(LOG_INFO
,"%s %s to %s: successful\n", newwarn
, executable
, newadd
);
906 // increment mail sent counter
909 // free copy of address (without commas)
910 address
=FreeNonZero(address
, -1, __LINE__
, filenameandversion
);
915 // Printing function for watching ataprint commands, or losing them
916 // [From GLIBC Manual: Since the prototype doesn't specify types for
917 // optional arguments, in a call to a variadic function the default
918 // argument promotions are performed on the optional argument
919 // values. This means the objects of type char or short int (whether
920 // signed or not) are promoted to either int or unsigned int, as appropriate.]
922 void pout(const char *fmt
, ...){
925 // get the correct time in syslog()
926 FixGlibcTimeZoneBug();
927 // initialize variable argument list
929 // in debug==1 mode we will print the output from the ataprint.o functions!
930 if (debugmode
&& debugmode
!=2)
932 if (facility
== LOG_LOCAL1
) // logging to stdout
933 vfprintf(stderr
,fmt
,ap
);
937 // in debug==2 mode we print output from knowndrives.o functions
938 else if (debugmode
==2 || con
->reportataioctl
|| con
->reportscsiioctl
|| con
->controller_port
) {
939 openlog("smartd", LOG_PID
, facility
);
940 vsyslog(LOG_INFO
, fmt
, ap
);
948 // This function prints either to stdout or to the syslog as needed.
949 // This function is also used by utility.cpp to report LOG_CRIT errors.
950 void PrintOut(int priority
, const char *fmt
, ...){
953 // get the correct time in syslog()
954 FixGlibcTimeZoneBug();
955 // initialize variable argument list
959 if (facility
== LOG_LOCAL1
) // logging to stdout
960 vfprintf(stderr
,fmt
,ap
);
965 openlog("smartd", LOG_PID
, facility
);
966 vsyslog(priority
,fmt
,ap
);
973 // Forks new process, closes ALL file descriptors, redirects stdin,
974 // stdout, and stderr. Not quite daemon(). See
975 // http://www.iar.unlp.edu.ar/~fede/revistas/lj/Magazines/LJ47/2335.html
976 // for a good description of why we do things this way.
982 // flush all buffered streams. Else we might get two copies of open
983 // streams since both parent and child get copies of the buffers.
986 if ((pid
=fork()) < 0) {
988 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
992 // we are the parent process -- exit cleanly
995 // from here on, we are the child process.
998 // Fork one more time to avoid any possibility of having terminals
999 if ((pid
=fork()) < 0) {
1001 PrintOut(LOG_CRIT
,"smartd unable to fork daemon process!\n");
1005 // we are the parent process -- exit cleanly
1008 // Now we are the child's child...
1010 // close any open file descriptors
1011 for (i
=getdtablesize();i
>=0;--i
)
1015 // Cygwin's setsid() does not detach the process from Windows console
1017 #endif // __CYGWIN__
1019 // redirect any IO attempts to /dev/null for stdin
1020 i
=open("/dev/null",O_RDWR
);
1028 PrintOut(LOG_INFO
, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1032 // No fork() on native Win32
1033 // Detach this process from console
1035 if (daemon_detach("smartd")) {
1036 PrintOut(LOG_CRIT
,"smartd unable to detach from console!\n");
1039 // stdin/out/err now closed if not redirected
1045 // create a PID file containing the current process id
1046 void WritePidFile() {
1049 pid_t pid
= getpid();
1054 old_umask
= umask(0077); // rwx------
1056 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1057 old_umask
= umask(0033); // rwxr--r--
1059 fp
= fopen(pid_file
, "w");
1063 } else if (fprintf(fp
, "%d\n", (int)pid
) <= 0) {
1065 } else if (fclose(fp
) != 0) {
1069 PrintOut(LOG_CRIT
, "unable to write PID file %s - exiting.\n", pid_file
);
1072 PrintOut(LOG_INFO
, "file %s written containing PID %d\n", pid_file
, (int)pid
);
1077 // Prints header identifying version of code and home
1079 #ifdef HAVE_GET_OS_VERSION_STR
1080 const char * ver
= get_os_version_str();
1082 const char * ver
= SMARTMONTOOLS_BUILD_HOST
;
1084 PrintOut(LOG_INFO
,"smartd version %s [%s] Copyright (C) 2002-6 Bruce Allen\n", PACKAGE_VERSION
, ver
);
1085 PrintOut(LOG_INFO
,"Home page is " PACKAGE_HOMEPAGE
"\n\n");
1089 // prints help info for configuration file Directives
1092 "Configuration file (%s) Directives (after device name):\n"
1093 " -d TYPE Set the device type: ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N\n"
1094 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1095 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1096 " -S VAL Enable/disable attribute autosave (on/off)\n"
1097 " -n MODE No check if: never[,q], sleep[,q], standby[,q], idle[,q]\n"
1098 " -H Monitor SMART Health Status, report if failed\n"
1099 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1100 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1101 " -f Monitor 'Usage' Attributes, report failures\n"
1102 " -m ADD Send email warning to address ADD\n"
1103 " -M TYPE Modify email warning behavior (see man page)\n"
1104 " -p Report changes in 'Prefailure' Attributes\n"
1105 " -u Report changes in 'Usage' Attributes\n"
1106 " -t Equivalent to -p and -u Directives\n"
1107 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1108 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1109 " -i ID Ignore Attribute ID for -f Directive\n"
1110 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1111 " -C ID Monitor Current Pending Sectors in Attribute ID\n"
1112 " -U ID Monitor Offline Uncorrectable Sectors in Attribute ID\n"
1113 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1114 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1115 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1116 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1117 " -F TYPE Firmware bug workaround: none, samsung, samsung2\n"
1118 " # Comment: text after a hash sign is ignored\n"
1119 " \\ Line continuation character\n"
1120 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1121 "Use ID = 0 to turn off -C and/or -U Directives\n"
1122 "Example: /dev/hda -a\n",
1127 /* Returns a pointer to a static string containing a formatted list of the valid
1128 arguments to the option opt or NULL on failure. */
1129 const char *GetValidArgList(char opt
) {
1132 return "<FILE_NAME>, -";
1134 return "valid_regular_expression";
1136 return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
1138 return "nodev, errors, nodevstartup, never, onecheck, showtests";
1140 return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
1142 return "<FILE_NAME>";
1144 return "<INTEGER_SECONDS>";
1150 /* prints help information for command syntax */
1152 PrintOut(LOG_INFO
,"Usage: smartd [options]\n\n");
1153 #ifdef HAVE_GETOPT_LONG
1154 PrintOut(LOG_INFO
," -c NAME|-, --configfile=NAME|-\n");
1155 PrintOut(LOG_INFO
," Read configuration file NAME or stdin [default is %s]\n\n", configfile
);
1156 PrintOut(LOG_INFO
," -d, --debug\n");
1157 PrintOut(LOG_INFO
," Start smartd in debug mode\n\n");
1158 PrintOut(LOG_INFO
," -D, --showdirectives\n");
1159 PrintOut(LOG_INFO
," Print the configuration file Directives and exit\n\n");
1160 PrintOut(LOG_INFO
," -h, --help, --usage\n");
1161 PrintOut(LOG_INFO
," Display this help and exit\n\n");
1162 PrintOut(LOG_INFO
," -i N, --interval=N\n");
1163 PrintOut(LOG_INFO
," Set interval between disk checks to N seconds, where N >= 10\n\n");
1164 PrintOut(LOG_INFO
," -l local[0-7], --logfacility=local[0-7]\n");
1166 PrintOut(LOG_INFO
," Use syslog facility local0 - local7 or daemon [default]\n\n");
1168 PrintOut(LOG_INFO
," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1170 PrintOut(LOG_INFO
," -p NAME, --pidfile=NAME\n");
1171 PrintOut(LOG_INFO
," Write PID file NAME\n\n");
1172 PrintOut(LOG_INFO
," -q WHEN, --quit=WHEN\n");
1173 PrintOut(LOG_INFO
," Quit on one of: %s\n\n", GetValidArgList('q'));
1174 PrintOut(LOG_INFO
," -r, --report=TYPE\n");
1175 PrintOut(LOG_INFO
," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1176 #if defined(_WIN32) || defined(__CYGWIN__)
1177 PrintOut(LOG_INFO
," --service\n");
1178 PrintOut(LOG_INFO
," Running as windows service (see man page), install with:\n");
1180 PrintOut(LOG_INFO
," smartd install [options]\n");
1181 PrintOut(LOG_INFO
," Remove service with:\n");
1182 PrintOut(LOG_INFO
," smartd remove\n\n");
1184 PrintOut(LOG_INFO
," /etc/rc.d/init.d/smartd install [options]\n");
1185 PrintOut(LOG_INFO
," Remove service with:\n");
1186 PrintOut(LOG_INFO
," /etc/rc.d/init.d/smartd remove\n\n");
1188 #endif // _WIN32 || __CYGWIN__
1189 PrintOut(LOG_INFO
," -V, --version, --license, --copyright\n");
1190 PrintOut(LOG_INFO
," Print License, Copyright, and version information\n");
1192 PrintOut(LOG_INFO
," -c NAME|- Read configuration file NAME or stdin [default is %s]\n", configfile
);
1193 PrintOut(LOG_INFO
," -d Start smartd in debug mode\n");
1194 PrintOut(LOG_INFO
," -D Print the configuration file Directives and exit\n");
1195 PrintOut(LOG_INFO
," -h Display this help and exit\n");
1196 PrintOut(LOG_INFO
," -i N Set interval between disk checks to N seconds, where N >= 10\n");
1197 PrintOut(LOG_INFO
," -l local? Use syslog facility local0 - local7, or daemon\n");
1198 PrintOut(LOG_INFO
," -p NAME Write PID file NAME\n");
1199 PrintOut(LOG_INFO
," -q WHEN Quit on one of: %s\n", GetValidArgList('q'));
1200 PrintOut(LOG_INFO
," -r TYPE Report transactions for one of: %s\n", GetValidArgList('r'));
1201 PrintOut(LOG_INFO
," -V Print License, Copyright, and version information\n");
1205 // returns negative if problem, else fd>=0
1206 static int OpenDevice(char *device
, char *mode
, int scanning
) {
1210 // If there is an ASCII "space" character in the device name,
1211 // terminate string there. This is for 3ware and highpoint devices only.
1212 if ((s
=strchr(device
,' ')))
1216 fd
= deviceopen(device
, mode
);
1218 // if we removed a space, put it back in please
1222 // if we failed to open the device, complain!
1225 // For linux+devfs, a nonexistent device gives a strange error
1226 // message. This makes the error message a bit more sensible.
1227 // If no debug and scanning - don't print errors
1228 if (debugmode
|| !scanning
) {
1229 if (errno
==ENOENT
|| errno
==ENOTDIR
)
1232 PrintOut(LOG_INFO
,"Device: %s, %s, open() failed\n",
1233 device
, strerror(errno
));
1237 // device opened successfully
1241 int CloseDevice(int fd
, char *name
){
1242 if (deviceclose(fd
)){
1243 PrintOut(LOG_INFO
,"Device: %s, %s, close(%d) failed\n", name
, strerror(errno
), fd
);
1246 // device successfully closed
1250 // returns <0 on failure
1251 int ATAErrorCount(int fd
, char *name
){
1252 struct ata_smart_errorlog log
;
1254 if (-1==ataReadErrorLog(fd
,&log
)){
1255 PrintOut(LOG_INFO
,"Device: %s, Read SMART Error Log Failed\n",name
);
1259 // return current number of ATA errors
1260 return log
.error_log_pointer
?log
.ata_error_count
:0;
1263 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1264 // error count, and top bits are the power-on hours of the last error.
1265 int SelfTestErrorCount(int fd
, char *name
){
1266 struct ata_smart_selftestlog log
;
1268 if (-1==ataReadSelfTestLog(fd
,&log
)){
1269 PrintOut(LOG_INFO
,"Device: %s, Read SMART Self Test Log Failed\n",name
);
1273 // return current number of self-test errors
1274 return ataPrintSmartSelfTestlog(&log
,0);
1277 // scan to see what ata devices there are, and if they support SMART
1278 int ATADeviceScan(cfgfile
*cfg
, int scanning
){
1279 int fd
, supported
=0;
1280 struct ata_identify_device drive
;
1281 char *name
=cfg
->name
;
1282 int retainsmartdata
=0;
1286 // should we try to register this as an ATA device?
1287 switch (cfg
->controller_type
) {
1288 case CONTROLLER_ATA
:
1289 case CONTROLLER_3WARE_678K
:
1290 case CONTROLLER_MARVELL_SATA
:
1291 case CONTROLLER_HPT
:
1292 case CONTROLLER_UNKNOWN
:
1295 case CONTROLLER_3WARE_678K_CHAR
:
1296 mode
="ATA_3WARE_678K";
1298 case CONTROLLER_3WARE_9000_CHAR
:
1299 mode
="ATA_3WARE_9000";
1301 case CONTROLLER_SAT
:
1305 // not a recognized ATA or SATA device. We should never enter
1311 if ((fd
=OpenDevice(name
, mode
, scanning
))<0)
1312 // device open failed
1314 PrintOut(LOG_INFO
,"Device: %s, opened\n", name
);
1316 // pass user settings on to low-level ATA commands
1317 con
->controller_port
=cfg
->controller_port
;
1318 con
->hpt_data
[0]=cfg
->hpt_data
[0];
1319 con
->hpt_data
[1]=cfg
->hpt_data
[1];
1320 con
->hpt_data
[2]=cfg
->hpt_data
[2];
1321 con
->controller_type
=cfg
->controller_type
;
1322 con
->controller_explicit
=cfg
->controller_explicit
;
1323 con
->fixfirmwarebug
= cfg
->fixfirmwarebug
;
1324 con
->satpassthrulen
= cfg
->satpassthrulen
;
1326 // Get drive identity structure
1327 if ((retid
=ataReadHDIdentity (fd
,&drive
))){
1329 // Unable to read Identity structure
1330 PrintOut(LOG_INFO
,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name
);
1332 PrintOut(LOG_INFO
,"Device: %s, packet devices [this device %s] not SMART capable\n",
1333 name
, packetdevicetype(retid
-1));
1334 CloseDevice(fd
, name
);
1338 // Show if device in database, and use preset vendor attribute
1339 // options unless user has requested otherwise.
1340 if (cfg
->ignorepresets
)
1341 PrintOut(LOG_INFO
, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name
);
1343 // do whatever applypresets decides to do. Will allocate memory if
1344 // cfg->attributedefs is needed.
1345 if (applypresets(&drive
, &cfg
->attributedefs
, con
)<0)
1346 PrintOut(LOG_INFO
, "Device: %s, not found in smartd database.\n", name
);
1348 PrintOut(LOG_INFO
, "Device: %s, found in smartd database.\n", name
);
1350 // then save the correct state of the flag (applypresets may have changed it)
1351 cfg
->fixfirmwarebug
= con
->fixfirmwarebug
;
1354 // If requested, show which presets would be used for this drive
1355 if (cfg
->showpresets
) {
1356 int savedebugmode
=debugmode
;
1357 PrintOut(LOG_INFO
, "Device %s: presets are:\n", name
);
1360 showpresets(&drive
);
1361 debugmode
=savedebugmode
;
1364 // see if drive supports SMART
1365 supported
=ataSmartSupport(&drive
);
1368 // drive does NOT support SMART
1369 PrintOut(LOG_INFO
,"Device: %s, lacks SMART capability\n",name
);
1371 // can't tell if drive supports SMART
1372 PrintOut(LOG_INFO
,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name
);
1374 // should we proceed anyway?
1375 if (cfg
->permissive
){
1376 PrintOut(LOG_INFO
,"Device: %s, proceeding since '-T permissive' Directive given.\n",name
);
1379 PrintOut(LOG_INFO
,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name
);
1380 CloseDevice(fd
, name
);
1385 if (ataEnableSmart(fd
)){
1386 // Enable SMART command has failed
1387 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART capability\n",name
);
1388 CloseDevice(fd
, name
);
1392 // disable device attribute autosave...
1393 if (cfg
->autosave
==1){
1394 if (ataDisableAutoSave(fd
))
1395 PrintOut(LOG_INFO
,"Device: %s, could not disable SMART Attribute Autosave.\n",name
);
1397 PrintOut(LOG_INFO
,"Device: %s, disabled SMART Attribute Autosave.\n",name
);
1400 // or enable device attribute autosave
1401 if (cfg
->autosave
==2){
1402 if (ataEnableAutoSave(fd
))
1403 PrintOut(LOG_INFO
,"Device: %s, could not enable SMART Attribute Autosave.\n",name
);
1405 PrintOut(LOG_INFO
,"Device: %s, enabled SMART Attribute Autosave.\n",name
);
1408 // capability check: SMART status
1409 if (cfg
->smartcheck
&& ataSmartStatus2(fd
)==-1){
1410 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART Health Status check\n",name
);
1414 // capability check: Read smart values and thresholds. Note that
1415 // smart values are ALSO needed even if we ONLY want to know if the
1416 // device is self-test log or error-log capable! After ATA-5, this
1417 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1418 // but sadly not for ATA-5. Sigh.
1420 // do we need to retain SMART data after returning from this routine?
1421 retainsmartdata
=cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
;
1423 // do we need to get SMART data?
1424 if (retainsmartdata
|| cfg
->autoofflinetest
|| cfg
->selftest
|| cfg
->errorlog
|| cfg
->pending
!=DONT_MONITOR_UNC
) {
1426 unsigned char currentpending
, offlinepending
;
1428 cfg
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
));
1429 cfg
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
));
1431 if (!cfg
->smartval
|| !cfg
->smartthres
){
1432 PrintOut(LOG_CRIT
,"Not enough memory to obtain SMART data\n");
1436 if (ataReadSmartValues(fd
,cfg
->smartval
) ||
1437 ataReadSmartThresholds (fd
,cfg
->smartthres
)){
1438 PrintOut(LOG_INFO
,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name
);
1439 retainsmartdata
=cfg
->usagefailed
=cfg
->prefail
=cfg
->usage
=0;
1440 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1441 cfg
->pending
=DONT_MONITOR_UNC
;
1444 // see if the necessary Attribute is there to monitor offline or
1445 // current pending sectors or temperature
1446 TranslatePending(cfg
->pending
, &currentpending
, &offlinepending
);
1448 if (currentpending
&& ATAReturnAttributeRawValue(currentpending
, cfg
->smartval
)<0) {
1449 PrintOut(LOG_INFO
,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1450 name
, (int)currentpending
);
1451 cfg
->pending
&= 0xff00;
1452 cfg
->pending
|= CUR_UNC_DEFAULT
;
1455 if (offlinepending
&& ATAReturnAttributeRawValue(offlinepending
, cfg
->smartval
)<0) {
1456 PrintOut(LOG_INFO
,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1457 name
, (int)offlinepending
);
1458 cfg
->pending
&= 0x00ff;
1459 cfg
->pending
|= OFF_UNC_DEFAULT
<<8;
1462 if ( (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
1463 && !ATAReturnTemperatureValue(cfg
->smartval
, cfg
->attributedefs
)) {
1464 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name
);
1465 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1469 // enable/disable automatic offline testing
1470 if (cfg
->autoofflinetest
){
1471 // is this an enable or disable request?
1472 const char *what
=(cfg
->autoofflinetest
==1)?"disable":"enable";
1474 PrintOut(LOG_INFO
,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name
, what
);
1476 // if command appears unsupported, issue a warning...
1477 if (!isSupportAutomaticTimer(cfg
->smartval
))
1478 PrintOut(LOG_INFO
,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name
);
1479 // ... but then try anyway
1480 if ((cfg
->autoofflinetest
==1)?ataDisableAutoOffline(fd
):ataEnableAutoOffline(fd
))
1481 PrintOut(LOG_INFO
,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name
, what
);
1483 PrintOut(LOG_INFO
,"Device: %s, %sd SMART Automatic Offline Testing.\n", name
, what
);
1487 // capability check: self-test-log
1491 // start with service disabled, and re-enable it if all works OK
1493 cfg
->selflogcount
=0;
1497 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name
);
1498 else if (!cfg
->permissive
&& !isSmartTestLogCapable(cfg
->smartval
, &drive
))
1499 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name
);
1500 else if ((retval
=SelfTestErrorCount(fd
, name
))<0)
1501 PrintOut(LOG_INFO
, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name
);
1504 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1505 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1509 // capability check: ATA error log
1513 // start with service disabled, and re-enable it if all works OK
1515 cfg
->ataerrorcount
=0;
1518 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name
);
1519 else if (!cfg
->permissive
&& !isSmartErrorLogCapable(cfg
->smartval
, &drive
))
1520 PrintOut(LOG_INFO
, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name
);
1521 else if ((val
=ATAErrorCount(fd
, name
))<0)
1522 PrintOut(LOG_INFO
, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name
);
1525 cfg
->ataerrorcount
=val
;
1529 // If we don't need to save SMART data, get rid of it now
1530 if (!retainsmartdata
) {
1531 if (cfg
->smartval
) {
1532 cfg
->smartval
=CheckFree(cfg
->smartval
, __LINE__
,filenameandversion
);
1533 bytes
-=sizeof(struct ata_smart_values
);
1535 if (cfg
->smartthres
) {
1536 cfg
->smartthres
=CheckFree(cfg
->smartthres
, __LINE__
,filenameandversion
);
1537 bytes
-=sizeof(struct ata_smart_thresholds_pvt
);
1541 // capabilities check -- does it support powermode?
1542 if (cfg
->powermode
) {
1543 int powermode
=ataCheckPowerMode(fd
);
1545 if (-1 == powermode
) {
1546 PrintOut(LOG_CRIT
, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name
);
1549 else if (powermode
!=0 && powermode
!=0x80 && powermode
!=0xff) {
1550 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1556 // If no tests available or selected, return
1557 if (!(cfg
->errorlog
|| cfg
->selftest
|| cfg
->smartcheck
||
1558 cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
||
1559 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
1560 CloseDevice(fd
, name
);
1564 // Do we still have entries available?
1565 while (numdevata
>=atadevlist_max
)
1566 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
1569 PrintOut(LOG_INFO
,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name
);
1571 // record number of device, type of device, increment device count
1572 if (cfg
->controller_type
== CONTROLLER_UNKNOWN
)
1573 cfg
->controller_type
=CONTROLLER_ATA
;
1575 // close file descriptor
1576 CloseDevice(fd
, name
);
1580 // Returns 1 if device recognised as one we do not want to treat as a general
1581 // SCSI device. Also returns 1 if INQUIRY fails (all "SCSI" devices should
1582 // respond to INQUIRY). Otherwise returns 0 (i.e. normal SCSI device).
1583 static int SCSIFilterKnown(int fd
, char * device
)
1587 int req_len
, avail_len
, len
;
1589 memset(req_buff
, 0, 96);
1591 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1592 /* Marvell controllers fail on a 36-byte StdInquiry, but 64 suffices */
1593 /* watch this spot ... other devices could lock up here */
1595 if (scsiStdInquiry(fd
, (unsigned char *)req_buff
, req_len
)) {
1596 PrintOut(LOG_INFO
, "Device: %s, failed on INQUIRY; skip device\n", device
);
1597 // device doesn't like INQUIRY commands
1601 avail_len
= req_buff
[4] + 5;
1602 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
1604 if (0 == strncmp(req_buff
+ 8, "3ware", 5) || 0 == strncmp(req_buff
+ 8, "AMCC", 4) ) {
1605 PrintOut(LOG_INFO
, "Device %s, please try adding '-d 3ware,N'\n", device
);
1606 PrintOut(LOG_INFO
, "Device %s, you may need to replace %s with /dev/twaN or /dev/tweN\n", device
, device
);
1608 } else if ((len
>= 42) && (0 == strncmp(req_buff
+ 36, "MVSATA", 6))) {
1609 PrintOut(LOG_INFO
, "Device %s, please try '-d marvell'\n", device
);
1611 } else if ((avail_len
>= 96) && (0 == strncmp(req_buff
+ 8, "ATA", 3))) {
1612 /* <<<< This is Linux specific code to detect SATA disks using a
1613 SCSI-ATA command translation layer. This may be generalized
1614 later when the t10.org SAT project matures. >>>> */
1616 memset(di_buff
, 0, req_len
);
1617 if (scsiInquiryVpd(fd
, 0x83, (unsigned char *)di_buff
, req_len
)) {
1618 return 0; // guess it is normal device
1620 avail_len
= ((di_buff
[2] << 8) + di_buff
[3]) + 4;
1621 len
= (avail_len
< req_len
) ? avail_len
: req_len
;
1622 if (isLinuxLibAta((unsigned char *)di_buff
, len
)) {
1623 PrintOut(LOG_INFO
, "Device %s: SATA disks accessed via libata are "
1624 "supported by Linux\nkernel versions 2.6.15-rc1 and above. "
1625 "Try adding '-d ata' or\n'-d sat' to the smartd.conf "
1626 "config file line.\n", device
);
1634 // on success, return 0. On failure, return >0. Never return <0,
1636 static int SCSIDeviceScan(cfgfile
*cfg
, int scanning
) {
1638 char *device
= cfg
->name
;
1639 struct scsi_iec_mode_page iec
;
1642 // should we try to register this as a SCSI device?
1643 switch (cfg
->controller_type
) {
1644 case CONTROLLER_SCSI
:
1645 case CONTROLLER_UNKNOWN
:
1652 if ((fd
= OpenDevice(device
, "SCSI", scanning
)) < 0)
1654 PrintOut(LOG_INFO
,"Device: %s, opened\n", device
);
1656 // early skip if device known and needs to be handled by some other
1657 // device type (e.g. '-d 3ware,<n>')
1658 if (SCSIFilterKnown(fd
, device
)) {
1659 CloseDevice(fd
, device
);
1663 // check that device is ready for commands. IE stores its stuff on
1665 if ((err
= scsiTestUnitReady(fd
))) {
1666 if (SIMPLE_ERR_NOT_READY
== err
)
1667 PrintOut(LOG_INFO
, "Device: %s, NOT READY (e.g. spun down); skip device\n", device
);
1668 else if (SIMPLE_ERR_NO_MEDIUM
== err
)
1669 PrintOut(LOG_INFO
, "Device: %s, NO MEDIUM present; skip device\n", device
);
1670 else if (SIMPLE_ERR_BECOMING_READY
== err
)
1671 PrintOut(LOG_INFO
, "Device: %s, BECOMING (but not yet) READY; skip device\n", device
);
1673 PrintOut(LOG_CRIT
, "Device: %s, failed Test Unit Ready [err=%d]\n", device
, err
);
1674 CloseDevice(fd
, device
);
1678 // Badly-conforming USB storage devices may fail this check.
1679 // The response to the following IE mode page fetch (current and
1680 // changeable values) is carefully examined. It has been found
1681 // that various USB devices that malform the response will lock up
1682 // if asked for a log page (e.g. temperature) so it is best to
1684 if (!(err
= scsiFetchIECmpage(fd
, &iec
, cfg
->modese_len
)))
1685 cfg
->modese_len
= iec
.modese_len
;
1686 else if (SIMPLE_ERR_BAD_FIELD
== err
)
1687 ; /* continue since it is reasonable not to support IE mpage */
1688 else { /* any other error (including malformed response) unreasonable */
1690 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1692 CloseDevice(fd
, device
);
1696 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1697 // smart if it is off). This may change to be the same as the ATA side.
1698 if (!scsi_IsExceptionControlEnabled(&iec
)) {
1699 PrintOut(LOG_INFO
, "Device: %s, IE (SMART) not enabled, skip device\n"
1700 "Try 'smartctl -s on %s' to turn on SMART features\n",
1702 CloseDevice(fd
, device
);
1706 // Device exists, and does SMART. Add to list (allocating more space if needed)
1707 while (numdevscsi
>= scsidevlist_max
)
1708 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
1710 // Flag that certain log pages are supported (information may be
1711 // available from other sources).
1712 if (0 == scsiLogSense(fd
, SUPPORTED_LPAGES
, 0, tBuf
, sizeof(tBuf
), 0)) {
1713 for (k
= 4; k
< tBuf
[3] + LOGPAGEHDRSIZE
; ++k
) {
1715 case TEMPERATURE_LPAGE
:
1716 cfg
->TempPageSupported
= 1;
1719 cfg
->SmartPageSupported
= 1;
1727 // record type of device
1728 cfg
->controller_type
= CONTROLLER_SCSI
;
1730 // get rid of allocated memory only needed for ATA devices. These
1731 // might have been allocated if the user specified Ignore options or
1732 // other ATA-only Attribute-specific options on the DEVICESCAN line.
1733 cfg
->monitorattflags
= FreeNonZero(cfg
->monitorattflags
, NMONITOR
*32,__LINE__
,filenameandversion
);
1734 cfg
->attributedefs
= FreeNonZero(cfg
->attributedefs
, MAX_ATTRIBUTE_NUM
,__LINE__
,filenameandversion
);
1735 cfg
->smartval
= FreeNonZero(cfg
->smartval
, sizeof(struct ata_smart_values
),__LINE__
,filenameandversion
);
1736 cfg
->smartthres
= FreeNonZero(cfg
->smartthres
, sizeof(struct ata_smart_thresholds_pvt
),__LINE__
,filenameandversion
);
1738 // Check if scsiCheckIE() is going to work
1742 UINT8 currenttemp
= 0;
1745 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
1746 &asc
, &ascq
, &currenttemp
, &triptemp
)) {
1747 PrintOut(LOG_INFO
, "Device: %s, unexpectedly failed to read SMART values\n", device
);
1748 cfg
->SuppressReport
= 1;
1749 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
) {
1750 PrintOut(LOG_CRIT
, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device
);
1751 cfg
->tempdiff
= cfg
->tempinfo
= cfg
->tempcrit
= 0;
1756 // capability check: self-test-log
1758 int retval
=scsiCountFailedSelfTests(fd
, 0);
1760 // no self-test log, turn off monitoring
1761 PrintOut(LOG_INFO
, "Device: %s, does not support SMART Self-Test Log.\n", device
);
1763 cfg
->selflogcount
=0;
1767 // register starting values to watch for changes
1768 cfg
->selflogcount
=SELFTEST_ERRORCOUNT(retval
);
1769 cfg
->selfloghour
=SELFTEST_ERRORHOURS(retval
);
1773 // disable autosave (set GLTSD bit)
1774 if (cfg
->autosave
==1){
1775 if (scsiSetControlGLTSD(fd
, 1, cfg
->modese_len
))
1776 PrintOut(LOG_INFO
,"Device: %s, could not disable autosave (set GLTSD bit).\n",device
);
1778 PrintOut(LOG_INFO
,"Device: %s, disabled autosave (set GLTSD bit).\n",device
);
1781 // or enable autosave (clear GLTSD bit)
1782 if (cfg
->autosave
==2){
1783 if (scsiSetControlGLTSD(fd
, 0, cfg
->modese_len
))
1784 PrintOut(LOG_INFO
,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device
);
1786 PrintOut(LOG_INFO
,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device
);
1789 // tell user we are registering device
1790 PrintOut(LOG_INFO
, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device
);
1792 // close file descriptor
1793 CloseDevice(fd
, device
);
1797 // We compare old and new values of the n'th attribute. Note that n
1798 // is NOT the attribute ID number. If (Normalized & Raw) are equal,
1799 // then return 0, else nonzero.
1800 int ATACompareValues(changedattribute_t
*delta
,
1801 struct ata_smart_values
*newv
,
1802 struct ata_smart_values
*oldv
,
1803 struct ata_smart_thresholds_pvt
*thresholds
,
1805 struct ata_smart_attribute
*now
,*was
;
1806 struct ata_smart_threshold_entry
*thre
;
1807 unsigned char oldval
,newval
;
1810 // check that attribute number in range, and no null pointers
1811 if (n
<0 || n
>=NUMBER_ATA_SMART_ATTRIBUTES
|| !newv
|| !oldv
|| !thresholds
)
1814 // pointers to disk's values and vendor's thresholds
1815 now
=newv
->vendor_attributes
+n
;
1816 was
=oldv
->vendor_attributes
+n
;
1817 thre
=thresholds
->thres_entries
+n
;
1819 // consider only valid attributes
1820 if (!now
->id
|| !was
->id
|| !thre
->id
)
1824 // issue warning if they don't have the same ID in all structures:
1825 if ( (now
->id
!= was
->id
) || (now
->id
!= thre
->id
) ){
1826 PrintOut(LOG_INFO
,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
1827 name
, (int)now
->id
, (int)was
->id
, (int)thre
->id
);
1831 // new and old values of Normalized Attributes
1832 newval
=now
->current
;
1833 oldval
=was
->current
;
1835 // See if the RAW values are unchanged (ie, the same)
1836 if (memcmp(now
->raw
, was
->raw
, 6))
1841 // if any values out of the allowed range, or if the values haven't
1842 // changed, return 0
1843 if (!newval
|| !oldval
|| newval
>0xfe || oldval
>0xfe || (oldval
==newval
&& sameraw
))
1846 // values have changed. Construct output and return
1847 delta
->newval
=newval
;
1848 delta
->oldval
=oldval
;
1850 delta
->prefail
=ATTRIBUTE_FLAGS_PREFAILURE(now
->flags
);
1851 delta
->sameraw
=sameraw
;
1856 // This looks to see if the corresponding bit of the 32 bytes is set.
1857 // This wastes a few bytes of storage but eliminates all searching and
1858 // sorting functions! Entry is ZERO <==> the attribute ON. Calling
1859 // with set=0 tells you if the attribute is being tracked or not.
1860 // Calling with set=1 turns the attribute OFF.
1861 int IsAttributeOff(unsigned char attr
, unsigned char **datap
, int set
, int which
, int whatline
){
1862 unsigned char *data
;
1864 int bit
=attr
& 0x07;
1865 unsigned char mask
=0x01<<bit
;
1867 if (which
>=NMONITOR
|| which
< 0){
1868 PrintOut(LOG_CRIT
, "Internal error in IsAttributeOff() at line %d of file %s (which=%d)\n%s",
1869 whatline
, filenameandversion
, which
, reportbug
);
1873 if (*datap
== NULL
){
1874 // NULL data implies Attributes are ON...
1879 if (!(*datap
=(unsigned char *)Calloc(NMONITOR
*32, 1))){
1880 PrintOut(LOG_CRIT
,"No memory to create monattflags\n");
1885 // pointer to the 256 bits that we need
1886 data
=*datap
+which
*32;
1888 // attribute zero is always OFF
1893 return (data
[loc
] & mask
);
1897 // when setting, the return value carries no meaning
1901 // If the self-test log has got more self-test errors (or more recent
1902 // self-test errors) recorded, then notify user.
1903 void CheckSelfTestLogs(cfgfile
*cfg
, int newi
){
1904 char *name
=cfg
->name
;
1908 MailWarning(cfg
, 8, "Device: %s, Read SMART Self-Test Log Failed", name
);
1910 // old and new error counts
1911 int oldc
=cfg
->selflogcount
;
1912 int newc
=SELFTEST_ERRORCOUNT(newi
);
1914 // old and new error timestamps in hours
1915 int oldh
=cfg
->selfloghour
;
1916 int newh
=SELFTEST_ERRORHOURS(newi
);
1919 // increase in error count
1920 PrintOut(LOG_CRIT
, "Device: %s, Self-Test Log error count increased from %d to %d\n",
1922 MailWarning(cfg
, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
1924 } else if (oldh
!=newh
) {
1925 // more recent error
1926 // a 'more recent' error might actually be a smaller hour number,
1927 // if the hour number has wrapped.
1928 // There's still a bug here. You might just happen to run a new test
1929 // exactly 32768 hours after the previous failure, and have run exactly
1930 // 20 tests between the two, in which case smartd will miss the
1932 PrintOut(LOG_CRIT
, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1934 MailWarning(cfg
, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1938 // Needed since self-test error count may DECREASE. Hour might
1939 // also have changed.
1940 cfg
->selflogcount
= newc
;
1941 cfg
->selfloghour
= newh
;
1946 // returns 1 if time to do test of type testtype, 0 if not time to do
1947 // test, < 0 if error
1948 int DoTestNow(cfgfile
*cfg
, char testtype
, time_t testtime
) {
1949 // start by finding out the time:
1952 char matchpattern
[16];
1953 regmatch_t substring
;
1954 int weekday
, length
;
1955 unsigned short hours
;
1956 testinfo
*dat
=cfg
->testdata
;
1958 // check that self-testing has been requested
1962 // since we are about to call localtime(), be sure glibc is informed
1963 // of any timezone changes we make.
1965 FixGlibcTimeZoneBug();
1967 // construct pattern containing the month, day of month, day of
1969 epochnow
= (!testtime
? time(NULL
) : testtime
);
1970 timenow
=localtime(&epochnow
);
1972 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7
1974 weekday
=timenow
->tm_wday
?timenow
->tm_wday
:7;
1975 sprintf(matchpattern
, "%c/%02d/%02d/%1d/%02d", testtype
, timenow
->tm_mon
+1,
1976 timenow
->tm_mday
, weekday
, timenow
->tm_hour
);
1978 // if no match, we are done
1979 if (regexec(&(dat
->cregex
), matchpattern
, 1, &substring
, 0))
1982 // must match the ENTIRE type/date/time string
1983 length
=strlen(matchpattern
);
1984 if (substring
.rm_so
!=0 || substring
.rm_eo
!=length
)
1987 // never do a second test in the same hour as another test (the % 7 ensures
1988 // that the RHS will never be greater than 65535 and so will always fit into
1989 // an unsigned short)
1990 hours
=1+timenow
->tm_hour
+24*(timenow
->tm_yday
+366*(timenow
->tm_year
% 7));
1991 if (hours
==dat
->hour
) {
1992 if (!testtime
&& testtype
!=dat
->testtype
)
1993 PrintOut(LOG_INFO
, "Device: %s, did test of type %c in current hour, skipping test of type %c\n",
1994 cfg
->name
, dat
->testtype
, testtype
);
1998 // save time and type of the current test; we are ready to do a test
2000 dat
->testtype
=testtype
;
2004 // Print a list of future tests.
2005 void PrintTestSchedule(cfgfile
**atadevices
, cfgfile
**scsidevices
){
2008 char datenow
[DATEANDEPOCHLEN
], date
[DATEANDEPOCHLEN
];
2009 time_t now
; long seconds
;
2010 int numdev
= numdevata
+numdevscsi
;
2011 typedef int cnt_t
[4];
2012 cnt_t
* testcnts
; // testcnts[numdev][4]
2015 testcnts
= (cnt_t
*)calloc(numdev
, sizeof(testcnts
[0]));
2019 PrintOut(LOG_INFO
, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2021 // FixGlibcTimeZoneBug(); // done in PrintOut()
2023 dateandtimezoneepoch(datenow
, now
);
2024 for (seconds
=0; seconds
<3600L*24*90; seconds
+=checktime
) {
2025 // Check for each device whether a test will be run
2026 time_t testtime
= now
+ seconds
;
2027 for (i
=0; i
<numdev
; i
++) {
2028 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
2029 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
2030 char testtype
= "LSCO"[t
];
2031 if (DoTestNow(cfg
, testtype
, testtime
)) {
2032 // Report at most 5 tests of each type
2033 if (++testcnts
[i
][t
] <= 5) {
2034 dateandtimezoneepoch(date
, testtime
);
2035 PrintOut(LOG_INFO
, "Device: %s, will do test %d of type %c at %s\n", cfg
->name
,
2036 testcnts
[i
][t
], testtype
, date
);
2044 dateandtimezoneepoch(date
, now
+seconds
);
2045 PrintOut(LOG_INFO
, "\nTotals [%s - %s]:\n", datenow
, date
);
2046 for (i
=0; i
<numdev
; i
++) {
2047 cfg
= (i
<numdevata
? atadevices
[i
] : scsidevices
[i
-numdevata
]);
2048 for (t
=0; t
<(i
<numdevata
?4:2); t
++) {
2049 PrintOut(LOG_INFO
, "Device: %s, will do %3d test%s of type %c\n", cfg
->name
, testcnts
[i
][t
],
2050 (testcnts
[i
][t
]==1?"":"s"), "LSCO"[t
]);
2057 // Return zero on success, nonzero on failure. Perform offline (background)
2058 // short or long (extended) self test on given scsi device.
2059 int DoSCSISelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2061 char *testname
= NULL
;
2062 char *name
= cfg
->name
;
2065 if (scsiSelfTestInProgress(fd
, &inProgress
)) {
2066 PrintOut(LOG_CRIT
, "Device: %s, does not support Self-Tests\n", name
);
2067 cfg
->testdata
->not_cap_short
=cfg
->testdata
->not_cap_long
=1;
2071 if (1 == inProgress
) {
2072 PrintOut(LOG_INFO
, "Device: %s, skip since Self-Test already in "
2073 "progress.\n", name
);
2079 testname
= "Short Self";
2080 retval
= scsiSmartShortSelfTest(fd
);
2083 testname
= "Long Self";
2084 retval
= scsiSmartExtendSelfTest(fd
);
2087 // If we can't do the test, exit
2088 if (NULL
== testname
) {
2089 PrintOut(LOG_CRIT
, "Device: %s, not capable of %c Self-Test\n", name
,
2094 if ((SIMPLE_ERR_BAD_OPCODE
== retval
) ||
2095 (SIMPLE_ERR_BAD_FIELD
== retval
)) {
2096 PrintOut(LOG_CRIT
, "Device: %s, not capable of %s-Test\n", name
,
2099 cfg
->testdata
->not_cap_long
=1;
2101 cfg
->testdata
->not_cap_short
=1;
2105 PrintOut(LOG_CRIT
, "Device: %s, execute %s-Test failed (err: %d)\n", name
,
2110 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %s-Test.\n", name
, testname
);
2115 // Do an offline immediate or self-test. Return zero on success,
2116 // nonzero on failure.
2117 int DoATASelfTest(int fd
, cfgfile
*cfg
, char testtype
) {
2119 struct ata_smart_values data
;
2120 char *testname
=NULL
;
2121 int retval
, dotest
=-1;
2122 char *name
=cfg
->name
;
2124 // Read current smart data and check status/capability
2125 if (ataReadSmartValues(fd
, &data
) || !(data
.offline_data_collection_capability
)) {
2126 PrintOut(LOG_CRIT
, "Device: %s, not capable of Offline or Self-Testing.\n", name
);
2130 // Check for capability to do the test
2133 testname
="Offline Immediate ";
2134 if (isSupportExecuteOfflineImmediate(&data
))
2135 dotest
=OFFLINE_FULL_SCAN
;
2137 cfg
->testdata
->not_cap_offline
=1;
2140 testname
="Conveyance Self-";
2141 if (isSupportConveyanceSelfTest(&data
))
2142 dotest
=CONVEYANCE_SELF_TEST
;
2144 cfg
->testdata
->not_cap_conveyance
=1;
2147 testname
="Short Self-";
2148 if (isSupportSelfTest(&data
))
2149 dotest
=SHORT_SELF_TEST
;
2151 cfg
->testdata
->not_cap_short
=1;
2154 testname
="Long Self-";
2155 if (isSupportSelfTest(&data
))
2156 dotest
=EXTEND_SELF_TEST
;
2158 cfg
->testdata
->not_cap_long
=1;
2162 // If we can't do the test, exit
2164 PrintOut(LOG_CRIT
, "Device: %s, not capable of %sTest\n", name
, testname
);
2168 // If currently running a self-test, do not interrupt it to start another.
2169 if (15==(data
.self_test_exec_status
>> 4)) {
2170 PrintOut(LOG_INFO
, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2171 name
, testname
, (int)(data
.self_test_exec_status
& 0x0f));
2175 // else execute the test, and return status
2176 if ((retval
=smartcommandhandler(fd
, IMMEDIATE_OFFLINE
, dotest
, NULL
)))
2177 PrintOut(LOG_CRIT
, "Device: %s, execute %sTest failed.\n", name
, testname
);
2179 PrintOut(LOG_INFO
, "Device: %s, starting scheduled %sTest.\n", name
, testname
);
2184 // Check Temperature limits
2185 static void CheckTemperature(cfgfile
* cfg
, unsigned char currtemp
, unsigned char triptemp
)
2187 const char *minchg
= "", *maxchg
= "";
2188 if (!(0 < currtemp
&& currtemp
< 255)) {
2189 PrintOut(LOG_INFO
, "Device: %s, failed to read Temperature\n", cfg
->name
);
2193 if (!cfg
->temperature
) {
2194 PrintOut(LOG_INFO
, "Device: %s, initial Temperature is %d Celsius\n",
2195 cfg
->name
, (int)currtemp
);
2197 PrintOut(LOG_INFO
, " [trip Temperature is %d Celsius]\n", (int)triptemp
);
2198 cfg
->temperature
= cfg
->tempmin
= cfg
->tempmax
= currtemp
;
2202 if (currtemp
< cfg
->tempmin
) {
2203 cfg
->tempmin
= currtemp
; minchg
= "!";
2204 cfg
->tempmininc
= 0;
2206 else if (cfg
->tempmininc
) {
2207 // increase min Temperature during first 30 minutes
2208 cfg
->tempmin
= currtemp
;
2211 if (currtemp
> cfg
->tempmax
) {
2212 cfg
->tempmax
= currtemp
; maxchg
= "!";
2216 if (cfg
->tempdiff
&& (*minchg
|| *maxchg
|| abs((int)currtemp
- (int)cfg
->temperature
) >= cfg
->tempdiff
)) {
2217 PrintOut(LOG_INFO
, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %u%s/%u%s)\n",
2218 cfg
->name
, (int)currtemp
-(int)cfg
->temperature
, currtemp
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2219 cfg
->temperature
= currtemp
;
2224 if (cfg
->tempcrit
&& currtemp
>= cfg
->tempcrit
) {
2225 PrintOut(LOG_CRIT
, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2226 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2227 MailWarning(cfg
, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2228 cfg
->name
, currtemp
, cfg
->tempcrit
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2230 else if (cfg
->tempinfo
&& currtemp
>= cfg
->tempinfo
) {
2231 PrintOut(LOG_INFO
, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2232 cfg
->name
, currtemp
, cfg
->tempinfo
, cfg
->tempmin
, minchg
, cfg
->tempmax
, maxchg
);
2236 int ATACheckDevice(cfgfile
*cfg
){
2238 char *name
=cfg
->name
;
2242 // fix firmware bug if requested
2243 con
->fixfirmwarebug
=cfg
->fixfirmwarebug
;
2244 con
->controller_port
=cfg
->controller_port
;
2245 con
->controller_type
=cfg
->controller_type
;
2246 con
->controller_explicit
=cfg
->controller_explicit
;
2248 // If user has asked, test the email warning system
2249 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2250 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
2252 if (cfg
->controller_type
== CONTROLLER_3WARE_9000_CHAR
)
2253 mode
="ATA_3WARE_9000";
2255 if (cfg
->controller_type
== CONTROLLER_3WARE_678K_CHAR
)
2256 mode
="ATA_3WARE_678K";
2258 // if we can't open device, fail gracefully rather than hard --
2259 // perhaps the next time around we'll be able to open it. ATAPI
2260 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2261 // given (see linux cdrom driver).
2262 if ((fd
=OpenDevice(name
, mode
, 0))<0){
2263 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2267 // if the user has asked, and device is capable (or we're not yet
2268 // sure) check whether a self test should be done now.
2269 // This check is done before powermode check to avoid missing self
2270 // tests on idle or sleeping disks.
2271 if (cfg
->testdata
) {
2273 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2276 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2279 else if (!cfg
->testdata
->not_cap_conveyance
&& DoTestNow(cfg
, 'C', 0)>0)
2281 // offline immediate
2282 else if (!cfg
->testdata
->not_cap_offline
&& DoTestNow(cfg
, 'O', 0)>0)
2286 // user may have requested (with the -n Directive) to leave the disk
2287 // alone if it is in idle or sleeping mode. In this case check the
2288 // power mode and exit without check if needed
2289 if (cfg
->powermode
){
2290 int dontcheck
=0, powermode
=ataCheckPowerMode(fd
);
2292 if (0 <= powermode
&& powermode
< 0xff) {
2293 // wait for possible spin up and check again
2296 powermode2
= ataCheckPowerMode(fd
);
2297 if (powermode2
> powermode
)
2298 PrintOut(LOG_INFO
, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name
, powermode
, powermode2
);
2299 powermode
= powermode2
;
2306 if (cfg
->powermode
>=1)
2312 if (cfg
->powermode
>=2)
2318 if (cfg
->powermode
>=3)
2323 mode
="ACTIVE or IDLE";
2327 PrintOut(LOG_CRIT
, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2333 // if we are going to skip a check, return now
2335 // but ignore powermode on scheduled selftest
2337 CloseDevice(fd
, name
);
2338 if (!cfg
->powerskipcnt
&& !cfg
->powerquiet
) // report first only and avoid waking up system disk
2339 PrintOut(LOG_INFO
, "Device: %s, is in %s mode, suspending checks\n", name
, mode
);
2340 cfg
->powerskipcnt
++;
2343 PrintOut(LOG_INFO
, "Device: %s, %s mode ignored due to scheduled self test (%d check%s skipped)\n",
2344 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2345 cfg
->powerskipcnt
= 0;
2347 else if (cfg
->powerskipcnt
) {
2348 PrintOut(LOG_INFO
, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2349 name
, mode
, cfg
->powerskipcnt
, (cfg
->powerskipcnt
==1?"":"s"));
2350 cfg
->powerskipcnt
= 0;
2354 // check smart status
2355 if (cfg
->smartcheck
){
2356 int status
=ataSmartStatus2(fd
);
2358 PrintOut(LOG_INFO
,"Device: %s, not capable of SMART self-check\n",name
);
2359 MailWarning(cfg
, 5, "Device: %s, not capable of SMART self-check", name
);
2361 else if (status
==1){
2362 PrintOut(LOG_CRIT
, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name
);
2363 MailWarning(cfg
, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name
);
2367 // Check everything that depends upon SMART Data (eg, Attribute values)
2368 if ( cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
|| cfg
->pending
!=DONT_MONITOR_UNC
2369 || cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
){
2370 struct ata_smart_values curval
;
2371 struct ata_smart_thresholds_pvt
*thresh
=cfg
->smartthres
;
2373 // Read current attribute values. *drive contains old values and thresholds
2374 if (ataReadSmartValues(fd
,&curval
)){
2375 PrintOut(LOG_CRIT
, "Device: %s, failed to read SMART Attribute Data\n", name
);
2376 MailWarning(cfg
, 6, "Device: %s, failed to read SMART Attribute Data", name
);
2379 // look for current or offline pending sectors
2380 if (cfg
->pending
!= DONT_MONITOR_UNC
) {
2382 unsigned char currentpending
, offlinepending
;
2384 TranslatePending(cfg
->pending
, ¤tpending
, &offlinepending
);
2386 if (currentpending
&& (rawval
=ATAReturnAttributeRawValue(currentpending
, &curval
))>0) {
2387 // Unreadable pending sectors!!
2388 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors\n", name
, rawval
);
2389 MailWarning(cfg
, 10, "Device: %s, %"PRId64
" Currently unreadable (pending) sectors", name
, rawval
);
2392 if (offlinepending
&& (rawval
=ATAReturnAttributeRawValue(offlinepending
, &curval
))>0) {
2393 // Unreadable offline sectors!!
2394 PrintOut(LOG_CRIT
, "Device: %s, %"PRId64
" Offline uncorrectable sectors\n", name
, rawval
);
2395 MailWarning(cfg
, 11, "Device: %s, %"PRId64
" Offline uncorrectable sectors", name
, rawval
);
2399 // check temperature limits
2400 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2401 CheckTemperature(cfg
, ATAReturnTemperatureValue(&curval
, cfg
->attributedefs
), 0);
2403 if (cfg
->usagefailed
|| cfg
->prefail
|| cfg
->usage
) {
2405 // look for failed usage attributes, or track usage or prefail attributes
2406 for (i
=0; i
<NUMBER_ATA_SMART_ATTRIBUTES
; i
++){
2408 changedattribute_t delta
;
2410 // This block looks for usage attributes that have failed.
2411 // Prefail attributes that have failed are returned with a
2412 // positive sign. No failure returns 0. Usage attributes<0.
2413 if (cfg
->usagefailed
&& ((att
=ataCheckAttribute(&curval
, thresh
, i
))<0)){
2415 // are we ignoring failures of this attribute?
2417 if (!IsAttributeOff(att
, &cfg
->monitorattflags
, 0, MONITOR_FAILUSE
, __LINE__
)){
2418 char attname
[64], *loc
=attname
;
2420 // get attribute name & skip white space
2421 ataPrintSmartAttribName(loc
, att
, cfg
->attributedefs
);
2422 while (*loc
&& *loc
==' ') loc
++;
2425 PrintOut(LOG_CRIT
, "Device: %s, Failed SMART usage Attribute: %s.\n", name
, loc
);
2426 MailWarning(cfg
, 2, "Device: %s, Failed SMART usage Attribute: %s.", name
, loc
);
2430 // This block tracks usage or prefailure attributes to see if
2431 // they are changing. It also looks for changes in RAW values
2432 // if this has been requested by user.
2433 if ((cfg
->usage
|| cfg
->prefail
) && ATACompareValues(&delta
, &curval
, cfg
->smartval
, thresh
, i
, name
)){
2434 unsigned char id
=delta
.id
;
2436 // if the only change is the raw value, and we're not
2437 // tracking raw value, then continue loop over attributes
2438 if (!delta
.sameraw
&& delta
.newval
==delta
.oldval
&& !IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAW
, __LINE__
))
2441 // are we tracking this attribute?
2442 if (!IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_IGNORE
, __LINE__
)){
2443 char newrawstring
[64], oldrawstring
[64], attname
[64], *loc
=attname
;
2445 // get attribute name, skip spaces
2446 ataPrintSmartAttribName(loc
, id
, cfg
->attributedefs
);
2447 while (*loc
&& *loc
==' ') loc
++;
2449 // has the user asked for us to print raw values?
2450 if (IsAttributeOff(id
, &cfg
->monitorattflags
, 0, MONITOR_RAWPRINT
, __LINE__
)) {
2451 // get raw values (as a string) and add to printout
2453 ataPrintSmartAttribRawValue(rawstring
, curval
.vendor_attributes
+i
, cfg
->attributedefs
);
2454 sprintf(newrawstring
, " [Raw %s]", rawstring
);
2455 ataPrintSmartAttribRawValue(rawstring
, cfg
->smartval
->vendor_attributes
+i
, cfg
->attributedefs
);
2456 sprintf(oldrawstring
, " [Raw %s]", rawstring
);
2459 newrawstring
[0]=oldrawstring
[0]='\0';
2461 // prefailure attribute
2462 if (cfg
->prefail
&& delta
.prefail
)
2463 PrintOut(LOG_INFO
, "Device: %s, SMART Prefailure Attribute: %s changed from %d%s to %d%s\n",
2464 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2467 if (cfg
->usage
&& !delta
.prefail
)
2468 PrintOut(LOG_INFO
, "Device: %s, SMART Usage Attribute: %s changed from %d%s to %d%s\n",
2469 name
, loc
, delta
.oldval
, oldrawstring
, delta
.newval
, newrawstring
);
2471 } // endof block tracking usage or prefailure
2472 } // end of loop over attributes
2474 // Save the new values into *drive for the next time around
2475 *(cfg
->smartval
)=curval
;
2480 // check if number of selftest errors has increased (note: may also DECREASE)
2482 CheckSelfTestLogs(cfg
, SelfTestErrorCount(fd
, name
));
2484 // check if number of ATA errors has increased
2487 int newc
,oldc
=cfg
->ataerrorcount
;
2489 // new number of errors
2490 newc
=ATAErrorCount(fd
, name
);
2492 // did command fail?
2494 // lack of PrintOut here is INTENTIONAL
2495 MailWarning(cfg
, 7, "Device: %s, Read SMART Error Log Failed", name
);
2497 // has error count increased?
2499 PrintOut(LOG_CRIT
, "Device: %s, ATA error count increased from %d to %d\n",
2501 MailWarning(cfg
, 4, "Device: %s, ATA error count increased from %d to %d",
2505 // this last line is probably not needed, count always increases
2507 cfg
->ataerrorcount
=newc
;
2510 // carry out scheduled self-test
2512 DoATASelfTest(fd
, cfg
, testtype
);
2514 // Don't leave device open -- the OS/user may want to access it
2515 // before the next smartd cycle!
2516 CloseDevice(fd
, name
);
2520 int SCSICheckDevice(cfgfile
*cfg
)
2526 char *name
=cfg
->name
;
2529 // If the user has asked for it, test the email warning system
2530 if (cfg
->mailwarn
&& cfg
->mailwarn
->emailtest
)
2531 MailWarning(cfg
, 0, "TEST EMAIL from smartd for device: %s", name
);
2533 // if we can't open device, fail gracefully rather than hard --
2534 // perhaps the next time around we'll be able to open it
2535 if ((fd
=OpenDevice(name
, "SCSI", 0))<0) {
2536 // Lack of PrintOut() here is intentional!
2537 MailWarning(cfg
, 9, "Device: %s, unable to open device", name
);
2543 if (! cfg
->SuppressReport
) {
2544 if (scsiCheckIE(fd
, cfg
->SmartPageSupported
, cfg
->TempPageSupported
,
2545 &asc
, &ascq
, ¤ttemp
, &triptemp
)) {
2546 PrintOut(LOG_INFO
, "Device: %s, failed to read SMART values\n",
2548 MailWarning(cfg
, 6, "Device: %s, failed to read SMART values", name
);
2549 cfg
->SuppressReport
= 1;
2553 cp
= scsiGetIEString(asc
, ascq
);
2555 PrintOut(LOG_CRIT
, "Device: %s, SMART Failure: %s\n", name
, cp
);
2556 MailWarning(cfg
, 1,"Device: %s, SMART Failure: %s", name
, cp
);
2558 } else if (debugmode
)
2559 PrintOut(LOG_INFO
,"Device: %s, Acceptable asc,ascq: %d,%d\n",
2560 name
, (int)asc
, (int)ascq
);
2562 // check temperature limits
2563 if (cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)
2564 CheckTemperature(cfg
, currenttemp
, triptemp
);
2566 // check if number of selftest errors has increased (note: may also DECREASE)
2568 CheckSelfTestLogs(cfg
, scsiCountFailedSelfTests(fd
, 0));
2570 if (cfg
->testdata
) {
2571 // long (extended) background test
2572 if (!cfg
->testdata
->not_cap_long
&& DoTestNow(cfg
, 'L', 0)>0)
2573 DoSCSISelfTest(fd
, cfg
, 'L');
2574 // short background test
2575 else if (!cfg
->testdata
->not_cap_short
&& DoTestNow(cfg
, 'S', 0)>0)
2576 DoSCSISelfTest(fd
, cfg
, 'S');
2578 CloseDevice(fd
, name
);
2582 // Checks the SMART status of all ATA and SCSI devices
2583 void CheckDevicesOnce(cfgfile
**atadevices
, cfgfile
**scsidevices
){
2586 for (i
=0; i
<numdevata
; i
++)
2587 ATACheckDevice(atadevices
[i
]);
2589 for (i
=0; i
<numdevscsi
; i
++)
2590 SCSICheckDevice(scsidevices
[i
]);
2596 // This alarm means that a SCSI USB device was hanging
2597 void AlarmHandler(int signal
) {
2598 longjmp(registerscsienv
, 1);
2602 // Does initialization right after fork to daemon mode
2603 void Initialize(time_t *wakeuptime
){
2605 // install goobye message and remove pidfile handler
2608 // write PID file only after installing exit handler
2612 // install signal handlers. On Solaris, can't use signal() because
2613 // it resets the handler to SIG_DFL after each call. So use sigset()
2614 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2616 // normal and abnormal exit
2617 if (SIGNALFN(SIGTERM
, sighandler
)==SIG_IGN
)
2618 SIGNALFN(SIGTERM
, SIG_IGN
);
2619 if (SIGNALFN(SIGQUIT
, sighandler
)==SIG_IGN
)
2620 SIGNALFN(SIGQUIT
, SIG_IGN
);
2622 // in debug mode, <CONTROL-C> ==> HUP
2623 if (SIGNALFN(SIGINT
, debugmode
?HUPhandler
:sighandler
)==SIG_IGN
)
2624 SIGNALFN(SIGINT
, SIG_IGN
);
2626 // Catch HUP and USR1
2627 if (SIGNALFN(SIGHUP
, HUPhandler
)==SIG_IGN
)
2628 SIGNALFN(SIGHUP
, SIG_IGN
);
2629 if (SIGNALFN(SIGUSR1
, USR1handler
)==SIG_IGN
)
2630 SIGNALFN(SIGUSR1
, SIG_IGN
);
2632 if (SIGNALFN(SIGUSR2
, USR2handler
)==SIG_IGN
)
2633 SIGNALFN(SIGUSR2
, SIG_IGN
);
2636 // initialize wakeup time to CURRENT time
2637 *wakeuptime
=time(NULL
);
2643 // Toggle debug mode implemented for native windows only
2644 // (there is no easy way to reopen tty on *nix)
2645 static void ToggleDebugMode()
2648 PrintOut(LOG_INFO
,"Signal USR2 - enabling debug mode\n");
2649 if (!daemon_enable_console("smartd [Debug]")) {
2651 daemon_signal(SIGINT
, HUPhandler
);
2652 PrintOut(LOG_INFO
,"smartd debug mode enabled, PID=%d\n", getpid());
2655 PrintOut(LOG_INFO
,"enable console failed\n");
2657 else if (debugmode
== 1) {
2658 daemon_disable_console();
2660 daemon_signal(SIGINT
, sighandler
);
2661 PrintOut(LOG_INFO
,"Signal USR2 - debug mode disabled\n");
2664 PrintOut(LOG_INFO
,"Signal USR2 - debug mode %d not changed\n", debugmode
);
2668 time_t dosleep(time_t wakeuptime
){
2671 // If past wake-up-time, compute next wake-up-time
2673 while (wakeuptime
<=timenow
){
2674 int intervals
=1+(timenow
-wakeuptime
)/checktime
;
2675 wakeuptime
+=intervals
*checktime
;
2678 // sleep until we catch SIGUSR1 or have completed sleeping
2679 while (timenow
<wakeuptime
&& !caughtsigUSR1
&& !caughtsigHUP
&& !caughtsigEXIT
){
2681 // protect user again system clock being adjusted backwards
2682 if (wakeuptime
>timenow
+checktime
){
2683 PrintOut(LOG_CRIT
, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2684 wakeuptime
=timenow
+checktime
;
2687 // Exit sleep when time interval has expired or a signal is received
2688 sleep(wakeuptime
-timenow
);
2691 // toggle debug mode?
2692 if (caughtsigUSR2
) {
2701 // if we caught a SIGUSR1 then print message and clear signal
2703 PrintOut(LOG_INFO
,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2704 wakeuptime
-timenow
>0?(int)(wakeuptime
-timenow
):0);
2708 // return adjusted wakeuptime
2712 // Print out a list of valid arguments for the Directive d
2713 void printoutvaliddirectiveargs(int priority
, char d
) {
2718 PrintOut(priority
, "never[,q], sleep[,q], standby[,q], idle[,q]");
2721 PrintOut(priority
, "valid_regular_expression");
2724 PrintOut(priority
, "ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N");
2727 PrintOut(priority
, "normal, permissive");
2731 PrintOut(priority
, "on, off");
2734 PrintOut(priority
, "error, selftest");
2737 PrintOut(priority
, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
2740 if (!(s
= create_vendor_attribute_arg_list())) {
2741 PrintOut(LOG_CRIT
,"Insufficient memory to construct argument list\n");
2744 PrintOut(priority
, "\n%s\n", s
);
2745 s
=CheckFree(s
, __LINE__
,filenameandversion
);
2748 PrintOut(priority
, "use, ignore, show, showall");
2751 PrintOut(priority
, "none, samsung, samsung2");
2756 // exits with an error message, or returns integer value of token
2757 int GetInteger(char *arg
, char *name
, char *token
, int lineno
, char *configfile
, int min
, int max
){
2761 // check input range
2763 PrintOut(LOG_CRIT
, "min =%d passed to GetInteger() must be >=0\n", min
);
2767 // make sure argument is there
2769 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
2770 configfile
, lineno
, name
, token
, min
, max
);
2774 // get argument value (base 10), check that it's integer, and in-range
2775 val
=strtol(arg
,&endptr
,10);
2776 if (*endptr
!='\0' || val
<min
|| val
>max
) {
2777 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
2778 configfile
, lineno
, name
, token
, arg
, min
, max
);
2782 // all is well; return value
2787 // Get 1-3 small integer(s) for '-W' directive
2788 int Get3Integers(const char *arg
, const char *name
, const char *token
, int lineno
, const char *configfile
,
2789 unsigned char * val1
, unsigned char * val2
, unsigned char * val3
){
2790 unsigned v1
= 0, v2
= 0, v3
= 0;
2791 int n1
= -1, n2
= -1, n3
= -1, len
;
2793 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
2794 configfile
, lineno
, name
, token
);
2799 if (!( sscanf(arg
, "%u%n,%u%n,%u%n", &v1
, &n1
, &v2
, &n2
, &v3
, &n3
) >= 1
2800 && (n1
== len
|| n2
== len
|| n3
== len
) && v1
<= 255 && v2
<= 255 && v3
<= 255)) {
2801 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
2802 configfile
, lineno
, name
, token
, arg
);
2805 *val1
= (unsigned char)v1
; *val2
= (unsigned char)v2
; *val3
= (unsigned char)v3
;
2810 // This function returns 1 if it has correctly parsed one token (and
2811 // any arguments), else zero if no tokens remain. It returns -1 if an
2812 // error was encountered.
2813 int ParseToken(char *token
,cfgfile
*cfg
){
2815 char *name
=cfg
->name
;
2816 int lineno
=cfg
->lineno
;
2817 char *delim
= " \n\t";
2822 maildata
*mdat
=NULL
, tempmail
;
2824 // is the rest of the line a comment
2828 // is the token not recognized?
2829 if (*token
!='-' || strlen(token
)!=2) {
2830 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
2831 configfile
, lineno
, name
, token
);
2832 PrintOut(LOG_CRIT
, "Run smartd -D to print a list of valid Directives.\n");
2836 // token we will be parsing:
2839 // create temporary maildata structure. This means we can postpone
2840 // allocating space in the data segment until we are sure there are
2842 if ('m'==sym
|| 'M'==sym
){
2843 if (!cfg
->mailwarn
){
2844 memset(&tempmail
, 0, sizeof(maildata
));
2852 // parse the token and swallow its argument
2857 // monitor current pending sector count (default 197)
2858 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2860 if (val
==CUR_UNC_DEFAULT
)
2863 val
=CUR_UNC_DEFAULT
;
2864 // set bottom 8 bits to correct value
2865 cfg
->pending
&= 0xff00;
2866 cfg
->pending
|= val
;
2869 // monitor offline uncorrectable sectors (default 198)
2870 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 0, 255))<0)
2872 if (val
==OFF_UNC_DEFAULT
)
2875 val
=OFF_UNC_DEFAULT
;
2876 // turn off top 8 bits, then set to correct value
2877 cfg
->pending
&= 0xff;
2878 cfg
->pending
|= (val
<<8);
2881 // Set tolerance level for SMART command failures
2882 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2884 } else if (!strcmp(arg
, "normal")) {
2885 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
2886 // not on failure of an optional S.M.A.R.T. command.
2887 // This is the default so we don't need to actually do anything here.
2889 } else if (!strcmp(arg
, "permissive")) {
2890 // Permissive mode; ignore errors from Mandatory SMART commands
2897 // specify the device type
2898 cfg
->controller_explicit
= 1;
2899 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
2901 } else if (!strcmp(arg
, "ata")) {
2902 cfg
->controller_port
= 0;
2903 cfg
->controller_type
= CONTROLLER_ATA
;
2904 } else if (!strcmp(arg
, "scsi")) {
2905 cfg
->controller_port
=0;
2906 cfg
->controller_type
= CONTROLLER_SCSI
;
2907 } else if (!strcmp(arg
, "marvell")) {
2908 cfg
->controller_port
=0;
2909 cfg
->controller_type
= CONTROLLER_MARVELL_SATA
;
2910 } else if (!strncmp(arg
, "sat", 3)) {
2911 cfg
->controller_type
= CONTROLLER_SAT
;
2912 cfg
->controller_port
= 0;
2913 cfg
->satpassthrulen
= 0;
2914 if (strlen(arg
) > 3) {
2918 cp
= strchr(arg
, ',');
2919 if (cp
&& (1 == sscanf(cp
+ 1, "%d", &k
)) &&
2920 ((0 == k
) || (12 == k
) || (16 == k
)))
2921 cfg
->satpassthrulen
= k
;
2923 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2924 "'-d sat,<n>' requires <n> to be 0, 12 or 16\n",
2925 configfile
, lineno
, name
);
2929 } else if (!strncmp(arg
, "hpt", 3)){
2930 unsigned char i
, slash
= 0;
2931 cfg
->hpt_data
[0] = 0;
2932 cfg
->hpt_data
[1] = 0;
2933 cfg
->hpt_data
[2] = 0;
2934 cfg
->controller_type
= CONTROLLER_HPT
;
2935 for (i
=4; i
< strlen(arg
); i
++) {
2939 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2940 "'-d hpt,L/M/N' supports 2-3 items\n",
2941 configfile
, lineno
, name
);
2946 else if ((arg
[i
])>='0' && (arg
[i
])<='9') {
2947 if (cfg
->hpt_data
[slash
]>1) { /* hpt_data[x] max 19 */
2951 cfg
->hpt_data
[slash
] = cfg
->hpt_data
[slash
]*10 + arg
[i
] - '0';
2960 } else if (badarg
!= TRUE
) {
2961 if (cfg
->hpt_data
[0]==0 || cfg
->hpt_data
[0]>8){
2962 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2963 "'-d hpt,L/M/N' no/invalid controller id L supplied\n",
2964 configfile
, lineno
, name
);
2967 if (cfg
->hpt_data
[1]==0 || cfg
->hpt_data
[1]>8){
2968 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2969 "'-d hpt,L/M/N' no/invalid channel number M supplied\n",
2970 configfile
, lineno
, name
);
2974 if (cfg
->hpt_data
[2]==0 || cfg
->hpt_data
[2]>15){
2975 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive "
2976 "'-d hpt,L/M/N' no/invalid pmport number N supplied\n",
2977 configfile
, lineno
, name
);
2980 } else { /* no pmport device */
2984 } else if (!strcmp(arg
, "removable")) {
2987 // look 3ware,N RAID device
2991 // make a copy of the string to mess with
2992 if (!(s
= strdup(arg
))) {
2994 "No memory to copy argument to -d option - exiting\n");
2996 } else if (strncmp(s
,"3ware,",6)) {
2998 } else if (split_report_arg2(s
, &i
)){
2999 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N requires N integer\n",
3000 configfile
, lineno
, name
);
3002 } else if ( i
<0 || i
>15) {
3003 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive -d 3ware,N (N=%d) must have 0 <= N <= 15\n",
3004 configfile
, lineno
, name
, i
);
3007 // determine type of escalade device from name of device
3008 cfg
->controller_type
= guess_device_type(name
);
3009 if (cfg
->controller_type
!=CONTROLLER_3WARE_9000_CHAR
&& cfg
->controller_type
!=CONTROLLER_3WARE_678K_CHAR
)
3010 cfg
->controller_type
=CONTROLLER_3WARE_678K
;
3012 // NOTE: controller_port == disk number + 1
3013 cfg
->controller_port
= i
+1;
3015 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3020 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3022 } else if (!strcmp(arg
, "none")) {
3023 cfg
->fixfirmwarebug
= FIX_NONE
;
3024 } else if (!strcmp(arg
, "samsung")) {
3025 cfg
->fixfirmwarebug
= FIX_SAMSUNG
;
3026 } else if (!strcmp(arg
, "samsung2")) {
3027 cfg
->fixfirmwarebug
= FIX_SAMSUNG2
;
3033 // check SMART status
3037 // check for failure of usage attributes
3041 // track changes in all vendor attributes
3046 // track changes in prefail vendor attributes
3050 // track changes in usage vendor attributes
3054 // track changes in SMART logs
3055 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3057 } else if (!strcmp(arg
, "selftest")) {
3058 // track changes in self-test log
3060 } else if (!strcmp(arg
, "error")) {
3061 // track changes in ATA error log
3068 // monitor everything
3077 // automatic offline testing enable/disable
3078 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3080 } else if (!strcmp(arg
, "on")) {
3081 cfg
->autoofflinetest
= 2;
3082 } else if (!strcmp(arg
, "off")) {
3083 cfg
->autoofflinetest
= 1;
3089 // skip disk check if in idle or standby mode
3090 if (!(arg
= strtok(NULL
, delim
)))
3092 else if (!strcmp(arg
, "never") || !strcmp(arg
, "never,q"))
3094 else if (!strcmp(arg
, "sleep") || !strcmp(arg
, "sleep,q"))
3096 else if (!strcmp(arg
, "standby") || !strcmp(arg
, "standby,q"))
3098 else if (!strcmp(arg
, "idle") || !strcmp(arg
, "idle,q"))
3102 cfg
->powerquiet
= !!strchr(arg
, ',');
3105 // automatic attribute autosave enable/disable
3106 if ((arg
= strtok(NULL
, delim
)) == NULL
) {
3108 } else if (!strcmp(arg
, "on")) {
3110 } else if (!strcmp(arg
, "off")) {
3117 // warn user, and delete any previously given -s REGEXP Directives
3119 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3120 configfile
, lineno
, name
, cfg
->testdata
->regex
);
3121 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3123 // check for missing argument
3124 if (!(arg
= strtok(NULL
, delim
))) {
3127 // allocate space for structure and string
3128 else if (!(cfg
->testdata
=(testinfo
*)Calloc(1, sizeof(testinfo
))) || !(cfg
->testdata
->regex
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
))) {
3129 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create Test Directive -s %s!\n",
3130 configfile
, lineno
, name
, arg
);
3133 else if ((val
=regcomp(&(cfg
->testdata
->cregex
), arg
, REG_EXTENDED
))) {
3135 // not a valid regular expression!
3136 regerror(val
, &(cfg
->testdata
->cregex
), errormsg
, 512);
3137 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3138 configfile
, lineno
, name
, arg
, errormsg
);
3139 cfg
->testdata
=FreeTestData(cfg
->testdata
);
3142 // Do a bit of sanity checking and warn user if we think that
3143 // their regexp is "strange". User probably confused about shell
3144 // glob(3) syntax versus regular expression syntax regexp(7).
3145 if ((int)strlen(arg
) != (val
=strspn(arg
,"0123456789/.-+*|()?^$[]SLCO")))
3146 PrintOut(LOG_INFO
, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3147 configfile
, lineno
, name
, val
+1, arg
[val
], arg
);
3150 // send email to address that follows
3151 if (!(arg
= strtok(NULL
,delim
)))
3154 if (mdat
->address
) {
3155 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3156 configfile
, lineno
, name
, mdat
->address
);
3157 mdat
->address
=FreeNonZero(mdat
->address
, -1,__LINE__
,filenameandversion
);
3159 mdat
->address
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3163 // email warning options
3164 if (!(arg
= strtok(NULL
, delim
)))
3166 else if (!strcmp(arg
, "once"))
3167 mdat
->emailfreq
= 1;
3168 else if (!strcmp(arg
, "daily"))
3169 mdat
->emailfreq
= 2;
3170 else if (!strcmp(arg
, "diminishing"))
3171 mdat
->emailfreq
= 3;
3172 else if (!strcmp(arg
, "test"))
3173 mdat
->emailtest
= 1;
3174 else if (!strcmp(arg
, "exec")) {
3175 // Get the next argument (the command line)
3176 if (!(arg
= strtok(NULL
, delim
))) {
3177 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3178 configfile
, lineno
, name
, token
);
3181 // Free the last cmd line given if any, and copy new one
3182 if (mdat
->emailcmdline
) {
3183 PrintOut(LOG_INFO
, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3184 configfile
, lineno
, name
, mdat
->emailcmdline
);
3185 mdat
->emailcmdline
=FreeNonZero(mdat
->emailcmdline
, -1,__LINE__
,filenameandversion
);
3187 mdat
->emailcmdline
=CustomStrDup(arg
, 1, __LINE__
,filenameandversion
);
3193 // ignore failure of usage attribute
3194 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3196 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_FAILUSE
, __LINE__
);
3199 // ignore attribute for tracking purposes
3200 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3202 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_IGNORE
, __LINE__
);
3205 // print raw value when tracking
3206 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3208 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3211 // track changes in raw value (forces printing of raw value)
3212 if ((val
=GetInteger(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
, 1, 255))<0)
3214 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAWPRINT
, __LINE__
);
3215 IsAttributeOff(val
, &cfg
->monitorattflags
, 1, MONITOR_RAW
, __LINE__
);
3218 // track Temperature
3219 if ((val
=Get3Integers(arg
=strtok(NULL
,delim
), name
, token
, lineno
, configfile
,
3220 &cfg
->tempdiff
, &cfg
->tempinfo
, &cfg
->tempcrit
))<0)
3222 // increase min Temperature during first 30 minutes
3223 if (!(cfg
->tempmininc
= (unsigned char)(CHECKTIME
/ checktime
)))
3224 cfg
->tempmininc
= 1;
3227 // non-default vendor-specific attribute meaning
3228 if (!(arg
=strtok(NULL
,delim
))) {
3230 } else if (parse_attribute_def(arg
, &cfg
->attributedefs
)){
3235 // Define use of drive-specific presets.
3236 if (!(arg
= strtok(NULL
, delim
))) {
3238 } else if (!strcmp(arg
, "use")) {
3239 cfg
->ignorepresets
= FALSE
;
3240 } else if (!strcmp(arg
, "ignore")) {
3241 cfg
->ignorepresets
= TRUE
;
3242 } else if (!strcmp(arg
, "show")) {
3243 cfg
->showpresets
= TRUE
;
3244 } else if (!strcmp(arg
, "showall")) {
3251 // Directive not recognized
3252 PrintOut(LOG_CRIT
,"File %s line %d (drive %s): unknown Directive: %s\n",
3253 configfile
, lineno
, name
, token
);
3258 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3259 configfile
, lineno
, name
, token
);
3262 PrintOut(LOG_CRIT
, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3263 configfile
, lineno
, name
, token
, arg
);
3265 if (missingarg
|| badarg
) {
3266 PrintOut(LOG_CRIT
, "Valid arguments to %s Directive are: ", token
);
3267 printoutvaliddirectiveargs(LOG_CRIT
, sym
);
3268 PrintOut(LOG_CRIT
, "\n");
3272 // If this did something to fill the mail structure, and that didn't
3273 // already exist, create it and copy.
3275 if (!(cfg
->mailwarn
=(maildata
*)Calloc(1, sizeof(maildata
)))) {
3276 PrintOut(LOG_INFO
, "File %s line %d (drive %s): no memory to create mail warning entry!\n",
3277 configfile
, lineno
, name
);
3280 memcpy(cfg
->mailwarn
, mdat
, sizeof(maildata
));
3286 // Allocate storage for a new cfgfile entry. If original!=NULL, it's
3287 // a copy of the original, but with private data storage. Else all is
3288 // zeroed. Returns address, and fails if no memory available.
//
// Each pointer member that is set in the copy (testdata, mailwarn,
// attributedefs, monitorattflags, smartval, smartthres) is pointed at a
// newly allocated buffer below, so the copy does not keep using the
// buffers owned by 'original'.
3290 cfgfile
*CreateConfigEntry(cfgfile
*original
){
3293 // allocate memory for new structure
3294 if (!(add
=(cfgfile
*)Calloc(1,sizeof(cfgfile
))))
3297 // if old structure was pointed to, copy it
// (bitwise copy: after this, every pointer member still aliases original's data)
3299 memcpy(add
, original
, sizeof(cfgfile
));
3301 // make private copies of data items ONLY if they are in use (non
// NOTE(review): CustomStrDup() is defined elsewhere; presumably it returns a
// newly allocated duplicate of the string -- confirm, including the meaning
// of its second argument (0 here, 1 for the regex below).
3303 add
->name
= CustomStrDup(add
->name
, 0, __LINE__
,filenameandversion
);
// self-test data: private struct, private regex string, recompiled regex
3305 if (add
->testdata
) {
3307 if (!(add
->testdata
=(testinfo
*)Calloc(1,sizeof(testinfo
))))
3309 memcpy(add
->testdata
, original
->testdata
, sizeof(testinfo
));
3310 add
->testdata
->regex
= CustomStrDup(add
->testdata
->regex
, 1, __LINE__
,filenameandversion
);
3311 // only POSIX-portable way to make fresh copy of compiled regex is
3312 // to recompile it completely. There is no POSIX
3313 // compiled-regex-copy command.
3314 if ((val
=regcomp(&(add
->testdata
->cregex
), add
->testdata
->regex
, REG_EXTENDED
))) {
// regcomp() returned non-zero: convert its error code to text for the log.
// 512 is passed as the size of errormsg.
// NOTE(review): errormsg's declaration is not shown here; confirm it is at least 512 bytes.
3316 regerror(val
, &(add
->testdata
->cregex
), errormsg
, 512);
3317 PrintOut(LOG_CRIT
, "unable to recompile regular expression %s. %s\n", add
->testdata
->regex
, errormsg
);
// mail settings: private struct, then private copies of both strings
3322 if (add
->mailwarn
) {
3323 if (!(add
->mailwarn
=(maildata
*)Calloc(1,sizeof(maildata
))))
3325 memcpy(add
->mailwarn
, original
->mailwarn
, sizeof(maildata
));
3326 add
->mailwarn
->address
= CustomStrDup(add
->mailwarn
->address
, 0, __LINE__
,filenameandversion
);
3327 add
->mailwarn
->emailcmdline
= CustomStrDup(add
->mailwarn
->emailcmdline
, 0, __LINE__
,filenameandversion
);
// attribute definitions: MAX_ATTRIBUTE_NUM bytes, copied from original
3330 if (add
->attributedefs
) {
3331 if (!(add
->attributedefs
=(unsigned char *)Calloc(MAX_ATTRIBUTE_NUM
,1)))
3333 memcpy(add
->attributedefs
, original
->attributedefs
, MAX_ATTRIBUTE_NUM
);
// monitoring flags: NMONITOR*32 bytes, copied from original
3336 if (add
->monitorattflags
) {
3337 if (!(add
->monitorattflags
=(unsigned char *)Calloc(NMONITOR
*32, 1)))
3339 memcpy(add
->monitorattflags
, original
->monitorattflags
, NMONITOR
*32);
// SMART values and thresholds: a new buffer is allocated for each; no
// memcpy from 'original' appears for these two members.
3342 if (add
->smartval
) {
3343 if (!(add
->smartval
=(struct ata_smart_values
*)Calloc(1,sizeof(struct ata_smart_values
))))
3347 if (add
->smartthres
) {
3348 if (!(add
->smartthres
=(struct ata_smart_thresholds_pvt
*)Calloc(1,sizeof(struct ata_smart_thresholds_pvt
))))
// out-of-memory report
// NOTE(review): presumably reached from the failed Calloc() checks above -- confirm
3355 PrintOut(LOG_CRIT
, "No memory to create entry from configuration file\n");
3361 // This is the routine that adds things to the cfgentries list. To
3362 // prevent memory leaks when re-reading the configuration file many
3363 // times, this routine MUST deallocate any memory other than that
3364 // pointed to within cfg-> before it returns.
3366 // Return values are:
3367 // 1: parsed a normal line
3368 // 0: found comment or blank line
3369 // -1: found SCANDIRECTIVE line
3370 // -2: found an error
3372 // Note: this routine modifies *line from the caller!
//
// Parameters:
//   entry   index in cfgentries[] at which the new entry is stored
//   lineno  line number reported in diagnostics
//   line    text of the (already joined) configuration line; it is split
//           in place by strtok()
3373 int ParseConfigLine(int entry
, int lineno
,char *line
){
// tokens are separated by blanks, newlines and tabs
3376 char *delim
= " \n\t";
3380 // get first token: device name. If a comment, skip line
3381 if (!(name
=strtok(line
,delim
)) || *name
=='#') {
3385 // Have we detected the SCANDIRECTIVE directive?
3386 if (!strcmp(SCANDIRECTIVE
,name
)){
3389 PrintOut(LOG_INFO
,"Scan Directive %s (line %d) must be the first entry in %s\n",name
, lineno
, configfile
);
3394 // Is there space for another entry? If not, allocate more
3395 while (entry
>=cfgentries_max
)
3396 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "configuration file device");
3398 // We've got a legit entry, make space to store it
// (NULL argument: CreateConfigEntry() is asked for a new entry, not a copy)
3399 cfg
=cfgentries
[entry
]=CreateConfigEntry(NULL
);
3400 cfg
->name
= CustomStrDup(name
, 1, __LINE__
,filenameandversion
);
3402 // Store line number, and by default check for both device types.
3405 // Try and recognize if a IDE or SCSI device. These can be
3406 // overwritten by configuration file directives.
3407 if (cfg
->controller_type
==CONTROLLER_UNKNOWN
)
3408 cfg
->controller_type
= guess_device_type(cfg
->name
);
3410 // parse tokens one at a time from the file.
// Each remaining token is handed to ParseToken(), which applies it to cfg.
3411 while ((token
=strtok(NULL
,delim
))){
3412 int retval
=ParseToken(token
,cfg
);
3421 PrintOut(LOG_INFO
,"Parsed token %s\n",token
);
3427 // error found on the line
3432 // If we found 3ware controller, then modify device name by adding a SPACE
3433 if (cfg
->controller_port
){
// 17 = 16 characters of " [3ware_disk_NN]" plus the terminating NUL
// (assumes controller_port-1 prints as two digits; snprintf() below is
// bounded by len, so a wider number would be truncated, not overflow)
3434 int len
=17+strlen(cfg
->name
);
3438 PrintOut(LOG_CRIT
, "smartd: can not scan for 3ware devices (line %d of file %s)\n",
3439 lineno
, configfile
);
3443 if (!(newname
=(char *)calloc(len
,1))) {
3444 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3448 // Make new device name by adding a space then RAID disk number
// (the stored port is printed minus one)
3449 snprintf(newname
, len
, "%s [3ware_disk_%02d]", cfg
->name
, cfg
->controller_port
-1);
// release the old name string
3450 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
// Same renaming for HighPoint: a non-zero hpt_data[0] selects this branch
3455 if (cfg
->hpt_data
[0]) {
// " [hpt_d/d/d]" needs 12 characters plus NUL with one-digit numbers; 17
// leaves room for a few more digits, and snprintf() is bounded by len
3456 int len
=17+strlen(cfg
->name
);
3460 PrintOut(LOG_CRIT
, "smartd: can not scan for highpoint devices (line %d of file %s)\n",
3461 lineno
, configfile
);
3465 if (!(newname
=(char *)calloc(len
,1))) {
3466 PrintOut(LOG_INFO
,"No memory to parse file: %s line %d, %s\n", configfile
, lineno
, strerror(errno
));
3470 // Make new device name by adding a space then RAID disk number
3471 snprintf(newname
, len
, "%s [hpt_%d/%d/%d]", cfg
->name
, cfg
->hpt_data
[0],
3472 cfg
->hpt_data
[1], cfg
->hpt_data
[2]);
3473 cfg
->name
=CheckFree(cfg
->name
, __LINE__
,filenameandversion
);
3478 // If NO monitoring directives are set, then set all of them.
3479 if (!(cfg
->smartcheck
|| cfg
->usagefailed
|| cfg
->prefail
||
3480 cfg
->usage
|| cfg
->selftest
|| cfg
->errorlog
||
3481 cfg
->tempdiff
|| cfg
->tempinfo
|| cfg
->tempcrit
)) {
3483 PrintOut(LOG_INFO
,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3484 cfg
->name
, cfg
->lineno
, configfile
);
3494 // additional sanity check. Has user set -M options without -m?
3495 if (cfg
->mailwarn
&& !cfg
->mailwarn
->address
&& (cfg
->mailwarn
->emailcmdline
|| cfg
->mailwarn
->emailfreq
|| cfg
->mailwarn
->emailtest
)){
3496 PrintOut(LOG_CRIT
,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3497 cfg
->name
, cfg
->lineno
, configfile
);
3501 // has the user set <nomailer>?
3502 if (cfg
->mailwarn
&& cfg
->mailwarn
->address
&& !strcmp(cfg
->mailwarn
->address
,"<nomailer>")){
3503 // check that -M exec is also set
3504 if (!cfg
->mailwarn
->emailcmdline
){
3505 PrintOut(LOG_CRIT
,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3506 cfg
->name
, cfg
->lineno
, configfile
);
3509 // now free memory. From here on the sign of <nomailer> is
3510 // address==NULL and cfg->emailcmdline!=NULL
3511 cfg
->mailwarn
->address
=FreeNonZero(cfg
->mailwarn
->address
, -1,__LINE__
,filenameandversion
);
3514 // set cfg->emailfreq to 1 (once) if user hasn't set it
3515 if (cfg
->mailwarn
&& !cfg
->mailwarn
->emailfreq
)
3516 cfg
->mailwarn
->emailfreq
= 1;
3526 // clean up utility for ParseConfigFile()
//   fpp       address of the caller's stream pointer
//   is_stdin  caller's flag saying the configuration is being read from
//             stdin (ParseConfigFile() derives it from the configfile pointer)
// NOTE(review): presumably closes *fpp unless is_stdin is set -- the
// statements are not shown here, confirm.
3527 void cleanup(FILE **fpp
, int is_stdin
){
3529 // (*fpp != stdin) does not work here if stdin has been closed & reopened
3539 // Parses a configuration file. Return values are:
3540 // N>=0: found N entries
3541 // -1: syntax error in config file
3542 // -2: config file does not exist
3543 // -3: config file exists but cannot be read
3545 // In the case where the return value is 0, there are three
3547 // Empty configuration file ==> cfgentries==NULL
3548 // No configuration file ==> cfgentries[0]->lineno == 0
3549 // SCANDIRECTIVE found ==> cfgentries[0]->lineno != 0
3550 int ParseConfigFile(){
// entry: index of the next cfgentries[] slot; lineno: physical line in the
// file; cont: characters accumulated so far in fullline; contlineno: line
// number passed to ParseConfigLine() and used in the length diagnostics
3552 int entry
=0,lineno
=1,cont
=0,contlineno
=0;
// line holds one physical line; the buffer is two bytes larger than
// MAXLINELEN so that an over-long line can be detected by the
// len>MAXLINELEN check below.
3553 char line
[MAXLINELEN
+2];
// fullline accumulates a logical line built from continuation lines
3554 char fullline
[MAXCONTLINE
+1];
3556 int is_stdin
= (configfile
== configfile_stdin
); // pointer comparison ok here
3558 // Open config file, if it exists and is not <stdin>
3560 fp
=fopen(configfile
,"r");
// A missing file (ENOENT) is only an error when the user named the file
// explicitly (configfile_alt is set).
3561 if (fp
==NULL
&& (errno
!=ENOENT
|| configfile_alt
)) {
3562 // file exists but we can't read it or it should exist due to '-c' option
3563 int ret
= (errno
!=ENOENT
? -3 : -2);
3564 PrintOut(LOG_CRIT
,"%s: Unable to open configuration file %s\n",
3565 strerror(errno
),configfile
);
3569 else // read from stdin ('-c -' option)
3572 // No configuration file found -- use fake one
// The fake line is "<SCANDIRECTIVE> -a": " -a" is 3 characters, plus 1 for
// the terminating NUL.
3574 int len
=strlen(SCANDIRECTIVE
)+4;
3575 char *fakeconfig
=(char *)calloc(len
,1);
// Sanity checks: snprintf() must have written exactly len-1 characters and
// ParseConfigLine() must report the line as a SCANDIRECTIVE line (-1).
3578 (len
-1) != snprintf(fakeconfig
, len
, "%s -a", SCANDIRECTIVE
) ||
3579 -1 != ParseConfigLine(entry
, 0, fakeconfig
)
3581 PrintOut(LOG_CRIT
,"Internal error in ParseConfigFile() at line %d of file %s\n%s",
3582 __LINE__
, filenameandversion
, reportbug
);
3585 fakeconfig
=CheckFree(fakeconfig
, __LINE__
,filenameandversion
);
3590 setmode(fileno(fp
), O_TEXT
); // Allow files with \r\n
3593 // configuration file exists
3594 PrintOut(LOG_INFO
,"Opened configuration file %s\n",configfile
);
3596 // parse config file line by line
3598 int len
=0,scandevice
;
3603 // make debugging simpler
3604 memset(line
,0,sizeof(line
));
// read at most MAXLINELEN+1 characters of the next physical line
3607 code
=fgets(line
,MAXLINELEN
+2,fp
);
3609 // are we at the end of the file?
3612 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3613 // See if we found a SCANDIRECTIVE directive
3614 if (scandevice
==-1) {
3615 cleanup(&fp
, is_stdin
);
3618 // did we find a syntax error
3619 if (scandevice
==-2) {
3620 cleanup(&fp
, is_stdin
);
3623 // the final line is part of a continuation line
3630 // input file line number
3633 // See if line is too long
3635 if (len
>MAXLINELEN
){
3637 if (line
[len
-1]=='\n')
3638 warn
="(including newline!) ";
3641 PrintOut(LOG_CRIT
,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3642 (int)contlineno
,configfile
,warn
,(int)MAXLINELEN
);
3643 cleanup(&fp
, is_stdin
);
3647 // Ignore anything after comment symbol
3648 if ((comment
=strchr(line
,'#'))){
3653 // is the total line (made of all continuation lines) too long?
3654 if (cont
+len
>MAXCONTLINE
){
3655 PrintOut(LOG_CRIT
,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3656 lineno
, (int)contlineno
, configfile
, (int)MAXCONTLINE
);
3657 cleanup(&fp
, is_stdin
);
3661 // copy string so far into fullline, and increment length
3662 strcpy(fullline
+cont
,line
);
3665 // is this a continuation line. If so, replace \ by space and look at next line
// A line continues when its last backslash is followed only by blanks,
// tabs or a newline. fullline+(cont-len) is where this physical line was
// copied (cont has already been advanced by len, per the comment above), so
// adding the backslash's offset within line addresses the same character
// inside fullline.
3666 if ( (lastslash
=strrchr(line
,'\\')) && !strtok(lastslash
+1," \n\t")){
3667 *(fullline
+(cont
-len
)+(lastslash
-line
))=' ';
3671 // Not a continuation line. Parse it
3672 scandevice
=ParseConfigLine(entry
,contlineno
,fullline
);
3674 // did we find a scandevice directive?
3675 if (scandevice
==-1) {
3676 cleanup(&fp
, is_stdin
);
3679 // did we find a syntax error
3680 if (scandevice
==-2) {
3681 cleanup(&fp
, is_stdin
);
3689 cleanup(&fp
, is_stdin
);
3691 // note -- may be zero if syntax of file OK, but no valid entries!
3696 // Prints copyright, license and version information
// Takes no arguments and returns nothing.
3697 void PrintCopyleft(void){
3704 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3705 <LIST> is the list of valid arguments for option opt. */
// opt is the short option character; the list text comes from
// GetValidArgList(opt). All output goes through PrintOut() at LOG_CRIT.
3706 void PrintValidArgs(char opt
) {
3709 PrintOut(LOG_CRIT
, "=======> VALID ARGUMENTS ARE: ");
// a NULL list is reported instead of being printed
3710 if (!(s
= GetValidArgList(opt
)))
3711 PrintOut(LOG_CRIT
, "Error constructing argument list for option %c", opt
);
// NOTE(review): s is passed in the format-string position. If the list
// text could ever contain a '%' this would misbehave; passing "%s" as the
// format with s as its argument would be the safe form.
3713 PrintOut(LOG_CRIT
, (char *)s
);
3714 PrintOut(LOG_CRIT
, " <=======\n");
3717 // Parses input line, prints usage message and
3718 // version/license/copyright messages
//
// argc/argv are the program arguments. Options are read with getopt_long()
// when HAVE_GETOPT_LONG is defined, otherwise with getopt(). Results are
// stored in globals (facility, checktime, configfile, pid_file and fields
// of con); problems are reported through PrintOut() at LOG_CRIT.
3719 void ParseOpts(int argc
, char **argv
){
3720 extern char *optarg
;
3721 extern int optopt
, optind
, opterr
;
3726 // Please update GetValidArgList() if you edit shortopts
// a ':' after a letter means that option takes an argument
3727 const char *shortopts
= "c:l:q:dDi:p:r:Vh?";
3728 #ifdef HAVE_GETOPT_LONG
3730 // Please update GetValidArgList() if you edit longopts
// each long option maps to the short option character in the last column;
// "version", "license" and "copyright" all map to 'V', "help" and "usage"
// both map to 'h'
3731 struct option longopts
[] = {
3732 { "configfile", required_argument
, 0, 'c' },
3733 { "logfacility", required_argument
, 0, 'l' },
3734 { "quit", required_argument
, 0, 'q' },
3735 { "debug", no_argument
, 0, 'd' },
3736 { "showdirectives", no_argument
, 0, 'D' },
3737 { "interval", required_argument
, 0, 'i' },
3738 { "pidfile", required_argument
, 0, 'p' },
3739 { "report", required_argument
, 0, 'r' },
3740 #if defined(_WIN32) || defined(__CYGWIN__)
3741 { "service", no_argument
, 0, 'S' },
3743 { "version", no_argument
, 0, 'V' },
3744 { "license", no_argument
, 0, 'V' },
3745 { "copyright", no_argument
, 0, 'V' },
3746 { "help", no_argument
, 0, 'h' },
3747 { "usage", no_argument
, 0, 'h' },
3755 // Parse input options. This horrible construction is so that emacs
3756 // indents properly. Sorry.
3757 while (-1 != (optchar
=
3758 #ifdef HAVE_GETOPT_LONG
3759 getopt_long(argc
, argv
, shortopts
, longopts
, NULL
)
3761 getopt(argc
, argv
, shortopts
)
// keywords compared against optarg here: nodev, nodevstartup, never,
// onecheck, showtests, errors
3768 if (!(strcmp(optarg
,"nodev"))) {
3770 } else if (!(strcmp(optarg
,"nodevstartup"))) {
3772 } else if (!(strcmp(optarg
,"never"))) {
3774 } else if (!(strcmp(optarg
,"onecheck"))) {
3777 } else if (!(strcmp(optarg
,"showtests"))) {
3780 } else if (!(strcmp(optarg
,"errors"))) {
3787 // set the log facility level
// accepted names: "daemon" and "local0" .. "local7"
3788 if (!strcmp(optarg
, "daemon"))
3789 facility
=LOG_DAEMON
;
3790 else if (!strcmp(optarg
, "local0"))
3791 facility
=LOG_LOCAL0
;
3792 else if (!strcmp(optarg
, "local1"))
3793 facility
=LOG_LOCAL1
;
3794 else if (!strcmp(optarg
, "local2"))
3795 facility
=LOG_LOCAL2
;
3796 else if (!strcmp(optarg
, "local3"))
3797 facility
=LOG_LOCAL3
;
3798 else if (!strcmp(optarg
, "local4"))
3799 facility
=LOG_LOCAL4
;
3800 else if (!strcmp(optarg
, "local5"))
3801 facility
=LOG_LOCAL5
;
3802 else if (!strcmp(optarg
, "local6"))
3803 facility
=LOG_LOCAL6
;
3804 else if (!strcmp(optarg
, "local7"))
3805 facility
=LOG_LOCAL7
;
3810 // enable debug mode
3814 // print summary of all valid directives
3820 // Period (time interval) for checking
3821 // strtol will set errno in the event of overflow, so we'll check it.
3823 lchecktime
= strtol(optarg
, &tailptr
, 10);
// reject trailing junk, values below 10 or above INT_MAX, and any errno
3824 if (*tailptr
!= '\0' || lchecktime
< 10 || lchecktime
> INT_MAX
|| errno
) {
3827 PrintOut(LOG_CRIT
, "======> INVALID INTERVAL: %s <=======\n", optarg
);
3828 PrintOut(LOG_CRIT
, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX
);
3829 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
// the range check above makes this narrowing conversion safe
3832 checktime
= (int)lchecktime
;
3835 // report IOCTL transactions
3840 // split_report_arg() may modify its first argument string, so use a
3841 // copy of optarg in case we want optarg for an error message.
3842 if (!(s
= strdup(optarg
))) {
3843 PrintOut(LOG_CRIT
, "No memory to process -r option - exiting\n");
// split_report_arg() fills i with the report level; only 1..3 is accepted
3846 if (split_report_arg(s
, &i
)) {
3848 } else if (i
<1 || i
>3) {
3851 PrintOut(LOG_CRIT
, "======> INVALID REPORT LEVEL: %s <=======\n", optarg
);
3852 PrintOut(LOG_CRIT
, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
// "ioctl" sets both the ATA and the SCSI report level
3854 } else if (!strcmp(s
,"ioctl")) {
3855 con
->reportataioctl
= con
->reportscsiioctl
= i
;
3856 } else if (!strcmp(s
,"ataioctl")) {
3857 con
->reportataioctl
= i
;
3858 } else if (!strcmp(s
,"scsiioctl")) {
3859 con
->reportscsiioctl
= i
;
// release the strdup() copy made above
3863 s
=CheckFree(s
, __LINE__
,filenameandversion
);
3867 // alternate configuration file
// any argument other than "-" names a file; both configfile and
// configfile_alt then point at the same duplicated string
3868 if (strcmp(optarg
,"-"))
3869 configfile
=configfile_alt
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3870 else // read from stdin
3871 configfile
=configfile_stdin
;
3874 // output file with PID number
3875 pid_file
=CustomStrDup(optarg
, 1, __LINE__
,filenameandversion
);
3877 #if defined(_WIN32) || defined(__CYGWIN__)
3879 // running as service
3880 #ifdef __CYGWIN__ // On Windows, option is already handled by daemon_main(), so ignore it
3884 #endif // _WIN32 || __CYGWIN__
3886 // print version and CVS info
3891 // help: print summary of command-line options
3899 // unrecognized option
3902 #ifdef HAVE_GETOPT_LONG
3903 // Point arg to the argument in which this option was found.
3904 arg
= argv
[optind
-1];
3905 // Check whether the option is a long option that doesn't map to -h.
// (arg[1]=='-' means the argument started with "--"; arg+2 skips the dashes)
3906 if (arg
[1] == '-' && optchar
!= 'h') {
3907 // Iff optopt holds a valid option then argument must be missing.
3908 if (optopt
&& (strchr(shortopts
, optopt
) != NULL
)) {
3909 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg
+2);
3910 PrintValidArgs(optopt
);
3912 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg
+2);
3914 PrintOut(LOG_CRIT
, "\nUse smartd --help to get a usage summary\n\n");
3919 // Iff optopt holds a valid option then argument must be missing.
3920 if (strchr(shortopts
, optopt
) != NULL
){
3921 PrintOut(LOG_CRIT
, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt
);
3922 PrintValidArgs(optopt
);
3924 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt
);
3926 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3933 // Check to see if option had an unrecognized or incorrect argument.
3937 // It would be nice to print the actual option name given by the user
3938 // here, but we just print the short form. Please fix this if you know
3939 // a clean way to do it.
3940 PrintOut(LOG_CRIT
, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar
, optarg
);
3941 PrintValidArgs(optchar
);
3942 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3947 // non-option arguments are not allowed
// (argc > optind means arguments remain after option parsing)
3948 if (argc
> optind
) {
3951 PrintOut(LOG_CRIT
, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv
[optind
]);
3952 PrintOut(LOG_CRIT
, "\nUse smartd -h to get a usage summary\n\n");
3956 // no pidfile in debug mode
3957 if (debugmode
&& pid_file
) {
3960 PrintOut(LOG_CRIT
, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
3961 PrintOut(LOG_CRIT
, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file
);
// release the pid file name that was duplicated for -p
3962 pid_file
=FreeNonZero(pid_file
, -1,__LINE__
,filenameandversion
);
3972 // Function we call if no configuration file was found or if the
3973 // SCANDIRECTIVE Directive was found. It makes entries for device
3974 // names returned by make_device_names() in os_OSNAME.c
//
//   type   device class to scan for; "ATA" and "SCSI" are the two values
//          tested below
//   start  index in cfgentries[] of the first slot to fill; entry i goes
//          to cfgentries[start+i]
// NOTE(review): the return statement is not shown here; the caller adds the
// result to its entry count, so presumably it is the number of entries
// made -- confirm.
3975 int MakeConfigEntries(const char *type
, int start
){
3978 char** devlist
= NULL
;
// cfgentries[0] serves as the template that further entries are copied from
3979 cfgfile
*first
=cfgentries
[0],*cfg
=first
;
3981 // make list of devices
3982 if ((num
=make_device_names(&devlist
,type
))<0)
3983 PrintOut(LOG_CRIT
,"Problem creating device name scan list\n");
3985 // if no devices, or error constructing list, return
3989 // loop over entries to create
3990 for (i
=0; i
<num
; i
++){
3992 // make storage and copy for all but first entry
3994 // allocate more storage if needed
3995 while (cfgentries_max
<=start
+i
)
3996 cfgentries
=AllocateMoreSpace(cfgentries
, &cfgentries_max
, "simulated configuration file device");
3997 cfg
=cfgentries
[start
+i
]=CreateConfigEntry(first
);
// force the controller type to match the class being scanned
4001 if (!strcmp(type
,"ATA") )
4002 cfg
->controller_type
= CONTROLLER_ATA
;
4003 if (!strcmp(type
,"SCSI") )
4004 cfg
->controller_type
= CONTROLLER_SCSI
;
4006 // remove device name, if it's there, and put in correct one
4007 cfg
->name
=FreeNonZero(cfg
->name
, -1,__LINE__
,filenameandversion
);
4008 // save pointer to the device name created within
4009 // make_device_names
// (the pointer itself is stored, not a copy of the string)
4010 cfg
->name
=devlist
[i
];
4013 // If needed, free memory used for devlist: pointers now in
4014 // cfgentries[]->names. If num==0 we never get to this point, but
4015 // that's OK. If we realloc()d the array length in
4016 // make_device_names() that was ALREADY equivalent to calling
// only the pointer array is released here; the size passed is num pointers
4018 devlist
= FreeNonZero(devlist
,(sizeof (char*) * num
),__LINE__
, filenameandversion
);
// Reports that a device could not be registered.
//   name           device name shown in the message
//   type           device class text shown in the message (callers pass
//                  "ATA" or "SCSI")
//   line           configuration-file line number shown in the message
//   scandirective  non-zero lowers the log level from LOG_CRIT to LOG_INFO;
//                  a value of exactly 1 outside debug mode suppresses the
//                  message altogether
4023 void CanNotRegister(char *name
, char *type
, int line
, int scandirective
){
// stay silent when scanning (scandirective==1) and not in debug mode
4024 if( !debugmode
&& scandirective
== 1 ) { return; }
4026 PrintOut(scandirective
?LOG_INFO
:LOG_CRIT
,
4027 "Unable to register %s device %s at line %d of file %s\n",
4028 type
, name
, line
, configfile
);
// NOTE(review): the condition choosing between the message above and this
// shorter one (no line/file) is not shown here -- confirm.
4030 PrintOut(LOG_INFO
,"Unable to register %s device %s\n",
4035 // Returns negative value (see ParseConfigFile()) if config file
4036 // had errors, else number of entries which may be zero or positive.
4037 // If we found no configuration file, or it contained SCANDIRECTIVE,
4038 // then *scanning is set to 1, else 0.
4039 int ReadOrMakeConfigEntries(int *scanning
){
4042 // deallocate any cfgfile data structures in memory
4043 RmAllConfigEntries();
4045 // parse configuration file configfile (normally /etc/smartd.conf)
4046 if ((entries
=ParseConfigFile())<0) {
4048 // There was an error reading the configuration file.
// discard whatever was built before the error was hit
4049 RmAllConfigEntries();
4051 PrintOut(LOG_CRIT
, "Configuration file %s has fatal syntax errors.\n", configfile
);
4055 // did we find entries or scan?
4058 // no error parsing config file.
4060 // we did not find a SCANDIRECTIVE and did find valid entries
4061 PrintOut(LOG_INFO
, "Configuration file %s parsed.\n", configfile
);
4063 else if (cfgentries
&& cfgentries
[0]) {
4064 // we found a SCANDIRECTIVE or there was no configuration file so
4065 // scan. Configuration file's first entry contains all options
4067 cfgfile
*first
=cfgentries
[0];
// scan ATA unless the first entry is explicitly SCSI, and scan SCSI unless
// it is explicitly ATA; any other controller type enables both scans
4068 int doata
= !(first
->controller_type
==CONTROLLER_SCSI
);
4069 int doscsi
= !(first
->controller_type
==CONTROLLER_ATA
);
4074 PrintOut(LOG_INFO
,"Configuration file %s was parsed, found %s, scanning devices\n", configfile
, SCANDIRECTIVE
);
4076 PrintOut(LOG_INFO
,"No configuration file %s found, scanning devices\n", configfile
);
4078 // make config list of ATA devices to search for
// the current count is passed as the start index, so new entries follow
// the ones already present
4080 entries
+=MakeConfigEntries("ATA", entries
);
4081 // make config list of SCSI devices to search for
4083 entries
+=MakeConfigEntries("SCSI", entries
);
4085 // warn user if scan table found no devices
4087 PrintOut(LOG_CRIT
,"In the system's table of devices NO devices found to scan\n");
4088 // get rid of fake entry with SCANDIRECTIVE as name
4089 RmConfigEntry(cfgentries
, __LINE__
);
4093 PrintOut(LOG_CRIT
,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile
);
4099 // This function tries devices from cfgentries. Each one that can be
4100 // registered is moved onto the [ata|scsi]devices lists and removed
4101 // from the cfgentries list, else its memory is deallocated.
//
// scanning is passed on to ATADeviceScan(), SCSIDeviceScan() and
// CanNotRegister(); when it is zero, a device that cannot be registered is
// handled by the "explicitly listed" branch near the end of the loop.
4102 void RegisterDevices(int scanning
){
4105 // start by clearing lists/memory of ALL existing devices
4107 numdevata
=numdevscsi
=0;
// walk every slot of cfgentries[], including empty ones
4110 for (i
=0; i
<cfgentries_max
; i
++){
4112 cfgfile
*ent
=cfgentries
[i
];
4114 // skip any NULL entries (holes)
4118 // register ATA devices
// (attempted for every controller type except SCSI)
4119 if (ent
->controller_type
!=CONTROLLER_SCSI
){
// a non-zero result from ATADeviceScan() means the device was not registered
4120 if (ATADeviceScan(ent
, scanning
))
4121 CanNotRegister(ent
->name
, "ATA", ent
->lineno
, scanning
);
4123 // move onto the list of ata devices
4125 while (numdevata
>=atadevlist_max
)
4126 atadevlist
=AllocateMoreSpace(atadevlist
, &atadevlist_max
, "ATA device");
4127 atadevlist
[numdevata
++]=ent
;
4131 // then register SCSI devices
4132 if (ent
->controller_type
==CONTROLLER_SCSI
|| ent
->controller_type
==CONTROLLER_UNKNOWN
){
4136 struct sigaction alarmAction
, defaultaction
;
4138 // Set up an alarm handler to catch USB devices that hang on
4140 alarmAction
.sa_handler
= AlarmHandler
;
4141 alarmAction
.sa_flags
= SA_RESTART
;
// install the SIGALRM handler, saving the previous action in defaultaction
4142 if (sigaction(SIGALRM
, &alarmAction
, &defaultaction
)) {
4143 // if we can't set timeout, just scan device
4144 PrintOut(LOG_CRIT
, "Unable to initialize SCSI timeout mechanism.\n");
4145 retscsi
=SCSIDeviceScan(ent
, scanning
);
4148 // prepare return point in case of bad SCSI device
// setjmp() returns non-zero only when control comes back through longjmp()
// NOTE(review): presumably AlarmHandler performs that longjmp -- confirm
4149 if (setjmp(registerscsienv
))
4150 // SCSI device timed out!
4153 // Set alarm, make SCSI call, reset alarm
4155 retscsi
=SCSIDeviceScan(ent
, scanning
);
// put back the SIGALRM action that was saved above
4158 if (sigaction(SIGALRM
, &defaultaction
, NULL
)){
4159 PrintOut(LOG_CRIT
, "Unable to clear SCSI timeout mechanism.\n");
4163 retscsi
=SCSIDeviceScan(ent
, scanning
);
4166 // Now scan SCSI device...
4169 PrintOut(LOG_CRIT
, "Device %s timed out (poorly-implemented USB device?)\n", ent
->name
);
4170 CanNotRegister(ent
->name
, "SCSI", ent
->lineno
, scanning
);
4173 // move onto the list of scsi devices
4175 while (numdevscsi
>=scsidevlist_max
)
4176 scsidevlist
=AllocateMoreSpace(scsidevlist
, &scsidevlist_max
, "SCSI device");
4177 scsidevlist
[numdevscsi
++]=ent
;
4181 // if device is explicitly listed and we can't register it, then
4182 // exit unless the user has specified that the device is removable
// (a still-occupied cfgentries[i] is what marks the device as unregistered here)
4183 if (cfgentries
[i
] && !scanning
){
4184 if (ent
->removable
|| quit
==2)
4185 PrintOut(LOG_INFO
, "Device %s not available\n", ent
->name
);
4187 PrintOut(LOG_CRIT
, "Unable to register device %s (no Directive -d removable). Exiting.\n", ent
->name
);
4192 // free up memory if device could not be registered
4193 RmConfigEntry(cfgentries
+i
, __LINE__
);
// Program entry point. Two alternative signatures follow: a plain main(),
// and a static smartd_main() used on Windows where it is started directly
// or by the service control manager.
4202 int main(int argc
, char **argv
)
4204 // Windows: internal main function started direct or by service control manager
4205 static int smartd_main(int argc
, char **argv
)
4208 // external control variables for ATA disks
4209 smartmonctrl control
;
4211 // is it our first pass through?
4214 // next time to wake up
4217 // for simplicity, null all global communications variables/lists
// NOTE(review): the size is taken from 'control' while the pointer is
// 'con'; presumably con has been set to &control just before -- confirm
4219 memset(con
, 0,sizeof(control
));
4221 // parse input and print header and usage info if needed
4222 ParseOpts(argc
,argv
);
4224 // do we mute printing from ataprint commands?
// printing is suppressed (dont_print=1) unless running in debug mode
4225 con
->printing_switchable
=0;
4226 con
->dont_print
=debugmode
?0:1;
4228 // don't exit on bad checksums
4229 con
->checksumfail
=0;
4231 // the main loop of the code
4234 // are we exiting from a signal?
4235 if (caughtsigEXIT
) {
4236 // are we exiting with SIGTERM?
4237 int isterm
=(caughtsigEXIT
==SIGTERM
);
4238 int isquit
=(caughtsigEXIT
==SIGQUIT
);
// SIGTERM always counts as a normal exit; SIGQUIT does so only in debug mode
4239 int isok
=debugmode
?isterm
|| isquit
:isterm
;
4241 PrintOut(isok
?LOG_INFO
:LOG_CRIT
, "smartd received signal %d: %s\n",
4242 caughtsigEXIT
, strsignal(caughtsigEXIT
));
// exit status 0 for a normal exit, EXIT_SIGNAL otherwise
4244 EXIT(isok
?0:EXIT_SIGNAL
);
4247 // Should we (re)read the config file?
4248 if (firstpass
|| caughtsigHUP
){
4249 int entries
, scanning
=0;
4253 // Workaround for missing SIGQUIT via keyboard on Cygwin
4254 if (caughtsigHUP
==2) {
4255 // Simulate SIGQUIT if another SIGINT arrives soon
4258 if (caughtsigHUP
==2) {
4259 caughtsigEXIT
=SIGQUIT
;
4267 "Signal HUP - rereading configuration file %s\n":
4268 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME
" quits)\n\n",
4272 // clears cfgentries, (re)reads config file, makes >=0 entries
4273 entries
=ReadOrMakeConfigEntries(&scanning
);
4276 // checks devices, then moves onto ata/scsi list or deallocates.
4277 RegisterDevices(scanning
);
// keep the previous configuration when quit==2, or when quit is 0 or 1
// and this is not the first pass
4279 else if (quit
==2 || ((quit
==0 || quit
==1) && !firstpass
)) {
4280 // user has asked to continue on error in configuration file
4282 PrintOut(LOG_INFO
,"Reusing previous configuration\n");
4285 // exit with configuration file error status
// -3 -> EXIT_READCONF, -2 -> EXIT_NOCONF, anything else -> EXIT_BADCONF
4286 int status
= (entries
==-3 ? EXIT_READCONF
: entries
==-2 ? EXIT_NOCONF
: EXIT_BADCONF
);
4290 // Log number of devices we are monitoring...
4291 if (numdevata
+numdevscsi
|| quit
==2 || (quit
==1 && !firstpass
))
4292 PrintOut(LOG_INFO
,"Monitoring %d ATA and %d SCSI devices\n",
4293 numdevata
, numdevscsi
);
4295 PrintOut(LOG_INFO
,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4300 // user has asked to print test schedule
4301 PrintTestSchedule(atadevlist
, scsidevlist
);
4309 // check all devices once
4310 CheckDevicesOnce(atadevlist
, scsidevlist
);
4312 // user has asked us to exit after first check
4314 PrintOut(LOG_INFO
,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4315 "smartd is exiting (exit status 0)\n");
4319 // fork into background if needed
// (only on the first pass, and never in debug mode)
4320 if (firstpass
&& !debugmode
) {
4322 if (!is_service
) // don't fork() if running as service via cygrunsrv
4327 // set exit and signal handlers, write PID file, set wake-up time
4329 Initialize(&wakeuptime
);
4333 // sleep until next check time, or a signal arrives
// dosleep() returns the next wake-up time
4334 wakeuptime
=dosleep(wakeuptime
);
4340 // Main function for Windows
// Hands the command line to daemon_main() together with the service
// options below and smartd_main as the function to run; daemon_main()'s
// result is returned as the exit status.
4341 int main(int argc
, char **argv
){
4342 // Options for smartd windows service
4343 static const daemon_winsvc_options svc_opts
= {
4344 "--service", // cmd_opt
4345 "smartd", "SmartD Service", // servicename, displayname
// adjacent string literals below are concatenated into one description text
4347 "Controls and monitors storage devices using the Self-Monitoring, "
4348 "Analysis and Reporting Technology System (S.M.A.R.T.) "
4349 "built into ATA and SCSI Hard Drives. "
4352 // daemon_main() handles daemon and service specific commands
4353 // and starts smartd_main() direct, from a new process,
4354 // or via service control manager
4355 return daemon_main("smartd", &svc_opts
, smartd_main
, argc
, argv
);