]> git.proxmox.com Git - mirror_smartmontools-debian.git/blob - smartd.cpp
8f204a85548ba56b9dad48d6872f8776ff6ebe1d
[mirror_smartmontools-debian.git] / smartd.cpp
1 /*
2 * Home page of code is: http://smartmontools.sourceforge.net
3 *
4 * Copyright (C) 2002-6 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * You should have received a copy of the GNU General Public License
13 * (for example COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 *
16 * This code was originally developed as a Senior Thesis by Michael Cornwell
17 * at the Concurrent Systems Laboratory (now part of the Storage Systems
18 * Research Center), Jack Baskin School of Engineering, University of
19 * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
20 *
21 */
22
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE
25 #endif
26
27 // unconditionally included files
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
31 #ifndef _WIN32
32 #include <sys/wait.h>
33 #include <unistd.h>
34 #endif
35 #include <signal.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <syslog.h>
39 #include <stdarg.h>
40 #include <stdlib.h>
41 #include <errno.h>
42 #include <time.h>
43 #include <limits.h>
44
45 #if SCSITIMEOUT
46 #include <setjmp.h>
47 #endif
48
49 // see which system files to conditionally include
50 #include "config.h"
51
52 // conditionally included files
53 #ifdef HAVE_GETOPT_LONG
54 #include <getopt.h>
55 #endif
56 #ifdef HAVE_NETDB_H
57 #include <netdb.h>
58 #endif
59
60 #ifdef _WIN32
61 #ifdef _MSC_VER
62 #pragma warning(disable:4761) // "conversion supplied"
63 typedef unsigned short mode_t;
64 typedef int pid_t;
65 #endif
66 #include <io.h> // umask()
67 #include <process.h> // getpid()
68 #endif // _WIN32
69
70 #ifdef __CYGWIN__
71 // From <windows.h>:
72 // BOOL WINAPI FreeConsole(void);
73 extern "C" int __stdcall FreeConsole(void);
74 #include <io.h> // setmode()
75 #endif // __CYGWIN__
76
77 // locally included files
78 #include "int64.h"
79 #include "atacmds.h"
80 #include "ataprint.h"
81 #include "extern.h"
82 #include "knowndrives.h"
83 #include "scsicmds.h"
84 #include "smartd.h"
85 #include "utility.h"
86
87 #ifdef _WIN32
88 #include "hostname_win32.h" // gethost/domainname()
89 #define HAVE_GETHOSTNAME 1
90 #define HAVE_GETDOMAINNAME 1
91 // fork()/signal()/initd simulation for native Windows
92 #include "daemon_win32.h" // daemon_main/detach/signal()
93 #undef SIGNALFN
94 #define SIGNALFN daemon_signal
95 #define strsignal daemon_strsignal
96 #define sleep daemon_sleep
97 #undef EXIT // see utility.h
98 #define EXIT(x) { exitstatus = daemon_winsvc_exitcode = (x); exit((x)); }
// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
100 #define SIGQUIT SIGBREAK
101 #define SIGQUIT_KEYNAME "CONTROL-Break"
102 #else // _WIN32
103 #ifdef __CYGWIN__
104 // 2x CONTROL-C simulates missing SIGQUIT via keyboard
105 #define SIGQUIT_KEYNAME "2x CONTROL-C"
106 #else // __CYGWIN__
107 #define SIGQUIT_KEYNAME "CONTROL-\\"
108 #endif // __CYGWIN__
109 #endif // _WIN32
110
111 #if defined (__SVR4) && defined (__sun)
112 extern "C" int getdomainname(char *, int); // no declaration in header files!
113 #endif
114
115 #define ARGUSED(x) ((void)(x))
116
117 // These are CVS identification information for *.cpp and *.h files
118 extern const char *atacmdnames_c_cvsid, *atacmds_c_cvsid, *ataprint_c_cvsid, *escalade_c_cvsid,
119 *knowndrives_c_cvsid, *os_XXXX_c_cvsid, *scsicmds_c_cvsid, *utility_c_cvsid;
120
121 static const char *filenameandversion="$Id: smartd.cpp,v 1.383 2006/11/10 04:59:02 dpgilbert Exp $";
122 #ifdef NEED_SOLARIS_ATA_CODE
123 extern const char *os_solaris_ata_s_cvsid;
124 #endif
125 #ifdef _WIN32
126 extern const char *daemon_win32_c_cvsid, *hostname_win32_c_cvsid, *syslog_win32_c_cvsid;
127 #endif
128 const char *smartd_c_cvsid="$Id: smartd.cpp,v 1.383 2006/11/10 04:59:02 dpgilbert Exp $"
129 ATACMDS_H_CVSID ATAPRINT_H_CVSID CONFIG_H_CVSID
130 #ifdef DAEMON_WIN32_H_CVSID
131 DAEMON_WIN32_H_CVSID
132 #endif
133 EXTERN_H_CVSID INT64_H_CVSID
134 #ifdef HOSTNAME_WIN32_H_CVSID
135 HOSTNAME_WIN32_H_CVSID
136 #endif
137 KNOWNDRIVES_H_CVSID SCSICMDS_H_CVSID SMARTD_H_CVSID
138 #ifdef SYSLOG_H_CVSID
139 SYSLOG_H_CVSID
140 #endif
141 UTILITY_H_CVSID;
142
143 extern const char *reportbug;
144
145 // GNU copyleft statement. Needed for GPL purposes.
146 const char *copyleftstring="smartd comes with ABSOLUTELY NO WARRANTY. This is\n"
147 "free software, and you are welcome to redistribute it\n"
148 "under the terms of the GNU General Public License\n"
149 "Version 2. See http://www.gnu.org for further details.\n\n";
150
151 extern unsigned char debugmode;
152
153 // command-line: how long to sleep between checks
154 static int checktime=CHECKTIME;
155
156 // command-line: name of PID file (NULL for no pid file)
157 static char* pid_file=NULL;
158
159 // configuration file name
160 #ifndef _WIN32
161 static char* configfile = SMARTMONTOOLS_SYSCONFDIR "/" CONFIGFILENAME ;
162 #else
163 static char* configfile = "./" CONFIGFILENAME ;
164 #endif
165 // configuration file "name" if read from stdin
166 static /*const*/ char * const configfile_stdin = "<stdin>";
167 // allocated memory for alternate configuration file name
168 static char* configfile_alt = NULL;
169
170 // command-line: when should we exit?
171 static int quit=0;
172
173 // command-line; this is the default syslog(3) log facility to use.
174 static int facility=LOG_DAEMON;
175
176 #ifdef __CYGWIN__
177 // command-line: running as service, so don't fork()
178 static int is_service=0;
179 #endif
180
181 // used for control of printing, passing arguments to atacmds.c
182 smartmonctrl *con=NULL;
183
184 // pointers to (real or simulated) entries in configuration file, and
185 // maximum space currently allocated for these entries.
186 cfgfile **cfgentries=NULL;
187 int cfgentries_max=0;
188
189 // pointers to ATA and SCSI devices being monitored, maximum and
190 // actual numbers
191 cfgfile **atadevlist=NULL, **scsidevlist=NULL;
192 int atadevlist_max=0, scsidevlist_max=0;
193 int numdevata=0, numdevscsi=0;
194
195 // track memory usage
196 extern int64_t bytes;
197
198 // exit status
199 extern int exitstatus;
200
201 // set to one if we catch a USR1 (check devices now)
202 volatile int caughtsigUSR1=0;
203
204 #ifdef _WIN32
205 // set to one if we catch a USR2 (toggle debug mode)
206 volatile int caughtsigUSR2=0;
207 #endif
208
209 // set to one if we catch a HUP (reload config file). In debug mode,
210 // set to two, if we catch INT (also reload config file).
211 volatile int caughtsigHUP=0;
212
213 // set to signal value if we catch INT, QUIT, or TERM
214 volatile int caughtsigEXIT=0;
215
216 #if SCSITIMEOUT
217 // stack environment if we time out during SCSI access (USB devices)
218 jmp_buf registerscsienv;
219 #endif
220
221 // tranlate cfg->pending into the correct Attribute numbers
222 void TranslatePending(unsigned short pending, unsigned char *current, unsigned char *offline) {
223
224 unsigned char curr = CURR_PEND(pending);
225 unsigned char off = OFF_PEND(pending);
226
227 // look for special value of CUR_UNC_DEFAULT that means DONT
228 // monitor. 0 means DO test.
229 if (curr==CUR_UNC_DEFAULT)
230 curr=0;
231 else if (curr==0)
232 curr=CUR_UNC_DEFAULT;
233
234 // look for special value of OFF_UNC_DEFAULT that means DONT
235 // monitor. 0 means DO TEST.
236 if (off==OFF_UNC_DEFAULT)
237 off=0;
238 else if (off==0)
239 off=OFF_UNC_DEFAULT;
240
241 *current=curr;
242 *offline=off;
243
244 return;
245 }
246
247
248 // free all memory associated with selftest part of configfile entry. Return NULL
249 testinfo* FreeTestData(testinfo *data){
250
251 // make sure we have something to do.
252 if (!data)
253 return NULL;
254
255 // free space for text pattern
256 data->regex=FreeNonZero(data->regex, -1, __LINE__, filenameandversion);
257
258 // free compiled expression
259 regfree(&(data->cregex));
260
261 // make sure that no sign of the compiled expression is left behind
262 // (just in case, to help detect bugs if we ever try and refer to
263 // that again).
264 memset(&(data->cregex), '0', sizeof(regex_t));
265
266 // free remaining memory space
267 data=FreeNonZero(data, sizeof(testinfo), __LINE__, filenameandversion);
268
269 return NULL;
270 }
271
272 cfgfile **AllocateMoreSpace(cfgfile **oldarray, int *oldsize, char *listname){
273 // for now keep BLOCKSIZE small to help detect coding problems.
274 // Perhaps increase in the future.
275 const int BLOCKSIZE=8;
276 int i;
277 int olds = *oldsize;
278 int news = olds + BLOCKSIZE;
279 cfgfile **newptr=(cfgfile **)realloc(oldarray, news*sizeof(cfgfile *));
280
281 // did we get more space?
282 if (newptr) {
283
284 // clear remaining entries ala calloc()
285 for (i=olds; i<news; i++)
286 newptr[i]=NULL;
287
288 bytes += BLOCKSIZE*sizeof(cfgfile *);
289
290 *oldsize=news;
291
292 #if 0
293 PrintOut(LOG_INFO, "allocating %d slots for %s\n", BLOCKSIZE, listname);
294 #endif
295
296 return newptr;
297 }
298
299 PrintOut(LOG_CRIT, "out of memory for allocating %s list\n", listname);
300 EXIT(EXIT_NOMEM);
301 }
302
303 void PrintOneCVS(const char *a_cvs_id){
304 char out[CVSMAXLEN];
305 printone(out,a_cvs_id);
306 PrintOut(LOG_INFO,"%s",out);
307 return;
308 }
309
310 // prints CVS identity information for the executable
311 void PrintCVS(void){
312 char *configargs=strlen(SMARTMONTOOLS_CONFIGURE_ARGS)?SMARTMONTOOLS_CONFIGURE_ARGS:"[no arguments given]";
313
314 PrintOut(LOG_INFO,(char *)copyleftstring);
315 PrintOut(LOG_INFO,"CVS version IDs of files used to build this code are:\n");
316 PrintOneCVS(atacmdnames_c_cvsid);
317 PrintOneCVS(atacmds_c_cvsid);
318 PrintOneCVS(ataprint_c_cvsid);
319 #ifdef _WIN32
320 PrintOneCVS(daemon_win32_c_cvsid);
321 #endif
322 #ifdef _WIN32
323 PrintOneCVS(hostname_win32_c_cvsid);
324 #endif
325 PrintOneCVS(knowndrives_c_cvsid);
326 PrintOneCVS(os_XXXX_c_cvsid);
327 #ifdef NEED_SOLARIS_ATA_CODE
328 PrintOneCVS( os_solaris_ata_s_cvsid);
329 #endif
330 PrintOneCVS(scsicmds_c_cvsid);
331 PrintOneCVS(smartd_c_cvsid);
332 #ifdef _WIN32
333 PrintOneCVS(syslog_win32_c_cvsid);
334 #endif
335 PrintOneCVS(utility_c_cvsid);
336 PrintOut(LOG_INFO, "\nsmartmontools release " PACKAGE_VERSION " dated " SMARTMONTOOLS_RELEASE_DATE " at " SMARTMONTOOLS_RELEASE_TIME "\n");
337 PrintOut(LOG_INFO, "smartmontools build host: " SMARTMONTOOLS_BUILD_HOST "\n");
338 PrintOut(LOG_INFO, "smartmontools build configured: " SMARTMONTOOLS_CONFIGURE_DATE "\n");
339 PrintOut(LOG_INFO, "smartd compile dated " __DATE__ " at "__TIME__ "\n");
340 PrintOut(LOG_INFO, "smartmontools configure arguments: %s\n", configargs);
341 return;
342 }
343
344 // Removes config file entry, freeing all memory
345 void RmConfigEntry(cfgfile **anentry, int whatline){
346
347 cfgfile *cfg;
348
349 // pointer should never be null!
350 if (!anentry){
351 PrintOut(LOG_CRIT,"Internal error in RmConfigEntry() at line %d of file %s\n%s",
352 whatline, filenameandversion, reportbug);
353 EXIT(EXIT_BADCODE);
354 }
355
356 // only remove entries that exist!
357 if (!(cfg=*anentry))
358 return;
359
360 // entry exists -- free all of its memory
361 cfg->name = FreeNonZero(cfg->name, -1,__LINE__,filenameandversion);
362 cfg->smartthres = FreeNonZero(cfg->smartthres, sizeof(struct ata_smart_thresholds_pvt),__LINE__,filenameandversion);
363 cfg->smartval = FreeNonZero(cfg->smartval, sizeof(struct ata_smart_values),__LINE__,filenameandversion);
364 cfg->monitorattflags = FreeNonZero(cfg->monitorattflags, NMONITOR*32,__LINE__,filenameandversion);
365 cfg->attributedefs = FreeNonZero(cfg->attributedefs, MAX_ATTRIBUTE_NUM,__LINE__,filenameandversion);
366 if (cfg->mailwarn){
367 cfg->mailwarn->address = FreeNonZero(cfg->mailwarn->address, -1,__LINE__,filenameandversion);
368 cfg->mailwarn->emailcmdline = FreeNonZero(cfg->mailwarn->emailcmdline, -1,__LINE__,filenameandversion);
369 cfg->mailwarn = FreeNonZero(cfg->mailwarn, sizeof(maildata),__LINE__,filenameandversion);
370 }
371 cfg->testdata = FreeTestData(cfg->testdata);
372 *anentry = FreeNonZero(cfg, sizeof(cfgfile),__LINE__,filenameandversion);
373
374 return;
375 }
376
377 // deallocates all memory associated with cfgentries list
378 void RmAllConfigEntries(){
379 int i;
380
381 for (i=0; i<cfgentries_max; i++)
382 RmConfigEntry(cfgentries+i, __LINE__);
383
384 cfgentries=FreeNonZero(cfgentries, sizeof(cfgfile *)*cfgentries_max, __LINE__, filenameandversion);
385 cfgentries_max=0;
386
387 return;
388 }
389
390 // deallocates all memory associated with ATA/SCSI device lists
391 void RmAllDevEntries(){
392 int i;
393
394 for (i=0; i<atadevlist_max; i++)
395 RmConfigEntry(atadevlist+i, __LINE__);
396
397 atadevlist=FreeNonZero(atadevlist, sizeof(cfgfile *)*atadevlist_max, __LINE__, filenameandversion);
398 atadevlist_max=0;
399
400 for (i=0; i<scsidevlist_max; i++)
401 RmConfigEntry(scsidevlist+i, __LINE__);
402
403 scsidevlist=FreeNonZero(scsidevlist, sizeof(cfgfile *)*scsidevlist_max, __LINE__, filenameandversion);
404 scsidevlist_max=0;
405
406 return;
407 }
408
409 // remove the PID file
410 void RemovePidFile(){
411 if (pid_file) {
412 if ( -1==unlink(pid_file) )
413 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
414 pid_file, strerror(errno));
415 pid_file=FreeNonZero(pid_file, -1,__LINE__,filenameandversion);
416 }
417 return;
418 }
419
420
421 // Note if we catch a SIGUSR1
422 void USR1handler(int sig){
423 if (SIGUSR1==sig)
424 caughtsigUSR1=1;
425 return;
426 }
427
428 #ifdef _WIN32
429 // Note if we catch a SIGUSR2
430 void USR2handler(int sig){
431 if (SIGUSR2==sig)
432 caughtsigUSR2=1;
433 return;
434 }
435 #endif
436
437 // Note if we catch a HUP (or INT in debug mode)
438 void HUPhandler(int sig){
439 if (sig==SIGHUP)
440 caughtsigHUP=1;
441 else
442 caughtsigHUP=2;
443 return;
444 }
445
446 // signal handler for TERM, QUIT, and INT (if not in debug mode)
447 void sighandler(int sig){
448 if (!caughtsigEXIT)
449 caughtsigEXIT=sig;
450 return;
451 }
452
453
454 // signal handler that prints Goodbye message and removes pidfile
455 void Goodbye(void){
456
457 // clean up memory -- useful for debugging
458 RmAllConfigEntries();
459 RmAllDevEntries();
460
461 // delete PID file, if one was created
462 RemovePidFile();
463
464 // remove alternate configfile name
465 configfile_alt=FreeNonZero(configfile_alt, -1,__LINE__,filenameandversion);
466
467 // useful for debugging -- have we managed memory correctly?
468 if (debugmode || (bytes && exitstatus!=EXIT_NOMEM))
469 PrintOut(LOG_INFO, "Memory still allocated for devices at exit is %" PRId64 " bytes.\n", bytes);
470
471 // if we are exiting because of a code bug, tell user
472 if (exitstatus==EXIT_BADCODE || (bytes && exitstatus!=EXIT_NOMEM))
473 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
474
475 if (exitstatus==0 && bytes)
476 exitstatus=EXIT_BADCODE;
477
478 // and this should be the final output from smartd before it exits
479 PrintOut(exitstatus?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", exitstatus);
480
481 return;
482 }
483
#define ENVLENGTH 1024

// A replacement for setenv(), which is not available on all platforms.
//
// stackspace: caller-provided buffer of at least ENVLENGTH bytes that
//             receives the string "name=value" (cut off at ENVLENGTH-1
//             characters) and is then handed to putenv()
// name/value: the variable to define
//
// Returns whatever putenv() returns.
//
// putenv() keeps a pointer to the string it is given, so the buffer
// must not be freed or made invalid while the variable is in use; it
// may be released once the variable is redefined or deleted by another
// putenv() call. The caller therefore keeps these buffers on the stack
// for as long as its popen() call is underway.
int exportenv(char* stackspace, const char *name, const char *value){
  int rc;

  snprintf(stackspace, ENVLENGTH, "%s=%s", name, value);
  rc=putenv(stackspace);

  return rc;
}
497
498 char* dnsdomain(const char* hostname) {
499 char *p = NULL;
500 #ifdef HAVE_GETHOSTBYNAME
501 struct hostent *hp;
502
503 if ((hp = gethostbyname(hostname))) {
504 // Does this work if gethostbyname() returns an IPv6 name in
505 // colon/dot notation? [BA]
506 if ((p = strchr(hp->h_name, '.')))
507 p++; // skip "."
508 }
509 #else
510 ARGUSED(hostname);
511 #endif
512 return p;
513 }
514
515 #define EBUFLEN 1024
516
517 // If either address or executable path is non-null then send and log
518 // a warning email, or execute executable
519 void MailWarning(cfgfile *cfg, int which, char *fmt, ...){
520 char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
521 char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
522 char environ_strings[11][ENVLENGTH];
523 time_t epoch;
524 va_list ap;
525 const int day=24*3600;
526 int days=0;
527 char *whichfail[]={
528 "EmailTest", // 0
529 "Health", // 1
530 "Usage", // 2
531 "SelfTest", // 3
532 "ErrorCount", // 4
533 "FailedHealthCheck", // 5
534 "FailedReadSmartData", // 6
535 "FailedReadSmartErrorLog", // 7
536 "FailedReadSmartSelfTestLog", // 8
537 "FailedOpenDevice", // 9
538 "CurrentPendingSector", // 10
539 "OfflineUncorrectableSector", // 11
540 "Temperature" // 12
541 };
542
543 char *address, *executable;
544 mailinfo *mail;
545 maildata* data=cfg->mailwarn;
546 #ifndef _WIN32
547 FILE *pfp=NULL;
548 #else
549 char stdinbuf[1024]; int boxmsgoffs, boxtype;
550 #endif
551 const char *newadd=NULL, *newwarn=NULL;
552 const char *unknown="[Unknown]";
553
554 // See if user wants us to send mail
555 if(!data)
556 return;
557
558 address=data->address;
559 executable=data->emailcmdline;
560
561 if (!address && !executable)
562 return;
563
564 // which type of mail are we sending?
565 mail=(data->maillog)+which;
566
567 // checks for sanity
568 if (data->emailfreq<1 || data->emailfreq>3) {
569 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg->mailwarn->emailfreq=%d\n",data->emailfreq);
570 return;
571 }
572 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
573 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
574 which, (int)sizeof(whichfail));
575 return;
576 }
577
578 // Return if a single warning mail has been sent.
579 if ((data->emailfreq==1) && mail->logged)
580 return;
581
582 // Return if this is an email test and one has already been sent.
583 if (which == 0 && mail->logged)
584 return;
585
586 // To decide if to send mail, we need to know what time it is.
587 epoch=time(NULL);
588
589 // Return if less than one day has gone by
590 if (data->emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
591 return;
592
593 // Return if less than 2^(logged-1) days have gone by
594 if (data->emailfreq==3 && mail->logged){
595 days=0x01<<(mail->logged-1);
596 days*=day;
597 if (epoch<(mail->lastsent+days))
598 return;
599 }
600
601 // record the time of this mail message, and the first mail message
602 if (!mail->logged)
603 mail->firstsent=epoch;
604 mail->lastsent=epoch;
605
606 // get system host & domain names (not null terminated if length=MAX)
607 #ifdef HAVE_GETHOSTNAME
608 if (gethostname(hostname, 256))
609 strcpy(hostname, unknown);
610 else {
611 char *p=NULL;
612 hostname[255]='\0';
613 p = dnsdomain(hostname);
614 if (p && *p) {
615 strncpy(domainname, p, 255);
616 domainname[255]='\0';
617 } else
618 strcpy(domainname, unknown);
619 }
620 #else
621 strcpy(hostname, unknown);
622 strcpy(domainname, unknown);
623 #endif
624
625 #ifdef HAVE_GETDOMAINNAME
626 if (getdomainname(nisdomain, 256))
627 strcpy(nisdomain, unknown);
628 else
629 nisdomain[255]='\0';
630 #else
631 strcpy(nisdomain, unknown);
632 #endif
633
634 // print warning string into message
635 va_start(ap, fmt);
636 vsnprintf(message, 256, fmt, ap);
637 va_end(ap);
638
639 // appropriate message about further information
640 additional[0]=original[0]=further[0]='\0';
641 if (which) {
642 sprintf(further,"You can also use the smartctl utility for further investigation.\n");
643
644 switch (data->emailfreq){
645 case 1:
646 sprintf(additional,"No additional email messages about this problem will be sent.\n");
647 break;
648 case 2:
649 sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
650 break;
651 case 3:
652 sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
653 (0x01)<<mail->logged);
654 break;
655 }
656 if (data->emailfreq>1 && mail->logged){
657 dateandtimezoneepoch(dates, mail->firstsent);
658 sprintf(original,"The original email about this issue was sent at %s\n", dates);
659 }
660 }
661
662 snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
663
664 // If the user has set cfg->emailcmdline, use that as mailer, else "mail" or "mailx".
665 if (!executable)
666 #ifdef DEFAULT_MAILER
667 executable = DEFAULT_MAILER ;
668 #else
669 #ifndef _WIN32
670 executable = "mail";
671 #else
672 executable = "blat"; // http://blat.sourceforge.net/
673 #endif
674 #endif
675
676 // make a private copy of address with commas replaced by spaces
677 // to separate recipients
678 if (address) {
679 address=CustomStrDup(data->address, 1, __LINE__, filenameandversion);
680 #ifndef _WIN32 // blat mailer needs comma
681 {
682 char *comma=address;
683 while ((comma=strchr(comma, ',')))
684 *comma=' ';
685 }
686 #endif
687 }
688
689 // Export information in environment variables that will be useful
690 // for user scripts
691 exportenv(environ_strings[0], "SMARTD_MAILER", executable);
692 exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
693 exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
694 dateandtimezoneepoch(dates, mail->firstsent);
695 exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
696 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
697 exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
698 exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
699 if (address)
700 exportenv(environ_strings[6], "SMARTD_ADDRESS", address);
701 exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg->name);
702
703 switch (cfg->controller_type) {
704 case CONTROLLER_3WARE_678K:
705 case CONTROLLER_3WARE_9000_CHAR:
706 case CONTROLLER_3WARE_678K_CHAR:
707 {
708 char *s,devicetype[16];
709 sprintf(devicetype, "3ware,%d", cfg->controller_port-1);
710 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", devicetype);
711 if ((s=strchr(cfg->name, ' ')))
712 *s='\0';
713 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg->name);
714 if (s)
715 *s=' ';
716 }
717 break;
718 case CONTROLLER_CCISS:
719 {
720 char *s,devicetype[16];
721 sprintf(devicetype, "cciss,%d", cfg->controller_port-1);
722 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", devicetype);
723 if ((s=strchr(cfg->name, ' ')))
724 *s='\0';
725 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg->name);
726 if (s)
727 *s=' ';
728 }
729 break;
730 case CONTROLLER_ATA:
731 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", "ata");
732 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg->name);
733 break;
734 case CONTROLLER_MARVELL_SATA:
735 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", "marvell");
736 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg->name);
737 break;
738 case CONTROLLER_SCSI:
739 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", "scsi");
740 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg->name);
741 break;
742 case CONTROLLER_SAT:
743 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", "sat");
744 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg->name);
745 break;
746 case CONTROLLER_HPT:
747 {
748 char *s,devicetype[16];
749 sprintf(devicetype, "hpt,%d/%d/%d", cfg->hpt_data[0],
750 cfg->hpt_data[1], cfg->hpt_data[2]);
751 exportenv(environ_strings[8], "SMARTD_DEVICETYPE", devicetype);
752 if ((s=strchr(cfg->name, ' ')))
753 *s='\0';
754 exportenv(environ_strings[9], "SMARTD_DEVICE", cfg->name);
755 if (s)
756 *s=' ';
757 }
758 break;
759 }
760
761 snprintf(fullmessage, 1024,
762 "This email was generated by the smartd daemon running on:\n\n"
763 " host name: %s\n"
764 " DNS domain: %s\n"
765 " NIS domain: %s\n\n"
766 "The following warning/error was logged by the smartd daemon:\n\n"
767 "%s\n\n"
768 "For details see host's SYSLOG (default: /var/log/messages).\n\n"
769 "%s%s%s",
770 hostname, domainname, nisdomain, message, further, original, additional);
771 exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
772
773 // now construct a command to send this as EMAIL
774 #ifndef _WIN32
775 if (address)
776 snprintf(command, 2048,
777 "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
778 "%sENDMAIL\n", subject, address, fullmessage);
779 else
780 snprintf(command, 2048, "%s 2>&1", executable);
781
782 // tell SYSLOG what we are about to do...
783 newadd=address?address:"<nomailer>";
784 newwarn=which?"Warning via":"Test of";
785
786 PrintOut(LOG_INFO,"%s %s to %s ...\n",
787 which?"Sending warning via":"Executing test of", executable, newadd);
788
789 // issue the command to send mail or to run the user's executable
790 errno=0;
791 if (!(pfp=popen(command, "r")))
792 // failed to popen() mail process
793 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
794 newwarn, executable, newadd, errno?strerror(errno):"");
795 else {
796 // pipe suceeded!
797 int len, status;
798 char buffer[EBUFLEN];
799
800 // if unexpected output on stdout/stderr, null terminate, print, and flush
801 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
802 int count=0;
803 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
804 buffer[newlen]='\0';
805 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
806 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
807
808 // flush pipe if needed
809 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
810 count++;
811
812 // tell user that pipe was flushed, or that something is really wrong
813 if (count && count<EBUFLEN)
814 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
815 newwarn, executable, newadd);
816 else if (count)
817 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
818 newwarn, executable, newadd);
819 }
820
821 // if something went wrong with mail process, print warning
822 errno=0;
823 if (-1==(status=pclose(pfp)))
824 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
825 errno?strerror(errno):"");
826 else {
827 // mail process apparently succeeded. Check and report exit status
828 int status8;
829
830 if (WIFEXITED(status)) {
831 // exited 'normally' (but perhaps with nonzero status)
832 status8=WEXITSTATUS(status);
833
834 if (status8>128)
835 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
836 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
837 else if (status8)
838 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
839 newwarn, executable, newadd, status, status8);
840 else
841 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
842 }
843
844 if (WIFSIGNALED(status))
845 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
846 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
847
848 // this branch is probably not possible. If subprocess is
849 // stopped then pclose() should not return.
850 if (WIFSTOPPED(status))
851 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
852 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
853
854 }
855 }
856
857 #else // _WIN32
858
859 // No "here-documents" on Windows, so must use separate commandline and stdin
860 command[0] = stdinbuf[0] = 0;
861 boxtype = -1; boxmsgoffs = 0;
862 newadd = "<nomailer>";
863 if (address) {
864 // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
865 int addroffs = (!strncmp(address, "sys", 3) ? 3 : 0);
866 if (!strncmp(address+addroffs, "msgbox", 6) && (!address[addroffs+6] || address[addroffs+6] == ',')) {
867 boxtype = (addroffs > 0 ? 1 : 0);
868 addroffs += 6;
869 if (address[addroffs])
870 addroffs++;
871 }
872 else
873 addroffs = 0;
874
875 if (address[addroffs]) {
876 // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
877 snprintf(command, sizeof(command),
878 "%s - -q -subject \"%s\" -to \"%s\"",
879 executable, subject, address+addroffs);
880 newadd = address+addroffs;
881 }
882 // Message for mail [0...] and messagebox [boxmsgoffs...]
883 snprintf(stdinbuf, sizeof(stdinbuf),
884 "This email was generated by the smartd daemon running on:\n\n"
885 " host name: %s\n"
886 " DNS domain: %s\n"
887 // " NIS domain: %s\n"
888 "\n%n"
889 "The following warning/error was logged by the smartd daemon:\n\n"
890 "%s\n\n"
891 "For details see the event log or log file of smartd.\n\n"
892 "%s%s%s"
893 "\n",
894 hostname, /*domainname, */ nisdomain, &boxmsgoffs, message, further, original, additional);
895 }
896 else
897 snprintf(command, sizeof(command), "%s", executable);
898
899 newwarn=which?"Warning via":"Test of";
900 if (boxtype >= 0) {
901 // show message box
902 daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
903 PrintOut(LOG_INFO,"%s message box\n", newwarn);
904 }
905 if (command[0]) {
906 char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
907 int rc;
908 // run command
909 PrintOut(LOG_INFO,"%s %s to %s ...\n",
910 (which?"Sending warning via":"Executing test of"), executable, newadd);
911 rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
912 if (rc >= 0 && stdoutbuf[0])
913 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
914 newwarn, executable, newadd, strlen(stdoutbuf), stdoutbuf);
915 if (rc != 0)
916 PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
917 newwarn, executable, newadd, rc);
918 else
919 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
920 }
921
922 #endif // _WIN32
923
924 // increment mail sent counter
925 mail->logged++;
926
927 // free copy of address (without commas)
928 address=FreeNonZero(address, -1, __LINE__, filenameandversion);
929
930 return;
931 }
932
933 // Printing function for watching ataprint commands, or losing them
934 // [From GLIBC Manual: Since the prototype doesn't specify types for
935 // optional arguments, in a call to a variadic function the default
936 // argument promotions are performed on the optional argument
937 // values. This means the objects of type char or short int (whether
938 // signed or not) are promoted to either int or unsigned int, as
939 // appropriate.]
940 void pout(const char *fmt, ...){
941 va_list ap;
942
943 // get the correct time in syslog()
944 FixGlibcTimeZoneBug();
945 // initialize variable argument list
946 va_start(ap,fmt);
947 // in debug==1 mode we will print the output from the ataprint.o functions!
948 if (debugmode && debugmode!=2)
949 #ifdef _WIN32
950 if (facility == LOG_LOCAL1) // logging to stdout
951 vfprintf(stderr,fmt,ap);
952 else
953 #endif
954 vprintf(fmt,ap);
955 // in debug==2 mode we print output from knowndrives.o functions
956 else if (debugmode==2 || con->reportataioctl || con->reportscsiioctl || con->controller_port) {
957 openlog("smartd", LOG_PID, facility);
958 vsyslog(LOG_INFO, fmt, ap);
959 closelog();
960 }
961 va_end(ap);
962 fflush(NULL);
963 return;
964 }
965
966 // This function prints either to stdout or to the syslog as needed.
967 // This function is also used by utility.cpp to report LOG_CRIT errors.
968 void PrintOut(int priority, const char *fmt, ...){
969 va_list ap;
970
971 // get the correct time in syslog()
972 FixGlibcTimeZoneBug();
973 // initialize variable argument list
974 va_start(ap,fmt);
975 if (debugmode)
976 #ifdef _WIN32
977 if (facility == LOG_LOCAL1) // logging to stdout
978 vfprintf(stderr,fmt,ap);
979 else
980 #endif
981 vprintf(fmt,ap);
982 else {
983 openlog("smartd", LOG_PID, facility);
984 vsyslog(priority,fmt,ap);
985 closelog();
986 }
987 va_end(ap);
988 return;
989 }
990
991 // Forks new process, closes ALL file descriptors, redirects stdin,
992 // stdout, and stderr. Not quite daemon(). See
993 // http://www.iar.unlp.edu.ar/~fede/revistas/lj/Magazines/LJ47/2335.html
994 // for a good description of why we do things this way.
995 void DaemonInit(){
996 #ifndef _WIN32
997 pid_t pid;
998 int i;
999
1000 // flush all buffered streams. Else we might get two copies of open
1001 // streams since both parent and child get copies of the buffers.
1002 fflush(NULL);
1003
1004 if ((pid=fork()) < 0) {
1005 // unable to fork!
1006 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1007 EXIT(EXIT_STARTUP);
1008 }
1009 else if (pid)
1010 // we are the parent process -- exit cleanly
1011 EXIT(0);
1012
1013 // from here on, we are the child process.
1014 setsid();
1015
1016 // Fork one more time to avoid any possibility of having terminals
1017 if ((pid=fork()) < 0) {
1018 // unable to fork!
1019 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1020 EXIT(EXIT_STARTUP);
1021 }
1022 else if (pid)
1023 // we are the parent process -- exit cleanly
1024 EXIT(0);
1025
1026 // Now we are the child's child...
1027
1028 // close any open file descriptors
1029 for (i=getdtablesize();i>=0;--i)
1030 close(i);
1031
1032 #ifdef __CYGWIN__
1033 // Cygwin's setsid() does not detach the process from Windows console
1034 FreeConsole();
1035 #endif // __CYGWIN__
1036
1037 // redirect any IO attempts to /dev/null for stdin
1038 i=open("/dev/null",O_RDWR);
1039 // stdout
1040 dup(i);
1041 // stderr
1042 dup(i);
1043 umask(0);
1044 chdir("/");
1045
1046 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1047
1048 #else // _WIN32
1049
1050 // No fork() on native Win32
1051 // Detach this process from console
1052 fflush(NULL);
1053 if (daemon_detach("smartd")) {
1054 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1055 EXIT(EXIT_STARTUP);
1056 }
1057 // stdin/out/err now closed if not redirected
1058
1059 #endif // _WIN32
1060 return;
1061 }
1062
1063 // create a PID file containing the current process id
1064 void WritePidFile() {
1065 if (pid_file) {
1066 int error = 0;
1067 pid_t pid = getpid();
1068 mode_t old_umask;
1069 FILE* fp;
1070
1071 #ifndef __CYGWIN__
1072 old_umask = umask(0077); // rwx------
1073 #else
1074 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1075 old_umask = umask(0033); // rwxr--r--
1076 #endif
1077 fp = fopen(pid_file, "w");
1078 umask(old_umask);
1079 if (fp == NULL) {
1080 error = 1;
1081 } else if (fprintf(fp, "%d\n", (int)pid) <= 0) {
1082 error = 1;
1083 } else if (fclose(fp) != 0) {
1084 error = 1;
1085 }
1086 if (error) {
1087 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file);
1088 EXIT(EXIT_PID);
1089 }
1090 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file, (int)pid);
1091 }
1092 return;
1093 }
1094
1095 // Prints header identifying version of code and home
1096 void PrintHead(){
1097 #ifdef HAVE_GET_OS_VERSION_STR
1098 const char * ver = get_os_version_str();
1099 #else
1100 const char * ver = SMARTMONTOOLS_BUILD_HOST;
1101 #endif
1102 PrintOut(LOG_INFO,"smartd version %s [%s] Copyright (C) 2002-6 Bruce Allen\n", PACKAGE_VERSION, ver);
1103 PrintOut(LOG_INFO,"Home page is " PACKAGE_HOMEPAGE "\n\n");
1104 return;
1105 }
1106
1107 // prints help info for configuration file Directives
1108 void Directives() {
1109 PrintOut(LOG_INFO,
1110 "Configuration file (%s) Directives (after device name):\n"
1111 " -d TYPE Set the device type: ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N, cciss,N\n"
1112 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1113 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1114 " -S VAL Enable/disable attribute autosave (on/off)\n"
1115 " -n MODE No check if: never[,q], sleep[,q], standby[,q], idle[,q]\n"
1116 " -H Monitor SMART Health Status, report if failed\n"
1117 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1118 " -l TYPE Monitor SMART log. Type is one of: error, selftest\n"
1119 " -f Monitor 'Usage' Attributes, report failures\n"
1120 " -m ADD Send email warning to address ADD\n"
1121 " -M TYPE Modify email warning behavior (see man page)\n"
1122 " -p Report changes in 'Prefailure' Attributes\n"
1123 " -u Report changes in 'Usage' Attributes\n"
1124 " -t Equivalent to -p and -u Directives\n"
1125 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1126 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1127 " -i ID Ignore Attribute ID for -f Directive\n"
1128 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1129 " -C ID Monitor Current Pending Sectors in Attribute ID\n"
1130 " -U ID Monitor Offline Uncorrectable Sectors in Attribute ID\n"
1131 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1132 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1133 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1134 " -a Default: -H -f -t -l error -l selftest -C 197 -U 198\n"
1135 " -F TYPE Firmware bug workaround: none, samsung, samsung2\n"
1136 " # Comment: text after a hash sign is ignored\n"
1137 " \\ Line continuation character\n"
1138 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1139 "Use ID = 0 to turn off -C and/or -U Directives\n"
1140 "Example: /dev/hda -a\n",
1141 configfile);
1142 return;
1143 }
1144
/* Looks up the human-readable list of valid arguments for the command
   line option letter opt.  Returns a pointer to a constant string, or
   NULL when opt is not one of the options that has such a list. */
const char *GetValidArgList(char opt) {
  /* option letter -> description of the arguments it accepts */
  static const struct {
    char        letter;
    const char *arglist;
  } known[] = {
    { 'c', "<FILE_NAME>, -" },
    { 's', "valid_regular_expression" },
    { 'l', "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { 'q', "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { 'r', "ioctl[,N], ataioctl[,N], scsiioctl[,N]" },
    { 'p', "<FILE_NAME>" },
    { 'i', "<INTEGER_SECONDS>" }
  };
  unsigned idx;

  for (idx = 0; idx < sizeof(known)/sizeof(known[0]); idx++) {
    if (known[idx].letter == opt)
      return known[idx].arglist;
  }

  /* no argument list for this option */
  return NULL;
}
1167
1168 /* prints help information for command syntax */
1169 void Usage (void){
1170 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1171 #ifdef HAVE_GETOPT_LONG
1172 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1173 PrintOut(LOG_INFO," Read configuration file NAME or stdin [default is %s]\n\n", configfile);
1174 PrintOut(LOG_INFO," -d, --debug\n");
1175 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1176 PrintOut(LOG_INFO," -D, --showdirectives\n");
1177 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1178 PrintOut(LOG_INFO," -h, --help, --usage\n");
1179 PrintOut(LOG_INFO," Display this help and exit\n\n");
1180 PrintOut(LOG_INFO," -i N, --interval=N\n");
1181 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1182 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1183 #ifndef _WIN32
1184 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1185 #else
1186 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1187 #endif
1188 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1189 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1190 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1191 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1192 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1193 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1194 #if defined(_WIN32) || defined(__CYGWIN__)
1195 PrintOut(LOG_INFO," --service\n");
1196 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1197 #ifdef _WIN32
1198 PrintOut(LOG_INFO," smartd install [options]\n");
1199 PrintOut(LOG_INFO," Remove service with:\n");
1200 PrintOut(LOG_INFO," smartd remove\n\n");
1201 #else
1202 PrintOut(LOG_INFO," /etc/rc.d/init.d/smartd install [options]\n");
1203 PrintOut(LOG_INFO," Remove service with:\n");
1204 PrintOut(LOG_INFO," /etc/rc.d/init.d/smartd remove\n\n");
1205 #endif
1206 #endif // _WIN32 || __CYGWIN__
1207 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1208 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1209 #else
1210 PrintOut(LOG_INFO," -c NAME|- Read configuration file NAME or stdin [default is %s]\n", configfile);
1211 PrintOut(LOG_INFO," -d Start smartd in debug mode\n");
1212 PrintOut(LOG_INFO," -D Print the configuration file Directives and exit\n");
1213 PrintOut(LOG_INFO," -h Display this help and exit\n");
1214 PrintOut(LOG_INFO," -i N Set interval between disk checks to N seconds, where N >= 10\n");
1215 PrintOut(LOG_INFO," -l local? Use syslog facility local0 - local7, or daemon\n");
1216 PrintOut(LOG_INFO," -p NAME Write PID file NAME\n");
1217 PrintOut(LOG_INFO," -q WHEN Quit on one of: %s\n", GetValidArgList('q'));
1218 PrintOut(LOG_INFO," -r TYPE Report transactions for one of: %s\n", GetValidArgList('r'));
1219 PrintOut(LOG_INFO," -V Print License, Copyright, and version information\n");
1220 #endif
1221 }
1222
1223 // returns negative if problem, else fd>=0
1224 static int OpenDevice(char *device, char *mode, int scanning) {
1225 int fd;
1226 char *s=device;
1227
1228 // If there is an ASCII "space" character in the device name,
1229 // terminate string there. This is for 3ware and highpoint devices only.
1230 if ((s=strchr(device,' ')))
1231 *s='\0';
1232
1233 // open the device
1234 fd = deviceopen(device, mode);
1235
1236 // if we removed a space, put it back in please
1237 if (s)
1238 *s=' ';
1239
1240 // if we failed to open the device, complain!
1241 if (fd < 0) {
1242
1243 // For linux+devfs, a nonexistent device gives a strange error
1244 // message. This makes the error message a bit more sensible.
1245 // If no debug and scanning - don't print errors
1246 if (debugmode || !scanning) {
1247 if (errno==ENOENT || errno==ENOTDIR)
1248 errno=ENODEV;
1249
1250 PrintOut(LOG_INFO,"Device: %s, %s, open() failed\n",
1251 device, strerror(errno));
1252 }
1253 return -1;
1254 }
1255 // device opened sucessfully
1256 return fd;
1257 }
1258
1259 int CloseDevice(int fd, char *name){
1260 if (deviceclose(fd)){
1261 PrintOut(LOG_INFO,"Device: %s, %s, close(%d) failed\n", name, strerror(errno), fd);
1262 return 1;
1263 }
1264 // device sucessfully closed
1265 return 0;
1266 }
1267
1268 // returns <0 on failure
1269 int ATAErrorCount(int fd, char *name){
1270 struct ata_smart_errorlog log;
1271
1272 if (-1==ataReadErrorLog(fd,&log)){
1273 PrintOut(LOG_INFO,"Device: %s, Read SMART Error Log Failed\n",name);
1274 return -1;
1275 }
1276
1277 // return current number of ATA errors
1278 return log.error_log_pointer?log.ata_error_count:0;
1279 }
1280
1281 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1282 // error count, and top bits are the power-on hours of the last error.
1283 int SelfTestErrorCount(int fd, char *name){
1284 struct ata_smart_selftestlog log;
1285
1286 if (-1==ataReadSelfTestLog(fd,&log)){
1287 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1288 return -1;
1289 }
1290
1291 // return current number of self-test errors
1292 return ataPrintSmartSelfTestlog(&log,0);
1293 }
1294
1295 // scan to see what ata devices there are, and if they support SMART
1296 int ATADeviceScan(cfgfile *cfg, int scanning){
1297 int fd, supported=0;
1298 struct ata_identify_device drive;
1299 char *name=cfg->name;
1300 int retainsmartdata=0;
1301 int retid;
1302 char *mode;
1303
1304 // should we try to register this as an ATA device?
1305 switch (cfg->controller_type) {
1306 case CONTROLLER_ATA:
1307 case CONTROLLER_3WARE_678K:
1308 case CONTROLLER_MARVELL_SATA:
1309 case CONTROLLER_HPT:
1310 case CONTROLLER_UNKNOWN:
1311 mode="ATA";
1312 break;
1313 case CONTROLLER_3WARE_678K_CHAR:
1314 mode="ATA_3WARE_678K";
1315 break;
1316 case CONTROLLER_3WARE_9000_CHAR:
1317 mode="ATA_3WARE_9000";
1318 break;
1319 case CONTROLLER_SAT:
1320 mode="SCSI";
1321 break;
1322 default:
1323 // not a recognized ATA or SATA device. We should never enter
1324 // this branch.
1325 return 1;
1326 }
1327
1328 // open the device
1329 if ((fd=OpenDevice(name, mode, scanning))<0)
1330 // device open failed
1331 return 1;
1332 PrintOut(LOG_INFO,"Device: %s, opened\n", name);
1333
1334 // pass user settings on to low-level ATA commands
1335 con->controller_port=cfg->controller_port;
1336 con->hpt_data[0]=cfg->hpt_data[0];
1337 con->hpt_data[1]=cfg->hpt_data[1];
1338 con->hpt_data[2]=cfg->hpt_data[2];
1339 con->controller_type=cfg->controller_type;
1340 con->controller_explicit=cfg->controller_explicit;
1341 con->fixfirmwarebug = cfg->fixfirmwarebug;
1342 con->satpassthrulen = cfg->satpassthrulen;
1343
1344 // Get drive identity structure
1345 if ((retid=ataReadHDIdentity (fd,&drive))){
1346 if (retid<0)
1347 // Unable to read Identity structure
1348 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1349 else
1350 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1351 name, packetdevicetype(retid-1));
1352 CloseDevice(fd, name);
1353 return 2;
1354 }
1355
1356 // Show if device in database, and use preset vendor attribute
1357 // options unless user has requested otherwise.
1358 if (cfg->ignorepresets)
1359 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1360 else {
1361 // do whatever applypresets decides to do. Will allocate memory if
1362 // cfg->attributedefs is needed.
1363 if (applypresets(&drive, &cfg->attributedefs, con)<0)
1364 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1365 else
1366 PrintOut(LOG_INFO, "Device: %s, found in smartd database.\n", name);
1367
1368 // then save the correct state of the flag (applypresets may have changed it)
1369 cfg->fixfirmwarebug = con->fixfirmwarebug;
1370 }
1371
1372 // If requested, show which presets would be used for this drive
1373 if (cfg->showpresets) {
1374 int savedebugmode=debugmode;
1375 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1376 if (!debugmode)
1377 debugmode=2;
1378 showpresets(&drive);
1379 debugmode=savedebugmode;
1380 }
1381
1382 // see if drive supports SMART
1383 supported=ataSmartSupport(&drive);
1384 if (supported!=1) {
1385 if (supported==0)
1386 // drive does NOT support SMART
1387 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1388 else
1389 // can't tell if drive supports SMART
1390 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1391
1392 // should we proceed anyway?
1393 if (cfg->permissive){
1394 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1395 }
1396 else {
1397 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1398 CloseDevice(fd, name);
1399 return 2;
1400 }
1401 }
1402
1403 if (ataEnableSmart(fd)){
1404 // Enable SMART command has failed
1405 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1406 CloseDevice(fd, name);
1407 return 2;
1408 }
1409
1410 // disable device attribute autosave...
1411 if (cfg->autosave==1){
1412 if (ataDisableAutoSave(fd))
1413 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1414 else
1415 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1416 }
1417
1418 // or enable device attribute autosave
1419 if (cfg->autosave==2){
1420 if (ataEnableAutoSave(fd))
1421 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1422 else
1423 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1424 }
1425
1426 // capability check: SMART status
1427 if (cfg->smartcheck && ataSmartStatus2(fd)==-1){
1428 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1429 cfg->smartcheck=0;
1430 }
1431
1432 // capability check: Read smart values and thresholds. Note that
1433 // smart values are ALSO needed even if we ONLY want to know if the
1434 // device is self-test log or error-log capable! After ATA-5, this
1435 // information was ALSO reproduced in the IDENTIFY DEVICE response,
1436 // but sadly not for ATA-5. Sigh.
1437
1438 // do we need to retain SMART data after returning from this routine?
1439 retainsmartdata=cfg->usagefailed || cfg->prefail || cfg->usage || cfg->tempdiff || cfg->tempinfo || cfg->tempcrit;
1440
1441 // do we need to get SMART data?
1442 if (retainsmartdata || cfg->autoofflinetest || cfg->selftest || cfg->errorlog || cfg->pending!=DONT_MONITOR_UNC) {
1443
1444 unsigned char currentpending, offlinepending;
1445
1446 cfg->smartval=(struct ata_smart_values *)Calloc(1,sizeof(struct ata_smart_values));
1447 cfg->smartthres=(struct ata_smart_thresholds_pvt *)Calloc(1,sizeof(struct ata_smart_thresholds_pvt));
1448
1449 if (!cfg->smartval || !cfg->smartthres){
1450 PrintOut(LOG_CRIT,"Not enough memory to obtain SMART data\n");
1451 EXIT(EXIT_NOMEM);
1452 }
1453
1454 if (ataReadSmartValues(fd,cfg->smartval) ||
1455 ataReadSmartThresholds (fd,cfg->smartthres)){
1456 PrintOut(LOG_INFO,"Device: %s, Read SMART Values and/or Thresholds Failed\n",name);
1457 retainsmartdata=cfg->usagefailed=cfg->prefail=cfg->usage=0;
1458 cfg->tempdiff = cfg->tempinfo = cfg->tempcrit = 0;
1459 cfg->pending=DONT_MONITOR_UNC;
1460 }
1461
1462 // see if the necessary Attribute is there to monitor offline or
1463 // current pending sectors or temperature
1464 TranslatePending(cfg->pending, &currentpending, &offlinepending);
1465
1466 if (currentpending && ATAReturnAttributeRawValue(currentpending, cfg->smartval)<0) {
1467 PrintOut(LOG_INFO,"Device: %s, can't monitor Current Pending Sector count - no Attribute %d\n",
1468 name, (int)currentpending);
1469 cfg->pending &= 0xff00;
1470 cfg->pending |= CUR_UNC_DEFAULT;
1471 }
1472
1473 if (offlinepending && ATAReturnAttributeRawValue(offlinepending, cfg->smartval)<0) {
1474 PrintOut(LOG_INFO,"Device: %s, can't monitor Offline Uncorrectable Sector count - no Attribute %d\n",
1475 name, (int)offlinepending);
1476 cfg->pending &= 0x00ff;
1477 cfg->pending |= OFF_UNC_DEFAULT<<8;
1478 }
1479
1480 if ( (cfg->tempdiff || cfg->tempinfo || cfg->tempcrit)
1481 && !ATAReturnTemperatureValue(cfg->smartval, cfg->attributedefs)) {
1482 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
1483 cfg->tempdiff = cfg->tempinfo = cfg->tempcrit = 0;
1484 }
1485 }
1486
1487 // enable/disable automatic on-line testing
1488 if (cfg->autoofflinetest){
1489 // is this an enable or disable request?
1490 const char *what=(cfg->autoofflinetest==1)?"disable":"enable";
1491 if (!cfg->smartval)
1492 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1493 else {
1494 // if command appears unsupported, issue a warning...
1495 if (!isSupportAutomaticTimer(cfg->smartval))
1496 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1497 // ... but then try anyway
1498 if ((cfg->autoofflinetest==1)?ataDisableAutoOffline(fd):ataEnableAutoOffline(fd))
1499 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1500 else
1501 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1502 }
1503 }
1504
1505 // capability check: self-test-log
1506 if (cfg->selftest){
1507 int retval;
1508
1509 // start with service disabled, and re-enable it if all works OK
1510 cfg->selftest=0;
1511 cfg->selflogcount=0;
1512 cfg->selfloghour=0;
1513
1514 if (!cfg->smartval)
1515 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log (SMART READ DATA failed); disabling -l selftest\n", name);
1516 else if (!cfg->permissive && !isSmartTestLogCapable(cfg->smartval, &drive))
1517 PrintOut(LOG_INFO, "Device: %s, appears to lack SMART Self-Test log; disabling -l selftest (override with -T permissive Directive)\n", name);
1518 else if ((retval=SelfTestErrorCount(fd, name))<0)
1519 PrintOut(LOG_INFO, "Device: %s, no SMART Self-Test log; remove -l selftest Directive from smartd.conf\n", name);
1520 else {
1521 cfg->selftest=1;
1522 cfg->selflogcount=SELFTEST_ERRORCOUNT(retval);
1523 cfg->selfloghour =SELFTEST_ERRORHOURS(retval);
1524 }
1525 }
1526
1527 // capability check: ATA error log
1528 if (cfg->errorlog){
1529 int val;
1530
1531 // start with service disabled, and re-enable it if all works OK
1532 cfg->errorlog=0;
1533 cfg->ataerrorcount=0;
1534
1535 if (!cfg->smartval)
1536 PrintOut(LOG_INFO, "Device: %s, no SMART Error log (SMART READ DATA failed); disabling -l error\n", name);
1537 else if (!cfg->permissive && !isSmartErrorLogCapable(cfg->smartval, &drive))
1538 PrintOut(LOG_INFO, "Device: %s, appears to lack SMART Error log; disabling -l error (override with -T permissive Directive)\n", name);
1539 else if ((val=ATAErrorCount(fd, name))<0)
1540 PrintOut(LOG_INFO, "Device: %s, no SMART Error log; remove -l error Directive from smartd.conf\n", name);
1541 else {
1542 cfg->errorlog=1;
1543 cfg->ataerrorcount=val;
1544 }
1545 }
1546
1547 // If we don't need to save SMART data, get rid of it now
1548 if (!retainsmartdata) {
1549 if (cfg->smartval) {
1550 cfg->smartval=CheckFree(cfg->smartval, __LINE__,filenameandversion);
1551 bytes-=sizeof(struct ata_smart_values);
1552 }
1553 if (cfg->smartthres) {
1554 cfg->smartthres=CheckFree(cfg->smartthres, __LINE__,filenameandversion);
1555 bytes-=sizeof(struct ata_smart_thresholds_pvt);
1556 }
1557 }
1558
1559 // capabilities check -- does it support powermode?
1560 if (cfg->powermode) {
1561 int powermode=ataCheckPowerMode(fd);
1562
1563 if (-1 == powermode) {
1564 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
1565 cfg->powermode=0;
1566 }
1567 else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
1568 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
1569 name, powermode);
1570 cfg->powermode=0;
1571 }
1572 }
1573
1574 // If no tests available or selected, return
1575 if (!(cfg->errorlog || cfg->selftest || cfg->smartcheck ||
1576 cfg->usagefailed || cfg->prefail || cfg->usage ||
1577 cfg->tempdiff || cfg->tempinfo || cfg->tempcrit )) {
1578 CloseDevice(fd, name);
1579 return 3;
1580 }
1581
1582 // Do we still have entries available?
1583 while (numdevata>=atadevlist_max)
1584 atadevlist=AllocateMoreSpace(atadevlist, &atadevlist_max, "ATA device");
1585
1586 // register device
1587 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
1588
1589 // record number of device, type of device, increment device count
1590 if (cfg->controller_type == CONTROLLER_UNKNOWN)
1591 cfg->controller_type=CONTROLLER_ATA;
1592
1593 // close file descriptor
1594 CloseDevice(fd, name);
1595 return 0;
1596 }
1597
1598 // Returns 1 if device recognised as one we do not want to treat as a general
1599 // SCSI device. Also returns 1 if INQUIRY fails (all "SCSI" devices should
1600 // respond to INQUIRY). Otherwise returns 0 (i.e. normal SCSI device).
1601 static int SCSIFilterKnown(int fd, char * device)
1602 {
1603 char req_buff[256];
1604 char di_buff[256];
1605 int req_len, avail_len, len;
1606
1607 memset(req_buff, 0, 96);
1608 req_len = 36;
1609 if (scsiStdInquiry(fd, (unsigned char *)req_buff, req_len)) {
1610 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
1611 /* watch this spot ... other devices could lock up here */
1612 req_len = 64;
1613 if (scsiStdInquiry(fd, (unsigned char *)req_buff, req_len)) {
1614 PrintOut(LOG_INFO, "Device: %s, failed on INQUIRY; skip device\n", device);
1615 // device doesn't like INQUIRY commands
1616 return 1;
1617 }
1618 }
1619 avail_len = req_buff[4] + 5;
1620 len = (avail_len < req_len) ? avail_len : req_len;
1621 if (len >= 36) {
1622 if (0 == strncmp(req_buff + 8, "3ware", 5) || 0 == strncmp(req_buff + 8, "AMCC", 4) ) {
1623 PrintOut(LOG_INFO, "Device %s, please try adding '-d 3ware,N'\n", device);
1624 PrintOut(LOG_INFO, "Device %s, you may need to replace %s with /dev/twaN or /dev/tweN\n", device, device);
1625 return 1;
1626 } else if ((len >= 42) && (0 == strncmp(req_buff + 36, "MVSATA", 6))) {
1627 PrintOut(LOG_INFO, "Device %s, please try '-d marvell'\n", device);
1628 return 1;
1629 } else if ((avail_len >= 96) && (0 == strncmp(req_buff + 8, "ATA", 3))) {
1630 /* <<<< This is Linux specific code to detect SATA disks using a
1631 SCSI-ATA command translation layer. This may be generalized
1632 later when the t10.org SAT project matures. >>>> */
1633 req_len = 252;
1634 memset(di_buff, 0, req_len);
1635 if (scsiInquiryVpd(fd, 0x83, (unsigned char *)di_buff, req_len)) {
1636 return 0; // guess it is normal device
1637 }
1638 avail_len = ((di_buff[2] << 8) + di_buff[3]) + 4;
1639 len = (avail_len < req_len) ? avail_len : req_len;
1640 if (isLinuxLibAta((unsigned char *)di_buff, len)) {
1641 PrintOut(LOG_INFO, "Device %s: SATA disks accessed via libata are "
1642 "supported by Linux\nkernel versions 2.6.15-rc1 and above. "
1643 "Try adding '-d ata' or\n'-d sat' to the smartd.conf "
1644 "config file line.\n", device);
1645 return 1;
1646 }
1647 }
1648 }
1649 return 0;
1650 }
1651
1652 // on success, return 0. On failure, return >0. Never return <0,
1653 // please.
1654 static int SCSIDeviceScan(cfgfile *cfg, int scanning) {
1655 int k, fd, err;
1656 char *device = cfg->name;
1657 struct scsi_iec_mode_page iec;
1658 UINT8 tBuf[64];
1659 char *mode=NULL;
1660
1661 // should we try to register this as a SCSI device?
1662 switch (cfg->controller_type) {
1663 case CONTROLLER_SCSI:
1664 case CONTROLLER_UNKNOWN:
1665 mode="SCSI";
1666 break;
1667 case CONTROLLER_CCISS:
1668 mode="CCISS";
1669 break;
1670 default:
1671 return 1;
1672 }
1673 // pass user settings on to low-level SCSI commands
1674 con->controller_port=cfg->controller_port;
1675 con->controller_type=cfg->controller_type;
1676
1677 // open the device
1678 if ((fd = OpenDevice(device, mode, scanning)) < 0)
1679 return 1;
1680 PrintOut(LOG_INFO,"Device: %s, opened\n", device);
1681
1682 // early skip if device known and needs to be handled by some other
1683 // device type (e.g. '-d 3ware,<n>')
1684 if (SCSIFilterKnown(fd, device)) {
1685 CloseDevice(fd, device);
1686 return 2;
1687 }
1688
1689 // check that device is ready for commands. IE stores its stuff on
1690 // the media.
1691 if ((err = scsiTestUnitReady(fd))) {
1692 if (SIMPLE_ERR_NOT_READY == err)
1693 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
1694 else if (SIMPLE_ERR_NO_MEDIUM == err)
1695 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
1696 else if (SIMPLE_ERR_BECOMING_READY == err)
1697 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
1698 else
1699 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
1700 CloseDevice(fd, device);
1701 return 2;
1702 }
1703
1704 // Badly-conforming USB storage devices may fail this check.
1705 // The response to the following IE mode page fetch (current and
1706 // changeable values) is carefully examined. It has been found
1707 // that various USB devices that malform the response will lock up
1708 // if asked for a log page (e.g. temperature) so it is best to
1709 // bail out now.
1710 if (!(err = scsiFetchIECmpage(fd, &iec, cfg->modese_len)))
1711 cfg->modese_len = iec.modese_len;
1712 else if (SIMPLE_ERR_BAD_FIELD == err)
1713 ; /* continue since it is reasonable not to support IE mpage */
1714 else { /* any other error (including malformed response) unreasonable */
1715 PrintOut(LOG_INFO,
1716 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
1717 device, err);
1718 CloseDevice(fd, device);
1719 return 3;
1720 }
1721
1722 // N.B. The following is passive (i.e. it doesn't attempt to turn on
1723 // smart if it is off). This may change to be the same as the ATA side.
1724 if (!scsi_IsExceptionControlEnabled(&iec)) {
1725 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
1726 "Try 'smartctl -s on %s' to turn on SMART features\n",
1727 device, device);
1728 CloseDevice(fd, device);
1729 return 3;
1730 }
1731
1732 // Device exists, and does SMART. Add to list (allocating more space if needed)
1733 while (numdevscsi >= scsidevlist_max)
1734 scsidevlist=AllocateMoreSpace(scsidevlist, &scsidevlist_max, "SCSI device");
1735
1736 // Flag that certain log pages are supported (information may be
1737 // available from other sources).
1738 if (0 == scsiLogSense(fd, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
1739 for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
1740 switch (tBuf[k]) {
1741 case TEMPERATURE_LPAGE:
1742 cfg->TempPageSupported = 1;
1743 break;
1744 case IE_LPAGE:
1745 cfg->SmartPageSupported = 1;
1746 break;
1747 default:
1748 break;
1749 }
1750 }
1751 }
1752
1753 // record type of device
1754 if (cfg->controller_type == CONTROLLER_UNKNOWN)
1755 cfg->controller_type = CONTROLLER_SCSI;
1756
1757 // get rid of allocated memory only needed for ATA devices. These
1758 // might have been allocated if the user specified Ignore options or
1759 // other ATA-only Attribute-specific options on the DEVICESCAN line.
1760 cfg->monitorattflags = FreeNonZero(cfg->monitorattflags, NMONITOR*32,__LINE__,filenameandversion);
1761 cfg->attributedefs = FreeNonZero(cfg->attributedefs, MAX_ATTRIBUTE_NUM,__LINE__,filenameandversion);
1762 cfg->smartval = FreeNonZero(cfg->smartval, sizeof(struct ata_smart_values),__LINE__,filenameandversion);
1763 cfg->smartthres = FreeNonZero(cfg->smartthres, sizeof(struct ata_smart_thresholds_pvt),__LINE__,filenameandversion);
1764
1765 // Check if scsiCheckIE() is going to work
1766 {
1767 UINT8 asc = 0;
1768 UINT8 ascq = 0;
1769 UINT8 currenttemp = 0;
1770 UINT8 triptemp = 0;
1771
1772 if (scsiCheckIE(fd, cfg->SmartPageSupported, cfg->TempPageSupported,
1773 &asc, &ascq, &currenttemp, &triptemp)) {
1774 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
1775 cfg->SuppressReport = 1;
1776 if (cfg->tempdiff || cfg->tempinfo || cfg->tempcrit) {
1777 PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
1778 cfg->tempdiff = cfg->tempinfo = cfg->tempcrit = 0;
1779 }
1780 }
1781 }
1782
1783 // capability check: self-test-log
1784 if (cfg->selftest){
1785 int retval=scsiCountFailedSelfTests(fd, 0);
1786 if (retval<0) {
1787 // no self-test log, turn off monitoring
1788 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
1789 cfg->selftest=0;
1790 cfg->selflogcount=0;
1791 cfg->selfloghour=0;
1792 }
1793 else {
1794 // register starting values to watch for changes
1795 cfg->selflogcount=SELFTEST_ERRORCOUNT(retval);
1796 cfg->selfloghour =SELFTEST_ERRORHOURS(retval);
1797 }
1798 }
1799
1800 // disable autosave (set GLTSD bit)
1801 if (cfg->autosave==1){
1802 if (scsiSetControlGLTSD(fd, 1, cfg->modese_len))
1803 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
1804 else
1805 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
1806 }
1807
1808 // or enable autosave (clear GLTSD bit)
1809 if (cfg->autosave==2){
1810 if (scsiSetControlGLTSD(fd, 0, cfg->modese_len))
1811 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
1812 else
1813 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
1814 }
1815
1816 // tell user we are registering device
1817 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
1818
1819 // close file descriptor
1820 CloseDevice(fd, device);
1821 return 0;
1822 }
1823
1824 // We compare old and new values of the n'th attribute. Note that n
1825 // is NOT the attribute ID number.. If (Normalized & Raw) equal,
1826 // then return 0, else nonzero.
1827 int ATACompareValues(changedattribute_t *delta,
1828 struct ata_smart_values *newv,
1829 struct ata_smart_values *oldv,
1830 struct ata_smart_thresholds_pvt *thresholds,
1831 int n, char *name){
1832 struct ata_smart_attribute *now,*was;
1833 struct ata_smart_threshold_entry *thre;
1834 unsigned char oldval,newval;
1835 int sameraw;
1836
1837 // check that attribute number in range, and no null pointers
1838 if (n<0 || n>=NUMBER_ATA_SMART_ATTRIBUTES || !newv || !oldv || !thresholds)
1839 return 0;
1840
1841 // pointers to disk's values and vendor's thresholds
1842 now=newv->vendor_attributes+n;
1843 was=oldv->vendor_attributes+n;
1844 thre=thresholds->thres_entries+n;
1845
1846 // consider only valid attributes
1847 if (!now->id || !was->id || !thre->id)
1848 return 0;
1849
1850
1851 // issue warning if they don't have the same ID in all structures:
1852 if ( (now->id != was->id) || (now->id != thre->id) ){
1853 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d = %d\n",
1854 name, (int)now->id, (int)was->id, (int)thre->id);
1855 return 0;
1856 }
1857
1858 // new and old values of Normalized Attributes
1859 newval=now->current;
1860 oldval=was->current;
1861
1862 // See if the RAW values are unchanged (ie, the same)
1863 if (memcmp(now->raw, was->raw, 6))
1864 sameraw=0;
1865 else
1866 sameraw=1;
1867
1868 // if any values out of the allowed range, or if the values haven't
1869 // changed, return 0
1870 if (!newval || !oldval || newval>0xfe || oldval>0xfe || (oldval==newval && sameraw))
1871 return 0;
1872
1873 // values have changed. Construct output and return
1874 delta->newval=newval;
1875 delta->oldval=oldval;
1876 delta->id=now->id;
1877 delta->prefail=ATTRIBUTE_FLAGS_PREFAILURE(now->flags);
1878 delta->sameraw=sameraw;
1879
1880 return 1;
1881 }
1882
1883 // This looks to see if the corresponding bit of the 32 bytes is set.
1884 // This wastes a few bytes of storage but eliminates all searching and
1885 // sorting functions! Entry is ZERO <==> the attribute ON. Calling
1886 // with set=0 tells you if the attribute is being tracked or not.
1887 // Calling with set=1 turns the attribute OFF.
1888 int IsAttributeOff(unsigned char attr, unsigned char **datap, int set, int which, int whatline){
1889 unsigned char *data;
1890 int loc=attr>>3;
1891 int bit=attr & 0x07;
1892 unsigned char mask=0x01<<bit;
1893
1894 if (which>=NMONITOR || which < 0){
1895 PrintOut(LOG_CRIT, "Internal error in IsAttributeOff() at line %d of file %s (which=%d)\n%s",
1896 whatline, filenameandversion, which, reportbug);
1897 EXIT(EXIT_BADCODE);
1898 }
1899
1900 if (*datap == NULL){
1901 // NULL data implies Attributes are ON...
1902 if (!set)
1903 return 0;
1904
1905 // we are writing
1906 if (!(*datap=(unsigned char *)Calloc(NMONITOR*32, 1))){
1907 PrintOut(LOG_CRIT,"No memory to create monattflags\n");
1908 EXIT(EXIT_NOMEM);
1909 }
1910 }
1911
1912 // pointer to the 256 bits that we need
1913 data=*datap+which*32;
1914
1915 // attribute zero is always OFF
1916 if (!attr)
1917 return 1;
1918
1919 if (!set)
1920 return (data[loc] & mask);
1921
1922 data[loc]|=mask;
1923
1924 // return value when setting has no sense
1925 return 0;
1926 }
1927
1928 // If the self-test log has got more self-test errors (or more recent
1929 // self-test errors) recorded, then notify user.
1930 void CheckSelfTestLogs(cfgfile *cfg, int newi){
1931 char *name=cfg->name;
1932
1933 if (newi<0)
1934 // command failed
1935 MailWarning(cfg, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
1936 else {
1937 // old and new error counts
1938 int oldc=cfg->selflogcount;
1939 int newc=SELFTEST_ERRORCOUNT(newi);
1940
1941 // old and new error timestamps in hours
1942 int oldh=cfg->selfloghour;
1943 int newh=SELFTEST_ERRORHOURS(newi);
1944
1945 if (oldc<newc) {
1946 // increase in error count
1947 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
1948 name, oldc, newc);
1949 MailWarning(cfg, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
1950 name, oldc, newc);
1951 } else if (oldh!=newh) {
1952 // more recent error
1953 // a 'more recent' error might actually be a smaller hour number,
1954 // if the hour number has wrapped.
1955 // There's still a bug here. You might just happen to run a new test
1956 // exactly 32768 hours after the previous failure, and have run exactly
1957 // 20 tests between the two, in which case smartd will miss the
1958 // new failure.
1959 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1960 name, newh);
1961 MailWarning(cfg, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
1962 name, newh);
1963 }
1964
1965 // Needed since self-test error count may DECREASE. Hour might
1966 // also have changed.
1967 cfg->selflogcount= newc;
1968 cfg->selfloghour = newh;
1969 }
1970 return;
1971 }
1972
1973 // returns 1 if time to do test of type testtype, 0 if not time to do
1974 // test, < 0 if error
1975 int DoTestNow(cfgfile *cfg, char testtype, time_t testtime) {
1976 // start by finding out the time:
1977 struct tm *timenow;
1978 time_t epochnow;
1979 char matchpattern[16];
1980 regmatch_t substring;
1981 int weekday, length;
1982 unsigned short hours;
1983 testinfo *dat=cfg->testdata;
1984
1985 // check that self-testing has been requested
1986 if (!dat)
1987 return 0;
1988
1989 // since we are about to call localtime(), be sure glibc is informed
1990 // of any timezone changes we make.
1991 if (!testtime)
1992 FixGlibcTimeZoneBug();
1993
1994 // construct pattern containing the month, day of month, day of
1995 // week, and hour
1996 epochnow = (!testtime ? time(NULL) : testtime);
1997 timenow=localtime(&epochnow);
1998
1999 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7
2000 // (Sunday).
2001 weekday=timenow->tm_wday?timenow->tm_wday:7;
2002 sprintf(matchpattern, "%c/%02d/%02d/%1d/%02d", testtype, timenow->tm_mon+1,
2003 timenow->tm_mday, weekday, timenow->tm_hour);
2004
2005 // if no match, we are done
2006 if (regexec(&(dat->cregex), matchpattern, 1, &substring, 0))
2007 return 0;
2008
2009 // must match the ENTIRE type/date/time string
2010 length=strlen(matchpattern);
2011 if (substring.rm_so!=0 || substring.rm_eo!=length)
2012 return 0;
2013
2014 // never do a second test in the same hour as another test (the % 7 ensures
2015 // that the RHS will never be greater than 65535 and so will always fit into
2016 // an unsigned short)
2017 hours=1+timenow->tm_hour+24*(timenow->tm_yday+366*(timenow->tm_year % 7));
2018 if (hours==dat->hour) {
2019 if (!testtime && testtype!=dat->testtype)
2020 PrintOut(LOG_INFO, "Device: %s, did test of type %c in current hour, skipping test of type %c\n",
2021 cfg->name, dat->testtype, testtype);
2022 return 0;
2023 }
2024
2025 // save time and type of the current test; we are ready to do a test
2026 dat->hour=hours;
2027 dat->testtype=testtype;
2028 return 1;
2029 }
2030
2031 // Print a list of future tests.
2032 void PrintTestSchedule(cfgfile **atadevices, cfgfile **scsidevices){
2033 int i, t;
2034 cfgfile * cfg;
2035 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2036 time_t now; long seconds;
2037 int numdev = numdevata+numdevscsi;
2038 typedef int cnt_t[4];
2039 cnt_t * testcnts; // testcnts[numdev][4]
2040 if (numdev <= 0)
2041 return;
2042 testcnts = (cnt_t *)calloc(numdev, sizeof(testcnts[0]));
2043 if (!testcnts)
2044 return;
2045
2046 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2047
2048 // FixGlibcTimeZoneBug(); // done in PrintOut()
2049 now=time(NULL);
2050 dateandtimezoneepoch(datenow, now);
2051 for (seconds=0; seconds<3600L*24*90; seconds+=checktime) {
2052 // Check for each device whether a test will be run
2053 time_t testtime = now + seconds;
2054 for (i=0; i<numdev; i++) {
2055 cfg = (i<numdevata? atadevices[i] : scsidevices[i-numdevata]);
2056 for (t=0; t<(i<numdevata?4:2); t++) {
2057 char testtype = "LSCO"[t];
2058 if (DoTestNow(cfg, testtype, testtime)) {
2059 // Report at most 5 tests of each type
2060 if (++testcnts[i][t] <= 5) {
2061 dateandtimezoneepoch(date, testtime);
2062 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg->name,
2063 testcnts[i][t], testtype, date);
2064 }
2065 }
2066 }
2067 }
2068 }
2069
2070 // Report totals
2071 dateandtimezoneepoch(date, now+seconds);
2072 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2073 for (i=0; i<numdev; i++) {
2074 cfg = (i<numdevata? atadevices[i] : scsidevices[i-numdevata]);
2075 for (t=0; t<(i<numdevata?4:2); t++) {
2076 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg->name, testcnts[i][t],
2077 (testcnts[i][t]==1?"":"s"), "LSCO"[t]);
2078 }
2079 }
2080
2081 free(testcnts);
2082 }
2083
2084 // Return zero on success, nonzero on failure. Perform offline (background)
2085 // short or long (extended) self test on given scsi device.
2086 int DoSCSISelfTest(int fd, cfgfile *cfg, char testtype) {
2087 int retval = 0;
2088 char *testname = NULL;
2089 char *name = cfg->name;
2090 int inProgress;
2091
2092 if (scsiSelfTestInProgress(fd, &inProgress)) {
2093 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2094 cfg->testdata->not_cap_short=cfg->testdata->not_cap_long=1;
2095 return 1;
2096 }
2097
2098 if (1 == inProgress) {
2099 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2100 "progress.\n", name);
2101 return 1;
2102 }
2103
2104 switch (testtype) {
2105 case 'S':
2106 testname = "Short Self";
2107 retval = scsiSmartShortSelfTest(fd);
2108 break;
2109 case 'L':
2110 testname = "Long Self";
2111 retval = scsiSmartExtendSelfTest(fd);
2112 break;
2113 }
2114 // If we can't do the test, exit
2115 if (NULL == testname) {
2116 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2117 testtype);
2118 return 1;
2119 }
2120 if (retval) {
2121 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2122 (SIMPLE_ERR_BAD_FIELD == retval)) {
2123 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2124 testname);
2125 if ('L'==testtype)
2126 cfg->testdata->not_cap_long=1;
2127 else
2128 cfg->testdata->not_cap_short=1;
2129
2130 return 1;
2131 }
2132 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2133 testname, retval);
2134 return 1;
2135 }
2136
2137 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2138
2139 return 0;
2140 }
2141
2142 // Do an offline immediate or self-test. Return zero on success,
2143 // nonzero on failure.
2144 int DoATASelfTest(int fd, cfgfile *cfg, char testtype) {
2145
2146 struct ata_smart_values data;
2147 char *testname=NULL;
2148 int retval, dotest=-1;
2149 char *name=cfg->name;
2150
2151 // Read current smart data and check status/capability
2152 if (ataReadSmartValues(fd, &data) || !(data.offline_data_collection_capability)) {
2153 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2154 return 1;
2155 }
2156
2157 // Check for capability to do the test
2158 switch (testtype) {
2159 case 'O':
2160 testname="Offline Immediate ";
2161 if (isSupportExecuteOfflineImmediate(&data))
2162 dotest=OFFLINE_FULL_SCAN;
2163 else
2164 cfg->testdata->not_cap_offline=1;
2165 break;
2166 case 'C':
2167 testname="Conveyance Self-";
2168 if (isSupportConveyanceSelfTest(&data))
2169 dotest=CONVEYANCE_SELF_TEST;
2170 else
2171 cfg->testdata->not_cap_conveyance=1;
2172 break;
2173 case 'S':
2174 testname="Short Self-";
2175 if (isSupportSelfTest(&data))
2176 dotest=SHORT_SELF_TEST;
2177 else
2178 cfg->testdata->not_cap_short=1;
2179 break;
2180 case 'L':
2181 testname="Long Self-";
2182 if (isSupportSelfTest(&data))
2183 dotest=EXTEND_SELF_TEST;
2184 else
2185 cfg->testdata->not_cap_long=1;
2186 break;
2187 }
2188
2189 // If we can't do the test, exit
2190 if (dotest<0) {
2191 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2192 return 1;
2193 }
2194
2195 // If currently running a self-test, do not interrupt it to start another.
2196 if (15==(data.self_test_exec_status >> 4)) {
2197 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2198 name, testname, (int)(data.self_test_exec_status & 0x0f));
2199 return 1;
2200 }
2201
2202 // else execute the test, and return status
2203 if ((retval=smartcommandhandler(fd, IMMEDIATE_OFFLINE, dotest, NULL)))
2204 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2205 else
2206 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2207
2208 return retval;
2209 }
2210
2211 // Check Temperature limits
2212 static void CheckTemperature(cfgfile * cfg, unsigned char currtemp, unsigned char triptemp)
2213 {
2214 const char *minchg = "", *maxchg = "";
2215 if (!(0 < currtemp && currtemp < 255)) {
2216 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg->name);
2217 return;
2218 }
2219
2220 if (!cfg->temperature) {
2221 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius\n",
2222 cfg->name, (int)currtemp);
2223 if (triptemp)
2224 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2225 cfg->temperature = cfg->tempmin = cfg->tempmax = currtemp;
2226 }
2227 else {
2228 // Update [min,max]
2229 if (currtemp < cfg->tempmin) {
2230 cfg->tempmin = currtemp; minchg = "!";
2231 cfg->tempmininc = 0;
2232 }
2233 else if (cfg->tempmininc) {
2234 // increase min Temperature during first 30 minutes
2235 cfg->tempmin = currtemp;
2236 cfg->tempmininc--;
2237 }
2238 if (currtemp > cfg->tempmax) {
2239 cfg->tempmax = currtemp; maxchg = "!";
2240 }
2241
2242 // Track changes
2243 if (cfg->tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)cfg->temperature) >= cfg->tempdiff)) {
2244 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %u%s/%u%s)\n",
2245 cfg->name, (int)currtemp-(int)cfg->temperature, currtemp, cfg->tempmin, minchg, cfg->tempmax, maxchg);
2246 cfg->temperature = currtemp;
2247 }
2248 }
2249
2250 // Check limits
2251 if (cfg->tempcrit && currtemp >= cfg->tempcrit) {
2252 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2253 cfg->name, currtemp, cfg->tempcrit, cfg->tempmin, minchg, cfg->tempmax, maxchg);
2254 MailWarning(cfg, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2255 cfg->name, currtemp, cfg->tempcrit, cfg->tempmin, minchg, cfg->tempmax, maxchg);
2256 }
2257 else if (cfg->tempinfo && currtemp >= cfg->tempinfo) {
2258 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %u%s/%u%s)\n",
2259 cfg->name, currtemp, cfg->tempinfo, cfg->tempmin, minchg, cfg->tempmax, maxchg);
2260 }
2261 }
2262
2263 int ATACheckDevice(cfgfile *cfg){
2264 int fd,i;
2265 char *name=cfg->name;
2266 char *mode="ATA";
2267 char testtype=0;
2268
2269 // fix firmware bug if requested
2270 con->fixfirmwarebug=cfg->fixfirmwarebug;
2271 con->controller_port=cfg->controller_port;
2272 con->controller_type=cfg->controller_type;
2273 con->controller_explicit=cfg->controller_explicit;
2274
2275 // If user has asked, test the email warning system
2276 if (cfg->mailwarn && cfg->mailwarn->emailtest)
2277 MailWarning(cfg, 0, "TEST EMAIL from smartd for device: %s", name);
2278
2279 if (cfg->controller_type == CONTROLLER_3WARE_9000_CHAR)
2280 mode="ATA_3WARE_9000";
2281
2282 if (cfg->controller_type == CONTROLLER_3WARE_678K_CHAR)
2283 mode="ATA_3WARE_678K";
2284
2285 // if we can't open device, fail gracefully rather than hard --
2286 // perhaps the next time around we'll be able to open it. ATAPI
2287 // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2288 // given (see linux cdrom driver).
2289 if ((fd=OpenDevice(name, mode, 0))<0){
2290 MailWarning(cfg, 9, "Device: %s, unable to open device", name);
2291 return 1;
2292 }
2293
2294 // if the user has asked, and device is capable (or we're not yet
2295 // sure) check whether a self test should be done now.
2296 // This check is done before powermode check to avoid missing self
2297 // tests on idle or sleeping disks.
2298 if (cfg->testdata) {
2299 // long test
2300 if (!cfg->testdata->not_cap_long && DoTestNow(cfg, 'L', 0)>0)
2301 testtype = 'L';
2302 // short test
2303 else if (!cfg->testdata->not_cap_short && DoTestNow(cfg, 'S', 0)>0)
2304 testtype = 'S';
2305 // conveyance test
2306 else if (!cfg->testdata->not_cap_conveyance && DoTestNow(cfg, 'C', 0)>0)
2307 testtype = 'C';
2308 // offline immediate
2309 else if (!cfg->testdata->not_cap_offline && DoTestNow(cfg, 'O', 0)>0)
2310 testtype = 'O';
2311 }
2312
2313 // user may have requested (with the -n Directive) to leave the disk
2314 // alone if it is in idle or sleeping mode. In this case check the
2315 // power mode and exit without check if needed
2316 if (cfg->powermode){
2317 int dontcheck=0, powermode=ataCheckPowerMode(fd);
2318 char *mode=NULL;
2319 if (0 <= powermode && powermode < 0xff) {
2320 // wait for possible spin up and check again
2321 int powermode2;
2322 sleep(5);
2323 powermode2 = ataCheckPowerMode(fd);
2324 if (powermode2 > powermode)
2325 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
2326 powermode = powermode2;
2327 }
2328
2329 switch (powermode){
2330 case -1:
2331 // SLEEP
2332 mode="SLEEP";
2333 if (cfg->powermode>=1)
2334 dontcheck=1;
2335 break;
2336 case 0:
2337 // STANDBY
2338 mode="STANDBY";
2339 if (cfg->powermode>=2)
2340 dontcheck=1;
2341 break;
2342 case 0x80:
2343 // IDLE
2344 mode="IDLE";
2345 if (cfg->powermode>=3)
2346 dontcheck=1;
2347 break;
2348 case 0xff:
2349 // ACTIVE/IDLE
2350 mode="ACTIVE or IDLE";
2351 break;
2352 default:
2353 // UNKNOWN
2354 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2355 name, powermode);
2356 cfg->powermode=0;
2357 break;
2358 }
2359
2360 // if we are going to skip a check, return now
2361 if (dontcheck){
2362 // but ignore powermode on scheduled selftest
2363 if (!testtype) {
2364 CloseDevice(fd, name);
2365 if (!cfg->powerskipcnt && !cfg->powerquiet) // report first only and avoid waking up system disk
2366 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
2367 cfg->powerskipcnt++;
2368 return 0;
2369 }
2370 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to scheduled self test (%d check%s skipped)\n",
2371 name, mode, cfg->powerskipcnt, (cfg->powerskipcnt==1?"":"s"));
2372 cfg->powerskipcnt = 0;
2373 }
2374 else if (cfg->powerskipcnt) {
2375 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
2376 name, mode, cfg->powerskipcnt, (cfg->powerskipcnt==1?"":"s"));
2377 cfg->powerskipcnt = 0;
2378 }
2379 }
2380
2381 // check smart status
2382 if (cfg->smartcheck){
2383 int status=ataSmartStatus2(fd);
2384 if (status==-1){
2385 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
2386 MailWarning(cfg, 5, "Device: %s, not capable of SMART self-check", name);
2387 }
2388 else if (status==1){
2389 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
2390 MailWarning(cfg, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
2391 }
2392 }
2393
2394 // Check everything that depends upon SMART Data (eg, Attribute values)
2395 if ( cfg->usagefailed || cfg->prefail || cfg->usage || cfg->pending!=DONT_MONITOR_UNC
2396 || cfg->tempdiff || cfg->tempinfo || cfg->tempcrit ){
2397 struct ata_smart_values curval;
2398 struct ata_smart_thresholds_pvt *thresh=cfg->smartthres;
2399
2400 // Read current attribute values. *drive contains old values and thresholds
2401 if (ataReadSmartValues(fd,&curval)){
2402 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
2403 MailWarning(cfg, 6, "Device: %s, failed to read SMART Attribute Data", name);
2404 }
2405 else {
2406 // look for current or offline pending sectors
2407 if (cfg->pending != DONT_MONITOR_UNC) {
2408 int64_t rawval;
2409 unsigned char currentpending, offlinepending;
2410
2411 TranslatePending(cfg->pending, &currentpending, &offlinepending);
2412
2413 if (currentpending && (rawval=ATAReturnAttributeRawValue(currentpending, &curval))>0) {
2414 // Unreadable pending sectors!!
2415 PrintOut(LOG_CRIT, "Device: %s, %"PRId64" Currently unreadable (pending) sectors\n", name, rawval);
2416 MailWarning(cfg, 10, "Device: %s, %"PRId64" Currently unreadable (pending) sectors", name, rawval);
2417 }
2418
2419 if (offlinepending && (rawval=ATAReturnAttributeRawValue(offlinepending, &curval))>0) {
2420 // Unreadable offline sectors!!
2421 PrintOut(LOG_CRIT, "Device: %s, %"PRId64" Offline uncorrectable sectors\n", name, rawval);
2422 MailWarning(cfg, 11, "Device: %s, %"PRId64" Offline uncorrectable sectors", name, rawval);
2423 }
2424 }
2425
2426 // check temperature limits
2427 if (cfg->tempdiff || cfg->tempinfo || cfg->tempcrit)
2428 CheckTemperature(cfg, ATAReturnTemperatureValue(&curval, cfg->attributedefs), 0);
2429
2430 if (cfg->usagefailed || cfg->prefail || cfg->usage) {
2431
2432 // look for failed usage attributes, or track usage or prefail attributes
2433 for (i=0; i<NUMBER_ATA_SMART_ATTRIBUTES; i++){
2434 int att;
2435 changedattribute_t delta;
2436
2437 // This block looks for usage attributes that have failed.
2438 // Prefail attributes that have failed are returned with a
2439 // positive sign. No failure returns 0. Usage attributes<0.
2440 if (cfg->usagefailed && ((att=ataCheckAttribute(&curval, thresh, i))<0)){
2441
2442 // are we ignoring failures of this attribute?
2443 att *= -1;
2444 if (!IsAttributeOff(att, &cfg->monitorattflags, 0, MONITOR_FAILUSE, __LINE__)){
2445 char attname[64], *loc=attname;
2446
2447 // get attribute name & skip white space
2448 ataPrintSmartAttribName(loc, att, cfg->attributedefs);
2449 while (*loc && *loc==' ') loc++;
2450
2451 // warning message
2452 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %s.\n", name, loc);
2453 MailWarning(cfg, 2, "Device: %s, Failed SMART usage Attribute: %s.", name, loc);
2454 }
2455 }
2456
2457 // This block tracks usage or prefailure attributes to see if
2458 // they are changing. It also looks for changes in RAW values
2459 // if this has been requested by user.
2460 if ((cfg->usage || cfg->prefail) && ATACompareValues(&delta, &curval, cfg->smartval, thresh, i, name)){
2461 unsigned char id=delta.id;
2462
2463 // if the only change is the raw value, and we're not
2464 // tracking raw value, then continue loop over attributes
2465 if (!delta.sameraw && delta.newval==delta.oldval && !IsAttributeOff(id, &cfg->monitorattflags, 0, MONITOR_RAW, __LINE__))
2466 continue;
2467
2468 // are we tracking this attribute?
2469 if (!IsAttributeOff(id, &cfg->monitorattflags, 0, MONITOR_IGNORE, __LINE__)){
2470 char newrawstring[64], oldrawstring[64], attname[64], *loc=attname;
2471
2472 // get attribute name, skip spaces
2473 ataPrintSmartAttribName(loc, id, cfg->attributedefs);
2474 while (*loc && *loc==' ') loc++;
2475
2476 // has the user asked for us to print raw values?
2477 if (IsAttributeOff(id, &cfg->monitorattflags, 0, MONITOR_RAWPRINT, __LINE__)) {
2478 // get raw values (as a string) and add to printout
2479 char rawstring[64];
2480 ataPrintSmartAttribRawValue(rawstring, curval.vendor_attributes+i, cfg->attributedefs);
2481 sprintf(newrawstring, " [Raw %s]", rawstring);
2482 ataPrintSmartAttribRawValue(rawstring, cfg->smartval->vendor_attributes+i, cfg->attributedefs);
2483 sprintf(oldrawstring, " [Raw %s]", rawstring);
2484 }
2485 else
2486 newrawstring[0]=oldrawstring[0]='\0';
2487
2488 // prefailure attribute
2489 if (cfg->prefail && delta.prefail)
2490 PrintOut(LOG_INFO, "Device: %s, SMART Prefailure Attribute: %s changed from %d%s to %d%s\n",
2491 name, loc, delta.oldval, oldrawstring, delta.newval, newrawstring);
2492
2493 // usage attribute
2494 if (cfg->usage && !delta.prefail)
2495 PrintOut(LOG_INFO, "Device: %s, SMART Usage Attribute: %s changed from %d%s to %d%s\n",
2496 name, loc, delta.oldval, oldrawstring, delta.newval, newrawstring);
2497 }
2498 } // endof block tracking usage or prefailure
2499 } // end of loop over attributes
2500
2501 // Save the new values into *drive for the next time around
2502 *(cfg->smartval)=curval;
2503 }
2504 }
2505 }
2506
2507 // check if number of selftest errors has increased (note: may also DECREASE)
2508 if (cfg->selftest)
2509 CheckSelfTestLogs(cfg, SelfTestErrorCount(fd, name));
2510
2511 // check if number of ATA errors has increased
2512 if (cfg->errorlog){
2513
2514 int newc,oldc=cfg->ataerrorcount;
2515
2516 // new number of errors
2517 newc=ATAErrorCount(fd, name);
2518
2519 // did command fail?
2520 if (newc<0)
2521 // lack of PrintOut here is INTENTIONAL
2522 MailWarning(cfg, 7, "Device: %s, Read SMART Error Log Failed", name);
2523
2524 // has error count increased?
2525 if (newc>oldc){
2526 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
2527 name, oldc, newc);
2528 MailWarning(cfg, 4, "Device: %s, ATA error count increased from %d to %d",
2529 name, oldc, newc);
2530 }
2531
2532 // this last line is probably not needed, count always increases
2533 if (newc>=0)
2534 cfg->ataerrorcount=newc;
2535 }
2536
2537 // carry out scheduled self-test
2538 if (testtype)
2539 DoATASelfTest(fd, cfg, testtype);
2540
2541 // Don't leave device open -- the OS/user may want to access it
2542 // before the next smartd cycle!
2543 CloseDevice(fd, name);
2544 return 0;
2545 }
2546
2547 int SCSICheckDevice(cfgfile *cfg)
2548 {
2549 UINT8 asc, ascq;
2550 UINT8 currenttemp;
2551 UINT8 triptemp;
2552 int fd;
2553 char *name=cfg->name;
2554 const char *cp;
2555 char *mode=NULL;
2556
2557 // should we try to register this as a SCSI device?
2558 switch (cfg->controller_type) {
2559 case CONTROLLER_CCISS:
2560 mode="CCISS";
2561 break;
2562 case CONTROLLER_SCSI:
2563 case CONTROLLER_UNKNOWN:
2564 mode="SCSI";
2565 break;
2566 default:
2567 return 1;
2568 }
2569
2570 // pass user settings on to low-level SCSI commands
2571 con->controller_port=cfg->controller_port;
2572 con->controller_type=cfg->controller_type;
2573
2574 // If the user has asked for it, test the email warning system
2575 if (cfg->mailwarn && cfg->mailwarn->emailtest)
2576 MailWarning(cfg, 0, "TEST EMAIL from smartd for device: %s", name);
2577
2578 // if we can't open device, fail gracefully rather than hard --
2579 // perhaps the next time around we'll be able to open it
2580 if ((fd=OpenDevice(name, mode, 0))<0) {
2581 // Lack of PrintOut() here is intentional!
2582 MailWarning(cfg, 9, "Device: %s, unable to open device", name);
2583 return 1;
2584 } else if (debugmode)
2585 PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
2586 currenttemp = 0;
2587 asc = 0;
2588 ascq = 0;
2589 if (! cfg->SuppressReport) {
2590 if (scsiCheckIE(fd, cfg->SmartPageSupported, cfg->TempPageSupported,
2591 &asc, &ascq, &currenttemp, &triptemp)) {
2592 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
2593 name);
2594 MailWarning(cfg, 6, "Device: %s, failed to read SMART values", name);
2595 cfg->SuppressReport = 1;
2596 }
2597 }
2598 if (asc > 0) {
2599 cp = scsiGetIEString(asc, ascq);
2600 if (cp) {
2601 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
2602 MailWarning(cfg, 1,"Device: %s, SMART Failure: %s", name, cp);
2603 } else if (debugmode)
2604 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
2605 name, (int)asc, (int)ascq);
2606 } else if (debugmode)
2607 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
2608
2609 // check temperature limits
2610 if (cfg->tempdiff || cfg->tempinfo || cfg->tempcrit)
2611 CheckTemperature(cfg, currenttemp, triptemp);
2612
2613 // check if number of selftest errors has increased (note: may also DECREASE)
2614 if (cfg->selftest)
2615 CheckSelfTestLogs(cfg, scsiCountFailedSelfTests(fd, 0));
2616
2617 if (cfg->testdata) {
2618 // long (extended) background test
2619 if (!cfg->testdata->not_cap_long && DoTestNow(cfg, 'L', 0)>0)
2620 DoSCSISelfTest(fd, cfg, 'L');
2621 // short background test
2622 else if (!cfg->testdata->not_cap_short && DoTestNow(cfg, 'S', 0)>0)
2623 DoSCSISelfTest(fd, cfg, 'S');
2624 }
2625 CloseDevice(fd, name);
2626 return 0;
2627 }
2628
2629 // Checks the SMART status of all ATA and SCSI devices
2630 void CheckDevicesOnce(cfgfile **atadevices, cfgfile **scsidevices){
2631 int i;
2632
2633 for (i=0; i<numdevata; i++)
2634 ATACheckDevice(atadevices[i]);
2635
2636 for (i=0; i<numdevscsi; i++)
2637 SCSICheckDevice(scsidevices[i]);
2638
2639 return;
2640 }
2641
#if SCSITIMEOUT
// Signal handler for the alarm that fires when a SCSI USB device was
// hanging.  It does not return normally: control is transferred to the
// place where registerscsienv was saved, which then sees the value 1.
void AlarmHandler(int signal) {
  // the signal number itself is not needed
  (void)signal;
  longjmp(registerscsienv, 1);
}
#endif
2648
2649 // Does initialization right after fork to daemon mode
2650 void Initialize(time_t *wakeuptime){
2651
2652 // install goobye message and remove pidfile handler
2653 atexit(Goodbye);
2654
2655 // write PID file only after installing exit handler
2656 if (!debugmode)
2657 WritePidFile();
2658
2659 // install signal handlers. On Solaris, can't use signal() because
2660 // it resets the handler to SIG_DFL after each call. So use sigset()
2661 // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
2662
2663 // normal and abnormal exit
2664 if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
2665 SIGNALFN(SIGTERM, SIG_IGN);
2666 if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
2667 SIGNALFN(SIGQUIT, SIG_IGN);
2668
2669 // in debug mode, <CONTROL-C> ==> HUP
2670 if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
2671 SIGNALFN(SIGINT, SIG_IGN);
2672
2673 // Catch HUP and USR1
2674 if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
2675 SIGNALFN(SIGHUP, SIG_IGN);
2676 if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
2677 SIGNALFN(SIGUSR1, SIG_IGN);
2678 #ifdef _WIN32
2679 if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
2680 SIGNALFN(SIGUSR2, SIG_IGN);
2681 #endif
2682
2683 // initialize wakeup time to CURRENT time
2684 *wakeuptime=time(NULL);
2685
2686 return;
2687 }
2688
#ifdef _WIN32
// Toggle debug mode implemented for native windows only
// (there is no easy way to reopen tty on *nix).
// debugmode 0 -> 1 opens a console and routes SIGINT to HUPhandler,
// debugmode 1 -> 0 closes the console and restores sighandler for
// SIGINT; any other debugmode value is left unchanged.
static void ToggleDebugMode()
{
  switch (debugmode) {
  case 0:
    PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
    // a nonzero result means the console could not be enabled
    if (daemon_enable_console("smartd [Debug]")) {
      PrintOut(LOG_INFO,"enable console failed\n");
      break;
    }
    debugmode = 1;
    daemon_signal(SIGINT, HUPhandler);
    PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
    break;

  case 1:
    daemon_disable_console();
    debugmode = 0;
    daemon_signal(SIGINT, sighandler);
    PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
    break;

  default:
    PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
    break;
  }
}
#endif
2714
2715 time_t dosleep(time_t wakeuptime){
2716 time_t timenow=0;
2717
2718 // If past wake-up-time, compute next wake-up-time
2719 timenow=time(NULL);
2720 while (wakeuptime<=timenow){
2721 int intervals=1+(timenow-wakeuptime)/checktime;
2722 wakeuptime+=intervals*checktime;
2723 }
2724
2725 // sleep until we catch SIGUSR1 or have completed sleeping
2726 while (timenow<wakeuptime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT){
2727
2728 // protect user again system clock being adjusted backwards
2729 if (wakeuptime>timenow+checktime){
2730 PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
2731 wakeuptime=timenow+checktime;
2732 }
2733
2734 // Exit sleep when time interval has expired or a signal is received
2735 sleep(wakeuptime-timenow);
2736
2737 #ifdef _WIN32
2738 // toggle debug mode?
2739 if (caughtsigUSR2) {
2740 ToggleDebugMode();
2741 caughtsigUSR2 = 0;
2742 }
2743 #endif
2744
2745 timenow=time(NULL);
2746 }
2747
2748 // if we caught a SIGUSR1 then print message and clear signal
2749 if (caughtsigUSR1){
2750 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
2751 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
2752 caughtsigUSR1=0;
2753 }
2754
2755 // return adjusted wakeuptime
2756 return wakeuptime;
2757 }
2758
2759 // Print out a list of valid arguments for the Directive d
2760 void printoutvaliddirectiveargs(int priority, char d) {
2761 char *s=NULL;
2762
2763 switch (d) {
2764 case 'n':
2765 PrintOut(priority, "never[,q], sleep[,q], standby[,q], idle[,q]");
2766 break;
2767 case 's':
2768 PrintOut(priority, "valid_regular_expression");
2769 break;
2770 case 'd':
2771 PrintOut(priority, "ata, scsi, marvell, removable, sat, 3ware,N, hpt,L/M/N");
2772 break;
2773 case 'T':
2774 PrintOut(priority, "normal, permissive");
2775 break;
2776 case 'o':
2777 case 'S':
2778 PrintOut(priority, "on, off");
2779 break;
2780 case 'l':
2781 PrintOut(priority, "error, selftest");
2782 break;
2783 case 'M':
2784 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
2785 break;
2786 case 'v':
2787 if (!(s = create_vendor_attribute_arg_list())) {
2788 PrintOut(LOG_CRIT,"Insufficient memory to construct argument list\n");
2789 EXIT(EXIT_NOMEM);
2790 }
2791 PrintOut(priority, "\n%s\n", s);
2792 s=CheckFree(s, __LINE__,filenameandversion);
2793 break;
2794 case 'P':
2795 PrintOut(priority, "use, ignore, show, showall");
2796 break;
2797 case 'F':
2798 PrintOut(priority, "none, samsung, samsung2");
2799 break;
2800 }
2801 }
2802
2803 // exits with an error message, or returns integer value of token
2804 int GetInteger(char *arg, char *name, char *token, int lineno, char *configfile, int min, int max){
2805 char *endptr;
2806 int val;
2807
2808 // check input range
2809 if (min<0){
2810 PrintOut(LOG_CRIT, "min =%d passed to GetInteger() must be >=0\n", min);
2811 return -1;
2812 }
2813
2814 // make sure argument is there
2815 if (!arg) {
2816 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
2817 configfile, lineno, name, token, min, max);
2818 return -1;
2819 }
2820
2821 // get argument value (base 10), check that it's integer, and in-range
2822 val=strtol(arg,&endptr,10);
2823 if (*endptr!='\0' || val<min || val>max ) {
2824 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
2825 configfile, lineno, name, token, arg, min, max);
2826 return -1;
2827 }
2828
2829 // all is well; return value
2830 return val;
2831 }
2832
2833
2834 // Get 1-3 small integer(s) for '-W' directive
2835 int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *configfile,
2836 unsigned char * val1, unsigned char * val2, unsigned char * val3){
2837 unsigned v1 = 0, v2 = 0, v3 = 0;
2838 int n1 = -1, n2 = -1, n3 = -1, len;
2839 if (!arg) {
2840 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
2841 configfile, lineno, name, token);
2842 return -1;
2843 }
2844
2845 len = strlen(arg);
2846 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
2847 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
2848 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
2849 configfile, lineno, name, token, arg);
2850 return -1;
2851 }
2852 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
2853 return 0;
2854 }
2855
2856
2857 // This function returns 1 if it has correctly parsed one token (and
2858 // any arguments), else zero if no tokens remain. It returns -1 if an
2859 // error was encountered.
2860 int ParseToken(char *token,cfgfile *cfg){
2861 char sym;
2862 char *name=cfg->name;
2863 int lineno=cfg->lineno;
2864 char *delim = " \n\t";
2865 int badarg = 0;
2866 int missingarg = 0;
2867 char *arg = NULL;
2868 int makemail=0;
2869 maildata *mdat=NULL, tempmail;
2870
2871 // is the rest of the line a comment
2872 if (*token=='#')
2873 return 1;
2874
2875 // is the token not recognized?
2876 if (*token!='-' || strlen(token)!=2) {
2877 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
2878 configfile, lineno, name, token);
2879 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
2880 return -1;
2881 }
2882
2883 // token we will be parsing:
2884 sym=token[1];
2885
2886 // create temporary maildata structure. This means we can postpone
2887 // allocating space in the data segment until we are sure there are
2888 // no errors.
2889 if ('m'==sym || 'M'==sym){
2890 if (!cfg->mailwarn){
2891 memset(&tempmail, 0, sizeof(maildata));
2892 mdat=&tempmail;
2893 makemail=1;
2894 }
2895 else
2896 mdat=cfg->mailwarn;
2897 }
2898
2899 // parse the token and swallow its argument
2900 switch (sym) {
2901 int val;
2902
2903 case 'C':
2904 // monitor current pending sector count (default 197)
2905 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255))<0)
2906 return -1;
2907 if (val==CUR_UNC_DEFAULT)
2908 val=0;
2909 else if (val==0)
2910 val=CUR_UNC_DEFAULT;
2911 // set bottom 8 bits to correct value
2912 cfg->pending &= 0xff00;
2913 cfg->pending |= val;
2914 break;
2915 case 'U':
2916 // monitor offline uncorrectable sectors (default 198)
2917 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255))<0)
2918 return -1;
2919 if (val==OFF_UNC_DEFAULT)
2920 val=0;
2921 else if (val==0)
2922 val=OFF_UNC_DEFAULT;
2923 // turn off top 8 bits, then set to correct value
2924 cfg->pending &= 0xff;
2925 cfg->pending |= (val<<8);
2926 break;
2927 case 'T':
2928 // Set tolerance level for SMART command failures
2929 if ((arg = strtok(NULL, delim)) == NULL) {
2930 missingarg = 1;
2931 } else if (!strcmp(arg, "normal")) {
2932 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
2933 // not on failure of an optional S.M.A.R.T. command.
2934 // This is the default so we don't need to actually do anything here.
2935 cfg->permissive=0;
2936 } else if (!strcmp(arg, "permissive")) {
2937 // Permissive mode; ignore errors from Mandatory SMART commands
2938 cfg->permissive=1;
2939 } else {
2940 badarg = 1;
2941 }
2942 break;
2943 case 'd':
2944 // specify the device type
2945 cfg->controller_explicit = 1;
2946 if ((arg = strtok(NULL, delim)) == NULL) {
2947 missingarg = 1;
2948 } else if (!strcmp(arg, "ata")) {
2949 cfg->controller_port = 0;
2950 cfg->controller_type = CONTROLLER_ATA;
2951 } else if (!strcmp(arg, "scsi")) {
2952 cfg->controller_port =0;
2953 cfg->controller_type = CONTROLLER_SCSI;
2954 } else if (!strcmp(arg, "marvell")) {
2955 cfg->controller_port =0;
2956 cfg->controller_type = CONTROLLER_MARVELL_SATA;
2957 } else if (!strncmp(arg, "sat", 3)) {
2958 cfg->controller_type = CONTROLLER_SAT;
2959 cfg->controller_port = 0;
2960 cfg->satpassthrulen = 0;
2961 if (strlen(arg) > 3) {
2962 int k;
2963 char * cp;
2964
2965 cp = strchr(arg, ',');
2966 if (cp && (1 == sscanf(cp + 1, "%d", &k)) &&
2967 ((0 == k) || (12 == k) || (16 == k)))
2968 cfg->satpassthrulen = k;
2969 else {
2970 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive "
2971 "'-d sat,<n>' requires <n> to be 0, 12 or 16\n",
2972 configfile, lineno, name);
2973 badarg = 1;
2974 }
2975 }
2976 } else if (!strncmp(arg, "hpt", 3)){
2977 unsigned char i, slash = 0;
2978 cfg->hpt_data[0] = 0;
2979 cfg->hpt_data[1] = 0;
2980 cfg->hpt_data[2] = 0;
2981 cfg->controller_type = CONTROLLER_HPT;
2982 for (i=4; i < strlen(arg); i++) {
2983 if(arg[i] == '/') {
2984 slash++;
2985 if(slash == 3) {
2986 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive "
2987 "'-d hpt,L/M/N' supports 2-3 items\n",
2988 configfile, lineno, name);
2989 badarg = TRUE;
2990 break;
2991 }
2992 }
2993 else if ((arg[i])>='0' && (arg[i])<='9') {
2994 if (cfg->hpt_data[slash]>1) { /* hpt_data[x] max 19 */
2995 badarg = TRUE;
2996 break;
2997 }
2998 cfg->hpt_data[slash] = cfg->hpt_data[slash]*10 + arg[i] - '0';
2999 }
3000 else {
3001 badarg = TRUE;
3002 break;
3003 }
3004 }
3005 if ( slash == 0 ) {
3006 badarg = TRUE;
3007 } else if (badarg != TRUE) {
3008 if (cfg->hpt_data[0]==0 || cfg->hpt_data[0]>8){
3009 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive "
3010 "'-d hpt,L/M/N' no/invalid controller id L supplied\n",
3011 configfile, lineno, name);
3012 badarg = TRUE;
3013 }
3014 if (cfg->hpt_data[1]==0 || cfg->hpt_data[1]>8){
3015 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive "
3016 "'-d hpt,L/M/N' no/invalid channel number M supplied\n",
3017 configfile, lineno, name);
3018 badarg = TRUE;
3019 }
3020 if (slash==2){
3021 if (cfg->hpt_data[2]==0 || cfg->hpt_data[2]>15){
3022 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive "
3023 "'-d hpt,L/M/N' no/invalid pmport number N supplied\n",
3024 configfile, lineno, name);
3025 badarg = TRUE;
3026 }
3027 } else { /* no pmport device */
3028 cfg->hpt_data[2]=1;
3029 }
3030 }
3031 } else if (!strcmp(arg, "removable")) {
3032 cfg->removable = 1;
3033 } else {
3034 // look 3ware,N RAID device
3035 int i;
3036 char *s;
3037
3038 // make a copy of the string to mess with
3039 if (!(s = strdup(arg))) {
3040 PrintOut(LOG_CRIT,
3041 "No memory to copy argument to -d option - exiting\n");
3042 EXIT(EXIT_NOMEM);
3043 } else if (!strncmp(s,"3ware,",6)) {
3044 if (split_report_arg2(s, &i)){
3045 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive -d 3ware,N requires N integer\n",
3046 configfile, lineno, name);
3047 badarg=1;
3048 } else if ( i<0 || i>15) {
3049 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive -d 3ware,N (N=%d) must have 0 <= N <= 15\n",
3050 configfile, lineno, name, i);
3051 badarg=1;
3052 } else {
3053 // determine type of escalade device from name of device
3054 cfg->controller_type = guess_device_type(name);
3055 if (cfg->controller_type!=CONTROLLER_3WARE_9000_CHAR && cfg->controller_type!=CONTROLLER_3WARE_678K_CHAR)
3056 cfg->controller_type=CONTROLLER_3WARE_678K;
3057
3058 // NOTE: controller_port == disk number + 1
3059 cfg->controller_port = i+1;
3060 }
3061 } else if (!strncmp(s,"cciss,",6)) {
3062 if (split_report_arg2(s, &i)){
3063 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive -d cciss,N requires N integer\n",
3064 configfile, lineno, name);
3065 badarg=1;
3066 } else if ( i<0 || i>15) {
3067 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive -d cciss,N (N=%d) must have 0 <= N <= 15\n",
3068 configfile, lineno, name, i);
3069 badarg=1;
3070 } else {
3071 // NOTE: controller_port == disk number + 1
3072 cfg->controller_type = CONTROLLER_CCISS;
3073 cfg->controller_port = i+1;
3074 }
3075 } else {
3076 badarg=1;
3077 }
3078 s=CheckFree(s, __LINE__,filenameandversion);
3079 }
3080 break;
3081 case 'F':
3082 // fix firmware bug
3083 if ((arg = strtok(NULL, delim)) == NULL) {
3084 missingarg = 1;
3085 } else if (!strcmp(arg, "none")) {
3086 cfg->fixfirmwarebug = FIX_NONE;
3087 } else if (!strcmp(arg, "samsung")) {
3088 cfg->fixfirmwarebug = FIX_SAMSUNG;
3089 } else if (!strcmp(arg, "samsung2")) {
3090 cfg->fixfirmwarebug = FIX_SAMSUNG2;
3091 } else {
3092 badarg = 1;
3093 }
3094 break;
3095 case 'H':
3096 // check SMART status
3097 cfg->smartcheck=1;
3098 break;
3099 case 'f':
3100 // check for failure of usage attributes
3101 cfg->usagefailed=1;
3102 break;
3103 case 't':
3104 // track changes in all vendor attributes
3105 cfg->prefail=1;
3106 cfg->usage=1;
3107 break;
3108 case 'p':
3109 // track changes in prefail vendor attributes
3110 cfg->prefail=1;
3111 break;
3112 case 'u':
3113 // track changes in usage vendor attributes
3114 cfg->usage=1;
3115 break;
3116 case 'l':
3117 // track changes in SMART logs
3118 if ((arg = strtok(NULL, delim)) == NULL) {
3119 missingarg = 1;
3120 } else if (!strcmp(arg, "selftest")) {
3121 // track changes in self-test log
3122 cfg->selftest=1;
3123 } else if (!strcmp(arg, "error")) {
3124 // track changes in ATA error log
3125 cfg->errorlog=1;
3126 } else {
3127 badarg = 1;
3128 }
3129 break;
3130 case 'a':
3131 // monitor everything
3132 cfg->smartcheck=1;
3133 cfg->prefail=1;
3134 cfg->usagefailed=1;
3135 cfg->usage=1;
3136 cfg->selftest=1;
3137 cfg->errorlog=1;
3138 break;
3139 case 'o':
3140 // automatic offline testing enable/disable
3141 if ((arg = strtok(NULL, delim)) == NULL) {
3142 missingarg = 1;
3143 } else if (!strcmp(arg, "on")) {
3144 cfg->autoofflinetest = 2;
3145 } else if (!strcmp(arg, "off")) {
3146 cfg->autoofflinetest = 1;
3147 } else {
3148 badarg = 1;
3149 }
3150 break;
3151 case 'n':
3152 // skip disk check if in idle or standby mode
3153 if (!(arg = strtok(NULL, delim)))
3154 missingarg = 1;
3155 else if (!strcmp(arg, "never") || !strcmp(arg, "never,q"))
3156 cfg->powermode = 0;
3157 else if (!strcmp(arg, "sleep") || !strcmp(arg, "sleep,q"))
3158 cfg->powermode = 1;
3159 else if (!strcmp(arg, "standby") || !strcmp(arg, "standby,q"))
3160 cfg->powermode = 2;
3161 else if (!strcmp(arg, "idle") || !strcmp(arg, "idle,q"))
3162 cfg->powermode = 3;
3163 else
3164 badarg = 1;
3165 cfg->powerquiet = !!strchr(arg, ',');
3166 break;
3167 case 'S':
3168 // automatic attribute autosave enable/disable
3169 if ((arg = strtok(NULL, delim)) == NULL) {
3170 missingarg = 1;
3171 } else if (!strcmp(arg, "on")) {
3172 cfg->autosave = 2;
3173 } else if (!strcmp(arg, "off")) {
3174 cfg->autosave = 1;
3175 } else {
3176 badarg = 1;
3177 }
3178 break;
3179 case 's':
3180 // warn user, and delete any previously given -s REGEXP Directives
3181 if (cfg->testdata){
3182 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3183 configfile, lineno, name, cfg->testdata->regex);
3184 cfg->testdata=FreeTestData(cfg->testdata);
3185 }
3186 // check for missing argument
3187 if (!(arg = strtok(NULL, delim))) {
3188 missingarg = 1;
3189 }
3190 // allocate space for structure and string
3191 else if (!(cfg->testdata=(testinfo *)Calloc(1, sizeof(testinfo))) || !(cfg->testdata->regex=CustomStrDup(arg, 1, __LINE__,filenameandversion))) {
3192 PrintOut(LOG_INFO, "File %s line %d (drive %s): no memory to create Test Directive -s %s!\n",
3193 configfile, lineno, name, arg);
3194 EXIT(EXIT_NOMEM);
3195 }
3196 else if ((val=regcomp(&(cfg->testdata->cregex), arg, REG_EXTENDED))) {
3197 char errormsg[512];
3198 // not a valid regular expression!
3199 regerror(val, &(cfg->testdata->cregex), errormsg, 512);
3200 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3201 configfile, lineno, name, arg, errormsg);
3202 cfg->testdata=FreeTestData(cfg->testdata);
3203 return -1;
3204 }
3205 // Do a bit of sanity checking and warn user if we think that
3206 // their regexp is "strange". User probably confused about shell
3207 // glob(3) syntax versus regular expression syntax regexp(7).
3208 if ((int)strlen(arg) != (val=strspn(arg,"0123456789/.-+*|()?^$[]SLCO")))
3209 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3210 configfile, lineno, name, val+1, arg[val], arg);
3211 break;
3212 case 'm':
3213 // send email to address that follows
3214 if (!(arg = strtok(NULL,delim)))
3215 missingarg = 1;
3216 else {
3217 if (mdat->address) {
3218 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3219 configfile, lineno, name, mdat->address);
3220 mdat->address=FreeNonZero(mdat->address, -1,__LINE__,filenameandversion);
3221 }
3222 mdat->address=CustomStrDup(arg, 1, __LINE__,filenameandversion);
3223 }
3224 break;
3225 case 'M':
3226 // email warning options
3227 if (!(arg = strtok(NULL, delim)))
3228 missingarg = 1;
3229 else if (!strcmp(arg, "once"))
3230 mdat->emailfreq = 1;
3231 else if (!strcmp(arg, "daily"))
3232 mdat->emailfreq = 2;
3233 else if (!strcmp(arg, "diminishing"))
3234 mdat->emailfreq = 3;
3235 else if (!strcmp(arg, "test"))
3236 mdat->emailtest = 1;
3237 else if (!strcmp(arg, "exec")) {
3238 // Get the next argument (the command line)
3239 if (!(arg = strtok(NULL, delim))) {
3240 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3241 configfile, lineno, name, token);
3242 return -1;
3243 }
3244 // Free the last cmd line given if any, and copy new one
3245 if (mdat->emailcmdline) {
3246 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3247 configfile, lineno, name, mdat->emailcmdline);
3248 mdat->emailcmdline=FreeNonZero(mdat->emailcmdline, -1,__LINE__,filenameandversion);
3249 }
3250 mdat->emailcmdline=CustomStrDup(arg, 1, __LINE__,filenameandversion);
3251 }
3252 else
3253 badarg = 1;
3254 break;
3255 case 'i':
3256 // ignore failure of usage attribute
3257 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3258 return -1;
3259 IsAttributeOff(val, &cfg->monitorattflags, 1, MONITOR_FAILUSE, __LINE__);
3260 break;
3261 case 'I':
3262 // ignore attribute for tracking purposes
3263 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3264 return -1;
3265 IsAttributeOff(val, &cfg->monitorattflags, 1, MONITOR_IGNORE, __LINE__);
3266 break;
3267 case 'r':
3268 // print raw value when tracking
3269 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3270 return -1;
3271 IsAttributeOff(val, &cfg->monitorattflags, 1, MONITOR_RAWPRINT, __LINE__);
3272 break;
3273 case 'R':
3274 // track changes in raw value (forces printing of raw value)
3275 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3276 return -1;
3277 IsAttributeOff(val, &cfg->monitorattflags, 1, MONITOR_RAWPRINT, __LINE__);
3278 IsAttributeOff(val, &cfg->monitorattflags, 1, MONITOR_RAW, __LINE__);
3279 break;
3280 case 'W':
3281 // track Temperature
3282 if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3283 &cfg->tempdiff, &cfg->tempinfo, &cfg->tempcrit))<0)
3284 return -1;
3285 // increase min Temperature during first 30 minutes
3286 if (!(cfg->tempmininc = (unsigned char)(CHECKTIME / checktime)))
3287 cfg->tempmininc = 1;
3288 break;
3289 case 'v':
3290 // non-default vendor-specific attribute meaning
3291 if (!(arg=strtok(NULL,delim))) {
3292 missingarg = 1;
3293 } else if (parse_attribute_def(arg, &cfg->attributedefs)){
3294 badarg = 1;
3295 }
3296 break;
3297 case 'P':
3298 // Define use of drive-specific presets.
3299 if (!(arg = strtok(NULL, delim))) {
3300 missingarg = 1;
3301 } else if (!strcmp(arg, "use")) {
3302 cfg->ignorepresets = FALSE;
3303 } else if (!strcmp(arg, "ignore")) {
3304 cfg->ignorepresets = TRUE;
3305 } else if (!strcmp(arg, "show")) {
3306 cfg->showpresets = TRUE;
3307 } else if (!strcmp(arg, "showall")) {
3308 showallpresets();
3309 } else {
3310 badarg = 1;
3311 }
3312 break;
3313 default:
3314 // Directive not recognized
3315 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3316 configfile, lineno, name, token);
3317 Directives();
3318 return -1;
3319 }
3320 if (missingarg) {
3321 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3322 configfile, lineno, name, token);
3323 }
3324 if (badarg) {
3325 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3326 configfile, lineno, name, token, arg);
3327 }
3328 if (missingarg || badarg) {
3329 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
3330 printoutvaliddirectiveargs(LOG_CRIT, sym);
3331 PrintOut(LOG_CRIT, "\n");
3332 return -1;
3333 }
3334
3335 // If this did something to fill the mail structure, and that didn't
3336 // already exist, create it and copy.
3337 if (makemail) {
3338 if (!(cfg->mailwarn=(maildata *)Calloc(1, sizeof(maildata)))) {
3339 PrintOut(LOG_INFO, "File %s line %d (drive %s): no memory to create mail warning entry!\n",
3340 configfile, lineno, name);
3341 EXIT(EXIT_NOMEM);
3342 }
3343 memcpy(cfg->mailwarn, mdat, sizeof(maildata));
3344 }
3345
3346 return 1;
3347 }
3348
3349 // Allocate storage for a new cfgfile entry. If original!=NULL, it's
3350 // a copy of the original, but with private data storage. Else all is
3351 // zeroed. Returns address, and fails if non memory available.
3352
3353 cfgfile *CreateConfigEntry(cfgfile *original){
3354 cfgfile *add;
3355
3356 // allocate memory for new structure
3357 if (!(add=(cfgfile *)Calloc(1,sizeof(cfgfile))))
3358 goto badexit;
3359
3360 // if old structure was pointed to, copy it
3361 if (original)
3362 memcpy(add, original, sizeof(cfgfile));
3363
3364 // make private copies of data items ONLY if they are in use (non
3365 // NULL)
3366 add->name = CustomStrDup(add->name, 0, __LINE__,filenameandversion);
3367
3368 if (add->testdata) {
3369 int val;
3370 if (!(add->testdata=(testinfo *)Calloc(1,sizeof(testinfo))))
3371 goto badexit;
3372 memcpy(add->testdata, original->testdata, sizeof(testinfo));
3373 add->testdata->regex = CustomStrDup(add->testdata->regex, 1, __LINE__,filenameandversion);
3374 // only POSIX-portable way to make fresh copy of compiled regex is
3375 // to recompile it completely. There is no POSIX
3376 // compiled-regex-copy command.
3377 if ((val=regcomp(&(add->testdata->cregex), add->testdata->regex, REG_EXTENDED))) {
3378 char errormsg[512];
3379 regerror(val, &(add->testdata->cregex), errormsg, 512);
3380 PrintOut(LOG_CRIT, "unable to recompile regular expression %s. %s\n", add->testdata->regex, errormsg);
3381 goto badexit;
3382 }
3383 }
3384
3385 if (add->mailwarn) {
3386 if (!(add->mailwarn=(maildata *)Calloc(1,sizeof(maildata))))
3387 goto badexit;
3388 memcpy(add->mailwarn, original->mailwarn, sizeof(maildata));
3389 add->mailwarn->address = CustomStrDup(add->mailwarn->address, 0, __LINE__,filenameandversion);
3390 add->mailwarn->emailcmdline = CustomStrDup(add->mailwarn->emailcmdline, 0, __LINE__,filenameandversion);
3391 }
3392
3393 if (add->attributedefs) {
3394 if (!(add->attributedefs=(unsigned char *)Calloc(MAX_ATTRIBUTE_NUM,1)))
3395 goto badexit;
3396 memcpy(add->attributedefs, original->attributedefs, MAX_ATTRIBUTE_NUM);
3397 }
3398
3399 if (add->monitorattflags) {
3400 if (!(add->monitorattflags=(unsigned char *)Calloc(NMONITOR*32, 1)))
3401 goto badexit;
3402 memcpy(add->monitorattflags, original->monitorattflags, NMONITOR*32);
3403 }
3404
3405 if (add->smartval) {
3406 if (!(add->smartval=(struct ata_smart_values *)Calloc(1,sizeof(struct ata_smart_values))))
3407 goto badexit;
3408 }
3409
3410 if (add->smartthres) {
3411 if (!(add->smartthres=(struct ata_smart_thresholds_pvt *)Calloc(1,sizeof(struct ata_smart_thresholds_pvt))))
3412 goto badexit;
3413 }
3414
3415 return add;
3416
3417 badexit:
3418 PrintOut(LOG_CRIT, "No memory to create entry from configuration file\n");
3419 EXIT(EXIT_NOMEM);
3420 }
3421
3422
3423
3424 // This is the routine that adds things to the cfgentries list. To
3425 // prevent memory leaks when re-reading the configuration file many
3426 // times, this routine MUST deallocate any memory other than that
3427 // pointed to within cfg-> before it returns.
3428 //
3429 // Return values are:
3430 // 1: parsed a normal line
3431 // 0: found comment or blank line
3432 // -1: found SCANDIRECTIVE line
3433 // -2: found an error
3434 //
3435 // Note: this routine modifies *line from the caller!
3436 int ParseConfigLine(int entry, int lineno,char *line){
3437 char *token=NULL;
3438 char *name=NULL;
3439 char *delim = " \n\t";
3440 cfgfile *cfg=NULL;
3441 int devscan=0;
3442
3443 // get first token: device name. If a comment, skip line
3444 if (!(name=strtok(line,delim)) || *name=='#') {
3445 return 0;
3446 }
3447
3448 // Have we detected the SCANDIRECTIVE directive?
3449 if (!strcmp(SCANDIRECTIVE,name)){
3450 devscan=1;
3451 if (entry) {
3452 PrintOut(LOG_INFO,"Scan Directive %s (line %d) must be the first entry in %s\n",name, lineno, configfile);
3453 return -2;
3454 }
3455 }
3456
3457 // Is there space for another entry? If not, allocate more
3458 while (entry>=cfgentries_max)
3459 cfgentries=AllocateMoreSpace(cfgentries, &cfgentries_max, "configuration file device");
3460
3461 // We've got a legit entry, make space to store it
3462 cfg=cfgentries[entry]=CreateConfigEntry(NULL);
3463 cfg->name = CustomStrDup(name, 1, __LINE__,filenameandversion);
3464
3465 // Store line number, and by default check for both device types.
3466 cfg->lineno=lineno;
3467
3468 // Try and recognize if a IDE or SCSI device. These can be
3469 // overwritten by configuration file directives.
3470 if (cfg->controller_type==CONTROLLER_UNKNOWN)
3471 cfg->controller_type = guess_device_type(cfg->name);
3472
3473 // parse tokens one at a time from the file.
3474 while ((token=strtok(NULL,delim))){
3475 int retval=ParseToken(token,cfg);
3476
3477 if (retval==0)
3478 // No tokens left:
3479 break;
3480
3481 if (retval>0) {
3482 // Parsed token
3483 #if (0)
3484 PrintOut(LOG_INFO,"Parsed token %s\n",token);
3485 #endif
3486 continue;
3487 }
3488
3489 if (retval<0) {
3490 // error found on the line
3491 return -2;
3492 }
3493 }
3494
3495 // If we found 3ware/cciss controller, then modify device name by adding a SPACE
3496 if (cfg->controller_port) {
3497 int len=17+strlen(cfg->name);
3498 char *newname;
3499
3500 if (devscan){
3501 PrintOut(LOG_CRIT, "smartd: can not scan for 3ware/cciss devices (line %d of file %s)\n",
3502 lineno, configfile);
3503 return -2;
3504 }
3505
3506 if (!(newname=(char *)calloc(len,1))) {
3507 PrintOut(LOG_INFO,"No memory to parse file: %s line %d, %s\n", configfile, lineno, strerror(errno));
3508 EXIT(EXIT_NOMEM);
3509 }
3510
3511 // Make new device name by adding a space then RAID disk number
3512 snprintf(newname, len, "%s [%s_disk_%02d]", cfg->name, (cfg->controller_type == CONTROLLER_CCISS) ? "cciss" : "3ware",
3513 cfg->controller_port-1);
3514 cfg->name=CheckFree(cfg->name, __LINE__,filenameandversion);
3515 cfg->name=newname;
3516 bytes+=16;
3517 }
3518
3519 if (cfg->hpt_data[0]) {
3520 int len=17+strlen(cfg->name);
3521 char *newname;
3522
3523 if (devscan){
3524 PrintOut(LOG_CRIT, "smartd: can not scan for highpoint devices (line %d of file %s)\n",
3525 lineno, configfile);
3526 return -2;
3527 }
3528
3529 if (!(newname=(char *)calloc(len,1))) {
3530 PrintOut(LOG_INFO,"No memory to parse file: %s line %d, %s\n", configfile, lineno, strerror(errno));
3531 EXIT(EXIT_NOMEM);
3532 }
3533
3534 // Make new device name by adding a space then RAID disk number
3535 snprintf(newname, len, "%s [hpt_%d/%d/%d]", cfg->name, cfg->hpt_data[0],
3536 cfg->hpt_data[1], cfg->hpt_data[2]);
3537 cfg->name=CheckFree(cfg->name, __LINE__,filenameandversion);
3538 cfg->name=newname;
3539 bytes+=16;
3540 }
3541
3542 // If NO monitoring directives are set, then set all of them.
3543 if (!(cfg->smartcheck || cfg->usagefailed || cfg->prefail ||
3544 cfg->usage || cfg->selftest || cfg->errorlog ||
3545 cfg->tempdiff || cfg->tempinfo || cfg->tempcrit )) {
3546
3547 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3548 cfg->name, cfg->lineno, configfile);
3549
3550 cfg->smartcheck=1;
3551 cfg->usagefailed=1;
3552 cfg->prefail=1;
3553 cfg->usage=1;
3554 cfg->selftest=1;
3555 cfg->errorlog=1;
3556 }
3557
3558 // additional sanity check. Has user set -M options without -m?
3559 if (cfg->mailwarn && !cfg->mailwarn->address && (cfg->mailwarn->emailcmdline || cfg->mailwarn->emailfreq || cfg->mailwarn->emailtest)){
3560 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3561 cfg->name, cfg->lineno, configfile);
3562 return -2;
3563 }
3564
3565 // has the user has set <nomailer>?
3566 if (cfg->mailwarn && cfg->mailwarn->address && !strcmp(cfg->mailwarn->address,"<nomailer>")){
3567 // check that -M exec is also set
3568 if (!cfg->mailwarn->emailcmdline){
3569 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3570 cfg->name, cfg->lineno, configfile);
3571 return -2;
3572 }
3573 // now free memory. From here on the sign of <nomailer> is
3574 // address==NULL and cfg->emailcmdline!=NULL
3575 cfg->mailwarn->address=FreeNonZero(cfg->mailwarn->address, -1,__LINE__,filenameandversion);
3576 }
3577
3578 // set cfg->emailfreq to 1 (once) if user hasn't set it
3579 if (cfg->mailwarn && !cfg->mailwarn->emailfreq)
3580 cfg->mailwarn->emailfreq = 1;
3581
3582 entry++;
3583
3584 if (devscan)
3585 return -1;
3586 else
3587 return 1;
3588 }
3589
// clean up utility for ParseConfigFile(): close the stream *fpp (unless
// it is stdin) and reset *fpp to NULL.  Does nothing if *fpp is
// already NULL.
//
// is_stdin must be passed in by the caller: comparing (*fpp != stdin)
// does not work here if stdin has been closed & reopened.
void cleanup(FILE **fpp, int is_stdin){
  // nothing open, nothing to do
  if (!*fpp)
    return;

  if (!is_stdin)
    fclose(*fpp);

  *fpp=NULL;
}
3601
3602
// Parses a configuration file.  Return values are:
//  N>=0: found N entries
//    -1: syntax error in config file
//    -2: config file does not exist
//    -3: config file exists but cannot be read
//
// In the case where the return value is 0, there are three
// possibilities:
// Empty configuration file     ==> cfgentries==NULL
// No configuration file        ==> cfgentries[0]->lineno == 0
// SCANDIRECTIVE found          ==> cfgentries[0]->lineno != 0
3614 int ParseConfigFile(){
3615 FILE *fp=NULL;
3616 int entry=0,lineno=1,cont=0,contlineno=0;
3617 char line[MAXLINELEN+2];
3618 char fullline[MAXCONTLINE+1];
3619
3620 int is_stdin = (configfile == configfile_stdin); // pointer comparison ok here
3621
3622 // Open config file, if it exists and is not <stdin>
3623 if (!is_stdin) {
3624 fp=fopen(configfile,"r");
3625 if (fp==NULL && (errno!=ENOENT || configfile_alt)) {
3626 // file exists but we can't read it or it should exist due to '-c' option
3627 int ret = (errno!=ENOENT ? -3 : -2);
3628 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
3629 strerror(errno),configfile);
3630 return ret;
3631 }
3632 }
3633 else // read from stdin ('-c -' option)
3634 fp = stdin;
3635
3636 // No configuration file found -- use fake one
3637 if (fp==NULL) {
3638 int len=strlen(SCANDIRECTIVE)+4;
3639 char *fakeconfig=(char *)calloc(len,1);
3640
3641 if (!fakeconfig ||
3642 (len-1) != snprintf(fakeconfig, len, "%s -a", SCANDIRECTIVE) ||
3643 -1 != ParseConfigLine(entry, 0, fakeconfig)
3644 ) {
3645 PrintOut(LOG_CRIT,"Internal error in ParseConfigFile() at line %d of file %s\n%s",
3646 __LINE__, filenameandversion, reportbug);
3647 EXIT(EXIT_BADCODE);
3648 }
3649 fakeconfig=CheckFree(fakeconfig, __LINE__,filenameandversion);
3650 return 0;
3651 }
3652
3653 #ifdef __CYGWIN__
3654 setmode(fileno(fp), O_TEXT); // Allow files with \r\n
3655 #endif
3656
3657 // configuration file exists
3658 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
3659
3660 // parse config file line by line
3661 while (1) {
3662 int len=0,scandevice;
3663 char *lastslash;
3664 char *comment;
3665 char *code;
3666
3667 // make debugging simpler
3668 memset(line,0,sizeof(line));
3669
3670 // get a line
3671 code=fgets(line,MAXLINELEN+2,fp);
3672
3673 // are we at the end of the file?
3674 if (!code){
3675 if (cont) {
3676 scandevice=ParseConfigLine(entry,contlineno,fullline);
3677 // See if we found a SCANDIRECTIVE directive
3678 if (scandevice==-1) {
3679 cleanup(&fp, is_stdin);
3680 return 0;
3681 }
3682 // did we find a syntax error
3683 if (scandevice==-2) {
3684 cleanup(&fp, is_stdin);
3685 return -1;
3686 }
3687 // the final line is part of a continuation line
3688 cont=0;
3689 entry+=scandevice;
3690 }
3691 break;
3692 }
3693
3694 // input file line number
3695 contlineno++;
3696
3697 // See if line is too long
3698 len=strlen(line);
3699 if (len>MAXLINELEN){
3700 char *warn;
3701 if (line[len-1]=='\n')
3702 warn="(including newline!) ";
3703 else
3704 warn="";
3705 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
3706 (int)contlineno,configfile,warn,(int)MAXLINELEN);
3707 cleanup(&fp, is_stdin);
3708 return -1;
3709 }
3710
3711 // Ignore anything after comment symbol
3712 if ((comment=strchr(line,'#'))){
3713 *comment='\0';
3714 len=strlen(line);
3715 }
3716
3717 // is the total line (made of all continuation lines) too long?
3718 if (cont+len>MAXCONTLINE){
3719 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
3720 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
3721 cleanup(&fp, is_stdin);
3722 return -1;
3723 }
3724
3725 // copy string so far into fullline, and increment length
3726 strcpy(fullline+cont,line);
3727 cont+=len;
3728
3729 // is this a continuation line. If so, replace \ by space and look at next line
3730 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
3731 *(fullline+(cont-len)+(lastslash-line))=' ';
3732 continue;
3733 }
3734
3735 // Not a continuation line. Parse it
3736 scandevice=ParseConfigLine(entry,contlineno,fullline);
3737
3738 // did we find a scandevice directive?
3739 if (scandevice==-1) {
3740 cleanup(&fp, is_stdin);
3741 return 0;
3742 }
3743 // did we find a syntax error
3744 if (scandevice==-2) {
3745 cleanup(&fp, is_stdin);
3746 return -1;
3747 }
3748
3749 entry+=scandevice;
3750 lineno++;
3751 cont=0;
3752 }
3753 cleanup(&fp, is_stdin);
3754
3755 // note -- may be zero if syntax of file OK, but no valid entries!
3756 return entry;
3757 }
3758
3759
3760 // Prints copyright, license and version information
3761 void PrintCopyleft(void){
3762 debugmode=1;
3763 PrintHead();
3764 PrintCVS();
3765 return;
3766 }
3767
3768 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
3769 <LIST> is the list of valid arguments for option opt. */
3770 void PrintValidArgs(char opt) {
3771 const char *s;
3772
3773 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
3774 if (!(s = GetValidArgList(opt)))
3775 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
3776 else
3777 PrintOut(LOG_CRIT, (char *)s);
3778 PrintOut(LOG_CRIT, " <=======\n");
3779 }
3780
3781 // Parses input line, prints usage message and
3782 // version/license/copyright messages
3783 void ParseOpts(int argc, char **argv){
3784 extern char *optarg;
3785 extern int optopt, optind, opterr;
3786 int optchar;
3787 int badarg;
3788 char *tailptr;
3789 long lchecktime;
3790 // Please update GetValidArgList() if you edit shortopts
3791 const char *shortopts = "c:l:q:dDi:p:r:Vh?";
3792 #ifdef HAVE_GETOPT_LONG
3793 char *arg;
3794 // Please update GetValidArgList() if you edit longopts
3795 struct option longopts[] = {
3796 { "configfile", required_argument, 0, 'c' },
3797 { "logfacility", required_argument, 0, 'l' },
3798 { "quit", required_argument, 0, 'q' },
3799 { "debug", no_argument, 0, 'd' },
3800 { "showdirectives", no_argument, 0, 'D' },
3801 { "interval", required_argument, 0, 'i' },
3802 { "pidfile", required_argument, 0, 'p' },
3803 { "report", required_argument, 0, 'r' },
3804 #if defined(_WIN32) || defined(__CYGWIN__)
3805 { "service", no_argument, 0, 'S' },
3806 #endif
3807 { "version", no_argument, 0, 'V' },
3808 { "license", no_argument, 0, 'V' },
3809 { "copyright", no_argument, 0, 'V' },
3810 { "help", no_argument, 0, 'h' },
3811 { "usage", no_argument, 0, 'h' },
3812 { 0, 0, 0, 0 }
3813 };
3814 #endif
3815
3816 opterr=optopt=0;
3817 badarg=FALSE;
3818
3819 // Parse input options. This horrible construction is so that emacs
3820 // indents properly. Sorry.
3821 while (-1 != (optchar =
3822 #ifdef HAVE_GETOPT_LONG
3823 getopt_long(argc, argv, shortopts, longopts, NULL)
3824 #else
3825 getopt(argc, argv, shortopts)
3826 #endif
3827 )) {
3828
3829 switch(optchar) {
3830 case 'q':
3831 // when to quit
3832 if (!(strcmp(optarg,"nodev"))) {
3833 quit=0;
3834 } else if (!(strcmp(optarg,"nodevstartup"))) {
3835 quit=1;
3836 } else if (!(strcmp(optarg,"never"))) {
3837 quit=2;
3838 } else if (!(strcmp(optarg,"onecheck"))) {
3839 quit=3;
3840 debugmode=1;
3841 } else if (!(strcmp(optarg,"showtests"))) {
3842 quit=4;
3843 debugmode=1;
3844 } else if (!(strcmp(optarg,"errors"))) {
3845 quit=5;
3846 } else {
3847 badarg = TRUE;
3848 }
3849 break;
3850 case 'l':
3851 // set the log facility level
3852 if (!strcmp(optarg, "daemon"))
3853 facility=LOG_DAEMON;
3854 else if (!strcmp(optarg, "local0"))
3855 facility=LOG_LOCAL0;
3856 else if (!strcmp(optarg, "local1"))
3857 facility=LOG_LOCAL1;
3858 else if (!strcmp(optarg, "local2"))
3859 facility=LOG_LOCAL2;
3860 else if (!strcmp(optarg, "local3"))
3861 facility=LOG_LOCAL3;
3862 else if (!strcmp(optarg, "local4"))
3863 facility=LOG_LOCAL4;
3864 else if (!strcmp(optarg, "local5"))
3865 facility=LOG_LOCAL5;
3866 else if (!strcmp(optarg, "local6"))
3867 facility=LOG_LOCAL6;
3868 else if (!strcmp(optarg, "local7"))
3869 facility=LOG_LOCAL7;
3870 else
3871 badarg = TRUE;
3872 break;
3873 case 'd':
3874 // enable debug mode
3875 debugmode = TRUE;
3876 break;
3877 case 'D':
3878 // print summary of all valid directives
3879 debugmode = TRUE;
3880 Directives();
3881 EXIT(0);
3882 break;
3883 case 'i':
3884 // Period (time interval) for checking
3885 // strtol will set errno in the event of overflow, so we'll check it.
3886 errno = 0;
3887 lchecktime = strtol(optarg, &tailptr, 10);
3888 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
3889 debugmode=1;
3890 PrintHead();
3891 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
3892 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
3893 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3894 EXIT(EXIT_BADCMD);
3895 }
3896 checktime = (int)lchecktime;
3897 break;
3898 case 'r':
3899 // report IOCTL transactions
3900 {
3901 int i;
3902 char *s;
3903
3904 // split_report_arg() may modify its first argument string, so use a
3905 // copy of optarg in case we want optarg for an error message.
3906 if (!(s = strdup(optarg))) {
3907 PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
3908 EXIT(EXIT_NOMEM);
3909 }
3910 if (split_report_arg(s, &i)) {
3911 badarg = TRUE;
3912 } else if (i<1 || i>3) {
3913 debugmode=1;
3914 PrintHead();
3915 PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
3916 PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
3917 EXIT(EXIT_BADCMD);
3918 } else if (!strcmp(s,"ioctl")) {
3919 con->reportataioctl = con->reportscsiioctl = i;
3920 } else if (!strcmp(s,"ataioctl")) {
3921 con->reportataioctl = i;
3922 } else if (!strcmp(s,"scsiioctl")) {
3923 con->reportscsiioctl = i;
3924 } else {
3925 badarg = TRUE;
3926 }
3927 s=CheckFree(s, __LINE__,filenameandversion);
3928 }
3929 break;
3930 case 'c':
3931 // alternate configuration file
3932 if (strcmp(optarg,"-"))
3933 configfile=configfile_alt=CustomStrDup(optarg, 1, __LINE__,filenameandversion);
3934 else // read from stdin
3935 configfile=configfile_stdin;
3936 break;
3937 case 'p':
3938 // output file with PID number
3939 pid_file=CustomStrDup(optarg, 1, __LINE__,filenameandversion);
3940 break;
3941 #if defined(_WIN32) || defined(__CYGWIN__)
3942 case 'S':
3943 // running as service
3944 #ifdef __CYGWIN__ // On Windows, option is already handled by daemon_main(), so ignore it
3945 is_service = 1;
3946 #endif
3947 break;
3948 #endif // _WIN32 || __CYGWIN__
3949 case 'V':
3950 // print version and CVS info
3951 PrintCopyleft();
3952 EXIT(0);
3953 break;
3954 case 'h':
3955 // help: print summary of command-line options
3956 debugmode=1;
3957 PrintHead();
3958 Usage();
3959 EXIT(0);
3960 break;
3961 case '?':
3962 default:
3963 // unrecognized option
3964 debugmode=1;
3965 PrintHead();
3966 #ifdef HAVE_GETOPT_LONG
3967 // Point arg to the argument in which this option was found.
3968 arg = argv[optind-1];
3969 // Check whether the option is a long option that doesn't map to -h.
3970 if (arg[1] == '-' && optchar != 'h') {
3971 // Iff optopt holds a valid option then argument must be missing.
3972 if (optopt && (strchr(shortopts, optopt) != NULL)) {
3973 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
3974 PrintValidArgs(optopt);
3975 } else {
3976 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
3977 }
3978 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
3979 EXIT(EXIT_BADCMD);
3980 }
3981 #endif
3982 if (optopt) {
3983 // Iff optopt holds a valid option then argument must be missing.
3984 if (strchr(shortopts, optopt) != NULL){
3985 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
3986 PrintValidArgs(optopt);
3987 } else {
3988 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
3989 }
3990 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
3991 EXIT(EXIT_BADCMD);
3992 }
3993 Usage();
3994 EXIT(0);
3995 }
3996
3997 // Check to see if option had an unrecognized or incorrect argument.
3998 if (badarg) {
3999 debugmode=1;
4000 PrintHead();
4001 // It would be nice to print the actual option name given by the user
4002 // here, but we just print the short form. Please fix this if you know
4003 // a clean way to do it.
4004 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4005 PrintValidArgs(optchar);
4006 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4007 EXIT(EXIT_BADCMD);
4008 }
4009 }
4010
4011 // non-option arguments are not allowed
4012 if (argc > optind) {
4013 debugmode=1;
4014 PrintHead();
4015 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4016 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4017 EXIT(EXIT_BADCMD);
4018 }
4019
4020 // no pidfile in debug mode
4021 if (debugmode && pid_file) {
4022 debugmode=1;
4023 PrintHead();
4024 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4025 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file);
4026 pid_file=FreeNonZero(pid_file, -1,__LINE__,filenameandversion);
4027 EXIT(EXIT_BADCMD);
4028 }
4029
4030 // print header
4031 PrintHead();
4032
4033 return;
4034 }
4035
4036 // Function we call if no configuration file was found or if the
4037 // SCANDIRECTIVE Directive was found. It makes entries for device
4038 // names returned by make_device_names() in os_OSNAME.c
4039 int MakeConfigEntries(const char *type, int start){
4040 int i;
4041 int num;
4042 char** devlist = NULL;
4043 cfgfile *first=cfgentries[0],*cfg=first;
4044
4045 // Hack! This is to make DEVICESCAN work on Linux libata devices.
4046 // This will work on a general OS if the way that SAT devices are
4047 // named is the same as SCSI devices.
4048 // The BETTER solution is to modify make_device_names to recognize
4049 // the additional type "SAT". This requires changing os_*.cpp.
4050
4051 const char *basetype = type;
4052 if (!strcmp(type,"SAT") )
4053 basetype = "SCSI";
4054
4055 // make list of devices
4056 if ((num=make_device_names(&devlist,basetype))<0)
4057 PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4058
4059 // if no devices, or error constructing list, return
4060 if (num<=0)
4061 return 0;
4062
4063 // loop over entries to create
4064 for (i=0; i<num; i++){
4065
4066 // make storage and copy for all but first entry
4067 if (start+i) {
4068 // allocate more storage if needed
4069 while (cfgentries_max<=start+i)
4070 cfgentries=AllocateMoreSpace(cfgentries, &cfgentries_max, "simulated configuration file device");
4071 cfg=cfgentries[start+i]=CreateConfigEntry(first);
4072 }
4073
4074 // ATA or SCSI?
4075 if (!strcmp(type,"ATA") )
4076 cfg->controller_type = CONTROLLER_ATA;
4077 if (!strcmp(type,"SCSI") )
4078 cfg->controller_type = CONTROLLER_SCSI;
4079 if (!strcmp(type,"SAT") )
4080 cfg->controller_type = CONTROLLER_SAT;
4081
4082 // remove device name, if it's there, and put in correct one
4083 cfg->name=FreeNonZero(cfg->name, -1,__LINE__,filenameandversion);
4084 // save pointer to the device name created within
4085 // make_device_names
4086 cfg->name=devlist[i];
4087 }
4088
4089 // If needed, free memory used for devlist: pointers now in
4090 // cfgentries[]->names. If num==0 we never get to this point, but
4091 // that's OK. If we realloc()d the array length in
4092 // make_device_names() that was ALREADY equivalent to calling
4093 // free().
4094 devlist = FreeNonZero(devlist,(sizeof (char*) * num),__LINE__, filenameandversion);
4095
4096 return num;
4097 }
4098
4099 void CanNotRegister(char *name, char *type, int line, int scandirective){
4100 if( !debugmode && scandirective == 1 ) { return; }
4101 if (line)
4102 PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4103 "Unable to register %s device %s at line %d of file %s\n",
4104 type, name, line, configfile);
4105 else
4106 PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4107 type, name);
4108 return;
4109 }
4110
4111 // Returns negative value (see ParseConfigFile()) if config file
4112 // had errors, else number of entries which may be zero or positive.
4113 // If we found no configuration file, or it contained SCANDIRECTIVE,
4114 // then *scanning is set to 1, else 0.
4115 int ReadOrMakeConfigEntries(int *scanning){
4116 int entries;
4117
4118 // deallocate any cfgfile data structures in memory
4119 RmAllConfigEntries();
4120
4121 // parse configuration file configfile (normally /etc/smartd.conf)
4122 if ((entries=ParseConfigFile())<0) {
4123
4124 // There was an error reading the configuration file.
4125 RmAllConfigEntries();
4126 if (entries == -1)
4127 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4128 return entries;
4129 }
4130
4131 // did we find entries or scan?
4132 *scanning=0;
4133
4134 // no error parsing config file.
4135 if (entries) {
4136 // we did not find a SCANDIRECTIVE and did find valid entries
4137 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4138 }
4139 else if (cfgentries && cfgentries[0]) {
4140 // we found a SCANDIRECTIVE or there was no configuration file so
4141 // scan. Configuration file's first entry contains all options
4142 // that were set
4143 cfgfile *first=cfgentries[0];
4144
4145 // By default scan for ATA, SCSI and SAT devices
4146 int doata=1, doscsi=1, dosat=1;
4147
4148 if (first->controller_type==CONTROLLER_SCSI) {
4149 doata = 0;
4150 dosat = 0;
4151 } else if (first->controller_type==CONTROLLER_ATA) {
4152 doscsi = 0;
4153 dosat = 0;
4154 } else if (first->controller_type==CONTROLLER_SAT) {
4155 doata = 0;
4156 doscsi = 0;
4157 }
4158
4159 *scanning=1;
4160
4161 if (first->lineno)
4162 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4163 else
4164 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4165
4166 // make config list of ATA devices to search for
4167 if (doata)
4168 entries+=MakeConfigEntries("ATA", entries);
4169 // make config list of SCSI devices to search for
4170 if (doscsi)
4171 entries+=MakeConfigEntries("SCSI", entries);
4172 if (dosat)
4173 entries+=MakeConfigEntries("SAT", entries);
4174
4175 // warn user if scan table found no devices
4176 if (!entries) {
4177 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4178 // get rid of fake entry with SCANDIRECTIVE as name
4179 RmConfigEntry(cfgentries, __LINE__);
4180 }
4181 }
4182 else
4183 PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4184
4185 return entries;
4186 }
4187
4188
4189 // This function tries devices from cfgentries. Each one that can be
4190 // registered is moved onto the [ata|scsi]devices lists and removed
4191 // from the cfgentries list, else it's memory is deallocated.
4192 void RegisterDevices(int scanning){
4193 int i;
4194
4195 // start by clearing lists/memory of ALL existing devices
4196 RmAllDevEntries();
4197 numdevata=numdevscsi=0;
4198
4199 // Register entries
4200 for (i=0; i<cfgentries_max ; i++){
4201
4202 cfgfile *ent=cfgentries[i];
4203
4204 // skip any NULL entries (holes)
4205 if (!ent)
4206 continue;
4207
4208 // register ATA devices
4209 if (ent->controller_type!=CONTROLLER_SCSI && ent->controller_type!=CONTROLLER_CCISS){
4210 if (ATADeviceScan(ent, scanning))
4211 CanNotRegister(ent->name, "ATA", ent->lineno, scanning);
4212 else {
4213 // move onto the list of ata devices
4214 cfgentries[i]=NULL;
4215 while (numdevata>=atadevlist_max)
4216 atadevlist=AllocateMoreSpace(atadevlist, &atadevlist_max, "ATA device");
4217 atadevlist[numdevata++]=ent;
4218 }
4219 }
4220
4221 // then register SCSI devices
4222 if (ent->controller_type==CONTROLLER_SCSI || ent->controller_type==CONTROLLER_CCISS ||
4223 ent->controller_type==CONTROLLER_UNKNOWN){
4224 int retscsi=0;
4225
4226 #if SCSITIMEOUT
4227 struct sigaction alarmAction, defaultaction;
4228
4229 // Set up an alarm handler to catch USB devices that hang on
4230 // SCSI scanning...
4231 alarmAction.sa_handler= AlarmHandler;
4232 alarmAction.sa_flags = SA_RESTART;
4233 if (sigaction(SIGALRM, &alarmAction, &defaultaction)) {
4234 // if we can't set timeout, just scan device
4235 PrintOut(LOG_CRIT, "Unable to initialize SCSI timeout mechanism.\n");
4236 retscsi=SCSIDeviceScan(ent, scanning);
4237 }
4238 else {
4239 // prepare return point in case of bad SCSI device
4240 if (setjmp(registerscsienv))
4241 // SCSI device timed out!
4242 retscsi=-1;
4243 else {
4244 // Set alarm, make SCSI call, reset alarm
4245 alarm(SCSITIMEOUT);
4246 retscsi=SCSIDeviceScan(ent, scanning);
4247 alarm(0);
4248 }
4249 if (sigaction(SIGALRM, &defaultaction, NULL)){
4250 PrintOut(LOG_CRIT, "Unable to clear SCSI timeout mechanism.\n");
4251 }
4252 }
4253 #else
4254 retscsi=SCSIDeviceScan(ent, scanning);
4255 #endif
4256
4257 // Now scan SCSI device...
4258 if (retscsi){
4259 if (retscsi<0)
4260 PrintOut(LOG_CRIT, "Device %s timed out (poorly-implemented USB device?)\n", ent->name);
4261 CanNotRegister(ent->name, "SCSI", ent->lineno, scanning);
4262 }
4263 else {
4264 // move onto the list of scsi devices
4265 cfgentries[i]=NULL;
4266 while (numdevscsi>=scsidevlist_max)
4267 scsidevlist=AllocateMoreSpace(scsidevlist, &scsidevlist_max, "SCSI device");
4268 scsidevlist[numdevscsi++]=ent;
4269 }
4270 }
4271
4272 // if device is explictly listed and we can't register it, then
4273 // exit unless the user has specified that the device is removable
4274 if (cfgentries[i] && !scanning){
4275 if (ent->removable || quit==2)
4276 PrintOut(LOG_INFO, "Device %s not available\n", ent->name);
4277 else {
4278 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", ent->name);
4279 EXIT(EXIT_BADDEV);
4280 }
4281 }
4282
4283 // free up memory if device could not be registered
4284 RmConfigEntry(cfgentries+i, __LINE__);
4285 }
4286
4287 return;
4288 }
4289
4290
4291 #ifndef _WIN32
4292 // Main function
4293 int main(int argc, char **argv)
4294 #else
4295 // Windows: internal main function started direct or by service control manager
4296 static int smartd_main(int argc, char **argv)
4297 #endif
4298 {
4299 // external control variables for ATA disks
4300 smartmonctrl control;
4301
4302 // is it our first pass through?
4303 int firstpass=1;
4304
4305 // next time to wake up
4306 time_t wakeuptime;
4307
4308 // for simplicity, null all global communications variables/lists
4309 con=&control;
4310 memset(con, 0,sizeof(control));
4311
4312 // parse input and print header and usage info if needed
4313 ParseOpts(argc,argv);
4314
4315 // do we mute printing from ataprint commands?
4316 con->printing_switchable=0;
4317 con->dont_print=debugmode?0:1;
4318
4319 // don't exit on bad checksums
4320 con->checksumfail=0;
4321
4322 // the main loop of the code
4323 while (1){
4324
4325 // are we exiting from a signal?
4326 if (caughtsigEXIT) {
4327 // are we exiting with SIGTERM?
4328 int isterm=(caughtsigEXIT==SIGTERM);
4329 int isquit=(caughtsigEXIT==SIGQUIT);
4330 int isok=debugmode?isterm || isquit:isterm;
4331
4332 PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4333 caughtsigEXIT, strsignal(caughtsigEXIT));
4334
4335 EXIT(isok?0:EXIT_SIGNAL);
4336 }
4337
4338 // Should we (re)read the config file?
4339 if (firstpass || caughtsigHUP){
4340 int entries, scanning=0;
4341
4342 if (!firstpass) {
4343 #ifdef __CYGWIN__
4344 // Workaround for missing SIGQUIT via keyboard on Cygwin
4345 if (caughtsigHUP==2) {
4346 // Simulate SIGQUIT if another SIGINT arrives soon
4347 caughtsigHUP=0;
4348 sleep(1);
4349 if (caughtsigHUP==2) {
4350 caughtsigEXIT=SIGQUIT;
4351 continue;
4352 }
4353 caughtsigHUP=2;
4354 }
4355 #endif
4356 PrintOut(LOG_INFO,
4357 caughtsigHUP==1?
4358 "Signal HUP - rereading configuration file %s\n":
4359 "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4360 configfile);
4361 }
4362
4363 // clears cfgentries, (re)reads config file, makes >=0 entries
4364 entries=ReadOrMakeConfigEntries(&scanning);
4365
4366 if (entries>=0) {
4367 // checks devices, then moves onto ata/scsi list or deallocates.
4368 RegisterDevices(scanning);
4369 }
4370 else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4371 // user has asked to continue on error in configuration file
4372 if (!firstpass)
4373 PrintOut(LOG_INFO,"Reusing previous configuration\n");
4374 }
4375 else {
4376 // exit with configuration file error status
4377 int status = (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4378 EXIT(status);
4379 }
4380
4381 // Log number of devices we are monitoring...
4382 if (numdevata+numdevscsi || quit==2 || (quit==1 && !firstpass))
4383 PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4384 numdevata, numdevscsi);
4385 else {
4386 PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4387 EXIT(EXIT_NODEV);
4388 }
4389
4390 if (quit==4) {
4391 // user has asked to print test schedule
4392 PrintTestSchedule(atadevlist, scsidevlist);
4393 EXIT(0);
4394 }
4395
4396 // reset signal
4397 caughtsigHUP=0;
4398 }
4399
4400 // check all devices once
4401 CheckDevicesOnce(atadevlist, scsidevlist);
4402
4403 // user has asked us to exit after first check
4404 if (quit==3) {
4405 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4406 "smartd is exiting (exit status 0)\n");
4407 EXIT(0);
4408 }
4409
4410 // fork into background if needed
4411 if (firstpass && !debugmode) {
4412 #ifdef __CYGWIN__
4413 if (!is_service) // don't fork() if running as service via cygrunsrv
4414 #endif
4415 DaemonInit();
4416 }
4417
4418 // set exit and signal handlers, write PID file, set wake-up time
4419 if (firstpass){
4420 Initialize(&wakeuptime);
4421 firstpass=0;
4422 }
4423
4424 // sleep until next check time, or a signal arrives
4425 wakeuptime=dosleep(wakeuptime);
4426 }
4427 }
4428
4429
#ifdef _WIN32
// Main function for Windows.
// daemon_main() handles daemon and service specific commands
// and starts smartd_main() direct, from a new process,
// or via service control manager.
int main(int argc, char **argv)
{
  // Options for smartd windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd",
    "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  return daemon_main("smartd", &service_options, smartd_main, argc, argv);
}
#endif