]> git.proxmox.com Git - pve-common.git/blob - data/PVE/Daemon.pm
Daemon: keep lock file open during restart
[pve-common.git] / data / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
# * lock and write PID file /var/run/$name.pid to make sure only
# one instance is running.
8 # * correctly daemonize (redirect STDIN/STDOUT)
9 # * restart by stop/start, exec, or signal HUP
10 # * daemon restart on error (option 'restart_on_error')
11 # * handle worker processes (option 'max_workers')
12
13 use strict;
14 use warnings;
15 use PVE::SafeSyslog;
16 use PVE::INotify;
17
18 use POSIX ":sys_wait_h";
19 use Fcntl ':flock';
20 use Getopt::Long;
21 use Time::HiRes qw (gettimeofday);
22
23 use base qw(PVE::CLIHandler);
24
# Send every Perl warning to STDOUT and to syslog. $@ is saved up front and
# restored at the end so that reporting a warning never clobbers a pending
# eval error.
$SIG{'__WARN__'} = sub {
    my $saved_eval_error = $@;

    my $message = $_[0];
    chomp $message;

    print "$message\n";
    syslog('warning', "WARNING: %s", $message);

    $@ = $saved_eval_error;
};

# use a fixed search path for everything we execute
$ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

# set by new() - we only allow one instance
my $daemon_initialized = 0;
37
# Close the daemon lock file handle stored in $self->{daemon_lock_fh} and
# remove it from the object. Does nothing if no handle is stored.
my $close_daemon_lock = sub {
    my ($self) = @_;

    my $lock_fh = $self->{daemon_lock_fh} or return;

    close $lock_fh;
    delete $self->{daemon_lock_fh};
};
46
# Call this if you fork() from a child process.
# Note: this is already called for workers, so it is only required
# if you fork inside a simple daemon (max_workers == 0).
#
# Closes the daemon lock handle, closes inotify and resets the handlers
# for CHLD, HUP, INT, TERM and QUIT.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    # AnyEvent signals only work if $SIG{XX} is undefined (perl event loop),
    # so restore the default handler first and then drop the entry, which
    # allows events to be handled with AnyEvent.
    foreach my $signame (qw(CHLD HUP INT TERM QUIT)) {
        $SIG{$signame} = 'DEFAULT';
        delete $SIG{$signame};
    }
}
64
# Open the lock file "<pidfile>.lock" (or re-use the file descriptor passed
# in $self->{env_pve_lock_fd}) and take an exclusive, non-blocking lock on it.
# The handle is stored in $self->{daemon_lock_fh}.
#
# The lock is tried once per second for five seconds and then one final
# time; if it still cannot be taken the handle is closed and we die.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $inherited_fd = $self->{env_pve_lock_fd};

    $self->{daemon_lock_fh} = $inherited_fd
        ? IO::Handle->new_from_fd($inherited_fd, "a")
        : IO::File->new(">>$lkfn");

    die "can't open lock '$lkfn' - $!\n" if !$self->{daemon_lock_fh};

    foreach my $attempt (1 .. 5) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    # last chance
    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        $close_daemon_lock->($self);
        die "can't aquire lock '$lkfn' - $!\n";
    }
};
93
# Write the PID of the current process (followed by a newline) to
# $self->{pidfile}, replacing any previous content.
#
# Any failure (open, write or close) is logged to syslog and raised as
# "ERROR: ..." so that a missing or empty PID file never goes unnoticed.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # log the problem and abort
    my $fail = sub {
        my ($msg) = @_;
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    };

    # lexical handle + three-argument open: no global bareword handle and
    # no mode injection through the file name
    open(my $pid_fh, '>', $pidfile)
        or $fail->("can't open pid file '$pidfile' - $!");

    print {$pid_fh} "$$\n"
        or $fail->("can't write pid file '$pidfile' - $!");

    # buffered write errors (e.g. disk full) only show up on close
    close($pid_fh)
        or $fail->("can't close pid file '$pidfile' - $!");
};
107
# Remove the lock file ("<pidfile>.lock") and the PID file itself.
# Failures to unlink are ignored.
my $server_cleanup = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    unlink "$pidfile.lock";
    unlink $pidfile;
};
114
# Reap every worker that has already exited (non-blocking) and remove it
# from $self->{workers}.
my $finish_workers = sub {
    my ($self) = @_;

    for my $worker_pid (keys %{$self->{workers}}) {
        my $reaped = waitpid($worker_pid, WNOHANG);

        # skip workers that are still running (or are not our children)
        next if !defined($reaped) || $reaped != $worker_pid;

        delete $self->{workers}->{$worker_pid};
        syslog('info', "worker $worker_pid finished");
    }
};
126
# Fork worker processes until $self->{max_workers} workers are registered in
# $self->{workers}. Does nothing once $self->{terminate} is set.
#
# In the parent the new PID is recorded. The child renames itself, calls
# after_fork_cleanup(), runs $self->run() and exits with status 0 once
# run() returns or dies.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $active = scalar(keys %{$self->{workers}});
    my $missing = $self->{max_workers} - $active;

    return if $missing <= 0;

    syslog('info', "starting $missing worker(s)");

    while ($missing > 0) {
        my $pid = fork;

        if (!defined($pid)) {
            # fork failed - wait a moment and try again
            syslog('err', "can't fork worker");
            sleep(1);
            next;
        }

        if ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $missing--;
            next;
        }

        # child
        $0 = "$self->{name} worker";

        $self->after_fork_cleanup();

        eval { $self->run(); };
        if (my $err = $@) {
            syslog('err', $err);
            sleep(5); # avoid fast restarts
        }

        syslog('info', "worker exit");
        exit(0);
    }
};
169
# Stop the server: set $self->{terminate}, call $self->shutdown() and - if
# this daemon uses workers - terminate all of them.
#
# Workers get a TERM signal and up to 10 seconds to exit. Workers that are
# still alive after that are killed with KILL and reaped, so that no zombie
# is left behind (zombies would otherwise survive a restart via exec).
# An alarm that was pending before the call is re-armed afterwards.
my $terminate_server = sub {
    my ($self) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    eval { $self->shutdown(); };
    warn $@ if $@;

    # simple daemon without workers - nothing else to do
    return if !$self->{max_workers};

    # we have workers - terminate them

    foreach my $cpid (keys %{$self->{workers}}) {
        kill(15, $cpid); # TERM children
    }

    # nicely shutdown children (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $pid = waitpid(-1, 0)) > 0) {
            if (defined($self->{workers}->{$pid})) {
                delete($self->{workers}->{$pid});
                syslog('info', "worker $pid finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    if ($err) {
        syslog('err', "error stopping workers (will kill them now) - $err");
        foreach my $cpid (keys %{$self->{workers}}) {
            # KILL children still alive!
            if (kill(0, $cpid)) {
                delete($self->{workers}->{$cpid});
                syslog("err", "kill worker $cpid");
                kill(9, $cpid);
                # KILL cannot be caught or ignored, so collect the exit
                # status right away instead of leaving a zombie
                waitpid($cpid, 0);
            }
        }
    }
};
220
# Main body of the server process.
#
# Steps, in order: take the daemon lock and export its file descriptor in
# PVE_DAEMON_LOCK_FD, call $self->init(), detach from the terminal (skipped
# in debug mode; the fork is also skipped when we were re-executed for a
# restart), write the PID file, install the signal handlers and then either
# supervise the workers (max_workers set) or call $self->run().
#
# Parameters:
#   $self  - the daemon object
#   $debug - if true, stay in the foreground and keep STDIN/STDOUT
#
# The sub finishes by calling $self->restart_daemon() or
# $self->exit_daemon(). It dies if locking, redirecting the standard
# handles, forking or writing the PID file fails.
my $server_run = sub {
    my ($self, $debug) = @_;

    # Chain to a previously installed signal handler. %SIG may also hold
    # plain strings such as 'IGNORE' or 'DEFAULT'; calling those as a sub
    # would die under 'use strict', so only references are invoked.
    my $call_previous_handler = sub {
        my ($handler, @args) = @_;
        $handler->(@args) if ref($handler);
    };

    # fixme: handle restart lockfd
    $lockpidfile->($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    # my $fd = POSIX::dup($self->{daemon_lock_fh}) ||
    # die "unable to duplicate daemon_lock_fh\n";

    # run in background
    my $spid;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # note: 'or' (not '||') so that the die applies to the open call
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null - $!\n";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null - $!\n";
    }

    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined($spid)) {
            my $msg = "can't put server into background - fork failed";
            syslog('err', $msg);
            die "ERROR: $msg\n";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    $writepidfile->($self);

    POSIX::setsid();

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        syslog('info' , "starting server");
    }

    open(STDERR, '>&', \*STDOUT) or die "can't redirect STDERR to STDOUT - $!\n";

    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        $terminate_server->($self);
        $server_cleanup->($self);
        $call_previous_handler->($old_sig_term, @_);
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        $terminate_server->($self);
        $server_cleanup->($self);
        $call_previous_handler->($old_sig_quit, @_);
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        $terminate_server->($self);
        $server_cleanup->($self);
        $call_previous_handler->($old_sig_int, @_);
    };

    # HUP: daemons with workers stop them and restart via exec (see the end
    # of this sub); simple daemons call their optional hup() method.
    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        if ($self->{max_workers}) {
            $terminate_server->($self);
            $self->{got_hup_signal} = 1;
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                $finish_workers->($self);
                $call_previous_handler->($old_sig_chld, @_);
            };

            for (;;) { # forever
                $start_workers->($self);
                sleep(5);
                $finish_workers->($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        syslog ('err', "ERROR: $err");

        # fixme: kill all workers

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
353
# Constructor.
#
# Parameters:
#   $name    - daemon name; used for syslog, the process title and the
#              PID file name ("<run_dir>/<name>.pid")
#   $cmdline - array reference with the command line used to re-execute
#              the daemon on restart
#   %params  - options: restart_on_error, stop_wait_time, run_dir,
#              max_workers
#
# Reads and removes RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD from the
# environment. Dies if the name is missing, if an unknown option is passed,
# if a PVE::Daemon was already created in this process, or if not running
# as root (not checked when RESTART_PVE_DAEMON is set).
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    die "missing name" if !$name;

    initlog($name);

    my $restart = delete $ENV{RESTART_PVE_DAEMON};
    my $lockfd = delete $ENV{PVE_DAEMON_LOCK_FD};

    die "please run as root\n" if !$restart && ($> != 0);

    die "can't create more that one PVE::Daemon" if $daemon_initialized;
    $daemon_initialized = 1;

    PVE::INotify::inotify_init();

    my $class = ref($this) || $this;

    my $self = bless {
        name => $name,
        run_dir => '/var/run',
        env_restart_pve_daemon => $restart,
        env_pve_lock_fd => $lockfd,
        workers => {},
    }, $class;

    my %known_option = map { $_ => 1 }
        qw(restart_on_error stop_wait_time run_dir max_workers);

    foreach my $opt (keys %params) {
        die "unknown option '$opt'" if !$known_option{$opt};
        $self->{$opt} = $params{$opt};
    }

    $self->{pidfile} = "$self->{run_dir}/${name}.pid";

    $self->{nodename} = PVE::INotify::nodename();

    $self->{cmdline} = [];

    foreach my $el (@$cmdline) {
        # untaint - with /s the pattern matches any string (including
        # embedded newlines), and the capture is taken from this very
        # match, so a stale $1 from an earlier match can never be pushed
        my ($untainted) = $el =~ m/^(.*)$/s;
        push @{$self->{cmdline}}, $untainted;
    }

    $0 = $name;

    return $self;
}
414
# Log that the server stopped, remove the PID and lock files and terminate
# the process with exit status $status.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
424
# Restart the daemon by re-executing the command line stored by new().
#
# Parameters:
#   $waittime - optional number of seconds to sleep before the exec
#
# RESTART_PVE_DAEMON is set in the environment so that the new process image
# knows it is a restart (new() reads it). If exec fails, the reason is
# logged and the process exits with status -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    my @cmd = @{$self->{cmdline}};

    # exec only returns on failure
    exec(@cmd)
        or syslog('err', "unable to restart server - exec failed: %s", $!);

    exit(-1);
}
440
# Hook for subclasses - please overwrite.
# Called at startup, before forking. The default does nothing.
sub init {
    my ($self) = @_;

    # intentionally empty
}
447
# Hook for subclasses - please overwrite.
# The default logs a message and, for daemons without workers, reaps all
# child processes that have already exited (non-blocking).
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    if (!$self->{max_workers}) {
        # wait for children
        while (waitpid(-1, POSIX::WNOHANG()) > 0) {
            # keep reaping
        }
    }
}
459
460 # please define in subclass
461 #sub hup {
462 # my ($self) = @_;
463 #
464 # syslog('info' , "received signal HUP (restart)");
465 #}
466
# Hook for subclasses - please overwrite.
# The default just logs a message every five seconds, forever.
sub run {
    my ($self) = @_;

    while (1) {
        syslog('info' , "server is running");
        sleep(5);
    }
}
476
# Start the server. With a true $debug the server stays in the foreground.
# Errors raised while starting are caught and written to syslog.
sub start {
    my ($self, $debug) = @_;

    eval { $server_run->($self, $debug); };
    my $err = $@;

    syslog('err', "start failed - $err") if $err;
}
485
# Read the PID stored in the first line of $self->{pidfile}.
# Returns 0 if that line is empty or is not purely numeric.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    return 0 if !$first_line;

    if ($first_line =~ m/^(\d+)$/) { # untaint
        return int($1);
    }

    return 0;
};
499
# Check whether the process recorded in the PID file is running.
# Returns 1 or 0 in scalar context and (running, pid) in list context;
# without a usable PID the result is 0 respectively (0, 0).
sub running {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    if (!$pid) {
        return wantarray ? (0, 0) : 0;
    }

    my $alive = PVE::ProcFSTools::check_process_running($pid) ? 1 : 0;

    return wantarray ? ($alive, $pid) : $alive;
}
512
# Stop the process recorded in the PID file.
#
# A running process gets a TERM signal and is polled once per second for up
# to $self->{stop_wait_time} seconds (default 5). Afterwards a KILL signal
# is sent in any case. If the PID file still exists, we try to take the
# daemon lock and remove the PID and lock files; failures of that cleanup
# are only logged.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        my $elapsed = 0;
        while ($elapsed < $wait_time) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
            $elapsed++;
        }

        syslog('err', "server still running - send KILL") if $running;

        # to be sure
        kill(9, $pid);
        waitpid($pid, 0);
    }

    if (-f $self->{pidfile}) {
        eval {
            # try to get the lock
            $lockpidfile->($self);
            $server_cleanup->($self);
        };
        my $err = $@;
        syslog('err', "cleanup failed - $err") if $err;
    }
}
549
# Register the 'start' command on $class (via $class->register_method).
# $description is optional and defaults to "Start the daemon.".
# The registered command calls $self->start() with the 'debug' parameter.
sub register_start_command {
    my ($self, $class, $description) = @_;

    my $debug_option = {
        description => "Debug mode - stay in foreground",
        type => "boolean",
        optional => 1,
        default => 0,
    };

    my $handler = sub {
        my ($param) = @_;

        $self->start($param->{debug});

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => $debug_option,
            },
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
579
# Restart or reload the daemon.
#
# - when we were re-executed for a restart (env_restart_pve_daemon), or when
#   the daemon is not running: just start it
# - when it is running and $use_hup is true: send it a HUP signal
# - otherwise: stop it and start it again
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    return $self->start() if $self->{env_restart_pve_daemon};

    my ($running, $pid) = $self->running();

    return $self->start() if !$running;

    if ($use_hup) {
        syslog('info', "send HUP to $pid");
        return kill(1, $pid);
    }

    $self->stop();
    $self->start();
};
600
# Register the 'restart' command on $class (via $class->register_method).
# With a true $use_hup a running daemon is sent a HUP signal instead of
# being stopped and started. $description is optional.
sub register_restart_command {
    my ($self, $class, $use_hup, $description) = @_;

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, $use_hup);

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
623
# Register the 'reload' command on $class (via $class->register_method).
# A running daemon is sent a HUP signal; a stopped one is started.
# $description is optional.
sub register_reload_command {
    my ($self, $class, $description) = @_;

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
646
# Register the 'stop' command on $class (via $class->register_method).
# $description is optional and defaults to "Stop the daemon.".
# The registered command calls $self->stop().
sub register_stop_command {
    my ($self, $class, $description) = @_;

    my $handler = sub {
        my ($param) = @_;

        $self->stop();

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
669
# Register the 'status' command on $class (via $class->register_method).
# $description is optional and defaults to "Get daemon status.", like the
# description of the other register_*_command subs.
# The registered command returns 'running' or 'stopped', depending on
# $self->running().
sub register_status_command {
    my ($self, $class, $description) = @_;

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        # honor the caller supplied description instead of ignoring it
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            return $self->running() ? 'running' : 'stopped';
        }});
}
692
693 1;
694