]> git.proxmox.com Git - pve-common.git/blame - data/PVE/Daemon.pm
Daemon: catch finished worker earlier
[pve-common.git] / data / PVE / Daemon.pm
CommitLineData
390802ab
DM
1package PVE::Daemon;
2
3# Abstract class to implement Daemons
4#
5# Features:
6# * lock and write PID file /var/run/$name.pid to make sure only
7# one instance is running.
d7950851 8# * keep lock open during restart
390802ab 9# * correctly daemonize (redirect STDIN/STDOUT)
4fc69176
DM
10# * restart by stop/start, exec, or signal HUP
11# * daemon restart on error (option 'restart_on_error')
b2132054 12# * handle worker processes (option 'max_workers')
d7950851
DM
13# * allow to restart while workers are still running
14# (option 'leave_children_open_on_reload')
15
390802ab
DM
16use strict;
17use warnings;
18use PVE::SafeSyslog;
19use PVE::INotify;
20
21use POSIX ":sys_wait_h";
22use Fcntl ':flock';
23use Getopt::Long;
24use Time::HiRes qw (gettimeofday);
25
26use base qw(PVE::CLIHandler);
27
390802ab
DM
28$ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
29
30my $daemon_initialized = 0; # we only allow one instance
31
3206cdf6
DM
# Close the daemon lock file handle stored in $self->{daemon_lock_fh}
# (if there is one) and remove the entry from the object.
my $close_daemon_lock = sub {
    my ($self) = @_;

    my $lock_fh = $self->{daemon_lock_fh};
    return if !$lock_fh; # nothing to close

    close $lock_fh;
    delete $self->{daemon_lock_fh};
};
40
c5611195
DM
# Report an error message on STDERR and in the syslog (level 'err').
# A trailing newline in the message is removed before logging.
my $log_err = sub {
    my ($text) = @_;

    chomp $text;

    print STDERR "${text}\n";

    # pass the text as argument, never as format string
    syslog('err', "%s", $text);
};
47
eead1ad8
DM
# Reset state inherited from the daemon process after a fork().
#
# Call this if you fork() from a child. Workers already call it, so it is
# only required if you fork inside a simple daemon (max_workers == 0).
#
# It closes the daemon lock file handle, calls
# PVE::INotify::inotify_close() and removes the CHLD, HUP, INT, TERM and
# QUIT signal handlers.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame ('CHLD', 'HUP', 'INT', 'TERM', 'QUIT') {
        # restore the default handler first ...
        $SIG{$signame} = 'DEFAULT';

        # ... then drop the entry: AnyEvent signals only work if $SIG{XX}
        # is undefined (perl event loop), so that events can be handled
        # with AnyEvent
        delete $SIG{$signame};
    }
}
65
390802ab
DM
# Acquire the exclusive daemon lock on "<pidfile>.lock".
#
# If a lock file descriptor was inherited through the environment
# ($self->{env_pve_lock_fd}, set on restart via exec) that descriptor is
# reused and the lock is tried exactly once. Otherwise the lock file is
# opened in append mode and the lock is retried for up to 5 seconds.
#
# On success the handle is kept in $self->{daemon_lock_fh}.
# Dies if the lock file can't be opened or the lock can't be acquired.
my $lockpidfile = sub {
    my ($self) = @_;

    # make sure IO::File (and thereby IO::Handle) is really loaded instead
    # of relying on some other module having pulled it in
    require IO::File;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        # separate mode argument: the file name is never parsed for a mode
        $self->{daemon_lock_fh} = IO::File->new($lkfn, "a");
    }

    if (!$self->{daemon_lock_fh}) {
        die "can't open lock '$lkfn' - $!\n";
    }

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        # save the flock() error before close() gets a chance to overwrite $!
        my $err = $!;

        $close_daemon_lock->($self);

        my ($running, $pid) = $self->running();
        if ($running) {
            die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
        } else {
            die "can't aquire lock '$lkfn' - $err\n";
        }
    }
};
104
# Write the PID of the current process ("$$\n") to $self->{pidfile},
# truncating any previous content.
#
# Dies if the file can't be opened or written.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # lexical handle and 3-arg open: no global PIDFH, and the file name can
    # never be interpreted as part of the open mode
    open(my $fh, '>', $pidfile)
        or die "can't open pid file '$pidfile' - $!\n";

    print {$fh} "$$\n"
        or die "can't write pid file '$pidfile' - $!\n";

    # buffered write errors only show up at close time
    close($fh)
        or die "can't write pid file '$pidfile' - $!\n";
};
115
# Remove the lock file ("<pidfile>.lock") and the PID file itself.
# Errors from unlink are ignored.
my $server_cleanup = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};
    my $lockfile = $pidfile . ".lock";

    unlink $lockfile;
    unlink $pidfile;
};
122
b2132054
DM
# Reap finished worker processes without blocking.
#
# Every PID registered in $self->{workers} and $self->{old_workers} is
# polled with waitpid(WNOHANG); PIDs that have exited are removed from the
# corresponding hash and logged.
my $finish_workers = sub {
    my ($self) = @_;

    for my $group ('workers', 'old_workers') {
        for my $cpid (keys %{$self->{$group}}) {
            my $reaped = waitpid($cpid, WNOHANG);

            # anything but the PID itself means: not finished (or not ours)
            next if !defined($reaped) || ($reaped != $cpid);

            delete($self->{$group}->{$cpid});
            syslog('info', "worker $cpid finished");
        }
    }
};
136
# Fork worker processes until $self->{max_workers} workers are registered
# in $self->{workers}.
#
# Does nothing once the terminate flag is set. In the parent, each new PID
# is recorded in $self->{workers}. The child renames itself, calls
# after_fork_cleanup(), executes $self->run() and exits with status 0 when
# run() returns or dies.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar(keys %{$self->{workers}});

    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork;

        if (!defined($pid)) {
            syslog('err', "can't fork worker");
            sleep(1); # retry, but not in a tight loop
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # the error text is data, not a format string - a '%' inside
                # it must not be interpreted by syslog
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
179
# Stop the server: set the terminate flag, call $self->shutdown() and, if
# the daemon uses workers, stop them.
#
# Current workers always get a TERM signal. When the termination was
# triggered by HUP and 'leave_children_open_on_reload' is set, old workers
# are left alone and the function returns early. Otherwise old workers get
# TERM as well and all children get up to 10 seconds to exit; whoever is
# still alive after that is sent KILL.
my $terminate_server = sub {
    my ($self) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    # decide before shutdown() runs, exactly like the check was done before
    my $has_workers = $self->{max_workers};

    eval { $self->shutdown(); };
    warn $@ if $@;

    return if !$has_workers;

    # we have workers - send TERM signal
    kill(15, $_) for keys %{$self->{workers}};

    # if configured, leave children running on HUP
    return if $self->{got_hup_signal} &&
        $self->{leave_children_open_on_reload};

    # else, send TERM to old workers
    kill(15, $_) for keys %{$self->{old_workers}};

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $pid = waitpid(-1, 0)) > 0) {
            foreach my $group (qw(workers old_workers)) {
                next if !defined($self->{$group}->{$pid});
                delete($self->{$group}->{$pid});
                syslog('info', "worker $pid finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    if ($err) {
        syslog('err', "error stopping workers (will kill them now) - $err");
        foreach my $group (qw(workers old_workers)) {
            foreach my $cpid (keys %{$self->{$group}}) {
                # KILL childs still alive!
                next if !kill(0, $cpid);

                delete($self->{$group}->{$cpid});
                syslog("err", "kill worker $cpid");
                kill(9, $cpid);
                # fixme: waitpid?
            }
        }
    }
};
243
390802ab
DM
# Main entry point of the daemon process.
#
# Takes the daemon lock, exports the lock fd through the environment (so
# that it survives a restart via exec), calls $self->init(), detaches from
# the terminal (unless $debug is set or this is a restart), installs the
# TERM/QUIT/INT/HUP handlers and then either supervises the workers
# (max_workers set) or calls $self->run() directly.
#
# This function does not return normally: it ends in restart_daemon() or
# exit_daemon(). Errors raised before the main loop (lock, init, fork,
# redirecting the standard handles) propagate to the caller via die.
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    $lockpidfile->($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    # run in background
    my $spid;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # Note: use low precedence 'or' - with '||' the die() is attached to
        # the (always true) file name and a failing open() goes unnoticed
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
    }

    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        $writepidfile->($self);
        syslog('info' , "starting server");
    }

    POSIX::setsid();

    open(STDERR, '>&', \*STDOUT) or die "can't close STDERR\n";

    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        $terminate_server->($self);
        $server_cleanup->($self);
        &$old_sig_term(@_) if $old_sig_term;
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        $terminate_server->($self);
        $server_cleanup->($self);
        &$old_sig_quit(@_) if $old_sig_quit;
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        $terminate_server->($self);
        $server_cleanup->($self);
        &$old_sig_int(@_) if $old_sig_int;
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            # sets the terminate flag, so the main loop below ends and the
            # daemon gets restarted
            $terminate_server->($self);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                $finish_workers->($self);
                &$old_sig_chld(@_) if $old_sig_chld;
            };

            # catch worker finished during restart phase
            $finish_workers->($self);

            # now loop forever (until we receive terminate signal)
            for (;;) {
                $start_workers->($self);
                sleep(5);
                $finish_workers->($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        syslog ('err', "ERROR: $err");

        $terminate_server->($self);

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
373
# Constructor.
#
# Parameters:
#   $name    - daemon name, used for syslog, the process name and the PID
#              file name ("<run_dir>/<name>.pid")
#   $cmdline - array ref with the command line used to re-exec the daemon
#   %params  - options: restart_on_error, stop_wait_time, run_dir,
#              max_workers, leave_children_open_on_reload
#
# The environment variables RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD are
# read and removed; PVE_DAEMON_WORKER_PIDS is read on restart when workers
# are used. Only one instance per process is allowed.
#
# Any error is logged and terminates the process with exit(-1).
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name = 'daemon' if !$name; # should not happen

    initlog($name);

    my $self;

    eval {
        # fetch and clear the variables set by a previous instance
        my $restart = delete $ENV{RESTART_PVE_DAEMON};
        my $lockfd = delete $ENV{PVE_DAEMON_LOCK_FD};

        if (defined($lockfd)) {
            if ($lockfd =~ m/^(\d+)$/) {
                $lockfd = $1; # untaint
            } else {
                die "unable to parse lock fd '$lockfd'\n";
            }
        }

        die "please run as root\n" if !$restart && ($> != 0);

        die "can't create more that one PVE::Daemon" if $daemon_initialized;
        $daemon_initialized = 1;

        PVE::INotify::inotify_init();

        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            run_dir => '/var/run',
            env_restart_pve_daemon => $restart,
            env_pve_lock_fd => $lockfd,
            workers => {},
            old_workers => {},
        }, $class;

        my %known_option = map { $_ => 1 } qw(
            restart_on_error
            stop_wait_time
            run_dir
            max_workers
            leave_children_open_on_reload
        );

        foreach my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$known_option{$opt};
            $self->{$opt} = $params{$opt};
        }

        # take over the workers started by the previous instance
        if ($restart && $self->{max_workers}) {
            my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS};
            if ($wpids) {
                foreach my $pid (split(':', $wpids)) {
                    if ($pid =~ m/^(\d+)$/) {
                        $self->{old_workers}->{$1} = 1;
                    }
                }
            }
        }

        $self->{pidfile} = "$self->{run_dir}/${name}.pid";

        $self->{nodename} = PVE::INotify::nodename();

        $self->{cmdline} = [];

        foreach my $el (@$cmdline) {
            $el =~ m/^(.*)$/; # untaint
            push @{$self->{cmdline}}, $1;
        }

        $0 = $name;
    };
    if (my $err = $@) {
        $log_err->($err);
        exit(-1);
    }

    return $self;
}
462
# Log that the server stopped, remove PID and lock file and terminate the
# process with exit code $status. Never returns.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
472
# Restart the daemon by re-executing the stored command line.
#
# Parameters:
#   $waittime - optional number of seconds to sleep before the exec
#
# Sets RESTART_PVE_DAEMON so that the new instance knows it is a restart
# and, when workers are used, passes the PIDs of all known workers (current
# and old) in PVE_DAEMON_WORKER_PIDS (colon separated).
#
# Never returns: if exec fails the error is logged and the process exits
# with status -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    if ($self->{max_workers}) {
        my @workers = keys %{$self->{workers}};
        push @workers, keys %{$self->{old_workers}};
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    my @cmd = @{$self->{cmdline}};

    # exec only returns on failure - do not exit silently in that case
    exec(@cmd)
        or syslog('err', "%s", "unable to exec '" . join(' ', @cmd) . "' - $!");

    exit(-1);
}
494
# Startup hook - please overwrite in subclass.
#
# Called once at startup, before the daemon forks into the background.
# The default implementation does nothing.
sub init {
    my ($self) = @_;
}
501
# Shutdown hook - please overwrite in subclass.
#
# The default implementation logs the shutdown and, for daemons without
# workers, reaps all already finished child processes without blocking.
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    if (!$self->{max_workers}) {
        # wait for children
        until (waitpid(-1, POSIX::WNOHANG()) <= 0) {
            # keep reaping
        }
    }
}
513
bdb5acce
DM
514# please define in subclass
515#sub hup {
516# my ($self) = @_;
517#
518# syslog('info' , "received signal HUP (restart)");
519#}
390802ab
DM
520
# Main loop - please overwrite in subclass.
#
# The default implementation never returns; it just writes a log message
# every 5 seconds.
sub run {
    my ($self) = @_;

    while (1) { # forever
        syslog('info' , "server is running");
        sleep(5);
    }
}
530
# Start the daemon.
#
# Parameters:
#   $debug - if true, stay in the foreground (debug mode)
#
# Any error raised while starting is logged and terminates the process with
# exit(-1).
sub start {
    my ($self, $debug) = @_;

    eval { $server_run->($self, $debug); };

    my $err = $@;
    if ($err) {
        $log_err->("start failed - $err");
        exit(-1);
    }
}
540
bdb5acce
DM
# Read the PID stored in the first line of $self->{pidfile}.
#
# Returns the (untainted) PID as integer, or 0 if the line is empty/missing
# or does not consist of digits only.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    if ($first_line && $first_line =~ m/^(\d+)$/) { # untaint
        return int($1);
    }

    return 0;
};
554
390802ab
DM
# Check whether the daemon recorded in the PID file is running.
#
# Returns a boolean (1/0) in scalar context and the list ($running, $pid)
# in list context. Without a usable PID file the result is 0 / (0, 0).
sub running {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    my $is_running = 0;
    if ($pid) {
        $is_running = 1 if PVE::ProcFSTools::check_process_running($pid);
    } else {
        $pid = 0;
    }

    return wantarray ? ($is_running, $pid) : $is_running;
}
567
# Stop the daemon recorded in the PID file.
#
# Sends TERM, polls once per second for up to 'stop_wait_time' (default 5)
# iterations until the process is gone, and finally sends KILL in any case.
# Afterwards, if the PID file still exists, the daemon lock is taken and
# PID and lock file are removed; a failure there is only logged.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        my $elapsed = 0;
        while ($elapsed < $wait_time) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
            $elapsed++;
        }

        syslog('err', "server still running - send KILL") if $running;

        # to be sure
        kill(9, $pid);
        waitpid($pid, 0);
    }

    if (-f $self->{pidfile}) {
        eval {
            # try to get the lock
            $lockpidfile->($self);
            $server_cleanup->($self);
        };
        my $err = $@;
        if ($err) {
            $log_err->("cleanup failed - $err");
        }
    }
}
604
# Register the 'start' command (POST) on the class of $self.
#
# Parameters:
#   $description - optional description, defaults to "Start the daemon."
#
# The registered command accepts an optional boolean 'debug' parameter and
# calls $self->start($debug).
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $debug_property = {
        description => "Debug mode - stay in foreground",
        type => "boolean",
        optional => 1,
        default => 0,
    };

    my $handler = sub {
        my ($param) = @_;

        $self->start($param->{debug});

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => $debug_property,
            },
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
636
bdb5acce
DM
# Restart or reload the daemon.
#
# Parameters:
#   $use_hup - if true, a running daemon is signalled with HUP instead of
#              being stopped and started again
#
# When this process is itself a restarted instance (env_restart_pve_daemon)
# or no daemon is running, the daemon is simply started.
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    my $is_restart = $self->{env_restart_pve_daemon};

    # only look at the PID file if we are not the restarted instance
    my ($running, $pid);
    ($running, $pid) = $self->running() if !$is_restart;

    if ($is_restart || !$running) {
        $self->start();
    } elsif ($use_hup) {
        syslog('info', "send HUP to $pid");
        kill 1, $pid;
    } else {
        $self->stop();
        $self->start();
    }
};
657
# Register the 'restart' command (POST) on the class of $self.
#
# Parameters:
#   $use_hup     - passed on to the reload helper: if true, a running
#                  daemon is restarted by sending HUP
#   $description - optional description, defaults to
#                  "Restart the daemon (or start if not running)."
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, $use_hup);

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
682
# Register the 'reload' command (POST) on the class of $self.
#
# Parameters:
#   $description - optional description, defaults to
#                  "Reload daemon configuration (or start if not running)."
#
# The registered command always uses the HUP variant of the reload helper.
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
707
# Register the 'stop' command (POST) on the class of $self.
#
# Parameters:
#   $description - optional description, defaults to "Stop the daemon."
#
# The registered command calls $self->stop().
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $self->stop();

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
732
# Register the 'status' command (GET) on the class of $self.
#
# Parameters:
#   $description - optional description, defaults to "Get daemon status."
#
# The registered command returns the string 'running' or 'stopped',
# depending on $self->running().
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        # honor the caller supplied text, like all other register_* methods
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            return $self->running() ? 'running' : 'stopped';
        }});
}
757
7581;
759