]> git.proxmox.com Git - pve-common.git/blob - src/PVE/Daemon.pm
daemon: don't send SIGTERM before restart on leave_children_open_on_reload
[pve-common.git] / src / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
13 # * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15 # * run as different user using setuid/setgid
16
17 use strict;
18 use warnings;
19 use English;
20
21 use PVE::SafeSyslog;
22 use PVE::INotify;
23
24 use POSIX ":sys_wait_h";
25 use Fcntl ':flock';
26 use Socket qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
27 use IO::Socket::IP;
28
29 use Getopt::Long;
30 use Time::HiRes qw (gettimeofday);
31
32 use base qw(PVE::CLIHandler);
33
# hard-code the search path used for external commands
$ENV{PATH} = '/sbin:/bin:/usr/sbin:/usr/bin';

# set to 1 by setup() - we only allow one instance
my $daemon_initialized = 0;
# sockets created by create_reusable_socket(); restart_daemon() clears
# their close-on-exec flag so that they stay open across exec
my $daemon_sockets = [];
38
# Close the daemon lock file handle stored in $self->{daemon_lock_fh}
# and remove it from the object. Does nothing if no handle is stored.
my $close_daemon_lock = sub {
    my ($self) = @_;

    my $lock_fh = $self->{daemon_lock_fh};
    return if !$lock_fh;

    close($lock_fh);
    delete $self->{daemon_lock_fh};
};
47
# Report an error message both on STDERR and via syslog (priority 'err').
# A trailing newline on the message is removed first.
my $log_err = sub {
    my $msg = shift;

    chomp $msg;

    print STDERR "$msg\n";
    # pass the message as argument, not as format string
    syslog('err', "%s", $msg);
};
54
# call this if you fork() from child
# Note: we already call this for workers, so it is only required
# if you fork inside a simple daemon (max_workers == 0).
#
# Closes the daemon lock and the inotify handle, and resets the
# CHLD/HUP/INT/TERM/QUIT signal handlers.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame (qw(CHLD HUP INT TERM QUIT)) {
        # restore default handler
        $SIG{$signame} = 'DEFAULT';
        # AnyEvent signals only works if $SIG{XX} is undefined
        # (perl event loop), so remove the entry completely
        delete $SIG{$signame};
    }
}
72
# Acquire the exclusive daemon lock on "<pidfile>.lock".
#
# If $self->{env_pve_lock_fd} is set (restart via exec), the inherited
# file descriptor is re-used and the lock is tried exactly once.
# Otherwise the lock file is opened in append mode and we retry once per
# second for 5 seconds before the final attempt.
#
# On success the handle is kept in $self->{daemon_lock_fh}.
# Dies if the lock file cannot be opened or the lock cannot be acquired.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        $self->{daemon_lock_fh} = IO::File->new(">>$lkfn");
    }

    die "can't open lock '$lkfn' - $!\n" if !$self->{daemon_lock_fh};

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    # last try
    return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);

    # remember the flock error before doing anything else - closing the
    # handle or checking the running state may overwrite $!
    my $err = $!;

    $close_daemon_lock->($self);

    my ($running, $pid) = $self->running();
    if ($running) {
        die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
    } else {
        die "can't aquire lock '$lkfn' - $err\n";
    }
};
111
# Write the PID of the current process (followed by a newline) to
# $self->{pidfile}, replacing any previous content.
# Dies if the file cannot be opened or written.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # lexical handle and 3-arg open: no global bareword handle, and the
    # file name is never interpreted as part of the open mode
    open(my $fh, '>', $pidfile)
        or die "can't open pid file '$pidfile' - $!\n";

    print {$fh} "$$\n"
        or die "can't write pid file '$pidfile' - $!\n";

    # buffered write errors only show up on close
    close($fh)
        or die "can't write pid file '$pidfile' - $!\n";
};
122
# Remove the lock file ("<pidfile>.lock") and the pid file itself.
# Errors from unlink are ignored.
my $server_cleanup = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    unlink "$pidfile.lock";
    unlink $pidfile;
};
129
# Reap (non-blocking) all current and old workers which already exited
# and remove them from the corresponding pid hash.
my $finish_workers = sub {
    my ($self) = @_;

    for my $kind (qw(workers old_workers)) {
        for my $cpid (keys %{$self->{$kind}}) {
            my $reaped = waitpid($cpid, WNOHANG);

            # still running (or not our child) - keep it in the list
            next if !defined($reaped) || $reaped != $cpid;

            delete $self->{$kind}->{$cpid};
            syslog('info', "worker $cpid finished");
        }
    }
};
143
# Fork new worker processes until $self->{max_workers} workers exist.
#
# Does nothing once $self->{terminate} is set. In the parent the new pid
# is recorded in $self->{workers}. The child renames itself to
# "<name> worker", calls after_fork_cleanup() and run(), and always
# exits with status 0 afterwards. A failed fork is retried after one
# second.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar(keys %{$self->{workers}});

    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork();

        if (!defined($pid)) {
            syslog('err', "can't fork worker");
            sleep(1);
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # the error text is arbitrary data - never use it as the
                # format string, a '%' inside would be interpreted
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
186
# Send signal 15 (TERM) to all workers inherited from before a restart.
my $terminate_old_workers = sub {
    my ($self) = @_;

    my @old_pids = keys %{$self->{old_workers}};

    # if list is empty kill sends no signal, so no checks needed
    kill(15, @old_pids);
};
193
# Stop the server part of the daemon and (usually) all its workers.
#
# Parameters:
#   $self                - the daemon object
#   $allow_open_children - if true and the daemon was created with
#                          'leave_children_open_on_reload', workers are
#                          left running (used for reload/HUP)
#
# Sets $self->{terminate} so that no new workers get started, calls
# shutdown() (errors are only warned about), then sends TERM to all
# current and old workers and waits up to 10 seconds for them. Workers
# still alive after that are killed with signal 9.
my $terminate_server = sub {
    my ($self, $allow_open_children) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    eval { $self->shutdown(); };
    warn $@ if $@;

    # simple daemon without workers - nothing more to do
    return if !$self->{max_workers};

    # if configured, leave children running on HUP
    return if $allow_open_children && $self->{leave_children_open_on_reload};

    # else send TERM to all (old and current) child workers
    # Note: collect the pids of both hashes explicitly. A hash slice
    # inside %{...} is evaluated in scalar context and only yields its
    # last element, so current workers would never get the signal.
    my @worker_pids = (
        keys %{$self->{workers}},
        keys %{$self->{old_workers}},
    );
    kill(15, @worker_pids);

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $pid = waitpid(-1, 0)) > 0) {
            foreach my $id (qw(workers old_workers)) {
                if (defined($self->{$id}->{$pid})) {
                    delete($self->{$id}->{$pid});
                    syslog('info', "worker $pid finished");
                }
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    if ($err) {
        syslog('err', "error stopping workers (will kill them now) - $err");
        foreach my $id (qw(workers old_workers)) {
            foreach my $cpid (keys %{$self->{$id}}) {
                # KILL childs still alive!
                if (kill(0, $cpid)) {
                    delete($self->{$id}->{$cpid});
                    syslog("err", "kill worker $cpid");
                    kill(9, $cpid);
                    # fixme: waitpid?
                }
            }
        }
    }
};
249
# Prepare the process for running as daemon (called by start()).
#
# * initializes syslog with the daemon name
# * moves RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD from the environment
#   into $self->{env_restart_pve_daemon} / $self->{env_pve_lock_fd}
# * requires root privileges, except when restarted via exec
# * makes sure only one PVE::Daemon gets set up per process
# * initializes inotify
# * switches to the configured group/user ('setgid' / 'setuid' options)
# * on restart with workers, records the pids passed in
#   PVE_DAEMON_WORKER_PIDS as $self->{old_workers}
#
# Dies on any error.
sub setup {
    my ($self) = @_;

    initlog($self->{name});

    my $restart = $ENV{RESTART_PVE_DAEMON};
    delete $ENV{RESTART_PVE_DAEMON};
    $self->{env_restart_pve_daemon} = $restart;

    my $lockfd = $ENV{PVE_DAEMON_LOCK_FD};
    delete $ENV{PVE_DAEMON_LOCK_FD};
    if (defined($lockfd)) {
        die "unable to parse lock fd '$lockfd'\n"
            if $lockfd !~ m/^(\d+)$/;
        $lockfd = $1; # untaint
    }
    $self->{env_pve_lock_fd} = $lockfd;

    die "please run as root\n" if !$restart && ($> != 0);

    die "can't create more that one PVE::Daemon" if $daemon_initialized;
    $daemon_initialized = 1;

    PVE::INotify::inotify_init();

    if (my $gidstr = $self->{setgid}) {
        # check for undef, a lookup result of 0 is a valid group id
        my $gid = getgrnam($gidstr);
        die "getgrnam failed - $!\n" if !defined($gid);
        POSIX::setgid($gid) || die "setgid $gid failed - $!\n";
        $EGID = "$gid $gid"; # this calls setgroups
        # just to be sure
        die "detected strange gid\n" if !($GID eq "$gid $gid" && $EGID eq "$gid $gid");
    }

    if (my $uidstr = $self->{setuid}) {
        # check for undef, a lookup result of 0 is a valid user id
        my $uid = getpwnam($uidstr);
        die "getpwnam failed - $!\n" if !defined($uid);
        POSIX::setuid($uid) || die "setuid $uid failed - $!\n";
        # just to be sure
        die "detected strange uid\n" if !($UID == $uid && $EUID == $uid);
    }

    if ($restart && $self->{max_workers}) {
        if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS}) {
            foreach my $pid (split(':', $wpids)) {
                # only accept (and untaint) plain numbers
                if ($pid =~ m/^(\d+)$/) {
                    $self->{old_workers}->{$1} = 1;
                }
            }
        }
    }

    $self->{nodename} = PVE::INotify::nodename();
}
302
# Run the daemon (called by start() after setup()). Never returns
# normally: the process either exits or re-executes itself.
#
# Parameters:
#   $self  - the daemon object
#   $debug - if true, stay in foreground and keep STDIN/STDOUT
#
# Steps:
# * acquire the daemon lock and export its fd as PVE_DAEMON_LOCK_FD
#   (with close-on-exec cleared) so that a restart can re-use it
# * call init()
# * redirect STDIN/STDOUT to /dev/null and fork into background
#   (not in debug mode; no fork after a restart via exec)
# * write the pid file (not after a restart)
# * install TERM/QUIT/INT/HUP handlers
# * run the worker loop (max_workers set) or run() directly
# * on error either restart ('restart_on_error') or exit with -1;
#   after a HUP restart, else exit with 0
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    $lockpidfile->($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # Note: use low precedence 'or' - with '||' the die would bind to
        # the (always true) string argument and never trigger
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
    }

    # run in background
    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        my $spid = fork();
        if (!defined($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        $writepidfile->($self);
        syslog('info' , "starting server");
    }

    POSIX::setsid();

    # let STDERR point to the same target as STDOUT
    open(STDERR, '>&', \*STDOUT) or die "can't close STDERR\n";

    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        $terminate_server->($self, 0);
        $server_cleanup->($self);
        $old_sig_term->(@_) if $old_sig_term;
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        $terminate_server->($self, 0);
        $server_cleanup->($self);
        $old_sig_quit->(@_) if $old_sig_quit;
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        $terminate_server->($self, 0);
        $server_cleanup->($self);
        $old_sig_int->(@_) if $old_sig_int;
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            # allow workers to stay open, if configured
            $terminate_server->($self, 1);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                $finish_workers->($self);
                $old_sig_chld->(@_) if $old_sig_chld;
            };

            # now loop forever (until we receive terminate signal)
            for (;;) {
                $start_workers->($self);
                sleep(5);
                $terminate_old_workers->($self);
                $finish_workers->($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        # pass the error as argument, it may contain '%' characters
        syslog('err', "ERROR: %s", $err);

        $terminate_server->($self, 1);

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
430
# Constructor.
#
# Parameters:
#   $name    - daemon name, also used for the default pid file
#              "/var/run/<name>.pid" and as process name
#   $cmdline - array reference, the command line used to re-exec the
#              daemon on restart
#   %params  - options: restart_on_error, stop_wait_time, pidfile,
#              max_workers, leave_children_open_on_reload, setgid, setuid
#
# Any error (for example an unknown option) is logged and the process
# exits with -1.
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name = 'daemon' if !$name; # should not happen

    my $self;

    eval {
        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            pidfile => "/var/run/${name}.pid",
            workers => {},
            old_workers => {},
        }, $class;

        my %is_known_opt = map { ($_ => 1) } qw(
            restart_on_error
            stop_wait_time
            pidfile
            max_workers
            leave_children_open_on_reload
            setgid
            setuid
        );

        foreach my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$is_known_opt{$opt};
            $self->{$opt} = $params{$opt};
        }

        # untaint
        $self->{cmdline} = [map { /^(.*)$/ } @$cmdline];

        $0 = $name;
    };
    if (my $err = $@) {
        $log_err->($err);
        exit(-1);
    }

    return $self;
}
483
# Log "server stopped", remove pid and lock file, then exit the process
# with the given $status.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");
    $server_cleanup->($self);

    exit($status);
}
493
# Restart the daemon by re-executing its original command line.
#
# Parameters:
#   $waittime - optional, seconds to sleep before the exec
#
# Sets RESTART_PVE_DAEMON in the environment, clears the close-on-exec
# flag of all sockets created by create_reusable_socket(), and (with
# workers) passes the pids of all current and old workers in
# PVE_DAEMON_WORKER_PIDS. Does not return; if the exec fails the error
# is logged and the process exits with -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    foreach my $ds (@$daemon_sockets) {
        $ds->fcntl(Fcntl::F_SETFD(), 0);
    }

    if ($self->{max_workers}) {
        my @workers = keys %{$self->{workers}};
        push @workers, keys %{$self->{old_workers}};
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    # the extra block avoids the 'Statement unlikely to be reached'
    # warning for the code following the exec
    { exec(@{$self->{cmdline}}) };

    # only reached if exec failed - do not fail silently
    my $err = $!;
    syslog('err', "unable to exec '%s' - %s", join(' ', @{$self->{cmdline}}), $err);

    exit(-1);
}
519
# please overwrite in subclass
# this is called at startup - before forking
# The default implementation does nothing.
sub init {
    my ($self) = @_;

    # intentionally empty
}
526
# please overwrite in subclass
# The default implementation logs "server closing". For a simple daemon
# (no max_workers) it also reaps all already finished child processes
# without blocking.
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    if (!$self->{max_workers}) {
        # wait for children
        while (waitpid(-1, POSIX::WNOHANG()) > 0) {
            # reaped one child, try the next one
        }
    }
}
538
539 # please define in subclass
540 #sub hup {
541 # my ($self) = @_;
542 #
543 # syslog('info' , "received signal HUP (restart)");
544 #}
545
# please overwrite in subclass
# The default implementation never returns, it just logs
# "server is running" every 5 seconds.
sub run {
    my ($self) = @_;

    while (1) { # forever
        syslog('info' , "server is running");
        sleep(5);
    }
}
555
# Set up and run the daemon.
#
# Parameters:
#   $debug - if true, run in debug mode (passed on to the server loop)
#
# Any error is logged with the prefix "start failed - " and the process
# exits with -1.
sub start {
    my ($self, $debug) = @_;

    eval {
        $self->setup();
        $server_run->($self, $debug);
    };
    my $err = $@;
    return if !$err;

    $log_err->("start failed - $err");
    exit(-1);
}
568
# Read the pid from the first line of $self->{pidfile}.
# Returns the pid as integer, or 0 if the line is empty or not a plain
# number.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    return 0 if !$first_line;

    # only accept digits - this also untaints the value
    if ($first_line =~ m/^(\d+)$/) {
        return int($1);
    }

    return 0;
};
582
# checks if the process was started by systemd
# Returns 1 if the parent process id is 1, else 0.
my $init_ppid = sub {
    return (getppid() == 1) ? 1 : 0;
};
592
# Check if the daemon recorded in the pid file is running.
#
# Returns 1 or 0 in scalar context, and the list (running, pid) in list
# context. Without a usable pid file the result is 0, respectively (0, 0).
sub running {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    # no pid recorded
    return wantarray ? (0, 0) : 0 if !$pid;

    my $is_running = PVE::ProcFSTools::check_process_running($pid) ? 1 : 0;

    return wantarray ? ($is_running, $pid) : $is_running;
}
605
# Stop the daemon recorded in the pid file.
#
# Sends TERM and waits up to 'stop_wait_time' (default 5) seconds for
# the process to vanish, then sends KILL in any case. Afterwards, if the
# pid file still exists, the daemon lock is taken and pid/lock files are
# removed; failures of that cleanup are only logged.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $still_running = 1;
        my $waited = 0;
        while ($waited < $wait_time) {
            $still_running = PVE::ProcFSTools::check_process_running($pid);
            last if !$still_running;
            sleep(1);
            $waited++;
        }

        if ($still_running) {
            syslog('err', "server still running - send KILL");
        }

        # to be sure
        kill(9, $pid);
        waitpid($pid, 0);
    }

    return if !-f $self->{pidfile};

    eval {
        # try to get the lock
        $lockpidfile->($self);
        $server_cleanup->($self);
    };
    if (my $err = $@) {
        $log_err->("cleanup failed - $err");
    }
}
642
# Register the 'start' command on the class of $self.
#
# Parameters:
#   $description - optional, replaces the default command description
#
# The command starts the daemon directly if our parent process is PID 1
# or debug mode was requested, else it calls 'systemctl start <name>'.
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $start_handler = sub {
        my ($param) = @_;

        my $debug = $param->{debug};

        if ($init_ppid->() || $debug) {
            $self->start($debug);
        } else {
            PVE::Tools::run_command(['systemctl', 'start', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => {
                    description => "Debug mode - stay in foreground",
                    type => "boolean",
                    optional => 1,
                    default => 0,
                },
            },
        },
        returns => { type => 'null' },
        code => $start_handler,
    });
}
678
# Reload or restart the daemon.
#
# Parameters:
#   $use_hup - if true, a running daemon gets signal 1 (HUP), else it is
#              stopped and started again
#
# If we are the re-executed daemon (env_restart_pve_daemon set), or no
# daemon is running, the daemon is simply started.
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($self->{env_restart_pve_daemon}) {
        $self->start();
        return;
    }

    my ($running, $pid) = $self->running();

    if (!$running) {
        $self->start();
    } elsif ($use_hup) {
        syslog('info', "send HUP to $pid");
        kill 1, $pid;
    } else {
        $self->stop();
        $self->start();
    }
};
699
# Register the 'restart' command on the class of $self.
#
# Parameters:
#   $use_hup     - if true, restart a running daemon using signal HUP
#                  (or 'systemctl reload-or-restart')
#   $description - optional, replaces the default command description
#
# The command acts directly if our parent process is PID 1, else it
# delegates to systemctl.
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $restart_handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $reload_daemon->($self, $use_hup);
        } else {
            my $action = $use_hup ? 'reload-or-restart' : 'restart';
            PVE::Tools::run_command(['systemctl', $action, $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $restart_handler,
    });
}
728
# Register the 'reload' command on the class of $self.
#
# Parameters:
#   $description - optional, replaces the default command description
#
# The command always reloads using signal HUP (or starts the daemon if
# it is not running).
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $reload_handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $reload_handler,
    });
}
753
# Register the 'stop' command on the class of $self.
#
# Parameters:
#   $description - optional, replaces the default command description
#
# The command stops the daemon directly if our parent process is PID 1,
# else it calls 'systemctl stop <name>'.
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $stop_handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $self->stop();
        } else {
            PVE::Tools::run_command(['systemctl', 'stop', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $stop_handler,
    });
}
782
# Register the 'status' command on the class of $self.
#
# Parameters:
#   $description - optional, replaces the default command description
#                  (same behavior as the other register_* helpers)
#
# The command returns the string 'running' or 'stopped'.
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            # scalar (boolean) context - running() returns 1 or 0 only
            return $self->running() ? 'running' : 'stopped';
        }});
}
807
808 # some useful helper
809
# Create a listening TCP socket which can be re-used after a restart.
#
# Parameters:
#   $port   - local port (required)
#   $host   - local address
#   $family - address family passed to IO::Socket::IP
#
# After a restart via exec, the file descriptor stored in the
# environment variable PVE_DAEMON_SOCKET_<port> is re-opened and marked
# close-on-exec again. Otherwise a new listening socket is created, with
# TCP_NODELAY set, and its file descriptor is stored in that variable.
# The socket is recorded in the module wide socket list and returned.
# Dies on errors.
sub create_reusable_socket {
    my ($self, $port, $host, $family) = @_;

    die "no port specifed" if !$port;

    my $env_key = "PVE_DAEMON_SOCKET_$port";
    my $sockfd = $ENV{$env_key};

    my $socket;

    if (defined($sockfd) && $self->{env_restart_pve_daemon}) {

        die "unable to parse socket fd '$sockfd'\n"
            if $sockfd !~ m/^(\d+)$/;
        $sockfd = $1; # untaint

        $socket = IO::Socket::IP->new;
        $socket->fdopen($sockfd, 'w') ||
            die "cannot fdopen file descriptor '$sockfd' - $!\n";

        $socket->fcntl(Fcntl::F_SETFD(), Fcntl::FD_CLOEXEC);

    } else {

        my %sock_opts = (
            LocalAddr => $host,
            LocalPort => $port,
            Listen => SOMAXCONN,
            Family => $family,
            Proto => 'tcp',
            GetAddrInfoFlags => 0,
            ReuseAddr => 1,
        );

        $socket = IO::Socket::IP->new(%sock_opts) ||
            die "unable to create socket - $@\n";

        # we often observe delays when using Nagle algorithm,
        # so we disable that to maximize performance
        setsockopt($socket, IPPROTO_TCP, TCP_NODELAY, 1);

        $ENV{$env_key} = $socket->fileno;
    }

    push @$daemon_sockets, $socket;

    return $socket;
}
852
853
854 1;
855