]> git.proxmox.com Git - pve-common.git/blob - src/PVE/Daemon.pm
bump version to 8.2.1
[pve-common.git] / src / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
# * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15 # * run as different user using setuid/setgid
16
17 use strict;
18 use warnings;
19 use English;
20
21 use PVE::SafeSyslog;
22 use PVE::INotify;
23
24 use POSIX ":sys_wait_h";
25 use Fcntl ':flock';
26 use Socket qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
27 use IO::Socket::IP;
28
29 use Getopt::Long;
30 use Time::HiRes qw (gettimeofday);
31
32 use base qw(PVE::CLIHandler);
33
34 $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
35
36 my $daemon_initialized = 0; # we only allow one instance
37 my $daemon_sockets = [];
38
# Close the daemon lock file handle and drop it from $self.
# Does nothing if no lock handle is currently stored.
my $close_daemon_lock = sub {
    my ($self) = @_;

    if (my $lock_fh = $self->{daemon_lock_fh}) {
        close($lock_fh);
        delete $self->{daemon_lock_fh};
    }
};
47
# Report an error message both on STDERR and in the system log.
# A trailing newline in the message is removed before logging.
my $log_err = sub {
    my $message = shift;

    chomp $message;

    print STDERR "$message\n";

    # pass the text as argument, never as format string
    syslog('err', "%s", $message);
};
54
# Release the resources a child inherits from the daemon after fork():
# the daemon lock handle, the inotify state and the daemon signal handlers.
#
# Note: this is already called for workers, so you only need to call it
# yourself if you fork inside a simple daemon (max_workers == 0).
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame ('CHLD', 'HUP', 'INT', 'TERM', 'QUIT') {
        # restore the default handler first ...
        $SIG{$signame} = 'DEFAULT';
        # ... then remove the entry completely: AnyEvent signals only works
        # if $SIG{XX} is undefined (perl event loop)
        delete $SIG{$signame};
    }
}
72
# Acquire the exclusive daemon lock on "<pidfile>.lock" and keep the handle
# in $self->{daemon_lock_fh}.
#
# If $self->{env_pve_lock_fd} is set, that inherited file descriptor is
# reused and the lock is tried exactly once. Otherwise the lock file is
# opened in append mode and the lock is tried once per second for about
# 5 seconds, followed by one final attempt.
#
# Dies if the lock file cannot be opened or the lock cannot be acquired; in
# the latter case the message names the pid of the running daemon, if any.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        $self->{daemon_lock_fh} = IO::File->new(">>$lkfn");
    }

    if (!$self->{daemon_lock_fh}) {
        die "can't open lock '$lkfn' - $!\n";
    }

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        # remember the flock error *before* closing the handle, because
        # close() may overwrite $!
        my $err = $!;

        &$close_daemon_lock($self);

        my ($running, $pid) = $self->running();
        if ($running) {
            die "can't acquire lock '$lkfn' - daemon already started (pid = $pid)\n";
        } else {
            die "can't acquire lock '$lkfn' - $err\n";
        }
    }
};
111
# Write the PID of the current process to $self->{pidfile}, replacing any
# previous content.
#
# Dies if the file cannot be opened, written or closed - buffered write
# errors (for example a full disk) only show up on close.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    open(my $PID_FH, '>', $pidfile) or die "can't open pid file '$pidfile' - $!\n";

    print {$PID_FH} "$$\n" or die "can't write pid file '$pidfile' - $!\n";
    close($PID_FH) or die "can't write pid file '$pidfile' - $!\n";
};
122
# Remove the lock file and the pid file of this daemon.
# Errors from unlink are ignored.
my $server_cleanup = sub {
    my ($self) = @_;

    foreach my $suffix ('.lock', '') {
        unlink $self->{pidfile} . $suffix;
    }
};
129
# Reap all known workers (current and old) which already exited, without
# blocking, and remove them from the bookkeeping hashes.
my $finish_workers = sub {
    my ($self) = @_;

    for my $kind ('workers', 'old_workers') {
        for my $cpid (keys %{$self->{$kind}}) {
            my $reaped = waitpid($cpid, WNOHANG);

            # anything but the pid itself means: not finished (yet)
            next if !defined($reaped) || $reaped != $cpid;

            delete($self->{$kind}->{$cpid});
            syslog('info', "worker $cpid finished");
        }
    }
};
143
# Fork new workers until $self->{max_workers} of them are running.
#
# Does nothing once the terminate flag is set. In the parent the pid of each
# new worker is recorded in $self->{workers}. Each child cleans up the
# inherited daemon state, calls run() and exits with status 0 afterwards
# (errors from run() are logged). A failed fork is logged and retried after
# one second.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar keys %{$self->{workers}};
    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork;

        if (!defined($pid)) {
            syslog('err', "can't fork worker - %s", "$!");
            sleep(1);
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # the error text may contain '%', so never use it as the
                # format string
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
182
# Send TERM (signal 15) to all workers left over from before a restart.
my $terminate_old_workers = sub {
    my ($self) = @_;

    my @old_pids = keys %{$self->{old_workers}};

    # kill sends no signal for an empty list, so no check is needed here
    kill 15, @old_pids;
};
189
# Stop the server: set the terminate flag, call shutdown() and - if workers
# are used - terminate all of them.
#
# $allow_open_children: if true and 'leave_children_open_on_reload' is set,
# the workers are left running (used for reload via HUP).
#
# Workers get TERM and up to 10 seconds to exit; on timeout (or any other
# error while waiting) those still alive get KILL.
my $terminate_server = sub {
    my ($self, $allow_open_children) = @_;

    # this flag prevents workers from being restarted
    $self->{terminate} = 1;

    eval { $self->shutdown(); };
    warn $@ if $@;

    # nothing more to do for daemons without workers
    return if !$self->{max_workers};

    # keep children running on HUP, if configured
    return if $allow_open_children && $self->{leave_children_open_on_reload};

    my @kinds = ('workers', 'old_workers');

    # otherwise ask all (current and old) child workers to terminate
    kill 15, map { keys %{$self->{$_}} } @kinds;

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        for (;;) {
            my $pid = waitpid(-1, 0);
            last if $pid <= 0; # no children left

            for my $kind (@kinds) {
                next if !defined($self->{$kind}->{$pid});

                delete($self->{$kind}->{$pid});
                syslog('info', "worker $pid finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    return if !$err;

    syslog('err', "error stopping workers (will kill them now) - $err");

    for my $kind (@kinds) {
        for my $cpid (keys %{$self->{$kind}}) {
            # only KILL childs which are still alive
            next if !kill(0, $cpid);

            delete($self->{$kind}->{$cpid});
            syslog("err", "kill worker $cpid");
            kill(9, $cpid);
            # fixme: waitpid?
        }
    }
};
240
# Prepare the process for running as daemon.
#
# * initializes syslog with the daemon name
# * reads and removes RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD from the
#   environment (set by a previous instance on restart)
# * requires root privileges, unless this is a restart
# * allows only one PVE::Daemon per process
# * initializes inotify
# * switches group and user, if the 'setgid'/'setuid' options are set
# * on restart with workers, takes over the worker pids listed in
#   PVE_DAEMON_WORKER_PIDS as "old workers"
#
# Dies on any failure.
sub setup {
    my ($self) = @_;

    initlog($self->{name});

    my $restart = delete $ENV{RESTART_PVE_DAEMON};
    $self->{env_restart_pve_daemon} = $restart;

    my $lockfd = delete $ENV{PVE_DAEMON_LOCK_FD};
    if (defined($lockfd)) {
        die "unable to parse lock fd '$lockfd'\n"
            if $lockfd !~ m/^(\d+)$/;
        $lockfd = $1; # untaint
    }
    $self->{env_pve_lock_fd} = $lockfd;

    die "please run as root\n" if !$restart && ($> != 0);

    die "can't create more than one PVE::Daemon" if $daemon_initialized;
    $daemon_initialized = 1;

    PVE::INotify::inotify_init();

    if (my $gidstr = $self->{setgid}) {
        # use defined-or: gid 0 is a valid lookup result, not a failure
        my $gid = getgrnam($gidstr) // die "getgrnam failed - $!\n";
        POSIX::setgid($gid) || die "setgid $gid failed - $!\n";
        $EGID = "$gid $gid"; # this calls setgroups
        # just to be sure
        die "detected strange gid\n" if !($GID eq "$gid $gid" && $EGID eq "$gid $gid");
    }

    if (my $uidstr = $self->{setuid}) {
        # use defined-or: uid 0 is a valid lookup result, not a failure
        my $uid = getpwnam($uidstr) // die "getpwnam failed - $!\n";
        POSIX::setuid($uid) || die "setuid $uid failed - $!\n";
        # just to be sure
        die "detected strange uid\n" if !($UID == $uid && $EUID == $uid);
    }

    if ($restart && $self->{max_workers}) {
        if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS}) {
            foreach my $pid (split(':', $wpids)) {
                # check & untaint
                if ($pid =~ m/^(\d+)$/) {
                    $self->{old_workers}->{$1} = 1;
                }
            }
        }
    }

    $self->{nodename} = PVE::INotify::nodename();
}
293
# Main part of the daemon, called by start() after setup().
#
# Takes the daemon lock (and exports its fd via PVE_DAEMON_LOCK_FD so that
# it survives a re-exec), calls init(), redirects STDIN/STDOUT and forks
# into the background (unless $debug is set or this is a restart), writes
# the pid file, installs the signal handlers and finally either supervises
# the workers (max_workers set) or calls run().
#
# Ends by calling restart_daemon() or exit_daemon().
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    &$lockpidfile($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    # run in background
    my $spid;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        open STDIN, '<', '/dev/null' or die "can't read /dev/null - $!";
        open STDOUT, '>', '/dev/null' or die "can't write /dev/null - $!";
    }

    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        &$writepidfile($self);
        syslog('info' , "starting server");
    }

    POSIX::setsid();

    # low-precedence 'or' is required here: with '||' the die would bind to
    # the (always true) mode string and a failed open would go unnoticed
    open(STDERR, '>&STDOUT') or die "can't redirect STDERR to STDOUT - $!\n";

    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_term(@_) if $old_sig_term;
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_quit(@_) if $old_sig_quit;
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_int(@_) if $old_sig_int;
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            &$terminate_server($self, 1);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                &$finish_workers($self);
                &$old_sig_chld(@_) if $old_sig_chld;
            };

            # now loop forever (until we receive terminate signal)
            for (;;) {
                &$start_workers($self);
                sleep(5);
                &$terminate_old_workers($self);
                &$finish_workers($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        # the error text may contain '%', so pass it as argument instead of
        # interpolating it into the format string
        syslog('err', "ERROR: %s", $err);

        &$terminate_server($self, 1);

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
421
# Constructor.
#
# $name:    daemon name, also used for the default pid file
#           (/var/run/$name.pid) and as process name
# $cmdline: array reference with the command line used to re-exec the daemon
# %params:  options - restart_on_error, stop_wait_time, pidfile, max_workers,
#           leave_children_open_on_reload, setgid, setuid
#
# Any error (for example an unknown option) is logged and terminates the
# process with exit(-1).
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name = 'daemon' if !$name; # should not happen

    my $self;

    eval {
        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            pidfile => "/var/run/${name}.pid",
            workers => {},
            old_workers => {},
        }, $class;

        my $known_options = {
            map { $_ => 1 } qw(
                restart_on_error
                stop_wait_time
                pidfile
                max_workers
                leave_children_open_on_reload
                setgid
                setuid
            )
        };

        for my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$known_options->{$opt};
            $self->{$opt} = $params{$opt};
        }

        # untaint
        $self->{cmdline} = [map { /^(.*)$/ } @$cmdline];

        $0 = $name;
    };
    if (my $err = $@) {
        $log_err->($err);
        exit(-1);
    }

    return $self;
}
474
# Log that the server stopped, remove pid and lock file, then terminate the
# process with exit code $status.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
484
# Restart the daemon by re-executing its original command line.
#
# State is handed over to the new instance through the environment:
# RESTART_PVE_DAEMON marks the restart and PVE_DAEMON_WORKER_PIDS lists the
# still known workers (if workers are used). The close-on-exec flag is
# cleared on all sockets created via create_reusable_socket(), so that they
# stay open across the exec.
#
# $waittime: optional number of seconds to sleep before the exec
#
# Only returns control if exec fails; the failure is then logged and the
# process exits with -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    foreach my $ds (@$daemon_sockets) {
        $ds->fcntl(Fcntl::F_SETFD(), 0);
    }

    if ($self->{max_workers}) {
        my @workers = (keys %{$self->{workers}}, keys %{$self->{old_workers}});
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    my @cmdline = @{$self->{cmdline}};

    # exec only returns on failure - do not fail silently in that case
    exec(@cmdline)
        or syslog('err', "unable to exec '%s' - %s", join(' ', @cmdline), "$!");

    exit(-1);
}
509
# Initialization hook - please overwrite in subclass.
#
# This is called once at startup, before the daemon forks into the
# background. The default implementation does nothing.
sub init {
    my ($self) = @_;
}
516
# Shutdown hook - please overwrite in subclass.
#
# The default implementation logs the shutdown and, for daemons without
# workers, reaps all children which already exited (non-blocking).
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    if (!$self->{max_workers}) {
        # collect terminated children without blocking
        while (waitpid(-1, POSIX::WNOHANG()) > 0) {
            # nothing to do, just reap
        }
    }
}
528
529 # please define in subclass
530 #sub hup {
531 # my ($self) = @_;
532 #
533 # syslog('info' , "received signal HUP (restart)");
534 #}
535
# Main loop of the daemon (or of each worker) - please overwrite in subclass.
#
# The default implementation never returns; it only writes a log message
# every 5 seconds.
sub run {
    my ($self) = @_;

    while (1) { # forever
        syslog('info' , "server is running");
        sleep(5);
    }
}
545
# Set up and run the daemon.
#
# $debug: if true, stay in foreground (no fork, no STDIN/STDOUT redirection)
#
# Any error raised during setup or while running is logged and terminates
# the process with exit(-1).
sub start {
    my ($self, $debug) = @_;

    eval {
        $self->setup();
        $server_run->($self, $debug);
    };
    my $err = $@;

    if ($err) {
        $log_err->("start failed - $err");
        exit(-1);
    }
}
558
# Read the daemon pid from the first line of the pid file.
# Returns 0 if there is no such line or if it is not a plain number.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    # the capture group also untaints the value
    if ($first_line && $first_line =~ m/^(\d+)$/) {
        return int($1);
    }

    return 0;
};
572
# Returns 1 if the parent process is pid 1 (init), else 0.
# This is used to check if the process was started by systemd.
my $init_ppid = sub {
    return getppid() == 1 ? 1 : 0;
};
581
# Check if the daemon recorded in the pid file is running.
#
# Returns the running state (1 or 0) in scalar context, and the list
# (state, pid) in list context. Without a valid pid file both are 0.
sub running {
    my ($self) = @_;

    my ($state, $pid) = (0, 0);

    if (my $found_pid = $read_pid->($self)) {
        $pid = $found_pid;
        $state = PVE::ProcFSTools::check_process_running($pid) ? 1 : 0;
    }

    return wantarray ? ($state, $pid) : $state;
}
594
# Stop the daemon recorded in the pid file.
#
# Sends TERM, polls once per second for up to 'stop_wait_time' seconds
# (default 5) until the process is gone, then always sends KILL to be sure.
# Afterwards a left over pid file gets removed together with the lock file,
# provided the daemon lock can be acquired; a failure there is only logged.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        my $elapsed = 0;
        while ($elapsed < $wait_time) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
            $elapsed++;
        }

        syslog('err', "server still running - send KILL") if $running;

        # to be sure
        kill(9, $pid);
        waitpid($pid, 0);
    }

    if (-f $self->{pidfile}) {
        eval {
            # try to get the lock
            $lockpidfile->($self);
            $server_cleanup->($self);
        };
        if (my $err = $@) {
            $log_err->("cleanup failed - $err");
        }
    }
}
631
# Register the 'start' command on the class of $self.
#
# $description: optional text replacing the default command description
#
# The command starts the daemon directly if the parent process is pid 1 or
# if debug mode was requested; otherwise it delegates to 'systemctl start'.
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        my $debug = $param->{debug};

        if ($init_ppid->() || $debug) {
            $self->start($debug);
        } else {
            PVE::Tools::run_command(['systemctl', 'start', $self->{name}]);
        }

        return undef;
    };

    my $debug_property = {
        description => "Debug mode - stay in foreground",
        type => "boolean",
        optional => 1,
        default => 0,
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => $debug_property,
            },
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
667
# Restart or reload the daemon.
#
# If we are the re-executed instance of a restart, or if no daemon is
# running, simply start. Otherwise either send HUP (signal 1) to the running
# daemon ($use_hup true) or do a full stop/start cycle.
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($self->{env_restart_pve_daemon}) {
        $self->start();
        return;
    }

    my ($running, $pid) = $self->running();

    if (!$running) {
        $self->start();
    } elsif ($use_hup) {
        syslog('info', "send HUP to $pid");
        kill 1, $pid;
    } else {
        $self->stop();
        $self->start();
    }
};
688
# Register the 'restart' command on the class of $self.
#
# $use_hup:     if true, a running daemon is reloaded via HUP instead of
#               being stopped and started again
# $description: optional text replacing the default command description
#
# The command acts directly if the parent process is pid 1; otherwise it
# delegates to 'systemctl reload-or-restart' or 'systemctl restart'.
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $reload_daemon->($self, $use_hup);
        } else {
            PVE::Tools::run_command(['systemctl', $use_hup ? 'reload-or-restart' : 'restart', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
717
# Register the 'reload' command on the class of $self.
#
# $description: optional text replacing the default command description
#
# The command reloads a running daemon via HUP, or starts it if it is not
# running.
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
742
# Register the 'stop' command on the class of $self.
#
# $description: optional text replacing the default command description
#
# The command stops the daemon directly if the parent process is pid 1;
# otherwise it delegates to 'systemctl stop'.
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $self->stop();
        } else {
            PVE::Tools::run_command(['systemctl', 'stop', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
771
# Register the 'status' command on the class of $self.
#
# $description: optional text replacing the default command description
#               (honoured like in the other register_*_command methods)
#
# The command returns 'running' or 'stopped', depending on running().
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            # the ternary condition calls running() in scalar context, so
            # only the state is returned here
            return $self->running() ? 'running' : 'stopped';
        }});
}
796
# some useful helper

# Create a listening TCP socket which survives a daemon restart.
#
# $port: local port to listen on (required)
# $host: optional local address; without it we listen on '::' and fall back
#        to '0.0.0.0' if that fails
#
# On restart, the file descriptor exported by the previous instance in
# PVE_DAEMON_SOCKET_$port is reused instead of creating a new socket. The
# socket is remembered, so that restart_daemon() can clear its close-on-exec
# flag before the exec.
#
# Returns the socket object, dies on errors.
sub create_reusable_socket {
    my ($self, $port, $host) = @_;

    die "no port specified" if !$port;

    my ($socket, $sockfd);

    if (defined($sockfd = $ENV{"PVE_DAEMON_SOCKET_$port"}) &&
        $self->{env_restart_pve_daemon}) {

        die "unable to parse socket fd '$sockfd'\n"
            if $sockfd !~ m/^(\d+)$/;
        $sockfd = $1; # untaint

        $socket = IO::Socket::IP->new;
        $socket->fdopen($sockfd, 'w') ||
            die "cannot fdopen file descriptor '$sockfd' - $!\n";

        # set close-on-exec again; restart_daemon() clears it when needed
        $socket->fcntl(Fcntl::F_SETFD(), Fcntl::FD_CLOEXEC);
    } else {

        my %sockargs = (
            LocalPort => $port,
            Listen => SOMAXCONN,
            Proto => 'tcp',
            GetAddrInfoFlags => 0,
            ReuseAddr => 1,
        );
        if (defined($host)) {
            $socket = IO::Socket::IP->new( LocalHost => $host, %sockargs) ||
                die "unable to create socket - $@\n";
        } else {
            # disabling AF_INET6 (by adding ipv6.disable=1 to the kernel cmdline)
            # causes bind on :: to fail, try 0.0.0.0 in that case
            $socket = IO::Socket::IP->new( LocalHost => '::', %sockargs) //
                IO::Socket::IP->new( LocalHost => '0.0.0.0', %sockargs);
            die "unable to create socket - $@\n" if !$socket;
        }

        # we often observe delays when using Nagle algorithm,
        # so we disable that to maximize performance
        # (this is only an optimization, so a failure is logged, not fatal)
        setsockopt($socket, IPPROTO_TCP, TCP_NODELAY, 1)
            or syslog('warning', "unable to set TCP_NODELAY on port %s - %s", $port, "$!");

        $ENV{"PVE_DAEMON_SOCKET_$port"} = $socket->fileno;
    }

    push @$daemon_sockets, $socket;

    return $socket;
}
849
850
851 1;
852