3 # Abstract class to implement Daemons
# * lock and write PID file /var/run/$name.pid to make sure only
# one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
# * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
21 use POSIX
":sys_wait_h";
23 use Socket
qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
27 use Time
::HiRes qw
(gettimeofday
);
29 use base
qw(PVE::CLIHandler);
# Pin PATH to the standard system binary directories for this process
# (presumably so external commands resolve predictably - confirm).
$ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

# Process-wide guard flag: the constructor dies if it is already set.
my $daemon_initialized = 0; # we only allow one instance
35 my $close_daemon_lock = sub {
38 return if !$self->{daemon_lock_fh
};
40 close $self->{daemon_lock_fh
};
41 delete $self->{daemon_lock_fh
};
47 print STDERR
"$msg\n";
48 syslog
('err', "%s", $msg);
51 # call this if you fork() from child
52 # Note: we already call this for workers, so it is only required
53 # if you fork inside a simple daemon (max_workers == 0).
54 sub after_fork_cleanup
{
57 &$close_daemon_lock($self);
59 PVE
::INotify
::inotify_close
();
61 for my $sig (qw(CHLD HUP INT TERM QUIT)) {
62 $SIG{$sig} = 'DEFAULT'; # restore default handler
# AnyEvent signals only work if $SIG{XX} is
# undefined (perl event loop)
65 delete $SIG{$sig}; # so that we can handle events with AnyEvent
69 my $lockpidfile = sub {
72 my $lkfn = $self->{pidfile
} . ".lock";
76 if (my $fd = $self->{env_pve_lock_fd
}) {
78 $self->{daemon_lock_fh
} = IO
::Handle-
>new_from_fd($fd, "a");
83 $self->{daemon_lock_fh
} = IO
::File-
>new(">>$lkfn");
86 if (!$self->{daemon_lock_fh
}) {
87 die "can't open lock '$lkfn' - $!\n";
90 for (my $i = 0; $i < $waittime; $i ++) {
91 return if flock ($self->{daemon_lock_fh
}, LOCK_EX
|LOCK_NB
);
95 if (!flock ($self->{daemon_lock_fh
}, LOCK_EX
|LOCK_NB
)) {
96 &$close_daemon_lock($self);
99 my ($running, $pid) = $self->running();
101 die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
103 die "can't aquire lock '$lkfn' - $err\n";
108 my $writepidfile = sub {
111 my $pidfile = $self->{pidfile
};
113 die "can't open pid file '$pidfile' - $!\n" if !open (PIDFH
, ">$pidfile");
119 my $server_cleanup = sub {
122 unlink $self->{pidfile
} . ".lock";
123 unlink $self->{pidfile
};
126 my $finish_workers = sub {
129 foreach my $id (qw(workers old_workers)) {
130 foreach my $cpid (keys %{$self->{$id}}) {
131 my $waitpid = waitpid($cpid, WNOHANG
);
132 if (defined($waitpid) && ($waitpid == $cpid)) {
133 delete ($self->{$id}->{$cpid});
134 syslog
('info', "worker $cpid finished");
140 my $start_workers = sub {
143 return if $self->{terminate
};
146 foreach my $cpid (keys %{$self->{workers
}}) {
150 my $need = $self->{max_workers
} - $count;
152 return if $need <= 0;
154 syslog
('info', "starting $need worker(s)");
159 if (!defined ($pid)) {
160 syslog
('err', "can't fork worker");
162 } elsif ($pid) { # parent
163 $self->{workers
}->{$pid} = 1;
164 syslog
('info', "worker $pid started");
167 $0 = "$self->{name} worker";
169 $self->after_fork_cleanup();
171 eval { $self->run(); };
174 sleep(5); # avoid fast restarts
177 syslog
('info', "worker exit");
183 my $terminate_server = sub {
186 $self->{terminate
} = 1; # set flag to avoid worker restart
188 if (!$self->{max_workers
}) {
189 eval { $self->shutdown(); };
194 eval { $self->shutdown(); };
197 # we have workers - send TERM signal
199 foreach my $cpid (keys %{$self->{workers
}}) {
200 kill(15, $cpid); # TERM childs
203 # if configured, leave children running on HUP
204 return if $self->{got_hup_signal
} &&
205 $self->{leave_children_open_on_reload
};
207 # else, send TERM to old workers
208 foreach my $cpid (keys %{$self->{old_workers
}}) {
209 kill(15, $cpid); # TERM childs
212 # nicely shutdown childs (give them max 10 seconds to shut down)
213 my $previous_alarm = alarm(10);
215 local $SIG{ALRM
} = sub { die "timeout\n" };
217 while ((my $pid = waitpid (-1, 0)) > 0) {
218 foreach my $id (qw(workers old_workers)) {
219 if (defined($self->{$id}->{$pid})) {
220 delete($self->{$id}->{$pid});
221 syslog
('info', "worker $pid finished");
225 alarm(0); # avoid race condition
229 alarm ($previous_alarm);
232 syslog
('err', "error stopping workers (will kill them now) - $err");
233 foreach my $id (qw(workers old_workers)) {
234 foreach my $cpid (keys %{$self->{$id}}) {
235 # KILL childs still alive!
236 if (kill (0, $cpid)) {
237 delete($self->{$id}->{$cpid});
238 syslog
("err", "kill worker $cpid");
247 my $server_run = sub {
248 my ($self, $debug) = @_;
250 # fixme: handle restart lockfd
251 &$lockpidfile($self);
253 # remove FD_CLOEXEC bit to reuse on exec
254 $self->{daemon_lock_fh
}->fcntl(Fcntl
::F_SETFD
(), 0);
256 $ENV{PVE_DAEMON_LOCK_FD
} = $self->{daemon_lock_fh
}->fileno;
261 $self->{debug
} = 1 if $debug;
# Detach the standard input/output streams from the terminal.
# Use the low-precedence 'or' so the die is tied to the result of open();
# with '||' it was attached to the (always true) file name string and a
# failed open went unnoticed.
open(STDIN, '<', '/dev/null') or die "can't read /dev/null - $!";
open(STDOUT, '>', '/dev/null') or die "can't write /dev/null - $!";
270 if (!$self->{env_restart_pve_daemon
} && !$debug) {
271 PVE
::INotify
::inotify_close
();
273 if (!defined ($spid)) {
274 die "can't put server into background - fork failed";
275 } elsif ($spid) { # parent
278 PVE
::INotify
::inotify_init
();
281 if ($self->{env_restart_pve_daemon
}) {
282 syslog
('info' , "restarting server");
284 &$writepidfile($self);
285 syslog
('info' , "starting server");
# Send STDERR to wherever STDOUT currently points.
# 'or' (not '||') makes the die depend on the result of open() instead of
# on the always-true mode string.
open(STDERR, '>&', \*STDOUT) or die "can't redirect STDERR to STDOUT - $!\n";
292 my $old_sig_term = $SIG{TERM
};
293 local $SIG{TERM
} = sub {
294 local ($@, $!, $?); # do not overwrite error vars
295 syslog
('info', "received signal TERM");
296 &$terminate_server($self);
297 &$server_cleanup($self);
298 &$old_sig_term(@_) if $old_sig_term;
301 my $old_sig_quit = $SIG{QUIT
};
302 local $SIG{QUIT
} = sub {
303 local ($@, $!, $?); # do not overwrite error vars
304 syslog
('info', "received signal QUIT");
305 &$terminate_server($self);
306 &$server_cleanup($self);
307 &$old_sig_quit(@_) if $old_sig_quit;
310 my $old_sig_int = $SIG{INT
};
311 local $SIG{INT
} = sub {
312 local ($@, $!, $?); # do not overwrite error vars
313 syslog
('info', "received signal INT");
314 $SIG{INT
} = 'DEFAULT'; # allow to terminate now
315 &$terminate_server($self);
316 &$server_cleanup($self);
317 &$old_sig_int(@_) if $old_sig_int;
321 local ($@, $!, $?); # do not overwrite error vars
322 syslog
('info', "received signal HUP");
323 $self->{got_hup_signal
} = 1;
324 if ($self->{max_workers
}) {
325 &$terminate_server($self);
326 } elsif ($self->can('hup')) {
327 eval { $self->hup() };
333 if ($self->{max_workers
}) {
334 my $old_sig_chld = $SIG{CHLD
};
335 local $SIG{CHLD
} = sub {
336 local ($@, $!, $?); # do not overwrite error vars
337 &$finish_workers($self);
338 &$old_sig_chld(@_) if $old_sig_chld;
341 # catch worker finished during restart phase
342 &$finish_workers($self);
344 # now loop forever (until we receive terminate signal)
346 &$start_workers($self);
348 &$finish_workers($self);
349 last if $self->{terminate
};
359 syslog
('err', "ERROR: $err");
361 &$terminate_server($self);
363 if (my $wait_time = $self->{restart_on_error
}) {
364 $self->restart_daemon($wait_time);
366 $self->exit_daemon(-1);
370 if ($self->{got_hup_signal
}) {
371 $self->restart_daemon();
373 $self->exit_daemon(0);
378 my ($this, $name, $cmdline, %params) = @_;
380 $name = 'daemon' if !$name; # should not happen
# Read the restart marker and the inherited lock file descriptor number,
# removing both variables from the environment as they are fetched
# (delete returns the value it removed).
my $restart = delete $ENV{RESTART_PVE_DAEMON};

my $lockfd = delete $ENV{PVE_DAEMON_LOCK_FD};
394 if (defined($lockfd)) {
395 die "unable to parse lock fd '$lockfd'\n"
396 if $lockfd !~ m/^(\d+)$/;
397 $lockfd = $1; # untaint
400 die "please run as root\n" if !$restart && ($> != 0);
402 die "can't create more that one PVE::Daemon" if $daemon_initialized;
403 $daemon_initialized = 1;
405 PVE
::INotify
::inotify_init
();
407 my $class = ref($this) || $this;
411 run_dir
=> '/var/run',
412 env_restart_pve_daemon
=> $restart,
413 env_pve_lock_fd
=> $lockfd,
418 foreach my $opt (keys %params) {
419 my $value = $params{$opt};
420 if ($opt eq 'restart_on_error') {
421 $self->{$opt} = $value;
422 } elsif ($opt eq 'stop_wait_time') {
423 $self->{$opt} = $value;
424 } elsif ($opt eq 'run_dir') {
425 $self->{$opt} = $value;
426 } elsif ($opt eq 'max_workers') {
427 $self->{$opt} = $value;
428 } elsif ($opt eq 'leave_children_open_on_reload') {
429 $self->{$opt} = $value;
431 die "unknown daemon option '$opt'\n";
435 if ($restart && $self->{max_workers
}) {
436 if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS
}) {
437 foreach my $pid (split(':', $wpids)) {
438 if ($pid =~ m/^(\d+)$/) {
439 $self->{old_workers
}->{$1} = 1;
445 $self->{pidfile
} = "$self->{run_dir}/${name}.pid";
447 $self->{nodename
} = PVE
::INotify
::nodename
();
449 $self->{cmdline
} = [];
451 foreach my $el (@$cmdline) {
# untaint - with /s the pattern matches any string, including arguments
# containing newlines; without it such an argument failed to match and
# a stale or undefined $1 was pushed instead
$el =~ m/\A(.*)\z/s
    or die "unable to untaint command line argument\n";
push @{$self->{cmdline}}, $1;
467 my ($self, $status) = @_;
469 syslog
("info", "server stopped");
471 &$server_cleanup($self);
477 my ($self, $waittime) = @_;
479 syslog
('info', "server shutdown (restart)");
481 $ENV{RESTART_PVE_DAEMON
} = 1;
483 if ($self->{max_workers
}) {
484 my @workers = keys %{$self->{workers
}};
485 push @workers, keys %{$self->{old_workers
}};
486 $ENV{PVE_DAEMON_WORKER_PIDS
} = join(':', @workers);
489 sleep($waittime) if $waittime; # avoid high server load due to restarts
491 PVE
::INotify
::inotify_close
();
# Replace the current process with a fresh instance of the daemon.
# exec only returns (with a false value) when it failed, so log the
# reason instead of exiting silently.
exec(@{$self->{cmdline}})
    or syslog('err', "%s", "restart failed - unable to exec daemon - $!");

exit (-1); # only reached if exec failed
498 # please overwrite in subclass
499 # this is called at startup - before forking
505 # please overwrite in subclass
509 syslog
('info' , "server closing");
511 if (!$self->{max_workers
}) {
513 1 while (waitpid(-1, POSIX
::WNOHANG
()) > 0);
517 # please define in subclass
521 # syslog('info' , "received signal HUP (restart)");
524 # please overwrite in subclass
529 syslog
('info' , "server is running");
535 my ($self, $debug) = @_;
537 eval { &$server_run($self, $debug); };
539 &$log_err("start failed - $err");
547 my $pid_str = PVE
::Tools
::file_read_firstline
($self->{pidfile
});
549 return 0 if !$pid_str;
551 return 0 if $pid_str !~ m/^(\d+)$/; # untaint
561 my $pid = &$read_pid($self);
564 my $res = PVE
::ProcFSTools
::check_process_running
($pid) ?
1 : 0;
565 return wantarray ?
($res, $pid) : $res;
568 return wantarray ?
(0, 0) : 0;
574 my $pid = &$read_pid($self);
578 if (PVE
::ProcFSTools
::check_process_running
($pid)) {
579 kill(15, $pid); # send TERM signal
581 my $wait_time = $self->{stop_wait_time
} || 5;
583 for (my $i = 0; $i < $wait_time; $i++) {
584 $running = PVE
::ProcFSTools
::check_process_running
($pid);
589 syslog
('err', "server still running - send KILL") if $running;
596 if (-f
$self->{pidfile
}) {
598 # try to get the lock
599 &$lockpidfile($self);
600 &$server_cleanup($self);
603 &$log_err("cleanup failed - $err");
608 sub register_start_command
{
609 my ($self, $description) = @_;
611 my $class = ref($self);
613 $class->register_method({
617 description
=> $description || "Start the daemon.",
619 additionalProperties
=> 0,
622 description
=> "Debug mode - stay in foreground",
629 returns
=> { type
=> 'null' },
634 $self->start($param->{debug
});
640 my $reload_daemon = sub {
641 my ($self, $use_hup) = @_;
643 if ($self->{env_restart_pve_daemon
}) {
646 my ($running, $pid) = $self->running();
651 syslog
('info', "send HUP to $pid");
661 sub register_restart_command
{
662 my ($self, $use_hup, $description) = @_;
664 my $class = ref($self);
666 $class->register_method({
670 description
=> $description || "Restart the daemon (or start if not running).",
672 additionalProperties
=> 0,
675 returns
=> { type
=> 'null' },
680 &$reload_daemon($self, $use_hup);
686 sub register_reload_command
{
687 my ($self, $description) = @_;
689 my $class = ref($self);
691 $class->register_method({
695 description
=> $description || "Reload daemon configuration (or start if not running).",
697 additionalProperties
=> 0,
700 returns
=> { type
=> 'null' },
705 &$reload_daemon($self, 1);
711 sub register_stop_command
{
712 my ($self, $description) = @_;
714 my $class = ref($self);
716 $class->register_method({
720 description
=> $description || "Stop the daemon.",
722 additionalProperties
=> 0,
725 returns
=> { type
=> 'null' },
736 sub register_status_command
{
737 my ($self, $description) = @_;
739 my $class = ref($self);
741 $class->register_method({
745 description
=> "Get daemon status.",
747 additionalProperties
=> 0,
752 enum
=> ['stopped', 'running'],
757 return $self->running() ?
'running' : 'stopped';
763 sub create_reusable_socket
{
764 my ($self, $port, $host) = @_;
766 die "no port specifed" if !$port;
768 my ($socket, $sockfd);
770 if (defined($sockfd = $ENV{"PVE_DAEMON_SOCKET_$port"}) &&
771 $self->{env_restart_pve_daemon
}) {
773 die "unable to parse socket fd '$sockfd'\n"
774 if $sockfd !~ m/^(\d+)$/;
775 $sockfd = $1; # untaint
777 $socket = IO
::Socket
::INET-
>new;
778 $socket->fdopen($sockfd, 'w') ||
779 die "cannot fdopen file descriptor '$sockfd' - $!\n";
783 $socket = IO
::Socket
::INET-
>new(
789 die "unable to create socket - $@\n";
791 # we often observe delays when using Nagle algorithm,
792 # so we disable that to maximize performance
793 setsockopt($socket, IPPROTO_TCP
, TCP_NODELAY
, 1);
795 $ENV{"PVE_DAEMON_SOCKET_$port"} = $socket->fileno;
798 # remove FD_CLOEXEC bit to reuse on exec
799 $socket->fcntl(Fcntl
::F_SETFD
(), 0);