3 # Abstract class to implement Daemons
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
13 # * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15 # * run as different user using setuid/setgid
24 use POSIX
":sys_wait_h";
26 use Socket
qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
30 use Time
::HiRes qw
(gettimeofday
);
32 use base
qw(PVE::CLIHandler);
34 $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
36 my $daemon_initialized = 0; # we only allow one instance
37 my $daemon_sockets = [];
39 my $close_daemon_lock = sub {
42 return if !$self->{daemon_lock_fh
};
44 close $self->{daemon_lock_fh
};
45 delete $self->{daemon_lock_fh
};
51 print STDERR
"$msg\n";
52 syslog
('err', "%s", $msg);
55 # call this if you fork() from child
56 # Note: we already call this for workers, so it is only required
57 # if you fork inside a simple daemon (max_workers == 0).
58 sub after_fork_cleanup
{
61 &$close_daemon_lock($self);
63 PVE
::INotify
::inotify_close
();
65 for my $sig (qw(CHLD HUP INT TERM QUIT)) {
66 $SIG{$sig} = 'DEFAULT'; # restore default handler
67 # AnyEvent signal handling only works if $SIG{XX} is
68 # undefined (perl event loop)
69 delete $SIG{$sig}; # so that we can handle events with AnyEvent
73 my $lockpidfile = sub {
76 my $lkfn = $self->{pidfile
} . ".lock";
80 if (my $fd = $self->{env_pve_lock_fd
}) {
82 $self->{daemon_lock_fh
} = IO
::Handle-
>new_from_fd($fd, "a");
87 $self->{daemon_lock_fh
} = IO
::File-
>new(">>$lkfn");
90 if (!$self->{daemon_lock_fh
}) {
91 die "can't open lock '$lkfn' - $!\n";
94 for (my $i = 0; $i < $waittime; $i ++) {
95 return if flock ($self->{daemon_lock_fh
}, LOCK_EX
|LOCK_NB
);
99 if (!flock ($self->{daemon_lock_fh
}, LOCK_EX
|LOCK_NB
)) {
100 &$close_daemon_lock($self);
103 my ($running, $pid) = $self->running();
105 die "can't acquire lock '$lkfn' - daemon already started (pid = $pid)\n";
107 die "can't acquire lock '$lkfn' - $err\n";
112 my $writepidfile = sub {
115 my $pidfile = $self->{pidfile
};
117 die "can't open pid file '$pidfile' - $!\n" if !open (PIDFH
, ">$pidfile");
123 my $server_cleanup = sub {
126 unlink $self->{pidfile
} . ".lock";
127 unlink $self->{pidfile
};
130 my $finish_workers = sub {
133 foreach my $id (qw(workers old_workers)) {
134 foreach my $cpid (keys %{$self->{$id}}) {
135 my $waitpid = waitpid($cpid, WNOHANG
);
136 if (defined($waitpid) && ($waitpid == $cpid)) {
137 delete ($self->{$id}->{$cpid});
138 syslog
('info', "worker $cpid finished");
144 my $start_workers = sub {
147 return if $self->{terminate
};
149 my $count = scalar keys %{$self->{workers
}};
150 my $need = $self->{max_workers
} - $count;
152 return if $need <= 0;
154 syslog
('info', "starting $need worker(s)");
159 if (!defined ($pid)) {
160 syslog
('err', "can't fork worker");
162 } elsif ($pid) { # parent
163 $self->{workers
}->{$pid} = 1;
164 syslog
('info', "worker $pid started");
167 $0 = "$self->{name} worker";
169 $self->after_fork_cleanup();
171 eval { $self->run(); };
174 sleep(5); # avoid fast restarts
177 syslog
('info', "worker exit");
183 my $terminate_old_workers = sub {
186 # if list is empty kill sends no signal, so no checks needed
187 kill 15, keys %{$self->{old_workers
}};
190 my $terminate_server = sub {
191 my ($self, $allow_open_children) = @_;
193 $self->{terminate
} = 1; # set flag to avoid worker restart
195 eval { $self->shutdown(); };
198 return if !$self->{max_workers
}; # if we have no workers we're done here
200 # if configured, leave children running on HUP
201 return if $allow_open_children && $self->{leave_children_open_on_reload
};
203 # else send TERM to all (old and current) child workers
204 kill 15, (keys %{$self->{workers
}}, keys %{$self->{old_workers
}});
206 # shut down children nicely (give them at most 10 seconds to shut down)
207 my $previous_alarm = alarm(10);
209 local $SIG{ALRM
} = sub { die "timeout\n" };
211 while ((my $pid = waitpid (-1, 0)) > 0) {
212 foreach my $id (qw(workers old_workers)) {
213 if (defined($self->{$id}->{$pid})) {
214 delete($self->{$id}->{$pid});
215 syslog
('info', "worker $pid finished");
219 alarm(0); # avoid race condition
223 alarm ($previous_alarm);
226 syslog
('err', "error stopping workers (will kill them now) - $err");
227 foreach my $id (qw(workers old_workers)) {
228 foreach my $cpid (keys %{$self->{$id}}) {
229 # KILL children that are still alive!
230 if (kill (0, $cpid)) {
231 delete($self->{$id}->{$cpid});
232 syslog
("err", "kill worker $cpid");
244 initlog
($self->{name
});
246 my $restart = delete $ENV{RESTART_PVE_DAEMON
};
247 $self->{env_restart_pve_daemon
} = $restart;
249 my $lockfd = $ENV{PVE_DAEMON_LOCK_FD
};
250 delete $ENV{PVE_DAEMON_LOCK_FD
};
251 if (defined($lockfd)) {
252 die "unable to parse lock fd '$lockfd'\n"
253 if $lockfd !~ m/^(\d+)$/;
254 $lockfd = $1; # untaint
256 $self->{env_pve_lock_fd
} = $lockfd;
258 die "please run as root\n" if !$restart && ($> != 0);
260 die "can't create more that one PVE::Daemon" if $daemon_initialized;
261 $daemon_initialized = 1;
263 PVE
::INotify
::inotify_init
();
265 if (my $gidstr = $self->{setgid
}) {
# Resolve the configured group name to a numeric gid. Use '//' rather
# than '||': gid 0 is a valid lookup result and must not be mistaken
# for a failed lookup (which yields undef).
my $gid = getgrnam($gidstr) // die "getgrnam failed - $!\n";
267 POSIX
::setgid
($gid) || die "setgid $gid failed - $!\n";
268 $EGID = "$gid $gid"; # this calls setgroups
270 die "detected strange gid\n" if !($GID eq "$gid $gid" && $EGID eq "$gid $gid");
273 if (my $uidstr = $self->{setuid
}) {
# Resolve the configured user name to a numeric uid. Use '//' rather
# than '||': uid 0 is a valid lookup result and must not be mistaken
# for a failed lookup (which yields undef).
my $uid = getpwnam($uidstr) // die "getpwnam failed - $!\n";
275 POSIX
::setuid
($uid) || die "setuid $uid failed - $!\n";
277 die "detected strange uid\n" if !($UID == $uid && $EUID == $uid);
280 if ($restart && $self->{max_workers
}) {
281 if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS
}) {
282 foreach my $pid (split(':', $wpids)) {
284 if ($pid =~ m/^(\d+)$/) {
285 $self->{old_workers
}->{$1} = 1;
291 $self->{nodename
} = PVE
::INotify
::nodename
();
294 my $server_run = sub {
295 my ($self, $debug) = @_;
297 # fixme: handle restart lockfd
298 &$lockpidfile($self);
300 # remove FD_CLOEXEC bit to reuse on exec
301 $self->{daemon_lock_fh
}->fcntl(Fcntl
::F_SETFD
(), 0);
303 $ENV{PVE_DAEMON_LOCK_FD
} = $self->{daemon_lock_fh
}->fileno;
308 $self->{debug
} = 1 if $debug;
# Redirect STDIN and STDOUT to /dev/null.
# The low-precedence 'or' is required here: with '||' the die() was
# bound to the (always true) filename string, so a failing open()
# went unnoticed.
open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
317 if (!$self->{env_restart_pve_daemon
} && !$debug) {
318 PVE
::INotify
::inotify_close
();
320 if (!defined ($spid)) {
321 die "can't put server into background - fork failed";
322 } elsif ($spid) { # parent
325 PVE
::INotify
::inotify_init
();
328 if ($self->{env_restart_pve_daemon
}) {
329 syslog
('info' , "restarting server");
331 &$writepidfile($self);
332 syslog
('info' , "starting server");
# Make STDERR a duplicate of STDOUT. Use 'or' instead of '||' so that
# a failed dup is actually reported (the '||' form bound die() to the
# always-true mode string and could never trigger).
open(STDERR, '>&', \*STDOUT) or die "can't close STDERR\n";
339 my $old_sig_term = $SIG{TERM
};
340 local $SIG{TERM
} = sub {
341 local ($@, $!, $?); # do not overwrite error vars
342 syslog
('info', "received signal TERM");
343 &$terminate_server($self, 0);
344 &$server_cleanup($self);
345 &$old_sig_term(@_) if $old_sig_term;
348 my $old_sig_quit = $SIG{QUIT
};
349 local $SIG{QUIT
} = sub {
350 local ($@, $!, $?); # do not overwrite error vars
351 syslog
('info', "received signal QUIT");
352 &$terminate_server($self, 0);
353 &$server_cleanup($self);
354 &$old_sig_quit(@_) if $old_sig_quit;
357 my $old_sig_int = $SIG{INT
};
358 local $SIG{INT
} = sub {
359 local ($@, $!, $?); # do not overwrite error vars
360 syslog
('info', "received signal INT");
361 $SIG{INT
} = 'DEFAULT'; # allow to terminate now
362 &$terminate_server($self, 0);
363 &$server_cleanup($self);
364 &$old_sig_int(@_) if $old_sig_int;
368 local ($@, $!, $?); # do not overwrite error vars
369 syslog
('info', "received signal HUP");
370 $self->{got_hup_signal
} = 1;
371 if ($self->{max_workers
}) {
372 &$terminate_server($self, 1);
373 } elsif ($self->can('hup')) {
374 eval { $self->hup() };
380 if ($self->{max_workers
}) {
381 my $old_sig_chld = $SIG{CHLD
};
382 local $SIG{CHLD
} = sub {
383 local ($@, $!, $?); # do not overwrite error vars
384 &$finish_workers($self);
385 &$old_sig_chld(@_) if $old_sig_chld;
388 # now loop forever (until we receive terminate signal)
390 &$start_workers($self);
392 &$terminate_old_workers($self);
393 &$finish_workers($self);
394 last if $self->{terminate
};
404 syslog
('err', "ERROR: $err");
406 &$terminate_server($self, 1);
408 if (my $wait_time = $self->{restart_on_error
}) {
409 $self->restart_daemon($wait_time);
411 $self->exit_daemon(-1);
415 if ($self->{got_hup_signal
}) {
416 $self->restart_daemon();
418 $self->exit_daemon(0);
423 my ($this, $name, $cmdline, %params) = @_;
425 $name = 'daemon' if !$name; # should not happen
430 my $class = ref($this) || $this;
434 pidfile
=> "/var/run/${name}.pid",
440 foreach my $opt (keys %params) {
441 my $value = $params{$opt};
442 if ($opt eq 'restart_on_error') {
443 $self->{$opt} = $value;
444 } elsif ($opt eq 'stop_wait_time') {
445 $self->{$opt} = $value;
446 } elsif ($opt eq 'pidfile') {
447 $self->{$opt} = $value;
448 } elsif ($opt eq 'max_workers') {
449 $self->{$opt} = $value;
450 } elsif ($opt eq 'leave_children_open_on_reload') {
451 $self->{$opt} = $value;
452 } elsif ($opt eq 'setgid') {
453 $self->{$opt} = $value;
454 } elsif ($opt eq 'setuid') {
455 $self->{$opt} = $value;
457 die "unknown daemon option '$opt'\n";
# Untaint every command-line argument verbatim. \A...\z with /s also
# accepts arguments containing newlines; the former /^(.*)$/ did not
# match an embedded newline, so map() silently dropped such an argument
# and shifted the list later passed to exec().
$self->{cmdline} = [map { /\A(.*)\z/s } @$cmdline];
476 my ($self, $status) = @_;
478 syslog
("info", "server stopped");
480 &$server_cleanup($self);
486 my ($self, $waittime) = @_;
488 syslog
('info', "server shutdown (restart)");
490 $ENV{RESTART_PVE_DAEMON
} = 1;
492 foreach my $ds (@$daemon_sockets) {
493 $ds->fcntl(Fcntl
::F_SETFD
(), 0);
496 if ($self->{max_workers
}) {
497 my @workers = (keys %{$self->{workers
}}, keys %{$self->{old_workers
}});
498 $ENV{PVE_DAEMON_WORKER_PIDS
} = join(':', @workers);
501 sleep($waittime) if $waittime; # avoid high server load due to restarts
503 PVE
::INotify
::inotify_close
();
505 exec (@{$self->{cmdline
}});
507 exit (-1); # never reached?
510 # please overwrite in subclass
511 # this is called at startup - before forking
517 # please overwrite in subclass
521 syslog
('info' , "server closing");
523 if (!$self->{max_workers
}) {
525 1 while (waitpid(-1, POSIX
::WNOHANG
()) > 0);
529 # please define in subclass
533 # syslog('info' , "received signal HUP (restart)");
536 # please overwrite in subclass
541 syslog
('info' , "server is running");
547 my ($self, $debug) = @_;
551 &$server_run($self, $debug);
554 &$log_err("start failed - $err");
562 my $pid_str = PVE
::Tools
::file_read_firstline
($self->{pidfile
});
564 return 0 if !$pid_str;
566 return 0 if $pid_str !~ m/^(\d+)$/; # untaint
573 # checks if the process was started by systemd
574 my $init_ppid = sub {
575 if (getppid() == 1) {
585 my $pid = &$read_pid($self);
588 my $res = PVE
::ProcFSTools
::check_process_running
($pid) ?
1 : 0;
589 return wantarray ?
($res, $pid) : $res;
592 return wantarray ?
(0, 0) : 0;
598 my $pid = &$read_pid($self);
602 if (PVE
::ProcFSTools
::check_process_running
($pid)) {
603 kill(15, $pid); # send TERM signal
605 my $wait_time = $self->{stop_wait_time
} || 5;
607 for (my $i = 0; $i < $wait_time; $i++) {
608 $running = PVE
::ProcFSTools
::check_process_running
($pid);
613 syslog
('err', "server still running - send KILL") if $running;
620 if (-f
$self->{pidfile
}) {
622 # try to get the lock
623 &$lockpidfile($self);
624 &$server_cleanup($self);
627 &$log_err("cleanup failed - $err");
632 sub register_start_command
{
633 my ($self, $description) = @_;
635 my $class = ref($self);
637 $class->register_method({
641 description
=> $description || "Start the daemon.",
643 additionalProperties
=> 0,
646 description
=> "Debug mode - stay in foreground",
653 returns
=> { type
=> 'null' },
658 if (&$init_ppid() || $param->{debug
}) {
659 $self->start($param->{debug
});
661 PVE
::Tools
::run_command
(['systemctl', 'start', $self->{name
}]);
668 my $reload_daemon = sub {
669 my ($self, $use_hup) = @_;
671 if ($self->{env_restart_pve_daemon
}) {
674 my ($running, $pid) = $self->running();
679 syslog
('info', "send HUP to $pid");
689 sub register_restart_command
{
690 my ($self, $use_hup, $description) = @_;
692 my $class = ref($self);
694 $class->register_method({
698 description
=> $description || "Restart the daemon (or start if not running).",
700 additionalProperties
=> 0,
703 returns
=> { type
=> 'null' },
709 &$reload_daemon($self, $use_hup);
711 PVE
::Tools
::run_command
(['systemctl', $use_hup ?
'reload-or-restart' : 'restart', $self->{name
}]);
718 sub register_reload_command
{
719 my ($self, $description) = @_;
721 my $class = ref($self);
723 $class->register_method({
727 description
=> $description || "Reload daemon configuration (or start if not running).",
729 additionalProperties
=> 0,
732 returns
=> { type
=> 'null' },
737 &$reload_daemon($self, 1);
743 sub register_stop_command
{
744 my ($self, $description) = @_;
746 my $class = ref($self);
748 $class->register_method({
752 description
=> $description || "Stop the daemon.",
754 additionalProperties
=> 0,
757 returns
=> { type
=> 'null' },
765 PVE
::Tools
::run_command
(['systemctl', 'stop', $self->{name
}]);
772 sub register_status_command
{
773 my ($self, $description) = @_;
775 my $class = ref($self);
777 $class->register_method({
781 description
=> "Get daemon status.",
783 additionalProperties
=> 0,
788 enum
=> ['stopped', 'running'],
793 return $self->running() ?
'running' : 'stopped';
799 sub create_reusable_socket
{
800 my ($self, $port, $host) = @_;
802 die "no port specifed" if !$port;
804 my ($socket, $sockfd);
806 if (defined($sockfd = $ENV{"PVE_DAEMON_SOCKET_$port"}) &&
807 $self->{env_restart_pve_daemon
}) {
809 die "unable to parse socket fd '$sockfd'\n"
810 if $sockfd !~ m/^(\d+)$/;
811 $sockfd = $1; # untaint
813 $socket = IO
::Socket
::IP-
>new;
814 $socket->fdopen($sockfd, 'w') ||
815 die "cannot fdopen file descriptor '$sockfd' - $!\n";
817 $socket->fcntl(Fcntl
::F_SETFD
(), Fcntl
::FD_CLOEXEC
);
824 GetAddrInfoFlags
=> 0,
827 if (defined($host)) {
828 $socket = IO
::Socket
::IP-
>new( LocalHost
=> $host, %sockargs) ||
829 die "unable to create socket - $@\n";
831 # disabling AF_INET6 (by adding ipv6.disable=1 to the kernel cmdline)
832 # causes bind on :: to fail, try 0.0.0.0 in that case
833 $socket = IO
::Socket
::IP-
>new( LocalHost
=> '::', %sockargs) //
834 IO
::Socket
::IP-
>new( LocalHost
=> '0.0.0.0', %sockargs);
835 die "unable to create socket - $@\n" if !$socket;
838 # we often observe delays when using Nagle algorithm,
839 # so we disable that to maximize performance
840 setsockopt($socket, IPPROTO_TCP
, TCP_NODELAY
, 1);
842 $ENV{"PVE_DAEMON_SOCKET_$port"} = $socket->fileno;
845 push @$daemon_sockets, $socket;