X-Git-Url: https://git.proxmox.com/?p=pve-common.git;a=blobdiff_plain;f=data%2FPVE%2FDaemon.pm;h=b6714531c7253b8e95a03b57e91547b02b37f46d;hp=279a78c1f36fce16394c9b06b3b9bba6963061c2;hb=7db2b94082ae5241b94a9f23d526a44737014bd1;hpb=4fc691765746d74422f840ebfa4e3391de615244 diff --git a/data/PVE/Daemon.pm b/data/PVE/Daemon.pm index 279a78c..b671453 100644 --- a/data/PVE/Daemon.pm +++ b/data/PVE/Daemon.pm @@ -8,7 +8,7 @@ package PVE::Daemon; # * correctly daemonize (redirect STDIN/STDOUT) # * restart by stop/start, exec, or signal HUP # * daemon restart on error (option 'restart_on_error') -# +# * handle worker processes (option 'max_workers') use strict; use warnings; @@ -22,35 +22,80 @@ use Time::HiRes qw (gettimeofday); use base qw(PVE::CLIHandler); -$SIG{'__WARN__'} = sub { - my $err = $@; - my $t = $_[0]; - chomp $t; - print "$t\n"; - syslog('warning', "WARNING: %s", $t); - $@ = $err; -}; - $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin'; my $daemon_initialized = 0; # we only allow one instance +my $close_daemon_lock = sub { + my ($self) = @_; + + return if !$self->{daemon_lock_fh}; + + close $self->{daemon_lock_fh}; + delete $self->{daemon_lock_fh}; +}; + +my $log_err = sub { + my ($msg) = @_; + chomp $msg; + print STDERR "$msg\n"; + syslog('err', "%s", $msg); +}; + +# call this if you fork() from child +# Note: we already call this for workers, so it is only required +# if you fork inside a simple daemon (max_workers == 0). +sub after_fork_cleanup { + my ($self) = @_; + + &$close_daemon_lock($self); + + PVE::INotify::inotify_close(); + + for my $sig (qw(CHLD HUP INT TERM QUIT)) { + $SIG{$sig} = 'DEFAULT'; # restore default handler + # AnyEvent signals only works if $SIG{XX} is + # undefined (perl event loop) + delete $SIG{$sig}; # so that we can handle events with AnyEvent + } +} + my $lockpidfile = sub { my ($self) = @_; my $lkfn = $self->{pidfile} . ".lock"; - if (!open (FLCK, ">>$lkfn")) { - my $msg = "can't aquire lock on file '$lkfn' - $!"; - syslog ('err', $msg); - die "ERROR: $msg\n"; + my $waittime = 0; + + if (my $fd = $self->{env_pve_lock_fd}) { + + $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a"); + + } else { + + $waittime = 5; + $self->{daemon_lock_fh} = IO::File->new(">>$lkfn"); + } + + if (!$self->{daemon_lock_fh}) { + die "can't open lock '$lkfn' - $!\n"; } - if (!flock (FLCK, LOCK_EX|LOCK_NB)) { - close (FLCK); - my $msg = "can't aquire lock '$lkfn' - $!"; - syslog ('err', $msg); - die "ERROR: $msg\n"; + for (my $i = 0; $i < $waittime; $i ++) { + return if flock ($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB); + sleep(1); + } + + if (!flock ($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) { + &$close_daemon_lock($self); + my $err = $!; + + my ($running, $pid) = $self->running(); + if ($running) { + die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n"; + } else { + die "can't aquire lock '$lkfn' - $err\n"; + } } }; @@ -59,11 +104,8 @@ my $writepidfile = sub { my $pidfile = $self->{pidfile}; - if (!open (PIDFH, ">$pidfile")) { - my $msg = "can't open pid file '$pidfile' - $!"; - syslog ('err', $msg); - die "ERROR: $msg\n"; - } + die "can't open pid file '$pidfile' - $!\n" if !open (PIDFH, ">$pidfile"); + print PIDFH "$$\n"; close (PIDFH); }; @@ -75,17 +117,125 @@ my $server_cleanup = sub { unlink $self->{pidfile}; }; +my $finish_workers = sub { + my ($self) = @_; + + foreach my $cpid (keys %{$self->{workers}}) { + my $waitpid = waitpid($cpid, WNOHANG); + if (defined($waitpid) && ($waitpid == $cpid)) { + delete ($self->{workers}->{$cpid}); + syslog('info', "worker $cpid finished"); + } + } +}; + +my $start_workers = sub { + my ($self) = @_; + + return if $self->{terminate}; + + my $count = 0; + foreach my $cpid (keys %{$self->{workers}}) { + $count++; + } + + my $need = $self->{max_workers} - $count; + + return if $need <= 0; + + syslog('info', "starting $need worker(s)"); + + while ($need > 0) { + my $pid = fork; + + if (!defined ($pid)) { + syslog('err', "can't fork worker"); + sleep (1); + } elsif ($pid) { # parent + $self->{workers}->{$pid} = 1; + syslog('info', "worker $pid started"); + $need--; + } else { + $0 = "$self->{name} worker"; + + $self->after_fork_cleanup(); + + eval { $self->run(); }; + if (my $err = $@) { + syslog('err', $err); + sleep(5); # avoid fast restarts + } + + syslog('info', "worker exit"); + exit (0); + } + } +}; + +my $terminate_server = sub { + my ($self) = @_; + + $self->{terminate} = 1; # set flag to avoid worker restart + + if (!$self->{max_workers}) { + eval { $self->shutdown(); }; + warn $@ if $@; + return; + } + + eval { $self->shutdown(); }; + warn $@ if $@; + + # we have workers - terminate them + + foreach my $cpid (keys %{$self->{workers}}) { + kill(15, $cpid); # TERM childs + } + + # nicely shutdown childs (give them max 10 seconds to shut down) + my $previous_alarm = alarm(10); + eval { + local $SIG{ALRM} = sub { die "timeout\n" }; + + while ((my $pid = waitpid (-1, 0)) > 0) { + if (defined($self->{workers}->{$pid})) { + delete($self->{workers}->{$pid}); + syslog('info', "worker $pid finished"); + } + } + alarm(0); # avoid race condition + }; + my $err = $@; + + alarm ($previous_alarm); + + if ($err) { + syslog('err', "error stopping workers (will kill them now) - $err"); + foreach my $cpid (keys %{$self->{workers}}) { + # KILL childs still alive! + if (kill (0, $cpid)) { + delete($self->{workers}->{$cpid}); + syslog("err", "kill worker $cpid"); + kill(9, $cpid); + # fixme: waitpid? + } + } + } +}; + my $server_run = sub { my ($self, $debug) = @_; + # fixme: handle restart lockfd &$lockpidfile($self); - # run in background - my $spid; + # remove FD_CLOEXEC bit to reuse on exec + $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0); - my $restart = $ENV{RESTART_PVE_DAEMON}; + $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno; - delete $ENV{RESTART_PVE_DAEMON}; + # run in background + my $spid; $self->{debug} = 1 if $debug; @@ -96,52 +246,95 @@ my $server_run = sub { open STDOUT, '>/dev/null' || die "can't write /dev/null"; } - if (!$restart && !$debug) { + if (!$self->{env_restart_pve_daemon} && !$debug) { PVE::INotify::inotify_close(); $spid = fork(); if (!defined ($spid)) { - my $msg = "can't put server into background - fork failed"; - syslog('err', $msg); - die "ERROR: $msg\n"; + die "can't put server into background - fork failed"; } elsif ($spid) { # parent exit (0); } PVE::INotify::inotify_init(); } - &$writepidfile($self); - - POSIX::setsid(); - - if ($restart) { + if ($self->{env_restart_pve_daemon}) { syslog('info' , "restarting server"); } else { + &$writepidfile($self); syslog('info' , "starting server"); } + POSIX::setsid(); + open STDERR, '>&STDOUT' || die "can't close STDERR\n"; - $SIG{INT} = $SIG{TERM} = $SIG{QUIT} = sub { - $SIG{INT} = 'DEFAULT'; + my $old_sig_term = $SIG{TERM}; + local $SIG{TERM} = sub { + local ($@, $!, $?); # do not overwrite error vars + syslog('info', "received signal TERM"); + &$terminate_server($self); + &$server_cleanup($self); + &$old_sig_term(@_) if $old_sig_term; + }; - eval { $self->shutdown(); }; - warn $@ if $@; + my $old_sig_quit = $SIG{QUIT}; + local $SIG{QUIT} = sub { + local ($@, $!, $?); # do not overwrite error vars + syslog('info', "received signal QUIT"); + &$terminate_server($self); + &$server_cleanup($self); + &$old_sig_quit(@_) if $old_sig_quit; + }; + my $old_sig_int = $SIG{INT}; + local $SIG{INT} = sub { + local ($@, $!, $?); # do not overwrite error vars + syslog('info', "received signal INT"); + $SIG{INT} = 'DEFAULT'; # allow to terminate now + &$terminate_server($self); &$server_cleanup($self); + &$old_sig_int(@_) if $old_sig_int; }; - if ($self->can('hup')) { - $SIG{HUP} = sub { + $SIG{HUP} = sub { + local ($@, $!, $?); # do not overwrite error vars + syslog('info', "received signal HUP"); + if ($self->{max_workers}) { + &$terminate_server($self); + $self->{got_hup_signal} = 1; + } elsif ($self->can('hup')) { eval { $self->hup() }; warn $@ if $@; - }; - } + } + }; + + eval { + if ($self->{max_workers}) { + my $old_sig_chld = $SIG{CHLD}; + local $SIG{CHLD} = sub { + local ($@, $!, $?); # do not overwrite error vars + &$finish_workers($self); + &$old_sig_chld(@_) if $old_sig_chld; + }; + + for (;;) { # forever + &$start_workers($self); + sleep(5); + &$finish_workers($self); + last if $self->{terminate}; + } - eval { $self->run() }; + } else { + $self->run(); + } + }; my $err = $@; if ($err) { syslog ('err', "ERROR: $err"); + + # fixme: kill all workers + if (my $wait_time = $self->{restart_on_error}) { $self->restart_daemon($wait_time); } else { @@ -149,51 +342,84 @@ my $server_run = sub { } } - $self->exit_daemon(0); + if ($self->{got_hup_signal}) { + $self->restart_daemon(); + } else { + $self->exit_daemon(0); + } }; sub new { my ($this, $name, $cmdline, %params) = @_; - die "please run as root\n" if $> != 0; + $name = 'daemon' if !$name; # should not happen + + initlog($name); - die "missing name" if !$name; + my $self; - die "can't create more that one PVE::Daemon" if $daemon_initialized; - $daemon_initialized = 1; + eval { - PVE::INotify::inotify_init(); + my $restart = $ENV{RESTART_PVE_DAEMON}; + delete $ENV{RESTART_PVE_DAEMON}; - initlog($name); + my $lockfd = $ENV{PVE_DAEMON_LOCK_FD}; + delete $ENV{PVE_DAEMON_LOCK_FD}; - my $class = ref($this) || $this; - - my $self = bless { - name => $name, - run_dir => '/var/run', - }, $class; - - foreach my $opt (keys %params) { - my $value = $params{$opt}; - if ($opt eq 'restart_on_error') { - $self->{$opt} = $value; - } elsif ($opt eq 'stop_wait_time') { - $self->{$opt} = $value; - } elsif ($opt eq 'run_dir') { - $self->{$opt} = $value; - } else { - die "unknown option '$opt'"; + if (defined($lockfd)) { + $lockfd =~ m/^(\d+)$/; + $lockfd = $1; # untaint } - } - $self->{pidfile} = "$self->{run_dir}/${name}.pid"; + die "please run as root\n" if !$restart && ($> != 0); - $self->{nodename} = PVE::INotify::nodename(); + die "can't create more that one PVE::Daemon" if $daemon_initialized; + $daemon_initialized = 1; - $self->{cmdline} = $cmdline; + PVE::INotify::inotify_init(); + + my $class = ref($this) || $this; + + $self = bless { + name => $name, + run_dir => '/var/run', + env_restart_pve_daemon => $restart, + env_pve_lock_fd => $lockfd, + workers => {}, + }, $class; + + foreach my $opt (keys %params) { + my $value = $params{$opt}; + if ($opt eq 'restart_on_error') { + $self->{$opt} = $value; + } elsif ($opt eq 'stop_wait_time') { + $self->{$opt} = $value; + } elsif ($opt eq 'run_dir') { + $self->{$opt} = $value; + } elsif ($opt eq 'max_workers') { + $self->{$opt} = $value; + } else { + die "unknown daemon option '$opt'\n"; + } + } + + $self->{pidfile} = "$self->{run_dir}/${name}.pid"; + + $self->{nodename} = PVE::INotify::nodename(); + + $self->{cmdline} = []; - $0 = $name; + foreach my $el (@$cmdline) { + $el =~ m/^(.*)$/; # untaint + push @{$self->{cmdline}}, $1; + } + $0 = $name; + }; + if (my $err = $@) { + &$log_err($err); + exit(-1); + } return $self; } @@ -237,8 +463,10 @@ sub shutdown { syslog('info' , "server closing"); - # wait for children - 1 while (waitpid(-1, POSIX::WNOHANG()) > 0); + if (!$self->{max_workers}) { + # wait for children + 1 while (waitpid(-1, POSIX::WNOHANG()) > 0); + } } # please define in subclass @@ -261,7 +489,11 @@ sub run { sub start { my ($self, $debug) = @_; - &$server_run($self, $debug); + eval { &$server_run($self, $debug); }; + if (my $err = $@) { + &$log_err("start failed - $err"); + exit(-1); + } } my $read_pid = sub { @@ -317,9 +549,14 @@ sub stop { } if (-f $self->{pidfile}) { - # try to get the lock - &$lockpidfile($self); - &$server_cleanup($self); + eval { + # try to get the lock + &$lockpidfile($self); + &$server_cleanup($self); + }; + if (my $err = $@) { + &$log_err("cleanup failed - $err"); + } } } @@ -356,7 +593,7 @@ sub register_start_command { my $reload_daemon = sub { my ($self, $use_hup) = @_; - if (my $restart = $ENV{RESTART_PVE_DAEMON}) { + if ($self->{env_restart_pve_daemon}) { $self->start(); } else { my ($running, $pid) = $self->running(); @@ -364,7 +601,8 @@ my $reload_daemon = sub { $self->start(); } else { if ($use_hup) { - kill(1, $pid); + syslog('info', "send HUP to $pid"); + kill 1, $pid; } else { $self->stop(); $self->start();