use strict;
use warnings;
-use POSIX ":sys_wait_h";
+use POSIX qw(:sys_wait_h EINTR);
use IO::File;
use Fcntl qw(:flock);
use PVE::SafeSyslog;
# read/update list of active workers
# we move all finished tasks to the archive index,
# but keep aktive and most recent task in the active file.
+# $nocheck ... consider $new_upid still running (avoids
+# trying to read the result too early).
sub active_workers {
- my ($new_upid) = @_;
+ my ($new_upid, $nocheck) = @_;
my $lkfn = "/var/log/pve/tasks/.active.lock";
my $thash = {}; # only list task once
my $check_task = sub {
- my ($task) = @_;
+ my ($task, $running) = @_;
- my $pstart = PVE::ProcFSTools::read_proc_starttime($task->{pid});
- if ($pstart && ($pstart == $task->{pstart})) {
+ if ($running || PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart})) {
push @$tlist, $task;
} else {
delete $task->{pid};
$task = PVE::Tools::upid_decode($new_upid);
$task->{upid} = $new_upid;
$thash->{$new_upid} = $task;
- &$check_task($task);
+ &$check_task($task, $nocheck);
}
return $res;
}
+# Terminate the process group whose id is $pid.
+#
+# $pid    ... pid identifying the process group to signal
+# $pstart ... start time handed to check_process_running() together
+#             with $pid while waiting for the process to exit
+#
+# Sends TERM to the whole group, polls once per second for at most five
+# seconds, and sends KILL if the process is still reported as running.
+my $kill_process_group = sub {
+    my ($pid, $pstart) = @_;
+
+    # Refuse pids that do not name one specific process group: negating
+    # 0 (or undef) would signal our own process group, and pid 1 turns
+    # into -1, which signals every process we are allowed to signal.
+    return if !defined($pid) || $pid !~ m/^\d+$/ || $pid <= 1;
+
+    # send kill to process group (negative pid)
+    my $kpid = -$pid;
+
+    # always send signal to all pgrp members
+    kill(15, $kpid); # send TERM signal
+
+    # give max 5 seconds to shut down
+    for my $try (1 .. 5) {
+        return if !PVE::ProcFSTools::check_process_running($pid, $pstart);
+        sleep(1);
+    }
+
+    # to be sure
+    kill(9, $kpid); # send KILL signal
+};
+
+# Check whether the worker identified by $upid is still running.
+#
+# $upid   ... task UPID; decoded with PVE::Tools::upid_decode to obtain
+#             the worker's pid and pstart
+# $killit ... if true, a running worker gets its process group killed
+#
+# Returns 1 if the worker is running and was left alone, 0 if it is not
+# running or if it was asked to terminate.
+sub check_worker {
+    my ($upid, $killit) = @_;
+
+    my $task = PVE::Tools::upid_decode($upid);
+
+    my $running = PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});
+
+    return 0 if !$running;
+
+    if ($killit) {
+        # Pass pstart along so the wait loop in $kill_process_group checks
+        # the same pid/pstart pair as the liveness test above.
+        # NOTE(review): presumably this avoids waiting on a recycled pid -
+        # confirm against check_process_running().
+        &$kill_process_group($task->{pid}, $task->{pstart});
+        return 0;
+    }
+
+    return 1;
+}
+
# start long running workers
# STDIN is redirected to /dev/null
# STDOUT,STDERR are redirected to the filename returned by upid_decode
# same algorythm as used inside SA
# STDIN = /dev/null
my $fd = fileno (STDIN);
- close STDIN;
- POSIX::close(0) if $fd != 0;
- die "unable to redirect STDIN - $!"
- if !open(STDIN, "</dev/null");
+ if (!$sync) {
+ close STDIN;
+ POSIX::close(0) if $fd != 0;
+
+ die "unable to redirect STDIN - $!"
+ if !open(STDIN, "</dev/null");
+
+ $outfh = PVE::Tools::upid_open($upid);
+ }
- $outfh = PVE::Tools::upid_open($upid) if !$sync;
# redirect STDOUT
$fd = fileno(STDOUT);
POSIX::write($psync[1], $msg, length ($msg));
POSIX::close($psync[1]);
POSIX::_exit(1);
- kill('KILL', $$);
+ kill(-9, $$);
}
- # sync with parent (signal that we are read)
+ # sync with parent (signal that we are ready)
if ($sync) {
print "$upid\n";
} else {
POSIX::_exit(-1);
} else {
print STDERR "TASK OK\n";
- POSIX::_exit (0);
+ POSIX::_exit(0);
}
- kill('KILL', $$);
+ kill(-9, $$);
}
# parent
} else {
POSIX::close($csync[1]);
- kill (9, $cpid); # make sure it gets killed
+ kill(-9, $cpid); # make sure it gets killed
die $err;
}
PVE::Cluster::log_msg('info', $user, "starting task $upid");
- my $tlist = active_workers($upid);
+ my $tlist = active_workers($upid, $sync);
PVE::Cluster::broadcast_tasklist($tlist);
my $res = 0;
if ($sync) {
my $count;
my $outbuf = '';
+ my $int_count = 0;
eval {
- local $SIG{INT} =
- local $SIG{QUIT} =
- local $SIG{TERM} = sub { die "got interrupt\n"; };
+ local $SIG{INT} = local $SIG{QUIT} = local $SIG{TERM} = sub {
+ # always send signal to all pgrp members
+ my $kpid = -$cpid;
+ if ($int_count < 3) {
+ kill(15, $kpid); # send TERM signal
+ } else {
+ kill(9, $kpid); # send KILL signal
+ }
+ $int_count++;
+ };
local $SIG{PIPE} = sub { die "broken pipe\n"; };
- while (($count = POSIX::read($psync[0], $readbuf, 4096)) && ($count > 0)) {
+ while (1) {
+ if (!defined($count = POSIX::read($psync[0], $readbuf, 4096))) {
+ next if $! == EINTR;
+ last;
+ }
+ last if $count == 0; # eof
+
$outbuf .= $readbuf;
while ($outbuf =~ s/^(([^\010\r\n]*)(\r|\n|(\010)+|\r\n))//s) {
my $line = $1;
}
if ($outfh) {
print $outfh $line;
+ $outfh->flush();
}
}
}
if ($outfh) {
print $outfh "TASK ERROR: $err\n";
}
- kill (15, $cpid);
-
- } else {
- kill (9, $cpid); # make sure it gets killed
}
+ &$kill_process_group($cpid, $pstart); # make sure it gets killed
+
close($outfh);
- waitpid ($cpid, 0);
+ waitpid($cpid, 0);
$res = $?;
&$log_task_result($upid, $user, $res);
}