From b1bad293c4f7a6024bbd363b6784b3875ca5d098 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Fri, 13 Oct 2017 13:25:50 +0200 Subject: [PATCH] add vm_stop helper Since we use a post-stop hook to unmount all file systems at container shutdown rather than a stop hook (because at this point there are still multiple mount namespaces around), we need to wait for the lxc-start/monitor process to exit to be sure all the unmounting has succeeded, because it will put the container into a STOPPED state before executing the post-stop hook, making lxc-wait and lxc-stop signal success too early when waiting for the container to stop. Introduce a vm_stop() helper which calls lxc-stop and then waits for the command socket to close. Note that lxc-stop already has the "hard-stop-after-timeout" mechanic built in, so the 'forceStop' code path of the vm_stop api call removed here was not actually necessary. Technically we could pass --nokill for the behavior assumed there, but for now this patch should not be causing any actual behavior changes. --- src/PVE/API2/LXC/Status.pm | 27 ++------------------- src/PVE/LXC.pm | 49 +++++++++++++++++++++++++++++++++++++- src/PVE/LXC/Config.pm | 2 +- src/PVE/LXC/Migrate.pm | 7 +----- src/PVE/VZDump/LXC.pm | 5 +--- src/test/snapshot-test.pm | 17 ++++++++----- 6 files changed, 64 insertions(+), 43 deletions(-) diff --git a/src/PVE/API2/LXC/Status.pm b/src/PVE/API2/LXC/Status.pm index 2989e9b..39882e2 100644 --- a/src/PVE/API2/LXC/Status.pm +++ b/src/PVE/API2/LXC/Status.pm @@ -275,9 +275,7 @@ __PACKAGE__->register_method({ PVE::LXC::Config->check_lock($conf); } - my $cmd = ['lxc-stop', '-n', $vmid, '--kill']; - - run_command($cmd); + PVE::LXC::vm_stop($vmid, 1); return; }; @@ -364,34 +362,13 @@ __PACKAGE__->register_method({ syslog('info', "shutdown CT $vmid: $upid\n"); - my $cmd = ['lxc-stop', '-n', $vmid]; - $timeout = 60 if !defined($timeout); my $conf = PVE::LXC::Config->load_config($vmid); PVE::LXC::Config->check_lock($conf); - my $storage_cfg = PVE::Storage::config(); - - push @$cmd, '--timeout', $timeout; - - eval { run_command($cmd, timeout => $timeout+5); }; - my $err = $@; - if ($err && $param->{forceStop}) { - $err = undef; - warn "shutdown failed - forcing stop now\n"; - - my $cmd = ['lxc-stop', '-n', $vmid, '--kill']; - run_command($cmd); - } - - # make sure container is stopped - $cmd = ['lxc-wait', '-n', $vmid, '-t', 5, '-s', 'STOPPED']; - run_command($cmd); - $err = $@; - - die $err if $err; + PVE::LXC::vm_stop($vmid, $param->{forceStop}, $timeout); return; }; diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm index ad48392..ebe369d 100644 --- a/src/PVE/LXC.pm +++ b/src/PVE/LXC.pm @@ -11,7 +11,8 @@ use File::Path; use File::Spec; use Cwd qw(); use Fcntl qw(O_RDONLY O_NOFOLLOW O_DIRECTORY); -use Errno qw(ELOOP ENOTDIR EROFS); +use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED); +use IO::Socket::UNIX; use PVE::Exception qw(raise_perm_exc); use PVE::Storage; @@ -1509,5 +1510,51 @@ sub userns_command { return []; } +# Helper to stop a container completely and make sure it has stopped completely. +# This is necessary because we want the post-stop hook to have completed its +# unmount-all step, but post-stop happens after lxc puts the container into the +# STOPPED state. +sub vm_stop { + my ($vmid, $kill, $shutdown_timeout, $exit_timeout) = @_; + + # Open the container's command socket. + my $path = "\0/var/lib/lxc/$vmid/command"; + my $sock = IO::Socket::UNIX->new( + Type => SOCK_STREAM(), + Peer => $path, + ); + if (!$sock) { + return if $! == ECONNREFUSED; # The container is not running + die "failed to open container ${vmid}'s command socket: $!\n"; + } + + # Stop the container: + + my $cmd = ['lxc-stop', '-n', $vmid]; + + if ($kill) { + push @$cmd, '--kill'; # doesn't allow timeouts + } elsif (defined($shutdown_timeout)) { + push @$cmd, '--timeout', $shutdown_timeout; + # Give run_command 5 extra seconds + $shutdown_timeout += 5; + } + + eval { PVE::Tools::run_command($cmd, timeout => $shutdown_timeout) }; + if (my $err = $@) { + warn $@ if $@; + } + + my $result = 1; + my $wait = sub { $result = <$sock>; }; + if (defined($exit_timeout)) { + PVE::Tools::run_with_timeout($exit_timeout, $wait); + } else { + $wait->(); + } + + return if !defined $result; # monitor is gone and the ct has stopped. + die "container did not stop\n"; +} 1; diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm index c45ce7e..8d71f4a 100644 --- a/src/PVE/LXC/Config.pm +++ b/src/PVE/LXC/Config.pm @@ -164,7 +164,7 @@ sub __snapshot_rollback_vol_rollback { sub __snapshot_rollback_vm_stop { my ($class, $vmid) = @_; - PVE::Tools::run_command(['/usr/bin/lxc-stop', '-n', $vmid, '--kill']) + PVE::LXC::vm_stop($vmid, 1) if $class->__snapshot_check_running($vmid); } diff --git a/src/PVE/LXC/Migrate.pm b/src/PVE/LXC/Migrate.pm index 93446d7..df85ef7 100644 --- a/src/PVE/LXC/Migrate.pm +++ b/src/PVE/LXC/Migrate.pm @@ -105,12 +105,7 @@ sub prepare { $self->log('info', "shutdown CT $vmid\n"); - my $cmd = ['lxc-stop', '-n', $vmid, '--timeout', $timeout]; - $self->cmd($cmd, timeout => $timeout + 5); - - # make sure container is stopped - $cmd = ['lxc-wait', '-n', $vmid, '-t', 5, '-s', 'STOPPED']; - $self->cmd($cmd); + PVE::LXC::vm_stop($vmid, 0, $timeout); $running = 0; } diff --git a/src/PVE/VZDump/LXC.pm b/src/PVE/VZDump/LXC.pm index f9616e0..e58a00e 100644 --- a/src/PVE/VZDump/LXC.pm +++ b/src/PVE/VZDump/LXC.pm @@ -251,10 +251,7 @@ sub stop_vm { my $opts = $self->{vzdump}->{opts}; my $timeout = $opts->{stopwait} * 60; - $self->cmd("lxc-stop -n $vmid -t $timeout"); - - # make sure container is stopped - $self->cmd("lxc-wait -n $vmid -s STOPPED"); + PVE::LXC::vm_stop($vmid, 0, $timeout); } sub start_vm { diff --git a/src/test/snapshot-test.pm b/src/test/snapshot-test.pm index 6f0b209..796b6b4 100644 --- a/src/test/snapshot-test.pm +++ b/src/test/snapshot-test.pm @@ -112,6 +112,15 @@ sub mocked_volume_rollback_is_possible { die "volume_rollback_is_possible failed\n"; } +sub mocked_vm_stop { + if ($kill_possible) { + $running = 0; + return 1; + } else { + return 0; + } +} + sub mocked_run_command { my ($cmd, %param) = @_; my $cmdstring; @@ -122,12 +131,7 @@ sub mocked_run_command { die "lxc-[un]freeze disabled\n"; } if ($cmdstring =~ m/.*\/lxc-stop.*--kill.*/) { - if ($kill_possible) { - $running = 0; - return 1; - } else { - return 0; - } + mocked_vm_stop(); } } die "unexpected run_command call: '$cmdstring', aborting\n"; @@ -274,6 +278,7 @@ printf("Setting up Mocking for PVE::LXC and PVE::LXC::Config\n"); my $lxc_module = new Test::MockModule('PVE::LXC'); $lxc_module->mock('sync_container_namespace', sub { return; }); $lxc_module->mock('check_running', \&mocked_check_running); +$lxc_module->mock('vm_stop', \&mocked_vm_stop); my $lxc_config_module = new Test::MockModule('PVE::LXC::Config'); $lxc_config_module->mock('config_file_lock', sub { return "snapshot-working/pve-test.lock"; }); -- 2.39.2