From 35cbb764afaa1b48a8977a8ce4619cbd4901ea9c Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht
Date: Wed, 24 Feb 2016 10:06:52 +0100
Subject: [PATCH] avoid out of sync command execution in LRM

We are only allowed to execute any command once, as otherwise we may
disturb the synchrony between CRM and LRM. The following scenario could
happen:

schedule CRM: deploy task 'migrate' for service vm:100 with UID 1234
schedule LRM: fork task with UID 1234
schedule CRM: idle as no result is available yet
schedule LRM: collect finished task UID and write result for CRM
              The result was an error
schedule LRM: fork task UID _AGAIN_
schedule CRM: processes the _previous_ erroneous result from the LRM and
              places vm:100 in started state on the source node
schedule LRM: collect finished task UID and write result for CRM
              This time the task was successful and the service is on the
              target node, but the CRM knows _nothing_ of it!
schedule LRM: try to schedule the task, but the service is not on this
              node! => error

To fix that we _never_ execute exactly the same command twice in a row,
where "exactly the same" means that the UID of the command to queue
already has a recorded result. This enforces the originally intended
SYN-ACK behaviour between the CRM and the LRMs.

We now generate a new UID for a service which does not change state
whenever the following evaluates to true:

* enabled AND in started state

This ensures that the state from the CRM holds in the LRM and thus, for
example, a killed VM gets restarted.

Note that the 'stopped' command is an exception, as we do not check its
result in the CRM (thus no race here) and we always want to execute it
(even when no CRM is active).
---
 src/PVE/HA/LRM.pm                          |  8 ++++++++
 src/PVE/HA/Manager.pm                      | 14 +++++++++-----
 src/test/test-resource-failure5/log.expect |  2 --
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/PVE/HA/LRM.pm b/src/PVE/HA/LRM.pm
index 7bbfe46..b2d7d37 100644
--- a/src/PVE/HA/LRM.pm
+++ b/src/PVE/HA/LRM.pm
@@ -455,6 +455,14 @@ sub manage_resources {
 sub queue_resource_command {
     my ($self, $sid, $uid, $state, $target) = @_;
 
+    # do not queue the exact same command twice, as this may lead to
+    # an inconsistent HA state when the first command fails but the CRM
+    # does not process its failure right away and the LRM starts a second
+    # try without the CRM knowing of it (race condition).
+    # The 'stopped' command is an exception, as we do not process its result
+    # in the CRM and we always want to execute it (even with no active CRM).
+    return if $state ne 'stopped' && $uid && defined($self->{results}->{$uid});
+
     if (my $w = $self->{workers}->{$sid}) {
         return if $w->{pid}; # already started
         # else, delete and overwrite queue entry with new command
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 32b0ad7..1d11084 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -517,12 +517,14 @@ sub next_state_stopped {
         } else {
             $haenv->log('err', "unknown command '$cmd' for service '$sid'");
         }
-    } 
+    }
 
     if ($cd->{state} eq 'disabled') {
-        # do nothing
+        # NOTE: do nothing here; the stop state is an exception, as we do not
+        # process the LRM result here, thus the LRM always tries to stop the
+        # service (protection for the case that no CRM is active)
         return;
-    } 
+    }
 
     if ($cd->{state} eq 'enabled') {
         # simply mark it started, if it's on the wrong node
@@ -608,6 +610,7 @@ sub next_state_started {
                                " (exit code $ec))");
                 # we have no save way out (yet) for other errors
                 &$change_service_state($self, $sid, 'error');
+                return;
             }
         }
 
@@ -623,12 +626,13 @@ sub next_state_started {
                     &$change_service_state($self, $sid, 'relocate', node => $sd->{node}, target => $node);
                 }
             } else {
-                # do nothing
+                # ensure the service gets started again if it went down unexpectedly
+                $sd->{uid} = compute_new_uuid($sd->{state});
             }
         }
 
         return;
-    } 
+    }
 
     $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
 }
diff --git a/src/test/test-resource-failure5/log.expect b/src/test/test-resource-failure5/log.expect
index b6e7807..283ca8c 100644
--- a/src/test/test-resource-failure5/log.expect
+++ b/src/test/test-resource-failure5/log.expect
@@ -31,8 +31,6 @@ err 143 node2/lrm: unable to start service fa:130 on local node after 1 r
 err 160 node1/crm: recovery policy for service fa:130 failed, entering error state!
 info 160 node1/crm: service 'fa:130': state changed from 'started' to 'error'
 warn 163 node2/lrm: service fa:130 is not running and in an error state
-warn 183 node2/lrm: service fa:130 is not running and in an error state
-warn 203 node2/lrm: service fa:130 is not running and in an error state
 info 220 cmdlist: execute service fa:130 disabled
 info 220 node1/crm: service 'fa:130': state changed from 'error' to 'stopped'
 info 820 hardware: exit simulation - done
-- 
2.39.2
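
As a standalone illustration of the guard added in queue_resource_command()
above, here is a minimal Perl sketch. The %results and %workers hashes and the
queue_command() helper are hypothetical stand-ins for the LRM object's
$self->{results} and $self->{workers} fields; only the guard line mirrors the
actual patch.

    #!/usr/bin/perl
    use strict;
    use warnings;

    # Hypothetical stand-ins for the LRM's per-node bookkeeping: results of
    # finished workers keyed by UID, and queued/running workers keyed by SID.
    my %results = ( 'UID-1234' => { exit_code => 1 } );   # first try already failed
    my %workers;

    sub queue_command {
        my ($sid, $uid, $state) = @_;

        # the guard from queue_resource_command(): never queue a command whose
        # UID already has a recorded result, except 'stopped', which always runs
        return if $state ne 'stopped' && $uid && defined($results{$uid});

        $workers{$sid} = { uid => $uid, state => $state };
        print "queued '$state' for $sid (UID $uid)\n";
        return 1;
    }

    # A repeat of the failed 'migrate' with the same UID is refused ...
    queue_command('vm:100', 'UID-1234', 'migrate') or print "refused duplicate UID-1234\n";
    # ... and only runs again once the CRM hands out a fresh UID.
    queue_command('vm:100', 'UID-5678', 'migrate');
    # 'stopped' bypasses the guard and is queued even for a known UID.
    queue_command('vm:100', 'UID-1234', 'stopped');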