next if $req_state eq 'freeze';
# erroneous services are not managed by HA, don't count them as active
next if $req_state eq 'error';
+ # request_start is for (optional) better node selection for stop -> started transition
+ next if $req_state eq 'request_start';
$count++;
}
# higher try-count means higher priority especially compared to newly queued jobs, so
# count every try to avoid starvation
$w->{start_tries}++;
+ # FIXME: should be 'last' instead of 'next', and ensure that check_active_workers is called sooner
next if $count >= $max_workers && $max_workers > 0;
# only fork if we may, else call exec_resource_agent directly (e.g. for tests)
# we can just continue normally. But we must NOT do anything with it while still in recovery
next if $request_state eq 'recovery';
next if $request_state eq 'freeze';
+ # intermediate step for optional better node selection on stop -> start request state change
+ next if $request_state eq 'request_start';
$self->queue_resource_command($sid, $sd->{uid}, $request_state, {
'target' => $sd->{target},
my $valid_service_states = {
stopped => 1,
request_stop => 1,
+ request_start => 1,
started => 1,
fence => 1,
recovery => 1,
my $target = $sd->{target}; # optional
if ($online_node_usage->contains_node($sd->{node})) {
if (
- $state eq 'started' || $state eq 'request_stop' || $state eq 'fence' ||
- $state eq 'freeze' || $state eq 'error' || $state eq 'recovery'
+ $state eq 'started' || $state eq 'request_stop' || $state eq 'fence'
+ || $state eq 'freeze' || $state eq 'error' || $state eq 'recovery'
) {
$online_node_usage->add_service_usage_to_node($sd->{node}, $sid, $sd->{node});
} elsif (($state eq 'migrate') || ($state eq 'relocate')) {
# count it for both, source and target as load is put on both
$online_node_usage->add_service_usage_to_node($source, $sid, $source, $target);
$online_node_usage->add_service_usage_to_node($target, $sid, $source, $target);
- } elsif ($state eq 'stopped') {
+ } elsif ($state eq 'stopped' || $state eq 'request_start') {
# do nothing
} else {
die "should not be reached (sid = '$sid', state = '$state')";
$self->next_state_started($sid, $cd, $sd, $lrm_res);
+ } elsif ($last_state eq 'request_start') {
+
+ $self->next_state_request_start($sid, $cd, $sd, $lrm_res);
+
} elsif ($last_state eq 'migrate' || $last_state eq 'relocate') {
$self->next_state_migrate_relocate($sid, $cd, $sd, $lrm_res);
} elsif ($sd->{node} eq $target) {
$haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
} else {
- $change_service_state->($self, $sid, $cmd, node => $sd->{node}, target => $target);
+ &$change_service_state($self, $sid, $cmd, node => $sd->{node}, target => $target);
return;
}
} elsif ($cmd eq 'stop') {
}
if ($cd->{state} eq 'started') {
- # simply mark it started, if it's on the wrong node
- # next_state_started will fix that for us
- &$change_service_state($self, $sid, 'started', node => $sd->{node});
+ # mark it as 'request_start' first, it gets moved on to 'started' from there; if it's on the wrong node next_state_started will fix that for us
+ $change_service_state->($self, $sid, 'request_start', node => $sd->{node});
return;
}
$haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
}
+sub next_state_request_start { # state handler for a service that is in the 'request_start' state
+ my ($self, $sid, $cd, $sd, $lrm_res) = @_;
+ # placeholder transition: go straight to 'started' on the current node; $cd and $lrm_res are unused
+ $change_service_state->($self, $sid, 'started', node => $sd->{node});
+}
+
sub record_service_failed_on_node {
my ($self, $sid, $node) = @_;
info 24 node3/crm: status change wait_for_quorum => slave
info 40 node1/crm: service 'fa:130': state changed from 'request_stop' to 'stopped'
info 120 cmdlist: execute service fa:130 started
-info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'started' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'request_start' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'request_start' to 'started' (node = node2)
info 123 node2/lrm: starting service fa:130
warn 123 node2/lrm: unable to start service fa:130
err 123 node2/lrm: unable to start service fa:130 on local node after 0 retries
info 25 node3/lrm: service status vm:105 started
info 40 node1/crm: service 'fa:130': state changed from 'request_stop' to 'stopped'
info 120 cmdlist: execute service fa:130 started
-info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'started' (node = node3)
+info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'request_start' (node = node3)
+info 120 node1/crm: service 'fa:130': state changed from 'request_start' to 'started' (node = node3)
info 125 node3/lrm: starting service fa:130
warn 125 node3/lrm: unable to start service fa:130
err 125 node3/lrm: unable to start service fa:130 on local node after 0 retries
info 24 node3/crm: status change wait_for_quorum => slave
info 40 node1/crm: service 'fa:110': state changed from 'request_stop' to 'stopped'
info 120 cmdlist: execute service fa:110 started
-info 120 node1/crm: service 'fa:110': state changed from 'stopped' to 'started' (node = node2)
+info 120 node1/crm: service 'fa:110': state changed from 'stopped' to 'request_start' (node = node2)
+info 120 node1/crm: service 'fa:110': state changed from 'request_start' to 'started' (node = node2)
info 123 node2/lrm: starting service fa:110
warn 123 node2/lrm: unable to start service fa:110
warn 123 node2/lrm: restart policy: retry number 1 for service 'fa:110'
info 24 node3/crm: status change wait_for_quorum => slave
info 40 node1/crm: service 'fa:130': state changed from 'request_stop' to 'stopped'
info 120 cmdlist: execute service fa:130 started
-info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'started' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'request_start' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'request_start' to 'started' (node = node2)
info 123 node2/lrm: starting service fa:130
warn 123 node2/lrm: unable to start service fa:130
warn 123 node2/lrm: restart policy: retry number 1 for service 'fa:130'
info 24 node3/crm: status change wait_for_quorum => slave
info 40 node1/crm: service 'fa:130': state changed from 'request_stop' to 'stopped'
info 120 cmdlist: execute service fa:130 started
-info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'started' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'request_start' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'request_start' to 'started' (node = node2)
info 123 node2/lrm: starting service fa:130
warn 123 node2/lrm: unable to start service fa:130
warn 123 node2/lrm: restart policy: retry number 1 for service 'fa:130'
info 24 node3/crm: status change wait_for_quorum => slave
info 40 node1/crm: service 'fa:130': state changed from 'request_stop' to 'stopped'
info 120 cmdlist: execute service fa:130 started
-info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'started' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'stopped' to 'request_start' (node = node2)
+info 120 node1/crm: service 'fa:130': state changed from 'request_start' to 'started' (node = node2)
info 123 node2/lrm: starting service fa:130
warn 123 node2/lrm: unable to start service fa:130
err 123 node2/lrm: unable to start service fa:130 on local node after 0 retries
info 125 node3/lrm: service status vm:103 stopped
info 140 node1/crm: service 'vm:103': state changed from 'request_stop' to 'stopped'
info 220 cmdlist: execute service vm:103 started
-info 220 node1/crm: service 'vm:103': state changed from 'stopped' to 'started' (node = node3)
+info 220 node1/crm: service 'vm:103': state changed from 'stopped' to 'request_start' (node = node3)
+info 220 node1/crm: service 'vm:103': state changed from 'request_start' to 'started' (node = node3)
info 225 node3/lrm: starting service vm:103
info 225 node3/lrm: service status vm:103 started
info 820 hardware: exit simulation - done
info 145 node3/lrm: status change wait_for_agent_lock => active
info 160 node1/crm: service 'vm:103': state changed from 'request_stop' to 'stopped'
info 220 cmdlist: execute service vm:103 started
-info 220 node1/crm: service 'vm:103': state changed from 'stopped' to 'started' (node = node3)
+info 220 node1/crm: service 'vm:103': state changed from 'stopped' to 'request_start' (node = node3)
+info 220 node1/crm: service 'vm:103': state changed from 'request_start' to 'started' (node = node3)
info 225 node3/lrm: starting service vm:103
info 225 node3/lrm: service status vm:103 started
info 820 hardware: exit simulation - done
info 165 node3/crm: exit (loop end)
info 165 shutdown: execute power node3 off
info 220 cmdlist: execute service ct:105 started
-info 220 node1/crm: service 'ct:105': state changed from 'stopped' to 'started' (node = node3)
+info 220 node1/crm: service 'ct:105': state changed from 'stopped' to 'request_start' (node = node3)
+info 220 node1/crm: service 'ct:105': state changed from 'request_start' to 'started' (node = node3)
info 220 node1/crm: service 'ct:105': state changed from 'started' to 'fence'
info 220 node1/crm: relocate service 'ct:105' to node 'node1'
info 220 node1/crm: service 'ct:105': state changed from 'fence' to 'relocate' (node = node3, target = node1)