]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Manager.pm
manager: better convey that basic is always the fallback
[pve-ha-manager.git] / src / PVE / HA / Manager.pm
CommitLineData
c0bbd038
DM
1package PVE::HA::Manager;
2
3use strict;
4use warnings;
c4a221bc 5use Digest::MD5 qw(md5_base64);
c0bbd038 6
c142ebc9 7use PVE::Tools;
a89ff919 8use PVE::HA::Tools ':exit_codes';
c0bbd038 9use PVE::HA::NodeStatus;
5d724d4d 10use PVE::HA::Usage::Basic;
561e7f4b 11use PVE::HA::Usage::Static;
c0bbd038 12
a3ffb0b3
TL
## Variable Name & Abbreviations Convention
#
# The HA stack uses some variables frequently and therefore abbreviates them, which may be
# confusing for new readers. Here is a short list of the most commonly used ones.
#
# NOTE: variables should be assumed to be read-only if not stated otherwise; only use the specific
# methods to re-compute/read/alter them.
#
# - $haenv -> HA environment, the main interface to the simulator/test/real world
# - $sid   -> Service ID, unique identifier for a service, `type:vmid` is common
#
# - $ms    -> Master/Manager Status, contains runtime info from the current active manager
# - $ns    -> Node Status, hash holding online/offline status about all nodes
#
# - $ss    -> Service Status, hash holding the current state (last LRM cmd result, failed starts
#             or migrates, maintenance fallback node, ...) for *all* services
# - $sd    -> Service Data, the service status of a *single* service, iow. $ss->{$sid}
#
# - $sc    -> Service Configuration, hash for all services including target state, group, ...
# - $cd    -> Configuration Data, the service config of a *single* service, iow. $sc->{$sid}
#
# Try to avoid adding new two-letter (or similarly over-abbreviated) names, but also don't send
# patches for changing the ones above, as that set is mostly sensible and should be easy to
# remember after spending a bit of time in the HA code base.
37
# Create a new manager object bound to the given HA environment.
#
# Parameters:
#   $this  - class name, or an existing object whose class is reused
#   $haenv - HA environment object; read_manager_status(), nodename(),
#            get_datacenter_settings() and log() are called on it
#
# Returns the blessed manager with the keys 'haenv', 'ns', 'ss', 'ms' and 'scheduler-mode'.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my $self = bless { haenv => $haenv }, $class;

    my $old_ms = $haenv->read_manager_status();

    # we only copy the state part of the manager which cannot be auto generated

    $self->{ns} = PVE::HA::NodeStatus->new($haenv, $old_ms->{node_status} || {});

    # fixme: use separate class PVE::HA::ServiceStatus
    $self->{ss} = $old_ms->{service_status} || {};

    $self->{ms} = { master_node => $haenv->nodename() };

    # any false value (unset, empty, 0) selects the 'basic' scheduler
    my $dc_cfg = $haenv->get_datacenter_settings();
    $self->{'scheduler-mode'} = $dc_cfg->{crs}->{ha} || 'basic';
    $haenv->log('info', "using scheduler mode '$self->{'scheduler-mode'}'")
        if $self->{'scheduler-mode'} ne 'basic';

    return $self;
}
63
d84da043
DM
# Hook for releasing manager resources; currently there is nothing to clean up.
sub cleanup {
    my ($self) = @_;

    # todo: ?
}
69
# Copy the current node status, service status and a fresh timestamp into the manager status
# hash and hand it to the HA environment for writing.
#
# Returns whatever $haenv->write_manager_status() returns.
sub flush_master_status {
    my ($self) = @_;

    my ($haenv, $ms, $ns, $ss) = $self->@{qw(haenv ms ns ss)};

    $ms->{node_status} = $ns->{status};
    $ms->{service_status} = $ss;
    $ms->{timestamp} = $haenv->get_time();

    $haenv->write_manager_status($ms);
}
c0bbd038 81
48a6ba2a
TL
# Determine the group definition that applies to a service.
#
# Parameters:
#   $groups            - group config; groups are looked up in $groups->{ids}
#   $online_node_usage - usage object; list_nodes() provides the nodes of the default group
#   $service_conf      - service config; the optional 'group' key names the group
#
# Returns the configured group hash if the service names an existing group, otherwise a
# default group whose 'nodes' hash contains every node returned by list_nodes().
sub get_service_group {
    my ($groups, $online_node_usage, $service_conf) = @_;

    # the service is bound to a specific, existing group
    my $group_id = $service_conf->{group};
    if ($group_id && $groups->{ids}->{$group_id}) {
        return $groups->{ids}->{$group_id};
    }

    # add all online nodes to default group to allow try_next when no group set
    my $group = {};
    $group->{nodes}->{$_} = 1 for $online_node_usage->list_nodes();

    return $group;
}
96
# Group the available nodes by priority, using the priority as index.
#
# Parameters:
#   $group             - group hash; the keys of $group->{nodes} are 'node' or 'node:priority'
#                        entries, $group->{restricted} disables the catch-all priority
#   $online_node_usage - usage object, queried through contains_node() and list_nodes()
#
# Returns a list of two hash references:
#   $pri_groups    - { priority => { node => 1, ... }, ... }
#   $group_members - { node => priority, ... }
sub get_node_priority_groups {
    my ($group, $online_node_usage) = @_;

    my $pri_groups = {};
    my $group_members = {};

    for my $entry (keys %{$group->{nodes}}) {
        # entries without an explicit ':priority' suffix get priority 0
        my ($node, $pri) = ($entry, 0);
        if ($entry =~ m/^(\S+):(\d+)$/) {
            ($node, $pri) = ($1, $2);
        }
        next if !$online_node_usage->contains_node($node); # offline

        $pri_groups->{$pri}->{$node} = 1;
        $group_members->{$node} = $pri;
    }

    # add non-group members to unrestricted groups (priority -1)
    if (!$group->{restricted}) {
        for my $node ($online_node_usage->list_nodes()) {
            next if defined($group_members->{$node});
            $pri_groups->{-1}->{$node} = 1;
            $group_members->{$node} = -1;
        }
    }

    return ($pri_groups, $group_members);
}
125
# Select the node a service should run on.
#
# Parameters:
#   $groups               - group configuration, passed on to get_service_group()
#   $online_node_usage    - usage object; provides the candidate nodes and their scores
#   $sid                  - service ID
#   $service_conf         - configuration of the service
#   $current_node         - node the service is currently assigned to
#   $try_next             - if true, prefer a node other than the already tried ones
#   $tried_nodes          - array ref of nodes the service already failed on (used with $try_next)
#   $maintenance_fallback - optional node the service should move back to
#
# Returns the selected node name, or undef if no node is available at all.
sub select_service_node {
    my (
        $groups,
        $online_node_usage,
        $sid,
        $service_conf,
        $current_node,
        $try_next,
        $tried_nodes,
        $maintenance_fallback,
    ) = @_;

    my $group = get_service_group($groups, $online_node_usage, $service_conf);

    my ($pri_groups, $group_members) = get_node_priority_groups($group, $online_node_usage);

    my @pri_list = sort { $b <=> $a } keys %$pri_groups;
    return undef if !scalar(@pri_list);

    # stay on current node if possible (avoids random migrations)
    if (!$try_next && $group->{nofailback} && defined($group_members->{$current_node})) {
        return $current_node;
    }

    # select node from top priority node list
    my $top_pri = $pri_list[0];
    my $candidates = $pri_groups->{$top_pri};

    # try to avoid nodes where the service failed already if we want to relocate
    if ($try_next) {
        delete $candidates->{$_} for @$tried_nodes;
    }

    return $maintenance_fallback
        if defined($maintenance_fallback) && $candidates->{$maintenance_fallback};

    return $current_node if !$try_next && $candidates->{$current_node};

    # ascending score, ties broken by node name
    my $scores = $online_node_usage->score_nodes_to_start_service($sid, $current_node);
    my @nodes = sort { $scores->{$a} <=> $scores->{$b} || $a cmp $b } keys %$candidates;

    # when relocating, continue with the node following the current one, if there is any
    if ($try_next) {
        my $found;
        for my $i (0 .. $#nodes) {
            next if $nodes[$i] ne $current_node;
            $found = $i;
            last;
        }
        return $nodes[$found + 1] if defined($found) && $found < $#nodes;
    }

    return $nodes[0];
}
180
c4a221bc
DM
# counter mixed into every generated ID, so that two calls in the same second differ
my $uid_counter = 0;

# Generate a new ID string for a service state.
#
# The ID is the base64 MD5 digest of the given state name, the process ID, the current time
# and a per-process call counter.
sub compute_new_uuid {
    my ($state) = @_;

    $uid_counter++;
    return md5_base64($state . $$ . time() . $uid_counter);
}
189
618fbeda
DM
# set of service states the CRM accepts as target of a state change
my $valid_service_states = {
    stopped => 1,
    request_stop => 1,
    started => 1,
    fence => 1,
    recovery => 1,
    migrate => 1,
    relocate => 1,
    freeze => 1,
    error => 1,
};
201
561e7f4b
FE
# Rebuild $self->{online_node_usage} from the list of online nodes and the service status.
#
# The usage object is created according to the configured scheduler mode; if the 'static' one
# cannot be set up, or the mode is unknown, the 'basic' implementation is used instead.
#
# FIXME with 'static' mode and thousands of services, the overhead can be noticable and the fact
# that this function is called for each state change and upon recovery doesn't help.
sub recompute_online_node_usage {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $online_nodes = $self->{ns}->list_online_nodes();

    my $online_node_usage;

    if (my $mode = $self->{'scheduler-mode'}) {
        if ($mode eq 'static') {
            $online_node_usage = eval {
                my $scheduler = PVE::HA::Usage::Static->new($haenv);
                $scheduler->add_node($_) for $online_nodes->@*;
                return $scheduler;
            };
            # capture $@ immediately, before any other call can clobber it
            my $err = $@;
            $haenv->log('warning', "using 'basic' scheduler mode, init for 'static' failed - $err")
                if $err;
        } elsif ($mode eq 'basic') {
            # handled below in the general fall-back case
        } else {
            $haenv->log('warning', "got unknown scheduler mode '$mode', using 'basic'");
        }
    }

    # fallback to the basic algorithm in any case
    if (!$online_node_usage) {
        $online_node_usage = PVE::HA::Usage::Basic->new($haenv);
        $online_node_usage->add_node($_) for $online_nodes->@*;
    }

    foreach my $sid (keys %{$self->{ss}}) {
        my $sd = $self->{ss}->{$sid};
        my $state = $sd->{state};
        my $source = $sd->{node};
        my $target = $sd->{target}; # optional

        my $target_tracked = defined($target) && $online_node_usage->contains_node($target);

        if ($online_node_usage->contains_node($source)) {
            if (
                $state eq 'started' || $state eq 'request_stop' || $state eq 'fence'
                || $state eq 'freeze' || $state eq 'error' || $state eq 'recovery'
            ) {
                $online_node_usage->add_service_usage_to_node($source, $sid, $source);
            } elsif ($state eq 'migrate' || $state eq 'relocate') {
                # count it for both, source and target as load is put on both
                $online_node_usage->add_service_usage_to_node($source, $sid, $source, $target);
                # only account on the target if it is tracked, i.e., online
                $online_node_usage->add_service_usage_to_node($target, $sid, $source, $target)
                    if $target_tracked;
            } elsif ($state eq 'stopped') {
                # do nothing
            } else {
                die "should not be reached (sid = '$sid', state = '$state')";
            }
        } elsif ($target_tracked) {
            if ($state eq 'migrate' || $state eq 'relocate') {
                # to correctly track maintenance modi and also consider the target as used for the
                # case a node dies, as we cannot really know if the to-be-aborted incoming migration
                # has already cleaned up all used resources
                $online_node_usage->add_service_usage_to_node($target, $sid, $source, $target);
            }
        }
    }

    $self->{online_node_usage} = $online_node_usage;
}
267
4e01bc86
DM
# Switch a service to a new state.
#
# Parameters:
#   $self      - manager object
#   $sid       - service ID, must exist in the service status
#   $new_state - target state, must differ from the current one and be a valid CRM state
#   %params    - additional key/value pairs stored in the service status (e.g. node, target)
#
# All existing keys of the service status entry are dropped, except 'node', 'failed_nodes' and
# 'maintenance_node'; then the new state and %params are stored, the online node usage is
# recomputed, a new uid is assigned and the change is logged.
#
# Dies if the service is unknown, the state does not change or the new state is invalid.
my $change_service_state = sub {
    my ($self, $sid, $new_state, %params) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});

    my $sd = $ss->{$sid} || die "no such service '$sid'";

    my $old_state = $sd->{state};
    my $old_node = $sd->{node};
    my $old_failed_nodes = $sd->{failed_nodes};
    my $old_maintenance_node = $sd->{maintenance_node};

    die "no state change" if $old_state eq $new_state; # just to be sure

    die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state};

    # reset the entry in place, callers hold references to this very hash
    %$sd = ();

    $sd->{state} = $new_state;
    $sd->{node} = $old_node;
    $sd->{failed_nodes} = $old_failed_nodes if defined($old_failed_nodes);
    $sd->{maintenance_node} = $old_maintenance_node if defined($old_maintenance_node);

    my @param_texts;
    foreach my $k (sort keys %params) {
        my $v = $params{$k};
        push @param_texts, "$k = $v";
        $sd->{$k} = $v;
    }
    my $text_state = join(', ', @param_texts);

    $self->recompute_online_node_usage();

    $sd->{uid} = compute_new_uuid($new_state);

    $text_state = " ($text_state)" if $text_state;
    $haenv->log('info', "service '$sid': state changed from '${old_state}'" .
        " to '${new_state}'$text_state");
};
307
5dd3ed86
TL
# clean up a possible bad state from a recovered service to allow its start
#
# Parameters:
#   $self        - manager object
#   $sid         - service ID of the recovered service
#   $fenced_node - node the service was recovered from
#
# Dies if no resource plugin is registered for the service type.
#
# NOTE(review): no `use PVE::HA::Resources` line is visible in this file; presumably the module
# is loaded elsewhere before this runs - confirm.
my $fence_recovery_cleanup = sub {
    my ($self, $sid, $fenced_node) = @_;

    my $haenv = $self->{haenv};

    my (undef, $type, $id) = $haenv->parse_sid($sid);
    my $plugin = PVE::HA::Resources->lookup($type);

    # should not happen
    die "unknown resource type '$type'" if !$plugin;

    # locks may block recovery, cleanup those which are safe to remove after fencing,
    # i.e., after the original node was reset and thus all it's state
    my $removable_locks = [
        qw(backup mounted migrate clone rollback snapshot snapshot-delete suspending suspended)
    ];

    if (my $removed_lock = $plugin->remove_locks($haenv, $id, $removable_locks, $fenced_node)) {
        $haenv->log('warning', "removed leftover lock '$removed_lock' from recovered " .
            "service '$sid' to allow its start.");
    }
};
338
# read LRM status for all nodes
#
# Returns a list of two hash references:
#   $results - command results of all nodes merged into one hash, keyed by uid; if the same uid
#              shows up more than once, the first true entry wins
#   $modes   - LRM mode per node, 'active' if the node does not report one
sub read_lrm_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $results = {};
    my $modes = {};
    foreach my $node (@{ $self->{ns}->list_nodes() }) {
        my $lrm_status = $haenv->read_lrm_status($node);
        $modes->{$node} = $lrm_status->{mode} || 'active';

        my $node_results = $lrm_status->{results};
        foreach my $uid (keys %$node_results) {
            next if $results->{$uid}; # should not happen
            $results->{$uid} = $node_results->{$uid};
        }
    }

    return ($results, $modes);
}
359
aa98a844
DM
# read new crm commands and save them into crm master status
#
# Understood commands, one per line:
#   migrate|relocate <sid> <node> - stored as [ $task, $node ] in the service's 'cmd' entry, if
#                                   the node is online and not already the service's node
#   stop <sid> <timeout>          - stored as [ 'stop', $timeout ]
# Everything else, and commands for unknown services, is logged as error and skipped.
sub update_crm_commands {
    my ($self) = @_;

    my ($haenv, $ns, $ss) = $self->@{qw(haenv ns ss)};

    # treat "no command list at all" like an empty one
    my $cmdlist = $haenv->read_crm_commands() // '';

    foreach my $cmd (split(/\n/, $cmdlist)) {
        if ($cmd =~ m/^(migrate|relocate)\s+(\S+)\s+(\S+)$/) {
            my ($task, $sid, $node) = ($1, $2, $3);

            my $sd = $ss->{$sid};
            if (!$sd) {
                $haenv->log('err', "crm command error - no such service: $cmd");
            } elsif (!$ns->node_is_online($node)) {
                $haenv->log('err', "crm command error - node not online: $cmd");
            } elsif ($node eq $sd->{node}) {
                $haenv->log('info', "ignore crm command - service already on target node: $cmd");
            } else {
                $haenv->log('info', "got crm command: $cmd");
                $sd->{cmd} = [ $task, $node ];
            }
        } elsif ($cmd =~ m/^stop\s+(\S+)\s+(\S+)$/) {
            my ($sid, $timeout) = ($1, $2);

            if (my $sd = $ss->{$sid}) {
                $haenv->log('info', "got crm command: $cmd");
                $sd->{cmd} = [ 'stop', $timeout ];
            } else {
                $haenv->log('err', "crm command error - no such service: $cmd");
            }
        } else {
            $haenv->log('err', "unable to parse crm command: $cmd");
        }
    }

    return;
}
402
8f0bb968
DM
# Run one round of the manager.
#
# Steps, in order:
#   1. update the node status from the environment's node info and the LRM modes; return
#      early (without writing the manager status) if the local node is not operational
#   2. read service and group configuration, add newly configured services to the service
#      status and drop services that vanished from the config or are set to 'ignored'
#   3. queue new CRM commands
#   4. repeatedly advance every service's state machine, freeze services on nodes whose LRM is
#      in 'restart' mode and process fencing, until a pass changes nothing
#   5. write out the manager status
sub manage {
    my ($self) = @_;

    my ($haenv, $ns, $ss) = $self->@{qw(haenv ns ss)};

    my ($node_info) = $haenv->get_node_info();
    my ($lrm_results, $lrm_modes) = $self->read_lrm_status();

    $ns->update($node_info, $lrm_modes);

    if (!$ns->node_is_operational($haenv->nodename())) {
        $haenv->log('info', "master seems offline");
        return;
    }

    my $sc = $haenv->read_service_config();

    $self->{groups} = $haenv->read_group_config(); # update

    # compute new service status

    # add new service
    foreach my $sid (sort keys %$sc) {
        next if $ss->{$sid}; # already there
        my $cd = $sc->{$sid};
        next if $cd->{state} eq 'ignored';

        $haenv->log('info', "adding new service '$sid' on node '$cd->{node}'");
        # assume we are running to avoid relocate running service at add
        my $state = ($cd->{state} eq 'started') ? 'started' : 'request_stop';
        $ss->{$sid} = {
            state => $state,
            node => $cd->{node},
            uid => compute_new_uuid('started'),
        };
    }

    # remove stale or ignored services from manager state; sorted for a stable log order
    foreach my $sid (sort keys %$ss) {
        next if $sc->{$sid} && $sc->{$sid}->{state} ne 'ignored';

        my $reason = defined($sc->{$sid}) ? 'ignored state requested' : 'no config';
        $haenv->log('info', "removing stale service '$sid' ($reason)");

        # remove all service related state information
        delete $ss->{$sid};
    }

    $self->update_crm_commands();

    for (;;) {
        my $repeat = 0;

        $self->recompute_online_node_usage();

        foreach my $sid (sort keys %$ss) {
            my $sd = $ss->{$sid};
            my $cd = $sc->{$sid} || { state => 'disabled' };

            my $lrm_res = $sd->{uid} ? $lrm_results->{$sd->{uid}} : undef;

            my $last_state = $sd->{state};

            if ($last_state eq 'stopped') {
                $self->next_state_stopped($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'started') {
                $self->next_state_started($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'migrate' || $last_state eq 'relocate') {
                $self->next_state_migrate_relocate($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'fence') {
                # do nothing here - wait until fenced
            } elsif ($last_state eq 'recovery') {
                $self->next_state_recovery($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'request_stop') {
                $self->next_state_request_stop($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'freeze') {
                my $lrm_mode = $sd->{node} ? $lrm_modes->{$sd->{node}} : undef;
                # unfreeze
                my $state = ($cd->{state} eq 'started') ? 'started' : 'request_stop';
                $change_service_state->($self, $sid, $state)
                    if $lrm_mode && $lrm_mode eq 'active';
            } elsif ($last_state eq 'error') {
                $self->next_state_error($sid, $cd, $sd, $lrm_res);
            } else {
                die "unknown service state '$last_state'";
            }

            # looked up only now, as the handlers above may have changed the service's node
            my $lrm_mode = $sd->{node} ? $lrm_modes->{$sd->{node}} : undef;
            if ($lrm_mode && $lrm_mode eq 'restart') {
                my $state = $sd->{state};
                if ($state eq 'started' || $state eq 'stopped' || $state eq 'request_stop') {
                    $change_service_state->($self, $sid, 'freeze');
                }
            }

            $repeat = 1 if $sd->{state} ne $last_state;
        }

        # handle fencing
        my $fenced_nodes = {};
        foreach my $sid (sort keys %$ss) {
            my ($service_state, $service_node) = $ss->{$sid}->@{'state', 'node'};
            next if $service_state ne 'fence';

            # call fence_node() only once per node and pass
            if (!defined($fenced_nodes->{$service_node})) {
                $fenced_nodes->{$service_node} = $ns->fence_node($service_node) || 0;
            }

            next if !$fenced_nodes->{$service_node};

            # node fence was successful - recover service
            $change_service_state->($self, $sid, 'recovery');
            $repeat = 1; # for faster recovery execution
        }

        # Avoid that a node without services in 'fence' state (e.g., removed
        # manually by admin) is stuck with the 'fence' node state.
        for my $node (sort grep { !defined($fenced_nodes->{$_}) } keys $ns->{status}->%*) {
            next if $ns->get_node_state($node) ne 'fence';

            $haenv->log('notice', "node '$node' in fence state but no services to-fence! admin interference?!");
            $repeat = 1 if $ns->fence_node($node);
        }

        last if !$repeat;
    }

    $self->flush_master_status();
}
546
a875fbe8
DM
# functions to compute next service states
# $cd: service configuration data (read only)
# $sd: service status data (read only)
#
# Note: use change_service_state() to alter state
#
553
0df5b3dd
DM
# Compute the next state of a service in the 'request_stop' state.
#
# With an LRM result: exit code SUCCESS moves the service to 'stopped', anything else is logged
# and moves it to 'error'. Without a result, the service is moved to 'fence' once its node
# counts as offline (delayed).
sub next_state_request_stop {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    # check result from LRM daemon
    if ($lrm_res) {
        my $exit_code = $lrm_res->{exit_code};
        if ($exit_code == SUCCESS) {
            $change_service_state->($self, $sid, 'stopped');
        } else {
            $haenv->log('err', "service '$sid' stop failed (exit code $exit_code)");
            $change_service_state->($self, $sid, 'error'); # fixme: what state?
        }
        return;
    }

    if ($ns->node_is_offline_delayed($sd->{node})) {
        $change_service_state->($self, $sid, 'fence');
    }

    return;
}
578
8aaa0e36
DM
# Compute the next state of a service in the 'migrate' or 'relocate' state.
#
# With an LRM result:
#   SUCCESS     - service continues on the target node, in the state requested by its config
#   EWRONG_NODE - logged, service enters the 'error' state
#   otherwise   - logged, service continues on its current node in the requested state
# Without a result, the service is moved to 'fence' once its node counts as offline (delayed).
sub next_state_migrate_relocate {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    # check result from LRM daemon
    if ($lrm_res) {
        my $exit_code = $lrm_res->{exit_code};
        my $req_state = $cd->{state} eq 'started' ? 'started' : 'request_stop';
        if ($exit_code == SUCCESS) {
            $change_service_state->($self, $sid, $req_state, node => $sd->{target});
            return;
        } elsif ($exit_code == EWRONG_NODE) {
            $haenv->log('err', "service '$sid' - migration failed: service" .
                " registered on wrong node!");
            $change_service_state->($self, $sid, 'error');
            # NOTE(review): unlike the other result branches this one does not return, so the
            # offline check below still runs after the switch to 'error' - confirm this is intended
        } else {
            $haenv->log('err', "service '$sid' - migration failed (exit code $exit_code)");
            $change_service_state->($self, $sid, $req_state, node => $sd->{node});
            return;
        }
    }

    if ($ns->node_is_offline_delayed($sd->{node})) {
        $change_service_state->($self, $sid, 'fence');
    }

    return;
}
608
# Compute the next state of a service in the 'stopped' state.
#
# Order of handling:
#   1. sync the recorded node with the configured one
#   2. process a queued CRM command (migrate/relocate may start a transfer and return)
#   3. configured 'disabled': nothing to do
#   4. node offline (delayed) and not in maintenance: 'fence'
#   5. configured 'stopped': nothing to do
#   6. configured 'started': 'started'
# Any other configured state is logged as error.
sub next_state_stopped {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    if ($sd->{node} ne $cd->{node}) {
        # this can happen if we fence a node with active migrations
        # hack: modify $sd (normally this should be considered read-only)
        $haenv->log('info', "fixup service '$sid' location ($sd->{node} => $cd->{node})");
        $sd->{node} = $cd->{node};
    }

    if ($sd->{cmd}) {
        my $cmd = shift @{$sd->{cmd}};

        if ($cmd eq 'migrate' || $cmd eq 'relocate') {
            my $target = shift @{$sd->{cmd}};
            if (!$ns->node_is_online($target)) {
                $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
            } elsif ($sd->{node} eq $target) {
                $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
            } else {
                $change_service_state->(
                    $self, $sid, $cmd, node => $sd->{node}, target => $target,
                );
                return;
            }
        } elsif ($cmd eq 'stop') {
            $haenv->log('info', "ignore service '$sid' $cmd request - service already stopped");
        } else {
            $haenv->log('err', "unknown command '$cmd' for service '$sid'");
        }
        delete $sd->{cmd};
    }

    if ($cd->{state} eq 'disabled') {
        # NOTE: do nothing here, the stop state is an exception as we do not
        # process the LRM result here, thus the LRM always tries to stop the
        # service (protection for the case no CRM is active)
        return;
    }

    if (
        $ns->node_is_offline_delayed($sd->{node})
        && $ns->get_node_state($sd->{node}) ne 'maintenance'
    ) {
        $change_service_state->($self, $sid, 'fence');
        return;
    }

    if ($cd->{state} eq 'stopped') {
        # almost the same as 'disabled' state but the service will also get recovered
        return;
    }

    if ($cd->{state} eq 'started') {
        # simply mark it started, if it's on the wrong node
        # next_state_started will fix that for us
        $change_service_state->($self, $sid, 'started', node => $sd->{node});
        return;
    }

    $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
}
670
# Append $node to the 'failed_nodes' list of service $sid, creating the list if needed.
#
# Returns the new number of entries in the list (the result of push).
sub record_service_failed_on_node {
    my ($self, $sid, $node) = @_;

    push @{ $self->{ss}->{$sid}->{failed_nodes} //= [] }, $node;
}
680
# Compute the next state of a service in the 'started' state.
#
# Order of handling:
#   1. service node not online: 'fence' if it counts as offline (delayed); processing only
#      continues if the node is in maintenance, which is then remembered as fallback node
#   2. configured 'disabled' or 'stopped': 'request_stop'
#   3. a queued CRM command (migrate/relocate/stop) is processed
#   4. otherwise the LRM result is evaluated (start failures apply the relocate policy) and
#      select_service_node() decides whether the service has to move to another node
# Any configured state other than 'disabled', 'stopped' or 'started' is logged as error.
sub next_state_started {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    if (!$ns->node_is_online($sd->{node})) {
        if ($ns->node_is_offline_delayed($sd->{node})) {
            $change_service_state->($self, $sid, 'fence');
        }
        return if $ns->get_node_state($sd->{node}) ne 'maintenance';

        # save current node as fallback for when it comes out of maintenance
        $sd->{maintenance_node} = $sd->{node};
    }

    if ($cd->{state} eq 'disabled' || $cd->{state} eq 'stopped') {
        $change_service_state->($self, $sid, 'request_stop');
        return;
    }

    if ($cd->{state} ne 'started') {
        $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
        return;
    }

    if ($sd->{cmd}) {
        my $cmd = shift @{$sd->{cmd}};

        if ($cmd eq 'migrate' || $cmd eq 'relocate') {
            my $target = shift @{$sd->{cmd}};
            if (!$ns->node_is_online($target)) {
                $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
            } elsif ($sd->{node} eq $target) {
                $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
            } else {
                $haenv->log('info', "$cmd service '$sid' to node '$target'");
                $change_service_state->($self, $sid, $cmd, node => $sd->{node}, target => $target);
            }
        } elsif ($cmd eq 'stop') {
            my $timeout = shift @{$sd->{cmd}};
            if ($timeout == 0) {
                $haenv->log('info', "request immediate service hard-stop for service '$sid'");
            } else {
                $haenv->log('info', "request graceful stop with timeout '$timeout' for service '$sid'");
            }
            $change_service_state->($self, $sid, 'request_stop', timeout => $timeout);
            $haenv->update_service_config($sid, {'state' => 'stopped'});
        } else {
            $haenv->log('err', "unknown command '$cmd' for service '$sid'");
        }

        delete $sd->{cmd};

        return;
    }

    my $try_next = 0;

    if ($lrm_res) {
        my $ec = $lrm_res->{exit_code};
        if ($ec == SUCCESS) {
            if (defined($sd->{failed_nodes})) {
                $haenv->log('info', "relocation policy successful for '$sid' on node '$sd->{node}'," .
                    " failed nodes: " . join(', ', @{$sd->{failed_nodes}}));
            }

            delete $sd->{failed_nodes};

            # store flag to indicate successful start - only valid while state == 'started'
            $sd->{running} = 1;
        } elsif ($ec == ERROR) {
            delete $sd->{running};

            # apply our relocate policy if we got ERROR from the LRM
            $self->record_service_failed_on_node($sid, $sd->{node});

            if (scalar(@{$sd->{failed_nodes}}) <= $cd->{max_relocate}) {
                # tell select_service_node to relocate if possible
                $try_next = 1;

                $haenv->log('warning', "starting service $sid on node".
                    " '$sd->{node}' failed, relocating service.");
            } else {
                $haenv->log('err', "recovery policy for service $sid " .
                    "failed, entering error state. Failed nodes: ".
                    join(', ', @{$sd->{failed_nodes}}));
                $change_service_state->($self, $sid, 'error');
                return;
            }
        } else {
            $self->record_service_failed_on_node($sid, $sd->{node});

            $haenv->log('err', "service '$sid' got unrecoverable error" .
                " (exit code $ec))");
            # we have no save way out (yet) for other errors
            $change_service_state->($self, $sid, 'error');
            return;
        }
    }

    my $node = select_service_node(
        $self->{groups},
        $self->{online_node_usage},
        $sid,
        $cd,
        $sd->{node},
        $try_next,
        $sd->{failed_nodes},
        $sd->{maintenance_node},
    );

    if ($node && ($sd->{node} ne $node)) {
        $self->{online_node_usage}->add_service_usage_to_node($node, $sid, $sd->{node});

        if (defined(my $fallback = $sd->{maintenance_node})) {
            if ($node eq $fallback) {
                $haenv->log('info', "moving service '$sid' back to '$fallback', node came back from maintenance.");
                delete $sd->{maintenance_node};
            } elsif ($sd->{node} ne $fallback) {
                $haenv->log('info', "dropping maintenance fallback node '$fallback' for '$sid'");
                delete $sd->{maintenance_node};
            }
        }

        if ($cd->{type} eq 'vm') {
            $haenv->log('info', "migrate service '$sid' to node '$node' (running)");
            $change_service_state->($self, $sid, 'migrate', node => $sd->{node}, target => $node);
        } else {
            $haenv->log('info', "relocate service '$sid' to node '$node'");
            $change_service_state->($self, $sid, 'relocate', node => $sd->{node}, target => $node);
        }
    } else {
        if ($try_next && !defined($node)) {
            $haenv->log('warning', "Start Error Recovery: Tried all available " .
                " nodes for service '$sid', retry start on current node. " .
                "Tried nodes: " . join(', ', @{$sd->{failed_nodes}}));
        }
        # ensure service get started again if it went unexpected down
        # but ensure also no LRM result gets lost
        $sd->{uid} = compute_new_uuid($sd->{state}) if defined($lrm_res);
    }

    return;
}
c0bbd038 838
a2881965
TL
# Compute the next state of a service in the 'error' state.
#
# The only way out is a configured state of 'disabled': the list of failed nodes is dropped and
# the service is moved to 'stopped'. Otherwise nothing happens.
sub next_state_error {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    return if $cd->{state} ne 'disabled';

    # clean up on error recovery
    delete $sd->{failed_nodes};

    $change_service_state->($self, $sid, 'stopped');

    return;
}
854
c259b1a8
TL
# after a node was fenced this recovers the service to a new node
#
# If select_service_node() finds a node, leftover locks are cleaned up, the service is stolen
# from the fenced node and continues on the new node in the state requested by its config
# ('started', else 'request_stop'). If no node is found this is logged and retried on a later
# call, unless the service is configured 'disabled', which moves it directly to 'stopped'.
sub next_state_recovery {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};

    if ($sd->{state} ne 'recovery') { # should not happen
        $haenv->log('err', "cannot recover service '$sid' from fencing, wrong state '$sd->{state}'");
        return;
    }

    my $fenced_node = $sd->{node}; # for logging purpose

    $self->recompute_online_node_usage(); # we want the most current node state

    my $recovery_node = select_service_node(
        $self->{groups},
        $self->{online_node_usage},
        $sid,
        $cd,
        $sd->{node},
    );

    if (!$recovery_node) {
        # no possible node found, cannot recover - but retry later, as we always try to make it available
        $haenv->log('err', "recovering service '$sid' from fenced node '$fenced_node' failed, no recovery node found");

        if ($cd->{state} eq 'disabled') {
            # allow getting a service out of recovery manually if an admin disables it.
            delete $sd->{failed_nodes}; # clean up on recovery to stopped
            $change_service_state->($self, $sid, 'stopped'); # must NOT go through request_stop
        }
        return;
    }

    my $msg = "recover service '$sid' from fenced node '$fenced_node' to node '$recovery_node'";
    if ($recovery_node eq $fenced_node) {
        # can happen if restricted groups and the node came up again OK
        $msg = "recover service '$sid' to previous failed and fenced node '$fenced_node' again";
    }
    $haenv->log('info', "$msg");

    $fence_recovery_cleanup->($self, $sid, $fenced_node);

    $haenv->steal_service($sid, $sd->{node}, $recovery_node);
    $self->{online_node_usage}->add_service_usage_to_node($recovery_node, $sid, $recovery_node);

    # NOTE: $sd *is normally read-only*, fencing is the exception
    $cd->{node} = $sd->{node} = $recovery_node;
    my $new_state = ($cd->{state} eq 'started') ? 'started' : 'request_stop';
    $change_service_state->($self, $sid, $new_state, node => $recovery_node);
}
909
c0bbd038 9101;