src/PVE/HA/Manager.pm
package PVE::HA::Manager;

use strict;
use warnings;

use Digest::MD5 qw(md5_base64);

use PVE::Tools;
use PVE::HA::Tools ':exit_codes';
use PVE::HA::NodeStatus;
use PVE::HA::Usage::Basic;
use PVE::HA::Usage::Static;

## Variable Name & Abbreviations Convention
#
# The HA stack uses some variables so frequently that they are abbreviated, which may be
# confusing for new readers. Here's a short list of the most commonly used ones.
#
# NOTE: variables should be assumed to be read-only if not otherwise stated; only use the
# specific methods to re-compute/read/alter them.
#
# - $haenv -> HA environment, the main interface to the simulator/test/real world
# - $sid -> Service ID, unique identifier for a service, `type:vmid` is common
#
# - $ms -> Master/Manager Status, contains runtime info from the current active manager
# - $ns -> Node Status, hash holding online/offline status about all nodes
#
# - $ss -> Service Status, hash holding the current state (last LRM cmd result, failed starts
#   or migrates, maintenance fallback node, ...) for *all* services
# - $sd -> Service Data, the service status of a *single* service, iow. $ss->{$sid}
#
# - $sc -> Service Configuration, hash for all services including target state, group, ...
# - $cd -> Configuration Data, the service config of a *single* service, iow. $sc->{$sid}
#
# Try to avoid adding new two-letter (or similarly over-abbreviated) names, but also don't send
# patches changing the above, as that set is mostly sensible and should be easy to remember once
# you've spent a bit of time in the HA code base.

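# As an illustration only (not an exhaustive schema), a single service status entry
# $ss->{$sid} as managed below typically holds keys like:
#
#   {
#       state => 'started',            # one of the states in $valid_service_states
#       node => 'node1',               # node the service is (or was last) located on
#       uid => '...',                  # UID of the last CRM command sent to the LRM
#       failed_nodes => [...],         # optional, nodes where a start already failed
#       maintenance_node => 'node2',   # optional, fallback node for maintenance mode
#   }
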
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my $self = bless { haenv => $haenv, crs => {} }, $class;

    my $old_ms = $haenv->read_manager_status();

    # we only copy the state part of the manager which cannot be auto-generated

    $self->{ns} = PVE::HA::NodeStatus->new($haenv, $old_ms->{node_status} || {});

    # fixme: use separate class PVE::HA::ServiceStatus
    $self->{ss} = $old_ms->{service_status} || {};

    $self->{ms} = { master_node => $haenv->nodename() };

    $self->update_crs_scheduler_mode(); # initial set, we update it once every loop

    return $self;
}

sub update_crs_scheduler_mode {
    my ($self) = @_;

    my $haenv = $self->{haenv};
    my $dc_cfg = $haenv->get_datacenter_settings();

    $self->{crs}->{rebalance_on_request_start} = !!$dc_cfg->{crs}->{'ha-rebalance-on-start'};

    my $old_mode = $self->{crs}->{scheduler};
    my $new_mode = $dc_cfg->{crs}->{ha} || 'basic';

    if (!defined($old_mode)) {
        $haenv->log('info', "using scheduler mode '$new_mode'") if $new_mode ne 'basic';
    } elsif ($new_mode eq $old_mode) {
        return; # nothing to do
    } else {
        $haenv->log('info', "switching scheduler mode from '$old_mode' to '$new_mode'");
    }

    $self->{crs}->{scheduler} = $new_mode;

    return;
}

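# For reference, the datacenter settings consumed above are expected to look
# roughly like this (illustrative shape only, as read via get_datacenter_settings()):
#
#   { crs => { ha => 'static', 'ha-rebalance-on-start' => 1 } }
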
sub cleanup {
    my ($self) = @_;

    # todo: ?
}

sub flush_master_status {
    my ($self) = @_;

    my ($haenv, $ms, $ns, $ss) = ($self->{haenv}, $self->{ms}, $self->{ns}, $self->{ss});

    $ms->{node_status} = $ns->{status};
    $ms->{service_status} = $ss;
    $ms->{timestamp} = $haenv->get_time();

    $haenv->write_manager_status($ms);
}

sub get_service_group {
    my ($groups, $online_node_usage, $service_conf) = @_;

    my $group = {};
    # add all online nodes to default group to allow try_next when no group set
    $group->{nodes}->{$_} = 1 for $online_node_usage->list_nodes();

    # overwrite default if service is bound to a specific group
    if (my $group_id = $service_conf->{group}) {
        $group = $groups->{ids}->{$group_id} if $groups->{ids}->{$group_id};
    }

    return $group;
}

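# Illustrative note: group node entries may carry an optional priority suffix,
# e.g. 'node1:2' means node1 with priority 2; entries without a suffix default to
# priority 0 (see the regex below). For unrestricted groups, online non-members
# are added with priority -1.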
# groups available nodes with their priority as group index
sub get_node_priority_groups {
    my ($group, $online_node_usage) = @_;

    my $pri_groups = {};
    my $group_members = {};
    foreach my $entry (keys %{$group->{nodes}}) {
        my ($node, $pri) = ($entry, 0);
        if ($entry =~ m/^(\S+):(\d+)$/) {
            ($node, $pri) = ($1, $2);
        }
        next if !$online_node_usage->contains_node($node); # offline
        $pri_groups->{$pri}->{$node} = 1;
        $group_members->{$node} = $pri;
    }

    # add non-group members to unrestricted groups (priority -1)
    if (!$group->{restricted}) {
        my $pri = -1;
        for my $node ($online_node_usage->list_nodes()) {
            next if defined($group_members->{$node});
            $pri_groups->{$pri}->{$node} = 1;
            $group_members->{$node} = -1;
        }
    }

    return ($pri_groups, $group_members);
}

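# Pick the node a service should run on. A short sketch of the parameter
# semantics, as used by the callers in this module:
# - $try_next: a start failed, prefer a node not yet listed in $tried_nodes
# - $maintenance_fallback: node to prefer for moving back after maintenance
# - $best_scored: only consider the best-scored node instead of preferring to
#   stay on $current_node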
sub select_service_node {
    my (
        $groups,
        $online_node_usage,
        $sid,
        $service_conf,
        $current_node,
        $try_next,
        $tried_nodes,
        $maintenance_fallback,
        $best_scored,
    ) = @_;

    my $group = get_service_group($groups, $online_node_usage, $service_conf);

    my ($pri_groups, $group_members) = get_node_priority_groups($group, $online_node_usage);

    my @pri_list = sort { $b <=> $a } keys %$pri_groups;
    return undef if !scalar(@pri_list);

    # stay on current node if possible (avoids random migrations)
    if (
        (!$try_next && !$best_scored)
        && $group->{nofailback}
        && defined($group_members->{$current_node})
    ) {
        return $current_node;
    }

    # select node from top priority node list

    my $top_pri = $pri_list[0];

    # try to avoid nodes where the service failed already if we want to relocate
    if ($try_next) {
        foreach my $node (@$tried_nodes) {
            delete $pri_groups->{$top_pri}->{$node};
        }
    }

    return $maintenance_fallback
        if defined($maintenance_fallback) && $pri_groups->{$top_pri}->{$maintenance_fallback};

    return $current_node
        if (!$try_next && !$best_scored) && $pri_groups->{$top_pri}->{$current_node};

    my $scores = $online_node_usage->score_nodes_to_start_service($sid, $current_node);
    my @nodes = sort {
        $scores->{$a} <=> $scores->{$b} || $a cmp $b
    } keys %{$pri_groups->{$top_pri}};

    my $found;
    for (my $i = scalar(@nodes) - 1; $i >= 0; $i--) {
        my $node = $nodes[$i];
        if ($node eq $current_node) {
            $found = $i;
        }
    }

    if ($try_next) {
        if (!$best_scored && defined($found) && ($found < (scalar(@nodes) - 1))) {
            return $nodes[$found + 1];
        } else {
            return $nodes[0];
        }
    } else {
        return $nodes[0];
    }
}

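# Each CRM command gets a fresh UID so the matching LRM result can be identified;
# hashing the state together with the manager's PID, the current time and a
# monotonic counter makes collisions practically impossible.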
my $uid_counter = 0;

sub compute_new_uuid {
    my ($state) = @_;

    $uid_counter++;
    return md5_base64($state . $$ . time() . $uid_counter);
}

my $valid_service_states = {
    stopped => 1,
    request_stop => 1,
    request_start => 1,
    request_start_balance => 1,
    started => 1,
    fence => 1,
    recovery => 1,
    migrate => 1,
    relocate => 1,
    freeze => 1,
    error => 1,
};

# FIXME with 'static' mode and thousands of services, the overhead can be noticeable and the fact
# that this function is called for each state change and upon recovery doesn't help.
sub recompute_online_node_usage {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $online_nodes = $self->{ns}->list_online_nodes();

    my $online_node_usage;

    if (my $mode = $self->{crs}->{scheduler}) {
        if ($mode eq 'static') {
            $online_node_usage = eval {
                my $scheduler = PVE::HA::Usage::Static->new($haenv);
                $scheduler->add_node($_) for $online_nodes->@*;
                return $scheduler;
            };
            $haenv->log('warning', "fallback to 'basic' scheduler mode, init for 'static' failed - $@")
                if $@;
        } elsif ($mode eq 'basic') {
            # handled below in the general fall-back case
        } else {
            $haenv->log('warning', "got unknown scheduler mode '$mode', using 'basic'");
        }
    }

    # fall back to the basic algorithm in any case
    if (!$online_node_usage) {
        $online_node_usage = PVE::HA::Usage::Basic->new($haenv);
        $online_node_usage->add_node($_) for $online_nodes->@*;
    }

    foreach my $sid (sort keys %{$self->{ss}}) {
        my $sd = $self->{ss}->{$sid};
        my $state = $sd->{state};
        my $target = $sd->{target}; # optional
        if ($online_node_usage->contains_node($sd->{node})) {
            if (
                $state eq 'started' || $state eq 'request_stop' || $state eq 'fence'
                || $state eq 'freeze' || $state eq 'error' || $state eq 'recovery'
            ) {
                $online_node_usage->add_service_usage_to_node($sd->{node}, $sid, $sd->{node});
            } elsif ($state eq 'migrate' || $state eq 'relocate' || $state eq 'request_start_balance') {
                my $source = $sd->{node};
                # count it for both, source and target, as load is put on both
                $online_node_usage->add_service_usage_to_node($source, $sid, $source, $target)
                    if $state ne 'request_start_balance';
                $online_node_usage->add_service_usage_to_node($target, $sid, $source, $target);
            } elsif ($state eq 'stopped' || $state eq 'request_start') {
                # do nothing
            } else {
                die "should not be reached (sid = '$sid', state = '$state')";
            }
        } elsif (defined($target) && $online_node_usage->contains_node($target)) {
            if ($state eq 'migrate' || $state eq 'relocate') {
                # to correctly track maintenance modes and also consider the target as used for the
                # case a node dies, as we cannot really know if the to-be-aborted incoming migration
                # has already cleaned up all used resources
                $online_node_usage->add_service_usage_to_node($target, $sid, $sd->{node}, $target);
            }
        }
    }

    $self->{online_node_usage} = $online_node_usage;
}

my $change_service_state = sub {
    my ($self, $sid, $new_state, %params) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});

    my $sd = $ss->{$sid} || die "no such service '$sid'";

    my $old_state = $sd->{state};
    my $old_node = $sd->{node};
    my $old_failed_nodes = $sd->{failed_nodes};
    my $old_maintenance_node = $sd->{maintenance_node};

    die "no state change" if $old_state eq $new_state; # just to be sure

    die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state};

    foreach my $k (keys %$sd) { delete $sd->{$k}; }

    $sd->{state} = $new_state;
    $sd->{node} = $old_node;
    $sd->{failed_nodes} = $old_failed_nodes if defined($old_failed_nodes);
    $sd->{maintenance_node} = $old_maintenance_node if defined($old_maintenance_node);

    my $text_state = '';
    foreach my $k (sort keys %params) {
        my $v = $params{$k};
        $text_state .= ", " if $text_state;
        $text_state .= "$k = $v";
        $sd->{$k} = $v;
    }

    $self->recompute_online_node_usage();

    $sd->{uid} = compute_new_uuid($new_state);

    $text_state = " ($text_state)" if $text_state;
    $haenv->log('info', "service '$sid': state changed from '${old_state}' to '${new_state}'$text_state");
};

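# Illustrative call only: extra key/value params are copied into $sd and logged, e.g.
#   $change_service_state->($self, $sid, 'migrate', node => $sd->{node}, target => $target);
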
# clean up a possible bad state from a recovered service to allow its start
my $fence_recovery_cleanup = sub {
    my ($self, $sid, $fenced_node) = @_;

    my $haenv = $self->{haenv};

    my (undef, $type, $id) = $haenv->parse_sid($sid);
    my $plugin = PVE::HA::Resources->lookup($type);

    # should not happen
    die "unknown resource type '$type'" if !$plugin;

    # locks may block recovery, clean up those which are safe to remove after fencing,
    # i.e., after the original node was reset and thus lost all its state
    my $removable_locks = [
        'backup',
        'mounted',
        'migrate',
        'clone',
        'rollback',
        'snapshot',
        'snapshot-delete',
        'suspending',
        'suspended',
    ];
    if (my $removed_lock = $plugin->remove_locks($haenv, $id, $removable_locks, $fenced_node)) {
        $haenv->log('warning', "removed leftover lock '$removed_lock' from recovered "
            . "service '$sid' to allow its start.");
    }
};

# read LRM status for all nodes
sub read_lrm_status {
    my ($self) = @_;

    my $nodes = $self->{ns}->list_nodes();
    my $haenv = $self->{haenv};

    my $results = {};
    my $modes = {};
    foreach my $node (@$nodes) {
        my $lrm_status = $haenv->read_lrm_status($node);
        $modes->{$node} = $lrm_status->{mode} || 'active';
        foreach my $uid (keys %{$lrm_status->{results}}) {
            next if $results->{$uid}; # should not happen
            $results->{$uid} = $lrm_status->{results}->{$uid};
        }
    }

    return ($results, $modes);
}

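# CRM command strings are newline-separated; as the regexes below show, each
# takes one of the following forms:
#   migrate <sid> <node>
#   relocate <sid> <node>
#   stop <sid> <timeout>
#   enable-node-maintenance <node>
#   disable-node-maintenance <node>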
# read new crm commands and save them into crm master status
sub update_crm_commands {
    my ($self) = @_;

    my ($haenv, $ms, $ns, $ss) = ($self->{haenv}, $self->{ms}, $self->{ns}, $self->{ss});

    my $cmdlist = $haenv->read_crm_commands();

    foreach my $cmd (split(/\n/, $cmdlist)) {
        chomp $cmd;

        if ($cmd =~ m/^(migrate|relocate)\s+(\S+)\s+(\S+)$/) {
            my ($task, $sid, $node) = ($1, $2, $3);
            if (my $sd = $ss->{$sid}) {
                if (!$ns->node_is_online($node)) {
                    $haenv->log('err', "crm command error - node not online: $cmd");
                } else {
                    if ($node eq $sd->{node}) {
                        $haenv->log('info', "ignore crm command - service already on target node: $cmd");
                    } else {
                        $haenv->log('info', "got crm command: $cmd");
                        $ss->{$sid}->{cmd} = [$task, $node];
                    }
                }
            } else {
                $haenv->log('err', "crm command error - no such service: $cmd");
            }

        } elsif ($cmd =~ m/^stop\s+(\S+)\s+(\S+)$/) {
            my ($sid, $timeout) = ($1, $2);
            if (my $sd = $ss->{$sid}) {
                $haenv->log('info', "got crm command: $cmd");
                $ss->{$sid}->{cmd} = ['stop', $timeout];
            } else {
                $haenv->log('err', "crm command error - no such service: $cmd");
            }
        } elsif ($cmd =~ m/^enable-node-maintenance\s+(\S+)$/) {
            my $node = $1;

            my $state = $ns->get_node_state($node);
            if ($state eq 'online') {
                $ms->{node_request}->{$node}->{maintenance} = 1;
            } elsif ($state eq 'maintenance') {
                $haenv->log('info', "ignoring crm command - node $node is already in maintenance state");
            } else {
                $haenv->log('err', "crm command error - node not online: $cmd");
            }
        } elsif ($cmd =~ m/^disable-node-maintenance\s+(\S+)$/) {
            my $node = $1;

            my $state = $ns->get_node_state($node);
            if ($state ne 'maintenance') {
                $haenv->log(
                    'warn', "clearing maintenance of node $node requested, but it's in state $state");
            }
            delete $ms->{node_request}->{$node}->{maintenance}; # gets flushed out at the end of the CRM loop
        } else {
            $haenv->log('err', "unable to parse crm command: $cmd");
        }
    }
}

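# Main CRM work function, called by the active master once per manager loop round:
# refresh node/LRM state, sync the service list with the current config, apply
# queued CRM commands, and then iterate the per-service state machine until no
# more state changes happen (with fencing handled in between).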
sub manage {
    my ($self) = @_;

    my ($haenv, $ms, $ns, $ss) = ($self->{haenv}, $self->{ms}, $self->{ns}, $self->{ss});

    my ($node_info) = $haenv->get_node_info();
    my ($lrm_results, $lrm_modes) = $self->read_lrm_status();

    $ns->update($node_info, $lrm_modes);

    if (!$ns->node_is_operational($haenv->nodename())) {
        $haenv->log('info', "master seems offline");
        return;
    }

    $self->update_crs_scheduler_mode();

    my $sc = $haenv->read_service_config();

    $self->{groups} = $haenv->read_group_config(); # update

    # compute new service status

    # add new services
    foreach my $sid (sort keys %$sc) {
        next if $ss->{$sid}; # already there
        my $cd = $sc->{$sid};
        next if $cd->{state} eq 'ignored';

        $haenv->log('info', "adding new service '$sid' on node '$cd->{node}'");
        # assume the service is running to avoid relocating it when it is added
        my $state = ($cd->{state} eq 'started') ? 'request_start' : 'request_stop';
        $ss->{$sid} = {
            state => $state, node => $cd->{node}, uid => compute_new_uuid('started'),
        };
    }

    # remove stale or ignored services from manager state
    foreach my $sid (keys %$ss) {
        next if $sc->{$sid} && $sc->{$sid}->{state} ne 'ignored';

        my $reason = defined($sc->{$sid}) ? 'ignored state requested' : 'no config';
        $haenv->log('info', "removing stale service '$sid' ($reason)");

        # remove all service related state information
        delete $ss->{$sid};
    }

    $self->update_crm_commands();

    for (;;) {
        my $repeat = 0;

        $self->recompute_online_node_usage();

        foreach my $sid (sort keys %$ss) {
            my $sd = $ss->{$sid};
            my $cd = $sc->{$sid} || { state => 'disabled' };

            my $lrm_res = $sd->{uid} ? $lrm_results->{$sd->{uid}} : undef;

            my $last_state = $sd->{state};

            if ($last_state eq 'stopped') {

                $self->next_state_stopped($sid, $cd, $sd, $lrm_res);

            } elsif ($last_state eq 'started') {

                $self->next_state_started($sid, $cd, $sd, $lrm_res);

            } elsif ($last_state eq 'request_start') {

                $self->next_state_request_start($sid, $cd, $sd, $lrm_res);

            } elsif ($last_state eq 'migrate' || $last_state eq 'relocate' || $last_state eq 'request_start_balance') {

                $self->next_state_migrate_relocate($sid, $cd, $sd, $lrm_res);

            } elsif ($last_state eq 'fence') {

                # do nothing here - wait until fenced

            } elsif ($last_state eq 'recovery') {

                $self->next_state_recovery($sid, $cd, $sd, $lrm_res);

            } elsif ($last_state eq 'request_stop') {

                $self->next_state_request_stop($sid, $cd, $sd, $lrm_res);

            } elsif ($last_state eq 'freeze') {

                my $lrm_mode = $sd->{node} ? $lrm_modes->{$sd->{node}} : undef;
                if ($lrm_mode && $lrm_mode eq 'active') { # unfreeze if active again
                    my $state = ($cd->{state} eq 'started') ? 'started' : 'request_stop';
                    $change_service_state->($self, $sid, $state);
                }

            } elsif ($last_state eq 'error') {

                $self->next_state_error($sid, $cd, $sd, $lrm_res);

            } else {

                die "unknown service state '$last_state'";
            }

            my $lrm_mode = $sd->{node} ? $lrm_modes->{$sd->{node}} : undef;
            if ($lrm_mode && $lrm_mode eq 'restart') {
                my $state = $sd->{state};
                if ($state eq 'started' || $state eq 'stopped' || $state eq 'request_stop') {
                    $change_service_state->($self, $sid, 'freeze');
                }
            }

            $repeat = 1 if $sd->{state} ne $last_state;
        }

        # handle fencing
        my $fenced_nodes = {};
        foreach my $sid (sort keys %$ss) {
            my ($service_state, $service_node) = $ss->{$sid}->@{'state', 'node'};
            next if $service_state ne 'fence';

            if (!defined($fenced_nodes->{$service_node})) {
                $fenced_nodes->{$service_node} = $ns->fence_node($service_node) || 0;
            }

            next if !$fenced_nodes->{$service_node};

            # node fence was successful - recover service
            $change_service_state->($self, $sid, 'recovery');
            $repeat = 1; # for faster recovery execution
        }

        # Avoid that a node without services in 'fence' state (e.g., removed
        # manually by an admin) is stuck with the 'fence' node state.
        for my $node (sort grep { !defined($fenced_nodes->{$_}) } keys $ns->{status}->%*) {
            next if $ns->get_node_state($node) ne 'fence';

            $haenv->log('notice', "node '$node' in fence state but no services to-fence! admin interference?!");
            $repeat = 1 if $ns->fence_node($node);
        }

        last if !$repeat;
    }

    $self->flush_master_status();
}

# functions to compute next service states
# $cd: service configuration data (read only)
# $sd: service status data (read only)
#
# Note: use change_service_state() to alter state
#
sub next_state_request_stop {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    # check result from LRM daemon
    if ($lrm_res) {
        my $exit_code = $lrm_res->{exit_code};
        if ($exit_code == SUCCESS) {
            &$change_service_state($self, $sid, 'stopped');
            return;
        } else {
            $haenv->log('err', "service '$sid' stop failed (exit code $exit_code)");
            &$change_service_state($self, $sid, 'error'); # fixme: what state?
            return;
        }
    }

    if ($ns->node_is_offline_delayed($sd->{node})) {
        &$change_service_state($self, $sid, 'fence');
        return;
    }
}

sub next_state_migrate_relocate {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    # check result from LRM daemon
    if ($lrm_res) {
        my $exit_code = $lrm_res->{exit_code};
        my $req_state = $cd->{state} eq 'started' ? 'started' : 'request_stop';
        if ($exit_code == SUCCESS) {
            &$change_service_state($self, $sid, $req_state, node => $sd->{target});
            return;
        } elsif ($exit_code == EWRONG_NODE) {
            $haenv->log('err', "service '$sid' - migration failed: service"
                . " registered on wrong node!");
            &$change_service_state($self, $sid, 'error');
        } else {
            $haenv->log('err', "service '$sid' - migration failed (exit code $exit_code)");
            &$change_service_state($self, $sid, $req_state, node => $sd->{node});
            return;
        }
    }

    if ($ns->node_is_offline_delayed($sd->{node})) {
        &$change_service_state($self, $sid, 'fence');
        return;
    }
}

sub next_state_stopped {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    if ($sd->{node} ne $cd->{node}) {
        # this can happen if we fence a node with active migrations
        # hack: modify $sd (normally this should be considered read-only)
        $haenv->log('info', "fixup service '$sid' location ($sd->{node} => $cd->{node})");
        $sd->{node} = $cd->{node};
    }

    if ($sd->{cmd}) {
        my $cmd = shift @{$sd->{cmd}};

        if ($cmd eq 'migrate' || $cmd eq 'relocate') {
            my $target = shift @{$sd->{cmd}};
            if (!$ns->node_is_online($target)) {
                $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
            } elsif ($sd->{node} eq $target) {
                $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
            } else {
                &$change_service_state($self, $sid, $cmd, node => $sd->{node}, target => $target);
                return;
            }
        } elsif ($cmd eq 'stop') {
            $haenv->log('info', "ignore service '$sid' $cmd request - service already stopped");
        } else {
            $haenv->log('err', "unknown command '$cmd' for service '$sid'");
        }
        delete $sd->{cmd};
    }

    if ($cd->{state} eq 'disabled') {
        # NOTE: do nothing here, the stop state is an exception as we do not
        # process the LRM result here, thus the LRM always tries to stop the
        # service (protection for the case no CRM is active)
        return;
    }

    if ($ns->node_is_offline_delayed($sd->{node}) && $ns->get_node_state($sd->{node}) ne 'maintenance') {
        &$change_service_state($self, $sid, 'fence');
        return;
    }

    if ($cd->{state} eq 'stopped') {
        # almost the same as the 'disabled' state, but the service will also get recovered
        return;
    }

    if ($cd->{state} eq 'started') {
        # simply mark it started; if it's on the wrong node, next_state_started will fix that for us
        $change_service_state->($self, $sid, 'request_start', node => $sd->{node});
        return;
    }

    $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
}

sub next_state_request_start {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $current_node = $sd->{node};

    if ($self->{crs}->{rebalance_on_request_start}) {
        my $selected_node = select_service_node(
            $self->{groups},
            $self->{online_node_usage},
            $sid,
            $cd,
            $sd->{node},
            0, # try_next
            $sd->{failed_nodes},
            $sd->{maintenance_node},
            1, # best_scored
        );
        my $select_text = $selected_node ne $current_node ? 'new' : 'current';
        $haenv->log('info', "service $sid: re-balance selected $select_text node $selected_node for startup");

        if ($selected_node ne $current_node) {
            $change_service_state->($self, $sid, 'request_start_balance', node => $current_node, target => $selected_node);
            return;
        }
    }

    $change_service_state->($self, $sid, 'started', node => $current_node);
}

sub record_service_failed_on_node {
    my ($self, $sid, $node) = @_;

    if (!defined($self->{ss}->{$sid}->{failed_nodes})) {
        $self->{ss}->{$sid}->{failed_nodes} = [];
    }

    push @{$self->{ss}->{$sid}->{failed_nodes}}, $node;
}

sub next_state_started {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $master_status = $self->{ms};
    my $ns = $self->{ns};

    if (!$ns->node_is_online($sd->{node})) {
        if ($ns->node_is_offline_delayed($sd->{node})) {
            &$change_service_state($self, $sid, 'fence');
        }
        if ($ns->get_node_state($sd->{node}) ne 'maintenance') {
            return;
        } else {
            # save current node as fallback for when it comes out of maintenance
            $sd->{maintenance_node} = $sd->{node};
        }
    }

    if ($cd->{state} eq 'disabled' || $cd->{state} eq 'stopped') {
        &$change_service_state($self, $sid, 'request_stop');
        return;
    }

    if ($cd->{state} eq 'started') {

        if ($sd->{cmd}) {
            my $cmd = shift @{$sd->{cmd}};

            if ($cmd eq 'migrate' || $cmd eq 'relocate') {
                my $target = shift @{$sd->{cmd}};
                if (!$ns->node_is_online($target)) {
                    $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
                } elsif ($sd->{node} eq $target) {
                    $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
                } else {
                    $haenv->log('info', "$cmd service '$sid' to node '$target'");
                    &$change_service_state($self, $sid, $cmd, node => $sd->{node}, target => $target);
                }
            } elsif ($cmd eq 'stop') {
                my $timeout = shift @{$sd->{cmd}};
                if ($timeout == 0) {
                    $haenv->log('info', "request immediate service hard-stop for service '$sid'");
                } else {
                    $haenv->log('info', "request graceful stop with timeout '$timeout' for service '$sid'");
                }
                &$change_service_state($self, $sid, 'request_stop', timeout => $timeout);
                $haenv->update_service_config($sid, { 'state' => 'stopped' });
            } else {
                $haenv->log('err', "unknown command '$cmd' for service '$sid'");
            }

            delete $sd->{cmd};

        } else {

            my $try_next = 0;

            if ($lrm_res) {

                my $ec = $lrm_res->{exit_code};
                if ($ec == SUCCESS) {

                    if (defined($sd->{failed_nodes})) {
                        $haenv->log('info', "relocation policy successful for '$sid' on node '$sd->{node}',"
                            . " failed nodes: " . join(', ', @{$sd->{failed_nodes}}));
                    }

                    delete $sd->{failed_nodes};

                    # store flag to indicate successful start - only valid while state == 'started'
                    $sd->{running} = 1;

                } elsif ($ec == ERROR || $ec == EWRONG_NODE) {

                    delete $sd->{running};

                    # apply our relocate policy if we got ERROR from the LRM
                    $self->record_service_failed_on_node($sid, $sd->{node});

                    if (scalar(@{$sd->{failed_nodes}}) <= $cd->{max_relocate}) {

                        # tell select_service_node to relocate if possible
                        $try_next = 1;

                        $haenv->log('warning', "starting service $sid on node"
                            . " '$sd->{node}' failed, relocating service.");

                    } else {

                        $haenv->log('err', "recovery policy for service $sid "
                            . "failed, entering error state. Failed nodes: "
                            . join(', ', @{$sd->{failed_nodes}}));
                        &$change_service_state($self, $sid, 'error');
                        return;

                    }
                } else {
                    $self->record_service_failed_on_node($sid, $sd->{node});

                    $haenv->log('err', "service '$sid' got unrecoverable error (exit code $ec)");
                    # we have no safe way out (yet) for other errors
                    &$change_service_state($self, $sid, 'error');
                    return;
                }
            }

            my $node = select_service_node(
                $self->{groups},
                $self->{online_node_usage},
                $sid,
                $cd,
                $sd->{node},
                $try_next,
                $sd->{failed_nodes},
                $sd->{maintenance_node},
            );

            if ($node && ($sd->{node} ne $node)) {
                $self->{online_node_usage}->add_service_usage_to_node($node, $sid, $sd->{node});

                if (defined(my $fallback = $sd->{maintenance_node})) {
                    if ($node eq $fallback) {
                        $haenv->log(
                            'info',
                            "moving service '$sid' back to '$fallback', node came back from maintenance.",
                        );
                        delete $sd->{maintenance_node};
                    } elsif ($sd->{node} ne $fallback) {
                        $haenv->log('info', "dropping maintenance fallback node '$fallback' for '$sid'");
                        delete $sd->{maintenance_node};
                    }
                }

                if ($cd->{type} eq 'vm') {
                    $haenv->log('info', "migrate service '$sid' to node '$node' (running)");
                    &$change_service_state($self, $sid, 'migrate', node => $sd->{node}, target => $node);
                } else {
                    $haenv->log('info', "relocate service '$sid' to node '$node'");
                    &$change_service_state($self, $sid, 'relocate', node => $sd->{node}, target => $node);
                }
            } else {
                if ($try_next && !defined($node)) {
                    $haenv->log(
                        'warning',
                        "Start Error Recovery: Tried all available nodes for service '$sid', retry"
                            . " start on current node. Tried nodes: "
                            . join(', ', @{$sd->{failed_nodes}}),
                    );
                }
                # ensure the service gets started again if it went down unexpectedly,
                # but also ensure no LRM result gets lost
                $sd->{uid} = compute_new_uuid($sd->{state}) if defined($lrm_res);
            }
        }

        return;
    }

    $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
}

sub next_state_error {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $ns = $self->{ns};
    my $ms = $self->{ms};

    if ($cd->{state} eq 'disabled') {
        # clean up on error recovery
        delete $sd->{failed_nodes};

        &$change_service_state($self, $sid, 'stopped');
        return;
    }
}

# after a node was fenced this recovers the service to a new node
sub next_state_recovery {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});
    my $ns = $self->{ns};
    my $ms = $self->{ms};

    if ($sd->{state} ne 'recovery') { # should not happen
        $haenv->log('err', "cannot recover service '$sid' from fencing, wrong state '$sd->{state}'");
        return;
    }

    my $fenced_node = $sd->{node}; # for logging purposes

    $self->recompute_online_node_usage(); # we want the most current node state

    my $recovery_node = select_service_node(
        $self->{groups},
        $self->{online_node_usage},
        $sid,
        $cd,
        $sd->{node},
    );

    if ($recovery_node) {
        my $msg = "recover service '$sid' from fenced node '$fenced_node' to node '$recovery_node'";
        if ($recovery_node eq $fenced_node) {
            # can happen with restricted groups if the node came up again OK
            $msg = "recover service '$sid' to previously failed and fenced node '$fenced_node' again";
        }
        $haenv->log('info', "$msg");

        $fence_recovery_cleanup->($self, $sid, $fenced_node);

        $haenv->steal_service($sid, $sd->{node}, $recovery_node);
        $self->{online_node_usage}->add_service_usage_to_node($recovery_node, $sid, $recovery_node);

        # NOTE: $sd *is normally read-only*, fencing is the exception
        $cd->{node} = $sd->{node} = $recovery_node;
        my $new_state = ($cd->{state} eq 'started') ? 'started' : 'request_stop';
        $change_service_state->($self, $sid, $new_state, node => $recovery_node);
    } else {
        # no possible node found, cannot recover - but retry later, as we always try to make it available
        $haenv->log('err', "recovering service '$sid' from fenced node '$fenced_node' failed, no recovery node found");

        if ($cd->{state} eq 'disabled') {
            # allow getting a service out of recovery manually if an admin disables it.
            delete $sd->{failed_nodes}; # clean up on recovery to stopped
            $change_service_state->($self, $sid, 'stopped'); # must NOT go through request_stop
            return;
        }
    }
}

1;