]> git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/LRM.pm
make clean: also clean source tar ball
[pve-ha-manager.git] / src / PVE / HA / LRM.pm
1 package PVE::HA::LRM;
2
3 # Local Resource Manager
4
5 use strict;
6 use warnings;
7 use POSIX qw(:sys_wait_h);
8
9 use PVE::SafeSyslog;
10 use PVE::Tools;
11 use PVE::HA::Tools ':exit_codes';
12 use PVE::HA::Resources;
13
14 # Server can have several states:
15
16 my $valid_states = {
17 wait_for_agent_lock => "waiting for agent lock",
18 active => "got agent_lock",
19 maintenance => "got agent_lock (maintenance)",
20 lost_agent_lock => "lost agent_lock",
21 };
22
# Construct a Local Resource Manager instance bound to the given HA
# environment; starts out in 'startup' and immediately transitions to
# 'wait_for_agent_lock'.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my %init = (
        haenv => $haenv,
        status => { state => 'startup' },
        workers => {},          # queued/running resource agent jobs, by sid
        results => {},          # finished command results, by uid
        restart_tries => {},    # per-service start retry counters
        shutdown_request => 0,
        shutdown_errors => 0,
        # mode can be: active, reboot, shutdown, restart
        mode => 'active',
        cluster_state_update => 0,
    );

    my $self = bless { %init }, $class;

    $self->set_local_status({ state => 'wait_for_agent_lock' });

    return $self;
}
45
# Handle an external shutdown/restart request: decide (based on the node
# shutdown state and the datacenter shutdown_policy) whether services get
# stopped-and-frozen or only stopped, queue the stop jobs, and switch the
# LRM into the matching mode. Idempotent: a second call is a no-op.
sub shutdown_request {
    my ($self) = @_;

    return if $self->{shutdown_request}; # already in shutdown mode

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my ($shutdown, $reboot) = $haenv->is_node_shutdown();

    my $dc_ha_cfg = $haenv->get_ha_settings();
    my $shutdown_policy = $dc_ha_cfg->{shutdown_policy} // 'conditional';

    if ($shutdown) { # don't log this on service restart, only on node shutdown
        $haenv->log('info', "got shutdown request with shutdown policy '$shutdown_policy'");
    }

    # decide if services stay frozen to this node or may be recovered by
    # the CRM on other nodes while we are down
    my $freeze_all;
    if ($shutdown_policy eq 'conditional') {
        $freeze_all = $reboot;
    } elsif ($shutdown_policy eq 'freeze') {
        $freeze_all = 1;
    } elsif ($shutdown_policy eq 'failover') {
        $freeze_all = 0;
    } else {
        $haenv->log('err', "unknown shutdown policy '$shutdown_policy', fall back to conditional");
        $freeze_all = $reboot;
    }

    if ($shutdown) {
        # *always* queue stop jobs for all services if the node shuts down,
        # independent if it's a reboot or a poweroff, else we may corrupt
        # services or hinder node shutdown
        my $ss = $self->{service_status};

        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next if !$sd->{node};
            next if $sd->{node} ne $nodename;
            # Note: use undef uid to mark shutdown/stop jobs
            $self->queue_resource_command($sid, undef, 'request_stop');
        }
    }

    if ($shutdown) {
        if ($freeze_all) {
            if ($reboot) {
                $haenv->log('info', "reboot LRM, stop and freeze all services");
            } else {
                $haenv->log('info', "shutdown LRM, stop and freeze all services");
            }
            $self->{mode} = 'restart';
        } else {
            $haenv->log('info', "shutdown LRM, stop all services");
            $self->{mode} = 'shutdown';
        }
    } else {
        $haenv->log('info', "restart LRM, freeze all services");
        $self->{mode} = 'restart';
    }

    $self->{shutdown_request} = 1;

    eval { $self->update_lrm_status(); };
    if (my $err = $@) {
        # FIX: was $self->log(...) — the LRM class has no log() method, so
        # the error path itself died; log via the HA environment instead
        $haenv->log('err', "unable to update lrm status file - $err");
    }
}
115
# Accessor for the current local status hash
# ({ state => ..., state_change_time => ... }).
sub get_local_status {
    my $self = shift;

    return $self->{status};
}
121
# Switch the local state machine to $new (a hash with a 'state' key).
# Dies on a state not listed in $valid_states; does nothing when the
# state did not actually change, otherwise logs the transition and
# records the change time.
sub set_local_status {
    my ($self, $new) = @_;

    my $next_state = $new->{state};
    die "invalid state '$next_state'" if !$valid_states->{$next_state};

    my $old_state = $self->{status}->{state};

    # important: only update if it really changed
    return if $old_state eq $next_state;

    my $haenv = $self->{haenv};
    $haenv->log('info', "status change $old_state => $next_state");

    $new->{state_change_time} = $haenv->get_time();
    $self->{status} = $new;
}
140
# Persist the current LRM state, mode and command results for the CRM to
# read. Skipped (returns 0) without quorum; returns 1 on a successful
# write, 0 when writing failed.
sub update_lrm_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    return 0 if !$haenv->quorate();

    my $lrm_status = {
        state => $self->{status}->{state},
        mode => $self->{mode},
        results => $self->{results},
        timestamp => $haenv->get_time(),
    };

    eval { $haenv->write_lrm_status($lrm_status); };
    if (my $err = $@) {
        $haenv->log('err', "unable to write lrm status file - $err");
        return 0;
    }

    return 1;
}
163
# Refresh $self->{service_status} from the manager status file. Returns 1
# on success and a false value (undef/empty list) when reading failed, in
# which case the old service status is kept.
sub update_service_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $ms = eval { $haenv->read_manager_status(); };
    if (my $err = $@) {
        $haenv->log('err', "updating service status from manager failed: $err");
        # bare return (not "return undef") so list context gets an empty
        # list; callers only test this in boolean scalar context
        return;
    }

    $self->{service_status} = $ms->{service_status} || {};
    return 1;
}
178
# Try to acquire (or renew) the HA agent lock, retrying for at most 6
# attempts / ~5 seconds. While we hold the lock the hardware watchdog is
# kept open and updated, so losing the lock eventually fences the node.
# Returns 1 when the lock is held, 0 otherwise.
sub get_protected_ha_agent_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $started = $haenv->get_time();

    for (my $try = 0; ; $try++) {
        if ($haenv->get_ha_agent_lock()) {
            if (my $wd = $self->{ha_agent_wd}) {
                $haenv->watchdog_update($wd);
            } else {
                $self->{ha_agent_wd} = $haenv->watchdog_open();
            }
            return 1;
        }

        last if $try >= 5; # try max 6 times
        last if ($haenv->get_time() - $started) > 5; # for max 5 seconds

        $haenv->sleep(1);
    }

    return 0;
}
209
# Count the services placed on this node whose requested state is an
# active one, i.e. not 'stopped', not 'freeze' and not 'error'
# (erroneous services are not managed by HA, so they don't count).
sub active_service_count {
    my ($self) = @_;

    my $haenv = $self->{haenv};
    my $nodename = $haenv->nodename();

    my $count = 0;
    for my $sd (values %{ $self->{service_status} }) {
        next if !$sd->{node} || $sd->{node} ne $nodename;

        my $req_state = $sd->{state} // next;
        next if $req_state eq 'stopped'
             || $req_state eq 'freeze'
             || $req_state eq 'error';

        $count++;
    }

    return $count;
}
237
# file-scoped flag: set once the initial LRM status write succeeded;
# work() retries that write every round until it does
my $wrote_lrm_status_at_startup = 0;
# Run a single LRM main-loop iteration, wrapped in the environment's
# loop start/end hooks. Returns what work() returns: 0 to stop the LRM,
# 1 to keep looping.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    $haenv->loop_start_hook();

    # remember if the cluster state could be refreshed this round;
    # work() skips resource management on a stale state
    $self->{cluster_state_update} = $haenv->cluster_state_update();

    my $result = $self->work();

    $haenv->loop_end_hook();

    return $result;
}
255
# Run one LRM round: first perform state transitions (acquire the agent
# lock, notice fence requests or lock loss), then do the work of the
# resulting state. Returns 0 when the LRM should exit, 1 to keep looping.
sub work {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    # retry the initial status write until it succeeds (needs quorum)
    if (!$wrote_lrm_status_at_startup) {
        if ($self->update_lrm_status()) {
            $wrote_lrm_status_at_startup = 1;
        } else {
            # do nothing
            $haenv->sleep(5);
            return $self->{shutdown_request} ? 0 : 1;
        }
    }

    my $status = $self->get_local_status();
    my $state = $status->{state};

    $self->update_service_status();

    # non-zero when the manager requested this node to be fenced
    my $fence_request = PVE::HA::Tools::count_fenced_services($self->{service_status}, $haenv->nodename());

    # do state changes first

    my $ctime = $haenv->get_time();

    # FIXME:

    if ($state eq 'wait_for_agent_lock') {

        # only grab the lock (and thus arm the watchdog) when there is
        # actually something to manage here
        my $service_count = $self->active_service_count();

        if (!$fence_request && $service_count && $haenv->quorate()) {
            if ($self->get_protected_ha_agent_lock()) {
                $self->set_local_status({ state => 'active' });
            }
        }

    } elsif ($state eq 'lost_agent_lock') {

        if (!$fence_request && $haenv->quorate()) {
            if ($self->get_protected_ha_agent_lock()) {
                $self->set_local_status({ state => 'active' });
            }
        }

    } elsif ($state eq 'active') {

        if ($fence_request) {
            $haenv->log('err', "node need to be fenced - releasing agent_lock\n");
            $self->set_local_status({ state => 'lost_agent_lock'});
        } elsif (!$self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'lost_agent_lock'});
        }
    }

    $status = $self->get_local_status();
    $state = $status->{state};

    # do work

    if ($state eq 'wait_for_agent_lock') {

        return 0 if $self->{shutdown_request};

        $self->update_lrm_status();

        $haenv->sleep(5);

    } elsif ($state eq 'active') {

        my $startime = $haenv->get_time();

        my $max_time = 10;

        my $shutdown = 0;

        # do work (max_time seconds)
        eval {
            # fixme: set alert timer

            # if we could not get the current service status there's no point
            # in doing anything, try again next round.
            return if !$self->update_service_status();

            if ($self->{shutdown_request}) {

                if ($self->{mode} eq 'restart') {

                    my $service_count = $self->active_service_count();

                    if ($service_count == 0) {

                        if ($self->run_workers() == 0) {
                            if ($self->{ha_agent_wd}) {
                                $haenv->watchdog_close($self->{ha_agent_wd});
                                delete $self->{ha_agent_wd};
                            }

                            $shutdown = 1;

                            # restart with no or freezed services, release the lock
                            $haenv->release_ha_agent_lock();
                        }
                    }
                } else {

                    if ($self->run_workers() == 0) {
                        # keep the lock (and watchdog) if any stop failed, so
                        # the CRM can still see/recover the node state
                        if ($self->{shutdown_errors} == 0) {
                            if ($self->{ha_agent_wd}) {
                                $haenv->watchdog_close($self->{ha_agent_wd});
                                delete $self->{ha_agent_wd};
                            }

                            # shutdown with all services stopped thus release the lock
                            $haenv->release_ha_agent_lock();
                        }

                        $shutdown = 1;
                    }
                }
            } else {
                if (!$self->{cluster_state_update}) {
                    # update failed but we could still renew our lock (cfs restart?),
                    # safely skip manage and expect to update just fine next round
                    $haenv->log('notice', "temporary inconsistent cluster state " .
                        "(cfs restart?), skip round");
                    return;
                }

                $self->manage_resources();

            }
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        $self->update_lrm_status();

        return 0 if $shutdown;

        $haenv->sleep_until($startime + $max_time);

    } elsif ($state eq 'lost_agent_lock') {

        # Note: watchdog is active and will trigger soon!

        # so we hope to get the lock back soon!

        if ($self->{shutdown_request}) {

            my $service_count = $self->active_service_count();

            if ($service_count > 0) {
                $haenv->log('err', "get shutdown request in state 'lost_agent_lock' - " .
                    "detected $service_count running services");

                if ($self->{mode} eq 'restart') {
                    my $state_mt = $self->{status}->{state_change_time};

                    # watchdog should have already triggered, so either it's
                    # set to noboot or it failed. As we are in restart mode, and
                    # have infinity stoptimeout -> exit now - we don't touch services
                    # or change state, so this is safe, relatively speaking
                    if (($haenv->get_time() - $state_mt) > 90) {
                        $haenv->log('err', "lost agent lock and restart request for over 90 seconds - giving up!");
                        return 0;
                    }
                }
            } else {

                # all services are stopped, so we can close the watchdog

                if ($self->{ha_agent_wd}) {
                    $haenv->watchdog_close($self->{ha_agent_wd});
                    delete $self->{ha_agent_wd};
                }

                return 0;
            }
        }

        $haenv->sleep(5);

    } else {

        die "got unexpected status '$state'\n";

    }

    return 1;
}
449
# Start/continue the queued resource agent commands, spending at most
# ~5 seconds per call. With max_workers > 0 each command runs in a forked
# child; with 0 the agent is executed inline (e.g. for regression tests).
# Returns the number of workers still queued or running afterwards.
sub run_workers {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $starttime = $haenv->get_time();

    # number of workers to start, if 0 we exec the command directly without forking
    my $max_workers = $haenv->get_max_workers();

    my $sc = $haenv->read_service_config();

    while (($haenv->get_time() - $starttime) < 5) {
        # reap finished children first; $count is how many still run
        my $count = $self->check_active_workers();

        foreach my $sid (sort keys %{$self->{workers}}) {
            # respect the concurrency limit (only meaningful when forking)
            last if $count >= $max_workers && $max_workers > 0;

            my $w = $self->{workers}->{$sid};
            if (!$w->{pid}) {
                # only fork if we may else call exec_resource_agent
                # directly (e.g. for regression tests)
                if ($max_workers > 0) {
                    my $pid = fork();
                    if (!defined($pid)) {
                        $haenv->log('err', "fork worker failed");
                        $count = 0; last; # abort, try later
                    } elsif ($pid == 0) {
                        $haenv->after_fork(); # cleanup

                        # do work
                        my $res = -1;
                        eval {
                            $res = $self->exec_resource_agent($sid, $sc->{$sid}, $w->{state}, $w->{target});
                        };
                        if (my $err = $@) {
                            $haenv->log('err', $err);
                            POSIX::_exit(-1);
                        }
                        # _exit, not exit: skip END blocks/destructors in the child
                        POSIX::_exit($res);
                    } else {
                        $count++;
                        $w->{pid} = $pid;
                    }
                } else {
                    my $res = -1;
                    eval {
                        $res = $self->exec_resource_agent($sid, $sc->{$sid}, $w->{state}, $w->{target});
                        # shift into the high byte to mimic a wait() status
                        # word, so the finish handlers below can treat forked
                        # and inline execution alike
                        $res = $res << 8 if $res > 0;
                    };
                    if (my $err = $@) {
                        $haenv->log('err', $err);
                    }
                    # undef uid marks a node-shutdown stop job (see shutdown_request)
                    if (defined($w->{uid})) {
                        $self->resource_command_finished($sid, $w->{uid}, $res);
                    } else {
                        $self->stop_command_finished($sid, $res);
                    }
                }
            }
        }

        last if !$count;

        $haenv->sleep(1);
    }

    return scalar(keys %{$self->{workers}});
}
519
# Queue a resource command for every service the manager placed on this
# node (except frozen ones) and run the worker queue. Also prunes retry
# counters of services which vanished from the manager status.
sub manage_resources {
    my ($self) = @_;

    my $haenv = $self->{haenv};
    my $nodename = $haenv->nodename();
    my $ss = $self->{service_status};

    # drop stale restart counters
    for my $sid (keys %{ $self->{restart_tries} }) {
        delete $self->{restart_tries}->{$sid} if !$ss->{$sid};
    }

    for my $sid (keys %$ss) {
        my $sd = $ss->{$sid};

        next if !$sd->{node} || !$sd->{uid};
        next if $sd->{node} ne $nodename;

        my $req_state = $sd->{state};
        next if !defined($req_state);
        next if $req_state eq 'freeze';

        $self->queue_resource_command($sid, $sd->{uid}, $req_state, $sd->{target});
    }

    return $self->run_workers();
}
546
# Queue a resource agent command for $sid. A command whose uid already
# has a recorded result is not queued again: re-running it could create
# an inconsistent HA state when the first try failed but the CRM has not
# processed that failure yet (race condition). 'stopped' is the
# exception, since its result is never processed by the CRM and we want
# it executed always (even with no active CRM). An already running
# worker for $sid is left alone; a merely queued one is replaced.
sub queue_resource_command {
    my ($self, $sid, $uid, $state, $target) = @_;

    if ($state ne 'stopped' && $uid) {
        return if defined($self->{results}->{$uid});
    }

    if (my $worker = $self->{workers}->{$sid}) {
        return if $worker->{pid}; # already started
        # else, replace the still-queued entry with the new command
        delete $self->{workers}->{$sid};
    }

    my $entry = {
        sid => $sid,
        uid => $uid,
        state => $state,
    };
    $entry->{target} = $target if $target;

    $self->{workers}->{$sid} = $entry;
}
572
# Reap finished worker child processes (non-blocking) and hand their wait
# status to the matching *_command_finished handler; returns the number
# of workers still running.
sub check_active_workers {
    my ($self) = @_;

    # finish/count workers
    my $count = 0;
    foreach my $sid (keys %{$self->{workers}}) {
        my $w = $self->{workers}->{$sid};
        if (my $pid = $w->{pid}) {
            # check status without blocking
            my $waitpid = waitpid($pid, WNOHANG);
            if (defined($waitpid) && ($waitpid == $pid)) {
                # undef uid marks a node-shutdown stop job (see shutdown_request)
                if (defined($w->{uid})) {
                    $self->resource_command_finished($sid, $w->{uid}, $?);
                } else {
                    $self->stop_command_finished($sid, $?);
                }
            } else {
                $count++;
            }
        }
    }

    return $count;
}
597
# Finish a node-shutdown stop job for $sid. $status is a wait()-style
# status word (or -1 when the agent could not be executed at all); any
# failure bumps the shutdown_errors counter, which keeps the agent lock
# and watchdog alive on shutdown.
sub stop_command_finished {
    my ($self, $sid, $status) = @_;

    my $haenv = $self->{haenv};

    my $worker = delete $self->{workers}->{$sid};
    return if !$worker; # should not happen

    my $exit_code = -1;
    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
    } else {
        $exit_code = ($status >> 8);
    }

    $self->{shutdown_errors}++ if $exit_code != 0;
}
620
# Finish a CRM-issued resource command for $sid/$uid. Decodes the
# wait()-style $status into an agent exit code, runs it through the
# restart policy (handle_service_exitcode) and records the result for
# the CRM — unless a retry was requested. Finally prunes results whose
# uid the manager no longer references.
sub resource_command_finished {
    my ($self, $sid, $uid, $status) = @_;

    my $haenv = $self->{haenv};

    my $worker = delete $self->{workers}->{$sid};
    return if !$worker; # should not happen

    # -1 signals abnormal termination (exec failure or signal death)
    my $exit_code = -1;
    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
    } else {
        $exit_code = ($status >> 8);
    }

    $exit_code = $self->handle_service_exitcode($sid, $worker->{state}, $exit_code);

    return if $exit_code == ETRY_AGAIN; # tell nobody, simply retry

    $self->{results}->{$uid} = {
        sid => $worker->{sid},
        state => $worker->{state},
        exit_code => $exit_code,
    };

    # keep only results whose uid still exists in the manager status
    my %valid_uids =
        map { $_->{uid} ? ($_->{uid} => 1) : () } values %{ $self->{service_status} };

    my $old_results = $self->{results};
    $self->{results} = {
        map { $valid_uids{$_} ? ($_ => $old_results->{$_}) : () } keys %$old_results
    };
}
666
# processes the exit code from a finished resource agent, so that the CRM knows
# if the LRM wants to retry an action based on the current recovery policies for
# the failed service, or the CRM itself must try to recover from the failure.
sub handle_service_exitcode {
    my ($self, $sid, $cmd, $exit_code) = @_;

    my $haenv = $self->{haenv};
    my $tries = $self->{restart_tries};

    my $sc = $haenv->read_service_config();

    # default to no restarts; guard against a service config without
    # max_restart set (FIX: a plain assignment could overwrite the 0
    # default with undef, causing uninitialized-value warnings in the
    # numeric comparison below — undef compares as 0 either way)
    my $max_restart = 0;
    if (my $cd = $sc->{$sid}) {
        $max_restart = $cd->{max_restart} // 0;
    }

    if ($cmd eq 'started') {

        if ($exit_code == SUCCESS) {
            # service is up, reset the retry counter
            $tries->{$sid} = 0;
            return $exit_code;

        } elsif ($exit_code == ERROR) {

            $tries->{$sid} //= 0;

            if ($tries->{$sid} >= $max_restart) {
                $haenv->log('err', "unable to start service $sid on local node".
                    " after $tries->{$sid} retries");
                $tries->{$sid} = 0;
                return ERROR;
            }

            $tries->{$sid}++;

            $haenv->log('warning', "restart policy: retry number $tries->{$sid}" .
                " for service '$sid'");
            # tell CRM that we retry the start
            return ETRY_AGAIN;
        }
    }

    return $exit_code;
}
715
# Execute one resource agent command ($cmd: started, request_stop,
# stopped, migrate, relocate, error) for service $sid via its resource
# plugin. Returns one of the :exit_codes constants (SUCCESS, ERROR,
# EUNKNOWN_SERVICE_TYPE, ...); runs either in a forked worker or inline.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment
    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my (undef, $service_type, $service_name) = $haenv->parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($service_type);
    if (!$plugin) {
        $haenv->log('err', "service type '$service_type' not implemented");
        return EUNKNOWN_SERVICE_TYPE;
    }

    if (!$service_config) {
        $haenv->log('err', "missing resource configuration for '$sid'");
        return EUNKNOWN_SERVICE;
    }

    # process error state early
    if ($cmd eq 'error') {
        $haenv->log('err', "service $sid is in an error state and needs manual " .
            "intervention. Look up 'ERROR RECOVERY' in the documentation.");

        return SUCCESS; # error always succeeds
    }

    if ($service_config->{node} ne $nodename) {
        $haenv->log('err', "service '$sid' not on this node");
        return EWRONG_NODE;
    }

    my $id = $service_name;

    my $running = $plugin->check_running($haenv, $id);

    if ($cmd eq 'started') {

        return SUCCESS if $running;

        $haenv->log("info", "starting service $sid");

        $plugin->start($haenv, $id);

        $running = $plugin->check_running($haenv, $id);

        if ($running) {
            $haenv->log("info", "service status $sid started");
            return SUCCESS;
        } else {
            $haenv->log("warning", "unable to start service $sid");
            return ERROR;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        return SUCCESS if !$running;

        $haenv->log("info", "stopping service $sid");

        $plugin->shutdown($haenv, $id);

        $running = $plugin->check_running($haenv, $id);

        if (!$running) {
            $haenv->log("info", "service status $sid stopped");
            return SUCCESS;
        } else {
            # FIX: message read "unable to stop stop service" (duplicated word)
            $haenv->log("info", "unable to stop service $sid (still running)");
            return ERROR;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];
        # FIX: the original wrapped this die in a second, redundant
        # !defined($target) check, making a 'return EINVALID_PARAMETER'
        # after it unreachable dead code; the die (observed behavior) stays
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        if ($service_config->{node} eq $target) {
            # already there
            return SUCCESS;
        }

        my $online = ($cmd eq 'migrate') ? 1 : 0;

        my $res = $plugin->migrate($haenv, $id, $target, $online);

        # something went wrong if service is still on this node
        if (!$res) {
            $haenv->log("err", "service $sid not moved (migration error)");
            return ERROR;
        }

        return SUCCESS;

    }

    $haenv->log("err", "implement me (cmd '$cmd')");
    return EUNKNOWN_COMMAND;
}
824
825
826 1;