package PVE::HA::LRM;

# Local Resource Manager

use strict;
use warnings;
use POSIX qw(:sys_wait_h);

use PVE::SafeSyslog;
use PVE::Tools;
use PVE::HA::Tools ':exit_codes';
use PVE::HA::Resources;

# Server can have several states:

my $valid_states = {
    wait_for_agent_lock => "waiting for agent lock",
    active => "got agent_lock",
    lost_agent_lock => "lost agent_lock",
};

sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my $self = bless {
        haenv => $haenv,
        status => { state => 'startup' },
        workers => {},
        results => {},
        restart_tries => {},
        shutdown_request => 0,
        shutdown_errors => 0,
        # mode can be: active, reboot, shutdown, restart
        mode => 'active',
        cluster_state_update => 0,
    }, $class;

    $self->set_local_status({ state => 'wait_for_agent_lock' });

    return $self;
}

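# Handle an incoming shutdown or restart request: pick the effective behavior
# from the datacenter 'shutdown_policy' setting, queue stop jobs for all local
# services on a real node shutdown, and switch the LRM mode to 'shutdown' or
# 'restart' accordingly.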
sub shutdown_request {
    my ($self) = @_;

    return if $self->{shutdown_request}; # already in shutdown mode

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my ($shutdown, $reboot) = $haenv->is_node_shutdown();

    my $dc_ha_cfg = $haenv->get_ha_settings();
    my $shutdown_policy = $dc_ha_cfg->{shutdown_policy} // 'conditional';

    if ($shutdown) { # don't log this on service restart, only on node shutdown
        $haenv->log('info', "got shutdown request with shutdown policy '$shutdown_policy'");
    }

    my $freeze_all;
    if ($shutdown_policy eq 'conditional') {
        $freeze_all = $reboot;
    } elsif ($shutdown_policy eq 'freeze') {
        $freeze_all = 1;
    } elsif ($shutdown_policy eq 'failover') {
        $freeze_all = 0;
    } else {
        $haenv->log('err', "unknown shutdown policy '$shutdown_policy', falling back to 'conditional'");
        $freeze_all = $reboot;
    }

    if ($shutdown) {
        # *always* queue stop jobs for all services if the node shuts down,
        # independent of whether it's a reboot or a poweroff, else we may corrupt
        # services or hinder node shutdown
        my $ss = $self->{service_status};

        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next if !$sd->{node};
            next if $sd->{node} ne $nodename;
            # Note: use undef uid to mark shutdown/stop jobs
            $self->queue_resource_command($sid, undef, 'request_stop');
        }
    }

    if ($shutdown) {
        if ($freeze_all) {
            if ($reboot) {
                $haenv->log('info', "reboot LRM, stop and freeze all services");
            } else {
                $haenv->log('info', "shutdown LRM, stop and freeze all services");
            }
            $self->{mode} = 'restart';
        } else {
            $haenv->log('info', "shutdown LRM, stop all services");
            $self->{mode} = 'shutdown';
        }
    } else {
        $haenv->log('info', "restart LRM, freeze all services");
        $self->{mode} = 'restart';
    }

    $self->{shutdown_request} = 1;

    eval { $self->update_lrm_status(); };
    if (my $err = $@) {
        $haenv->log('err', "unable to update lrm status file - $err");
    }
}

sub get_local_status {
    my ($self) = @_;

    return $self->{status};
}

sub set_local_status {
    my ($self, $new) = @_;

    die "invalid state '$new->{state}'" if !$valid_states->{$new->{state}};

    my $haenv = $self->{haenv};

    my $old = $self->{status};

    # important: only update if it really changed
    return if $old->{state} eq $new->{state};

    $haenv->log('info', "status change $old->{state} => $new->{state}");

    $new->{state_change_time} = $haenv->get_time();

    $self->{status} = $new;
}

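# Write the current LRM state (state, mode, worker results, timestamp) via the
# environment; skipped (returns 0) when the node is not quorate.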
sub update_lrm_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    return 0 if !$haenv->quorate();

    my $lrm_status = {
        state => $self->{status}->{state},
        mode => $self->{mode},
        results => $self->{results},
        timestamp => $haenv->get_time(),
    };

    eval { $haenv->write_lrm_status($lrm_status); };
    if (my $err = $@) {
        $haenv->log('err', "unable to write lrm status file - $err");
        return 0;
    }

    return 1;
}

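# Refresh the local copy of the per-service status from the manager (CRM)
# status; returns 1 on success, undef if the manager status could not be read.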
sub update_service_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $ms = eval { $haenv->read_manager_status(); };
    if (my $err = $@) {
        $haenv->log('err', "updating service status from manager failed: $err");
        return undef;
    } else {
        $self->{service_status} = $ms->{service_status} || {};
        return 1;
    }
}

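# Try to acquire (or keep) the HA agent lock and feed the watchdog while we
# hold it; retries for at most ~5 seconds before giving up.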
sub get_protected_ha_agent_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $count = 0;
    my $starttime = $haenv->get_time();

    for (;;) {

        if ($haenv->get_ha_agent_lock()) {
            if ($self->{ha_agent_wd}) {
                $haenv->watchdog_update($self->{ha_agent_wd});
            } else {
                my $wfh = $haenv->watchdog_open();
                $self->{ha_agent_wd} = $wfh;
            }
            return 1;
        }

        last if ++$count > 5; # try at most 5 times

        my $delay = $haenv->get_time() - $starttime;
        last if $delay > 5; # for at most 5 seconds

        $haenv->sleep(1);
    }

    return 0;
}

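# Count the services on this node that are neither stopped, frozen nor in the
# error state, i.e. those the LRM still has to actively manage.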
sub active_service_count {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    my $count = 0;

    foreach my $sid (keys %$ss) {
        my $sd = $ss->{$sid};
        next if !$sd->{node};
        next if $sd->{node} ne $nodename;
        my $req_state = $sd->{state};
        next if !defined($req_state);
        next if $req_state eq 'stopped';
        next if $req_state eq 'freeze';
        # erroneous services are not managed by HA, don't count them as active
        next if $req_state eq 'error';

        $count++;
    }

    return $count;
}

my $wrote_lrm_status_at_startup = 0;

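# One main-loop iteration: refresh the cluster state and run the actual state
# machine in work(), wrapped in the environment's loop start/end hooks.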
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    $haenv->loop_start_hook();

    $self->{cluster_state_update} = $haenv->cluster_state_update();

    my $res = $self->work();

    $haenv->loop_end_hook();

    return $res;
}

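# The LRM state machine. First handles state transitions between
# 'wait_for_agent_lock', 'active' and 'lost_agent_lock', then does the
# per-state work (managing resources, running workers, handling shutdown
# requests). Returns 0 when the LRM should exit, 1 otherwise.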
sub work {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    if (!$wrote_lrm_status_at_startup) {
        if ($self->update_lrm_status()) {
            $wrote_lrm_status_at_startup = 1;
        } else {
            # do nothing
            $haenv->sleep(5);
            return $self->{shutdown_request} ? 0 : 1;
        }
    }

    my $status = $self->get_local_status();
    my $state = $status->{state};

    $self->update_service_status();

    my $fence_request = PVE::HA::Tools::count_fenced_services($self->{service_status}, $haenv->nodename());

    # do state changes first

    my $ctime = $haenv->get_time();

    if ($state eq 'wait_for_agent_lock') {

        my $service_count = $self->active_service_count();

        if (!$fence_request && $service_count && $haenv->quorate()) {
            if ($self->get_protected_ha_agent_lock()) {
                $self->set_local_status({ state => 'active' });
            }
        }

    } elsif ($state eq 'lost_agent_lock') {

        if (!$fence_request && $haenv->quorate()) {
            if ($self->get_protected_ha_agent_lock()) {
                $self->set_local_status({ state => 'active' });
            }
        }

    } elsif ($state eq 'active') {

        if ($fence_request) {
            $haenv->log('err', "node needs to be fenced - releasing agent_lock\n");
            $self->set_local_status({ state => 'lost_agent_lock'});
        } elsif (!$self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'lost_agent_lock'});
        }
    }

    $status = $self->get_local_status();
    $state = $status->{state};

    # do work

    if ($state eq 'wait_for_agent_lock') {

        return 0 if $self->{shutdown_request};

        $self->update_lrm_status();

        $haenv->sleep(5);

    } elsif ($state eq 'active') {

        my $starttime = $haenv->get_time();

        my $max_time = 10;

        my $shutdown = 0;

        # do work (max_time seconds)
        eval {
            # fixme: set alert timer

            # if we could not get the current service status there's no point
            # in doing anything, try again next round.
            return if !$self->update_service_status();

            if ($self->{shutdown_request}) {

                if ($self->{mode} eq 'restart') {

                    my $service_count = $self->active_service_count();

                    if ($service_count == 0) {

                        if ($self->run_workers() == 0) {
                            if ($self->{ha_agent_wd}) {
                                $haenv->watchdog_close($self->{ha_agent_wd});
                                delete $self->{ha_agent_wd};
                            }

                            $shutdown = 1;

                            # restart with no or frozen services, release the lock
                            $haenv->release_ha_agent_lock();
                        }
                    }
                } else {

                    if ($self->run_workers() == 0) {
                        if ($self->{shutdown_errors} == 0) {
                            if ($self->{ha_agent_wd}) {
                                $haenv->watchdog_close($self->{ha_agent_wd});
                                delete $self->{ha_agent_wd};
                            }

                            # shutdown with all services stopped, thus release the lock
                            $haenv->release_ha_agent_lock();
                        }

                        $shutdown = 1;
                    }
                }
            } else {
                if (!$self->{cluster_state_update}) {
                    # update failed but we could still renew our lock (cfs restart?),
                    # safely skip manage and expect to update just fine next round
                    $haenv->log('notice', "temporarily inconsistent cluster state " .
                        "(cfs restart?), skip round");
                    return;
                }

                $self->manage_resources();

            }
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        $self->update_lrm_status();

        return 0 if $shutdown;

        $haenv->sleep_until($starttime + $max_time);

    } elsif ($state eq 'lost_agent_lock') {

        # Note: watchdog is active and will trigger soon!

        # so we hope to get the lock back soon!

        if ($self->{shutdown_request}) {

            my $service_count = $self->active_service_count();

            if ($service_count > 0) {
                $haenv->log('err', "got shutdown request in state 'lost_agent_lock' - " .
                    "detected $service_count running services");

                if ($self->{mode} eq 'restart') {
                    my $state_mt = $self->{status}->{state_change_time};

                    # watchdog should have already triggered, so either it's set
                    # to noboot or it failed. As we are in restart mode and have an
                    # infinite stop timeout -> exit now - we don't touch services
                    # or change state, so this is safe, relatively speaking
                    if (($haenv->get_time() - $state_mt) > 90) {
                        $haenv->log('err', "lost agent lock and restart request for over 90 seconds - giving up!");
                        return 0;
                    }
                }
            } else {

                # all services are stopped, so we can close the watchdog

                if ($self->{ha_agent_wd}) {
                    $haenv->watchdog_close($self->{ha_agent_wd});
                    delete $self->{ha_agent_wd};
                }

                return 0;
            }
        }

        $haenv->sleep(5);

    } else {

        die "got unexpected status '$state'\n";

    }

    return 1;
}

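# Start queued worker commands and collect finished ones for up to ~5 seconds.
# Each worker normally runs exec_resource_agent() in a forked child; with a
# max_workers setting of 0 the command is executed directly without forking
# (used by the regression tests). Returns the number of workers still queued
# or running.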
sub run_workers {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $starttime = $haenv->get_time();

    # number of workers to start; if 0 we exec the command directly without forking
    my $max_workers = $haenv->get_max_workers();

    my $sc = $haenv->read_service_config();

    while (($haenv->get_time() - $starttime) < 5) {
        my $count = $self->check_active_workers();

        foreach my $sid (sort keys %{$self->{workers}}) {
            last if $count >= $max_workers && $max_workers > 0;

            my $w = $self->{workers}->{$sid};
            if (!$w->{pid}) {
                # only fork if we may, else call exec_resource_agent
                # directly (e.g. for regression tests)
                if ($max_workers > 0) {
                    my $pid = fork();
                    if (!defined($pid)) {
                        $haenv->log('err', "fork worker failed");
                        $count = 0; last; # abort, try later
                    } elsif ($pid == 0) {
                        $haenv->after_fork(); # cleanup

                        # do work
                        my $res = -1;
                        eval {
                            $res = $self->exec_resource_agent($sid, $sc->{$sid}, $w->{state}, $w->{target});
                        };
                        if (my $err = $@) {
                            $haenv->log('err', $err);
                            POSIX::_exit(-1);
                        }
                        POSIX::_exit($res);
                    } else {
                        $count++;
                        $w->{pid} = $pid;
                    }
                } else {
                    my $res = -1;
                    eval {
                        $res = $self->exec_resource_agent($sid, $sc->{$sid}, $w->{state}, $w->{target});
                        $res = $res << 8 if $res > 0;
                    };
                    if (my $err = $@) {
                        $haenv->log('err', $err);
                    }
                    if (defined($w->{uid})) {
                        $self->resource_command_finished($sid, $w->{uid}, $res);
                    } else {
                        $self->stop_command_finished($sid, $res);
                    }
                }
            }
        }

        last if !$count;

        $haenv->sleep(1);
    }

    return scalar(keys %{$self->{workers}});
}

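# Queue a resource command for every service the manager placed on this node
# (unless it is frozen), drop stale restart counters, and then run the workers.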
sub manage_resources {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    foreach my $sid (keys %{$self->{restart_tries}}) {
        delete $self->{restart_tries}->{$sid} if !$ss->{$sid};
    }

    foreach my $sid (keys %$ss) {
        my $sd = $ss->{$sid};
        next if !$sd->{node};
        next if !$sd->{uid};
        next if $sd->{node} ne $nodename;
        my $req_state = $sd->{state};
        next if !defined($req_state);
        next if $req_state eq 'freeze';
        $self->queue_resource_command($sid, $sd->{uid}, $req_state, $sd->{target});
    }

    return $self->run_workers();
}

sub queue_resource_command {
    my ($self, $sid, $uid, $state, $target) = @_;

    # do not queue the exact same command twice as this may lead to
    # an inconsistent HA state when the first command fails but the CRM
    # does not process its failure right away and the LRM starts a second
    # try, without the CRM knowing of it (race condition)
    # The 'stopped' command is an exception as we do not process its result
    # in the CRM and we want to execute it always (even with no active CRM)
    return if $state ne 'stopped' && $uid && defined($self->{results}->{$uid});

    if (my $w = $self->{workers}->{$sid}) {
        return if $w->{pid}; # already started
        # else, delete and overwrite queue entry with new command
        delete $self->{workers}->{$sid};
    }

    $self->{workers}->{$sid} = {
        sid => $sid,
        uid => $uid,
        state => $state,
    };

    $self->{workers}->{$sid}->{target} = $target if $target;
}

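# Reap finished worker processes (non-blocking waitpid) and hand their exit
# status to the matching *_command_finished handler; returns the number of
# workers still running.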
sub check_active_workers {
    my ($self) = @_;

    # finish/count workers
    my $count = 0;
    foreach my $sid (keys %{$self->{workers}}) {
        my $w = $self->{workers}->{$sid};
        if (my $pid = $w->{pid}) {
            # check status
            my $waitpid = waitpid($pid, WNOHANG);
            if (defined($waitpid) && ($waitpid == $pid)) {
                if (defined($w->{uid})) {
                    $self->resource_command_finished($sid, $w->{uid}, $?);
                } else {
                    $self->stop_command_finished($sid, $?);
                }
            } else {
                $count++;
            }
        }
    }

    return $count;
}

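# Collect the result of a shutdown/stop job (queued with an undef uid); any
# failure only increments the shutdown error counter, it is not reported back
# to the CRM.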
sub stop_command_finished {
    my ($self, $sid, $status) = @_;

    my $haenv = $self->{haenv};

    my $w = delete $self->{workers}->{$sid};
    return if !$w; # should not happen

    my $exit_code = -1;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
    } else {
        $exit_code = ($status >> 8);
    }

    if ($exit_code != 0) {
        $self->{shutdown_errors}++;
    }
}

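# Collect the result of a regular resource command: translate the wait status
# into an exit code, let handle_service_exitcode() apply the restart policy,
# record the result for the CRM and prune results whose uids no longer exist.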
sub resource_command_finished {
    my ($self, $sid, $uid, $status) = @_;

    my $haenv = $self->{haenv};

    my $w = delete $self->{workers}->{$sid};
    return if !$w; # should not happen

    my $exit_code = -1;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
    } else {
        $exit_code = ($status >> 8);
    }

    $exit_code = $self->handle_service_exitcode($sid, $w->{state}, $exit_code);

    return if $exit_code == ETRY_AGAIN; # tell nobody, simply retry

    $self->{results}->{$uid} = {
        sid => $w->{sid},
        state => $w->{state},
        exit_code => $exit_code,
    };

    my $ss = $self->{service_status};

    # compute hash of valid/existing uids
    my $valid_uids = {};
    foreach my $sid (keys %$ss) {
        my $sd = $ss->{$sid};
        next if !$sd->{uid};
        $valid_uids->{$sd->{uid}} = 1;
    }

    my $results = {};
    foreach my $id (keys %{$self->{results}}) {
        next if !$valid_uids->{$id};
        $results->{$id} = $self->{results}->{$id};
    }
    $self->{results} = $results;
}

# processes the exit code from a finished resource agent, so that the CRM knows
# if the LRM wants to retry an action based on the current recovery policies for
# the failed service, or the CRM itself must try to recover from the failure.
sub handle_service_exitcode {
    my ($self, $sid, $cmd, $exit_code) = @_;

    my $haenv = $self->{haenv};
    my $tries = $self->{restart_tries};

    my $sc = $haenv->read_service_config();

    my $max_restart = 0;

    if (my $cd = $sc->{$sid}) {
        $max_restart = $cd->{max_restart};
    }

    if ($cmd eq 'started') {

        if ($exit_code == SUCCESS) {

            $tries->{$sid} = 0;

            return $exit_code;

        } elsif ($exit_code == ERROR) {

            $tries->{$sid} = 0 if !defined($tries->{$sid});

            if ($tries->{$sid} >= $max_restart) {
                $haenv->log('err', "unable to start service $sid on local node" .
                    " after $tries->{$sid} retries");
                $tries->{$sid} = 0;
                return ERROR;
            }

            $tries->{$sid}++;

            $haenv->log('warning', "restart policy: retry number $tries->{$sid}" .
                " for service '$sid'");
            # tell CRM that we retry the start
            return ETRY_AGAIN;
        }
    }

    return $exit_code;
}

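# Execute a single command ('started', 'stopped', 'request_stop', 'migrate',
# 'relocate', 'error') for a service through its resource plugin and map the
# outcome to one of the ':exit_codes' from PVE::HA::Tools.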
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my (undef, $service_type, $service_name) = $haenv->parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($service_type);
    if (!$plugin) {
        $haenv->log('err', "service type '$service_type' not implemented");
        return EUNKNOWN_SERVICE_TYPE;
    }

    if (!$service_config) {
        $haenv->log('err', "missing resource configuration for '$sid'");
        return EUNKNOWN_SERVICE;
    }

    # process error state early
    if ($cmd eq 'error') {

        $haenv->log('err', "service $sid is in an error state and needs manual " .
            "intervention. Look up 'ERROR RECOVERY' in the documentation.");

        return SUCCESS; # error always succeeds
    }

    if ($service_config->{node} ne $nodename) {
        $haenv->log('err', "service '$sid' not on this node");
        return EWRONG_NODE;
    }

    my $id = $service_name;

    my $running = $plugin->check_running($haenv, $id);

    if ($cmd eq 'started') {

        return SUCCESS if $running;

        $haenv->log("info", "starting service $sid");

        $plugin->start($haenv, $id);

        $running = $plugin->check_running($haenv, $id);

        if ($running) {
            $haenv->log("info", "service status $sid started");
            return SUCCESS;
        } else {
            $haenv->log("warning", "unable to start service $sid");
            return ERROR;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        return SUCCESS if !$running;

        $haenv->log("info", "stopping service $sid");

        $plugin->shutdown($haenv, $id);

        $running = $plugin->check_running($haenv, $id);

        if (!$running) {
            $haenv->log("info", "service status $sid stopped");
            return SUCCESS;
        } else {
            $haenv->log("info", "unable to stop service $sid (still running)");
            return ERROR;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        if ($service_config->{node} eq $target) {
            # already there
            return SUCCESS;
        }

        my $online = ($cmd eq 'migrate') ? 1 : 0;

        my $res = $plugin->migrate($haenv, $id, $target, $online);

        # something went wrong if service is still on this node
        if (!$res) {
            $haenv->log("err", "service $sid not moved (migration error)");
            return ERROR;
        }

        return SUCCESS;

    }

    $haenv->log("err", "implement me (cmd '$cmd')");
    return EUNKNOWN_COMMAND;
}

1;