]> git.proxmox.com Git - pve-ha-manager.git/blame - PVE/HA/CRM.pm
fixup watchdog to cleanly compile with systemd
[pve-ha-manager.git] / PVE / HA / CRM.pm
CommitLineData
6cd38bc6
DM
1package PVE::HA::CRM;
2
3# Cluster Resource Manager
f25a336a
DM
4
5use strict;
6use warnings;
7
8use PVE::SafeSyslog;
9use PVE::Tools;
17654a06 10use PVE::HA::Tools;
f25a336a
DM
11
12use PVE::HA::Manager;
13
# A server is always in exactly one of the following states. The values are
# human-readable descriptions of each state; set_local_status() only checks
# that the requested state has a true entry in this table.
my $valid_states = {
    wait_for_quorum => "cluster is not quorate, waiting",
    master => "quorate, and we got the ha_manager lock",
    lost_manager_lock => "we lost the ha_manager lock (watchdog active)",
    slave => "quorate, but we do not own the ha_manager lock",
};
22
# Constructor.
#
# $this  - class name, or an existing instance whose class is reused
# $haenv - HA environment object; stored as-is and used by the other methods
#
# The object starts with the placeholder state 'startup' (which is not a
# member of the valid state table) and is then moved to 'wait_for_quorum'
# through set_local_status(), so that the regular state-change path runs once.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my %fields = (
        haenv => $haenv,
        manager => undef,
        status => { state => 'startup' },
    );

    my $self = bless \%fields, $class;

    $self->set_local_status({ state => 'wait_for_quorum' });

    return $self;
}
38
b6044542
DM
# Flag this server for shutdown. The flag is read by do_one_iteration().
# The request is written to syslog only the first time it is received.
sub shutdown_request {
    my ($self) = @_;

    if (!$self->{shutdown_request}) {
        syslog('info' , "server received shutdown request");
    }

    $self->{shutdown_request} = 1;
}
47
f25a336a
DM
# Return the current status hash reference (the same reference that is stored
# in the object, not a copy).
sub get_local_status {
    my $self = shift;

    my $status = $self->{status};

    return $status;
}
53
# Switch the server to a new status.
#
# $new - hash reference with at least a 'state' key; the state must have an
#        entry in $valid_states, otherwise this dies. On a real change the
#        hash gets a 'state_change_time' stamp and becomes the stored status.
#
# Nothing happens when the requested state equals the current one. Entering
# 'master' creates a PVE::HA::Manager instance; entering any other state
# cleans up and drops an existing one.
sub set_local_status {
    my ($self, $new) = @_;

    my $target = $new->{state};

    die "invalid state '$new->{state}'" if !$valid_states->{$target};

    my $current = $self->{status}->{state};

    # important: only update if the state really changed
    return if $current eq $target;

    my $haenv = $self->{haenv};

    $haenv->log('info', "status change $current => $target");

    $new->{state_change_time} = $haenv->get_time();

    $self->{status} = $new;

    # fixme: do not use extra class
    if ($target ne 'master') {
        my $manager = $self->{manager};
        if ($manager) {
            # fixme: what should we do here?
            $manager->cleanup();
            $self->{manager} = undef;
        }
    } else {
        $self->{manager} = PVE::HA::Manager->new($haenv);
    }
}
83
# Try to acquire the ha_manager lock and protect it with a watchdog.
#
# On success the watchdog handle stored in $self->{ha_manager_wd} is updated,
# or opened and stored if there is none yet, and 1 is returned.
#
# On failure the attempt is repeated after a one second sleep. The loop gives
# up and returns 0 once the failed-attempt counter exceeds 5 (i.e. after the
# sixth failed attempt) or once more than 5 seconds, as measured by the
# environment clock, have passed since the call started.
sub get_protected_ha_manager_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $failed = 0;
    my $started = $haenv->get_time();

    while (1) {

        if ($haenv->get_ha_manager_lock()) {
            my $wd = $self->{ha_manager_wd};
            if (!$wd) {
                $self->{ha_manager_wd} = $haenv->watchdog_open();
            } else {
                $haenv->watchdog_update($wd);
            }
            return 1;
        }

        $failed++;
        last if $failed > 5;

        my $elapsed = $haenv->get_time() - $started;
        last if $elapsed > 5;

        $haenv->sleep(1);
    }

    return 0;
}
114
# Run one iteration of the CRM main loop.
#
# The iteration has two phases:
#   1. state transitions, driven by quorum and the protected ha_manager lock
#   2. the work that belongs to the (possibly new) state
#
# Returns 0 when a shutdown was requested and the current state allows it
# ('wait_for_quorum', 'lost_manager_lock', 'slave'), otherwise 1. Dies if the
# state is unknown, or if the state is 'master' but no manager exists.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $status = $self->get_local_status();
    my $state = $status->{state};

    # phase 1: do state changes first

    if ($state eq 'master') {

        # a master that cannot refresh its lock has lost it
        $self->set_local_status({ state => 'lost_manager_lock'})
            if !$self->get_protected_ha_manager_lock();

    } elsif ($state eq 'wait_for_quorum' || $state eq 'slave' || $state eq 'lost_manager_lock') {

        if ($haenv->quorate()) {
            if ($self->get_protected_ha_manager_lock()) {
                $self->set_local_status({ state => 'master' });
            } elsif ($state eq 'wait_for_quorum') {
                # quorate but no lock: only a waiting node becomes slave here
                $self->set_local_status({ state => 'slave' });
            }
        } elsif ($state eq 'slave') {
            # only a slave is moved back to waiting when quorum is lost
            $self->set_local_status({ state => 'wait_for_quorum' });
        }
    }

    $status = $self->get_local_status();
    $state = $status->{state};

    # phase 2: do work

    if ($state eq 'master') {

        my $manager = $self->{manager};

        die "no manager" if !defined($manager);

        # each master round lasts until 10 seconds after its start
        my $round_end = $haenv->get_time() + 10;

        eval {
            # fixme: set alert timer
            $manager->manage();
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        $haenv->sleep_until($round_end);

        return 1;
    }

    if ($state eq 'lost_manager_lock') {

        # close the watchdog before anything else, even on shutdown
        my $wd = $self->{ha_manager_wd};
        if ($wd) {
            $haenv->watchdog_close($wd);
            delete $self->{ha_manager_wd};
        }

        return 0 if $self->{shutdown_request};

        $self->set_local_status({ state => 'wait_for_quorum' });

        return 1;
    }

    if ($state eq 'wait_for_quorum') {

        return 0 if $self->{shutdown_request};

        $haenv->sleep(5);

        return 1;
    }

    if ($state eq 'slave') {

        # nothing to do but wait until we get master
        return $self->{shutdown_request} ? 0 : 1;
    }

    die "got unexpected status '$state'\n";
}
216
2171;