]>
Commit | Line | Data |
---|---|---|
6cd38bc6 DM |
1 | package PVE::HA::CRM; |
2 | ||
3 | # Cluster Resource Manager | |
f25a336a DM |
4 | |
5 | use strict; | |
6 | use warnings; | |
7 | ||
8 | use PVE::SafeSyslog; | |
9 | use PVE::Tools; | |
17654a06 | 10 | use PVE::HA::Tools; |
f25a336a DM |
11 | |
12 | use PVE::HA::Manager; | |
13 | ||
# The CRM is always in one of the states listed below. The keys are the
# state names accepted by set_local_status(); the values are human-readable
# descriptions (only their truthiness is checked in this file).
my $valid_states = {
    wait_for_quorum => "cluster is not quorate, waiting",
    master => "quorate, and we got the ha_manager lock",
    lost_manager_lock => "we lost the ha_manager lock (watchdog active)",
    slave => "quorate, but we do not own the ha_manager lock",
};
22 | ||
# Constructor.
#
# Arguments:
#   $this  - class name, or an existing object whose class is reused
#   $haenv - HA environment object; stored as $self->{haenv} and used by the
#            other methods for logging, time, locking and watchdog access
#
# The object starts with the placeholder state 'startup' and is moved to
# 'wait_for_quorum' right away via set_local_status(), so that first
# transition is logged and time-stamped like any other.
#
# Returns the new blessed object.
sub new {
    my ($this, $haenv) = @_;

    # support both Class->new(...) and $object->new(...)
    my $class = ref($this) ? ref($this) : $this;

    my %fields = (
        haenv => $haenv,
        manager => undef,
        status => { state => 'startup' },
    );
    my $self = bless \%fields, $class;

    $self->set_local_status({ state => 'wait_for_quorum' });

    return $self;
}
38 | ||
b6044542 DM |
# Record that a shutdown of this server was requested.
#
# The request is only stored in $self->{shutdown_request}; it is acted upon
# by do_one_iteration(), which checks that flag. The log message is written
# on the first request only, so repeated requests do not spam the log.
#
# The message goes through the HA environment's logger, like every other
# message emitted by this module, instead of calling syslog() directly.
sub shutdown_request {
    my ($self) = @_;

    $self->{haenv}->log('info', "server received shutdown request")
        if !$self->{shutdown_request};

    $self->{shutdown_request} = 1;
}
47 | ||
f25a336a DM |
# Accessor for the current status record.
#
# Returns the hash reference stored in $self->{status} (the reference
# itself, not a copy). Its 'state' key holds the current state name.
sub get_local_status {
    my $self = shift;

    my $status = $self->{status};

    return $status;
}
53 | ||
# Switch to a new status record.
#
# Arguments:
#   $new - hash reference with at least a 'state' key naming one of the
#          states in $valid_states. The hash is modified in place (a
#          'state_change_time' key is added) and then stored as the current
#          status.
#
# Dies with "invalid state '...'" if the requested state is unknown.
#
# Nothing happens when the requested state equals the current one; in
# particular the stored change time is not touched.
sub set_local_status {
    my ($self, $new) = @_;

    die "invalid state '$new->{state}'" if !$valid_states->{$new->{state}};

    my $old = $self->{status};

    # important: only update if the state really changed
    return if $old->{state} eq $new->{state};

    my $env = $self->{haenv};

    $env->log('info', "status change $old->{state} => $new->{state}");

    $new->{state_change_time} = $env->get_time();
    $self->{status} = $new;

    # fixme: do not use extra class
    if ($new->{state} eq 'master') {
        # entering 'master': create a fresh manager instance
        $self->{manager} = PVE::HA::Manager->new($env);
    } elsif ($self->{manager}) {
        # leaving 'master': drop the manager
        # fixme: what should we do here?
        $self->{manager}->cleanup();
        $self->{manager} = undef;
    }
}
83 | ||
# Try to acquire the ha_manager lock and couple it with the watchdog.
#
# The lock is requested from the HA environment repeatedly, sleeping one
# second between attempts. The method gives up when the attempt counter
# exceeds 5 (i.e. after the sixth failed attempt) or when more than 5 seconds
# have passed since the first attempt, whichever comes first.
#
# On success the watchdog handle in $self->{ha_manager_wd} is updated, or
# opened and stored there if no handle exists yet.
#
# Returns 1 if the lock was obtained, 0 otherwise.
sub get_protected_ha_manager_lock {
    my ($self) = @_;

    my $env = $self->{haenv};

    my $began = $env->get_time();
    my $failures = 0;

    while (!$env->get_ha_manager_lock()) {
        $failures++;

        # bounded by the number of attempts ...
        return 0 if $failures > 5;

        # ... and by elapsed time (max 5 seconds)
        return 0 if ($env->get_time() - $began) > 5;

        $env->sleep(1);
    }

    # we hold the lock - keep the watchdog going, or start it
    if ($self->{ha_manager_wd}) {
        $env->watchdog_update($self->{ha_manager_wd});
    } else {
        $self->{ha_manager_wd} = $env->watchdog_open();
    }

    return 1;
}
114 | ||
# Run one round of the CRM state machine.
#
# Phase 1 decides on state transitions, based on the state at entry:
#   wait_for_quorum   -> master or slave once the cluster is quorate
#                        (master if the ha_manager lock could be acquired)
#   slave             -> master if quorate and the lock could be acquired,
#                        wait_for_quorum if quorum was lost
#   lost_manager_lock -> master if quorate and the lock could be re-acquired
#   master            -> lost_manager_lock if the lock could not be renewed
#
# Phase 2 does the work for the (possibly new) state:
#   wait_for_quorum   sleep 5 seconds
#   master            run the manager once, then sleep_until() 10 seconds
#                     after the start of the work phase; errors thrown by the
#                     manager are logged, not propagated
#   lost_manager_lock close the watchdog and fall back to wait_for_quorum
#   slave             nothing to do, wait until we get master
#
# Returns 0 if a shutdown was requested and the state is wait_for_quorum,
# lost_manager_lock or slave (presumably telling the caller to stop
# iterating - confirm against the caller); returns 1 otherwise. Note that a
# shutdown request is not looked at while in state 'master'.
#
# Dies with "no manager" if the state is 'master' without a manager object,
# and with "got unexpected status '...'" on an unknown state.
sub do_one_iteration {
    my ($self) = @_;

    my $env = $self->{haenv};

    my $state = $self->get_local_status()->{state};

    # do state changes first

    if ($state eq 'master') {

        $self->set_local_status({ state => 'lost_manager_lock'})
            if !$self->get_protected_ha_manager_lock();

    } elsif ($state eq 'lost_manager_lock') {

        if ($env->quorate() && $self->get_protected_ha_manager_lock()) {
            $self->set_local_status({ state => 'master' });
        }

    } elsif ($state eq 'slave') {

        if (!$env->quorate()) {
            $self->set_local_status({ state => 'wait_for_quorum' });
        } elsif ($self->get_protected_ha_manager_lock()) {
            $self->set_local_status({ state => 'master' });
        }

    } elsif ($state eq 'wait_for_quorum') {

        if ($env->quorate()) {
            my $next = $self->get_protected_ha_manager_lock() ? 'master' : 'slave';
            $self->set_local_status({ state => $next });
        }
    }

    # the state may have changed above, so read it again
    $state = $self->get_local_status()->{state};

    # do work

    if ($state eq 'wait_for_quorum') {

        return 0 if $self->{shutdown_request};

        $env->sleep(5);

        return 1;
    }

    if ($state eq 'master') {

        my $manager = $self->{manager};

        die "no manager" if !defined($manager);

        my $round_start = $env->get_time();
        my $round_length = 10;

        # do work (round_length seconds)
        eval {
            # fixme: set alert timer
            $manager->manage();
        };
        if (my $err = $@) {
            $env->log('err', "got unexpected error - $err");
        }

        $env->sleep_until($round_start + $round_length);

        return 1;
    }

    if ($state eq 'lost_manager_lock') {

        # we no longer own the lock, so stop the watchdog
        if ($self->{ha_manager_wd}) {
            $env->watchdog_close($self->{ha_manager_wd});
            delete $self->{ha_manager_wd};
        }

        return 0 if $self->{shutdown_request};

        $self->set_local_status({ state => 'wait_for_quorum' });

        return 1;
    }

    if ($state eq 'slave') {

        return 0 if $self->{shutdown_request};

        # wait until we get master

        return 1;
    }

    die "got unexpected status '$state'\n";
}
216 | ||
217 | 1; |