]>
Commit | Line | Data |
---|---|---|
c0bbd038 DM |
1 | package PVE::HA::Manager; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
c4a221bc | 5 | use Digest::MD5 qw(md5_base64); |
c0bbd038 DM |
6 | |
7 | use Data::Dumper; | |
8 | ||
9 | use PVE::HA::NodeStatus; | |
10 | ||
# Create a manager object on top of the given HA environment.
#
# $this  - class name, or an existing object whose class is reused
# $haenv - environment object; read_manager_status() and nodename() are
#          called on it here
#
# The stored manager status is loaded and its 'master_node' entry is set to
# the name of the local node. Missing 'node_status' / 'service_status'
# entries start out as empty hashes.
sub new {
    my ($this, $haenv) = @_;

    my $master_status = $haenv->read_manager_status();
    $master_status->{master_node} = $haenv->nodename();

    my $node_status_data = $master_status->{node_status} || {};
    my $node_status = PVE::HA::NodeStatus->new($haenv, $node_status_data);

    # fixme: use separate class PVE::HA::ServiceStatus
    my $service_status = $master_status->{service_status} || {};

    my $self = {};
    $self->{haenv} = $haenv;
    $self->{ms} = $master_status;    # master status
    $self->{ns} = $node_status;      # PVE::HA::NodeStatus
    $self->{ss} = $service_status;   # service status

    my $class = ref($this) || $this;

    return bless($self, $class);
}
34 | ||
d84da043 DM |
# Cleanup hook of the manager.
# Currently a placeholder: apart from unpacking its argument the body
# does nothing.
sub cleanup {
    my ($self) = @_;

    # todo: ?
}
40 | ||
# Write the manager status back through the HA environment.
#
# The current node status ($self->{ns}->{status}) and the service status
# ($self->{ss}) are copied into the master status hash before it is handed
# to $haenv->write_manager_status().
sub flush_master_status {
    my ($self) = @_;

    my $master_status = $self->{ms};

    $master_status->{node_status} = $self->{ns}->{status};
    $master_status->{service_status} = $self->{ss};

    return $self->{haenv}->write_manager_status($master_status);
}
c0bbd038 | 51 | |
# Pick the node a service should run on.
#
# Attention: must be idempotent (always return the same result for the
# same input!)
#
# $service_conf - service configuration; its 'node' entry is the preferred
#                 node
#
# Returns the preferred node while it is online, otherwise the first entry
# of the list returned by list_online_nodes() (undef if that list is empty).
sub select_service_node {
    my ($self, $service_conf) = @_;

    my $node_status = $self->{ns};
    my $preferred = $service_conf->{node};

    if ($node_status->node_is_online($preferred)) {
        return $preferred;
    }

    my $candidates = $node_status->list_online_nodes();

    return shift @$candidates;
}
66 | ||
c4a221bc DM |
# Running counter that is mixed into every generated uid, so that two state
# changes within the same second of the same process still get distinct
# uids.
my $uid_counter = 0;

# States accepted as the new state by $change_service_state.
my $valid_service_states = {
    stopped => 1,
    request_stop => 1,
    started => 1,
    fence => 1,
    move => 1,
    migrate => 1,
    error => 1,
};

# Switch service $sid to $new_state.
#
# Usage: &$change_service_state($self, $sid, $new_state, %params)
#
# %params are additional key/value pairs stored in the service status entry
# (for example node => ..., target => ...). A pair that is already stored
# with the same value is left alone and does not show up in the log.
#
# Every successful change assigns a new 'uid' to the service status entry
# and writes one 'info' log line.
#
# Dies if the service is unknown, if $new_state equals the current state or
# if $new_state is not listed in $valid_service_states. Nothing is modified
# in those cases.
my $change_service_state = sub {
    my ($self, $sid, $new_state, %params) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});

    my $sd = $ss->{$sid} || die "no such service '$sid'";

    my $old_state = $sd->{state};

    die "no state change" if $old_state eq $new_state; # just to be sure

    die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state};

    # sorted, so that the logged change list does not depend on hash order
    my @changed;
    foreach my $k (sort keys %params) {
        my $v = $params{$k};
        next if defined($sd->{$k}) && $sd->{$k} eq $v;
        push @changed, "$k = $v";
        $sd->{$k} = $v;
    }
    my $changes = join(', ', @changed);

    $sd->{state} = $new_state;
    $uid_counter++;
    $sd->{uid} = md5_base64($new_state . $$ . time() . $uid_counter);

    # fixme: cleanup state (remove unused values)

    $changes = " ($changes)" if $changes;
    $haenv->log('info', "service '$sid': state changed to '$new_state' $changes\n");
};
110 | ||
# Read the LRM status of all online nodes.
#
# Returns a hash reference that merges the per-node results (each fetched
# with $haenv->read_lrm_status($node)), keyed by uid. If a uid is reported
# by more than one node, the entry stored first is kept.
sub read_lrm_status {
    my ($self) = @_;

    my $online_nodes = $self->{ns}->list_online_nodes();
    my $haenv = $self->{haenv};

    my %merged;

    for my $node (@$online_nodes) {
        my $node_result = $haenv->read_lrm_status($node);

        for my $uid (keys %$node_result) {
            # a uid reported twice should not happen - keep the first entry
            $merged{$uid} = $node_result->{$uid} unless $merged{$uid};
        }
    }

    return \%merged;
}
130 | ||
aa98a844 DM |
# Read new CRM commands and save them into the service status.
#
# The command list is fetched with $haenv->read_crm_commands() and processed
# line by line. The only command understood is
#
#     migrate <sid> <node>
#
# An accepted command is stored as [ 'migrate', $node ] in the 'cmd' entry
# of the service status of <sid>. A command is rejected, with a log entry,
# if the line cannot be parsed, the service is unknown, the target node is
# not online, or the service is already located on the target node.
sub update_crm_commands {
    my ($self) = @_;

    my ($haenv, $ns, $ss) = ($self->{haenv}, $self->{ns}, $self->{ss});

    my $cmdlist = $haenv->read_crm_commands();

    # defensive: an undefined command list is handled like an empty one
    return if !defined($cmdlist);

    foreach my $cmd (split(/\n/, $cmdlist)) {

        # a match in list context returns the captures, or nothing on failure
        my ($sid, $node) = $cmd =~ m/^migrate\s+(\S+)\s+(\S+)$/;

        if (!defined($sid)) {
            $haenv->log('err', "unable to parse crm command: $cmd");
            next;
        }

        my $sd = $ss->{$sid};

        if (!$sd) {
            $haenv->log('err', "crm command error - no such service: $cmd");
            next;
        }

        if (!$ns->node_is_online($node)) {
            $haenv->log('err', "crm command error - node not online: $cmd");
            next;
        }

        if ($node eq $sd->{node}) {
            $haenv->log('info', "ignore crm command - service already on target node: $cmd");
            next;
        }

        $haenv->log('info', "got crm command: $cmd");
        $sd->{cmd} = [ 'migrate', $node ];
    }

    return;
}
165 | ||
8f0bb968 DM |
# Run one iteration of the manager.
#
# Refreshes the node status, registers newly configured services, reads
# pending CRM commands and then advances every service through the state
# machine until a pass over all services changes no state any more. The
# resulting status is written with flush_master_status().
#
# Returns early, without writing anything, if the local node is not
# reported online.
sub manage {
    my ($self) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};
    my $ss = $self->{ss};

    $ns->update($haenv->get_node_info());

    unless ($ns->node_is_online($haenv->nodename())) {
        $haenv->log('info', "master seems offline\n");
        return;
    }

    my $lrm_status = $self->read_lrm_status();

    my $sc = $haenv->read_service_config();

    # compute new service status

    # add new service
    for my $sid (keys %$sc) {
        if (!$ss->{$sid}) {
            $haenv->log('info', "Adding new service '$sid'\n");
            # assume we are running to avoid relocate running service at add
            $ss->{$sid} = { state => 'started', node => $sc->{$sid}->{node} };
        }
    }

    $self->update_crm_commands();

    my $repeat;
    do {
        $repeat = 0;

        for my $sid (keys %$ss) {
            my $sd = $ss->{$sid};

            # services missing from the configuration are handled as disabled
            my $cd = $sc->{$sid} || { state => 'disabled' };

            # result reported by the LRM for the current uid, if any
            my $lrm_res;
            $lrm_res = $lrm_status->{$sd->{uid}} if $sd->{uid};

            my $last_state = $sd->{state};

            if ($last_state eq 'stopped') {

                $self->next_state_stopped($sid, $cd, $sd);

            } elsif ($last_state eq 'started') {

                $self->next_state_started($sid, $cd, $sd);

            } elsif ($last_state eq 'migrate') {

                # check result from LRM daemon
                if ($lrm_res) {
                    my $exit_code = $lrm_res->{exit_code};
                    if ($exit_code != 0) {
                        # failed - the service stays on its current node
                        $haenv->log('err', "service '$sid' - migration failed (exit code $exit_code)");
                        $change_service_state->($self, $sid, 'started', node => $sd->{node});
                    } else {
                        $change_service_state->($self, $sid, 'started', node => $sd->{target});
                    }
                }

            } elsif ($last_state eq 'request_stop') {

                # check result from LRM daemon
                if ($lrm_res) {
                    my $exit_code = $lrm_res->{exit_code};
                    # fixme: what state on failure?
                    my $next_state = ($exit_code == 0) ? 'stopped' : 'error';
                    $change_service_state->($self, $sid, $next_state);
                }

            } elsif ($last_state eq 'move'
                     || $last_state eq 'fence'
                     || $last_state eq 'error') {

                # move:  not implemented
                # fence: do nothing here - wait until fenced
                # error: fixme

            } else {

                die "unknown service state '$last_state'";
            }

            # another pass is needed whenever a service changed state
            $repeat = 1 if $sd->{state} ne $last_state;
        }

        # handle fencing - fence_node() is called at most once per node
        # and pass
        my %fenced;
        for my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next unless $sd->{state} eq 'fence';

            my $node = $sd->{node};

            if (!defined($fenced{$node})) {
                $fenced{$node} = $ns->fence_node($node) || 0;
            }

            next unless $fenced{$node};

            # node fence was successful - mark service as stopped
            $change_service_state->($self, $sid, 'stopped');
        }

    } while ($repeat);

    # remove stale services
    # fixme:

    $self->flush_master_status();
}
282 | ||
a875fbe8 DM |
283 | # functions to compute next service states |
284 | # $cd: service configuration data (read only) | |
285 | # $sd: service status data (read only) | |
286 | # | |
287 | # Note: use change_service_state() to alter state | |
288 | # | |
289 | ||
# Compute the next state for a service that is currently 'stopped'.
#
# $sid - service id
# $cd  - service configuration data
# $sd  - service status data (modified in place by the location fixup and
#        by a 'migrate' command, see below)
#
# Steps:
#  - adopt the configured node if it differs from the recorded one
#  - handle a pending command; 'migrate' of a stopped service only changes
#    the service location
#  - configuration state 'disabled': stay stopped
#  - configuration state 'enabled': select a node and switch to 'started'
#  - any other configuration state is logged as an error
sub next_state_stopped {
    my ($self, $sid, $cd, $sd) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    # Only fix up the location if the configuration names a node. The
    # { state => 'disabled' } placeholder manage() passes for services that
    # are missing from the configuration has none, and must not wipe the
    # recorded location.
    if (defined($cd->{node}) && $sd->{node} ne $cd->{node}) {
        # this can happen if we fence a node with active migrations
        # hack: modify $sd (normally this should be considered read-only)
        $haenv->log('info', "fixup service '$sid' location ($sd->{node} => $cd->{node})");
        $sd->{node} = $cd->{node};
    }

    if ($sd->{cmd}) {
        my ($cmd, $target) = @{$sd->{cmd}};
        delete $sd->{cmd};

        if ($cmd eq 'migrate') {
            if (!$ns->node_is_online($target)) {
                $haenv->log('err', "ignore service '$sid' migrate request - node '$target' not online");
            } elsif ($sd->{node} eq $target) {
                $haenv->log('info', "ignore service '$sid' migrate request - service already on node '$target'");
            } else {
                $haenv->change_service_location($sid, $target);
                $cd->{node} = $sd->{node} = $target; # fixme: $sd is read-only??!!
                $haenv->log('info', "migrate service '$sid' to node '$target' (stopped)");
            }
        } else {
            $haenv->log('err', "unknown command '$cmd' for service '$sid'");
        }
    }

    if ($cd->{state} eq 'disabled') {
        # do nothing
        return;
    }

    if ($cd->{state} eq 'enabled') {
        if (my $node = $self->select_service_node($cd)) {
            if ($sd->{node} ne $node) {
                $haenv->change_service_location($sid, $node);
            }
            &$change_service_state($self, $sid, 'started', node => $node);
        } else {
            # fixme: warn
        }

        return;
    }

    $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
}
342 | ||
# Compute the next state for a service that is currently 'started'.
#
# $sid - service id
# $cd  - service configuration data
# $sd  - service status data (a pending 'cmd' entry is removed here)
#
# Outcome:
#  - node of the service not online       => 'fence'
#  - configuration state 'disabled'       => 'request_stop'
#  - configuration state 'enabled'        => 'migrate' if the selected node
#    (or the target of an accepted migrate command) differs from the
#    current node, otherwise no change
#  - any other configuration state is logged as an error
sub next_state_started {
    my ($self, $sid, $cd, $sd) = @_;

    my ($haenv, $ns) = @$self{qw(haenv ns)};

    unless ($ns->node_is_online($sd->{node})) {
        $change_service_state->($self, $sid, 'fence');
        return;
    }

    if ($cd->{state} eq 'disabled') {
        $change_service_state->($self, $sid, 'request_stop');
        return;
    }

    if ($cd->{state} ne 'enabled') {
        return $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
    }

    # from here on the service is configured as 'enabled'
    my $node = $self->select_service_node($cd);

    if ($sd->{cmd}) {
        my ($cmd, $target) = @{$sd->{cmd}};
        delete $sd->{cmd};

        if ($cmd ne 'migrate') {
            $haenv->log('err', "unknown command '$cmd' for service '$sid'");
        } elsif (!$ns->node_is_online($target)) {
            $haenv->log('err', "ignore service '$sid' migrate request - node '$target' not online");
        } elsif ($sd->{node} eq $target) {
            $haenv->log('info', "ignore service '$sid' migrate request - service already on node '$target'");
        } else {
            # an accepted migrate request overrides the selected node
            $node = $target;
        }
    }

    # do nothing if there is no node to move to, or we are already there
    return if !$node || $sd->{node} eq $node;

    $haenv->log('info', "migrate service '$sid' to node '$node' (running)");
    $change_service_state->($self, $sid, 'migrate', node => $sd->{node}, target => $node);

    return;
}
c0bbd038 DM |
392 | |
393 | 1; |