]>
Commit | Line | Data |
---|---|---|
1 | package PVE::HA::Manager; | |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | use Digest::MD5 qw(md5_base64); | |
6 | ||
7 | use Data::Dumper; | |
8 | use PVE::Tools; | |
9 | use PVE::HA::NodeStatus; | |
10 | ||
11 | my $fence_delay = 30; | |
12 | ||
# Constructor: build a manager object on top of the persisted manager status.
#
# Parameters:
#   $this  - class name, or an existing object whose class is reused
#   $haenv - HA environment object; read_manager_status() and nodename()
#            are called on it
#
# Returns the blessed manager object holding the keys haenv, ms, ns and ss.
sub new {
    my ($this, $haenv) = @_;

    # accept both Class->new(...) and $obj->new(...)
    my $class = ref($this) || $this;

    # the node constructing the manager records itself as master
    my $ms = $haenv->read_manager_status();
    $ms->{master_node} = $haenv->nodename();

    my $node_status = $ms->{node_status} || {};

    my $self = {
        haenv => $haenv,
        ms => $ms, # master status
        ns => PVE::HA::NodeStatus->new($haenv, $node_status), # PVE::HA::NodeStatus
        # fixme: use separate class PVE::HA::ServiceStatus
        ss => $ms->{service_status} || {}, # service status
    };

    return bless $self, $class;
}
36 | ||
# Cleanup hook of the manager object. Currently a placeholder: it only
# unpacks its invocant and performs no work.
sub cleanup {
    my ($self) = @_;

    # todo: ?
}
42 | ||
# Copy the current node and service status into the master status hash and
# hand it to the environment's write_manager_status().
#
# Returns whatever write_manager_status() returns.
sub flush_master_status {
    my ($self) = @_;

    my $ms = $self->{ms};

    $ms->{node_status} = $self->{ns}->{status};
    $ms->{service_status} = $self->{ss};

    return $self->{haenv}->write_manager_status($ms);
}
53 | ||
# Select the node a service should run on.
#
# Parameters:
#   $groups            - group configuration; a group is looked up as
#                        $groups->{ids}->{<group id>}
#   $online_node_usage - hash ref keyed by node name; a node counts as online
#                        when its entry is defined, and the value is used to
#                        rank nodes (lower first)
#   $service_conf      - service configuration; reads {group} and {node}
#   $current_node      - node the service is currently assigned to; may be
#                        undef, which is treated as "not a candidate"
#   $try_next          - when true, pick the node following the current one
#                        in the ranked list instead of staying put
#
# Returns the chosen node name, or undef if no candidate node is online.
sub select_service_node {
    my ($groups, $online_node_usage, $service_conf, $current_node, $try_next) = @_;

    my $group = { 'nodes' => $service_conf->{node} }; # default group

    my $group_id = $service_conf->{group};
    $group = $groups->{ids}->{$group_id} if $group_id && $groups->{ids}->{$group_id};

    # sort the online group members into priority classes
    my $pri_groups = {};
    my $group_members = {};
    foreach my $entry (PVE::Tools::split_list($group->{nodes})) {
        my ($node, $pri) = ($entry, 0);
        if ($entry =~ m/^(\S+):(\d+)$/) {
            ($node, $pri) = ($1, $2);
        }
        next if !defined($online_node_usage->{$node}); # offline
        $pri_groups->{$pri}->{$node} = 1;
        $group_members->{$node} = $pri;
    }

    # add non-group members to unrestricted groups (priority -1)
    if (!$group->{restricted}) {
        my $pri = -1;
        foreach my $node (keys %$online_node_usage) {
            next if defined($group_members->{$node});
            $pri_groups->{$pri}->{$node} = 1;
            $group_members->{$node} = -1;
        }
    }

    my @pri_list = sort {$b <=> $a} keys %$pri_groups;
    # explicit undef keeps the return value a single element in list context
    return undef if !scalar(@pri_list);

    # an undefined current node can neither be kept nor found in the list
    my $have_current = defined($current_node);

    if (!$try_next && $group->{nofailback} && $have_current &&
        defined($group_members->{$current_node})) {
        return $current_node;
    }

    # select node from top priority node list

    my $top_pri = $pri_list[0];

    # least used node first, node name as tie breaker
    my @nodes = sort {
        $online_node_usage->{$a} <=> $online_node_usage->{$b} || $a cmp $b
    } keys %{$pri_groups->{$top_pri}};

    # position of the current node within the ranked list (names are unique)
    my $found;
    if ($have_current) {
        foreach my $i (0 .. $#nodes) {
            if ($nodes[$i] eq $current_node) {
                $found = $i;
                last;
            }
        }
    }

    if ($try_next) {
        # advance to the following node, wrapping around to the first one
        return $nodes[$found + 1] if defined($found) && $found < $#nodes;
        return $nodes[0];
    }

    return defined($found) ? $nodes[$found] : $nodes[0];
}
126 | ||
# Per-process counter mixed into every generated id, so that two ids created
# within the same second still differ.
my $uid_counter = 0;

# Build a new identifier for a service state.
#
# Parameters:
#   $state - state name that is mixed into the digest input
#
# Returns the base64 MD5 digest of state, process id, current time and the
# incremented counter.
sub compute_new_uuid {
    my ($state) = @_;

    $uid_counter += 1;

    my $seed = join('', $state, $$, time(), $uid_counter);

    return md5_base64($seed);
}
135 | ||
# Lookup table (state name => 1) of the service states accepted when a
# service state is changed.
my $valid_service_states = {
    map { ($_ => 1) } qw(
        stopped
        request_stop
        started
        fence
        migrate
        relocate
        error
    )
};
145 | ||
# Recompute how many services are accounted to each online node and store
# the result in $self->{online_node_usage} (node name => count).
#
# Only nodes reported by list_online_nodes() get an entry. Services whose
# own node is not online are ignored. Services in state 'migrate' or
# 'relocate' are accounted to their target node, but only if that target is
# online: incrementing a missing entry would create one for an offline node.
#
# Dies with "should not be reached" on an unknown state of a service whose
# node is online.
sub recompute_online_node_usage {
    my ($self) = @_;

    my $online_node_usage = {};

    my $online_nodes = $self->{ns}->list_online_nodes();

    foreach my $node (@$online_nodes) {
        $online_node_usage->{$node} = 0;
    }

    foreach my $sid (keys %{$self->{ss}}) {
        my $sd = $self->{ss}->{$sid};
        my $state = $sd->{state};
        my $node = $sd->{node};

        # skip services without a node or located on an offline node
        next if !defined($node) || !defined($online_node_usage->{$node});

        if (($state eq 'started') || ($state eq 'request_stop') ||
            ($state eq 'fence') || ($state eq 'error')) {
            $online_node_usage->{$node}++;
        } elsif (($state eq 'migrate') || ($state eq 'relocate')) {
            my $target = $sd->{target};
            # never create an entry for a target that is not online
            $online_node_usage->{$target}++
                if defined($target) && defined($online_node_usage->{$target});
        } elsif ($state eq 'stopped') {
            # do nothing
        } else {
            die "should not be reached";
        }
    }

    $self->{online_node_usage} = $online_node_usage;
}
176 | ||
# Switch a service to a new state.
#
# Parameters:
#   $self      - manager object (reads {haenv} and {ss})
#   $sid       - service id; must exist in the service status
#   $new_state - target state; must be listed in $valid_service_states and
#                differ from the current state
#   %params    - additional key/value pairs stored in the service status
#                (a 'node' or 'state' key overrides the value set here)
#
# All previous status entries of the service are dropped; only the node is
# carried over. The online node usage is recomputed and a fresh uid is
# assigned. Dies if the service is unknown, the state does not change, or the
# new state is invalid.
my $change_service_state = sub {
    my ($self, $sid, $new_state, %params) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});

    my $sd = $ss->{$sid} || die "no such service '$sid'";

    my $old_state = $sd->{state};
    my $old_node = $sd->{node};

    die "no state change" if $old_state eq $new_state; # just to be sure

    die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state};

    # empty the hash in place - $ss keeps referencing the same hash
    %$sd = ();

    $sd->{state} = $new_state;
    $sd->{node} = $old_node;

    # sorted keys give a stable parameter order in the log message
    my @param_texts;
    foreach my $k (sort keys %params) {
        my $v = $params{$k};
        push @param_texts, "$k = $v";
        $sd->{$k} = $v;
    }
    my $text_state = join(", ", @param_texts);

    $self->recompute_online_node_usage();

    $sd->{uid} = compute_new_uuid($new_state);

    $text_state = " ($text_state)" if $text_state;
    $haenv->log('info', "service '$sid': state changed from '${old_state}' to '${new_state}' $text_state\n");
};
212 | ||
# Collect the LRM status of all online nodes into a single hash.
#
# Returns a hash ref keyed by uid. When the same uid is reported by more
# than one node (should not happen), the first true entry is kept.
sub read_lrm_status {
    my ($self) = @_;

    my $online_nodes = $self->{ns}->list_online_nodes();
    my $haenv = $self->{haenv};

    my $merged = {};

    for my $node (@$online_nodes) {
        my $node_status = $haenv->read_lrm_status($node);
        for my $uid (keys %$node_status) {
            # should not happen - keep the entry read first
            $merged->{$uid} = $node_status->{$uid} if !$merged->{$uid};
        }
    }

    return $merged;
}
232 | ||
# Read pending CRM commands and attach the valid ones to the service status.
#
# Each line must look like "migrate <sid> <node>" or "relocate <sid> <node>".
# An accepted command is stored as [ $task, $node ] in the 'cmd' entry of the
# service. Lines that cannot be parsed, name an unknown service, name a node
# that is not online, or name the node the service is already on are logged
# and skipped.
sub update_crm_commands {
    my ($self) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};
    my $ss = $self->{ss};

    my @lines = split(/\n/, $haenv->read_crm_commands());

    foreach my $cmd (@lines) {
        chomp $cmd;

        my ($task, $sid, $node) = $cmd =~ m/^(migrate|relocate)\s+(\S+)\s+(\S+)$/;
        my $sd = defined($sid) ? $ss->{$sid} : undef;

        if (!defined($task)) {
            $haenv->log('err', "unable to parse crm command: $cmd");
        } elsif (!$sd) {
            $haenv->log('err', "crm command error - no such service: $cmd");
        } elsif (!$ns->node_is_online($node)) {
            $haenv->log('err', "crm command error - node not online: $cmd");
        } elsif ($node eq $sd->{node}) {
            $haenv->log('info', "ignore crm command - service already on target node: $cmd");
        } else {
            $haenv->log('info', "got crm command: $cmd");
            $sd->{cmd} = [ $task, $node ];
        }
    }
}
267 | ||
# Run one manager round: refresh the node status, register newly configured
# services, take over CRM commands, advance the state of every service and
# finally write the manager status back.
#
# Returns early (without writing the status) when the local node is not
# online.
sub manage {
    my ($self) = @_;

    my ($haenv, $ns, $ss) = @{$self}{qw(haenv ns ss)};

    $ns->update($haenv->get_node_info());

    if (!$ns->node_is_online($haenv->nodename())) {
        $haenv->log('info', "master seems offline\n");
        return;
    }

    my $lrm_status = $self->read_lrm_status();

    my $sc = $haenv->read_service_config();

    $self->{groups} = $haenv->read_group_config(); # update

    # compute new service status

    # register services that show up in the configuration for the first time
    foreach my $sid (keys %$sc) {
        next if $ss->{$sid}; # already there
        $haenv->log('info', "Adding new service '$sid'\n");
        # assume we are running to avoid relocate running service at add
        $ss->{$sid} = {
            state => 'started',
            node => $sc->{$sid}->{node},
            uid => compute_new_uuid('started'),
        };
    }

    $self->update_crm_commands();

    # run another pass as long as a pass changed the state of any service
    my $repeat = 1;
    while ($repeat) {
        $repeat = 0;

        $self->recompute_online_node_usage();

        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            # services without configuration are handled like disabled ones
            my $cd = $sc->{$sid} || { state => 'disabled' };

            my $lrm_res = $sd->{uid} ? $lrm_status->{$sd->{uid}} : undef;

            my $last_state = $sd->{state};

            if ($last_state eq 'stopped') {
                $self->next_state_stopped($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'started') {
                $self->next_state_started($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'migrate' || $last_state eq 'relocate') {
                $self->next_state_migrate_relocate($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'fence') {
                # do nothing here - wait until fenced
            } elsif ($last_state eq 'request_stop') {
                $self->next_state_request_stop($sid, $cd, $sd, $lrm_res);
            } elsif ($last_state eq 'error') {
                # fixme:
            } else {
                die "unknown service state '$last_state'";
            }

            $repeat = 1 if $sd->{state} ne $last_state;
        }

        # handle fencing - fence_node() is called at most once per node and pass
        my $fenced_nodes = {};
        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next if $sd->{state} ne 'fence';

            my $node = $sd->{node};
            $fenced_nodes->{$node} = $ns->fence_node($node) || 0
                if !defined($fenced_nodes->{$node});

            next if !$fenced_nodes->{$node};

            # node fence was successful - mark service as stopped
            $change_service_state->($self, $sid, 'stopped');
        }
    }

    # remove stale services
    # fixme:

    $self->flush_master_status();
}
368 | ||
369 | # functions to compute next service states | |
370 | # $cd: service configuration data (read only) | |
371 | # $sd: service status data (read only) | |
372 | # | |
373 | # Note: use change_service_state() to alter state | |
374 | # | |
375 | ||
# Compute the next state of a service in state 'request_stop'.
#
# With an LRM result the service becomes 'stopped' on exit code 0 and
# 'error' otherwise. Without a result the service is moved to 'fence' once
# node_is_offline_delayed() reports its node for $fence_delay.
sub next_state_request_stop {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    # check result from LRM daemon
    if ($lrm_res) {
        # fixme: what state? ('error' is used for a failed stop)
        my $new_state = ($lrm_res->{exit_code} == 0) ? 'stopped' : 'error';
        $change_service_state->($self, $sid, $new_state);
        return;
    }

    if ($self->{ns}->node_is_offline_delayed($sd->{node}, $fence_delay)) {
        $change_service_state->($self, $sid, 'fence');
        return;
    }
}
399 | ||
# Compute the next state of a service in state 'migrate' or 'relocate'.
#
# With an LRM result the service returns to 'started': on the target node
# for exit code 0, otherwise (after logging the failure) on its current
# node. Without a result the service is moved to 'fence' once
# node_is_offline_delayed() reports its node for $fence_delay.
sub next_state_migrate_relocate {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my ($haenv, $ns) = ($self->{haenv}, $self->{ns});

    # check result from LRM daemon
    if ($lrm_res) {
        my $exit_code = $lrm_res->{exit_code};

        my $node = $sd->{target};
        if ($exit_code != 0) {
            $haenv->log('err', "service '$sid' - migration failed (exit code $exit_code)");
            $node = $sd->{node};
        }

        $change_service_state->($self, $sid, 'started', node => $node);
        return;
    }

    if ($ns->node_is_offline_delayed($sd->{node}, $fence_delay)) {
        $change_service_state->($self, $sid, 'fence');
        return;
    }
}
424 | ||
425 | ||
# Compute the next state of a service in state 'stopped'.
#
# Steps, in order:
#   1. If the configuration names a node different from the one in the
#      status, the status is aligned to the configuration. A configuration
#      without a node (e.g. the placeholder used for services missing from
#      the configuration) leaves the recorded node untouched.
#   2. A pending migrate/relocate command is consumed; for a stopped service
#      it only changes the service location.
#   3. A 'disabled' service stays stopped; an 'enabled' one is started on the
#      node chosen by select_service_node(), if any.
#
# Any other configured state is logged as an error.
sub next_state_stopped {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my $haenv = $self->{haenv};
    my $ns = $self->{ns};

    if (defined($cd->{node}) && $sd->{node} ne $cd->{node}) {
        # this can happen if we fence a node with active migrations
        # hack: modify $sd (normally this should be considered read-only)
        $haenv->log('info', "fixup service '$sid' location ($sd->{node} => $cd->{node})");
        $sd->{node} = $cd->{node};
    }

    if ($sd->{cmd}) {
        my ($cmd, $target) = @{$sd->{cmd}};
        delete $sd->{cmd};

        if ($cmd eq 'migrate' || $cmd eq 'relocate') {
            if (!$ns->node_is_online($target)) {
                $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
            } elsif ($sd->{node} eq $target) {
                $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
            } else {
                $haenv->change_service_location($sid, $target);
                $cd->{node} = $sd->{node} = $target; # fixme: $sd is read-only??!!
                $haenv->log('info', "$cmd service '$sid' to node '$target' (stopped)");
            }
        } else {
            $haenv->log('err', "unknown command '$cmd' for service '$sid'");
        }
    }

    if ($cd->{state} eq 'disabled') {
        # do nothing
        return;
    }

    if ($cd->{state} eq 'enabled') {
        if (my $node = select_service_node($self->{groups}, $self->{online_node_usage}, $cd, $sd->{node})) {
            # record the new location before the service is started there
            $haenv->change_service_location($sid, $node) if $sd->{node} ne $node;
            $change_service_state->($self, $sid, 'started', node => $node);
        } else {
            # fixme: warn
        }

        return;
    }

    $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
}
478 | ||
# Compute the next state of a service in state 'started'.
#
# - Node not online: move to 'fence' once node_is_offline_delayed() reports
#   the node for $fence_delay, otherwise wait.
# - Configured 'disabled': request a stop.
# - Configured 'enabled': execute a pending migrate/relocate command, or
#   migrate to the node chosen by select_service_node() when it differs from
#   the current one. A non-zero LRM exit code makes the selection try the
#   next node.
#
# Any other configured state is logged as an error.
sub next_state_started {
    my ($self, $sid, $cd, $sd, $lrm_res) = @_;

    my ($haenv, $ns) = ($self->{haenv}, $self->{ns});

    if (!$ns->node_is_online($sd->{node})) {
        $change_service_state->($self, $sid, 'fence')
            if $ns->node_is_offline_delayed($sd->{node}, $fence_delay);
        return;
    }

    if ($cd->{state} eq 'disabled') {
        $change_service_state->($self, $sid, 'request_stop');
        return;
    }

    if ($cd->{state} eq 'enabled') {

        if (my $pending = $sd->{cmd}) {
            # a command is consumed even if it turns out to be unusable
            delete $sd->{cmd};
            my ($cmd, $target) = @$pending;

            if ($cmd ne 'migrate' && $cmd ne 'relocate') {
                $haenv->log('err', "unknown command '$cmd' for service '$sid'");
            } elsif (!$ns->node_is_online($target)) {
                $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
            } elsif ($sd->{node} eq $target) {
                $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
            } else {
                $haenv->log('info', "$cmd service '$sid' to node '$target' (running)");
                $change_service_state->($self, $sid, $cmd, node => $sd->{node}, target => $target);
            }
        } else {
            # fixme: other exit codes?
            my $try_next = ($lrm_res && ($lrm_res->{exit_code} != 0)) ? 1 : 0;

            my $node = select_service_node(
                $self->{groups}, $self->{online_node_usage}, $cd, $sd->{node}, $try_next);

            if ($node && ($sd->{node} ne $node)) {
                $haenv->log('info', "migrate service '$sid' to node '$node' (running)");
                $change_service_state->($self, $sid, 'migrate', node => $sd->{node}, target => $node);
            }
        }

        return;
    }

    $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
}
538 | ||
539 | 1; |