]>
git.proxmox.com Git - pve-ha-manager.git/blob - PVE/HA/Manager.pm
1 package PVE
::HA
::Manager
;
5 use Digest
::MD5
qw(md5_base64);
9 use PVE
::HA
::NodeStatus
;
12 my ($this, $haenv) = @_;
14 my $class = ref($this) || $this;
16 my $ms = $haenv->read_manager_status();
18 $ms->{master_node
} = $haenv->nodename();
20 my $ns = PVE
::HA
::NodeStatus-
>new($haenv, $ms->{node_status
} || {});
22 # fixme: use separate class PVE::HA::ServiceStatus
23 my $ss = $ms->{service_status
} || {};
27 ms
=> $ms, # master status
28 ns
=> $ns, # PVE::HA::NodeStatus
29 ss
=> $ss, # service status
# Copy the in-memory node and service status back into the master status
# hash and persist it through the HA environment.
#
# Reads $self->{haenv}, $self->{ms}, $self->{ns} and $self->{ss}. Stores
# $ns->{status} as $ms->{node_status} and $ss as $ms->{service_status},
# then passes $ms to $haenv->write_manager_status().
#
# Returns whatever write_manager_status() returns (last evaluated statement).
sub flush_master_status {
    my ($self) = @_;

    my ($haenv, $ms, $ns, $ss) = ($self->{haenv}, $self->{ms}, $self->{ns}, $self->{ss});

    $ms->{node_status} = $ns->{status};
    $ms->{service_status} = $ss;

    $haenv->write_manager_status($ms);
}
# Attention: must be idempotent (always return the same result for the same input!)
# Pick the node a service should run on.
#
# $service_conf is the service configuration hash; only its 'node' entry
# (the preferred node) is read here.
#
# Returns the preferred node when $self->{ns} reports it online. Otherwise
# returns the first entry of the online node list, or undef when that list
# is empty. The result depends only on the preferred node and on what the
# node status object reports.
sub select_service_node {
    my ($self, $service_conf) = @_;

    my $ns = $self->{ns};

    my $pref_node = $service_conf->{node};

    return $pref_node if $ns->node_is_online($pref_node);

    # Preferred node is unavailable: fall back to the first online node.
    my $online_nodes = $ns->list_online_nodes();

    return shift @$online_nodes;
}
69 my $valid_service_states = {
# Closure that moves the status entry of service $sid to $new_state.
#
# Arguments: ($self, $sid, $new_state, %params).
#
# Visible behaviour:
#  - dies if $self->{ss} has no entry for $sid, if $new_state equals the
#    current state, or if $new_state is not a key of $valid_service_states
#  - deletes every key of the status entry, then sets 'state' to $new_state
#    and restores the previous 'node'
#  - builds a "key = value" list in $text_state while looping over %params
#  - assigns a fresh 'uid' computed with md5_base64 from the new state, the
#    process id, the current time and $uid_counter
#  - logs the transition at 'info' level through $haenv
#
# NOTE(review): the listing numbers in this extract skip (97-98, 100,
# 103-106, 108), so the declarations of $text_state and $v, the place where
# %params values are presumably stored into $sd, and any update of
# $uid_counter are not visible here - confirm against the complete file.
# NOTE(review): the "no such service" die message opens a single quote
# before $sid but never closes it.
79 my $change_service_state = sub {
80 my ($self, $sid, $new_state, %params) = @_;
82 my ($haenv, $ss) = ($self->{haenv
}, $self->{ss
});
84 my $sd = $ss->{$sid} || die "no such service '$sid";
86 my $old_state = $sd->{state};
87 my $old_node = $sd->{node
};
89 die "no state change" if $old_state eq $new_state; # just to be sure
91 die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state};
# Wipe the entry in place so references held elsewhere to $sd stay valid.
93 foreach my $k (keys %$sd) { delete $sd->{$k}; };
95 $sd->{state} = $new_state;
96 $sd->{node
} = $old_node;
99 foreach my $k (keys %params) {
101 $text_state .= ", " if $text_state;
102 $text_state .= "$k = $v";
107 $sd->{uid
} = md5_base64
($new_state . $$ . time() . $uid_counter);
109 $text_state = " ($text_state)" if $text_state;
110 $haenv->log('info', "service '$sid': state changed from '${old_state}' to '${new_state}' $text_state\n");
113 # read LRM status for all active nodes
# Collect the LRM status of every online node into a single hash.
#
# Asks $self->{ns} for the list of online nodes, reads each node's LRM
# status through $self->{haenv} and merges the entries by uid.
#
# Returns a hash reference mapping uid => LRM result entry. If the same uid
# shows up on more than one node, the entry seen first is kept.
sub read_lrm_status {
    my ($self) = @_;

    my $nodes = $self->{ns}->list_online_nodes();
    my $haenv = $self->{haenv};

    my $res = {};

    foreach my $node (@$nodes) {
        my $ls = $haenv->read_lrm_status($node);
        foreach my $uid (keys %$ls) {
            next if $res->{$uid}; # should not happen
            $res->{$uid} = $ls->{$uid};
        }
    }

    return $res;
}
133 # read new crm commands and save them into crm master status
# Read pending CRM commands and record them in the service status.
#
# The command list comes from $haenv->read_crm_commands() as text with one
# command per line, in the form:
#
#     migrate|relocate <service id> <target node>
#
# For each line:
#  - unparsable lines are logged as errors
#  - unknown services are logged as errors
#  - an offline target node is logged as an error
#  - a target equal to the service's current node is logged and ignored
#  - otherwise [ $task, $node ] is stored as the service's 'cmd' entry
sub update_crm_commands {
    my ($self) = @_;

    my ($haenv, $ms, $ns, $ss) = ($self->{haenv}, $self->{ms}, $self->{ns}, $self->{ss});

    my $cmdlist = $haenv->read_crm_commands();

    # Nothing queued: avoid splitting an undefined value.
    $cmdlist = '' if !defined($cmdlist);

    foreach my $cmd (split(/\n/, $cmdlist)) {

        if ($cmd =~ m/^(migrate|relocate)\s+(\S+)\s+(\S+)$/) {
            my ($task, $sid, $node) = ($1, $2, $3);
            if (my $sd = $ss->{$sid}) {
                if (!$ns->node_is_online($node)) {
                    $haenv->log('err', "crm command error - node not online: $cmd");
                } else {
                    if ($node eq $sd->{node}) {
                        $haenv->log('info', "ignore crm command - service already on target node: $cmd");
                    } else {
                        $haenv->log('info', "got crm command: $cmd");
                        $ss->{$sid}->{cmd} = [ $task, $node];
                    }
                }
            } else {
                $haenv->log('err', "crm command error - no such service: $cmd");
            }

        } else {
            $haenv->log('err', "unable to parse crm command: $cmd");
        }
    }
}
171 my ($haenv, $ms, $ns, $ss) = ($self->{haenv
}, $self->{ms
}, $self->{ns
}, $self->{ss
});
173 $ns->update($haenv->get_node_info());
175 if (!$ns->node_is_online($haenv->nodename())) {
176 $haenv->log('info', "master seems offline\n");
180 my $lrm_status = $self->read_lrm_status();
182 my $sc = $haenv->read_service_config();
184 # compute new service status
187 foreach my $sid (keys %$sc) {
188 next if $ss->{$sid}; # already there
189 $haenv->log('info', "Adding new service '$sid'\n");
190 # assume we are running to avoid relocate running service at add
191 $ss->{$sid} = { state => 'started', node
=> $sc->{$sid}->{node
}};
194 $self->update_crm_commands();
199 foreach my $sid (keys %$ss) {
200 my $sd = $ss->{$sid};
201 my $cd = $sc->{$sid} || { state => 'disabled' };
203 my $lrm_res = $sd->{uid
} ?
$lrm_status->{$sd->{uid
}} : undef;
205 my $last_state = $sd->{state};
207 if ($last_state eq 'stopped') {
209 $self->next_state_stopped($sid, $cd, $sd);
211 } elsif ($last_state eq 'started') {
213 $self->next_state_started($sid, $cd, $sd);
215 } elsif ($last_state eq 'migrate' || $last_state eq 'relocate') {
217 # check result from LRM daemon
219 my $exit_code = $lrm_res->{exit_code
};
220 if ($exit_code == 0) {
221 &$change_service_state($self, $sid, 'started', node
=> $sd->{target
});
223 $haenv->log('err', "service '$sid' - migration failed (exit code $exit_code)");
224 &$change_service_state($self, $sid, 'started', node
=> $sd->{node
});
228 } elsif ($last_state eq 'fence') {
230 # do nothing here - wait until fenced
232 } elsif ($last_state eq 'request_stop') {
234 # check result from LRM daemon
236 my $exit_code = $lrm_res->{exit_code
};
237 if ($exit_code == 0) {
238 &$change_service_state($self, $sid, 'stopped');
240 &$change_service_state($self, $sid, 'error'); # fixme: what state?
244 } elsif ($last_state eq 'error') {
250 die "unknown service state '$last_state'";
253 $repeat = 1 if $sd->{state} ne $last_state;
257 my $fenced_nodes = {};
258 foreach my $sid (keys %$ss) {
259 my $sd = $ss->{$sid};
260 next if $sd->{state} ne 'fence';
262 if (!defined($fenced_nodes->{$sd->{node
}})) {
263 $fenced_nodes->{$sd->{node
}} = $ns->fence_node($sd->{node
}) || 0;
266 next if !$fenced_nodes->{$sd->{node
}};
# node fence was successful - mark service as stopped
269 &$change_service_state($self, $sid, 'stopped');
275 # remove stale services
278 $self->flush_master_status();
281 # functions to compute next service states
282 # $cd: service configuration data (read only)
283 # $sd: service status data (read only)
285 # Note: use change_service_state() to alter state
# Compute the next state for a service whose CRM state is 'stopped'.
#
# Arguments: ($self, $sid, $cd, $sd), where $cd is the service configuration
# entry and $sd the service status entry.
#
# Visible behaviour:
#  - if the status node differs from the configured node, log it and
#    overwrite $sd->{node} with $cd->{node}
#  - unpack a pending command from $sd->{cmd}; for 'migrate'/'relocate' the
#    request is ignored (with a log entry) when the target is offline or is
#    already the current node, otherwise the service location is changed
#    and both $cd->{node} and $sd->{node} are set to the target; any other
#    command is logged as unknown
#  - configuration state 'enabled': ask select_service_node() for a node,
#    change the service location if it differs from $sd->{node}, and switch
#    the service to 'started' on that node
#  - any other configuration state is logged as unknown
#
# NOTE(review): listing numbers skip in this extract (299-301, 303-304, 310,
# 314-315, 317-319, 321-324, 329, 331-337), so guards around the command
# handling, the body of the 'disabled' branch, else branches and early
# returns are not visible - confirm against the complete file.
# NOTE(review): the "fixup service" log message opens "(" but never closes it.
288 sub next_state_stopped
{
289 my ($self, $sid, $cd, $sd) = @_;
291 my $haenv = $self->{haenv
};
292 my $ns = $self->{ns
};
294 if ($sd->{node
} ne $cd->{node
}) {
295 # this can happen if we fence a node with active migrations
296 # hack: modify $sd (normally this should be considered read-only)
297 $haenv->log('info', "fixup service '$sid' location ($sd->{node} => $cd->{node}");
298 $sd->{node
} = $cd->{node
};
# Pending CRM command, stored as [ task, target node ].
302 my ($cmd, $target) = @{$sd->{cmd
}};
305 if ($cmd eq 'migrate' || $cmd eq 'relocate') {
306 if (!$ns->node_is_online($target)) {
307 $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
308 } elsif ($sd->{node
} eq $target) {
309 $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
311 $haenv->change_service_location($sid, $target);
312 $cd->{node
} = $sd->{node
} = $target; # fixme: $sd is read-only??!!
313 $haenv->log('info', "$cmd service '$sid' to node '$target' (stopped)");
316 $haenv->log('err', "unknown command '$cmd' for service '$sid'");
320 if ($cd->{state} eq 'disabled') {
325 if ($cd->{state} eq 'enabled') {
326 if (my $node = $self->select_service_node($cd)) {
327 if ($node && ($sd->{node
} ne $node)) {
328 $haenv->change_service_location($sid, $node);
330 &$change_service_state($self, $sid, 'started', node
=> $node);
338 $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
# Compute the next state for a service whose CRM state is 'started'.
#
# Arguments: ($self, $sid, $cd, $sd), where $cd is the service configuration
# entry and $sd the service status entry.
#
# Visible behaviour:
#  - if the node recorded in $sd is not online, switch the service to 'fence'
#  - configuration state 'disabled': switch the service to 'request_stop'
#  - configuration state 'enabled':
#      * unpack a pending command from $sd->{cmd}; a 'migrate'/'relocate'
#        request is ignored (with a log entry) when the target is offline or
#        is already the current node, otherwise the service state becomes
#        the command name with 'node' and 'target' parameters; any other
#        command is logged as unknown
#      * ask select_service_node() for a node and, when it differs from the
#        current one, switch the service to 'migrate' towards it
#  - any other configuration state is logged as unknown
#
# NOTE(review): listing numbers skip in this extract (348, 350-352, 355-357,
# 359-360, 362-363, 369, 372-373, 375-377, 379, 383-390), so early returns,
# guards around the command handling and else branches are not visible -
# confirm against the complete file.
341 sub next_state_started
{
342 my ($self, $sid, $cd, $sd) = @_;
344 my $haenv = $self->{haenv
};
345 my $ns = $self->{ns
};
# The node hosting the service is no longer online: request fencing.
347 if (!$ns->node_is_online($sd->{node
})) {
349 &$change_service_state($self, $sid, 'fence');
353 if ($cd->{state} eq 'disabled') {
354 &$change_service_state($self, $sid, 'request_stop');
358 if ($cd->{state} eq 'enabled') {
# Pending CRM command, stored as [ task, target node ].
361 my ($cmd, $target) = @{$sd->{cmd
}};
364 if ($cmd eq 'migrate' || $cmd eq 'relocate') {
365 if (!$ns->node_is_online($target)) {
366 $haenv->log('err', "ignore service '$sid' $cmd request - node '$target' not online");
367 } elsif ($sd->{node
} eq $target) {
368 $haenv->log('info', "ignore service '$sid' $cmd request - service already on node '$target'");
370 $haenv->log('info', "$cmd service '$sid' to node '$target' (running)");
371 &$change_service_state($self, $sid, $cmd, node
=> $sd->{node
}, target
=> $target);
374 $haenv->log('err', "unknown command '$cmd' for service '$sid'");
378 my $node = $self->select_service_node($cd);
380 if ($node && ($sd->{node
} ne $node)) {
381 $haenv->log('info', "migrate service '$sid' to node '$node' (running)");
382 &$change_service_state($self, $sid, 'migrate', node
=> $sd->{node
}, target
=> $node);
391 $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");