]>
git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/NodeStatus.pm
1 package PVE
::HA
::NodeStatus
;
# Argument unpacking of what looks like the constructor (the sub header is not
# part of the visible lines): invocant, HA environment object and status data.
# NOTE(review): how $haenv and $status are stored is not visible here - confirm.
11 my ($this, $haenv, $status) = @_;
# Accept either an existing object or a plain class name as the invocant.
13 my $class = ref($this) || $this;
24 # Possible node states: each key is a state name, each value a human-readable
# description. The state setter in this file dies on any state name that is
# not a key of this hash.
25 my $valid_node_states = {
26 online
=> "node online and member of quorate partition",
27 unknown
=> "not member of quorate partition, but possibly still running",
28 fence
=> "node needs to be fenced",
29 gone
=> "node vanished from cluster members list, possibly deleted"
# Return the recorded state of $node from the status map.
# Side effect: a node without a (true) entry is first recorded as 'unknown',
# so the lookup itself creates the status entry for nodes not seen before.
33 my ($self, $node) = @_;
35 $self->{status
}->{$node} = 'unknown'
36 if !$self->{status
}->{$node};
38 return $self->{status
}->{$node};
# True only when the state returned by get_node_state() for $node is exactly
# 'online'; any other state yields false.
42 my ($self, $node) = @_;
44 return $self->get_node_state($node) eq 'online';
# Report whether $node has been offline for at least $delay.
#
# Parameters:
#   $node  - name of the node to check
#   $delay - optional threshold; falls back to $fence_delay when undefined
#            ($fence_delay is defined outside the lines shown here; the unit
#            is presumably seconds, matching get_time() - TODO confirm)
#
# Returns undef while the node is in state 'online'; otherwise the result of
# comparing the time elapsed since the recorded last-online time with $delay.
47 sub node_is_offline_delayed
{
48 my ($self, $node, $delay) = @_;
50 $delay = $fence_delay if !defined($delay);
52 my $haenv = $self->{haenv
};
# An online node is never reported as offline-delayed.
54 return undef if $self->get_node_state($node) eq 'online';
56 my $last_online = $self->{last_online
}->{$node};
58 my $ctime = $haenv->get_time();
# No last-online time recorded yet: start the clock at the current time.
60 if (!defined($last_online)) {
61 $self->{last_online
}->{$node} = $ctime;
# NOTE(review): the embedded numbering jumps from 61 to 65, so the end of the
# if-block is not visible here - confirm against the upstream file.
65 return ($ctime - $last_online) >= $delay;
# Return a new array ref holding the keys of the status map (the node names)
# in the default string sort order.
71 return [sort keys %{$self->{status
}}];
# Walk all nodes of the status map in sorted order and skip every node whose
# recorded state is not 'online'.
# NOTE(review): what is done with the remaining nodes is not visible in these
# lines; presumably they are collected and returned - TODO confirm.
74 sub list_online_nodes
{
79 foreach my $node (sort keys %{$self->{status
}}) {
# Reads the status map directly rather than calling get_node_state(), so no
# status entry is created or changed by this check.
80 next if $self->{status
}->{$node} ne 'online';
# Private helper: forget a node that is in state 'gone'.
# Returns undef without changing anything when the node is in any other
# state; otherwise removes the last-online time and the status entry of the
# node and writes a log entry of level 'notice' through the HA environment.
87 my $delete_node = sub {
88 my ($self, $node) = @_;
# Guard: only nodes already marked 'gone' may be deleted.
90 return undef if $self->get_node_state($node) ne 'gone';
92 my $haenv = $self->{haenv
};
94 delete $self->{last_online
}->{$node};
95 delete $self->{status
}->{$node};
97 $haenv->log('notice', "deleting gone node '$node', not a cluster member".
# Private helper: move $node to $state.
#
# Parameters:
#   $node  - name of the node
#   $state - new state; must be a key of $valid_node_states
#
# Dies with "unknown node state ..." for any other state name. Does nothing
# when the node already has the requested state; otherwise stores the new
# state and logs the transition at level 'info'.
101 my $set_node_state = sub {
102 my ($self, $node, $state) = @_;
104 my $haenv = $self->{haenv
};
106 die "unknown node state '$state'\n"
107 if !defined($valid_node_states->{$state});
# get_node_state() records 'unknown' for nodes without an entry, so
# $last_state is defined for the comparison and the log message below.
109 my $last_state = $self->get_node_state($node);
111 return if $state eq $last_state;
113 $self->{status
}->{$node} = $state;
115 $haenv->log('info', "node '$node': state changed from " .
116 "'$last_state' => '$state'\n");
# Bring the recorded node states in line with $node_info.
# $node_info is used below as a hash ref keyed by node name whose entries
# carry an 'online' flag; a node may also be absent from it entirely.
# NOTE(review): the sub header and some branch lines are not part of the
# visible lines - confirm the structure against the upstream file.
120 my ($self, $node_info) = @_;
122 my $haenv = $self->{haenv
};
# First pass: every node that is currently reported as online.
124 foreach my $node (sort keys %$node_info) {
125 my $d = $node_info->{$node};
126 next if !$d->{online
};
128 # record last time the node was online (required to implement fence delay)
129 $self->{last_online
}->{$node} = $haenv->get_time();
131 my $state = $self->get_node_state($node);
133 if ($state eq 'online') {
134 # &$set_node_state($self, $node, 'online');
135 } elsif ($state eq 'unknown' || $state eq 'gone') {
# A node in state 'unknown' or 'gone' that is reported online becomes 'online'.
136 &$set_node_state($self, $node, 'online');
137 } elsif ($state eq 'fence') {
138 # do nothing, wait until fenced
# NOTE(review): the message below never closes the single quote after $state.
140 die "detected unknown node state '$state";
# Second pass: every known node that is not reported as online.
144 foreach my $node (sort keys %{$self->{status
}}) {
145 my $d = $node_info->{$node};
146 next if $d && $d->{online
};
148 my $state = $self->get_node_state($node);
150 # node is not inside quorate partition, possibly not active
152 if ($state eq 'online') {
153 &$set_node_state($self, $node, 'unknown');
154 } elsif ($state eq 'unknown') {
156 # node isn't in the member list anymore, deleted from the cluster?
157 &$set_node_state($self, $node, 'gone') if(!defined($d));
159 } elsif ($state eq 'fence') {
160 # do nothing, wait until fenced
161 } elsif($state eq 'gone') {
# A 'gone' node is deleted only once node_is_offline_delayed() reports true
# for a delay of 3600 (presumably seconds, i.e. one hour - TODO confirm).
162 if ($self->node_is_offline_delayed($node, 3600)) {
163 &$delete_node($self, $node);
# NOTE(review): same unterminated single quote in the message as above.
166 die "detected unknown node state '$state";
172 # Assembles a common text for fence emails and sends it.
#
# Parameters:
#   $subject_prefix - fence status tag; starts the subject and is also quoted
#                     in the mail body as the current fence status
#   $subject        - free text appended to the subject after a colon
#   $node           - name of the affected node, quoted in the mail body
#
# The body is the fixed text below followed by a JSON dump (pretty, canonical)
# of a hash holding the manager status read from the HA environment and the
# node status map of this object. The mail is handed to the sendmail method
# of the HA environment.
# NOTE(review): to_json is imported outside the lines shown here, and the
# heredoc terminator is not visible in this copy - confirm against upstream.
173 my $send_fence_state_email = sub {
174 my ($self, $subject_prefix, $subject, $node) = @_;
176 my $haenv = $self->{haenv
};
178 my $mail_text = <<EOF
179 The node '$node' failed and needs manual intervention.
181 The PVE HA manager tries to fence it and recover the
182 configured HA resources to a healthy node if possible.
184 Current fence status: $subject_prefix
188 Overall Cluster status:
189 -----------------------
193 my $mail_subject = $subject_prefix . ': ' . $subject;
195 my $status = $haenv->read_manager_status();
196 my $data = { manager_status
=> $status, node_status
=> $self->{status
} };
198 $mail_text .= to_json
($data, { pretty
=> 1, canonical
=> 1});
200 $haenv->sendmail($mail_subject, $mail_text);
206 my ($self, $node) = @_;
208 my $haenv = $self->{haenv
};
210 my $state = $self->get_node_state($node);
212 if ($state ne 'fence') {
213 &$set_node_state($self, $node, 'fence');
214 my $msg = "Try to fence node '$node'";
215 &$send_fence_state_email($self, 'FENCE', $msg, $node);
218 my $success = $haenv->get_ha_agent_lock($node);
221 my $msg = "fencing: acknowledged - got agent lock for node '$node'";
222 $haenv->log("info", $msg);
223 &$set_node_state($self, $node, 'unknown');
224 &$send_fence_state_email($self, 'SUCCEED', $msg, $node);