]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/NodeStatus.pm
bump version to 1.0-29
[pve-ha-manager.git] / src / PVE / HA / NodeStatus.pm
CommitLineData
cbca2c55
DM
1package PVE::HA::NodeStatus;
2
3use strict;
4use warnings;
5
6use Data::Dumper;
7
# Constructor.
#
# $this   - class name, or an existing instance whose class is reused
# $haenv  - HA environment object; stored under the 'haenv' key
# $status - hash reference mapping node name => node state; stored as-is
#           (not copied) under the 'status' key
#
# Returns the new object. Its 'last_online' map starts out empty.
sub new {
    my ($this, $haenv, $status) = @_;

    # allow both Class->new(...) and $instance->new(...)
    my $class = ref($this) || $this;

    my %fields = (
        haenv       => $haenv,
        status      => $status,
        last_online => {},
    );

    return bless \%fields, $class;
}
21
# Table of the valid node states. The keys are the state names; the values
# are human readable descriptions of what each state means.
my $valid_node_states = {
    online  => "node online and member of quorate partition",
    unknown => "not member of quorate partition, but possibly still running",
    fence   => "node needs to be fenced",
    gone    => "node vanished from cluster members list, possibly deleted",
};
cbca2c55
DM
29
# Return the recorded state of $node.
#
# Side effect: a node without a (true) recorded state is entered into the
# status map as 'unknown' before its state is returned.
sub get_node_state {
    my ($self, $node) = @_;

    my $state = ($self->{status}->{$node} ||= 'unknown');

    return $state;
}
38
f7ccd1b3
DM
# Return true if the state of $node (as reported by get_node_state) is
# 'online', false otherwise.
sub node_is_online {
    my ($self, $node) = @_;

    my $state = $self->get_node_state($node);

    return $state eq 'online';
}
44
5385a606
DM
# Check whether $node has been offline for at least $delay time units.
#
# $delay is compared against differences of $haenv->get_time() values, so
# it uses the same unit as that clock.
#
# Returns:
#   undef - the node is currently 'online', or no last-online time was
#           recorded yet (in which case the current time gets recorded as
#           the starting point)
#   true  - the time since the last recorded online time is >= $delay
#   false - otherwise
sub node_is_offline_delayed {
    my ($self, $node, $delay) = @_;

    # 'return undef' (not a bare return) is kept on purpose, so callers
    # get the same result in list context as before
    if ($self->get_node_state($node) eq 'online') {
        return undef;
    }

    my $seen = $self->{last_online}->{$node};
    my $now = $self->{haenv}->get_time();

    if (defined($seen)) {
        return ($now - $seen) >= $delay;
    }

    # first time we see this node offline: start counting from now
    $self->{last_online}->{$node} = $now;

    return undef;
}
63
9c7d068b
DM
# Return an array reference with the names of all nodes present in the
# status map, sorted in default (string) order.
sub list_nodes {
    my ($self) = @_;

    my @names = sort keys %{$self->{status}};

    return \@names;
}
69
f7ccd1b3
DM
# Return an array reference with the names of all nodes whose recorded
# state is exactly 'online', sorted in default (string) order.
sub list_online_nodes {
    my ($self) = @_;

    my @online =
        grep { $self->{status}->{$_} eq 'online' }
        sort keys %{$self->{status}};

    return \@online;
}
82
7dd15f22
TL
# Private helper: forget a node that is in state 'gone'.
#
# Removes the node from both the 'last_online' and the 'status' map and
# writes a 'notice' log entry. Nodes in any other state are left untouched
# and undef is returned.
my $delete_node = sub {
    my ($self, $node) = @_;

    if ($self->get_node_state($node) ne 'gone') {
        return undef;
    }

    for my $table (qw(last_online status)) {
        delete $self->{$table}->{$node};
    }

    $self->{haenv}->log(
        'notice',
        "deleting gone node '$node', not a cluster member anymore.",
    );
};
96
cbca2c55
DM
# Private helper: move $node into $state.
#
# Dies if $state is not a key of $valid_node_states. Does nothing when the
# node already is in $state; otherwise stores the new state and writes an
# 'info' log entry describing the transition.
my $set_node_state = sub {
    my ($self, $node, $state) = @_;

    if (!defined($valid_node_states->{$state})) {
        die "unknown node state '$state'\n";
    }

    my $previous = $self->get_node_state($node);

    # no transition, nothing to store or log
    if ($state eq $previous) {
        return;
    }

    $self->{status}->{$node} = $state;

    $self->{haenv}->log(
        'info',
        "node '$node': state changed from '$previous' => '$state'\n",
    );
};
114
# Update the state of all nodes from fresh membership information.
#
# $node_info - hash reference mapping node name => hash reference; only the
#              'online' flag of each entry is evaluated here.
#
# Nodes reported online get their last-online time refreshed and move from
# 'unknown' or 'gone' to 'online'. Nodes known to us but not reported online
# move from 'online' to 'unknown', from 'unknown' to 'gone' (only if they
# are missing from $node_info altogether), and 'gone' nodes are deleted once
# they have been offline long enough. Nodes in state 'fence' are never
# touched here; they stay in that state until fencing has finished.
#
# Dies if a node is found in a state this method does not know about.
sub update {
    my ($self, $node_info) = @_;

    my $haenv = $self->{haenv};

    # how long a 'gone' node is kept before it is deleted; same unit as
    # $haenv->get_time() (presumably seconds, i.e. one hour - TODO confirm)
    my $gone_delete_delay = 3600;

    # pass 1: nodes reported as online
    foreach my $node (sort keys %$node_info) {
        my $d = $node_info->{$node};
        next if !$d->{online};

        # record last time the node was online (required to implement fence delay)
        $self->{last_online}->{$node} = $haenv->get_time();

        my $state = $self->get_node_state($node);

        if ($state eq 'unknown' || $state eq 'gone') {
            $set_node_state->($self, $node, 'online');
        } elsif ($state eq 'online') {
            # already online, nothing to do
        } elsif ($state eq 'fence') {
            # do nothing, wait until fenced
        } else {
            die "detected unknown node state '$state'";
        }
    }

    # pass 2: known nodes which are not reported as online. The key list is
    # built (and sorted, for a stable log order) before the loop starts, so
    # deleting nodes from the status map inside the loop is safe.
    foreach my $node (sort keys %{$self->{status}}) {
        my $d = $node_info->{$node};
        next if $d && $d->{online};

        my $state = $self->get_node_state($node);

        # node is not inside quorate partition, possibly not active

        if ($state eq 'online') {
            $set_node_state->($self, $node, 'unknown');
        } elsif ($state eq 'unknown') {
            # node isn't in the member list anymore, deleted from the cluster?
            $set_node_state->($self, $node, 'gone') if !defined($d);
        } elsif ($state eq 'fence') {
            # do nothing, wait until fenced
        } elsif ($state eq 'gone') {
            if ($self->node_is_offline_delayed($node, $gone_delete_delay)) {
                $delete_node->($self, $node);
            }
        } else {
            die "detected unknown node state '$state'";
        }
    }
}
167
c79442f2 168# start fencing
f7ccd1b3
DM
# Put $node into state 'fence' and try to complete the fencing.
#
# Fencing counts as completed once $haenv->get_ha_agent_lock($node)
# succeeds; the node is then moved on to state 'unknown'. Until that
# happens the node stays in state 'fence', so this method can simply be
# called again.
#
# Returns the (true or false) result of get_ha_agent_lock().
sub fence_node {
    my ($self, $node) = @_;

    my $haenv = $self->{haenv};

    # no-op (and no log entry) if the node already is in state 'fence'
    $set_node_state->($self, $node, 'fence');

    my $success = $haenv->get_ha_agent_lock($node);

    if ($success) {
        $haenv->log("info", "fencing: acknowledged - got agent lock for node '$node'");
        $set_node_state->($self, $node, 'unknown');
    }

    return $success;
}
189
cbca2c55 1901;