]> git.proxmox.com Git - pve-ha-manager.git/blame - PVE/HA/Manager.pm
implement a way to send commands to the CRM master
[pve-ha-manager.git] / PVE / HA / Manager.pm
CommitLineData
c0bbd038
DM
1package PVE::HA::Manager;
2
3use strict;
4use warnings;
c4a221bc 5use Digest::MD5 qw(md5_base64);
c0bbd038
DM
6
7use Data::Dumper;
8
9use PVE::HA::NodeStatus;
10
# Constructor: create a manager bound to the HA environment $haenv.
#
# Loads the stored manager status through $haenv, records the local node
# name as 'master_node' in it, and wraps the stored node status into a
# PVE::HA::NodeStatus object. Missing node/service status defaults to {}.
#
# Returns the blessed manager object.
sub new {
    my ($this, $haenv) = @_;

    # works when invoked on a class name as well as on an existing object
    my $class = ref($this) || $this;

    my $ms = $haenv->read_manager_status();
    $ms->{master_node} = $haenv->nodename();

    my $self = {
        haenv => $haenv,
        ms => $ms, # master status
        ns => PVE::HA::NodeStatus->new($haenv, $ms->{node_status} || {}),
        # fixme: use separate class PVE::HA::ServiceStatus
        ss => $ms->{service_status} || {}, # service status
    };

    return bless $self, $class;
}
34
d84da043
DM
# Cleanup hook of the manager object.
#
# Currently a stub: it only unpacks its argument and does not touch $self.
sub cleanup {
    my ($self) = @_;

    # todo: ?
}
40
# Copy the in-memory node and service status into the master status hash
# and hand the result to the HA environment for writing.
sub flush_master_status {
    my ($self) = @_;

    my $ms = $self->{ms};

    $ms->{node_status} = $self->{ns}->{status};
    $ms->{service_status} = $self->{ss};

    $self->{haenv}->write_manager_status($ms);
}
c0bbd038 51
# Attention: must be idempotent (always return the same result for same input!)
#
# Select the node a service should run on.
#
# $service_conf: service configuration; its 'node' entry is the preferred node.
#
# Returns the preferred node if it is online, otherwise the first entry of
# the online node list, or undef when that list is empty.
sub select_service_node {
    my ($self, $service_conf) = @_;

    my $ns = $self->{ns};

    my $pref_node = $service_conf->{node};

    # a service without a configured node has no preference to check
    return $pref_node if defined($pref_node) && $ns->node_is_online($pref_node);

    my $online_nodes = $ns->list_online_nodes();

    # Read the first entry instead of shift()ing it, so the list handed out
    # by the node status object is never modified by this call.
    return $online_nodes->[0];
}
66
c4a221bc
DM
# Incremented on every state change and mixed into the generated uid
# (presumably to keep uids distinct within one process and second).
my $uid_counter = 0;

# States the CRM accepts as target of a service state change.
my $valid_service_states = {
    stopped => 1,
    request_stop => 1,
    started => 1,
    fence => 1,
    move => 1,
    migrate => 1,
    error => 1,
};

# Switch service $sid to $new_state and apply additional status values.
#
# $self:      manager object (uses its 'haenv' and 'ss' members)
# $sid:       service id, must exist in the service status
# $new_state: one of the keys of $valid_service_states, must differ from the
#             current state
# %params:    extra key/value pairs stored in the service status
#
# Assigns a fresh uid to the service and logs the change. Dies if the service
# is unknown, the state does not change, or the new state is invalid.
my $change_service_state = sub {
    my ($self, $sid, $new_state, %params) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});

    my $sd = $ss->{$sid} || die "no such service '$sid'";

    my $old_state = $sd->{state};

    die "no state change" if $old_state eq $new_state; # just to be sure

    die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state};

    # sorted, so the logged change list has a stable order
    my @changed;
    foreach my $k (sort keys %params) {
        my $v = $params{$k};
        next if defined($sd->{$k}) && $sd->{$k} eq $v;
        push @changed, "$k = $v";
        $sd->{$k} = $v;
    }

    $sd->{state} = $new_state;
    $uid_counter++;
    $sd->{uid} = md5_base64($new_state . $$ . time() . $uid_counter);

    # fixme: cleanup state (remove unused values)

    my $changes = join(', ', @changed);
    $changes = " ($changes)" if $changes;
    $haenv->log('info', "service '$sid': state changed to '$new_state' $changes\n");
};
110
# read LRM status for all active nodes
#
# Asks the HA environment for the LRM status of each online node and merges
# the per-node hashes into one hash keyed by uid. If a uid shows up more
# than once, the first entry with a true value is kept.
sub read_lrm_status {
    my ($self) = @_;

    my $online = $self->{ns}->list_online_nodes();
    my $haenv = $self->{haenv};

    my %merged;

    for my $node (@$online) {
        my $node_status = $haenv->read_lrm_status($node);
        for my $uid (keys %$node_status) {
            # duplicates should not happen
            $merged{$uid} = $node_status->{$uid} if !$merged{$uid};
        }
    }

    return \%merged;
}
130
8f0bb968
DM
# Run one pass of the manager.
#
# Steps:
#  - update the node status; return early if the own node is not online
#  - read the LRM status and the service configuration
#  - add configured services which have no status entry yet
#  - compute next service states and handle fencing, repeating while any
#    state changed in the state loop
#  - write the resulting master status
#
# NOTE: the 'migrate' state is not implemented yet and aborts with die.
sub manage {
    my ($self) = @_;

    my ($haenv, $ns, $ss) = ($self->{haenv}, $self->{ns}, $self->{ss});

    $ns->update($haenv->get_node_info());

    if (!$ns->node_is_online($haenv->nodename())) {
        $haenv->log('info', "master seems offline\n");
        return;
    }

    my $lrm_status = $self->read_lrm_status();

    my $sc = $haenv->read_service_config();

    # compute new service status

    # add new service
    foreach my $sid (keys %$sc) {
        next if $ss->{$sid}; # already there
        $haenv->log('info', "Adding new service '$sid'\n");
        # assume we are running to avoid relocate running service at add
        $ss->{$sid} = { state => 'started', node => $sc->{$sid}->{node}};
    }

    for (;;) {
        my $repeat = 0;

        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            # services without configuration are treated as disabled
            my $cd = $sc->{$sid} || { state => 'disabled' };

            my $lrm_res = $sd->{uid} ? $lrm_status->{$sd->{uid}} : undef;

            my $last_state = $sd->{state};

            if ($last_state eq 'stopped') {

                $self->next_state_stopped($sid, $cd, $sd);

            } elsif ($last_state eq 'started') {

                $self->next_state_started($sid, $cd, $sd);

            } elsif ($last_state eq 'migrate') {

                die "implement me";

            } elsif ($last_state eq 'move') {

                #die "implement me";

            } elsif ($last_state eq 'fence') {

                # do nothing here - wait until fenced

            } elsif ($last_state eq 'request_stop') {

                # check result from LRM daemon
                if ($lrm_res) {
                    my $exit_code = $lrm_res->{exit_code};
                    # a result without exit code is not a successful stop
                    if (defined($exit_code) && $exit_code == 0) {
                        $change_service_state->($self, $sid, 'stopped');
                    } else {
                        $change_service_state->($self, $sid, 'error'); # fixme: what state?
                    }
                }

            } elsif ($last_state eq 'error') {

                # 'error' is a valid state set by the request_stop branch
                # above; keep the service there instead of dying with
                # "unknown service state" on the next pass

            } else {

                die "unknown service state '$last_state'";
            }

            $repeat = 1 if $sd->{state} ne $last_state;
        }

        # handle fencing
        my $fenced_nodes = {}; # node => result of the fence attempt in this pass
        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next if $sd->{state} ne 'fence';

            my $node = $sd->{node};

            # try to fence each node only once per pass
            if (!defined($fenced_nodes->{$node})) {
                $fenced_nodes->{$node} = $ns->fence_node($node) || 0;
            }

            next if !$fenced_nodes->{$node};

            # node fence was successful - mark service as stopped
            $change_service_state->($self, $sid, 'stopped');
        }

        last if !$repeat;
    }

    # remove stale services
    # fixme:

    $self->flush_master_status();
}
232
a875fbe8
DM
233# functions to compute next service states
234# $cd: service configuration data (read only)
235# $sd: service status data (read only)
236#
237# Note: use change_service_state() to alter state
238#
239
# Compute the next state for a service which is currently 'stopped'.
#
# A service configured as 'enabled' gets a node selected; if that node
# differs from the current one, the service location is changed first, then
# the service state is set to 'started' on the selected node. Any other
# configured state, or no selectable node, leaves the service untouched.
sub next_state_stopped {
    my ($self, $sid, $cd, $sd) = @_;

    my $wanted = $cd->{state};

    if ($wanted eq 'disabled') {
        # do nothing
    } elsif ($wanted eq 'enabled') {
        my $target = $self->select_service_node($cd);
        if ($target) {
            $self->{haenv}->change_service_location($sid, $target)
                if $sd->{node} ne $target;
            $change_service_state->($self, $sid, 'started', node => $target);
        } else {
            # fixme: warn
        }
    } else {
        # do nothing - todo: log something?
    }
}
260
# Compute the next state for a service which is currently 'started'.
#
#  - node of the service is not online        => 'fence'
#  - service is configured as 'disabled'      => 'request_stop'
#  - service is 'enabled' and another node is
#    selected for it                          => 'migrate'
#  - anything else                            => no change
sub next_state_started {
    my ($self, $sid, $cd, $sd) = @_;

    if (!$self->{ns}->node_is_online($sd->{node})) {
        $change_service_state->($self, $sid, 'fence');
    } elsif ($cd->{state} eq 'disabled') {
        $change_service_state->($self, $sid, 'request_stop');
    } elsif ($cd->{state} eq 'enabled') {
        my $target = $self->select_service_node($cd);
        $change_service_state->($self, $sid, 'migrate')
            if $target && ($sd->{node} ne $target);
    } else {
        # do nothing - todo: log something?
    }
}
c0bbd038
DM
287
2881;