]>
Commit | Line | Data |
---|---|---|
c0bbd038 DM |
1 | package PVE::HA::Manager; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
c4a221bc | 5 | use Digest::MD5 qw(md5_base64); |
c0bbd038 DM |
6 | |
7 | use Data::Dumper; | |
8 | ||
9 | use PVE::HA::NodeStatus; | |
10 | ||
# Constructor.
#
# Loads the persisted manager status from the HA environment, records the
# local node as master node in it, and wraps the stored node status in a
# PVE::HA::NodeStatus object.
#
# $this  - class name, or an existing object whose class is reused
# $haenv - HA environment object; read_manager_status() and nodename()
#          are called on it here
#
# Returns the new blessed manager object.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my $master_status = $haenv->read_manager_status();
    $master_status->{master_node} = $haenv->nodename();

    my $node_status = PVE::HA::NodeStatus->new(
        $haenv,
        $master_status->{node_status} || {},
    );

    # fixme: use separate class PVE::HA::ServiceStatus
    my $service_status = $master_status->{service_status} || {};

    return bless {
        haenv => $haenv,
        ms    => $master_status,     # master status
        ns    => $node_status,       # PVE::HA::NodeStatus
        ss    => $service_status,    # service status
    }, $class;
}
34 | ||
d84da043 DM |
# Cleanup hook of the manager. Currently a placeholder: it only unpacks
# its invocant and performs no work.
sub cleanup {
    my ($self) = @_;

    # todo: is there anything to clean up?
}
40 | ||
# Store the current node status and service status in the master status
# record and hand that record to the HA environment for writing.
#
# Returns whatever write_manager_status() returns.
sub flush_master_status {
    my ($self) = @_;

    my $master_status = $self->{ms};

    $master_status->{node_status}    = $self->{ns}->{status};
    $master_status->{service_status} = $self->{ss};

    return $self->{haenv}->write_manager_status($master_status);
}
c0bbd038 | 51 | |
# Select the node a service should run on.
#
# Attention: must be idempotent (always return the same result for the
# same input!)
#
# $service_conf - service configuration hash; its 'node' entry names the
#                 preferred node
#
# Returns the preferred node if it is online. Otherwise returns the first
# online node in string sort order, or undef when no node is online.
sub select_service_node {
    my ($self, $service_conf) = @_;

    my $ns = $self->{ns};

    my $pref_node = $service_conf->{node};

    # a service without a configured node simply has no preference
    return $pref_node
        if defined($pref_node) && $ns->node_is_online($pref_node);

    # Sort a copy of the list: the result must not depend on the order in
    # which the node status object reports the nodes, and the array it
    # returned must not be modified.
    my ($fallback_node) = sort @{ $ns->list_online_nodes() || [] };

    return $fallback_node;
}
66 | ||
c4a221bc DM |
# Incremented on every state change and mixed into the generated uid, in
# addition to the new state, the process id and the current time.
my $uid_counter = 0;

# Service states accepted by $change_service_state.
my $valid_service_states = {
    stopped => 1,
    request_stop => 1,
    started => 1,
    fence => 1,
    move => 1,
    migrate => 1,
    error => 1,
};

# Switch service $sid to $new_state.
#
# $self      - manager object; its 'haenv' and 'ss' entries are used
# $sid       - service id, must exist in the service status
# $new_state - one of the keys of $valid_service_states, and different
#              from the current state
# %params    - additional key/value pairs to store in the service status
#              entry (for example node => $node)
#
# Stores the new state, the changed parameters and a freshly generated
# uid in the service status entry, and logs the transition.
#
# Dies if the service is unknown, if the state would not change, or if
# $new_state is not a valid state.
my $change_service_state = sub {
    my ($self, $sid, $new_state, %params) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});

    my $sd = $ss->{$sid} || die "no such service '$sid'";

    my $old_state = $sd->{state};

    die "no state change" if $old_state eq $new_state; # just to be sure

    die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state};

    # Walk the parameters in sorted order so that the logged list of
    # changes is the same on every run.
    my @changed;
    foreach my $k (sort keys %params) {
        my $v = $params{$k};
        next if defined($sd->{$k}) && $sd->{$k} eq $v;
        push @changed, "$k = $v";
        $sd->{$k} = $v;
    }

    $sd->{state} = $new_state;
    $uid_counter++;
    $sd->{uid} = md5_base64($new_state . $$ . time() . $uid_counter);

    # fixme: cleanup state (remove unused values)

    my $changes = @changed ? ' (' . join(', ', @changed) . ')' : '';
    $haenv->log('info', "service '$sid': state changed to '$new_state' $changes\n");
};
110 | ||
# Read the LRM status of all online nodes and merge the per-node results
# into a single hash keyed by uid.
#
# Returns a hash reference; it is empty when no online node reported
# anything.
sub read_lrm_status {
    my ($self) = @_;

    my $online_nodes = $self->{ns}->list_online_nodes();
    my $haenv = $self->{haenv};

    my $merged = {};

    for my $node (@$online_nodes) {
        my $node_results = $haenv->read_lrm_status($node);

        # The first entry seen for a uid is kept; the same uid showing up
        # twice should not happen.
        $merged->{$_} ||= $node_results->{$_} for keys %$node_results;
    }

    return $merged;
}
130 | ||
8f0bb968 DM |
# Run one manager iteration:
#
#  1. update the node status from the HA environment
#  2. return early (without writing anything) if the local node is not
#     seen as online
#  3. read the LRM results and the service configuration
#  4. add configured services that have no status entry yet
#  5. advance the state of every service and handle fencing, repeating
#     until a pass leaves all states unchanged in the first loop
#  6. write the master status
#
# Services are always processed in sorted order of their id, so that log
# output, state transitions and fencing requests happen in the same order
# on every run.
sub manage {
    my ($self) = @_;

    my ($haenv, $ns, $ss) = @{$self}{qw(haenv ns ss)};

    $ns->update($haenv->get_node_info());

    if (!$ns->node_is_online($haenv->nodename())) {
        $haenv->log('info', "master seems offline\n");
        return;
    }

    my $lrm_status = $self->read_lrm_status();

    my $sc = $haenv->read_service_config();

    # compute new service status

    # add new service
    foreach my $sid (sort keys %$sc) {
        next if $ss->{$sid}; # already there
        $haenv->log('info', "Adding new service '$sid'\n");
        # assume we are running to avoid relocate running service at add
        $ss->{$sid} = { state => 'started', node => $sc->{$sid}->{node}};
    }

    for (;;) {
        my $repeat = 0;

        foreach my $sid (sort keys %$ss) {
            my $sd = $ss->{$sid};
            # services without configuration are handled like disabled ones
            my $cd = $sc->{$sid} || { state => 'disabled' };

            # LRM result belonging to the last state change, if any
            my $lrm_res = $sd->{uid} ? $lrm_status->{$sd->{uid}} : undef;

            my $last_state = $sd->{state};

            if ($last_state eq 'stopped') {

                $self->next_state_stopped($sid, $cd, $sd);

            } elsif ($last_state eq 'started') {

                $self->next_state_started($sid, $cd, $sd);

            } elsif ($last_state eq 'migrate') {

                die "implement me";

            } elsif ($last_state eq 'move') {

                #die "implement me";

            } elsif ($last_state eq 'fence') {

                # do nothing here - wait until fenced

            } elsif ($last_state eq 'request_stop') {

                # check result from LRM daemon
                if ($lrm_res) {
                    # NOTE(review): an undefined exit_code compares equal
                    # to 0 here (with a warning) - confirm that the LRM
                    # always sets it
                    my $exit_code = $lrm_res->{exit_code};
                    if ($exit_code == 0) {
                        &$change_service_state($self, $sid, 'stopped');
                    } else {
                        &$change_service_state($self, $sid, 'error'); # fixme: what state?
                    }
                }

            } else {

                die "unknown service state '$last_state'";
            }

            # a changed state may allow a further transition right away
            $repeat = 1 if $sd->{state} ne $last_state;
        }

        # handle fencing
        # fence_node() is called at most once per node and pass; its
        # result is remembered for the other services on that node
        my $fenced_nodes = {};
        foreach my $sid (sort keys %$ss) {
            my $sd = $ss->{$sid};
            next if $sd->{state} ne 'fence';

            if (!defined($fenced_nodes->{$sd->{node}})) {
                $fenced_nodes->{$sd->{node}} = $ns->fence_node($sd->{node}) || 0;
            }

            next if !$fenced_nodes->{$sd->{node}};

            # node fence was successful - mark service as stopped
            &$change_service_state($self, $sid, 'stopped');
        }

        last if !$repeat;
    }

    # remove stale services
    # fixme:

    $self->flush_master_status();
}
232 | ||
a875fbe8 DM |
233 | # functions to compute next service states |
234 | # $cd: service configuration data (read only) | |
235 | # $sd: service status data (read only) | |
236 | # | |
237 | # Note: use change_service_state() to alter state | |
238 | # | |
239 | ||
# Compute the next state of a service that is currently 'stopped'.
#
# $sid - service id
# $cd  - service configuration data (read only)
# $sd  - service status data (read only, altered via change_service_state)
#
# A service configured as 'enabled' is moved to a node chosen by
# select_service_node() and switched to 'started'. Nothing happens for
# 'disabled' services, for any other configured state, or when no node
# could be selected.
sub next_state_stopped {
    my ($self, $sid, $cd, $sd) = @_;

    my $haenv = $self->{haenv};

    my $wanted = $cd->{state};

    # a disabled service stays stopped
    return if $wanted eq 'disabled';

    # any other unknown configuration state - todo: log something?
    return if $wanted ne 'enabled';

    my $target = $self->select_service_node($cd);

    # fixme: warn when no node could be selected
    return if !$target;

    # tell the environment about the new location before starting there
    $haenv->change_service_location($sid, $target)
        if $sd->{node} ne $target;

    $change_service_state->($self, $sid, 'started', node => $target);

    return;
}
260 | ||
# Compute the next state of a service that is currently 'started'.
#
# $sid - service id
# $cd  - service configuration data (read only)
# $sd  - service status data (read only, altered via change_service_state)
#
# Transitions:
#   node of the service is not online               -> 'fence'
#   service configured as 'disabled'                -> 'request_stop'
#   'enabled' and another node gets selected for it -> 'migrate'
# In every other case the state is left alone.
sub next_state_started {
    my ($self, $sid, $cd, $sd) = @_;

    my $ns = $self->{ns};

    if (!$ns->node_is_online($sd->{node})) {
        $change_service_state->($self, $sid, 'fence');
        return;
    }

    my $wanted = $cd->{state};

    if ($wanted eq 'disabled') {
        $change_service_state->($self, $sid, 'request_stop');
        return;
    }

    # any other unknown configuration state - todo: log something?
    return if $wanted ne 'enabled';

    my $target = $self->select_service_node($cd);

    $change_service_state->($self, $sid, 'migrate')
        if $target && ($sd->{node} ne $target);

    return;
}
c0bbd038 DM |
287 | |
288 | 1; |