]>
Commit | Line | Data |
---|---|---|
c0bbd038 DM |
1 | package PVE::HA::Manager; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
c4a221bc | 5 | use Digest::MD5 qw(md5_base64); |
c0bbd038 DM |
6 | |
7 | use Data::Dumper; | |
8 | ||
9 | use PVE::HA::NodeStatus; | |
10 | ||
# Constructor.
#
# Parameters:
#   $this  - class name, or an existing instance whose class is reused
#   $haenv - HA environment object; must provide read_manager_status()
#            and nodename()
#
# Loads the persisted manager status from the environment, records the
# local node as master_node in it and wraps the stored node status in a
# PVE::HA::NodeStatus object.
#
# Returns the new blessed manager object.
sub new {
    my ($this, $haenv) = @_;

    # works for both Class->new(...) and $instance->new(...)
    my $class = ref($this) || $this;

    my $master_status = $haenv->read_manager_status();
    $master_status->{master_node} = $haenv->nodename();

    my $node_status =
        PVE::HA::NodeStatus->new($haenv, $master_status->{node_status} || {});

    # fixme: use separate class PVE::HA::ServiceStatus
    my $service_status = $master_status->{service_status} || {};

    return bless {
        haenv => $haenv,
        ms => $master_status,  # master status
        ns => $node_status,    # PVE::HA::NodeStatus
        ss => $service_status, # service status
    }, $class;
}
34 | ||
d84da043 DM |
# Cleanup hook for the manager object.
#
# Currently a placeholder: it only unpacks its invocant and performs no work.
sub cleanup {
    my ($self) = @_;

    # todo: nothing to clean up so far - decide what (if anything) belongs here
}
40 | ||
# Persist the in-memory manager state.
#
# Copies the current node status (the 'status' member of the node status
# object) and the service status hash into the master status structure and
# hands that structure to the environment's write_manager_status().
#
# Returns whatever write_manager_status() returns.
sub flush_master_status {
    my ($self) = @_;

    my ($haenv, $ms, $ns, $ss) = @{$self}{qw(haenv ms ns ss)};

    $ms->{service_status} = $ss;
    $ms->{node_status} = $ns->{status};

    return $haenv->write_manager_status($ms);
}
c0bbd038 | 51 | |
# Pick the node a service should run on.
#
# Attention: must be idempotent (always return the same result for the
# same input!)
#
# Parameters:
#   $service_conf - service configuration hash; its 'node' entry names the
#                   preferred node
#
# Returns the preferred node if the node status reports it online, otherwise
# the first entry of the node status' online node list (undef if that list
# is empty).
sub select_service_node {
    my ($self, $service_conf) = @_;

    my $node_status = $self->{ns};
    my $preferred = $service_conf->{node};

    if ($node_status->node_is_online($preferred)) {
        return $preferred;
    }

    # preferred node is not available - fall back to the first online node
    my $candidates = $node_status->list_online_nodes();
    my $fallback = shift @{$candidates};

    return $fallback;
}
66 | ||
c4a221bc DM |
# Counter incremented on every service state change and mixed into the
# generated uid, so consecutive changes feed different input to the digest
# even within the same second and process.
my $uid_counter = 0;

# Private helper: switch service $sid to $new_state.
#
# Parameters:
#   $self      - manager object (uses its 'haenv' and 'ss' members)
#   $sid       - service id; must exist in the service status hash
#   $new_state - state to switch to; must differ from the current state
#   %params    - additional key/value pairs to store in the service status
#                entry (e.g. node => $node)
#
# Dies if the service is unknown or if the state would not change.
#
# Side effects: updates the service status entry in place (params, 'state'
# and a fresh 'uid') and writes an 'info' log line describing the change.
# Returns whatever the environment's log() returns.
my $change_service_state = sub {
    my ($self, $sid, $new_state, %params) = @_;

    my ($haenv, $ss) = ($self->{haenv}, $self->{ss});

    my $sd = $ss->{$sid} || die "no such service '$sid'";

    my $old_state = $sd->{state};

    die "no state change" if $old_state eq $new_state; # just to be sure

    # Sorted, so the change list in the log line does not depend on the
    # hash ordering of %params.
    my @changed;
    foreach my $k (sort keys %params) {
        my $v = $params{$k};
        next if defined($sd->{$k}) && $sd->{$k} eq $v;
        push @changed, "$k = $v";
        $sd->{$k} = $v;
    }

    $sd->{state} = $new_state;
    $uid_counter++;
    $sd->{uid} = md5_base64($new_state . $$ . time() . $uid_counter);

    # fixme: cleanup state (remove unused values)

    my $changes = @changed ? " (" . join(", ", @changed) . ")" : '';
    $haenv->log('info', "service '$sid': state changed to '$new_state' $changes\n");
};
98 | ||
c4a221bc DM |
# Read the LRM status of all nodes (even of offline nodes) and merge it.
#
# Parameters:
#   $node_info - hash keyed by node name; only the keys are used here
#
# Queries the environment's read_lrm_status() once per node and merges the
# per-node hashes, which are keyed by uid, into one hash.
#
# Returns a hash reference mapping uid => LRM status entry.
sub read_lrm_status {
    my ($self, $node_info) = @_;

    my $haenv = $self->{haenv};
    my %merged;

    for my $node (keys %{$node_info}) {
        my $node_lrm = $haenv->read_lrm_status($node);
        for my $uid (keys %{$node_lrm}) {
            # a uid showing up twice should not happen - keep the entry
            # that was seen first
            $merged{$uid} ||= $node_lrm->{$uid};
        }
    }

    return \%merged;
}
117 | ||
8f0bb968 DM |
# Run one round of the master's service state machine.
#
# Steps:
#   1. Fetch node info from the environment and update the node status.
#      Returns early (without writing any status) if the local node is not
#      considered online.
#   2. Create a status entry for every configured service that has none yet.
#   3. Walk all services and apply state transitions, then handle services
#      waiting for fencing. This is repeated as long as the transition walk
#      changed at least one service state. Changes made by the fence
#      handling alone do not trigger another pass.
#   4. Write the resulting status via flush_master_status().
#
# Services are processed in sorted service-id order, so log output and the
# order of fence requests do not depend on hash ordering.
#
# Dies on an unknown service state and on services in state 'migrate',
# which is not implemented yet.
sub manage {
    my ($self) = @_;

    my ($haenv, $ns, $ss) = @{$self}{qw(haenv ns ss)};

    my ($node_info, $quorate) = $haenv->get_node_info();
    $ns->update($node_info);

    # fixme: what if $quorate is 0??

    if (!$ns->node_is_online($haenv->nodename())) {
        $haenv->log('info', "master seems offline\n");
        return;
    }

    # fixme: the LRM status is read but not evaluated yet
    my $lrm_status = $self->read_lrm_status($node_info);

    my $sc = $haenv->read_service_config();

    # compute new service status

    # add new service
    foreach my $sid (sort keys %$sc) {
        next if $ss->{$sid}; # already there
        $haenv->log('info', "Adding new service '$sid'\n");
        # assume we are running to avoid relocate running service at add
        $ss->{$sid} = { state => 'started', node => $sc->{$sid}->{current_node}};
    }

    for (;;) {
        my $repeat = 0;

        foreach my $sid (sort keys %$ss) {
            my $sd = $ss->{$sid};
            # services without a configuration entry are treated as disabled
            my $cd = $sc->{$sid} || { state => 'disabled' };

            my $last_state = $sd->{state};

            if ($last_state eq 'stopped') {

                if ($cd->{state} eq 'disabled') {
                    # do nothing
                } elsif ($cd->{state} eq 'enabled') {
                    if (my $node = $self->select_service_node($cd)) {
                        $change_service_state->($self, $sid, 'started', node => $node);
                    } else {
                        # fixme: warn
                    }
                } else {
                    # do nothing - todo: log something?
                }

            } elsif ($last_state eq 'started') {

                if (!$ns->node_is_online($sd->{node})) {

                    $change_service_state->($self, $sid, 'fence');

                } else {

                    if ($cd->{state} eq 'disabled') {
                        $change_service_state->($self, $sid, 'request_stop');
                    } elsif ($cd->{state} eq 'enabled') {
                        my $node = $self->select_service_node($cd);
                        if ($node && ($sd->{node} ne $node)) {
                            $change_service_state->($self, $sid, 'migrate');
                        } else {
                            # do nothing
                        }
                    } else {
                        # do nothing - todo: log something?
                    }
                }

            } elsif ($last_state eq 'migrate') {

                die "implement me";

            } elsif ($last_state eq 'fence') {

                # do nothing here - wait until fenced

            } elsif ($last_state eq 'request_stop') {

                #fixme: die "implement me";

            } else {

                die "unknown service state '$last_state'";
            }

            # a transition happened - run another pass so that follow-up
            # transitions are evaluated in this round as well
            $repeat = 1 if $sd->{state} ne $last_state;
        }

        # handle fencing
        # cache the fence result per node, so fence_node() is called at most
        # once per node and pass
        my $fenced_nodes = {};
        foreach my $sid (sort keys %$ss) {
            my $sd = $ss->{$sid};
            next if $sd->{state} ne 'fence';

            if (!defined($fenced_nodes->{$sd->{node}})) {
                $fenced_nodes->{$sd->{node}} = $ns->fence_node($sd->{node}) || 0;
            }

            next if !$fenced_nodes->{$sd->{node}};

            # node fence was successful - mark service as stopped
            $change_service_state->($self, $sid, 'stopped');
        }

        last if !$repeat;
    }

    # remove stale services
    # fixme:

    $self->flush_master_status();
}
237 | ||
238 | ||
239 | 1; |