]>
Commit | Line | Data |
---|---|---|
5f095798 DM |
1 | package PVE::HA::LRM; |
2 | ||
3 | # Local Resource Manager | |
4 | ||
5 | use strict; | |
6 | use warnings; | |
7 | ||
8 | use PVE::SafeSyslog; | |
9 | use PVE::Tools; | |
10 | use PVE::HA::Tools; | |
11 | ||
12 | # Server can have several states: | |
13 | ||
# Map of every state accepted by set_local_status() to a short human
# readable description. Only the keys are checked at runtime (the value just
# has to be true).
my $valid_states = {
    wait_for_agent_lock => "waiting for agent lock",
    active => "got agent_lock",
    lost_agent_lock => "lost agent_lock",
};
19 | ||
# Constructor. Can be invoked on a class name or on an existing instance
# (in which case the instance's class is reused).
#
# $haenv is stored as-is and used for all environment access (logging, time,
# locks, watchdog). The object starts in the internal 'startup' state and is
# immediately moved to 'wait_for_agent_lock' through set_local_status().
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my $self = {
        haenv => $haenv,
        status => { state => 'startup' },
    };
    bless $self, $class;

    $self->set_local_status({ state => 'wait_for_agent_lock' });

    return $self;
}
34 | ||
# Record that a shutdown was requested. The flag is only stored here; it is
# evaluated later by do_one_iteration().
sub shutdown_request {
    my $self = shift;

    return $self->{shutdown_request} = 1;
}
40 | ||
# Return the current status hash reference (the very same reference that is
# stored in the object, not a copy).
sub get_local_status {
    my $self = shift;

    my $status = $self->{status};

    return $status;
}
46 | ||
# Replace the local status with $new (a hash reference with a 'state' key).
#
# Dies if $new->{state} is not one of the keys of $valid_states. If the state
# is the same as the current one, nothing happens at all. Otherwise the change
# is logged, $new->{state_change_time} is set to the current environment time
# and $new becomes the stored status.
sub set_local_status {
    my ($self, $new) = @_;

    my $known = $valid_states->{$new->{state}};
    die "invalid state '$new->{state}'" if !$known;

    my $haenv = $self->{haenv};
    my $old = $self->{status};

    # important: only update if the state really changed
    return if $old->{state} eq $new->{state};

    my $msg = "status change $old->{state} => $new->{state}";
    $haenv->log('info', $msg);

    $new->{state_change_time} = $haenv->get_time();

    return $self->{status} = $new;
}
65 | ||
# Try to obtain the HA agent lock and make sure a watchdog handle exists for it.
#
# On success the watchdog handle stored in $self->{ha_agent_wd} is updated,
# or opened and stored if there is none yet, and 1 is returned.
#
# On failure the attempt is repeated with a one second sleep in between. The
# loop gives up (returning 0) after the sixth failed attempt, or as soon as
# more than 5 seconds have passed since the first attempt.
sub get_protected_ha_agent_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $failed = 0;
    my $begin = $haenv->get_time();

    while (1) {

        if ($haenv->get_ha_agent_lock()) {
            if (my $wd = $self->{ha_agent_wd}) {
                $haenv->watchdog_update($wd);
            } else {
                $self->{ha_agent_wd} = $haenv->watchdog_open();
            }
            return 1;
        }

        # limit the number of attempts ...
        $failed++;
        last if $failed > 5;

        # ... as well as the total time spent
        my $elapsed = $haenv->get_time() - $begin;
        last if $elapsed > 5;

        $haenv->sleep(1);
    }

    return 0;
}
96 | ||
# Run one iteration of the LRM state machine: first apply state transitions
# based on quorum and the agent lock, then do the work belonging to the
# resulting state.
#
# Returns 0 when a requested shutdown has been handled in this iteration and
# 1 otherwise. Dies if the local status contains a state that is not handled
# here.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $state = $self->get_local_status()->{state};

    # close and forget the watchdog handle, if we hold one
    my $release_watchdog = sub {
        return if !$self->{ha_agent_wd};
        $haenv->watchdog_close($self->{ha_agent_wd});
        delete $self->{ha_agent_wd};
    };

    # do state changes first

    my $ctime = $haenv->get_time(); # currently not used below

    if ($state eq 'wait_for_agent_lock') {

        my $service_count = 1; # todo: correctly compute

        if ($service_count && $haenv->quorate() && $self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'active' });
        }

    } elsif ($state eq 'lost_agent_lock') {

        if ($haenv->quorate() && $self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'active' });
        }

    } elsif ($state eq 'active') {

        $self->set_local_status({ state => 'lost_agent_lock' })
            if !$self->get_protected_ha_agent_lock();
    }

    # re-read, the state may have changed above
    $state = $self->get_local_status()->{state};

    # do work

    if ($state eq 'wait_for_agent_lock') {

        return 0 if $self->{shutdown_request};

        $haenv->sleep(5);

        return 1;
    }

    if ($state eq 'active') {

        my $work_start = $haenv->get_time();
        my $work_budget = 10;
        my $stop = 0;

        # do work (at most $work_budget seconds)
        eval {
            # fixme: set alert timer

            if ($self->{shutdown_request}) {

                # fixme: request service stop or relocate ?

                my $service_count = 0; # fixme

                if ($service_count == 0) {
                    $release_watchdog->();
                    $stop = 1;
                }
            }
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        return 0 if $stop;

        $haenv->sleep_until($work_start + $work_budget);

        return 1;
    }

    if ($state eq 'lost_agent_lock') {

        # Note: the watchdog is active and will trigger soon,
        # so we hope to get the lock back soon!

        return 1 if !$self->{shutdown_request};

        my $running_services = 0; # fixme: correctly compute

        if ($running_services > 0) {
            $haenv->log('err', "get shutdown request in state 'lost_agent_lock' - " .
                "killing running services");

            # fixme: kill all services as fast as possible
        }

        # now all services are stopped, so we can close the watchdog
        $release_watchdog->();

        return 0;
    }

    die "got unexpected status '$state'\n";
}
217 | ||
218 | 1; |