]>
Commit | Line | Data |
---|---|---|
87b82b15 DM |
1 | package PVE::HA::Sim::Env; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | use POSIX qw(strftime EINTR); | |
f5c29173 | 6 | use JSON; |
87b82b15 DM |
7 | use IO::File; |
8 | use Fcntl qw(:DEFAULT :flock); | |
9 | ||
10 | use PVE::HA::Tools; | |
11 | use PVE::HA::Env; | |
9e5ea8f7 DM |
12 | use PVE::HA::Resources; |
13 | use PVE::HA::Sim::Resources::VirtVM; | |
14 | use PVE::HA::Sim::Resources::VirtCT; | |
ec368d74 | 15 | use PVE::HA::Sim::Resources::VirtFail; |
9e5ea8f7 DM |
16 | |
17 | PVE::HA::Sim::Resources::VirtVM->register(); | |
18 | PVE::HA::Sim::Resources::VirtCT->register(); | |
ec368d74 | 19 | PVE::HA::Sim::Resources::VirtFail->register(); |
9e5ea8f7 DM |
20 | |
21 | PVE::HA::Resources->init(); | |
87b82b15 DM |
22 | |
# Construct a simulation environment bound to one simulated node.
# $this: class name (or object to re-bless from)
# $nodename: name of the simulated cluster node (required)
# $hardware: simulated hardware backend (provides statusdir(), locks, ...)
# $log_id: identifier prefixed to every log line (required)
sub new {
    my ($this, $nodename, $hardware, $log_id) = @_;

    die "missing nodename" if !$nodename;
    die "missing log_id" if !$log_id;

    my $class = ref($this) || $this;

    my $self = bless {
        statusdir => $hardware->statusdir(),
        nodename => $nodename,
        hardware => $hardware,
        lock_timeout => 120, # seconds after which a cluster lock expires
        log_id => $log_id,
    }, $class;

    return $self;
}
43 | ||
# Return the name of the node this environment simulates.
sub nodename {
    my ($self) = @_;
    return $self->{nodename};
}
49 | ||
# Return the simulated hardware backend this environment talks to.
sub hardware {
    my ($self) = @_;
    return $self->{hardware};
}
55 | ||
# Die unless the simulated cluster filesystem (cfs) is read-write on
# this node; $emsg optionally overrides the default error message.
my $assert_cfs_can_rw = sub {
    my ($self, $emsg) = @_;

    $emsg //= 'cfs connection refused - not mounted?';

    die "$emsg\n"
        unless $self->{hardware}->get_cfs_state($self->{nodename}, 'rw');
};
64 | ||
# Simulate taking (or, with $unlock set, releasing) the cluster-wide
# lock $lock_name. Lock state lives in a shared JSON file; a held lock
# expires $self->{lock_timeout} seconds after it was last refreshed.
# Returns 1 on success, 0 on failure; returns 0 immediately when the
# node is not quorate. The whole operation runs under the hardware's
# global lock, so the read-modify-write of the JSON file is atomic.
sub sim_get_lock {
    my ($self, $lock_name, $unlock) = @_;

    return 0 if !$self->quorate();

    my $filename = "$self->{statusdir}/cluster_locks";

    my $code = sub {
        my $data = PVE::HA::Tools::read_json_from_file($filename, {});

        my $res;
        my $nodename = $self->nodename();
        my $ctime = $self->get_time();
        my $entry = $data->{$lock_name};

        if ($unlock) {
            if ($entry) {
                my $age = $ctime - $entry->{time};
                if ($age > $self->{lock_timeout}) {
                    # lock already expired - treat release as success
                    $res = 1;
                } elsif ($entry->{node} eq $nodename) {
                    delete $data->{$lock_name};
                    $res = 1;
                } else {
                    # a live lock held by another node - cannot release
                    $res = 0;
                }
            }
        } elsif ($entry) {
            my $age = $ctime - $entry->{time};
            if ($age <= $self->{lock_timeout}) {
                if ($entry->{node} eq $nodename) {
                    $entry->{time} = $ctime; # refresh our own lock
                    $res = 1;
                } else {
                    $res = 0;
                }
            } else {
                # stale lock - take it over
                $self->log('info', "got lock '$lock_name'");
                $entry->{node} = $nodename;
                $entry->{time} = $ctime;
                $res = 1;
            }
        } else {
            $data->{$lock_name} = {
                time => $ctime,
                node => $nodename,
            };
            $self->log('info', "got lock '$lock_name'");
            $res = 1;
        }

        PVE::HA::Tools::write_json_to_file($filename, $data);

        return $res;
    };

    return $self->{hardware}->global_lock($code);
}
133 | ||
# Read the cluster-wide CRM manager status (empty hash if the file
# does not exist yet). Dies when the cfs is not read-write.
sub read_manager_status {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return PVE::HA::Tools::read_json_from_file("$self->{statusdir}/manager_status", {});
}
143 | ||
# Persist the CRM manager status object. Dies when the cfs is not
# read-write.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    $assert_cfs_can_rw->($self);

    PVE::HA::Tools::write_json_to_file("$self->{statusdir}/manager_status", $status_obj);
}
153 | ||
# Read the LRM status of $node; defaults to the local node when no
# node is given. Dies when the cfs is not read-write.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node //= $self->{nodename};

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_lrm_status($node);
}
163 | ||
# Write the local node's LRM status object. Dies when the cfs is not
# read-write.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->write_lrm_status($self->{nodename}, $status_obj);
}
173 | ||
# Query the simulated hardware for the local node's shutdown state.
# Returns a ($shutdown, $reboot) flag pair; dies when the node has no
# status entry or an unknown shutdown target is configured.
sub is_node_shutdown {
    my ($self) = @_;

    my $node = $self->{nodename};
    my $cstatus = $self->{hardware}->read_hardware_status_nolock();

    die "undefined node status for node '$node'" if !defined($cstatus->{$node});

    my ($shutdown, $reboot) = (0, 0);

    my $target = $cstatus->{$node}->{shutdown};
    if ($target) {
        if ($target eq 'reboot') {
            ($shutdown, $reboot) = (1, 1);
        } elsif ($target eq 'shutdown') {
            $shutdown = 1;
        } else {
            die "unknown shutdown target '$target'";
        }
    }

    return ($shutdown, $reboot);
}
b83b4ae8 | 197 | |
# Read the HA service (resource) configuration from the simulated
# hardware. Dies when the cfs is not read-write.
sub read_service_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_service_config();
}
205 | ||
# Update the configuration of service $sid with the values in $param.
sub update_service_config {
    my ($self, $sid, $param) = @_;

    return $self->{hardware}->update_service_config($sid, $param);
}
211 | ||
# Split a service ID of the form "type:name" into its components.
# Returns ($sid, $type, $name) in list context, just $sid in scalar
# context; dies when the ID does not match the expected format.
sub parse_sid {
    my ($self, $sid) = @_;

    my ($type, $name) = $sid =~ m/^(\S+):(\S+)$/
        or die "unable to parse service id '$sid'\n";

    return wantarray ? ($sid, $type, $name) : $sid;
}
223 | ||
# Read the fence device configuration. Dies when the cfs is not
# read-write.
sub read_fence_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_fence_config();
}
231 | ||
# The test/sim framework uses hardware-based fencing if and only if
# fence devices are configured; otherwise it falls back to the watchdog.
sub fencing_mode {
    my ($self) = @_;

    my $cfg = $self->read_fence_config();

    return 'hardware' if defined($cfg) && scalar keys %$cfg;
    return 'watchdog';
}
241 | ||
# Run fence agent $agent against $node, passing @param through to the
# simulated hardware.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    return $self->{hardware}->exec_fence_agent($agent, $node, @param);
}
247 | ||
# Read the HA group configuration. Dies when the cfs is not read-write.
sub read_group_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_group_config();
}
255 | ||
# Move service $sid from $current_node to $new_node behind the owner's
# back - normally only the master may do this, to recover a _fenced_
# service. Dies when the cfs is not read-write.
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->change_service_location($sid, $current_node, $new_node);
}
264 | ||
# Append command $cmd to the CRM command queue. Dies when the cfs is
# not read-write.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->queue_crm_commands($cmd);
}
272 | ||
# Fetch queued CRM commands. Dies when the cfs is not read-write.
sub read_crm_commands {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_crm_commands();
}
280 | ||
# Print one log line in the form:
#   "<level> <time> <node>/<log_id>: <msg>"
# using the (simulated) time from $self->get_time().
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    my $time = $self->get_time();

    # Pass $msg as a %s argument instead of interpolating it into the
    # printf FORMAT string - otherwise a message containing '%' would
    # be misparsed as a conversion specification and corrupt the output.
    printf("%-5s %5d %12s: %s\n", $level, $time, "$self->{nodename}/$self->{log_id}", $msg);
}
290 | ||
# Simulate sending a notification. The subject template is
# "{{subject-prefix}}: {{subject}}"; we do poor-man's template
# rendering (plain substitution from $properties) to satisfy the test
# cases. Only the rendered subject is logged - the body text is
# dropped so we do not spam the logs.
sub send_notification {
    my ($self, $subject, $text, $properties) = @_;

    $subject =~ s/\{\{subject-prefix}}/$properties->{"subject-prefix"}/;
    $subject =~ s/\{\{subject}}/$properties->{"subject"}/;

    $self->log('email', $subject);
}
303 | ||
# Abstract: return the current (simulated) time; must be implemented
# by subclasses.
sub get_time {
    my ($self) = @_;

    die "implement in subclass";
}
309 | ||
# Abstract: pause for $delay (simulated) seconds; must be implemented
# by subclasses.
sub sleep {
    my ($self, $delay) = @_;

    die "implement in subclass";
}
315 | ||
# Abstract: sleep until (simulated) time $end_time; must be
# implemented by subclasses.
sub sleep_until {
    my ($self, $end_time) = @_;

    die "implement in subclass";
}
321 | ||
# Try to acquire the cluster-wide HA manager lock.
sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock');
}
327 | ||
# Release the cluster-wide manager lock.
# Once released another CRM may step up and take it, so only call this
# when shutting down/deactivating the current master.
sub release_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock', 1);
}
336 | ||
# Build the per-node agent lock name; $node defaults to the local node.
sub get_ha_agent_lock_name {
    my ($self, $node) = @_;

    $node ||= $self->nodename();

    return "ha_agent_${node}_lock";
}
344 | ||
# Try to acquire the agent lock for $node (defaults to the local node).
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    return $self->sim_get_lock($self->get_ha_agent_lock_name($node));
}
351 | ||
# Release the local node's agent lock.
# Only call this when the node's LRM gracefully shuts down with all
# services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $lock = $self->get_ha_agent_lock_name($self->nodename());

    return $self->sim_get_lock($lock, 1);
}
364 | ||
# Return true when the cluster is quorate AND this node is online.
sub quorate {
    my ($self) = @_;

    my ($node_info, $quorate) = $self->{hardware}->get_node_info();

    return 0 if !$node_info->{ $self->nodename() }->{online};
    return $quorate;
}
374 | ||
# Return the per-node info and quorum state from the simulated hardware.
sub get_node_info {
    my ($self) = @_;

    return $self->{hardware}->get_node_info();
}
380 | ||
# Hook invoked at the start of each main loop iteration.
sub loop_start_hook {
    my ($self) = @_;

    # do nothing, overwrite in subclass
}
386 | ||
# Hook invoked at the end of each main loop iteration.
sub loop_end_hook {
    my ($self) = @_;

    # do nothing, overwrite in subclass
}
392 | ||
# Ask the simulated hardware to update the cfs state for this node and
# return the result.
sub cluster_state_update {
    my ($self) = @_;

    return $self->{hardware}->get_cfs_state($self->{nodename}, 'update');
}
399 | ||
# Open the simulated watchdog for the local node; returns a handle for
# later update/close calls.
sub watchdog_open {
    my ($self) = @_;

    return $self->{hardware}->watchdog_open($self->nodename());
}
407 | ||
# Pet the simulated watchdog referenced by handle $wfh.
sub watchdog_update {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_update($wfh);
}
413 | ||
# Close the simulated watchdog referenced by handle $wfh.
sub watchdog_close {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_close($wfh);
}
419 | ||
# Hook called in a child process right after forking.
sub after_fork {
    my ($self) = @_;

    # nothing to clean up in the simulation environment
}
425 | ||
# Maximum number of concurrent resource agent workers in the
# simulation environment.
sub get_max_workers {
    my ($self) = @_;

    return 4;
}
432 | ||
# Return the cluster-wide enforced HA settings from the datacenter
# configuration; the 'ha' and 'crs' sections default to empty hashes
# when unset.
sub get_datacenter_settings {
    my ($self) = @_;

    my $dc_cfg = $self->{hardware}->read_datacenter_conf();

    return {
        ha => $dc_cfg->{ha} // {},
        crs => $dc_cfg->{crs} // {},
    };
}
444 | ||
# Return the static per-node stats (as provided by the simulated
# hardware), e.g. for static load scheduling.
sub get_static_node_stats {
    my ($self) = @_;

    return $self->{hardware}->get_static_node_stats();
}
450 | ||
87b82b15 | 451 | 1; |