package PVE::HA::Sim::Env;

use strict;
use warnings;
use POSIX qw(strftime EINTR);
use JSON;

use IO::File;
use Fcntl qw(:DEFAULT :flock);

use PVE::HA::Tools;
use PVE::HA::Env;

use PVE::HA::Resources;
use PVE::HA::Sim::Resources::VirtVM;
use PVE::HA::Sim::Resources::VirtCT;
use PVE::HA::Sim::Resources::VirtFail;

PVE::HA::Sim::Resources::VirtVM->register();
PVE::HA::Sim::Resources::VirtCT->register();
PVE::HA::Sim::Resources::VirtFail->register();

PVE::HA::Resources->init();

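# Simulated cluster environment for a single node, used by the regression
# tests and the HA simulator. $hardware is the simulated cluster backend
# (see PVE::HA::Sim::Hardware), which provides the shared status directory.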
sub new {
    my ($this, $nodename, $hardware, $log_id) = @_;

    die "missing nodename" if !$nodename;
    die "missing log_id" if !$log_id;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    $self->{statusdir} = $hardware->statusdir();
    $self->{nodename} = $nodename;

    $self->{hardware} = $hardware;
    $self->{lock_timeout} = 120;

    $self->{log_id} = $log_id;

    return $self;
}

sub nodename {
    my ($self) = @_;

    return $self->{nodename};
}

sub hardware {
    my ($self) = @_;

    return $self->{hardware};
}

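# assert that the simulated cluster file system (cfs) of this node is
# currently readable and writable, analogous to a working pmxcfs mount
# on a real node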
my $assert_cfs_can_rw = sub {
    my ($self, $emsg) = @_;

    $emsg //= 'cfs connection refused - not mounted?';

    die "$emsg\n"
        if !$self->{hardware}->get_cfs_state($self->{nodename}, 'rw');
};

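# Simulate cluster-wide locks via a JSON file in the shared status directory:
# a lock is owned by one node and must be refreshed within $self->{lock_timeout}
# seconds, otherwise any node may take it over. With $unlock set, the lock is
# released if we still own it, or simply treated as gone if it already timed out.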
sub sim_get_lock {
    my ($self, $lock_name, $unlock) = @_;

    return 0 if !$self->quorate();

    my $filename = "$self->{statusdir}/cluster_locks";

    my $code = sub {

        my $data = PVE::HA::Tools::read_json_from_file($filename, {});

        my $res;

        my $nodename = $self->nodename();
        my $ctime = $self->get_time();

        if ($unlock) {

            if (my $d = $data->{$lock_name}) {
                my $tdiff = $ctime - $d->{time};

                if ($tdiff > $self->{lock_timeout}) {
                    $res = 1;
                } elsif (($tdiff <= $self->{lock_timeout}) && ($d->{node} eq $nodename)) {
                    delete $data->{$lock_name};
                    $res = 1;
                } else {
                    $res = 0;
                }
            }

        } else {

            if (my $d = $data->{$lock_name}) {

                my $tdiff = $ctime - $d->{time};

                if ($tdiff <= $self->{lock_timeout}) {
                    if ($d->{node} eq $nodename) {
                        $d->{time} = $ctime;
                        $res = 1;
                    } else {
                        $res = 0;
                    }
                } else {
                    $self->log('info', "got lock '$lock_name'");
                    $d->{node} = $nodename;
                    $d->{time} = $ctime;
                    $res = 1;
                }

            } else {
                $data->{$lock_name} = {
                    time => $ctime,
                    node => $nodename,
                };
                $self->log('info', "got lock '$lock_name'");
                $res = 1;
            }
        }

        PVE::HA::Tools::write_json_to_file($filename, $data);

        return $res;
    };

    return $self->{hardware}->global_lock($code);
}

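# The manager status is persisted as JSON below the status directory; on a
# real cluster it would live in the cluster file system instead.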
sub read_manager_status {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    return PVE::HA::Tools::read_json_from_file($filename, {});
}

sub write_manager_status {
    my ($self, $status_obj) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}

sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_lrm_status($node);
}

sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $node = $self->{nodename};

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->write_lrm_status($node, $status_obj);
}

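# query the simulated hardware whether this node is being shut down;
# returns the pair ($shutdown, $reboot), where a reboot implies a shutdown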
sub is_node_shutdown {
    my ($self) = @_;

    my $node = $self->{nodename};
    my $cstatus = $self->{hardware}->read_hardware_status_nolock();

    die "undefined node status for node '$node'" if !defined($cstatus->{$node});

    my ($shutdown, $reboot) = (0, 0);

    if (my $target = $cstatus->{$node}->{shutdown}) {
        if ($target eq 'shutdown') {
            $shutdown = 1;
        } elsif ($target eq 'reboot') {
            $shutdown = 1;
            $reboot = 1;
        } else {
            die "unknown shutdown target '$target'";
        }
    }

    return ($shutdown, $reboot);
}

sub read_service_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_service_config();
}

sub update_service_config {
    my ($self, $sid, $param) = @_;

    return $self->{hardware}->update_service_config($sid, $param);
}

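# split a service id (sid) into its type and name part; e.g. 'vm:100'
# yields the type 'vm' and the name '100'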
sub parse_sid {
    my ($self, $sid) = @_;

    die "unable to parse service id '$sid'\n"
        if !($sid =~ m/^(\S+):(\S+)$/);

    my $name = $2;
    my $type = $1;

    return wantarray ? ($sid, $type, $name) : $sid;
}

sub read_fence_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_fence_config();
}

# the test/sim framework has hardware-based fencing enabled if it has
# fence devices configured
sub fencing_mode {
    my ($self) = @_;

    my $cfg = $self->read_fence_config();

    return (defined($cfg) && keys %{$cfg}) ? 'hardware' : 'watchdog';
}

sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    return $self->{hardware}->exec_fence_agent($agent, $node, @param);
}

sub read_group_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_group_config();
}

# this is normally only allowed by the master to recover a _fenced_ service
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->change_service_location($sid, $current_node, $new_node);
}

sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->queue_crm_commands($cmd);
}

sub read_crm_commands {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_crm_commands();
}

sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    my $time = $self->get_time();

    # pass $msg as an argument instead of interpolating it into the format
    # string, so messages containing '%' cannot break printf
    printf("%-5s %5d %12s: %s\n", $level, $time, "$self->{nodename}/$self->{log_id}", $msg);
}

sub send_notification {
    my ($self, $subject, $text, $properties) = @_;

    # The template for the subject is "{{subject-prefix}}: {{subject}}"
    # We have to perform poor-man's template rendering to pass the test cases.

    $subject = $subject =~ s/\{\{subject-prefix}}/$properties->{"subject-prefix"}/r;
    $subject = $subject =~ s/\{\{subject}}/$properties->{"subject"}/r;

    # only log subject, do not spam the logs
    $self->log('email', $subject);
}

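# The following time-related methods are abstract and must be implemented by
# the concrete environment, e.g. the test or simulator subclasses, which may
# use simulated instead of wall-clock time.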
sub get_time {
    my ($self) = @_;

    die "implement in subclass";
}

sub sleep {
    my ($self, $delay) = @_;

    die "implement in subclass";
}

sub sleep_until {
    my ($self, $end_time) = @_;

    die "implement in subclass";
}

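# try to acquire the cluster-wide manager (CRM) lock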
sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock');
}

# release the cluster wide manager lock.
# when released another CRM may step up and get the lock, thus this should only
# get called when shutting down/deactivating the current master
sub release_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock', 1);
}

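# each node's LRM has its own agent lock, named after the node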
sub get_ha_agent_lock_name {
    my ($self, $node) = @_;

    $node = $self->nodename() if !$node;

    return "ha_agent_${node}_lock";
}

sub get_ha_agent_lock {
    my ($self, $node) = @_;

    my $lck = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lck);
}

# release the respective node agent lock.
# this should only get called if the node's LRM gracefully shuts down with
# all services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();

    my $lock = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lock, 1);
}

# return true when the cluster is quorate
sub quorate {
    my ($self) = @_;

    my ($node_info, $quorate) = $self->{hardware}->get_node_info();
    my $node = $self->nodename();
    return 0 if !$node_info->{$node}->{online};
    return $quorate;
}

sub get_node_info {
    my ($self) = @_;

    return $self->{hardware}->get_node_info();
}

sub loop_start_hook {
    my ($self) = @_;

    # do nothing, override in subclass
}

sub loop_end_hook {
    my ($self) = @_;

    # do nothing, override in subclass
}

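# trigger the simulated cfs of this node to update its copy of the cluster
# state; returns false if that is currently not possible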
sub cluster_state_update {
    my ($self) = @_;

    return $self->{hardware}->get_cfs_state($self->{nodename}, 'update');
}

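# watchdog interface, backed by the simulated hardware; on a real node this
# would talk to a real watchdog device (via the watchdog-mux service) instead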
sub watchdog_open {
    my ($self) = @_;

    my $node = $self->nodename();

    return $self->{hardware}->watchdog_open($node);
}

sub watchdog_update {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_update($wfh);
}

sub watchdog_close {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_close($wfh);
}

sub after_fork {
    my ($self) = @_;

    # nothing to clean up in the simulation environment
}

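# maximum number of workers the LRM may fork in parallel; the simulation
# simply uses a fixed value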
sub get_max_workers {
    my ($self) = @_;

    return 4;
}

# return cluster wide enforced HA settings
sub get_datacenter_settings {
    my ($self) = @_;

    my $datacenterconfig = $self->{hardware}->read_datacenter_conf();

    return {
        ha => $datacenterconfig->{ha} // {},
        crs => $datacenterconfig->{crs} // {},
    };
}

sub get_static_node_stats {
    my ($self) = @_;

    return $self->{hardware}->get_static_node_stats();
}

1;