]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Sim/Hardware.pm
truncate log level in regression tests
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
CommitLineData
8b3f9144
DM
1package PVE::HA::Sim::Hardware;
2
3# Simulate Hardware resources
4
5# power supply for nodes: on/off
6# network connection to nodes: on/off
7# watchdog devices for nodes
0cfd8f5b
DM
8
9use strict;
10use warnings;
11use POSIX qw(strftime EINTR);
12use Data::Dumper;
13use JSON;
14use IO::File;
15use Fcntl qw(:DEFAULT :flock);
787b66eb
DM
16use File::Copy;
17use File::Path qw(make_path remove_tree);
f7cb19c6 18use PVE::HA::Config 'testenv';
f5a14b93 19
# Largest allowed difference between get_time() and a watchdog's
# 'update_time' before that watchdog is treated as expired.
17b5cf98 20my $watchdog_timeout = 60;
0bba8f60 21
0cfd8f5b 22
787b66eb
DM
23# Status directory layout
24#
25# configuration
26#
8456bde2
DM
27# $testdir/cmdlist Command list for simulation
28# $testdir/hardware_status Hardware description (number of nodes, ...)
29# $testdir/manager_status CRM status (start with {})
30# $testdir/service_config Service configuration
abc920b4 31# $testdir/groups HA groups configuration
8456bde2 32# $testdir/service_status_<node> Service status
3c36cbca 33
9329c1e2
DM
34#
35# runtime status for simulation system
36#
37# $testdir/status/cluster_locks Cluster locks
38# $testdir/status/hardware_status Hardware status (power/network on/off)
39# $testdir/status/watchdog_status Watchdog status
787b66eb
DM
40#
41# runtime status
9329c1e2 42#
8456bde2
DM
43# $testdir/status/lrm_status_<node> LRM status
44# $testdir/status/manager_status CRM status
abc920b4 45# $testdir/status/crm_commands CRM command queue
8456bde2
DM
46# $testdir/status/service_config Service configuration
47# $testdir/status/service_status_<node> Service status
abc920b4 48# $testdir/status/groups HA groups configuration
c4a221bc
DM
49
# Read the LRM status of $node from "<statusdir>/lrm_status_<node>".
# An empty hash ref is passed as second argument to the JSON reader
# (presumably the default used when the file is missing - confirm in
# PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/lrm_status_$node", {});
}
57
# Store $status_obj as JSON in "<statusdir>/lrm_status_<node>".
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    return PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/lrm_status_$node", $status_obj);
}
787b66eb 65
# Load and decode "<statusdir>/hardware_status" (JSON).
# As the name says, no lock is taken here.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $json_text = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");

    return decode_json($json_text);
}
76
# Encode $cstatus as JSON and store it in "<statusdir>/hardware_status".
# As the name says, no lock is taken here.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $path = "$self->{statusdir}/hardware_status";

    return PVE::Tools::file_set_contents($path, encode_json($cstatus));
}
84
95360669
DM
# Read "<statusdir>/service_config" and normalize every entry:
#  - dies when a service has no 'node' assigned
#  - derives 'type' and 'name' from service IDs of the form vm:<digits>
#    or ct:<digits>; any other ID is rejected with "implement me"
#  - sets 'state' to 'disabled' when it is missing or false
# Returns the (modified) configuration hash ref.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        my ($type, $name) = $sid =~ m/^(vm|ct):(\d+)$/
            or die "implement me";

        @{$entry}{qw(type name)} = ($type, $name);
        $entry->{state} ||= 'disabled';
    }

    return $conf;
}
107
79e0e005
DM
# Remember $conf in $self->{service_config} and write it as JSON to
# "<statusdir>/service_config". Returns whatever the JSON writer returns.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the cached copy in step with what goes to disk
    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/service_config", $conf);
}
116
e5f43426
TL
# Set the 'state' of service $sid to $state and persist the configuration.
# Dies if $sid is not configured. Returns the updated configuration.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $services = $self->read_service_config();

    my $entry = $services->{$sid}
        or die "no such service '$sid'";

    $entry->{state} = $state;

    $self->write_service_config($services);

    return $services;
}
129
27ccc95c
TL
# Add service $sid with the options hash $opts and persist the
# configuration. Dies if $sid is already configured.
# Returns the updated configuration.
sub add_service {
    my ($self, $sid, $opts) = @_;

    my $services = $self->read_service_config();

    if ($services->{$sid}) {
        die "resource ID '$sid' already defined\n";
    }

    $services->{$sid} = $opts;
    $self->write_service_config($services);

    return $services;
}
142
# Remove service $sid from the configuration and persist the result.
# Dies if $sid is not configured. Returns the updated configuration.
sub delete_service {
    my ($self, $sid) = @_;

    my $services = $self->read_service_config();

    $services->{$sid}
        or die "no such service '$sid'";

    delete $services->{$sid};
    $self->write_service_config($services);

    return $services;
}
156
# Move service $sid from $current_node to $new_node in the service
# configuration. Dies if the service is unknown or if $current_node is
# not the node currently recorded for it.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'\n";
    }

    # refuse the move when the caller's idea of the current node is stale
    if ($current_node ne $conf->{$sid}->{node}) {
        die "current_node for '$sid' does not match ($current_node != $conf->{$sid}->{node})\n";
    }

    $conf->{$sid}->{node} = $new_node;

    return $self->write_service_config($conf);
}
171
# Append $cmd (one line) to "<statusdir>/crm_commands", creating the
# file if needed. No lock is taken here. Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $queue_file = "$self->{statusdir}/crm_commands";

    # start from the already queued commands, if any
    my $queued = (-f $queue_file) ? PVE::Tools::file_get_contents($queue_file) : '';
    $queued .= "$cmd\n";

    PVE::Tools::file_set_contents($queue_file, $queued);

    return undef;
}
187
# Queue $cmd via queue_crm_commands_nolock() while holding the global
# lock. Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub { $self->queue_crm_commands_nolock($cmd); });

    return undef;
}
197
# Fetch and clear the CRM command queue under the global lock.
# Returns the previous content of "<statusdir>/crm_commands" ('' when
# the file does not exist); the file is emptied (or created empty).
sub read_crm_commands {
    my ($self) = @_;

    my $drain_queue = sub {
        my $queue_file = "$self->{statusdir}/crm_commands";

        my $pending = (-f $queue_file) ? PVE::Tools::file_get_contents($queue_file) : '';

        PVE::Tools::file_set_contents($queue_file, '');

        return $pending;
    };

    return $self->global_lock($drain_queue);
}
215
abc920b4
DM
# Parse "<statusdir>/groups" with PVE::HA::Config::parse_groups_config().
# A missing file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/groups";

    my $raw = (-f $path) ? PVE::Tools::file_get_contents($path) : '';

    return PVE::HA::Config::parse_groups_config($path, $raw);
}
225
# Read the service status of $node from
# "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/service_status_$node");
}
232
# Write $data as JSON to "<statusdir>/service_status_<node>" and return
# the result of the JSON writer.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $path = "$self->{statusdir}/service_status_$node";

    # FIXME: add a test that detects a service running on two nodes
    return scalar(PVE::HA::Tools::write_json_to_file($path, $data));
}
243
abc920b4
DM
# Built-in HA group configuration. new() writes this text to
# "$statusdir/groups" when the test directory has no "groups" file.
# It defines one group per default node (node1..node3).
244my $default_group_config = <<__EOD;
245group: prefer_node1
246 nodes node1
e941bdc5 247 nofailback 1
abc920b4
DM
248
249group: prefer_node2
250 nodes node2
e941bdc5 251 nofailback 1
abc920b4
DM
252
253group: prefer_node3
7a294ad4 254 nodes node3
e941bdc5 255 nofailback 1
abc920b4
DM
256__EOD
257
0cfd8f5b
DM
# Constructor: set up a fresh "$testdir/status" directory for a run.
#
# $testdir is required. Any existing status directory is removed and
# recreated, then the initial state is populated:
#  - manager_status is copied if present (optional)
#  - groups, service_config and hardware_status are copied from $testdir
#    when present, otherwise built-in defaults (three nodes, six VMs) are
#    written
#  - for every node in hardware_status, service_status_<node> is copied
#    or initialised with {}
#
# Dies if the status directory cannot be created or an existing input
# file cannot be copied. Returns the blessed object; $self->{nodes} has
# one (empty) entry per node and $self->{service_config} holds the
# parsed service configuration.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    # without the directory every later write fails with a less useful error
    mkdir($statusdir) || die "unable to create directory '$statusdir' - $!\n";

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        copy("$testdir/groups", "$statusdir/groups") ||
            die "Copy failed: $!\n";
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        copy("$testdir/service_config", "$statusdir/service_config") ||
            die "Copy failed: $!\n";
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
            die "Copy failed: $!\n";
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            copy("$testdir/service_status_$node", "$statusdir/service_status_$node") ||
                die "Copy failed: $!\n";
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
324
# Abstract method: this base implementation always dies; the message
# asks for an implementation in a subclass.
sub get_time {
    my $self = shift;

    die("implement in subclass");
}
330
# Print one log line to the currently selected output handle.
#
# Parameters:
#   $level - log level string (left-aligned, padded to 5 characters)
#   $msg   - message text; a trailing newline is removed
#   $id    - optional source identifier, defaults to 'hardware'
#
# The time stamp comes from $self->get_time(). The message is passed as a
# printf argument, not as part of the format string, so '%' characters in
# $msg are printed literally.
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
342
# Accessor for the status directory path. Any further argument (the
# original signature names a $node) is ignored.
sub statusdir {
    my $self = shift;

    return $self->{statusdir};
}
348
# Run $code while holding an exclusive flock on
# "<statusdir>/hardware.lck".
#
# Parameters:
#   $code  - code ref, called in scalar context as $code->($fh, @param)
#            where $fh is the open lock file handle
#   @param - extra arguments passed through to $code
#
# Returns the (scalar) result of $code. Dies if the lock file cannot be
# opened or locked; an error thrown by $code is re-thrown after the lock
# file has been closed (which releases the lock).
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";

    # mode 'a' is append (creating the file if needed); passing the mode
    # separately keeps the file name from being parsed as part of it
    my $fh = IO::File->new($lockfile, 'a') ||
        die "unable to open '$lockfile'\n";

    # Retry when the blocking flock() is interrupted by a signal; any
    # other failure is fatal. $code must never run without the lock.
    until (flock($fh, LOCK_EX)) {
        next if $! == EINTR;

        my $lock_err = $!; # close() may overwrite $!
        close($fh);
        die "can't acquire lock '$lockfile' - $lock_err\n";
    }

    my $res;

    eval { $res = &$code($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
379
8b3f9144
DM
# Derive per-node online state and cluster quorum from a hardware status
# hash ($node => { power => 'on'|'off', network => 'on'|'off' }).
#
# A node counts as online when both power and network are 'on'. The
# cluster is quorate when more than half of all nodes are online; without
# quorum every node is reported as offline.
#
# Returns the list ($node_info, $quorate), where $node_info maps each
# node name to { online => 0|1 }.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my %info;
    my $total = 0;
    my $reachable = 0;

    for my $name (keys %$cstatus) {
        my $hw = $cstatus->{$name};

        my $is_online = 0;
        $is_online = 1 if $hw->{power} eq 'on' && $hw->{network} eq 'on';

        $info{$name} = { online => $is_online };

        $total++;
        $reachable += $is_online;
    }

    my $quorate = 0;
    $quorate = 1 if $reachable > int($total/2);

    if (!$quorate) {
        # no quorum: nobody is considered online
        $_->{online} = 0 for values %info;
    }

    return (\%info, $quorate);
};
409
# Read the current hardware status (without locking) and return the list
# ($node_info, $quorate) computed from it by $compute_node_info.
sub get_node_info {
    my ($self) = @_;

    my $hw_status = $self->read_hardware_status_nolock();

    my ($info, $has_quorum) = $compute_node_info->($self, $hw_status);

    return ($info, $has_quorum);
}
418
419# simulate hardware commands
0cfd8f5b
DM
420# power <node> <on|off>
421# network <node> <on|off>
422
# Abstract method taking ($self, $cmdstr, $logid): this base
# implementation always dies; the message asks for an implementation in
# a subclass.
sub sim_hardware_cmd {
    my $self = shift;
    my ($cmdstr, $logid) = @_;

    die("implement in subclass");
}
428
# Abstract method: this base implementation always dies; the message
# asks for an implementation in a subclass.
sub run {
    my $self = shift;

    die("implement in subclass");
}
9329c1e2
DM
434
# Helper for all watchdog operations: under the global lock, load
# "<statusdir>/watchdog_status" (an empty hash when the file is missing),
# call $code->($wdstatus), which must return ($new_wdstatus, $result),
# write the new status back and return $result.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    my $locked_update = sub {
        my $path = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $path) {
            my $json_text = PVE::Tools::file_get_contents($path);
            $wdstatus = decode_json($json_text);
        }

        my $result;
        ($wdstatus, $result) = $code->($wdstatus);

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));

        return $result;
    };

    return $self->global_lock($locked_update);
};
460
0590c6a7
DM
# Drop every watchdog entry that belongs to $node from
# "<statusdir>/watchdog_status". Does nothing when the file is missing.
# No lock is taken here.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/watchdog_status";

    if (-f $path) {
        my $json_text = PVE::Tools::file_get_contents($path);
        my $wdstatus = decode_json($json_text);

        # collect the handles of this node first, then remove them at once
        my @node_handles = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@node_handles};

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));
    }
}
477
9329c1e2
DM
# Check all watchdogs of $node. Every watchdog whose last update is more
# than $watchdog_timeout behind get_time() is removed.
# Returns 1 when none expired, 0 when at least one expired.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_stale = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};

            if ($wd->{node} eq $node) {
                my $now = $self->get_time();
                my $age = $now - $wd->{update_time};

                if ($age > $watchdog_timeout) { # expired
                    $all_alive = 0;
                    delete $wdstatus->{$wfh};
                }
            }
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $expire_stale);
}
504
# Counter incremented by every watchdog_open() call; it is part of the
# generated watchdog handle ID.
505my $wdcounter = 0;
506
# Register a new watchdog for $node and return its handle ID.
# The ID has the form "WD:<node>:<pid>:<counter>"; the entry records the
# node and the current get_time() as 'update_time'.
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter += 1;

        my $id = "WD:$node:$$:$wdcounter";

        # IDs are built from pid and counter, so a clash means a logic error
        if (defined($wdstatus->{$id})) {
            die "internal error";
        }

        my $entry = { node => $node };
        $entry->{update_time} = $self->get_time();
        $wdstatus->{$id} = $entry;

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
529
# Close (remove) the watchdog with handle $wfh.
# Dies if the handle is unknown or if the watchdog has already expired,
# i.e. its last update is more than $watchdog_timeout behind get_time().
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $unregister = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $wd->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $unregister);
}
549
# Refresh the watchdog with handle $wfh by setting its 'update_time' to
# the current get_time().
# Dies if the handle is unknown or if the watchdog has already expired,
# i.e. its last update is more than $watchdog_timeout behind get_time().
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();
        if ($now - $wd->{update_time} > $watchdog_timeout) {
            die "watchdog expired";
        }

        $wd->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
572
0cfd8f5b 5731;