]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Sim/Hardware.pm
Make parameters for LRM resource commands more flexible
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
CommitLineData
8b3f9144
DM
1package PVE::HA::Sim::Hardware;
2
3# Simulate Hardware resources
4
5# power supply for nodes: on/off
6# network connection to nodes: on/off
7# watchdog devices for nodes
0cfd8f5b
DM
8
9use strict;
10use warnings;
11use POSIX qw(strftime EINTR);
289e4784 12use JSON;
0cfd8f5b
DM
13use IO::File;
14use Fcntl qw(:DEFAULT :flock);
787b66eb
DM
15use File::Copy;
16use File::Path qw(make_path remove_tree);
c982dfee 17use PVE::HA::FenceConfig;
7d33cb12 18use PVE::HA::Groups;
f5a14b93 19
# Maximum allowed difference between $self->get_time() and a watchdog's last
# update_time; the watchdog_* subs treat a larger difference as "expired".
17b5cf98 20my $watchdog_timeout = 60;
0bba8f60 21
0cfd8f5b 22
787b66eb
DM
23# Status directory layout
24#
25# configuration
26#
8456bde2
DM
27# $testdir/cmdlist Command list for simulation
28# $testdir/hardware_status Hardware description (number of nodes, ...)
29# $testdir/manager_status CRM status (start with {})
30# $testdir/service_config Service configuration
abc920b4 31# $testdir/groups HA groups configuration
8456bde2 32# $testdir/service_status_<node> Service status
ed408b44 33# $testdir/datacenter.cfg Datacenter wide HA configuration
3c36cbca 34
9329c1e2
DM
35#
36# runtime status for simulation system
37#
38# $testdir/status/cluster_locks Cluster locks
39# $testdir/status/hardware_status Hardware status (power/network on/off)
40# $testdir/status/watchdog_status Watchdog status
787b66eb
DM
41#
42# runtime status
9329c1e2 43#
8456bde2
DM
44# $testdir/status/lrm_status_<node> LRM status
45# $testdir/status/manager_status CRM status
abc920b4 46# $testdir/status/crm_commands CRM command queue
8456bde2
DM
47# $testdir/status/service_config Service configuration
48# $testdir/status/service_status_<node> Service status
abc920b4 49# $testdir/status/groups HA groups configuration
c4a221bc
DM
50
# Load the LRM status object of $node.
#
# Reads "<statusdir>/lrm_status_<node>" through
# PVE::HA::Tools::read_json_from_file; {} is passed as its second argument
# (presumably the default for a missing file - verify in PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    my $path = join('/', $self->{statusdir}, "lrm_status_$node");

    return PVE::HA::Tools::read_json_from_file($path, {});
}
58
# Store $status_obj as the LRM status of $node.
#
# Writes "<statusdir>/lrm_status_<node>" through
# PVE::HA::Tools::write_json_to_file and returns whatever that call returns.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $path = join('/', $self->{statusdir}, "lrm_status_$node");

    PVE::HA::Tools::write_json_to_file($path, $status_obj);
}
787b66eb 66
# Read and JSON-decode "<statusdir>/hardware_status".
#
# As the name says, no lock is taken here; callers that need a consistent
# read-modify-write cycle run this inside global_lock().
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $path = "$self->{statusdir}/hardware_status";

    my $json = PVE::Tools::file_get_contents($path);
    my $hw_status = decode_json($json);

    return $hw_status;
}
77
# JSON-encode $cstatus and write it to "<statusdir>/hardware_status".
#
# No lock is taken here; see global_lock() for serialized access.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $path = "$self->{statusdir}/hardware_status";
    my $json = encode_json($cstatus);

    PVE::Tools::file_set_contents($path, $json);
}
85
95360669
DM
# Read "<statusdir>/service_config" and normalize every service entry.
#
# For each service ID the entry is completed in place:
#   - dies if no node is assigned
#   - 'type' and 'name' are derived from the "<vm|ct|fa>:<number>" service ID
#     (any other ID format dies with "implement me")
#   - 'state' defaults to 'disabled'; the legacy value 'enabled' becomes 'started'
#   - 'max_restart' and 'max_relocate' default to 1
#
# Returns the (modified) configuration hash.
sub read_service_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/service_config";
    my $conf = PVE::HA::Tools::read_json_from_file($path);

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        if ($sid !~ m/^(vm|ct|fa):(\d+)$/) {
            die "implement me";
        }
        ($entry->{type}, $entry->{name}) = ($1, $2);

        if (!$entry->{state}) {
            $entry->{state} = 'disabled';
        }
        if ($entry->{state} eq 'enabled') { # backward compatibility
            $entry->{state} = 'started';
        }

        for my $limit (qw(max_restart max_relocate)) {
            $entry->{$limit} = 1 if !defined($entry->{$limit});
        }
    }

    return $conf;
}
111
79e0e005
DM
# Remember $conf as the current service configuration and persist it.
#
# The hash is cached in $self->{service_config} and written to
# "<statusdir>/service_config"; the result of write_json_to_file is returned.
sub write_service_config {
    my ($self, $conf) = @_;

    $self->{service_config} = $conf;

    my $path = join('/', $self->{statusdir}, 'service_config');

    return PVE::HA::Tools::write_json_to_file($path, $conf);
}
120
# Parse the fence configuration "<statusdir>/fence.cfg".
#
# If the file does not exist, undef is handed to the parser as raw content.
sub read_fence_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/fence.cfg";

    my $raw = -e $path ? PVE::Tools::file_get_contents($path) : undef;

    return PVE::HA::FenceConfig::parse_config($path, $raw);
}
133
# Simulate running fence agent $agent against $node.
#
# Every agent behaves the same for now: the node is powered off via
# sim_hardware_cmd (using $agent as log id) and success is reported.
# Additional parameters (@param) are accepted but not used.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    my $poweroff_cmd = "power $node off";
    $self->sim_hardware_cmd($poweroff_cmd, $agent);

    return 0; # EXIT_SUCCESS
}
79e0e005 142
e5f43426
TL
# Set the requested state of service $sid to $state and persist the config.
#
# Dies if the service is unknown. Returns the updated configuration hash.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'" if !$service;

    $service->{state} = $state;

    $self->write_service_config($conf);

    return $conf;
}
155
27ccc95c
TL
# Add a new service $sid with the settings in $opts and persist the config.
#
# Dies if a service with that ID is already defined. Returns the updated
# configuration hash.
sub add_service {
    my ($self, $sid, $opts) = @_;

    my $conf = $self->read_service_config();

    if ($conf->{$sid}) {
        die "resource ID '$sid' already defined\n";
    }

    $conf->{$sid} = $opts;
    $self->write_service_config($conf);

    return $conf;
}
168
# Remove service $sid from the service configuration and persist the result.
#
# Dies if the service is unknown. Returns the updated configuration hash.
sub delete_service {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'";
    }

    delete $conf->{$sid};
    $self->write_service_config($conf);

    return $conf;
}
182
# Move service $sid from $current_node to $new_node in the service config.
#
# Dies if the service is unknown or if $current_node is not the node the
# service is currently assigned to.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    my $assigned_node = $service->{node};
    if ($current_node ne $assigned_node) {
        die "current_node for '$sid' does not match ($current_node != $assigned_node)\n";
    }

    $service->{node} = $new_node;

    $self->write_service_config($conf);
}
197
cde11324
TL
# Return the lock currently stored for service $sid (undef if none is set).
#
# Dies if the service is unknown.
sub service_has_lock {
    my ($self, $sid) = @_;

    my $service = $self->read_service_config()->{$sid};

    die "no such service '$sid'\n" if !$service;

    return $service->{lock};
}
207
# Set a lock on service $sid and persist the config.
#
# $lock names the lock; a false value selects the default lock 'backup'.
# Dies if the service is unknown. Returns the updated configuration hash.
sub lock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    my $lockname = $lock || 'backup';
    $service->{lock} = $lockname;

    $self->write_service_config($conf);

    return $conf;
}
221
# Remove the lock from service $sid.
#
# If $lock is given, the lock is only removed when it has that name;
# otherwise a warning is emitted and nothing changes.
#
# Dies if the service is unknown. Returns the name of the removed lock, or
# undef if the service was not locked or the lock name did not match.
sub unlock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    my $current = $service->{lock};
    return undef if !defined($current);

    if (defined($lock) && $current ne $lock) {
        warn "found lock '$current' trying to remove '$lock' lock\n";
        return undef;
    }

    my $removed_lock = delete $service->{lock};

    $self->write_service_config($conf);

    return $removed_lock;
}
244
# Append $cmd as one line to the CRM command queue "<statusdir>/crm_commands".
#
# A trailing newline on $cmd is stripped before exactly one is added again.
# No lock is taken; use queue_crm_commands() for the locked variant.
# Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $path = "$self->{statusdir}/crm_commands";

    my $queued = -f $path ? PVE::Tools::file_get_contents($path) : '';

    PVE::Tools::file_set_contents($path, $queued . "$cmd\n");

    return undef;
}
260
# Append $cmd to the CRM command queue while holding the global lock.
#
# Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub {
        $self->queue_crm_commands_nolock($cmd);
    });

    return undef;
}
270
# Fetch and clear the CRM command queue under the global lock.
#
# Returns the previous content of "<statusdir>/crm_commands" ('' if the file
# did not exist); the file is truncated to empty in the same locked section.
sub read_crm_commands {
    my ($self) = @_;

    my $fetch_and_clear = sub {
        my $path = "$self->{statusdir}/crm_commands";

        my $pending = -f $path ? PVE::Tools::file_get_contents($path) : '';

        PVE::Tools::file_set_contents($path, '');

        return $pending;
    };

    return $self->global_lock($fetch_and_clear);
}
288
abc920b4
DM
# Parse the HA group configuration "<statusdir>/groups".
#
# A missing file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/groups";

    my $raw = '';
    if (-f $path) {
        $raw = PVE::Tools::file_get_contents($path);
    }

    return PVE::HA::Groups->parse_config($path, $raw);
}
298
# Load the service status recorded for $node from
# "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    my $path = join('/', $self->{statusdir}, "service_status_$node");

    return PVE::HA::Tools::read_json_from_file($path);
}
305
# Store $data as the service status of $node in
# "<statusdir>/service_status_<node>" and return the writer's result.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $path = join('/', $self->{statusdir}, "service_status_$node");

    # fixme: add test if a service runs on two nodes!!!
    my $res = PVE::HA::Tools::write_json_to_file($path, $data);

    return $res;
}
c4a221bc 316
abc920b4
DM
# Default HA group configuration: new() writes this text to
# "<statusdir>/groups" when the test directory has no 'groups' file.
# It defines one group per default node (prefer_node1..3), each with
# 'nofailback 1'.
317my $default_group_config = <<__EOD;
318group: prefer_node1
319 nodes node1
e941bdc5 320 nofailback 1
abc920b4
DM
321
322group: prefer_node2
323 nodes node2
e941bdc5 324 nofailback 1
abc920b4
DM
325
326group: prefer_node3
7a294ad4 327 nodes node3
e941bdc5 328 nofailback 1
abc920b4
DM
329__EOD
330
0cfd8f5b
DM
# Create a simulated hardware object for the test directory $testdir.
#
# The runtime status directory "$testdir/status" is removed and recreated,
# then populated from the initial configuration found in $testdir:
#   manager_status        copied if present (optional)
#   groups                copied, else the built-in default groups are written
#   service_config        copied, else six default services vm:101..vm:106
#   hardware_status       copied, else three powered-off nodes node1..node3
#   fence.cfg             copied if present
#   datacenter.cfg        copied if present
#   service_status_<node> copied per node, else initialized to {}
#
# Dies if $testdir is missing or not a directory, if the status directory
# cannot be created, or if copying a file that exists in $testdir fails.
#
# Returns the blessed object; $self->{nodes} has one (empty) entry per node
# in the hardware status and $self->{service_config} holds the parsed
# service configuration.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    die "testdir '$testdir' does not exist or is not a directory!\n"
        if !-d $testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    # always start from a fresh runtime status directory
    remove_tree($statusdir);
    if (!mkdir($statusdir)) {
        my $err = $!; # save before the -d test can overwrite it
        die "unable to create status directory '$statusdir' - $err\n"
            if !-d $statusdir;
    }

    # Copy a file known to exist in $testdir; a failure here would leave the
    # simulation with a silently incomplete initial state, so it is fatal.
    my $copy_required = sub {
        my ($name) = @_;
        copy("$testdir/$name", "$statusdir/$name") ||
            die "Copy failed: $!\n";
    };

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_required->('groups');
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_required->('service_config');
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_required->('hardware_status');
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    if (-f "$testdir/fence.cfg") {
        $copy_required->('fence.cfg');
    }

    if (-f "$testdir/datacenter.cfg") {
        $copy_required->('datacenter.cfg');
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            $copy_required->("service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
407
# Return the current simulation time.
#
# Abstract in this base class: always dies, subclasses must override it.
sub get_time {
    my $self = shift;

    die "implement in subclass";
}
413
# Print one log line to the currently selected output handle.
#
# Parameters:
#   $level - log level, printed left-aligned in a 5 character column
#   $msg   - message text; one trailing newline is stripped
#   $id    - optional source id, defaults to 'hardware'
#
# The line is prefixed with the time returned by $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # $msg is passed as an argument, never as part of the format string:
    # a '%' inside the message must be printed verbatim.
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
425
# Return the runtime status directory of this simulation.
#
# The $node argument is accepted but not used.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
431
ed408b44
TL
# Load "<statusdir>/datacenter.cfg" as JSON.
#
# {} is passed as second argument to read_json_from_file (presumably the
# default for a missing file - verify in PVE::HA::Tools). The $node argument
# is accepted but not used.
sub read_datacenter_conf {
    my ($self, $node) = @_;

    my $path = join('/', $self->{statusdir}, 'datacenter.cfg');

    return PVE::HA::Tools::read_json_from_file($path, {});
}
438
# Run $code while holding an exclusive lock on "<statusdir>/hardware.lck".
#
# Parameters:
#   $code  - code reference, called as $code->($lock_fh, @param)
#   @param - additional arguments handed through to $code
#
# Returns the (scalar) result of $code. Dies if the lock file cannot be
# opened or locked; an exception thrown by $code is re-thrown after the lock
# has been released.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    # Retry only when the blocking flock was interrupted by a signal; any
    # other failure is fatal - never continue without holding the lock.
    while (!flock($fh, LOCK_EX)) {
        next if $! == EINTR;

        my $err = $!; # save before close() can overwrite it
        close($fh);
        die "can't acquire lock '$lockfile' - $err\n";
    }

    my $res;

    eval { $res = &$code($fh, @param) };
    my $err = $@;

    # closing the handle releases the lock
    close($fh);

    die $err if $err;

    return $res;
}
469
8b3f9144
DM
# Derive per-node online state and cluster quorum from a hardware status hash.
#
# A node counts as online when both its 'power' and 'network' are 'on'.
# The cluster is quorate when more than half of all nodes are online; without
# quorum every node is reported as offline.
#
# Returns ($node_info, $quorate) with $node_info->{$node}->{online} and
# $quorate each being 0 or 1.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @nodes = keys %$cstatus;

    my %online_of;
    for my $node (@nodes) {
        my $hw = $cstatus->{$node};
        $online_of{$node} = ($hw->{power} eq 'on' && $hw->{network} eq 'on') ? 1 : 0;
    }

    my $online_count = grep { $_ } values %online_of;
    my $node_count = scalar(@nodes);

    my $quorate = ($online_count > int($node_count/2)) ? 1 : 0;

    my $node_info = {};
    for my $node (@nodes) {
        $node_info->{$node}->{online} = $quorate ? $online_of{$node} : 0;
    }

    return ($node_info, $quorate);
};
499
# Return ($node_info, $quorate) computed from the current hardware status.
#
# The status file is read without taking the global lock.
sub get_node_info {
    my ($self) = @_;

    my $hw_status = $self->read_hardware_status_nolock();

    my ($node_info, $quorate) = $compute_node_info->($self, $hw_status);

    return ($node_info, $quorate);
}
508
ba2a45cd
TL
509# helper for Sim/ only
# Return the simulated cfs state $state of $node from the hardware status.
#
# A state that was never set counts as true (working); otherwise the stored
# value is returned.
sub get_cfs_state {
    my ($self, $node, $state) = @_;

    # TODO: ensure nolock is OK when adding this to RTSim
    my $hw_status = $self->read_hardware_status_nolock();

    my $value = $hw_status->{$node}->{cfs}->{$state};

    # we assume default true if not defined
    my $unset = !defined($value);

    return $unset || $value;
}
520
8b3f9144 521# simulate hardware commands
0cfd8f5b
DM
522# power <node> <on|off>
523# network <node> <on|off>
ba2a45cd 524# cfs <node> <rw|update> <work|fail>
e08a0717
TL
525# reboot <node>
526# shutdown <node>
527# restart-lrm <node>
667670b2 528# service <sid> <started|disabled|stopped|ignored>
e08a0717
TL
529# service <sid> <migrate|relocate> <target>
530# service <sid> lock/unlock [lockname]
0cfd8f5b 531
# Execute one simulated hardware/cluster command while holding the global lock.
#
# $cmdstr is split on whitespace into "<cmd> <node|sid> [action] [target]":
#   power <node> <on|off>
#   network <node> <on|off>
#   cfs <node> <rw|update> <work|fail>
#   reboot <node>
#   shutdown <node>
#   restart-lrm <node>
#   crm <node> <start|stop>
#   service <sid> <started|disabled|stopped|ignored>
#   service <sid> <migrate|relocate> <target>
#   service <sid> add <node>
#   service <sid> delete
#   service <sid> lock/unlock [lockname]
#
# $logid is passed on as id for the "execute ..." log line.
#
# Dies on a missing node/service, an unknown node, an unknown command or
# action, or a missing target where one is required. Returns the hardware
# status hash as read at the start (including changes made by the command).
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {
        my ($lock_fh) = @_;

        my $cstatus = $self->read_hardware_status_nolock();

        my ($cmd, $objid, $action, $target) = split(/\s+/, $cmdstr);

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        # A missing action is handled like an empty (and thus unknown) one, so
        # the checks below fail with their regular error message instead of
        # matching against undef.
        $action = '' if !defined($action);

        my ($node, $sid, $d);

        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'\n"
                if $action !~ m/^(on|off)$/;

            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {

                    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
                    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
                    $d->{lrm_restart} = undef;
                    $cstatus->{$node}->{cfs} = {};

                } else {

                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $self->crm_control('stop', $d, $lock_fh);
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $self->lrm_control('stop', $d, $lock_fh);
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }

                    # a powered off node keeps neither watchdogs nor services
                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # power state also drives the network state
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'cfs') {
            die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
                if $action !~ m/^(rw|update)$/;

            my $cfs_cmd = defined($target) ? $target : '';
            die "sim_hardware_cmd: unknown cfs command '$cfs_cmd' for '$action' on node '$node'"
                if $cfs_cmd !~ m/^(work|fail)$/;

            $cstatus->{$node}->{cfs}->{$action} = $cfs_cmd eq 'work';
            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $self->lrm_control('shutdown', $d, $lock_fh) if defined($d->{lrm});
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $self->lrm_control('shutdown', $d, $lock_fh);
            }
        } elsif ($cmd eq 'crm') {

            if ($action eq 'stop') {
                if ($d->{crm}) {
                    $d->{crm_stop} = 1;
                    $self->crm_control('shutdown', $d, $lock_fh);
                }
            } elsif ($action eq 'start') {
                $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
            } else {
                die "sim_hardware_cmd: unknown action '$action'";
            }

        } elsif ($cmd eq 'service') {
            if ($action eq 'started' || $action eq 'disabled' ||
                $action eq 'stopped' || $action eq 'ignored') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$target;

                $self->queue_crm_commands_nolock("$action $sid $target");

            } elsif ($action eq 'add') {

                # A service without node would be written to the service
                # config and make every later read_service_config() die.
                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$target;

                $self->add_service($sid, {state => 'started', node => $target});

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } elsif ($action eq 'lock') {

                $self->lock_service($sid, $target);

            } elsif ($action eq 'unlock') {

                $self->unlock_service($sid, $target);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n"
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

        return $cstatus;
    };

    return $self->global_lock($code);
}
676
677# for controlling the resource manager services
# Control the simulated CRM of a node.
#
# Called by sim_hardware_cmd as crm_control($action, $data, $lock_fh) with
# $action 'start', 'stop' or 'shutdown' and $data being the node's entry in
# $self->{nodes}. Abstract here: always dies, subclasses must override it.
sub crm_control {
    my $self = shift;
    my ($action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
683
# Control the simulated LRM of a node.
#
# Called by sim_hardware_cmd as lrm_control($action, $data, $lock_fh) with
# $action 'start', 'stop' or 'shutdown' and $data being the node's entry in
# $self->{nodes}. Abstract here: always dies, subclasses must override it.
sub lrm_control {
    my $self = shift;
    my ($action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
689
# Run the simulation.
#
# Abstract in this base class: always dies, subclasses must override it.
sub run {
    my $self = shift;

    die "implement in subclass";
}
9329c1e2
DM
695
# Apply $code to the watchdog status while holding the global lock.
#
# The status is loaded from "<statusdir>/watchdog_status" ({} if the file does
# not exist) and handed to $code, which must return ($wdstatus, $res). The
# returned status is written back and $res is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $path = "$self->{statusdir}/watchdog_status";

        my $current = {};
        if (-f $path) {
            my $raw = PVE::Tools::file_get_contents($path);
            $current = decode_json($raw);
        }

        my ($updated, $res) = $code->($current);

        PVE::Tools::file_set_contents($path, encode_json($updated));

        return $res;
    });
};
721
0590c6a7
DM
# Drop all watchdog entries that belong to $node.
#
# Does nothing if "<statusdir>/watchdog_status" does not exist. No lock is
# taken here; sim_hardware_cmd calls this from inside its locked section.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/watchdog_status";

    return if !-f $path;

    my $raw = PVE::Tools::file_get_contents($path);
    my $wdstatus = decode_json($raw);

    my @node_ids = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
    delete $wdstatus->{$_} for @node_ids;

    PVE::Tools::file_set_contents($path, encode_json($wdstatus));
}
738
9329c1e2
DM
# Check the watchdogs of $node and remove the expired ones.
#
# A watchdog is expired when its last update is more than $watchdog_timeout
# older than the current time. Returns 1 if no watchdog of the node expired,
# 0 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_stale = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        my @node_handles = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;

        for my $wfh (@node_handles) {
            my $age = $self->get_time() - $wdstatus->{$wfh}->{update_time};

            next if $age <= $watchdog_timeout;

            # expired
            $all_alive = 0;
            delete $wdstatus->{$wfh};
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $expire_stale);
}
765
# per-process counter used to build unique watchdog handle ids
my $wdcounter = 0;

# Open a new watchdog for $node.
#
# Registers an entry with the current time as update_time and returns its
# handle id "WD:<node>:<pid>:<counter>".
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter++;

        my $id = join(':', 'WD', $node, $$, $wdcounter);

        if (defined($wdstatus->{$id})) {
            die "internal error";
        }

        my $now = $self->get_time();
        $wdstatus->{$id} = { node => $node, update_time => $now };

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
790
# Close the watchdog with handle $wfh and remove its entry.
#
# Dies if the handle is unknown or if the watchdog already expired (last
# update more than $watchdog_timeout ago).
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $remove = sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        if (!defined($entry)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $entry->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $remove);
}
810
# Refresh the watchdog with handle $wfh by setting its update_time to now.
#
# Dies if the handle is unknown or if the watchdog already expired (last
# update more than $watchdog_timeout ago).
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        if (!defined($entry)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();
        my $age = $now - $entry->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        $entry->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
833
0cfd8f5b 8341;