]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Sim/Hardware.pm
sim/hardware: sort and split use statements
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
CommitLineData
8b3f9144
DM
1package PVE::HA::Sim::Hardware;
2
3# Simulate Hardware resources
4
5# power supply for nodes: on/off
6# network connection to nodes: on/off
7# watchdog devices for nodes
0cfd8f5b
DM
8
9use strict;
10use warnings;
a0a7d11e 11
0cfd8f5b 12use Fcntl qw(:DEFAULT :flock);
787b66eb
DM
13use File::Copy;
14use File::Path qw(make_path remove_tree);
a0a7d11e
TL
15use IO::File;
16use JSON;
17use POSIX qw(strftime EINTR);
18
c982dfee 19use PVE::HA::FenceConfig;
7d33cb12 20use PVE::HA::Groups;
f5a14b93 21
# A watchdog counts as expired once the difference between the current
# get_time() value and its last update_time exceeds this threshold.
my $watchdog_timeout = 60;
0bba8f60 23
787b66eb
DM
# Status directory layout
#
# configuration
#
# $testdir/cmdlist                    Command list for simulation
# $testdir/hardware_status            Hardware description (number of nodes, ...)
# $testdir/manager_status             CRM status (start with {})
# $testdir/service_config             Service configuration
# $testdir/groups                     HA groups configuration
# $testdir/service_status_<node>      Service status
# $testdir/datacenter.cfg             Datacenter wide HA configuration

#
# runtime status for simulation system
#
# $testdir/status/cluster_locks        Cluster locks
# $testdir/status/hardware_status      Hardware status (power/network on/off)
# $testdir/status/watchdog_status      Watchdog status
#
# runtime status
#
# $testdir/status/lrm_status_<node>           LRM status
# $testdir/status/manager_status              CRM status
# $testdir/status/crm_commands                CRM command queue
# $testdir/status/service_config              Service configuration
# $testdir/status/service_status_<node>       Service status
# $testdir/status/groups                      HA groups configuration
c4a221bc
DM
51
# Load the LRM status of $node from "<statusdir>/lrm_status_<node>".
# An empty hash reference is handed to the JSON reader as second argument
# (presumably the default for a missing file - confirm in PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/lrm_status_$node",
        {},
    );
}
59
# Store $status_obj as the LRM status of $node in
# "<statusdir>/lrm_status_<node>". The result of the JSON writer is the
# (implicit) return value.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $path = "$self->{statusdir}/lrm_status_$node";

    PVE::HA::Tools::write_json_to_file($path, $status_obj);
}
787b66eb 67
# Read "<statusdir>/hardware_status" and return the decoded JSON structure.
# As the name says, no lock is taken here.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $raw = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");

    return decode_json($raw);
}
78
# Serialize $cstatus as JSON into "<statusdir>/hardware_status".
# As the name says, no lock is taken here.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $target = "$self->{statusdir}/hardware_status";
    my $json = encode_json($cstatus);

    PVE::Tools::file_set_contents($target, $json);
}
86
95360669
DM
# Read "<statusdir>/service_config" and normalize every entry:
#  - dies if a service has no node assigned
#  - derives 'type' and 'name' from a service ID of the form
#    (vm|ct|fa):<digits>; any other ID dies with "implement me"
#  - 'state' defaults to 'disabled', the legacy value 'enabled' maps to 'started'
#  - 'max_restart' and 'max_relocate' default to 1
# Returns the normalized configuration hash.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        my ($type, $name) = $sid =~ m/^(vm|ct|fa):(\d+)$/;
        die "implement me" if !defined($type);

        $entry->{type} = $type;
        $entry->{name} = $name;

        $entry->{state} ||= 'disabled';
        $entry->{state} = 'started' if $entry->{state} eq 'enabled'; # backward compatibility

        $entry->{max_restart} //= 1;
        $entry->{max_relocate} //= 1;
    }

    return $conf;
}
112
76b83c72
FE
# Merge the key/value pairs of $param into the configuration of service
# $sid and write the whole service configuration back.
#
# Dies with "no such resource" if $sid is not configured. The result of
# write_service_config() is the (implicit) return value.
sub update_service_config {
    my ($self, $sid, $param) = @_;

    my $conf = $self->read_service_config();

    my $sconf = $conf->{$sid} || die "no such resource '$sid'\n";

    # iterate over the keys only - flattening the hash would also visit the
    # values and store each of them as a bogus key with an undef value
    foreach my $k (keys %$param) {
        $sconf->{$k} = $param->{$k};
    }

    $self->write_service_config($conf);
}
126
79e0e005
DM
# Remember $conf as the in-memory service configuration and write it as
# JSON to "<statusdir>/service_config". Returns the result of the JSON writer.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the cached copy in step with what goes to disk
    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/service_config",
        $conf,
    );
}
135
# Parse "<statusdir>/fence.cfg" with PVE::HA::FenceConfig::parse_config().
# If the file does not exist, undef is passed as the raw content.
sub read_fence_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/fence.cfg";

    my $raw = (-e $filename) ? PVE::Tools::file_get_contents($filename) : undef;

    return PVE::HA::FenceConfig::parse_config($filename, $raw);
}
148
# Simulated fence agent: powers off $node through sim_hardware_cmd(), using
# the agent name as log id, and always reports success. Extra agent
# parameters are accepted but ignored.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    # let all agent succeed and behave the same for now
    my $poweroff_cmd = "power $node off";
    $self->sim_hardware_cmd($poweroff_cmd, $agent);

    return 0; # EXIT_SUCCESS
}
79e0e005 157
e5f43426
TL
# Set the requested state of service $sid and persist the configuration.
# Dies if the service is not configured. Returns the updated configuration.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'" if !$service;

    $service->{state} = $state;

    $self->write_service_config($conf);

    return $conf;
}
170
27ccc95c
TL
# Add a new service $sid with the options hash $opts and persist the
# configuration. Dies if the ID is already configured. Returns the updated
# configuration.
sub add_service {
    my ($self, $sid, $opts) = @_;

    my $conf = $self->read_service_config();

    if ($conf->{$sid}) {
        die "resource ID '$sid' already defined\n";
    }

    $conf->{$sid} = $opts;
    $self->write_service_config($conf);

    return $conf;
}
183
# Remove service $sid from the configuration and persist the result.
# Dies if the service is not configured. Returns the updated configuration.
sub delete_service {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'";
    }

    delete $conf->{$sid};
    $self->write_service_config($conf);

    return $conf;
}
197
# Move service $sid from $current_node to $new_node in the configuration.
# Dies if the service is unknown or if $current_node is not the node the
# configuration currently records for it. The result of
# write_service_config() is the (implicit) return value.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    if ($current_node ne $service->{node}) {
        die "current_node for '$sid' does not match ($current_node != $conf->{$sid}->{node})\n";
    }

    $service->{node} = $new_node;

    $self->write_service_config($conf);
}
212
cde11324
TL
# Return the lock currently recorded for service $sid (undef when there is
# none). Dies if the service is not configured.
sub service_has_lock {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    return $service->{lock};
}
222
# Record lock $lock on service $sid; a false/omitted $lock falls back to
# 'backup'. Dies if the service is not configured. Returns the updated
# configuration.
sub lock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    $service->{lock} = $lock ? $lock : 'backup';

    $self->write_service_config($conf);

    return $conf;
}
236
# Remove the lock of service $sid and persist the configuration.
#
# Returns the removed lock name, or undef when the service has no lock or
# when $lock is given but differs from the recorded lock (a warning is
# emitted in the latter case and nothing is written). Dies if the service
# is not configured.
sub unlock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    die "no such service '$sid'\n" if !$conf->{$sid};

    my $service = $conf->{$sid};

    return undef if !defined($service->{lock});

    if (defined($lock) && $service->{lock} ne $lock) {
        warn "found lock '$conf->{$sid}->{lock}' trying to remove '$lock' lock\n";
        return undef;
    }

    my $removed_lock = delete $service->{lock};

    $self->write_service_config($conf);

    return $removed_lock;
}
259
# Append $cmd (with exactly one trailing newline) to
# "<statusdir>/crm_commands", creating the file if needed. No lock is
# taken here. Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $filename = "$self->{statusdir}/crm_commands";

    my $queued = '';
    $queued = PVE::Tools::file_get_contents($filename) if -f $filename;

    PVE::Tools::file_set_contents($filename, $queued . "$cmd\n");

    return undef;
}
275
# Queue $cmd via queue_crm_commands_nolock() while holding the global
# lock. Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub { $self->queue_crm_commands_nolock($cmd); });

    return undef;
}
285
# Fetch and clear the CRM command queue while holding the global lock:
# returns the current content of "<statusdir>/crm_commands" ('' if the
# file does not exist) and leaves an empty file behind.
sub read_crm_commands {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/crm_commands";

    my $fetch_and_clear = sub {
        my $data = '';
        $data = PVE::Tools::file_get_contents($filename) if -f $filename;

        PVE::Tools::file_set_contents($filename, '');

        return $data;
    };

    return $self->global_lock($fetch_and_clear);
}
303
abc920b4
DM
# Parse "<statusdir>/groups" with PVE::HA::Groups->parse_config(). A
# missing file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";

    my $raw = (-f $filename) ? PVE::Tools::file_get_contents($filename) : '';

    return PVE::HA::Groups->parse_config($filename, $raw);
}
313
# Return the decoded content of "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/service_status_$node",
    );
}
320
# Write $data as JSON to "<statusdir>/service_status_<node>" and return
# the result of the JSON writer.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $res = PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/service_status_$node",
        $data,
    );

    # fixme: add test if a service runs on two nodes!!!

    return $res;
}
c4a221bc 331
abc920b4
DM
# Group configuration used when the test directory does not ship its own
# 'groups' file: one non-failback group per default node.
my $default_group_config = <<__EOD;
group: prefer_node1
    nodes node1
    nofailback 1

group: prefer_node2
    nodes node2
    nofailback 1

group: prefer_node3
    nodes node3
    nofailback 1
__EOD

# Constructor: set up a fresh "$testdir/status" directory for a simulation.
#
# The status directory is removed and recreated, then seeded from the
# files in $testdir. Where $testdir has no 'groups', 'service_config' or
# 'hardware_status' file, built-in defaults for three nodes and six
# services are written instead. 'fence.cfg', 'datacenter.cfg' and the
# per-node 'service_status_<node>' files are copied when present; a missing
# service status is initialized to an empty hash.
#
# Dies if $testdir is missing or not a directory, if the status directory
# cannot be created, or if copying an existing file fails. Only the copy
# of 'manager_status' is best-effort, because that file is optional.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    die "testdir '$testdir' does not exist or is not a directory!\n"
        if !-d $testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    if (!mkdir($statusdir)) {
        # remember the mkdir error, the -d check below may overwrite $!
        my $err = "$!";
        die "unable to create status directory '$statusdir' - $err\n"
            if !-d $statusdir;
    }

    # copy a file that is known to exist from $testdir into the status dir
    my $copy_initial = sub {
        my ($name) = @_;
        copy("$testdir/$name", "$statusdir/$name") ||
            die "Copy failed: $!\n";
    };

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_initial->('groups');
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_initial->('service_config');
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_initial->('hardware_status');
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    if (-f "$testdir/fence.cfg") {
        $copy_initial->('fence.cfg');
    }

    if (-f "$testdir/datacenter.cfg") {
        $copy_initial->('datacenter.cfg');
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            $copy_initial->("service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
422
# Abstract method: this base implementation only dies and has to be
# overridden by a subclass. The watchdog helpers in this file subtract
# the returned values from each other.
sub get_time {
    my ($self) = @_;

    die "implement in subclass";
}
428
# Print one log line "<level> <time> <id>: <msg>" to the currently selected
# output handle.
#
# $level - log level string, left aligned in 5 columns
# $msg   - message text, a trailing newline is stripped
# $id    - optional origin of the message, defaults to 'hardware'
#
# The time stamp comes from $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass the message as an argument instead of interpolating it into the
    # format, otherwise a '%' inside the message is parsed as a conversion
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
440
# Accessor for the status directory path. The $node argument is accepted
# but not used.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
446
ed408b44
TL
# Return the decoded content of "<statusdir>/datacenter.cfg"; an empty
# hash reference is handed to the JSON reader as second argument
# (presumably the default for a missing file - confirm in PVE::HA::Tools).
# The $node argument is accepted but not used.
sub read_datacenter_conf {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/datacenter.cfg",
        {},
    );
}
453
# Run $code while holding an exclusive flock on "<statusdir>/hardware.lck".
#
# $code is called in scalar context as $code->($lock_fh, @param). Its
# result is returned after the lock file handle has been closed; an
# exception thrown by $code is re-thrown, also after closing the handle.
#
# Dies if the lock file cannot be opened or the lock cannot be acquired.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    # Retry while flock() is merely interrupted by a signal; any other
    # failure is fatal, so $code never runs without holding the lock.
    until (flock($fh, LOCK_EX)) {
        next if $! == EINTR;

        my $errmsg = "$!"; # close() below may clobber $!
        close($fh);
        die "can't acquire lock '$lockfile' - $errmsg\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
484
8b3f9144
DM
# Derive per-node online state and cluster quorum from the hardware status.
#
# A node is online when both its power and network are 'on'. The cluster is
# quorate when more than int(node count / 2) nodes are online; without
# quorum every node is reported offline.
#
# Returns the list ($node_info, $quorate), where $node_info maps node names
# to { online => 0|1 }. $self is accepted but not used.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @nodes = keys %$cstatus;

    my $node_info = {};
    my $online_count = 0;

    for my $node (@nodes) {
        my $hw = $cstatus->{$node};

        my $online = ($hw->{power} eq 'on' && $hw->{network} eq 'on') ? 1 : 0;
        $node_info->{$node}->{online} = $online;

        $online_count += $online;
    }

    my $quorate = ($online_count > int(scalar(@nodes) / 2)) ? 1 : 0;

    if (!$quorate) {
        $node_info->{$_}->{online} = 0 for @nodes;
    }

    return ($node_info, $quorate);
};
514
# Read the hardware status (without locking) and return the list
# ($node_info, $quorate) computed from it by $compute_node_info.
sub get_node_info {
    my ($self) = @_;

    my $cstatus = $self->read_hardware_status_nolock();

    return $compute_node_info->($self, $cstatus);
}
523
ba2a45cd
TL
# helper for Sim/ only
#
# Return the simulated cluster filesystem flag $state of $node as recorded
# under {cfs} in the hardware status; an unset flag counts as true.
sub get_cfs_state {
    my ($self, $node, $state) = @_;

    # TODO: ensure nolock is OK when adding this to RTSim
    my $cstatus = $self->read_hardware_status_nolock();

    my $flag = $cstatus->{$node}->{cfs}->{$state};

    # we assume default true if not defined
    return 1 if !defined($flag);

    return $flag;
}
535
a5d48ae1
TL
# simulate hardware commands, the following commands are available:
#   power <node> <on|off>
#   network <node> <on|off>
#   delay <seconds>
#   cfs <node> <rw|update> <work|fail>
#   reboot <node>
#   shutdown <node>
#   restart-lrm <node>
#   crm <node> <start|stop>
#   service <sid> <started|disabled|stopped|ignored>
#   service <sid> <migrate|relocate> <target>
#   service <sid> stop <timeout>
#   service <sid> lock/unlock [lockname]
#   service <sid> add <node>
#   service <sid> delete
#
# NOTE: there is no 'delay' branch in this sub; a 'delay' command passed
# here ends up in the "no such node" check.
#
# $cmdstr is split on whitespace into command, object (node or service ID),
# action and one optional parameter. The whole command runs under the
# global lock and is logged with $logid before it is executed.
#
# Returns the hardware status hash as read at the start of the command,
# including any modification the command applied to it. Dies on unknown
# commands, nodes or actions and on missing mandatory parameters.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {
        my ($lock_fh) = @_;

        my $cstatus = $self->read_hardware_status_nolock();

        # split(' ', ...) also skips leading whitespace, so an indented
        # command does not produce an empty command word
        my ($cmd, $objid, $action, $param) = split(' ', $cmdstr);

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        # commands without an action compare against the empty string
        # instead of triggering uninitialized-value warnings
        $action //= '';

        my ($node, $sid, $d);

        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'\n"
                if $action !~ m/^(on|off)$/;

            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {

                    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
                    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
                    $d->{lrm_restart} = undef;
                    $cstatus->{$node}->{cfs} = {};

                } else {

                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $self->crm_control('stop', $d, $lock_fh);
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $self->lrm_control('stop', $d, $lock_fh);
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }

                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # the network state follows the power state
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'cfs') {
            die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
                if $action !~ m/^(rw|update)$/;

            my $cfs_cmd = $param // '';
            die "sim_hardware_cmd: unknown cfs command '$cfs_cmd' for '$action' on node '$node'"
                if $cfs_cmd !~ m/^(work|fail)$/;

            $cstatus->{$node}->{cfs}->{$action} = $cfs_cmd eq 'work';
            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $self->lrm_control('shutdown', $d, $lock_fh) if defined($d->{lrm});
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $self->lrm_control('shutdown', $d, $lock_fh);
            }
        } elsif ($cmd eq 'crm') {

            if ($action eq 'stop') {
                if ($d->{crm}) {
                    $d->{crm_stop} = 1;
                    $self->crm_control('shutdown', $d, $lock_fh);
                }
            } elsif ($action eq 'start') {
                $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
            } else {
                die "sim_hardware_cmd: unknown action '$action'";
            }

        } elsif ($cmd eq 'service') {
            if ($action eq 'started' || $action eq 'disabled' ||
                $action eq 'stopped' || $action eq 'ignored') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$param;

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'stop') {

                die "sim_hardware_cmd: missing timeout for '$action' command"
                    if !defined($param);

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'add') {

                # a service stored without a node would make every later
                # read_service_config() call die
                die "sim_hardware_cmd: missing node for '$action' command"
                    if !$param;

                $self->add_service($sid, {state => 'started', node => $param});

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } elsif ($action eq 'lock') {

                $self->lock_service($sid, $param);

            } elsif ($action eq 'unlock') {

                $self->unlock_service($sid, $param);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n";
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

        return $cstatus;
    };

    return $self->global_lock($code);
}
700
# for controlling the resource manager services
#
# Abstract hook for CRM control; this base implementation only dies.
# sim_hardware_cmd() calls it with the actions 'start', 'stop' and
# 'shutdown', the per-node data hash and the global lock file handle.
sub crm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
707
# Abstract hook for LRM control; this base implementation only dies.
# sim_hardware_cmd() calls it with the actions 'start', 'stop' and
# 'shutdown', the per-node data hash and the global lock file handle.
sub lrm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
713
# Abstract entry point of the simulation; this base implementation only
# dies and has to be overridden by a subclass.
sub run {
    my ($self) = @_;

    die "implement in subclass";
}
9329c1e2
DM
719
# Read-modify-write helper for "<statusdir>/watchdog_status", executed
# under the global lock.
#
# $code receives the decoded watchdog status (an empty hash if the file
# does not exist yet) and must return the list ($new_status, $result).
# $new_status is written back as JSON, $result is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    my $update_cmd = sub {
        my $wdstatus = {};

        if (-f $filename) {
            my $raw = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($raw);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    };

    return $self->global_lock($update_cmd);
};
745
0590c6a7
DM
# Drop every watchdog entry that belongs to $node from
# "<statusdir>/watchdog_status". Does nothing if the file does not exist.
# No lock is taken here.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    if (-f $filename) {
        my $raw = PVE::Tools::file_get_contents($filename);
        my $wdstatus = decode_json($raw);

        my @node_ids = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@node_ids};

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
    }
}
762
9329c1e2
DM
# Check the watchdogs of $node. Every watchdog whose last update is more
# than $watchdog_timeout behind the current get_time() value is removed.
# Returns 0 if at least one watchdog of the node had expired, 1 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_stale = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        my @node_handles = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;

        for my $wfh (@node_handles) {
            my $age = $self->get_time() - $wdstatus->{$wfh}->{update_time};

            next if $age <= $watchdog_timeout;

            # expired
            $all_alive = 0;
            delete $wdstatus->{$wfh};
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $expire_stale);
}
789
# Counter that makes watchdog handles created by this process unique.
my $wdcounter = 0;

# Register a new watchdog for $node, stamped with the current get_time()
# value, and return its handle "WD:<node>:<pid>:<counter>".
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter += 1;

        my $id = join(':', 'WD', $node, $$, $wdcounter);

        die "internal error" if defined($wdstatus->{$id});

        $wdstatus->{$id} = {
            node => $node,
            update_time => $self->get_time(),
        };

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
814
# Remove the watchdog with handle $wfh. Dies if the handle is unknown or
# if the watchdog has already expired.
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $unregister = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $wd->{update_time};
        die "watchdog expired" if $age > $watchdog_timeout;

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $unregister);
}
834
# Refresh the watchdog with handle $wfh by setting its update_time to the
# current get_time() value. Dies if the handle is unknown or if the
# watchdog has already expired.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();

        die "watchdog expired" if ($now - $wd->{update_time}) > $watchdog_timeout;

        $wd->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
857
0cfd8f5b 8581;