]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Sim/Hardware.pm
Add crm command 'stop'
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
CommitLineData
8b3f9144
DM
1package PVE::HA::Sim::Hardware;
2
3# Simulate Hardware resources
4
5# power supply for nodes: on/off
6# network connection to nodes: on/off
7# watchdog devices for nodes
0cfd8f5b
DM
8
9use strict;
10use warnings;
11use POSIX qw(strftime EINTR);
289e4784 12use JSON;
0cfd8f5b
DM
13use IO::File;
14use Fcntl qw(:DEFAULT :flock);
787b66eb
DM
15use File::Copy;
16use File::Path qw(make_path remove_tree);
c982dfee 17use PVE::HA::FenceConfig;
7d33cb12 18use PVE::HA::Groups;
f5a14b93 19
17b5cf98 20my $watchdog_timeout = 60;
0bba8f60 21
0cfd8f5b 22
787b66eb
DM
23# Status directory layout
24#
25# configuration
26#
8456bde2
DM
27# $testdir/cmdlist Command list for simulation
28# $testdir/hardware_status Hardware description (number of nodes, ...)
29# $testdir/manager_status CRM status (start with {})
30# $testdir/service_config Service configuration
abc920b4 31# $testdir/groups HA groups configuration
8456bde2 32# $testdir/service_status_<node> Service status
ed408b44 33# $testdir/datacenter.cfg Datacenter wide HA configuration
3c36cbca 34
9329c1e2
DM
35#
36# runtime status for simulation system
37#
38# $testdir/status/cluster_locks Cluster locks
39# $testdir/status/hardware_status Hardware status (power/network on/off)
40# $testdir/status/watchdog_status Watchdog status
787b66eb
DM
41#
42# runtime status
9329c1e2 43#
8456bde2
DM
44# $testdir/status/lrm_status_<node> LRM status
45# $testdir/status/manager_status CRM status
abc920b4 46# $testdir/status/crm_commands CRM command queue
8456bde2
DM
47# $testdir/status/service_config Service configuration
48# $testdir/status/service_status_<node> Service status
abc920b4 49# $testdir/status/groups HA groups configuration
c4a221bc
DM
50
# Load the LRM status object stored for $node below the status directory.
# An empty hash reference is passed as second argument to the JSON reader
# (presumably the default used when the file does not exist - verify in
# PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/lrm_status_$node";

    return PVE::HA::Tools::read_json_from_file($path, {});
}
58
# Store $status_obj as the LRM status of $node below the status directory.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $path = "$self->{statusdir}/lrm_status_$node";

    PVE::HA::Tools::write_json_to_file($path, $status_obj);
}
787b66eb 66
# Read and decode the JSON hardware status file from the status directory.
# As the name says, no lock is taken here; callers needing consistency
# have to hold the global lock themselves.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $json = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");

    return decode_json($json);
}
77
# Encode $cstatus as JSON and write it to the hardware status file in the
# status directory. No lock is taken here.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $json = encode_json($cstatus);

    PVE::Tools::file_set_contents("$self->{statusdir}/hardware_status", $json);
}
85
95360669
DM
# Read the service configuration and complete every entry in place:
#  - type and name are derived from the service ID (vm|ct|fa):<number>
#  - a missing state becomes 'disabled', the legacy 'enabled' becomes 'started'
#  - max_restart and max_relocate default to 1
# Dies if a service has no node assigned or an ID of unknown form.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        my ($type, $name) = $sid =~ m/^(vm|ct|fa):(\d+)$/
            or die "implement me";

        $entry->{type} = $type;
        $entry->{name} = $name;

        $entry->{state} ||= 'disabled';
        if ($entry->{state} eq 'enabled') {
            # backward compatibility
            $entry->{state} = 'started';
        }

        for my $limit (qw(max_restart max_relocate)) {
            $entry->{$limit} = 1 unless defined($entry->{$limit});
        }
    }

    return $conf;
}
111
76b83c72
FE
# Merge the key/value pairs of the hash reference $param into the
# configuration of service $sid and write the whole configuration back.
# Dies with "no such resource" if $sid is not configured.
sub update_service_config {
    my ($self, $sid, $param) = @_;

    my $conf = $self->read_service_config();

    my $sconf = $conf->{$sid} || die "no such resource '$sid'\n";

    # iterate over the keys only - looping over the flattened hash would
    # also treat every value as a key and add bogus undef entries
    foreach my $k (keys %$param) {
        $sconf->{$k} = $param->{$k};
    }

    $self->write_service_config($conf);
}
125
79e0e005
DM
# Remember $conf as the current in-memory service configuration and write
# it to the service_config file in the status directory. Returns whatever
# the JSON writer returns.
sub write_service_config {
    my ($self, $conf) = @_;

    my $target = "$self->{statusdir}/service_config";

    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file($target, $conf);
}
134
# Parse the fence configuration of the simulation. The raw content handed
# to the parser is undef when no fence.cfg exists in the status directory.
sub read_fence_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/fence.cfg";

    my $raw = -e $filename ? PVE::Tools::file_get_contents($filename) : undef;

    return PVE::HA::FenceConfig::parse_config($filename, $raw);
}
147
# Simulated fence agent run: every agent behaves the same for now and just
# powers off $node through sim_hardware_cmd, using the agent name as log id.
# Extra parameters are accepted but ignored. Always returns 0 (EXIT_SUCCESS).
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    my $poweroff = "power $node off";
    $self->sim_hardware_cmd($poweroff, $agent);

    return 0; # EXIT_SUCCESS
}
79e0e005 156
e5f43426
TL
# Set the requested state of service $sid and persist the configuration.
# Dies if the service is unknown; returns the updated configuration.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid}
        or die "no such service '$sid'";

    $service->{state} = $state;

    $self->write_service_config($conf);

    return $conf;
}
169
27ccc95c
TL
# Register a new service $sid with the options hash $opts and persist the
# configuration. Dies if the ID is already in use; returns the updated
# configuration.
sub add_service {
    my ($self, $sid, $opts) = @_;

    my $conf = $self->read_service_config();

    if ($conf->{$sid}) {
        die "resource ID '$sid' already defined\n";
    }

    $conf->{$sid} = $opts;
    $self->write_service_config($conf);

    return $conf;
}
182
# Remove service $sid from the configuration and persist the result.
# Dies if the service is unknown; returns the updated configuration.
sub delete_service {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'";
    }

    delete $conf->{$sid};
    $self->write_service_config($conf);

    return $conf;
}
196
# Move service $sid from $current_node to $new_node in the configuration.
# Dies if the service is unknown or if $current_node is not the node
# currently recorded for the service.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    if ($current_node ne $service->{node}) {
        die "current_node for '$sid' does not match ($current_node != $service->{node})\n";
    }

    $service->{node} = $new_node;

    $self->write_service_config($conf);
}
211
cde11324
TL
# Return the lock entry stored for service $sid (undef when it has none).
# Dies if the service is unknown.
sub service_has_lock {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    return $service->{lock};
}
221
# Put a lock on service $sid and persist the configuration. A false or
# missing $lock name falls back to 'backup'. Dies if the service is
# unknown; returns the updated configuration.
sub lock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    my $lockname = $lock || 'backup';
    $service->{lock} = $lockname;

    $self->write_service_config($conf);

    return $conf;
}
235
# Remove the lock of service $sid. When $lock is given, the lock is only
# removed if it carries that name; otherwise a warning is emitted.
# Returns the removed lock name, or undef if nothing was removed.
# Dies if the service is unknown.
sub unlock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    my $held = $service->{lock};

    # nothing to do for an unlocked service
    return undef if !defined($held);

    if (defined($lock) && $held ne $lock) {
        warn "found lock '$held' trying to remove '$lock' lock\n";
        return undef;
    }

    delete $service->{lock};

    $self->write_service_config($conf);

    return $held;
}
258
# Append one command line to the crm_commands queue file in the status
# directory. A trailing newline of $cmd is stripped and exactly one is
# written. No lock is taken here. Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $filename = "$self->{statusdir}/crm_commands";

    # start from the already queued commands, if there are any
    my $queued = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

    PVE::Tools::file_set_contents($filename, $queued . "$cmd\n");

    return undef;
}
274
# Locked variant of queue_crm_commands_nolock: queues $cmd while holding
# the global lock. Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub {
        $self->queue_crm_commands_nolock($cmd);
    });

    return undef;
}
284
# Fetch and clear the CRM command queue under the global lock. Returns
# the queued text ('' when the queue file does not exist); the queue file
# is truncated (and created if missing) in the same locked section.
sub read_crm_commands {
    my ($self) = @_;

    my $fetch_and_clear = sub {
        my $filename = "$self->{statusdir}/crm_commands";

        my $pending = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

        PVE::Tools::file_set_contents($filename, '');

        return $pending;
    };

    return $self->global_lock($fetch_and_clear);
}
302
abc920b4
DM
# Parse the HA groups configuration from the status directory. A missing
# groups file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";

    my $raw = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

    return PVE::HA::Groups->parse_config($filename, $raw);
}
312
# Load the service status recorded for $node from the status directory.
sub read_service_status {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/service_status_$node";

    return PVE::HA::Tools::read_json_from_file($path);
}
319
# Store $data as the service status of $node in the status directory and
# return the result of the JSON writer.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $path = "$self->{statusdir}/service_status_$node";

    # FIXME: add test if a service runs on two nodes!!!
    return PVE::HA::Tools::write_json_to_file($path, $data);
}
c4a221bc 330
abc920b4
DM
331my $default_group_config = <<__EOD;
332group: prefer_node1
333 nodes node1
e941bdc5 334 nofailback 1
abc920b4
DM
335
336group: prefer_node2
337 nodes node2
e941bdc5 338 nofailback 1
abc920b4
DM
339
340group: prefer_node3
7a294ad4 341 nodes node3
e941bdc5 342 nofailback 1
abc920b4
DM
343__EOD
344
0cfd8f5b
DM
# Constructor. $testdir must be an existing directory holding the initial
# test configuration. A fresh "$testdir/status" directory is created (any
# previous one is removed) and populated from the files in $testdir;
# built-in defaults are written for groups, service_config and
# hardware_status when the test does not provide them. One (initially
# empty) entry in $self->{nodes} is created per node found in the
# hardware status.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    die "testdir '$testdir' does not exist or is not a directory!\n"
        if !-d $testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = "$testdir/status";
    $self->{statusdir} = $statusdir;

    # always start with an empty runtime directory
    remove_tree($statusdir);
    mkdir $statusdir;

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (!-f "$testdir/groups") {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    } else {
        copy("$testdir/groups", "$statusdir/groups");
    }

    if (!-f "$testdir/service_config") {
        # default: vm:101 .. vm:106 spread round-robin over node1 .. node3
        my $conf = {};
        for my $offset (0 .. 5) {
            my $nr = ($offset % 3) + 1;
            $conf->{'vm:' . (101 + $offset)} = {
                node => "node$nr",
                group => "prefer_node$nr",
            };
        }
        $self->write_service_config($conf);
    } else {
        copy("$testdir/service_config", "$statusdir/service_config");
    }

    if (!-f "$testdir/hardware_status") {
        # default: three nodes, all powered off and disconnected
        my $cstatus = {
            map { ("node$_" => { power => 'off', network => 'off' }) } 1 .. 3
        };
        $self->write_hardware_status_nolock($cstatus);
    } else {
        copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
            die "Copy failed: $!\n";
    }

    # purely optional files, copied only when the test provides them
    for my $optional (qw(fence.cfg datacenter.cfg)) {
        copy("$testdir/$optional", "$statusdir/$optional")
            if -f "$testdir/$optional";
    }

    my $cstatus = $self->read_hardware_status_nolock();

    for my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        my $initial_status = "$testdir/service_status_$node";
        if (-f $initial_status) {
            copy($initial_status, "$statusdir/service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
421
# Abstract method: subclasses have to provide the current simulation time.
# This base implementation always dies.
sub get_time {
    my ($self) = @_;

    die "implement in subclass";
}
427
# Print one formatted log line: level, current time from get_time(), the
# origin $id (defaults to 'hardware') and the message. A trailing newline
# of $msg is stripped and exactly one is printed.
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass the message as an argument instead of interpolating it into the
    # format string, so a '%' inside the message is printed literally
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
439
# Accessor for the runtime status directory. The $node argument is
# accepted but not used; the same directory is returned for every node.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
445
ed408b44
TL
# Read datacenter.cfg from the status directory via the JSON reader; an
# empty hash reference is passed as its second argument (presumably the
# default for a missing file - verify in PVE::HA::Tools). $node is unused.
sub read_datacenter_conf {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/datacenter.cfg";

    return PVE::HA::Tools::read_json_from_file($path, {});
}
452
# Run $code while holding an exclusive flock on "<statusdir>/hardware.lck".
#
# $code is called as $code->($lock_fh, @param). Its (scalar) result is
# returned; an exception raised by $code is re-thrown after the lock file
# handle has been closed. Dies if the lock file cannot be opened or the
# lock cannot be acquired.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    # explicit mode argument, so the file name is never parsed for a mode
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    # Retry only when flock was interrupted by a signal. Every other
    # failure is fatal - running $code without the lock must not happen.
    while (!flock($fh, LOCK_EX)) {
        next if $! == EINTR;

        my $reason = "$!"; # save it, close() may overwrite $!
        close($fh);
        die "can't acquire lock '$lockfile' - $reason\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    # closing the handle releases the lock
    close($fh);

    die $err if $err;

    return $res;
}
483
8b3f9144
DM
# Derive the per-node online flags and the quorum state from the hardware
# status hash. A node counts as online when both power and network are
# 'on'; the cluster is quorate when more than half of all nodes are
# online. Without quorum every node is reported offline.
# Returns the list ($node_info, $quorate).
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @nodes = keys %$cstatus;

    my $node_info = {};
    my $online_count = 0;

    for my $node (@nodes) {
        my $hw = $cstatus->{$node};

        my $online = ($hw->{power} eq 'on' && $hw->{network} eq 'on') ? 1 : 0;

        $node_info->{$node}->{online} = $online;
        $online_count += $online;
    }

    my $quorate = ($online_count > int(scalar(@nodes) / 2)) ? 1 : 0;

    if (!$quorate) {
        $node_info->{$_}->{online} = 0 for @nodes;
    }

    return ($node_info, $quorate);
};
513
# Return the list ($node_info, $quorate) computed from the current
# hardware status. The status file is read without taking the lock.
sub get_node_info {
    my ($self) = @_;

    my $cstatus = $self->read_hardware_status_nolock();

    my @result = $compute_node_info->($self, $cstatus);

    return ($result[0], $result[1]);
}
522
ba2a45cd
TL
523# helper for Sim/ only
# Look up the simulated cfs flag $state of $node in the hardware status.
# A flag that was never set counts as working (true); otherwise the
# stored value is returned.
sub get_cfs_state {
    my ($self, $node, $state) = @_;

    # TODO: ensure nolock is OK when adding this to RTSim
    my $cstatus = $self->read_hardware_status_nolock();

    my $flag = $cstatus->{$node}->{cfs}->{$state};

    # we assume default true if not defined
    return !defined($flag) || $flag;
}
534
# simulate hardware commands
# power <node> <on|off>
# network <node> <on|off>
# cfs <node> <rw|update> <work|fail>
# reboot <node>
# shutdown <node>
# restart-lrm <node>
# crm <node> <start|stop>
# service <sid> <started|disabled|stopped|ignored>
# service <sid> <migrate|relocate> <target>
# service <sid> stop <timeout>
# service <sid> add <node>
# service <sid> delete
# service <sid> lock/unlock [lockname]
0cfd8f5b 546
# Execute one simulated hardware/cluster command under the global lock.
#
# $cmdstr is a whitespace separated line "<cmd> <node|sid> [action] [param]".
# Handled commands are power, network, cfs, reboot, shutdown, restart-lrm,
# crm (all addressed to a node) and service (addressed to a service ID).
# $logid is forwarded to log() as the origin of the "execute" message.
#
# Dies on unknown commands or actions, unknown nodes and missing
# arguments. Returns the hardware status hash read at the start of the
# command, including the changes made by the command.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {
        my ($lock_fh) = @_;

        my $cstatus = $self->read_hardware_status_nolock();

        # split(' ') skips leading whitespace, so an indented command line
        # does not start with an empty command field
        my ($cmd, $objid, $action, $param) = split(' ', $cmdstr);

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        # several commands carry no action; normalize so the checks below
        # never match or compare against undef
        $action = '' if !defined($action);

        my ($node, $sid, $d);

        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'\n"
                if $action !~ m/^(on|off)$/;

            # only start/stop the daemons on a real power state change
            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {

                    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
                    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
                    $d->{lrm_restart} = undef;
                    $cstatus->{$node}->{cfs} = {};

                } else {

                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $self->crm_control('stop', $d, $lock_fh);
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $self->lrm_control('stop', $d, $lock_fh);
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }

                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # the network state always follows the power state
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'cfs') {
            die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
                if $action !~ m/^(rw|update)$/;

            my $cfs_cmd = defined($param) ? $param : '';
            die "sim_hardware_cmd: unknown cfs command '$cfs_cmd' for '$action' on node '$node'"
                if $cfs_cmd !~ m/^(work|fail)$/;

            $cstatus->{$node}->{cfs}->{$action} = $cfs_cmd eq 'work';
            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $self->lrm_control('shutdown', $d, $lock_fh) if defined($d->{lrm});
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $self->lrm_control('shutdown', $d, $lock_fh);
            }
        } elsif ($cmd eq 'crm') {

            if ($action eq 'stop') {
                if ($d->{crm}) {
                    $d->{crm_stop} = 1;
                    $self->crm_control('shutdown', $d, $lock_fh);
                }
            } elsif ($action eq 'start') {
                $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
            } else {
                die "sim_hardware_cmd: unknown action '$action'";
            }

        } elsif ($cmd eq 'service') {
            if ($action eq 'started' || $action eq 'disabled' ||
                $action eq 'stopped' || $action eq 'ignored') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$param;

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'stop') {

                die "sim_hardware_cmd: missing timeout for '$action' command"
                    if !defined($param);

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'add') {

                # a service stored without node makes every later
                # read_service_config() call die, so refuse it up front
                die "sim_hardware_cmd: missing node for '$action' command"
                    if !$param;

                $self->add_service($sid, {state => 'started', node => $param});

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } elsif ($action eq 'lock') {

                # $param (lock name) is optional and passed on as is
                $self->lock_service($sid, $param);

            } elsif ($action eq 'unlock') {

                $self->unlock_service($sid, $param);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n";
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

        return $cstatus;
    };

    return $self->global_lock($code);
}
698
699# for controlling the resource manager services
# Abstract method controlling the CRM service of a node; sim_hardware_cmd
# calls it with the actions 'start', 'stop' and 'shutdown'. This base
# implementation always dies.
sub crm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
705
# Abstract method controlling the LRM service of a node; sim_hardware_cmd
# calls it with the actions 'start', 'stop' and 'shutdown'. This base
# implementation always dies.
sub lrm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
711
# Abstract entry point of the simulation; has to be provided by a
# subclass. This base implementation always dies.
sub run {
    my ($self) = @_;

    die "implement in subclass";
}
9329c1e2
DM
717
# Read-modify-write helper for the watchdog status file, executed under
# the global lock. $code receives the decoded status hash (empty when the
# file does not exist yet) and has to return the list
# ($new_status, $result); $new_status is written back and $result is
# returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $filename = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $filename) {
            my $raw = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($raw);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    });
};
743
0590c6a7
DM
# Drop all watchdog entries belonging to $node from the watchdog status
# file. Nothing is done when the file does not exist. No lock is taken
# here.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    if (-f $filename) {
        my $raw = PVE::Tools::file_get_contents($filename);
        my $wdstatus = decode_json($raw);

        my @node_ids = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@node_ids};

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
    }
}
760
9329c1e2
DM
# Check the watchdogs of $node. Every watchdog of the node whose last
# update is more than $watchdog_timeout ago is removed. Returns 1 if no
# watchdog of the node expired, 0 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_old = sub {
        my ($wdstatus) = @_;

        my $alive = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next if $wd->{node} ne $node;

            my $age = $self->get_time() - $wd->{update_time};

            if ($age > $watchdog_timeout) { # expired
                delete $wdstatus->{$wfh};
                $alive = 0;
            }
        }

        return ($wdstatus, $alive);
    };

    return $modify_watchog->($self, $expire_old);
}
787
# process wide counter, makes the generated watchdog handles unique
my $wdcounter = 0;

# Register a new watchdog for $node, stamped with the current time, and
# return its handle (a string of the form "WD:<node>:<pid>:<counter>").
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        my $id = join(':', 'WD', $node, $$, ++$wdcounter);

        die "internal error" if defined($wdstatus->{$id});

        my $now = $self->get_time();
        $wdstatus->{$id} = { node => $node, update_time => $now };

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
812
# Close (remove) the watchdog with handle $wfh. Dies if the handle is
# unknown or if the watchdog already expired.
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $remove = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $wd->{update_time};
        die "watchdog expired" if $age > $watchdog_timeout;

        delete $wdstatus->{$wfh};

        # no result value for the caller, only the new status
        return ($wdstatus);
    };

    return $modify_watchog->($self, $remove);
}
832
# Refresh the watchdog with handle $wfh by setting its update time to the
# current time. Dies if the handle is unknown or if the watchdog already
# expired.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();
        my $age = $now - $wd->{update_time};

        die "watchdog expired" if $age > $watchdog_timeout;

        $wd->{update_time} = $now;

        # no result value for the caller, only the new status
        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
855
0cfd8f5b 8561;