]> git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/Sim/Hardware.pm
sim: hardware: commands: fix documentation for add
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
1 package PVE::HA::Sim::Hardware;
2
3 # Simulate Hardware resources
4
5 # power supply for nodes: on/off
6 # network connection to nodes: on/off
7 # watchdog devices for nodes
8
9 use strict;
10 use warnings;
11
12 use Fcntl qw(:DEFAULT :flock);
13 use File::Copy;
14 use File::Path qw(make_path remove_tree);
15 use IO::File;
16 use JSON;
17 use POSIX qw(strftime EINTR);
18
19 use PVE::HA::FenceConfig;
20 use PVE::HA::Groups;
21
22 my $watchdog_timeout = 60;
23
24 # Status directory layout
25 #
26 # configuration
27 #
28 # $testdir/cmdlist Command list for simulation
29 # $testdir/hardware_status Hardware description (number of nodes, ...)
30 # $testdir/manager_status CRM status (start with {})
31 # $testdir/service_config Service configuration
32 # $testdir/static_service_stats Static service usage information (cpu, memory)
33 # $testdir/groups HA groups configuration
34 # $testdir/service_status_<node> Service status
35 # $testdir/datacenter.cfg Datacenter wide HA configuration
36
37 #
38 # runtime status for simulation system
39 #
40 # $testdir/status/cluster_locks Cluster locks
41 # $testdir/status/hardware_status Hardware status (power/network on/off)
42 # $testdir/status/static_service_stats Static service usage information (cpu, memory)
43 # $testdir/status/watchdog_status Watchdog status
44 #
45 # runtime status
46 #
47 # $testdir/status/lrm_status_<node> LRM status
48 # $testdir/status/manager_status CRM status
49 # $testdir/status/crm_commands CRM command queue
50 # $testdir/status/service_config Service configuration
51 # $testdir/status/service_status_<node> Service status
52 # $testdir/status/groups HA groups configuration
53
# Read the LRM status object stored for $node.
#
# The data is loaded from "<statusdir>/lrm_status_<node>" through
# PVE::HA::Tools::read_json_from_file(), which gets an empty hash as second
# argument (presumably the default for a missing file - confirm in Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/lrm_status_$node",
        {},
    );
}
61
# Store $status_obj as LRM status of $node in
# "<statusdir>/lrm_status_<node>".
#
# Returns whatever PVE::HA::Tools::write_json_to_file() returns.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    return PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/lrm_status_$node",
        $status_obj,
    );
}
69
# Load and decode the JSON hardware status from "<statusdir>/hardware_status".
#
# As the name says, no lock is taken here.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $raw = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");

    return decode_json($raw);
}
80
# Encode $cstatus as JSON and write it to "<statusdir>/hardware_status".
#
# As the name says, no lock is taken here.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $json = encode_json($cstatus);

    PVE::Tools::file_set_contents("$self->{statusdir}/hardware_status", $json);
}
88
# Read "<statusdir>/service_config" and normalize every entry.
#
# For each service ID the entry gets:
#   - 'type' and 'name', split from an ID of the form (vm|ct|fa):<digits>
#   - 'state', defaulting to 'disabled'; the legacy value 'enabled' is
#     mapped to 'started'
#   - 'max_restart' and 'max_relocate', both defaulting to 1
#
# Dies if an entry has no node assigned or if the ID has an unknown format.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        die "implement me" if $sid !~ m/^(vm|ct|fa):(\d+)$/;
        @$entry{qw(type name)} = ($1, $2);

        $entry->{state} ||= 'disabled';
        $entry->{state} = 'started' if $entry->{state} eq 'enabled'; # backward compatibility

        $entry->{max_restart} //= 1;
        $entry->{max_relocate} //= 1;
    }

    return $conf;
}
114
# Update the properties of an existing service.
#
# $sid   - ID of the service to change
# $param - hash reference, each key/value pair is stored in the service entry
#
# Dies with "no such resource" if $sid is not part of the service config.
# Returns the result of write_service_config().
sub update_service_config {
    my ($self, $sid, $param) = @_;

    my $conf = $self->read_service_config();

    my $sconf = $conf->{$sid} || die "no such resource '$sid'\n";

    # iterate over the keys only - a plain '%$param' in list context would also
    # yield the values and add each of them as a bogus, undef-valued property
    foreach my $k (keys %$param) {
        $sconf->{$k} = $param->{$k};
    }

    $self->write_service_config($conf);
}
128
# Remember $conf as the current service configuration of this object and
# persist it to "<statusdir>/service_config".
#
# Returns whatever PVE::HA::Tools::write_json_to_file() returns.
sub write_service_config {
    my ($self, $conf) = @_;

    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file("$self->{statusdir}/service_config", $conf);
}
137
# Parse the fence configuration from "<statusdir>/fence.cfg".
#
# If the file does not exist, the parser is called with undef as raw content.
sub read_fence_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/fence.cfg";
    my $raw = -e $filename ? PVE::Tools::file_get_contents($filename) : undef;

    return PVE::HA::FenceConfig::parse_config($filename, $raw);
}
150
# Simulate the execution of fence agent $agent against $node.
#
# All agents behave the same for now: the node gets powered off through
# sim_hardware_cmd(), with the agent name used as log ID. Additional
# parameters are accepted but ignored.
#
# Always returns 0 (EXIT_SUCCESS).
sub exec_fence_agent {
    my ($self, $agent, $node) = @_;

    my $poweroff_cmd = "power $node off";
    $self->sim_hardware_cmd($poweroff_cmd, $agent);

    return 0; # EXIT_SUCCESS
}
159
# Set the requested state of service $sid to $state and persist the config.
#
# Dies if the service does not exist. Returns the updated service config.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'" if !$entry;

    $entry->{state} = $state;
    $self->write_service_config($conf);

    return $conf;
}
172
# Add a new service $sid with the properties from the hash reference $opts.
#
# The 'type' and 'name' properties are derived from the two ':' separated
# parts of $sid and stored in $opts itself, which becomes the config entry.
#
# Dies if $sid is already defined. Returns the updated service config.
sub add_service {
    my ($self, $sid, $opts) = @_;

    my $conf = $self->read_service_config();
    die "resource ID '$sid' already defined\n" if $conf->{$sid};

    my ($type, $name) = split(/:/, $sid);

    $conf->{$sid} = $opts;
    $conf->{$sid}->{type} = $type;
    $conf->{$sid}->{name} = $name;

    $self->write_service_config($conf);

    return $conf;
}
186
# Remove service $sid from the service configuration.
#
# Dies if the service does not exist. Returns the updated service config.
sub delete_service {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'";
    }

    delete $conf->{$sid};
    $self->write_service_config($conf);

    return $conf;
}
200
# Move service $sid from $current_node to $new_node in the service config.
#
# Dies if the service does not exist or if it is not located on
# $current_node according to the configuration.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'\n" if !$entry;

    if ($current_node ne $entry->{node}) {
        die "current_node for '$sid' does not match ($current_node != $entry->{node})\n";
    }

    $entry->{node} = $new_node;

    $self->write_service_config($conf);
}
215
# Return the value of the 'lock' property of service $sid (undef if the
# property is not set). Dies if the service does not exist.
sub service_has_lock {
    my ($self, $sid) = @_;

    my $entry = $self->read_service_config()->{$sid};
    die "no such service '$sid'\n" if !$entry;

    return $entry->{lock};
}
225
# Set the 'lock' property of service $sid to $lock, using 'backup' if no
# (or a false) lock name is given.
#
# Dies if the service does not exist. Returns the updated service config.
sub lock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'\n" if !$entry;

    my $lockname = $lock || 'backup';
    $entry->{lock} = $lockname;

    $self->write_service_config($conf);

    return $conf;
}
239
# Remove the lock of service $sid.
#
# If $lock is given, the lock is only removed when its name matches;
# otherwise a warning is emitted and nothing changes.
#
# Dies if the service does not exist. Returns the name of the removed lock,
# or undef if there was no lock or the lock name did not match.
sub unlock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'\n" if !$entry;

    my $held = $entry->{lock};
    return undef if !defined($held);

    if (defined($lock) && $held ne $lock) {
        warn "found lock '$held' trying to remove '$lock' lock\n";
        return undef;
    }

    delete $entry->{lock};

    $self->write_service_config($conf);

    return $held;
}
262
# Append $cmd as one line to the CRM command queue file
# "<statusdir>/crm_commands", creating the file if needed.
#
# As the name says, no lock is taken here. Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $filename = "$self->{statusdir}/crm_commands";

    my $queued = -f $filename ? PVE::Tools::file_get_contents($filename) : '';
    $queued .= "$cmd\n";

    PVE::Tools::file_set_contents($filename, $queued);

    return undef;
}
278
# Append $cmd to the CRM command queue while holding the global lock.
#
# Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub {
        $self->queue_crm_commands_nolock($cmd);
    });

    return undef;
}
288
# Fetch and clear the CRM command queue while holding the global lock.
#
# Returns the content of "<statusdir>/crm_commands" (empty string if the
# file does not exist); the file is truncated afterwards in any case.
sub read_crm_commands {
    my ($self) = @_;

    return $self->global_lock(sub {
        my $filename = "$self->{statusdir}/crm_commands";

        my $data = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

        PVE::Tools::file_set_contents($filename, '');

        return $data;
    });
}
306
# Parse the HA group configuration from "<statusdir>/groups".
#
# A missing file is handled like an empty one.
sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";
    my $raw = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

    return PVE::HA::Groups->parse_config($filename, $raw);
}
316
# Read the service status of $node from
# "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/service_status_$node",
    );
}
323
# Write $data as service status of $node to
# "<statusdir>/service_status_<node>".
#
# Returns whatever PVE::HA::Tools::write_json_to_file() returns.
sub write_service_status {
    my ($self, $node, $data) = @_;

    # fixme: add test if a service runs on two nodes!!!

    return PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/service_status_$node",
        $data,
    );
}
334
# Read the static service usage stats from
# "<statusdir>/static_service_stats".
#
# A failure to load the file is not fatal: it gets logged with level 'error'
# and the (then undefined) result is returned.
sub read_static_service_stats {
    my ($self) = @_;

    my $stats = eval {
        PVE::HA::Tools::read_json_from_file("$self->{statusdir}/static_service_stats");
    };
    if ($@) {
        $self->log('error', "loading static service stats failed - $@");
    }

    return $stats;
}
344
# Group configuration used by new() when the test directory ships no
# 'groups' file: one group per default node, each with 'nofailback' set.
#
# NOTE(review): the property lines are assumed to be indented by four spaces;
# confirm the exact leading whitespace against the repository.
my $default_group_config = <<'__EOD';
group: prefer_node1
    nodes node1
    nofailback 1

group: prefer_node2
    nodes node2
    nofailback 1

group: prefer_node3
    nodes node3
    nofailback 1
__EOD
358
# Create a new simulated hardware object for the test in $testdir.
#
# The runtime status directory "$testdir/status" is wiped and re-created, then
# populated with the initial configuration. Files shipped in $testdir are
# copied; for groups, service_config and hardware_status a built-in default
# (three nodes, six VMs, one preferred-node group per node) is written when
# the test directory does not provide them.
#
# Dies if $testdir is missing or not a directory, if the status directory
# cannot be created, or if copying an existing configuration file fails.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    die "testdir '$testdir' does not exist or is not a directory!\n"
        if !-d $testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    if (!mkdir($statusdir)) {
        my $err = $!; # save it, the -d check below may clobber $!
        die "unable to create status directory '$statusdir' - $err\n"
            if !-d $statusdir;
    }

    # copy a file which is known to exist in $testdir, failing is an error
    my $copy_existing = sub {
        my ($name) = @_;

        copy("$testdir/$name", "$statusdir/$name") ||
            die "Copy failed: $!\n";
    };

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_existing->('groups');
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_existing->('service_config');
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_existing->('hardware_status');
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    # purely optional files, there is no default for them
    for my $name (qw(fence.cfg datacenter.cfg static_service_stats)) {
        $copy_existing->($name) if -f "$testdir/$name";
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            $copy_existing->("service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
439
# Return the current simulation time.
#
# Abstract here - this base implementation always dies, subclasses have to
# override it.
sub get_time {
    die "implement in subclass";
}
445
# Print a log line to the currently selected output handle.
#
# $level - log level, printed left-aligned in a 5 character column
# $msg   - log message, a trailing newline is removed
# $id    - optional ID of the logging component, defaults to 'hardware'
#
# The line also contains the current time as returned by get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass the message as argument instead of interpolating it into the format
    # string, else a '%' in the message gets interpreted as conversion
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
457
# Return the path of the runtime status directory. The $node argument is
# accepted but not used.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
463
# Read the datacenter configuration from "<statusdir>/datacenter.cfg".
#
# The file is read through PVE::HA::Tools::read_json_from_file(), which gets
# an empty hash as second argument (presumably the default for a missing
# file - confirm in Tools). The $node argument is accepted but not used.
sub read_datacenter_conf {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/datacenter.cfg",
        {},
    );
}
470
# Run $code while holding an exclusive lock on "<statusdir>/hardware.lck".
#
# $code  - code reference, called in scalar context as
#          $code->($lock_fh, @param)
# @param - additional parameters passed on to $code
#
# Acquiring the lock blocks; it is retried if flock() gets interrupted by a
# signal (EINTR), any other flock() error is fatal. The lock file handle is
# closed again after $code finished, also if it died - the error is then
# re-raised.
#
# Returns the result of $code.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    while (!flock($fh, LOCK_EX)) {
        next if $! == EINTR; # interrupted by a signal, just try again

        my $err = $!; # save it, close() may clobber $!
        close($fh);
        die "can't acquire lock '$lockfile' - $err\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
501
# Compute the online state of all nodes and the quorum state from the
# hardware status $cstatus.
#
# A node counts as online if both its power and its network are 'on'. The
# cluster is quorate if more than half of all nodes are online; without
# quorum every node is reported as offline.
#
# Returns the list ($node_info, $quorate), with $node_info mapping each node
# name to { online => 0|1 }.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my $node_info = {};
    my ($total, $up) = (0, 0);

    for my $name (keys %$cstatus) {
        my $hw = $cstatus->{$name};

        my $is_up = 0;
        $is_up = 1 if $hw->{power} eq 'on' && $hw->{network} eq 'on';

        $node_info->{$name}->{online} = $is_up;

        $total += 1;
        $up += $is_up;
    }

    my $quorate = ($up > int($total/2)) ? 1 : 0;

    if (!$quorate) {
        $node_info->{$_}->{online} = 0 for keys %$cstatus;
    }

    return ($node_info, $quorate);
};
531
# Return the list ($node_info, $quorate) computed from the current hardware
# status, which is read without taking a lock.
sub get_node_info {
    my ($self) = @_;

    my ($node_info, $quorate) = $compute_node_info->(
        $self,
        $self->read_hardware_status_nolock(),
    );

    return ($node_info, $quorate);
}
540
# helper for Sim/ only
#
# Return the simulated cluster file system state $state of $node, as stored
# below the 'cfs' key of the node's hardware status. A state which was never
# set counts as true.
sub get_cfs_state {
    my ($self, $node, $state) = @_;

    # TODO: ensure nolock is OK when adding this to RTSim
    my $cfs = $self->read_hardware_status_nolock()->{$node}->{cfs};
    my $value = $cfs->{$state};

    # we assume default true if not defined
    return !defined($value) || $value;
}
552
# simulate hardware commands, the following commands are available:
#   power <node> <on|off>
#   network <node> <on|off>
#   delay <seconds>
#   skip-round <crm|lrm> [<rounds=1>]
#   cfs <node> <rw|update> <work|fail>
#   reboot <node>
#   shutdown <node>
#   restart-lrm <node>
#   crm <node> <start|stop|enable-node-maintenance|disable-node-maintenance>
#   service <sid> <started|disabled|stopped|ignored>
#   service <sid> <migrate|relocate> <target>
#   service <sid> stop <timeout>
#   service <sid> lock/unlock [lockname]
#   service <sid> add <node> [<request-state=started>]
#   service <sid> delete
#
# NOTE(review): 'delay' and 'skip-round' are not handled in this method, they
# are presumably consumed by the caller before a command gets here - confirm.
#
# $cmdstr - the whitespace separated command, see above
# $logid  - optional ID used for logging the executed command
#
# The whole command is executed while holding the global lock. Dies on
# unknown nodes, commands or actions and on missing parameters.
#
# Returns the hardware status as read (and possibly modified) by the command.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {
        my ($lock_fh) = @_;

        my $cstatus = $self->read_hardware_status_nolock();

        my ($cmd, $objid, $action, @params) = split(/\s+/, $cmdstr);
        my $param = $params[0]; # for convenience/legacy

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        # commands without action either ignore it or reject the empty action
        # below, this just avoids 'uninitialized value' warnings on the way
        $action //= '';

        my ($node, $sid, $d);

        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'\n"
                if $action !~ m/^(on|off)$/;

            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {

                    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
                    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
                    $d->{lrm_restart} = undef;
                    $cstatus->{$node}->{cfs} = {};

                } else {

                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $self->crm_control('stop', $d, $lock_fh);
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $self->lrm_control('stop', $d, $lock_fh);
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }

                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # the network state always follows the power state
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'cfs') {
            die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
                if $action !~ m/^(rw|update)$/;

            my $cfs_cmd = $param // '';
            die "sim_hardware_cmd: unknown cfs command '$cfs_cmd' for '$action' on node '$node'"
                if $cfs_cmd !~ m/^(work|fail)$/;

            $cstatus->{$node}->{cfs}->{$action} = $cfs_cmd eq 'work';
            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $self->lrm_control('shutdown', $d, $lock_fh) if defined($d->{lrm});
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $self->lrm_control('shutdown', $d, $lock_fh);
            }
        } elsif ($cmd eq 'crm') {

            if ($action eq 'stop') {
                if ($d->{crm}) {
                    $d->{crm_stop} = 1;
                    $self->crm_control('shutdown', $d, $lock_fh);
                }
            } elsif ($action eq 'start') {
                $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
            } elsif ($action eq 'enable-node-maintenance' || $action eq 'disable-node-maintenance') {
                $self->queue_crm_commands_nolock("$action $node");
            } else {
                die "sim_hardware_cmd: unknown action '$action'";
            }

        } elsif ($cmd eq 'service') {
            if ($action eq 'started' || $action eq 'disabled' ||
                $action eq 'stopped' || $action eq 'ignored') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$param;

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'stop') {

                die "sim_hardware_cmd: missing timeout for '$action' command"
                    if !defined($param);

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'add') {

                # a service without node would make every later
                # read_service_config() call die, so refuse to add it
                die "sim_hardware_cmd: missing node for '$action' command"
                    if !$param;

                $self->add_service($sid, {state => $params[1] || 'started', node => $param});

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } elsif ($action eq 'lock') {

                $self->lock_service($sid, $param);

            } elsif ($action eq 'unlock') {

                $self->unlock_service($sid, $param);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n"
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

        return $cstatus;
    };

    return $self->global_lock($code);
}
722
# for controlling the resource manager services
#
# Control the CRM of a node. Called by sim_hardware_cmd() with the $action
# 'start', 'stop' or 'shutdown', the node's data hash and the file handle of
# the global lock.
#
# Abstract here - this base implementation always dies, subclasses have to
# override it.
sub crm_control {
    die "implement in subclass";
}
729
# Control the LRM of a node. Called by sim_hardware_cmd() with the $action
# 'start', 'stop' or 'shutdown', the node's data hash and the file handle of
# the global lock.
#
# Abstract here - this base implementation always dies, subclasses have to
# override it.
sub lrm_control {
    die "implement in subclass";
}
735
# Run the simulation.
#
# Abstract here - this base implementation always dies, subclasses have to
# override it.
sub run {
    die "implement in subclass";
}
741
# Modify the watchdog status file "<statusdir>/watchdog_status" while holding
# the global lock.
#
# $code is called with the decoded watchdog status (an empty hash if the file
# does not exist yet) and has to return the list ($wdstatus, $res): the
# status to write back and the result to pass on to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $filename = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $filename) {
            my $raw = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($raw);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    });
};
767
# Drop all watchdog entries which belong to $node from the watchdog status
# file. Nothing happens if the file does not exist.
#
# As the name says, no lock is taken here.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    return if !-f $filename;

    my $raw = PVE::Tools::file_get_contents($filename);
    my $wdstatus = decode_json($raw);

    for my $id (keys %$wdstatus) {
        next if $wdstatus->{$id}->{node} ne $node;
        delete $wdstatus->{$id};
    }

    PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
}
784
# Check the watchdogs of $node.
#
# A watchdog is expired if its last update is more than $watchdog_timeout in
# the past; expired watchdogs are removed from the watchdog status.
#
# Returns 0 if at least one watchdog of the node expired, 1 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $check = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next if $wd->{node} ne $node;

            my $age = $self->get_time() - $wd->{update_time};
            next if $age <= $watchdog_timeout;

            # expired
            $all_alive = 0;
            delete $wdstatus->{$wfh};
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $check);
}
811
# counts the watchdogs opened by this process, used to get unique IDs
my $wdcounter = 0;

# Open a new watchdog for $node.
#
# The watchdog is registered in the watchdog status with the current time as
# last update. Returns its handle, an ID of the form
# "WD:<node>:<pid>:<counter>".
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter += 1;

        my $id = join(':', 'WD', $node, $$, $wdcounter);

        die "internal error" if defined($wdstatus->{$id});

        my $now = $self->get_time();
        $wdstatus->{$id} = { node => $node, update_time => $now };

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
836
# Close the watchdog with handle $wfh and remove it from the watchdog status.
#
# Dies if there is no such watchdog or if it already expired, that is if its
# last update is more than $watchdog_timeout in the past.
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $unregister = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $wd->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $unregister);
}
856
# Update the watchdog with handle $wfh, setting its last update to the
# current time.
#
# Dies if there is no such watchdog or if it already expired, that is if its
# last update is more than $watchdog_timeout in the past.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();
        if (($now - $wd->{update_time}) > $watchdog_timeout) {
            die "watchdog expired";
        }

        $wd->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
879
# Return the static stats of all nodes, as found in the hardware status.
#
# The result maps each node name to a hash with the node's 'cpus' and
# 'memory' values (undef where the hardware status has no such value).
sub get_static_node_stats {
    my ($self) = @_;

    my $cstatus = $self->read_hardware_status_nolock();

    my %stats_of;
    foreach my $name (keys %$cstatus) {
        my %picked = %{ $cstatus->{$name} }{qw(cpus memory)};
        $stats_of{$name} = \%picked;
    }

    return \%stats_of;
}
892
893 1;