1 package PVE
::HA
::Sim
::Hardware
;
# Simulate hardware resources:
# power supply for nodes: on/off
# network connection to nodes: on/off
# watchdog devices for nodes
11 use POSIX
qw(strftime EINTR);
15 use Fcntl
qw(:DEFAULT :flock);
17 use File
::Path
qw(make_path remove_tree);
# virtual resource classes
21 use PVE
::HA
::Sim
::Resources
::VirtVM
;
22 use PVE
::HA
::Sim
::Resources
::VirtCT
;
# Register the simulated (virtual) resource classes, then call init() on the
# resource base class.
for my $resource_class (qw(
    PVE::HA::Sim::Resources::VirtVM
    PVE::HA::Sim::Resources::VirtCT
)) {
    $resource_class->register();
}

PVE::HA::Sim::Resources->init();

# Watchdog timeout; compared against differences of $self->get_time() values
# in the watchdog_* subs below.
my $watchdog_timeout = 60;
# Status directory layout
#
# $testdir/cmdlist                        Command list for simulation
# $testdir/hardware_status                Hardware description (number of nodes, ...)
# $testdir/manager_status                 CRM status (start with {})
# $testdir/service_config                 Service configuration
# $testdir/groups                         HA groups configuration
# $testdir/service_status_<node>          Service status
#
# runtime status for simulation system
#
# $testdir/status/cluster_locks           Cluster locks
# $testdir/status/hardware_status         Hardware status (power/network on/off)
# $testdir/status/watchdog_status         Watchdog status
# $testdir/status/lrm_status_<node>       LRM status
# $testdir/status/manager_status          CRM status
# $testdir/status/crm_commands            CRM command queue
# $testdir/status/service_config          Service configuration
# $testdir/status/service_status_<node>   Service status
# $testdir/status/groups                  HA groups configuration
# Reads the per-node LRM status file "$self->{statusdir}/lrm_status_$node"
# through PVE::HA::Tools::read_json_from_file and returns the result.
# NOTE(review): the enclosing `sub` line is not visible in this extract;
# presumably this is the reader counterpart of write_lrm_status - confirm.
# NOTE(review): the second argument {} is presumably the default value used
# when the file does not exist - verify against PVE::HA::Tools.
60 my ($self, $node) = @_;
62 my $filename = "$self->{statusdir}/lrm_status_$node";
64 return PVE
::HA
::Tools
::read_json_from_file
($filename, {});
# Persist the LRM status of one node.
#
# Parameters:
#   $node       - node name, used as suffix of the status file name
#   $status_obj - data structure handed to write_json_to_file
#
# Writes "$self->{statusdir}/lrm_status_$node"; the value of the
# write_json_to_file call is the (implicit) return value.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $path = $self->{statusdir} . "/lrm_status_$node";

    PVE::HA::Tools::write_json_to_file($path, $status_obj);
}
# Load the simulated hardware status.
#
# Reads "$self->{statusdir}/hardware_status" and decodes its content as JSON.
# This sub does not take the global lock itself (hence "_nolock").
#
# Returns the decoded data structure. Errors raised by
# PVE::Tools::file_get_contents or decode_json are not caught here.
sub read_hardware_status_nolock {
    # Called as a method ($self->read_hardware_status_nolock()), so the
    # object has to be unpacked before $self->{statusdir} can be used.
    my ($self) = @_;

    my $filename = "$self->{statusdir}/hardware_status";

    my $raw = PVE::Tools::file_get_contents($filename);
    my $cstatus = decode_json($raw);

    return $cstatus;
}
# Store the simulated hardware status.
#
# Parameters:
#   $cstatus - data structure to serialize with encode_json
#
# Writes the JSON text to "$self->{statusdir}/hardware_status". This sub
# does not take the global lock itself (hence "_nolock").
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $path = $self->{statusdir} . '/hardware_status';
    my $json = encode_json($cstatus);

    PVE::Tools::file_set_contents($path, $json);
}
# Loads the service configuration from "$self->{statusdir}/service_config"
# (via PVE::HA::Tools::read_json_from_file) and post-processes every entry:
# an entry without a {node} value is fatal, and a false {state} is replaced
# by 'disabled'.
# NOTE(review): several lines appear to be missing from this extract - $self
# is used but its unpacking from @_ is not visible, the body of the
# m/^(vm|ct):(\d+)$/ branch is not visible, and neither are the closing
# braces or the return statement.
94 sub read_service_config
{
97 my $filename = "$self->{statusdir}/service_config";
98 my $conf = PVE
::HA
::Tools
::read_json_from_file
($filename);
100 foreach my $sid (keys %$conf) {
101 my $d = $conf->{$sid};
103 die "service '$sid' without assigned node!" if !$d->{node
};
# service IDs of the form "vm:<digits>" or "ct:<digits>" get special handling
105 if ($sid =~ m/^(vm|ct):(\d+)$/) {
111 $d->{state} = 'disabled' if !$d->{state};
# Store a new service configuration.
#
# Parameters:
#   $conf - service configuration hash (service ID => settings)
#
# The configuration is cached in $self->{service_config} and written to
# "$self->{statusdir}/service_config". Returns the result of
# PVE::HA::Tools::write_json_to_file.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the cached copy in step with what goes to disk
    $self->{service_config} = $conf;

    my $path = $self->{statusdir} . '/service_config';

    return PVE::HA::Tools::write_json_to_file($path, $conf);
}
# Change the requested state of an existing service.
#
# Parameters:
#   $sid   - service ID (key of the service configuration)
#   $state - new value for the service's {state} field
#
# Dies with "no such service '$sid'" when the ID is not configured;
# otherwise the updated configuration is written back.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $services = $self->read_service_config();

    my $entry = $services->{$sid}
        or die "no such service '$sid'";

    $entry->{state} = $state;

    $self->write_service_config($services);
}
# Adds a new entry $sid => $opts to the service configuration and writes the
# configuration back; dies if $sid is already configured.
# NOTE(review): the enclosing `sub` line, the closing brace and any explicit
# return statement are not visible in this extract.
140 my ($self, $sid, $opts) = @_;
142 my $conf = $self->read_service_config();
143 die "resource ID '$sid' already defined\n" if $conf->{$sid};
145 $conf->{$sid} = $opts;
147 $self->write_service_config($conf);
# Removes the entry for $sid from the service configuration and writes the
# configuration back; dies if $sid is not configured.
# NOTE(review): the enclosing `sub` line, the closing brace and any explicit
# return statement are not visible in this extract.
153 my ($self, $sid) = @_;
155 my $conf = $self->read_service_config();
157 die "no such service '$sid'" if !$conf->{$sid};
159 delete $conf->{$sid};
161 $self->write_service_config($conf);
# Move a service from one node to another in the service configuration.
#
# Parameters:
#   $sid          - service ID
#   $current_node - node the caller believes the service is assigned to
#   $new_node     - node to assign the service to
#
# Dies when the service is unknown or when $current_node differs from the
# configured node; otherwise stores $new_node and writes the configuration.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $services = $self->read_service_config();
    my $entry = $services->{$sid};

    die "no such service '$sid'\n" if !$entry;

    # refuse the move when the caller's view of the location is stale
    if ($current_node ne $entry->{node}) {
        die "current_node for '$sid' does not match ($current_node != $entry->{node})\n";
    }

    $entry->{node} = $new_node;

    $self->write_service_config($services);
}
# Rewrites the CRM command queue file "$self->{statusdir}/crm_commands"
# without taking the global lock itself (queue_crm_commands wraps this call
# in $self->global_lock).
# NOTE(review): in this extract $data is assigned without a visible `my`
# declaration and $cmd is never added to it, so the step that appends the
# new command is presumably on lines that are not visible - confirm.
181 sub queue_crm_commands_nolock
{
182 my ($self, $cmd) = @_;
187 my $filename = "$self->{statusdir}/crm_commands";
189 $data = PVE
::Tools
::file_get_contents
($filename);
192 PVE
::Tools
::file_set_contents
($filename, $data);
# Queue a CRM command while holding the global lock.
#
# Parameters:
#   $cmd - command handed to queue_crm_commands_nolock
#
# The actual work is delegated to queue_crm_commands_nolock, executed through
# $self->global_lock.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub {
        $self->queue_crm_commands_nolock($cmd);
    });
}
# Fetches the content of "$self->{statusdir}/crm_commands" and then
# overwrites the file with the empty string; the work is run through
# $self->global_lock, whose result is returned.
# NOTE(review): the definition of $code, the declaration of $data and the
# unpacking of $self are not visible in this extract; presumably the two
# file operations live inside the $code closure and $data is its return
# value - confirm.
207 sub read_crm_commands
{
213 my $filename = "$self->{statusdir}/crm_commands";
215 $data = PVE
::Tools
::file_get_contents
($filename);
217 PVE
::Tools
::file_set_contents
($filename, '');
222 return $self->global_lock($code);
# Parses the HA groups configuration stored in "$self->{statusdir}/groups".
# The file content is only read when the file exists; the result of
# PVE::HA::Config::parse_groups_config($filename, $raw) is returned.
# NOTE(review): the declarations of $self and $raw are not visible in this
# extract, so the value passed as $raw when the file is missing cannot be
# determined from here.
225 sub read_group_config
{
228 my $filename = "$self->{statusdir}/groups";
230 $raw = PVE
::Tools
::file_get_contents
($filename) if -f
$filename;
232 return PVE
::HA
::Config
::parse_groups_config
($filename, $raw);
# Load the service status of one node.
#
# Parameters:
#   $node - node name, used as suffix of the status file name
#
# Returns the result of PVE::HA::Tools::read_json_from_file for
# "$self->{statusdir}/service_status_$node".
sub read_service_status {
    my ($self, $node) = @_;

    my $path = $self->{statusdir} . "/service_status_$node";

    return PVE::HA::Tools::read_json_from_file($path);
}
# Store the service status of one node.
#
# Parameters:
#   $node - node name, used as suffix of the status file name
#   $data - data structure handed to write_json_to_file
#
# Writes "$self->{statusdir}/service_status_$node" and returns the value
# obtained from PVE::HA::Tools::write_json_to_file.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $path = $self->{statusdir} . "/service_status_$node";

    my $result = PVE::HA::Tools::write_json_to_file($path, $data);

    # FIXME: add a test that detects a service running on two nodes

    return $result;
}
# Default HA groups configuration text; the constructor code writes it to
# "$statusdir/groups" (presumably only when the test directory does not
# provide its own "groups" file - confirm).
# NOTE(review): the heredoc body and its __EOD terminator are not visible in
# this extract.
253 my $default_group_config = <<__EOD;
# Constructor body: creates the simulated hardware object for the test
# directory $testdir and populates its runtime status files below
# "$testdir/status" from the files found in $testdir.
# NOTE(review): the `sub` header, several else-branches / closing braces, the
# openings of the default $conf and $cstatus hashes and the final return are
# not visible in this extract.
268 my ($this, $testdir) = @_;
270 die "missing testdir" if !$testdir;
# allow calling the constructor on a class name or on an existing object
272 my $class = ref($this) || $this;
274 my $self = bless {}, $class;
276 my $statusdir = $self->{statusdir
} = "$testdir/status";
# start from a clean runtime status directory
# NOTE(review): no visible statement recreates $statusdir before files are
# copied into it - presumably that happens on a line not shown; confirm.
278 remove_tree
($statusdir);
281 # copy initial configuration
282 copy
("$testdir/manager_status", "$statusdir/manager_status"); # optional
284 if (-f
"$testdir/groups") {
285 copy
("$testdir/groups", "$statusdir/groups");
287 PVE
::Tools
::file_set_contents
("$statusdir/groups", $default_group_config);
290 if (-f
"$testdir/service_config") {
291 copy
("$testdir/service_config", "$statusdir/service_config");
# built-in service configuration: six VMs, two per node (node1..node3)
294 'vm:101' => { node
=> 'node1', group
=> 'prefer_node1' },
295 'vm:102' => { node
=> 'node2', group
=> 'prefer_node2' },
296 'vm:103' => { node
=> 'node3', group
=> 'prefer_node3' },
297 'vm:104' => { node
=> 'node1', group
=> 'prefer_node1' },
298 'vm:105' => { node
=> 'node2', group
=> 'prefer_node2' },
299 'vm:106' => { node
=> 'node3', group
=> 'prefer_node3' },
301 $self->write_service_config($conf);
304 if (-f
"$testdir/hardware_status") {
305 copy
("$testdir/hardware_status", "$statusdir/hardware_status") ||
306 die "Copy failed: $!\n";
# built-in hardware status: three nodes, power and network both 'off'
309 node1
=> { power
=> 'off', network
=> 'off' },
310 node2
=> { power
=> 'off', network
=> 'off' },
311 node3
=> { power
=> 'off', network
=> 'off' },
313 $self->write_hardware_status_nolock($cstatus);
# register every node listed in the hardware status and set up its
# service status file
317 my $cstatus = $self->read_hardware_status_nolock();
319 foreach my $node (sort keys %$cstatus) {
320 $self->{nodes
}->{$node} = {};
322 if (-f
"$testdir/service_status_$node") {
323 copy
("$testdir/service_status_$node", "$statusdir/service_status_$node");
325 $self->write_service_status($node, {});
# cache the service configuration on the object
329 $self->{service_config
} = $self->read_service_config();
# Placeholder body: always dies with "implement in subclass".
# NOTE(review): the `sub` header is not visible in this extract; this may be
# the get_time() method that other subs in this file call on $self - confirm.
337 die "implement in subclass";
# Prints one formatted log line via printf: level, the value of
# $self->get_time(), the logging id and the message. $id falls back to
# 'hardware' when it is false.
# NOTE(review): the enclosing `sub` line is not visible in this extract.
# NOTE(review): $msg is interpolated into the printf format string, so a '%'
# inside the message would be treated as a conversion - consider passing the
# message as a '%s' argument instead.
341 my ($self, $level, $msg, $id) = @_;
345 my $time = $self->get_time();
347 $id = 'hardware' if !$id;
349 printf("%-5s %5d %12s: $msg\n", $level, $time, $id);
# Returns $self->{statusdir}. The $node argument is unpacked but not used by
# the visible code.
# NOTE(review): the enclosing `sub` line is not visible in this extract.
353 my ($self, $node) = @_;
355 return $self->{statusdir
};
# Fragment of the global lock helper (called elsewhere in this file as
# $self->global_lock($code)). It opens "$self->{statusdir}/hardware.lck" in
# append mode, takes an exclusive flock on it and then runs
# $code->($fh, @param) inside eval.
# NOTE(review): the enclosing `sub` line is not visible in this extract.
# NOTE(review): the EINTR check suggests flock() is retried when interrupted
# by a signal, but the loop itself, the declarations of $success / $res, the
# error handling after eval and the release of the lock are not visible in
# this extract.
359 my ($self, $code, @param) = @_;
361 my $lockfile = "$self->{statusdir}/hardware.lck";
362 my $fh = IO
::File-
>new(">>$lockfile") ||
363 die "unable to open '$lockfile'\n";
367 $success = flock($fh, LOCK_EX
);
368 if ($success || ($! != EINTR
)) {
373 die "can't acquire lock '$lockfile' - $!\n";
379 eval { $res = &$code($fh, @param) };
# Derives per-node online information and the quorum flag from the hardware
# status hash $cstatus (node name => { power => ..., network => ... }).
# A node counts as online only when both power and network are 'on';
# $quorate is 1 when more than int($node_count/2) nodes are online.
# Returns the list ($node_info, $quorate).
# NOTE(review): the declarations of $node_info and $node_count, the statement
# that counts the nodes and whatever condition guards the second loop (which
# marks every node offline) are not visible in this extract - presumably the
# reset only applies when the cluster is not quorate; confirm.
389 my $compute_node_info = sub {
390 my ($self, $cstatus) = @_;
395 my $online_count = 0;
397 foreach my $node (keys %$cstatus) {
398 my $d = $cstatus->{$node};
400 my $online = ($d->{power
} eq 'on' && $d->{network
} eq 'on') ?
1 : 0;
401 $node_info->{$node}->{online
} = $online;
404 $online_count++ if $online;
# strict majority of the nodes must be online
407 my $quorate = ($online_count > int($node_count/2)) ?
1 : 0;
410 foreach my $node (keys %$cstatus) {
411 my $d = $cstatus->{$node};
412 $node_info->{$node}->{online
} = 0;
416 return ($node_info, $quorate);
# Reads the hardware status (without locking) and returns the list
# ($node_info, $quorate) computed from it by $compute_node_info.
# NOTE(review): the enclosing `sub` line and the unpacking of $self are not
# visible in this extract.
422 my $cstatus = $self->read_hardware_status_nolock();
423 my ($node_info, $quorate) = &$compute_node_info($self, $cstatus);
425 return ($node_info, $quorate);
# simulate hardware commands
# power <node> <on|off>
# network <node> <on|off>
# Execute a simulated hardware command.
#
# Parameters:
#   $cmdstr - command string
#   $logid  - id to log under
#
# This implementation is only a placeholder: it always dies with
# "implement in subclass".
sub sim_hardware_cmd {
    my $self = shift;
    my ($cmdstr, $logid) = @_;

    die "implement in subclass";
}
# Placeholder body: always dies with "implement in subclass".
# NOTE(review): the `sub` header is not visible in this extract, so the name
# of this method cannot be determined from here.
441 die "implement in subclass";
# Helper closure used by the watchdog subs in this file: through
# $self->global_lock it loads the watchdog table from
# "$self->{statusdir}/watchdog_status" (JSON), calls $code with that table -
# $code must return the list ($wdstatus, $res) - and writes the returned
# table back to the file.
# NOTE(review): handling of a missing status file, the return value of the
# inner closure and the closing braces are not visible in this extract.
# NOTE(review): "watchog" looks like a typo for "watchdog"; the name is used
# by other subs in this file, so renaming needs a coordinated change.
444 my $modify_watchog = sub {
445 my ($self, $code) = @_;
447 my $update_cmd = sub {
449 my $filename = "$self->{statusdir}/watchdog_status";
451 my ($res, $wdstatus);
454 my $raw = PVE
::Tools
::file_get_contents
($filename);
455 $wdstatus = decode_json
($raw);
460 ($wdstatus, $res) = &$code($wdstatus);
462 PVE
::Tools
::file_set_contents
($filename, encode_json
($wdstatus));
467 return $self->global_lock($update_cmd);
# Drop all watchdog entries that belong to $node.
#
# Parameters:
#   $node - node name; entries whose {node} field equals it are removed
#
# The watchdog table is stored as JSON in
# "$self->{statusdir}/watchdog_status". Matching entries are deleted and the
# file is rewritten. This sub does not take the global lock itself (hence
# "_nolock"). When the status file does not exist there is nothing to reset
# and the sub returns without touching the file system.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    # Same existence check as read_group_config uses for its file: without a
    # status file there are no watchdog entries to remove.
    return if !-f $filename;

    my $raw = PVE::Tools::file_get_contents($filename);
    my $wdstatus = decode_json($raw);

    # keys() yields a snapshot list, so deleting inside the loop is safe
    foreach my $id (keys %$wdstatus) {
        delete $wdstatus->{$id} if $wdstatus->{$id}->{node} eq $node;
    }

    PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
}
# Fragment of a watchdog check for $node. For every watchdog entry that
# belongs to $node, the time since its last update ($self->get_time() minus
# {update_time}) is compared with $watchdog_timeout; expired entries are
# deleted. The closure result ($wdstatus, $res) is produced for
# $modify_watchog, whose return value is returned.
# NOTE(review): the `sub` header, the opening of the $code closure and the
# way $res is initialised/updated are not visible in this extract.
488 my ($self, $node) = @_;
495 foreach my $wfh (keys %$wdstatus) {
496 my $wd = $wdstatus->{$wfh};
497 next if $wd->{node
} ne $node;
499 my $ctime = $self->get_time();
500 my $tdiff = $ctime - $wd->{update_time
};
502 if ($tdiff > $watchdog_timeout) { # expired
504 delete $wdstatus->{$wfh};
508 return ($wdstatus, $res);
511 return &$modify_watchog($self, $code);
# Fragment that registers a new watchdog for $node. The handle id has the
# form "WD:<node>:<pid>:<counter>" and must not exist in the table yet
# (otherwise "internal error"); the new entry records
# update_time => $self->get_time(). The id is passed back as the second
# element of the closure result, and the value of $modify_watchog is returned.
# NOTE(review): the `sub` header, the opening of the $code closure, the
# declaration/increment of $wdcounter and the remaining fields of the new
# entry are not visible in this extract.
517 my ($self, $node) = @_;
524 my $id = "WD:$node:$$:$wdcounter";
526 die "internal error" if defined($wdstatus->{$id});
530 update_time
=> $self->get_time(),
533 return ($wdstatus, $id);
536 return &$modify_watchog($self, $code);
# Fragment that closes the watchdog identified by handle $wfh: dies when the
# handle is unknown or when the time since the last update exceeds
# $watchdog_timeout; otherwise the entry is removed from the table. The work
# runs through $modify_watchog, whose return value is returned.
# NOTE(review): the `sub` header and the opening/closing of the $code closure
# (including its return value) are not visible in this extract.
540 my ($self, $wfh) = @_;
545 my $wd = $wdstatus->{$wfh};
546 die "no such watchdog handle '$wfh'\n" if !defined($wd);
548 my $tdiff = $self->get_time() - $wd->{update_time
};
549 die "watchdog expired" if $tdiff > $watchdog_timeout;
551 delete $wdstatus->{$wfh};
556 return &$modify_watchog($self, $code);
# Refreshes the watchdog identified by handle $wfh: dies when the handle is
# unknown or when the time since the last update exceeds $watchdog_timeout;
# otherwise {update_time} is set to the current $self->get_time() value.
# The work runs through $modify_watchog, whose return value is returned.
# NOTE(review): the opening/closing of the $code closure (including its
# return value) and the sub's closing brace are not visible in this extract.
559 sub watchdog_update
{
560 my ($self, $wfh) = @_;
565 my $wd = $wdstatus->{$wfh};
567 die "no such watchdog handle '$wfh'\n" if !defined($wd);
569 my $ctime = $self->get_time();
570 my $tdiff = $ctime - $wd->{update_time
};
572 die "watchdog expired" if $tdiff > $watchdog_timeout;
574 $wd->{update_time
} = $ctime;
579 return &$modify_watchog($self, $code);