1 package PVE
::HA
::Sim
::Hardware
;
3 # Simulate Hardware resources
5 # power supply for nodes: on/off
6 # network connection to nodes: on/off
7 # watchdog devices for nodes
11 use POSIX
qw(strftime EINTR);
15 use Fcntl
qw(:DEFAULT :flock);
17 use File
::Path
qw(make_path remove_tree);
19 use PVE
::HA
::FenceConfig
;
# Largest allowed gap between a watchdog's recorded update_time and the
# current get_time() value; the watchdog code in this file treats a larger
# gap as "expired".
# NOTE(review): the unit is whatever get_time() returns, presumably
# seconds - confirm against the subclass implementing get_time().
21 my $watchdog_timeout = 60;
24 # Status directory layout
28 # $testdir/cmdlist Command list for simulation
29 # $testdir/hardware_status Hardware description (number of nodes, ...)
30 # $testdir/manager_status CRM status (start with {})
31 # $testdir/service_config Service configuration
32 # $testdir/groups HA groups configuration
33 # $testdir/service_status_<node> Service status
36 # runtime status for simulation system
38 # $testdir/status/cluster_locks Cluster locks
39 # $testdir/status/hardware_status Hardware status (power/network on/off)
40 # $testdir/status/watchdog_status Watchdog status
44 # $testdir/status/lrm_status_<node> LRM status
45 # $testdir/status/manager_status CRM status
46 # $testdir/status/crm_commands CRM command queue
47 # $testdir/status/service_config Service configuration
48 # $testdir/status/service_status_<node> Service status
49 # $testdir/status/groups HA groups configuration
52 my ($self, $node) = @_;
54 my $filename = "$self->{statusdir}/lrm_status_$node";
56 return PVE
::HA
::Tools
::read_json_from_file
($filename, {});
# Store the LRM status object of one node.
#
# Arguments:
#   $node       - node name, used as the suffix of the status file name
#   $status_obj - data structure handed to the JSON writer
#
# The target is "<statusdir>/lrm_status_<node>". The result of
# PVE::HA::Tools::write_json_to_file is passed through to the caller.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $statusdir = $self->{statusdir};

    return PVE::HA::Tools::write_json_to_file(
        "$statusdir/lrm_status_$node",
        $status_obj,
    );
}
67 sub read_hardware_status_nolock
{
70 my $filename = "$self->{statusdir}/hardware_status";
72 my $raw = PVE
::Tools
::file_get_contents
($filename);
73 my $cstatus = decode_json
($raw);
# Serialize $cstatus with encode_json and store it in
# "<statusdir>/hardware_status" via PVE::Tools::file_set_contents.
#
# This sub takes no lock itself (hence the _nolock suffix).
# NOTE(review): callers presumably serialize access through global_lock -
# confirm at the call sites.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $target = "$self->{statusdir}/hardware_status";
    my $json = encode_json($cstatus);

    return PVE::Tools::file_set_contents($target, $json);
}
86 sub read_service_config
{
89 my $filename = "$self->{statusdir}/service_config";
90 my $conf = PVE
::HA
::Tools
::read_json_from_file
($filename);
92 foreach my $sid (keys %$conf) {
93 my $d = $conf->{$sid};
95 die "service '$sid' without assigned node!" if !$d->{node
};
97 if ($sid =~ m/^(vm|ct|fa):(\d+)$/) {
103 $d->{state} = 'disabled' if !$d->{state};
104 $d->{max_restart
} = 1 if !defined($d->{max_restart
});
105 $d->{max_relocate
} = 1 if !defined($d->{max_relocate
});
# Replace the service configuration.
#
# Stores $conf in $self->{service_config} and hands the same structure to
# PVE::HA::Tools::write_json_to_file for "<statusdir>/service_config".
# Returns whatever write_json_to_file returns.
sub write_service_config {
    my ($self, $conf) = @_;

    $self->{service_config} = $conf;

    my $statusdir = $self->{statusdir};

    return PVE::HA::Tools::write_json_to_file("$statusdir/service_config", $conf);
}
120 sub read_fence_config
{
125 my $filename = "$self->{statusdir}/fence.cfg";
127 $raw = PVE
::Tools
::file_get_contents
($filename);
130 return PVE
::HA
::FenceConfig
::parse_config
($filename, $raw);
# Simulated fence agent execution.
#
# Arguments:
#   $agent - agent name; forwarded to sim_hardware_cmd as its second argument
#   $node  - node to fence
#   @param - further agent parameters; accepted but not used here
#
# For now all agents succeed and behave the same: the node's power is
# switched off through sim_hardware_cmd and 0 is returned.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    my $power_off_cmd = "power $node off";
    $self->sim_hardware_cmd($power_off_cmd, $agent);

    return 0; # EXIT_SUCCESS
}
142 sub set_service_state
{
143 my ($self, $sid, $state) = @_;
145 my $conf = $self->read_service_config();
146 die "no such service '$sid'" if !$conf->{$sid};
148 $conf->{$sid}->{state} = $state;
150 $self->write_service_config($conf);
156 my ($self, $sid, $opts) = @_;
158 my $conf = $self->read_service_config();
159 die "resource ID '$sid' already defined\n" if $conf->{$sid};
161 $conf->{$sid} = $opts;
163 $self->write_service_config($conf);
169 my ($self, $sid) = @_;
171 my $conf = $self->read_service_config();
173 die "no such service '$sid'" if !$conf->{$sid};
175 delete $conf->{$sid};
177 $self->write_service_config($conf);
# Move a service entry from one node to another in the service config.
#
# Arguments:
#   $sid          - service ID to update
#   $current_node - node the caller believes the service is on
#   $new_node     - node to record as the new location
#
# Dies if $sid is not configured, or if the configured node differs from
# $current_node. On success the modified configuration is written back with
# write_service_config, whose result is returned.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();
    my $service = $conf->{$sid};

    die "no such service '$sid'\n" unless $service;

    # refuse the update when the caller's view of the location is stale
    if ($current_node ne $service->{node}) {
        die "current_node for '$sid' does not match ($current_node != $conf->{$sid}->{node})\n";
    }

    $service->{node} = $new_node;

    return $self->write_service_config($conf);
}
197 sub queue_crm_commands_nolock
{
198 my ($self, $cmd) = @_;
203 my $filename = "$self->{statusdir}/crm_commands";
205 $data = PVE
::Tools
::file_get_contents
($filename);
208 PVE
::Tools
::file_set_contents
($filename, $data);
213 sub queue_crm_commands
{
214 my ($self, $cmd) = @_;
216 my $code = sub { $self->queue_crm_commands_nolock($cmd); };
218 $self->global_lock($code);
223 sub read_crm_commands
{
229 my $filename = "$self->{statusdir}/crm_commands";
231 $data = PVE
::Tools
::file_get_contents
($filename);
233 PVE
::Tools
::file_set_contents
($filename, '');
238 return $self->global_lock($code);
241 sub read_group_config
{
244 my $filename = "$self->{statusdir}/groups";
246 $raw = PVE
::Tools
::file_get_contents
($filename) if -f
$filename;
248 return PVE
::HA
::Config
::parse_groups_config
($filename, $raw);
# Load the service status recorded for one node.
#
# Reads "<statusdir>/service_status_<node>" through
# PVE::HA::Tools::read_json_from_file (called without a default value) and
# returns its result unchanged.
sub read_service_status {
    my ($self, $node) = @_;

    my $statusdir = $self->{statusdir};
    my $path = "$statusdir/service_status_$node";

    return PVE::HA::Tools::read_json_from_file($path);
}
258 sub write_service_status
{
259 my ($self, $node, $data) = @_;
261 my $filename = "$self->{statusdir}/service_status_$node";
262 my $res = PVE
::HA
::Tools
::write_json_to_file
($filename, $data);
264 # fixme: add test if a service runs on two nodes!!!
269 my $default_group_config = <<__EOD;
284 my ($this, $testdir) = @_;
286 die "missing testdir" if !$testdir;
288 my $class = ref($this) || $this;
290 my $self = bless {}, $class;
292 my $statusdir = $self->{statusdir
} = "$testdir/status";
294 remove_tree
($statusdir);
297 # copy initial configuration
298 copy
("$testdir/manager_status", "$statusdir/manager_status"); # optional
300 if (-f
"$testdir/groups") {
301 copy
("$testdir/groups", "$statusdir/groups");
303 PVE
::Tools
::file_set_contents
("$statusdir/groups", $default_group_config);
306 if (-f
"$testdir/service_config") {
307 copy
("$testdir/service_config", "$statusdir/service_config");
310 'vm:101' => { node
=> 'node1', group
=> 'prefer_node1' },
311 'vm:102' => { node
=> 'node2', group
=> 'prefer_node2' },
312 'vm:103' => { node
=> 'node3', group
=> 'prefer_node3' },
313 'vm:104' => { node
=> 'node1', group
=> 'prefer_node1' },
314 'vm:105' => { node
=> 'node2', group
=> 'prefer_node2' },
315 'vm:106' => { node
=> 'node3', group
=> 'prefer_node3' },
317 $self->write_service_config($conf);
320 if (-f
"$testdir/hardware_status") {
321 copy
("$testdir/hardware_status", "$statusdir/hardware_status") ||
322 die "Copy failed: $!\n";
325 node1
=> { power
=> 'off', network
=> 'off' },
326 node2
=> { power
=> 'off', network
=> 'off' },
327 node3
=> { power
=> 'off', network
=> 'off' },
329 $self->write_hardware_status_nolock($cstatus);
332 if (-f
"$testdir/fence.cfg") {
333 copy
("$testdir/fence.cfg", "$statusdir/fence.cfg");
336 my $cstatus = $self->read_hardware_status_nolock();
338 foreach my $node (sort keys %$cstatus) {
339 $self->{nodes
}->{$node} = {};
341 if (-f
"$testdir/service_status_$node") {
342 copy
("$testdir/service_status_$node", "$statusdir/service_status_$node");
344 $self->write_service_status($node, {});
348 $self->{service_config
} = $self->read_service_config();
356 die "implement in subclass";
360 my ($self, $level, $msg, $id) = @_;
364 my $time = $self->get_time();
366 $id = 'hardware' if !$id;
368 printf("%-5s %5d %12s: $msg\n", $level, $time, $id);
372 my ($self, $node) = @_;
374 return $self->{statusdir
};
378 my ($self, $code, @param) = @_;
380 my $lockfile = "$self->{statusdir}/hardware.lck";
381 my $fh = IO
::File-
>new(">>$lockfile") ||
382 die "unable to open '$lockfile'\n";
386 $success = flock($fh, LOCK_EX
);
387 if ($success || ($! != EINTR
)) {
392 die "can't acquire lock '$lockfile' - $!\n";
398 eval { $res = &$code($fh, @param) };
408 my $compute_node_info = sub {
409 my ($self, $cstatus) = @_;
414 my $online_count = 0;
416 foreach my $node (keys %$cstatus) {
417 my $d = $cstatus->{$node};
419 my $online = ($d->{power
} eq 'on' && $d->{network
} eq 'on') ?
1 : 0;
420 $node_info->{$node}->{online
} = $online;
423 $online_count++ if $online;
426 my $quorate = ($online_count > int($node_count/2)) ?
1 : 0;
429 foreach my $node (keys %$cstatus) {
430 my $d = $cstatus->{$node};
431 $node_info->{$node}->{online
} = 0;
435 return ($node_info, $quorate);
441 my $cstatus = $self->read_hardware_status_nolock();
442 my ($node_info, $quorate) = &$compute_node_info($self, $cstatus);
444 return ($node_info, $quorate);
# Simulate a hardware command. Supported command strings:
#
#   power <node> <on|off>
#   network <node> <on|off>
#
# Arguments:
#   $cmdstr - command string in one of the forms above
#   $logid  - second argument; exec_fence_agent passes the fence agent name
#             here. NOTE(review): presumably used as the log id - confirm
#             in the subclass.
#
# This base class provides no implementation and always dies; subclasses
# have to override the method.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    die "implement in subclass";
}
460 die "implement in subclass";
463 my $modify_watchog = sub {
464 my ($self, $code) = @_;
466 my $update_cmd = sub {
468 my $filename = "$self->{statusdir}/watchdog_status";
470 my ($res, $wdstatus);
473 my $raw = PVE
::Tools
::file_get_contents
($filename);
474 $wdstatus = decode_json
($raw);
479 ($wdstatus, $res) = &$code($wdstatus);
481 PVE
::Tools
::file_set_contents
($filename, encode_json
($wdstatus));
486 return $self->global_lock($update_cmd);
489 sub watchdog_reset_nolock
{
490 my ($self, $node) = @_;
492 my $filename = "$self->{statusdir}/watchdog_status";
495 my $raw = PVE
::Tools
::file_get_contents
($filename);
496 my $wdstatus = decode_json
($raw);
498 foreach my $id (keys %$wdstatus) {
499 delete $wdstatus->{$id} if $wdstatus->{$id}->{node
} eq $node;
502 PVE
::Tools
::file_set_contents
($filename, encode_json
($wdstatus));
507 my ($self, $node) = @_;
514 foreach my $wfh (keys %$wdstatus) {
515 my $wd = $wdstatus->{$wfh};
516 next if $wd->{node
} ne $node;
518 my $ctime = $self->get_time();
519 my $tdiff = $ctime - $wd->{update_time
};
521 if ($tdiff > $watchdog_timeout) { # expired
523 delete $wdstatus->{$wfh};
527 return ($wdstatus, $res);
530 return &$modify_watchog($self, $code);
536 my ($self, $node) = @_;
543 my $id = "WD:$node:$$:$wdcounter";
545 die "internal error" if defined($wdstatus->{$id});
549 update_time
=> $self->get_time(),
552 return ($wdstatus, $id);
555 return &$modify_watchog($self, $code);
559 my ($self, $wfh) = @_;
564 my $wd = $wdstatus->{$wfh};
565 die "no such watchdog handle '$wfh'\n" if !defined($wd);
567 my $tdiff = $self->get_time() - $wd->{update_time
};
568 die "watchdog expired" if $tdiff > $watchdog_timeout;
570 delete $wdstatus->{$wfh};
575 return &$modify_watchog($self, $code);
578 sub watchdog_update
{
579 my ($self, $wfh) = @_;
584 my $wd = $wdstatus->{$wfh};
586 die "no such watchdog handle '$wfh'\n" if !defined($wd);
588 my $ctime = $self->get_time();
589 my $tdiff = $ctime - $wd->{update_time
};
591 die "watchdog expired" if $tdiff > $watchdog_timeout;
593 $wd->{update_time
} = $ctime;
598 return &$modify_watchog($self, $code);