# Table mapping each state name to a short human-readable description
# (presumably the LRM states: the removed log line further down says
# "LRM status change" -- confirm against the enclosing package).
# This change renames the 'locked' state to 'active'; its description text
# stays "got agent_lock".
# NOTE(review): "agnet" in the first description looks like a typo for
# "agent" -- it is a runtime string, so it is left untouched here; confirm
# and fix it in a code change.
my $valid_states = {
wait_for_agent_lock => "waiting for agnet lock",
- locked => "got agent_lock",
+ active => "got agent_lock",
lost_agent_lock => "lost agent_lock",
};
# important: only update if the state really changed
return if $old->{state} eq $new->{state};
- $haenv->log('info', "LRM status change $old->{state} => $new->{state}");
+ $haenv->log('info', "status change $old->{state} => $new->{state}");
$new->{state_change_time} = $haenv->get_time();
my $service_count = 1; # todo: correctly compute
if ($service_count && $haenv->quorate()) {
- if ($self->get_protectedt_ha_agent_lock()) {
- $self->set_local_status({ state => 'locked' });
+ if ($self->get_protected_ha_agent_lock()) {
+ $self->set_local_status({ state => 'active' });
}
}
} elsif ($state eq 'lost_agent_lock') {
if ($haenv->quorate()) {
- if ($self->get_protectedt_ha_agent_lock()) {
- $self->set_local_status({ state => 'locked' });
+ if ($self->get_protected_ha_agent_lock()) {
+ $self->set_local_status({ state => 'active' });
}
}
- } elsif ($state eq 'locked') {
+ } elsif ($state eq 'active') {
- if (!$self->get_protectedt_ha_agent_lock()) {
+ if (!$self->get_protected_ha_agent_lock()) {
$self->set_local_status({ state => 'lost_agent_lock'});
}
}
$haenv->sleep(5);
- } elsif ($state eq 'locked') {
+ } elsif ($state eq 'active') {
my $startime = $haenv->get_time();
use PVE::HA::Env;
# Constructor for an environment object (the enclosing package is not
# visible in this excerpt).
#
# Arguments after this change: ($this, $nodename, $hardware, $log_id,
# $lock_timeout). $log_id and $lock_timeout are new and mandatory.
#
# Dies with "missing nodename", "missing log_id" or "missing lock_timeout"
# when the respective argument is false -- note that a $lock_timeout of 0
# is therefore rejected as well.
#
# Returns the blessed hash with cur_time and loop_delay initialised to 0 and
# lock_timeout / log_id stored from the arguments.
# NOTE(review): the visible lines never store $nodename or $hardware on
# $self; this excerpt may omit lines -- confirm against the full file.
sub new {
- my ($this, $nodename, $hardware) = @_;
+ my ($this, $nodename, $hardware, $log_id, $lock_timeout) = @_;
die "missing nodename" if !$nodename;
-
+ die "missing log_id" if !$log_id;
+ die "missing lock_timeout" if !$lock_timeout;
+
# Accept either a class name or an existing instance as invocant.
my $class = ref($this) || $this;
my $self = bless {}, $class;
$self->{cur_time} = 0;
$self->{loop_delay} = 0;
+ $self->{lock_timeout} = $lock_timeout;
+
+ $self->{log_id} = $log_id;
+
return $self;
}
if (my $d = $data->{$lock_name}) {
my $tdiff = $ctime - $d->{time};
- if ($tdiff > 120) {
+ if ($tdiff > $self->{lock_timeout}) {
$res = 1;
- } elsif (($tdiff <= 120) && ($d->{node} eq $nodename)) {
+ } elsif (($tdiff <= $self->{lock_timeout}) && ($d->{node} eq $nodename)) {
delete $data->{$lock_name};
$res = 1;
} else {
my $tdiff = $ctime - $d->{time};
- if ($tdiff <= 120) {
+ if ($tdiff <= $self->{lock_timeout}) {
if ($d->{node} eq $nodename) {
$d->{time} = $ctime;
$res = 1;
my $time = $self->get_time();
- printf("%-5s %5d %10s: $msg\n", $level, $time, $self->{nodename});
+ printf("%-5s %5d %12s: $msg\n", $level, $time, "$self->{nodename}/$self->{log_id}");
}
sub get_time {
use File::Copy;
use File::Path qw(make_path remove_tree);
+my $watchdog_timeout = 180;
+my $lock_timeout = 200;
+
my $max_sim_time = 10000;
use PVE::HA::Sim::Env;
use PVE::HA::CRM;
+use PVE::HA::LRM;
# Status directory layout
#
foreach my $node (sort keys %$cstatus) {
- my $haenv = PVE::HA::Env->new('PVE::HA::Sim::Env', $node, $self);
+ $self->{nodes}->{$node} = {};
- die "HA is not enabled\n" if !$haenv->manager_status_exists();
+ $self->{nodes}->{$node}->{crm_env} =
+ PVE::HA::Env->new('PVE::HA::Sim::Env', $node, $self, 'crm', $lock_timeout);
- $haenv->log('info', "starting server");
+ $self->{nodes}->{$node}->{lrm_env} =
+ PVE::HA::Env->new('PVE::HA::Sim::Env', $node, $self, 'lrm', $lock_timeout);
- $self->{nodes}->{$node}->{haenv} = $haenv;
$self->{nodes}->{$node}->{crm} = undef; # create on power on
+ $self->{nodes}->{$node}->{lrm} = undef; # create on power on
}
return $self;
$id = 'hardware' if !$id;
- printf("%-5s %5d %10s: $msg\n", $level, $time, $id);
+ printf("%-5s %5d %12s: $msg\n", $level, $time, $id);
}
sub statusdir {
die "sim_hardware_cmd: no node specified" if !$node;
die "sim_hardware_cmd: unknown action '$action'" if $action !~ m/^(on|off)$/;
- my $haenv = $self->{nodes}->{$node}->{haenv};
- die "sim_hardware_cmd: no such node '$node'\n" if !$haenv;
+ my $d = $self->{nodes}->{$node};
+ die "sim_hardware_cmd: no such node '$node'\n" if !$d;
+
+ $self->log('info', "execute $cmdstr", $logid);
if ($cmd eq 'power') {
if ($cstatus->{$node}->{power} ne $action) {
- if ($action eq 'on') {
- my $crm = $self->{nodes}->{$node}->{crm} =
- PVE::HA::CRM->new($haenv);
- } elsif ($self->{nodes}->{$node}->{crm}) {
- $haenv->log('info', "server killed by poweroff", $logid);
- $self->{nodes}->{$node}->{crm} = undef;
+ if ($action eq 'on') {
+ $d->{crm} = PVE::HA::CRM->new($d->{crm_env}) if !$d->{crm};
+ $d->{lrm} = PVE::HA::LRM->new($d->{lrm_env}) if !$d->{lrm};
+ } else {
+ if ($d->{crm}) {
+ $d->{crm_env}->log('info', "killed by poweroff");
+ $d->{crm} = undef;
+ }
+ if ($d->{lrm}) {
+ $d->{lrm_env}->log('info', "killed by poweroff");
+ $d->{lrm} = undef;
+ }
}
}
die "sim_hardware_cmd: unknown command '$cmd'\n";
}
- $self->log('info', "execute $cmdstr", $logid);
-
$self->write_hardware_status_nolock($cstatus);
};
my @nodes = sort keys %{$self->{nodes}};
foreach my $node (@nodes) {
- my $haenv = $self->{nodes}->{$node}->{haenv};
- my $crm = $self->{nodes}->{$node}->{crm};
- next if !$crm;
+ my $d = $self->{nodes}->{$node};
+
+ if (my $crm = $d->{crm}) {
- $haenv->loop_start_hook($self->get_time());
+ $d->{crm_env}->loop_start_hook($self->get_time());
- die "implement me" if !$crm->do_one_iteration();
+ die "implement me (CRM exit)" if !$crm->do_one_iteration();
- $haenv->loop_end_hook();
+ $d->{crm_env}->loop_end_hook();
+
+ my $nodetime = $d->{crm_env}->get_time();
+ $self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
+ }
+
+ if (my $lrm = $d->{lrm}) {
+
+ $d->{lrm_env}->loop_start_hook($self->get_time());
+
+ die "implement me (LRM exit)" if !$lrm->do_one_iteration();
+
+ $d->{lrm_env}->loop_end_hook();
+
+ my $nodetime = $d->{lrm_env}->get_time();
+ $self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
+ }
- my $nodetime = $haenv->get_time();
- $self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
-
foreach my $n (@nodes) {
if (!$self->watchdog_check($n)) {
- $self->sim_hardware_cmd("power $n off", 'fencedev');
- $self->log('info', "server '$n' killed by poweroff (fencing)");
+ $self->sim_hardware_cmd("power $n off", 'watchdog');
+ $self->log('info', "server '$n' stopped by poweroff (watchdog)");
$self->{nodes}->{$n}->{crm} = undef;
+ $self->{nodes}->{$n}->{lrm} = undef;
}
}
}
my $ctime = $self->get_time();
my $tdiff = $ctime - $wd->{update_time};
- if ($tdiff > 60) { # expired
+ if ($tdiff > $watchdog_timeout) { # expired
$res = 0;
delete $wdstatus->{$wfh};
}
die "no such watchdog handle '$wfh'\n" if !defined($wd);
my $tdiff = $self->get_time() - $wd->{update_time};
- die "watchdog expired" if $tdiff > 60;
+ die "watchdog expired" if $tdiff > $watchdog_timeout;
delete $wdstatus->{$wfh};
my $ctime = $self->get_time();
my $tdiff = $ctime - $wd->{update_time};
- die "watchdog expired" if $tdiff > 60;
+ die "watchdog expired" if $tdiff > $watchdog_timeout;
$wd->{update_time} = $ctime;