]>
git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/Sim/TestHardware.pm
6b2968f7b172d5f78e27bfe090f4f271635ca739
1 package PVE
::HA
::Sim
::TestHardware
;
3 # Simulate Hardware resources
5 # power supply for nodes: on/off
6 # network connection to nodes: on/off
7 # watchdog devices for nodes
11 use POSIX
qw(strftime EINTR);
14 use Fcntl
qw(:DEFAULT :flock);
16 use File
::Path
qw(make_path remove_tree);
21 use PVE
::HA
::Sim
::TestEnv
;
22 use base
qw(PVE::HA::Sim::Hardware);
24 my $max_sim_time = 10000;
27 my ($this, $testdir) = @_;
29 my $class = ref($this) || $this;
31 my $self = $class->SUPER::new
($testdir);
33 my $raw = PVE
::Tools
::file_get_contents
("$testdir/cmdlist");
34 $self->{cmdlist
} = decode_json
($raw);
36 $self->{loop_count
} = 0;
37 $self->{cur_time
} = 0;
39 my $statusdir = $self->statusdir();
40 my $logfile = "$statusdir/log";
41 $self->{logfh
} = IO
::File-
>new(">>$logfile") ||
42 die "unable to open '$logfile' - $!";
44 foreach my $node (sort keys %{$self->{nodes
}}) {
46 my $d = $self->{nodes
}->{$node};
49 PVE
::HA
::Env-
>new('PVE::HA::Sim::TestEnv', $node, $self, 'crm');
52 PVE
::HA
::Env-
>new('PVE::HA::Sim::TestEnv', $node, $self, 'lrm');
54 $d->{crm
} = undef; # create on power on
55 $d->{lrm
} = undef; # create on power on
64 return $self->{cur_time
};
68 my ($self, $level, $msg, $id) = @_;
72 my $time = $self->get_time();
74 $id = 'hardware' if !$id;
76 my $line = sprintf("%-5s %5d %12s: $msg\n", $level, $time, $id);
79 $self->{logfh
}->print($line);
80 $self->{logfh
}->flush();
83 # for controlling the resource manager services (CRM and LRM)
85 my ($self, $action, $data, $lock_fh) = @_;
87 if ($action eq 'start') {
88 return PVE
::HA
::CRM-
>new($data->{crm_env
});
89 } elsif ($action eq 'stop') {
90 # nothing to do: sim_hardware_cmd sets us to undef, that's enough
91 } elsif ($action eq 'shutdown') {
92 $data->{crm
}->shutdown_request();
94 die "unknown CRM control action: '$action'\n";
99 my ($self, $action, $data, $lock_fh) = @_;
101 if ($action eq 'start') {
102 return PVE
::HA
::LRM-
>new($data->{lrm_env
});
103 } elsif ($action eq 'stop') {
104 # nothing to do: sim_hardware_cmd sets us to undef, that's enough
105 } elsif ($action eq 'shutdown') {
106 $data->{lrm
}->shutdown_request();
108 die "unknown LRM control action: '$action'\n";
116 my $last_command_time = 0;
121 my $starttime = $self->get_time();
123 my @nodes = sort keys %{$self->{nodes
}};
125 my $nodecount = scalar(@nodes);
127 my $looptime = $nodecount*2;
128 $looptime = 20 if $looptime < 20;
130 die "unable to simulate so many nodes. You need to increate watchdog/lock timeouts.\n"
133 foreach my $node (@nodes) {
135 my $d = $self->{nodes
}->{$node};
137 if (my $crm = $d->{crm
}) {
139 my $exit_crm = !$crm->do_one_iteration();
141 my $nodetime = $d->{crm_env
}->get_time();
142 $self->{cur_time
} = $nodetime if $nodetime > $self->{cur_time
};
145 $d->{crm_env
}->log('info', "exit (loop end)");
148 my $cstatus = $self->read_hardware_status_nolock();
149 my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
150 my $shutdown = $nstatus->{shutdown} || '';
151 if ($shutdown eq 'reboot') {
152 $self->sim_hardware_cmd("power $node off", 'reboot');
153 $self->sim_hardware_cmd("power $node on", 'reboot');
154 } elsif ($shutdown eq 'shutdown') {
155 $self->sim_hardware_cmd("power $node off", 'shutdown');
156 } elsif (!$d->{crm_stop
}) {
157 die "unexpected CRM exit - not implemented"
159 $d->{crm_stop
} = undef;
163 if (my $lrm = $d->{lrm
}) {
165 my $exit_lrm = !$lrm->do_one_iteration();
167 my $nodetime = $d->{lrm_env
}->get_time();
168 $self->{cur_time
} = $nodetime if $nodetime > $self->{cur_time
};
171 $d->{lrm_env
}->log('info', "exit (loop end)");
173 my $cstatus = $self->read_hardware_status_nolock();
174 my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
175 my $shutdown = $nstatus->{shutdown} || '';
176 if ($d->{lrm_restart
}) {
177 die "lrm restart during shutdown - not implemented" if $shutdown;
178 $d->{lrm_restart
} = undef;
179 $d->{lrm
} = PVE
::HA
::LRM-
>new($d->{lrm_env
});
180 } elsif ($shutdown eq 'reboot' || $shutdown eq 'shutdown') {
181 # exit the LRM before the CRM to reflect real world behaviour
182 $self->sim_hardware_cmd("crm $node stop", $shutdown);
184 die "unexpected LRM exit - not implemented"
189 foreach my $n (@nodes) {
190 if (!$self->watchdog_check($n)) {
191 $self->sim_hardware_cmd("power $n off", 'watchdog');
192 $self->log('info', "server '$n' stopped by poweroff (watchdog)");
193 $self->{nodes
}->{$n}->{crm
} = undef;
194 $self->{nodes
}->{$n}->{lrm
} = undef;
200 $self->{cur_time
} = $starttime + $looptime
201 if ($self->{cur_time
} - $starttime) < $looptime;
203 die "simulation end\n" if $self->{cur_time
} > $max_sim_time;
205 foreach my $node (@nodes) {
206 my $d = $self->{nodes
}->{$node};
208 $d->{lrm_env
}->loop_start_hook();
209 $d->{crm_env
}->loop_start_hook();
212 next if $self->{cur_time
} < $next_cmd_at;
214 # apply a new command every 5 loop iterations
216 if (($self->{loop_count
} % 5) == 0) {
217 my $list = shift @{$self->{cmdlist
}};
219 # end simulation (500 seconds after last command)
220 return if (($self->{cur_time
} - $last_command_time) > 500);
223 foreach my $cmd (@$list) {
224 $last_command_time = $self->{cur_time
};
226 if ($cmd =~ m/^delay\s+(\d+)\s*$/) {
227 $self->log('info', "execute $cmd", 'cmdlist');
228 $next_cmd_at = $self->{cur_time
} + $1;
230 $self->sim_hardware_cmd($cmd, 'cmdlist');
235 ++$self->{loop_count
};