]>
Commit | Line | Data |
---|---|---|
bf93e2a2 DM |
1 | package PVE::HA::Sim::TestHardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
8 | ||
9 | use strict; | |
10 | use warnings; | |
11 | use POSIX qw(strftime EINTR); | |
12 | use Data::Dumper; | |
13 | use JSON; | |
14 | use IO::File; | |
15 | use Fcntl qw(:DEFAULT :flock); | |
16 | use File::Copy; | |
17 | use File::Path qw(make_path remove_tree); | |
18 | ||
bf93e2a2 DM |
19 | use PVE::HA::CRM; |
20 | use PVE::HA::LRM; | |
21 | ||
533d82ca | 22 | use PVE::HA::Sim::TestEnv; |
bf93e2a2 DM |
23 | use base qw(PVE::HA::Sim::Hardware); |
24 | ||
# Upper bound for the simulated time: run() dies with "simulation end" as
# soon as the current simulation time exceeds this value.
my $max_sim_time = 10_000;
26 | ||
# Create a test hardware simulator for the test case stored in $testdir.
#
# Builds on the base class constructor, loads the command list from
# "$testdir/cmdlist" (JSON), opens "<statusdir>/log" for appending and
# creates a CRM and a LRM environment for every node. The CRM/LRM instances
# themselves stay undefined here; sim_hardware_cmd() creates them when a
# node gets powered on.
#
# Dies if the log file cannot be opened.
sub new {
    my ($this, $testdir) = @_;

    my $class = ref($this) || $this;

    my $self = $class->SUPER::new($testdir);

    my $raw = PVE::Tools::file_get_contents("$testdir/cmdlist");
    $self->{cmdlist} = decode_json($raw);

    $self->{loop_count} = 0;
    $self->{cur_time} = 0;

    my $statusdir = $self->statusdir();
    my $logfile = "$statusdir/log";

    # Pass the mode as a separate argument, so that the path is taken
    # literally and can never be mistaken for (part of) the open mode.
    $self->{logfh} = IO::File->new($logfile, '>>') ||
        die "unable to open '$logfile' - $!";

    foreach my $node (sort keys %{$self->{nodes}}) {

        my $d = $self->{nodes}->{$node};

        $d->{crm_env} =
            PVE::HA::Env->new('PVE::HA::Sim::TestEnv', $node, $self, 'crm');

        $d->{lrm_env} =
            PVE::HA::Env->new('PVE::HA::Sim::TestEnv', $node, $self, 'lrm');

        $d->{crm} = undef; # create on power on
        $d->{lrm} = undef; # create on power on
    }

    return $self;
}
61 | ||
# Return the current simulation time as stored in the hardware object.
sub get_time {
    my $self = shift;

    my $now = $self->{cur_time};

    return $now;
}
67 | ||
8a6e5294 DM |
# Write one log line to STDOUT and to the simulator log file.
#
# $level - log level string, printed left-aligned in a 5 character column
# $msg   - message text; a trailing newline is removed before formatting
# $id    - optional id of the logging component, defaults to 'hardware'
#
# The line also contains the current simulation time (see get_time()).
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # Hand $msg over as an argument instead of interpolating it into the
    # format string, so '%' characters inside a message are printed
    # literally and are not parsed as format directives.
    my $line = sprintf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
    print $line;

    # flush immediately, so the file is up to date after every message
    $self->{logfh}->print($line);
    $self->{logfh}->flush();
}
83 | ||
bf93e2a2 DM |
# simulate hardware commands
# power <node> <on|off>
# network <node> <on|off>
# reboot <node>
# shutdown <node>
# restart-lrm <node>
#
# $cmdstr is one of the commands above, $logid is the id used to log the
# execution of the command. The command is run through global_lock() and
# the result of that call is returned.
#
# Dies if no node is given, the node is not known, the command is not known,
# or a power/network command has a missing or invalid action.

sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {

        my $cstatus = $self->read_hardware_status_nolock();

        my ($cmd, $node, $action) = split(/\s+/, $cmdstr);

        die "sim_hardware_cmd: no node specified" if !$node;

        my $d = $self->{nodes}->{$node};
        die "sim_hardware_cmd: no such node '$node'\n" if !$d;

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            # check for a missing action first, matching undef against the
            # regex would only trigger 'uninitialized value' warnings
            die "sim_hardware_cmd: no action specified" if !defined($action);
            die "sim_hardware_cmd: unknown action '$action'" if $action !~ m/^(on|off)$/;

            # only start/kill the daemons if the power state really changes
            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {
                    $d->{crm} = PVE::HA::CRM->new($d->{crm_env}) if !$d->{crm};
                    $d->{lrm} = PVE::HA::LRM->new($d->{lrm_env}) if !$d->{lrm};
                    $d->{lrm_restart} = undef;
                } else {
                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }
                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # the network state follows the power state, and any pending
            # reboot/shutdown request is cleared
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: no action specified" if !defined($action);
            die "sim_hardware_cmd: unknown action '$action'" if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            # remember the request type, run() evaluates it once the LRM
            # has finished its shutdown
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $d->{lrm}->shutdown_request() if $d->{lrm};
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                # run() creates a new LRM instance when the old one exits
                $d->{lrm_restart} = 1;
                $d->{lrm}->shutdown_request();
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

    };

    return $self->global_lock($code);
}
160 | ||
# Main loop of the simulation.
#
# Every pass runs one iteration of the CRM and the LRM of each node which
# currently has them, checks the watchdogs of all nodes, advances the
# simulated time by at least $looptime (two units per node, but not less
# than 20) and pushes the new time to all node environments. Every 5th
# counted pass the next entry of the command list is executed; a
# 'delay <n>' command suspends command processing until the simulated time
# has advanced by <n>.
#
# Returns when the command list is empty and more than 500 time units have
# passed since the last command. Dies with "simulation end\n" once the
# simulated time exceeds $max_sim_time, and on the not implemented cases
# (CRM exit, unexpected LRM exit, LRM restart during shutdown, too many
# nodes).
sub run {
    my ($self) = @_;

    my $last_command_time = 0;
    my $next_cmd_at = 0;

    for (;;) {

        my $starttime = $self->get_time();

        my @nodes = sort keys %{$self->{nodes}};

        my $nodecount = scalar(@nodes);

        my $looptime = $nodecount*2;
        $looptime = 20 if $looptime < 20;

        die "unable to simulate so many nodes. You need to increase watchdog/lock timeouts.\n"
            if $looptime >= 60;

        foreach my $node (@nodes) {

            my $d = $self->{nodes}->{$node};

            if (my $crm = $d->{crm}) {

                $d->{crm_env}->loop_start_hook($self->get_time());

                die "implement me (CRM exit)" if !$crm->do_one_iteration();

                $d->{crm_env}->loop_end_hook();

                # the global time never runs backwards
                my $nodetime = $d->{crm_env}->get_time();
                $self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
            }

            if (my $lrm = $d->{lrm}) {

                $d->{lrm_env}->loop_start_hook($self->get_time());

                my $exit_lrm = !$lrm->do_one_iteration();

                $d->{lrm_env}->loop_end_hook();

                my $nodetime = $d->{lrm_env}->get_time();
                $self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};

                if ($exit_lrm) {
                    $d->{lrm_env}->log('info', "exit (loop end)");
                    $d->{lrm} = undef;
                    my $cstatus = $self->read_hardware_status_nolock();
                    my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
                    my $shutdown = $nstatus->{shutdown};
                    if ($d->{lrm_restart}) {
                        die "lrm restart during shutdown - not implemented" if $shutdown;
                        $d->{lrm_restart} = undef;
                        $d->{lrm} = PVE::HA::LRM->new($d->{lrm_env});
                    # $shutdown is undef if nobody requested a reboot or
                    # shutdown, so check that before comparing strings
                    } elsif (defined($shutdown) && $shutdown eq 'reboot') {
                        $self->sim_hardware_cmd("power $node off", 'reboot');
                        $self->sim_hardware_cmd("power $node on", 'reboot');
                    } elsif (defined($shutdown) && $shutdown eq 'shutdown') {
                        $self->sim_hardware_cmd("power $node off", 'shutdown');
                    } else {
                        die "unexpected LRM exit - not implemented"
                    }
                }
            }

            # check the watchdogs of all nodes after each node iteration
            foreach my $n (@nodes) {
                if (!$self->watchdog_check($n)) {
                    $self->sim_hardware_cmd("power $n off", 'watchdog');
                    $self->log('info', "server '$n' stopped by poweroff (watchdog)");
                    $self->{nodes}->{$n}->{crm} = undef;
                    $self->{nodes}->{$n}->{lrm} = undef;
                }
            }
        }

        # each pass takes at least $looptime
        $self->{cur_time} = $starttime + $looptime
            if ($self->{cur_time} - $starttime) < $looptime;

        die "simulation end\n" if $self->{cur_time} > $max_sim_time;

        foreach my $node (@nodes) {
            my $d = $self->{nodes}->{$node};
            # forced time update
            $d->{lrm_env}->loop_start_hook($self->get_time());
            $d->{crm_env}->loop_start_hook($self->get_time());
        }

        # a 'delay' command is still pending (loop_count is not incremented)
        next if $self->{cur_time} < $next_cmd_at;

        # apply new command after 5 loop iterations

        if (($self->{loop_count} % 5) == 0) {
            my $list = shift @{$self->{cmdlist}};
            if (!$list) {
                # end simulation (500 seconds after last command)
                return if (($self->{cur_time} - $last_command_time) > 500);
            }

            # $list is undef once the command list is exhausted
            foreach my $cmd (@{$list || []}) {
                $last_command_time = $self->{cur_time};

                if ($cmd =~ m/^delay\s+(\d+)\s*$/) {
                    $next_cmd_at = $self->{cur_time} + $1;
                } else {
                    $self->sim_hardware_cmd($cmd, 'cmdlist');
                }
            }
        }

        ++$self->{loop_count};
    }
}
277 | ||
278 | 1; |