]>
Commit | Line | Data |
---|---|---|
bf93e2a2 DM |
1 | package PVE::HA::Sim::TestHardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
8 | ||
9 | use strict; | |
10 | use warnings; | |
11 | use POSIX qw(strftime EINTR); | |
12 | use Data::Dumper; | |
13 | use JSON; | |
14 | use IO::File; | |
15 | use Fcntl qw(:DEFAULT :flock); | |
16 | use File::Copy; | |
17 | use File::Path qw(make_path remove_tree); | |
18 | ||
bf93e2a2 DM |
19 | use PVE::HA::CRM; |
20 | use PVE::HA::LRM; | |
21 | ||
533d82ca | 22 | use PVE::HA::Sim::TestEnv; |
bf93e2a2 DM |
23 | use base qw(PVE::HA::Sim::Hardware); |
24 | ||
25 | my $max_sim_time = 10000; | |
26 | ||
# Create the test hardware simulator for the test case stored in $testdir.
#
# Calls the parent constructor with $testdir, decodes the JSON command list
# read from "$testdir/cmdlist", opens the log file "<statusdir>/log" in append
# mode and creates one CRM and one LRM environment per node. The CRM/LRM
# instances themselves stay undefined until the node gets powered on.
#
# Dies if the log file cannot be opened.
sub new {
    my ($this, $testdir) = @_;

    my $class = ref($this) || $this;

    my $self = $class->SUPER::new($testdir);

    my $raw = PVE::Tools::file_get_contents("$testdir/cmdlist");
    $self->{cmdlist} = decode_json($raw);

    $self->{loop_count} = 0;
    $self->{cur_time} = 0;

    my $statusdir = $self->statusdir();
    my $logfile = "$statusdir/log";

    # pass the open mode as separate argument, so that the file name is never
    # parsed for mode characters or surrounding whitespace
    $self->{logfh} = IO::File->new($logfile, '>>')
        || die "unable to open '$logfile' - $!";

    foreach my $node (sort keys %{$self->{nodes}}) {

        my $d = $self->{nodes}->{$node};

        $d->{crm_env} =
            PVE::HA::Env->new('PVE::HA::Sim::TestEnv', $node, $self, 'crm');

        $d->{lrm_env} =
            PVE::HA::Env->new('PVE::HA::Sim::TestEnv', $node, $self, 'lrm');

        $d->{crm} = undef; # create on power on
        $d->{lrm} = undef; # create on power on
    }

    return $self;
}
61 | ||
# Return the current simulation time stored in the 'cur_time' field.
sub get_time {
    my $self = shift;

    my $now = $self->{cur_time};

    return $now;
}
67 | ||
8a6e5294 DM |
# Print a log line to STDOUT and append it to the simulator log file.
#
# $level - log level string, printed left aligned in a 5 character column
# $msg   - message text, a trailing newline gets removed
# $id    - optional identifier of the log source, defaults to 'hardware'
#
# The line also contains the current simulation time from get_time(). The
# log file handle is flushed after each line.
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # hand over the message as argument instead of interpolating it into the
    # format string, else a '%' inside the message is parsed as a conversion
    my $line = sprintf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
    print $line;

    $self->{logfh}->print($line);
    $self->{logfh}->flush();
}
83 | ||
bf93e2a2 DM |
84 | # simulate hardware commands |
85 | # power <node> <on|off> | |
86 | # network <node> <on|off> | |
27a9e51d DM |
87 | # reboot <node> |
88 | # shutdown <node> | |
cadf64a6 | 89 | # restart-lrm <node> |
bf93e2a2 DM |
90 | |
# Run one simulated hardware command while holding the global lock.
#
# $cmdstr - whitespace separated command: "<cmd> <node> [<action>]"
# $logid  - identifier handed to log() for the "execute ..." message
#
# 'power' and 'network' require the action 'on' or 'off'; 'reboot',
# 'shutdown' and 'restart-lrm' only need a node. Dies if no node is given,
# the node is unknown, the action is invalid or the command is unknown.
#
# Returns the result of global_lock().
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $locked_code = sub {

        my $hw_status = $self->read_hardware_status_nolock();

        my ($cmd, $node, $action) = split(/\s+/, $cmdstr);

        if (!$node) {
            die "sim_hardware_cmd: no node specified";
        }

        my $d = $self->{nodes}->{$node};
        if (!$d) {
            die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            unless ($action =~ m/^(on|off)$/) {
                die "sim_hardware_cmd: unknown action '$action'";
            }

            # services are only touched when the power state really changes
            my $power_changed = $hw_status->{$node}->{power} ne $action;

            if ($power_changed && $action eq 'on') {
                # start the services, but keep instances that already exist
                $d->{crm} = PVE::HA::CRM->new($d->{crm_env}) if !$d->{crm};
                $d->{lrm} = PVE::HA::LRM->new($d->{lrm_env}) if !$d->{lrm};
                $d->{lrm_restart} = undef;
            } elsif ($power_changed) {
                # power loss: drop both services without a clean shutdown
                if ($d->{crm}) {
                    $d->{crm_env}->log('info', "killed by poweroff");
                    $d->{crm} = undef;
                }
                if ($d->{lrm}) {
                    $d->{lrm_env}->log('info', "killed by poweroff");
                    $d->{lrm} = undef;
                    $d->{lrm_restart} = undef;
                }
                $self->watchdog_reset_nolock($node);
                $self->write_service_status($node, {});
            }

            # the network state follows the power state, and any pending
            # reboot/shutdown request is cleared
            $hw_status->{$node}->{power} = $action;
            $hw_status->{$node}->{network} = $action;
            $hw_status->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($hw_status);

        } elsif ($cmd eq 'network') {
            unless ($action =~ m/^(on|off)$/) {
                die "sim_hardware_cmd: unknown network action '$action'";
            }

            $hw_status->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($hw_status);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            # remember which kind of shutdown was requested, it gets
            # evaluated once the LRM exits
            $hw_status->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($hw_status);

            $d->{lrm}->shutdown_request() if $d->{lrm};

        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $d->{lrm}->shutdown_request();
            }

        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

    };

    return $self->global_lock($locked_code);
}
161 | ||
# Main simulation loop.
#
# Each round runs one iteration of every existing CRM and LRM, checks the
# watchdogs, advances the simulated time by at least one loop time and, on
# every fifth counted round, applies the next entry of the command list.
#
# Returns once the command list is empty and more than 500 seconds passed
# since the last command. Dies with "simulation end" when the simulated time
# exceeds $max_sim_time.
sub run {
    my ($self) = @_;

    my $last_command_time = 0;
    my $next_cmd_at = 0;

    # move the global time forward if the given environment is ahead of it
    my $sync_time_from = sub {
        my ($env) = @_;

        my $nodetime = $env->get_time();
        if ($nodetime > $self->{cur_time}) {
            $self->{cur_time} = $nodetime;
        }
    };

    while (1) {

        my $loop_start = $self->get_time();

        my @nodes = sort keys %{$self->{nodes}};

        # two seconds per node, but at least 20 seconds per loop
        my $looptime = scalar(@nodes) * 2;
        if ($looptime < 20) {
            $looptime = 20;
        }

        if ($looptime >= 60) {
            die "unable to simulate so many nodes. You need to increate watchdog/lock timeouts.\n";
        }

        foreach my $node (@nodes) {

            my $d = $self->{nodes}->{$node};

            if (my $crm = $d->{crm}) {

                $d->{crm_env}->loop_start_hook($self->get_time());

                if (!$crm->do_one_iteration()) {
                    die "implement me (CRM exit)";
                }

                $d->{crm_env}->loop_end_hook();

                $sync_time_from->($d->{crm_env});
            }

            if (my $lrm = $d->{lrm}) {

                $d->{lrm_env}->loop_start_hook($self->get_time());

                my $exit_lrm = !$lrm->do_one_iteration();

                $d->{lrm_env}->loop_end_hook();

                $sync_time_from->($d->{lrm_env});

                if ($exit_lrm) {
                    $d->{lrm_env}->log('info', "exit (loop end)");
                    $d->{lrm} = undef;

                    my $hw_status = $self->read_hardware_status_nolock();
                    my $node_status = $hw_status->{$node}
                        || die "no node status for node '$node'";
                    my $shutdown = $node_status->{shutdown} || '';

                    if ($d->{lrm_restart}) {
                        if ($shutdown) {
                            die "lrm restart during shutdown - not implemented";
                        }
                        $d->{lrm_restart} = undef;
                        $d->{lrm} = PVE::HA::LRM->new($d->{lrm_env});
                    } elsif ($shutdown eq 'reboot') {
                        # a reboot is simulated as power cycle
                        $self->sim_hardware_cmd("power $node off", 'reboot');
                        $self->sim_hardware_cmd("power $node on", 'reboot');
                    } elsif ($shutdown eq 'shutdown') {
                        $self->sim_hardware_cmd("power $node off", 'shutdown');
                    } else {
                        die "unexpected LRM exit - not implemented"
                    }
                }
            }

            # check the watchdogs of all nodes after each simulated node
            foreach my $wd_node (@nodes) {
                next if $self->watchdog_check($wd_node);

                $self->sim_hardware_cmd("power $wd_node off", 'watchdog');
                $self->log('info', "server '$wd_node' stopped by poweroff (watchdog)");
                $self->{nodes}->{$wd_node}->{crm} = undef;
                $self->{nodes}->{$wd_node}->{lrm} = undef;
            }
        }

        # each loop consumes at least $looptime seconds of simulated time
        my $elapsed = $self->{cur_time} - $loop_start;
        if ($elapsed < $looptime) {
            $self->{cur_time} = $loop_start + $looptime;
        }

        if ($self->{cur_time} > $max_sim_time) {
            die "simulation end\n";
        }

        foreach my $node (@nodes) {
            my $d = $self->{nodes}->{$node};
            # forced time update
            $d->{lrm_env}->loop_start_hook($self->get_time());
            $d->{crm_env}->loop_start_hook($self->get_time());
        }

        # a pending 'delay' command blocks further commands; such rounds
        # are not counted in loop_count
        if ($self->{cur_time} < $next_cmd_at) {
            next;
        }

        # apply a new command after 5 loop iterations

        if (($self->{loop_count} % 5) == 0) {
            my $list = shift @{$self->{cmdlist}};

            # end simulation (500 seconds after last command)
            if (!$list && (($self->{cur_time} - $last_command_time) > 500)) {
                return;
            }

            foreach my $cmd (@$list) {
                $last_command_time = $self->{cur_time};

                if ($cmd =~ m/^delay\s+(\d+)\s*$/) {
                    $next_cmd_at = $self->{cur_time} + $1;
                } else {
                    $self->sim_hardware_cmd($cmd, 'cmdlist');
                }
            }
        }

        ++$self->{loop_count};
    }
}
278 | ||
279 | 1; |