]>
Commit | Line | Data |
---|---|---|
bf93e2a2 DM |
1 | package PVE::HA::Sim::TestHardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
8 | ||
9 | use strict; | |
10 | use warnings; | |
11 | use POSIX qw(strftime EINTR); | |
12 | use Data::Dumper; | |
13 | use JSON; | |
14 | use IO::File; | |
15 | use Fcntl qw(:DEFAULT :flock); | |
16 | use File::Copy; | |
17 | use File::Path qw(make_path remove_tree); | |
18 | ||
bf93e2a2 DM |
19 | use PVE::HA::CRM; |
20 | use PVE::HA::LRM; | |
21 | ||
533d82ca | 22 | use PVE::HA::Sim::TestEnv; |
bf93e2a2 DM |
23 | use base qw(PVE::HA::Sim::Hardware); |
24 | ||
25 | my $max_sim_time = 10000; | |
26 | ||
# Construct the test hardware simulator for the test case stored in $testdir.
#
# On top of what the parent constructor sets up, this
#  - loads the command list from "$testdir/cmdlist" (JSON) into {cmdlist},
#  - resets the loop counter and the simulated time to 0,
#  - opens "<statusdir>/log" in append mode as {logfh},
#  - creates one CRM and one LRM test environment per node.
#
# The CRM/LRM instances themselves stay undefined until the node is powered on.
# Dies if the log file cannot be opened.
sub new {
    my ($this, $testdir) = @_;

    my $class = ref($this) || $this;

    my $self = $class->SUPER::new($testdir);

    my $raw = PVE::Tools::file_get_contents("$testdir/cmdlist");
    $self->{cmdlist} = decode_json($raw);

    $self->{loop_count} = 0;
    $self->{cur_time} = 0;

    my $statusdir = $self->statusdir();
    my $logfile = "$statusdir/log";

    # Pass the open mode as a separate argument, so that no part of the
    # path can ever be interpreted as mode characters.
    $self->{logfh} = IO::File->new($logfile, '>>') ||
        die "unable to open '$logfile' - $!";

    foreach my $node (sort keys %{$self->{nodes}}) {

        my $d = $self->{nodes}->{$node};

        $d->{crm_env} =
            PVE::HA::Env->new('PVE::HA::Sim::TestEnv', $node, $self, 'crm');

        $d->{lrm_env} =
            PVE::HA::Env->new('PVE::HA::Sim::TestEnv', $node, $self, 'lrm');

        $d->{crm} = undef; # create on power on
        $d->{lrm} = undef; # create on power on
    }

    return $self;
}
61 | ||
# Return the current simulated time as stored in {cur_time}.
sub get_time {
    my $self = shift;

    my $now = $self->{cur_time};

    return $now;
}
67 | ||
8a6e5294 DM |
# Write one log line to STDOUT and to the log file handle in {logfh}.
#
# Parameters:
#   $level - log level string (printed left-aligned, 5 characters wide)
#   $msg   - message text; one trailing newline is removed
#   $id    - optional origin identifier, defaults to 'hardware'
#
# The line is prefixed with the current simulated time. The file handle is
# flushed after every line.
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # The message is passed as an argument instead of being interpolated
    # into the format string, so that a literal '%' inside the message is
    # printed verbatim and not interpreted as a conversion specification.
    my $line = sprintf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
    print $line;

    $self->{logfh}->print($line);
    $self->{logfh}->flush();
}
83 | ||
bf93e2a2 DM |
84 | # simulate hardware commands |
85 | # power <node> <on|off> | |
86 | # network <node> <on|off> | |
27a9e51d DM |
87 | # reboot <node> |
88 | # shutdown <node> | |
cadf64a6 | 89 | # restart-lrm <node> |
4ca70a17 | 90 | # service <sid> <enabled|disabled> |
7d5143d0 | 91 | # service <sid> <migrate|relocate> <target> |
bf93e2a2 DM |
92 | |
# Execute a single simulated hardware command.
#
# Parameters:
#   $cmdstr - whitespace separated command string, one of
#               power <node> <on|off>
#               network <node> <on|off>
#               reboot <node>
#               shutdown <node>
#               restart-lrm <node>
#               service <sid> <enabled|disabled>
#               service <sid> <migrate|relocate> <target>
#               service <sid> add <node>
#               service <sid> delete
#   $logid  - identifier used for the "execute ..." log line
#
# The whole command is run through $self->global_lock(), whose result is
# returned. Dies on unknown commands/actions, unknown nodes and missing
# arguments.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {

        my $cstatus = $self->read_hardware_status_nolock();

        # split(' ') ignores leading whitespace; split(/\s+/) would produce
        # an empty first field and shift all arguments by one position.
        my ($cmd, $objid, $action, $target) = split(' ', $cmdstr);

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        # A missing action is handled like an empty one, so the checks
        # below fail with their regular error message instead of emitting
        # "uninitialized value" warnings first.
        $action = '' if !defined($action);

        my ($node, $sid, $d);

        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'" if $action !~ m/^(on|off)$/;
            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {
                    # (re)create the daemons of the node on power on
                    $d->{crm} = PVE::HA::CRM->new($d->{crm_env}) if !$d->{crm};
                    $d->{lrm} = PVE::HA::LRM->new($d->{lrm_env}) if !$d->{lrm};
                    $d->{lrm_restart} = undef;
                } else {
                    # power off kills both daemons without a clean shutdown
                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }
                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # the network state always follows the power state
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            # remember the request type, it is evaluated when the LRM exits
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $d->{lrm}->shutdown_request() if $d->{lrm};
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $d->{lrm}->shutdown_request();
            }
        } elsif ($cmd eq 'service') {
            if ($action eq 'enabled' || $action eq 'disabled') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$target;

                $self->queue_crm_commands_nolock("$action $sid $target");

            } elsif ($action eq 'add') {

                $self->add_service($sid, {state => 'enabled', node => $target});

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n"
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

    };

    return $self->global_lock($code);
}
195 | ||
# Main simulation loop.
#
# Each round runs one CRM and one LRM iteration for every node that has the
# respective daemon, checks the watchdogs, advances the simulated time by at
# least one loop time and, on every 5th counted round, executes the next
# entry of the command list.
#
# Returns once the command list is used up and more than 500 simulated
# seconds passed since the last command. Dies with "simulation end" when the
# simulated time exceeds $max_sim_time.
sub run {
    my ($self) = @_;

    my $last_command_time = 0;
    my $next_cmd_at = 0;

    while (1) {

        my $starttime = $self->get_time();

        my @nodes = sort keys %{$self->{nodes}};

        # one round takes two seconds per node, but at least 20 seconds
        my $looptime = scalar(@nodes) * 2;
        if ($looptime < 20) {
            $looptime = 20;
        }

        die "unable to simulate so many nodes. You need to increate watchdog/lock timeouts.\n"
            if $looptime >= 60;

        foreach my $node (@nodes) {

            my $d = $self->{nodes}->{$node};

            my $crm = $d->{crm};
            if ($crm) {

                $d->{crm_env}->loop_start_hook($self->get_time());

                my $crm_alive = $crm->do_one_iteration();
                die "implement me (CRM exit)" if !$crm_alive;

                $d->{crm_env}->loop_end_hook();

                # the global time never runs behind the time of a node
                my $crm_time = $d->{crm_env}->get_time();
                if ($crm_time > $self->{cur_time}) {
                    $self->{cur_time} = $crm_time;
                }
            }

            my $lrm = $d->{lrm};
            if ($lrm) {

                $d->{lrm_env}->loop_start_hook($self->get_time());

                my $exit_lrm = !$lrm->do_one_iteration();

                $d->{lrm_env}->loop_end_hook();

                my $lrm_time = $d->{lrm_env}->get_time();
                if ($lrm_time > $self->{cur_time}) {
                    $self->{cur_time} = $lrm_time;
                }

                if ($exit_lrm) {
                    $d->{lrm_env}->log('info', "exit (loop end)");
                    $d->{lrm} = undef;

                    my $cstatus = $self->read_hardware_status_nolock();
                    my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
                    my $shutdown = $nstatus->{shutdown} || '';

                    if ($d->{lrm_restart}) {
                        # plain LRM restart: start a fresh instance
                        die "lrm restart during shutdown - not implemented" if $shutdown;
                        $d->{lrm_restart} = undef;
                        $d->{lrm} = PVE::HA::LRM->new($d->{lrm_env});
                    } elsif ($shutdown eq 'reboot') {
                        # a reboot is simulated as a power cycle
                        for my $state ('off', 'on') {
                            $self->sim_hardware_cmd("power $node $state", 'reboot');
                        }
                    } elsif ($shutdown eq 'shutdown') {
                        $self->sim_hardware_cmd("power $node off", 'shutdown');
                    } else {
                        die "unexpected LRM exit - not implemented"
                    }
                }
            }

            # check all watchdogs after each node got its turn
            foreach my $n (@nodes) {
                next if $self->watchdog_check($n);

                $self->sim_hardware_cmd("power $n off", 'watchdog');
                $self->log('info', "server '$n' stopped by poweroff (watchdog)");

                my $fenced = $self->{nodes}->{$n};
                $fenced->{crm} = undef;
                $fenced->{lrm} = undef;
            }
        }

        # every round consumes at least $looptime seconds
        my $min_time = $starttime + $looptime;
        if ($self->{cur_time} < $min_time) {
            $self->{cur_time} = $min_time;
        }

        die "simulation end\n" if $self->{cur_time} > $max_sim_time;

        # forced time update
        foreach my $node (@nodes) {
            my $d = $self->{nodes}->{$node};
            my $now = $self->get_time();
            $d->{lrm_env}->loop_start_hook($now);
            $d->{crm_env}->loop_start_hook($now);
        }

        # a pending 'delay' command blocks further commands (and the loop
        # counter) until the requested time is reached
        next if $self->{cur_time} < $next_cmd_at;

        # apply new command after 5 loop iterations

        if (($self->{loop_count} % 5) == 0) {
            my $list = shift @{$self->{cmdlist}};
            if (!$list) {
                # end simulation (500 seconds after last command)
                my $idle_time = $self->{cur_time} - $last_command_time;
                return if $idle_time > 500;
            }

            foreach my $cmd (@$list) {
                $last_command_time = $self->{cur_time};

                if ($cmd =~ m/^delay\s+(\d+)\s*$/) {
                    $next_cmd_at = $self->{cur_time} + $1;
                } else {
                    $self->sim_hardware_cmd($cmd, 'cmdlist');
                }
            }
        }

        ++$self->{loop_count};
    }
}
312 | ||
313 | 1; |