]>
Commit | Line | Data |
---|---|---|
8b3f9144 DM |
1 | package PVE::HA::Sim::Hardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
0cfd8f5b DM |
8 | |
9 | use strict; | |
10 | use warnings; | |
11 | use POSIX qw(strftime EINTR); | |
12 | use Data::Dumper; | |
13 | use JSON; | |
14 | use IO::File; | |
15 | use Fcntl qw(:DEFAULT :flock); | |
787b66eb DM |
16 | use File::Copy; |
17 | use File::Path qw(make_path remove_tree); | |
0cfd8f5b | 18 | |
17b5cf98 | 19 | my $watchdog_timeout = 60; |
0bba8f60 | 20 | |
0cfd8f5b | 21 | |
787b66eb DM |
22 | # Status directory layout |
23 | # | |
24 | # configuration | |
25 | # | |
8456bde2 DM |
26 | # $testdir/cmdlist Command list for simulation |
27 | # $testdir/hardware_status Hardware description (number of nodes, ...) | |
28 | # $testdir/manager_status CRM status (start with {}) | |
29 | # $testdir/service_config Service configuration | |
30 | # $testdir/service_status_<node> Service status | |
3c36cbca | 31 | |
9329c1e2 DM |
32 | # |
33 | # runtime status for simulation system | |
34 | # | |
35 | # $testdir/status/cluster_locks Cluster locks | |
36 | # $testdir/status/hardware_status Hardware status (power/network on/off) | |
37 | # $testdir/status/watchdog_status Watchdog status | |
787b66eb DM |
38 | # |
39 | # runtime status | |
9329c1e2 | 40 | # |
8456bde2 DM |
41 | # $testdir/status/lrm_status_<node> LRM status |
42 | # $testdir/status/manager_status CRM status | |
43 | # $testdir/status/service_config Service configuration | |
44 | # $testdir/status/service_status_<node> Service status | |
c4a221bc DM |
45 | |
# Read the LRM status of $node from "<statusdir>/lrm_status_<node>".
#
# An empty hash ref is passed as second argument to read_json_from_file()
# (presumably the value used when the file is missing - confirm in
# PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/lrm_status_$node";

    return PVE::HA::Tools::read_json_from_file($path, {});
}
53 | ||
# Store $status_obj as the LRM status of $node in
# "<statusdir>/lrm_status_<node>".
#
# The result of write_json_to_file() is passed through to the caller.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $path = "$self->{statusdir}/lrm_status_$node";

    return PVE::HA::Tools::write_json_to_file($path, $status_obj);
}
787b66eb | 61 | |
# Read and decode "<statusdir>/hardware_status" (JSON).
#
# As the name says, no lock is taken here; callers that need a consistent
# view are expected to wrap the call in global_lock().
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $path = "$self->{statusdir}/hardware_status";

    my $json_text = PVE::Tools::file_get_contents($path);
    my $hw_status = decode_json($json_text);

    return $hw_status;
}
72 | ||
# Encode $cstatus as JSON and write it to "<statusdir>/hardware_status".
#
# No lock is taken here; see global_lock() for serialized access.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $path = "$self->{statusdir}/hardware_status";

    PVE::Tools::file_set_contents($path, encode_json($cstatus));
}
80 | ||
95360669 DM |
# Load "<statusdir>/service_config" and normalize every entry:
#
#  - each service must have a 'node' assigned, else we die
#  - only service IDs of the form 'pvevm:<digits>' are supported; for
#    those 'type' is set to 'pvevm' and 'name' to the numeric part
#  - a missing (or false) 'state' is set to 'disabled'
#
# Returns the resulting config hash ref.
sub read_service_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/service_config";
    my $services = PVE::HA::Tools::read_json_from_file($path);

    foreach my $sid (keys %$services) {
        my $entry = $services->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        # list assignment yields 0 in boolean context when nothing matched
        my ($vmid) = $sid =~ m/^pvevm:(\d+)$/
            or die "implement me";

        $entry->{type} = 'pvevm';
        $entry->{name} = $vmid;

        $entry->{state} = 'disabled' if !$entry->{state};
    }

    return $services;
}
103 | ||
79e0e005 DM |
# Remember $conf in $self->{service_config} and persist it to
# "<statusdir>/service_config".
#
# Returns whatever write_json_to_file() returns.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the in-memory copy in sync with what gets written
    $self->{service_config} = $conf;

    my $path = "$self->{statusdir}/service_config";

    return PVE::HA::Tools::write_json_to_file($path, $conf);
}
112 | ||
8456bde2 DM |
# Assign service $sid to $node and write the service configuration back.
#
# Dies with "no such service" if $sid is not part of the configuration.
sub change_service_location {
    my ($self, $sid, $node) = @_;

    my $services = $self->read_service_config();

    my $entry = $services->{$sid};
    die "no such service '$sid'\n" if !$entry;

    $entry->{node} = $node;

    $self->write_service_config($services);
}
124 | ||
3b996922 DM |
# Append $cmd (a single newline is added after chomp) to
# "<statusdir>/crm_commands".
#
# The read-modify-write cycle runs under global_lock(). Always returns
# undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $append_cmd = sub {
        my $path = "$self->{statusdir}/crm_commands";

        # start with an empty queue if the file does not exist yet
        my $queued = -f $path ? PVE::Tools::file_get_contents($path) : '';

        $queued .= "$cmd\n";

        PVE::Tools::file_set_contents($path, $queued);
    };

    $self->global_lock($append_cmd);

    return undef;
}
144 | ||
# Fetch and clear the queued CRM commands.
#
# Under global_lock(): returns the content of "<statusdir>/crm_commands"
# (or '' if the file does not exist) and then truncates the file, so
# each queued command is handed out only once.
sub read_crm_commands {
    my ($self) = @_;

    my $fetch_and_clear = sub {
        my $path = "$self->{statusdir}/crm_commands";

        my $pending = '';
        $pending = PVE::Tools::file_get_contents($path) if -f $path;

        # the file is (re)written empty even if it did not exist before
        PVE::Tools::file_set_contents($path, '');

        return $pending;
    };

    return $self->global_lock($fetch_and_clear);
}
162 | ||
# Read the service status of $node from
# "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/service_status_$node";

    return PVE::HA::Tools::read_json_from_file($path);
}
169 | ||
# Write $data as the service status of $node to
# "<statusdir>/service_status_<node>".
#
# Returns the result of write_json_to_file().
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $path = "$self->{statusdir}/service_status_$node";

    my $written = PVE::HA::Tools::write_json_to_file($path, $data);

    # fixme: add test if a service runs on two nodes!!!

    return $written;
}
180 | ||
0cfd8f5b DM |
# Create a new simulated hardware object for the test directory $testdir.
#
# The runtime status directory "$testdir/status" is removed and created
# from scratch, then populated:
#
#  - manager_status: copied from $testdir if possible (optional, errors
#    are ignored on purpose)
#  - service_config: copied from $testdir, or a default config with six
#    'pvevm' services spread over node1..node3 is written
#  - hardware_status: copied from $testdir, or a default with three
#    powered-off, disconnected nodes is written
#  - service_status_<node>: copied from $testdir, or written as {} for
#    every node listed in hardware_status
#
# Dies if $testdir is missing, if the status directory cannot be created
# or if one of the non-optional copies fails.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    # always start from a clean runtime status directory
    remove_tree($statusdir);
    mkdir($statusdir) ||
        die "unable to create directory '$statusdir' - $!\n";

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/service_config") {
        copy("$testdir/service_config", "$statusdir/service_config") ||
            die "Copy failed: $!\n";
    } else {
        my $conf = {
            'pvevm:101' => { node => 'node1' },
            'pvevm:102' => { node => 'node2' },
            'pvevm:103' => { node => 'node3' },
            'pvevm:104' => { node => 'node1' },
            'pvevm:105' => { node => 'node2' },
            'pvevm:106' => { node => 'node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
            die "Copy failed: $!\n";
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    # the set of nodes is defined by the keys of the hardware status
    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            copy("$testdir/service_status_$node", "$statusdir/service_status_$node") ||
                die "Copy failed: $!\n";
        } else {
            $self->write_service_status($node, {});
        }
    }

    # read back through read_service_config() so the cached copy is normalized
    $self->{service_config} = $self->read_service_config();

    return $self;
}
241 | ||
# Return the current simulation time.
#
# Abstract here: this base implementation always dies, a subclass has to
# provide the real one.
sub get_time {
    my ($self) = @_;

    die "implement in subclass";
}
247 | ||
# Print one log line to the currently selected output handle.
#
# Parameters:
#   $level - log level, printed left-aligned in a 5 character column
#   $msg   - message text; one trailing newline is removed before printing
#   $id    - optional source identifier, defaults to 'hardware'
#
# The time stamp is taken from $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass $msg as an argument, never as part of the format string -
    # else any '%' inside the message would be taken as a conversion
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
259 | ||
# Return the runtime status directory of this hardware object.
#
# The $node argument is accepted but not used - the same directory is
# returned for every node.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
265 | ||
# Run $code while holding an exclusive flock() on
# "<statusdir>/hardware.lck".
#
# Parameters:
#   $code  - code ref, called as $code->($fh, @param) in scalar context,
#            where $fh is the handle of the locked file
#   @param - extra arguments passed through to $code
#
# Returns the value returned by $code. Dies if the lock file cannot be
# opened or locked; an error raised by $code is re-thrown after the lock
# file has been closed (which also releases the lock).
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    # flock() may get interrupted by a signal (EINTR) - simply retry in
    # that case, any other error is final
    my $success;
    for (;;) {
        $success = flock($fh, LOCK_EX);
        last if $success || ($! != EINTR);
    }

    if (!$success) {
        my $lock_err = "$!"; # save it, close() may overwrite $!
        close($fh);
        die "can't aquire lock '$lockfile' - $lock_err\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
296 | ||
8b3f9144 DM |
# Compute per node online state and cluster quorum from the hardware
# status hash $cstatus (node name => { power => ..., network => ... }).
#
# A node counts as online if both 'power' and 'network' are 'on'. The
# cluster is quorate if more than half of all nodes are online. Without
# quorum every node is reported as offline.
#
# Returns the list ($node_info, $quorate), where $node_info maps
# node name => { online => 0|1 } and $quorate is 0 or 1.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my $node_info = {};

    my $node_count = 0;
    my $online_count = 0;

    foreach my $node (keys %$cstatus) {
        my $d = $cstatus->{$node};

        # a missing entry simply counts as "not on" (and must not warn)
        my $power_on = defined($d->{power}) && $d->{power} eq 'on';
        my $network_on = defined($d->{network}) && $d->{network} eq 'on';

        my $online = ($power_on && $network_on) ? 1 : 0;
        $node_info->{$node}->{online} = $online;

        $node_count++;
        $online_count++ if $online;
    }

    my $quorate = ($online_count > int($node_count/2)) ? 1 : 0;

    if (!$quorate) {
        foreach my $node (keys %$cstatus) {
            $node_info->{$node}->{online} = 0;
        }
    }

    return ($node_info, $quorate);
};
326 | ||
# Return the list ($node_info, $quorate) for the current hardware status.
#
# The hardware status is read under global_lock() and handed to
# $compute_node_info.
sub get_node_info {
    my ($self) = @_;

    my ($nodes, $has_quorum);

    $self->global_lock(sub {
        my $hw_status = $self->read_hardware_status_nolock();
        ($nodes, $has_quorum) = $compute_node_info->($self, $hw_status);
    });

    return ($nodes, $has_quorum);
}
341 | ||
342 | # simulate hardware commands | |
0cfd8f5b DM |
343 | # power <node> <on|off> |
344 | # network <node> <on|off> | |
345 | ||
# Execute the simulated hardware command $cmdstr; $logid is taken as a
# third argument.
#
# Abstract here: this base implementation always dies, a subclass has to
# provide the real one.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    die "implement in subclass";
}
351 | ||
# Run the simulation.
#
# Abstract here: this base implementation always dies, a subclass has to
# provide the real one.
sub run {
    my ($self) = @_;

    die "implement in subclass";
}
9329c1e2 DM |
357 | |
# Helper for all watchdog operations: load the watchdog status, let
# $code modify it and store the result - all under global_lock().
#
# $code is called with the current status hash ref (an empty hash if
# "<statusdir>/watchdog_status" does not exist) and must return the list
# ($new_status, $result). $new_status is written back as JSON, $result
# is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    my $locked_update = sub {
        my $path = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $path) {
            my $json_text = PVE::Tools::file_get_contents($path);
            $wdstatus = decode_json($json_text);
        }

        my $result;
        ($wdstatus, $result) = $code->($wdstatus);

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));

        return $result;
    };

    return $self->global_lock($locked_update);
};
383 | ||
# Check all watchdogs opened for $node.
#
# Returns 1 if none of them is expired. A watchdog whose last update is
# more than $watchdog_timeout ago is removed from the status and makes
# the check return 0. Watchdogs of other nodes are not touched.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_stale = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        foreach my $handle (keys %$wdstatus) {
            my $entry = $wdstatus->{$handle};
            next if $entry->{node} ne $node;

            my $now = $self->get_time();
            my $age = $now - $entry->{update_time};

            next unless $age > $watchdog_timeout;

            # expired
            $all_alive = 0;
            delete $wdstatus->{$handle};
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $expire_stale);
}
410 | ||
# counter used to build unique watchdog handle IDs inside this process
my $wdcounter = 0;

# Open a new watchdog for $node.
#
# Registers an entry with the current time as 'update_time' and returns
# its handle ID, built as "WD:<node>:<pid>:<counter>". Dies with
# "internal error" if that ID is already in use.
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter++;

        my $id = "WD:$node:$$:$wdcounter";

        die "internal error" if defined($wdstatus->{$id});

        my $entry = {
            node => $node,
            update_time => $self->get_time(),
        };
        $wdstatus->{$id} = $entry;

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
435 | ||
# Close the watchdog with handle $wfh and remove it from the status.
#
# Dies if the handle is unknown, or if the watchdog already expired
# (last update more than $watchdog_timeout ago).
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $unregister = sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        defined($entry) or die "no such watchdog handle '$wfh'\n";

        my $age = $self->get_time() - $entry->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        # no result value, only the modified status
        return ($wdstatus);
    };

    return $modify_watchog->($self, $unregister);
}
455 | ||
# Refresh the watchdog with handle $wfh: set its 'update_time' to the
# current time.
#
# Dies if the handle is unknown, or if the watchdog already expired
# (last update more than $watchdog_timeout ago).
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        defined($entry) or die "no such watchdog handle '$wfh'\n";

        my $now = $self->get_time();
        my $age = $now - $entry->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        $entry->{update_time} = $now;

        # no result value, only the modified status
        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
478 | ||
0cfd8f5b DM |
479 | |
480 | ||
481 | 1; |