]>
Commit | Line | Data |
---|---|---|
8b3f9144 DM |
1 | package PVE::HA::Sim::Hardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
0cfd8f5b DM |
8 | |
9 | use strict; | |
10 | use warnings; | |
11 | use POSIX qw(strftime EINTR); | |
12 | use Data::Dumper; | |
13 | use JSON; | |
14 | use IO::File; | |
15 | use Fcntl qw(:DEFAULT :flock); | |
787b66eb DM |
16 | use File::Copy; |
17 | use File::Path qw(make_path remove_tree); | |
f7cb19c6 | 18 | use PVE::HA::Config 'testenv'; |
f5a14b93 | 19 | |
17b5cf98 | 20 | my $watchdog_timeout = 60; |
0bba8f60 | 21 | |
0cfd8f5b | 22 | |
787b66eb DM |
23 | # Status directory layout |
24 | # | |
25 | # configuration | |
26 | # | |
8456bde2 DM |
27 | # $testdir/cmdlist Command list for simulation |
28 | # $testdir/hardware_status Hardware description (number of nodes, ...) | |
29 | # $testdir/manager_status CRM status (start with {}) | |
30 | # $testdir/service_config Service configuration | |
abc920b4 | 31 | # $testdir/groups HA groups configuration |
8456bde2 | 32 | # $testdir/service_status_<node> Service status |
3c36cbca | 33 | |
9329c1e2 DM |
34 | # |
35 | # runtime status for simulation system | |
36 | # | |
37 | # $testdir/status/cluster_locks Cluster locks | |
38 | # $testdir/status/hardware_status Hardware status (power/network on/off) | |
39 | # $testdir/status/watchdog_status Watchdog status | |
787b66eb DM |
40 | # |
41 | # runtime status | |
9329c1e2 | 42 | # |
8456bde2 DM |
43 | # $testdir/status/lrm_status_<node> LRM status |
44 | # $testdir/status/manager_status CRM status | |
abc920b4 | 45 | # $testdir/status/crm_commands CRM command queue |
8456bde2 DM |
46 | # $testdir/status/service_config Service configuration |
47 | # $testdir/status/service_status_<node> Service status | |
abc920b4 | 48 | # $testdir/status/groups HA groups configuration |
c4a221bc DM |
49 | |
# Load the LRM status object stored for $node.
#
# Reads "<statusdir>/lrm_status_<node>" through
# PVE::HA::Tools::read_json_from_file. The empty hash passed as second
# argument is presumably the fallback for a missing file - TODO confirm
# against PVE::HA::Tools.
sub read_lrm_status {
    my ($self, $node) = @_;

    my $status_file = join('/', $self->{statusdir}, "lrm_status_$node");
    my $fallback = {};

    return PVE::HA::Tools::read_json_from_file($status_file, $fallback);
}
57 | ||
# Store $status_obj as the LRM status of $node.
#
# Writes "<statusdir>/lrm_status_<node>" and returns whatever
# PVE::HA::Tools::write_json_to_file returns.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $status_file = join('/', $self->{statusdir}, "lrm_status_$node");

    return PVE::HA::Tools::write_json_to_file($status_file, $status_obj);
}
787b66eb | 65 | |
# Read and decode "<statusdir>/hardware_status".
#
# No locking is done here; the caller has to serialize access itself if
# needed. Returns the decoded JSON structure.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $status_file = sprintf('%s/hardware_status', $self->{statusdir});

    my $json_text = PVE::Tools::file_get_contents($status_file);
    my $hw_status = decode_json($json_text);

    return $hw_status;
}
76 | ||
# Encode $cstatus as JSON and write it to "<statusdir>/hardware_status".
#
# No locking is done here; the caller has to serialize access itself if
# needed.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $status_file = sprintf('%s/hardware_status', $self->{statusdir});
    my $json_text = encode_json($cstatus);

    PVE::Tools::file_set_contents($status_file, $json_text);
}
84 | ||
95360669 DM |
# Read "<statusdir>/service_config" and normalize every entry.
#
# For each service ID:
#  - dies if no node is assigned,
#  - derives 'type' and 'name' from IDs of the form "vm:<digits>" or
#    "ct:<digits>" (any other ID is not implemented and dies),
#  - sets 'state' to 'disabled' if it is unset/false.
#
# Returns the (modified) configuration hash.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        $entry->{node}
            or die "service '$sid' without assigned node!";

        my ($type, $vmid) = $sid =~ m/^(vm|ct):(\d+)$/
            or die "implement me";
        @$entry{qw(type name)} = ($type, $vmid);

        $entry->{state} ||= 'disabled';
    }

    return $conf;
}
107 | ||
79e0e005 DM |
# Persist the service configuration $conf.
#
# The object's cached copy ($self->{service_config}) is replaced first,
# then the data is written to "<statusdir>/service_config". Returns the
# result of PVE::HA::Tools::write_json_to_file.
sub write_service_config {
    my ($self, $conf) = @_;

    $self->{service_config} = $conf;

    my $config_file = join('/', $self->{statusdir}, 'service_config');

    return PVE::HA::Tools::write_json_to_file($config_file, $conf);
}
116 | ||
e5f43426 TL |
# Set the 'state' of service $sid to $state and write the configuration
# back. Dies if the service is unknown. Returns the updated configuration.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid}
        or die "no such service '$sid'";
    $service->{state} = $state;

    $self->write_service_config($conf);

    return $conf;
}
129 | ||
27ccc95c TL |
# Add a new service $sid with the options hash $opts and write the
# configuration back. Dies if $sid is already configured. Returns the
# updated configuration.
sub add_service {
    my ($self, $sid, $opts) = @_;

    my $conf = $self->read_service_config();

    if ($conf->{$sid}) {
        die "resource ID '$sid' already defined\n";
    }

    $conf->{$sid} = $opts;
    $self->write_service_config($conf);

    return $conf;
}
142 | ||
# Remove service $sid from the configuration and write it back. Dies if
# the service is unknown. Returns the updated configuration.
sub delete_service {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'";
    }

    delete $conf->{$sid};
    $self->write_service_config($conf);

    return $conf;
}
156 | ||
# Move service $sid from $current_node to $new_node in the configuration.
#
# Dies if the service is unknown or if $current_node does not match the
# node currently recorded for the service. Returns the result of
# write_service_config.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid}
        or die "no such service '$sid'\n";

    my $recorded_node = $service->{node};
    if ($current_node ne $recorded_node) {
        die "current_node for '$sid' does not match ($current_node != $recorded_node)\n";
    }

    $service->{node} = $new_node;

    return $self->write_service_config($conf);
}
171 | ||
# Append $cmd (one line) to "<statusdir>/crm_commands".
#
# A trailing newline of $cmd is stripped and exactly one is re-added. No
# locking is done here; see queue_crm_commands for the locked variant.
# Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $queue_file = "$self->{statusdir}/crm_commands";

    # start from the already queued commands, if there are any
    my $pending = -f $queue_file
        ? PVE::Tools::file_get_contents($queue_file)
        : '';

    PVE::Tools::file_set_contents($queue_file, $pending . "$cmd\n");

    return undef;
}
187 | ||
# Append $cmd to the CRM command queue while holding the global lock.
# Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub { $self->queue_crm_commands_nolock($cmd); });

    return undef;
}
197 | ||
# Fetch and clear the CRM command queue while holding the global lock.
#
# Returns the previous content of "<statusdir>/crm_commands" (empty string
# if the file did not exist); the file is (re)written empty afterwards.
sub read_crm_commands {
    my ($self) = @_;

    return $self->global_lock(sub {
        my $queue_file = "$self->{statusdir}/crm_commands";

        my $pending = '';
        $pending = PVE::Tools::file_get_contents($queue_file) if -f $queue_file;

        # truncate the queue, everything read is handed to the caller
        PVE::Tools::file_set_contents($queue_file, '');

        return $pending;
    });
}
215 | ||
abc920b4 DM |
# Parse "<statusdir>/groups" with PVE::HA::Config::parse_groups_config.
# A missing file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";

    my $raw = -f $filename
        ? PVE::Tools::file_get_contents($filename)
        : '';

    return PVE::HA::Config::parse_groups_config($filename, $raw);
}
225 | ||
# Read the service status stored for $node from
# "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    my $status_file = join('/', $self->{statusdir}, "service_status_$node");

    return PVE::HA::Tools::read_json_from_file($status_file);
}
232 | ||
# Write $data as the service status of $node to
# "<statusdir>/service_status_<node>". Returns the result of
# PVE::HA::Tools::write_json_to_file.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $status_file = join('/', $self->{statusdir}, "service_status_$node");
    my $written = PVE::HA::Tools::write_json_to_file($status_file, $data);

    # fixme: add test if a service runs on two nodes!!!

    return $written;
}
243 | ||
abc920b4 DM |
# Default HA group configuration. new() writes this text to
# "$statusdir/groups" when the test directory contains no 'groups' file.
# It defines one group per default node (node1..node3), each listing a
# single node and setting 'nofailback 1'.
# NOTE(review): the leading whitespace of the property lines is assumed to
# be four spaces - confirm against the repository version of this file.
my $default_group_config = <<__EOD;
group: prefer_node1
    nodes node1
    nofailback 1

group: prefer_node2
    nodes node2
    nofailback 1

group: prefer_node3
    nodes node3
    nofailback 1
__EOD
257 | ||
0cfd8f5b DM |
# Create a new simulated hardware environment below $testdir.
#
# The runtime directory "$testdir/status" is removed and re-created, then
# populated with the initial configuration: files present in $testdir are
# copied, missing ones are replaced by built-in defaults (three nodes,
# six VMs, one group per node). manager_status is optional and simply
# skipped when it cannot be copied.
#
# Parameters:
#   $this    - class name or object (its class is reused)
#   $testdir - directory holding the test configuration (required)
#
# Returns the blessed object with 'statusdir', 'nodes' and
# 'service_config' set. Dies if $testdir is missing, if the status
# directory cannot be created, or if an existing configuration file
# cannot be copied.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    if (!mkdir($statusdir)) {
        my $err = $!;
        # only fatal if there really is no usable directory afterwards
        die "unable to create directory '$statusdir' - $err\n" if !-d $statusdir;
    }

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        copy("$testdir/groups", "$statusdir/groups") ||
            die "Copy failed: $!\n";
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        copy("$testdir/service_config", "$statusdir/service_config") ||
            die "Copy failed: $!\n";
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
            die "Copy failed: $!\n";
    } else {
        my $default_hw = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($default_hw);
    }

    # the node list is derived from the hardware status
    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            copy("$testdir/service_status_$node", "$statusdir/service_status_$node") ||
                die "Copy failed: $!\n";
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
324 | ||
# Return the current simulation time. Abstract: this base implementation
# always dies, subclasses have to override it.
sub get_time {
    my $self = shift;

    die "implement in subclass";
}
330 | ||
# Print a log line to STDOUT.
#
# Parameters:
#   $level - log level string (left aligned, 5 columns)
#   $msg   - message text; a trailing newline is stripped
#   $id    - optional origin of the message, defaults to 'hardware'
#
# The line is prefixed with the simulation time from get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass $msg as an argument instead of interpolating it into the format,
    # otherwise a '%' inside the message is parsed as a conversion
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
342 | ||
# Accessor for the runtime status directory. The $node argument is
# accepted but not used by this implementation.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
348 | ||
# Run $code while holding an exclusive flock on
# "<statusdir>/hardware.lck".
#
# Parameters:
#   $code  - code reference, called as $code->($fh, @param) in scalar
#            context, where $fh is the locked file handle
#   @param - extra arguments passed through to $code
#
# Returns the value returned by $code. Dies if the lock file cannot be
# opened or locked; an error raised by $code is re-thrown after the lock
# has been released (by closing the handle).
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    # separate mode argument, so the file name is never parsed for a mode
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    while (!flock($fh, LOCK_EX)) {
        next if $! == EINTR; # interrupted by a signal - simply try again

        # any other error is fatal, never continue without the lock
        my $lock_err = $!;
        close($fh);
        die "can't acquire lock '$lockfile' - $lock_err\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
379 | ||
8b3f9144 DM |
# Derive per-node online state and cluster quorum from the hardware
# status hash $cstatus (node name => { power => ..., network => ... }).
#
# A node counts as online when both power and network are 'on'. The
# cluster is quorate when more than half of all nodes are online; without
# quorum every node is reported offline.
#
# Returns the list ($node_info, $quorate), with $node_info mapping each
# node name to { online => 0|1 }. $self is not used.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @nodes = keys %$cstatus;

    my %node_info = map {
        my $hw = $cstatus->{$_};
        my $up = ($hw->{power} eq 'on' && $hw->{network} eq 'on') ? 1 : 0;
        ($_ => { online => $up });
    } @nodes;

    my $online_count = grep { $node_info{$_}->{online} } @nodes;

    my $quorate = ($online_count > int(scalar(@nodes) / 2)) ? 1 : 0;

    if (!$quorate) {
        $_->{online} = 0 for values %node_info;
    }

    return (\%node_info, $quorate);
};
409 | ||
# Return ($node_info, $quorate) computed from the current hardware
# status. The status file is read without taking the global lock.
sub get_node_info {
    my ($self) = @_;

    my $hw_status = $self->read_hardware_status_nolock();

    my ($node_info, $quorate) = $compute_node_info->($self, $hw_status);

    return ($node_info, $quorate);
}
418 | ||
419 | # simulate hardware commands | |
0cfd8f5b DM |
420 | # power <node> <on|off> |
421 | # network <node> <on|off> | |
422 | ||
# Execute the simulated hardware command $cmdstr, logging under $logid.
# Abstract: this base implementation always dies, subclasses have to
# override it.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    die "implement in subclass";
}
428 | ||
# Run the simulation. Abstract: this base implementation always dies,
# subclasses have to override it.
sub run {
    my $self = shift;

    die "implement in subclass";
}
9329c1e2 DM |
434 | |
# Read-modify-write helper for "<statusdir>/watchdog_status".
#
# While holding the global lock, the current watchdog status is loaded
# (empty hash if the file does not exist) and handed to $code, which has
# to return the list ($new_status, $result). $new_status is written back
# and $result is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $filename = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $filename) {
            my $json_text = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($json_text);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    });
};
460 | ||
0590c6a7 DM |
# Remove all watchdog entries belonging to $node from
# "<statusdir>/watchdog_status". Nothing happens if the file does not
# exist. No locking is done here.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    if (-f $filename) {
        my $wdstatus = decode_json(scalar(PVE::Tools::file_get_contents($filename)));

        # drop every handle that was opened for this node
        my @stale = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @$wdstatus{@stale};

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
    }
}
477 | ||
9329c1e2 DM |
# Check the watchdogs of $node.
#
# Every watchdog of the node that was not updated for more than
# $watchdog_timeout is removed from the status. Returns 1 if none of the
# node's watchdogs expired, 0 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next if $wd->{node} ne $node;

            my $age = $self->get_time() - $wd->{update_time};
            next if $age <= $watchdog_timeout;

            # expired
            $all_alive = 0;
            delete $wdstatus->{$wfh};
        }

        return ($wdstatus, $all_alive);
    });
}
504 | ||
505 | my $wdcounter = 0; | |
506 | ||
# Open a new watchdog for $node.
#
# The handle ID is built from the node name, the process ID and a
# per-process counter. The new entry records the node and the current
# simulation time as last update. Returns the handle ID.
sub watchdog_open {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $id = join(':', 'WD', $node, $$, ++$wdcounter);

        die "internal error" if defined($wdstatus->{$id});

        my $now = $self->get_time();
        $wdstatus->{$id} = { node => $node, update_time => $now };

        return ($wdstatus, $id);
    });
}
529 | ||
# Close the watchdog identified by handle $wfh.
#
# Dies if the handle is unknown or if the watchdog already expired (last
# update older than $watchdog_timeout); otherwise the entry is removed.
sub watchdog_close {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        defined($wd)
            or die "no such watchdog handle '$wfh'\n";

        my $age = $self->get_time() - $wd->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    });
}
549 | ||
# Refresh the watchdog identified by handle $wfh.
#
# Dies if the handle is unknown or if the watchdog already expired (last
# update older than $watchdog_timeout); otherwise its update time is set
# to the current simulation time.
sub watchdog_update {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        defined($wd)
            or die "no such watchdog handle '$wfh'\n";

        my $now = $self->get_time();
        if ($now - $wd->{update_time} > $watchdog_timeout) {
            die "watchdog expired";
        }

        $wd->{update_time} = $now;

        return ($wdstatus);
    });
}
572 | ||
0cfd8f5b | 573 | 1; |