]> git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/Sim/Hardware.pm
Sim/Hardware: move set_service_state to base class
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
1 package PVE::HA::Sim::Hardware;
2
3 # Simulate Hardware resources
4
5 # power supply for nodes: on/off
6 # network connection to nodes: on/off
7 # watchdog devices for nodes
8
9 use strict;
10 use warnings;
11 use POSIX qw(strftime EINTR);
12 use Data::Dumper;
13 use JSON;
14 use IO::File;
15 use Fcntl qw(:DEFAULT :flock);
16 use File::Copy;
17 use File::Path qw(make_path remove_tree);
18 use PVE::HA::Config;
19
# Watchdog expiry threshold: a watchdog whose last update lies more than this
# many time units (as returned by get_time()) in the past counts as expired.
my $watchdog_timeout = 60;
21
22
23 # Status directory layout
24 #
25 # configuration
26 #
27 # $testdir/cmdlist Command list for simulation
28 # $testdir/hardware_status Hardware description (number of nodes, ...)
29 # $testdir/manager_status CRM status (start with {})
30 # $testdir/service_config Service configuration
31 # $testdir/groups HA groups configuration
32 # $testdir/service_status_<node> Service status
33
34 #
35 # runtime status for simulation system
36 #
37 # $testdir/status/cluster_locks Cluster locks
38 # $testdir/status/hardware_status Hardware status (power/network on/off)
39 # $testdir/status/watchdog_status Watchdog status
40 #
41 # runtime status
42 #
43 # $testdir/status/lrm_status_<node> LRM status
44 # $testdir/status/manager_status CRM status
45 # $testdir/status/crm_commands CRM command queue
46 # $testdir/status/service_config Service configuration
47 # $testdir/status/service_status_<node> Service status
48 # $testdir/status/groups HA groups configuration
49
# Load the LRM status of $node from "<statusdir>/lrm_status_<node>".
#
# An empty hash ref is handed to read_json_from_file as second argument
# (presumably the default used when no status exists yet - verify against
# PVE::HA::Tools).
sub read_lrm_status {
    my $self = shift;
    my $node = shift;

    my $path = join('/', $self->{statusdir}, "lrm_status_$node");

    return PVE::HA::Tools::read_json_from_file($path, {});
}
57
# Store $status_obj as the LRM status of $node in
# "<statusdir>/lrm_status_<node>". The result of write_json_to_file is
# passed through to the caller.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/lrm_status_$node",
        $status_obj,
    );
}
65
# Read "<statusdir>/hardware_status" and return the decoded JSON structure.
# As the name says, no lock is taken here.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $json = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");
    my $hw_status = decode_json($json);

    return $hw_status;
}
76
# Serialize $cstatus as JSON and write it to "<statusdir>/hardware_status".
# As the name says, no lock is taken here.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $path = "$self->{statusdir}/hardware_status";
    my $json = encode_json($cstatus);

    PVE::Tools::file_set_contents($path, $json);
}
84
# Read "<statusdir>/service_config" and normalize every entry in place:
#   - dies if an entry has no 'node'
#   - service IDs of the form "vm:<digits>" get type 'vm' and the digits
#     as 'name'; any other ID dies with "implement me"
#   - a missing/false 'state' becomes 'disabled'
# Returns the (modified) configuration hash.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" unless $entry->{node};

        # only "vm:<digits>" service IDs are handled so far
        die "implement me" unless $sid =~ m/^vm:(\d+)$/;

        $entry->{type} = 'vm';
        $entry->{name} = $1;

        $entry->{state} ||= 'disabled';
    }

    return $conf;
}
107
# Remember $conf as $self->{service_config} and write it as JSON to
# "<statusdir>/service_config". Returns whatever write_json_to_file returns.
sub write_service_config {
    my $self = shift;
    my ($conf) = @_;

    # keep the in-memory copy in sync with what goes to disk
    $self->{service_config} = $conf;

    my $path = "$self->{statusdir}/service_config";

    return PVE::HA::Tools::write_json_to_file($path, $conf);
}
116
# Set the 'state' of service $sid to $state and persist the configuration.
# Dies if $sid is not part of the service configuration.
# Returns the updated configuration hash.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'" if !$service;

    $service->{state} = $state;

    $self->write_service_config($conf);

    return $conf;
}
129
# Move service $sid from $current_node to $new_node in the service
# configuration and persist the result.
# Dies if the service is unknown or if $current_node is not the node the
# configuration currently records for it.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();
    my $service = $conf->{$sid};

    die "no such service '$sid'\n" if !$service;

    if ($current_node ne $service->{node}) {
        die "current_node for '$sid' does not match ($current_node != $service->{node})\n";
    }

    $service->{node} = $new_node;

    $self->write_service_config($conf);
}
144
# Append $cmd (with exactly one trailing newline) to
# "<statusdir>/crm_commands". The read-modify-write cycle runs under
# global_lock(). Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    $self->global_lock(sub {
        my $path = "$self->{statusdir}/crm_commands";

        # start with an empty queue if the file does not exist yet
        my $queue = (-f $path) ? PVE::Tools::file_get_contents($path) : '';

        PVE::Tools::file_set_contents($path, $queue . "$cmd\n");
    });

    return undef;
}
164
# Return the content of "<statusdir>/crm_commands" (empty string if the
# file does not exist) and truncate the file, all under global_lock().
sub read_crm_commands {
    my ($self) = @_;

    my $fetch_and_clear = sub {
        my $path = "$self->{statusdir}/crm_commands";

        my $pending = '';
        $pending = PVE::Tools::file_get_contents($path) if -f $path;

        # the queue is consumed: leave an empty file behind
        PVE::Tools::file_set_contents($path, '');

        return $pending;
    };

    return $self->global_lock($fetch_and_clear);
}
182
# Parse "<statusdir>/groups" with PVE::HA::Config::parse_groups_config and
# return the result. A missing file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/groups";
    my $content = (-f $path) ? PVE::Tools::file_get_contents($path) : '';

    return PVE::HA::Config::parse_groups_config($path, $content);
}
192
# Load the service status of $node from
# "<statusdir>/service_status_<node>".
sub read_service_status {
    my $self = shift;
    my $node = shift;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/service_status_$node",
    );
}
199
# Store $data as the service status of $node in
# "<statusdir>/service_status_<node>" and return the result of
# write_json_to_file.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $result = PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/service_status_$node",
        $data,
    );

    # fixme: add test if a service runs on two nodes!!!

    return $result;
}
210
# Group configuration that new() writes to "$statusdir/groups" when the test
# directory does not provide its own 'groups' file: one group per default
# node, each with 'nofailback 1'.
# NOTE(review): the property lines are assumed to be indented under their
# 'group:' header - confirm the exact leading whitespace expected by
# PVE::HA::Config::parse_groups_config.
my $default_group_config = <<__EOD;
group: prefer_node1
    nodes node1
    nofailback 1

group: prefer_node2
    nodes node2
    nofailback 1

group: prefer_node3
    nodes node3
    nofailback 1
__EOD
224
# Constructor: prepare a fresh simulation status directory below $testdir.
#
# "$testdir/status" is removed and re-created, then populated with the
# initial configuration:
#   - manager_status          copied if present (optional)
#   - groups                  copied, or the built-in default group config
#   - service_config          copied, or six default VMs on node1..node3
#   - hardware_status         copied, or three powered-off default nodes
#   - service_status_<node>   copied, or an empty status, for every node
#                             listed in the hardware status
#
# Dies if $testdir is missing, if the status directory cannot be created,
# or if copying one of the existing configuration files fails.
# Returns the blessed object with 'statusdir', 'nodes' and 'service_config'
# filled in.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    # always start from an empty status directory
    remove_tree($statusdir);
    if (!-d $statusdir) {
        mkdir($statusdir) || die "unable to create directory '$statusdir' - $!\n";
    }

    # a file that is known to exist must be copied successfully
    my $copy_or_die = sub {
        my ($src, $dst) = @_;
        copy($src, $dst) || die "Copy failed: $!\n";
    };

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_or_die->("$testdir/groups", "$statusdir/groups");
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_or_die->("$testdir/service_config", "$statusdir/service_config");
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_or_die->("$testdir/hardware_status", "$statusdir/hardware_status");
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    # the node list is taken from the hardware status that is now in place
    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            $copy_or_die->("$testdir/service_status_$node", "$statusdir/service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    # re-read so the cached config carries the normalized entries
    $self->{service_config} = $self->read_service_config();

    return $self;
}
291
# Abstract method: this base implementation only dies. Subclasses have to
# provide the time value that log() and the watchdog helpers work with.
sub get_time {
    my $self = shift;

    die "implement in subclass";
}
297
# Print one log line to the currently selected output handle, formatted as
# "<level> <time> <id>: <msg>".
#
#   $level  log level string
#   $msg    message text; one trailing newline is stripped
#   $id     optional source id, defaults to 'hardware'
#
# The time stamp comes from $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass $msg as an argument instead of interpolating it into the format
    # string, so that a literal '%' in the message is printed verbatim
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
309
# Accessor for the status directory path. A second ($node) argument is
# accepted but not used by this implementation.
sub statusdir {
    my $self = shift;

    return $self->{statusdir};
}
315
# Run $code while holding an exclusive flock() on
# "<statusdir>/hardware.lck".
#
#   $code   code ref, called as $code->($fh, @param) with the lock file
#           handle as first argument
#   @param  extra arguments handed through to $code
#
# Returns the (scalar) result of $code. Dies if the lock file cannot be
# opened or locked; an exception raised by $code is re-thrown after the
# lock file handle has been closed.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    # retry only when the call was interrupted by a signal; any other
    # outcome (success or a real error) ends the loop
    my $success;
    for (;;) {
        $success = flock($fh, LOCK_EX);
        last if $success || ($! != EINTR);
    }

    if (!$success) {
        my $reason = "$!"; # save it, close() may change $!
        close($fh);
        die "can't acquire lock '$lockfile' - $reason\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    # closing the handle also drops the lock
    close($fh);

    die $err if $err;

    return $res;
}
346
# Derive per-node online state and cluster quorum from the hardware status.
#
# A node is online when both its 'power' and 'network' are 'on'. The
# cluster is quorate when more than half of all nodes are online; without
# quorum every node is reported as offline.
#
# Returns ($node_info, $quorate) where $node_info maps
# node name => { online => 0|1 }.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @nodes = keys %$cstatus;

    my %info = map {
        my $hw = $cstatus->{$_};
        my $up = ($hw->{power} eq 'on' && $hw->{network} eq 'on') ? 1 : 0;
        ($_ => { online => $up });
    } @nodes;

    my $online_count = grep { $_->{online} } values %info;

    my $quorate = ($online_count > int(scalar(@nodes) / 2)) ? 1 : 0;

    if (!$quorate) {
        $_->{online} = 0 for values %info;
    }

    return (\%info, $quorate);
};
376
# Read the current hardware status (without locking) and return
# ($node_info, $quorate) as computed by $compute_node_info.
sub get_node_info {
    my $self = shift;

    my $hw_status = $self->read_hardware_status_nolock();

    my ($info, $has_quorum) = $compute_node_info->($self, $hw_status);

    return ($info, $has_quorum);
}
385
386 # simulate hardware commands
387 # power <node> <on|off>
388 # network <node> <on|off>
389
# Abstract method: this base implementation only dies. Subclasses are
# expected to handle the simulated hardware command string $cmdstr.
sub sim_hardware_cmd {
    my $self = shift;
    my ($cmdstr, $logid) = @_;

    die "implement in subclass";
}
395
# Abstract method: this base implementation only dies; subclasses have to
# override it.
sub run {
    my $self = shift;

    die "implement in subclass";
}
401
# Helper for all watchdog operations: under global_lock(), load
# "<statusdir>/watchdog_status" (an empty hash if the file is missing),
# call $code->($wdstatus), write the status it hands back and return its
# second return value.
#
# $code must return ($wdstatus, $res).
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $path = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $path) {
            my $json = PVE::Tools::file_get_contents($path);
            $wdstatus = decode_json($json);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));

        return $res;
    });
};
427
# Drop every watchdog entry that belongs to $node from
# "<statusdir>/watchdog_status". Does nothing if the file does not exist.
# As the name says, no lock is taken here.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/watchdog_status";

    if (-f $path) {
        my $json = PVE::Tools::file_get_contents($path);
        my $wdstatus = decode_json($json);

        my @owned = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@owned};

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));
    }
}
444
# Check the watchdogs of $node. Every watchdog of that node whose last
# update is more than $watchdog_timeout in the past is removed.
# Returns 0 if at least one watchdog had expired, 1 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_stale = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next unless $wd->{node} eq $node;

            my $age = $self->get_time() - $wd->{update_time};

            if ($age > $watchdog_timeout) { # expired
                delete $wdstatus->{$wfh};
                $all_alive = 0;
            }
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $expire_stale);
}
471
# Running number that makes watchdog handles created by this process unique.
my $wdcounter = 0;

# Register a new watchdog for $node, time-stamped with the current
# get_time(), and return its handle ("WD:<node>:<pid>:<counter>").
# Dies with "internal error" if the generated handle already exists.
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter++;

        my $id = join(':', 'WD', $node, $$, $wdcounter);

        die "internal error" if defined($wdstatus->{$id});

        $wdstatus->{$id} = { node => $node, update_time => $self->get_time() };

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
496
# Remove the watchdog with handle $wfh.
# Dies if the handle is unknown or if the watchdog has already expired
# (last update more than $watchdog_timeout ago).
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $unregister = sub {
        my ($wdstatus) = @_;

        defined(my $wd = $wdstatus->{$wfh})
            or die "no such watchdog handle '$wfh'\n";

        my $age = $self->get_time() - $wd->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $unregister);
}
516
# Refresh the watchdog with handle $wfh by setting its update time to the
# current get_time().
# Dies if the handle is unknown or if the watchdog has already expired
# (last update more than $watchdog_timeout ago).
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        defined(my $wd = $wdstatus->{$wfh})
            or die "no such watchdog handle '$wfh'\n";

        my $now = $self->get_time();

        if (($now - $wd->{update_time}) > $watchdog_timeout) {
            die "watchdog expired";
        }

        $wd->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
539
540 1;