]> git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/Sim/Hardware.pm
implement change_service_location
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
1 package PVE::HA::Sim::Hardware;
2
3 # Simulate Hardware resources
4
5 # power supply for nodes: on/off
6 # network connection to nodes: on/off
7 # watchdog devices for nodes
8
9 use strict;
10 use warnings;
11 use POSIX qw(strftime EINTR);
12 use Data::Dumper;
13 use JSON;
14 use IO::File;
15 use Fcntl qw(:DEFAULT :flock);
16 use File::Copy;
17 use File::Path qw(make_path remove_tree);
18 use PVE::HA::Config;
19
# Maximum age of a simulated watchdog entry: the watchdog subs below treat an
# entry as expired once get_time() minus its 'update_time' exceeds this value.
# NOTE(review): the unit is whatever get_time() returns - presumably seconds,
# confirm in the subclasses implementing get_time().
my $watchdog_timeout = 60;
21
22
23 # Status directory layout
24 #
25 # configuration
26 #
27 # $testdir/cmdlist Command list for simulation
28 # $testdir/hardware_status Hardware description (number of nodes, ...)
29 # $testdir/manager_status CRM status (start with {})
30 # $testdir/service_config Service configuration
31 # $testdir/groups HA groups configuration
32 # $testdir/service_status_<node> Service status
33
34 #
35 # runtime status for simulation system
36 #
37 # $testdir/status/cluster_locks Cluster locks
38 # $testdir/status/hardware_status Hardware status (power/network on/off)
39 # $testdir/status/watchdog_status Watchdog status
40 #
41 # runtime status
42 #
43 # $testdir/status/lrm_status_<node> LRM status
44 # $testdir/status/manager_status CRM status
45 # $testdir/status/crm_commands CRM command queue
46 # $testdir/status/service_config Service configuration
47 # $testdir/status/service_status_<node> Service status
48 # $testdir/status/groups HA groups configuration
49
# Load the LRM status object of $node from "<statusdir>/lrm_status_<node>".
#
# The empty hash given as second argument is handed through to
# PVE::HA::Tools::read_json_from_file() - presumably the default that is
# returned while the file does not exist yet (confirm against that helper).
sub read_lrm_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/lrm_status_$node", {});
}
57
# Store $status_obj as JSON in "<statusdir>/lrm_status_<node>".
#
# Returns whatever PVE::HA::Tools::write_json_to_file() returns.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $lrm_status_file = "$self->{statusdir}/lrm_status_$node";

    PVE::HA::Tools::write_json_to_file($lrm_status_file, $status_obj);
}
65
# Read and decode "<statusdir>/hardware_status".
#
# As the name says, this takes no lock itself; get_node_info() in this file
# wraps the call in global_lock().
#
# Returns the decoded JSON data structure.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $json = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");

    return decode_json($json);
}
76
# Encode $cstatus as JSON and write it to "<statusdir>/hardware_status".
#
# No locking is done here; callers are expected to serialize access
# themselves where that matters.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $target = "$self->{statusdir}/hardware_status";
    my $json = encode_json($cstatus);

    PVE::Tools::file_set_contents($target, $json);
}
84
# Read "<statusdir>/service_config" and normalize every service entry.
#
# For each service id the entry is extended in place with:
#   type  - 'pvevm' (the only service type handled so far)
#   name  - the numeric part of a 'pvevm:<digits>' service id
#   state - 'disabled' unless a true state is already set
#
# Dies if a service has no node assigned, or if the service id is not of
# the form 'pvevm:<digits>'.
#
# Returns the configuration hash, keyed by service id.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $service = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$service->{node};

        # list assignment yields the number of captures: 0 means no match
        my ($vmid) = $sid =~ m/^pvevm:(\d+)$/
            or die "implement me";

        $service->{type} = 'pvevm';
        $service->{name} = $vmid;

        $service->{state} = 'disabled' if !$service->{state};
    }

    return $conf;
}
107
# Replace the service configuration, both the cached copy in
# $self->{service_config} and the JSON file "<statusdir>/service_config".
#
# Returns the result of PVE::HA::Tools::write_json_to_file().
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the in-memory copy in step with what goes to disk
    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/service_config", $conf);
}
116
# Move service $sid from $current_node to $new_node in the service config.
#
# The configuration is re-read from disk, checked and written back.
# Dies if the service is unknown or if it is not currently assigned to
# $current_node.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();
    my $service = $conf->{$sid};

    die "no such service '$sid'\n" if !$service;

    if ($current_node ne $service->{node}) {
        die "current_node for '$sid' does not match ($current_node != $conf->{$sid}->{node})\n";
    }

    $service->{node} = $new_node;

    $self->write_service_config($conf);
}
131
# Append $cmd as one line to "<statusdir>/crm_commands".
#
# The read-append-write cycle runs under global_lock(). A trailing newline
# on $cmd is stripped first, so exactly one newline ends the queued line.
#
# Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    $self->global_lock(sub {
        my $filename = "$self->{statusdir}/crm_commands";

        # start from the already queued commands, if there are any
        my $data = -f $filename ? PVE::Tools::file_get_contents($filename) : '';
        $data .= "$cmd\n";

        PVE::Tools::file_set_contents($filename, $data);
    });

    return undef;
}
151
# Fetch and clear the CRM command queue in "<statusdir>/crm_commands".
#
# Under global_lock() the current file content is read (empty string if
# the file does not exist) and the file is then overwritten with an empty
# string, so each queued command is handed out once.
#
# Returns the raw queue content.
sub read_crm_commands {
    my ($self) = @_;

    return $self->global_lock(sub {
        my $filename = "$self->{statusdir}/crm_commands";

        my $pending = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

        PVE::Tools::file_set_contents($filename, '');

        return $pending;
    });
}
169
# Parse the HA group configuration stored in "<statusdir>/groups".
#
# A missing file is treated like an empty one. Returns the result of
# PVE::HA::Config::parse_groups_config().
sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";

    my $raw = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

    return PVE::HA::Config::parse_groups_config($filename, $raw);
}
179
# Load the service status of $node from
# "<statusdir>/service_status_<node>" via PVE::HA::Tools::read_json_from_file().
sub read_service_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/service_status_$node");
}
186
# Store $data as JSON in "<statusdir>/service_status_<node>".
#
# Returns the result of PVE::HA::Tools::write_json_to_file().
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $status_file = "$self->{statusdir}/service_status_$node";

    my $result = PVE::HA::Tools::write_json_to_file($status_file, $data);

    # fixme: add test if a service runs on two nodes!!!

    return $result;
}
197
# Fallback HA group configuration. new() writes this text to
# "<statusdir>/groups" when the test directory has no 'groups' file of its
# own. It defines one group per default node (node1..node3), each listing
# that node and setting 'nofailback 1'.
# NOTE(review): the property lines show no leading whitespace in this view;
# confirm the layout matches what PVE::HA::Config::parse_groups_config()
# expects before relying on it.
my $default_group_config = <<__EOD;
group: prefer_node1
nodes node1
nofailback 1

group: prefer_node2
nodes node2
nofailback 1

group: prefer_node3
nodes node3
nofailback 1
__EOD
211
# Create the simulated hardware for the test case stored in $testdir.
#
# The runtime directory "$testdir/status" is wiped and re-created, then
# seeded with the initial state:
#   manager_status         copied if possible (optional, errors ignored)
#   groups                 copied, or the built-in default group config
#   service_config         copied, or six default 'pvevm' services
#   hardware_status        copied, or three powered-off nodes node1..node3
#   service_status_<node>  copied, or an empty status, for every node
#                          listed in the hardware status
#
# Dies if $testdir is missing, if the status directory cannot be created,
# or if one of the existing (non-optional) seed files cannot be copied.
#
# Returns the new object; $self->{nodes} has one entry per node and
# $self->{service_config} holds the parsed service configuration.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    # a seed file that exists must arrive intact - a silently failed copy
    # would only surface later as a confusing read error
    my $copy_or_die = sub {
        my ($src, $dst) = @_;
        copy($src, $dst) || die "Copy failed: $!\n";
    };

    remove_tree($statusdir);
    if (!mkdir($statusdir)) {
        my $err = "$!"; # the -d test below may clobber $!
        # tolerate a directory that survived remove_tree()
        die "unable to create directory '$statusdir' - $err\n" if !-d $statusdir;
    }

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_or_die->("$testdir/groups", "$statusdir/groups");
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_or_die->("$testdir/service_config", "$statusdir/service_config");
    } else {
        my $conf = {
            'pvevm:101' => { node => 'node1', group => 'prefer_node1' },
            'pvevm:102' => { node => 'node2', group => 'prefer_node2' },
            'pvevm:103' => { node => 'node3', group => 'prefer_node3' },
            'pvevm:104' => { node => 'node1', group => 'prefer_node1' },
            'pvevm:105' => { node => 'node2', group => 'prefer_node2' },
            'pvevm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_or_die->("$testdir/hardware_status", "$statusdir/hardware_status");
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    # the hardware status defines which nodes exist in this simulation
    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            $copy_or_die->("$testdir/service_status_$node", "$statusdir/service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
278
# Abstract method: return the current simulation time.
#
# This base class has no clock of its own, so the stub always dies; a
# subclass has to override it.
sub get_time {
    my ($self) = @_;

    die "implement in subclass";
}
284
# Print one formatted log line to the currently selected output handle.
#
# Parameters:
#   $level - log level label, left-aligned in a 5 character column
#   $msg   - message text; one trailing newline is removed
#   $id    - optional origin of the message, defaults to 'hardware'
#
# The time stamp in the second column comes from $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass $msg as an argument: interpolated into the format string, a
    # literal '%' in the message would be parsed as a printf conversion
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
296
# Accessor for the runtime status directory of this simulation.
#
# A $node argument may be passed but is not used: the same directory is
# returned for every node.
sub statusdir {
    my ($self) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
302
# Run $code while holding an exclusive flock() on
# "<statusdir>/hardware.lck".
#
# Parameters:
#   $code  - code reference, called as $code->($fh, @param) where $fh is
#            the open lock file handle
#   @param - extra arguments handed through to $code
#
# The lock is released (the handle closed) before returning, also when
# $code dies; in that case the error is re-thrown afterwards.
#
# Dies if the lock file cannot be opened or the lock cannot be acquired.
# Returns the (scalar) result of $code.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    # separate mode argument, so the file name is never parsed for a mode
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    # Retry when flock() was merely interrupted by a signal; every other
    # failure is fatal. $code must never run without holding the lock.
    until (flock($fh, LOCK_EX)) {
        next if $! == EINTR;

        my $errmsg = "$!"; # close() may clobber $!
        close($fh);
        die "can't aquire lock '$lockfile' - $errmsg\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    # closing the handle drops the lock
    close($fh);

    die $err if $err;

    return $res;
}
333
# Derive node online state and cluster quorum from the hardware status.
#
# $cstatus maps node names to hashes with 'power' and 'network' entries.
# A node counts as online only when both are 'on'. The cluster is quorate
# when more than half of all nodes (integer division) are online; without
# quorum every node is reported as offline.
#
# Returns the list ($node_info, $quorate), where
# $node_info->{<node>}->{online} is 0 or 1 and $quorate is 0 or 1.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my $node_info = {};

    my @nodes = keys %$cstatus;
    my $online_count = 0;

    for my $node (@nodes) {
        my $hw = $cstatus->{$node};

        my $online = 0;
        $online = 1 if $hw->{power} eq 'on' && $hw->{network} eq 'on';

        $node_info->{$node}->{online} = $online;
        $online_count += $online;
    }

    my $quorate = 0;
    $quorate = 1 if $online_count > int(scalar(@nodes)/2);

    if (!$quorate) {
        # a partition without quorum sees no usable node at all
        $node_info->{$_}->{online} = 0 for @nodes;
    }

    return ($node_info, $quorate);
};
363
# Report the online state of all nodes and whether the cluster has quorum.
#
# The hardware status is read under global_lock() and evaluated by
# $compute_node_info.
#
# Returns the list ($node_info, $quorate).
sub get_node_info {
    my ($self) = @_;

    my ($node_info, $quorate);

    $self->global_lock(sub {
        my $hw_status = $self->read_hardware_status_nolock();
        ($node_info, $quorate) = $compute_node_info->($self, $hw_status);
    });

    return ($node_info, $quorate);
}
378
# simulate hardware commands
# power <node> <on|off>
# network <node> <on|off>
#
# Abstract method: $cmdstr is a command in the syntax above, $logid a
# log identifier. This base class only provides a stub that always dies;
# the implementation has to come from a subclass.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    die "implement in subclass";
}
388
# Abstract method: run the simulation.
#
# This base class only provides a stub that always dies; a subclass has
# to override it.
sub run {
    my ($self) = @_;

    die "implement in subclass";
}
394
# Helper for all read-modify-write cycles on "<statusdir>/watchdog_status".
#
# Under global_lock() the watchdog table is loaded (an empty hash if the
# file does not exist), handed to $code, and the table returned by $code
# is written back as JSON.
#
# $code is called as $code->($wdstatus) and must return the list
# ($wdstatus, $res). If $code dies, nothing is written.
#
# Returns $res.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $filename = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $filename) {
            my $json = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($json);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    });
};
420
# Drop every watchdog entry that belongs to $node from
# "<statusdir>/watchdog_status". Does nothing if that file does not exist.
#
# No lock is taken here (hence the name); the caller has to take care of
# serializing access.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    if (-f $filename) {
        my $json = PVE::Tools::file_get_contents($filename);
        my $wdstatus = decode_json($json);

        # collect the ids first, then remove them with one slice delete
        my @node_ids = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@node_ids};

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
    }
}
437
# Check all watchdogs opened for $node.
#
# Every entry of $node whose last update is more than $watchdog_timeout
# before the current get_time() is expired: it is removed from the
# watchdog table and makes the check fail.
#
# Returns 1 if no watchdog of $node expired, 0 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next if $wd->{node} ne $node;

            my $age = $self->get_time() - $wd->{update_time};
            next unless $age > $watchdog_timeout;

            # expired
            $all_alive = 0;
            delete $wdstatus->{$wfh};
        }

        return ($wdstatus, $all_alive);
    });
}
464
# Per-process counter, part of the watchdog handle ids built below.
my $wdcounter = 0;

# Open a new simulated watchdog for $node.
#
# A table entry with the node name and the current get_time() as
# 'update_time' is stored under a fresh handle id, built from the node
# name, the process id and the counter above.
#
# Dies with "internal error" if that id is already in use.
# Returns the handle id, to be passed to watchdog_update() and
# watchdog_close().
sub watchdog_open {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        $wdcounter += 1;

        my $id = "WD:$node:$$:$wdcounter";

        die "internal error" if defined($wdstatus->{$id});

        my $now = $self->get_time();
        $wdstatus->{$id} = { node => $node, update_time => $now };

        return ($wdstatus, $id);
    });
}
489
# Close the watchdog with handle id $wfh and remove it from the table.
#
# Dies if the handle is unknown, or if the watchdog already expired, i.e.
# its last update is more than $watchdog_timeout before get_time().
# In both cases the table is left untouched.
sub watchdog_close {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        die "no such watchdog handle '$wfh'\n" if !defined($wd);

        my $age = $self->get_time() - $wd->{update_time};
        die "watchdog expired" if $age > $watchdog_timeout;

        delete $wdstatus->{$wfh};

        # no result value for the caller, only the updated table
        return ($wdstatus);
    });
}
509
# Refresh the watchdog with handle id $wfh: its 'update_time' is set to
# the current get_time().
#
# Dies if the handle is unknown, or if the watchdog already expired, i.e.
# its last update is more than $watchdog_timeout ago. In both cases the
# table is left untouched.
sub watchdog_update {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        die "no such watchdog handle '$wfh'\n" if !defined($wd);

        my $now = $self->get_time();

        die "watchdog expired" if ($now - $wd->{update_time}) > $watchdog_timeout;

        $wd->{update_time} = $now;

        # no result value for the caller, only the updated table
        return ($wdstatus);
    });
}
532
533 1;