package PVE::HA::Sim::Hardware;

# Simulate Hardware resources

# power supply for nodes: on/off
# network connection to nodes: on/off
# watchdog devices for nodes

use strict;
use warnings;
use POSIX qw(strftime EINTR);
use Data::Dumper;
use JSON;
use IO::File;
use Fcntl qw(:DEFAULT :flock);
use File::Copy;
use File::Path qw(make_path remove_tree);
use PVE::HA::Config;

my $watchdog_timeout = 60;


# Status directory layout
#
# configuration
#
# $testdir/cmdlist                      Command list for simulation
# $testdir/hardware_status              Hardware description (number of nodes, ...)
# $testdir/manager_status               CRM status (start with {})
# $testdir/service_config               Service configuration
# $testdir/groups                       HA groups configuration
# $testdir/service_status_<node>        Service status

#
# runtime status for simulation system
#
# $testdir/status/cluster_locks         Cluster locks
# $testdir/status/hardware_status       Hardware status (power/network on/off)
# $testdir/status/watchdog_status       Watchdog status
#
# runtime status
#
# $testdir/status/lrm_status_<node>     LRM status
# $testdir/status/manager_status        CRM status
# $testdir/status/crm_commands          CRM command queue
# $testdir/status/service_config        Service configuration
# $testdir/status/service_status_<node> Service status
# $testdir/status/groups                HA groups configuration
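#
# Example (illustrative): the hardware_status file is a JSON object keyed by
# node name; when the test directory provides none, new() writes a default
# like
#
#   { "node1": { "power": "off", "network": "off" },
#     "node2": { "power": "off", "network": "off" },
#     "node3": { "power": "off", "network": "off" } }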

sub read_lrm_status {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/lrm_status_$node";

    return PVE::HA::Tools::read_json_from_file($filename, {});
}

sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $filename = "$self->{statusdir}/lrm_status_$node";

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}

sub read_hardware_status_nolock {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/hardware_status";

    my $raw = PVE::Tools::file_get_contents($filename);
    my $cstatus = decode_json($raw);

    return $cstatus;
}

sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $filename = "$self->{statusdir}/hardware_status";

    PVE::Tools::file_set_contents($filename, encode_json($cstatus));
};

sub read_service_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/service_config";
    my $conf = PVE::HA::Tools::read_json_from_file($filename);

    foreach my $sid (keys %$conf) {
        my $d = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$d->{node};

        if ($sid =~ m/^pvevm:(\d+)$/) {
            $d->{type} = 'pvevm';
            $d->{name} = $1;
        } else {
            die "implement me";
        }
        $d->{state} = 'disabled' if !$d->{state};
    }

    return $conf;
}
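# Example (illustrative): with the default configuration written by new(),
# read_service_config() returns entries of the form
#
#   'pvevm:101' => {
#       node => 'node1', group => 'prefer_node1',
#       type => 'pvevm', name => '101', state => 'disabled',
#   },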

sub write_service_config {
    my ($self, $conf) = @_;

    $self->{service_config} = $conf;

    my $filename = "$self->{statusdir}/service_config";
    return PVE::HA::Tools::write_json_to_file($filename, $conf);
}

sub change_service_location {
    my ($self, $sid, $node) = @_;

    my $conf = $self->read_service_config();

    die "no such service '$sid'\n" if !$conf->{$sid};

    $conf->{$sid}->{node} = $node;

    $self->write_service_config($conf);
}

sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $code = sub {
        my $data = '';
        my $filename = "$self->{statusdir}/crm_commands";
        if (-f $filename) {
            $data = PVE::Tools::file_get_contents($filename);
        }
        $data .= "$cmd\n";
        PVE::Tools::file_set_contents($filename, $data);
    };

    $self->global_lock($code);

    return undef;
}

sub read_crm_commands {
    my ($self) = @_;

    my $code = sub {
        my $data = '';

        my $filename = "$self->{statusdir}/crm_commands";
        if (-f $filename) {
            $data = PVE::Tools::file_get_contents($filename);
        }
        PVE::Tools::file_set_contents($filename, '');

        return $data;
    };

    return $self->global_lock($code);
}

sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";
    my $raw = '';
    $raw = PVE::Tools::file_get_contents($filename) if -f $filename;

    return PVE::HA::Config::parse_groups_config($filename, $raw);
}

sub read_service_status {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/service_status_$node";
    return PVE::HA::Tools::read_json_from_file($filename);
}

sub write_service_status {
    my ($self, $node, $data) = @_;

    my $filename = "$self->{statusdir}/service_status_$node";
    my $res = PVE::HA::Tools::write_json_to_file($filename, $data);

    # fixme: add test if a service runs on two nodes!!!

    return $res;
}

my $default_group_config = <<__EOD;
group: prefer_node1
    nodes node1
    nofailback 1

group: prefer_node2
    nodes node2
    nofailback 1

group: prefer_node3
    nodes node3
    nofailback 1
__EOD

sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    mkdir $statusdir;

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional
79e0e005 225
abc920b4
DM
226 if (-f "$testdir/groups") {
227 copy("$testdir/groups", "$statusdir/groups");
228 } else {
229 PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
230 }
231
79e0e005
DM
232 if (-f "$testdir/service_config") {
233 copy("$testdir/service_config", "$statusdir/service_config");
234 } else {
235 my $conf = {
abc920b4
DM
236 'pvevm:101' => { node => 'node1', group => 'prefer_node1' },
237 'pvevm:102' => { node => 'node2', group => 'prefer_node2' },
238 'pvevm:103' => { node => 'node3', group => 'prefer_node3' },
239 'pvevm:104' => { node => 'node1', group => 'prefer_node1' },
240 'pvevm:105' => { node => 'node2', group => 'prefer_node2' },
241 'pvevm:106' => { node => 'node3', group => 'prefer_node3' },
79e0e005
DM
242 };
243 $self->write_service_config($conf);
244 }
787b66eb 245
853f5867
DM
246 if (-f "$testdir/hardware_status") {
247 copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
248 die "Copy failed: $!\n";
249 } else {
250 my $cstatus = {
251 node1 => { power => 'off', network => 'off' },
252 node2 => { power => 'off', network => 'off' },
253 node3 => { power => 'off', network => 'off' },
254 };
255 $self->write_hardware_status_nolock($cstatus);
256 }
787b66eb 257
0cfd8f5b 258
8b3f9144 259 my $cstatus = $self->read_hardware_status_nolock();
0cfd8f5b
DM
260
261 foreach my $node (sort keys %$cstatus) {
0bba8f60 262 $self->{nodes}->{$node} = {};
8456bde2
DM
263
264 if (-f "$testdir/service_status_$node") {
265 copy("$testdir/service_status_$node", "$statusdir/service_status_$node");
266 } else {
267 $self->write_service_status($node, {});
268 }
0cfd8f5b
DM
269 }
270
95360669
DM
271 $self->{service_config} = $self->read_service_config();
272
0cfd8f5b
DM
273 return $self;
274}
275
276sub get_time {
277 my ($self) = @_;
278
bf93e2a2 279 die "implement in subclass";
0cfd8f5b
DM
280}
281
282sub log {
fde8362a 283 my ($self, $level, $msg, $id) = @_;
0cfd8f5b
DM
284
285 chomp $msg;
286
287 my $time = $self->get_time();
288
fde8362a
DM
289 $id = 'hardware' if !$id;
290
0bba8f60 291 printf("%-5s %5d %12s: $msg\n", $level, $time, $id);
0cfd8f5b
DM
292}
293
294sub statusdir {
295 my ($self, $node) = @_;
296
297 return $self->{statusdir};
298}
299
8b3f9144 300sub global_lock {
0cfd8f5b
DM
301 my ($self, $code, @param) = @_;
302
8b3f9144 303 my $lockfile = "$self->{statusdir}/hardware.lck";
0cfd8f5b
DM
304 my $fh = IO::File->new(">>$lockfile") ||
305 die "unable to open '$lockfile'\n";
306
307 my $success;
308 for (;;) {
309 $success = flock($fh, LOCK_EX);
310 if ($success || ($! != EINTR)) {
311 last;
312 }
313 if (!$success) {
9de9a6ce 314 close($fh);
0cfd8f5b
DM
            die "can't acquire lock '$lockfile' - $!\n";
        }
    }

    my $res;

    eval { $res = &$code($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
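#
# global_lock() usage sketch (illustrative): the passed code ref runs while an
# exclusive flock on "$statusdir/hardware.lck" is held, e.g.
#
#   $self->global_lock(sub {
#       my ($fh) = @_;
#       # read-modify-write files below $self->{statusdir} here
#   });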

my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my $node_info = {};

    my $node_count = 0;
    my $online_count = 0;

    foreach my $node (keys %$cstatus) {
        my $d = $cstatus->{$node};

        my $online = ($d->{power} eq 'on' && $d->{network} eq 'on') ? 1 : 0;
        $node_info->{$node}->{online} = $online;

        $node_count++;
        $online_count++ if $online;
    }

    my $quorate = ($online_count > int($node_count/2)) ? 1 : 0;

    if (!$quorate) {
        foreach my $node (keys %$cstatus) {
            my $d = $cstatus->{$node};
            $node_info->{$node}->{online} = 0;
        }
    }

    return ($node_info, $quorate);
};

sub get_node_info {
    my ($self) = @_;

    my ($node_info, $quorate);

    my $code = sub {
        my $cstatus = $self->read_hardware_status_nolock();
        ($node_info, $quorate) = &$compute_node_info($self, $cstatus);
    };

    $self->global_lock($code);

    return ($node_info, $quorate);
}
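#
# Example (illustrative): get_node_info() returns the per-node online state
# plus a quorum flag, e.g.
#
#   my ($node_info, $quorate) = $self->get_node_info();
#   # $node_info->{node1}->{online} is 1 only while node1 has power and
#   # network turned on and the cluster is quorate; $quorate is 0 or 1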

# simulate hardware commands
# power <node> <on|off>
# network <node> <on|off>
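#
# Example (illustrative; the command execution itself is implemented in
# subclasses which override sim_hardware_cmd):
#
#   $hardware->sim_hardware_cmd('power node1 off', $logid);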

sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    die "implement in subclass";
}

sub run {
    my ($self) = @_;

    die "implement in subclass";
}

my $modify_watchog = sub {
    my ($self, $code) = @_;

    my $update_cmd = sub {

        my $filename = "$self->{statusdir}/watchdog_status";

        my ($res, $wdstatus);

        if (-f $filename) {
            my $raw = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($raw);
        } else {
            $wdstatus = {};
        }

        ($wdstatus, $res) = &$code($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    };

    return $self->global_lock($update_cmd);
};

sub watchdog_check {
    my ($self, $node) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        my $res = 1;

        foreach my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next if $wd->{node} ne $node;

            my $ctime = $self->get_time();
            my $tdiff = $ctime - $wd->{update_time};

            if ($tdiff > $watchdog_timeout) { # expired
                $res = 0;
                delete $wdstatus->{$wfh};
            }
        }

        return ($wdstatus, $res);
    };

    return &$modify_watchog($self, $code);
}

my $wdcounter = 0;

sub watchdog_open {
    my ($self, $node) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        ++$wdcounter;

        my $id = "WD:$node:$$:$wdcounter";

        die "internal error" if defined($wdstatus->{$id});

        $wdstatus->{$id} = {
            node => $node,
            update_time => $self->get_time(),
        };

        return ($wdstatus, $id);
    };

    return &$modify_watchog($self, $code);
}

sub watchdog_close {
    my ($self, $wfh) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        die "no such watchdog handle '$wfh'\n" if !defined($wd);

        my $tdiff = $self->get_time() - $wd->{update_time};
        die "watchdog expired" if $tdiff > $watchdog_timeout;

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return &$modify_watchog($self, $code);
}

sub watchdog_update {
    my ($self, $wfh) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};

        die "no such watchdog handle '$wfh'\n" if !defined($wd);

        my $ctime = $self->get_time();
        my $tdiff = $ctime - $wd->{update_time};

        die "watchdog expired" if $tdiff > $watchdog_timeout;

        $wd->{update_time} = $ctime;

        return ($wdstatus);
    };

    return &$modify_watchog($self, $code);
}
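#
# Watchdog usage sketch (illustrative): callers open a watchdog for a node
# and must refresh it within $watchdog_timeout simulated seconds, e.g.
#
#   my $wfh = $self->watchdog_open($node);   # returns a handle id
#   $self->watchdog_update($wfh);            # refresh before the timeout
#   $self->watchdog_close($wfh);             # dies if it already expired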


1;