]> git.proxmox.com Git - pve-ha-manager.git/blame - PVE/HA/Sim/Hardware.pm
add parser for ha groups (similar to failover domains)
[pve-ha-manager.git] / PVE / HA / Sim / Hardware.pm
CommitLineData
8b3f9144
DM
1package PVE::HA::Sim::Hardware;
2
3# Simulate Hardware resources
4
5# power supply for nodes: on/off
6# network connection to nodes: on/off
7# watchdog devices for nodes
0cfd8f5b
DM
8
9use strict;
10use warnings;
11use POSIX qw(strftime EINTR);
12use Data::Dumper;
13use JSON;
14use IO::File;
15use Fcntl qw(:DEFAULT :flock);
787b66eb
DM
16use File::Copy;
17use File::Path qw(make_path remove_tree);
0cfd8f5b 18
17b5cf98 19my $watchdog_timeout = 60;
0bba8f60 20
0cfd8f5b 21
787b66eb
DM
22# Status directory layout
23#
24# configuration
25#
8456bde2
DM
26# $testdir/cmdlist Command list for simulation
27# $testdir/hardware_status Hardware description (number of nodes, ...)
28# $testdir/manager_status CRM status (start with {})
29# $testdir/service_config Service configuration
30# $testdir/service_status_<node> Service status
3c36cbca 31
9329c1e2
DM
32#
33# runtime status for simulation system
34#
35# $testdir/status/cluster_locks Cluster locks
36# $testdir/status/hardware_status Hardware status (power/network on/off)
37# $testdir/status/watchdog_status Watchdog status
787b66eb
DM
38#
39# runtime status
9329c1e2 40#
8456bde2
DM
41# $testdir/status/lrm_status_<node> LRM status
42# $testdir/status/manager_status CRM status
43# $testdir/status/service_config Service configuration
44# $testdir/status/service_status_<node> Service status
c4a221bc
DM
45
# Read the LRM status object stored for $node in the status directory.
#
# The work is delegated to PVE::HA::Tools::read_json_from_file; the second
# argument ({}) is presumably the value used when the file does not exist
# yet - TODO confirm against PVE::HA::Tools.
sub read_lrm_status {
    my ($self, $node) = @_;

    my $status_path = "$self->{statusdir}/lrm_status_$node";

    return PVE::HA::Tools::read_json_from_file($status_path, {});
}
53
# Store $status_obj as the LRM status of $node in the status directory.
#
# Returns whatever PVE::HA::Tools::write_json_to_file returns.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $status_path = "$self->{statusdir}/lrm_status_$node";

    return PVE::HA::Tools::write_json_to_file($status_path, $status_obj);
}
787b66eb 61
# Read and decode the JSON file "<statusdir>/hardware_status".
#
# As the name says, no lock is taken here; callers that need a consistent
# view are expected to wrap the call in global_lock().
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $path = "$self->{statusdir}/hardware_status";

    my $json_text = PVE::Tools::file_get_contents($path);
    my $hw_status = decode_json($json_text);

    return $hw_status;
}
72
# Encode $cstatus as JSON and write it to "<statusdir>/hardware_status".
#
# No lock is taken here (see the sub name). Returns the result of
# PVE::Tools::file_set_contents.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $path = "$self->{statusdir}/hardware_status";
    my $json_text = encode_json($cstatus);

    PVE::Tools::file_set_contents($path, $json_text);
}
80
95360669
DM
# Load the service configuration from "<statusdir>/service_config" and
# fill in derived fields for every service entry.
#
# For each service id (sid):
#  - dies if the entry has no 'node' assigned
#  - only ids of the form "pvevm:<digits>" are supported; 'type' is set to
#    'pvevm' and 'name' to the numeric part (anything else dies)
#  - 'state' defaults to 'disabled' when missing or false
#
# Returns the (modified) configuration hash reference.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        # list assignment: $vmid stays undef when the pattern does not match
        my ($vmid) = $sid =~ m/^pvevm:(\d+)$/;
        die "implement me" if !defined($vmid);

        $entry->{type} = 'pvevm';
        $entry->{name} = $vmid;

        $entry->{state} ||= 'disabled';
    }

    return $conf;
}
103
79e0e005
DM
# Remember $conf as the current service configuration on the object and
# persist it to "<statusdir>/service_config".
#
# Returns the result of PVE::HA::Tools::write_json_to_file.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the in-memory copy in sync with what gets written
    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file("$self->{statusdir}/service_config", $conf);
}
112
8456bde2
DM
# Assign service $sid to $node and write the updated service configuration.
#
# Dies if $sid is not present in the configuration. Returns the result of
# write_service_config().
sub change_service_location {
    my ($self, $sid, $node) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'\n" if !$entry;

    $entry->{node} = $node;

    $self->write_service_config($conf);
}
124
3b996922
DM
# Append the command string $cmd as one line to "<statusdir>/crm_commands".
#
# The read-modify-write cycle runs under global_lock(). A trailing newline
# on $cmd is stripped first, so exactly one newline ends the stored line.
# Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    $self->global_lock(sub {
        my $path = "$self->{statusdir}/crm_commands";

        # start from the already queued commands, if there are any
        my $queued = '';
        $queued = PVE::Tools::file_get_contents($path) if -f $path;

        $queued .= "$cmd\n";

        PVE::Tools::file_set_contents($path, $queued);
    });

    return undef;
}
144
# Fetch all queued CRM commands and clear the queue.
#
# Under global_lock(), the content of "<statusdir>/crm_commands" is read
# (empty string if the file does not exist) and the file is then rewritten
# as empty. Returns the content that was read.
sub read_crm_commands {
    my ($self) = @_;

    return $self->global_lock(sub {
        my $path = "$self->{statusdir}/crm_commands";

        my $queued = '';
        $queued = PVE::Tools::file_get_contents($path) if -f $path;

        # truncate (or create) the queue file
        PVE::Tools::file_set_contents($path, '');

        return $queued;
    });
}
162
# Read the service status stored for $node in the status directory.
#
# Returns whatever PVE::HA::Tools::read_json_from_file returns for
# "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    my $status_path = "$self->{statusdir}/service_status_$node";

    return PVE::HA::Tools::read_json_from_file($status_path);
}
169
# Write $data as the service status of $node to
# "<statusdir>/service_status_<node>".
#
# Returns the result of PVE::HA::Tools::write_json_to_file.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $status_path = "$self->{statusdir}/service_status_$node";

    my $write_result = PVE::HA::Tools::write_json_to_file($status_path, $data);

    # fixme: add test if a service runs on two nodes!!!

    return $write_result;
}
180
0cfd8f5b
DM
# Constructor: set up a fresh simulation status directory below $testdir.
#
# Parameters:
#   $this    - class name or existing object (its class is reused)
#   $testdir - directory holding the test description; required
#
# What it does:
#   - removes and re-creates "$testdir/status"
#   - copies the initial configuration files from $testdir into the status
#     directory, or writes built-in defaults (three nodes, six pvevm
#     services) when 'service_config' / 'hardware_status' are missing
#   - registers every node found in the hardware status in $self->{nodes}
#     and makes sure a service status file exists for each of them
#   - caches the parsed service configuration in $self->{service_config}
#
# Dies when $testdir is missing, when the status directory cannot be
# created, or when copying an existing configuration file fails.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    # only fail if the directory really is not there afterwards
    mkdir($statusdir) || -d $statusdir ||
        die "unable to create directory '$statusdir' - $!\n";

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/service_config") {
        copy("$testdir/service_config", "$statusdir/service_config") ||
            die "Copy failed: $!\n";
    } else {
        my $conf = {
            'pvevm:101' => { node => 'node1' },
            'pvevm:102' => { node => 'node2' },
            'pvevm:103' => { node => 'node3' },
            'pvevm:104' => { node => 'node1' },
            'pvevm:105' => { node => 'node2' },
            'pvevm:106' => { node => 'node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
            die "Copy failed: $!\n";
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            copy("$testdir/service_status_$node", "$statusdir/service_status_$node") ||
                die "Copy failed: $!\n";
        } else {
            $self->write_service_status($node, {});
        }
    }

    # also validates the configuration (read_service_config dies on errors)
    $self->{service_config} = $self->read_service_config();

    return $self;
}
241
# Abstract method: subclasses must override it. The base implementation
# always dies. Other methods of this class use the returned value as a
# number (time arithmetic and %d formatting).
sub get_time {
    my $self = shift;

    die "implement in subclass";
}
247
# Print one log line to STDOUT in the form
#   "<level> <time> <id>: <msg>"
#
# Parameters:
#   $level - log level string (left aligned, 5 columns)
#   $msg   - message text; one trailing newline is stripped
#   $id    - optional source id, defaults to 'hardware'
#
# The time stamp comes from $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # pass $msg as an argument instead of interpolating it into the format,
    # otherwise a '%' inside the message is interpreted as a conversion
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
259
# Accessor: return the path of the simulation status directory.
# The second argument ($node) is accepted but not used.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
265
# Run $code while holding an exclusive flock on "<statusdir>/hardware.lck".
#
# Parameters:
#   $code  - code reference; called as $code->($fh, @param) where $fh is
#            the open lock file handle
#   @param - extra arguments passed through to $code
#
# Returns the (scalar) result of $code. The lock is released by closing
# the handle, also when $code dies; the error is then re-thrown.
# Dies if the lock file cannot be opened or the lock cannot be acquired.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    # explicit mode argument: the file name is never parsed for a mode
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    # retry while interrupted by a signal, stop on success or a real error
    my $success;
    for (;;) {
        $success = flock($fh, LOCK_EX);
        last if $success || ($! != EINTR);
    }

    if (!$success) {
        my $lock_err = $!; # close() may overwrite $!
        close($fh);
        die "can't aquire lock '$lockfile' - $lock_err\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
296
8b3f9144
DM
# Derive per-node online state and cluster quorum from the hardware status.
#
# Parameters:
#   $self    - not used by the computation itself
#   $cstatus - hash ref: node name => { power => 'on'|'off', network => 'on'|'off' }
#
# A node counts as online only if both power and network are 'on'. The
# cluster is quorate when more than half of all nodes are online; without
# quorum every node is reported as offline.
#
# Returns the list ($node_info, $quorate), where $node_info maps each node
# name to { online => 0|1 }.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @nodes = keys %$cstatus;

    my $node_info = {};
    my $online_count = 0;

    for my $node (@nodes) {
        my $hw = $cstatus->{$node};

        my $is_online = 0;
        $is_online = 1 if $hw->{power} eq 'on' && $hw->{network} eq 'on';

        $node_info->{$node}->{online} = $is_online;
        $online_count += $is_online;
    }

    # strict majority of all known nodes
    my $quorate = ($online_count > int(scalar(@nodes)/2)) ? 1 : 0;

    if (!$quorate) {
        $node_info->{$_}->{online} = 0 for @nodes;
    }

    return ($node_info, $quorate);
};
326
# Return the current node info and quorum state.
#
# Reads the hardware status under global_lock() and evaluates it with
# $compute_node_info. Returns the list ($node_info, $quorate).
sub get_node_info {
    my ($self) = @_;

    my ($node_info, $quorate);

    $self->global_lock(sub {
        my $hw_status = $self->read_hardware_status_nolock();
        ($node_info, $quorate) = $compute_node_info->($self, $hw_status);
    });

    return ($node_info, $quorate);
}
341
342# simulate hardware commands
0cfd8f5b
DM
343# power <node> <on|off>
344# network <node> <on|off>
345
# Abstract method: execute a simulated hardware command string.
# Subclasses must override it; the base implementation always dies.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = (shift, shift, shift);

    die "implement in subclass";
}
351
# Abstract method: subclasses must override it. The base implementation
# always dies.
sub run {
    my $self = shift;

    die "implement in subclass";
}
9329c1e2
DM
357
# Helper: load the watchdog status, let $code modify it, and store it again.
#
# Everything runs under global_lock(). The status is decoded from
# "<statusdir>/watchdog_status" (an empty hash if the file does not exist).
# $code is called with the status hash and must return the list
# ($new_status, $result); $new_status is written back as JSON and $result
# is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $path = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $path) {
            my $json_text = PVE::Tools::file_get_contents($path);
            $wdstatus = decode_json($json_text);
        }

        my $result;
        ($wdstatus, $result) = $code->($wdstatus);

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));

        return $result;
    });
};
383
# Check all watchdogs that belong to $node.
#
# A watchdog is expired when more than $watchdog_timeout time units have
# passed since its last update; expired entries are removed from the
# stored watchdog status.
#
# Returns 1 if no watchdog of $node is expired, 0 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next if $wd->{node} ne $node;

            my $now = $self->get_time();
            my $age = $now - $wd->{update_time};

            if ($age > $watchdog_timeout) { # expired
                $all_alive = 0;
                delete $wdstatus->{$wfh};
            }
        }

        return ($wdstatus, $all_alive);
    });
}
410
# per-process counter, makes watchdog ids unique within one process
my $wdcounter = 0;

# Register a new watchdog for $node.
#
# The new entry records the node name and the current time as update time.
# The handle id has the form "WD:<node>:<pid>:<counter>".
#
# Returns the handle id. Dies with "internal error" if the id is already
# in use.
sub watchdog_open {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        $wdcounter++;

        my $id = join(':', 'WD', $node, $$, $wdcounter);

        die "internal error" if defined($wdstatus->{$id});

        my $now = $self->get_time();
        $wdstatus->{$id} = { node => $node, update_time => $now };

        return ($wdstatus, $id);
    });
}
435
# Close (remove) the watchdog identified by handle $wfh.
#
# Dies if the handle is unknown, or if the watchdog already expired, i.e.
# more than $watchdog_timeout time units passed since its last update.
# The callback returns no result value, so the call yields undef.
sub watchdog_close {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        die "no such watchdog handle '$wfh'\n" if !defined($wd);

        my $age = $self->get_time() - $wd->{update_time};
        die "watchdog expired" if $age > $watchdog_timeout;

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    });
}
455
# Refresh the watchdog identified by handle $wfh: its update time is set
# to the current time.
#
# Dies if the handle is unknown, or if the watchdog already expired, i.e.
# more than $watchdog_timeout time units passed since its last update.
# The callback returns no result value, so the call yields undef.
sub watchdog_update {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        die "no such watchdog handle '$wfh'\n" if !defined($wd);

        my $now = $self->get_time();
        my $age = $now - $wd->{update_time};
        die "watchdog expired" if $age > $watchdog_timeout;

        $wd->{update_time} = $now;

        return ($wdstatus);
    });
}
478
0cfd8f5b
DM
479
480
4811;