]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Sim/Hardware.pm
sim: hardware: commands: make it possible to add already running service
[pve-ha-manager.git] / src / PVE / HA / Sim / Hardware.pm
CommitLineData
8b3f9144
DM
1package PVE::HA::Sim::Hardware;
2
3# Simulate Hardware resources
4
5# power supply for nodes: on/off
6# network connection to nodes: on/off
7# watchdog devices for nodes
0cfd8f5b
DM
8
9use strict;
10use warnings;
a0a7d11e 11
0cfd8f5b 12use Fcntl qw(:DEFAULT :flock);
787b66eb
DM
13use File::Copy;
14use File::Path qw(make_path remove_tree);
a0a7d11e
TL
15use IO::File;
16use JSON;
17use POSIX qw(strftime EINTR);
18
c982dfee 19use PVE::HA::FenceConfig;
7d33cb12 20use PVE::HA::Groups;
f5a14b93 21
# A watchdog counts as expired once the difference between the current
# get_time() value and its last update time exceeds this value.
my $watchdog_timeout = 60;
0bba8f60 23
787b66eb
DM
24# Status directory layout
25#
26# configuration
27#
8456bde2
DM
28# $testdir/cmdlist Command list for simulation
29# $testdir/hardware_status Hardware description (number of nodes, ...)
30# $testdir/manager_status CRM status (start with {})
31# $testdir/service_config Service configuration
eea0c609 32# $testdir/static_service_stats Static service usage information (cpu, memory)
abc920b4 33# $testdir/groups HA groups configuration
8456bde2 34# $testdir/service_status_<node> Service status
ed408b44 35# $testdir/datacenter.cfg Datacenter wide HA configuration
3c36cbca 36
9329c1e2
DM
37#
38# runtime status for simulation system
39#
40# $testdir/status/cluster_locks Cluster locks
41# $testdir/status/hardware_status Hardware status (power/network on/off)
eea0c609 42# $testdir/status/static_service_stats Static service usage information (cpu, memory)
9329c1e2 43# $testdir/status/watchdog_status Watchdog status
787b66eb
DM
44#
45# runtime status
9329c1e2 46#
8456bde2
DM
47# $testdir/status/lrm_status_<node> LRM status
48# $testdir/status/manager_status CRM status
abc920b4 49# $testdir/status/crm_commands CRM command queue
8456bde2
DM
50# $testdir/status/service_config Service configuration
51# $testdir/status/service_status_<node> Service status
abc920b4 52# $testdir/status/groups HA groups configuration
c4a221bc
DM
53
# Read the LRM status of $node from "<statusdir>/lrm_status_<node>".
# An empty hash reference is passed to the JSON reader as second argument
# (presumably the default for a missing file - verify against PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/lrm_status_$node",
        {},
    );
}
61
# Persist $status_obj as the LRM status of $node in
# "<statusdir>/lrm_status_<node>".
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $target = sprintf('%s/lrm_status_%s', $self->{statusdir}, $node);

    PVE::HA::Tools::write_json_to_file($target, $status_obj);
}
787b66eb 69
# Read and JSON-decode "<statusdir>/hardware_status".
# Takes no lock itself (see the _nolock suffix).
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $json = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");

    return decode_json($json);
}
80
# JSON-encode $cstatus and store it in "<statusdir>/hardware_status".
# Takes no lock itself (see the _nolock suffix).
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $serialized = encode_json($cstatus);

    PVE::Tools::file_set_contents("$self->{statusdir}/hardware_status", $serialized);
}
88
95360669
DM
# Load "<statusdir>/service_config" and normalize every entry:
#  - dies if a service has no node assigned
#  - derives 'type' and 'name' from the service ID (only vm/ct/fa with a
#    numeric name are supported)
#  - fills in defaults for 'state', 'max_restart' and 'max_relocate'
# Returns the normalized configuration hash.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        my ($type, $name) = $sid =~ m/^(vm|ct|fa):(\d+)$/
            or die "implement me";
        @{$entry}{qw(type name)} = ($type, $name);

        $entry->{state} ||= 'disabled';
        $entry->{state} = 'started' if $entry->{state} eq 'enabled'; # backward compatibility

        $entry->{max_restart} //= 1;
        $entry->{max_relocate} //= 1;
    }

    return $conf;
}
114
76b83c72
FE
# Merge the key/value pairs of $param into the configuration of service $sid
# and write the whole service configuration back.
# Dies with "no such resource" if $sid is not configured.
sub update_service_config {
    my ($self, $sid, $param) = @_;

    my $conf = $self->read_service_config();

    my $sconf = $conf->{$sid} || die "no such resource '$sid'\n";

    # iterate over the keys only: a hash in list context yields keys *and*
    # values, which would add bogus entries named after the values
    foreach my $k (keys %$param) {
        $sconf->{$k} = $param->{$k};
    }

    $self->write_service_config($conf);
}
128
79e0e005
DM
# Store $conf as the new service configuration: remember it on the object
# and write it to "<statusdir>/service_config". Returns whatever the JSON
# writer returns.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the in-memory copy in sync with what gets written
    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file("$self->{statusdir}/service_config", $conf);
}
137
# Parse "<statusdir>/fence.cfg". If the file does not exist, the parser is
# called with undef as raw content.
sub read_fence_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/fence.cfg";

    my $raw = -e $filename ? PVE::Tools::file_get_contents($filename) : undef;

    return PVE::HA::FenceConfig::parse_config($filename, $raw);
}
150
# Simulated fence agent execution: powers off $node through
# sim_hardware_cmd(), using $agent as log id. @param is ignored.
# Always returns 0.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    # let all agent succeed and behave the same for now
    my $poweroff_cmd = "power $node off";
    $self->sim_hardware_cmd($poweroff_cmd, $agent);

    return 0; # EXIT_SUCCESS
}
79e0e005 159
e5f43426
TL
# Set the requested state of service $sid and write the configuration back.
# Dies if the service is not configured. Returns the updated configuration.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'" if !$entry;

    $entry->{state} = $state;
    $self->write_service_config($conf);

    return $conf;
}
172
# Add service $sid with the options in $opts (must contain 'node') to the
# service configuration and record $running as its entry in the service
# status of that node.
# Dies if $sid is already configured or if no node is given.
# Returns the updated service configuration.
sub add_service {
    my ($self, $sid, $opts, $running) = @_;

    my $conf = $self->read_service_config();
    die "resource ID '$sid' already defined\n" if $conf->{$sid};

    # refuse before anything is written: read_service_config() dies on
    # entries without a node, so persisting one would break every later read
    die "cannot add service '$sid' without assigned node\n" if !$opts->{node};

    $conf->{$sid} = $opts;
    $conf->{$sid}->@{qw(type name)} = split(/:/, $sid);

    $self->write_service_config($conf);

    my $ss = $self->read_service_status($opts->{node});
    $ss->{$sid} = $running;
    $self->write_service_status($opts->{node}, $ss);

    return $conf;
}
190
# Remove service $sid from the service configuration and write it back.
# Dies if the service is not configured. Returns the updated configuration.
sub delete_service {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'";
    }

    delete $conf->{$sid};
    $self->write_service_config($conf);

    return $conf;
}
204
# Move service $sid from $current_node to $new_node in the service
# configuration. Dies if the service is unknown or if $current_node is not
# the node currently configured for it.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();
    my $entry = $conf->{$sid};

    die "no such service '$sid'\n" if !$entry;

    if ($current_node ne $entry->{node}) {
        die "current_node for '$sid' does not match ($current_node != $entry->{node})\n";
    }

    $entry->{node} = $new_node;

    $self->write_service_config($conf);
}
219
cde11324
TL
# Return the 'lock' property of service $sid (undef if there is none).
# Dies if the service is not configured.
sub service_has_lock {
    my ($self, $sid) = @_;

    my $entry = $self->read_service_config()->{$sid};

    die "no such service '$sid'\n" if !$entry;

    return $entry->{lock};
}
229
# Set the 'lock' property of service $sid to $lock, falling back to 'backup'
# when $lock is false, and write the configuration back.
# Dies if the service is not configured. Returns the updated configuration.
sub lock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();
    my $entry = $conf->{$sid};

    die "no such service '$sid'\n" if !$entry;

    $entry->{lock} = $lock ? $lock : 'backup';

    $self->write_service_config($conf);

    return $conf;
}
243
# Remove the lock of service $sid. If $lock is given, the lock is only
# removed when it matches; otherwise a warning is emitted.
# Returns the removed lock name, or undef if nothing was removed.
# Dies if the service is not configured.
sub unlock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();
    my $entry = $conf->{$sid};

    die "no such service '$sid'\n" if !$entry;

    my $current = $entry->{lock};
    return undef if !defined($current);

    if (defined($lock) && $current ne $lock) {
        warn "found lock '$current' trying to remove '$lock' lock\n";
        return undef;
    }

    my $removed_lock = delete $entry->{lock};

    $self->write_service_config($conf);

    return $removed_lock;
}
266
# Append $cmd (newline-terminated) to "<statusdir>/crm_commands", creating
# the file if needed. Takes no lock itself. Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $filename = "$self->{statusdir}/crm_commands";

    my $queued = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

    PVE::Tools::file_set_contents($filename, $queued . "$cmd\n");

    return undef;
}
282
# Locked variant of queue_crm_commands_nolock(): queues $cmd while holding
# the global lock. Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub {
        $self->queue_crm_commands_nolock($cmd);
    });

    return undef;
}
292
# Fetch and clear the CRM command queue: while holding the global lock,
# return the content of "<statusdir>/crm_commands" (empty string if the file
# does not exist) and truncate the file.
sub read_crm_commands {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/crm_commands";

    my $drain_queue = sub {
        my $pending = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

        PVE::Tools::file_set_contents($filename, '');

        return $pending;
    };

    return $self->global_lock($drain_queue);
}
310
abc920b4
DM
# Parse "<statusdir>/groups" with PVE::HA::Groups. If the file does not
# exist, the parser is called with an empty string.
sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";

    my $raw = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

    return PVE::HA::Groups->parse_config($filename, $raw);
}
320
# Read the service status of $node from "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    my $source = sprintf('%s/service_status_%s', $self->{statusdir}, $node);

    return PVE::HA::Tools::read_json_from_file($source);
}
327
# Write $data as the service status of $node to
# "<statusdir>/service_status_<node>". Returns the JSON writer's result.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $target = sprintf('%s/service_status_%s', $self->{statusdir}, $node);

    # fixme: add test if a service runs on two nodes!!!

    return PVE::HA::Tools::write_json_to_file($target, $data);
}
c4a221bc 338
eea0c609
FE
# Read "<statusdir>/static_service_stats". A failure while loading is logged
# with level 'error' instead of being propagated; undef is returned then.
sub read_static_service_stats {
    my ($self) = @_;

    my $stats = eval {
        PVE::HA::Tools::read_json_from_file("$self->{statusdir}/static_service_stats");
    };
    if (my $err = $@) {
        $self->log('error', "loading static service stats failed - $err");
    }

    return $stats;
}
348
abc920b4
DM
# Built-in HA group configuration, used by new() when the test directory
# does not provide a 'groups' file: one group per default node.
my $default_group_config = <<"__EOD";
group: prefer_node1
    nodes node1
    nofailback 1

group: prefer_node2
    nodes node2
    nofailback 1

group: prefer_node3
    nodes node3
    nofailback 1
__EOD
362
0cfd8f5b
DM
# Create the simulated hardware for the test directory $testdir.
#
# The runtime directory "$testdir/status" is recreated from scratch and
# populated with the initial configuration: files present in $testdir are
# copied, built-in defaults are written for a missing groups, service_config
# or hardware_status file. One (initially empty) entry is created in
# $self->{nodes} for every node of the hardware status.
#
# Dies if $testdir is missing or not a directory, if the status directory
# cannot be created, or if copying an existing configuration file fails.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    die "testdir '$testdir' does not exist or is not a directory!\n"
        if !-d $testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    mkdir($statusdir)
        || die "unable to create status directory '$statusdir' - $!\n";

    # copy a file that is known to exist in $testdir; a failure is fatal
    # because the simulation would otherwise run with an incomplete setup
    my $copy_or_die = sub {
        my ($name) = @_;
        copy("$testdir/$name", "$statusdir/$name") || die "Copy failed: $!\n";
    };

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_or_die->('groups');
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_or_die->('service_config');
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_or_die->('hardware_status');
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    # purely optional files, no defaults are generated for them
    for my $name (qw(fence.cfg datacenter.cfg static_service_stats)) {
        $copy_or_die->($name) if -f "$testdir/$name";
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            $copy_or_die->("service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
443
# Return the current time of the simulation.
# Not implemented here - always dies; subclasses have to override it.
sub get_time {
    my ($self) = @_;

    die 'implement in subclass';
}
449
# Print a log line "<level> <time> <id>: <msg>" to the currently selected
# output handle.
#   $level - log level string
#   $msg   - message, one trailing newline is stripped
#   $id    - optional log id, defaults to 'hardware'
# The time stamp is taken from get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # the message is passed as an argument and never becomes part of the
    # format string, so a '%' inside of it is printed verbatim
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
461
# Accessor for the runtime status directory. Any further argument (callers
# may pass a node) is ignored.
sub statusdir {
    my ($self) = @_;

    return $self->{statusdir};
}
467
ed408b44
TL
# Read "<statusdir>/datacenter.cfg" through the JSON reader, passing an empty
# hash reference as second argument (presumably the default for a missing
# file - verify against PVE::HA::Tools). The $node argument is not used.
sub read_datacenter_conf {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/datacenter.cfg",
        {},
    );
}
474
# Run $code while holding an exclusive flock() on
# "<statusdir>/hardware.lck".
#
# $code is called as $code->($lock_fh, @param); its (scalar) return value is
# returned. An exception thrown by $code is re-thrown after the lock file
# handle has been closed. Dies if the lock file cannot be opened or the lock
# cannot be acquired.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";

    # pass the mode separately so that the file name is never parsed for
    # mode characters
    my $fh = IO::File->new($lockfile, '>>')
        || die "unable to open '$lockfile' - $!\n";

    # flock() may be interrupted by a signal - retry in that case, but never
    # continue without actually holding the lock
    while (!flock($fh, LOCK_EX)) {
        next if $! == EINTR;

        my $lock_err = "$!"; # save, close() may overwrite $!
        close($fh);
        die "can't acquire lock '$lockfile' - $lock_err\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    # closing the handle releases the lock
    close($fh);

    die $err if $err;

    return $res;
}
505
8b3f9144
DM
# Derive per-node online state and cluster quorum from the hardware status.
#
# A node is online if both its power and network are 'on'. The cluster is
# quorate if more than half of all nodes are online; without quorum every
# node is reported as offline.
# Returns the list ($node_info, $quorate), with
# $node_info->{<node>}->{online} and $quorate being 0 or 1.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @nodes = keys %$cstatus;

    my $node_info = {};
    for my $node (@nodes) {
        my $hw = $cstatus->{$node};
        my $up = $hw->{power} eq 'on' && $hw->{network} eq 'on';
        $node_info->{$node}->{online} = $up ? 1 : 0;
    }

    my $online_count = grep { $node_info->{$_}->{online} } @nodes;
    my $quorate = ($online_count > int(scalar(@nodes) / 2)) ? 1 : 0;

    if (!$quorate) {
        $node_info->{$_}->{online} = 0 for @nodes;
    }

    return ($node_info, $quorate);
};
535
# Return ($node_info, $quorate) computed from the current hardware status,
# which is read without taking the global lock.
sub get_node_info {
    my ($self) = @_;

    my $cstatus = $self->read_hardware_status_nolock();

    return $compute_node_info->($self, $cstatus);
}
544
ba2a45cd
TL
# helper for Sim/ only
#
# Return the simulated cfs state $state of $node as stored under the node's
# 'cfs' key in the hardware status; a true value if nothing is stored.
sub get_cfs_state {
    my ($self, $node, $state) = @_;

    # TODO: ensure nolock is OK when adding this to RTSim
    my $cstatus = $self->read_hardware_status_nolock();
    my $res = $cstatus->{$node}->{cfs}->{$state};

    # we assume default true if not defined
    return 1 if !defined($res);

    return $res;
}
556
a5d48ae1
TL
# simulate hardware commands, the following commands are available:
#   power <node> <on|off>
#   network <node> <on|off>
#   delay <seconds>
#   skip-round <crm|lrm> [<rounds=1>]
#   cfs <node> <rw|update> <work|fail>
#   reboot <node>
#   shutdown <node>
#   restart-lrm <node>
#   crm <node> <start|stop|enable-node-maintenance|disable-node-maintenance>
#   service <sid> <started|disabled|stopped|ignored>
#   service <sid> <migrate|relocate> <target>
#   service <sid> stop <timeout>
#   service <sid> lock/unlock [lockname]
#   service <sid> add <node> [<request-state=started>] [<running=0>]
#   service <sid> delete
#
# NOTE: 'delay' and 'skip-round' are not handled by this method - presumably
# the caller processes them before getting here.
#
# The command string $cmdstr is executed while holding the global lock;
# $logid is used as id for the log entry. Returns the hardware status as
# read at the start of the command, including any changes the command made
# to it. Dies on unknown nodes, commands or actions.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {
        my ($lock_fh) = @_;

        my $cstatus = $self->read_hardware_status_nolock();

        # split(' ', ...) additionally ignores leading whitespace, which
        # would otherwise shift all fields by one
        my ($cmd, $objid, $action, @params) = split(' ', $cmdstr);
        my $param = $params[0]; # for convenience/legacy

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        # not all commands take an action, but those that do validate it -
        # avoid 'uninitialized' warnings when it is missing
        $action //= '';

        my ($node, $sid, $d);

        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'\n"
                if $action !~ m/^(on|off)$/;

            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {

                    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
                    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
                    $d->{lrm_restart} = undef;
                    $cstatus->{$node}->{cfs} = {};

                } else {

                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $self->crm_control('stop', $d, $lock_fh);
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $self->lrm_control('stop', $d, $lock_fh);
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }

                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # power state changes always affect the network state as well
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'cfs') {
            die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
                if $action !~ m/^(rw|update)$/;

            my $cfs_cmd = $param // '';
            die "sim_hardware_cmd: unknown cfs command '$cfs_cmd' for '$action' on node '$node'"
                if $cfs_cmd !~ m/^(work|fail)$/;

            $cstatus->{$node}->{cfs}->{$action} = $cfs_cmd eq 'work';
            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $self->lrm_control('shutdown', $d, $lock_fh) if defined($d->{lrm});
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $self->lrm_control('shutdown', $d, $lock_fh);
            }
        } elsif ($cmd eq 'crm') {

            if ($action eq 'stop') {
                if ($d->{crm}) {
                    $d->{crm_stop} = 1;
                    $self->crm_control('shutdown', $d, $lock_fh);
                }
            } elsif ($action eq 'start') {
                $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
            } elsif ($action eq 'enable-node-maintenance' || $action eq 'disable-node-maintenance') {
                $self->queue_crm_commands_nolock("$action $node");
            } else {
                die "sim_hardware_cmd: unknown action '$action'";
            }

        } elsif ($cmd eq 'service') {
            if ($action eq 'started' || $action eq 'disabled' ||
                $action eq 'stopped' || $action eq 'ignored') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$param;

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'stop') {

                die "sim_hardware_cmd: missing timeout for '$action' command"
                    if !defined($param);

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'add') {
                $self->add_service(
                    $sid,
                    {state => $params[1] || 'started', node => $param},
                    $params[2] || 0,
                );

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } elsif ($action eq 'lock') {

                $self->lock_service($sid, $param);

            } elsif ($action eq 'unlock') {

                $self->unlock_service($sid, $param);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n";
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

        return $cstatus;
    };

    return $self->global_lock($code);
}
729
# for controlling the resource manager services
#
# Called with an action name, the per-node data hash and the file handle of
# the global lock. Not implemented here - always dies; subclasses have to
# override it.
sub crm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die 'implement in subclass';
}
736
# LRM counterpart of crm_control(), called with the same kind of arguments.
# Not implemented here - always dies; subclasses have to override it.
sub lrm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die 'implement in subclass';
}
742
# Entry point to run the simulation.
# Not implemented here - always dies; subclasses have to override it.
sub run {
    my ($self) = @_;

    die 'implement in subclass';
}
9329c1e2
DM
748
# Read-modify-write helper for "<statusdir>/watchdog_status", executed under
# the global lock.
#
# $code is called with the decoded watchdog status (an empty hash if the
# file does not exist yet) and has to return ($new_status, $result). The new
# status is written back and $result is returned.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    my $update_cmd = sub {
        my $wdstatus = {};
        if (-f $filename) {
            my $raw = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($raw);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    };

    return $self->global_lock($update_cmd);
};
774
0590c6a7
DM
# Drop all watchdog entries belonging to $node from
# "<statusdir>/watchdog_status". Does nothing if the file does not exist.
# Takes no lock itself.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    if (-f $filename) {
        my $raw = PVE::Tools::file_get_contents($filename);
        my $wdstatus = decode_json($raw);

        my @node_ids = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@node_ids};

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
    }
}
791
9329c1e2
DM
# Check the watchdogs of $node: every entry whose last update is more than
# $watchdog_timeout in the past is removed from the watchdog status.
# Returns 0 if at least one watchdog of the node expired, 1 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        my $res = 1;

        for my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next if $wd->{node} ne $node;

            my $age = $self->get_time() - $wd->{update_time};
            next if $age <= $watchdog_timeout;

            # expired
            $res = 0;
            delete $wdstatus->{$wfh};
        }

        return ($wdstatus, $res);
    };

    return $modify_watchog->($self, $code);
}
818
# Counter used to generate unique watchdog handle ids within this process.
my $wdcounter = 0;

# Register a new watchdog for $node in the watchdog status, stamped with the
# current time. Returns the new handle id, "WD:<node>:<pid>:<counter>".
sub watchdog_open {
    my ($self, $node) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        $wdcounter += 1;

        my $id = join(':', 'WD', $node, $$, $wdcounter);

        die "internal error" if defined($wdstatus->{$id});

        $wdstatus->{$id} = {
            node => $node,
            update_time => $self->get_time(),
        };

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $code);
}
843
# Remove the watchdog with handle $wfh from the watchdog status.
# Dies if the handle is unknown or if the watchdog already expired.
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $wd->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $code);
}
863
# Refresh the watchdog with handle $wfh: its update time is set to the
# current time. Dies if the handle is unknown or if the watchdog already
# expired.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $code = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();
        if ($now - $wd->{update_time} > $watchdog_timeout) {
            die "watchdog expired";
        }

        $wd->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $code);
}
886
5db695c3
FE
# Return a hash reference mapping each node of the hardware status to a hash
# with its 'cpus' and 'memory' values (undef for values that are not set).
sub get_static_node_stats {
    my ($self) = @_;

    my $cstatus = $self->read_hardware_status_nolock();

    my %stats;
    foreach my $node (keys %$cstatus) {
        my %usage = %{ $cstatus->{$node} }{qw(cpus memory)};
        $stats{$node} = \%usage;
    }

    return \%stats;
}
899
0cfd8f5b 9001;