]>
Commit | Line | Data |
---|---|---|
8b3f9144 DM |
1 | package PVE::HA::Sim::Hardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
0cfd8f5b DM |
8 | |
9 | use strict; | |
10 | use warnings; | |
11 | use POSIX qw(strftime EINTR); | |
289e4784 | 12 | use JSON; |
0cfd8f5b DM |
13 | use IO::File; |
14 | use Fcntl qw(:DEFAULT :flock); | |
787b66eb DM |
15 | use File::Copy; |
16 | use File::Path qw(make_path remove_tree); | |
c982dfee | 17 | use PVE::HA::FenceConfig; |
7d33cb12 | 18 | use PVE::HA::Groups; |
f5a14b93 | 19 | |
# Watchdog expiry threshold: the watchdog_* subs treat a watchdog as expired
# once get_time() minus its last update_time exceeds this value.
17b5cf98 | 20 | my $watchdog_timeout = 60; |
0bba8f60 | 21 | |
0cfd8f5b | 22 | |
787b66eb DM |
23 | # Status directory layout |
24 | # | |
25 | # configuration | |
26 | # | |
8456bde2 DM |
27 | # $testdir/cmdlist Command list for simulation |
28 | # $testdir/hardware_status Hardware description (number of nodes, ...) | |
29 | # $testdir/manager_status CRM status (start with {}) | |
30 | # $testdir/service_config Service configuration | |
abc920b4 | 31 | # $testdir/groups HA groups configuration |
8456bde2 | 32 | # $testdir/service_status_<node> Service status |
ed408b44 | 33 | # $testdir/datacenter.cfg Datacenter wide HA configuration |
3c36cbca | 34 | |
9329c1e2 DM |
35 | # |
36 | # runtime status for simulation system | |
37 | # | |
38 | # $testdir/status/cluster_locks Cluster locks | |
39 | # $testdir/status/hardware_status Hardware status (power/network on/off) | |
40 | # $testdir/status/watchdog_status Watchdog status | |
787b66eb DM |
41 | # |
42 | # runtime status | |
9329c1e2 | 43 | # |
8456bde2 DM |
44 | # $testdir/status/lrm_status_<node> LRM status |
45 | # $testdir/status/manager_status CRM status | |
abc920b4 | 46 | # $testdir/status/crm_commands CRM command queue |
8456bde2 DM |
47 | # $testdir/status/service_config Service configuration |
48 | # $testdir/status/service_status_<node> Service status | |
abc920b4 | 49 | # $testdir/status/groups HA groups configuration |
c4a221bc DM |
50 | |
# Read the LRM status of $node from "<statusdir>/lrm_status_<node>".
# {} is passed as second argument to read_json_from_file (presumably the
# value used when the file is missing - confirm in PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file(
        "$self->{statusdir}/lrm_status_$node",
        {},
    );
}
58 | ||
# Store $status_obj as JSON in "<statusdir>/lrm_status_<node>".
# Returns whatever write_json_to_file returns.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    return PVE::HA::Tools::write_json_to_file(
        "$self->{statusdir}/lrm_status_$node",
        $status_obj,
    );
}
787b66eb | 66 | |
# Read and decode "<statusdir>/hardware_status" (JSON).
# As the name says, no lock is taken here.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $json_text = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");

    return decode_json($json_text);
}
77 | ||
# Encode $cstatus as JSON and write it to "<statusdir>/hardware_status".
# As the name says, no lock is taken here.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $json_text = encode_json($cstatus);

    return PVE::Tools::file_set_contents("$self->{statusdir}/hardware_status", $json_text);
}
85 | ||
95360669 DM |
# Read "<statusdir>/service_config" and normalize every entry:
#  - dies if an entry has no node assigned
#  - derives 'type' and 'name' from a service ID of the form (vm|ct|fa):<digits>
#  - defaults 'state' to 'disabled' and maps the legacy 'enabled' to 'started'
#  - defaults 'max_restart' and 'max_relocate' to 1
# Returns the (modified) configuration hash.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        # only these resource types are known to the simulator
        die "implement me" if $sid !~ m/^(vm|ct|fa):(\d+)$/;
        ($entry->{type}, $entry->{name}) = ($1, $2);

        $entry->{state} ||= 'disabled';
        $entry->{state} = 'started' if $entry->{state} eq 'enabled'; # backward compatibility

        for my $limit (qw(max_restart max_relocate)) {
            $entry->{$limit} = 1 if !defined($entry->{$limit});
        }
    }

    return $conf;
}
111 | ||
79e0e005 DM |
# Remember $conf in $self->{service_config} and write it as JSON to
# "<statusdir>/service_config". Returns the result of write_json_to_file.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the in-memory copy in step with the file
    $self->{service_config} = $conf;

    return PVE::HA::Tools::write_json_to_file("$self->{statusdir}/service_config", $conf);
}
120 | ||
# Parse "<statusdir>/fence.cfg" with PVE::HA::FenceConfig::parse_config.
# If the file does not exist, undef is passed as raw content.
sub read_fence_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/fence.cfg";
    my $content = -e $path ? PVE::Tools::file_get_contents($path) : undef;

    return PVE::HA::FenceConfig::parse_config($path, $content);
}
133 | ||
# Simulated fence agent execution: whatever the agent or its parameters,
# the node is simply powered off through sim_hardware_cmd (the agent name
# is used as log ID). Always returns 0.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    # let all agent succeed and behave the same for now
    my $poweroff_cmd = "power $node off";
    $self->sim_hardware_cmd($poweroff_cmd, $agent);

    return 0; # EXIT_SUCCESS
}
79e0e005 | 142 | |
e5f43426 TL |
# Set the 'state' of service $sid to $state and write the configuration back.
# Dies if the service is not configured. Returns the updated configuration.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $services = $self->read_service_config();

    my $entry = $services->{$sid};
    die "no such service '$sid'" if !$entry;

    $entry->{state} = $state;
    $self->write_service_config($services);

    return $services;
}
155 | ||
27ccc95c TL |
# Add service $sid with the options hash $opts to the service configuration.
# Dies if the ID is already in use. Returns the updated configuration.
sub add_service {
    my ($self, $sid, $opts) = @_;

    my $services = $self->read_service_config();

    if ($services->{$sid}) {
        die "resource ID '$sid' already defined\n";
    }

    $services->{$sid} = $opts;
    $self->write_service_config($services);

    return $services;
}
168 | ||
# Remove service $sid from the service configuration and write it back.
# Dies if the service is not configured. Returns the updated configuration.
sub delete_service {
    my ($self, $sid) = @_;

    my $services = $self->read_service_config();

    if (!$services->{$sid}) {
        die "no such service '$sid'";
    }

    delete $services->{$sid};
    $self->write_service_config($services);

    return $services;
}
182 | ||
# Move service $sid from $current_node to $new_node in the service
# configuration. Dies if the service is unknown or is not recorded on
# $current_node. Returns the result of write_service_config.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'\n" if !$entry;

    if ($current_node ne $entry->{node}) {
        die "current_node for '$sid' does not match ($current_node != $conf->{$sid}->{node})\n";
    }

    $entry->{node} = $new_node;

    return $self->write_service_config($conf);
}
197 | ||
cde11324 TL |
# Return the 'lock' entry of service $sid (undef if none is set).
# Dies if the service is not configured.
sub service_has_lock {
    my ($self, $sid) = @_;

    my $services = $self->read_service_config();

    my $entry = $services->{$sid};
    die "no such service '$sid'\n" if !$entry;

    return $entry->{lock};
}
207 | ||
# Set the lock of service $sid to $lock, falling back to 'backup' when no
# (or a false) lock name is given. Dies if the service is not configured.
# Returns the updated configuration.
sub lock_service {
    my ($self, $sid, $lock) = @_;

    my $services = $self->read_service_config();

    my $entry = $services->{$sid};
    die "no such service '$sid'\n" if !$entry;

    $entry->{lock} = $lock ? $lock : 'backup';

    $self->write_service_config($services);

    return $services;
}
221 | ||
# Remove the lock of service $sid and return the removed lock name.
# Returns undef if the service has no lock, or if $lock is given and differs
# from the current lock (a warning is emitted in that case).
# Dies if the service is not configured.
sub unlock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $entry = $conf->{$sid};
    die "no such service '$sid'\n" if !$entry;

    my $current = $entry->{lock};
    return undef if !defined($current);

    # only remove the lock the caller asked for
    if (defined($lock) && $current ne $lock) {
        warn "found lock '$conf->{$sid}->{lock}' trying to remove '$lock' lock\n";
        return undef;
    }

    my $removed_lock = delete $entry->{lock};

    $self->write_service_config($conf);

    return $removed_lock;
}
244 | ||
# Append $cmd (trailing newline normalized to exactly one) to
# "<statusdir>/crm_commands", creating the file if necessary.
# Takes no lock itself. Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $path = "$self->{statusdir}/crm_commands";

    my $queued = '';
    $queued = PVE::Tools::file_get_contents($path) if -f $path;

    PVE::Tools::file_set_contents($path, $queued . "$cmd\n");

    return undef;
}
260 | ||
# Queue a CRM command while holding the global lock; thin wrapper around
# queue_crm_commands_nolock. Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub {
        $self->queue_crm_commands_nolock($cmd);
    });

    return undef;
}
270 | ||
# Fetch and clear the CRM command queue under the global lock.
# Returns the previous content of "<statusdir>/crm_commands" ('' if the
# file did not exist); the file is truncated to empty afterwards.
sub read_crm_commands {
    my ($self) = @_;

    my $drain_queue = sub {
        my $path = "$self->{statusdir}/crm_commands";

        my $pending = '';
        $pending = PVE::Tools::file_get_contents($path) if -f $path;

        PVE::Tools::file_set_contents($path, '');

        return $pending;
    };

    return $self->global_lock($drain_queue);
}
288 | ||
abc920b4 DM |
# Parse "<statusdir>/groups" with PVE::HA::Groups->parse_config.
# A missing file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $path = "$self->{statusdir}/groups";

    my $content = '';
    if (-f $path) {
        $content = PVE::Tools::file_get_contents($path);
    }

    return PVE::HA::Groups->parse_config($path, $content);
}
298 | ||
# Read the service status of $node from "<statusdir>/service_status_<node>".
sub read_service_status {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_status_$node");
}
305 | ||
# Write $data as JSON to "<statusdir>/service_status_<node>".
# Returns the result of write_json_to_file.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $path = "$self->{statusdir}/service_status_$node";

    # fixme: add test if a service runs on two nodes!!!

    return PVE::HA::Tools::write_json_to_file($path, $data);
}
c4a221bc | 316 | |
abc920b4 DM |
# Default HA group configuration; new() writes it to "<statusdir>/groups"
# when the test directory does not provide its own 'groups' file.
317 | my $default_group_config = <<__EOD; |
318 | group: prefer_node1 | |
319 | nodes node1 | |
e941bdc5 | 320 | nofailback 1 |
abc920b4 DM |
321 | |
322 | group: prefer_node2 | |
323 | nodes node2 | |
e941bdc5 | 324 | nofailback 1 |
abc920b4 DM |
325 | |
326 | group: prefer_node3 | |
7a294ad4 | 327 | nodes node3 |
e941bdc5 | 328 | nofailback 1 |
abc920b4 DM |
329 | __EOD |
330 | ||
0cfd8f5b DM |
# Constructor. Recreates "$testdir/status" from scratch and populates it:
#  - manager_status is copied if present (optional)
#  - groups, service_config and hardware_status are copied from $testdir,
#    or replaced by built-in three-node defaults when missing
#  - fence.cfg, datacenter.cfg and service_status_<node> are copied if present
#    (an empty service status is written for nodes without one)
# Dies if $testdir is missing or not a directory, if the status directory
# cannot be created, or if copying an existing input file fails.
# Returns the new object with {statusdir}, {nodes} and {service_config} set.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    die "testdir '$testdir' does not exist or is not a directory!\n"
        if !-d $testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    mkdir($statusdir) ||
        die "unable to create status directory '$statusdir' - $!\n";

    # copy a file that is known to exist in $testdir; a failure here would
    # otherwise silently start the simulation with missing/stale input
    my $copy_required = sub {
        my ($name) = @_;

        copy("$testdir/$name", "$statusdir/$name") ||
            die "Copy failed: $!\n";
    };

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_required->('groups');
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_required->('service_config');
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_required->('hardware_status');
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    if (-f "$testdir/fence.cfg") {
        $copy_required->('fence.cfg');
    }

    if (-f "$testdir/datacenter.cfg") {
        $copy_required->('datacenter.cfg');
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        if (-f "$testdir/service_status_$node") {
            $copy_required->("service_status_$node");
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
407 | ||
# Abstract: current simulation time. Must be overridden by a subclass;
# this base implementation only dies.
sub get_time {
    my $self = shift;

    die "implement in subclass";
}
413 | ||
# Print one log line "<level> <time> <id>: <msg>" to the currently selected
# output handle.
#   $level - log level string
#   $msg   - message text; one trailing newline is stripped
#   $id    - optional source ID, defaults to 'hardware'
# The time stamp comes from $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # $msg is passed as an argument, never as part of the format string:
    # a literal '%' in a message must not be parsed as a printf conversion
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
425 | ||
# Accessor for the status directory path. The $node argument is accepted
# but not used.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
431 | ||
ed408b44 TL |
# Read "<statusdir>/datacenter.cfg" as JSON. {} is passed as second argument
# to read_json_from_file (presumably the value used when the file is
# missing - confirm in PVE::HA::Tools). The $node argument is not used.
sub read_datacenter_conf {
    my ($self, $node) = @_;

    return PVE::HA::Tools::read_json_from_file("$self->{statusdir}/datacenter.cfg", {});
}
438 | ||
# Run $code while holding an exclusive flock on "<statusdir>/hardware.lck".
#   $code  - code reference, called as $code->($lock_fh, @param)
#   @param - extra arguments passed through to $code
# Returns the (scalar) result of $code. Dies if the lock file cannot be
# opened or locked; an exception thrown by $code is re-thrown after the
# lock file handle has been closed.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    until (flock($fh, LOCK_EX)) {
        # interrupted by a signal - simply try again
        next if $! == EINTR;

        # any other failure is fatal; never run $code without the lock.
        # Save the error text first, close() may overwrite $!
        my $errmsg = "$!";
        close($fh);
        die "can't acquire lock '$lockfile' - $errmsg\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    # closing the handle also drops the lock
    close($fh);

    die $err if $err;

    return $res;
}
469 | ||
8b3f9144 DM |
# Derive per-node online state and cluster quorum from the hardware status.
# A node counts as online when both power and network are 'on'. The cluster
# is quorate when more than half of all nodes (integer division) are online;
# without quorum every node is reported offline.
# Returns ($node_info, $quorate), with $node_info->{<node>}->{online} = 0|1.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my %info;
    my ($total, $up_count) = (0, 0);

    foreach my $node (keys %$cstatus) {
        my $hw = $cstatus->{$node};

        my $is_up = 0;
        $is_up = 1 if $hw->{power} eq 'on' && $hw->{network} eq 'on';

        $info{$node}->{online} = $is_up;

        $total++;
        $up_count += $is_up;
    }

    my $quorate = ($up_count > int($total/2)) ? 1 : 0;

    if (!$quorate) {
        # no quorum: nobody is considered online
        foreach my $node (keys %$cstatus) {
            $info{$node}->{online} = 0;
        }
    }

    return (\%info, $quorate);
};
499 | ||
# Return ($node_info, $quorate) computed from the current hardware status
# file (read without taking the lock).
sub get_node_info {
    my ($self) = @_;

    my $hw_status = $self->read_hardware_status_nolock();

    my @result = $compute_node_info->($self, $hw_status);

    return ($result[0], $result[1]);
}
508 | ||
ba2a45cd TL |
509 | # helper for Sim/ only |
# Return the simulated cfs flag $state of $node from the hardware status.
# An unset flag counts as true.
sub get_cfs_state {
    my ($self, $node, $state) = @_;

    # TODO: ensure nolock is OK when adding this to RTSim
    my $hw_status = $self->read_hardware_status_nolock();
    my $flag = $hw_status->{$node}->{cfs}->{$state};

    # we assume default true if not defined
    return 1 if !defined($flag);

    return $flag;
}
520 | ||
# simulate hardware commands
# power <node> <on|off>
# network <node> <on|off>
# cfs <node> <rw|update> <work|fail>
# reboot <node>
# shutdown <node>
# restart-lrm <node>
# crm <node> <start|stop>
# service <sid> <started|disabled|stopped|ignored>
# service <sid> <migrate|relocate> <target>
# service <sid> add <node>
# service <sid> delete
# service <sid> lock/unlock [lockname]
0cfd8f5b | 531 | |
# Execute one simulated hardware/service command while holding the global
# lock.
#   $cmdstr - whitespace separated "<cmd> <node|sid> [action] [target]"
#   $logid  - optional ID used for the "execute ..." log line
# Node commands: power, network, cfs, reboot, shutdown, restart-lrm, crm.
# Service commands (cmd 'service'): a state (started|disabled|stopped|ignored),
# migrate|relocate <target>, add <node>, delete, lock|unlock [lockname].
# Dies on unknown commands/actions, unknown nodes and missing arguments.
# Returns the hardware status hash as read at the start of the command,
# including any modification made by the command.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {
        my ($lock_fh) = @_;

        my $cstatus = $self->read_hardware_status_nolock();

        my ($cmd, $objid, $action, $target) = split(/\s+/, $cmdstr);

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        my ($node, $sid, $d);

        # the second word is a service ID for 'service', a node name otherwise
        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'\n"
                if $action !~ m/^(on|off)$/;

            # only start/stop the services on an actual power state change
            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {

                    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
                    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
                    $d->{lrm_restart} = undef;
                    $cstatus->{$node}->{cfs} = {};

                } else {

                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $self->crm_control('stop', $d, $lock_fh);
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $self->lrm_control('stop', $d, $lock_fh);
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }

                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # the network state always follows the power state
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'cfs') {
            die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
                if $action !~ m/^(rw|update)$/;
            die "sim_hardware_cmd: unknown cfs command '$target' for '$action' on node '$node'"
                if $target !~ m/^(work|fail)$/;

            $cstatus->{$node}->{cfs}->{$action} = $target eq 'work';
            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $self->lrm_control('shutdown', $d, $lock_fh) if defined($d->{lrm});
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $self->lrm_control('shutdown', $d, $lock_fh);
            }
        } elsif ($cmd eq 'crm') {

            if ($action eq 'stop') {
                if ($d->{crm}) {
                    $d->{crm_stop} = 1;
                    $self->crm_control('shutdown', $d, $lock_fh);
                }
            } elsif ($action eq 'start') {
                $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
            } else {
                die "sim_hardware_cmd: unknown action '$action'";
            }

        } elsif ($cmd eq 'service') {
            if ($action eq 'started' || $action eq 'disabled' ||
                $action eq 'stopped' || $action eq 'ignored') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$target;

                # we already hold the global lock, so use the nolock variant
                $self->queue_crm_commands_nolock("$action $sid $target");

            } elsif ($action eq 'add') {

                # a service stored without a node makes every later
                # read_service_config() die, so refuse it up front
                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$target;

                $self->add_service($sid, {state => 'started', node => $target});

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } elsif ($action eq 'lock') {

                $self->lock_service($sid, $target);

            } elsif ($action eq 'unlock') {

                $self->unlock_service($sid, $target);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n"
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

        return $cstatus;
    };

    return $self->global_lock($code);
}
676 | ||
677 | # for controlling the resource manager services | |
# Abstract: start/stop/shutdown the CRM of a node. sim_hardware_cmd calls it
# as ($action, <node data hash>, <lock file handle>). Must be overridden by
# a subclass; this base implementation only dies.
sub crm_control {
    my $self = shift;
    my ($action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
683 | ||
# Abstract: start/stop/shutdown the LRM of a node. sim_hardware_cmd calls it
# as ($action, <node data hash>, <lock file handle>). Must be overridden by
# a subclass; this base implementation only dies.
sub lrm_control {
    my $self = shift;
    my ($action, $data, $lock_fh) = @_;

    die "implement in subclass";
}
689 | ||
# Abstract: entry point of the simulation. Must be overridden by a subclass;
# this base implementation only dies.
sub run {
    my $self = shift;

    die "implement in subclass";
}
9329c1e2 DM |
695 | |
# Read-modify-write helper for "<statusdir>/watchdog_status", executed under
# the global lock. $code receives the decoded status hash ({} if the file
# does not exist) and must return ($new_status, $result); $new_status is
# written back as JSON and $result is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    return $self->global_lock(sub {
        my $path = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $path) {
            my $raw = PVE::Tools::file_get_contents($path);
            $wdstatus = decode_json($raw);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));

        return $res;
    });
};
721 | ||
0590c6a7 DM |
# Drop all watchdog entries belonging to $node from
# "<statusdir>/watchdog_status". Does nothing if the file does not exist.
# Takes no lock itself.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $path = "$self->{statusdir}/watchdog_status";

    if (-f $path) {
        my $raw = PVE::Tools::file_get_contents($path);
        my $wdstatus = decode_json($raw);

        # collect the IDs first, then remove them in one go
        my @node_ids = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@node_ids};

        PVE::Tools::file_set_contents($path, encode_json($wdstatus));
    }
}
738 | ||
9329c1e2 DM |
# Check all watchdogs of $node. Every watchdog whose last update is older
# than $watchdog_timeout is removed from the status.
# Returns 1 if none of the node's watchdogs had expired, 0 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_stale = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        foreach my $wfh (keys %$wdstatus) {
            my $wd = $wdstatus->{$wfh};
            next unless $wd->{node} eq $node;

            my $now = $self->get_time();
            my $age = $now - $wd->{update_time};

            if ($age > $watchdog_timeout) { # expired
                $all_alive = 0;
                delete $wdstatus->{$wfh};
            }
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $expire_stale);
}
765 | ||
# per-process counter used to build unique watchdog IDs
my $wdcounter = 0;

# Register a new watchdog for $node, stamped with the current time.
# Returns the watchdog handle, a string "WD:<node>:<pid>:<counter>".
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter += 1;

        my $id = "WD:$node:$$:$wdcounter";

        die "internal error" if defined($wdstatus->{$id});

        my $entry = {
            node => $node,
            update_time => $self->get_time(),
        };
        $wdstatus->{$id} = $entry;

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
790 | ||
# Close (remove) the watchdog identified by handle $wfh.
# Dies if the handle is unknown or the watchdog has already expired.
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $unregister = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $wd->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $unregister);
}
810 | ||
# Refresh the watchdog identified by handle $wfh by setting its update time
# to the current time.
# Dies if the handle is unknown or the watchdog has already expired.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $wd = $wdstatus->{$wfh};
        if (!defined($wd)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();
        my $age = $now - $wd->{update_time};

        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        $wd->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
833 | ||
0cfd8f5b | 834 | 1; |