]>
Commit | Line | Data |
---|---|---|
8b3f9144 DM |
1 | package PVE::HA::Sim::Hardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
0cfd8f5b DM |
8 | |
9 | use strict; | |
10 | use warnings; | |
a0a7d11e | 11 | |
0cfd8f5b | 12 | use Fcntl qw(:DEFAULT :flock); |
787b66eb DM |
13 | use File::Copy; |
14 | use File::Path qw(make_path remove_tree); | |
a0a7d11e TL |
15 | use IO::File; |
16 | use JSON; | |
17 | use POSIX qw(strftime EINTR); | |
18 | ||
c982dfee | 19 | use PVE::HA::FenceConfig; |
7d33cb12 | 20 | use PVE::HA::Groups; |
f5a14b93 | 21 | |
17b5cf98 | 22 | my $watchdog_timeout = 60; |
0bba8f60 | 23 | |
787b66eb DM |
24 | # Status directory layout |
25 | # | |
26 | # configuration | |
27 | # | |
8456bde2 DM |
28 | # $testdir/cmdlist Command list for simulation |
29 | # $testdir/hardware_status Hardware description (number of nodes, ...) | |
30 | # $testdir/manager_status CRM status (start with {}) | |
31 | # $testdir/service_config Service configuration | |
eea0c609 | 32 | # $testdir/static_service_stats Static service usage information (cpu, memory) |
abc920b4 | 33 | # $testdir/groups HA groups configuration |
8456bde2 | 34 | # $testdir/service_status_<node> Service status |
ed408b44 | 35 | # $testdir/datacenter.cfg Datacenter wide HA configuration |
3c36cbca | 36 | |
9329c1e2 DM |
37 | # |
38 | # runtime status for simulation system | |
39 | # | |
40 | # $testdir/status/cluster_locks Cluster locks | |
41 | # $testdir/status/hardware_status Hardware status (power/network on/off) | |
eea0c609 | 42 | # $testdir/status/static_service_stats Static service usage information (cpu, memory) |
9329c1e2 | 43 | # $testdir/status/watchdog_status Watchdog status |
787b66eb DM |
44 | # |
45 | # runtime status | |
9329c1e2 | 46 | # |
8456bde2 DM |
47 | # $testdir/status/lrm_status_<node> LRM status |
48 | # $testdir/status/manager_status CRM status | |
abc920b4 | 49 | # $testdir/status/crm_commands CRM command queue |
8456bde2 DM |
50 | # $testdir/status/service_config Service configuration |
51 | # $testdir/status/service_status_<node> Service status | |
abc920b4 | 52 | # $testdir/status/groups HA groups configuration |
c4a221bc DM |
53 | |
# Read the LRM status object stored for $node in the status directory.
#
# The empty hash passed as second argument is handed through to
# PVE::HA::Tools::read_json_from_file (presumably the value used when the
# file does not exist yet - confirm in PVE::HA::Tools).
sub read_lrm_status {
    my ($self, $node) = @_;

    my $statusdir = $self->{statusdir};

    return PVE::HA::Tools::read_json_from_file("$statusdir/lrm_status_$node", {});
}
61 | ||
# Store $status_obj as the LRM status of $node in the status directory.
# The result of the JSON writer is the (implicit) return value.
sub write_lrm_status {
    my ($self, $node, $status_obj) = @_;

    my $statusdir = $self->{statusdir};

    PVE::HA::Tools::write_json_to_file("$statusdir/lrm_status_$node", $status_obj);
}
787b66eb | 69 | |
# Load and decode the simulated hardware status (JSON) from the status
# directory. As the name says, no lock is taken here - callers that need
# consistency have to hold the global lock themselves.
sub read_hardware_status_nolock {
    my ($self) = @_;

    my $json = PVE::Tools::file_get_contents("$self->{statusdir}/hardware_status");
    my $hw_status = decode_json($json);

    return $hw_status;
}
80 | ||
# Encode $cstatus as JSON and write it to the hardware status file in the
# status directory. No lock is taken here.
sub write_hardware_status_nolock {
    my ($self, $cstatus) = @_;

    my $target = "$self->{statusdir}/hardware_status";

    PVE::Tools::file_set_contents($target, encode_json($cstatus));
}
88 | ||
95360669 DM |
# Read the service configuration from the status directory and normalize
# every entry in place:
#   - each service must have a node assigned, otherwise we die
#   - the service ID must look like "<vm|ct|fa>:<digits>"; both parts are
#     stored as 'type' and 'name'
#   - a missing/false state becomes 'disabled', the legacy 'enabled'
#     becomes 'started'
#   - max_restart and max_relocate default to 1 when not defined
# Returns the (normalized) configuration hash.
sub read_service_config {
    my ($self) = @_;

    my $conf = PVE::HA::Tools::read_json_from_file("$self->{statusdir}/service_config");

    for my $sid (keys %$conf) {
        my $entry = $conf->{$sid};

        die "service '$sid' without assigned node!" if !$entry->{node};

        my @id_parts = $sid =~ m/^(vm|ct|fa):(\d+)$/;
        die "implement me" if !@id_parts;
        @{$entry}{qw(type name)} = @id_parts;

        $entry->{state} ||= 'disabled';
        $entry->{state} = 'started' if $entry->{state} eq 'enabled'; # backward compatibility

        $entry->{max_restart} //= 1;
        $entry->{max_relocate} //= 1;
    }

    return $conf;
}
114 | ||
76b83c72 FE |
# Merge the key/value pairs from $param into the configuration of the
# existing service $sid and write the whole service configuration back.
#
# Dies with "no such resource" if $sid is not configured.
# Returns whatever write_service_config() returns.
sub update_service_config {
    my ($self, $sid, $param) = @_;

    my $conf = $self->read_service_config();

    my $sconf = $conf->{$sid} || die "no such resource '$sid'\n";

    # Iterate over the keys only. Flattening the hash itself would also
    # yield the values, each of which would then be stored as an extra key
    # with an undefined value.
    foreach my $k (keys %$param) {
        $sconf->{$k} = $param->{$k};
    }

    $self->write_service_config($conf);
}
128 | ||
79e0e005 DM |
# Remember $conf as the current in-memory service configuration and write
# it to the service_config file in the status directory.
# Returns the result of the JSON writer.
sub write_service_config {
    my ($self, $conf) = @_;

    # keep the cached copy in sync with what goes to disk
    $self->{service_config} = $conf;

    my $target = "$self->{statusdir}/service_config";

    return PVE::HA::Tools::write_json_to_file($target, $conf);
}
137 | ||
# Parse the fence configuration from the status directory.
# If fence.cfg does not exist, the parser is called with undef as raw
# content.
sub read_fence_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/fence.cfg";

    my $raw = -e $filename ? PVE::Tools::file_get_contents($filename) : undef;

    return PVE::HA::FenceConfig::parse_config($filename, $raw);
}
150 | ||
# Simulated fence agent execution: every agent behaves the same for now
# and simply powers off $node through sim_hardware_cmd, using the agent
# name as log id. Additional agent parameters are accepted but unused.
# Always returns 0 (EXIT_SUCCESS).
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    my $poweroff_cmd = "power $node off";
    $self->sim_hardware_cmd($poweroff_cmd, $agent);

    return 0; # EXIT_SUCCESS
}
79e0e005 | 159 | |
e5f43426 TL |
# Set the requested state of service $sid and persist the configuration.
# Dies if the service is not configured. Returns the updated configuration.
sub set_service_state {
    my ($self, $sid, $state) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'" if !$service;

    $service->{state} = $state;

    $self->write_service_config($conf);

    return $conf;
}
172 | ||
# Add the new service $sid with the options in $opts to the service
# configuration, and record $running as its entry in the service status
# of the node given by $opts->{node}.
#
# 'type' and 'name' are derived from the two ':'-separated parts of $sid.
# Dies if $sid is already configured. Returns the updated configuration.
sub add_service {
    my ($self, $sid, $opts, $running) = @_;

    my $conf = $self->read_service_config();
    if ($conf->{$sid}) {
        die "resource ID '$sid' already defined\n";
    }

    $conf->{$sid} = $opts;
    @{ $conf->{$sid} }{qw(type name)} = split(/:/, $sid);

    $self->write_service_config($conf);

    # register the service in the status file of its node
    my $node_status = $self->read_service_status($opts->{node});
    $node_status->{$sid} = $running;
    $self->write_service_status($opts->{node}, $node_status);

    return $conf;
}
190 | ||
# Remove service $sid from the service configuration and persist it.
# Dies if the service is not configured. Returns the updated configuration.
sub delete_service {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    if (!$conf->{$sid}) {
        die "no such service '$sid'";
    }

    delete $conf->{$sid};
    $self->write_service_config($conf);

    return $conf;
}
204 | ||
# Move service $sid from $current_node to $new_node in the service
# configuration.
#
# Dies if the service is unknown, or if $current_node is not the node the
# service is currently configured on. The (implicit) return value is the
# result of write_service_config().
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    my $configured_node = $service->{node};
    if ($current_node ne $configured_node) {
        die "current_node for '$sid' does not match ($current_node != $configured_node)\n";
    }

    $service->{node} = $new_node;

    $self->write_service_config($conf);
}
219 | ||
cde11324 TL |
# Return the 'lock' property of service $sid (undef when no lock is set).
# Dies if the service is not configured.
sub service_has_lock {
    my ($self, $sid) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    return $service->{lock};
}
229 | ||
# Set a lock on service $sid and persist the configuration.
# A missing/false $lock falls back to the lock name 'backup'.
# Dies if the service is not configured. Returns the updated configuration.
sub lock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    $service->{lock} = $lock || 'backup';

    $self->write_service_config($conf);

    return $conf;
}
243 | ||
# Remove the lock of service $sid.
#
# If $lock is given, the lock is only removed when the current lock has
# exactly that name; otherwise a warning is emitted and nothing changes.
# Dies if the service is not configured.
# Returns the removed lock name, or undef if nothing was removed.
sub unlock_service {
    my ($self, $sid, $lock) = @_;

    my $conf = $self->read_service_config();

    my $service = $conf->{$sid};
    die "no such service '$sid'\n" if !$service;

    my $current_lock = $service->{lock};

    # nothing to do without a lock
    return undef if !defined($current_lock);

    if (defined($lock) && $current_lock ne $lock) {
        warn "found lock '$current_lock' trying to remove '$lock' lock\n";
        return undef;
    }

    my $removed_lock = delete $service->{lock};

    $self->write_service_config($conf);

    return $removed_lock;
}
266 | ||
# Append $cmd (with exactly one trailing newline) to the crm_commands
# file in the status directory, creating the content from scratch if the
# file does not exist. No lock is taken here. Always returns undef.
sub queue_crm_commands_nolock {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $filename = "$self->{statusdir}/crm_commands";

    my $queue = -f $filename ? PVE::Tools::file_get_contents($filename) : '';
    $queue .= "$cmd\n";

    PVE::Tools::file_set_contents($filename, $queue);

    return undef;
}
282 | ||
# Locked variant of queue_crm_commands_nolock(): queue $cmd while holding
# the global lock. Always returns undef.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $self->global_lock(sub {
        $self->queue_crm_commands_nolock($cmd);
    });

    return undef;
}
292 | ||
# Fetch all queued CRM commands and empty the queue, both under the
# global lock. Returns the previous file content ('' if the file did not
# exist).
sub read_crm_commands {
    my ($self) = @_;

    my $fetch_and_clear = sub {
        my $filename = "$self->{statusdir}/crm_commands";

        my $queued = '';
        $queued = PVE::Tools::file_get_contents($filename) if -f $filename;

        # truncate (or create) the queue file
        PVE::Tools::file_set_contents($filename, '');

        return $queued;
    };

    return $self->global_lock($fetch_and_clear);
}
310 | ||
abc920b4 DM |
# Parse the HA groups configuration from the status directory.
# A missing groups file is parsed as empty content.
sub read_group_config {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/groups";

    my $raw = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

    return PVE::HA::Groups->parse_config($filename, $raw);
}
320 | ||
# Read the service status file of $node from the status directory.
sub read_service_status {
    my ($self, $node) = @_;

    my $statusdir = $self->{statusdir};

    return PVE::HA::Tools::read_json_from_file("$statusdir/service_status_$node");
}
327 | ||
# Write $data as the service status of $node into the status directory.
# Returns the result of the JSON writer.
sub write_service_status {
    my ($self, $node, $data) = @_;

    my $target = "$self->{statusdir}/service_status_$node";

    my $res = PVE::HA::Tools::write_json_to_file($target, $data);

    # fixme: add test if a service runs on two nodes!!!

    return $res;
}
c4a221bc | 338 | |
eea0c609 FE |
# Read the static service usage statistics from the status directory.
# A failure while loading is logged as error and not propagated; in that
# case the result of the failed eval (undef) is returned.
sub read_static_service_stats {
    my ($self) = @_;

    my $filename = "$self->{statusdir}/static_service_stats";

    my $stats = eval { PVE::HA::Tools::read_json_from_file($filename) };
    if ($@) {
        $self->log('error', "loading static service stats failed - $@");
    }

    return $stats;
}
348 | ||
abc920b4 DM |
349 | my $default_group_config = <<__EOD; |
350 | group: prefer_node1 | |
351 | nodes node1 | |
e941bdc5 | 352 | nofailback 1 |
abc920b4 DM |
353 | |
354 | group: prefer_node2 | |
355 | nodes node2 | |
e941bdc5 | 356 | nofailback 1 |
abc920b4 DM |
357 | |
358 | group: prefer_node3 | |
7a294ad4 | 359 | nodes node3 |
e941bdc5 | 360 | nofailback 1 |
abc920b4 DM |
361 | __EOD |
362 | ||
0cfd8f5b DM |
# Create a new simulated hardware instance for the test directory $testdir.
#
# The runtime status directory "$testdir/status" is re-created from scratch
# and populated with the initial configuration:
#   - manager_status is copied if present (optional)
#   - groups, service_config and hardware_status are copied if present,
#     otherwise built-in defaults (three nodes, six VMs) are written
#   - fence.cfg, datacenter.cfg and static_service_stats are copied if present
#   - for every node in the hardware status, service_status_<node> is copied
#     if present, otherwise an empty status is written
#
# Dies if $testdir is missing or not a directory, if the status directory
# cannot be created, or if copying an existing configuration file fails.
# Returns the blessed object.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    die "testdir '$testdir' does not exist or is not a directory!\n"
        if !-d $testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    remove_tree($statusdir);
    if (!mkdir($statusdir)) {
        # remember the mkdir error, the -d test below may overwrite $!
        my $mkdir_err = $!;
        die "unable to create status directory '$statusdir' - $mkdir_err\n"
            if !-d $statusdir;
    }

    # Copy a file that is known to exist in $testdir into the status
    # directory; a failure here would leave the simulation with incomplete
    # state, so it is fatal.
    my $copy_existing = sub {
        my ($name) = @_;
        copy("$testdir/$name", "$statusdir/$name") ||
            die "Copy failed: $!\n";
    };

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional

    if (-f "$testdir/groups") {
        $copy_existing->('groups');
    } else {
        PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
    }

    if (-f "$testdir/service_config") {
        $copy_existing->('service_config');
    } else {
        my $conf = {
            'vm:101' => { node => 'node1', group => 'prefer_node1' },
            'vm:102' => { node => 'node2', group => 'prefer_node2' },
            'vm:103' => { node => 'node3', group => 'prefer_node3' },
            'vm:104' => { node => 'node1', group => 'prefer_node1' },
            'vm:105' => { node => 'node2', group => 'prefer_node2' },
            'vm:106' => { node => 'node3', group => 'prefer_node3' },
        };
        $self->write_service_config($conf);
    }

    if (-f "$testdir/hardware_status") {
        $copy_existing->('hardware_status');
    } else {
        my $cstatus = {
            node1 => { power => 'off', network => 'off' },
            node2 => { power => 'off', network => 'off' },
            node3 => { power => 'off', network => 'off' },
        };
        $self->write_hardware_status_nolock($cstatus);
    }

    # purely optional files, no default is generated for them
    for my $name (qw(fence.cfg datacenter.cfg static_service_stats)) {
        $copy_existing->($name) if -f "$testdir/$name";
    }

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};

        my $status_file = "service_status_$node";
        if (-f "$testdir/$status_file") {
            $copy_existing->($status_file);
        } else {
            $self->write_service_status($node, {});
        }
    }

    $self->{service_config} = $self->read_service_config();

    return $self;
}
443 | ||
# Abstract method: this base implementation always dies, the message
# asks for an implementation in a subclass.
sub get_time {
    my ($self) = @_;

    die("implement in subclass");
}
449 | ||
# Print one log line to STDOUT in the form
#   "<level> <time> <id>: <message>"
# where <time> comes from get_time() and <id> defaults to 'hardware' when
# not given. A trailing newline of $msg is removed before printing.
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # Pass the message as an argument instead of interpolating it into the
    # format string, so that a '%' inside the message is printed literally.
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
461 | ||
# Accessor for the runtime status directory. The $node argument is
# accepted but not used.
sub statusdir {
    my ($self, $node) = @_;

    my $dir = $self->{statusdir};

    return $dir;
}
467 | ||
ed408b44 TL |
# Read the datacenter wide configuration (stored as JSON in the
# simulator) from the status directory. The $node argument is accepted
# but not used. The empty hash is passed through to read_json_from_file
# (presumably the default for a missing file - confirm in PVE::HA::Tools).
sub read_datacenter_conf {
    my ($self, $node) = @_;

    my $statusdir = $self->{statusdir};

    return PVE::HA::Tools::read_json_from_file("$statusdir/datacenter.cfg", {});
}
474 | ||
# Run $code while holding an exclusive flock on "<statusdir>/hardware.lck".
#
# $code is called in scalar context as $code->($lock_fh, @param). The lock
# file handle is closed (which releases the lock) after $code returned or
# died; an error raised by $code is re-thrown afterwards.
#
# Dies if the lock file cannot be opened or the lock cannot be acquired.
# Returns the result of $code.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";
    my $fh = IO::File->new(">>$lockfile") ||
        die "unable to open '$lockfile'\n";

    # Retry if flock got interrupted by a signal, every other failure is
    # fatal - we must never run $code without actually holding the lock.
    while (!flock($fh, LOCK_EX)) {
        next if $! == EINTR;

        my $lock_err = $!; # save, close() may overwrite $!
        close($fh);
        die "can't acquire lock '$lockfile' - $lock_err\n";
    }

    my $res;

    eval { $res = $code->($fh, @param) };
    my $err = $@;

    close($fh);

    die $err if $err;

    return $res;
}
505 | ||
8b3f9144 DM |
# Derive per-node online information and the quorum state from the
# hardware status $cstatus.
#
# A node counts as online when both its power and its network are 'on'.
# The cluster is quorate when more than int(node_count / 2) nodes are
# online; without quorum every node is reported as offline.
#
# Returns the list ($node_info, $quorate).
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my $node_info = {};

    my @nodes = keys %$cstatus;

    for my $node (@nodes) {
        my $hw = $cstatus->{$node};
        my $is_up = $hw->{power} eq 'on' && $hw->{network} eq 'on';
        $node_info->{$node}->{online} = $is_up ? 1 : 0;
    }

    my $online_count = grep { $node_info->{$_}->{online} } @nodes;
    my $node_count = scalar(@nodes);

    my $quorate = ($online_count > int($node_count/2)) ? 1 : 0;

    if (!$quorate) {
        # no quorum - nobody is considered online
        $node_info->{$_}->{online} = 0 for @nodes;
    }

    return ($node_info, $quorate);
};
535 | ||
# Read the current hardware status (without locking) and return the list
# ($node_info, $quorate) computed from it.
sub get_node_info {
    my ($self) = @_;

    my $hw_status = $self->read_hardware_status_nolock();

    my ($info, $has_quorum) = $compute_node_info->($self, $hw_status);

    return ($info, $has_quorum);
}
544 | ||
ba2a45cd TL |
545 | # helper for Sim/ only |
# Return the simulated cluster filesystem state $state of $node as stored
# in the hardware status (read without locking). A state that is not
# defined is treated as true.
sub get_cfs_state {
    my ($self, $node, $state) = @_;

    # TODO: ensure nolock is OK when adding this to RTSim
    my $cstatus = $self->read_hardware_status_nolock();
    my $flag = $cstatus->{$node}->{cfs}->{$state};

    # we assume default true if not defined
    return !defined($flag) if !defined($flag);

    return $flag;
}
556 | ||
a5d48ae1 TL |
557 | # simulate hardware commands, the following commands are available: |
558 | # power <node> <on|off> | |
559 | # network <node> <on|off> | |
560 | # delay <seconds> | |
1b21e7e6 | 561 | # skip-round <crm|lrm> [<rounds=1>] |
a5d48ae1 TL |
562 | # cfs <node> <rw|update> <work|fail> |
563 | # reboot <node> | |
564 | # shutdown <node> | |
565 | # restart-lrm <node> | |
566 | # service <sid> <started|disabled|stopped|ignored> | |
567 | # service <sid> <migrate|relocate> <target> | |
568 | # service <sid> stop <timeout> | |
569 | # service <sid> lock/unlock [lockname] | |
81e8e7d0 | 570 | # service <sid> add <node> [<request-state=started>] [<running=0>] |
b8d86ec4 | 571 | # service <sid> delete |
# Execute a single simulated hardware command while holding the global lock.
#
# $cmdstr is split on whitespace into
#   <cmd> <node|sid> [<action>] [<params>...]
# For the 'service' command the second word is verified as HA resource ID,
# for all other commands it must name a known node. In addition to the
# commands listed in the comment above this sub, the code also accepts:
#   crm <node> <start|stop|enable-node-maintenance|disable-node-maintenance>
#
# $logid is passed to log() as id for the "execute ..." message.
#
# Dies on unknown nodes, commands or actions and on missing parameters.
# Returns the hardware status hash as read at the start of the command,
# including the modifications made by the command.
sub sim_hardware_cmd {
    my ($self, $cmdstr, $logid) = @_;

    my $code = sub {
        my ($lock_fh) = @_;

        my $cstatus = $self->read_hardware_status_nolock();

        my ($cmd, $objid, $action, @params) = split(/\s+/, $cmdstr);
        my $param = $params[0]; # for convenience/legacy

        die "sim_hardware_cmd: no node or service for command specified"
            if !$objid;

        # Not every command has an action. Normalize a missing one so that
        # the checks below fail with their regular "unknown action ''"
        # error instead of emitting "uninitialized value" warnings first.
        $action //= '';

        my ($node, $sid, $d);

        if ($cmd eq 'service') {
            $sid = PVE::HA::Tools::pve_verify_ha_resource_id($objid);
        } else {
            $node = $objid;
            $d = $self->{nodes}->{$node} ||
                die "sim_hardware_cmd: no such node '$node'\n";
        }

        $self->log('info', "execute $cmdstr", $logid);

        if ($cmd eq 'power') {
            die "sim_hardware_cmd: unknown action '$action'\n"
                if $action !~ m/^(on|off)$/;

            # only start/stop the services on an actual power state change
            if ($cstatus->{$node}->{power} ne $action) {
                if ($action eq 'on') {

                    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
                    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
                    $d->{lrm_restart} = undef;
                    $cstatus->{$node}->{cfs} = {};

                } else {

                    if ($d->{crm}) {
                        $d->{crm_env}->log('info', "killed by poweroff");
                        $self->crm_control('stop', $d, $lock_fh);
                        $d->{crm} = undef;
                    }
                    if ($d->{lrm}) {
                        $d->{lrm_env}->log('info', "killed by poweroff");
                        $self->lrm_control('stop', $d, $lock_fh);
                        $d->{lrm} = undef;
                        $d->{lrm_restart} = undef;
                    }

                    $self->watchdog_reset_nolock($node);
                    $self->write_service_status($node, {});
                }
            }

            # the network always follows the power state
            $cstatus->{$node}->{power} = $action;
            $cstatus->{$node}->{network} = $action;
            $cstatus->{$node}->{shutdown} = undef;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'network') {
            die "sim_hardware_cmd: unknown network action '$action'"
                if $action !~ m/^(on|off)$/;
            $cstatus->{$node}->{network} = $action;

            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'cfs') {
            die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
                if $action !~ m/^(rw|update)$/;

            # a missing parameter is reported like an unknown one
            my $cfs_cmd = $param // '';
            die "sim_hardware_cmd: unknown cfs command '$cfs_cmd' for '$action' on node '$node'"
                if $cfs_cmd !~ m/^(work|fail)$/;

            $cstatus->{$node}->{cfs}->{$action} = $cfs_cmd eq 'work';
            $self->write_hardware_status_nolock($cstatus);

        } elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
            $cstatus->{$node}->{shutdown} = $cmd;

            $self->write_hardware_status_nolock($cstatus);

            $self->lrm_control('shutdown', $d, $lock_fh) if defined($d->{lrm});
        } elsif ($cmd eq 'restart-lrm') {
            if ($d->{lrm}) {
                $d->{lrm_restart} = 1;
                $self->lrm_control('shutdown', $d, $lock_fh);
            }
        } elsif ($cmd eq 'crm') {

            if ($action eq 'stop') {
                if ($d->{crm}) {
                    $d->{crm_stop} = 1;
                    $self->crm_control('shutdown', $d, $lock_fh);
                }
            } elsif ($action eq 'start') {
                $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
            } elsif ($action eq 'enable-node-maintenance' || $action eq 'disable-node-maintenance') {
                $self->queue_crm_commands_nolock("$action $node");
            } else {
                die "sim_hardware_cmd: unknown action '$action'";
            }

        } elsif ($cmd eq 'service') {
            if ($action eq 'started' || $action eq 'disabled' ||
                $action eq 'stopped' || $action eq 'ignored') {

                $self->set_service_state($sid, $action);

            } elsif ($action eq 'migrate' || $action eq 'relocate') {

                die "sim_hardware_cmd: missing target node for '$action' command"
                    if !$param;

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'stop') {

                die "sim_hardware_cmd: missing timeout for '$action' command"
                    if !defined($param);

                $self->queue_crm_commands_nolock("$action $sid $param");

            } elsif ($action eq 'add') {

                # Refuse early: add_service() writes the configuration
                # before it touches the node's service status, so a service
                # without node would end up in the service config and make
                # every later read_service_config() die.
                die "sim_hardware_cmd: missing node for '$action' command"
                    if !$param;

                $self->add_service(
                    $sid,
                    {state => $params[1] || 'started', node => $param},
                    $params[2] || 0,
                );

            } elsif ($action eq 'delete') {

                $self->delete_service($sid);

            } elsif ($action eq 'lock') {

                $self->lock_service($sid, $param);

            } elsif ($action eq 'unlock') {

                $self->unlock_service($sid, $param);

            } else {
                die "sim_hardware_cmd: unknown service action '$action' " .
                    "- not implemented\n";
            }
        } else {
            die "sim_hardware_cmd: unknown command '$cmdstr'\n";
        }

        return $cstatus;
    };

    return $self->global_lock($code);
}
729 | ||
730 | # for controlling the resource manager services | |
# Abstract method for controlling the CRM service of a node; this base
# implementation always dies and asks for an implementation in a subclass.
sub crm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die("implement in subclass");
}
736 | ||
# Abstract method for controlling the LRM service of a node; this base
# implementation always dies and asks for an implementation in a subclass.
sub lrm_control {
    my ($self, $action, $data, $lock_fh) = @_;

    die("implement in subclass");
}
742 | ||
# Abstract entry point of the simulation; this base implementation always
# dies and asks for an implementation in a subclass.
sub run {
    my ($self) = @_;

    die("implement in subclass");
}
9329c1e2 DM |
748 | |
# Read-modify-write helper for the watchdog status file, executed under
# the global lock.
#
# $code is called with the decoded watchdog status (an empty hash if the
# file does not exist yet) and has to return the list ($wdstatus, $res).
# The returned status is written back and $res is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    my $locked_update = sub {
        my $filename = "$self->{statusdir}/watchdog_status";

        my $wdstatus = {};
        if (-f $filename) {
            my $raw = PVE::Tools::file_get_contents($filename);
            $wdstatus = decode_json($raw);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));

        return $res;
    };

    return $self->global_lock($locked_update);
};
774 | ||
0590c6a7 DM |
# Drop all watchdog entries that belong to $node from the watchdog status
# file. Nothing happens if the file does not exist. No lock is taken here.
sub watchdog_reset_nolock {
    my ($self, $node) = @_;

    my $filename = "$self->{statusdir}/watchdog_status";

    if (-f $filename) {
        my $wdstatus = decode_json(my $raw = PVE::Tools::file_get_contents($filename));

        my @node_ids = grep { $wdstatus->{$_}->{node} eq $node } keys %$wdstatus;
        delete @{$wdstatus}{@node_ids};

        PVE::Tools::file_set_contents($filename, encode_json($wdstatus));
    }
}
791 | ||
9329c1e2 DM |
# Check all watchdogs of $node. A watchdog whose last update is more than
# $watchdog_timeout seconds ago has expired; expired entries are removed
# from the watchdog status.
# Returns 1 if no watchdog of the node expired, 0 otherwise.
sub watchdog_check {
    my ($self, $node) = @_;

    my $expire_stale = sub {
        my ($wdstatus) = @_;

        my $all_alive = 1;

        for my $id (keys %$wdstatus) {
            my $entry = $wdstatus->{$id};
            next unless $entry->{node} eq $node;

            my $now = $self->get_time();
            my $age = $now - $entry->{update_time};
            next if $age <= $watchdog_timeout; # still within the timeout

            # expired
            $all_alive = 0;
            delete $wdstatus->{$id};
        }

        return ($wdstatus, $all_alive);
    };

    return $modify_watchog->($self, $expire_stale);
}
818 | ||
819 | my $wdcounter = 0; | |
820 | ||
# Register a new watchdog for $node in the watchdog status, with the
# current time as last update. The handle is built from the node name,
# the process ID and a per-process counter.
# Dies with "internal error" if the handle already exists.
# Returns the new watchdog handle.
sub watchdog_open {
    my ($self, $node) = @_;

    my $register = sub {
        my ($wdstatus) = @_;

        $wdcounter++;

        my $id = join(':', 'WD', $node, $$, $wdcounter);

        if (defined($wdstatus->{$id})) {
            die "internal error";
        }

        $wdstatus->{$id} = {
            node => $node,
            update_time => $self->get_time(),
        };

        return ($wdstatus, $id);
    };

    return $modify_watchog->($self, $register);
}
843 | ||
# Close the watchdog with handle $wfh by removing it from the watchdog
# status. Dies if the handle is unknown or if the watchdog has already
# expired (last update more than $watchdog_timeout seconds ago).
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $unregister = sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        if (!defined($entry)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $age = $self->get_time() - $entry->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    };

    return $modify_watchog->($self, $unregister);
}
863 | ||
# Refresh the watchdog with handle $wfh by setting its last update time
# to the current time. Dies if the handle is unknown or if the watchdog
# has already expired (last update more than $watchdog_timeout seconds
# ago).
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $refresh = sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        if (!defined($entry)) {
            die "no such watchdog handle '$wfh'\n";
        }

        my $now = $self->get_time();
        my $age = $now - $entry->{update_time};
        if ($age > $watchdog_timeout) {
            die "watchdog expired";
        }

        $entry->{update_time} = $now;

        return ($wdstatus);
    };

    return $modify_watchog->($self, $refresh);
}
886 | ||
5db695c3 FE |
# Return a hash mapping each node of the hardware status (read without
# locking) to a new hash holding just its 'cpus' and 'memory' entries.
sub get_static_node_stats {
    my ($self) = @_;

    my $cstatus = $self->read_hardware_status_nolock();

    my %stats;
    foreach my $node (keys %$cstatus) {
        # key/value slice: pick only the two static properties
        my %picked = %{ $cstatus->{$node} }{qw(cpus memory)};
        $stats{$node} = \%picked;
    }

    return \%stats;
}
899 | ||
0cfd8f5b | 900 | 1; |