]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/LRM.pm
partially revert previous unclean commit
[pve-ha-manager.git] / src / PVE / HA / LRM.pm
CommitLineData
5f095798
DM
1package PVE::HA::LRM;
2
3# Local Resource Manager
4
5use strict;
6use warnings;
c4a221bc 7use POSIX qw(:sys_wait_h);
5f095798
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
a89ff919 11use PVE::HA::Tools ':exit_codes';
2a045f55 12use PVE::HA::Resources;
5f095798
DM
13
# The LRM can be in one of the following states (state name => human readable
# description). set_local_status() dies for any state that is not listed here;
# the initial 'startup' state is only ever set directly by the constructor.
my $valid_states = {
    wait_for_agent_lock => 'waiting for agent lock',
    active => 'got agent_lock',
    lost_agent_lock => 'lost agent_lock',
};
21
# Constructor.
#
# $haenv is the HA environment object, all cluster/node interaction of the LRM
# goes through it. The new object starts in the internal 'startup' state and is
# immediately switched to 'wait_for_agent_lock'.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my %fields = (
        haenv => $haenv,
        status => { state => 'startup' },
        workers => {},
        results => {},
        restart_tries => {},
        shutdown_request => 0,
        shutdown_errors => 0,
        # mode can be: active, reboot, shutdown, restart
        mode => 'active',
        cluster_state_update => 0,
    );

    my $self = bless \%fields, $class;

    $self->set_local_status({ state => 'wait_for_agent_lock' });

    return $self;
}
44
# Switch the LRM into shutdown/restart mode.
#
# Whether services get frozen (mode 'restart') or only stopped (mode
# 'shutdown') depends on whether the node itself shuts down and on the
# 'shutdown_policy' HA setting ('conditional', 'freeze' or 'failover', with
# 'conditional' as default and as fallback for unknown values).
# If the node shuts down, a 'request_stop' job is queued for every service
# located on this node. A LRM service restart without node shutdown always
# selects mode 'restart'.
#
# Calling this again after the first request is a no-op.
sub shutdown_request {
    my ($self) = @_;

    return if $self->{shutdown_request}; # already in shutdown mode

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my ($shutdown, $reboot) = $haenv->is_node_shutdown();

    my $dc_ha_cfg = $haenv->get_ha_settings();
    my $shutdown_policy = $dc_ha_cfg->{shutdown_policy} // 'conditional';

    if ($shutdown) { # don't log this on service restart, only on node shutdown
        $haenv->log('info', "got shutdown request with shutdown policy '$shutdown_policy'");
    }

    my $freeze_all;
    if ($shutdown_policy eq 'conditional') {
        $freeze_all = $reboot;
    } elsif ($shutdown_policy eq 'freeze') {
        $freeze_all = 1;
    } elsif ($shutdown_policy eq 'failover') {
        $freeze_all = 0;
    } else {
        $haenv->log('err', "unknown shutdown policy '$shutdown_policy', fall back to conditional");
        $freeze_all = $reboot;
    }

    if ($shutdown) {
        # *always* queue stop jobs for all services if the node shuts down,
        # independent if it's a reboot or a poweroff, else we may corrupt
        # services or hinder node shutdown
        my $ss = $self->{service_status};

        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next if !$sd->{node};
            next if $sd->{node} ne $nodename;
            # Note: use undef uid to mark shutdown/stop jobs
            $self->queue_resource_command($sid, undef, 'request_stop');
        }

        if ($freeze_all) {
            if ($reboot) {
                $haenv->log('info', "reboot LRM, stop and freeze all services");
            } else {
                $haenv->log('info', "shutdown LRM, stop and freeze all services");
            }
            $self->{mode} = 'restart';
        } else {
            $haenv->log('info', "shutdown LRM, stop all services");
            $self->{mode} = 'shutdown';
        }
    } else {
        $haenv->log('info', "restart LRM, freeze all services");
        $self->{mode} = 'restart';
    }

    $self->{shutdown_request} = 1;

    eval { $self->update_lrm_status(); };
    if (my $err = $@) {
        # the LRM object has no log() method of its own, logging has to go
        # through the HA environment
        $haenv->log('err', "unable to update lrm status file - $err");
    }
}
114
# Returns the current local status hash (the stored reference, not a copy).
sub get_local_status {
    my $self = shift;

    my $status = $self->{status};

    return $status;
}
120
# Set a new local status.
#
# $new is a hash with at least a 'state' key, which must be one of the states
# listed in $valid_states, otherwise this dies. Nothing happens if the state
# did not change. On a real change the transition is logged, the change time
# is recorded in $new->{state_change_time} and $new becomes the current status.
sub set_local_status {
    my ($self, $new) = @_;

    my $known = $valid_states->{$new->{state}};
    die "invalid state '$new->{state}'" if !$known;

    my $haenv = $self->{haenv};
    my $old = $self->{status};

    # important: only update if the state really changed
    if ($old->{state} eq $new->{state}) {
        return;
    }

    $haenv->log('info', "status change $old->{state} => $new->{state}");

    $new->{state_change_time} = $haenv->get_time();

    $self->{status} = $new;
}
139
9c7d068b
DM
# Write the current LRM status (state, mode, results, timestamp) through the
# HA environment.
#
# Returns 1 on success. Returns 0 without writing anything if the node is not
# quorate, and 0 (after logging the error) if writing the status failed.
sub update_lrm_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    if (!$haenv->quorate()) {
        return 0;
    }

    my %lrm_status = (
        state => $self->{status}->{state},
        mode => $self->{mode},
        results => $self->{results},
        timestamp => $haenv->get_time(),
    );

    eval { $haenv->write_lrm_status(\%lrm_status); };
    my $err = $@;

    return 1 if !$err;

    $haenv->log('err', "unable to write lrm status file - $err");
    return 0;
}
162
8e940b68
TL
# Refresh $self->{service_status} from the manager status.
#
# Returns 1 on success. If reading the manager status dies, the error is
# logged, the previously stored service status is left untouched and undef is
# returned.
sub update_service_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $ms = eval { $haenv->read_manager_status(); };
    my $err = $@;

    if ($err) {
        $haenv->log('err', "updating service status from manager failed: $err");
        return undef;
    }

    # fall back to an empty hash if the manager has no service status (yet)
    $self->{service_status} = $ms->{service_status} || {};

    return 1;
}
177
5f095798
DM
# Try to get (or renew) the HA agent lock and protect it with the watchdog.
#
# Once the lock is held, an already opened watchdog is updated, otherwise a
# new one is opened and remembered in $self->{ha_agent_wd}.
# Getting the lock is retried with a one second pause in between, for at most
# 6 attempts or until more than 5 seconds passed since the first attempt.
#
# Returns 1 if the lock is held, 0 otherwise.
sub get_protected_ha_agent_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $failed_attempts = 0;
    my $starttime = $haenv->get_time();

    while (1) {
        if ($haenv->get_ha_agent_lock()) {
            my $wd = $self->{ha_agent_wd};
            if ($wd) {
                $haenv->watchdog_update($wd);
            } else {
                $self->{ha_agent_wd} = $haenv->watchdog_open();
            }
            return 1;
        }

        $failed_attempts++;
        return 0 if $failed_attempts > 5; # try max 5 time

        my $elapsed = $haenv->get_time() - $starttime;
        return 0 if $elapsed > 5; # for max 5 seconds

        $haenv->sleep(1);
    }
}
208
546e2f1f
DM
# Count the services from the stored service status which are located on this
# node and have a requested state other than 'stopped', 'freeze' or 'error'.
# Entries without node or without requested state are not counted.
sub active_service_count {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    # erroneous services are not managed by HA, don't count them as active
    my %inactive = (stopped => 1, freeze => 1, error => 1);

    my $count = grep {
        my $sd = $ss->{$_};
        my $req_state = $sd->{state};
        $sd->{node}
            && $sd->{node} eq $nodename
            && defined($req_state)
            && !$inactive{$req_state};
    } keys %$ss;

    return $count;
}
5bd7aa54
DM
236
# Set as soon as work() managed to write the LRM status for the first time.
# Note: this is a file scoped lexical, not per-object state.
my $wrote_lrm_status_at_startup = 0;

# Run one LRM loop iteration: call the environment's loop start hook, record
# the result of the cluster state update in $self->{cluster_state_update}, do
# the actual work() and call the loop end hook.
#
# Returns whatever work() returned (0 once the LRM is done, 1 otherwise).
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    $haenv->loop_start_hook();

    my $state_updated = $haenv->cluster_state_update();
    $self->{cluster_state_update} = $state_updated;

    my $keep_running = $self->work();

    $haenv->loop_end_hook();

    return $keep_running;
}
254
# Do the work of one loop iteration.
#
# 1. Until the LRM status could be written once, only retry that (with a 5
#    second pause).
# 2. Update the state machine: try to get the agent lock when waiting for it
#    or after losing it, give it up when a fence request is pending or the
#    lock cannot be renewed.
# 3. Act according to the (possibly new) state:
#    - wait_for_agent_lock: publish status and sleep
#    - active: manage resources, or finish a requested shutdown/restart
#    - lost_agent_lock: wait, or finish a requested shutdown
#
# Returns 0 if the LRM is done and can exit, 1 if it should be called again.
sub work {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    if (!$wrote_lrm_status_at_startup) {
        if (!$self->update_lrm_status()) {
            # do nothing
            $haenv->sleep(5);
            return $self->{shutdown_request} ? 0 : 1;
        }
        $wrote_lrm_status_at_startup = 1;
    }

    my $state = $self->get_local_status()->{state};

    $self->update_service_status();

    my $fence_request = PVE::HA::Tools::count_fenced_services($self->{service_status}, $haenv->nodename());

    # closes and forgets the watchdog, if one is open
    my $close_watchdog = sub {
        return if !$self->{ha_agent_wd};
        $haenv->watchdog_close($self->{ha_agent_wd});
        delete $self->{ha_agent_wd};
    };

    # do state changes first

    # NOTE(review): the value is not used anywhere below, the call is only
    # kept so that the environment sees the same sequence of calls.
    my $ctime = $haenv->get_time();

    if ($state eq 'wait_for_agent_lock') {

        my $service_count = $self->active_service_count();

        if (
            !$fence_request
            && $service_count
            && $haenv->quorate()
            && $self->get_protected_ha_agent_lock()
        ) {
            $self->set_local_status({ state => 'active' });
        }

    } elsif ($state eq 'lost_agent_lock') {

        if (!$fence_request && $haenv->quorate() && $self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'active' });
        }

    } elsif ($state eq 'active') {

        if ($fence_request) {
            $haenv->log('err', "node need to be fenced - releasing agent_lock\n");
            $self->set_local_status({ state => 'lost_agent_lock'});
        } elsif (!$self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'lost_agent_lock'});
        }
    }

    $state = $self->get_local_status()->{state};

    # do work

    if ($state eq 'wait_for_agent_lock') {

        return 0 if $self->{shutdown_request};

        $self->update_lrm_status();

        $haenv->sleep(5);

    } elsif ($state eq 'active') {

        my $startime = $haenv->get_time();

        my $max_time = 10;

        my $shutdown = 0;

        # do work (max_time seconds)
        eval {
            # fixme: set alert timer

            # if we could not get the current service status there's no point
            # in doing anything, try again next round.
            return if !$self->update_service_status();

            if (!$self->{shutdown_request}) {
                if (!$self->{cluster_state_update}) {
                    # update failed but we could still renew our lock (cfs restart?),
                    # safely skip manage and expect to update just fine next round
                    $haenv->log('notice', "temporary inconsistent cluster state " .
                        "(cfs restart?), skip round");
                    return;
                }

                $self->manage_resources();
                return;
            }

            if ($self->{mode} eq 'restart') {
                my $service_count = $self->active_service_count();
                return if $service_count != 0;
                return if $self->run_workers() != 0;

                $close_watchdog->();

                $shutdown = 1;

                # restart with no or frozen services, release the lock
                $haenv->release_ha_agent_lock();
                return;
            }

            return if $self->run_workers() != 0;

            if ($self->{shutdown_errors} == 0) {
                $close_watchdog->();

                # shutdown with all services stopped thus release the lock
                $haenv->release_ha_agent_lock();
            }

            $shutdown = 1;
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        $self->update_lrm_status();

        return 0 if $shutdown;

        $haenv->sleep_until($startime + $max_time);

    } elsif ($state eq 'lost_agent_lock') {

        # Note: watchdog is active and will trigger soon!

        # so we hope to get the lock back soon!

        if ($self->{shutdown_request}) {

            my $service_count = $self->active_service_count();

            if ($service_count <= 0) {
                # all services are stopped, so we can close the watchdog
                $close_watchdog->();

                return 0;
            }

            $haenv->log('err', "get shutdown request in state 'lost_agent_lock' - " .
                "detected $service_count running services");

            if ($self->{mode} eq 'restart') {
                my $state_mt = $self->{status}->{state_change_time};

                # watchdog should have already triggered, so either it's
                # set to noboot or it failed. As we are in restart mode, and
                # have infinity stoptimeout -> exit now - we don't touch services
                # or change state, so this is safe, relatively speaking
                if (($haenv->get_time() - $state_mt) > 90) {
                    $haenv->log('err', "lost agent lock and restart request for over 90 seconds - giving up!");
                    return 0;
                }
            }
        }

        $haenv->sleep(5);

    } else {

        die "got unexpected status '$state'\n";

    }

    return 1;
}
446
# Start queued workers and collect finished ones.
#
# Loops for at most about 5 seconds (environment time), pausing one second
# between rounds, and stops early once no worker is active anymore. At most
# get_max_workers() workers run in parallel as forked child processes. If that
# limit is 0 nothing is forked, the resource agent is executed directly and
# its result is processed right away.
#
# Returns the number of entries left in the worker queue.
sub run_workers {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $starttime = $haenv->get_time();

    # number of workers to start, if 0 we exec the command directly without forking
    my $max_workers = $haenv->get_max_workers();

    my $sc = $haenv->read_service_config();

    # executes the resource agent for a single queue entry
    my $run_agent = sub {
        my ($sid, $w) = @_;
        return $self->exec_resource_agent($sid, $sc->{$sid}, $w->{state}, $w->{target});
    };

    while (($haenv->get_time() - $starttime) < 5) {
        my $active = $self->check_active_workers();

        for my $sid (sort keys %{$self->{workers}}) {
            last if $max_workers > 0 && $active >= $max_workers;

            my $w = $self->{workers}->{$sid};
            next if $w->{pid}; # already started

            # only fork if we may else call exec_resource_agent
            # directly (e.g. for regression tests)
            if ($max_workers > 0) {
                my $pid = fork();
                if (!defined($pid)) {
                    $haenv->log('err', "fork worker failed");
                    # abort, try later
                    $active = 0;
                    last;
                } elsif ($pid == 0) {
                    # child process
                    $haenv->after_fork(); # cleanup

                    # do work
                    my $res = -1;
                    eval { $res = $run_agent->($sid, $w); };
                    if (my $err = $@) {
                        $haenv->log('err', $err);
                        POSIX::_exit(-1);
                    }
                    POSIX::_exit($res);
                } else {
                    $active++;
                    $w->{pid} = $pid;
                }
            } else {
                my $res = -1;
                eval {
                    $res = $run_agent->($sid, $w);
                    # mimic the layout of a wait status, the *_finished
                    # methods extract the exit code from the upper byte
                    $res = $res << 8 if $res > 0;
                };
                if (my $err = $@) {
                    $haenv->log('err', $err);
                }

                if (defined($w->{uid})) {
                    $self->resource_command_finished($sid, $w->{uid}, $res);
                } else {
                    $self->stop_command_finished($sid, $res);
                }
            }
        }

        last if !$active;

        $haenv->sleep(1);
    }

    return scalar(keys %{$self->{workers}});
}
516
# Queue the requested state of every service on this node as command and run
# the workers.
#
# Restart counters of services which are no longer part of the service status
# are dropped first. Services without node or uid, services on other nodes and
# services without requested state or in state 'freeze' are skipped.
#
# Returns the result of run_workers().
sub manage_resources {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    my $tries = $self->{restart_tries};
    my @vanished = grep { !$ss->{$_} } keys %$tries;
    delete @{$tries}{@vanished};

    for my $sid (keys %$ss) {
        my $sd = $ss->{$sid};
        next if !($sd->{node} && $sd->{uid});
        next if $sd->{node} ne $nodename;

        my $req_state = $sd->{state};
        next if !defined($req_state) || $req_state eq 'freeze';

        $self->queue_resource_command($sid, $sd->{uid}, $req_state, $sd->{target});
    }

    return $self->run_workers();
}
543
# Queue a command ($state) for service $sid.
#
# $uid identifies the command, an undefined $uid marks shutdown/stop jobs.
# $target is optional and only stored if it is a true value.
# A command of an already started worker (one with a pid) is never replaced,
# a queued but not yet started one gets overwritten.
sub queue_resource_command {
    my ($self, $sid, $uid, $state, $target) = @_;

    # do not queue the exactly same command twice as this may lead to
    # an inconsistent HA state when the first command fails but the CRM
    # does not process its failure right away and the LRM starts a second
    # try, without the CRM knowing of it (race condition)
    # The 'stopped' command is an exception as we do not process its result
    # in the CRM and we want to execute it always (even with no active CRM)
    if ($state ne 'stopped' && $uid && defined($self->{results}->{$uid})) {
        return;
    }

    my $workers = $self->{workers};

    if (my $queued = $workers->{$sid}) {
        return if $queued->{pid}; # already started
        # else, delete and overwrite queue entry with new command
        delete $workers->{$sid};
    }

    my $entry = $workers->{$sid} = {
        sid => $sid,
        uid => $uid,
        state => $state,
    };

    $entry->{target} = $target if $target;
}
569
# Reap finished worker processes and count the still running ones.
#
# Every worker with a pid is polled without blocking. The wait status of a
# finished one is handed to resource_command_finished() (worker has a uid) or
# stop_command_finished() (no uid). Queue entries without pid are ignored.
#
# Returns the number of workers which have a pid and did not finish yet.
sub check_active_workers {
    my ($self) = @_;

    # finish/count workers
    my $running = 0;

    for my $sid (keys %{$self->{workers}}) {
        my $w = $self->{workers}->{$sid};

        my $pid = $w->{pid};
        next if !$pid; # not started yet

        # check status
        my $reaped = waitpid($pid, WNOHANG);

        if (!(defined($reaped) && $reaped == $pid)) {
            $running++;
            next;
        }

        if (defined($w->{uid})) {
            $self->resource_command_finished($sid, $w->{uid}, $?);
        } else {
            $self->stop_command_finished($sid, $?);
        }
    }

    return $running;
}
594
116dea30
DM
# Process the result of a finished shutdown/stop job (a worker without uid).
#
# $status is laid out like a wait status: -1 means the agent could not be
# executed, the lower 7 bits carry a signal number and the exit code is found
# above bit 8. The worker is removed from the queue and everything but a clean
# exit with code 0 increases $self->{shutdown_errors}.
sub stop_command_finished {
    my ($self, $sid, $status) = @_;

    my $haenv = $self->{haenv};

    my $w = delete $self->{workers}->{$sid};
    if (!$w) {
        return; # should not happen
    }

    my $exit_code = -1;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
    } else {
        $exit_code = ($status >> 8);
    }

    $self->{shutdown_errors}++ if $exit_code != 0;
}
617
c4a221bc
DM
# Process the result of a finished resource command (a worker with uid).
#
# $status is laid out like a wait status, see stop_command_finished(). The
# worker is removed from the queue and the exit code is passed through
# handle_service_exitcode(). Unless that asks for a retry (ETRY_AGAIN) the
# result is recorded under $uid. Afterwards all recorded results whose uid is
# not referenced by the current service status anymore are dropped.
sub resource_command_finished {
    my ($self, $sid, $uid, $status) = @_;

    my $haenv = $self->{haenv};

    my $w = delete $self->{workers}->{$sid};
    if (!$w) {
        return; # should not happen
    }

    my $exit_code = -1;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
    } else {
        $exit_code = ($status >> 8);
    }

    $exit_code = $self->handle_service_exitcode($sid, $w->{state}, $exit_code);

    return if $exit_code == ETRY_AGAIN; # tell nobody, simply retry

    $self->{results}->{$uid} = {
        sid => $w->{sid},
        state => $w->{state},
        exit_code => $exit_code,
    };

    my $ss = $self->{service_status};

    # collect the uids which are still referenced by the service status
    my %is_valid_uid;
    for my $known_sid (keys %$ss) {
        my $sd = $ss->{$known_sid};
        $is_valid_uid{$sd->{uid}} = 1 if $sd->{uid};
    }

    # only keep the results belonging to one of those uids
    my @keep = grep { $is_valid_uid{$_} } keys %{$self->{results}};
    my %kept;
    @kept{@keep} = @{$self->{results}}{@keep};

    $self->{results} = \%kept;
}
663
ea4443cc
TL
# Post-process the exit code of a finished resource agent so that the CRM
# knows whether the LRM retries the action itself or the CRM has to recover
# from the failure.
#
# Only the 'started' command is special: a SUCCESS resets the retry counter of
# the service, an ERROR gets turned into ETRY_AGAIN until the counter reached
# the 'max_restart' value from the service configuration (0 if the service has
# no configuration). After that ERROR is returned and the counter is reset.
# Every other command/exit code combination is returned unchanged.
sub handle_service_exitcode {
    my ($self, $sid, $cmd, $exit_code) = @_;

    my $haenv = $self->{haenv};
    my $tries = $self->{restart_tries};

    my $sc = $haenv->read_service_config();

    my $cd = $sc->{$sid};
    my $max_restart = $cd ? $cd->{max_restart} : 0;

    return $exit_code if $cmd ne 'started';

    if ($exit_code == SUCCESS) {
        $tries->{$sid} = 0;
        return $exit_code;
    }

    return $exit_code if $exit_code != ERROR;

    $tries->{$sid} //= 0;

    if ($tries->{$sid} >= $max_restart) {
        $haenv->log('err', "unable to start service $sid on local node".
            " after $tries->{$sid} retries");
        $tries->{$sid} = 0;
        return ERROR;
    }

    $tries->{$sid}++;

    $haenv->log('warning', "restart policy: retry number $tries->{$sid}" .
        " for service '$sid'");

    # tell CRM that we retry the start
    return ETRY_AGAIN;
}
712
2a045f55
TL
# Execute command $cmd for service $sid through its resource plugin.
#
# $service_config is the configuration entry of the service, @params holds
# additional command parameters (the target node for 'migrate'/'relocate').
#
# Supported commands:
#   started               - start the service unless it already runs
#   request_stop, stopped - shut the service down unless it is already stopped
#   migrate, relocate     - move the service to the target node ('migrate'
#                           asks the plugin for an online move)
#   error                 - only logs that manual intervention is required
#
# Returns one of the exit codes imported from PVE::HA::Tools. Dies if a
# 'migrate'/'relocate' command comes without target node.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my (undef, $service_type, $service_name) = $haenv->parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($service_type);
    if (!$plugin) {
        $haenv->log('err', "service type '$service_type' not implemented");
        return EUNKNOWN_SERVICE_TYPE;
    }

    if (!$service_config) {
        $haenv->log('err', "missing resource configuration for '$sid'");
        return EUNKNOWN_SERVICE;
    }

    # process error state early
    if ($cmd eq 'error') {

        $haenv->log('err', "service $sid is in an error state and needs manual " .
            "intervention. Look up 'ERROR RECOVERY' in the documentation.");

        return SUCCESS; # error always succeeds
    }

    if ($service_config->{node} ne $nodename) {
        $haenv->log('err', "service '$sid' not on this node");
        return EWRONG_NODE;
    }

    my $id = $service_name;

    my $running = $plugin->check_running($haenv, $id);

    if ($cmd eq 'started') {

        return SUCCESS if $running;

        $haenv->log("info", "starting service $sid");

        $plugin->start($haenv, $id);

        # verify that the service really came up
        $running = $plugin->check_running($haenv, $id);

        if ($running) {
            $haenv->log("info", "service status $sid started");
            return SUCCESS;
        } else {
            $haenv->log("warning", "unable to start service $sid");
            return ERROR;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        return SUCCESS if !$running;

        $haenv->log("info", "stopping service $sid");

        $plugin->shutdown($haenv, $id);

        # verify that the service really went down
        $running = $plugin->check_running($haenv, $id);

        if (!$running) {
            $haenv->log("info", "service status $sid stopped");
            return SUCCESS;
        } else {
            # NOTE(review): the doubled 'stop' is kept as is, in case log
            # output gets compared somewhere - confirm before changing
            $haenv->log("info", "unable to stop stop service $sid (still running)");
            return ERROR;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];

        # a missing target always raised an exception here, the former
        # 'return EINVALID_PARAMETER' after the die could never be reached
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        if ($service_config->{node} eq $target) {
            # already there
            return SUCCESS;
        }

        my $online = ($cmd eq 'migrate') ? 1 : 0;

        my $res = $plugin->migrate($haenv, $id, $target, $online);

        # something went wrong if service is still on this node
        if (!$res) {
            $haenv->log("err", "service $sid not moved (migration error)");
            return ERROR;
        }

        return SUCCESS;

    }

    $haenv->log("err", "implement me (cmd '$cmd')");
    return EUNKNOWN_COMMAND;
}
821
822
5f095798 8231;