]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/LRM.pm
PVE2 Env: get_ha_settings: don't die if pmxcfs failed
[pve-ha-manager.git] / src / PVE / HA / LRM.pm
CommitLineData
5f095798
DM
1package PVE::HA::LRM;
2
3# Local Resource Manager
4
5use strict;
6use warnings;
c4a221bc 7use POSIX qw(:sys_wait_h);
5f095798
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
a89ff919 11use PVE::HA::Tools ':exit_codes';
2a045f55 12use PVE::HA::Resources;
5f095798
DM
13
# States the LRM may switch to via set_local_status(), mapped to a short
# human readable description. The initial 'startup' state assigned in new()
# is intentionally not listed here.
my $valid_states = {
    active              => 'got agent_lock',
    lost_agent_lock     => 'lost agent_lock',
    wait_for_agent_lock => 'waiting for agent lock',
};
21
# Constructor.
#
# $this  - class name or an existing instance (its class is reused)
# $haenv - HA environment object, stored as $self->{haenv}
#
# Returns the new LRM object, already switched from the initial 'startup'
# state to 'wait_for_agent_lock' through set_local_status().
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my %fields = (
        haenv => $haenv,
        status => { state => 'startup' },
        workers => {},
        results => {},
        restart_tries => {},
        shutdown_request => 0,
        shutdown_errors => 0,
        # mode can be: active, reboot, shutdown, restart
        mode => 'active',
        cluster_state_update => 0,
    );

    my $self = bless \%fields, $class;

    $self->set_local_status({ state => 'wait_for_agent_lock' });

    return $self;
}
44
# Put the LRM into shutdown handling. Does nothing if a shutdown request was
# already registered.
#
# The resulting $self->{mode} depends on what the environment reports:
#  - node shutdown/reboot with services to be frozen  => 'restart'
#  - node shutdown/reboot without freezing            => 'shutdown'
#  - plain LRM service restart (no node shutdown)     => 'restart'
#
# Whether services get frozen is decided by the 'shutdown_policy' HA setting
# ('conditional' if unset): 'conditional' freezes on reboot only, 'freeze'
# always, 'failover' never. Unknown policies are logged and treated like
# 'conditional'.
#
# Finally the LRM status is written out; a failure there is only logged.
sub shutdown_request {
    my ($self) = @_;

    return if $self->{shutdown_request}; # already in shutdown mode

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my ($shutdown, $reboot) = $haenv->is_node_shutdown();

    my $dc_ha_cfg = $haenv->get_ha_settings();
    my $shutdown_policy = $dc_ha_cfg->{shutdown_policy} // 'conditional';

    if ($shutdown) { # don't log this on service restart, only on node shutdown
        $haenv->log('info', "got shutdown request with shutdown policy '$shutdown_policy'");
    }

    my $freeze_all;
    if ($shutdown_policy eq 'conditional') {
        $freeze_all = $reboot;
    } elsif ($shutdown_policy eq 'freeze') {
        $freeze_all = 1;
    } elsif ($shutdown_policy eq 'failover') {
        $freeze_all = 0;
    } else {
        $haenv->log('err', "unknown shutdown policy '$shutdown_policy', fall back to conditional");
        $freeze_all = $reboot;
    }

    if ($shutdown) {
        # *always* queue stop jobs for all services if the node shuts down,
        # independent if it's a reboot or a poweroff, else we may corrupt
        # services or hinder node shutdown
        my $ss = $self->{service_status};

        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next if !$sd->{node};
            next if $sd->{node} ne $nodename;
            # Note: use undef uid to mark shutdown/stop jobs
            $self->queue_resource_command($sid, undef, 'request_stop');
        }

        if ($freeze_all) {
            if ($reboot) {
                $haenv->log('info', "reboot LRM, stop and freeze all services");
            } else {
                $haenv->log('info', "shutdown LRM, stop and freeze all services");
            }
            $self->{mode} = 'restart';
        } else {
            $haenv->log('info', "shutdown LRM, stop all services");
            $self->{mode} = 'shutdown';
        }
    } else {
        $haenv->log('info', "restart LRM, freeze all services");
        $self->{mode} = 'restart';
    }

    $self->{shutdown_request} = 1;

    eval { $self->update_lrm_status(); };
    if (my $err = $@) {
        # log through the environment, the LRM object itself has no log method
        $haenv->log('err', "unable to update lrm status file - $err");
    }
}
114
# Accessor for the current local status hash ($self->{status}).
sub get_local_status {
    my $self = shift;

    my $current = $self->{status};

    return $current;
}
120
# Replace the local status with $new (a hash with at least a 'state' key).
#
# Dies if the requested state is not listed in $valid_states. If the state
# is unchanged nothing happens; otherwise the change is logged, the time of
# the change is stored as 'state_change_time' in $new, and $new becomes the
# current status.
sub set_local_status {
    my ($self, $new) = @_;

    my $wanted = $new->{state};
    die "invalid state '$wanted'" if !$valid_states->{$wanted};

    my $haenv = $self->{haenv};
    my $previous = $self->{status}->{state};

    # important: only update if really changed
    return if $previous eq $wanted;

    $haenv->log('info', "status change $previous => $wanted");

    $new->{state_change_time} = $haenv->get_time();

    $self->{status} = $new;
}
139
9c7d068b
DM
# Write the current LRM state, mode, results and a timestamp through the
# environment's write_lrm_status().
#
# Returns 1 on success. Returns 0 without writing when the environment is
# not quorate, and 0 (after logging the error) when writing died.
sub update_lrm_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    $haenv->quorate() or return 0;

    my $snapshot = {
        state => $self->{status}->{state},
        mode => $self->{mode},
        results => $self->{results},
        timestamp => $haenv->get_time(),
    };

    eval { $haenv->write_lrm_status($snapshot); };
    my $failure = $@;

    return 1 if !$failure;

    $haenv->log('err', "unable to write lrm status file - $failure");

    return 0;
}
162
8e940b68
TL
# Refresh $self->{service_status} from the manager status read through the
# environment.
#
# Returns 1 on success (an absent service_status becomes an empty hash).
# If reading died, the error is logged, the cached service status is left
# untouched and undef is returned.
sub update_service_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $manager_status = eval { $haenv->read_manager_status(); };
    my $failure = $@;

    if ($failure) {
        $haenv->log('err', "updating service status from manager failed: $failure");
        return undef;
    }

    $self->{service_status} = $manager_status->{service_status} || {};

    return 1;
}
177
5f095798
DM
# Try to acquire (or renew) the HA agent lock and keep the watchdog in sync.
#
# On success the watchdog stored in $self->{ha_agent_wd} is updated, or
# opened and stored if there is none yet, and 1 is returned.
#
# Failed attempts are retried after sleeping one second. The sub gives up
# and returns 0 once the sixth attempt failed or more than 5 seconds (per
# the environment's clock) have passed since the first attempt.
sub get_protected_ha_agent_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $failed_attempts = 0;
    my $begin = $haenv->get_time();

    while (1) {
        if ($haenv->get_ha_agent_lock()) {
            if (my $wd = $self->{ha_agent_wd}) {
                $haenv->watchdog_update($wd);
            } else {
                $self->{ha_agent_wd} = $haenv->watchdog_open();
            }
            return 1;
        }

        $failed_attempts++;
        return 0 if $failed_attempts > 5;

        my $elapsed = $haenv->get_time() - $begin;
        return 0 if $elapsed > 5;

        $haenv->sleep(1);
    }
}
208
546e2f1f
DM
# Count the services in $self->{service_status} that are assigned to the
# local node and whose requested state is defined and is none of 'stopped',
# 'freeze' or 'error'.
sub active_service_count {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    my @active = grep {
        my $sd = $ss->{$_};
        my $req_state = $sd->{state};

        $sd->{node}
            && $sd->{node} eq $nodename
            && defined($req_state)
            && $req_state ne 'stopped'
            && $req_state ne 'freeze'
            # erroneous services are not managed by HA, don't count them as active
            && $req_state ne 'error';
    } keys %$ss;

    return scalar(@active);
}
5bd7aa54
DM
236
# Set to 1 by work() once the LRM status could be written for the first time.
my $wrote_lrm_status_at_startup = 0;

# Run a single LRM loop iteration: call the environment's loop start hook,
# remember the result of cluster_state_update() in
# $self->{cluster_state_update}, run work() and call the loop end hook.
#
# Returns whatever work() returned.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    $haenv->loop_start_hook();

    $self->{cluster_state_update} = $haenv->cluster_state_update();

    my $work_result = $self->work();

    $haenv->loop_end_hook();

    return $work_result;
}
254
# Close the watchdog handle kept in $self->{ha_agent_wd}, if there is one,
# and drop it from the object.
sub _close_ha_agent_watchdog {
    my ($self) = @_;

    return if !$self->{ha_agent_wd};

    $self->{haenv}->watchdog_close($self->{ha_agent_wd});
    delete $self->{ha_agent_wd};

    return;
}

# One round of the LRM state machine.
#
# Until the LRM status could be written once, nothing else is done: the sub
# sleeps 5 seconds and returns (0 if a shutdown was requested, else 1).
#
# Afterwards it first performs state transitions:
#  - wait_for_agent_lock => active    needs no fence request, at least one
#                                     active service, quorum and the lock
#  - lost_agent_lock     => active    needs no fence request, quorum and the lock
#  - active => lost_agent_lock        on a fence request or if the lock could
#                                     not be renewed
# and then does the work for the resulting state.
#
# Returns 0 when the LRM loop should end (shutdown completed, or shutdown
# requested while not holding the lock with no active services left),
# otherwise 1. Dies on an unknown state.
sub work {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    if (!$wrote_lrm_status_at_startup) {
        if ($self->update_lrm_status()) {
            $wrote_lrm_status_at_startup = 1;
        } else {
            # do nothing
            $haenv->sleep(5);
            return $self->{shutdown_request} ? 0 : 1;
        }
    }

    my $status = $self->get_local_status();
    my $state = $status->{state};

    $self->update_service_status();

    my $fence_request = PVE::HA::Tools::count_fenced_services($self->{service_status}, $haenv->nodename());

    # do state changes first

    if ($state eq 'wait_for_agent_lock') {

        my $service_count = $self->active_service_count();

        if (!$fence_request && $service_count && $haenv->quorate()) {
            if ($self->get_protected_ha_agent_lock()) {
                $self->set_local_status({ state => 'active' });
            }
        }

    } elsif ($state eq 'lost_agent_lock') {

        if (!$fence_request && $haenv->quorate()) {
            if ($self->get_protected_ha_agent_lock()) {
                $self->set_local_status({ state => 'active' });
            }
        }

    } elsif ($state eq 'active') {

        if ($fence_request) {
            $haenv->log('err', "node need to be fenced - releasing agent_lock\n");
            $self->set_local_status({ state => 'lost_agent_lock'});
        } elsif (!$self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'lost_agent_lock'});
        }
    }

    $status = $self->get_local_status();
    $state = $status->{state};

    # do work

    if ($state eq 'wait_for_agent_lock') {

        return 0 if $self->{shutdown_request};

        $self->update_lrm_status();

        $haenv->sleep(5);

    } elsif ($state eq 'active') {

        my $startime = $haenv->get_time();

        my $max_time = 10;

        my $shutdown = 0;

        # do work (max_time seconds)
        eval {
            # fixme: set alert timer

            # if we could not get the current service status there's no point
            # in doing anything, try again next round.
            return if !$self->update_service_status();

            if ($self->{shutdown_request}) {

                if ($self->{mode} eq 'restart') {

                    my $service_count = $self->active_service_count();

                    if ($service_count == 0) {

                        if ($self->run_workers() == 0) {
                            _close_ha_agent_watchdog($self);

                            $shutdown = 1;

                            # restart with no or freezed services, release the lock
                            $haenv->release_ha_agent_lock();
                        }
                    }
                } else {

                    if ($self->run_workers() == 0) {
                        # only give up watchdog and lock if all stop jobs succeeded
                        if ($self->{shutdown_errors} == 0) {
                            _close_ha_agent_watchdog($self);

                            # shutdown with all services stopped thus release the lock
                            $haenv->release_ha_agent_lock();
                        }

                        $shutdown = 1;
                    }
                }
            } else {
                if (!$self->{cluster_state_update}) {
                    # update failed but we could still renew our lock (cfs restart?),
                    # safely skip manage and expect to update just fine next round
                    $haenv->log('notice', "temporary inconsistent cluster state " .
                        "(cfs restart?), skip round");
                    return;
                }

                $self->manage_resources();

            }
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        $self->update_lrm_status();

        return 0 if $shutdown;

        $haenv->sleep_until($startime + $max_time);

    } elsif ($state eq 'lost_agent_lock') {

        # Note: watchdog is active and will trigger soon!

        # so we hope to get the lock back soon!

        if ($self->{shutdown_request}) {

            my $service_count = $self->active_service_count();

            if ($service_count > 0) {
                $haenv->log('err', "get shutdown request in state 'lost_agent_lock' - " .
                    "detected $service_count running services");

            } else {

                # all services are stopped, so we can close the watchdog

                _close_ha_agent_watchdog($self);

                return 0;
            }
        }

        $haenv->sleep(5);

    } else {

        die "got unexpected status '$state'\n";

    }

    return 1;
}
434
# Start queued resource commands and collect finished ones, for at most
# about 5 seconds (per the environment's clock).
#
# With get_max_workers() > 0 each queued command without a pid is run in a
# forked child, as long as fewer than that many workers are active. With a
# limit of 0 the command is executed directly in this process (e.g. for
# regression tests) and its result is reported immediately.
#
# Returns the number of entries left in $self->{workers}.
sub run_workers {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $begin = $haenv->get_time();

    # number of workers to start, if 0 we exec the command directly without forking
    my $max_workers = $haenv->get_max_workers();

    my $service_cfg = $haenv->read_service_config();

    while (($haenv->get_time() - $begin) < 5) {
        my $active = $self->check_active_workers();

        for my $sid (sort keys %{$self->{workers}}) {
            last if $active >= $max_workers && $max_workers > 0;

            my $worker = $self->{workers}->{$sid};
            next if $worker->{pid}; # already running

            if ($max_workers <= 0) {
                # no forking allowed, call the resource agent directly
                my $status = -1;
                eval {
                    $status = $self->exec_resource_agent(
                        $sid, $service_cfg->{$sid}, $worker->{state}, $worker->{target});
                    # mimic a wait status, the exit code lives in the high byte
                    $status = $status << 8 if $status > 0;
                };
                if (my $err = $@) {
                    $haenv->log('err', $err);
                }
                if (defined($worker->{uid})) {
                    $self->resource_command_finished($sid, $worker->{uid}, $status);
                } else {
                    $self->stop_command_finished($sid, $status);
                }
                next;
            }

            my $pid = fork();
            if (!defined($pid)) {
                $haenv->log('err', "fork worker failed");
                $active = 0;
                last; # abort, try later
            }

            if ($pid != 0) {
                # parent: remember the child
                $active++;
                $worker->{pid} = $pid;
                next;
            }

            # child
            $haenv->after_fork(); # cleanup

            # do work
            my $status = -1;
            eval {
                $status = $self->exec_resource_agent(
                    $sid, $service_cfg->{$sid}, $worker->{state}, $worker->{target});
            };
            if (my $err = $@) {
                $haenv->log('err', $err);
                POSIX::_exit(-1);
            }
            POSIX::_exit($status);
        }

        last if !$active;

        $haenv->sleep(1);
    }

    return scalar(keys %{$self->{workers}});
}
504
# Queue the requested state of every service assigned to this node and then
# run the workers.
#
# Restart counters of services no longer present in the service status are
# dropped first. Services without node, without uid, on another node,
# without a requested state or in state 'freeze' are not queued.
#
# Returns the result of run_workers().
sub manage_resources {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    my @gone = grep { !$ss->{$_} } keys %{$self->{restart_tries}};
    delete $self->{restart_tries}->{$_} for @gone;

    for my $sid (keys %$ss) {
        my $sd = $ss->{$sid};

        next if !$sd->{node} || !$sd->{uid};
        next if $sd->{node} ne $nodename;

        my $req_state = $sd->{state};
        next if !defined($req_state) || $req_state eq 'freeze';

        $self->queue_resource_command($sid, $sd->{uid}, $req_state, $sd->{target});
    }

    return $self->run_workers();
}
531
# Queue command $state for service $sid in $self->{workers}.
#
# $uid    - uid of the request, undef marks shutdown/stop jobs
# $target - optional, only stored in the queue entry when true
#
# Nothing is queued if a worker for $sid was already started, or if a result
# for $uid is already recorded (except for the 'stopped' command). A queued
# but not yet started entry for $sid is replaced.
sub queue_resource_command {
    my ($self, $sid, $uid, $state, $target) = @_;

    # do not queue the exactly same command twice as this may lead to
    # an inconsistent HA state when the first command fails but the CRM
    # does not process its failure right away and the LRM starts a second
    # try, without the CRM knowing of it (race condition)
    # The 'stopped' command is an exception as we do not process its result
    # in the CRM and we want to execute it always (even with no active CRM)
    if ($state ne 'stopped' && $uid && defined($self->{results}->{$uid})) {
        return;
    }

    my $queued = $self->{workers}->{$sid};
    return if $queued && $queued->{pid}; # already started

    # a not yet started entry is simply overwritten with the new command
    my $entry = {
        sid => $sid,
        uid => $uid,
        state => $state,
    };
    $self->{workers}->{$sid} = $entry;

    $entry->{target} = $target if $target;
}
557
# Reap finished worker processes and count the ones still running.
#
# Every worker with a pid is polled with a non-blocking waitpid(). For a
# finished worker the wait status is passed to resource_command_finished()
# (worker has a uid) or stop_command_finished() (no uid).
#
# Returns the number of workers with a pid that were not reaped.
sub check_active_workers {
    my ($self) = @_;

    # finish/count workers
    my $running = 0;

    for my $sid (keys %{$self->{workers}}) {
        my $worker = $self->{workers}->{$sid};

        my $pid = $worker->{pid};
        next if !$pid; # queued only, not started yet

        # check status
        my $reaped = waitpid($pid, WNOHANG);
        my $status = $?;

        if (!defined($reaped) || $reaped != $pid) {
            $running++;
            next;
        }

        if (defined($worker->{uid})) {
            $self->resource_command_finished($sid, $worker->{uid}, $status);
        } else {
            $self->stop_command_finished($sid, $status);
        }
    }

    return $running;
}
582
116dea30
DM
# Handle the end of a stop job (a worker queued without uid).
#
# $status is a wait status: -1 means the agent could not be executed, a
# non-zero value in the low 7 bits is the signal that ended it, otherwise
# the exit code is taken from the bits above the low byte.
#
# The worker is removed from the queue. Anything but a clean exit code 0
# increments $self->{shutdown_errors}.
sub stop_command_finished {
    my ($self, $sid, $status) = @_;

    my $haenv = $self->{haenv};

    my $worker = delete $self->{workers}->{$sid};
    return if !$worker; # should not happen

    my $exit_code = -1;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $signal = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $signal");
    } else {
        $exit_code = ($status >> 8);
    }

    $self->{shutdown_errors}++ if $exit_code != 0;
}
605
c4a221bc
DM
# Handle the end of a resource command that was queued with a uid.
#
# $status is a wait status, decoded like in stop_command_finished(): -1 is
# an execution failure, a non-zero value in the low 7 bits a signal, else
# the exit code is taken from the bits above the low byte.
#
# The worker is removed from the queue and the exit code is passed through
# handle_service_exitcode(). Unless that yields ETRY_AGAIN, the result is
# stored under $uid in $self->{results}; afterwards all results whose uid
# no longer shows up in the service status are dropped.
sub resource_command_finished {
    my ($self, $sid, $uid, $status) = @_;

    my $haenv = $self->{haenv};

    my $worker = delete $self->{workers}->{$sid};
    return if !$worker; # should not happen

    my $exit_code = -1;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
    } elsif (my $signal = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $signal");
    } else {
        $exit_code = ($status >> 8);
    }

    $exit_code = $self->handle_service_exitcode($sid, $worker->{state}, $exit_code);

    return if $exit_code == ETRY_AGAIN; # tell nobody, simply retry

    $self->{results}->{$uid} = {
        sid => $worker->{sid},
        state => $worker->{state},
        exit_code => $exit_code,
    };

    my $ss = $self->{service_status};

    # compute hash of valid/existing uids
    my %uid_in_use;
    for my $service_id (keys %$ss) {
        my $sd = $ss->{$service_id};
        $uid_in_use{$sd->{uid}} = 1 if $sd->{uid};
    }

    # keep only results the service status still refers to
    my %kept = map { $_ => $self->{results}->{$_} }
        grep { $uid_in_use{$_} } keys %{$self->{results}};

    $self->{results} = \%kept;
}
651
ea4443cc
TL
# processes the exit code from a finished resource agent, so that the CRM knows
# if the LRM wants to retry an action based on the current recovery policies for
# the failed service, or the CRM itself must try to recover from the failure.
#
# $sid       - service ID
# $cmd       - the command that finished; only 'started' gets special handling
# $exit_code - exit code of the resource agent
#
# For 'started':
#  - SUCCESS resets the restart counter of $sid
#  - ERROR returns ETRY_AGAIN and bumps the counter while it is below the
#    service's 'max_restart' setting (0 if the service is not configured or
#    the setting is missing); once the limit is reached the counter is reset
#    and ERROR is returned
# Every other command/exit code combination is returned unchanged.
sub handle_service_exitcode {
    my ($self, $sid, $cmd, $exit_code) = @_;

    my $haenv = $self->{haenv};
    my $tries = $self->{restart_tries};

    my $sc = $haenv->read_service_config();

    my $max_restart = 0;

    if (my $cd = $sc->{$sid}) {
        # avoid comparing against undef below if the setting is missing
        $max_restart = $cd->{max_restart} // 0;
    }

    if ($cmd eq 'started') {

        if ($exit_code == SUCCESS) {

            $tries->{$sid} = 0;

            return $exit_code;

        } elsif ($exit_code == ERROR) {

            $tries->{$sid} = 0 if !defined($tries->{$sid});

            if ($tries->{$sid} >= $max_restart) {
                $haenv->log('err', "unable to start service $sid on local node".
                    " after $tries->{$sid} retries");
                $tries->{$sid} = 0;
                return ERROR;
            }

            $tries->{$sid}++;

            $haenv->log('warning', "restart policy: retry number $tries->{$sid}" .
                " for service '$sid'");
            # tell CRM that we retry the start
            return ETRY_AGAIN;
        }
    }

    return $exit_code;

}
700
2a045f55
TL
# Execute command $cmd for service $sid through its resource plugin.
#
# $sid            - service ID, split into type and name by the environment
# $service_config - configuration of the service (must contain 'node')
# $cmd            - 'started', 'request_stop', 'stopped', 'migrate',
#                   'relocate' or 'error'
# @params         - for migrate/relocate: the target node as first element
#
# Returns one of the exit code constants:
#   SUCCESS                 command done (or nothing to do)
#   ERROR                   service could not be started/stopped/moved
#   EUNKNOWN_SERVICE_TYPE   no plugin for the service type
#   EUNKNOWN_SERVICE        no configuration passed for the service
#   EWRONG_NODE             service is not configured for this node
#   EUNKNOWN_COMMAND        $cmd is not handled here
#
# Dies if migrate/relocate is requested without a target.
#
# Note: overwrites $ENV{PATH} for the current process.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my (undef, $service_type, $service_name) = $haenv->parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($service_type);
    if (!$plugin) {
        $haenv->log('err', "service type '$service_type' not implemented");
        return EUNKNOWN_SERVICE_TYPE;
    }

    if (!$service_config) {
        $haenv->log('err', "missing resource configuration for '$sid'");
        return EUNKNOWN_SERVICE;
    }

    # process error state early
    if ($cmd eq 'error') {

        $haenv->log('err', "service $sid is in an error state and needs manual " .
            "intervention. Look up 'ERROR RECOVERY' in the documentation.");

        return SUCCESS; # error always succeeds
    }

    if ($service_config->{node} ne $nodename) {
        $haenv->log('err', "service '$sid' not on this node");
        return EWRONG_NODE;
    }

    my $id = $service_name;

    my $running = $plugin->check_running($haenv, $id);

    if ($cmd eq 'started') {

        return SUCCESS if $running;

        $haenv->log("info", "starting service $sid");

        $plugin->start($haenv, $id);

        # verify, the start call alone does not tell us if it worked
        $running = $plugin->check_running($haenv, $id);

        if ($running) {
            $haenv->log("info", "service status $sid started");
            return SUCCESS;
        } else {
            $haenv->log("warning", "unable to start service $sid");
            return ERROR;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        return SUCCESS if !$running;

        $haenv->log("info", "stopping service $sid");

        $plugin->shutdown($haenv, $id);

        # verify, the shutdown call alone does not tell us if it worked
        $running = $plugin->check_running($haenv, $id);

        if (!$running) {
            $haenv->log("info", "service status $sid stopped");
            return SUCCESS;
        } else {
            $haenv->log("info", "unable to stop stop service $sid (still running)");
            return ERROR;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        if ($service_config->{node} eq $target) {
            # already there
            return SUCCESS;
        }

        my $online = ($cmd eq 'migrate') ? 1 : 0;

        my $res = $plugin->migrate($haenv, $id, $target, $online);

        # something went wrong if service is still on this node
        if (!$res) {
            $haenv->log("err", "service $sid not moved (migration error)");
            return ERROR;
        }

        return SUCCESS;

    }

    $haenv->log("err", "implement me (cmd '$cmd')");
    return EUNKNOWN_COMMAND;
}
809
810
5f095798 8111;