]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/LRM.pm
make clean: also clean source tar ball
[pve-ha-manager.git] / src / PVE / HA / LRM.pm
CommitLineData
5f095798
DM
1package PVE::HA::LRM;
2
3# Local Resource Manager
4
5use strict;
6use warnings;
c4a221bc 7use POSIX qw(:sys_wait_h);
5f095798
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
a89ff919 11use PVE::HA::Tools ':exit_codes';
2a045f55 12use PVE::HA::Resources;
5f095798
DM
13
14# Server can have several states:
15
# Table of the states that set_local_status() accepts, each mapped to a
# short human readable description. A state missing here is rejected.
my $valid_states = {
    wait_for_agent_lock => "waiting for agent lock",
    lost_agent_lock => "lost agent_lock",
    active => "got agent_lock",
    maintenance => "got agent_lock (maintenance)",
};
22
# Constructor.
#
# $this  - class name or an existing object (its class is reused)
# $haenv - HA environment object, stored as $self->{haenv}
#
# The object starts with the internal state 'startup' and is moved to
# 'wait_for_agent_lock' through set_local_status() before it is returned.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my %fields = (
        haenv => $haenv,
        status => { state => 'startup' },
        workers => {},
        results => {},
        restart_tries => {},
        shutdown_request => 0,
        shutdown_errors => 0,
        # mode can be: active, reboot, shutdown, restart
        mode => 'active',
        cluster_state_update => 0,
    );

    my $self = bless \%fields, $class;

    $self->set_local_status({ state => 'wait_for_agent_lock' });

    return $self;
}
45
# Switch the LRM into shutdown handling.
#
# Does nothing if a shutdown was already requested. Otherwise it asks the
# environment whether the node shuts down (and whether that is a reboot),
# reads the configured shutdown policy and derives the new LRM mode:
#   - node shutdown with freezing  -> mode 'restart'
#   - node shutdown without freeze -> mode 'shutdown'
#   - no node shutdown (service restart only) -> mode 'restart'
# On a node shutdown a 'request_stop' job is queued for every service located
# on this node. Finally the shutdown_request flag is set and the LRM status
# is written out (best effort, failures are only logged).
sub shutdown_request {
    my ($self) = @_;

    return if $self->{shutdown_request}; # already in shutdown mode

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my ($shutdown, $reboot) = $haenv->is_node_shutdown();

    my $dc_ha_cfg = $haenv->get_ha_settings();
    my $shutdown_policy = $dc_ha_cfg->{shutdown_policy} // 'conditional';

    if ($shutdown) { # don't log this on service restart, only on node shutdown
        $haenv->log('info', "got shutdown request with shutdown policy '$shutdown_policy'");
    }

    my $freeze_all;
    if ($shutdown_policy eq 'conditional') {
        $freeze_all = $reboot;
    } elsif ($shutdown_policy eq 'freeze') {
        $freeze_all = 1;
    } elsif ($shutdown_policy eq 'failover') {
        $freeze_all = 0;
    } else {
        $haenv->log('err', "unknown shutdown policy '$shutdown_policy', fall back to conditional");
        $freeze_all = $reboot;
    }

    if ($shutdown) {
        # *always* queue stop jobs for all services if the node shuts down,
        # independent if it's a reboot or a poweroff, else we may corrupt
        # services or hinder node shutdown
        my $ss = $self->{service_status};

        foreach my $sid (keys %$ss) {
            my $sd = $ss->{$sid};
            next if !$sd->{node};
            next if $sd->{node} ne $nodename;
            # Note: use undef uid to mark shutdown/stop jobs
            $self->queue_resource_command($sid, undef, 'request_stop');
        }

        if ($freeze_all) {
            if ($reboot) {
                $haenv->log('info', "reboot LRM, stop and freeze all services");
            } else {
                $haenv->log('info', "shutdown LRM, stop and freeze all services");
            }
            $self->{mode} = 'restart';
        } else {
            $haenv->log('info', "shutdown LRM, stop all services");
            $self->{mode} = 'shutdown';
        }
    } else {
        $haenv->log('info', "restart LRM, freeze all services");
        $self->{mode} = 'restart';
    }

    $self->{shutdown_request} = 1;

    eval { $self->update_lrm_status(); };
    if (my $err = $@) {
        # log through the environment; this class has no log() method itself
        $haenv->log('err', "unable to update lrm status file - $err");
    }
}
115
# Accessor: returns the status hash reference stored in $self->{status}.
sub get_local_status {
    my $self = shift;

    my $status = $self->{status};

    return $status;
}
121
# Replace the local status with $new (a hash reference with a 'state' key).
#
# Dies if the requested state is not listed in $valid_states. If the state
# is unchanged nothing happens. Otherwise the transition is logged, the
# current time is recorded in $new->{state_change_time} and $new becomes
# the stored status.
sub set_local_status {
    my ($self, $new) = @_;

    my $requested = $new->{state};
    if (!$valid_states->{$requested}) {
        die "invalid state '$requested'";
    }

    my $haenv = $self->{haenv};

    my $old = $self->{status};
    my $current = $old->{state};

    # important: only update if it really changed
    if ($current eq $requested) {
        return;
    }

    $haenv->log('info', "status change $current => $requested");

    $new->{state_change_time} = $haenv->get_time();

    $self->{status} = $new;
}
140
9c7d068b
DM
# Write the current LRM status (state, mode, results, timestamp) through
# the environment's write_lrm_status().
#
# Returns 0 without writing when the environment reports no quorum, 0 when
# the write dies (the error is logged), and 1 on success.
sub update_lrm_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    if (!$haenv->quorate()) {
        return 0;
    }

    my %snapshot = (
        state => $self->{status}->{state},
        mode => $self->{mode},
        results => $self->{results},
        timestamp => $haenv->get_time(),
    );

    eval { $haenv->write_lrm_status(\%snapshot); };
    my $err = $@;

    return 1 if !$err;

    $haenv->log('err', "unable to write lrm status file - $err");
    return 0;
}
163
8e940b68
TL
# Refresh $self->{service_status} from the manager status provided by the
# environment.
#
# Returns 1 on success (an absent service_status becomes an empty hash).
# If reading the manager status dies, the error is logged, the cached
# service status is left untouched and undef is returned.
sub update_service_status {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $manager_status = eval { $haenv->read_manager_status(); };
    my $err = $@;

    if ($err) {
        $haenv->log('err', "updating service status from manager failed: $err");
        return undef;
    }

    $self->{service_status} = $manager_status->{service_status} || {};
    return 1;
}
178
5f095798
DM
# Try to acquire the HA agent lock and keep the watchdog in step with it.
#
# On success the already opened watchdog ($self->{ha_agent_wd}) is updated,
# or a new one is opened and remembered, and 1 is returned. A failed attempt
# is retried after a one second sleep; the loop gives up after more than
# 5 failed attempts or more than 5 seconds and then returns 0.
sub get_protected_ha_agent_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $attempts = 0;
    my $started_at = $haenv->get_time();

    while (1) {
        if ($haenv->get_ha_agent_lock()) {
            if (my $wd = $self->{ha_agent_wd}) {
                $haenv->watchdog_update($wd);
            } else {
                $self->{ha_agent_wd} = $haenv->watchdog_open();
            }
            return 1;
        }

        $attempts++;
        last if $attempts > 5; # try max 5 time

        my $elapsed = $haenv->get_time() - $started_at;
        last if $elapsed > 5; # for max 5 seconds

        $haenv->sleep(1);
    }

    return 0;
}
209
546e2f1f
DM
# Count the services from $self->{service_status} that are located on this
# node and whose requested state is defined and is none of 'stopped',
# 'freeze' or 'error'.
sub active_service_count {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    # erroneous services are not managed by HA, don't count them as active
    my %inactive_state = map { $_ => 1 } qw(stopped freeze error);

    my $count = 0;

    foreach my $sid (keys %$ss) {
        my $sd = $ss->{$sid};

        my $node = $sd->{node};
        next if !$node || $node ne $nodename;

        my $req_state = $sd->{state};
        next if !defined($req_state) || $inactive_state{$req_state};

        $count++;
    }

    return $count;
}
5bd7aa54
DM
237
# File-level flag: work() sets it to 1 after update_lrm_status() succeeded
# for the first time, and skips that initial write on later rounds.
my $wrote_lrm_status_at_startup = 0;
239
5f095798
DM
# Run a single LRM round wrapped in the environment's loop hooks.
#
# Stores the result of the environment's cluster_state_update() in
# $self->{cluster_state_update}, calls work() and returns its result.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    $haenv->loop_start_hook();

    $self->{cluster_state_update} = $haenv->cluster_state_update();

    my $keep_running = $self->work();

    $haenv->loop_end_hook();

    return $keep_running;
}
255
# Perform one round of LRM work.
#
# The round has two phases: first the local state machine is advanced
# (wait_for_agent_lock / active / lost_agent_lock), then the work belonging
# to the resulting state is done.
#
# Returns 0 on the paths that end LRM operation after a shutdown request
# (or, before the first status write succeeded, whenever a shutdown was
# requested) and 1 otherwise. Dies on an unknown state.
# NOTE(review): presumably the caller stops looping on 0 - confirm there.
sub work {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    # the LRM status must have been written once before anything else is done
    if (!$wrote_lrm_status_at_startup) {
        if (!$self->update_lrm_status()) {
            # do nothing
            $haenv->sleep(5);
            return $self->{shutdown_request} ? 0 : 1;
        }
        $wrote_lrm_status_at_startup = 1;
    }

    my $status = $self->get_local_status();
    my $state = $status->{state};

    $self->update_service_status();

    my $fence_request = PVE::HA::Tools::count_fenced_services($self->{service_status}, $haenv->nodename());

    # do state changes first

    # note: currently not used below
    my $ctime = $haenv->get_time();

    # FIXME:

    if ($state eq 'wait_for_agent_lock') {

        my $service_count = $self->active_service_count();

        # only grab the lock if there is something to do and it is safe
        if (!$fence_request && $service_count && $haenv->quorate()
            && $self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'active' });
        }

    } elsif ($state eq 'lost_agent_lock') {

        if (!$fence_request && $haenv->quorate()
            && $self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'active' });
        }

    } elsif ($state eq 'active') {

        if ($fence_request) {
            $haenv->log('err', "node need to be fenced - releasing agent_lock\n");
            $self->set_local_status({ state => 'lost_agent_lock'});
        } elsif (!$self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'lost_agent_lock'});
        }
    }

    $status = $self->get_local_status();
    $state = $status->{state};

    # do work

    if ($state eq 'wait_for_agent_lock') {

        return 0 if $self->{shutdown_request};

        $self->update_lrm_status();

        $haenv->sleep(5);

        return 1;
    }

    if ($state eq 'active') {

        my $startime = $haenv->get_time();

        my $max_time = 10;

        my $shutdown = 0;

        # do work (max_time seconds)
        eval {
            # fixme: set alert timer

            # if we could not get the current service status there's no point
            # in doing anything, try again next round.
            return if !$self->update_service_status();

            if (!$self->{shutdown_request}) {
                if (!$self->{cluster_state_update}) {
                    # update failed but we could still renew our lock (cfs restart?),
                    # safely skip manage and expect to update just fine next round
                    $haenv->log('notice', "temporary inconsistent cluster state (cfs restart?), skip round");
                    return;
                }

                $self->manage_resources();
                return;
            }

            # shutdown was requested

            if ($self->{mode} eq 'restart') {

                my $service_count = $self->active_service_count();

                if ($service_count == 0 && $self->run_workers() == 0) {
                    if ($self->{ha_agent_wd}) {
                        $haenv->watchdog_close($self->{ha_agent_wd});
                        delete $self->{ha_agent_wd};
                    }

                    $shutdown = 1;

                    # restart with no or freezed services, release the lock
                    $haenv->release_ha_agent_lock();
                }

            } elsif ($self->run_workers() == 0) {

                if ($self->{shutdown_errors} == 0) {
                    if ($self->{ha_agent_wd}) {
                        $haenv->watchdog_close($self->{ha_agent_wd});
                        delete $self->{ha_agent_wd};
                    }

                    # shutdown with all services stopped thus release the lock
                    $haenv->release_ha_agent_lock();
                }

                $shutdown = 1;
            }
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        $self->update_lrm_status();

        return 0 if $shutdown;

        $haenv->sleep_until($startime + $max_time);

        return 1;
    }

    if ($state eq 'lost_agent_lock') {

        # Note: watchdog is active and will trigger soon!

        # so we hope to get the lock back soon!

        if ($self->{shutdown_request}) {

            my $service_count = $self->active_service_count();

            if ($service_count > 0) {
                $haenv->log('err', "get shutdown request in state 'lost_agent_lock' - detected $service_count running services");

                # watchdog should have already triggered, so either it's
                # set to noboot or it failed. As we are in restart mode, and
                # have infinity stoptimeout -> exit now - we don't touch services
                # or change state, so this is safe, relatively speaking
                if ($self->{mode} eq 'restart'
                    && ($haenv->get_time() - $self->{status}->{state_change_time}) > 90) {
                    $haenv->log('err', "lost agent lock and restart request for over 90 seconds - giving up!");
                    return 0;
                }
            } else {

                # all services are stopped, so we can close the watchdog

                if ($self->{ha_agent_wd}) {
                    $haenv->watchdog_close($self->{ha_agent_wd});
                    delete $self->{ha_agent_wd};
                }

                return 0;
            }
        }

        $haenv->sleep(5);

        return 1;
    }

    die "got unexpected status '$state'\n";
}
449
# Start queued resource commands and collect finished ones for up to about
# five seconds.
#
# With a positive worker limit from the environment each queued command is
# executed in a forked child, never exceeding that limit of running
# children. Otherwise the command is executed directly in this process and
# its result handed to the matching *_command_finished() method.
#
# Returns the number of entries left in $self->{workers}.
sub run_workers {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $starttime = $haenv->get_time();

    # number of workers to start, if 0 we exec the command directly without forking
    my $max_workers = $haenv->get_max_workers();

    my $sc = $haenv->read_service_config();

    while (($haenv->get_time() - $starttime) < 5) {
        my $active = $self->check_active_workers();

        foreach my $sid (sort keys %{$self->{workers}}) {
            last if $active >= $max_workers && $max_workers > 0;

            my $job = $self->{workers}->{$sid};
            next if $job->{pid}; # already started

            if (!($max_workers > 0)) {
                # run directly in this process (e.g. for regression tests)
                my $res = -1;
                eval {
                    $res = $self->exec_resource_agent($sid, $sc->{$sid}, $job->{state}, $job->{target});
                    # mimic a wait status, as the finished handlers expect one
                    $res = $res << 8 if $res > 0;
                };
                if (my $err = $@) {
                    $haenv->log('err', $err);
                }
                if (defined($job->{uid})) {
                    $self->resource_command_finished($sid, $job->{uid}, $res);
                } else {
                    $self->stop_command_finished($sid, $res);
                }
                next;
            }

            my $pid = fork();
            if (!defined($pid)) {
                $haenv->log('err', "fork worker failed");
                $active = 0;
                last; # abort, try later
            }

            if ($pid != 0) {
                # parent: remember the child
                $active++;
                $job->{pid} = $pid;
                next;
            }

            # child
            $haenv->after_fork(); # cleanup

            # do work
            my $res = -1;
            eval {
                $res = $self->exec_resource_agent($sid, $sc->{$sid}, $job->{state}, $job->{target});
            };
            if (my $err = $@) {
                $haenv->log('err', $err);
                POSIX::_exit(-1);
            }
            POSIX::_exit($res);
        }

        last if !$active;

        $haenv->sleep(1);
    }

    return scalar(keys %{$self->{workers}});
}
519
# Queue a command for every service of this node according to the cached
# service status, then run the workers.
#
# Restart counters of services which no longer appear in the service status
# are dropped first. Services without node or uid, on another node, without
# a requested state, or in state 'freeze' are skipped.
#
# Returns the result of run_workers().
sub manage_resources {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my $ss = $self->{service_status};

    my $tries = $self->{restart_tries};
    my @vanished = grep { !$ss->{$_} } keys %$tries;
    delete @{$tries}{@vanished};

    foreach my $sid (keys %$ss) {
        my $sd = $ss->{$sid};
        next if !$sd->{node} || !$sd->{uid} || $sd->{node} ne $nodename;

        my $req_state = $sd->{state};
        next if !defined($req_state) || $req_state eq 'freeze';

        $self->queue_resource_command($sid, $sd->{uid}, $req_state, $sd->{target});
    }

    return $self->run_workers();
}
546
# Put a command for service $sid into the worker queue.
#
# $uid    - command id, undef marks shutdown/stop jobs
# $state  - requested state / command name
# $target - optional target, only stored when true
#
# Nothing is queued when a result for $uid is already recorded (except for
# 'stopped'), or when a worker for $sid is already running. A queued but
# not yet started entry for $sid is replaced.
sub queue_resource_command {
    my ($self, $sid, $uid, $state, $target) = @_;

    # do not queue the exactly same command twice as this may lead to
    # an inconsistent HA state when the first command fails but the CRM
    # does not process its failure right away and the LRM starts a second
    # try, without the CRM knowing of it (race condition)
    # The 'stopped' command is an exception as we do not process its result
    # in the CRM and we want to execute it always (even with no active CRM)
    if ($state ne 'stopped' && $uid && defined($self->{results}->{$uid})) {
        return;
    }

    my $queued = $self->{workers}->{$sid};
    return if $queued && $queued->{pid}; # already started

    # a not yet started entry is simply overwritten with the new command
    my $job = {
        sid => $sid,
        uid => $uid,
        state => $state,
    };
    $self->{workers}->{$sid} = $job;

    $job->{target} = $target if $target;
}
572
# Reap finished worker processes and count the ones still running.
#
# For every worker entry with a pid a non-blocking waitpid() is done. A
# reaped worker is passed, with its wait status, to
# resource_command_finished() (entry has a uid) or stop_command_finished()
# (no uid). Entries without pid are ignored.
#
# Returns the number of workers that have a pid and were not reaped.
sub check_active_workers {
    my ($self) = @_;

    my $running = 0;

    foreach my $sid (keys %{$self->{workers}}) {
        my $w = $self->{workers}->{$sid};

        my $pid = $w->{pid};
        next if !$pid; # not started yet

        # check status
        my $reaped = waitpid($pid, WNOHANG);
        if (!(defined($reaped) && ($reaped == $pid))) {
            $running++;
            next;
        }

        if (defined($w->{uid})) {
            $self->resource_command_finished($sid, $w->{uid}, $?);
        } else {
            $self->stop_command_finished($sid, $?);
        }
    }

    return $running;
}
597
116dea30
DM
# Handle the end of a shutdown/stop job for service $sid.
#
# $status is a wait status as found in $?, or -1 if the agent could not be
# executed. The worker entry is removed; a failed execution, a termination
# by signal or a non-zero exit code increments $self->{shutdown_errors}.
sub stop_command_finished {
    my ($self, $sid, $status) = @_;

    my $haenv = $self->{haenv};

    my $w = delete $self->{workers}->{$sid};
    return if !$w; # should not happen

    my $exit_code;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
        $exit_code = -1;
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
        $exit_code = -1;
    } else {
        $exit_code = ($status >> 8);
    }

    $self->{shutdown_errors}++ if $exit_code != 0;
}
620
c4a221bc
DM
# Handle the end of a resource command for service $sid with command id $uid.
#
# $status is a wait status as found in $?, or -1 if the agent could not be
# executed. The worker entry is removed and the exit code is passed through
# handle_service_exitcode(). Unless that yields ETRY_AGAIN, the result is
# recorded in $self->{results} under $uid, and all results whose uid is no
# longer present in the cached service status are dropped.
sub resource_command_finished {
    my ($self, $sid, $uid, $status) = @_;

    my $haenv = $self->{haenv};

    my $w = delete $self->{workers}->{$sid};
    return if !$w; # should not happen

    my $exit_code;

    if ($status == -1) {
        $haenv->log('err', "resource agent $sid finished - failed to execute");
        $exit_code = -1;
    } elsif (my $sig = ($status & 127)) {
        $haenv->log('err', "resource agent $sid finished - got signal $sig");
        $exit_code = -1;
    } else {
        $exit_code = ($status >> 8);
    }

    $exit_code = $self->handle_service_exitcode($sid, $w->{state}, $exit_code);

    return if $exit_code == ETRY_AGAIN; # tell nobody, simply retry

    $self->{results}->{$uid} = {
        sid => $w->{sid},
        state => $w->{state},
        exit_code => $exit_code,
    };

    my $ss = $self->{service_status};

    # compute hash of valid/existing uids
    my %valid_uid;
    foreach my $known_sid (keys %$ss) {
        my $sd = $ss->{$known_sid};
        my $known_uid = $sd->{uid};
        $valid_uid{$known_uid} = 1 if $known_uid;
    }

    # keep only the results which still belong to a known uid
    my %kept = map { $_ => $self->{results}->{$_} }
        grep { $valid_uid{$_} } keys %{$self->{results}};

    $self->{results} = \%kept;
}
666
ea4443cc
TL
# Post-process the exit code of a finished resource agent, so that the CRM
# knows whether the LRM retries the action itself (based on the restart
# policy of the service) or the CRM has to recover from the failure.
#
# Only the 'started' command is treated specially:
#   - SUCCESS resets the restart counter of $sid
#   - ERROR returns ETRY_AGAIN while the counter is below the configured
#     max_restart, otherwise the counter is reset and ERROR is returned
# Everything else is passed through unchanged.
sub handle_service_exitcode {
    my ($self, $sid, $cmd, $exit_code) = @_;

    my $haenv = $self->{haenv};
    my $tries = $self->{restart_tries};

    my $sc = $haenv->read_service_config();

    # without a config entry for the service no restart is attempted
    my $cd = $sc->{$sid};
    my $max_restart = $cd ? $cd->{max_restart} : 0;

    return $exit_code if $cmd ne 'started';

    if ($exit_code == SUCCESS) {
        $tries->{$sid} = 0;
        return $exit_code;
    }

    return $exit_code if $exit_code != ERROR;

    $tries->{$sid} //= 0;

    if ($tries->{$sid} >= $max_restart) {
        $haenv->log('err', "unable to start service $sid on local node after $tries->{$sid} retries");
        $tries->{$sid} = 0;
        return ERROR;
    }

    $tries->{$sid}++;

    $haenv->log('warning', "restart policy: retry number $tries->{$sid} for service '$sid'");

    # tell CRM that we retry the start
    return ETRY_AGAIN;
}
715
2a045f55
TL
# Execute command $cmd for service $sid through its resource plugin.
#
# $service_config - configuration entry of the service (must be set)
# $cmd            - 'started', 'request_stop', 'stopped', 'migrate',
#                   'relocate' or 'error'
# @params         - extra parameters; the first one is the target node for
#                   'migrate' and 'relocate'
#
# Returns one of the exit code constants: SUCCESS, ERROR,
# EUNKNOWN_SERVICE_TYPE, EUNKNOWN_SERVICE, EWRONG_NODE or EUNKNOWN_COMMAND.
# Dies if 'migrate'/'relocate' is requested without a target.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    my $haenv = $self->{haenv};

    my $nodename = $haenv->nodename();

    my (undef, $service_type, $service_name) = $haenv->parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($service_type);
    if (!$plugin) {
        $haenv->log('err', "service type '$service_type' not implemented");
        return EUNKNOWN_SERVICE_TYPE;
    }

    if (!$service_config) {
        $haenv->log('err', "missing resource configuration for '$sid'");
        return EUNKNOWN_SERVICE;
    }

    # process error state early
    if ($cmd eq 'error') {

        $haenv->log('err', "service $sid is in an error state and needs manual " .
                    "intervention. Look up 'ERROR RECOVERY' in the documentation.");

        return SUCCESS; # error always succeeds
    }

    if ($service_config->{node} ne $nodename) {
        $haenv->log('err', "service '$sid' not on this node");
        return EWRONG_NODE;
    }

    my $id = $service_name;

    my $running = $plugin->check_running($haenv, $id);

    if ($cmd eq 'started') {

        return SUCCESS if $running;

        $haenv->log("info", "starting service $sid");

        $plugin->start($haenv, $id);

        # verify the result instead of trusting the start call
        $running = $plugin->check_running($haenv, $id);

        if ($running) {
            $haenv->log("info", "service status $sid started");
            return SUCCESS;
        } else {
            $haenv->log("warning", "unable to start service $sid");
            return ERROR;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        return SUCCESS if !$running;

        $haenv->log("info", "stopping service $sid");

        $plugin->shutdown($haenv, $id);

        # verify the result instead of trusting the shutdown call
        $running = $plugin->check_running($haenv, $id);

        if (!$running) {
            $haenv->log("info", "service status $sid stopped");
            return SUCCESS;
        } else {
            $haenv->log("info", "unable to stop stop service $sid (still running)");
            return ERROR;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];

        # A missing target always raised an exception here; the former
        # 'return EINVALID_PARAMETER' behind the die could never be reached
        # and was removed together with the duplicated check.
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        if ($service_config->{node} eq $target) {
            # already there
            return SUCCESS;
        }

        # 'migrate' is requested as online move, 'relocate' as offline move
        my $online = ($cmd eq 'migrate') ? 1 : 0;

        my $res = $plugin->migrate($haenv, $id, $target, $online);

        # something went wrong if service is still on this node
        if (!$res) {
            $haenv->log("err", "service $sid not moved (migration error)");
            return ERROR;
        }

        return SUCCESS;

    }

    $haenv->log("err", "implement me (cmd '$cmd')");
    return EUNKNOWN_COMMAND;
}
824
825
5f095798 8261;