]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
resource agents: fix relocate
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
119656b9 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016 14
a89ff919 15use PVE::HA::Tools ':exit_codes';
714a4016 16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
714a4016 18
022e4e79 19
007fcc8b
DM
20my $lockdir = "/etc/pve/priv/lock";
21
714a4016
DM
# Constructor: build an environment object bound to the node name $nodename.
# May be invoked on a class name or on an existing instance.
# Dies when no (true) node name is passed.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    # when called on an instance, reuse that instance's class
    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
35
# Accessor: returns the node name stored on this object.
sub nodename {
    my $self = shift;

    return $self->{nodename};
}
41
# Returns whatever PVE::HA::Config::read_manager_status() returns.
sub read_manager_status {
    my $self = shift;

    return PVE::HA::Config::read_manager_status();
}
47
# Hands $status_obj to PVE::HA::Config::write_manager_status().
sub write_manager_status {
    my $self = shift;
    my ($status_obj) = @_;

    PVE::HA::Config::write_manager_status($status_obj);
}
53
c4a221bc
DM
# Returns the LRM status for $node as provided by
# PVE::HA::Config::read_lrm_status(). When $node is not defined, the
# node name stored on this object is used instead.
sub read_lrm_status {
    my ($self, $node) = @_;

    my $target = defined($node) ? $node : $self->{nodename};

    return PVE::HA::Config::read_lrm_status($target);
}
61
# Passes $status_obj, together with the node name stored on this object,
# to PVE::HA::Config::write_lrm_status().
sub write_lrm_status {
    my $self = shift;
    my ($status_obj) = @_;

    PVE::HA::Config::write_lrm_status($self->{nodename}, $status_obj);
}
c4a221bc 69
# Returns 1 if the output of '/bin/systemctl list-jobs' contains a line
# mentioning 'shutdown.target', 0 otherwise. Errors raised while running
# the command are ignored.
sub is_node_shutdown {
    my ($self) = @_;

    my $found = 0;

    # called by run_command for each output line
    my $scan_line = sub {
        my ($line) = @_;

        $found = 1 if ($line =~ m/shutdown\.target/);
    };

    eval {
        PVE::Tools::run_command(
            ['/bin/systemctl', 'list-jobs'],
            outfunc => $scan_line,
            noerr => 1,
        );
    };

    return $found;
}
86
139a9b90
DM
# Forwards $cmd to PVE::HA::Config::queue_crm_commands() and returns its
# result.
sub queue_crm_commands {
    my $self = shift;
    my ($cmd) = @_;

    return PVE::HA::Config::queue_crm_commands($cmd);
}
92
# Returns whatever PVE::HA::Config::read_crm_commands() returns.
sub read_crm_commands {
    my $self = shift;

    return PVE::HA::Config::read_crm_commands();
}
98
# Returns the result of PVE::HA::Config::resources_config_exists().
sub service_config_exists {
    my $self = shift;

    return PVE::HA::Config::resources_config_exists();
}
714a4016 104
b83b4ae8
DM
# Build the service configuration hash ($sid => service settings) from the
# resources configuration and the cluster VM list.
#
# For every configured service id:
#  - 'state' defaults to 'enabled', 'max_restart' and 'max_relocate' to 1
#  - services whose type has no registered resource plugin are skipped
#  - if the VM list knows the service name, the node from the VM list is used
#  - otherwise the configured node is used, and a warning is emitted (and
#    the service skipped) when no node is configured either
#
# Note: the entries from the resources configuration are modified in place.
sub read_service_config {
    my ($self) = @_;

    my $res = PVE::HA::Config::read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};
        my (undef, undef, $name) = PVE::HA::Tools::parse_sid($sid);

        $d->{state} = 'enabled' if !defined($d->{state});
        $d->{max_restart} = 1 if !defined($d->{max_restart});
        $d->{max_relocate} = 1 if !defined($d->{max_relocate});

        # only services with a known resource type are considered
        next if !PVE::HA::Resources->lookup($d->{type});

        if (my $vmd = $vmlist->{ids}->{$name}) {
            # the VM list is authoritative for the current location
            $d->{node} = $vmd->{node};
            $conf->{$sid} = $d;
        } elsif (defined($d->{node})) {
            $conf->{$sid} = $d;
        } else {
            warn "service '$sid' without node\n";
        }
    }

    return $conf;
}
139
# Move the configuration file of service $sid from $current_node to
# $new_node by renaming it. The file paths are obtained from the resource
# plugin registered for the service type.
# Dies if no plugin exists for the type or if the rename fails.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my (undef, $type, $name) = PVE::HA::Tools::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($type);
    die "implement me" if !$plugin;

    my $old = $plugin->config_file($name, $current_node);
    my $new = $plugin->config_file($name, $new_node);

    rename($old, $new) ||
        die "rename '$old' to '$new' failed - $!\n";
}
154
abc920b4
DM
# Returns whatever PVE::HA::Config::read_group_config() returns.
sub read_group_config {
    my $self = shift;

    return PVE::HA::Config::read_group_config();
}
160
714a4016
DM
# Returns the list ($node_info, $quorate).
# $node_info maps each cluster member name to a hash whose 'online' entry
# is copied from PVE::Cluster::get_members(); the local node is always
# reported as online. $quorate is the result of
# PVE::Cluster::check_cfs_quorum(1), or 0 if that is false.
sub get_node_info {
    my ($self) = @_;

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my $node_info = {};
    foreach my $member (keys %$members) {
        $node_info->{$member}->{online} = $members->{$member}->{online};
    }

    # local node is always up
    $node_info->{$self->{nodename}}->{online} = 1;

    return ($node_info, $quorate);
}
183
# Write $msg to syslog with priority $level. A trailing newline (as
# removed by chomp) is stripped from the message first.
sub log {
    my $self = shift;
    my ($level, $msg) = @_;

    chomp $msg;

    syslog($level, $msg);
}
191
007fcc8b
DM
# Per lock id: the time at which the lock was last successfully
# acquired/renewed by get_pve_lock(), or 0 if it is not held.
my $last_lock_status = {};

# Try to acquire (or renew) the lock $lockid, represented by the directory
# "$lockdir/$lockid".
#
# If this process got the lock less than $retry_timeout seconds ago, the
# lock is renewed by updating the directory's mtime; otherwise the lock is
# requested by creating the directory.
#
# Returns 1 if the lock is held, 0 otherwise. A failed renewal returns 0
# without changing the remembered lock state. Transitions between "held"
# and "not held" are logged via $self->log().
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    my $retry = 0;
    my $retry_timeout = 100; # fixme: what timeout

    eval {

        # result ignored on purpose - the -d test below is what counts
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < $retry_timeout)) {
            # send cfs lock update request (utime)
            if (!utime(0, $ctime, $filename)) {
                $retry = 1;
                die "cfs lock update failed - $!\n";
            }
        } else {

            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };

    my $err = $@;

    if ($retry) {
        # $self->log('err', $err) if $err; # for debugging
        return 0;
    }

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # only log when the lock state actually changed
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully acquired lock '$lockid'");
        } else {
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    } else {
        # $self->log('err', $err) if $err; # for debugging
    }

    return $got_lock;
}
256
# Request the lock named "ha_manager_lock" through get_pve_lock() and
# return its result.
sub get_ha_manager_lock {
    my $self = shift;

    return $self->get_pve_lock("ha_manager_lock");
}
262
# Request the per-node agent lock "ha_agent_<node>_lock" through
# get_pve_lock() and return its result. $node defaults to this object's
# node name when not defined.
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    if (!defined($node)) {
        $node = $self->nodename();
    }

    return $self->get_pve_lock("ha_agent_${node}_lock");
}
270
# Returns the result of PVE::Cluster::check_cfs_quorum(), or 0 if that
# call dies.
sub quorate {
    my $self = shift;

    my $state = 0;

    # a failing quorum check is treated as "not quorate"
    eval { $state = PVE::Cluster::check_cfs_quorum(); };

    return $state;
}
281
# Returns the current time as reported by time().
sub get_time {
    my $self = shift;

    return time();
}
287
# Suspend execution for $delay seconds using the builtin sleep.
sub sleep {
    my $self = shift;
    my ($delay) = @_;

    CORE::sleep($delay);
}
293
# Block until time() has reached $end_time, calling $self->sleep(1)
# between checks. Returns immediately if $end_time is already reached.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
305
# Called at the start of a loop iteration: calls
# PVE::Cluster::cfs_update() and records the current time (from
# $self->get_time()) in $self->{loop_start}.
sub loop_start_hook {
    my $self = shift;

    PVE::Cluster::cfs_update();

    $self->{loop_start} = $self->get_time();
}
313
# Called at the end of a loop iteration: warns if more than 30 seconds
# have passed since the time stored in $self->{loop_start}.
sub loop_end_hook {
    my $self = shift;

    my $now = $self->get_time();
    my $delay = $now - $self->{loop_start};

    warn "loop take too long ($delay seconds)\n" if $delay > 30;
}
321
76737af5
DM
# Socket connected to the watchdog multiplexer; undef while no watchdog
# connection is open.
my $watchdog_fh;

# Open the connection to the watchdog socket "/run/watchdog-mux.sock".
# Dies if a connection is already open or if the socket cannot be opened.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    my $sock = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );
    die "unable to open watchdog socket - $!\n" if !$sock;

    $watchdog_fh = $sock;

    $self->log('info', "watchdog active");
}
336
# Send an update to the watchdog by writing a single "\0" byte to the
# watchdog socket. Returns 1 on success; logs an error and returns 0 if
# the write fails or does not write exactly one byte.
# The $wfh parameter is accepted but not used.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $res = $watchdog_fh->syswrite("\0", 1);

    # build the error text right away so $! still belongs to the write
    my $failure;
    if (!defined($res)) {
        $failure = "watchdog update failed - $!\n";
    } elsif ($res != 1) {
        $failure = "watchdog update failed - write $res bytes\n";
    }

    return 1 if !defined($failure);

    $self->log('err', $failure);
    return 0;
}
352
# Disable the watchdog: write the "V" byte to the watchdog socket and
# close it. On a successful close the stored handle is cleared; a failed
# close is logged as an error and the handle is kept.
# The $wfh parameter is accepted but not used.
sub watchdog_close {
    my ($self, $wfh) = @_;

    $watchdog_fh->syswrite("V", 1); # magic watchdog close

    if ($watchdog_fh->close()) {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    } else {
        $self->log('err', "watchdog close failed - $!");
    }
}
364
022e4e79
DM
# Wait until the task identified by $upid is no longer running.
# Always sleeps one second first, then polls once per second, logging a
# debug message for every poll that still finds the process running.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    my $still_running = sub {
        return PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});
    };

    CORE::sleep(1);

    while ($still_running->()) {
        $self->log('debug', "Task still active, waiting");
        CORE::sleep(1);
    }
}
376
0d1d32fb
DM
# This environment always reports that forking is possible (returns 1).
sub can_fork {
    my $self = shift;

    return 1;
}
382
# Execute the command $cmd for service $sid through the resource plugin
# registered for the service type.
#
# Parameters:
#   $sid            - service id, split with PVE::HA::Tools::parse_sid()
#   $service_config - service settings; its 'node' entry must be this node
#   $cmd            - one of 'started', 'request_stop', 'stopped',
#                     'migrate', 'relocate' or 'error'
#   @params         - for 'migrate'/'relocate': the target node in $params[0]
#
# Returns one of the exit code constants:
#   SUCCESS               - requested state reached (or nothing to do)
#   ERROR                 - the plugin action did not have the desired effect
#   EUNKNOWN_SERVICE_TYPE - no plugin for the service type
#   EWRONG_NODE           - service is not located on this node
#   EINVALID_PARAMETER    - migrate/relocate called without a target
#   EUNKNOWN_COMMAND      - $cmd is none of the commands above
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    PVE::INotify::inotify_close();

    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();

    my $nodename = $self->{nodename};

    my (undef, $service_type, $service_name) = PVE::HA::Tools::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($service_type);
    if (!$plugin) {
        $self->log('err', "service type '$service_type' not implemented");
        return EUNKNOWN_SERVICE_TYPE;
    }

    if ($service_config->{node} ne $nodename) {
        $self->log('err', "service '$sid' not on this node");
        return EWRONG_NODE;
    }

    my $vmid = $service_name;

    my $running = $plugin->check_running($vmid);

    if ($cmd eq 'started') {

        return SUCCESS if $running;

        $self->log("info", "starting service $sid");

        $plugin->start($self, $vmid);

        # verify the result instead of trusting the plugin call
        $running = $plugin->check_running($vmid);

        if ($running) {
            $self->log("info", "service status $sid started");
            return SUCCESS;
        } else {
            $self->log("warning", "unable to start service $sid");
            return ERROR;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        return SUCCESS if !$running;

        $self->log("info", "stopping service $sid");

        $plugin->shutdown($self, $vmid);

        # verify the result instead of trusting the plugin call
        $running = $plugin->check_running($vmid);

        if (!$running) {
            $self->log("info", "service status $sid stopped");
            return SUCCESS;
        } else {
            $self->log("info", "unable to stop service $sid (still running)");
            return ERROR;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];
        if (!defined($target)) {
            # report through the exit code, like the other error paths
            $self->log('err', "$cmd '$sid' failed - missing target");
            return EINVALID_PARAMETER;
        }

        if ($service_config->{node} eq $target) {
            # already there
            return SUCCESS;
        }

        # 'migrate' is done online, 'relocate' is not
        my $online = ($cmd eq 'migrate') ? 1 : 0;

        my $oldconfig = $plugin->config_file($vmid, $nodename);

        $plugin->migrate($self, $vmid, $target, $online);

        # something went wrong if old config file is still there
        if (-f $oldconfig) {
            $self->log("err", "service $sid not moved (migration error)");
            return ERROR;
        }

        return SUCCESS;

    } elsif ($cmd eq 'error') {

        if ($running) {
            $self->log("err", "service $sid is in an error state while running");
        } else {
            $self->log("warning", "service $sid is not running and in an error state");
        }
        return SUCCESS; # error always succeeds

    }

    $self->log("err", "implement me (cmd '$cmd')");
    return EUNKNOWN_COMMAND;
}
492
714a4016 4931;