]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
ha-manager status: include service state
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
abc920b4 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016
DM
14
15use PVE::HA::Tools;
16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
714a4016 18
022e4e79
DM
19use PVE::QemuServer;
20use PVE::API2::Qemu;
21
007fcc8b
DM
# Directory in which get_pve_lock() creates and refreshes its lock entries.
my $lockdir = "/etc/pve/priv/lock";

# JSON file accessed by read_manager_status() / write_manager_status().
my $manager_status_filename = "/etc/pve/ha/manager_status";

# HA configuration files for groups and resources.
my $ha_groups_config = "/etc/pve/ha/groups.cfg";
my $ha_resources_config = "/etc/pve/ha/resources.cfg";
abc920b4 27
95ea5d67 28# fixme:
6cbcb5f7
DM
29#cfs_register_file($ha_groups_config,
30# sub { PVE::HA::Groups->parse_config(@_); },
31# sub { PVE::HA::Groups->write_config(@_); });
95ea5d67
DM
32#cfs_register_file($ha_resources_config,
33# sub { PVE::HA::Resources->parse_config(@_); },
34# sub { PVE::HA::Resources->write_config(@_); });
35
# Read the HA resources configuration file and hand its raw content to
# PVE::HA::Config::parse_resources_config(). A missing file is treated
# like an empty one.
#
# Returns whatever parse_resources_config() returns.
sub read_resources_config {
    my $content = -f $ha_resources_config
        ? PVE::Tools::file_get_contents($ha_resources_config)
        : '';

    return PVE::HA::Config::parse_resources_config($ha_resources_config, $content);
}
44
# Serialize the resources configuration $cfg and store it in the HA
# resources configuration file.
#
# NOTE(review): serialization goes through PVE::HA::Resources, which is not
# loaded by any 'use' line visible in this file - confirm it is available.
sub write_resources_config {
    my ($cfg) = @_;

    my $serialized = PVE::HA::Resources->write_config($ha_resources_config, $cfg);

    return PVE::Tools::file_set_contents($ha_resources_config, $serialized);
}
51
# Run $code while holding the cluster lock used for HA configuration
# changes.
#
# Parameters:
#   $code   - code reference executed under the lock
#   $errmsg - optional prefix prepended to the error when locking or
#             $code failed
#
# Returns the value returned by cfs_lock_storage(); dies if $@ is set
# after the call.
sub lock_ha_config {
    my ($code, $errmsg) = @_;

    # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
    my $res = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $code);

    if (my $err = $@) {
        die "$errmsg: $err" if $errmsg;
        die $err;
    }

    return $res;
}
714a4016
DM
63
# Constructor.
#
# Parameters:
#   $this     - class name, or an existing object whose class is reused
#   $nodename - name of the local node (mandatory)
#
# Returns a new object with the 'nodename' attribute set; dies when
# $nodename is missing or false.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
77
# Accessor: returns the node name this environment was created with.
sub nodename {
    my $self = shift;

    return $self->{nodename};
}
83
# Load the manager status from its JSON file. The empty hash reference is
# passed to read_json_from_file() as second argument (presumably the
# default for a missing file - confirm in PVE::HA::Tools).
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Tools::read_json_from_file($manager_status_filename, {});
}
91
# Store $status_obj as JSON in the manager status file.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    return PVE::HA::Tools::write_json_to_file($manager_status_filename, $status_obj);
}
99
c4a221bc
DM
# Load the LRM status of a node from /etc/pve/nodes/<node>/lrm_status.
#
# Parameters:
#   $node - node to query; defaults to the local node when undefined
sub read_lrm_status {
    my ($self, $node) = @_;

    $node //= $self->{nodename};

    return PVE::HA::Tools::read_json_from_file("/etc/pve/nodes/$node/lrm_status", {});
}
109
# Store $status_obj as JSON in the lrm_status file of the local node.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $local_node = $self->{nodename};

    return PVE::HA::Tools::write_json_to_file(
        "/etc/pve/nodes/$local_node/lrm_status", $status_obj);
}
119
# Returns 1 if the HA resources configuration file exists as a plain
# file, 0 otherwise.
sub service_config_exists {
    my ($self) = @_;

    return 1 if -f $ha_resources_config;

    return 0;
}
125
# Build the service configuration from the HA resources configuration.
#
# Only resources of type 'pvevm' are considered. For each of them:
#   - a missing 'state' defaults to 'enabled'
#   - if the VM is found in the cluster VM list, 'node' is set to the
#     node recorded there
#   - otherwise the 'node' from the resource entry is used, and the
#     service is skipped with a warning when there is none
#
# Returns a hash reference mapping service IDs to their config entries.
sub read_service_config {
    my ($self) = @_;

    my $res = read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};

        # parse_sid() is unpacked as ($type, $name) by the other callers in
        # this file, so call it in list context here as well instead of
        # relying on its scalar-context result.
        my (undef, $name) = PVE::HA::Tools::parse_sid($sid);

        $d->{state} = 'enabled' if !defined($d->{state});

        next if $d->{type} ne 'pvevm';

        if (my $vmd = $vmlist->{ids}->{$name}) {
            # the cluster VM list is authoritative for the location
            $d->{node} = $vmd->{node};
            $conf->{$sid} = $d;
        } elsif (defined($d->{node})) {
            # VM not in the cluster VM list: keep the configured node
            $conf->{$sid} = $d;
        } else {
            warn "service '$sid' without node\n";
        }
    }

    return $conf;
}
158
# Move a service from $current_node to $new_node by renaming the VM
# configuration file. Only services of type 'pvevm' are supported.
#
# Dies if the service type is not supported or if the rename fails.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my ($type, $name) = PVE::HA::Tools::parse_sid($sid);

    die "implement me" if $type ne 'pvevm';

    my $old = PVE::QemuServer::config_file($name, $current_node);
    my $new = PVE::QemuServer::config_file($name, $new_node);

    rename($old, $new) ||
        die "rename '$old' to '$new' failed - $!\n";
}
173
abc920b4
DM
# Read the HA groups configuration file and hand its raw content to
# PVE::HA::Config::parse_groups_config(). A missing file is treated like
# an empty one.
sub read_group_config {
    my ($self) = @_;

    # fixme: use cfs_read_file

    my $content = -f $ha_groups_config
        ? PVE::Tools::file_get_contents($ha_groups_config)
        : '';

    return PVE::HA::Config::parse_groups_config($ha_groups_config, $content);
}
186
3b996922
DM
# Append $cmd as one line to /etc/pve/ha/crm_commands. The file is read
# and rewritten while holding the HA configuration lock.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    # the newline is added back below, so each command ends with exactly one
    chomp $cmd;

    my $append_cmd = sub {
        my $filename = "/etc/pve/ha/crm_commands";

        my $queued = -f $filename ? PVE::Tools::file_get_contents($filename) : '';

        PVE::Tools::file_set_contents($filename, $queued . "$cmd\n");
    };

    return lock_ha_config($append_cmd);
}
204
# Fetch the content of /etc/pve/ha/crm_commands and truncate the file,
# all while holding the HA configuration lock. The locked code yields an
# empty string when the file does not exist.
sub read_crm_commands {
    my ($self) = @_;

    my $drain = sub {
        my $filename = "/etc/pve/ha/crm_commands";

        return '' if ! -f $filename;

        my $pending = PVE::Tools::file_get_contents($filename);
        PVE::Tools::file_set_contents($filename, '');

        return $pending;
    };

    return lock_ha_config($drain);
}
222
714a4016
DM
# Report which nodes are cluster members and whether they are online.
#
# Returns a two element list:
#   - hash reference { <node> => { online => <value> } } built from
#     PVE::Cluster::get_members(); the local node is always marked online
#   - the result of PVE::Cluster::check_cfs_quorum(1), or 0 if it is false
sub get_node_info {
    my ($self) = @_;

    my $local_node = $self->{nodename};

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my $node_info = {};
    foreach my $member (keys %$members) {
        $node_info->{$member}->{online} = $members->{$member}->{online};
    }

    $node_info->{$local_node}->{online} = 1; # local node is always up

    return ($node_info, $quorate);
}
245
# Send $msg to syslog with the given $level; a trailing newline is
# stripped first.
sub log {
    my ($self, $level, $text) = @_;

    chomp $text;

    syslog($level, $text);
}
253
007fcc8b
DM
# Per lock id: time of the last successful acquire/refresh, or 0 when the
# lock is currently not held by this process.
my $last_lock_status = {};

# Try to acquire, or refresh, the lock named $lockid below $lockdir.
#
# If the lock was obtained less than 100 seconds ago it is refreshed by
# updating the mtime of the lock entry; otherwise a fresh acquire is
# attempted by creating the lock directory. State transitions (lock
# gained / lock lost) are logged.
#
# Returns 1 when the lock is held after the call, 0 otherwise. Errors are
# caught and reported via the log, not thrown.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    eval {

        # result intentionally ignored, the -d test below is what counts
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < 100)) { # fixme: what timeout
            utime(0, $ctime, $filename) || # cfs lock update request
                die "cfs lock update failed - $!\n";
        } else {

            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };

    my $err = $@;

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # only log when the lock state changed compared to the previous call
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully aquired lock '$lockid'");
        } else {
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    }

    return $got_lock;
}
305
# Try to get the HA manager lock; returns the result of get_pve_lock().
sub get_ha_manager_lock {
    my $self = shift;

    my $lockid = "ha_manager_lock";

    return $self->get_pve_lock($lockid);
}
311
# Try to get the HA agent lock of $node (the local node when $node is
# undefined); returns the result of get_pve_lock().
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    $node //= $self->nodename();

    my $lockid = "ha_agent_${node}_lock";

    return $self->get_pve_lock($lockid);
}
319
# Returns the result of PVE::Cluster::check_cfs_quorum(), or 0 when that
# call throws an error (the error itself is discarded).
sub quorate {
    my ($self) = @_;

    my $has_quorum = 0;

    eval { $has_quorum = PVE::Cluster::check_cfs_quorum(); };

    return $has_quorum;
}
330
# Returns the current time as reported by time().
sub get_time {
    my $self = shift;

    my $now = time();

    return $now;
}
336
# Suspend execution for $delay seconds using the core sleep function.
sub sleep {
    my ($self, $seconds) = @_;

    CORE::sleep($seconds);
}
342
# Block until the wall clock reaches $end_time, sleeping in one second
# steps via $self->sleep(). Returns immediately when $end_time is already
# reached.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
354
# Called at the start of each main loop iteration: triggers
# PVE::Cluster::cfs_update() and records the iteration start time in
# $self->{loop_start} for loop_end_hook().
sub loop_start_hook {
    my $self = shift;

    PVE::Cluster::cfs_update();

    my $started = $self->get_time();
    $self->{loop_start} = $started;
}
362
# Called at the end of each main loop iteration: warns when more than 30
# seconds passed since the time recorded in $self->{loop_start}.
sub loop_end_hook {
    my $self = shift;

    my $started = $self->{loop_start};
    my $elapsed = $self->get_time() - $started;

    warn "loop take too long ($elapsed seconds)\n" if $elapsed > 30;
}
370
76737af5
DM
# Connection to the watchdog multiplexer socket; undefined while closed.
my $watchdog_fh;

# Connect to the watchdog socket /run/watchdog-mux.sock and keep the
# handle in $watchdog_fh.
#
# Dies if the watchdog is already open or the socket cannot be opened.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    my $sock = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );
    die "unable to open watchdog socket - $!\n" if !$sock;

    $watchdog_fh = $sock;

    $self->log('info', "watchdog active");
}
385
# Send one keep-alive byte to the watchdog socket.
#
# Parameters:
#   $wfh - unused; the handle stored by watchdog_open() is used instead
#
# Returns 1 on success. Returns 0 (after logging an error) when the
# watchdog is not open or the byte could not be written.
sub watchdog_update {
    my ($self, $wfh) = @_;

    # report a missing handle through the normal failure path instead of
    # dying with a method call on an undefined value
    if (!defined($watchdog_fh)) {
        $self->log('err', "watchdog update failed - watchdog not open");
        return 0;
    }

    my $res = $watchdog_fh->syswrite("\0", 1);
    if (!defined($res)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    }
    if ($res != 1) {
        $self->log('err', "watchdog update failed - write $res bytes\n");
        return 0;
    }

    return 1;
}
401
# Disable the watchdog: write the magic close character "V" and close the
# socket. On a successful close the stored handle is reset, so that
# watchdog_open() can be called again.
#
# Parameters:
#   $wfh - unused; the handle stored by watchdog_open() is used instead
#
# Failures are logged, not thrown.
sub watchdog_close {
    my ($self, $wfh) = @_;

    # nothing to close - avoid dying on an undefined handle
    if (!defined($watchdog_fh)) {
        $self->log('err', "watchdog close failed - watchdog not open");
        return;
    }

    # magic watchdog close
    my $res = $watchdog_fh->syswrite("V", 1);
    if (!defined($res) || $res != 1) {
        # keep going and close the socket anyway, but make the failed
        # magic close visible in the log
        $self->log('err', "watchdog magic close failed - " .
            (defined($res) ? "write $res bytes" : $!));
    }

    if (!$watchdog_fh->close()) {
        $self->log('err', "watchdog close failed - $!");
    } else {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    }
}
413
022e4e79
DM
# Wait for the task identified by $upid to finish. The process is polled
# once per second using the pid and start time decoded from the UPID; a
# debug message is logged for every poll that finds it still running.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    for (;;) {
        CORE::sleep(1);

        last if !PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});

        $self->log('debug', "Task still active, waiting");
    }
}
0d1d32fb
DM
# This environment always reports that forking is possible (returns 1).
sub can_fork {
    my $self = shift;

    return 1;
}
431
# Execute a resource agent command for service $sid.
#
# Parameters:
#   $sid            - service ID, parsed with PVE::HA::Tools::parse_sid()
#   $service_config - service config entry; its 'node' must be this node
#   $cmd            - 'started', 'request_stop', 'stopped', 'migrate' or
#                     'relocate'
#   @params         - for 'migrate'/'relocate': the target node as first
#                     element
#
# Returns 0 on success and 1 on failure.
#
# Dies if the service type is not 'pvevm', if the service is not located
# on this node, if the migration target is missing, or if $cmd is unknown.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    PVE::INotify::inotify_close();

    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();

    my $nodename = $self->{nodename};

    # fixme: return valid_exit code (instead of using die) ?

    my ($service_type, $service_name) = PVE::HA::Tools::parse_sid($sid);

    die "service type '$service_type' not implemented" if $service_type ne 'pvevm';

    my $vmid = $service_name;

    my $running = PVE::QemuServer::check_running($vmid, 1);

    if ($cmd eq 'started') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        # fixme: count failures

        return 0 if $running;

        $self->log("info", "starting service $sid");

        my $upid = PVE::API2::Qemu->vm_start({node => $nodename, vmid => $vmid});
        $self->upid_wait($upid);

        # verify the result, the task itself gives us no status here
        $running = PVE::QemuServer::check_running($vmid, 1);

        if ($running) {
            $self->log("info", "service status $sid started");
            return 0;
        } else {
            $self->log("info", "unable to start service $sid");
            return 1;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        return 0 if !$running;

        $self->log("info", "stopping service $sid");

        my $timeout = 60; # fixme: make this configurable

        my $param = {
            node => $nodename,
            vmid => $vmid,
            timeout => $timeout,
            forceStop => 1,
        };

        my $upid = PVE::API2::Qemu->vm_shutdown($param);
        $self->upid_wait($upid);

        $running = PVE::QemuServer::check_running($vmid, 1);

        if (!$running) {
            $self->log("info", "service status $sid stopped");
            return 0;
        } else {
            # log the failure like the 'started' branch does
            $self->log("info", "unable to stop service $sid");
            return 1;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        if ($service_config->{node} eq $target) {
            # already there
            return 0;
        }

        if (!$running) {
            # VM is stopped: moving the config file is all that is needed
            $self->change_service_location($sid, $nodename, $target);
            $self->log("info", "service $sid moved to node '$target'");
            return 0;
        } else {
            # we always do live migration if VM is online

            my $migrate_param = {
                node => $nodename,
                vmid => $vmid,
                target => $target,
                online => 1,
            };

            my $oldconfig = PVE::QemuServer::config_file($vmid, $nodename);

            my $upid = PVE::API2::Qemu->migrate_vm($migrate_param);
            $self->upid_wait($upid);

            # something went wrong if old config file is still there
            if (-f $oldconfig) {
                $self->log("err", "service $sid not moved (migration error)");
                return 1;
            }

            return 0;
        }

    }

    die "implement me (cmd '$cmd')";
}
556
714a4016 5571;