]>
git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/Env/PVE2.pm
package PVE::HA::Env::PVE2;

use strict;
use warnings;
use POSIX qw(:errno_h :fcntl_h);
use IO::File;
use IO::Socket::UNIX;

use PVE::SafeSyslog;
use PVE::Tools;
use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_lock_file);
use PVE::INotify;
use PVE::ProcFSTools;
use PVE::RPCEnvironment;
use PVE::QemuServer;
use PVE::API2::Qemu;

use PVE::HA::Tools;
use PVE::HA::Config;
# Cluster-wide lock directory on the pmxcfs mount; mkdir/rmdir/utime on
# entries below it act as distributed lock requests.
my $lockdir = "/etc/pve/priv/lock";

# Shared HA state and configuration files (all live on pmxcfs).
my $manager_status_filename = "/etc/pve/ha/manager_status";
my $ha_groups_config = "/etc/pve/ha/groups.cfg";
my $ha_resources_config = "/etc/pve/ha/resources.cfg";

# cfs registration of the HA config files is currently disabled;
# the read_* helpers below read the files directly instead.
#cfs_register_file($ha_groups_config,
#		   sub { PVE::HA::Groups->parse_config(@_); },
#		   sub { PVE::HA::Groups->write_config(@_); });
#cfs_register_file($ha_resources_config,
#		   sub { PVE::HA::Resources->parse_config(@_); },
#		   sub { PVE::HA::Resources->write_config(@_); });
# Read and parse the HA resources configuration.
# Returns the parsed config hash; an absent file parses as empty config.
sub read_resources_config {
    my ($self) = @_;

    # Default to empty string so the parser sees a valid (empty) config
    # when the file does not exist yet.
    my $raw = '';
    $raw = PVE::Tools::file_get_contents($ha_resources_config)
        if -f $ha_resources_config;

    return PVE::HA::Config::parse_resources_config($ha_resources_config, $raw);
}
# Serialize and persist the HA resources configuration.
# $cfg - parsed config structure as produced by read_resources_config().
sub write_resources_config {
    my ($self, $cfg) = @_;

    my $raw = PVE::HA::Resources->write_config($ha_resources_config, $cfg);
    PVE::Tools::file_set_contents($ha_resources_config, $raw);
}
# Run $code while holding the cluster-wide HA config lock.
# $errmsg - optional prefix prepended to any error raised by $code.
# Returns whatever $code returns; dies on lock or callback failure.
sub lock_ha_config {
    my ($code, $errmsg) = @_;

    # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
    my $res = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $code);
    my $err = $@;   # cfs_lock_storage reports failure via $@
    if ($err) {
        $errmsg ? die "$errmsg: $err" : die $err;
    }
    return $res;
}
# Constructor. $nodename is the local cluster node name (required).
# Returns a blessed environment object.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    $self->{nodename} = $nodename;

    return $self;
}
# Accessor for the local node name set by new().
sub nodename {
    my ($self) = @_;

    return $self->{nodename};
}
# Read the shared CRM manager status from pmxcfs.
# Returns the decoded JSON structure, or an empty hashref if absent.
sub read_manager_status {
    my ($self) = @_;

    my $filename = $manager_status_filename;

    return PVE::HA::Tools::read_json_from_file($filename, {});
}
# Persist the CRM manager status object as JSON on pmxcfs.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    my $filename = $manager_status_filename;

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}
# Read the LRM status of a node ($node defaults to the local node).
# Returns the decoded JSON structure, or an empty hashref if absent.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    my $filename = "/etc/pve/nodes/$node/lrm_status";

    return PVE::HA::Tools::read_json_from_file($filename, {});
}
# Persist the local node's LRM status object as JSON on pmxcfs.
# Note: always writes the LOCAL node's file (no $node parameter).
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $node = $self->{nodename};

    my $filename = "/etc/pve/nodes/$node/lrm_status";

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}
# Return 1 if a manager status file exists on pmxcfs, else 0.
sub manager_status_exists {
    my ($self) = @_;

    return -f $manager_status_filename ? 1 : 0;
}
# Build the effective service configuration: resources.cfg entries
# cross-checked against the cluster VM list.
# Returns a hashref mapping $sid => service definition (with 'state'
# defaulted to 'enabled' and 'node' resolved from the VM list).
# NOTE(review): interior lines of this sub were lost in extraction; the
# branch structure below is a reconstruction — verify against upstream.
sub read_service_config {
    my ($self) = @_;

    my $res = read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};
        $d->{state} = 'enabled' if !defined($d->{state});
        if ($d->{type} eq 'pvevm') {
            if (my $vmd = $vmlist->{ids}->{$d->{name}}) {
                # VM exists in the cluster - take its current node
                $d->{node} = $vmd->{node};
                $conf->{$sid} = $d;
            } elsif (defined($d->{node})) {
                # VM not in vmlist, but a node is configured - keep entry
                $conf->{$sid} = $d;
                warn "no such VM '$d->{name}'\n";
            } else {
                warn "service '$sid' without node\n";
            }
        }
    }

    return $conf;
}
# Record that service $sid should move to $node.
# NOTE(review): body was lost in extraction; upstream left this
# unimplemented at this stage - keep it failing loudly.
sub change_service_location {
    my ($self, $sid, $node) = @_;

    die "implement me";
}
# Read and parse the HA groups configuration.
# Returns the parsed config hash; an absent file parses as empty config.
sub read_group_config {
    my ($self) = @_;

    # fixme: use cfs_read_file

    # Default to empty string so the parser sees a valid (empty) config
    # when the file does not exist yet.
    my $raw = '';
    $raw = PVE::Tools::file_get_contents($ha_groups_config)
        if -f $ha_groups_config;

    return PVE::HA::Config::parse_groups_config($ha_groups_config, $raw);
}
# Append command $cmd to the shared CRM command queue file, under the
# cluster-wide HA config lock. Returns the lock_ha_config() result.
# NOTE(review): interior lines were lost in extraction; the closure
# framing is a reconstruction - verify against upstream.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $code = sub {
        my $data = '';
        my $filename = "/etc/pve/ha/crm_commands";
        if (-f $filename) {
            $data = PVE::Tools::file_get_contents($filename);
        }
        $data .= "$cmd\n";
        PVE::Tools::file_set_contents($filename, $data);
    };

    return lock_ha_config($code);
}
# Atomically read and clear the shared CRM command queue file, under
# the cluster-wide HA config lock. Returns the queued command text.
# NOTE(review): interior lines were lost in extraction; the closure
# framing is a reconstruction - verify against upstream.
sub read_crm_commands {
    my ($self) = @_;

    my $code = sub {
        my $data = '';
        my $filename = "/etc/pve/ha/crm_commands";
        if (-f $filename) {
            $data = PVE::Tools::file_get_contents($filename);
        }
        # truncate the queue so each command is consumed exactly once
        PVE::Tools::file_set_contents($filename, '');
        return $data;
    };

    return lock_ha_config($code);
}
# Return ($node_info, $quorate):
#   $node_info - hashref: node name => { online => 0|1 }
#   $quorate   - 1 if the cluster filesystem has quorum, else 0
sub get_node_info {
    my ($self) = @_;

    my ($node_info, $quorate) = ({}, 0);

    my $nodename = $self->{nodename};

    # check_cfs_quorum(1) runs in "noerr" mode; normalize undef to 0
    $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    foreach my $node (keys %$members) {
        my $d = $members->{$node};
        $node_info->{$node}->{online} = $d->{online};
    }

    $node_info->{$nodename}->{online} = 1; # local node is always up

    return ($node_info, $quorate);
}
# Log $msg to syslog at the given $level ('info', 'err', 'debug', ...).
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;   # syslog adds its own record framing

    syslog($level, $msg);
}
# Per-lockid timestamp of the last successful acquisition (0 = not held).
my $last_lock_status = {};

# Acquire or refresh the cluster lock $lockid via the pmxcfs lock
# directory protocol (mkdir = acquire, utime = refresh/release request).
# Returns 1 when the lock is held, 0 otherwise; logs on state changes.
# NOTE(review): the eval framing was lost in extraction and is
# reconstructed - verify against upstream.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    eval {
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < 100)) { # fixme: what timeout
            # we already hold the lock - just refresh it
            utime(0, $ctime, $filename) || # cfs lock update request
                die "cfs lock update failed - $!\n";
        } else {
            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };
    my $err = $@;

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # log only on acquire/lose transitions, not on every refresh
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            # fixed typo: was "aquired"
            $self->log('info', "successfully acquired lock '$lockid'");
        } else {
            # fixed: message previously lacked the closing quote
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    }

    return $got_lock;
}
# Try to acquire the cluster-wide HA manager (CRM master) lock.
# Returns 1 on success, 0 otherwise.
sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->get_pve_lock("ha_manager_lock");
}
# Try to acquire the local node's HA agent (LRM) lock.
# Returns 1 on success, 0 otherwise.
sub get_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();

    return $self->get_pve_lock("ha_agent_${node}_lock");
}
# Test whether the agent lock for $node is obtainable, without keeping
# it: acquire, then immediately release (rmdir = cfs unlock).
# Returns 1 if the lock could be taken, 0 otherwise.
sub test_ha_agent_lock {
    my ($self, $node) = @_;

    my $lockid = "ha_agent_${node}_lock";
    my $filename = "$lockdir/$lockid";
    my $res = $self->get_pve_lock($lockid);
    rmdir $filename if $res; # cfs unlock

    return $res;
}
# Return 1 if the cluster filesystem currently has quorum, else 0.
# NOTE(review): the sub header and eval framing were lost in extraction
# and are reconstructed - verify against upstream.
sub quorate {
    my ($self) = @_;

    my $quorate = 0;
    # check_cfs_quorum dies without quorum; swallow and report 0
    eval {
        $quorate = PVE::Cluster::check_cfs_quorum();
    };

    return $quorate;
}
339 my ($self, $delay) = @_;
# Sleep until wall-clock time reaches $end_time (epoch seconds),
# polling once per second.
# NOTE(review): loop framing lost in extraction and reconstructed -
# verify against upstream.
sub sleep_until {
    my ($self, $end_time) = @_;

    for (;;) {
        my $cur_time = time();
        last if $cur_time >= $end_time;
        CORE::sleep(1);
    }
}
# Called at the start of each main-loop iteration: refresh the pmxcfs
# view and remember the start time for loop_end_hook's duration check.
sub loop_start_hook {
    my ($self) = @_;

    PVE::Cluster::cfs_update();

    $self->{loop_start} = $self->get_time();
}
# Called at the end of each main-loop iteration: warn when an iteration
# ran longer than 30 seconds (which risks losing the watchdog lock).
sub loop_end_hook {
    my ($self) = @_;

    my $delay = $self->get_time() - $self->{loop_start};

    # fixed grammar: was "loop take too long"
    warn "loop took too long ($delay seconds)\n" if $delay > 30;
}
# Connection to the watchdog multiplexer; undef while closed.
# (Declared here because the original declaration line is outside the
# visible chunk.)
my $watchdog_fh;

# Open the connection to the watchdog-mux daemon; dies if already open
# or if the socket cannot be connected.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    $watchdog_fh = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock") ||
        die "unable to open watchdog socket - $!\n";

    $self->log('info', "watchdog active");
}
# Feed the watchdog by writing a single NUL byte to watchdog-mux.
# Returns 1 on success, 0 on failure (failure is logged, not fatal).
# NOTE(review): the return statements were lost in extraction and are
# reconstructed - verify against upstream.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $res = $watchdog_fh->syswrite("\0", 1);
    if (!defined($res)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    }
    if ($res != 1) {
        $self->log('err', "watchdog update failed - write $res bytes\n");
        return 0;
    }

    return 1;
}
# Cleanly disarm and close the watchdog connection. Writing "V" is the
# magic close sequence recognized by watchdog-mux.
sub watchdog_close {
    my ($self, $wfh) = @_;

    $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if (!$watchdog_fh->close()) {
        $self->log('err', "watchdog close failed - $!");
    } else {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    }
}
# Block until the task identified by $upid (a PVE task ID string) is no
# longer running, polling once per second.
# NOTE(review): the sub header and sleep calls were lost in extraction
# and are reconstructed - verify against upstream.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    CORE::sleep(1);
    while (PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart})) {
        $self->log('debug', "Task still active, waiting");
        CORE::sleep(1);
    }
}
427 sub exec_resource_agent
{
428 my ($self, $sid, $service_config, $cmd, @params) = @_;
430 # setup execution environment
432 $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
434 PVE
::INotify
::inotify_close
();
436 PVE
::INotify
::inotify_init
();
438 PVE
::Cluster
::cfs_update
();
440 my $nodename = $self->{nodename
};
442 # fixme: return valid_exit code (instead of using die) ?
444 my $service_type = $service_config->{type
};
446 die "service type '$service_type'not implemented" if $service_type ne 'pvevm';
448 my $vmid = $service_config->{name
};
450 my $running = PVE
::QemuServer
::check_running
($vmid, 1);
452 if ($cmd eq 'started') {
454 # fixme: return valid_exit code
455 die "service '$sid' not on this node" if $service_config->{node
} ne $nodename;
457 # fixme: count failures
459 return 0 if $running;
461 $self->log("info", "starting service $sid");
463 my $upid = PVE
::API2
::Qemu-
>vm_start({node
=> $nodename, vmid
=> $vmid});
464 $self->upid_wait($upid);
466 $running = PVE
::QemuServer
::check_running
($vmid, 1);
469 $self->log("info", "service status $sid started");
472 $self->log("info", "unable to start service $sid");
476 } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {
478 # fixme: return valid_exit code
479 die "service '$sid' not on this node" if $service_config->{node
} ne $nodename;
481 return 0 if !$running;
483 $self->log("info", "stopping service $sid");
485 my $timeout = 60; # fixme: make this configurable
494 my $upid = PVE
::API2
::Qemu-
>vm_shutdown($param);
495 $self->upid_wait($upid);
497 $running = PVE
::QemuServer
::check_running
($vmid, 1);
500 $self->log("info", "service status $sid stopped");
506 } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {
512 die "implement me (cmd '$cmd')";