use PVE::SafeSyslog;
use PVE::Tools;
-use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_lock_file);
+use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
+use PVE::INotify;
+use PVE::RPCEnvironment;
-use PVE::HA::Tools;
+use PVE::HA::Tools ':exit_codes';
use PVE::HA::Env;
use PVE::HA::Config;
+use PVE::HA::FenceConfig;
+use PVE::HA::Resources;
+use PVE::HA::Resources::PVEVM;
+use PVE::HA::Resources::PVECT;
-my $lockdir = "/etc/pve/priv/lock";
+PVE::HA::Resources::PVEVM->register();
+PVE::HA::Resources::PVECT->register();
-my $manager_status_filename = "/etc/pve/ha/manager_status";
-my $ha_groups_config = "/etc/pve/ha/groups.cfg";
-my $ha_resources_config = "/etc/pve/ha/resources.cfg";
+PVE::HA::Resources->init();
-#cfs_register_file($ha_groups_config,
-# sub { PVE::HA::Groups->parse_config(@_); },
-# sub { PVE::HA::Groups->write_config(@_); });
+my $lockdir = "/etc/pve/priv/lock";
sub new {
my ($this, $nodename) = @_;
return $self->{nodename};
}
+# hardware() is only meaningful in the simulator/test environments
+# (PVE::HA::Sim::*); the production PVE2 environment has no simulated
+# hardware layer, so any call here is a programming error and dies.
+sub hardware {
+ my ($self) = @_;
+
+ die "hardware is for testing and simulation only";
+}
+
sub read_manager_status {
my ($self) = @_;
-
- my $filename = $manager_status_filename;
- return PVE::HA::Tools::read_json_from_file($filename, {});
+ return PVE::HA::Config::read_manager_status();
}
sub write_manager_status {
my ($self, $status_obj) = @_;
-
- my $filename = $manager_status_filename;
- PVE::HA::Tools::write_json_to_file($filename, $status_obj);
+ PVE::HA::Config::write_manager_status($status_obj);
}
sub read_lrm_status {
$node = $self->{nodename} if !defined($node);
- my $filename = "/etc/pve/nodes/$node/lrm_status";
-
- return PVE::HA::Tools::read_json_from_file($filename, {});
+ return PVE::HA::Config::read_lrm_status($node);
}
sub write_lrm_status {
my $node = $self->{nodename};
- my $filename = "/etc/pve/nodes/$node/lrm_status";
+ PVE::HA::Config::write_lrm_status($node, $status_obj);
+}
+
+# Returns 1 if the local node is currently shutting down, i.e. systemd
+# has a queued job for shutdown.target, else 0. Errors from systemctl
+# are deliberately swallowed (noerr => 1 plus the surrounding eval), so
+# a failing/absent systemctl reads as "not shutting down".
+sub is_node_shutdown {
+ my ($self) = @_;
+
+ my $shutdown = 0;
+
+ # line-wise output callback: flag shutdown if any pending job
+ # references shutdown.target
+ my $code = sub {
+ my $line = shift;
+
+ $shutdown = 1 if ($line =~ m/shutdown\.target/);
+ };
+
+ my $cmd = ['/bin/systemctl', 'list-jobs'];
+ eval { PVE::Tools::run_command($cmd, outfunc => $code, noerr => 1); };
+
+ return $shutdown;
+}
+
+sub queue_crm_commands {
+ my ($self, $cmd) = @_;
- PVE::HA::Tools::write_json_to_file($filename, $status_obj);
+ return PVE::HA::Config::queue_crm_commands($cmd);
}
-sub manager_status_exists {
+sub read_crm_commands {
my ($self) = @_;
-
- return -f $manager_status_filename ? 1 : 0;
+
+ return PVE::HA::Config::read_crm_commands();
}
sub read_service_config {
my ($self) = @_;
- # fixme: use cfs_read_file
-
- my $raw = '';
-
- $raw = PVE::Tools::file_get_contents($ha_resources_config)
- if -f $ha_resources_config;
-
- my $res = PVE::HA::Config::parse_resources_config($ha_resources_config, $raw);
-
- my $vmlist = PVE::Cluster::get_vmlist();
- my $conf = {};
-
- foreach my $sid (keys %{$res->{ids}}) {
- my $d = $res->{ids}->{$sid};
- $d->{state} = 'enabled' if !defined($d->{state});
- if ($d->{type} eq 'pvevm') {
- if (my $vmd = $vmlist->{ids}->{$d->{name}}) {
- if (!$vmd) {
- warn "no such VM '$d->{name}'\n";
- } else {
- $d->{node} = $vmd->{node};
- $conf->{$sid} = $d;
- }
- } else {
- if (defined($d->{node})) {
- $conf->{$sid} = $d;
- } else {
- warn "service '$sid' without node\n";
- }
- }
- }
- }
-
- return $conf;
+ return PVE::HA::Config::read_and_check_resources_config();
}
-sub change_service_location {
- my ($self, $sid, $node) = @_;
+sub read_fence_config {
+ my ($self) = @_;
- die "implement me";
+ return PVE::HA::Config::read_fence_config();
}
-sub read_group_config {
+sub fencing_mode {
my ($self) = @_;
- # fixme: use cfs_read_file
-
- my $raw = '';
+ my $datacenterconfig = cfs_read_file('datacenter.cfg');
+
+ return 'watchdog' if !$datacenterconfig->{fencing};
- $raw = PVE::Tools::file_get_contents($ha_groups_config)
- if -f $ha_groups_config;
-
- return PVE::HA::Config::parse_groups_config($ha_groups_config, $raw);
+ return $datacenterconfig->{fencing};
}
-sub queue_crm_commands {
- my ($self, $cmd) = @_;
+sub exec_fence_agent {
+ my ($self, $agent, $node, @param) = @_;
- chomp $cmd;
+ # setup execution environment
+ $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
- my $code = sub {
- my $data = '';
- my $filename = "/etc/pve/ha/crm_commands";
- if (-f $filename) {
- $data = PVE::Tools::file_get_contents($filename);
- }
- $data .= "$cmd\n";
- PVE::Tools::file_set_contents($filename, $data);
- };
+ my $cmd = "$agent " . PVE::HA::FenceConfig::gen_arg_str(@param);
- # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
- my $res = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $code);
- die $@ if $@;
- return $res;
+ exec($cmd);
+ exit -1;
}
-sub read_crm_commands {
- my ($self) = @_;
+# this is only allowed by the master to recover a _fenced_ service
+sub steal_service {
+ my ($self, $sid, $current_node, $new_node) = @_;
- my $code = sub {
- my $data = '';
+ my (undef, $type, $name) = PVE::HA::Tools::parse_sid($sid);
- my $filename = "/etc/pve/ha/crm_commands";
- if (-f $filename) {
- $data = PVE::Tools::file_get_contents($filename);
- PVE::Tools::file_set_contents($filename, '');
- }
+ if(my $plugin = PVE::HA::Resources->lookup($type)) {
+ my $old = $plugin->config_file($name, $current_node);
+ my $new = $plugin->config_file($name, $new_node);
+ rename($old, $new) ||
+ die "rename '$old' to '$new' failed - $!\n";
+ } else {
+ die "implement me";
+ }
+}
- return $data;
- };
+sub read_group_config {
+ my ($self) = @_;
- # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
- my $res = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $code);
- die $@ if $@;
- return $res;
+ return PVE::HA::Config::read_group_config();
}
# this should return a hash containing info
my ($self) = @_;
my ($node_info, $quorate) = ({}, 0);
-
+
my $nodename = $self->{nodename};
$quorate = PVE::Cluster::check_cfs_quorum(1) || 0;
foreach my $node (keys %$members) {
my $d = $members->{$node};
- $node_info->{$node}->{online} = $d->{online};
+ $node_info->{$node}->{online} = $d->{online};
}
-
+
$node_info->{$nodename}->{online} = 1; # local node is always up
-
+
return ($node_info, $quorate);
}
syslog($level, $msg);
}
-my $last_lock_status = {};
+# Send a notification mail to the local root account, with the sender
+# set to root@<this node's name>. Delivery is delegated entirely to
+# PVE::Tools::sendmail; no error handling is done here.
+sub sendmail {
+ my ($self, $subject, $text) = @_;
+
+ my $mailfrom = 'root@' . $self->nodename();
+ my $mailto = 'root@localhost';
+
+ PVE::Tools::sendmail($mailto, $subject, $text, undef, $mailfrom);
+}
+
+my $last_lock_status_hash = {};
sub get_pve_lock {
my ($self, $lockid) = @_;
my $filename = "$lockdir/$lockid";
- my $last = $last_lock_status->{$lockid} || 0;
+ $last_lock_status_hash->{$lockid} //= { lock_time => 0, got_lock => 0};
+ my $last = $last_lock_status_hash->{$lockid};
my $ctime = time();
+ my $last_lock_time = $last->{lock_time} // 0;
+ my $last_got_lock = $last->{got_lock};
+
+ my $retry_timeout = 100; # fixme: what timeout
eval {
# pve cluster filesystem not online
die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;
- if ($last && (($ctime - $last) < 100)) { # fixme: what timeout
- utime(0, $ctime, $filename) || # cfs lock update request
- die "cfs lock update failed - $!\n";
- } else {
+ # try cfs lock update request (utime)
+ if (utime(0, $ctime, $filename)) {
+ $got_lock = 1;
+ return;
+ }
+
+ if ($last_lock_time && (($ctime - $last_lock_time) < $retry_timeout)) {
+ die "cfs lock update failed - $!\n";
+ }
- # fixme: wait some time?
- if (!(mkdir $filename)) {
- utime 0, 0, $filename; # cfs unlock request
- die "can't get cfs lock\n";
- }
+ if (mkdir $filename) {
+ $got_lock = 1;
+ return;
}
- $got_lock = 1;
+ utime 0, 0, $filename; # cfs unlock request
+ die "can't get cfs lock\n";
};
my $err = $@;
- $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;
+ #$self->log('err', $err) if $err; # for debugging
- if (!!$got_lock != !!$last) {
+ $last->{got_lock} = $got_lock;
+ $last->{lock_time} = $ctime if $got_lock;
+
+ if (!!$got_lock != !!$last_got_lock) {
if ($got_lock) {
- $self->log('info', "successfully aquired lock '$lockid'");
+ $self->log('info', "successfully acquired lock '$lockid'");
} else {
my $msg = "lost lock '$lockid";
- $msg .= " - $err" if $err;
+ $msg .= " - $err" if $err;
$self->log('err', $msg);
}
}
return $self->get_pve_lock("ha_manager_lock");
}
-sub get_ha_agent_lock {
+# release the cluster wide manager lock.
+# when released another CRM may step up and get the lock, thus this should only
+# get called when shutting down/deactivating the current master
+sub release_ha_manager_lock {
my ($self) = @_;
-
- my $node = $self->nodename();
- return $self->get_pve_lock("ha_agent_${node}_lock");
+ return rmdir("$lockdir/ha_manager_lock");
}
-sub test_ha_agent_lock {
+sub get_ha_agent_lock {
my ($self, $node) = @_;
-
- my $lockid = "ha_agent_${node}_lock";
- my $filename = "$lockdir/$lockid";
- my $res = $self->get_pve_lock($lockid);
- rmdir $filename if $res; # cfs unlock
- return $res;
+ $node = $self->nodename() if !defined($node);
+
+ return $self->get_pve_lock("ha_agent_${node}_lock");
+}
+
+# release the respective node agent lock.
+# this should only get called if the nodes LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+ my ($self) = @_;
+
+ my $node = $self->nodename();
+
+ return rmdir("$lockdir/ha_agent_${node}_lock");
}
sub quorate {
my ($self) = @_;
my $quorate = 0;
- eval {
- $quorate = PVE::Cluster::check_cfs_quorum();
+ eval {
+ $quorate = PVE::Cluster::check_cfs_quorum();
};
-
+
return $quorate;
}
my ($self) = @_;
PVE::Cluster::cfs_update();
-
+
$self->{loop_start} = $self->get_time();
}
my ($self) = @_;
my $delay = $self->get_time() - $self->{loop_start};
-
+
warn "loop take too long ($delay seconds)\n" if $delay > 30;
}
Type => SOCK_STREAM(),
Peer => "/run/watchdog-mux.sock") ||
die "unable to open watchdog socket - $!\n";
-
+
$self->log('info', "watchdog active");
}
}
}
-sub exec_resource_agent {
- my ($self, $sid, $service_config, $cmd, @params) = @_;
+# Per-child re-initialization hook, called in a freshly forked worker:
+# drops the inotify file descriptor inherited from the parent, creates
+# a new one, and refreshes the cached cluster filesystem state so the
+# child does not operate on stale pmxcfs data.
+sub after_fork {
+ my ($self) = @_;
+
+ # close inherited inotify FD from parent and reopen our own
+ PVE::INotify::inotify_close();
+ PVE::INotify::inotify_init();
+
+ PVE::Cluster::cfs_update();
+}
+
+# Maximum number of concurrent LRM resource workers, taken from the
+# cluster-wide datacenter.cfg; falls back to 4 when the option is
+# unset (or set to 0, since || treats 0 as false).
+sub get_max_workers {
+ my ($self) = @_;
+
+ my $datacenterconfig = cfs_read_file('datacenter.cfg');
- die "implement me";
+ return $datacenterconfig->{max_workers} || 4;
}
1;