use PVE::HA::Tools ':exit_codes';
use PVE::HA::Env;
use PVE::HA::Config;
+use PVE::HA::FenceConfig;
+use PVE::HA::Resources;
+use PVE::HA::Resources::PVEVM;
+use PVE::HA::Resources::PVECT;
+PVE::HA::Resources::PVEVM->register();
+PVE::HA::Resources::PVECT->register();
+
+PVE::HA::Resources->init();
my $lockdir = "/etc/pve/priv/lock";
return $self->{nodename};
}
+# hardware() is only meaningful in the test/simulation environments; the
+# real cluster environment has no simulated hardware layer, so calling it
+# here is always a hard error.
+sub hardware {
+ my ($self) = @_;
+
+ die "hardware is for testing and simulation only";
+}
+
+# NOTE(review): this sub is named read_manager_status but the visible body
+# returns PVE::HA::Config::read_crm_commands() -- this looks like elided
+# diff context joining two different subs across a hunk gap; verify the
+# name/body pairing against the full file before relying on it.
sub read_manager_status {
    my ($self) = @_;
    return PVE::HA::Config::read_crm_commands();
}
-sub service_config_exists {
- my ($self) = @_;
-
- return PVE::HA::Config::resources_config_exists();
-}
-
sub read_service_config {
my ($self) = @_;
return $conf;
}
-sub change_service_location {
+# Read the fence device configuration through the HA config layer.
+# Thin delegation wrapper; returns whatever
+# PVE::HA::Config::read_fence_config() returns.
+sub read_fence_config {
+ my ($self) = @_;
+
+ return PVE::HA::Config::read_fence_config();
+}
+
+# Return the cluster-wide fencing mode from the 'fencing' option in
+# datacenter.cfg, defaulting to 'watchdog' when the option is not set.
+# NOTE: the '!' test treats any falsy value (undef, '', 0) as "unset",
+# not just a missing key.
+sub fencing_mode {
+ my ($self) = @_;
+
+ my $datacenterconfig = cfs_read_file('datacenter.cfg');
+
+ return 'watchdog' if !$datacenterconfig->{fencing};
+
+ return $datacenterconfig->{fencing};
+}
+
+# Replace the current process with the given fence agent command.
+# The argument string is produced by PVE::HA::FenceConfig::gen_arg_str();
+# exec() never returns on success, so 'exit -1' is only reached when the
+# exec itself fails.
+# NOTE(review): string-form exec($cmd) may involve the shell if the command
+# contains metacharacters -- gen_arg_str() output must be trusted/escaped;
+# confirm, or consider list-form exec.
+# NOTE(review): $node is accepted but not used in the visible body.
+sub exec_fence_agent {
+ my ($self, $agent, $node, @param) = @_;
+
+ # setup execution environment
+ $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
+
+ my $cmd = "$agent " . PVE::HA::FenceConfig::gen_arg_str(@param);
+
+ exec($cmd);
+ exit -1;
+}
+
+# this is only allowed by the master to recover a _fenced_ service
+sub steal_service {
my ($self, $sid, $current_node, $new_node) = @_;
my (undef, $type, $name) = PVE::HA::Tools::parse_sid($sid);
return $self->get_pve_lock("ha_manager_lock");
}
+# release the cluster wide manager lock.
+# when released another CRM may step up and get the lock, thus this should only
+# get called when shutting down/deactivating the current master
+# Returns the rmdir result: true when the lock directory under $lockdir
+# was removed, false otherwise.
+sub release_ha_manager_lock {
+ my ($self) = @_;
+
+ return rmdir("$lockdir/ha_manager_lock");
+}
+
+# Acquire the per-node LRM agent lock for $node via the generic
+# get_pve_lock() helper (lock name: ha_agent_<node>_lock).
sub get_ha_agent_lock {
    my ($self, $node) = @_;
    return $self->get_pve_lock("ha_agent_${node}_lock");
}
+# release the respective node agent lock.
+# this should only get called if the nodes LRM gracefully shuts down with
+# all services already cleanly stopped!
+# Only ever releases the LOCAL node's lock (uses $self->nodename());
+# returns the rmdir result (true iff the lock directory was removed).
+sub release_ha_agent_lock {
+ my ($self) = @_;
+
+ my $node = $self->nodename();
+
+ return rmdir("$lockdir/ha_agent_${node}_lock");
+}
+
sub quorate {
my ($self) = @_;
}
}
-sub upid_wait {
- my ($self, $upid) = @_;
-
- my $task = PVE::Tools::upid_decode($upid);
-
- CORE::sleep(1);
- while (PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart})) {
- $self->log('debug', "Task still active, waiting");
- CORE::sleep(1);
- }
-}
-
-sub can_fork {
+# after_fork: run in a child process right after fork() so the child does
+# not keep using per-process state inherited from the parent -- the inotify
+# file descriptor is closed and reopened, and the cached cluster filesystem
+# state is refreshed via cfs_update().
+sub after_fork {
    my ($self) = @_;
-    return 1;
-}
-
-sub exec_resource_agent {
-    my ($self, $sid, $service_config, $cmd, @params) = @_;
-
-    # setup execution environment
-
-    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
-
+    # close inherited inotify FD from parent and reopen our own
    PVE::INotify::inotify_close();
-
    PVE::INotify::inotify_init();
    PVE::Cluster::cfs_update();
+}
- my $nodename = $self->{nodename};
-
- my (undef, $service_type, $service_name) = PVE::HA::Tools::parse_sid($sid);
-
- my $plugin = PVE::HA::Resources->lookup($service_type);
- if (!$plugin) {
- $self->log('err', "service type '$service_type' not implemented");
- return EUNKNOWN_SERVICE_TYPE;
- }
-
- if ($service_config->{node} ne $nodename) {
- $self->log('err', "service '$sid' not on this node");
- return EWRONG_NODE;
- }
-
- my $vmid = $service_name;
-
- my $running = $plugin->check_running($vmid);
-
- if ($cmd eq 'started') {
-
- return SUCCESS if $running;
-
- $self->log("info", "starting service $sid");
-
- my $params = {
- node => $nodename,
- vmid => $vmid
- };
-
- $plugin->start($self, $params);
-
- $running = $plugin->check_running($vmid);
-
- if ($running) {
- $self->log("info", "service status $sid started");
- return SUCCESS;
- } else {
- $self->log("warning", "unable to start service $sid");
- return ERROR;
- }
-
- } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {
-
- return SUCCESS if !$running;
-
- $self->log("info", "stopping service $sid");
-
- my $timeout = 60; # fixme: make this configurable
-
- my $params = {
- node => $nodename,
- vmid => $vmid,
- timeout => $timeout,
- forceStop => 1,
- };
-
- $plugin->shutdown($self, $params);
-
- $running = $plugin->check_running($vmid);
-
- if (!$running) {
- $self->log("info", "service status $sid stopped");
- return SUCCESS;
- } else {
- $self->log("info", "unable to stop stop service $sid (still running)");
- return ERROR;
- }
-
- } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {
-
- my $target = $params[0];
- if (!defined($target)) {
- die "$cmd '$sid' failed - missing target\n" if !defined($target);
- return EINVALID_PARAMETER;
- }
-
- if ($service_config->{node} eq $target) {
- # already there
- return SUCCESS;
- }
-
- # we always do (live) migration
- my $params = {
- node => $nodename,
- vmid => $vmid,
- target => $target,
- online => 1,
- };
-
- my $oldconfig = $plugin->config_file($vmid, $nodename);
-
- $plugin->migrate($self, $params);
-
- # something went wrong if old config file is still there
- if (-f $oldconfig) {
- $self->log("err", "service $sid not moved (migration error)");
- return ERROR;
- }
-
- return SUCCESS;
-
- } elsif ($cmd eq 'error') {
-
- if ($running) {
- $self->log("err", "service $sid is in an error state while running");
- } else {
- $self->log("warning", "service $sid is not running and in an error state");
- }
- return SUCCESS; # error always succeeds
+# get_max_workers: maximum number of concurrent LRM worker processes,
+# taken from the 'max_workers' option in datacenter.cfg, defaulting to 4.
+# NOTE(review): '|| 4' also maps an explicit max_workers=0 to 4 -- confirm
+# that 0 is not intended as a valid setting (e.g. "unlimited" or "disable").
+sub get_max_workers {
+ my ($self) = @_;
- }
+ my $datacenterconfig = cfs_read_file('datacenter.cfg');
- $self->log("err", "implement me (cmd '$cmd')");
- return EUNKNOWN_COMMAND;
+ return $datacenterconfig->{max_workers} || 4;
}
1;