diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index 56b8bda..e00b2b1 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -3,13 +3,22 @@ package PVE::HA::Sim::Env;
 use strict;
 use warnings;
 use POSIX qw(strftime EINTR);
-use Data::Dumper;
 use JSON;
 use IO::File;
 use Fcntl qw(:DEFAULT :flock);
 
 use PVE::HA::Tools;
 use PVE::HA::Env;
+use PVE::HA::Resources;
+use PVE::HA::Sim::Resources::VirtVM;
+use PVE::HA::Sim::Resources::VirtCT;
+use PVE::HA::Sim::Resources::VirtFail;
+
+PVE::HA::Sim::Resources::VirtVM->register();
+PVE::HA::Sim::Resources::VirtCT->register();
+PVE::HA::Sim::Resources::VirtFail->register();
+
+PVE::HA::Resources->init();
 
 sub new {
     my ($this, $nodename, $hardware, $log_id) = @_;
@@ -38,6 +47,12 @@ sub nodename {
     return $self->{nodename};
 }
 
+sub hardware {
+    my ($self) = @_;
+
+    return $self->{hardware};
+}
+
 sub sim_get_lock {
     my ($self, $lock_name, $unlock) = @_;
 
@@ -142,13 +157,25 @@ sub write_lrm_status {
 sub is_node_shutdown {
     my ($self) = @_;
 
-    return 0; # default to freezing services if not overwritten by subclass
-}
+    my $node = $self->{nodename};
+    my $cstatus = $self->{hardware}->read_hardware_status_nolock();
 
-sub service_config_exists {
-    my ($self) = @_;
-
-    return 1;
+    die "undefined node status for node '$node'" if !defined($cstatus->{$node});
+
+    my ($shutdown, $reboot) = (0, 0);
+
+    if (my $target = $cstatus->{$node}->{shutdown}) {
+        if ($target eq 'shutdown') {
+            $shutdown = 1;
+        } elsif ($target eq 'reboot') {
+            $shutdown = 1;
+            $reboot = 1;
+        } else {
+            die "unknown shutdown target '$target'";
+        }
+    }
+
+    return ($shutdown, $reboot);
 }
 
 sub read_service_config {
@@ -157,13 +184,36 @@ sub read_service_config {
     return $self->{hardware}->read_service_config();
 }
 
+sub read_fence_config {
+    my ($self) = @_;
+
+    return $self->{hardware}->read_fence_config();
+}
+
+# the test/sim framework has hardware fencing enabled if
+# it has fence devices configured
+sub fencing_mode {
+    my ($self) = @_;
+
+    my $cfg = $self->read_fence_config();
+
+    return (defined($cfg) && keys %{$cfg}) ? 'hardware' : 'watchdog';
+}
+
+sub exec_fence_agent {
+    my ($self, $agent, $node, @param) = @_;
+
+    return $self->{hardware}->exec_fence_agent($agent, $node, @param);
+}
+
 sub read_group_config {
     my ($self) = @_;
 
     return $self->{hardware}->read_group_config();
 }
 
-sub change_service_location {
+# this is normally only allowed by the master to recover a _fenced_ service
+sub steal_service {
     my ($self, $sid, $current_node, $new_node) = @_;
 
     return $self->{hardware}->change_service_location($sid, $current_node, $new_node);
@@ -191,6 +241,13 @@ sub log {
     printf("%-5s %5d %12s: $msg\n", $level, $time, "$self->{nodename}/$self->{log_id}");
 }
 
+sub sendmail {
+    my ($self, $subject, $text) = @_;
+
+    # only log the subject, do not spam the logs
+    $self->log('email', $subject);
+}
+
 sub get_time {
     my ($self) = @_;
 
@@ -215,6 +272,15 @@ sub get_ha_manager_lock {
     return $self->sim_get_lock('ha_manager_lock');
 }
 
+# release the cluster-wide manager lock.
+# once released, another CRM may step up and get the lock; thus this should
+# only get called when shutting down/deactivating the current master
+sub release_ha_manager_lock {
+    my ($self) = @_;
+
+    return $self->sim_get_lock('ha_manager_lock', 1);
+}
+
 sub get_ha_agent_lock_name {
     my ($self, $node) = @_;
 
@@ -230,6 +296,19 @@ sub get_ha_agent_lock {
     return $self->sim_get_lock($lck);
 }
 
+
+# release the respective node agent lock.
+# this should only get called if the node's LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+    my ($self) = @_;
+
+    my $node = $self->nodename();
+
+    my $lock = $self->get_ha_agent_lock_name($node);
+    return $self->sim_get_lock($lock, 1);
+}
+
 # return true when cluster is quorate
 sub quorate {
     my ($self) = @_;
@@ -278,104 +357,17 @@ sub watchdog_close {
     return $self->{hardware}->watchdog_close($wfh);
 }
 
-sub can_fork {
+sub after_fork {
     my ($self) = @_;
 
-    return 1;
+    # nothing to clean up in the simulation environment
 }
 
-sub exec_resource_agent {
-    my ($self, $sid, $cd, $cmd, @params) = @_;
-
-    my $hardware = $self->{hardware};
-
-    my $nodename = $self->{nodename};
-
-    # fixme: return valid_exit code (instead of using die)
-
-    my $ss = $hardware->read_service_status($nodename);
-
-    if ($cmd eq 'started') {
-
-        # fixme: return valid_exit code
-        die "service '$sid' not on this node" if $cd->{node} ne $nodename;
-
-        if ($ss->{$sid}) {
-            return 0;
-        }
-        $self->log("info", "starting service $sid");
-
-        $self->sleep(2);
-
-        $ss->{$sid} = 1;
-        $hardware->write_service_status($nodename, $ss);
-
-        $self->log("info", "service status $sid started");
-
-        return 0;
-
-    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {
-
-        # fixme: return valid_exit code
-        die "service '$sid' not on this node" if $cd->{node} ne $nodename;
-
-        if (!$ss->{$sid}) {
-            return 0;
-        }
-        $self->log("info", "stopping service $sid");
-
-        $self->sleep(2);
-
-        $ss->{$sid} = 0;
-        $hardware->write_service_status($nodename, $ss);
-        $self->log("info", "service status $sid stopped");
-
-        return 0;
-
-    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {
-
-        my $target = $params[0];
-        die "$cmd '$sid' failed - missing target\n" if !defined($target);
-
-        if ($cd->{node} eq $target) {
-            # already migrate
-            return 0;
-        } elsif ($cd->{node} eq $nodename) {
-
-            $self->log("info", "service $sid - start $cmd to node '$target'");
-
-            if ($cmd eq 'relocate') {
-
-                if ($ss->{$sid}) {
-                    $self->log("info", "stopping service $sid (relocate)");
-                    $self->sleep(1); # time to stop service
-                    $ss->{$sid} = 0;
-                    $hardware->write_service_status($nodename, $ss);
-                }
-
-                $self->log("info", "service status $sid stopped");
-
-            } else {
-                $self->sleep(2); # (live) migration time
-            }
-
-            $self->change_service_location($sid, $nodename, $target);
-            $self->log("info", "service $sid - end $cmd to node '$target'");
-            # ensure that the old node doesn't has the service anymore
-            $ss->{$sid} = 0;
-            $hardware->write_service_status($nodename, $ss);
-
-            return 0;
-
-        } else {
-            die "migrate '$sid' failed - service is not on this node\n";
-        }
-
-
-    }
-
-    die "implement me (cmd '$cmd')";
+sub get_max_workers {
+    my ($self) = @_;
+
+    return 4;
 }
 
 1;
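
Reviewer note: two behavioural contracts change in this diff. is_node_shutdown() now returns a ($shutdown, $reboot) pair instead of a plain boolean, presumably so callers can treat a reboot differently from a poweroff, and fencing_mode() selects 'hardware' only when the fence config actually contains devices. Below is a minimal standalone Perl sketch of both decision rules, usable for experimenting outside the simulator; the $cstatus and $fence_cfg sample data are invented for illustration, and only the decision logic mirrors the hunks above.

    #!/usr/bin/perl
    use strict;
    use warnings;

    # invented sample data standing in for what the simulated hardware returns
    my $cstatus = { node1 => { shutdown => 'reboot' }, node2 => {} };
    my $fence_cfg = {}; # no fence devices configured

    # mirrors the new is_node_shutdown(): map a node's requested shutdown
    # target to a ($shutdown, $reboot) pair
    sub shutdown_state {
        my ($cstatus, $node) = @_;

        die "undefined node status for node '$node'" if !defined($cstatus->{$node});

        my ($shutdown, $reboot) = (0, 0);
        if (my $target = $cstatus->{$node}->{shutdown}) {
            if ($target eq 'shutdown') {
                $shutdown = 1;
            } elsif ($target eq 'reboot') {
                ($shutdown, $reboot) = (1, 1);
            } else {
                die "unknown shutdown target '$target'";
            }
        }

        return ($shutdown, $reboot);
    }

    # mirrors fencing_mode(): hardware fencing only with configured devices
    my $mode = (defined($fence_cfg) && keys %$fence_cfg) ? 'hardware' : 'watchdog';

    my ($shutdown, $reboot) = shutdown_state($cstatus, 'node1');
    print "node1: shutdown=$shutdown reboot=$reboot fencing=$mode\n";
    # output: node1: shutdown=1 reboot=1 fencing=watchdog

The two-value return matters for callers such as the LRM: on a reboot, services can stay frozen and be picked up again after boot, whereas on a poweroff they have to be stopped or migrated away.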