From 561e7f4bfb235fcdca5b0bbb8422ce742a5da75f Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Thu, 17 Nov 2022 15:00:11 +0100 Subject: [PATCH] manager: use static resource scheduler when configured Note that recompute_online_node_usage() becomes much slower when the 'static' resource scheduler mode is used. Tested it with ~300 HA services (minimal containers) running on my virtual test cluster. Timings with 'basic' mode were between 0.0004 - 0.001 seconds Timings with 'static' mode were between 0.007 - 0.012 seconds Combined with the fact that recompute_online_node_usage() is currently called very often this can lead to a lot of delay during recovery situations with hundreds of services and low thousands of services overall and with generous estimates even run into the watchdog timer. Ideas to remedy this are using PVE::Cluster's get_guest_config_properties() instead of load_config() and/or optimizing how often recompute_online_node_usage() is called. Signed-off-by: Fiona Ebner --- src/PVE/HA/Manager.pm | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm index 1638442..7f1d1d7 100644 --- a/src/PVE/HA/Manager.pm +++ b/src/PVE/HA/Manager.pm @@ -8,6 +8,7 @@ use PVE::Tools; use PVE::HA::Tools ':exit_codes'; use PVE::HA::NodeStatus; use PVE::HA::Usage::Basic; +use PVE::HA::Usage::Static; ## Variable Name & Abbreviations Convention # @@ -203,14 +204,35 @@ my $valid_service_states = { error => 1, }; +# FIXME with 'static' mode and thousands of services, the overhead can be noticeable and the fact +# that this function is called for each state change and upon recovery doesn't help. 
sub recompute_online_node_usage { my ($self) = @_; - my $online_node_usage = PVE::HA::Usage::Basic->new($self->{haenv}); + my $haenv = $self->{haenv}; my $online_nodes = $self->{ns}->list_online_nodes(); - $online_node_usage->add_node($_) for $online_nodes->@*; + my $online_node_usage; + + if (my $mode = $self->{'scheduler-mode'}) { + if ($mode eq 'static') { + $online_node_usage = eval { + my $scheduler = PVE::HA::Usage::Static->new($haenv); + $scheduler->add_node($_) for $online_nodes->@*; + return $scheduler; + }; + $haenv->log('warning', "using 'basic' scheduler mode, init for 'static' failed - $@") + if $@; + } elsif ($mode ne 'basic') { + $haenv->log('warning', "got unknown scheduler mode '$mode', using 'basic'"); + } + } + + if (!$online_node_usage) { + $online_node_usage = PVE::HA::Usage::Basic->new($haenv); + $online_node_usage->add_node($_) for $online_nodes->@*; + } foreach my $sid (keys %{$self->{ss}}) { my $sd = $self->{ss}->{$sid}; -- 2.39.2