]> git.proxmox.com Git - pve-ha-manager.git/commitdiff
sim: implement skip-round command for crm/lrm
author Thomas Lamprecht <t.lamprecht@proxmox.com>
Wed, 19 Jan 2022 09:55:29 +0000 (10:55 +0100)
committer Thomas Lamprecht <t.lamprecht@proxmox.com>
Wed, 19 Jan 2022 10:19:34 +0000 (11:19 +0100)
This allows simulating situations where some asymmetry is required
in service type scheduling, e.g., if the master should not pick up
LRM changes just yet - something that can happen quite often in the
real world due to scheduling not being predictable, especially
across different hosts.

The implementation is pretty simple for now; that also means we just
do not care about watchdog updates for the skipped service, so one
is limited to skipping at most two 20s rounds before self-fencing
kicks in.

This can be made more advanced once required.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
src/PVE/HA/Sim/Hardware.pm
src/PVE/HA/Sim/TestHardware.pm

index 0754ec14320d9b47d4101bec366f54d5a4a0e97e..96a4064b9646969e64abc0f7c77f1318c3003275 100644 (file)
@@ -538,6 +538,7 @@ sub get_cfs_state {
 #   power <node> <on|off>
 #   network <node> <on|off>
 #   delay <seconds>
+#   skip-round <crm|lrm> [<rounds=1>]
 #   cfs <node> <rw|update> <work|fail>
 #   reboot <node>
 #   shutdown <node>
index 7c6fab1f97698c7b84ee83aa630d299533ee966a..1966596378a794e0f57fd00a4d703b5af492b2cc 100644 (file)
@@ -113,7 +113,7 @@ sub lrm_control {
 sub run {
     my ($self) = @_;
 
-    my ($last_command_time, $next_cmd_at) = (0, 0);
+    my ($last_command_time, $next_cmd_at, $skip_service_round) = (0, 0, {});
 
     for (;;) {
        my $starttime = $self->get_time();
@@ -126,12 +126,18 @@ sub run {
        die "unable to simulate so many nodes. You need to increate watchdog/lock timeouts.\n"
            if $looptime >= 60;
 
+       my $first_loop = 1;
        foreach my $node (@nodes) {
            my $d = $self->{nodes}->{$node};
 
            if (my $crm = $d->{crm}) {
+               my $exit_crm;
 
-               my $exit_crm = !$crm->do_one_iteration();
+               if (!$skip_service_round->{crm}) {
+                   $exit_crm = !$crm->do_one_iteration();
+               } else {
+                   $self->log('info', "skipping CRM round", 'run-loop') if $first_loop;
+               }
 
                my $nodetime = $d->{crm_env}->get_time();
                $self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
@@ -156,8 +162,13 @@ sub run {
            }
 
            if (my $lrm = $d->{lrm}) {
+               my $exit_lrm;
 
-               my $exit_lrm = !$lrm->do_one_iteration();
+               if (!$skip_service_round->{lrm}) {
+                   $exit_lrm = !$lrm->do_one_iteration();
+               } else {
+                   $self->log('info', "skipping LRM round", 'run-loop') if $first_loop;
+               }
 
                my $nodetime = $d->{lrm_env}->get_time();
                $self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
@@ -189,8 +200,11 @@ sub run {
                    $self->{nodes}->{$n}->{lrm} = undef;
                }
            }
+           $first_loop = 0;
        }
 
+       $skip_service_round->{crm}-- if $skip_service_round->{crm};
+       $skip_service_round->{lrm}-- if $skip_service_round->{lrm};
 
        $self->{cur_time} = $starttime + $looptime if ($self->{cur_time} - $starttime) < $looptime;
 
@@ -219,6 +233,11 @@ sub run {
                if ($cmd =~ m/^delay\s+(\d+)\s*$/) {
                    $self->log('info', "execute $cmd", 'cmdlist');
                    $next_cmd_at = $self->{cur_time} + $1;
+               } elsif ($cmd =~ m/^skip-round\s+(lrm|crm)(?:\s+(\d+))?\s*$/) {
+                   $self->log('info', "execute $cmd", 'cmdlist');
+                   my ($what, $rounds) = ($1, $2 // 1);
+                   $skip_service_round->{$what} = 0 if !defined($skip_service_round->{$what});
+                   $skip_service_round->{$what} += $rounds;
                } else {
                    $self->sim_hardware_cmd($cmd, 'cmdlist');
                }