]>
git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/Sim/TestHardware.pm
1 package PVE
::HA
::Sim
::TestHardware
;
3 # Simulate Hardware resources
5 # power supply for nodes: on/off
6 # network connection to nodes: on/off
7 # watchdog devices for nodes
11 use POSIX
qw(strftime EINTR);
14 use Fcntl
qw(:DEFAULT :flock);
16 use File
::Path
qw(make_path remove_tree);
21 use PVE
::HA
::Sim
::TestEnv
;
22 use base
qw(PVE::HA::Sim::Hardware);
# Upper bound for the simulated clock: the run loop further down dies with
# "simulation exceeded maximum time" once $self->{cur_time} is greater than
# this value.
24 my $max_sim_time = 10000;
# Constructor body (the enclosing sub header and its closing lines are not
# visible in this extract).
#
# Arguments:
#   $this    - class name or an existing object (resolved via ref($this) || $this)
#   $testdir - directory handed to the parent constructor; a file named
#              'cmdlist' is read from it
27 my ($this, $testdir) = @_;
29 my $class = ref($this) || $this;
31 my $self = $class->SUPER::new
($testdir);
# Load the command list. The file content is parsed with decode_json; the run
# loop later shifts one entry at a time off this list and iterates over it.
33 my $raw = PVE
::Tools
::file_get_contents
("$testdir/cmdlist");
34 $self->{cmdlist
} = decode_json
($raw);
# Loop counter and simulated clock both start at zero.
36 $self->{loop_count
} = 0;
37 $self->{cur_time
} = 0;
# Open "<statusdir>/log" in append mode; die if the handle cannot be created.
39 my $statusdir = $self->statusdir();
40 my $logfile = "$statusdir/log";
41 $self->{logfh
} = IO
::File-
>new(">>$logfile") ||
42 die "unable to open '$logfile' - $!";
# Per-node setup, visiting nodes in sorted name order.
44 foreach my $node (sort keys %{$self->{nodes
}}) {
46 my $d = $self->{nodes
}->{$node};
# Test environment for this node's 'crm' service.
# NOTE(review): the assignment target line is missing from this extract --
# presumably $d->{crm_env}, which is read elsewhere in this file; confirm.
49 PVE
::HA
::Env-
>new('PVE::HA::Sim::TestEnv', $node, $self, 'crm');
# Test environment for this node's 'lrm' service.
# NOTE(review): assignment target missing here as well -- presumably
# $d->{lrm_env}; confirm against the full file.
52 PVE
::HA
::Env-
>new('PVE::HA::Sim::TestEnv', $node, $self, 'lrm');
# No service objects yet; the run loop only iterates a service when its slot
# holds an object.
54 $d->{crm
} = undef; # create on power on
55 $d->{lrm
} = undef; # create on power on
# Returns the current simulated time stored in $self->{cur_time}
# (the enclosing sub header is not visible in this extract).
64 return $self->{cur_time
};
# Logging body (the enclosing sub header is not visible in this extract).
#
# Arguments:
#   $level - log level string, printed left-aligned in a 5 character column
#   $msg   - message text
#   $id    - source identifier; falls back to 'hardware' when false
68 my ($self, $level, $msg, $id) = @_;
# Every line is stamped with the current simulated time.
72 my $time = $self->get_time();
74 $id = 'hardware' if !$id;
# Layout: "<level> <time> <id>: <msg>\n" with the time as a 5-wide integer and
# the id right-aligned in 12 columns.
# NOTE(review): $msg is interpolated into the sprintf format string itself, so
# a '%' inside the message is parsed as a format directive -- consider passing
# it as a '%s' argument instead.
76 my $line = sprintf("%-5s %5d %12s: $msg\n", $level, $time, $id);
# Append to the log handle stored in $self->{logfh} and flush after each line.
79 $self->{logfh
}->print($line);
80 $self->{logfh
}->flush();
83 # for controlling the resource manager services (CRM and LRM)
# CRM control body (the enclosing sub header is not visible in this extract).
#
# Arguments:
#   $action  - one of 'start', 'stop' or 'shutdown'
#   $data    - hash providing 'crm_env' and 'crm' (presumably the per-node
#              hash used by the run loop -- confirm against the caller)
#   $lock_fh - not used in the visible lines
85 my ($self, $action, $data, $lock_fh) = @_;
# 'start': return a fresh PVE::HA::CRM built on the stored CRM environment.
87 if ($action eq 'start') {
88 return PVE
::HA
::CRM-
>new($data->{crm_env
});
89 } elsif ($action eq 'stop') {
90 # nothing to do: sim_hardware_cmd sets us to undef, that's enough
91 } elsif ($action eq 'shutdown') {
# 'shutdown': ask the existing CRM object to shut down.
92 $data->{crm
}->shutdown_request();
# Any other action is rejected.
# NOTE(review): the branch line introducing this die (original line 93) is not
# visible in this extract -- presumably a final else.
94 die "unknown CRM control action: '$action'\n";
# LRM control body (the enclosing sub header is not visible in this extract).
#
# Arguments:
#   $action  - one of 'start', 'stop' or 'shutdown'
#   $data    - hash providing 'lrm_env' and 'lrm' (presumably the per-node
#              hash used by the run loop -- confirm against the caller)
#   $lock_fh - not used in the visible lines
99 my ($self, $action, $data, $lock_fh) = @_;
# 'start': return a fresh PVE::HA::LRM built on the stored LRM environment.
101 if ($action eq 'start') {
102 return PVE
::HA
::LRM-
>new($data->{lrm_env
});
103 } elsif ($action eq 'stop') {
104 # nothing to do: sim_hardware_cmd sets us to undef, that's enough
105 } elsif ($action eq 'shutdown') {
# 'shutdown': ask the existing LRM object to shut down.
106 $data->{lrm
}->shutdown_request();
# Any other action is rejected.
# NOTE(review): the branch line introducing this die (original line 107) is
# not visible in this extract -- presumably a final else.
108 die "unknown LRM control action: '$action'\n";
# Main simulation loop body. The enclosing sub header, the loop opener and
# several guard/else lines are not visible in this extract; notes below mark
# the places where that matters.
#
# State kept across loop iterations:
#   $last_command_time  - simulated time at which the last cmdlist command ran
#   $next_cmd_at        - no further command is applied before this time
#                         (pushed forward by the 'delay' command)
#   $skip_service_round - per service ('crm' / 'lrm') number of rounds still
#                         to be skipped (raised by the 'skip-round' command)
116 my ($last_command_time, $next_cmd_at, $skip_service_round) = (0, 0, {});
119 my $starttime = $self->get_time();
121 my @nodes = sort keys %{$self->{nodes
}};
# Minimum duration of one loop: two time units per node, but never below 20.
123 my $looptime = scalar(@nodes) * 2; # twice the node count
124 $looptime = 20 if $looptime < 20;
# NOTE(review): the condition guarding this die (original line 127) is not
# visible in this extract.
126 die "unable to simulate so many nodes. You need to increate watchdog/lock timeouts.\n"
# Give the CRM and then the LRM of every node one iteration.
130 foreach my $node (@nodes) {
131 my $d = $self->{nodes
}->{$node};
# --- CRM: only when a CRM object currently exists for this node ---
133 if (my $crm = $d->{crm
}) {
# Run one CRM iteration unless rounds are being skipped; $exit_crm becomes
# true when do_one_iteration() returns false.
# NOTE(review): the declaration of $exit_crm, the else line and the origin of
# $first_loop are not visible in this extract.
136 if (!$skip_service_round->{crm
}) {
137 $exit_crm = !$crm->do_one_iteration();
139 $self->log('info', "skipping CRM round", 'run-loop') if $first_loop;
# Advance the global clock if this node's CRM environment is further ahead.
142 my $nodetime = $d->{crm_env
}->get_time();
143 $self->{cur_time
} = $nodetime if $nodetime > $self->{cur_time
};
# CRM exit handling (presumably guarded by $exit_crm on a line not visible
# here): log the exit, then act on the node's 'shutdown' hardware status.
146 $d->{crm_env
}->log('info', "exit (loop end)");
149 my $cstatus = $self->read_hardware_status_nolock();
150 my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
151 my $shutdown = $nstatus->{shutdown} || '';
# 'reboot'   -> power the node off and on again
# 'shutdown' -> power the node off
# otherwise  -> die, unless the crm_stop flag was set for this node
152 if ($shutdown eq 'reboot') {
153 $self->sim_hardware_cmd("power $node off", 'reboot');
154 $self->sim_hardware_cmd("power $node on", 'reboot');
155 } elsif ($shutdown eq 'shutdown') {
156 $self->sim_hardware_cmd("power $node off", 'shutdown');
157 } elsif (!$d->{crm_stop
}) {
158 die "unexpected CRM exit - not implemented"
# Clear the stop flag.
160 $d->{crm_stop
} = undef;
# --- LRM: only when an LRM object currently exists for this node ---
164 if (my $lrm = $d->{lrm
}) {
# Same scheme as for the CRM: run one iteration unless skipping, and record
# in $exit_lrm whether do_one_iteration() returned false.
167 if (!$skip_service_round->{lrm
}) {
168 $exit_lrm = !$lrm->do_one_iteration();
170 $self->log('info', "skipping LRM round", 'run-loop') if $first_loop;
# Advance the global clock if this node's LRM environment is further ahead.
173 my $nodetime = $d->{lrm_env
}->get_time();
174 $self->{cur_time
} = $nodetime if $nodetime > $self->{cur_time
};
# LRM exit handling (presumably guarded by $exit_lrm on a line not visible
# here).
177 $d->{lrm_env
}->log('info', "exit (loop end)");
179 my $cstatus = $self->read_hardware_status_nolock();
180 my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
181 my $shutdown = $nstatus->{shutdown} || '';
# A requested LRM restart replaces the LRM object with a new one; combining a
# restart with a pending shutdown/reboot is not supported and dies.
182 if ($d->{lrm_restart
}) {
183 die "lrm restart during shutdown - not implemented" if $shutdown;
184 $d->{lrm_restart
} = undef;
185 $d->{lrm
} = PVE
::HA
::LRM-
>new($d->{lrm_env
});
186 } elsif ($shutdown eq 'reboot' || $shutdown eq 'shutdown') {
187 # exit the LRM before the CRM to reflect real world behaviour
188 $self->sim_hardware_cmd("crm $node stop", $shutdown);
# Any other LRM exit is unsupported (the else line is not visible here).
190 die "unexpected LRM exit - not implemented"
# Watchdog pass: a node whose watchdog check fails is powered off and loses
# its CRM and LRM objects.
195 foreach my $n (@nodes) {
196 if (!$self->watchdog_check($n)) {
197 $self->sim_hardware_cmd("power $n off", 'watchdog');
198 $self->log('info', "server '$n' stopped by poweroff (watchdog)");
199 $self->{nodes
}->{$n}->{crm
} = undef;
200 $self->{nodes
}->{$n}->{lrm
} = undef;
# One round is over: count down any remaining skip rounds.
206 $skip_service_round->{crm
}-- if $skip_service_round->{crm
};
207 $skip_service_round->{lrm
}-- if $skip_service_round->{lrm
};
# Make sure at least $looptime has elapsed since $starttime.
209 $self->{cur_time
} = $starttime + $looptime if ($self->{cur_time
} - $starttime) < $looptime;
# Hard stop for runaway simulations.
211 die "simulation exceeded maximum time ($max_sim_time) - force end\n"
212 if $self->{cur_time
} > $max_sim_time;
# Call loop_start_hook() on both environments of every node.
214 foreach my $node (@nodes) {
215 my $d = $self->{nodes
}->{$node};
217 $d->{lrm_env
}->loop_start_hook();
218 $d->{crm_env
}->loop_start_hook();
# A pending 'delay' blocks further commands until $next_cmd_at is reached.
221 next if $self->{cur_time
} < $next_cmd_at;
223 if (($self->{loop_count
} % 5) == 0) { # apply new command every 5 loop iterations
# Take the next entry off the command list; it is iterated as a list of
# command strings below.
224 my $list = shift @{$self->{cmdlist
}};
# NOTE(review): this return is presumably guarded by a check that the command
# list is exhausted; that guard line is not visible in this extract.
226 # end simulation 500 seconds after the last command was executed
227 return if ($self->{cur_time
} - $last_command_time) > 500;
230 foreach my $cmd (@$list) {
# Remember when the most recent command was handled.
231 $last_command_time = $self->{cur_time
};
# "delay N": hold back the next command until cur_time + N.
233 if ($cmd =~ m/^delay\s+(\d+)\s*$/) {
234 $self->log('info', "execute $cmd", 'cmdlist');
235 $next_cmd_at = $self->{cur_time
} + $1;
# "skip-round lrm|crm [N]": skip N further rounds (default 1) of that service.
236 } elsif ($cmd =~ m/^skip-round\s+(lrm|crm)(?:\s+(\d+))?\s*$/) {
237 $self->log('info', "execute $cmd", 'cmdlist');
238 my ($what, $rounds) = ($1, $2 // 1);
239 $skip_service_round->{$what} = 0 if !defined($skip_service_round->{$what});
240 $skip_service_round->{$what} += $rounds;
# Everything else is handed to sim_hardware_cmd (the else line is not visible
# in this extract).
242 $self->sim_hardware_cmd($cmd, 'cmdlist');
# Count the completed loop iteration.
247 ++$self->{loop_count
};