package PVE::HA::Sim::Env;
use strict;
use warnings;
use POSIX qw(strftime EINTR);
use Fcntl qw(:DEFAULT :flock);

use PVE::HA::Tools;
use PVE::HA::Resources;
use PVE::HA::Sim::Resources::VirtVM;
use PVE::HA::Sim::Resources::VirtCT;
use PVE::HA::Sim::Resources::VirtFail;
PVE::HA::Sim::Resources::VirtVM->register();
PVE::HA::Sim::Resources::VirtCT->register();
PVE::HA::Sim::Resources::VirtFail->register();

PVE::HA::Resources->init();
sub new {
    my ($this, $nodename, $hardware, $log_id) = @_;

    die "missing nodename" if !$nodename;
    die "missing log_id" if !$log_id;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    $self->{statusdir} = $hardware->statusdir();
    $self->{nodename} = $nodename;

    $self->{hardware} = $hardware;
    $self->{lock_timeout} = 120;

    $self->{log_id} = $log_id;

    return $self;
}
sub nodename {
    my ($self) = @_;

    return $self->{nodename};
}

sub hardware {
    my ($self) = @_;

    return $self->{hardware};
}
my $assert_cfs_can_rw = sub {
    my ($self, $emsg) = @_;

    $emsg //= 'cfs connection refused - not mounted?';

    die "$emsg\n"
        if !$self->{hardware}->get_cfs_state($self->{nodename}, 'rw');
};
sub sim_get_lock {
    my ($self, $lock_name, $unlock) = @_;

    return 0 if !$self->quorate();

    my $filename = "$self->{statusdir}/cluster_locks";

    my $code = sub {
        my $data = PVE::HA::Tools::read_json_from_file($filename, {});

        my $res;
        my $nodename = $self->nodename();
        my $ctime = $self->get_time();

        if ($unlock) {
            if (my $d = $data->{$lock_name}) {
                my $tdiff = $ctime - $d->{time};

                if ($tdiff > $self->{lock_timeout}) {
                    $res = 1;
                } elsif (($tdiff <= $self->{lock_timeout}) && ($d->{node} eq $nodename)) {
                    delete $data->{$lock_name};
                    $res = 1;
                } else {
                    $res = 0;
                }
            }
        } else {
            if (my $d = $data->{$lock_name}) {
                my $tdiff = $ctime - $d->{time};

                if ($tdiff <= $self->{lock_timeout}) {
                    if ($d->{node} eq $nodename) {
                        # we already hold the lock - refresh its timestamp
                        $d->{time} = $ctime;
                        $res = 1;
                    } else {
                        $res = 0;
                    }
                } else {
                    # previous holder timed out - take the lock over
                    $self->log('info', "got lock '$lock_name'");
                    $d->{node} = $nodename;
                    $d->{time} = $ctime;
                    $res = 1;
                }
            } else {
                $data->{$lock_name} = {
                    node => $nodename,
                    time => $ctime,
                };
                $self->log('info', "got lock '$lock_name'");
                $res = 1;
            }
        }

        PVE::HA::Tools::write_json_to_file($filename, $data);

        return $res;
    };

    return $self->{hardware}->global_lock($code);
}
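# Usage sketch: the CRM/LRM lock helpers further below wrap sim_get_lock(), e.g.
#   $self->sim_get_lock('ha_manager_lock');    # try to acquire the manager lock
#   $self->sim_get_lock('ha_manager_lock', 1); # release it again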
sub read_manager_status {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    return PVE::HA::Tools::read_json_from_file($filename, {});
}
sub write_manager_status {
    my ($self, $status_obj) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}
sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_lrm_status($node);
}
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $node = $self->{nodename};

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->write_lrm_status($node, $status_obj);
}
sub is_node_shutdown {
    my ($self) = @_;

    my $node = $self->{nodename};
    my $cstatus = $self->{hardware}->read_hardware_status_nolock();

    die "undefined node status for node '$node'" if !defined($cstatus->{$node});

    my ($shutdown, $reboot) = (0, 0);

    if (my $target = $cstatus->{$node}->{shutdown}) {
        if ($target eq 'shutdown') {
            $shutdown = 1;
        } elsif ($target eq 'reboot') {
            $shutdown = 1;
            $reboot = 1;
        } else {
            die "unknown shutdown target '$target'";
        }
    }

    return ($shutdown, $reboot);
}
sub read_service_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_service_config();
}
sub read_fence_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_fence_config();
}
# the test/sim framework uses hardware fencing if fence devices are
# configured, and falls back to watchdog fencing otherwise
sub fencing_mode {
    my ($self) = @_;

    my $cfg = $self->read_fence_config();

    return (defined($cfg) && keys %{$cfg}) ? 'hardware' : 'watchdog';
}
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    return $self->{hardware}->exec_fence_agent($agent, $node, @param);
}
sub read_group_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_group_config();
}
# this is normally only allowed by the master to recover a _fenced_ service
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->change_service_location($sid, $current_node, $new_node);
}
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->queue_crm_commands($cmd);
}
sub read_crm_commands {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_crm_commands();
}
sub log {
    my ($self, $level, $msg) = @_;

    my $time = $self->get_time();

    printf("%-5s %5d %12s: $msg\n", $level, $time, "$self->{nodename}/$self->{log_id}");
}
sub sendmail {
    my ($self, $subject, $text) = @_;

    # only log the subject, do not spam the logs
    $self->log('email', $subject);
}
# the time/sleep primitives must be provided by a subclass

sub get_time {
    my ($self) = @_;
    die "implement in subclass";
}

sub sleep {
    my ($self, $delay) = @_;
    die "implement in subclass";
}

sub sleep_until {
    my ($self, $end_time) = @_;
    die "implement in subclass";
}
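# A minimal sketch of how a subclass could provide these primitives against
# real wall-clock time; the package name below is hypothetical and only
# illustrative, not part of this module:
#
#   package My::Sim::RTEnv;
#   use base qw(PVE::HA::Sim::Env);
#
#   sub get_time { return time(); }
#
#   sub sleep {
#       my ($self, $delay) = @_;
#       CORE::sleep($delay);
#   }
#
#   sub sleep_until {
#       my ($self, $end_time) = @_;
#       my $now = time();
#       CORE::sleep($end_time - $now) if $end_time > $now;
#   }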
sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock');
}
# release the cluster wide manager lock.
# when released, another CRM may step up and get the lock, thus this should
# only get called when shutting down/deactivating the current master
sub release_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock', 1);
}
sub get_ha_agent_lock_name {
    my ($self, $node) = @_;

    $node = $self->nodename() if !$node;

    return "ha_agent_${node}_lock";
}
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    my $lck = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lck);
}
# release the respective node agent lock.
# this should only get called if the node's LRM gracefully shuts down with
# all services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();

    my $lock = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lock, 1);
}
# return true when the cluster is quorate and this node is online
sub quorate {
    my ($self) = @_;

    my ($node_info, $quorate) = $self->{hardware}->get_node_info();
    my $node = $self->nodename();
    return 0 if !$node_info->{$node}->{online};

    return $quorate;
}
sub get_node_info {
    my ($self) = @_;

    return $self->{hardware}->get_node_info();
}
sub loop_start_hook {
    my ($self) = @_;

    # do nothing, overwrite in subclass
}
sub loop_end_hook {
    my ($self) = @_;

    # do nothing, overwrite in subclass
}
sub cluster_state_update {
    my ($self) = @_;

    return $self->{hardware}->get_cfs_state($self->{nodename}, 'update');
}
sub watchdog_open {
    my ($self) = @_;

    my $node = $self->nodename();

    return $self->{hardware}->watchdog_open($node);
}
sub watchdog_update {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_update($wfh);
}
sub watchdog_close {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_close($wfh);
}
sub after_fork {
    my ($self) = @_;

    # nothing to clean up in the simulation environment
}
sub get_max_workers {
    my ($self) = @_;

    # 0 workers: the simulation executes resource commands synchronously
    return 0;
}

1;
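# Hypothetical usage sketch: this base class is normally subclassed so that
# get_time/sleep are defined; $hardware is assumed to be the simulation's
# PVE::HA::Sim::Hardware instance and 'crm' is only an illustrative log id:
#
#   my $env = PVE::HA::Sim::Env->new('node1', $hardware, 'crm');
#   print $env->nodename(), "\n";      # node1
#   print $env->fencing_mode(), "\n";  # 'watchdog' unless fence devices are configured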