# Source: src/PVE/HA/Sim/Env.pm (pve-ha-manager.git)
package PVE::HA::Sim::Env;

use strict;
use warnings;

use POSIX qw(strftime EINTR);
use Fcntl qw(:DEFAULT :flock);

# NOTE(review): PVE::HA::Tools is required by the read/write_json helpers used
# below; additional imports may have been lost in the mangled extraction.
use PVE::HA::Tools;
use PVE::HA::Resources;
use PVE::HA::Sim::Resources::VirtVM;
use PVE::HA::Sim::Resources::VirtCT;
use PVE::HA::Sim::Resources::VirtFail;
# Register the simulated resource types with the HA resource registry so the
# simulator/test framework can manage virtual VMs, containers, and the
# fault-injection resource, then initialize the registry.
PVE::HA::Sim::Resources::VirtVM->register();
PVE::HA::Sim::Resources::VirtCT->register();
PVE::HA::Sim::Resources::VirtFail->register();

PVE::HA::Resources->init();
24 my ($this, $nodename, $hardware, $log_id) = @_;
26 die "missing nodename" if !$nodename;
27 die "missing log_id" if !$log_id;
29 my $class = ref($this) || $this;
31 my $self = bless {}, $class;
33 $self->{statusdir
} = $hardware->statusdir();
34 $self->{nodename
} = $nodename;
36 $self->{hardware
} = $hardware;
37 $self->{lock_timeout
} = 120;
39 $self->{log_id
} = $log_id;
47 return $self->{nodename
};
53 return $self->{hardware
};
# File-lexical helper (not a method): die unless the simulated cluster
# filesystem on this node is in read-write state.
# NOTE(review): the die-statement consequent was dropped by the extraction and
# is reconstructed here from the surrounding lines — confirm against upstream.
my $assert_cfs_can_rw = sub {
    my ($self, $emsg) = @_;

    $emsg //= 'cfs connection refused - not mounted?';

    die "$emsg\n"
        if !$self->{hardware}->get_cfs_state($self->{nodename}, 'rw');
};
66 my ($self, $lock_name, $unlock) = @_;
68 return 0 if !$self->quorate();
70 my $filename = "$self->{statusdir}/cluster_locks";
74 my $data = PVE
::HA
::Tools
::read_json_from_file
($filename, {});
78 my $nodename = $self->nodename();
79 my $ctime = $self->get_time();
83 if (my $d = $data->{$lock_name}) {
84 my $tdiff = $ctime - $d->{time};
86 if ($tdiff > $self->{lock_timeout
}) {
88 } elsif (($tdiff <= $self->{lock_timeout
}) && ($d->{node
} eq $nodename)) {
89 delete $data->{$lock_name};
98 if (my $d = $data->{$lock_name}) {
100 my $tdiff = $ctime - $d->{time};
102 if ($tdiff <= $self->{lock_timeout
}) {
103 if ($d->{node
} eq $nodename) {
110 $self->log('info', "got lock '$lock_name'");
111 $d->{node
} = $nodename;
117 $data->{$lock_name} = {
121 $self->log('info', "got lock '$lock_name'");
126 PVE
::HA
::Tools
::write_json_to_file
($filename, $data);
131 return $self->{hardware
}->global_lock($code);
# Read the CRM manager status object from the simulated cluster filesystem.
# Returns an empty hashref if the status file does not exist yet.
# Dies if the simulated cfs is not mounted read-write.
sub read_manager_status {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    return PVE::HA::Tools::read_json_from_file($filename, {});
}
# Persist the CRM manager status object to the simulated cluster filesystem.
# Dies if the simulated cfs is not mounted read-write.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}
# Read the LRM status for a node (defaults to the local node) via the
# simulated hardware layer. Dies if the simulated cfs is not read-write.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_lrm_status($node);
}
# Write this node's LRM status object via the simulated hardware layer.
# Dies if the simulated cfs is not mounted read-write.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $node = $self->{nodename};

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->write_lrm_status($node, $status_obj);
}
# Query the simulated hardware status for a pending shutdown request on the
# local node. Returns a ($shutdown, $reboot) flag pair; dies on an unknown
# shutdown target or missing node status.
# NOTE(review): the inner branch bodies were dropped by the extraction and are
# reconstructed from the initialized flag pair — confirm against upstream.
sub is_node_shutdown {
    my ($self) = @_;

    my $node = $self->{nodename};
    my $cstatus = $self->{hardware}->read_hardware_status_nolock();

    die "undefined node status for node '$node'" if !defined($cstatus->{$node});

    my ($shutdown, $reboot) = (0, 0);

    if (my $target = $cstatus->{$node}->{shutdown}) {
        if ($target eq 'shutdown') {
            $shutdown = 1;
        } elsif ($target eq 'reboot') {
            # a reboot implies the node shuts down first
            $shutdown = 1;
            $reboot = 1;
        } else {
            die "unknown shutdown target '$target'";
        }
    }

    return ($shutdown, $reboot);
}
# Read the HA service (resource) configuration via the simulated hardware.
# Dies if the simulated cfs is not mounted read-write.
sub read_service_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_service_config();
}
# Update the configuration of service $sid with the given parameter hash,
# delegating to the simulated hardware layer.
sub update_service_config {
    my ($self, $sid, $param) = @_;

    return $self->{hardware}->update_service_config($sid, $param);
}
213 my ($self, $sid) = @_;
215 die "unable to parse service id '$sid'\n"
216 if !($sid =~ m/^(\S+):(\S+)$/);
221 return wantarray ?
($sid, $type, $name) : $sid;
# Read the fencing device configuration via the simulated hardware.
# Dies if the simulated cfs is not mounted read-write.
sub read_fence_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_fence_config();
}
232 # the test/sim framework has hardware enabled fencing if
233 # it has devices configured
237 my $cfg = $self->read_fence_config();
239 return (defined($cfg) && keys %{$cfg}) ?
'hardware' : 'watchdog';
# Execute a fence agent against $node with the given parameters, delegating
# to the simulated hardware layer.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    return $self->{hardware}->exec_fence_agent($agent, $node, @param);
}
# Read the HA group configuration via the simulated hardware.
# Dies if the simulated cfs is not mounted read-write.
sub read_group_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_group_config();
}
# this is normally only allowed by the master to recover a _fenced_ service
# NOTE(review): the sub header line was dropped by the extraction; the name is
# inferred from the delegated hardware call — confirm against upstream.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->change_service_location($sid, $current_node, $new_node);
}
# Append a command to the CRM command queue via the simulated hardware.
# Dies if the simulated cfs is not mounted read-write.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->queue_crm_commands($cmd);
}
# Fetch the queued CRM commands via the simulated hardware.
# Dies if the simulated cfs is not mounted read-write.
sub read_crm_commands {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_crm_commands();
}
282 my ($self, $level, $msg) = @_;
286 my $time = $self->get_time();
288 printf("%-5s %5d %12s: $msg\n", $level, $time, "$self->{nodename}/$self->{log_id}");
292 my ($self, $subject, $text) = @_;
294 # only log subject, do not spam the logs
295 $self->log('email', $subject);
301 die "implement in subclass";
305 my ($self, $delay) = @_;
307 die "implement in subclass";
311 my ($self, $end_time) = @_;
313 die "implement in subclass";
# Try to acquire the cluster-wide HA manager lock (simulated).
sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock');
}
# release the cluster wide manager lock.
# when released another CRM may step up and get the lock, thus this should only
# get called when shutting down/deactivating the current master
sub release_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock', 1);
}
# Build the per-node HA agent lock name; $node defaults to the local node.
sub get_ha_agent_lock_name {
    my ($self, $node) = @_;

    $node = $self->nodename() if !$node;

    return "ha_agent_${node}_lock";
}
# Try to acquire the HA agent lock for $node (defaults to the local node).
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    my $lck = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lck);
}
# release the respective node agent lock.
# this should only get called if the nodes LRM gracefully shuts down with
# all services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();

    my $lock = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lock, 1);
}
359 # return true when cluster is quorate
363 my ($node_info, $quorate) = $self->{hardware
}->get_node_info();
364 my $node = $self->nodename();
365 return 0 if !$node_info->{$node}->{online
};
372 return $self->{hardware
}->get_node_info();
# Hook invoked at the start of each CRM/LRM work loop iteration.
sub loop_start_hook {
    my ($self) = @_;

    # do nothing, overwrite in subclass
}
384 # do nothing, overwrite in subclass
# Trigger a simulated cluster filesystem state update for the local node and
# return the resulting cfs state from the hardware layer.
sub cluster_state_update {
    my ($self) = @_;

    return $self->{hardware}->get_cfs_state($self->{nodename}, 'update');
}
397 my $node = $self->nodename();
399 return $self->{hardware
}->watchdog_open($node);
# Refresh (feed) the simulated watchdog identified by handle $wfh.
sub watchdog_update {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_update($wfh);
}
409 my ($self, $wfh) = @_;
411 return $self->{hardware
}->watchdog_close($wfh);
417 # nothing to clean up in the simulation environment
421 sub get_max_workers
{
# return cluster wide enforced HA settings
sub get_ha_settings {
    my ($self) = @_;

    my $datacenterconfig = $self->{hardware}->read_datacenter_conf();

    return $datacenterconfig->{ha};
}
# Return per-node static resource statistics (e.g. for static load
# scheduling) from the simulated hardware layer.
sub get_static_node_stats {
    my ($self) = @_;

    return $self->{hardware}->get_static_node_stats();
}