package PVE::HA::Env::PVE2;

use strict;
use warnings;
use POSIX qw(:errno_h :fcntl_h);
use IO::File;
use IO::Socket::UNIX;

use PVE::SafeSyslog;
use PVE::Tools;
use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
use PVE::INotify;
use PVE::RPCEnvironment;

use PVE::HA::Tools ':exit_codes';
use PVE::HA::Config;
use PVE::HA::Resources;
use PVE::HA::Resources::PVEVM;
use PVE::HA::Resources::PVECT;

PVE::HA::Resources::PVEVM->register();
PVE::HA::Resources::PVECT->register();

PVE::HA::Resources->init();

my $lockdir = "/etc/pve/priv/lock";

sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    $self->{nodename} = $nodename;

    return $self;
}

sub nodename {
    my ($self) = @_;

    return $self->{nodename};
}

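# Illustrative only (not part of the original module): the HA daemons
# construct this environment with the local node name, e.g. via
# PVE::INotify (exact call site assumed for illustration):
#
#   my $haenv = PVE::HA::Env::PVE2->new(PVE::INotify::nodename());
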
52 die "hardware is for testing and simulation only";
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Config::read_manager_status();
}

sub write_manager_status {
    my ($self, $status_obj) = @_;

    PVE::HA::Config::write_manager_status($status_obj);
}

sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    return PVE::HA::Config::read_lrm_status($node);
}

sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $node = $self->{nodename};

    PVE::HA::Config::write_lrm_status($node, $status_obj);
}

sub is_node_shutdown {
    my ($self) = @_;

    my $shutdown = 0;

    my $code = sub {
        my $line = shift;

        $shutdown = 1 if ($line =~ m/shutdown\.target/);
    };

    my $cmd = ['/bin/systemctl', 'list-jobs'];
    eval { PVE::Tools::run_command($cmd, outfunc => $code, noerr => 1); };

    return $shutdown;
}

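# Illustrative only: on a node that is shutting down, '/bin/systemctl
# list-jobs' prints lines such as the following (format may vary between
# systemd versions); the regex above matches the 'shutdown.target' entry:
#
#   JOB UNIT            TYPE  STATE
#   123 shutdown.target start waiting
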
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    return PVE::HA::Config::queue_crm_commands($cmd);
}

sub read_crm_commands {
    my ($self) = @_;

    return PVE::HA::Config::read_crm_commands();
}

sub read_service_config {
    my ($self) = @_;

    my $res = PVE::HA::Config::read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};
        my (undef, undef, $name) = PVE::HA::Tools::parse_sid($sid);
        $d->{state} = 'enabled' if !defined($d->{state});
        $d->{max_restart} = 1 if !defined($d->{max_restart});
        $d->{max_relocate} = 1 if !defined($d->{max_relocate});
        if (PVE::HA::Resources->lookup($d->{type})) {
            if (my $vmd = $vmlist->{ids}->{$name}) {
                if (!$vmd) { # consistency check
                    warn "no such VM '$name'\n";
                } else {
                    $d->{node} = $vmd->{node};
                    $conf->{$sid} = $d;
                }
            } else {
                if (defined($d->{node})) {
                    $conf->{$sid} = $d;
                } else {
                    warn "service '$sid' without node\n";
                }
            }
        }
    }

    return $conf;
}

# this is only allowed by the master to recover a _fenced_ service
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    my (undef, $type, $name) = PVE::HA::Tools::parse_sid($sid);

    if (my $plugin = PVE::HA::Resources->lookup($type)) {
        my $old = $plugin->config_file($name, $current_node);
        my $new = $plugin->config_file($name, $new_node);
        rename($old, $new) ||
            die "rename '$old' to '$new' failed - $!\n";
    } else {
        die "implement me";
    }
}

sub read_group_config {
    my ($self) = @_;

    return PVE::HA::Config::read_group_config();
}

# this should return a hash containing info about
# which nodes are members and online.
sub get_node_info {
    my ($self) = @_;

    my ($node_info, $quorate) = ({}, 0);

    my $nodename = $self->{nodename};

    $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    foreach my $node (keys %$members) {
        my $d = $members->{$node};
        $node_info->{$node}->{online} = $d->{online};
    }

    $node_info->{$nodename}->{online} = 1; # local node is always up

    return ($node_info, $quorate);
}

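# Illustrative only: for a quorate three-node cluster with one node down,
# the return value might be:
#
#   ({ node1 => { online => 1 },
#      node2 => { online => 1 },
#      node3 => { online => 0 } }, 1)
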
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    syslog($level, $msg);
}

my $last_lock_status = {};

sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;
    my $filename = "$lockdir/$lockid";
    my $last = $last_lock_status->{$lockid} || 0;
    my $ctime = time();
    my $retry_timeout = 100; # fixme: what timeout

    eval {
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < $retry_timeout)) {
            # send cfs lock update request (utime)
            if (!utime(0, $ctime, $filename)) {
                die "cfs lock update failed - $!\n";
            }
        } else {
            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };
    my $err = $@;

    # $self->log('err', $err) if $err; # for debugging

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully acquired lock '$lockid'");
        } else {
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    }

    return $got_lock;
}

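# Illustrative only: the pmxcfs lock protocol above maps filesystem
# operations onto cluster-wide lock requests - mkdir acquires the lock,
# utime(0, $ctime, ...) renews it, and utime(0, 0, ...) requests release.
# A caller typically just polls each round, e.g.:
#
#   if ($self->get_pve_lock("ha_manager_lock")) {
#       # we hold (or kept) the manager lock for this round
#   }
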
sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->get_pve_lock("ha_manager_lock");
}

# release the cluster wide manager lock.
# when released, another CRM may step up and get the lock, thus this should
# only get called when shutting down/deactivating the current master
sub release_ha_manager_lock {
    my ($self) = @_;

    return rmdir("$lockdir/ha_manager_lock");
}

sub get_ha_agent_lock {
    my ($self, $node) = @_;

    $node = $self->nodename() if !defined($node);

    return $self->get_pve_lock("ha_agent_${node}_lock");
}

# release the respective node agent lock.
# this should only get called if the node's LRM gracefully shuts down with
# all services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();

    return rmdir("$lockdir/ha_agent_${node}_lock");
}

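# Illustrative only: for a node named 'node1' the two lock paths used above
# resolve to /etc/pve/priv/lock/ha_manager_lock and
# /etc/pve/priv/lock/ha_agent_node1_lock on the clustered filesystem.
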
sub quorate {
    my ($self) = @_;

    my $quorate = 0;
    eval {
        $quorate = PVE::Cluster::check_cfs_quorum();
    };

    return $quorate;
}

sub get_time {
    my ($self) = @_;

    return time();
}

sub sleep {
    my ($self, $delay) = @_;

    CORE::sleep($delay);
}

sub sleep_until {
    my ($self, $end_time) = @_;

    for (;;) {
        my $cur_time = time();

        last if $cur_time >= $end_time;

        $self->sleep(1);
    }
}

sub loop_start_hook {
    my ($self) = @_;

    PVE::Cluster::cfs_update();

    $self->{loop_start} = $self->get_time();
}

sub loop_end_hook {
    my ($self) = @_;

    my $delay = $self->get_time() - $self->{loop_start};

    warn "loop took too long ($delay seconds)\n" if $delay > 30;
}

355 die "watchdog already open\n" if defined($watchdog_fh);
357 $watchdog_fh = IO
::Socket
::UNIX-
>new(
358 Type
=> SOCK_STREAM
(),
359 Peer
=> "/run/watchdog-mux.sock") ||
360 die "unable to open watchdog socket - $!\n";
362 $self->log('info', "watchdog active");
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $res = $watchdog_fh->syswrite("\0", 1);
    if (!defined($res)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    }
    if ($res != 1) {
        $self->log('err', "watchdog update failed - wrote $res bytes\n");
        return 0;
    }

    return 1;
}

sub watchdog_close {
    my ($self, $wfh) = @_;

    $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if (!$watchdog_fh->close()) {
        $self->log('err', "watchdog close failed - $!");
    } else {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    }
}

sub after_fork {
    my ($self) = @_;

    # close inherited inotify FD from parent and reopen our own
    PVE::INotify::inotify_close();
    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();
}

sub get_max_workers {
    my ($self) = @_;

    my $datacenterconfig = cfs_read_file('datacenter.cfg');

    return $datacenterconfig->{max_workers} || 4;
}
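
# Illustrative only: setting 'max_workers: 8' in /etc/pve/datacenter.cfg
# would raise the number of parallel LRM resource workers from the default
# of 4 returned above.

1;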