]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
Env: add get_ha_settings method
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
119656b9 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016 14
a89ff919 15use PVE::HA::Tools ':exit_codes';
714a4016 16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
c982dfee 18use PVE::HA::FenceConfig;
9e5ea8f7
DM
19use PVE::HA::Resources;
20use PVE::HA::Resources::PVEVM;
21use PVE::HA::Resources::PVECT;
714a4016 22
9e5ea8f7
DM
23PVE::HA::Resources::PVEVM->register();
24PVE::HA::Resources::PVECT->register();
25
26PVE::HA::Resources->init();
022e4e79 27
007fcc8b
DM
28my $lockdir = "/etc/pve/priv/lock";
29
714a4016
DM
# Construct a PVE2 environment instance bound to the given cluster node.
# Dies when no node name is supplied.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    my $class = ref($this) || $this;

    my $self = bless { nodename => $nodename }, $class;

    return $self;
}
43
# Return the name of the local node this environment was created for.
sub nodename {
    my ($self) = @_;
    return $self->{nodename};
}
49
dd9c0c9d
TL
# Simulation/testing hook only - the real PVE2 environment has no
# simulated hardware, so calling this is always a fatal error.
sub hardware {
    my ($self) = @_;
    die "hardware is for testing and simulation only";
}
55
714a4016
DM
# Fetch the CRM (manager) status from the cluster configuration layer.
sub read_manager_status {
    my ($self) = @_;
    return PVE::HA::Config::read_manager_status();
}
61
# Persist the CRM (manager) status object via the cluster configuration layer.
sub write_manager_status {
    my ($self, $status_obj) = @_;
    PVE::HA::Config::write_manager_status($status_obj);
}
67
c4a221bc
DM
# Read the LRM status for $node; defaults to the local node when no
# node name is passed.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node //= $self->{nodename};

    return PVE::HA::Config::read_lrm_status($node);
}
75
# Write the local node's LRM status object via the cluster configuration layer.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    PVE::HA::Config::write_lrm_status($self->{nodename}, $status_obj);
}
c4a221bc 83
cde77779 84sub is_node_shutdown {
d42219a3
TL
85 my ($self) = @_;
86
cde77779 87 my $shutdown = 0;
f65f41b9 88 my $reboot = 0;
d42219a3
TL
89
90 my $code = sub {
91 my $line = shift;
92
61ae38eb 93 # ensure we match the full unit name by matching /^JOB_ID UNIT /
f65f41b9
TL
94 # see: man systemd.special
95 $shutdown = 1 if ($line =~ m/^\d+\s+shutdown\.target\s+/);
96 $reboot = 1 if ($line =~ m/^\d+\s+reboot\.target\s+/);
d42219a3
TL
97 };
98
61ae38eb 99 my $cmd = ['/bin/systemctl', '--full', 'list-jobs'];
d42219a3
TL
100 eval { PVE::Tools::run_command($cmd, outfunc => $code, noerr => 1); };
101
f65f41b9 102 return ($shutdown, $reboot);
d42219a3
TL
103}
104
139a9b90
DM
# Append a command to the CRM command queue on the cluster filesystem.
sub queue_crm_commands {
    my ($self, $cmd) = @_;
    return PVE::HA::Config::queue_crm_commands($cmd);
}
110
# Fetch the queued CRM commands from the cluster filesystem.
sub read_crm_commands {
    my ($self) = @_;
    return PVE::HA::Config::read_crm_commands();
}
116
b83b4ae8
DM
# Load the HA resources (service) configuration, already checked/sanitized.
sub read_service_config {
    my ($self) = @_;
    return PVE::HA::Config::read_and_check_resources_config();
}
122
0087839a
FG
# Parse a service ID string into its components (delegated to the config layer).
sub parse_sid {
    my ($self, $sid) = @_;
    return PVE::HA::Config::parse_sid($sid);
}
128
c982dfee
TL
# Load the fence device configuration.
sub read_fence_config {
    my ($self) = @_;
    return PVE::HA::Config::read_fence_config();
}
134
# Return the cluster-wide fencing mode from datacenter.cfg, falling
# back to 'watchdog' when none is configured.
sub fencing_mode {
    my ($self) = @_;

    my $dc_cfg = cfs_read_file('datacenter.cfg');

    return $dc_cfg->{fencing} || 'watchdog';
}
144
# Replace the current process image with the given fence agent command.
# Does not return on success; falls through to exit(-1) only when the
# exec() itself fails.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    # setup execution environment
    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    # NOTE(review): single-string exec() may go through the shell; this
    # assumes gen_arg_str() returns properly quoted arguments - verify
    # against PVE::HA::FenceConfig before feeding untrusted parameters.
    my $cmd = "$agent " . PVE::HA::FenceConfig::gen_arg_str(@param);

    exec($cmd);
    exit -1; # only reached if exec() failed
}
156
9da84a0d
TL
# Move a service's config file from $current_node to $new_node by
# renaming it on the cluster filesystem. This is only allowed by the
# master to recover a _fenced_ service.
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    my (undef, $type, $name) = PVE::HA::Tools::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($type);
    die "implement me" if !$plugin;

    my $old = $plugin->config_file($name, $current_node);
    my $new = $plugin->config_file($name, $new_node);

    rename($old, $new)
        || die "rename '$old' to '$new' failed - $!\n";
}
172
abc920b4
DM
# Load the HA group configuration.
sub read_group_config {
    my ($self) = @_;
    return PVE::HA::Config::read_group_config();
}
178
714a4016
DM
# Gather cluster membership information. Returns ($node_info, $quorate):
# $node_info maps node names to { online => 0|1 }, and $quorate tells
# whether the cluster filesystem reports quorum.
sub get_node_info {
    my ($self) = @_;

    my $node_info = {};

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    for my $node (keys %$members) {
        $node_info->{$node}->{online} = $members->{$node}->{online};
    }

    # local node is always up
    $node_info->{$self->{nodename}}->{online} = 1;

    return ($node_info, $quorate);
}
201
# Log a message to syslog at the given level; a trailing newline is
# stripped first so entries stay single-line.
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;
    syslog($level, $msg);
}
209
1b3969b6
TL
# Send a notification mail. Sender and recipient are both plain 'root':
# postfix appends the correct hostname, and /root/.forward makes
# pvemailforward redirect the mail to the address configured in the
# datacenter.
sub sendmail {
    my ($self, $subject, $text) = @_;

    my $recipient = 'root';
    my $sender = 'root';

    PVE::Tools::sendmail($recipient, $subject, $text, undef, $sender);
}
221
d69a79f3 222my $last_lock_status_hash = {};
007fcc8b
DM
223
224sub get_pve_lock {
225 my ($self, $lockid) = @_;
714a4016 226
007fcc8b 227 my $got_lock = 0;
4d24e7db 228
4d24e7db
DM
229 my $filename = "$lockdir/$lockid";
230
d69a79f3
DM
231 $last_lock_status_hash->{$lockid} //= { lock_time => 0, got_lock => 0};
232 my $last = $last_lock_status_hash->{$lockid};
007fcc8b
DM
233
234 my $ctime = time();
d69a79f3
DM
235 my $last_lock_time = $last->{lock_time} // 0;
236 my $last_got_lock = $last->{got_lock};
4d24e7db 237
5d2406c9 238 my $retry_timeout = 120; # hardcoded lock lifetime limit from pmxcfs
63f6a08c 239
4d24e7db
DM
240 eval {
241
242 mkdir $lockdir;
243
007fcc8b
DM
244 # pve cluster filesystem not online
245 die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;
246
cb0bac5e 247 if (($ctime - $last_lock_time) < $retry_timeout) {
737abf2f
DM
248 # try cfs lock update request (utime)
249 if (utime(0, $ctime, $filename)) {
250 $got_lock = 1;
251 return;
252 }
d69a79f3
DM
253 die "cfs lock update failed - $!\n";
254 }
007fcc8b 255
d69a79f3
DM
256 if (mkdir $filename) {
257 $got_lock = 1;
258 return;
007fcc8b 259 }
4d24e7db 260
d69a79f3
DM
261 utime 0, 0, $filename; # cfs unlock request
262 die "can't get cfs lock\n";
4d24e7db
DM
263 };
264
007fcc8b
DM
265 my $err = $@;
266
d69a79f3 267 #$self->log('err', $err) if $err; # for debugging
63f6a08c 268
d69a79f3
DM
269 $last->{got_lock} = $got_lock;
270 $last->{lock_time} = $ctime if $got_lock;
007fcc8b 271
d69a79f3 272 if (!!$got_lock != !!$last_got_lock) {
007fcc8b 273 if ($got_lock) {
63f6a08c 274 $self->log('info', "successfully acquired lock '$lockid'");
007fcc8b
DM
275 } else {
276 my $msg = "lost lock '$lockid";
63f6a08c 277 $msg .= " - $err" if $err;
007fcc8b
DM
278 $self->log('err', $msg);
279 }
280 }
281
282 return $got_lock;
283}
284
# Acquire (or refresh) the cluster-wide CRM manager lock.
sub get_ha_manager_lock {
    my ($self) = @_;
    return $self->get_pve_lock("ha_manager_lock");
}
290
de002253
TL
# Release the cluster-wide manager lock. When released, another CRM may
# step up and get the lock; only call this when shutting down or
# deactivating the current master.
sub release_ha_manager_lock {
    my ($self) = @_;
    return rmdir("$lockdir/ha_manager_lock");
}
299
714a4016 300sub get_ha_agent_lock {
714a4016 301 my ($self, $node) = @_;
63f6a08c 302
f5c29173 303 $node = $self->nodename() if !defined($node);
714a4016 304
f5c29173 305 return $self->get_pve_lock("ha_agent_${node}_lock");
714a4016
DM
306}
307
ff165cd8
TL
# Release the local node's agent lock. Only call this when the node's
# LRM gracefully shuts down with all services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();
    return rmdir("$lockdir/ha_agent_${node}_lock");
}
318
714a4016
DM
# Best-effort quorum check: returns the cluster filesystem's quorum
# state, or 0 when the check itself fails.
sub quorate {
    my ($self) = @_;

    my $quorate = 0;
    eval { $quorate = PVE::Cluster::check_cfs_quorum(); };

    return $quorate;
}
329
# Current wall-clock time in seconds since the epoch.
sub get_time {
    my ($self) = @_;
    return time();
}
335
# Block for $delay seconds (delegates to CORE::sleep, integer resolution).
sub sleep {
    my ($self, $delay) = @_;
    CORE::sleep($delay);
}
341
# Sleep in one-second steps until the wall clock reaches $end_time,
# re-checking the current time before every step.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
353
# Record the loop start timestamp so loop_end_hook() can measure how
# long one work loop took.
sub loop_start_hook {
    my ($self) = @_;

    $self->{loop_start} = $self->get_time();
}
360
# Measure the duration of the work loop started by loop_start_hook()
# and warn when it exceeded 30 seconds.
# Fix: the warning message read "loop take too long" - grammar typo.
sub loop_end_hook {
    my ($self) = @_;

    my $delay = $self->get_time() - $self->{loop_start};

    warn "loop took too long ($delay seconds)\n" if $delay > 30;
}
368
3df15380
TL
# Refresh the cluster filesystem state. Returns 1 on success; on
# failure logs a warning and returns 0.
sub cluster_state_update {
    my ($self) = @_;

    my $ok = eval { PVE::Cluster::cfs_update(1); 1 };
    return 1 if $ok;

    $self->log('warn', "cluster file system update failed - $@");
    return 0;
}
380
76737af5
DM
# Shared connection to the watchdog-mux daemon (one per process).
my $watchdog_fh;

# Open the connection to the watchdog multiplexer socket. Dies when a
# connection is already open or when the socket cannot be reached.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    $watchdog_fh = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    ) || die "unable to open watchdog socket - $!\n";

    $self->log('info', "watchdog active");
}
395
# Send one keep-alive byte over the watchdog-mux socket.
# Returns 1 on success; logs and returns 0 on any write failure.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $written = $watchdog_fh->syswrite("\0", 1);

    if (!defined($written)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    } elsif ($written != 1) {
        $self->log('err', "watchdog update failed - write $written bytes\n");
        return 0;
    }

    return 1;
}
411
# Disable and close the watchdog connection: write the magic 'V' byte,
# then close the socket. Logs an error if the close fails, otherwise
# clears the shared handle so watchdog_open() may be called again.
sub watchdog_close {
    my ($self, $wfh) = @_;

    $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if ($watchdog_fh->close()) {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    } else {
        $self->log('err', "watchdog close failed - $!");
    }
}
423
a2aae08a
TL
# Re-initialize per-process state after fork(): the inotify FD is
# inherited from the parent, so close it and reopen our own, then
# refresh the cluster filesystem state.
sub after_fork {
    my ($self) = @_;

    PVE::INotify::inotify_close();
    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();
}
433
a28fa330
TL
# Maximum number of parallel workers from datacenter.cfg (default: 4).
sub get_max_workers {
    my ($self) = @_;

    my $dc_cfg = cfs_read_file('datacenter.cfg');

    return $dc_cfg->{max_workers} || 4;
}
441
ed408b44
TL
# Return the cluster-wide enforced HA settings from datacenter.cfg.
sub get_ha_settings {
    my ($self) = @_;

    my $dc_cfg = cfs_read_file('datacenter.cfg');

    return $dc_cfg->{ha};
}
450
714a4016 4511;