]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
env: rename get_ha_settings to get_datacenter_settings
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
5db695c3 8use JSON;
714a4016
DM
9
10use PVE::SafeSyslog;
11use PVE::Tools;
119656b9 12use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
ef39a1ca 13use PVE::DataCenterConfig;
022e4e79
DM
14use PVE::INotify;
15use PVE::RPCEnvironment;
714a4016 16
a89ff919 17use PVE::HA::Tools ':exit_codes';
714a4016 18use PVE::HA::Env;
ce216792 19use PVE::HA::Config;
c982dfee 20use PVE::HA::FenceConfig;
9e5ea8f7
DM
21use PVE::HA::Resources;
22use PVE::HA::Resources::PVEVM;
23use PVE::HA::Resources::PVECT;
714a4016 24
9e5ea8f7
DM
# Register the VM and CT resource plugins, then initialize the plugin base
# class once both are registered.
for my $plugin_class (qw(PVE::HA::Resources::PVEVM PVE::HA::Resources::PVECT)) {
    $plugin_class->register();
}

PVE::HA::Resources->init();
022e4e79 29
007fcc8b
DM
30my $lockdir = "/etc/pve/priv/lock";
31
714a4016
DM
# Constructor.
#
# $this     - class name, or an existing object whose class is reused
# $nodename - name of the node this environment acts for (required)
#
# Dies with "missing nodename" when $nodename is false.
sub new {
    my ($this, $nodename) = @_;

    if (!$nodename) {
        die "missing nodename";
    }

    # accept both Class->new(...) and $object->new(...)
    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
45
# Accessor: returns the node name stored by new().
sub nodename {
    my $self = shift;

    return $self->{nodename};
}
51
dd9c0c9d
TL
# Not available in this environment: always dies, because the hardware
# object exists for testing and simulation only.
sub hardware {
    my $self = shift;

    die "hardware is for testing and simulation only";
}
57
714a4016
DM
# Returns the manager status by delegating to
# PVE::HA::Config::read_manager_status(). $self is not used.
sub read_manager_status {
    my $self = shift;

    return PVE::HA::Config::read_manager_status();
}
63
# Stores the manager status by handing $status_obj to
# PVE::HA::Config::write_manager_status().
sub write_manager_status {
    my $self = shift;
    my ($status_obj) = @_;

    PVE::HA::Config::write_manager_status($status_obj);
}
69
c4a221bc
DM
# Returns the LRM status of $node via PVE::HA::Config::read_lrm_status().
# When $node is undefined the node name of this environment is used.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node //= $self->{nodename};

    return PVE::HA::Config::read_lrm_status($node);
}
77
# Stores $status_obj as the LRM status of this environment's own node via
# PVE::HA::Config::write_lrm_status().
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    PVE::HA::Config::write_lrm_status($self->{nodename}, $status_obj);
}
c4a221bc 85
# Checks the output of `systemctl --full list-jobs` for queued shutdown and
# reboot targets.
#
# Returns the list ($shutdown, $reboot); each flag is 1 when a job line for
# the respective target was seen and 0 otherwise. Errors raised while running
# the command are swallowed, in which case the flags collected so far are
# returned.
sub is_node_shutdown {
    my ($self) = @_;

    my %pending = (shutdown => 0, reboot => 0);

    my $parse_job_line = sub {
        my ($line) = @_;

        # ensure we match the full unit name by matching /^JOB_ID UNIT /
        # see: man systemd.special
        if ($line =~ m/^\d+\s+shutdown\.target\s+/) {
            $pending{shutdown} = 1;
        }
        if ($line =~ m/^\d+\s+reboot\.target\s+/) {
            $pending{reboot} = 1;
        }
    };

    eval {
        PVE::Tools::run_command(
            ['/bin/systemctl', '--full', 'list-jobs'],
            outfunc => $parse_job_line,
            noerr => 1,
        );
    };

    return ($pending{shutdown}, $pending{reboot});
}
106
139a9b90
DM
# Queues $cmd by delegating to PVE::HA::Config::queue_crm_commands() and
# returns whatever that call returns.
sub queue_crm_commands {
    my $self = shift;
    my ($cmd) = @_;

    return PVE::HA::Config::queue_crm_commands($cmd);
}
112
# Returns the result of PVE::HA::Config::read_crm_commands().
# $self is not used.
sub read_crm_commands {
    my $self = shift;

    return PVE::HA::Config::read_crm_commands();
}
118
b83b4ae8
DM
# Returns the service configuration as produced by
# PVE::HA::Config::read_and_check_resources_config(). $self is not used.
sub read_service_config {
    my $self = shift;

    return PVE::HA::Config::read_and_check_resources_config();
}
124
76b83c72
FE
# Applies $param to the configuration of service $sid by delegating to
# PVE::HA::Config::update_resources_config(); returns that call's result.
sub update_service_config {
    my $self = shift;
    my ($sid, $param) = @_;

    return PVE::HA::Config::update_resources_config($sid, $param);
}
130
0087839a
FG
# Parses the service ID $sid via PVE::HA::Config::parse_sid() and returns
# its result unchanged (calling context is passed through).
sub parse_sid {
    my $self = shift;
    my ($sid) = @_;

    return PVE::HA::Config::parse_sid($sid);
}
136
c982dfee
TL
# Returns the fence configuration as read by
# PVE::HA::Config::read_fence_config(). $self is not used.
sub read_fence_config {
    my $self = shift;

    return PVE::HA::Config::read_fence_config();
}
142
# Returns the 'fencing' value from datacenter.cfg, falling back to
# 'watchdog' when that value is unset or otherwise false.
sub fencing_mode {
    my ($self) = @_;

    my $dc_conf = cfs_read_file('datacenter.cfg');

    return $dc_conf->{fencing} || 'watchdog';
}
152
# Replaces the current process with the fence agent $agent.
#
# @param is turned into an argument string by
# PVE::HA::FenceConfig::gen_arg_str() and appended to $agent; the result is
# handed to exec() as one single command string. $node is accepted but not
# used here.
#
# Only returns control if exec() fails, in which case the process exits
# with -1.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    # setup execution environment
    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    my $arg_str = PVE::HA::FenceConfig::gen_arg_str(@param);
    my $cmd = join(' ', $agent, $arg_str);

    exec($cmd);
    exit -1;
}
164
9da84a0d
TL
# this is only allowed by the master to recover a _fenced_ service
#
# Moves the config file of service $sid from $current_node to $new_node by
# renaming the path reported by the resource plugin for the old node to the
# one reported for the new node, then refreshes the cluster state.
#
# Dies if no resource plugin is registered for the service type or if the
# rename fails.
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    my (undef, $type, $name) = PVE::HA::Config::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($type);
    die "implement me" if !$plugin;

    my $old = $plugin->config_file($name, $current_node);
    my $new = $plugin->config_file($name, $new_node);
    rename($old, $new)
        or die "rename '$old' to '$new' failed - $!\n";

    # Necessary for (at least) static usage plugin to always be able to read
    # service config from new node right away.
    $self->cluster_state_update();
}
184
abc920b4
DM
# Returns the group configuration as read by
# PVE::HA::Config::read_group_config(). $self is not used.
sub read_group_config {
    my $self = shift;

    return PVE::HA::Config::read_group_config();
}
190
714a4016
DM
# this should return a hash containing info
# what nodes are members and online.
#
# Returns the list ($node_info, $quorate):
#   $node_info - hash ref mapping each member node name to { online => ... },
#                taken from PVE::Cluster::get_members(); the local node is
#                always reported as online
#   $quorate   - result of PVE::Cluster::check_cfs_quorum(1), or 0 if false
sub get_node_info {
    my ($self) = @_;

    my $nodename = $self->{nodename};

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my $node_info = {};
    for my $node (keys %$members) {
        my $member = $members->{$node};
        $node_info->{$node}->{online} = $member->{online};
    }

    $node_info->{$nodename}->{online} = 1; # local node is always up

    return ($node_info, $quorate);
}
213
# Sends $msg to syslog with priority $level, after stripping one trailing
# newline from the message.
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    return syslog($level, $msg);
}
221
1b3969b6
TL
# Mails $text with subject $subject from 'root' to 'root' using
# PVE::Tools::sendmail().
sub sendmail {
    my ($self, $subject, $text) = @_;

    # Sender: leave it to postfix to append the correct hostname.
    # Recipient: /root/.forward makes pvemailforward redirect the mail to the
    # address configured in the datacenter.
    my ($mailto, $mailfrom) = ('root', 'root');

    PVE::Tools::sendmail($mailto, $subject, $text, undef, $mailfrom);
}
233
d69a79f3 234my $last_lock_status_hash = {};
007fcc8b
DM
235
# Try to acquire, or renew, the lock named $lockid.
#
# The lock is the directory "$lockdir/$lockid". The outcome of the previous
# call for the same $lockid is remembered in $last_lock_status_hash:
#
#  - if the last successful acquisition is less than $retry_timeout seconds
#    ago, only a lock update (utime on the lock directory) is attempted
#  - otherwise the lock directory is created with mkdir; if that fails, an
#    unlock request (utime 0, 0) is sent and the attempt counts as failed
#
# A message is logged only when the lock state differs from the previous
# call: 'info' when the lock was acquired, 'err' when it was lost.
#
# Returns 1 if the lock is held after this call, 0 otherwise. Errors are
# never propagated to the caller.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    $last_lock_status_hash->{$lockid} //= { lock_time => 0, got_lock => 0 };
    my $last = $last_lock_status_hash->{$lockid};

    my $ctime = time();
    my $last_lock_time = $last->{lock_time} // 0;
    my $last_got_lock = $last->{got_lock};

    my $retry_timeout = 120; # hardcoded lock lifetime limit from pmxcfs

    eval {
        # result deliberately ignored, existence is verified right below
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if (($ctime - $last_lock_time) < $retry_timeout) {
            # try cfs lock update request (utime)
            if (utime(0, $ctime, $filename)) {
                $got_lock = 1;
                return; # leaves the eval block only
            }
            die "cfs lock update failed - $!\n";
        }

        if (mkdir $filename) {
            $got_lock = 1;
            return; # leaves the eval block only
        }

        utime 0, 0, $filename; # cfs unlock request
        die "can't get cfs lock\n";
    };

    my $err = $@;

    #$self->log('err', $err) if $err; # for debugging

    $last->{got_lock} = $got_lock;
    $last->{lock_time} = $ctime if $got_lock;

    # only log on state changes to avoid flooding the log
    if (!!$got_lock != !!$last_got_lock) {
        if ($got_lock) {
            $self->log('info', "successfully acquired lock '$lockid'");
        } else {
            # fixed: the closing quote after the lock ID was missing
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    }

    return $got_lock;
}
296
# Tries to get (or renew) the lock named "ha_manager_lock" through
# get_pve_lock() and returns its result.
sub get_ha_manager_lock {
    my $self = shift;

    my $lockid = "ha_manager_lock";

    return $self->get_pve_lock($lockid);
}
302
de002253
TL
# release the cluster wide manager lock.
# when released another CRM may step up and get the lock, thus this should only
# get called when shutting down/deactivating the current master
#
# Returns the result of rmdir on the lock directory.
sub release_ha_manager_lock {
    my $self = shift;

    my $lock_path = "$lockdir/ha_manager_lock";

    return rmdir($lock_path);
}
311
# Tries to get (or renew) the agent lock "ha_agent_${node}_lock" through
# get_pve_lock() and returns its result. $node defaults to the node name of
# this environment when undefined.
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    $node //= $self->nodename();

    return $self->get_pve_lock("ha_agent_${node}_lock");
}
319
ff165cd8
TL
# release the respective node agent lock.
# this should only get called if the nodes LRM gracefully shuts down with
# all services already cleanly stopped!
#
# Returns the result of rmdir on the lock directory of the local node.
sub release_ha_agent_lock {
    my $self = shift;

    my $node = $self->nodename();
    my $lock_path = "$lockdir/ha_agent_${node}_lock";

    return rmdir($lock_path);
}
330
714a4016
DM
# Returns the result of PVE::Cluster::check_cfs_quorum(), or 0 when that
# call dies (the error is swallowed).
sub quorate {
    my $self = shift;

    my $quorate = 0;
    eval { $quorate = PVE::Cluster::check_cfs_quorum(); };

    return $quorate;
}
341
# Returns the current time as reported by time().
sub get_time {
    my $self = shift;

    return time();
}
347
# Suspends execution for $delay seconds using the built-in sleep.
sub sleep {
    my ($self, $delay) = @_;

    return CORE::sleep($delay);
}
353
# Blocks until time() has reached $end_time, sleeping in one second steps
# through $self->sleep(). Returns immediately if $end_time already passed.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
365
# Remembers the current time (from get_time()) as the start of a loop
# iteration; loop_end_hook() reads it back from $self->{loop_start}.
sub loop_start_hook {
    my $self = shift;

    $self->{loop_start} = $self->get_time();
}
372
# Computes how long the loop iteration took, i.e. get_time() minus the
# value stored in $self->{loop_start}, and emits a warning when it ran for
# more than 30 seconds.
sub loop_end_hook {
    my $self = shift;

    my $delay = $self->get_time() - $self->{loop_start};

    if ($delay > 30) {
        warn "loop take too long ($delay seconds)\n";
    }
}
380
3df15380
TL
# Refreshes the cluster state by calling PVE::Cluster::cfs_update(1).
#
# Returns 1 on success. If the update dies, the error is logged with level
# 'warn' and 0 is returned.
sub cluster_state_update {
    my ($self) = @_;

    eval { PVE::Cluster::cfs_update(1) };
    my $err = $@;

    return 1 if !$err;

    $self->log('warn', "cluster file system update failed - $err");
    return 0;
}
392
76737af5
DM
393my $watchdog_fh;
394
714a4016
DM
# Opens the connection to the watchdog multiplexer socket
# (/run/watchdog-mux.sock) and keeps it in the file-level $watchdog_fh.
#
# Dies if the watchdog is already open or the socket cannot be opened.
sub watchdog_open {
    my ($self) = @_;

    if (defined($watchdog_fh)) {
        die "watchdog already open\n";
    }

    my $socket = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );
    die "unable to open watchdog socket - $!\n" if !$socket;

    $watchdog_fh = $socket;

    $self->log('info', "watchdog active");
}
407
# Writes a single NUL byte to the open watchdog socket.
#
# Returns 1 if exactly one byte was written. Otherwise the failure is
# logged with level 'err' and 0 is returned. The $wfh parameter is accepted
# but not used; the file-level $watchdog_fh is written to instead.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $written = $watchdog_fh->syswrite("\0", 1);

    if (!defined($written)) {
        $self->log('err', "watchdog update failed - $!\n");
    } elsif ($written != 1) {
        $self->log('err', "watchdog update failed - write $written bytes\n");
    } else {
        return 1;
    }

    return 0;
}
423
# Closes the watchdog connection: writes the "V" byte and then closes the
# socket. On a successful close $watchdog_fh is reset to undef; otherwise
# the error is logged and the handle is kept. The $wfh parameter is
# accepted but not used.
sub watchdog_close {
    my ($self, $wfh) = @_;

    $watchdog_fh->syswrite("V", 1); # magic watchdog close

    if ($watchdog_fh->close()) {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    } else {
        $self->log('err', "watchdog close failed - $!");
    }
}
435
a2aae08a
TL
# Hook for freshly forked child processes: re-creates the inotify state and
# then runs a cluster file system update.
sub after_fork {
    my $self = shift;

    # close inherited inotify FD from parent and reopen our own
    PVE::INotify::inotify_close();
    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();
}
445
a28fa330
TL
# Returns the 'max_workers' value from datacenter.cfg, or 4 when it is
# unset or otherwise false.
sub get_max_workers {
    my $self = shift;

    my $dc_conf = cfs_read_file('datacenter.cfg');
    my $configured = $dc_conf->{max_workers};

    return $configured || 4;
}
453
# return cluster wide enforced HA settings
#
# Reads datacenter.cfg and returns its 'ha' section as a hash reference.
# An empty hash reference is returned when the file cannot be read (the
# error is logged with level 'err') and also when no 'ha' section is
# configured, so callers always get a hash reference back.
sub get_datacenter_settings {
    my ($self) = @_;

    my $datacenterconfig = eval { cfs_read_file('datacenter.cfg') };
    if (my $err = $@) {
        $self->log('err', "unable to get HA settings from datacenter.cfg - $err");
        return {};
    }

    # fall back to an empty hash, consistent with the error path above
    return $datacenterconfig->{ha} // {};
}
466
5db695c3
FE
# Fetches the per-node 'static-info' entries via
# PVE::Cluster::get_node_kv() and replaces each value with its decoded JSON
# form.
#
# Returns the hash reference keyed by node name. A value that cannot be
# decoded is logged with level 'err' and stored as undef.
sub get_static_node_stats {
    my ($self) = @_;

    my $stats = PVE::Cluster::get_node_kv('static-info');

    for my $node (keys %$stats) {
        my $decoded = eval { decode_json($stats->{$node}) };
        my $err = $@;

        $stats->{$node} = $decoded;
        $self->log('err', "unable to decode static node info for '$node' - $err") if $err;
    }

    return $stats;
}
478
714a4016 4791;