]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
env: switch to matcher-based notification system
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
5db695c3 8use JSON;
714a4016
DM
9
10use PVE::SafeSyslog;
11use PVE::Tools;
119656b9 12use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
ef39a1ca 13use PVE::DataCenterConfig;
022e4e79
DM
14use PVE::INotify;
15use PVE::RPCEnvironment;
4cb3b2cf 16use PVE::Notify;
714a4016 17
a89ff919 18use PVE::HA::Tools ':exit_codes';
714a4016 19use PVE::HA::Env;
ce216792 20use PVE::HA::Config;
c982dfee 21use PVE::HA::FenceConfig;
9e5ea8f7
DM
22use PVE::HA::Resources;
23use PVE::HA::Resources::PVEVM;
24use PVE::HA::Resources::PVECT;
714a4016 25
9e5ea8f7
DM
# Register the PVEVM and PVECT resource plugins, then initialise the
# plugin registry.
for my $plugin_class (qw(PVE::HA::Resources::PVEVM PVE::HA::Resources::PVECT)) {
    $plugin_class->register();
}

PVE::HA::Resources->init();

# Directory in which get_pve_lock() and the release_*_lock() helpers
# create and remove the lock entries.
my $lockdir = "/etc/pve/priv/lock";
32
714a4016
DM
# Constructor.
#
# Parameters:
#   $this     - class name, or an existing object whose class is reused
#   $nodename - name of the local node (required)
#
# Returns a new blessed hash with the 'nodename' key set.
# Dies with "missing nodename" when $nodename is false.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    # allow calling as a class method as well as an instance method
    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
46
# Accessor: returns the node name stored by new().
sub nodename {
    my ($self) = @_;

    my $name = $self->{nodename};

    return $name;
}
52
dd9c0c9d
TL
# Not available in this environment: always dies, because the hardware
# object only exists for testing and simulation.
sub hardware {
    my ($self) = @_;

    die "hardware is for testing and simulation only";
}
58
714a4016
DM
# Returns whatever PVE::HA::Config::read_manager_status() returns.
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Config::read_manager_status();
}
64
# Hands $status_obj to PVE::HA::Config::write_manager_status(); the result
# of that call is passed through to the caller.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    return PVE::HA::Config::write_manager_status($status_obj);
}
70
c4a221bc
DM
# Returns the LRM status of $node as read by
# PVE::HA::Config::read_lrm_status(). When $node is not defined, the local
# node name is used.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node //= $self->{nodename};

    return PVE::HA::Config::read_lrm_status($node);
}
78
# Writes $status_obj as the LRM status of the local node via
# PVE::HA::Config::write_lrm_status(); the result of that call is passed
# through to the caller.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $local_node = $self->{nodename};

    return PVE::HA::Config::write_lrm_status($local_node, $status_obj);
}
c4a221bc 86
# Checks the queued systemd jobs for a pending shutdown or reboot.
#
# Runs `systemctl --full list-jobs` and scans each output line.
#
# Returns a two element list ($shutdown, $reboot), each 0 or 1. Errors from
# running the command are swallowed, in which case both flags stay 0 unless
# a matching line was already seen.
sub is_node_shutdown {
    my ($self) = @_;

    my %pending = (shutdown => 0, reboot => 0);

    my $scan_job_line = sub {
        my ($line) = @_;

        # ensure we match the full unit name by matching /^JOB_ID UNIT /
        # see: man systemd.special
        $pending{shutdown} = 1 if $line =~ m/^\d+\s+shutdown\.target\s+/;
        $pending{reboot} = 1 if $line =~ m/^\d+\s+reboot\.target\s+/;
    };

    eval {
        PVE::Tools::run_command(
            ['/bin/systemctl', '--full', 'list-jobs'],
            outfunc => $scan_job_line,
            noerr => 1,
        );
    };

    return ($pending{shutdown}, $pending{reboot});
}
107
139a9b90
DM
# Passes $cmd to PVE::HA::Config::queue_crm_commands() and returns its
# result.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    return PVE::HA::Config::queue_crm_commands($cmd);
}
113
# Returns whatever PVE::HA::Config::read_crm_commands() returns.
sub read_crm_commands {
    my ($self) = @_;

    return PVE::HA::Config::read_crm_commands();
}
119
b83b4ae8
DM
# Returns the service (resource) configuration as produced by
# PVE::HA::Config::read_and_check_resources_config().
sub read_service_config {
    my ($self) = @_;

    return PVE::HA::Config::read_and_check_resources_config();
}
125
76b83c72
FE
# Forwards the service id $sid and the parameters $param to
# PVE::HA::Config::update_resources_config() and returns its result.
sub update_service_config {
    my ($self, $sid, $param) = @_;

    return PVE::HA::Config::update_resources_config($sid, $param);
}
131
0087839a
FG
# Parses the service id $sid with PVE::HA::Config::parse_sid() and returns
# its result unchanged.
sub parse_sid {
    my ($self, $sid) = @_;

    return PVE::HA::Config::parse_sid($sid);
}
137
c982dfee
TL
# Returns whatever PVE::HA::Config::read_fence_config() returns.
sub read_fence_config {
    my ($self) = @_;

    return PVE::HA::Config::read_fence_config();
}
143
# Returns the 'fencing' value from datacenter.cfg, or 'watchdog' when that
# value is missing or false.
sub fencing_mode {
    my ($self) = @_;

    my $dc_conf = cfs_read_file('datacenter.cfg');

    return $dc_conf->{fencing} || 'watchdog';
}
153
# Replaces the current process with the fence agent $agent.
#
# The argument string is built from @param by
# PVE::HA::FenceConfig::gen_arg_str(). $node is accepted but not used here.
#
# Does not return: either exec() succeeds, or the process exits with -1.
# NOTE(review): the command is handed to exec() as one string, so quoting
# of the arguments relies entirely on gen_arg_str() - confirm.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    # setup execution environment
    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    my $arg_str = PVE::HA::FenceConfig::gen_arg_str(@param);

    exec("$agent $arg_str");

    # only reached when exec() itself failed
    exit -1;
}
165
9da84a0d
TL
# this is only allowed by the master to recover a _fenced_ service
#
# Moves the config file of service $sid from $current_node to $new_node by
# renaming it, then triggers a cluster state update.
#
# Dies when no resource plugin is registered for the service type, or when
# the rename fails.
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    my (undef, $type, $name) = PVE::HA::Config::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($type);
    die "implement me" if !$plugin;

    my $old = $plugin->config_file($name, $current_node);
    my $new = $plugin->config_file($name, $new_node);

    rename($old, $new)
        || die "rename '$old' to '$new' failed - $!\n";

    # Necessary for (at least) static usage plugin to always be able to read
    # service config from new node right away.
    $self->cluster_state_update();
}
185
abc920b4
DM
# Returns whatever PVE::HA::Config::read_group_config() returns.
sub read_group_config {
    my ($self) = @_;

    return PVE::HA::Config::read_group_config();
}
191
714a4016
DM
# this should return a hash containing info
# what nodes are members and online.
#
# Returns a two element list ($node_info, $quorate):
#   $node_info - hash ref: node name => { online => ... }, taken from
#                PVE::Cluster::get_members(); the local node is always
#                marked online
#   $quorate   - result of PVE::Cluster::check_cfs_quorum(1), or 0 if false
sub get_node_info {
    my ($self) = @_;

    my $local_node = $self->{nodename};

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my $node_info = {};
    for my $node (keys %$members) {
        my $member = $members->{$node};
        $node_info->{$node}->{online} = $member->{online};
    }

    # local node is always up
    $node_info->{$local_node}->{online} = 1;

    return ($node_info, $quorate);
}
214
# Sends $msg to syslog with the given $level, after stripping one trailing
# line ending from the message.
sub log {
    my ($self, $level, $msg) = @_;

    chomp(my $line = $msg);

    syslog($level, $line);
}
222
# Sends an error notification through PVE::Notify::error().
#
# All four arguments ($subject, $text, $template_data, $metadata_fields)
# are passed on unchanged. A failure while notifying never propagates; it
# is only logged with level "warning".
sub send_notification {
    my ($self, $subject, $text, $template_data, $metadata_fields) = @_;

    eval { PVE::Notify::error($subject, $text, $template_data, $metadata_fields); };

    if (my $err = $@) {
        $self->log("warning", "could not notify: $err");
    }
}
232
# Per lock id bookkeeping of the previous get_pve_lock() outcome:
#   lock_time - time of the last successful acquire/refresh
#   got_lock  - whether the previous call held the lock
my $last_lock_status_hash = {};

# Tries to acquire or refresh the lock "$lockdir/$lockid".
#
# Parameters:
#   $lockid - name of the lock entry below $lockdir
#
# Behaviour:
#   - If the last successful acquire/refresh is less than 120 seconds ago,
#     only a lock update request (utime) is attempted.
#   - Otherwise the lock directory is created with mkdir; when that fails,
#     an unlock request (utime 0, 0) is sent and the attempt counts as
#     failed.
#
# A change of the lock state compared to the previous call is logged
# ('info' when acquired, 'err' when lost).
#
# Returns 1 when the lock is held after this call, 0 otherwise. Never dies;
# errors are only reflected in the return value and the log.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    $last_lock_status_hash->{$lockid} //= { lock_time => 0, got_lock => 0 };
    my $last = $last_lock_status_hash->{$lockid};

    my $ctime = time();
    my $last_lock_time = $last->{lock_time} // 0;
    my $last_got_lock = $last->{got_lock};

    my $retry_timeout = 120; # hardcoded lock lifetime limit from pmxcfs

    eval {
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if !-d $lockdir;

        if (($ctime - $last_lock_time) < $retry_timeout) {
            # try cfs lock update request (utime)
            if (utime(0, $ctime, $filename)) {
                $got_lock = 1;
                return; # leaves the eval block only
            }
            die "cfs lock update failed - $!\n";
        }

        if (mkdir $filename) {
            $got_lock = 1;
            return; # leaves the eval block only
        }

        utime 0, 0, $filename; # cfs unlock request
        die "can't get cfs lock\n";
    };

    my $err = $@;

    #$self->log('err', $err) if $err; # for debugging

    $last->{got_lock} = $got_lock;
    $last->{lock_time} = $ctime if $got_lock;

    # only log on state transitions, not on every call
    if (!!$got_lock != !!$last_got_lock) {
        if ($got_lock) {
            $self->log('info', "successfully acquired lock '$lockid'");
        } else {
            # the closing quote after the lock id was missing here
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    }

    return $got_lock;
}
295
# Tries to get the cluster wide manager lock "ha_manager_lock" via
# get_pve_lock() and returns its result.
sub get_ha_manager_lock {
    my ($self) = @_;

    my $lockid = "ha_manager_lock";

    return $self->get_pve_lock($lockid);
}
301
de002253
TL
# release the cluster wide manager lock.
# when released another CRM may step up and get the lock, thus this should only
# get called when shutting down/deactivating the current master
#
# Returns the result of rmdir() on the lock directory.
sub release_ha_manager_lock {
    my ($self) = @_;

    my $lock_path = "$lockdir/ha_manager_lock";

    return rmdir($lock_path);
}
310
# Tries to get the agent lock "ha_agent_<node>_lock" via get_pve_lock() and
# returns its result. When $node is not defined, the local node name is
# used.
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    $node //= $self->nodename();

    my $lockid = "ha_agent_${node}_lock";

    return $self->get_pve_lock($lockid);
}
318
ff165cd8
TL
# release the respective node agent lock.
# this should only get called if the nodes LRM gracefully shuts down with
# all services already cleanly stopped!
#
# Returns the result of rmdir() on the local node's agent lock directory.
sub release_ha_agent_lock {
    my ($self) = @_;

    my $local_node = $self->nodename();
    my $lock_path = "$lockdir/ha_agent_${local_node}_lock";

    return rmdir($lock_path);
}
329
714a4016
DM
# Returns the result of PVE::Cluster::check_cfs_quorum(), or 0 when that
# call dies. Never dies itself.
sub quorate {
    my ($self) = @_;

    my $is_quorate = 0;

    # an exception leaves $is_quorate at its default
    eval { $is_quorate = PVE::Cluster::check_cfs_quorum(); };

    return $is_quorate;
}
340
# Returns the current time as given by the time() builtin.
sub get_time {
    my ($self) = @_;

    my $now = time();

    return $now;
}
346
# Sleeps for $delay seconds using the builtin sleep and passes its result
# through to the caller.
sub sleep {
    my ($self, $delay) = @_;

    return CORE::sleep($delay);
}
352
# Blocks until the wall clock reaches $end_time, sleeping in one second
# steps through $self->sleep(). Returns immediately when $end_time has
# already passed.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
364
# Records the start time of the current loop iteration in
# $self->{loop_start}; loop_end_hook() reads it to compute the duration.
sub loop_start_hook {
    my ($self) = @_;

    my $now = $self->get_time();

    $self->{loop_start} = $now;
}
371
# Emits a warning when more than 30 seconds have passed since the time
# recorded in $self->{loop_start}.
sub loop_end_hook {
    my ($self) = @_;

    my $elapsed = $self->get_time() - $self->{loop_start};

    warn "loop take too long ($elapsed seconds)\n" if $elapsed > 30;
}
379
3df15380
TL
# Refreshes the local view of the cluster file system by calling
# PVE::Cluster::cfs_update(1).
#
# Returns 1 on success. When the update dies, the error is logged and 0 is
# returned; this sub never dies itself.
sub cluster_state_update {
    my ($self) = @_;

    eval { PVE::Cluster::cfs_update(1) };
    if (my $err = $@) {
        # Use 'warning', the same level name send_notification() passes to
        # log(); 'warn' is not one of the standard syslog level names.
        $self->log('warning', "cluster file system update failed - $err");
        return 0;
    }

    return 1;
}
391
76737af5
DM
# Socket connected to the watchdog multiplexer; undef while no watchdog is
# open. Shared by watchdog_open(), watchdog_update() and watchdog_close().
my $watchdog_fh;

# Connects to the UNIX stream socket /run/watchdog-mux.sock and keeps the
# handle in $watchdog_fh.
#
# Dies when a watchdog is already open or when the socket cannot be opened.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    my $sock = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );
    die "unable to open watchdog socket - $!\n" if !$sock;

    $watchdog_fh = $sock;

    $self->log('info', "watchdog active");
}
406
# Updates the watchdog by writing a single NUL byte to the socket opened by
# watchdog_open(). $wfh is accepted but not used.
#
# Returns 1 when exactly one byte was written; otherwise logs the failure
# with level 'err' and returns 0.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $written = $watchdog_fh->syswrite("\0", 1);

    return 1 if defined($written) && $written == 1;

    # undef means the write failed ($! holds the reason), anything else is
    # an unexpected byte count
    my $reason = defined($written) ? "write $written bytes" : "$!";
    $self->log('err', "watchdog update failed - $reason\n");

    return 0;
}
422
# Disables and closes the watchdog opened by watchdog_open().
#
# Writes the magic close character "V" to the socket and then closes it.
# $wfh is accepted but not used. On a successful close $watchdog_fh is
# reset to undef, so the watchdog can be opened again.
#
# A failed or short write of the magic character is logged with level
# 'err'; the socket is closed regardless, as before.
sub watchdog_close {
    my ($self, $wfh) = @_;

    my $written = $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if (!defined($written)) {
        $self->log('err', "watchdog magic close failed - $!");
    } elsif ($written != 1) {
        $self->log('err', "watchdog magic close failed - write $written bytes");
    }

    if (!$watchdog_fh->close()) {
        $self->log('err', "watchdog close failed - $!");
    } else {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    }
}
434
a2aae08a
TL
# Hook to run in a child process right after forking: replaces the inotify
# file descriptor inherited from the parent and then calls
# PVE::Cluster::cfs_update().
sub after_fork {
    my ($self) = @_;

    # close inherited inotify FD from parent and reopen our own
    PVE::INotify::inotify_close();
    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();
}
444
a28fa330
TL
# Returns the 'max_workers' value from datacenter.cfg, or 4 when that value
# is missing or false (including 0).
sub get_max_workers {
    my ($self) = @_;

    my $dc_conf = cfs_read_file('datacenter.cfg');
    my $configured = $dc_conf->{max_workers};

    return $configured ? $configured : 4;
}
452
# return cluster wide enforced HA settings
#
# Returns a hash ref with the keys 'ha' and 'crs', holding the sections of
# the same name from datacenter.cfg. A section that is not defined is
# returned as an empty hash ref. When reading datacenter.cfg dies, the
# error is logged and both sections come back empty.
sub get_datacenter_settings {
    my ($self) = @_;

    my $dc_conf = eval { cfs_read_file('datacenter.cfg') };
    if (my $err = $@) {
        $self->log('err', "unable to get HA settings from datacenter.cfg - $err");
    }

    my $ha_settings = $dc_conf->{ha} // {};
    my $crs_settings = $dc_conf->{crs} // {};

    return {
        ha => $ha_settings,
        crs => $crs_settings,
    };
}
465
5db695c3
FE
# Returns the 'static-info' entries from PVE::Cluster::get_node_kv() as a
# hash ref: node name => decoded JSON value.
#
# An entry that cannot be decoded is logged with level 'err' and its value
# is set to undef.
sub get_static_node_stats {
    my ($self) = @_;

    my $stats = PVE::Cluster::get_node_kv('static-info');

    for my $node (keys %$stats) {
        my $decoded = eval { decode_json($stats->{$node}) };
        my $err = $@;

        # undef when decoding failed
        $stats->{$node} = $decoded;

        $self->log('err', "unable to decode static node info for '$node' - $err") if $err;
    }

    return $stats;
}
477
714a4016 4781;