]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
add regression test for 'stopped' state
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
119656b9 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016 14
a89ff919 15use PVE::HA::Tools ':exit_codes';
714a4016 16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
c982dfee 18use PVE::HA::FenceConfig;
9e5ea8f7
DM
19use PVE::HA::Resources;
20use PVE::HA::Resources::PVEVM;
21use PVE::HA::Resources::PVECT;
714a4016 22
9e5ea8f7
DM
# Register the HA resource plugins shipped with PVE (qemu VMs and
# containers), then finalize the plugin lookup tables.
PVE::HA::Resources::PVEVM->register();
PVE::HA::Resources::PVECT->register();

PVE::HA::Resources->init();

# All cluster-wide HA locks live below this pmxcfs-backed directory.
my $lockdir = "/etc/pve/priv/lock";
714a4016
DM
# Construct a new PVE2 HA environment bound to the given cluster node.
# Dies when no node name is passed.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    # allow calling as either class or instance method
    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
43
# Return the name of the local cluster node this environment was
# created for.
sub nodename {
    my ($self) = @_;
    return $self->{nodename};
}
49
# The simulated hardware layer exists only in the test/simulation
# environments; the production PVE2 environment refuses to provide it.
sub hardware {
    my ($self) = @_;
    die "hardware is for testing and simulation only";
}
55
# Load the CRM master state from the cluster filesystem.
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Config::read_manager_status();
}

# Persist the CRM master state to the cluster filesystem.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    PVE::HA::Config::write_manager_status($status_obj);
}
67
# Read the LRM status of $node; defaults to the local node when no
# node is given.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    return PVE::HA::Config::read_lrm_status($node);
}

# Write the LRM status of the local node; LRM state is always stored
# under the node it belongs to.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    PVE::HA::Config::write_lrm_status($self->{nodename}, $status_obj);
}
c4a221bc 83
# Heuristic check whether the local node is currently shutting down:
# scan the pending systemd jobs for a queued 'shutdown.target'.
# Returns 1 when such a job is queued, 0 otherwise (errors from running
# systemctl are swallowed by the eval and also yield 0).
sub is_node_shutdown {
    my ($self) = @_;

    my $shutdown = 0;

    my $scan_line = sub {
        my ($line) = @_;
        # any queued job referencing shutdown.target counts
        $shutdown = 1 if $line =~ m/shutdown\.target/;
    };

    eval {
        PVE::Tools::run_command(
            ['/bin/systemctl', 'list-jobs'],
            outfunc => $scan_line,
            noerr => 1,
        );
    };

    return $shutdown;
}
100
139a9b90
DM
# Append a CRM command to the cluster-wide command queue.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    return PVE::HA::Config::queue_crm_commands($cmd);
}

# Fetch the queued CRM commands for processing by the master.
sub read_crm_commands {
    my ($self) = @_;

    return PVE::HA::Config::read_crm_commands();
}
112
b83b4ae8
DM
# Return the HA resources configuration, already syntax/consistency
# checked by the config layer.
sub read_service_config {
    my ($self) = @_;

    return PVE::HA::Config::read_and_check_resources_config();
}
118
c982dfee
TL
# Return the parsed fence device configuration.
sub read_fence_config {
    my ($self) = @_;

    return PVE::HA::Config::read_fence_config();
}

# Return the cluster fencing mode from datacenter.cfg; falls back to
# 'watchdog' when no explicit (truthy) mode is configured.
sub fencing_mode {
    my ($self) = @_;

    my $datacenterconfig = cfs_read_file('datacenter.cfg');

    return $datacenterconfig->{fencing} || 'watchdog';
}
134
# Execute a fence agent, replacing the current process image (never
# returns on success); intended to be called from a forked child.
sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    # use a fixed, trusted search path for locating the agent binary
    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    # NOTE(review): single-string exec() goes through the shell, so the
    # generated argument string must stay shell-safe - verify that
    # gen_arg_str() escapes/validates values from the fence config.
    my $cmd = "$agent " . PVE::HA::FenceConfig::gen_arg_str(@param);

    exec($cmd);
    exit -1; # only reached when exec() itself failed
}
146
9da84a0d
TL
# Move the configuration of a _fenced_ service from $current_node to
# $new_node by renaming its config file on the cluster filesystem.
# Only the CRM master is allowed to do this, for recovery purposes.
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    my (undef, $type, $name) = PVE::HA::Tools::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($type);
    die "implement me" if !$plugin;

    my $old = $plugin->config_file($name, $current_node);
    my $new = $plugin->config_file($name, $new_node);

    rename($old, $new)
        || die "rename '$old' to '$new' failed - $!\n";
}
162
abc920b4
DM
# Return the parsed HA group configuration.
sub read_group_config {
    my ($self) = @_;

    return PVE::HA::Config::read_group_config();
}
168
714a4016
DM
# Collect cluster membership information. Returns ($node_info, $quorate)
# where $node_info maps node names to { online => 0|1 } and $quorate
# tells whether the cluster filesystem currently has quorum.
sub get_node_info {
    my ($self) = @_;

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $node_info = {};
    my $members = PVE::Cluster::get_members();
    foreach my $node (keys %$members) {
        $node_info->{$node}->{online} = $members->{$node}->{online};
    }

    # the local node is by definition always up
    $node_info->{$self->{nodename}}->{online} = 1;

    return ($node_info, $quorate);
}
191
# Write a message to syslog at the given level; any trailing newline
# is stripped first.
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    syslog($level, $msg);
}
199
1b3969b6
TL
# Send a notification mail to the local root account, using the local
# node's root address as sender.
sub sendmail {
    my ($self, $subject, $text) = @_;

    my $mailfrom = 'root@' . $self->nodename();

    PVE::Tools::sendmail('root@localhost', $subject, $text, undef, $mailfrom);
}
208
007fcc8b
DM
# Remember the last time we successfully held each lock (0 = not held),
# so we can distinguish a fresh acquire from a renewal and detect loss.
my $last_lock_status = {};

# Acquire or renew a pmxcfs-based cluster lock (a directory below
# $lockdir). Returns 1 when the lock is held, 0 otherwise. State
# transitions (acquired <-> lost) are logged; a failed renewal within
# the retry window returns 0 without changing the recorded state.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    my $retry = 0;
    my $retry_timeout = 100; # fixme: what timeout

    eval {

        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < $retry_timeout)) {
            # we held the lock recently - send cfs lock update request (utime)
            if (!utime(0, $ctime, $filename)) {
                $retry = 1;
                die "cfs lock update failed - $!\n";
            }
        } else {

            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };

    my $err = $@;

    if ($retry) {
        # transient renewal failure - keep last state, report not locked
        # $self->log('err', $err) if $err; # for debugging
        return 0;
    }

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # log only on state changes (acquired <-> lost)
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully acquired lock '$lockid'");
        } else {
            # fixed: message previously left the quote around $lockid unbalanced
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    } else {
        # $self->log('err', $err) if $err; # for debugging
    }

    return $got_lock;
}
273
# Try to acquire (or renew) the cluster-wide CRM master lock.
sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->get_pve_lock("ha_manager_lock");
}

# release the cluster wide manager lock.
# when released another CRM may step up and get the lock, thus this should only
# get called when shutting down/deactivating the current master
sub release_ha_manager_lock {
    my ($self) = @_;

    return rmdir("$lockdir/ha_manager_lock");
}
288
# Try to acquire (or renew) the per-node LRM agent lock; defaults to
# the local node when no node name is given.
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    $node = $self->nodename() if !defined($node);

    return $self->get_pve_lock("ha_agent_${node}_lock");
}

# release the respective node agent lock.
# this should only get called if the nodes LRM gracefully shuts down with
# all services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();

    return rmdir("$lockdir/ha_agent_${node}_lock");
}
307
714a4016
DM
# Return 1 when the cluster filesystem has quorum, 0 otherwise.
# Errors from the quorum check are treated as "not quorate".
sub quorate {
    my ($self) = @_;

    my $quorate = 0;
    eval { $quorate = PVE::Cluster::check_cfs_quorum(); };

    return $quorate;
}
318
# Current wall-clock time in seconds since the epoch.
sub get_time {
    my ($self) = @_;

    return time();
}

# Block for $delay seconds.
sub sleep {
    my ($self, $delay) = @_;

    CORE::sleep($delay);
}

# Sleep in one-second steps until the wall clock reaches $end_time;
# returns immediately when $end_time is already in the past.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
342
# Called at the start of each work cycle: refresh the pmxcfs state
# cache and remember the cycle start time for loop_end_hook().
sub loop_start_hook {
    my ($self) = @_;

    PVE::Cluster::cfs_update();

    $self->{loop_start} = $self->get_time();
}

# Called at the end of each work cycle; warn when a single cycle took
# suspiciously long (more than 30 seconds).
sub loop_end_hook {
    my ($self) = @_;

    my $delay = $self->get_time() - $self->{loop_start};

    # fixed grammar of the warning message ("take" -> "took")
    warn "loop took too long ($delay seconds)\n" if $delay > 30;
}
358
# Connection to the watchdog-mux daemon, shared by the watchdog_*
# methods below (one watchdog connection per process).
my $watchdog_fh;

# Connect to the watchdog multiplexer socket, arming the watchdog.
# Dies when already open or when the socket is unavailable.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    $watchdog_fh = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    ) || die "unable to open watchdog socket - $!\n";

    $self->log('info', "watchdog active");
}
373
# Feed the watchdog by writing a single byte to the multiplexer.
# Returns 1 on success, 0 on any failure. $wfh is unused; it is kept
# for interface compatibility with the other Env implementations.
sub watchdog_update {
    my ($self, $wfh) = @_;

    # guard against calls before watchdog_open() / after watchdog_close();
    # previously this crashed with a method call on an undefined value
    if (!defined($watchdog_fh)) {
	$self->log('err', "watchdog update failed - watchdog not open\n");
	return 0;
    }

    my $res = $watchdog_fh->syswrite("\0", 1);
    if (!defined($res)) {
	$self->log('err', "watchdog update failed - $!\n");
	return 0;
    }
    if ($res != 1) {
	$self->log('err', "watchdog update failed - write $res bytes\n");
	return 0;
    }

    return 1;
}
389
# Disarm and close the watchdog connection. Writes the magic 'V' byte
# first so the multiplexer disables the watchdog instead of treating
# the closed connection as a node failure. $wfh is unused (interface
# compatibility).
sub watchdog_close {
    my ($self, $wfh) = @_;

    # guard against closing a watchdog that was never opened;
    # previously this crashed with a method call on an undefined value
    if (!defined($watchdog_fh)) {
	$self->log('err', "watchdog close failed - watchdog not open");
	return;
    }

    $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if (!$watchdog_fh->close()) {
	$self->log('err', "watchdog close failed - $!");
    } else {
	$watchdog_fh = undef;
	$self->log('info', "watchdog closed (disabled)");
    }
}
401
# Reset per-process state after fork(): the inotify FD and the pmxcfs
# IPC state must not be shared with the parent process.
sub after_fork {
    my ($self) = @_;

    # close inherited inotify FD from parent and reopen our own
    PVE::INotify::inotify_close();
    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();
}
411
# Maximum number of parallel worker processes, taken from the
# 'max_workers' setting in datacenter.cfg. Note: a missing, undef or 0
# value falls back to the default of 4 ('||' semantics).
sub get_max_workers {
    my ($self) = @_;

    my $datacenterconfig = cfs_read_file('datacenter.cfg');

    return $datacenterconfig->{max_workers} || 4;
}

1;