]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
lrm: fix stop timeout
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
abc920b4 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016
DM
14
15use PVE::HA::Tools;
16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
714a4016 18
022e4e79
DM
19use PVE::QemuServer;
20use PVE::API2::Qemu;
21
007fcc8b
DM
22my $lockdir = "/etc/pve/priv/lock";
23
ce216792 24my $manager_status_filename = "/etc/pve/ha/manager_status";
7a19642e
DM
25my $ha_groups_config = "/etc/pve/ha/groups.cfg";
26my $ha_resources_config = "/etc/pve/ha/resources.cfg";
abc920b4 27
95ea5d67 28# fixme:
6cbcb5f7
DM
29#cfs_register_file($ha_groups_config,
30# sub { PVE::HA::Groups->parse_config(@_); },
31# sub { PVE::HA::Groups->write_config(@_); });
95ea5d67
DM
32#cfs_register_file($ha_resources_config,
33# sub { PVE::HA::Resources->parse_config(@_); },
34# sub { PVE::HA::Resources->write_config(@_); });
35
# Read and parse the HA resources configuration.
#
# The raw text comes from $ha_resources_config when that path is a plain
# file; otherwise an empty string is handed to the parser. Returns whatever
# PVE::HA::Config::parse_resources_config() returns.
sub read_resources_config {
    my $content = (-f $ha_resources_config)
        ? PVE::Tools::file_get_contents($ha_resources_config)
        : '';

    return PVE::HA::Config::parse_resources_config($ha_resources_config, $content);
}
44
# Serialize $cfg and store it in the HA resources configuration file.
#
# NOTE(review): serialization goes through PVE::HA::Resources->write_config()
# while reading uses PVE::HA::Config::parse_resources_config(); that module is
# not loaded in the visible part of this file - confirm this is intended.
sub write_resources_config {
    my ($cfg) = @_;

    my $serialized = PVE::HA::Resources->write_config($ha_resources_config, $cfg);

    return PVE::Tools::file_set_contents($ha_resources_config, $serialized);
}
51
# Run $code while holding the cluster lock used for HA CRM commands.
#
# Parameters:
#   $code   - code reference executed under the lock
#   $errmsg - optional prefix for the error raised on failure
#
# Returns the value handed back by PVE::Cluster::cfs_lock_storage().
# Dies with $@ (prefixed by "$errmsg: " when given) if $@ is set afterwards.
sub lock_ha_config {
    my ($code, $errmsg) = @_;

    # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
    my $result = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $code);

    if (my $lock_err = $@) {
        die "$errmsg: $lock_err" if $errmsg;
        die $lock_err;
    }

    return $result;
}
714a4016
DM
63
# Constructor.
#
# Parameters:
#   $this     - class name, or an existing object whose class is reused
#   $nodename - name of the local node (required)
#
# Dies with "missing nodename" when $nodename is false.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
77
# Accessor: returns the node name stored in the object.
sub nodename {
    my $self = shift;

    return $self->{nodename};
}
83
# Read the manager status from $manager_status_filename.
#
# Passes an empty hash reference as second argument to
# PVE::HA::Tools::read_json_from_file() (presumably the default used when the
# file is missing - confirm against that helper).
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Tools::read_json_from_file($manager_status_filename, {});
}
91
# Store $status_obj as JSON in $manager_status_filename.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    return PVE::HA::Tools::write_json_to_file($manager_status_filename, $status_obj);
}
99
c4a221bc
DM
# Read the LRM status file of a node.
#
# Parameters:
#   $node - node whose status is read; the local node name is used when
#           this is undefined
#
# Returns the result of PVE::HA::Tools::read_json_from_file(), which is
# called with an empty hash reference as second argument.
sub read_lrm_status {
    my ($self, $node) = @_;

    my $target = defined($node) ? $node : $self->{nodename};

    return PVE::HA::Tools::read_json_from_file("/etc/pve/nodes/$target/lrm_status", {});
}
109
# Store $status_obj as JSON in the LRM status file of the local node.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $local_node = $self->{nodename};

    return PVE::HA::Tools::write_json_to_file(
        "/etc/pve/nodes/$local_node/lrm_status", $status_obj);
}
119
# Returns 1 when the HA resources configuration is a plain file, else 0.
sub service_config_exists {
    my ($self) = @_;

    return 1 if -f $ha_resources_config;

    return 0;
}
125
# Build the service configuration for all configured 'pvevm' resources.
#
# For every resource id in the resources configuration:
#   - 'state' defaults to 'enabled' when not set;
#   - resources whose type is not 'pvevm' are skipped;
#   - when the VM is found in the cluster VM list, its 'node' is taken from
#     that list;
#   - otherwise the resource is kept only if it already carries a 'node',
#     else a warning is emitted and the resource is left out.
#
# Returns a hash reference mapping service id => resource settings.
#
# Note: the former "no such VM" warning sat inside the branch that is only
# entered when the VM entry is true, so it could never be reached; it was
# removed without changing the runtime behaviour.
sub read_service_config {
    my ($self) = @_;

    my $res = read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};

        # NOTE(review): parse_sid() is called in scalar context here but in
        # list context elsewhere in this file; this assumes the scalar
        # result is the service name - confirm against PVE::HA::Tools.
        my $name = PVE::HA::Tools::parse_sid($sid);

        $d->{state} = 'enabled' if !defined($d->{state});

        next if $d->{type} ne 'pvevm';

        if (my $vmd = $vmlist->{ids}->{$name}) {
            # known VM: the cluster VM list decides where it lives
            $d->{node} = $vmd->{node};
            $conf->{$sid} = $d;
        } elsif (defined($d->{node})) {
            # unknown VM: fall back to the configured node
            $conf->{$sid} = $d;
        } else {
            warn "service '$sid' without node\n";
        }
    }

    return $conf;
}
158
8456bde2
DM
# Placeholder: changing the location of a service is not implemented.
# Always dies with "implement me".
sub change_service_location {
    my $self = shift;
    my ($sid, $node) = @_;

    die "implement me";
}
164
abc920b4
DM
# Read and parse the HA groups configuration.
#
# The raw text comes from $ha_groups_config when that path is a plain file;
# otherwise an empty string is handed to the parser. Returns whatever
# PVE::HA::Config::parse_groups_config() returns.
sub read_group_config {
    my ($self) = @_;

    # fixme: use cfs_read_file

    my $content = (-f $ha_groups_config)
        ? PVE::Tools::file_get_contents($ha_groups_config)
        : '';

    return PVE::HA::Config::parse_groups_config($ha_groups_config, $content);
}
177
3b996922
DM
# Append one command line to the CRM command queue file.
#
# The trailing newline of $cmd (if any) is stripped and exactly one newline
# is appended. The read-modify-write cycle runs inside lock_ha_config().
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $append_cmd = sub {
        my $path = "/etc/pve/ha/crm_commands";

        my $pending = '';
        $pending = PVE::Tools::file_get_contents($path) if -f $path;

        $pending .= "$cmd\n";

        PVE::Tools::file_set_contents($path, $pending);
    };

    return lock_ha_config($append_cmd);
}
195
# Fetch all queued CRM commands and empty the queue file.
#
# Runs inside lock_ha_config(). The locked code yields the previous file
# content, or an empty string when the queue file does not exist.
sub read_crm_commands {
    my ($self) = @_;

    my $drain_queue = sub {
        my $path = "/etc/pve/ha/crm_commands";

        return '' if !-f $path;

        my $pending = PVE::Tools::file_get_contents($path);
        PVE::Tools::file_set_contents($path, '');

        return $pending;
    };

    return lock_ha_config($drain_queue);
}
213
714a4016
DM
# Report which nodes are cluster members and whether they are online.
#
# Returns a two element list ($node_info, $quorate):
#   $node_info - hash reference: node name => { online => ... }, copied from
#                PVE::Cluster::get_members(); the local node is always
#                marked online
#   $quorate   - result of PVE::Cluster::check_cfs_quorum(1), or 0 if false
sub get_node_info {
    my ($self) = @_;

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my %info;
    foreach my $member (keys %$members) {
        my $entry = $members->{$member};
        $info{$member} = { online => $entry->{online} };
    }

    $info{$self->{nodename}}->{online} = 1; # local node is always up

    return (\%info, $quorate);
}
236
# Send $msg to syslog with the given $level; a trailing newline is removed.
sub log {
    my $self = shift;
    my ($level, $msg) = @_;

    chomp(my $line = $msg);

    return syslog($level, $line);
}
244
007fcc8b
DM
245my $last_lock_status = {};
246
# Try to acquire, or refresh, the cluster lock named $lockid below $lockdir.
#
# The time of the last successful call is remembered per lock id in
# $last_lock_status. When that is less than 100 seconds ago the lock is
# refreshed via utime(); otherwise the lock directory is created anew.
#
# State changes (lock gained / lock lost) are logged; repeated results are
# not logged again.
#
# Returns 1 when the lock is held after the call, 0 otherwise.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    eval {

        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < 100)) { # fixme: what timeout
            utime(0, $ctime, $filename) || # cfs lock update request
                die "cfs lock update failed - $!\n";
        } else {

            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };

    my $err = $@;

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # only log when the lock state differs from the previous call
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully aquired lock '$lockid'");
        } else {
            # the closing quote after the lock id was missing before
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    }

    return $got_lock;
}
296
# Try to get the cluster wide HA manager lock ("ha_manager_lock").
# Returns the result of get_pve_lock().
sub get_ha_manager_lock {
    my $self = shift;

    my $lockid = "ha_manager_lock";

    return $self->get_pve_lock($lockid);
}
302
# Try to get the HA agent lock of a node ("ha_agent_<node>_lock").
#
# Parameters:
#   $node - node name; the result of $self->nodename() is used when this is
#           undefined
#
# Returns the result of get_pve_lock().
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    my $target = defined($node) ? $node : $self->nodename();

    return $self->get_pve_lock("ha_agent_${target}_lock");
}
310
# Returns the result of PVE::Cluster::check_cfs_quorum(), or 0 when that
# call raises an exception (the exception itself is discarded).
sub quorate {
    my ($self) = @_;

    my $has_quorum = 0;

    eval {
        $has_quorum = PVE::Cluster::check_cfs_quorum();
    };

    return $has_quorum;
}
321
# Returns the current time as reported by time().
sub get_time {
    my $self = shift;

    my $now = time();

    return $now;
}
327
# Suspend execution for $delay seconds using the builtin sleep.
sub sleep {
    my $self = shift;
    my ($delay) = @_;

    return CORE::sleep($delay);
}
333
# Block until time() has reached $end_time, sleeping in one second steps
# through $self->sleep(). Returns immediately if $end_time already passed.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
345
# Called at the start of each main loop iteration: refreshes the cluster
# file system state and remembers the start time in $self->{loop_start}.
sub loop_start_hook {
    my $self = shift;

    PVE::Cluster::cfs_update();

    my $started = $self->get_time();
    $self->{loop_start} = $started;
}
353
# Called at the end of each main loop iteration: warns when more than 30
# seconds have passed since the time stored in $self->{loop_start}.
sub loop_end_hook {
    my $self = shift;

    my $elapsed = $self->get_time() - $self->{loop_start};

    if ($elapsed > 30) {
        warn "loop take too long ($elapsed seconds)\n";
    }
}
361
76737af5
DM
362my $watchdog_fh;
363
714a4016
DM
# Connect to the watchdog multiplexer socket and keep the handle in the
# file level $watchdog_fh.
#
# Dies when the watchdog is already open or the socket cannot be opened.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    my %socket_args = (
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );

    $watchdog_fh = IO::Socket::UNIX->new(%socket_args) ||
        die "unable to open watchdog socket - $!\n";

    $self->log('info', "watchdog active");
}
376
# Signal the watchdog that we are still alive by writing a single NUL byte
# to the watchdog socket.
#
# Parameters:
#   $wfh - unused; the file level $watchdog_fh is always used
#
# Returns 1 on success. On failure the problem is logged and 0 is returned;
# this now includes the case where the watchdog was never opened (or is
# already closed), which previously died with a method call on an undefined
# value.
sub watchdog_update {
    my ($self, $wfh) = @_;

    if (!defined($watchdog_fh)) {
        $self->log('err', "watchdog update failed - watchdog not open\n");
        return 0;
    }

    my $res = $watchdog_fh->syswrite("\0", 1);
    if (!defined($res)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    }
    if ($res != 1) {
        $self->log('err', "watchdog update failed - write $res bytes\n");
        return 0;
    }

    return 1;
}
392
# Disable the watchdog: write the magic close character "V" to the watchdog
# socket and close it. On successful close the file level $watchdog_fh is
# reset to undef, so the watchdog can be opened again.
#
# Parameters:
#   $wfh - unused; the file level $watchdog_fh is always used
#
# Does nothing when the watchdog is not open (previously this died with a
# method call on an undefined value). A failed magic-close write is now
# logged instead of being silently ignored; the socket is closed either way,
# as before.
sub watchdog_close {
    my ($self, $wfh) = @_;

    return if !defined($watchdog_fh);

    my $res = $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if (!defined($res)) {
        $self->log('err', "watchdog magic close failed - $!");
    } elsif ($res != 1) {
        $self->log('err', "watchdog magic close failed - write $res bytes");
    }

    if (!$watchdog_fh->close()) {
        $self->log('err', "watchdog close failed - $!");
    } else {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    }
}
404
022e4e79
DM
# Wait until the task identified by $upid is no longer running.
#
# Sleeps one second before every check, so the call takes at least one
# second. A debug message is logged each time the task is still active.
#
# NOTE(review): PVE::ProcFSTools is not explicitly loaded in the visible
# part of this file - presumably pulled in by another module; confirm.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    for (;;) {
        CORE::sleep(1);

        last if !PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});

        $self->log('debug', "Task still active, waiting");
    }
}
416
0d1d32fb
DM
# This environment always reports that forking is possible (returns 1).
sub can_fork {
    my $self = shift;

    return 1;
}
422
# Execute a resource agent command for service $sid on the local node.
#
# Parameters:
#   $sid            - service id, split with PVE::HA::Tools::parse_sid()
#   $service_config - hash reference; its 'node' entry must be the local node
#   $cmd            - 'started', 'request_stop' or 'stopped'
#   @params         - currently unused
#
# Returns 0 when the service is in the requested state afterwards, 1 when
# starting or stopping failed.
#
# Dies when the service type is not 'pvevm', when the service is not
# located on this node, or for any other command (including 'migrate' and
# 'relocate', which are not implemented yet).
#
# Side effects: overwrites $ENV{PATH}, re-initializes inotify and refreshes
# the cluster file system state before doing anything else.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    PVE::INotify::inotify_close();

    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();

    my $nodename = $self->{nodename};

    # fixme: return valid_exit code (instead of using die) ?

    my ($service_type, $service_name) = PVE::HA::Tools::parse_sid($sid);

    # the message used to lack the space before "not implemented"
    die "service type '$service_type' not implemented" if $service_type ne 'pvevm';

    my $vmid = $service_name;

    my $running = PVE::QemuServer::check_running($vmid, 1);

    if ($cmd eq 'started') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        # fixme: count failures

        return 0 if $running;

        $self->log("info", "starting service $sid");

        my $upid = PVE::API2::Qemu->vm_start({node => $nodename, vmid => $vmid});
        $self->upid_wait($upid);

        $running = PVE::QemuServer::check_running($vmid, 1);

        if ($running) {
            $self->log("info", "service status $sid started");
            return 0;
        } else {
            $self->log("info", "unable to start service $sid");
            return 1;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        return 0 if !$running;

        $self->log("info", "stopping service $sid");

        my $timeout = 60; # fixme: make this configurable

        my $param = {
            node => $nodename,
            vmid => $vmid,
            timeout => $timeout,
            forceStop => 1,
        };

        my $upid = PVE::API2::Qemu->vm_shutdown($param);
        $self->upid_wait($upid);

        $running = PVE::QemuServer::check_running($vmid, 1);

        if (!$running) {
            $self->log("info", "service status $sid stopped");
            return 0;
        } else {
            # report the failure, like the start path does
            $self->log("err", "unable to stop service $sid");
            return 1;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        # implement me

    }

    die "implement me (cmd '$cmd')";
}
510
714a4016 5111;