]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
improve status API
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
119656b9 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016
DM
14
15use PVE::HA::Tools;
16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
714a4016 18
022e4e79
DM
19use PVE::QemuServer;
20use PVE::API2::Qemu;
21
007fcc8b
DM
# Directory below which get_pve_lock() creates one sub-directory per lock id.
my $lockdir = '/etc/pve/priv/lock';
23
95ea5d67
DM
24
# Plain function (not a method): hands back whatever
# PVE::HA::Config::read_resources_config() returns, in the caller's context.
sub read_resources_config {
    return PVE::HA::Config::read_resources_config();
}
29
# Plain function (not a method): forwards $cfg to
# PVE::HA::Config::write_resources_config() and returns its result.
sub write_resources_config {
    my ($cfg) = @_;

    return PVE::HA::Config::write_resources_config($cfg);
}
714a4016
DM
35
# Constructor.
#
# $this     - class name, or an existing object whose class is reused
# $nodename - name of the local node (required, must be a true value)
#
# Dies with "missing nodename" if no node name is given. Returns the new
# object, which stores the node name under the 'nodename' key.
sub new {
    my ($this, $nodename) = @_;

    if (!$nodename) {
        die "missing nodename";
    }

    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
49
# Accessor: returns the node name stored in the object by new().
sub nodename {
    my ($self) = @_;

    my $name = $self->{nodename};

    return $name;
}
55
# Returns whatever PVE::HA::Config::read_manager_status() returns.
# $self is not used.
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Config::read_manager_status();
}
61
# Forwards $status_obj to PVE::HA::Config::write_manager_status() and
# returns that call's result.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    return PVE::HA::Config::write_manager_status($status_obj);
}
67
c4a221bc
DM
# Reads the LRM status of a node via PVE::HA::Config::read_lrm_status().
#
# $node - optional node name; when undefined the object's own node name
#         ($self->{nodename}) is used instead.
sub read_lrm_status {
    my ($self, $node) = @_;

    my $target = defined($node) ? $node : $self->{nodename};

    return PVE::HA::Config::read_lrm_status($target);
}
75
# Writes $status_obj as the LRM status of the object's own node
# ($self->{nodename}) via PVE::HA::Config::write_lrm_status() and returns
# that call's result.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $local_node = $self->{nodename};

    return PVE::HA::Config::write_lrm_status($local_node, $status_obj);
}
c4a221bc 83
139a9b90
DM
# Passes $cmd on to PVE::HA::Config::queue_crm_commands() and returns
# that call's result.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    return PVE::HA::Config::queue_crm_commands($cmd);
}
89
# Returns whatever PVE::HA::Config::read_crm_commands() returns.
# $self is not used.
sub read_crm_commands {
    my ($self) = @_;

    return PVE::HA::Config::read_crm_commands();
}
95
714a4016
DM
# Builds the service configuration hash ({ $sid => $service_data }) from
# the resources configuration and the cluster VM list.
#
# For every configured service id:
#   - a missing 'state' defaults to 'enabled';
#   - only services of type 'vm' are considered, others are skipped;
#   - if the VM is present in the cluster VM list, its 'node' entry is
#     overwritten with the node recorded there;
#   - otherwise the service is kept only if it already carries a 'node',
#     else a warning is emitted and the service is left out.
#
# Note: the returned entries are the same hash references as in the
# resources configuration, so the 'state'/'node' updates modify that data.
sub read_service_config {
    my ($self) = @_;

    my $res = read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};

        # Call parse_sid() in list context, like the other callers in this
        # file do, instead of relying on its scalar-context return value.
        my (undef, $name) = PVE::HA::Tools::parse_sid($sid);

        $d->{state} = 'enabled' if !defined($d->{state});

        next if $d->{type} ne 'vm';

        if (my $vmd = $vmlist->{ids}->{$name}) {
            # VM is known to the cluster: trust the node recorded there.
            $d->{node} = $vmd->{node};
            $conf->{$sid} = $d;
        } elsif (defined($d->{node})) {
            # VM not in the list, fall back to the configured node.
            $conf->{$sid} = $d;
        } else {
            warn "service '$sid' without node\n";
        }
    }

    return $conf;
}
128
# Moves a service from $current_node to $new_node.
#
# Only services of type 'vm' are supported: the VM configuration file is
# renamed from its path on $current_node to its path on $new_node (both
# paths come from PVE::QemuServer::config_file()).
#
# Dies if the rename fails, or with "implement me" for any other type.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my ($type, $name) = PVE::HA::Tools::parse_sid($sid);

    die "implement me" if $type ne 'vm';

    my $old = PVE::QemuServer::config_file($name, $current_node);
    my $new = PVE::QemuServer::config_file($name, $new_node);

    rename($old, $new)
        or die "rename '$old' to '$new' failed - $!\n";
}
143
abc920b4
DM
# Returns whatever PVE::HA::Config::read_group_config() returns.
# $self is not used.
sub read_group_config {
    my ($self) = @_;

    return PVE::HA::Config::read_group_config();
}
149
714a4016
DM
# Returns the two-element list ($node_info, $quorate).
#
# $node_info - hash ref mapping each node name reported by
#              PVE::Cluster::get_members() to { online => ... }; the entry
#              for the local node is always forced to online => 1.
# $quorate   - result of PVE::Cluster::check_cfs_quorum(1), or 0 if that
#              result is false.
sub get_node_info {
    my ($self) = @_;

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my $node_info = {};
    for my $member (keys %$members) {
        $node_info->{$member}->{online} = $members->{$member}->{online};
    }

    # local node is always up
    my $local = $self->{nodename};
    $node_info->{$local}->{online} = 1;

    return ($node_info, $quorate);
}
172
# Writes $msg (with one trailing newline removed) to syslog at $level.
#
# The message is passed as an argument to a fixed '%s' format rather than
# as the format string itself, so a '%' inside the message (for example
# from an interpolated error text) is logged literally.
# NOTE(review): assumes the imported syslog() takes
# ($priority, $format, @args) like Sys::Syslog::syslog - confirm.
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    syslog($level, '%s', $msg);
}
180
007fcc8b
DM
# Per lock id: time stamp of the last successful acquire/refresh as seen
# by get_pve_lock(), or 0 when the lock was not obtained.
my $last_lock_status = {};

# Tries to acquire, or refresh, the lock directory "$lockdir/$lockid".
#
# - If this process obtained the lock less than $retry_timeout seconds
#   ago, the lock is refreshed by updating the directory's mtime. When
#   that update fails the call returns 0 immediately and leaves the
#   remembered lock state untouched, so the next call retries the refresh.
# - Otherwise the lock is requested by creating the directory; if that
#   fails an unlock request (utime 0, 0) is sent and the lock counts as
#   not obtained.
#
# A transition between "have lock" and "don't have lock" is logged once.
# Returns 1 if the lock is held after the call, 0 otherwise.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    my $retry = 0;
    my $retry_timeout = 100; # fixme: what timeout

    eval {

        # result intentionally ignored, existence is verified right below
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < $retry_timeout)) {
            # send cfs lock update request (utime)
            if (!utime(0, $ctime, $filename)) {
                $retry = 1;
                die "cfs lock update failed - $!\n";
            }
        } else {

            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };

    my $err = $@;

    if ($retry) {
        # $self->log('err', $err) if $err; # for debugging
        return 0;
    }

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # only report changes of the lock state
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully aquired lock '$lockid'");
        } else {
            # closing quote after the lock id was missing in this message
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    } else {
        # $self->log('err', $err) if $err; # for debugging
    }

    return $got_lock;
}
245
# Requests the lock with id "ha_manager_lock" through get_pve_lock() and
# returns that call's result.
sub get_ha_manager_lock {
    my ($self) = @_;

    my $lockid = "ha_manager_lock";

    return $self->get_pve_lock($lockid);
}
251
# Requests the lock "ha_agent_<node>_lock" through get_pve_lock() and
# returns that call's result.
#
# $node - optional node name; when undefined, $self->nodename() is used.
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    my $lock_node = defined($node) ? $node : $self->nodename();

    return $self->get_pve_lock("ha_agent_${lock_node}_lock");
}
259
# Returns the result of PVE::Cluster::check_cfs_quorum(). If that call
# dies, the error is discarded and 0 is returned.
sub quorate {
    my ($self) = @_;

    my $have_quorum = 0;

    eval { $have_quorum = PVE::Cluster::check_cfs_quorum(); };

    return $have_quorum;
}
270
# Returns the current time as reported by the builtin time().
sub get_time {
    my ($self) = @_;

    my $now = time();

    return $now;
}
276
# Blocks for $delay seconds using the builtin sleep and returns its result.
sub sleep {
    my ($self, $delay) = @_;

    return CORE::sleep($delay);
}
282
# Blocks until the builtin time() has reached $end_time, calling
# $self->sleep(1) between checks. Returns at once if $end_time is
# already reached.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
294
# Called at the start of a loop iteration: triggers
# PVE::Cluster::cfs_update() and records the current time (from
# $self->get_time()) in $self->{loop_start} for loop_end_hook().
sub loop_start_hook {
    my ($self) = @_;

    PVE::Cluster::cfs_update();

    my $started = $self->get_time();
    $self->{loop_start} = $started;
}
302
# Called at the end of a loop iteration: warns if more than 30 seconds
# have passed since the time stored in $self->{loop_start}.
sub loop_end_hook {
    my ($self) = @_;

    my $delay = $self->get_time() - $self->{loop_start};

    if ($delay > 30) {
        warn "loop take too long ($delay seconds)\n";
    }
}
310
76737af5
DM
# Socket connected to the watchdog multiplexer; undef while closed.
# Shared by watchdog_open(), watchdog_update() and watchdog_close().
my $watchdog_fh;

# Connects to the UNIX stream socket /run/watchdog-mux.sock and keeps the
# handle in $watchdog_fh.
#
# Dies if the watchdog is already open or if the connection fails (in
# which case $watchdog_fh stays undefined).
sub watchdog_open {
    my ($self) = @_;

    if (defined($watchdog_fh)) {
        die "watchdog already open\n";
    }

    my $sock = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );
    die "unable to open watchdog socket - $!\n" if !$sock;

    $watchdog_fh = $sock;

    $self->log('info', "watchdog active");
}
325
# Writes a single NUL byte to the open watchdog socket.
#
# Returns 1 if exactly one byte was written. On a write error or a write
# of a different length the problem is logged and 0 is returned.
# The second argument ($wfh) is accepted but not used.
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $written = $watchdog_fh->syswrite("\0", 1);

    unless (defined($written)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    }

    unless ($written == 1) {
        $self->log('err', "watchdog update failed - write $written bytes\n");
        return 0;
    }

    return 1;
}
341
# Writes the byte "V" to the watchdog socket and then closes it.
#
# On a successful close $watchdog_fh is reset to undef; if close fails the
# handle is kept and the error is logged.
# The second argument ($wfh) is accepted but not used.
sub watchdog_close {
    my ($self, $wfh) = @_;

    $watchdog_fh->syswrite("V", 1); # magic watchdog close

    if ($watchdog_fh->close()) {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    } else {
        $self->log('err', "watchdog close failed - $!");
    }
}
353
022e4e79
DM
# Blocks until the task described by $upid is no longer running.
#
# The UPID is decoded with PVE::Tools::upid_decode(); the task's pid and
# pstart are then checked once per second with
# PVE::ProcFSTools::check_process_running(). The first check happens after
# an initial one second sleep.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    for (;;) {
        CORE::sleep(1);

        last if !PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});

        $self->log('debug', "Task still active, waiting");
    }
}
365
0d1d32fb
DM
# This environment always reports that forking is possible (returns 1).
sub can_fork {
    my ($self) = @_;

    my $forking_supported = 1;

    return $forking_supported;
}
371
# Executes the resource agent command $cmd for service $sid.
#
# Parameters:
#   $sid            - service id, split with PVE::HA::Tools::parse_sid();
#                     only services of type 'vm' are implemented
#   $service_config - hash ref of the service; its 'node' entry must match
#                     the local node name
#   $cmd            - 'started', 'request_stop', 'stopped', 'migrate' or
#                     'relocate'
#   @params         - extra arguments; for migrate/relocate the first
#                     element is the target node
#
# Returns 0 on success and 1 on failure. Dies for unsupported service
# types, unknown commands, a missing migration target, or when the service
# is not configured on this node.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    PVE::INotify::inotify_close();

    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();

    my $nodename = $self->{nodename};

    # fixme: return valid_exit code (instead of using die) ?

    my ($service_type, $service_name) = PVE::HA::Tools::parse_sid($sid);

    # message previously lacked the space after the quoted type
    die "service type '$service_type' not implemented" if $service_type ne 'vm';

    my $vmid = $service_name;

    my $running = PVE::QemuServer::check_running($vmid, 1);

    if ($cmd eq 'started') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        # fixme: count failures

        return 0 if $running;

        $self->log("info", "starting service $sid");

        my $upid = PVE::API2::Qemu->vm_start({node => $nodename, vmid => $vmid});
        $self->upid_wait($upid);

        # the task result is not inspected, re-check the VM state instead
        $running = PVE::QemuServer::check_running($vmid, 1);

        if ($running) {
            $self->log("info", "service status $sid started");
            return 0;
        } else {
            $self->log("info", "unable to start service $sid");
            return 1;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        return 0 if !$running;

        $self->log("info", "stopping service $sid");

        my $timeout = 60; # fixme: make this configurable

        my $param = {
            node => $nodename,
            vmid => $vmid,
            timeout => $timeout,
            forceStop => 1,
        };

        my $upid = PVE::API2::Qemu->vm_shutdown($param);
        $self->upid_wait($upid);

        $running = PVE::QemuServer::check_running($vmid, 1);

        if (!$running) {
            $self->log("info", "service status $sid stopped");
            return 0;
        } else {
            # report the failure, mirroring the 'started' branch
            $self->log("info", "unable to stop service $sid");
            return 1;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        my $target = $params[0];
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        if ($service_config->{node} eq $target) {
            # already there
            return 0;
        }

        if (!$running) {
            # VM is offline: moving the config file is sufficient
            $self->change_service_location($sid, $nodename, $target);
            $self->log("info", "service $sid moved to node '$target'");
            return 0;
        } else {
            # we always do live migration if VM is online

            my $migrate_param = {
                node => $nodename,
                vmid => $vmid,
                target => $target,
                online => 1,
            };

            my $oldconfig = PVE::QemuServer::config_file($vmid, $nodename);

            my $upid = PVE::API2::Qemu->migrate_vm($migrate_param);
            $self->upid_wait($upid);

            # something went wrong if old config file is still there
            if (-f $oldconfig) {
                $self->log("err", "service $sid not moved (migration error)");
                return 1;
            }

            return 0;
        }

    }

    die "implement me (cmd '$cmd')";
}
496
714a4016 4971;