]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
adjust log level on failed start and error to warning
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
119656b9 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016
DM
14
15use PVE::HA::Tools;
16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
714a4016 18
022e4e79 19
007fcc8b
DM
20my $lockdir = "/etc/pve/priv/lock";
21
714a4016
DM
# Constructor.
#
# Arguments:
#   $this     - class name, or an existing instance whose class is reused
#   $nodename - name of the local node; must be a true value
#
# Returns a new blessed hash with the 'nodename' entry set.
# Dies with "missing nodename" when no (or a false) node name is given.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" unless $nodename;

    # accept both Class->new(...) and $instance->new(...)
    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
35
# Accessor: returns the node name stored by new().
sub nodename {
    my $self = shift;

    return $self->{nodename};
}
41
# Returns whatever PVE::HA::Config::read_manager_status() returns; this
# environment adds no processing of its own.
sub read_manager_status {
    my $self = shift;

    return PVE::HA::Config::read_manager_status();
}
47
# Hands $status_obj to PVE::HA::Config::write_manager_status() and returns
# that call's result.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    return PVE::HA::Config::write_manager_status($status_obj);
}
53
c4a221bc
DM
# Reads the LRM status of a node via PVE::HA::Config::read_lrm_status().
#
# Arguments:
#   $node - optional node name; the local node name is used when undefined
sub read_lrm_status {
    my ($self, $node) = @_;

    unless (defined($node)) {
        $node = $self->{nodename};
    }

    return PVE::HA::Config::read_lrm_status($node);
}
61
# Writes $status_obj as the LRM status of the local node (always
# $self->{nodename}) via PVE::HA::Config::write_lrm_status().
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    return PVE::HA::Config::write_lrm_status($self->{nodename}, $status_obj);
}
c4a221bc 69
139a9b90
DM
# Passes $cmd on to PVE::HA::Config::queue_crm_commands() and returns its
# result.
sub queue_crm_commands {
    my (undef, $cmd) = @_;

    return PVE::HA::Config::queue_crm_commands($cmd);
}
75
# Returns the result of PVE::HA::Config::read_crm_commands() unchanged.
sub read_crm_commands {
    my $self = shift;

    return PVE::HA::Config::read_crm_commands();
}
81
# Returns the result of PVE::HA::Config::resources_config_exists().
sub service_config_exists {
    my $self = shift;

    return PVE::HA::Config::resources_config_exists();
}
714a4016 87
b83b4ae8
DM
# Builds the effective service configuration.
#
# Reads the resources config and the cluster VM list, then for every
# configured service ID:
#   - fills in defaults (state 'enabled', max_restart 1, max_relocate 1),
#   - skips the entry if PVE::HA::Resources has no plugin for its type,
#   - if the service name is found in the VM list, overrides 'node' with
#     the node recorded there,
#   - otherwise keeps the entry only if it already has a 'node'; entries
#     without one are dropped with a warning.
#
# Note that the entries returned by read_resources_config() are modified
# in place (defaults and node are written into them).
#
# NOTE(review): PVE::HA::Resources is not loaded by a `use` line in the
# visible part of this file - confirm it is loaded elsewhere.
#
# Returns a hash ref mapping service ID => service config hash.
sub read_service_config {
    my ($self) = @_;

    my $res = PVE::HA::Config::read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};
        my (undef, undef, $name) = PVE::HA::Tools::parse_sid($sid);

        $d->{state} = 'enabled' if !defined($d->{state});
        $d->{max_restart} = 1 if !defined($d->{max_restart});
        $d->{max_relocate} = 1 if !defined($d->{max_relocate});

        # silently ignore service types without a resource plugin
        next if !PVE::HA::Resources->lookup($d->{type});

        if (my $vmd = $vmlist->{ids}->{$name}) {
            # the VM list knows where the guest currently is
            $d->{node} = $vmd->{node};
            $conf->{$sid} = $d;
        } elsif (defined($d->{node})) {
            $conf->{$sid} = $d;
        } else {
            warn "service '$sid' without node\n";
        }
    }

    return $conf;
}
122
# Moves a service's config file from $current_node to $new_node.
#
# The resource plugin for the service type supplies both file names via
# config_file(); the move itself is a plain rename().
#
# NOTE(review): PVE::HA::Resources is not loaded by a `use` line in the
# visible part of this file - confirm it is loaded elsewhere.
#
# Dies with "implement me" when no plugin exists for the service type, and
# with a "rename ... failed" message when the rename fails.
sub change_service_location {
    my ($self, $sid, $current_node, $new_node) = @_;

    my (undef, $type, $name) = PVE::HA::Tools::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($type);
    die "implement me" if !$plugin;

    my $old = $plugin->config_file($name, $current_node);
    my $new = $plugin->config_file($name, $new_node);

    rename($old, $new)
        or die "rename '$old' to '$new' failed - $!\n";
}
137
abc920b4
DM
# Returns the result of PVE::HA::Config::read_group_config() unchanged.
sub read_group_config {
    my $self = shift;

    return PVE::HA::Config::read_group_config();
}
143
714a4016
DM
# Reports cluster membership and quorum state.
#
# Returns a two element list ($node_info, $quorate):
#   $node_info - hash ref, node name => { online => ... } for every member
#                returned by PVE::Cluster::get_members(); the local node is
#                always reported as online
#   $quorate   - result of PVE::Cluster::check_cfs_quorum(1), or 0 if that
#                is false
sub get_node_info {
    my ($self) = @_;

    my $local_node = $self->{nodename};

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my %info;
    for my $member (keys %$members) {
        my $entry = $members->{$member};
        $info{$member}{online} = $entry->{online};
    }

    # local node is always up
    $info{$local_node}{online} = 1;

    return (\%info, $quorate);
}
166
# Sends $msg to syslog() with the given $level, after stripping one
# trailing line ending from the message.
sub log {
    my ($self, $level, $msg) = @_;

    chomp($msg);

    syslog($level, $msg);
}
174
007fcc8b
DM
# Per-process bookkeeping: lock ID => time of the last successful
# acquire/refresh, or 0 when the lock is not held.
my $last_lock_status = {};

# Tries to acquire or refresh the lock named $lockid below $lockdir.
#
# The lock is a directory "$lockdir/$lockid":
#   - if this process got the lock less than $retry_timeout seconds ago, the
#     lock is refreshed by setting the directory's mtime via utime(). When
#     that fails the call returns 0 but leaves the bookkeeping untouched, so
#     the next call tries the refresh again;
#   - otherwise the lock is requested by creating the directory. When that
#     fails, utime(0, 0) is issued on it (the original comment calls this a
#     "cfs unlock request") and the lock counts as not held.
#
# State changes (lock gained / lock lost) are logged; repeated results are
# not.
#
# Returns 1 if the lock is held after the call, 0 otherwise. Never dies;
# errors inside the lock attempt are caught.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    my $retry = 0;
    my $retry_timeout = 100; # fixme: what timeout

    eval {

        # may already exist; the -d test below decides
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < $retry_timeout)) {
            # send cfs lock update request (utime)
            if (!utime(0, $ctime, $filename)) {
                $retry = 1;
                die "cfs lock update failed - $!\n";
            }
        } else {

            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };

    my $err = $@;

    if ($retry) {
        # $self->log('err', $err) if $err; # for debugging
        return 0;
    }

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # only report transitions between "held" and "not held"
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully acquired lock '$lockid'");
        } else {
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    } else {
        # $self->log('err', $err) if $err; # for debugging
    }

    return $got_lock;
}
239
# Tries to get the lock named "ha_manager_lock"; returns the result of
# get_pve_lock().
sub get_ha_manager_lock {
    my $self = shift;

    return $self->get_pve_lock("ha_manager_lock");
}
245
# Tries to get the agent lock of a node ("ha_agent_<node>_lock").
#
# Arguments:
#   $node - optional node name; defaults to $self->nodename()
#
# Returns the result of get_pve_lock().
sub get_ha_agent_lock {
    my ($self, $node) = @_;

    if (!defined($node)) {
        $node = $self->nodename();
    }

    return $self->get_pve_lock("ha_agent_${node}_lock");
}
253
# Returns the result of PVE::Cluster::check_cfs_quorum(), or 0 when that
# call dies. Errors are swallowed on purpose; nothing is logged.
sub quorate {
    my ($self) = @_;

    my $has_quorum = 0;

    eval { $has_quorum = PVE::Cluster::check_cfs_quorum(); };

    return $has_quorum;
}
264
# Returns the current time as reported by the time() builtin.
sub get_time {
    my $self = shift;

    return time();
}
270
# Blocks for $delay seconds using the builtin sleep.
sub sleep {
    my (undef, $delay) = @_;

    CORE::sleep($delay);
}
276
# Blocks until time() has reached $end_time, sleeping in one second steps
# through $self->sleep(). Returns immediately if $end_time is already
# reached.
sub sleep_until {
    my ($self, $end_time) = @_;

    until (time() >= $end_time) {
        $self->sleep(1);
    }
}
288
# Called at the start of a loop iteration: calls
# PVE::Cluster::cfs_update() and records the iteration's start time in
# $self->{loop_start} (read again by loop_end_hook()).
sub loop_start_hook {
    my $self = shift;

    PVE::Cluster::cfs_update();

    $self->{loop_start} = $self->get_time();
}
296
# Called at the end of a loop iteration: warns when more than 30 seconds
# passed since loop_start_hook() recorded $self->{loop_start}.
sub loop_end_hook {
    my $self = shift;

    my $now = $self->get_time();
    my $delay = $now - $self->{loop_start};

    if ($delay > 30) {
        warn "loop take too long ($delay seconds)\n";
    }
}
304
76737af5
DM
# Socket connected to /run/watchdog-mux.sock while the watchdog is open;
# undef otherwise. Shared by watchdog_open/_update/_close.
my $watchdog_fh;

# Opens the watchdog by connecting a UNIX stream socket to
# /run/watchdog-mux.sock and logs "watchdog active".
#
# Dies if the watchdog is already open or the socket cannot be opened.
sub watchdog_open {
    my ($self) = @_;

    if (defined($watchdog_fh)) {
        die "watchdog already open\n";
    }

    my $socket = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );
    die "unable to open watchdog socket - $!\n" if !$socket;

    $watchdog_fh = $socket;

    $self->log('info', "watchdog active");
}
319
# Sends one keep-alive byte ("\0") over the watchdog socket.
#
# Arguments:
#   $wfh - accepted for interface compatibility but not used; the
#          file-level $watchdog_fh handle is used instead
#
# Returns 1 on success. Returns 0 and logs an error when the write fails
# or does not write exactly one byte.
# Dies with "watchdog not open" when called without a prior successful
# watchdog_open() (previously this crashed with an opaque "Can't call
# method ... on an undefined value").
sub watchdog_update {
    my ($self, $wfh) = @_;

    die "watchdog not open\n" if !defined($watchdog_fh);

    my $res = $watchdog_fh->syswrite("\0", 1);
    if (!defined($res)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    }
    if ($res != 1) {
        $self->log('err', "watchdog update failed - write $res bytes\n");
        return 0;
    }

    return 1;
}
335
# Closes the watchdog: writes the magic byte "V" to the watchdog socket
# (the "magic watchdog close" request) and then closes the socket.
#
# Arguments:
#   $wfh - accepted for interface compatibility but not used; the
#          file-level $watchdog_fh handle is used instead
#
# A failed magic-close write is now logged instead of being silently
# ignored; the socket is closed regardless, as before. On a successful
# close() the handle is reset so that watchdog_open() can be called again;
# on a failed close() the handle is kept and an error is logged.
#
# Dies with "watchdog not open" when called without a prior successful
# watchdog_open() (previously this crashed with an opaque "Can't call
# method ... on an undefined value").
sub watchdog_close {
    my ($self, $wfh) = @_;

    die "watchdog not open\n" if !defined($watchdog_fh);

    # magic watchdog close
    my $res = $watchdog_fh->syswrite("V", 1);
    if (!defined($res)) {
        $self->log('err', "watchdog magic close request failed - $!");
    } elsif ($res != 1) {
        $self->log('err', "watchdog magic close request failed - write $res bytes");
    }

    if (!$watchdog_fh->close()) {
        $self->log('err', "watchdog close failed - $!");
    } else {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    }
}
347
022e4e79
DM
# Blocks until the task identified by $upid is no longer running.
#
# The UPID is decoded with PVE::Tools::upid_decode(); its pid/pstart pair
# is polled once per second through
# PVE::ProcFSTools::check_process_running(). The first check happens only
# after an initial one second sleep.
#
# NOTE(review): PVE::ProcFSTools is not loaded by a `use` line in the
# visible part of this file - confirm it is loaded elsewhere.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    CORE::sleep(1);

    until (!PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart})) {
        $self->log('debug', "Task still active, waiting");
        CORE::sleep(1);
    }
}
359
0d1d32fb
DM
# Always returns 1.
sub can_fork {
    my $self = shift;

    return 1;
}
365
# Executes one resource agent command for a service on the local node.
#
# Arguments:
#   $sid            - service ID, split with PVE::HA::Tools::parse_sid()
#   $service_config - hash ref; only its 'node' entry is read here
#   $cmd            - 'started', 'request_stop', 'stopped', 'migrate',
#                     'relocate' or 'error'
#   @params         - extra arguments; for 'migrate'/'relocate' the first
#                     one is the target node
#
# Before anything else the execution environment is reset: PATH is set to a
# fixed value, inotify is closed and re-initialized, and
# PVE::Cluster::cfs_update() is called.
#
# NOTE(review): PVE::HA::Resources is not loaded by a `use` line in the
# visible part of this file - confirm it is loaded elsewhere.
#
# Returns 0 on success and 1 on failure. Dies when the service type has no
# plugin, the service is not configured for this node, a migration target
# is missing, or $cmd is unknown.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment
    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
    PVE::INotify::inotify_close();
    PVE::INotify::inotify_init();
    PVE::Cluster::cfs_update();

    my $local_node = $self->{nodename};

    # fixme: return valid_exit code (instead of using die) ?

    my (undef, $service_type, $service_name) = PVE::HA::Tools::parse_sid($sid);

    my $plugin = PVE::HA::Resources->lookup($service_type);
    die "service type '$service_type' not implemented" if !$plugin;

    # fixme: return valid_exit code
    die "service '$sid' not on this node" if $service_config->{node} ne $local_node;

    my $guest_id = $service_name;

    # queried up front for every command, including unknown ones
    my $is_up = $plugin->check_running($guest_id);

    if ($cmd eq 'started') {
        return 0 if $is_up;

        $self->log("info", "starting service $sid");

        $plugin->start($self, { node => $local_node, vmid => $guest_id });

        # success is judged by the state after the start attempt
        if ($plugin->check_running($guest_id)) {
            $self->log("info", "service status $sid started");
            return 0;
        }

        $self->log("warning", "unable to start service $sid");
        return 1;
    }

    if ($cmd eq 'request_stop' || $cmd eq 'stopped') {
        return 0 if !$is_up;

        $self->log("info", "stopping service $sid");

        my $timeout = 60; # fixme: make this configurable

        $plugin->shutdown($self, {
            node => $local_node,
            vmid => $guest_id,
            timeout => $timeout,
            forceStop => 1,
        });

        # a service that is still running counts as failure (not logged)
        return 1 if $plugin->check_running($guest_id);

        $self->log("info", "service status $sid stopped");
        return 0;
    }

    if ($cmd eq 'migrate' || $cmd eq 'relocate') {
        my $target = $params[0];
        die "$cmd '$sid' failed - missing target\n" if !defined($target);

        # already there
        return 0 if $service_config->{node} eq $target;

        my $oldconfig = $plugin->config_file($guest_id, $local_node);

        # we always do (live) migration
        $plugin->migrate($self, {
            node => $local_node,
            vmid => $guest_id,
            target => $target,
            online => 1,
        });

        # something went wrong if old config file is still there
        if (-f $oldconfig) {
            $self->log("err", "service $sid not moved (migration error)");
            return 1;
        }

        return 0;
    }

    if ($cmd eq 'error') {
        if ($is_up) {
            $self->log("err", "service $sid is in an error state while running");
        } else {
            $self->log("warning", "service $sid is not running and in an error state");
        }
        return 0;
    }

    die "implement me (cmd '$cmd')";
}
488
714a4016 4891;