]> git.proxmox.com Git - pve-ha-manager.git/blame - src/PVE/HA/Env/PVE2.pm
fix clean target
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
CommitLineData
714a4016
DM
1package PVE::HA::Env::PVE2;
2
3use strict;
4use warnings;
76737af5
DM
5use POSIX qw(:errno_h :fcntl_h);
6use IO::File;
115805fd 7use IO::Socket::UNIX;
714a4016
DM
8
9use PVE::SafeSyslog;
10use PVE::Tools;
abc920b4 11use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_lock_file);
022e4e79
DM
12use PVE::INotify;
13use PVE::RPCEnvironment;
714a4016
DM
14
15use PVE::HA::Tools;
16use PVE::HA::Env;
ce216792 17use PVE::HA::Config;
714a4016 18
022e4e79
DM
19use PVE::QemuServer;
20use PVE::API2::Qemu;
21
007fcc8b
DM
# Directory holding the lock directories used by get_pve_lock().
my $lockdir = '/etc/pve/priv/lock';

# HA state and configuration files.
my $manager_status_filename = '/etc/pve/ha/manager_status';
my $ha_groups_config = '/etc/pve/ha/groups.cfg';
my $ha_resources_config = '/etc/pve/ha/resources.cfg';

# Disabled: registration of the groups config with cfs_register_file.
#cfs_register_file($ha_groups_config,
#    sub { PVE::HA::Groups->parse_config(@_); },
#    sub { PVE::HA::Groups->write_config(@_); });
714a4016
DM
31
# Constructor. May be invoked on a class name or on an existing instance
# (in which case the instance's class is reused).
#
# $nodename - name of the local node; must be a true value, otherwise
#             the constructor dies.
#
# Returns the new blessed object with 'nodename' set.
sub new {
    my ($this, $nodename) = @_;

    if (!$nodename) {
        die "missing nodename";
    }

    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
45
# Accessor: returns the node name stored by the constructor.
sub nodename {
    my $self = shift;

    return $self->{nodename};
}
51
# Read the manager status file via PVE::HA::Tools::read_json_from_file,
# passing an empty hash as the second argument (presumably the default
# used when the file cannot be read - confirm in PVE::HA::Tools).
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Tools::read_json_from_file($manager_status_filename, {});
}
59
# Write $status_obj to the manager status file via
# PVE::HA::Tools::write_json_to_file and return that call's result.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    return PVE::HA::Tools::write_json_to_file($manager_status_filename, $status_obj);
}
67
c4a221bc
DM
# Read the lrm_status file of a node.
#
# $node - node to read the status of; defaults to the local node when
#         undefined.
sub read_lrm_status {
    my ($self, $node) = @_;

    if (!defined($node)) {
        $node = $self->{nodename};
    }

    return PVE::HA::Tools::read_json_from_file("/etc/pve/nodes/$node/lrm_status", {});
}
77
# Write $status_obj to the lrm_status file of the local node (always the
# local node - there is no node parameter).
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $local_node = $self->{nodename};

    return PVE::HA::Tools::write_json_to_file(
        "/etc/pve/nodes/$local_node/lrm_status", $status_obj);
}
87
714a4016
DM
# Returns 1 when the manager status file exists as a plain file, else 0.
sub manager_status_exists {
    my ($self) = @_;

    if (-f $manager_status_filename) {
        return 1;
    }

    return 0;
}
93
# Build the effective service configuration from the HA resources file.
#
# The resources file is read directly (treated as empty when it does not
# exist) and parsed with PVE::HA::Config::parse_resources_config. Every
# parsed entry gets 'state' defaulted to 'enabled'. Only entries of type
# 'pvevm' are returned:
#   - if the VM is present in the cluster VM list, 'node' is overwritten
#     with the node from that list;
#   - otherwise the entry is kept only if it already carries a 'node';
#     entries without one are dropped with a warning.
#
# Returns a hash reference mapping service id => service settings.
sub read_service_config {
    my ($self) = @_;

    # fixme: use cfs_read_file

    my $raw = '';

    $raw = PVE::Tools::file_get_contents($ha_resources_config)
        if -f $ha_resources_config;

    my $res = PVE::HA::Config::parse_resources_config($ha_resources_config, $raw);

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
        my $d = $res->{ids}->{$sid};
        $d->{state} = 'enabled' if !defined($d->{state});

        next if $d->{type} ne 'pvevm';

        # The former nested "if (!$vmd)" check could never be true inside
        # "if (my $vmd = ...)" and has been removed; behaviour is unchanged.
        if (my $vmd = $vmlist->{ids}->{$d->{name}}) {
            # VM is in the VM list: use the node recorded there
            $d->{node} = $vmd->{node};
            $conf->{$sid} = $d;
        } elsif (defined($d->{node})) {
            # VM not in the VM list: fall back to the configured node
            $conf->{$sid} = $d;
        } else {
            warn "service '$sid' without node\n";
        }
    }

    return $conf;
}
132
8456bde2
DM
# Placeholder: not implemented yet, always dies.
#
# $sid  - service id
# $node - node argument (unused so far)
sub change_service_location {
    my $self = shift;
    my ($sid, $node) = @_;

    die "implement me";
}
138
abc920b4
DM
# Read and parse the HA groups configuration. A missing file is parsed as
# an empty configuration. Returns whatever
# PVE::HA::Config::parse_groups_config returns.
sub read_group_config {
    my ($self) = @_;

    # fixme: use cfs_read_file

    my $raw = (-f $ha_groups_config)
        ? PVE::Tools::file_get_contents($ha_groups_config)
        : '';

    return PVE::HA::Config::parse_groups_config($ha_groups_config, $raw);
}
151
3b996922
DM
# Append one command line to the crm_commands file.
#
# $cmd - command text; a trailing newline is stripped and exactly one is
#        appended when writing.
#
# The read-modify-write runs inside PVE::Cluster::cfs_lock_storage using
# the id "_ha_crm_commands". Dies with $@ if it is set after the locked
# call; otherwise returns the result of cfs_lock_storage.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    my $append_cmd = sub {
        my $path = "/etc/pve/ha/crm_commands";
        my $content = (-f $path) ? PVE::Tools::file_get_contents($path) : '';
        $content .= "$cmd\n";
        PVE::Tools::file_set_contents($path, $content);
    };

    # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
    my $res = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $append_cmd);
    die $@ if $@;

    return $res;
}
172
# Fetch all queued CRM commands and empty the queue.
#
# Inside PVE::Cluster::cfs_lock_storage (id "_ha_crm_commands") the
# crm_commands file is read and then truncated to an empty string; when
# the file does not exist the locked code yields ''.
#
# Dies with $@ if it is set after the locked call; otherwise returns the
# result of cfs_lock_storage.
sub read_crm_commands {
    my ($self) = @_;

    my $fetch_and_clear = sub {
        my $path = "/etc/pve/ha/crm_commands";

        return '' if !-f $path;

        my $pending = PVE::Tools::file_get_contents($path);
        PVE::Tools::file_set_contents($path, '');

        return $pending;
    };

    # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
    my $res = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $fetch_and_clear);
    die $@ if $@;

    return $res;
}
193
714a4016
DM
# Report cluster membership as seen from this node.
#
# Returns a two element list ($node_info, $quorate):
#   $node_info - hash ref, node name => { online => ... } for every member
#                returned by PVE::Cluster::get_members(); the local node
#                is always reported as online
#   $quorate   - result of PVE::Cluster::check_cfs_quorum(1), or 0 when
#                that is false
sub get_node_info {
    my ($self) = @_;

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();

    my $node_info = {};
    for my $name (keys %$members) {
        $node_info->{$name}->{online} = $members->{$name}->{online};
    }

    # local node is always up
    $node_info->{$self->{nodename}}->{online} = 1;

    return ($node_info, $quorate);
}
216
# Send a message to syslog.
#
# $level - syslog level name passed through to syslog()
# $msg   - message text; a trailing newline is removed first
sub log {
    my $self = shift;
    my ($level, $msg) = @_;

    chomp $msg;

    return syslog($level, $msg);
}
224
007fcc8b
DM
# Per lock id: time of the last successful acquire/renew by this process,
# or 0 when the last attempt failed.
my $last_lock_status = {};

# Try to acquire, or renew, the lock identified by $lockid.
#
# The lock is represented by the directory "$lockdir/$lockid":
#   - if this process got the lock less than 100 seconds ago, the
#     directory's mtime is updated (cfs lock update request);
#   - otherwise the directory is created; if that fails, an unlock request
#     (utime 0, 0) is issued and the attempt counts as failed.
#
# Errors raised while trying are caught, not propagated. A log entry is
# written only when the held/not-held state changes compared to the
# previous call for the same $lockid.
#
# Returns 1 if the lock is held after the call, 0 otherwise.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    eval {

        # result deliberately unchecked; the -d test below covers failure
        mkdir $lockdir;

        # pve cluster filesystem not online
        die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

        if ($last && (($ctime - $last) < 100)) { # fixme: what timeout
            utime(0, $ctime, $filename) || # cfs lock update request
                die "cfs lock update failed - $!\n";
        } else {

            # fixme: wait some time?
            if (!(mkdir $filename)) {
                utime 0, 0, $filename; # cfs unlock request
                die "can't get cfs lock\n";
            }
        }

        $got_lock = 1;
    };

    my $err = $@;

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # only log transitions (acquired <-> lost), not every call
    if (!!$got_lock != !!$last) {
        if ($got_lock) {
            $self->log('info', "successfully aquired lock '$lockid'");
        } else {
            # fixed: the closing quote after the lock id was missing
            my $msg = "lost lock '$lockid'";
            $msg .= " - $err" if $err;
            $self->log('err', $msg);
        }
    }

    return $got_lock;
}
276
# Try to get (or renew) the lock "ha_manager_lock".
# Returns the result of get_pve_lock().
sub get_ha_manager_lock {
    my $self = shift;

    my $lockid = "ha_manager_lock";

    return $self->get_pve_lock($lockid);
}
282
# Try to get (or renew) the agent lock of the local node, i.e. the lock
# "ha_agent_<nodename>_lock". Returns the result of get_pve_lock().
sub get_ha_agent_lock {
    my $self = shift;

    my $node = $self->nodename();
    my $lockid = "ha_agent_${node}_lock";

    return $self->get_pve_lock($lockid);
}
290
# Check whether the agent lock of $node can be obtained.
#
# Tries to get "ha_agent_<node>_lock" via get_pve_lock(); on success the
# lock directory is removed again right away (cfs unlock).
#
# Returns the result of get_pve_lock().
sub test_ha_agent_lock {
    my ($self, $node) = @_;

    my $lockid = "ha_agent_${node}_lock";

    my $got_lock = $self->get_pve_lock($lockid);

    if ($got_lock) {
        rmdir "$lockdir/$lockid"; # cfs unlock
    }

    return $got_lock;
}
301
# Returns the result of PVE::Cluster::check_cfs_quorum(), or 0 when that
# call dies (the exception is swallowed on purpose).
sub quorate {
    my ($self) = @_;

    my $has_quorum = 0;

    eval { $has_quorum = PVE::Cluster::check_cfs_quorum(); };

    return $has_quorum;
}
312
# Returns the current time as reported by time().
sub get_time {
    my $self = shift;

    my $now = time();

    return $now;
}
318
# Block for $delay seconds using the builtin sleep and return its result.
sub sleep {
    my $self = shift;
    my ($delay) = @_;

    return CORE::sleep($delay);
}
324
# Block until the wall clock (time()) has reached $end_time, sleeping in
# one second steps via $self->sleep(). Returns immediately when $end_time
# is already in the past.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
        $self->sleep(1);
    }
}
336
# Called at the start of a loop iteration: calls
# PVE::Cluster::cfs_update() and records the start time in
# $self->{loop_start} (read again by loop_end_hook).
sub loop_start_hook {
    my $self = shift;

    PVE::Cluster::cfs_update();

    my $started = $self->get_time();
    $self->{loop_start} = $started;
}
344
# Called at the end of a loop iteration: warns when more than 30 seconds
# have passed since the time recorded in $self->{loop_start}.
sub loop_end_hook {
    my ($self) = @_;

    my $elapsed = $self->get_time() - $self->{loop_start};

    if ($elapsed > 30) {
        warn "loop take too long ($elapsed seconds)\n";
    }
}
352
76737af5
DM
# Connection to the watchdog socket; undefined while the watchdog is not
# open. Shared by watchdog_open/watchdog_update/watchdog_close.
my $watchdog_fh;

# Open the watchdog by connecting to the unix stream socket
# /run/watchdog-mux.sock.
#
# Dies if the watchdog is already open or the connection cannot be
# established; logs "watchdog active" on success.
sub watchdog_open {
    my ($self) = @_;

    if (defined($watchdog_fh)) {
        die "watchdog already open\n";
    }

    my $sock = IO::Socket::UNIX->new(
        Type => SOCK_STREAM(),
        Peer => "/run/watchdog-mux.sock",
    );
    die "unable to open watchdog socket - $!\n" if !$sock;

    $watchdog_fh = $sock;

    $self->log('info', "watchdog active");
}
367
# Refresh the watchdog by writing a single NUL byte to the watchdog socket.
#
# $wfh - unused; kept for interface compatibility.
#
# Returns 1 on success. On any failure (watchdog not open, write error,
# short write) an error is logged and 0 is returned.
sub watchdog_update {
    my ($self, $wfh) = @_;

    # Calling a method on an undefined handle would die; report it through
    # the normal failure path instead.
    if (!defined($watchdog_fh)) {
        $self->log('err', "watchdog update failed - watchdog not open\n");
        return 0;
    }

    my $res = $watchdog_fh->syswrite("\0", 1);
    if (!defined($res)) {
        $self->log('err', "watchdog update failed - $!\n");
        return 0;
    }
    if ($res != 1) {
        $self->log('err', "watchdog update failed - write $res bytes\n");
        return 0;
    }

    return 1;
}
383
# Disable the watchdog: write the magic byte "V" to the watchdog socket
# and close it.
#
# $wfh - unused; kept for interface compatibility.
#
# Does nothing when the watchdog is not open. If the magic byte cannot be
# written, an error is logged and the socket is left open.
# NOTE(review): presumably closing the socket without the magic byte is
# treated as a failed client by the watchdog multiplexer - confirm.
sub watchdog_close {
    my ($self, $wfh) = @_;

    # never opened or already closed
    return if !defined($watchdog_fh);

    my $res = $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if (!defined($res)) {
        $self->log('err', "watchdog close failed - $!");
        return;
    }
    if ($res != 1) {
        $self->log('err', "watchdog close failed - write $res bytes");
        return;
    }

    if (!$watchdog_fh->close()) {
        $self->log('err', "watchdog close failed - $!");
    } else {
        $watchdog_fh = undef;
        $self->log('info', "watchdog closed (disabled)");
    }
}
395
022e4e79
DM
# Wait for the task identified by $upid to finish.
#
# The UPID is decoded with PVE::Tools::upid_decode; the task's process
# (pid + start time) is then polled once per second, with a debug log
# entry for every poll that still finds it running. Always sleeps at
# least one second before the first check.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    for (;;) {
        CORE::sleep(1);

        last if !PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});

        $self->log('debug', "Task still active, waiting");
    }
}
407
# Execute a resource agent command for one service.
#
# $sid            - service id (used for log and error messages)
# $service_config - service settings; 'type', 'name' and 'node' are read
# $cmd            - 'started', 'request_stop' or 'stopped'; 'migrate' and
#                   'relocate' are recognised but not implemented yet
# @params         - currently unused
#
# Only services of type 'pvevm' are supported; 'name' is used as the VM id.
#
# Returns 0 when the service is in the requested state afterwards, 1 when
# starting/stopping did not succeed. Dies for unsupported service types,
# services located on another node and unimplemented commands.
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment

    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    PVE::INotify::inotify_close();

    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();

    my $nodename = $self->{nodename};

    # fixme: return valid_exit code (instead of using die) ?

    my $service_type = $service_config->{type};

    # fixed: the message was missing the space before "not implemented"
    die "service type '$service_type' not implemented" if $service_type ne 'pvevm';

    my $vmid = $service_config->{name};

    my $running = PVE::QemuServer::check_running($vmid, 1);

    if ($cmd eq 'started') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        # fixme: count failures

        return 0 if $running;

        $self->log("info", "starting service $sid");

        my $upid = PVE::API2::Qemu->vm_start({node => $nodename, vmid => $vmid});
        $self->upid_wait($upid);

        # re-check: the start task having finished does not imply success
        $running = PVE::QemuServer::check_running($vmid, 1);

        if ($running) {
            $self->log("info", "service status $sid started");
            return 0;
        } else {
            $self->log("info", "unable to start service $sid");
            return 1;
        }

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

        # fixme: return valid_exit code
        die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

        return 0 if !$running;

        $self->log("info", "stopping service $sid");

        my $timeout = 60; # fixme: make this configurable

        my $param = {
            node => $nodename,
            vmid => $vmid,
            timeout => $timeout,
            forceStop => 1,
        };

        my $upid = PVE::API2::Qemu->vm_shutdown($param);
        $self->upid_wait($upid);

        $running = PVE::QemuServer::check_running($vmid, 1);

        if (!$running) {
            $self->log("info", "service status $sid stopped");
            return 0;
        } else {
            # log the failure, mirroring the 'started' branch
            $self->log("info", "unable to stop service $sid (still running)");
            return 1;
        }

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

        # implement me

    }

    die "implement me (cmd '$cmd')";
}
495
714a4016 4961;