]> git.proxmox.com Git - pve-ha-manager.git/blob - src/PVE/HA/Env/PVE2.pm
improve API for resources
[pve-ha-manager.git] / src / PVE / HA / Env / PVE2.pm
1 package PVE::HA::Env::PVE2;
2
3 use strict;
4 use warnings;
5 use POSIX qw(:errno_h :fcntl_h);
6 use IO::File;
7 use IO::Socket::UNIX;
8
9 use PVE::SafeSyslog;
10 use PVE::Tools;
11 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_lock_file);
12 use PVE::INotify;
13 use PVE::RPCEnvironment;
14
15 use PVE::HA::Tools;
16 use PVE::HA::Env;
17 use PVE::HA::Config;
18
19 use PVE::QemuServer;
20 use PVE::API2::Qemu;
21
# Directory where pmxcfs exposes its cluster-wide lock directories.
my $lockdir = "/etc/pve/priv/lock";

# Cluster-wide (pmxcfs) paths for HA manager state and configuration files.
my $manager_status_filename = "/etc/pve/ha/manager_status";
my $ha_groups_config = "/etc/pve/ha/groups.cfg";
my $ha_resources_config = "/etc/pve/ha/resources.cfg";

# fixme: register both HA config files with the cluster filesystem so they
# can be read through cfs_read_file (currently read/parsed by hand below).
#cfs_register_file($ha_groups_config,
#		   sub { PVE::HA::Groups->parse_config(@_); },
#		   sub { PVE::HA::Groups->write_config(@_); });
#cfs_register_file($ha_resources_config,
#		   sub { PVE::HA::Resources->parse_config(@_); },
#		   sub { PVE::HA::Resources->write_config(@_); });
35
# Read and parse the HA resources configuration file.
# Returns the parsed configuration; an empty one if the file does not exist.
sub read_resources_config {
    my $content = '';
    if (-f $ha_resources_config) {
	$content = PVE::Tools::file_get_contents($ha_resources_config);
    }

    return PVE::HA::Config::parse_resources_config($ha_resources_config, $content);
}
44
# Serialize $cfg and write it back to the HA resources configuration file.
sub write_resources_config {
    my ($cfg) = @_;

    PVE::Tools::file_set_contents(
	$ha_resources_config,
	PVE::HA::Resources->write_config($ha_resources_config, $cfg));
}
51
# Run $code under the cluster-wide HA lock. Dies (optionally prefixed with
# $errmsg) if the locked code raised an error; returns the code's result.
sub lock_ha_config {
    my ($code, $errmsg) = @_;

    # fixme: do not use cfs_lock_storage (replace with cfs_lock_ha)
    my $result = PVE::Cluster::cfs_lock_storage("_ha_crm_commands", undef, $code);
    if (my $error = $@) {
	die($errmsg ? "$errmsg: $error" : $error);
    }
    return $result;
}
63
# Constructor: create an HA environment object bound to the local $nodename.
# Dies if no node name is given.
sub new {
    my ($this, $nodename) = @_;

    die "missing nodename" if !$nodename;

    my $class = ref($this) || $this;

    return bless { nodename => $nodename }, $class;
}
77
# Accessor: the local node name this environment was created for.
sub nodename {
    my ($self) = @_;
    return $self->{nodename};
}
83
# Read the CRM manager status from pmxcfs as a hash; {} if absent/unreadable.
sub read_manager_status {
    my ($self) = @_;

    return PVE::HA::Tools::read_json_from_file($manager_status_filename, {});
}
91
# Persist the CRM manager status object ($status_obj) to pmxcfs as JSON.
sub write_manager_status {
    my ($self, $status_obj) = @_;

    PVE::HA::Tools::write_json_to_file($manager_status_filename, $status_obj);
}
99
# Read the LRM status of $node (defaults to the local node); {} if absent.
sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    return PVE::HA::Tools::read_json_from_file("/etc/pve/nodes/$node/lrm_status", {});
}
109
# Persist the local node's LRM status object to pmxcfs as JSON.
sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $filename = "/etc/pve/nodes/$self->{nodename}/lrm_status";

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}
119
# Return 1 if a manager status file exists on pmxcfs, 0 otherwise.
sub manager_status_exists {
    my ($self) = @_;

    return (-f $manager_status_filename) ? 1 : 0;
}
125
# Assemble the effective service configuration: a hash mapping service IDs
# to their config entries, with 'state' defaulted to 'enabled' and 'node'
# resolved from the cluster-wide VM list when possible.
# Fix: the original nested an unreachable "if (!$vmd)" branch inside
# "if (my $vmd = ...)", so the "no such VM" warning could never fire;
# the dead branch is removed (resulting behavior is unchanged).
sub read_service_config {
    my ($self) = @_;

    my $res = read_resources_config();

    my $vmlist = PVE::Cluster::get_vmlist();
    my $conf = {};

    foreach my $sid (keys %{$res->{ids}}) {
	my $d = $res->{ids}->{$sid};
	# services are enabled unless the config says otherwise
	$d->{state} = 'enabled' if !defined($d->{state});

	# note: only 'pvevm' services are supported; others are skipped
	next if $d->{type} ne 'pvevm';

	if (my $vmd = $vmlist->{ids}->{$d->{name}}) {
	    # VM is known cluster-wide - its current node is authoritative
	    $d->{node} = $vmd->{node};
	    $conf->{$sid} = $d;
	} elsif (defined($d->{node})) {
	    # VM not in the vmlist - fall back to the configured node
	    $conf->{$sid} = $d;
	} else {
	    warn "service '$sid' without node\n";
	}
    }

    return $conf;
}
157
# Move service $sid to $node - not implemented in this environment yet.
sub change_service_location {
    my ($self, $sid, $node) = @_;

    die "implement me";
}
163
# Read and parse the HA groups configuration file.
# Returns the parsed configuration; an empty one if the file does not exist.
sub read_group_config {
    my ($self) = @_;

    # fixme: use cfs_read_file
    my $content = (-f $ha_groups_config)
	? PVE::Tools::file_get_contents($ha_groups_config)
	: '';

    return PVE::HA::Config::parse_groups_config($ha_groups_config, $content);
}
176
# Append $cmd (one line) to the cluster-wide CRM command queue, holding the
# HA lock while reading and rewriting the queue file.
sub queue_crm_commands {
    my ($self, $cmd) = @_;

    chomp $cmd;

    return lock_ha_config(sub {
	my $filename = "/etc/pve/ha/crm_commands";
	my $data = (-f $filename) ? PVE::Tools::file_get_contents($filename) : '';
	$data .= "$cmd\n";
	PVE::Tools::file_set_contents($filename, $data);
    });
}
194
# Drain the cluster-wide CRM command queue under the HA lock: return the
# queued command text and truncate the queue file ('' if nothing queued).
sub read_crm_commands {
    my ($self) = @_;

    return lock_ha_config(sub {
	my $filename = "/etc/pve/ha/crm_commands";

	return '' if !-f $filename;

	my $data = PVE::Tools::file_get_contents($filename);
	PVE::Tools::file_set_contents($filename, ''); # commands are consumed
	return $data;
    });
}
212
# Return ($node_info, $quorate): a hash describing which cluster members
# exist and whether they are online, plus the local quorum state (0/1).
sub get_node_info {
    my ($self) = @_;

    my $node_info = {};

    my $quorate = PVE::Cluster::check_cfs_quorum(1) || 0;

    my $members = PVE::Cluster::get_members();
    foreach my $name (keys %$members) {
	$node_info->{$name}->{online} = $members->{$name}->{online};
    }

    # the local node is always considered online
    $node_info->{$self->{nodename}}->{online} = 1;

    return ($node_info, $quorate);
}
235
# Write $msg to syslog at the given $level (any trailing newline stripped).
sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    syslog($level, $msg);
}
243
# Per-lock timestamp of the last successful acquisition (0 = not held).
my $last_lock_status = {};

# Acquire or refresh a pmxcfs-based cluster lock directory named $lockid.
# A fresh acquisition creates the lock directory; while the lock is held
# (re-requested within 100s) only its mtime is bumped to keep it alive.
# Returns 1 when the lock is held, 0 otherwise; state transitions
# (acquired <-> lost) are logged.
# Fixes: the "lost lock" message was missing its closing quote, and
# "aquired" was misspelled in the acquisition log message.
sub get_pve_lock {
    my ($self, $lockid) = @_;

    my $got_lock = 0;

    my $filename = "$lockdir/$lockid";

    my $last = $last_lock_status->{$lockid} || 0;

    my $ctime = time();

    eval {

	mkdir $lockdir;

	# pve cluster filesystem not online
	die "can't create '$lockdir' (pmxcfs not mounted?)\n" if ! -d $lockdir;

	if ($last && (($ctime - $last) < 100)) { # fixme: what timeout
	    utime(0, $ctime, $filename) || # cfs lock update request
		die "cfs lock update failed - $!\n";
	} else {

	    # fixme: wait some time?
	    if (!(mkdir $filename)) {
		utime 0, 0, $filename; # cfs unlock request
		die "can't get cfs lock\n";
	    }
	}

	$got_lock = 1;
    };

    my $err = $@;

    $last_lock_status->{$lockid} = $got_lock ? $ctime : 0;

    # log only when the lock state actually changed
    if (!!$got_lock != !!$last) {
	if ($got_lock) {
	    $self->log('info', "successfully acquired lock '$lockid'");
	} else {
	    my $msg = "lost lock '$lockid'";
	    $msg .= " - $err" if $err;
	    $self->log('err', $msg);
	}
    }

    return $got_lock;
}
295
# Try to acquire/refresh the cluster-wide HA manager (CRM) lock.
sub get_ha_manager_lock {
    my ($self) = @_;
    return $self->get_pve_lock("ha_manager_lock");
}
301
# Try to acquire/refresh the local node's HA agent (LRM) lock.
sub get_ha_agent_lock {
    my ($self) = @_;

    my $lockid = sprintf("ha_agent_%s_lock", $self->nodename());
    return $self->get_pve_lock($lockid);
}
309
# Probe whether the agent lock for $node could be acquired; releases it
# again immediately on success. Returns the acquisition result (0/1).
sub test_ha_agent_lock {
    my ($self, $node) = @_;

    my $lockid = "ha_agent_${node}_lock";
    my $got = $self->get_pve_lock($lockid);
    rmdir "$lockdir/$lockid" if $got; # cfs unlock

    return $got;
}
320
# Return the cluster quorum state; 0 if the check raised an error.
sub quorate {
    my ($self) = @_;

    my $quorate = 0;
    # check_cfs_quorum may die when pmxcfs is unavailable - treat as no quorum
    eval { $quorate = PVE::Cluster::check_cfs_quorum(); };

    return $quorate;
}
331
# Return the current wall-clock time in epoch seconds.
sub get_time {
    my ($self) = @_;

    return time();
}
337
# Sleep for $delay seconds. CORE::sleep is used explicitly because this
# package defines its own 'sleep' sub, which would otherwise be called.
sub sleep {
    my ($self, $delay) = @_;

    CORE::sleep($delay);
}
343
# Sleep in 1-second steps until the wall clock reaches $end_time
# (epoch seconds). Returns immediately if $end_time is already past.
sub sleep_until {
    my ($self, $end_time) = @_;

    while (time() < $end_time) {
	$self->sleep(1);
    }
}
355
# Called at the start of each main-loop iteration: refresh the pmxcfs view
# and remember the iteration start time for the end-of-loop duration check.
sub loop_start_hook {
    my ($self) = @_;

    PVE::Cluster::cfs_update();

    $self->{loop_start} = $self->get_time();
}
363
# Called at the end of each main-loop iteration: warn when one iteration
# took more than 30 seconds.
sub loop_end_hook {
    my ($self) = @_;

    my $delay = $self->get_time() - $self->{loop_start};
    warn "loop take too long ($delay seconds)\n" if $delay > 30;
}
371
# Connection to the watchdog multiplexer; one shared handle per process.
my $watchdog_fh;

# Connect to the watchdog-mux socket, arming the watchdog. Dies if it is
# already open or the socket cannot be reached.
sub watchdog_open {
    my ($self) = @_;

    die "watchdog already open\n" if defined($watchdog_fh);

    $watchdog_fh = IO::Socket::UNIX->new(
	Type => SOCK_STREAM(),
	Peer => "/run/watchdog-mux.sock",
    ) || die "unable to open watchdog socket - $!\n";

    $self->log('info', "watchdog active");
}
386
# Send a keep-alive byte to the watchdog multiplexer.
# Returns 1 on success, 0 on any write failure (which is logged).
sub watchdog_update {
    my ($self, $wfh) = @_;

    my $written = $watchdog_fh->syswrite("\0", 1);

    if (!defined($written)) {
	$self->log('err', "watchdog update failed - $!\n");
	return 0;
    } elsif ($written != 1) {
	$self->log('err', "watchdog update failed - write $written bytes\n");
	return 0;
    }

    return 1;
}
402
# Disarm and close the watchdog connection ("V" is the magic disarm byte).
# NOTE(review): on close failure the handle stays set, so a later
# watchdog_open would die "already open" - presumably intentional; confirm.
sub watchdog_close {
    my ($self, $wfh) = @_;

    $watchdog_fh->syswrite("V", 1); # magic watchdog close
    if ($watchdog_fh->close()) {
	$watchdog_fh = undef;
	$self->log('info', "watchdog closed (disabled)");
    } else {
	$self->log('err', "watchdog close failed - $!");
    }
}
414
# Block (polling once per second) until the worker task identified by
# $upid has finished.
# NOTE(review): PVE::ProcFSTools has no explicit 'use' in this file -
# presumably loaded indirectly via another PVE module; confirm.
sub upid_wait {
    my ($self, $upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    my $active = 1;
    while ($active) {
	CORE::sleep(1);
	$active = PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});
	$self->log('debug', "Task still active, waiting") if $active;
    }
}
426
# Execute a resource-agent command for service $sid on the local node.
# $cmd is one of 'started', 'request_stop', 'stopped' (implemented) or
# 'migrate'/'relocate' (not yet implemented). Only 'pvevm' services are
# supported. Returns 0 on success, 1 on failure; dies for unimplemented
# commands or violated preconditions (wrong node, unknown service type).
# Fix: the "service type ... not implemented" die message was missing a
# space between the quoted type and "not implemented".
sub exec_resource_agent {
    my ($self, $sid, $service_config, $cmd, @params) = @_;

    # setup execution environment
    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

    # re-initialize inotify and refresh the pmxcfs view, since this runs
    # in a forked worker context
    PVE::INotify::inotify_close();
    PVE::INotify::inotify_init();

    PVE::Cluster::cfs_update();

    my $nodename = $self->{nodename};

    # fixme: return valid_exit code (instead of using die) ?

    my $service_type = $service_config->{type};

    die "service type '$service_type' not implemented" if $service_type ne 'pvevm';

    my $vmid = $service_config->{name};

    my $running = PVE::QemuServer::check_running($vmid, 1);

    if ($cmd eq 'started') {

	# fixme: return valid_exit code
	die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

	# fixme: count failures

	return 0 if $running; # already up - nothing to do

	$self->log("info", "starting service $sid");

	my $upid = PVE::API2::Qemu->vm_start({node => $nodename, vmid => $vmid});
	$self->upid_wait($upid);

	$running = PVE::QemuServer::check_running($vmid, 1);

	if ($running) {
	    $self->log("info", "service status $sid started");
	    return 0;
	} else {
	    $self->log("info", "unable to start service $sid");
	    return 1;
	}

    } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') {

	# fixme: return valid_exit code
	die "service '$sid' not on this node" if $service_config->{node} ne $nodename;

	return 0 if !$running; # already stopped - nothing to do

	$self->log("info", "stopping service $sid");

	my $timeout = 60; # fixme: make this configurable

	my $param = {
	    node => $nodename,
	    vmid => $vmid,
	    timeout => $timeout,
	    forceStop => 1, # hard-stop if graceful shutdown exceeds timeout
	};

	my $upid = PVE::API2::Qemu->vm_shutdown($param);
	$self->upid_wait($upid);

	$running = PVE::QemuServer::check_running($vmid, 1);

	if (!$running) {
	    $self->log("info", "service status $sid stopped");
	    return 0;
	} else {
	    return 1;
	}

    } elsif ($cmd eq 'migrate' || $cmd eq 'relocate') {

	# implement me

    }

    die "implement me (cmd '$cmd')";
}
514
515 1;