]> git.proxmox.com Git - pve-cluster.git/blob - data/PVE/Cluster.pm
grammar fix: s/does not exists/does not exist/g
[pve-cluster.git] / data / PVE / Cluster.pm
1 package PVE::Cluster;
2
3 use strict;
4 use warnings;
5
6 use Encode;
7 use File::stat qw();
8 use JSON;
9 use Net::SSLeay;
10 use POSIX qw(ENOENT);
11 use Socket;
12 use Storable qw(dclone);
13
14 use PVE::Certificate;
15 use PVE::INotify;
16 use PVE::IPCC;
17 use PVE::JSONSchema;
18 use PVE::Network;
19 use PVE::SafeSyslog;
20 use PVE::Tools qw(run_command);
21
22 use PVE::Cluster::IPCConst;
23
24 use base 'Exporter';
25
26 our @EXPORT_OK = qw(
27 cfs_read_file
28 cfs_write_file
29 cfs_register_file
30 cfs_lock_file);
31
32 # x509 certificate utils
33
# directories below the mount point checked by check_cfs_is_mounted()
my $basedir = '/etc/pve';
my $authdir = "${basedir}/priv";
my $lockdir = '/etc/pve/priv/lock';

# database file dumped by cfs_backup_database(), and the directory that
# receives those dumps
my $dbfile = '/var/lib/pve-cluster/config.db';
my $dbbackupdir = '/var/lib/pve-cluster/backup';
41
# This is only a read-only copy; the authoritative list lives in status.c of
# pmxcfs. Observed files are the ones we can fetch directly through IPCC. They
# are cached using a computed version, and only these files may be used with
# the cfs_*_file methods.
my $observed = {
    map { $_ => 1 } qw(
        vzdump.cron
        storage.cfg
        datacenter.cfg
        replication.cfg
        corosync.conf
        corosync.conf.new
        user.cfg
        domains.cfg
        priv/shadow.cfg
        priv/tfa.cfg
        /qemu-server/
        /openvz/
        /lxc/
        ha/crm_commands
        ha/manager_status
        ha/resources.cfg
        ha/groups.cfg
        ha/fence.cfg
        status.cfg
        ceph.conf
        sdn/vnets.cfg
        sdn/vnets.cfg.new
        sdn/zones.cfg
        sdn/zones.cfg.new
        sdn/controllers.cfg
        sdn/controllers.cfg.new
    )
};
74
# Returns the base directory of the cluster configuration ($basedir).
sub base_dir { return $basedir; }

# Returns the private directory below the base directory ($authdir).
sub auth_dir { return $authdir; }
82
# Check for quorum by testing the owner-write bit of "$basedir/local".
#
# $noerr: when true, a missing quorum is only reported through the return
#         value instead of raising an exception.
# Returns a true value if the write bit is set. Otherwise dies with
# "cluster not ready - no quorum?" unless $noerr is set.
sub check_cfs_quorum {
    my ($noerr) = @_;

    # note: the -w file test always returns 1 for root, so we need to look at
    # the mode bits from File::stat::lstat() here
    my $info = File::stat::lstat("$basedir/local");
    my $quorate = ($info && (($info->mode & 0200) != 0));

    if (!$quorate && !$noerr) {
        die "cluster not ready - no quorum?\n";
    }

    return $quorate;
}
95
# Check that the configuration filesystem is mounted, i.e. that
# "$basedir/local" is a symbolic link.
#
# $noerr: when true, return the (false) test result instead of dying.
# Returns the result of the -l file test.
sub check_cfs_is_mounted {
    my ($noerr) = @_;

    my $mounted = -l "$basedir/local";

    if (!$mounted && !$noerr) {
        die "pve configuration filesystem not mounted\n";
    }

    return $mounted;
}
106
# Cached replies, refreshed by cfs_update():
#   $versions - reply of CFS_IPC_GET_FS_VERSION
#   $vmlist   - reply of CFS_IPC_GET_GUEST_LIST
#   $clinfo   - reply of CFS_IPC_GET_CLUSTER_INFO
my ($versions, $vmlist, $clinfo) = ({}, {}, {});
110
# Send an IPC request and return the raw reply.
#
# Dies if no reply was received and errno ($!) is set. An undefined reply
# with a zero errno is passed through to the caller as undef.
my $ipcc_send_rec = sub {
    my ($msgid, $data) = @_;

    my $reply = PVE::IPCC::ipcc_send_rec($msgid, $data);

    if (!defined($reply) && $! != 0) {
        die "ipcc_send_rec[$msgid] failed: $!\n";
    }

    return $reply;
};
120
# Same as $ipcc_send_rec, but the reply is decoded from JSON before it is
# returned. Dies on IPC failure or if the reply cannot be decoded.
my $ipcc_send_rec_json = sub {
    my ($msgid, $data) = @_;

    my $reply = $ipcc_send_rec->($msgid, $data);

    return decode_json($reply);
};
130
# Fetch the content of a config file through IPC.
#
# Returns the raw content, '' when the call yields no data and no error,
# or undef when errno is ENOENT. Dies with the errno text on any other error.
my $ipcc_get_config = sub {
    my ($path) = @_;

    my $request = pack "Z*", $path;
    my $content = PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_CONFIG, $request);

    return $content if defined($content);

    # no data: tell apart "empty", "does not exist" and real errors
    return '' if $! == 0;
    return undef if $! == ENOENT;

    die "$!\n";
};
146
# Query a status entry by name, optionally for a specific node.
# Both strings are packed into fixed 256 byte, NUL-padded fields; a missing
# node name is sent as an empty string. Returns the raw IPC reply.
my $ipcc_get_status = sub {
    my ($name, $nodename) = @_;

    my $node = $nodename || "";
    my $request = pack "Z[256]Z[256]", $name, $node;

    return PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_STATUS, $request);
};
153
# Remove a status entry.
# We simply omit the data payload; pmxcfs takes this as a hint and removes
# the key from its status hashtable.
my $ipcc_remove_status = sub {
    my ($name) = @_;

    my $request = pack "Z[256]", $name;

    return $ipcc_send_rec->(CFS_IPC_SET_STATUS, $request);
};
161
# Set a status entry.
# $data may be a reference (serialized as JSON first) or a plain scalar
# (sent as is). The name goes into a fixed 256 byte field, followed by the
# NUL-terminated payload.
my $ipcc_update_status = sub {
    my ($name, $data) = @_;

    my $payload = $data;
    $payload = encode_json($data) if ref($data);

    my $request = pack "Z[256]Z*", $name, $payload;

    return $ipcc_send_rec->(CFS_IPC_SET_STATUS, $request);
};
171
# Send one entry to the cluster log (CFS_IPC_LOG_CLUSTER_MSG).
#
# $priority: numeric priority, packed as a single unsigned byte
# $ident, $tag, $msg: strings, each packed NUL-terminated; the byte lengths of
#     $ident and $tag (including the trailing NUL) are sent ahead of them
# Dies (through $ipcc_send_rec) if the IPC call fails.
my $ipcc_log = sub {
    my ($priority, $ident, $tag, $msg) = @_;

    # bytes::length() is only callable once bytes.pm has been loaded; load it
    # here instead of relying on some other module having pulled it in
    require bytes;

    my $bindata = pack "CCCZ*Z*Z*", $priority, bytes::length($ident) + 1,
        bytes::length($tag) + 1, $ident, $tag, $msg;

    return $ipcc_send_rec->(CFS_IPC_LOG_CLUSTER_MSG, $bindata);
};
180
# Request the cluster log.
# $user: optional user name, sent as an empty string when not given
# $max:  optional entry limit, sent as 0 when undefined
# The request consists of four little-endian 32-bit values ($max and three
# zeros) followed by the NUL-terminated user name.
my $ipcc_get_cluster_log = sub {
    my ($user, $max) = @_;

    $max //= 0;

    my $request = pack "VVVVZ*", $max, 0, 0, 0, ($user || "");

    return $ipcc_send_rec->(CFS_IPC_GET_CLUSTER_LOG, $request);
};
189
# parsed config file cache used by the cfs_*_file helpers, keyed by file name
my $ccache = {};

# Refresh the cached version info, cluster info and guest list.
#
# $fail: when true, errors are re-thrown; otherwise they are only warned
#        about, and the affected cache is reset to an empty hash.
sub cfs_update {
    my ($fail) = @_;

    # step 1: filesystem versions; a changed start time invalidates all caches
    eval {
        my $fsver = $ipcc_send_rec_json->(CFS_IPC_GET_FS_VERSION);
        die "no starttime\n" if !$fsver->{starttime};

        my $known_start = $versions->{starttime};
        if (!$known_start || $fsver->{starttime} != $known_start) {
            #print "detected changed starttime\n";
            $vmlist = {};
            $clinfo = {};
            $ccache = {};
        }

        $versions = $fsver;
    };
    if (my $err = $@) {
        $versions = {};
        $vmlist = {};
        $clinfo = {};
        $ccache = {};
        die $err if $fail;
        warn $err;
    }

    # step 2: cluster info, only re-fetched when its version changed
    eval {
        my $have = $clinfo->{version};
        if (!$have || $have != $versions->{clinfo}) {
            #warn "detected new clinfo\n";
            $clinfo = $ipcc_send_rec_json->(CFS_IPC_GET_CLUSTER_INFO);
        }
    };
    if (my $err = $@) {
        $clinfo = {};
        die $err if $fail;
        warn $err;
    }

    # step 3: guest list, only re-fetched when its version changed
    eval {
        my $have = $vmlist->{version};
        if (!$have || $have != $versions->{vmlist}) {
            #warn "detected new vmlist1\n";
            $vmlist = $ipcc_send_rec_json->(CFS_IPC_GET_GUEST_LIST);
        }
    };
    if (my $err = $@) {
        $vmlist = {};
        die $err if $fail;
        warn $err;
    }
}
244
# Returns the cached guest list (a direct reference, not a copy).
sub get_vmlist { return $vmlist; }

# Returns the cached cluster info (a direct reference, not a copy).
sub get_clinfo { return $clinfo; }

# Returns the 'nodelist' entry of the cached cluster info.
sub get_members { return $clinfo->{nodelist}; }
256
# Returns an array reference with the names of all nodes in the cached
# cluster info. If there is no node list, or the local node is not part of
# it, the list consists of the local node name only.
sub get_nodelist {
    my $members = $clinfo->{nodelist};
    my $local = PVE::INotify::nodename();

    return [ keys %$members ] if $members && $members->{$local};

    return [ $local ];
}
268
# The data is only stored in an in-memory hashtable inside pmxcfs: local data
# is gone after a restart (of pmxcfs or the node), while peer data is still
# available then. Best used for status data, like running (ceph) services,
# package versions, ...
#
# $key:  name of the entry, stored under "kv/$key"
# $data: plain scalar below 32 KiB; undef removes the entry
# Dies on a reference or on oversized data. IPC errors are only warned about.
sub broadcast_node_kv {
    my ($key, $data) = @_;

    my $status_key = "kv/$key";

    if (defined($data)) {
        die "cannot send a reference\n" if ref($data);
        # limit from pmxfs
        die "data for '$key' too big\n" if length($data) >= (32 * 1024);

        eval { $ipcc_update_status->($status_key, $data); };
    } else {
        eval { $ipcc_remove_status->($status_key); };
    }

    warn $@ if $@;
}
291
# Read the value stored under "kv/$key".
# $nodename is optional: without it, every node from get_nodelist() is
# queried. Returns a hash reference mapping node name to value; nodes for
# which nothing (or an empty/false reply) came back are left out.
sub get_node_kv {
    my ($key, $nodename) = @_;

    my $nodes = $nodename ? [ $nodename ] : get_nodelist();

    my $res = {};
    for my $node (@$nodes) {
        my $raw = $ipcc_get_status->("kv/$key", $node);
        next if !$raw;
        $res->{$node} = unpack("Z*", $raw);
    }

    return $res;
}
315
# property: the config property you want to get; e.g., this is perfect to get
#     the 'lock' entry of a guest _fast_ (>100 times faster than parsing the
#     config manually here)
# vmid: optional; if a valid one is passed only that guest is checked,
#     otherwise all guests are returned
# NOTE: does *not* search snapshot and PENDING sections!
sub get_guest_config_property {
    my ($property, $vmid) = @_;

    die "property is required" if !defined($property);

    # a vmid of 0 is sent when none was given
    my $request = pack "VZ*", $vmid // 0, $property;

    return $ipcc_send_rec_json->(CFS_IPC_GET_GUEST_CONFIG_PROPERTY, $request);
}
330
# $data must be an array of tasks in chronologically descending order.
# NOTE: entries are removed from the end of the passed array until the
# serialized list fits; the caller's array is modified in place.
sub broadcast_tasklist {
    my ($data) = @_;

    # the serialized list may not get bigger than 32kb (CFS_MAX_STATUS_SIZE
    # from pmxcfs) - drop older items until we satisfy this constraint
    while (length(encode_json($data)) >= (32 * 1024)) {
        pop @$data;
    }

    eval { $ipcc_update_status->("tasklist", $data); };

    warn $@ if $@;
}
349
# per-node cache of decoded task lists: { $node => { data => [...], version => $ver } }
my $tasklistcache = {};

# Collect the broadcast task lists of the cluster nodes.
#
# $nodename: optional; when given, only that node's list is returned.
# Returns an array reference with the concatenated task entries.
# Errors for a single node are sent to syslog and that node is skipped.
sub get_tasklist {
    my ($nodename) = @_;

    my $kvstore = $versions->{kvstore} || {};

    my $nodelist = get_nodelist();

    my $res = [];
    foreach my $node (@$nodelist) {
        next if $nodename && ($nodename ne $node);
        eval {
            # do not write 'my $ver = ... if COND': the behaviour of a 'my'
            # combined with a statement modifier is undefined in Perl
            my $ver = $kvstore->{$node} ? $kvstore->{$node}->{tasklist} : undef;
            my $cd = $tasklistcache->{$node};
            if (!$cd || !$ver || !$cd->{version} || ($cd->{version} != $ver)) {
                # no usable cache entry: fetch and decode a fresh list
                my $raw = $ipcc_get_status->("tasklist", $node) || '[]';
                my $data = decode_json($raw);
                push @$res, @$data;
                $tasklistcache->{$node} = {
                    data => $data,
                    version => $ver,
                };
            } elsif ($cd->{data}) {
                push @$res, @{$cd->{data}};
            }
        };
        my $err = $@;
        # pass the error as an argument, it must not be used as format string
        syslog('err', "%s", $err) if $err;
    }

    return $res;
}
384
# Publish $data as status entry "rrd/$rrdid".
# Errors are only warned about, never thrown.
sub broadcast_rrd {
    my ($rrdid, $data) = @_;

    eval { $ipcc_update_status->("rrd/$rrdid", $data); };

    if (my $err = $@) {
        warn $err;
    }
}
395
# time and result of the last successful rrd_dump() call
my $last_rrd_dump = 0;
my $last_rrd_data = "";

# Fetch and parse the RRD dump.
#
# Returns a hash reference mapping each key to an array reference of its
# values, with 'U' entries replaced by undef. A result younger than two
# seconds is served from the cache. On an IPC error a warning is emitted and
# an empty hash is returned (without touching the cache).
sub rrd_dump {
    my $now = time();

    return $last_rrd_data if ($now - $last_rrd_dump) < 2;

    my $raw = eval { $ipcc_send_rec->(CFS_IPC_GET_RRD_DUMP) };
    if (my $err = $@) {
        warn $err;
        return {};
    }

    my $res = {};

    if ($raw) {
        # walk over all newline terminated lines; an unterminated rest is ignored
        while ($raw =~ m/\G(.*)\n/g) {
            my $line = $1;
            my ($key, @values) = split(/:/, $line);
            next if !$key || scalar(@values) <= 1;
            $res->{$key} = [ map { $_ eq 'U' ? undef : $_ } @values ];
        }
    }

    $last_rrd_dump = $now;
    $last_rrd_data = $res;

    return $res;
}
435
436
# A fast way to read files (avoids the fuse overhead).
# Public wrapper around $ipcc_get_config; see there for the return values.
sub get_config {
    my ($path) = @_;

    return $ipcc_get_config->($path);
}
443
# Public wrapper around $ipcc_get_cluster_log.
# $user and $max are both optional and passed through unchanged.
sub get_cluster_log {
    my ($user, $max) = @_;

    return $ipcc_get_cluster_log->($user, $max);
}
449
# registered parser/writer pairs, keyed by observed file name
my $file_info = {};

# Register the parser and writer callbacks for an observed file.
# Dies if the file name is not part of $observed, or was registered before.
sub cfs_register_file {
    my ($filename, $parser, $writer) = @_;

    die "unknown file '$filename'" if !$observed->{$filename};
    die "file '$filename' already registered" if $file_info->{$filename};

    $file_info->{$filename} = { parser => $parser, writer => $writer };
}
464
# Return the parsed content of $filename, using $ccache where possible.
#
# The file is (re-)read and parsed whenever there is no cached version, no
# $version was passed, or the two differ. Reference results are handed out
# as deep copies, so callers cannot modify the cached data.
my $ccache_read = sub {
    my ($filename, $parser, $version) = @_;

    my $entry = ($ccache->{$filename} ||= {});

    my $stale = !$entry->{version} || !$version || $entry->{version} != $version;
    if ($stale) {
        # we always call the parser, even when the file does not exist
        # (in that case the raw data is undef)
        my $raw = get_config($filename);
        $entry->{data} = $parser->("/etc/pve/$filename", $raw);
        $entry->{version} = $version;
    }

    my $cached = $entry->{data};

    return ref($cached) ? dclone($cached) : $cached;
};
484
# Look up the cached version of a file and its registered handlers.
#
# Guest configs (nodes/<node>/<type>/<vmid>.conf) take their version from the
# guest list and use the handlers registered for "/<type>/". All other files
# use their own name for both lookups.
# Returns ($version, $info) in list context, just $version otherwise.
# Dies if no handlers are registered for the file.
sub cfs_file_version {
    my ($filename) = @_;

    my ($version, $infotag);

    if ($filename =~ m!^nodes/[^/]+/(openvz|lxc|qemu-server)/(\d+)\.conf$!) {
        my ($type, $vmid) = ($1, $2);
        $infotag = "/$type/";

        my $guest = $vmlist && $vmlist->{ids} && $vmlist->{ids}->{$vmid};
        $version = $guest->{version} if $guest;
    } else {
        $infotag = $filename;
        $version = $versions->{$filename};
    }

    my $info = $file_info->{$infotag};
    die "unknown file type '$filename'\n" if !$info;

    return $version if !wantarray;

    return ($version, $info);
}
506
# Read and parse a registered file, going through the config cache.
# Dies if the file has no registered handlers.
sub cfs_read_file {
    my ($filename) = @_;

    my ($version, $info) = cfs_file_version($filename);

    return $ccache_read->($filename, $info->{parser}, $version);
}
515
# Serialize $data with the registered writer and store it below /etc/pve.
# Dies if the file has no registered handlers or no writer.
sub cfs_write_file {
    my ($filename, $data) = @_;

    my (undef, $info) = cfs_file_version($filename);

    my $writer = $info->{writer};
    die "no writer defined" if !$writer;

    my $target = "/etc/pve/$filename";
    my $raw = $writer->($target, $data);

    # invalidate the cache entry, so that the next read parses the file again
    my $entry = $ccache->{$filename};
    $entry->{version} = undef if $entry;

    PVE::Tools::file_set_contents($target, $raw);
}
533
# Run $code->(@param) while holding the lock directory "$lockdir/$lockid".
#
# $timeout: seconds to wait for the lock (10 if not set); the code itself
#           always runs with a fixed 60 second limit
# Returns the (scalar) result of $code and clears $@ on success. On failure
# it returns undef and leaves the error message in $@ - it does not die.
# An alarm that was pending on entry is suspended and re-armed on exit.
my $cfs_lock = sub {
    my ($lockid, $timeout, $code, @param) = @_;

    my $outer_alarm = alarm(0); # suspend outer alarm early

    my $result;
    my $locked = 0;

    # this timeout only covers acquiring the lock
    $timeout ||= 10;

    my $lockpath = "$lockdir/$lockid";

    eval {

        mkdir $lockdir;

        die "pve cluster filesystem not online.\n" if ! -d $lockdir;

        my $acquire_timed_out = sub { die "got lock request timeout\n"; };
        local $SIG{ALRM} = $acquire_timed_out;

        while (1) {
            alarm($timeout);
            $locked = mkdir($lockpath);
            $timeout = alarm(0) - 1; # we'll sleep for 1s, see down below

            last if $locked;

            $acquire_timed_out->() if $timeout <= 0;

            print STDERR "trying to acquire cfs lock '$lockid' ...\n";
            utime(0, 0, $lockpath); # cfs unlock request
            sleep(1);
        }

        # fixed command timeout: cfs locks have a timeout of 120
        # using 60 gives us another 60 seconds to abort the task
        local $SIG{ALRM} = sub { die "got lock timeout - aborting command\n"; };
        alarm(60);

        cfs_update(); # make sure we read latest versions inside code()

        $result = $code->(@param);

        alarm(0);
    };

    my $err = $@;

    # a lock we could not get is most likely caused by missing quorum
    if (!$locked && !check_cfs_quorum(1)) {
        $err = "no quorum!\n";
    }

    rmdir $lockpath if $locked; # if we held the lock always unlock again

    alarm($outer_alarm);

    if ($err) {
        $@ = "error with cfs lock '$lockid': $err";
        return undef;
    }

    $@ = undef;

    return $result;
};
601
# Run $code->(@param) under the lock of an observed file.
# The lock id is "file-$filename" with every '.' and '/' replaced by '_'.
# Dies for unknown files; see $cfs_lock for return value and error reporting.
sub cfs_lock_file {
    my ($filename, $timeout, $code, @param) = @_;

    die "unknown file '$filename'" if !$observed->{$filename};

    (my $lockid = "file-$filename") =~ s/[.\/]/_/g;

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
612
# Run $code->(@param) under the lock "storage-$storeid".
# See $cfs_lock for return value and error reporting.
sub cfs_lock_storage {
    my ($storeid, $timeout, $code, @param) = @_;

    return $cfs_lock->("storage-$storeid", $timeout, $code, @param);
}
620
# Run $code->(@param) under the lock "domain-$domainname".
# See $cfs_lock for return value and error reporting.
sub cfs_lock_domain {
    my ($domainname, $timeout, $code, @param) = @_;

    return $cfs_lock->("domain-$domainname", $timeout, $code, @param);
}
628
# Run $code->(@param) under the lock "acme-$account".
# See $cfs_lock for return value and error reporting.
sub cfs_lock_acme {
    my ($account, $timeout, $code, @param) = @_;

    return $cfs_lock->("acme-$account", $timeout, $code, @param);
}
636
# Run $code->(@param) under the fixed lock 'authkey'.
# See $cfs_lock for return value and error reporting.
sub cfs_lock_authkey {
    my ($timeout, $code, @param) = @_;

    my $lockid = 'authkey';

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
642
# maps the priority names accepted by log_msg() to numeric levels;
# several names have a long and a short spelling
my $log_levels = {
    emerg    => 0,
    alert    => 1,
    crit     => 2,
    critical => 2,
    err      => 3,
    error    => 3,
    warn     => 4,
    warning  => 4,
    notice   => 5,
    info     => 6,
    debug    => 7,
};
656
# Write a message to syslog and to the cluster log.
#
# $priority: a name from $log_levels or a non-negative integer
# $ident:    optional identifier, logged as "<ident> " prefix in syslog
# $msg:      message text; "empty message" is logged if it is empty
# Non-ASCII characters in $ident and $msg are replaced by \uXXXX escapes.
# Dies if $priority is neither a known name nor numeric. A failure to write
# the cluster log is only reported to syslog.
sub log_msg {
    my ($priority, $ident, $msg) = @_;

    # check with defined(): 'emerg' maps to 0, which a plain truth test would
    # skip, so that the name is then rejected as non-numeric below
    if (defined($priority) && defined(my $level = $log_levels->{$priority})) {
        $priority = $level;
    }

    die "need numeric log priority"
        if !defined($priority) || $priority !~ /^\d+$/;

    my $tag = PVE::SafeSyslog::tag();

    $msg = "empty message" if !$msg;

    $ident = "" if !$ident;
    $ident = encode("ascii", $ident,
        sub { sprintf "\\u%04x", shift });

    my $ascii = encode("ascii", $msg, sub { sprintf "\\u%04x", shift });

    if ($ident) {
        syslog($priority, "<%s> %s", $ident, $ascii);
    } else {
        syslog($priority, "%s", $ascii);
    }

    eval { $ipcc_log->($priority, $ident, $tag, $ascii); };

    # pass the error as an argument, it must not become part of the format
    syslog("err", "writing cluster log failed: %s", $@) if $@;
}
686
# Check that $vmid does not show up in the cached guest list.
#
# Returns 1 if the ID is unused. If it is in use, returns undef when $noerr
# is set, otherwise dies with a message naming guest type (VM/CT) and node.
sub check_vmid_unused {
    my ($vmid, $noerr) = @_;

    my $guest = get_vmlist()->{ids}->{$vmid};

    return 1 if !defined($guest);
    return undef if $noerr;

    my $kind = $guest->{type} eq 'qemu' ? 'VM' : 'CT';
    die "$kind $vmid already exists on node '$guest->{node}'\n";
}
700
# Check that $nodename is part of the cached cluster node list.
#
# Returns 1 if it is. Otherwise returns undef when $noerr is set, or dies
# with "no such cluster node".
sub check_node_exists {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};
    my $known = $members && $members->{$nodename};

    return 1 if $known;
    return undef if $noerr;

    die "no such cluster node '$nodename'\n";
}
711
# Resolve the IP address of a cluster node; this is also used to get the IP
# of the local node.
#
# Returns the IP in scalar context and ($ip, $family) in list context. The
# address family is computed on first use and stored in the node's cluster
# info entry. If the cluster info has no IP for the node, the lookup (and
# $noerr) is handed over to PVE::Network::get_ip_from_hostname().
sub remote_node_ip {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};
    my $entry = $members && $members->{$nodename};

    if ($entry) {
        if (my $ip = $entry->{ip}) {
            return $ip if !wantarray;

            $entry->{address_family} ||= PVE::Tools::get_host_address_family($ip);

            return ($ip, $entry->{address_family});
        }
    }

    # fallback: try to get IP by other means
    return PVE::Network::get_ip_from_hostname($nodename, $noerr);
}
733
# Return the fingerprint of a node's certificate.
# The custom certificate (pveproxy-ssl.pem) is preferred if that file exists,
# otherwise pve-ssl.pem is used.
sub get_node_fingerprint {
    my ($node) = @_;

    my $default_cert = "/etc/pve/nodes/$node/pve-ssl.pem";
    my $custom_cert = "/etc/pve/nodes/$node/pveproxy-ssl.pem";

    my $cert_path = -f $custom_cert ? $custom_cert : $default_cert;

    return PVE::Certificate::get_certificate_fingerprint($cert_path);
}
744
745 # bash completion helpers
746
# Completion helper: returns a one-element array reference with the lowest
# ID in the range 100..9999 that is not in the cached guest list, or an
# empty array reference if all of them are taken.
sub complete_next_vmid {

    my $guests = get_vmlist() || {};
    my $used = $guests->{ids} || {};

    for my $candidate (100 .. 9999) {
        return [$candidate] if !defined($used->{$candidate});
    }

    return [];
}
758
# Completion helper: returns an array reference with all IDs from the cached
# guest list.
sub complete_vmid {

    my $used = get_vmlist()->{ids} || {};

    return [ keys %$used ];
}
766
# Completion helper: returns an array reference with the IDs of all guests
# whose 'node' entry in the cached guest list is the local node.
sub complete_local_vmid {

    my $guests = get_vmlist();
    my $ids = $guests->{ids} || {};

    my $local = PVE::INotify::nodename();

    my @local_ids = grep {
        my $owner = $ids->{$_}->{node};
        $owner && $owner eq $local;
    } keys %$ids;

    return \@local_ids;
}
783
# Completion helper: returns an array reference with all nodes from
# get_nodelist() except the local one.
sub complete_migration_target {

    my $local = PVE::INotify::nodename();

    my $nodes = get_nodelist();

    return [ grep { $_ ne $local } @$nodes ];
}
798
799
# NOTE: filesystem must be offline here, no DB changes allowed
#
# Dump the database with sqlite3 into a gzip compressed, time stamped file
# below $dbbackupdir, then delete all but the 10 newest backups.
# Returns the path of the database file. Dies (through run_command) if the
# backup cannot be created.
sub cfs_backup_database {
    mkdir $dbbackupdir;

    my $stamp = time();
    my $backup_fn = "$dbbackupdir/config-$stamp.sql.gz";

    print "backup old database to '$backup_fn'\n";

    # pipe the dump through gzip, with the output redirected to the backup file
    my $dump = ['sqlite3', $dbfile, '.dump'];
    my $compress = ['gzip', '-', \ ">${backup_fn}"];
    run_command([ $dump, $compress ], 'errmsg' => "cannot backup old database\n");

    my $maxfiles = 10; # purge older backup

    # newest first, the names sort by their embedded time stamp
    my @backups = sort { $b cmp $a } glob("$dbbackupdir/config-*.sql.gz");

    if (scalar(@backups) > $maxfiles) {
        foreach my $old (@backups[$maxfiles .. $#backups]) {
            if ($old =~ m/^(\S+)$/) { # untaint
                my $path = $1;
                print "delete old backup '$path'\n";
                unlink $path;
            }
        }
    }

    return $dbfile;
}
825
826 1;