12 use Storable
qw(dclone);
20 use PVE
::Tools
qw(run_command);
22 use PVE
::Cluster
::IPCConst
;
32 # x509 certificate utils
34 my $basedir = "/etc/pve";
35 my $authdir = "$basedir/priv";
36 my $lockdir = "/etc/pve/priv/lock";
38 # cfs and corosync files
39 my $dbfile = "/var/lib/pve-cluster/config.db";
40 my $dbbackupdir = "/var/lib/pve-cluster/backup";
42 # this is just a readonly copy, the relevant one is in status.c from pmxcfs
43 # observed files are the one we can get directly through IPCC, they are cached
44 # using a computed version and only those can be used by the cfs_*_file methods
48 'datacenter.cfg' => 1,
49 'replication.cfg' => 1,
51 'corosync.conf.new' => 1,
54 'priv/shadow.cfg' => 1,
59 'ha/crm_commands' => 1,
60 'ha/manager_status' => 1,
61 'ha/resources.cfg' => 1,
78 sub check_cfs_quorum
{
81 # note: -w filename always return 1 for root, so wee need
82 # to use File::lstat here
83 my $st = File
::stat::lstat("$basedir/local");
84 my $quorate = ($st && (($st->mode & 0200) != 0));
86 die "cluster not ready - no quorum?\n" if !$quorate && !$noerr;
91 sub check_cfs_is_mounted
{
94 my $res = -l
"$basedir/local";
96 die "pve configuration filesystem not mounted\n"
106 my $ipcc_send_rec = sub {
107 my ($msgid, $data) = @_;
109 my $res = PVE
::IPCC
::ipcc_send_rec
($msgid, $data);
111 die "ipcc_send_rec[$msgid] failed: $!\n" if !defined($res) && ($! != 0);
116 my $ipcc_send_rec_json = sub {
117 my ($msgid, $data) = @_;
119 my $res = PVE
::IPCC
::ipcc_send_rec
($msgid, $data);
121 die "ipcc_send_rec[$msgid] failed: $!\n" if !defined($res) && ($! != 0);
123 return decode_json
($res);
126 my $ipcc_get_config = sub {
129 my $bindata = pack "Z*", $path;
130 my $res = PVE
::IPCC
::ipcc_send_rec
(CFS_IPC_GET_CONFIG
, $bindata);
131 if (!defined($res)) {
133 return undef if $! == ENOENT
;
142 my $ipcc_get_status = sub {
143 my ($name, $nodename) = @_;
145 my $bindata = pack "Z[256]Z[256]", $name, ($nodename || "");
146 return PVE
::IPCC
::ipcc_send_rec
(CFS_IPC_GET_STATUS
, $bindata);
149 my $ipcc_remove_status = sub {
151 # we just omit the data payload, pmxcfs takes this as hint and removes this
152 # key from the status hashtable
153 my $bindata = pack "Z[256]", $name;
154 return &$ipcc_send_rec(CFS_IPC_SET_STATUS
, $bindata);
157 my $ipcc_update_status = sub {
158 my ($name, $data) = @_;
160 my $raw = ref($data) ? encode_json
($data) : $data;
162 my $bindata = pack "Z[256]Z*", $name, $raw;
164 return &$ipcc_send_rec(CFS_IPC_SET_STATUS
, $bindata);
168 my ($priority, $ident, $tag, $msg) = @_;
170 my $bindata = pack "CCCZ*Z*Z*", $priority, bytes
::length($ident) + 1,
171 bytes
::length($tag) + 1, $ident, $tag, $msg;
173 return &$ipcc_send_rec(CFS_IPC_LOG_CLUSTER_MSG
, $bindata);
176 my $ipcc_get_cluster_log = sub {
177 my ($user, $max) = @_;
179 $max = 0 if !defined($max);
181 my $bindata = pack "VVVVZ*", $max, 0, 0, 0, ($user || "");
182 return &$ipcc_send_rec(CFS_IPC_GET_CLUSTER_LOG
, $bindata);
190 my $res = &$ipcc_send_rec_json(CFS_IPC_GET_FS_VERSION
);
191 die "no starttime\n" if !$res->{starttime
};
193 if (!$res->{starttime
} || !$versions->{starttime
} ||
194 $res->{starttime
} != $versions->{starttime
}) {
195 #print "detected changed starttime\n";
214 if (!$clinfo->{version
} || $clinfo->{version
} != $versions->{clinfo
}) {
215 #warn "detected new clinfo\n";
216 $clinfo = &$ipcc_send_rec_json(CFS_IPC_GET_CLUSTER_INFO
);
227 if (!$vmlist->{version
} || $vmlist->{version
} != $versions->{vmlist
}) {
228 #warn "detected new vmlist1\n";
229 $vmlist = &$ipcc_send_rec_json(CFS_IPC_GET_GUEST_LIST
);
249 return $clinfo->{nodelist
};
253 my $nodelist = $clinfo->{nodelist
};
255 my $nodename = PVE
::INotify
::nodename
();
257 if (!$nodelist || !$nodelist->{$nodename}) {
258 return [ $nodename ];
261 return [ keys %$nodelist ];
264 # only stored in a in-memory hashtable inside pmxcfs, local data is gone after
265 # a restart (of pmxcfs or the node), peer data is still available then
266 # best used for status data, like running (ceph) services, package versions, ...
267 sub broadcast_node_kv
{
268 my ($key, $data) = @_;
270 if (!defined($data)) {
272 $ipcc_remove_status->("kv/$key");
275 die "cannot send a reference\n" if ref($data);
276 my $size = length($data);
277 die "data for '$key' too big\n" if $size >= (32 * 1024); # limit from pmxfs
280 $ipcc_update_status->("kv/$key", $data);
287 # nodename is optional
289 my ($key, $nodename) = @_;
292 my $get_node_data = sub {
294 my $raw = $ipcc_get_status->("kv/$key", $node);
295 $res->{$node} = unpack("Z*", $raw) if $raw;
299 $get_node_data->($nodename);
301 my $nodelist = get_nodelist
();
303 foreach my $node (@$nodelist) {
304 $get_node_data->($node);
311 # property: a config property you want to get, e.g., this is perfect to get
312 # the 'lock' entry of a guest _fast_ (>100 faster than manual parsing here)
313 # vmid: optipnal, if a valid is passed we only check that one, else return all
314 # NOTE: does *not* searches snapshot and PENDING entries sections!
315 sub get_guest_config_property
{
316 my ($property, $vmid) = @_;
318 die "property is required" if !defined($property);
320 my $bindata = pack "VZ*", $vmid // 0, $property;
321 my $res = $ipcc_send_rec_json->(CFS_IPC_GET_GUEST_CONFIG_PROPERTY
, $bindata);
326 # $data must be a chronological descending ordered array of tasks
327 sub broadcast_tasklist
{
330 # the serialized list may not get bigger than 32kb (CFS_MAX_STATUS_SIZE
331 # from pmxcfs) - drop older items until we satisfy this constraint
332 my $size = length(encode_json
($data));
333 while ($size >= (32 * 1024)) {
335 $size = length(encode_json
($data));
339 &$ipcc_update_status("tasklist", $data);
345 my $tasklistcache = {};
350 my $kvstore = $versions->{kvstore
} || {};
352 my $nodelist = get_nodelist
();
355 foreach my $node (@$nodelist) {
356 next if $nodename && ($nodename ne $node);
358 my $ver = $kvstore->{$node}->{tasklist
} if $kvstore->{$node};
359 my $cd = $tasklistcache->{$node};
360 if (!$cd || !$ver || !$cd->{version
} ||
361 ($cd->{version
} != $ver)) {
362 my $raw = &$ipcc_get_status("tasklist", $node) || '[]';
363 my $data = decode_json
($raw);
365 $cd = $tasklistcache->{$node} = {
369 } elsif ($cd && $cd->{data
}) {
370 push @$res, @{$cd->{data
}};
374 syslog
('err', $err) if $err;
381 my ($rrdid, $data) = @_;
384 &$ipcc_update_status("rrd/$rrdid", $data);
391 my $last_rrd_dump = 0;
392 my $last_rrd_data = "";
398 my $diff = $ctime - $last_rrd_dump;
400 return $last_rrd_data;
405 $raw = &$ipcc_send_rec(CFS_IPC_GET_RRD_DUMP
);
417 while ($raw =~ s/^(.*)\n//) {
418 my ($key, @ela) = split(/:/, $1);
420 next if !(scalar(@ela) > 1);
421 $res->{$key} = [ map { $_ eq 'U' ?
undef : $_ } @ela ];
425 $last_rrd_dump = $ctime;
426 $last_rrd_data = $res;
432 # a fast way to read files (avoid fuse overhead)
436 return &$ipcc_get_config($path);
439 sub get_cluster_log
{
440 my ($user, $max) = @_;
442 return &$ipcc_get_cluster_log($user, $max);
447 sub cfs_register_file
{
448 my ($filename, $parser, $writer) = @_;
450 $observed->{$filename} || die "unknown file '$filename'";
452 die "file '$filename' already registered" if $file_info->{$filename};
454 $file_info->{$filename} = {
460 my $ccache_read = sub {
461 my ($filename, $parser, $version) = @_;
463 $ccache->{$filename} = {} if !$ccache->{$filename};
465 my $ci = $ccache->{$filename};
467 if (!$ci->{version
} || !$version || $ci->{version
} != $version) {
468 # we always call the parser, even when the file does not exists
469 # (in that case $data is undef)
470 my $data = get_config
($filename);
471 $ci->{data
} = &$parser("/etc/pve/$filename", $data);
472 $ci->{version
} = $version;
475 my $res = ref($ci->{data
}) ? dclone
($ci->{data
}) : $ci->{data
};
480 sub cfs_file_version
{
485 if ($filename =~ m!^nodes/[^/]+/(openvz|lxc|qemu-server)/(\d+)\.conf$!) {
486 my ($type, $vmid) = ($1, $2);
487 if ($vmlist && $vmlist->{ids
} && $vmlist->{ids
}->{$vmid}) {
488 $version = $vmlist->{ids
}->{$vmid}->{version
};
490 $infotag = "/$type/";
492 $infotag = $filename;
493 $version = $versions->{$filename};
496 my $info = $file_info->{$infotag} ||
497 die "unknown file type '$filename'\n";
499 return wantarray ?
($version, $info) : $version;
505 my ($version, $info) = cfs_file_version
($filename);
506 my $parser = $info->{parser
};
508 return &$ccache_read($filename, $parser, $version);
512 my ($filename, $data) = @_;
514 my ($version, $info) = cfs_file_version
($filename);
516 my $writer = $info->{writer
} || die "no writer defined";
518 my $fsname = "/etc/pve/$filename";
520 my $raw = &$writer($fsname, $data);
522 if (my $ci = $ccache->{$filename}) {
523 $ci->{version
} = undef;
526 PVE
::Tools
::file_set_contents
($fsname, $raw);
530 my ($lockid, $timeout, $code, @param) = @_;
532 my $prev_alarm = alarm(0); # suspend outer alarm early
537 # this timeout is for acquire the lock
538 $timeout = 10 if !$timeout;
540 my $filename = "$lockdir/$lockid";
547 die "pve cluster filesystem not online.\n";
550 my $timeout_err = sub { die "got lock request timeout\n"; };
551 local $SIG{ALRM
} = $timeout_err;
555 $got_lock = mkdir($filename);
556 $timeout = alarm(0) - 1; # we'll sleep for 1s, see down below
560 $timeout_err->() if $timeout <= 0;
562 print STDERR
"trying to acquire cfs lock '$lockid' ...\n";
563 utime (0, 0, $filename); # cfs unlock request
567 # fixed command timeout: cfs locks have a timeout of 120
568 # using 60 gives us another 60 seconds to abort the task
569 local $SIG{ALRM
} = sub { die "got lock timeout - aborting command\n"; };
572 cfs_update
(); # make sure we read latest versions inside code()
574 $res = &$code(@param);
581 $err = "no quorum!\n" if !$got_lock && !check_cfs_quorum
(1);
583 rmdir $filename if $got_lock; # if we held the lock always unlock again
588 $@ = "error with cfs lock '$lockid': $err";
598 my ($filename, $timeout, $code, @param) = @_;
600 my $info = $observed->{$filename} || die "unknown file '$filename'";
602 my $lockid = "file-$filename";
603 $lockid =~ s/[.\/]/_
/g
;
605 &$cfs_lock($lockid, $timeout, $code, @param);
608 sub cfs_lock_storage
{
609 my ($storeid, $timeout, $code, @param) = @_;
611 my $lockid = "storage-$storeid";
613 &$cfs_lock($lockid, $timeout, $code, @param);
616 sub cfs_lock_domain
{
617 my ($domainname, $timeout, $code, @param) = @_;
619 my $lockid = "domain-$domainname";
621 &$cfs_lock($lockid, $timeout, $code, @param);
625 my ($account, $timeout, $code, @param) = @_;
627 my $lockid = "acme-$account";
629 &$cfs_lock($lockid, $timeout, $code, @param);
632 sub cfs_lock_authkey
{
633 my ($timeout, $code, @param) = @_;
635 $cfs_lock->('authkey', $timeout, $code, @param);
653 my ($priority, $ident, $msg) = @_;
655 if (my $tmp = $log_levels->{$priority}) {
659 die "need numeric log priority" if $priority !~ /^\d+$/;
661 my $tag = PVE
::SafeSyslog
::tag
();
663 $msg = "empty message" if !$msg;
665 $ident = "" if !$ident;
666 $ident = encode
("ascii", $ident,
667 sub { sprintf "\\u%04x", shift });
669 my $ascii = encode
("ascii", $msg, sub { sprintf "\\u%04x", shift });
672 syslog
($priority, "<%s> %s", $ident, $ascii);
674 syslog
($priority, "%s", $ascii);
677 eval { &$ipcc_log($priority, $ident, $tag, $ascii); };
679 syslog
("err", "writing cluster log failed: $@") if $@;
682 sub check_vmid_unused
{
683 my ($vmid, $noerr) = @_;
685 my $vmlist = get_vmlist
();
687 my $d = $vmlist->{ids
}->{$vmid};
688 return 1 if !defined($d);
690 return undef if $noerr;
692 my $vmtypestr = $d->{type
} eq 'qemu' ?
'VM' : 'CT';
693 die "$vmtypestr $vmid already exists on node '$d->{node}'\n";
696 sub check_node_exists
{
697 my ($nodename, $noerr) = @_;
699 my $nodelist = $clinfo->{nodelist
};
700 return 1 if $nodelist && $nodelist->{$nodename};
702 return undef if $noerr;
704 die "no such cluster node '$nodename'\n";
707 # this is also used to get the IP of the local node
709 my ($nodename, $noerr) = @_;
711 my $nodelist = $clinfo->{nodelist
};
712 if ($nodelist && $nodelist->{$nodename}) {
713 if (my $ip = $nodelist->{$nodename}->{ip
}) {
714 return $ip if !wantarray;
715 my $family = $nodelist->{$nodename}->{address_family
};
717 $nodelist->{$nodename}->{address_family
} =
719 PVE
::Tools
::get_host_address_family
($ip);
721 return wantarray ?
($ip, $family) : $ip;
725 # fallback: try to get IP by other means
726 return PVE
::Network
::get_ip_from_hostname
($nodename, $noerr);
729 sub get_node_fingerprint
{
732 my $cert_path = "/etc/pve/nodes/$node/pve-ssl.pem";
733 my $custom_cert_path = "/etc/pve/nodes/$node/pveproxy-ssl.pem";
735 $cert_path = $custom_cert_path if -f
$custom_cert_path;
737 return PVE
::Certificate
::get_certificate_fingerprint
($cert_path);
740 # bash completion helpers
742 sub complete_next_vmid
{
744 my $vmlist = get_vmlist
() || {};
745 my $idlist = $vmlist->{ids
} || {};
747 for (my $i = 100; $i < 10000; $i++) {
748 return [$i] if !defined($idlist->{$i});
756 my $vmlist = get_vmlist
();
757 my $ids = $vmlist->{ids
} || {};
759 return [ keys %$ids ];
762 sub complete_local_vmid
{
764 my $vmlist = get_vmlist
();
765 my $ids = $vmlist->{ids
} || {};
767 my $nodename = PVE
::INotify
::nodename
();
770 foreach my $vmid (keys %$ids) {
771 my $d = $ids->{$vmid};
772 next if !$d->{node
} || $d->{node
} ne $nodename;
779 sub complete_migration_target
{
783 my $nodename = PVE
::INotify
::nodename
();
785 my $nodelist = get_nodelist
();
786 foreach my $node (@$nodelist) {
787 next if $node eq $nodename;
795 # NOTE: filesystem must be offline here, no DB changes allowed
796 sub cfs_backup_database
{
800 my $backup_fn = "$dbbackupdir/config-$ctime.sql.gz";
802 print "backup old database to '$backup_fn'\n";
804 my $cmd = [ ['sqlite3', $dbfile, '.dump'], ['gzip', '-', \
">${backup_fn}"] ];
805 run_command
($cmd, 'errmsg' => "cannot backup old database\n");
807 my $maxfiles = 10; # purge older backup
808 my $backups = [ sort { $b cmp $a } <$dbbackupdir/config-*.sql
.gz
> ];
810 if ((my $count = scalar(@$backups)) > $maxfiles) {
811 foreach my $f (@$backups[$maxfiles..$count-1]) {
812 next if $f !~ m/^(\S+)$/; # untaint
813 print "delete old backup '$1'\n";