use strict;
use warnings;
-use POSIX qw(EEXIST ENOENT);
+
+use Digest::HMAC_SHA1;
+use Digest::SHA;
+use Encode;
use File::stat qw();
-use Socket;
-use Storable qw(dclone);
use IO::File;
+use JSON;
use MIME::Base64;
-use Digest::SHA;
-use Digest::HMAC_SHA1;
use Net::SSLeay;
-use PVE::Tools;
+use POSIX qw(EEXIST ENOENT);
+use RRDs;
+use Socket;
+use Storable qw(dclone);
+use UUID;
+
use PVE::INotify;
use PVE::IPCC;
-use PVE::SafeSyslog;
use PVE::JSONSchema;
use PVE::Network;
-use JSON;
-use RRDs;
-use Encode;
-use UUID;
+use PVE::SafeSyslog;
+use PVE::Tools qw(run_command);
+
+use PVE::Cluster::IPCConst;
+
use base 'Exporter';
our @EXPORT_OK = qw(
my $lockdir = "/etc/pve/priv/lock";
# cfs and corosync files
+my $dbfile = "/var/lib/pve-cluster/config.db";
+my $dbbackupdir = "/var/lib/pve-cluster/backup";
my $localclusterdir = "/etc/corosync";
+my $localclusterconf = "$localclusterdir/corosync.conf";
my $authfile = "$localclusterdir/authkey";
my $clusterconf = "$basedir/corosync.conf";
my $rootsshauthkeysbackup = "${rootsshauthkeys}.org";
my $rootsshconfig = "/root/.ssh/config";
+# this is just a read-only copy; the relevant one is in status.c from pmxcfs
+# observed files are the ones we can get directly through IPCC; they are
+# cached using a computed version and only those can be used by the
+# cfs_*_file methods
my $observed = {
'vzdump.cron' => 1,
'storage.cfg' => 1,
'user.cfg' => 1,
'domains.cfg' => 1,
'priv/shadow.cfg' => 1,
+ 'priv/tfa.cfg' => 1,
'/qemu-server/' => 1,
'/openvz/' => 1,
'/lxc/' => 1,
'ha/groups.cfg' => 1,
'ha/fence.cfg' => 1,
'status.cfg' => 1,
+ 'ceph.conf' => 1,
};
# only write output if something fails
my ($cmd) = @_;
my $outbuf = '';
+ my $record = sub { $outbuf .= shift . "\n"; };
- my $record_output = sub {
- $outbuf .= shift;
- $outbuf .= "\n";
- };
+ eval { run_command($cmd, outfunc => $record, errfunc => $record) };
- eval {
- PVE::Tools::run_command($cmd, outfunc => $record_output,
- errfunc => $record_output);
- };
-
- my $err = $@;
-
- if ($err) {
+ if (my $err = $@) {
print STDERR $outbuf;
die $err;
}
check_cfs_is_mounted();
- mkdir $authdir || $! == EEXIST || die "unable to create dir '$authdir' - $!\n";
+ cfs_lock_authkey(undef, sub {
+ mkdir $authdir || $! == EEXIST || die "unable to create dir '$authdir' - $!\n";
+
+ run_silent_cmd(['openssl', 'genrsa', '-out', $authprivkeyfn, '2048']);
- run_silent_cmd(['openssl', 'genrsa', '-out', $authprivkeyfn, '2048']);
+ run_silent_cmd(['openssl', 'rsa', '-in', $authprivkeyfn, '-pubout', '-out', $authpubkeyfn]);
+ });
- run_silent_cmd(['openssl', 'rsa', '-in', $authprivkeyfn, '-pubout', '-out', $authpubkeyfn]);
+ die "$@\n" if $@;
}
sub gen_pveca_key {
my ($path) = @_;
my $bindata = pack "Z*", $path;
- my $res = PVE::IPCC::ipcc_send_rec(6, $bindata);
+ my $res = PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_CONFIG, $bindata);
if (!defined($res)) {
if ($! != 0) {
return undef if $! == ENOENT;
my ($name, $nodename) = @_;
my $bindata = pack "Z[256]Z[256]", $name, ($nodename || "");
- return PVE::IPCC::ipcc_send_rec(5, $bindata);
+ return PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_STATUS, $bindata);
};
my $ipcc_update_status = sub {
# update status
my $bindata = pack "Z[256]Z*", $name, $raw;
- return &$ipcc_send_rec(4, $bindata);
+ return &$ipcc_send_rec(CFS_IPC_SET_STATUS, $bindata);
};
my $ipcc_log = sub {
my $bindata = pack "CCCZ*Z*Z*", $priority, bytes::length($ident) + 1,
bytes::length($tag) + 1, $ident, $tag, $msg;
- return &$ipcc_send_rec(7, $bindata);
+ return &$ipcc_send_rec(CFS_IPC_LOG_CLUSTER_MSG, $bindata);
};
my $ipcc_get_cluster_log = sub {
$max = 0 if !defined($max);
my $bindata = pack "VVVVZ*", $max, 0, 0, 0, ($user || "");
- return &$ipcc_send_rec(8, $bindata);
+ return &$ipcc_send_rec(CFS_IPC_GET_CLUSTER_LOG, $bindata);
};
my $ccache = {};
sub cfs_update {
my ($fail) = @_;
eval {
- my $res = &$ipcc_send_rec_json(1);
+ my $res = &$ipcc_send_rec_json(CFS_IPC_GET_FS_VERSION);
#warn "GOT1: " . Dumper($res);
die "no starttime\n" if !$res->{starttime};
eval {
if (!$clinfo->{version} || $clinfo->{version} != $versions->{clinfo}) {
#warn "detected new clinfo\n";
- $clinfo = &$ipcc_send_rec_json(2);
+ $clinfo = &$ipcc_send_rec_json(CFS_IPC_GET_CLUSTER_INFO);
}
};
$err = $@;
eval {
if (!$vmlist->{version} || $vmlist->{version} != $versions->{vmlist}) {
#warn "detected new vmlist1\n";
- $vmlist = &$ipcc_send_rec_json(3);
+ $vmlist = &$ipcc_send_rec_json(CFS_IPC_GET_GUEST_LIST);
}
};
$err = $@;
}
sub get_nodelist {
-
my $nodelist = $clinfo->{nodelist};
- my $result = [];
-
my $nodename = PVE::INotify::nodename();
if (!$nodelist || !$nodelist->{$nodename}) {
my $raw;
eval {
- $raw = &$ipcc_send_rec(10);
+ $raw = &$ipcc_send_rec(CFS_IPC_GET_RRD_DUMP);
};
my $err = $@;
my $res;
my $got_lock = 0;
- # this timeout is for aquire the lock
+ # this timeout is for acquire the lock
$timeout = 10 if !$timeout;
my $filename = "$lockdir/$lockid";
$timeout_err->() if $timeout <= 0;
- print STDERR "trying to aquire cfs lock '$lockid' ...\n";
+ print STDERR "trying to acquire cfs lock '$lockid' ...\n";
utime (0, 0, $filename); # cfs unlock request
sleep(1);
}
&$cfs_lock($lockid, $timeout, $code, @param);
}
+# cfs_lock_acme($account, $timeout, $code, @param)
+#
+# Run $code under the cluster-wide lock "acme-$account", serializing ACME
+# operations for one account across all nodes. $timeout/@param semantics
+# follow the generic $cfs_lock helper.
+sub cfs_lock_acme {
+    my ($account, $timeout, $code, @param) = @_;
+
+    my $lockid = "acme-$account";
+
+    &$cfs_lock($lockid, $timeout, $code, @param);
+}
+
+# cfs_lock_authkey($timeout, $code, @param)
+#
+# Run $code under the cluster-wide 'authkey' lock, so cluster auth key
+# (re)generation cannot race between nodes. $timeout/@param semantics
+# follow the generic $cfs_lock helper.
+sub cfs_lock_authkey {
+    my ($timeout, $code, @param) = @_;
+
+    $cfs_lock->('authkey', $timeout, $code, @param);
+}
+
my $log_levels = {
"emerg" => 0,
"alert" => 1,
},
};
+# property-string format for the datacenter config 'ha' option; parsed and
+# re-serialized via PVE::JSONSchema::{parse,print}_property_string below
+my $ha_format = {
+    shutdown_policy => {
+	type => 'string',
+	enum => ['freeze', 'failover', 'conditional'],
+	description => "The policy for HA services on node shutdown. 'freeze' disables auto-recovery, 'failover' ensures recovery, 'conditional' recovers on poweroff and freezes on reboot. Running HA Services will always get stopped first on shutdown.",
+	verbose_description => "Describes the policy for handling HA services on poweroff or reboot of a node. Freeze will always freeze services which are still located on the node on shutdown, those services won't be recovered by the HA manager. Failover will not mark the services as frozen and thus the services will get recovered to other nodes, if the shutdown node does not come up again quickly (< 1min). 'conditional' chooses automatically depending on the type of shutdown, i.e., on a reboot the service will be frozen but on a poweroff the service will stay as is, and thus get recovered after about 2 minutes.",
+	default => 'conditional',
+    }
+};
+
+PVE::JSONSchema::register_format('mac-prefix', \&pve_verify_mac_prefix);
+# Validate a MAC address prefix: one to three colon-separated hex byte
+# pairs with an optional trailing ':'. The second hex digit of the first
+# byte is restricted to even values ([02468ace]), i.e. the multicast bit
+# must be clear so generated addresses stay unicast.
+# Returns the prefix on success; on failure returns undef if $noerr is
+# set, otherwise dies.
+sub pve_verify_mac_prefix {
+    my ($mac_prefix, $noerr) = @_;
+
+    if ($mac_prefix !~ m/^[a-f0-9][02468ace](?::[a-f0-9]{2}){0,2}:?$/i) {
+	return undef if $noerr;
+	die "value is not a valid unicast MAC address prefix\n";
+    }
+    return $mac_prefix;
+}
+
+# property-string format for the datacenter config 'u2f' option
+# (exported via 'our' so other modules can reuse the schema)
+our $u2f_format = {
+    appid => {
+	type => 'string',
+	description => "U2F AppId URL override. Defaults to the origin.",
+	format_description => 'APPID',
+	optional => 1,
+    },
+    origin => {
+	type => 'string',
+	description => "U2F Origin override. Mostly useful for single nodes with a single URL.",
+	format_description => 'URL',
+	optional => 1,
+    },
+};
+
my $datacenter_schema = {
type => "object",
additionalProperties => 0,
optional => 1,
type => 'string',
description => "Default GUI language.",
- enum => [ 'en', 'de' ],
+ enum => [
+ 'zh_CN',
+ 'zh_TW',
+ 'ca',
+ 'en',
+ 'eu',
+ 'fr',
+ 'de',
+ 'it',
+ 'es',
+ 'ja',
+ 'nb',
+ 'nn',
+ 'fa',
+ 'pl',
+ 'pt_BR',
+ 'ru',
+ 'sl',
+ 'sv',
+ 'tr',
+ ],
},
http_proxy => {
optional => 1,
console => {
optional => 1,
type => 'string',
- description => "Select the default Console viewer. You can either use the builtin java applet (VNC; deprecated and maps to html5), an external virt-viewer comtatible application (SPICE), or an HTML5 based viewer (noVNC).",
- enum => ['applet', 'vv', 'html5'],
+ description => "Select the default Console viewer. You can either use the builtin java applet (VNC; deprecated and maps to html5), an external virt-viewer comtatible application (SPICE), an HTML5 based vnc viewer (noVNC), or an HTML5 based console client (xtermjs). If the selected viewer is not available (e.g. SPICE not activated for the VM), the fallback is noVNC.",
+ enum => ['applet', 'vv', 'html5', 'xtermjs'],
},
email_from => {
optional => 1,
" With both all two modes are used." .
"\n\nWARNING: 'hardware' and 'both' are EXPERIMENTAL & WIP",
},
+ ha => {
+ optional => 1,
+ type => 'string', format => $ha_format,
+ description => "Cluster wide HA settings.",
+ },
mac_prefix => {
optional => 1,
type => 'string',
- pattern => qr/[a-f0-9]{2}(?::[a-f0-9]{2}){0,2}:?/i,
+ format => 'mac-prefix',
description => 'Prefix for autogenerated MAC addresses.',
},
bwlimit => PVE::JSONSchema::get_standard_option('bwlimit'),
+ u2f => {
+ optional => 1,
+ type => 'string',
+ format => $u2f_format,
+ description => 'u2f',
+ },
},
};
$res->{migration} = PVE::JSONSchema::parse_property_string($migration_format, $migration);
}
+ if (my $ha = $res->{ha}) {
+ $res->{ha} = PVE::JSONSchema::parse_property_string($ha_format, $ha);
+ }
+
# for backwards compatibility only, new migration property has precedence
if (defined($res->{migration_unsecure})) {
if (defined($res->{migration}->{type})) {
$cfg->{console} = 'html5';
}
- if (my $migration = $cfg->{migration}) {
+ if (ref($cfg->{migration})) {
+ my $migration = $cfg->{migration};
$cfg->{migration} = PVE::JSONSchema::print_property_string($migration, $migration_format);
}
+ if (ref($cfg->{ha})) {
+ my $ha = $cfg->{ha};
+ $cfg->{ha} = PVE::JSONSchema::print_property_string($ha, $ha_format);
+ }
+
return PVE::JSONSchema::dump_config($datacenter_schema, $filename, $cfg);
}
or die "unable to read '$cert_path' - $!\n";
my $cert = Net::SSLeay::PEM_read_bio_X509($bio);
- if (!$cert) {
- Net::SSLeay::BIO_free($bio);
- die "unable to read certificate from '$cert_path'\n";
- }
+ Net::SSLeay::BIO_free($bio);
+
+ die "unable to read certificate from '$cert_path'\n" if !$cert;
my $fp = Net::SSLeay::X509_get_fingerprint($cert, 'sha256');
Net::SSLeay::X509_free($cert);
sub assert_joinable {
my ($ring0_addr, $ring1_addr, $force) = @_;
- my ($errors, $warnings) = ('', '');
- my $error = sub {
- my ($msg, $suppress) = @_;
+ my $errors = '';
+ my $error = sub { $errors .= "* $_[0]\n"; };
- if ($suppress) {
- $warnings .= "* $msg\n";
- } else {
- $errors .= "* $msg\n";
- }
- };
-
- if (!$force) {
-
- if (-f $authfile) {
- $error->("authentication key '$authfile' already exists", $force);
- }
+ if (-f $authfile) {
+ $error->("authentication key '$authfile' already exists");
+ }
- if (-f $clusterconf) {
- $error->("cluster config '$clusterconf' already exists", $force);
- }
+ if (-f $clusterconf) {
+ $error->("cluster config '$clusterconf' already exists");
+ }
- my $vmlist = get_vmlist();
- if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
- $error->("this host already contains virtual guests", $force);
- }
+ my $vmlist = get_vmlist();
+ if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
+ $error->("this host already contains virtual guests");
+ }
- if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
- $error->("corosync is already running, is this node already in a cluster?!", $force);
- }
+ if (run_command(['corosync-quorumtool', '-l'], noerr => 1, quiet => 1) == 0) {
+ $error->("corosync is already running, is this node already in a cluster?!");
}
# check if corosync ring IPs are configured on the current nodes interfaces
my $host = $ip;
eval { $ip = PVE::Network::get_ip_from_hostname($host); };
if ($@) {
- $error->("cannot use '$host': $@\n", 1) ;
+ $error->("cannot use '$host': $@\n") ;
return;
}
}
$check_ip->($ring0_addr);
$check_ip->($ring1_addr);
- warn "warning, ignore the following errors:\n$warnings" if $warnings;
- die "detected the following error(s):\n$errors" if $errors;
+ if ($errors) {
+ warn "detected the following error(s):\n$errors";
+ die "Check if node may join a cluster failed!\n" if !$force;
+ }
+}
+
+# $backup_cfs_database->($dbfile)
+#
+# Dump the pmxcfs sqlite database to a gzipped SQL file under
+# $dbbackupdir (timestamped "config-<ctime>.sql.gz"), then prune all but
+# the 10 newest backups. Dies (via run_command errmsg) if the dump fails.
+# NOTE: filesystem must be offline here, no DB changes allowed
+my $backup_cfs_database = sub {
+    my ($dbfile) = @_;
+
+    mkdir $dbbackupdir;
+
+    my $ctime = time();
+    my $backup_fn = "$dbbackupdir/config-$ctime.sql.gz";
+
+    print "backup old database to '$backup_fn'\n";
+
+    # pipeline: sqlite3 .dump | gzip > $backup_fn
+    my $cmd = [ ['sqlite3', $dbfile, '.dump'], ['gzip', '-', \ ">${backup_fn}"] ];
+    run_command($cmd, 'errmsg' => "cannot backup old database\n");
+
+    my $maxfiles = 10; # purge older backup
+    # reverse-sorted glob => newest first (timestamped names sort lexically)
+    my $backups = [ sort { $b cmp $a } <$dbbackupdir/config-*.sql.gz> ];
+
+    if ((my $count = scalar(@$backups)) > $maxfiles) {
+	foreach my $f (@$backups[$maxfiles..$count-1]) {
+	    next if $f !~ m/^(\S+)$/; # untaint
+	    print "delete old backup '$1'\n";
+	    unlink $1;
+	}
+    }
+};
+
+# join($param)
+#
+# Join this node to an existing cluster: after local sanity checks
+# (assert_joinable), log in to the remote node's API ($param->{hostname},
+# port 8006, root@pam + $param->{password}), POST the addition request,
+# and hand the returned corosync config/authkey to finish_join() to set
+# up the local node. Dies on any failure (login, API call, local setup).
+#
+# Recognized $param keys: hostname, password, fingerprint, force, nodeid,
+# votes, ring0_addr, ring1_addr.
+sub join {
+    my ($param) = @_;
+
+    my $nodename = PVE::INotify::nodename();
+
+    setup_sshd_config();
+    setup_rootsshconfig();
+    setup_ssh_keys();
+
+    # check if we can join with the given parameters and current node state
+    my ($ring0_addr, $ring1_addr) = $param->@{'ring0_addr', 'ring1_addr'};
+    assert_joinable($ring0_addr, $ring1_addr, $param->{force});
+
+    # make sure known_hosts is on local filesystem
+    ssh_unmerge_known_hosts();
+
+    my $host = $param->{hostname};
+    my $local_ip_address = remote_node_ip($nodename);
+
+    my $conn_args = {
+	username => 'root@pam',
+	password => $param->{password},
+	cookie_name => 'PVEAuthCookie',
+	protocol => 'https',
+	host => $host,
+	port => 8006,
+    };
+
+    if (my $fp = $param->{fingerprint}) {
+	# pin the expected TLS fingerprint (normalized to upper case)
+	$conn_args->{cached_fingerprints} = { uc($fp) => 1 };
+    } else {
+	# API schema ensures that we can only get here from CLI handler
+	$conn_args->{manual_verification} = 1;
+    }
+
+    print "Establishing API connection with host '$host'\n";
+
+    my $conn = PVE::APIClient::LWP->new(%$conn_args);
+    $conn->login();
+
+    # login raises an exception on failure, so if we get here we're good
+    print "Login succeeded.\n";
+
+    # only forward parameters the user actually set
+    my $args = {};
+    $args->{force} = $param->{force} if defined($param->{force});
+    $args->{nodeid} = $param->{nodeid} if $param->{nodeid};
+    $args->{votes} = $param->{votes} if defined($param->{votes});
+    $args->{ring0_addr} = $ring0_addr // $local_ip_address;
+    $args->{ring1_addr} = $ring1_addr if defined($ring1_addr);
+
+    print "Request addition of this node\n";
+    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);
+
+    print "Join request OK, finishing setup locally\n";
+
+    # added successfully - now prepare local node
+    finish_join($nodename, $res->{corosync_conf}, $res->{corosync_authkey});
+}
+
+# finish_join($nodename, $corosync_conf, $corosync_authkey)
+#
+# Complete the local side of a cluster join: install the corosync config
+# and authkey received from the cluster, restart pve-cluster (backing up
+# and removing the old local config DB first), wait until the cluster
+# filesystem reports quorum, then regenerate node certs/SSH files and
+# restart the API services. Blocks indefinitely while waiting for quorum.
+sub finish_join {
+    my ($nodename, $corosync_conf, $corosync_authkey) = @_;
+
+    mkdir "$localclusterdir";
+    PVE::Tools::file_set_contents($authfile, $corosync_authkey);
+    PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);
+
+    print "stopping pve-cluster service\n";
+    my $cmd = ['systemctl', 'stop', 'pve-cluster'];
+    run_command($cmd, errmsg => "can't stop pve-cluster service");
+
+    # the old standalone DB is obsolete after the join - keep a backup copy
+    $backup_cfs_database->($dbfile);
+    unlink $dbfile;
+
+    $cmd = ['systemctl', 'start', 'corosync', 'pve-cluster'];
+    run_command($cmd, errmsg => "starting pve-cluster failed");
+
+    # wait for quorum
+    my $printqmsg = 1;
+    while (!check_cfs_quorum(1)) {
+	if ($printqmsg) {
+	    print "waiting for quorum...";
+	    STDOUT->flush();
+	    $printqmsg = 0;
+	}
+	sleep(1);
+    }
+    print "OK\n" if !$printqmsg;
+
+    # force-regenerate the node certificate (1 => new cert)
+    updatecerts_and_ssh(1);
+
+    print "generated new node certificate, restart pveproxy and pvedaemon services\n";
+    run_command(['systemctl', 'reload-or-restart', 'pvedaemon', 'pveproxy']);
+
+    print "successfully added node '$nodename' to cluster.\n";
+}
+
+# updatecerts_and_ssh($force_new_cert, $silent)
+#
+# (Re)generate the per-node files kept on the cluster filesystem: node
+# certificate (forced fresh if $force_new_cert is true), merged SSH
+# authorized keys / known_hosts, and the vzdump symlink/files.
+# Requires quorum: without it, returns undef when $silent is set,
+# otherwise dies. $silent also suppresses all progress output.
+sub updatecerts_and_ssh {
+    my ($force_new_cert, $silent) = @_;
+
+    # progress printer, no-op when $silent
+    my $p = sub { print "$_[0]\n" if !$silent };
+
+    setup_rootsshconfig();
+
+    gen_pve_vzdump_symlink();
+
+    if (!check_cfs_quorum(1)) {
+	return undef if $silent;
+	die "no quorum - unable to update files\n";
+    }
+
+    setup_ssh_keys();
+
+    my $nodename = PVE::INotify::nodename();
+    my $local_ip_address = remote_node_ip($nodename);
+
+    $p->("(re)generate node files");
+    $p->("generate new node certificate") if $force_new_cert;
+    gen_pve_node_files($nodename, $local_ip_address, $force_new_cert);
+
+    $p->("merge authorized SSH keys and known hosts");
+    ssh_merge_keys();
+    ssh_merge_known_hosts($nodename, $local_ip_address, 1);
+    gen_pve_vzdump_files();
}
1;