1 package PVE
::CLI
::pvecm
;
9 use Time
::HiRes
qw(usleep);
11 use PVE
::Tools
qw(run_command);
14 use PVE
::JSONSchema
qw(get_standard_option);
15 use PVE
::RPCEnvironment
;
18 use PVE
::API2
::ClusterConfig
;
20 use PVE
::Cluster
::Setup
;
22 use base
qw(PVE::CLIHandler);
# Pin HOME for this process and, through the environment, for the commands it runs.
24 $ENV{HOME
} = '/root'; # for ssh-copy-id
# Base directory; the join command refuses to run while the cwd is inside it.
26 my $basedir = "/etc/pve";
# Corosync configuration path; fetched from the peer during the SSH-based join.
27 my $clusterconf = "$basedir/corosync.conf";
# Local state directory; the SSH-based join creates its temporary directory here.
28 my $libdir = "/var/lib/pve-cluster";
# Corosync authentication key path; fetched from the peer during the SSH-based join.
29 my $authfile = "/etc/corosync/authkey";
# Prepare the environment for command-line use by delegating to
# PVE::RPCEnvironment->setup_default_cli_env(). Takes no arguments.
32 sub setup_environment
{
33 PVE
::RPCEnvironment-
>setup_default_cli_env();
# CLI method: generate a new corosync authentication key file.
# Reads the target path from $param->{filename}; the schema declares a 'null'
# return. Dies when not run as root or when the target file already exists.
36 __PACKAGE__-
>register_method ({
40 description
=> "Generate new cryptographic key for corosync.",
42 additionalProperties
=> 0,
46 description
=> "Output file name"
50 returns
=> { type
=> 'null' },
55 my $filename = $param->{filename
};
# $> is the effective user id: only root may generate the key.
58 $> == 0 || die "Error: Authorization key must be generated as root user.\n";
# NOTE(review): dirname() and File::Path::make_path() are called below, but no
# matching 'use' line is visible among this file's imports - confirm that
# File::Basename and File::Path are loaded.
59 my $dirname = dirname
($filename);
# Never overwrite an existing key file.
61 die "key file '$filename' already exists\n" if -e
$filename;
# Make sure the target directory exists before invoking the generator.
63 File
::Path
::make_path
($dirname) if $dirname;
# The actual key generation is delegated to corosync-keygen.
65 run_command
(['corosync-keygen', '-l', '-k', $filename]);
# Helper closure: walk all cluster members in sorted node-name order.
#   $code  - callback; presumably invoked with ($node, $ip) for each member
#            that has an IP, which is how the callers in this file unpack @_
#            (TODO confirm).
#   $noerr - when true, a member without an 'ip' entry only produces a
#            warning; otherwise the missing IP is fatal.
70 my $foreach_member = sub {
71 my ($code, $noerr) = @_;
73 my $members = PVE
::Cluster
::get_members
();
74 foreach my $node (sort keys %$members) {
# The assignment doubles as the test: a missing or false 'ip' skips this branch.
75 if (my $ip = $members->{$node}->{ip
}) {
# Missing IP: abort unless the caller asked for lenient handling.
78 die "cannot get the cluster IP for node '$node'.\n" if !$noerr;
79 warn "cannot get the cluster IP for node '$node'.\n";
# CLI method 'setup_qdevice': configure an external corosync QDevice.
# Visible steps: require every member to be online, install root's SSH key on
# the qnetd host, initialise the certificate stores on the qnetd server and on
# all nodes, exchange and import the certificates, add the device to the
# quorum section of corosync.conf, start and enable corosync-qdevice on every
# node, and finally trigger a cluster-wide configuration reload.
# Parameters read from $param: address, network, force. Schema return: 'null'.
85 __PACKAGE__-
>register_method ({
86 name
=> 'setup_qdevice',
87 path
=> 'setup_qdevice',
89 description
=> "Setup the use of a QDevice",
91 additionalProperties
=> 0,
94 type
=> 'string', format
=> 'ip',
95 description
=> "Specifies the network address of an external corosync QDevice" ,
100 description
=> 'The network which should be used to connect to the external qdevice',
105 description
=> "Do not throw error on possible dangerous operations.",
110 returns
=> { type
=> 'null' },
115 PVE
::Corosync
::check_conf_exists
();
# Refuse to continue while any member is reported offline.
117 my $members = PVE
::Cluster
::get_members
();
118 foreach my $node (sort keys %$members) {
119 die "All nodes must be online! Node $node is offline, aborting.\n"
120 if !$members->{$node}->{online
};
123 my $conf = PVE
::Cluster
::cfs_read_file
("corosync.conf");
# An already configured device is only tolerated when 'force' is given.
125 die "QDevice already configured!\n"
126 if defined($conf->{main
}->{quorum
}->{device
}) && !$param->{force
};
128 my $network = $param->{network
};
131 my $algorithm = 'ffsplit';
# Odd member count check (lowest bit set).
# NOTE(review): a hash in scalar context only evaluates to its key count on
# Perl 5.26 and later; 'scalar(keys %{$members})' would not depend on the
# interpreter version - confirm the minimum supported Perl.
132 if (scalar(%{$members}) & 1) {
133 if ($param->{force
}) {
136 die "Clusters with an odd node count are not officially supported!\n";
# Paths and tool names used for the certificate exchange below.
140 my $qnetd_addr = $param->{address
};
141 my $base_dir = "/etc/corosync/qdevice/net";
142 my $db_dir_qnetd = "/etc/corosync/qnetd/nssdb";
143 my $db_dir_node = "$base_dir/nssdb";
144 my $ca_export_base = "qnetd-cacert.crt";
145 my $ca_export_file = "$db_dir_qnetd/$ca_export_base";
146 my $crq_file_base = "qdevice-net-node.crq";
147 my $p12_file_base = "qdevice-net-node.p12";
148 my $qdevice_certutil = "corosync-qdevice-net-certutil";
149 my $qnetd_certutil= "corosync-qnetd-certutil";
150 my $clustername = $conf->{main
}->{totem
}->{cluster_name
};
# Install root's key on the qnetd host first; the ssh/scp calls further down
# run with BatchMode=yes and therefore cannot ask for a password.
152 run_command
(['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$qnetd_addr"]);
# An existing local certificate store is fatal unless 'force' is given.
154 if (-d
$db_dir_node) {
155 # FIXME: check on all nodes?!
156 if ($param->{force
}) {
159 die "QDevice certificate store already initialised, set force to delete!\n";
# Command prefixes shared by all remote invocations below.
163 my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes', '-lroot'];
164 my $scp_cmd = ['scp', '-o', 'BatchMode=yes'];
166 print "\nINFO: initializing qnetd server\n";
168 [@$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-i"],
172 print "\nINFO: copying CA cert and initializing on all nodes\n";
# The CA certificate is staged under /etc/pve and removed again afterwards.
173 run_command
([@$scp_cmd, "root\@\[$qnetd_addr\]:$ca_export_file", "/etc/pve/$ca_export_base"]);
174 $foreach_member->(sub {
175 my ($node, $ip) = @_;
# Prefix every output line with the node it came from.
176 my $outsub = sub { print "\nnode '$node': " . shift };
178 [@$ssh_cmd, $ip, $qdevice_certutil, "-i", "-c", "/etc/pve/$ca_export_base"],
179 noerr
=> 1, outfunc
=> \
&$outsub
182 unlink "/etc/pve/$ca_export_base";
184 print "\nINFO: generating cert request\n";
185 run_command
([$qdevice_certutil, "-r", "-n", $clustername]);
187 print "\nINFO: copying exported cert request to qnetd server\n";
188 run_command
([@$scp_cmd, "$db_dir_node/$crq_file_base", "root\@\[$qnetd_addr\]:/tmp"]);
190 print "\nINFO: sign and export cluster cert\n";
192 @$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-s", "-c",
193 "/tmp/$crq_file_base", "-n", "$clustername"
196 print "\nINFO: copy exported CRT\n";
198 @$scp_cmd, "root\@\[$qnetd_addr\]:$db_dir_qnetd/cluster-$clustername.crt",
202 print "\nINFO: import certificate\n";
203 run_command
(["$qdevice_certutil", "-M", "-c", "$db_dir_node/cluster-$clustername.crt"]);
205 print "\nINFO: copy and import pk12 cert to all nodes\n";
# The PKCS#12 file is staged under /etc/pve, imported on each node and then
# removed again - presumably /etc/pve is readable from every node (confirm).
206 run_command
([@$scp_cmd, "$db_dir_node/$p12_file_base", "/etc/pve/"]);
207 $foreach_member->(sub {
208 my ($node, $ip) = @_;
209 my $outsub = sub { print "\nnode '$node': " . shift };
211 @$ssh_cmd, $ip, "$qdevice_certutil", "-m", "-c",
212 "/etc/pve/$p12_file_base"], outfunc
=> \
&$outsub
215 unlink "/etc/pve/$p12_file_base";
# Second read of the configuration - presumably inside the $code callback that
# is handed to cfs_lock_file below, i.e. while holding the lock (confirm).
219 my $conf = PVE
::Cluster
::cfs_read_file
("corosync.conf");
220 my $quorum_section = $conf->{main
}->{quorum
};
222 die "Qdevice already configured, must be removed before setting up new one!\n"
223 if defined($quorum_section->{device
}); # must not be forced!
230 algorithm
=> $algorithm,
# Only the 'ffsplit' algorithm gets an explicit vote count of 1.
233 $qdev_section->{votes
} = 1 if $algorithm eq 'ffsplit';
235 $quorum_section->{device
} = $qdev_section;
237 PVE
::Corosync
::atomic_write_conf
($conf);
240 print "\nINFO: add QDevice to cluster configuration\n";
# Run $code under the corosync.conf lock; 10 is presumably the lock timeout
# in seconds (confirm against PVE::Cluster::cfs_lock_file).
241 PVE
::Cluster
::cfs_lock_file
('corosync.conf', 10, $code);
# Start and enable the qdevice daemon on every member.
244 $foreach_member->(sub {
245 my ($node, $ip) = @_;
246 my $outsub = sub { print "\nnode '$node': " . shift };
247 print "\nINFO: start and enable corosync qdevice daemon on node '$node'...\n";
248 run_command
([@$ssh_cmd, $ip, 'systemctl', 'start', 'corosync-qdevice'], outfunc
=> \
&$outsub);
249 run_command
([@$ssh_cmd, $ip, 'systemctl', 'enable', 'corosync-qdevice'], outfunc
=> \
&$outsub);
252 run_command
(['corosync-cfgtool', '-R']); # do cluster wide config reload
# CLI method 'remove_qdevice': remove a configured QDevice.
# Visible steps: require every member to be online, delete the device entry
# from the quorum section of corosync.conf, wipe the qdevice state directory
# on every node, stop and disable corosync-qdevice on every node and trigger
# a configuration reload. Dies when no device is configured.
257 __PACKAGE__-
>register_method ({
258 name
=> 'remove_qdevice',
259 path
=> 'remove_qdevice',
261 description
=> "Remove a configured QDevice",
263 additionalProperties
=> 0,
266 returns
=> { type
=> 'null' },
271 PVE
::Corosync
::check_conf_exists
();
# Refuse to continue while any member is reported offline.
273 my $members = PVE
::Cluster
::get_members
();
274 foreach my $node (sort keys %$members) {
275 die "All nodes must be online! Node $node is offline, aborting.\n"
276 if !$members->{$node}->{online
};
# BatchMode=yes: ssh must never prompt for a password.
279 my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes', '-lroot'];
# Presumably the body of the $code callback passed to cfs_lock_file below,
# i.e. executed while holding the corosync.conf lock (confirm).
282 my $conf = PVE
::Cluster
::cfs_read_file
("corosync.conf");
283 my $quorum_section = $conf->{main
}->{quorum
};
285 die "No QDevice configured!\n" if !defined($quorum_section->{device
});
287 delete $quorum_section->{device
};
289 PVE
::Corosync
::atomic_write_conf
($conf);
291 # cleanup qdev state (cert storage)
292 my $qdev_state_dir = "/etc/corosync/qdevice";
293 $foreach_member->(sub {
# The node name is not needed here, only the address.
294 my (undef, $ip) = @_;
# '--' separates the ssh arguments from the remote command line.
295 run_command
([@$ssh_cmd, $ip, '--', 'rm', '-rf', $qdev_state_dir]);
# 10 is presumably the lock timeout in seconds (confirm).
299 PVE
::Cluster
::cfs_lock_file
('corosync.conf', 10, $code);
# Stop and disable the qdevice daemon on every member.
302 $foreach_member->(sub {
303 my (undef, $ip) = @_;
304 run_command
([@$ssh_cmd, $ip, 'systemctl', 'stop', 'corosync-qdevice']);
305 run_command
([@$ssh_cmd, $ip, 'systemctl', 'disable', 'corosync-qdevice']);
# Ask corosync to reload its configuration.
308 run_command
(['corosync-cfgtool', '-R']);
310 print "\nRemoved Qdevice.\n";
# CLI method: join the local node to an existing cluster.
# Unless 'use_ssh' is set, the join is delegated to PVE::Cluster::Setup::join
# after prompting for the peer's root password. Otherwise the SSH-based
# procedure further down is used: exchange SSH keys, query the peer's API
# version, run 'pvecm addnode' on the peer, fetch the corosync auth key and
# configuration via rsync and hand them to PVE::Cluster::Setup::finish_join.
# Parameters read from $param: hostname, use_ssh, force, nodeid, votes and the
# corosync link arguments. Schema return: 'null'.
315 __PACKAGE__-
>register_method ({
319 description
=> "Adds the current node to an existing cluster.",
321 additionalProperties
=> 0,
322 properties
=> PVE
::Corosync
::add_corosync_link_properties
({
325 description
=> "Hostname (or IP) of an existing cluster member."
327 nodeid
=> get_standard_option
('corosync-nodeid'),
330 description
=> "Number of votes for this node",
336 description
=> "Do not throw error if node already exists.",
339 fingerprint
=> get_standard_option
('fingerprint-sha256', {
344 description
=> "Always use SSH to join, even if peer may do it over API.",
349 returns
=> { type
=> 'null' },
354 # avoid "transport endpoint not connected" errors that occur if
355 # restarting pmxcfs while in fuse-mounted /etc/pve
356 die "Navigate out of $basedir before running 'pvecm add', for example by running 'cd'.\n"
357 if getcwd
() =~ m!^$basedir(/.*)?$!;
359 my $nodename = PVE
::INotify
::nodename
();
360 my $host = $param->{hostname
};
# Default path: join through the API unless SSH was explicitly requested.
364 if (!$param->{use_ssh
}) {
365 my $password = PVE
::PTY
::read_password
("Please enter superuser (root) password for '$host': ");
# $param is forwarded to the join routine: drop the CLI-only switch and add
# the password that was just read.
367 delete $param->{use_ssh
};
368 $param->{password
} = $password;
# The join runs under a local lock file; 10 is presumably the lock timeout in
# seconds (confirm against PVE::Tools::lock_file).
370 my $local_cluster_lock = "/var/lock/pvecm.lock";
371 PVE
::Tools
::lock_file
($local_cluster_lock, 10, \
&PVE
::Cluster
::Setup
::join, $param);
# An API client exception with code 501 is replaced by a hint to use SSH.
374 if (ref($err) eq 'PVE::APIClient::Exception' && defined($err->{code
}) && $err->{code
} == 501) {
375 $err = "Remote side is not able to use API for Cluster join!\n" .
376 "Pass the 'use_ssh' switch or update the remote side.\n";
380 return; # all OK, the API join endpoint successfully set us up
383 # allow fallback to old ssh only join if wished or needed
385 my $local_ip_address = PVE
::Cluster
::remote_node_ip
($nodename);
386 my $links = PVE
::Corosync
::extract_corosync_link_args
($param);
388 PVE
::Cluster
::Setup
::assert_joinable
($local_ip_address, $links, $param->{force
});
390 PVE
::Cluster
::Setup
::setup_sshd_config
();
391 PVE
::Cluster
::Setup
::setup_rootsshconfig
();
392 PVE
::Cluster
::Setup
::setup_ssh_keys
();
394 # make sure known_hosts is on local filesystem
395 PVE
::Cluster
::Setup
::ssh_unmerge_known_hosts
();
# Install root's key on the peer; both output callbacks are no-ops, so the
# tool's output is discarded.
397 my $cmd = ['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$host"];
399 $cmd, 'outfunc' => sub {}, 'errfunc' => sub {}, 'errmsg' => "unable to copy ssh ID");
# Ask the peer for its join API version; it stays 0 if nothing is printed.
401 $cmd = ['ssh', $host, '-o', 'BatchMode=yes', 'pvecm', 'apiver'];
402 my $remote_apiver = 0;
403 run_command
($cmd, 'outfunc' => sub {
404 $remote_apiver = shift;
405 chomp $remote_apiver;
408 PVE
::Cluster
::Setup
::assert_we_can_join_cluster_version
($remote_apiver);
# Build the 'pvecm addnode' command line that is executed on the peer.
410 $cmd = ['ssh', $host, '-o', 'BatchMode=yes', 'pvecm', 'addnode', $nodename, '--force', 1];
# nodeid is only forwarded when true; votes uses defined() so that an
# explicit 0 is still forwarded.
412 push @$cmd, '--nodeid', $param->{nodeid
} if $param->{nodeid
};
413 push @$cmd, '--votes', $param->{votes
} if defined($param->{votes
});
415 my $link_desc = get_standard_option
('corosync-link');
# Serialise every explicitly passed link into its --linkN option.
417 foreach my $link (keys %$links) {
418 push @$cmd, "--link$link", PVE
::JSONSchema
::print_property_string
(
419 $links->{$link}, $link_desc->{format
});
422 # this will be used as fallback if no links are specified
# Peers reporting version 0 receive the local IP as --link0, newer ones as
# --new_node_ip.
424 push @$cmd, '--link0', $local_ip_address if $remote_apiver == 0;
425 push @$cmd, '--new_node_ip', $local_ip_address if $remote_apiver >= 1;
427 print "No cluster network links passed explicitly, fallback to local node"
428 . " IP '$local_ip_address'\n";
# List-form system(): the command is executed without a shell.
431 if (system (@$cmd) != 0) {
432 my $cmdtxt = join (' ', @$cmd);
433 die "unable to add node: command failed ($cmdtxt)\n";
# Per-process temporary directory ($$ is the current process id).
436 my $tmpdir = "$libdir/.pvecm_add.tmp.$$";
440 print "copy corosync auth key\n";
# Fetch both the auth key and corosync.conf from the peer into $tmpdir.
441 $cmd = ['rsync', '--rsh=ssh -l root -o BatchMode=yes', '-lpgoq',
442 "[$host]:$authfile", "[$host]:$clusterconf", $tmpdir];
444 system(@$cmd) == 0 || die "can't rsync data from host '$host'\n";
446 my $corosync_conf = PVE
::Tools
::file_get_contents
("$tmpdir/corosync.conf");
447 my $corosync_authkey = PVE
::Tools
::file_get_contents
("$tmpdir/authkey");
449 PVE
::Cluster
::Setup
::finish_join
($nodename, $corosync_conf, $corosync_authkey);
458 # use a synced worker so we get a nice task log when joining through CLI
459 my $rpcenv = PVE
::RPCEnvironment
::get
();
460 my $authuser = $rpcenv->get_user();
462 $rpcenv->fork_worker('clusterjoin', '', $authuser, $worker);
# CLI method: print the local view of the cluster status.
# Prints a short "Cluster information" header taken from the totem section of
# corosync.conf (when that section is non-empty) and then replaces the current
# process with 'corosync-quorumtool -siH'.
467 __PACKAGE__-
>register_method ({
471 description
=> "Displays the local view of the cluster status.",
473 additionalProperties
=> 0,
476 returns
=> { type
=> 'null' },
481 PVE
::Corosync
::check_conf_exists
();
# Fall back to an empty configuration when reading dies or yields undef.
482 my $conf = eval { PVE
::Cluster
::cfs_read_file
("corosync.conf") } // {};
484 my $totem = PVE
::Corosync
::totem_config
($conf);
# Only print the header when the totem section has entries.
486 if (scalar(%$totem)) {
# Print one "Label: value" row, using $default when the key is undefined.
487 my $print_info = sub {
488 my ($label, $key, $default) = @_;
489 my $val = $totem->{$key} // $default;
490 printf "%-17s %s\n", "$label:", "$val";
493 printf "Cluster information\n";
494 printf "-------------------\n";
# NOTE(review): 'UNKOWN?' looks like a typo for 'UNKNOWN?'. It is a runtime
# string and therefore left untouched here.
495 $print_info->('Name', 'cluster_name', 'UNKOWN?');
496 $print_info->('Config Version', 'config_version', -1);
497 $print_info->('Transport', 'transport', 'knet');
498 $print_info->('Secure auth', 'secauth', 'off');
# exec only returns when the program could not be started.
502 exec ('corosync-quorumtool', '-siH');
503 exit (-1); # should not be reached
# CLI method: print the local view of the cluster nodes by replacing the
# current process with 'corosync-quorumtool -l'.
506 __PACKAGE__-
>register_method ({
510 description
=> "Displays the local view of the cluster nodes.",
512 additionalProperties
=> 0,
515 returns
=> { type
=> 'null' },
520 PVE
::Corosync
::check_conf_exists
();
# exec only returns when the program could not be started.
522 exec ('corosync-quorumtool', '-l');
523 exit (-1); # should not be reached
# CLI method: tell corosync a new expected-votes value by replacing the
# current process with 'corosync-quorumtool -e <expected>'.
# The value is taken from $param->{expected}.
526 __PACKAGE__-
>register_method ({
530 description
=> "Tells corosync a new value of expected votes.",
532 additionalProperties
=> 0,
536 description
=> "Expected votes",
541 returns
=> { type
=> 'null' },
546 PVE
::Corosync
::check_conf_exists
();
# List-form exec: the value is passed as a separate argument, without a shell.
548 exec ('corosync-quorumtool', '-e', $param->{expected
});
549 exit (-1); # should not be reached
# CLI method 'updatecerts': (re)generate local files and update the node
# certificates and SSH setup. The work runs inside
# PVE::Tools::run_fork_with_timeout with a limit of 30 (presumably seconds -
# confirm). Parameters read from $param: force, silent. Schema return: 'null'.
552 __PACKAGE__-
>register_method ({
553 name
=> 'updatecerts',
554 path
=> 'updatecerts',
556 description
=> "Update node certificates (and generate all needed files/directories).",
558 additionalProperties
=> 0,
561 description
=> "Force generation of new SSL certificate.",
566 description
=> "Ignore errors (i.e. when cluster has no quorum).",
572 returns
=> { type
=> 'null' },
576 # We are called from pveproxy.service's ExecStartPre and do I/O on /etc/pve,
577 # which can hang (uninterruptible D state). A hang would fail the whole
578 # service start, so the work below runs in a fork with a timeout.
579 PVE
::Tools
::run_fork_with_timeout
(30, sub {
580 PVE
::Cluster
::Setup
::generate_local_files
();
# Loop until the quorum check succeeds; the message is printed on every 50th
# iteration only.
582 for (my $i = 0; !PVE
::Cluster
::check_cfs_quorum
(1); $i++) {
583 print "waiting for pmxcfs mount to appear and get quorate...\n" if $i % 50 == 0;
# Postfix hash slice: passes the 'force' and 'silent' values, in that order.
587 PVE
::Cluster
::Setup
::updatecerts_and_ssh
($param->@{qw(force silent)});
588 PVE
::Cluster
::prepare_observed_file_basedirs
();
# CLI method used by VM/CT migration (not meant for manual use).
# Visible behaviour: with 'get_migration_ip' it determines the local migration
# IP; with 'run-command' it picks a listen address and a free migration port
# and pipes a socket to the command given in 'extra-args'. Afterwards it
# prints "tunnel online" and reads STDIN until a line consisting of "quit".
# Parameters read from $param: get_migration_ip, migration_network,
# run-command, extra-args. Schema return: 'null'.
594 __PACKAGE__-
>register_method ({
598 description
=> "Used by VM/CT migration - do not use manually.",
600 additionalProperties
=> 0,
602 get_migration_ip
=> {
605 description
=> 'return the migration IP, if configured',
608 migration_network
=> {
611 description
=> 'the migration network used to detect the local migration IP',
# NOTE(review): "stdandard" in the text below looks like a typo for
# "standard". It is a runtime string and therefore left untouched here.
616 description
=> 'Run a command with a tcp socket as standard input.'
617 .' The IP address and port are printed via this'
618 ." command's stdandard output first, each on a separate line.",
621 'extra-args' => PVE
::JSONSchema
::get_standard_option
('extra-args'),
624 returns
=> { type
=> 'null'},
628 if (!PVE
::Cluster
::check_cfs_quorum
(1)) {
# Helper closure: resolve the local migration IP for a network ($cidr).
# Without an explicit network the migration network from datacenter.cfg is
# used when one is set. Dies when the network has no local address or several
# different ones.
633 my $get_local_migration_ip = sub {
636 if (!defined($cidr)) {
# NOTE(review): cfs_read_file is called unqualified here, while the rest of
# this file calls PVE::Cluster::cfs_read_file - confirm that the name is
# imported into this package.
637 my $dc_conf = cfs_read_file
('datacenter.cfg');
638 $cidr = $dc_conf->{migration
}->{network
}
639 if defined($dc_conf->{migration
}->{network
});
642 if (defined($cidr)) {
643 my $ips = PVE
::Network
::get_local_ip_from_cidr
($cidr);
645 die "could not get migration ip: no IP address configured on local " .
646 "node for network '$cidr'\n" if scalar(@$ips) == 0;
# Several entries are acceptable as long as they are all equal to the first.
# NOTE(review): '@$ips[0]' is a one-element slice; '$ips->[0]' is the
# conventional spelling for a single element.
648 die "could not get migration ip: multiple, different, IP address configured for " .
649 "network '$cidr'\n" if scalar(@$ips) > 1 && grep { @$ips[0] ne $_ } @$ips;
657 my $network = $param->{migration_network
};
# The two modes are mutually exclusive.
658 if ($param->{get_migration_ip
}) {
659 die "cannot use --run-command with --get_migration_ip\n"
660 if $param->{'run-command'};
662 if (my $ip = $get_local_migration_ip->($network)) {
667 # do not keep tunnel open when asked for migration ip
671 if ($param->{'run-command'}) {
# The command to run arrives as an array reference and must not be empty.
672 my $cmd = $param->{'extra-args'};
673 die "missing command\n"
674 if !$cmd || !scalar(@$cmd);
676 # Get an ip address to listen on, and find a free migration port
678 if (defined($network)) {
679 $ip = $get_local_migration_ip->($network)
680 or die "failed to get migration IP address to listen on\n";
681 $family = PVE
::Tools
::get_host_address_family
($ip);
# Presumably the fallback branch when no network was given: derive address
# and family from the local node name (confirm).
683 my $nodename = PVE
::INotify
::nodename
();
684 ($ip, $family) = PVE
::Network
::get_ip_from_hostname
($nodename, 0);
686 my $port = PVE
::Tools
::next_migrate_port
($family, $ip);
688 PVE
::Tools
::pipe_socket_to_command
($cmd, $ip, $port);
692 print "tunnel online\n";
# Keep running until STDIN is closed or a line consisting of "quit" arrives.
695 while (my $line = <STDIN
>) {
697 last if $line =~ m/^quit$/;
# Command table: maps each sub-command name to an array of
# [ class, method name, parameter-name list, ... ]. Entries with __PACKAGE__
# refer to methods registered in this file; the others refer to
# PVE::API2::ClusterConfig. The third element presumably lists the parameters
# that may be given positionally on the command line (PVE::CLIHandler
# convention - confirm).
705 apiver
=> [ 'PVE::API2::ClusterConfig', 'join_api_version', [], {}, sub {
709 keygen
=> [ __PACKAGE__
, 'keygen', ['filename']],
710 create
=> [ 'PVE::API2::ClusterConfig', 'create', ['clustername']],
711 add
=> [ __PACKAGE__
, 'add', ['hostname']],
712 addnode
=> [ 'PVE::API2::ClusterConfig', 'addnode', ['node']],
713 delnode
=> [ 'PVE::API2::ClusterConfig', 'delnode', ['node']],
714 status
=> [ __PACKAGE__
, 'status' ],
715 nodes
=> [ __PACKAGE__
, 'nodes' ],
716 expected
=> [ __PACKAGE__
, 'expected', ['expected']],
717 updatecerts
=> [ __PACKAGE__
, 'updatecerts', []],
718 mtunnel
=> [ __PACKAGE__
, 'mtunnel', ['extra-args']],
# The two entries below map to the QDevice methods registered in this file;
# they are presumably nested under a common parent command (confirm).
720 setup
=> [ __PACKAGE__
, 'setup_qdevice', ['address']],
721 remove
=> [ __PACKAGE__
, 'remove_qdevice', []],