]> git.proxmox.com Git - pve-cluster.git/blob - data/PVE/CLI/pvecm.pm
pvecm: qdevice setup: fix check for odd node count
[pve-cluster.git] / data / PVE / CLI / pvecm.pm
1 package PVE::CLI::pvecm;
2
3 use strict;
4 use warnings;
5
6 use File::Path;
7 use File::Basename;
8 use PVE::Tools qw(run_command);
9 use PVE::Cluster;
10 use PVE::INotify;
11 use PVE::JSONSchema qw(get_standard_option);
12 use PVE::RPCEnvironment;
13 use PVE::CLIHandler;
14 use PVE::PTY;
15 use PVE::API2::ClusterConfig;
16 use PVE::Corosync;
17 use PVE::Cluster::Setup;
18
19 use base qw(PVE::CLIHandler);
20
# ssh-copy-id needs HOME to be set; always use root's home directory
$ENV{HOME} = '/root';

# file system locations used by the commands in this module
my $basedir = '/etc/pve';
my $clusterconf = $basedir . '/corosync.conf';
my $libdir = '/var/lib/pve-cluster';
my $authfile = '/etc/corosync/authkey';

27
28
# Prepare the RPC environment by delegating to the default CLI setup of
# PVE::RPCEnvironment.
sub setup_environment {
    return PVE::RPCEnvironment->setup_default_cli_env();
}
32
# CLI method 'keygen': create a new corosync authentication key at the
# requested path using corosync-keygen.
#
# Parameters:
#   filename - path of the key file to create
#
# Dies if not run with an effective UID of 0 or if the target file already
# exists. Missing parent directories of the target are created.
__PACKAGE__->register_method ({
    name => 'keygen',
    path => 'keygen',
    method => 'PUT',
    description => "Generate new cryptographic key for corosync.",
    parameters => {
        additionalProperties => 0,
        properties => {
            filename => {
                type => 'string',
                description => "Output file name"
            }
        },
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        my $keyfile = $param->{filename};

        # $> holds the effective user ID, only root may generate the key
        if ($> != 0) {
            die "Error: Authorization key must be generated as root user.\n";
        }

        my $parent_dir = dirname($keyfile);

        # never overwrite an existing key
        if (-e $keyfile) {
            die "key file '$keyfile' already exists\n";
        }

        if ($parent_dir) {
            File::Path::make_path($parent_dir);
        }

        my $keygen_cmd = ['corosync-keygen', '-l', '-k', $keyfile];
        run_command($keygen_cmd);

        return undef;
    }});
66
# Call $code->($node, $ip) for every cluster member, in sorted node name order.
#
# If a member has no known IP the iteration stops: by default this dies, with
# a true $noerr it only warns and returns undef (remaining nodes are skipped).
my $foreach_member = sub {
    my ($code, $noerr) = @_;

    my $members = PVE::Cluster::get_members();
    for my $name (sort keys %$members) {
        my $ip = $members->{$name}->{ip};

        if (!$ip) {
            my $msg = "cannot get the cluster IP for node '$name'.\n";
            die $msg if !$noerr;
            warn $msg;
            return undef;
        }

        $code->($name, $ip);
    }
};
81
# CLI method 'setup_qdevice': configure an external corosync QDevice (model
# 'net') for this cluster.
#
# Parameters:
#   address - IP of the external qnetd server
#   network - accepted by the schema, but not evaluated by this handler
#   force   - allow re-initialising an existing setup and allow clusters with
#             an odd node count (switches the algorithm from 'ffsplit' to 'lms')
#
# Steps: verify all nodes are online, exchange SSH keys with the qnetd host,
# initialise the certificate databases on the qnetd host and on every node,
# create/sign/import the cluster certificate, add the quorum device section to
# corosync.conf under the cluster-wide lock, then start and enable the
# corosync-qdevice service on every node and reload the corosync config.
#
# Dies if a node is offline, if a QDevice or its certificate store is already
# set up (and force is not given), or if any of the executed commands fails.
__PACKAGE__->register_method ({
    name => 'setup_qdevice',
    path => 'setup_qdevice',
    method => 'PUT',
    description => "Setup the use of a QDevice",
    parameters => {
        additionalProperties => 0,
        properties => {
            address => {
                type => 'string', format => 'ip',
                description => "Specifies the network address of an external corosync QDevice" ,
            },
            network => {
                type => 'string',
                format => 'CIDR',
                description => 'The network which should be used to connect to the external qdevice',
                optional => 1,
            },
            force => {
                type => 'boolean',
                description => "Do not throw error on possible dangerous operations.",
                optional => 1,
            },
        },
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();

        my $members = PVE::Cluster::get_members();
        foreach my $node (sort keys %$members) {
            die "All nodes must be online! Node $node is offline, aborting.\n"
                if !$members->{$node}->{online};
        }

        my $conf = PVE::Cluster::cfs_read_file("corosync.conf");

        die "QDevice already configured!\n"
            if defined($conf->{main}->{quorum}->{device}) && !$param->{force};

        my $model = "net";
        my $algorithm = 'ffsplit';

        # Count the keys explicitly: a hash in scalar context only evaluates to
        # its key count since Perl 5.26, older versions yield a "used/allocated"
        # bucket string which would make this parity check meaningless.
        my $node_count = scalar(keys %$members);
        if ($node_count % 2 == 1) {
            if ($param->{force}) {
                $algorithm = 'lms';
            } else {
                die "Clusters with an odd node count are not officially supported!\n";
            }
        }

        my $qnetd_addr = $param->{address};
        my $base_dir = "/etc/corosync/qdevice/net";
        my $db_dir_qnetd = "/etc/corosync/qnetd/nssdb";
        my $db_dir_node = "$base_dir/nssdb";
        my $ca_export_base = "qnetd-cacert.crt";
        my $ca_export_file = "$db_dir_qnetd/$ca_export_base";
        my $crq_file_base = "qdevice-net-node.crq";
        my $p12_file_base = "qdevice-net-node.p12";
        my $qdevice_certutil = "corosync-qdevice-net-certutil";
        my $qnetd_certutil = "corosync-qnetd-certutil";
        my $clustername = $conf->{main}->{totem}->{cluster_name};

        # the following steps use non-interactive (BatchMode) ssh/scp to the
        # qnetd host, so the key has to be deployed there first
        run_command(['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$qnetd_addr"]);

        if (-d $db_dir_node) {
            # FIXME: check on all nodes?!
            if ($param->{force}) {
                rmtree $db_dir_node;
            } else {
                die "QDevice certificate store already initialised, set force to delete!\n";
            }
        }

        my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes', '-lroot'];
        my $scp_cmd = ['scp', '-o', 'BatchMode=yes'];

        print "\nINFO: initializing qnetd server\n";
        # noerr: a failure here is tolerated
        run_command(
            [@$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-i"],
            noerr => 1
        );

        print "\nINFO: copying CA cert and initializing on all nodes\n";
        # /etc/pve is used as staging area so that every node can read the file
        run_command([@$scp_cmd, "root\@\[$qnetd_addr\]:$ca_export_file", "/etc/pve/$ca_export_base"]);
        $foreach_member->(sub {
            my ($node, $ip) = @_;
            my $outsub = sub { print "\nnode '$node': " . shift };
            run_command(
                [@$ssh_cmd, $ip, $qdevice_certutil, "-i", "-c", "/etc/pve/$ca_export_base"],
                noerr => 1, outfunc => $outsub
            );
        });
        unlink "/etc/pve/$ca_export_base";

        print "\nINFO: generating cert request\n";
        run_command([$qdevice_certutil, "-r", "-n", $clustername]);

        print "\nINFO: copying exported cert request to qnetd server\n";
        run_command([@$scp_cmd, "$db_dir_node/$crq_file_base", "root\@\[$qnetd_addr\]:/tmp"]);

        print "\nINFO: sign and export cluster cert\n";
        run_command([
            @$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-s", "-c",
            "/tmp/$crq_file_base", "-n", $clustername
        ]);

        print "\nINFO: copy exported CRT\n";
        run_command([
            @$scp_cmd, "root\@\[$qnetd_addr\]:$db_dir_qnetd/cluster-$clustername.crt",
            $db_dir_node
        ]);

        print "\nINFO: import certificate\n";
        run_command([$qdevice_certutil, "-M", "-c", "$db_dir_node/cluster-$clustername.crt"]);

        print "\nINFO: copy and import pk12 cert to all nodes\n";
        run_command([@$scp_cmd, "$db_dir_node/$p12_file_base", "/etc/pve/"]);
        $foreach_member->(sub {
            my ($node, $ip) = @_;
            my $outsub = sub { print "\nnode '$node': " . shift };
            run_command(
                [@$ssh_cmd, $ip, $qdevice_certutil, "-m", "-c", "/etc/pve/$p12_file_base"],
                outfunc => $outsub
            );
        });
        unlink "/etc/pve/$p12_file_base";

        # executed while holding the corosync.conf lock: re-read the config and
        # add the quorum device section
        my $code = sub {
            my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
            my $quorum_section = $conf->{main}->{quorum};

            die "Qdevice already configured, must be removed before setting up new one!\n"
                if defined($quorum_section->{device}); # must not be forced!

            my $qdev_section = {
                model => $model,
                $model => {
                    tls => 'on',
                    host => $qnetd_addr,
                    algorithm => $algorithm,
                }
            };
            $qdev_section->{votes} = 1 if $algorithm eq 'ffsplit';

            $quorum_section->{device} = $qdev_section;

            PVE::Corosync::atomic_write_conf($conf);
        };

        print "\nINFO: add QDevice to cluster configuration\n";
        PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
        die $@ if $@;

        $foreach_member->(sub {
            my ($node, $ip) = @_;
            my $outsub = sub { print "\nnode '$node': " . shift };
            print "\nINFO: start and enable corosync qdevice daemon on node '$node'...\n";
            run_command([@$ssh_cmd, $ip, 'systemctl', 'start', 'corosync-qdevice'], outfunc => $outsub);
            run_command([@$ssh_cmd, $ip, 'systemctl', 'enable', 'corosync-qdevice'], outfunc => $outsub);
        });

        run_command(['corosync-cfgtool', '-R']); # do cluster wide config reload

        return undef;
    }});
253
# CLI method 'remove_qdevice': drop the quorum device from corosync.conf,
# wipe the QDevice state directory on every node, stop and disable the
# corosync-qdevice service everywhere and reload the corosync configuration.
#
# Dies if a node is offline, if no QDevice is configured, or if one of the
# executed commands fails.
__PACKAGE__->register_method ({
    name => 'remove_qdevice',
    path => 'remove_qdevice',
    method => 'DELETE',
    description => "Remove a configured QDevice",
    parameters => {
        additionalProperties => 0,
        properties => {},
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();

        my $members = PVE::Cluster::get_members();
        for my $name (sort keys %$members) {
            next if $members->{$name}->{online};
            die "All nodes must be online! Node $name is offline, aborting.\n";
        }

        # run a command as root on the given node through non-interactive ssh
        my $ssh_run = sub {
            my ($ip, @remote_cmd) = @_;
            run_command(['ssh', '-o', 'BatchMode=yes', '-lroot', $ip, @remote_cmd]);
        };

        # executed while holding the corosync.conf lock
        my $drop_device = sub {
            my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
            my $quorum = $conf->{main}->{quorum};

            if (!defined($quorum->{device})) {
                die "No QDevice configured!\n";
            }

            delete $quorum->{device};

            PVE::Corosync::atomic_write_conf($conf);

            # cleanup qdev state (cert storage)
            $foreach_member->(sub {
                my $ip = $_[1];
                $ssh_run->($ip, '--', 'rm', '-rf', "/etc/corosync/qdevice");
            });
        };

        PVE::Cluster::cfs_lock_file('corosync.conf', 10, $drop_device);
        die $@ if $@;

        $foreach_member->(sub {
            my $ip = $_[1];
            for my $action ('stop', 'disable') {
                $ssh_run->($ip, 'systemctl', $action, 'corosync-qdevice');
            }
        });

        run_command(['corosync-cfgtool', '-R']);

        print "\nRemoved Qdevice.\n";

        return undef;
    }});
311
# CLI method 'add': join the local node to an existing cluster.
#
# Parameters (besides the corosync link properties added by
# PVE::Corosync::add_corosync_link_properties):
#   hostname    - hostname or IP of an existing cluster member
#   nodeid      - optional corosync node id
#   votes       - optional number of votes for this node
#   force       - do not fail if the node already exists
#   fingerprint - optional SHA256 certificate fingerprint
#   use_ssh     - use the SSH based join instead of the API based one
#
# The join runs in a forked worker. Without 'use_ssh' the root password of
# the peer is prompted for and PVE::Cluster::Setup::join is called under a
# local lock. With 'use_ssh' the node is added by running 'pvecm addnode' on
# the peer through ssh, then the corosync config and authkey are fetched with
# rsync and handed to PVE::Cluster::Setup::finish_join.
__PACKAGE__->register_method ({
    name => 'add',
    path => 'add',
    method => 'PUT',
    description => "Adds the current node to an existing cluster.",
    parameters => {
        additionalProperties => 0,
        properties => PVE::Corosync::add_corosync_link_properties({
            hostname => {
                type => 'string',
                description => "Hostname (or IP) of an existing cluster member."
            },
            nodeid => get_standard_option('corosync-nodeid'),
            votes => {
                type => 'integer',
                description => "Number of votes for this node",
                minimum => 0,
                optional => 1,
            },
            force => {
                type => 'boolean',
                description => "Do not throw error if node already exists.",
                optional => 1,
            },
            fingerprint => get_standard_option('fingerprint-sha256', {
                optional => 1,
            }),
            'use_ssh' => {
                type => 'boolean',
                description => "Always use SSH to join, even if peer may do it over API.",
                optional => 1,
            },
        }),
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        my $nodename = PVE::INotify::nodename();
        my $host = $param->{hostname};

        my $worker = sub {

            if (!$param->{use_ssh}) {
                my $password = PVE::PTY::read_password("Please enter superuser (root) password for '$host': ");

                # hand the remaining parameters plus the password to the join code
                delete $param->{use_ssh};
                $param->{password} = $password;

                my $local_cluster_lock = "/var/lock/pvecm.lock";
                PVE::Tools::lock_file($local_cluster_lock, 10, \&PVE::Cluster::Setup::join, $param);

                if (my $err = $@) {
                    # code 501 is reported as: peer cannot do an API based join
                    if (ref($err) eq 'PVE::APIClient::Exception' && defined($err->{code}) && $err->{code} == 501) {
                        $err = "Remote side is not able to use API for Cluster join!\n" .
                            "Pass the 'use_ssh' switch or update the remote side.\n";
                    }
                    die $err;
                }
                return; # all OK, the API join endpoint successfully set us up
            }

            # allow fallback to old ssh only join if wished or needed

            my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
            my $links = PVE::Corosync::extract_corosync_link_args($param);

            PVE::Cluster::Setup::assert_joinable($local_ip_address, $links, $param->{force});

            PVE::Cluster::Setup::setup_sshd_config();
            PVE::Cluster::Setup::setup_rootsshconfig();
            PVE::Cluster::Setup::setup_ssh_keys();

            # make sure known_hosts is on local filesystem
            PVE::Cluster::Setup::ssh_unmerge_known_hosts();

            my $cmd = ['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$host"];
            run_command($cmd, 'outfunc' => sub {}, 'errfunc' => sub {},
                'errmsg' => "unable to copy ssh ID");

            $cmd = ['ssh', $host, '-o', 'BatchMode=yes',
                'pvecm', 'addnode', $nodename, '--force', 1];

            push @$cmd, '--nodeid', $param->{nodeid} if $param->{nodeid};
            push @$cmd, '--votes', $param->{votes} if defined($param->{votes});

            # sorted, so that the resulting command line is deterministic
            foreach my $link (sort keys %$links) {
                push @$cmd, "--link$link", PVE::JSONSchema::print_property_string(
                    $links->{$link}, get_standard_option('corosync-link'));
            }

            # this will be used as fallback if no links are specified
            if (!%$links) {
                push @$cmd, '--link0', $local_ip_address;
                print "No cluster network links passed explicitly, fallback to local node"
                    . " IP '$local_ip_address'\n";
            }

            if (system (@$cmd) != 0) {
                my $cmdtxt = join (' ', @$cmd);
                die "unable to add node: command failed ($cmdtxt)\n";
            }

            my $tmpdir = "$libdir/.pvecm_add.tmp.$$";
            if (!mkdir($tmpdir)) {
                # remember the reason now, the -d test below may overwrite $!
                my $mkdir_err = "$!";
                # an already existing directory is fine, anything else would
                # only show up later as a misleading rsync failure
                die "unable to create temporary directory '$tmpdir' - $mkdir_err\n"
                    if !-d $tmpdir;
            }

            eval {
                print "copy corosync auth key\n";
                $cmd = ['rsync', '--rsh=ssh -l root -o BatchMode=yes', '-lpgoq',
                    "[$host]:$authfile $clusterconf", $tmpdir];

                system(@$cmd) == 0 || die "can't rsync data from host '$host'\n";

                my $corosync_conf = PVE::Tools::file_get_contents("$tmpdir/corosync.conf");
                my $corosync_authkey = PVE::Tools::file_get_contents("$tmpdir/authkey");

                PVE::Cluster::Setup::finish_join($host, $corosync_conf, $corosync_authkey);
            };
            my $err = $@;

            # always remove the temporary copy of the key material
            rmtree $tmpdir;

            die $err if $err;
        };

        # use a synced worker so we get a nice task log when joining through CLI
        my $rpcenv = PVE::RPCEnvironment::get();
        my $authuser = $rpcenv->get_user();

        $rpcenv->fork_worker('clusterjoin', '', $authuser, $worker);

        return undef;
    }});
446
# CLI method 'status': print a short summary of the totem section of
# corosync.conf (if there is one) and then replace this process with
# 'corosync-quorumtool -siH'.
__PACKAGE__->register_method ({
    name => 'status',
    path => 'status',
    method => 'GET',
    description => "Displays the local view of the cluster status.",
    parameters => {
        additionalProperties => 0,
        properties => {},
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();
        # a config that cannot be read only produces a warning, the quorum
        # tool output below is still shown
        my $conf = eval { PVE::Cluster::cfs_read_file("corosync.conf") } // {};
        warn "$@" if $@;
        my $totem = PVE::Corosync::totem_config($conf);

        if (%$totem) {
            # print one "Label: value" row, using $default if the key is not set
            my $print_info = sub {
                my ($label, $key, $default) = @_;
                my $val = $totem->{$key} // $default;
                printf "%-17s %s\n", "$label:", "$val";
            };

            print "Cluster information\n";
            print "-------------------\n";
            $print_info->('Name', 'cluster_name', 'UNKNOWN?');
            $print_info->('Config Version', 'config_version', -1);
            $print_info->('Transport', 'transport', 'knet');
            $print_info->('Secure auth', 'secauth', 'off');
            print "\n";
        }

        exec ('corosync-quorumtool', '-siH');
        exit (-1); # should not be reached
    }});
485
# CLI method 'nodes': after checking that a corosync config exists, replace
# this process with 'corosync-quorumtool -l'.
__PACKAGE__->register_method ({
    name => 'nodes',
    path => 'nodes',
    method => 'GET',
    description => "Displays the local view of the cluster nodes.",
    parameters => {
        additionalProperties => 0,
        properties => {},
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();

        my @quorumtool_cmd = ('corosync-quorumtool', '-l');
        exec (@quorumtool_cmd);

        # only reached if exec itself failed
        exit (-1);
    }});
505
# CLI method 'expected': after checking that a corosync config exists,
# replace this process with 'corosync-quorumtool -e <expected>'.
#
# Parameters:
#   expected - new number of expected votes (integer, at least 1)
__PACKAGE__->register_method ({
    name => 'expected',
    path => 'expected',
    method => 'PUT',
    description => "Tells corosync a new value of expected votes.",
    parameters => {
        additionalProperties => 0,
        properties => {
            expected => {
                type => 'integer',
                description => "Expected votes",
                minimum => 1,
            },
        },
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();

        my @quorumtool_cmd = ('corosync-quorumtool', '-e', $param->{expected});
        exec (@quorumtool_cmd);

        # only reached if exec itself failed
        exit (-1);
    }});
531
# CLI method 'updatecerts': run PVE::Cluster::Setup::updatecerts_and_ssh in a
# forked child, limited to 30 seconds.
#
# Parameters:
#   force  - passed through as first argument of updatecerts_and_ssh
#   silent - passed through as second argument of updatecerts_and_ssh
__PACKAGE__->register_method ({
    name => 'updatecerts',
    path => 'updatecerts',
    method => 'PUT',
    description => "Update node certificates (and generate all needed files/directories).",
    parameters => {
        additionalProperties => 0,
        properties => {
            force => {
                description => "Force generation of new SSL certificate.",
                type => 'boolean',
                optional => 1,
            },
            silent => {
                description => "Ignore errors (i.e. when cluster has no quorum).",
                type => 'boolean',
                optional => 1,
            },
        },
    },
    returns => { type => 'null' },
    code => sub {
        my ($param) = @_;

        # we get called by the pve-cluster.service ExecStartPost and as we do
        # IO (on /etc/pve) which can hang (uninterruptedly D state). That'd be
        # no-good for ExecStartPost as it fails the whole service in this case
        PVE::Tools::run_fork_with_timeout(30, sub {
            PVE::Cluster::Setup::updatecerts_and_ssh($param->@{qw(force silent)});
        });

        return undef;
    }});
565
# CLI method 'mtunnel': helper used by VM/CT migration.
#
# Parameters:
#   get_migration_ip  - only print the local migration IP (or "no ip") and return
#   migration_network - CIDR used to look up the local migration IP
#   run-command       - run the command from 'extra-args' with a TCP socket as
#                       its standard input
#   extra-args        - the command (and arguments) for 'run-command'
#
# Prints "no quorum" and returns if the node has no quorum. Without
# 'get_migration_ip' and 'run-command' it prints "tunnel online" and then
# reads STDIN until EOF or a line consisting of "quit".
#
# NOTE(review): PVE::Network is called below, but no 'use PVE::Network' is
# visible among this file's imports - presumably it gets loaded by one of the
# other modules; confirm.
__PACKAGE__->register_method ({
    name => 'mtunnel',
    path => 'mtunnel',
    method => 'POST',
    description => "Used by VM/CT migration - do not use manually.",
    parameters => {
        additionalProperties => 0,
        properties => {
            get_migration_ip => {
                type => 'boolean',
                default => 0,
                description => 'return the migration IP, if configured',
                optional => 1,
            },
            migration_network => {
                type => 'string',
                format => 'CIDR',
                description => 'the migration network used to detect the local migration IP',
                optional => 1,
            },
            'run-command' => {
                type => 'boolean',
                description => 'Run a command with a tcp socket as standard input.'
                    .' The IP address and port are printed via this'
                    ." command's standard output first, each on a separate line.",
                optional => 1,
            },
            'extra-args' => PVE::JSONSchema::get_standard_option('extra-args'),
        },
    },
    returns => { type => 'null'},
    code => sub {
        my ($param) = @_;

        if (!PVE::Cluster::check_cfs_quorum(1)) {
            print "no quorum\n";
            return undef;
        }

        # Returns the single local IP inside $cidr, or undef if no network is
        # known. If $cidr is undef, the migration network from datacenter.cfg
        # is used (if set). Dies if the network has no or several different
        # local addresses.
        my $get_local_migration_ip = sub {
            my ($cidr) = @_;

            if (!defined($cidr)) {
                # must be fully qualified: this file uses a plain
                # 'use PVE::Cluster;' and calls every other PVE::Cluster
                # function by its full name
                my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
                $cidr = $dc_conf->{migration}->{network}
                    if defined($dc_conf->{migration}->{network});
            }

            if (defined($cidr)) {
                my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

                die "could not get migration ip: no IP address configured on local " .
                    "node for network '$cidr'\n" if scalar(@$ips) == 0;

                die "could not get migration ip: multiple, different, IP address configured for " .
                    "network '$cidr'\n" if scalar(@$ips) > 1 && grep { $ips->[0] ne $_ } @$ips;

                return $ips->[0];
            }

            return undef;
        };

        my $network = $param->{migration_network};
        if ($param->{get_migration_ip}) {
            die "cannot use --run-command with --get_migration_ip\n"
                if $param->{'run-command'};

            if (my $ip = $get_local_migration_ip->($network)) {
                print "ip: '$ip'\n";
            } else {
                print "no ip\n";
            }
            # do not keep tunnel open when asked for migration ip
            return undef;
        }

        if ($param->{'run-command'}) {
            my $cmd = $param->{'extra-args'};
            die "missing command\n"
                if !$cmd || !scalar(@$cmd);

            # Get an ip address to listen on, and find a free migration port
            my ($ip, $family);
            if (defined($network)) {
                $ip = $get_local_migration_ip->($network)
                    or die "failed to get migration IP address to listen on\n";
                $family = PVE::Tools::get_host_address_family($ip);
            } else {
                my $nodename = PVE::INotify::nodename();
                ($ip, $family) = PVE::Network::get_ip_from_hostname($nodename, 0);
            }
            my $port = PVE::Tools::next_migrate_port($family, $ip);

            PVE::Tools::pipe_socket_to_command($cmd, $ip, $port);
            return undef;
        }

        print "tunnel online\n";
        *STDOUT->flush();

        # keep running until the peer closes STDIN or sends "quit"
        while (my $line = <STDIN>) {
            chomp $line;
            last if $line =~ m/^quit$/;
        }

        return undef;
    }});
674
675
# Command table of pvecm: every entry names the class and the registered
# method implementing a sub-command; the optional third element lists
# parameter names (presumably the ones accepted positionally - see
# PVE::CLIHandler). 'qdevice' groups nested sub-commands.
our $cmddef = {
    # implemented by the API class PVE::API2::ClusterConfig
    create => [ 'PVE::API2::ClusterConfig', 'create', [qw(clustername)] ],
    addnode => [ 'PVE::API2::ClusterConfig', 'addnode', [qw(node)] ],
    delnode => [ 'PVE::API2::ClusterConfig', 'delnode', [qw(node)] ],

    # implemented in this package
    keygen => [ __PACKAGE__, 'keygen', [qw(filename)] ],
    add => [ __PACKAGE__, 'add', [qw(hostname)] ],
    status => [ __PACKAGE__, 'status' ],
    nodes => [ __PACKAGE__, 'nodes' ],
    expected => [ __PACKAGE__, 'expected', [qw(expected)] ],
    updatecerts => [ __PACKAGE__, 'updatecerts', [] ],
    mtunnel => [ __PACKAGE__, 'mtunnel', [qw(extra-args)] ],

    qdevice => {
        setup => [ __PACKAGE__, 'setup_qdevice', [qw(address)] ],
        remove => [ __PACKAGE__, 'remove_qdevice', [] ],
    },
};
692
693 1;