# Source: git.proxmox.com Git - pve-cluster.git/blob - data/PVE/CLI/pvecm.pm
# Blob: 0a20af3a26dda73dbf815d54ad5368821e0f5364
# Path: [pve-cluster.git] / data / PVE / CLI / pvecm.pm
1 package PVE::CLI::pvecm;
2
3 use strict;
4 use warnings;
5
6 use File::Path;
7 use File::Basename;
8 use PVE::Tools qw(run_command);
9 use PVE::Cluster;
10 use PVE::INotify;
11 use PVE::JSONSchema qw(get_standard_option);
12 use PVE::RPCEnvironment;
13 use PVE::CLIHandler;
14 use PVE::PTY;
15 use PVE::API2::ClusterConfig;
16 use PVE::Corosync;
17
18 use base qw(PVE::CLIHandler);
19
# ssh-copy-id is run by commands in this module; point HOME at root's home for it.
$ENV{HOME} = '/root';

# File system locations referenced by the commands in this module.
my $basedir = '/etc/pve';
my $clusterconf = $basedir . '/corosync.conf';
my $libdir = '/var/lib/pve-cluster';
my $authfile = '/etc/corosync/authkey';
26
27
# Set up the RPC environment for command line use by delegating to the
# default CLI environment setup of PVE::RPCEnvironment.
sub setup_environment {
    return PVE::RPCEnvironment->setup_default_cli_env();
}
31
# Command 'keygen': generate a new corosync key file via corosync-keygen.
#
# Parameters:
#   filename - path of the key file to write; must not exist yet.
#
# Dies when the effective UID is not 0 or when the target file already
# exists. The parent directory of the key file is created if needed.
# Returns undef.
__PACKAGE__->register_method ({
    name => 'keygen',
    path => 'keygen',
    method => 'PUT',
    description => "Generate new cryptographic key for corosync.",
    parameters => {
        additionalProperties => 0,
        properties => {
            filename => {
                type => 'string',
                description => "Output file name"
            }
        },
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        my $keyfile = $param->{filename};

        # $> is the effective UID; only root may generate the key
        die "Error: Authorization key must be generated as root user.\n" if $> != 0;

        # never overwrite an existing key
        die "key file '$keyfile' already exists\n" if -e $keyfile;

        my $parent_dir = dirname($keyfile);
        File::Path::make_path($parent_dir) if $parent_dir;

        my $keygen_cmd = ['corosync-keygen', '-l', '-k', $keyfile];
        run_command($keygen_cmd);

        return undef;
    }});
65
# Call $code->($node, $ip) for each cluster member, in sorted node name order.
#
# Parameters:
#   $code  - code reference invoked with the node name and its IP.
#   $noerr - if false, die on the first member without an IP; if true, only
#            warn and return undef at that point.
#
# NOTE: in the $noerr case the iteration stops at the first member lacking an
# IP, so members sorted after it are not visited.
my $foreach_member = sub {
    my ($code, $noerr) = @_;

    my $members = PVE::Cluster::get_members();
    for my $nodename (sort keys %$members) {
        my $ip = $members->{$nodename}->{ip};
        if ($ip) {
            $code->($nodename, $ip);
            next;
        }

        my $msg = "cannot get the cluster IP for node '$nodename'.\n";
        die $msg if !$noerr;
        warn $msg;
        return undef;
    }
};
80
# Command 'qdevice setup': configure an external corosync QDevice (model
# 'net') for the cluster this node belongs to.
#
# Parameters:
#   address - IP address of the external QDevice (qnetd) host.
#   network - optional CIDR; accepted by the schema but not used by this
#             implementation.
#   force   - optional; permits re-initialising an existing local certificate
#             store and permits odd node counts (the 'lms' algorithm is then
#             used instead of 'ffsplit').
#
# Steps, in order:
#   1. verify that a cluster config exists and all members are online,
#   2. copy root's SSH id to the qnetd host,
#   3. initialise the qnetd certificate database and distribute its CA
#      certificate to all members (via a temporary file in /etc/pve),
#   4. create a certificate request, have it signed on the qnetd host and
#      import the result locally,
#   5. distribute the resulting PKCS#12 file to all members,
#   6. add the device section to corosync.conf under the config lock,
#   7. start and enable corosync-qdevice on all members and reload corosync.
#
# Dies on any failing precondition or command (except where 'noerr' is set).
# Returns undef.
__PACKAGE__->register_method ({
    name => 'setup_qdevice',
    path => 'setup_qdevice',
    method => 'PUT',
    description => "Setup the use of a QDevice",
    parameters => {
        additionalProperties => 0,
        properties => {
            address => {
                type => 'string', format => 'ip',
                description => "Specifies the network address of an external corosync QDevice" ,
            },
            network => {
                type => 'string',
                format => 'CIDR',
                description => 'The network which should be used to connect to the external qdevice',
                optional => 1,
            },
            force => {
                type => 'boolean',
                description => "Do not throw error on possible dangerous operations.",
                optional => 1,
            },
        },
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        die "Node not in a cluster. Aborting.\n"
            if !PVE::Corosync::check_conf_exists(1);

        my $members = PVE::Cluster::get_members();
        foreach my $node (sort keys %$members) {
            die "All nodes must be online! Node $node is offline, aborting.\n"
                if !$members->{$node}->{online};
        }

        my $conf = PVE::Cluster::cfs_read_file("corosync.conf");

        die "QDevice already configured!\n"
            if defined($conf->{main}->{quorum}->{device}) && !$param->{force};

        my $model = "net";
        my $algorithm = 'ffsplit';
        # Count the member entries. scalar($members) would numify the hash
        # reference itself instead of yielding the number of nodes, so the
        # parity test has to operate on the key count.
        if (scalar(keys %$members) & 1) {
            if ($param->{force}) {
                $algorithm = 'lms';
            } else {
                die "Clusters with an odd node count are not officially supported!\n";
            }
        }

        my $qnetd_addr = $param->{address};
        my $base_dir = "/etc/corosync/qdevice/net";
        my $db_dir_qnetd = "/etc/corosync/qnetd/nssdb";
        my $db_dir_node = "$base_dir/nssdb";
        my $ca_export_base = "qnetd-cacert.crt";
        my $ca_export_file = "$db_dir_qnetd/$ca_export_base";
        my $crq_file_base = "qdevice-net-node.crq";
        my $p12_file_base = "qdevice-net-node.p12";
        my $qdevice_certutil = "corosync-qdevice-net-certutil";
        my $qnetd_certutil= "corosync-qnetd-certutil";
        my $clustername = $conf->{main}->{totem}->{cluster_name};

        run_command(['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$qnetd_addr"]);

        if (-d $db_dir_node) {
            # FIXME: check on all nodes?!
            if ($param->{force}) {
                rmtree $db_dir_node;
            } else {
                die "QDevice certificate store already initialised, set force to delete!\n";
            }
        }

        # BatchMode makes ssh/scp fail instead of prompting interactively
        my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes', '-lroot'];
        my $scp_cmd = ['scp', '-o', 'BatchMode=yes'];

        print "\nINFO: initializing qnetd server\n";
        run_command(
            [@$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-i"],
            noerr => 1
        );

        print "\nINFO: copying CA cert and initializing on all nodes\n";
        # stage the CA cert in /etc/pve, from where each member imports it
        run_command([@$scp_cmd, "root\@\[$qnetd_addr\]:$ca_export_file", "/etc/pve/$ca_export_base"]);
        $foreach_member->(sub {
            my ($node, $ip) = @_;
            my $outsub = sub { print "\nnode '$node': " . shift };
            run_command(
                [@$ssh_cmd, $ip, $qdevice_certutil, "-i", "-c", "/etc/pve/$ca_export_base"],
                noerr => 1, outfunc => $outsub
            );
        });
        unlink "/etc/pve/$ca_export_base";

        print "\nINFO: generating cert request\n";
        run_command([$qdevice_certutil, "-r", "-n", $clustername]);

        print "\nINFO: copying exported cert request to qnetd server\n";
        run_command([@$scp_cmd, "$db_dir_node/$crq_file_base", "root\@\[$qnetd_addr\]:/tmp"]);

        print "\nINFO: sign and export cluster cert\n";
        run_command([
            @$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-s", "-c",
            "/tmp/$crq_file_base", "-n", "$clustername"
        ]);

        print "\nINFO: copy exported CRT\n";
        run_command([
            @$scp_cmd, "root\@\[$qnetd_addr\]:$db_dir_qnetd/cluster-$clustername.crt",
            $db_dir_node
        ]);

        print "\nINFO: import certificate\n";
        run_command([$qdevice_certutil, "-M", "-c", "$db_dir_node/cluster-$clustername.crt"]);

        print "\nINFO: copy and import pk12 cert to all nodes\n";
        # stage the PKCS#12 file in /etc/pve, from where each member imports it
        run_command([@$scp_cmd, "$db_dir_node/$p12_file_base", "/etc/pve/"]);
        $foreach_member->(sub {
            my ($node, $ip) = @_;
            my $outsub = sub { print "\nnode '$node': " . shift };
            run_command([
                @$ssh_cmd, $ip, $qdevice_certutil, "-m", "-c",
                "/etc/pve/$p12_file_base"], outfunc => $outsub
            );
        });
        unlink "/etc/pve/$p12_file_base";

        # executed under the corosync.conf lock, so the config is re-read here
        my $code = sub {
            my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
            my $quorum_section = $conf->{main}->{quorum};

            die "Qdevice already configured, must be removed before setting up new one!\n"
                if defined($quorum_section->{device}); # must not be forced!

            my $qdev_section = {
                model => $model,
                $model => {
                    tls => 'on',
                    host => $qnetd_addr,
                    algorithm => $algorithm,
                }
            };
            $qdev_section->{votes} = 1 if $algorithm eq 'ffsplit';

            $quorum_section->{device} = $qdev_section;

            PVE::Corosync::atomic_write_conf($conf);
        };

        print "\nINFO: add QDevice to cluster configuration\n";
        PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
        die $@ if $@;

        $foreach_member->(sub {
            my ($node, $ip) = @_;
            my $outsub = sub { print "\nnode '$node': " . shift };
            print "\nINFO: start and enable corosync qdevice daemon on node '$node'...\n";
            run_command([@$ssh_cmd, $ip, 'systemctl', 'start', 'corosync-qdevice'], outfunc => $outsub);
            run_command([@$ssh_cmd, $ip, 'systemctl', 'enable', 'corosync-qdevice'], outfunc => $outsub);
        });

        run_command(['corosync-cfgtool', '-R']); # do cluster wide config reload

        return undef;
    }});
253
# Command 'qdevice remove': drop the QDevice from the cluster configuration.
#
# Requires an existing cluster config and all members online. Under the
# corosync.conf lock the device section is deleted, the config is written
# and /etc/corosync/qdevice is removed on every member via ssh. Afterwards
# the corosync-qdevice service is stopped and disabled on every member and
# corosync is told to reload its config.
#
# Dies if no QDevice is configured or if any step fails. Returns undef.
__PACKAGE__->register_method ({
    name => 'remove_qdevice',
    path => 'remove_qdevice',
    method => 'DELETE',
    description => "Remove a configured QDevice",
    parameters => {
        additionalProperties => 0,
        properties => {},
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists(1)
            or die "Node not in a cluster. Aborting.\n";

        my $members = PVE::Cluster::get_members();
        for my $nodename (sort keys %$members) {
            next if $members->{$nodename}->{online};
            die "All nodes must be online! Node $nodename is offline, aborting.\n";
        }

        # BatchMode makes ssh fail instead of prompting interactively
        my @ssh = ('ssh', '-o', 'BatchMode=yes', '-lroot');

        # runs while holding the corosync.conf lock
        my $drop_device = sub {
            my $corosync_conf = PVE::Cluster::cfs_read_file("corosync.conf");
            my $quorum = $corosync_conf->{main}->{quorum};

            die "No QDevice configured!\n" if !defined($quorum->{device});

            delete $quorum->{device};

            PVE::Corosync::atomic_write_conf($corosync_conf);

            # cleanup qdev state (cert storage)
            $foreach_member->(sub {
                my (undef, $ip) = @_;
                run_command([@ssh, $ip, '--', 'rm', '-rf', "/etc/corosync/qdevice"]);
            });
        };

        PVE::Cluster::cfs_lock_file('corosync.conf', 10, $drop_device);
        die $@ if $@;

        # stop first, then disable, on each member
        $foreach_member->(sub {
            my (undef, $ip) = @_;
            for my $action ('stop', 'disable') {
                run_command([@ssh, $ip, 'systemctl', $action, 'corosync-qdevice']);
            }
        });

        run_command(['corosync-cfgtool', '-R']);

        print "\nRemoved Qdevice.\n";

        return undef;
    }});
312
# Command 'add': join the current node to an existing cluster.
#
# Parameters:
#   hostname    - hostname or IP of an existing cluster member.
#   nodeid      - corosync node id (standard option).
#   votes       - optional number of votes for this node.
#   force       - optional; passed on to the joinability check.
#   link0/link1 - corosync link definitions (standard option).
#   fingerprint - optional SHA-256 fingerprint (standard option).
#   use_ssh     - optional; use the SSH based join instead of the API join.
#
# The join runs inside a forked worker so that a task log is produced.
# Without 'use_ssh' the root password of the peer is read from the terminal
# and PVE::Cluster::join is called under a local lock. With 'use_ssh' the
# peer is asked via ssh to run 'pvecm addnode', then the corosync auth key
# and config are copied with rsync and handed to PVE::Cluster::finish_join.
#
# Dies when the node is not joinable or any join step fails. Returns undef.
__PACKAGE__->register_method ({
    name => 'add',
    path => 'add',
    method => 'PUT',
    description => "Adds the current node to an existing cluster.",
    parameters => {
        additionalProperties => 0,
        properties => {
            hostname => {
                type => 'string',
                description => "Hostname (or IP) of an existing cluster member."
            },
            nodeid => get_standard_option('corosync-nodeid'),
            votes => {
                type => 'integer',
                description => "Number of votes for this node",
                minimum => 0,
                optional => 1,
            },
            force => {
                type => 'boolean',
                description => "Do not throw error if node already exists.",
                optional => 1,
            },
            link0 => get_standard_option('corosync-link'),
            link1 => get_standard_option('corosync-link'),
            fingerprint => get_standard_option('fingerprint-sha256', {
                optional => 1,
            }),
            'use_ssh' => {
                type => 'boolean',
                description => "Always use SSH to join, even if peer may do it over API.",
                optional => 1,
            },
        },
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        my $nodename = PVE::INotify::nodename();

        my $host = $param->{hostname};
        my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);

        my $link0 = PVE::Cluster::parse_corosync_link($param->{link0});
        my $link1 = PVE::Cluster::parse_corosync_link($param->{link1});

        PVE::Cluster::assert_joinable($local_ip_address, $link0, $link1, $param->{force});

        my $worker = sub {

            if (!$param->{use_ssh}) {
                print "Please enter superuser (root) password for '$host':\n";
                my $password = PVE::PTY::read_password("Password for root\@$host: ");

                # $param is handed to the join call as-is; replace the CLI-only
                # switch with the password read above
                delete $param->{use_ssh};
                $param->{password} = $password;

                my $local_cluster_lock = "/var/lock/pvecm.lock";
                PVE::Tools::lock_file($local_cluster_lock, 10, \&PVE::Cluster::join, $param);

                if (my $err = $@) {
                    # translate a 501 from the API client into a usage hint
                    if (ref($err) eq 'PVE::APIClient::Exception' && defined($err->{code}) && $err->{code} == 501) {
                        $err = "Remote side is not able to use API for Cluster join!\n" .
                            "Pass the 'use_ssh' switch or update the remote side.\n";
                    }
                    die $err;
                }
                return; # all OK, the API join endpoint successfully set us up
            }

            # allow fallback to old ssh only join if wished or needed

            PVE::Cluster::setup_sshd_config();
            PVE::Cluster::setup_rootsshconfig();
            PVE::Cluster::setup_ssh_keys();

            # make sure known_hosts is on local filesystem
            PVE::Cluster::ssh_unmerge_known_hosts();

            my $cmd = ['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$host"];
            run_command($cmd, 'outfunc' => sub {}, 'errfunc' => sub {},
                'errmsg' => "unable to copy ssh ID");

            $cmd = ['ssh', $host, '-o', 'BatchMode=yes',
                'pvecm', 'addnode', $nodename, '--force', 1];

            push @$cmd, '--nodeid', $param->{nodeid} if $param->{nodeid};
            push @$cmd, '--votes', $param->{votes} if defined($param->{votes});
            # just pass the un-parsed string through, or as we've address as
            # the default_key, we can just pass the fallback directly too
            push @$cmd, '--link0', $param->{link0} // $local_ip_address;
            push @$cmd, '--link1', $param->{link1} if defined($param->{link1});

            if (system (@$cmd) != 0) {
                my $cmdtxt = join (' ', @$cmd);
                die "unable to add node: command failed ($cmdtxt)\n";
            }

            # per-process scratch directory for the files fetched from the peer
            my $tmpdir = "$libdir/.pvecm_add.tmp.$$";
            if (!mkdir($tmpdir)) {
                my $mkdir_err = $!; # save before the -d test can overwrite it
                # tolerate a left-over directory, fail early on anything else
                die "unable to create temporary directory '$tmpdir' - $mkdir_err\n"
                    if !-d $tmpdir;
            }

            eval {
                print "copy corosync auth key\n";
                # pass each remote file as its own source argument, so that no
                # remote-side word splitting of a single argument is required
                $cmd = ['rsync', '--rsh=ssh -l root -o BatchMode=yes', '-lpgoq',
                    "[$host]:$authfile", "[$host]:$clusterconf", $tmpdir];

                system(@$cmd) == 0 || die "can't rsync data from host '$host'\n";

                my $corosync_conf = PVE::Tools::file_get_contents("$tmpdir/corosync.conf");
                my $corosync_authkey = PVE::Tools::file_get_contents("$tmpdir/authkey");

                PVE::Cluster::finish_join($host, $corosync_conf, $corosync_authkey);
            };
            my $err = $@;

            # always remove the scratch directory, then re-raise any error
            rmtree $tmpdir;

            die $err if $err;
        };

        # use a synced worker so we get a nice task log when joining through CLI
        my $rpcenv = PVE::RPCEnvironment::get();
        my $authuser = $rpcenv->get_user();

        $rpcenv->fork_worker('clusterjoin', '', $authuser, $worker);

        return undef;
    }});
444
# Command 'status': show the local view of the cluster status.
#
# After checking that a corosync config exists, the current process is
# replaced by 'corosync-quorumtool -siH', so this does not return on success.
__PACKAGE__->register_method ({
    name => 'status',
    path => 'status',
    method => 'GET',
    description => "Displays the local view of the cluster status.",
    parameters => {
        additionalProperties => 0,
        properties => {},
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();

        # list form of exec: arguments are passed without a shell
        exec('corosync-quorumtool', '-siH');

        exit(-1); # should not be reached
    }});
467
# Command 'nodes': show the local view of the cluster nodes.
#
# After checking that a corosync config exists, the current process is
# replaced by 'corosync-quorumtool -l', so this does not return on success.
__PACKAGE__->register_method ({
    name => 'nodes',
    path => 'nodes',
    method => 'GET',
    description => "Displays the local view of the cluster nodes.",
    parameters => {
        additionalProperties => 0,
        properties => {},
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();

        # list form of exec: arguments are passed without a shell
        exec('corosync-quorumtool', '-l');

        exit(-1); # should not be reached
    }});
490
# Command 'expected': tell corosync a new expected votes value.
#
# Parameters:
#   expected - integer >= 1, the new number of expected votes.
#
# After checking that a corosync config exists, the current process is
# replaced by 'corosync-quorumtool -e <expected>', so this does not return
# on success.
__PACKAGE__->register_method ({
    name => 'expected',
    path => 'expected',
    method => 'PUT',
    description => "Tells corosync a new value of expected votes.",
    parameters => {
        additionalProperties => 0,
        properties => {
            expected => {
                type => 'integer',
                description => "Expected votes",
                minimum => 1,
            },
        },
    },
    returns => { type => 'null' },

    code => sub {
        my ($param) = @_;

        PVE::Corosync::check_conf_exists();

        my $votes = $param->{expected};

        # list form of exec: arguments are passed without a shell
        exec('corosync-quorumtool', '-e', $votes);

        exit(-1); # should not be reached
    }});
520
# Command 'updatecerts': update node certificates and generate all needed
# files/directories.
#
# Parameters:
#   force  - optional; force generation of a new SSL certificate.
#   silent - optional; ignore errors (i.e. when the cluster has no quorum).
#
# The work is delegated to PVE::Cluster::updatecerts_and_ssh, executed in a
# forked child with a 30 second timeout. Returns undef.
__PACKAGE__->register_method ({
    name => 'updatecerts',
    path => 'updatecerts',
    method => 'PUT',
    description => "Update node certificates (and generate all needed files/directories).",
    parameters => {
        additionalProperties => 0,
        properties => {
            force => {
                description => "Force generation of new SSL certifate.",
                type => 'boolean',
                optional => 1,
            },
            silent => {
                description => "Ignore errors (i.e. when cluster has no quorum).",
                type => 'boolean',
                optional => 1,
            },
        },
    },
    returns => { type => 'null' },
    code => sub {
        my ($param) = @_;

        my ($force, $silent) = @$param{qw(force silent)};

        # This is called from the pve-cluster.service ExecStartPost. The IO on
        # /etc/pve can hang (uninterruptible D state), which would fail the
        # whole service, hence the fork with a timeout.
        my $update = sub {
            PVE::Cluster::updatecerts_and_ssh($force, $silent);
        };
        PVE::Tools::run_fork_with_timeout(30, $update);

        return undef;
    }});
554
# Command table: maps each command name to [ class, method name, list of
# positional parameter names ]. A nested hash defines sub-commands.
our $cmddef = {
    # commands implemented in this package
    keygen      => [ __PACKAGE__, 'keygen', ['filename'] ],
    add         => [ __PACKAGE__, 'add', ['hostname'] ],
    status      => [ __PACKAGE__, 'status' ],
    nodes       => [ __PACKAGE__, 'nodes' ],
    expected    => [ __PACKAGE__, 'expected', ['expected'] ],
    updatecerts => [ __PACKAGE__, 'updatecerts', [] ],

    # commands implemented by the cluster config API class
    create  => [ 'PVE::API2::ClusterConfig', 'create', ['clustername'] ],
    addnode => [ 'PVE::API2::ClusterConfig', 'addnode', ['node'] ],
    delnode => [ 'PVE::API2::ClusterConfig', 'delnode', ['node'] ],

    # sub-commands below 'qdevice'
    qdevice => {
        setup  => [ __PACKAGE__, 'setup_qdevice', ['address'] ],
        remove => [ __PACKAGE__, 'remove_qdevice', [] ],
    },
};
570
571 1;