]> git.proxmox.com Git - pve-cluster.git/blob - data/PVE/Cluster.pm
0cc092e8e59d8060e625e48976f1138f093326d9
[pve-cluster.git] / data / PVE / Cluster.pm
1 package PVE::Cluster;
2
3 use strict;
4 use warnings;
5
6 use Digest::HMAC_SHA1;
7 use Digest::SHA;
8 use Encode;
9 use File::stat qw();
10 use IO::File;
11 use JSON;
12 use MIME::Base64;
13 use Net::SSLeay;
14 use POSIX qw(EEXIST ENOENT);
15 use RRDs;
16 use Socket;
17 use Storable qw(dclone);
18 use UUID;
19
20 use PVE::INotify;
21 use PVE::IPCC;
22 use PVE::JSONSchema;
23 use PVE::Network;
24 use PVE::SafeSyslog;
25 use PVE::Tools qw(run_command);
26
27 use PVE::Cluster::IPCConst;
28
29 use base 'Exporter';
30
# Symbols that callers may request explicitly through Exporter.
our @EXPORT_OK = (
    'cfs_read_file',
    'cfs_write_file',
    'cfs_register_file',
    'cfs_lock_file',
);
36
37 use Data::Dumper; # fixme: remove
38
39 # x509 certificate utils
40
# directories below the cluster file system mount point
my $basedir = "/etc/pve";
my $authdir = "$basedir/priv";
my $lockdir = "$authdir/lock";

# cfs and corosync files
my $dbfile = "/var/lib/pve-cluster/config.db";
my $dbbackupdir = "/var/lib/pve-cluster/backup";
my $localclusterdir = "/etc/corosync";
my $localclusterconf = "$localclusterdir/corosync.conf";
my $authfile = "$localclusterdir/authkey";
my $clusterconf = "$basedir/corosync.conf";

# authentication key pair and the PVE root CA
my $authprivkeyfn = "$authdir/authkey.key";
my $authpubkeyfn = "$basedir/authkey.pub";
my $pveca_key_fn = "$authdir/pve-root-ca.key";
my $pveca_srl_fn = "$authdir/pve-root-ca.srl";
my $pveca_cert_fn = "$basedir/pve-root-ca.pem";

# this is just a secret accessible by the web browser;
# it is used for CSRF prevention
my $pvewww_key_fn = "$basedir/pve-www.key";

# ssh related files
my $ssh_rsa_id_priv = "/root/.ssh/id_rsa";
my $ssh_rsa_id = "${ssh_rsa_id_priv}.pub";
my $ssh_host_rsa_id = "/etc/ssh/ssh_host_rsa_key.pub";
my $sshglobalknownhosts = "/etc/ssh/ssh_known_hosts";
my $sshknownhosts = "$authdir/known_hosts";
my $sshauthkeys = "$authdir/authorized_keys";
my $sshd_config_fn = "/etc/ssh/sshd_config";
my $rootsshauthkeys = "/root/.ssh/authorized_keys";
my $rootsshauthkeysbackup = "${rootsshauthkeys}.org";
my $rootsshconfig = "/root/.ssh/config";
73
# This is only a read-only copy; the relevant list lives in status.c of pmxcfs.
# Observed files are the ones we can get directly through IPCC. They are cached
# using a computed version, and only those can be used by the cfs_*_file methods.
my $observed = {
    map { ($_ => 1) } (
        'vzdump.cron',
        'storage.cfg',
        'datacenter.cfg',
        'replication.cfg',
        'corosync.conf',
        'corosync.conf.new',
        'user.cfg',
        'domains.cfg',
        'priv/shadow.cfg',
        'priv/tfa.cfg',
        '/qemu-server/',
        '/openvz/',
        '/lxc/',
        'ha/crm_commands',
        'ha/manager_status',
        'ha/resources.cfg',
        'ha/groups.cfg',
        'ha/fence.cfg',
        'status.cfg',
        'ceph.conf',
    )
};
99
# Run $cmd via run_command() and stay quiet on success. Everything the
# command prints (stdout and stderr) is buffered; the buffer is written to
# STDERR only when run_command() throws, and the error is then re-thrown.
sub run_silent_cmd {
    my ($cmd) = @_;

    my @captured;
    my $collect = sub {
        my ($line) = @_;
        push @captured, "$line\n";
    };

    eval { run_command($cmd, outfunc => $collect, errfunc => $collect) };

    if (my $err = $@) {
        print STDERR join('', @captured);
        die $err;
    }
}
114
# Report whether the cluster file system is writable (quorate).
#
# The owner-write bit of "$basedir/local" is inspected through lstat,
# because -w always returns true for root.
#
# Dies unless $noerr is set when there is no quorum; otherwise returns the
# (true or false) quorum state.
sub check_cfs_quorum {
    my ($noerr) = @_;

    my $link_stat = File::stat::lstat("$basedir/local");
    my $quorate = ($link_stat && (($link_stat->mode & 0200) != 0));

    if (!$quorate && !$noerr) {
        die "cluster not ready - no quorum?\n";
    }

    return $quorate;
}
127
# Check that "$basedir/local" is a symbolic link, which this module takes
# as the sign that the configuration file system is mounted.
#
# Dies unless $noerr is set when the link is missing; otherwise returns the
# result of the link test.
sub check_cfs_is_mounted {
    my ($noerr) = @_;

    my $mounted = -l "$basedir/local";

    if (!$mounted && !$noerr) {
        die "pve configuration filesystem not mounted\n";
    }

    return $mounted;
}
138
# Create the directories required for node $nodename below $basedir.
# Dies if the file system is not mounted or a directory cannot be created
# (an already existing directory is not an error).
sub gen_local_dirs {
    my ($nodename) = @_;

    check_cfs_is_mounted();

    my $nodedir = "$basedir/nodes/$nodename";

    my @required_dirs = (
        "$basedir/priv",
        "$basedir/nodes",
        $nodedir,
        map { "$nodedir/$_" } qw(lxc qemu-server openvz priv),
    );

    for my $dir (@required_dirs) {
        next if -d $dir;
        mkdir($dir) || $! == EEXIST || die "unable to create directory '$dir' - $!\n";
    }
}
159
# Generate the cluster authentication key pair if the private key does not
# exist yet.
#
# The work is done while holding the 'authkey' cfs lock: a 2048 bit RSA
# private key is written to $authprivkeyfn and its public part to
# $authpubkeyfn. Dies if the file system is not mounted, if the lock cannot
# be obtained, or if any step inside the lock fails.
sub gen_auth_key {

    return if -f "$authprivkeyfn";

    check_cfs_is_mounted();

    cfs_lock_authkey(undef, sub {
        # Parentheses are required here: 'mkdir $authdir || ...' would parse
        # as 'mkdir($authdir || ...)' and silently skip the error check.
        mkdir($authdir) || $! == EEXIST || die "unable to create dir '$authdir' - $!\n";

        run_silent_cmd(['openssl', 'genrsa', '-out', $authprivkeyfn, '2048']);

        run_silent_cmd(['openssl', 'rsa', '-in', $authprivkeyfn, '-pubout', '-out', $authpubkeyfn]);
    });

    # cfs_lock_authkey() reports failures through $@
    die "$@\n" if $@;
}
176
# Create the 4096 bit RSA key of the PVE root CA unless the key file
# already exists. Dies if openssl fails.
sub gen_pveca_key {

    return if -f $pveca_key_fn;

    my @cmd = ('openssl', 'genrsa', '-out', $pveca_key_fn, '4096');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve ca key:\n$err";
    }
}
187
# Make sure the PVE root CA key and certificate exist.
#
# Returns 0 when both files were already present and 1 after a new
# certificate has been generated. Dies if generation fails.
sub gen_pveca_cert {

    return 0 if -f $pveca_key_fn && -f $pveca_cert_fn;

    gen_pveca_key();

    # we try to generate an unique 'subject' to avoid browser problems
    # (reused serial numbers, ..)
    my ($uuid_bin, $uuid_text);
    UUID::generate($uuid_bin);
    UUID::unparse($uuid_bin, $uuid_text);

    my $subject = "/CN=Proxmox Virtual Environment/OU=$uuid_text/O=PVE Cluster Manager CA/";

    # wrap openssl with faketime to prevent bug #904
    my @cmd = (
        'faketime', 'yesterday', 'openssl', 'req', '-batch',
        '-days', '3650', '-new', '-x509', '-nodes',
        '-key', $pveca_key_fn,
        '-out', $pveca_cert_fn,
        '-subj', $subject,
    );

    eval { run_silent_cmd(\@cmd) };

    die "generating pve root certificate failed:\n$@" if $@;

    return 1;
}
215
# Create the 2048 bit RSA key used for the SSL certificate of node
# $nodename, unless the key file already exists. Dies without a node name
# or if openssl fails.
sub gen_pve_ssl_key {
    my ($nodename) = @_;

    die "no node name specified" if !$nodename;

    my $key_path = "$basedir/nodes/$nodename/pve-ssl.key";

    return if -f $key_path;

    eval { run_silent_cmd(['openssl', 'genrsa', '-out', $key_path, '2048']) };

    if (my $err = $@) {
        die "unable to generate pve ssl key for node '$nodename':\n$err";
    }
}
231
# Create the key stored in $pvewww_key_fn unless that file already exists.
# Dies if openssl fails.
sub gen_pve_www_key {

    return if -f $pvewww_key_fn;

    my @cmd = ('openssl', 'genrsa', '-out', $pvewww_key_fn, '2048');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve www key:\n$err";
    }
}
242
# Overwrite the CA serial number file ($pveca_srl_fn) with $serial.
sub update_serial {
    my $serial = shift;

    PVE::Tools::file_set_contents($pveca_srl_fn, $serial);
}
248
# Generate the SSL certificate of a node and sign it with the PVE root CA.
#
# Parameters:
#   $force    - regenerate even if the certificate file already exists
#   $nodename - node name; used for the file location and as a DNS alt name
#   $ip       - IP address added as a subject alternative name
#
# The subject alternative names always contain localhost and its loopback
# addresses, $ip and $nodename; when resolv.conf provides a search domain
# the resulting FQDN is added as well and used as common name.
#
# Dies if a parameter is missing, if the temporary openssl configuration
# cannot be written, or if one of the openssl calls fails. Temporary files
# are removed in every case.
sub gen_pve_ssl_cert {
    my ($force, $nodename, $ip) = @_;

    die "no node name specified" if !$nodename;
    die "no IP specified" if !$ip;

    my $pvessl_cert_fn = "$basedir/nodes/$nodename/pve-ssl.pem";

    return if !$force && -f $pvessl_cert_fn;

    my $names = "IP:127.0.0.1,IP:::1,DNS:localhost";

    my $rc = PVE::INotify::read_file('resolvconf');

    $names .= ",IP:$ip";

    my $fqdn = $nodename;

    $names .= ",DNS:$nodename";

    if ($rc && $rc->{search}) {
        $fqdn = $nodename . "." . $rc->{search};
        $names .= ",DNS:$fqdn";
    }

    my $sslconf = <<__EOD;
RANDFILE = /root/.rnd
extensions = v3_req

[ req ]
default_bits = 2048
distinguished_name = req_distinguished_name
req_extensions = v3_req
prompt = no
string_mask = nombstr

[ req_distinguished_name ]
organizationalUnitName = PVE Cluster Node
organizationName = Proxmox Virtual Environment
commonName = $fqdn

[ v3_req ]
basicConstraints = CA:FALSE
extendedKeyUsage = serverAuth
subjectAltName = $names
__EOD

    my $cfgfn = "/tmp/pvesslconf-$$.tmp";
    my $reqfn = "/tmp/pvecertreq-$$.tmp";

    my $cleanup = sub {
        unlink $reqfn;
        unlink $cfgfn;
    };

    # The file name is predictable, so remove any stale entry and create the
    # file exclusively instead of writing through whatever is found there.
    unlink $cfgfn;
    my $fh = IO::File->new($cfgfn, O_CREAT|O_WRONLY|O_EXCL, 0600)
        || die "unable to create temporary openssl config '$cfgfn' - $!\n";

    if (!(print {$fh} $sslconf) || !close($fh)) {
        my $write_err = $!;
        $cleanup->();
        die "unable to write temporary openssl config '$cfgfn' - $write_err\n";
    }

    unlink $reqfn;

    my $pvessl_key_fn = "$basedir/nodes/$nodename/pve-ssl.key";
    eval {
        run_silent_cmd(['openssl', 'req', '-batch', '-new', '-config', $cfgfn,
                        '-key', $pvessl_key_fn, '-out', $reqfn]);
    };

    if (my $err = $@) {
        $cleanup->();
        die "unable to generate pve certificate request:\n$err";
    }

    update_serial("0000000000000000") if ! -f $pveca_srl_fn;

    eval {
        # wrap openssl with faketime to prevent bug #904
        run_silent_cmd(['faketime', 'yesterday', 'openssl', 'x509', '-req',
                        '-in', $reqfn, '-days', '3650', '-out', $pvessl_cert_fn,
                        '-CAkey', $pveca_key_fn, '-CA', $pveca_cert_fn,
                        '-CAserial', $pveca_srl_fn, '-extfile', $cfgfn]);
    };

    if (my $err = $@) {
        $cleanup->();
        die "unable to generate pve ssl certificate:\n$err";
    }

    $cleanup->();
}
335
# Create all per-node and cluster-wide key material for $nodename:
# local directories, the auth key, the www key, the node SSL key, the CA
# and finally the node certificate.
#
# The node certificate is regenerated when $opt_force is set or when
# gen_pveca_cert() reports that it created a new CA certificate.
sub gen_pve_node_files {
    my ($nodename, $ip, $opt_force) = @_;

    gen_local_dirs($nodename);

    gen_auth_key();

    # cluster wide secret used for CSRF prevention
    gen_pve_www_key();

    # per node private key
    gen_pve_ssl_key($nodename);

    # make sure we have a CA
    my $ca_created = gen_pveca_cert();

    my $force = $opt_force ? 1 : $ca_created;

    gen_pve_ssl_cert($force, $nodename, $ip);
}
357
# Initial content written to /etc/pve/vzdump.cron when that file is missing.
# The text contains nothing to interpolate, so a literal here-document is used.
my $vzdump_cron_dummy = <<'__EOD';
# cluster wide vzdump cron schedule
# Atomatically generated file - do not edit

PATH="/usr/sbin:/usr/bin:/sbin:/bin"

__EOD
365
# Point /etc/cron.d/vzdump at /etc/pve/vzdump.cron.
#
# Nothing happens unless the cluster file exists and the cron.d entry is
# not already a symlink. An existing cron.d file is moved to
# /root/etc_cron_vzdump.org first. Failures of rename/symlink are ignored.
sub gen_pve_vzdump_symlink {

    my $cron_file = "/etc/pve/vzdump.cron";
    my $cron_link = "/etc/cron.d/vzdump";

    my $need_link = (-f $cron_file) && !(-l $cron_link);

    if ($need_link) {
        rename($cron_link, "/root/etc_cron_vzdump.org"); # make backup if file exists
        symlink($cron_file, $cron_link);
    }
}
377
# Write the default /etc/pve/vzdump.cron if the file is missing, then make
# sure the cron.d symlink is in place.
sub gen_pve_vzdump_files {

    my $cron_file = "/etc/pve/vzdump.cron";

    if (! -f $cron_file) {
        PVE::Tools::file_set_contents($cron_file, $vzdump_cron_dummy);
    }

    gen_pve_vzdump_symlink();
}
387
# Module-wide state refreshed by cfs_update(): file system version info,
# the guest list and the cluster info.
my ($versions, $vmlist, $clinfo) = ({}, {}, {});
391
# Send message $msgid with payload $data through PVE::IPCC and return the
# reply. Dies when no reply was received and errno is set.
my $ipcc_send_rec = sub {
    my ($msgid, $data) = @_;

    my $reply = PVE::IPCC::ipcc_send_rec($msgid, $data);

    if (!defined($reply) && ($! != 0)) {
        die "ipcc_send_rec[$msgid] failed: $!\n";
    }

    return $reply;
};
401
# Same as $ipcc_send_rec, but the reply is decoded from JSON before it is
# returned.
my $ipcc_send_rec_json = sub {
    my ($msgid, $data) = @_;

    my $reply = PVE::IPCC::ipcc_send_rec($msgid, $data);

    if (!defined($reply) && ($! != 0)) {
        die "ipcc_send_rec[$msgid] failed: $!\n";
    }

    return decode_json($reply);
};
411
# Read the file $path through IPCC.
#
# Returns the content, '' when there is no reply and errno is clear, or
# undef when errno is ENOENT. Dies with the errno text on any other error.
my $ipcc_get_config = sub {
    my ($path) = @_;

    my $reply = PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_CONFIG, pack("Z*", $path));

    return $reply if defined($reply);
    return '' if $! == 0;
    return undef if $! == ENOENT;

    die "$!\n";
};
427
# Request the status entry $name of node $nodename (empty string when no
# node is given). The raw reply is returned without any error check.
my $ipcc_get_status = sub {
    my ($name, $nodename) = @_;

    my $node = $nodename || "";
    my $request = pack("Z[256]Z[256]", $name, $node);

    return PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_STATUS, $request);
};
434
# Send a CFS_IPC_SET_STATUS message that carries only the entry name
# (no data) - used to remove the status entry $name.
my $ipcc_remove_status = sub {
    my ($name) = @_;

    return $ipcc_send_rec->(CFS_IPC_SET_STATUS, pack("Z[256]", $name));
};
440
# Store $data as status entry $name. References are serialized to JSON
# first; plain scalars are sent unchanged.
my $ipcc_update_status = sub {
    my ($name, $data) = @_;

    my $raw;
    if (ref($data)) {
        $raw = encode_json($data);
    } else {
        $raw = $data;
    }

    return $ipcc_send_rec->(CFS_IPC_SET_STATUS, pack("Z[256]Z*", $name, $raw));
};
450
# Send one entry to the cluster log.
#
# The message consists of three single bytes (priority, byte length of
# $ident including its NUL terminator, same for $tag) followed by the three
# NUL terminated strings $ident, $tag and $msg.
my $ipcc_log = sub {
    my ($priority, $ident, $tag, $msg) = @_;

    # bytes::length() is only callable once the bytes module has been
    # loaded; do not rely on some other module having done that for us.
    require bytes;

    my $bindata = pack "CCCZ*Z*Z*", $priority, bytes::length($ident) + 1,
        bytes::length($tag) + 1, $ident, $tag, $msg;

    return &$ipcc_send_rec(CFS_IPC_LOG_CLUSTER_MSG, $bindata);
};
459
# Fetch the cluster log. $max defaults to 0 when undefined and $user to the
# empty string; the request also carries three zero-filled 32 bit fields.
my $ipcc_get_cluster_log = sub {
    my ($user, $max) = @_;

    my $limit = defined($max) ? $max : 0;
    my $request = pack("VVVVZ*", $limit, 0, 0, 0, ($user || ""));

    return $ipcc_send_rec->(CFS_IPC_GET_CLUSTER_LOG, $request);
};
468
# Parsed configuration files, keyed by file name (see $ccache_read).
my $ccache = {};

# Refresh the cached version information, cluster info and guest list.
#
# Step 1 fetches the file system versions. If the reported start time
# differs from the one seen before, all cached data is dropped.
# Step 2 and 3 re-fetch the cluster info and the guest list when their
# cached version differs from the one reported in step 1.
#
# Errors reset the affected caches. They are re-thrown when $fail is set
# and otherwise only reported with warn.
sub cfs_update {
    my ($fail) = @_;

    eval {
        my $fsver = $ipcc_send_rec_json->(CFS_IPC_GET_FS_VERSION);
        die "no starttime\n" if !$fsver->{starttime};

        my $restarted = !$versions->{starttime}
            || $fsver->{starttime} != $versions->{starttime};

        if ($restarted) {
            $vmlist = {};
            $clinfo = {};
            $ccache = {};
        }

        $versions = $fsver;
    };
    if (my $err = $@) {
        $versions = {};
        $vmlist = {};
        $clinfo = {};
        $ccache = {};
        die $err if $fail;
        warn $err;
    }

    eval {
        my $outdated = !$clinfo->{version}
            || $clinfo->{version} != $versions->{clinfo};
        $clinfo = $ipcc_send_rec_json->(CFS_IPC_GET_CLUSTER_INFO) if $outdated;
    };
    if (my $err = $@) {
        $clinfo = {};
        die $err if $fail;
        warn $err;
    }

    eval {
        my $outdated = !$vmlist->{version}
            || $vmlist->{version} != $versions->{vmlist};
        $vmlist = $ipcc_send_rec_json->(CFS_IPC_GET_GUEST_LIST) if $outdated;
    };
    if (my $err = $@) {
        $vmlist = {};
        die $err if $fail;
        warn $err;
    }
}
524
# Return the guest list cached by the last cfs_update() call.
sub get_vmlist {
    my $cached = $vmlist;
    return $cached;
}
528
# Return the cluster info cached by the last cfs_update() call.
sub get_clinfo {
    my $cached = $clinfo;
    return $cached;
}
532
# Return the 'nodelist' entry of the cached cluster info (may be undef).
sub get_members {
    my $members = $clinfo->{nodelist};
    return $members;
}
536
# Return an array ref with the names of all cluster nodes.
#
# Falls back to a list containing only the local node name when there is
# no cached node list or when the local node is not part of it.
sub get_nodelist {
    my $members = $clinfo->{nodelist};

    my $local = PVE::INotify::nodename();

    my $local_is_member = $members && $members->{$local};

    return $local_is_member ? [ keys %$members ] : [ $local ];
}
548
# Best effort key/value store for the cluster.
# The local data is gone when pmxcfs is restarted, so this should not be
# used for very important data.
#
# An undefined $data removes the entry "kv/$key"; otherwise $data must be a
# plain scalar shorter than 32k. IPC failures are only reported with warn.
sub broadcast_node_kv {
    my ($key, $data) = @_;

    my $status_name = "kv/$key";

    if (defined($data)) {
        die "cannot send a reference\n" if ref($data);

        # pmxcfs has an upper bound of 32k for each entry
        die "data for '$key' too big\n" if length($data) >= (32*1024);

        eval { $ipcc_update_status->($status_name, $data) };
    } else {
        eval { $ipcc_remove_status->($status_name) };
    }

    warn $@ if $@;
}
573
# Read the key/value entry $key from one node ($nodename) or, when no node
# is given, from every node returned by get_nodelist().
#
# Returns a hash ref mapping node name to raw value; nodes without a
# (true) value are left out.
sub get_node_kv {
    my ($key, $nodename) = @_;

    my @nodes = $nodename ? ($nodename) : @{ get_nodelist() };

    my $res = {};
    for my $node (@nodes) {
        my $raw = $ipcc_get_status->("kv/$key", $node);
        $res->{$node} = $raw if $raw;
    }

    return $res;
}
596
# Publish the task list of the local node.
# $data must be a chronological descending ordered array of tasks.
#
# The serialized list may not get bigger than 32kb (CFS_MAX_STATUS_SIZE
# from pmxcfs), so the oldest items are dropped from @$data until it fits.
# IPC failures are only reported with warn.
sub broadcast_tasklist {
    my ($data) = @_;

    my $limit = 32 * 1024;

    pop @$data while length(encode_json($data)) >= $limit;

    eval { $ipcc_update_status->("tasklist", $data) };

    warn $@ if $@;
}
615
# Per-node cache of decoded task lists: { $node => { data, version } }.
my $tasklistcache = {};

# Return an array ref with the tasks of all nodes, or only of $nodename
# when given.
#
# A node's list is fetched again whenever no cache entry exists, no version
# is known for that node, or the cached version differs from the one found
# in $versions->{kvstore}. Errors for a single node are written to syslog
# and do not abort the loop.
sub get_tasklist {
    my ($nodename) = @_;

    my $kvstore = $versions->{kvstore} || {};

    my $nodelist = get_nodelist();

    my $res = [];
    foreach my $node (@$nodelist) {
        next if $nodename && ($nodename ne $node);
        eval {
            # Do not write 'my $ver = ... if COND;' - the behaviour of a
            # conditionally executed 'my' is undefined in Perl.
            my $ver = $kvstore->{$node} ? $kvstore->{$node}->{tasklist} : undef;
            my $cd = $tasklistcache->{$node};
            if (!$cd || !$ver || !$cd->{version} ||
                ($cd->{version} != $ver)) {
                my $raw = &$ipcc_get_status("tasklist", $node) || '[]';
                my $data = decode_json($raw);
                push @$res, @$data;
                $cd = $tasklistcache->{$node} = {
                    data => $data,
                    version => $ver,
                };
            } elsif ($cd && $cd->{data}) {
                push @$res, @{$cd->{data}};
            }
        };
        my $err = $@;
        syslog('err', $err) if $err;
    }

    return $res;
}
650
# Publish $data as status entry "rrd/$rrdid". Failures are only reported
# with warn.
sub broadcast_rrd {
    my ($rrdid, $data) = @_;

    eval { $ipcc_update_status->("rrd/$rrdid", $data) };

    if (my $err = $@) {
        warn $err;
    }
}
661
# Time and result of the last successful rrd_dump() call.
my $last_rrd_dump = 0;
my $last_rrd_data = "";

# Fetch and parse the RRD dump from pmxcfs.
#
# Each newline terminated line has the form "key:val:val:..."; lines without
# a key or with fewer than two values are skipped and the value 'U' is
# mapped to undef. Returns a hash ref of key => array ref of values.
#
# The previous result is returned again when the last successful call was
# less than two seconds ago. On IPC failure the error is reported with warn
# and an empty hash ref is returned.
sub rrd_dump {

    my $now = time();

    return $last_rrd_data if ($now - $last_rrd_dump) < 2;

    my $raw = eval { $ipcc_send_rec->(CFS_IPC_GET_RRD_DUMP) };

    if (my $err = $@) {
        warn $err;
        return {};
    }

    my $res = {};

    if ($raw) {
        # only lines terminated by a newline are considered
        for my $line ($raw =~ /^(.*)\n/mg) {
            my ($key, @values) = split(/:/, $line);
            next if !$key;
            next if scalar(@values) <= 1;
            $res->{$key} = [ map { $_ eq 'U' ? undef : $_ } @values ];
        }
    }

    $last_rrd_dump = $now;
    $last_rrd_data = $res;

    return $res;
}
701
# Fetch consolidated data from an RRD database.
#
# Parameters:
#   $rrdname   - database name below /var/lib/rrdcached/db
#   $timeframe - one of 'hour', 'day', 'week', 'month' or 'year'
#   $cf        - consolidation function, defaults to "AVERAGE"
#
# Returns an array ref with one hash per time step. Each hash contains
# 'time' plus one entry per data source that has a defined value.
#
# Dies on an unknown timeframe, on RRD errors and when the step returned by
# RRDs::fetch differs from the requested resolution.
sub create_rrd_data {
    my ($rrdname, $timeframe, $cf) = @_;

    my $rrddir = "/var/lib/rrdcached/db";

    my $rrd = "$rrddir/$rrdname";

    # timeframe => [ resolution in seconds, number of steps ]
    my $setup = {
        hour =>  [ 60, 70 ],
        day  => [ 60*30, 70 ],
        week =>  [ 60*180, 70 ],
        month =>  [ 60*720, 70 ],
        year =>  [ 60*10080, 70 ],
    };

    # fail with a clear message instead of dereferencing undef below
    my $params = defined($timeframe) ? $setup->{$timeframe} : undef;
    die "unknown timeframe '" . (defined($timeframe) ? $timeframe : '') . "'\n"
        if !$params;

    my ($reso, $count) = @$params;
    my $ctime = $reso*int(time()/$reso);
    my $req_start = $ctime - $reso*$count;

    $cf = "AVERAGE" if !$cf;

    my @args = (
        "-s" => $req_start,
        "-e" => $ctime - 1,
        "-r" => $reso,
    );

    # go through rrdcached when its socket is available
    my $socket = "/var/run/rrdcached.sock";
    push @args, "--daemon" => "unix:$socket" if -S $socket;

    my ($start, $step, $names, $data) = RRDs::fetch($rrd, $cf, @args);

    my $err = RRDs::error;
    die "RRD error: $err\n" if $err;

    die "got wrong time resolution ($step != $reso)\n"
        if $step != $reso;

    my $res = [];
    my $fields = scalar(@$names);
    for my $line (@$data) {
        my $entry = { 'time' => $start };
        $start += $step;
        for my $i (0 .. $fields - 1) {
            my $val = $line->[$i];
            # leave empty fields undefined
            # maybe make this configurable?
            $entry->{$names->[$i]} = $val if defined($val);
        }
        push @$res, $entry;
    }

    return $res;
}
759
# Render a PNG graph for some data sources of an RRD database.
#
# Parameters:
#   $rrdname   - database name below /var/lib/rrdcached/db
#   $timeframe - one of 'hour', 'day', 'week', 'month' or 'year'
#   $ds        - list of data source names (split with PVE::Tools::split_list)
#   $cf        - consolidation function, defaults to "AVERAGE"
#
# Returns { filename => ..., image => ... }. The image is taken from the
# RRDs::graphv result; nothing is written to 'filename' by this function.
#
# Dies on an unknown timeframe, on RRD errors and when more data sources
# are requested than colors are defined.
sub create_rrd_graph {
    my ($rrdname, $timeframe, $ds, $cf) = @_;

    # Using RRD graph is clumsy - maybe it
    # is better to simply fetch the data, and do all display
    # related things with javascript (new extjs html5 graph library).

    my $rrddir = "/var/lib/rrdcached/db";

    my $rrd = "$rrddir/$rrdname";

    my @ids = PVE::Tools::split_list($ds);

    my $ds_txt = join('_', @ids);

    my $filename = "${rrd}_${ds_txt}.png";

    # timeframe => [ resolution in seconds, number of steps ]
    my $setup = {
        hour =>  [ 60, 60 ],
        day  => [ 60*30, 70 ],
        week =>  [ 60*180, 70 ],
        month =>  [ 60*720, 70 ],
        year =>  [ 60*10080, 70 ],
    };

    # fail with a clear message instead of dereferencing undef below
    my $params = defined($timeframe) ? $setup->{$timeframe} : undef;
    die "unknown timeframe '" . (defined($timeframe) ? $timeframe : '') . "'\n"
        if !$params;

    my ($reso, $count) = @$params;

    my @args = (
        "--imgformat" => "PNG",
        "--border" => 0,
        "--height" => 200,
        "--width" => 800,
        "--start" => - $reso*$count,
        "--end" => 'now' ,
        "--lower-limit" => 0,
    );

    # go through rrdcached when its socket is available
    my $socket = "/var/run/rrdcached.sock";
    push @args, "--daemon" => "unix:$socket" if -S $socket;

    my @coldef = ('#00ddff', '#ff0000');

    $cf = "AVERAGE" if !$cf;

    my $i = 0;
    foreach my $id (@ids) {
        my $col = $coldef[$i++] || die "fixme: no color definition";
        push @args, "DEF:${id}=$rrd:${id}:$cf";
        my $dataid = $id;
        # 'cpu' and 'iowait' are multiplied by 100 for display
        if ($id eq 'cpu' || $id eq 'iowait') {
            push @args, "CDEF:${id}_per=${id},100,*";
            $dataid = "${id}_per";
        }
        push @args, "LINE2:${dataid}${col}:${id}";
    }

    push @args, '--full-size-mode';

    # we do not really store data into the file
    my $res = RRDs::graphv('-', @args);

    my $err = RRDs::error;
    die "RRD error: $err\n" if $err;

    return { filename => $filename, image => $res->{image} };
}
826
# A fast way to read files (avoids fuse overhead): the content of $path is
# requested through IPCC. See $ipcc_get_config for the return values.
sub get_config {
    my ($path) = @_;

    return $ipcc_get_config->($path);
}
833
# Public wrapper around $ipcc_get_cluster_log; $user and $max are passed
# through unchanged.
sub get_cluster_log {
    my ($user, $max) = @_;

    return $ipcc_get_cluster_log->($user, $max);
}
839
# Registered parser/writer pairs, keyed by observed file name.
my $file_info = {};

# Register $parser and $writer for the observed file $filename.
# Dies if the file is not listed in $observed or was registered before.
sub cfs_register_file {
    my ($filename, $parser, $writer) = @_;

    die "unknown file '$filename'" if !$observed->{$filename};

    die "file '$filename' already registered" if $file_info->{$filename};

    $file_info->{$filename} = { parser => $parser, writer => $writer };
}
854
# Return the parsed content of $filename, using $ccache.
#
# The file is read and parsed again when no version is cached, no $version
# is given, or both differ. The parser is always called, even when the file
# does not exist (its data argument is undef in that case). References are
# returned as deep copies so callers cannot modify the cached data.
my $ccache_read = sub {
    my ($filename, $parser, $version) = @_;

    my $entry = ($ccache->{$filename} ||= {});

    my $stale = !$entry->{version} || !$version || $entry->{version} != $version;

    if ($stale) {
        my $raw = get_config($filename);
        $entry->{data} = $parser->("/etc/pve/$filename", $raw);
        $entry->{version} = $version;
    }

    my $cached = $entry->{data};

    return ref($cached) ? dclone($cached) : $cached;
};
874
# Look up the cached version and the registered handlers of $filename.
#
# Guest configuration files (nodes/<node>/<type>/<vmid>.conf) take their
# version from the guest list and their handlers from the "/<type>/" entry;
# every other file uses its own name for both lookups.
#
# Returns ($version, $info) in list context and $version in scalar context.
# Dies if no handlers are registered for the file.
sub cfs_file_version {
    my ($filename) = @_;

    my ($version, $infotag);

    if (my ($type, $vmid) = $filename =~ m!^nodes/[^/]+/(openvz|lxc|qemu-server)/(\d+)\.conf$!) {
        my $guest = $vmlist && $vmlist->{ids} && $vmlist->{ids}->{$vmid};
        $version = $guest->{version} if $guest;
        $infotag = "/$type/";
    } else {
        $infotag = $filename;
        $version = $versions->{$filename};
    }

    my $info = $file_info->{$infotag};
    die "unknown file type '$filename'\n" if !$info;

    return wantarray ? ($version, $info) : $version;
}
896
# Return the parsed content of the registered file $filename, served from
# the cache when the cached version is still current.
sub cfs_read_file {
    my ($filename) = @_;

    my ($version, $info) = cfs_file_version($filename);

    return $ccache_read->($filename, $info->{parser}, $version);
}
905
# Serialize $data with the registered writer of $filename and store the
# result in /etc/pve/$filename. A cached copy of the file is invalidated
# before writing. Dies if the file is unknown or has no writer.
sub cfs_write_file {
    my ($filename, $data) = @_;

    my (undef, $info) = cfs_file_version($filename);

    my $writer = $info->{writer} || die "no writer defined";

    my $fsname = "/etc/pve/$filename";

    my $raw = $writer->($fsname, $data);

    # force a re-read on the next cfs_read_file()
    my $cached = $ccache->{$filename};
    $cached->{version} = undef if $cached;

    PVE::Tools::file_set_contents($fsname, $raw);
}
923
# Run $code->(@param) while holding the cluster wide lock $lockid.
#
# The lock is the directory "$lockdir/$lockid". $timeout (default 10) limits
# how long we try to obtain it; once it is held, the code has 60 seconds
# before it is aborted through SIGALRM. An alarm pending in the caller is
# suspended for the duration and re-armed afterwards.
#
# Returns the result of $code and clears $@ on success. On failure undef is
# returned and $@ holds the error message.
my $cfs_lock = sub {
    my ($lockid, $timeout, $code, @param) = @_;

    my $outer_alarm = alarm(0); # suspend outer alarm early

    my $result;
    my $locked = 0;

    # this timeout is for acquiring the lock
    $timeout = 10 if !$timeout;

    my $lockpath = "$lockdir/$lockid";

    eval {

        mkdir $lockdir;

        die "pve cluster filesystem not online.\n" if ! -d $lockdir;

        my $on_timeout = sub { die "got lock request timeout\n"; };
        local $SIG{ALRM} = $on_timeout;

        while (1) {
            alarm ($timeout);
            $locked = mkdir($lockpath);
            $timeout = alarm(0) - 1; # we'll sleep for 1s, see down below

            last if $locked;

            $on_timeout->() if $timeout <= 0;

            print STDERR "trying to acquire cfs lock '$lockid' ...\n";
            utime (0, 0, $lockpath); # cfs unlock request
            sleep(1);
        }

        # fixed command timeout: cfs locks have a timeout of 120
        # using 60 gives us another 60 seconds to abort the task
        local $SIG{ALRM} = sub { die "got lock timeout - aborting command\n"; };
        alarm(60);

        cfs_update(); # make sure we read latest versions inside code()

        $result = $code->(@param);

        alarm(0);
    };

    my $err = $@;

    $err = "no quorum!\n" if !$locked && !check_cfs_quorum(1);

    rmdir $lockpath if $locked; # if we held the lock always unlock again

    alarm($outer_alarm);

    if ($err) {
        $@ = "error with cfs lock '$lockid': $err";
        return undef;
    }

    $@ = undef;

    return $result;
};
991
# Run $code->(@param) while holding the lock of the observed file
# $filename. The lock id is "file-$filename" with every '.' and '/'
# replaced by '_'. Dies if the file is not listed in $observed; see
# $cfs_lock for the return value and error reporting.
sub cfs_lock_file {
    my ($filename, $timeout, $code, @param) = @_;

    die "unknown file '$filename'" if !$observed->{$filename};

    (my $lockid = "file-$filename") =~ s/[.\/]/_/g;

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
1002
# Run $code->(@param) under the cluster lock "storage-$storeid".
sub cfs_lock_storage {
    my ($storeid, $timeout, $code, @param) = @_;

    return $cfs_lock->("storage-$storeid", $timeout, $code, @param);
}
1010
# Run $code->(@param) under the cluster lock "domain-$domainname".
sub cfs_lock_domain {
    my ($domainname, $timeout, $code, @param) = @_;

    return $cfs_lock->("domain-$domainname", $timeout, $code, @param);
}
1018
# Run $code->(@param) under the cluster lock "acme-$account".
sub cfs_lock_acme {
    my ($account, $timeout, $code, @param) = @_;

    return $cfs_lock->("acme-$account", $timeout, $code, @param);
}
1026
# Run $code->(@param) under the cluster lock "authkey".
sub cfs_lock_authkey {
    my ($timeout, $code, @param) = @_;

    my $lockid = 'authkey';

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
1032
# Symbolic log priority names and their numeric value.
my $log_levels = {
    "emerg" => 0,
    "alert" => 1,
    "crit" => 2,
    "critical" => 2,
    "err" => 3,
    "error" => 3,
    "warn" => 4,
    "warning" => 4,
    "notice" => 5,
    "info" => 6,
    "debug" => 7,
};

# Write a message to syslog and to the cluster log.
#
# Parameters:
#   $priority - numeric priority or one of the names in $log_levels
#   $ident    - optional identifier, logged as "<ident> message"
#   $msg      - message text; "empty message" is used when it is false
#
# Non-ASCII characters in $ident and $msg are replaced by \uXXXX escapes.
# Dies if the priority is neither a known name nor a number. A failure to
# write the cluster log is only reported to syslog.
sub log_msg {
    my ($priority, $ident, $msg) = @_;

    # Test for definedness, not truth: "emerg" maps to level 0.
    if (defined(my $level = $log_levels->{$priority})) {
        $priority = $level;
    }

    die "need numeric log priority"
        if !defined($priority) || $priority !~ /^\d+$/;

    my $tag = PVE::SafeSyslog::tag();

    $msg = "empty message" if !$msg;

    $ident = "" if !$ident;
    $ident = encode("ascii", $ident,
                    sub { sprintf "\\u%04x", shift });

    my $ascii = encode("ascii", $msg, sub { sprintf "\\u%04x", shift });

    if ($ident) {
        syslog($priority, "<%s> %s", $ident, $ascii);
    } else {
        syslog($priority, "%s", $ascii);
    }

    eval { &$ipcc_log($priority, $ident, $tag, $ascii); };

    syslog("err", "writing cluster log failed: $@") if $@;
}
1076
# Check that no guest with id $vmid is present in the cached guest list.
#
# Returns 1 when the id is unused. When it is in use, undef is returned if
# $noerr is set; otherwise the function dies with a message naming the
# guest type and the node.
sub check_vmid_unused {
    my ($vmid, $noerr) = @_;

    my $guest = get_vmlist()->{ids}->{$vmid};

    return 1 if !defined($guest);

    return undef if $noerr;

    my $kind = ($guest->{type} eq 'qemu') ? 'VM' : 'CT';

    die "$kind $vmid already exists on node '$guest->{node}'\n";
}
1090
# Check that $nodename is part of the cached cluster node list.
#
# Returns 1 when the node is known. Otherwise undef is returned if $noerr
# is set, and the function dies if it is not.
sub check_node_exists {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};

    if ($members && $members->{$nodename}) {
        return 1;
    }

    return undef if $noerr;

    die "no such cluster node '$nodename'\n";
}
1101
# Return the IP address of cluster node $nodename; this is also used to get
# the IP of the local node.
#
# When the cached node list has an IP for the node, scalar context yields
# the IP and list context ($ip, $family). The address family is looked up
# once and then stored in the node list entry. Without a cached IP the
# lookup is delegated to PVE::Network::get_ip_from_hostname().
sub remote_node_ip {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};
    my $entry = $members ? $members->{$nodename} : undef;

    if ($entry && (my $ip = $entry->{ip})) {
        return $ip if !wantarray;

        $entry->{address_family} ||= PVE::Tools::get_host_address_family($ip);

        return ($ip, $entry->{address_family});
    }

    # fallback: try to get IP by other means
    return PVE::Network::get_ip_from_hostname($nodename, $noerr);
}
1123
# Return the local IP address inside the migration network.
#
# The network is $migration_network or, when that is undefined, the
# migration network configured in datacenter.cfg. Returns undef when no
# network is configured at all. Unless $noerr is set, the function dies
# when the local node has no address or more than one address in that
# network; otherwise the first address found is returned.
sub get_local_migration_ip {
    my ($migration_network, $noerr) = @_;

    my $cidr = $migration_network;

    if (!defined($cidr)) {
        my $dc_conf = cfs_read_file('datacenter.cfg');
        if (defined($dc_conf->{migration}->{network})) {
            $cidr = $dc_conf->{migration}->{network};
        }
    }

    return undef if !defined($cidr);

    my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
    my $found = scalar(@$ips);

    if (!$noerr) {
        die "could not get migration ip: no IP address configured on local " .
            "node for network '$cidr'\n" if $found == 0;

        die "could not get migration ip: multiple IP address configured for " .
            "network '$cidr'\n" if $found > 1;
    }

    return $ips->[0];
}
1149
1150 # ssh related utility functions
1151
# Rewrite $sshauthkeys without duplicate keys (ssh-copy-id simply appends
# keys, so the file can grow too large).
#
# The new content is built from the current file, the backup of root's
# authorized_keys (if present) and our own public key. Lines holding an
# ssh-rsa/ssh-dsa key that was already seen are dropped; all other lines
# are kept. The backup is removed afterwards when root's authorized_keys
# is a symlink.
sub ssh_merge_keys {

    my $data = '';
    if (-f $sshauthkeys) {
        $data = PVE::Tools::file_get_contents($sshauthkeys, 128*1024);
        chomp($data);
    }

    my $found_backup;
    if (-f $rootsshauthkeysbackup) {
        $data .= "\n";
        $data .= PVE::Tools::file_get_contents($rootsshauthkeysbackup, 128*1024);
        chomp($data);
        $found_backup = 1;
    }

    # always add ourself
    if (-f $ssh_rsa_id) {
        my $pub = PVE::Tools::file_get_contents($ssh_rsa_id);
        chomp($pub);
        $data .= "\n$pub\n";
    }

    my %seen;
    my @kept;
    for my $line (split(/\n/, $data)) {
        if ($line !~ /^#/ && $line =~ m/(^|\s)ssh-(rsa|dsa)\s+(\S+)\s+\S+$/) {
            next if $seen{$3}++;
        }
        push @kept, "$line\n";
    }

    PVE::Tools::file_set_contents($sshauthkeys, join('', @kept), 0600);

    if ($found_backup && -l $rootsshauthkeys) {
        # everything went well, so we can remove the backup
        unlink $rootsshauthkeysbackup;
    }
}
1194
# Make sure sshd_config contains "PermitRootLogin yes".
#
# Nothing is changed when such a line already exists. Otherwise the first
# (possibly commented out) PermitRootLogin line is replaced, or a new line
# is appended, the file is written back and sshd is reloaded or restarted.
sub setup_sshd_config {

    my $conf = PVE::Tools::file_get_contents($sshd_config_fn);

    return if $conf =~ m/^PermitRootLogin\s+yes\s*$/m;

    my $replaced = ($conf =~ s/^#?PermitRootLogin.*$/PermitRootLogin yes/m);

    if (!$replaced) {
        chomp $conf;
        $conf .= "\nPermitRootLogin yes\n";
    }

    PVE::Tools::file_set_contents($sshd_config_fn, $conf);

    PVE::Tools::run_command(['systemctl', 'reload-or-restart', 'sshd']);
}
1211
# Prepare root's ssh setup: create an RSA key pair when the public key is
# missing, and write a default /root/.ssh/config when that file does not
# exist. An already existing config file is never touched.
sub setup_rootsshconfig {

    # create ssh key if it does not exist
    if (! -f $ssh_rsa_id) {
        mkdir '/root/.ssh/';
        system ("echo|ssh-keygen -t rsa -N '' -b 2048 -f ${ssh_rsa_id_priv}");
    }

    # create ssh config if it does not exist
    if (! -f $rootsshconfig) {
        mkdir '/root/.ssh';

        my $fh = IO::File->new($rootsshconfig, O_CREAT|O_WRONLY|O_EXCL, 0640);
        if ($fh) {
            # this is the default ciphers list from Debian's OpenSSH package (OpenSSH_7.4p1 Debian-10, OpenSSL 1.0.2k 26 Jan 2017)
            # changed order to put AES before Chacha20 (most hardware has AESNI)
            print $fh "Ciphers aes128-ctr,aes192-ctr,aes256-ctr,aes128-gcm\@openssh.com,aes256-gcm\@openssh.com,chacha20-poly1305\@openssh.com\n";
            close($fh);
        }
    }
}
1231
# Move root's authorized_keys into the shared key database.
#
# When $sshauthkeys does not exist it is created from the content of root's
# authorized_keys. A regular authorized_keys file is then renamed to the
# backup name and replaced by a symlink to $sshauthkeys. The backup is
# removed again when the import succeeded and the symlink exists. Problems
# are reported with warn only.
sub setup_ssh_keys {

    mkdir $authdir;

    my $import_ok;

    if (! -f $sshauthkeys) {
        my $old = (-f $rootsshauthkeys)
            ? PVE::Tools::file_get_contents($rootsshauthkeys, 128*1024)
            : undef;

        my $fh = IO::File->new ($sshauthkeys, O_CREAT|O_WRONLY|O_EXCL, 0400);
        if ($fh) {
            PVE::Tools::safe_print($sshauthkeys, $fh, $old) if $old;
            close($fh);
            $import_ok = 1;
        }
    }

    if (! -f $sshauthkeys) {
        warn "can't create shared ssh key database '$sshauthkeys'\n";
    }

    # keep a backup of a regular (non-symlink) authorized_keys file
    if (-f $rootsshauthkeys && ! -l $rootsshauthkeys) {
        rename($rootsshauthkeys , $rootsshauthkeysbackup)
            || warn "rename $rootsshauthkeys failed - $!\n";
    }

    symlink($sshauthkeys, $rootsshauthkeys) if ! -l $rootsshauthkeys;

    if (-l $rootsshauthkeys) {
        unlink $rootsshauthkeysbackup if $import_ok;
    } else {
        warn "can't create symlink for ssh keys '$rootsshauthkeys' -> '$sshauthkeys'\n";
    }
}
1269
# If the global known_hosts file is a symlink, write the content of the
# cluster known_hosts file (or nothing when it does not exist) to that
# path via PVE::Tools::file_set_contents(). Does nothing otherwise.
sub ssh_unmerge_known_hosts {
    return if ! -l $sshglobalknownhosts;

    my $content = (-f $sshknownhosts)
        ? PVE::Tools::file_get_contents($sshknownhosts, 128*1024)
        : '';

    PVE::Tools::file_set_contents($sshglobalknownhosts, $content);
}
1279
# Merge the local node's RSA host key into the shared known hosts file.
#
# Parameters:
#   $nodename    - name of the local node (required)
#   $ip_address  - IP address of the local node (required)
#   $createLink  - if true, replace the global known hosts file with a symlink
#                  to the shared file afterwards
#
# The shared file ($sshknownhosts) is read first, followed by the global file
# ($sshglobalknownhosts) if that is a regular file. Only the first entry for a
# given host field is kept. Entries for this node (plain or hashed host name)
# are dropped when they carry a key different from the current host key, and
# missing entries for node name and IP address are appended.
#
# Dies if node name or IP are missing or if the host key cannot be parsed.
sub ssh_merge_known_hosts {
    my ($nodename, $ip_address, $createLink) = @_;

    die "no node name specified" if !$nodename;
    die "no ip address specified" if !$ip_address;

    # ssh lowercases hostnames (aliases) before comparison, so we need to as well
    ($nodename, $ip_address) = (lc($nodename), lc($ip_address));

    mkdir $authdir;

    if (!-f $sshknownhosts) {
        my $fh = IO::File->new($sshknownhosts, O_CREAT | O_WRONLY | O_EXCL, 0600);
        close($fh) if $fh;
    }

    my $shared_raw = PVE::Tools::file_get_contents($sshknownhosts, 128 * 1024);

    # entries of a not yet linked, regular global file get merged as well
    my $global_raw = '';
    if (!(-l $sshglobalknownhosts) && (-f $sshglobalknownhosts)) {
        $global_raw = PVE::Tools::file_get_contents($sshglobalknownhosts, 128 * 1024);
    }

    my $hostkey = PVE::Tools::file_get_contents($ssh_host_rsa_id);
    # Note: the file sometimes contains empty lines at the start, so we use a
    # multiline match
    die "can't parse $ssh_host_rsa_id" if $hostkey !~ m/^(ssh-rsa\s\S+)(\s.*)?$/m;
    $hostkey = $1;

    my $merged = '';
    my %seen_host;

    my $have_node_entry;
    my $have_ip_entry;

    # true if the hashed host field ($salt, $digest) was generated from $name
    my $hashed_name_matches = sub {
        my ($salt, $digest, $name) = @_;

        my $hmac = Digest::HMAC_SHA1->new($salt);
        $hmac->add($name);

        return $digest eq ($hmac->b64digest . '=');
    };

    # Appends $line to the merged result if it should be kept. Lines that are
    # not 'host ssh-rsa key' entries are only kept when $keep_unparsed is set.
    my $merge_line = sub {
        my ($line, $keep_unparsed) = @_;

        return if $line =~ m/^\s*$/; # skip empty lines
        return if $line =~ m/^#/; # skip comments

        my ($host, $rsakey) = $line =~ m/^(\S+)\s(ssh-rsa\s\S+)(\s.*)?$/;
        if (!defined($host)) {
            $merged .= $line if $keep_unparsed;
            return;
        }

        # first entry for a host field wins
        return if $seen_host{$host};
        $seen_host{$host} = 1;

        if (my ($salt_b64, $digest) = $host =~ m/\|1\|([^\|\s]+)\|([^\|\s]+)$/) {
            # hashed host name: compare against our own name and address
            my $salt = decode_base64($salt_b64);

            if ($hashed_name_matches->($salt, $digest, $nodename)) {
                if ($rsakey eq $hostkey) {
                    $have_node_entry = 1;
                    $merged .= $line;
                }
                return; # outdated key for this node, drop the line
            }

            if ($hashed_name_matches->($salt, $digest, $ip_address)) {
                if ($rsakey eq $hostkey) {
                    $have_ip_entry = 1;
                    $merged .= $line;
                }
                return; # outdated key for this address, drop the line
            }
        } else {
            my $plain = lc($host); # avoid duplicate entries, ssh compares lowercased
            if ($plain eq $ip_address) {
                $have_ip_entry = 1 if $rsakey eq $hostkey;
            } elsif ($plain eq $nodename) {
                $have_node_entry = 1 if $rsakey eq $hostkey;
            }
        }

        $merged .= $line;
    };

    # feed a file's content line by line into $merge_line
    my $merge_text = sub {
        my ($text, $keep_unparsed) = @_;

        while ($text && $text =~ s/^((.*?)(\n|$))//) {
            my $line = "$2\n";
            $merge_line->($line, $keep_unparsed);
        }
    };

    $merge_text->($shared_raw, 1);
    $merge_text->($global_raw);

    # add our own key if not already there
    $merged .= "$nodename $hostkey\n" if !$have_node_entry;
    $merged .= "$ip_address $hostkey\n" if !$have_ip_entry;

    PVE::Tools::file_set_contents($sshknownhosts, $merged);

    return if !$createLink;

    unlink $sshglobalknownhosts;
    symlink $sshknownhosts, $sshglobalknownhosts;

    warn "can't create symlink for ssh known hosts '$sshglobalknownhosts' -> '$sshknownhosts'\n"
        if !-l $sshglobalknownhosts;
}
1391
# Property string format of the 'migration' option in datacenter.cfg.
# 'type' is the default key, so it may be given without the 'type=' prefix.
my $migration_format = {
    type => {
        type => 'string',
        default_key => 1,
        default => 'secure',
        enum => [qw(secure insecure)],
        description => join(' ',
            "Migration traffic is encrypted using an SSH tunnel by default.",
            "On secure, completely private networks this can be disabled to increase performance.",
        ),
    },
    network => {
        type => 'string',
        format => 'CIDR',
        format_description => 'CIDR',
        optional => 1,
        description => "CIDR of the (sub) network that is used for migration.",
    },
};
1409
# Property string format of the 'ha' option in datacenter.cfg.
my $ha_format = {
    shutdown_policy => {
        type => 'string',
        default => 'conditional',
        enum => [qw(freeze failover conditional)],
        description => join(' ',
            "The policy for HA services on node shutdown.",
            "'freeze' disables auto-recovery, 'failover' ensures recovery, 'conditional' recovers on poweroff and freezes on reboot.",
            "Running HA Services will always get stopped first on shutdown.",
        ),
        verbose_description => join(' ',
            "Describes the policy for handling HA services on poweroff or reboot of a node.",
            "Freeze will always freeze services which are still located on the node on shutdown, those services won't be recovered by the HA manager.",
            "Failover will not mark the services as frozen and thus the services will get recovered to other nodes, if the shutdown node does not come up again quickly (< 1min).",
            "'conditional' chooses automatically depending on the type of shutdown, i.e., on a reboot the service will be frozen but on a poweroff the service will stay as is, and thus get recovered after about 2 minutes.",
        ),
    },
};
1419
# Make the 'mac-prefix' format usable in schema definitions; values are
# checked by pve_verify_mac_prefix().
PVE::JSONSchema::register_format(
    'mac-prefix',
    \&pve_verify_mac_prefix,
);
# Verify that $mac_prefix is a valid prefix for unicast MAC addresses.
#
# Accepted are one to three colon separated octets (hex digits, any case),
# optionally followed by a single trailing colon. The second hex digit of the
# first octet must be even, i.e. the least significant bit of the first octet
# (the multicast bit) must not be set.
#
# Parameters:
#   $mac_prefix - the value to check
#   $noerr      - if true, return undef instead of dying on invalid input
#
# Returns the unmodified prefix if it is valid.
sub pve_verify_mac_prefix {
    my ($mac_prefix, $noerr) = @_;

    # \A and \z (instead of ^ and $) make sure a trailing newline is rejected
    my $valid = defined($mac_prefix)
        && $mac_prefix =~ m/\A[a-f0-9][02468ace](?::[a-f0-9]{2}){0,2}:?\z/i;

    if (!$valid) {
        return undef if $noerr;
        die "value is not a valid unicast MAC address prefix\n";
    }

    return $mac_prefix;
}
1430
# Property string format of the 'u2f' option in datacenter.cfg. Declared with
# 'our', so it is also reachable as $PVE::Cluster::u2f_format.
our $u2f_format = {
    appid => {
        type => 'string',
        optional => 1,
        format_description => 'APPID',
        description => "U2F AppId URL override. Defaults to the origin.",
    },
    origin => {
        type => 'string',
        optional => 1,
        format_description => 'URL',
        description => "U2F Origin override. Mostly useful for single nodes with a single URL.",
    },
};
1445
# Schema of the cluster wide 'datacenter.cfg' file. Keys not listed here are
# rejected (additionalProperties => 0). The 'migration', 'ha' and 'u2f'
# options are property strings described by their own format definitions.
my $datacenter_schema = {
    type => "object",
    additionalProperties => 0,
    properties => {
        keyboard => {
            type => 'string',
            optional => 1,
            description => "Default keybord layout for vnc server.",
            enum => PVE::Tools::kvmkeymaplist(),
        },
        language => {
            type => 'string',
            optional => 1,
            description => "Default GUI language.",
            enum => [qw(
                zh_CN zh_TW ca en eu fr de it es ja
                nb nn fa pl pt_BR ru sl sv tr
            )],
        },
        http_proxy => {
            type => 'string',
            optional => 1,
            description => "Specify external http proxy which is used for downloads (example: 'http://username:password\@host:port/')",
            pattern => "http://.*",
        },
        # deprecated, superseded by 'migration'
        migration_unsecure => {
            type => 'boolean',
            optional => 1,
            description => join(' ',
                "Migration is secure using SSH tunnel by default.",
                "For secure private networks you can disable it to speed up migration.",
                "Deprecated, use the 'migration' property instead!",
            ),
        },
        migration => {
            type => 'string',
            format => $migration_format,
            optional => 1,
            description => "For cluster wide migration settings.",
        },
        console => {
            type => 'string',
            optional => 1,
            description => "Select the default Console viewer. You can either use the builtin java applet (VNC; deprecated and maps to html5), an external virt-viewer comtatible application (SPICE), an HTML5 based vnc viewer (noVNC), or an HTML5 based console client (xtermjs). If the selected viewer is not available (e.g. SPICE not activated for the VM), the fallback is noVNC.",
            enum => [qw(applet vv html5 xtermjs)],
        },
        email_from => {
            type => 'string',
            format => 'email-opt',
            optional => 1,
            description => 'Specify email address to send notification from (default is root@$hostname)',
        },
        max_workers => {
            type => 'integer',
            minimum => 1,
            optional => 1,
            description => "Defines how many workers (per node) are maximal started "
                . " on actions like 'stopall VMs' or task from the ha-manager.",
        },
        fencing => {
            type => 'string',
            optional => 1,
            default => 'watchdog',
            enum => [qw(watchdog hardware both)],
            description => "Set the fencing mode of the HA cluster. "
                . "Hardware mode needs a valid configuration of fence devices in /etc/pve/ha/fence.cfg. "
                . "With both all two modes are used."
                . "\n\nWARNING: 'hardware' and 'both' are EXPERIMENTAL & WIP",
        },
        ha => {
            type => 'string',
            format => $ha_format,
            optional => 1,
            description => "Cluster wide HA settings.",
        },
        mac_prefix => {
            type => 'string',
            format => 'mac-prefix',
            optional => 1,
            description => 'Prefix for autogenerated MAC addresses.',
        },
        bwlimit => PVE::JSONSchema::get_standard_option('bwlimit'),
        u2f => {
            type => 'string',
            format => $u2f_format,
            optional => 1,
            description => 'u2f',
        },
    },
};
1549
# Accessor for the datacenter.cfg schema, so that code outside of this module
# (for example the documentation tooling) can use it.
sub get_datacenter_schema {
    return $datacenter_schema;
}
1552
# Parser callback for 'datacenter.cfg'.
#
# Parameters:
#   $filename - name of the parsed file (passed through to the schema parser)
#   $raw      - raw file content, undef is treated like an empty file
#
# Returns the parsed configuration as hash reference. The 'migration' and
# 'ha' property strings are expanded into hashes and deprecated settings are
# mapped to their replacements.
sub parse_datacenter_config {
    my ($filename, $raw) = @_;

    $raw = '' if !defined($raw);
    my $cfg = PVE::JSONSchema::parse_config($datacenter_schema, $filename, $raw);

    my $migration = $cfg->{migration};
    $cfg->{migration} = PVE::JSONSchema::parse_property_string($migration_format, $migration)
        if $migration;

    my $ha = $cfg->{ha};
    $cfg->{ha} = PVE::JSONSchema::parse_property_string($ha_format, $ha)
        if $ha;

    # for backwards compatibility only, new migration property has precedence
    my $unsecure = $cfg->{migration_unsecure};
    if (defined($unsecure)) {
        if (defined($cfg->{migration}->{type})) {
            warn "deprecated setting 'migration_unsecure' and new 'migration: type' set at same time! Ignore 'migration_unsecure'\n";
        } else {
            $cfg->{migration}->{type} = $unsecure ? 'insecure' : 'secure';
        }
    }

    # for backwards compatibility only, applet maps to html5
    my $console = $cfg->{console};
    $cfg->{console} = 'html5' if defined($console) && $console eq 'applet';

    return $cfg;
}
1583
# Writer callback for 'datacenter.cfg'.
#
# Parameters:
#   $filename - name of the file to write (passed through to the schema dumper)
#   $cfg      - configuration hash, as returned by parse_datacenter_config()
#
# Note that $cfg is modified in place: deprecated settings are mapped to
# their replacements and the 'migration' and 'ha' hashes are replaced by
# their property string representation.
#
# Returns the result of PVE::JSONSchema::dump_config().
sub write_datacenter_config {
    my ($filename, $cfg) = @_;

    # map deprecated setting to new one
    if (defined($cfg->{migration_unsecure}) && !defined($cfg->{migration})) {
        my $unsecure = delete $cfg->{migration_unsecure};
        $cfg->{migration}->{type} = $unsecure ? 'insecure' : 'secure';
    }

    # map deprecated applet setting to html5
    $cfg->{console} = 'html5'
        if defined($cfg->{console}) && $cfg->{console} eq 'applet';

    # turn structured options back into property strings
    my %format_for = (
        migration => $migration_format,
        ha => $ha_format,
    );
    for my $option (qw(migration ha)) {
        my $value = $cfg->{$option};
        next if !ref($value);
        $cfg->{$option} = PVE::JSONSchema::print_property_string($value, $format_for{$option});
    }

    return PVE::JSONSchema::dump_config($datacenter_schema, $filename, $cfg);
}
1610
# Register parser and writer for the cluster wide 'datacenter.cfg' file.
cfs_register_file(
    'datacenter.cfg',
    \&parse_datacenter_config,
    \&write_datacenter_config,
);
1614
1615 # X509 Certificate cache helper
1616
# set of currently trusted certificate fingerprints (fingerprint => 1)
my $cert_cache_fingerprints = {};
# node name => fingerprint last loaded for that node
my $cert_cache_nodes = {};
# time (epoch seconds) of the last refresh of all cached nodes
my $cert_cache_timestamp = time();
1620
# Reload certificate fingerprints into the cache.
#
# Parameters:
#   $update_node - only refresh this node; if undef, all nodes currently in
#                  the cache are refreshed and the cache timestamp is reset
#   $clear       - if true, forget the cached fingerprint of nodes whose
#                  certificate cannot be read anymore
#
# A fingerprint that got replaced by a different one is no longer trusted.
# Errors while reading a certificate are reported with warn().
sub update_cert_cache {
    my ($update_node, $clear) = @_;

    syslog('info', "Clearing outdated entries from certificate cache") if $clear;

    my @nodes;
    if (defined($update_node)) {
        @nodes = ($update_node);
    } else {
        $cert_cache_timestamp = time();
        @nodes = keys %$cert_cache_nodes;
    }

    for my $node (@nodes) {
        my $known_fp = $cert_cache_nodes->{$node};

        my $current_fp = eval { get_node_fingerprint($node) };
        if (my $err = $@) {
            warn "$err\n";
            if ($clear && $known_fp) {
                # distrust old fingerprint
                delete $cert_cache_fingerprints->{$known_fp};
                # ensure reload on next proxied request
                delete $cert_cache_nodes->{$node};
            }
            next;
        }

        $cert_cache_fingerprints->{$current_fp} = 1;
        $cert_cache_nodes->{$node} = $current_fp;

        # the certificate changed, so the previous one must not be accepted anymore
        delete $cert_cache_fingerprints->{$known_fp}
            if defined($known_fp) && $current_fp ne $known_fp;
    }
}
1658
# Load and cache the certificate fingerprint of $node, unless the cache
# already has an entry for it. Does nothing if $node is undef.
sub initialize_cert_cache {
    my ($node) = @_;

    return if !defined($node);
    return if defined($cert_cache_nodes->{$node});

    update_cert_cache($node);
}
1666
# Read the PEM encoded X509 certificate stored at $cert_path and return its
# SHA-256 fingerprint, as produced by Net::SSLeay::X509_get_fingerprint().
#
# Dies if the file cannot be opened, does not contain a certificate, or if
# the fingerprint comes back empty.
sub read_ssl_cert_fingerprint {
    my ($cert_path) = @_;

    my $bio = Net::SSLeay::BIO_new_file($cert_path, 'r');
    die "unable to read '$cert_path' - $!\n" if !$bio;

    my $x509 = Net::SSLeay::PEM_read_bio_X509($bio);
    # the BIO is not needed anymore, release it before any error handling
    Net::SSLeay::BIO_free($bio);

    die "unable to read certificate from '$cert_path'\n" if !$x509;

    my $fingerprint = Net::SSLeay::X509_get_fingerprint($x509, 'sha256');
    Net::SSLeay::X509_free($x509);

    if (!defined($fingerprint) || $fingerprint eq '') {
        die "unable to get fingerprint for '$cert_path' - got empty value\n";
    }

    return $fingerprint;
}
1686
# Return the certificate fingerprint of cluster node $node.
#
# The custom certificate 'pveproxy-ssl.pem' is preferred if it exists in the
# node's directory, otherwise 'pve-ssl.pem' is used. Dies if the certificate
# cannot be read (see read_ssl_cert_fingerprint()).
sub get_node_fingerprint {
    my ($node) = @_;

    my $node_dir = "/etc/pve/nodes/$node";
    my $custom_cert = "$node_dir/pveproxy-ssl.pem";

    my $cert_path = (-f $custom_cert) ? $custom_cert : "$node_dir/pve-ssl.pem";

    return read_ssl_cert_fingerprint($cert_path);
}
1697
1698
# Check whether the fingerprint of the X509 certificate $cert is one of the
# cached (trusted) node fingerprints.
#
# Returns 1 if the certificate is trusted, 0 otherwise (also if no
# fingerprint could be computed). On a miss the cache is refreshed and the
# check repeated, but only if the last full refresh is at least a minute old.
sub check_cert_fingerprint {
    my ($cert) = @_;

    # clear cache every 30 minutes at least
    update_cert_cache(undef, 1) if time() - $cert_cache_timestamp >= 30 * 60;

    # get fingerprint of server certificate
    my $fp = Net::SSLeay::X509_get_fingerprint($cert, 'sha256');
    return 0 if !defined($fp) || $fp eq ''; # error

    return 1 if exists($cert_cache_fingerprints->{$fp});

    # clear cache and retry at most once every minute
    return 0 if time() - $cert_cache_timestamp < 60;

    syslog('info', "Could not verify remote node certificate '$fp' with list of pinned certificates, refreshing cache");
    update_cert_cache();

    return exists($cert_cache_fingerprints->{$fp}) ? 1 : 0;
}
1727
1728 # bash completion helpers
1729
# Completion helper: returns a one element list with the lowest VMID in the
# range 100..9999 that is not in use, or an empty list if all are taken.
sub complete_next_vmid {
    my $vmlist = get_vmlist() || {};
    my $used = $vmlist->{ids} || {};

    foreach my $candidate (100 .. 9999) {
        return [$candidate] if !defined($used->{$candidate});
    }

    return [];
}
1741
# Completion helper: returns the IDs of all guests in the VM list as array
# reference (in no particular order).
sub complete_vmid {
    my $vmlist = get_vmlist();
    my $known = $vmlist->{ids} || {};

    my @vmids = keys %$known;

    return \@vmids;
}
1749
# Completion helper: returns the IDs of all guests whose 'node' entry in the
# VM list equals the local node name, as array reference.
sub complete_local_vmid {
    my $vmlist = get_vmlist();
    my $ids = $vmlist->{ids} || {};

    my $nodename = PVE::INotify::nodename();

    my @local_vmids = grep {
        my $entry = $ids->{$_};
        $entry->{node} && $entry->{node} eq $nodename;
    } keys %$ids;

    return \@local_vmids;
}
1766
# Completion helper: returns all nodes from the node list except the local
# one, as array reference.
sub complete_migration_target {
    my $nodename = PVE::INotify::nodename();
    my $nodelist = get_nodelist();

    my @targets = grep { $_ ne $nodename } @$nodelist;

    return \@targets;
}
1781
# Collect the information needed to open an SSH connection to $node.
#
# Parameters:
#   $node         - name of the target node
#   $network_cidr - optional; if defined, the node is asked (via
#                   'pvecm mtunnel') for its address inside this network
#
# Returns a hash reference with the keys 'ip', 'name' and 'network'.
#
# Dies if a network is given but no address could be determined, or if the
# remote command prints further output after the address line.
sub get_ssh_info {
    my ($node, $network_cidr) = @_;

    my $ip;
    if (defined($network_cidr)) {
        # Use mtunnel to get the remote node's ip inside $network_cidr.
        # This goes over the regular network (iow. uses get_ssh_info() with
        # $network_cidr undefined).
        # FIXME: Use the REST API client for this after creating an API entry
        # for get_migration_ip.
        my $default_remote = get_ssh_info($node, undef);
        my $default_ssh = ssh_info_to_command($default_remote);
        my $cmd = [
            @$default_ssh, 'pvecm', 'mtunnel',
            '-migration_network', $network_cidr,
            '-get_migration_ip',
        ];

        PVE::Tools::run_command($cmd, outfunc => sub {
            my ($line) = @_;
            chomp $line;

            # nothing may follow the address line
            die "internal error: unexpected output from mtunnel\n"
                if defined($ip);

            # Lines in any other format are ignored here. If no address line
            # shows up at all, the check after run_command reports the failure.
            $ip = $1 if $line =~ /^ip: '(.*)'$/;
        });

        die "failed to get ip for node '$node' in network '$network_cidr'\n"
            if !defined($ip);
    } else {
        $ip = remote_node_ip($node);
    }

    return {
        ip => $ip,
        name => $node,
        network => $network_cidr,
    };
}
1822
# Build the common part of an ssh command line for the node described by
# $info (as returned by get_ssh_info()).
#
# The command disables the escape character, runs in batch mode and uses the
# node name as host key alias. @extra_options are appended unchanged. The
# target (user@host) is NOT part of the result.
#
# Returns the command as array reference.
sub ssh_info_to_command_base {
    my ($info, @extra_options) = @_;

    my @cmd = ('/usr/bin/ssh', '-e', 'none');
    push @cmd, '-o', 'BatchMode=yes';
    push @cmd, '-o', "HostKeyAlias=$info->{name}";
    push @cmd, @extra_options;

    return \@cmd;
}
1833
# Build a complete ssh command line for the node described by $info: the
# base command (see ssh_info_to_command_base()) including @extra_options,
# followed by the target 'root@<ip>'.
#
# Returns the command as array reference.
sub ssh_info_to_command {
    my ($info, @extra_options) = @_;

    my $base = ssh_info_to_command_base($info, @extra_options);

    return [ @$base, "root\@$info->{ip}" ];
}
1840
# Check whether the local node is in a state which allows joining a cluster.
#
# Parameters:
#   $local_addr - address of the local node
#   $ring0_addr - corosync ring0 address or host name (may be undef)
#   $ring1_addr - corosync ring1 address or host name (may be undef)
#   $force      - if true, problems are only reported, not fatal
#
# All detected problems are collected and printed with a single warn().
# Dies afterwards unless $force is set.
sub assert_joinable {
    my ($local_addr, $ring0_addr, $ring1_addr, $force) = @_;

    # ip_is_ipv6() is called with its full package name below, so make sure
    # the module is really loaded instead of relying on other modules
    require Net::IP;

    my $errors = '';
    my $error = sub { $errors .= "* $_[0]\n"; };

    if (-f $authfile) {
        $error->("authentication key '$authfile' already exists");
    }

    if (-f $clusterconf) {
        $error->("cluster config '$clusterconf' already exists");
    }

    my $vmlist = get_vmlist();
    if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
        $error->("this host already contains virtual guests");
    }

    # with noerr set run_command returns the exit code; 0 means the quorum
    # tool could list nodes
    if (run_command(['corosync-quorumtool', '-l'], noerr => 1, quiet => 1) == 0) {
        $error->("corosync is already running, is this node already in a cluster?!");
    }

    # check if corosync ring IPs are configured on the current nodes interfaces
    my $check_ip = sub {
        my $ip = shift // return; # addresses which are not set are skipped
        my $logid = shift;

        if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
            # not an IP address, so try to resolve it as host name
            my $host = $ip;
            eval { $ip = PVE::Network::get_ip_from_hostname($host); };
            if ($@) {
                $error->("$logid: cannot use '$host': $@\n");
                return;
            }
        }

        # a host sized network matches exactly this one address
        my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
        my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);

        $error->("$logid: cannot use IP '$ip', it must be configured exactly once on local node!\n")
            if (scalar(@$configured_ips) != 1);
    };

    $check_ip->($local_addr, 'local node address');
    $check_ip->($ring0_addr, 'ring0');
    $check_ip->($ring1_addr, 'ring1');

    if ($errors) {
        warn "detected the following error(s):\n$errors";
        die "Check if node may join a cluster failed!\n" if !$force;
    }
}
1893
# Dump the database file $dbfile into a gzip compressed SQL file below
# $dbbackupdir and remove old backups, so that only the 10 newest remain.
#
# NOTE: filesystem must be offline here, no DB changes allowed
my $backup_cfs_database = sub {
    my ($dbfile) = @_;

    mkdir $dbbackupdir;

    my $timestamp = time();
    my $target = "$dbbackupdir/config-$timestamp.sql.gz";

    print "backup old database to '$target'\n";

    # pipeline: sqlite3 dump | gzip, with the output redirected to the target
    my $dump = ['sqlite3', $dbfile, '.dump'];
    my $compress = ['gzip', '-', \ ">${target}"];
    run_command([$dump, $compress], 'errmsg' => "cannot backup old database\n");

    my $keep = 10; # purge older backup

    # the file names contain the creation time, so this sorts newest first
    my @newest_first = sort { $b cmp $a } glob("$dbbackupdir/config-*.sql.gz");

    my @expired = (scalar(@newest_first) > $keep)
        ? @newest_first[$keep .. $#newest_first]
        : ();

    for my $file (@expired) {
        next if $file !~ m/^(\S+)$/; # untaint
        print "delete old backup '$1'\n";
        unlink $1;
    }
};
1919
# Join the local node to an existing cluster.
#
# $param is a hash reference with the keys:
#   hostname    - cluster node to contact (API, port 8006)
#   password    - password of 'root@pam' on that node
#   fingerprint - optional certificate fingerprint of that node; without it
#                 the API client is asked to verify the certificate manually
#   ring0_addr, ring1_addr - optional corosync ring addresses
#   nodeid, votes, force   - optional, passed on to the remote API call
#
# Checks that the node may join (see assert_joinable()), prepares the local
# SSH setup, requests the addition of this node through the API of the
# remote node and finally calls finish_join() with the returned corosync
# configuration and authkey. Errors are raised with die().
sub join {
    my ($param) = @_;

    # The API client is used below; load it here so that a missing module is
    # noticed before anything gets changed on the local node.
    require PVE::APIClient::LWP;

    my $nodename = PVE::INotify::nodename();
    my $local_ip_address = remote_node_ip($nodename);

    my ($ring0_addr, $ring1_addr) = $param->@{'ring0_addr', 'ring1_addr'};
    # check if we can join with the given parameters and current node state
    assert_joinable($local_ip_address, $ring0_addr, $ring1_addr, $param->{force});

    setup_sshd_config();
    setup_rootsshconfig();
    setup_ssh_keys();

    # make sure known_hosts is on local filesystem
    ssh_unmerge_known_hosts();

    my $host = $param->{hostname};
    my $conn_args = {
        username => 'root@pam',
        password => $param->{password},
        cookie_name => 'PVEAuthCookie',
        protocol => 'https',
        host => $host,
        port => 8006,
    };

    if (my $fp = $param->{fingerprint}) {
        $conn_args->{cached_fingerprints} = { uc($fp) => 1 };
    } else {
        # API schema ensures that we can only get here from CLI handler
        $conn_args->{manual_verification} = 1;
    }

    print "Establishing API connection with host '$host'\n";

    my $conn = PVE::APIClient::LWP->new(%$conn_args);
    $conn->login();

    # login raises an exception on failure, so if we get here we're good
    print "Login succeeded.\n";

    # only pass on what was really specified, ring0 defaults to the local address
    my $args = {};
    $args->{force} = $param->{force} if defined($param->{force});
    $args->{nodeid} = $param->{nodeid} if $param->{nodeid};
    $args->{votes} = $param->{votes} if defined($param->{votes});
    $args->{ring0_addr} = $ring0_addr // $local_ip_address;
    $args->{ring1_addr} = $ring1_addr if defined($ring1_addr);

    print "Request addition of this node\n";
    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);

    print "Join request OK, finishing setup locally\n";

    # added successfully - now prepare local node
    finish_join($nodename, $res->{corosync_conf}, $res->{corosync_authkey});
}
1977
# Finish joining a cluster on the local node.
#
# Parameters:
#   $nodename         - name of the local node (only used for the final message)
#   $corosync_conf    - content for the local corosync configuration file
#   $corosync_authkey - content for the corosync authentication key file
#
# Writes both corosync files, stops pve-cluster, backs up and removes the
# local database, starts corosync and pve-cluster, waits (without timeout)
# for quorum, regenerates certificates and SSH files and finally reloads or
# restarts pvedaemon and pveproxy.
sub finish_join {
    my ($nodename, $corosync_conf, $corosync_authkey) = @_;

    mkdir "$localclusterdir";
    PVE::Tools::file_set_contents($authfile, $corosync_authkey);
    # The authkey is a shared secret. Restrict it to its owner explicitly
    # instead of relying on the mode the helper above creates the file with.
    chmod(0400, $authfile)
        or warn "unable to restrict permissions of '$authfile' - $!\n";
    PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);

    print "stopping pve-cluster service\n";
    my $cmd = ['systemctl', 'stop', 'pve-cluster'];
    run_command($cmd, errmsg => "can't stop pve-cluster service");

    # pve-cluster is stopped now, so the database can be dumped and removed
    $backup_cfs_database->($dbfile);
    unlink $dbfile;

    $cmd = ['systemctl', 'start', 'corosync', 'pve-cluster'];
    run_command($cmd, errmsg => "starting pve-cluster failed");

    # wait for quorum
    my $printqmsg = 1;
    while (!check_cfs_quorum(1)) {
        if ($printqmsg) {
            # print the message only once, flush as it has no trailing newline
            print "waiting for quorum...";
            STDOUT->flush();
            $printqmsg = 0;
        }
        sleep(1);
    }
    print "OK\n" if !$printqmsg;

    updatecerts_and_ssh(1);

    print "generated new node certificate, restart pveproxy and pvedaemon services\n";
    run_command(['systemctl', 'reload-or-restart', 'pvedaemon', 'pveproxy']);

    print "successfully added node '$nodename' to cluster.\n";
}
2014
# (Re)generate the node specific files and merge the SSH related files of
# the local node into the cluster wide ones.
#
# Parameters:
#   $force_new_cert - passed on to gen_pve_node_files(); also announces the
#                     generation of a new node certificate
#   $silent         - suppress progress messages; without quorum return
#                     undef instead of dying
sub updatecerts_and_ssh {
    my ($force_new_cert, $silent) = @_;

    my $log = sub {
        my ($msg) = @_;
        print "$msg\n" if !$silent;
    };

    setup_rootsshconfig();

    gen_pve_vzdump_symlink();

    # everything below needs quorum
    unless (check_cfs_quorum(1)) {
        die "no quorum - unable to update files\n" if !$silent;
        return undef;
    }

    setup_ssh_keys();

    my $nodename = PVE::INotify::nodename();
    my $local_ip_address = remote_node_ip($nodename);

    $log->("(re)generate node files");
    $log->("generate new node certificate") if $force_new_cert;
    gen_pve_node_files($nodename, $local_ip_address, $force_new_cert);

    $log->("merge authorized SSH keys and known hosts");
    ssh_merge_keys();
    ssh_merge_known_hosts($nodename, $local_ip_address, 1);
    gen_pve_vzdump_files();
}
2043
2044 1;