]> git.proxmox.com Git - pve-cluster.git/blob - data/PVE/Cluster.pm
followup: fix and add comments + minor code cleanup
[pve-cluster.git] / data / PVE / Cluster.pm
1 package PVE::Cluster;
2
3 use strict;
4 use warnings;
5
6 use Digest::HMAC_SHA1;
7 use Digest::SHA;
8 use Encode;
9 use File::stat qw();
10 use IO::File;
11 use JSON;
12 use MIME::Base64;
13 use Net::SSLeay;
14 use POSIX qw(EEXIST ENOENT);
15 use RRDs;
16 use Socket;
17 use Storable qw(dclone);
18 use UUID;
19
20 use PVE::INotify;
21 use PVE::IPCC;
22 use PVE::JSONSchema;
23 use PVE::Network;
24 use PVE::SafeSyslog;
25 use PVE::Tools qw(run_command);
26
27 use PVE::Cluster::IPCConst;
28
29 use base 'Exporter';
30
31 our @EXPORT_OK = qw(
32 cfs_read_file
33 cfs_write_file
34 cfs_register_file
35 cfs_lock_file);
36
37 use Data::Dumper; # fixme: remove
38
39 # x509 certificate utils
40
# top-level directories inside the pve configuration filesystem
my $basedir = '/etc/pve';
my $authdir = "$basedir/priv";
my $lockdir = "$authdir/lock";

# cfs and corosync files
my $dbfile = '/var/lib/pve-cluster/config.db';
my $dbbackupdir = '/var/lib/pve-cluster/backup';
my $localclusterdir = '/etc/corosync';
my $localclusterconf = "$localclusterdir/corosync.conf";
my $authfile = "$localclusterdir/authkey";
my $clusterconf = "$basedir/corosync.conf";

# key and certificate files
my $authprivkeyfn = "$authdir/authkey.key";
my $authpubkeyfn = "$basedir/authkey.pub";
my $pveca_key_fn = "$authdir/pve-root-ca.key";
my $pveca_srl_fn = "$authdir/pve-root-ca.srl";
my $pveca_cert_fn = "$basedir/pve-root-ca.pem";
# this is just a secret accessible by the web browser,
# used for CSRF prevention
my $pvewww_key_fn = "$basedir/pve-www.key";

# ssh related files
my $ssh_rsa_id_priv = '/root/.ssh/id_rsa';
my $ssh_rsa_id = '/root/.ssh/id_rsa.pub';
my $ssh_host_rsa_id = '/etc/ssh/ssh_host_rsa_key.pub';
my $sshglobalknownhosts = '/etc/ssh/ssh_known_hosts';
my $sshknownhosts = "$authdir/known_hosts";
my $sshauthkeys = "$authdir/authorized_keys";
my $sshd_config_fn = '/etc/ssh/sshd_config';
my $rootsshauthkeys = '/root/.ssh/authorized_keys';
my $rootsshauthkeysbackup = $rootsshauthkeys . '.org';
my $rootsshconfig = '/root/.ssh/config';
73
# Read-only copy of the list of observed files; the authoritative list lives
# in status.c of pmxcfs. Observed files can be fetched directly through IPCC
# and are cached using a computed version - only these may be used with the
# cfs_*_file methods.
my $observed = {
    map { $_ => 1 } (
        'vzdump.cron',
        'storage.cfg',
        'datacenter.cfg',
        'replication.cfg',
        'corosync.conf',
        'corosync.conf.new',
        'user.cfg',
        'domains.cfg',
        'priv/shadow.cfg',
        'priv/tfa.cfg',
        '/qemu-server/',
        '/openvz/',
        '/lxc/',
        'ha/crm_commands',
        'ha/manager_status',
        'ha/resources.cfg',
        'ha/groups.cfg',
        'ha/fence.cfg',
        'status.cfg',
        'ceph.conf',
    )
};
99
# Run $cmd via run_command() while buffering everything it prints; the
# buffered output is written to STDERR only when the command fails, and the
# failure is then re-thrown.
sub run_silent_cmd {
    my ($cmd) = @_;

    my $captured = '';
    my $collect = sub {
        my ($line) = @_;
        $captured .= $line . "\n";
    };

    eval { run_command($cmd, outfunc => $collect, errfunc => $collect) };

    if (my $failure = $@) {
        print STDERR $captured;
        die $failure;
    }
}
114
# Returns a true value when "$basedir/local" carries the owner write bit.
# Dies with "cluster not ready" otherwise, unless $noerr is set.
sub check_cfs_quorum {
    my ($noerr) = @_;

    # note: the -w file test always succeeds for root, so we need to look at
    # the mode bits reported by File::stat::lstat instead
    my $stat = File::stat::lstat("$basedir/local");
    my $writable = ($stat && (($stat->mode & 0200) != 0));

    if (!$writable && !$noerr) {
        die "cluster not ready - no quorum?\n";
    }

    return $writable;
}
127
# Returns true when "$basedir/local" is a symlink, which is taken as the sign
# that the pve configuration filesystem is mounted. Dies otherwise, unless
# $noerr is set.
sub check_cfs_is_mounted {
    my ($noerr) = @_;

    my $mounted = -l "$basedir/local";

    if (!$mounted && !$noerr) {
        die "pve configuration filesystem not mounted\n";
    }

    return $mounted;
}
138
# Create the directories required for node $nodename below $basedir.
# Dies if the configuration filesystem is not mounted or a directory cannot
# be created.
sub gen_local_dirs {
    my ($nodename) = @_;

    check_cfs_is_mounted();

    my $nodedir = "$basedir/nodes/$nodename";

    my @wanted = (
        "$basedir/priv",
        "$basedir/nodes",
        $nodedir,
        (map { "$nodedir/$_" } qw(lxc qemu-server openvz priv)),
    );

    for my $path (@wanted) {
        next if -d $path;
        # someone else may create the directory concurrently - that is fine
        mkdir($path) || $! == EEXIST || die "unable to create directory '$path' - $!\n";
    }
}
159
# Generate the RSA authentication key pair ($authprivkeyfn / $authpubkeyfn)
# if the private key does not exist yet. The generation runs under the
# cluster wide 'authkey' lock; dies if locking or key generation fails.
sub gen_auth_key {

    return if -f "$authprivkeyfn";

    check_cfs_is_mounted();

    cfs_lock_authkey(undef, sub {
        # mkdir is a list operator, so the parentheses are required here:
        # without them the '||' chain becomes part of mkdir's argument and
        # a failing mkdir is never noticed.
        mkdir($authdir) || $! == EEXIST || die "unable to create dir '$authdir' - $!\n";

        run_silent_cmd(['openssl', 'genrsa', '-out', $authprivkeyfn, '2048']);

        run_silent_cmd(['openssl', 'rsa', '-in', $authprivkeyfn, '-pubout', '-out', $authpubkeyfn]);
    });

    # the lock helper reports failures through $@ instead of dying
    die "$@\n" if $@;
}
176
# Generate the 4096 bit RSA key of the pve CA unless it already exists.
sub gen_pveca_key {

    return if -f $pveca_key_fn;

    my @cmd = ('openssl', 'genrsa', '-out', $pveca_key_fn, '4096');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve ca key:\n$err";
    }
}
187
# Make sure a pve root CA key and certificate exist.
# Returns 0 when both were already present, 1 when a new certificate was
# generated; dies if generation fails.
sub gen_pveca_cert {

    return 0 if -f $pveca_key_fn && -f $pveca_cert_fn;

    gen_pveca_key();

    # we try to generate an unique 'subject' to avoid browser problems
    # (reused serial numbers, ..)
    my ($uuid_bin, $uuid_txt);
    UUID::generate($uuid_bin);
    UUID::unparse($uuid_bin, $uuid_txt);

    my $subject = "/CN=Proxmox Virtual Environment/OU=$uuid_txt/O=PVE Cluster Manager CA/";

    # wrap openssl with faketime to prevent bug #904
    my @cmd = (
        'faketime', 'yesterday',
        'openssl', 'req', '-batch', '-days', '3650', '-new', '-x509', '-nodes',
        '-key', $pveca_key_fn,
        '-out', $pveca_cert_fn,
        '-subj', $subject,
    );

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "generating pve root certificate failed:\n$err";
    }

    return 1;
}
215
# Generate the per node 2048 bit RSA key "pve-ssl.key" for $nodename unless
# it already exists.
sub gen_pve_ssl_key {
    my ($nodename) = @_;

    die "no node name specified" if !$nodename;

    my $key_path = "$basedir/nodes/$nodename/pve-ssl.key";

    return if -f $key_path;

    my @cmd = ('openssl', 'genrsa', '-out', $key_path, '2048');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve ssl key for node '$nodename':\n$err";
    }
}
231
# Generate the 2048 bit RSA key stored in $pvewww_key_fn unless it already
# exists.
sub gen_pve_www_key {

    return if -f $pvewww_key_fn;

    my @cmd = ('openssl', 'genrsa', '-out', $pvewww_key_fn, '2048');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve www key:\n$err";
    }
}
242
# Overwrite the CA serial file ($pveca_srl_fn) with $serial.
sub update_serial {
    my $serial = shift;

    return PVE::Tools::file_set_contents($pveca_srl_fn, $serial);
}
248
# Create the node certificate "pve-ssl.pem" for $nodename, signed by the pve
# root CA.
#
# $force    - regenerate even if the certificate file already exists
# $nodename - node name, used for the file location, CN and DNS alt name
# $ip       - node IP, added as IP subject alternative name
#
# Dies if a parameter is missing, the temporary openssl configuration cannot
# be written, or one of the openssl invocations fails. Temporary files are
# removed in all of these cases.
sub gen_pve_ssl_cert {
    my ($force, $nodename, $ip) = @_;

    die "no node name specified" if !$nodename;
    die "no IP specified" if !$ip;

    my $pvessl_cert_fn = "$basedir/nodes/$nodename/pve-ssl.pem";

    return if !$force && -f $pvessl_cert_fn;

    my $names = "IP:127.0.0.1,IP:::1,DNS:localhost";

    my $rc = PVE::INotify::read_file('resolvconf');

    $names .= ",IP:$ip";

    my $fqdn = $nodename;

    $names .= ",DNS:$nodename";

    # with a configured search domain we also know the fully qualified name
    if ($rc && $rc->{search}) {
        $fqdn = $nodename . "." . $rc->{search};
        $names .= ",DNS:$fqdn";
    }

    my $sslconf = <<__EOD;
RANDFILE = /root/.rnd
extensions = v3_req

[ req ]
default_bits = 2048
distinguished_name = req_distinguished_name
req_extensions = v3_req
prompt = no
string_mask = nombstr

[ req_distinguished_name ]
organizationalUnitName = PVE Cluster Node
organizationName = Proxmox Virtual Environment
commonName = $fqdn

[ v3_req ]
basicConstraints = CA:FALSE
extendedKeyUsage = serverAuth
subjectAltName = $names
__EOD

    # NOTE(review): both temporary file names are predictable (pid based)
    # and live in /tmp - consider switching to File::Temp.
    my $cfgfn = "/tmp/pvesslconf-$$.tmp";
    my $reqfn = "/tmp/pvecertreq-$$.tmp";

    my $cleanup = sub {
        unlink $reqfn;
        unlink $cfgfn;
    };

    # fail early with a useful message instead of printing to an undefined
    # file handle
    my $fh = IO::File->new($cfgfn, "w")
        || die "unable to open '$cfgfn' for writing - $!\n";
    print $fh $sslconf;
    if (!close($fh)) {
        my $close_err = $!;
        $cleanup->();
        die "unable to write '$cfgfn' - $close_err\n";
    }

    # make sure no stale request file is around
    unlink $reqfn;

    my $pvessl_key_fn = "$basedir/nodes/$nodename/pve-ssl.key";
    eval {
        run_silent_cmd(['openssl', 'req', '-batch', '-new', '-config', $cfgfn,
                        '-key', $pvessl_key_fn, '-out', $reqfn]);
    };

    if (my $err = $@) {
        $cleanup->();
        die "unable to generate pve certificate request:\n$err";
    }

    update_serial("0000000000000000") if ! -f $pveca_srl_fn;

    eval {
        # wrap openssl with faketime to prevent bug #904
        run_silent_cmd(['faketime', 'yesterday', 'openssl', 'x509', '-req',
                        '-in', $reqfn, '-days', '3650', '-out', $pvessl_cert_fn,
                        '-CAkey', $pveca_key_fn, '-CA', $pveca_cert_fn,
                        '-CAserial', $pveca_srl_fn, '-extfile', $cfgfn]);
    };

    if (my $err = $@) {
        $cleanup->();
        die "unable to generate pve ssl certificate:\n$err";
    }

    unlink $cfgfn;
    unlink $reqfn;
}
335
# Create all directories, keys and certificates needed for node $nodename.
# The node certificate is regenerated when a new CA certificate had to be
# created or when $opt_force is set.
sub gen_pve_node_files {
    my ($nodename, $ip, $opt_force) = @_;

    gen_local_dirs($nodename);

    gen_auth_key();

    # cluster wide secret used for CSRF prevention
    gen_pve_www_key();

    # per node private key
    gen_pve_ssl_key($nodename);

    # a freshly created CA invalidates existing node certificates
    my $ca_created = gen_pveca_cert();

    my $regenerate = $opt_force ? 1 : $ca_created;

    gen_pve_ssl_cert($regenerate, $nodename, $ip);
}
357
# initial content written to /etc/pve/vzdump.cron when that file is missing
my $vzdump_cron_dummy = join("\n",
    '# cluster wide vzdump cron schedule',
    '# Atomatically generated file - do not edit',
    '',
    'PATH="/usr/sbin:/usr/bin:/sbin:/bin"',
    '',
    '',
);
365
# Point /etc/cron.d/vzdump at the cluster wide /etc/pve/vzdump.cron, as long
# as the latter exists and the former is not a symlink already.
sub gen_pve_vzdump_symlink {

    my $cron_source = "/etc/pve/vzdump.cron";
    my $cron_link = "/etc/cron.d/vzdump";

    my $need_link = (-f $cron_source) && !(-l $cron_link);

    if ($need_link) {
        # make backup if file exists
        rename($cron_link, "/root/etc_cron_vzdump.org");
        symlink($cron_source, $cron_link);
    }
}
377
# Create /etc/pve/vzdump.cron from the template if it is missing, then make
# sure the cron.d symlink is in place.
sub gen_pve_vzdump_files {

    my $cron_file = "/etc/pve/vzdump.cron";

    if (! -f $cron_file) {
        PVE::Tools::file_set_contents($cron_file, $vzdump_cron_dummy);
    }

    gen_pve_vzdump_symlink();
}
387
# module wide caches, refreshed by cfs_update()
my ($versions, $vmlist, $clinfo) = ({}, {}, {});
391
# Send IPC message $msgid with payload $data and return the reply.
# Dies when no reply was received and errno is set.
my $ipcc_send_rec = sub {
    my ($msgid, $data) = @_;

    my $reply = PVE::IPCC::ipcc_send_rec($msgid, $data);

    if (!defined($reply) && ($! != 0)) {
        die "ipcc_send_rec[$msgid] failed: $!\n";
    }

    return $reply;
};
401
# Like $ipcc_send_rec, but the reply is decoded from JSON before it is
# returned.
my $ipcc_send_rec_json = sub {
    my ($msgid, $data) = @_;

    # error handling (die on failure with errno set) happens in the helper
    my $reply = $ipcc_send_rec->($msgid, $data);

    return decode_json($reply);
};
411
# Fetch the raw content of $path through IPC.
# Returns the content, '' when nothing was returned without an error, undef
# when the error is ENOENT; dies with the errno message on any other error.
my $ipcc_get_config = sub {
    my ($path) = @_;

    my $request = pack("Z*", $path);
    my $content = PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_CONFIG, $request);

    return $content if defined($content);

    return '' if $! == 0;
    return undef if $! == ENOENT;

    die "$!\n";
};
427
# Query status entry $name, optionally for a specific $nodename (an empty
# node name is sent when none is given).
my $ipcc_get_status = sub {
    my ($name, $nodename) = @_;

    my $node = $nodename || "";
    my $request = pack("Z[256]Z[256]", $name, $node);

    return PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_STATUS, $request);
};
434
# Remove status entry $name.
my $ipcc_remove_status = sub {
    my ($name) = @_;

    # only the key is sent: pmxcfs takes the missing data payload as a hint
    # to remove this key from the status hashtable
    my $request = pack("Z[256]", $name);

    return $ipcc_send_rec->(CFS_IPC_SET_STATUS, $request);
};
442
# Set status entry $name to $data; references are serialized to JSON first.
my $ipcc_update_status = sub {
    my ($name, $data) = @_;

    my $payload = $data;
    $payload = encode_json($data) if ref($data);

    my $request = pack("Z[256]Z*", $name, $payload);

    return $ipcc_send_rec->(CFS_IPC_SET_STATUS, $request);
};
452
# Send a cluster log message.
#
# $priority - numeric log priority (packed as one byte)
# $ident, $tag, $msg - packed as NUL terminated strings; the byte lengths of
#   $ident and $tag (including the terminating NUL) are sent in the header
my $ipcc_log = sub {
    my ($priority, $ident, $tag, $msg) = @_;

    # bytes::length() only exists once the bytes module has been loaded;
    # require it here instead of relying on some other module to do so
    # (this does not enable byte semantics for this scope)
    require bytes;

    my $bindata = pack "CCCZ*Z*Z*", $priority, bytes::length($ident) + 1,
        bytes::length($tag) + 1, $ident, $tag, $msg;

    return $ipcc_send_rec->(CFS_IPC_LOG_CLUSTER_MSG, $bindata);
};
461
# Request the cluster log; $max defaults to 0 and $user to the empty string.
my $ipcc_get_cluster_log = sub {
    my ($user, $max) = @_;

    my $limit = defined($max) ? $max : 0;
    my $who = $user || "";

    my $request = pack("VVVVZ*", $limit, 0, 0, 0, $who);

    return $ipcc_send_rec->(CFS_IPC_GET_CLUSTER_LOG, $request);
};
470
# parsed file cache used by the cfs_*_file helpers, keyed by file name
my $ccache = {};

# Refresh the module wide version information, cluster info and guest list.
# All caches are dropped when the reported starttime changed. On errors the
# affected caches are reset and the error is either re-thrown ($fail set) or
# only emitted as a warning.
sub cfs_update {
    my ($fail) = @_;

    my $report = sub {
        my ($problem) = @_;
        die $problem if $fail;
        warn $problem;
    };

    eval {
        my $fsver = $ipcc_send_rec_json->(CFS_IPC_GET_FS_VERSION);
        my $started = $fsver->{starttime};
        die "no starttime\n" if !$started;

        # a different starttime means everything we cached is stale
        my $known = $versions->{starttime};
        if (!$known || $started != $known) {
            ($vmlist, $clinfo, $ccache) = ({}, {}, {});
        }

        $versions = $fsver;
    };
    if (my $err = $@) {
        ($versions, $vmlist, $clinfo, $ccache) = ({}, {}, {}, {});
        $report->($err);
    }

    eval {
        my $have = $clinfo->{version};
        if (!$have || $have != $versions->{clinfo}) {
            $clinfo = $ipcc_send_rec_json->(CFS_IPC_GET_CLUSTER_INFO);
        }
    };
    if (my $err = $@) {
        $clinfo = {};
        $report->($err);
    }

    eval {
        my $have = $vmlist->{version};
        if (!$have || $have != $versions->{vmlist}) {
            $vmlist = $ipcc_send_rec_json->(CFS_IPC_GET_GUEST_LIST);
        }
    };
    if (my $err = $@) {
        $vmlist = {};
        $report->($err);
    }
}
526
# Accessor for the cached guest list (as last fetched by cfs_update).
sub get_vmlist { return $vmlist; }
530
# Accessor for the cached cluster info (as last fetched by cfs_update).
sub get_clinfo { return $clinfo; }
534
# Accessor for the 'nodelist' entry of the cached cluster info.
sub get_members { return $clinfo->{nodelist}; }
538
# Returns an array reference with all known node names. Falls back to a list
# holding only the local node name when there is no node list or the local
# node is not part of it.
sub get_nodelist {
    my $local = PVE::INotify::nodename();
    my $members = $clinfo->{nodelist};

    return [ keys %$members ] if $members && $members->{$local};

    return [ $local ];
}
550
# Publish $data under "kv/$key", or remove that entry when $data is undef.
# The values are only kept in an in-memory hashtable inside pmxcfs: local
# data is gone after a restart (of pmxcfs or the node), peer data is still
# available then. Best used for status data like running (ceph) services,
# package versions, ...
# Dies for references and oversized data; IPC errors are only warned about.
sub broadcast_node_kv {
    my ($key, $data) = @_;

    my $status_key = "kv/$key";

    if (defined($data)) {
        die "cannot send a reference\n" if ref($data);
        die "data for '$key' too big\n" if length($data) >= (32 * 1024); # limit from pmxfs

        eval { $ipcc_update_status->($status_key, $data) };
    } else {
        eval { $ipcc_remove_status->($status_key) };
    }

    warn $@ if $@;
}
573
# Read the "kv/$key" status entries. With $nodename only that node is
# queried, otherwise all nodes from get_nodelist(). Returns a hash reference
# mapping node name to raw value; nodes without a (true) value are omitted.
sub get_node_kv {
    my ($key, $nodename) = @_;

    my @nodes = $nodename ? ($nodename) : @{ get_nodelist() };

    my $res = {};
    for my $node (@nodes) {
        my $raw = $ipcc_get_status->("kv/$key", $node);
        $res->{$node} = $raw if $raw;
    }

    return $res;
}
597
# Publish the task list. $data must be an array of tasks in chronological
# descending order; it is shortened in place when it is too large.
sub broadcast_tasklist {
    my ($data) = @_;

    # the serialized list may not get bigger than 32kb (CFS_MAX_STATUS_SIZE
    # from pmxcfs) - drop older items until we satisfy this constraint
    my $limit = 32 * 1024;
    pop @$data while length(encode_json($data)) >= $limit;

    eval { $ipcc_update_status->("tasklist", $data) };

    warn $@ if $@;
}
616
# per node cache of decoded task lists: { $node => { data, version } }
my $tasklistcache = {};

# Returns an array reference with the tasks of all nodes, or only those of
# $nodename when given. The task list of a node is fetched again whenever no
# cached copy exists or its kvstore version differs from the cached one.
# Errors for a single node are sent to syslog and do not abort the others.
sub get_tasklist {
    my ($nodename) = @_;

    my $kvstore = $versions->{kvstore} || {};

    my $nodelist = get_nodelist();

    my $res = [];
    foreach my $node (@$nodelist) {
        next if $nodename && ($nodename ne $node);
        eval {
            # declare and assign separately: 'my $x = ... if COND' has
            # undefined behaviour and may keep a value from a previous run
            my $ver;
            $ver = $kvstore->{$node}->{tasklist} if $kvstore->{$node};

            my $cd = $tasklistcache->{$node};
            if (!$cd || !$ver || !$cd->{version} ||
                ($cd->{version} != $ver)) {
                my $raw = $ipcc_get_status->("tasklist", $node) || '[]';
                my $data = decode_json($raw);
                push @$res, @$data;
                $tasklistcache->{$node} = {
                    data => $data,
                    version => $ver,
                };
            } elsif ($cd->{data}) {
                push @$res, @{$cd->{data}};
            }
        };
        my $err = $@;
        syslog('err', $err) if $err;
    }

    return $res;
}
651
# Publish $data as status entry "rrd/$rrdid"; failures only cause a warning.
sub broadcast_rrd {
    my ($rrdid, $data) = @_;

    eval { $ipcc_update_status->("rrd/$rrdid", $data) };

    if (my $err = $@) {
        warn $err;
    }
}
662
# time and result of the last successful rrd_dump() call
my $last_rrd_dump = 0;
my $last_rrd_data = "";

# Fetch and parse the RRD dump. Results are reused for calls less than two
# seconds apart. Returns a hash reference mapping each key to an array
# reference of its values ('U' is turned into undef); on IPC errors a
# warning is emitted and an empty hash reference is returned.
sub rrd_dump {

    my $now = time();

    return $last_rrd_data if ($now - $last_rrd_dump) < 2;

    my $raw = eval { $ipcc_send_rec->(CFS_IPC_GET_RRD_DUMP) };

    if (my $err = $@) {
        warn $err;
        return {};
    }

    my $res = {};

    if ($raw) {
        # only newline terminated lines are considered
        for my $line ($raw =~ /^(.*)\n/mg) {
            my ($key, @fields) = split(/:/, $line);
            next if !$key;
            next if scalar(@fields) <= 1;
            $res->{$key} = [ map { $_ eq 'U' ? undef : $_ } @fields ];
        }
    }

    ($last_rrd_dump, $last_rrd_data) = ($now, $res);

    return $res;
}
702
# Fetch data points from the RRD file $rrdname below /var/lib/rrdcached/db.
#
# $rrdname   - file name relative to the rrd directory
# $timeframe - one of 'hour', 'day', 'week', 'month', 'year'
# $cf        - consolidation function, defaults to "AVERAGE"
#
# Returns an array reference of hashes, each with a 'time' key plus one key
# per data source that has a defined value. Dies on an unknown timeframe, on
# RRD errors, or if the returned step does not match the requested
# resolution.
sub create_rrd_data {
    my ($rrdname, $timeframe, $cf) = @_;

    my $rrddir = "/var/lib/rrdcached/db";

    my $rrd = "$rrddir/$rrdname";

    # timeframe => [ resolution in seconds, number of data points ]
    my $setup = {
        hour =>  [ 60, 70 ],
        day  =>  [ 60*30, 70 ],
        week =>  [ 60*180, 70 ],
        month => [ 60*720, 70 ],
        year =>  [ 60*10080, 70 ],
    };

    # fail with a clear message instead of dereferencing undef below
    die "no timeframe specified\n" if !defined($timeframe);
    my $spec = $setup->{$timeframe} || die "unknown timeframe '$timeframe'\n";

    my ($reso, $count) = @$spec;
    # align the end of the requested range to the resolution
    my $ctime  = $reso*int(time()/$reso);
    my $req_start = $ctime - $reso*$count;

    $cf = "AVERAGE" if !$cf;

    my @args = (
        "-s" => $req_start,
        "-e" => $ctime - 1,
        "-r" => $reso,
    );

    # go through rrdcached when its socket is available
    my $socket = "/var/run/rrdcached.sock";
    push @args, "--daemon" => "unix:$socket" if -S $socket;

    my ($start, $step, $names, $data) = RRDs::fetch($rrd, $cf, @args);

    my $err = RRDs::error;
    die "RRD error: $err\n" if $err;

    die "got wrong time resolution ($step != $reso)\n"
        if $step != $reso;

    my $res = [];
    my $fields = scalar(@$names);
    for my $line (@$data) {
        my $entry = { 'time' => $start };
        $start += $step;
        for my $i (0 .. $fields - 1) {
            my $val = $line->[$i];
            # leave empty fields undefined
            # maybe make this configurable?
            next if !defined($val);
            $entry->{$names->[$i]} = $val;
        }
        push @$res, $entry;
    }

    return $res;
}
760
# Render a PNG graph for the data sources $ds of RRD file $rrdname.
#
# $rrdname   - file name relative to /var/lib/rrdcached/db
# $timeframe - one of 'hour', 'day', 'week', 'month', 'year'
# $ds        - list of data source names (split with PVE::Tools::split_list)
# $cf        - consolidation function, defaults to "AVERAGE"
#
# Returns { filename => ..., image => ... }; the image is not written to a
# file. Dies on an unknown timeframe, when more data sources than colors are
# requested, or on RRD errors.
sub create_rrd_graph {
    my ($rrdname, $timeframe, $ds, $cf) = @_;

    # Using RRD graph is clumsy - maybe it
    # is better to simply fetch the data, and do all display
    # related things with javascript (new extjs html5 graph library).

    my $rrddir = "/var/lib/rrdcached/db";

    my $rrd = "$rrddir/$rrdname";

    my @ids = PVE::Tools::split_list($ds);

    my $ds_txt = join('_', @ids);

    my $filename = "${rrd}_${ds_txt}.png";

    # timeframe => [ resolution in seconds, number of data points ]
    my $setup = {
        hour =>  [ 60, 60 ],
        day  =>  [ 60*30, 70 ],
        week =>  [ 60*180, 70 ],
        month => [ 60*720, 70 ],
        year =>  [ 60*10080, 70 ],
    };

    # fail with a clear message instead of dereferencing undef below
    die "no timeframe specified\n" if !defined($timeframe);
    my $spec = $setup->{$timeframe} || die "unknown timeframe '$timeframe'\n";

    my ($reso, $count) = @$spec;

    my @args = (
        "--imgformat" => "PNG",
        "--border" => 0,
        "--height" => 200,
        "--width" => 800,
        "--start" => - $reso*$count,
        "--end" => 'now' ,
        "--lower-limit" => 0,
    );

    # go through rrdcached when its socket is available
    my $socket = "/var/run/rrdcached.sock";
    push @args, "--daemon" => "unix:$socket" if -S $socket;

    my @coldef = ('#00ddff', '#ff0000');

    $cf = "AVERAGE" if !$cf;

    my $i = 0;
    foreach my $id (@ids) {
        my $col = $coldef[$i++] || die "fixme: no color definition";
        push @args, "DEF:${id}=$rrd:${id}:$cf";
        my $dataid = $id;
        # these two are multiplied by 100 before they are drawn
        if ($id eq 'cpu' || $id eq 'iowait') {
            push @args, "CDEF:${id}_per=${id},100,*";
            $dataid = "${id}_per";
        }
        push @args, "LINE2:${dataid}${col}:${id}";
    }

    push @args, '--full-size-mode';

    # we do not really store data into the file
    my $res = RRDs::graphv('-', @args);

    my $err = RRDs::error;
    die "RRD error: $err\n" if $err;

    return { filename => $filename, image => $res->{image} };
}
827
# Fast way to read the file $path: the content is requested through IPC,
# which avoids the fuse overhead.
sub get_config {
    my ($path) = @_;

    return $ipcc_get_config->($path);
}
834
# Public wrapper around the cluster log IPC request; $user and $max are
# passed through unchanged.
sub get_cluster_log {
    my ($user, $max) = @_;

    return $ipcc_get_cluster_log->($user, $max);
}
840
# registered parser/writer pairs, keyed by observed file name
my $file_info = {};

# Register $parser and $writer for the observed file $filename.
# Dies if the file is not observed or was registered before.
sub cfs_register_file {
    my ($filename, $parser, $writer) = @_;

    die "unknown file '$filename'" if !$observed->{$filename};

    die "file '$filename' already registered" if $file_info->{$filename};

    $file_info->{$filename} = { parser => $parser, writer => $writer };
}
855
# Return the parsed content of $filename, using the cache entry as long as
# its version matches $version. References are deep-copied before they are
# handed out.
my $ccache_read = sub {
    my ($filename, $parser, $version) = @_;

    my $entry = ($ccache->{$filename} ||= {});

    my $stale = !$entry->{version} || !$version || $entry->{version} != $version;

    if ($stale) {
        # we always call the parser, even when the file does not exist
        # (in that case the raw data is undef)
        my $raw = get_config($filename);
        $entry->{data} = $parser->("/etc/pve/$filename", $raw);
        $entry->{version} = $version;
    }

    my $data = $entry->{data};

    return ref($data) ? dclone($data) : $data;
};
875
# Look up the current version of $filename. Guest configuration files take
# their version from the guest list and use the registration of their type
# ("/lxc/", "/qemu-server/", "/openvz/"); all other files use $versions and
# their own registration.
# Returns ($version, $info) in list context, $version otherwise; dies if no
# parser/writer was registered for the file.
sub cfs_file_version {
    my ($filename) = @_;

    my ($version, $infotag);

    if ($filename =~ m!^nodes/[^/]+/(openvz|lxc|qemu-server)/(\d+)\.conf$!) {
        my ($type, $vmid) = ($1, $2);
        my $guest = ($vmlist && $vmlist->{ids}) ? $vmlist->{ids}->{$vmid} : undef;
        $version = $guest->{version} if $guest;
        $infotag = "/$type/";
    } else {
        ($infotag, $version) = ($filename, $versions->{$filename});
    }

    my $info = $file_info->{$infotag} ||
        die "unknown file type '$filename'\n";

    return $version if !wantarray;

    return ($version, $info);
}
897
# Return the parsed content of the registered file $filename (served from
# the cache when the version did not change).
sub cfs_read_file {
    my ($filename) = @_;

    my ($version, $info) = cfs_file_version($filename);

    return $ccache_read->($filename, $info->{parser}, $version);
}
906
# Serialize $data with the registered writer and store it as
# /etc/pve/$filename. Dies if the file type is unknown or has no writer.
sub cfs_write_file {
    my ($filename, $data) = @_;

    my (undef, $info) = cfs_file_version($filename);

    my $writer = $info->{writer} || die "no writer defined";

    my $fsname = "/etc/pve/$filename";

    my $raw = $writer->($fsname, $data);

    # drop the cached version so the next read parses the file again
    my $cached = $ccache->{$filename};
    $cached->{version} = undef if $cached;

    PVE::Tools::file_set_contents($fsname, $raw);
}
924
# Run $code->(@param) while holding the cluster lock "$lockdir/$lockid".
#
# $lockid  - name of the lock directory below $lockdir
# $timeout - seconds to wait for the lock, defaults to 10
# $code    - code reference to execute, called with @param
#
# Does not die: on success $@ is cleared and the result of $code is returned,
# on failure $@ is set to an error message and undef is returned. An alarm
# that was pending when this is called is suspended and re-armed afterwards.
my $cfs_lock = sub {
    my ($lockid, $timeout, $code, @param) = @_;

    my $prev_alarm = alarm(0); # suspend outer alarm early

    my $res;
    my $got_lock = 0;

    # this timeout only covers acquiring the lock
    $timeout = 10 if !$timeout;

    my $filename = "$lockdir/$lockid";

    eval {

        # result intentionally ignored, the -d test below decides
        mkdir $lockdir;

        if (! -d $lockdir) {
            die "pve cluster filesystem not online.\n";
        }

        my $timeout_err = sub { die "got lock request timeout\n"; };
        local $SIG{ALRM} = $timeout_err;

        # the lock is taken by creating the lock directory
        while (1) {
            alarm ($timeout);
            $got_lock = mkdir($filename);
            $timeout = alarm(0) - 1; # we'll sleep for 1s, see down below

            last if $got_lock;

            $timeout_err->() if $timeout <= 0;

            print STDERR "trying to acquire cfs lock '$lockid' ...\n";
            utime (0, 0, $filename); # cfs unlock request
            sleep(1);
        }

        # fixed command timeout: cfs locks have a timeout of 120
        # using 60 gives us another 60 seconds to abort the task
        local $SIG{ALRM} = sub { die "got lock timeout - aborting command\n"; };
        alarm(60);

        cfs_update(); # make sure we read latest versions inside code()

        $res = &$code(@param);

        alarm(0);
    };

    # save the error before anything else can overwrite $@
    my $err = $@;

    # failing to get the lock without quorum is reported as such
    $err = "no quorum!\n" if !$got_lock && !check_cfs_quorum(1);

    rmdir $filename if $got_lock; # if we held the lock always unlock again

    # re-arm the alarm of the caller
    alarm($prev_alarm);

    if ($err) {
        $@ = "error with cfs lock '$lockid': $err";
        return undef;
    }

    $@ = undef;

    return $res;
};
992
# Run $code->(@param) under the lock belonging to the observed file
# $filename. Dies for unknown files; lock errors are reported through $@.
sub cfs_lock_file {
    my ($filename, $timeout, $code, @param) = @_;

    die "unknown file '$filename'" if !$observed->{$filename};

    # dots and slashes are not used in lock names
    (my $lockid = "file-$filename") =~ s/[.\/]/_/g;

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
1003
# Run $code->(@param) under the lock "storage-$storeid"; errors are reported
# through $@.
sub cfs_lock_storage {
    my ($storeid, $timeout, $code, @param) = @_;

    return $cfs_lock->("storage-$storeid", $timeout, $code, @param);
}
1011
# Run $code->(@param) under the lock "domain-$domainname"; errors are
# reported through $@.
sub cfs_lock_domain {
    my ($domainname, $timeout, $code, @param) = @_;

    return $cfs_lock->("domain-$domainname", $timeout, $code, @param);
}
1019
# Run $code->(@param) under the lock "acme-$account"; errors are reported
# through $@.
sub cfs_lock_acme {
    my ($account, $timeout, $code, @param) = @_;

    return $cfs_lock->("acme-$account", $timeout, $code, @param);
}
1027
# Run $code->(@param) under the 'authkey' lock; errors are reported through
# $@.
sub cfs_lock_authkey {
    my ($timeout, $code, @param) = @_;

    my $lockid = 'authkey';

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
1033
# symbolic log priority names and their numeric values
my $log_levels = {
    "emerg" => 0,
    "alert" => 1,
    "crit" => 2,
    "critical" => 2,
    "err" => 3,
    "error" => 3,
    "warn" => 4,
    "warning" => 4,
    "notice" => 5,
    "info" => 6,
    "debug" => 7,
};

# Write $msg to syslog and to the cluster log.
#
# $priority - numeric priority or one of the names in $log_levels
# $ident    - optional identifier, prefixed to the syslog message as <ident>
# $msg      - message text; "empty message" is used when it is false
#
# Dies if the priority is neither numeric nor a known name. A failure to
# write the cluster log is only reported via syslog.
sub log_msg {
    my ($priority, $ident, $msg) = @_;

    # test with defined(): "emerg" maps to 0, which is a valid priority
    my $level = $log_levels->{$priority};
    $priority = $level if defined($level);

    die "need numeric log priority" if $priority !~ /^\d+$/;

    my $tag = PVE::SafeSyslog::tag();

    $msg = "empty message" if !$msg;

    $ident = "" if !$ident;
    # characters outside of ASCII are replaced by \uXXXX escapes
    $ident = encode("ascii", $ident,
                    sub { sprintf "\\u%04x", shift });

    my $ascii = encode("ascii", $msg, sub { sprintf "\\u%04x", shift });

    if ($ident) {
        syslog($priority, "<%s> %s", $ident, $ascii);
    } else {
        syslog($priority, "%s", $ascii);
    }

    eval { $ipcc_log->($priority, $ident, $tag, $ascii); };

    syslog("err", "writing cluster log failed: $@") if $@;
}
1077
# Returns 1 when $vmid is not in the cached guest list. Otherwise returns
# undef if $noerr is set, or dies naming the node that owns the guest.
sub check_vmid_unused {
    my ($vmid, $noerr) = @_;

    my $guest = get_vmlist()->{ids}->{$vmid};

    return 1 if !defined($guest);

    return undef if $noerr;

    my $kind = ($guest->{type} eq 'qemu') ? 'VM' : 'CT';

    die "$kind $vmid already exists on node '$guest->{node}'\n";
}
1091
# Returns 1 when $nodename is part of the cached node list. Otherwise
# returns undef if $noerr is set, or dies.
sub check_node_exists {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};

    if ($members && $members->{$nodename}) {
        return 1;
    }

    return undef if $noerr;

    die "no such cluster node '$nodename'\n";
}
1102
# Returns the IP of $nodename (this is also used to get the IP of the local
# node). In list context the address family is returned as second value; it
# is looked up once and stored in the node list entry. Nodes without a
# known IP are resolved through PVE::Network::get_ip_from_hostname.
sub remote_node_ip {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};
    my $entry = $members ? $members->{$nodename} : undef;
    my $ip = $entry ? $entry->{ip} : undef;

    if ($ip) {
        return $ip if !wantarray;

        $entry->{address_family} ||= PVE::Tools::get_host_address_family($ip);

        return ($ip, $entry->{address_family});
    }

    # fallback: try to get IP by other means
    return PVE::Network::get_ip_from_hostname($nodename, $noerr);
}
1124
# Returns the local IP inside the migration network. The network is taken
# from $migration_network or, if that is undef, from the 'migration' section
# of datacenter.cfg. Returns undef when no network is configured. Unless
# $noerr is set, dies when the local node has none or more than one IP in
# that network.
sub get_local_migration_ip {
    my ($migration_network, $noerr) = @_;

    my $cidr = $migration_network;

    if (!defined($cidr)) {
        my $dc_conf = cfs_read_file('datacenter.cfg');
        my $configured = $dc_conf->{migration}->{network};
        $cidr = $configured if defined($configured);
    }

    return undef if !defined($cidr);

    my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
    my $found = scalar(@$ips);

    if (!$noerr) {
        die "could not get migration ip: no IP address configured on local node for network '$cidr'\n"
            if $found == 0;

        die "could not get migration ip: multiple IP address configured for network '$cidr'\n"
            if $found > 1;
    }

    return $ips->[0];
}
1150
1151 # ssh related utility functions
1152
# Rewrite $sshauthkeys without duplicate rsa/dsa keys (ssh-copy-id simply
# appends keys, so the file can grow too large). Keys from the backup of
# root's authorized_keys and our own public key are merged in as well.
sub ssh_merge_keys {

    my $data = '';
    if (-f $sshauthkeys) {
        $data = PVE::Tools::file_get_contents($sshauthkeys, 128*1024);
        chomp($data);
    }

    my $found_backup;
    if (-f $rootsshauthkeysbackup) {
        $data .= "\n" . PVE::Tools::file_get_contents($rootsshauthkeysbackup, 128*1024);
        chomp($data);
        $found_backup = 1;
    }

    # always add ourself
    if (-f $ssh_rsa_id) {
        my $own_key = PVE::Tools::file_get_contents($ssh_rsa_id);
        chomp($own_key);
        $data .= "\n$own_key\n";
    }

    my %seen;
    my @kept;
    for my $line (split(/\n/, $data)) {
        # non-comment key lines are kept only for the first occurrence of a key
        if ($line !~ /^#/ && $line =~ m/(^|\s)ssh-(rsa|dsa)\s+(\S+)\s+\S+$/) {
            next if $seen{$3}++;
        }
        push @kept, $line;
    }

    my $newdata = join('', map { "$_\n" } @kept);

    PVE::Tools::file_set_contents($sshauthkeys, $newdata, 0600);

    if ($found_backup && -l $rootsshauthkeys) {
        # everything went well, so we can remove the backup
        unlink $rootsshauthkeysbackup;
    }
}
1195
# Make sure sshd_config contains "PermitRootLogin yes". When the file had to
# be changed, sshd is reloaded (or restarted) through systemctl.
sub setup_sshd_config {

    my $conf = PVE::Tools::file_get_contents($sshd_config_fn);

    # nothing to do if root login is enabled already
    return if $conf =~ m/^PermitRootLogin\s+yes\s*$/m;

    # replace an existing (possibly commented out) setting, else append one
    my $replaced = ($conf =~ s/^#?PermitRootLogin.*$/PermitRootLogin yes/m);
    if (!$replaced) {
        chomp $conf;
        $conf .= "\nPermitRootLogin yes\n";
    }

    PVE::Tools::file_set_contents($sshd_config_fn, $conf);

    PVE::Tools::run_command(['systemctl', 'reload-or-restart', 'sshd']);
}
1212
# Create root's ssh key pair and ssh client config, each only if it does not
# exist yet.
sub setup_rootsshconfig {

    # create ssh key if it does not exist
    if (! -f $ssh_rsa_id) {
        mkdir '/root/.ssh/';
        system ("echo|ssh-keygen -t rsa -N '' -b 2048 -f ${ssh_rsa_id_priv}");
    }

    # create ssh config if it does not exist
    if (! -f $rootsshconfig) {
        mkdir '/root/.ssh';
        if (my $fh = IO::File->new($rootsshconfig, O_CREAT|O_WRONLY|O_EXCL, 0640)) {
            # this is the default ciphers list from Debian's OpenSSH package
            # (OpenSSH_7.4p1 Debian-10, OpenSSL 1.0.2k 26 Jan 2017), with the
            # order changed to put AES before Chacha20 (most hardware has AESNI)
            my @ciphers = qw(
                aes128-ctr
                aes192-ctr
                aes256-ctr
                aes128-gcm@openssh.com
                aes256-gcm@openssh.com
                chacha20-poly1305@openssh.com
            );
            print $fh "Ciphers " . join(',', @ciphers) . "\n";
            close($fh);
        }
    }
}
1232
# Set up the shared authorized_keys file below $authdir and make root's
# authorized_keys a symlink to it. An existing regular file of root is
# imported into a newly created shared file and renamed to the backup name.
# Problems are only reported as warnings.
sub setup_ssh_keys {

    mkdir $authdir;

    my $import_ok;

    if (! -f $sshauthkeys) {
        my $old = (-f $rootsshauthkeys)
            ? PVE::Tools::file_get_contents($rootsshauthkeys, 128*1024)
            : undef;

        # O_EXCL: never overwrite a file that showed up in the meantime
        my $fh = IO::File->new ($sshauthkeys, O_CREAT|O_WRONLY|O_EXCL, 0400);
        if ($fh) {
            PVE::Tools::safe_print($sshauthkeys, $fh, $old) if $old;
            close($fh);
            $import_ok = 1;
        }
    }

    if (! -f $sshauthkeys) {
        warn "can't create shared ssh key database '$sshauthkeys'\n";
    }

    # move a regular file out of the way, it is kept as backup
    if (-f $rootsshauthkeys && ! -l $rootsshauthkeys) {
        rename($rootsshauthkeys , $rootsshauthkeysbackup)
            or warn "rename $rootsshauthkeys failed - $!\n";
    }

    symlink $sshauthkeys, $rootsshauthkeys if ! -l $rootsshauthkeys;

    if (-l $rootsshauthkeys) {
        # the backup is not needed if its content was imported above
        unlink $rootsshauthkeysbackup if $import_ok;
    } else {
        warn "can't create symlink for ssh keys '$rootsshauthkeys' -> '$sshauthkeys'\n";
    }
}
1270
# If the global known_hosts file is a symlink, write the content of the
# shared known_hosts file (or nothing, if that does not exist) to it.
sub ssh_unmerge_known_hosts {
    return if ! -l $sshglobalknownhosts;

    my $hosts = (-f $sshknownhosts)
        ? PVE::Tools::file_get_contents($sshknownhosts, 128*1024)
        : '';

    PVE::Tools::file_set_contents($sshglobalknownhosts, $hosts);
}
1280
# Merge the local RSA host key into the shared known_hosts file.
#
# Parameters:
#   $nodename   - name of the local node (required)
#   $ip_address - IP address of the local node (required)
#   $createLink - if true, $sshglobalknownhosts is replaced by a symlink to
#                 the shared file afterwards
#
# The new file content is built from the shared file (all lines kept, except
# duplicates and stale hashed entries for this node) plus the ssh-rsa entries
# of a non-symlink global file. Entries for $nodename and $ip_address carrying
# the current host key are appended if none was found.
#
# Dies if a required parameter is missing or the host key cannot be parsed.
sub ssh_merge_known_hosts {
    my ($nodename, $ip_address, $createLink) = @_;

    die "no node name specified" if !$nodename;
    die "no ip address specified" if !$ip_address;

    # ssh lowercases host names (aliases) before comparison, so we do too
    ($nodename, $ip_address) = (lc($nodename), lc($ip_address));

    mkdir $authdir;

    if (!-f $sshknownhosts) {
        my $fh = IO::File->new($sshknownhosts, O_CREAT | O_WRONLY | O_EXCL, 0600);
        close($fh) if $fh;
    }

    my $shared = PVE::Tools::file_get_contents($sshknownhosts, 128 * 1024);

    # only import the global file if it is a real file and not a symlink
    my $global = '';
    if (-f $sshglobalknownhosts && !-l $sshglobalknownhosts) {
        $global = PVE::Tools::file_get_contents($sshglobalknownhosts, 128 * 1024);
    }

    my $hostkey = PVE::Tools::file_get_contents($ssh_host_rsa_id);
    # Note: the file sometimes contains empty lines at the start, so a
    # multi-line match is used
    die "can't parse $ssh_host_rsa_id" if $hostkey !~ m/^(ssh-rsa\s\S+)(\s.*)?$/m;
    $hostkey = $1;

    my $merged = '';
    my $seen = {}; # host field => 1, used to drop duplicate entries

    my $have_nodename;
    my $have_local_ip;

    # true if the hashed host field ($salt, $digest) was generated from $name
    my $hash_matches = sub {
        my ($salt, $digest, $name) = @_;

        my $hmac = Digest::HMAC_SHA1->new($salt);
        $hmac->add($name);

        return $digest eq ($hmac->b64digest . '=');
    };

    # Append $line to $merged if it should be kept. Lines which are not
    # ssh-rsa entries are only kept when $keep_other is set.
    my $merge_line = sub {
        my ($line, $keep_other) = @_;

        return if $line =~ m/^\s*$/; # skip empty lines
        return if $line =~ m/^#/; # skip comments

        if ($line !~ m/^(\S+)\s(ssh-rsa\s\S+)(\s.*)?$/) {
            $merged .= $line if $keep_other;
            return;
        }
        my ($host, $rsakey) = ($1, $2);

        return if $seen->{$host};
        $seen->{$host} = 1;

        if ($host =~ m/\|1\|([^\|\s]+)\|([^\|\s]+)$/) {
            # hashed host name: |1|<base64 salt>|<base64 HMAC-SHA1 digest>
            my $salt = decode_base64($1);
            my $digest = $2;

            if ($hash_matches->($salt, $digest, $nodename)) {
                # entries for our node name with an outdated key are dropped
                if ($rsakey eq $hostkey) {
                    $have_nodename = 1;
                    $merged .= $line;
                }
                return;
            }
            if ($hash_matches->($salt, $digest, $ip_address)) {
                # same for entries hashed from our IP address
                if ($rsakey eq $hostkey) {
                    $have_local_ip = 1;
                    $merged .= $line;
                }
                return;
            }
        } else {
            my $plain = lc($host); # avoid duplicate entries, ssh compares lowercased
            if ($plain eq $ip_address) {
                $have_local_ip = 1 if $rsakey eq $hostkey;
            } elsif ($plain eq $nodename) {
                $have_nodename = 1 if $rsakey eq $hostkey;
            }
        }

        $merged .= $line;
    };

    # consume both inputs line by line; every line gets a trailing newline
    while ($shared && $shared =~ s/^((.*?)(\n|$))//) {
        $merge_line->("$2\n", 1);
    }

    while ($global && $global =~ s/^((.*?)(\n|$))//) {
        $merge_line->("$2\n");
    }

    # add our own key if not already there
    $merged .= "$nodename $hostkey\n" if !$have_nodename;
    $merged .= "$ip_address $hostkey\n" if !$have_local_ip;

    PVE::Tools::file_set_contents($sshknownhosts, $merged);

    return if !$createLink;

    unlink $sshglobalknownhosts;
    symlink $sshknownhosts, $sshglobalknownhosts;

    if (!-l $sshglobalknownhosts) {
        warn "can't create symlink for ssh known hosts '$sshglobalknownhosts' -> '$sshknownhosts'\n";
    }
}
1392
# Property-string format of the 'migration' option in the datacenter schema.
my $migration_format = {
    type => {
        type => 'string',
        default_key => 1,
        default => 'secure',
        enum => [qw(secure insecure)],
        description => "Migration traffic is encrypted using an SSH tunnel by "
            . "default. On secure, completely private networks this can be "
            . "disabled to increase performance.",
    },
    network => {
        type => 'string',
        format => 'CIDR',
        format_description => 'CIDR',
        optional => 1,
        description => "CIDR of the (sub) network that is used for migration.",
    },
};
1410
# Property-string format of the 'ha' option in the datacenter schema.
my $ha_format = {
    shutdown_policy => {
        type => 'string',
        default => 'conditional',
        enum => [qw(freeze failover conditional)],
        description => "The policy for HA services on node shutdown. 'freeze' disables auto-recovery, 'failover' ensures recovery, 'conditional' recovers on poweroff and freezes on reboot. Running HA Services will always get stopped first on shutdown.",
        verbose_description => "Describes the policy for handling HA services on poweroff or reboot of a node. Freeze will always freeze services which are still located on the node on shutdown, those services won't be recovered by the HA manager. Failover will not mark the services as frozen and thus the services will get recovered to other nodes, if the shutdown node does not come up again quickly (< 1min). 'conditional' chooses automatically depending on the type of shutdown, i.e., on a reboot the service will be frozen but on a poweroff the service will stay as is, and thus get recovered after about 2 minutes.",
    },
};
1420
# make 'mac-prefix' usable as a schema format, checked by pve_verify_mac_prefix()
PVE::JSONSchema::register_format('mac-prefix' => \&pve_verify_mac_prefix);
# Validate a unicast MAC address prefix.
#
# Accepted are one to three colon separated hex octets with an optional
# trailing colon; the second hex digit of the first octet must be even.
#
# Parameters:
#   $mac_prefix - the string to check
#   $noerr      - if true, return undef instead of dying on invalid input
#
# Returns the unchanged prefix if it is valid.
sub pve_verify_mac_prefix {
    my ($mac_prefix, $noerr) = @_;

    return $mac_prefix
        if $mac_prefix =~ m/^[a-f0-9][02468ace](?::[a-f0-9]{2}){0,2}:?$/i;

    return undef if $noerr;

    die "value is not a valid unicast MAC address prefix\n";
}
1431
# Property-string format of the 'u2f' option in the datacenter schema.
# Declared with 'our', so it is visible outside this file as well.
our $u2f_format = {
    appid => {
        type => 'string',
        optional => 1,
        format_description => 'APPID',
        description => "U2F AppId URL override. Defaults to the origin.",
    },
    origin => {
        type => 'string',
        optional => 1,
        format_description => 'URL',
        description => "U2F Origin override. Mostly useful for single nodes with a single URL.",
    },
};
1446
# Schema of the options accepted in 'datacenter.cfg'; unknown options are
# rejected (additionalProperties => 0).
my $datacenter_schema = {
    type => "object",
    additionalProperties => 0,
    properties => {
        keyboard => {
            type => 'string',
            optional => 1,
            description => "Default keybord layout for vnc server.",
            enum => PVE::Tools::kvmkeymaplist(),
        },
        language => {
            type => 'string',
            optional => 1,
            description => "Default GUI language.",
            enum => [qw(
                zh_CN zh_TW ca en eu fr de it es ja
                nb nn fa pl pt_BR ru sl sv tr
            )],
        },
        http_proxy => {
            type => 'string',
            optional => 1,
            pattern => "http://.*",
            description => "Specify external http proxy which is used for downloads (example: 'http://username:password\@host:port/')",
        },
        # deprecated boolean, superseded by the 'migration' property string
        migration_unsecure => {
            type => 'boolean',
            optional => 1,
            description => "Migration is secure using SSH tunnel by default. "
                . "For secure private networks you can disable it to speed up "
                . "migration. Deprecated, use the 'migration' property instead!",
        },
        migration => {
            type => 'string',
            format => $migration_format,
            optional => 1,
            description => "For cluster wide migration settings.",
        },
        console => {
            type => 'string',
            optional => 1,
            enum => [qw(applet vv html5 xtermjs)],
            description => "Select the default Console viewer. You can either use the builtin java applet (VNC; deprecated and maps to html5), an external virt-viewer comtatible application (SPICE), an HTML5 based vnc viewer (noVNC), or an HTML5 based console client (xtermjs). If the selected viewer is not available (e.g. SPICE not activated for the VM), the fallback is noVNC.",
        },
        email_from => {
            type => 'string',
            format => 'email-opt',
            optional => 1,
            description => "Specify email address to send notification from (default is root@\$hostname)",
        },
        max_workers => {
            type => 'integer',
            minimum => 1,
            optional => 1,
            description => "Defines how many workers (per node) are maximal started "
                . " on actions like 'stopall VMs' or task from the ha-manager.",
        },
        fencing => {
            type => 'string',
            optional => 1,
            default => 'watchdog',
            enum => [qw(watchdog hardware both)],
            description => "Set the fencing mode of the HA cluster. Hardware mode "
                . "needs a valid configuration of fence devices in /etc/pve/ha/fence.cfg."
                . " With both all two modes are used."
                . "\n\nWARNING: 'hardware' and 'both' are EXPERIMENTAL & WIP",
        },
        ha => {
            type => 'string',
            format => $ha_format,
            optional => 1,
            description => "Cluster wide HA settings.",
        },
        mac_prefix => {
            type => 'string',
            format => 'mac-prefix',
            optional => 1,
            description => 'Prefix for autogenerated MAC addresses.',
        },
        bwlimit => PVE::JSONSchema::get_standard_option('bwlimit'),
        u2f => {
            type => 'string',
            format => $u2f_format,
            optional => 1,
            description => 'u2f',
        },
    },
};
1550
# Accessor for the file-private datacenter.cfg schema, so that code outside
# this file (e.g. the documentation tooling) can read it.
sub get_datacenter_schema {
    return $datacenter_schema;
}
1553
# Parser callback for 'datacenter.cfg'.
#
# Parameters:
#   $filename - name of the file being parsed, passed on to the schema parser
#   $raw      - raw file content; undef is treated as empty
#
# Returns the parsed config hash. The 'migration' and 'ha' property strings
# are expanded into hashes, and deprecated settings are mapped to their
# current equivalents.
sub parse_datacenter_config {
    my ($filename, $raw) = @_;

    my $cfg = PVE::JSONSchema::parse_config($datacenter_schema, $filename, $raw // '');

    # expand the property strings into hashes
    my $format_of = { migration => $migration_format, ha => $ha_format };
    foreach my $opt (qw(migration ha)) {
        my $value = $cfg->{$opt};
        next if !$value;
        $cfg->{$opt} = PVE::JSONSchema::parse_property_string($format_of->{$opt}, $value);
    }

    # for backwards compatibility only, new migration property has precedence
    my $legacy_unsecure = $cfg->{migration_unsecure};
    if (defined($legacy_unsecure)) {
        if (defined($cfg->{migration}->{type})) {
            warn "deprecated setting 'migration_unsecure' and new 'migration: type' " .
                "set at same time! Ignore 'migration_unsecure'\n";
        } else {
            $cfg->{migration}->{type} = $legacy_unsecure ? 'insecure' : 'secure';
        }
    }

    # for backwards compatibility only, applet maps to html5
    $cfg->{console} = 'html5' if ($cfg->{console} // '') eq 'applet';

    return $cfg;
}
1584
# Writer callback for 'datacenter.cfg'.
#
# Parameters:
#   $filename - name of the file being written, passed on to the schema dumper
#   $cfg      - config hash; note that it is modified in place
#
# Returns the serialized config as produced by PVE::JSONSchema::dump_config().
sub write_datacenter_config {
    my ($filename, $cfg) = @_;

    # map deprecated setting to new one (only if the new one is not set)
    if (defined($cfg->{migration_unsecure}) && !defined($cfg->{migration})) {
        my $unsecure = delete $cfg->{migration_unsecure};
        $cfg->{migration}->{type} = $unsecure ? 'insecure' : 'secure';
    }

    # map deprecated applet setting to html5
    $cfg->{console} = 'html5' if ($cfg->{console} // '') eq 'applet';

    # hashes are serialized back into property strings
    foreach my $entry ([ migration => $migration_format ], [ ha => $ha_format ]) {
        my ($opt, $format) = @$entry;
        next if !ref($cfg->{$opt});
        $cfg->{$opt} = PVE::JSONSchema::print_property_string($cfg->{$opt}, $format);
    }

    return PVE::JSONSchema::dump_config($datacenter_schema, $filename, $cfg);
}
1611
# hook the parser/writer pair up for 'datacenter.cfg'
cfs_register_file('datacenter.cfg', \&parse_datacenter_config, \&write_datacenter_config);
1615
# X509 certificate cache helpers

# node name => fingerprint last loaded for that node
my $cert_cache_nodes = {};
# fingerprint => 1 for every fingerprint which is currently trusted
my $cert_cache_fingerprints = {};
# time of the last refresh of all cached nodes (module load time initially)
my $cert_cache_timestamp = time();
1621
# Reload certificate fingerprints into the cache.
#
# Parameters:
#   $update_node - only refresh this node; if undef, all nodes already in the
#                  cache are refreshed and the cache timestamp is reset
#   $clear       - if true, cached entries of nodes whose fingerprint cannot
#                  be loaded anymore are dropped
#
# Load errors are reported with warn() and do not abort the update.
sub update_cert_cache {
    my ($update_node, $clear) = @_;

    if ($clear) {
        syslog('info', "Clearing outdated entries from certificate cache");
    }

    my @nodes;
    if (defined($update_node)) {
        @nodes = ($update_node);
    } else {
        $cert_cache_timestamp = time();
        @nodes = keys %$cert_cache_nodes;
    }

    for my $node (@nodes) {
        my $known_fp = $cert_cache_nodes->{$node};

        my $current_fp = eval { get_node_fingerprint($node) };
        if (my $err = $@) {
            warn "$err\n";
            if ($clear && $known_fp) {
                # distrust old fingerprint
                delete $cert_cache_fingerprints->{$known_fp};
                # ensure reload on next proxied request
                delete $cert_cache_nodes->{$node};
            }
            next;
        }

        $cert_cache_fingerprints->{$current_fp} = 1;
        $cert_cache_nodes->{$node} = $current_fp;

        # a replaced certificate must not stay trusted
        delete $cert_cache_fingerprints->{$known_fp}
            if defined($known_fp) && $current_fp ne $known_fp;
    }
}
1659
# Load and cache the certificate fingerprint of $node, unless $node is undef
# or the cache already has an entry for it.
sub initialize_cert_cache {
    my ($node) = @_;

    my $needs_load = defined($node) && !defined($cert_cache_nodes->{$node});

    update_cert_cache($node) if $needs_load;
}
1667
# Read the PEM encoded X509 certificate at $cert_path and return its SHA-256
# fingerprint as produced by Net::SSLeay::X509_get_fingerprint().
#
# Dies if the file cannot be opened, contains no readable certificate, or the
# fingerprint comes back empty.
sub read_ssl_cert_fingerprint {
    my ($cert_path) = @_;

    my $bio = Net::SSLeay::BIO_new_file($cert_path, 'r');
    die "unable to read '$cert_path' - $!\n" if !$bio;

    # the BIO is released before checking the result, so it is freed on error too
    my $x509 = Net::SSLeay::PEM_read_bio_X509($bio);
    Net::SSLeay::BIO_free($bio);
    die "unable to read certificate from '$cert_path'\n" if !$x509;

    my $fingerprint = Net::SSLeay::X509_get_fingerprint($x509, 'sha256');
    Net::SSLeay::X509_free($x509);

    if (!defined($fingerprint) || $fingerprint eq '') {
        die "unable to get fingerprint for '$cert_path' - got empty value\n";
    }

    return $fingerprint;
}
1687
# Return the certificate fingerprint of $node. The custom 'pveproxy-ssl.pem'
# is used if that file exists, otherwise 'pve-ssl.pem'.
# Dies (via read_ssl_cert_fingerprint) if the certificate cannot be read.
sub get_node_fingerprint {
    my ($node) = @_;

    my $node_dir = "/etc/pve/nodes/$node";
    my $custom_cert_path = "$node_dir/pveproxy-ssl.pem";

    my $cert_path = (-f $custom_cert_path) ? $custom_cert_path : "$node_dir/pve-ssl.pem";

    return read_ssl_cert_fingerprint($cert_path);
}
1698
1699
# Check whether the certificate $cert (a handle accepted by
# Net::SSLeay::X509_get_fingerprint) matches one of the cached fingerprints.
#
# Returns 1 if the SHA-256 fingerprint of $cert is in the cache, 0 otherwise
# (also if no fingerprint can be computed).
#
# The cache is refreshed, dropping outdated entries, when the last refresh is
# at least 30 minutes old. On a mismatch it is refreshed and checked once
# more, but only if the last refresh is at least one minute old.
sub check_cert_fingerprint {
    my ($cert) = @_;

    # clear cache every 30 minutes at least
    update_cert_cache(undef, 1) if time() - $cert_cache_timestamp >= 60*30;

    # get fingerprint of server certificate
    my $fp = Net::SSLeay::X509_get_fingerprint($cert, 'sha256');
    return 0 if !defined($fp) || $fp eq ''; # error

    # trusted fingerprints are the hash keys, so a plain lookup is sufficient
    return 1 if exists($cert_cache_fingerprints->{$fp});

    # clear cache and retry at most once every minute
    if (time() - $cert_cache_timestamp >= 60) {
        syslog ('info', "Could not verify remote node certificate '$fp' with list of pinned certificates, refreshing cache");
        update_cert_cache();
        return exists($cert_cache_fingerprints->{$fp}) ? 1 : 0;
    }

    return 0;
}
1728
1729 # bash completion helpers
1730
# Completion helper: returns a one-element array ref holding the lowest VMID
# in the range 100..9999 which is not in the VM list, or an empty array ref if
# all of them are taken.
sub complete_next_vmid {
    my $vmlist = get_vmlist() || {};
    my $used = $vmlist->{ids} || {};

    foreach my $candidate (100 .. 9999) {
        return [$candidate] if !defined($used->{$candidate});
    }

    return [];
}
1742
# Completion helper: returns an array ref with all VMIDs found in the 'ids'
# hash of the VM list.
sub complete_vmid {
    my $vmlist = get_vmlist();
    my $known = $vmlist->{ids} || {};

    my @vmids = keys %$known;

    return \@vmids;
}
1750
# Completion helper: returns an array ref with the VMIDs of all guests whose
# 'node' entry in the VM list equals the local node name.
sub complete_local_vmid {
    my $vmlist = get_vmlist();
    my $known = $vmlist->{ids} || {};

    my $local_node = PVE::INotify::nodename();

    my @local_vmids = grep {
        my $entry = $known->{$_};
        $entry->{node} && $entry->{node} eq $local_node;
    } keys %$known;

    return \@local_vmids;
}
1767
# Completion helper: returns an array ref with all nodes from the node list
# except the local one.
sub complete_migration_target {
    my $local_node = PVE::INotify::nodename();
    my $nodelist = get_nodelist();

    my $targets = [];
    foreach my $candidate (@$nodelist) {
        push @$targets, $candidate unless $candidate eq $local_node;
    }

    return $targets;
}
1782
# Collect the information needed to open an SSH connection to $node.
#
# Parameters:
#   $node         - name of the target node
#   $network_cidr - optional; if defined, the IP of $node inside this network
#                   is queried from the remote node
#
# Returns a hash ref with the keys 'ip', 'name' and 'network'.
# Dies if the remote node reports more output after the IP line, or no IP
# line at all.
sub get_ssh_info {
    my ($node, $network_cidr) = @_;

    my $ip;
    if (defined($network_cidr)) {
        # Use 'pvecm mtunnel' to get the remote node's IP inside $network_cidr.
        # The query itself goes over the regular network, i.e. it uses
        # get_ssh_info() with $network_cidr undefined.
        # FIXME: Use the REST API client for this after creating an API entry
        # for get_migration_ip.
        my $default_remote = get_ssh_info($node, undef);
        my $default_ssh = ssh_info_to_command($default_remote);
        my $cmd = [
            @$default_ssh, 'pvecm', 'mtunnel',
            '-migration_network', $network_cidr,
            '-get_migration_ip',
        ];

        PVE::Tools::run_command($cmd, outfunc => sub {
            my ($line) = @_;
            chomp $line;

            # the IP line has to be the last line of output
            die "internal error: unexpected output from mtunnel\n"
                if defined($ip);

            # Lines seen before the IP line are ignored. The former
            # "bad output" check for them could never trigger, as $ip is
            # always undefined at this point.
            $ip = $1 if $line =~ /^ip: '(.*)'$/;
        });

        die "failed to get ip for node '$node' in network '$network_cidr'\n"
            if !defined($ip);
    } else {
        $ip = remote_node_ip($node);
    }

    return {
        ip => $ip,
        name => $node,
        network => $network_cidr,
    };
}
1823
# Build the common part of an ssh command line for the node described by
# $info (a hash ref; only its 'name' entry is used here, as HostKeyAlias).
#
# Returns a new array ref: the ssh binary, the fixed options, then
# @extra_options. The target host is not included.
sub ssh_info_to_command_base {
    my ($info, @extra_options) = @_;

    my @cmd = ('/usr/bin/ssh', '-e', 'none');
    push @cmd, '-o', 'BatchMode=yes';
    push @cmd, '-o', 'HostKeyAlias=' . $info->{name};
    push @cmd, @extra_options;

    return \@cmd;
}
1834
# Build a complete ssh command line for the node described by $info: the base
# command from ssh_info_to_command_base() followed by 'root@<ip>' as target.
# Returns an array ref.
sub ssh_info_to_command {
    my ($info, @extra_options) = @_;

    my $base = ssh_info_to_command_base($info, @extra_options);

    return [ @$base, "root\@$info->{ip}" ];
}
1841
# Check whether the local node is in a state which allows joining a cluster.
#
# Parameters:
#   $local_addr - address of the local node
#   $ring0_addr - corosync ring0 address, may be undef
#   $ring1_addr - corosync ring1 address, may be undef
#   $force      - if true, detected problems are only printed as a warning
#
# All problems found are collected and printed with warn(); dies afterwards
# unless $force is set.
sub assert_joinable {
    my ($local_addr, $ring0_addr, $ring1_addr, $force) = @_;

    my $errors = '';
    my $error = sub {
        my ($msg) = @_;
        $errors .= "* $msg\n";
    };

    $error->("authentication key '$authfile' already exists") if -f $authfile;

    $error->("cluster config '$clusterconf' already exists") if -f $clusterconf;

    my $vmlist = get_vmlist();
    my $has_guests = $vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}});
    $error->("this host already contains virtual guests") if $has_guests;

    # exit code 0 means the quorum tool could talk to a running corosync
    my $quorumtool_rc = run_command(['corosync-quorumtool', '-l'], noerr => 1, quiet => 1);
    if ($quorumtool_rc == 0) {
        $error->("corosync is already running, is this node already in a cluster?!");
    }

    # check if corosync ring IPs are configured on the current nodes interfaces
    my $check_ip = sub {
        my ($addr, $logid) = @_;

        return if !defined($addr);

        my $ip = $addr;
        if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
            # not an IP address, so try to resolve it as host name
            my $resolved = eval { PVE::Network::get_ip_from_hostname($addr) };
            if ($@) {
                $error->("$logid: cannot use '$addr': $@\n");
                return;
            }
            $ip = $resolved;
        }

        # a host sized network matches exactly this single address
        my $prefix_len = Net::IP::ip_is_ipv6($ip) ? 128 : 32;
        my $configured_ips = PVE::Network::get_local_ip_from_cidr("$ip/$prefix_len");

        if (scalar(@$configured_ips) != 1) {
            $error->("$logid: cannot use IP '$ip', it must be configured exactly once on local node!\n");
        }
    };

    $check_ip->($local_addr, 'local node address');
    $check_ip->($ring0_addr, 'ring0');
    $check_ip->($ring1_addr, 'ring1');

    return if !$errors;

    warn "detected the following error(s):\n$errors";
    die "Check if node may join a cluster failed!\n" if !$force;
}
1894
# Dump the database $dbfile with sqlite3 into a gzip compressed file named
# 'config-<timestamp>.sql.gz' below $dbbackupdir, then remove all but the
# 10 newest backups (newest according to their file name).
#
# NOTE: filesystem must be offline here, no DB changes allowed
my $backup_cfs_database = sub {
    my ($dbfile) = @_;

    mkdir $dbbackupdir;

    my $now = time();
    my $backup_fn = "$dbbackupdir/config-$now.sql.gz";

    print "backup old database to '$backup_fn'\n";

    # pipeline: sqlite3 dump | gzip, with the output redirected to the backup file
    my $dump_cmd = ['sqlite3', $dbfile, '.dump'];
    my $gzip_cmd = ['gzip', '-', \ ">${backup_fn}"];
    run_command([ $dump_cmd, $gzip_cmd ], 'errmsg' => "cannot backup old database\n");

    my $maxfiles = 10; # purge older backup
    my @backups = sort { $b cmp $a } glob("$dbbackupdir/config-*.sql.gz");

    return if scalar(@backups) <= $maxfiles;

    foreach my $path (@backups[$maxfiles .. $#backups]) {
        next if $path !~ m/^(\S+)$/; # untaint
        my $stale = $1;
        print "delete old backup '$stale'\n";
        unlink $stale;
    }
};
1920
# Join the local node to an existing cluster.
#
# $param is a hash ref; the following entries are read here:
#   hostname, password     - API host of the cluster and the root@pam password
#   fingerprint            - expected certificate fingerprint of that host;
#                            without it manual verification is requested
#   ring0_addr, ring1_addr - corosync ring addresses (ring0 defaults to the
#                            local node IP)
#   nodeid, votes, force   - passed on to the cluster if set
#
# Dies if the local node is not joinable or any of the steps fails.
sub join {
    my ($param) = @_;

    my $nodename = PVE::INotify::nodename();
    my $local_ip_address = remote_node_ip($nodename);

    my ($ring0_addr, $ring1_addr) = @{$param}{qw(ring0_addr ring1_addr)};

    # check if we can join with the given parameters and current node state
    assert_joinable($local_ip_address, $ring0_addr, $ring1_addr, $param->{force});

    setup_sshd_config();
    setup_rootsshconfig();
    setup_ssh_keys();

    # make sure known_hosts is on local filesystem
    ssh_unmerge_known_hosts();

    my $host = $param->{hostname};

    my %conn_args = (
        protocol => 'https',
        host => $host,
        port => 8006,
        username => 'root@pam',
        password => $param->{password},
        cookie_name => 'PVEAuthCookie',
    );

    my $fp = $param->{fingerprint};
    if ($fp) {
        $conn_args{cached_fingerprints} = { uc($fp) => 1 };
    } else {
        # API schema ensures that we can only get here from CLI handler
        $conn_args{manual_verification} = 1;
    }

    print "Establishing API connection with host '$host'\n";

    my $conn = PVE::APIClient::LWP->new(%conn_args);
    $conn->login();

    # login raises an exception on failure, so if we get here we're good
    print "Login succeeded.\n";

    my $args = { ring0_addr => $ring0_addr // $local_ip_address };
    foreach my $opt (qw(force votes)) {
        $args->{$opt} = $param->{$opt} if defined($param->{$opt});
    }
    $args->{nodeid} = $param->{nodeid} if $param->{nodeid};
    $args->{ring1_addr} = $ring1_addr if defined($ring1_addr);

    print "Request addition of this node\n";
    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);

    print "Join request OK, finishing setup locally\n";

    # added successfully - now prepare local node
    finish_join($nodename, $res->{corosync_conf}, $res->{corosync_authkey});
}
1978
# Local part of a cluster join, done after the cluster accepted the node.
#
# Parameters:
#   $nodename         - name of the local node (used for the final message)
#   $corosync_conf    - content for the local corosync config file
#   $corosync_authkey - content for the local corosync authkey file
#
# Writes both files, stops pve-cluster, backs up and removes the local
# database, starts corosync and pve-cluster, blocks until quorum is reached,
# and finally regenerates certificates and SSH files and reloads the API
# daemons.
sub finish_join {
    my ($nodename, $corosync_conf, $corosync_authkey) = @_;

    mkdir "$localclusterdir";
    PVE::Tools::file_set_contents($authfile, $corosync_authkey);
    PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);

    print "stopping pve-cluster service\n";
    run_command(['systemctl', 'stop', 'pve-cluster'], errmsg => "can't stop pve-cluster service");

    # the service is stopped now, so the database is safe to dump
    $backup_cfs_database->($dbfile);
    unlink $dbfile;

    run_command(
        ['systemctl', 'start', 'corosync', 'pve-cluster'],
        errmsg => "starting pve-cluster failed",
    );

    # wait for quorum, the message is printed once at most
    my $waited = 0;
    until (check_cfs_quorum(1)) {
        if (!$waited) {
            print "waiting for quorum...";
            STDOUT->flush();
            $waited = 1;
        }
        sleep(1);
    }
    print "OK\n" if $waited;

    updatecerts_and_ssh(1);

    print "generated new node certificate, restart pveproxy and pvedaemon services\n";
    run_command(['systemctl', 'reload-or-restart', 'pvedaemon', 'pveproxy']);

    print "successfully added node '$nodename' to cluster.\n";
}
2015
# (Re)generate the node files and merge the SSH keys and known hosts of the
# local node.
#
# Parameters:
#   $force_new_cert - passed on to gen_pve_node_files(); announced as
#                     "generate new node certificate"
#   $silent         - suppress progress output; without quorum undef is
#                     returned instead of dying
sub updatecerts_and_ssh {
    my ($force_new_cert, $silent) = @_;

    my $log = sub {
        my ($msg) = @_;
        print "$msg\n" if !$silent;
    };

    setup_rootsshconfig();

    gen_pve_vzdump_symlink();

    # everything below needs quorum
    unless (check_cfs_quorum(1)) {
        die "no quorum - unable to update files\n" if !$silent;
        return undef;
    }

    setup_ssh_keys();

    my $nodename = PVE::INotify::nodename();
    my $local_ip_address = remote_node_ip($nodename);

    $log->("(re)generate node files");
    $log->("generate new node certificate") if $force_new_cert;
    gen_pve_node_files($nodename, $local_ip_address, $force_new_cert);

    $log->("merge authorized SSH keys and known hosts");
    ssh_merge_keys();
    ssh_merge_known_hosts($nodename, $local_ip_address, 1);
    gen_pve_vzdump_files();
}
2044
2045 1;