]> git.proxmox.com Git - pve-cluster.git/blob - data/PVE/Cluster.pm
corosync: allow to set link priorities
[pve-cluster.git] / data / PVE / Cluster.pm
1 package PVE::Cluster;
2
3 use strict;
4 use warnings;
5
6 use Digest::HMAC_SHA1;
7 use Digest::SHA;
8 use Encode;
9 use File::stat qw();
10 use IO::File;
11 use JSON;
12 use MIME::Base64;
13 use Net::SSLeay;
14 use POSIX qw(EEXIST ENOENT);
15 use RRDs;
16 use Socket;
17 use Storable qw(dclone);
18 use UUID;
19
20 use PVE::INotify;
21 use PVE::IPCC;
22 use PVE::JSONSchema;
23 use PVE::Network;
24 use PVE::SafeSyslog;
25 use PVE::Tools qw(run_command);
26
27 use PVE::Cluster::IPCConst;
28
29 use base 'Exporter';
30
31 our @EXPORT_OK = qw(
32 cfs_read_file
33 cfs_write_file
34 cfs_register_file
35 cfs_lock_file);
36
37 use Data::Dumper; # fixme: remove
38
39 # x509 certificate utils
40
# base directory of the pve configuration filesystem (see
# check_cfs_is_mounted) and the private/lock directories below it
my $basedir = "/etc/pve";
my $authdir = "$basedir/priv";
my $lockdir = "/etc/pve/priv/lock";

# cfs and corosync files
my $dbfile = "/var/lib/pve-cluster/config.db";
my $dbbackupdir = "/var/lib/pve-cluster/backup";
my $localclusterdir = "/etc/corosync";
my $localclusterconf = "$localclusterdir/corosync.conf";
my $authfile = "$localclusterdir/authkey";
my $clusterconf = "$basedir/corosync.conf";

# key and certificate files (private parts live below $authdir)
my $authprivkeyfn = "$authdir/authkey.key";
my $authpubkeyfn = "$basedir/authkey.pub";
my $pveca_key_fn = "$authdir/pve-root-ca.key";
my $pveca_srl_fn = "$authdir/pve-root-ca.srl";
my $pveca_cert_fn = "$basedir/pve-root-ca.pem";
# this is just a secret accessible by the web browser
# and is used for CSRF prevention
my $pvewww_key_fn = "$basedir/pve-www.key";

# ssh related files
my $ssh_rsa_id_priv = "/root/.ssh/id_rsa";
my $ssh_rsa_id = "/root/.ssh/id_rsa.pub";
my $ssh_host_rsa_id = "/etc/ssh/ssh_host_rsa_key.pub";
my $sshglobalknownhosts = "/etc/ssh/ssh_known_hosts";
my $sshknownhosts = "/etc/pve/priv/known_hosts";
my $sshauthkeys = "/etc/pve/priv/authorized_keys";
my $sshd_config_fn = "/etc/ssh/sshd_config";
my $rootsshauthkeys = "/root/.ssh/authorized_keys";
my $rootsshauthkeysbackup = "${rootsshauthkeys}.org";
my $rootsshconfig = "/root/.ssh/config";
73
# This is just a read-only copy, the relevant list is in status.c from pmxcfs.
# Observed files are the ones we can get directly through IPCC; they are
# cached using a computed version, and only those can be used by the
# cfs_*_file methods (cfs_register_file and cfs_lock_file reject any other
# name). Guest config files are represented by their '/<type>/' tag.
my $observed = {
    'vzdump.cron' => 1,
    'storage.cfg' => 1,
    'datacenter.cfg' => 1,
    'replication.cfg' => 1,
    'corosync.conf' => 1,
    'corosync.conf.new' => 1,
    'user.cfg' => 1,
    'domains.cfg' => 1,
    'priv/shadow.cfg' => 1,
    'priv/tfa.cfg' => 1,
    '/qemu-server/' => 1,
    '/openvz/' => 1,
    '/lxc/' => 1,
    'ha/crm_commands' => 1,
    'ha/manager_status' => 1,
    'ha/resources.cfg' => 1,
    'ha/groups.cfg' => 1,
    'ha/fence.cfg' => 1,
    'status.cfg' => 1,
    'ceph.conf' => 1,
};
99
100 # only write output if something fails
# Run $cmd through run_command() while buffering everything it writes to
# stdout and stderr. The buffered output is printed to STDERR only when the
# command fails; the original error is then re-raised.
sub run_silent_cmd {
    my ($cmd) = @_;

    my $captured = '';
    my $collect = sub {
        my ($line) = @_;
        $captured .= $line . "\n";
    };

    eval { run_command($cmd, outfunc => $collect, errfunc => $collect) };

    if (my $failure = $@) {
        print STDERR $captured;
        die $failure;
    }
}
114
# Report whether the cluster is quorate, judged by the owner-write bit of
# "$basedir/local". Dies when not quorate unless $noerr is set; otherwise
# returns the (true or false) result.
sub check_cfs_quorum {
    my ($noerr) = @_;

    # note: the -w file test always succeeds for root, so we have to look
    # at the mode bits reported by lstat ourselves
    my $info = File::stat::lstat("$basedir/local");
    my $quorate = ($info && (($info->mode & 0200) != 0));

    if (!$quorate && !$noerr) {
        die "cluster not ready - no quorum?\n";
    }

    return $quorate;
}
127
# Check whether "$basedir/local" is a symlink, which is taken as the sign
# that the pve configuration filesystem is mounted. Dies if it is not,
# unless $noerr is set; otherwise returns the test result.
sub check_cfs_is_mounted {
    my ($noerr) = @_;

    my $mounted = -l "$basedir/local";

    if (!$mounted && !$noerr) {
        die "pve configuration filesystem not mounted\n";
    }

    return $mounted;
}
138
# Create the directories required for node $nodename below $basedir.
# Dies if the configuration filesystem is not mounted or if a directory
# cannot be created (an already existing directory is not an error).
sub gen_local_dirs {
    my ($nodename) = @_;

    check_cfs_is_mounted();

    my $nodedir = "$basedir/nodes/$nodename";

    my @required_dirs = (
        "$basedir/priv",
        "$basedir/nodes",
        $nodedir,
        (map { "$nodedir/$_" } qw(lxc qemu-server openvz priv)),
    );

    for my $dir (@required_dirs) {
        next if -d $dir;
        mkdir($dir) || $! == EEXIST || die "unable to create directory '$dir' - $!\n";
    }
}
159
# Generate the RSA auth key pair ($authprivkeyfn / $authpubkeyfn) if the
# private key does not exist yet. The work is done while holding the
# 'authkey' cfs lock. Dies if the configuration filesystem is not mounted,
# if the lock cannot be obtained, or if key generation fails.
sub gen_auth_key {

    return if -f $authprivkeyfn;

    check_cfs_is_mounted();

    cfs_lock_authkey(undef, sub {
        # the key may have been generated by someone else while we were
        # waiting for the lock - never overwrite an existing key
        return if -f $authprivkeyfn;

        # the parentheses are required: mkdir is a list operator, so without
        # them the whole '||' chain would be parsed as its argument and the
        # error check would never run
        mkdir($authdir) || $! == EEXIST || die "unable to create dir '$authdir' - $!\n";

        run_silent_cmd(['openssl', 'genrsa', '-out', $authprivkeyfn, '2048']);

        run_silent_cmd(['openssl', 'rsa', '-in', $authprivkeyfn, '-pubout', '-out', $authpubkeyfn]);
    });

    # cfs_lock_authkey reports failures through $@ instead of dying
    die "$@\n" if $@;
}
176
# Generate the 4096 bit RSA key of the pve CA unless $pveca_key_fn already
# exists. Dies if openssl fails.
sub gen_pveca_key {

    return if -f $pveca_key_fn;

    my @cmd = ('openssl', 'genrsa', '-out', $pveca_key_fn, '4096');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve ca key:\n$err";
    }
}
187
# Make sure the pve CA key and certificate exist.
# Returns 0 if both were already present, 1 if a new certificate was
# generated. Dies if generation fails.
sub gen_pveca_cert {

    return 0 if -f $pveca_key_fn && -f $pveca_cert_fn;

    gen_pveca_key();

    # we try to generate a unique 'subject' to avoid browser problems
    # (reused serial numbers, ..)
    my ($uuid, $uuid_str);
    UUID::generate($uuid);
    UUID::unparse($uuid, $uuid_str);

    my $subject = "/CN=Proxmox Virtual Environment/OU=$uuid_str/O=PVE Cluster Manager CA/";

    # wrap openssl with faketime to prevent bug #904
    my @cmd = (
        'faketime', 'yesterday', 'openssl', 'req', '-batch',
        '-days', '3650', '-new', '-x509', '-nodes',
        '-key', $pveca_key_fn,
        '-out', $pveca_cert_fn,
        '-subj', $subject,
    );

    eval { run_silent_cmd(\@cmd) };

    die "generating pve root certificate failed:\n$@" if $@;

    return 1;
}
215
# Generate the 2048 bit RSA key "pve-ssl.key" for node $nodename unless it
# already exists. Dies without a node name or if openssl fails.
sub gen_pve_ssl_key {
    my ($nodename) = @_;

    die "no node name specified" if !$nodename;

    my $keyfile = "$basedir/nodes/$nodename/pve-ssl.key";

    return if -f $keyfile;

    my @cmd = ('openssl', 'genrsa', '-out', $keyfile, '2048');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve ssl key for node '$nodename':\n$err";
    }
}
231
# Generate the 2048 bit RSA key stored in $pvewww_key_fn unless it already
# exists. Dies if openssl fails.
sub gen_pve_www_key {

    return if -f $pvewww_key_fn;

    my @cmd = ('openssl', 'genrsa', '-out', $pvewww_key_fn, '2048');

    eval { run_silent_cmd(\@cmd) };

    if (my $err = $@) {
        die "unable to generate pve www key:\n$err";
    }
}
242
# Write $serial to the CA serial number file ($pveca_srl_fn).
sub update_serial {
    my ($serial) = @_;

    return PVE::Tools::file_set_contents($pveca_srl_fn, $serial);
}
248
# Create the node certificate "pve-ssl.pem" for $nodename, signed by the
# pve CA.
#
# $force:    regenerate the certificate even if it already exists
# $nodename: node the certificate is for (required)
# $ip:       IP address added to the subjectAltName list (required)
#
# The subjectAltName list covers localhost, $ip, the node name and, when the
# resolver configuration has a search domain, the resulting FQDN (which then
# also becomes the commonName).
#
# Dies on missing arguments, if the temporary openssl config cannot be
# written, or if one of the openssl invocations fails.
sub gen_pve_ssl_cert {
    my ($force, $nodename, $ip) = @_;

    die "no node name specified" if !$nodename;
    die "no IP specified" if !$ip;

    my $pvessl_cert_fn = "$basedir/nodes/$nodename/pve-ssl.pem";

    return if !$force && -f $pvessl_cert_fn;

    my $names = "IP:127.0.0.1,IP:::1,DNS:localhost";

    my $rc = PVE::INotify::read_file('resolvconf');

    $names .= ",IP:$ip";

    my $fqdn = $nodename;

    $names .= ",DNS:$nodename";

    if ($rc && $rc->{search}) {
        $fqdn = $nodename . "." . $rc->{search};
        $names .= ",DNS:$fqdn";
    }

    my $sslconf = <<__EOD;
RANDFILE = /root/.rnd
extensions = v3_req

[ req ]
default_bits = 2048
distinguished_name = req_distinguished_name
req_extensions = v3_req
prompt = no
string_mask = nombstr

[ req_distinguished_name ]
organizationalUnitName = PVE Cluster Node
organizationName = Proxmox Virtual Environment
commonName = $fqdn

[ v3_req ]
basicConstraints = CA:FALSE
extendedKeyUsage = serverAuth
subjectAltName = $names
__EOD

    my $cfgfn = "/tmp/pvesslconf-$$.tmp";
    my $reqfn = "/tmp/pvecertreq-$$.tmp";

    # remove both temporary files, used on every exit path below
    my $cleanup = sub {
        unlink $reqfn;
        unlink $cfgfn;
    };

    # The file name is predictable, so never write through something that is
    # already there: remove stale leftovers and insist on creating the file
    # ourselves (O_EXCL also refuses to follow a planted symlink).
    unlink $cfgfn;
    my $fh = IO::File->new($cfgfn, O_CREAT|O_WRONLY|O_EXCL, 0600)
        || die "unable to create temporary openssl config '$cfgfn' - $!\n";

    if (!(print {$fh} $sslconf) || !close($fh)) {
        my $errmsg = "$!";
        unlink $cfgfn;
        die "unable to write temporary openssl config '$cfgfn' - $errmsg\n";
    }

    # NOTE(review): the request file is written by openssl itself, so its
    # (equally predictable) name cannot be protected the same way here
    unlink $reqfn;

    my $pvessl_key_fn = "$basedir/nodes/$nodename/pve-ssl.key";
    eval {
        run_silent_cmd(['openssl', 'req', '-batch', '-new', '-config', $cfgfn,
                        '-key', $pvessl_key_fn, '-out', $reqfn]);
    };

    if (my $err = $@) {
        $cleanup->();
        die "unable to generate pve certificate request:\n$err";
    }

    update_serial("0000000000000000") if ! -f $pveca_srl_fn;

    eval {
        # wrap openssl with faketime to prevent bug #904
        run_silent_cmd(['faketime', 'yesterday', 'openssl', 'x509', '-req',
                        '-in', $reqfn, '-days', '3650', '-out', $pvessl_cert_fn,
                        '-CAkey', $pveca_key_fn, '-CA', $pveca_cert_fn,
                        '-CAserial', $pveca_srl_fn, '-extfile', $cfgfn]);
    };

    if (my $err = $@) {
        $cleanup->();
        die "unable to generate pve ssl certificate:\n$err";
    }

    $cleanup->();
}
335
# Create all per-node and cluster-wide key/certificate files for $nodename.
# The node certificate is (re)generated when a new CA certificate had to be
# created or when $opt_force is set.
sub gen_pve_node_files {
    my ($nodename, $ip, $opt_force) = @_;

    gen_local_dirs($nodename);

    gen_auth_key();

    # make sure we have a (cluster wide) secret for CSRF prevention
    gen_pve_www_key();

    # make sure we have a (per node) private key
    gen_pve_ssl_key($nodename);

    # make sure we have a CA; a freshly created CA forces a new node cert
    my $ca_created = gen_pveca_cert();

    my $force = $opt_force ? 1 : $ca_created;

    gen_pve_ssl_cert($force, $nodename, $ip);
}
357
# initial content written to /etc/pve/vzdump.cron by gen_pve_vzdump_files
# when that file does not exist yet
my $vzdump_cron_dummy = <<__EOD;
# cluster wide vzdump cron schedule
# Atomatically generated file - do not edit

PATH="/usr/sbin:/usr/bin:/sbin:/bin"

__EOD
365
# Point /etc/cron.d/vzdump at /etc/pve/vzdump.cron. Nothing is done when the
# target file is missing or the link already exists; a pre-existing regular
# file is moved to /root/etc_cron_vzdump.org first.
sub gen_pve_vzdump_symlink {

    my $target = "/etc/pve/vzdump.cron";
    my $link_fn = "/etc/cron.d/vzdump";

    return if !(-f $target);
    return if -l $link_fn;

    rename($link_fn, "/root/etc_cron_vzdump.org"); # make backup if file exists
    symlink($target, $link_fn);
}
377
# Create /etc/pve/vzdump.cron with the default content if it is missing,
# then make sure the cron.d symlink exists.
sub gen_pve_vzdump_files {

    my $cronfile = "/etc/pve/vzdump.cron";

    if (! -f $cronfile) {
        PVE::Tools::file_set_contents($cronfile, $vzdump_cron_dummy);
    }

    gen_pve_vzdump_symlink();
}
387
# process-local copies of the data fetched through IPCC; all three are
# refreshed (or reset to empty hashes on error) by cfs_update()
my $versions = {};
my $vmlist = {};
my $clinfo = {};
391
# Send message $msgid with payload $data via IPCC and return the raw reply.
# Dies when no reply was received and errno is set.
my $ipcc_send_rec = sub {
    my ($msgid, $data) = @_;

    my $reply = PVE::IPCC::ipcc_send_rec($msgid, $data);

    if (!defined($reply) && ($! != 0)) {
        die "ipcc_send_rec[$msgid] failed: $!\n";
    }

    return $reply;
};
401
# Same as $ipcc_send_rec, but the reply is decoded from JSON before it is
# returned.
my $ipcc_send_rec_json = sub {
    my ($msgid, $data) = @_;

    my $reply = PVE::IPCC::ipcc_send_rec($msgid, $data);

    if (!defined($reply) && ($! != 0)) {
        die "ipcc_send_rec[$msgid] failed: $!\n";
    }

    return decode_json($reply);
};
411
# Fetch the content of config file $path via IPCC.
# Returns the content, '' when there was no reply but also no error, and
# undef when the request failed with ENOENT. Dies on any other error.
my $ipcc_get_config = sub {
    my ($path) = @_;

    my $request = pack "Z*", $path;
    my $content = PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_CONFIG, $request);

    return $content if defined($content);

    return '' if $! == 0;
    return undef if $! == ENOENT;

    die "$!\n";
};
427
# Request the status entry $name via IPCC; $nodename is optional and sent
# as an empty string when not given. Returns the raw reply.
my $ipcc_get_status = sub {
    my ($name, $nodename) = @_;

    my $request = pack("Z[256]Z[256]", $name, ($nodename || ""));

    return PVE::IPCC::ipcc_send_rec(CFS_IPC_GET_STATUS, $request);
};
434
435 my $ipcc_remove_status = sub {
436 my ($name) = @_;
437 # we just omit the data payload, pmxcfs takes this as hint and removes this
438 # key from the status hashtable
439 my $bindata = pack "Z[256]", $name;
440 return &$ipcc_send_rec(CFS_IPC_SET_STATUS, $bindata);
441 };
442
# Set the status entry $name to $data; references are serialized to JSON,
# plain scalars are sent as they are.
my $ipcc_update_status = sub {
    my ($name, $data) = @_;

    my $payload = $data;
    $payload = encode_json($data) if ref($data);

    my $request = pack("Z[256]Z*", $name, $payload);

    return $ipcc_send_rec->(CFS_IPC_SET_STATUS, $request);
};
452
# Send a cluster log message via IPCC.
# The packed message starts with the priority and the byte lengths of
# $ident and $tag (each including the terminating NUL), followed by the
# NUL-terminated $ident, $tag and $msg strings.
my $ipcc_log = sub {
    my ($priority, $ident, $tag, $msg) = @_;

    # bytes::length() only exists once the bytes module has been loaded;
    # do not rely on some other module having done that for us
    require bytes;

    my $bindata = pack "CCCZ*Z*Z*", $priority, bytes::length($ident) + 1,
        bytes::length($tag) + 1, $ident, $tag, $msg;

    return &$ipcc_send_rec(CFS_IPC_LOG_CLUSTER_MSG, $bindata);
};
461
# Request the cluster log via IPCC. $max defaults to 0 and $user to the
# empty string when not given. Returns the raw reply.
my $ipcc_get_cluster_log = sub {
    my ($user, $max) = @_;

    $max //= 0;

    my $request = pack("VVVVZ*", $max, 0, 0, 0, ($user || ""));

    return $ipcc_send_rec->(CFS_IPC_GET_CLUSTER_LOG, $request);
};
470
# parsed config file cache used by the cfs_*_file functions, keyed by file name
my $ccache = {};

# Refresh the cached version info, cluster info and guest list via IPCC.
# When the reported start time differs from the known one (or none is known
# yet), all caches are dropped. With $fail set any error is fatal; otherwise
# the affected cache is reset and the error is only emitted as a warning.
sub cfs_update {
    my ($fail) = @_;

    my $reset_caches = sub {
        $vmlist = {};
        $clinfo = {};
        $ccache = {};
    };

    eval {
        my $res = $ipcc_send_rec_json->(CFS_IPC_GET_FS_VERSION);
        die "no starttime\n" if !$res->{starttime};

        my $known_start = $versions->{starttime};
        if (!$known_start || $res->{starttime} != $known_start) {
            $reset_caches->();
        }

        $versions = $res;
    };
    if (my $err = $@) {
        $versions = {};
        $reset_caches->();
        die $err if $fail;
        warn $err;
    }

    eval {
        my $cached = $clinfo->{version};
        if (!$cached || $cached != $versions->{clinfo}) {
            $clinfo = $ipcc_send_rec_json->(CFS_IPC_GET_CLUSTER_INFO);
        }
    };
    if (my $err = $@) {
        $clinfo = {};
        die $err if $fail;
        warn $err;
    }

    eval {
        my $cached = $vmlist->{version};
        if (!$cached || $cached != $versions->{vmlist}) {
            $vmlist = $ipcc_send_rec_json->(CFS_IPC_GET_GUEST_LIST);
        }
    };
    if (my $err = $@) {
        $vmlist = {};
        die $err if $fail;
        warn $err;
    }
}
526
# Return the guest list as last fetched by cfs_update().
sub get_vmlist {
    my $cached = $vmlist;
    return $cached;
}
530
# Return the cluster info as last fetched by cfs_update().
sub get_clinfo {
    my $cached = $clinfo;
    return $cached;
}
534
# Return the 'nodelist' entry of the cached cluster info (may be undef).
sub get_members {
    my $members = $clinfo->{nodelist};
    return $members;
}
538
# Return an array reference with the names of all nodes in the cached
# cluster node list. Falls back to a list holding only the local node name
# when there is no node list or the local node is not part of it.
sub get_nodelist {
    my $members = $clinfo->{nodelist};

    my $local = PVE::INotify::nodename();

    return [ keys %$members ] if $members && $members->{$local};

    return [ $local ];
}
550
# Publish (or, with undefined $data, remove) the status entry "kv/$key".
# The data is only stored in an in-memory hashtable inside pmxcfs: local data
# is gone after a restart (of pmxcfs or the node), peer data is still
# available then. Best used for status data, like running (ceph) services,
# package versions, ...
# Dies for references and for data of 32 KiB or more; IPC errors are only
# reported as warnings.
sub broadcast_node_kv {
    my ($key, $data) = @_;

    if (defined($data)) {
        die "cannot send a reference\n" if ref($data);

        my $size = length($data);
        die "data for '$key' too big\n" if $size >= (32 * 1024); # limit from pmxfs

        eval { $ipcc_update_status->("kv/$key", $data) };
    } else {
        eval { $ipcc_remove_status->("kv/$key") };
    }

    warn $@ if $@;
}
573
# Read the status entry "kv/$key" from $nodename or, when no node name is
# given, from every node returned by get_nodelist().
# Returns a hash reference mapping node name to value; nodes without a
# (true) value are left out.
sub get_node_kv {
    my ($key, $nodename) = @_;

    my @nodes = $nodename ? ($nodename) : @{ get_nodelist() };

    my $res = {};
    for my $node (@nodes) {
        my $raw = $ipcc_get_status->("kv/$key", $node);
        next if !$raw;
        $res->{$node} = unpack("Z*", $raw);
    }

    return $res;
}
597
# property: the config property you want to get, e.g., this is perfect to
#     get the 'lock' entry of a guest _fast_ (>100 times faster than manual
#     parsing here)
# vmid: optional, if a valid one is passed only that guest is checked, else
#     the property is returned for all guests
# NOTE: does *not* search snapshot and PENDING sections!
sub get_guest_config_property {
    my ($property, $vmid) = @_;

    die "property is required" if !defined($property);

    # a vmid of 0 is sent when none was given
    my $request = pack("VZ*", $vmid // 0, $property);
    my $res = $ipcc_send_rec_json->(CFS_IPC_GET_GUEST_CONFIG_PROPERTY, $request);

    return $res;
}
612
# Publish the task list as status entry "tasklist".
# $data must be a chronologically descending ordered array of tasks; it is
# modified in place when it has to be shortened. IPC errors are only reported
# as warnings.
sub broadcast_tasklist {
    my ($data) = @_;

    # the serialized list may not get bigger than 32kb (CFS_MAX_STATUS_SIZE
    # from pmxcfs) - drop older items until we satisfy this constraint
    my $limit = 32 * 1024;
    while (length(encode_json($data)) >= $limit) {
        pop @$data;
    }

    eval { $ipcc_update_status->("tasklist", $data) };

    warn $@ if $@;
}
631
# per-node cache of decoded task lists: { $node => { data => [...], version => $ver } }
my $tasklistcache = {};

# Collect the broadcast task lists of all nodes, or only of $nodename when
# given, and return them as one array reference.
# A node's list is fetched again whenever no usable cache entry exists or
# the 'tasklist' version in the cached kvstore versions differs from the
# cached one. Errors for a single node are logged to syslog and do not
# affect the other nodes.
sub get_tasklist {
    my ($nodename) = @_;

    my $kvstore = $versions->{kvstore} || {};

    my $nodelist = get_nodelist();

    my $res = [];
    foreach my $node (@$nodelist) {
        next if $nodename && ($nodename ne $node);
        eval {
            # do not combine 'my' with a statement modifier here - the
            # behaviour of 'my $x = ... if ...' is undefined
            my $ver = $kvstore->{$node} ? $kvstore->{$node}->{tasklist} : undef;
            my $cd = $tasklistcache->{$node};
            if (!$cd || !$ver || !$cd->{version} || ($cd->{version} != $ver)) {
                my $raw = &$ipcc_get_status("tasklist", $node) || '[]';
                my $data = decode_json($raw);
                push @$res, @$data;
                $tasklistcache->{$node} = {
                    data => $data,
                    version => $ver,
                };
            } elsif ($cd->{data}) {
                push @$res, @{$cd->{data}};
            }
        };
        my $err = $@;
        # pass the message as an argument, it must not be used as format string
        syslog('err', "%s", $err) if $err;
    }

    return $res;
}
666
# Publish $data as status entry "rrd/$rrdid"; errors are only reported as
# warnings.
sub broadcast_rrd {
    my ($rrdid, $data) = @_;

    eval { $ipcc_update_status->("rrd/$rrdid", $data) };

    if (my $err = $@) {
        warn $err;
    }
}
677
# time and result of the last successful rrd_dump() call
my $last_rrd_dump = 0;
my $last_rrd_data = "";

# Fetch the RRD dump via IPCC and parse it into a hash reference mapping
# each key to an array of values ('U' entries become undef).
# The previous result is returned again when the last successful dump is
# less than two seconds old. On IPC errors a warning is emitted and an empty
# hash is returned.
sub rrd_dump {

    my $now = time();

    return $last_rrd_data if ($now - $last_rrd_dump) < 2;

    my $raw = eval { $ipcc_send_rec->(CFS_IPC_GET_RRD_DUMP) };

    if (my $err = $@) {
        warn $err;
        return {};
    }

    my $res = {};

    if ($raw) {
        # walk over all newline terminated lines, each one has the form
        # "key:value:value:..."
        while ($raw =~ m/^(.*)\n/mg) {
            my ($key, @ela) = split(/:/, $1);
            next if !$key;
            next if scalar(@ela) <= 1;
            $res->{$key} = [ map { $_ eq 'U' ? undef : $_ } @ela ];
        }
    }

    $last_rrd_dump = $now;
    $last_rrd_data = $res;

    return $res;
}
717
# Fetch the data of RRD file $rrdname for the given $timeframe ('hour',
# 'day', 'week', 'month' or 'year') using consolidation function $cf
# (default "AVERAGE").
# Returns an array reference with one hash per time step, holding 'time'
# plus every data source that has a defined value for that step.
# Dies on RRD errors or when the returned step size is not the requested
# resolution.
sub create_rrd_data {
    my ($rrdname, $timeframe, $cf) = @_;

    my $rrd = "/var/lib/rrdcached/db/$rrdname";

    # timeframe => [ resolution in seconds, number of steps ]
    my %setup = (
        hour  => [ 60, 70 ],
        day   => [ 60*30, 70 ],
        week  => [ 60*180, 70 ],
        month => [ 60*720, 70 ],
        year  => [ 60*10080, 70 ],
    );

    my ($reso, $count) = @{$setup{$timeframe}};

    # align the end of the requested range to the resolution
    my $ctime = $reso * int(time() / $reso);
    my $req_start = $ctime - $reso * $count;

    $cf ||= "AVERAGE";

    my @args = (
        "-s" => $req_start,
        "-e" => $ctime - 1,
        "-r" => $reso,
    );

    my $socket = "/var/run/rrdcached.sock";
    push @args, "--daemon" => "unix:$socket" if -S $socket;

    my ($start, $step, $names, $data) = RRDs::fetch($rrd, $cf, @args);

    my $err = RRDs::error;
    die "RRD error: $err\n" if $err;

    die "got wrong time resolution ($step != $reso)\n" if $step != $reso;

    my @res;
    my $time = $start;
    for my $line (@$data) {
        my $entry = { 'time' => $time };
        $time += $step;

        for my $i (0 .. $#$names) {
            my $val = $line->[$i];
            # leave empty fields undefined
            # maybe make this configurable?
            next if !defined($val);
            $entry->{$names->[$i]} = $val;
        }

        push @res, $entry;
    }

    return \@res;
}
775
# Render a PNG graph for the data sources listed in $ds of RRD file
# $rrdname over the given $timeframe, using consolidation function $cf
# (default "AVERAGE"). 'cpu' and 'iowait' are plotted multiplied by 100.
# Returns a hash with a 'filename' and the 'image' data; dies on RRD errors
# or when more data sources are requested than colors are defined.
sub create_rrd_graph {
    my ($rrdname, $timeframe, $ds, $cf) = @_;

    # Using RRD graph is clumsy - maybe it
    # is better to simply fetch the data, and do all display
    # related things with javascript (new extjs html5 graph library).

    my $rrd = "/var/lib/rrdcached/db/$rrdname";

    my @ids = PVE::Tools::split_list($ds);

    my $filename = $rrd . '_' . join('_', @ids) . '.png';

    # timeframe => [ resolution in seconds, number of steps ]
    my %setup = (
        hour  => [ 60, 60 ],
        day   => [ 60*30, 70 ],
        week  => [ 60*180, 70 ],
        month => [ 60*720, 70 ],
        year  => [ 60*10080, 70 ],
    );

    my ($reso, $count) = @{$setup{$timeframe}};

    my @args = (
        "--imgformat" => "PNG",
        "--border" => 0,
        "--height" => 200,
        "--width" => 800,
        "--start" => -($reso * $count),
        "--end" => 'now',
        "--lower-limit" => 0,
    );

    my $socket = "/var/run/rrdcached.sock";
    push @args, "--daemon" => "unix:$socket" if -S $socket;

    my @coldef = ('#00ddff', '#ff0000');

    $cf ||= "AVERAGE";

    for my $idx (0 .. $#ids) {
        my $id = $ids[$idx];
        my $col = $coldef[$idx] || die "fixme: no color definition";

        push @args, "DEF:${id}=$rrd:${id}:$cf";

        my $dataid = $id;
        if ($id eq 'cpu' || $id eq 'iowait') {
            $dataid = "${id}_per";
            push @args, "CDEF:${dataid}=${id},100,*";
        }

        push @args, "LINE2:${dataid}${col}:${id}";
    }

    push @args, '--full-size-mode';

    # we do not really store data into the file
    my $res = RRDs::graphv('-', @args);

    my $err = RRDs::error;
    die "RRD error: $err\n" if $err;

    return { filename => $filename, image => $res->{image} };
}
842
# a fast way to read files (avoids the fuse overhead): the content of $path
# is fetched via IPCC
sub get_config {
    my ($path) = @_;

    return $ipcc_get_config->($path);
}
849
# Return the raw cluster log reply; both $user and $max are optional.
sub get_cluster_log {
    my ($user, $max) = @_;

    return $ipcc_get_cluster_log->($user, $max);
}
855
# registered parser/writer pairs, keyed by (observed) file name
my $file_info = {};

# Register $parser and $writer for the observed file $filename.
# Dies for files that are not observed or already registered.
sub cfs_register_file {
    my ($filename, $parser, $writer) = @_;

    die "unknown file '$filename'" if !$observed->{$filename};

    die "file '$filename' already registered" if $file_info->{$filename};

    my $entry = {
        parser => $parser,
        writer => $writer,
    };

    $file_info->{$filename} = $entry;
}
870
# Return the parsed content of $filename, using the cache as long as the
# cached version equals $version. Without a (true) $version the file is
# always fetched and parsed again. References are returned as deep copies,
# so callers cannot modify the cached data.
my $ccache_read = sub {
    my ($filename, $parser, $version) = @_;

    my $ci = ($ccache->{$filename} ||= {});

    my $up_to_date = $ci->{version} && $version && $ci->{version} == $version;

    if (!$up_to_date) {
        # we always call the parser, even when the file does not exist
        # (in that case $data is undef)
        my $data = get_config($filename);
        $ci->{data} = $parser->("/etc/pve/$filename", $data);
        $ci->{version} = $version;
    }

    return ref($ci->{data}) ? dclone($ci->{data}) : $ci->{data};
};
890
# Look up the current version of $filename.
# Guest config files (nodes/<node>/<type>/<vmid>.conf) take their version
# from the cached guest list and are registered under the "/<type>/" tag;
# every other file uses its own name for both lookups.
# Returns the version in scalar context and ($version, $info) in list
# context, where $info is the registered parser/writer entry. Dies if no
# such entry exists.
sub cfs_file_version {
    my ($filename) = @_;

    my ($version, $infotag);

    if ($filename =~ m!^nodes/[^/]+/(openvz|lxc|qemu-server)/(\d+)\.conf$!) {
        my ($type, $vmid) = ($1, $2);
        $infotag = "/$type/";

        my $guest = ($vmlist && $vmlist->{ids}) ? $vmlist->{ids}->{$vmid} : undef;
        $version = $guest->{version} if $guest;
    } else {
        $infotag = $filename;
        $version = $versions->{$filename};
    }

    my $info = $file_info->{$infotag} ||
        die "unknown file type '$filename'\n";

    return ($version, $info) if wantarray;

    return $version;
}
912
# Return the parsed content of the registered file $filename, served from
# the cache when the cached version is still current.
sub cfs_read_file {
    my ($filename) = @_;

    my ($version, $info) = cfs_file_version($filename);

    return $ccache_read->($filename, $info->{parser}, $version);
}
921
# Serialize $data with the writer registered for $filename and store the
# result as /etc/pve/$filename. Dies if no writer is registered.
sub cfs_write_file {
    my ($filename, $data) = @_;

    my ($version, $info) = cfs_file_version($filename);

    my $writer = $info->{writer} || die "no writer defined";

    my $fsname = "/etc/pve/$filename";

    my $raw = $writer->($fsname, $data);

    # invalidate an existing cache entry, so the next read parses the file again
    my $ci = $ccache->{$filename};
    $ci->{version} = undef if $ci;

    PVE::Tools::file_set_contents($fsname, $raw);
}
939
# Run $code->(@param) while holding the cfs lock $lockid.
#
# The lock is the directory "$lockdir/$lockid": creating it acquires the
# lock, removing it releases the lock again.
#
# $lockid:  name of the lock directory below $lockdir
# $timeout: seconds to wait for the lock (default 10)
# $code:    code reference executed under the lock, limited to 60 seconds
# @param:   arguments passed to $code
#
# This function does not die. On success it returns the result of $code and
# sets $@ to undef; on any error it returns undef and leaves the message
# in $@, so callers have to check $@ afterwards.
my $cfs_lock = sub {
    my ($lockid, $timeout, $code, @param) = @_;

    my $prev_alarm = alarm(0); # suspend outer alarm early

    my $res;
    my $got_lock = 0;

    # this timeout is for acquiring the lock
    $timeout = 10 if !$timeout;

    my $filename = "$lockdir/$lockid";

    eval {

        mkdir $lockdir;

        if (! -d $lockdir) {
            die "pve cluster filesystem not online.\n";
        }

        my $timeout_err = sub { die "got lock request timeout\n"; };
        local $SIG{ALRM} = $timeout_err;

        while (1) {
            # the alarm guards the mkdir call itself; whatever is left of
            # it afterwards becomes the budget for the next round
            alarm ($timeout);
            $got_lock = mkdir($filename);
            $timeout = alarm(0) - 1; # we'll sleep for 1s, see down below

            last if $got_lock;

            $timeout_err->() if $timeout <= 0;

            print STDERR "trying to acquire cfs lock '$lockid' ...\n";
            utime (0, 0, $filename); # cfs unlock request
            sleep(1);
        }

        # fixed command timeout: cfs locks have a timeout of 120
        # using 60 gives us another 60 seconds to abort the task
        local $SIG{ALRM} = sub { die "got lock timeout - aborting command\n"; };
        alarm(60);

        cfs_update(); # make sure we read latest versions inside code()

        $res = &$code(@param);

        alarm(0);
    };

    my $err = $@;

    # failing to get the lock without quorum is reported as a quorum problem
    $err = "no quorum!\n" if !$got_lock && !check_cfs_quorum(1);

    rmdir $filename if $got_lock; # if we held the lock always unlock again

    # restore the alarm that was pending when we got called
    alarm($prev_alarm);

    if ($err) {
        $@ = "error with cfs lock '$lockid': $err";
        return undef;
    }

    $@ = undef;

    return $res;
};
1007
# Run $code->(@param) under the cfs lock belonging to the observed file
# $filename. The lock id is "file-$filename" with every '.' and '/' replaced
# by '_'. Dies for files that are not observed; lock and code errors are
# reported through $@ (see $cfs_lock).
sub cfs_lock_file {
    my ($filename, $timeout, $code, @param) = @_;

    $observed->{$filename} || die "unknown file '$filename'";

    (my $lockid = "file-$filename") =~ s/[.\/]/_/g;

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
1018
# Run $code->(@param) under the cfs lock "storage-$storeid"; errors are
# reported through $@ (see $cfs_lock).
sub cfs_lock_storage {
    my ($storeid, $timeout, $code, @param) = @_;

    return $cfs_lock->("storage-$storeid", $timeout, $code, @param);
}
1026
# Run $code->(@param) under the cfs lock "domain-$domainname"; errors are
# reported through $@ (see $cfs_lock).
sub cfs_lock_domain {
    my ($domainname, $timeout, $code, @param) = @_;

    return $cfs_lock->("domain-$domainname", $timeout, $code, @param);
}
1034
# Run $code->(@param) under the cfs lock "acme-$account"; errors are
# reported through $@ (see $cfs_lock).
sub cfs_lock_acme {
    my ($account, $timeout, $code, @param) = @_;

    return $cfs_lock->("acme-$account", $timeout, $code, @param);
}
1042
# Run $code->(@param) under the cfs lock "authkey"; errors are reported
# through $@ (see $cfs_lock).
sub cfs_lock_authkey {
    my ($timeout, $code, @param) = @_;

    my $lockid = 'authkey';

    return $cfs_lock->($lockid, $timeout, $code, @param);
}
1048
# maps the priority names accepted by log_msg() to their numeric level;
# note that "emerg" maps to 0, which is a false value in Perl
my $log_levels = {
    "emerg" => 0,
    "alert" => 1,
    "crit" => 2,
    "critical" => 2,
    "err" => 3,
    "error" => 3,
    "warn" => 4,
    "warning" => 4,
    "notice" => 5,
    "info" => 6,
    "debug" => 7,
};
1062
# Write a message to syslog and to the cluster log.
#
# $priority: numeric level or one of the names from $log_levels
# $ident:    optional identifier, prepended as "<ident>" in syslog
# $msg:      message text, "empty message" is logged if it is false
#
# Non-ASCII characters in $ident and $msg are replaced by \uXXXX escapes.
# Dies if $priority is neither a known name nor numeric. A failure to write
# the cluster log is only reported to syslog.
sub log_msg {
    my ($priority, $ident, $msg) = @_;

    # test with defined(): "emerg" maps to level 0, which a plain truth
    # test would treat like an unknown name
    if (defined(my $level = $log_levels->{$priority})) {
        $priority = $level;
    }

    die "need numeric log priority" if $priority !~ /^\d+$/;

    my $tag = PVE::SafeSyslog::tag();

    $msg = "empty message" if !$msg;

    $ident = "" if !$ident;
    $ident = encode("ascii", $ident,
                    sub { sprintf "\\u%04x", shift });

    my $ascii = encode("ascii", $msg, sub { sprintf "\\u%04x", shift });

    if ($ident) {
        syslog($priority, "<%s> %s", $ident, $ascii);
    } else {
        syslog($priority, "%s", $ascii);
    }

    eval { &$ipcc_log($priority, $ident, $tag, $ascii); };

    # pass the error as an argument, it must not become part of the format
    syslog("err", "writing cluster log failed: %s", $@) if $@;
}
1092
# Check that $vmid does not appear in the cached guest list.
# Returns 1 if the id is unused. For a used id it returns undef when $noerr
# is set and dies with a message naming the owning node otherwise.
sub check_vmid_unused {
    my ($vmid, $noerr) = @_;

    my $guest = get_vmlist()->{ids}->{$vmid};

    return 1 if !defined($guest);

    return undef if $noerr;

    my $vmtypestr = 'CT';
    $vmtypestr = 'VM' if $guest->{type} eq 'qemu';

    die "$vmtypestr $vmid already exists on node '$guest->{node}'\n";
}
1106
# Check that $nodename is part of the cached cluster node list.
# Returns 1 if it is. Otherwise returns undef when $noerr is set and dies
# when it is not.
sub check_node_exists {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};

    if ($members && $members->{$nodename}) {
        return 1;
    }

    return undef if $noerr;

    die "no such cluster node '$nodename'\n";
}
1117
# Return the IP address of $nodename; this is also used to get the IP of
# the local node.
# When the cached cluster node list knows an IP for the node, scalar context
# yields that IP and list context yields ($ip, $family); the address family
# is looked up once and then stored in the node list entry. Otherwise the
# lookup is delegated to PVE::Network::get_ip_from_hostname().
sub remote_node_ip {
    my ($nodename, $noerr) = @_;

    my $members = $clinfo->{nodelist};
    my $entry = $members ? $members->{$nodename} : undef;

    if ($entry) {
        if (my $ip = $entry->{ip}) {
            return $ip if !wantarray;

            $entry->{address_family} ||= PVE::Tools::get_host_address_family($ip);

            return ($ip, $entry->{address_family});
        }
    }

    # fallback: try to get IP by other means
    return PVE::Network::get_ip_from_hostname($nodename, $noerr);
}
1139
# Return the local IP address inside the migration network.
# The network is $migration_network or, if that is undefined, the
# 'migration' => 'network' setting from datacenter.cfg. Returns undef when
# no network is configured. Unless $noerr is set, dies when no or more than
# one local address lies inside the network; with $noerr the first address
# found (possibly undef) is returned.
sub get_local_migration_ip {
    my ($migration_network, $noerr) = @_;

    my $cidr = $migration_network;

    if (!defined($cidr)) {
        my $dc_conf = cfs_read_file('datacenter.cfg');
        my $configured = $dc_conf->{migration}->{network};
        $cidr = $configured if defined($configured);
    }

    return undef if !defined($cidr);

    my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
    my $found = scalar(@$ips);

    if (!$noerr) {
        die "could not get migration ip: no IP address configured on local node for network '$cidr'\n"
            if $found == 0;

        die "could not get migration ip: multiple IP address configured for network '$cidr'\n"
            if $found > 1;
    }

    return $ips->[0];
}
1165
1166 # ssh related utility functions
1167
# Rewrite $sshauthkeys from its current content, the backup of root's
# former authorized_keys file (if present) and our own public key, dropping
# repeated key lines. ssh-copy-id simply appends keys, so the file can grow
# too large otherwise. Only lines matching the key pattern below are
# de-duplicated; all other lines are kept as they are.
sub ssh_merge_keys {

    my $data = '';
    if (-f $sshauthkeys) {
        $data = PVE::Tools::file_get_contents($sshauthkeys, 128*1024);
        chomp($data);
    }

    my $found_backup = undef;
    if (-f $rootsshauthkeysbackup) {
        $data .= "\n" . PVE::Tools::file_get_contents($rootsshauthkeysbackup, 128*1024);
        chomp($data);
        $found_backup = 1;
    }

    # always add ourself
    if (-f $ssh_rsa_id) {
        my $pub = PVE::Tools::file_get_contents($ssh_rsa_id);
        chomp($pub);
        $data .= "\n$pub\n";
    }

    my %seen;
    my @kept;
    for my $line (split(/\n/, $data)) {
        if ($line !~ /^#/ && $line =~ m/(^|\s)ssh-(rsa|dsa)\s+(\S+)\s+\S+$/) {
            # $3 is the key data itself, skip it if we already have it
            next if $seen{$3}++;
        }
        push @kept, $line;
    }
    my $newdata = join('', map { "$_\n" } @kept);

    PVE::Tools::file_set_contents($sshauthkeys, $newdata, 0600);

    if ($found_backup && -l $rootsshauthkeys) {
        # everything went well, so we can remove the backup
        unlink $rootsshauthkeysbackup;
    }
}
1210
# Make sure $sshd_config_fn contains "PermitRootLogin yes". If the file has
# to be changed, sshd is reloaded or restarted through systemctl afterwards.
sub setup_sshd_config {

    my $conf = PVE::Tools::file_get_contents($sshd_config_fn);

    # nothing to do if root login is already permitted
    return if $conf =~ m/^PermitRootLogin\s+yes\s*$/m;

    # replace the first (possibly commented out) setting, or append one
    my $replaced = ($conf =~ s/^#?PermitRootLogin.*$/PermitRootLogin yes/m);
    if (!$replaced) {
        chomp $conf;
        $conf .= "\nPermitRootLogin yes\n";
    }

    PVE::Tools::file_set_contents($sshd_config_fn, $conf);

    PVE::Tools::run_command(['systemctl', 'reload-or-restart', 'sshd']);
}
1227
# Create root's RSA ssh key pair and root's ssh client config, each only if
# it does not exist yet.
sub setup_rootsshconfig {

    # create ssh key if it does not exist
    if (! -f $ssh_rsa_id) {
        mkdir '/root/.ssh/';
        system ("echo|ssh-keygen -t rsa -N '' -b 2048 -f ${ssh_rsa_id_priv}");
    }

    # create ssh config if it does not exist
    return if -f $rootsshconfig;

    mkdir '/root/.ssh';

    my $fh = IO::File->new($rootsshconfig, O_CREAT|O_WRONLY|O_EXCL, 0640);
    return if !$fh;

    # this is the default ciphers list from Debian's OpenSSH package (OpenSSH_7.4p1 Debian-10, OpenSSL 1.0.2k 26 Jan 2017)
    # changed order to put AES before Chacha20 (most hardware has AESNI)
    print $fh "Ciphers aes128-ctr,aes192-ctr,aes256-ctr,aes128-gcm\@openssh.com,aes256-gcm\@openssh.com,chacha20-poly1305\@openssh.com\n";
    close($fh);
}
1247
# Set up the shared authorized_keys file $sshauthkeys and make root's
# authorized_keys a symlink to it. If the shared file has to be created, it
# is seeded with the content of root's existing file. A regular root file is
# moved to $rootsshauthkeysbackup; that backup is removed again when its
# content was imported and the symlink is in place. Problems are only
# reported as warnings.
sub setup_ssh_keys {

    mkdir $authdir;

    my $import_ok;

    if (! -f $sshauthkeys) {
        my $existing;
        $existing = PVE::Tools::file_get_contents($rootsshauthkeys, 128*1024)
            if -f $rootsshauthkeys;

        my $fh = IO::File->new ($sshauthkeys, O_CREAT|O_WRONLY|O_EXCL, 0400);
        if ($fh) {
            PVE::Tools::safe_print($sshauthkeys, $fh, $existing) if $existing;
            close($fh);
            $import_ok = 1;
        }
    }

    if (! -f $sshauthkeys) {
        warn "can't create shared ssh key database '$sshauthkeys'\n";
    }

    # move a regular file out of the way, keeping it as backup
    if (-f $rootsshauthkeys && ! -l $rootsshauthkeys) {
        rename($rootsshauthkeys , $rootsshauthkeysbackup)
            || warn "rename $rootsshauthkeys failed - $!\n";
    }

    symlink $sshauthkeys, $rootsshauthkeys if ! -l $rootsshauthkeys;

    if (-l $rootsshauthkeys) {
        unlink $rootsshauthkeysbackup if $import_ok;
    } else {
        warn "can't create symlink for ssh keys '$rootsshauthkeys' -> '$sshauthkeys'\n";
    }
}
1285
# Write the content of the shared known_hosts file ($sshknownhosts) to
# $sshglobalknownhosts, but only when the latter currently is a symlink.
# An absent shared file results in empty content being written.
sub ssh_unmerge_known_hosts {
    -l $sshglobalknownhosts or return;

    my $content = (-f $sshknownhosts)
        ? PVE::Tools::file_get_contents($sshknownhosts, 128*1024)
        : '';

    return PVE::Tools::file_set_contents($sshglobalknownhosts, $content);
}
1295
# Merge known host entries into the shared known_hosts file ($sshknownhosts)
# and make sure it contains this node's RSA host key for both $nodename and
# $ip_address.
#
# Parameters:
#   $nodename   - name of the local node (required)
#   $ip_address - address of the local node (required)
#   $createLink - if true, replace $sshglobalknownhosts with a symlink to the
#                 shared file afterwards
#
# Lines of the shared file are kept (unparsable ones included), 'ssh-rsa'
# lines of a non-symlinked $sshglobalknownhosts are merged in. Only the first
# entry per host field is kept. Hashed entries that match the node name or
# address but carry a different key are dropped.
sub ssh_merge_known_hosts {
    my ($nodename, $ip_address, $createLink) = @_;

    die "no node name specified" if !$nodename;
    die "no ip address specified" if !$ip_address;

    # ssh lowercases hostnames (aliases) before comparison, so we need to do
    # the same
    ($nodename, $ip_address) = (lc($nodename), lc($ip_address));

    mkdir $authdir;

    # make sure the shared file exists before it gets read
    if (! -f $sshknownhosts) {
        my $fh = IO::File->new($sshknownhosts, O_CREAT|O_WRONLY|O_EXCL, 0600);
        close($fh) if $fh;
    }

    my $shared_raw = PVE::Tools::file_get_contents($sshknownhosts, 128*1024);

    # a real file (not our symlink) may hold entries worth importing
    my $global_raw = '';
    if (-f $sshglobalknownhosts && ! -l $sshglobalknownhosts) {
        $global_raw = PVE::Tools::file_get_contents($sshglobalknownhosts, 128*1024);
    }

    my $hostkey = PVE::Tools::file_get_contents($ssh_host_rsa_id);
    # Note: the file sometimes contains empty lines at the start, so we use a
    # multiline match
    die "can't parse $ssh_host_rsa_id" if $hostkey !~ m/^(ssh-rsa\s\S+)(\s.*)?$/m;
    $hostkey = $1;

    my $merged = '';
    my $seen = {};

    my ($found_nodename, $found_local_ip);

    # true if the HMAC-SHA1 of $plain under $salt equals the stored $digest
    my $hashed_name_matches = sub {
        my ($salt, $digest, $plain) = @_;

        my $hmac = Digest::HMAC_SHA1->new($salt);
        $hmac->add($plain);

        return $digest eq ($hmac->b64digest . '=');
    };

    my $merge_line = sub {
        my ($line, $all) = @_;

        return if $line =~ m/^\s*$/; # skip empty lines
        return if $line =~ m/^#/; # skip comments

        my ($key, $rsakey) = $line =~ m/^(\S+)\s(ssh-rsa\s\S+)(\s.*)?$/;
        if (!defined($key)) {
            # not an ssh-rsa entry, keep it only when asked to keep everything
            $merged .= $line if $all;
            return;
        }

        # only the first entry for a host field wins
        return if $seen->{$key};
        $seen->{$key} = 1;

        my ($salt_b64, $digest) = $key =~ m/\|1\|([^\|\s]+)\|([^\|\s]+)$/;
        if (defined($salt_b64)) {
            # hashed host name: compare against node name first, then address
            my $salt = decode_base64($salt_b64);
            my @candidates = (
                [ $nodename, \$found_nodename ],
                [ $ip_address, \$found_local_ip ],
            );
            foreach my $candidate (@candidates) {
                my ($plain, $found_ref) = @$candidate;
                next if !$hashed_name_matches->($salt, $digest, $plain);

                # entry is ours: keep it only when it carries the current key
                if ($rsakey eq $hostkey) {
                    $$found_ref = 1;
                    $merged .= $line;
                }
                return;
            }
        } else {
            my $lc_key = lc($key); # avoid duplicate entries, ssh compares lowercased
            if ($lc_key eq $ip_address) {
                $found_local_ip = 1 if $rsakey eq $hostkey;
            } elsif ($lc_key eq $nodename) {
                $found_nodename = 1 if $rsakey eq $hostkey;
            }
        }

        $merged .= $line;
    };

    # feed a text blob line by line into $merge_line
    my $merge_text = sub {
        my ($text, $all) = @_;

        while ($text && $text =~ s/^((.*?)(\n|$))//) {
            $merge_line->("$2\n", $all);
        }
    };

    $merge_text->($shared_raw, 1);
    $merge_text->($global_raw);

    # add our own key if not already there
    if (!$found_nodename) {
        $merged .= "$nodename $hostkey\n";
    }
    if (!$found_local_ip) {
        $merged .= "$ip_address $hostkey\n";
    }

    PVE::Tools::file_set_contents($sshknownhosts, $merged);

    return if !$createLink;

    unlink $sshglobalknownhosts;
    symlink $sshknownhosts, $sshglobalknownhosts;

    if (! -l $sshglobalknownhosts) {
        warn "can't create symlink for ssh known hosts '$sshglobalknownhosts' -> '$sshknownhosts'\n";
    }

}
1407
# Property string format of the 'migration' datacenter option: a migration
# type (default key) and an optional migration network.
my $migration_format = {
    type => {
        type => 'string',
        enum => [qw(secure insecure)],
        default => 'secure',
        default_key => 1,
        description => "Migration traffic is encrypted using an SSH tunnel by default. On secure, completely private networks this can be disabled to increase performance.",
    },
    network => {
        type => 'string',
        format => 'CIDR',
        format_description => 'CIDR',
        optional => 1,
        description => "CIDR of the (sub) network that is used for migration.",
    },
};
1425
# Property string format of the 'ha' datacenter option.
my $ha_format = {
    shutdown_policy => {
        type => 'string',
        enum => [qw(freeze failover conditional)],
        default => 'conditional',
        description => "The policy for HA services on node shutdown. 'freeze' disables auto-recovery, 'failover' ensures recovery, 'conditional' recovers on poweroff and freezes on reboot. Running HA Services will always get stopped first on shutdown.",
        verbose_description => "Describes the policy for handling HA services on poweroff or reboot of a node. Freeze will always freeze services which are still located on the node on shutdown, those services won't be recovered by the HA manager. Failover will not mark the services as frozen and thus the services will get recovered to other nodes, if the shutdown node does not come up again quickly (< 1min). 'conditional' chooses automatically depending on the type of shutdown, i.e., on a reboot the service will be frozen but on a poweroff the service will stay as is, and thus get recovered after about 2 minutes.",
    },
};

# make the MAC prefix verifier available as schema format 'mac-prefix'
PVE::JSONSchema::register_format(
    'mac-prefix',
    \&pve_verify_mac_prefix,
);
# Verify a unicast MAC address prefix.
#
# Accepted are one to three colon separated octets (hex, any case) with an
# optional trailing colon; the second hex digit of the first octet has to be
# even, i.e. the multicast bit must not be set.
#
# Parameters:
#   $mac_prefix - value to check
#   $noerr      - if true, return undef for invalid values instead of dying
#
# Returns the unchanged $mac_prefix when it is valid.
sub pve_verify_mac_prefix {
    my ($mac_prefix, $noerr) = @_;

    # \z instead of $ - the latter would also accept a trailing newline.
    # The defined() check avoids an 'uninitialized value' warning for undef.
    my $valid = defined($mac_prefix)
        && $mac_prefix =~ m/^[a-f0-9][02468ace](?::[a-f0-9]{2}){0,2}:?\z/i;

    if (!$valid) {
        return undef if $noerr;
        die "value is not a valid unicast MAC address prefix\n";
    }
    return $mac_prefix;
}
1446
# Property string format of the 'u2f' datacenter option; both properties are
# optional overrides. Declared with 'our', so it is reachable from outside
# of this package.
our $u2f_format = {
    appid => {
        optional => 1,
        type => 'string',
        format_description => 'APPID',
        description => "U2F AppId URL override. Defaults to the origin.",
    },
    origin => {
        optional => 1,
        type => 'string',
        format_description => 'URL',
        description => "U2F Origin override. Mostly useful for single nodes with a single URL.",
    },
};
1461
# JSON schema of the datacenter configuration ('datacenter.cfg'). Unknown
# properties are rejected (additionalProperties => 0). The 'migration', 'ha'
# and 'u2f' options are property strings described by their own formats.
my $datacenter_schema = {
    type => "object",
    additionalProperties => 0,
    properties => {
        keyboard => {
            optional => 1,
            type => 'string',
            description => "Default keyboard layout for vnc server.",
            enum => PVE::Tools::kvmkeymaplist(),
        },
        language => {
            optional => 1,
            type => 'string',
            description => "Default GUI language.",
            enum => [
                'zh_CN',
                'zh_TW',
                'ca',
                'en',
                'eu',
                'fr',
                'de',
                'it',
                'es',
                'ja',
                'nb',
                'nn',
                'fa',
                'pl',
                'pt_BR',
                'ru',
                'sl',
                'sv',
                'tr',
            ],
        },
        http_proxy => {
            optional => 1,
            type => 'string',
            description => "Specify external http proxy which is used for downloads (example: 'http://username:password\@host:port/')",
            pattern => "http://.*",
        },
        # deprecated, superseded by the 'migration' property string below
        migration_unsecure => {
            optional => 1,
            type => 'boolean',
            description => "Migration is secure using SSH tunnel by default. " .
                "For secure private networks you can disable it to speed up " .
                "migration. Deprecated, use the 'migration' property instead!",
        },
        migration => {
            optional => 1,
            type => 'string', format => $migration_format,
            description => "For cluster wide migration settings.",
        },
        console => {
            optional => 1,
            type => 'string',
            description => "Select the default Console viewer. You can either use the builtin java applet (VNC; deprecated and maps to html5), an external virt-viewer compatible application (SPICE), an HTML5 based vnc viewer (noVNC), or an HTML5 based console client (xtermjs). If the selected viewer is not available (e.g. SPICE not activated for the VM), the fallback is noVNC.",
            enum => ['applet', 'vv', 'html5', 'xtermjs'],
        },
        email_from => {
            optional => 1,
            type => 'string',
            format => 'email-opt',
            description => "Specify email address to send notification from (default is root@\$hostname)",
        },
        max_workers => {
            optional => 1,
            type => 'integer',
            minimum => 1,
            description => "Defines how many workers (per node) are maximal started" .
                " on actions like 'stopall VMs' or task from the ha-manager.",
        },
        fencing => {
            optional => 1,
            type => 'string',
            default => 'watchdog',
            enum => [ 'watchdog', 'hardware', 'both' ],
            description => "Set the fencing mode of the HA cluster. Hardware mode " .
                "needs a valid configuration of fence devices in /etc/pve/ha/fence.cfg." .
                " With both all two modes are used." .
                "\n\nWARNING: 'hardware' and 'both' are EXPERIMENTAL & WIP",
        },
        ha => {
            optional => 1,
            type => 'string', format => $ha_format,
            description => "Cluster wide HA settings.",
        },
        mac_prefix => {
            optional => 1,
            type => 'string',
            format => 'mac-prefix',
            description => 'Prefix for autogenerated MAC addresses.',
        },
        bwlimit => PVE::JSONSchema::get_standard_option('bwlimit'),
        u2f => {
            optional => 1,
            type => 'string',
            format => $u2f_format,
            description => 'u2f',
        },
    },
};
1565
# Accessor for the datacenter config schema, so that code outside of this
# file (e.g. the documentation generator) can reach it.
sub get_datacenter_schema {
    return $datacenter_schema;
}
1568
# Parser for 'datacenter.cfg'.
#
# Parses $raw (an undefined value is treated as empty content) against the
# datacenter schema, expands the 'migration' and 'ha' property strings into
# hashes and maps deprecated settings to their replacements.
#
# Returns the resulting config hash.
sub parse_datacenter_config {
    my ($filename, $raw) = @_;

    my $res = PVE::JSONSchema::parse_config($datacenter_schema, $filename, $raw // '');

    # expand property strings into hashes
    my @property_strings = (
        [ migration => $migration_format ],
        [ ha => $ha_format ],
    );
    foreach my $entry (@property_strings) {
        my ($key, $format) = @$entry;
        if (my $value = $res->{$key}) {
            $res->{$key} = PVE::JSONSchema::parse_property_string($format, $value);
        }
    }

    # for backwards compatibility only, new migration property has precedence
    my $unsecure = $res->{migration_unsecure};
    if (defined($unsecure)) {
        if (!defined($res->{migration}->{type})) {
            $res->{migration}->{type} = $unsecure ? 'insecure' : 'secure';
        } else {
            warn "deprecated setting 'migration_unsecure' and new 'migration: type' set at same time! Ignore 'migration_unsecure'\n";
        }
    }

    # for backwards compatibility only, applet maps to html5
    $res->{console} = 'html5'
        if defined($res->{console}) && $res->{console} eq 'applet';

    return $res;
}
1599
# Writer for 'datacenter.cfg'.
#
# Maps deprecated settings to their replacements, turns the 'migration' and
# 'ha' hashes back into property strings and returns the dumped config.
# Note that $cfg is modified in place.
sub write_datacenter_config {
    my ($filename, $cfg) = @_;

    # map deprecated setting to new one
    if (!defined($cfg->{migration}) && defined($cfg->{migration_unsecure})) {
        my $unsecure = delete $cfg->{migration_unsecure};
        $cfg->{migration}->{type} = $unsecure ? 'insecure' : 'secure';
    }

    # map deprecated applet setting to html5
    $cfg->{console} = 'html5'
        if defined($cfg->{console}) && $cfg->{console} eq 'applet';

    # hashes have to be written as property strings
    my @property_strings = (
        [ migration => $migration_format ],
        [ ha => $ha_format ],
    );
    foreach my $entry (@property_strings) {
        my ($key, $format) = @$entry;
        next if !ref($cfg->{$key});
        $cfg->{$key} = PVE::JSONSchema::print_property_string($cfg->{$key}, $format);
    }

    return PVE::JSONSchema::dump_config($datacenter_schema, $filename, $cfg);
}
1626
# register parser and writer for the datacenter config
cfs_register_file('datacenter.cfg', \&parse_datacenter_config, \&write_datacenter_config);
1630
# X509 Certificate cache helper

# time of the last full cache refresh
my $cert_cache_timestamp = time();
# node name => fingerprint currently cached for that node
my $cert_cache_nodes = {};
# set of trusted fingerprints (fingerprint => 1)
my $cert_cache_fingerprints = {};
1636
# Refresh the cached certificate fingerprints.
#
# Parameters:
#   $update_node - refresh only this node; if undefined, all nodes present in
#                  the cache are refreshed and the cache timestamp is renewed
#   $clear       - if true, drop the cached entry of a node whose fingerprint
#                  cannot be read anymore
#
# Errors while reading a fingerprint are reported with warn and do not abort
# the refresh of the remaining nodes.
sub update_cert_cache {
    my ($update_node, $clear) = @_;

    if ($clear) {
        syslog('info', "Clearing outdated entries from certificate cache");
    }

    my @nodes;
    if (defined($update_node)) {
        @nodes = ($update_node);
    } else {
        $cert_cache_timestamp = time();
        @nodes = keys %$cert_cache_nodes;
    }

    foreach my $node (@nodes) {
        my $fp = eval { get_node_fingerprint($node) };
        if (my $err = $@) {
            warn "$err\n";
            if ($clear) {
                if (my $stale_fp = $cert_cache_nodes->{$node}) {
                    # distrust old fingerprint
                    delete $cert_cache_fingerprints->{$stale_fp};
                    # ensure reload on next proxied request
                    delete $cert_cache_nodes->{$node};
                }
            }
            next;
        }

        my $previous_fp = $cert_cache_nodes->{$node};
        $cert_cache_nodes->{$node} = $fp;
        $cert_cache_fingerprints->{$fp} = 1;

        # a replaced certificate must not be trusted any longer
        delete $cert_cache_fingerprints->{$previous_fp}
            if defined($previous_fp) && $previous_fp ne $fp;
    }
}
1674
# Load and cache the certificate fingerprint of $node once; nothing happens
# for an undefined node or a node that already has a cached fingerprint.
sub initialize_cert_cache {
    my ($node) = @_;

    return if !defined($node);
    return if defined($cert_cache_nodes->{$node});

    update_cert_cache($node);
}
1682
# Read the PEM encoded X509 certificate at $cert_path and return its SHA-256
# fingerprint. Dies if the file cannot be opened, contains no readable
# certificate or yields an empty fingerprint.
sub read_ssl_cert_fingerprint {
    my ($cert_path) = @_;

    my $bio = Net::SSLeay::BIO_new_file($cert_path, 'r');
    die "unable to read '$cert_path' - $!\n" if !$bio;

    # the BIO is only needed for parsing, release it right away
    my $x509 = Net::SSLeay::PEM_read_bio_X509($bio);
    Net::SSLeay::BIO_free($bio);

    if (!$x509) {
        die "unable to read certificate from '$cert_path'\n";
    }

    my $fingerprint = Net::SSLeay::X509_get_fingerprint($x509, 'sha256');
    Net::SSLeay::X509_free($x509);

    if (!defined($fingerprint) || $fingerprint eq '') {
        die "unable to get fingerprint for '$cert_path' - got empty value\n";
    }

    return $fingerprint;
}
1702
# Return the certificate fingerprint of $node. A custom certificate
# (pveproxy-ssl.pem) takes precedence over the default one (pve-ssl.pem).
sub get_node_fingerprint {
    my ($node) = @_;

    my $custom_cert_path = "/etc/pve/nodes/$node/pveproxy-ssl.pem";
    if (-f $custom_cert_path) {
        return read_ssl_cert_fingerprint($custom_cert_path);
    }

    return read_ssl_cert_fingerprint("/etc/pve/nodes/$node/pve-ssl.pem");
}
1713
1714
# Check whether the SHA-256 fingerprint of the X509 certificate $cert is one
# of the cached (pinned) fingerprints.
#
# Returns 1 if the fingerprint is known, 0 otherwise (also when no
# fingerprint could be computed). On a miss the cache is refreshed and
# checked again, but only if the last refresh is at least a minute old.
sub check_cert_fingerprint {
    my ($cert) = @_;

    # clear cache every 30 minutes at least
    my $cache_age = time() - $cert_cache_timestamp;
    update_cert_cache(undef, 1) if $cache_age >= 60*30;

    # get fingerprint of server certificate
    my $fp = Net::SSLeay::X509_get_fingerprint($cert, 'sha256');
    return 0 if !defined($fp) || $fp eq ''; # error

    my $is_pinned = sub {
        return exists($cert_cache_fingerprints->{$fp}) ? 1 : 0;
    };

    return 1 if $is_pinned->();

    # clear cache and retry at most once every minute
    return 0 if time() - $cert_cache_timestamp < 60;

    syslog ('info', "Could not verify remote node certificate '$fp' with list of pinned certificates, refreshing cache");
    update_cert_cache();

    return $is_pinned->();
}
1743
1744 # bash completion helpers
1745
# Completion helper: returns an array reference holding the lowest VMID in
# the range 100..9999 that is not in use, or an empty array reference when
# all of them are taken.
sub complete_next_vmid {

    my $vmlist = get_vmlist() || {};
    my $used_ids = $vmlist->{ids} || {};

    foreach my $candidate (100 .. 9999) {
        next if defined($used_ids->{$candidate});
        return [$candidate];
    }

    return [];
}
1757
# Completion helper: returns an array reference with all VMIDs found in the
# VM list.
sub complete_vmid {

    my $vmlist = get_vmlist();
    my @vmids = keys %{ $vmlist->{ids} || {} };

    return \@vmids;
}
1765
# Completion helper: returns an array reference with the VMIDs of all guests
# whose 'node' entry in the VM list equals the local node name.
sub complete_local_vmid {

    my $vmlist = get_vmlist();
    my $ids = $vmlist->{ids} || {};

    my $nodename = PVE::INotify::nodename();

    my @local_vmids = grep {
        my $guest = $ids->{$_};
        $guest->{node} && $guest->{node} eq $nodename;
    } keys %$ids;

    return \@local_vmids;
}
1782
# Completion helper: returns an array reference with all nodes from the node
# list except the local one.
sub complete_migration_target {

    my $nodename = PVE::INotify::nodename();

    my $nodelist = get_nodelist();

    return [ grep { $_ ne $nodename } @$nodelist ];
}
1797
# Collect the data needed to open an SSH connection to cluster node $node.
#
# Without $network_cidr the address comes from remote_node_ip($node). With
# $network_cidr the remote node is asked for its address inside that network
# by running 'pvecm mtunnel -get_migration_ip' over a connection built from
# the default (no network) info.
#
# Returns a hash reference with the keys 'ip', 'name' and 'network'. Dies if
# no address inside the requested network could be determined, or if mtunnel
# prints anything after the address line.
sub get_ssh_info {
    my ($node, $network_cidr) = @_;

    my $ip;
    if (defined($network_cidr)) {
        # Use mtunnel to get the remote node's ip inside $network_cidr.
        # This goes over the regular network (iow. uses get_ssh_info() with
        # $network_cidr undefined).
        # FIXME: Use the REST API client for this after creating an API entry
        # for get_migration_ip.
        my $default_remote = get_ssh_info($node, undef);
        my $default_ssh = ssh_info_to_command($default_remote);
        my $cmd = [@$default_ssh, 'pvecm', 'mtunnel',
            '-migration_network', $network_cidr,
            '-get_migration_ip'
        ];
        PVE::Tools::run_command($cmd, outfunc => sub {
            my ($line) = @_;
            chomp $line;

            # nothing may follow once the address line was seen
            die "internal error: unexpected output from mtunnel\n"
                if defined($ip);

            # Lines not carrying the address are skipped. There used to be a
            # "die 'bad output' if defined($ip)" in an else branch here, which
            # could never trigger since $ip is undefined at this point.
            # NOTE(review): if bad output was meant to be fatal, this needs
            # an unconditional die - confirm the intended behavior first.
            if ($line =~ /^ip: '(.*)'$/) {
                $ip = $1;
            }
        });
        die "failed to get ip for node '$node' in network '$network_cidr'\n"
            if !defined($ip);
    } else {
        $ip = remote_node_ip($node);
    }

    return {
        ip => $ip,
        name => $node,
        network => $network_cidr,
    };
}
1838
# Build the ssh command line prefix for a connection described by $info (as
# returned by get_ssh_info): escape character disabled, batch mode and the
# node name as host key alias, followed by @extra_options.
#
# Returns a new array reference; the target (user@host) is not included.
sub ssh_info_to_command_base {
    my ($info, @extra_options) = @_;

    my @cmd = ('/usr/bin/ssh');
    push @cmd, '-e', 'none';
    push @cmd, '-o', 'BatchMode=yes';
    push @cmd, '-o', 'HostKeyAlias='.$info->{name};
    push @cmd, @extra_options;

    return \@cmd;
}
1849
# Build the complete ssh command line for $info: the base command (including
# @extra_options) followed by the target 'root@<ip>'.
sub ssh_info_to_command {
    my ($info, @extra_options) = @_;

    my $base = ssh_info_to_command_base($info, @extra_options);

    return [ @$base, "root\@$info->{ip}" ];
}
1856
# Property string format of a single corosync link: the address (default
# key) and an optional priority.
my $corosync_link_format = {
    address => {
        type => 'string',
        format => 'address',
        format_description => 'IP',
        default_key => 1,
        description => "Hostname (or IP) of this corosync link address.",
    },
    priority => {
        type => 'integer',
        minimum => 0,
        maximum => 255,
        default => 0,
        optional => 1,
        description => "The priority for the link when knet is used in 'passive' mode. Lower value means higher priority.",
    },
};

# schema snippet registered as standard option 'corosync-link'
my $corosync_link_desc = {
    type => 'string',
    format => $corosync_link_format,
    optional => 1,
    description => "Address and priority information of a single corosync link.",
};

PVE::JSONSchema::register_standard_option("corosync-link", $corosync_link_desc);
1879
# Parse a corosync link property string. Returns undef for an undefined
# $value, otherwise the result of parsing $value with the corosync link
# format.
sub parse_corosync_link {
    my ($value) = @_;

    if (defined($value)) {
        return PVE::JSONSchema::parse_property_string($corosync_link_format, $value);
    }

    return undef;
}
1887
# Check whether the local node is in a state that allows joining a cluster.
#
# Collected problems: an existing corosync auth key or cluster config,
# existing guests, a running corosync, and addresses which are not
# configured exactly once on the local node.
#
# Parameters:
#   $local_addr, $ring0_addr, $ring1_addr - addresses (IP or hostname) to
#       check, undefined ones are skipped
#   $force - if true, problems are only printed as a warning
#
# Dies when problems were found and $force is not set.
sub assert_joinable {
    my ($local_addr, $ring0_addr, $ring1_addr, $force) = @_;

    my $errors = '';
    my $add_error = sub {
        my ($msg) = @_;
        $errors .= "* $msg\n";
    };

    $add_error->("authentication key '$authfile' already exists")
        if -f $authfile;

    $add_error->("cluster config '$clusterconf' already exists")
        if -f $clusterconf;

    my $vmlist = get_vmlist();
    my $has_guests = $vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}});
    $add_error->("this host already contains virtual guests") if $has_guests;

    # exit code 0 means the quorum tool could talk to a running corosync
    my $quorumtool_rc = run_command(['corosync-quorumtool', '-l'], noerr => 1, quiet => 1);
    if ($quorumtool_rc == 0) {
        $add_error->("corosync is already running, is this node already in a cluster?!");
    }

    # check if corosync ring IPs are configured on the current nodes interfaces
    my $check_ip = sub {
        my ($ip, $logid) = @_;
        return if !defined($ip);

        # not an IP, so try to resolve it as hostname
        if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
            my $host = $ip;
            eval { $ip = PVE::Network::get_ip_from_hostname($host); };
            if ($@) {
                $add_error->("$logid: cannot use '$host': $@\n") ;
                return;
            }
        }

        my $prefix_len = Net::IP::ip_is_ipv6($ip) ? 128 : 32;
        my $configured_ips = PVE::Network::get_local_ip_from_cidr("$ip/$prefix_len");

        if (scalar(@$configured_ips) != 1) {
            $add_error->("$logid: cannot use IP '$ip', it must be configured exactly once on local node!\n");
        }
    };

    my @address_checks = (
        [ $local_addr, 'local node address' ],
        [ $ring0_addr, 'ring0' ],
        [ $ring1_addr, 'ring1' ],
    );
    $check_ip->(@$_) foreach @address_checks;

    return if !$errors;

    warn "detected the following error(s):\n$errors";
    die "Check if node may join a cluster failed!\n" if !$force;
}
1940
# Dump the sqlite database $dbfile into a gzip compressed, timestamped file
# below $dbbackupdir and remove all but the 10 newest backups afterwards.
# NOTE: filesystem must be offline here, no DB changes allowed
my $backup_cfs_database = sub {
    my ($dbfile) = @_;

    mkdir $dbbackupdir;

    my $now = time();
    my $backup_fn = "$dbbackupdir/config-$now.sql.gz";

    print "backup old database to '$backup_fn'\n";

    my $dump_cmd = ['sqlite3', $dbfile, '.dump'];
    my $gzip_cmd = ['gzip', '-', \ ">${backup_fn}"];
    run_command([ $dump_cmd, $gzip_cmd ], 'errmsg' => "cannot backup old database\n");

    # purge older backups, the newest ones sort first
    my $maxfiles = 10;
    my @backups = reverse sort glob("$dbbackupdir/config-*.sql.gz");
    return if scalar(@backups) <= $maxfiles;

    splice(@backups, 0, $maxfiles); # keep the newest $maxfiles
    foreach my $outdated (@backups) {
        my ($path) = $outdated =~ m/^(\S+)$/ # untaint
            or next;
        print "delete old backup '$path'\n";
        unlink $path;
    }
};
1966
# Join the local node to an existing cluster.
#
# $param is a hash reference, the keys read here are: hostname, password,
# fingerprint, force, nodeid, votes, ring0_addr and ring1_addr.
#
# After checking that the node is joinable and preparing the local SSH
# setup, an API connection to $param->{hostname} is opened, the addition of
# this node is requested and the local setup is finished with the corosync
# config and auth key from the response.
sub join {
    my ($param) = @_;

    my $nodename = PVE::INotify::nodename();
    my $local_ip_address = remote_node_ip($nodename);

    my $ring0_addr = $param->{ring0_addr};
    my $ring1_addr = $param->{ring1_addr};

    # check if we can join with the given parameters and current node state
    assert_joinable($local_ip_address, $ring0_addr, $ring1_addr, $param->{force});

    setup_sshd_config();
    setup_rootsshconfig();
    setup_ssh_keys();

    # make sure known_hosts is on local filesystem
    ssh_unmerge_known_hosts();

    my $host = $param->{hostname};
    my %conn_args = (
        username => 'root@pam',
        password => $param->{password},
        cookie_name => 'PVEAuthCookie',
        protocol => 'https',
        host => $host,
        port => 8006,
    );

    my $fingerprint = $param->{fingerprint};
    if ($fingerprint) {
        $conn_args{cached_fingerprints} = { uc($fingerprint) => 1 };
    } else {
        # API schema ensures that we can only get here from CLI handler
        $conn_args{manual_verification} = 1;
    }

    print "Establishing API connection with host '$host'\n";

    my $conn = PVE::APIClient::LWP->new(%conn_args);
    $conn->login();

    # login raises an exception on failure, so if we get here we're good
    print "Login succeeded.\n";

    # without an explicit ring0 address the local IP is used
    my $args = { ring0_addr => $ring0_addr // $local_ip_address };
    foreach my $opt (qw(force votes)) {
        $args->{$opt} = $param->{$opt} if defined($param->{$opt});
    }
    $args->{nodeid} = $param->{nodeid} if $param->{nodeid};
    $args->{ring1_addr} = $ring1_addr if defined($ring1_addr);

    print "Request addition of this node\n";
    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);

    print "Join request OK, finishing setup locally\n";

    # added successfully - now prepare local node
    finish_join($nodename, $res->{corosync_conf}, $res->{corosync_authkey});
}
2024
# Finish a cluster join on the local node.
#
# Writes the received corosync auth key and config to their local files,
# stops pve-cluster, backs up and removes the old database, starts corosync
# and pve-cluster, waits until the node is quorate, regenerates certificates
# and SSH files and finally reloads (or restarts) pvedaemon and pveproxy.
sub finish_join {
    my ($nodename, $corosync_conf, $corosync_authkey) = @_;

    mkdir "$localclusterdir";
    PVE::Tools::file_set_contents($authfile, $corosync_authkey);
    PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);

    print "stopping pve-cluster service\n";
    run_command(
        ['systemctl', 'stop', 'pve-cluster'],
        errmsg => "can't stop pve-cluster service",
    );

    $backup_cfs_database->($dbfile);
    unlink $dbfile;

    run_command(
        ['systemctl', 'start', 'corosync', 'pve-cluster'],
        errmsg => "starting pve-cluster failed",
    );

    # wait for quorum, announce the wait only once
    my $announced = 0;
    until (check_cfs_quorum(1)) {
        if (!$announced) {
            print "waiting for quorum...";
            STDOUT->flush();
            $announced = 1;
        }
        sleep(1);
    }
    print "OK\n" if $announced;

    updatecerts_and_ssh(1);

    print "generated new node certificate, restart pveproxy and pvedaemon services\n";
    run_command(['systemctl', 'reload-or-restart', 'pvedaemon', 'pveproxy']);

    print "successfully added node '$nodename' to cluster.\n";
}
2061
# (Re)generate the node files and merge the SSH key and known hosts
# databases of the local node.
#
# Parameters:
#   $force_new_cert - passed on to gen_pve_node_files; announced as
#                     generating a new node certificate
#   $silent         - suppress progress messages; without quorum undef is
#                     returned instead of raising an error
#
# Dies when the node is not quorate and $silent is not set.
sub updatecerts_and_ssh {
    my ($force_new_cert, $silent) = @_;

    my $log = sub {
        my ($msg) = @_;
        return if $silent;
        print "$msg\n";
    };

    setup_rootsshconfig();

    gen_pve_vzdump_symlink();

    unless (check_cfs_quorum(1)) {
        die "no quorum - unable to update files\n" if !$silent;
        return undef;
    }

    setup_ssh_keys();

    my $nodename = PVE::INotify::nodename();
    my $local_ip_address = remote_node_ip($nodename);

    $log->("(re)generate node files");
    $log->("generate new node certificate") if $force_new_cert;
    gen_pve_node_files($nodename, $local_ip_address, $force_new_cert);

    $log->("merge authorized SSH keys and known hosts");
    ssh_merge_keys();
    ssh_merge_known_hosts($nodename, $local_ip_address, 1);
    gen_pve_vzdump_files();
}
2090
2091 1;