1 package PVE
::CLI
::pve6to7
;
10 use PVE
::API2
::Certificates
;
17 use PVE
::RPCEnvironment
;
19 use PVE
::Tools
qw(run_command);
26 use base
qw(PVE::CLIHandler);
28 my $nodename = PVE
::INotify
::nodename
();
30 sub setup_environment
{
31 PVE
::RPCEnvironment-
>setup_default_cli_env();
34 my $min_pve_major = 6;
35 my $min_pve_minor = 4;
36 my $min_pve_pkgrel = 1;
46 my ($level, $line) = @_;
48 $counters->{$level}++ if defined($level) && defined($counters->{$level});
50 print uc($level), ': ' if defined($level);
56 $log_line->('pass', @_);
61 $log_line->('info', @_);
64 $log_line->('skip', @_);
67 print color
('yellow');
68 $log_line->('warn', @_);
73 $log_line->('fail', @_);
77 my $print_header_first = 1;
80 print "\n" if !$print_header_first;
82 $print_header_first = 0;
85 my $get_systemd_unit_state = sub {
89 my $filter_output = sub {
94 run_command
(['systemctl', 'is-enabled', "$unit"], outfunc
=> $filter_output, noerr
=> 1);
95 return if !defined($state);
96 run_command
(['systemctl', 'is-active', "$unit"], outfunc
=> $filter_output, noerr
=> 1);
99 return $state // 'unknown';
101 my $log_systemd_unit_state = sub {
102 my ($unit, $no_fail_on_inactive) = @_;
104 my $log_method = \
&log_warn
;
106 my $state = $get_systemd_unit_state->($unit);
107 if ($state eq 'active') {
108 $log_method = \
&log_pass
;
109 } elsif ($state eq 'inactive') {
110 $log_method = $no_fail_on_inactive ? \
&log_warn
: \
&log_fail
;
111 } elsif ($state eq 'failed') {
112 $log_method = \
&log_fail
;
115 $log_method->("systemd unit '$unit' is in state '$state'");
122 $versions = eval { PVE
::API2
::APT-
>versions({ node
=> $nodename }) } if !defined($versions);
124 if (!defined($versions)) {
125 my $msg = "unable to retrieve package version information";
126 $msg .= "- $@" if $@;
# Collect every entry of the version list whose Package field equals $pkg.
131 my $pkgs = [ grep { $_->{Package
} eq $pkg } @$versions ];
# $pkgs is an array reference, so comparing it numerically with 0 is never
# true; test whether the referenced list is empty instead.
132 if (!defined($pkgs) || !@$pkgs) {
133 log_fail
("unable to determine installed $pkg version.");
140 sub check_pve_packages
{
141 print_header
("CHECKING VERSION INFORMATION FOR PVE PACKAGES");
143 print "Checking for package updates..\n";
144 my $updates = eval { PVE
::API2
::APT-
>list_updates({ node
=> $nodename }); };
145 if (!defined($updates)) {
146 log_warn
("$@") if $@;
147 log_fail
("unable to retrieve list of package updates!");
148 } elsif (@$updates > 0) {
149 my $pkgs = join(', ', map { $_->{Package
} } @$updates);
150 log_warn
("updates for the following packages are available:\n $pkgs");
152 log_pass
("all packages uptodate");
155 print "\nChecking proxmox-ve package version..\n";
156 if (defined(my $proxmox_ve = $get_pkg->('proxmox-ve'))) {
157 my $min_pve_ver = "$min_pve_major.$min_pve_minor-$min_pve_pkgrel";
159 my ($maj, $min, $pkgrel) = $proxmox_ve->{OldVersion
} =~ m/^(\d+)\.(\d+)-(\d+)/;
163 if ($maj > $min_pve_major) {
164 log_pass
("already upgraded to Proxmox VE " . ($min_pve_major + 1));
# Accept any version that is not older than the required minimum. The
# components are compared in order (major, then minor, then package release);
# requiring each one to be >= on its own would reject a newer minor release
# that happens to carry a lower package release number.
166 } elsif ($maj > $min_pve_major || ($maj == $min_pve_major && ($min > $min_pve_minor || ($min == $min_pve_minor && $pkgrel >= $min_pve_pkgrel)))) {
167 log_pass
("proxmox-ve package has version >= $min_pve_ver");
169 log_fail
("proxmox-ve package is too old, please upgrade to >= $min_pve_ver!");
172 my ($krunning, $kinstalled) = (qr/5\.11/, 'pve-kernel-5.11');
174 ($krunning, $kinstalled) = (qr/5\.(?:4|11)/, 'pve-kernel-4.15');
177 print "\nChecking running kernel version..\n";
178 my $kernel_ver = $proxmox_ve->{RunningKernel
};
179 if (!defined($kernel_ver)) {
180 log_fail
("unable to determine running kernel version.");
181 } elsif ($kernel_ver =~ /^$krunning/) {
182 log_pass
("expected running kernel '$kernel_ver'.");
183 } elsif ($get_pkg->($kinstalled)) {
184 log_warn
("expected kernel '$kinstalled' intalled but not yet rebooted!");
186 log_warn
("unexpected running and installed kernel '$kernel_ver'.");
189 log_fail
("proxmox-ve package not found!");
194 sub check_storage_health
{
195 print_header
("CHECKING CONFIGURED STORAGES");
196 my $cfg = PVE
::Storage
::config
();
200 my $info = PVE
::Storage
::storage_info
($cfg);
202 foreach my $storeid (keys %$info) {
203 my $d = $info->{$storeid};
205 if ($d->{type
} eq 'sheepdog') {
206 log_fail
("storage '$storeid' of type 'sheepdog' is enabled - experimental sheepdog support dropped in PVE 6")
207 } elsif ($d->{active
}) {
208 log_pass
("storage '$storeid' enabled and active.");
210 log_warn
("storage '$storeid' enabled but not active!");
213 log_skip
("storage '$storeid' disabled.");
218 sub check_cluster_corosync
{
219 print_header
("CHECKING CLUSTER HEALTH/SETTINGS");
221 if (!PVE
::Corosync
::check_conf_exists
(1)) {
222 log_skip
("standalone node.");
226 $log_systemd_unit_state->('pve-cluster.service');
227 $log_systemd_unit_state->('corosync.service');
229 if (PVE
::Cluster
::check_cfs_quorum
(1)) {
230 log_pass
("Cluster Filesystem is quorate.");
232 log_fail
("Cluster Filesystem readonly, lost quorum?!");
235 my $conf = PVE
::Cluster
::cfs_read_file
('corosync.conf');
236 my $conf_nodelist = PVE
::Corosync
::nodelist
($conf);
239 print "\nAnalzying quorum settings and state..\n";
240 if (!defined($conf_nodelist)) {
241 log_fail
("unable to retrieve nodelist from corosync.conf");
243 if (grep { $conf_nodelist->{$_}->{quorum_votes
} != 1 } keys %$conf_nodelist) {
244 log_warn
("non-default quorum_votes distribution detected!");
246 map { $node_votes += $conf_nodelist->{$_}->{quorum_votes
} // 0 } keys %$conf_nodelist;
249 my ($expected_votes, $total_votes);
250 my $filter_output = sub {
252 ($expected_votes) = $line =~ /^Expected votes:\s*(\d+)\s*$/
253 if !defined($expected_votes);
254 ($total_votes) = $line =~ /^Total votes:\s*(\d+)\s*$/
255 if !defined($total_votes);
258 run_command
(['corosync-quorumtool', '-s'], outfunc
=> $filter_output, noerr
=> 1);
261 if (!defined($expected_votes)) {
262 log_fail
("unable to get expected number of votes, setting to 0.");
# No "Total votes:" line was parsed from the corosync-quorumtool output.
# Report the total (not the expected) vote count as the missing value, so
# this failure can be told apart from the expected-votes check before it.
265 if (!defined($total_votes)) {
266 log_fail
("unable to get total number of votes, setting to 0.");
270 my $cfs_nodelist = PVE
::Cluster
::get_clinfo
()->{nodelist
};
271 my $offline_nodes = grep { $cfs_nodelist->{$_}->{online
} != 1 } keys %$cfs_nodelist;
272 if ($offline_nodes > 0) {
273 log_fail
("$offline_nodes nodes are offline!");
276 my $qdevice_votes = 0;
277 if (my $qdevice_setup = $conf->{main
}->{quorum
}->{device
}) {
278 $qdevice_votes = $qdevice_setup->{votes
} // 1;
281 log_info
("configured votes - nodes: $node_votes");
282 log_info
("configured votes - qdevice: $qdevice_votes");
283 log_info
("current expected votes: $expected_votes");
284 log_info
("current total votes: $total_votes");
286 log_warn
("expected votes set to non-standard value '$expected_votes'.")
287 if $expected_votes != $node_votes + $qdevice_votes;
288 log_warn
("total votes < expected votes: $total_votes/$expected_votes!")
289 if $total_votes < $expected_votes;
291 my $conf_nodelist_count = scalar(keys %$conf_nodelist);
292 my $cfs_nodelist_count = scalar(keys %$cfs_nodelist);
293 log_warn
("cluster consists of less than three quorum-providing nodes!")
294 if $conf_nodelist_count < 3 && $conf_nodelist_count + $qdevice_votes < 3;
296 log_fail
("corosync.conf ($conf_nodelist_count) and pmxcfs ($cfs_nodelist_count) don't agree about size of nodelist.")
297 if $conf_nodelist_count != $cfs_nodelist_count;
299 print "\nChecking nodelist entries..\n";
300 for my $cs_node (sort keys %$conf_nodelist) {
301 my $entry = $conf_nodelist->{$cs_node};
302 log_fail
("$cs_node: no name entry in corosync.conf.")
303 if !defined($entry->{name
});
304 log_fail
("$cs_node: no nodeid configured in corosync.conf.")
305 if !defined($entry->{nodeid
});
307 for my $link (0..7) {
308 $gotLinks++ if defined($entry->{"ring${link}_addr"});
310 log_fail
("$cs_node: no ringX_addr (0 <= X <= 7) link defined in corosync.conf.") if $gotLinks <= 0;
312 my $verify_ring_ip = sub {
314 if (defined(my $ring = $entry->{$key})) {
315 my ($resolved_ip, undef) = PVE
::Corosync
::resolve_hostname_like_corosync
($ring, $conf);
316 if (defined($resolved_ip)) {
317 if ($resolved_ip ne $ring) {
318 log_warn
("$cs_node: $key '$ring' resolves to '$resolved_ip'.\n Consider replacing it with the currently resolved IP address.");
320 log_pass
("$cs_node: $key is configured to use IP address '$ring'");
323 log_fail
("$cs_node: unable to resolve $key '$ring' to an IP address according to Corosync's resolve strategy - cluster will potentially fail with Corosync 3.x/kronosnet!");
327 for my $link (0..7) {
328 $verify_ring_ip->("ring${link}_addr");
332 print "\nChecking totem settings..\n";
333 my $totem = $conf->{main
}->{totem
};
334 my $transport = $totem->{transport
};
335 if (defined($transport)) {
336 if ($transport ne 'knet') {
337 log_fail
("Corosync transport explicitly set to '$transport' instead of implicit default!");
339 log_pass
("Corosync transport set to '$transport'.");
342 log_pass
("Corosync transport set to implicit default.");
345 # TODO: are those values still up-to-date?
346 if ((!defined($totem->{secauth
}) || $totem->{secauth
} ne 'on') && (!defined($totem->{crypto_cipher
}) || $totem->{crypto_cipher
} eq 'none')) {
347 log_fail
("Corosync authentication/encryption is not explicitly enabled (secauth / crypto_cipher / crypto_hash)!");
349 if (defined($totem->{crypto_cipher
}) && $totem->{crypto_cipher
} eq '3des') {
350 log_fail
("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!"); # FIXME: can be removed?
352 log_pass
("Corosync encryption and authentication enabled.");
357 log_info
("run 'pvecm status' to get detailed cluster status..");
359 print_header
("CHECKING INSTALLED COROSYNC VERSION");
360 if (defined(my $corosync = $get_pkg->('corosync'))) {
361 if ($corosync->{OldVersion
} =~ m/^2\./) {
362 log_fail
("corosync 2.x installed, cluster-wide upgrade to 3.x needed!");
363 } elsif ($corosync->{OldVersion
} =~ m/^3\./) {
364 log_pass
("corosync 3.x installed.");
366 log_fail
("unexpected corosync version installed: $corosync->{OldVersion}!");
372 print_header
("CHECKING HYPER-CONVERGED CEPH STATUS");
374 if (PVE
::Ceph
::Tools
::check_ceph_inited
(1)) {
375 log_info
("hyper-converged ceph setup detected!");
377 log_skip
("no hyper-converged ceph setup detected!");
381 log_info
("getting Ceph status/health information..");
382 my $ceph_status = eval { PVE
::API2
::Ceph-
>status({ node
=> $nodename }); };
383 my $osd_flags = eval { PVE
::API2
::Ceph-
>get_flags({ node
=> $nodename }); };
384 my $noout_wanted = 1;
385 my $noout = $osd_flags && $osd_flags =~ m/noout/;
387 if (!$ceph_status || !$ceph_status->{health
}) {
388 log_fail
("unable to determine Ceph status!");
390 my $ceph_health = $ceph_status->{health
}->{status
};
392 log_fail
("unable to determine Ceph health!");
393 } elsif ($ceph_health eq 'HEALTH_OK') {
394 log_pass
("Ceph health reported as 'HEALTH_OK'.");
395 } elsif ($ceph_health eq 'HEALTH_WARN' && $noout && (keys %{$ceph_status->{health
}->{checks
}} == 1)) {
396 log_pass
("Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set.");
398 log_warn
("Ceph health reported as '$ceph_health'.\n Use the PVE ".
399 "dashboard or 'ceph -s' to determine the specific issues and try to resolve them.");
403 log_info
("getting Ceph OSD flags..");
406 log_fail
("unable to get Ceph OSD flags!");
408 if ($osd_flags =~ m/recovery_deletes/ && $osd_flags =~ m/purged_snapdirs/) {
409 log_pass
("all PGs have been scrubbed at least once while running Ceph Luminous."); # FIXME: remove?
411 log_fail
("missing 'recovery_deletes' and/or 'purged_snapdirs' flag, scrub of all PGs required before upgrading to Nautilus!");
416 # TODO: check OSD min-required version, if too low it breaks stuff!
418 log_info
("getting Ceph daemon versions..");
419 my $ceph_versions = eval { PVE
::Ceph
::Tools
::get_cluster_versions
(undef, 1); };
420 if (!$ceph_versions) {
421 log_fail
("unable to determine Ceph daemon versions!");
424 { 'key' => 'mon', 'name' => 'monitor' },
425 { 'key' => 'mgr', 'name' => 'manager' },
426 { 'key' => 'mds', 'name' => 'MDS' },
427 { 'key' => 'osd', 'name' => 'OSD' },
430 foreach my $service (@$services) {
431 my $name = $service->{name
};
432 if (my $service_versions = $ceph_versions->{$service->{key
}}) {
433 if (keys %$service_versions == 0) {
434 log_skip
("no running instances detected for daemon type $name.");
435 } elsif (keys %$service_versions == 1) {
436 log_pass
("single running version detected for daemon type $name.");
438 log_warn
("multiple running versions detected for daemon type $name!");
441 log_skip
("unable to determine versions of running Ceph $name instances.");
445 my $overall_versions = $ceph_versions->{overall
};
446 if (!$overall_versions) {
447 log_warn
("unable to determine overall Ceph daemon versions!");
448 } elsif (keys %$overall_versions == 1) {
449 log_pass
("single running overall version detected for all Ceph daemon types.");
450 if ((keys %$overall_versions)[0] =~ /^ceph version 15\./) {
454 log_warn
("overall version mismatch detected, check 'ceph versions' output for details!");
460 log_pass
("'noout' flag set to prevent rebalancing during cluster-wide upgrades.");
462 log_warn
("'noout' flag set, Ceph cluster upgrade seems finished.");
464 } elsif ($noout_wanted) {
465 log_warn
("'noout' flag not set - recommended to prevent rebalancing during upgrades.");
468 log_info
("checking Ceph config..");
469 my $conf = PVE
::Cluster
::cfs_read_file
('ceph.conf');
471 my $global = $conf->{global
};
473 my $global_monhost = $global->{mon_host
} // $global->{"mon host"} // $global->{"mon-host"};
474 if (!defined($global_monhost)) {
475 log_warn
("No 'mon_host' entry found in ceph config.\n It's recommended to add mon_host with all monitor addresses (without ports) to the global section.");
477 log_pass
("Found 'mon_host' entry.");
480 my $ipv6 = $global->{ms_bind_ipv6
} // $global->{"ms bind ipv6"} // $global->{"ms-bind-ipv6"};
482 my $ipv4 = $global->{ms_bind_ipv4
} // $global->{"ms bind ipv4"} // $global->{"ms-bind-ipv4"};
483 if ($ipv6 eq 'true' && (!defined($ipv4) || $ipv4 ne 'false')) {
484 log_warn
("'ms_bind_ipv6' is enabled but 'ms_bind_ipv4' is not disabled.\n Make sure to disable 'ms_bind_ipv4' for ipv6 only clusters, or add an ipv4 network to public/cluster network.");
486 log_pass
("'ms_bind_ipv6' is enabled and 'ms_bind_ipv4' disabled");
489 log_pass
("'ms_bind_ipv6' not enabled");
492 if (defined($global->{keyring
})) {
493 log_warn
("[global] config section contains 'keyring' option, which will prevent services from starting with Nautilus.\n Move 'keyring' option to [client] section instead.");
495 log_pass
("no 'keyring' option in [global] section found.");
499 log_warn
("Empty ceph config found");
502 my $local_ceph_ver = PVE
::Ceph
::Tools
::get_local_version
(1);
503 if (defined($local_ceph_ver)) {
504 if ($local_ceph_ver == 14) {
505 my $ceph_volume_osds = PVE
::Ceph
::Tools
::ceph_volume_list
();
506 my $scanned_osds = PVE
::Tools
::dir_glob_regex
('/etc/ceph/osd', '^.*\.json$');
507 if (-e
'/var/lib/ceph/osd/' && !defined($scanned_osds) && !(keys %$ceph_volume_osds)) {
508 log_warn
("local Ceph version is Nautilus, local OSDs detected, but no conversion from ceph-disk to ceph-volume done (yet).");
512 log_fail
("unable to determine local Ceph version.");
517 print_header
("MISCELLANEOUS CHECKS");
518 my $ssh_config = eval { PVE
::Tools
::file_get_contents
('/root/.ssh/config') };
519 if (defined($ssh_config)) {
520 log_fail
("Unsupported SSH Cipher configured for root in /root/.ssh/config: $1")
521 if $ssh_config =~ /^Ciphers .*(blowfish|arcfour|3des).*$/m;
523 log_skip
("No SSH config file found.");
526 log_info
("Checking common daemon services..");
527 $log_systemd_unit_state->('pveproxy.service');
528 $log_systemd_unit_state->('pvedaemon.service');
529 $log_systemd_unit_state->('pvestatd.service');
531 my $root_free = PVE
::Tools
::df
('/', 10);
532 log_warn
("Less than 2G free space on root file system.")
533 if defined($root_free) && $root_free->{avail
} < 2*1024*1024*1024;
535 log_info
("Checking for running guests..");
536 my $running_guests = 0;
538 my $vms = eval { PVE
::API2
::Qemu-
>vmlist({ node
=> $nodename }) };
539 log_warn
("Failed to retrieve information about this node's VMs - $@") if $@;
540 $running_guests += grep { $_->{status
} eq 'running' } @$vms if defined($vms);
542 my $cts = eval { PVE
::API2
::LXC-
>vmlist({ node
=> $nodename }) };
543 log_warn
("Failed to retrieve information about this node's CTs - $@") if $@;
544 $running_guests += grep { $_->{status
} eq 'running' } @$cts if defined($cts);
546 if ($running_guests > 0) {
547 log_warn
("$running_guests running guest(s) detected - consider migrating or stopping them.")
549 log_pass
("no running guest detected.")
552 log_info
("Checking if the local node's hostname '$nodename' is resolvable..");
553 my $local_ip = eval { PVE
::Network
::get_ip_from_hostname
($nodename) };
555 log_warn
("Failed to resolve hostname '$nodename' to IP - $@");
557 log_info
("Checking if resolved IP is configured on local node..");
558 my $cidr = Net
::IP
::ip_is_ipv6
($local_ip) ?
"$local_ip/128" : "$local_ip/32";
559 my $configured_ips = PVE
::Network
::get_local_ip_from_cidr
($cidr);
560 my $ip_count = scalar(@$configured_ips);
562 if ($ip_count <= 0) {
563 log_fail
("Resolved node IP '$local_ip' not configured or active for '$nodename'");
564 } elsif ($ip_count > 1) {
565 log_warn
("Resolved node IP '$local_ip' active on multiple ($ip_count) interfaces!");
567 log_pass
("Resolved node IP '$local_ip' configured and active on single interface.");
571 log_info
("Check node certificate's RSA key size");
572 my $certs = PVE
::API2
::Certificates-
>info({ node
=> $nodename });
578 'id-ecPublicKey' => {
584 my $certs_check_failed = 0;
585 foreach my $cert (@$certs) {
586 my ($type, $size, $fn) = $cert->@{qw(public-key-type public-key-bits filename)};
588 if (!defined($type) || !defined($size)) {
589 log_warn
("'$fn': cannot check certificate, failed to get it's type or size!");
592 my $check = $certs_check->{$type};
593 if (!defined($check)) {
594 log_warn
("'$fn': certificate's public key type '$type' unknown, check Debian Busters release notes");
# Compare the key size with the minimum stored for this key type and report
# that same threshold in both messages. A fixed "2048" in the text is only
# right when the entry's minsize is 2048 (presumably it differs per key
# type - confirm against the $certs_check table).
598 if ($size < $check->{minsize
}) {
599 log_fail
("'$fn', certificate's $check->{name} public key size is less than $check->{minsize} bit");
600 $certs_check_failed = 1;
602 log_pass
("Certificate '$fn' passed Debian Busters security level for TLS connections ($size >= $check->{minsize})");
607 __PACKAGE__-
>register_method ({
611 description
=> 'Check (pre-/post-)upgrade conditions.',
613 additionalProperties
=> 0,
617 returns
=> { type
=> 'null' },
621 check_pve_packages
();
622 check_cluster_corosync
();
624 check_storage_health
();
627 print_header
("SUMMARY");
630 $total += $_ for values %$counters;
632 print "TOTAL: $total\n";
633 print colored
("PASSED: $counters->{pass}\n", 'green');
634 print "SKIPPED: $counters->{skip}\n";
635 print colored
("WARNINGS: $counters->{warn}\n", 'yellow');
636 print colored
("FAILURES: $counters->{fail}\n", 'red');
638 if ($counters->{warn} > 0 || $counters->{fail
} > 0) {
639 my $color = $counters->{fail
} > 0 ?
'red' : 'yellow';
640 print colored
("\nATTENTION: Please check the output for detailed information!\n", $color);
641 print colored
("Try to solve the problems one at a time and then run this checklist tool again.\n", $color) if $counters->{fail
} > 0;
647 our $cmddef = [ __PACKAGE__
, 'checklist', [], {}];
649 # for now drop all unknown params and just check