use PVE::API2::LXC;
use PVE::API2::Qemu;
use PVE::API2::Certificates;
+use PVE::API2::Cluster::Ceph;
use PVE::AccessControl;
use PVE::Ceph::Tools;
use PVE::NodeConfig;
use PVE::RPCEnvironment;
use PVE::Storage;
+use PVE::Storage::Plugin;
use PVE::Tools qw(run_command split_list);
use PVE::QemuConfig;
use PVE::QemuServer;
use PVE::VZDump::Common;
+use PVE::LXC;
+use PVE::LXC::Config;
+use PVE::LXC::Setup;
use Term::ANSIColor;
my $info = PVE::Storage::storage_info($cfg);
- foreach my $storeid (keys %$info) {
+ foreach my $storeid (sort keys %$info) {
my $d = $info->{$storeid};
if ($d->{enabled}) {
if ($d->{type} eq 'sheepdog') {
if $conf_nodelist_count != $cfs_nodelist_count;
print "\nChecking nodelist entries..\n";
+ my $nodelist_pass = 1;
for my $cs_node (sort keys %$conf_nodelist) {
my $entry = $conf_nodelist->{$cs_node};
- log_fail("$cs_node: no name entry in corosync.conf.")
- if !defined($entry->{name});
- log_fail("$cs_node: no nodeid configured in corosync.conf.")
- if !defined($entry->{nodeid});
+ if (!defined($entry->{name})) {
+ $nodelist_pass = 0;
+ log_fail("$cs_node: no name entry in corosync.conf.");
+ }
+ if (!defined($entry->{nodeid})) {
+ $nodelist_pass = 0;
+ log_fail("$cs_node: no nodeid configured in corosync.conf.");
+ }
my $gotLinks = 0;
for my $link (0..7) {
$gotLinks++ if defined($entry->{"ring${link}_addr"});
}
- log_fail("$cs_node: no ringX_addr (0 <= X <= 7) link defined in corosync.conf.") if $gotLinks <= 0;
+ if ($gotLinks <= 0) {
+ $nodelist_pass = 0;
+ log_fail("$cs_node: no ringX_addr (0 <= X <= 7) link defined in corosync.conf.");
+ }
my $verify_ring_ip = sub {
my $key = shift;
my ($resolved_ip, undef) = PVE::Corosync::resolve_hostname_like_corosync($ring, $conf);
if (defined($resolved_ip)) {
if ($resolved_ip ne $ring) {
+ $nodelist_pass = 0;
log_warn("$cs_node: $key '$ring' resolves to '$resolved_ip'.\n Consider replacing it with the currently resolved IP address.");
- } else {
- log_pass("$cs_node: $key is configured to use IP address '$ring'");
}
} else {
+ $nodelist_pass = 0;
log_fail("$cs_node: unable to resolve $key '$ring' to an IP address according to Corosync's resolve strategy - cluster will potentially fail with Corosync 3.x/kronosnet!");
}
}
$verify_ring_ip->("ring${link}_addr");
}
}
+ log_pass("nodelist settings OK") if $nodelist_pass;
print "\nChecking totem settings..\n";
my $totem = $conf->{main}->{totem};
+ my $totem_pass = 1;
+
my $transport = $totem->{transport};
if (defined($transport)) {
if ($transport ne 'knet') {
+ $totem_pass = 0;
log_fail("Corosync transport explicitly set to '$transport' instead of implicit default!");
- } else {
- log_pass("Corosync transport set to '$transport'.");
}
- } else {
- log_pass("Corosync transport set to implicit default.");
}
# TODO: are those values still up-to-date?
if ((!defined($totem->{secauth}) || $totem->{secauth} ne 'on') && (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq 'none')) {
+ $totem_pass = 0;
log_fail("Corosync authentication/encryption is not explicitly enabled (secauth / crypto_cipher / crypto_hash)!");
- } else {
- if (defined($totem->{crypto_cipher}) && $totem->{crypto_cipher} eq '3des') {
- log_fail("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!"); # FIXME: can be removed?
- } else {
- log_pass("Corosync encryption and authentication enabled.");
- }
+ } elsif (defined($totem->{crypto_cipher}) && $totem->{crypto_cipher} eq '3des') {
+ $totem_pass = 0;
+ log_fail("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!"); # FIXME: can be removed?
}
+ log_pass("totem settings OK") if $totem_pass;
print "\n";
log_info("run 'pvecm status' to get detailed cluster status..");
- print_header("CHECKING INSTALLED COROSYNC VERSION");
if (defined(my $corosync = $get_pkg->('corosync'))) {
if ($corosync->{OldVersion} =~ m/^2\./) {
- log_fail("corosync 2.x installed, cluster-wide upgrade to 3.x needed!");
- } elsif ($corosync->{OldVersion} =~ m/^3\./) {
- log_pass("corosync 3.x installed.");
- } else {
- log_fail("unexpected corosync version installed: $corosync->{OldVersion}!");
+ log_fail("\ncorosync 2.x installed, cluster-wide upgrade to 3.x needed!");
+ } elsif ($corosync->{OldVersion} !~ m/^3\./) {
+ log_fail("\nunexpected corosync version installed: $corosync->{OldVersion}!");
}
}
}
log_info("getting Ceph status/health information..");
my $ceph_status = eval { PVE::API2::Ceph->status({ node => $nodename }); };
- my $osd_flags = eval { PVE::API2::Ceph->get_flags({ node => $nodename }); };
+ my $noout = eval { PVE::API2::Cluster::Ceph->get_flag({ flag => "noout" }); };
+ if ($@) {
+ log_fail("failed to get 'noout' flag status - $@");
+ }
+
my $noout_wanted = 1;
- my $noout = $osd_flags && $osd_flags =~ m/noout/;
if (!$ceph_status || !$ceph_status->{health}) {
log_fail("unable to determine Ceph status!");
}
}
- log_info("getting Ceph OSD flags..");
- eval {
- if (!$osd_flags) {
- log_fail("unable to get Ceph OSD flags!");
- } else {
- if ($osd_flags =~ m/recovery_deletes/ && $osd_flags =~ m/purged_snapdirs/) {
- log_pass("all PGs have been scrubbed at least once while running Ceph Luminous."); # FIXME: remove?
- } else {
- log_fail("missing 'recovery_deletes' and/or 'purged_snapdirs' flag, scrub of all PGs required before upgrading to Nautilus!");
- }
- }
- };
-
# TODO: check OSD min-required version, if too low it breaks stuff!
log_info("getting Ceph daemon versions..");
log_warn("unable to determine overall Ceph daemon versions!");
} elsif (keys %$overall_versions == 1) {
log_pass("single running overall version detected for all Ceph daemon types.");
- if ((keys %$overall_versions)[0] =~ /^ceph version 15\./) {
- $noout_wanted = 0;
- }
+ $noout_wanted = 0; # off post-upgrade, on pre-upgrade
} else {
log_warn("overall version mismatch detected, check 'ceph versions' output for details!");
}
my $global_monhost = $global->{mon_host} // $global->{"mon host"} // $global->{"mon-host"};
if (!defined($global_monhost)) {
log_warn("No 'mon_host' entry found in ceph config.\n It's recommended to add mon_host with all monitor addresses (without ports) to the global section.");
- } else {
- log_pass("Found 'mon_host' entry.");
}
my $ipv6 = $global->{ms_bind_ipv6} // $global->{"ms bind ipv6"} // $global->{"ms-bind-ipv6"};
my $ipv4 = $global->{ms_bind_ipv4} // $global->{"ms bind ipv4"} // $global->{"ms-bind-ipv4"};
if ($ipv6 eq 'true' && (!defined($ipv4) || $ipv4 ne 'false')) {
log_warn("'ms_bind_ipv6' is enabled but 'ms_bind_ipv4' is not disabled.\n Make sure to disable 'ms_bind_ipv4' for ipv6 only clusters, or add an ipv4 network to public/cluster network.");
- } else {
- log_pass("'ms_bind_ipv6' is enabled and 'ms_bind_ipv4' disabled");
}
- } else {
- log_pass("'ms_bind_ipv6' not enabled");
}
if (defined($global->{keyring})) {
log_warn("[global] config section contains 'keyring' option, which will prevent services from starting with Nautilus.\n Move 'keyring' option to [client] section instead.");
- } else {
- log_pass("no 'keyring' option in [global] section found.");
}
} else {
my $local_ceph_ver = PVE::Ceph::Tools::get_local_version(1);
if (defined($local_ceph_ver)) {
- if ($local_ceph_ver == 14) {
- my $ceph_volume_osds = PVE::Ceph::Tools::ceph_volume_list();
- my $scanned_osds = PVE::Tools::dir_glob_regex('/etc/ceph/osd', '^.*\.json$');
- if (-e '/var/lib/ceph/osd/' && !defined($scanned_osds) && !(keys %$ceph_volume_osds)) {
- log_warn("local Ceph version is Nautilus, local OSDs detected, but no conversion from ceph-disk to ceph-volume done (yet).");
- }
+ if ($local_ceph_ver <= 14) {
+ log_fail("local Ceph version too low, at least Octopus required..");
}
} else {
log_fail("unable to determine local Ceph version.");
sub check_custom_pool_roles {
log_info("Checking custom roles for pool permissions..");
+ if (! -f "/etc/pve/user.cfg") {
+ log_skip("user.cfg does not exist");
+ return;
+ }
+
my $raw = eval { PVE::Tools::file_get_contents('/etc/pve/user.cfg'); };
if ($@) {
log_fail("Failed to read '/etc/pve/user.cfg' - $@");
push @$affected_guests, "VM $vmid" if defined($desc) && length($desc) > 8 * 1024;
}
if (scalar($affected_guests->@*) > 0) {
- log_warn("Node config description of the following nodes too long for new limit of 64 KiB:\n"
+ log_warn("Guest config description of the following virtual-guests too long for new limit of 64 KiB:\n"
." * " . join("\n * ", $affected_guests->@*));
} else {
log_pass("All guest config descriptions fit in the new limit of 8 KiB");
}
}
+# Check that every guest volume lives on a storage whose configured content
+# types allow it.
+#
+# Three passes:
+#   1. Scan enabled storages that lack 'images' and/or 'rootdir' and remember
+#      every guest volume found there as "potentially affected".
+#   2. Walk all CT and VM configs (including snapshots and unused volumes) and
+#      warn about each referenced volume whose storage is missing or lacks the
+#      required content type.
+#   3. Check the remaining, unreferenced volumes from pass 1 against the guest
+#      that owns them (as derived from the volume name).
+#
+# Takes no parameters and returns nothing useful; all findings are reported
+# through the log_* helpers.
+sub check_storage_content {
+    log_info("Checking storage content type configuration..");
+
+    my $found_referenced;
+    my $found_unreferenced;
+    my $pass = 1;
+
+    my $storage_cfg = PVE::Storage::config();
+
+    my $potentially_affected = {};
+    my $referenced_volids = {};
+
+    for my $storeid (sort keys $storage_cfg->{ids}->%*) {
+        my $scfg = $storage_cfg->{ids}->{$storeid};
+
+        next if !PVE::Storage::storage_check_enabled($storage_cfg, $storeid, undef, 1);
+
+        my $valid_content = PVE::Storage::Plugin::valid_content_types($scfg->{type});
+
+        if (scalar(keys $scfg->{content}->%*) == 0 && !$valid_content->{none}) {
+            $pass = 0;
+            log_fail("storage '$storeid' does not support configured content type 'none'");
+            delete $scfg->{content}->{none}; # scan for guest images below
+        }
+
+        # storages allowing both guest volume types cannot be misconfigured in this regard
+        next if $scfg->{content}->{images} && $scfg->{content}->{rootdir};
+
+        # Skip 'iscsi(direct)' (and foreign plugins with potentially similar behavior) with 'none',
+        # because that means "use LUNs directly" and vdisk_list() in PVE 6.x still lists those.
+        # It's enough to *not* skip 'dir', because it is the only other storage that supports 'none'
+        # and 'images' or 'rootdir', hence being potentially misconfigured.
+        next if $scfg->{type} ne 'dir' && $scfg->{content}->{none};
+
+        eval { PVE::Storage::activate_storage($storage_cfg, $storeid) };
+        if (my $err = $@) {
+            log_warn("activating '$storeid' failed - $err");
+            next;
+        }
+
+        my $res = eval { PVE::Storage::vdisk_list($storage_cfg, $storeid); };
+        if (my $err = $@) {
+            log_warn("listing images on '$storeid' failed - $err");
+            next;
+        }
+        my @volids = map { $_->{volid} } $res->{$storeid}->@*;
+
+        for my $volid (@volids) {
+            $potentially_affected->{$volid} = 1;
+        }
+
+        my $number = scalar(@volids);
+        if ($number > 0 && !$scfg->{content}->{images} && !$scfg->{content}->{rootdir}) {
+            log_info("storage '$storeid' - neither content type 'images' nor 'rootdir' configured"
+                .", but found $number guest volume(s)");
+        }
+    }
+
+    # Check a single volume of guest $vmid ($vmtype is 'qemu' or 'lxc'). $reference is a
+    # human-readable description of where the volume is used, or 'unreferenced'.
+    my $check_volid = sub {
+        my ($volid, $vmid, $vmtype, $reference) = @_;
+
+        $referenced_volids->{$volid} = 1 if $reference ne 'unreferenced';
+
+        my $guesttext = $vmtype eq 'qemu' ? 'VM' : 'CT';
+        my $prefix = "$guesttext $vmid - volume '$volid' ($reference)";
+
+        # with the noerr flag set, anything that is not a storage volume ID yields undef
+        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
+        return if !defined($storeid);
+
+        my $scfg = $storage_cfg->{ids}->{$storeid};
+        if (!$scfg) {
+            $pass = 0;
+            log_warn("$prefix - storage does not exist!");
+            return;
+        }
+
+        # cannot use parse_volname for containers, as it can return 'images'
+        # but containers cannot have ISO images attached, so assume 'rootdir'
+        my $vtype = 'rootdir';
+        if ($vmtype eq 'qemu') {
+            ($vtype) = eval { PVE::Storage::parse_volname($storage_cfg, $volid); };
+            return if $@;
+        }
+
+        if (!$scfg->{content}->{$vtype}) {
+            $found_referenced = 1 if $reference ne 'unreferenced';
+            $found_unreferenced = 1 if $reference eq 'unreferenced';
+            $pass = 0;
+            log_warn("$prefix - storage does not have content type '$vtype' configured.");
+        }
+    };
+
+    # vmid => guest type, used later to attribute unreferenced volumes
+    my $guests = {};
+
+    my $cts = PVE::LXC::config_list();
+    for my $vmid (sort { $a <=> $b } keys %$cts) {
+        $guests->{$vmid} = 'lxc';
+
+        my $conf = PVE::LXC::Config->load_config($vmid);
+
+        my $volhash = {};
+
+        my $check = sub {
+            my ($ms, $mountpoint, $reference) = @_;
+
+            my $volid = $mountpoint->{volume};
+            return if !$volid || $mountpoint->{type} ne 'volume';
+
+            return if $volhash->{$volid}; # volume might be referenced multiple times
+
+            $volhash->{$volid} = 1;
+
+            $check_volid->($volid, $vmid, 'lxc', $reference);
+        };
+
+        my $opts = { include_unused => 1 };
+        PVE::LXC::Config->foreach_volume_full($conf, $opts, $check, 'in config');
+        for my $snapname (keys $conf->{snapshots}->%*) {
+            my $snap = $conf->{snapshots}->{$snapname};
+            PVE::LXC::Config->foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
+        }
+    }
+
+    my $vms = PVE::QemuServer::config_list();
+    for my $vmid (sort { $a <=> $b } keys %$vms) {
+        $guests->{$vmid} = 'qemu';
+
+        my $conf = PVE::QemuConfig->load_config($vmid);
+
+        my $volhash = {};
+
+        my $check = sub {
+            my ($key, $drive, $reference) = @_;
+
+            my $volid = $drive->{file};
+            # nothing to check without a volume; absolute paths are not storage volumes
+            return if !defined($volid) || $volid =~ m|^/|;
+
+            return if $volhash->{$volid}; # volume might be referenced multiple times
+
+            $volhash->{$volid} = 1;
+
+            $check_volid->($volid, $vmid, 'qemu', $reference);
+        };
+
+        my $opts = {
+            extra_keys => ['vmstate'],
+            include_unused => 1,
+        };
+        # startup from a suspended state works even without 'images' content type on the
+        # state storage, so do not check 'vmstate' for $conf
+        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $check, 'in config');
+        for my $snapname (keys $conf->{snapshots}->%*) {
+            my $snap = $conf->{snapshots}->{$snapname};
+            PVE::QemuConfig->foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
+        }
+    }
+
+    if ($found_referenced) {
+        log_warn("Proxmox VE 7.0 enforces stricter content type checks. The guests above " .
+            "might not work until the storage configuration is fixed.");
+    }
+
+    for my $volid (sort keys $potentially_affected->%*) {
+        next if $referenced_volids->{$volid}; # already checked
+
+        # Guard parse_volname() the same way $check_volid does: a single volume name
+        # that cannot be parsed must not abort the whole checklist run. Without an
+        # owner ID the volume cannot be attributed to a guest, so skip it.
+        my (undef, undef, $vmid) = eval { PVE::Storage::parse_volname($storage_cfg, $volid) };
+        next if !defined($vmid);
+
+        my $vmtype = $guests->{$vmid};
+        next if !$vmtype;
+
+        $check_volid->($volid, $vmid, $vmtype, 'unreferenced');
+    }
+
+    if ($found_unreferenced) {
+        log_warn("When migrating, Proxmox VE 7.0 only scans storages with the appropriate " .
+            "content types for unreferenced guest volumes.");
+    }
+
+    if ($pass) {
+        log_pass("no problems found");
+    }
+}
+
+# Check whether the containers on this node can run in a unified cgroup v2 layout.
+#
+# The check is skipped when the host kernel command line explicitly selects the
+# legacy hybrid hierarchy. Running containers are inspected through
+# /proc/<pid>/root; stopped containers are mounted for the check and unmounted
+# again afterwards. Checking stops at the first container found to be
+# incompatible.
+#
+# Takes no parameters (uses the file-level $nodename) and reports everything
+# through the log_* helpers.
+sub check_containers_cgroup_compat {
+
+    my $kernel_cli = PVE::Tools::file_get_contents('/proc/cmdline');
+    if ($kernel_cli =~ /systemd\.unified_cgroup_hierarchy=0/) {
+        log_skip("System explicitly configured for legacy hybrid cgroup hierarchy.");
+        return;
+    }
+
+    # Returns true if the CT with config $conf and root file system at $rootdir is
+    # expected to cope with cgroup v2, false otherwise.
+    my $supports_cgroupv2 = sub {
+        my ($conf, $rootdir, $ctid) = @_;
+
+        # Derive the systemd version from the name of its shared library.
+        my $get_systemd_version = sub {
+            my $sd_lib_dir = -d "/lib/systemd" ? "/lib/systemd" : "/usr/lib/systemd";
+            # single quotes keep the backslash, so the dot before 'so' stays a literal dot
+            my $libsd = PVE::Tools::dir_glob_regex($sd_lib_dir, 'libsystemd-shared-.+\.so');
+            if (defined($libsd) && $libsd =~ /libsystemd-shared-(\d+)\.so/) {
+                return $1;
+            }
+
+            return;
+        };
+
+        # NOTE(review): the absolute paths below are presumably resolved relative to
+        # $rootdir because this runs via protected_call() - confirm against PVE::LXC::Setup.
+        my $unified_cgroupv2_support = sub {
+            # https://www.freedesktop.org/software/systemd/man/systemd.html
+            # systemd is installed as symlink to /sbin/init
+            my $systemd = CORE::readlink('/sbin/init');
+
+            # assume non-systemd init will run with unified cgroupv2
+            if (!defined($systemd) || $systemd !~ m@/systemd$@) {
+                return 1;
+            }
+
+            # systemd version 232 (e.g. debian stretch) supports the unified hierarchy
+            my $sdver = $get_systemd_version->();
+            if (!defined($sdver) || $sdver < 232) {
+                return 0;
+            }
+
+            return 1;
+        };
+
+        my $ostype = $conf->{ostype};
+        if (!defined($ostype)) {
+            log_warn("Found CT ($ctid) without 'ostype' set!");
+        } elsif ($ostype eq 'devuan' || $ostype eq 'alpine') {
+            return 1; # no systemd, no cgroup problems
+        }
+
+        my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
+        return $lxc_setup->protected_call($unified_cgroupv2_support);
+    };
+
+    my $log_problem = sub {
+        my ($ctid) = @_;
+        log_warn("Found at least one CT ($ctid) which does not support running in a unified cgroup v2" .
+            " layout.\n    Either upgrade the Container distro or set systemd.unified_cgroup_hierarchy=0 " .
+            "in the Proxmox VE hosts' kernel cmdline! Skipping further CT compat checks."
+        );
+    };
+
+    my $cts = eval { PVE::API2::LXC->vmlist({ node => $nodename }) };
+    if ($@) {
+        log_warn("Failed to retrieve information about this node's CTs - $@");
+        return;
+    }
+
+    if (!defined($cts) || !scalar(@$cts)) {
+        log_skip("No containers on node detected.");
+        return;
+    }
+
+    # The list entries are hash references, so compare their vmid fields; comparing
+    # the references themselves would order by memory address.
+    my $by_vmid = sub { $a->{vmid} <=> $b->{vmid} };
+    my @running_cts = sort $by_vmid grep { $_->{status} eq 'running' } @$cts;
+    my @offline_cts = sort $by_vmid grep { $_->{status} ne 'running' } @$cts;
+
+    for my $ct (@running_cts) {
+        my $ctid = $ct->{vmid};
+        my $pid = eval { PVE::LXC::find_lxc_pid($ctid) };
+        if (my $err = $@) {
+            log_warn("Failed to get PID for running CT $ctid - $err");
+            next;
+        }
+        my $rootdir = "/proc/$pid/root";
+
+        # load the config inside the eval too, so one unreadable CT config cannot
+        # abort the remaining checks
+        my $ret = eval {
+            my $conf = PVE::LXC::Config->load_config($ctid);
+            $supports_cgroupv2->($conf, $rootdir, $ctid);
+        };
+        if (my $err = $@) {
+            log_warn("Failed to get cgroup support status for CT $ctid - $err");
+            next;
+        }
+        if (!$ret) {
+            $log_problem->($ctid);
+            return;
+        }
+    }
+
+    my $storage_cfg = PVE::Storage::config();
+    for my $ct (@offline_cts) {
+        my $ctid = $ct->{vmid};
+        my ($conf, $rootdir, $ret);
+        eval {
+            $conf = PVE::LXC::Config->load_config($ctid);
+            $rootdir = PVE::LXC::mount_all($ctid, $storage_cfg, $conf);
+            $ret = $supports_cgroupv2->($conf, $rootdir, $ctid);
+        };
+        my $err = $@; # save before the cleanup eval below resets $@
+
+        # Best-effort cleanup on every path; there is nothing to unmount if the
+        # config could not even be loaded.
+        eval { PVE::LXC::umount_all($ctid, $storage_cfg, $conf) } if defined($conf);
+
+        if ($err) {
+            log_warn("Failed to load config and mount CT $ctid - $err");
+            next;
+        }
+        if (!$ret) {
+            $log_problem->($ctid);
+            last;
+        }
+    }
+}
+
sub check_misc {
print_header("MISCELLANEOUS CHECKS");
my $ssh_config = eval { PVE::Tools::file_get_contents('/root/.ssh/config') };
check_cifs_credential_location();
check_custom_pool_roles();
check_description_lengths();
+ check_storage_content();
}
__PACKAGE__->register_method ({
parameters => {
additionalProperties => 0,
properties => {
+ full => {
+ description => 'perform additional, expensive checks.',
+ type => 'boolean',
+ optional => 1,
+ default => 0,
+ },
},
},
returns => { type => 'null' },
check_storage_health();
check_misc();
+ if ($param->{full}) {
+ check_containers_cgroup_compat();
+ } else {
+ log_skip("NOTE: Expensive checks, like CT cgroupv2 compat, not performed without '--full' parameter");
+ }
+
print_header("SUMMARY");
my $total = 0;
our $cmddef = [ __PACKAGE__, 'checklist', [], {}];
-# for now drop all unknown params and just check
-@ARGV = ();
-
1;