1 package PVE
::CLI
::pve7to8
;
12 use PVE
::API2
::Certificates
;
13 use PVE
::API2
::Cluster
::Ceph
;
15 use PVE
::AccessControl
;
22 use PVE
::RPCEnvironment
;
24 use PVE
::Storage
::Plugin
;
25 use PVE
::Tools
qw(run_command split_list);
28 use PVE
::VZDump
::Common
;
37 use base
qw(PVE::CLIHandler);
39 my $nodename = PVE
::INotify
::nodename
();
41 sub setup_environment
{
42 PVE
::RPCEnvironment-
>setup_default_cli_env();
45 my ($min_pve_major, $min_pve_minor, $min_pve_pkgrel) = (7, 4, 1);
47 my $ceph_release2code = {
56 my $ceph_supported_release = 17; # the version we support for upgrading (i.e., available on both)
57 my $ceph_supported_code_name = $ceph_release2code->{"$ceph_supported_release"}
58 or die "inconsistent source code, could not map expected ceph version to code name!";
60 my $forced_legacy_cgroup = 0;
70 my ($level, $line) = @_;
72 $counters->{$level}++ if defined($level) && defined($counters->{$level});
74 print uc($level), ': ' if defined($level);
80 $log_line->('pass', @_);
85 $log_line->('info', @_);
88 $log_line->('skip', @_);
91 print color
('yellow');
92 $log_line->('warn', @_);
96 print color
('bold red');
97 $log_line->('fail', @_);
101 my $print_header_first = 1;
104 print "\n" if !$print_header_first;
106 $print_header_first = 0;
109 my $get_systemd_unit_state = sub {
110 my ($unit, $surpress_stderr) = @_;
113 my $filter_output = sub {
118 my %extra = (outfunc
=> $filter_output, noerr
=> 1);
119 $extra{errfunc
} = sub { } if $surpress_stderr;
122 run_command
(['systemctl', 'is-enabled', "$unit"], %extra);
123 return if !defined($state);
124 run_command
(['systemctl', 'is-active', "$unit"], %extra);
127 return $state // 'unknown';
129 my $log_systemd_unit_state = sub {
130 my ($unit, $no_fail_on_inactive) = @_;
132 my $log_method = \
&log_warn
;
134 my $state = $get_systemd_unit_state->($unit);
135 if ($state eq 'active') {
136 $log_method = \
&log_pass
;
137 } elsif ($state eq 'inactive') {
138 $log_method = $no_fail_on_inactive ? \
&log_warn
: \
&log_fail
;
139 } elsif ($state eq 'failed') {
140 $log_method = \
&log_fail
;
143 $log_method->("systemd unit '$unit' is in state '$state'");
150 $versions = eval { PVE
::API2
::APT-
>versions({ node
=> $nodename }) } if !defined($versions);
152 if (!defined($versions)) {
153 my $msg = "unable to retrieve package version information";
154 $msg .= "- $@" if $@;
159 my $pkgs = [ grep { $_->{Package
} eq $pkg } @$versions ];
160 if (!defined $pkgs || $pkgs == 0) {
161 log_fail
("unable to determine installed $pkg version.");
168 sub check_pve_packages
{
169 print_header
("CHECKING VERSION INFORMATION FOR PVE PACKAGES");
171 print "Checking for package updates..\n";
172 my $updates = eval { PVE
::API2
::APT-
>list_updates({ node
=> $nodename }); };
173 if (!defined($updates)) {
174 log_warn
("$@") if $@;
175 log_fail
("unable to retrieve list of package updates!");
176 } elsif (@$updates > 0) {
177 my $pkgs = join(', ', map { $_->{Package
} } @$updates);
178 log_warn
("updates for the following packages are available:\n $pkgs");
180 log_pass
("all packages up-to-date");
183 print "\nChecking proxmox-ve package version..\n";
184 if (defined(my $proxmox_ve = $get_pkg->('proxmox-ve'))) {
185 # TODO: update to native version for pve8to9
186 my $min_pve_ver = "$min_pve_major.$min_pve_minor-$min_pve_pkgrel";
188 my ($maj, $min, $pkgrel) = $proxmox_ve->{OldVersion
} =~ m/^(\d+)\.(\d+)[.-](\d+)/;
192 if ($maj > $min_pve_major) {
193 log_pass
("already upgraded to Proxmox VE " . ($min_pve_major + 1));
195 } elsif ($maj >= $min_pve_major && $min >= $min_pve_minor && $pkgrel >= $min_pve_pkgrel) {
196 log_pass
("proxmox-ve package has version >= $min_pve_ver");
198 log_fail
("proxmox-ve package is too old, please upgrade to >= $min_pve_ver!");
201 my ($krunning, $kinstalled) = (qr/6\.(?:2|5)/, 'pve-kernel-6.2');
203 # a few setups avoided 5.15 in clusters with mixed CPUs, so allow older kernels too
204 ($krunning, $kinstalled) = (qr/(?:5\.(?:13|15)|6\.2)/, 'pve-kernel-5.15');
207 print "\nChecking running kernel version..\n";
208 my $kernel_ver = $proxmox_ve->{RunningKernel
};
209 if (!defined($kernel_ver)) {
210 log_fail
("unable to determine running kernel version.");
211 } elsif ($kernel_ver =~ /^$krunning/) {
213 log_pass
("running new kernel '$kernel_ver' after upgrade.");
215 log_pass
("running kernel '$kernel_ver' is considered suitable for upgrade.");
217 } elsif ($get_pkg->($kinstalled)) {
218 # with 6.2 kernel being available in both we might want to fine-tune the check?
219 log_warn
("a suitable kernel ($kinstalled) is intalled, but an unsuitable ($kernel_ver) is booted, missing reboot?!");
221 log_warn
("unexpected running and installed kernel '$kernel_ver'.");
224 if ($upgraded && $kernel_ver =~ /^$krunning/) {
225 my $outdated_kernel_meta_pkgs = [];
226 for my $kernel_meta_version ('5.4', '5.11', '5.13', '5.15') {
227 my $pkg = "pve-kernel-${kernel_meta_version}";
228 if ($get_pkg->($pkg)) {
229 push @$outdated_kernel_meta_pkgs, $pkg;
232 if (scalar(@$outdated_kernel_meta_pkgs) > 0) {
234 "Found outdated kernel meta-packages, taking up extra space on boot partitions.\n"
235 ." After a successful upgrade, you can remove them using this command:\n"
236 ." apt remove " . join(' ', $outdated_kernel_meta_pkgs->@*)
241 log_fail
("proxmox-ve package not found!");
246 sub check_storage_health
{
247 print_header
("CHECKING CONFIGURED STORAGES");
248 my $cfg = PVE
::Storage
::config
();
252 my $info = PVE
::Storage
::storage_info
($cfg);
254 foreach my $storeid (sort keys %$info) {
255 my $d = $info->{$storeid};
258 log_pass
("storage '$storeid' enabled and active.");
260 log_warn
("storage '$storeid' enabled but not active!");
263 log_skip
("storage '$storeid' disabled.");
267 check_storage_content
();
268 eval { check_storage_content_dirs
() };
269 log_fail
("failed to check storage content directories - $@") if $@;
272 sub check_cluster_corosync
{
273 print_header
("CHECKING CLUSTER HEALTH/SETTINGS");
275 if (!PVE
::Corosync
::check_conf_exists
(1)) {
276 log_skip
("standalone node.");
280 $log_systemd_unit_state->('pve-cluster.service');
281 $log_systemd_unit_state->('corosync.service');
283 if (PVE
::Cluster
::check_cfs_quorum
(1)) {
284 log_pass
("Cluster Filesystem is quorate.");
286 log_fail
("Cluster Filesystem readonly, lost quorum?!");
289 my $conf = PVE
::Cluster
::cfs_read_file
('corosync.conf');
290 my $conf_nodelist = PVE
::Corosync
::nodelist
($conf);
293 print "\nAnalzying quorum settings and state..\n";
294 if (!defined($conf_nodelist)) {
295 log_fail
("unable to retrieve nodelist from corosync.conf");
297 if (grep { $conf_nodelist->{$_}->{quorum_votes
} != 1 } keys %$conf_nodelist) {
298 log_warn
("non-default quorum_votes distribution detected!");
300 map { $node_votes += $conf_nodelist->{$_}->{quorum_votes
} // 0 } keys %$conf_nodelist;
303 my ($expected_votes, $total_votes);
304 my $filter_output = sub {
306 ($expected_votes) = $line =~ /^Expected votes:\s*(\d+)\s*$/
307 if !defined($expected_votes);
308 ($total_votes) = $line =~ /^Total votes:\s*(\d+)\s*$/
309 if !defined($total_votes);
312 run_command
(['corosync-quorumtool', '-s'], outfunc
=> $filter_output, noerr
=> 1);
315 if (!defined($expected_votes)) {
316 log_fail
("unable to get expected number of votes, assuming 0.");
319 if (!defined($total_votes)) {
320 log_fail
("unable to get expected number of votes, assuming 0.");
324 my $cfs_nodelist = PVE
::Cluster
::get_clinfo
()->{nodelist
};
325 my $offline_nodes = grep { $cfs_nodelist->{$_}->{online
} != 1 } keys %$cfs_nodelist;
326 if ($offline_nodes > 0) {
327 log_fail
("$offline_nodes nodes are offline!");
330 my $qdevice_votes = 0;
331 if (my $qdevice_setup = $conf->{main
}->{quorum
}->{device
}) {
332 $qdevice_votes = $qdevice_setup->{votes
} // 1;
335 log_info
("configured votes - nodes: $node_votes");
336 log_info
("configured votes - qdevice: $qdevice_votes");
337 log_info
("current expected votes: $expected_votes");
338 log_info
("current total votes: $total_votes");
340 log_warn
("expected votes set to non-standard value '$expected_votes'.")
341 if $expected_votes != $node_votes + $qdevice_votes;
342 log_warn
("total votes < expected votes: $total_votes/$expected_votes!")
343 if $total_votes < $expected_votes;
345 my $conf_nodelist_count = scalar(keys %$conf_nodelist);
346 my $cfs_nodelist_count = scalar(keys %$cfs_nodelist);
347 log_warn
("cluster consists of less than three quorum-providing nodes!")
348 if $conf_nodelist_count < 3 && $conf_nodelist_count + $qdevice_votes < 3;
350 log_fail
("corosync.conf ($conf_nodelist_count) and pmxcfs ($cfs_nodelist_count) don't agree about size of nodelist.")
351 if $conf_nodelist_count != $cfs_nodelist_count;
353 print "\nChecking nodelist entries..\n";
354 my $nodelist_pass = 1;
355 for my $cs_node (sort keys %$conf_nodelist) {
356 my $entry = $conf_nodelist->{$cs_node};
357 if (!defined($entry->{name
})) {
359 log_fail
("$cs_node: no name entry in corosync.conf.");
361 if (!defined($entry->{nodeid
})) {
363 log_fail
("$cs_node: no nodeid configured in corosync.conf.");
366 for my $link (0..7) {
367 $gotLinks++ if defined($entry->{"ring${link}_addr"});
369 if ($gotLinks <= 0) {
371 log_fail
("$cs_node: no ringX_addr (0 <= X <= 7) link defined in corosync.conf.");
374 my $verify_ring_ip = sub {
376 if (defined(my $ring = $entry->{$key})) {
377 my ($resolved_ip, undef) = PVE
::Corosync
::resolve_hostname_like_corosync
($ring, $conf);
378 if (defined($resolved_ip)) {
379 if ($resolved_ip ne $ring) {
382 "$cs_node: $key '$ring' resolves to '$resolved_ip'.\n"
383 ." Consider replacing it with the currently resolved IP address."
389 "$cs_node: unable to resolve $key '$ring' to an IP address according to Corosync's"
390 ." resolve strategy - cluster will potentially fail with Corosync 3.x/kronosnet!"
395 for my $link (0..7) {
396 $verify_ring_ip->("ring${link}_addr");
399 log_pass
("nodelist settings OK") if $nodelist_pass;
401 print "\nChecking totem settings..\n";
402 my $totem = $conf->{main
}->{totem
};
405 my $transport = $totem->{transport
};
406 if (defined($transport)) {
407 if ($transport ne 'knet') {
409 log_fail
("Corosync transport explicitly set to '$transport' instead of implicit default!");
413 # TODO: are those values still up-to-date?
414 if ((!defined($totem->{secauth
}) || $totem->{secauth
} ne 'on') && (!defined($totem->{crypto_cipher
}) || $totem->{crypto_cipher
} eq 'none')) {
416 log_fail
("Corosync authentication/encryption is not explicitly enabled (secauth / crypto_cipher / crypto_hash)!");
417 } elsif (defined($totem->{crypto_cipher
}) && $totem->{crypto_cipher
} eq '3des') {
419 log_fail
("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!"); # FIXME: can be removed?
422 log_pass
("totem settings OK") if $totem_pass;
424 log_info
("run 'pvecm status' to get detailed cluster status..");
426 if (defined(my $corosync = $get_pkg->('corosync'))) {
427 if ($corosync->{OldVersion
} =~ m/^2\./) {
428 log_fail
("\ncorosync 2.x installed, cluster-wide upgrade to 3.x needed!");
429 } elsif ($corosync->{OldVersion
} !~ m/^3\./) {
430 log_fail
("\nunexpected corosync version installed: $corosync->{OldVersion}!");
436 print_header
("CHECKING HYPER-CONVERGED CEPH STATUS");
438 if (PVE
::Ceph
::Tools
::check_ceph_inited
(1)) {
439 log_info
("hyper-converged ceph setup detected!");
441 log_skip
("no hyper-converged ceph setup detected!");
445 log_info
("getting Ceph status/health information..");
446 my $ceph_status = eval { PVE
::API2
::Ceph-
>status({ node
=> $nodename }); };
447 my $noout = eval { PVE
::API2
::Cluster
::Ceph-
>get_flag({ flag
=> "noout" }); };
449 log_fail
("failed to get 'noout' flag status - $@");
452 my $noout_wanted = 1;
454 if (!$ceph_status || !$ceph_status->{health
}) {
455 log_fail
("unable to determine Ceph status!");
457 my $ceph_health = $ceph_status->{health
}->{status
};
459 log_fail
("unable to determine Ceph health!");
460 } elsif ($ceph_health eq 'HEALTH_OK') {
461 log_pass
("Ceph health reported as 'HEALTH_OK'.");
462 } elsif ($ceph_health eq 'HEALTH_WARN' && $noout && (keys %{$ceph_status->{health
}->{checks
}} == 1)) {
463 log_pass
("Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set.");
466 "Ceph health reported as '$ceph_health'.\n Use the PVE dashboard or 'ceph -s'"
467 ." to determine the specific issues and try to resolve them."
472 # TODO: check OSD min-required version, if too low it breaks stuff!
474 log_info
("checking local Ceph version..");
475 if (my $release = eval { PVE
::Ceph
::Tools
::get_local_version
(1) }) {
476 my $code_name = $ceph_release2code->{"$release"} || 'unknown';
477 if ($release == $ceph_supported_release) {
478 log_pass
("found expected Ceph $ceph_supported_release $ceph_supported_code_name release.")
479 } elsif ($release > $ceph_supported_release) {
481 "found newer Ceph release $release $code_name as the expected $ceph_supported_release"
482 ." $ceph_supported_code_name, installed third party repos?!"
486 "Hyper-converged Ceph $release $code_name is to old for upgrade!\n"
487 ." Upgrade Ceph first to $ceph_supported_code_name following our how-to:\n"
488 ." <https://pve.proxmox.com/wiki/Category:Ceph_Upgrade>"
492 log_fail
("unable to determine local Ceph version!");
495 log_info
("getting Ceph daemon versions..");
496 my $ceph_versions = eval { PVE
::Ceph
::Tools
::get_cluster_versions
(undef, 1); };
497 if (!$ceph_versions) {
498 log_fail
("unable to determine Ceph daemon versions!");
501 { 'key' => 'mon', 'name' => 'monitor' },
502 { 'key' => 'mgr', 'name' => 'manager' },
503 { 'key' => 'mds', 'name' => 'MDS' },
504 { 'key' => 'osd', 'name' => 'OSD' },
507 my $ceph_versions_simple = {};
508 my $ceph_versions_commits = {};
509 for my $type (keys %$ceph_versions) {
510 for my $full_version (keys $ceph_versions->{$type}->%*) {
511 if ($full_version =~ m/^(.*) \((.*)\).*\(.*\)$/) {
512 # String is in the form of
513 # ceph version 17.2.6 (810db68029296377607028a6c6da1ec06f5a2b27) quincy (stable)
514 # only check the first part, e.g. 'ceph version 17.2.6', the commit hash can
516 $ceph_versions_simple->{$type}->{$1} = 1;
517 $ceph_versions_commits->{$type}->{$2} = 1;
522 foreach my $service (@$services) {
523 my ($name, $key) = $service->@{'name', 'key'};
524 if (my $service_versions = $ceph_versions_simple->{$key}) {
525 if (keys %$service_versions == 0) {
526 log_skip
("no running instances detected for daemon type $name.");
527 } elsif (keys %$service_versions == 1) {
528 log_pass
("single running version detected for daemon type $name.");
530 log_warn
("multiple running versions detected for daemon type $name!");
533 log_skip
("unable to determine versions of running Ceph $name instances.");
535 my $service_commits = $ceph_versions_commits->{$key};
536 log_info
("different builds of same version detected for an $name. Are you in the middle of the upgrade?")
537 if $service_commits && keys %$service_commits > 1;
540 my $overall_versions = $ceph_versions->{overall
};
541 if (!$overall_versions) {
542 log_warn
("unable to determine overall Ceph daemon versions!");
543 } elsif (keys %$overall_versions == 1) {
544 log_pass
("single running overall version detected for all Ceph daemon types.");
545 $noout_wanted = 0; # off post-upgrade, on pre-upgrade
546 } elsif (keys $ceph_versions_simple->{overall
}->%* != 1) {
547 log_warn
("overall version mismatch detected, check 'ceph versions' output for details!");
553 log_pass
("'noout' flag set to prevent rebalancing during cluster-wide upgrades.");
555 log_warn
("'noout' flag set, Ceph cluster upgrade seems finished.");
557 } elsif ($noout_wanted) {
558 log_warn
("'noout' flag not set - recommended to prevent rebalancing during upgrades.");
561 log_info
("checking Ceph config..");
562 my $conf = PVE
::Cluster
::cfs_read_file
('ceph.conf');
564 my $global = $conf->{global
};
566 my $global_monhost = $global->{mon_host
} // $global->{"mon host"} // $global->{"mon-host"};
567 if (!defined($global_monhost)) {
569 "No 'mon_host' entry found in ceph config.\n It's recommended to add mon_host with"
570 ." all monitor addresses (without ports) to the global section."
574 my $ipv6 = $global->{ms_bind_ipv6
} // $global->{"ms bind ipv6"} // $global->{"ms-bind-ipv6"};
576 my $ipv4 = $global->{ms_bind_ipv4
} // $global->{"ms bind ipv4"} // $global->{"ms-bind-ipv4"};
577 if ($ipv6 eq 'true' && (!defined($ipv4) || $ipv4 ne 'false')) {
579 "'ms_bind_ipv6' is enabled but 'ms_bind_ipv4' is not disabled.\n Make sure to"
580 ." disable 'ms_bind_ipv4' for ipv6 only clusters, or add an ipv4 network to public/cluster network."
585 if (defined($global->{keyring
})) {
587 "[global] config section contains 'keyring' option, which will prevent services from"
588 ." starting with Nautilus.\n Move 'keyring' option to [client] section instead."
593 log_warn
("Empty ceph config found");
596 my $local_ceph_ver = PVE
::Ceph
::Tools
::get_local_version
(1);
597 if (defined($local_ceph_ver)) {
598 if ($local_ceph_ver <= 14) {
599 log_fail
("local Ceph version too low, at least Octopus required..");
602 log_fail
("unable to determine local Ceph version.");
606 sub check_backup_retention_settings
{
607 log_info
("Checking backup retention settings..");
611 my $node_has_retention;
613 my $maxfiles_msg = "parameter 'maxfiles' is deprecated with PVE 7.x and will be removed in a " .
614 "future version, use 'prune-backups' instead.";
617 my $confdesc = PVE
::VZDump
::Common
::get_confdesc
();
619 my $fn = "/etc/vzdump.conf";
620 my $raw = PVE
::Tools
::file_get_contents
($fn);
622 my $conf_schema = { type
=> 'object', properties
=> $confdesc, };
623 my $param = PVE
::JSONSchema
::parse_config
($conf_schema, $fn, $raw);
625 if (defined($param->{maxfiles
})) {
627 log_warn
("$fn - $maxfiles_msg");
630 $node_has_retention = defined($param->{maxfiles
}) || defined($param->{'prune-backups'});
634 log_warn
("unable to parse node's VZDump configuration - $err");
637 my $storage_cfg = PVE
::Storage
::config
();
639 for my $storeid (keys $storage_cfg->{ids
}->%*) {
640 my $scfg = $storage_cfg->{ids
}->{$storeid};
642 if (defined($scfg->{maxfiles
})) {
644 log_warn
("storage '$storeid' - $maxfiles_msg");
647 next if !$scfg->{content
}->{backup
};
648 next if defined($scfg->{maxfiles
}) || defined($scfg->{'prune-backups'});
649 next if $node_has_retention;
652 "storage '$storeid' - no backup retention settings defined - by default, since PVE 7.0"
653 ." it will no longer keep only the last backup, but all backups"
658 my $vzdump_cron = PVE
::Cluster
::cfs_read_file
('vzdump.cron');
660 # only warn once, there might be many jobs...
661 if (scalar(grep { defined($_->{maxfiles
}) } $vzdump_cron->{jobs
}->@*)) {
663 log_warn
("/etc/pve/vzdump.cron - $maxfiles_msg");
668 log_warn
("unable to parse node's VZDump configuration - $err");
671 log_pass
("no backup retention problems found.") if $pass;
674 sub check_cifs_credential_location
{
675 log_info
("checking CIFS credential location..");
677 my $regex = qr/^(.*)\.cred$/;
681 PVE
::Tools
::dir_glob_foreach
('/etc/pve/priv/', $regex, sub {
684 my ($basename) = $filename =~ $regex;
687 "CIFS credentials '/etc/pve/priv/$filename' will be moved to"
688 ." '/etc/pve/priv/storage/$basename.pw' during the update"
694 log_pass
("no CIFS credentials at outdated location found.") if !$found;
697 sub check_custom_pool_roles
{
698 log_info
("Checking custom role IDs for clashes with new 'PVE' namespace..");
700 if (! -f
"/etc/pve/user.cfg") {
701 log_skip
("user.cfg does not exist");
705 my $raw = eval { PVE
::Tools
::file_get_contents
('/etc/pve/user.cfg'); };
707 log_fail
("Failed to read '/etc/pve/user.cfg' - $@");
712 while ($raw =~ /^\s*(.+?)\s*$/gm) {
716 foreach my $d (split (/:/, $line)) {
722 my $et = shift @data;
723 next if $et ne 'role';
725 my ($role, $privlist) = @data;
726 if (!PVE
::AccessControl
::verify_rolename
($role, 1)) {
727 warn "user config - ignore role '$role' - invalid characters in role name\n";
731 $roles->{$role} = {} if !$roles->{$role};
732 foreach my $priv (split_list
($privlist)) {
733 $roles->{$role}->{$priv} = 1;
737 my ($custom_roles, $pve_namespace_clashes) = (0, 0);
738 for my $role (sort keys %{$roles}) {
739 next if PVE
::AccessControl
::role_is_special
($role);
742 if ($role =~ /^PVE/i) {
743 log_warn
("custom role '$role' clashes with 'PVE' namespace for built-in roles");
744 $pve_namespace_clashes++;
747 if ($pve_namespace_clashes > 0) {
748 log_fail
("$pve_namespace_clashes custom role(s) will clash with 'PVE' namespace for built-in roles enforced in Proxmox VE 8");
749 } elsif ($custom_roles > 0) {
750 log_pass
("none of the $custom_roles custom roles will clash with newly enforced 'PVE' namespace")
752 log_pass
("no custom roles defined, so no clash with 'PVE' role ID namespace enforced in Proxmox VE 8")
756 my sub check_max_length
{
757 my ($raw, $max_length, $warning) = @_;
758 log_warn
($warning) if defined($raw) && length($raw) > $max_length;
761 sub check_node_and_guest_configurations
{
762 log_info
("Checking node and guest description/note length..");
764 my @affected_nodes = grep {
765 my $desc = PVE
::NodeConfig
::load_config
($_)->{desc
};
766 defined($desc) && length($desc) > 64 * 1024
767 } PVE
::Cluster
::get_nodelist
();
769 if (scalar(@affected_nodes) > 0) {
770 log_warn
("Node config description of the following nodes too long for new limit of 64 KiB:\n "
771 . join(', ', @affected_nodes));
773 log_pass
("All node config descriptions fit in the new limit of 64 KiB");
776 my $affected_guests_long_desc = [];
777 my $affected_cts_cgroup_keys = [];
779 my $cts = PVE
::LXC
::config_list
();
780 for my $vmid (sort { $a <=> $b } keys %$cts) {
781 my $conf = PVE
::LXC
::Config-
>load_config($vmid);
783 my $desc = $conf->{description
};
784 push @$affected_guests_long_desc, "CT $vmid" if defined($desc) && length($desc) > 8 * 1024;
786 my $lxc_raw_conf = $conf->{lxc
};
787 push @$affected_cts_cgroup_keys, "CT $vmid" if (grep (@$_[0] =~ /^lxc\.cgroup\./, @$lxc_raw_conf));
789 my $vms = PVE
::QemuServer
::config_list
();
790 for my $vmid (sort { $a <=> $b } keys %$vms) {
791 my $desc = PVE
::QemuConfig-
>load_config($vmid)->{description
};
792 push @$affected_guests_long_desc, "VM $vmid" if defined($desc) && length($desc) > 8 * 1024;
794 if (scalar($affected_guests_long_desc->@*) > 0) {
795 log_warn
("Guest config description of the following virtual-guests too long for new limit of 64 KiB:\n"
796 ." " . join(", ", $affected_guests_long_desc->@*));
798 log_pass
("All guest config descriptions fit in the new limit of 8 KiB");
801 log_info
("Checking container configs for deprecated lxc.cgroup entries");
803 if (scalar($affected_cts_cgroup_keys->@*) > 0) {
804 if ($forced_legacy_cgroup) {
805 log_pass
("Found legacy 'lxc.cgroup' keys, but system explicitly configured for legacy hybrid cgroup hierarchy.");
807 log_warn
("The following CTs have 'lxc.cgroup' keys configured, which will be ignored in the new default unified cgroupv2:\n"
808 ." " . join(", ", $affected_cts_cgroup_keys->@*) ."\n"
809 ." Often it can be enough to change to the new 'lxc.cgroup2' prefix after the upgrade to Proxmox VE 7.x");
812 log_pass
("No legacy 'lxc.cgroup' keys found.");
816 sub check_storage_content
{
817 log_info
("Checking storage content type configuration..");
822 my $storage_cfg = PVE
::Storage
::config
();
824 for my $storeid (sort keys $storage_cfg->{ids
}->%*) {
825 my $scfg = $storage_cfg->{ids
}->{$storeid};
827 next if $scfg->{shared
};
828 next if !PVE
::Storage
::storage_check_enabled
($storage_cfg, $storeid, undef, 1);
830 my $valid_content = PVE
::Storage
::Plugin
::valid_content_types
($scfg->{type
});
832 if (scalar(keys $scfg->{content
}->%*) == 0 && !$valid_content->{none
}) {
834 log_fail
("storage '$storeid' does not support configured content type 'none'");
835 delete $scfg->{content
}->{none
}; # scan for guest images below
838 next if $scfg->{content
}->{images
};
839 next if $scfg->{content
}->{rootdir
};
841 # Skip 'iscsi(direct)' (and foreign plugins with potentially similar behavior) with 'none',
842 # because that means "use LUNs directly" and vdisk_list() in PVE 6.x still lists those.
843 # It's enough to *not* skip 'dir', because it is the only other storage that supports 'none'
844 # and 'images' or 'rootdir', hence being potentially misconfigured.
845 next if $scfg->{type
} ne 'dir' && $scfg->{content
}->{none
};
847 eval { PVE
::Storage
::activate_storage
($storage_cfg, $storeid) };
849 log_warn
("activating '$storeid' failed - $err");
853 my $res = eval { PVE
::Storage
::vdisk_list
($storage_cfg, $storeid); };
855 log_warn
("listing images on '$storeid' failed - $err");
858 my @volids = map { $_->{volid
} } $res->{$storeid}->@*;
860 my $number = scalar(@volids);
863 "storage '$storeid' - neither content type 'images' nor 'rootdir' configured, but"
864 ."found $number guest volume(s)"
869 my $check_volid = sub {
870 my ($volid, $vmid, $vmtype, $reference) = @_;
872 my $guesttext = $vmtype eq 'qemu' ?
'VM' : 'CT';
873 my $prefix = "$guesttext $vmid - volume '$volid' ($reference)";
875 my ($storeid) = PVE
::Storage
::parse_volume_id
($volid, 1);
876 return if !defined($storeid);
878 my $scfg = $storage_cfg->{ids
}->{$storeid};
881 log_warn
("$prefix - storage does not exist!");
885 # cannot use parse_volname for containers, as it can return 'images'
886 # but containers cannot have ISO images attached, so assume 'rootdir'
887 my $vtype = 'rootdir';
888 if ($vmtype eq 'qemu') {
889 ($vtype) = eval { PVE
::Storage
::parse_volname
($storage_cfg, $volid); };
893 if (!$scfg->{content
}->{$vtype}) {
896 log_warn
("$prefix - storage does not have content type '$vtype' configured.");
900 my $cts = PVE
::LXC
::config_list
();
901 for my $vmid (sort { $a <=> $b } keys %$cts) {
902 my $conf = PVE
::LXC
::Config-
>load_config($vmid);
907 my ($ms, $mountpoint, $reference) = @_;
909 my $volid = $mountpoint->{volume
};
910 return if !$volid || $mountpoint->{type
} ne 'volume';
912 return if $volhash->{$volid}; # volume might be referenced multiple times
914 $volhash->{$volid} = 1;
916 $check_volid->($volid, $vmid, 'lxc', $reference);
919 my $opts = { include_unused
=> 1 };
920 PVE
::LXC
::Config-
>foreach_volume_full($conf, $opts, $check, 'in config');
921 for my $snapname (keys $conf->{snapshots
}->%*) {
922 my $snap = $conf->{snapshots
}->{$snapname};
923 PVE
::LXC
::Config-
>foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
927 my $vms = PVE
::QemuServer
::config_list
();
928 for my $vmid (sort { $a <=> $b } keys %$vms) {
929 my $conf = PVE
::QemuConfig-
>load_config($vmid);
934 my ($key, $drive, $reference) = @_;
936 my $volid = $drive->{file
};
937 return if $volid =~ m
|^/|;
938 return if $volhash->{$volid}; # volume might be referenced multiple times
940 $volhash->{$volid} = 1;
941 $check_volid->($volid, $vmid, 'qemu', $reference);
945 extra_keys
=> ['vmstate'],
948 # startup from a suspended state works even without 'images' content type on the
949 # state storage, so do not check 'vmstate' for $conf
950 PVE
::QemuConfig-
>foreach_volume_full($conf, { include_unused
=> 1 }, $check, 'in config');
951 for my $snapname (keys $conf->{snapshots
}->%*) {
952 my $snap = $conf->{snapshots
}->{$snapname};
953 PVE
::QemuConfig-
>foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
958 log_warn
("Proxmox VE enforces stricter content type checks since 7.0. The guests above " .
959 "might not work until the storage configuration is fixed.");
963 log_pass
("no storage content problems found");
967 sub check_storage_content_dirs
{
968 my $storage_cfg = PVE
::Storage
::config
();
970 # check that content dirs are pairwise distinct
971 my $any_problematic = 0;
972 for my $storeid (sort keys $storage_cfg->{ids
}->%*) {
973 my $scfg = $storage_cfg->{ids
}->{$storeid};
975 next if !PVE
::Storage
::storage_check_enabled
($storage_cfg, $storeid, undef, 1);
976 next if !$scfg->{path
} || !$scfg->{content
};
978 eval { PVE
::Storage
::activate_storage
($storage_cfg, $storeid) };
980 log_warn
("activating '$storeid' failed - $err");
984 my $resolved_subdirs = {};
985 my $plugin = PVE
::Storage
::Plugin-
>lookup($scfg->{type
});
986 for my $vtype (keys $scfg->{content
}->%*) {
987 my $abs_subdir = Cwd
::abs_path
($plugin->get_subdir($scfg, $vtype));
988 push $resolved_subdirs->{$abs_subdir}->@*, $vtype;
990 for my $subdir (keys $resolved_subdirs->%*) {
991 if (scalar($resolved_subdirs->{$subdir}->@*) > 1) {
992 my $types = join(", ", $resolved_subdirs->{$subdir}->@*);
993 log_warn
("storage '$storeid' uses directory $subdir for multiple content types ($types).");
994 $any_problematic = 1;
998 if ($any_problematic) {
999 log_fail
("re-using directory for multiple content types (see above) is no longer supported in Proxmox VE 8!")
1001 log_pass
("no storage re-uses a directory for multiple content types.")
# Checks whether the containers (CTs) of this node can run in a unified cgroup v2 layout.
#
# Takes no arguments. Reads the file-level $forced_legacy_cgroup flag and $nodename, lists the
# node's CTs through PVE::API2::LXC->vmlist and inspects each CT's root file system:
# running CTs via /proc/<pid>/root, all other CTs by mounting them with PVE::LXC::mount_all.
# Results are reported through the log_* helpers.
# NOTE(review): several lines of this sub (closing braces, error capture, early returns) are not
# visible in this excerpt, so the exact control flow between the statements must be confirmed
# against the full file.
1005 sub check_containers_cgroup_compat
{
1006 if ($forced_legacy_cgroup) {
1007 log_warn
("System explicitly configured for legacy hybrid cgroup hierarchy.\n"
1008 ." NOTE: support for the hybrid cgroup hierarchy will be removed in future Proxmox VE 9 (~ 2025)."
# Helper: decides for one CT (config, root directory, CT ID) whether it supports cgroup v2.
# It returns 1 directly for 'devuan' and 'alpine' guests, otherwise the result of running
# $unified_cgroupv2_support through PVE::LXC::Setup's protected_call().
1012 my $supports_cgroupv2 = sub {
1013 my ($conf, $rootdir, $ctid) = @_;
# Helper: derives the systemd version from the number embedded in the file name of
# libsystemd-shared-<N>.so, looked up in /lib/systemd or, if that is no directory,
# in /usr/lib/systemd.
1015 my $get_systemd_version = sub {
1018 my $sd_lib_dir = -d
"/lib/systemd" ?
"/lib/systemd" : "/usr/lib/systemd";
# NOTE(review): inside a double-quoted string "\." collapses to ".", so the pattern handed to
# dir_glob_regex has an unescaped dot before "so" - confirm this is acceptable.
1019 my $libsd = PVE
::Tools
::dir_glob_regex
($sd_lib_dir, "libsystemd-shared-.+\.so");
1020 if (defined($libsd) && $libsd =~ /libsystemd-shared-(\d+)\.so/) {
# Helper passed to protected_call() below: looks at the link target of /sbin/init and, for
# systemd, at the detected systemd version.
1027 my $unified_cgroupv2_support = sub {
1030 # https://www.freedesktop.org/software/systemd/man/systemd.html
1031 # systemd is installed as symlink to /sbin/init
1032 my $systemd = CORE
::readlink('/sbin/init');
1034 # assume non-systemd init will run with unified cgroupv2
1035 if (!defined($systemd) || $systemd !~ m
@/systemd$@) {
1039 # systemd version 232 (e.g. debian stretch) supports the unified hierarchy
1040 my $sdver = $get_systemd_version->();
1041 if (!defined($sdver) || $sdver < 232) {
1048 my $ostype = $conf->{ostype
};
1049 if (!defined($ostype)) {
1050 log_warn
("Found CT ($ctid) without 'ostype' set!");
1051 } elsif ($ostype eq 'devuan' || $ostype eq 'alpine') {
1052 return 1; # no systemd, no cgroup problems
1055 my $lxc_setup = PVE
::LXC
::Setup-
>new($conf, $rootdir);
1056 return $lxc_setup->protected_call($unified_cgroupv2_support);
# Helper: builds the message for a CT that lacks cgroup v2 support. The hint about the kernel
# cmdline option is left out when the legacy hierarchy is already forced.
1059 my $log_problem = sub {
1061 my $extra = $forced_legacy_cgroup ?
'' : " or set systemd.unified_cgroup_hierarchy=0 in the Proxmox VE hosts' kernel cmdline";
1063 "Found at least one CT ($ctid) which does not support running in a unified cgroup v2 layout\n"
1064 ." Consider upgrading the Containers distro${extra}! Skipping further CT compat checks."
# Listing the CTs is wrapped in eval so a failure is reported as a warning instead of dying.
1068 my $cts = eval { PVE
::API2
::LXC-
>vmlist({ node
=> $nodename }) };
1070 log_warn
("Failed to retrieve information about this node's CTs - $@");
1074 if (!defined($cts) || !scalar(@$cts)) {
1075 log_skip
("No containers on node detected.");
# Split the CTs by status: running ones are inspected through /proc, all others get mounted.
# NOTE(review): the list elements are hash references (see ->{status} and ->{vmid}), so
# `$a <=> $b` compares the references numerically rather than the VMIDs - confirm the
# intended ordering.
1079 my @running_cts = sort { $a <=> $b } grep { $_->{status
} eq 'running' } @$cts;
1080 my @offline_cts = sort { $a <=> $b } grep { $_->{status
} ne 'running' } @$cts;
1082 for my $ct (@running_cts) {
1083 my $ctid = $ct->{vmid
};
1084 my $pid = eval { PVE
::LXC
::find_lxc_pid
($ctid) };
1086 log_warn
("Failed to get PID for running CT $ctid - $err");
# The root file system of a running CT is accessed through the /proc entry of the PID found above.
1089 my $rootdir = "/proc/$pid/root";
1090 my $conf = PVE
::LXC
::Config-
>load_config($ctid);
1092 my $ret = eval { $supports_cgroupv2->($conf, $rootdir, $ctid) };
1094 log_warn
("Failed to get cgroup support status for CT $ctid - $err");
1098 $log_problem->($ctid);
# Non-running CTs are mounted for the check; each visible exit path below calls umount_all
# inside eval, so errors raised while unmounting are trapped.
1103 my $storage_cfg = PVE
::Storage
::config
();
1104 for my $ct (@offline_cts) {
1105 my $ctid = $ct->{vmid
};
1106 my ($conf, $rootdir, $ret);
1108 $conf = PVE
::LXC
::Config-
>load_config($ctid);
1109 $rootdir = PVE
::LXC
::mount_all
($ctid, $storage_cfg, $conf);
1110 $ret = $supports_cgroupv2->($conf, $rootdir, $ctid);
1113 log_warn
("Failed to load config and mount CT $ctid - $err");
1114 eval { PVE
::LXC
::umount_all
($ctid, $storage_cfg, $conf) };
1118 $log_problem->($ctid);
1119 eval { PVE
::LXC
::umount_all
($ctid, $storage_cfg, $conf) };
1123 eval { PVE
::LXC
::umount_all
($ctid, $storage_cfg, $conf) };
# Looks for a Debian security repository entry in /etc/apt/sources.list and in the *.list files
# below /etc/apt/sources.list.d.
#
# Takes no arguments. Emits a warning when no Debian security repository was detected in any of
# the scanned files.
# NOTE(review): the lines that evaluate the captured suite and track the "found" state are not
# visible in this excerpt.
1127 sub check_apt_repos
{
1128 log_info
("Checking if the suite for the Debian security repository is correct..");
1132 my $dir = '/etc/apt/sources.list.d';
1135 # TODO: check that (original) debian and Proxmox VE mirrors are present.
# Helper: scans one sources file line by line. When $in_dir is set, $file is taken relative
# to $dir. A file that cannot be read is skipped silently.
1137 my $check_file = sub {
1140 $file = "${dir}/${file}" if $in_dir;
1142 my $raw = eval { PVE
::Tools
::file_get_contents
($file) };
1143 return if !defined($raw);
1144 my @lines = split(/\n/, $raw);
1147 for my $line (@lines) {
1150 next if length($line) == 0; # split would result in undef then...
# Keep only the part of the line in front of the first '#', then consider 'deb' entries only.
1152 ($line) = split(/#/, $line);
1154 next if $line !~ m/^deb[[:space:]]/; # is case sensitive
# Both patterns below capture the first whitespace-separated token that follows the
# repository URL (presumably the suite - confirm against the code using the capture).
1159 # https://deb.debian.org/debian-security
1160 # http://security.debian.org/debian-security
1161 # http://security.debian.org/
1162 if ($line =~ m
|https?
://deb\
.debian\
.org
/debian-security/?\s
+(\S
*)|i
) {
1164 } elsif ($line =~ m
|https?
://security\
.debian\
.org
(?
:.*?
)\s
+(\S
*)|i
) {
1172 my $where = "in ${file}:${number}";
1173 # TODO: is this useful (for some other checks)?
# The main list is passed as a full path; the files found in $dir are handed to the same helper
# by dir_glob_foreach.
1177 $check_file->("/etc/apt/sources.list");
1181 PVE
::Tools
::dir_glob_foreach
($dir, '^.*\.list$', $check_file);
1184 # only warn, it might be defined in a .sources file or in a way not caught above
1185 log_warn
("No Debian security repository detected in /etc/apt/sources.list and " .
1186 "/etc/apt/sources.list.d/*.list");
# Reports which time synchronisation service is active on this node.
#
# Takes no arguments. The units are probed in this order: systemd-timesyncd, ntp, then chrony,
# openntpd and ntpsec; there is a separate message for the case that none of them is active.
1190 sub check_time_sync
{
# Helper: returns the unit name if the unit's state is 'active', undef otherwise. The second
# argument (1) sets $get_systemd_unit_state's $surpress_stderr parameter.
1191 my $unit_active = sub { return $get_systemd_unit_state->($_[0], 1) eq 'active' ?
$_[0] : undef };
1193 log_info
("Checking for supported & active NTP service..");
1194 if ($unit_active->('systemd-timesyncd.service')) {
1196 "systemd-timesyncd is not the best choice for time-keeping on servers, due to only applying"
1197 ." updates on boot.\n While not necessary for the upgrade it's recommended to use one of:\n"
1198 ." * chrony (Default in new Proxmox VE installations)\n * ntpsec\n * openntpd\n"
1200 } elsif ($unit_active->('ntp.service')) {
1201 log_info
("Debian deprecated and removed the ntp package for Bookworm, but the system"
1202 ." will automatically migrate to the 'ntpsec' replacement package on upgrade.");
# $active_ntp holds the name of the first unit in the || chain that is active.
1203 } elsif (my $active_ntp = ($unit_active->('chrony.service') || $unit_active->('openntpd.service') || $unit_active->('ntpsec.service'))) {
1204 log_pass
("Detected active time synchronisation unit '$active_ntp'");
1207 "No (active) time synchronisation daemon (NTP) detected, but synchronized systems are important,"
1208 ." especially for cluster and/or ceph!"
# Miscellaneous checks, in order: root's SSH cipher configuration, state of the common Proxmox VE
# daemons, free space on the root file system, running guests, resolution of the node's
# hostname, key size of the node certificates, followed by calls to further check_* helpers.
# NOTE(review): the enclosing sub header and several closing/else lines are not visible in this
# excerpt.
1214 print_header
("MISCELLANEOUS CHECKS");
# A missing or unreadable /root/.ssh/config leaves $ssh_config undefined (eval traps the error).
1215 my $ssh_config = eval { PVE
::Tools
::file_get_contents
('/root/.ssh/config') };
1216 if (defined($ssh_config)) {
1217 log_fail
("Unsupported SSH Cipher configured for root in /root/.ssh/config: $1")
1218 if $ssh_config =~ /^Ciphers .*(blowfish|arcfour|3des).*$/m;
1220 log_skip
("No SSH config file found.");
1223 log_info
("Checking common daemon services..");
1224 $log_systemd_unit_state->('pveproxy.service');
1225 $log_systemd_unit_state->('pvedaemon.service');
1226 $log_systemd_unit_state->('pvescheduler.service');
1227 $log_systemd_unit_state->('pvestatd.service');
# The threshold is 5 * 10^9, compared against the 'avail' field returned by df.
# NOTE(review): the second argument to df (10) is presumably a timeout - confirm.
1231 my $root_free = PVE
::Tools
::df
('/', 10);
1232 log_warn
("Less than 5 GB free space on root file system.")
1233 if defined($root_free) && $root_free->{avail
} < 5 * 1000*1000*1000;
1235 log_info
("Checking for running guests..");
1236 my $running_guests = 0;
# VMs and CTs are listed separately; a failing listing is warned about and not counted.
1238 my $vms = eval { PVE
::API2
::Qemu-
>vmlist({ node
=> $nodename }) };
1239 log_warn
("Failed to retrieve information about this node's VMs - $@") if $@;
1240 $running_guests += grep { $_->{status
} eq 'running' } @$vms if defined($vms);
1242 my $cts = eval { PVE
::API2
::LXC-
>vmlist({ node
=> $nodename }) };
1243 log_warn
("Failed to retrieve information about this node's CTs - $@") if $@;
1244 $running_guests += grep { $_->{status
} eq 'running' } @$cts if defined($cts);
1246 if ($running_guests > 0) {
1247 log_warn
("$running_guests running guest(s) detected - consider migrating or stopping them.")
1249 log_pass
("no running guest detected.")
1252 log_info
("Checking if the local node's hostname '$nodename' is resolvable..");
1253 my $local_ip = eval { PVE
::Network
::get_ip_from_hostname
($nodename) };
1255 log_warn
("Failed to resolve hostname '$nodename' to IP - $@");
1257 log_info
("Checking if resolved IP is configured on local node..");
# Build a single-host CIDR (/128 for IPv6, /32 otherwise) for the lookup below.
1258 my $cidr = Net
::IP
::ip_is_ipv6
($local_ip) ?
"$local_ip/128" : "$local_ip/32";
1259 my $configured_ips = PVE
::Network
::get_local_ip_from_cidr
($cidr);
1260 my $ip_count = scalar(@$configured_ips);
1262 if ($ip_count <= 0) {
1263 log_fail
("Resolved node IP '$local_ip' not configured or active for '$nodename'");
1264 } elsif ($ip_count > 1) {
1265 log_warn
("Resolved node IP '$local_ip' active on multiple ($ip_count) interfaces!");
1267 log_pass
("Resolved node IP '$local_ip' configured and active on single interface.");
1271 log_info
("Check node certificate's RSA key size");
1272 my $certs = PVE
::API2
::Certificates-
>info({ node
=> $nodename });
# Per key type check table, looked up below as $certs_check->{$type}; its entries provide the
# 'minsize' and 'name' fields used in the loop.
1274 'rsaEncryption' => {
1278 'id-ecPublicKey' => {
1284 my $certs_check_failed = 0;
1285 foreach my $cert (@$certs) {
1286 my ($type, $size, $fn) = $cert->@{qw(public-key-type public-key-bits filename)};
1288 if (!defined($type) || !defined($size)) {
1289 log_warn
("'$fn': cannot check certificate, failed to get it's type or size!");
1292 my $check = $certs_check->{$type};
1293 if (!defined($check)) {
1294 log_warn
("'$fn': certificate's public key type '$type' unknown!");
# NOTE(review): the comparison uses $check->{minsize} while both messages below hard-code 2048;
# confirm they agree for every key type in the table.
1298 if ($size < $check->{minsize
}) {
1299 log_fail
("'$fn', certificate's $check->{name} public key size is less than 2048 bit");
1300 $certs_check_failed = 1;
1302 log_pass
("Certificate '$fn' passed Debian Busters (and newer) security level for TLS connections ($size >= 2048)");
1306 check_backup_retention_settings
();
1307 check_cifs_credential_location
();
1308 check_custom_pool_roles
();
1309 check_node_and_guest_configurations
();
# Returns $str passed through colored() with $color when $condition is true, otherwise the
# unmodified string.
# The leading "" . concatenation makes the result a plain string in either case.
1314 my ($str, $color, $condition) = @_;
1315 return "". ($condition ? colored
($str, $color) : $str);
# Registers the 'checklist' method: runs the individual upgrade checks and prints a summary of
# the per-level counters. The method's declared return type is 'null'.
# NOTE(review): parts of the method definition (e.g. the parameter schema and the `code` sub
# header) are not visible in this excerpt.
1318 __PACKAGE__-
>register_method ({
1319 name
=> 'checklist',
1320 path
=> 'checklist',
1322 description
=> 'Check (pre-/post-)upgrade conditions.',
1324 additionalProperties
=> 0,
1327 description
=> 'perform additional, expensive checks.',
1334 returns
=> { type
=> 'null' },
# Record whether the running kernel was booted with the legacy cgroup hierarchy forced.
# NOTE(review): the dots in the pattern are unescaped and match any character, and the match is
# not anchored after "=0" - confirm this is precise enough.
1338 my $kernel_cli = PVE
::Tools
::file_get_contents
('/proc/cmdline');
1339 if ($kernel_cli =~ /systemd.unified_cgroup_hierarchy=0/){
1340 $forced_legacy_cgroup = 1;
1343 check_pve_packages
();
1344 check_cluster_corosync
();
1346 check_storage_health
();
# The CT cgroup v2 compatibility check only runs when the 'full' parameter is set.
1349 if ($param->{full
}) {
1350 check_containers_cgroup_compat
();
1352 log_skip
("NOTE: Expensive checks, like CT cgroupv2 compat, not performed without '--full' parameter");
# Summary: $total is the sum over all values of $counters; warnings and failures are only
# colored when their count is greater than zero.
1355 print_header
("SUMMARY");
1358 $total += $_ for values %$counters;
1360 print "TOTAL: $total\n";
1361 print colored
("PASSED: $counters->{pass}\n", 'green');
1362 print "SKIPPED: $counters->{skip}\n";
1363 print colored_if
("WARNINGS: $counters->{warn}\n", 'yellow', $counters->{warn} > 0);
1364 print colored_if
("FAILURES: $counters->{fail}\n", 'bold red', $counters->{fail
} > 0);
# The closing hint uses 'bold red' as soon as there is at least one failure, 'yellow' otherwise;
# the "one at a time" advice is printed for failures only.
1366 if ($counters->{warn} > 0 || $counters->{fail
} > 0) {
1367 my $color = $counters->{fail
} > 0 ?
'bold red' : 'yellow';
1368 print colored
("\nATTENTION: Please check the output for detailed information!\n", $color);
1369 print colored
("Try to solve the problems one at a time and then run this checklist tool again.\n", $color) if $counters->{fail
} > 0;
# Package-level command definition referring to this package's 'checklist' method.
# NOTE(review): presumably consumed by the PVE::CLIHandler base class, with the empty array and
# hash meaning "no fixed arguments / no extra options" - confirm against PVE::CLIHandler.
1375 our $cmddef = [ __PACKAGE__
, 'checklist', [], {}];