]> git.proxmox.com Git - pve-manager.git/blob - PVE/CLI/pve7to8.pm
pve7to8: improve log message for missing kernel reboot
[pve-manager.git] / PVE / CLI / pve7to8.pm
1 package PVE::CLI::pve7to8;
2
3 use strict;
4 use warnings;
5
6 use PVE::API2::APT;
7 use PVE::API2::Ceph;
8 use PVE::API2::LXC;
9 use PVE::API2::Qemu;
10 use PVE::API2::Certificates;
11 use PVE::API2::Cluster::Ceph;
12
13 use PVE::AccessControl;
14 use PVE::Ceph::Tools;
15 use PVE::Cluster;
16 use PVE::Corosync;
17 use PVE::INotify;
18 use PVE::JSONSchema;
19 use PVE::NodeConfig;
20 use PVE::RPCEnvironment;
21 use PVE::Storage;
22 use PVE::Storage::Plugin;
23 use PVE::Tools qw(run_command split_list);
24 use PVE::QemuConfig;
25 use PVE::QemuServer;
26 use PVE::VZDump::Common;
27 use PVE::LXC;
28 use PVE::LXC::Config;
29 use PVE::LXC::Setup;
30
31 use Term::ANSIColor;
32
33 use PVE::CLIHandler;
34
35 use base qw(PVE::CLIHandler);
36
# Name of the local node as returned by PVE::INotify::nodename(); evaluated once
# at module load time and reused by the checks below.
my $nodename = PVE::INotify::nodename();
38
# Sets up the default CLI RPC environment via PVE::RPCEnvironment.
sub setup_environment {
    return PVE::RPCEnvironment->setup_default_cli_env();
}
42
# Minimal proxmox-ve package version (major.minor-pkgrel) the checks below compare against.
my $min_pve_major = 7;
my $min_pve_minor = 4;
my $min_pve_pkgrel = 1;

# Consulted by the cgroup related checks; initialised to "not forced" here.
my $forced_legacy_cgroup = 0;

# Number of log lines emitted per counted level.
my $counters = { map { ($_ => 0) } qw(pass skip warn fail) };
53
# Prints "$line\n", prefixed with the upper-cased level and ': ' when a level is
# given. Levels that have an entry in $counters are counted there as well.
my $log_line = sub {
    my ($level, $line) = @_;

    if (defined($level)) {
        $counters->{$level}++ if defined($counters->{$level});
        print uc($level), ': ';
    }

    print "$line\n";
};
62
# Logs a line with level 'pass', wrapped in green terminal colouring.
sub log_pass {
    my @message = @_;

    print color('green');
    $log_line->('pass', @message);
    print color('reset');
}
68
# Logs a line with level 'info' (uncoloured).
sub log_info {
    my @message = @_;
    return $log_line->('info', @message);
}
# Logs a line with level 'skip' (uncoloured).
sub log_skip {
    my @message = @_;
    return $log_line->('skip', @message);
}
# Logs a line with level 'warn', wrapped in yellow terminal colouring.
sub log_warn {
    my @message = @_;

    print color('yellow');
    $log_line->('warn', @message);
    print color('reset');
}
# Logs a line with level 'fail', wrapped in red terminal colouring.
sub log_fail {
    my @message = @_;

    print color('red');
    $log_line->('fail', @message);
    print color('reset');
}
85
# Tracks whether print_header() has been called yet.
my $print_header_first = 1;

# Prints a section header of the form "= TITLE =" followed by an empty line.
# Every header except the very first one is preceded by an empty line.
sub print_header {
    my ($h) = @_;

    my $leading = $print_header_first ? '' : "\n";
    print "${leading}= $h =\n\n";

    $print_header_first = 0;
}
93
# Queries systemd for the state of $unit.
#
# Runs 'systemctl is-enabled' first and, if that produced any output, follows up
# with 'systemctl is-active'. The last output line seen (chomped) is returned,
# or 'unknown' if there was no output at all. With a true $surpress_stderr the
# commands' stderr output is discarded.
my $get_systemd_unit_state = sub {
    my ($unit, $surpress_stderr) = @_;

    my $state;
    my %extra = (
        outfunc => sub {
            ($state) = @_;
            chomp $state;
        },
        noerr => 1,
    );
    if ($surpress_stderr) {
        $extra{errfunc} = sub { };
    }

    eval {
        run_command(['systemctl', 'is-enabled', "$unit"], %extra);
        if (defined($state)) {
            run_command(['systemctl', 'is-active', "$unit"], %extra);
        }
    };

    return defined($state) ? $state : 'unknown';
};
# Logs the state of a systemd unit with a severity derived from that state:
# 'active' passes, 'failed' fails, 'inactive' fails unless $no_fail_on_inactive
# is true (then it only warns) and every other state results in a warning.
my $log_systemd_unit_state = sub {
    my ($unit, $no_fail_on_inactive) = @_;

    my $state = $get_systemd_unit_state->($unit);

    my %logger_for = (
        active => \&log_pass,
        failed => \&log_fail,
        inactive => $no_fail_on_inactive ? \&log_warn : \&log_fail,
    );
    my $logger = $logger_for{$state} // \&log_warn;

    $logger->("systemd unit '$unit' is in state '$state'");
};
130
# Cache for the package version list, filled on first use by $get_pkg.
my $versions;

# Returns the version info entry (hash reference) for the package named $pkg, or
# undef if it is not part of the version list.
#
# The list is fetched once through PVE::API2::APT->versions() and cached in
# $versions. If it cannot be retrieved, a failure is logged and undef returned.
#
# A package that is simply missing from the list yields undef WITHOUT logging.
# The previous `$pkgs == 0` test compared an array reference numerically and
# could never be true, so that has always been the effective behaviour; callers
# that merely probe for a package depend on it and report problems themselves.
my $get_pkg = sub {
    my ($pkg) = @_;

    if (!defined($versions)) {
        $versions = eval { PVE::API2::APT->versions({ node => $nodename }) };
        my $err = $@; # grab it right away, before anything else can clobber $@

        if (!defined($versions)) {
            my $msg = "unable to retrieve package version information";
            $msg .= " - $err" if $err;
            log_fail("$msg");
            return undef;
        }
    }

    my ($info) = grep { $_->{Package} eq $pkg } @$versions;

    return $info;
};
152
# Checks package related upgrade preconditions:
#  - warns about pending package updates
#  - verifies that the installed proxmox-ve package is at least the minimal
#    required version ($min_pve_major.$min_pve_minor-$min_pve_pkgrel), or already
#    from the next major release
#  - verifies that the running kernel is one considered suitable for the upgrade
sub check_pve_packages {
    print_header("CHECKING VERSION INFORMATION FOR PVE PACKAGES");

    print "Checking for package updates..\n";
    my $updates = eval { PVE::API2::APT->list_updates({ node => $nodename }); };
    if (!defined($updates)) {
        log_warn("$@") if $@;
        log_fail("unable to retrieve list of package updates!");
    } elsif (@$updates > 0) {
        my $pkgs = join(', ', map { $_->{Package} } @$updates);
        log_warn("updates for the following packages are available:\n $pkgs");
    } else {
        log_pass("all packages uptodate");
    }

    print "\nChecking proxmox-ve package version..\n";
    if (defined(my $proxmox_ve = $get_pkg->('proxmox-ve'))) {
        my $min_pve_ver = "$min_pve_major.$min_pve_minor-$min_pve_pkgrel";

        # An unparsable version is treated as 0.0-0, i.e. reported as too old,
        # instead of triggering 'uninitialized value' warnings below.
        my ($maj, $min, $pkgrel) = map { $_ // 0 }
            (($proxmox_ve->{OldVersion} // '') =~ m/^(\d+)\.(\d+)-(\d+)/)[0 .. 2];

        my $upgraded = 0;

        # Versions must be compared component by component in order of
        # significance: 7.5-0 is newer than 7.4-1 although its pkgrel is smaller.
        my $new_enough = $maj == $min_pve_major && (
            $min > $min_pve_minor
            || ($min == $min_pve_minor && $pkgrel >= $min_pve_pkgrel)
        );

        if ($maj > $min_pve_major) {
            log_pass("already upgraded to Proxmox VE " . ($min_pve_major + 1));
            $upgraded = 1;
        } elsif ($new_enough) {
            log_pass("proxmox-ve package has version >= $min_pve_ver");
        } else {
            log_fail("proxmox-ve package is too old, please upgrade to >= $min_pve_ver!");
        }

        my ($krunning, $kinstalled) = (qr/6\.(?:2|5)/, 'pve-kernel-6.2');
        if (!$upgraded) {
            # we got a few that avoided 5.15 in cluster with mixed CPUs, so allow older too
            ($krunning, $kinstalled) = (qr/(?:5\.(?:13|15)|6\.2)/, 'pve-kernel-5.15');
        }

        print "\nChecking running kernel version..\n";
        my $kernel_ver = $proxmox_ve->{RunningKernel};
        if (!defined($kernel_ver)) {
            log_fail("unable to determine running kernel version.");
        } elsif ($kernel_ver =~ /^$krunning/) {
            log_pass("running kernel '$kernel_ver' is considered suitable for upgrade.");
        } elsif ($get_pkg->($kinstalled)) {
            # with 6.2 kernel being available in both we might want to fine-tune the check?
            log_warn("a suitable kernel ($kinstalled) is installed, but an unsuitable ($kernel_ver) is booted, missing reboot?!");
        } else {
            log_warn("unexpected running and installed kernel '$kernel_ver'.");
        }
    } else {
        log_fail("proxmox-ve package not found!");
    }
}
207
208
# Reports for every configured storage whether it is enabled and active, then
# runs the storage content type checks (check_storage_content).
sub check_storage_health {
    print_header("CHECKING CONFIGURED STORAGES");
    my $cfg = PVE::Storage::config();

    my $info = PVE::Storage::storage_info($cfg);

    for my $storeid (sort keys %$info) {
        my $d = $info->{$storeid};

        if (!$d->{enabled}) {
            log_skip("storage '$storeid' disabled.");
        } elsif ($d->{active}) {
            log_pass("storage '$storeid' enabled and active.");
        } else {
            log_warn("storage '$storeid' enabled but not active!");
        }
    }

    check_storage_content();
}
232
# Checks cluster health and the corosync configuration:
#  - state of the pve-cluster and corosync systemd units, and pmxcfs quorum
#  - configured votes versus the expected/total votes reported by
#    'corosync-quorumtool -s'
#  - offline nodes and nodelist size agreement between corosync.conf and pmxcfs
#  - per-node nodelist entries (name, nodeid, ringX_addr resolvability)
#  - totem transport and authentication/encryption settings
#  - installed corosync major version
# Skipped entirely on standalone nodes (no corosync configuration present).
sub check_cluster_corosync {
    print_header("CHECKING CLUSTER HEALTH/SETTINGS");

    if (!PVE::Corosync::check_conf_exists(1)) {
        log_skip("standalone node.");
        return;
    }

    $log_systemd_unit_state->('pve-cluster.service');
    $log_systemd_unit_state->('corosync.service');

    if (PVE::Cluster::check_cfs_quorum(1)) {
        log_pass("Cluster Filesystem is quorate.");
    } else {
        log_fail("Cluster Filesystem readonly, lost quorum?!");
    }

    my $conf = PVE::Cluster::cfs_read_file('corosync.conf');
    my $conf_nodelist = PVE::Corosync::nodelist($conf);
    my $node_votes = 0;

    print "\nAnalyzing quorum settings and state..\n";
    if (!defined($conf_nodelist)) {
        log_fail("unable to retrieve nodelist from corosync.conf");
        # continue with an empty nodelist so the remaining checks can still run
        $conf_nodelist = {};
    } else {
        # treat a missing quorum_votes value as 0, for the check and the sum alike
        my @votes = map { $_->{quorum_votes} // 0 } values %$conf_nodelist;

        if (grep { $_ != 1 } @votes) {
            log_warn("non-default quorum_votes distribution detected!");
        }
        $node_votes += $_ for @votes;
    }

    my ($expected_votes, $total_votes);
    # only the first matching line of each kind is used
    my $filter_output = sub {
        my $line = shift;
        ($expected_votes) = $line =~ /^Expected votes:\s*(\d+)\s*$/
            if !defined($expected_votes);
        ($total_votes) = $line =~ /^Total votes:\s*(\d+)\s*$/
            if !defined($total_votes);
    };
    eval {
        run_command(['corosync-quorumtool', '-s'], outfunc => $filter_output, noerr => 1);
    };

    if (!defined($expected_votes)) {
        log_fail("unable to get expected number of votes, assuming 0.");
        $expected_votes = 0;
    }
    if (!defined($total_votes)) {
        log_fail("unable to get total number of votes, assuming 0.");
        $total_votes = 0;
    }

    my $cfs_nodelist = PVE::Cluster::get_clinfo()->{nodelist};
    my $offline_nodes = grep { $cfs_nodelist->{$_}->{online} != 1 } keys %$cfs_nodelist;
    if ($offline_nodes > 0) {
        log_fail("$offline_nodes nodes are offline!");
    }

    my $qdevice_votes = 0;
    if (my $qdevice_setup = $conf->{main}->{quorum}->{device}) {
        $qdevice_votes = $qdevice_setup->{votes} // 1;
    }

    log_info("configured votes - nodes: $node_votes");
    log_info("configured votes - qdevice: $qdevice_votes");
    log_info("current expected votes: $expected_votes");
    log_info("current total votes: $total_votes");

    log_warn("expected votes set to non-standard value '$expected_votes'.")
        if $expected_votes != $node_votes + $qdevice_votes;
    log_warn("total votes < expected votes: $total_votes/$expected_votes!")
        if $total_votes < $expected_votes;

    my $conf_nodelist_count = scalar(keys %$conf_nodelist);
    my $cfs_nodelist_count = scalar(keys %$cfs_nodelist);
    log_warn("cluster consists of less than three quorum-providing nodes!")
        if $conf_nodelist_count < 3 && $conf_nodelist_count + $qdevice_votes < 3;

    log_fail("corosync.conf ($conf_nodelist_count) and pmxcfs ($cfs_nodelist_count) don't agree about size of nodelist.")
        if $conf_nodelist_count != $cfs_nodelist_count;

    print "\nChecking nodelist entries..\n";
    my $nodelist_pass = 1;
    for my $cs_node (sort keys %$conf_nodelist) {
        my $entry = $conf_nodelist->{$cs_node};
        if (!defined($entry->{name})) {
            $nodelist_pass = 0;
            log_fail("$cs_node: no name entry in corosync.conf.");
        }
        if (!defined($entry->{nodeid})) {
            $nodelist_pass = 0;
            log_fail("$cs_node: no nodeid configured in corosync.conf.");
        }
        my $gotLinks = 0;
        for my $link (0..7) {
            $gotLinks++ if defined($entry->{"ring${link}_addr"});
        }
        if ($gotLinks <= 0) {
            $nodelist_pass = 0;
            log_fail("$cs_node: no ringX_addr (0 <= X <= 7) link defined in corosync.conf.");
        }

        # A configured link address should already be the IP it resolves to.
        my $verify_ring_ip = sub {
            my $key = shift;
            if (defined(my $ring = $entry->{$key})) {
                my ($resolved_ip, undef) = PVE::Corosync::resolve_hostname_like_corosync($ring, $conf);
                if (defined($resolved_ip)) {
                    if ($resolved_ip ne $ring) {
                        $nodelist_pass = 0;
                        log_warn(
                            "$cs_node: $key '$ring' resolves to '$resolved_ip'.\n"
                            ." Consider replacing it with the currently resolved IP address."
                        );
                    }
                } else {
                    $nodelist_pass = 0;
                    log_fail(
                        "$cs_node: unable to resolve $key '$ring' to an IP address according to Corosync's"
                        ." resolve strategy - cluster will potentially fail with Corosync 3.x/kronosnet!"
                    );
                }
            }
        };
        for my $link (0..7) {
            $verify_ring_ip->("ring${link}_addr");
        }
    }
    log_pass("nodelist settings OK") if $nodelist_pass;

    print "\nChecking totem settings..\n";
    my $totem = $conf->{main}->{totem};
    my $totem_pass = 1;

    my $transport = $totem->{transport};
    if (defined($transport)) {
        if ($transport ne 'knet') {
            $totem_pass = 0;
            log_fail("Corosync transport explicitly set to '$transport' instead of implicit default!");
        }
    }

    # TODO: are those values still up-to-date?
    if ((!defined($totem->{secauth}) || $totem->{secauth} ne 'on') && (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq 'none')) {
        $totem_pass = 0;
        log_fail("Corosync authentication/encryption is not explicitly enabled (secauth / crypto_cipher / crypto_hash)!");
    } elsif (defined($totem->{crypto_cipher}) && $totem->{crypto_cipher} eq '3des') {
        $totem_pass = 0;
        log_fail("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!"); # FIXME: can be removed?
    }

    log_pass("totem settings OK") if $totem_pass;
    print "\n";
    log_info("run 'pvecm status' to get detailed cluster status..");

    if (defined(my $corosync = $get_pkg->('corosync'))) {
        if ($corosync->{OldVersion} =~ m/^2\./) {
            log_fail("\ncorosync 2.x installed, cluster-wide upgrade to 3.x needed!");
        } elsif ($corosync->{OldVersion} !~ m/^3\./) {
            log_fail("\nunexpected corosync version installed: $corosync->{OldVersion}!");
        }
    }
}
395
# Checks a hyper-converged Ceph setup, if one is detected:
#  - overall health status and the 'noout' flag
#  - whether the running daemons (per type and overall) agree on one version
#  - a few settings in the [global] section of ceph.conf
#  - the locally installed Ceph version
sub check_ceph {
    print_header("CHECKING HYPER-CONVERGED CEPH STATUS");

    if (!PVE::Ceph::Tools::check_ceph_inited(1)) {
        log_skip("no hyper-converged ceph setup detected!");
        return;
    }
    log_info("hyper-converged ceph setup detected!");

    log_info("getting Ceph status/health information..");
    my $ceph_status = eval { PVE::API2::Ceph->status({ node => $nodename }); };
    my $noout = eval { PVE::API2::Cluster::Ceph->get_flag({ flag => "noout" }); };
    if ($@) {
        log_fail("failed to get 'noout' flag status - $@");
    }

    my $noout_wanted = 1;

    if ($ceph_status && $ceph_status->{health}) {
        my $health = $ceph_status->{health};
        my $ceph_health = $health->{status};

        if (!$ceph_health) {
            log_fail("unable to determine Ceph health!");
        } elsif ($ceph_health eq 'HEALTH_OK') {
            log_pass("Ceph health reported as 'HEALTH_OK'.");
        } elsif ($ceph_health eq 'HEALTH_WARN' && $noout && (keys %{$health->{checks}} == 1)) {
            log_pass("Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set.");
        } else {
            log_warn("Ceph health reported as '$ceph_health'.\n Use the PVE ".
                "dashboard or 'ceph -s' to determine the specific issues and try to resolve them.");
        }
    } else {
        log_fail("unable to determine Ceph status!");
    }

    # TODO: check OSD min-required version, if too low it breaks stuff!

    log_info("getting Ceph daemon versions..");
    my $ceph_versions = eval { PVE::Ceph::Tools::get_cluster_versions(undef, 1); };
    if ($ceph_versions) {
        # daemon type key in the versions result => name used in the log output
        my @daemon_types = (
            [ mon => 'monitor' ],
            [ mgr => 'manager' ],
            [ mds => 'MDS' ],
            [ osd => 'OSD' ],
        );

        for my $daemon_type (@daemon_types) {
            my ($key, $name) = @$daemon_type;

            my $service_versions = $ceph_versions->{$key};
            if (!$service_versions) {
                log_skip("unable to determine versions of running Ceph $name instances.");
                next;
            }

            my $version_count = scalar(keys %$service_versions);
            if ($version_count == 0) {
                log_skip("no running instances detected for daemon type $name.");
            } elsif ($version_count == 1) {
                log_pass("single running version detected for daemon type $name.");
            } else {
                log_warn("multiple running versions detected for daemon type $name!");
            }
        }

        my $overall_versions = $ceph_versions->{overall};
        if (!$overall_versions) {
            log_warn("unable to determine overall Ceph daemon versions!");
        } elsif (keys %$overall_versions == 1) {
            log_pass("single running overall version detected for all Ceph daemon types.");
            $noout_wanted = 0; # off post-upgrade, on pre-upgrade
        } else {
            log_warn("overall version mismatch detected, check 'ceph versions' output for details!");
        }
    } else {
        log_fail("unable to determine Ceph daemon versions!");
    }

    if ($noout && $noout_wanted) {
        log_pass("'noout' flag set to prevent rebalancing during cluster-wide upgrades.");
    } elsif ($noout) {
        log_warn("'noout' flag set, Ceph cluster upgrade seems finished.");
    } elsif ($noout_wanted) {
        log_warn("'noout' flag not set - recommended to prevent rebalancing during upgrades.");
    }

    log_info("checking Ceph config..");
    my $conf = PVE::Cluster::cfs_read_file('ceph.conf');
    if (%$conf) {
        my $global = $conf->{global};

        # option names are looked up in their '_', ' ' and '-' separated spellings
        my $global_monhost = $global->{mon_host} // $global->{"mon host"} // $global->{"mon-host"};
        if (!defined($global_monhost)) {
            log_warn(
                "No 'mon_host' entry found in ceph config.\n It's recommended to add mon_host with"
                ." all monitor addresses (without ports) to the global section."
            );
        }

        my $ipv6 = $global->{ms_bind_ipv6} // $global->{"ms bind ipv6"} // $global->{"ms-bind-ipv6"};
        if ($ipv6) {
            my $ipv4 = $global->{ms_bind_ipv4} // $global->{"ms bind ipv4"} // $global->{"ms-bind-ipv4"};
            my $ipv4_disabled = defined($ipv4) && $ipv4 eq 'false';
            if ($ipv6 eq 'true' && !$ipv4_disabled) {
                log_warn(
                    "'ms_bind_ipv6' is enabled but 'ms_bind_ipv4' is not disabled.\n Make sure to"
                    ." disable 'ms_bind_ipv4' for ipv6 only clusters, or add an ipv4 network to public/cluster network."
                );
            }
        }

        if (defined($global->{keyring})) {
            log_warn(
                "[global] config section contains 'keyring' option, which will prevent services from"
                ." starting with Nautilus.\n Move 'keyring' option to [client] section instead."
            );
        }

    } else {
        log_warn("Empty ceph config found");
    }

    my $local_ceph_ver = PVE::Ceph::Tools::get_local_version(1);
    if (!defined($local_ceph_ver)) {
        log_fail("unable to determine local Ceph version.");
    } elsif ($local_ceph_ver <= 14) {
        log_fail("local Ceph version too low, at least Octopus required..");
    }
}
525
# Looks for the deprecated 'maxfiles' backup retention parameter in the node's
# /etc/vzdump.conf, in the storage configuration and in the vzdump.cron jobs.
# Also informs about backup storages that have no retention settings at all,
# neither their own nor from the node's vzdump.conf.
sub check_backup_retention_settings {
    log_info("Checking backup retention settings..");

    my $pass = 1;

    my $node_has_retention;

    my $maxfiles_msg = "parameter 'maxfiles' is deprecated with PVE 7.x and will be removed in a " .
        "future version, use 'prune-backups' instead.";

    # node-wide defaults
    eval {
        my $confdesc = PVE::VZDump::Common::get_confdesc();

        my $fn = "/etc/vzdump.conf";
        my $raw = PVE::Tools::file_get_contents($fn);

        my $conf_schema = { type => 'object', properties => $confdesc, };
        my $param = PVE::JSONSchema::parse_config($conf_schema, $fn, $raw);

        my $uses_maxfiles = defined($param->{maxfiles});
        if ($uses_maxfiles) {
            $pass = 0;
            log_warn("$fn - $maxfiles_msg");
        }

        $node_has_retention = $uses_maxfiles || defined($param->{'prune-backups'});
    };
    if (my $err = $@) {
        $pass = 0;
        log_warn("unable to parse node's VZDump configuration - $err");
    }

    # per-storage settings
    my $storage_cfg = PVE::Storage::config();

    for my $storeid (keys $storage_cfg->{ids}->%*) {
        my $scfg = $storage_cfg->{ids}->{$storeid};

        my $uses_maxfiles = defined($scfg->{maxfiles});
        if ($uses_maxfiles) {
            $pass = 0;
            log_warn("storage '$storeid' - $maxfiles_msg");
        }

        next if !$scfg->{content}->{backup};
        next if $uses_maxfiles || defined($scfg->{'prune-backups'}) || $node_has_retention;

        log_info(
            "storage '$storeid' - no backup retention settings defined - by default, since PVE 7.0"
            ." it will no longer keep only the last backup, but all backups"
        );
    }

    # backup jobs
    eval {
        my $vzdump_cron = PVE::Cluster::cfs_read_file('vzdump.cron');

        # only warn once, there might be many jobs...
        for my $job ($vzdump_cron->{jobs}->@*) {
            next if !defined($job->{maxfiles});

            $pass = 0;
            log_warn("/etc/pve/vzdump.cron - $maxfiles_msg");
            last;
        }
    };
    if (my $err = $@) {
        $pass = 0;
        log_warn("unable to parse node's VZDump configuration - $err");
    }

    log_pass("no problems found.") if $pass;
}
593
# Warns about every '*.cred' file found directly in /etc/pve/priv/, naming the
# '/etc/pve/priv/storage/<name>.pw' path it will be moved to during the update.
sub check_cifs_credential_location {
    log_info("checking CIFS credential location..");

    my $cred_file_re = qr/^(.*)\.cred$/;

    my $found;

    my $report_file = sub {
        my ($filename) = @_;

        my ($basename) = $filename =~ $cred_file_re;

        log_warn(
            "CIFS credentials '/etc/pve/priv/$filename' will be moved to"
            ." '/etc/pve/priv/storage/$basename.pw' during the update"
        );

        $found = 1;
    };

    PVE::Tools::dir_glob_foreach('/etc/pve/priv/', $cred_file_re, $report_file);

    if (!$found) {
        log_pass("no CIFS credentials at outdated location found.");
    }
}
616
# Parses the 'role' lines of /etc/pve/user.cfg into a role => { privilege => 1 }
# map. The map is not evaluated any further yet (see the TODO at the end).
sub check_custom_pool_roles {
    log_info("Checking custom roles for pool permissions..");

    if (! -f "/etc/pve/user.cfg") {
        log_skip("user.cfg does not exist");
        return;
    }

    my $raw = eval { PVE::Tools::file_get_contents('/etc/pve/user.cfg'); };
    if ($@) {
        log_fail("Failed to read '/etc/pve/user.cfg' - $@");
        return;
    }

    my $roles = {};
    while ($raw =~ /^\s*(.+?)\s*$/gm) {
        my $line = $1;

        # colon separated fields, each stripped of surrounding whitespace
        my @data = map {
            my $field = $_;
            $field =~ s/^\s+//;
            $field =~ s/\s+$//;
            $field;
        } split(/:/, $line);

        my $et = shift @data;
        next if $et ne 'role';

        my ($role, $privlist) = @data;
        if (!PVE::AccessControl::verify_rolename($role, 1)) {
            warn "user config - ignore role '$role' - invalid characters in role name\n";
            next;
        }

        $roles->{$role} //= {};
        $roles->{$role}->{$_} = 1 for split_list($privlist);
    }

    for my $role (sort keys %$roles) {
        next if PVE::AccessControl::role_is_special($role);

        # TODO: any role updates?
    }
}
663
# Logs $warning if $raw is defined and longer than $max_length characters.
my sub check_max_length {
    my ($raw, $max_length, $warning) = @_;

    my $too_long = defined($raw) && length($raw) > $max_length;
    log_warn($warning) if $too_long;
}
668
# Checks node and guest configurations for:
#  - node descriptions longer than 64 KiB
#  - guest (CT and VM) descriptions longer than 8 KiB
#  - containers with raw 'lxc.cgroup.*' keys (as opposed to 'lxc.cgroup2.*')
sub check_node_and_guest_configurations {
    log_info("Checking node and guest description/note length..");

    my @affected_nodes = grep {
        my $desc = PVE::NodeConfig::load_config($_)->{desc};
        defined($desc) && length($desc) > 64 * 1024
    } PVE::Cluster::get_nodelist();

    if (scalar(@affected_nodes) > 0) {
        log_warn("Node config description of the following nodes too long for new limit of 64 KiB:\n "
            . join(', ', @affected_nodes));
    } else {
        log_pass("All node config descriptions fit in the new limit of 64 KiB");
    }

    my $affected_guests_long_desc = [];
    my $affected_cts_cgroup_keys = [];

    my $cts = PVE::LXC::config_list();
    for my $vmid (sort { $a <=> $b } keys %$cts) {
        my $conf = PVE::LXC::Config->load_config($vmid);

        my $desc = $conf->{description};
        push @$affected_guests_long_desc, "CT $vmid" if defined($desc) && length($desc) > 8 * 1024;

        # each raw lxc entry is an array reference with the key as first element
        my $lxc_raw_conf = $conf->{lxc} // [];
        push @$affected_cts_cgroup_keys, "CT $vmid"
            if grep { $_->[0] =~ /^lxc\.cgroup\./ } @$lxc_raw_conf;
    }
    my $vms = PVE::QemuServer::config_list();
    for my $vmid (sort { $a <=> $b } keys %$vms) {
        my $desc = PVE::QemuConfig->load_config($vmid)->{description};
        push @$affected_guests_long_desc, "VM $vmid" if defined($desc) && length($desc) > 8 * 1024;
    }
    if (scalar($affected_guests_long_desc->@*) > 0) {
        # the limit checked above is 8 KiB, so report exactly that
        log_warn("Guest config description of the following virtual-guests too long for new limit of 8 KiB:\n"
            ." " . join(", ", $affected_guests_long_desc->@*));
    } else {
        log_pass("All guest config descriptions fit in the new limit of 8 KiB");
    }

    log_info("Checking container configs for deprecated lxc.cgroup entries");

    if (scalar($affected_cts_cgroup_keys->@*) > 0) {
        if ($forced_legacy_cgroup) {
            log_pass("Found legacy 'lxc.cgroup' keys, but system explicitly configured for legacy hybrid cgroup hierarchy.");
        } else {
            log_warn("The following CTs have 'lxc.cgroup' keys configured, which will be ignored in the new default unified cgroupv2:\n"
                ." " . join(", ", $affected_cts_cgroup_keys->@*) ."\n"
                ." Often it can be enough to change to the new 'lxc.cgroup2' prefix after the upgrade to Proxmox VE 7.x");
        }
    } else {
        log_pass("No legacy 'lxc.cgroup' keys found.");
    }
}
723
# Checks the content type configuration of the storages:
#  - enabled, non-shared storages configured without any content type although
#    the plugin does not support 'none'
#  - guest volumes found on storages that have neither 'images' nor 'rootdir'
#    configured
#  - volumes referenced by guest configs or snapshots whose storage does not
#    exist or lacks the content type required for the volume
sub check_storage_content {
    log_info("Checking storage content type configuration..");

    my $found;
    my $pass = 1;

    my $storage_cfg = PVE::Storage::config();

    for my $storeid (sort keys $storage_cfg->{ids}->%*) {
        my $scfg = $storage_cfg->{ids}->{$storeid};

        next if $scfg->{shared};
        next if !PVE::Storage::storage_check_enabled($storage_cfg, $storeid, undef, 1);

        my $valid_content = PVE::Storage::Plugin::valid_content_types($scfg->{type});

        if (scalar(keys $scfg->{content}->%*) == 0 && !$valid_content->{none}) {
            $pass = 0;
            log_fail("storage '$storeid' does not support configured content type 'none'");
            delete $scfg->{content}->{none}; # scan for guest images below
        }

        next if $scfg->{content}->{images};
        next if $scfg->{content}->{rootdir};

        # Skip 'iscsi(direct)' (and foreign plugins with potentially similar behavior) with 'none',
        # because that means "use LUNs directly" and vdisk_list() in PVE 6.x still lists those.
        # It's enough to *not* skip 'dir', because it is the only other storage that supports 'none'
        # and 'images' or 'rootdir', hence being potentially misconfigured.
        next if $scfg->{type} ne 'dir' && $scfg->{content}->{none};

        eval { PVE::Storage::activate_storage($storage_cfg, $storeid) };
        if (my $err = $@) {
            log_warn("activating '$storeid' failed - $err");
            next;
        }

        my $res = eval { PVE::Storage::vdisk_list($storage_cfg, $storeid); };
        if (my $err = $@) {
            log_warn("listing images on '$storeid' failed - $err");
            next;
        }
        my @volids = map { $_->{volid} } $res->{$storeid}->@*;

        my $number = scalar(@volids);
        if ($number > 0) {
            log_info(
                "storage '$storeid' - neither content type 'images' nor 'rootdir' configured, but"
                ." found $number guest volume(s)"
            );
        }
    }

    # Checks a single volume reference of a guest against the storage configuration.
    my $check_volid = sub {
        my ($volid, $vmid, $vmtype, $reference) = @_;

        my $guesttext = $vmtype eq 'qemu' ? 'VM' : 'CT';
        my $prefix = "$guesttext $vmid - volume '$volid' ($reference)";

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !defined($storeid);

        my $scfg = $storage_cfg->{ids}->{$storeid};
        if (!$scfg) {
            $pass = 0;
            log_warn("$prefix - storage does not exist!");
            return;
        }

        # cannot use parse_volname for containers, as it can return 'images'
        # but containers cannot have ISO images attached, so assume 'rootdir'
        my $vtype = 'rootdir';
        if ($vmtype eq 'qemu') {
            ($vtype) = eval { PVE::Storage::parse_volname($storage_cfg, $volid); };
            return if $@;
        }

        if (!$scfg->{content}->{$vtype}) {
            $found = 1;
            $pass = 0;
            log_warn("$prefix - storage does not have content type '$vtype' configured.");
        }
    };

    my $cts = PVE::LXC::config_list();
    for my $vmid (sort { $a <=> $b } keys %$cts) {
        my $conf = PVE::LXC::Config->load_config($vmid);

        my $volhash = {};

        my $check = sub {
            my ($ms, $mountpoint, $reference) = @_;

            my $volid = $mountpoint->{volume};
            return if !$volid || $mountpoint->{type} ne 'volume';

            return if $volhash->{$volid}; # volume might be referenced multiple times

            $volhash->{$volid} = 1;

            $check_volid->($volid, $vmid, 'lxc', $reference);
        };

        my $opts = { include_unused => 1 };
        PVE::LXC::Config->foreach_volume_full($conf, $opts, $check, 'in config');
        for my $snapname (keys $conf->{snapshots}->%*) {
            my $snap = $conf->{snapshots}->{$snapname};
            PVE::LXC::Config->foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
        }
    }

    my $vms = PVE::QemuServer::config_list();
    for my $vmid (sort { $a <=> $b } keys %$vms) {
        my $conf = PVE::QemuConfig->load_config($vmid);

        my $volhash = {};

        my $check = sub {
            my ($key, $drive, $reference) = @_;

            my $volid = $drive->{file};
            return if $volid =~ m|^/|; # absolute paths are not storage volumes
            return if $volhash->{$volid}; # volume might be referenced multiple times

            $volhash->{$volid} = 1;
            $check_volid->($volid, $vmid, 'qemu', $reference);
        };

        my $opts = {
            extra_keys => ['vmstate'],
            include_unused => 1,
        };
        # startup from a suspended state works even without 'images' content type on the
        # state storage, so do not check 'vmstate' for $conf
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $check, 'in config');
        for my $snapname (keys $conf->{snapshots}->%*) {
            my $snap = $conf->{snapshots}->{$snapname};
            PVE::QemuConfig->foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
        }
    }

    if ($found) {
        log_warn("Proxmox VE enforces stricter content type checks since 7.0. The guests above " .
            "might not work until the storage configuration is fixed.");
    }

    if ($pass) {
        log_pass("no problems found");
    }
}
874
# Checks whether the containers on this node can run in a unified cgroup v2
# layout. Running containers are inspected through /proc/<pid>/root, stopped
# ones are mounted temporarily. A container is considered compatible if its
# init is not systemd, or if its systemd is at least version 232. The check
# stops at the first incompatible container found.
sub check_containers_cgroup_compat {
    if ($forced_legacy_cgroup) {
        log_warn("System explicitly configured for legacy hybrid cgroup hierarchy.\n"
            ." NOTE: support for the hybrid cgroup hierarchy will be removed in future Proxmox VE 9 (~ 2025)."
        );
    }

    my $supports_cgroupv2 = sub {
        my ($conf, $rootdir, $ctid) = @_;

        # Derives the systemd version from the name of its shared library.
        my $get_systemd_version = sub {
            my ($self) = @_;

            my $sd_lib_dir = -d "/lib/systemd" ? "/lib/systemd" : "/usr/lib/systemd";
            # single quotes, so that the '\.' reaches the regex engine as an escaped dot
            my $libsd = PVE::Tools::dir_glob_regex($sd_lib_dir, 'libsystemd-shared-.+\.so');
            if (defined($libsd) && $libsd =~ /libsystemd-shared-(\d+)\.so/) {
                return $1;
            }

            return undef;
        };

        my $unified_cgroupv2_support = sub {
            my ($self) = @_;

            # https://www.freedesktop.org/software/systemd/man/systemd.html
            # systemd is installed as symlink to /sbin/init
            my $systemd = CORE::readlink('/sbin/init');

            # assume non-systemd init will run with unified cgroupv2
            if (!defined($systemd) || $systemd !~ m@/systemd$@) {
                return 1;
            }

            # systemd version 232 (e.g. debian stretch) supports the unified hierarchy
            my $sdver = $get_systemd_version->();
            if (!defined($sdver) || $sdver < 232) {
                return 0;
            }

            return 1;
        };

        my $ostype = $conf->{ostype};
        if (!defined($ostype)) {
            log_warn("Found CT ($ctid) without 'ostype' set!");
        } elsif ($ostype eq 'devuan' || $ostype eq 'alpine') {
            return 1; # no systemd, no cgroup problems
        }

        my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
        return $lxc_setup->protected_call($unified_cgroupv2_support);
    };

    my $log_problem = sub {
        my ($ctid) = @_;
        my $extra = $forced_legacy_cgroup ? '' : " or set systemd.unified_cgroup_hierarchy=0 in the Proxmox VE hosts' kernel cmdline";
        log_warn(
            "Found at least one CT ($ctid) which does not support running in a unified cgroup v2 layout\n"
            ." Consider upgrading the Containers distro${extra}! Skipping further CT compat checks."
        );
    };

    my $cts = eval { PVE::API2::LXC->vmlist({ node => $nodename }) };
    if ($@) {
        log_warn("Failed to retrieve information about this node's CTs - $@");
        return;
    }

    if (!defined($cts) || !scalar(@$cts)) {
        log_skip("No containers on node detected.");
        return;
    }

    # The list entries are hash references, so order them by their VMID; comparing
    # the references themselves would merely sort by memory address.
    my $by_vmid = sub { $a->{vmid} <=> $b->{vmid} };
    my @running_cts = sort $by_vmid grep { $_->{status} eq 'running' } @$cts;
    my @offline_cts = sort $by_vmid grep { $_->{status} ne 'running' } @$cts;

    for my $ct (@running_cts) {
        my $ctid = $ct->{vmid};
        my $pid = eval { PVE::LXC::find_lxc_pid($ctid) };
        if (my $err = $@) {
            log_warn("Failed to get PID for running CT $ctid - $err");
            next;
        }
        my $rootdir = "/proc/$pid/root";
        my $conf = PVE::LXC::Config->load_config($ctid);

        my $ret = eval { $supports_cgroupv2->($conf, $rootdir, $ctid) };
        if (my $err = $@) {
            log_warn("Failed to get cgroup support status for CT $ctid - $err");
            next;
        }
        if (!$ret) {
            $log_problem->($ctid);
            return;
        }
    }

    my $storage_cfg = PVE::Storage::config();
    for my $ct (@offline_cts) {
        my $ctid = $ct->{vmid};
        my ($conf, $rootdir, $ret);
        eval {
            $conf = PVE::LXC::Config->load_config($ctid);
            $rootdir = PVE::LXC::mount_all($ctid, $storage_cfg, $conf);
            $ret = $supports_cgroupv2->($conf, $rootdir, $ctid);
        };
        if (my $err = $@) {
            log_warn("Failed to load config and mount CT $ctid - $err");
            # best-effort cleanup, the mount may have partially succeeded
            eval { PVE::LXC::umount_all($ctid, $storage_cfg, $conf) };
            next;
        }
        if (!$ret) {
            $log_problem->($ctid);
            eval { PVE::LXC::umount_all($ctid, $storage_cfg, $conf) };
            last;
        }

        eval { PVE::LXC::umount_all($ctid, $storage_cfg, $conf) };
    }
}
996
# Check that the Debian security repository uses the suite matching the target
# release of this upgrade (Proxmox VE 8 is based on Debian Bookworm).
#
# Scans /etc/apt/sources.list and every '*.list' file below
# /etc/apt/sources.list.d for one-line style 'deb' entries that point to the
# Debian security archive and logs, for each such entry:
#   * INFO - entry still uses the old 'bullseye-security' suite
#   * PASS - entry already uses 'bookworm-security'
#   * FAIL - entry uses any other suite
# If no matching entry is found at all, a warning is logged instead.
#
# Takes no parameters; the result is communicated through the log_* helpers only.
sub check_apt_repos {
    log_info("Checking if the suite for the Debian security repository is correct..");

    # suite used by the release we upgrade from, and the one we upgrade to
    my ($old_suite, $new_suite) = ('bullseye-security', 'bookworm-security');

    my $found = 0;

    my $dir = '/etc/apt/sources.list.d';
    my $in_dir = 0;

    # TODO: check that (original) debian and Proxmox VE mirrors are present.

    my $check_file = sub {
        my ($file) = @_;

        # dir_glob_foreach passes bare file names, so prefix the directory then
        $file = "${dir}/${file}" if $in_dir;

        # unreadable or missing files are silently ignored
        my $raw = eval { PVE::Tools::file_get_contents($file) };
        return if !defined($raw);

        my $number = 0;
        for my $line (split(/\n/, $raw)) {
            $number++;

            # Drop everything from the first '#' on. Using a substitution on a
            # copy (instead of split) never produces undef, not even for empty
            # lines or lines that consist of '#' characters only.
            (my $entry = $line) =~ s/#.*//;

            next if $entry !~ m/^deb[[:space:]]/; # is case sensitive

            my $suite;

            # catch any of
            # https://deb.debian.org/debian-security
            # http://security.debian.org/debian-security
            # http://security.debian.org/
            if ($entry =~ m|https?://deb\.debian\.org/debian-security/?\s+(\S*)|i) {
                $suite = $1;
            } elsif ($entry =~ m|https?://security\.debian\.org(?:.*?)\s+(\S*)|i) {
                $suite = $1;
            } else {
                next;
            }

            $found = 1;

            my $where = "in ${file}:${number}";

            if ($suite eq $old_suite) {
                log_info("Make sure to change the suite of the Debian security repository " .
                    "from '$old_suite' to '$new_suite' - $where");
            } elsif ($suite eq $new_suite) {
                log_pass("already using '$new_suite'");
            } else {
                log_fail("The new suite of the Debian security repository should be " .
                    "'$new_suite' - $where");
            }
        }
    };

    $check_file->("/etc/apt/sources.list");

    $in_dir = 1;

    PVE::Tools::dir_glob_foreach($dir, '^.*\.list$', $check_file);

    if (!$found) {
        # only warn, it might be defined in a .sources file or in a way not catched above
        log_warn("No Debian security repository detected in /etc/apt/sources.list and " .
            "/etc/apt/sources.list.d/*.list");
    }
}
1068
# Log a verdict about the time synchronisation service that is active on this
# node. The units are probed in a fixed order and probing stops at the first
# active one:
#   systemd-timesyncd -> warning (with recommended alternatives)
#   ntp               -> info (replaced by 'ntpsec' on upgrade)
#   chrony, openntpd, ntpsec -> pass
#   none of the above -> warning
sub check_time_sync {
    # yields the unit name if its reported state is 'active', undef otherwise
    my $active_or_undef = sub {
        my ($unit) = @_;
        return $get_systemd_unit_state->($unit, 1) eq 'active' ? $unit : undef;
    };

    # yields the first active unit out of the passed candidates (or undef),
    # later candidates are not queried once a hit was found
    my $first_active = sub {
        for my $candidate (@_) {
            my $hit = $active_or_undef->($candidate);
            return $hit if $hit;
        }
        return;
    };

    log_info("Checking for supported & active NTP service..");

    if ($active_or_undef->('systemd-timesyncd.service')) {
        log_warn(
            "systemd-timesyncd is not the best choice for time-keeping on servers, due to only applying"
            ." updates on boot.\n  While not necesarry for the upgrade it's recommended to use one of:\n"
            ."    * chrony (Default in new Proxmox VE installations)\n    * ntpsec\n    * openntpd\n"
        );
    } elsif ($active_or_undef->('ntp.service')) {
        log_info("Debian deprecated and removed the ntp package for Bookworm, but the system"
            ." will automatically migrate to the 'ntpsec' replacement package on upgrade.");
    } elsif (my $active_ntp = $first_active->(qw(chrony.service openntpd.service ntpsec.service))) {
        log_pass("Detected active time synchronisation unit '$active_ntp'");
    } else {
        log_warn(
            "No (active) time synchronisation daemon (NTP) detected, but synchronized systems are important,"
            ." especially for cluster and/or ceph!"
        );
    }
}
1091
# Run the "miscellaneous" group of upgrade checks and log one verdict per check.
#
# In order, this:
#   * scans /root/.ssh/config for cipher settings naming blowfish/arcfour/3des
#   * logs the state of the core Proxmox VE daemons
#   * checks the time synchronisation setup (check_time_sync)
#   * warns if the root file system has less than 5 GB available
#   * counts running VMs and CTs on this node
#   * verifies that the node's hostname resolves to an IP that is configured on
#     exactly one local interface
#   * checks the public key size of the node's certificates against a per-type
#     minimum (RSA: 2048 bit, ECC: 224 bit)
#   * finally calls the remaining check_* helpers
#
# Takes no parameters; the result is communicated through the log_* helpers only.
sub check_misc {
    print_header("MISCELLANEOUS CHECKS");

    my $ssh_config = eval { PVE::Tools::file_get_contents('/root/.ssh/config') };
    if (defined($ssh_config)) {
        # greedy '.*' in front of the group: $1 holds the last offending cipher of the line
        if ($ssh_config =~ /^Ciphers .*(blowfish|arcfour|3des).*$/m) {
            log_fail("Unsupported SSH Cipher configured for root in /root/.ssh/config: $1");
        }
    } else {
        log_skip("No SSH config file found.");
    }

    log_info("Checking common daemon services..");
    $log_systemd_unit_state->('pveproxy.service');
    $log_systemd_unit_state->('pvedaemon.service');
    $log_systemd_unit_state->('pvescheduler.service');
    $log_systemd_unit_state->('pvestatd.service');

    check_time_sync();

    my $root_free = PVE::Tools::df('/', 10);
    log_warn("Less than 5 GB free space on root file system.")
        if defined($root_free) && $root_free->{avail} < 5 * 1000*1000*1000;

    log_info("Checking for running guests..");
    my $running_guests = 0;

    # grep in (numeric) scalar context yields the number of matching guests
    my $vms = eval { PVE::API2::Qemu->vmlist({ node => $nodename }) };
    log_warn("Failed to retrieve information about this node's VMs - $@") if $@;
    $running_guests += grep { $_->{status} eq 'running' } @$vms if defined($vms);

    my $cts = eval { PVE::API2::LXC->vmlist({ node => $nodename }) };
    log_warn("Failed to retrieve information about this node's CTs - $@") if $@;
    $running_guests += grep { $_->{status} eq 'running' } @$cts if defined($cts);

    if ($running_guests > 0) {
        log_warn("$running_guests running guest(s) detected - consider migrating or stopping them.");
    } else {
        log_pass("no running guest detected.");
    }

    # NOTE(review): PVE::Network and Net::IP are not part of this file's own
    # 'use' list; presumably they get loaded by one of the other PVE modules -
    # confirm.
    log_info("Checking if the local node's hostname '$nodename' is resolvable..");
    my $local_ip = eval { PVE::Network::get_ip_from_hostname($nodename) };
    if ($@) {
        log_warn("Failed to resolve hostname '$nodename' to IP - $@");
    } else {
        log_info("Checking if resolved IP is configured on local node..");
        # use a single-host prefix so only an exact address match counts
        my $cidr = Net::IP::ip_is_ipv6($local_ip) ? "$local_ip/128" : "$local_ip/32";
        my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
        my $ip_count = scalar(@$configured_ips);

        if ($ip_count <= 0) {
            log_fail("Resolved node IP '$local_ip' not configured or active for '$nodename'");
        } elsif ($ip_count > 1) {
            log_warn("Resolved node IP '$local_ip' active on multiple ($ip_count) interfaces!");
        } else {
            log_pass("Resolved node IP '$local_ip' configured and active on single interface.");
        }
    }

    log_info("Check node certificate's RSA key size");
    # Guarded like the guest list calls above, so that a problem with reading
    # the certificates does not abort all remaining checks.
    my $certs = eval { PVE::API2::Certificates->info({ node => $nodename }) };
    if (my $err = $@) {
        log_warn("Failed to retrieve information about this node's certificates - $err");
    }
    $certs //= [];

    # minimal accepted public key size in bits, keyed by public key type
    my $certs_check = {
        'rsaEncryption' => {
            minsize => 2048,
            name => 'RSA',
        },
        'id-ecPublicKey' => {
            minsize => 224,
            name => 'ECC',
        },
    };

    for my $cert (@$certs) {
        my ($type, $size, $fn) = $cert->@{qw(public-key-type public-key-bits filename)};

        if (!defined($type) || !defined($size)) {
            log_warn("'$fn': cannot check certificate, failed to get it's type or size!");
            next; # nothing left to compare, avoid a bogus 'unknown type' follow-up warning
        }

        my $check = $certs_check->{$type};
        if (!defined($check)) {
            log_warn("'$fn': certificate's public key type '$type' unknown!");
            next;
        }

        # report the limit that was actually applied, it differs between RSA and ECC
        my $minsize = $check->{minsize};
        if ($size < $minsize) {
            log_fail("'$fn', certificate's $check->{name} public key size is less than $minsize bit");
        } else {
            log_pass("Certificate '$fn' passed Debian Busters (and newer) security level for TLS connections ($size >= $minsize)");
        }
    }

    check_backup_retention_settings();
    check_cifs_credential_location();
    check_custom_pool_roles();
    check_node_and_guest_configurations();
    check_apt_repos();
}
1191
# Return $text as a plain string, wrapped in the given ANSI color only when
# $enabled is true.
my sub colored_if {
    my ($text, $color_name, $enabled) = @_;

    return "$text" if !$enabled;
    return "" . colored($text, $color_name);
}
1196
# Registers the 'checklist' command: runs all check groups, optionally the
# expensive container checks (parameter 'full'), and prints a summary of the
# pass/skip/warn/fail counters afterwards.
__PACKAGE__->register_method ({
    name => 'checklist',
    path => 'checklist',
    method => 'GET',
    description => 'Check (pre-/post-)upgrade conditions.',
    parameters => {
        additionalProperties => 0,
        properties => {
            full => {
                description => 'perform additional, expensive checks.',
                type => 'boolean',
                optional => 1,
                default => 0,
            },
        },
    },
    returns => { type => 'null' },
    code => sub {
        my ($param) = @_;

        # remember if the kernel command line requests the legacy cgroup layout
        my $cmdline = PVE::Tools::file_get_contents('/proc/cmdline');
        $forced_legacy_cgroup = 1 if $cmdline =~ /systemd.unified_cgroup_hierarchy=0/;

        check_pve_packages();
        check_cluster_corosync();
        check_ceph();
        check_storage_health();
        check_misc();

        if (!$param->{full}) {
            log_skip("NOTE: Expensive checks, like CT cgroupv2 compat, not performed without '--full' parameter");
        } else {
            check_containers_cgroup_compat();
        }

        print_header("SUMMARY");

        # sum up all counters that were collected by the log_* helpers
        my $total = 0;
        for my $count (values %$counters) {
            $total += $count;
        }

        my ($warnings, $failures) = $counters->@{qw(warn fail)};

        print "TOTAL:    $total\n";
        print colored("PASSED:   $counters->{pass}\n", 'green');
        print "SKIPPED:  $counters->{skip}\n";
        print colored_if("WARNINGS: $warnings\n", 'yellow', $warnings > 0);
        print colored_if("FAILURES: $failures\n", 'red', $failures > 0);

        if ($warnings > 0 || $failures > 0) {
            # failures take precedence over warnings for the hint's color
            my $color = $failures > 0 ? 'red' : 'yellow';
            print colored("\nATTENTION: Please check the output for detailed information!\n", $color);
            if ($failures > 0) {
                print colored("Try to solve the problems one at a time and then run this checklist tool again.\n", $color);
            }
        }

        return undef;
    }});
1253
# Command definition of this CLI tool: the package exposes the single
# registered method 'checklist'.
# NOTE(review): presumably consumed by the PVE::CLIHandler base class, with the
# two trailing elements being the positional argument list and the fixed
# parameters - confirm against PVE::CLIHandler.
our $cmddef = [
    __PACKAGE__,
    'checklist',
    [],
    {},
];

1;