]>
Commit | Line | Data |
---|---|---|
a98193c2 TL |
1 | package PVE::CLI::pve7to8; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | ||
eed1e93e TL |
6 | use Cwd (); |
7 | ||
a98193c2 TL |
8 | use PVE::API2::APT; |
9 | use PVE::API2::Ceph; | |
10 | use PVE::API2::LXC; | |
11 | use PVE::API2::Qemu; | |
12 | use PVE::API2::Certificates; | |
13 | use PVE::API2::Cluster::Ceph; | |
14 | ||
15 | use PVE::AccessControl; | |
16 | use PVE::Ceph::Tools; | |
17 | use PVE::Cluster; | |
18 | use PVE::Corosync; | |
19 | use PVE::INotify; | |
20 | use PVE::JSONSchema; | |
21 | use PVE::NodeConfig; | |
22 | use PVE::RPCEnvironment; | |
23 | use PVE::Storage; | |
24 | use PVE::Storage::Plugin; | |
eed1e93e | 25 | use PVE::Tools qw(run_command split_list file_get_contents); |
a98193c2 TL |
26 | use PVE::QemuConfig; |
27 | use PVE::QemuServer; | |
28 | use PVE::VZDump::Common; | |
29 | use PVE::LXC; | |
30 | use PVE::LXC::Config; | |
31 | use PVE::LXC::Setup; | |
32 | ||
33 | use Term::ANSIColor; | |
34 | ||
35 | use PVE::CLIHandler; | |
36 | ||
37 | use base qw(PVE::CLIHandler); | |
38 | ||
39 | my $nodename = PVE::INotify::nodename(); | |
40 | ||
eed1e93e TL |
41 | my $upgraded = 0; # set in check_pve_packages |
42 | ||
a98193c2 TL |
# Set up the default CLI RPC environment via PVE::RPCEnvironment.
sub setup_environment {
    return PVE::RPCEnvironment->setup_default_cli_env();
}
46 | ||
# Minimum proxmox-ve package version, split into major, minor and package release.
my $min_pve_major = 7;
my $min_pve_minor = 4;
my $min_pve_pkgrel = 1;

# Maps a Ceph major release number to its code name.
my $ceph_release2code = {
    12 => 'Luminous',
    13 => 'Mimic',
    14 => 'Nautilus',
    15 => 'Octopus',
    16 => 'Pacific',
    17 => 'Quincy',
    18 => 'Reef',
};

# the version we support for upgrading (i.e., available on both)
my $ceph_supported_release = 17;
my $ceph_supported_code_name = $ceph_release2code->{$ceph_supported_release};
die "inconsistent source code, could not map expected ceph version to code name!"
    if !$ceph_supported_code_name;

my $forced_legacy_cgroup = 0;
63 | ||
# Number of lines logged per level; only the levels listed here are counted.
my $counters = { pass => 0, skip => 0, warn => 0, fail => 0 };

# Print one log line to STDOUT. If a level is given, the line is prefixed with
# the upper-cased level and the matching counter (if there is one) is bumped.
my $log_line = sub {
    my ($level, $line) = @_;

    if (defined($level)) {
        $counters->{$level}++ if defined($counters->{$level});
        print uc($level), ': ';
    }

    print "$line\n";
};
79 | ||
# Log a line with the given level while the terminal color is switched to
# $color; the color is reset again afterwards.
my $log_line_colored = sub {
    my ($color, $level, @args) = @_;

    print color($color);
    $log_line->($level, @args);
    print color('reset');
};

# Level-specific logging helpers. 'pass', 'warn' and 'fail' are colored,
# 'info' and 'skip' are printed without color.
sub log_pass { $log_line_colored->('green', 'pass', @_) }

sub log_info { $log_line->('info', @_) }

sub log_skip { $log_line->('skip', @_) }

sub log_warn { $log_line_colored->('yellow', 'warn', @_) }

sub log_fail { $log_line_colored->('bold red', 'fail', @_) }
102 | ||
# True until the first header has been printed.
my $print_header_first = 1;

# Print a section header; every header except the first one is preceded by an
# empty line.
sub print_header {
    my ($h) = @_;

    my $separator = $print_header_first ? '' : "\n";
    print "${separator}= $h =\n\n";

    $print_header_first = 0;
}
110 | ||
# Query systemd for the state of $unit.
#
# Runs 'systemctl is-enabled' first and, if that produced any output, 'systemctl
# is-active'. The last output line seen is returned, or 'unknown' if there was
# none. Command failures are ignored. With $suppress_stderr set, stderr output
# of systemctl is discarded.
my $get_systemd_unit_state = sub {
    my ($unit, $suppress_stderr) = @_;

    my $state;
    my %run_opts = (
        noerr => 1,
        outfunc => sub {
            $state = shift;
            chomp $state;
        },
    );
    $run_opts{errfunc} = sub { } if $suppress_stderr;

    eval {
        run_command(['systemctl', 'is-enabled', "$unit"], %run_opts);
        if (defined($state)) {
            run_command(['systemctl', 'is-active', "$unit"], %run_opts);
        }
    };

    return defined($state) ? $state : 'unknown';
};
# Log the state of a systemd unit with a level matching its state:
# 'active' passes, 'failed' fails, 'inactive' fails unless $no_fail_on_inactive
# is set (then it warns), and every other state is logged as a warning.
my $log_systemd_unit_state = sub {
    my ($unit, $no_fail_on_inactive) = @_;

    my $state = $get_systemd_unit_state->($unit);

    my $log_method_for = {
        active => \&log_pass,
        inactive => ($no_fail_on_inactive ? \&log_warn : \&log_fail),
        failed => \&log_fail,
    };
    my $log_method = $log_method_for->{$state} // \&log_warn;

    $log_method->("systemd unit '$unit' is in state '$state'");
};
147 | ||
# Package version list as returned by PVE::API2::APT->versions(), fetched on
# first use and then reused.
my $versions;

# Look up the version entry for package $pkg.
#
# Returns the first entry of the version list whose 'Package' field equals
# $pkg, or undef if there is none. A missing package is *not* logged: callers
# in this file also use this helper as a plain "is this package listed?" probe
# and report missing packages themselves where that matters. (The previous
# "not found" branch compared an array reference numerically against 0 and
# could therefore never trigger.)
#
# If the version list itself cannot be retrieved, a failure is logged and undef
# is returned.
my $get_pkg = sub {
    my ($pkg) = @_;

    if (!defined($versions)) {
        $versions = eval { PVE::API2::APT->versions({ node => $nodename }) };
        my $err = $@; # save it before anything else can clobber $@

        if (!defined($versions)) {
            my $msg = "unable to retrieve package version information";
            $msg .= " - $err" if $err;
            log_fail($msg);
            return undef;
        }
    }

    for my $entry (@$versions) {
        return $entry if $entry->{Package} eq $pkg;
    }

    return undef;
};
169 | ||
# Check for pending package updates, the installed proxmox-ve version and the
# running kernel.
#
# Sets the file-level $upgraded flag once the installed proxmox-ve major
# version is newer than $min_pve_major. All results are reported through the
# log_* helpers.
sub check_pve_packages {
    print_header("CHECKING VERSION INFORMATION FOR PVE PACKAGES");

    print "Checking for package updates..\n";
    my $updates = eval { PVE::API2::APT->list_updates({ node => $nodename }); };
    if (!defined($updates)) {
        log_warn("$@") if $@;
        log_fail("unable to retrieve list of package updates!");
    } elsif (@$updates > 0) {
        my $pkgs = join(', ', map { $_->{Package} } @$updates);
        log_warn("updates for the following packages are available:\n $pkgs");
    } else {
        log_pass("all packages up-to-date");
    }

    print "\nChecking proxmox-ve package version..\n";
    if (defined(my $proxmox_ve = $get_pkg->('proxmox-ve'))) {
        # TODO: update to native version for pve8to9
        my $min_pve_ver = "$min_pve_major.$min_pve_minor-$min_pve_pkgrel";

        my $installed_ver = $proxmox_ve->{OldVersion} // '';
        my ($maj, $min, $pkgrel) = $installed_ver =~ m/^(\d+)\.(\d+)[.-](\d+)/;

        if (!defined($maj)) {
            log_fail("unable to parse proxmox-ve package version '$installed_ver'!");
        } elsif ($maj > $min_pve_major) {
            log_pass("already upgraded to Proxmox VE " . ($min_pve_major + 1));
            $upgraded = 1;
        } elsif (
            $maj == $min_pve_major
            # compare minor and package release as an ordered pair, a newer
            # minor release is fine regardless of its package release
            && ($min > $min_pve_minor || ($min == $min_pve_minor && $pkgrel >= $min_pve_pkgrel))
        ) {
            log_pass("proxmox-ve package has version >= $min_pve_ver");
        } else {
            log_fail("proxmox-ve package is too old, please upgrade to >= $min_pve_ver!");
        }

        my ($krunning, $kinstalled) = (qr/6\.(?:2|5)/, 'pve-kernel-6.2');
        if (!$upgraded) {
            # we got a few that avoided 5.15 in cluster with mixed CPUs, so allow older too
            ($krunning, $kinstalled) = (qr/(?:5\.(?:13|15)|6\.2)/, 'pve-kernel-5.15');
        }

        print "\nChecking running kernel version..\n";
        my $kernel_ver = $proxmox_ve->{RunningKernel};
        my $kernel_suitable = defined($kernel_ver) && $kernel_ver =~ /^$krunning/;
        if (!defined($kernel_ver)) {
            log_fail("unable to determine running kernel version.");
        } elsif ($kernel_suitable) {
            if ($upgraded) {
                log_pass("running new kernel '$kernel_ver' after upgrade.");
            } else {
                log_pass("running kernel '$kernel_ver' is considered suitable for upgrade.");
            }
        } elsif ($get_pkg->($kinstalled)) {
            # with 6.2 kernel being available in both we might want to fine-tune the check?
            log_warn("a suitable kernel ($kinstalled) is installed, but an unsuitable ($kernel_ver) is booted, missing reboot?!");
        } else {
            log_warn("unexpected running and installed kernel '$kernel_ver'.");
        }

        if ($upgraded && $kernel_suitable) {
            my $outdated_kernel_meta_pkgs = [];
            for my $kernel_meta_version ('5.4', '5.11', '5.13', '5.15') {
                my $pkg = "pve-kernel-${kernel_meta_version}";
                if ($get_pkg->($pkg)) {
                    push @$outdated_kernel_meta_pkgs, $pkg;
                }
            }
            if (scalar(@$outdated_kernel_meta_pkgs) > 0) {
                log_info(
                    "Found outdated kernel meta-packages, taking up extra space on boot partitions.\n"
                    ." After a successful upgrade, you can remove them using this command:\n"
                    ." apt remove " . join(' ', $outdated_kernel_meta_pkgs->@*)
                );
            }
        }
    } else {
        log_fail("proxmox-ve package not found!");
    }
}
244 | ||
245 | ||
# Report for every configured storage whether it is enabled and active, then
# run the storage content checks. A failure of the content directory check is
# logged instead of aborting.
sub check_storage_health {
    print_header("CHECKING CONFIGURED STORAGES");
    my $cfg = PVE::Storage::config();

    my $info = PVE::Storage::storage_info($cfg);

    for my $storeid (sort keys %$info) {
        my $d = $info->{$storeid};
        if (!$d->{enabled}) {
            log_skip("storage '$storeid' disabled.");
        } elsif ($d->{active}) {
            log_pass("storage '$storeid' enabled and active.");
        } else {
            log_warn("storage '$storeid' enabled but not active!");
        }
    }

    check_storage_content();
    eval { check_storage_content_dirs() };
    log_fail("failed to check storage content directories - $@") if $@;
}
271 | ||
# Check cluster health and the corosync configuration: service states, quorum,
# configured vs. current votes, nodelist entries, totem settings and the
# installed corosync version. Skipped on nodes without a corosync config.
# All results are reported through the log_* helpers.
sub check_cluster_corosync {
    print_header("CHECKING CLUSTER HEALTH/SETTINGS");

    if (!PVE::Corosync::check_conf_exists(1)) {
        log_skip("standalone node.");
        return;
    }

    $log_systemd_unit_state->('pve-cluster.service');
    $log_systemd_unit_state->('corosync.service');

    if (PVE::Cluster::check_cfs_quorum(1)) {
        log_pass("Cluster Filesystem is quorate.");
    } else {
        log_fail("Cluster Filesystem readonly, lost quorum?!");
    }

    my $conf = PVE::Cluster::cfs_read_file('corosync.conf');
    my $conf_nodelist = PVE::Corosync::nodelist($conf);
    my $node_votes = 0;

    print "\nAnalyzing quorum settings and state..\n";
    if (!defined($conf_nodelist)) {
        log_fail("unable to retrieve nodelist from corosync.conf");
        $conf_nodelist = {}; # the checks below then simply see an empty nodelist
    } else {
        my $non_default_votes = 0;
        for my $node (keys %$conf_nodelist) {
            my $votes = $conf_nodelist->{$node}->{quorum_votes} // 0;
            $non_default_votes = 1 if $votes != 1;
            $node_votes += $votes;
        }
        log_warn("non-default quorum_votes distribution detected!") if $non_default_votes;
    }

    my ($expected_votes, $total_votes);
    my $filter_output = sub {
        my $line = shift;
        ($expected_votes) = $line =~ /^Expected votes:\s*(\d+)\s*$/
            if !defined($expected_votes);
        ($total_votes) = $line =~ /^Total votes:\s*(\d+)\s*$/
            if !defined($total_votes);
    };
    eval {
        run_command(['corosync-quorumtool', '-s'], outfunc => $filter_output, noerr => 1);
    };

    if (!defined($expected_votes)) {
        log_fail("unable to get expected number of votes, assuming 0.");
        $expected_votes = 0;
    }
    if (!defined($total_votes)) {
        log_fail("unable to get total number of votes, assuming 0.");
        $total_votes = 0;
    }

    my $cfs_nodelist = PVE::Cluster::get_clinfo()->{nodelist};
    my $offline_nodes = grep { ($cfs_nodelist->{$_}->{online} // 0) != 1 } keys %$cfs_nodelist;
    if ($offline_nodes > 0) {
        log_fail("$offline_nodes nodes are offline!");
    }

    my $qdevice_votes = 0;
    if (my $qdevice_setup = $conf->{main}->{quorum}->{device}) {
        $qdevice_votes = $qdevice_setup->{votes} // 1;
    }

    log_info("configured votes - nodes: $node_votes");
    log_info("configured votes - qdevice: $qdevice_votes");
    log_info("current expected votes: $expected_votes");
    log_info("current total votes: $total_votes");

    log_warn("expected votes set to non-standard value '$expected_votes'.")
        if $expected_votes != $node_votes + $qdevice_votes;
    log_warn("total votes < expected votes: $total_votes/$expected_votes!")
        if $total_votes < $expected_votes;

    my $conf_nodelist_count = scalar(keys %$conf_nodelist);
    my $cfs_nodelist_count = scalar(keys %$cfs_nodelist);
    log_warn("cluster consists of less than three quorum-providing nodes!")
        if $conf_nodelist_count < 3 && $conf_nodelist_count + $qdevice_votes < 3;

    log_fail("corosync.conf ($conf_nodelist_count) and pmxcfs ($cfs_nodelist_count) don't agree about size of nodelist.")
        if $conf_nodelist_count != $cfs_nodelist_count;

    print "\nChecking nodelist entries..\n";
    my $nodelist_pass = 1;
    for my $cs_node (sort keys %$conf_nodelist) {
        my $entry = $conf_nodelist->{$cs_node};
        if (!defined($entry->{name})) {
            $nodelist_pass = 0;
            log_fail("$cs_node: no name entry in corosync.conf.");
        }
        if (!defined($entry->{nodeid})) {
            $nodelist_pass = 0;
            log_fail("$cs_node: no nodeid configured in corosync.conf.");
        }
        my $gotLinks = 0;
        for my $link (0..7) {
            $gotLinks++ if defined($entry->{"ring${link}_addr"});
        }
        if ($gotLinks <= 0) {
            $nodelist_pass = 0;
            log_fail("$cs_node: no ringX_addr (0 <= X <= 7) link defined in corosync.conf.");
        }

        # Warn if a configured link address is not already the IP it resolves
        # to, fail if it cannot be resolved at all.
        my $verify_ring_ip = sub {
            my $key = shift;
            if (defined(my $ring = $entry->{$key})) {
                my ($resolved_ip, undef) = PVE::Corosync::resolve_hostname_like_corosync($ring, $conf);
                if (defined($resolved_ip)) {
                    if ($resolved_ip ne $ring) {
                        $nodelist_pass = 0;
                        log_warn(
                            "$cs_node: $key '$ring' resolves to '$resolved_ip'.\n"
                            ." Consider replacing it with the currently resolved IP address."
                        );
                    }
                } else {
                    $nodelist_pass = 0;
                    log_fail(
                        "$cs_node: unable to resolve $key '$ring' to an IP address according to Corosync's"
                        ." resolve strategy - cluster will potentially fail with Corosync 3.x/kronosnet!"
                    );
                }
            }
        };
        for my $link (0..7) {
            $verify_ring_ip->("ring${link}_addr");
        }
    }
    log_pass("nodelist settings OK") if $nodelist_pass;

    print "\nChecking totem settings..\n";
    my $totem = $conf->{main}->{totem};
    my $totem_pass = 1;

    my $transport = $totem->{transport};
    if (defined($transport)) {
        if ($transport ne 'knet') {
            $totem_pass = 0;
            log_fail("Corosync transport explicitly set to '$transport' instead of implicit default!");
        }
    }

    # TODO: are those values still up-to-date?
    if ((!defined($totem->{secauth}) || $totem->{secauth} ne 'on') && (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq 'none')) {
        $totem_pass = 0;
        log_fail("Corosync authentication/encryption is not explicitly enabled (secauth / crypto_cipher / crypto_hash)!");
    } elsif (defined($totem->{crypto_cipher}) && $totem->{crypto_cipher} eq '3des') {
        $totem_pass = 0;
        log_fail("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!"); # FIXME: can be removed?
    }

    log_pass("totem settings OK") if $totem_pass;
    print "\n";
    log_info("run 'pvecm status' to get detailed cluster status..");

    if (defined(my $corosync = $get_pkg->('corosync'))) {
        if ($corosync->{OldVersion} =~ m/^2\./) {
            log_fail("\ncorosync 2.x installed, cluster-wide upgrade to 3.x needed!");
        } elsif ($corosync->{OldVersion} !~ m/^3\./) {
            log_fail("\nunexpected corosync version installed: $corosync->{OldVersion}!");
        }
    }
}
434 | ||
# Check a hyper-converged Ceph setup: overall health, the locally installed
# release, the versions of the running daemons, the 'noout' flag and a few
# ceph.conf settings. Skipped if no hyper-converged setup is detected.
# All results are reported through the log_* helpers.
sub check_ceph {
    print_header("CHECKING HYPER-CONVERGED CEPH STATUS");

    if (PVE::Ceph::Tools::check_ceph_inited(1)) {
        log_info("hyper-converged ceph setup detected!");
    } else {
        log_skip("no hyper-converged ceph setup detected!");
        return;
    }

    log_info("getting Ceph status/health information..");
    my $ceph_status = eval { PVE::API2::Ceph->status({ node => $nodename }); };
    my $noout = eval { PVE::API2::Cluster::Ceph->get_flag({ flag => "noout" }); };
    if ($@) {
        log_fail("failed to get 'noout' flag status - $@");
    }

    my $noout_wanted = 1;

    if (!$ceph_status || !$ceph_status->{health}) {
        log_fail("unable to determine Ceph status!");
    } else {
        my $ceph_health = $ceph_status->{health}->{status};
        if (!$ceph_health) {
            log_fail("unable to determine Ceph health!");
        } elsif ($ceph_health eq 'HEALTH_OK') {
            log_pass("Ceph health reported as 'HEALTH_OK'.");
        } elsif ($ceph_health eq 'HEALTH_WARN' && $noout && (keys %{$ceph_status->{health}->{checks}} == 1)) {
            log_pass("Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set.");
        } else {
            log_warn(
                "Ceph health reported as '$ceph_health'.\n Use the PVE dashboard or 'ceph -s'"
                ." to determine the specific issues and try to resolve them."
            );
        }
    }

    # TODO: check OSD min-required version, if too low it breaks stuff!

    log_info("checking local Ceph version..");
    # Looked up once, guarded by eval, and reused for the minimum-version check
    # at the end of this function.
    my $local_ceph_ver = eval { PVE::Ceph::Tools::get_local_version(1) };
    if (my $release = $local_ceph_ver) {
        my $code_name = $ceph_release2code->{"$release"} || 'unknown';
        if ($release == $ceph_supported_release) {
            log_pass("found expected Ceph $ceph_supported_release $ceph_supported_code_name release.");
        } elsif ($release > $ceph_supported_release) {
            log_warn(
                "found newer Ceph release $release $code_name than the expected $ceph_supported_release"
                ." $ceph_supported_code_name, installed third party repos?!"
            );
        } else {
            log_fail(
                "Hyper-converged Ceph $release $code_name is too old for upgrade!\n"
                ." Upgrade Ceph first to $ceph_supported_code_name following our how-to:\n"
                ." <https://pve.proxmox.com/wiki/Category:Ceph_Upgrade>"
            );
        }
    } else {
        log_fail("unable to determine local Ceph version!");
    }

    log_info("getting Ceph daemon versions..");
    my $ceph_versions = eval { PVE::Ceph::Tools::get_cluster_versions(undef, 1); };
    if (!$ceph_versions) {
        log_fail("unable to determine Ceph daemon versions!");
    } else {
        my $services = [
            { 'key' => 'mon', 'name' => 'monitor' },
            { 'key' => 'mgr', 'name' => 'manager' },
            { 'key' => 'mds', 'name' => 'MDS' },
            { 'key' => 'osd', 'name' => 'OSD' },
        ];

        my $ceph_versions_simple = {};
        my $ceph_versions_commits = {};
        for my $type (keys %$ceph_versions) {
            for my $full_version (keys $ceph_versions->{$type}->%*) {
                if ($full_version =~ m/^(.*) \((.*)\).*\(.*\)$/) {
                    # String is in the form of
                    # ceph version 17.2.6 (810db68029296377607028a6c6da1ec06f5a2b27) quincy (stable)
                    # only check the first part, e.g. 'ceph version 17.2.6', the commit hash can
                    # be different
                    $ceph_versions_simple->{$type}->{$1} = 1;
                    $ceph_versions_commits->{$type}->{$2} = 1;
                }
            }
        }

        for my $service (@$services) {
            my ($name, $key) = $service->@{'name', 'key'};
            if (my $service_versions = $ceph_versions_simple->{$key}) {
                if (keys %$service_versions == 0) {
                    log_skip("no running instances detected for daemon type $name.");
                } elsif (keys %$service_versions == 1) {
                    log_pass("single running version detected for daemon type $name.");
                } else {
                    log_warn("multiple running versions detected for daemon type $name!");
                }
            } else {
                log_skip("unable to determine versions of running Ceph $name instances.");
            }
            my $service_commits = $ceph_versions_commits->{$key};
            log_info("different builds of same version detected for an $name. Are you in the middle of the upgrade?")
                if $service_commits && keys %$service_commits > 1;
        }

        my $overall_versions = $ceph_versions->{overall};
        if (!$overall_versions) {
            log_warn("unable to determine overall Ceph daemon versions!");
        } elsif (keys %$overall_versions == 1) {
            log_pass("single running overall version detected for all Ceph daemon types.");
            # TODO: needs to be set to 1 in the stable branch each time! - find better solution?
            $noout_wanted = 0; # off post-upgrade, on pre-upgrade
        } elsif (keys $ceph_versions_simple->{overall}->%* != 1) {
            log_warn("overall version mismatch detected, check 'ceph versions' output for details!");
        }
    }

    if ($noout) {
        if ($noout_wanted) {
            log_pass("'noout' flag set to prevent rebalancing during cluster-wide upgrades.");
        } else {
            log_warn("'noout' flag set, Ceph cluster upgrade seems finished.");
        }
    } elsif ($noout_wanted) {
        log_warn("'noout' flag not set - recommended to prevent rebalancing during upgrades.");
    }

    log_info("checking Ceph config..");
    my $conf = PVE::Cluster::cfs_read_file('ceph.conf');
    if (%$conf) {
        my $global = $conf->{global};

        my $global_monhost = $global->{mon_host} // $global->{"mon host"} // $global->{"mon-host"};
        if (!defined($global_monhost)) {
            log_warn(
                "No 'mon_host' entry found in ceph config.\n It's recommended to add mon_host with"
                ." all monitor addresses (without ports) to the global section."
            );
        }

        my $ipv6 = $global->{ms_bind_ipv6} // $global->{"ms bind ipv6"} // $global->{"ms-bind-ipv6"};
        if ($ipv6) {
            my $ipv4 = $global->{ms_bind_ipv4} // $global->{"ms bind ipv4"} // $global->{"ms-bind-ipv4"};
            if ($ipv6 eq 'true' && (!defined($ipv4) || $ipv4 ne 'false')) {
                log_warn(
                    "'ms_bind_ipv6' is enabled but 'ms_bind_ipv4' is not disabled.\n Make sure to"
                    ." disable 'ms_bind_ipv4' for ipv6 only clusters, or add an ipv4 network to public/cluster network."
                );
            }
        }

        if (defined($global->{keyring})) {
            log_warn(
                "[global] config section contains 'keyring' option, which will prevent services from"
                ." starting with Nautilus.\n Move 'keyring' option to [client] section instead."
            );
        }

    } else {
        log_warn("Empty ceph config found");
    }

    if (defined($local_ceph_ver)) {
        if ($local_ceph_ver <= 14) {
            log_fail("local Ceph version too low, at least Octopus required..");
        }
    } else {
        log_fail("unable to determine local Ceph version.");
    }
}
606 | ||
# Warn about uses of the deprecated 'maxfiles' backup retention parameter in
# /etc/vzdump.conf, the storage configuration and vzdump.cron, and point out
# backup storages that have no retention settings at all.
sub check_backup_retention_settings {
    log_info("Checking backup retention settings..");

    my $pass = 1;

    my $node_has_retention;

    my $maxfiles_msg = "parameter 'maxfiles' is deprecated with PVE 7.x and will be removed in a " .
        "future version, use 'prune-backups' instead.";

    eval {
        my $confdesc = PVE::VZDump::Common::get_confdesc();

        my $fn = "/etc/vzdump.conf";
        my $raw = PVE::Tools::file_get_contents($fn);

        my $conf_schema = { type => 'object', properties => $confdesc, };
        my $param = PVE::JSONSchema::parse_config($conf_schema, $fn, $raw);

        if (defined($param->{maxfiles})) {
            $pass = 0;
            log_warn("$fn - $maxfiles_msg");
        }

        $node_has_retention = defined($param->{maxfiles}) || defined($param->{'prune-backups'});
    };
    if (my $err = $@) {
        $pass = 0;
        log_warn("unable to parse node's VZDump configuration - $err");
    }

    my $storage_cfg = PVE::Storage::config();

    for my $storeid (keys $storage_cfg->{ids}->%*) {
        my $scfg = $storage_cfg->{ids}->{$storeid};

        if (defined($scfg->{maxfiles})) {
            $pass = 0;
            log_warn("storage '$storeid' - $maxfiles_msg");
        }

        next if !$scfg->{content}->{backup};
        next if defined($scfg->{maxfiles}) || defined($scfg->{'prune-backups'});
        next if $node_has_retention;

        log_info(
            "storage '$storeid' - no backup retention settings defined - by default, since PVE 7.0"
            ." it will no longer keep only the last backup, but all backups"
        );
    }

    eval {
        my $vzdump_cron = PVE::Cluster::cfs_read_file('vzdump.cron');

        # only warn once, there might be many jobs...
        if (scalar(grep { defined($_->{maxfiles}) } $vzdump_cron->{jobs}->@*)) {
            $pass = 0;
            log_warn("/etc/pve/vzdump.cron - $maxfiles_msg");
        }
    };
    if (my $err = $@) {
        $pass = 0;
        # name the file that actually failed, this is not the node's vzdump.conf
        log_warn("unable to parse '/etc/pve/vzdump.cron' - $err");
    }

    log_pass("no backup retention problems found.") if $pass;
}
674 | ||
# Warn about every '*.cred' file found in /etc/pve/priv/; log a pass if there
# is none.
sub check_cifs_credential_location {
    log_info("checking CIFS credential location..");

    my $cred_re = qr/^(.*)\.cred$/;
    my $found;

    my $report_cred_file = sub {
        my ($filename) = @_;

        my ($basename) = $filename =~ $cred_re;

        log_warn(
            "CIFS credentials '/etc/pve/priv/$filename' will be moved to"
            ." '/etc/pve/priv/storage/$basename.pw' during the update"
        );

        $found = 1;
    };

    PVE::Tools::dir_glob_foreach('/etc/pve/priv/', $cred_re, $report_cred_file);

    log_pass("no CIFS credentials at outdated location found.") if !$found;
}
697 | ||
# Parse the 'role' entries of /etc/pve/user.cfg and report custom (non-special)
# roles whose ID starts with 'PVE' (case-insensitive).
sub check_custom_pool_roles {
    log_info("Checking custom role IDs for clashes with new 'PVE' namespace..");

    if (! -f "/etc/pve/user.cfg") {
        log_skip("user.cfg does not exist");
        return;
    }

    my $raw = eval { PVE::Tools::file_get_contents('/etc/pve/user.cfg'); };
    if ($@) {
        log_fail("Failed to read '/etc/pve/user.cfg' - $@");
        return;
    }

    my $roles = {};
    while ($raw =~ /^\s*(.+?)\s*$/gm) {
        my $line = $1;

        # colon-separated fields with surrounding whitespace stripped
        my @fields = map {
            my $field = $_;
            $field =~ s/^\s+//;
            $field =~ s/\s+$//;
            $field;
        } split(/:/, $line);

        my ($entry_type, $role, $privlist) = @fields;
        next if $entry_type ne 'role';

        if (!PVE::AccessControl::verify_rolename($role, 1)) {
            warn "user config - ignore role '$role' - invalid characters in role name\n";
            next;
        }

        $roles->{$role} //= {};
        $roles->{$role}->{$_} = 1 for split_list($privlist);
    }

    my $custom_roles = 0;
    my $pve_namespace_clashes = 0;
    for my $role (sort keys %$roles) {
        next if PVE::AccessControl::role_is_special($role);
        $custom_roles++;

        next if $role !~ /^PVE/i;

        log_warn("custom role '$role' clashes with 'PVE' namespace for built-in roles");
        $pve_namespace_clashes++;
    }

    if ($pve_namespace_clashes > 0) {
        log_fail("$pve_namespace_clashes custom role(s) will clash with 'PVE' namespace for built-in roles enforced in Proxmox VE 8");
    } elsif ($custom_roles > 0) {
        log_pass("none of the $custom_roles custom roles will clash with newly enforced 'PVE' namespace");
    } else {
        log_pass("no custom roles defined, so no clash with 'PVE' role ID namespace enforced in Proxmox VE 8");
    }
}
756 | ||
# Log $warning if $raw is defined and longer than $max_length characters.
my sub check_max_length {
    my ($raw, $max_length, $warning) = @_;

    my $too_long = defined($raw) && length($raw) > $max_length;
    log_warn($warning) if $too_long;
}
761 | ||
# Check node descriptions against the 64 KiB limit and guest (CT and VM)
# descriptions against the 8 KiB limit, and look for containers that carry raw
# 'lxc.cgroup.*' keys in their config.
sub check_node_and_guest_configurations {
    log_info("Checking node and guest description/note length..");

    my @affected_nodes = grep {
        my $desc = PVE::NodeConfig::load_config($_)->{desc};
        defined($desc) && length($desc) > 64 * 1024
    } PVE::Cluster::get_nodelist();

    if (scalar(@affected_nodes) > 0) {
        log_warn("Node config description of the following nodes too long for new limit of 64 KiB:\n "
            . join(', ', @affected_nodes));
    } else {
        log_pass("All node config descriptions fit in the new limit of 64 KiB");
    }

    my $affected_guests_long_desc = [];
    my $affected_cts_cgroup_keys = [];

    my $cts = PVE::LXC::config_list();
    for my $vmid (sort { $a <=> $b } keys %$cts) {
        my $conf = PVE::LXC::Config->load_config($vmid);

        my $desc = $conf->{description};
        push @$affected_guests_long_desc, "CT $vmid" if defined($desc) && length($desc) > 8 * 1024;

        # Each raw entry is an array reference whose first element is matched as
        # the key; a config without raw lxc entries is treated as an empty list.
        my $lxc_raw_conf = $conf->{lxc} // [];
        push @$affected_cts_cgroup_keys, "CT $vmid"
            if grep { $_->[0] =~ /^lxc\.cgroup\./ } @$lxc_raw_conf;
    }
    my $vms = PVE::QemuServer::config_list();
    for my $vmid (sort { $a <=> $b } keys %$vms) {
        my $desc = PVE::QemuConfig->load_config($vmid)->{description};
        push @$affected_guests_long_desc, "VM $vmid" if defined($desc) && length($desc) > 8 * 1024;
    }
    if (scalar($affected_guests_long_desc->@*) > 0) {
        # the limit checked above is 8 KiB, keep the message in sync with it
        log_warn("Guest config description of the following virtual-guests too long for new limit of 8 KiB:\n"
            ." " . join(", ", $affected_guests_long_desc->@*));
    } else {
        log_pass("All guest config descriptions fit in the new limit of 8 KiB");
    }

    log_info("Checking container configs for deprecated lxc.cgroup entries");

    if (scalar($affected_cts_cgroup_keys->@*) > 0) {
        if ($forced_legacy_cgroup) {
            log_pass("Found legacy 'lxc.cgroup' keys, but system explicitly configured for legacy hybrid cgroup hierarchy.");
        } else {
            log_warn("The following CTs have 'lxc.cgroup' keys configured, which will be ignored in the new default unified cgroupv2:\n"
                ." " . join(", ", $affected_cts_cgroup_keys->@*) ."\n"
                ." Often it can be enough to change to the new 'lxc.cgroup2' prefix after the upgrade to Proxmox VE 7.x");
        }
    } else {
        log_pass("No legacy 'lxc.cgroup' keys found.");
    }
}
816 | ||
# Verify that guest volumes only live on storages which have the matching content type
# ('images' for VMs, 'rootdir' for CTs) configured.
#
# Works in two passes:
#  1. Every enabled, non-shared storage that has neither 'images' nor 'rootdir' configured is
#     activated and scanned; guest volumes found there are reported as informational message.
#  2. Every volume referenced by a CT or VM config (including snapshots and unused volumes) is
#     checked against the content types of the storage it lives on.
#
# Takes no parameters; results are only reported through the log_* helpers.
sub check_storage_content {
    log_info("Checking storage content type configuration..");

    my $found; # set once a guest volume on a storage lacking the required content type is seen
    my $pass = 1;

    my $storage_cfg = PVE::Storage::config();

    for my $storeid (sort keys $storage_cfg->{ids}->%*) {
        my $scfg = $storage_cfg->{ids}->{$storeid};

        next if $scfg->{shared};
        next if !PVE::Storage::storage_check_enabled($storage_cfg, $storeid, undef, 1);

        my $valid_content = PVE::Storage::Plugin::valid_content_types($scfg->{type});

        if (scalar(keys $scfg->{content}->%*) == 0 && !$valid_content->{none}) {
            $pass = 0;
            log_fail("storage '$storeid' does not support configured content type 'none'");
            delete $scfg->{content}->{none}; # scan for guest images below
        }

        next if $scfg->{content}->{images};
        next if $scfg->{content}->{rootdir};

        # Skip 'iscsi(direct)' (and foreign plugins with potentially similar behavior) with 'none',
        # because that means "use LUNs directly" and vdisk_list() in PVE 6.x still lists those.
        # It's enough to *not* skip 'dir', because it is the only other storage that supports 'none'
        # and 'images' or 'rootdir', hence being potentially misconfigured.
        next if $scfg->{type} ne 'dir' && $scfg->{content}->{none};

        eval { PVE::Storage::activate_storage($storage_cfg, $storeid) };
        if (my $err = $@) {
            log_warn("activating '$storeid' failed - $err");
            next;
        }

        my $res = eval { PVE::Storage::vdisk_list($storage_cfg, $storeid); };
        if (my $err = $@) {
            log_warn("listing images on '$storeid' failed - $err");
            next;
        }
        my @volids = map { $_->{volid} } $res->{$storeid}->@*;

        my $number = scalar(@volids);
        if ($number > 0) {
            # note the leading space of the second part, the two literals are concatenated
            log_info(
                "storage '$storeid' - neither content type 'images' nor 'rootdir' configured, but"
                ." found $number guest volume(s)"
            );
        }
    }

    # Checks a single referenced volume; $vmtype is 'qemu' or 'lxc', $reference is a human
    # readable hint where the volume is referenced (config or snapshot name).
    my $check_volid = sub {
        my ($volid, $vmid, $vmtype, $reference) = @_;

        my $guesttext = $vmtype eq 'qemu' ? 'VM' : 'CT';
        my $prefix = "$guesttext $vmid - volume '$volid' ($reference)";

        # second argument requests "no error" mode, so non-volume IDs simply yield undef
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !defined($storeid);

        my $scfg = $storage_cfg->{ids}->{$storeid};
        if (!$scfg) {
            $pass = 0;
            log_warn("$prefix - storage does not exist!");
            return;
        }

        # cannot use parse_volname for containers, as it can return 'images'
        # but containers cannot have ISO images attached, so assume 'rootdir'
        my $vtype = 'rootdir';
        if ($vmtype eq 'qemu') {
            ($vtype) = eval { PVE::Storage::parse_volname($storage_cfg, $volid); };
            return if $@;
        }

        if (!$scfg->{content}->{$vtype}) {
            $found = 1;
            $pass = 0;
            log_warn("$prefix - storage does not have content type '$vtype' configured.");
        }
    };

    my $cts = PVE::LXC::config_list();
    for my $vmid (sort { $a <=> $b } keys %$cts) {
        my $conf = PVE::LXC::Config->load_config($vmid);

        my $volhash = {};

        my $check = sub {
            my ($ms, $mountpoint, $reference) = @_;

            my $volid = $mountpoint->{volume};
            return if !$volid || $mountpoint->{type} ne 'volume';

            return if $volhash->{$volid}; # volume might be referenced multiple times

            $volhash->{$volid} = 1;

            $check_volid->($volid, $vmid, 'lxc', $reference);
        };

        my $opts = { include_unused => 1 };
        PVE::LXC::Config->foreach_volume_full($conf, $opts, $check, 'in config');
        for my $snapname (keys $conf->{snapshots}->%*) {
            my $snap = $conf->{snapshots}->{$snapname};
            PVE::LXC::Config->foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
        }
    }

    my $vms = PVE::QemuServer::config_list();
    for my $vmid (sort { $a <=> $b } keys %$vms) {
        my $conf = PVE::QemuConfig->load_config($vmid);

        my $volhash = {};

        my $check = sub {
            my ($key, $drive, $reference) = @_;

            my $volid = $drive->{file};
            # absolute paths are not storage volumes; also guard against a missing 'file'
            return if !defined($volid) || $volid =~ m|^/|;
            return if $volhash->{$volid}; # volume might be referenced multiple times

            $volhash->{$volid} = 1;
            $check_volid->($volid, $vmid, 'qemu', $reference);
        };

        my $opts = {
            extra_keys => ['vmstate'],
            include_unused => 1,
        };
        # startup from a suspended state works even without 'images' content type on the
        # state storage, so do not check 'vmstate' for $conf
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $check, 'in config');
        for my $snapname (keys $conf->{snapshots}->%*) {
            my $snap = $conf->{snapshots}->{$snapname};
            PVE::QemuConfig->foreach_volume_full($snap, $opts, $check, "in snapshot '$snapname'");
        }
    }

    if ($found) {
        log_warn("Proxmox VE enforces stricter content type checks since 7.0. The guests above " .
            "might not work until the storage configuration is fixed.");
    }

    if ($pass) {
        log_pass("no storage content problems found");
    }
}
967 | ||
# Check that no enabled, path-based storage maps more than one content type to the same
# (symlink-resolved) directory.
#
# Each affected storage is reported with a warning; if any storage is affected a single failure
# is logged at the end, otherwise a pass. Takes no parameters.
sub check_storage_content_dirs {
    my $storage_cfg = PVE::Storage::config();

    # check that content dirs are pairwise inequal
    my $any_problematic = 0;
    for my $storeid (sort keys $storage_cfg->{ids}->%*) {
        my $scfg = $storage_cfg->{ids}->{$storeid};

        next if !PVE::Storage::storage_check_enabled($storage_cfg, $storeid, undef, 1);
        next if !$scfg->{path} || !$scfg->{content};

        eval { PVE::Storage::activate_storage($storage_cfg, $storeid) };
        if (my $err = $@) {
            log_warn("activating '$storeid' failed - $err");
            next;
        }

        # resolved absolute directory => list of content types stored there
        my $resolved_subdirs = {};
        my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
        for my $vtype (sort keys $scfg->{content}->%*) {
            my $abs_subdir = Cwd::abs_path($plugin->get_subdir($scfg, $vtype));
            # abs_path yields undef if the path cannot be resolved (e.g., does not exist yet);
            # such directories cannot collide, and using undef as hash key would group them
            # all under '' and trigger a bogus report
            next if !defined($abs_subdir);
            push $resolved_subdirs->{$abs_subdir}->@*, $vtype;
        }
        for my $subdir (sort keys $resolved_subdirs->%*) {
            if (scalar($resolved_subdirs->{$subdir}->@*) > 1) {
                my $types = join(", ", $resolved_subdirs->{$subdir}->@*);
                log_warn("storage '$storeid' uses directory $subdir for multiple content types ($types).");
                $any_problematic = 1;
            }
        }
    }
    if ($any_problematic) {
        log_fail("re-using directory for multiple content types (see above) is no longer supported in Proxmox VE 8!")
    } else {
        log_pass("no storage re-uses a directory for multiple content types.")
    }
}
1005 | ||
# Expensive check: determine whether the containers on this node can run in a pure (unified)
# cgroup v2 layout.
#
# Running CTs are inspected through /proc/<pid>/root, stopped CTs get mounted temporarily.
# The check stops at the first CT found to be incompatible. Takes no parameters; results are
# only reported through the log_* helpers.
sub check_containers_cgroup_compat {
    if ($forced_legacy_cgroup) {
        log_warn("System explicitly configured for legacy hybrid cgroup hierarchy.\n"
            ." NOTE: support for the hybrid cgroup hierarchy will be removed in future Proxmox VE 9 (~ 2025)."
        );
    }

    # Returns true if the CT with config $conf and root file system at $rootdir is expected to
    # cope with the unified cgroup v2 hierarchy.
    my $supports_cgroupv2 = sub {
        my ($conf, $rootdir, $ctid) = @_;

        # Derives the systemd version from the versioned libsystemd-shared library name,
        # returns undef if it cannot be determined.
        my $get_systemd_version = sub {
            my ($self) = @_;

            my $sd_lib_dir = -d "/lib/systemd" ? "/lib/systemd" : "/usr/lib/systemd";
            # single quotes, so that the backslash reaches the regex and the dot stays literal
            my $libsd = PVE::Tools::dir_glob_regex($sd_lib_dir, 'libsystemd-shared-.+\.so');
            if (defined($libsd) && $libsd =~ /libsystemd-shared-(\d+)\.so/) {
                return $1;
            }

            return undef;
        };

        # Executed through PVE::LXC::Setup's protected_call below - presumably with the CT's
        # root as file system root, so the absolute paths refer to the container; confirm in
        # PVE::LXC::Setup.
        my $unified_cgroupv2_support = sub {
            my ($self) = @_;

            # https://www.freedesktop.org/software/systemd/man/systemd.html
            # systemd is installed as symlink to /sbin/init
            my $systemd = CORE::readlink('/sbin/init');

            # assume non-systemd init will run with unified cgroupv2
            if (!defined($systemd) || $systemd !~ m@/systemd$@) {
                return 1;
            }

            # systemd version 232 (e.g. debian stretch) supports the unified hierarchy
            my $sdver = $get_systemd_version->();
            if (!defined($sdver) || $sdver < 232) {
                return 0;
            }

            return 1;
        };

        my $ostype = $conf->{ostype};
        if (!defined($ostype)) {
            log_warn("Found CT ($ctid) without 'ostype' set!");
        } elsif ($ostype eq 'devuan' || $ostype eq 'alpine') {
            return 1; # no systemd, no cgroup problems
        }

        my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
        return $lxc_setup->protected_call($unified_cgroupv2_support);
    };

    my $log_problem = sub {
        my ($ctid) = @_;
        my $extra = $forced_legacy_cgroup ? '' : " or set systemd.unified_cgroup_hierarchy=0 in the Proxmox VE hosts' kernel cmdline";
        log_warn(
            "Found at least one CT ($ctid) which does not support running in a unified cgroup v2 layout\n"
            ." Consider upgrading the Containers distro${extra}! Skipping further CT compat checks."
        );
    };

    my $cts = eval { PVE::API2::LXC->vmlist({ node => $nodename }) };
    if ($@) {
        log_warn("Failed to retrieve information about this node's CTs - $@");
        return;
    }

    if (!defined($cts) || !scalar(@$cts)) {
        log_skip("No containers on node detected.");
        return;
    }

    # the list entries are hashes, so order them by their VMID and not by their address
    my $by_vmid = sub { $_[0]->{vmid} <=> $_[1]->{vmid} };
    my @running_cts = sort { $by_vmid->($a, $b) } grep { $_->{status} eq 'running' } @$cts;
    my @offline_cts = sort { $by_vmid->($a, $b) } grep { $_->{status} ne 'running' } @$cts;

    for my $ct (@running_cts) {
        my $ctid = $ct->{vmid};
        my $pid = eval { PVE::LXC::find_lxc_pid($ctid) };
        if (my $err = $@) {
            log_warn("Failed to get PID for running CT $ctid - $err");
            next;
        }
        my $rootdir = "/proc/$pid/root";
        my $conf = PVE::LXC::Config->load_config($ctid);

        my $ret = eval { $supports_cgroupv2->($conf, $rootdir, $ctid) };
        if (my $err = $@) {
            log_warn("Failed to get cgroup support status for CT $ctid - $err");
            next;
        }
        if (!$ret) {
            $log_problem->($ctid);
            return;
        }
    }

    my $storage_cfg = PVE::Storage::config();
    for my $ct (@offline_cts) {
        my $ctid = $ct->{vmid};
        my ($conf, $rootdir, $ret);
        eval {
            $conf = PVE::LXC::Config->load_config($ctid);
            $rootdir = PVE::LXC::mount_all($ctid, $storage_cfg, $conf);
            $ret = $supports_cgroupv2->($conf, $rootdir, $ctid);
        };
        if (my $err = $@) {
            log_warn("Failed to load config and mount CT $ctid - $err");
            # best effort cleanup, the CT might be partially mounted
            eval { PVE::LXC::umount_all($ctid, $storage_cfg, $conf) };
            next;
        }
        if (!$ret) {
            $log_problem->($ctid);
            eval { PVE::LXC::umount_all($ctid, $storage_cfg, $conf) };
            last;
        }

        eval { PVE::LXC::umount_all($ctid, $storage_cfg, $conf) };
    }
};
1127 | ||
eed1e93e TL |
# After the upgrade LXCFS should be linked against FUSE 3; a still running instance that maps
# the FUSE 2 library hints at a missing reboot. The check is skipped while the file-level
# $upgraded flag is unset. Takes no parameters and returns nothing.
sub check_lxcfs_fuse_version {
    log_info("Checking if LXCFS is running with FUSE3 library, if already upgraded..");

    unless ($upgraded) {
        log_skip("not yet upgraded, no need to check the FUSE library version LXCFS uses");
        return;
    }

    # the PID file tells us which process' memory mappings to inspect
    my $pid = eval { file_get_contents('/run/lxcfs.pid') };
    my $pid_err = $@;
    if ($pid_err) {
        log_fail("failed to get LXCFS pid - $pid_err");
        return;
    }
    chomp $pid;

    my $mappings = eval { file_get_contents("/proc/${pid}/maps") };
    my $maps_err = $@;
    if ($maps_err) {
        log_fail("failed to get LXCFS maps - $maps_err");
        return;
    }

    # the shared libraries in use show up in the mappings of the process
    my $maps_fuse2 = $mappings =~ /\/libfuse.so.2/s;
    if ($maps_fuse2) {
        log_warn("systems seems to be upgraded but LXCFS is still running with FUSE 2 library, not yet rebooted?");
    } elsif ($mappings =~ /\/libfuse3.so.3/s) {
        log_pass("systems seems to be upgraded and LXCFS is running with FUSE 3 library");
    }

    return;
}
1155 | ||
a98193c2 TL |
# Look for a Debian security repository entry in the classic one-line APT source lists
# (/etc/apt/sources.list and /etc/apt/sources.list.d/*.list) and warn if none is found.
#
# Takes no parameters; results are only reported through the log_* helpers.
sub check_apt_repos {
    log_info("Checking if the suite for the Debian security repository is correct..");

    my $found = 0;

    my $dir = '/etc/apt/sources.list.d';
    my $in_dir = 0; # once set, $check_file gets file names relative to $dir

    # TODO: check that (original) debian and Proxmox VE mirrors are present.

    # Scans a single source list and sets $found if it references the security repository.
    # Unreadable files are skipped silently.
    my $check_file = sub {
        my ($file) = @_;

        $file = "${dir}/${file}" if $in_dir;

        my $raw = eval { PVE::Tools::file_get_contents($file) };
        return if !defined($raw);

        for my $line (split(/\n/, $raw)) {
            # strip comments; for empty or comment-only lines (e.g., a sole '#') split returns
            # an empty list, leaving $entry undefined
            my ($entry) = split(/#/, $line);
            next if !defined($entry);

            next if $entry !~ m/^deb[[:space:]]/; # is case sensitive

            # catch any of
            # https://deb.debian.org/debian-security
            # http://security.debian.org/debian-security
            # http://security.debian.org/
            # TODO: the first capture group holds the suite, which is not validated (yet)
            if (
                $entry =~ m|https?://deb\.debian\.org/debian-security/?\s+(\S*)|i
                || $entry =~ m|https?://security\.debian\.org(?:.*?)\s+(\S*)|i
            ) {
                $found = 1;
            }
        }
    };

    $check_file->("/etc/apt/sources.list");

    $in_dir = 1;

    PVE::Tools::dir_glob_foreach($dir, '^.*\.list$', $check_file);

    if (!$found) {
        # only warn, it might be defined in a .sources file or in a way not caught above
        log_warn("No Debian security repository detected in /etc/apt/sources.list and " .
            "/etc/apt/sources.list.d/*.list");
    }
}
1218 | ||
# Report which time synchronisation service is active on this node and whether it is a good
# choice for the upgrade. Units are probed lazily in a fixed order: systemd-timesyncd, ntp and
# then the recommended daemons (chrony, openntpd, ntpsec).
sub check_time_sync {
    # returns the first of the passed unit names whose state is 'active', undef if there is none
    my $first_active_unit = sub {
        for my $unit (@_) {
            return $unit if $get_systemd_unit_state->($unit, 1) eq 'active';
        }
        return undef;
    };

    log_info("Checking for supported & active NTP service..");

    if ($first_active_unit->('systemd-timesyncd.service')) {
        my $hint = "systemd-timesyncd is not the best choice for time-keeping on servers, due to only applying"
            ." updates on boot.\n While not necessary for the upgrade it's recommended to use one of:\n"
            ." * chrony (Default in new Proxmox VE installations)\n * ntpsec\n * openntpd\n";
        log_warn($hint);
    } elsif ($first_active_unit->('ntp.service')) {
        my $note = "Debian deprecated and removed the ntp package for Bookworm, but the system"
            ." will automatically migrate to the 'ntpsec' replacement package on upgrade.";
        log_info($note);
    } elsif (
        my $active_ntp = $first_active_unit->('chrony.service', 'openntpd.service', 'ntpsec.service')
    ) {
        log_pass("Detected active time synchronisation unit '$active_ntp'");
    } else {
        my $problem = "No (active) time synchronisation daemon (NTP) detected, but synchronized systems are important,"
            ." especially for cluster and/or ceph!";
        log_warn($problem);
    }
}
1241 | ||
# Collection of miscellaneous checks: SSH cipher configuration of root, state of the core
# daemons, time synchronisation, free space on the root file system, running guests, hostname
# resolution, key size of the node certificates - followed by a set of further dedicated
# check functions.
#
# Takes no parameters; results are only reported through the log_* helpers.
sub check_misc {
    print_header("MISCELLANEOUS CHECKS");
    my $ssh_config = eval { PVE::Tools::file_get_contents('/root/.ssh/config') };
    if (defined($ssh_config)) {
        log_fail("Unsupported SSH Cipher configured for root in /root/.ssh/config: $1")
            if $ssh_config =~ /^Ciphers .*(blowfish|arcfour|3des).*$/m;
    } else {
        log_skip("No SSH config file found.");
    }

    log_info("Checking common daemon services..");
    $log_systemd_unit_state->('pveproxy.service');
    $log_systemd_unit_state->('pvedaemon.service');
    $log_systemd_unit_state->('pvescheduler.service');
    $log_systemd_unit_state->('pvestatd.service');

    check_time_sync();

    my $root_free = PVE::Tools::df('/', 10);
    log_warn("Less than 5 GB free space on root file system.")
        if defined($root_free) && $root_free->{avail} < 5 * 1000*1000*1000;

    log_info("Checking for running guests..");
    my $running_guests = 0;

    my $vms = eval { PVE::API2::Qemu->vmlist({ node => $nodename }) };
    log_warn("Failed to retrieve information about this node's VMs - $@") if $@;
    $running_guests += grep { $_->{status} eq 'running' } @$vms if defined($vms);

    my $cts = eval { PVE::API2::LXC->vmlist({ node => $nodename }) };
    log_warn("Failed to retrieve information about this node's CTs - $@") if $@;
    $running_guests += grep { $_->{status} eq 'running' } @$cts if defined($cts);

    if ($running_guests > 0) {
        log_warn("$running_guests running guest(s) detected - consider migrating or stopping them.")
    } else {
        log_pass("no running guest detected.")
    }

    log_info("Checking if the local node's hostname '$nodename' is resolvable..");
    my $local_ip = eval { PVE::Network::get_ip_from_hostname($nodename) };
    if ($@) {
        log_warn("Failed to resolve hostname '$nodename' to IP - $@");
    } else {
        log_info("Checking if resolved IP is configured on local node..");
        # use a host prefix, so that only this exact address matches
        my $cidr = Net::IP::ip_is_ipv6($local_ip) ? "$local_ip/128" : "$local_ip/32";
        my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
        my $ip_count = scalar(@$configured_ips);

        if ($ip_count <= 0) {
            log_fail("Resolved node IP '$local_ip' not configured or active for '$nodename'");
        } elsif ($ip_count > 1) {
            log_warn("Resolved node IP '$local_ip' active on multiple ($ip_count) interfaces!");
        } else {
            log_pass("Resolved node IP '$local_ip' configured and active on single interface.");
        }
    }

    log_info("Check node certificate's RSA key size");
    my $certs = PVE::API2::Certificates->info({ node => $nodename });
    # minimal accepted public key size in bits, per public key type
    my $certs_check = {
        'rsaEncryption' => {
            minsize => 2048,
            name => 'RSA',
        },
        'id-ecPublicKey' => {
            minsize => 224,
            name => 'ECC',
        },
    };

    foreach my $cert (@$certs) {
        my ($type, $size, $fn) = $cert->@{qw(public-key-type public-key-bits filename)};

        if (!defined($type) || !defined($size)) {
            log_warn("'$fn': cannot check certificate, failed to get it's type or size!");
            next; # nothing to compare, the lookups below would operate on undef
        }

        my $check = $certs_check->{$type};
        if (!defined($check)) {
            log_warn("'$fn': certificate's public key type '$type' unknown!");
            next;
        }

        # report the limit of the actual key type, it differs between RSA and ECC
        my $minsize = $check->{minsize};
        if ($size < $minsize) {
            log_fail("'$fn', certificate's $check->{name} public key size is less than $minsize bit");
        } else {
            log_pass("Certificate '$fn' passed Debian Busters (and newer) security level for TLS connections ($size >= $minsize)");
        }
    }

    check_backup_retention_settings();
    check_cifs_credential_location();
    check_custom_pool_roles();
    check_lxcfs_fuse_version();
    check_node_and_guest_configurations();
    check_apt_repos();
}
1342 | ||
# Returns $str as string, wrapped in the ANSI escape sequences for $color only if $condition
# is true.
my sub colored_if {
    my ($str, $color, $condition) = @_;

    my $text = $str;
    $text = colored($str, $color) if $condition;

    return "$text";
}
1347 | ||
# The single command of this CLI tool: runs all check groups in order and prints a summary
# of the counters collected by the log_* helpers.
__PACKAGE__->register_method ({
    name => 'checklist',
    path => 'checklist',
    method => 'GET',
    description => 'Check (pre-/post-)upgrade conditions.',
    parameters => {
        additionalProperties => 0,
        properties => {
            full => {
                description => 'perform additional, expensive checks.',
                type => 'boolean',
                optional => 1,
                default => 0,
            },
        },
    },
    returns => { type => 'null' },
    code => sub {
        my ($param) = @_;

        # remember if the host got explicitly booted with the legacy hybrid cgroup layout, the
        # cgroup related checks adapt their messages to that
        my $cmdline = PVE::Tools::file_get_contents('/proc/cmdline');
        $forced_legacy_cgroup = 1 if $cmdline =~ /systemd.unified_cgroup_hierarchy=0/;

        check_pve_packages();
        check_cluster_corosync();
        check_ceph();
        check_storage_health();
        check_misc();

        if (!$param->{full}) {
            log_skip("NOTE: Expensive checks, like CT cgroupv2 compat, not performed without '--full' parameter");
        } else {
            check_containers_cgroup_compat();
        }

        print_header("SUMMARY");

        my $total = 0;
        for my $count (values %$counters) {
            $total += $count;
        }

        my $has_warnings = $counters->{warn} > 0;
        my $has_failures = $counters->{fail} > 0;

        print "TOTAL: $total\n";
        print colored("PASSED: $counters->{pass}\n", 'green');
        print "SKIPPED: $counters->{skip}\n";
        print colored_if("WARNINGS: $counters->{warn}\n", 'yellow', $has_warnings);
        print colored_if("FAILURES: $counters->{fail}\n", 'bold red', $has_failures);

        if ($has_warnings || $has_failures) {
            # failures take precedence over warnings for the color of the closing hint
            my $color = $has_failures ? 'bold red' : 'yellow';
            print colored("\nATTENTION: Please check the output for detailed information!\n", $color);
            if ($has_failures) {
                print colored("Try to solve the problems one at a time and then run this checklist tool again.\n", $color);
            }
        }

        return undef;
    }});
1404 | ||
# Command definition of this tool: the 'checklist' method registered on this package.
# NOTE(review): the trailing empty array and hash are presumably the positional argument
# names and the fixed parameters expected by PVE::CLIHandler - confirm there.
our $cmddef = [
    __PACKAGE__,
    'checklist',
    [],
    {},
];
1406 | ||
1407 | 1; |