]> git.proxmox.com Git - pve-manager.git/blob - PVE/CLI/pve5to6.pm
5to6: improve some log messages
[pve-manager.git] / PVE / CLI / pve5to6.pm
1 package PVE::CLI::pve5to6;
2
3 use strict;
4 use warnings;
5
6 use PVE::API2::APT;
7 use PVE::API2::Ceph;
8 use PVE::API2::LXC;
9 use PVE::API2::Qemu;
10
11 use PVE::Ceph::Tools;
12 use PVE::Cluster;
13 use PVE::Corosync;
14 use PVE::INotify;
15 use PVE::JSONSchema;
16 use PVE::RPCEnvironment;
17 use PVE::Storage;
18 use PVE::Tools;
19 use PVE::QemuServer;
20
21 use Term::ANSIColor;
22
23 use PVE::CLIHandler;
24
25 use base qw(PVE::CLIHandler);
26
27 my $nodename = PVE::INotify::nodename();
28
# Environment hook of this CLI handler: delegates to the default CLI
# environment setup provided by PVE::RPCEnvironment and hands back whatever
# that call returns.
sub setup_environment {
    return PVE::RPCEnvironment->setup_default_cli_env();
}
32
# Oldest proxmox-ve package version (major.minor-pkgrel) that
# check_pve_packages accepts.
my ($min_pve_major, $min_pve_minor, $min_pve_pkgrel) = (5, 4, 2);

# Number of log lines emitted per counted level. Levels without an entry
# here (e.g. 'info') are printed but not tallied.
my $counters = { map { $_ => 0 } qw(pass skip warn fail) };
43
# Print a single log line to STDOUT.
#
# Parameters:
#   $level - optional level name; when defined it is upper-cased and used as
#            the "LEVEL: " prefix, and counted if $counters has a slot for it
#   $line  - message text, a newline is appended
my $log_line = sub {
    my ($level, $line) = @_;

    my $prefix = '';
    if (defined($level)) {
        # only levels that own a counter slot are tallied
        $counters->{$level} += 1 if defined($counters->{$level});
        $prefix = uc($level) . ': ';
    }

    print "$prefix$line\n";
};
52
# Shared implementation of the coloured log helpers: switch the terminal
# colour, emit the line through $log_line and reset the colour afterwards.
my $log_colored = sub {
    my ($color, $level, @message) = @_;

    print color($color);
    $log_line->($level, @message);
    print color('reset');
};

# Public logging helpers, one per level. Each takes the message as its
# argument; 'info' and 'skip' are printed without colour.
sub log_pass { $log_colored->('green', 'pass', @_) }

sub log_info { $log_line->('info', @_) }

sub log_skip { $log_line->('skip', @_) }

sub log_warn { $log_colored->('yellow', 'warn', @_) }

sub log_fail { $log_colored->('red', 'fail', @_) }
75
# True until the first section header has been printed.
my $print_header_first = 1;

# Print a section header of the form "= TITLE =" followed by an empty line.
# Every header except the very first one is preceded by an empty line so
# that sections are visually separated.
sub print_header {
    my ($h) = @_;

    my $separator = $print_header_first ? '' : "\n";
    print $separator, "= $h =\n\n";

    $print_header_first = 0;
}
83
# Cached result of the APT 'versions' API call; fetched lazily by $get_pkg on
# first use and reused for all later lookups.
my $versions;

# Look up a package in the list returned by PVE::API2::APT->versions.
#
# Parameters:
#   $pkg   - name compared against the 'Package' field of each entry
#   $quiet - optional, defaults to false; when true a missing package is not
#            reported through log_fail (for callers that merely probe whether
#            a package is present)
#
# Returns the first matching entry, or undef if the version list could not be
# retrieved or does not contain the package.
my $get_pkg = sub {
    my ($pkg, $quiet) = @_;

    $versions = eval { PVE::API2::APT->versions({ node => $nodename }) } if !defined($versions);

    if (!defined($versions)) {
        my $msg = "unable to retrieve package version information";
        $msg .= " - $@" if $@;
        log_fail($msg);
        return undef;
    }

    # Test the number of matches, not the container: an array reference is
    # always defined and never numerically equal to 0, so checking the
    # reference itself can never detect a missing package.
    my @matches = grep { defined($_->{Package}) && $_->{Package} eq $pkg } @$versions;
    if (!@matches) {
        log_fail("unable to determine installed $pkg version.") if !$quiet;
        return undef;
    }

    return $matches[0];
};

# Report pending package updates, verify that the proxmox-ve package is at
# least $min_pve_major.$min_pve_minor-$min_pve_pkgrel and check that the
# running kernel matches the series expected for the detected release.
sub check_pve_packages {
    print_header("CHECKING VERSION INFORMATION FOR PVE PACKAGES");

    print "Checking for package updates..\n";
    my $updates = eval { PVE::API2::APT->list_updates({ node => $nodename }); };
    if (!defined($updates)) {
        log_warn("$@") if $@;
        log_fail("unable to retrieve list of package updates!");
    } elsif (@$updates > 0) {
        my $pkgs = join(', ', map { $_->{Package} } @$updates);
        log_warn("updates for the following packages are available:\n $pkgs");
    } else {
        log_pass("all packages uptodate");
    }

    print "\nChecking proxmox-ve package version..\n";
    my $proxmox_ve = $get_pkg->('proxmox-ve');
    return if !defined($proxmox_ve);

    my $min_pve_ver = "$min_pve_major.$min_pve_minor-$min_pve_pkgrel";

    # NOTE(review): 'OldVersion' is treated as the currently installed
    # version here - confirm against the APT API.
    my $installed = $proxmox_ve->{OldVersion} // '';
    my ($maj, $min, $pkgrel) = $installed =~ m/^(\d+)\.(\d+)-(\d+)/;

    my $upgraded = 0;

    if (!defined($maj)) {
        # avoid comparing undefined components below
        log_fail("unable to parse proxmox-ve package version '$installed'!");
    } elsif ($maj > $min_pve_major) {
        log_pass("already upgraded to Proxmox VE " . ($min_pve_major + 1));
        $upgraded = 1;
    } elsif (
        $maj == $min_pve_major
        # compare minor first, package release only breaks a tie, so that
        # e.g. 5.5-1 is correctly considered newer than 5.4-2
        && (($min <=> $min_pve_minor) || ($pkgrel <=> $min_pve_pkgrel)) >= 0
    ) {
        log_pass("proxmox-ve package has version >= $min_pve_ver");
    } else {
        log_fail("proxmox-ve package is too old, please upgrade to >= $min_pve_ver!");
    }

    # expected kernel series: 5.x once upgraded, 4.15 before the upgrade
    my ($krunning, $kinstalled) = $upgraded
        ? (qr/5\./, 'pve-kernel-5.0')
        : (qr/4\.15/, 'pve-kernel-4.15');

    print "\nChecking running kernel version..\n";
    my $kernel_ver = $proxmox_ve->{RunningKernel};
    if (!defined($kernel_ver)) {
        log_fail("unable to determine running kernel version.");
    } elsif ($kernel_ver =~ /^$krunning/) {
        log_pass("expected running kernel '$kernel_ver'.");
    } elsif ($get_pkg->($kinstalled, 1)) {
        # quiet probe: an absent kernel package is handled by the branch below
        log_warn("expected kernel '$kinstalled' installed but not yet rebooted!");
    } else {
        log_warn("unexpected running and installed kernel '$kernel_ver'.");
    }
}
156
# Collect the running VMs whose QEMU command line exposes virtualization
# extensions to the guest.
#
# Returns undef if no such VM was found, otherwise a hash reference with
#   cpu  => [ VMIDs whose '-cpu' argument starts with 'host' or 'max' ]
#   flag => [ VMIDs whose '-cpu' argument contains '+vmx' or '+svm' ]
# Both lists are ordered by ascending VMID.
sub get_vms_with_vmx {
    my $res = {
        cpu => [],
        flag => [],
    };
    my $vmlist = PVE::QemuServer::vzlist();

    foreach my $vmid (sort { $a <=> $b } keys %$vmlist) {
        my $pid = $vmlist->{$vmid}->{pid};
        next if !$pid; # skip not running vms

        # the process may vanish at any time, so a read failure is ignored
        my $cmdline = eval { PVE::Tools::file_get_contents("/proc/$pid/cmdline") };
        next if !$cmdline;

        # arguments in /proc/<pid>/cmdline are NUL separated
        my @args = split(/\0/, $cmdline);
        foreach my $i (0 .. $#args) {
            next if !$args[$i] || $args[$i] !~ m/^-?-cpu$/;

            my $cpuarg = $args[$i + 1];
            # '-cpu' without a following value: nothing to inspect
            next if !defined($cpuarg);

            if ($cpuarg =~ m/^(host|max)/) {
                push @{$res->{cpu}}, $vmid;
            } elsif ($cpuarg =~ m/\+(vmx|svm)/) {
                push @{$res->{flag}}, $vmid;
            }
        }
    }

    return undef if !@{$res->{cpu}} && !@{$res->{flag}};

    return $res;
}
188
# Check whether the loaded KVM module has its 'nested' parameter enabled and,
# if so, warn about running VMs found by get_vms_with_vmx().
sub check_kvm_nested {
    log_info("Checking KVM nesting support, which breaks live migration for VMs using it..");

    # kvm_amd takes precedence over kvm_intel if both directories exist
    my $param_dir;
    foreach my $module (qw(kvm_amd kvm_intel)) {
        next unless -e "/sys/module/$module";
        $param_dir = "/sys/module/$module/parameters";
        last;
    }
    if (!defined($param_dir)) {
        log_skip("no kvm module found");
        return;
    }

    my $nested_file = "$param_dir/nested";
    if (!-f $nested_file) {
        log_skip("KVM nested parameter not found.");
        return;
    }

    my $val = eval { PVE::Tools::file_read_firstline($nested_file) };
    if (!$val || $val !~ m/Y|1/) {
        log_pass("KVM nested parameter not set.");
        return;
    }

    my $list = get_vms_with_vmx();
    if (!defined($list)) {
        log_pass("KVM nested parameter set, but currently no VM with a 'vmx' or 'svm' flag is running.");
        return;
    }

    my @by_cputype = @{$list->{cpu}};
    my @by_flag = @{$list->{flag}};

    my $warnmsg = "KVM nested enabled. It will not be possible to live migrate the following running VMs to PVE 6:\n";
    $warnmsg .= " VMID(s) with cputype 'host' or 'max': " . join(',', @by_cputype) . "\n"
        if @by_cputype;
    $warnmsg .= " VMID(s) with enforced cpu flag 'vmx' or 'svm': " . join(',', @by_flag) . "\n"
        if @by_flag;

    log_warn($warnmsg);
}
225
# Report the state of every configured storage: disabled storages are
# skipped, enabled sheepdog storages fail, and all other enabled storages
# pass or warn depending on whether they are active.
sub check_storage_health {
    print_header("CHECKING CONFIGURED STORAGES");
    my $cfg = PVE::Storage::config();

    my $info = PVE::Storage::storage_info($cfg);

    # sorted so that the report order is stable between runs
    foreach my $storeid (sort keys %$info) {
        my $d = $info->{$storeid};

        if (!$d->{enabled}) {
            log_skip("storage '$storeid' disabled.");
            next;
        }

        if ($d->{type} eq 'sheepdog') {
            log_fail("storage '$storeid' of type 'sheepdog' is enabled - experimental sheepdog support dropped in PVE 6");
        } elsif ($d->{active}) {
            log_pass("storage '$storeid' enabled and active.");
        } else {
            log_warn("storage '$storeid' enabled but not active!");
        }
    }
}
249
# Check cluster health and the corosync configuration: quorum, nodelist
# consistency between corosync.conf and pmxcfs, per-node entries, totem
# transport/crypto settings and the installed corosync version.
# Returns early (after a 'skip') on a standalone node.
sub check_cluster_corosync {
    print_header("CHECKING CLUSTER HEALTH/SETTINGS");

    if (!PVE::Corosync::check_conf_exists(1)) {
        log_skip("standalone node.");
        return;
    }

    if (PVE::Cluster::check_cfs_quorum(1)) {
        log_pass("Cluster is quorate.");
    } else {
        log_fail("Cluster lost quorum!");
    }

    my $conf = PVE::Cluster::cfs_read_file('corosync.conf');
    my $conf_nodelist = PVE::Corosync::nodelist($conf);

    if (!defined($conf_nodelist)) {
        log_fail("unable to retrieve nodelist from corosync.conf");
        # continue with an explicitly empty nodelist instead of relying on
        # 'keys' autovivifying the undefined reference further down
        $conf_nodelist = {};
    } elsif (grep { ($conf_nodelist->{$_}->{quorum_votes} // 0) != 1 } keys %$conf_nodelist) {
        # a missing quorum_votes entry counts as non-default, but no longer
        # triggers an uninitialized-value warning
        log_warn("non-default quorum_votes distribution detected!");
    }

    my $cfs_nodelist = PVE::Cluster::get_clinfo()->{nodelist};
    # grep in scalar context yields the number of nodes not flagged online
    my $offline_nodes = grep { ($cfs_nodelist->{$_}->{online} // 0) != 1 } keys %$cfs_nodelist;
    if ($offline_nodes > 0) {
        log_fail("$offline_nodes nodes are offline!");
    }

    my $conf_nodelist_count = scalar(keys %$conf_nodelist);
    my $cfs_nodelist_count = scalar(keys %$cfs_nodelist);
    log_warn("cluster consists of less than three nodes!")
        if $conf_nodelist_count < 3;

    log_fail("corosync.conf ($conf_nodelist_count) and pmxcfs ($cfs_nodelist_count) don't agree about size of nodelist.")
        if $conf_nodelist_count != $cfs_nodelist_count;

    # sorted so that the report order is stable between runs
    foreach my $cs_node (sort keys %$conf_nodelist) {
        my $entry = $conf_nodelist->{$cs_node};
        log_fail("No name entry for node '$cs_node' in corosync.conf.")
            if !defined($entry->{name});
        log_fail("No nodeid configured for node '$cs_node' in corosync.conf.")
            if !defined($entry->{nodeid});

        # ring addresses given as something other than an IP address fail
        foreach my $key (qw(ring0_addr ring1_addr)) {
            my $ring = $entry->{$key};
            next if !defined($ring);
            next if PVE::JSONSchema::pve_verify_ip($ring, 1);
            log_fail("$key '$ring' of node '$cs_node' is not an IP address, consider replacing it with the currently resolved IP address.");
        }
    }

    my $totem = $conf->{main}->{totem};

    my $transport = $totem->{transport};
    if (defined($transport)) {
        log_fail("Corosync transport explicitly set to '$transport' instead of implicit default!");
    }

    my $secauth_on = defined($totem->{secauth}) && $totem->{secauth} eq 'on';
    my $cipher = $totem->{crypto_cipher};
    my $cipher_set = defined($cipher) && $cipher ne 'none';

    if (!$secauth_on && !$cipher_set) {
        log_fail("Corosync authentication/encryption is not explicitly enabled (secauth / crypto_cipher / crypto_hash)!");
    }

    if (defined($cipher) && $cipher eq '3des') {
        log_fail("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!");
    }

    my $prefix_info = sub { my $line = shift; log_info("$line"); };
    # purely informational output: any error raised while running the tool
    # is deliberately ignored
    eval {
        print "\n";
        log_info("Printing detailed cluster status..");
        PVE::Tools::run_command(['corosync-quorumtool', '-siH'], outfunc => $prefix_info, errfunc => $prefix_info);
    };

    print_header("CHECKING INSTALLED COROSYNC VERSION");
    if (defined(my $corosync = $get_pkg->('corosync'))) {
        my $corosync_ver = $corosync->{OldVersion} // '';
        if ($corosync_ver =~ m/^2\./) {
            log_fail("corosync 2.x installed, cluster-wide upgrade to 3.x needed!");
        } elsif ($corosync_ver =~ m/^3\./) {
            log_pass("corosync 3.x installed.");
        } else {
            log_fail("unexpected corosync version installed: $corosync_ver!");
        }
    }
}
338
# Check a hyper-converged Ceph setup: overall health, OSD flags relevant for
# the upgrade, consistency of the running daemon versions and the state of
# the local installation. Returns early (after a 'skip') when no
# hyper-converged Ceph setup is detected.
sub check_ceph {
    print_header("CHECKING HYPER-CONVERGED CEPH STATUS");

    if (!PVE::Ceph::Tools::check_ceph_inited(1)) {
        log_skip("no hyper-converged ceph setup detected!");
        return;
    }
    log_info("hyper-converged ceph setup detected!");

    log_info("getting Ceph status/health information..");
    my $ceph_status = eval { PVE::API2::Ceph->status({ node => $nodename }); };
    my $osd_flags = eval { PVE::API2::Ceph->get_flags({ node => $nodename }); };

    # stays undef when the flags could not be retrieved
    my $noout;
    if ($osd_flags) {
        $noout = $osd_flags =~ m/noout/;
    }

    my $health = $ceph_status ? $ceph_status->{health} : undef;
    if (!$health) {
        log_fail("unable to determine Ceph status!");
    } else {
        my $ceph_health = $health->{status};
        if (!$ceph_health) {
            log_fail("unable to determine Ceph health!");
        } elsif ($ceph_health eq 'HEALTH_OK') {
            log_pass("Ceph health reported as 'HEALTH_OK'.");
        } elsif ($ceph_health eq 'HEALTH_WARN' && $noout && (keys %{$health->{checks}} == 1)) {
            # a lone failing check while 'noout' is set is accepted
            log_pass("Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set.");
        } else {
            log_warn("Ceph health reported as '$ceph_health'.\n Use the PVE ".
                "dashboard or 'ceph -s' to determine the specific issues and try to resolve them.");
        }
    }

    log_info("getting Ceph OSD flags..");
    eval {
        if ($osd_flags) {
            my $scrubbed = $osd_flags =~ m/recovery_deletes/ && $osd_flags =~ m/purged_snapdirs/;
            if ($scrubbed) {
                log_pass("all PGs have been scrubbed at least once while running Ceph Luminous.");
            } else {
                log_fail("missing 'recovery_deletes' and/or 'purged_snapdirs' flag, scrub of all PGs required before upgrading to Nautilus!");
            }

            if ($noout) {
                log_pass("noout flag set to prevent rebalancing during cluster-wide upgrades.");
            } else {
                log_warn("noout flag not set - recommended to prevent rebalancing during upgrades.");
            }
        } else {
            log_fail("unable to get Ceph OSD flags!");
        }
    };

    log_info("getting Ceph daemon versions..");
    my $ceph_versions = eval { PVE::Ceph::Tools::get_cluster_versions(undef, 1); };
    if ($ceph_versions) {
        # [ key in the version map, name used in the log output ]
        my @daemon_types = (
            [ mon => 'monitor' ],
            [ mgr => 'manager' ],
            [ mds => 'MDS' ],
            [ osd => 'OSD' ],
        );

        foreach my $daemon_type (@daemon_types) {
            my ($key, $name) = @$daemon_type;

            my $service_versions = $ceph_versions->{$key};
            if (!$service_versions) {
                log_skip("unable to determine versions of running Ceph $name instances.");
                next;
            }

            my $version_count = scalar(keys %$service_versions);
            if ($version_count == 0) {
                log_skip("no running instances detected for daemon type $name.");
            } elsif ($version_count == 1) {
                log_pass("single running version detected for daemon type $name.");
            } else {
                log_warn("multiple running versions detected for daemon type $name!");
            }
        }

        my $overall_versions = $ceph_versions->{overall};
        if (!$overall_versions) {
            log_warn("unable to determine overall Ceph daemon versions!");
        } elsif (keys %$overall_versions == 1) {
            log_pass("single running overall version detected for all Ceph daemon types.");
        } else {
            log_warn("overall version mismatch detected, check 'ceph versions' output for details!");
        }
    } else {
        log_fail("unable to determine Ceph daemon versions!");
    }

    my $local_ceph_ver = PVE::Ceph::Tools::get_local_version(1);
    if (!defined($local_ceph_ver)) {
        log_fail("unable to determine local Ceph version.");
    } elsif ($local_ceph_ver == 14) {
        my $scanned_osds = PVE::Tools::dir_glob_regex('/etc/ceph/osd', '^.*\.json$');
        log_warn("local Ceph version is Nautilus, local OSDs detected, but no conversion from ceph-disk to ceph-volume done (yet).")
            if -e '/var/lib/ceph/osd/' && !defined($scanned_osds);
    }
}
438
# Miscellaneous checks: unsupported SSH ciphers in root's SSH config, free
# space on the root file system, running guests, resolvability of the local
# hostname and finally the KVM nesting check.
sub check_misc {
    print_header("MISCELLANEOUS CHECKS");

    my $ssh_config = eval { PVE::Tools::file_get_contents('/root/.ssh/config') };
    if (!defined($ssh_config)) {
        log_skip("No SSH config file found.");
    } elsif ($ssh_config =~ /^Ciphers .*(blowfish|arcfour|3des).*$/m) {
        log_fail("Unsupported SSH Cipher configured for root in /root/.ssh/config: $1");
    }

    my $root_free = PVE::Tools::df('/', 10);
    if (defined($root_free) && $root_free->{avail} < 2*1024*1024*1024) {
        log_warn("Less than 2G free space on root file system.");
    }

    log_info("Checking for running guests..");
    my $running_guests = 0;

    my $vms = eval { PVE::API2::Qemu->vmlist({ node => $nodename }) };
    if ($@) {
        log_warn("Failed to retrieve information about this node's VMs - $@");
    }
    if (defined($vms)) {
        $running_guests += scalar(grep { $_->{status} eq 'running' } @$vms);
    }

    my $cts = eval { PVE::API2::LXC->vmlist({ node => $nodename }) };
    if ($@) {
        log_warn("Failed to retrieve information about this node's CTs - $@");
    }
    if (defined($cts)) {
        $running_guests += scalar(grep { $_->{status} eq 'running' } @$cts);
    }

    if ($running_guests > 0) {
        log_warn("$running_guests running guest(s) detected - consider migrating or stopping them.");
    } else {
        log_pass("no running guest detected.");
    }

    log_info("Checking if the local node's hostname is resolvable..");
    my $host = PVE::INotify::nodename();
    my $local_ip = eval { PVE::Network::get_ip_from_hostname($host) };
    if ($@) {
        log_warn("Failed to resolve hostname '$host' to IP - $@");
    } else {
        log_info("Checking if resolved IP is configured on local node..");

        # host route: full prefix length for the address family
        my $prefix_len = Net::IP::ip_is_ipv6($local_ip) ? 128 : 32;
        my $cidr = "$local_ip/$prefix_len";

        my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
        my $ip_count = scalar(@$configured_ips);

        if ($ip_count == 1) {
            log_pass("Resolved node IP '$local_ip' configured and active on single interface.");
        } elsif ($ip_count > 1) {
            log_warn("Resolved node IP '$local_ip' active on multiple ($ip_count) interfaces!");
        } else {
            log_fail("Resolved node IP '$local_ip' not configured or active for '$host'");
        }
    }

    check_kvm_nested();
}
492
# The single 'checklist' command: runs all checks in a fixed order and then
# prints a summary of the per-level counters maintained by the log helpers.
__PACKAGE__->register_method ({
    name => 'checklist',
    path => 'checklist',
    method => 'GET',
    description => 'Check (pre-/post-)upgrade conditions.',
    parameters => {
        additionalProperties => 0,
        properties => {
        },
    },
    returns => { type => 'null' },
    code => sub {
        my ($param) = @_;

        check_pve_packages();
        check_cluster_corosync();
        check_ceph();
        check_storage_health();
        check_misc();

        print_header("SUMMARY");

        # sum over every counted level
        my $total = 0;
        foreach my $count (values %$counters) {
            $total += $count;
        }

        print "TOTAL: $total\n";
        print colored("PASSED: $counters->{pass}\n", 'green');
        print "SKIPPED: $counters->{skip}\n";
        print colored("WARNINGS: $counters->{warn}\n", 'yellow');
        print colored("FAILURES: $counters->{fail}\n", 'red');

        my $needs_attention = $counters->{warn} > 0 || $counters->{fail} > 0;
        if ($needs_attention) {
            print colored("\nATTENTION: Please check the output for detailed information!\n", 'red');
        }

        return undef;
    }});
529
# Command definition exported by this package: the 'checklist' method with
# an empty argument list and an empty hash as remaining entries.
our $cmddef = [ __PACKAGE__, 'checklist', [], {} ];

# For now all command line parameters are discarded; the tool only runs the
# checks.
@ARGV = ();

1;