X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FDiskmanage.pm;h=5a5fc87799fc767406fadac086391352d39c2b4a;hb=76c1e57be7c8495586e38205611290db049896e0;hp=dc6ff4ae00fc085ad127f58a729b4352eee23d62;hpb=cbba9b5b9c92d1dccf7c041df255497c11e8c02d;p=pve-storage.git diff --git a/PVE/Diskmanage.pm b/PVE/Diskmanage.pm index dc6ff4a..5a5fc87 100644 --- a/PVE/Diskmanage.pm +++ b/PVE/Diskmanage.pm @@ -5,6 +5,7 @@ use warnings; use PVE::ProcFSTools; use Data::Dumper; use Cwd qw(abs_path); +use Fcntl ':mode'; use PVE::Tools qw(extract_param run_command file_get_contents file_read_firstline dir_glob_regex dir_glob_foreach trim); @@ -61,7 +62,7 @@ sub disk_is_used { my $dev = $disk; $dev =~ s|^/dev/||; - my $disklist = get_disks($dev); + my $disklist = get_disks($dev, 1); die "'$disk' is not a valid local disk\n" if !defined($disklist->{$dev}); return 1 if $disklist->{$dev}->{used}; @@ -70,65 +71,79 @@ sub disk_is_used { } sub get_smart_data { - my ($disk) = @_; + my ($disk, $healthonly) = @_; assert_blockdev($disk); my $smartdata = {}; - my $datastarted = 0; + my $type; + + my $returncode = 0; + + $disk =~ s/n\d+$// + if $disk =~ m!^/dev/nvme\d+n\d+$!; + + my $cmd = [$SMARTCTL, '-H']; + push @$cmd, '-A', '-f', 'brief' if !$healthonly; + push @$cmd, $disk; eval { - run_command([$SMARTCTL, '-a', '-f', 'brief', $disk], outfunc => sub{ + $returncode = run_command($cmd, noerr => 1, outfunc => sub{ my ($line) = @_; - if ($datastarted && $line =~ m/^[ \d]{2}\d/) { - $line = trim($line); - my @data = split /\s+/, $line; +# ATA SMART attributes, e.g.: +# ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE +# 1 Raw_Read_Error_Rate POSR-K 100 100 000 - 0 +# +# SAS and NVME disks, e.g.: +# Data Units Written: 5,584,952 [2.85 TB] +# Accumulated start-stop cycles: 34 + + if (defined($type) && $type eq 'ata' && $line =~ m/^([ \d]{2}\d)\s+(\S+)\s+(\S{6})\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(.*)$/) { my $entry = {}; - $entry->{name} = $data[1]; - $entry->{flags} = $data[2]; + + + $entry->{name} = $2 if defined $2; + $entry->{flags} = $3 if defined $3; # the +0 makes a number out of the strings - $entry->{value} = $data[3] + 0; - $entry->{worst} = $data[4] + 0; - $entry->{threshold} = $data[5] + 0; - $entry->{fail} = $data[6]; - $entry->{raw} = $data[7]; - $entry->{id} = $data[0]; + $entry->{value} = $4+0 if defined $4; + $entry->{worst} = $5+0 if defined $5; + # some disks report the default threshold as --- instead of 000 + if (defined($6) && $6 eq '---') { + $entry->{threshold} = 0; + } else { + $entry->{threshold} = $6+0 if defined $6; + } + $entry->{fail} = $7 if defined $7; + $entry->{raw} = $8 if defined $8; + $entry->{id} = $1 if defined $1; push @{$smartdata->{attributes}}, $entry; - } elsif ($line =~ m/self\-assessment test result: (.*)$/) { + } elsif ($line =~ m/(?:Health Status|self\-assessment test result): (.*)$/ ) { $smartdata->{health} = $1; } elsif ($line =~ m/Vendor Specific SMART Attributes with Thresholds:/) { - $datastarted = 1; + $type = 'ata'; + delete $smartdata->{text}; + } elsif ($line =~ m/=== START OF (READ )?SMART DATA SECTION ===/) { + $type = 'text'; + } elsif (defined($type) && $type eq 'text') { + $smartdata->{text} = '' if !defined $smartdata->{text}; + $smartdata->{text} .= "$line\n"; + } elsif ($line =~ m/SMART Disabled/) { + $smartdata->{health} = "SMART Disabled"; } }); }; - die "Error getting S.M.A.R.T. data: $@\n" if $@; - $smartdata->{health} = 'UNKOWN' if !defined $smartdata->{health}; - return $smartdata; -} - -sub get_smart_health { - my ($disk) = @_; - - return "NOT A DEVICE" if !assert_blockdev($disk, 1); + my $err = $@; - my $message = "UNKOWN"; + # bit 0 and 1 mark an severe smartctl error + # all others are for disk status, so ignore them + # see smartctl(8) + if ((defined($returncode) && ($returncode & 0b00000011)) || $err) { + die "Error getting S.M.A.R.T. data: Exit code: $returncode\n"; + } - eval { - run_command([$SMARTCTL, '-H', $disk], outfunc => sub { - my ($line) = @_; + $smartdata->{type} = $type; - if ($line =~ m/test result: (.*)$/) { - $message = $1; - } elsif ($line =~ m/open device: (.*) failed: (.*)$/) { - $message = "FAILED TO OPEN"; - } elsif ($line =~ m/^SMART Disabled/) { - $message = "SMART DISABLED"; - } - }); - }; - # we ignore errors here because by default we want to return UNKNOWN - - return $message; + return $smartdata; } sub get_zfs_devices { @@ -194,11 +209,22 @@ sub get_ceph_journals { my $journalhash = {}; my $journal_uuid = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'; + my $db_uuid = '30cd0809-c2b2-499c-8879-2d6b78529876'; + my $wal_uuid = '5ce17fce-4087-4169-b7ff-056cc58473f9'; + my $block_uuid = 'cafecafe-9b03-4f30-b4c6-b4b80ceff106'; - dir_glob_foreach('/dev/disk/by-parttypeuuid', "$journal_uuid\..+", sub { - my ($entry) = @_; + dir_glob_foreach('/dev/disk/by-parttypeuuid', "($journal_uuid|$db_uuid|$wal_uuid|$block_uuid)\..+", sub { + my ($entry, $type) = @_; my $real_dev = abs_path("/dev/disk/by-parttypeuuid/$entry"); - $journalhash->{$real_dev} = 1; + if ($type eq $journal_uuid) { + $journalhash->{$real_dev} = 1; + } elsif ($type eq $db_uuid) { + $journalhash->{$real_dev} = 2; + } elsif ($type eq $wal_uuid) { + $journalhash->{$real_dev} = 3; + } elsif ($type eq $block_uuid) { + $journalhash->{$real_dev} = 4; + } }); return $journalhash; @@ -210,7 +236,7 @@ sub get_udev_info { my $info = ""; my $data = {}; eval { - run_command([$UDEVADM, 'info', '-n', $dev, '--query', 'all'], outfunc => sub { + run_command([$UDEVADM, 'info', '-p', $dev, '--query', 'all'], outfunc => sub { my ($line) = @_; $info .= "$line\n"; }); @@ -248,6 +274,10 @@ sub get_udev_info { $data->{usb} = 1; } + if ($info =~ m/^E: ID_MODEL=(.+)$/m) { + $data->{model} = $1; + } + $data->{wwn} = 'unknown'; if ($info =~ m/^E: ID_WWN=(.*)$/m) { $data->{wwn} = $1; @@ -259,6 +289,8 @@ sub get_udev_info { sub get_sysdir_info { my ($sysdir) = @_; + return undef if ! -d "$sysdir/device"; + my $data = {}; my $size = file_read_firstline("$sysdir/size"); @@ -269,7 +301,7 @@ sub get_sysdir_info { $data->{size} = $size * 512; # dir/queue/rotational should be 1 for hdd, 0 for ssd - $data->{rotational} = file_read_firstline("$sysdir/queue/rotational"); + $data->{rotational} = file_read_firstline("$sysdir/queue/rotational") // -1; $data->{vendor} = file_read_firstline("$sysdir/device/vendor") || 'unknown'; $data->{model} = file_read_firstline("$sysdir/device/model") || 'unknown'; @@ -277,8 +309,62 @@ sub get_sysdir_info { return $data; } +sub get_wear_leveling_info { + my ($attributes, $model) = @_; + + my $wearout; + + my $vendormap = { + 'kingston' => 231, + 'samsung' => 177, + 'intel' => 233, + 'sandisk' => 233, + 'crucial' => 202, + 'default' => 233, + }; + + # find target attr id + + my $attrid; + + foreach my $vendor (keys %$vendormap) { + if ($model =~ m/$vendor/i) { + $attrid = $vendormap->{$vendor}; + # found the attribute + last; + } + } + + if (!$attrid) { + $attrid = $vendormap->{default}; + } + + foreach my $attr (@$attributes) { + next if $attr->{id} != $attrid; + $wearout = $attr->{value}; + last; + } + + return $wearout; +} + +sub dir_is_empty { + my ($dir) = @_; + + my $dh = IO::Dir->new ($dir); + return 1 if !$dh; + + while (defined(my $tmp = $dh->read)) { + next if $tmp eq '.' || $tmp eq '..'; + $dh->close; + return 0; + } + $dh->close; + return 1; +} + sub get_disks { - my ($disk) = @_; + my ($disk, $nosmart) = @_; my $disklist = {}; my $mounted = {}; @@ -295,27 +381,17 @@ sub get_disks { return $mounted->{$dev}; }; - my $dir_is_empty = sub { - my ($dir) = @_; - - my $dh = IO::Dir->new ($dir); - return 1 if !$dh; - - while (defined(my $tmp = $dh->read)) { - next if $tmp eq '.' || $tmp eq '..'; - $dh->close; - return 0; - } - $dh->close; - return 1; - }; - my $journalhash = get_ceph_journals(); my $zfslist = get_zfs_devices(); my $lvmlist = get_lvm_devices(); + # we get cciss/c0d0 but need cciss!c0d0 + if (defined($disk) && $disk =~ m|^cciss/|) { + $disk =~ s|cciss/|cciss!|; + } + dir_glob_foreach('/sys/block', '.*', sub { my ($dev) = @_; return if defined($disk) && $disk ne $dev; @@ -325,21 +401,19 @@ sub get_disks { # vdX: virtual block device # xvdX: xen virtual block device # nvmeXnY: nvme devices - # cXnY: cciss devices + # cciss!cXnY: cciss devices return if $dev !~ m/^(h|s|x?v)d[a-z]+$/ && $dev !~ m/^nvme\d+n\d+$/ && - $dev !~ m/^c\d+d\d+$/; + $dev !~ m/^cciss\!c\d+d\d+$/; - my $data = get_udev_info($dev); + my $data = get_udev_info("/sys/block/$dev"); return if !defined($data); my $devpath = $data->{devpath}; my $sysdir = "/sys/block/$dev"; - return if ! -d "$sysdir/device"; - # we do not want iscsi devices - return if readlink($sysdir) =~ m|host[^/]*/session[^/]*|; + return if -l $sysdir && readlink($sysdir) =~ m|host[^/]*/session[^/]*|; my $sysdata = get_sysdir_info($sysdir); return if !defined($sysdata); @@ -358,28 +432,20 @@ sub get_disks { } } - my $health; - my $wearout; - if ($type eq 'ssd' && !defined($disk)) { - # if we have an ssd we try to get the wearout indicator - my $smartdata = get_smart_data($devpath); - $health = $smartdata->{health}; - foreach my $attr (@{$smartdata->{attributes}}) { - # ID 233 is media wearout indicator on intel and sandisk - # ID 177 is media wearout indicator on samsung - next if ($attr->{id} != 233 && $attr->{id} != 177); - next if ($attr->{name} !~ m/wear/i); - $wearout = $attr->{value}; - - # prefer the 233 value - last if ($attr->{id} == 233); - } + my $health = 'UNKNOWN'; + my $wearout = 'N/A'; + + if (!$nosmart) { + eval { + my $smartdata = get_smart_data($devpath, ($type ne 'ssd')); + $health = $smartdata->{health} if $smartdata->{health}; - $wearout = 'N/A' if !defined($wearout); - } elsif (!defined($disk)) { - # we do not need smart data if we check a single disk - # because this functionality is only for disk_is_used - $health = get_smart_health($devpath) if !defined($disk); + if ($type eq 'ssd') { + # if we have an ssd we try to get the wearout indicator + my $wearval = get_wear_leveling_info($smartdata->{attributes}, $data->{model} || $sysdir->{model}); + $wearout = $wearval if $wearval; + } + }; } my $used; @@ -390,9 +456,17 @@ sub get_disks { $used = 'ZFS' if $zfslist->{$devpath}; + # we replaced cciss/ with cciss! above + # but in the result we need cciss/ again + # because the caller might want to check the + # result again with the original parameter + if ($dev =~ m|^cciss!|) { + $dev =~ s|^cciss!|cciss/|; + } + $disklist->{$dev} = { vendor => $sysdata->{vendor}, - model => $sysdata->{model}, + model => $data->{model} || $sysdata->{model}, size => $sysdata->{size}, serial => $data->{serial}, gpt => $data->{gpt}, @@ -405,8 +479,11 @@ sub get_disks { }; my $osdid = -1; + my $bluestore = 0; my $journal_count = 0; + my $db_count = 0; + my $wal_count = 0; my $found_partitions; my $found_lvm; @@ -440,9 +517,14 @@ sub get_disks { $found_zfs = 1; } - $journal_count++ if $journalhash->{"$partpath/$part"}; + if ($journalhash->{"$partpath/$part"}) { + $journal_count++ if $journalhash->{"$partpath/$part"} == 1; + $db_count++ if $journalhash->{"$partpath/$part"} == 2; + $wal_count++ if $journalhash->{"$partpath/$part"} == 3; + $bluestore = 1 if $journalhash->{"$partpath/$part"} == 4; + } - if (!&$dir_is_empty("$sysdir/$part/holders") && !$found_lvm) { + if (!dir_is_empty("$sysdir/$part/holders") && !$found_lvm) { $found_dm = 1; } }); @@ -456,15 +538,76 @@ sub get_disks { # multipath, software raid, etc. # this check comes in last, to show more specific info # if we have it - $used = 'Device Mapper' if !$used && !&$dir_is_empty("$sysdir/holders"); + $used = 'Device Mapper' if !$used && !dir_is_empty("$sysdir/holders"); $disklist->{$dev}->{used} = $used if $used; $disklist->{$dev}->{osdid} = $osdid; - $disklist->{$dev}->{journals} = $journal_count; + $disklist->{$dev}->{journals} = $journal_count if $journal_count; + $disklist->{$dev}->{bluestore} = $bluestore if $osdid != -1; + $disklist->{$dev}->{db} = $db_count if $db_count; + $disklist->{$dev}->{wal} = $wal_count if $wal_count; }); return $disklist; } +sub get_partnum { + my ($part_path) = @_; + + my ($mode, $rdev) = (stat($part_path))[2,6]; + + next if !$mode || !S_ISBLK($mode) || !$rdev; + my $major = int($rdev / 0x100); + my $minor = $rdev % 0x100; + my $partnum_path = "/sys/dev/block/$major:$minor/"; + + my $partnum; + + $partnum = file_read_firstline("${partnum_path}partition"); + + die "Partition does not exists\n" if !defined($partnum); + + #untaint and ensure it is a int + if ($partnum =~ m/(\d+)/) { + $partnum = $1; + die "Partition number $partnum is invalid\n" if $partnum > 128; + } else { + die "Failed to get partition number\n"; + } + + return $partnum; +} + +sub get_blockdev { + my ($part_path) = @_; + + my $dev = $1 if $part_path =~ m|^/dev/(.*)$|; + my $link = readlink "/sys/class/block/$dev"; + my $block_dev = $1 if $link =~ m|([^/]*)/$dev$|; + + die "Can't parse parent device\n" if !defined($block_dev); + die "No valid block device\n" if index($dev, $block_dev) == -1; + + $block_dev = "/dev/$block_dev"; + die "Block device does not exsists\n" if !(-b $block_dev); + + return $block_dev; +} + +sub locked_disk_action { + my ($sub) = @_; + my $res = PVE::Tools::lock_file('/run/lock/pve-diskmanage.lck', undef, $sub); + die $@ if $@; + return $res; +} + +sub check_unused { + my ($dev) = @_; + + die "device $dev is already in use\n" if disk_is_used($dev); + + return undef; +} + 1;