X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FDiskmanage.pm;h=116a99a8e9037515ce340df21f45c2692ee18168;hb=2c048efdaf2ac73c3a50edeaaf8280b2738a7f98;hp=5d498ce4f801e62b799a54d0ab19b98282de564c;hpb=7b32e2a7cd04f5c4fc360d2d6bd32a49fd4a16f1;p=pve-storage.git diff --git a/PVE/Diskmanage.pm b/PVE/Diskmanage.pm index 5d498ce..116a99a 100644 --- a/PVE/Diskmanage.pm +++ b/PVE/Diskmanage.pm @@ -2,9 +2,13 @@ package PVE::Diskmanage; use strict; use warnings; + use PVE::ProcFSTools; use Data::Dumper; use Cwd qw(abs_path); +use Fcntl ':mode'; +use File::stat; +use JSON; use PVE::Tools qw(extract_param run_command file_get_contents file_read_firstline dir_glob_regex dir_glob_foreach trim); @@ -12,7 +16,9 @@ my $SMARTCTL = "/usr/sbin/smartctl"; my $ZPOOL = "/sbin/zpool"; my $SGDISK = "/sbin/sgdisk"; my $PVS = "/sbin/pvs"; +my $LVS = "/sbin/lvs"; my $UDEVADM = "/bin/udevadm"; +my $LSBLK = "/bin/lsblk"; sub verify_blockdev_path { my ($rel_path) = @_; @@ -78,8 +84,11 @@ sub get_smart_data { my $returncode = 0; - $disk =~ s/n\d+$// - if $disk =~ m!^/dev/nvme\d+n\d+$!; + if ($disk =~ m!^/dev/(nvme\d+n\d+)$!) { + my $info = get_sysdir_info("/sys/block/$1"); + $disk = "/dev/".($info->{device} + or die "failed to get nvme controller device for $disk\n"); + } my $cmd = [$SMARTCTL, '-H']; push @$cmd, '-A', '-f', 'brief' if !$healthonly; @@ -100,7 +109,6 @@ sub get_smart_data { if (defined($type) && $type eq 'ata' && $line =~ m/^([ \d]{2}\d)\s+(\S+)\s+(\S{6})\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(.*)$/) { my $entry = {}; - $entry->{name} = $2 if defined $2; $entry->{flags} = $3 if defined $3; # the +0 makes a number out of the strings @@ -126,6 +134,10 @@ sub get_smart_data { } elsif (defined($type) && $type eq 'text') { $smartdata->{text} = '' if !defined $smartdata->{text}; $smartdata->{text} .= "$line\n"; + # extract wearout from nvme/sas text, allow for decimal values + if ($line =~ m/Percentage Used(?: endurance indicator)?:\s*(\d+(?:\.\d+)?)\%/i) { + $smartdata->{wearout} = 100 - $1; + } } elsif ($line =~ m/SMART Disabled/) { $smartdata->{health} = "SMART Disabled"; } @@ -145,8 +157,50 @@ sub get_smart_data { return $smartdata; } +sub get_parttype_info() { + my $cmd = [$LSBLK, '--json', '-o', 'path,parttype']; + my $output = ""; + my $res = {}; + eval { + run_command($cmd, outfunc => sub { + my ($line) = @_; + $output .= "$line\n"; + }); + }; + warn "$@\n" if $@; + return $res if $output eq ''; + + my $parsed = eval { decode_json($output) }; + warn "$@\n" if $@; + my $list = $parsed->{blockdevices} // []; + + foreach my $dev (@$list) { + next if !($dev->{parttype}); + my $type = $dev->{parttype}; + $res->{$type} = [] if !defined($res->{$type}); + push @{$res->{$type}}, $dev->{path}; + } + + return $res; +} + +my $get_devices_by_partuuid = sub { + my ($parttype_map, $uuids, $res) = @_; + + $res = {} if !defined($res); + + foreach my $uuid (sort keys %$uuids) { + map { $res->{$_} = $uuids->{$uuid} } @{$parttype_map->{$uuid}}; + } + + return $res; +}; + sub get_zfs_devices { - my $list = {}; + my ($parttype_map) = @_; + my $res = {}; + + return {} if ! -x $ZPOOL; # use zpool and parttype uuid, # because log and cache do not have @@ -156,7 +210,7 @@ sub get_zfs_devices { my ($line) = @_; if ($line =~ m|^\t([^\t]+)\t|) { - $list->{$1} = 1; + $res->{$1} = 1; } }); }; @@ -165,26 +219,26 @@ sub get_zfs_devices { # because maybe zfs tools are not installed warn "$@\n" if $@; - my $applezfsuuid = "6a898cc3-1dd2-11b2-99a6-080020736631"; - my $bsdzfsuuid = "516e7cba-6ecf-11d6-8ff8-00022d09712b"; + my $uuids = { + "6a898cc3-1dd2-11b2-99a6-080020736631" => 1, # apple + "516e7cba-6ecf-11d6-8ff8-00022d09712b" => 1, # bsd + }; - dir_glob_foreach('/dev/disk/by-parttypeuuid', "($applezfsuuid|$bsdzfsuuid)\..+", sub { - my ($entry) = @_; - my $real_dev = abs_path("/dev/disk/by-parttypeuuid/$entry"); - $list->{$real_dev} = 1; - }); - return $list; + $res = $get_devices_by_partuuid->($parttype_map, $uuids, $res); + + return $res; } sub get_lvm_devices { - my $list = {}; + my ($parttype_map) = @_; + my $res = {}; eval { run_command([$PVS, '--noheadings', '--readonly', '-o', 'pv_name'], outfunc => sub{ my ($line) = @_; $line = trim($line); if ($line =~ m|^/dev/|) { - $list->{$line} = 1; + $res->{$line} = 1; } }); }; @@ -193,29 +247,63 @@ sub get_lvm_devices { # to give up, but indicate an error has occured warn "$@\n" if $@; - my $lvmuuid = "e6d6d379-f507-44c2-a23c-238f2a3df928"; + my $uuids = { + "e6d6d379-f507-44c2-a23c-238f2a3df928" => 1, + }; - dir_glob_foreach('/dev/disk/by-parttypeuuid', "$lvmuuid\..+", sub { - my ($entry) = @_; - my $real_dev = abs_path("/dev/disk/by-parttypeuuid/$entry"); - $list->{$real_dev} = 1; - }); + $res = $get_devices_by_partuuid->($parttype_map, $uuids, $res); - return $list; + return $res; } sub get_ceph_journals { - my $journalhash = {}; + my ($parttype_map) = @_; + my $res = {}; + + my $uuids = { + '45b0969e-9b03-4f30-b4c6-b4b80ceff106' => 1, # journal + '30cd0809-c2b2-499c-8879-2d6b78529876' => 2, # db + '5ce17fce-4087-4169-b7ff-056cc58473f9' => 3, # wal + 'cafecafe-9b03-4f30-b4c6-b4b80ceff106' => 4, # block + }; + + $res = $get_devices_by_partuuid->($parttype_map, $uuids, $res); - my $journal_uuid = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'; + return $res; +} - dir_glob_foreach('/dev/disk/by-parttypeuuid', "$journal_uuid\..+", sub { - my ($entry) = @_; - my $real_dev = abs_path("/dev/disk/by-parttypeuuid/$entry"); - $journalhash->{$real_dev} = 1; +# reads the lv_tags and matches them with the devices +sub get_ceph_volume_infos { + my $result = {}; + + my $cmd = [ $LVS, '-S', 'lv_name=~^osd-', '-o', 'devices,lv_name,lv_tags', + '--noheadings', '--readonly', '--separator', ';' ]; + + run_command($cmd, outfunc => sub { + my $line = shift; + $line =~ s/(?:^\s+)|(?:\s+$)//g; # trim whitespaces + + my $fields = [ split(';', $line) ]; + + # lvs syntax is /dev/sdX(Y) where Y is the start (which we do not need) + my ($dev) = $fields->[0] =~ m|^(/dev/[a-z]+)|; + if ($fields->[1] =~ m|^osd-([^-]+)-|) { + my $type = $1; + # $result autovivification is wanted, to not creating empty hashes + if (($type eq 'block' || $type eq 'data') && $fields->[2] =~ m/ceph.osd_id=([^,]+)/) { + $result->{$dev}->{osdid} = $1; + $result->{$dev}->{bluestore} = ($type eq 'block'); + if ($fields->[2] =~ m/ceph\.encrypted=1/) { + $result->{$dev}->{encrypted} = 1; + } + } else { + # undef++ becomes '1' (see `perldoc perlop`: Auto-increment) + $result->{$dev}->{$type}++; + } + } }); - return $journalhash; + return $result; } sub get_udev_info { @@ -271,6 +359,11 @@ sub get_udev_info { $data->{wwn} = $1; } + if ($info =~ m/^E: DEVLINKS=(.+)$/m) { + my @devlinks = grep(m#^/dev/disk/by-id/(ata|scsi|nvme(?!-eui))#, split (/ /, $1)); + $data->{by_id_link} = $devlinks[0] if defined($devlinks[0]); + } + return $data; } @@ -294,43 +387,51 @@ sub get_sysdir_info { $data->{vendor} = file_read_firstline("$sysdir/device/vendor") || 'unknown'; $data->{model} = file_read_firstline("$sysdir/device/model") || 'unknown'; + if (defined(my $device = readlink("$sysdir/device"))) { + # strip directory and untaint: + ($data->{device}) = $device =~ m!([^/]+)$!; + } + return $data; } sub get_wear_leveling_info { - my ($attributes, $model) = @_; - - my $wearout; - - my $vendormap = { - 'kingston' => 231, - 'samsung' => 177, - 'intel' => 233, - 'sandisk' => 233, - 'crucial' => 202, - 'default' => 233, - }; - - # find target attr id + my ($smartdata) = @_; + my $attributes = $smartdata->{attributes}; - my $attrid; - - foreach my $vendor (keys %$vendormap) { - if ($model =~ m/$vendor/i) { - $attrid = $vendormap->{$vendor}; - # found the attribute - last; - } + if (defined($smartdata->{wearout})) { + return $smartdata->{wearout}; } - if (!$attrid) { - $attrid = $vendormap->{default}; - } + my $wearout; - foreach my $attr (@$attributes) { - next if $attr->{id} != $attrid; - $wearout = $attr->{value}; - last; + # Common register names that represent percentage values of potential + # failure indicators used in drivedb.h of smartmontool's. Order matters, + # as some drives may have multiple definitions + my @wearoutregisters = ( + "Media_Wearout_Indicator", + "SSD_Life_Left", + "Wear_Leveling_Count", + "Perc_Write\/Erase_Ct_BC", + "Perc_Rated_Life_Remain", + "Remaining_Lifetime_Perc", + "Percent_Lifetime_Remain", + "Lifetime_Left", + "PCT_Life_Remaining", + "Lifetime_Remaining", + "Percent_Life_Remaining", + "Percent_Lifetime_Used", + "Perc_Rated_Life_Used" + ); + + # Search for S.M.A.R.T. attributes for known register + foreach my $register (@wearoutregisters) { + last if defined $wearout; + foreach my $attr (@$attributes) { + next if $attr->{name} !~ m/$register/; + $wearout = $attr->{value}; + last; + } } return $wearout; @@ -351,8 +452,23 @@ sub dir_is_empty { return 1; } +sub is_iscsi { + my ($sysdir) = @_; + + if (-l $sysdir && readlink($sysdir) =~ m|host[^/]*/session[^/]*|) { + return 1; + } + + return 0; +} + +my sub is_ssdlike { + my ($type) = @_; + return $type eq 'ssd' || $type eq 'nvme'; +} + sub get_disks { - my ($disk, $nosmart) = @_; + my ($disks, $nosmart) = @_; my $disklist = {}; my $mounted = {}; @@ -369,15 +485,30 @@ sub get_disks { return $mounted->{$dev}; }; - my $journalhash = get_ceph_journals(); + my $parttype_map = get_parttype_info(); + + my $journalhash = get_ceph_journals($parttype_map); + my $ceph_volume_infos = get_ceph_volume_infos(); - my $zfslist = get_zfs_devices(); + my $zfshash = get_zfs_devices($parttype_map); + + my $lvmhash = get_lvm_devices($parttype_map); + + my $disk_regex = ".*"; + if (defined($disks)) { + if (!ref($disks)) { + $disks = [ $disks ]; + } elsif (ref($disks) ne 'ARRAY') { + die "disks is not a string or array reference\n"; + } + # we get cciss/c0d0 but need cciss!c0d0 + $_ =~ s|cciss/|cciss!| for @$disks; - my $lvmlist = get_lvm_devices(); + $disk_regex = "(?:" . join('|', @$disks) . ")"; + } - dir_glob_foreach('/sys/block', '.*', sub { + dir_glob_foreach('/sys/block', $disk_regex, sub { my ($dev) = @_; - return if defined($disk) && $disk ne $dev; # whitelisting following devices # hdX: ide block device # sdX: sd block device @@ -396,7 +527,7 @@ sub get_disks { my $sysdir = "/sys/block/$dev"; # we do not want iscsi devices - return if -l $sysdir && readlink($sysdir) =~ m|host[^/]*/session[^/]*|; + return if is_iscsi($sysdir); my $sysdata = get_sysdir_info($sysdir); return if !defined($sysdata); @@ -405,6 +536,7 @@ sub get_disks { if ($sysdata->{rotational} == 0) { $type = 'ssd'; + $type = 'nvme' if $dev =~ m/^nvme\d+n\d+$/; $data->{rpm} = 0; } elsif ($sysdata->{rotational} == 1) { if ($data->{rpm} != -1) { @@ -420,24 +552,32 @@ sub get_disks { if (!$nosmart) { eval { - my $smartdata = get_smart_data($devpath, ($type ne 'ssd')); + my $smartdata = get_smart_data($devpath, !is_ssdlike($type)); $health = $smartdata->{health} if $smartdata->{health}; - if ($type eq 'ssd') { + if (is_ssdlike($type)) { # if we have an ssd we try to get the wearout indicator - my $wearval = get_wear_leveling_info($smartdata->{attributes}, $data->{model} || $sysdir->{model}); - $wearout = $wearval if $wearval; + my $wearval = get_wear_leveling_info($smartdata); + $wearout = $wearval if defined($wearval); } }; } my $used; - $used = 'LVM' if $lvmlist->{$devpath}; + $used = 'LVM' if $lvmhash->{$devpath}; $used = 'mounted' if &$dev_is_mounted($devpath); - $used = 'ZFS' if $zfslist->{$devpath}; + $used = 'ZFS' if $zfshash->{$devpath}; + + # we replaced cciss/ with cciss! above + # but in the result we need cciss/ again + # because the caller might want to check the + # result again with the original parameter + if ($dev =~ m|^cciss!|) { + $dev =~ s|^cciss!|cciss/|; + } $disklist->{$dev} = { vendor => $sysdata->{vendor}, @@ -453,9 +593,16 @@ sub get_disks { wearout => $wearout, }; + my $by_id_link = $data->{by_id_link}; + $disklist->{$dev}->{by_id_link} = $by_id_link if defined($by_id_link); + my $osdid = -1; + my $bluestore = 0; + my $osdencrypted = 0; my $journal_count = 0; + my $db_count = 0; + my $wal_count = 0; my $found_partitions; my $found_lvm; @@ -481,21 +628,37 @@ sub get_disks { } } - if ($lvmlist->{"$partpath/$part"}) { + if ($lvmhash->{"$partpath/$part"}) { $found_lvm = 1; } - if ($zfslist->{"$partpath/$part"}) { + if ($zfshash->{"$partpath/$part"}) { $found_zfs = 1; } - $journal_count++ if $journalhash->{"$partpath/$part"}; + if (my $journal_part = $journalhash->{"$partpath/$part"}) { + $journal_count++ if $journal_part == 1; + $db_count++ if $journal_part == 2; + $wal_count++ if $journal_part == 3; + $bluestore = 1 if $journal_part == 4; + } if (!dir_is_empty("$sysdir/$part/holders") && !$found_lvm) { $found_dm = 1; } }); + if (my $ceph_volume = $ceph_volume_infos->{$devpath}) { + $journal_count += $ceph_volume->{journal} // 0; + $db_count += $ceph_volume->{db} // 0; + $wal_count += $ceph_volume->{wal} // 0; + if (defined($ceph_volume->{osdid})) { + $osdid = $ceph_volume->{osdid}; + $bluestore = 1 if $ceph_volume->{bluestore}; + $osdencrypted = 1 if $ceph_volume->{encrypted}; + } + } + $used = 'mounted' if $found_mountpoints && !$used; $used = 'LVM' if $found_lvm && !$used; $used = 'ZFS' if $found_zfs && !$used; @@ -509,11 +672,109 @@ sub get_disks { $disklist->{$dev}->{used} = $used if $used; $disklist->{$dev}->{osdid} = $osdid; - $disklist->{$dev}->{journals} = $journal_count; + $disklist->{$dev}->{journals} = $journal_count if $journal_count; + $disklist->{$dev}->{bluestore} = $bluestore if $osdid != -1; + $disklist->{$dev}->{osdencrypted} = $osdencrypted if $osdid != -1; + $disklist->{$dev}->{db} = $db_count if $db_count; + $disklist->{$dev}->{wal} = $wal_count if $wal_count; }); return $disklist; } +sub get_partnum { + my ($part_path) = @_; + + my $st = stat($part_path); + + next if !$st->mode || !S_ISBLK($st->mode) || !$st->rdev; + my $major = PVE::Tools::dev_t_major($st->rdev); + my $minor = PVE::Tools::dev_t_minor($st->rdev); + my $partnum_path = "/sys/dev/block/$major:$minor/"; + + my $partnum; + + $partnum = file_read_firstline("${partnum_path}partition"); + + die "Partition does not exist\n" if !defined($partnum); + + #untaint and ensure it is a int + if ($partnum =~ m/(\d+)/) { + $partnum = $1; + die "Partition number $partnum is invalid\n" if $partnum > 128; + } else { + die "Failed to get partition number\n"; + } + + return $partnum; +} + +sub get_blockdev { + my ($part_path) = @_; + + my ($dev, $block_dev); + if ($part_path =~ m|^/dev/(.*)$|) { + $dev = $1; + my $link = readlink "/sys/class/block/$dev"; + $block_dev = $1 if $link =~ m|([^/]*)/$dev$|; + } + + die "Can't parse parent device\n" if !defined($block_dev); + die "No valid block device\n" if index($dev, $block_dev) == -1; + + $block_dev = "/dev/$block_dev"; + die "Block device does not exsists\n" if !(-b $block_dev); + + return $block_dev; +} + +sub locked_disk_action { + my ($sub) = @_; + my $res = PVE::Tools::lock_file('/run/lock/pve-diskmanage.lck', undef, $sub); + die $@ if $@; + return $res; +} + +sub assert_disk_unused { + my ($dev) = @_; + + die "device '$dev' is already in use\n" if disk_is_used($dev); + + return undef; +} + +sub append_partition { + my ($dev, $size) = @_; + + my $devname = $dev; + $devname =~ s|^/dev/||; + + my $newpartid = 1; + dir_glob_foreach("/sys/block/$devname", qr/\Q$devname\E.*?(\d+)/, sub { + my ($part, $partid) = @_; + + if ($partid >= $newpartid) { + $newpartid = $partid + 1; + } + }); + + $size = PVE::Tools::convert_size($size, 'b' => 'mb'); + + run_command([ $SGDISK, '-n', "$newpartid:0:+${size}M", $dev ], + errmsg => "error creating partition '$newpartid' on '$dev'"); + + my $partition; + + # loop again to detect the real partiton device which does not always follow + # a strict $devname$partition scheme like /dev/nvme0n1 -> /dev/nvme0n1p1 + dir_glob_foreach("/sys/block/$devname", qr/\Q$devname\E.*$newpartid/, sub { + my ($part) = @_; + + $partition = "/dev/$part"; + }); + + return $partition; +} + 1;