use strict;
use warnings;
+
use PVE::ProcFSTools;
use Data::Dumper;
use Cwd qw(abs_path);
use Fcntl ':mode';
+use File::stat;
+use JSON;
use PVE::Tools qw(extract_param run_command file_get_contents file_read_firstline dir_glob_regex dir_glob_foreach trim);
my $ZPOOL = "/sbin/zpool";
my $SGDISK = "/sbin/sgdisk";
my $PVS = "/sbin/pvs";
+my $LVS = "/sbin/lvs";
my $UDEVADM = "/bin/udevadm";
+my $LSBLK = "/bin/lsblk";
sub verify_blockdev_path {
my ($rel_path) = @_;
my $returncode = 0;
- $disk =~ s/n\d+$//
- if $disk =~ m!^/dev/nvme\d+n\d+$!;
+ if ($disk =~ m!^/dev/(nvme\d+n\d+)$!) {
+ my $info = get_sysdir_info("/sys/block/$1");
+ $disk = "/dev/".($info->{device}
+ or die "failed to get nvme controller device for $disk\n");
+ }
my $cmd = [$SMARTCTL, '-H'];
push @$cmd, '-A', '-f', 'brief' if !$healthonly;
if (defined($type) && $type eq 'ata' && $line =~ m/^([ \d]{2}\d)\s+(\S+)\s+(\S{6})\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(.*)$/) {
my $entry = {};
-
$entry->{name} = $2 if defined $2;
$entry->{flags} = $3 if defined $3;
# the +0 makes a number out of the strings
} elsif (defined($type) && $type eq 'text') {
$smartdata->{text} = '' if !defined $smartdata->{text};
$smartdata->{text} .= "$line\n";
+ # extract wearout from nvme text, allow for decimal values
+ if ($line =~ m/Percentage Used:\s*(\d+(?:\.\d+)?)\%/i) {
+ $smartdata->{wearout} = 100 - $1;
+ }
} elsif ($line =~ m/SMART Disabled/) {
$smartdata->{health} = "SMART Disabled";
}
return $smartdata;
}
+# Build a lookup table from partition-type UUID to device paths.
+#
+# Runs `lsblk --json -o path,parttype` and groups every reported device path
+# by its 'parttype' value; entries with an empty or missing parttype are
+# skipped.
+#
+# Returns a hash reference of the form { $parttype => [ $path, ... ] }.
+# Errors raised while running lsblk or while decoding its output are only
+# warned about; an empty hash reference is returned when there is no output
+# or when the output is not the expected JSON structure.
+sub get_parttype_info() {
+    my $cmd = [$LSBLK, '--json', '-o', 'path,parttype'];
+    my $output = "";
+    my $res = {};
+
+    eval {
+        run_command($cmd, outfunc => sub {
+            my ($line) = @_;
+            $output .= "$line\n";
+        });
+    };
+    warn "$@\n" if $@;
+    return $res if $output eq '';
+
+    my $parsed = eval { decode_json($output) };
+    if (my $err = $@) {
+        warn "$err\n";
+        return $res;
+    }
+
+    # everything below runs outside of an eval, so never dereference
+    # something that does not have the structure we expect
+    return $res if ref($parsed) ne 'HASH';
+    my $list = $parsed->{blockdevices};
+    return $res if ref($list) ne 'ARRAY';
+
+    foreach my $dev (@$list) {
+        next if ref($dev) ne 'HASH';
+        my $type = $dev->{parttype};
+        next if !$type;
+        # push autovivifies the per-type array on first use
+        push @{$res->{$type}}, $dev->{path};
+    }
+
+    return $res;
+}
+
+# Translate a set of partition-type UUIDs into a per-device lookup table.
+#
+# Parameters:
+#   $parttype_map - hash ref { $parttype_uuid => [ $device_path, ... ] }
+#   $uuids        - hash ref { $parttype_uuid => $value }
+#   $res          - optional hash ref the result gets merged into
+#
+# Returns $res (or a new hash ref if none was passed) with
+# $res->{$device_path} = $value for every device whose partition type is
+# listed in $uuids. The UUIDs are processed in sorted order, so for a device
+# listed under several UUIDs the last one wins. The hashes referenced by
+# $parttype_map and $uuids are not modified.
+my $get_devices_by_partuuid = sub {
+    my ($parttype_map, $uuids, $res) = @_;
+
+    $res //= {};
+
+    foreach my $uuid (sort keys %$uuids) {
+        # fall back to an empty list, so that UUIDs without any device do
+        # not get autovivified into the caller's map
+        my $devices = $parttype_map->{$uuid} // [];
+        $res->{$_} = $uuids->{$uuid} for @$devices;
+    }
+
+    return $res;
+};
+
sub get_zfs_devices {
- my $list = {};
+ my ($parttype_map) = @_;
+ my $res = {};
+
+ return {} if ! -x $ZPOOL;
# use zpool and parttype uuid,
# because log and cache do not have
my ($line) = @_;
if ($line =~ m|^\t([^\t]+)\t|) {
- $list->{$1} = 1;
+ $res->{$1} = 1;
}
});
};
# because maybe zfs tools are not installed
warn "$@\n" if $@;
- my $applezfsuuid = "6a898cc3-1dd2-11b2-99a6-080020736631";
- my $bsdzfsuuid = "516e7cba-6ecf-11d6-8ff8-00022d09712b";
+ my $uuids = {
+ "6a898cc3-1dd2-11b2-99a6-080020736631" => 1, # apple
+ "516e7cba-6ecf-11d6-8ff8-00022d09712b" => 1, # bsd
+ };
- dir_glob_foreach('/dev/disk/by-parttypeuuid', "($applezfsuuid|$bsdzfsuuid)\..+", sub {
- my ($entry) = @_;
- my $real_dev = abs_path("/dev/disk/by-parttypeuuid/$entry");
- $list->{$real_dev} = 1;
- });
- return $list;
+ $res = $get_devices_by_partuuid->($parttype_map, $uuids, $res);
+
+ return $res;
}
sub get_lvm_devices {
- my $list = {};
+ my ($parttype_map) = @_;
+ my $res = {};
eval {
run_command([$PVS, '--noheadings', '--readonly', '-o', 'pv_name'], outfunc => sub{
my ($line) = @_;
$line = trim($line);
if ($line =~ m|^/dev/|) {
- $list->{$line} = 1;
+ $res->{$line} = 1;
}
});
};
# to give up, but indicate an error has occured
warn "$@\n" if $@;
- my $lvmuuid = "e6d6d379-f507-44c2-a23c-238f2a3df928";
+ my $uuids = {
+ "e6d6d379-f507-44c2-a23c-238f2a3df928" => 1,
+ };
- dir_glob_foreach('/dev/disk/by-parttypeuuid', "$lvmuuid\..+", sub {
- my ($entry) = @_;
- my $real_dev = abs_path("/dev/disk/by-parttypeuuid/$entry");
- $list->{$real_dev} = 1;
- });
+ $res = $get_devices_by_partuuid->($parttype_map, $uuids, $res);
- return $list;
+ return $res;
}
+# Map Ceph partitions to a role code, based on their partition-type UUID.
+#
+# Passes $parttype_map and the UUID table below to $get_devices_by_partuuid
+# and returns the hash reference it produces. The values are the numeric
+# codes from the table: 1 = journal, 2 = db, 3 = wal, 4 = block.
sub get_ceph_journals {
- my $journalhash = {};
+ my ($parttype_map) = @_;
+ my $res = {};
+
+ my $uuids = {
+ '45b0969e-9b03-4f30-b4c6-b4b80ceff106' => 1, # journal
+ '30cd0809-c2b2-499c-8879-2d6b78529876' => 2, # db
+ '5ce17fce-4087-4169-b7ff-056cc58473f9' => 3, # wal
+ 'cafecafe-9b03-4f30-b4c6-b4b80ceff106' => 4, # block
+ };
+
+ $res = $get_devices_by_partuuid->($parttype_map, $uuids, $res);
- my $journal_uuid = '45b0969e-9b03-4f30-b4c6-b4b80ceff106';
+ return $res;
+}
- dir_glob_foreach('/dev/disk/by-parttypeuuid', "$journal_uuid\..+", sub {
- my ($entry) = @_;
- my $real_dev = abs_path("/dev/disk/by-parttypeuuid/$entry");
- $journalhash->{$real_dev} = 1;
+# Collect information about ceph-volume (LVM based) OSDs per device.
+#
+# Runs `lvs` for all logical volumes whose name starts with 'osd-' and parses
+# the ';'-separated 'devices', 'lv_name' and 'lv_tags' columns.
+#
+# Returns a hash reference keyed by the device path taken from the 'devices'
+# column (e.g. '/dev/sdb' or '/dev/nvme0n1'). Each value is a hash reference
+# that may contain:
+#   osdid     - value of the 'ceph.osd_id' tag of a 'block' or 'data' LV
+#   bluestore - true if that LV is of type 'block'
+#   encrypted - 1 if the tags contain 'ceph.encrypted=1'
+#   <type>    - counter for every other LV, where <type> is the part of the
+#               LV name between 'osd-' and the following '-'
+#
+# Errors raised by run_command are not caught here.
+sub get_ceph_volume_infos {
+    my $result = {};
+
+    my $cmd = [ $LVS, '-S', 'lv_name=~^osd-', '-o', 'devices,lv_name,lv_tags',
+                '--noheadings', '--readonly', '--separator', ';' ];
+
+    run_command($cmd, outfunc => sub {
+        my $line = shift;
+        $line =~ s/(?:^\s+)|(?:\s+$)//g; # trim whitespaces
+
+        my ($devices, $lv_name, $lv_tags) = split(';', $line);
+        return if !defined($devices) || !defined($lv_name);
+        $lv_tags //= ''; # split drops trailing empty fields
+
+        # lvs syntax is /dev/sdX(Y) where Y is the start (which we do not
+        # need). NVMe namespaces contain digits (/dev/nvme0n1), so they need
+        # their own alternative, else they all collapse to '/dev/nvme'.
+        my ($dev) = $devices =~ m!^(/dev/(?:nvme\d+n\d+|[a-z]+))!;
+        return if !defined($dev); # never create an entry under an undef key
+
+        my ($type) = $lv_name =~ m|^osd-([^-]+)-|;
+        return if !defined($type);
+
+        # autovivification of $result->{$dev} is wanted here, that way no
+        # empty hashes get created for lines we skipped above
+        if (($type eq 'block' || $type eq 'data') && $lv_tags =~ m/ceph\.osd_id=([^,]+)/) {
+            $result->{$dev}->{osdid} = $1;
+            $result->{$dev}->{bluestore} = ($type eq 'block');
+            if ($lv_tags =~ m/ceph\.encrypted=1/) {
+                $result->{$dev}->{encrypted} = 1;
+            }
+        } else {
+            # undef++ becomes '1' (see `perldoc perlop`: Auto-increment)
+            $result->{$dev}->{$type}++;
+        }
});
- return $journalhash;
+ return $result;
}
sub get_udev_info {
$data->{wwn} = $1;
}
+ if ($info =~ m/^E: DEVLINKS=(.+)$/m) {
+ my @devlinks = grep(m#^/dev/disk/by-id/(ata|scsi|nvme(?!-eui))#, split (/ /, $1));
+ $data->{by_id_link} = $devlinks[0] if defined($devlinks[0]);
+ }
+
return $data;
}
$data->{vendor} = file_read_firstline("$sysdir/device/vendor") || 'unknown';
$data->{model} = file_read_firstline("$sysdir/device/model") || 'unknown';
+ if (defined(my $device = readlink("$sysdir/device"))) {
+ # strip directory and untaint:
+ ($data->{device}) = $device =~ m!([^/]+)$!;
+ }
+
return $data;
}
sub get_wear_leveling_info {
- my ($attributes, $model) = @_;
-
- my $wearout;
-
- my $vendormap = {
- 'kingston' => 231,
- 'samsung' => 177,
- 'intel' => 233,
- 'sandisk' => 233,
- 'crucial' => 202,
- 'default' => 233,
- };
-
- # find target attr id
+ my ($smartdata) = @_;
+ my $attributes = $smartdata->{attributes};
- my $attrid;
-
- foreach my $vendor (keys %$vendormap) {
- if ($model =~ m/$vendor/i) {
- $attrid = $vendormap->{$vendor};
- # found the attribute
- last;
- }
+ if (defined($smartdata->{wearout})) {
+ return $smartdata->{wearout};
}
- if (!$attrid) {
- $attrid = $vendormap->{default};
- }
+ my $wearout;
- foreach my $attr (@$attributes) {
- next if $attr->{id} != $attrid;
- $wearout = $attr->{value};
- last;
+ # Common register names that represent percentage values of potential
+ # failure indicators used in drivedb.h of smartmontool's. Order matters,
+ # as some drives may have multiple definitions
+ my @wearoutregisters = (
+ "Media_Wearout_Indicator",
+ "SSD_Life_Left",
+ "Wear_Leveling_Count",
+ "Perc_Write\/Erase_Ct_BC",
+ "Perc_Rated_Life_Remain",
+ "Remaining_Lifetime_Perc",
+ "Percent_Lifetime_Remain",
+ "Lifetime_Left",
+ "PCT_Life_Remaining",
+ "Lifetime_Remaining",
+ "Percent_Life_Remaining",
+ "Percent_Lifetime_Used",
+ "Perc_Rated_Life_Used"
+ );
+
+ # Search for S.M.A.R.T. attributes for known register
+ foreach my $register (@wearoutregisters) {
+ last if defined $wearout;
+ foreach my $attr (@$attributes) {
+ next if $attr->{name} !~ m/$register/;
+ $wearout = $attr->{value};
+ last;
+ }
}
return $wearout;
return 1;
}
+# Check whether a sysfs block device entry belongs to an iSCSI session.
+#
+# $sysdir is expected to be a symlink (e.g. /sys/block/<dev>); the device
+# counts as iSCSI if the link target contains a 'host*/session*' path part.
+#
+# Returns 1 for iSCSI devices, 0 otherwise (also if $sysdir is no symlink).
+sub is_iscsi {
+    my ($sysdir) = @_;
+
+    return 0 if !-l $sysdir;
+
+    my $target = readlink($sysdir);
+    return $target =~ m|host[^/]*/session[^/]*| ? 1 : 0;
+}
+
+# Tell whether a disk type string denotes flash storage, that is 'ssd' or
+# 'nvme'. Returns a true value for those two types and a false one otherwise.
+my sub is_ssdlike {
+    my ($type) = @_;
+
+    return 1 if $type eq 'ssd';
+    return $type eq 'nvme';
+}
+
sub get_disks {
- my ($disk, $nosmart) = @_;
+ my ($disks, $nosmart) = @_;
my $disklist = {};
my $mounted = {};
return $mounted->{$dev};
};
- my $journalhash = get_ceph_journals();
+ my $parttype_map = get_parttype_info();
+
+ my $journalhash = get_ceph_journals($parttype_map);
+ my $ceph_volume_infos = get_ceph_volume_infos();
- my $zfslist = get_zfs_devices();
+ my $zfshash = get_zfs_devices($parttype_map);
+
+ my $lvmhash = get_lvm_devices($parttype_map);
+
+ my $disk_regex = ".*";
+ if (defined($disks)) {
+ if (!ref($disks)) {
+ $disks = [ $disks ];
+ } elsif (ref($disks) ne 'ARRAY') {
+ die "disks is not a string or array reference\n";
+ }
+ # we get cciss/c0d0 but need cciss!c0d0
+ map { s|cciss/|cciss!| } @$disks;
- my $lvmlist = get_lvm_devices();
+ $disk_regex = "(?:" . join('|', @$disks) . ")";
+ }
- dir_glob_foreach('/sys/block', '.*', sub {
+ dir_glob_foreach('/sys/block', $disk_regex, sub {
my ($dev) = @_;
- return if defined($disk) && $disk ne $dev;
# whitelisting following devices
# hdX: ide block device
# sdX: sd block device
my $sysdir = "/sys/block/$dev";
# we do not want iscsi devices
- return if -l $sysdir && readlink($sysdir) =~ m|host[^/]*/session[^/]*|;
+ return if is_iscsi($sysdir);
my $sysdata = get_sysdir_info($sysdir);
return if !defined($sysdata);
if ($sysdata->{rotational} == 0) {
$type = 'ssd';
+ $type = 'nvme' if $dev =~ m/^nvme\d+n\d+$/;
$data->{rpm} = 0;
} elsif ($sysdata->{rotational} == 1) {
if ($data->{rpm} != -1) {
if (!$nosmart) {
eval {
- my $smartdata = get_smart_data($devpath, ($type ne 'ssd'));
+ my $smartdata = get_smart_data($devpath, !is_ssdlike($type));
$health = $smartdata->{health} if $smartdata->{health};
- if ($type eq 'ssd') {
+ if (is_ssdlike($type)) {
# if we have an ssd we try to get the wearout indicator
- my $wearval = get_wear_leveling_info($smartdata->{attributes}, $data->{model} || $sysdir->{model});
- $wearout = $wearval if $wearval;
+ my $wearval = get_wear_leveling_info($smartdata);
+ $wearout = $wearval if defined($wearval);
}
};
}
my $used;
- $used = 'LVM' if $lvmlist->{$devpath};
+ $used = 'LVM' if $lvmhash->{$devpath};
$used = 'mounted' if &$dev_is_mounted($devpath);
- $used = 'ZFS' if $zfslist->{$devpath};
+ $used = 'ZFS' if $zfshash->{$devpath};
+
+ # we replaced cciss/ with cciss! above
+ # but in the result we need cciss/ again
+ # because the caller might want to check the
+ # result again with the original parameter
+ if ($dev =~ m|^cciss!|) {
+ $dev =~ s|^cciss!|cciss/|;
+ }
$disklist->{$dev} = {
vendor => $sysdata->{vendor},
wearout => $wearout,
};
+ my $by_id_link = $data->{by_id_link};
+ $disklist->{$dev}->{by_id_link} = $by_id_link if defined($by_id_link);
+
my $osdid = -1;
+ my $bluestore = 0;
+ my $osdencrypted = 0;
my $journal_count = 0;
+ my $db_count = 0;
+ my $wal_count = 0;
my $found_partitions;
my $found_lvm;
}
}
- if ($lvmlist->{"$partpath/$part"}) {
+ if ($lvmhash->{"$partpath/$part"}) {
$found_lvm = 1;
}
- if ($zfslist->{"$partpath/$part"}) {
+ if ($zfshash->{"$partpath/$part"}) {
$found_zfs = 1;
}
- $journal_count++ if $journalhash->{"$partpath/$part"};
+ if (my $journal_part = $journalhash->{"$partpath/$part"}) {
+ $journal_count++ if $journal_part == 1;
+ $db_count++ if $journal_part == 2;
+ $wal_count++ if $journal_part == 3;
+ $bluestore = 1 if $journal_part == 4;
+ }
if (!dir_is_empty("$sysdir/$part/holders") && !$found_lvm) {
$found_dm = 1;
}
});
+ if (my $ceph_volume = $ceph_volume_infos->{$devpath}) {
+ $journal_count += $ceph_volume->{journal} // 0;
+ $db_count += $ceph_volume->{db} // 0;
+ $wal_count += $ceph_volume->{wal} // 0;
+ if (defined($ceph_volume->{osdid})) {
+ $osdid = $ceph_volume->{osdid};
+ $bluestore = 1 if $ceph_volume->{bluestore};
+ $osdencrypted = 1 if $ceph_volume->{encrypted};
+ }
+ }
+
$used = 'mounted' if $found_mountpoints && !$used;
$used = 'LVM' if $found_lvm && !$used;
$used = 'ZFS' if $found_zfs && !$used;
$disklist->{$dev}->{used} = $used if $used;
$disklist->{$dev}->{osdid} = $osdid;
- $disklist->{$dev}->{journals} = $journal_count;
+ $disklist->{$dev}->{journals} = $journal_count if $journal_count;
+ $disklist->{$dev}->{bluestore} = $bluestore if $osdid != -1;
+ $disklist->{$dev}->{osdencrypted} = $osdencrypted if $osdid != -1;
+ $disklist->{$dev}->{db} = $db_count if $db_count;
+ $disklist->{$dev}->{wal} = $wal_count if $wal_count;
});
return $disklist;
sub get_partnum {
my ($part_path) = @_;
- my ($mode, $rdev) = (stat($part_path))[2,6];
+ my $st = stat($part_path);
- next if !$mode || !S_ISBLK($mode) || !$rdev;
- my $major = int($rdev / 0x100);
- my $minor = $rdev % 0x100;
+ next if !$st->mode || !S_ISBLK($st->mode) || !$st->rdev;
+ my $major = PVE::Tools::dev_t_major($st->rdev);
+ my $minor = PVE::Tools::dev_t_minor($st->rdev);
my $partnum_path = "/sys/dev/block/$major:$minor/";
my $partnum;
$partnum = file_read_firstline("${partnum_path}partition");
- die "Partition does not exists\n" if !defined($partnum);
+ die "Partition does not exist\n" if !defined($partnum);
#untaint and ensure it is a int
if ($partnum =~ m/(\d+)/) {
return $partnum;
}
+# Resolve the parent block device of a partition.
+#
+# $part_path is the device node of the partition, e.g. '/dev/sda1'. The
+# parent is derived from the target of the '/sys/class/block/<name>' symlink:
+# the path component right in front of the partition name is taken as the
+# name of the parent device.
+#
+# Returns the path of the parent device, e.g. '/dev/sda'.
+# Dies if the path cannot be parsed or resolved, if the parent's name is not
+# contained in the partition's name, or if the result is no block device.
+sub get_blockdev {
+    my ($part_path) = @_;
+
+    # declare first and assign conditionally afterwards; the behaviour of
+    # 'my $x = ... if COND' is undefined (see perlsyn, Statement Modifiers)
+    my ($dev, $block_dev);
+    if (defined($part_path) && $part_path =~ m|^/dev/(.*)$|) {
+        $dev = $1;
+        my $link = readlink "/sys/class/block/$dev";
+        # \Q..\E because the device name is data, not a pattern
+        if (defined($link) && $link =~ m|([^/]*)/\Q$dev\E$|) {
+            $block_dev = $1;
+        }
+    }
+
+    die "Can't parse parent device\n" if !defined($block_dev);
+    die "No valid block device\n" if index($dev, $block_dev) == -1;
+
+    $block_dev = "/dev/$block_dev";
+    die "Block device does not exist\n" if !(-b $block_dev);
+
+    return $block_dev;
+}
+
+# Run $sub through PVE::Tools::lock_file, using the disk management lock file
+# '/run/lock/pve-diskmanage.lck' and no explicit timeout (undef).
+#
+# Returns whatever lock_file returns. If $@ is set after the call, that
+# error is re-thrown to the caller.
+sub locked_disk_action {
+    my ($sub) = @_;
+
+    my $lockfile = '/run/lock/pve-diskmanage.lck';
+    my $result = PVE::Tools::lock_file($lockfile, undef, $sub);
+    if ($@) {
+        die $@;
+    }
+
+    return $result;
+}
+
+# Make sure that the device $dev is not in use yet.
+#
+# Dies with "device '<dev>' is already in use" if disk_is_used() reports the
+# device as used, and returns undef otherwise.
+sub assert_disk_unused {
+    my ($dev) = @_;
+
+    if (disk_is_used($dev)) {
+        die "device '$dev' is already in use\n";
+    }
+
+    return undef;
+}
+
+# Append a new partition of the given size to a disk and return its path.
+#
+# Parameters:
+#   $dev  - path of the disk, e.g. '/dev/sdb'
+#   $size - requested size; converted from 'b' to 'mb' with
+#           PVE::Tools::convert_size before it is handed to sgdisk
+#
+# The new partition gets the number following the highest one found among
+# the matching entries below /sys/block/<disk> (1 if there are none).
+#
+# Returns '/dev/<entry>' for the entry matching the new partition number, or
+# undef if no such entry exists below /sys/block/<disk> afterwards.
+sub append_partition {
+    my ($dev, $size) = @_;
+
+    (my $devname = $dev) =~ s|^/dev/||;
+    my $sysdir = "/sys/block/$devname";
+
+    # find the highest partition number currently in use
+    my $highest = 0;
+    dir_glob_foreach($sysdir, qr/\Q$devname\E.*?(\d+)/, sub {
+        my (undef, $partid) = @_;
+
+        $highest = $partid if $partid > $highest;
+    });
+    my $newpartid = $highest + 1;
+
+    $size = PVE::Tools::convert_size($size, 'b' => 'mb');
+
+    my $cmd = [ $SGDISK, '-n', "$newpartid:0:+${size}M", $dev ];
+    run_command($cmd, errmsg => "error creating partition '$newpartid' on '$dev'");
+
+    # scan again to find the real partition device, as its name does not
+    # always follow a strict $devname$partition scheme, for example
+    # /dev/nvme0n1 -> /dev/nvme0n1p1
+    my $partition;
+    dir_glob_foreach($sysdir, qr/\Q$devname\E.*$newpartid/, sub {
+        my ($entry) = @_;
+
+        $partition = "/dev/$entry";
+    });
+
+    return $partition;
+}
+
1;