]> git.proxmox.com Git - pve-storage.git/blame - PVE/Diskmanage.pm
api: disk SMART: fix details for depreacated return value comment
[pve-storage.git] / PVE / Diskmanage.pm
CommitLineData
cbba9b5b
DC
1package PVE::Diskmanage;
2
3use strict;
4use warnings;
d5c80a5b 5
cbba9b5b
DC
6use PVE::ProcFSTools;
7use Data::Dumper;
8use Cwd qw(abs_path);
3196c387 9use Fcntl ':mode';
262ad7a9 10use File::Basename;
92ae59df 11use File::stat;
8cd6d7e8 12use JSON;
cbba9b5b
DC
13
14use PVE::Tools qw(extract_param run_command file_get_contents file_read_firstline dir_glob_regex dir_glob_foreach trim);
15
16my $SMARTCTL = "/usr/sbin/smartctl";
17my $ZPOOL = "/sbin/zpool";
18my $SGDISK = "/sbin/sgdisk";
19my $PVS = "/sbin/pvs";
19dcd1ad 20my $LVS = "/sbin/lvs";
8cd6d7e8 21my $LSBLK = "/bin/lsblk";
cbba9b5b 22
a64aedd3
FE
# Return the given device path with a leading '/dev/' removed.
# Paths that do not start with '/dev/' are returned unchanged.
my sub strip_dev :prototype($) {
    my ($devpath) = @_;
    return $devpath =~ s|^/dev/||r;
}
28
525b4a6e
FE
# Check whether the file at $path is executable (Perl's -x file test).
sub check_bin {
    my $path = shift;

    return -x $path;
}
33
cbba9b5b
DC
# Resolve $rel_path to an absolute path, make sure it lives below /dev/ and
# refers to a block device, and return the resolved (untainted) path.
# Dies if the path is missing, cannot be resolved, is outside of /dev/ or is
# not a block device.
sub verify_blockdev_path {
    my ($rel_path) = @_;

    die "missing path" if !$rel_path;

    my $resolved = abs_path($rel_path);
    die "failed to get absolute path to $rel_path\n" if !$resolved;

    if ($resolved !~ m|^/dev/(.*)$|) {
        die "got unusual device path '$resolved'\n";
    }

    # rebuild the path from the capture group to untaint it
    my $path = "/dev/$1";

    assert_blockdev($path);

    return $path;
}
49
# Ensure that $dev is a path below /dev/ that refers to a block device.
# Returns 1 on success. On failure it dies, or returns nothing if $noerr is set.
sub assert_blockdev {
    my ($dev, $noerr) = @_;

    return 1 if $dev =~ m|^/dev/| && -b $dev;

    return if $noerr;
    die "not a valid block device\n";
}
60
# Initialize $disk by setting its disk GUID through sgdisk.
# If no $uuid is given, 'R' is passed instead (presumably sgdisk's "random GUID"
# value - see sgdisk(8)).
# Dies if $disk is not a block device, is a partition or is already in use.
sub init_disk {
    my ($disk, $uuid) = @_;

    assert_blockdev($disk);

    # the API call should have checked these already, but re-check for safety
    if (is_partition($disk)) {
        die "$disk is a partition\n";
    }
    if (disk_is_used($disk)) {
        die "disk $disk is already in use\n";
    }

    run_command([$SGDISK, $disk, '-U', $uuid || 'R']);

    return 1;
}
74
# Return 1 if get_disks() reports a usage for $disk (a /dev/ path), else 0.
# Dies if get_disks() does not know the device at all.
sub disk_is_used {
    my ($disk) = @_;

    (my $name = $disk) =~ s|^/dev/||;

    # no SMART query needed, but include partitions so they can be checked too
    my $entry = get_disks($name, 1, 1)->{$name};
    die "'$disk' is not a valid local disk\n" if !defined($entry);

    return $entry->{used} ? 1 : 0;
}
88
# Query S.M.A.R.T. information for $disk by parsing the output of smartctl.
#
# Parameters:
#   $disk       - path of a block device (checked with assert_blockdev)
#   $healthonly - if true, only the health status is requested ('-H'), otherwise
#                 the attributes are requested as well ('-A -f brief')
#
# Returns a hash reference that may contain:
#   health     - the reported health/self-assessment string
#   type       - 'ata', 'text' or undef, depending on the sections seen
#   attributes - list of parsed ATA attribute entries (type 'ata')
#   text       - raw text of the SMART data section (type 'text')
#   wearout    - 100 minus the reported "Percentage Used" (type 'text')
#
# Dies if running smartctl fails or if bit 0 or 1 of its exit status is set.
sub get_smart_data {
    my ($disk, $healthonly) = @_;

    assert_blockdev($disk);
    my $smartdata = {};
    my $type;

    # for NVMe namespaces, query the device that the sysfs 'device' link points to
    if ($disk =~ m!^/dev/(nvme\d+n\d+)$!) {
        my $info = get_sysdir_info("/sys/block/$1");
        $disk = "/dev/".($info->{device}
            or die "failed to get nvme controller device for $disk\n");
    }

    my $cmd = [$SMARTCTL, '-H'];
    push @$cmd, '-A', '-f', 'brief' if !$healthonly;
    push @$cmd, $disk;

    my $returncode = eval {
        run_command($cmd, noerr => 1, outfunc => sub {
            my ($line) = @_;

            # ATA SMART attributes, e.g.:
            # ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
            # 1 Raw_Read_Error_Rate POSR-K 100 100 000 - 0
            #
            # SAS and NVME disks, e.g.:
            # Data Units Written: 5,584,952 [2.85 TB]
            # Accumulated start-stop cycles: 34

            if (defined($type) && $type eq 'ata' && $line =~ m/^([ \d]{2}\d)\s+(\S+)\s+(\S{6})\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(.*)$/) {
                # all eight groups are mandatory, so they are defined after a match
                my ($id, $name, $flags, $value, $worst, $threshold, $fail, $raw) =
                    ($1, $2, $3, $4, $5, $6, $7, $8);

                my $entry = {
                    name => $name,
                    flags => $flags,
                    # the +0 makes a number out of the strings
                    # FIXME: 'value' is deprecated by 'normalized'; remove with PVE 7.0
                    value => $value + 0,
                    normalized => $value + 0,
                    worst => $worst + 0,
                    # some disks report the default threshold as --- instead of 000
                    threshold => $threshold eq '---' ? 0 : $threshold + 0,
                    fail => $fail,
                    raw => $raw,
                    id => $id,
                };
                push @{$smartdata->{attributes}}, $entry;
            } elsif ($line =~ m/(?:Health Status|self\-assessment test result): (.*)$/ ) {
                $smartdata->{health} = $1;
            } elsif ($line =~ m/Vendor Specific SMART Attributes with Thresholds:/) {
                $type = 'ata';
                delete $smartdata->{text};
            } elsif ($line =~ m/=== START OF (READ )?SMART DATA SECTION ===/) {
                $type = 'text';
            } elsif (defined($type) && $type eq 'text') {
                $smartdata->{text} = '' if !defined $smartdata->{text};
                $smartdata->{text} .= "$line\n";
                # extract wearout from nvme/sas text, allow for decimal values
                if ($line =~ m/Percentage Used(?: endurance indicator)?:\s*(\d+(?:\.\d+)?)\%/i) {
                    $smartdata->{wearout} = 100 - $1;
                }
            } elsif ($line =~ m/SMART Disabled/) {
                $smartdata->{health} = "SMART Disabled";
            }
        })
    };
    my $err = $@;

    # running the command itself failed: there is no exit code to report, so
    # pass on the actual error instead of interpolating an undefined value
    if ($err) {
        chomp $err;
        die "Error getting S.M.A.R.T. data: $err\n";
    }

    # bit 0 and 1 mark a fatal error, other bits are for disk status -> ignore (see man 8 smartctl)
    if (defined($returncode) && ($returncode & 0b00000011)) {
        die "Error getting S.M.A.R.T. data: Exit code: $returncode\n";
    }

    $smartdata->{type} = $type;

    return $smartdata;
}
168
# Run lsblk with JSON output and return a hash reference mapping each reported
# device path to { parttype => ..., fstype => ... }.
# Failures to run lsblk or to decode its output are only warned about; an
# empty hash reference is returned in that case.
sub get_lsblk_info {
    my $raw = "";
    eval {
        run_command(
            [$LSBLK, '--json', '-o', 'path,parttype,fstype'],
            outfunc => sub { $raw .= "$_[0]\n"; },
        );
    };
    warn "$@\n" if $@;
    return {} if $raw eq '';

    my $parsed = eval { decode_json($raw) } // {};
    warn "$@\n" if $@;

    my $res = {};
    for my $entry (@{ $parsed->{blockdevices} // [] }) {
        $res->{ $entry->{path} } = {
            parttype => $entry->{parttype},
            fstype => $entry->{fstype},
        };
    }

    return $res;
}
189
# Add all devices from $lsblk_info whose partition type is a key of $uuids to
# $res (a new hash is used if $res is not given), mapping the device path to
# the value stored for that partition type in $uuids. Returns $res.
my sub get_devices_by_partuuid {
    my ($lsblk_info, $uuids, $res) = @_;

    $res //= {};

    for my $dev (sort keys %$lsblk_info) {
        my $parttype = $lsblk_info->{$dev}->{parttype};
        next if !defined($parttype);

        my $value = $uuids->{$parttype};
        $res->{$dev} = $value if defined($value);
    }

    return $res;
}
8cd6d7e8 203
# Collect devices used by ZFS: all device paths listed by 'zpool list -HPLv'
# plus all devices from $lsblk_info with a matching partition type.
# Returns a hash reference (device path => 1); empty if zpool is not executable.
sub get_zfs_devices {
    my ($lsblk_info) = @_;

    return {} if !check_bin($ZPOOL);

    my $zfs_devs = {};

    # use zpool and parttype uuid, because log and cache do not have zfs type uuid
    eval {
        run_command([$ZPOOL, 'list', '-HPLv'], outfunc => sub {
            my ($line) = @_;
            $zfs_devs->{$1} = 1 if $line =~ m|^\t([^\t]+)\t|;
        });
    };

    # only warn here, because maybe zfs tools are not installed
    warn "$@\n" if $@;

    my $parttypes = {
        "6a898cc3-1dd2-11b2-99a6-080020736631" => 1, # apple
        "516e7cba-6ecf-11d6-8ff8-00022d09712b" => 1, # bsd
    };

    return get_devices_by_partuuid($lsblk_info, $parttypes, $zfs_devs);
}
233
# Collect devices used by LVM: all physical volumes reported by pvs plus all
# devices from $lsblk_info with a matching partition type.
# Returns a hash reference (device path => 1).
sub get_lvm_devices {
    my ($lsblk_info) = @_;

    my $lvm_devs = {};
    eval {
        run_command([$PVS, '--noheadings', '--readonly', '-o', 'pv_name'], outfunc => sub{
            my $pv_name = trim($_[0]);
            $lvm_devs->{$pv_name} = 1 if $pv_name =~ m|^/dev/|;
        });
    };

    # if something goes wrong, we do not want to give up, but indicate an error has occurred
    warn "$@\n" if $@;

    my $parttypes = {
        "e6d6d379-f507-44c2-a23c-238f2a3df928" => 1,
    };

    return get_devices_by_partuuid($lsblk_info, $parttypes, $lvm_devs);
}
258
# Find partitions from $lsblk_info that carry one of the known Ceph partition
# types. Returns a hash reference mapping the device path to a number:
# 1 = journal, 2 = db, 3 = wal, 4 = block.
sub get_ceph_journals {
    my ($lsblk_info) = @_;

    my $kind_by_parttype = {
        '45b0969e-9b03-4f30-b4c6-b4b80ceff106' => 1, # journal
        '30cd0809-c2b2-499c-8879-2d6b78529876' => 2, # db
        '5ce17fce-4087-4169-b7ff-056cc58473f9' => 3, # wal
        'cafecafe-9b03-4f30-b4c6-b4b80ceff106' => 4, # block
    };

    return get_devices_by_partuuid($lsblk_info, $kind_by_parttype, {});
}
274
19dcd1ad
DC
# Reads the lv_tags of all logical volumes named 'osd-*' and matches them with
# the devices they are located on.
# Returns a hash reference keyed by device path. For 'block' and 'data' volumes
# carrying an OSD id tag, the entry gets 'osdid', 'bluestore' and optionally
# 'encrypted'; for any other volume, a counter named after its type is increased.
sub get_ceph_volume_infos {
    my $result = {};

    my $cmd = [
        $LVS, '-S', 'lv_name=~^osd-', '-o', 'devices,lv_name,lv_tags',
        '--noheadings', '--readonly', '--separator', ';',
    ];

    run_command($cmd, outfunc => sub {
        my ($line) = @_;
        $line =~ s/(?:^\s+)|(?:\s+$)//g; # trim whitespaces

        # columns in the order requested with '-o' above
        my @fields = split(';', $line);

        # lvs syntax is /dev/sdX(Y) where Y is the start (which we do not need)
        my ($dev) = $fields[0] =~ m|^(/dev/[a-z]+[^(]*)|;

        if ($fields[1] =~ m|^osd-([^-]+)-|) {
            my $type = $1;
            my $is_osd_volume = $type eq 'block' || $type eq 'data';

            # $result autovivification is wanted, so no empty hashes get created
            if ($is_osd_volume && $fields[2] =~ m/ceph.osd_id=([^,]+)/) {
                $result->{$dev}->{osdid} = $1;
                $result->{$dev}->{bluestore} = ($type eq 'block');
                if ($fields[2] =~ m/ceph\.encrypted=1/) {
                    $result->{$dev}->{encrypted} = 1;
                }
            } else {
                # undef++ becomes '1' (see `perldoc perlop`: Auto-increment)
                $result->{$dev}->{$type}++;
            }
        }
    });

    return $result;
}
308
cbba9b5b
DC
# Query udev ('udevadm info') for the device at sysfs path $dev and extract the
# properties of interest.
# Returns nothing if the query yields no output, the device is neither a disk
# nor a partition, it is a CD-ROM, or it has no DEVNAME. Otherwise returns a
# hash reference with devpath, serial, gpt, rpm, wwn and, if available, usb,
# model and by_id_link.
sub get_udev_info {
    my ($dev) = @_;

    my $info = "";
    eval {
        run_command(['udevadm', 'info', '-p', $dev, '--query', 'all'], outfunc => sub {
            $info .= "$_[0]\n";
        });
    };
    warn $@ if $@;
    return if !$info;

    return if $info !~ m/^E: DEVTYPE=(disk|partition)$/m;
    return if $info =~ m/^E: ID_CDROM/m;

    # we use this, because some disks are not simply in /dev e.g. /dev/cciss/c0d0
    my ($devpath) = $info =~ m/^E: DEVNAME=(\S+)$/m;
    return if !defined($devpath);

    my ($serial) = $info =~ m/^E: ID_SERIAL_SHORT=(\S+)$/m;
    my ($rpm) = $info =~ m/^E: ID_ATA_ROTATION_RATE_RPM=(\d+)$/m; # detects SSD implicit
    my ($wwn) = $info =~ m/^E: ID_WWN=(.*)$/m;

    my $data = {
        devpath => $devpath,
        serial => $serial // 'unknown',
        gpt => ($info =~ m/^E: ID_PART_TABLE_TYPE=gpt$/m) ? 1 : 0,
        rpm => $rpm // -1,
        wwn => $wwn // 'unknown',
    };

    # these keys are only present when udev reports the property
    $data->{usb} = 1 if $info =~ m/^E: ID_BUS=usb$/m;
    $data->{model} = $1 if $info =~ m/^E: ID_MODEL=(.+)$/m;

    if ($info =~ m/^E: DEVLINKS=(.+)$/m) {
        my @devlinks = grep(m#^/dev/disk/by-id/(ata|scsi|nvme(?!-eui))#, split (/ /, $1));
        $data->{by_id_link} = $devlinks[0] if defined($devlinks[0]);
    }

    return $data;
}
354
40be5c5c
FE
# Read the size of a block device from "$sysdir/size" and return it in bytes.
# Returns nothing if the value cannot be read or is false (e.g. 0).
sub get_sysdir_size {
    my ($sysdir) = @_;

    my $sectors = file_read_firstline("$sysdir/size") or return;

    # linux always considers sectors to be 512 bytes, independently of real block size
    return $sectors * 512;
}
364
cbba9b5b
DC
# Gather basic information about a block device from its sysfs directory.
# Returns nothing if "$sysdir/device" is not a directory or no size can be
# determined. Otherwise returns a hash reference with size (bytes),
# rotational, vendor, model and - if "$sysdir/device" is a symlink - device.
sub get_sysdir_info {
    my ($sysdir) = @_;

    return if ! -d "$sysdir/device";

    my $size = get_sysdir_size($sysdir) or return;

    my $data = {
        size => $size,
        # dir/queue/rotational should be 1 for hdd, 0 for ssd
        rotational => file_read_firstline("$sysdir/queue/rotational") // -1,
        vendor => file_read_firstline("$sysdir/device/vendor") || 'unknown',
        model => file_read_firstline("$sysdir/device/model") || 'unknown',
    };

    if (defined(my $device = readlink("$sysdir/device"))) {
        ($data->{device}) = $device =~ m!([^/]+)$!; # strip directory and untaint
    }

    return $data;
}
386
# Determine the wearout value from the result of get_smart_data().
# An already extracted $smartdata->{wearout} is returned as is. Otherwise the
# SMART attributes are searched for known wearout registers and the 'value' of
# the first match is returned; undef if nothing is found.
sub get_wear_leveling_info {
    my ($smartdata) = @_;

    return $smartdata->{wearout} if defined($smartdata->{wearout});

    # Common register names that represent percentage values of potential failure indicators used
    # in drivedb.h of smartmontool's. Order matters, as some drives may have multiple definitions
    my @wearoutregisters = (
        "Media_Wearout_Indicator",
        "SSD_Life_Left",
        "Wear_Leveling_Count",
        "Perc_Write\/Erase_Ct_BC",
        "Perc_Rated_Life_Remain",
        "Remaining_Lifetime_Perc",
        "Percent_Lifetime_Remain",
        "Lifetime_Left",
        "PCT_Life_Remaining",
        "Lifetime_Remaining",
        "Percent_Life_Remaining",
        "Percent_Lifetime_Used",
        "Perc_Rated_Life_Used"
    );

    my $attributes = $smartdata->{attributes};
    my $wearout;

    # Search for S.M.A.R.T. attributes for known register
    REGISTER: for my $register (@wearoutregisters) {
        for my $attr (@$attributes) {
            next if $attr->{name} !~ m/$register/;

            # only the first attribute matching a register is considered
            $wearout = $attr->{value};
            last REGISTER if defined($wearout);
            last;
        }
    }

    return $wearout;
}
427
10a48db5
DC
# Check whether the directory $dir has no entries besides '.' and '..'.
# Returns 1 if it is empty or cannot be opened, 0 otherwise.
#
# Uses the core opendir/readdir built-ins, so this does not depend on the
# IO::Dir module, which this file never loads itself.
sub dir_is_empty {
    my ($dir) = @_;

    opendir(my $dh, $dir) or return 1;

    my $empty = 1;
    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        # one real entry is enough, no need to read the rest
        $empty = 0;
        last;
    }
    closedir($dh);

    return $empty;
}
442
eebcdb11
DC
# Return 1 if $sysdir is a symlink whose target contains a
# 'host*/session*' path component pair (used to detect iSCSI devices), else 0.
sub is_iscsi {
    my ($sysdir) = @_;

    return 0 if !-l $sysdir;

    return readlink($sysdir) =~ m|host[^/]*/session[^/]*| ? 1 : 0;
}
452
4731eb11
TL
# True if the disk type is one of the flash based ones ('ssd' or 'nvme').
my sub is_ssdlike {
    my ($type) = @_;

    return 1 if $type eq 'ssd';
    return $type eq 'nvme';
}
457
# Returns a hash reference mapping the absolute path of every mounted /dev/
# device (first field of the parsed mount entries) to its second field.
sub mounted_blockdevs {
    my $mounts = PVE::ProcFSTools::parse_proc_mounts();

    my $mounted = {};
    for my $mount (@$mounts) {
        my ($what, $where) = @$mount;
        next if $what !~ m|^/dev/|;

        $mounted->{abs_path($what)} = $where;
    }

    return $mounted;
}
470
4de60025
AL
# returns hashmap of abs mount path -> first part of /proc/mounts (what)
sub mounted_paths {
    my $mounts = PVE::ProcFSTools::parse_proc_mounts();

    my $mounted = {};
    for my $mount (@$mounts) {
        my ($what, $where) = @$mount;
        $mounted->{abs_path($where)} = $what;
    }

    return $mounted;
}
483
7e14102a
FE
# Collect information about the local disks found in /sys/block.
#
# Parameters:
#   $disks              - optional device name or array reference of device
#                         names (relative to /dev/) to restrict the result to
#   $nosmart            - if true, no S.M.A.R.T. data is queried
#   $include_partitions - if true, partitions get their own entries in the
#                         result and partitions passed in $disks are replaced
#                         by their parent device for the lookup
#
# Note: the elements of an array passed as $disks are modified in place.
#
# Returns a hash reference keyed by device name (and partition name, if
# requested) with the collected properties.
sub get_disks {
    my ($disks, $nosmart, $include_partitions) = @_;
    my $disklist = {};

    my $mounted = mounted_blockdevs();

    my $lsblk_info = get_lsblk_info();

    my $journalhash = get_ceph_journals($lsblk_info);
    my $ceph_volume_infos = get_ceph_volume_infos();

    my $zfshash = get_zfs_devices($lsblk_info);

    my $lvmhash = get_lvm_devices($lsblk_info);

    my $disk_regex = ".*";
    if (defined($disks)) {
        if (!ref($disks)) {
            $disks = [ $disks ];
        } elsif (ref($disks) ne 'ARRAY') {
            die "disks is not a string or array reference\n";
        }
        # we get cciss/c0d0 but need cciss!c0d0
        $_ =~ s|cciss/|cciss!| for @$disks;

        if ($include_partitions) {
            # Proper blockdevice is needed for the regex, use parent for partitions.
            for my $disk ($disks->@*) {
                next if !is_partition("/dev/$disk");
                $disk = strip_dev(get_blockdev("/dev/$disk"));
            }
        }

        $disk_regex = "(?:" . join('|', @$disks) . ")";
    }

    dir_glob_foreach('/sys/block', $disk_regex, sub {
        my ($dev) = @_;

        # whitelisting following devices
        # - hdX ide block device
        # - sdX scsi/sata block device
        # - vdX virtIO block device
        # - xvdX: xen virtual block device
        # - nvmeXnY: nvme devices
        # - cciss!cXnY cciss devices
        return if $dev !~ m/^(h|s|x?v)d[a-z]+$/ &&
            $dev !~ m/^nvme\d+n\d+$/ &&
            $dev !~ m/^cciss\!c\d+d\d+$/;

        my $sysdir = "/sys/block/$dev";

        my $data = get_udev_info($sysdir) // return;
        my $devpath = $data->{devpath};

        # we do not want iscsi devices
        return if is_iscsi($sysdir);

        my $sysdata = get_sysdir_info($sysdir);
        return if !defined($sysdata);

        # derive the disk type from the rotational flag, the RPM and the bus
        my $type = 'unknown';
        my $rotational = $sysdata->{rotational};

        if ($rotational == 0) {
            $type = $dev =~ m/^nvme\d+n\d+$/ ? 'nvme' : 'ssd';
            $data->{rpm} = 0;
        } elsif ($rotational == 1) {
            if ($data->{rpm} != -1) {
                $type = 'hdd';
            } elsif ($data->{usb}) {
                $type = 'usb';
                $data->{rpm} = 0;
            }
        }

        my ($health, $wearout) = ('UNKNOWN', 'N/A');
        if (!$nosmart) {
            # errors while querying SMART data are ignored, the defaults stay in place
            eval {
                my $smartdata = get_smart_data($devpath, !is_ssdlike($type));
                $health = $smartdata->{health} if $smartdata->{health};

                if (is_ssdlike($type)) { # if we have an ssd we try to get the wearout indicator
                    my $wear_level = get_wear_leveling_info($smartdata);
                    $wearout = $wear_level if defined($wear_level);
                }
            };
        }

        # we replaced cciss/ with cciss! above, but in the result we need cciss/ again because the
        # caller might want to check the result again with the original parameter
        # NOTE(review): $dev is also used for the partition glob below, which then
        # presumably no longer matches the 'cciss!...' sysfs entries - confirm.
        if ($dev =~ m|^cciss!|) {
            $dev =~ s|^cciss!|cciss/|;
        }

        my $disk_entry = $disklist->{$dev} = {
            vendor => $sysdata->{vendor},
            model => $data->{model} || $sysdata->{model},
            size => $sysdata->{size},
            serial => $data->{serial},
            gpt => $data->{gpt},
            rpm => $data->{rpm},
            type => $type,
            wwn => $data->{wwn},
            health => $health,
            devpath => $devpath,
            wearout => $wearout,
        };
        $disk_entry->{mounted} = 1 if exists $mounted->{$devpath};

        my $by_id_link = $data->{by_id_link};
        $disk_entry->{by_id_link} = $by_id_link if defined($by_id_link);

        # accumulated over the disk and all of its partitions by the closures below
        my ($osdid, $bluestore, $osdencrypted) = (-1, 0, 0);
        my ($journal_count, $db_count, $wal_count) = (0, 0, 0);

        my $partpath = $devpath;
        # remove trailing part to get the partition base path, e.g. /dev/cciss/c0d0 -> /dev/cciss
        $partpath =~ s/\/[^\/]+$//;

        # returns a short description of what a device/partition is used for,
        # or nothing if no usage could be determined
        my $determine_usage = sub {
            my ($path, $sys, $is_partition) = @_;

            return 'LVM' if $lvmhash->{$path};
            return 'ZFS' if $zfshash->{$path};

            my $info = $lsblk_info->{$path} // {};

            if (defined(my $parttype = $info->{parttype})) {
                return 'BIOS boot'if $parttype eq '21686148-6449-6e6f-744e-656564454649';
                return 'EFI' if $parttype eq 'c12a7328-f81f-11d2-ba4b-00a0c93ec93b';
                return 'ZFS reserved' if $parttype eq '6a945a3b-1dd2-11b2-99a6-080020736631';
            }

            return "$info->{fstype}" if defined($info->{fstype});
            return 'mounted' if $mounted->{$path};

            return if !$is_partition;

            # for devices, this check is done explicitly later
            return 'Device Mapper' if !dir_is_empty("$sys/holders");

            return; # unused partition
        };

        # adds the LVM based Ceph usage of the given path to the counters above and
        # returns a copy of its info ('journal' renamed to 'journals'), if there is any
        my $collect_ceph_info = sub {
            my ($path) = @_;

            my $ceph_volume = $ceph_volume_infos->{$path} or return;
            $journal_count += $ceph_volume->{journal} // 0;
            $db_count += $ceph_volume->{db} // 0;
            $wal_count += $ceph_volume->{wal} // 0;
            if (defined($ceph_volume->{osdid})) {
                $osdid = $ceph_volume->{osdid};
                $bluestore = 1 if $ceph_volume->{bluestore};
                $osdencrypted = 1 if $ceph_volume->{encrypted};
            }

            my $result = { %{$ceph_volume} };
            $result->{journals} = delete $result->{journal} if $result->{journal};
            return $result;
        };

        my $partitions = {};
        dir_glob_foreach("$sysdir", "$dev.+", sub {
            my ($part) = @_;

            my $part_devpath = "$partpath/$part";
            my $part_sysdir = "$sysdir/$part";

            my $ceph_info = $collect_ceph_info->($part_devpath);
            my $lvm_based_osd = defined($ceph_info);

            my $entry = $partitions->{$part} = $ceph_info // {};

            $entry->{devpath} = $part_devpath;
            $entry->{parent} = "$devpath";
            $entry->{mounted} = 1 if exists $mounted->{$part_devpath};
            $entry->{gpt} = $data->{gpt};
            $entry->{type} = 'partition';
            $entry->{size} = get_sysdir_size($part_sysdir) // 0;
            $entry->{used} = $determine_usage->($part_devpath, $part_sysdir, 1);
            $entry->{osdid} //= -1;

            # avoid counting twice (e.g. partition with the LVM for the DB OSD is in $journalhash)
            return if $lvm_based_osd;

            # Legacy handling for non-LVM based OSDs
            if (my $mp = $mounted->{$part_devpath}) {
                if ($mp =~ m|^/var/lib/ceph/osd/ceph-(\d+)$|) {
                    $osdid = $1;
                    $entry->{osdid} = $osdid;
                }
            }

            if (my $journal_part = $journalhash->{$part_devpath}) {
                # 1 = journal, 2 = db, 3 = wal, 4 = block
                if ($journal_part == 1) {
                    $journal_count++;
                    $entry->{journals} = 1;
                }
                if ($journal_part == 2) {
                    $db_count++;
                    $entry->{db} = 1;
                }
                if ($journal_part == 3) {
                    $wal_count++;
                    $entry->{wal} = 1;
                }
                if ($journal_part == 4) {
                    $bluestore = 1;
                    $entry->{bluestore} = 1;
                }
            }
        });

        my $has_partitions = scalar(keys %{$partitions});

        my $used = $determine_usage->($devpath, $sysdir, 0);
        if ($include_partitions) {
            # fstype might be set even if there are partitions, but showing that is confusing
            $used = 'partitions' if $has_partitions;
        } else {
            # without own entries for partitions, the first usage found on one of
            # them (in sorted order) stands in for the whole disk
            for my $part (sort keys %{$partitions}) {
                $used //= $partitions->{$part}->{used};
            }
        }
        $used //= 'partitions' if $has_partitions;
        # multipath, software raid, etc.
        # this check comes in last, to show more specific info
        # if we have it
        $used //= 'Device Mapper' if !dir_is_empty("$sysdir/holders");

        $disk_entry->{used} = $used if $used;

        # only called for its effect on the OSD counters, the result is not needed
        $collect_ceph_info->($devpath);

        $disk_entry->{osdid} = $osdid;
        $disk_entry->{journals} = $journal_count if $journal_count;
        $disk_entry->{bluestore} = $bluestore if $osdid != -1;
        $disk_entry->{osdencrypted} = $osdencrypted if $osdid != -1;
        $disk_entry->{db} = $db_count if $db_count;
        $disk_entry->{wal} = $wal_count if $wal_count;

        if ($include_partitions) {
            $disklist->{$_} = $partitions->{$_} for keys %{$partitions};
        }
    });

    return $disklist;
}
719
3196c387
WL
# Return the partition number of the partition block device at $part_path,
# as read from /sys/dev/block/<major>:<minor>/partition.
# Dies if the path is not a block device, has no partition number (i.e. is not
# a partition) or the number is greater than 128.
sub get_partnum {
    my ($part_path) = @_;

    my $st = stat($part_path);

    die "error detecting block device '$part_path'\n"
        if !$st || !$st->mode || !S_ISBLK($st->mode) || !$st->rdev;

    my $rdev = $st->rdev;
    my $major = PVE::Tools::dev_t_major($rdev);
    my $minor = PVE::Tools::dev_t_minor($rdev);
    my $partnum_path = "/sys/dev/block/$major:$minor/";

    my $raw = file_read_firstline("${partnum_path}partition");
    die "Partition does not exist\n" if !defined($raw);

    # taking the number from the capture group also untaints it
    die "Failed to get partition number\n" if $raw !~ m/(\d+)/;
    my $partnum = $1;

    die "Partition number $partnum is invalid\n" if $partnum > 128;

    return $partnum;
}
740
0d28307d
WL
# Return the path of the parent block device (e.g. /dev/sda) of the partition
# at $part_path (e.g. /dev/sda1), determined via the /sys/class/block symlink.
# Dies if the parent cannot be determined or is not a block device.
sub get_blockdev {
    my ($part_path) = @_;

    my ($dev, $block_dev);
    if ($part_path =~ m|^/dev/(.*)$|) {
        $dev = $1;
        my $link = readlink("/sys/class/block/$dev");
        # readlink() yields undef for unknown devices; the device name is quoted
        # so it is matched literally instead of being interpreted as a regex.
        # The parent is the path component right before the partition name.
        if (defined($link) && $link =~ m|([^/]*)/\Q$dev\E$|) {
            $block_dev = $1;
        }
    }

    die "Can't parse parent device\n" if !defined($block_dev);
    # the name of a partition is expected to contain the name of its parent
    die "No valid block device\n" if index($dev, $block_dev) == -1;

    $block_dev = "/dev/$block_dev";
    die "Block device does not exists\n" if !(-b $block_dev);

    return $block_dev;
}
759
e8df8fb1
FE
# True if a partition number can be determined for $dev_path, i.e. if
# get_partnum() succeeds for it.
sub is_partition {
    my ($dev_path) = @_;

    my $partnum = eval { get_partnum($dev_path) };

    return defined($partnum);
}
765
e39e8ee2
DC
# Run $sub via PVE::Tools::lock_file() with the disk management lock file and
# return its result. Errors left in $@ afterwards are re-thrown.
sub locked_disk_action {
    my ($sub) = @_;

    my $lockfile = '/run/lock/pve-diskmanage.lck';
    my $res = PVE::Tools::lock_file($lockfile, undef, $sub);
    if ($@) {
        die $@;
    }

    return $res;
}
772
# Die if disk_is_used() reports the device $dev as used.
sub assert_disk_unused {
    my ($dev) = @_;

    if (disk_is_used($dev)) {
        die "device '$dev' is already in use\n";
    }

    return;
}
778
1dc3038d
DC
# Create a new partition of $size bytes on the disk $dev using sgdisk. The new
# partition gets the number following the highest one found in sysfs.
# Returns the path of the new partition device, or undef if it cannot be found
# in sysfs afterwards.
sub append_partition {
    my ($dev, $size) = @_;

    (my $devname = $dev) =~ s|^/dev/||;

    # find the highest partition number currently in use
    my $newpartid = 1;
    dir_glob_foreach("/sys/block/$devname", qr/\Q$devname\E.*?(\d+)/, sub {
        my (undef, $partid) = @_;

        $newpartid = $partid + 1 if $partid >= $newpartid;
    });

    my $size_mb = PVE::Tools::convert_size($size, 'b' => 'mb');

    run_command([ $SGDISK, '-n', "$newpartid:0:+${size_mb}M", $dev ],
        errmsg => "error creating partition '$newpartid' on '$dev'");

    my $partition;

    # loop again to detect the real partition device which does not always follow
    # a strict $devname$partition scheme like /dev/nvme0n1 -> /dev/nvme0n1p1
    dir_glob_foreach("/sys/block/$devname", qr/\Q$devname\E.*$newpartid/, sub {
        $partition = "/dev/$_[0]";
    });

    return $partition;
}
811
cb057e21
FE
# Check if a disk or any of its partitions has a holder.
# Can also be called with a partition.
# Expected to be called with a result of verify_blockdev_path().
# Returns the path of the device itself or of a partition with a holder, or
# undef if there is none.
sub has_holder {
    my ($devpath) = @_;

    my $dev = strip_dev($devpath);

    if (!dir_is_empty("/sys/class/block/${dev}/holders")) {
        return $devpath;
    }

    my $found;
    dir_glob_foreach("/sys/block/${dev}", "${dev}.+", sub {
        my ($part) = @_;

        return if dir_is_empty("/sys/class/block/${part}/holders");
        $found = "/dev/${part}";
    });

    return $found;
}
830
3bf7f889
FE
# Basic check if a disk or any of its partitions is mounted.
# Can also be called with a partition.
# Expected to be called with a result of verify_blockdev_path().
# Returns the path of the mounted device or partition, or undef if none is
# mounted.
sub is_mounted {
    my ($devpath) = @_;

    my $mounted = mounted_blockdevs();

    return $devpath if $mounted->{$devpath};

    my $dev = strip_dev($devpath);

    my $found;
    dir_glob_foreach("/sys/block/${dev}", "${dev}.+", sub {
        my $partpath = "/dev/$_[0]";

        $found = $partpath if $mounted->{$partpath};
    });

    return $found;
}
853
e8df8fb1
FE
# Change the partition type of the partition at $partpath to $parttype (passed
# on to sgdisk's '-t' option).
# Currently only supports GPT-partitioned disks.
# Dies if the partition or its parent disk cannot be determined, the disk is
# not GPT partitioned or sgdisk fails.
sub change_parttype {
    my ($partpath, $parttype) = @_;

    my $err = "unable to change partition type for $partpath";

    my $partnum = get_partnum($partpath);
    my $blockdev = get_blockdev($partpath);
    my $dev = strip_dev($blockdev);

    my $info = get_disks($dev, 1);
    die "$err - unable to get disk info for '$blockdev'\n" if !defined($info->{$dev});
    die "$err - disk '$blockdev' is not GPT partitioned\n" if !$info->{$dev}->{gpt};

    # use the configured sgdisk binary, like every other sgdisk call in this file
    return run_command([$SGDISK, "-t${partnum}:${parttype}", $blockdev], errmsg => $err);
}
870
# Wipes all labels and the first 200 MiB of a disk/partition (or the whole if it is smaller).
# If called with a partition, also sets the partition type to 0x83 'Linux filesystem'.
# Expected to be called with a result of verify_blockdev_path().
# Dies if the size of the device cannot be determined or wiping fails; a failure
# to change the partition type is only warned about.
sub wipe_blockdev {
    my ($devpath) = @_;

    my $devname = basename($devpath);
    my $dev_size = PVE::Tools::file_get_contents("/sys/class/block/$devname/size");

    ($dev_size) = $dev_size =~ m|(\d+)|; # untaint $dev_size
    die "Couldn't get the size of the device $devname\n" if !defined($dev_size);

    # The amount to wipe is calculated in bytes: devices smaller than 200 MiB
    # whose size is not a multiple of 1 MiB would otherwise result in a
    # fractional block count, which dd rejects.
    my $dev_bytes = $dev_size * 512; # sysfs reports the size in 512 byte sectors
    my $max_bytes = 200 * 1024 * 1024;
    my $wipe_bytes = ($dev_bytes < $max_bytes) ? $dev_bytes : $max_bytes;

    my $to_wipe = [];
    dir_glob_foreach("/sys/class/block/${devname}", "${devname}.+", sub {
        my ($part) = @_;
        push $to_wipe->@*, "/dev/${part}" if -b "/dev/${part}";
    });

    if (scalar($to_wipe->@*) > 0) {
        print "found child partitions to wipe: ". join(', ', $to_wipe->@*) ."\n";
    }
    push $to_wipe->@*, $devpath; # put actual device last

    print "wiping block device ${devpath}\n";

    run_command(['wipefs', '--all', $to_wipe->@*], errmsg => "error wiping '${devpath}'");

    # iflag=count_bytes makes dd interpret 'count' as a number of bytes
    run_command(
        [
            'dd', 'if=/dev/zero', "of=${devpath}", 'bs=1M', 'conv=fdatasync',
            'iflag=count_bytes', "count=${wipe_bytes}",
        ],
        errmsg => "error wiping '${devpath}'",
    );

    if (is_partition($devpath)) {
        eval { change_parttype($devpath, '8300'); };
        warn $@ if $@;
    }
}
911
26082b7d
FE
# FIXME: Remove once we depend on systemd >= v249.
# Work around udev bug https://github.com/systemd/systemd/issues/18525 ensuring database is updated.
# Runs 'udevadm trigger' for the given devices; errors are only warned about.
sub udevadm_trigger {
    my @devs = @_;

    return if !@devs;

    my $cmd = ['udevadm', 'trigger', @devs];
    eval { run_command($cmd); };
    warn $@ if $@;
}
922
cbba9b5b 9231;