]> git.proxmox.com Git - pve-storage.git/blob - PVE/Diskmanage.pm
Revert "Fix #2020: use /sys to map nvmeXnY to nvmeX"
[pve-storage.git] / PVE / Diskmanage.pm
1 package PVE::Diskmanage;
2
3 use strict;
4 use warnings;
5
6 use PVE::ProcFSTools;
7 use Data::Dumper;
8 use Cwd qw(abs_path);
9 use Fcntl ':mode';
10 use File::Basename;
11 use File::stat;
12 use JSON;
13
14 use PVE::Tools qw(extract_param run_command file_get_contents file_read_firstline dir_glob_regex dir_glob_foreach trim);
15
# Absolute paths of the external command line tools invoked by this module.
my $SMARTCTL = "/usr/sbin/smartctl"; # used by get_smart_data()
my $ZPOOL = "/sbin/zpool"; # used by get_zfs_devices()
my $SGDISK = "/sbin/sgdisk"; # used by init_disk() and append_partition()
my $PVS = "/sbin/pvs"; # used by get_lvm_devices()
my $LVS = "/sbin/lvs"; # used by get_ceph_volume_infos()
my $LSBLK = "/bin/lsblk"; # used by get_lsblk_info()
22
# Returns a copy of the given device path with a leading '/dev/' removed. Strings that do not
# start with '/dev/' are returned unchanged.
my sub strip_dev :prototype($) {
    my ($devpath) = @_;
    return $devpath =~ s|^/dev/||r;
}
28
# Tells whether the file at $path is executable by the current process (result of the -x test).
sub check_bin {
    my ($path) = @_;
    my $is_executable = -x $path;
    return $is_executable;
}
33
# Resolves $rel_path to an absolute path, checks that it lives below /dev and refers to a block
# device, and returns the resolved (and untainted) path.
#
# Dies with a newline-terminated message if the path is missing, cannot be resolved, does not
# point below /dev, or is not a block device.
sub verify_blockdev_path {
    my ($rel_path) = @_;

    # terminate with a newline like all other messages here, so no file/line info gets appended
    die "missing path\n" if !$rel_path;
    my $path = abs_path($rel_path);
    die "failed to get absolute path to $rel_path\n" if !$path;

    die "got unusual device path '$path'\n" if $path !~ m|^/dev/(.*)$|;

    $path = "/dev/$1"; # untaint

    assert_blockdev($path);

    return $path;
}
49
# Checks that $dev is a path below /dev that refers to a block device.
#
# Returns 1 on success. On failure it dies, unless $noerr is set, in which case it returns
# nothing.
sub assert_blockdev {
    my ($dev, $noerr) = @_;

    return 1 if $dev =~ m|^/dev/| && -b $dev;

    return if $noerr;
    die "not a valid block device\n";
}
60
# Sets the disk GUID of $disk via sgdisk: $uuid if given, otherwise 'R' is passed to sgdisk.
#
# Dies if $disk is not a block device, is a partition, or is already in use. Returns 1.
sub init_disk {
    my ($disk, $uuid) = @_;

    assert_blockdev($disk);

    # we should already have checked these in the api call, but we check again for safety
    if (is_partition($disk)) {
        die "$disk is a partition\n";
    }
    if (disk_is_used($disk)) {
        die "disk $disk is already in use\n";
    }

    my $guid = $uuid ? $uuid : 'R';
    run_command([$SGDISK, $disk, '-U', $guid]);

    return 1;
}
74
# Returns 1 if get_disks() reports a usage for $disk (with or without '/dev/' prefix), else 0.
# Dies if get_disks() does not know the device.
sub disk_is_used {
    my ($disk) = @_;

    (my $name = $disk) =~ s|^/dev/||;

    my $entry = get_disks($name, 1, 1)->{$name};
    die "'$disk' is not a valid local disk\n" if !defined($entry);

    return $entry->{used} ? 1 : 0;
}
88
# Runs smartctl on $disk and parses its output.
#
# Parameters:
#   $disk       - path of a block device (checked with assert_blockdev)
#   $healthonly - if true, only the health status is requested ('-H'); otherwise the attributes
#                 are requested as well ('-A -f brief')
#
# Returns a hash reference which may contain:
#   type       - 'ata' or 'text', depending on which output section was seen (undef if none)
#   health     - the health/self-assessment string reported by smartctl
#   attributes - list of parsed ATA attribute entries (type 'ata')
#   text       - the raw data section (type 'text')
#   wearout    - 100 minus the reported 'Percentage Used' value (type 'text')
#
# Dies if running smartctl fails or if it reports a fatal error via its exit code.
sub get_smart_data {
    my ($disk, $healthonly) = @_;

    assert_blockdev($disk);
    my $smartdata = {};
    my $type;

    # turn /dev/nvmeXnY into /dev/nvmeX before handing it to smartctl
    $disk =~ s/n\d+$// if $disk =~ m!^/dev/nvme\d+n\d+$!;

    my $cmd = [$SMARTCTL, '-H'];
    push @$cmd, '-A', '-f', 'brief' if !$healthonly;
    push @$cmd, $disk;

    my $returncode = eval {
        run_command($cmd, noerr => 1, outfunc => sub {
            my ($line) = @_;

            # ATA SMART attributes, e.g.:
            # ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
            # 1 Raw_Read_Error_Rate POSR-K 100 100 000 - 0
            #
            # SAS and NVME disks, e.g.:
            # Data Units Written: 5,584,952 [2.85 TB]
            # Accumulated start-stop cycles: 34

            if (defined($type) && $type eq 'ata' && $line =~ m/^([ \d]{2}\d)\s+(\S+)\s+(\S{6})\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(.*)$/) {
                my $entry = {};

                $entry->{name} = $2 if defined $2;
                $entry->{flags} = $3 if defined $3;
                # the +0 makes a number out of the strings
                # FIXME: 'value' is deprecated by 'normalized'; remove with PVE 7.0
                $entry->{value} = $4+0 if defined $4;
                $entry->{normalized} = $4+0 if defined $4;
                $entry->{worst} = $5+0 if defined $5;
                # some disks report the default threshold as --- instead of 000
                if (defined($6) && $6 eq '---') {
                    $entry->{threshold} = 0;
                } else {
                    $entry->{threshold} = $6+0 if defined $6;
                }
                $entry->{fail} = $7 if defined $7;
                $entry->{raw} = $8 if defined $8;
                $entry->{id} = $1 if defined $1;
                push @{$smartdata->{attributes}}, $entry;
            } elsif ($line =~ m/(?:Health Status|self\-assessment test result): (.*)$/ ) {
                $smartdata->{health} = $1;
            } elsif ($line =~ m/Vendor Specific SMART Attributes with Thresholds:/) {
                $type = 'ata';
                # the attribute table replaces any free text collected so far
                delete $smartdata->{text};
            } elsif ($line =~ m/=== START OF (READ )?SMART DATA SECTION ===/) {
                $type = 'text';
            } elsif (defined($type) && $type eq 'text') {
                $smartdata->{text} = '' if !defined $smartdata->{text};
                $smartdata->{text} .= "$line\n";
                # extract wearout from nvme/sas text, allow for decimal values
                if ($line =~ m/Percentage Used(?: endurance indicator)?:\s*(\d+(?:\.\d+)?)\%/i) {
                    $smartdata->{wearout} = 100 - $1;
                }
            } elsif ($line =~ m/SMART Disabled/) {
                $smartdata->{health} = "SMART Disabled";
            }
        })
    };
    my $err = $@;

    # running the command itself failed: there is no exit code, so report the actual error
    if ($err) {
        chomp $err;
        die "Error getting S.M.A.R.T. data: $err\n";
    }

    # bit 0 and 1 mark a fatal error, other bits are for disk status -> ignore (see man 8 smartctl)
    if (defined($returncode) && ($returncode & 0b00000011)) {
        die "Error getting S.M.A.R.T. data: Exit code: $returncode\n";
    }

    $smartdata->{type} = $type;

    return $smartdata;
}
164
# Queries lsblk (JSON output) for the path, partition type and filesystem type of the block
# devices it lists.
#
# Returns a hash reference mapping each device path to { parttype => ..., fstype => ... }.
# Failures of lsblk or of the JSON decoding are only warned about; the result is then empty.
sub get_lsblk_info {
    my @lines;
    eval {
        run_command(
            [$LSBLK, '--json', '-o', 'path,parttype,fstype'],
            outfunc => sub { push @lines, $_[0]; },
        );
    };
    warn "$@\n" if $@;
    return {} if !@lines;

    my $json_text = join('', map { "$_\n" } @lines);

    my $parsed = eval { decode_json($json_text) } // {};
    warn "$@\n" if $@;
    my $devices = $parsed->{blockdevices} // [];

    my %info_of;
    for my $entry (@{$devices}) {
        $info_of{ $entry->{path} } = {
            parttype => $entry->{parttype},
            fstype => $entry->{fstype},
        };
    }

    return \%info_of;
}
185
# Selects the devices from $lsblk_info whose partition type is a key of $uuids with a defined
# value.
#
# For each such device, $res->{device path} is set to the value stored in $uuids for its
# partition type. $res is optional; a fresh hash is used when it is not passed. Returns $res.
my sub get_devices_by_partuuid {
    my ($lsblk_info, $uuids, $res) = @_;

    $res //= {};

    for my $dev (sort keys %$lsblk_info) {
        my $parttype = $lsblk_info->{$dev}->{parttype};
        if (defined($parttype) && defined($uuids->{$parttype})) {
            $res->{$dev} = $uuids->{$parttype};
        }
    }

    return $res;
}
199
# Collects the devices that belong to ZFS.
#
# Returns a hash reference keyed by device path: every device listed by 'zpool list -HPLv' maps
# to 1, and devices from $lsblk_info with one of the partition types below are added as well.
# Returns an empty hash if the zpool binary is not executable.
sub get_zfs_devices {
    my ($lsblk_info) = @_;

    return {} if !check_bin($ZPOOL);

    my $res = {};

    # use zpool and parttype uuid, because log and cache do not have zfs type uuid
    my $record_vdev = sub {
        my ($line) = @_;
        return if $line !~ m|^\t([^\t]+)\t|;
        $res->{$1} = 1;
    };
    eval { run_command([$ZPOOL, 'list', '-HPLv'], outfunc => $record_vdev); };

    # only warn here, because maybe zfs tools are not installed
    warn "$@\n" if $@;

    my %zfs_parttypes = (
        "6a898cc3-1dd2-11b2-99a6-080020736631" => 1, # apple
        "516e7cba-6ecf-11d6-8ff8-00022d09712b" => 1, # bsd
    );

    return get_devices_by_partuuid($lsblk_info, \%zfs_parttypes, $res);
}
229
# Collects the devices that belong to LVM.
#
# Returns a hash reference keyed by device path: every physical volume below /dev reported by
# pvs maps to 1, and devices from $lsblk_info with the partition type below are added as well.
sub get_lvm_devices {
    my ($lsblk_info) = @_;

    my $res = {};

    my $record_pv = sub {
        my ($line) = @_;
        my $pv_name = trim($line);
        $res->{$pv_name} = 1 if $pv_name =~ m|^/dev/|;
    };
    eval {
        run_command([$PVS, '--noheadings', '--readonly', '-o', 'pv_name'], outfunc => $record_pv);
    };

    # if something goes wrong, we do not want to give up, but indicate an error has occurred
    warn "$@\n" if $@;

    my %lvm_parttypes = (
        "e6d6d379-f507-44c2-a23c-238f2a3df928" => 1,
    );

    return get_devices_by_partuuid($lsblk_info, \%lvm_parttypes, $res);
}
254
# Finds the devices from $lsblk_info that carry one of the Ceph partition types below.
#
# Returns a hash reference mapping the device path to a number telling the role:
# 1 = journal, 2 = db, 3 = wal, 4 = block.
sub get_ceph_journals {
    my ($lsblk_info) = @_;

    my %role_of_parttype = (
        '45b0969e-9b03-4f30-b4c6-b4b80ceff106' => 1, # journal
        '30cd0809-c2b2-499c-8879-2d6b78529876' => 2, # db
        '5ce17fce-4087-4169-b7ff-056cc58473f9' => 3, # wal
        'cafecafe-9b03-4f30-b4c6-b4b80ceff106' => 4, # block
    );

    return get_devices_by_partuuid($lsblk_info, \%role_of_parttype, {});
}
270
# Reads the lv_tags of all logical volumes whose name starts with 'osd-' and matches them with
# the devices they live on.
#
# Returns a hash reference keyed by device path. For 'block' and 'data' volumes that carry a
# ceph.osd_id tag, the entry gets 'osdid', 'bluestore' (true for 'block') and, if tagged so,
# 'encrypted'. For every other volume, a counter named after the type taken from the LV name is
# incremented. Lines without a parsable device or LV name are skipped.
#
# Dies if running lvs fails (run_command is not wrapped in eval here).
sub get_ceph_volume_infos {
    my $result = {};

    my $cmd = [ $LVS, '-S', 'lv_name=~^osd-', '-o', 'devices,lv_name,lv_tags',
        '--noheadings', '--readonly', '--separator', ';' ];

    run_command($cmd, outfunc => sub {
        my $line = shift;
        $line =~ s/(?:^\s+)|(?:\s+$)//g; # trim whitespaces

        my ($devices, $lv_name, $lv_tags) = split(';', $line);
        # empty or truncated line, nothing we could assign to a device
        return if !defined($devices) || !defined($lv_name);
        # split() drops trailing empty fields, so volumes without tags yield no third field
        $lv_tags //= '';

        # lvs syntax is /dev/sdX(Y) where Y is the start (which we do not need)
        my ($dev) = $devices =~ m|^(/dev/[a-z]+[^(]*)|;
        # do not record anything under an undefined key if the device cannot be parsed
        return if !defined($dev);

        if ($lv_name =~ m|^osd-([^-]+)-|) {
            my $type = $1;
            # $result autovivification is wanted, to not creating empty hashes
            if (($type eq 'block' || $type eq 'data') && $lv_tags =~ m/ceph\.osd_id=([^,]+)/) {
                $result->{$dev}->{osdid} = $1;
                $result->{$dev}->{bluestore} = ($type eq 'block');
                if ($lv_tags =~ m/ceph\.encrypted=1/) {
                    $result->{$dev}->{encrypted} = 1;
                }
            } else {
                # undef++ becomes '1' (see `perldoc perlop`: Auto-increment)
                $result->{$dev}->{$type}++;
            }
        }
    });

    return $result;
}
304
# Queries 'udevadm info' for the sysfs path $dev and extracts the properties of interest.
#
# Returns nothing if udevadm printed no output, if the device is neither a disk nor a
# partition, if it is a CD-ROM, or if it has no DEVNAME. Otherwise returns a hash reference
# with devpath, serial, gpt, rpm and wwn, plus usb, model and by_id_link when available.
sub get_udev_info {
    my ($dev) = @_;

    my @lines;
    eval {
        run_command(['udevadm', 'info', '-p', $dev, '--query', 'all'], outfunc => sub {
            push @lines, $_[0];
        });
    };
    warn $@ if $@;

    my $info = join('', map { "$_\n" } @lines);
    return if !$info;

    return if $info !~ m/^E: DEVTYPE=(disk|partition)$/m;
    return if $info =~ m/^E: ID_CDROM/m;

    # we use this, because some disks are not simply in /dev e.g. /dev/cciss/c0d0
    my ($devpath) = $info =~ m/^E: DEVNAME=(\S+)$/m;
    return if !defined($devpath);

    my ($serial) = $info =~ m/^E: ID_SERIAL_SHORT=(\S+)$/m;
    my ($rpm) = $info =~ m/^E: ID_ATA_ROTATION_RATE_RPM=(\d+)$/m; # detects SSD implicit
    my ($wwn) = $info =~ m/^E: ID_WWN=(.*)$/m;

    my $data = {
        devpath => $devpath,
        serial => $serial // 'unknown',
        gpt => ($info =~ m/^E: ID_PART_TABLE_TYPE=gpt$/m) ? 1 : 0,
        rpm => $rpm // -1,
        wwn => $wwn // 'unknown',
    };

    if ($info =~ m/^E: ID_BUS=usb$/m) {
        $data->{usb} = 1;
    }

    if ($info =~ m/^E: ID_MODEL=(.+)$/m) {
        $data->{model} = $1;
    }

    if ($info =~ m/^E: DEVLINKS=(.+)$/m) {
        # pick the first ata/scsi/nvme by-id link, but no nvme-eui one
        my ($first_link) = grep(m#^/dev/disk/by-id/(ata|scsi|nvme(?!-eui))#, split (/ /, $1));
        $data->{by_id_link} = $first_link if defined($first_link);
    }

    return $data;
}
350
# Reads the 'size' file below $sysdir and returns the size in bytes, or nothing if the file
# yields no (or a zero) value.
sub get_sysdir_size {
    my ($sysdir) = @_;

    my $sectors = file_read_firstline("$sysdir/size");

    if ($sectors) {
        # linux always considers sectors to be 512 bytes, independently of real block size
        return $sectors * 512;
    }

    return;
}
360
# Gathers basic information about a block device from its sysfs directory.
#
# Returns nothing if "$sysdir/device" is not a directory or no size can be determined.
# Otherwise returns a hash reference with size (bytes), rotational (-1 if unreadable), vendor
# and model ('unknown' if unreadable or empty).
sub get_sysdir_info {
    my ($sysdir) = @_;

    return if !-d "$sysdir/device";

    my $size = get_sysdir_size($sysdir);
    return if !$size;

    return {
        size => $size,
        # dir/queue/rotational should be 1 for hdd, 0 for ssd
        rotational => file_read_firstline("$sysdir/queue/rotational") // -1,
        vendor => file_read_firstline("$sysdir/device/vendor") || 'unknown',
        model => file_read_firstline("$sysdir/device/model") || 'unknown',
    };
}
378
# Determines the wearout value from parsed S.M.A.R.T. data.
#
# If $smartdata already has a 'wearout' entry, that one is returned. Otherwise the 'value' of
# the first attribute matching one of the known register names is returned; registers are tried
# in the order listed below. Returns undef if nothing usable is found.
sub get_wear_leveling_info {
    my ($smartdata) = @_;
    my $attributes = $smartdata->{attributes};

    return $smartdata->{wearout} if defined($smartdata->{wearout});

    # Common register names that represent percentage values of potential failure indicators used
    # in drivedb.h of smartmontool's. Order matters, as some drives may have multiple definitions
    my @known_registers = (
        "Media_Wearout_Indicator",
        "SSD_Life_Left",
        "Wear_Leveling_Count",
        "Perc_Write\/Erase_Ct_BC",
        "Perc_Rated_Life_Remain",
        "Remaining_Lifetime_Perc",
        "Percent_Lifetime_Remain",
        "Lifetime_Left",
        "PCT_Life_Remaining",
        "Lifetime_Remaining",
        "Percent_Life_Remaining",
        "Percent_Lifetime_Used",
        "Perc_Rated_Life_Used"
    );

    my $wearout;

    # Search for S.M.A.R.T. attributes for known register
    REGISTER:
    for my $register (@known_registers) {
        for my $attr (@$attributes) {
            if ($attr->{name} =~ m/$register/) {
                # only the first attribute matching a register is considered
                $wearout = $attr->{value};
                last REGISTER if defined($wearout);
                next REGISTER;
            }
        }
    }

    return $wearout;
}
419
# Tells whether the directory $dir has no entries besides '.' and '..'.
#
# Returns 1 if it is empty or cannot be opened at all, 0 as soon as any other entry is found.
# Uses the opendir/readdir builtins, so it does not depend on IO::Dir having been loaded by
# some other module (this file does not load it itself).
sub dir_is_empty {
    my ($dir) = @_;

    opendir(my $dh, $dir) or return 1;

    my $empty = 1;
    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        $empty = 0;
        last;
    }
    closedir($dh);

    return $empty;
}
434
# Returns 1 if $sysdir is a symlink whose target contains a 'host*/session*' path component
# pair, 0 otherwise.
sub is_iscsi {
    my ($sysdir) = @_;

    return 0 if !-l $sysdir;

    my $target = readlink($sysdir);
    return $target =~ m|host[^/]*/session[^/]*| ? 1 : 0;
}
444
# True if the disk type is one of 'ssd' or 'nvme'.
my sub is_ssdlike {
    my ($type) = @_;
    return 1 if $type eq 'ssd';
    return $type eq 'nvme';
}
449
# returns hashmap of abs device path -> mount point, for all mounts whose source starts with
# /dev/
sub mounted_blockdevs {
    my %mountpoint_of;

    my $mounts = PVE::ProcFSTools::parse_proc_mounts();

    for my $entry (@$mounts) {
        my ($what, $where) = @$entry;
        next if $what !~ m|^/dev/|;
        $mountpoint_of{ abs_path($what) } = $where;
    }

    return \%mountpoint_of;
}
462
# returns hashmap of abs mount path -> first part of /proc/mounts (what)
sub mounted_paths {
    my %source_of;

    my $mounts = PVE::ProcFSTools::parse_proc_mounts();

    for my $entry (@$mounts) {
        my ($what, $where) = @$entry;
        $source_of{ abs_path($where) } = $what;
    }

    return \%source_of;
}
475
# Collects information about the local disks found below /sys/block.
#
# Parameters:
#   $disks              - optional; a device name or an array reference of device names
#                         (relative to /dev) to limit the result to. The passed array is not
#                         modified.
#   $nosmart            - if true, no S.M.A.R.T. data is queried (health stays 'UNKNOWN', wearout
#                         'N/A')
#   $include_partitions - if true, partitions given in $disks are resolved to their parent disk
#                         and the result contains an entry for every partition as well
#
# Returns a hash reference keyed by device name with the gathered properties (vendor, model,
# size, serial, gpt, rpm, type, wwn, health, devpath, wearout, used, osdid, ...).
#
# Dies if $disks is neither a string nor an array reference.
sub get_disks {
    my ($disks, $nosmart, $include_partitions) = @_;
    my $disklist = {};

    my $mounted = mounted_blockdevs();

    my $lsblk_info = get_lsblk_info();

    my $journalhash = get_ceph_journals($lsblk_info);
    my $ceph_volume_infos = get_ceph_volume_infos();

    my $zfshash = get_zfs_devices($lsblk_info);

    my $lvmhash = get_lvm_devices($lsblk_info);

    my $disk_regex = ".*";
    if (defined($disks)) {
        if (!ref($disks)) {
            $disks = [ $disks ];
        } elsif (ref($disks) eq 'ARRAY') {
            # the names get rewritten below, so work on a private copy to keep the caller's
            # array intact (it might be used to look up the result afterwards)
            $disks = [ $disks->@* ];
        } else {
            die "disks is not a string or array reference\n";
        }
        # we get cciss/c0d0 but need cciss!c0d0
        $_ =~ s|cciss/|cciss!| for @$disks;

        if ($include_partitions) {
            # Proper blockdevice is needed for the regex, use parent for partitions.
            for my $disk ($disks->@*) {
                next if !is_partition("/dev/$disk");
                $disk = strip_dev(get_blockdev("/dev/$disk"));
            }
        }

        $disk_regex = "(?:" . join('|', @$disks) . ")";
    }

    dir_glob_foreach('/sys/block', $disk_regex, sub {
        my ($dev) = @_;
        # whitelisting following devices
        # - hdX ide block device
        # - sdX scsi/sata block device
        # - vdX virtIO block device
        # - xvdX: xen virtual block device
        # - nvmeXnY: nvme devices
        # - cciss!cXnY cciss devices
        return if $dev !~ m/^(h|s|x?v)d[a-z]+$/ &&
            $dev !~ m/^nvme\d+n\d+$/ &&
            $dev !~ m/^cciss\!c\d+d\d+$/;

        my $data = get_udev_info("/sys/block/$dev") // return;
        my $devpath = $data->{devpath};

        my $sysdir = "/sys/block/$dev";

        # we do not want iscsi devices
        return if is_iscsi($sysdir);

        my $sysdata = get_sysdir_info($sysdir);
        return if !defined($sysdata);

        my $type = 'unknown';

        if ($sysdata->{rotational} == 0) {
            $type = 'ssd';
            $type = 'nvme' if $dev =~ m/^nvme\d+n\d+$/;
            $data->{rpm} = 0;
        } elsif ($sysdata->{rotational} == 1) {
            if ($data->{rpm} != -1) {
                $type = 'hdd';
            } elsif ($data->{usb}) {
                $type = 'usb';
                $data->{rpm} = 0;
            }
        }

        my ($health, $wearout) = ('UNKNOWN', 'N/A');
        if (!$nosmart) {
            # best effort: errors while reading S.M.A.R.T. data just leave the defaults in place
            eval {
                my $smartdata = get_smart_data($devpath, !is_ssdlike($type));
                $health = $smartdata->{health} if $smartdata->{health};

                if (is_ssdlike($type)) { # if we have an ssd we try to get the wearout indicator
                    my $wear_level = get_wear_leveling_info($smartdata);
                    $wearout = $wear_level if defined($wear_level);
                }
            };
        }

        # we replaced cciss/ with cciss! above, but in the result we need cciss/ again because the
        # caller might want to check the result again with the original parameter
        if ($dev =~ m|^cciss!|) {
            $dev =~ s|^cciss!|cciss/|;
        }

        $disklist->{$dev} = {
            vendor => $sysdata->{vendor},
            model => $data->{model} || $sysdata->{model},
            size => $sysdata->{size},
            serial => $data->{serial},
            gpt => $data->{gpt},
            rpm => $data->{rpm},
            type => $type,
            wwn => $data->{wwn},
            health => $health,
            devpath => $devpath,
            wearout => $wearout,
        };
        $disklist->{$dev}->{mounted} = 1 if exists $mounted->{$devpath};

        my $by_id_link = $data->{by_id_link};
        $disklist->{$dev}->{by_id_link} = $by_id_link if defined($by_id_link);

        my ($osdid, $bluestore, $osdencrypted) = (-1, 0, 0);
        my ($journal_count, $db_count, $wal_count) = (0, 0, 0);

        my $partpath = $devpath;
        # remove trailing part to get the partition base path, e.g. /dev/cciss/c0d0 -> /dev/cciss
        $partpath =~ s/\/[^\/]+$//;

        # returns a short usage description for a disk or partition, or nothing if it is unused
        my $determine_usage = sub {
            my ($devpath, $sysdir, $is_partition) = @_;

            return 'LVM' if $lvmhash->{$devpath};
            return 'ZFS' if $zfshash->{$devpath};

            my $info = $lsblk_info->{$devpath} // {};

            if (defined(my $parttype = $info->{parttype})) {
                return 'BIOS boot'if $parttype eq '21686148-6449-6e6f-744e-656564454649';
                return 'EFI' if $parttype eq 'c12a7328-f81f-11d2-ba4b-00a0c93ec93b';
                return 'ZFS reserved' if $parttype eq '6a945a3b-1dd2-11b2-99a6-080020736631';
            }

            return "$info->{fstype}" if defined($info->{fstype});
            return 'mounted' if $mounted->{$devpath};

            return if !$is_partition;

            # for devices, this check is done explicitly later
            return 'Device Mapper' if !dir_is_empty("$sysdir/holders");

            return; # unused partition
        };

        # adds the ceph-volume (LVM based) information of a device to the counters of the
        # current disk and returns a copy of it, or nothing if there is none
        my $collect_ceph_info = sub {
            my ($devpath) = @_;

            my $ceph_volume = $ceph_volume_infos->{$devpath} or return;
            $journal_count += $ceph_volume->{journal} // 0;
            $db_count += $ceph_volume->{db} // 0;
            $wal_count += $ceph_volume->{wal} // 0;
            if (defined($ceph_volume->{osdid})) {
                $osdid = $ceph_volume->{osdid};
                $bluestore = 1 if $ceph_volume->{bluestore};
                $osdencrypted = 1 if $ceph_volume->{encrypted};
            }

            my $result = { %{$ceph_volume} };
            $result->{journals} = delete $result->{journal} if $result->{journal};
            return $result;
        };

        my $partitions = {};
        dir_glob_foreach("$sysdir", "$dev.+", sub {
            my ($part) = @_;

            $partitions->{$part} = $collect_ceph_info->("$partpath/$part");
            my $lvm_based_osd = defined($partitions->{$part});

            $partitions->{$part}->{devpath} = "$partpath/$part";
            $partitions->{$part}->{parent} = "$devpath";
            $partitions->{$part}->{mounted} = 1 if exists $mounted->{"$partpath/$part"};
            $partitions->{$part}->{gpt} = $data->{gpt};
            $partitions->{$part}->{type} = 'partition';
            $partitions->{$part}->{size} = get_sysdir_size("$sysdir/$part") // 0;
            $partitions->{$part}->{used} = $determine_usage->("$partpath/$part", "$sysdir/$part", 1);
            $partitions->{$part}->{osdid} //= -1;

            # avoid counting twice (e.g. partition with the LVM for the DB OSD is in $journalhash)
            return if $lvm_based_osd;

            # Legacy handling for non-LVM based OSDs
            if (my $mp = $mounted->{"$partpath/$part"}) {
                if ($mp =~ m|^/var/lib/ceph/osd/ceph-(\d+)$|) {
                    $osdid = $1;
                    $partitions->{$part}->{osdid} = $osdid;
                }
            }

            if (my $journal_part = $journalhash->{"$partpath/$part"}) {
                $journal_count++ if $journal_part == 1;
                $db_count++ if $journal_part == 2;
                $wal_count++ if $journal_part == 3;
                $bluestore = 1 if $journal_part == 4;

                $partitions->{$part}->{journals} = 1 if $journal_part == 1;
                $partitions->{$part}->{db} = 1 if $journal_part == 2;
                $partitions->{$part}->{wal} = 1 if $journal_part == 3;
                $partitions->{$part}->{bluestore} = 1 if $journal_part == 4;
            }
        });

        my $used = $determine_usage->($devpath, $sysdir, 0);
        if (!$include_partitions) {
            foreach my $part (sort keys %{$partitions}) {
                $used //= $partitions->{$part}->{used};
            }
        } else {
            # fstype might be set even if there are partitions, but showing that is confusing
            $used = 'partitions' if scalar(keys %{$partitions});
        }
        $used //= 'partitions' if scalar(keys %{$partitions});
        # multipath, software raid, etc.
        # this check comes in last, to show more specific info
        # if we have it
        $used //= 'Device Mapper' if !dir_is_empty("$sysdir/holders");

        $disklist->{$dev}->{used} = $used if $used;

        $collect_ceph_info->($devpath);

        $disklist->{$dev}->{osdid} = $osdid;
        $disklist->{$dev}->{journals} = $journal_count if $journal_count;
        $disklist->{$dev}->{bluestore} = $bluestore if $osdid != -1;
        $disklist->{$dev}->{osdencrypted} = $osdencrypted if $osdid != -1;
        $disklist->{$dev}->{db} = $db_count if $db_count;
        $disklist->{$dev}->{wal} = $wal_count if $wal_count;

        if ($include_partitions) {
            $disklist->{$_} = $partitions->{$_} for keys %{$partitions};
        }
    });

    return $disklist;
}
711
# Returns the partition number of the block device $part_path, as found in the 'partition' file
# of its /sys/dev/block/MAJOR:MINOR directory.
#
# Dies if $part_path is not a block device, if it has no 'partition' file, or if the number
# cannot be parsed or is greater than 128.
sub get_partnum {
    my ($part_path) = @_;

    my $st = stat($part_path);

    my $is_blockdev = $st && $st->mode && S_ISBLK($st->mode) && $st->rdev;
    die "error detecting block device '$part_path'\n" if !$is_blockdev;

    my $rdev = $st->rdev;
    my $major = PVE::Tools::dev_t_major($rdev);
    my $minor = PVE::Tools::dev_t_minor($rdev);

    my $partnum = file_read_firstline("/sys/dev/block/${major}:${minor}/partition");
    die "Partition does not exist\n" if !defined($partnum);

    if ($partnum =~ m/(\d+)/) { # untaint
        $partnum = $1;
    } else {
        die "Failed to get partition number\n";
    }

    die "Partition number $partnum is invalid\n" if $partnum > 128;

    return $partnum;
}
732
# Returns the path of the parent block device of the partition $part_path, e.g. /dev/sda for
# /dev/sda1.
#
# The parent is taken from the target of the /sys/class/block/NAME symlink: it is the path
# component directly in front of the final NAME component.
#
# Dies if the parent cannot be determined, if its name is not contained in the partition's
# name, or if the resulting path is not a block device.
sub get_blockdev {
    my ($part_path) = @_;

    my ($dev, $block_dev);
    if (defined($part_path) && $part_path =~ m|^/dev/(.*)$|) {
        $dev = $1;
        # readlink returns undef if the entry does not exist or is no symlink
        my $link = readlink("/sys/class/block/$dev");
        # quote the name, it must be matched literally
        if (defined($link) && $link =~ m|([^/]*)/\Q$dev\E$|) {
            $block_dev = $1;
        }
    }

    die "Can't parse parent device\n" if !defined($block_dev);
    die "No valid block device\n" if index($dev, $block_dev) == -1;

    $block_dev = "/dev/$block_dev";
    die "Block device does not exists\n" if !(-b $block_dev);

    return $block_dev;
}
751
# True if a partition number can be determined for $dev_path, i.e. get_partnum() succeeds.
sub is_partition {
    my ($dev_path) = @_;

    my $partnum = eval { get_partnum($dev_path) };
    return defined($partnum);
}
757
# Runs $sub via PVE::Tools::lock_file on the disk management lock file and returns its result.
# An error left in $@ by lock_file is re-thrown.
sub locked_disk_action {
    my ($sub) = @_;

    my $res = PVE::Tools::lock_file('/run/lock/pve-diskmanage.lck', undef, $sub);
    if (my $err = $@) {
        die $err;
    }

    return $res;
}
764
# Dies if disk_is_used() reports the device as being in use, returns nothing otherwise.
sub assert_disk_unused {
    my ($dev) = @_;

    if (disk_is_used($dev)) {
        die "device '$dev' is already in use\n";
    }

    return;
}
770
# Creates a new partition of $size bytes on $dev with sgdisk, using the partition number
# following the highest one currently present below /sys/block.
#
# Returns the path of the new partition device, or undef if no matching sysfs entry was found
# after creating it.
sub append_partition {
    my ($dev, $size) = @_;

    (my $devname = $dev) =~ s|^/dev/||;
    my $sysdir = "/sys/block/$devname";

    my $highest_partid = 0;
    dir_glob_foreach($sysdir, qr/\Q$devname\E.*?(\d+)/, sub {
        my (undef, $partid) = @_;
        $highest_partid = $partid if $partid > $highest_partid;
    });
    my $newpartid = $highest_partid + 1;

    $size = PVE::Tools::convert_size($size, 'b' => 'mb');

    run_command(
        [ $SGDISK, '-n', "$newpartid:0:+${size}M", $dev ],
        errmsg => "error creating partition '$newpartid' on '$dev'",
    );

    # loop again to detect the real partition device which does not always follow
    # a strict $devname$partition scheme like /dev/nvme0n1 -> /dev/nvme0n1p1
    my $partition;
    dir_glob_foreach($sysdir, qr/\Q$devname\E.*$newpartid/, sub {
        my ($part) = @_;
        $partition = "/dev/$part";
    });

    return $partition;
}
803
# Check if a disk or any of its partitions has a holder.
# Can also be called with a partition.
# Expected to be called with a result of verify_blockdev_path().
# Returns the path of the device/partition that has a holder, or undef if there is none.
sub has_holder {
    my ($devpath) = @_;

    my $dev = strip_dev($devpath);

    if (!dir_is_empty("/sys/class/block/${dev}/holders")) {
        return $devpath;
    }

    my $held_part;
    dir_glob_foreach("/sys/block/${dev}", "${dev}.+", sub {
        my ($part) = @_;
        return if dir_is_empty("/sys/class/block/${part}/holders");
        $held_part = "/dev/${part}";
    });

    return $held_part;
}
822
# Basic check if a disk or any of its partitions is mounted.
# Can also be called with a partition.
# Expected to be called with a result of verify_blockdev_path().
# Returns the path of the mounted device/partition, or undef if none is mounted.
sub is_mounted {
    my ($devpath) = @_;

    my $mounted = mounted_blockdevs();

    if ($mounted->{$devpath}) {
        return $devpath;
    }

    my $dev = strip_dev($devpath);

    my $mounted_part;
    dir_glob_foreach("/sys/block/${dev}", "${dev}.+", sub {
        my ($part) = @_;
        my $candidate = "/dev/${part}";
        return if !$mounted->{$candidate};
        $mounted_part = $candidate;
    });

    return $mounted_part;
}
845
# Sets the partition type of $partpath to $parttype using sgdisk.
# Currently only supports GPT-partitioned disks.
# Dies if the partition number or parent disk cannot be determined, if no disk info is
# available, or if the disk is not GPT partitioned.
sub change_parttype {
    my ($partpath, $parttype) = @_;

    my $errmsg = "unable to change partition type for $partpath";

    my $partnum = get_partnum($partpath);
    my $blockdev = get_blockdev($partpath);
    my $dev = strip_dev($blockdev);

    my $disk = get_disks($dev, 1)->{$dev};
    if (!defined($disk)) {
        die "$errmsg - unable to get disk info for '$blockdev'\n";
    }
    if (!$disk->{gpt}) {
        die "$errmsg - disk '$blockdev' is not GPT partitioned\n";
    }

    return run_command(['sgdisk', "-t${partnum}:${parttype}", $blockdev], errmsg => $errmsg);
}
862
# Wipes all labels and the first 200 MiB of a disk/partition (or the whole if it is smaller).
# If called with a partition, also sets the partition type to 0x83 'Linux filesystem'.
# Expected to be called with a result of verify_blockdev_path().
#
# Child partitions found below /sys/class/block/NAME are passed to wipefs as well. Dies if the
# size cannot be determined or if wipefs/dd fail; a failure to change the partition type is
# only warned about.
sub wipe_blockdev {
    my ($devpath) = @_;

    my $devname = basename($devpath);
    my $dev_size = PVE::Tools::file_get_contents("/sys/class/block/$devname/size");

    ($dev_size) = $dev_size =~ m|(\d+)|; # untaint $dev_size
    die "Couldn't get the size of the device $devname\n" if !defined($dev_size);

    # The size is treated as a number of 512 byte sectors. Calculate the amount to wipe in
    # bytes: small devices are often not a whole number of MiB, and a fractional 'count' is
    # rejected by dd.
    my $size_bytes = $dev_size * 512;
    my $max_wipe_bytes = 200 * 1024 * 1024;
    my $wipe_bytes = ($size_bytes < $max_wipe_bytes) ? $size_bytes : $max_wipe_bytes;

    my $to_wipe = [];
    dir_glob_foreach("/sys/class/block/${devname}", "${devname}.+", sub {
        my ($part) = @_;
        push $to_wipe->@*, "/dev/${part}" if -b "/dev/${part}";
    });

    if (scalar($to_wipe->@*) > 0) {
        print "found child partitions to wipe: ". join(', ', $to_wipe->@*) ."\n";
    }
    push $to_wipe->@*, $devpath; # put actual device last

    print "wiping block device ${devpath}\n";

    run_command(['wipefs', '--all', $to_wipe->@*], errmsg => "error wiping '${devpath}'");

    # iflag=count_bytes makes dd interpret 'count' as a number of bytes instead of blocks
    run_command(
        [
            'dd',
            'if=/dev/zero',
            "of=${devpath}",
            'bs=1M',
            'conv=fdatasync',
            "count=${wipe_bytes}",
            'iflag=count_bytes',
        ],
        errmsg => "error wiping '${devpath}'",
    );

    if (is_partition($devpath)) {
        eval { change_parttype($devpath, '8300'); };
        warn $@ if $@;
    }
}
903
# FIXME: Remove once we depend on systemd >= v249.
# Work around udev bug https://github.com/systemd/systemd/issues/18525 ensuring database is updated.
# Runs 'udevadm trigger' for the given devices; does nothing without devices and only warns if
# the command fails.
sub udevadm_trigger {
    my @devs = @_;

    return if !@devs;

    my $cmd = ['udevadm', 'trigger', @devs];
    eval { run_command($cmd); };
    warn $@ if $@;
}
914
915 1;