//! Low-level disk (image) access functions for file restore VMs.
use anyhow::{bail, format_err, Error};
use lazy_static::lazy_static;
use log::{info, warn};

use std::collections::HashMap;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader};
use std::path::{Component, Path, PathBuf};
use std::process::Command;

use proxmox::const_regex;
use proxmox::tools::fs;
use proxmox_backup::api2::types::BLOCKDEVICE_NAME_REGEX;
use proxmox_backup::tools::run_command;
const_regex! {
    VIRTIO_PART_REGEX = r"^vd[a-z]+(\d+)$";
    ZPOOL_POOL_NAME_REGEX = r"^ {3}pool: (.*)$";
    ZPOOL_IMPORT_DISK_REGEX = r"^\t {2,4}(vd[a-z]+(?:\d+)?)\s+ONLINE$";
}
lazy_static! {
    static ref FS_OPT_MAP: HashMap<&'static str, &'static str> = {
        let mut m = HashMap::new();

        // otherwise ext complains about mounting read-only
        m.insert("ext2", "noload");
        m.insert("ext3", "noload");
        m.insert("ext4", "noload");

        m.insert("xfs", "norecovery");

        // ufs2 is used as default since FreeBSD 5.0 released in 2003, so let's assume that
        // whatever the user is trying to restore is not using anything older...
        m.insert("ufs", "ufstype=ufs2");

        m.insert("ntfs", "utf8");

        m
    };
}
pub enum ResolveResult {
    Path(PathBuf),
    BucketTypes(Vec<&'static str>),
    BucketComponents(Vec<(String, Option<u64>)>),
}
#[derive(Clone)]
struct PartitionBucketData {
    dev_node: String,
    number: i32,
    mountpoint: Option<PathBuf>,
    size: u64,
}

#[derive(Clone)]
struct ZFSBucketData {
    name: String,
    mountpoint: Option<PathBuf>,
    size: Option<u64>,
}

#[derive(Clone)]
struct LVMBucketData {
    vg_name: String,
    lv_name: String,
    mountpoint: Option<PathBuf>,
    size: u64,
}
73 /// A "Bucket" represents a mapping found on a disk, e.g. a partition, a zfs dataset or an LV. A
74 /// uniquely identifying path to a file then consists of four components:
75 /// "/disk/bucket/component/path"
77 /// disk: fidx file name
78 /// bucket: bucket type
79 /// component: identifier of the specific bucket
80 /// path: relative path of the file on the filesystem indicated by the other parts, may contain
81 /// more subdirectories
82 /// e.g.: "/drive-scsi0/part/0/etc/passwd"
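///
/// Breaking down that example (the drive name is made up):
///   disk:      "drive-scsi0"  - matched against the .fidx mapping built in DiskState::scan()
///   bucket:    "part"         - a partition-type bucket
///   component: "0"            - the partition number
///   path:      "etc/passwd"   - path relative to that partition's filesystem root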
#[derive(Clone)]
enum Bucket {
    Partition(PartitionBucketData),
    RawFs(PartitionBucketData),
    ZPool(ZFSBucketData),
    LVM(LVMBucketData),
}

impl Bucket {
    fn filter_mut<'a, A: AsRef<str>, B: AsRef<str>>(
        haystack: &'a mut Vec<Bucket>,
        ty: A,
        comp: &[B],
    ) -> Option<&'a mut Bucket> {
        let ty = ty.as_ref();
        haystack.iter_mut().find(|b| match b {
            Bucket::Partition(data) => {
                if let Some(comp) = comp.get(0) {
                    ty == "part" && comp.as_ref().parse::<i32>().unwrap() == data.number
                } else {
                    false
                }
            }
            Bucket::RawFs(_) => ty == "raw",
            Bucket::ZPool(data) => {
                if let Some(ref comp) = comp.get(0) {
                    ty == "zpool" && comp.as_ref() == &data.name
                } else {
                    false
                }
            }
            Bucket::LVM(data) => {
                if let (Some(ref vg), Some(ref lv)) = (comp.get(0), comp.get(1)) {
                    ty == "lvm" && vg.as_ref() == &data.vg_name && lv.as_ref() == &data.lv_name
                } else {
                    false
                }
            }
        })
    }
    fn type_string(&self) -> &'static str {
        match self {
            Bucket::Partition(_) => "part",
            Bucket::RawFs(_) => "raw",
            Bucket::ZPool(_) => "zpool",
            Bucket::LVM(_) => "lvm",
        }
    }
    fn component_string(&self, idx: usize) -> Result<String, Error> {
        let max_depth = Self::component_depth(self.type_string())?;
        if idx >= max_depth {
            bail!(
                "internal error: component index out of range {}/{} ({})",
                idx,
                max_depth,
                self.type_string()
            );
        }
        Ok(match self {
            Bucket::Partition(data) => data.number.to_string(),
            Bucket::RawFs(_) => "raw".to_owned(),
            Bucket::ZPool(data) => data.name.clone(),
            Bucket::LVM(data) => {
                if idx == 0 {
                    data.vg_name.clone()
                } else {
                    data.lv_name.clone()
                }
            }
        })
    }
    fn component_depth(type_string: &str) -> Result<usize, Error> {
        Ok(match type_string {
            "part" => 1,
            "raw" => 0,
            "zpool" => 1,
            "lvm" => 2,
            _ => bail!("invalid bucket type for component depth: {}", type_string),
        })
    }
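
    // How the "component" part of a path maps onto bucket types (values are illustrative):
    //   part  -> the partition number,       e.g. "part/1"
    //   zpool -> the pool name,              e.g. "zpool/rpool"
    //   lvm   -> the vg name, then lv name,  e.g. "lvm/pve/root"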
    fn size(&self, idx: usize) -> Option<u64> {
        match self {
            Bucket::Partition(data) | Bucket::RawFs(data) => Some(data.size),
            Bucket::ZPool(data) => data.size,
            Bucket::LVM(data) => {
                if idx == 1 {
                    Some(data.size)
                } else {
                    None
                }
            }
        }
    }
}
/// Functions related to the local filesystem. This mostly exists so we can use 'supported_fs' in
/// try_mount while a Bucket is still mutably borrowed from DiskState.
struct Filesystems {
    supported_fs: Vec<String>,
}

impl Filesystems {
    fn scan() -> Result<Self, Error> {
        // detect kernel supported filesystems
        let mut supported_fs = Vec::new();
        for f in BufReader::new(File::open("/proc/filesystems")?)
            .lines()
            .filter_map(Result::ok)
        {
            // ZFS is treated specially, don't attempt to do a regular mount with it
            let f = f.trim();
            if !f.starts_with("nodev") && f != "zfs" {
                supported_fs.push(f.to_owned());
            }
        }

        info!("Supported FS: {}", supported_fs.join(", "));

        Ok(Self { supported_fs })
    }
    fn ensure_mounted(&self, bucket: &mut Bucket) -> Result<PathBuf, Error> {
        match bucket {
            Bucket::Partition(data) | Bucket::RawFs(data) => {
                // regular data partition à la "/dev/vdxN" or FS directly on a disk
                if let Some(mp) = &data.mountpoint {
                    return Ok(mp.clone());
                }

                let mp = format!("/mnt{}/", data.dev_node);
                self.try_mount(&data.dev_node, &mp)?;
                let mp = PathBuf::from(mp);
                data.mountpoint = Some(mp.clone());
                Ok(mp)
            }
            Bucket::ZPool(data) => {
                if let Some(mp) = &data.mountpoint {
                    return Ok(mp.clone());
                }

                let mntpath = format!("/mnt/zpool/{}", &data.name);
                create_dir_all(&mntpath)?;

                // call ZFS tools to import and mount the pool with the root mount at 'mntpath'
                let mut cmd = Command::new("/sbin/zpool");
                cmd.args(["import", "-d", "/dev", "-R", &mntpath, &data.name].iter());
                if let Err(msg) = run_command(cmd, None) {
                    // ignore double import, this may happen if a previous attempt failed further
                    // down below - this way we can at least try again
                    if !msg
                        .to_string()
                        .contains("a pool with that name already exists")
                    {
                        return Err(msg);
                    }
                }

                // 'mount -a' simply mounts all datasets that haven't been automounted, which
                // should only be ones that we've imported just now
                let mut cmd = Command::new("/sbin/zfs");
                cmd.args(["mount", "-a"].iter());
                run_command(cmd, None)?;

                // detect any datasets with 'legacy' mountpoints
                let mut cmd = Command::new("/sbin/zfs");
                cmd.args(["list", "-Hpro", "name,mountpoint", &data.name].iter());
                let mps = run_command(cmd, None)?;
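
                // Each output line is "<dataset>\t<mountpoint>" (tab-separated, no header because
                // of -H); made-up example:
                //   rpool/data      /mnt/zpool/rpool/data
                //   rpool/oldroot   legacy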
                for subvol in mps.lines() {
                    let subvol = subvol.splitn(2, '\t').collect::<Vec<&str>>();
                    if subvol.len() != 2 {
                        continue;
                    }
                    let name = subvol[0];
                    let mp = subvol[1];

                    if mp == "legacy" {
                        let mut newmp = PathBuf::from(format!(
                            "{}/{}",
                            &mntpath,
                            name.replace('/', "_")
                        ));
                        let mut i = 1;
                        while newmp.exists() {
                            newmp.set_extension(i.to_string());
                            i += 1;
                        }
                        create_dir_all(&newmp)?;
                        self.do_mount(Some(name), newmp.to_string_lossy().as_ref(), "zfs")?;
                    }
                }
                // Now that we have imported the pool, we can also query the size
                let mut cmd = Command::new("/sbin/zpool");
                cmd.args(["list", "-o", "size", "-Hp", &data.name].iter());
                let size = run_command(cmd, None)?;
                if let Ok(size) = size.trim().parse::<u64>() {
                    data.size = Some(size);
                }

                let mp = PathBuf::from(mntpath);
                data.mountpoint = Some(mp.clone());
                Ok(mp)
            }
            Bucket::LVM(data) => {
                if let Some(mp) = &data.mountpoint {
                    return Ok(mp.clone());
                }

                let mntpath = format!("/mnt/lvm/{}/{}", &data.vg_name, &data.lv_name);
                create_dir_all(&mntpath)?;

                let mapper_path = format!("/dev/mapper/{}-{}", &data.vg_name, &data.lv_name);
                self.try_mount(&mapper_path, &mntpath)?;

                let mp = PathBuf::from(mntpath);
                data.mountpoint = Some(mp.clone());
                Ok(mp)
            }
        }
    }
    fn try_mount(&self, source: &str, target: &str) -> Result<(), Error> {
        create_dir_all(target)?;

        // try all supported fs until one works - this is the way Busybox's 'mount' does it too:
        // https://git.busybox.net/busybox/tree/util-linux/mount.c?id=808d93c0eca49e0b22056e23d965f0d967433fbb#n2152
        // note that ZFS is intentionally left out (see scan())
        for fs in &self.supported_fs {
            let fs: &str = fs.as_ref();
            match self.do_mount(Some(source), target, fs) {
                Ok(()) => {
                    info!("mounting '{}' succeeded, fstype: '{}'", source, fs);
                    return Ok(());
                }
                Err(nix::Error::Sys(nix::errno::Errno::EINVAL)) => {}
                Err(nix::Error::Sys(nix::errno::Errno::EBUSY)) => return Ok(()),
                Err(err) => {
                    warn!("mount error on '{}' ({}) - {}", source, fs, err);
                }
            }
        }

        bail!("all mounts failed or no supported file system")
    }
    fn do_mount(&self, source: Option<&str>, target: &str, fs: &str) -> Result<(), nix::Error> {
        use nix::mount::{mount, MsFlags};
        let flags =
            MsFlags::MS_RDONLY | MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
        let opts = FS_OPT_MAP.get(fs).copied();
        mount(source, target, Some(fs), flags, opts)
    }
}
pub struct DiskState {
    filesystems: Filesystems,
    disk_map: HashMap<String, Vec<Bucket>>,
}

impl DiskState {
    /// Scan all disks for supported buckets.
    pub fn scan() -> Result<Self, Error> {
        let filesystems = Filesystems::scan()?;

        let mut disk_map = HashMap::new();
        let mut drive_info = HashMap::new();

        // create mapping for virtio drives and .fidx files (via serial description)
        // note: disks::DiskManager relies on udev, which we don't have
        for entry in pbs_tools::fs::scan_subdir(
            libc::AT_FDCWD,
            "/sys/block",
            &BLOCKDEVICE_NAME_REGEX,
        )?
        .filter_map(Result::ok)
        {
            let name = unsafe { entry.file_name_utf8_unchecked() };
            if !name.starts_with("vd") {
                continue;
            }

            let sys_path: &str = &format!("/sys/block/{}", name);

            let serial = fs::file_read_string(&format!("{}/serial", sys_path));
            let fidx = match serial {
                Ok(serial) => serial,
                Err(err) => {
                    warn!("disk '{}': could not read serial file - {}", name, err);
                    continue;
                }
            };

            drive_info.insert(name.to_owned(), fidx.clone());

            // attempt to mount device directly
            let dev_node = format!("/dev/{}", name);
            let size = Self::make_dev_node(&dev_node, &sys_path)?;
            let mut dfs_bucket = Bucket::RawFs(PartitionBucketData {
                dev_node: dev_node.clone(),
                number: 0,
                mountpoint: None,
                size,
            });
            if let Ok(_) = filesystems.ensure_mounted(&mut dfs_bucket) {
                // mount succeeded, add bucket and skip any other checks for the disk
                info!(
                    "drive '{}' ('{}', '{}') contains fs directly ({}B)",
                    name, fidx, dev_node, size
                );
                disk_map.insert(fidx, vec![dfs_bucket]);
                continue;
            }
            let mut parts = Vec::new();
            for entry in pbs_tools::fs::scan_subdir(
                libc::AT_FDCWD,
                sys_path,
                &VIRTIO_PART_REGEX,
            )?
            .filter_map(Result::ok)
            {
                let part_name = unsafe { entry.file_name_utf8_unchecked() };
                let dev_node = format!("/dev/{}", part_name);
                let part_path = format!("/sys/block/{}/{}", name, part_name);

                // create partition device node for further use
                let size = Self::make_dev_node(&dev_node, &part_path)?;

                let number = fs::file_read_firstline(&format!("{}/partition", part_path))?
                    .trim()
                    .parse::<i32>()?;

                info!(
                    "drive '{}' ('{}'): found partition '{}' ({}, {}B)",
                    name, fidx, dev_node, number, size
                );

                let bucket = Bucket::Partition(PartitionBucketData {
                    dev_node,
                    mountpoint: None,
                    number,
                    size,
                });
                parts.push(bucket);

                drive_info.insert(part_name.to_owned(), fidx.clone());
            }

            disk_map.insert(fidx, parts);
        }
        // After the above, every valid disk should have a device node in /dev, so we can query all
        // of them for zpools
        let mut cmd = Command::new("/sbin/zpool");
        cmd.args(["import", "-d", "/dev"].iter());
        let result = run_command(cmd, None).unwrap();
        for (pool, disks) in Self::parse_zpool_import(&result) {
            let mut bucket = Bucket::ZPool(ZFSBucketData {
                name: pool.clone(),
                size: None,
                mountpoint: None,
            });

            // anything more than 5 disks we assume to take too long to mount, so we don't
            // automatically - this means that no size can be reported
            if disks.len() <= 5 {
                let mp = filesystems.ensure_mounted(&mut bucket);
                info!(
                    "zpool '{}' (on: {:?}) auto-mounted at '{:?}' (size: {:?})",
                    &pool,
                    &disks,
                    mp,
                    bucket.size(0)
                );
            } else {
                info!(
                    "zpool '{}' (on: {:?}) auto-mount skipped, too many disks",
                    &pool, &disks
                );
            }

            for disk in disks {
                if let Some(fidx) = drive_info.get(&disk) {
                    match disk_map.get_mut(fidx) {
                        Some(v) => v.push(bucket.clone()),
                        None => {
                            disk_map.insert(fidx.to_owned(), vec![bucket.clone()]);
                        }
                    }
                }
            }
        }

        Self::scan_lvm(&mut disk_map, &drive_info)?;

        Ok(Self {
            filesystems,
            disk_map,
        })
    }
    /// scan for LVM volumes and create device nodes for them to later mount on demand
    fn scan_lvm(
        disk_map: &mut HashMap<String, Vec<Bucket>>,
        drive_info: &HashMap<String, String>,
    ) -> Result<(), Error> {
        // first get mapping between devices and vgs
        let mut pv_map: HashMap<String, Vec<String>> = HashMap::new();
        let mut cmd = Command::new("/sbin/pvs");
        cmd.args(["-o", "pv_name,vg_name", "--reportformat", "json"].iter());
        let result = run_command(cmd, None).unwrap();
        let result: serde_json::Value = serde_json::from_str(&result)?;
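
        // Shape of the 'pvs --reportformat json' output this indexes into (abbreviated,
        // illustrative values rather than captured output):
        //   { "report": [ { "pv": [ { "pv_name": "/dev/vda2", "vg_name": "pve" }, ... ] } ] }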
        if let Some(result) = result["report"][0]["pv"].as_array() {
            for pv in result {
                let vg_name = pv["vg_name"].as_str().unwrap();
                if vg_name.is_empty() {
                    continue;
                }
                let pv_name = pv["pv_name"].as_str().unwrap();
                // remove '/dev/' part
                let pv_name = &pv_name[pv_name.rfind('/').map(|i| i + 1).unwrap_or(0)..];
                if let Some(fidx) = drive_info.get(pv_name) {
                    info!("LVM: found VG '{}' on '{}' ({})", vg_name, pv_name, fidx);
                    match pv_map.get_mut(vg_name) {
                        Some(list) => list.push(fidx.to_owned()),
                        None => {
                            pv_map.insert(vg_name.to_owned(), vec![fidx.to_owned()]);
                        }
                    }
                }
            }
        }

        let mknodes = || {
            let mut cmd = Command::new("/sbin/vgscan");
            cmd.arg("--mknodes");
            if let Err(err) = run_command(cmd, None) {
                warn!("LVM: 'vgscan --mknodes' failed: {}", err);
            }
        };
        // then scan for LVs and assign their buckets to the correct disks
        let mut cmd = Command::new("/sbin/lvs");
        cmd.args(
            [
                "-o",
                "vg_name,lv_name,lv_size,metadata_lv",
                "--units",
                "B",
                "--reportformat",
                "json",
            ]
            .iter(),
        );
        let result = run_command(cmd, None).unwrap();
        let result: serde_json::Value = serde_json::from_str(&result)?;
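
        // The 'lvs' report is indexed the same way; abbreviated, illustrative shape:
        //   { "report": [ { "lv": [ { "vg_name": "pve", "lv_name": "root",
        //       "lv_size": "10737418240B", "metadata_lv": "" }, ... ] } ] }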
        let mut thinpools = Vec::new();
        if let Some(result) = result["report"][0]["lv"].as_array() {
            // first, look for thin-pools
            for lv in result {
                let metadata = lv["metadata_lv"].as_str().unwrap_or_default();
                if !metadata.is_empty() {
                    // this is a thin-pool, activate the metadata LV
                    let vg_name = lv["vg_name"].as_str().unwrap();
                    let metadata = metadata.trim_matches(&['[', ']'][..]);
                    info!("LVM: attempting to activate thinpool '{}'", metadata);
                    let mut cmd = Command::new("/sbin/lvchange");
                    cmd.args(["-ay", "-y", &format!("{}/{}", vg_name, metadata)].iter());
                    if let Err(err) = run_command(cmd, None) {
                        // not critical, will simply mean its children can't be loaded
                        warn!("LVM: activating thinpool failed: {}", err);
                    } else {
                        thinpools.push((vg_name, metadata));
                    }
                }
            }

            // now give the metadata LVs a device node
            mknodes();

            // cannot leave the metadata LV active, otherwise child-LVs won't activate
            for (vg_name, metadata) in thinpools {
                let mut cmd = Command::new("/sbin/lvchange");
                cmd.args(["-an", "-y", &format!("{}/{}", vg_name, metadata)].iter());
                let _ = run_command(cmd, None);
            }
            for lv in result {
                let lv_name = lv["lv_name"].as_str().unwrap();
                let vg_name = lv["vg_name"].as_str().unwrap();
                let metadata = lv["metadata_lv"].as_str().unwrap_or_default();
                if lv_name.is_empty() || vg_name.is_empty() || !metadata.is_empty() {
                    continue;
                }
                let lv_size = lv["lv_size"].as_str().unwrap();
                // lv_size is in bytes with a capital 'B' at the end
                let lv_size = lv_size[..lv_size.len() - 1].parse::<u64>().unwrap_or(0);

                let bucket = Bucket::LVM(LVMBucketData {
                    vg_name: vg_name.to_owned(),
                    lv_name: lv_name.to_owned(),
                    size: lv_size,
                    mountpoint: None,
                });

                // activate the LV so 'vgscan' can create a node later - this may fail, and if it
                // does, we ignore it and continue
                let mut cmd = Command::new("/sbin/lvchange");
                cmd.args(["-ay", &format!("{}/{}", vg_name, lv_name)].iter());
                if let Err(err) = run_command(cmd, None) {
                    warn!(
                        "LVM: LV '{}' on '{}' ({}B) failed to activate: {}",
                        lv_name, vg_name, lv_size, err
                    );
                    continue;
                }

                info!(
                    "LVM: found LV '{}' on '{}' ({}B)",
                    lv_name, vg_name, lv_size
                );

                if let Some(drives) = pv_map.get(vg_name) {
                    for fidx in drives {
                        match disk_map.get_mut(fidx) {
                            Some(v) => v.push(bucket.clone()),
                            None => {
                                disk_map.insert(fidx.to_owned(), vec![bucket.clone()]);
                            }
                        }
                    }
                }
            }
            // now that we've imported and activated all LV's, we let vgscan create the dev nodes
            mknodes();
        }

        Ok(())
    }
    /// Given a path like "/drive-scsi0.img.fidx/part/0/etc/passwd", this will mount the first
    /// partition of 'drive-scsi0' on-demand (i.e. if not already mounted) and return a path
    /// pointing to the requested file locally, e.g. "/mnt/vda1/etc/passwd", which can be used to
    /// read the file. Given a partial path, i.e. only "/drive-scsi0.img.fidx" or
    /// "/drive-scsi0.img.fidx/part", it will return a list of available bucket types or bucket
    /// components, respectively.
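    ///
    /// Sketch of the three result kinds (drive name and values are made-up examples):
    ///   resolve("/drive-scsi0.img.fidx")            -> BucketTypes(["part", ...])
    ///   resolve("/drive-scsi0.img.fidx/part")       -> BucketComponents([("0", Some(size)), ...])
    ///   resolve("/drive-scsi0.img.fidx/part/0/etc") -> Path("<local mountpoint>/etc")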
    pub fn resolve(&mut self, path: &Path) -> Result<ResolveResult, Error> {
        let mut cmp = path.components().peekable();
        match cmp.peek() {
            Some(Component::RootDir) | Some(Component::CurDir) => {
                cmp.next();
            }
            None => bail!("empty path cannot be resolved to file location"),
            _ => {}
        }

        let req_fidx = match cmp.next() {
            Some(Component::Normal(x)) => x.to_string_lossy(),
            _ => bail!("no or invalid image in path"),
        };

        let buckets = match self.disk_map.get_mut(
            req_fidx
                .strip_suffix(".img.fidx")
                .unwrap_or_else(|| req_fidx.as_ref()),
        ) {
            Some(x) => x,
            None => bail!("given image '{}' not found", req_fidx),
        };

        let bucket_type = match cmp.next() {
            Some(Component::Normal(x)) => x.to_string_lossy(),
            Some(c) => bail!("invalid bucket in path: {:?}", c),
            None => {
                // list bucket types available
                let mut types = buckets
                    .iter()
                    .map(|b| b.type_string())
                    .collect::<Vec<&'static str>>();
                // dedup requires duplicates to be consecutive, which is the case - see scan()
                types.dedup();
                return Ok(ResolveResult::BucketTypes(types));
            }
        };

        let mut components = Vec::new();
        let component_count = Bucket::component_depth(&bucket_type)?;

        while components.len() < component_count {
            let component = match cmp.next() {
                Some(Component::Normal(x)) => x.to_string_lossy(),
                Some(c) => bail!("invalid bucket component in path: {:?}", c),
                None => {
                    // list bucket components available at this level
                    let mut comps = buckets
                        .iter()
                        .filter_map(|b| {
                            if b.type_string() != bucket_type {
                                return None;
                            }
                            match b.component_string(components.len()) {
                                Ok(cs) => Some((cs.to_owned(), b.size(components.len()))),
                                Err(_) => None,
                            }
                        })
                        .collect::<Vec<(String, Option<u64>)>>();
                    comps.sort_by(|a, b| a.0.cmp(&b.0));
                    return Ok(ResolveResult::BucketComponents(comps));
                }
            };
            components.push(component);
        }

        let mut bucket = match Bucket::filter_mut(buckets, &bucket_type, &components) {
            Some(bucket) => bucket,
            None => bail!(
                "bucket/component path not found: {}/{}/{}",
                req_fidx,
                bucket_type,
                components.join("/")
            ),
        };

        // bucket found, check mount
        let mountpoint = self
            .filesystems
            .ensure_mounted(&mut bucket)
            .map_err(|err| {
                format_err!(
                    "mounting '{}/{}/{}' failed: {}",
                    req_fidx,
                    bucket_type,
                    components.join("/"),
                    err
                )
            })?;

        let mut local_path = PathBuf::new();
        local_path.push(mountpoint);
        for rem in cmp {
            local_path.push(rem);
        }

        Ok(ResolveResult::Path(local_path))
    }
    fn make_dev_node(devnode: &str, sys_path: &str) -> Result<u64, Error> {
        let dev_num_str = fs::file_read_firstline(&format!("{}/dev", sys_path))?;
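
        // The sysfs 'dev' attribute read above has the form "<major>:<minor>" (for example
        // "254:1" - an illustrative value), which is split and fed to mknod below.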
        let (major, minor) = dev_num_str.split_at(dev_num_str.find(':').unwrap());
        Self::mknod_blk(&devnode, major.parse()?, minor[1..].trim_end().parse()?)?;

        // this *always* contains the number of 512-byte sectors, regardless of the true
        // blocksize of this disk - which should always be 512 here anyway
        let size = fs::file_read_firstline(&format!("{}/size", sys_path))?
            .trim()
            .parse::<u64>()?
            * 512;

        Ok(size)
    }
    fn mknod_blk(path: &str, maj: u64, min: u64) -> Result<(), Error> {
        use nix::sys::stat;
        let dev = stat::makedev(maj, min);
        stat::mknod(path, stat::SFlag::S_IFBLK, stat::Mode::S_IRWXU, dev)?;
        Ok(())
    }
    fn parse_zpool_import(data: &str) -> Vec<(String, Vec<String>)> {
        let mut ret = Vec::new();
        let mut disks = Vec::new();
        let mut cur = "".to_string();
        for line in data.lines() {
            if let Some(groups) = (ZPOOL_POOL_NAME_REGEX.regex_obj)().captures(line) {
                if let Some(name) = groups.get(1) {
                    if !disks.is_empty() {
                        ret.push((cur, disks.clone()));
                        disks.clear();
                    }
                    cur = name.as_str().to_owned();
                }
            } else if let Some(groups) = (ZPOOL_IMPORT_DISK_REGEX.regex_obj)().captures(line) {
                if let Some(disk) = groups.get(1) {
                    disks.push(disk.as_str().to_owned());
                }
            }
        }

        if !disks.is_empty() && !cur.is_empty() {
            ret.push((cur, disks));
        }

        ret
    }
}