]> git.proxmox.com Git - proxmox-backup.git/blob - src/backup/catalog.rs
switch to external pxar and fuse crates
[proxmox-backup.git] / src / backup / catalog.rs
1 use std::convert::TryFrom;
2 use std::ffi::{CStr, CString, OsStr};
3 use std::fmt;
4 use std::io::{Read, Write, Seek, SeekFrom};
5 use std::os::unix::ffi::OsStrExt;
6
7 use anyhow::{bail, format_err, Error};
8 use chrono::offset::{TimeZone, Local};
9
10 use pathpatterns::{MatchList, MatchType};
11 use proxmox::sys::error::io_err_other;
12 use proxmox::tools::io::ReadExt;
13
14 use crate::backup::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
15 use crate::pxar::catalog::BackupCatalogWriter;
16 use crate::tools::runtime::block_on;
17
/// Single-byte type tag stored in front of every serialized catalog entry.
///
/// The discriminant of each variant is the ASCII byte that identifies the
/// entry type inside the catalog.
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CatalogEntryType {
    Directory = b'd',
    File = b'f',
    Symlink = b'l',
    Hardlink = b'h',
    BlockDevice = b'b',
    CharDevice = b'c',
    Fifo = b'p', // Fifo,Pipe
    Socket = b's',
}
30
31 impl TryFrom<u8> for CatalogEntryType {
32 type Error=Error;
33
34 fn try_from(value: u8) -> Result<Self, Error> {
35 Ok(match value {
36 b'd' => CatalogEntryType::Directory,
37 b'f' => CatalogEntryType::File,
38 b'l' => CatalogEntryType::Symlink,
39 b'h' => CatalogEntryType::Hardlink,
40 b'b' => CatalogEntryType::BlockDevice,
41 b'c' => CatalogEntryType::CharDevice,
42 b'p' => CatalogEntryType::Fifo,
43 b's' => CatalogEntryType::Socket,
44 _ => bail!("invalid CatalogEntryType value '{}'", char::from(value)),
45 })
46 }
47 }
48
49 impl fmt::Display for CatalogEntryType {
50 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51 write!(f, "{}", char::from(*self as u8))
52 }
53 }
54
55 /// Represents a named directory entry
56 ///
57 /// The ``attr`` property contain the exact type with type specific
58 /// attributes.
59 #[derive(Clone, PartialEq)]
60 pub struct DirEntry {
61 pub name: Vec<u8>,
62 pub attr: DirEntryAttribute,
63 }
64
/// Used to specify additional, type-specific attributes inside a `DirEntry`
#[derive(Clone, Debug, PartialEq)]
pub enum DirEntryAttribute {
    /// A directory; `start` is the byte offset of its entry block inside the catalog.
    Directory {
        start: u64,
    },
    /// A regular file with its size and last modification time.
    File {
        size: u64,
        mtime: u64,
    },
    Symlink,
    Hardlink,
    BlockDevice,
    CharDevice,
    Fifo,
    Socket,
}
77
78 impl DirEntry {
79
80 fn new(etype: CatalogEntryType, name: Vec<u8>, start: u64, size: u64, mtime:u64) -> Self {
81 match etype {
82 CatalogEntryType::Directory => {
83 DirEntry { name, attr: DirEntryAttribute::Directory { start } }
84 }
85 CatalogEntryType::File => {
86 DirEntry { name, attr: DirEntryAttribute::File { size, mtime } }
87 }
88 CatalogEntryType::Symlink => {
89 DirEntry { name, attr: DirEntryAttribute::Symlink }
90 }
91 CatalogEntryType::Hardlink => {
92 DirEntry { name, attr: DirEntryAttribute::Hardlink }
93 }
94 CatalogEntryType::BlockDevice => {
95 DirEntry { name, attr: DirEntryAttribute::BlockDevice }
96 }
97 CatalogEntryType::CharDevice => {
98 DirEntry { name, attr: DirEntryAttribute::CharDevice }
99 }
100 CatalogEntryType::Fifo => {
101 DirEntry { name, attr: DirEntryAttribute::Fifo }
102 }
103 CatalogEntryType::Socket => {
104 DirEntry { name, attr: DirEntryAttribute::Socket }
105 }
106 }
107 }
108
109 /// Get file mode bits for this entry to be used with the `MatchList` api.
110 pub fn get_file_mode(&self) -> Option<u32> {
111 Some(
112 match self.attr {
113 DirEntryAttribute::Directory { .. } => pxar::mode::IFDIR,
114 DirEntryAttribute::File { .. } => pxar::mode::IFREG,
115 DirEntryAttribute::Symlink => pxar::mode::IFLNK,
116 DirEntryAttribute::Hardlink => return None,
117 DirEntryAttribute::BlockDevice => pxar::mode::IFBLK,
118 DirEntryAttribute::CharDevice => pxar::mode::IFCHR,
119 DirEntryAttribute::Fifo => pxar::mode::IFIFO,
120 DirEntryAttribute::Socket => pxar::mode::IFSOCK,
121 }
122 as u32
123 )
124 }
125
126 /// Check if DirEntry is a directory
127 pub fn is_directory(&self) -> bool {
128 match self.attr {
129 DirEntryAttribute::Directory { .. } => true,
130 _ => false,
131 }
132 }
133
134 /// Check if DirEntry is a symlink
135 pub fn is_symlink(&self) -> bool {
136 match self.attr {
137 DirEntryAttribute::Symlink { .. } => true,
138 _ => false,
139 }
140 }
141 }
142
143 struct DirInfo {
144 name: CString,
145 entries: Vec<DirEntry>,
146 }
147
148 impl DirInfo {
149
150 fn new(name: CString) -> Self {
151 DirInfo { name, entries: Vec::new() }
152 }
153
154 fn new_rootdir() -> Self {
155 DirInfo::new(CString::new(b"/".to_vec()).unwrap())
156 }
157
158 fn encode_entry<W: Write>(
159 writer: &mut W,
160 entry: &DirEntry,
161 pos: u64,
162 ) -> Result<(), Error> {
163 match entry {
164 DirEntry { name, attr: DirEntryAttribute::Directory { start } } => {
165 writer.write_all(&[CatalogEntryType::Directory as u8])?;
166 catalog_encode_u64(writer, name.len() as u64)?;
167 writer.write_all(name)?;
168 catalog_encode_u64(writer, pos - start)?;
169 }
170 DirEntry { name, attr: DirEntryAttribute::File { size, mtime } } => {
171 writer.write_all(&[CatalogEntryType::File as u8])?;
172 catalog_encode_u64(writer, name.len() as u64)?;
173 writer.write_all(name)?;
174 catalog_encode_u64(writer, *size)?;
175 catalog_encode_u64(writer, *mtime)?;
176 }
177 DirEntry { name, attr: DirEntryAttribute::Symlink } => {
178 writer.write_all(&[CatalogEntryType::Symlink as u8])?;
179 catalog_encode_u64(writer, name.len() as u64)?;
180 writer.write_all(name)?;
181 }
182 DirEntry { name, attr: DirEntryAttribute::Hardlink } => {
183 writer.write_all(&[CatalogEntryType::Hardlink as u8])?;
184 catalog_encode_u64(writer, name.len() as u64)?;
185 writer.write_all(name)?;
186 }
187 DirEntry { name, attr: DirEntryAttribute::BlockDevice } => {
188 writer.write_all(&[CatalogEntryType::BlockDevice as u8])?;
189 catalog_encode_u64(writer, name.len() as u64)?;
190 writer.write_all(name)?;
191 }
192 DirEntry { name, attr: DirEntryAttribute::CharDevice } => {
193 writer.write_all(&[CatalogEntryType::CharDevice as u8])?;
194 catalog_encode_u64(writer, name.len() as u64)?;
195 writer.write_all(name)?;
196 }
197 DirEntry { name, attr: DirEntryAttribute::Fifo } => {
198 writer.write_all(&[CatalogEntryType::Fifo as u8])?;
199 catalog_encode_u64(writer, name.len() as u64)?;
200 writer.write_all(name)?;
201 }
202 DirEntry { name, attr: DirEntryAttribute::Socket } => {
203 writer.write_all(&[CatalogEntryType::Socket as u8])?;
204 catalog_encode_u64(writer, name.len() as u64)?;
205 writer.write_all(name)?;
206 }
207 }
208 Ok(())
209 }
210
211 fn encode(self, start: u64) -> Result<(CString, Vec<u8>), Error> {
212 let mut table = Vec::new();
213 catalog_encode_u64(&mut table, self.entries.len() as u64)?;
214 for entry in self.entries {
215 Self::encode_entry(&mut table, &entry, start)?;
216 }
217
218 let mut data = Vec::new();
219 catalog_encode_u64(&mut data, table.len() as u64)?;
220 data.extend_from_slice(&table);
221
222 Ok((self.name, data))
223 }
224
225 fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, u64) -> Result<bool, Error>>(
226 data: &[u8],
227 mut callback: C,
228 ) -> Result<(), Error> {
229
230 let mut cursor = data;
231
232 let entries = catalog_decode_u64(&mut cursor)?;
233
234 let mut name_buf = vec![0u8; 4096];
235
236 for _ in 0..entries {
237
238 let mut buf = [ 0u8 ];
239 cursor.read_exact(&mut buf)?;
240 let etype = CatalogEntryType::try_from(buf[0])?;
241
242 let name_len = catalog_decode_u64(&mut cursor)? as usize;
243 if name_len >= name_buf.len() {
244 bail!("directory entry name too long ({} >= {})", name_len, name_buf.len());
245 }
246 let name = &mut name_buf[0..name_len];
247 cursor.read_exact(name)?;
248
249 let cont = match etype {
250 CatalogEntryType::Directory => {
251 let offset = catalog_decode_u64(&mut cursor)?;
252 callback(etype, name, offset, 0, 0)?
253 }
254 CatalogEntryType::File => {
255 let size = catalog_decode_u64(&mut cursor)?;
256 let mtime = catalog_decode_u64(&mut cursor)?;
257 callback(etype, name, 0, size, mtime)?
258 }
259 _ => {
260 callback(etype, name, 0, 0, 0)?
261 }
262 };
263 if !cont {
264 return Ok(());
265 }
266 }
267
268 if !cursor.is_empty() {
269 bail!("unable to parse whole catalog data block");
270 }
271
272 Ok(())
273 }
274 }
275
276 /// Write small catalog files
277 ///
278 /// A Catalogs simply contains list of files and directories
279 /// (directory tree). They are use to find content without having to
280 /// search the real archive (which may be large). For files, they
281 /// include the last modification time and file size.
282 pub struct CatalogWriter<W> {
283 writer: W,
284 dirstack: Vec<DirInfo>,
285 pos: u64,
286 }
287
288 impl <W: Write> CatalogWriter<W> {
289
290 /// Create a new CatalogWriter instance
291 pub fn new(writer: W) -> Result<Self, Error> {
292 let mut me = Self { writer, dirstack: vec![ DirInfo::new_rootdir() ], pos: 0 };
293 me.write_all(&PROXMOX_CATALOG_FILE_MAGIC_1_0)?;
294 Ok(me)
295 }
296
297 fn write_all(&mut self, data: &[u8]) -> Result<(), Error> {
298 self.writer.write_all(data)?;
299 self.pos += u64::try_from(data.len())?;
300 Ok(())
301 }
302
303 /// Finish writing, flush all data
304 ///
305 /// This need to be called before drop.
306 pub fn finish(&mut self) -> Result<(), Error> {
307 if self.dirstack.len() != 1 {
308 bail!("unable to finish catalog at level {}", self.dirstack.len());
309 }
310
311 let dir = self.dirstack.pop().unwrap();
312
313 let start = self.pos;
314 let (_, data) = dir.encode(start)?;
315 self.write_all(&data)?;
316
317 self.write_all(&start.to_le_bytes())?;
318
319 self.writer.flush()?;
320
321 Ok(())
322 }
323 }
324
325 impl <W: Write> BackupCatalogWriter for CatalogWriter<W> {
326
327 fn start_directory(&mut self, name: &CStr) -> Result<(), Error> {
328 let new = DirInfo::new(name.to_owned());
329 self.dirstack.push(new);
330 Ok(())
331 }
332
333 fn end_directory(&mut self) -> Result<(), Error> {
334 let (start, name) = match self.dirstack.pop() {
335 Some(dir) => {
336 let start = self.pos;
337 let (name, data) = dir.encode(start)?;
338 self.write_all(&data)?;
339 (start, name)
340 }
341 None => {
342 bail!("got unexpected end_directory level 0");
343 }
344 };
345
346 let current = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
347 let name = name.to_bytes().to_vec();
348 current.entries.push(DirEntry { name, attr: DirEntryAttribute::Directory { start } });
349
350 Ok(())
351 }
352
353 fn add_file(&mut self, name: &CStr, size: u64, mtime: u64) -> Result<(), Error> {
354 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
355 let name = name.to_bytes().to_vec();
356 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::File { size, mtime } });
357 Ok(())
358 }
359
360 fn add_symlink(&mut self, name: &CStr) -> Result<(), Error> {
361 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
362 let name = name.to_bytes().to_vec();
363 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Symlink });
364 Ok(())
365 }
366
367 fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error> {
368 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
369 let name = name.to_bytes().to_vec();
370 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Hardlink });
371 Ok(())
372 }
373
374 fn add_block_device(&mut self, name: &CStr) -> Result<(), Error> {
375 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
376 let name = name.to_bytes().to_vec();
377 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::BlockDevice });
378 Ok(())
379 }
380
381 fn add_char_device(&mut self, name: &CStr) -> Result<(), Error> {
382 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
383 let name = name.to_bytes().to_vec();
384 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::CharDevice });
385 Ok(())
386 }
387
388 fn add_fifo(&mut self, name: &CStr) -> Result<(), Error> {
389 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
390 let name = name.to_bytes().to_vec();
391 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Fifo });
392 Ok(())
393 }
394
395 fn add_socket(&mut self, name: &CStr) -> Result<(), Error> {
396 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
397 let name = name.to_bytes().to_vec();
398 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Socket });
399 Ok(())
400 }
401 }
402
403 // fixme: move to somehere else?
404 /// Implement Write to tokio mpsc channel Sender
405 pub struct SenderWriter(tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>);
406
407 impl SenderWriter {
408 pub fn new(sender: tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>) -> Self {
409 Self(sender)
410 }
411 }
412
413 impl Write for SenderWriter {
414 fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
415 block_on(async move {
416 self.0
417 .send(Ok(buf.to_vec()))
418 .await
419 .map_err(io_err_other)
420 .and(Ok(buf.len()))
421 })
422 }
423
424 fn flush(&mut self) -> Result<(), std::io::Error> {
425 Ok(())
426 }
427 }
428
/// Read Catalog files
pub struct CatalogReader<R> {
    /// Source the catalog is read from.
    reader: R,
}
433
434 impl <R: Read + Seek> CatalogReader<R> {
435
436 /// Create a new CatalogReader instance
437 pub fn new(reader: R) -> Self {
438 Self { reader }
439 }
440
441 /// Print whole catalog to stdout
442 pub fn dump(&mut self) -> Result<(), Error> {
443
444 let root = self.root()?;
445 match root {
446 DirEntry { attr: DirEntryAttribute::Directory { start }, .. }=> {
447 self.dump_dir(std::path::Path::new("./"), start)
448 }
449 _ => unreachable!(),
450 }
451 }
452
453 /// Get the root DirEntry
454 pub fn root(&mut self) -> Result<DirEntry, Error> {
455 // Root dir is special
456 self.reader.seek(SeekFrom::Start(0))?;
457 let mut magic = [ 0u8; 8];
458 self.reader.read_exact(&mut magic)?;
459 if magic != PROXMOX_CATALOG_FILE_MAGIC_1_0 {
460 bail!("got unexpected magic number for catalog");
461 }
462 self.reader.seek(SeekFrom::End(-8))?;
463 let start = unsafe { self.reader.read_le_value::<u64>()? };
464 Ok(DirEntry { name: b"".to_vec(), attr: DirEntryAttribute::Directory { start } })
465 }
466
467 /// Read all directory entries
468 pub fn read_dir(
469 &mut self,
470 parent: &DirEntry,
471 ) -> Result<Vec<DirEntry>, Error> {
472
473 let start = match parent.attr {
474 DirEntryAttribute::Directory { start } => start,
475 _ => bail!("parent is not a directory - internal error"),
476 };
477
478 let data = self.read_raw_dirinfo_block(start)?;
479
480 let mut entry_list = Vec::new();
481
482 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
483 let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
484 entry_list.push(entry);
485 Ok(true)
486 })?;
487
488 Ok(entry_list)
489 }
490
491 /// Lockup a DirEntry inside a parent directory
492 pub fn lookup(
493 &mut self,
494 parent: &DirEntry,
495 filename: &[u8],
496 ) -> Result<Option<DirEntry>, Error> {
497
498 let start = match parent.attr {
499 DirEntryAttribute::Directory { start } => start,
500 _ => bail!("parent is not a directory - internal error"),
501 };
502
503 let data = self.read_raw_dirinfo_block(start)?;
504
505 let mut item = None;
506 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
507 if name != filename {
508 return Ok(true);
509 }
510
511 let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
512 item = Some(entry);
513 Ok(false) // stop parsing
514 })?;
515
516 Ok(item)
517 }
518
519 /// Read the raw directory info block from current reader position.
520 fn read_raw_dirinfo_block(&mut self, start: u64) -> Result<Vec<u8>, Error> {
521 self.reader.seek(SeekFrom::Start(start))?;
522 let size = catalog_decode_u64(&mut self.reader)?;
523 if size < 1 { bail!("got small directory size {}", size) };
524 let data = self.reader.read_exact_allocated(size as usize)?;
525 Ok(data)
526 }
527
528 /// Print the content of a directory to stdout
529 pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
530
531 let data = self.read_raw_dirinfo_block(start)?;
532
533 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
534
535 let mut path = std::path::PathBuf::from(prefix);
536 let name: &OsStr = OsStrExt::from_bytes(name);
537 path.push(name);
538
539 match etype {
540 CatalogEntryType::Directory => {
541 println!("{} {:?}", etype, path);
542 if offset > start {
543 bail!("got wrong directory offset ({} > {})", offset, start);
544 }
545 let pos = start - offset;
546 self.dump_dir(&path, pos)?;
547 }
548 CatalogEntryType::File => {
549 let dt = Local.timestamp(mtime as i64, 0);
550
551 println!(
552 "{} {:?} {} {}",
553 etype,
554 path,
555 size,
556 dt.to_rfc3339_opts(chrono::SecondsFormat::Secs, false),
557 );
558 }
559 _ => {
560 println!("{} {:?}", etype, path);
561 }
562 }
563
564 Ok(true)
565 })
566 }
567
568 /// Finds all entries matching the given match patterns and calls the
569 /// provided callback on them.
570 pub fn find(
571 &mut self,
572 parent: &DirEntry,
573 file_path: &mut Vec<u8>,
574 match_list: &impl MatchList, //&[MatchEntry],
575 callback: &mut dyn FnMut(&[u8]) -> Result<(), Error>,
576 ) -> Result<(), Error> {
577 let file_len = file_path.len();
578 for e in self.read_dir(parent)? {
579 let is_dir = e.is_directory();
580 file_path.truncate(file_len);
581 if !e.name.starts_with(b"/") {
582 file_path.reserve(e.name.len() + 1);
583 file_path.push(b'/');
584 }
585 file_path.extend(&e.name);
586 match match_list.matches(&file_path, e.get_file_mode()) {
587 Some(MatchType::Exclude) => continue,
588 Some(MatchType::Include) => callback(&file_path)?,
589 None => (),
590 }
591 if is_dir {
592 self.find(&e, file_path, match_list, callback)?;
593 }
594 }
595 file_path.truncate(file_len);
596
597 Ok(())
598 }
599 }
600
601 /// Serialize u64 as short, variable length byte sequence
602 ///
603 /// Stores 7 bits per byte, Bit 8 indicates the end of the sequence (when not set).
604 /// We limit values to a maximum of 2^63.
605 pub fn catalog_encode_u64<W: Write>(writer: &mut W, v: u64) -> Result<(), Error> {
606 let mut enc = Vec::new();
607
608 if (v & (1<<63)) != 0 { bail!("catalog_encode_u64 failed - value >= 2^63"); }
609 let mut d = v;
610 loop {
611 if d < 128 {
612 enc.push(d as u8);
613 break;
614 }
615 enc.push((128 | (d & 127)) as u8);
616 d = d >> 7;
617 }
618 writer.write_all(&enc)?;
619
620 Ok(())
621 }
622
623 /// Deserialize u64 from variable length byte sequence
624 ///
625 /// We currently read maximal 9 bytes, which give a maximum of 63 bits.
626 pub fn catalog_decode_u64<R: Read>(reader: &mut R) -> Result<u64, Error> {
627
628 let mut v: u64 = 0;
629 let mut buf = [0u8];
630
631 for i in 0..9 { // only allow 9 bytes (63 bits)
632 if buf.is_empty() {
633 bail!("decode_u64 failed - unexpected EOB");
634 }
635 reader.read_exact(&mut buf)?;
636 let t = buf[0];
637 if t < 128 {
638 v |= (t as u64) << (i*7);
639 return Ok(v);
640 } else {
641 v |= ((t & 127) as u64) << (i*7);
642 }
643 }
644
645 bail!("decode_u64 failed - missing end marker");
646 }
647
#[test]
fn test_catalog_u64_encoder() {
    /// Encode `value`, decode the result again and require the same value back.
    fn test_encode_decode(value: u64) {
        let mut encoded = Vec::new();
        catalog_encode_u64(&mut encoded, value).unwrap();

        let mut remaining = &encoded[..];
        let decoded = catalog_decode_u64(&mut remaining).unwrap();

        assert!(decoded == value);
    }

    let samples: [u64; 5] = [
        126,
        (1 << 12) - 1,
        (1 << 20) - 1,
        (1 << 50) - 1,
        (1 << 63) - 1,
    ];
    for &value in samples.iter() {
        test_encode_decode(value);
    }
}