]> git.proxmox.com Git - proxmox-backup.git/blob - src/backup/catalog.rs
65605e6e536766e630ad5e29c3a0fde82ff92777
[proxmox-backup.git] / src / backup / catalog.rs
1 use failure::*;
2 use std::fmt;
3 use std::ffi::{CStr, CString, OsStr};
4 use std::os::unix::ffi::OsStrExt;
5 use std::io::{Read, Write, Seek, SeekFrom};
6 use std::convert::TryFrom;
7
8 use chrono::offset::{TimeZone, Local};
9
10 use proxmox::tools::io::ReadExt;
11
12 use crate::pxar::catalog::BackupCatalogWriter;
13 use crate::pxar::{MatchPattern, MatchPatternSlice, MatchType};
14 use crate::backup::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
15
/// Type tag of a catalog entry.
///
/// The discriminant of each variant is the single ASCII byte that is
/// written in front of an encoded entry.
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CatalogEntryType {
    Directory = b'd',
    File = b'f',
    Symlink = b'l',
    Hardlink = b'h',
    BlockDevice = b'b',
    CharDevice = b'c',
    Fifo = b'p', // Fifo,Pipe
    Socket = b's',
}
28
29 impl TryFrom<u8> for CatalogEntryType {
30 type Error=Error;
31
32 fn try_from(value: u8) -> Result<Self, Error> {
33 Ok(match value {
34 b'd' => CatalogEntryType::Directory,
35 b'f' => CatalogEntryType::File,
36 b'l' => CatalogEntryType::Symlink,
37 b'h' => CatalogEntryType::Hardlink,
38 b'b' => CatalogEntryType::BlockDevice,
39 b'c' => CatalogEntryType::CharDevice,
40 b'p' => CatalogEntryType::Fifo,
41 b's' => CatalogEntryType::Socket,
42 _ => bail!("invalid CatalogEntryType value '{}'", char::from(value)),
43 })
44 }
45 }
46
47 impl fmt::Display for CatalogEntryType {
48 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49 write!(f, "{}", char::from(*self as u8))
50 }
51 }
52
/// Represents a named directory entry
///
/// The ``attr`` property contains the exact type together with its
/// type-specific attributes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DirEntry {
    /// Raw bytes of the entry name.
    pub name: Vec<u8>,
    /// Entry type and its type-specific attributes.
    pub attr: DirEntryAttribute,
}

/// Used to specify additional attributes inside DirEntry
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DirEntryAttribute {
    /// A directory; `start` is the position inside the catalog where the
    /// directory's info block begins.
    Directory { start: u64 },
    /// A regular file with its size and last modification time
    /// (`mtime`, seconds since the epoch).
    File { size: u64, mtime: u64 },
    Symlink,
    Hardlink,
    BlockDevice,
    CharDevice,
    Fifo,
    Socket,
}
75
76 impl DirEntry {
77
78 fn new(etype: CatalogEntryType, name: Vec<u8>, start: u64, size: u64, mtime:u64) -> Self {
79 match etype {
80 CatalogEntryType::Directory => {
81 DirEntry { name, attr: DirEntryAttribute::Directory { start } }
82 }
83 CatalogEntryType::File => {
84 DirEntry { name, attr: DirEntryAttribute::File { size, mtime } }
85 }
86 CatalogEntryType::Symlink => {
87 DirEntry { name, attr: DirEntryAttribute::Symlink }
88 }
89 CatalogEntryType::Hardlink => {
90 DirEntry { name, attr: DirEntryAttribute::Hardlink }
91 }
92 CatalogEntryType::BlockDevice => {
93 DirEntry { name, attr: DirEntryAttribute::BlockDevice }
94 }
95 CatalogEntryType::CharDevice => {
96 DirEntry { name, attr: DirEntryAttribute::CharDevice }
97 }
98 CatalogEntryType::Fifo => {
99 DirEntry { name, attr: DirEntryAttribute::Fifo }
100 }
101 CatalogEntryType::Socket => {
102 DirEntry { name, attr: DirEntryAttribute::Socket }
103 }
104 }
105 }
106
107 pub fn is_directory(&self) -> bool {
108 match self.attr {
109 DirEntryAttribute::Directory { .. } => true,
110 _ => false,
111 }
112 }
113 }
114
115 struct DirInfo {
116 name: CString,
117 entries: Vec<DirEntry>,
118 }
119
120 impl DirInfo {
121
122 fn new(name: CString) -> Self {
123 DirInfo { name, entries: Vec::new() }
124 }
125
126 fn new_rootdir() -> Self {
127 DirInfo::new(CString::new(b"/".to_vec()).unwrap())
128 }
129
130 fn encode_entry<W: Write>(
131 writer: &mut W,
132 entry: &DirEntry,
133 pos: u64,
134 ) -> Result<(), Error> {
135 match entry {
136 DirEntry { name, attr: DirEntryAttribute::Directory { start } } => {
137 writer.write_all(&[CatalogEntryType::Directory as u8])?;
138 catalog_encode_u64(writer, name.len() as u64)?;
139 writer.write_all(name)?;
140 catalog_encode_u64(writer, pos - start)?;
141 }
142 DirEntry { name, attr: DirEntryAttribute::File { size, mtime } } => {
143 writer.write_all(&[CatalogEntryType::File as u8])?;
144 catalog_encode_u64(writer, name.len() as u64)?;
145 writer.write_all(name)?;
146 catalog_encode_u64(writer, *size)?;
147 catalog_encode_u64(writer, *mtime)?;
148 }
149 DirEntry { name, attr: DirEntryAttribute::Symlink } => {
150 writer.write_all(&[CatalogEntryType::Symlink as u8])?;
151 catalog_encode_u64(writer, name.len() as u64)?;
152 writer.write_all(name)?;
153 }
154 DirEntry { name, attr: DirEntryAttribute::Hardlink } => {
155 writer.write_all(&[CatalogEntryType::Hardlink as u8])?;
156 catalog_encode_u64(writer, name.len() as u64)?;
157 writer.write_all(name)?;
158 }
159 DirEntry { name, attr: DirEntryAttribute::BlockDevice } => {
160 writer.write_all(&[CatalogEntryType::BlockDevice as u8])?;
161 catalog_encode_u64(writer, name.len() as u64)?;
162 writer.write_all(name)?;
163 }
164 DirEntry { name, attr: DirEntryAttribute::CharDevice } => {
165 writer.write_all(&[CatalogEntryType::CharDevice as u8])?;
166 catalog_encode_u64(writer, name.len() as u64)?;
167 writer.write_all(name)?;
168 }
169 DirEntry { name, attr: DirEntryAttribute::Fifo } => {
170 writer.write_all(&[CatalogEntryType::Fifo as u8])?;
171 catalog_encode_u64(writer, name.len() as u64)?;
172 writer.write_all(name)?;
173 }
174 DirEntry { name, attr: DirEntryAttribute::Socket } => {
175 writer.write_all(&[CatalogEntryType::Socket as u8])?;
176 catalog_encode_u64(writer, name.len() as u64)?;
177 writer.write_all(name)?;
178 }
179 }
180 Ok(())
181 }
182
183 fn encode(self, start: u64) -> Result<(CString, Vec<u8>), Error> {
184 let mut table = Vec::new();
185 catalog_encode_u64(&mut table, self.entries.len() as u64)?;
186 for entry in self.entries {
187 Self::encode_entry(&mut table, &entry, start)?;
188 }
189
190 let mut data = Vec::new();
191 catalog_encode_u64(&mut data, table.len() as u64)?;
192 data.extend_from_slice(&table);
193
194 Ok((self.name, data))
195 }
196
197 fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, u64) -> Result<bool, Error>>(
198 data: &[u8],
199 mut callback: C,
200 ) -> Result<(), Error> {
201
202 let mut cursor = data;
203
204 let entries = catalog_decode_u64(&mut cursor)?;
205
206 let mut name_buf = vec![0u8; 4096];
207
208 for _ in 0..entries {
209
210 let mut buf = [ 0u8 ];
211 cursor.read_exact(&mut buf)?;
212 let etype = CatalogEntryType::try_from(buf[0])?;
213
214 let name_len = catalog_decode_u64(&mut cursor)? as usize;
215 if name_len >= name_buf.len() {
216 bail!("directory entry name too long ({} >= {})", name_len, name_buf.len());
217 }
218 let name = &mut name_buf[0..name_len];
219 cursor.read_exact(name)?;
220
221 let cont = match etype {
222 CatalogEntryType::Directory => {
223 let offset = catalog_decode_u64(&mut cursor)?;
224 callback(etype, name, offset, 0, 0)?
225 }
226 CatalogEntryType::File => {
227 let size = catalog_decode_u64(&mut cursor)?;
228 let mtime = catalog_decode_u64(&mut cursor)?;
229 callback(etype, name, 0, size, mtime)?
230 }
231 _ => {
232 callback(etype, name, 0, 0, 0)?
233 }
234 };
235 if !cont {
236 return Ok(());
237 }
238 }
239
240 if !cursor.is_empty() {
241 bail!("unable to parse whole catalog data block");
242 }
243
244 Ok(())
245 }
246 }
247
248 /// Write small catalog files
249 ///
250 /// A Catalogs simply contains list of files and directories
251 /// (directory tree). They are use to find content without having to
252 /// search the real archive (which may be large). For files, they
253 /// include the last modification time and file size.
254 pub struct CatalogWriter<W> {
255 writer: W,
256 dirstack: Vec<DirInfo>,
257 pos: u64,
258 }
259
260 impl <W: Write> CatalogWriter<W> {
261
262 /// Create a new CatalogWriter instance
263 pub fn new(writer: W) -> Result<Self, Error> {
264 let mut me = Self { writer, dirstack: vec![ DirInfo::new_rootdir() ], pos: 0 };
265 me.write_all(&PROXMOX_CATALOG_FILE_MAGIC_1_0)?;
266 Ok(me)
267 }
268
269 fn write_all(&mut self, data: &[u8]) -> Result<(), Error> {
270 self.writer.write_all(data)?;
271 self.pos += u64::try_from(data.len())?;
272 Ok(())
273 }
274
275 /// Finish writing, flush all data
276 ///
277 /// This need to be called before drop.
278 pub fn finish(&mut self) -> Result<(), Error> {
279 if self.dirstack.len() != 1 {
280 bail!("unable to finish catalog at level {}", self.dirstack.len());
281 }
282
283 let dir = self.dirstack.pop().unwrap();
284
285 let start = self.pos;
286 let (_, data) = dir.encode(start)?;
287 self.write_all(&data)?;
288
289 self.write_all(&start.to_le_bytes())?;
290
291 self.writer.flush()?;
292
293 Ok(())
294 }
295 }
296
297 impl <W: Write> BackupCatalogWriter for CatalogWriter<W> {
298
299 fn start_directory(&mut self, name: &CStr) -> Result<(), Error> {
300 let new = DirInfo::new(name.to_owned());
301 self.dirstack.push(new);
302 Ok(())
303 }
304
305 fn end_directory(&mut self) -> Result<(), Error> {
306 let (start, name) = match self.dirstack.pop() {
307 Some(dir) => {
308 let start = self.pos;
309 let (name, data) = dir.encode(start)?;
310 self.write_all(&data)?;
311 (start, name)
312 }
313 None => {
314 bail!("got unexpected end_directory level 0");
315 }
316 };
317
318 let current = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
319 let name = name.to_bytes().to_vec();
320 current.entries.push(DirEntry { name, attr: DirEntryAttribute::Directory { start } });
321
322 Ok(())
323 }
324
325 fn add_file(&mut self, name: &CStr, size: u64, mtime: u64) -> Result<(), Error> {
326 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
327 let name = name.to_bytes().to_vec();
328 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::File { size, mtime } });
329 Ok(())
330 }
331
332 fn add_symlink(&mut self, name: &CStr) -> Result<(), Error> {
333 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
334 let name = name.to_bytes().to_vec();
335 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Symlink });
336 Ok(())
337 }
338
339 fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error> {
340 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
341 let name = name.to_bytes().to_vec();
342 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Hardlink });
343 Ok(())
344 }
345
346 fn add_block_device(&mut self, name: &CStr) -> Result<(), Error> {
347 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
348 let name = name.to_bytes().to_vec();
349 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::BlockDevice });
350 Ok(())
351 }
352
353 fn add_char_device(&mut self, name: &CStr) -> Result<(), Error> {
354 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
355 let name = name.to_bytes().to_vec();
356 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::CharDevice });
357 Ok(())
358 }
359
360 fn add_fifo(&mut self, name: &CStr) -> Result<(), Error> {
361 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
362 let name = name.to_bytes().to_vec();
363 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Fifo });
364 Ok(())
365 }
366
367 fn add_socket(&mut self, name: &CStr) -> Result<(), Error> {
368 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
369 let name = name.to_bytes().to_vec();
370 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Socket });
371 Ok(())
372 }
373 }
374
375 // fixme: move to somehere else?
376 /// Implement Write to tokio mpsc channel Sender
377 pub struct SenderWriter(tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>);
378
379 impl SenderWriter {
380 pub fn new(sender: tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>) -> Self {
381 Self(sender)
382 }
383 }
384
385 impl Write for SenderWriter {
386 fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
387 tokio::task::block_in_place(|| {
388 futures::executor::block_on(async move {
389 self.0.send(Ok(buf.to_vec())).await
390 .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err.to_string()))?;
391 Ok(buf.len())
392 })
393 })
394 }
395
396 fn flush(&mut self) -> Result<(), std::io::Error> {
397 Ok(())
398 }
399 }
400
401 /// Read Catalog files
402 pub struct CatalogReader<R> {
403 reader: R,
404 }
405
406 impl <R: Read + Seek> CatalogReader<R> {
407
408 /// Create a new CatalogReader instance
409 pub fn new(reader: R) -> Self {
410 Self { reader }
411 }
412
413 /// Print whole catalog to stdout
414 pub fn dump(&mut self) -> Result<(), Error> {
415
416 let root = self.root()?;
417 match root {
418 DirEntry { attr: DirEntryAttribute::Directory { start }, .. }=> {
419 self.dump_dir(std::path::Path::new("./"), start)
420 }
421 _ => unreachable!(),
422 }
423 }
424
425 /// Get the root DirEntry
426 pub fn root(&mut self) -> Result<DirEntry, Error> {
427 // Root dir is special
428 self.reader.seek(SeekFrom::Start(0))?;
429 let mut magic = [ 0u8; 8];
430 self.reader.read_exact(&mut magic)?;
431 if magic != PROXMOX_CATALOG_FILE_MAGIC_1_0 {
432 bail!("got unexpected magic number for catalog");
433 }
434 self.reader.seek(SeekFrom::End(-8))?;
435 let start = unsafe { self.reader.read_le_value::<u64>()? };
436 Ok(DirEntry { name: b"".to_vec(), attr: DirEntryAttribute::Directory { start } })
437 }
438
439 /// Read all directory entries
440 pub fn read_dir(
441 &mut self,
442 parent: &DirEntry,
443 ) -> Result<Vec<DirEntry>, Error> {
444
445 let start = match parent.attr {
446 DirEntryAttribute::Directory { start } => start,
447 _ => bail!("parent is not a directory - internal error"),
448 };
449
450 let data = self.read_raw_dirinfo_block(start)?;
451
452 let mut entry_list = Vec::new();
453
454 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
455 let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
456 entry_list.push(entry);
457 Ok(true)
458 })?;
459
460 Ok(entry_list)
461 }
462
463 /// Lockup a DirEntry inside a parent directory
464 pub fn lookup(
465 &mut self,
466 parent: &DirEntry,
467 filename: &[u8],
468 ) -> Result<DirEntry, Error> {
469
470 let start = match parent.attr {
471 DirEntryAttribute::Directory { start } => start,
472 _ => bail!("parent is not a directory - internal error"),
473 };
474
475 let data = self.read_raw_dirinfo_block(start)?;
476
477 let mut item = None;
478 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
479 if name != filename {
480 return Ok(true);
481 }
482
483 let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
484 item = Some(entry);
485 Ok(false) // stop parsing
486 })?;
487
488 match item {
489 None => bail!("no such file"),
490 Some(entry) => Ok(entry),
491 }
492 }
493
494 /// Read the raw directory info block from current reader position.
495 fn read_raw_dirinfo_block(&mut self, start: u64) -> Result<Vec<u8>, Error> {
496 self.reader.seek(SeekFrom::Start(start))?;
497 let size = catalog_decode_u64(&mut self.reader)?;
498 if size < 1 { bail!("got small directory size {}", size) };
499 let data = self.reader.read_exact_allocated(size as usize)?;
500 Ok(data)
501 }
502
503 /// Print the content of a directory to stdout
504 pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
505
506 let data = self.read_raw_dirinfo_block(start)?;
507
508 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
509
510 let mut path = std::path::PathBuf::from(prefix);
511 let name: &OsStr = OsStrExt::from_bytes(name);
512 path.push(name);
513
514 match etype {
515 CatalogEntryType::Directory => {
516 println!("{} {:?}", etype, path);
517 if offset > start {
518 bail!("got wrong directory offset ({} > {})", offset, start);
519 }
520 let pos = start - offset;
521 self.dump_dir(&path, pos)?;
522 }
523 CatalogEntryType::File => {
524 let dt = Local.timestamp(mtime as i64, 0);
525
526 println!(
527 "{} {:?} {} {}",
528 etype,
529 path,
530 size,
531 dt.to_rfc3339_opts(chrono::SecondsFormat::Secs, false),
532 );
533 }
534 _ => {
535 println!("{} {:?}", etype, path);
536 }
537 }
538
539 Ok(true)
540 })
541 }
542
543 /// Finds all entries matching the given match patterns and calls the
544 /// provided callback on them.
545 pub fn find(
546 &mut self,
547 mut entry: &mut Vec<DirEntry>,
548 pattern: &[MatchPatternSlice],
549 callback: &Box<fn(&[DirEntry])>,
550 ) -> Result<(), Error> {
551 let parent = entry.last().unwrap();
552 if !parent.is_directory() {
553 return Ok(())
554 }
555
556 for e in self.read_dir(parent)? {
557 match MatchPatternSlice::match_filename_include(
558 &CString::new(e.name.clone())?,
559 e.is_directory(),
560 pattern,
561 )? {
562 (MatchType::Positive, _) => {
563 entry.push(e);
564 callback(&entry);
565 let pattern = MatchPattern::from_line(b"**/*").unwrap().unwrap();
566 let child_pattern = vec![pattern.as_slice()];
567 self.find(&mut entry, &child_pattern, callback)?;
568 entry.pop();
569 }
570 (MatchType::PartialPositive, child_pattern)
571 | (MatchType::PartialNegative, child_pattern) => {
572 entry.push(e);
573 self.find(&mut entry, &child_pattern, callback)?;
574 entry.pop();
575 }
576 _ => {}
577 }
578 }
579
580 Ok(())
581 }
582 }
583
584 /// Serialize u64 as short, variable length byte sequence
585 ///
586 /// Stores 7 bits per byte, Bit 8 indicates the end of the sequence (when not set).
587 /// We limit values to a maximum of 2^63.
588 pub fn catalog_encode_u64<W: Write>(writer: &mut W, v: u64) -> Result<(), Error> {
589 let mut enc = Vec::new();
590
591 if (v & (1<<63)) != 0 { bail!("catalog_encode_u64 failed - value >= 2^63"); }
592 let mut d = v;
593 loop {
594 if d < 128 {
595 enc.push(d as u8);
596 break;
597 }
598 enc.push((128 | (d & 127)) as u8);
599 d = d >> 7;
600 }
601 writer.write_all(&enc)?;
602
603 Ok(())
604 }
605
606 /// Deserialize u64 from variable length byte sequence
607 ///
608 /// We currently read maximal 9 bytes, which give a maximum of 63 bits.
609 pub fn catalog_decode_u64<R: Read>(reader: &mut R) -> Result<u64, Error> {
610
611 let mut v: u64 = 0;
612 let mut buf = [0u8];
613
614 for i in 0..9 { // only allow 9 bytes (63 bits)
615 if buf.is_empty() {
616 bail!("decode_u64 failed - unexpected EOB");
617 }
618 reader.read_exact(&mut buf)?;
619 let t = buf[0];
620 if t < 128 {
621 v |= (t as u64) << (i*7);
622 return Ok(v);
623 } else {
624 v |= ((t & 127) as u64) << (i*7);
625 }
626 }
627
628 bail!("decode_u64 failed - missing end marker");
629 }
630
/// Round-trip and boundary tests for the variable length u64 encoding.
#[test]
fn test_catalog_u64_encoder() {
    /// Encode `value`, decode it again and make sure nothing is lost and
    /// the whole encoding was consumed.
    fn test_encode_decode(value: u64) {
        let mut data = Vec::new();
        catalog_encode_u64(&mut data, value).unwrap();

        let slice = &mut &data[..];
        let decoded = catalog_decode_u64(slice).unwrap();

        assert_eq!(decoded, value);
        assert!(slice.is_empty());
    }

    // smallest value and the one/two byte boundary
    test_encode_decode(0);
    test_encode_decode(126);
    test_encode_decode(127);
    test_encode_decode(128);

    test_encode_decode((1 << 12) - 1);
    test_encode_decode((1 << 20) - 1);
    test_encode_decode((1 << 50) - 1);
    test_encode_decode((1 << 63) - 1);

    // values with bit 63 set cannot be encoded
    let mut sink = Vec::new();
    assert!(catalog_encode_u64(&mut sink, 1 << 63).is_err());

    // nine continuation bytes without an end marker must be rejected
    let unterminated = [0xffu8; 9];
    let mut cursor = &unterminated[..];
    assert!(catalog_decode_u64(&mut cursor).is_err());

    // truncated input (continuation byte, then end of data)
    let truncated = [0x80u8];
    let mut cursor = &truncated[..];
    assert!(catalog_decode_u64(&mut cursor).is_err());
}