git.proxmox.com Git - proxmox-backup.git/blob - src/backup/catalog.rs
src/backup/catalog.rs: impl read_dir() and lookup(), refactor common code
[proxmox-backup.git] / src / backup / catalog.rs
1 use failure::*;
2 use std::fmt;
3 use std::ffi::{CStr, CString, OsStr};
4 use std::os::unix::ffi::OsStrExt;
5 use std::io::{Read, Write, Seek, SeekFrom};
6 use std::convert::TryFrom;
7
8 use chrono::offset::{TimeZone, Local};
9
10 use proxmox::tools::io::ReadExt;
11
12 use crate::pxar::catalog::BackupCatalogWriter;
13 use crate::backup::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
14
/// Type tag of a catalog entry.
///
/// The discriminant of each variant is the single ASCII byte used to
/// represent that entry type (see the `as u8` / `TryFrom<u8>` conversions).
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CatalogEntryType {
    Directory = b'd',
    File = b'f',
    Symlink = b'l',
    Hardlink = b'h',
    BlockDevice = b'b',
    CharDevice = b'c',
    Fifo = b'p', // Fifo,Pipe
    Socket = b's',
}
27
28 impl TryFrom<u8> for CatalogEntryType {
29 type Error=Error;
30
31 fn try_from(value: u8) -> Result<Self, Error> {
32 Ok(match value {
33 b'd' => CatalogEntryType::Directory,
34 b'f' => CatalogEntryType::File,
35 b'l' => CatalogEntryType::Symlink,
36 b'h' => CatalogEntryType::Hardlink,
37 b'b' => CatalogEntryType::BlockDevice,
38 b'c' => CatalogEntryType::CharDevice,
39 b'p' => CatalogEntryType::Fifo,
40 b's' => CatalogEntryType::Socket,
41 _ => bail!("invalid CatalogEntryType value '{}'", char::from(value)),
42 })
43 }
44 }
45
46 impl fmt::Display for CatalogEntryType {
47 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48 write!(f, "{}", char::from(*self as u8))
49 }
50 }
51
52 pub struct DirEntry {
53 pub name: Vec<u8>,
54 pub attr: DirEntryAttribute,
55 }
56
57 impl DirEntry {
58
59 fn new(etype: CatalogEntryType, name: Vec<u8>, start: u64, size: u64, mtime:u64) -> Self {
60 match etype {
61 CatalogEntryType::Directory => {
62 DirEntry { name, attr: DirEntryAttribute::Directory { start } }
63 }
64 CatalogEntryType::File => {
65 DirEntry { name, attr: DirEntryAttribute::File { size, mtime } }
66 }
67 CatalogEntryType::Symlink => {
68 DirEntry { name, attr: DirEntryAttribute::Symlink }
69 }
70 CatalogEntryType::Hardlink => {
71 DirEntry { name, attr: DirEntryAttribute::Hardlink }
72 }
73 CatalogEntryType::BlockDevice => {
74 DirEntry { name, attr: DirEntryAttribute::BlockDevice }
75 }
76 CatalogEntryType::CharDevice => {
77 DirEntry { name, attr: DirEntryAttribute::CharDevice }
78 }
79 CatalogEntryType::Fifo => {
80 DirEntry { name, attr: DirEntryAttribute::Fifo }
81 }
82 CatalogEntryType::Socket => {
83 DirEntry { name, attr: DirEntryAttribute::Socket }
84 }
85 }
86 }
87 }
88
/// Kind of a directory entry, including the data stored for that kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DirEntryAttribute {
    /// A sub-directory; `start` locates its entry table inside the catalog.
    Directory { start: u64 },
    /// A regular file with the size and modification time recorded for it.
    File { size: u64, mtime: u64 },
    Symlink,
    Hardlink,
    BlockDevice,
    CharDevice,
    Fifo,
    Socket,
}
99
100 struct DirInfo {
101 name: CString,
102 entries: Vec<DirEntry>,
103 }
104
105 impl DirInfo {
106
107 fn new(name: CString) -> Self {
108 DirInfo { name, entries: Vec::new() }
109 }
110
111 fn new_rootdir() -> Self {
112 DirInfo::new(CString::new(b"/".to_vec()).unwrap())
113 }
114
115 fn encode_entry<W: Write>(
116 writer: &mut W,
117 entry: &DirEntry,
118 pos: u64,
119 ) -> Result<(), Error> {
120 match entry {
121 DirEntry { name, attr: DirEntryAttribute::Directory { start } } => {
122 writer.write_all(&[CatalogEntryType::Directory as u8])?;
123 catalog_encode_u64(writer, name.len() as u64)?;
124 writer.write_all(name)?;
125 catalog_encode_u64(writer, pos - start)?;
126 }
127 DirEntry { name, attr: DirEntryAttribute::File { size, mtime } } => {
128 writer.write_all(&[CatalogEntryType::File as u8])?;
129 catalog_encode_u64(writer, name.len() as u64)?;
130 writer.write_all(name)?;
131 catalog_encode_u64(writer, *size)?;
132 catalog_encode_u64(writer, *mtime)?;
133 }
134 DirEntry { name, attr: DirEntryAttribute::Symlink } => {
135 writer.write_all(&[CatalogEntryType::Symlink as u8])?;
136 catalog_encode_u64(writer, name.len() as u64)?;
137 writer.write_all(name)?;
138 }
139 DirEntry { name, attr: DirEntryAttribute::Hardlink } => {
140 writer.write_all(&[CatalogEntryType::Hardlink as u8])?;
141 catalog_encode_u64(writer, name.len() as u64)?;
142 writer.write_all(name)?;
143 }
144 DirEntry { name, attr: DirEntryAttribute::BlockDevice } => {
145 writer.write_all(&[CatalogEntryType::BlockDevice as u8])?;
146 catalog_encode_u64(writer, name.len() as u64)?;
147 writer.write_all(name)?;
148 }
149 DirEntry { name, attr: DirEntryAttribute::CharDevice } => {
150 writer.write_all(&[CatalogEntryType::CharDevice as u8])?;
151 catalog_encode_u64(writer, name.len() as u64)?;
152 writer.write_all(name)?;
153 }
154 DirEntry { name, attr: DirEntryAttribute::Fifo } => {
155 writer.write_all(&[CatalogEntryType::Fifo as u8])?;
156 catalog_encode_u64(writer, name.len() as u64)?;
157 writer.write_all(name)?;
158 }
159 DirEntry { name, attr: DirEntryAttribute::Socket } => {
160 writer.write_all(&[CatalogEntryType::Socket as u8])?;
161 catalog_encode_u64(writer, name.len() as u64)?;
162 writer.write_all(name)?;
163 }
164 }
165 Ok(())
166 }
167
168 fn encode(self, start: u64) -> Result<(CString, Vec<u8>), Error> {
169 let mut table = Vec::new();
170 catalog_encode_u64(&mut table, self.entries.len() as u64)?;
171 for entry in self.entries {
172 Self::encode_entry(&mut table, &entry, start)?;
173 }
174
175 let mut data = Vec::new();
176 catalog_encode_u64(&mut data, table.len() as u64)?;
177 data.extend_from_slice(&table);
178
179 Ok((self.name, data))
180 }
181
182 fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, u64) -> Result<(), Error>>(
183 data: &[u8],
184 mut callback: C,
185 ) -> Result<(), Error> {
186
187 let mut cursor = data;
188
189 let entries = catalog_decode_u64(&mut cursor)?;
190
191 let mut name_buf = vec![0u8; 4096];
192
193 for _ in 0..entries {
194
195 let mut buf = [ 0u8 ];
196 cursor.read_exact(&mut buf)?;
197 let etype = CatalogEntryType::try_from(buf[0])?;
198
199 let name_len = catalog_decode_u64(&mut cursor)? as usize;
200 if name_len >= name_buf.len() {
201 bail!("directory entry name too long ({} >= {})", name_len, name_buf.len());
202 }
203 let name = &mut name_buf[0..name_len];
204 cursor.read_exact(name)?;
205
206 match etype {
207 CatalogEntryType::Directory => {
208 let offset = catalog_decode_u64(&mut cursor)?;
209 callback(etype, name, offset, 0, 0)?;
210 }
211 CatalogEntryType::File => {
212 let size = catalog_decode_u64(&mut cursor)?;
213 let mtime = catalog_decode_u64(&mut cursor)?;
214 callback(etype, name, 0, size, mtime)?;
215 }
216 _ => {
217 callback(etype, name, 0, 0, 0)?;
218 }
219 }
220 }
221
222 if !cursor.is_empty() {
223 bail!("unable to parse whole catalog data block");
224 }
225
226 Ok(())
227 }
228 }
229
230 pub struct CatalogWriter<W> {
231 writer: W,
232 dirstack: Vec<DirInfo>,
233 pos: u64,
234 }
235
236 impl <W: Write> CatalogWriter<W> {
237
238 pub fn new(writer: W) -> Result<Self, Error> {
239 let mut me = Self { writer, dirstack: vec![ DirInfo::new_rootdir() ], pos: 0 };
240 me.write_all(&PROXMOX_CATALOG_FILE_MAGIC_1_0)?;
241 Ok(me)
242 }
243
244 fn write_all(&mut self, data: &[u8]) -> Result<(), Error> {
245 self.writer.write_all(data)?;
246 self.pos += u64::try_from(data.len())?;
247 Ok(())
248 }
249
250 pub fn finish(&mut self) -> Result<(), Error> {
251 if self.dirstack.len() != 1 {
252 bail!("unable to finish catalog at level {}", self.dirstack.len());
253 }
254
255 let dir = self.dirstack.pop().unwrap();
256
257 let start = self.pos;
258 let (_, data) = dir.encode(start)?;
259 self.write_all(&data)?;
260
261 self.write_all(&start.to_le_bytes())?;
262
263 self.writer.flush()?;
264
265 Ok(())
266 }
267 }
268
269 impl <W: Write> BackupCatalogWriter for CatalogWriter<W> {
270
271 fn start_directory(&mut self, name: &CStr) -> Result<(), Error> {
272 let new = DirInfo::new(name.to_owned());
273 self.dirstack.push(new);
274 Ok(())
275 }
276
277 fn end_directory(&mut self) -> Result<(), Error> {
278 let (start, name) = match self.dirstack.pop() {
279 Some(dir) => {
280 let start = self.pos;
281 let (name, data) = dir.encode(start)?;
282 self.write_all(&data)?;
283 (start, name)
284 }
285 None => {
286 bail!("got unexpected end_directory level 0");
287 }
288 };
289
290 let current = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
291 let name = name.to_bytes().to_vec();
292 current.entries.push(DirEntry { name, attr: DirEntryAttribute::Directory { start } });
293
294 Ok(())
295 }
296
297 fn add_file(&mut self, name: &CStr, size: u64, mtime: u64) -> Result<(), Error> {
298 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
299 let name = name.to_bytes().to_vec();
300 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::File { size, mtime } });
301 Ok(())
302 }
303
304 fn add_symlink(&mut self, name: &CStr) -> Result<(), Error> {
305 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
306 let name = name.to_bytes().to_vec();
307 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Symlink });
308 Ok(())
309 }
310
311 fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error> {
312 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
313 let name = name.to_bytes().to_vec();
314 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Hardlink });
315 Ok(())
316 }
317
318 fn add_block_device(&mut self, name: &CStr) -> Result<(), Error> {
319 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
320 let name = name.to_bytes().to_vec();
321 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::BlockDevice });
322 Ok(())
323 }
324
325 fn add_char_device(&mut self, name: &CStr) -> Result<(), Error> {
326 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
327 let name = name.to_bytes().to_vec();
328 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::CharDevice });
329 Ok(())
330 }
331
332 fn add_fifo(&mut self, name: &CStr) -> Result<(), Error> {
333 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
334 let name = name.to_bytes().to_vec();
335 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Fifo });
336 Ok(())
337 }
338
339 fn add_socket(&mut self, name: &CStr) -> Result<(), Error> {
340 let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
341 let name = name.to_bytes().to_vec();
342 dir.entries.push(DirEntry { name, attr: DirEntryAttribute::Socket });
343 Ok(())
344 }
345 }
346
347 // fixme: move to somehere else?
348 /// Implement Write to tokio mpsc channel Sender
349 pub struct SenderWriter(tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>);
350
351 impl SenderWriter {
352 pub fn new(sender: tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>) -> Self {
353 Self(sender)
354 }
355 }
356
357 impl Write for SenderWriter {
358 fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
359 futures::executor::block_on(async move {
360 self.0.send(Ok(buf.to_vec())).await
361 .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err.to_string()))?;
362 Ok(buf.len())
363 })
364 }
365
366 fn flush(&mut self) -> Result<(), std::io::Error> {
367 Ok(())
368 }
369 }
370
/// Reads a catalog from an underlying reader.
pub struct CatalogReader<R> {
    /// Source of the encoded catalog.
    reader: R,
}
374
375 impl <R: Read + Seek> CatalogReader<R> {
376
377 pub fn new(reader: R) -> Self {
378 Self { reader }
379 }
380
381 pub fn dump(&mut self) -> Result<(), Error> {
382
383 self.reader.seek(SeekFrom::End(-8))?;
384
385 let start = unsafe { self.reader.read_le_value::<u64>()? };
386
387 self.dump_dir(std::path::Path::new("./"), start)
388 }
389
390 /// Get the root DirEntry
391 pub fn root(&mut self) -> Result<DirEntry, Error> {
392 // Root dir is special
393 // mixme: verify magic
394 self.reader.seek(SeekFrom::End(-8))?;
395 let start = unsafe { self.reader.read_le_value::<u64>()? };
396 Ok(DirEntry { name: b"".to_vec(), attr: DirEntryAttribute::Directory { start } })
397 }
398
399 /// Read all directory entries
400 pub fn read_dir(
401 &mut self,
402 parent: &DirEntry,
403 ) -> Result<Vec<DirEntry>, Error> {
404
405 let start = match parent.attr {
406 DirEntryAttribute::Directory { start } => start,
407 _ => bail!("parent is not a directory - internal error"),
408 };
409
410 let data = self.read_raw_dirinfo_block(start)?;
411
412 let mut entry_list = Vec::new();
413
414 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
415 let entry = DirEntry::new(etype, name.to_vec(), offset, size, mtime);
416 entry_list.push(entry);
417 Ok(())
418 })?;
419
420 Ok(entry_list)
421 }
422
423 /// Lockup a DirEntry inside a parent directory
424 pub fn lookup(
425 &mut self,
426 parent: &DirEntry,
427 filename: &[u8],
428 ) -> Result<DirEntry, Error> {
429
430 let start = match parent.attr {
431 DirEntryAttribute::Directory { start } => start,
432 _ => bail!("parent is not a directory - internal error"),
433 };
434
435 let data = self.read_raw_dirinfo_block(start)?;
436
437 let mut item = None;
438 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
439 if name != filename {
440 return Ok(());
441 }
442
443 let entry = DirEntry::new(etype, name.to_vec(), offset, size, mtime);
444 item = Some(entry);
445
446 Ok(())
447 })?;
448
449 match item {
450 None => bail!("no such file"),
451 Some(entry) => Ok(entry),
452 }
453 }
454
455 /// Read the raw directory info block from current reader position.
456 fn read_raw_dirinfo_block(&mut self, start: u64) -> Result<Vec<u8>, Error> {
457 self.reader.seek(SeekFrom::Start(start))?;
458 let size = catalog_decode_u64(&mut self.reader)?;
459 if size < 1 { bail!("got small directory size {}", size) };
460 let data = self.reader.read_exact_allocated(size as usize)?;
461 Ok(data)
462 }
463
464 pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
465
466 let data = self.read_raw_dirinfo_block(start)?;
467
468 DirInfo::parse(&data, |etype, name, offset, size, mtime| {
469
470 let mut path = std::path::PathBuf::from(prefix);
471 let name: &OsStr = OsStrExt::from_bytes(name);
472 path.push(name);
473
474 match etype {
475 CatalogEntryType::Directory => {
476 println!("{} {:?}", etype, path);
477 if offset > start {
478 bail!("got wrong directory offset ({} > {})", offset, start);
479 }
480 let pos = start - offset;
481 self.dump_dir(&path, pos)?;
482 }
483 CatalogEntryType::File => {
484 let dt = Local.timestamp(mtime as i64, 0);
485
486 println!(
487 "{} {:?} {} {}",
488 etype,
489 path,
490 size,
491 dt.to_rfc3339_opts(chrono::SecondsFormat::Secs, false),
492 );
493 }
494 _ => {
495 println!("{} {:?}", etype, path);
496 }
497 }
498
499 Ok(())
500 })
501 }
502 }
503
504 /// Serialize u64 as short, variable length byte sequence
505 ///
506 /// Stores 7 bits per byte, Bit 8 indicates the end of the sequence (when not set).
507 /// We limit values to a maximum of 2^63.
508 pub fn catalog_encode_u64<W: Write>(writer: &mut W, v: u64) -> Result<(), Error> {
509 let mut enc = Vec::new();
510
511 if (v & (1<<63)) != 0 { bail!("catalog_encode_u64 failed - value >= 2^63"); }
512 let mut d = v;
513 loop {
514 if d < 128 {
515 enc.push(d as u8);
516 break;
517 }
518 enc.push((128 | (d & 127)) as u8);
519 d = d >> 7;
520 }
521 writer.write_all(&enc)?;
522
523 Ok(())
524 }
525
526 /// Deserialize u64 from variable length byte sequence
527 ///
528 /// We currently read maximal 9 bytes, which give a maximum of 63 bits.
529 pub fn catalog_decode_u64<R: Read>(reader: &mut R) -> Result<u64, Error> {
530
531 let mut v: u64 = 0;
532 let mut buf = [0u8];
533
534 for i in 0..9 { // only allow 9 bytes (63 bits)
535 if buf.is_empty() {
536 bail!("decode_u64 failed - unexpected EOB");
537 }
538 reader.read_exact(&mut buf)?;
539 let t = buf[0];
540 if t < 128 {
541 v |= (t as u64) << (i*7);
542 return Ok(v);
543 } else {
544 v |= ((t & 127) as u64) << (i*7);
545 }
546 }
547
548 bail!("decode_u64 failed - missing end marker");
549 }
550
/// Round-trip test for `catalog_encode_u64` / `catalog_decode_u64`.
#[test]
fn test_catalog_u64_encoder() {

    /// Encode `value`, decode it again and check that the result matches
    /// and that the decoder consumed the whole encoding.
    fn test_encode_decode(value: u64) {

        let mut data = Vec::new();
        catalog_encode_u64(&mut data, value).unwrap();

        let mut slice = &data[..];
        let decoded = catalog_decode_u64(&mut slice).unwrap();

        assert_eq!(decoded, value);
        assert!(slice.is_empty());
    }

    // Includes zero and the one-byte/two-byte boundary (127/128).
    let values: &[u64] = &[
        0,
        1,
        126,
        127,
        128,
        (1 << 12) - 1,
        (1 << 20) - 1,
        (1 << 50) - 1,
        (1 << 63) - 1,
    ];
    for &value in values {
        test_encode_decode(value);
    }

    // Values with bit 63 set cannot be encoded.
    let mut sink: Vec<u8> = Vec::new();
    assert!(catalog_encode_u64(&mut sink, 1 << 63).is_err());
}