]> git.proxmox.com Git - proxmox-backup.git/blob - src/catar/encoder.rs
catar cleanups ...
[proxmox-backup.git] / src / catar / encoder.rs
//! *catar* format encoder.
//!
//! This module contains the code to generate *catar* archive files.
4
5 use failure::*;
6
7 use super::format_definition::*;
8 use super::binary_search_tree::*;
9
10 use std::io::Write;
11 use std::os::unix::io::AsRawFd;
12 use std::os::unix::ffi::OsStrExt;
13 use std::os::unix::io::RawFd;
14 use std::path::{Path, PathBuf};
15
16 use std::ffi::CStr;
17
18 use nix::NixPath;
19 use nix::fcntl::OFlag;
20 use nix::sys::stat::Mode;
21 use nix::errno::Errno;
22 use nix::sys::stat::FileStat;
23
/// Upper bound for the number of entries accepted in a single directory.
///
/// The format requires building sorted directory lookup tables in
/// memory, so the number of entries is restricted to limit the
/// maximum memory usage.
pub const MAX_DIRECTORY_ENTRIES: usize = 256 * 1024;
28
/// Streaming encoder that serializes a directory tree into the *catar*
/// archive format and writes it to `W`.
pub struct CaTarEncoder<W: Write> {
    /// Path of the item currently being encoded (used for error reporting).
    current_path: PathBuf,
    /// Destination of the archive data.
    writer: W,
    /// Number of bytes accounted as written to `writer` so far.
    writer_pos: usize,
    /// Initialized to zero by `encode`.
    size: usize,
    /// Scratch buffer used for copying file payloads and for building
    /// the goodbye tables.
    file_copy_buffer: Vec<u8>,
}
36
37
38 impl <W: Write> CaTarEncoder<W> {
39
40 pub fn encode(path: PathBuf, dir: &mut nix::dir::Dir, writer: W) -> Result<(), Error> {
41
42 const FILE_COPY_BUFFER_SIZE: usize = 1024*1024;
43
44 let mut file_copy_buffer = Vec::with_capacity(FILE_COPY_BUFFER_SIZE);
45 unsafe { file_copy_buffer.set_len(FILE_COPY_BUFFER_SIZE); }
46
47 let mut me = Self {
48 current_path: path,
49 writer: writer,
50 writer_pos: 0,
51 size: 0,
52 file_copy_buffer,
53 };
54
55 // todo: use scandirat??
56 me.encode_dir(dir)?;
57
58 Ok(())
59 }
60
61 fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
62 self.writer.write(buf)?;
63 self.writer_pos += buf.len();
64 Ok(())
65 }
66
67 fn flush_copy_buffer(&mut self, size: usize) -> Result<(), Error> {
68 self.writer.write(&self.file_copy_buffer[..size])?;
69 self.writer_pos += size;
70 Ok(())
71 }
72
73 fn write_header(&mut self, htype: u64, size: u64) -> Result<(), Error> {
74
75 let mut buffer = [0u8; std::mem::size_of::<CaFormatHeader>()];
76 let mut header = crate::tools::map_struct_mut::<CaFormatHeader>(&mut buffer)?;
77 header.size = u64::to_le((std::mem::size_of::<CaFormatHeader>() as u64) + size);
78 header.htype = u64::to_le(htype);
79
80 self.write(&buffer)?;
81
82 Ok(())
83 }
84
85 fn write_filename(&mut self, name: &CStr) -> Result<(), Error> {
86
87 let buffer = name.to_bytes_with_nul();
88 self.write_header(CA_FORMAT_FILENAME, buffer.len() as u64)?;
89 self.write(buffer)?;
90
91 Ok(())
92 }
93
94 fn write_entry(&mut self, stat: &FileStat) -> Result<(), Error> {
95
96 let mut buffer = [0u8; std::mem::size_of::<CaFormatHeader>() + std::mem::size_of::<CaFormatEntry>()];
97 let mut header = crate::tools::map_struct_mut::<CaFormatHeader>(&mut buffer)?;
98 header.size = u64::to_le((std::mem::size_of::<CaFormatHeader>() + std::mem::size_of::<CaFormatEntry>()) as u64);
99 header.htype = u64::to_le(CA_FORMAT_ENTRY);
100
101 let mut entry = crate::tools::map_struct_mut::<CaFormatEntry>(&mut buffer[std::mem::size_of::<CaFormatHeader>()..])?;
102
103 entry.feature_flags = u64::to_le(CA_FORMAT_FEATURE_FLAGS_MAX);
104
105 if (stat.st_mode & libc::S_IFMT) == libc::S_IFLNK {
106 entry.mode = u64::to_le((libc::S_IFLNK | 0o777) as u64);
107 } else {
108 let mode = stat.st_mode & (libc::S_IFMT | 0o7777);
109 entry.mode = u64::to_le(mode as u64);
110 }
111
112 entry.flags = 0; // todo: CHATTR, FAT_ATTRS, subvolume?
113
114 entry.uid = u64::to_le(stat.st_uid as u64);
115 entry.gid = u64::to_le(stat.st_gid as u64);
116
117 let mtime = stat.st_mtime * 1_000_000_000 + stat.st_mtime_nsec;
118 if mtime > 0 { entry.mtime = mtime as u64 };
119
120 self.write(&buffer)?;
121
122 Ok(())
123 }
124
125 fn write_goodbye_table(&mut self, goodbye_offset: usize, goodbye_items: &[CaFormatGoodbyeItem]) -> Result<(), Error> {
126
127 let item_count = goodbye_items.len();
128
129 let goodbye_table_size = (item_count + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
130
131 self.write_header(CA_FORMAT_GOODBYE, goodbye_table_size as u64)?;
132
133 if self.file_copy_buffer.capacity() < goodbye_table_size {
134 let need = goodbye_table_size - self.file_copy_buffer.capacity();
135 self.file_copy_buffer.reserve(need);
136 unsafe { self.file_copy_buffer.set_len(self.file_copy_buffer.capacity()); }
137 }
138
139 let buffer = &mut self.file_copy_buffer;
140
141 copy_binary_search_tree(item_count, |s, d| {
142 let item = &goodbye_items[s];
143 let offset = d*std::mem::size_of::<CaFormatGoodbyeItem>();
144 let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
145 dest.offset = u64::to_le(item.offset);
146 dest.size = u64::to_le(item.size);
147 dest.hash = u64::to_le(item.hash);
148 });
149
150 // append CaFormatGoodbyeTail as last item
151 let offset = item_count*std::mem::size_of::<CaFormatGoodbyeItem>();
152 let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
153 dest.offset = u64::to_le(goodbye_offset as u64);
154 dest.size = u64::to_le((goodbye_table_size + std::mem::size_of::<CaFormatHeader>()) as u64);
155 dest.hash = u64::to_le(CA_FORMAT_GOODBYE_TAIL_MARKER);
156
157 self.flush_copy_buffer(goodbye_table_size)?;
158
159 Ok(())
160 }
161
162 fn encode_dir(&mut self, dir: &mut nix::dir::Dir) -> Result<(), Error> {
163
164 println!("encode_dir: {:?} start {}", self.current_path, self.writer_pos);
165
166 let mut name_list = vec![];
167
168 let rawfd = dir.as_raw_fd();
169
170 let dir_stat = match nix::sys::stat::fstat(rawfd) {
171 Ok(stat) => stat,
172 Err(err) => bail!("fstat {:?} failed - {}", self.current_path, err),
173 };
174
175 if (dir_stat.st_mode & libc::S_IFMT) != libc::S_IFDIR {
176 bail!("got unexpected file type {:?} (not a directory)", self.current_path);
177 }
178
179 let dir_start_pos = self.writer_pos;
180
181 self.write_entry(&dir_stat)?;
182
183 let mut dir_count = 0;
184
185 for entry in dir.iter() {
186 dir_count += 1;
187 if dir_count > MAX_DIRECTORY_ENTRIES {
188 bail!("too many directory items in {:?} (> {})",
189 self.current_path, MAX_DIRECTORY_ENTRIES);
190 }
191
192 let entry = match entry {
193 Ok(entry) => entry,
194 Err(err) => bail!("readir {:?} failed - {}", self.current_path, err),
195 };
196 let filename = entry.file_name().to_owned();
197
198 let name = filename.to_bytes_with_nul();
199 let name_len = name.len();
200 if name_len == 2 && name[0] == b'.' && name[1] == 0u8 { continue; }
201 if name_len == 3 && name[0] == b'.' && name[1] == b'.' && name[2] == 0u8 { continue; }
202
203 match nix::sys::stat::fstatat(rawfd, filename.as_ref(), nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
204 Ok(stat) => {
205 name_list.push((filename, stat));
206 }
207 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
208 Err(err) => bail!("fstat {:?} failed - {}", self.current_path, err),
209 }
210 }
211
212 name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0));
213
214 let mut goodbye_items = vec![];
215
216 for (filename, stat) in &name_list {
217 self.current_path.push(std::ffi::OsStr::from_bytes(filename.as_bytes()));
218
219 let start_pos = self.writer_pos;
220
221 self.write_filename(&filename)?;
222
223 if (stat.st_mode & libc::S_IFMT) == libc::S_IFDIR {
224
225 match nix::dir::Dir::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) {
226 Ok(mut dir) => self.encode_dir(&mut dir)?,
227 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
228 Err(err) => bail!("open dir {:?} failed - {}", self.current_path, err),
229 }
230
231 } else if (stat.st_mode & libc::S_IFMT) == libc::S_IFREG {
232 match nix::fcntl::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) {
233 Ok(filefd) => {
234 let res = self.encode_file(filefd);
235 let _ = nix::unistd::close(filefd); // ignore close errors
236 res?;
237 }
238 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
239 Err(err) => bail!("open file {:?} failed - {}", self.current_path, err),
240 }
241 } else if (stat.st_mode & libc::S_IFMT) == libc::S_IFLNK {
242 let mut buffer = [0u8; libc::PATH_MAX as usize];
243
244 let res = filename.with_nix_path(|cstr| {
245 unsafe { libc::readlinkat(rawfd, cstr.as_ptr(), buffer.as_mut_ptr() as *mut libc::c_char, buffer.len()-1) }
246 })?;
247
248 match Errno::result(res) {
249 Ok(len) => {
250 buffer[len as usize] = 0u8; // add Nul byte
251 self.encode_symlink(&buffer[..((len+1) as usize)], &stat)?
252 }
253 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
254 Err(err) => bail!("readlink {:?} failed - {}", self.current_path, err),
255 }
256 } else {
257 bail!("unsupported file type (mode {:o} {:?})", stat.st_mode, self.current_path);
258 }
259
260 let end_pos = self.writer_pos;
261
262 goodbye_items.push(CaFormatGoodbyeItem {
263 offset: start_pos as u64,
264 size: (end_pos - start_pos) as u64,
265 hash: compute_goodbye_hash(filename.to_bytes()),
266 });
267
268 self.current_path.pop();
269 }
270
271 println!("encode_dir: {:?} end {}", self.current_path, self.writer_pos);
272
273 // fixup goodby item offsets
274 let goodbye_start = self.writer_pos as u64;
275 for item in &mut goodbye_items {
276 item.offset = goodbye_start - item.offset;
277 }
278
279 let goodbye_offset = self.writer_pos - dir_start_pos;
280
281 self.write_goodbye_table(goodbye_offset, &goodbye_items)?;
282
283 println!("encode_dir: {:?} end1 {}", self.current_path, self.writer_pos);
284 Ok(())
285 }
286
287 fn encode_file(&mut self, filefd: RawFd) -> Result<(), Error> {
288
289 println!("encode_file: {:?}", self.current_path);
290
291 let stat = match nix::sys::stat::fstat(filefd) {
292 Ok(stat) => stat,
293 Err(err) => bail!("fstat {:?} failed - {}", self.current_path, err),
294 };
295
296 if (stat.st_mode & libc::S_IFMT) != libc::S_IFREG {
297 bail!("got unexpected file type {:?} (not a regular file)", self.current_path);
298 }
299
300 self.write_entry(&stat)?;
301
302 let size = stat.st_size as u64;
303
304 self.write_header(CA_FORMAT_PAYLOAD, size)?;
305
306 let mut pos: u64 = 0;
307 loop {
308 let n = match nix::unistd::read(filefd, &mut self.file_copy_buffer) {
309 Ok(n) => n,
310 Err(nix::Error::Sys(Errno::EINTR)) => continue /* try again */,
311 Err(err) => bail!("read {:?} failed - {}", self.current_path, err),
312 };
313 if n == 0 { // EOF
314 if pos != size {
315 // Note:: casync format cannot handle that
316 bail!("detected shrinked file {:?} ({} < {})", self.current_path, pos, size);
317 }
318 break;
319 }
320
321 let mut next = pos + (n as u64);
322
323 if next > size { next = size; }
324
325 let count = (next - pos) as usize;
326
327 self.flush_copy_buffer(count)?;
328
329 pos = next;
330
331 if pos >= size { break; }
332 }
333
334 Ok(())
335 }
336
337 fn encode_symlink(&mut self, target: &[u8], stat: &FileStat) -> Result<(), Error> {
338
339 println!("encode_symlink: {:?} -> {:?}", self.current_path, target);
340
341 self.write_entry(stat)?;
342
343 self.write_header(CA_FORMAT_SYMLINK, target.len() as u64)?;
344 self.write(target)?;
345
346 Ok(())
347 }
348
// The report_XXX methods may raise an error, depending on the encoder configuration.
350
351 fn report_vanished_file(&self, path: &Path) -> Result<(), Error> {
352
353 eprintln!("WARNING: detected vanished file {:?}", path);
354
355 Ok(())
356 }
357 }