]> git.proxmox.com Git - proxmox-backup.git/blob - src/catar/encoder.rs
use external crate siphasher
[proxmox-backup.git] / src / catar / encoder.rs
1 use failure::*;
2
3 use super::format_definition::*;
4
5 use std::io::Write;
6 use std::os::unix::io::AsRawFd;
7 use std::os::unix::ffi::OsStrExt;
8 use std::os::unix::io::RawFd;
9 use std::path::{Path, PathBuf};
10
11 use std::ffi::CStr;
12
13 use nix::NixPath;
14 use nix::fcntl::OFlag;
15 use nix::sys::stat::Mode;
16 use nix::errno::Errno;
17 use nix::sys::stat::FileStat;
18
19 use siphasher::sip::SipHasher24;
20
/// Size in bytes (1 MiB) of the encoder's reusable scratch buffer.
const FILE_COPY_BUFFER_SIZE: usize = 1 << 20;
22
/// State of a catar archive encoder that streams its output into a `Write` sink.
pub struct CaTarEncoder<W>
where
    W: Write,
{
    /// Path of the entry currently being processed; used for error reporting.
    current_path: PathBuf,
    /// Sink that receives the encoded archive bytes.
    writer: W,
    /// Number of bytes handed to `writer` so far; offsets stored in the
    /// archive are derived from this counter.
    writer_pos: usize,
    /// Initialised to zero when the encoder is created.
    // NOTE(review): not read anywhere in the visible code - confirm whether it is still needed.
    size: usize,
    /// Reusable scratch buffer for staging data before it is written out.
    file_copy_buffer: Vec<u8>,
}
30
31
32 impl <W: Write> CaTarEncoder<W> {
33
34 pub fn encode(path: PathBuf, dir: &mut nix::dir::Dir, writer: W) -> Result<(), Error> {
35
36 let mut file_copy_buffer = Vec::with_capacity(FILE_COPY_BUFFER_SIZE);
37 unsafe { file_copy_buffer.set_len(FILE_COPY_BUFFER_SIZE); }
38
39 let mut me = Self {
40 current_path: path,
41 writer: writer,
42 writer_pos: 0,
43 size: 0,
44 file_copy_buffer,
45 };
46
47 // todo: use scandirat??
48 me.encode_dir(dir)?;
49
50 Ok(())
51 }
52
53 fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
54 self.writer.write(buf)?;
55 self.writer_pos += buf.len();
56 Ok(())
57 }
58
59 fn flush_copy_buffer(&mut self, size: usize) -> Result<(), Error> {
60 self.writer.write(&self.file_copy_buffer[..size])?;
61 self.writer_pos += size;
62 Ok(())
63 }
64
65 fn write_header(&mut self, htype: u64, size: u64) -> Result<(), Error> {
66
67 let mut buffer = [0u8; std::mem::size_of::<CaFormatHeader>()];
68 let mut header = crate::tools::map_struct_mut::<CaFormatHeader>(&mut buffer)?;
69 header.size = u64::to_le((std::mem::size_of::<CaFormatHeader>() as u64) + size);
70 header.htype = u64::to_le(htype);
71
72 self.write(&buffer)?;
73
74 Ok(())
75 }
76
77 fn write_filename(&mut self, name: &CStr) -> Result<(), Error> {
78
79 let buffer = name.to_bytes_with_nul();
80 self.write_header(CA_FORMAT_FILENAME, buffer.len() as u64)?;
81 self.write(buffer)?;
82
83 Ok(())
84 }
85
86 fn write_entry(&mut self, stat: &FileStat) -> Result<(), Error> {
87
88 let mut buffer = [0u8; std::mem::size_of::<CaFormatHeader>() + std::mem::size_of::<CaFormatEntry>()];
89 let mut header = crate::tools::map_struct_mut::<CaFormatHeader>(&mut buffer)?;
90 header.size = u64::to_le((std::mem::size_of::<CaFormatHeader>() + std::mem::size_of::<CaFormatEntry>()) as u64);
91 header.htype = u64::to_le(CA_FORMAT_ENTRY);
92
93 let mut entry = crate::tools::map_struct_mut::<CaFormatEntry>(&mut buffer[std::mem::size_of::<CaFormatHeader>()..])?;
94
95 entry.feature_flags = u64::to_le(CA_FORMAT_FEATURE_FLAGS_MAX);
96
97 if (stat.st_mode & libc::S_IFMT) == libc::S_IFLNK {
98 entry.mode = u64::to_le((libc::S_IFLNK | 0o777) as u64);
99 } else {
100 let mode = stat.st_mode & (libc::S_IFMT | 0o7777);
101 entry.mode = u64::to_le(mode as u64);
102 }
103
104 entry.flags = 0; // todo: CHATTR, FAT_ATTRS, subvolume?
105
106 entry.uid = u64::to_le(stat.st_uid as u64);
107 entry.gid = u64::to_le(stat.st_gid as u64);
108
109 let mtime = stat.st_mtime * 1_000_000_000 + stat.st_mtime_nsec;
110 if mtime > 0 { entry.mtime = mtime as u64 };
111
112 self.write(&buffer)?;
113
114 Ok(())
115 }
116
117 fn encode_dir(&mut self, dir: &mut nix::dir::Dir) -> Result<(), Error> {
118
119 println!("encode_dir: {:?} start {}", self.current_path, self.writer_pos);
120
121 let mut name_list = vec![];
122
123 let rawfd = dir.as_raw_fd();
124
125 let dir_stat = match nix::sys::stat::fstat(rawfd) {
126 Ok(stat) => stat,
127 Err(err) => bail!("fstat {:?} failed - {}", self.current_path, err),
128 };
129
130 if (dir_stat.st_mode & libc::S_IFMT) != libc::S_IFDIR {
131 bail!("got unexpected file type {:?} (not a directory)", self.current_path);
132 }
133
134 let dir_start_pos = self.writer_pos;
135
136 self.write_entry(&dir_stat)?;
137
138 for entry in dir.iter() {
139 let entry = match entry {
140 Ok(entry) => entry,
141 Err(err) => bail!("readir {:?} failed - {}", self.current_path, err),
142 };
143 let filename = entry.file_name().to_owned();
144
145 let name = filename.to_bytes_with_nul();
146 let name_len = name.len();
147 if name_len == 2 && name[0] == b'.' && name[1] == 0u8 { continue; }
148 if name_len == 3 && name[0] == b'.' && name[1] == b'.' && name[2] == 0u8 { continue; }
149
150 match nix::sys::stat::fstatat(rawfd, filename.as_ref(), nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
151 Ok(stat) => {
152 name_list.push((filename, stat));
153 }
154 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
155 Err(err) => bail!("fstat {:?} failed - {}", self.current_path, err),
156 }
157 }
158
159 name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0));
160
161 let mut goodby_items = vec![];
162
163 for (filename, stat) in &name_list {
164 self.current_path.push(std::ffi::OsStr::from_bytes(filename.as_bytes()));
165
166 let start_pos = self.writer_pos;
167
168 self.write_filename(&filename)?;
169
170 if (stat.st_mode & libc::S_IFMT) == libc::S_IFDIR {
171
172 match nix::dir::Dir::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) {
173 Ok(mut dir) => self.encode_dir(&mut dir)?,
174 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
175 Err(err) => bail!("open dir {:?} failed - {}", self.current_path, err),
176 }
177
178 } else if (stat.st_mode & libc::S_IFMT) == libc::S_IFREG {
179 match nix::fcntl::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) {
180 Ok(filefd) => {
181 let res = self.encode_file(filefd);
182 let _ = nix::unistd::close(filefd); // ignore close errors
183 res?;
184 }
185 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
186 Err(err) => bail!("open file {:?} failed - {}", self.current_path, err),
187 }
188 } else if (stat.st_mode & libc::S_IFMT) == libc::S_IFLNK {
189 let mut buffer = [0u8; libc::PATH_MAX as usize];
190
191 let res = filename.with_nix_path(|cstr| {
192 unsafe { libc::readlinkat(rawfd, cstr.as_ptr(), buffer.as_mut_ptr() as *mut libc::c_char, buffer.len()-1) }
193 })?;
194
195 match Errno::result(res) {
196 Ok(len) => {
197 buffer[len as usize] = 0u8; // add Nul byte
198 self.encode_symlink(&buffer[..((len+1) as usize)], &stat)?
199 }
200 Err(nix::Error::Sys(Errno::ENOENT)) => self.report_vanished_file(&self.current_path)?,
201 Err(err) => bail!("readlink {:?} failed - {}", self.current_path, err),
202 }
203 } else {
204 bail!("unsupported file type (mode {:o} {:?})", stat.st_mode, self.current_path);
205 }
206
207 let end_pos = self.writer_pos;
208
209 goodby_items.push(CaFormatGoodbyeItem {
210 offset: start_pos as u64,
211 size: (end_pos - start_pos) as u64,
212 hash: compute_goodby_hash(&filename),
213 });
214
215 self.current_path.pop();
216 }
217
218 println!("encode_dir: {:?} end {}", self.current_path, self.writer_pos);
219
220 let goodby_start = self.writer_pos as u64;
221 let goodby_table_size = (goodby_items.len() + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
222
223 for item in &mut goodby_items {
224 item.offset = goodby_start - item.offset;
225 }
226
227 // fixme: sort goodby_items (BST)
228
229 let goodby_offset = self.writer_pos - dir_start_pos;
230
231 // append CaFormatGoodbyeTail as last item
232 goodby_items.push(CaFormatGoodbyeItem {
233 offset: goodby_offset as u64,
234 size: (goodby_table_size + std::mem::size_of::<CaFormatHeader>()) as u64,
235 hash: CA_FORMAT_GOODBYE_TAIL_MARKER,
236 });
237
238 self.write_header(CA_FORMAT_GOODBYE, goodby_table_size as u64)?;
239
240 if goodby_table_size > FILE_COPY_BUFFER_SIZE {
241 bail!("goodby table too large ({} > {})", goodby_table_size, FILE_COPY_BUFFER_SIZE);
242 }
243
244 let buffer = &mut self.file_copy_buffer;
245 let buffer_ptr = buffer.as_ptr();
246 for (i, item) in goodby_items.iter().enumerate() {
247 unsafe {
248 *(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()) as *mut u64) = u64::to_le(item.offset);
249 *(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+8) as *mut u64) = u64::to_le(item.size);
250 *(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+16) as *mut u64) = u64::to_le(item.hash);
251 }
252 }
253
254 self.flush_copy_buffer(goodby_table_size)?;
255
256 println!("encode_dir: {:?} end1 {}", self.current_path, self.writer_pos);
257 Ok(())
258 }
259
260 fn encode_file(&mut self, filefd: RawFd) -> Result<(), Error> {
261
262 println!("encode_file: {:?}", self.current_path);
263
264 let stat = match nix::sys::stat::fstat(filefd) {
265 Ok(stat) => stat,
266 Err(err) => bail!("fstat {:?} failed - {}", self.current_path, err),
267 };
268
269 if (stat.st_mode & libc::S_IFMT) != libc::S_IFREG {
270 bail!("got unexpected file type {:?} (not a regular file)", self.current_path);
271 }
272
273 self.write_entry(&stat)?;
274
275 let size = stat.st_size as u64;
276
277 self.write_header(CA_FORMAT_PAYLOAD, size)?;
278
279 let mut pos: u64 = 0;
280 loop {
281 let n = match nix::unistd::read(filefd, &mut self.file_copy_buffer) {
282 Ok(n) => n,
283 Err(nix::Error::Sys(Errno::EINTR)) => continue /* try again */,
284 Err(err) => bail!("read {:?} failed - {}", self.current_path, err),
285 };
286 if n == 0 { // EOF
287 if pos != size {
288 // Note:: casync format cannot handle that
289 bail!("detected shrinked file {:?} ({} < {})", self.current_path, pos, size);
290 }
291 break;
292 }
293
294 let mut next = pos + (n as u64);
295
296 if next > size { next = size; }
297
298 let count = (next - pos) as usize;
299
300 self.flush_copy_buffer(count)?;
301
302 pos = next;
303
304 if pos >= size { break; }
305 }
306
307 Ok(())
308 }
309
310 fn encode_symlink(&mut self, target: &[u8], stat: &FileStat) -> Result<(), Error> {
311
312 println!("encode_symlink: {:?} -> {:?}", self.current_path, target);
313
314 self.write_entry(stat)?;
315
316 self.write_header(CA_FORMAT_SYMLINK, target.len() as u64)?;
317 self.write(target)?;
318
319 Ok(())
320 }
321
// The report_XXX methods may raise an error, depending on the encoder configuration.
323
324 fn report_vanished_file(&self, path: &Path) -> Result<(), Error> {
325
326 eprintln!("WARNING: detected vanished file {:?}", path);
327
328 Ok(())
329 }
330 }
331
332 fn compute_goodby_hash(name: &CStr) -> u64 {
333
334 use std::hash::Hasher;
335 let mut hasher = SipHasher24::new_with_keys(0x8574442b0f1d84b3, 0x2736ed30d1c22ec1);
336 hasher.write(name.to_bytes());
337 hasher.finish()
338 }