]> git.proxmox.com Git - proxmox-backup.git/blob - src/pxar/match_pattern.rs
pxar: match_pattern: refactor match_filename
[proxmox-backup.git] / src / pxar / match_pattern.rs
1 //! `MatchPattern` defines a match pattern used to match filenames encountered
2 //! during encoding or decoding of a `pxar` archive.
3 //! `fnmatch` is used internally to match filenames against the patterns.
//! Shell wildcard patterns can be used to match multiple filenames, see manpage
//! `glob(7)`.
//! `**` is treated specially, as it matches multiple directories in a path.
7
8 use std::ffi::{CStr, CString};
9 use std::fs::File;
10 use std::io::Read;
11 use std::os::unix::io::{FromRawFd, RawFd};
12
13 use failure::{bail, Error};
14 use libc::{c_char, c_int};
15 use nix::errno::Errno;
16 use nix::fcntl;
17 use nix::fcntl::{AtFlags, OFlag};
18 use nix::sys::stat;
19 use nix::sys::stat::{FileStat, Mode};
20 use nix::NixPath;
21
/// Return value of `fnmatch` signalling that the string did not match the
/// pattern.
/// NOTE(review): hard-coded to `1`, presumably mirroring glibc's
/// `FNM_NOMATCH` - confirm before building against another libc.
pub const FNM_NOMATCH: c_int = 1;
23
// Foreign declaration of the C library's `fnmatch(3)`.
extern "C" {
    /// Matches the nul-terminated `string` against the nul-terminated shell
    /// wildcard `pattern`.
    /// Callers in this file pass `0` as `flags`, treat a return value of `0`
    /// as a match and a negative return value as an error.
    fn fnmatch(pattern: *const c_char, string: *const c_char, flags: c_int) -> c_int;
}
27
/// Outcome of matching a filename against a `MatchPattern`.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum MatchType {
    /// The filename did not match the pattern.
    None,
    /// The filename matched the complete pattern of a positive pattern
    /// (declared without `!` prefix).
    Positive,
    /// The filename matched the complete pattern of a negative pattern
    /// (declared with `!` prefix).
    Negative,
    /// The filename matched a positive pattern only up to the pattern's first
    /// slash.
    PartialPositive,
    /// The filename matched a negative pattern only up to the pattern's first
    /// slash.
    PartialNegative,
}
36
/// `MatchPattern` provides functionality for filename glob pattern matching
/// based on glibc's `fnmatch`.
/// Positive matches return `MatchType::PartialPositive` or `MatchType::Positive`.
/// Patterns starting with `!` are interpreted as negation, meaning they will
/// return `MatchType::PartialNegative` or `MatchType::Negative`.
/// No matches result in `MatchType::None`.
/// # Examples:
/// ```
/// # use std::ffi::CString;
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let filename = CString::new("some.conf")?;
/// let is_dir = false;
///
/// // Positive match of any file ending in `.conf` in any subdirectory
/// let positive = MatchPattern::from_line(b"**/*.conf")?.unwrap();
/// let m_positive = positive.matches_filename(&filename, is_dir)?;
/// assert_eq!(m_positive, MatchType::Positive);
///
/// // Negative match of filenames starting with `s`
/// let negative = MatchPattern::from_line(b"![s]*")?.unwrap();
/// let m_negative = negative.matches_filename(&filename, is_dir)?;
/// assert_eq!(m_negative, MatchType::Negative);
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct MatchPattern {
    /// The pattern without the `!` prefix and without the trailing `/`.
    pattern: CString,
    /// `false` if the pattern was declared with a `!` prefix.
    match_positive: bool,
    /// `true` if the pattern was declared with a trailing `/`.
    match_dir_only: bool,
    /// `pattern` split at its first slash into (front, rest).
    split_pattern: (CString, CString),
}
70
71 impl MatchPattern {
72 /// Read a list of `MatchPattern` from file.
73 /// The file is read line by line (lines terminated by newline character),
74 /// each line may only contain one pattern.
75 /// Leading `/` are ignored and lines starting with `#` are interpreted as
76 /// comments and not included in the resulting list.
77 /// Patterns ending in `/` will match only directories.
78 ///
79 /// On success, a list of match pattern is returned as well as the raw file
80 /// byte buffer together with the files stats.
81 /// This is done in order to avoid reading the file more than once during
82 /// encoding of the archive.
83 pub fn from_file<P: ?Sized + NixPath>(
84 parent_fd: RawFd,
85 filename: &P,
86 ) -> Result<Option<(Vec<MatchPattern>, Vec<u8>, FileStat)>, Error> {
87 let stat = match stat::fstatat(parent_fd, filename, AtFlags::AT_SYMLINK_NOFOLLOW) {
88 Ok(stat) => stat,
89 Err(nix::Error::Sys(Errno::ENOENT)) => return Ok(None),
90 Err(err) => bail!("stat failed - {}", err),
91 };
92
93 let filefd = fcntl::openat(parent_fd, filename, OFlag::O_NOFOLLOW, Mode::empty())?;
94 let mut file = unsafe { File::from_raw_fd(filefd) };
95
96 let mut content_buffer = Vec::new();
97 let _bytes = file.read_to_end(&mut content_buffer)?;
98
99 let mut match_pattern = Vec::new();
100 for line in content_buffer.split(|&c| c == b'\n') {
101 if line.is_empty() {
102 continue;
103 }
104 if let Some(pattern) = Self::from_line(line)? {
105 match_pattern.push(pattern);
106 }
107 }
108
109 Ok(Some((match_pattern, content_buffer, stat)))
110 }
111
112 /// Interprete a byte buffer as a sinlge line containing a valid
113 /// `MatchPattern`.
114 /// Pattern starting with `#` are interpreted as comments, returning `Ok(None)`.
115 /// Pattern starting with '!' are interpreted as negative match pattern.
116 /// Pattern with trailing `/` match only against directories.
117 /// `.` as well as `..` and any pattern containing `\0` are invalid and will
118 /// result in an error.
119 pub fn from_line(line: &[u8]) -> Result<Option<MatchPattern>, Error> {
120 let mut input = line;
121
122 if input.starts_with(b"#") {
123 return Ok(None);
124 }
125
126 let match_positive = if input.starts_with(b"!") {
127 // Reduce slice view to exclude "!"
128 input = &input[1..];
129 false
130 } else {
131 true
132 };
133
134 // Paths ending in / match only directory names (no filenames)
135 let match_dir_only = if input.ends_with(b"/") {
136 let len = input.len();
137 input = &input[..len - 1];
138 true
139 } else {
140 false
141 };
142
143 // Ignore initial slash
144 if input.starts_with(b"/") {
145 input = &input[1..];
146 }
147
148 if input.is_empty() || input == b"." || input == b".." || input.contains(&b'\0') {
149 bail!("invalid path component encountered");
150 }
151
152 // This will fail if the line contains b"\0"
153 let pattern = CString::new(input)?;
154 let split_pattern = split_at_slash(&pattern);
155
156 Ok(Some(MatchPattern {
157 pattern,
158 match_positive,
159 match_dir_only,
160 split_pattern,
161 }))
162 }
163
164 /// Returns the pattern before the first `/` encountered as `MatchPattern`.
165 /// If no slash is encountered, the `MatchPattern` will be a copy of the
166 /// original pattern.
167 /// ```
168 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
169 /// # fn main() -> Result<(), failure::Error> {
170 /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
171 /// let front = pattern.get_front_pattern();
172 /// /// ... will be the same as ...
173 /// let front_pattern = MatchPattern::from_line(b"some")?.unwrap();
174 /// # Ok(())
175 /// # }
176 /// ```
177 pub fn get_front_pattern(&self) -> MatchPattern {
178 let pattern = split_at_slash(&self.split_pattern.0);
179 MatchPattern {
180 pattern: self.split_pattern.0.clone(),
181 match_positive: self.match_positive,
182 match_dir_only: self.match_dir_only,
183 split_pattern: pattern,
184 }
185 }
186
187 /// Returns the pattern after the first encountered `/` as `MatchPattern`.
188 /// If no slash is encountered, the `MatchPattern` will be empty.
189 /// ```
190 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
191 /// # fn main() -> Result<(), failure::Error> {
192 /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
193 /// let rest = pattern.get_rest_pattern();
194 /// /// ... will be the same as ...
195 /// let rest_pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
196 /// # Ok(())
197 /// # }
198 /// ```
199 pub fn get_rest_pattern(&self) -> MatchPattern {
200 let pattern = split_at_slash(&self.split_pattern.1);
201 MatchPattern {
202 pattern: self.split_pattern.1.clone(),
203 match_positive: self.match_positive,
204 match_dir_only: self.match_dir_only,
205 split_pattern: pattern,
206 }
207 }
208
209 /// Dump the content of the `MatchPattern` to stdout.
210 /// Intended for debugging purposes only.
211 pub fn dump(&self) {
212 match (self.match_positive, self.match_dir_only) {
213 (true, true) => println!("{:#?}/", self.pattern),
214 (true, false) => println!("{:#?}", self.pattern),
215 (false, true) => println!("!{:#?}/", self.pattern),
216 (false, false) => println!("!{:#?}", self.pattern),
217 }
218 }
219
220 /// Convert a list of MatchPattern to bytes in order to write them to e.g.
221 /// a file.
222 pub fn to_bytes(patterns: &[MatchPattern]) -> Vec<u8> {
223 let mut buffer = Vec::new();
224 for pattern in patterns {
225 if !pattern.match_positive { buffer.push(b'!'); }
226 buffer.extend_from_slice( pattern.pattern.as_bytes());
227 if pattern.match_dir_only { buffer.push(b'/'); }
228 buffer.push(b'\n');
229 }
230 buffer
231 }
232
233 /// Match the given filename against this `MatchPattern`.
234 /// If the filename matches the pattern completely, `MatchType::Positive` or
235 /// `MatchType::Negative` is returned, depending if the match pattern is was
236 /// declared as positive (no `!` prefix) or negative (`!` prefix).
237 /// If the pattern matched only up to the first slash of the pattern,
238 /// `MatchType::PartialPositive` or `MatchType::PartialNegatie` is returned.
239 /// If the pattern was postfixed by a trailing `/` a match is only valid if
240 /// the parameter `is_dir` equals `true`.
241 /// No match results in `MatchType::None`.
242 pub fn matches_filename(&self, filename: &CStr, is_dir: bool) -> Result<MatchType, Error> {
243 let mut res = MatchType::None;
244 let (front, _) = &self.split_pattern;
245
246 let fnmatch_res = unsafe {
247 let front_ptr = front.as_ptr() as *const libc::c_char;
248 let filename_ptr = filename.as_ptr() as *const libc::c_char;
249 fnmatch(front_ptr, filename_ptr, 0)
250 };
251 if fnmatch_res < 0 {
252 bail!("error in fnmatch inside of MatchPattern");
253 }
254 if fnmatch_res == 0 {
255 res = if self.match_positive {
256 MatchType::PartialPositive
257 } else {
258 MatchType::PartialNegative
259 };
260 }
261
262 let full = if self.pattern.to_bytes().starts_with(b"**/") {
263 CString::new(&self.pattern.to_bytes()[3..]).unwrap()
264 } else {
265 CString::new(&self.pattern.to_bytes()[..]).unwrap()
266 };
267 let fnmatch_res = unsafe {
268 let full_ptr = full.as_ptr() as *const libc::c_char;
269 let filename_ptr = filename.as_ptr() as *const libc::c_char;
270 fnmatch(full_ptr, filename_ptr, 0)
271 };
272 if fnmatch_res < 0 {
273 bail!("error in fnmatch inside of MatchPattern");
274 }
275 if fnmatch_res == 0 {
276 res = if self.match_positive {
277 MatchType::Positive
278 } else {
279 MatchType::Negative
280 };
281 }
282
283 if !is_dir && self.match_dir_only {
284 res = MatchType::None;
285 }
286
287 if !is_dir && (res == MatchType::PartialPositive || res == MatchType::PartialNegative) {
288 res = MatchType::None;
289 }
290
291 Ok(res)
292 }
293
294 /// Match the given filename against the set of match patterns.
295 ///
296 /// A positive match is intended to includes the full subtree (unless another
297 /// negative match excludes entries later).
298 /// The `MatchType` together with an updated `MatchPattern` list for passing
299 /// to the matched child is returned.
300 /// ```
301 /// # use std::ffi::CString;
302 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
303 /// # fn main() -> Result<(), failure::Error> {
304 /// let patterns = vec![
305 /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
306 /// MatchPattern::from_line(b"to_match/")?.unwrap()
307 /// ];
308 /// let filename = CString::new("some")?;
309 /// let is_dir = true;
310 /// let (match_type, child_pattern) = MatchPattern::match_filename_include(
311 /// &filename,
312 /// is_dir,
313 /// &patterns
314 /// )?;
315 /// assert_eq!(match_type, MatchType::PartialPositive);
316 /// /// child pattern will be the same as ...
317 /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
318 ///
319 /// let filename = CString::new("to_match")?;
320 /// let is_dir = true;
321 /// let (match_type, child_pattern) = MatchPattern::match_filename_include(
322 /// &filename,
323 /// is_dir,
324 /// &patterns
325 /// )?;
326 /// assert_eq!(match_type, MatchType::Positive);
327 /// /// child pattern will be the same as ...
328 /// let pattern = MatchPattern::from_line(b"**/*")?.unwrap();
329 /// # Ok(())
330 /// # }
331 /// ```
332 pub fn match_filename_include(
333 filename: &CStr,
334 is_dir: bool,
335 match_pattern: &[MatchPattern],
336 ) -> Result<(MatchType, Vec<MatchPattern>), Error> {
337 let mut child_pattern = Vec::new();
338 let mut match_state = MatchType::None;
339
340 for pattern in match_pattern {
341 match pattern.matches_filename(filename, is_dir)? {
342 MatchType::None => continue,
343 MatchType::Positive => {
344 match_state = MatchType::Positive;
345 // Full match so lets include everything below this node
346 let incl_pattern = MatchPattern::from_line(b"**/*").unwrap().unwrap();
347 child_pattern.push(incl_pattern);
348 }
349 MatchType::Negative => match_state = MatchType::Negative,
350 MatchType::PartialPositive => {
351 if match_state != MatchType::Negative && match_state != MatchType::Positive {
352 match_state = MatchType::PartialPositive;
353 }
354 child_pattern.push(pattern.get_rest_pattern());
355 }
356 MatchType::PartialNegative => {
357 if match_state == MatchType::PartialPositive {
358 match_state = MatchType::PartialNegative;
359 }
360 child_pattern.push(pattern.get_rest_pattern());
361 }
362 }
363 }
364
365 Ok((match_state, child_pattern))
366 }
367
368 /// Match the given filename against the set of match patterns.
369 ///
370 /// A positive match is intended to exclude the full subtree, independent of
371 /// matches deeper down the tree.
372 /// The `MatchType` together with an updated `MatchPattern` list for passing
373 /// to the matched child is returned.
374 /// ```
375 /// # use std::ffi::CString;
376 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
377 /// # fn main() -> Result<(), failure::Error> {
378 /// let patterns = vec![
379 /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
380 /// MatchPattern::from_line(b"to_match/")?.unwrap()
381 /// ];
382 /// let filename = CString::new("some")?;
383 /// let is_dir = true;
384 /// let (match_type, child_pattern) = MatchPattern::match_filename_exclude(
385 /// &filename,
386 /// is_dir,
387 /// &patterns
388 /// )?;
389 /// assert_eq!(match_type, MatchType::PartialPositive);
390 /// /// child pattern will be the same as ...
391 /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
392 ///
393 /// let filename = CString::new("to_match")?;
394 /// let is_dir = true;
395 /// let (match_type, child_pattern) = MatchPattern::match_filename_exclude(
396 /// &filename,
397 /// is_dir,
398 /// &patterns
399 /// )?;
400 /// assert_eq!(match_type, MatchType::Positive);
401 /// /// child pattern will be empty
402 /// # Ok(())
403 /// # }
404 /// ```
405 pub fn match_filename_exclude(
406 filename: &CStr,
407 is_dir: bool,
408 match_pattern: &[MatchPattern],
409 ) -> Result<(MatchType, Vec<MatchPattern>), Error> {
410 let mut child_pattern = Vec::new();
411 let mut match_state = MatchType::None;
412
413 for pattern in match_pattern {
414 match pattern.matches_filename(filename, is_dir)? {
415 MatchType::None => {}
416 MatchType::Positive => match_state = MatchType::Positive,
417 MatchType::Negative => match_state = MatchType::Negative,
418 match_type => {
419 if match_state != MatchType::Positive && match_state != MatchType::Negative {
420 match_state = match_type;
421 }
422 child_pattern.push(pattern.get_rest_pattern());
423 }
424 }
425 }
426
427 Ok((match_state, child_pattern))
428 }
429 }
430
// Splits the given pattern at its first slash and returns the part in front
// of the slash (front pattern) and the part after it (rest pattern), without
// the slash itself.
// A leading `./` is dropped before splitting.
// If there is no slash, the front pattern is the whole pattern and the rest
// pattern is empty.
// A front part of `**` is special, as such a pattern is intended to match
// multiple directories: the front pattern becomes `*` and the rest pattern is
// the pattern itself (without a leading `./`).
fn split_at_slash(match_pattern: &CStr) -> (CString, CString) {
    let raw = match_pattern.to_bytes();
    let trimmed = if raw.starts_with(b"./") { &raw[2..] } else { raw };

    // `splitn(2, ..)` yields the part before the first slash and, if there is
    // a slash, everything behind it.
    let mut parts = trimmed.splitn(2, |&byte| byte == b'/');
    let head = parts.next().unwrap_or(trimmed);
    let tail = parts.next().unwrap_or(&trimmed[..0]);

    // '**' is treated such that it matches any directory
    let (front, rest): (&[u8], &[u8]) = if head == b"**" {
        (&b"*"[..], trimmed)
    } else {
        (head, tail)
    };

    // The input was a valid C string, so neither part contains a nul byte and
    // the conversions cannot fail.
    (
        CString::new(front).unwrap(),
        CString::new(rest).unwrap(),
    )
}