]> git.proxmox.com Git - proxmox-backup.git/blame - src/pxar/match_pattern.rs
pxar: match_pattern: refactor MatchPattern and introduce MatchPatternSlice.
[proxmox-backup.git] / src / pxar / match_pattern.rs
CommitLineData
eecb1828
CE
//! `MatchPattern` defines a match pattern used to match filenames encountered
//! during encoding or decoding of a `pxar` archive.
//! `fnmatch` is used internally to match filenames against the patterns.
//! Shell wildcard patterns can be used to match multiple filenames, see manpage
//! `glob(7)`.
//! `**` is treated specially, as it matches multiple directories in a path.
7
cd7dc879
CE
8use std::ffi::{CStr, CString};
9use std::fs::File;
d792dc3c 10use std::io::Read;
cd7dc879
CE
11use std::os::unix::io::{FromRawFd, RawFd};
12
d792dc3c 13use failure::{bail, Error};
cd7dc879 14use libc::{c_char, c_int};
d792dc3c 15use nix::errno::Errno;
4d142ea7
CE
16use nix::fcntl;
17use nix::fcntl::{AtFlags, OFlag};
4d142ea7 18use nix::sys::stat;
cd7dc879 19use nix::sys::stat::{FileStat, Mode};
d792dc3c 20use nix::NixPath;
cd7dc879 21
/// Local copy of the C library's `FNM_NOMATCH` constant.
/// NOTE(review): presumably the value `fnmatch` returns when the string does
/// not match the pattern (`1` in glibc's `<fnmatch.h>`) - confirm for other C
/// libraries. It is not referenced by the code visible in this file.
pub const FNM_NOMATCH: c_int = 1;

extern "C" {
    /// Binding to the C library's `fnmatch(3)`.
    /// The callers in this file pass pointers obtained from `CString`/`CStr`
    /// values, use `0` as `flags`, treat a return value of `0` as a match and
    /// a negative return value as an error.
    fn fnmatch(pattern: *const c_char, string: *const c_char, flags: c_int) -> c_int;
}
27
/// Result of matching a filename against one or more match patterns.
///
/// The "partial" variants are produced when only the part of a pattern in
/// front of its first `/` matched the filename, the plain variants when the
/// pattern matched as a whole. Whether the positive or negative flavour is
/// reported depends on whether the pattern was declared with a `!` prefix.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum MatchType {
    /// The filename did not match.
    None,
    /// The filename matched a pattern without `!` prefix completely.
    Positive,
    /// The filename matched a pattern with `!` prefix completely.
    Negative,
    /// Only the leading component of a pattern without `!` prefix matched.
    PartialPositive,
    /// Only the leading component of a pattern with `!` prefix matched.
    PartialNegative,
}
36
eecb1828
CE
/// `MatchPattern` provides functionality for filename glob pattern matching
/// based on the C library's `fnmatch`.
///
/// A `MatchPattern` owns its pattern bytes; the actual matching is done on the
/// borrowed view obtained via `as_slice()`.
/// Positive matches return `MatchType::PartialPositive` or `MatchType::Positive`.
/// Patterns starting with `!` are interpreted as negation, meaning they will
/// return `MatchType::PartialNegative` or `MatchType::Negative`.
/// No match results in `MatchType::None`.
///
/// # Examples
/// ```
/// # use std::ffi::CString;
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let filename = CString::new("some.conf")?;
/// let is_dir = false;
///
/// // Positive match of any file ending in `.conf` in any subdirectory
/// let positive = MatchPattern::from_line(b"**/*.conf")?.unwrap();
/// let m_positive = positive.as_slice().matches_filename(&filename, is_dir)?;
/// assert!(m_positive == MatchType::Positive);
///
/// // Negative match of filenames starting with `s`
/// let negative = MatchPattern::from_line(b"![s]*")?.unwrap();
/// let m_negative = negative.as_slice().matches_filename(&filename, is_dir)?;
/// assert!(m_negative == MatchType::Negative);
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MatchPattern {
    /// Pattern bytes without the `!` prefix, the leading `/` and the trailing `/`.
    pattern: Vec<u8>,
    /// `false` if the pattern was declared with a `!` prefix.
    match_positive: bool,
    /// `true` if the pattern was declared with a trailing `/`.
    match_dir_only: bool,
}
69
4d142ea7 70impl MatchPattern {
eecb1828
CE
71 /// Read a list of `MatchPattern` from file.
72 /// The file is read line by line (lines terminated by newline character),
73 /// each line may only contain one pattern.
74 /// Leading `/` are ignored and lines starting with `#` are interpreted as
75 /// comments and not included in the resulting list.
76 /// Patterns ending in `/` will match only directories.
77 ///
78 /// On success, a list of match pattern is returned as well as the raw file
79 /// byte buffer together with the files stats.
80 /// This is done in order to avoid reading the file more than once during
81 /// encoding of the archive.
4d142ea7
CE
82 pub fn from_file<P: ?Sized + NixPath>(
83 parent_fd: RawFd,
84 filename: &P,
85 ) -> Result<Option<(Vec<MatchPattern>, Vec<u8>, FileStat)>, Error> {
4d142ea7 86 let stat = match stat::fstatat(parent_fd, filename, AtFlags::AT_SYMLINK_NOFOLLOW) {
cd7dc879
CE
87 Ok(stat) => stat,
88 Err(nix::Error::Sys(Errno::ENOENT)) => return Ok(None),
89 Err(err) => bail!("stat failed - {}", err),
90 };
91
4d142ea7 92 let filefd = fcntl::openat(parent_fd, filename, OFlag::O_NOFOLLOW, Mode::empty())?;
d792dc3c 93 let mut file = unsafe { File::from_raw_fd(filefd) };
cd7dc879
CE
94
95 let mut content_buffer = Vec::new();
96 let _bytes = file.read_to_end(&mut content_buffer)?;
97
4d142ea7 98 let mut match_pattern = Vec::new();
cd7dc879
CE
99 for line in content_buffer.split(|&c| c == b'\n') {
100 if line.is_empty() {
101 continue;
102 }
103 if let Some(pattern) = Self::from_line(line)? {
4d142ea7 104 match_pattern.push(pattern);
cd7dc879
CE
105 }
106 }
107
4d142ea7 108 Ok(Some((match_pattern, content_buffer, stat)))
cd7dc879
CE
109 }
110
eecb1828
CE
111 /// Interprete a byte buffer as a sinlge line containing a valid
112 /// `MatchPattern`.
113 /// Pattern starting with `#` are interpreted as comments, returning `Ok(None)`.
114 /// Pattern starting with '!' are interpreted as negative match pattern.
115 /// Pattern with trailing `/` match only against directories.
116 /// `.` as well as `..` and any pattern containing `\0` are invalid and will
117 /// result in an error.
4d142ea7 118 pub fn from_line(line: &[u8]) -> Result<Option<MatchPattern>, Error> {
cd7dc879
CE
119 let mut input = line;
120
121 if input.starts_with(b"#") {
122 return Ok(None);
123 }
124
4d142ea7 125 let match_positive = if input.starts_with(b"!") {
cd7dc879
CE
126 // Reduce slice view to exclude "!"
127 input = &input[1..];
128 false
129 } else {
130 true
131 };
132
133 // Paths ending in / match only directory names (no filenames)
134 let match_dir_only = if input.ends_with(b"/") {
135 let len = input.len();
136 input = &input[..len - 1];
137 true
138 } else {
139 false
140 };
141
142 // Ignore initial slash
143 if input.starts_with(b"/") {
144 input = &input[1..];
145 }
146
d792dc3c 147 if input.is_empty() || input == b"." || input == b".." || input.contains(&b'\0') {
cd7dc879
CE
148 bail!("invalid path component encountered");
149 }
150
4d142ea7 151 Ok(Some(MatchPattern {
255bb860 152 pattern: input.to_vec(),
4d142ea7 153 match_positive,
cd7dc879 154 match_dir_only,
cd7dc879
CE
155 }))
156 }
157
255bb860
CE
158
159 /// Create a `MatchPatternSlice` of the `MatchPattern` to give a view of the
160 /// `MatchPattern` without copying its content.
161 pub fn as_slice<'a>(&'a self) -> MatchPatternSlice<'a> {
162 MatchPatternSlice {
163 pattern: self.pattern.as_slice(),
164 match_positive: self.match_positive,
165 match_dir_only: self.match_dir_only,
166 }
167 }
168
169 /// Dump the content of the `MatchPattern` to stdout.
170 /// Intended for debugging purposes only.
171 pub fn dump(&self) {
172 match (self.match_positive, self.match_dir_only) {
173 (true, true) => println!("{:#?}/", self.pattern),
174 (true, false) => println!("{:#?}", self.pattern),
175 (false, true) => println!("!{:#?}/", self.pattern),
176 (false, false) => println!("!{:#?}", self.pattern),
177 }
178 }
179
180 /// Convert a list of MatchPattern to bytes in order to write them to e.g.
181 /// a file.
182 pub fn to_bytes(patterns: &[MatchPattern]) -> Vec<u8> {
183 let mut slices = Vec::new();
184 for pattern in patterns {
185 slices.push(pattern.as_slice());
186 }
187
188 MatchPatternSlice::to_bytes(&slices)
189 }
190}
191
/// Borrowed view of a `MatchPattern`.
///
/// It holds a reference to (a part of) the pattern bytes together with a copy
/// of the pattern's flags, which allows handing sub-patterns down a directory
/// tree without copying the pattern content.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MatchPatternSlice<'a> {
    /// The referenced pattern bytes.
    pattern: &'a [u8],
    /// `false` if the pattern was declared with a `!` prefix.
    match_positive: bool,
    /// `true` if the pattern was declared with a trailing `/`.
    match_dir_only: bool,
}
198
199impl<'a> MatchPatternSlice<'a> {
200 /// Returns the pattern before the first `/` encountered as `MatchPatternSlice`.
201 /// If no slash is encountered, the `MatchPatternSlice` will be a copy of the
eecb1828
CE
202 /// original pattern.
203 /// ```
255bb860 204 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
eecb1828
CE
205 /// # fn main() -> Result<(), failure::Error> {
206 /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
255bb860
CE
207 /// let slice = pattern.as_slice();
208 /// let front = slice.get_front_pattern();
eecb1828
CE
209 /// /// ... will be the same as ...
210 /// let front_pattern = MatchPattern::from_line(b"some")?.unwrap();
255bb860 211 /// let front_slice = front_pattern.as_slice();
eecb1828
CE
212 /// # Ok(())
213 /// # }
214 /// ```
255bb860
CE
215 pub fn get_front_pattern(&'a self) -> MatchPatternSlice<'a> {
216 let (front, _) = self.split_at_slash();
217 MatchPatternSlice {
218 pattern: front,
4d142ea7 219 match_positive: self.match_positive,
cd7dc879 220 match_dir_only: self.match_dir_only,
cd7dc879
CE
221 }
222 }
223
255bb860
CE
224 /// Returns the pattern after the first encountered `/` as `MatchPatternSlice`.
225 /// If no slash is encountered, the `MatchPatternSlice` will be empty.
eecb1828 226 /// ```
255bb860 227 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
eecb1828
CE
228 /// # fn main() -> Result<(), failure::Error> {
229 /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
255bb860
CE
230 /// let slice = pattern.as_slice();
231 /// let rest = slice.get_rest_pattern();
eecb1828
CE
232 /// /// ... will be the same as ...
233 /// let rest_pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
255bb860 234 /// let rest_slice = rest_pattern.as_slice();
eecb1828
CE
235 /// # Ok(())
236 /// # }
237 /// ```
255bb860
CE
238 pub fn get_rest_pattern(&'a self) -> MatchPatternSlice<'a> {
239 let (_, rest) = self.split_at_slash();
240 MatchPatternSlice {
241 pattern: rest,
4d142ea7 242 match_positive: self.match_positive,
cd7dc879 243 match_dir_only: self.match_dir_only,
cd7dc879
CE
244 }
245 }
246
255bb860
CE
247 /// Splits the `MatchPatternSlice` at the first slash encountered and returns the
248 /// content before (front pattern) and after the slash (rest pattern),
249 /// omitting the slash itself.
250 /// Slices starting with `**/` are an exception to this, as the corresponding
251 /// `MatchPattern` is intended to match multiple directories.
252 /// These pattern slices therefore return a `*` as front pattern and the original
253 /// pattern itself as rest pattern.
254 fn split_at_slash(&'a self) -> (&'a [u8], &'a [u8]) {
255 let pattern = if self.pattern.starts_with(b"./") {
256 &self.pattern[2..]
257 } else {
258 self.pattern
259 };
260
261 let (mut front, mut rest) = match pattern.iter().position(|&c| c == b'/') {
262 Some(ind) => {
263 let (front, rest) = pattern.split_at(ind);
264 (front, &rest[1..])
265 }
266 None => (pattern, &pattern[0..0]),
267 };
268 // '**' is treated such that it maches any directory
269 if front == b"**" {
270 front = b"*";
271 rest = pattern;
cd7dc879 272 }
255bb860
CE
273
274 (front, rest)
cd7dc879
CE
275 }
276
255bb860 277 /// Convert a list of `MatchPatternSlice`s to bytes in order to write them to e.g.
e50a90e0 278 /// a file.
255bb860 279 pub fn to_bytes(patterns: &[MatchPatternSlice]) -> Vec<u8> {
e50a90e0
CE
280 let mut buffer = Vec::new();
281 for pattern in patterns {
920243b1 282 if !pattern.match_positive { buffer.push(b'!'); }
255bb860 283 buffer.extend_from_slice(&pattern.pattern);
920243b1 284 if pattern.match_dir_only { buffer.push(b'/'); }
e50a90e0
CE
285 buffer.push(b'\n');
286 }
287 buffer
288 }
289
255bb860 290 /// Match the given filename against this `MatchPatternSlice`.
eecb1828
CE
291 /// If the filename matches the pattern completely, `MatchType::Positive` or
292 /// `MatchType::Negative` is returned, depending if the match pattern is was
293 /// declared as positive (no `!` prefix) or negative (`!` prefix).
294 /// If the pattern matched only up to the first slash of the pattern,
295 /// `MatchType::PartialPositive` or `MatchType::PartialNegatie` is returned.
296 /// If the pattern was postfixed by a trailing `/` a match is only valid if
297 /// the parameter `is_dir` equals `true`.
298 /// No match results in `MatchType::None`.
43e892d2 299 pub fn matches_filename(&self, filename: &CStr, is_dir: bool) -> Result<MatchType, Error> {
cd7dc879 300 let mut res = MatchType::None;
255bb860 301 let (front, _) = self.split_at_slash();
cd7dc879 302
255bb860 303 let front = CString::new(front).unwrap();
cd7dc879 304 let fnmatch_res = unsafe {
4d142ea7
CE
305 let front_ptr = front.as_ptr() as *const libc::c_char;
306 let filename_ptr = filename.as_ptr() as *const libc::c_char;
d792dc3c 307 fnmatch(front_ptr, filename_ptr, 0)
cd7dc879 308 };
43e892d2
CE
309 if fnmatch_res < 0 {
310 bail!("error in fnmatch inside of MatchPattern");
311 }
cd7dc879 312 if fnmatch_res == 0 {
4d142ea7
CE
313 res = if self.match_positive {
314 MatchType::PartialPositive
cd7dc879 315 } else {
4d142ea7 316 MatchType::PartialNegative
cd7dc879
CE
317 };
318 }
319
255bb860
CE
320 let full = if self.pattern.starts_with(b"**/") {
321 CString::new(&self.pattern[3..]).unwrap()
cd7dc879 322 } else {
255bb860 323 CString::new(&self.pattern[..]).unwrap()
cd7dc879
CE
324 };
325 let fnmatch_res = unsafe {
4d142ea7
CE
326 let full_ptr = full.as_ptr() as *const libc::c_char;
327 let filename_ptr = filename.as_ptr() as *const libc::c_char;
328 fnmatch(full_ptr, filename_ptr, 0)
cd7dc879 329 };
43e892d2
CE
330 if fnmatch_res < 0 {
331 bail!("error in fnmatch inside of MatchPattern");
332 }
cd7dc879 333 if fnmatch_res == 0 {
4d142ea7
CE
334 res = if self.match_positive {
335 MatchType::Positive
cd7dc879 336 } else {
4d142ea7 337 MatchType::Negative
cd7dc879
CE
338 };
339 }
340
341 if !is_dir && self.match_dir_only {
342 res = MatchType::None;
343 }
344
4d142ea7 345 if !is_dir && (res == MatchType::PartialPositive || res == MatchType::PartialNegative) {
a771f907
CE
346 res = MatchType::None;
347 }
348
43e892d2 349 Ok(res)
cd7dc879 350 }
d3dbe52f 351
255bb860 352 /// Match the given filename against the set of `MatchPatternSlice`s.
d3dbe52f
CE
353 ///
354 /// A positive match is intended to includes the full subtree (unless another
355 /// negative match excludes entries later).
255bb860 356 /// The `MatchType` together with an updated `MatchPatternSlice` list for passing
d3dbe52f
CE
357 /// to the matched child is returned.
358 /// ```
359 /// # use std::ffi::CString;
255bb860 360 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
d3dbe52f
CE
361 /// # fn main() -> Result<(), failure::Error> {
362 /// let patterns = vec![
363 /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
364 /// MatchPattern::from_line(b"to_match/")?.unwrap()
365 /// ];
255bb860
CE
366 /// let mut slices = Vec::new();
367 /// for pattern in &patterns {
368 /// slices.push(pattern.as_slice());
369 /// }
d3dbe52f
CE
370 /// let filename = CString::new("some")?;
371 /// let is_dir = true;
255bb860 372 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include(
d3dbe52f
CE
373 /// &filename,
374 /// is_dir,
255bb860 375 /// &slices
d3dbe52f
CE
376 /// )?;
377 /// assert_eq!(match_type, MatchType::PartialPositive);
378 /// /// child pattern will be the same as ...
379 /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
255bb860 380 /// let slice = pattern.as_slice();
d3dbe52f
CE
381 ///
382 /// let filename = CString::new("to_match")?;
383 /// let is_dir = true;
255bb860 384 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include(
d3dbe52f
CE
385 /// &filename,
386 /// is_dir,
255bb860 387 /// &slices
d3dbe52f
CE
388 /// )?;
389 /// assert_eq!(match_type, MatchType::Positive);
390 /// /// child pattern will be the same as ...
391 /// let pattern = MatchPattern::from_line(b"**/*")?.unwrap();
255bb860 392 /// let slice = pattern.as_slice();
d3dbe52f
CE
393 /// # Ok(())
394 /// # }
395 /// ```
396 pub fn match_filename_include(
397 filename: &CStr,
398 is_dir: bool,
255bb860
CE
399 match_pattern: &'a [MatchPatternSlice<'a>],
400 ) -> Result<(MatchType, Vec<MatchPatternSlice<'a>>), Error> {
d3dbe52f
CE
401 let mut child_pattern = Vec::new();
402 let mut match_state = MatchType::None;
403
404 for pattern in match_pattern {
405 match pattern.matches_filename(filename, is_dir)? {
406 MatchType::None => continue,
255bb860 407 MatchType::Positive => match_state = MatchType::Positive,
d3dbe52f
CE
408 MatchType::Negative => match_state = MatchType::Negative,
409 MatchType::PartialPositive => {
410 if match_state != MatchType::Negative && match_state != MatchType::Positive {
411 match_state = MatchType::PartialPositive;
412 }
413 child_pattern.push(pattern.get_rest_pattern());
414 }
415 MatchType::PartialNegative => {
416 if match_state == MatchType::PartialPositive {
417 match_state = MatchType::PartialNegative;
418 }
419 child_pattern.push(pattern.get_rest_pattern());
420 }
421 }
422 }
423
424 Ok((match_state, child_pattern))
425 }
426
255bb860 427 /// Match the given filename against the set of `MatchPatternSlice`s.
d3dbe52f
CE
428 ///
429 /// A positive match is intended to exclude the full subtree, independent of
430 /// matches deeper down the tree.
431 /// The `MatchType` together with an updated `MatchPattern` list for passing
432 /// to the matched child is returned.
433 /// ```
434 /// # use std::ffi::CString;
255bb860 435 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
d3dbe52f
CE
436 /// # fn main() -> Result<(), failure::Error> {
437 /// let patterns = vec![
438 /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
439 /// MatchPattern::from_line(b"to_match/")?.unwrap()
440 /// ];
255bb860
CE
441 /// let mut slices = Vec::new();
442 /// for pattern in &patterns {
443 /// slices.push(pattern.as_slice());
444 /// }
d3dbe52f
CE
445 /// let filename = CString::new("some")?;
446 /// let is_dir = true;
255bb860 447 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude(
d3dbe52f
CE
448 /// &filename,
449 /// is_dir,
255bb860 450 /// &slices,
d3dbe52f
CE
451 /// )?;
452 /// assert_eq!(match_type, MatchType::PartialPositive);
453 /// /// child pattern will be the same as ...
454 /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
255bb860 455 /// let slice = pattern.as_slice();
d3dbe52f
CE
456 ///
457 /// let filename = CString::new("to_match")?;
458 /// let is_dir = true;
255bb860 459 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude(
d3dbe52f
CE
460 /// &filename,
461 /// is_dir,
255bb860 462 /// &slices,
d3dbe52f
CE
463 /// )?;
464 /// assert_eq!(match_type, MatchType::Positive);
465 /// /// child pattern will be empty
466 /// # Ok(())
467 /// # }
468 /// ```
469 pub fn match_filename_exclude(
470 filename: &CStr,
471 is_dir: bool,
255bb860
CE
472 match_pattern: &'a [MatchPatternSlice<'a>],
473 ) -> Result<(MatchType, Vec<MatchPatternSlice<'a>>), Error> {
d3dbe52f
CE
474 let mut child_pattern = Vec::new();
475 let mut match_state = MatchType::None;
476
477 for pattern in match_pattern {
478 match pattern.matches_filename(filename, is_dir)? {
479 MatchType::None => {}
480 MatchType::Positive => match_state = MatchType::Positive,
481 MatchType::Negative => match_state = MatchType::Negative,
482 match_type => {
483 if match_state != MatchType::Positive && match_state != MatchType::Negative {
484 match_state = match_type;
485 }
486 child_pattern.push(pattern.get_rest_pattern());
487 }
488 }
489 }
490
491 Ok((match_state, child_pattern))
492 }
cd7dc879 493}