]> git.proxmox.com Git - proxmox-backup.git/blame - src/pxar/match_pattern.rs
Cargo.toml: pathpatterns, pxar, proxmox-fuse
[proxmox-backup.git] / src / pxar / match_pattern.rs
CommitLineData
eecb1828
CE
//! `MatchPattern` defines a match pattern used to match filenames encountered
//! during encoding or decoding of a `pxar` archive.
//! `fnmatch` is used internally to match filenames against the patterns.
//! Shell wildcard patterns can be used to match multiple filenames, see manpage
//! `glob(7)`.
//! `**` is treated specially, as it matches multiple directories in a path.
7
cd7dc879
CE
8use std::ffi::{CStr, CString};
9use std::fs::File;
d792dc3c 10use std::io::Read;
cd7dc879
CE
11use std::os::unix::io::{FromRawFd, RawFd};
12
f7d4e4b5 13use anyhow::{bail, Error};
cd7dc879 14use libc::{c_char, c_int};
d792dc3c 15use nix::errno::Errno;
4d142ea7
CE
16use nix::fcntl;
17use nix::fcntl::{AtFlags, OFlag};
4d142ea7 18use nix::sys::stat;
cd7dc879 19use nix::sys::stat::{FileStat, Mode};
d792dc3c 20use nix::NixPath;
cd7dc879 21
/// Value returned by `fnmatch` when the string does not match the pattern.
pub const FNM_NOMATCH: c_int = 1;

extern "C" {
    /// C library `fnmatch(3)`: matches `string` against the shell wildcard
    /// `pattern`.
    ///
    /// Returns `0` on a match and `FNM_NOMATCH` if there is no match; any
    /// other non-zero value signals an error.
    ///
    /// # Safety
    ///
    /// `pattern` and `string` must both point to valid, NUL-terminated C
    /// strings that stay alive for the duration of the call.
    fn fnmatch(pattern: *const c_char, string: *const c_char, flags: c_int) -> c_int;
}
27
/// Outcome of matching a filename against a match pattern.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum MatchType {
    /// The filename did not match the pattern.
    None,
    /// The filename matched a positive pattern (no `!` prefix) completely.
    Positive,
    /// The filename matched a negated pattern (`!` prefix) completely.
    Negative,
    /// The filename matched a positive pattern only up to the pattern's
    /// first `/`.
    PartialPositive,
    /// The filename matched a negated pattern only up to the pattern's
    /// first `/`.
    PartialNegative,
}
36
eecb1828
CE
/// `MatchPattern` provides functionality for filename glob pattern matching
/// based on glibc's `fnmatch`.
/// Positive matches return `MatchType::PartialPositive` or `MatchType::Positive`.
/// Patterns starting with `!` are interpreted as negation, meaning they will
/// return `MatchType::PartialNegative` or `MatchType::Negative`.
/// No matches result in `MatchType::None`.
///
/// Equality and ordering compare the pattern bytes first, then the
/// positive/negative flag, then the directory-only flag.
///
/// # Examples
/// ```
/// # use std::ffi::CString;
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
/// # fn main() -> Result<(), anyhow::Error> {
/// let filename = CString::new("some.conf")?;
/// let is_dir = false;
///
/// // Positive match of any file ending in `.conf` in any subdirectory
/// let positive = MatchPattern::from_line(b"**/*.conf")?.unwrap();
/// let m_positive = positive.as_slice().matches_filename(&filename, is_dir)?;
/// assert!(m_positive == MatchType::Positive);
///
/// // Negative match of filenames starting with `s`
/// let negative = MatchPattern::from_line(b"![s]*")?.unwrap();
/// let m_negative = negative.as_slice().matches_filename(&filename, is_dir)?;
/// assert!(m_negative == MatchType::Negative);
/// # Ok(())
/// # }
/// ```
// The derived comparison traits walk the fields in declaration order, so the
// field order below is part of the ordering contract.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct MatchPattern {
    /// Raw pattern bytes, without the `!` prefix and without the trailing `/`.
    pattern: Vec<u8>,
    /// `false` if the pattern was declared with a leading `!`.
    match_positive: bool,
    /// `true` if the pattern was declared with a trailing `/`.
    match_dir_only: bool,
}
84
4d142ea7 85impl MatchPattern {
eecb1828
CE
86 /// Read a list of `MatchPattern` from file.
87 /// The file is read line by line (lines terminated by newline character),
88 /// each line may only contain one pattern.
89 /// Leading `/` are ignored and lines starting with `#` are interpreted as
90 /// comments and not included in the resulting list.
91 /// Patterns ending in `/` will match only directories.
92 ///
93 /// On success, a list of match pattern is returned as well as the raw file
94 /// byte buffer together with the files stats.
95 /// This is done in order to avoid reading the file more than once during
96 /// encoding of the archive.
4d142ea7
CE
97 pub fn from_file<P: ?Sized + NixPath>(
98 parent_fd: RawFd,
99 filename: &P,
ecbc6226 100 ) -> Result<Option<(Vec<MatchPattern>, Vec<u8>, FileStat)>, nix::Error> {
4d142ea7 101 let stat = match stat::fstatat(parent_fd, filename, AtFlags::AT_SYMLINK_NOFOLLOW) {
cd7dc879
CE
102 Ok(stat) => stat,
103 Err(nix::Error::Sys(Errno::ENOENT)) => return Ok(None),
ecbc6226 104 Err(err) => return Err(err),
cd7dc879
CE
105 };
106
4d142ea7 107 let filefd = fcntl::openat(parent_fd, filename, OFlag::O_NOFOLLOW, Mode::empty())?;
d792dc3c 108 let mut file = unsafe { File::from_raw_fd(filefd) };
cd7dc879
CE
109
110 let mut content_buffer = Vec::new();
ecbc6226
CE
111 let _bytes = file.read_to_end(&mut content_buffer)
112 .map_err(|_| Errno::EIO)?;
cd7dc879 113
4d142ea7 114 let mut match_pattern = Vec::new();
cd7dc879
CE
115 for line in content_buffer.split(|&c| c == b'\n') {
116 if line.is_empty() {
117 continue;
118 }
119 if let Some(pattern) = Self::from_line(line)? {
4d142ea7 120 match_pattern.push(pattern);
cd7dc879
CE
121 }
122 }
123
4d142ea7 124 Ok(Some((match_pattern, content_buffer, stat)))
cd7dc879
CE
125 }
126
add5861e 127 /// Interpret a byte buffer as a sinlge line containing a valid
eecb1828
CE
128 /// `MatchPattern`.
129 /// Pattern starting with `#` are interpreted as comments, returning `Ok(None)`.
130 /// Pattern starting with '!' are interpreted as negative match pattern.
131 /// Pattern with trailing `/` match only against directories.
132 /// `.` as well as `..` and any pattern containing `\0` are invalid and will
ecbc6226
CE
133 /// result in an error with Errno::EINVAL.
134 pub fn from_line(line: &[u8]) -> Result<Option<MatchPattern>, nix::Error> {
cd7dc879
CE
135 let mut input = line;
136
137 if input.starts_with(b"#") {
138 return Ok(None);
139 }
140
4d142ea7 141 let match_positive = if input.starts_with(b"!") {
cd7dc879
CE
142 // Reduce slice view to exclude "!"
143 input = &input[1..];
144 false
145 } else {
146 true
147 };
148
149 // Paths ending in / match only directory names (no filenames)
150 let match_dir_only = if input.ends_with(b"/") {
151 let len = input.len();
152 input = &input[..len - 1];
153 true
154 } else {
155 false
156 };
157
158 // Ignore initial slash
159 if input.starts_with(b"/") {
160 input = &input[1..];
161 }
162
d792dc3c 163 if input.is_empty() || input == b"." || input == b".." || input.contains(&b'\0') {
ecbc6226 164 return Err(nix::Error::Sys(Errno::EINVAL));
cd7dc879
CE
165 }
166
4d142ea7 167 Ok(Some(MatchPattern {
255bb860 168 pattern: input.to_vec(),
4d142ea7 169 match_positive,
cd7dc879 170 match_dir_only,
cd7dc879
CE
171 }))
172 }
173
255bb860
CE
174
175 /// Create a `MatchPatternSlice` of the `MatchPattern` to give a view of the
176 /// `MatchPattern` without copying its content.
177 pub fn as_slice<'a>(&'a self) -> MatchPatternSlice<'a> {
178 MatchPatternSlice {
179 pattern: self.pattern.as_slice(),
180 match_positive: self.match_positive,
181 match_dir_only: self.match_dir_only,
182 }
183 }
184
185 /// Dump the content of the `MatchPattern` to stdout.
186 /// Intended for debugging purposes only.
187 pub fn dump(&self) {
188 match (self.match_positive, self.match_dir_only) {
189 (true, true) => println!("{:#?}/", self.pattern),
190 (true, false) => println!("{:#?}", self.pattern),
191 (false, true) => println!("!{:#?}/", self.pattern),
192 (false, false) => println!("!{:#?}", self.pattern),
193 }
194 }
195
196 /// Convert a list of MatchPattern to bytes in order to write them to e.g.
197 /// a file.
198 pub fn to_bytes(patterns: &[MatchPattern]) -> Vec<u8> {
199 let mut slices = Vec::new();
200 for pattern in patterns {
201 slices.push(pattern.as_slice());
202 }
203
204 MatchPatternSlice::to_bytes(&slices)
205 }
a333b4cc
CE
206
207 /// Invert the match type for this MatchPattern.
208 pub fn invert(&mut self) {
209 self.match_positive = !self.match_positive;
210 }
255bb860
CE
211}
212
/// Borrowed view of a match pattern: the pattern bytes together with the
/// positive/negative and directory-only flags.
#[derive(Clone, Debug)]
pub struct MatchPatternSlice<'a> {
    /// Borrowed pattern bytes.
    pattern: &'a [u8],
    /// `false` for negated (`!`) patterns.
    match_positive: bool,
    /// `true` if the pattern only applies to directories.
    match_dir_only: bool,
}
219
220impl<'a> MatchPatternSlice<'a> {
221 /// Returns the pattern before the first `/` encountered as `MatchPatternSlice`.
222 /// If no slash is encountered, the `MatchPatternSlice` will be a copy of the
eecb1828
CE
223 /// original pattern.
224 /// ```
255bb860 225 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
f7d4e4b5 226 /// # fn main() -> Result<(), anyhow::Error> {
eecb1828 227 /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
255bb860
CE
228 /// let slice = pattern.as_slice();
229 /// let front = slice.get_front_pattern();
eecb1828
CE
230 /// /// ... will be the same as ...
231 /// let front_pattern = MatchPattern::from_line(b"some")?.unwrap();
255bb860 232 /// let front_slice = front_pattern.as_slice();
eecb1828
CE
233 /// # Ok(())
234 /// # }
235 /// ```
255bb860
CE
236 pub fn get_front_pattern(&'a self) -> MatchPatternSlice<'a> {
237 let (front, _) = self.split_at_slash();
238 MatchPatternSlice {
239 pattern: front,
4d142ea7 240 match_positive: self.match_positive,
cd7dc879 241 match_dir_only: self.match_dir_only,
cd7dc879
CE
242 }
243 }
244
255bb860
CE
245 /// Returns the pattern after the first encountered `/` as `MatchPatternSlice`.
246 /// If no slash is encountered, the `MatchPatternSlice` will be empty.
eecb1828 247 /// ```
255bb860 248 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
f7d4e4b5 249 /// # fn main() -> Result<(), anyhow::Error> {
eecb1828 250 /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
255bb860
CE
251 /// let slice = pattern.as_slice();
252 /// let rest = slice.get_rest_pattern();
eecb1828
CE
253 /// /// ... will be the same as ...
254 /// let rest_pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
255bb860 255 /// let rest_slice = rest_pattern.as_slice();
eecb1828
CE
256 /// # Ok(())
257 /// # }
258 /// ```
255bb860
CE
259 pub fn get_rest_pattern(&'a self) -> MatchPatternSlice<'a> {
260 let (_, rest) = self.split_at_slash();
261 MatchPatternSlice {
262 pattern: rest,
4d142ea7 263 match_positive: self.match_positive,
cd7dc879 264 match_dir_only: self.match_dir_only,
cd7dc879
CE
265 }
266 }
267
255bb860
CE
268 /// Splits the `MatchPatternSlice` at the first slash encountered and returns the
269 /// content before (front pattern) and after the slash (rest pattern),
270 /// omitting the slash itself.
271 /// Slices starting with `**/` are an exception to this, as the corresponding
272 /// `MatchPattern` is intended to match multiple directories.
273 /// These pattern slices therefore return a `*` as front pattern and the original
274 /// pattern itself as rest pattern.
275 fn split_at_slash(&'a self) -> (&'a [u8], &'a [u8]) {
276 let pattern = if self.pattern.starts_with(b"./") {
277 &self.pattern[2..]
278 } else {
279 self.pattern
280 };
281
282 let (mut front, mut rest) = match pattern.iter().position(|&c| c == b'/') {
283 Some(ind) => {
284 let (front, rest) = pattern.split_at(ind);
285 (front, &rest[1..])
286 }
287 None => (pattern, &pattern[0..0]),
288 };
289 // '**' is treated such that it maches any directory
290 if front == b"**" {
291 front = b"*";
292 rest = pattern;
cd7dc879 293 }
255bb860
CE
294
295 (front, rest)
cd7dc879
CE
296 }
297
255bb860 298 /// Convert a list of `MatchPatternSlice`s to bytes in order to write them to e.g.
e50a90e0 299 /// a file.
255bb860 300 pub fn to_bytes(patterns: &[MatchPatternSlice]) -> Vec<u8> {
e50a90e0
CE
301 let mut buffer = Vec::new();
302 for pattern in patterns {
920243b1 303 if !pattern.match_positive { buffer.push(b'!'); }
255bb860 304 buffer.extend_from_slice(&pattern.pattern);
920243b1 305 if pattern.match_dir_only { buffer.push(b'/'); }
e50a90e0
CE
306 buffer.push(b'\n');
307 }
308 buffer
309 }
310
255bb860 311 /// Match the given filename against this `MatchPatternSlice`.
eecb1828
CE
312 /// If the filename matches the pattern completely, `MatchType::Positive` or
313 /// `MatchType::Negative` is returned, depending if the match pattern is was
314 /// declared as positive (no `!` prefix) or negative (`!` prefix).
315 /// If the pattern matched only up to the first slash of the pattern,
316 /// `MatchType::PartialPositive` or `MatchType::PartialNegatie` is returned.
317 /// If the pattern was postfixed by a trailing `/` a match is only valid if
318 /// the parameter `is_dir` equals `true`.
319 /// No match results in `MatchType::None`.
43e892d2 320 pub fn matches_filename(&self, filename: &CStr, is_dir: bool) -> Result<MatchType, Error> {
cd7dc879 321 let mut res = MatchType::None;
255bb860 322 let (front, _) = self.split_at_slash();
cd7dc879 323
255bb860 324 let front = CString::new(front).unwrap();
cd7dc879 325 let fnmatch_res = unsafe {
4d142ea7
CE
326 let front_ptr = front.as_ptr() as *const libc::c_char;
327 let filename_ptr = filename.as_ptr() as *const libc::c_char;
d792dc3c 328 fnmatch(front_ptr, filename_ptr, 0)
cd7dc879 329 };
43e892d2
CE
330 if fnmatch_res < 0 {
331 bail!("error in fnmatch inside of MatchPattern");
332 }
cd7dc879 333 if fnmatch_res == 0 {
4d142ea7
CE
334 res = if self.match_positive {
335 MatchType::PartialPositive
cd7dc879 336 } else {
4d142ea7 337 MatchType::PartialNegative
cd7dc879
CE
338 };
339 }
340
255bb860
CE
341 let full = if self.pattern.starts_with(b"**/") {
342 CString::new(&self.pattern[3..]).unwrap()
cd7dc879 343 } else {
255bb860 344 CString::new(&self.pattern[..]).unwrap()
cd7dc879
CE
345 };
346 let fnmatch_res = unsafe {
4d142ea7
CE
347 let full_ptr = full.as_ptr() as *const libc::c_char;
348 let filename_ptr = filename.as_ptr() as *const libc::c_char;
349 fnmatch(full_ptr, filename_ptr, 0)
cd7dc879 350 };
43e892d2
CE
351 if fnmatch_res < 0 {
352 bail!("error in fnmatch inside of MatchPattern");
353 }
cd7dc879 354 if fnmatch_res == 0 {
4d142ea7
CE
355 res = if self.match_positive {
356 MatchType::Positive
cd7dc879 357 } else {
4d142ea7 358 MatchType::Negative
cd7dc879
CE
359 };
360 }
361
362 if !is_dir && self.match_dir_only {
363 res = MatchType::None;
364 }
365
4d142ea7 366 if !is_dir && (res == MatchType::PartialPositive || res == MatchType::PartialNegative) {
a771f907
CE
367 res = MatchType::None;
368 }
369
43e892d2 370 Ok(res)
cd7dc879 371 }
d3dbe52f 372
255bb860 373 /// Match the given filename against the set of `MatchPatternSlice`s.
d3dbe52f
CE
374 ///
375 /// A positive match is intended to includes the full subtree (unless another
376 /// negative match excludes entries later).
255bb860 377 /// The `MatchType` together with an updated `MatchPatternSlice` list for passing
d3dbe52f
CE
378 /// to the matched child is returned.
379 /// ```
380 /// # use std::ffi::CString;
255bb860 381 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
f7d4e4b5 382 /// # fn main() -> Result<(), anyhow::Error> {
d3dbe52f
CE
383 /// let patterns = vec![
384 /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
385 /// MatchPattern::from_line(b"to_match/")?.unwrap()
386 /// ];
255bb860
CE
387 /// let mut slices = Vec::new();
388 /// for pattern in &patterns {
389 /// slices.push(pattern.as_slice());
390 /// }
d3dbe52f
CE
391 /// let filename = CString::new("some")?;
392 /// let is_dir = true;
255bb860 393 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include(
d3dbe52f
CE
394 /// &filename,
395 /// is_dir,
255bb860 396 /// &slices
d3dbe52f
CE
397 /// )?;
398 /// assert_eq!(match_type, MatchType::PartialPositive);
399 /// /// child pattern will be the same as ...
400 /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
255bb860 401 /// let slice = pattern.as_slice();
d3dbe52f
CE
402 ///
403 /// let filename = CString::new("to_match")?;
404 /// let is_dir = true;
255bb860 405 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include(
d3dbe52f
CE
406 /// &filename,
407 /// is_dir,
255bb860 408 /// &slices
d3dbe52f
CE
409 /// )?;
410 /// assert_eq!(match_type, MatchType::Positive);
411 /// /// child pattern will be the same as ...
412 /// let pattern = MatchPattern::from_line(b"**/*")?.unwrap();
255bb860 413 /// let slice = pattern.as_slice();
d3dbe52f
CE
414 /// # Ok(())
415 /// # }
416 /// ```
417 pub fn match_filename_include(
418 filename: &CStr,
419 is_dir: bool,
255bb860
CE
420 match_pattern: &'a [MatchPatternSlice<'a>],
421 ) -> Result<(MatchType, Vec<MatchPatternSlice<'a>>), Error> {
d3dbe52f
CE
422 let mut child_pattern = Vec::new();
423 let mut match_state = MatchType::None;
424
425 for pattern in match_pattern {
426 match pattern.matches_filename(filename, is_dir)? {
427 MatchType::None => continue,
255bb860 428 MatchType::Positive => match_state = MatchType::Positive,
d3dbe52f
CE
429 MatchType::Negative => match_state = MatchType::Negative,
430 MatchType::PartialPositive => {
431 if match_state != MatchType::Negative && match_state != MatchType::Positive {
432 match_state = MatchType::PartialPositive;
433 }
434 child_pattern.push(pattern.get_rest_pattern());
435 }
436 MatchType::PartialNegative => {
437 if match_state == MatchType::PartialPositive {
438 match_state = MatchType::PartialNegative;
439 }
440 child_pattern.push(pattern.get_rest_pattern());
441 }
442 }
443 }
444
445 Ok((match_state, child_pattern))
446 }
447
255bb860 448 /// Match the given filename against the set of `MatchPatternSlice`s.
d3dbe52f
CE
449 ///
450 /// A positive match is intended to exclude the full subtree, independent of
451 /// matches deeper down the tree.
452 /// The `MatchType` together with an updated `MatchPattern` list for passing
453 /// to the matched child is returned.
454 /// ```
455 /// # use std::ffi::CString;
255bb860 456 /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
f7d4e4b5 457 /// # fn main() -> Result<(), anyhow::Error> {
d3dbe52f
CE
458 /// let patterns = vec![
459 /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
460 /// MatchPattern::from_line(b"to_match/")?.unwrap()
461 /// ];
255bb860
CE
462 /// let mut slices = Vec::new();
463 /// for pattern in &patterns {
464 /// slices.push(pattern.as_slice());
465 /// }
d3dbe52f
CE
466 /// let filename = CString::new("some")?;
467 /// let is_dir = true;
255bb860 468 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude(
d3dbe52f
CE
469 /// &filename,
470 /// is_dir,
255bb860 471 /// &slices,
d3dbe52f
CE
472 /// )?;
473 /// assert_eq!(match_type, MatchType::PartialPositive);
474 /// /// child pattern will be the same as ...
475 /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
255bb860 476 /// let slice = pattern.as_slice();
d3dbe52f
CE
477 ///
478 /// let filename = CString::new("to_match")?;
479 /// let is_dir = true;
255bb860 480 /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude(
d3dbe52f
CE
481 /// &filename,
482 /// is_dir,
255bb860 483 /// &slices,
d3dbe52f
CE
484 /// )?;
485 /// assert_eq!(match_type, MatchType::Positive);
486 /// /// child pattern will be empty
487 /// # Ok(())
488 /// # }
489 /// ```
490 pub fn match_filename_exclude(
491 filename: &CStr,
492 is_dir: bool,
255bb860
CE
493 match_pattern: &'a [MatchPatternSlice<'a>],
494 ) -> Result<(MatchType, Vec<MatchPatternSlice<'a>>), Error> {
d3dbe52f
CE
495 let mut child_pattern = Vec::new();
496 let mut match_state = MatchType::None;
497
498 for pattern in match_pattern {
499 match pattern.matches_filename(filename, is_dir)? {
500 MatchType::None => {}
501 MatchType::Positive => match_state = MatchType::Positive,
502 MatchType::Negative => match_state = MatchType::Negative,
503 match_type => {
504 if match_state != MatchType::Positive && match_state != MatchType::Negative {
505 match_state = match_type;
506 }
507 child_pattern.push(pattern.get_rest_pattern());
508 }
509 }
510 }
511
512 Ok((match_state, child_pattern))
513 }
cd7dc879 514}