]> git.proxmox.com Git - rustc.git/blob - vendor/regex-automata/tests/dfa/api.rs
New upstream version 1.67.1+dfsg1
[rustc.git] / vendor / regex-automata / tests / dfa / api.rs
1 use std::error::Error;
2
3 use regex_automata::{
4 dfa::{dense, regex::Regex, Automaton, OverlappingState},
5 nfa::thompson,
6 HalfMatch, MatchError, MatchKind, MultiMatch,
7 };
8
9 use crate::util::{BunkPrefilter, SubstringPrefilter};
10
// Checks that every forward search routine stops with a quit error as soon as
// it reaches a byte that was configured as a quit byte.
#[test]
fn quit_fwd() -> Result<(), Box<dyn Error>> {
    let config = dense::Config::new().quit(b'x', true);
    let dfa = dense::Builder::new().configure(config).build("[[:word:]]+$")?;

    // 'x' sits at offset 3 of the haystack, so every search below must report
    // the quit at exactly that position.
    let haystack: &[u8] = b"abcxyz";
    let expected: Result<Option<HalfMatch>, MatchError> =
        Err(MatchError::Quit { byte: b'x', offset: 3 });

    assert_eq!(dfa.find_earliest_fwd(haystack), expected);
    assert_eq!(dfa.find_leftmost_fwd(haystack), expected);

    let mut state = OverlappingState::start();
    assert_eq!(dfa.find_overlapping_fwd(haystack, &mut state), expected);

    Ok(())
}
33
// Checks that the reverse search routines also stop with a quit error when
// they reach a byte that was configured as a quit byte.
#[test]
fn quit_rev() -> Result<(), Box<dyn Error>> {
    let dfa_config = dense::Config::new().quit(b'x', true);
    // The underlying NFA is compiled in reverse so the DFA can be searched
    // with the '*_rev' routines.
    let nfa_config = thompson::Config::new().reverse(true);
    let dfa = dense::Builder::new()
        .configure(dfa_config)
        .thompson(nfa_config)
        .build("^[[:word:]]+")?;

    let haystack: &[u8] = b"abcxyz";
    let expected: Result<Option<HalfMatch>, MatchError> =
        Err(MatchError::Quit { byte: b'x', offset: 3 });

    assert_eq!(dfa.find_earliest_rev(haystack), expected);
    assert_eq!(dfa.find_leftmost_rev(haystack), expected);

    Ok(())
}
53
// Checks that the configuration panics when Unicode word boundaries are
// heuristically enabled and a non-ASCII byte is then explicitly marked as NOT
// being a quit byte.
#[test]
#[should_panic]
fn quit_panics() {
    let config = dense::Config::new().unicode_word_boundary(true);
    // Un-marking a non-ASCII byte as a quit byte is the call expected to panic.
    config.quit(b'\xFF', false);
}
62
// Checks that an overlapping search panics when the regex's reverse DFA was
// not compiled with 'starts_for_each_pattern' enabled.
#[test]
#[should_panic]
fn incorrect_config_overlapping_search_panics() {
    let forward = dense::DFA::new(r"abca").unwrap();

    // Deliberately leave 'starts_for_each_pattern' disabled on the reverse
    // DFA; that is the misconfiguration under test.
    let reverse_config = dense::Config::new()
        .anchored(true)
        .match_kind(MatchKind::All)
        .starts_for_each_pattern(false);
    let reverse = dense::Builder::new()
        .configure(reverse_config)
        .thompson(thompson::Config::new().reverse(true))
        .build(r"abca")
        .unwrap();

    let re = Regex::builder().build_from_dfas(forward, reverse);
    let mut state = OverlappingState::start();
    re.find_overlapping("bar abcabcabca abca foo".as_bytes(), &mut state);
}
84
// This tests an interesting case: even when the Unicode word boundary option
// is left disabled, marking every non-ASCII byte as a quit byte causes Unicode
// word boundaries to be enabled, so building a DFA for '\b' succeeds.
#[test]
fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
    // Mark every byte in 0x80..=0xFF (i.e., all non-ASCII bytes) as a quit
    // byte.
    let config = (0x80..=0xFF)
        .fold(dense::Config::new(), |cfg, byte| cfg.quit(byte, true));
    let dfa = dense::Builder::new().configure(config).build(r"\b")?;

    assert_eq!(
        dfa.find_leftmost_fwd(b" a"),
        Ok(Some(HalfMatch::must(0, 1)))
    );
    Ok(())
}
99
// Checks that a Regex equipped with a prefilter still reports the correct
// matches when iterating over a haystack.
#[test]
fn prefilter_works() -> Result<(), Box<dyn Error>> {
    let prefilter = SubstringPrefilter::new("a");
    let re = Regex::new(r"a[0-9]+").unwrap().with_prefilter(prefilter);

    let haystack = b"foo abc foo a1a2a3 foo a123 bar aa456";
    // Collect the (start, end) span of every leftmost match, in order.
    let mut spans: Vec<(usize, usize)> = Vec::new();
    for m in re.find_leftmost_iter(haystack) {
        spans.push((m.start(), m.end()));
    }

    let expected = vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37)];
    assert_eq!(spans, expected);
    Ok(())
}
116
// This test confirms that a prefilter is actually consulted during a search.
// It first checks the results with a well-behaved prefilter, then swaps in a
// prefilter that reports false negatives and checks that the matches vanish.
#[test]
fn prefilter_is_active() -> Result<(), Box<dyn Error>> {
    // One haystack where the match starts after the first byte, and one where
    // the match starts at the very beginning of the haystack.
    let text = b"za123";
    let text_at_start = b"a123";

    let re = Regex::new(r"a[0-9]+")
        .unwrap()
        .with_prefilter(SubstringPrefilter::new("a"));
    assert_eq!(re.find_leftmost(text), Some(MultiMatch::must(0, 1, 5)));
    assert_eq!(
        re.find_leftmost(text_at_start),
        Some(MultiMatch::must(0, 0, 4))
    );

    // With the false-negative prefilter installed, no match may be reported.
    let re = re.with_prefilter(BunkPrefilter::new());
    assert_eq!(re.find_leftmost(text), None);
    // This checks that the prefilter is used when first starting the search,
    // instead of waiting until at least one transition has occurred.
    assert_eq!(re.find_leftmost(text_at_start), None);
    Ok(())
}