]> git.proxmox.com Git - rustc.git/blob - vendor/regex-automata-0.2.0/tests/nfa/thompson/pikevm/api.rs
New upstream version 1.74.1+dfsg1
[rustc.git] / vendor / regex-automata-0.2.0 / tests / nfa / thompson / pikevm / api.rs
1 /*
2 use std::error::Error;
3
4 use regex_automata::{
5 hybrid::{
6 dfa::{self, DFA},
7 regex::Regex,
8 OverlappingState,
9 },
10 nfa::thompson,
11 HalfMatch, MatchError, MatchKind, MultiMatch,
12 };
13
14 use crate::util::{BunkPrefilter, SubstringPrefilter};
15
16 // Tests that too many cache resets cause the lazy DFA to quit.
17 #[test]
18 fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> {
19 // This is a carefully chosen regex. The idea is to pick one that requires
20 // some decent number of states (hence the bounded repetition). But we
21 // specifically choose to create a class with an ASCII letter and a
22 // non-ASCII letter so that we can check that no new states are created
23 // once the cache is full. Namely, if we fill up the cache on a haystack
24 // of 'a's, then in order to match one 'β', a new state will need to be
25 // created since a 'β' is encoded with multiple bytes. Since there's no
26 // room for this state, the search should quit at the very first position.
27 let pattern = r"[aβ]{100}";
28 let dfa = DFA::builder()
29 .configure(
30 // Configure it so that we have the minimum cache capacity
31 // possible. And that if any resets occur, the search quits.
32 DFA::config()
33 .skip_cache_capacity_check(true)
34 .cache_capacity(0)
35 .minimum_cache_clear_count(Some(0)),
36 )
37 .build(pattern)?;
38 let mut cache = dfa.create_cache();
39
40 let haystack = "a".repeat(101).into_bytes();
41 let err = MatchError::GaveUp { offset: 25 };
42 assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err.clone()));
43 assert_eq!(dfa.find_leftmost_fwd(&mut cache, &haystack), Err(err.clone()));
44 assert_eq!(
45 dfa.find_overlapping_fwd(
46 &mut cache,
47 &haystack,
48 &mut OverlappingState::start()
49 ),
50 Err(err.clone())
51 );
52
53 let haystack = "β".repeat(101).into_bytes();
54 let err = MatchError::GaveUp { offset: 0 };
55 assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err));
56 // no need to test that other find routines quit, since we did that above
57
58 // OK, if we reset the cache, then we should be able to create more states
59 // and make more progress with searching for betas.
60 cache.reset(&dfa);
61 let err = MatchError::GaveUp { offset: 26 };
62 assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err));
63
64 // ... switching back to ASCII still makes progress since it just needs to
65 // set transitions on existing states!
66 let haystack = "a".repeat(101).into_bytes();
67 let err = MatchError::GaveUp { offset: 13 };
68 assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err));
69
70 Ok(())
71 }
72
73 // Tests that quit bytes in the forward direction work correctly.
74 #[test]
75 fn quit_fwd() -> Result<(), Box<dyn Error>> {
76 let dfa = DFA::builder()
77 .configure(DFA::config().quit(b'x', true))
78 .build("[[:word:]]+$")?;
79 let mut cache = dfa.create_cache();
80
81 assert_eq!(
82 dfa.find_earliest_fwd(&mut cache, b"abcxyz"),
83 Err(MatchError::Quit { byte: b'x', offset: 3 })
84 );
85 assert_eq!(
86 dfa.find_leftmost_fwd(&mut cache, b"abcxyz"),
87 Err(MatchError::Quit { byte: b'x', offset: 3 })
88 );
89 assert_eq!(
90 dfa.find_overlapping_fwd(
91 &mut cache,
92 b"abcxyz",
93 &mut OverlappingState::start()
94 ),
95 Err(MatchError::Quit { byte: b'x', offset: 3 })
96 );
97
98 Ok(())
99 }
100
101 // Tests that quit bytes in the reverse direction work correctly.
102 #[test]
103 fn quit_rev() -> Result<(), Box<dyn Error>> {
104 let dfa = DFA::builder()
105 .configure(DFA::config().quit(b'x', true))
106 .thompson(thompson::Config::new().reverse(true))
107 .build("^[[:word:]]+")?;
108 let mut cache = dfa.create_cache();
109
110 assert_eq!(
111 dfa.find_earliest_rev(&mut cache, b"abcxyz"),
112 Err(MatchError::Quit { byte: b'x', offset: 3 })
113 );
114 assert_eq!(
115 dfa.find_leftmost_rev(&mut cache, b"abcxyz"),
116 Err(MatchError::Quit { byte: b'x', offset: 3 })
117 );
118
119 Ok(())
120 }
121
122 // Tests that if we heuristically enable Unicode word boundaries but then
123 // instruct that a non-ASCII byte should NOT be a quit byte, then the builder
124 // will panic.
125 #[test]
126 #[should_panic]
127 fn quit_panics() {
128 DFA::config().unicode_word_boundary(true).quit(b'\xFF', false);
129 }
130
131 // This tests an intesting case where even if the Unicode word boundary option
132 // is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode
133 // word boundaries to be enabled.
134 #[test]
135 fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
136 let mut config = DFA::config();
137 for b in 0x80..=0xFF {
138 config = config.quit(b, true);
139 }
140 let dfa = DFA::builder().configure(config).build(r"\b")?;
141 let mut cache = dfa.create_cache();
142 let expected = HalfMatch::must(0, 1);
143 assert_eq!(dfa.find_leftmost_fwd(&mut cache, b" a"), Ok(Some(expected)));
144 Ok(())
145 }
146
147 // Tests that we can provide a prefilter to a Regex, and the search reports
148 // correct results.
149 #[test]
150 fn prefilter_works() -> Result<(), Box<dyn Error>> {
151 let mut re = Regex::new(r"a[0-9]+").unwrap();
152 re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a"))));
153 let mut cache = re.create_cache();
154
155 let text = b"foo abc foo a1a2a3 foo a123 bar aa456";
156 let matches: Vec<(usize, usize)> = re
157 .find_leftmost_iter(&mut cache, text)
158 .map(|m| (m.start(), m.end()))
159 .collect();
160 assert_eq!(
161 matches,
162 vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),]
163 );
164 Ok(())
165 }
166
167 // This test confirms that a prefilter is active by using a prefilter that
168 // reports false negatives.
169 #[test]
170 fn prefilter_is_active() -> Result<(), Box<dyn Error>> {
171 let text = b"za123";
172 let mut re = Regex::new(r"a[0-9]+").unwrap();
173 let mut cache = re.create_cache();
174
175 re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a"))));
176 assert_eq!(
177 re.find_leftmost(&mut cache, b"za123"),
178 Some(MultiMatch::must(0, 1, 5))
179 );
180 assert_eq!(
181 re.find_leftmost(&mut cache, b"a123"),
182 Some(MultiMatch::must(0, 0, 4))
183 );
184 re.set_prefilter(Some(Box::new(BunkPrefilter::new())));
185 assert_eq!(re.find_leftmost(&mut cache, b"za123"), None);
186 // This checks that the prefilter is used when first starting the search,
187 // instead of waiting until at least one transition has occurred.
188 assert_eq!(re.find_leftmost(&mut cache, b"a123"), None);
189 Ok(())
190 }
191 */