]>
Commit | Line | Data |
---|---|---|
487cf647 FG |
1 | use crate::{ |
2 | dfa::{automaton::Automaton, dense, sparse}, | |
3 | util::id::StateID, | |
4 | }; | |
5 | ||
6 | impl<T: AsRef<[u32]>> fst::Automaton for dense::DFA<T> { | |
7 | type State = StateID; | |
8 | ||
9 | #[inline] | |
10 | fn start(&self) -> StateID { | |
11 | self.start_state_forward(None, &[], 0, 0) | |
12 | } | |
13 | ||
14 | #[inline] | |
15 | fn is_match(&self, state: &StateID) -> bool { | |
16 | self.is_match_state(*state) | |
17 | } | |
18 | ||
19 | #[inline] | |
20 | fn accept(&self, state: &StateID, byte: u8) -> StateID { | |
21 | if fst::Automaton::is_match(self, state) { | |
22 | return *state; | |
23 | } | |
24 | self.next_state(*state, byte) | |
25 | } | |
26 | ||
27 | #[inline] | |
28 | fn accept_eof(&self, state: &StateID) -> Option<StateID> { | |
29 | if fst::Automaton::is_match(self, state) { | |
30 | return Some(*state); | |
31 | } | |
32 | Some(self.next_eoi_state(*state)) | |
33 | } | |
34 | ||
35 | #[inline] | |
36 | fn can_match(&self, state: &StateID) -> bool { | |
37 | !self.is_dead_state(*state) | |
38 | } | |
39 | } | |
40 | ||
41 | impl<T: AsRef<[u8]>> fst::Automaton for sparse::DFA<T> { | |
42 | type State = StateID; | |
43 | ||
44 | #[inline] | |
45 | fn start(&self) -> StateID { | |
46 | self.start_state_forward(None, &[], 0, 0) | |
47 | } | |
48 | ||
49 | #[inline] | |
50 | fn is_match(&self, state: &StateID) -> bool { | |
51 | self.is_match_state(*state) | |
52 | } | |
53 | ||
54 | #[inline] | |
55 | fn accept(&self, state: &StateID, byte: u8) -> StateID { | |
56 | if fst::Automaton::is_match(self, state) { | |
57 | return *state; | |
58 | } | |
59 | self.next_state(*state, byte) | |
60 | } | |
61 | ||
62 | #[inline] | |
63 | fn accept_eof(&self, state: &StateID) -> Option<StateID> { | |
64 | if fst::Automaton::is_match(self, state) { | |
65 | return Some(*state); | |
66 | } | |
67 | Some(self.next_eoi_state(*state)) | |
68 | } | |
69 | ||
70 | #[inline] | |
71 | fn can_match(&self, state: &StateID) -> bool { | |
72 | !self.is_dead_state(*state) | |
73 | } | |
74 | } | |
75 | ||
#[cfg(test)]
mod tests {
    use bstr::BString;
    use fst::{Automaton, IntoStreamer, Set, Streamer};

    use crate::dfa::{dense, sparse};

    /// Keys used by the "anywhere", "anchored" and start-assertion tests.
    const KEYS_WITH_BAZ: &[&str] =
        &["a", "bar", "baz", "wat", "xba", "xbax", "z"];

    /// Keys used by the end-assertion tests (`bax` in place of `baz`).
    const KEYS_WITH_BAX: &[&str] =
        &["a", "bar", "bax", "wat", "xba", "xbax", "z"];

    /// Keys used by the word-boundary tests.
    const KEYS_FOO: &[&str] = &["foo", "foox", "xfoo", "zzz foo zzz"];

    /// Builds an in-memory set from `keys`, panicking if construction fails.
    fn set_of(keys: &[&str]) -> Set<Vec<u8>> {
        Set::from_iter(keys).unwrap()
    }

    /// Runs `aut` over `set` and collects every key the stream yields, in
    /// the order the stream yields them.
    fn search<A: Automaton, D: AsRef<[u8]>>(
        set: &Set<D>,
        aut: A,
    ) -> Vec<BString> {
        let query = set.search(aut);
        let mut stream = query.into_stream();

        // `Streamer` is not an `Iterator`, so the stream is drained by hand.
        let mut found = Vec::new();
        while let Some(key) = stream.next() {
            found.push(BString::from(key));
        }
        found
    }

    #[test]
    fn dense_anywhere() {
        let keys = set_of(KEYS_WITH_BAZ);
        let re = dense::DFA::new("ba.*").unwrap();
        assert_eq!(search(&keys, &re), vec!["bar", "baz", "xba", "xbax"]);
    }

    #[test]
    fn dense_anchored() {
        let keys = set_of(KEYS_WITH_BAZ);
        let config = dense::Config::new().anchored(true);
        let re = dense::Builder::new().configure(config).build("ba.*").unwrap();
        assert_eq!(search(&keys, &re), vec!["bar", "baz"]);
    }

    #[test]
    fn dense_assertions_start() {
        let keys = set_of(KEYS_WITH_BAZ);
        let re = dense::Builder::new().build("^ba.*").unwrap();
        assert_eq!(search(&keys, &re), vec!["bar", "baz"]);
    }

    #[test]
    fn dense_assertions_end() {
        let keys = set_of(KEYS_WITH_BAX);
        let re = dense::Builder::new().build(".*x$").unwrap();
        assert_eq!(search(&keys, &re), vec!["bax", "xbax"]);
    }

    #[test]
    fn dense_assertions_word() {
        let keys = set_of(KEYS_FOO);
        let re = dense::Builder::new().build(r"(?-u)\bfoo\b").unwrap();
        assert_eq!(search(&keys, &re), vec!["foo", "zzz foo zzz"]);
    }

    #[test]
    fn sparse_anywhere() {
        let keys = set_of(KEYS_WITH_BAZ);
        let re = sparse::DFA::new("ba.*").unwrap();
        assert_eq!(search(&keys, &re), vec!["bar", "baz", "xba", "xbax"]);
    }

    #[test]
    fn sparse_anchored() {
        let keys = set_of(KEYS_WITH_BAZ);
        let config = dense::Config::new().anchored(true);
        let dense_re =
            dense::Builder::new().configure(config).build("ba.*").unwrap();
        let re = dense_re.to_sparse().unwrap();
        assert_eq!(search(&keys, &re), vec!["bar", "baz"]);
    }

    #[test]
    fn sparse_assertions_start() {
        let keys = set_of(KEYS_WITH_BAZ);
        let dense_re = dense::Builder::new().build("^ba.*").unwrap();
        let re = dense_re.to_sparse().unwrap();
        assert_eq!(search(&keys, &re), vec!["bar", "baz"]);
    }

    #[test]
    fn sparse_assertions_end() {
        let keys = set_of(KEYS_WITH_BAX);
        let dense_re = dense::Builder::new().build(".*x$").unwrap();
        let re = dense_re.to_sparse().unwrap();
        assert_eq!(search(&keys, &re), vec!["bax", "xbax"]);
    }

    #[test]
    fn sparse_assertions_word() {
        let keys = set_of(KEYS_FOO);
        let dense_re = dense::Builder::new().build(r"(?-u)\bfoo\b").unwrap();
        let re = dense_re.to_sparse().unwrap();
        assert_eq!(search(&keys, &re), vec!["foo", "zzz foo zzz"]);
    }
}