3 use classes
::ByteClasses
;
4 pub use nfa
::compiler
::Builder
;
10 /// The representation for an NFA state identifier.
11 pub type StateID
= usize;
13 /// A final compiled NFA.
15 /// The states of the NFA are indexed by state IDs, which are how transitions
19 /// Whether this NFA can only match at the beginning of input or not.
21 /// When true, a match should only be reported if it begins at the 0th
22 /// index of the haystack.
24 /// The starting state of this NFA.
26 /// The state list. This list is guaranteed to be indexable by the starting
27 /// state ID, and it is also guaranteed to contain exactly one `Match`
30 /// A mapping from any byte value to its corresponding equivalence class
31 /// identifier. Two bytes in the same equivalence class cannot discriminate
32 /// between a match or a non-match. This map can be used to shrink the
33 /// total size of a DFA's transition table with a small match-time cost.
35 /// Note that the NFA's transitions are *not* defined in terms of these
36 /// equivalence classes. The NFA's transitions are defined on the original
37 /// byte values. For the most part, this is because they wouldn't really
38 /// help the NFA much since the NFA already uses a sparse representation
39 /// to represent transitions. Byte classes are most effective in a dense
41 byte_classes
: ByteClasses
,
45 /// Returns an NFA that always matches at every position.
46 pub fn always_match() -> NFA
{
50 states
: vec
![State
::Match
],
51 byte_classes
: ByteClasses
::empty(),
55 /// Returns an NFA that never matches at any position.
56 pub fn never_match() -> NFA
{
60 states
: vec
![State
::Fail
],
61 byte_classes
: ByteClasses
::empty(),
65 /// Returns true if and only if this NFA is anchored.
66 pub fn is_anchored(&self) -> bool
{
70 /// Return the number of states in this NFA.
71 pub fn len(&self) -> usize {
75 /// Return the ID of the initial state of this NFA.
76 pub fn start(&self) -> StateID
{
80 /// Return the NFA state corresponding to the given ID.
81 pub fn state(&self, id
: StateID
) -> &State
{
85 /// Return the set of equivalence classes for this NFA. The slice returned
86 /// always has length 256 and maps each possible byte value to its
87 /// corresponding equivalence class ID (which is never more than 255).
88 pub fn byte_classes(&self) -> &ByteClasses
{
93 impl fmt
::Debug
for NFA
{
94 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
95 for (i
, state
) in self.states
.iter().enumerate() {
96 let status
= if i
== self.start { '>' }
else { ' ' }
;
97 writeln
!(f
, "{}{:06}: {:?}", status
, i
, state
)?
;
103 /// A state in a final compiled NFA.
104 #[derive(Clone, Eq, PartialEq)]
106 /// A state that transitions to `next` if and only if the current input
107 /// byte is in the range `[start, end]` (inclusive).
109 /// This is a special case of Sparse in that it encodes only one transition
110 /// (and therefore avoids the allocation).
111 Range { range: Transition }
,
112 /// A state with possibly many transitions, represented in a sparse
113 /// fashion. Transitions are ordered lexicographically by input range.
114 /// As such, this may only be used when every transition has equal
115 /// priority. (In practice, this is only used for encoding large UTF-8
117 Sparse { ranges: Box<[Transition]> }
,
118 /// An alternation such that there exists an epsilon transition to all
119 /// states in `alternates`, where matches found via earlier transitions
120 /// are preferred over later transitions.
121 Union { alternates: Box<[StateID]> }
,
122 /// A fail state. When encountered, the automaton is guaranteed to never
123 /// reach a match state.
125 /// A match state. There is exactly one such occurrence of this state in
130 /// A transition to another state, only if the given byte falls in the
131 /// inclusive range specified.
132 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
133 pub struct Transition
{
140 /// Returns true if and only if this state contains one or more epsilon
142 pub fn is_epsilon(&self) -> bool
{
145 | State
::Sparse { .. }
147 | State
::Match
=> false,
148 State
::Union { .. }
=> true,
152 /// Remap the transitions in this state using the given map. Namely, the
153 /// given map should be indexed according to the transitions currently
156 /// This is used during the final phase of the NFA compiler, which turns
157 /// its intermediate NFA into the final NFA.
158 fn remap(&mut self, remap
: &[StateID
]) {
160 State
::Range { ref mut range }
=> range
.next
= remap
[range
.next
],
161 State
::Sparse { ref mut ranges }
=> {
162 for r
in ranges
.iter_mut() {
163 r
.next
= remap
[r
.next
];
166 State
::Union { ref mut alternates }
=> {
167 for alt
in alternates
.iter_mut() {
177 impl fmt
::Debug
for State
{
178 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
180 State
::Range { ref range }
=> range
.fmt(f
),
181 State
::Sparse { ref ranges }
=> {
184 .map(|t
| format
!("{:?}", t
))
185 .collect
::<Vec
<String
>>()
187 write
!(f
, "sparse({})", rs
)
189 State
::Union { ref alternates }
=> {
190 let alts
= alternates
192 .map(|id
| format
!("{}", id
))
193 .collect
::<Vec
<String
>>()
195 write
!(f
, "alt({})", alts
)
197 State
::Fail
=> write
!(f
, "FAIL"),
198 State
::Match
=> write
!(f
, "MATCH"),
203 impl fmt
::Debug
for Transition
{
204 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
205 let Transition { start, end, next }
= *self;
206 if self.start
== self.end
{
207 write
!(f
, "{} => {}", escape(start
), next
)
209 write
!(f
, "{}-{} => {}", escape(start
), escape(end
), next
)
214 /// Return the given byte as its escaped string form.
215 fn escape(b
: u8) -> String
{
218 String
::from_utf8(ascii
::escape_default(b
).collect
::<Vec
<_
>>()).unwrap()
229 let nfa
= NFA
::always_match();
230 let dfa
= dense
::Builder
::new().build_from_nfa
::<usize>(&nfa
).unwrap();
232 assert_eq
!(Some(0), dfa
.find_at(b
"", 0));
233 assert_eq
!(Some(0), dfa
.find_at(b
"a", 0));
234 assert_eq
!(Some(1), dfa
.find_at(b
"a", 1));
235 assert_eq
!(Some(0), dfa
.find_at(b
"ab", 0));
236 assert_eq
!(Some(1), dfa
.find_at(b
"ab", 1));
237 assert_eq
!(Some(2), dfa
.find_at(b
"ab", 2));
242 let nfa
= NFA
::never_match();
243 let dfa
= dense
::Builder
::new().build_from_nfa
::<usize>(&nfa
).unwrap();
245 assert_eq
!(None
, dfa
.find_at(b
"", 0));
246 assert_eq
!(None
, dfa
.find_at(b
"a", 0));
247 assert_eq
!(None
, dfa
.find_at(b
"a", 1));
248 assert_eq
!(None
, dfa
.find_at(b
"ab", 0));
249 assert_eq
!(None
, dfa
.find_at(b
"ab", 1));
250 assert_eq
!(None
, dfa
.find_at(b
"ab", 2));