vendor/regex-automata-0.2.0/tests/dfa/suite.rs

   1 use regex_automata::{
   2     dfa::{self, dense, regex::Regex, sparse, Automaton},
   3     nfa::thompson,
   4     MatchKind, SyntaxConfig,
   5 };
   6 use regex_syntax as syntax;
   7
   8 use regex_test::{
   9     bstr::{BString, ByteSlice},
  10     CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
  11     SearchKind as TestSearchKind, TestResult, TestRunner,
  12 };
  13
  14 use crate::{suite, Result};
  15
  16 /// Runs the test suite with the default configuration.
  17 #[test]
  18 fn unminimized_default() -> Result<()> {
  19     let builder = Regex::builder();
  20     TestRunner::new()?
  21         .test_iter(suite()?.iter(), dense_compiler(builder))
  22         .assert();
  23     Ok(())
  24 }
  25
  26 /// Runs the test suite with byte classes disabled.
  27 #[test]
  28 fn unminimized_no_byte_class() -> Result<()> {
  29     let mut builder = Regex::builder();
  30     builder.dense(dense::Config::new().byte_classes(false));
  31
  32     TestRunner::new()?
  33         .test_iter(suite()?.iter(), dense_compiler(builder))
  34         .assert();
  35     Ok(())
  36 }
  37
  38 /// Runs the test suite with NFA shrinking disabled.
  39 #[test]
  40 fn unminimized_no_nfa_shrink() -> Result<()> {
  41     let mut builder = Regex::builder();
  42     builder.thompson(thompson::Config::new().shrink(false));
  43
  44     TestRunner::new()?
  45         .test_iter(suite()?.iter(), dense_compiler(builder))
  46         .assert();
  47     Ok(())
  48 }
  49
  50 /// Runs the test suite on a minimized DFA with an otherwise default
  51 /// configuration.
  52 #[test]
  53 fn minimized_default() -> Result<()> {
  54     let mut builder = Regex::builder();
  55     builder.dense(dense::Config::new().minimize(true));
  56     TestRunner::new()?
  57         // These regexes tend to be too big. Minimization takes... forever.
  58         .blacklist("expensive")
  59         .test_iter(suite()?.iter(), dense_compiler(builder))
  60         .assert();
  61     Ok(())
  62 }
  63
  64 /// Runs the test suite on a minimized DFA with byte classes disabled.
  65 #[test]
  66 fn minimized_no_byte_class() -> Result<()> {
  67     let mut builder = Regex::builder();
  68     builder.dense(dense::Config::new().minimize(true).byte_classes(false));
  69
  70     TestRunner::new()?
  71         // These regexes tend to be too big. Minimization takes... forever.
  72         .blacklist("expensive")
  73         .test_iter(suite()?.iter(), dense_compiler(builder))
  74         .assert();
  75     Ok(())
  76 }
  77
  78 /// Runs the test suite on a sparse unminimized DFA.
  79 #[test]
  80 fn sparse_unminimized_default() -> Result<()> {
  81     let builder = Regex::builder();
  82     TestRunner::new()?
  83         .test_iter(suite()?.iter(), sparse_compiler(builder))
  84         .assert();
  85     Ok(())
  86 }
  87
  88 /// Another basic sanity test that checks we can serialize and then deserialize
  89 /// a regex, and that the resulting regex can be used for searching correctly.
  90 #[test]
  91 fn serialization_unminimized_default() -> Result<()> {
  92     let builder = Regex::builder();
  93     let my_compiler = |builder| {
  94         compiler(builder, |builder, re| {
  95             let builder = builder.clone();
  96             let (fwd_bytes, _) = re.forward().to_bytes_native_endian();
  97             let (rev_bytes, _) = re.reverse().to_bytes_native_endian();
  98             Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
  99                 let fwd: dense::DFA<&[u32]> =
 100                     dense::DFA::from_bytes(&fwd_bytes).unwrap().0;
 101                 let rev: dense::DFA<&[u32]> =
 102                     dense::DFA::from_bytes(&rev_bytes).unwrap().0;
 103                 let re = builder.build_from_dfas(fwd, rev);
 104
 105                 run_test(&re, test)
 106             }))
 107         })
 108     };
 109     TestRunner::new()?
 110         .test_iter(suite()?.iter(), my_compiler(builder))
 111         .assert();
 112     Ok(())
 113 }
 114
 115 /// A basic sanity test that checks we can serialize and then deserialize a
 116 /// regex using sparse DFAs, and that the resulting regex can be used for
 117 /// searching correctly.
 118 #[test]
 119 fn sparse_serialization_unminimized_default() -> Result<()> {
 120     let builder = Regex::builder();
 121     let my_compiler = |builder| {
 122         compiler(builder, |builder, re| {
 123             let builder = builder.clone();
 124             let fwd_bytes = re.forward().to_sparse()?.to_bytes_native_endian();
 125             let rev_bytes = re.reverse().to_sparse()?.to_bytes_native_endian();
 126             Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
 127                 let fwd: sparse::DFA<&[u8]> =
 128                     sparse::DFA::from_bytes(&fwd_bytes).unwrap().0;
 129                 let rev: sparse::DFA<&[u8]> =
 130                     sparse::DFA::from_bytes(&rev_bytes).unwrap().0;
 131                 let re = builder.build_from_dfas(fwd, rev);
 132                 run_test(&re, test)
 133             }))
 134         })
 135     };
 136     TestRunner::new()?
 137         .test_iter(suite()?.iter(), my_compiler(builder))
 138         .assert();
 139     Ok(())
 140 }
 141
 142 fn dense_compiler(
 143     builder: dfa::regex::Builder,
 144 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
 145     compiler(builder, |_, re| {
 146         Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
 147             run_test(&re, test)
 148         }))
 149     })
 150 }
 151
 152 fn sparse_compiler(
 153     builder: dfa::regex::Builder,
 154 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
 155     compiler(builder, |builder, re| {
 156         let fwd = re.forward().to_sparse()?;
 157         let rev = re.reverse().to_sparse()?;
 158         let re = builder.build_from_dfas(fwd, rev);
 159         Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
 160             run_test(&re, test)
 161         }))
 162     })
 163 }
 164
 165 fn compiler(
 166     mut builder: dfa::regex::Builder,
 167     mut create_matcher: impl FnMut(
 168         &dfa::regex::Builder,
 169         Regex,
 170     ) -> Result<CompiledRegex>,
 171 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
 172     move |test, regexes| {
 173         let regexes = regexes
 174             .iter()
 175             .map(|r| r.to_str().map(|s| s.to_string()))
 176             .collect::<std::result::Result<Vec<String>, _>>()?;
 177
 178         // Check if our regex contains things that aren't supported by DFAs.
 179         // That is, Unicode word boundaries when searching non-ASCII text.
 180         let mut thompson = thompson::Builder::new();
 181         thompson.configure(config_thompson(test));
 182         // TODO: Modify Hir to report facts like this, instead of needing to
 183         // build an NFA to do it.
 184         if let Ok(nfa) = thompson.build_many(&regexes) {
 185             let non_ascii = test.input().iter().any(|&b| !b.is_ascii());
 186             if nfa.has_word_boundary_unicode() && non_ascii {
 187                 return Ok(CompiledRegex::skip());
 188             }
 189         }
 190         if !configure_regex_builder(test, &mut builder) {
 191             return Ok(CompiledRegex::skip());
 192         }
 193         create_matcher(&builder, builder.build_many(&regexes)?)
 194     }
 195 }
 196
 197 fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> {
 198     let is_match = if re.is_match(test.input()) {
 199         TestResult::matched()
 200     } else {
 201         TestResult::no_match()
 202     };
 203     let is_match = is_match.name("is_match");
 204
 205     let find_matches = match test.search_kind() {
 206         TestSearchKind::Earliest => {
 207             let it = re
 208                 .find_earliest_iter(test.input())
 209                 .take(test.match_limit().unwrap_or(std::usize::MAX))
 210                 .map(|m| Match {
 211                     id: m.pattern().as_usize(),
 212                     start: m.start(),
 213                     end: m.end(),
 214                 });
 215             TestResult::matches(it).name("find_earliest_iter")
 216         }
 217         TestSearchKind::Leftmost => {
 218             let it = re
 219                 .find_leftmost_iter(test.input())
 220                 .take(test.match_limit().unwrap_or(std::usize::MAX))
 221                 .map(|m| Match {
 222                     id: m.pattern().as_usize(),
 223                     start: m.start(),
 224                     end: m.end(),
 225                 });
 226             TestResult::matches(it).name("find_leftmost_iter")
 227         }
 228         TestSearchKind::Overlapping => {
 229             let it = re
 230                 .find_overlapping_iter(test.input())
 231                 .take(test.match_limit().unwrap_or(std::usize::MAX))
 232                 .map(|m| Match {
 233                     id: m.pattern().as_usize(),
 234                     start: m.start(),
 235                     end: m.end(),
 236                 });
 237             TestResult::matches(it).name("find_overlapping_iter")
 238         }
 239     };
 240
 241     vec![is_match, find_matches]
 242 }
 243
 244 /// Configures the given regex builder with all relevant settings on the given
 245 /// regex test.
 246 ///
 247 /// If the regex test has a setting that is unsupported, then this returns
 248 /// false (implying the test should be skipped).
 249 fn configure_regex_builder(
 250     test: &RegexTest,
 251     builder: &mut dfa::regex::Builder,
 252 ) -> bool {
 253     let match_kind = match test.match_kind() {
 254         TestMatchKind::All => MatchKind::All,
 255         TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
 256         TestMatchKind::LeftmostLongest => return false,
 257     };
 258
 259     let syntax_config = SyntaxConfig::new()
 260         .case_insensitive(test.case_insensitive())
 261         .unicode(test.unicode())
 262         .utf8(test.utf8());
 263     let dense_config = dense::Config::new()
 264         .anchored(test.anchored())
 265         .match_kind(match_kind)
 266         .unicode_word_boundary(true);
 267     let regex_config = Regex::config().utf8(test.utf8());
 268
 269     builder
 270         .configure(regex_config)
 271         .syntax(syntax_config)
 272         .thompson(config_thompson(test))
 273         .dense(dense_config);
 274     true
 275 }
 276
 277 /// Configuration of a Thompson NFA compiler from a regex test.
 278 fn config_thompson(test: &RegexTest) -> thompson::Config {
 279     thompson::Config::new().utf8(test.utf8())
 280 }