]>
Commit | Line | Data |
---|---|---|
94b46f34 XL |
// Each `mat!` case supplies, in order: a test name, a pattern, a haystack and
// the expected result (`Some((start, end))` or `None`).
// NOTE(review): `mat!` is defined outside this view; the tuple is presumably the
// span of the first match in the haystack -- confirm against the macro.
1 | mat!(ascii_literal, r"a", "a", Some((0, 1))); |
2 | ||
3 | // Some crazy expressions from regular-expressions.info. | |
// Patterns that start with `(?-u)` switch the `u` (Unicode) flag off.
f9f354fc XL |
4 | mat!( |
5 | match_ranges, | |
6 | r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", | |
7 | "num: 255", | |
8 | Some((5, 8)) | |
9 | ); | |
// 256 is out of range, and the surrounding `\b` assertions keep any shorter
// run of its digits from matching on its own.
10 | mat!( | |
11 | match_ranges_not, | |
12 | r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", | |
13 | "num: 256", | |
14 | None | |
15 | ); | |
// Float pattern: optional sign, optional integer digits, optional dot, then at
// least one digit. Only `match_float4` is anchored with `^`/`$`; the other
// cases are allowed to match a substring of the haystack.
94b46f34 XL |
16 | mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))); |
17 | mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))); | |
18 | mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))); | |
19 | mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None); | |
// `(?i-u)`: case-insensitive with the Unicode flag off. The `_not` haystack has
// no dot followed by 2-4 letters after the host part, so no match is expected.
f9f354fc XL |
20 | mat!( |
21 | match_email, | |
22 | r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", | |
23 | "mine is jam.slam@gmail.com ", | |
24 | Some((8, 26)) | |
25 | ); | |
26 | mat!( | |
27 | match_email_not, | |
28 | r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", | |
29 | "mine is jam.slam@gmail ", | |
30 | None | |
31 | ); | |
// A longer address pattern; it is lowercase-only and sets no inline flags.
32 | mat!( | |
33 | match_email_big, | |
34 | r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", | |
35 | "mine is jam.slam@gmail.com ", | |
36 | Some((8, 26)) | |
37 | ); | |
// Date pattern: year 19xx/20xx, month 01-12, day 01-31, with '-', ' ', '/' or
// '.' as separators. `match_date2` (month 00) and `match_date3` (month 13) are
// rejected by the month group.
38 | mat!( | |
39 | match_date1, | |
40 | r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", | |
41 | "1900-01-01", | |
42 | Some((0, 10)) | |
43 | ); | |
44 | mat!( | |
45 | match_date2, | |
46 | r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", | |
47 | "1900-00-01", | |
48 | None | |
49 | ); | |
50 | mat!( | |
51 | match_date3, | |
52 | r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", | |
53 | "1900-13-01", | |
54 | None | |
55 | ); | |
94b46f34 XL |
56 | |
57 | // Do some crazy dancing with the start/end assertions. | |
// `matiter!` takes a test name, a pattern, a haystack and then every expected
// match span, in order.
// NOTE(review): `matiter!` is defined outside this view -- confirm that it
// checks the complete sequence of matches, not just a prefix of it.
58 | matiter!(match_start_end_empty, r"^$", "", (0, 0)); | |
59 | matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0)); | |
60 | matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0)); | |
61 | matiter!(match_start_end_empty_rev, r"$^", "", (0, 0)); | |
// Zero repetitions of the group is itself an empty match, so an empty span is
// expected at each offset 0 through 5 of the 5-byte haystack "a\nb\nc".
f9f354fc XL |
62 | matiter!( |
63 | match_start_end_empty_rep, | |
64 | r"(?:^$)*", | |
65 | "a\nb\nc", | |
66 | (0, 0), | |
67 | (1, 1), | |
68 | (2, 2), | |
69 | (3, 3), | |
70 | (4, 4), | |
71 | (5, 5) | |
72 | ); | |
// Same expectation with the two assertions inside the group swapped.
73 | matiter!( | |
74 | match_start_end_empty_rep_rev, | |
75 | r"(?:$^)*", | |
76 | "a\nb\nc", | |
77 | (0, 0), | |
78 | (1, 1), | |
79 | (2, 2), | |
80 | (3, 3), | |
81 | (4, 4), | |
82 | (5, 5) | |
83 | ); | |
94b46f34 XL |
84 | |
85 | // Test negated character classes. | |
// In every case the expected span is the first character of the haystack that
// the negated class does not exclude. Several cases nest a POSIX-style class
// (`[:space:]`, `[:alpha:]`) inside the brackets, before or after a literal.
86 | mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3))); | |
87 | mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3))); | |
f9f354fc | 88 | mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3))); |
94b46f34 | 89 | mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3))); |
f9f354fc XL |
90 | mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2))); |
91 | mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3))); | |
92 | mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3))); | |
// "A" is excluded by `[:alpha:]`, so the expected match is the digit "1".
94b46f34 XL |
93 | mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); |
94 | ||
95 | // Test that repeated empty expressions don't loop forever. | |
// Every pattern wraps a sub-expression that can match the empty string (`.*` or
// `.?`) in a second repetition operator, first in lazy and then in greedy form.
// All cases expect the same span, (0, 2), i.e. "a=" in the haystack "a=b".
96 | mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2))); | |
97 | mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2))); | |
98 | mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2))); | |
99 | mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2))); | |
100 | mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2))); | |
101 | mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2))); | |
102 | mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2))); | |
103 | mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2))); | |
104 | mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2))); | |
105 | mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2))); | |
106 | mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2))); | |
107 | mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2))); | |
108 | ||
109 | // Test that we handle various flavors of empty expressions. | |
// A pattern that can only match the empty string is expected to match once at
// every offset of "abc", i.e. at 0, 1, 2 and 3.
110 | matiter!(match_empty1, r"", "", (0, 0)); | |
111 | matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
112 | matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
113 | matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
114 | matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
115 | matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
116 | matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
117 | matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
118 | matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
// The order of alternatives matters from here on (cases 10/11, 12/13, 19/20):
// when `b` is listed first it takes the span (1, 2) and no empty match is
// expected at offset 2; when the empty alternative is listed first, the empty
// match is expected at every offset instead.
// NOTE(review): this is consistent with leftmost-first alternation -- confirm
// against the regex crate's documented match semantics.
119 | matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
120 | matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3)); | |
f035d41b XL |
121 | matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
122 | matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3)); | |
123 | matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
124 | matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
125 | matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
126 | matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
127 | matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
128 | matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
129 | matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3)); | |
130 | matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
131 | matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); | |
// Neither alternative is empty here (`a(?:)` still requires an "a"), so only
// the spans of "a" and "b" are expected.
132 | matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2)); | |
94b46f34 XL |
133 | |
// Check that the DFA copes with a pathological input.
// (The expectation is that the DFA's cache gets flushed too frequently, which
// should make it quit and fall back to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    // Returns `count` characters, each one randomly chosen as '1' or '0'.
    fn ones_and_zeroes(count: usize) -> String {
        use rand::{thread_rng, Rng};

        let mut rng = thread_rng();
        (0..count).map(|_| if rng.gen() { '1' } else { '0' }).collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");

    // 100_000 random digits, then a guaranteed '1', then exactly 20 more
    // random digits, so the haystack always satisfies the `1[01]{20}$` tail.
    let mut haystack = ones_and_zeroes(100_000);
    haystack.push('1');
    haystack.push_str(&ones_and_zeroes(20));

    assert!(re.is_match(text!(&*haystack)));
}
163 | ||
// Builds one large pattern made of nested non-capturing groups and unwraps the
// result, so the test fails if the pattern is rejected at build time. Nothing
// is matched against the compiled regex.
// NOTE(review): the name suggests this guards the parser's nesting limit --
// confirm against the regex crate's `nest_limit` setting.
// NOTE(review): no whitespace-insensitive mode is enabled in the visible code,
// so the line breaks inside the raw string are presumably literal pattern
// characters rather than ignored layout -- confirm this is intended.
164 | #[test] | |
165 | fn nest_limit_makes_it_parse() { | |
166 | use regex::RegexBuilder; | |
167 | ||
168 | RegexBuilder::new( | |
f9f354fc | 169 | r#"(?-u) |
94b46f34 XL |
170 | 2(?: |
171 | [45]\d{3}| | |
172 | 7(?: | |
173 | 1[0-267]| | |
174 | 2[0-289]| | |
175 | 3[0-29]| | |
176 | 4[01]| | |
177 | 5[1-3]| | |
178 | 6[013]| | |
179 | 7[0178]| | |
180 | 91 | |
181 | )| | |
182 | 8(?: | |
183 | 0[125]| | |
184 | [139][1-6]| | |
185 | 2[0157-9]| | |
186 | 41| | |
187 | 6[1-35]| | |
188 | 7[1-5]| | |
189 | 8[1-8]| | |
190 | 90 | |
191 | )| | |
192 | 9(?: | |
193 | 0[0-2]| | |
194 | 1[0-4]| | |
195 | 2[568]| | |
196 | 3[3-6]| | |
197 | 5[5-7]| | |
198 | 6[0167]| | |
199 | 7[15]| | |
200 | 8[0146-9] | |
201 | ) | |
202 | )\d{4}| | |
203 | 3(?: | |
204 | 12?[5-7]\d{2}| | |
205 | 0(?: | |
206 | 2(?: | |
207 | [025-79]\d| | |
208 | [348]\d{1,2} | |
209 | )| | |
210 | 3(?: | |
211 | [2-4]\d| | |
212 | [56]\d? | |
213 | ) | |
214 | )| | |
215 | 2(?: | |
216 | 1\d{2}| | |
217 | 2(?: | |
218 | [12]\d| | |
219 | [35]\d{1,2}| | |
220 | 4\d? | |
221 | ) | |
222 | )| | |
223 | 3(?: | |
224 | 1\d{2}| | |
225 | 2(?: | |
226 | [2356]\d| | |
227 | 4\d{1,2} | |
228 | ) | |
229 | )| | |
230 | 4(?: | |
231 | 1\d{2}| | |
232 | 2(?: | |
233 | 2\d{1,2}| | |
234 | [47]| | |
235 | 5\d{2} | |
236 | ) | |
237 | )| | |
238 | 5(?: | |
239 | 1\d{2}| | |
240 | 29 | |
241 | )| | |
242 | [67]1\d{2}| | |
243 | 8(?: | |
244 | 1\d{2}| | |
245 | 2(?: | |
246 | 2\d{2}| | |
247 | 3| | |
248 | 4\d | |
249 | ) | |
250 | ) | |
251 | )\d{3}| | |
252 | 4(?: | |
253 | 0(?: | |
254 | 2(?: | |
255 | [09]\d| | |
256 | 7 | |
257 | )| | |
258 | 33\d{2} | |
259 | )| | |
260 | 1\d{3}| | |
261 | 2(?: | |
262 | 1\d{2}| | |
263 | 2(?: | |
264 | [25]\d?| | |
265 | [348]\d| | |
266 | [67]\d{1,2} | |
267 | ) | |
268 | )| | |
269 | 3(?: | |
270 | 1\d{2}(?: | |
271 | \d{2} | |
272 | )?| | |
273 | 2(?: | |
274 | [045]\d| | |
275 | [236-9]\d{1,2} | |
276 | )| | |
277 | 32\d{2} | |
278 | )| | |
279 | 4(?: | |
280 | [18]\d{2}| | |
281 | 2(?: | |
282 | [2-46]\d{2}| | |
283 | 3 | |
284 | )| | |
285 | 5[25]\d{2} | |
286 | )| | |
287 | 5(?: | |
288 | 1\d{2}| | |
289 | 2(?: | |
290 | 3\d| | |
291 | 5 | |
292 | ) | |
293 | )| | |
294 | 6(?: | |
295 | [18]\d{2}| | |
296 | 2(?: | |
297 | 3(?: | |
298 | \d{2} | |
299 | )?| | |
300 | [46]\d{1,2}| | |
301 | 5\d{2}| | |
302 | 7\d | |
303 | )| | |
304 | 5(?: | |
305 | 3\d?| | |
306 | 4\d| | |
307 | [57]\d{1,2}| | |
308 | 6\d{2}| | |
309 | 8 | |
310 | ) | |
311 | )| | |
312 | 71\d{2}| | |
313 | 8(?: | |
314 | [18]\d{2}| | |
315 | 23\d{2}| | |
316 | 54\d{2} | |
317 | )| | |
318 | 9(?: | |
319 | [18]\d{2}| | |
320 | 2[2-5]\d{2}| | |
321 | 53\d{1,2} | |
322 | ) | |
323 | )\d{3}| | |
324 | 5(?: | |
325 | 02[03489]\d{2}| | |
326 | 1\d{2}| | |
327 | 2(?: | |
328 | 1\d{2}| | |
329 | 2(?: | |
330 | 2(?: | |
331 | \d{2} | |
332 | )?| | |
333 | [457]\d{2} | |
334 | ) | |
335 | )| | |
336 | 3(?: | |
337 | 1\d{2}| | |
338 | 2(?: | |
339 | [37](?: | |
340 | \d{2} | |
341 | )?| | |
342 | [569]\d{2} | |
343 | ) | |
344 | )| | |
345 | 4(?: | |
346 | 1\d{2}| | |
347 | 2[46]\d{2} | |
348 | )| | |
349 | 5(?: | |
350 | 1\d{2}| | |
351 | 26\d{1,2} | |
352 | )| | |
353 | 6(?: | |
354 | [18]\d{2}| | |
355 | 2| | |
356 | 53\d{2} | |
357 | )| | |
358 | 7(?: | |
359 | 1| | |
360 | 24 | |
361 | )\d{2}| | |
362 | 8(?: | |
363 | 1| | |
364 | 26 | |
365 | )\d{2}| | |
366 | 91\d{2} | |
367 | )\d{3}| | |
368 | 6(?: | |
369 | 0(?: | |
370 | 1\d{2}| | |
371 | 2(?: | |
372 | 3\d{2}| | |
373 | 4\d{1,2} | |
374 | ) | |
375 | )| | |
376 | 2(?: | |
377 | 2[2-5]\d{2}| | |
378 | 5(?: | |
379 | [3-5]\d{2}| | |
380 | 7 | |
381 | )| | |
382 | 8\d{2} | |
383 | )| | |
384 | 3(?: | |
385 | 1| | |
386 | 2[3478] | |
387 | )\d{2}| | |
388 | 4(?: | |
389 | 1| | |
390 | 2[34] | |
391 | )\d{2}| | |
392 | 5(?: | |
393 | 1| | |
394 | 2[47] | |
395 | )\d{2}| | |
396 | 6(?: | |
397 | [18]\d{2}| | |
398 | 6(?: | |
399 | 2(?: | |
400 | 2\d| | |
401 | [34]\d{2} | |
402 | )| | |
403 | 5(?: | |
404 | [24]\d{2}| | |
405 | 3\d| | |
406 | 5\d{1,2} | |
407 | ) | |
408 | ) | |
409 | )| | |
410 | 72[2-5]\d{2}| | |
411 | 8(?: | |
412 | 1\d{2}| | |
413 | 2[2-5]\d{2} | |
414 | )| | |
415 | 9(?: | |
416 | 1\d{2}| | |
417 | 2[2-6]\d{2} | |
418 | ) | |
419 | )\d{3}| | |
420 | 7(?: | |
421 | (?: | |
422 | 02| | |
423 | [3-589]1| | |
424 | 6[12]| | |
425 | 72[24] | |
426 | )\d{2}| | |
427 | 21\d{3}| | |
428 | 32 | |
429 | )\d{3}| | |
430 | 8(?: | |
431 | (?: | |
432 | 4[12]| | |
433 | [5-7]2| | |
434 | 1\d? | |
435 | )| | |
436 | (?: | |
437 | 0| | |
438 | 3[12]| | |
439 | [5-7]1| | |
440 | 217 | |
441 | )\d | |
442 | )\d{4}| | |
443 | 9(?: | |
444 | [35]1| | |
445 | (?: | |
446 | [024]2| | |
447 | 81 | |
448 | )\d| | |
449 | (?: | |
450 | 1| | |
451 | [24]1 | |
452 | )\d{2} | |
453 | )\d{3} | |
f9f354fc | 454 | "#, |
94b46f34 XL |
455 | ) |
456 | .build() | |
457 | .unwrap(); | |
458 | } |