// Origin: git.proxmox.com Git - rustc.git/blob - vendor/regex/tests/unicode.rs
// New upstream version 1.32.0~beta.2+dfsg1
// [rustc.git] / vendor / regex / tests / unicode.rs
1 mat!(uni_literal, r"☃", "☃", Some((0, 3)));
2 mat!(uni_literal_plus, r"☃+", "☃", Some((0, 3)));
3 mat!(uni_literal_casei_plus, r"(?i)☃+", "☃", Some((0, 3)));
4 mat!(uni_class_plus, r"[☃Ⅰ]+", "☃", Some((0, 3)));
5 mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3)));
6 mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8)));
7 mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2)));
8 mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2)));
9 mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5)));
10 mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2)));
11 mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8)));
12 mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10)));
13 mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10)));
14 mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10)));
15
16 // Test the Unicode friendliness of Perl character classes.
17 mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4)));
18 mat!(uni_perl_w_not, r"\w+", "⥡", None);
19 mat!(uni_perl_w_neg, r"\W+", "⥡", Some((0, 3)));
20 mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8)));
21 mat!(uni_perl_d_not, r"\d+", "Ⅱ", None);
22 mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3)));
23 mat!(uni_perl_s, r"\s+", " ", Some((0, 3)));
24 mat!(uni_perl_s_not, r"\s+", "☃", None);
25 mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3)));
26
27 // And do the same for word boundaries.
28 mat!(uni_boundary_none, r"\d\b", "6δ", None);
29 mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
30 mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1)));
31 mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None);
32
33 // Test general categories.
34 //
35 // We should test more, but there's a lot. Write a script to generate more of
36 // these tests.
37 mat!(uni_class_gencat_cased_letter,
38 r"\p{Cased_Letter}", "A", Some((0, 3)));
39 mat!(uni_class_gencat_close_punctuation,
40 r"\p{Close_Punctuation}", "❯", Some((0, 3)));
41 mat!(uni_class_gencat_connector_punctuation,
42 r"\p{Connector_Punctuation}", "⁀", Some((0, 3)));
43 mat!(uni_class_gencat_control,
44 r"\p{Control}", "\u{9f}", Some((0, 2)));
45 mat!(uni_class_gencat_currency_symbol,
46 r"\p{Currency_Symbol}", "£", Some((0, 3)));
47 mat!(uni_class_gencat_dash_punctuation,
48 r"\p{Dash_Punctuation}", "〰", Some((0, 3)));
49 mat!(uni_class_gencat_decimal_numer,
50 r"\p{Decimal_Number}", "𑓙", Some((0, 4)));
51 mat!(uni_class_gencat_enclosing_mark,
52 r"\p{Enclosing_Mark}", "\u{A672}", Some((0, 3)));
53 mat!(uni_class_gencat_final_punctuation,
54 r"\p{Final_Punctuation}", "⸡", Some((0, 3)));
55 mat!(uni_class_gencat_format,
56 r"\p{Format}", "\u{E007F}", Some((0, 4)));
57 mat!(uni_class_gencat_initial_punctuation,
58 r"\p{Initial_Punctuation}", "⸜", Some((0, 3)));
59 mat!(uni_class_gencat_letter,
60 r"\p{Letter}", "Έ", Some((0, 2)));
61 mat!(uni_class_gencat_letter_number,
62 r"\p{Letter_Number}", "ↂ", Some((0, 3)));
63 mat!(uni_class_gencat_line_separator,
64 r"\p{Line_Separator}", "\u{2028}", Some((0, 3)));
65 mat!(uni_class_gencat_lowercase_letter,
66 r"\p{Lowercase_Letter}", "ϛ", Some((0, 2)));
67 mat!(uni_class_gencat_mark,
68 r"\p{Mark}", "\u{E01EF}", Some((0, 4)));
69 mat!(uni_class_gencat_math,
70 r"\p{Math}", "⋿", Some((0, 3)));
71 mat!(uni_class_gencat_modifier_letter,
72 r"\p{Modifier_Letter}", "𖭃", Some((0, 4)));
73 mat!(uni_class_gencat_modifier_symbol,
74 r"\p{Modifier_Symbol}", "🏿", Some((0, 4)));
75 mat!(uni_class_gencat_nonspacing_mark,
76 r"\p{Nonspacing_Mark}", "\u{1E94A}", Some((0, 4)));
77 mat!(uni_class_gencat_number,
78 r"\p{Number}", "⓿", Some((0, 3)));
79 mat!(uni_class_gencat_open_punctuation,
80 r"\p{Open_Punctuation}", "⦅", Some((0, 3)));
81 mat!(uni_class_gencat_other,
82 r"\p{Other}", "\u{bc9}", Some((0, 3)));
83 mat!(uni_class_gencat_other_letter,
84 r"\p{Other_Letter}", "ꓷ", Some((0, 3)));
85 mat!(uni_class_gencat_other_number,
86 r"\p{Other_Number}", "㉏", Some((0, 3)));
87 mat!(uni_class_gencat_other_punctuation,
88 r"\p{Other_Punctuation}", "𞥞", Some((0, 4)));
89 mat!(uni_class_gencat_other_symbol,
90 r"\p{Other_Symbol}", "⅌", Some((0, 3)));
91 mat!(uni_class_gencat_paragraph_separator,
92 r"\p{Paragraph_Separator}", "\u{2029}", Some((0, 3)));
93 mat!(uni_class_gencat_private_use,
94 r"\p{Private_Use}", "\u{10FFFD}", Some((0, 4)));
95 mat!(uni_class_gencat_punctuation,
96 r"\p{Punctuation}", "𑁍", Some((0, 4)));
97 mat!(uni_class_gencat_separator,
98 r"\p{Separator}", "\u{3000}", Some((0, 3)));
99 mat!(uni_class_gencat_space_separator,
100 r"\p{Space_Separator}", "\u{205F}", Some((0, 3)));
101 mat!(uni_class_gencat_spacing_mark,
102 r"\p{Spacing_Mark}", "\u{16F7E}", Some((0, 4)));
103 mat!(uni_class_gencat_symbol,
104 r"\p{Symbol}", "⯈", Some((0, 3)));
105 mat!(uni_class_gencat_titlecase_letter,
106 r"\p{Titlecase_Letter}", "ῼ", Some((0, 3)));
107 mat!(uni_class_gencat_unassigned,
108 r"\p{Unassigned}", "\u{10FFFF}", Some((0, 4)));
109 mat!(uni_class_gencat_uppercase_letter,
110 r"\p{Uppercase_Letter}", "Ꝋ", Some((0, 3)));