]>
Commit | Line | Data |
---|---|---|
dfeec247 XL |
1 | use std::path::Path; |
2 | use std::str::FromStr; | |
3 | ||
fe692bf9 | 4 | use once_cell::sync::Lazy; |
dfeec247 XL |
5 | use regex::Regex; |
6 | ||
3dfed10e XL |
7 | use crate::common::UcdFile; |
8 | use crate::error::Error; | |
dfeec247 XL |
9 | |
/// One parsed record from the `PropertyAliases.txt` file.
///
/// Every record carries an abbreviation and a long name, optionally
/// followed by any number of extra aliases.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct PropertyAlias {
    /// The abbreviated name of the property.
    pub abbreviation: String,
    /// The "long" name of the property.
    pub long: String,
    /// Any further aliases listed for the property; empty when the record
    /// has none.
    pub aliases: Vec<String>,
}
20 | ||
21 | impl UcdFile for PropertyAlias { | |
22 | fn relative_file_path() -> &'static Path { | |
23 | Path::new("PropertyAliases.txt") | |
24 | } | |
25 | } | |
26 | ||
27 | impl FromStr for PropertyAlias { | |
28 | type Err = Error; | |
29 | ||
30 | fn from_str(line: &str) -> Result<PropertyAlias, Error> { | |
fe692bf9 FG |
31 | static PARTS: Lazy<Regex> = Lazy::new(|| { |
32 | Regex::new( | |
dfeec247 XL |
33 | r"(?x) |
34 | ^ | |
35 | \s*(?P<abbrev>[^\s;]+)\s*; | |
36 | \s*(?P<long>[^\s;]+)\s* | |
37 | (?:;(?P<aliases>.*))? | |
fe692bf9 | 38 | ", |
3dfed10e | 39 | ) |
fe692bf9 FG |
40 | .unwrap() |
41 | }); | |
42 | static ALIASES: Lazy<Regex> = Lazy::new(|| { | |
43 | Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap() | |
44 | }); | |
dfeec247 XL |
45 | |
46 | let caps = match PARTS.captures(line.trim()) { | |
47 | Some(caps) => caps, | |
48 | None => return err!("invalid PropertyAliases line: '{}'", line), | |
49 | }; | |
50 | let mut aliases = vec![]; | |
51 | if let Some(m) = caps.name("aliases") { | |
52 | for acaps in ALIASES.captures_iter(m.as_str()) { | |
53 | let alias = acaps.name("alias").unwrap().as_str(); | |
54 | aliases.push(alias.to_string()); | |
55 | } | |
56 | } | |
57 | Ok(PropertyAlias { | |
58 | abbreviation: caps.name("abbrev").unwrap().as_str().to_string(), | |
59 | long: caps.name("long").unwrap().as_str().to_string(), | |
3dfed10e | 60 | aliases, |
dfeec247 XL |
61 | }) |
62 | } | |
63 | } | |
64 | ||
#[cfg(test)]
mod tests {
    use super::PropertyAlias;

    /// Parses `line` as a `PropertyAliases.txt` row, panicking on failure.
    fn parsed(line: &str) -> PropertyAlias {
        line.parse().unwrap()
    }

    /// Builds the row a test expects from borrowed string pieces.
    fn expected(abbrev: &str, long: &str, aliases: &[&str]) -> PropertyAlias {
        PropertyAlias {
            abbreviation: abbrev.to_string(),
            long: long.to_string(),
            aliases: aliases.iter().map(|alias| alias.to_string()).collect(),
        }
    }

    // Two fields, no aliases, trailing newline.
    #[test]
    fn parse1() {
        assert_eq!(
            parsed("cjkAccountingNumeric ; kAccountingNumeric\n"),
            expected("cjkAccountingNumeric", "kAccountingNumeric", &[]),
        );
    }

    // A long name containing an underscore.
    #[test]
    fn parse2() {
        assert_eq!(
            parsed("nv ; Numeric_Value\n"),
            expected("nv", "Numeric_Value", &[]),
        );
    }

    // Exactly one additional alias.
    #[test]
    fn parse3() {
        assert_eq!(
            parsed("scf ; Simple_Case_Folding ; sfc\n"),
            expected("scf", "Simple_Case_Folding", &["sfc"]),
        );
    }

    // Several additional aliases with uneven spacing around `;`.
    #[test]
    fn parse4() {
        assert_eq!(
            parsed("cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS\n"),
            expected(
                "cjkRSUnicode",
                "kRSUnicode",
                &["Unicode_Radical_Stroke", "URS"],
            ),
        );
    }

    // No trailing newline.
    #[test]
    fn parse5() {
        assert_eq!(
            parsed("isc ; ISO_Comment"),
            expected("isc", "ISO_Comment", &[]),
        );
    }
}