]>
git.proxmox.com Git - rustc.git/blob - vendor/ucd-parse/src/special_casing.rs
4 use once_cell
::sync
::Lazy
;
8 parse_codepoint_sequence
, Codepoint
, CodepointIter
, UcdFile
,
11 use crate::error
::Error
;
13 /// A single row in the `SpecialCasing.txt` file.
15 /// Note that a single codepoint may be mapped multiple times. In particular,
16 /// a single codepoint might have mappings based on distinct language sensitive
17 /// conditions (e.g., `U+0307`).
18 #[derive(Clone, Debug, Default, Eq, PartialEq)]
19 pub struct SpecialCaseMapping
{
20 /// The codepoint that is being mapped.
21 pub codepoint
: Codepoint
,
22 /// The lowercase mapping, which may be empty.
23 pub lowercase
: Vec
<Codepoint
>,
24 /// The titlecase mapping, which may be empty.
25 pub titlecase
: Vec
<Codepoint
>,
26 /// The uppercase mapping, which may be empty.
27 pub uppercase
: Vec
<Codepoint
>,
28 /// A list of language specific conditions, see `SpecialCasing.txt` for
30 pub conditions
: Vec
<String
>,
33 impl UcdFile
for SpecialCaseMapping
{
34 fn relative_file_path() -> &'
static Path
{
35 Path
::new("SpecialCasing.txt")
39 impl UcdFileByCodepoint
for SpecialCaseMapping
{
40 fn codepoints(&self) -> CodepointIter
{
41 self.codepoint
.into_iter()
45 impl FromStr
for SpecialCaseMapping
{
48 fn from_str(line
: &str) -> Result
<SpecialCaseMapping
, Error
> {
49 static PARTS
: Lazy
<Regex
> = Lazy
::new(|| {
53 \s*(?P<codepoint>[^\s;]+)\s*;
54 \s*(?P<lower>[^;]+)\s*;
55 \s*(?P<title>[^;]+)\s*;
56 \s*(?P<upper>[^;]+)\s*;
57 \s*(?P<conditions>[^;\x23]+)?
63 let caps
= match PARTS
.captures(line
.trim()) {
65 None
=> return err
!("invalid SpecialCasing line: '{}'", line
),
73 .map(|c
| c
.to_string())
77 Ok(SpecialCaseMapping
{
78 codepoint
: caps
["codepoint"].parse()?
,
79 lowercase
: parse_codepoint_sequence(&caps
["lower"])?
,
80 titlecase
: parse_codepoint_sequence(&caps
["title"])?
,
81 uppercase
: parse_codepoint_sequence(&caps
["upper"])?
,
89 use super::SpecialCaseMapping
;
93 let line
= "1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA\n";
94 let row
: SpecialCaseMapping
= line
.parse().unwrap();
95 assert_eq
!(row
.codepoint
, 0x1F52);
96 assert_eq
!(row
.lowercase
, vec
![0x1F52]);
97 assert_eq
!(row
.titlecase
, vec
![0x03A5, 0x0313, 0x0300]);
98 assert_eq
!(row
.uppercase
, vec
![0x03A5, 0x0313, 0x0300]);
99 assert
!(row
.conditions
.is_empty());
104 let line
= "0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE\n";
105 let row
: SpecialCaseMapping
= line
.parse().unwrap();
106 assert_eq
!(row
.codepoint
, 0x0307);
107 assert
!(row
.lowercase
.is_empty());
108 assert_eq
!(row
.titlecase
, vec
![0x0307]);
109 assert_eq
!(row
.uppercase
, vec
![0x0307]);
110 assert_eq
!(row
.conditions
, vec
!["tr", "After_I"]);