1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 use super::UnicodeNormalization
;
14 use super::char::is_combining_mark
;
// Arm of the local `t!(input, expected)` helper used for the NFD checks:
// `expected` must equal the NFD form of `input`, both when `nfd()` is called
// on the expression itself and when it is called on a `char` iterator.
// NOTE(review): the enclosing `#[test] fn` header and the `macro_rules! t {`
// line are not visible in this excerpt.
20 ($input
: expr
, $expected
: expr
) => {
// Path 1: call `nfd()` on the input directly and render it with `to_string()`.
21 assert_eq
!($input
.nfd().to_string(), $expected
);
22 // A dummy iterator that is not std::str::Chars directly;
23 // the identity closure `|c| c` wraps `chars()` in a `Map` adapter for that purpose.
24 assert_eq
!($input
.chars().map(|c
| c
).nfd().collect
::<String
>(), $expected
);
// U+1E0B is expected to split into `d` + U+0307, while U+01C4 is expected to
// pass through unchanged.
28 t
!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
// U+2026 is expected to be left as-is by NFD.
29 t
!("\u{2026}", "\u{2026}");
// U+2126 is expected to map to U+03A9.
30 t
!("\u{2126}", "\u{3a9}");
// Both spellings (U+1E0B + U+0323 and U+1E0D + U+0307) are expected to yield
// the same sequence: `d`, U+0323, U+0307.
31 t
!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
32 t
!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
// Already-decomposed input, and a mark that precedes its letter, are expected
// to come back unchanged.
33 t
!("a\u{301}", "a\u{301}");
34 t
!("\u{301}a", "\u{301}a");
// Hangul syllables are expected to expand into three and two code points
// from the U+11xx range respectively.
35 t
!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
36 t
!("\u{ac1c}", "\u{1100}\u{1162}");
// Arm of the local `t!(input, expected)` helper used for the NFKD checks:
// asserts that `input.nfkd().to_string()` equals `expected`.
// NOTE(review): the enclosing `#[test] fn` header and the `macro_rules! t {`
// line are not visible in this excerpt.
42 ($input
: expr
, $expected
: expr
) => {
43 assert_eq
!($input
.nfkd().to_string(), $expected
);
// U+1E0B is expected to split into `d` + U+0307, and U+01C4 is expected to
// expand to "DZ" followed by U+030C.
47 t
!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
// U+2026 is expected to become three ASCII full stops.
48 t
!("\u{2026}", "...");
// U+2126 is expected to map to U+03A9.
49 t
!("\u{2126}", "\u{3a9}");
// Both spellings are expected to yield the same sequence: `d`, U+0323, U+0307.
50 t
!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
51 t
!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
// Already-decomposed input, and a mark that precedes its letter, are expected
// to come back unchanged.
52 t
!("a\u{301}", "a\u{301}");
53 t
!("\u{301}a", "\u{301}a");
// Hangul syllables are expected to expand into three and two code points
// from the U+11xx range respectively.
54 t
!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
55 t
!("\u{ac1c}", "\u{1100}\u{1162}");
// Arm of the local `t!(input, expected)` helper used for the NFC checks:
// asserts that `input.nfc().to_string()` equals `expected`.
// NOTE(review): the enclosing `#[test] fn` header and the `macro_rules! t {`
// line are not visible in this excerpt.
61 ($input
: expr
, $expected
: expr
) => {
62 assert_eq
!($input
.nfc().to_string(), $expected
);
// U+1E0B followed by U+01C4 is expected to come back unchanged.
66 t
!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
// U+2026 is expected to be left as-is by NFC.
67 t
!("\u{2026}", "\u{2026}");
// U+2126 is expected to map to U+03A9.
68 t
!("\u{2126}", "\u{3a9}");
// Both spellings are expected to end up as U+1E0D followed by U+0307.
69 t
!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
70 t
!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
// `a` + U+0301 is expected to combine into U+00E1; a mark that precedes its
// letter is expected to stay separate.
71 t
!("a\u{301}", "\u{e1}");
72 t
!("\u{301}a", "\u{301}a");
// Hangul syllables are expected to come back unchanged.
73 t
!("\u{d4db}", "\u{d4db}");
74 t
!("\u{ac1c}", "\u{ac1c}");
// Several marks after `a`: U+0300 is expected to merge into U+00E0 and the
// remaining marks to come out in the order U+05AE, U+0305, U+0315.
75 t
!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// Arm of the local `t!(input, expected)` helper used for the NFKC checks:
// asserts that `input.nfkc().to_string()` equals `expected`.
// NOTE(review): the enclosing `#[test] fn` header and the `macro_rules! t {`
// line are not visible in this excerpt.
81 ($input
: expr
, $expected
: expr
) => {
82 assert_eq
!($input
.nfkc().to_string(), $expected
);
// U+1E0B is expected to stay as it is, while U+01C4 is expected to become
// `D` followed by U+017D.
86 t
!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
// U+2026 is expected to become three ASCII full stops.
87 t
!("\u{2026}", "...");
// U+2126 is expected to map to U+03A9.
88 t
!("\u{2126}", "\u{3a9}");
// Both spellings are expected to end up as U+1E0D followed by U+0307.
89 t
!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
90 t
!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
// `a` + U+0301 is expected to combine into U+00E1; a mark that precedes its
// letter is expected to stay separate.
91 t
!("a\u{301}", "\u{e1}");
92 t
!("\u{301}a", "\u{301}a");
// Hangul syllables are expected to come back unchanged.
93 t
!("\u{d4db}", "\u{d4db}");
94 t
!("\u{ac1c}", "\u{ac1c}");
// Several marks after `a`: U+0300 is expected to merge into U+00E0 and the
// remaining marks to come out in the order U+05AE, U+0305, U+0315.
95 t
!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
100 use testdata
::TEST_NORM
;
// `normString!(method, input)` expands to `input.method().collect::<String>()`:
// it calls the method named by the first argument on the second argument and
// gathers whatever that yields into an owned `String`.
// NOTE(review): the closing brace of this `macro_rules!` definition is not
// visible in this excerpt.
101 macro_rules
! normString
{
102 ($method
: ident
, $input
: expr
) => { $input.$method().collect::<String>() }
// Walks every entry of `TEST_NORM`, destructuring it into five values
// `s1`..`s5`, and checks all four normalization forms against them.
// The assertions below treat `s2` as the expected NFC form, `s3` as the
// expected NFD form, `s4` as the expected NFKC form and `s5` as the expected
// NFKD form. Each `rN` is the `String` produced by `normString!`; `&rN[..]`
// borrows it as a slice for the comparison.
// NOTE(review): the enclosing function header and the loop's closing brace
// are not visible in this excerpt.
105 for &(s1
, s2
, s3
, s4
, s5
) in TEST_NORM
{
106 // these invariants come from the CONFORMANCE section of
107 // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
// NFC: s1, s2 and s3 must all normalize to s2; s4 and s5 must normalize to s4.
109 let r1
= normString
!(nfc
, s1
);
110 let r2
= normString
!(nfc
, s2
);
111 let r3
= normString
!(nfc
, s3
);
112 let r4
= normString
!(nfc
, s4
);
113 let r5
= normString
!(nfc
, s5
);
114 assert_eq
!(s2
, &r1
[..]);
115 assert_eq
!(s2
, &r2
[..]);
116 assert_eq
!(s2
, &r3
[..]);
117 assert_eq
!(s4
, &r4
[..]);
118 assert_eq
!(s4
, &r5
[..]);
// NFD: s1, s2 and s3 must all normalize to s3; s4 and s5 must normalize to s5.
122 let r1
= normString
!(nfd
, s1
);
123 let r2
= normString
!(nfd
, s2
);
124 let r3
= normString
!(nfd
, s3
);
125 let r4
= normString
!(nfd
, s4
);
126 let r5
= normString
!(nfd
, s5
);
127 assert_eq
!(s3
, &r1
[..]);
128 assert_eq
!(s3
, &r2
[..]);
129 assert_eq
!(s3
, &r3
[..]);
130 assert_eq
!(s5
, &r4
[..]);
131 assert_eq
!(s5
, &r5
[..]);
// NFKC: all five values must normalize to s4.
135 let r1
= normString
!(nfkc
, s1
);
136 let r2
= normString
!(nfkc
, s2
);
137 let r3
= normString
!(nfkc
, s3
);
138 let r4
= normString
!(nfkc
, s4
);
139 let r5
= normString
!(nfkc
, s5
);
140 assert_eq
!(s4
, &r1
[..]);
141 assert_eq
!(s4
, &r2
[..]);
142 assert_eq
!(s4
, &r3
[..]);
143 assert_eq
!(s4
, &r4
[..]);
144 assert_eq
!(s4
, &r5
[..]);
// NFKD: all five values must normalize to s5.
148 let r1
= normString
!(nfkd
, s1
);
149 let r2
= normString
!(nfkd
, s2
);
150 let r3
= normString
!(nfkd
, s3
);
151 let r4
= normString
!(nfkd
, s4
);
152 let r5
= normString
!(nfkd
, s5
);
153 assert_eq
!(s5
, &r1
[..]);
154 assert_eq
!(s5
, &r2
[..]);
155 assert_eq
!(s5
, &r3
[..]);
156 assert_eq
!(s5
, &r4
[..]);
157 assert_eq
!(s5
, &r5
[..]);
// Checks that `is_combining_mark` returns false for the `char` built from the
// code point `cp`.
// NOTE(review): the statement that binds `cp` is not visible in this excerpt
// (presumably a loop over the ASCII range, judging by the test name — confirm),
// and neither are the closing braces.
165 fn test_is_combining_mark_ascii() {
// `char::from_u32` returns an `Option<char>`; `unwrap()` makes the test panic
// if `cp` is not a valid scalar value.
167 assert
!(!is_combining_mark(char::from_u32(cp
).unwrap()));
// Regression check tied to the issue linked below: `is_combining_mark` must
// return true for the two Bhaiksuki code points U+11C3A and U+11C3F.
// NOTE(review): the function's closing brace is not visible in this excerpt.
172 fn test_is_combining_mark_misc() {
173 // https://github.com/unicode-rs/unicode-normalization/issues/16
174 // U+11C3A BHAIKSUKI VOWEL SIGN O
175 // Category: Mark, Nonspacing [Mn]
176 assert
!(is_combining_mark('
\u{11C3A}'
));
178 // U+11C3F BHAIKSUKI SIGN VIRAMA
179 // Category: Mark, Nonspacing [Mn]
180 assert
!(is_combining_mark('
\u{11C3F}'
));