1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
//! Unicode character composition and decomposition utilities
//! as described in
//! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
16 //! extern crate unicode_normalization;
18 //! use unicode_normalization::char::compose;
19 //! use unicode_normalization::UnicodeNormalization;
22 //! assert_eq!(compose('A','\u{30a}'), Some('Å'));
25 //! let c = s.nfc().collect::<String>();
26 //! assert_eq!(c, "ÅΩ");
32 //! You can use this package in your project by adding the following
33 //! to your `Cargo.toml`:
37 //! unicode-normalization = "0.1.3"
40 #![deny(missing_docs, unsafe_code)]
41 #![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
42 html_favicon_url
= "https://unicode-rs.github.io/unicode-rs_sm.png")]
44 pub use tables
::UNICODE_VERSION
;
45 pub use decompose
::Decompositions
;
46 pub use recompose
::Recompositions
;
59 /// Methods for composing and decomposing characters.
61 pub use normalize
::{decompose_canonical, decompose_compatible, compose}
;
63 /// Look up the canonical combining class of a character.
64 pub use tables
::normalization
::canonical_combining_class
;
66 /// Return whether the given character is a combining mark (`General_Category=Mark`)
67 pub use tables
::normalization
::is_combining_mark
;
71 /// Methods for iterating over strings while applying Unicode normalizations
73 /// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
74 pub trait UnicodeNormalization
<I
: Iterator
<Item
=char>> {
75 /// Returns an iterator over the string in Unicode Normalization Form D
76 /// (canonical decomposition).
78 fn nfd(self) -> Decompositions
<I
>;
80 /// Returns an iterator over the string in Unicode Normalization Form KD
81 /// (compatibility decomposition).
83 fn nfkd(self) -> Decompositions
<I
>;
85 /// An Iterator over the string in Unicode Normalization Form C
86 /// (canonical decomposition followed by canonical composition).
88 fn nfc(self) -> Recompositions
<I
>;
90 /// An Iterator over the string in Unicode Normalization Form KC
91 /// (compatibility decomposition followed by canonical composition).
93 fn nfkc(self) -> Recompositions
<I
>;
96 impl<'a
> UnicodeNormalization
<Chars
<'a
>> for &'a
str {
98 fn nfd(self) -> Decompositions
<Chars
<'a
>> {
99 decompose
::new_canonical(self.chars())
103 fn nfkd(self) -> Decompositions
<Chars
<'a
>> {
104 decompose
::new_compatible(self.chars())
108 fn nfc(self) -> Recompositions
<Chars
<'a
>> {
109 recompose
::new_canonical(self.chars())
113 fn nfkc(self) -> Recompositions
<Chars
<'a
>> {
114 recompose
::new_compatible(self.chars())
118 impl<I
: Iterator
<Item
=char>> UnicodeNormalization
<I
> for I
{
120 fn nfd(self) -> Decompositions
<I
> {
121 decompose
::new_canonical(self)
125 fn nfkd(self) -> Decompositions
<I
> {
126 decompose
::new_compatible(self)
130 fn nfc(self) -> Recompositions
<I
> {
131 recompose
::new_canonical(self)
135 fn nfkc(self) -> Recompositions
<I
> {
136 recompose
::new_compatible(self)