]>
Commit | Line | Data |
---|---|---|
487cf647 FG |
1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree | |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | |
4 | ||
5 | //! Unicode Extensions provide information about user preferences in a given locale. | |
6 | //! | |
7 | //! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and | |
8 | //! [`Attributes`]. | |
9 | //! | |
10 | //! | |
11 | //! # Examples | |
12 | //! | |
13 | //! ``` | |
9ffffee4 FG |
14 | //! use icu::locid::Locale; |
15 | //! use icu::locid::{ | |
16 | //! extensions::unicode::Unicode, | |
17 | //! extensions_unicode_attribute as attribute, | |
18 | //! extensions_unicode_key as key, extensions_unicode_value as value, | |
19 | //! }; | |
487cf647 | 20 | //! |
9ffffee4 | 21 | //! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed."); |
487cf647 | 22 | //! |
9ffffee4 FG |
23 | //! assert_eq!( |
24 | //! loc.extensions.unicode.keywords.get(&key!("hc")), | |
25 | //! Some(&value!("h12")) | |
26 | //! ); | |
27 | //! assert!(loc | |
28 | //! .extensions | |
29 | //! .unicode | |
30 | //! .attributes | |
31 | //! .contains(&attribute!("foobar"))); | |
487cf647 FG |
32 | //! ``` |
33 | mod attribute; | |
34 | mod attributes; | |
35 | mod key; | |
36 | mod keywords; | |
37 | mod value; | |
38 | ||
39 | use alloc::vec; | |
40 | pub use attribute::Attribute; | |
41 | pub use attributes::Attributes; | |
42 | pub use key::Key; | |
43 | pub use keywords::Keywords; | |
44 | pub use value::Value; | |
45 | ||
46 | use crate::parser::ParserError; | |
47 | use crate::parser::SubtagIterator; | |
48 | use litemap::LiteMap; | |
49 | ||
50 | /// Unicode Extensions provide information about user preferences in a given locale. | |
51 | /// | |
52 | /// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale | |
53 | /// Identifier`] specification. | |
54 | /// | |
55 | /// Unicode extensions provide subtags that specify language and/or locale-based behavior | |
56 | /// or refinements to language tags, according to work done by the Unicode Consortium. | |
57 | /// (See [`RFC 6067`] for details). | |
58 | /// | |
59 | /// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension | |
60 | /// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt | |
61 | /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier | |
62 | /// | |
63 | /// # Examples | |
64 | /// | |
65 | /// ``` | |
487cf647 | 66 | /// use icu::locid::Locale; |
9ffffee4 FG |
67 | /// use icu::locid::{ |
68 | /// extensions_unicode_key as key, extensions_unicode_value as value, | |
69 | /// }; | |
487cf647 | 70 | /// |
9ffffee4 | 71 | /// let loc: Locale = |
487cf647 FG |
72 | /// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed."); |
73 | /// | |
9ffffee4 FG |
74 | /// assert_eq!( |
75 | /// loc.extensions.unicode.keywords.get(&key!("ca")), | |
76 | /// Some(&value!("buddhist")) | |
77 | /// ); | |
487cf647 FG |
78 | /// ``` |
79 | #[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] | |
80 | #[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure | |
81 | pub struct Unicode { | |
82 | /// The key-value pairs present in this locale extension, with each extension key subtag | |
83 | /// associated to its provided value subtag. | |
84 | pub keywords: Keywords, | |
85 | /// A canonically ordered sequence of single standalone subtags for this locale extension. | |
86 | pub attributes: Attributes, | |
87 | } | |
88 | ||
89 | impl Unicode { | |
90 | /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`. | |
91 | /// | |
92 | /// # Examples | |
93 | /// | |
94 | /// ``` | |
95 | /// use icu::locid::extensions::unicode::Unicode; | |
96 | /// | |
97 | /// assert_eq!(Unicode::new(), Unicode::default()); | |
98 | /// ``` | |
99 | #[inline] | |
100 | pub const fn new() -> Self { | |
101 | Self { | |
102 | keywords: Keywords::new(), | |
103 | attributes: Attributes::new(), | |
104 | } | |
105 | } | |
106 | ||
107 | /// Returns [`true`] if there list of keywords and attributes is empty. | |
108 | /// | |
109 | /// # Examples | |
110 | /// | |
111 | /// ``` | |
112 | /// use icu::locid::Locale; | |
113 | /// | |
114 | /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed."); | |
115 | /// | |
116 | /// assert!(!loc.extensions.unicode.is_empty()); | |
117 | /// ``` | |
118 | pub fn is_empty(&self) -> bool { | |
119 | self.keywords.is_empty() && self.attributes.is_empty() | |
120 | } | |
121 | ||
122 | /// Clears all Unicode extension keywords and attributes, effectively removing | |
123 | /// the Unicode extension. | |
124 | /// | |
125 | /// # Example | |
126 | /// | |
127 | /// ``` | |
128 | /// use icu::locid::Locale; | |
129 | /// | |
130 | /// let mut loc: Locale = | |
131 | /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap(); | |
132 | /// loc.extensions.unicode.clear(); | |
133 | /// assert_eq!(loc, "und-t-mul".parse().unwrap()); | |
134 | /// ``` | |
135 | pub fn clear(&mut self) { | |
136 | self.keywords.clear(); | |
137 | self.attributes.clear(); | |
138 | } | |
139 | ||
140 | pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { | |
141 | let mut attributes = vec![]; | |
142 | let mut keywords = LiteMap::new(); | |
143 | ||
144 | let mut current_keyword = None; | |
145 | let mut current_type = vec![]; | |
146 | ||
147 | while let Some(subtag) = iter.peek() { | |
148 | if let Ok(attr) = Attribute::try_from_bytes(subtag) { | |
149 | if let Err(idx) = attributes.binary_search(&attr) { | |
150 | attributes.insert(idx, attr); | |
151 | } | |
152 | } else { | |
153 | break; | |
154 | } | |
155 | iter.next(); | |
156 | } | |
157 | ||
158 | while let Some(subtag) = iter.peek() { | |
159 | let slen = subtag.len(); | |
160 | if slen == 2 { | |
161 | if let Some(kw) = current_keyword.take() { | |
162 | keywords.try_insert(kw, Value::from_vec_unchecked(current_type)); | |
163 | current_type = vec![]; | |
164 | } | |
165 | current_keyword = Some(Key::try_from_bytes(subtag)?); | |
166 | } else if current_keyword.is_some() { | |
167 | match Value::parse_subtag(subtag) { | |
168 | Ok(Some(t)) => current_type.push(t), | |
169 | Ok(None) => {} | |
170 | Err(_) => break, | |
171 | } | |
172 | } else { | |
173 | break; | |
174 | } | |
175 | iter.next(); | |
176 | } | |
177 | ||
178 | if let Some(kw) = current_keyword.take() { | |
179 | keywords.try_insert(kw, Value::from_vec_unchecked(current_type)); | |
180 | } | |
181 | ||
182 | // Ensure we've defined at least one attribute or keyword | |
183 | if attributes.is_empty() && keywords.is_empty() { | |
184 | return Err(ParserError::InvalidExtension); | |
185 | } | |
186 | ||
187 | Ok(Self { | |
188 | keywords: keywords.into(), | |
189 | attributes: Attributes::from_vec_unchecked(attributes), | |
190 | }) | |
191 | } | |
192 | ||
193 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> | |
194 | where | |
195 | F: FnMut(&str) -> Result<(), E>, | |
196 | { | |
197 | if self.is_empty() { | |
198 | return Ok(()); | |
199 | } | |
200 | f("u")?; | |
201 | self.attributes.for_each_subtag_str(f)?; | |
202 | self.keywords.for_each_subtag_str(f)?; | |
203 | Ok(()) | |
204 | } | |
205 | } | |
206 | ||
207 | writeable::impl_display_with_writeable!(Unicode); | |
208 | ||
209 | impl writeable::Writeable for Unicode { | |
210 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { | |
211 | if self.is_empty() { | |
212 | return Ok(()); | |
213 | } | |
9ffffee4 | 214 | sink.write_str("u")?; |
487cf647 FG |
215 | if !self.attributes.is_empty() { |
216 | sink.write_char('-')?; | |
217 | writeable::Writeable::write_to(&self.attributes, sink)?; | |
218 | } | |
219 | if !self.keywords.is_empty() { | |
220 | sink.write_char('-')?; | |
221 | writeable::Writeable::write_to(&self.keywords, sink)?; | |
222 | } | |
223 | Ok(()) | |
224 | } | |
225 | ||
226 | fn writeable_length_hint(&self) -> writeable::LengthHint { | |
227 | if self.is_empty() { | |
228 | return writeable::LengthHint::exact(0); | |
229 | } | |
9ffffee4 | 230 | let mut result = writeable::LengthHint::exact(1); |
487cf647 FG |
231 | if !self.attributes.is_empty() { |
232 | result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1; | |
233 | } | |
234 | if !self.keywords.is_empty() { | |
235 | result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1; | |
236 | } | |
237 | result | |
238 | } | |
239 | } |