]> git.proxmox.com Git - rustc.git/blame - vendor/icu_locid/src/extensions/unicode/mod.rs
New upstream version 1.69.0+dfsg1
[rustc.git] / vendor / icu_locid / src / extensions / unicode / mod.rs
CommitLineData
487cf647
FG
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! Unicode Extensions provide information about user preferences in a given locale.
//!
//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
//! [`Attributes`].
//!
//!
//! # Examples
//!
//! ```
//! use icu::locid::Locale;
//! use icu::locid::{
//!     extensions::unicode::Unicode,
//!     extensions_unicode_attribute as attribute,
//!     extensions_unicode_key as key, extensions_unicode_value as value,
//! };
//!
//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
//!
//! assert_eq!(
//!     loc.extensions.unicode.keywords.get(&key!("hc")),
//!     Some(&value!("h12"))
//! );
//! assert!(loc
//!     .extensions
//!     .unicode
//!     .attributes
//!     .contains(&attribute!("foobar")));
//! ```
33mod attribute;
34mod attributes;
35mod key;
36mod keywords;
37mod value;
38
39use alloc::vec;
40pub use attribute::Attribute;
41pub use attributes::Attributes;
42pub use key::Key;
43pub use keywords::Keywords;
44pub use value::Value;
45
46use crate::parser::ParserError;
47use crate::parser::SubtagIterator;
48use litemap::LiteMap;
49
50/// Unicode Extensions provide information about user preferences in a given locale.
51///
52/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
53/// Identifier`] specification.
54///
55/// Unicode extensions provide subtags that specify language and/or locale-based behavior
56/// or refinements to language tags, according to work done by the Unicode Consortium.
57/// (See [`RFC 6067`] for details).
58///
59/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
60/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
61/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
62///
63/// # Examples
64///
65/// ```
487cf647 66/// use icu::locid::Locale;
9ffffee4
FG
67/// use icu::locid::{
68/// extensions_unicode_key as key, extensions_unicode_value as value,
69/// };
487cf647 70///
9ffffee4 71/// let loc: Locale =
487cf647
FG
72/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
73///
9ffffee4
FG
74/// assert_eq!(
75/// loc.extensions.unicode.keywords.get(&key!("ca")),
76/// Some(&value!("buddhist"))
77/// );
487cf647
FG
78/// ```
79#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
80#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
81pub struct Unicode {
82 /// The key-value pairs present in this locale extension, with each extension key subtag
83 /// associated to its provided value subtag.
84 pub keywords: Keywords,
85 /// A canonically ordered sequence of single standalone subtags for this locale extension.
86 pub attributes: Attributes,
87}
88
89impl Unicode {
90 /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
91 ///
92 /// # Examples
93 ///
94 /// ```
95 /// use icu::locid::extensions::unicode::Unicode;
96 ///
97 /// assert_eq!(Unicode::new(), Unicode::default());
98 /// ```
99 #[inline]
100 pub const fn new() -> Self {
101 Self {
102 keywords: Keywords::new(),
103 attributes: Attributes::new(),
104 }
105 }
106
107 /// Returns [`true`] if there list of keywords and attributes is empty.
108 ///
109 /// # Examples
110 ///
111 /// ```
112 /// use icu::locid::Locale;
113 ///
114 /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
115 ///
116 /// assert!(!loc.extensions.unicode.is_empty());
117 /// ```
118 pub fn is_empty(&self) -> bool {
119 self.keywords.is_empty() && self.attributes.is_empty()
120 }
121
122 /// Clears all Unicode extension keywords and attributes, effectively removing
123 /// the Unicode extension.
124 ///
125 /// # Example
126 ///
127 /// ```
128 /// use icu::locid::Locale;
129 ///
130 /// let mut loc: Locale =
131 /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
132 /// loc.extensions.unicode.clear();
133 /// assert_eq!(loc, "und-t-mul".parse().unwrap());
134 /// ```
135 pub fn clear(&mut self) {
136 self.keywords.clear();
137 self.attributes.clear();
138 }
139
140 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
141 let mut attributes = vec![];
142 let mut keywords = LiteMap::new();
143
144 let mut current_keyword = None;
145 let mut current_type = vec![];
146
147 while let Some(subtag) = iter.peek() {
148 if let Ok(attr) = Attribute::try_from_bytes(subtag) {
149 if let Err(idx) = attributes.binary_search(&attr) {
150 attributes.insert(idx, attr);
151 }
152 } else {
153 break;
154 }
155 iter.next();
156 }
157
158 while let Some(subtag) = iter.peek() {
159 let slen = subtag.len();
160 if slen == 2 {
161 if let Some(kw) = current_keyword.take() {
162 keywords.try_insert(kw, Value::from_vec_unchecked(current_type));
163 current_type = vec![];
164 }
165 current_keyword = Some(Key::try_from_bytes(subtag)?);
166 } else if current_keyword.is_some() {
167 match Value::parse_subtag(subtag) {
168 Ok(Some(t)) => current_type.push(t),
169 Ok(None) => {}
170 Err(_) => break,
171 }
172 } else {
173 break;
174 }
175 iter.next();
176 }
177
178 if let Some(kw) = current_keyword.take() {
179 keywords.try_insert(kw, Value::from_vec_unchecked(current_type));
180 }
181
182 // Ensure we've defined at least one attribute or keyword
183 if attributes.is_empty() && keywords.is_empty() {
184 return Err(ParserError::InvalidExtension);
185 }
186
187 Ok(Self {
188 keywords: keywords.into(),
189 attributes: Attributes::from_vec_unchecked(attributes),
190 })
191 }
192
193 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
194 where
195 F: FnMut(&str) -> Result<(), E>,
196 {
197 if self.is_empty() {
198 return Ok(());
199 }
200 f("u")?;
201 self.attributes.for_each_subtag_str(f)?;
202 self.keywords.for_each_subtag_str(f)?;
203 Ok(())
204 }
205}
206
207writeable::impl_display_with_writeable!(Unicode);
208
209impl writeable::Writeable for Unicode {
210 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
211 if self.is_empty() {
212 return Ok(());
213 }
9ffffee4 214 sink.write_str("u")?;
487cf647
FG
215 if !self.attributes.is_empty() {
216 sink.write_char('-')?;
217 writeable::Writeable::write_to(&self.attributes, sink)?;
218 }
219 if !self.keywords.is_empty() {
220 sink.write_char('-')?;
221 writeable::Writeable::write_to(&self.keywords, sink)?;
222 }
223 Ok(())
224 }
225
226 fn writeable_length_hint(&self) -> writeable::LengthHint {
227 if self.is_empty() {
228 return writeable::LengthHint::exact(0);
229 }
9ffffee4 230 let mut result = writeable::LengthHint::exact(1);
487cf647
FG
231 if !self.attributes.is_empty() {
232 result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
233 }
234 if !self.keywords.is_empty() {
235 result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
236 }
237 result
238 }
239}