]> git.proxmox.com Git - rustc.git/blame - vendor/mime/src/parse.rs
Update upstream source from tag 'upstream/1.70.0+dfsg1'
[rustc.git] / vendor / mime / src / parse.rs
CommitLineData
353b0b11
FG
1#[allow(unused, deprecated)]
2use std::ascii::AsciiExt;
3use std::error::Error;
4use std::fmt;
5use std::iter::Enumerate;
6use std::str::Bytes;
7
8use super::{Mime, Source, ParamSource, Indexed, CHARSET, UTF_8};
9
/// The ways in which parsing a media type string can fail.
#[derive(Debug)]
pub enum ParseError {
    /// No `/` was found between the type and the subtype.
    MissingSlash,
    /// A parameter name was not followed by `=`.
    MissingEqual,
    /// A quoted parameter value was never closed.
    MissingQuote,
    /// A byte that is not allowed at its position was encountered.
    InvalidToken {
        /// Byte offset of the offending byte within the input.
        pos: usize,
        /// The offending byte itself.
        byte: u8,
    },
}

impl ParseError {
    /// Returns the fixed, human-readable message for this error kind
    /// (without the position details of `InvalidToken`).
    fn s(&self) -> &str {
        match *self {
            ParseError::MissingSlash => {
                "a slash (/) was missing between the type and subtype"
            }
            ParseError::MissingEqual => {
                "an equals sign (=) was missing between a parameter and its value"
            }
            ParseError::MissingQuote => "a quote (\") was missing from a parameter value",
            ParseError::InvalidToken { .. } => "an invalid token was encountered",
        }
    }
}

impl fmt::Display for ParseError {
    /// Writes the fixed message; for `InvalidToken` the byte (in upper-case
    /// hexadecimal) and its position are appended.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            ParseError::InvalidToken { pos, byte } => {
                write!(f, "{}, {:X} at position {}", self.s(), byte, pos)
            }
            ParseError::MissingSlash
            | ParseError::MissingEqual
            | ParseError::MissingQuote => f.write_str(self.s()),
        }
    }
}

impl Error for ParseError {
    // Minimum Rust is 1.15, Error::description was still required then
    #[allow(deprecated)]
    fn description(&self) -> &str {
        self.s()
    }
}
51
52pub fn parse(s: &str) -> Result<Mime, ParseError> {
53 if s == "*/*" {
54 return Ok(::STAR_STAR);
55 }
56
57 let mut iter = s.bytes().enumerate();
58 // toplevel
59 let mut start;
60 let slash;
61 loop {
62 match iter.next() {
63 Some((_, c)) if is_token(c) => (),
64 Some((i, b'/')) if i > 0 => {
65 slash = i;
66 start = i + 1;
67 break;
68 },
69 None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime
70 Some((pos, byte)) => return Err(ParseError::InvalidToken {
71 pos: pos,
72 byte: byte,
73 })
74 };
75
76 }
77
78 // sublevel
79 let mut plus = None;
80 loop {
81 match iter.next() {
82 Some((i, b'+')) if i > start => {
83 plus = Some(i);
84 },
85 Some((i, b';')) if i > start => {
86 start = i;
87 break;
88 },
89 Some((_, c)) if is_token(c) => (),
90 None => {
91 return Ok(Mime {
92 source: Source::Dynamic(s.to_ascii_lowercase()),
93 slash: slash,
94 plus: plus,
95 params: ParamSource::None,
96 });
97 },
98 Some((pos, byte)) => return Err(ParseError::InvalidToken {
99 pos: pos,
100 byte: byte,
101 })
102 };
103 }
104
105 // params
106 let params = params_from_str(s, &mut iter, start)?;
107
108 let src = match params {
109 ParamSource::Utf8(_) => s.to_ascii_lowercase(),
110 ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
111 ParamSource::None => {
112 // Chop off the empty list
113 s[..start].to_ascii_lowercase()
114 }
115 };
116
117 Ok(Mime {
118 source: Source::Dynamic(src),
119 slash: slash,
120 plus: plus,
121 params: params,
122 })
123}
124
125
126fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> {
127 let semicolon = start;
128 start += 1;
129 let mut params = ParamSource::None;
130 'params: while start < s.len() {
131 let name;
132 // name
133 'name: loop {
134 match iter.next() {
135 Some((i, b' ')) if i == start => {
136 start = i + 1;
137 continue 'params;
138 },
139 Some((_, c)) if is_token(c) => (),
140 Some((i, b'=')) if i > start => {
141 name = Indexed(start, i);
142 start = i + 1;
143 break 'name;
144 },
145 None => return Err(ParseError::MissingEqual),
146 Some((pos, byte)) => return Err(ParseError::InvalidToken {
147 pos: pos,
148 byte: byte,
149 }),
150 }
151 }
152
153 let value;
154 // values must be restrict-name-char or "anything goes"
155 let mut is_quoted = false;
156
157 'value: loop {
158 if is_quoted {
159 match iter.next() {
160 Some((i, b'"')) if i > start => {
161 value = Indexed(start, i);
162 break 'value;
163 },
164 Some((_, c)) if is_restricted_quoted_char(c) => (),
165 None => return Err(ParseError::MissingQuote),
166 Some((pos, byte)) => return Err(ParseError::InvalidToken {
167 pos: pos,
168 byte: byte,
169 }),
170 }
171 } else {
172 match iter.next() {
173 Some((i, b'"')) if i == start => {
174 is_quoted = true;
175 start = i + 1;
176 },
177 Some((_, c)) if is_token(c) => (),
178 Some((i, b';')) if i > start => {
179 value = Indexed(start, i);
180 start = i + 1;
181 break 'value;
182 }
183 None => {
184 value = Indexed(start, s.len());
185 start = s.len();
186 break 'value;
187 },
188
189 Some((pos, byte)) => return Err(ParseError::InvalidToken {
190 pos: pos,
191 byte: byte,
192 }),
193 }
194 }
195 }
196
197 if is_quoted {
198 'ws: loop {
199 match iter.next() {
200 Some((i, b';')) => {
201 // next param
202 start = i + 1;
203 break 'ws;
204 },
205 Some((_, b' ')) => {
206 // skip whitespace
207 },
208 None => {
209 // eof
210 start = s.len();
211 break 'ws;
212 },
213 Some((pos, byte)) => return Err(ParseError::InvalidToken {
214 pos: pos,
215 byte: byte,
216 }),
217 }
218 }
219 }
220
221 match params {
222 ParamSource::Utf8(i) => {
223 let i = i + 2;
224 let charset = Indexed(i, "charset".len() + i);
225 let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8".len() + 1);
226 params = ParamSource::Custom(semicolon, vec![
227 (charset, utf8),
228 (name, value),
229 ]);
230 },
231 ParamSource::Custom(_, ref mut vec) => {
232 vec.push((name, value));
233 },
234 ParamSource::None => {
235 if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] {
236 if UTF_8 == &s[value.0..value.1] {
237 params = ParamSource::Utf8(semicolon);
238 continue 'params;
239 }
240 }
241 params = ParamSource::Custom(semicolon, vec![(name, value)]);
242 },
243 }
244 }
245 Ok(params)
246}
247
248fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String {
249 let mut owned = s.to_owned();
250 owned[..semi].make_ascii_lowercase();
251
252 for &(ref name, ref value) in params {
253 owned[name.0..name.1].make_ascii_lowercase();
254 // Since we just converted this part of the string to lowercase,
255 // we can skip the `Name == &str` unicase check and do a faster
256 // memcmp instead.
257 if &owned[name.0..name.1] == CHARSET.source {
258 owned[value.0..value.1].make_ascii_lowercase();
259 }
260 }
261
262 owned
263}
264
265// From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
266//
267// > All registered media types MUST be assigned top-level type and
268// > subtype names. The combination of these names serves to uniquely
269// > identify the media type, and the subtype name facet (or the absence
270// > of one) identifies the registration tree. Both top-level type and
271// > subtype names are case-insensitive.
272// >
273// > Type and subtype names MUST conform to the following ABNF:
274// >
275// > type-name = restricted-name
276// > subtype-name = restricted-name
277// >
278// > restricted-name = restricted-name-first *126restricted-name-chars
279// > restricted-name-first = ALPHA / DIGIT
280// > restricted-name-chars = ALPHA / DIGIT / "!" / "#" /
281// > "$" / "&" / "-" / "^" / "_"
282// > restricted-name-chars =/ "." ; Characters before first dot always
283// > ; specify a facet name
284// > restricted-name-chars =/ "+" ; Characters after last plus always
285// > ; specify a structured syntax suffix
286
287// However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1):
288//
289// > media-type = type "/" subtype *( OWS ";" OWS parameter )
290// > type = token
291// > subtype = token
292// > parameter = token "=" ( token / quoted-string )
293//
294// Where token is defined as:
295//
296// > token = 1*tchar
297// > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
298// > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
299//
300// So, clearly, ¯\_(ツ)_/¯
301
/// Turns a comma-terminated list of `0`/non-zero integers into an array of
/// `bool`s (`true` for every non-zero entry).
macro_rules! byte_map {
    ($($bit:expr,)*) => ([
        $($bit != 0,)*
    ])
}

/// Lookup table indexed by byte value: `true` for the bytes accepted as part
/// of a token (ASCII letters, digits and the punctuation
/// ``! # $ % & ' * + - . ^ _ ` | ~``), `false` for everything else.
static TOKEN_MAP: [bool; 256] = byte_map![
    // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F: space ! " # $ % & ' ( ) * + , - . /
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
    // 0x30 - 0x3F: 0-9 : ; < = > ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    // 0x40 - 0x4F: @ A-O
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50 - 0x5F: P-Z [ \ ] ^ _
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    // 0x60 - 0x6F: ` a-o
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70 - 0x7F: p-z { | } ~ DEL
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
    // 0x80 - 0xFF: no non-ASCII byte is a token byte
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

/// Returns `true` if `c` is marked as a token byte in `TOKEN_MAP`.
fn is_token(c: u8) -> bool {
    TOKEN_MAP[usize::from(c)]
}
330
/// Returns `true` for bytes allowed inside a quoted parameter value: every
/// byte from space (32) upwards except DEL (127). Bytes of 128 and above are
/// accepted.
fn is_restricted_quoted_char(c: u8) -> bool {
    let is_control = c < b' ' || c == 0x7F;
    !is_control
}
334
/// Checks every entry of `TOKEN_MAP` against an independently written
/// description of the token bytes: ASCII letters, digits and a fixed set of
/// punctuation characters.
#[test]
fn test_lookup_tables() {
    // Punctuation bytes that count as token bytes.
    let punctuation: &[u8] = b"!#$%&'*+-.^_`|~";

    for (index, &actual) in TOKEN_MAP.iter().enumerate() {
        let byte = index as u8;
        let is_alpha = (b'a' <= byte && byte <= b'z') || (b'A' <= byte && byte <= b'Z');
        let is_digit = b'0' <= byte && byte <= b'9';
        let expected = is_alpha || is_digit || punctuation.contains(&byte);
        assert_eq!(actual, expected, "{:?} ({}) should be {}", byte as char, byte, expected);
    }
}