1 #[allow(unused, deprecated)]
2 use std
::ascii
::AsciiExt
;
5 use std
::iter
::Enumerate
;
8 use super::{Mime, Source, ParamSource, Indexed, CHARSET, UTF_8}
;
23 use self::ParseError
::*;
26 MissingSlash
=> "a slash (/) was missing between the type and subtype",
27 MissingEqual
=> "an equals sign (=) was missing between a parameter and its value",
28 MissingQuote
=> "a quote (\") was missing from a parameter value",
29 InvalidToken { .. }
=> "an invalid token was encountered",
34 impl fmt
::Display
for ParseError
{
35 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
36 if let ParseError
::InvalidToken { pos, byte }
= *self {
37 write
!(f
, "{}, {:X} at position {}", self.s(), byte
, pos
)
44 impl Error
for ParseError
{
45 // Minimum Rust is 1.15, Error::description was still required then
47 fn description(&self) -> &str {
52 pub fn parse(s
: &str) -> Result
<Mime
, ParseError
> {
54 return Ok(::STAR_STAR
);
57 let mut iter
= s
.bytes().enumerate();
63 Some((_
, c
)) if is_token(c
) => (),
64 Some((i
, b'
/'
)) if i
> 0 => {
69 None
=> return Err(ParseError
::MissingSlash
), // EOF and no toplevel is no Mime
70 Some((pos
, byte
)) => return Err(ParseError
::InvalidToken
{
82 Some((i
, b'
+'
)) if i
> start
=> {
85 Some((i
, b'
;'
)) if i
> start
=> {
89 Some((_
, c
)) if is_token(c
) => (),
92 source
: Source
::Dynamic(s
.to_ascii_lowercase()),
95 params
: ParamSource
::None
,
98 Some((pos
, byte
)) => return Err(ParseError
::InvalidToken
{
106 let params
= params_from_str(s
, &mut iter
, start
)?
;
108 let src
= match params
{
109 ParamSource
::Utf8(_
) => s
.to_ascii_lowercase(),
110 ParamSource
::Custom(semicolon
, ref indices
) => lower_ascii_with_params(s
, semicolon
, indices
),
111 ParamSource
::None
=> {
112 // Chop off the empty list
113 s
[..start
].to_ascii_lowercase()
118 source
: Source
::Dynamic(src
),
126 fn params_from_str(s
: &str, iter
: &mut Enumerate
<Bytes
>, mut start
: usize) -> Result
<ParamSource
, ParseError
> {
127 let semicolon
= start
;
129 let mut params
= ParamSource
::None
;
130 'params
: while start
< s
.len() {
135 Some((i
, b' '
)) if i
== start
=> {
139 Some((_
, c
)) if is_token(c
) => (),
140 Some((i
, b'
='
)) if i
> start
=> {
141 name
= Indexed(start
, i
);
145 None
=> return Err(ParseError
::MissingEqual
),
146 Some((pos
, byte
)) => return Err(ParseError
::InvalidToken
{
154 // values must be restrict-name-char or "anything goes"
155 let mut is_quoted
= false;
160 Some((i
, b'
"')) if i > start => {
161 value = Indexed(start, i);
164 Some((_, c)) if is_restricted_quoted_char(c) => (),
165 None => return Err(ParseError::MissingQuote),
166 Some((pos, byte)) => return Err(ParseError::InvalidToken {
173 Some((i, b'"'
)) if i
== start
=> {
177 Some((_
, c
)) if is_token(c
) => (),
178 Some((i
, b'
;'
)) if i
> start
=> {
179 value
= Indexed(start
, i
);
184 value
= Indexed(start
, s
.len());
189 Some((pos
, byte
)) => return Err(ParseError
::InvalidToken
{
213 Some((pos
, byte
)) => return Err(ParseError
::InvalidToken
{
222 ParamSource
::Utf8(i
) => {
224 let charset
= Indexed(i
, "charset".len() + i
);
225 let utf8
= Indexed(charset
.1 + 1, charset
.1 + "utf-8".len() + 1);
226 params
= ParamSource
::Custom(semicolon
, vec
![
231 ParamSource
::Custom(_
, ref mut vec
) => {
232 vec
.push((name
, value
));
234 ParamSource
::None
=> {
235 if semicolon
+ 2 == name
.0 && CHARSET
== &s
[name
.0..name
.1] {
236 if UTF_8
== &s
[value
.0..value
.1] {
237 params
= ParamSource
::Utf8(semicolon
);
241 params
= ParamSource
::Custom(semicolon
, vec
![(name
, value
)]);
/// Builds an owned copy of `s` in which selected byte ranges are ASCII-lowercased.
///
/// * `s` - the input text; it is copied with `to_owned()` first, so the input
///   itself is never modified.
/// * `semi` - byte offset; everything before it (`..semi`) is lowercased.
/// * `params` - `(name, value)` index pairs; each item's `.0` and `.1` fields are
///   used as the start and end of a slice range into the copy.
///
/// Every parameter name range is lowercased. A value range is lowercased only
/// when its (already lowercased) name compares equal to `CHARSET.source`; all
/// other values keep their original case.
// NOTE(review): the tail of this function is not visible in this excerpt; presumably `owned` is what gets returned — confirm.
248 fn lower_ascii_with_params(s
: &str, semi
: usize, params
: &[(Indexed
, Indexed
)]) -> String
{
249 let mut owned
= s
.to_owned();
250 owned
[..semi
].make_ascii_lowercase();
252 for &(ref name
, ref value
) in params
{
253 owned
[name
.0..name
.1].make_ascii_lowercase();
254 // Since we just converted this part of the string to lowercase,
255 // we can skip the `Name == &str` unicase check and do a faster
257 if &owned
[name
.0..name
.1] == CHARSET
.source
{
258 owned
[value
.0..value
.1].make_ascii_lowercase();
265 // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
267 // > All registered media types MUST be assigned top-level type and
268 // > subtype names. The combination of these names serves to uniquely
269 // > identify the media type, and the subtype name facet (or the absence
270 // > of one) identifies the registration tree. Both top-level type and
271 // > subtype names are case-insensitive.
273 // > Type and subtype names MUST conform to the following ABNF:
275 // > type-name = restricted-name
276 // > subtype-name = restricted-name
278 // > restricted-name = restricted-name-first *126restricted-name-chars
279 // > restricted-name-first = ALPHA / DIGIT
280 // > restricted-name-chars = ALPHA / DIGIT / "!" / "#" /
281 // > "$" / "&" / "-" / "^" / "_"
282 // > restricted-name-chars =/ "." ; Characters before first dot always
283 // > ; specify a facet name
284 // > restricted-name-chars =/ "+" ; Characters after last plus always
285 // > ; specify a structured syntax suffix
287 // However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1):
289 // > media-type = type "/" subtype *( OWS ";" OWS parameter )
292 // > parameter = token "=" ( token / quoted-string )
294 // Where token is defined as:
297 // > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
298 // > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
300 // So, clearly, ¯\_(ツ)_/¯
302 macro_rules
! byte_map
{
303 ($
($flag
:expr
,)*) => ([
/// Lookup table with one entry per possible byte value (256 in total),
/// consulted by `is_token`.
///
/// Each row below covers 16 consecutive byte values, starting at 0x00. The
/// flagged (`1`) positions are the ASCII digits and letters plus the
/// punctuation `!`, `#`, `$`, `%`, `&`, `'`, `*`, `+`, `-`, `.`, `^`, `_`,
/// backtick, `|` and `~` — the `tchar` set of RFC 7231. Control bytes, space,
/// the remaining punctuation, DEL and every byte >= 0x80 are `0`.
// NOTE(review): the `byte_map!` expansion is only partly visible in this excerpt; presumably it turns each 0/1 flag into a `bool` — confirm.
308 static TOKEN_MAP
: [bool
; 256] = byte_map
![
309 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
310 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
312 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
313 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
314 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
315 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
316 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
317 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
318 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
323 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/// Reports whether byte `c` is flagged in `TOKEN_MAP`.
///
/// The byte is widened to `usize` and used directly as the table index; the
/// table has 256 entries, so every possible `u8` value is in range.
327 fn is_token(c
: u8) -> bool
{
328 TOKEN_MAP
[c
as usize]
331 fn is_restricted_quoted_char(c
: u8) -> bool
{
336 #[allow(warnings)] // ... ranges deprecated
337 fn test_lookup_tables() {
338 for (i
, &valid
) in TOKEN_MAP
.iter().enumerate() {
340 let should
= match i
{
361 assert_eq
!(valid
, should
, "{:?} ({}) should be {}", i
as char, i
, should
);