]>
Commit | Line | Data |
---|---|---|
6a06907d XL |
1 | use crate::Error; |
2 | use proc_macro2::{Delimiter, Ident, Literal, Span, TokenStream, TokenTree}; | |
3 | use std::char; | |
4 | ||
/// Expands to the shared "expected one of ..." diagnostic text.
///
/// This is a macro rather than a `const` so that the expansion is a string
/// literal and can be spliced into `concat!` invocations.
macro_rules! unexpected_content {
    () => (
        "expected one of: byte string literal, string literal, identifier"
    );
}
10 | ||
11 | pub(crate) fn parse_input(mut input: TokenStream) -> Result<(Vec<u8>, Span), Error> { | |
12 | loop { | |
13 | let mut tokens = input.into_iter(); | |
14 | let token = match tokens.next() { | |
15 | Some(token) => token, | |
16 | None => { | |
17 | return Err(Error( | |
18 | Span::call_site(), | |
19 | concat!("unexpected end of input, ", unexpected_content!()), | |
20 | )) | |
21 | } | |
22 | }; | |
23 | let span = token.span(); | |
24 | let result = match token { | |
25 | // Unwrap any empty group which may be created from macro expansion. | |
26 | TokenTree::Group(group) if group.delimiter() == Delimiter::None => Err(group), | |
27 | TokenTree::Literal(literal) => match parse_literal(literal) { | |
28 | Ok(result) => Ok(result), | |
29 | Err(msg) => return Err(Error(span, msg)), | |
30 | }, | |
31 | TokenTree::Ident(ident) => Ok(parse_ident(ident)), | |
32 | _ => return Err(Error(span, unexpected_content!())), | |
33 | }; | |
34 | if let Some(token) = tokens.next() { | |
35 | return Err(Error(token.span(), "unexpected token")); | |
36 | } | |
37 | match result { | |
38 | Ok(result) => return Ok((result, span)), | |
39 | Err(group) => input = group.stream(), | |
40 | } | |
41 | } | |
42 | } | |
43 | ||
44 | fn parse_literal(literal: Literal) -> Result<Vec<u8>, &'static str> { | |
45 | let s = literal.to_string(); | |
46 | let s = s.as_bytes(); | |
47 | match s[0] { | |
48 | b'"' => Ok(parse_cooked_content(&s)), | |
49 | b'r' => Ok(parse_raw_content(&s[1..])), | |
50 | b'b' => match s[1] { | |
51 | b'"' => Ok(parse_cooked_content(&s[1..])), | |
52 | b'r' => Ok(parse_raw_content(&s[2..])), | |
53 | _ => Err(unexpected_content!()), | |
54 | }, | |
55 | _ => Err(unexpected_content!()), | |
56 | } | |
57 | } | |
58 | ||
/// Returns `true` when `bytes` contains nothing but `#` (vacuously true for
/// an empty slice).
fn all_pounds(bytes: &[u8]) -> bool {
    !bytes.iter().any(|&byte| byte != b'#')
}
62 | ||
63 | /// Parses raw string / bytes content after `r` prefix. | |
64 | fn parse_raw_content(s: &[u8]) -> Vec<u8> { | |
65 | let q_start = s.iter().position(|b| *b == b'"').unwrap(); | |
66 | let q_end = s.iter().rposition(|b| *b == b'"').unwrap(); | |
67 | assert!(all_pounds(&s[0..q_start])); | |
68 | assert!(all_pounds(&s[q_end + 1..q_end + q_start + 1])); | |
69 | Vec::from(&s[q_start + 1..q_end]) | |
70 | } | |
71 | ||
72 | /// Parses the cooked string / bytes content within quotes. | |
73 | fn parse_cooked_content(mut s: &[u8]) -> Vec<u8> { | |
74 | s = &s[1..s.iter().rposition(|b| *b == b'"').unwrap()]; | |
75 | let mut result = Vec::new(); | |
76 | while !s.is_empty() { | |
77 | match s[0] { | |
78 | b'\\' => {} | |
79 | b'\r' => { | |
80 | assert_eq!(s[1], b'\n'); | |
81 | result.push(b'\n'); | |
82 | s = &s[2..]; | |
83 | continue; | |
84 | } | |
85 | b => { | |
86 | result.push(b); | |
87 | s = &s[1..]; | |
88 | continue; | |
89 | } | |
90 | } | |
91 | let b = s[1]; | |
92 | s = &s[2..]; | |
93 | match b { | |
94 | b'x' => { | |
95 | let (b, rest) = backslash_x(&s); | |
96 | result.push(b); | |
97 | s = rest; | |
98 | } | |
99 | b'u' => { | |
100 | let (c, rest) = backslash_u(&s); | |
101 | result.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()); | |
102 | s = rest; | |
103 | } | |
104 | b'n' => result.push(b'\n'), | |
105 | b'r' => result.push(b'\r'), | |
106 | b't' => result.push(b'\t'), | |
107 | b'\\' => result.push(b'\\'), | |
108 | b'0' => result.push(b'\0'), | |
109 | b'\'' => result.push(b'\''), | |
110 | b'"' => result.push(b'"'), | |
111 | b'\r' | b'\n' => { | |
112 | let next = s.iter().position(|b| { | |
113 | let ch = char::from_u32(u32::from(*b)).unwrap(); | |
114 | !ch.is_whitespace() | |
115 | }); | |
116 | match next { | |
117 | Some(pos) => s = &s[pos..], | |
118 | None => s = b"", | |
119 | } | |
120 | } | |
121 | b => panic!("unexpected byte {:?} after \\", b), | |
122 | } | |
123 | } | |
124 | result | |
125 | } | |
126 | ||
127 | fn backslash_x(s: &[u8]) -> (u8, &[u8]) { | |
128 | let ch = hex_to_u8(s[0]) * 0x10 + hex_to_u8(s[1]); | |
129 | (ch, &s[2..]) | |
130 | } | |
131 | ||
/// Converts one ASCII hex digit (either case) into its numeric value.
///
/// Panics on any other byte.
fn hex_to_u8(b: u8) -> u8 {
    // Pick the byte that maps onto value zero for the digit's range.
    let zero_point = match b {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => unreachable!("unexpected non-hex character {:?} after \\x", b),
    };
    b - zero_point
}
140 | ||
141 | fn backslash_u(s: &[u8]) -> (char, &[u8]) { | |
142 | assert_eq!(s[0], b'{'); | |
143 | let end = s[1..].iter().position(|b| *b == b'}').unwrap(); | |
144 | let mut ch = 0; | |
145 | for b in &s[1..=end] { | |
146 | ch *= 0x10; | |
147 | ch += u32::from(hex_to_u8(*b)); | |
148 | } | |
149 | (char::from_u32(ch).unwrap(), &s[end + 2..]) | |
150 | } | |
151 | ||
152 | fn parse_ident(ident: Ident) -> Vec<u8> { | |
153 | ident.to_string().into_bytes() | |
154 | } | |
155 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    // Tests below were modified from
    // https://github.com/dtolnay/syn/blob/cd5fdc0f530f822446fccaf831669cd0cf4a0fc9/tests/test_lit.rs

    /// Tokenizes `s` and decodes its first token, which must be a literal.
    fn lit(s: &str) -> Vec<u8> {
        let stream = TokenStream::from_str(s).unwrap();
        let first = stream.into_iter().next().unwrap();
        if let TokenTree::Literal(lit) = first {
            parse_literal(lit).unwrap()
        } else {
            panic!()
        }
    }

    /// Asserts that the literal written as `source` decodes to `expected`.
    /// Failures are reported at the calling line.
    #[track_caller]
    fn check(source: &str, expected: &[u8]) {
        assert_eq!(lit(source), expected);
    }

    #[test]
    fn strings() {
        // Cooked strings: escapes, non-ASCII content and continuations.
        check("\"a\"", b"a");
        check("\"\\n\"", b"\n");
        check("\"\\r\"", b"\r");
        check("\"\\t\"", b"\t");
        check("\"🐕\"", b"\xf0\x9f\x90\x95"); // NOTE: This is an emoji
        check("\"\\\"\"", b"\"");
        check("\"'\"", b"'");
        check("\"\"", b"");
        check("\"\\u{1F415}\"", b"\xf0\x9f\x90\x95");
        check(
            "\"contains\nnewlines\\\nescaped newlines\"",
            b"contains\nnewlinesescaped newlines",
        );
        // Raw strings keep their content verbatim.
        check("r\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere");
        // Literal suffixes are ignored.
        check("\"...\"q", b"...");
        check("r\"...\"q", b"...");
        check("r##\"...\"##q", b"...");
    }

    #[test]
    fn byte_strings() {
        // Cooked byte strings.
        check("b\"a\"", b"a");
        check("b\"\\n\"", b"\n");
        check("b\"\\r\"", b"\r");
        check("b\"\\t\"", b"\t");
        check("b\"\\\"\"", b"\"");
        check("b\"'\"", b"'");
        check("b\"\"", b"");
        check(
            "b\"contains\nnewlines\\\nescaped newlines\"",
            b"contains\nnewlinesescaped newlines",
        );
        // Raw byte strings keep their content verbatim.
        check("br\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere");
        // Literal suffixes are ignored.
        check("b\"...\"q", b"...");
        check("br\"...\"q", b"...");
        check("br##\"...\"##q", b"...");
    }
}