git.proxmox.com Git - rustc.git/blame - vendor/cstr/src/parse.rs
Merge tag 'debian/1.52.1+dfsg1-1_exp2' into proxmox/buster
[rustc.git] / vendor / cstr / src / parse.rs
CommitLineData
6a06907d
XL
1use crate::Error;
2use proc_macro2::{Delimiter, Ident, Literal, Span, TokenStream, TokenTree};
3use std::char;
4
/// Expands to the message fragment listing the token kinds this parser accepts.
///
/// It is a macro rather than a constant so that the text can be spliced into
/// `concat!`, which only takes literals.
macro_rules! unexpected_content {
    () => (
        "expected one of: byte string literal, string literal, identifier"
    );
}
10
11pub(crate) fn parse_input(mut input: TokenStream) -> Result<(Vec<u8>, Span), Error> {
12 loop {
13 let mut tokens = input.into_iter();
14 let token = match tokens.next() {
15 Some(token) => token,
16 None => {
17 return Err(Error(
18 Span::call_site(),
19 concat!("unexpected end of input, ", unexpected_content!()),
20 ))
21 }
22 };
23 let span = token.span();
24 let result = match token {
25 // Unwrap any empty group which may be created from macro expansion.
26 TokenTree::Group(group) if group.delimiter() == Delimiter::None => Err(group),
27 TokenTree::Literal(literal) => match parse_literal(literal) {
28 Ok(result) => Ok(result),
29 Err(msg) => return Err(Error(span, msg)),
30 },
31 TokenTree::Ident(ident) => Ok(parse_ident(ident)),
32 _ => return Err(Error(span, unexpected_content!())),
33 };
34 if let Some(token) = tokens.next() {
35 return Err(Error(token.span(), "unexpected token"));
36 }
37 match result {
38 Ok(result) => return Ok((result, span)),
39 Err(group) => input = group.stream(),
40 }
41 }
42}
43
44fn parse_literal(literal: Literal) -> Result<Vec<u8>, &'static str> {
45 let s = literal.to_string();
46 let s = s.as_bytes();
47 match s[0] {
48 b'"' => Ok(parse_cooked_content(&s)),
49 b'r' => Ok(parse_raw_content(&s[1..])),
50 b'b' => match s[1] {
51 b'"' => Ok(parse_cooked_content(&s[1..])),
52 b'r' => Ok(parse_raw_content(&s[2..])),
53 _ => Err(unexpected_content!()),
54 },
55 _ => Err(unexpected_content!()),
56 }
57}
58
/// Returns `true` when `bytes` contains nothing but `#` (vacuously true for
/// an empty slice).
fn all_pounds(bytes: &[u8]) -> bool {
    !bytes.iter().any(|&byte| byte != b'#')
}
62
63/// Parses raw string / bytes content after `r` prefix.
64fn parse_raw_content(s: &[u8]) -> Vec<u8> {
65 let q_start = s.iter().position(|b| *b == b'"').unwrap();
66 let q_end = s.iter().rposition(|b| *b == b'"').unwrap();
67 assert!(all_pounds(&s[0..q_start]));
68 assert!(all_pounds(&s[q_end + 1..q_end + q_start + 1]));
69 Vec::from(&s[q_start + 1..q_end])
70}
71
72/// Parses the cooked string / bytes content within quotes.
73fn parse_cooked_content(mut s: &[u8]) -> Vec<u8> {
74 s = &s[1..s.iter().rposition(|b| *b == b'"').unwrap()];
75 let mut result = Vec::new();
76 while !s.is_empty() {
77 match s[0] {
78 b'\\' => {}
79 b'\r' => {
80 assert_eq!(s[1], b'\n');
81 result.push(b'\n');
82 s = &s[2..];
83 continue;
84 }
85 b => {
86 result.push(b);
87 s = &s[1..];
88 continue;
89 }
90 }
91 let b = s[1];
92 s = &s[2..];
93 match b {
94 b'x' => {
95 let (b, rest) = backslash_x(&s);
96 result.push(b);
97 s = rest;
98 }
99 b'u' => {
100 let (c, rest) = backslash_u(&s);
101 result.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes());
102 s = rest;
103 }
104 b'n' => result.push(b'\n'),
105 b'r' => result.push(b'\r'),
106 b't' => result.push(b'\t'),
107 b'\\' => result.push(b'\\'),
108 b'0' => result.push(b'\0'),
109 b'\'' => result.push(b'\''),
110 b'"' => result.push(b'"'),
111 b'\r' | b'\n' => {
112 let next = s.iter().position(|b| {
113 let ch = char::from_u32(u32::from(*b)).unwrap();
114 !ch.is_whitespace()
115 });
116 match next {
117 Some(pos) => s = &s[pos..],
118 None => s = b"",
119 }
120 }
121 b => panic!("unexpected byte {:?} after \\", b),
122 }
123 }
124 result
125}
126
127fn backslash_x(s: &[u8]) -> (u8, &[u8]) {
128 let ch = hex_to_u8(s[0]) * 0x10 + hex_to_u8(s[1]);
129 (ch, &s[2..])
130}
131
/// Converts one ASCII hex digit (either case) to its numeric value.
///
/// Panics on any other byte.
fn hex_to_u8(b: u8) -> u8 {
    // The byte that maps to zero within each digit class.
    let zero_point = match b {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => unreachable!("unexpected non-hex character {:?} after \\x", b),
    };
    b - zero_point
}
140
/// Decodes the `{...}` part of a `\u{...}` escape.
///
/// `s` starts right after the `\u`. Returns the decoded character together
/// with the input remaining after the closing `}`.
///
/// Underscores between the braces are digit separators (as in `\u{1_F415}`)
/// and are skipped. Their placement is not validated here.
///
/// # Panics
///
/// Panics if `s` does not start with `{`, if there is no closing `}`, if a
/// non-hex character appears between the braces, or if the value is not a
/// valid Unicode scalar value.
fn backslash_u(s: &[u8]) -> (char, &[u8]) {
    assert_eq!(s[0], b'{');
    // Index of `}` relative to `s[1..]`, i.e. the brace sits at `s[end + 1]`.
    let end = s[1..].iter().position(|b| *b == b'}').unwrap();
    let mut ch: u32 = 0;
    for &b in &s[1..=end] {
        if b == b'_' {
            continue;
        }
        let digit = (b as char)
            .to_digit(16)
            .unwrap_or_else(|| panic!("unexpected non-hex character {:?} after \\u", b));
        ch = ch * 0x10 + digit;
    }
    (char::from_u32(ch).unwrap(), &s[end + 2..])
}
151
152fn parse_ident(ident: Ident) -> Vec<u8> {
153 ident.to_string().into_bytes()
154}
155
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    // The cases below were adapted from
    // https://github.com/dtolnay/syn/blob/cd5fdc0f530f822446fccaf831669cd0cf4a0fc9/tests/test_lit.rs

    /// Tokenizes `src` and decodes its first token, which must be a literal.
    fn lit(src: &str) -> Vec<u8> {
        let stream = TokenStream::from_str(src).unwrap();
        let first = stream.into_iter().next().unwrap();
        if let TokenTree::Literal(literal) = first {
            parse_literal(literal).unwrap()
        } else {
            panic!()
        }
    }

    /// Asserts that the literal written as `src` decodes to `expected`.
    /// Failures are reported at the calling line.
    #[track_caller]
    fn assert_decodes(src: &str, expected: &[u8]) {
        assert_eq!(lit(src), expected);
    }

    #[test]
    fn strings() {
        // Plain content and simple escapes.
        assert_decodes("\"a\"", b"a");
        assert_decodes("\"\\n\"", b"\n");
        assert_decodes("\"\\r\"", b"\r");
        assert_decodes("\"\\t\"", b"\t");
        assert_decodes("\"🐕\"", b"\xf0\x9f\x90\x95"); // NOTE: This is an emoji
        assert_decodes("\"\\\"\"", b"\"");
        assert_decodes("\"'\"", b"'");
        assert_decodes("\"\"", b"");
        assert_decodes("\"\\u{1F415}\"", b"\xf0\x9f\x90\x95");

        // Literal newlines are kept; backslash-newline is removed.
        assert_decodes(
            "\"contains\nnewlines\\\nescaped newlines\"",
            b"contains\nnewlinesescaped newlines",
        );

        // Raw strings take everything verbatim.
        assert_decodes("r\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere");

        // A literal suffix does not affect the content.
        assert_decodes("\"...\"q", b"...");
        assert_decodes("r\"...\"q", b"...");
        assert_decodes("r##\"...\"##q", b"...");
    }

    #[test]
    fn byte_strings() {
        // Plain content and simple escapes.
        assert_decodes("b\"a\"", b"a");
        assert_decodes("b\"\\n\"", b"\n");
        assert_decodes("b\"\\r\"", b"\r");
        assert_decodes("b\"\\t\"", b"\t");
        assert_decodes("b\"\\\"\"", b"\"");
        assert_decodes("b\"'\"", b"'");
        assert_decodes("b\"\"", b"");

        // Literal newlines are kept; backslash-newline is removed.
        assert_decodes(
            "b\"contains\nnewlines\\\nescaped newlines\"",
            b"contains\nnewlinesescaped newlines",
        );

        // Raw byte strings take everything verbatim.
        assert_decodes("br\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere");

        // A literal suffix does not affect the content.
        assert_decodes("b\"...\"q", b"...");
        assert_decodes("br\"...\"q", b"...");
        assert_decodes("br##\"...\"##q", b"...");
    }
}