]>
Commit | Line | Data |
---|---|---|
f035d41b XL |
1 | use crate::fallback::{ |
2 | is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream, | |
f2b60f7d | 3 | TokenStreamBuilder, |
f035d41b XL |
4 | }; |
5 | use crate::{Delimiter, Punct, Spacing, TokenTree}; | |
f2b60f7d FG |
6 | use core::char; |
7 | use core::str::{Bytes, CharIndices, Chars}; | |
f035d41b XL |
8 | |
/// A position within the source text being lexed.
///
/// `rest` holds the input that has not been consumed yet. When built with
/// `cfg(span_locations)`, `off` counts the characters that precede `rest`.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    pub rest: &'a str,
    #[cfg(span_locations)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor moved `bytes` bytes further into the input.
    ///
    /// Panics if `bytes` is out of range or not on a char boundary.
    fn advance(&self, bytes: usize) -> Cursor<'a> {
        let _consumed = &self.rest[..bytes];
        let rest = &self.rest[bytes..];
        Cursor {
            rest,
            #[cfg(span_locations)]
            // The offset is counted in characters, not bytes.
            off: self.off + _consumed.chars().count() as u32,
        }
    }

    /// Whether the remaining input begins with `s`.
    fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether all input has been consumed.
    fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length of the remaining input in bytes.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as a byte slice.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining input.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte offset, char)` pairs of the remaining input.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes the literal text `tag` if the input starts with it,
    /// otherwise rejects without consuming anything.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if !self.starts_with(tag) {
            return Err(Reject);
        }
        Ok(self.advance(tag.len()))
    }
}

/// Marker error: the input did not match what a parser expected.
pub(crate) struct Reject;
/// Result of a parser that yields a value `O` plus the cursor after it.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
f035d41b XL |
65 | |
66 | fn skip_whitespace(input: Cursor) -> Cursor { | |
67 | let mut s = input; | |
68 | ||
69 | while !s.is_empty() { | |
70 | let byte = s.as_bytes()[0]; | |
71 | if byte == b'/' { | |
72 | if s.starts_with("//") | |
73 | && (!s.starts_with("///") || s.starts_with("////")) | |
74 | && !s.starts_with("//!") | |
75 | { | |
76 | let (cursor, _) = take_until_newline_or_eof(s); | |
77 | s = cursor; | |
78 | continue; | |
79 | } else if s.starts_with("/**/") { | |
80 | s = s.advance(4); | |
81 | continue; | |
82 | } else if s.starts_with("/*") | |
83 | && (!s.starts_with("/**") || s.starts_with("/***")) | |
84 | && !s.starts_with("/*!") | |
85 | { | |
86 | match block_comment(s) { | |
87 | Ok((rest, _)) => { | |
88 | s = rest; | |
89 | continue; | |
90 | } | |
cdc7bbd5 | 91 | Err(Reject) => return s, |
f035d41b XL |
92 | } |
93 | } | |
94 | } | |
95 | match byte { | |
96 | b' ' | 0x09..=0x0d => { | |
97 | s = s.advance(1); | |
98 | continue; | |
99 | } | |
100 | b if b <= 0x7f => {} | |
101 | _ => { | |
102 | let ch = s.chars().next().unwrap(); | |
103 | if is_whitespace(ch) { | |
104 | s = s.advance(ch.len_utf8()); | |
105 | continue; | |
106 | } | |
107 | } | |
108 | } | |
109 | return s; | |
110 | } | |
111 | s | |
112 | } | |
113 | ||
114 | fn block_comment(input: Cursor) -> PResult<&str> { | |
115 | if !input.starts_with("/*") { | |
cdc7bbd5 | 116 | return Err(Reject); |
f035d41b XL |
117 | } |
118 | ||
119 | let mut depth = 0; | |
120 | let bytes = input.as_bytes(); | |
121 | let mut i = 0; | |
122 | let upper = bytes.len() - 1; | |
123 | ||
124 | while i < upper { | |
125 | if bytes[i] == b'/' && bytes[i + 1] == b'*' { | |
126 | depth += 1; | |
127 | i += 1; // eat '*' | |
128 | } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { | |
129 | depth -= 1; | |
130 | if depth == 0 { | |
131 | return Ok((input.advance(i + 2), &input.rest[..i + 2])); | |
132 | } | |
133 | i += 1; // eat '/' | |
134 | } | |
135 | i += 1; | |
136 | } | |
137 | ||
cdc7bbd5 | 138 | Err(Reject) |
f035d41b XL |
139 | } |
140 | ||
/// Whether `ch` counts as whitespace for the lexer.
///
/// Besides Unicode whitespace, Rust also treats the left-to-right mark
/// (U+200E) and right-to-left mark (U+200F) as whitespace.
fn is_whitespace(ch: char) -> bool {
    match ch {
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
145 | ||
cdc7bbd5 | 146 | fn word_break(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b | 147 | match input.chars().next() { |
cdc7bbd5 | 148 | Some(ch) if is_ident_continue(ch) => Err(Reject), |
f035d41b XL |
149 | Some(_) | None => Ok(input), |
150 | } | |
151 | } | |
152 | ||
cdc7bbd5 | 153 | pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> { |
f2b60f7d | 154 | let mut trees = TokenStreamBuilder::new(); |
f035d41b XL |
155 | let mut stack = Vec::new(); |
156 | ||
157 | loop { | |
158 | input = skip_whitespace(input); | |
159 | ||
f2b60f7d | 160 | if let Ok((rest, ())) = doc_comment(input, &mut trees) { |
f035d41b XL |
161 | input = rest; |
162 | continue; | |
163 | } | |
164 | ||
165 | #[cfg(span_locations)] | |
166 | let lo = input.off; | |
167 | ||
168 | let first = match input.bytes().next() { | |
169 | Some(first) => first, | |
cdc7bbd5 | 170 | None => match stack.last() { |
f2b60f7d | 171 | None => return Ok(trees.build()), |
cdc7bbd5 XL |
172 | #[cfg(span_locations)] |
173 | Some((lo, _frame)) => { | |
174 | return Err(LexError { | |
175 | span: Span { lo: *lo, hi: *lo }, | |
176 | }) | |
177 | } | |
178 | #[cfg(not(span_locations))] | |
179 | Some(_frame) => return Err(LexError { span: Span {} }), | |
180 | }, | |
f035d41b XL |
181 | }; |
182 | ||
183 | if let Some(open_delimiter) = match first { | |
184 | b'(' => Some(Delimiter::Parenthesis), | |
185 | b'[' => Some(Delimiter::Bracket), | |
186 | b'{' => Some(Delimiter::Brace), | |
187 | _ => None, | |
188 | } { | |
189 | input = input.advance(1); | |
190 | let frame = (open_delimiter, trees); | |
191 | #[cfg(span_locations)] | |
192 | let frame = (lo, frame); | |
193 | stack.push(frame); | |
f2b60f7d | 194 | trees = TokenStreamBuilder::new(); |
f035d41b XL |
195 | } else if let Some(close_delimiter) = match first { |
196 | b')' => Some(Delimiter::Parenthesis), | |
197 | b']' => Some(Delimiter::Bracket), | |
198 | b'}' => Some(Delimiter::Brace), | |
199 | _ => None, | |
200 | } { | |
cdc7bbd5 XL |
201 | let frame = match stack.pop() { |
202 | Some(frame) => frame, | |
203 | None => return Err(lex_error(input)), | |
204 | }; | |
f035d41b XL |
205 | #[cfg(span_locations)] |
206 | let (lo, frame) = frame; | |
207 | let (open_delimiter, outer) = frame; | |
208 | if open_delimiter != close_delimiter { | |
cdc7bbd5 | 209 | return Err(lex_error(input)); |
f035d41b | 210 | } |
cdc7bbd5 | 211 | input = input.advance(1); |
f2b60f7d | 212 | let mut g = Group::new(open_delimiter, trees.build()); |
f035d41b XL |
213 | g.set_span(Span { |
214 | #[cfg(span_locations)] | |
215 | lo, | |
216 | #[cfg(span_locations)] | |
217 | hi: input.off, | |
218 | }); | |
219 | trees = outer; | |
f2b60f7d | 220 | trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_stable(g))); |
f035d41b | 221 | } else { |
cdc7bbd5 XL |
222 | let (rest, mut tt) = match leaf_token(input) { |
223 | Ok((rest, tt)) => (rest, tt), | |
224 | Err(Reject) => return Err(lex_error(input)), | |
225 | }; | |
f035d41b XL |
226 | tt.set_span(crate::Span::_new_stable(Span { |
227 | #[cfg(span_locations)] | |
228 | lo, | |
229 | #[cfg(span_locations)] | |
230 | hi: rest.off, | |
231 | })); | |
f2b60f7d | 232 | trees.push_token_from_parser(tt); |
f035d41b XL |
233 | input = rest; |
234 | } | |
235 | } | |
cdc7bbd5 | 236 | } |
f035d41b | 237 | |
cdc7bbd5 XL |
238 | fn lex_error(cursor: Cursor) -> LexError { |
239 | #[cfg(not(span_locations))] | |
240 | let _ = cursor; | |
241 | LexError { | |
242 | span: Span { | |
243 | #[cfg(span_locations)] | |
244 | lo: cursor.off, | |
245 | #[cfg(span_locations)] | |
246 | hi: cursor.off, | |
247 | }, | |
f035d41b XL |
248 | } |
249 | } | |
250 | ||
251 | fn leaf_token(input: Cursor) -> PResult<TokenTree> { | |
252 | if let Ok((input, l)) = literal(input) { | |
253 | // must be parsed before ident | |
254 | Ok((input, TokenTree::Literal(crate::Literal::_new_stable(l)))) | |
29967ef6 | 255 | } else if let Ok((input, p)) = punct(input) { |
f035d41b XL |
256 | Ok((input, TokenTree::Punct(p))) |
257 | } else if let Ok((input, i)) = ident(input) { | |
258 | Ok((input, TokenTree::Ident(i))) | |
259 | } else { | |
cdc7bbd5 | 260 | Err(Reject) |
f035d41b XL |
261 | } |
262 | } | |
263 | ||
264 | fn ident(input: Cursor) -> PResult<crate::Ident> { | |
29967ef6 XL |
265 | if ["r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#"] |
266 | .iter() | |
267 | .any(|prefix| input.starts_with(prefix)) | |
268 | { | |
cdc7bbd5 | 269 | Err(Reject) |
29967ef6 XL |
270 | } else { |
271 | ident_any(input) | |
272 | } | |
273 | } | |
274 | ||
275 | fn ident_any(input: Cursor) -> PResult<crate::Ident> { | |
f035d41b XL |
276 | let raw = input.starts_with("r#"); |
277 | let rest = input.advance((raw as usize) << 1); | |
278 | ||
279 | let (rest, sym) = ident_not_raw(rest)?; | |
280 | ||
281 | if !raw { | |
282 | let ident = crate::Ident::new(sym, crate::Span::call_site()); | |
283 | return Ok((rest, ident)); | |
284 | } | |
285 | ||
286 | if sym == "_" { | |
cdc7bbd5 | 287 | return Err(Reject); |
f035d41b XL |
288 | } |
289 | ||
290 | let ident = crate::Ident::_new_raw(sym, crate::Span::call_site()); | |
291 | Ok((rest, ident)) | |
292 | } | |
293 | ||
294 | fn ident_not_raw(input: Cursor) -> PResult<&str> { | |
295 | let mut chars = input.char_indices(); | |
296 | ||
297 | match chars.next() { | |
298 | Some((_, ch)) if is_ident_start(ch) => {} | |
cdc7bbd5 | 299 | _ => return Err(Reject), |
f035d41b XL |
300 | } |
301 | ||
302 | let mut end = input.len(); | |
303 | for (i, ch) in chars { | |
304 | if !is_ident_continue(ch) { | |
305 | end = i; | |
306 | break; | |
307 | } | |
308 | } | |
309 | ||
310 | Ok((input.advance(end), &input.rest[..end])) | |
311 | } | |
312 | ||
17df50a5 | 313 | pub(crate) fn literal(input: Cursor) -> PResult<Literal> { |
cdc7bbd5 XL |
314 | let rest = literal_nocapture(input)?; |
315 | let end = input.len() - rest.len(); | |
316 | Ok((rest, Literal::_new(input.rest[..end].to_string()))) | |
f035d41b XL |
317 | } |
318 | ||
cdc7bbd5 | 319 | fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
320 | if let Ok(ok) = string(input) { |
321 | Ok(ok) | |
322 | } else if let Ok(ok) = byte_string(input) { | |
323 | Ok(ok) | |
324 | } else if let Ok(ok) = byte(input) { | |
325 | Ok(ok) | |
326 | } else if let Ok(ok) = character(input) { | |
327 | Ok(ok) | |
328 | } else if let Ok(ok) = float(input) { | |
329 | Ok(ok) | |
330 | } else if let Ok(ok) = int(input) { | |
331 | Ok(ok) | |
332 | } else { | |
cdc7bbd5 | 333 | Err(Reject) |
f035d41b XL |
334 | } |
335 | } | |
336 | ||
337 | fn literal_suffix(input: Cursor) -> Cursor { | |
338 | match ident_not_raw(input) { | |
339 | Ok((input, _)) => input, | |
cdc7bbd5 | 340 | Err(Reject) => input, |
f035d41b XL |
341 | } |
342 | } | |
343 | ||
cdc7bbd5 | 344 | fn string(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
345 | if let Ok(input) = input.parse("\"") { |
346 | cooked_string(input) | |
347 | } else if let Ok(input) = input.parse("r") { | |
348 | raw_string(input) | |
349 | } else { | |
cdc7bbd5 | 350 | Err(Reject) |
f035d41b XL |
351 | } |
352 | } | |
353 | ||
cdc7bbd5 | 354 | fn cooked_string(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
355 | let mut chars = input.char_indices().peekable(); |
356 | ||
357 | while let Some((i, ch)) = chars.next() { | |
358 | match ch { | |
359 | '"' => { | |
360 | let input = input.advance(i + 1); | |
361 | return Ok(literal_suffix(input)); | |
362 | } | |
29967ef6 XL |
363 | '\r' => match chars.next() { |
364 | Some((_, '\n')) => {} | |
365 | _ => break, | |
366 | }, | |
f035d41b XL |
367 | '\\' => match chars.next() { |
368 | Some((_, 'x')) => { | |
369 | if !backslash_x_char(&mut chars) { | |
370 | break; | |
371 | } | |
372 | } | |
373 | Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\')) | |
374 | | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {} | |
375 | Some((_, 'u')) => { | |
376 | if !backslash_u(&mut chars) { | |
377 | break; | |
378 | } | |
379 | } | |
29967ef6 XL |
380 | Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => { |
381 | let mut last = ch; | |
382 | loop { | |
383 | if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') { | |
cdc7bbd5 | 384 | return Err(Reject); |
29967ef6 XL |
385 | } |
386 | match chars.peek() { | |
387 | Some((_, ch)) if ch.is_whitespace() => { | |
388 | last = *ch; | |
389 | chars.next(); | |
390 | } | |
391 | _ => break, | |
f035d41b XL |
392 | } |
393 | } | |
394 | } | |
395 | _ => break, | |
396 | }, | |
397 | _ch => {} | |
398 | } | |
399 | } | |
cdc7bbd5 | 400 | Err(Reject) |
f035d41b XL |
401 | } |
402 | ||
cdc7bbd5 | 403 | fn byte_string(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
404 | if let Ok(input) = input.parse("b\"") { |
405 | cooked_byte_string(input) | |
406 | } else if let Ok(input) = input.parse("br") { | |
407 | raw_string(input) | |
408 | } else { | |
cdc7bbd5 | 409 | Err(Reject) |
f035d41b XL |
410 | } |
411 | } | |
412 | ||
cdc7bbd5 | 413 | fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> { |
f035d41b | 414 | let mut bytes = input.bytes().enumerate(); |
29967ef6 | 415 | while let Some((offset, b)) = bytes.next() { |
f035d41b XL |
416 | match b { |
417 | b'"' => { | |
418 | let input = input.advance(offset + 1); | |
419 | return Ok(literal_suffix(input)); | |
420 | } | |
29967ef6 XL |
421 | b'\r' => match bytes.next() { |
422 | Some((_, b'\n')) => {} | |
423 | _ => break, | |
424 | }, | |
f035d41b XL |
425 | b'\\' => match bytes.next() { |
426 | Some((_, b'x')) => { | |
427 | if !backslash_x_byte(&mut bytes) { | |
428 | break; | |
429 | } | |
430 | } | |
431 | Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\')) | |
432 | | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {} | |
29967ef6 XL |
433 | Some((newline, b @ b'\n')) | Some((newline, b @ b'\r')) => { |
434 | let mut last = b as char; | |
f035d41b | 435 | let rest = input.advance(newline + 1); |
29967ef6 XL |
436 | let mut chars = rest.char_indices(); |
437 | loop { | |
438 | if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') { | |
cdc7bbd5 | 439 | return Err(Reject); |
29967ef6 XL |
440 | } |
441 | match chars.next() { | |
442 | Some((_, ch)) if ch.is_whitespace() => last = ch, | |
443 | Some((offset, _)) => { | |
444 | input = rest.advance(offset); | |
445 | bytes = input.bytes().enumerate(); | |
446 | break; | |
447 | } | |
cdc7bbd5 | 448 | None => return Err(Reject), |
f035d41b XL |
449 | } |
450 | } | |
f035d41b XL |
451 | } |
452 | _ => break, | |
453 | }, | |
454 | b if b < 0x80 => {} | |
455 | _ => break, | |
456 | } | |
457 | } | |
cdc7bbd5 | 458 | Err(Reject) |
f035d41b XL |
459 | } |
460 | ||
cdc7bbd5 | 461 | fn raw_string(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
462 | let mut chars = input.char_indices(); |
463 | let mut n = 0; | |
94222f64 | 464 | for (i, ch) in &mut chars { |
f035d41b XL |
465 | match ch { |
466 | '"' => { | |
467 | n = i; | |
468 | break; | |
469 | } | |
470 | '#' => {} | |
cdc7bbd5 | 471 | _ => return Err(Reject), |
f035d41b XL |
472 | } |
473 | } | |
29967ef6 | 474 | while let Some((i, ch)) = chars.next() { |
f035d41b XL |
475 | match ch { |
476 | '"' if input.rest[i + 1..].starts_with(&input.rest[..n]) => { | |
477 | let rest = input.advance(i + 1 + n); | |
478 | return Ok(literal_suffix(rest)); | |
479 | } | |
29967ef6 XL |
480 | '\r' => match chars.next() { |
481 | Some((_, '\n')) => {} | |
482 | _ => break, | |
483 | }, | |
f035d41b XL |
484 | _ => {} |
485 | } | |
486 | } | |
cdc7bbd5 | 487 | Err(Reject) |
f035d41b XL |
488 | } |
489 | ||
cdc7bbd5 | 490 | fn byte(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
491 | let input = input.parse("b'")?; |
492 | let mut bytes = input.bytes().enumerate(); | |
493 | let ok = match bytes.next().map(|(_, b)| b) { | |
494 | Some(b'\\') => match bytes.next().map(|(_, b)| b) { | |
495 | Some(b'x') => backslash_x_byte(&mut bytes), | |
496 | Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'') | |
497 | | Some(b'"') => true, | |
498 | _ => false, | |
499 | }, | |
500 | b => b.is_some(), | |
501 | }; | |
502 | if !ok { | |
cdc7bbd5 | 503 | return Err(Reject); |
f035d41b | 504 | } |
cdc7bbd5 | 505 | let (offset, _) = bytes.next().ok_or(Reject)?; |
f035d41b | 506 | if !input.chars().as_str().is_char_boundary(offset) { |
cdc7bbd5 | 507 | return Err(Reject); |
f035d41b XL |
508 | } |
509 | let input = input.advance(offset).parse("'")?; | |
510 | Ok(literal_suffix(input)) | |
511 | } | |
512 | ||
cdc7bbd5 | 513 | fn character(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
514 | let input = input.parse("'")?; |
515 | let mut chars = input.char_indices(); | |
516 | let ok = match chars.next().map(|(_, ch)| ch) { | |
517 | Some('\\') => match chars.next().map(|(_, ch)| ch) { | |
518 | Some('x') => backslash_x_char(&mut chars), | |
519 | Some('u') => backslash_u(&mut chars), | |
520 | Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => { | |
521 | true | |
522 | } | |
523 | _ => false, | |
524 | }, | |
525 | ch => ch.is_some(), | |
526 | }; | |
527 | if !ok { | |
cdc7bbd5 | 528 | return Err(Reject); |
f035d41b | 529 | } |
cdc7bbd5 | 530 | let (idx, _) = chars.next().ok_or(Reject)?; |
f035d41b XL |
531 | let input = input.advance(idx).parse("'")?; |
532 | Ok(literal_suffix(input)) | |
533 | } | |
534 | ||
535 | macro_rules! next_ch { | |
536 | ($chars:ident @ $pat:pat $(| $rest:pat)*) => { | |
537 | match $chars.next() { | |
538 | Some((_, ch)) => match ch { | |
539 | $pat $(| $rest)* => ch, | |
540 | _ => return false, | |
541 | }, | |
542 | None => return false, | |
543 | } | |
544 | }; | |
545 | } | |
546 | ||
/// Validates the two digits of a `\x` escape in a character or string
/// literal: an octal digit (so the value stays within 0x7F) followed by a hex
/// digit. Consumes the items it inspects.
fn backslash_x_char<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    match chars.next() {
        Some((_, '0'..='7')) => {}
        _ => return false,
    }
    match chars.next() {
        Some((_, '0'..='9')) | Some((_, 'a'..='f')) | Some((_, 'A'..='F')) => true,
        _ => false,
    }
}
555 | ||
/// Validates the two hex digits of a `\x` escape in a byte or byte string
/// literal. Consumes the items it inspects.
fn backslash_x_byte<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, u8)>,
{
    // Both positions accept any hex digit; stop at the first failure.
    for _ in 0..2 {
        match chars.next() {
            Some((_, b'0'..=b'9')) | Some((_, b'a'..=b'f')) | Some((_, b'A'..=b'F')) => {}
            _ => return false,
        }
    }
    true
}
564 | ||
/// Validates the `{...}` part of a `\u{...}` escape.
///
/// Requires one to six hex digits (underscores allowed after the first digit)
/// that form a valid Unicode scalar value, terminated by `}`.
fn backslash_u<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    match chars.next() {
        Some((_, '{')) => {}
        _ => return false,
    }

    let mut value = 0;
    let mut digits = 0;
    while let Some((_, ch)) = chars.next() {
        let digit = match ch {
            '0'..='9' => ch as u8 - b'0',
            'a'..='f' => 10 + ch as u8 - b'a',
            'A'..='F' => 10 + ch as u8 - b'A',
            '_' if digits > 0 => continue,
            '}' if digits > 0 => return char::from_u32(value).is_some(),
            _ => return false,
        };
        // A seventh digit is never allowed.
        if digits == 6 {
            return false;
        }
        value = value * 0x10 + u32::from(digit);
        digits += 1;
    }

    // Input ended before the closing brace.
    false
}
590 | ||
cdc7bbd5 | 591 | fn float(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
592 | let mut rest = float_digits(input)?; |
593 | if let Some(ch) = rest.chars().next() { | |
594 | if is_ident_start(ch) { | |
595 | rest = ident_not_raw(rest)?.0; | |
596 | } | |
597 | } | |
598 | word_break(rest) | |
599 | } | |
600 | ||
cdc7bbd5 | 601 | fn float_digits(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
602 | let mut chars = input.chars().peekable(); |
603 | match chars.next() { | |
604 | Some(ch) if ch >= '0' && ch <= '9' => {} | |
cdc7bbd5 | 605 | _ => return Err(Reject), |
f035d41b XL |
606 | } |
607 | ||
608 | let mut len = 1; | |
609 | let mut has_dot = false; | |
610 | let mut has_exp = false; | |
611 | while let Some(&ch) = chars.peek() { | |
612 | match ch { | |
613 | '0'..='9' | '_' => { | |
614 | chars.next(); | |
615 | len += 1; | |
616 | } | |
617 | '.' => { | |
618 | if has_dot { | |
619 | break; | |
620 | } | |
621 | chars.next(); | |
622 | if chars | |
623 | .peek() | |
3c0e092e | 624 | .map_or(false, |&ch| ch == '.' || is_ident_start(ch)) |
f035d41b | 625 | { |
cdc7bbd5 | 626 | return Err(Reject); |
f035d41b XL |
627 | } |
628 | len += 1; | |
629 | has_dot = true; | |
630 | } | |
631 | 'e' | 'E' => { | |
632 | chars.next(); | |
633 | len += 1; | |
634 | has_exp = true; | |
635 | break; | |
636 | } | |
637 | _ => break, | |
638 | } | |
639 | } | |
640 | ||
29967ef6 | 641 | if !(has_dot || has_exp) { |
cdc7bbd5 | 642 | return Err(Reject); |
f035d41b XL |
643 | } |
644 | ||
645 | if has_exp { | |
29967ef6 XL |
646 | let token_before_exp = if has_dot { |
647 | Ok(input.advance(len - 1)) | |
648 | } else { | |
cdc7bbd5 | 649 | Err(Reject) |
29967ef6 XL |
650 | }; |
651 | let mut has_sign = false; | |
f035d41b XL |
652 | let mut has_exp_value = false; |
653 | while let Some(&ch) = chars.peek() { | |
654 | match ch { | |
655 | '+' | '-' => { | |
656 | if has_exp_value { | |
657 | break; | |
658 | } | |
29967ef6 XL |
659 | if has_sign { |
660 | return token_before_exp; | |
661 | } | |
f035d41b XL |
662 | chars.next(); |
663 | len += 1; | |
29967ef6 | 664 | has_sign = true; |
f035d41b XL |
665 | } |
666 | '0'..='9' => { | |
667 | chars.next(); | |
668 | len += 1; | |
669 | has_exp_value = true; | |
670 | } | |
671 | '_' => { | |
672 | chars.next(); | |
673 | len += 1; | |
674 | } | |
675 | _ => break, | |
676 | } | |
677 | } | |
678 | if !has_exp_value { | |
29967ef6 | 679 | return token_before_exp; |
f035d41b XL |
680 | } |
681 | } | |
682 | ||
683 | Ok(input.advance(len)) | |
684 | } | |
685 | ||
cdc7bbd5 | 686 | fn int(input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
687 | let mut rest = digits(input)?; |
688 | if let Some(ch) = rest.chars().next() { | |
689 | if is_ident_start(ch) { | |
690 | rest = ident_not_raw(rest)?.0; | |
691 | } | |
692 | } | |
693 | word_break(rest) | |
694 | } | |
695 | ||
cdc7bbd5 | 696 | fn digits(mut input: Cursor) -> Result<Cursor, Reject> { |
f035d41b XL |
697 | let base = if input.starts_with("0x") { |
698 | input = input.advance(2); | |
699 | 16 | |
700 | } else if input.starts_with("0o") { | |
701 | input = input.advance(2); | |
702 | 8 | |
703 | } else if input.starts_with("0b") { | |
704 | input = input.advance(2); | |
705 | 2 | |
706 | } else { | |
707 | 10 | |
708 | }; | |
709 | ||
710 | let mut len = 0; | |
711 | let mut empty = true; | |
712 | for b in input.bytes() { | |
29967ef6 XL |
713 | match b { |
714 | b'0'..=b'9' => { | |
715 | let digit = (b - b'0') as u64; | |
716 | if digit >= base { | |
cdc7bbd5 | 717 | return Err(Reject); |
29967ef6 XL |
718 | } |
719 | } | |
720 | b'a'..=b'f' => { | |
721 | let digit = 10 + (b - b'a') as u64; | |
722 | if digit >= base { | |
723 | break; | |
724 | } | |
725 | } | |
726 | b'A'..=b'F' => { | |
727 | let digit = 10 + (b - b'A') as u64; | |
728 | if digit >= base { | |
729 | break; | |
730 | } | |
731 | } | |
f035d41b XL |
732 | b'_' => { |
733 | if empty && base == 10 { | |
cdc7bbd5 | 734 | return Err(Reject); |
f035d41b XL |
735 | } |
736 | len += 1; | |
737 | continue; | |
738 | } | |
739 | _ => break, | |
740 | }; | |
f035d41b XL |
741 | len += 1; |
742 | empty = false; | |
743 | } | |
744 | if empty { | |
cdc7bbd5 | 745 | Err(Reject) |
f035d41b XL |
746 | } else { |
747 | Ok(input.advance(len)) | |
748 | } | |
749 | } | |
750 | ||
29967ef6 | 751 | fn punct(input: Cursor) -> PResult<Punct> { |
cdc7bbd5 XL |
752 | let (rest, ch) = punct_char(input)?; |
753 | if ch == '\'' { | |
754 | if ident_any(rest)?.0.starts_with("'") { | |
755 | Err(Reject) | |
756 | } else { | |
757 | Ok((rest, Punct::new('\'', Spacing::Joint))) | |
f035d41b | 758 | } |
cdc7bbd5 XL |
759 | } else { |
760 | let kind = match punct_char(rest) { | |
761 | Ok(_) => Spacing::Joint, | |
762 | Err(Reject) => Spacing::Alone, | |
763 | }; | |
764 | Ok((rest, Punct::new(ch, kind))) | |
f035d41b XL |
765 | } |
766 | } | |
767 | ||
29967ef6 | 768 | fn punct_char(input: Cursor) -> PResult<char> { |
f035d41b | 769 | if input.starts_with("//") || input.starts_with("/*") { |
29967ef6 | 770 | // Do not accept `/` of a comment as a punct. |
cdc7bbd5 | 771 | return Err(Reject); |
f035d41b XL |
772 | } |
773 | ||
774 | let mut chars = input.chars(); | |
775 | let first = match chars.next() { | |
776 | Some(ch) => ch, | |
777 | None => { | |
cdc7bbd5 | 778 | return Err(Reject); |
f035d41b XL |
779 | } |
780 | }; | |
781 | let recognized = "~!@#$%^&*-=+|;:,<.>/?'"; | |
782 | if recognized.contains(first) { | |
783 | Ok((input.advance(first.len_utf8()), first)) | |
784 | } else { | |
cdc7bbd5 | 785 | Err(Reject) |
f035d41b XL |
786 | } |
787 | } | |
788 | ||
f2b60f7d | 789 | fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> { |
f035d41b XL |
790 | #[cfg(span_locations)] |
791 | let lo = input.off; | |
792 | let (rest, (comment, inner)) = doc_comment_contents(input)?; | |
793 | let span = crate::Span::_new_stable(Span { | |
794 | #[cfg(span_locations)] | |
795 | lo, | |
796 | #[cfg(span_locations)] | |
797 | hi: rest.off, | |
798 | }); | |
799 | ||
800 | let mut scan_for_bare_cr = comment; | |
801 | while let Some(cr) = scan_for_bare_cr.find('\r') { | |
802 | let rest = &scan_for_bare_cr[cr + 1..]; | |
803 | if !rest.starts_with('\n') { | |
cdc7bbd5 | 804 | return Err(Reject); |
f035d41b XL |
805 | } |
806 | scan_for_bare_cr = rest; | |
807 | } | |
808 | ||
f2b60f7d FG |
809 | let mut pound = Punct::new('#', Spacing::Alone); |
810 | pound.set_span(span); | |
811 | trees.push_token_from_parser(TokenTree::Punct(pound)); | |
812 | ||
f035d41b | 813 | if inner { |
f2b60f7d FG |
814 | let mut bang = Punct::new('!', Spacing::Alone); |
815 | bang.set_span(span); | |
816 | trees.push_token_from_parser(TokenTree::Punct(bang)); | |
817 | } | |
818 | ||
819 | let doc_ident = crate::Ident::new("doc", span); | |
820 | let mut equal = Punct::new('=', Spacing::Alone); | |
821 | equal.set_span(span); | |
822 | let mut literal = crate::Literal::string(comment); | |
823 | literal.set_span(span); | |
824 | let mut bracketed = TokenStreamBuilder::with_capacity(3); | |
825 | bracketed.push_token_from_parser(TokenTree::Ident(doc_ident)); | |
826 | bracketed.push_token_from_parser(TokenTree::Punct(equal)); | |
827 | bracketed.push_token_from_parser(TokenTree::Literal(literal)); | |
828 | let group = Group::new(Delimiter::Bracket, bracketed.build()); | |
829 | let mut group = crate::Group::_new_stable(group); | |
830 | group.set_span(span); | |
831 | trees.push_token_from_parser(TokenTree::Group(group)); | |
832 | ||
833 | Ok((rest, ())) | |
f035d41b XL |
834 | } |
835 | ||
836 | fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> { | |
837 | if input.starts_with("//!") { | |
838 | let input = input.advance(3); | |
839 | let (input, s) = take_until_newline_or_eof(input); | |
840 | Ok((input, (s, true))) | |
841 | } else if input.starts_with("/*!") { | |
842 | let (input, s) = block_comment(input)?; | |
843 | Ok((input, (&s[3..s.len() - 2], true))) | |
844 | } else if input.starts_with("///") { | |
845 | let input = input.advance(3); | |
846 | if input.starts_with("/") { | |
cdc7bbd5 | 847 | return Err(Reject); |
f035d41b XL |
848 | } |
849 | let (input, s) = take_until_newline_or_eof(input); | |
850 | Ok((input, (s, false))) | |
851 | } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') { | |
852 | let (input, s) = block_comment(input)?; | |
853 | Ok((input, (&s[3..s.len() - 2], false))) | |
854 | } else { | |
cdc7bbd5 | 855 | Err(Reject) |
f035d41b XL |
856 | } |
857 | } | |
858 | ||
859 | fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) { | |
860 | let chars = input.char_indices(); | |
861 | ||
862 | for (i, ch) in chars { | |
863 | if ch == '\n' { | |
864 | return (input.advance(i), &input.rest[..i]); | |
865 | } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') { | |
866 | return (input.advance(i + 1), &input.rest[..i]); | |
867 | } | |
868 | } | |
869 | ||
870 | (input.advance(input.len()), input.rest) | |
871 | } |