]> git.proxmox.com Git - rustc.git/blob - vendor/toml_edit/src/parser/trivia.rs
New upstream version 1.70.0+dfsg2
[rustc.git] / vendor / toml_edit / src / parser / trivia.rs
1 use std::ops::RangeInclusive;
2
3 use nom8::branch::alt;
4 use nom8::bytes::one_of;
5 use nom8::bytes::take_while;
6 use nom8::bytes::take_while1;
7 use nom8::combinator::eof;
8 use nom8::combinator::opt;
9 use nom8::multi::many0_count;
10 use nom8::multi::many1_count;
11 use nom8::prelude::*;
12 use nom8::sequence::terminated;
13
14 use crate::parser::prelude::*;
15
/// Converts `bytes` to a `&str`, skipping UTF-8 validation in release builds.
///
/// When `debug_assertions` are enabled the bytes are validated with
/// [`std::str::from_utf8`], and the call panics with `safety_justification` as
/// the message if they are not valid UTF-8, so a broken invariant is caught
/// quickly while testing. Without `debug_assertions` the bytes are
/// reinterpreted as a string slice with no check at all.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the
/// caller knows this to be true.
///
/// # Panics
///
/// Panics in builds with `debug_assertions` if `bytes` is not valid UTF-8.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
27
// wschar =  ( %x20 /              ; Space
//             %x09 )              ; Horizontal tab
//
// The two bytes are spelled as hex so they line up with the grammar above.
pub(crate) const WSCHAR: (u8, u8) = (0x20, 0x09);
31
32 // ws = *wschar
33 pub(crate) fn ws(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
34 take_while(WSCHAR)
35 .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
36 .parse(input)
37 }
38
// non-ascii = %x80-D7FF / %xE000-10FFFF
//
// Matching is done per byte, so the rule is expressed as "any byte with the
// high bit set":
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;
44
45 // non-eol = %x09 / %x20-7E / non-ascii
46 pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
47 (0x09, 0x20..=0x7E, NON_ASCII);
48
// comment-start-symbol = %x23 ; #
pub(crate) const COMMENT_START_SYMBOL: u8 = 0x23;
51
52 // comment = comment-start-symbol *non-eol
53 pub(crate) fn comment(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> {
54 (COMMENT_START_SYMBOL, take_while(NON_EOL))
55 .recognize()
56 .parse(input)
57 }
58
59 // newline = ( %x0A / ; LF
60 // %x0D.0A ) ; CRLF
61 pub(crate) fn newline(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> {
62 alt((
63 one_of(LF).value(b'\n'),
64 (one_of(CR), one_of(LF)).value(b'\n'),
65 ))
66 .parse(input)
67 }
// Line feed (%x0A).
pub(crate) const LF: u8 = 0x0A;
// Carriage return (%x0D).
pub(crate) const CR: u8 = 0x0D;
70
71 // ws-newline = *( wschar / newline )
72 pub(crate) fn ws_newline(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
73 many0_count(alt((newline.value(&b"\n"[..]), take_while1(WSCHAR))))
74 .recognize()
75 .map(|b| unsafe {
76 from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
77 })
78 .parse(input)
79 }
80
81 // ws-newlines = newline *( wschar / newline )
82 pub(crate) fn ws_newlines(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
83 (newline, ws_newline)
84 .recognize()
85 .map(|b| unsafe {
86 from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
87 })
88 .parse(input)
89 }
90
91 // note: this rule is not present in the original grammar
92 // ws-comment-newline = *( ws-newline-nonempty / comment )
93 pub(crate) fn ws_comment_newline(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> {
94 many0_count(alt((
95 many1_count(alt((take_while1(WSCHAR), newline.value(&b"\n"[..])))).value(()),
96 comment.value(()),
97 )))
98 .recognize()
99 .parse(input)
100 }
101
102 // note: this rule is not present in the original grammar
103 // line-ending = newline / eof
104 pub(crate) fn line_ending(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
105 alt((newline.value("\n"), eof.value(""))).parse(input)
106 }
107
108 // note: this rule is not present in the original grammar
109 // line-trailing = ws [comment] skip-line-ending
110 pub(crate) fn line_trailing(
111 input: Input<'_>,
112 ) -> IResult<Input<'_>, std::ops::Range<usize>, ParserError<'_>> {
113 terminated((ws, opt(comment)).span(), line_ending).parse(input)
114 }
115
// Unit tests for the trivia parsers defined in this file.
116 #[cfg(test)]
117 mod test {
118 use super::*;
119
// Feeds inputs made only of whitespace, newlines and `#` comments to
// `ws_comment_newline` and checks that parsing succeeds and that the returned
// bytes equal the whole input.
// NOTE(review): whitespace inside the multi-line raw strings below is part of
// the test data; leading whitespace on their continuation lines may have been
// lost in this copy — confirm against the upstream file before reformatting.
120 #[test]
121 fn trivia() {
122 let inputs = [
123 "",
124 r#" "#,
125 r#"
126 "#,
127 r#"
128 # comment
129
130 # comment2
131
132
133 "#,
134 r#"
135 "#,
136 r#"# comment
137 # comment2
138
139
140 "#,
141 ];
// Every case must parse without error and round-trip byte-for-byte.
142 for input in inputs {
// Echo the current case so a failing run shows which input was in flight.
143 dbg!(input);
144 let parsed = ws_comment_newline.parse(new_input(input)).finish();
145 assert!(parsed.is_ok(), "{:?}", parsed);
146 let parsed = parsed.unwrap();
147 assert_eq!(parsed, input.as_bytes());
148 }
149 }
150 }