1 use std
::ops
::RangeInclusive
;
4 use nom8
::bytes
::one_of
;
5 use nom8
::bytes
::take_while
;
6 use nom8
::bytes
::take_while1
;
7 use nom8
::combinator
::eof
;
8 use nom8
::combinator
::opt
;
9 use nom8
::multi
::many0_count
;
10 use nom8
::multi
::many1_count
;
12 use nom8
::sequence
::terminated
;
14 use crate::parser
::prelude
::*;
16 pub(crate) unsafe fn from_utf8_unchecked
<'b
>(
18 safety_justification
: &'
static str,
20 if cfg
!(debug_assertions
) {
21 // Catch problems more quickly when testing
22 std
::str::from_utf8(bytes
).expect(safety_justification
)
24 std
::str::from_utf8_unchecked(bytes
)
/// Bytes that count as horizontal whitespace in the grammar.
///
/// ```text
/// wschar = ( %x20 /              ; Space
///            %x09 )              ; Horizontal tab
/// ```
pub(crate) const WSCHAR: (u8, u8) = (
    0x20, // ' '
    0x09, // '\t'
);
33 pub(crate) fn ws(input
: Input
<'_
>) -> IResult
<Input
<'_
>, &str, ParserError
<'_
>> {
35 .map(|b
| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") }
)
/// Byte-level stand-in for the grammar's `non-ascii` rule.
///
/// ```text
/// non-ascii = %x80-D7FF / %xE000-10FFFF
/// ```
///
/// The rule is written in terms of code points, but matching happens on bytes:
/// - ASCII is `0xxxxxxx`
/// - First byte for UTF-8 is `11xxxxxx`
/// - Subsequent UTF-8 bytes are `10xxxxxx`
///
/// Hence every byte with the high bit set is accepted.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;
45 // non-eol = %x09 / %x20-7E / non-ascii
46 pub(crate) const NON_EOL
: (u8, RangeInclusive
<u8>, RangeInclusive
<u8>) =
47 (0x09, 0x20..=0x7E, NON_ASCII
);
/// Byte that opens a comment.
///
/// ```text
/// comment-start-symbol = %x23 ; #
/// ```
pub(crate) const COMMENT_START_SYMBOL: u8 = 0x23; // '#'
52 // comment = comment-start-symbol *non-eol
53 pub(crate) fn comment(input
: Input
<'_
>) -> IResult
<Input
<'_
>, &[u8], ParserError
<'_
>> {
54 (COMMENT_START_SYMBOL
, take_while(NON_EOL
))
// newline = ( %x0A /              ; LF
//             %x0D.0A )           ; CRLF
61 pub(crate) fn newline(input
: Input
<'_
>) -> IResult
<Input
<'_
>, u8, ParserError
<'_
>> {
63 one_of(LF
).value(b'
\n'
),
64 (one_of(CR
), one_of(LF
)).value(b'
\n'
),
/// Line feed (`%x0A`, `'\n'`).
pub(crate) const LF: u8 = 0x0A;
/// Carriage return (`%x0D`, `'\r'`).
pub(crate) const CR: u8 = 0x0D;
71 // ws-newline = *( wschar / newline )
72 pub(crate) fn ws_newline(input
: Input
<'_
>) -> IResult
<Input
<'_
>, &str, ParserError
<'_
>> {
73 many0_count(alt((newline
.value(&b
"\n"[..]), take_while1(WSCHAR
))))
76 from_utf8_unchecked(b
, "`is_wschar` and `newline` filters out on-ASCII")
81 // ws-newlines = newline *( wschar / newline )
82 pub(crate) fn ws_newlines(input
: Input
<'_
>) -> IResult
<Input
<'_
>, &str, ParserError
<'_
>> {
86 from_utf8_unchecked(b
, "`is_wschar` and `newline` filters out on-ASCII")
91 // note: this rule is not present in the original grammar
92 // ws-comment-newline = *( ws-newline-nonempty / comment )
93 pub(crate) fn ws_comment_newline(input
: Input
<'_
>) -> IResult
<Input
<'_
>, &[u8], ParserError
<'_
>> {
95 many1_count(alt((take_while1(WSCHAR
), newline
.value(&b
"\n"[..])))).value(()),
102 // note: this rule is not present in the original grammar
103 // line-ending = newline / eof
104 pub(crate) fn line_ending(input
: Input
<'_
>) -> IResult
<Input
<'_
>, &str, ParserError
<'_
>> {
105 alt((newline
.value("\n"), eof
.value(""))).parse(input
)
108 // note: this rule is not present in the original grammar
109 // line-trailing = ws [comment] skip-line-ending
110 pub(crate) fn line_trailing(
112 ) -> IResult
<Input
<'_
>, std
::ops
::Range
<usize>, ParserError
<'_
>> {
113 terminated((ws
, opt(comment
)).span(), line_ending
).parse(input
)
142 for input
in inputs
{
144 let parsed
= ws_comment_newline
.parse(new_input(input
)).finish();
145 assert
!(parsed
.is_ok(), "{:?}", parsed
);
146 let parsed
= parsed
.unwrap();
147 assert_eq
!(parsed
, input
.as_bytes());