vendor/toml_edit/src/parser/trivia.rs

   1 use std::ops::RangeInclusive;
   2
   3 use nom8::branch::alt;
   4 use nom8::bytes::one_of;
   5 use nom8::bytes::take_while;
   6 use nom8::bytes::take_while1;
   7 use nom8::combinator::eof;
   8 use nom8::combinator::opt;
   9 use nom8::multi::many0_count;
  10 use nom8::multi::many1_count;
  11 use nom8::prelude::*;
  12 use nom8::sequence::terminated;
  13
  14 use crate::parser::prelude::*;
  15
  16 pub(crate) unsafe fn from_utf8_unchecked<'b>(
  17     bytes: &'b [u8],
  18     safety_justification: &'static str,
  19 ) -> &'b str {
  20     if cfg!(debug_assertions) {
  21         // Catch problems more quickly when testing
  22         std::str::from_utf8(bytes).expect(safety_justification)
  23     } else {
  24         std::str::from_utf8_unchecked(bytes)
  25     }
  26 }
  27
  28 // wschar = ( %x20 /              ; Space
  29 //            %x09 )              ; Horizontal tab
  30 pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');
  31
  32 // ws = *wschar
  33 pub(crate) fn ws(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
  34     take_while(WSCHAR)
  35         .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
  36         .parse(input)
  37 }
  38
  39 // non-ascii = %x80-D7FF / %xE000-10FFFF
  40 // - ASCII is 0xxxxxxx
  41 // - First byte for UTF-8 is 11xxxxxx
  42 // - Subsequent UTF-8 bytes are 10xxxxxx
  43 pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff;
  44
  45 // non-eol = %x09 / %x20-7E / non-ascii
  46 pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
  47     (0x09, 0x20..=0x7E, NON_ASCII);
  48
  49 // comment-start-symbol = %x23 ; #
  50 pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';
  51
  52 // comment = comment-start-symbol *non-eol
  53 pub(crate) fn comment(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> {
  54     (COMMENT_START_SYMBOL, take_while(NON_EOL))
  55         .recognize()
  56         .parse(input)
  57 }
  58
  59 // newline = ( %x0A /              ; LF
  60 //             %x0D.0A )           ; CRLF
  61 pub(crate) fn newline(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> {
  62     alt((
  63         one_of(LF).value(b'\n'),
  64         (one_of(CR), one_of(LF)).value(b'\n'),
  65     ))
  66     .parse(input)
  67 }
  68 pub(crate) const LF: u8 = b'\n';
  69 pub(crate) const CR: u8 = b'\r';
  70
  71 // ws-newline       = *( wschar / newline )
  72 pub(crate) fn ws_newline(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
  73     many0_count(alt((newline.value(&b"\n"[..]), take_while1(WSCHAR))))
  74         .recognize()
  75         .map(|b| unsafe {
  76             from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
  77         })
  78         .parse(input)
  79 }
  80
  81 // ws-newlines      = newline *( wschar / newline )
  82 pub(crate) fn ws_newlines(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
  83     (newline, ws_newline)
  84         .recognize()
  85         .map(|b| unsafe {
  86             from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
  87         })
  88         .parse(input)
  89 }
  90
  91 // note: this rule is not present in the original grammar
  92 // ws-comment-newline = *( ws-newline-nonempty / comment )
  93 pub(crate) fn ws_comment_newline(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> {
  94     many0_count(alt((
  95         many1_count(alt((take_while1(WSCHAR), newline.value(&b"\n"[..])))).value(()),
  96         comment.value(()),
  97     )))
  98     .recognize()
  99     .parse(input)
 100 }
 101
 102 // note: this rule is not present in the original grammar
 103 // line-ending = newline / eof
 104 pub(crate) fn line_ending(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
 105     alt((newline.value("\n"), eof.value(""))).parse(input)
 106 }
 107
 108 // note: this rule is not present in the original grammar
 109 // line-trailing = ws [comment] skip-line-ending
 110 pub(crate) fn line_trailing(
 111     input: Input<'_>,
 112 ) -> IResult<Input<'_>, std::ops::Range<usize>, ParserError<'_>> {
 113     terminated((ws, opt(comment)).span(), line_ending).parse(input)
 114 }
 115
 116 #[cfg(test)]
 117 mod test {
 118     use super::*;
 119
 120     #[test]
 121     fn trivia() {
 122         let inputs = [
 123             "",
 124             r#" "#,
 125             r#"
 126 "#,
 127             r#"
 128 # comment
 129
 130 # comment2
 131
 132
 133 "#,
 134             r#"
 135         "#,
 136             r#"# comment
 137 # comment2
 138
 139
 140    "#,
 141         ];
 142         for input in inputs {
 143             dbg!(input);
 144             let parsed = ws_comment_newline.parse(new_input(input)).finish();
 145             assert!(parsed.is_ok(), "{:?}", parsed);
 146             let parsed = parsed.unwrap();
 147             assert_eq!(parsed, input.as_bytes());
 148         }
 149     }
 150 }