1 use std
::fmt
::{self, Display, Formatter}
;
2 use std
::iter
::FromIterator
;
3 use std
::ops
::{Deref, DerefMut}
;
4 use std
::path
::{Path, PathBuf}
;
5 use memchr
::{self, Memchr}
;
6 use pulldown_cmark
::{self, Alignment, Event, Tag}
;
9 /// Parse the text from a `SUMMARY.md` file into a sort of "recipe" to be
10 /// used when loading a book from disk.
14 /// **Title:** It's common practice to begin with a title, generally
15 /// "# Summary". It's not mandatory and the parser (currently) ignores it, so
16 /// you can too if you feel like it.
18 /// **Prefix Chapter:** Before the main numbered chapters you can add a couple
19 /// of elements that will not be numbered. This is useful for forewords,
20 /// introductions, etc. There are however some constraints. You can not nest
21 /// prefix chapters, they should all be on the root level. And you can not add
22 /// prefix chapters once you have added numbered chapters.
25 /// [Title of prefix element](relative/path/to/markdown.md)
28 /// **Numbered Chapter:** Numbered chapters are the main content of the book,
30 /// will be numbered and can be nested, resulting in a nice hierarchy (chapters,
31 /// sub-chapters, etc.)
34 /// - [Title of the Chapter](relative/path/to/markdown.md)
37 /// You can either use - or * to indicate a numbered chapter, the parser doesn't
38 /// care but you'll probably want to stay consistent.
40 /// **Suffix Chapter:** After the numbered chapters you can add a couple of
41 /// non-numbered chapters. They are the same as prefix chapters but come after
42 /// the numbered chapters instead of before.
44 /// All other elements are unsupported and will be ignored at best or result in
46 pub fn parse_summary(summary
: &str) -> Result
<Summary
> {
47 let parser
= SummaryParser
::new(summary
);
51 /// The parsed `SUMMARY.md`, specifying how the book should be laid out.
52 #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
54 /// An optional title for the `SUMMARY.md`, currently just ignored.
55 pub title
: Option
<String
>,
56 /// Chapters before the main text (e.g. an introduction).
57 pub prefix_chapters
: Vec
<SummaryItem
>,
58 /// The main chapters in the document.
59 pub numbered_chapters
: Vec
<SummaryItem
>,
60 /// Items which come after the main document (e.g. a conclusion).
61 pub suffix_chapters
: Vec
<SummaryItem
>,
64 /// A struct representing an entry in the `SUMMARY.md`, possibly with nested
67 /// This is roughly the equivalent of `[Some section](./path/to/file.md)`.
68 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
70 /// The name of the chapter.
72 /// The location of the chapter's source file, taking the book's `src`
73 /// directory as the root.
74 pub location
: PathBuf
,
75 /// The section number, if this chapter is in the numbered section.
76 pub number
: Option
<SectionNumber
>,
77 /// Any nested items this chapter may contain.
78 pub nested_items
: Vec
<SummaryItem
>,
82 /// Create a new link with no nested items.
83 pub fn new
<S
: Into
<String
>, P
: AsRef
<Path
>>(name
: S
, location
: P
) -> Link
{
86 location
: location
.as_ref().to_path_buf(),
88 nested_items
: Vec
::new(),
93 impl Default
for Link
{
94 fn default() -> Self {
97 location
: PathBuf
::new(),
99 nested_items
: Vec
::new(),
104 /// An item in `SUMMARY.md` which could be either a separator or a `Link`.
105 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
106 pub enum SummaryItem
{
107 /// A link to a chapter.
109 /// A separator (`---`).
114 fn maybe_link_mut(&mut self) -> Option
<&mut Link
> {
116 SummaryItem
::Link(ref mut l
) => Some(l
),
122 impl From
<Link
> for SummaryItem
{
123 fn from(other
: Link
) -> SummaryItem
{
124 SummaryItem
::Link(other
)
128 /// A recursive descent (-ish) parser for a `SUMMARY.md`.
133 /// The `SUMMARY.md` file has a grammar which looks something like this:
136 /// summary ::= title prefix_chapters numbered_chapters
138 /// title ::= "# " TEXT
140 /// prefix_chapters ::= item*
141 /// suffix_chapters ::= item*
142 /// numbered_chapters ::= dotted_item+
143 /// dotted_item ::= INDENT* DOT_POINT item
146 /// separator ::= "---"
147 /// link ::= "[" TEXT "]" "(" TEXT ")"
148 /// DOT_POINT ::= "-"
152 /// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly)
153 /// > match the following regex: "[^<>\n[]]+".
154 struct SummaryParser
<'a
> {
156 stream
: pulldown_cmark
::Parser
<'a
>,
159 /// Reads `Events` from the provided stream until the corresponding
160 /// `Event::End` is encountered which matches the `$delimiter` pattern.
162 /// This is the equivalent of doing
163 /// `$stream.take_while(|e| e != $delimiter).collect()` but it allows you to
164 /// use pattern matching and you won't get errors because `take_while()`
165 /// moves `$stream` out of self.
166 macro_rules
! collect_events
{
167 ($stream
:expr
, start $delimiter
:pat
) => {
168 collect_events
!($stream
, Event
::Start($delimiter
))
170 ($stream
:expr
, end $delimiter
:pat
) => {
171 collect_events
!($stream
, Event
::End($delimiter
))
173 ($stream
:expr
, $delimiter
:pat
) => {
175 let mut events
= Vec
::new();
178 let event
= $stream
.next();
179 trace
!("Next event: {:?}", event
);
182 Some($delimiter
) => break,
183 Some(other
) => events
.push(other
),
185 debug
!("Reached end of stream without finding the closing pattern, {}", stringify
!($delimiter
));
196 impl<'a
> SummaryParser
<'a
> {
197 fn new(text
: &str) -> SummaryParser
{
198 let pulldown_parser
= pulldown_cmark
::Parser
::new(text
);
202 stream
: pulldown_parser
,
206 /// Get the current line and column to give the user more useful error
208 fn current_location(&self) -> (usize, usize) {
209 let byte_offset
= self.stream
.get_offset();
211 let previous_text
= self.src
[..byte_offset
].as_bytes();
212 let line
= Memchr
::new(b'
\n'
, previous_text
).count() + 1;
213 let start_of_line
= memchr
::memrchr(b'
\n'
, previous_text
).unwrap_or(0);
214 let col
= self.src
[start_of_line
..byte_offset
].chars().count();
219 /// Parse the text the `SummaryParser` was created with.
220 fn parse(mut self) -> Result
<Summary
> {
221 let title
= self.parse_title();
223 let prefix_chapters
= self.parse_affix(true)
224 .chain_err(|| "There was an error parsing the prefix chapters")?
;
225 let numbered_chapters
= self.parse_numbered()
226 .chain_err(|| "There was an error parsing the numbered chapters")?
;
227 let suffix_chapters
= self.parse_affix(false)
228 .chain_err(|| "There was an error parsing the suffix chapters")?
;
238 /// Parse the affix chapters. This expects the first event (start of
239 /// paragraph) to have already been consumed by the previous parser.
240 fn parse_affix(&mut self, is_prefix
: bool
) -> Result
<Vec
<SummaryItem
>> {
241 let mut items
= Vec
::new();
244 if is_prefix { "prefix" }
else { "suffix" }
248 match self.next_event() {
249 Some(Event
::Start(Tag
::List(..))) => {
251 // we've finished prefix chapters and are at the start
252 // of the numbered section.
255 bail
!(self.parse_error("Suffix chapters cannot be followed by a list"));
258 Some(Event
::Start(Tag
::Link(href
, _
))) => {
259 let link
= self.parse_link(href
.to_string())?
;
260 items
.push(SummaryItem
::Link(link
));
262 Some(Event
::Start(Tag
::Rule
)) => items
.push(SummaryItem
::Separator
),
271 fn parse_link(&mut self, href
: String
) -> Result
<Link
> {
272 let link_content
= collect_events
!(self.stream
, end Tag
::Link(..));
273 let name
= stringify_events(link_content
);
276 Err(self.parse_error("You can't have an empty link."))
280 location
: PathBuf
::from(href
.to_string()),
282 nested_items
: Vec
::new(),
287 /// Parse the numbered chapters. This assumes the opening list tag has
288 /// already been consumed by a previous parser.
289 fn parse_numbered(&mut self) -> Result
<Vec
<SummaryItem
>> {
290 let mut items
= Vec
::new();
291 let root_number
= SectionNumber
::default();
293 // we need to do this funny loop-match-if-let dance because a rule will
294 // close off any currently running list. Therefore we try to read the
295 // list items before the rule, then if we encounter a rule we'll add a
296 // separator and try to resume parsing numbered chapters if we start a
297 // list immediately afterwards.
299 // If you can think of a better way to do this then please make a PR :)
302 let mut bunch_of_items
= self.parse_nested_numbered(&root_number
)?
;
304 // if we've resumed after something like a rule the root sections
305 // will be numbered from 1. We need to manually go back and update
307 update_section_numbers(&mut bunch_of_items
, 0, items
.len() as u32);
308 items
.extend(bunch_of_items
);
310 match self.next_event() {
311 Some(Event
::Start(Tag
::Paragraph
)) => {
312 // we're starting the suffix chapters
315 Some(Event
::Start(other_tag
)) => {
316 // FIXME: Remove this when google/pulldown_cmark#120 lands (new patch release)
317 // replace with `other_tag == Tag::Rule`
318 if tag_eq(&other_tag
, &Tag
::Rule
) {
319 items
.push(SummaryItem
::Separator
);
321 trace
!("Skipping contents of {:?}", other_tag
);
323 // Skip over the contents of this tag
324 while let Some(event
) = self.next_event() {
325 // FIXME: Remove this when google/pulldown_cmark#120 lands (new patch release)
326 // and replace the nested if-let with:
327 // if next == Event::End(other_tag.clone()) {
330 if let Event
::End(tag
) = event
{
331 if tag_eq(&tag
, &other_tag
) {
337 if let Some(Event
::Start(Tag
::List(..))) = self.next_event() {
344 // something else... ignore
357 fn next_event(&mut self) -> Option
<Event
<'a
>> {
358 let next
= self.stream
.next();
359 trace
!("Next event: {:?}", next
);
364 fn parse_nested_numbered(&mut self, parent
: &SectionNumber
) -> Result
<Vec
<SummaryItem
>> {
365 debug
!("Parsing numbered chapters at level {}", parent
);
366 let mut items
= Vec
::new();
369 match self.next_event() {
370 Some(Event
::Start(Tag
::Item
)) => {
371 let item
= self.parse_nested_item(parent
, items
.len())?
;
374 Some(Event
::Start(Tag
::List(..))) => {
375 // recurse to parse the nested list
376 let (_
, last_item
) = get_last_link(&mut items
)?
;
377 let last_item_number
= last_item
380 .expect("All numbered chapters have numbers");
382 let sub_items
= self.parse_nested_numbered(last_item_number
)?
;
384 last_item
.nested_items
= sub_items
;
386 Some(Event
::End(Tag
::List(..))) => break,
395 fn parse_nested_item(
397 parent
: &SectionNumber
,
398 num_existing_items
: usize,
399 ) -> Result
<SummaryItem
> {
401 match self.next_event() {
402 Some(Event
::Start(Tag
::Paragraph
)) => continue,
403 Some(Event
::Start(Tag
::Link(href
, _
))) => {
404 let mut link
= self.parse_link(href
.to_string())?
;
406 let mut number
= parent
.clone();
407 number
.0.push(num_existing_items
as u32 + 1);
409 "Found chapter: {} {} ({})",
412 link
.location
.display()
415 link
.number
= Some(number
);
417 return Ok(SummaryItem
::Link(link
));
420 warn
!("Expected a start of a link, actually got {:?}", other
);
421 bail
!(self.parse_error(
422 "The link items for nested chapters must only contain a hyperlink"
429 fn parse_error
<D
: Display
>(&self, msg
: D
) -> Error
{
430 let (line
, col
) = self.current_location();
432 ErrorKind
::ParseError(line
, col
, msg
.to_string()).into()
435 /// Try to parse the title line.
436 fn parse_title(&mut self) -> Option
<String
> {
437 if let Some(Event
::Start(Tag
::Header(1))) = self.next_event() {
438 debug
!("Found a h1 in the SUMMARY");
440 let tags
= collect_events
!(self.stream
, end Tag
::Header(1));
441 Some(stringify_events(tags
))
448 fn update_section_numbers(sections
: &mut [SummaryItem
], level
: usize, by
: u32) {
449 for section
in sections
{
450 if let SummaryItem
::Link(ref mut link
) = *section
{
451 if let Some(ref mut number
) = link
.number
{
452 number
.0[level
] += by
;
455 update_section_numbers(&mut link
.nested_items
, level
, by
);
460 /// Gets a pointer to the last `Link` in a list of `SummaryItem`s, and its
462 fn get_last_link(links
: &mut [SummaryItem
]) -> Result
<(usize, &mut Link
)> {
466 .filter_map(|(i
, item
)| item
.maybe_link_mut().map(|l
| (i
, l
)))
470 "Unable to get last link because the list of SummaryItems doesn't contain any Links"
475 /// Removes the styling from a list of Markdown events and returns just the
477 fn stringify_events(events
: Vec
<Event
>) -> String
{
480 .filter_map(|t
| match t
{
481 Event
::Text(text
) => Some(text
.into_owned()),
487 // FIXME: Remove this when google/pulldown_cmark#120 lands (new patch release)
488 fn tag_eq(left
: &Tag
, right
: &Tag
) -> bool
{
489 match (left
, right
) {
490 (&Tag
::Paragraph
, &Tag
::Paragraph
) => true,
491 (&Tag
::Rule
, &Tag
::Rule
) => true,
492 (&Tag
::Header(a
), &Tag
::Header(b
)) => a
== b
,
493 (&Tag
::BlockQuote
, &Tag
::BlockQuote
) => true,
494 (&Tag
::CodeBlock(ref a
), &Tag
::CodeBlock(ref b
)) => a
== b
,
495 (&Tag
::List(ref a
), &Tag
::List(ref b
)) => a
== b
,
496 (&Tag
::Item
, &Tag
::Item
) => true,
497 (&Tag
::FootnoteDefinition(ref a
), &Tag
::FootnoteDefinition(ref b
)) => a
== b
,
498 (&Tag
::Table(ref a
), &Tag
::Table(ref b
)) => {
499 a
.iter().zip(b
.iter()).all(|(l
, r
)| alignment_eq(*l
, *r
))
501 (&Tag
::TableHead
, &Tag
::TableHead
) => true,
502 (&Tag
::TableRow
, &Tag
::TableRow
) => true,
503 (&Tag
::TableCell
, &Tag
::TableCell
) => true,
504 (&Tag
::Emphasis
, &Tag
::Emphasis
) => true,
505 (&Tag
::Strong
, &Tag
::Strong
) => true,
506 (&Tag
::Code
, &Tag
::Code
) => true,
507 (&Tag
::Link(ref a_1
, ref a_2
), &Tag
::Link(ref b_1
, ref b_2
)) => a_1
== b_1
&& a_2
== b_2
,
508 (&Tag
::Image(ref a_1
, ref a_2
), &Tag
::Image(ref b_1
, ref b_2
)) => a_1
== b_1
&& a_2
== b_2
,
513 // FIXME: Remove this when google/pulldown_cmark#120 lands (new patch release)
514 fn alignment_eq(left
: Alignment
, right
: Alignment
) -> bool
{
515 match (left
, right
) {
516 (Alignment
::None
, Alignment
::None
) => true,
517 (Alignment
::Left
, Alignment
::Left
) => true,
518 (Alignment
::Center
, Alignment
::Center
) => true,
519 (Alignment
::Right
, Alignment
::Right
) => true,
524 /// A section number like "1.2.3", basically just a newtype'd `Vec<u32>` with
525 /// a pretty `Display` impl.
526 #[derive(Debug, PartialEq, Clone, Default, Serialize, Deserialize)]
527 pub struct SectionNumber(pub Vec
<u32>);
529 impl Display
for SectionNumber
{
530 fn fmt(&self, f
: &mut Formatter
) -> fmt
::Result
{
531 if self.0.is_empty
() {
534 for item
in &self.0 {
535 write
!(f
, "{}.", item
)?
;
542 impl Deref
for SectionNumber
{
543 type Target
= Vec
<u32>;
544 fn deref(&self) -> &Self::Target
{
549 impl DerefMut
for SectionNumber
{
550 fn deref_mut(&mut self) -> &mut Self::Target
{
555 impl FromIterator
<u32> for SectionNumber
{
556 fn from_iter
<I
: IntoIterator
<Item
= u32>>(it
: I
) -> Self {
557 SectionNumber(it
.into_iter().collect())
566 fn section_number_has_correct_dotted_representation() {
569 (vec
![1, 3], "1.3."),
570 (vec
![1, 2, 3], "1.2.3."),
573 for (input
, should_be
) in inputs
{
574 let section_number
= SectionNumber(input
).to_string();
575 assert_eq
!(section_number
, should_be
);
580 fn parse_initial_title() {
581 let src
= "# Summary";
582 let should_be
= String
::from("Summary");
584 let mut parser
= SummaryParser
::new(src
);
585 let got
= parser
.parse_title().unwrap();
587 assert_eq
!(got
, should_be
);
591 fn parse_title_with_styling() {
592 let src
= "# My **Awesome** Summary";
593 let should_be
= String
::from("My Awesome Summary");
595 let mut parser
= SummaryParser
::new(src
);
596 let got
= parser
.parse_title().unwrap();
598 assert_eq
!(got
, should_be
);
602 fn convert_markdown_events_to_a_string() {
603 let src
= "Hello *World*, `this` is some text [and a link](./path/to/link)";
604 let should_be
= "Hello World, this is some text and a link";
606 let events
= pulldown_cmark
::Parser
::new(src
).collect();
607 let got
= stringify_events(events
);
609 assert_eq
!(got
, should_be
);
613 fn parse_some_prefix_items() {
614 let src
= "[First](./first.md)\n[Second](./second.md)\n";
615 let mut parser
= SummaryParser
::new(src
);
617 let should_be
= vec
![
618 SummaryItem
::Link(Link
{
619 name
: String
::from("First"),
620 location
: PathBuf
::from("./first.md"),
623 SummaryItem
::Link(Link
{
624 name
: String
::from("Second"),
625 location
: PathBuf
::from("./second.md"),
630 let _
= parser
.stream
.next(); // step past first event
631 let got
= parser
.parse_affix(true).unwrap();
633 assert_eq
!(got
, should_be
);
637 fn parse_prefix_items_with_a_separator() {
638 let src
= "[First](./first.md)\n\n---\n\n[Second](./second.md)\n";
639 let mut parser
= SummaryParser
::new(src
);
641 let _
= parser
.stream
.next(); // step past first event
642 let got
= parser
.parse_affix(true).unwrap();
644 assert_eq
!(got
.len(), 3);
645 assert_eq
!(got
[1], SummaryItem
::Separator
);
649 fn suffix_items_cannot_be_followed_by_a_list() {
650 let src
= "[First](./first.md)\n- [Second](./second.md)\n";
651 let mut parser
= SummaryParser
::new(src
);
653 let _
= parser
.stream
.next(); // step past first event
654 let got
= parser
.parse_affix(false);
656 assert
!(got
.is_err());
661 let src
= "[First](./first.md)";
662 let should_be
= Link
{
663 name
: String
::from("First"),
664 location
: PathBuf
::from("./first.md"),
668 let mut parser
= SummaryParser
::new(src
);
669 let _
= parser
.stream
.next(); // skip past start of paragraph
671 let href
= match parser
.stream
.next() {
672 Some(Event
::Start(Tag
::Link(href
, _
))) => href
.to_string(),
673 other
=> panic
!("Unreachable, {:?}", other
),
676 let got
= parser
.parse_link(href
).unwrap();
677 assert_eq
!(got
, should_be
);
681 fn parse_a_numbered_chapter() {
682 let src
= "- [First](./first.md)\n";
684 name
: String
::from("First"),
685 location
: PathBuf
::from("./first.md"),
686 number
: Some(SectionNumber(vec
![1])),
689 let should_be
= vec
![SummaryItem
::Link(link
)];
691 let mut parser
= SummaryParser
::new(src
);
692 let _
= parser
.stream
.next();
694 let got
= parser
.parse_numbered().unwrap();
696 assert_eq
!(got
, should_be
);
700 fn parse_nested_numbered_chapters() {
701 let src
= "- [First](./first.md)\n - [Nested](./nested.md)\n- [Second](./second.md)";
703 let should_be
= vec
![
704 SummaryItem
::Link(Link
{
705 name
: String
::from("First"),
706 location
: PathBuf
::from("./first.md"),
707 number
: Some(SectionNumber(vec
![1])),
709 SummaryItem
::Link(Link
{
710 name
: String
::from("Nested"),
711 location
: PathBuf
::from("./nested.md"),
712 number
: Some(SectionNumber(vec
![1, 1])),
713 nested_items
: Vec
::new(),
717 SummaryItem
::Link(Link
{
718 name
: String
::from("Second"),
719 location
: PathBuf
::from("./second.md"),
720 number
: Some(SectionNumber(vec
![2])),
721 nested_items
: Vec
::new(),
725 let mut parser
= SummaryParser
::new(src
);
726 let _
= parser
.stream
.next();
728 let got
= parser
.parse_numbered().unwrap();
730 assert_eq
!(got
, should_be
);
733 /// This test ensures the book will continue to pass because it breaks the
734 /// `SUMMARY.md` up using level 2 headers ([example]).
736 /// [example]: https://github.com/rust-lang/book/blob/2c942dc094f4ddcdc7aba7564f80782801197c99/second-edition/src/SUMMARY.md#basic-rust-literacy
738 fn can_have_a_subheader_between_nested_items() {
739 let src
= "- [First](./first.md)\n\n## Subheading\n\n- [Second](./second.md)\n";
740 let should_be
= vec
![
741 SummaryItem
::Link(Link
{
742 name
: String
::from("First"),
743 location
: PathBuf
::from("./first.md"),
744 number
: Some(SectionNumber(vec
![1])),
745 nested_items
: Vec
::new(),
747 SummaryItem
::Link(Link
{
748 name
: String
::from("Second"),
749 location
: PathBuf
::from("./second.md"),
750 number
: Some(SectionNumber(vec
![2])),
751 nested_items
: Vec
::new(),
755 let mut parser
= SummaryParser
::new(src
);
756 let _
= parser
.stream
.next();
758 let got
= parser
.parse_numbered().unwrap();
760 assert_eq
!(got
, should_be
);
764 fn an_empty_link_location_is_an_error() {
765 let src
= "- [Empty]()\n";
766 let mut parser
= SummaryParser
::new(src
);
767 parser
.stream
.next();
769 let got
= parser
.parse_numbered();
770 assert
!(got
.is_err());