// vendor/pulldown-cmark-0.7.2/src/parse.rs
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20
21 //! Tree-based two pass parser.
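//!
//! A minimal usage sketch of the parser built here (these types are
//! re-exported from the crate root):
//!
//! ```
//! use pulldown_cmark::{Event, Parser, Tag};
//!
//! let events: Vec<Event> = Parser::new("Hello *world*").collect();
//! assert_eq!(events[0], Event::Start(Tag::Paragraph));
//! ```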
22
23 use std::cmp::{max, min};
24 use std::collections::{HashMap, VecDeque};
25 use std::ops::{Index, Range};
26
27 use unicase::UniCase;
28
29 use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel};
30 use crate::scanners::*;
31 use crate::strings::CowStr;
32 use crate::tree::{Tree, TreeIndex};
33
34 // Allowing arbitrary depth nested parentheses inside link destinations
35 // can create denial of service vulnerabilities if we're not careful.
36 // The simplest countermeasure is to limit their depth, which is
37 // explicitly allowed by the spec as long as the limit is at least 3:
38 // https://spec.commonmark.org/0.29/#link-destination
39 const LINK_MAX_NESTED_PARENS: usize = 5;
40
41 /// Codeblock kind.
42 #[derive(Clone, Debug, PartialEq)]
43 pub enum CodeBlockKind<'a> {
44 Indented,
45 /// The value contained in the tag describes the language of the code, which may be empty.
46 Fenced(CowStr<'a>),
47 }
48
49 impl<'a> CodeBlockKind<'a> {
50 pub fn is_indented(&self) -> bool {
51 match *self {
52 CodeBlockKind::Indented => true,
53 _ => false,
54 }
55 }
56
57 pub fn is_fenced(&self) -> bool {
58 match *self {
59 CodeBlockKind::Fenced(_) => true,
60 _ => false,
61 }
62 }
63 }
64
65 /// Tags for elements that can contain other elements.
66 #[derive(Clone, Debug, PartialEq)]
67 pub enum Tag<'a> {
68 /// A paragraph of text and other inline elements.
69 Paragraph,
70
71 /// A heading. The field indicates the level of the heading.
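/// For example, `## Section` yields `Heading(2)`.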
72 Heading(u32),
73
74 BlockQuote,
75 /// A code block.
76 CodeBlock(CodeBlockKind<'a>),
77
78 /// A list. If the list is ordered the field indicates the number of the first item.
79 /// Contains only list items.
80 List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
81 /// A list item.
82 Item,
83 /// A footnote definition. The value contained is the footnote's label by which it can
84 /// be referred to.
85 FootnoteDefinition(CowStr<'a>),
86
87 /// A table. Contains a vector describing the text-alignment for each of its columns.
88 Table(Vec<Alignment>),
89 /// A table header. Contains only `TableRow`s. Note that the table body starts immediately
90 /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
91 TableHead,
92 /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
93 TableRow,
94 TableCell,
95
96 // span-level tags
97 Emphasis,
98 Strong,
99 Strikethrough,
100
101 /// A link. The first field is the link type, the second the destination URL and the third is a title.
102 Link(LinkType, CowStr<'a>, CowStr<'a>),
103
104 /// An image. The first field is the link type, the second the destination URL and the third is a title.
105 Image(LinkType, CowStr<'a>, CowStr<'a>),
106 }
107
108 /// Type specifier for inline links. See [the `Tag::Link` variant](enum.Tag.html#variant.Link) for more information.
109 #[derive(Clone, Debug, PartialEq, Copy)]
110 pub enum LinkType {
111 /// Inline link like `[foo](bar)`
112 Inline,
113 /// Reference link like `[foo][bar]`
114 Reference,
115 /// Reference without destination in the document, but resolved by the broken_link_callback
116 ReferenceUnknown,
117 /// Collapsed link like `[foo][]`
118 Collapsed,
119 /// Collapsed link without destination in the document, but resolved by the broken_link_callback
120 CollapsedUnknown,
121 /// Shortcut link like `[foo]`
122 Shortcut,
123 /// Shortcut without destination in the document, but resolved by the broken_link_callback
124 ShortcutUnknown,
125 /// Autolink like `<http://foo.bar/baz>`
126 Autolink,
127 /// Email address in autolink like `<john@example.org>`
128 Email,
129 }
130
131 impl LinkType {
132 fn to_unknown(self) -> Self {
133 match self {
134 LinkType::Reference => LinkType::ReferenceUnknown,
135 LinkType::Collapsed => LinkType::CollapsedUnknown,
136 LinkType::Shortcut => LinkType::ShortcutUnknown,
137 _ => unreachable!(),
138 }
139 }
140 }
141
142 /// Markdown events that are generated in a preorder traversal of the document
143 /// tree, with additional `End` events whenever all of an inner node's children
144 /// have been visited.
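///
/// For example, parsing `*hi*` yields `Start(Paragraph)`, `Start(Emphasis)`,
/// `Text("hi")`, `End(Emphasis)`, `End(Paragraph)`.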
145 #[derive(Clone, Debug, PartialEq)]
146 pub enum Event<'a> {
147 /// Start of a tagged element. Events that are yielded after this event
148 /// and before its corresponding `End` event are inside this element.
149 /// Start and end events are guaranteed to be balanced.
150 Start(Tag<'a>),
151 /// End of a tagged element.
152 End(Tag<'a>),
153 /// A text node.
154 Text(CowStr<'a>),
155 /// An inline code node.
156 Code(CowStr<'a>),
157 /// An HTML node.
158 Html(CowStr<'a>),
159 /// A reference to a footnote with given label, which may or may not be defined
160 /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
161 /// occur in any order.
162 FootnoteReference(CowStr<'a>),
163 /// A soft line break.
164 SoftBreak,
165 /// A hard line break.
166 HardBreak,
167 /// A horizontal ruler.
168 Rule,
169 /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
170 TaskListMarker(bool),
171 }
172
173 /// Table column text alignment.
174 #[derive(Copy, Clone, Debug, PartialEq)]
175 pub enum Alignment {
176 /// Default text alignment.
177 None,
178 Left,
179 Center,
180 Right,
181 }
182
183 bitflags! {
184 /// Option struct containing flags for enabling extra features
185 /// that are not part of the CommonMark spec.
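///
/// A small sketch of enabling an extension before handing the options to
/// `Parser::new_ext`:
///
/// ```
/// use pulldown_cmark::Options;
///
/// let mut options = Options::empty();
/// options.insert(Options::ENABLE_STRIKETHROUGH);
/// assert!(options.contains(Options::ENABLE_STRIKETHROUGH));
/// ```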
186 pub struct Options: u32 {
187 const ENABLE_TABLES = 1 << 1;
188 const ENABLE_FOOTNOTES = 1 << 2;
189 const ENABLE_STRIKETHROUGH = 1 << 3;
190 const ENABLE_TASKLISTS = 1 << 4;
191 }
192 }
193
194 #[derive(Debug, Default, Clone, Copy)]
195 struct Item {
196 start: usize,
197 end: usize,
198 body: ItemBody,
199 }
200
201 #[derive(Debug, PartialEq, Clone, Copy)]
202 enum ItemBody {
203 Paragraph,
204 Text,
205 SoftBreak,
206 HardBreak,
207
208 // These are possible inline items, need to be resolved in second pass.
209
210 // repeats, can_open, can_close
211 MaybeEmphasis(usize, bool, bool),
212 MaybeCode(usize, bool), // number of backticks, preceded by backslash
213 MaybeHtml,
214 MaybeLinkOpen,
215 MaybeLinkClose,
216 MaybeImage,
217
218 // These are inline items after resolution.
219 Emphasis,
220 Strong,
221 Strikethrough,
222 Code(CowIndex),
223 Link(LinkIndex),
224 Image(LinkIndex),
225 FootnoteReference(CowIndex),
226 TaskListMarker(bool), // true for checked
227
228 Rule,
229 Heading(u32), // heading level
230 FencedCodeBlock(CowIndex),
231 IndentCodeBlock,
232 Html,
233 BlockQuote,
234 List(bool, u8, u64), // is_tight, list character, list start index
235 ListItem(usize), // indent level
236 SynthesizeText(CowIndex),
237 FootnoteDefinition(CowIndex),
238
239 // Tables
240 Table(AlignmentIndex),
241 TableHead,
242 TableRow,
243 TableCell,
244
245 // Dummy node at the top of the tree - should not be used otherwise!
246 Root,
247 }
248
249 impl<'a> ItemBody {
250 fn is_inline(&self) -> bool {
251 match *self {
252 ItemBody::MaybeEmphasis(..)
253 | ItemBody::MaybeHtml
254 | ItemBody::MaybeCode(..)
255 | ItemBody::MaybeLinkOpen
256 | ItemBody::MaybeLinkClose
257 | ItemBody::MaybeImage => true,
258 _ => false,
259 }
260 }
261 }
262
263 impl<'a> Default for ItemBody {
264 fn default() -> Self {
265 ItemBody::Root
266 }
267 }
268
269 /// Scanning modes for `Parser`'s `parse_line` method.
270 #[derive(PartialEq, Eq, Copy, Clone)]
271 enum TableParseMode {
272 /// Inside a paragraph, scanning for table headers.
273 Scan,
274 /// Inside a table.
275 Active,
276 /// Inside a paragraph, not scanning for table headers.
277 Disabled,
278 }
279
280 /// State for the first parsing pass.
281 ///
282 /// The first pass resolves all block structure, generating an AST. Within a block, items
283 /// are in a linear chain with potential inline markup identified.
284 struct FirstPass<'a> {
285 text: &'a str,
286 tree: Tree<Item>,
287 begin_list_item: bool,
288 last_line_blank: bool,
289 allocs: Allocations<'a>,
290 options: Options,
291 list_nesting: usize,
292 }
293
294 impl<'a> FirstPass<'a> {
295 fn new(text: &'a str, options: Options) -> FirstPass {
296 // This is a very naive heuristic for the number of nodes
297 // we'll need.
298 let start_capacity = max(128, text.len() / 32);
299 let tree = Tree::with_capacity(start_capacity);
300 let begin_list_item = false;
301 let last_line_blank = false;
302 let allocs = Allocations::new();
303 FirstPass {
304 text,
305 tree,
306 begin_list_item,
307 last_line_blank,
308 allocs,
309 options,
310 list_nesting: 0,
311 }
312 }
313
314 fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
315 let mut ix = 0;
316 while ix < self.text.len() {
317 ix = self.parse_block(ix);
318 }
319 for _ in 0..self.tree.spine_len() {
320 self.pop(ix);
321 }
322 (self.tree, self.allocs)
323 }
324
325 /// Returns offset after block.
326 fn parse_block(&mut self, mut start_ix: usize) -> usize {
327 let bytes = self.text.as_bytes();
328 let mut line_start = LineStart::new(&bytes[start_ix..]);
329
330 let i = scan_containers(&self.tree, &mut line_start);
331 for _ in i..self.tree.spine_len() {
332 self.pop(start_ix);
333 }
334
335 if self.options.contains(Options::ENABLE_FOOTNOTES) {
336 // finish footnote if it's still open and was preceded by a blank line
337 if let Some(node_ix) = self.tree.peek_up() {
338 if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
339 if self.last_line_blank {
340 self.pop(start_ix);
341 }
342 }
343 }
344
345 // Footnote definitions of the form
346 // [^bar]:
347 // * anything really
348 let container_start = start_ix + line_start.bytes_scanned();
349 if let Some(bytecount) = self.parse_footnote(container_start) {
350 start_ix = container_start + bytecount;
351 start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
352 line_start = LineStart::new(&bytes[start_ix..]);
353 }
354 }
355
356 // Process new containers
357 loop {
358 let container_start = start_ix + line_start.bytes_scanned();
359 if let Some((ch, index, indent)) = line_start.scan_list_marker() {
360 let after_marker_index = start_ix + line_start.bytes_scanned();
361 self.continue_list(container_start, ch, index);
362 self.tree.append(Item {
363 start: container_start,
364 end: after_marker_index, // will get updated later if item not empty
365 body: ItemBody::ListItem(indent),
366 });
367 self.tree.push();
368 if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
369 self.begin_list_item = true;
370 return after_marker_index + n;
371 }
372 if self.options.contains(Options::ENABLE_TASKLISTS) {
373 if let Some(is_checked) = line_start.scan_task_list_marker() {
374 self.tree.append(Item {
375 start: after_marker_index,
376 end: start_ix + line_start.bytes_scanned(),
377 body: ItemBody::TaskListMarker(is_checked),
378 });
379 }
380 }
381 } else if line_start.scan_blockquote_marker() {
382 self.finish_list(start_ix);
383 self.tree.append(Item {
384 start: container_start,
385 end: 0, // will get set later
386 body: ItemBody::BlockQuote,
387 });
388 self.tree.push();
389 } else {
390 break;
391 }
392 }
393
394 let ix = start_ix + line_start.bytes_scanned();
395
396 if let Some(n) = scan_blank_line(&bytes[ix..]) {
397 if let Some(node_ix) = self.tree.peek_up() {
398 match self.tree[node_ix].item.body {
399 ItemBody::BlockQuote => (),
400 _ => {
401 if self.begin_list_item {
402 // A list item can begin with at most one blank line.
403 self.pop(start_ix);
404 }
405 self.last_line_blank = true;
406 }
407 }
408 }
409 return ix + n;
410 }
411
412 self.begin_list_item = false;
413 self.finish_list(start_ix);
414
415 // Save `remaining_space` here to avoid needing to backtrack `line_start` for HTML blocks
416 let remaining_space = line_start.remaining_space();
417
418 let indent = line_start.scan_space_upto(4);
419 if indent == 4 {
420 let ix = start_ix + line_start.bytes_scanned();
421 let remaining_space = line_start.remaining_space();
422 return self.parse_indented_code_block(ix, remaining_space);
423 }
424
425 let ix = start_ix + line_start.bytes_scanned();
426
427 // HTML Blocks
428 if bytes[ix] == b'<' {
429 // Types 1-5 are all detected by one function and all end with the same
430 // pattern
431 if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + 1)..]) {
432 return self.parse_html_block_type_1_to_5(ix, html_end_tag, remaining_space);
433 }
434
435 // Detect type 6
436 let possible_tag = scan_html_block_tag(&bytes[(ix + 1)..]).1;
437 if is_html_tag(possible_tag) {
438 return self.parse_html_block_type_6_or_7(ix, remaining_space);
439 }
440
441 // Detect type 7
442 if let Some(_html_bytes) = scan_html_type_7(&bytes[(ix + 1)..]) {
443 return self.parse_html_block_type_6_or_7(ix, remaining_space);
444 }
445 }
446
447 if let Ok(n) = scan_hrule(&bytes[ix..]) {
448 return self.parse_hrule(n, ix);
449 }
450
451 if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
452 return self.parse_atx_heading(ix, atx_size);
453 }
454
455 // parse refdef
456 if let Some((bytecount, label, link_def)) = self.parse_refdef_total(ix) {
457 self.allocs.refdefs.entry(label).or_insert(link_def);
458 let ix = ix + bytecount;
459 // try to read trailing whitespace or it will register as a completely blank line
460 // TODO: shouldn't we do this for all block level items?
461 return ix + scan_blank_line(&bytes[ix..]).unwrap_or(0);
462 }
463
464 if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
465 return self.parse_fenced_code_block(ix, indent, fence_ch, n);
466 }
467 self.parse_paragraph(ix)
468 }
469
470 /// Returns the offset of the first line after the table.
471 /// Assumptions: current focus is a table element and the table header
472 /// matches the separator line (same number of columns).
473 fn parse_table(&mut self, table_cols: usize, head_start: usize, body_start: usize) -> usize {
474 // parse header. this shouldn't fail because we made sure the table header is ok
475 let (_sep_start, thead_ix) = self.parse_table_row_inner(head_start, table_cols);
476 self.tree[thead_ix].item.body = ItemBody::TableHead;
477
478 // parse body
479 let mut ix = body_start;
480 while let Some((next_ix, _row_ix)) = self.parse_table_row(ix, table_cols) {
481 ix = next_ix;
482 }
483
484 self.pop(ix);
485 ix
486 }
487
488 /// Call this when containers are taken care of.
489 /// Returns bytes scanned, row_ix
490 fn parse_table_row_inner(&mut self, mut ix: usize, row_cells: usize) -> (usize, TreeIndex) {
491 let bytes = self.text.as_bytes();
492 let mut cells = 0;
493 let mut final_cell_ix = None;
494
495 let row_ix = self.tree.append(Item {
496 start: ix,
497 end: 0, // set at end of this function
498 body: ItemBody::TableRow,
499 });
500 self.tree.push();
501
502 loop {
503 ix += scan_ch(&bytes[ix..], b'|');
504 ix += scan_whitespace_no_nl(&bytes[ix..]);
505
506 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
507 ix += eol_bytes;
508 break;
509 }
510
511 let cell_ix = self.tree.append(Item {
512 start: ix,
513 end: ix,
514 body: ItemBody::TableCell,
515 });
516 self.tree.push();
517 let (next_ix, _brk) = self.parse_line(ix, TableParseMode::Active);
518 let trailing_whitespace = scan_rev_while(&bytes[..next_ix], is_ascii_whitespace);
519
520 if let Some(cur_ix) = self.tree.cur() {
521 self.tree[cur_ix].item.end -= trailing_whitespace;
522 }
523
524 self.tree[cell_ix].item.end = next_ix - trailing_whitespace;
525 self.tree.pop();
526
527 ix = next_ix;
528 cells += 1;
529
530 if cells == row_cells {
531 final_cell_ix = Some(cell_ix);
532 }
533 }
534
535 // fill empty cells if needed
536 // note: this is where GFM and commonmark-extra diverge. we follow
537 // GFM here
538 for _ in cells..row_cells {
539 self.tree.append(Item {
540 start: ix,
541 end: ix,
542 body: ItemBody::TableCell,
543 });
544 }
545
546 // drop excess cells
547 if let Some(cell_ix) = final_cell_ix {
548 self.tree[cell_ix].next = None;
549 }
550
551 self.pop(ix);
552
553 (ix, row_ix)
554 }
555
556 /// Returns first offset after the row and the tree index of the row.
557 fn parse_table_row(&mut self, mut ix: usize, row_cells: usize) -> Option<(usize, TreeIndex)> {
558 let bytes = self.text.as_bytes();
559 let mut line_start = LineStart::new(&bytes[ix..]);
560 let containers = scan_containers(&self.tree, &mut line_start);
561 if containers != self.tree.spine_len() {
562 return None;
563 }
564 line_start.scan_all_space();
565 ix += line_start.bytes_scanned();
566 if scan_paragraph_interrupt(&bytes[ix..]) {
567 return None;
568 }
569
570 let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells);
571 Some((ix, row_ix))
572 }
573
574 /// Returns offset of line start after paragraph.
575 fn parse_paragraph(&mut self, start_ix: usize) -> usize {
576 let node_ix = self.tree.append(Item {
577 start: start_ix,
578 end: 0, // will get set later
579 body: ItemBody::Paragraph,
580 });
581 self.tree.push();
582 let bytes = self.text.as_bytes();
583
584 let mut ix = start_ix;
585 loop {
586 let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
587 TableParseMode::Scan
588 } else {
589 TableParseMode::Disabled
590 };
591 let (next_ix, brk) = self.parse_line(ix, scan_mode);
592
593 // break out when we find a table
594 if let Some(Item {
595 body: ItemBody::Table(alignment_ix),
596 ..
597 }) = brk
598 {
599 let table_cols = self.allocs[alignment_ix].len();
600 self.tree[node_ix].item.body = ItemBody::Table(alignment_ix);
601 // this clears out any stuff we may have appended - but there may
602 // be a cleaner way
603 self.tree[node_ix].child = None;
604 self.tree.pop();
605 self.tree.push();
606 return self.parse_table(table_cols, ix, next_ix);
607 }
608
609 ix = next_ix;
610 let mut line_start = LineStart::new(&bytes[ix..]);
611 let n_containers = scan_containers(&self.tree, &mut line_start);
612 if !line_start.scan_space(4) {
613 let ix_new = ix + line_start.bytes_scanned();
614 if n_containers == self.tree.spine_len() {
615 if let Some(ix_setext) = self.parse_setext_heading(ix_new, node_ix) {
616 if let Some(Item {
617 start,
618 body: ItemBody::HardBreak,
619 ..
620 }) = brk
621 {
622 if bytes[start] == b'\\' {
623 self.tree.append_text(start, start + 1);
624 }
625 }
626 ix = ix_setext;
627 break;
628 }
629 }
630 // first check for non-empty lists, then for other interrupts
631 let suffix = &bytes[ix_new..];
632 if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix) {
633 break;
634 }
635 }
636 line_start.scan_all_space();
637 if line_start.is_at_eol() {
638 break;
639 }
640 ix = next_ix + line_start.bytes_scanned();
641 if let Some(item) = brk {
642 self.tree.append(item);
643 }
644 }
645
646 self.pop(ix);
647 ix
648 }
649
650 /// Returns end ix of setext_heading on success.
651 fn parse_setext_heading(&mut self, ix: usize, node_ix: TreeIndex) -> Option<usize> {
652 let bytes = self.text.as_bytes();
653 let (n, level) = scan_setext_heading(&bytes[ix..])?;
654 self.tree[node_ix].item.body = ItemBody::Heading(level);
655
656 // strip trailing whitespace
657 if let Some(cur_ix) = self.tree.cur() {
658 self.tree[cur_ix].item.end -= scan_rev_while(
659 &bytes[..self.tree[cur_ix].item.end],
660 is_ascii_whitespace_no_nl,
661 );
662 }
663
664 Some(ix + n)
665 }
666
667 /// Parse a line of input, appending text and items to tree.
668 ///
669 /// Returns: index after line and an item representing the break.
670 fn parse_line(&mut self, start: usize, mode: TableParseMode) -> (usize, Option<Item>) {
671 let bytes = &self.text.as_bytes();
672 let mut pipes = 0;
673 let mut last_pipe_ix = start;
674 let mut begin_text = start;
675
676 let (final_ix, brk) = iterate_special_bytes(bytes, start, |ix, byte| {
677 match byte {
678 b'\n' | b'\r' => {
679 if let TableParseMode::Active = mode {
680 return LoopInstruction::BreakAtWith(ix, None);
681 }
682
683 let mut i = ix;
684 let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
685 if mode == TableParseMode::Scan && pipes > 0 {
686 // check if we may be parsing a table
687 let next_line_ix = ix + eol_bytes;
688 let mut line_start = LineStart::new(&bytes[next_line_ix..]);
689 if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len() {
690 let table_head_ix = next_line_ix + line_start.bytes_scanned();
691 let (table_head_bytes, alignment) =
692 scan_table_head(&bytes[table_head_ix..]);
693
694 if table_head_bytes > 0 {
695 // computing header count from number of pipes
696 let header_count =
697 count_header_cols(bytes, pipes, start, last_pipe_ix);
698
699 // make sure they match the number of columns we find in separator line
700 if alignment.len() == header_count {
701 let alignment_ix = self.allocs.allocate_alignment(alignment);
702 let end_ix = table_head_ix + table_head_bytes;
703 return LoopInstruction::BreakAtWith(
704 end_ix,
705 Some(Item {
706 start: i,
707 end: end_ix, // must update later
708 body: ItemBody::Table(alignment_ix),
709 }),
710 );
711 }
712 }
713 }
714 }
715
716 let end_ix = ix + eol_bytes;
717 let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
718 if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
719 i -= 1;
720 self.tree.append_text(begin_text, i);
721 return LoopInstruction::BreakAtWith(
722 end_ix,
723 Some(Item {
724 start: i,
725 end: end_ix,
726 body: ItemBody::HardBreak,
727 }),
728 );
729 }
730 let trailing_whitespace =
731 scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
732 if trailing_whitespace >= 2 {
733 i -= trailing_whitespace;
734 self.tree.append_text(begin_text, i);
735 return LoopInstruction::BreakAtWith(
736 end_ix,
737 Some(Item {
738 start: i,
739 end: end_ix,
740 body: ItemBody::HardBreak,
741 }),
742 );
743 }
744
745 self.tree.append_text(begin_text, ix);
746 LoopInstruction::BreakAtWith(
747 end_ix,
748 Some(Item {
749 start: i,
750 end: end_ix,
751 body: ItemBody::SoftBreak,
752 }),
753 )
754 }
755 b'\\' => {
756 if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
757 self.tree.append_text(begin_text, ix);
758 if bytes[ix + 1] == b'`' {
759 let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
760 self.tree.append(Item {
761 start: ix + 1,
762 end: ix + count + 1,
763 body: ItemBody::MaybeCode(count, true),
764 });
765 begin_text = ix + 1 + count;
766 LoopInstruction::ContinueAndSkip(count)
767 } else {
768 begin_text = ix + 1;
769 LoopInstruction::ContinueAndSkip(1)
770 }
771 } else {
772 LoopInstruction::ContinueAndSkip(0)
773 }
774 }
775 c @ b'*' | c @ b'_' | c @ b'~' => {
776 let string_suffix = &self.text[ix..];
777 let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
778 let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
779 let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
780 let is_valid_seq = c != b'~'
781 || count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
782
783 if (can_open || can_close) && is_valid_seq {
784 self.tree.append_text(begin_text, ix);
785 for i in 0..count {
786 self.tree.append(Item {
787 start: ix + i,
788 end: ix + i + 1,
789 body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
790 });
791 }
792 begin_text = ix + count;
793 }
794 LoopInstruction::ContinueAndSkip(count - 1)
795 }
796 b'`' => {
797 self.tree.append_text(begin_text, ix);
798 let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
799 self.tree.append(Item {
800 start: ix,
801 end: ix + count,
802 body: ItemBody::MaybeCode(count, false),
803 });
804 begin_text = ix + count;
805 LoopInstruction::ContinueAndSkip(count - 1)
806 }
807 b'<' => {
808 // Note: could detect some non-HTML cases and early escape here, but not
809 // clear that's a win.
810 self.tree.append_text(begin_text, ix);
811 self.tree.append(Item {
812 start: ix,
813 end: ix + 1,
814 body: ItemBody::MaybeHtml,
815 });
816 begin_text = ix + 1;
817 LoopInstruction::ContinueAndSkip(0)
818 }
819 b'!' => {
820 if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
821 self.tree.append_text(begin_text, ix);
822 self.tree.append(Item {
823 start: ix,
824 end: ix + 2,
825 body: ItemBody::MaybeImage,
826 });
827 begin_text = ix + 2;
828 LoopInstruction::ContinueAndSkip(1)
829 } else {
830 LoopInstruction::ContinueAndSkip(0)
831 }
832 }
833 b'[' => {
834 self.tree.append_text(begin_text, ix);
835 self.tree.append(Item {
836 start: ix,
837 end: ix + 1,
838 body: ItemBody::MaybeLinkOpen,
839 });
840 begin_text = ix + 1;
841 LoopInstruction::ContinueAndSkip(0)
842 }
843 b']' => {
844 self.tree.append_text(begin_text, ix);
845 self.tree.append(Item {
846 start: ix,
847 end: ix + 1,
848 body: ItemBody::MaybeLinkClose,
849 });
850 begin_text = ix + 1;
851 LoopInstruction::ContinueAndSkip(0)
852 }
853 b'&' => match scan_entity(&bytes[ix..]) {
854 (n, Some(value)) => {
855 self.tree.append_text(begin_text, ix);
856 self.tree.append(Item {
857 start: ix,
858 end: ix + n,
859 body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
860 });
861 begin_text = ix + n;
862 LoopInstruction::ContinueAndSkip(n - 1)
863 }
864 _ => LoopInstruction::ContinueAndSkip(0),
865 },
866 b'|' => {
867 if let TableParseMode::Active = mode {
868 LoopInstruction::BreakAtWith(ix, None)
869 } else {
870 last_pipe_ix = ix;
871 pipes += 1;
872 LoopInstruction::ContinueAndSkip(0)
873 }
874 }
875 _ => LoopInstruction::ContinueAndSkip(0),
876 }
877 });
878
879 if brk.is_none() {
880 // need to close text at eof
881 self.tree.append_text(begin_text, final_ix);
882 }
883 (final_ix, brk)
884 }
885
886 /// Check whether we should allow a paragraph interrupt by lists. Only non-empty
887 /// lists are allowed.
888 fn interrupt_paragraph_by_list(&self, suffix: &[u8]) -> bool {
889 scan_listitem(suffix).map_or(false, |(ix, delim, index, _)| {
890 self.list_nesting > 0 ||
891 // we don't allow interruption by either empty lists or
892 // numbered lists starting at an index other than 1
893 !scan_empty_list(&suffix[ix..]) && (delim == b'*' || delim == b'-' || index == 1)
894 })
895 }
896
897 /// When start_ix is at the beginning of an HTML block of type 1 to 5,
898 /// this will find the end of the block, adding the block itself to the
899 /// tree and also keeping track of the lines of HTML within the block.
900 ///
901 /// The html_end_tag is the tag that must be found on a line to end the block.
902 fn parse_html_block_type_1_to_5(
903 &mut self,
904 start_ix: usize,
905 html_end_tag: &str,
906 mut remaining_space: usize,
907 ) -> usize {
908 let bytes = self.text.as_bytes();
909 let mut ix = start_ix;
910 loop {
911 let line_start_ix = ix;
912 ix += scan_nextline(&bytes[ix..]);
913 self.append_html_line(remaining_space, line_start_ix, ix);
914
915 let mut line_start = LineStart::new(&bytes[ix..]);
916 let n_containers = scan_containers(&self.tree, &mut line_start);
917 if n_containers < self.tree.spine_len() {
918 break;
919 }
920
921 if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
922 break;
923 }
924
925 let next_line_ix = ix + line_start.bytes_scanned();
926 if next_line_ix == self.text.len() {
927 break;
928 }
929 ix = next_line_ix;
930 remaining_space = line_start.remaining_space();
931 }
932 ix
933 }
934
935 /// When start_ix is at the beginning of an HTML block of type 6 or 7,
936 /// this will consume lines until there is a blank line and keep track of
937 /// the HTML within the block.
938 fn parse_html_block_type_6_or_7(
939 &mut self,
940 start_ix: usize,
941 mut remaining_space: usize,
942 ) -> usize {
943 let bytes = self.text.as_bytes();
944 let mut ix = start_ix;
945 loop {
946 let line_start_ix = ix;
947 ix += scan_nextline(&bytes[ix..]);
948 self.append_html_line(remaining_space, line_start_ix, ix);
949
950 let mut line_start = LineStart::new(&bytes[ix..]);
951 let n_containers = scan_containers(&self.tree, &mut line_start);
952 if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
953 break;
954 }
955
956 let next_line_ix = ix + line_start.bytes_scanned();
957 if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
958 {
959 break;
960 }
961 ix = next_line_ix;
962 remaining_space = line_start.remaining_space();
963 }
964 ix
965 }
966
967 fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
968 self.tree.append(Item {
969 start: start_ix,
970 end: 0, // will get set later
971 body: ItemBody::IndentCodeBlock,
972 });
973 self.tree.push();
974 let bytes = self.text.as_bytes();
975 let mut last_nonblank_child = None;
976 let mut last_nonblank_ix = 0;
977 let mut end_ix = 0;
978 let mut last_line_blank = false;
979
980 let mut ix = start_ix;
981 loop {
982 let line_start_ix = ix;
983 ix += scan_nextline(&bytes[ix..]);
984 self.append_code_text(remaining_space, line_start_ix, ix);
985 // TODO(spec clarification): should we synthesize newline at EOF?
986
987 if !last_line_blank {
988 last_nonblank_child = self.tree.cur();
989 last_nonblank_ix = ix;
990 end_ix = ix;
991 }
992
993 let mut line_start = LineStart::new(&bytes[ix..]);
994 let n_containers = scan_containers(&self.tree, &mut line_start);
995 if n_containers < self.tree.spine_len()
996 || !(line_start.scan_space(4) || line_start.is_at_eol())
997 {
998 break;
999 }
1000 let next_line_ix = ix + line_start.bytes_scanned();
1001 if next_line_ix == self.text.len() {
1002 break;
1003 }
1004 ix = next_line_ix;
1005 remaining_space = line_start.remaining_space();
1006 last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
1007 }
1008
1009 // Trim trailing blank lines.
1010 if let Some(child) = last_nonblank_child {
1011 self.tree[child].next = None;
1012 self.tree[child].item.end = last_nonblank_ix;
1013 }
1014 self.pop(end_ix);
1015 ix
1016 }
1017
1018 fn parse_fenced_code_block(
1019 &mut self,
1020 start_ix: usize,
1021 indent: usize,
1022 fence_ch: u8,
1023 n_fence_char: usize,
1024 ) -> usize {
1025 let bytes = self.text.as_bytes();
1026 let mut info_start = start_ix + n_fence_char;
1027 info_start += scan_whitespace_no_nl(&bytes[info_start..]);
1028 // TODO: info strings are typically very short. wouldn't it be faster
1029 // to just do a forward scan here?
1030 let mut ix = info_start + scan_nextline(&bytes[info_start..]);
1031 let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
1032 let info_string = unescape(&self.text[info_start..info_end]);
1033 self.tree.append(Item {
1034 start: start_ix,
1035 end: 0, // will get set later
1036 body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
1037 });
1038 self.tree.push();
1039 loop {
1040 let mut line_start = LineStart::new(&bytes[ix..]);
1041 let n_containers = scan_containers(&self.tree, &mut line_start);
1042 if n_containers < self.tree.spine_len() {
1043 break;
1044 }
1045 line_start.scan_space(indent);
1046 let mut close_line_start = line_start.clone();
1047 if !close_line_start.scan_space(4) {
1048 let close_ix = ix + close_line_start.bytes_scanned();
1049 if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
1050 {
1051 ix = close_ix + n;
1052 break;
1053 }
1054 }
1055 let remaining_space = line_start.remaining_space();
1056 ix += line_start.bytes_scanned();
1057 let next_ix = ix + scan_nextline(&bytes[ix..]);
1058 self.append_code_text(remaining_space, ix, next_ix);
1059 ix = next_ix;
1060 }
1061
1062 self.pop(ix);
1063
1064 // try to read trailing whitespace or it will register as a completely blank line
1065 ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
1066 }
1067
1068 fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
1069 if remaining_space > 0 {
1070 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1071 self.tree.append(Item {
1072 start,
1073 end: start,
1074 body: ItemBody::SynthesizeText(cow_ix),
1075 });
1076 }
1077 if self.text.as_bytes()[end - 2] == b'\r' {
1078 // Normalize CRLF to LF
1079 self.tree.append_text(start, end - 2);
1080 self.tree.append_text(end - 1, end);
1081 } else {
1082 self.tree.append_text(start, end);
1083 }
1084 }
1085
1086 /// Appends a line of HTML to the tree.
1087 fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
1088 if remaining_space > 0 {
1089 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1090 self.tree.append(Item {
1091 start,
1092 end: start,
1093 // TODO: maybe this should synthesize to html rather than text?
1094 body: ItemBody::SynthesizeText(cow_ix),
1095 });
1096 }
1097 if self.text.as_bytes()[end - 2] == b'\r' {
1098 // Normalize CRLF to LF
1099 self.tree.append(Item {
1100 start,
1101 end: end - 2,
1102 body: ItemBody::Html,
1103 });
1104 self.tree.append(Item {
1105 start: end - 1,
1106 end,
1107 body: ItemBody::Html,
1108 });
1109 } else {
1110 self.tree.append(Item {
1111 start,
1112 end,
1113 body: ItemBody::Html,
1114 });
1115 }
1116 }
1117
1118 /// Pop a container, setting its end.
1119 fn pop(&mut self, ix: usize) {
1120 let cur_ix = self.tree.pop().unwrap();
1121 self.tree[cur_ix].item.end = ix;
1122 if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body {
1123 surgerize_tight_list(&mut self.tree, cur_ix);
1124 }
1125 }
1126
1127 /// Close a list if it's open. Also mark the containing list as loose if the last line was blank.
1128 fn finish_list(&mut self, ix: usize) {
1129 if let Some(node_ix) = self.tree.peek_up() {
1130 if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
1131 self.pop(ix);
1132 self.list_nesting -= 1;
1133 }
1134 }
1135 if self.last_line_blank {
1136 if let Some(node_ix) = self.tree.peek_grandparent() {
1137 if let ItemBody::List(ref mut is_tight, _, _) = self.tree[node_ix].item.body {
1138 *is_tight = false;
1139 }
1140 }
1141 self.last_line_blank = false;
1142 }
1143 }
1144
1145 /// Continue an existing list or start a new one if there's not an open
1146 /// list that matches.
1147 fn continue_list(&mut self, start: usize, ch: u8, index: u64) {
1148 if let Some(node_ix) = self.tree.peek_up() {
1149 if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body {
1150 if existing_ch == ch {
1151 if self.last_line_blank {
1152 *is_tight = false;
1153 self.last_line_blank = false;
1154 }
1155 return;
1156 }
1157 }
1158 // TODO: this is not the best choice for end; maybe get end from last list item.
1159 self.finish_list(start);
1160 }
1161 self.tree.append(Item {
1162 start,
1163 end: 0, // will get set later
1164 body: ItemBody::List(true, ch, index),
1165 });
1166 self.list_nesting += 1;
1167 self.tree.push();
1168 self.last_line_blank = false;
1169 }
1170
1171 /// Parse a thematic break.
1172 ///
1173 /// Returns index of start of next line.
1174 fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1175 self.tree.append(Item {
1176 start: ix,
1177 end: ix + hrule_size,
1178 body: ItemBody::Rule,
1179 });
1180 ix + hrule_size
1181 }
1182
1183 /// Parse an ATX heading.
1184 ///
1185 /// Returns index of start of next line.
1186 fn parse_atx_heading(&mut self, mut ix: usize, atx_size: usize) -> usize {
1187 let heading_ix = self.tree.append(Item {
1188 start: ix,
1189 end: 0, // set later
1190 body: ItemBody::Heading(atx_size as u32),
1191 });
1192 ix += atx_size;
1193 // next char is space or eol (guaranteed by scan_atx_heading)
1194 let bytes = self.text.as_bytes();
1195 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1196 self.tree[heading_ix].item.end = ix + eol_bytes;
1197 return ix + eol_bytes;
1198 }
1199 // skip leading spaces
1200 let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1201 ix += skip_spaces;
1202
1203 // now handle the header text
1204 let header_start = ix;
1205 let header_node_idx = self.tree.push(); // so that we can set the endpoint later
1206 ix = self.parse_line(ix, TableParseMode::Disabled).0;
1207 self.tree[header_node_idx].item.end = ix;
1208
1209 // remove trailing matter from header text
1210 if let Some(cur_ix) = self.tree.cur() {
1211 let header_text = &bytes[header_start..ix];
1212 let mut limit = header_text
1213 .iter()
1214 .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
1215 .map_or(0, |i| i + 1);
1216 let closer = header_text[..limit]
1217 .iter()
1218 .rposition(|&b| b != b'#')
1219 .map_or(0, |i| i + 1);
1220 if closer == 0 {
1221 limit = closer;
1222 } else {
1223 let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
1224 if spaces > 0 {
1225 limit = closer - spaces;
1226 }
1227 }
1228 self.tree[cur_ix].item.end = limit + header_start;
1229 }
1230
1231 self.tree.pop();
1232 ix
1233 }
1234
1235 /// Returns the number of bytes scanned on success.
1236 fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1237 let bytes = &self.text.as_bytes()[start..];
1238 if !bytes.starts_with(b"[^") {
1239 return None;
1240 }
1241 let (mut i, label) = self.parse_refdef_label(start + 2)?;
1242 i += 2;
1243 if scan_ch(&bytes[i..], b':') == 0 {
1244 return None;
1245 }
1246 i += 1;
1247 self.finish_list(start);
1248 self.tree.append(Item {
1249 start,
1250 end: 0, // will get set later
1251 // TODO: check whether the label here is strictly necessary
1252 body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)),
1253 });
1254 self.tree.push();
1255 Some(i)
1256 }
1257
1258 /// Tries to parse a reference label, which can be interrupted by new blocks.
1259 /// On success, returns the number of bytes of the label and the label itself.
1260 fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
1261 scan_link_label_rest(&self.text[start..], &|bytes| {
1262 let mut line_start = LineStart::new(bytes);
1263 let _ = scan_containers(&self.tree, &mut line_start);
1264 let bytes_scanned = line_start.bytes_scanned();
1265
1266 let suffix = &bytes[bytes_scanned..];
1267 if self.interrupt_paragraph_by_list(suffix) || scan_paragraph_interrupt(suffix) {
1268 None
1269 } else {
1270 Some(bytes_scanned)
1271 }
1272 })
1273 }
1274
1275 /// Returns number of bytes scanned, label and definition on success.
1276 fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1277 let bytes = &self.text.as_bytes()[start..];
1278 if scan_ch(bytes, b'[') == 0 {
1279 return None;
1280 }
1281 let (mut i, label) = self.parse_refdef_label(start + 1)?;
1282 i += 1;
1283 if scan_ch(&bytes[i..], b':') == 0 {
1284 return None;
1285 }
1286 i += 1;
1287 let (bytecount, link_def) = self.scan_refdef(start + i)?;
1288 Some((bytecount + i, UniCase::new(label), link_def))
1289 }
1290
1291 /// Returns number of bytes and number of newlines
1292 fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1293 let mut newlines = 0;
1294 loop {
1295 let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1296 i += whitespaces;
1297 if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1298 i += eol_bytes;
1299 newlines += 1;
1300 if newlines > 1 {
1301 return None;
1302 }
1303 } else {
1304 break;
1305 }
1306 let mut line_start = LineStart::new(&bytes[i..]);
1307 if self.tree.spine_len() != scan_containers(&self.tree, &mut line_start) {
1308 return None;
1309 }
1310 i += line_start.bytes_scanned();
1311 }
1312 Some((i, newlines))
1313 }
1314
1315 /// Returns # of bytes and definition.
1316 /// Assumes the label of the reference including colon has already been scanned.
1317 fn scan_refdef(&self, start: usize) -> Option<(usize, LinkDef<'a>)> {
1318 let bytes = self.text.as_bytes();
1319
1320 // whitespace between label and url (including up to one newline)
1321 let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;
1322
1323 // scan link dest
1324 let (dest_length, dest) = scan_link_dest(self.text, i, 1)?;
1325 if dest_length == 0 {
1326 return None;
1327 }
1328 let dest = unescape(dest);
1329 i += dest_length;
1330
1331 // no title
1332 let mut backup = (i - start, LinkDef { dest, title: None });
1333
1334 // scan whitespace between dest and label
1335 let (mut i, newlines) =
1336 if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
1337 if i == self.text.len() {
1338 newlines += 1;
1339 }
1340 if new_i == i && newlines == 0 {
1341 return None;
1342 }
1343 if newlines > 1 {
1344 return Some(backup);
1345 };
1346 (new_i, newlines)
1347 } else {
1348 return Some(backup);
1349 };
1350
1351 // scan title
1352 // if this fails but newline == 1, return also a refdef without title
1353 if let Some((title_length, title)) = scan_refdef_title(&self.text[i..]) {
1354 i += title_length;
1355 backup.1.title = Some(unescape(title));
1356 } else if newlines > 0 {
1357 return Some(backup);
1358 } else {
1359 return None;
1360 };
1361
1362 // scan EOL
1363 if let Some(bytes) = scan_blank_line(&bytes[i..]) {
1364 backup.0 = i + bytes - start;
1365 Some(backup)
1366 } else if newlines > 0 {
1367 Some(backup)
1368 } else {
1369 None
1370 }
1371 }
1372 }
1373
1374 /// Returns number of containers scanned.
1375 fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart) -> usize {
1376 let mut i = 0;
1377 for &node_ix in tree.walk_spine() {
1378 match tree[node_ix].item.body {
1379 ItemBody::BlockQuote => {
1380 let save = line_start.clone();
1381 if !line_start.scan_blockquote_marker() {
1382 *line_start = save;
1383 break;
1384 }
1385 }
1386 ItemBody::ListItem(indent) => {
1387 let save = line_start.clone();
1388 if !line_start.scan_space(indent) {
1389 if !line_start.is_at_eol() {
1390 *line_start = save;
1391 break;
1392 }
1393 }
1394 }
1395 _ => (),
1396 }
1397 i += 1;
1398 }
1399 i
1400 }
1401
1402 /// Computes the number of header columns in a table line by counting the dividing pipes
1403 /// that aren't followed or preceded by whitespace.
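///
/// For example, `| foo | bar |` has two header columns even though it
/// contains three pipes.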
1404 fn count_header_cols(
1405 bytes: &[u8],
1406 mut pipes: usize,
1407 mut start: usize,
1408 last_pipe_ix: usize,
1409 ) -> usize {
1410 // was first pipe preceded by whitespace? if so, subtract one
1411 start += scan_whitespace_no_nl(&bytes[start..]);
1412 if bytes[start] == b'|' {
1413 pipes -= 1;
1414 }
1415
1416 // was last pipe followed by whitespace? if so, sub one
1417 if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
1418 pipes
1419 } else {
1420 pipes + 1
1421 }
1422 }
1423
1424 impl<'a> Tree<Item> {
1425 fn append_text(&mut self, start: usize, end: usize) {
1426 if end > start {
1427 if let Some(ix) = self.cur() {
1428 if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
1429 self[ix].item.end = end;
1430 return;
1431 }
1432 }
1433 self.append(Item {
1434 start,
1435 end,
1436 body: ItemBody::Text,
1437 });
1438 }
1439 }
1440 }
1441
1442 /// Determines whether the delimiter run starting at given index is
1443 /// left-flanking, as defined by the commonmark spec (and isn't intraword
1444 /// for _ delims).
1445 /// suffix is &s[ix..], which is passed in as an optimization, since taking
1446 /// a string subslice is O(n).
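///
/// For example, in `foo *bar*` the first `*` can open (the next character is
/// not whitespace), while the underscores in `foo_bar_baz` can neither open
/// nor close because the runs are intraword.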
1447 fn delim_run_can_open(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1448 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1449 c
1450 } else {
1451 return false;
1452 };
1453 if next_char.is_whitespace() {
1454 return false;
1455 }
1456 if ix == 0 {
1457 return true;
1458 }
1459 let delim = suffix.chars().next().unwrap();
1460 if delim == '*' && !is_punctuation(next_char) {
1461 return true;
1462 }
1463
1464 let prev_char = s[..ix].chars().last().unwrap();
1465
1466 prev_char.is_whitespace() || is_punctuation(prev_char)
1467 }
1468
1469 /// Determines whether the delimiter run starting at given index is
1470 /// right-flanking, as defined by the commonmark spec (and isn't intraword
1471 /// for _ delims).
1472 fn delim_run_can_close(s: &str, suffix: &str, run_len: usize, ix: usize) -> bool {
1473 if ix == 0 {
1474 return false;
1475 }
1476 let prev_char = s[..ix].chars().last().unwrap();
1477 if prev_char.is_whitespace() {
1478 return false;
1479 }
1480 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
1481 c
1482 } else {
1483 return true;
1484 };
1485 let delim = suffix.chars().next().unwrap();
1486 if delim == '*' && !is_punctuation(prev_char) {
1487 return true;
1488 }
1489
1490 next_char.is_whitespace() || is_punctuation(next_char)
1491 }
1492
1493 /// Checks whether we should break a paragraph on the given input.
1494 /// Note: lists are dealt with in `interrupt_paragraph_by_list`, because determining
1495 /// whether to break on a list requires additional context.
1496 fn scan_paragraph_interrupt(bytes: &[u8]) -> bool {
1497 if scan_eol(bytes).is_some()
1498 || scan_hrule(bytes).is_ok()
1499 || scan_atx_heading(bytes).is_some()
1500 || scan_code_fence(bytes).is_some()
1501 || scan_blockquote_start(bytes).is_some()
1502 {
1503 return true;
1504 }
1505 bytes.starts_with(b"<")
1506 && (get_html_end_tag(&bytes[1..]).is_some()
1507 || is_html_tag(scan_html_block_tag(&bytes[1..]).1))
1508 }
1509
1510 /// Assumes `text_bytes` is preceded by `<`.
1511 fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
1512 static BEGIN_TAGS: &[&[u8]; 3] = &[b"pre", b"style", b"script"];
1513 static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
1514
1515 for (beg_tag, end_tag) in BEGIN_TAGS
1516 .iter()
1517 .zip(["</pre>", "</style>", "</script>"].iter())
1518 {
1519 let tag_len = beg_tag.len();
1520
1521 if text_bytes.len() < tag_len {
1522 // begin tags are increasing in size
1523 break;
1524 }
1525
1526 if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
1527 continue;
1528 }
1529
1530 // Must either be the end of the line...
1531 if text_bytes.len() == tag_len {
1532 return Some(end_tag);
1533 }
1534
1535 // ...or be followed by whitespace, newline, or '>'.
1536 let s = text_bytes[tag_len];
1537 if is_ascii_whitespace(s) || s == b'>' {
1538 return Some(end_tag);
1539 }
1540 }
1541
1542 for (beg_tag, end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
1543 if text_bytes.starts_with(beg_tag) {
1544 return Some(end_tag);
1545 }
1546 }
1547
1548 if text_bytes.len() > 1
1549 && text_bytes[0] == b'!'
1550 && text_bytes[1] >= b'A'
1551 && text_bytes[1] <= b'Z'
1552 {
1553 Some(">")
1554 } else {
1555 None
1556 }
1557 }
1558
1559 #[derive(Copy, Clone, Debug)]
1560 struct InlineEl {
1561 start: TreeIndex, // offset of tree node
1562 count: usize,
1563 c: u8, // b'*' or b'_'
1564 both: bool, // can both open and close
1565 }
1566
1567 #[derive(Debug, Clone, Default)]
1568 struct InlineStack {
1569 stack: Vec<InlineEl>,
1570 // Lower bounds for matching indices in the stack. For example
1571 // a strikethrough delimiter will never match with any element
1572 // in the stack with index smaller than
1573 // `lower_bounds[InlineStack::TILDES]`.
1574 lower_bounds: [usize; 7],
1575 }
1576
1577 impl InlineStack {
1578 /// These are indices into the lower bounds array.
1579 /// "Not both" refers to the property that the delimiter cannot be both
1580 /// an opener and a closer.
1581 const UNDERSCORE_NOT_BOTH: usize = 0;
1582 const ASTERISK_NOT_BOTH: usize = 1;
1583 const ASTERISK_BASE: usize = 2;
1584 const TILDES: usize = 5;
1585 const UNDERSCORE_BOTH: usize = 6;
1586
1587 fn pop_all(&mut self, tree: &mut Tree<Item>) {
1588 for el in self.stack.drain(..) {
1589 for i in 0..el.count {
1590 tree[el.start + i].item.body = ItemBody::Text;
1591 }
1592 }
1593 self.lower_bounds = [0; 7];
1594 }
1595
1596 fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1597 if c == b'_' {
1598 if both {
1599 self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
1600 } else {
1601 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
1602 }
1603 } else if c == b'*' {
1604 let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1605 if both {
1606 mod3_lower
1607 } else {
1608 min(
1609 mod3_lower,
1610 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1611 )
1612 }
1613 } else {
1614 self.lower_bounds[InlineStack::TILDES]
1615 }
1616 }
1617
1618 fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1619 if c == b'_' {
1620 if both {
1621 self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
1622 } else {
1623 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1624 }
1625 } else if c == b'*' {
1626 self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1627 if !both {
1628 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1629 }
1630 } else {
1631 self.lower_bounds[InlineStack::TILDES] = new_bound;
1632 }
1633 }
1634
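/// Finds a matching opener for a closing delimiter run, searching the stack
/// from the top down and applying CommonMark's "multiple of 3" rule for runs
/// that can both open and close. Delimiters above the match are demoted back
/// to plain text.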
1635 fn find_match(
1636 &mut self,
1637 tree: &mut Tree<Item>,
1638 c: u8,
1639 count: usize,
1640 both: bool,
1641 ) -> Option<InlineEl> {
1642 let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
1643 let res = self.stack[lowerbound..]
1644 .iter()
1645 .cloned()
1646 .enumerate()
1647 .rfind(|(_, el)| {
1648 el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0)
1649 });
1650
1651 if let Some((matching_ix, matching_el)) = res {
1652 let matching_ix = matching_ix + lowerbound;
1653 for el in &self.stack[(matching_ix + 1)..] {
1654 for i in 0..el.count {
1655 tree[el.start + i].item.body = ItemBody::Text;
1656 }
1657 }
1658 self.stack.truncate(matching_ix);
1659 Some(matching_el)
1660 } else {
1661 self.set_lowerbound(c, count, both, self.stack.len());
1662 None
1663 }
1664 }
1665
1666 fn push(&mut self, el: InlineEl) {
1667 self.stack.push(el)
1668 }
1669 }
1670
1671 #[derive(Debug, Clone)]
1672 enum RefScan<'a> {
1673 // label, next node index, source ix of label end
1674 LinkLabel(CowStr<'a>, Option<TreeIndex>, usize),
1675 // contains next node index
1676 Collapsed(Option<TreeIndex>),
1677 Failed,
1678 }
1679
1680 /// Skips forward within a block to a node which spans (ends inclusive) the given
1681 /// index into the source.
1682 fn scan_nodes_to_ix(
1683 tree: &Tree<Item>,
1684 mut node: Option<TreeIndex>,
1685 ix: usize,
1686 ) -> Option<TreeIndex> {
1687 while let Some(node_ix) = node {
1688 if tree[node_ix].item.end <= ix {
1689 node = tree[node_ix].next;
1690 } else {
1691 break;
1692 }
1693 }
1694 node
1695 }
1696
1697 /// Scans an inline link label, which cannot be interrupted.
1698 /// Returns number of bytes (including brackets) and label on success.
1699 fn scan_link_label<'text, 'tree>(
1700 tree: &'tree Tree<Item>,
1701 text: &'text str,
1702 ) -> Option<(usize, ReferenceLabel<'text>)> {
1703 let bytes = &text.as_bytes();
1704 if bytes.len() < 2 || bytes[0] != b'[' {
1705 return None;
1706 }
1707 let linebreak_handler = |bytes: &[u8]| {
1708 let mut line_start = LineStart::new(bytes);
1709 let _ = scan_containers(tree, &mut line_start);
1710 Some(line_start.bytes_scanned())
1711 };
1712 let pair = if b'^' == bytes[1] {
1713 let (byte_index, cow) = scan_link_label_rest(&text[2..], &linebreak_handler)?;
1714 (byte_index + 2, ReferenceLabel::Footnote(cow))
1715 } else {
1716 let (byte_index, cow) = scan_link_label_rest(&text[1..], &linebreak_handler)?;
1717 (byte_index + 1, ReferenceLabel::Link(cow))
1718 };
1719 Some(pair)
1720 }
1721
1722 fn scan_reference<'a, 'b>(
1723 tree: &'a Tree<Item>,
1724 text: &'b str,
1725 cur: Option<TreeIndex>,
1726 ) -> RefScan<'b> {
1727 let cur_ix = match cur {
1728 None => return RefScan::Failed,
1729 Some(cur_ix) => cur_ix,
1730 };
1731 let start = tree[cur_ix].item.start;
1732 let tail = &text.as_bytes()[start..];
1733
1734 if tail.starts_with(b"[]") {
1735 let closing_node = tree[cur_ix].next.unwrap();
1736 RefScan::Collapsed(tree[closing_node].next)
1737 } else if let Some((ix, ReferenceLabel::Link(label))) = scan_link_label(tree, &text[start..]) {
1738 let next_node = scan_nodes_to_ix(tree, cur, start + ix);
1739 RefScan::LinkLabel(label, next_node, start + ix)
1740 } else {
1741 RefScan::Failed
1742 }
1743 }
1744
1745 #[derive(Clone, Default)]
1746 struct LinkStack {
1747 inner: Vec<LinkStackEl>,
1748 disabled_ix: usize,
1749 }
1750
1751 impl LinkStack {
1752 fn push(&mut self, el: LinkStackEl) {
1753 self.inner.push(el);
1754 }
1755
1756 fn pop(&mut self) -> Option<LinkStackEl> {
1757 let el = self.inner.pop();
1758 self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1759 el
1760 }
1761
1762 fn clear(&mut self) {
1763 self.inner.clear();
1764 self.disabled_ix = 0;
1765 }
1766
1767 fn disable_all_links(&mut self) {
1768 for el in &mut self.inner[self.disabled_ix..] {
1769 if el.ty == LinkStackTy::Link {
1770 el.ty = LinkStackTy::Disabled;
1771 }
1772 }
1773 self.disabled_ix = self.inner.len();
1774 }
1775 }
1776
1777 #[derive(Clone, Debug)]
1778 struct LinkStackEl {
1779 node: TreeIndex,
1780 ty: LinkStackTy,
1781 }
1782
1783 #[derive(PartialEq, Clone, Debug)]
1784 enum LinkStackTy {
1785 Link,
1786 Image,
1787 Disabled,
1788 }
1789
1790 #[derive(Clone)]
1791 struct LinkDef<'a> {
1792 dest: CowStr<'a>,
1793 title: Option<CowStr<'a>>,
1794 }
1795
1796 /// Tracks tree indices of code span delimiters of each length. It should prevent
1797 /// quadratic scanning behaviours by providing (amortized) constant time lookups.
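///
/// Backtick runs are bucketed by their length; `find` pops queued indices for
/// a given length until it reaches one past the opening run.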
1798 struct CodeDelims {
1799 inner: HashMap<usize, VecDeque<TreeIndex>>,
1800 seen_first: bool,
1801 }
1802
1803 impl CodeDelims {
1804 fn new() -> Self {
1805 Self {
1806 inner: Default::default(),
1807 seen_first: false,
1808 }
1809 }
1810
1811 fn insert(&mut self, count: usize, ix: TreeIndex) {
1812 if self.seen_first {
1813 self.inner
1814 .entry(count)
1815 .or_insert_with(Default::default)
1816 .push_back(ix);
1817 } else {
1818 // Skip the first insert, since that delimiter will always
1819 // be an opener and not a closer.
1820 self.seen_first = true;
1821 }
1822 }
1823
1824 fn is_populated(&self) -> bool {
1825 !self.inner.is_empty()
1826 }
1827
1828 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1829 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1830 if ix > open_ix {
1831 return Some(ix);
1832 }
1833 }
1834 None
1835 }
1836
1837 fn clear(&mut self) {
1838 self.inner.clear();
1839 self.seen_first = false;
1840 }
1841 }
1842
1843 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
1844 struct LinkIndex(usize);
1845
1846 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
1847 struct CowIndex(usize);
1848
1849 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
1850 struct AlignmentIndex(usize);
1851
1852 #[derive(Clone)]
1853 struct Allocations<'a> {
1854 refdefs: HashMap<LinkLabel<'a>, LinkDef<'a>>,
1855 links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
1856 cows: Vec<CowStr<'a>>,
1857 alignments: Vec<Vec<Alignment>>,
1858 }
1859
1860 impl<'a> Allocations<'a> {
1861 fn new() -> Self {
1862 Self {
1863 refdefs: HashMap::new(),
1864 links: Vec::with_capacity(128),
1865 cows: Vec::new(),
1866 alignments: Vec::new(),
1867 }
1868 }
1869
1870 fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1871 let ix = self.cows.len();
1872 self.cows.push(cow);
1873 CowIndex(ix)
1874 }
1875
1876 fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
1877 let ix = self.links.len();
1878 self.links.push((ty, url, title));
1879 LinkIndex(ix)
1880 }
1881
1882 fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1883 let ix = self.alignments.len();
1884 self.alignments.push(alignment);
1885 AlignmentIndex(ix)
1886 }
1887 }
1888
1889 impl<'a> Index<CowIndex> for Allocations<'a> {
1890 type Output = CowStr<'a>;
1891
1892 fn index(&self, ix: CowIndex) -> &Self::Output {
1893 self.cows.index(ix.0)
1894 }
1895 }
1896
1897 impl<'a> Index<LinkIndex> for Allocations<'a> {
1898 type Output = (LinkType, CowStr<'a>, CowStr<'a>);
1899
1900 fn index(&self, ix: LinkIndex) -> &Self::Output {
1901 self.links.index(ix.0)
1902 }
1903 }
1904
1905 impl<'a> Index<AlignmentIndex> for Allocations<'a> {
1906 type Output = Vec<Alignment>;
1907
1908 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
1909 self.alignments.index(ix.0)
1910 }
1911 }
1912
1913 /// A struct containing information on the reachability of certain inline HTML
1914 /// elements. In particular, for cdata elements (`<![CDATA[`), processing
1915 /// instructions (`<?`) and declarations (`<!DECLARATION`). The respective usizes
1916 /// represent the indices before which a scan will always fail and can hence
1917 /// be skipped.
1918 #[derive(Clone, Default)]
1919 pub(crate) struct HtmlScanGuard {
1920 pub cdata: usize,
1921 pub processing: usize,
1922 pub declaration: usize,
1923 }
1924
1925 /// Markdown event iterator.
1926 #[derive(Clone)]
1927 pub struct Parser<'a> {
1928 text: &'a str,
1929 tree: Tree<Item>,
1930 allocs: Allocations<'a>,
1931 broken_link_callback: Option<&'a dyn Fn(&str, &str) -> Option<(String, String)>>,
1932 html_scan_guard: HtmlScanGuard,
1933
1934 // used by inline passes. store them here for reuse
1935 inline_stack: InlineStack,
1936 link_stack: LinkStack,
1937 }
1938
1939 impl<'a> Parser<'a> {
1940 /// Creates a new event iterator for a markdown string without any options enabled.
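///
/// # Example
///
/// A minimal usage sketch, rendering the events to HTML with the crate's
/// `html::push_html` helper:
///
/// ```
/// use pulldown_cmark::{html, Parser};
///
/// let parser = Parser::new("Hello *world*");
/// let mut out = String::new();
/// html::push_html(&mut out, parser);
/// assert_eq!(out, "<p>Hello <em>world</em></p>\n");
/// ```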
1941 pub fn new(text: &'a str) -> Parser<'a> {
1942 Parser::new_ext(text, Options::empty())
1943 }
1944
1945 /// Creates a new event iterator for a markdown string with given options.
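///
/// # Example
///
/// A small sketch enabling the strikethrough extension; the expected output
/// follows this crate's HTML renderer:
///
/// ```
/// use pulldown_cmark::{html, Options, Parser};
///
/// let mut options = Options::empty();
/// options.insert(Options::ENABLE_STRIKETHROUGH);
/// let parser = Parser::new_ext("~~old~~ new", options);
/// let mut out = String::new();
/// html::push_html(&mut out, parser);
/// assert_eq!(out, "<p><del>old</del> new</p>\n");
/// ```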
1946 pub fn new_ext(text: &'a str, options: Options) -> Parser<'a> {
1947 Parser::new_with_broken_link_callback(text, options, None)
1948 }
1949
1950 /// In case the parser encounters any potential links that have a broken
1951 /// reference (e.g. `[foo]` when there is no `[foo]: ` entry at the bottom),
1952 /// the provided callback will be called with the reference name,
1953 /// and the returned pair will be used as the link URL and title if it is not
1954 /// `None`.
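///
/// # Example
///
/// A minimal sketch of resolving a broken reference through the callback; the
/// `example.com` destination is purely illustrative:
///
/// ```
/// use pulldown_cmark::{Event, Options, Parser, Tag};
///
/// let callback = |_normalized: &str, raw: &str| {
///     Some((format!("https://example.com/{}", raw), String::new()))
/// };
/// let parser = Parser::new_with_broken_link_callback(
///     "[missing]",
///     Options::empty(),
///     Some(&callback),
/// );
/// let urls: Vec<_> = parser
///     .filter_map(|event| match event {
///         Event::Start(Tag::Link(_, url, _)) => Some(url),
///         _ => None,
///     })
///     .collect();
/// assert_eq!(urls.len(), 1);
/// assert_eq!(urls[0].as_ref(), "https://example.com/missing");
/// ```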
1955 pub fn new_with_broken_link_callback(
1956 text: &'a str,
1957 options: Options,
1958 broken_link_callback: Option<&'a dyn Fn(&str, &str) -> Option<(String, String)>>,
1959 ) -> Parser<'a> {
1960 let first_pass = FirstPass::new(text, options);
1961 let (mut tree, allocs) = first_pass.run();
1962 tree.reset();
1963 let inline_stack = Default::default();
1964 let link_stack = Default::default();
1965 let html_scan_guard = Default::default();
1966 Parser {
1967 text,
1968 tree,
1969 allocs,
1970 broken_link_callback,
1971 inline_stack,
1972 link_stack,
1973 html_scan_guard,
1974 }
1975 }
1976
1977 /// Handle inline markup.
1978 ///
1979 /// When the parser encounters any item indicating potential inline markup, all
1980 /// inline markup passes are run on the remainder of the chain.
1981 ///
1982 /// Note: there's some potential for optimization here, but that's future work.
1983 fn handle_inline(&mut self) {
1984 self.handle_inline_pass1();
1985 self.handle_emphasis();
1986 }
1987
1988 /// Handle inline HTML, code spans, and links.
1989 ///
1990 /// This function handles both inline HTML and code spans, because they have
1991 /// the same precedence. It also handles links, even though they have lower
1992 /// precedence, because the URL of links must not be processed.
1993 fn handle_inline_pass1(&mut self) {
1994 let mut code_delims = CodeDelims::new();
1995 let mut cur = self.tree.cur();
1996 let mut prev = None;
1997
1998 let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
1999 let block_text = &self.text[..block_end];
2000
2001 while let Some(mut cur_ix) = cur {
2002 match self.tree[cur_ix].item.body {
2003 ItemBody::MaybeHtml => {
2004 let next = self.tree[cur_ix].next;
2005 let autolink = if let Some(next_ix) = next {
2006 scan_autolink(block_text, self.tree[next_ix].item.start)
2007 } else {
2008 None
2009 };
2010
2011 if let Some((ix, uri, link_type)) = autolink {
2012 let node = scan_nodes_to_ix(&self.tree, next, ix);
2013 let text_node = self.tree.create_node(Item {
2014 start: self.tree[cur_ix].item.start + 1,
2015 end: ix - 1,
2016 body: ItemBody::Text,
2017 });
2018 let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
2019 self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
2020 self.tree[cur_ix].item.end = ix;
2021 self.tree[cur_ix].next = node;
2022 self.tree[cur_ix].child = Some(text_node);
2023 prev = cur;
2024 cur = node;
2025 if let Some(node_ix) = cur {
2026 self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
2027 }
2028 continue;
2029 } else {
2030 let inline_html = if let Some(next_ix) = next {
2031 self.scan_inline_html(
2032 block_text.as_bytes(),
2033 self.tree[next_ix].item.start,
2034 )
2035 } else {
2036 None
2037 };
2038 if let Some(ix) = inline_html {
2039 let node = scan_nodes_to_ix(&self.tree, next, ix);
2040 self.tree[cur_ix].item.body = ItemBody::Html;
2041 self.tree[cur_ix].item.end = ix;
2042 self.tree[cur_ix].next = node;
2043 prev = cur;
2044 cur = node;
2045 if let Some(node_ix) = cur {
2046 self.tree[node_ix].item.start =
2047 max(self.tree[node_ix].item.start, ix);
2048 }
2049 continue;
2050 }
2051 }
2052 self.tree[cur_ix].item.body = ItemBody::Text;
2053 }
2054 ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
2055 if preceded_by_backslash {
2056 search_count -= 1;
2057 if search_count == 0 {
2058 self.tree[cur_ix].item.body = ItemBody::Text;
2059 prev = cur;
2060 cur = self.tree[cur_ix].next;
2061 continue;
2062 }
2063 }
2064
2065 if code_delims.is_populated() {
2066 // we have previously scanned all code span delimiters,
2067 // so we can reuse that work
2068 if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
2069 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
2070 } else {
2071 self.tree[cur_ix].item.body = ItemBody::Text;
2072 }
2073 } else {
2074 // we haven't previously scanned all code span delimiters,
2075 // so walk the AST
2076 let mut scan = if search_count > 0 {
2077 self.tree[cur_ix].next
2078 } else {
2079 None
2080 };
2081 while let Some(scan_ix) = scan {
2082 if let ItemBody::MaybeCode(delim_count, _) =
2083 self.tree[scan_ix].item.body
2084 {
2085 if search_count == delim_count {
2086 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
2087 code_delims.clear();
2088 break;
2089 } else {
2090 code_delims.insert(delim_count, scan_ix);
2091 }
2092 }
2093 scan = self.tree[scan_ix].next;
2094 }
2095 if scan.is_none() {
2096 self.tree[cur_ix].item.body = ItemBody::Text;
2097 }
2098 }
2099 }
2100 ItemBody::MaybeLinkOpen => {
2101 self.tree[cur_ix].item.body = ItemBody::Text;
2102 self.link_stack.push(LinkStackEl {
2103 node: cur_ix,
2104 ty: LinkStackTy::Link,
2105 });
2106 }
2107 ItemBody::MaybeImage => {
2108 self.tree[cur_ix].item.body = ItemBody::Text;
2109 self.link_stack.push(LinkStackEl {
2110 node: cur_ix,
2111 ty: LinkStackTy::Image,
2112 });
2113 }
2114 ItemBody::MaybeLinkClose => {
2115 self.tree[cur_ix].item.body = ItemBody::Text;
2116 if let Some(tos) = self.link_stack.pop() {
2117 if tos.ty == LinkStackTy::Disabled {
2118 continue;
2119 }
2120 let next = self.tree[cur_ix].next;
2121 if let Some((next_ix, url, title)) =
2122 self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
2123 {
2124 let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
2125 if let Some(prev_ix) = prev {
2126 self.tree[prev_ix].next = None;
2127 }
2128 cur = Some(tos.node);
2129 cur_ix = tos.node;
2130 let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
2131 self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
2132 ItemBody::Image(link_ix)
2133 } else {
2134 ItemBody::Link(link_ix)
2135 };
2136 self.tree[cur_ix].child = self.tree[cur_ix].next;
2137 self.tree[cur_ix].next = next_node;
2138 self.tree[cur_ix].item.end = next_ix;
2139 if let Some(next_node_ix) = next_node {
2140 self.tree[next_node_ix].item.start =
2141 max(self.tree[next_node_ix].item.start, next_ix);
2142 }
2143
2144 if tos.ty == LinkStackTy::Link {
2145 self.link_stack.disable_all_links();
2146 }
2147 } else {
2148 // ok, so it's not an inline link. maybe it is a reference
2149 // to a defined link?
2150 let scan_result = scan_reference(&self.tree, block_text, next);
2151 let (node_after_link, link_type) = match scan_result {
2152 // [label][reference]
2153 RefScan::LinkLabel(_, next_node, _) => {
2154 (next_node, LinkType::Reference)
2155 }
2156 // []
2157 RefScan::Collapsed(next_node) => (next_node, LinkType::Collapsed),
2158 // [shortcut]
2159 //
2160 // [shortcut]: /blah
2161 RefScan::Failed => (next, LinkType::Shortcut),
2162 };
2163
2164 // (label, end index in the source)
2165 let label: Option<(ReferenceLabel<'a>, usize)> = match scan_result {
2166 RefScan::LinkLabel(l, _, end_ix) => {
2167 Some((ReferenceLabel::Link(l), end_ix))
2168 }
2169 RefScan::Collapsed(..) | RefScan::Failed => {
2170 // No label? maybe it is a shortcut reference
2171 let label_start = self.tree[tos.node].item.end - 1;
2172 scan_link_label(
2173 &self.tree,
2174 &self.text[label_start..self.tree[cur_ix].item.end],
2175 )
2176 .map(|(ix, label)| (label, label_start + ix))
2177 }
2178 };
2179
2180 // see if it's a footnote reference
2181 if let Some((ReferenceLabel::Footnote(l), end)) = label {
2182 self.tree[tos.node].next = node_after_link;
2183 self.tree[tos.node].child = None;
2184 self.tree[tos.node].item.body =
2185 ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
2186 self.tree[tos.node].item.end = end;
2187 prev = Some(tos.node);
2188 cur = node_after_link;
2189 self.link_stack.clear();
2190 continue;
2191 } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
2192 let type_url_title = self
2193 .allocs
2194 .refdefs
2195 .get(&UniCase::new(link_label.as_ref().into()))
2196 .map(|matching_def| {
2197 // found a matching definition!
2198 let title = matching_def
2199 .title
2200 .as_ref()
2201 .cloned()
2202 .unwrap_or_else(|| "".into());
2203 let url = matching_def.dest.clone();
2204 (link_type, url, title)
2205 })
2206 .or_else(|| {
2207 self.broken_link_callback
2208 .and_then(|callback| {
2209 // looked for matching definition, but didn't find it. try to fix
2210 // link with callback, if it is defined
2211 callback(link_label.as_ref(), link_label.as_ref())
2212 })
2213 .map(|(url, title)| {
2214 (link_type.to_unknown(), url.into(), title.into())
2215 })
2216 });
2217
2218 if let Some((def_link_type, url, title)) = type_url_title {
2219 let link_ix =
2220 self.allocs.allocate_link(def_link_type, url, title);
2221 self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
2222 {
2223 ItemBody::Image(link_ix)
2224 } else {
2225 ItemBody::Link(link_ix)
2226 };
2227 let label_node = self.tree[tos.node].next;
2228
2229 // let's do some tree surgery to add the link to the tree
2230 // 1st: skip the label node and close node
2231 self.tree[tos.node].next = node_after_link;
2232
2233 // then, if it exists, add the label node as a child to the link node
2234 if label_node != cur {
2235 self.tree[tos.node].child = label_node;
2236
2237 // finally: disconnect list of children
2238 if let Some(prev_ix) = prev {
2239 self.tree[prev_ix].next = None;
2240 }
2241 }
2242
2243 self.tree[tos.node].item.end = end;
2244
2245 // set up cur so next node will be node_after_link
2246 cur = Some(tos.node);
2247 cur_ix = tos.node;
2248
2249 if tos.ty == LinkStackTy::Link {
2250 self.link_stack.disable_all_links();
2251 }
2252 }
2253 }
2254 }
2255 }
2256 }
2257 _ => (),
2258 }
2259 prev = cur;
2260 cur = self.tree[cur_ix].next;
2261 }
2262 self.link_stack.clear();
2263 }
2264
2265 fn handle_emphasis(&mut self) {
2266 let mut prev = None;
2267 let mut prev_ix: TreeIndex;
2268 let mut cur = self.tree.cur();
2269 while let Some(mut cur_ix) = cur {
2270 if let ItemBody::MaybeEmphasis(mut count, can_open, can_close) =
2271 self.tree[cur_ix].item.body
2272 {
2273 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
2274 let both = can_open && can_close;
2275 if can_close {
2276 while let Some(el) =
2277 self.inline_stack.find_match(&mut self.tree, c, count, both)
2278 {
2279 // have a match!
2280 if let Some(prev_ix) = prev {
2281 self.tree[prev_ix].next = None;
2282 }
2283 let match_count = min(count, el.count);
2284 // start, end are tree node indices
2285 let mut end = cur_ix - 1;
2286 let mut start = el.start + el.count;
2287
2288 // work from the inside out
2289 while start > el.start + el.count - match_count {
2290 let (inc, ty) = if c == b'~' {
2291 (2, ItemBody::Strikethrough)
2292 } else if start > el.start + el.count - match_count + 1 {
2293 (2, ItemBody::Strong)
2294 } else {
2295 (1, ItemBody::Emphasis)
2296 };
2297
2298 let root = start - inc;
2299 end = end + inc;
2300 self.tree[root].item.body = ty;
2301 self.tree[root].item.end = self.tree[end].item.end;
2302 self.tree[root].child = Some(start);
2303 self.tree[root].next = None;
2304 start = root;
2305 }
2306
2307 // set next for topmost emph level
2308 prev_ix = el.start + el.count - match_count;
2309 prev = Some(prev_ix);
2310 cur = self.tree[cur_ix + match_count - 1].next;
2311 self.tree[prev_ix].next = cur;
2312
2313 if el.count > match_count {
2314 self.inline_stack.push(InlineEl {
2315 start: el.start,
2316 count: el.count - match_count,
2317 c: el.c,
2318 both,
2319 })
2320 }
2321 count -= match_count;
2322 if count > 0 {
2323 cur_ix = cur.unwrap();
2324 } else {
2325 break;
2326 }
2327 }
2328 }
2329 if count > 0 {
2330 if can_open {
2331 self.inline_stack.push(InlineEl {
2332 start: cur_ix,
2333 count,
2334 c,
2335 both,
2336 });
2337 } else {
2338 for i in 0..count {
2339 self.tree[cur_ix + i].item.body = ItemBody::Text;
2340 }
2341 }
2342 prev_ix = cur_ix + count - 1;
2343 prev = Some(prev_ix);
2344 cur = self.tree[prev_ix].next;
2345 }
2346 } else {
2347 prev = cur;
2348 cur = self.tree[cur_ix].next;
2349 }
2350 }
2351 self.inline_stack.pop_all(&mut self.tree);
2352 }
2353
2354 /// Returns the next byte index, the URL and the title.
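///
/// For example, given the remaining text `(/url "the title") rest` starting
/// at `ix`, this returns the byte index just past the closing `)`, the
/// destination `/url` and the title `the title`.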
2355 fn scan_inline_link(
2356 &self,
2357 underlying: &'a str,
2358 mut ix: usize,
2359 node: Option<TreeIndex>,
2360 ) -> Option<(usize, CowStr<'a>, CowStr<'a>)> {
2361 if scan_ch(&underlying.as_bytes()[ix..], b'(') == 0 {
2362 return None;
2363 }
2364 ix += 1;
2365 ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2366
2367 let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
2368 let dest = unescape(dest);
2369 ix += dest_length;
2370
2371 ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2372
2373 let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
2374 ix += bytes_scanned;
2375 ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
2376 t
2377 } else {
2378 "".into()
2379 };
2380 if scan_ch(&underlying.as_bytes()[ix..], b')') == 0 {
2381 return None;
2382 }
2383 ix += 1;
2384
2385 Some((ix, dest, title))
2386 }
2387
2388 // returns (bytes scanned, title cow)
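// Titles may be delimited by single quotes, double quotes or parentheses;
// entity references and backslash-escaped punctuation are resolved while
// scanning, and line breaks inside the title are normalized to '\n'.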
2389 fn scan_link_title(
2390 &self,
2391 text: &'a str,
2392 start_ix: usize,
2393 node: Option<TreeIndex>,
2394 ) -> Option<(usize, CowStr<'a>)> {
2395 let bytes = text.as_bytes();
2396 let open = match bytes.get(start_ix) {
2397 Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
2398 _ => return None,
2399 };
2400 let close = if open == b'(' { b')' } else { open };
2401
2402 let mut title = String::new();
2403 let mut mark = start_ix + 1;
2404 let mut i = start_ix + 1;
2405
2406 while i < bytes.len() {
2407 let c = bytes[i];
2408
2409 if c == close {
2410 let cow = if mark == start_ix + 1 {
2411 (i - start_ix + 1, text[mark..i].into())
2412 } else {
2413 title.push_str(&text[mark..i]);
2414 (i - start_ix + 1, title.into())
2415 };
2416
2417 return Some(cow);
2418 }
2419 if c == open {
2420 return None;
2421 }
2422
2423 if c == b'\n' || c == b'\r' {
2424 if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
2425 if self.tree[node_ix].item.start > i {
2426 title.push_str(&text[mark..i]);
2427 title.push('\n');
2428 i = self.tree[node_ix].item.start;
2429 mark = i;
2430 continue;
2431 }
2432 }
2433 }
2434 if c == b'&' {
2435 if let (n, Some(value)) = scan_entity(&bytes[i..]) {
2436 title.push_str(&text[mark..i]);
2437 title.push_str(&value);
2438 i += n;
2439 mark = i;
2440 continue;
2441 }
2442 }
2443 if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
2444 title.push_str(&text[mark..i]);
2445 i += 1;
2446 mark = i;
2447 }
2448
2449 i += 1;
2450 }
2451
2452 None
2453 }
2454
2455 /// Make a code span.
2456 ///
2457 /// Both `open` and `close` are matching MaybeCode items.
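///
/// Per CommonMark, one leading and one trailing space (or line ending) is
/// stripped when both are present and the span is not made up entirely of
/// spaces; e.g. `` ` foo ` `` yields the code span `foo`.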
2458 fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
2459 let first_ix = open + 1;
2460 let last_ix = close - 1;
2461 let bytes = self.text.as_bytes();
2462 let mut span_start = self.tree[open].item.end;
2463 let mut span_end = self.tree[close].item.start;
2464 let mut buf: Option<String> = None;
2465
2466 // detect all-space sequences, since they are kept as-is as of commonmark 0.29
2467 if !bytes[span_start..span_end].iter().all(|&b| b == b' ') {
2468 let opening = match bytes[span_start] {
2469 b' ' | b'\r' | b'\n' => true,
2470 _ => false,
2471 };
2472 let closing = match bytes[span_end - 1] {
2473 b' ' | b'\r' | b'\n' => true,
2474 _ => false,
2475 };
2476 let drop_enclosing_whitespace = opening && closing;
2477
2478 if drop_enclosing_whitespace {
2479 span_start += 1;
2480 if span_start < span_end {
2481 span_end -= 1;
2482 }
2483 }
2484
2485 let mut ix = first_ix;
2486
2487 while ix < close {
2488 if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body {
2489 if drop_enclosing_whitespace {
2490 // check whether break should be ignored
2491 if ix == first_ix {
2492 ix = ix + 1;
2493 span_start = min(span_end, self.tree[ix].item.start);
2494 continue;
2495 } else if ix == last_ix && last_ix > first_ix {
2496 ix = ix + 1;
2497 continue;
2498 }
2499 }
2500
2501 let end = bytes[self.tree[ix].item.start..]
2502 .iter()
2503 .position(|&b| b == b'\r' || b == b'\n')
2504 .unwrap()
2505 + self.tree[ix].item.start;
2506 if let Some(ref mut buf) = buf {
2507 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2508 buf.push(' ');
2509 } else {
2510 let mut new_buf = String::with_capacity(span_end - span_start);
2511 new_buf.push_str(&self.text[span_start..end]);
2512 new_buf.push(' ');
2513 buf = Some(new_buf);
2514 }
2515 } else if let Some(ref mut buf) = buf {
2516 let end = if ix == last_ix {
2517 span_end
2518 } else {
2519 self.tree[ix].item.end
2520 };
2521 buf.push_str(&self.text[self.tree[ix].item.start..end]);
2522 }
2523 ix = ix + 1;
2524 }
2525 }
2526
2527 let cow = if let Some(buf) = buf {
2528 buf.into()
2529 } else {
2530 self.text[span_start..span_end].into()
2531 };
2532 if preceding_backslash {
2533 self.tree[open].item.body = ItemBody::Text;
2534 self.tree[open].item.end = self.tree[open].item.start + 1;
2535 self.tree[open].next = Some(close);
2536 self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2537 self.tree[close].item.start = self.tree[open].item.start + 1;
2538 } else {
2539 self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
2540 self.tree[open].item.end = self.tree[close].item.end;
2541 self.tree[open].next = self.tree[close].next;
2542 }
2543 }
2544
2545 /// Returns the next byte offset on success.
2546 fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<usize> {
2547 let c = *bytes.get(ix)?;
2548 if c == b'!' {
2549 scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)
2550 } else if c == b'?' {
2551 scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)
2552 } else {
2553 let i = scan_html_block_inner(
2554 &bytes[ix..],
2555 Some(&|_bytes| {
2556 let mut line_start = LineStart::new(bytes);
2557 let _ = scan_containers(&self.tree, &mut line_start);
2558 line_start.bytes_scanned()
2559 }),
2560 )?;
2561 Some(i + ix)
2562 }
2563 }
2564
2565 /// Consumes the event iterator and produces an iterator that yields
2566 /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
2567 /// range in the markdown source.
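///
/// # Example
///
/// A small sketch mapping each event of `*hello* world` back to its source
/// range:
///
/// ```
/// use pulldown_cmark::Parser;
///
/// let ranges: Vec<_> = Parser::new("*hello* world")
///     .into_offset_iter()
///     .map(|(_event, range)| range)
///     .collect();
/// assert_eq!(ranges, vec![0..13, 0..7, 1..6, 0..7, 7..13, 0..13]);
/// ```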
2568 pub fn into_offset_iter(self) -> OffsetIter<'a> {
2569 OffsetIter { inner: self }
2570 }
2571 }
2572
2573 pub(crate) enum LoopInstruction<T> {
2574 /// Continue looking for more special bytes, but skip next few bytes.
2575 ContinueAndSkip(usize),
2576 /// Break looping immediately, returning with the given index and value.
2577 BreakAtWith(usize, T),
2578 }
2579
2580 /// This function walks the byte slice from the given index and
2581 /// calls the callback function on all bytes (and their indices) that are in the following set:
2582 /// `` ` ``, `\`, `&`, `*`, `_`, `~`, `!`, `<`, `[`, `]`, `|`, `\r`, `\n`
2583 /// It is guaranteed not to call the callback on other bytes.
2584 /// Whenever `callback(ix, byte)` returns a `ContinueAndSkip(n)` value, the callback
2585 /// will not be called with an index that is less than `ix + n + 1`.
2586 /// When the callback returns a `BreakAtWith(end_ix, opt_val)`, no more callbacks will be
2587 /// called and the function returns immediately with the return value `(end_ix, opt_val)`.
2588 /// If `BreakAtWith(..)` is never returned, this function will return the first
2589 /// index that is outside the byte slice bounds and a `None` value.
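///
/// A minimal sketch of the callback contract (not compiled; shown for the
/// input `a*b\c`, where `*` and `\` are special bytes):
///
/// ```ignore
/// let (ix, found) = iterate_special_bytes(b"a*b\\c", 0, |ix, byte| match byte {
///     // skip the byte right after each '*'
///     b'*' => LoopInstruction::ContinueAndSkip(1),
///     // stop at the first backslash, reporting its index
///     b'\\' => LoopInstruction::BreakAtWith(ix, Some(ix)),
///     _ => LoopInstruction::ContinueAndSkip(0),
/// });
/// assert_eq!((ix, found), (3, Some(3)));
/// ```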
2590 fn iterate_special_bytes<F, T>(bytes: &[u8], ix: usize, callback: F) -> (usize, Option<T>)
2591 where
2592 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2593 {
2594 #[cfg(all(target_arch = "x86_64", feature = "simd"))]
2595 {
2596 crate::simd::iterate_special_bytes(bytes, ix, callback)
2597 }
2598 #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2599 {
2600 scalar_iterate_special_bytes(bytes, ix, callback)
2601 }
2602 }
2603
2604 const fn special_bytes() -> [bool; 256] {
2605 let mut bytes = [false; 256];
2606 bytes[b'<' as usize] = true;
2607 bytes[b'!' as usize] = true;
2608 bytes[b'[' as usize] = true;
2609 bytes[b'~' as usize] = true;
2610 bytes[b'`' as usize] = true;
2611 bytes[b'|' as usize] = true;
2612 bytes[b'\\' as usize] = true;
2613 bytes[b'*' as usize] = true;
2614 bytes[b'_' as usize] = true;
2615 bytes[b'\r' as usize] = true;
2616 bytes[b'\n' as usize] = true;
2617 bytes[b']' as usize] = true;
2618 bytes[b'&' as usize] = true;
2619 bytes
2620 }
2621
2622 pub(crate) fn scalar_iterate_special_bytes<F, T>(
2623 bytes: &[u8],
2624 mut ix: usize,
2625 mut callback: F,
2626 ) -> (usize, Option<T>)
2627 where
2628 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2629 {
2630 let special_bytes = special_bytes();
2631
2632 while ix < bytes.len() {
2633 let b = bytes[ix];
2634 if special_bytes[b as usize] {
2635 match callback(ix, b) {
2636 LoopInstruction::ContinueAndSkip(skip) => {
2637 ix += skip;
2638 }
2639 LoopInstruction::BreakAtWith(ix, val) => {
2640 return (ix, val);
2641 }
2642 }
2643 }
2644 ix += 1;
2645 }
2646
2647 (ix, None)
2648 }
2649
2650 /// Markdown event and source range iterator.
2651 ///
2652 /// Generates tuples where the first element is the markdown event and the second
2653 /// is the corresponding range in the source string.
2654 ///
2655 /// Constructed from a `Parser` using its
2656 /// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
2657 pub struct OffsetIter<'a> {
2658 inner: Parser<'a>,
2659 }
2660
2661 impl<'a> Iterator for OffsetIter<'a> {
2662 type Item = (Event<'a>, Range<usize>);
2663
2664 fn next(&mut self) -> Option<Self::Item> {
2665 match self.inner.tree.cur() {
2666 None => {
2667 let ix = self.inner.tree.pop()?;
2668 let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs);
2669 self.inner.tree.next_sibling(ix);
2670 Some((
2671 Event::End(tag),
2672 self.inner.tree[ix].item.start..self.inner.tree[ix].item.end,
2673 ))
2674 }
2675 Some(cur_ix) => {
2676 if self.inner.tree[cur_ix].item.body.is_inline() {
2677 self.inner.handle_inline();
2678 }
2679
2680 let node = self.inner.tree[cur_ix];
2681 let item = node.item;
2682 let event = item_to_event(item, self.inner.text, &self.inner.allocs);
2683 if let Event::Start(..) = event {
2684 self.inner.tree.push();
2685 } else {
2686 self.inner.tree.next_sibling(cur_ix);
2687 }
2688 Some((event, item.start..item.end))
2689 }
2690 }
2691 }
2692 }
2693
2694 fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> {
2695 match item.body {
2696 ItemBody::Paragraph => Tag::Paragraph,
2697 ItemBody::Emphasis => Tag::Emphasis,
2698 ItemBody::Strong => Tag::Strong,
2699 ItemBody::Strikethrough => Tag::Strikethrough,
2700 ItemBody::Link(link_ix) => {
2701 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2702 Tag::Link(*link_type, url.clone(), title.clone())
2703 }
2704 ItemBody::Image(link_ix) => {
2705 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2706 Tag::Image(*link_type, url.clone(), title.clone())
2707 }
2708 ItemBody::Heading(level) => Tag::Heading(level),
2709 ItemBody::FencedCodeBlock(cow_ix) => {
2710 Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
2711 }
2712 ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2713 ItemBody::BlockQuote => Tag::BlockQuote,
2714 ItemBody::List(_, c, listitem_start) => {
2715 if c == b'.' || c == b')' {
2716 Tag::List(Some(listitem_start))
2717 } else {
2718 Tag::List(None)
2719 }
2720 }
2721 ItemBody::ListItem(_) => Tag::Item,
2722 ItemBody::TableHead => Tag::TableHead,
2723 ItemBody::TableCell => Tag::TableCell,
2724 ItemBody::TableRow => Tag::TableRow,
2725 ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
2726 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2727 _ => panic!("unexpected item body {:?}", item.body),
2728 }
2729 }
2730
2731 fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> {
2732 let tag = match item.body {
2733 ItemBody::Text => return Event::Text(text[item.start..item.end].into()),
2734 ItemBody::Code(cow_ix) => return Event::Code(allocs[cow_ix].clone()),
2735 ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs[cow_ix].clone()),
2736 ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2737 ItemBody::SoftBreak => return Event::SoftBreak,
2738 ItemBody::HardBreak => return Event::HardBreak,
2739 ItemBody::FootnoteReference(cow_ix) => {
2740 return Event::FootnoteReference(allocs[cow_ix].clone())
2741 }
2742 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2743 ItemBody::Rule => return Event::Rule,
2744
2745 ItemBody::Paragraph => Tag::Paragraph,
2746 ItemBody::Emphasis => Tag::Emphasis,
2747 ItemBody::Strong => Tag::Strong,
2748 ItemBody::Strikethrough => Tag::Strikethrough,
2749 ItemBody::Link(link_ix) => {
2750 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2751 Tag::Link(*link_type, url.clone(), title.clone())
2752 }
2753 ItemBody::Image(link_ix) => {
2754 let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
2755 Tag::Image(*link_type, url.clone(), title.clone())
2756 }
2757 ItemBody::Heading(level) => Tag::Heading(level),
2758 ItemBody::FencedCodeBlock(cow_ix) => {
2759 Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
2760 }
2761 ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2762 ItemBody::BlockQuote => Tag::BlockQuote,
2763 ItemBody::List(_, c, listitem_start) => {
2764 if c == b'.' || c == b')' {
2765 Tag::List(Some(listitem_start))
2766 } else {
2767 Tag::List(None)
2768 }
2769 }
2770 ItemBody::ListItem(_) => Tag::Item,
2771 ItemBody::TableHead => Tag::TableHead,
2772 ItemBody::TableCell => Tag::TableCell,
2773 ItemBody::TableRow => Tag::TableRow,
2774 ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
2775 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
2776 _ => panic!("unexpected item body {:?}", item.body),
2777 };
2778
2779 Event::Start(tag)
2780 }
2781
2782 // https://english.stackexchange.com/a/285573
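// Removes the `Paragraph` wrappers inside each item of a tight list by splicing
// the paragraphs' children directly into the list item's child chain.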
2783 fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
2784 let mut list_item = tree[list_ix].child;
2785 while let Some(listitem_ix) = list_item {
2786 // first child is special, controls how we repoint list_item.child
2787 let list_item_firstborn = tree[listitem_ix].child;
2788
2789 // Check that list item has children - this is not necessarily the case!
2790 if let Some(firstborn_ix) = list_item_firstborn {
2791 if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
2792 tree[listitem_ix].child = tree[firstborn_ix].child;
2793 }
2794
2795 let mut list_item_child = Some(firstborn_ix);
2796 let mut node_to_repoint = None;
2797 while let Some(child_ix) = list_item_child {
2798 // surgerize paragraphs
2799 let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
2800 if let Some(child_firstborn) = tree[child_ix].child {
2801 if let Some(repoint_ix) = node_to_repoint {
2802 tree[repoint_ix].next = Some(child_firstborn);
2803 }
2804 let mut child_lastborn = child_firstborn;
2805 while let Some(lastborn_next_ix) = tree[child_lastborn].next {
2806 child_lastborn = lastborn_next_ix;
2807 }
2808 child_lastborn
2809 } else {
2810 child_ix
2811 }
2812 } else {
2813 child_ix
2814 };
2815
2816 node_to_repoint = Some(repoint_ix);
2817 tree[repoint_ix].next = tree[child_ix].next;
2818 list_item_child = tree[child_ix].next;
2819 }
2820 }
2821
2822 list_item = tree[listitem_ix].next;
2823 }
2824 }
2825
2826 impl<'a> Iterator for Parser<'a> {
2827 type Item = Event<'a>;
2828
2829 fn next(&mut self) -> Option<Event<'a>> {
2830 match self.tree.cur() {
2831 None => {
2832 let ix = self.tree.pop()?;
2833 let tag = item_to_tag(&self.tree[ix].item, &self.allocs);
2834 self.tree.next_sibling(ix);
2835 Some(Event::End(tag))
2836 }
2837 Some(cur_ix) => {
2838 if self.tree[cur_ix].item.body.is_inline() {
2839 self.handle_inline();
2840 }
2841
2842 let node = self.tree[cur_ix];
2843 let item = node.item;
2844 let event = item_to_event(item, self.text, &self.allocs);
2845 if let Event::Start(..) = event {
2846 self.tree.push();
2847 } else {
2848 self.tree.next_sibling(cur_ix);
2849 }
2850 Some(event)
2851 }
2852 }
2853 }
2854 }
2855
2856 #[cfg(test)]
2857 mod test {
2858 use super::*;
2859 use crate::tree::Node;
2860
2861 // TODO: move these tests to tests/html.rs?
2862
2863 fn parser_with_extensions(text: &str) -> Parser<'_> {
2864 let mut opts = Options::empty();
2865 opts.insert(Options::ENABLE_TABLES);
2866 opts.insert(Options::ENABLE_FOOTNOTES);
2867 opts.insert(Options::ENABLE_STRIKETHROUGH);
2868 opts.insert(Options::ENABLE_TASKLISTS);
2869
2870 Parser::new_ext(text, opts)
2871 }
2872
2873 #[test]
2874 #[cfg(target_pointer_width = "64")]
2875 fn node_size() {
2876 let node_size = std::mem::size_of::<Node<Item>>();
2877 assert_eq!(48, node_size);
2878 }
2879
2880 #[test]
2881 #[cfg(target_pointer_width = "64")]
2882 fn body_size() {
2883 let body_size = std::mem::size_of::<ItemBody>();
2884 assert_eq!(16, body_size);
2885 }
2886
2887 #[test]
2888 fn single_open_fish_bracket() {
2889 // don't crash
2890 assert_eq!(3, Parser::new("<").count());
2891 }
2892
2893 #[test]
2894 fn lone_hashtag() {
2895 // don't crash
2896 assert_eq!(2, Parser::new("#").count());
2897 }
2898
2899 #[test]
2900 fn lots_of_backslashes() {
2901 // don't crash
2902 Parser::new("\\\\\r\r").count();
2903 Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2904 }
2905
2906 #[test]
2907 fn issue_320() {
2908 // don't crash
2909 parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2910 }
2911
2912 #[test]
2913 fn issue_319() {
2914 // don't crash
2915 parser_with_extensions("|\r-]([^|\r-]([^").count();
2916 parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2917 }
2918
2919 #[test]
2920 fn issue_303() {
2921 // don't crash
2922 parser_with_extensions("[^\r\ra]").count();
2923 parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2924 }
2925
2926 #[test]
2927 fn issue_313() {
2928 // don't crash
2929 parser_with_extensions("*]0[^\r\r*]0[^").count();
2930 parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2931 }
2932
2933 #[test]
2934 fn issue_311() {
2935 // don't crash
2936 parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
2937 }
2938
2939 #[test]
2940 fn issue_283() {
2941 let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2942 // don't crash
2943 parser_with_extensions(input).count();
2944 }
2945
2946 #[test]
2947 fn issue_289() {
2948 // don't crash
2949 parser_with_extensions("> - \\\n> - ").count();
2950 parser_with_extensions("- \n\n").count();
2951 }
2952
2953 #[test]
2954 fn issue_306() {
2955 // don't crash
2956 parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2957 }
2958
2959 #[test]
2960 fn issue_305() {
2961 // don't crash
2962 parser_with_extensions("_6**6*_*").count();
2963 }
2964
2965 #[test]
2966 fn another_emphasis_panic() {
2967 parser_with_extensions("*__#_#__*").count();
2968 }
2969
2970 #[test]
2971 fn offset_iter() {
2972 let event_offsets: Vec<_> = Parser::new("*hello* world")
2973 .into_offset_iter()
2974 .map(|(_ev, range)| range)
2975 .collect();
2976 let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
2977 assert_eq!(expected_offsets, event_offsets);
2978 }
2979
2980 #[test]
2981 fn reference_link_offsets() {
2982 let range =
2983 Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
2984 .into_offset_iter()
2985 .filter_map(|(ev, range)| match ev {
2986 Event::Start(Tag::Link(LinkType::Reference, ..), ..) => Some(range),
2987 _ => None,
2988 })
2989 .next()
2990 .unwrap();
2991 assert_eq!(5..30, range);
2992 }
2993
2994 #[test]
2995 fn footnote_offsets() {
2996 let range = Parser::new("Testing this[^1] out.\n\n[^1]: Footnote.")
2997 .into_offset_iter()
2998 .filter_map(|(ev, range)| match ev {
2999 Event::FootnoteReference(..) => Some(range),
3000 _ => None,
3001 })
3002 .next()
3003 .unwrap();
3004 assert_eq!(12..16, range);
3005 }
3006
3007 #[test]
3008 fn table_offset() {
3009 let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
3010 let event_offset = parser_with_extensions(markdown)
3011 .into_offset_iter()
3012 .map(|(_ev, range)| range)
3013 .nth(3)
3014 .unwrap();
3015 let expected_offset = 3..59;
3016 assert_eq!(expected_offset, event_offset);
3017 }
3018
3019 #[test]
3020 fn offset_iter_issue_378() {
3021 let event_offsets: Vec<_> = Parser::new("a [b](c) d")
3022 .into_offset_iter()
3023 .map(|(_ev, range)| range)
3024 .collect();
3025 let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
3026 assert_eq!(expected_offsets, event_offsets);
3027 }
3028
3029 #[test]
3030 fn offset_iter_issue_404() {
3031 let event_offsets: Vec<_> = Parser::new("###\n")
3032 .into_offset_iter()
3033 .map(|(_ev, range)| range)
3034 .collect();
3035 let expected_offsets = vec![(0..4), (0..4)];
3036 assert_eq!(expected_offsets, event_offsets);
3037 }
3038
3039 // FIXME: add this one to the regression suite
3040 #[test]
3041 fn link_def_at_eof() {
3042 let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
3043 let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
3044
3045 let mut buf = String::new();
3046 crate::html::push_html(&mut buf, Parser::new(test_str));
3047 assert_eq!(expected, buf);
3048 }
3049
3050 #[test]
3051 fn ref_def_at_eof() {
3052 let test_str = "[test]:\\";
3053 let expected = "";
3054
3055 let mut buf = String::new();
3056 crate::html::push_html(&mut buf, Parser::new(test_str));
3057 assert_eq!(expected, buf);
3058 }
3059
3060 #[test]
3061 fn ref_def_cr_lf() {
3062 let test_str = "[a]: /u\r\n\n[a]";
3063 let expected = "<p><a href=\"/u\">a</a></p>\n";
3064
3065 let mut buf = String::new();
3066 crate::html::push_html(&mut buf, Parser::new(test_str));
3067 assert_eq!(expected, buf);
3068 }
3069
3070 #[test]
3071 fn no_dest_refdef() {
3072 let test_str = "[a]:";
3073 let expected = "<p>[a]:</p>\n";
3074
3075 let mut buf = String::new();
3076 crate::html::push_html(&mut buf, Parser::new(test_str));
3077 assert_eq!(expected, buf);
3078 }
3079
3080 #[test]
3081 fn simple_broken_link_callback() {
3082 let test_str = "This is a link w/o def: [hello][world]";
3083 let parser = Parser::new_with_broken_link_callback(
3084 test_str,
3085 Options::empty(),
3086 Some(&|norm, raw| {
3087 assert_eq!("world", raw);
3088 assert_eq!("world", norm);
3089 Some(("YOLO".to_owned(), "SWAG".to_owned()))
3090 }),
3091 );
3092 let mut link_tag_count = 0;
3093 for (typ, url, title) in parser.filter_map(|event| match event {
3094 Event::Start(tag) | Event::End(tag) => match tag {
3095 Tag::Link(typ, url, title) => Some((typ, url, title)),
3096 _ => None,
3097 },
3098 _ => None,
3099 }) {
3100 link_tag_count += 1;
3101 assert_eq!(typ, LinkType::ReferenceUnknown);
3102 assert_eq!(url.as_ref(), "YOLO");
3103 assert_eq!(title.as_ref(), "SWAG");
3104 }
3105 assert!(link_tag_count > 0);
3106 }
3107
3108 #[test]
3109 fn code_block_kind_check_fenced() {
3110 let parser = Parser::new("hello\n```test\ntadam\n```");
3111 let mut found = 0;
3112 for (ev, _range) in parser.into_offset_iter() {
3113 match ev {
3114 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
3115 assert_eq!(syntax.as_ref(), "test");
3116 found += 1;
3117 }
3118 _ => {}
3119 }
3120 }
3121 assert_eq!(found, 1);
3122 }
3123
3124 #[test]
3125 fn code_block_kind_check_indented() {
3126 let parser = Parser::new("hello\n\n ```test\n tadam\nhello");
3127 let mut found = 0;
3128 for (ev, _range) in parser.into_offset_iter() {
3129 match ev {
3130 Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
3131 found += 1;
3132 }
3133 _ => {}
3134 }
3135 }
3136 assert_eq!(found, 1);
3137 }
3138 }