]> git.proxmox.com Git - rustc.git/blame - src/vendor/pulldown-cmark-0.0.8/src/parse.rs
New upstream version 1.19.0+dfsg3
[rustc.git] / src / vendor / pulldown-cmark-0.0.8 / src / parse.rs
CommitLineData
cc61c64b
XL
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Raw parser, for doing a single pass over input.
22
23use scanners::*;
24use utils;
25use std::borrow::Cow;
26use std::borrow::Cow::{Borrowed};
27use std::collections::{HashMap, HashSet};
28use std::cmp;
29
/// Phase of the parser's state machine: what the parser expects to find at
/// the current offset.
#[derive(PartialEq, Debug)]
enum State {
    /// A new block may begin; container prefixes of the current line have not
    /// been scanned yet.
    StartBlock,
    /// Container prefixes of the current line have been consumed (set by
    /// `start_block` once `scan_containers` succeeds).
    InContainers,
    /// Inside inline content (paragraph, header, table cell, ...).
    Inline,
    /// A table was recognised; the payload is handed to the `TableHead` tag
    /// by `start_table_head`.
    TableHead(usize, usize), // limit, next
    /// Between table rows (entered when a `TableRow` or `TableHead` ends).
    TableBody,
    /// Inside a table row, between cells.
    TableRow,
    /// At the beginning of a line inside a code block.
    CodeLineStart,
    /// Inside a line of a code block.
    Code,
    // NOTE(review): the two states below are not used in the visible part of
    // the file; presumably inline code spans and literal text - confirm.
    InlineCode,
    Literal,
}
43
/// A block container that is currently open; the parser keeps these in a
/// stack (`RawParser::containers`) and re-scans their prefixes on each line.
#[derive(Copy, Clone, Debug, PartialEq)]
enum Container {
    /// Block quote; continuation lines must carry a block quote marker.
    BlockQuote,
    /// List: (offset of the list's first marker, marker/delimiter byte).
    List(usize, u8),
    /// List item: number of columns of indentation that continuation lines
    /// must have to stay inside the item.
    ListItem(usize),
    /// Footnote definition; needs no per-line prefix.
    FootnoteDefinition,
}
51
/// Single-pass parser over a markdown source string.
pub struct RawParser<'a> {
    // the complete input
    text: &'a str,
    // current byte offset into `text`
    off: usize,

    opts: Options,
    // indexed by byte value; non-zero entries mark bytes that interrupt plain
    // text scanning in `next_inline`
    active_tab: [u8; 256],

    state: State,
    // open tags as (tag, limit, next): `limit` is the offset at which the
    // tag's content ends, `next` is the offset to resume at once the tag is
    // closed (0 when not known)
    stack: Vec<(Tag<'a>, usize, usize)>,
    // leading space left over after the container prefixes were scanned
    leading_space: usize,

    containers: Vec<Container>,
    last_line_was_empty: bool,

    // state for code fences
    // (`fence_char` is `\0` while inside an indented code block)
    fence_char: u8,
    fence_count: usize,
    fence_indent: usize,

    // info, used in second pass
    loose_lists: HashSet<usize>, // offset is at list marker
    // keyed by normalized link label; the value is presumably
    // (destination, title) - see `try_link_reference_definition`
    links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>,
}
75
/// Information collected while parsing, extracted from a finished parser via
/// `RawParser::get_info`.
pub struct ParseInfo<'a> {
    /// Offsets (at the list marker) of the lists that were found to be loose.
    pub loose_lists: HashSet<usize>,
    /// Link reference definitions, keyed by normalized label.
    pub links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>,
}
80
/// Element kinds that are reported as a matching `Event::Start` /
/// `Event::End` pair.
#[derive(Clone, Debug)]
pub enum Tag<'a> {
    // block-level tags
    Paragraph,
    Rule,
    /// Header with its level.
    Header(i32),
    BlockQuote,
    /// Code block; the payload is the fence info string (empty for indented
    /// code blocks).
    CodeBlock(Cow<'a, str>),
    /// List; `Some(start)` for lists whose delimiter is `.` or `)`, `None`
    /// otherwise.
    List(Option<usize>), // TODO: add delim and tight for ast (not needed for html)
    Item,
    /// Footnote definition with its name.
    FootnoteDefinition(Cow<'a, str>),

    // tables
    /// Table; the payload is the column count reported by the table head
    /// scanner.
    Table(i32),
    TableHead,
    TableRow,
    TableCell,

    // span-level tags
    Emphasis,
    Strong,
    Code,
    // NOTE(review): payloads are presumably (destination, title) - confirm
    // against `parse_link`.
    Link(Cow<'a, str>, Cow<'a, str>),
    Image(Cow<'a, str>, Cow<'a, str>),
}
106
/// A single item of parser output.
#[derive(Debug)]
pub enum Event<'a> {
    /// A tagged element opens.
    Start(Tag<'a>),
    /// The most recently opened element closes.
    End(Tag<'a>),
    /// Text content.
    Text(Cow<'a, str>),
    /// Content of an HTML block.
    Html(Cow<'a, str>),
    InlineHtml(Cow<'a, str>),
    FootnoteReference(Cow<'a, str>),
    /// Line break inside inline content that is not a hard break.
    SoftBreak,
    /// Hard line break (backslash or at least two trailing spaces before the
    /// line ending).
    HardBreak,
}
118
bitflags! {
    // Parser configuration flags.
    pub flags Options: u32 {
        // first pass: only `\n` is treated as an active byte (see `init_active`)
        const OPTION_FIRST_PASS = 1 << 0,
        // recognise tables
        const OPTION_ENABLE_TABLES = 1 << 1,
        // recognise footnote definitions and references
        const OPTION_ENABLE_FOOTNOTES = 1 << 2,
    }
}
126
// Bracket nesting depth at which `scan_link_label` gives up on a label.
const MAX_LINK_NEST: usize = 10;
128
129impl<'a> RawParser<'a> {
130 pub fn new_with_links(text: &'a str, opts: Options,
131 links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>) -> RawParser<'a> {
132 let mut ret = RawParser {
133 text: text,
134 off: if text.starts_with("\u{FEFF}") { 3 } else { 0 },
135 opts: opts,
136 active_tab: [0; 256],
137 state: State::StartBlock,
138 leading_space: 0,
139 stack: Vec::new(),
140 containers: Vec::new(),
141 last_line_was_empty: false,
142
143 fence_char: 0,
144 fence_count: 0,
145 fence_indent: 0,
146
147 // info, used in second pass
148 loose_lists: HashSet::new(),
149 links: links,
150 };
151 ret.init_active();
152 ret.skip_blank_lines();
153 ret
154 }
155
156 pub fn new(text: &'a str, opts: Options) -> RawParser<'a> {
157 RawParser::new_with_links(text, opts, HashMap::new())
158 }
159
160 // offset into text representing current parse position, hopefully
161 // useful for building source maps
162 pub fn get_offset(&self) -> usize {
163 self.off
164 }
165
166 // extract info from parser on finish
167 pub fn get_info(self) -> ParseInfo<'a> {
168 ParseInfo {
169 loose_lists: self.loose_lists,
170 links: self.links,
171 }
172 }
173
174 fn init_active(&mut self) {
175 if self.opts.contains(OPTION_FIRST_PASS) {
176 self.active_tab[b'\n' as usize] = 1
177 } else {
178 for &c in b"\x00\t\n\r_\\&*[!`<" {
179 self.active_tab[c as usize] = 1;
180 }
181 }
182 }
183
184 fn limit(&self) -> usize {
185 match self.stack.last() {
186 Some(&(_, limit, _)) => limit,
187 None => self.text.len()
188 }
189 }
190
191 // if end is not known, limit should be text.len(), next should be 0
192 fn start(&mut self, tag: Tag<'a>, limit: usize, next: usize) -> Event<'a> {
193 self.stack.push((tag.clone(), limit, next));
194 Event::Start(tag)
195 }
196
197 fn end(&mut self) -> Event<'a> {
198 let (tag, _, next) = self.stack.pop().unwrap();
199 match tag {
200 // containers
201 Tag::BlockQuote | Tag::List(_) | Tag::Item | Tag::FootnoteDefinition(_) => {
202 let _ = self.containers.pop();
203 }
204
205 // block level tags
206 Tag::Paragraph | Tag::Header(_) | Tag::Rule | Tag::CodeBlock(_) | Tag::Table(_) => {
207 self.state = State::StartBlock;
208 // TODO: skip blank lines (for cleaner source maps)
209 }
210
211 // tables
212 Tag::TableCell => self.state = State::TableRow,
213 Tag::TableRow | Tag::TableHead => self.state = State::TableBody,
214
215 // inline
216 Tag::Code => self.state = State::Inline,
217 _ => (),
218 }
219 if next != 0 { self.off = next; }
220
221 /*
222 if self.stack.is_empty() {
223 // TODO maybe: make block ends do this
224 self.state = State::StartBlock;
225 self.skip_blank_lines();
226 }
227 */
228 Event::End(tag)
229 }
230
231 fn skip_leading_whitespace(&mut self) {
232 self.off += scan_whitespace_no_nl(&self.text[self.off .. self.limit()]);
233 }
234
235 // TODO: this function doesn't respect containers
236 fn skip_blank_lines(&mut self) {
237 loop {
238 let ret = scan_blank_line(&self.text[self.off..]);
239 if ret == 0 {
240 break;
241 }
242 self.off += ret;
243 }
244 }
245
246 // Scan markers and indentation for current container stack
247 // Return: bytes scanned, whether containers are complete, and remaining space
248 fn scan_containers(&self, text: &str) -> (usize, bool, usize) {
249 let (mut i, mut space) = scan_leading_space(text, 0);
250 for container in self.containers.iter() {
251 match *container {
252 Container::BlockQuote => {
253 if space <= 3 {
254 let n = scan_blockquote_start(&text[i..]);
255 if n > 0 {
256 let (n_sp, next_space) = scan_leading_space(text, i + n);
257 i += n + n_sp;
258 space = next_space;
259 } else {
260 return (i, false, space);
261 }
262 } else {
263 return (i, false, space);
264 }
265 }
266 Container::FootnoteDefinition => (),
267 Container::List(_, _) => (),
268 Container::ListItem(indent) => {
269 if space >= indent {
270 space -= indent;
271 } else if scan_eol(&text[i..]).1 {
272 space = 0;
273 } else {
274 return (i, false, 0);
275 }
276 }
277 }
278 }
279 (i, true, space)
280 }
281
282 // scans empty lines with current container stack
283 // returns number of bytes scanned, number of empty lines
284 // note: EOF counts as a line ending for counting lines
285 fn scan_empty_lines(&self, text: &str) -> (usize, usize) {
286 let mut i = 0;
287 let mut lines = 0;
288 loop {
289 let (n, scanned, _) = self.scan_containers(&text[i..]);
290 if !scanned {
291 return (i, lines);
292 }
293 if i == text.len() {
294 return (i, lines + 1);
295 }
296 let n_blank = scan_eol(&text[i + n ..]).0;
297 if n_blank == 0 {
298 return (i, lines);
299 }
300 i += n + n_blank;
301 lines += 1;
302 }
303 }
304
305 // scans whitespace, skipping past containers on newline
306 fn scan_whitespace_inline(&self, text: &str) -> usize {
307 let i = scan_whitespace_no_nl(text);
308 if let (n, true) = scan_eol(&text[i..]) {
309 let (n_containers, _, space) = self.scan_containers(&text[i + n ..]);
310 let j = i + n + n_containers;
311 if !self.is_inline_block_end(&text[j..], space) {
312 return j;
313 }
314 }
315 i
316 }
317
318 fn at_list(&self, level: usize) -> Option<usize> {
319 let len = self.containers.len();
320 if len >= level {
321 if let Container::List(offset, _) = self.containers[len - level] {
322 return Some(offset);
323 }
324 }
325 None
326 }
327
// Block-level dispatcher. Starting at the current offset it matches container
// prefixes, skips empty lines (closing lists and footnote definitions where
// required), records loose lists, and then recognises the construct that
// starts on the current line.
// Returns the event for the construct that was opened or closed, or None
// once the end of the input has been reached.
fn start_block(&mut self) -> Option<Event<'a>> {
    let size = self.text.len();
    //println!("start_block {}", self.off);
    while self.off < size {
        //println!("start_block loop {} {}", self.off, self.last_line_was_empty);
        // the innermost open tag ends here
        if self.off >= self.limit() {
            return Some(self.end());
        }
        if self.state != State::InContainers {
            let (n, scanned, space) = self.scan_containers(&self.text[self.off ..]);
            if !scanned {
                // the line does not continue every open container: close the
                // innermost tag and retry on the next call
                return Some(self.end());
            }
            self.leading_space = space;
            self.off += n;
            self.state = State::InContainers;
        }

        let (n, at_eol) = scan_eol(&self.text[self.off ..]);
        if at_eol {
            self.off += n;
            self.state = State::StartBlock;
            // two empty lines closes lists and footnotes
            let (n, empty_lines) = self.scan_empty_lines(&self.text[self.off ..]);
            //println!("{} empty lines (n = {})", empty_lines, n);
            let mut closed = false;
            if empty_lines >= 1 {
                // everything from the outermost list / footnote definition
                // down to the top of the stack gets closed
                let mut close_tags: Vec<&mut (Tag<'a>, usize, usize)> = self.stack.iter_mut().skip_while(|tag| {
                    match tag.0 {
                        Tag::List(_) | Tag::FootnoteDefinition(_) => false,
                        _ => true,
                    }
                }).collect();
                if close_tags.len() != 0 {
                    for tag in &mut close_tags {
                        tag.1 = self.off; // limit
                        tag.2 = self.off; // next
                    }
                    // the outermost closed tag is popped last; only it moves
                    // the offset past the empty lines
                    close_tags[0].2 = self.off + n; // next
                    closed = true;
                }
            }
            if closed {
                return Some(self.end());
            }
            self.off += n;
            if let Some(_) = self.at_list(2) {
                self.last_line_was_empty = true;
            }
            continue;
        }

        //println!("checking loose {} {:?}", self.last_line_was_empty, self.at_list(2));
        if self.last_line_was_empty {
            if let Some(offset) = self.at_list(2) {
                // list item contains two blocks separated by empty line
                self.loose_lists.insert(offset);
            }
        }

        if self.leading_space >= 4 && !self.at_list(1).is_some() {
            // see below
            if let Some(&Container::List(_, _)) = self.containers.last() {
                return Some(self.end());
            }
            return Some(self.start_indented_code());
        }

        let tail = &self.text[self.off ..];

        // must be before list item because ambiguous
        let n = scan_hrule(tail);
        if n != 0 {
            self.last_line_was_empty = false;
            // see below
            if let Some(&Container::List(_, _)) = self.containers.last() {
                return Some(self.end());
            }
            self.off += n;
            return Some(self.start_hrule());
        }

        let (n, c, start, indent) = scan_listitem(tail);
        if n != 0 {
            if self.last_line_was_empty {
                if let Some(offset) = self.at_list(1) {
                    // two list items separated by empty line
                    self.loose_lists.insert(offset);
                }
            }
            self.last_line_was_empty = false;
            return Some(self.start_listitem(n, c, start, indent));
        }

        // not a list item, so if we're in a list, close it
        if let Some(&Container::List(_, _)) = self.containers.last() {
            return Some(self.end());
        }
        self.last_line_was_empty = false;

        // dispatch on the first byte of the line; when the candidate
        // construct does not match, control falls through to the paragraph
        let c = tail.as_bytes()[0];
        match c {
            b'#' => {
                let (n, level) = scan_atx_header(tail);
                if n != 0 {
                    self.off += n;
                    return Some(self.start_atx_header(level));
                }
            }
            b'`' | b'~' => {
                let (n, ch) = scan_code_fence(tail);
                if n != 0 {
                    return Some(self.start_code_fence(n, ch, n));
                }
            }
            b'>' => {
                let n = scan_blockquote_start(tail);
                if n != 0 {
                    self.off += n;
                    let (n, space) = scan_leading_space(self.text, self.off);
                    self.off += n;
                    self.leading_space = space;
                    self.containers.push(Container::BlockQuote);
                    return Some(self.start(Tag::BlockQuote, self.text.len(), 0));
                }
            }
            b'<' => {
                if self.is_html_block(tail) {
                    return Some(self.do_html_block());
                }
            }
            b'[' => {
                if self.opts.contains(OPTION_ENABLE_FOOTNOTES) {
                    if let Some((name, n)) = self.parse_footnote_definition(tail) {
                        // a footnote definition directly inside another one:
                        // close the open one first
                        if self.containers.last() == Some(&Container::FootnoteDefinition) {
                            return Some(self.end());
                        }
                        self.off += n;
                        self.containers.push(Container::FootnoteDefinition);
                        return Some(self.start(Tag::FootnoteDefinition(Cow::Borrowed(name)), self.text.len(), 0));
                    }
                }
                // link reference definitions produce no event; keep scanning
                if self.try_link_reference_definition(tail) {
                    continue;
                }
            }
            _ => ()
        }
        return Some(self.start_paragraph());
    }
    None
}
480
481 // can start a paragraph, a setext header, or a table, as they start similarly
482 fn start_paragraph(&mut self) -> Event<'a> {
483 let mut i = self.off + scan_nextline(&self.text[self.off..]);
484
485 if let (n, true, space) = self.scan_containers(&self.text[i..]) {
486 i += n;
487 if space <= 3 {
488 let (n, level) = scan_setext_header(&self.text[i..]);
489 if n != 0 {
490 let next = i + n;
491 while i > self.off && is_ascii_whitespace(self.text.as_bytes()[i - 1]) {
492 i -= 1;
493 }
494 self.state = State::Inline;
495 return self.start(Tag::Header(level), i, next);
496 }
497 if self.opts.contains(OPTION_ENABLE_TABLES) {
498 let (n, cols) = scan_table_head(&self.text[i..]);
499 if n != 0 {
500 let next = i + n;
501 while i > self.off && is_ascii_whitespace(self.text.as_bytes()[i - 1]) {
502 i -= 1;
503 }
504 self.state = State::TableHead(i, next);
505 return self.start(Tag::Table(cols), self.text.len(), 0);
506 }
507 }
508 }
509 }
510
511 let size = self.text.len();
512 self.state = State::Inline;
513 self.start(Tag::Paragraph, size, 0)
514 }
515
516 fn start_table_head(&mut self) -> Event<'a> {
517 assert!(self.opts.contains(OPTION_ENABLE_TABLES));
518 if let State::TableHead(limit, next) = self.state {
519 self.state = State::TableRow;
520 return self.start(Tag::TableHead, limit, next);
521 } else {
522 panic!();
523 }
524 }
525
526 fn start_table_body(&mut self) -> Event<'a> {
527 assert!(self.opts.contains(OPTION_ENABLE_TABLES));
528 let (off, _) = match self.scan_containers(&self.text[self.off ..]) {
529 (n, true, space) => (self.off + n, space),
530 _ => {
531 return self.end();
532 }
533 };
534 let n = scan_blank_line(&self.text[off..]);
535 if n != 0 {
536 self.off = off + n;
537 return self.end();
538 }
539 self.state = State::TableRow;
540 self.off = off;
541 return self.start(Tag::TableRow, self.text.len(), 0);
542 }
543
544 fn start_hrule(&mut self) -> Event<'a> {
545 let limit = self.off; // body of hrule is empty
546 self.state = State::Inline; // handy state for producing correct end tag
547 self.start(Tag::Rule, limit, limit)
548 }
549
550 fn start_atx_header(&mut self, level: i32) -> Event<'a> {
551 self.skip_leading_whitespace();
552 let tail = &self.text[self.off..];
553 let next = scan_nextline(tail);
554 let mut limit = next;
555 while limit > 0 && is_ascii_whitespace(tail.as_bytes()[limit - 1]) {
556 limit -= 1;
557 }
558 let mut end = limit;
559 while end > 0 && tail.as_bytes()[end - 1] == b'#' {
560 end -= 1;
561 }
562 if end == 0 {
563 limit = end;
564 } else if is_ascii_whitespace(tail.as_bytes()[end - 1]) {
565 limit = end - 1;
566 }
567 while limit > 0 && is_ascii_whitespace(tail.as_bytes()[limit - 1]) {
568 limit -= 1;
569 }
570 let limit = limit + self.off;
571 let next = next + self.off;
572 self.state = State::Inline;
573 self.start(Tag::Header(level), limit, next)
574 }
575
576 fn start_indented_code(&mut self) -> Event<'a> {
577 self.fence_char = b'\0';
578 self.fence_indent = 4;
579 let size = self.text.len();
580 self.state = State::Code;
581 self.start(Tag::CodeBlock(Borrowed("")), size, 0)
582 }
583
584 fn start_listitem(&mut self, n: usize, c: u8, start: usize, indent: usize) -> Event<'a> {
585 let indent = self.leading_space + indent;
586 match self.containers.last() {
587 Some(&Container::List(_, c2)) => {
588 if c != c2 {
589 // mismatched list type or delimeter
590 return self.end();
591 }
592 self.off += n;
593 let n_blank = scan_blank_line(&self.text[self.off ..]);
594 if n_blank != 0 {
595 self.off += n_blank;
596 self.state = State::StartBlock;
597 } else {
598 // TODO: deal with tab
599 let (n, space) = scan_leading_space(self.text, self.off);
600 self.off += n;
601 self.leading_space = space;
602 }
603 self.containers.push(Container::ListItem(indent));
604 self.start(Tag::Item, self.text.len(), 0)
605 }
606 _ => {
607 self.containers.push(Container::List(self.off, c));
608 // arguably this should be done in the scanner, it should return option
609 let startopt = if c == b'.' || c == b')' { Some(start) } else { None };
610 self.start(Tag::List(startopt), self.text.len(), 0)
611 }
612 }
613 }
614
615 fn start_code_fence(&mut self, n: usize, ch: u8, count: usize) -> Event<'a> {
616 self.fence_char = ch;
617 self.fence_count = count;
618 self.fence_indent = self.leading_space;
619 let beg_info = self.off + n;
620 let next_line = beg_info + scan_nextline(&self.text[beg_info..]);
621 self.off = next_line;
622 let info = unescape(&self.text[beg_info..next_line].trim());
623 let size = self.text.len();
624 self.state = State::CodeLineStart;
625 self.start(Tag::CodeBlock(info), size, 0)
626 }
627
// Handles the start of a line inside a code block: matches the container
// prefixes, decides whether the block ends here, and otherwise hands over to
// `next_code` for the content of the line.
fn next_code_line_start(&mut self) -> Event<'a> {
    let (off, space) = match self.scan_containers(&self.text[self.off ..]) {
        (n, true, space) => (self.off + n, space),
        _ => {
            // the line left an enclosing container, so the block is over
            return self.end();
        }
    };

    // indented code block
    if self.fence_char == b'\0' {
        let n = scan_blank_line(&self.text[off..]);
        if n != 0 {
            // blank lines belong to the block only if more code follows them
            // TODO performance: this scanning is O(n^2) in the number of empty lines
            let (n_empty, _lines) = self.scan_empty_lines(&self.text[off + n ..]);
            let next = off + n + n_empty;
            let (n_containers, scanned, nspace) = self.scan_containers(&self.text[next..]);
            // TODO; handle space
            if !scanned || self.is_code_block_end(next + n_containers, nspace) {
                //println!("was end: {}", next + n_containers);
                return self.end();
            } else {
                self.off = off;
                //println!("code line start space={}, off={}", space, off);
                self.leading_space = space;
                return self.next_code();
            }
        }
    }

    if self.is_code_block_end(off, space) {
        let ret = self.end();
        // for a fenced block the closing fence line is consumed as well
        if self.fence_char != b'\0' {
            self.off = off + scan_nextline(&self.text[off..]);
        }
        ret
    } else {
        self.off = off;
        self.state = State::Code;
        self.leading_space = space;
        self.next_code()
    }
}
669
670 fn next_code(&mut self) -> Event<'a> {
671 if self.leading_space > self.fence_indent {
672 // TODO: might try to combine spaces in text, for fewer events
673 let space = self.leading_space;
674 self.leading_space = 0;
675 return Event::Text(spaces(space - self.fence_indent));
676 }
677 let bytes = self.text.as_bytes();
678 let mut beg = self.off;
679 let mut i = beg;
680 loop {
681 match bytes[i..].iter().position(|&c| c < b' ') {
682 Some(j) => i += j,
683 None => {
684 i += bytes[i..].len();
685 break;
686 }
687 }
688 match bytes[i] {
689 b'\n' => {
690 i += 1;
691 self.state = State::CodeLineStart;
692 break;
693 }
694 b'\t' => {
695 if i > beg { break; }
696 return self.char_tab();
697 }
698 b'\r' => {
699 // just skip it (does not support '\r' only line break)
700 if i > beg { break; }
701 beg += 1;
702 }
703 _ => ()
704 }
705 i += 1;
706 }
707 self.off = i;
708 Event::Text(Borrowed(&self.text[beg..i]))
709 }
710
711 fn is_code_block_end(&self, loc: usize, space: usize) -> bool {
712 let tail = &self.text[loc..];
713 if self.fence_char == b'\0' {
714 // indented code block
715 space < 4
716 } else if space <= 3 {
717 let (n, c) = scan_code_fence(tail);
718 if c != self.fence_char || n < self.fence_count {
719 return false;
720 }
721 if n < tail.len() && scan_blank_line(&tail[n..]) == 0 {
722 // Closing code fences cannot have info strings
723 return false;
724 }
725 return true;
726 } else {
727 false
728 }
729 }
730
731 // # HTML blocks
732
733 fn scan_html_block_tag(&self, data: &'a str) -> (usize, &'a str) {
734 let mut i = scan_ch(data, b'<');
735 if i == 0 { return (0, "") }
736 i += scan_ch(&data[i..], b'/');
737 let n = scan_while(&data[i..], is_ascii_alphanumeric);
738 // TODO: scan attributes and >
739 (i + n, &data[i .. i + n])
740 }
741
742 fn is_html_block(&self, data: &str) -> bool {
743 let (n_tag, tag) = self.scan_html_block_tag(data);
744 (n_tag > 0 && is_html_tag(tag)) ||
745 data.starts_with("<?") ||
746 data.starts_with("<!")
747 }
748
// Collects an HTML block into a single Html event. Lines are gathered until
// a container is left, a blank line follows, or the input ends. Container
// prefixes and the '\r' of "\r\n" line endings are left out of the output,
// and leftover leading space is re-inserted as spaces.
fn do_html_block(&mut self) -> Event<'a> {
    let size = self.text.len();
    let mut out = Borrowed("");
    let mut i = self.off;
    // start of the text that has not been copied to `out` yet
    let mut mark = i;
    loop {
        let n = scan_nextline(&self.text[i..]);
        i += n;
        // "\r\n" line ending: flush up to the '\r' and resume at the '\n'
        if n >= 2 && self.text.as_bytes()[i - 2] == b'\r' {
            if self.leading_space > 0 {
                out = utils::cow_append(out, spaces(self.leading_space));
                self.leading_space = 0;
            }
            out = utils::cow_append(out, Borrowed(&self.text[mark .. i - 2]));
            mark = i - 1;
        }
        let (n, scanned, space) = self.scan_containers(&self.text[i..]);
        let n_blank = scan_blank_line(&self.text[i + n ..]);
        // flush whenever a container prefix has to be skipped or the block ends
        if n != 0 || !scanned || i + n == size || n_blank != 0 {
            if self.leading_space > 0 {
                out = utils::cow_append(out, spaces(self.leading_space));
            }
            self.leading_space = space;
            out = utils::cow_append(out, Borrowed(&self.text[mark..i]));
            mark = i + n;
        }
        if !scanned || i + n == size || n_blank != 0 {
            self.off = i; // TODO: skip blank lines (cleaner source maps)
            self.state = State::StartBlock;
            return Event::Html(out)
        }
    }
}
782
783 // # Link reference definitions
784
785 fn try_link_reference_definition(&mut self, data: &'a str) -> bool {
786 let (n_link, text_beg, text_end, max_nest) = self.scan_link_label(data);
787 if n_link == 0 || max_nest > 1 { return false; }
788 let n_colon = scan_ch(&data[n_link ..], b':');
789 if n_colon == 0 { return false; }
790 let mut i = n_link + n_colon;
791 i += self.scan_whitespace_inline(&data[i..]);
792 let linkdest = scan_link_dest(&data[i..]);
793 if linkdest.is_none() { return false; }
794 let (n_dest, raw_dest) = linkdest.unwrap();
795 if n_dest == 0 { return false; }
796 i += n_dest;
797 i += scan_whitespace_no_nl(&data[i..]);
798 let n_nl = self.scan_whitespace_inline(&data[i..]);
799 let (n_title, title_beg, title_end) = self.scan_link_title(&data[i + n_nl ..]);
800 let title = if n_title == 0 {
801 Borrowed("")
802 } else {
803 let (title_beg, title_end) = (i + n_nl + title_beg, i + n_nl + title_end);
804 i += n_nl + n_title;
805 unescape(&data[title_beg..title_end])
806 };
807 i += scan_whitespace_no_nl(&data[i..]);
808 if let (n_eol, true) = scan_eol(&data[i..]) {
809 i += n_eol;
810 } else {
811 return false;
812 }
813
814 let linktext = self.normalize_link_ref(&data[text_beg..text_end]);
815 if linktext.is_empty() {
816 return false;
817 }
818 if !self.links.contains_key(&linktext) {
819 let dest = unescape(raw_dest);
820 self.links.insert(linktext, (dest, title));
821 }
822 self.state = State::StartBlock;
823 self.off += i;
824 true
825 }
826
827 // normalize whitespace and case-fold
828 fn normalize_link_ref(&self, raw: &str) -> String {
829 let mut need_space = false;
830 let mut result = String::new();
831 let mut i = 0;
832 while i < raw.len() {
833 let n = scan_nextline(&raw[i..]);
834 for c in raw[i.. i + n].chars() {
835 if c.is_whitespace() {
836 need_space = true;
837 } else {
838 if need_space && !result.is_empty() {
839 result.push(' ');
840 }
841 // TODO: Unicode case folding can differ from lowercase (ß)
842 result.extend(c.to_lowercase());
843 need_space = false;
844 }
845 }
846 i += n;
847 if i == raw.len() { break; }
848 i += self.scan_containers(&raw[i..]).0;
849 need_space = true;
850 }
851 result
852 }
853
854 // determine whether the line starting at loc ends the block
855 fn is_inline_block_end(&self, data: &str, space: usize) -> bool {
856 data.is_empty() ||
857 scan_blank_line(data) != 0 ||
858 space <= 3 && (scan_hrule(data) != 0 ||
859 scan_atx_header(data).0 != 0 ||
860 scan_code_fence(data).0 != 0 ||
861 scan_blockquote_start(data) != 0 ||
862 scan_listitem(data).0 != 0 ||
863 self.is_html_block(data))
864 }
865
// Opens the next cell of the current table row, or closes the innermost open
// tag when no further cell content is found. Cells are separated by '|';
// "\|" does not separate. Panics when tables are disabled.
fn next_table_cell(&mut self) -> Event<'a> {
    assert!(self.opts.contains(OPTION_ENABLE_TABLES));
    let bytes = self.text.as_bytes();
    let mut beg = self.off + scan_whitespace_no_nl(&self.text[self.off ..]);
    let mut i = beg;
    let limit = self.limit();
    // step over the '|' in front of the cell
    if i < limit && bytes[i] == b'|' {
        i += 1;
        beg += 1;
        self.off += 1;
    }
    if i >= limit {
        self.off = limit;
        return self.end();
    }
    // length of the blank remainder of the line, when the scan stopped there
    let mut n = 0;
    while i < limit {
        let c = bytes[i];
        if c == b'\\' && i + 1 < limit && bytes[i + 1] == b'|' {
            i += 2;
            continue;
        } else if c == b'|' {
            n = 0;
            break;
        }
        n = if is_ascii_whitespace(bytes[i]) { scan_blank_line(&self.text[i..]) } else { 0 };
        if n != 0 {
            // the cell has content: leave the blank tail for the next call
            if i > beg {
                n = 0;
            }
            break;
        }
        i += 1;
    }
    if i > beg {
        self.state = State::Inline;
        self.start(Tag::TableCell, i, i + n)
    } else {
        self.off = i + n;
        self.end()
    }
}
908
// Produces the next event inside inline content: a run of plain text, a soft
// or hard break, whatever the handler of an active byte returns, or the end
// of the innermost open tag once its limit is reached.
fn next_inline(&mut self) -> Event<'a> {
    let bytes = self.text.as_bytes();
    let beg = self.off;
    let mut i = beg;
    let limit = self.limit();
    while i < limit {
        // jump to the next active byte
        match bytes[i..limit].iter().position(|&c| self.active_tab[c as usize] != 0) {
            Some(pos) => i += pos,
            None => { i = limit; break; }
        }
        let c = bytes[i];
        if c == b'\n' || c == b'\r' {
            // text before the line ending is emitted first, without its
            // trailing whitespace; the break is handled on the next call
            let n = scan_trailing_whitespace(&self.text[beg..i]);
            let end = i - n;
            if end > beg {
                self.off = end;
                return Event::Text(Borrowed(&self.text[beg..end]));
            }
            if c == b'\r' && i + 1 < limit && self.text.as_bytes()[i + 1] == b'\n' {
                i += 1;
            }
            i += 1;
            let next = i;
            let (n_containers, _, space) = self.scan_containers(&self.text[i..limit]);
            i += n_containers;
            if self.is_inline_block_end(&self.text[i..limit], space) {
                self.off = next;
                return self.end();
            }
            i += scan_whitespace_no_nl(&self.text[i..limit]);
            self.off = i;
            // two or more whitespace characters before the line ending
            return if n >= 2 { Event::HardBreak } else { Event::SoftBreak };
        }
        self.off = i;
        // flush pending plain text before handling the active byte
        if i > beg {
            return Event::Text(Borrowed(&self.text[beg..i]));
        }
        if let Some(event) = self.active_char(c) {
            return event;
        }
        // the handler declined: the byte becomes part of the plain text
        i = self.off; // let handler advance offset even on None
        i += 1;
    }
    if i > beg {
        self.off = i;
        Event::Text(Borrowed(&self.text[beg..i]))
    } else {
        self.end()
    }
}
959
960 fn active_char(&mut self, c: u8) -> Option<Event<'a>> {
961 match c {
962 b'\x00' => Some(self.char_null()),
963 b'\t' => Some(self.char_tab()),
964 b'\\' => self.char_backslash(),
965 b'&' => self.char_entity(),
966 b'_' => self.char_emphasis(),
967 b'*' => self.char_emphasis(),
968 b'[' if self.opts.contains(OPTION_ENABLE_FOOTNOTES) => self.char_link_footnote(),
969 b'[' | b'!' => self.char_link(),
970 b'`' => self.char_backtick(),
971 b'<' => self.char_lt(),
972 _ => None
973 }
974 }
975
976 fn char_null(&mut self) -> Event<'a> {
977 self.off += 1;
978 Event::Text(Borrowed(&"\u{fffd}"))
979 }
980
981 // expand tab in content (used for code and inline)
982 // scan backward to find offset, counting unicode code points
983 fn char_tab(&mut self) -> Event<'a> {
984 let count = count_tab(&self.text.as_bytes()[.. self.off]);
985 self.off += 1;
986 Event::Text(Borrowed(&" "[..count]))
987 }
988
989 fn char_backslash(&mut self) -> Option<Event<'a>> {
990 let limit = self.limit();
991 if self.off + 1 < limit {
992 if let (_, true) = scan_eol(&self.text[self.off + 1 .. limit]) {
993 let n_white = self.scan_whitespace_inline(&self.text[self.off + 1 .. limit]);
994 let space = 0; // TODO: figure this out
995 if !self.is_inline_block_end(&self.text[self.off + 1 + n_white .. limit], space) {
996 self.off += 1 + n_white;
997 return Some(Event::HardBreak);
998 }
999 }
1000 let c = self.text.as_bytes()[self.off + 1];
1001 if is_ascii_punctuation(c) {
1002 self.off += 2;
1003 return Some(Event::Text(Borrowed(&self.text[self.off - 1 .. self.off])));
1004 }
1005 }
1006 None
1007 }
1008
1009 fn char_entity(&mut self) -> Option<Event<'a>> {
1010 match scan_entity(&self.text[self.off ..]) {
1011 (n, Some(value)) => {
1012 self.off += n;
1013 Some(Event::Text(value))
1014 }
1015 _ => None
1016 }
1017 }
1018
// Handles a '*' or '_' run at the current offset. Scans ahead, within the
// current limit, for the run that closes it, stepping over code spans,
// autolinks / inline HTML, footnotes and links. On success the opening
// delimiter is consumed and an Emphasis (one delimiter) or Strong (two
// delimiters) tag is opened whose limit is where the closing delimiter
// starts. Returns None when the run cannot open emphasis or no closer is
// found.
fn char_emphasis(&mut self) -> Option<Event<'a>> {
    // can see to left for flanking info, but not past limit
    let limit = self.limit();
    let data = &self.text[..limit];

    let c = data.as_bytes()[self.off];
    let (n, can_open, _can_close) = compute_open_close(data, self.off, c);
    if !can_open {
        return None;
    }
    // lengths of the delimiter runs that are still open, innermost last
    let mut stack = vec![n]; // TODO performance: don't allocate
    let mut i = self.off + n;
    while i < limit {
        let c2 = data.as_bytes()[i];
        if c2 == b'\n' && !is_escaped(data, i) {
            let space = 0; // TODO: scan containers
            if self.is_inline_block_end(&self.text[i + 1 .. limit], space) {
                return None
            } else {
                i += 1;
            }
        } else if c2 == c && !is_escaped(data, i) {
            let (mut n2, can_open, can_close) = compute_open_close(data, i, c);
            if can_close {
                // match the closing run against the open runs, innermost first
                loop {
                    let ntos = stack.pop().unwrap();
                    if ntos > n2 {
                        // the open run is only partly closed
                        stack.push(ntos - n2);
                        break;
                    }
                    if stack.is_empty() {
                        // this run closes the delimiter we started from
                        let npop = if ntos < n2 { ntos } else { n2 };
                        if npop == 1 {
                            self.off += 1;
                            return Some(self.start(Tag::Emphasis, i, i + 1));
                        } else {
                            self.off += 2;
                            let next = i + npop;
                            return Some(self.start(Tag::Strong, next - 2, next));
                        }
                    } else {
                        i += ntos;
                        n2 -= ntos;
                    }
                }
            } else if can_open {
                stack.push(n2);
            }
            i += n2;
        } else if c2 == b'`' {
            let (n, beg, _) = self.scan_inline_code(&self.text[i..limit]);
            if n != 0 {
                i += n;
            } else {
                i += beg;
            }
        } else if c2 == b'<' {
            let n = self.scan_autolink_or_html(&self.text[i..limit]);
            if n != 0 {
                i += n;
            } else {
                i += 1;
            }
        } else if c2 == b'[' {
            if self.opts.contains(OPTION_ENABLE_FOOTNOTES) {
                if let Some((_, n)) = self.parse_footnote(&self.text[i..limit]) {
                    i += n;
                    continue;
                }
            }
            if let Some((_, _, _, n)) = self.parse_link(&self.text[i..limit], false) {
                i += n;
            } else {
                i += 1;
            }
        } else {
            i += 1;
        }
    }
    None
}
1100
1101 // # Links
1102
1103 // scans a link label, example [link]
1104 // return value is: total bytes, start of text, end of text, max nesting
1105 fn scan_link_label(&self, data: &str) -> (usize, usize, usize, usize) {
1106 let mut i = scan_ch(data, b'[');
1107 if i == 0 { return (0, 0, 0, 0); }
1108 let text_beg = i;
1109 let mut max_nest = 1;
1110 let mut nest = 1;
1111 loop {
1112 if i >= data.len() { return (0, 0, 0, 0); }
1113 match data.as_bytes()[i] {
1114 b'\n' => {
1115 let n = self.scan_whitespace_inline(&data[i..]);
1116 if n == 0 { return (0, 0, 0, 0); }
1117 i += n;
1118 }
1119 b'[' => {
1120 nest += 1;
1121 if nest == MAX_LINK_NEST { return (0, 0, 0, 0); }
1122 max_nest = cmp::max(max_nest, nest);
1123 i += 1;
1124 }
1125 b']' => {
1126 nest -= 1;
1127 if nest == 0 {
1128 break;
1129 }
1130 i += 1;
1131 }
1132 b'\\' => i += 1,
1133 b'<' => {
1134 let n = self.scan_autolink_or_html(&data[i..]);
1135 if n != 0 {
1136 i += n;
1137 } else {
1138 i += 1;
1139 }
1140 }
1141 b'`' => {
1142 let (n, beg, _) = self.scan_inline_code(&data[i..]);
1143 if n != 0 {
1144 i += n;
1145 } else {
1146 i += beg;
1147 }
1148 }
1149 _ => i += 1
1150 }
1151 }
1152 let text_end = i;
1153 i += 1; // skip closing ]
1154 (i, text_beg, text_end, max_nest)
1155 }
1156
1157 fn scan_link_title(&self, data: &str) -> (usize, usize, usize) {
1158 let size = data.len();
1159 if size == 0 { return (0, 0, 0); }
1160 let mut i = 0;
1161 let titleclose = match data.as_bytes()[i] {
1162 b'(' => b')',
1163 b'\'' => b'\'',
1164 b'\"' => b'\"',
1165 _ => return (0, 0, 0)
1166 };
1167 i += 1;
1168 let title_beg = i;
1169 while i < size {
1170 match data.as_bytes()[i] {
1171 x if x == titleclose => break,
1172 b'\\' => i += 2, // may be > size
1173 b'\n' => {
1174 let n = self.scan_whitespace_inline(&data[i..]);
1175 if n == 0 { return (0, 0, 0); }
1176 i += n;
1177 }
1178 _ => i += 1
1179 }
1180 }
1181 if i >= size { return (0, 0, 0); }
1182 let title_end = i;
1183 i += 1;
1184 (i, title_beg, title_end)
1185 }
1186
1187 fn char_link(&mut self) -> Option<Event<'a>> {
1188 self.parse_link(&self.text[self.off .. self.limit()], false).map(|(tag, beg, end, n)| {
1189 let off = self.off;
1190 self.off += beg;
1191 self.start(tag, off + end, off + n)
1192 })
1193 }
1194
1195 // return: tag, begin, end, total size
1196 fn parse_link(&self, data: &'a str, recur: bool) -> Option<(Tag<'a>, usize, usize, usize)> {
1197 let size = data.len();
1198
1199 // scan link text
1200 let i = scan_ch(data, b'!');
1201 let is_image = i == 1;
1202 let (n, text_beg, text_end, max_nest) = self.scan_link_label(&data[i..]);
1203 if n == 0 { return None; }
1204 let (text_beg, text_end) = (text_beg + i, text_end + i);
1205 if !is_image && !recur && max_nest > 1 && self.contains_link(&data[text_beg..text_end]) {
1206 // disallow nested links in links (but ok in images)
1207 return None;
1208 }
1209 let mut i = i + n;
1210
1211 // scan dest
1212 let (dest, title, beg, end, next) = if data[i..].starts_with("(") {
1213 i += 1;
1214 i += self.scan_whitespace_inline(&data[i..]);
1215 if i >= size { return None; }
1216
1217 let linkdest = scan_link_dest(&data[i..]);
1218 if linkdest.is_none() { return None; }
1219 let (n, raw_dest) = linkdest.unwrap();
1220 let dest = unescape(raw_dest);
1221 i += n;
1222
1223 i += self.scan_whitespace_inline(&data[i..]);
1224 if i == size { return None; }
1225
1226 // scan title
1227 let (n_title, title_beg, title_end) = self.scan_link_title(&data[i..]);
1228 let title = if n_title == 0 {
1229 Borrowed("")
1230 } else {
1231 let (title_beg, title_end) = (i + title_beg, i + title_end);
1232 i += n_title;
1233 // TODO: not just unescape, remove containers from newlines
1234 unescape(&data[title_beg..title_end])
1235 };
1236 i += self.scan_whitespace_inline(&data[i..]);
1237 if i == size || data.as_bytes()[i] != b')' { return None; }
1238 i += 1;
1239 (dest, title, text_beg, text_end, i)
1240 } else {
1241 // try link reference
1242 let j = i + self.scan_whitespace_inline(&data[i..]);
1243 let (n_ref, ref_beg, ref_end, _) = self.scan_link_label(&data[j..]);
1244 let (ref_beg, ref_end) = if n_ref == 0 || ref_beg == ref_end {
1245 (text_beg, text_end)
1246 } else {
1247 (j + ref_beg, j + ref_end)
1248 };
1249 if n_ref != 0 {
1250 i = j + n_ref;
1251 }
1252 let reference = self.normalize_link_ref(&data[ref_beg..ref_end]);
1253 let (dest, title) = match self.links.get(&reference) {
1254 Some(&(ref dest, ref title)) => (dest.clone(), title.clone()),
1255 None => return None
1256 };
1257 (dest, title, text_beg, text_end, i)
1258 };
1259 if is_image {
1260 Some((Tag::Image(dest, title), beg, end, next))
1261 } else {
1262 Some((Tag::Link(dest, title), beg, end, next))
1263 }
1264 }
1265
1266 // determine whether there's a link anywhere in the text
1267 // TODO: code duplication with scan_link_label is unpleasant
1268 fn contains_link(&self, data: &str) -> bool {
1269 let mut i = 0;
1270 while i < data.len() {
1271 match data.as_bytes()[i] {
1272 b'\n' => {
1273 let n = self.scan_whitespace_inline(&data[i..]);
1274 if n == 0 { return false; }
1275 i += n;
1276 continue;
1277 }
1278 b'!' => {
1279 if scan_ch(&data[i + 1 ..], b'[') != 0 {
1280 // ok to contain image, skip over opening bracket
1281 i += 1;
1282 }
1283 }
1284 b'[' => {
1285 if self.opts.contains(OPTION_ENABLE_FOOTNOTES) && self.parse_footnote(&data[i..]).is_some() {
1286 return false;
1287 }
1288 if self.parse_link(&data[i..], true).is_some() { return true; }
1289 }
1290 b'\\' => i += 1,
1291 b'<' => {
1292 let n = self.scan_autolink_or_html(&data[i..]);
1293 if n != 0 {
1294 i += n;
1295 } else {
1296 i += 1;
1297 }
1298 }
1299 b'`' => {
1300 let (n, beg, _) = self.scan_inline_code(&data[i..]);
1301 if n != 0 {
1302 i += n;
1303 } else {
1304 i += beg;
1305 }
1306 }
1307 _ => ()
1308 }
1309 i += 1;
1310 }
1311 false
1312 }
1313
1314 // # Footnotes
1315
1316 fn parse_footnote_definition<'b>(&self, data: &'b str) -> Option<(&'b str, usize)> {
1317 assert!(self.opts.contains(OPTION_ENABLE_FOOTNOTES));
1318 self.parse_footnote(data).and_then(|(name, len)| {
1319 let n_colon = scan_ch(&data[len ..], b':');
1320 if n_colon == 0 {
1321 None
1322 } else {
1323 let space = scan_whitespace_no_nl(&data[len + n_colon..]);
1324 Some((name, len + n_colon + space))
1325 }
1326 })
1327 }
1328
1329 fn char_link_footnote(&mut self) -> Option<Event<'a>> {
1330 assert!(self.opts.contains(OPTION_ENABLE_FOOTNOTES));
1331 if let Some((name, end)) = self.parse_footnote(&self.text[self.off .. self.limit()]) {
1332 self.off += end;
1333 Some(Event::FootnoteReference(Cow::Borrowed(name)))
1334 } else {
1335 self.char_link()
1336 }
1337 }
1338
1339 fn parse_footnote<'b>(&self, data: &'b str) -> Option<(&'b str, usize)> {
1340 assert!(self.opts.contains(OPTION_ENABLE_FOOTNOTES));
1341 let (n_footnote, text_beg, text_end) = self.scan_footnote_label(data);
1342 if n_footnote == 0 { return None; }
1343 return Some((&data[text_beg..text_end], n_footnote));
1344 }
1345
1346 fn scan_footnote_label(&self, data: &str) -> (usize, usize, usize) {
1347 assert!(self.opts.contains(OPTION_ENABLE_FOOTNOTES));
1348 let mut i = scan_ch(data, b'[');
1349 if i == 0 { return (0, 0, 0); }
1350 if i >= data.len() || data.as_bytes()[i] != b'^' { return (0, 0, 0); }
1351 i += 1;
1352 let text_beg = i;
1353 loop {
1354 if i >= data.len() { return (0, 0, 0); }
1355 match data.as_bytes()[i] {
1356 b'\n' => {
1357 let n = self.scan_whitespace_inline(&data[i..]);
1358 if n == 0 { return (0, 0, 0); }
1359 i += n;
1360 continue;
1361 }
1362 b']' => break,
1363 b'\\' => i += 1,
1364 _ => ()
1365 }
1366 i += 1;
1367 }
1368 let text_end = i;
1369 i += 1; // skip closing ]
1370 (i, text_beg, text_end)
1371 }
1372
1373 // # Autolinks and inline HTML
1374
1375 fn char_lt(&mut self) -> Option<Event<'a>> {
1376 let tail = &self.text[self.off .. self.limit()];
1377 if let Some((n, link)) = scan_autolink(tail) {
1378 let next = self.off + n;
1379 self.off += 1;
1380 self.state = State::Literal;
1381 return Some(self.start(Tag::Link(link, Borrowed("")), next - 1, next))
1382 }
1383 let n = self.scan_inline_html(tail);
1384 if n != 0 {
1385 return Some(self.inline_html_event(n))
1386 }
1387 None
1388 }
1389
1390 fn scan_autolink_or_html(&self, data: &str) -> usize {
1391 if let Some((n, _)) = scan_autolink(data) {
1392 n
1393 } else {
1394 self.scan_inline_html(data)
1395 }
1396 }
1397
1398 fn scan_inline_html(&self, data: &str) -> usize {
1399 let n = self.scan_html_tag(data);
1400 if n != 0 { return n; }
1401 let n = self.scan_html_comment(data);
1402 if n != 0 { return n; }
1403 let n = self.scan_processing_instruction(data);
1404 if n != 0 { return n; }
1405 let n = self.scan_declaration(data);
1406 if n != 0 { return n; }
1407 let n = self.scan_cdata(data);
1408 if n != 0 { return n; }
1409 0
1410 }
1411
1412 fn scan_html_tag(&self, data: &str) -> usize {
1413 let size = data.len();
1414 let mut i = 0;
1415 if scan_ch(data, b'<') == 0 { return 0; }
1416 i += 1;
1417 let n_slash = scan_ch(&data[i..], b'/');
1418 i += n_slash;
1419 if i == size || !is_ascii_alpha(data.as_bytes()[i]) { return 0; }
1420 i += 1;
1421 i += scan_while(&data[i..], is_ascii_alphanumeric);
1422 if n_slash == 0 {
1423 loop {
1424 let n = self.scan_whitespace_inline(&data[i..]);
1425 if n == 0 { break; }
1426 i += n;
1427 let n = scan_attribute_name(&data[i..]);
1428 if n == 0 { break; }
1429 i += n;
1430 let n = self.scan_whitespace_inline(&data[i..]);
1431 if scan_ch(&data[i + n ..], b'=') != 0 {
1432 i += n + 1;
1433 i += self.scan_whitespace_inline(&data[i..]);
1434 let n_attr = self.scan_attribute_value(&data[i..]);
1435 if n_attr == 0 { return 0; }
1436 i += n_attr;
1437 }
1438 }
1439 i += self.scan_whitespace_inline(&data[i..]);
1440 i += scan_ch(&data[i..], b'/');
1441 } else {
1442 i += self.scan_whitespace_inline(&data[i..]);
1443 }
1444 if scan_ch(&data[i..], b'>') == 0 { return 0; }
1445 i += 1;
1446 i
1447 }
1448
1449 fn scan_attribute_value(&self, data: &str) -> usize {
1450 let size = data.len();
1451 if size == 0 { return 0; }
1452 let open = data.as_bytes()[0];
1453 let quoted = open == b'\'' || open == b'"';
1454 let mut i = if quoted { 1 } else { 0 };
1455 while i < size {
1456 let c = data.as_bytes()[i];
1457 match c {
1458 b'\n' => {
1459 if !quoted { break; }
1460 let n = self.scan_whitespace_inline(&data[i..]);
1461 if n == 0 { return 0; }
1462 i += n;
1463 }
1464 b'\'' | b'"' | b'=' | b'<' | b'>' | b'`' | b'\t' ... b' ' => {
1465 if !quoted || c == open { break; }
1466 i += 1;
1467 }
1468 _ => i += 1
1469 }
1470 }
1471 if quoted {
1472 if i == size || data.as_bytes()[i] != open { return 0; }
1473 i += 1;
1474 }
1475 i
1476 }
1477
1478 fn scan_html_comment(&self, data: &str) -> usize {
1479 if !data.starts_with("<!--") { return 0; }
1480 if let Some(n) = data[4..].find("--") {
1481 let text = &data[4..4 + n];
1482 if !text.starts_with('>') && !text.starts_with("->") &&
1483 data[n + 6 ..].starts_with('>') {
1484 return n + 7;
1485 }
1486 }
1487 0
1488 }
1489
1490 fn scan_processing_instruction(&self, data: &str) -> usize {
1491 if !data.starts_with("<?") { return 0; }
1492 if let Some(n) = data[2..].find("?>") {
1493 return n + 4;
1494 }
1495 0
1496 }
1497
1498 fn scan_declaration(&self, data: &str) -> usize {
1499 if !data.starts_with("<!") { return 0; }
1500 let n = scan_while(&data[2..], is_ascii_upper);
1501 if n == 0 { return 0; }
1502 let i = n + 2;
1503 let n = self.scan_whitespace_inline(&data[i..]);
1504 if n == 0 { return 0; }
1505 let mut i = i + n;
1506 while i < data.len() {
1507 match data.as_bytes()[i] {
1508 b'>' => return i + 1,
1509 b'\n' => i += self.scan_whitespace_inline(&data[i..]),
1510 _ => i += 1
1511 }
1512 }
1513 0
1514 }
1515
1516 fn scan_cdata(&self, data: &str) -> usize {
1517 if !data.starts_with("<![CDATA[") { return 0; }
1518 if let Some(n) = data[9..].find("]]>") {
1519 return n + 12;
1520 }
1521 0
1522 }
1523
1524 fn inline_html_event(&mut self, n: usize) -> Event<'a> {
1525 let data = &self.text[self.off .. self.off + n];
1526 let size = data.len();
1527 let mut out = Borrowed("");
1528 let mut i = 0;
1529 let mut mark = 0;
1530 while i < size {
1531 let n = scan_nextline(&data[i..]);
1532 i += n;
1533 if n >= 2 && data.as_bytes()[i - 2] == b'\r' {
1534 out = utils::cow_append(out, Borrowed(&data[mark .. i - 2]));
1535 mark = i - 1;
1536 }
1537 if i < size {
1538 let (n, _, _) = self.scan_containers(&data[i..]);
1539 if n != 0 {
1540 out = utils::cow_append(out, Borrowed(&data[mark..i]));
1541 mark = i + n;
1542 }
1543 }
1544 }
1545 out = utils::cow_append(out, Borrowed(&data[mark..n]));
1546 self.off += n;
1547 Event::InlineHtml(out)
1548 }
1549
1550 // link text is literal, with no processing of markup
1551 fn next_literal(&mut self) -> Event<'a> {
1552 self.state = State::Inline;
1553 let beg = self.off;
1554 let end = self.limit();
1555 self.off = end;
1556 Event::Text(Borrowed(&self.text[beg..end]))
1557 }
1558
1559 // second return value is number of backticks even if not closed
1560 fn scan_inline_code(&self, data: &str) -> (usize, usize, usize) {
1561 let size = data.len();
1562 let backtick_len = scan_backticks(data);
1563 let mut i = backtick_len;
1564 while i < size {
1565 match data.as_bytes()[i] {
1566 b'`' => {
1567 let close_len = scan_backticks(&data[i..]);
1568 if close_len == backtick_len {
1569 return (i + backtick_len, backtick_len, i);
1570 } else {
1571 i += close_len;
1572 }
1573 }
1574 b'\n' => {
1575 i += 1;
1576 let (n, _, space) = self.scan_containers(&data[i..]);
1577 i += n;
1578 if self.is_inline_block_end(&data[i..], space) {
1579 return (0, backtick_len, 0);
1580 }
1581 }
1582 // TODO: '<'
1583 _ => i += 1
1584 }
1585 }
1586 (0, backtick_len, 0)
1587 }
1588
1589 fn char_backtick(&mut self) -> Option<Event<'a>> {
1590 let beg = self.off;
1591 let limit = self.limit();
1592 let mut i = beg;
1593 let (n, code_beg, code_end) = self.scan_inline_code(&self.text[i..limit]);
1594 if n == 0 {
1595 self.off += code_beg - 1;
1596 return None;
1597 }
1598 i += code_beg;
1599 let end = beg + code_end;
1600 let next = beg + n;
1601 i += self.scan_whitespace_inline(&self.text[i..limit]);
1602 self.off = i;
1603 self.state = State::InlineCode;
1604 Some(self.start(Tag::Code, end, next))
1605 }
1606
1607 fn next_inline_code(&mut self) -> Event<'a> {
1608 let beg = self.off;
1609 let mut i = beg;
1610 let limit = self.limit();
1611 while i < limit {
1612 let c = self.text.as_bytes()[i];
1613 if is_ascii_whitespace(c) {
1614 let n = self.scan_whitespace_inline(&self.text[i..limit]);
1615 if i + n == limit || n == 0 {
1616 if i > beg {
1617 break;
1618 } else {
1619 return self.end();
1620 }
1621 }
1622 if c == b' ' && n == 1 {
1623 // optimization to reduce number of text blocks produced
1624 i += 1;
1625 } else {
1626 if i > beg {
1627 break;
1628 }
1629 i += n;
1630 self.off = i;
1631 return Event::Text(Borrowed(" "));
1632 }
1633 } else {
1634 i += 1;
1635 }
1636 }
1637 if i > beg {
1638 self.off = i;
1639 Event::Text(Borrowed(&self.text[beg..i]))
1640 } else {
1641 self.end()
1642 }
1643 }
1644}
1645
1646impl<'a> Iterator for RawParser<'a> {
1647 type Item = Event<'a>;
1648
1649 fn next(&mut self) -> Option<Event<'a>> {
1650 //println!("off {} {:?}, stack {:?} containers {:?}",
1651 // self.off, self.state, self.stack, self.containers);
1652 if self.off < self.text.len() {
1653 match self.state {
1654 State::StartBlock | State::InContainers => {
1655 let ret = self.start_block();
1656 if ret.is_some() {
1657 return ret;
1658 }
1659 }
1660 State::Inline => return Some(self.next_inline()),
1661 State::TableHead(_, _) => return Some(self.start_table_head()),
1662 State::TableBody => return Some(self.start_table_body()),
1663 State::TableRow => return Some(self.next_table_cell()),
1664 State::CodeLineStart => return Some(self.next_code_line_start()),
1665 State::Code => return Some(self.next_code()),
1666 State::InlineCode => return Some(self.next_inline_code()),
1667 State::Literal => return Some(self.next_literal()),
1668 }
1669 }
1670 match self.stack.pop() {
1671 Some((tag, _, _)) => Some(Event::End(tag)),
1672 None => None
1673 }
1674 }
1675}