// Source: git.proxmox.com Git - rustc.git/blob - vendor/pulldown-cmark-0.7.2/tests/lib.rs
// New upstream version 1.48.0~beta.8+dfsg1
// [rustc.git] / vendor / pulldown-cmark-0.7.2 / tests / lib.rs
1 #[macro_use]
2 extern crate html5ever;
3 #[macro_use]
4 extern crate lazy_static;
5
6 use html5ever::serialize::{serialize, SerializeOpts};
7 use html5ever::{driver as html, QualName};
8 use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle};
9 use pulldown_cmark::{Options, Parser};
10
11 use regex::Regex;
12 use std::collections::HashSet;
13 use std::mem;
14 use std::rc::{Rc, Weak};
15 use tendril::stream::TendrilSink;
16
17 mod suite;
18
19 #[inline(never)]
20 pub fn test_markdown_html(input: &str, output: &str) {
21 let mut s = String::new();
22
23 let mut opts = Options::empty();
24 opts.insert(Options::ENABLE_TABLES);
25 opts.insert(Options::ENABLE_FOOTNOTES);
26 opts.insert(Options::ENABLE_STRIKETHROUGH);
27 opts.insert(Options::ENABLE_TASKLISTS);
28
29 let p = Parser::new_ext(input, opts);
30 pulldown_cmark::html::push_html(&mut s, p);
31
32 assert_eq!(normalize_html(output), normalize_html(&s));
33 }
34
35 lazy_static! {
36 static ref WHITESPACE_RE: Regex = Regex::new(r"\s+").unwrap();
37 static ref LEADING_WHITESPACE_RE: Regex = Regex::new(r"\A\s+").unwrap();
38 static ref TRAILING_WHITESPACE_RE: Regex = Regex::new(r"\s+\z").unwrap();
39 static ref BLOCK_TAGS: HashSet<&'static str> = [
40 "article",
41 "header",
42 "aside",
43 "hgroup",
44 "blockquote",
45 "hr",
46 "iframe",
47 "body",
48 "li",
49 "map",
50 "button",
51 "object",
52 "canvas",
53 "ol",
54 "caption",
55 "output",
56 "col",
57 "p",
58 "colgroup",
59 "pre",
60 "dd",
61 "progress",
62 "div",
63 "section",
64 "dl",
65 "table",
66 "td",
67 "dt",
68 "tbody",
69 "embed",
70 "textarea",
71 "fieldset",
72 "tfoot",
73 "figcaption",
74 "th",
75 "figure",
76 "thead",
77 "footer",
78 "tr",
79 "form",
80 "ul",
81 "h1",
82 "h2",
83 "h3",
84 "h4",
85 "h5",
86 "h6",
87 "video",
88 "script",
89 "style"
90 ]
91 .iter()
92 .cloned()
93 .collect();
94 static ref WHITESPACE_SENSITIVE_TAGS: HashSet<&'static str> =
95 ["pre", "code", "h1", "h2", "h3", "h4", "h5", "h6"]
96 .iter()
97 .cloned()
98 .collect();
99 static ref TABLE_TAGS: HashSet<&'static str> = ["table", "thead", "tbody", "tr", "td"]
100 .iter()
101 .cloned()
102 .collect();
103 }
104
105 fn make_html_parser() -> html::Parser<RcDom> {
106 html::parse_fragment(
107 RcDom::default(),
108 html::ParseOpts::default(),
109 QualName::new(None, ns!(html), local_name!("div")),
110 vec![],
111 )
112 }
113
114 fn normalize_html(s: &str) -> String {
115 let parser = make_html_parser();
116 let dom = parser.one(s);
117 let body: SerializableHandle = normalize_dom(&dom).into();
118 let opts = SerializeOpts::default();
119 let mut ret_val = Vec::new();
120 serialize(&mut ret_val, &body, opts)
121 .expect("Writing to a string shouldn't fail (expect on OOM)");
122 String::from_utf8(ret_val).expect("html5ever should always produce UTF8")
123 }
124
125 fn normalize_dom(dom: &RcDom) -> Handle {
126 let body = {
127 let children = dom.document.children.borrow();
128 children[0].clone()
129 };
130 let mut current_level = Vec::new();
131 let mut next_level = Vec::new();
132 current_level.extend(body.children.borrow().iter().cloned().rev());
133 loop {
134 while let Some(mut node) = current_level.pop() {
135 let parent = node.parent.replace(None);
136 node.parent.replace(parent.clone());
137 let parent = parent
138 .expect("a node in the DOM will have a parent, except the root, which is not processed")
139 .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
140 let retain = normalize_node(&parent, &mut node);
141 if !retain {
142 let mut siblings = parent.children.borrow_mut();
143 siblings.retain(|s| !Rc::ptr_eq(&node, s));
144 } else {
145 next_level.extend(node.children.borrow().iter().cloned().rev());
146 }
147 }
148 if next_level.is_empty() {
149 break;
150 };
151 mem::swap(&mut next_level, &mut current_level);
152 }
153 body
154 }
155
156 // Returns false if node is an empty text node or an empty tbody.
157 // Returns true otherwise.
158 fn normalize_node(parent: &Handle, node: &mut Handle) -> bool {
159 match node.data {
160 NodeData::Comment { .. }
161 | NodeData::Doctype { .. }
162 | NodeData::Document
163 | NodeData::ProcessingInstruction { .. } => true,
164 NodeData::Text { ref contents, .. } => {
165 let mut contents = contents.borrow_mut();
166 let is_pre = {
167 let mut parent = parent.clone();
168 loop {
169 let is_pre = if let NodeData::Element { ref name, .. } = parent.data {
170 WHITESPACE_SENSITIVE_TAGS.contains(&&*name.local.to_ascii_lowercase())
171 } else {
172 false
173 };
174 if is_pre {
175 break true;
176 };
177 let parent_ = parent.parent.replace(None);
178 parent.parent.replace(parent_.clone());
179 let parent_ = parent_.as_ref().and_then(Weak::upgrade);
180 if let Some(parent_) = parent_ {
181 parent = parent_
182 } else {
183 break false;
184 };
185 }
186 };
187 if !is_pre {
188 let (is_first_in_block, is_last_in_block) = {
189 let mut is_first_in_block = true;
190 let mut is_last_in_block = true;
191 let mut parent = parent.clone();
192 let mut node = node.clone();
193 loop {
194 let reached_block = if let NodeData::Element { ref name, .. } = parent.data
195 {
196 BLOCK_TAGS.contains(&&*name.local.to_ascii_lowercase())
197 } else {
198 false
199 };
200 let (is_first, is_last) = {
201 let siblings = parent.children.borrow();
202 let n = &node;
203 (
204 siblings.get(0).map(|s| Rc::ptr_eq(s, n)).unwrap_or(false),
205 siblings.len() > 0
206 && siblings
207 .get(siblings.len() - 1)
208 .map(|s| Rc::ptr_eq(s, n))
209 .unwrap_or(false),
210 )
211 };
212 is_first_in_block = is_first_in_block && is_first;
213 is_last_in_block = is_last_in_block && is_last;
214 if (is_first_in_block || is_last_in_block) && !reached_block {
215 node = parent.clone();
216 let parent_ = parent.parent.replace(None);
217 parent.parent.replace(parent_.clone());
218 let parent_ = parent_.as_ref().and_then(Weak::upgrade);
219 if let Some(parent_) = parent_ {
220 parent = parent_;
221 } else {
222 break (is_first_in_block, is_last_in_block);
223 }
224 } else {
225 break (is_first_in_block, is_last_in_block);
226 }
227 }
228 };
229 let is_preceeded_by_ws = {
230 let mut parent = parent.clone();
231 let mut node = node.clone();
232 'ascent: loop {
233 let is_first = {
234 let siblings = parent.children.borrow();
235 let n = &node;
236 siblings.get(0).map(|s| Rc::ptr_eq(s, n)).unwrap_or(false)
237 };
238 if is_first {
239 node = parent.clone();
240 let parent_ = parent.parent.replace(None);
241 parent.parent.replace(parent_.clone());
242 let parent_ = parent_.as_ref().and_then(Weak::upgrade);
243 if let Some(parent_) = parent_ {
244 parent = parent_;
245 } else {
246 break 'ascent false;
247 }
248 } else {
249 let siblings = parent.children.borrow();
250 let n = &node;
251 let mut pos = !0;
252 'search: for (i, s) in siblings.iter().enumerate() {
253 if Rc::ptr_eq(s, n) {
254 pos = i;
255 break 'search;
256 }
257 }
258 assert!(
259 pos != !0,
260 "The list of node's parent's children shall contain node"
261 );
262 assert!(
263 pos != 0,
264 "If node is not first, then node's position shall not be zero"
265 );
266 let mut preceeding = siblings[pos - 1].clone();
267 'descent: loop {
268 if let NodeData::Text { .. } = preceeding.data {
269 break 'descent;
270 }
271 preceeding = {
272 let ch = preceeding.children.borrow();
273 if ch.len() == 0 {
274 break 'descent;
275 }
276 if let Some(preceeding_) = ch.get(ch.len() - 1) {
277 preceeding_.clone()
278 } else {
279 break 'descent;
280 }
281 };
282 }
283 if let NodeData::Text { ref contents, .. } = preceeding.data {
284 break 'ascent TRAILING_WHITESPACE_RE.is_match(&*contents.borrow());
285 } else {
286 break 'ascent false;
287 }
288 }
289 }
290 };
291
292 let is_in_table = if let NodeData::Element { ref name, .. } = parent.data {
293 TABLE_TAGS.contains(&&*name.local.to_ascii_lowercase())
294 } else {
295 false
296 };
297 let whitespace_replacement = if is_in_table { "" } else { " " };
298 *contents = WHITESPACE_RE
299 .replace_all(&*contents, whitespace_replacement)
300 .as_ref()
301 .into();
302
303 if is_first_in_block || is_preceeded_by_ws {
304 *contents = LEADING_WHITESPACE_RE
305 .replace_all(&*contents, "")
306 .as_ref()
307 .into();
308 }
309 if is_last_in_block {
310 *contents = TRAILING_WHITESPACE_RE
311 .replace_all(&*contents, "")
312 .as_ref()
313 .into();
314 }
315 // TODO: collapse whitespace when adjacent to whitespace.
316 // For example, the whitespace in the span should be collapsed in all of these cases:
317 //
318 // " <span> q </span> "
319 // "<b>q </b><span> q</span>"
320 // "<b>q <i></i></b><span> q</span>"
321 // "<b>q <i></i></b><span> q</span>"
322 // "q <b></b><span> q</span>"
323 }
324 &**contents != ""
325 }
326 NodeData::Element {
327 ref attrs,
328 ref name,
329 ..
330 } => {
331 let mut attrs = attrs.borrow_mut();
332 for a in attrs.iter_mut() {
333 a.name.local = a.name.local.to_ascii_lowercase().into();
334 }
335 attrs.sort_by(|a: &html5ever::Attribute, b: &html5ever::Attribute| {
336 (&*a.name.local).cmp(&*b.name.local)
337 });
338 let ascii_name = &*name.local.to_ascii_lowercase();
339 // drop empty tbody's
340 ascii_name != "tbody"
341 || node.children.borrow().len() > 1
342 || node
343 .children
344 .borrow()
345 .iter()
346 .next()
347 .map(|only_child| match only_child.data {
348 NodeData::Text { ref contents, .. } => {
349 !contents.borrow().chars().all(|c| c.is_whitespace())
350 }
351 _ => true,
352 })
353 .unwrap_or(false)
354 }
355 }
356 }
357
// A newline that is the only content of a block element is removed.
#[test]
fn strip_div_newline() {
    let normalized = normalize_html("<div>\n</div>");
    assert_eq!("<div></div>", normalized);
}
362
// A trailing newline at the end of the fragment is removed.
#[test]
fn strip_end_newline() {
    let normalized = normalize_html("test\n");
    assert_eq!("test", normalized);
}
367
// A run of two spaces between words collapses to a single space.
//
// The input deliberately contains two consecutive spaces; with a single
// space the test would not exercise the collapsing at all.
#[test]
fn strip_double_space() {
    assert_eq!("test mess", normalize_html("test  mess"));
}
372
// Whitespace at the edges of inline elements is stripped where it touches a
// block edge or other whitespace, and kept where it separates words.
#[test]
fn strip_inline_internal_text() {
    let normalized = normalize_html("<u> a </u> b <u> c </u>");
    assert_eq!("<u>a </u>b <u>c</u>", normalized);
}
380
// Same as `strip_inline_internal_text`, with extra whitespace at both ends of
// the fragment.
#[test]
fn strip_inline_block_internal_text() {
    let normalized = normalize_html(" <u> a </u> b <u> c </u> ");
    assert_eq!("<u>a </u>b <u>c</u>", normalized);
}
388
// Single spaces that separate inline content are already normalized and must
// survive unchanged.
#[test]
fn leaves_necessary_whitespace_alone() {
    let normalized = normalize_html("<u>a</u> b <u>c</u>");
    assert_eq!("<u>a</u> b <u>c</u>", normalized);
}
393
// Leading whitespace at the start of the fragment is dropped, while the
// space inside the first `<u>` is kept because it separates "a" from "b".
#[test]
fn leaves_necessary_whitespace_alone_weird() {
    let normalized = normalize_html(" <u>a </u>b <u>c</u>");
    assert_eq!("<u>a </u>b <u>c</u>", normalized);
}
401
// Inline elements that contain nothing but whitespace end up empty.
#[test]
fn leaves_necessary_whitespace_all_nested() {
    let normalized = normalize_html("<u> </u><u> </u><u> </u><u> </u>");
    assert_eq!("<u></u><u></u><u></u><u></u>", normalized);
}
409
// A `<tbody>` whose only content is whitespace is removed from the table.
#[test]
fn drops_empty_tbody() {
    let normalized =
        normalize_html("<table><thead><tr><td>hi</td></tr></thead><tbody> </tbody></table>");
    assert_eq!(
        "<table><thead><tr><td>hi</td></tr></thead></table>",
        normalized
    );
}
417
// A `<tbody>` that contains an element child (even an empty row) is kept, so
// the markup round-trips unchanged.
#[test]
fn leaves_nonempty_tbody() {
    let markup = "<table><thead><tr><td>hi</td></tr></thead><tbody><tr></tr></tbody></table>";
    let normalized = normalize_html(markup);
    assert_eq!(markup, normalized);
}
422 }