]> git.proxmox.com Git - rustc.git/blame - vendor/mdbook/src/utils/mod.rs
New upstream version 1.46.0~beta.2+dfsg1
[rustc.git] / vendor / mdbook / src / utils / mod.rs
CommitLineData
2c00a5a8
XL
1#![allow(missing_docs)] // FIXME: Document this
2
ea8adc8c 3pub mod fs;
2c00a5a8 4mod string;
f035d41b 5pub(crate) mod toml_ext;
dc9dc135 6use crate::errors::Error;
83c7162d 7use regex::Regex;
ea8adc8c 8
f035d41b 9use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
83c7162d 10
ea8adc8c 11use std::borrow::Cow;
416331ca
XL
12use std::fmt::Write;
13use std::path::Path;
ea8adc8c 14
e74abb32
XL
15pub use self::string::{
16 take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
17 take_rustdoc_include_lines,
18};
ea8adc8c 19
83c7162d 20/// Replaces multiple consecutive whitespace characters with a single space character.
dc9dc135 21pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
83c7162d
XL
22 lazy_static! {
23 static ref RE: Regex = Regex::new(r"\s\s+").unwrap();
24 }
25 RE.replace_all(text, " ")
26}
27
9fa01778
XL
/// Turns arbitrary text into a string usable as an HTML element ID.
///
/// The only hard requirement on an ID is that it contains no whitespace, so:
///
/// * alphanumeric characters, `_` and `-` are kept, with ASCII letters
///   lowercased (non-ASCII letters keep their case);
/// * every whitespace character becomes a `-`;
/// * anything else is dropped.
pub fn normalize_id(content: &str) -> String {
    let mut id = String::new();

    for ch in content.chars() {
        match ch {
            '_' | '-' => id.push(ch),
            c if c.is_alphanumeric() => id.push(c.to_ascii_lowercase()),
            c if c.is_whitespace() => id.push('-'),
            // Punctuation, symbols, emoji, ... never make it into the ID.
            _ => {}
        }
    }

    id
}
44
45/// Generate an ID for use with anchors which is derived from a "normalised"
46/// string.
47pub fn id_from_content(content: &str) -> String {
48 let mut content = content.to_string();
49
50 // Skip any tags or html-encoded stuff
9fa01778
XL
51 const REPL_SUB: &[&str] = &[
52 "<em>",
53 "</em>",
54 "<code>",
55 "</code>",
56 "<strong>",
57 "</strong>",
58 "&lt;",
59 "&gt;",
60 "&amp;",
61 "&#39;",
62 "&quot;",
63 ];
83c7162d
XL
64 for sub in REPL_SUB {
65 content = content.replace(sub, "");
66 }
67
68 // Remove spaces and hashes indicating a header
dc9dc135 69 let trimmed = content.trim().trim_start_matches('#').trim();
83c7162d
XL
70
71 normalize_id(trimmed)
72}
73
416331ca
XL
74/// Fix links to the correct location.
75///
76/// This adjusts links, such as turning `.md` extensions to `.html`.
77///
78/// `path` is the path to the page being rendered relative to the root of the
79/// book. This is used for the `print.html` page so that links on the print
80/// page go to the original location. Normal page rendering sets `path` to
81/// None. Ideally, print page links would link to anchors on the print page,
82/// but that is very difficult.
83fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
9fa01778 84 lazy_static! {
dc9dc135 85 static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
9fa01778
XL
86 static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
87 }
88
416331ca
XL
89 fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
90 if dest.starts_with('#') {
91 // Fragment-only link.
92 if let Some(path) = path {
93 let mut base = path.display().to_string();
94 if base.ends_with(".md") {
95 base.replace_range(base.len() - 3.., ".html");
96 }
97 return format!("{}{}", base, dest).into();
98 } else {
99 return dest;
100 }
101 }
dc9dc135
XL
102 // Don't modify links with schemes like `https`.
103 if !SCHEME_LINK.is_match(&dest) {
104 // This is a relative link, adjust it as necessary.
105 let mut fixed_link = String::new();
416331ca
XL
106 if let Some(path) = path {
107 let base = path
108 .parent()
109 .expect("path can't be empty")
110 .to_str()
111 .expect("utf-8 paths only");
112 if !base.is_empty() {
113 write!(fixed_link, "{}/", base).unwrap();
114 }
dc9dc135 115 }
9fa01778 116
dc9dc135
XL
117 if let Some(caps) = MD_LINK.captures(&dest) {
118 fixed_link.push_str(&caps["link"]);
119 fixed_link.push_str(".html");
120 if let Some(anchor) = caps.name("anchor") {
121 fixed_link.push_str(anchor.as_str());
9fa01778 122 }
dc9dc135
XL
123 } else {
124 fixed_link.push_str(&dest);
125 };
126 return CowStr::from(fixed_link);
127 }
128 dest
129 }
9fa01778 130
416331ca
XL
131 fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
132 // This is a terrible hack, but should be reasonably reliable. Nobody
133 // should ever parse a tag with a regex. However, there isn't anything
134 // in Rust that I know of that is suitable for handling partial html
135 // fragments like those generated by pulldown_cmark.
136 //
137 // There are dozens of HTML tags/attributes that contain paths, so
138 // feel free to add more tags if desired; these are the only ones I
139 // care about right now.
140 lazy_static! {
141 static ref HTML_LINK: Regex =
142 Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
143 }
144
145 HTML_LINK
146 .replace_all(&html, |caps: &regex::Captures<'_>| {
147 let fixed = fix(caps[2].into(), path);
148 format!("{}{}\"", &caps[1], fixed)
149 })
150 .into_owned()
151 .into()
152 }
153
dc9dc135
XL
154 match event {
155 Event::Start(Tag::Link(link_type, dest, title)) => {
416331ca 156 Event::Start(Tag::Link(link_type, fix(dest, path), title))
dc9dc135
XL
157 }
158 Event::Start(Tag::Image(link_type, dest, title)) => {
416331ca 159 Event::Start(Tag::Image(link_type, fix(dest, path), title))
9fa01778 160 }
416331ca 161 Event::Html(html) => Event::Html(fix_html(html, path)),
9fa01778
XL
162 _ => event,
163 }
164}
165
2c00a5a8 166/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
ea8adc8c 167pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
416331ca 168 render_markdown_with_path(text, curly_quotes, None)
9fa01778
XL
169}
170
dc9dc135 171pub fn new_cmark_parser(text: &str) -> Parser<'_> {
ea8adc8c 172 let mut opts = Options::empty();
dc9dc135
XL
173 opts.insert(Options::ENABLE_TABLES);
174 opts.insert(Options::ENABLE_FOOTNOTES);
175 opts.insert(Options::ENABLE_STRIKETHROUGH);
176 opts.insert(Options::ENABLE_TASKLISTS);
177 Parser::new_ext(text, opts)
178}
ea8adc8c 179
416331ca 180pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
dc9dc135
XL
181 let mut s = String::with_capacity(text.len() * 3 / 2);
182 let p = new_cmark_parser(text);
ea8adc8c 183 let mut converter = EventQuoteConverter::new(curly_quotes);
9fa01778
XL
184 let events = p
185 .map(clean_codeblock_headers)
416331ca 186 .map(|event| adjust_links(event, path))
9fa01778 187 .map(|event| converter.convert(event));
ea8adc8c
XL
188
189 html::push_html(&mut s, events);
190 s
191}
192
193struct EventQuoteConverter {
194 enabled: bool,
195 convert_text: bool,
196}
197
198impl EventQuoteConverter {
199 fn new(enabled: bool) -> Self {
2c00a5a8 200 EventQuoteConverter {
9fa01778 201 enabled,
2c00a5a8
XL
202 convert_text: true,
203 }
ea8adc8c
XL
204 }
205
206 fn convert<'a>(&mut self, event: Event<'a>) -> Event<'a> {
207 if !self.enabled {
208 return event;
209 }
210
211 match event {
dc9dc135 212 Event::Start(Tag::CodeBlock(_)) => {
ea8adc8c
XL
213 self.convert_text = false;
214 event
2c00a5a8 215 }
dc9dc135 216 Event::End(Tag::CodeBlock(_)) => {
ea8adc8c
XL
217 self.convert_text = true;
218 event
2c00a5a8
XL
219 }
220 Event::Text(ref text) if self.convert_text => {
dc9dc135 221 Event::Text(CowStr::from(convert_quotes_to_curly(text)))
2c00a5a8 222 }
ea8adc8c
XL
223 _ => event,
224 }
225 }
226}
227
dc9dc135 228fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
ea8adc8c 229 match event {
f035d41b 230 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => {
2c00a5a8 231 let info: String = info.chars().filter(|ch| !ch.is_whitespace()).collect();
ea8adc8c 232
f035d41b 233 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
2c00a5a8 234 }
ea8adc8c
XL
235 _ => event,
236 }
237}
238
ea8adc8c
XL
/// Replaces straight single and double quotes with curly ones.
///
/// A quote becomes an opening quote when it is the first character of the
/// text or directly follows whitespace, and a closing quote otherwise.
fn convert_quotes_to_curly(original_text: &str) -> String {
    let mut curled = String::with_capacity(original_text.len());
    // We'll consider the start to be "whitespace".
    let mut after_whitespace = true;

    for ch in original_text.chars() {
        let replacement = match (ch, after_whitespace) {
            ('\'', true) => '‘',
            ('\'', false) => '’',
            ('"', true) => '“',
            ('"', false) => '”',
            _ => ch,
        };
        curled.push(replacement);

        // Decided on the original character, not on its replacement.
        after_whitespace = ch.is_whitespace();
    }

    curled
}
ea8adc8c 270
2c00a5a8
XL
271/// Prints a "backtrace" of some `Error`.
272pub fn log_backtrace(e: &Error) {
273 error!("Error: {}", e);
ea8adc8c 274
f035d41b 275 for cause in e.chain().skip(1) {
2c00a5a8
XL
276 error!("\tCaused By: {}", cause);
277 }
ea8adc8c
XL
278}
279
// Unit tests for the helpers defined in this module.
280#[cfg(test)]
281mod tests {
// End-to-end checks of `render_markdown`: link rewriting, curly quotes and
// the handling of code block info strings.
282 mod render_markdown {
283 use super::super::render_markdown;
284
9fa01778
XL
// A destination that carries a URL scheme is rendered unchanged.
285 #[test]
286 fn preserves_external_links() {
287 assert_eq!(
288 render_markdown("[example](https://www.rust-lang.org/)", false),
289 "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
290 );
291 }
292
// `.md` destinations become `.html`, keeping any `#anchor` that follows.
293 #[test]
294 fn it_can_adjust_markdown_links() {
295 assert_eq!(
296 render_markdown("[example](example.md)", false),
297 "<p><a href=\"example.html\">example</a></p>\n"
298 );
299 assert_eq!(
300 render_markdown("[example_anchor](example.md#anchor)", false),
301 "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
302 );
303
304 // this anchor contains 'md' inside of it
305 assert_eq!(
306 render_markdown("[phantom data](foo.html#phantomdata)", false),
307 "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
308 );
309 }
310
ea8adc8c
XL
// With `curly_quotes` switched off, quotes are left exactly as typed.
311 #[test]
312 fn it_can_keep_quotes_straight() {
313 assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
314 }
315
// Quotes inside fenced and inline code stay straight; quotes in the
// surrounding prose are curled.
316 #[test]
317 fn it_can_make_quotes_curly_except_when_they_are_in_code() {
318 let input = r#"
319'one'
320```
321'two'
322```
323`'three'` 'four'"#;
324 let expected = r#"<p>‘one’</p>
325<pre><code>'two'
326</code></pre>
327<p><code>'three'</code> ‘four’</p>
328"#;
329 assert_eq!(render_markdown(input, true), expected);
330 }
331
// Paragraph text and code block contents come through unchanged, with and
// without curly quotes enabled.
332 #[test]
333 fn whitespace_outside_of_codeblock_header_is_preserved() {
334 let input = r#"
335some text with spaces
336```rust
337fn main() {
338// code inside is unchanged
339}
340```
341more text with spaces
342"#;
343
344 let expected = r#"<p>some text with spaces</p>
345<pre><code class="language-rust">fn main() {
346// code inside is unchanged
347}
348</code></pre>
349<p>more text with spaces</p>
350"#;
351 assert_eq!(render_markdown(input, false), expected);
352 assert_eq!(render_markdown(input, true), expected);
353 }
354
// NOTE(review): despite the test name, the expected class attribute keeps
// the commas of the info string rather than turning them into spaces.
355 #[test]
356 fn rust_code_block_properties_are_passed_as_space_delimited_class() {
357 let input = r#"
358```rust,no_run,should_panic,property_3
359```
360"#;
361
f9f354fc 362 let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
ea8adc8c
XL
363"#;
364 assert_eq!(render_markdown(input, false), expected);
365 assert_eq!(render_markdown(input, true), expected);
366 }
367
// Whitespace inside the info string is dropped while every comma is kept.
368 #[test]
369 fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
370 let input = r#"
371```rust, no_run,,,should_panic , ,property_3
372```
373"#;
374
f9f354fc 375 let expected = r#"<pre><code class="language-rust,no_run,,,should_panic,,property_3"></code></pre>
ea8adc8c
XL
376"#;
377 assert_eq!(render_markdown(input, false), expected);
378 assert_eq!(render_markdown(input, true), expected);
379 }
380
// A bare `rust` info string yields `class="language-rust"`.
381 #[test]
382 fn rust_code_block_without_properties_has_proper_html_class() {
383 let input = r#"
2c00a5a8 384```rust
ea8adc8c
XL
385```
386"#;
387
388 let expected = r#"<pre><code class="language-rust"></code></pre>
389"#;
390 assert_eq!(render_markdown(input, false), expected);
391 assert_eq!(render_markdown(input, true), expected);
392
393 let input = r#"
394```rust
395```
396"#;
397 assert_eq!(render_markdown(input, false), expected);
398 assert_eq!(render_markdown(input, true), expected);
ea8adc8c
XL
399 }
400 }
401
83c7162d
XL
// Checks for `id_from_content` and `normalize_id`.
402 mod html_munging {
403 use super::super::{id_from_content, normalize_id};
404
// Heading markers and inline markup characters do not end up in the ID.
405 #[test]
406 fn it_generates_anchors() {
9fa01778
XL
407 assert_eq!(
408 id_from_content("## Method-call expressions"),
409 "method-call-expressions"
410 );
411 assert_eq!(id_from_content("## **Bold** title"), "bold-title");
412 assert_eq!(id_from_content("## `Code` title"), "code-title");
413 }
414
// Non-ASCII letters are kept as they are; only ASCII letters are lowercased
// (`Über` keeps its capital letter).
415 #[test]
416 fn it_generates_anchors_from_non_ascii_initial() {
417 assert_eq!(
418 id_from_content("## `--passes`: add more rustdoc passes"),
419 "--passes-add-more-rustdoc-passes"
420 );
421 assert_eq!(
422 id_from_content("## 中文標題 CJK title"),
423 "中文標題-cjk-title"
424 );
425 assert_eq!(id_from_content("## Über"), "Über");
83c7162d
XL
426 }
427
// Symbols and emoji are dropped, each whitespace character becomes `-`, and
// an empty input gives an empty ID.
428 #[test]
429 fn it_normalizes_ids() {
9fa01778
XL
430 assert_eq!(
431 normalize_id("`--passes`: add more rustdoc passes"),
432 "--passes-add-more-rustdoc-passes"
433 );
434 assert_eq!(
435 normalize_id("Method-call 🐙 expressions \u{1f47c}"),
436 "method-call--expressions-"
437 );
438 assert_eq!(normalize_id("_-_12345"), "_-_12345");
439 assert_eq!(normalize_id("12345"), "12345");
440 assert_eq!(normalize_id("中文"), "中文");
441 assert_eq!(normalize_id("にほんご"), "にほんご");
442 assert_eq!(normalize_id("한국어"), "한국어");
83c7162d
XL
443 assert_eq!(normalize_id(""), "");
444 }
445 }
446
ea8adc8c
XL
// Direct checks of the quote-curling helper, bypassing the markdown parser.
447 mod convert_quotes_to_curly {
448 use super::super::convert_quotes_to_curly;
449
450 #[test]
451 fn it_converts_single_quotes() {
e74abb32 452 assert_eq!(convert_quotes_to_curly("'one', 'two'"), "‘one’, ‘two’");
ea8adc8c
XL
453 }
454
455 #[test]
456 fn it_converts_double_quotes() {
e74abb32 457 assert_eq!(convert_quotes_to_curly(r#""one", "two""#), "“one”, “two”");
ea8adc8c
XL
458 }
459
// A leading tab counts as whitespace, so the quote after it is an opening one.
460 #[test]
461 fn it_treats_tab_as_whitespace() {
462 assert_eq!(convert_quotes_to_curly("\t'one'"), "\t‘one’");
463 }
464 }
465}