]> git.proxmox.com Git - rustc.git/blame - vendor/mdbook/src/utils/mod.rs
New upstream version 1.39.0+dfsg1
[rustc.git] / vendor / mdbook / src / utils / mod.rs
CommitLineData
2c00a5a8
XL
1#![allow(missing_docs)] // FIXME: Document this
2
ea8adc8c 3pub mod fs;
2c00a5a8 4mod string;
dc9dc135 5use crate::errors::Error;
83c7162d 6use regex::Regex;
ea8adc8c 7
dc9dc135 8use pulldown_cmark::{html, CowStr, Event, Options, Parser, Tag};
83c7162d 9
ea8adc8c 10use std::borrow::Cow;
416331ca
XL
11use std::fmt::Write;
12use std::path::Path;
ea8adc8c 13
416331ca 14pub use self::string::{take_anchored_lines, take_lines};
ea8adc8c 15
/// Replaces every run of two or more consecutive whitespace characters with a
/// single space character.
///
/// A whitespace character that stands alone (for example a single tab or
/// newline between two words) is left exactly as it is. Whitespace is decided
/// by [`char::is_whitespace`], i.e. the Unicode `White_Space` property.
///
/// Returns `Cow::Borrowed` when `text` contains no such run, so the common
/// "nothing to do" case does not allocate.
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
    // Cheap pre-scan: is there any whitespace character directly following
    // another whitespace character?
    let has_run = {
        let mut prev_was_whitespace = false;
        text.chars().any(|ch| {
            let is_whitespace = ch.is_whitespace();
            let starts_run = is_whitespace && prev_was_whitespace;
            prev_was_whitespace = is_whitespace;
            starts_run
        })
    };
    if !has_run {
        return Cow::Borrowed(text);
    }

    // The result can only be shorter than the input.
    let mut collapsed = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        let next_is_whitespace = |next: Option<&char>| next.map_or(false, |c| c.is_whitespace());

        if ch.is_whitespace() && next_is_whitespace(chars.peek()) {
            // Swallow the remainder of the run and emit one space for all of it.
            while next_is_whitespace(chars.peek()) {
                chars.next();
            }
            collapsed.push(' ');
        } else {
            collapsed.push(ch);
        }
    }
    Cow::Owned(collapsed)
}
23
9fa01778
XL
/// Turns the given string into a valid HTML element ID.
///
/// The only restriction on an ID is that it must not contain any ASCII
/// whitespace. Characters are handled as follows:
///
/// * alphanumeric characters, `_` and `-` are kept (ASCII letters are
///   lower-cased, everything else is kept verbatim),
/// * whitespace characters become `-`,
/// * every other character is dropped.
pub fn normalize_id(content: &str) -> String {
    let mut id = String::new();
    for ch in content.chars() {
        match ch {
            '_' | '-' => id.push(ch),
            c if c.is_alphanumeric() => id.push(c.to_ascii_lowercase()),
            c if c.is_whitespace() => id.push('-'),
            // Punctuation, symbols, emoji, ... are removed entirely.
            _ => {}
        }
    }
    id
}
40
41/// Generate an ID for use with anchors which is derived from a "normalised"
42/// string.
43pub fn id_from_content(content: &str) -> String {
44 let mut content = content.to_string();
45
46 // Skip any tags or html-encoded stuff
9fa01778
XL
47 const REPL_SUB: &[&str] = &[
48 "<em>",
49 "</em>",
50 "<code>",
51 "</code>",
52 "<strong>",
53 "</strong>",
54 "&lt;",
55 "&gt;",
56 "&amp;",
57 "&#39;",
58 "&quot;",
59 ];
83c7162d
XL
60 for sub in REPL_SUB {
61 content = content.replace(sub, "");
62 }
63
64 // Remove spaces and hashes indicating a header
dc9dc135 65 let trimmed = content.trim().trim_start_matches('#').trim();
83c7162d
XL
66
67 normalize_id(trimmed)
68}
69
416331ca
XL
70/// Fix links to the correct location.
71///
72/// This adjusts links, such as turning `.md` extensions to `.html`.
73///
74/// `path` is the path to the page being rendered relative to the root of the
75/// book. This is used for the `print.html` page so that links on the print
76/// page go to the original location. Normal page rendering sets `path` to
77/// None. Ideally, print page links would link to anchors on the print page,
78/// but that is very difficult.
79fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
9fa01778 80 lazy_static! {
dc9dc135 81 static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
9fa01778
XL
82 static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
83 }
84
416331ca
XL
85 fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
86 if dest.starts_with('#') {
87 // Fragment-only link.
88 if let Some(path) = path {
89 let mut base = path.display().to_string();
90 if base.ends_with(".md") {
91 base.replace_range(base.len() - 3.., ".html");
92 }
93 return format!("{}{}", base, dest).into();
94 } else {
95 return dest;
96 }
97 }
dc9dc135
XL
98 // Don't modify links with schemes like `https`.
99 if !SCHEME_LINK.is_match(&dest) {
100 // This is a relative link, adjust it as necessary.
101 let mut fixed_link = String::new();
416331ca
XL
102 if let Some(path) = path {
103 let base = path
104 .parent()
105 .expect("path can't be empty")
106 .to_str()
107 .expect("utf-8 paths only");
108 if !base.is_empty() {
109 write!(fixed_link, "{}/", base).unwrap();
110 }
dc9dc135 111 }
9fa01778 112
dc9dc135
XL
113 if let Some(caps) = MD_LINK.captures(&dest) {
114 fixed_link.push_str(&caps["link"]);
115 fixed_link.push_str(".html");
116 if let Some(anchor) = caps.name("anchor") {
117 fixed_link.push_str(anchor.as_str());
9fa01778 118 }
dc9dc135
XL
119 } else {
120 fixed_link.push_str(&dest);
121 };
122 return CowStr::from(fixed_link);
123 }
124 dest
125 }
9fa01778 126
416331ca
XL
127 fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
128 // This is a terrible hack, but should be reasonably reliable. Nobody
129 // should ever parse a tag with a regex. However, there isn't anything
130 // in Rust that I know of that is suitable for handling partial html
131 // fragments like those generated by pulldown_cmark.
132 //
133 // There are dozens of HTML tags/attributes that contain paths, so
134 // feel free to add more tags if desired; these are the only ones I
135 // care about right now.
136 lazy_static! {
137 static ref HTML_LINK: Regex =
138 Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
139 }
140
141 HTML_LINK
142 .replace_all(&html, |caps: &regex::Captures<'_>| {
143 let fixed = fix(caps[2].into(), path);
144 format!("{}{}\"", &caps[1], fixed)
145 })
146 .into_owned()
147 .into()
148 }
149
dc9dc135
XL
150 match event {
151 Event::Start(Tag::Link(link_type, dest, title)) => {
416331ca 152 Event::Start(Tag::Link(link_type, fix(dest, path), title))
dc9dc135
XL
153 }
154 Event::Start(Tag::Image(link_type, dest, title)) => {
416331ca 155 Event::Start(Tag::Image(link_type, fix(dest, path), title))
9fa01778 156 }
416331ca
XL
157 Event::Html(html) => Event::Html(fix_html(html, path)),
158 Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
9fa01778
XL
159 _ => event,
160 }
161}
162
2c00a5a8 163/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
ea8adc8c 164pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
416331ca 165 render_markdown_with_path(text, curly_quotes, None)
9fa01778
XL
166}
167
dc9dc135 168pub fn new_cmark_parser(text: &str) -> Parser<'_> {
ea8adc8c 169 let mut opts = Options::empty();
dc9dc135
XL
170 opts.insert(Options::ENABLE_TABLES);
171 opts.insert(Options::ENABLE_FOOTNOTES);
172 opts.insert(Options::ENABLE_STRIKETHROUGH);
173 opts.insert(Options::ENABLE_TASKLISTS);
174 Parser::new_ext(text, opts)
175}
ea8adc8c 176
416331ca 177pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
dc9dc135
XL
178 let mut s = String::with_capacity(text.len() * 3 / 2);
179 let p = new_cmark_parser(text);
ea8adc8c 180 let mut converter = EventQuoteConverter::new(curly_quotes);
9fa01778
XL
181 let events = p
182 .map(clean_codeblock_headers)
416331ca 183 .map(|event| adjust_links(event, path))
9fa01778 184 .map(|event| converter.convert(event));
ea8adc8c
XL
185
186 html::push_html(&mut s, events);
187 s
188}
189
190struct EventQuoteConverter {
191 enabled: bool,
192 convert_text: bool,
193}
194
195impl EventQuoteConverter {
196 fn new(enabled: bool) -> Self {
2c00a5a8 197 EventQuoteConverter {
9fa01778 198 enabled,
2c00a5a8
XL
199 convert_text: true,
200 }
ea8adc8c
XL
201 }
202
203 fn convert<'a>(&mut self, event: Event<'a>) -> Event<'a> {
204 if !self.enabled {
205 return event;
206 }
207
208 match event {
dc9dc135 209 Event::Start(Tag::CodeBlock(_)) => {
ea8adc8c
XL
210 self.convert_text = false;
211 event
2c00a5a8 212 }
dc9dc135 213 Event::End(Tag::CodeBlock(_)) => {
ea8adc8c
XL
214 self.convert_text = true;
215 event
2c00a5a8
XL
216 }
217 Event::Text(ref text) if self.convert_text => {
dc9dc135 218 Event::Text(CowStr::from(convert_quotes_to_curly(text)))
2c00a5a8 219 }
ea8adc8c
XL
220 _ => event,
221 }
222 }
223}
224
dc9dc135 225fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
ea8adc8c
XL
226 match event {
227 Event::Start(Tag::CodeBlock(ref info)) => {
2c00a5a8 228 let info: String = info.chars().filter(|ch| !ch.is_whitespace()).collect();
ea8adc8c 229
dc9dc135 230 Event::Start(Tag::CodeBlock(CowStr::from(info)))
2c00a5a8 231 }
ea8adc8c
XL
232 _ => event,
233 }
234}
235
ea8adc8c
XL
/// Replaces straight single and double quotes with their curly equivalents.
///
/// A quote becomes an opening quote when the character before it is
/// whitespace (the start of the text counts as whitespace) and a closing
/// quote otherwise. All other characters are copied verbatim.
fn convert_quotes_to_curly(original_text: &str) -> String {
    let mut curly = String::new();

    // We'll consider the start to be "whitespace".
    let mut after_whitespace = true;

    for ch in original_text.chars() {
        let converted = match (ch, after_whitespace) {
            ('\'', true) => '‘',
            ('\'', false) => '’',
            ('"', true) => '“',
            ('"', false) => '”',
            _ => ch,
        };
        curly.push(converted);

        // The decision for the next character depends on the original one.
        after_whitespace = ch.is_whitespace();
    }

    curly
}
ea8adc8c 267
2c00a5a8
XL
268/// Prints a "backtrace" of some `Error`.
269pub fn log_backtrace(e: &Error) {
270 error!("Error: {}", e);
ea8adc8c 271
2c00a5a8
XL
272 for cause in e.iter().skip(1) {
273 error!("\tCaused By: {}", cause);
274 }
ea8adc8c
XL
275}
276
#[cfg(test)]
mod tests {
    /// Rendering of markdown to HTML: links, quotes and code fences.
    mod render_markdown {
        use super::super::render_markdown;

        /// Asserts that `input` renders to `expected` both with and without
        /// curly-quote conversion.
        fn assert_renders_regardless_of_quotes(input: &str, expected: &str) {
            assert_eq!(render_markdown(input, false), expected);
            assert_eq!(render_markdown(input, true), expected);
        }

        #[test]
        fn preserves_external_links() {
            let rendered = render_markdown("[example](https://www.rust-lang.org/)", false);
            assert_eq!(
                rendered,
                "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
            );
        }

        #[test]
        fn it_can_adjust_markdown_links() {
            let cases = [
                (
                    "[example](example.md)",
                    "<p><a href=\"example.html\">example</a></p>\n",
                ),
                (
                    "[example_anchor](example.md#anchor)",
                    "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n",
                ),
                // this anchor contains 'md' inside of it
                (
                    "[phantom data](foo.html#phantomdata)",
                    "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n",
                ),
            ];

            for (markdown, expected) in cases.iter() {
                assert_eq!(render_markdown(markdown, false), *expected);
            }
        }

        #[test]
        fn it_can_keep_quotes_straight() {
            let rendered = render_markdown("'one'", false);
            assert_eq!(rendered, "<p>'one'</p>\n");
        }

        #[test]
        fn it_can_make_quotes_curly_except_when_they_are_in_code() {
            let input = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
            let expected = r#"<p>‘one’</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> ‘four’</p>
"#;
            assert_eq!(render_markdown(input, true), expected);
        }

        #[test]
        fn whitespace_outside_of_codeblock_header_is_preserved() {
            let input = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;

            let expected = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
            assert_renders_regardless_of_quotes(input, expected);
        }

        #[test]
        fn rust_code_block_properties_are_passed_as_space_delimited_class() {
            let input = r#"
```rust,no_run,should_panic,property_3
```
"#;

            let expected =
                r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
            assert_renders_regardless_of_quotes(input, expected);
        }

        #[test]
        fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
            let input = r#"
```rust, no_run,,,should_panic , ,property_3
```
"#;

            let expected =
                r#"<pre><code class="language-rust,no_run,,,should_panic,,property_3"></code></pre>
"#;
            assert_renders_regardless_of_quotes(input, expected);
        }

        #[test]
        fn rust_code_block_without_properties_has_proper_html_class() {
            let expected = r#"<pre><code class="language-rust"></code></pre>
"#;

            let input = r#"
```rust
```
"#;
            assert_renders_regardless_of_quotes(input, expected);

            let input = r#"
```rust
```
"#;
            assert_renders_regardless_of_quotes(input, expected);
        }
    }

    /// Generation of anchor IDs from heading content.
    mod html_munging {
        use super::super::{id_from_content, normalize_id};

        #[test]
        fn it_generates_anchors() {
            let cases = [
                ("## Method-call expressions", "method-call-expressions"),
                ("## **Bold** title", "bold-title"),
                ("## `Code` title", "code-title"),
            ];
            for (heading, expected) in cases.iter() {
                assert_eq!(id_from_content(heading), *expected);
            }
        }

        #[test]
        fn it_generates_anchors_from_non_ascii_initial() {
            let cases = [
                (
                    "## `--passes`: add more rustdoc passes",
                    "--passes-add-more-rustdoc-passes",
                ),
                ("## 中文標題 CJK title", "中文標題-cjk-title"),
                ("## Über", "Über"),
            ];
            for (heading, expected) in cases.iter() {
                assert_eq!(id_from_content(heading), *expected);
            }
        }

        #[test]
        fn it_normalizes_ids() {
            let cases = [
                (
                    "`--passes`: add more rustdoc passes",
                    "--passes-add-more-rustdoc-passes",
                ),
                (
                    "Method-call 🐙 expressions \u{1f47c}",
                    "method-call--expressions-",
                ),
                ("_-_12345", "_-_12345"),
                ("12345", "12345"),
                ("中文", "中文"),
                ("にほんご", "にほんご"),
                ("한국어", "한국어"),
                ("", ""),
            ];
            for (raw, expected) in cases.iter() {
                assert_eq!(normalize_id(raw), *expected);
            }
        }
    }

    /// Straight-to-curly quote conversion on plain strings.
    mod convert_quotes_to_curly {
        use super::super::convert_quotes_to_curly;

        #[test]
        fn it_converts_single_quotes() {
            let converted = convert_quotes_to_curly("'one', 'two'");
            assert_eq!(converted, "‘one’, ‘two’");
        }

        #[test]
        fn it_converts_double_quotes() {
            let converted = convert_quotes_to_curly(r#""one", "two""#);
            assert_eq!(converted, "“one”, “two”");
        }

        #[test]
        fn it_treats_tab_as_whitespace() {
            let converted = convert_quotes_to_curly("\t'one'");
            assert_eq!(converted, "\t‘one’");
        }
    }
}