vendor/mdbook/src/utils/mod.rs

   1 #![allow(missing_docs)] // FIXME: Document this
   2
   3 pub mod fs;
   4 mod string;
   5 pub(crate) mod toml_ext;
   6 use crate::errors::Error;
   7 use regex::Regex;
   8
   9 use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
  10
  11 use std::borrow::Cow;
  12 use std::fmt::Write;
  13 use std::path::Path;
  14
  15 pub use self::string::{
  16     take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
  17     take_rustdoc_include_lines,
  18 };
  19
  20 /// Replaces multiple consecutive whitespace characters with a single space character.
  21 pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
  22     lazy_static! {
  23         static ref RE: Regex = Regex::new(r"\s\s+").unwrap();
  24     }
  25     RE.replace_all(text, " ")
  26 }
  27
  28 /// Convert the given string to a valid HTML element ID.
  29 /// The only restriction is that the ID must not contain any ASCII whitespace.
  30 pub fn normalize_id(content: &str) -> String {
  31     content
  32         .chars()
  33         .filter_map(|ch| {
  34             if ch.is_alphanumeric() || ch == '_' || ch == '-' {
  35                 Some(ch.to_ascii_lowercase())
  36             } else if ch.is_whitespace() {
  37                 Some('-')
  38             } else {
  39                 None
  40             }
  41         })
  42         .collect::<String>()
  43 }
  44
  45 /// Generate an ID for use with anchors which is derived from a "normalised"
  46 /// string.
  47 pub fn id_from_content(content: &str) -> String {
  48     let mut content = content.to_string();
  49
  50     // Skip any tags or html-encoded stuff
  51     const REPL_SUB: &[&str] = &[
  52         "<em>",
  53         "</em>",
  54         "<code>",
  55         "</code>",
  56         "<strong>",
  57         "</strong>",
  58         "&lt;",
  59         "&gt;",
  60         "&amp;",
  61         "&#39;",
  62         "&quot;",
  63     ];
  64     for sub in REPL_SUB {
  65         content = content.replace(sub, "");
  66     }
  67
  68     // Remove spaces and hashes indicating a header
  69     let trimmed = content.trim().trim_start_matches('#').trim();
  70
  71     normalize_id(trimmed)
  72 }
  73
  74 /// Fix links to the correct location.
  75 ///
  76 /// This adjusts links, such as turning `.md` extensions to `.html`.
  77 ///
  78 /// `path` is the path to the page being rendered relative to the root of the
  79 /// book. This is used for the `print.html` page so that links on the print
  80 /// page go to the original location. Normal page rendering sets `path` to
  81 /// None. Ideally, print page links would link to anchors on the print page,
  82 /// but that is very difficult.
  83 fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
  84     lazy_static! {
  85         static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
  86         static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
  87     }
  88
  89     fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
  90         if dest.starts_with('#') {
  91             // Fragment-only link.
  92             if let Some(path) = path {
  93                 let mut base = path.display().to_string();
  94                 if base.ends_with(".md") {
  95                     base.replace_range(base.len() - 3.., ".html");
  96                 }
  97                 return format!("{}{}", base, dest).into();
  98             } else {
  99                 return dest;
 100             }
 101         }
 102         // Don't modify links with schemes like `https`.
 103         if !SCHEME_LINK.is_match(&dest) {
 104             // This is a relative link, adjust it as necessary.
 105             let mut fixed_link = String::new();
 106             if let Some(path) = path {
 107                 let base = path
 108                     .parent()
 109                     .expect("path can't be empty")
 110                     .to_str()
 111                     .expect("utf-8 paths only");
 112                 if !base.is_empty() {
 113                     write!(fixed_link, "{}/", base).unwrap();
 114                 }
 115             }
 116
 117             if let Some(caps) = MD_LINK.captures(&dest) {
 118                 fixed_link.push_str(&caps["link"]);
 119                 fixed_link.push_str(".html");
 120                 if let Some(anchor) = caps.name("anchor") {
 121                     fixed_link.push_str(anchor.as_str());
 122                 }
 123             } else {
 124                 fixed_link.push_str(&dest);
 125             };
 126             return CowStr::from(fixed_link);
 127         }
 128         dest
 129     }
 130
 131     fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
 132         // This is a terrible hack, but should be reasonably reliable. Nobody
 133         // should ever parse a tag with a regex. However, there isn't anything
 134         // in Rust that I know of that is suitable for handling partial html
 135         // fragments like those generated by pulldown_cmark.
 136         //
 137         // There are dozens of HTML tags/attributes that contain paths, so
 138         // feel free to add more tags if desired; these are the only ones I
 139         // care about right now.
 140         lazy_static! {
 141             static ref HTML_LINK: Regex =
 142                 Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
 143         }
 144
 145         HTML_LINK
 146             .replace_all(&html, |caps: &regex::Captures<'_>| {
 147                 let fixed = fix(caps[2].into(), path);
 148                 format!("{}{}\"", &caps[1], fixed)
 149             })
 150             .into_owned()
 151             .into()
 152     }
 153
 154     match event {
 155         Event::Start(Tag::Link(link_type, dest, title)) => {
 156             Event::Start(Tag::Link(link_type, fix(dest, path), title))
 157         }
 158         Event::Start(Tag::Image(link_type, dest, title)) => {
 159             Event::Start(Tag::Image(link_type, fix(dest, path), title))
 160         }
 161         Event::Html(html) => Event::Html(fix_html(html, path)),
 162         _ => event,
 163     }
 164 }
 165
 166 /// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
 167 pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
 168     render_markdown_with_path(text, curly_quotes, None)
 169 }
 170
 171 pub fn new_cmark_parser(text: &str) -> Parser<'_> {
 172     let mut opts = Options::empty();
 173     opts.insert(Options::ENABLE_TABLES);
 174     opts.insert(Options::ENABLE_FOOTNOTES);
 175     opts.insert(Options::ENABLE_STRIKETHROUGH);
 176     opts.insert(Options::ENABLE_TASKLISTS);
 177     Parser::new_ext(text, opts)
 178 }
 179
 180 pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
 181     let mut s = String::with_capacity(text.len() * 3 / 2);
 182     let p = new_cmark_parser(text);
 183     let mut converter = EventQuoteConverter::new(curly_quotes);
 184     let events = p
 185         .map(clean_codeblock_headers)
 186         .map(|event| adjust_links(event, path))
 187         .map(|event| converter.convert(event));
 188
 189     html::push_html(&mut s, events);
 190     s
 191 }
 192
 193 struct EventQuoteConverter {
 194     enabled: bool,
 195     convert_text: bool,
 196 }
 197
 198 impl EventQuoteConverter {
 199     fn new(enabled: bool) -> Self {
 200         EventQuoteConverter {
 201             enabled,
 202             convert_text: true,
 203         }
 204     }
 205
 206     fn convert<'a>(&mut self, event: Event<'a>) -> Event<'a> {
 207         if !self.enabled {
 208             return event;
 209         }
 210
 211         match event {
 212             Event::Start(Tag::CodeBlock(_)) => {
 213                 self.convert_text = false;
 214                 event
 215             }
 216             Event::End(Tag::CodeBlock(_)) => {
 217                 self.convert_text = true;
 218                 event
 219             }
 220             Event::Text(ref text) if self.convert_text => {
 221                 Event::Text(CowStr::from(convert_quotes_to_curly(text)))
 222             }
 223             _ => event,
 224         }
 225     }
 226 }
 227
 228 fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
 229     match event {
 230         Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => {
 231             let info: String = info.chars().filter(|ch| !ch.is_whitespace()).collect();
 232
 233             Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
 234         }
 235         _ => event,
 236     }
 237 }
 238
 239 fn convert_quotes_to_curly(original_text: &str) -> String {
 240     // We'll consider the start to be "whitespace".
 241     let mut preceded_by_whitespace = true;
 242
 243     original_text
 244         .chars()
 245         .map(|original_char| {
 246             let converted_char = match original_char {
 247                 '\'' => {
 248                     if preceded_by_whitespace {
 249                         '‘'
 250                     } else {
 251                         '’'
 252                     }
 253                 }
 254                 '"' => {
 255                     if preceded_by_whitespace {
 256                         '“'
 257                     } else {
 258                         '”'
 259                     }
 260                 }
 261                 _ => original_char,
 262             };
 263
 264             preceded_by_whitespace = original_char.is_whitespace();
 265
 266             converted_char
 267         })
 268         .collect()
 269 }
 270
 271 /// Prints a "backtrace" of some `Error`.
 272 pub fn log_backtrace(e: &Error) {
 273     error!("Error: {}", e);
 274
 275     for cause in e.chain().skip(1) {
 276         error!("\tCaused By: {}", cause);
 277     }
 278 }
 279
// Unit tests for the helpers in this module, grouped by the function they
// exercise.
#[cfg(test)]
mod tests {
    // End-to-end checks of `render_markdown`: link rewriting, curly quotes
    // and code block header handling.
    mod render_markdown {
        use super::super::render_markdown;

        // Links with a scheme must come out untouched.
        #[test]
        fn preserves_external_links() {
            assert_eq!(
                render_markdown("[example](https://www.rust-lang.org/)", false),
                "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
            );
        }

        // `.md` targets become `.html`, with any anchor kept.
        #[test]
        fn it_can_adjust_markdown_links() {
            assert_eq!(
                render_markdown("[example](example.md)", false),
                "<p><a href=\"example.html\">example</a></p>\n"
            );
            assert_eq!(
                render_markdown("[example_anchor](example.md#anchor)", false),
                "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
            );

            // this anchor contains 'md' inside of it
            assert_eq!(
                render_markdown("[phantom data](foo.html#phantomdata)", false),
                "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
            );
        }

        // With `curly_quotes` off, quotes are left as typed.
        #[test]
        fn it_can_keep_quotes_straight() {
            assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
        }

        // Quotes in a fenced block and in inline code stay straight; quotes
        // in ordinary text are converted.
        #[test]
        fn it_can_make_quotes_curly_except_when_they_are_in_code() {
            let input = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
            let expected = r#"<p>‘one’</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> ‘four’</p>
"#;
            assert_eq!(render_markdown(input, true), expected);
        }

        // Stripping whitespace from the code block header must not touch the
        // surrounding text or the code itself.
        #[test]
        fn whitespace_outside_of_codeblock_header_is_preserved() {
            let input = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;

            let expected = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
            assert_eq!(render_markdown(input, false), expected);
            assert_eq!(render_markdown(input, true), expected);
        }

        // NOTE(review): despite the name, the expected class keeps the
        // properties comma-separated, exactly as written in the header.
        #[test]
        fn rust_code_block_properties_are_passed_as_space_delimited_class() {
            let input = r#"
```rust,no_run,should_panic,property_3
```
"#;

            let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
            assert_eq!(render_markdown(input, false), expected);
            assert_eq!(render_markdown(input, true), expected);
        }

        // Whitespace inside the header is removed; the commas (including the
        // empty entries between them) are kept as they are.
        #[test]
        fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
            let input = r#"
```rust,    no_run,,,should_panic , ,property_3
```
"#;

            let expected = r#"<pre><code class="language-rust,no_run,,,should_panic,,property_3"></code></pre>
"#;
            assert_eq!(render_markdown(input, false), expected);
            assert_eq!(render_markdown(input, true), expected);
        }

        // A bare `rust` header yields the plain `language-rust` class.
        #[test]
        fn rust_code_block_without_properties_has_proper_html_class() {
            let input = r#"
```rust
```
"#;

            let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
            assert_eq!(render_markdown(input, false), expected);
            assert_eq!(render_markdown(input, true), expected);

            // NOTE(review): as shown here this input is identical to the one
            // above; a whitespace difference may have been lost - confirm.
            let input = r#"
```rust
```
"#;
            assert_eq!(render_markdown(input, false), expected);
            assert_eq!(render_markdown(input, true), expected);
        }
    }

    // Checks for the ID helpers `id_from_content` and `normalize_id`.
    mod html_munging {
        use super::super::{id_from_content, normalize_id};

        // Heading markers and inline markup characters do not reach the ID.
        #[test]
        fn it_generates_anchors() {
            assert_eq!(
                id_from_content("## Method-call expressions"),
                "method-call-expressions"
            );
            assert_eq!(id_from_content("## **Bold** title"), "bold-title");
            assert_eq!(id_from_content("## `Code` title"), "code-title");
        }

        // IDs may start with non-letters or non-ASCII characters; non-ASCII
        // letters keep their case.
        #[test]
        fn it_generates_anchors_from_non_ascii_initial() {
            assert_eq!(
                id_from_content("## `--passes`: add more rustdoc passes"),
                "--passes-add-more-rustdoc-passes"
            );
            assert_eq!(
                id_from_content("## 中文標題 CJK title"),
                "中文標題-cjk-title"
            );
            assert_eq!(id_from_content("## Über"), "Über");
        }

        // Whitespace maps to `-`, alphanumerics/`_`/`-` are kept, and the
        // rest (punctuation, emoji) is dropped.
        #[test]
        fn it_normalizes_ids() {
            assert_eq!(
                normalize_id("`--passes`: add more rustdoc passes"),
                "--passes-add-more-rustdoc-passes"
            );
            assert_eq!(
                normalize_id("Method-call 🐙 expressions \u{1f47c}"),
                "method-call--expressions-"
            );
            assert_eq!(normalize_id("_-_12345"), "_-_12345");
            assert_eq!(normalize_id("12345"), "12345");
            assert_eq!(normalize_id("中文"), "中文");
            assert_eq!(normalize_id("にほんご"), "にほんご");
            assert_eq!(normalize_id("한국어"), "한국어");
            assert_eq!(normalize_id(""), "");
        }
    }

    // Direct checks of the quote conversion helper.
    mod convert_quotes_to_curly {
        use super::super::convert_quotes_to_curly;

        #[test]
        fn it_converts_single_quotes() {
            assert_eq!(convert_quotes_to_curly("'one', 'two'"), "‘one’, ‘two’");
        }

        #[test]
        fn it_converts_double_quotes() {
            assert_eq!(convert_quotes_to_curly(r#""one", "two""#), "“one”, “two”");
        }

        // A tab before a quote makes it an opening quote, just like a space.
        #[test]
        fn it_treats_tab_as_whitespace() {
            assert_eq!(convert_quotes_to_curly("\t'one'"), "\t‘one’");
        }
    }
}