1 #![allow(missing_docs)] // FIXME: Document this
5 pub(crate) mod toml_ext
;
6 use crate::errors
::Error
;
9 use pulldown_cmark
::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag}
;
15 pub use self::string
::{
16 take_anchored_lines
, take_lines
, take_rustdoc_include_anchored_lines
,
17 take_rustdoc_include_lines
,
20 /// Replaces multiple consecutive whitespace characters with a single space character.
21 pub fn collapse_whitespace(text
: &str) -> Cow
<'_
, str> {
23 static ref RE
: Regex
= Regex
::new(r
"\s\s+").unwrap();
25 RE
.replace_all(text
, " ")
/// Convert the given string to a valid HTML element ID.
/// The only restriction is that the ID must not contain any ASCII whitespace.
///
/// Character handling:
/// - alphanumeric characters (ASCII or not), `_` and `-` are kept, with ASCII
///   letters lowercased;
/// - every whitespace character becomes a single `-`;
/// - everything else (punctuation, emoji, ...) is dropped.
pub fn normalize_id(content: &str) -> String {
    content
        .chars()
        .filter_map(|ch| {
            if ch.is_alphanumeric() || ch == '_' || ch == '-' {
                // Only ASCII letters change case; other scripts pass through.
                Some(ch.to_ascii_lowercase())
            } else if ch.is_whitespace() {
                Some('-')
            } else {
                None
            }
        })
        .collect::<String>()
}
45 /// Generate an ID for use with anchors which is derived from a "normalised"
47 pub fn id_from_content(content
: &str) -> String
{
// Shadow the borrowed argument with an owned copy so substrings can be
// stripped out of it below.
48 let mut content
= content
.to_string();
50 // Skip any tags or html-encoded stuff
51 const REPL_SUB
: &[&str] = &[
// Delete every occurrence of the current `sub` pattern from the working copy.
65 content
= content
.replace(sub
, "");
68 // Remove spaces and hashes indicating a header
69 let trimmed
= content
.trim().trim_start_matches('
#').trim();
// NOTE(review): the entries of `REPL_SUB` and the expression that turns
// `trimmed` into the returned `String` are not visible in this excerpt —
// confirm against the full file.
74 /// Fix links to the correct location.
76 /// This adjusts links, such as turning `.md` extensions to `.html`.
78 /// `path` is the path to the page being rendered relative to the root of the
79 /// book. This is used for the `print.html` page so that links on the print
80 /// page go to the original location. Normal page rendering sets `path` to
81 /// None. Ideally, print page links would link to anchors on the print page,
82 /// but that is very difficult.
83 fn adjust_links
<'a
>(event
: Event
<'a
>, path
: Option
<&Path
>) -> Event
<'a
> {
85 static ref SCHEME_LINK
: Regex
= Regex
::new(r
"^[a-z][a-z0-9+.-]*:").unwrap();
86 static ref MD_LINK
: Regex
= Regex
::new(r
"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
89 fn fix
<'a
>(dest
: CowStr
<'a
>, path
: Option
<&Path
>) -> CowStr
<'a
> {
90 if dest
.starts_with('
#') {
91 // Fragment-only link.
92 if let Some(path
) = path
{
93 let mut base
= path
.display().to_string();
94 if base
.ends_with(".md") {
95 base
.replace_range(base
.len() - 3.., ".html");
97 return format
!("{}{}", base
, dest
).into();
102 // Don't modify links with schemes like `https`.
103 if !SCHEME_LINK
.is_match(&dest
) {
104 // This is a relative link, adjust it as necessary.
105 let mut fixed_link
= String
::new();
106 if let Some(path
) = path
{
109 .expect("path can't be empty")
111 .expect("utf-8 paths only");
112 if !base
.is_empty() {
113 write
!(fixed_link
, "{}/", base
).unwrap();
117 if let Some(caps
) = MD_LINK
.captures(&dest
) {
118 fixed_link
.push_str(&caps
["link"]);
119 fixed_link
.push_str(".html");
120 if let Some(anchor
) = caps
.name("anchor") {
121 fixed_link
.push_str(anchor
.as_str());
124 fixed_link
.push_str(&dest
);
126 return CowStr
::from(fixed_link
);
131 fn fix_html
<'a
>(html
: CowStr
<'a
>, path
: Option
<&Path
>) -> CowStr
<'a
> {
132 // This is a terrible hack, but should be reasonably reliable. Nobody
133 // should ever parse a tag with a regex. However, there isn't anything
134 // in Rust that I know of that is suitable for handling partial html
135 // fragments like those generated by pulldown_cmark.
137 // There are dozens of HTML tags/attributes that contain paths, so
138 // feel free to add more tags if desired; these are the only ones I
139 // care about right now.
141 static ref HTML_LINK
: Regex
=
142 Regex
::new(r
#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
146 .replace_all(&html, |caps: ®ex::Captures<'_>| {
147 let fixed = fix(caps[2].into(), path);
148 format!("{}{}\"", &caps[1], fixed)
155 Event::Start(Tag::Link(link_type, dest, title)) => {
156 Event::Start(Tag::Link(link_type, fix(dest, path), title))
158 Event::Start(Tag::Image(link_type, dest, title)) => {
159 Event::Start(Tag::Image(link_type, fix(dest, path), title))
161 Event::Html(html) => Event::Html(fix_html(html, path)),
166 /// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
167 pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
168 render_markdown_with_path(text, curly_quotes, None)
171 pub fn new_cmark_parser(text: &str) -> Parser<'_> {
172 let mut opts = Options::empty();
173 opts.insert(Options::ENABLE_TABLES);
174 opts.insert(Options::ENABLE_FOOTNOTES);
175 opts.insert(Options::ENABLE_STRIKETHROUGH);
176 opts.insert(Options::ENABLE_TASKLISTS);
177 Parser::new_ext(text, opts)
180 pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
181 let mut s = String::with_capacity(text.len() * 3 / 2);
182 let p = new_cmark_parser(text);
183 let mut converter = EventQuoteConverter::new(curly_quotes);
185 .map(clean_codeblock_headers)
186 .map(|event| adjust_links(event, path))
187 .map(|event| converter.convert(event));
189 html::push_html(&mut s, events);
193 struct EventQuoteConverter {
198 impl EventQuoteConverter {
199 fn new(enabled: bool) -> Self {
200 EventQuoteConverter {
206 fn convert<'a>(&mut self, event: Event<'a>) -> Event<'a> {
212 Event::Start(Tag::CodeBlock(_)) => {
213 self.convert_text = false;
216 Event::End(Tag::CodeBlock(_)) => {
217 self.convert_text = true;
220 Event::Text(ref text) if self.convert_text => {
221 Event::Text(CowStr::from(convert_quotes_to_curly(text)))
228 fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
230 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => {
231 let info: String = info.chars().filter(|ch| !ch.is_whitespace()).collect();
233 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
/// Replaces straight single and double quotes with their curly counterparts.
///
/// A quote at the start of the text or directly after whitespace becomes an
/// opening quote (`‘` / `“`); any other quote becomes a closing quote
/// (`’` / `”`). All other characters are copied through unchanged.
fn convert_quotes_to_curly(original_text: &str) -> String {
    // We'll consider the start to be "whitespace".
    let mut preceded_by_whitespace = true;

    original_text
        .chars()
        .map(|original_char| {
            let converted_char = match original_char {
                '\'' => {
                    if preceded_by_whitespace {
                        '‘'
                    } else {
                        '’'
                    }
                }
                '"' => {
                    if preceded_by_whitespace {
                        '“'
                    } else {
                        '”'
                    }
                }
                _ => original_char,
            };

            // Track the *original* character so the next quote sees what preceded it.
            preceded_by_whitespace = original_char.is_whitespace();

            converted_char
        })
        .collect()
}
271 /// Prints a "backtrace" of some `Error`.
272 pub fn log_backtrace(e
: &Error
) {
273 error
!("Error: {}", e
);
275 for cause
in e
.chain().skip(1) {
276 error
!("\tCaused By: {}", cause
);
// Tests for `render_markdown`: external links are preserved, `.md` links are
// rewritten to `.html`, quotes stay straight or turn curly depending on the
// flag, and fenced code block info strings end up in the `language-…` class.
282 mod render_markdown
{
283 use super::super::render_markdown
;
286 fn preserves_external_links() {
288 render_markdown("[example](https://www.rust-lang.org/)", false),
289 "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
294 fn it_can_adjust_markdown_links() {
296 render_markdown("[example](example.md)", false),
297 "<p><a href=\"example.html\">example</a></p>\n"
300 render_markdown("[example_anchor](example.md#anchor)", false),
301 "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
304 // this anchor contains 'md' inside of it
306 render_markdown("[phantom data](foo.html#phantomdata)", false),
307 "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
312 fn it_can_keep_quotes_straight() {
313 assert_eq
!(render_markdown("'one'", false), "<p>'one'</p>\n");
317 fn it_can_make_quotes_curly_except_when_they_are_in_code() {
324 let expected
= r
#"<p>‘one’</p>
327 <p><code>'three'</code> ‘four’</p>
329 assert_eq
!(render_markdown(input
, true), expected
);
333 fn whitespace_outside_of_codeblock_header_is_preserved() {
335 some text with spaces
338 // code inside is unchanged
341 more text with spaces
344 let expected
= r
#"<p>some text with spaces</p>
345 <pre><code class="language-rust">fn main() {
346 // code inside is unchanged
349 <p>more text with spaces</p>
351 assert_eq
!(render_markdown(input
, false), expected
);
352 assert_eq
!(render_markdown(input
, true), expected
);
356 fn rust_code_block_properties_are_passed_as_space_delimited_class() {
358 ```rust,no_run,should_panic,property_3
362 let expected
= r
#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
364 assert_eq
!(render_markdown(input
, false), expected
);
365 assert_eq
!(render_markdown(input
, true), expected
);
369 fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
371 ```rust, no_run,,,should_panic , ,property_3
375 let expected
= r
#"<pre><code class="language-rust,no_run,,,should_panic,,property_3"></code></pre>
377 assert_eq
!(render_markdown(input
, false), expected
);
378 assert_eq
!(render_markdown(input
, true), expected
);
382 fn rust_code_block_without_properties_has_proper_html_class() {
388 let expected
= r
#"<pre><code class="language-rust"></code></pre>
390 assert_eq
!(render_markdown(input
, false), expected
);
391 assert_eq
!(render_markdown(input
, true), expected
);
397 assert_eq
!(render_markdown(input
, false), expected
);
398 assert_eq
!(render_markdown(input
, true), expected
);
403 use super::super::{id_from_content, normalize_id}
;
// Checks that leading `##` markers and inline `**`/backtick markup do not
// appear in the generated anchor ID, and that spaces become `-`.
406 fn it_generates_anchors() {
408 id_from_content("## Method-call expressions"),
409 "method-call-expressions"
411 assert_eq
!(id_from_content("## **Bold** title"), "bold-title");
412 assert_eq
!(id_from_content("## `Code` title"), "code-title");
// Checks anchor generation for headings whose text starts with punctuation
// or with non-ASCII characters.
416 fn it_generates_anchors_from_non_ascii_initial() {
418 id_from_content("## `--passes`: add more rustdoc passes"),
419 "--passes-add-more-rustdoc-passes"
422 id_from_content("## 中文標題 CJK title"),
425 assert_eq
!(id_from_content("## Über"), "Über");
// Checks `normalize_id` on punctuation, emoji, underscore/dash/digit-only
// input, CJK and Hangul text, and the empty string.
429 fn it_normalizes_ids() {
431 normalize_id("`--passes`: add more rustdoc passes"),
432 "--passes-add-more-rustdoc-passes"
435 normalize_id("Method-call 🐙 expressions \u{1f47c}"),
436 "method-call--expressions-"
438 assert_eq
!(normalize_id("_-_12345"), "_-_12345");
439 assert_eq
!(normalize_id("12345"), "12345");
440 assert_eq
!(normalize_id("中文"), "中文");
441 assert_eq
!(normalize_id("にほんご"), "にほんご");
442 assert_eq
!(normalize_id("한국어"), "한국어");
443 assert_eq
!(normalize_id(""), "");
// Tests for `convert_quotes_to_curly`: single quotes, double quotes, and a
// leading tab being treated as whitespace before an opening quote.
447 mod convert_quotes_to_curly
{
448 use super::super::convert_quotes_to_curly
;
451 fn it_converts_single_quotes() {
452 assert_eq
!(convert_quotes_to_curly("'one', 'two'"), "‘one’, ‘two’");
456 fn it_converts_double_quotes() {
457 assert_eq
!(convert_quotes_to_curly(r
#""one", "two""#), "“one”, “two”");
461 fn it_treats_tab_as_whitespace() {
462 assert_eq
!(convert_quotes_to_curly("\t'one'"), "\t‘one’");