]>
Commit | Line | Data |
---|---|---|
1 | #![allow(missing_docs)] // FIXME: Document this | |
2 | ||
3 | pub mod fs; | |
4 | mod string; | |
5 | use crate::errors::Error; | |
6 | use regex::Regex; | |
7 | ||
8 | use pulldown_cmark::{html, CowStr, Event, Options, Parser, Tag}; | |
9 | ||
10 | use std::borrow::Cow; | |
11 | use std::fmt::Write; | |
12 | use std::path::Path; | |
13 | ||
14 | pub use self::string::{take_anchored_lines, take_lines}; | |
15 | ||
16 | /// Replaces multiple consecutive whitespace characters with a single space character. | |
17 | pub fn collapse_whitespace(text: &str) -> Cow<'_, str> { | |
18 | lazy_static! { | |
19 | static ref RE: Regex = Regex::new(r"\s\s+").unwrap(); | |
20 | } | |
21 | RE.replace_all(text, " ") | |
22 | } | |
23 | ||
/// Turns arbitrary text into a string usable as an HTML element ID.
///
/// Each character is handled on its own:
///
/// * alphanumerics, `_` and `-` are kept, with ASCII letters lowercased
///   (non-ASCII letters such as `Ü` keep their case);
/// * whitespace becomes `-`;
/// * anything else is dropped.
pub fn normalize_id(content: &str) -> String {
    let mut id = String::new();

    for ch in content.chars() {
        match ch {
            '_' | '-' => id.push(ch),
            c if c.is_alphanumeric() => id.push(c.to_ascii_lowercase()),
            c if c.is_whitespace() => id.push('-'),
            _ => {}
        }
    }

    id
}
40 | ||
41 | /// Generate an ID for use with anchors which is derived from a "normalised" | |
42 | /// string. | |
43 | pub fn id_from_content(content: &str) -> String { | |
44 | let mut content = content.to_string(); | |
45 | ||
46 | // Skip any tags or html-encoded stuff | |
47 | const REPL_SUB: &[&str] = &[ | |
48 | "<em>", | |
49 | "</em>", | |
50 | "<code>", | |
51 | "</code>", | |
52 | "<strong>", | |
53 | "</strong>", | |
54 | "<", | |
55 | ">", | |
56 | "&", | |
57 | "'", | |
58 | """, | |
59 | ]; | |
60 | for sub in REPL_SUB { | |
61 | content = content.replace(sub, ""); | |
62 | } | |
63 | ||
64 | // Remove spaces and hashes indicating a header | |
65 | let trimmed = content.trim().trim_start_matches('#').trim(); | |
66 | ||
67 | normalize_id(trimmed) | |
68 | } | |
69 | ||
70 | /// Fix links to the correct location. | |
71 | /// | |
72 | /// This adjusts links, such as turning `.md` extensions to `.html`. | |
73 | /// | |
74 | /// `path` is the path to the page being rendered relative to the root of the | |
75 | /// book. This is used for the `print.html` page so that links on the print | |
76 | /// page go to the original location. Normal page rendering sets `path` to | |
77 | /// None. Ideally, print page links would link to anchors on the print page, | |
78 | /// but that is very difficult. | |
79 | fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { | |
80 | lazy_static! { | |
81 | static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap(); | |
82 | static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap(); | |
83 | } | |
84 | ||
85 | fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { | |
86 | if dest.starts_with('#') { | |
87 | // Fragment-only link. | |
88 | if let Some(path) = path { | |
89 | let mut base = path.display().to_string(); | |
90 | if base.ends_with(".md") { | |
91 | base.replace_range(base.len() - 3.., ".html"); | |
92 | } | |
93 | return format!("{}{}", base, dest).into(); | |
94 | } else { | |
95 | return dest; | |
96 | } | |
97 | } | |
98 | // Don't modify links with schemes like `https`. | |
99 | if !SCHEME_LINK.is_match(&dest) { | |
100 | // This is a relative link, adjust it as necessary. | |
101 | let mut fixed_link = String::new(); | |
102 | if let Some(path) = path { | |
103 | let base = path | |
104 | .parent() | |
105 | .expect("path can't be empty") | |
106 | .to_str() | |
107 | .expect("utf-8 paths only"); | |
108 | if !base.is_empty() { | |
109 | write!(fixed_link, "{}/", base).unwrap(); | |
110 | } | |
111 | } | |
112 | ||
113 | if let Some(caps) = MD_LINK.captures(&dest) { | |
114 | fixed_link.push_str(&caps["link"]); | |
115 | fixed_link.push_str(".html"); | |
116 | if let Some(anchor) = caps.name("anchor") { | |
117 | fixed_link.push_str(anchor.as_str()); | |
118 | } | |
119 | } else { | |
120 | fixed_link.push_str(&dest); | |
121 | }; | |
122 | return CowStr::from(fixed_link); | |
123 | } | |
124 | dest | |
125 | } | |
126 | ||
127 | fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { | |
128 | // This is a terrible hack, but should be reasonably reliable. Nobody | |
129 | // should ever parse a tag with a regex. However, there isn't anything | |
130 | // in Rust that I know of that is suitable for handling partial html | |
131 | // fragments like those generated by pulldown_cmark. | |
132 | // | |
133 | // There are dozens of HTML tags/attributes that contain paths, so | |
134 | // feel free to add more tags if desired; these are the only ones I | |
135 | // care about right now. | |
136 | lazy_static! { | |
137 | static ref HTML_LINK: Regex = | |
138 | Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap(); | |
139 | } | |
140 | ||
141 | HTML_LINK | |
142 | .replace_all(&html, |caps: ®ex::Captures<'_>| { | |
143 | let fixed = fix(caps[2].into(), path); | |
144 | format!("{}{}\"", &caps[1], fixed) | |
145 | }) | |
146 | .into_owned() | |
147 | .into() | |
148 | } | |
149 | ||
150 | match event { | |
151 | Event::Start(Tag::Link(link_type, dest, title)) => { | |
152 | Event::Start(Tag::Link(link_type, fix(dest, path), title)) | |
153 | } | |
154 | Event::Start(Tag::Image(link_type, dest, title)) => { | |
155 | Event::Start(Tag::Image(link_type, fix(dest, path), title)) | |
156 | } | |
157 | Event::Html(html) => Event::Html(fix_html(html, path)), | |
158 | Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)), | |
159 | _ => event, | |
160 | } | |
161 | } | |
162 | ||
163 | /// Wrapper around the pulldown-cmark parser for rendering markdown to HTML. | |
164 | pub fn render_markdown(text: &str, curly_quotes: bool) -> String { | |
165 | render_markdown_with_path(text, curly_quotes, None) | |
166 | } | |
167 | ||
168 | pub fn new_cmark_parser(text: &str) -> Parser<'_> { | |
169 | let mut opts = Options::empty(); | |
170 | opts.insert(Options::ENABLE_TABLES); | |
171 | opts.insert(Options::ENABLE_FOOTNOTES); | |
172 | opts.insert(Options::ENABLE_STRIKETHROUGH); | |
173 | opts.insert(Options::ENABLE_TASKLISTS); | |
174 | Parser::new_ext(text, opts) | |
175 | } | |
176 | ||
177 | pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String { | |
178 | let mut s = String::with_capacity(text.len() * 3 / 2); | |
179 | let p = new_cmark_parser(text); | |
180 | let mut converter = EventQuoteConverter::new(curly_quotes); | |
181 | let events = p | |
182 | .map(clean_codeblock_headers) | |
183 | .map(|event| adjust_links(event, path)) | |
184 | .map(|event| converter.convert(event)); | |
185 | ||
186 | html::push_html(&mut s, events); | |
187 | s | |
188 | } | |
189 | ||
190 | struct EventQuoteConverter { | |
191 | enabled: bool, | |
192 | convert_text: bool, | |
193 | } | |
194 | ||
195 | impl EventQuoteConverter { | |
196 | fn new(enabled: bool) -> Self { | |
197 | EventQuoteConverter { | |
198 | enabled, | |
199 | convert_text: true, | |
200 | } | |
201 | } | |
202 | ||
203 | fn convert<'a>(&mut self, event: Event<'a>) -> Event<'a> { | |
204 | if !self.enabled { | |
205 | return event; | |
206 | } | |
207 | ||
208 | match event { | |
209 | Event::Start(Tag::CodeBlock(_)) => { | |
210 | self.convert_text = false; | |
211 | event | |
212 | } | |
213 | Event::End(Tag::CodeBlock(_)) => { | |
214 | self.convert_text = true; | |
215 | event | |
216 | } | |
217 | Event::Text(ref text) if self.convert_text => { | |
218 | Event::Text(CowStr::from(convert_quotes_to_curly(text))) | |
219 | } | |
220 | _ => event, | |
221 | } | |
222 | } | |
223 | } | |
224 | ||
225 | fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> { | |
226 | match event { | |
227 | Event::Start(Tag::CodeBlock(ref info)) => { | |
228 | let info: String = info.chars().filter(|ch| !ch.is_whitespace()).collect(); | |
229 | ||
230 | Event::Start(Tag::CodeBlock(CowStr::from(info))) | |
231 | } | |
232 | _ => event, | |
233 | } | |
234 | } | |
235 | ||
/// Replaces straight single and double quotes with their curly counterparts.
///
/// A quote becomes an opening quote when it is the first character or
/// directly follows whitespace, and a closing quote otherwise. All other
/// characters are copied through unchanged.
fn convert_quotes_to_curly(original_text: &str) -> String {
    let mut curly = String::with_capacity(original_text.len());
    // The start of the text is treated as if it followed whitespace.
    let mut after_whitespace = true;

    for ch in original_text.chars() {
        let replacement = match (ch, after_whitespace) {
            ('\'', true) => '‘',
            ('\'', false) => '’',
            ('"', true) => '“',
            ('"', false) => '”',
            (other, _) => other,
        };
        curly.push(replacement);

        // Decided on the original character, not on the replacement.
        after_whitespace = ch.is_whitespace();
    }

    curly
}
267 | ||
268 | /// Prints a "backtrace" of some `Error`. | |
269 | pub fn log_backtrace(e: &Error) { | |
270 | error!("Error: {}", e); | |
271 | ||
272 | for cause in e.iter().skip(1) { | |
273 | error!("\tCaused By: {}", cause); | |
274 | } | |
275 | } | |
276 | ||
#[cfg(test)]
mod tests {
    /// End-to-end checks of `render_markdown`.
    mod render_markdown {
        use super::super::render_markdown;

        /// Asserts that `markdown` renders to `html` with curly quotes
        /// switched off and then switched on.
        fn assert_renders_either_way(markdown: &str, html: &str) {
            for &curly_quotes in &[false, true] {
                assert_eq!(render_markdown(markdown, curly_quotes), html);
            }
        }

        #[test]
        fn preserves_external_links() {
            let rendered = render_markdown("[example](https://www.rust-lang.org/)", false);
            assert_eq!(
                rendered,
                "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
            );
        }

        #[test]
        fn it_can_adjust_markdown_links() {
            let cases = [
                (
                    "[example](example.md)",
                    "<p><a href=\"example.html\">example</a></p>\n",
                ),
                (
                    "[example_anchor](example.md#anchor)",
                    "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n",
                ),
                // this anchor contains 'md' inside of it
                (
                    "[phantom data](foo.html#phantomdata)",
                    "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n",
                ),
            ];

            for &(markdown, html) in cases.iter() {
                assert_eq!(render_markdown(markdown, false), html);
            }
        }

        #[test]
        fn it_can_keep_quotes_straight() {
            let rendered = render_markdown("'one'", false);
            assert_eq!(rendered, "<p>'one'</p>\n");
        }

        #[test]
        fn it_can_make_quotes_curly_except_when_they_are_in_code() {
            let markdown = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
            let html = r#"<p>‘one’</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> ‘four’</p>
"#;
            assert_eq!(render_markdown(markdown, true), html);
        }

        #[test]
        fn whitespace_outside_of_codeblock_header_is_preserved() {
            let markdown = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;

            let html = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
            assert_renders_either_way(markdown, html);
        }

        #[test]
        fn rust_code_block_properties_are_passed_as_space_delimited_class() {
            let markdown = r#"
```rust,no_run,should_panic,property_3
```
"#;

            let html =
                r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
            assert_renders_either_way(markdown, html);
        }

        #[test]
        fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
            let markdown = r#"
```rust, no_run,,,should_panic , ,property_3
```
"#;

            let html =
                r#"<pre><code class="language-rust,no_run,,,should_panic,,property_3"></code></pre>
"#;
            assert_renders_either_way(markdown, html);
        }

        #[test]
        fn rust_code_block_without_properties_has_proper_html_class() {
            let html = r#"<pre><code class="language-rust"></code></pre>
"#;

            let first = r#"
```rust
```
"#;
            assert_renders_either_way(first, html);

            let second = r#"
```rust
```
"#;
            assert_renders_either_way(second, html);
        }
    }

    /// Checks for the ID helpers `id_from_content` and `normalize_id`.
    mod html_munging {
        use super::super::{id_from_content, normalize_id};

        #[test]
        fn it_generates_anchors() {
            let cases = [
                ("## Method-call expressions", "method-call-expressions"),
                ("## **Bold** title", "bold-title"),
                ("## `Code` title", "code-title"),
            ];

            for &(heading, id) in cases.iter() {
                assert_eq!(id_from_content(heading), id);
            }
        }

        #[test]
        fn it_generates_anchors_from_non_ascii_initial() {
            let cases = [
                (
                    "## `--passes`: add more rustdoc passes",
                    "--passes-add-more-rustdoc-passes",
                ),
                ("## 中文標題 CJK title", "中文標題-cjk-title"),
                ("## Über", "Über"),
            ];

            for &(heading, id) in cases.iter() {
                assert_eq!(id_from_content(heading), id);
            }
        }

        #[test]
        fn it_normalizes_ids() {
            let cases = [
                (
                    "`--passes`: add more rustdoc passes",
                    "--passes-add-more-rustdoc-passes",
                ),
                (
                    "Method-call 🐙 expressions \u{1f47c}",
                    "method-call--expressions-",
                ),
                ("_-_12345", "_-_12345"),
                ("12345", "12345"),
                ("中文", "中文"),
                ("にほんご", "にほんご"),
                ("한국어", "한국어"),
                ("", ""),
            ];

            for &(raw, id) in cases.iter() {
                assert_eq!(normalize_id(raw), id);
            }
        }
    }

    /// Checks for the straight-to-curly quote conversion.
    mod convert_quotes_to_curly {
        use super::super::convert_quotes_to_curly;

        #[test]
        fn it_converts_single_quotes() {
            let converted = convert_quotes_to_curly("'one', 'two'");
            assert_eq!(converted, "‘one’, ‘two’");
        }

        #[test]
        fn it_converts_double_quotes() {
            let converted = convert_quotes_to_curly(r#""one", "two""#);
            assert_eq!(converted, "“one”, “two”");
        }

        #[test]
        fn it_treats_tab_as_whitespace() {
            let converted = convert_quotes_to_curly("\t'one'");
            assert_eq!(converted, "\t‘one’");
        }
    }
}