]> git.proxmox.com Git - rustc.git/blame - vendor/mdbook/src/renderer/html_handlebars/search.rs
New upstream version 1.59.0+dfsg1
[rustc.git] / vendor / mdbook / src / renderer / html_handlebars / search.rs
CommitLineData
83c7162d
XL
1use std::borrow::Cow;
2use std::collections::{HashMap, HashSet};
3use std::path::Path;
4
dc9dc135 5use elasticlunr::Index;
83c7162d 6use pulldown_cmark::*;
83c7162d 7
dc9dc135
XL
8use crate::book::{Book, BookItem};
9use crate::config::Search;
10use crate::errors::*;
11use crate::theme::searcher;
12use crate::utils;
83c7162d
XL
13
14/// Creates all files required for search.
15pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
16 let mut index = Index::new(&["title", "body", "breadcrumbs"]);
9fa01778 17 let mut doc_urls = Vec::with_capacity(book.sections.len());
83c7162d
XL
18
19 for item in book.iter() {
a2a8927a 20 render_item(&mut index, search_config, &mut doc_urls, item)?;
83c7162d
XL
21 }
22
a2a8927a 23 let index = write_to_json(index, search_config, doc_urls)?;
83c7162d 24 debug!("Writing search index ✓");
9fa01778
XL
25 if index.len() > 10_000_000 {
26 warn!("searchindex.json is very large ({} bytes)", index.len());
27 }
83c7162d
XL
28
29 if search_config.copy_js {
9fa01778
XL
30 utils::fs::write_file(destination, "searchindex.json", index.as_bytes())?;
31 utils::fs::write_file(
32 destination,
33 "searchindex.js",
dc9dc135 34 format!("Object.assign(window.search, {});", index).as_bytes(),
9fa01778 35 )?;
83c7162d
XL
36 utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
37 utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
38 utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
39 debug!("Copying search files ✓");
40 }
41
42 Ok(())
43}
44
45/// Uses the given arguments to construct a search document, then inserts it to the given index.
9fa01778 46fn add_doc(
83c7162d 47 index: &mut Index,
9fa01778
XL
48 doc_urls: &mut Vec<String>,
49 anchor_base: &str,
83c7162d
XL
50 section_id: &Option<String>,
51 items: &[&str],
52) {
9fa01778
XL
53 let url = if let Some(ref id) = *section_id {
54 Cow::Owned(format!("{}#{}", anchor_base, id))
83c7162d 55 } else {
9fa01778 56 Cow::Borrowed(anchor_base)
83c7162d 57 };
9fa01778
XL
58 let url = utils::collapse_whitespace(url.trim());
59 let doc_ref = doc_urls.len().to_string();
60 doc_urls.push(url.into());
61
83c7162d
XL
62 let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim()));
63 index.add_doc(&doc_ref, items);
64}
65
66/// Renders markdown into flat unformatted text and adds it to the search index.
67fn render_item(
68 index: &mut Index,
69 search_config: &Search,
9fa01778 70 doc_urls: &mut Vec<String>,
83c7162d
XL
71 item: &BookItem,
72) -> Result<()> {
9fa01778 73 let chapter = match *item {
f035d41b 74 BookItem::Chapter(ref ch) if !ch.is_draft_chapter() => ch,
83c7162d
XL
75 _ => return Ok(()),
76 };
77
f035d41b
XL
78 let chapter_path = chapter
79 .path
80 .as_ref()
81 .expect("Checked that path exists above");
82 let filepath = Path::new(&chapter_path).with_extension("html");
83c7162d
XL
83 let filepath = filepath
84 .to_str()
f035d41b 85 .with_context(|| "Could not convert HTML path to str")?;
83c7162d
XL
86 let anchor_base = utils::fs::normalize_path(filepath);
87
a2a8927a 88 let mut p = utils::new_cmark_parser(&chapter.content, false).peekable();
83c7162d 89
e74abb32
XL
90 let mut in_heading = false;
91 let max_section_depth = u32::from(search_config.heading_split_level);
83c7162d
XL
92 let mut section_id = None;
93 let mut heading = String::new();
94 let mut body = String::new();
95 let mut breadcrumbs = chapter.parent_names.clone();
96 let mut footnote_numbers = HashMap::new();
97
5869c6ff
XL
98 breadcrumbs.push(chapter.name.clone());
99
e74abb32 100 while let Some(event) = p.next() {
83c7162d 101 match event {
a2a8927a 102 Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
9fa01778 103 if !heading.is_empty() {
e74abb32 104 // Section finished, the next heading is following now
83c7162d
XL
105 // Write the data to the index, and clear it for the next section
106 add_doc(
107 index,
9fa01778 108 doc_urls,
83c7162d
XL
109 &anchor_base,
110 &section_id,
111 &[&heading, &body, &breadcrumbs.join(" » ")],
112 );
113 section_id = None;
114 heading.clear();
115 body.clear();
116 breadcrumbs.pop();
117 }
118
e74abb32 119 in_heading = true;
83c7162d 120 }
a2a8927a 121 Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
e74abb32 122 in_heading = false;
83c7162d
XL
123 section_id = Some(utils::id_from_content(&heading));
124 breadcrumbs.push(heading.clone());
125 }
126 Event::Start(Tag::FootnoteDefinition(name)) => {
127 let number = footnote_numbers.len() + 1;
128 footnote_numbers.entry(name).or_insert(number);
129 }
dc9dc135 130 Event::Html(html) => {
e74abb32
XL
131 let mut html_block = html.into_string();
132
133 // As of pulldown_cmark 0.6, html events are no longer contained
134 // in an HtmlBlock tag. We must collect consecutive Html events
135 // into a block ourselves.
136 while let Some(Event::Html(html)) = p.peek() {
a2a8927a 137 html_block.push_str(html);
e74abb32
XL
138 p.next();
139 }
140
dc9dc135 141 body.push_str(&clean_html(&html_block));
dc9dc135 142 }
e74abb32 143 Event::Start(_) | Event::End(_) | Event::Rule | Event::SoftBreak | Event::HardBreak => {
94222f64 144 // Insert spaces where HTML output would usually separate text
83c7162d 145 // to ensure words don't get merged together
e74abb32 146 if in_heading {
83c7162d
XL
147 heading.push(' ');
148 } else {
149 body.push(' ');
150 }
151 }
dc9dc135 152 Event::Text(text) | Event::Code(text) => {
e74abb32 153 if in_heading {
83c7162d
XL
154 heading.push_str(&text);
155 } else {
156 body.push_str(&text);
157 }
158 }
83c7162d
XL
159 Event::FootnoteReference(name) => {
160 let len = footnote_numbers.len() + 1;
161 let number = footnote_numbers.entry(name).or_insert(len);
162 body.push_str(&format!(" [{}] ", number));
163 }
dc9dc135 164 Event::TaskListMarker(_checked) => {}
83c7162d
XL
165 }
166 }
167
a2a8927a
XL
168 if !body.is_empty() || !heading.is_empty() {
169 if heading.is_empty() {
170 if let Some(chapter) = breadcrumbs.first() {
171 heading = chapter.clone();
172 }
173 }
83c7162d
XL
174 // Make sure the last section is added to the index
175 add_doc(
176 index,
9fa01778 177 doc_urls,
83c7162d
XL
178 &anchor_base,
179 &section_id,
180 &[&heading, &body, &breadcrumbs.join(" » ")],
181 );
182 }
183
184 Ok(())
185}
186
9fa01778 187fn write_to_json(index: Index, search_config: &Search, doc_urls: Vec<String>) -> Result<String> {
dc9dc135 188 use elasticlunr::config::{SearchBool, SearchOptions, SearchOptionsField};
9fa01778 189 use std::collections::BTreeMap;
83c7162d
XL
190
191 #[derive(Serialize)]
192 struct ResultsOptions {
193 limit_results: u32,
194 teaser_word_count: u32,
195 }
196
197 #[derive(Serialize)]
198 struct SearchindexJson {
199 /// The options used for displaying search results
9fa01778 200 results_options: ResultsOptions,
83c7162d 201 /// The searchoptions for elasticlunr.js
9fa01778
XL
202 search_options: SearchOptions,
203 /// Used to lookup a document's URL from an integer document ref.
204 doc_urls: Vec<String>,
83c7162d
XL
205 /// The index for elasticlunr.js
206 index: elasticlunr::Index,
207 }
208
209 let mut fields = BTreeMap::new();
210 let mut opt = SearchOptionsField::default();
211 opt.boost = Some(search_config.boost_title);
212 fields.insert("title".into(), opt);
213 opt.boost = Some(search_config.boost_paragraph);
214 fields.insert("body".into(), opt);
215 opt.boost = Some(search_config.boost_hierarchy);
216 fields.insert("breadcrumbs".into(), opt);
217
9fa01778 218 let search_options = SearchOptions {
83c7162d
XL
219 bool: if search_config.use_boolean_and {
220 SearchBool::And
221 } else {
222 SearchBool::Or
223 },
224 expand: search_config.expand,
225 fields,
226 };
227
9fa01778 228 let results_options = ResultsOptions {
83c7162d
XL
229 limit_results: search_config.limit_results,
230 teaser_word_count: search_config.teaser_word_count,
231 };
232
233 let json_contents = SearchindexJson {
9fa01778
XL
234 results_options,
235 search_options,
236 doc_urls,
83c7162d
XL
237 index,
238 };
9fa01778
XL
239
240 // By converting to serde_json::Value as an intermediary, we use a
241 // BTreeMap internally and can force a stable ordering of map keys.
242 let json_contents = serde_json::to_value(&json_contents)?;
83c7162d
XL
243 let json_contents = serde_json::to_string(&json_contents)?;
244
9fa01778 245 Ok(json_contents)
83c7162d
XL
246}
247
248fn clean_html(html: &str) -> String {
249 lazy_static! {
250 static ref AMMONIA: ammonia::Builder<'static> = {
251 let mut clean_content = HashSet::new();
252 clean_content.insert("script");
253 clean_content.insert("style");
254 let mut builder = ammonia::Builder::new();
255 builder
256 .tags(HashSet::new())
257 .tag_attributes(HashMap::new())
258 .generic_attributes(HashSet::new())
259 .link_rel(None)
260 .allowed_classes(HashMap::new())
261 .clean_content_tags(clean_content);
262 builder
263 };
264 }
265 AMMONIA.clean(html).to_string()
266}