]>
git.proxmox.com Git - rustc.git/blob - vendor/elasticlunr-rs/src/lib.rs
3 //! [![Build Status](https://travis-ci.org/mattico/elasticlunr-rs.svg?branch=master)](https://travis-ci.org/mattico/elasticlunr-rs)
4 //! [![Documentation](https://docs.rs/elasticlunr-rs/badge.svg)](https://docs.rs/elasticlunr-rs)
5 //! [![Crates.io](https://img.shields.io/crates/v/elasticlunr-rs.svg)](https://crates.io/crates/elasticlunr-rs)
7 //! A partial port of [elasticlunr](https://github.com/weixsong/elasticlunr.js) to Rust. Intended to
8 //! be used for generating compatible search indices.
10 //! Access to all index-generating functionality is provided. Most users will only need to use the
11 //! [`Index`](struct.Index.html) or [`IndexBuilder`](struct.IndexBuilder.html) types.
16 //! use std::fs::File;
17 //! use std::io::Write;
18 //! use elasticlunr::Index;
20 //! let mut index = Index::new(&["title", "body"]);
21 //! index.add_doc("1", &["This is a title", "This is body text!"]);
22 //! // Add more docs...
23 //! let mut file = File::create("out.json").unwrap();
24 //! file.write_all(index.to_json_pretty().as_bytes()).unwrap();
27 #![cfg_attr(feature = "bench", feature(test))]
30 extern crate lazy_static
;
34 extern crate serde_derive
;
35 extern crate serde_json
;
38 extern crate strum_macros
;
40 #[cfg(feature = "rust-stemmers")]
41 extern crate rust_stemmers
;
46 #[cfg(feature = "zh")]
47 extern crate jieba_rs
;
48 #[cfg(feature = "ja")]
/// The version of elasticlunr.js this library was designed for.
pub const ELASTICLUNR_VERSION: &str = "0.9.5";
55 pub mod document_store
;
56 pub mod inverted_index
;
60 use std
::collections
::{BTreeMap, BTreeSet}
;
62 use document_store
::DocumentStore
;
63 use inverted_index
::InvertedIndex
;
64 pub use lang
::Language
;
65 pub use pipeline
::Pipeline
;
67 /// A builder for an `Index` with custom parameters.
71 /// # use elasticlunr::{Index, IndexBuilder};
72 /// let mut index = IndexBuilder::new()
74 /// .add_fields(&["title", "subtitle", "body"])
75 /// .set_ref("doc_id")
77 /// index.add_doc("doc_a", &["Chapter 1", "Welcome to Copenhagen", "..."]);
79 pub struct IndexBuilder
{
81 fields
: BTreeSet
<String
>,
83 pipeline
: Option
<Pipeline
>,
86 impl Default
for IndexBuilder
{
87 fn default() -> Self {
90 fields
: BTreeSet
::new(),
91 ref_field
: "id".into(),
98 pub fn new() -> Self {
102 /// Set whether or not documents should be saved in the `Index`'s document store.
103 pub fn save_docs(mut self, save
: bool
) -> Self {
108 /// Add a document field to the `Index`.
110 /// If the `Index` already contains a field with an identical name, adding it again is a no-op.
111 pub fn add_field(mut self, field
: &str) -> Self {
112 self.fields
.insert(field
.into());
116 /// Add the document fields to the `Index`.
118 /// If the `Index` already contains a field with an identical name, adding it again is a no-op.
119 pub fn add_fields
<I
>(mut self, fields
: I
) -> Self
125 .extend(fields
.into_iter().map(|f
| f
.as_ref().into()));
129 /// Set the key used to store the document reference field.
130 pub fn set_ref(mut self, ref_field
: &str) -> Self {
131 self.ref_field
= ref_field
.into();
135 /// Set the pipeline used by the `Index`.
136 pub fn set_pipeline(mut self, pipeline
: Pipeline
) -> Self {
137 self.pipeline
= Some(pipeline
);
141 /// Build an `Index` from this builder.
142 pub fn build(self) -> Index
{
146 .map(|f
| (f
.clone(), InvertedIndex
::new()))
151 fields
: self.fields
.into_iter().collect(),
152 ref_field
: self.ref_field
,
153 document_store
: DocumentStore
::new(self.save
),
154 pipeline
: self.pipeline
.unwrap_or_default(),
155 version
: ::ELASTICLUNR_VERSION
,
156 lang
: Language
::English
,
161 /// An elasticlunr search index.
162 #[derive(Serialize, Deserialize, Debug)]
163 #[serde(rename_all = "camelCase")]
165 // TODO(3.0): Use a BTreeSet<String>
166 pub fields
: Vec
<String
>,
167 pub pipeline
: Pipeline
,
168 #[serde(rename = "ref")]
169 pub ref_field
: String
,
170 pub version
: &'
static str,
171 index
: BTreeMap
<String
, InvertedIndex
>,
172 pub document_store
: DocumentStore
,
177 /// Create a new index with the provided fields.
182 /// # use elasticlunr::Index;
183 /// let mut index = Index::new(&["title", "body", "breadcrumbs"]);
184 /// index.add_doc("1", &["How to Foo", "First, you need to `bar`.", "Chapter 1 > How to Foo"]);
189 /// Panics if multiple given fields are identical.
190 pub fn new
<I
>(fields
: I
) -> Self
195 Index
::with_language(Language
::English
, fields
)
198 /// Create a new index with the provided fields for the given
199 /// [`Language`](lang/enum.Language.html).
204 /// # use elasticlunr::{Index, Language};
205 /// let mut index = Index::with_language(Language::English, &["title", "body"]);
206 /// index.add_doc("1", &["this is a title", "this is body text"]);
211 /// Panics if multiple given fields are identical.
212 pub fn with_language
<I
>(lang
: Language
, fields
: I
) -> Self
217 let mut indices
= BTreeMap
::new();
218 let mut field_vec
= Vec
::new();
219 for field
in fields
{
220 let field
= field
.as_ref().to_string();
221 if field_vec
.contains(&field
) {
222 panic
!("The Index already contains the field {}", field
);
224 field_vec
.push(field
.clone());
225 indices
.insert(field
, InvertedIndex
::new());
231 pipeline
: lang
.make_pipeline(),
232 ref_field
: "id".into(),
233 version
: ::ELASTICLUNR_VERSION
,
234 document_store
: DocumentStore
::new(true),
239 /// Add the data from a document to the index.
241 /// *NOTE: The elements of `data` should be provided in the same order as
242 /// the fields used to create the index.*
246 /// # use elasticlunr::Index;
247 /// let mut index = Index::new(&["title", "body"]);
248 /// index.add_doc("1", &["this is a title", "this is body text"]);
250 pub fn add_doc
<I
>(&mut self, doc_ref
: &str, data
: I
)
255 let mut doc
= BTreeMap
::new();
256 doc
.insert(self.ref_field
.clone(), doc_ref
.into());
257 let mut token_freq
= BTreeMap
::new();
259 for (field
, value
) in self.fields
.iter().zip(data
) {
260 doc
.insert(field
.clone(), value
.as_ref().to_string());
262 if field
== &self.ref_field
{
266 let raw_tokens
: Vec
<String
>;
269 #[cfg(feature = "zh")]
270 Language
::Chinese
=> {
271 raw_tokens
= pipeline
::tokenize_chinese(value
.as_ref());
273 #[cfg(feature = "ja")]
274 Language
::Japanese
=> {
275 raw_tokens
= pipeline
::tokenize_japanese(value
.as_ref());
278 raw_tokens
= pipeline
::tokenize(value
.as_ref());
282 let tokens
= self.pipeline
.run(raw_tokens
);
285 .add_field_length(doc_ref
, field
, tokens
.len());
287 for token
in tokens
{
288 *token_freq
.entry(token
).or_insert(0u64) += 1;
291 for (token
, count
) in &token_freq
{
292 let freq
= (*count
as f64).sqrt();
296 .expect(&format
!("InvertedIndex does not exist for field {}", field
))
297 .add_token(doc_ref
, token
, freq
);
301 self.document_store
.add_doc(doc_ref
, doc
);
304 pub fn get_fields(&self) -> &[String
] {
308 /// Returns the index, serialized to pretty-printed JSON.
309 pub fn to_json_pretty(&self) -> String
{
310 serde_json
::to_string_pretty(&self).unwrap()
313 /// Returns the index, serialized to JSON.
314 pub fn to_json(&self) -> String
{
315 serde_json
::to_string(&self).unwrap()
324 fn add_field_to_builder() {
325 let idx
= IndexBuilder
::new()
327 .add_fields(&["foo", "bar", "baz"])
330 let idx_fields
= idx
.get_fields();
331 for f
in &["foo", "bar", "baz"] {
332 assert_eq
!(idx_fields
.iter().filter(|x
| x
== f
).count(), 1);
337 fn adding_document_to_index() {
338 let mut idx
= Index
::new(&["body"]);
339 idx
.add_doc("1", &["this is a test"]);
341 assert_eq
!(idx
.document_store
.len(), 1);
343 idx
.document_store
.get_doc("1").unwrap(),
345 "id".into() => "1".into(),
346 "body".into() => "this is a test".into(),
352 fn adding_document_with_empty_field() {
353 let mut idx
= Index
::new(&["title", "body"]);
355 idx
.add_doc("1", &["", "test"]);
356 assert_eq
!(idx
.index
["body"].get_doc_frequency("test"), 1);
357 assert_eq
!(idx
.index
["body"].get_docs("test").unwrap()["1"], 1.);
362 fn creating_index_with_identical_fields_panics() {
363 let _idx
= Index
::new(&["title", "body", "title"]);