]>
Commit | Line | Data |
---|---|---|
064997fb FG |
1 | //! This module handles fuzzy-searching of functions, structs and other symbols |
2 | //! by name across the whole workspace and dependencies. | |
3 | //! | |
4 | //! It works by building an incrementally-updated text-search index of all | |
5 | //! symbols. The backbone of the index is the **awesome** `fst` crate by | |
6 | //! @BurntSushi. | |
7 | //! | |
8 | //! In a nutshell, you give a set of strings to `fst`, and it builds a | |
9 | //! finite state machine describing this set of strings. The strings which | |
10 | //! could fuzzy-match a pattern can also be described by a finite state machine. | |
11 | //! What is freaking cool is that you can now traverse both state machines in | |
12 | //! lock-step to enumerate the strings which are both in the input set and | |
13 | //! fuzz-match the query. Or, more formally, given two languages described by | |
14 | //! FSTs, one can build a product FST which describes the intersection of the | |
15 | //! languages. | |
16 | //! | |
17 | //! `fst` does not support cheap updating of the index, but it supports unioning | |
18 | //! of state machines. So, to account for changing source code, we build an FST | |
19 | //! for each library (which is assumed to never change) and an FST for each Rust | |
20 | //! file in the current workspace, and run a query against the union of all | |
21 | //! those FSTs. | |
22 | ||
23 | use std::{ | |
24 | cmp::Ordering, | |
25 | fmt, | |
26 | hash::{Hash, Hasher}, | |
27 | mem, | |
28 | sync::Arc, | |
29 | }; | |
30 | ||
31 | use base_db::{ | |
32 | salsa::{self, ParallelDatabase}, | |
33 | SourceDatabaseExt, SourceRootId, Upcast, | |
34 | }; | |
35 | use fst::{self, Streamer}; | |
36 | use hir::{ | |
37 | db::HirDatabase, | |
38 | symbols::{FileSymbol, SymbolCollector}, | |
39 | Crate, Module, | |
40 | }; | |
41 | use rayon::prelude::*; | |
42 | use rustc_hash::FxHashSet; | |
43 | ||
44 | use crate::RootDatabase; | |
45 | ||
46 | #[derive(Debug)] | |
47 | pub struct Query { | |
48 | query: String, | |
49 | lowercased: String, | |
50 | only_types: bool, | |
51 | libs: bool, | |
52 | exact: bool, | |
53 | case_sensitive: bool, | |
54 | limit: usize, | |
55 | } | |
56 | ||
57 | impl Query { | |
58 | pub fn new(query: String) -> Query { | |
59 | let lowercased = query.to_lowercase(); | |
60 | Query { | |
61 | query, | |
62 | lowercased, | |
63 | only_types: false, | |
64 | libs: false, | |
65 | exact: false, | |
66 | case_sensitive: false, | |
67 | limit: usize::max_value(), | |
68 | } | |
69 | } | |
70 | ||
71 | pub fn only_types(&mut self) { | |
72 | self.only_types = true; | |
73 | } | |
74 | ||
75 | pub fn libs(&mut self) { | |
76 | self.libs = true; | |
77 | } | |
78 | ||
79 | pub fn exact(&mut self) { | |
80 | self.exact = true; | |
81 | } | |
82 | ||
83 | pub fn case_sensitive(&mut self) { | |
84 | self.case_sensitive = true; | |
85 | } | |
86 | ||
87 | pub fn limit(&mut self, limit: usize) { | |
88 | self.limit = limit | |
89 | } | |
90 | } | |
91 | ||
92 | #[salsa::query_group(SymbolsDatabaseStorage)] | |
93 | pub trait SymbolsDatabase: HirDatabase + SourceDatabaseExt + Upcast<dyn HirDatabase> { | |
94 | /// The symbol index for a given module. These modules should only be in source roots that | |
95 | /// are inside local_roots. | |
96 | fn module_symbols(&self, module: Module) -> Arc<SymbolIndex>; | |
97 | ||
98 | /// The symbol index for a given source root within library_roots. | |
99 | fn library_symbols(&self, source_root_id: SourceRootId) -> Arc<SymbolIndex>; | |
100 | ||
101 | /// The set of "local" (that is, from the current workspace) roots. | |
102 | /// Files in local roots are assumed to change frequently. | |
103 | #[salsa::input] | |
104 | fn local_roots(&self) -> Arc<FxHashSet<SourceRootId>>; | |
105 | ||
106 | /// The set of roots for crates.io libraries. | |
107 | /// Files in libraries are assumed to never change. | |
108 | #[salsa::input] | |
109 | fn library_roots(&self) -> Arc<FxHashSet<SourceRootId>>; | |
110 | } | |
111 | ||
112 | fn library_symbols(db: &dyn SymbolsDatabase, source_root_id: SourceRootId) -> Arc<SymbolIndex> { | |
113 | let _p = profile::span("library_symbols"); | |
114 | ||
115 | // todo: this could be parallelized, once I figure out how to do that... | |
116 | let symbols = db | |
117 | .source_root_crates(source_root_id) | |
118 | .iter() | |
119 | .flat_map(|&krate| Crate::from(krate).modules(db.upcast())) | |
120 | // we specifically avoid calling SymbolsDatabase::module_symbols here, even they do the same thing, | |
121 | // as the index for a library is not going to really ever change, and we do not want to store each | |
122 | // module's index in salsa. | |
123 | .flat_map(|module| SymbolCollector::collect(db.upcast(), module)) | |
124 | .collect(); | |
125 | ||
126 | Arc::new(SymbolIndex::new(symbols)) | |
127 | } | |
128 | ||
129 | fn module_symbols(db: &dyn SymbolsDatabase, module: Module) -> Arc<SymbolIndex> { | |
130 | let _p = profile::span("module_symbols"); | |
131 | let symbols = SymbolCollector::collect(db.upcast(), module); | |
132 | Arc::new(SymbolIndex::new(symbols)) | |
133 | } | |
134 | ||
135 | /// Need to wrap Snapshot to provide `Clone` impl for `map_with` | |
136 | struct Snap<DB>(DB); | |
137 | impl<DB: ParallelDatabase> Snap<salsa::Snapshot<DB>> { | |
138 | fn new(db: &DB) -> Self { | |
139 | Self(db.snapshot()) | |
140 | } | |
141 | } | |
142 | impl<DB: ParallelDatabase> Clone for Snap<salsa::Snapshot<DB>> { | |
143 | fn clone(&self) -> Snap<salsa::Snapshot<DB>> { | |
144 | Snap(self.0.snapshot()) | |
145 | } | |
146 | } | |
147 | impl<DB> std::ops::Deref for Snap<DB> { | |
148 | type Target = DB; | |
149 | ||
150 | fn deref(&self) -> &Self::Target { | |
151 | &self.0 | |
152 | } | |
153 | } | |
154 | ||
155 | // Feature: Workspace Symbol | |
156 | // | |
157 | // Uses fuzzy-search to find types, modules and functions by name across your | |
158 | // project and dependencies. This is **the** most useful feature, which improves code | |
159 | // navigation tremendously. It mostly works on top of the built-in LSP | |
160 | // functionality, however `#` and `*` symbols can be used to narrow down the | |
161 | // search. Specifically, | |
162 | // | |
163 | // - `Foo` searches for `Foo` type in the current workspace | |
164 | // - `foo#` searches for `foo` function in the current workspace | |
165 | // - `Foo*` searches for `Foo` type among dependencies, including `stdlib` | |
166 | // - `foo#*` searches for `foo` function among dependencies | |
167 | // | |
168 | // That is, `#` switches from "types" to all symbols, `*` switches from the current | |
169 | // workspace to dependencies. | |
170 | // | |
171 | // Note that filtering does not currently work in VSCode due to the editor never | |
172 | // sending the special symbols to the language server. Instead, you can configure | |
173 | // the filtering via the `rust-analyzer.workspace.symbol.search.scope` and | |
174 | // `rust-analyzer.workspace.symbol.search.kind` settings. | |
175 | // | |
176 | // |=== | |
177 | // | Editor | Shortcut | |
178 | // | |
179 | // | VS Code | kbd:[Ctrl+T] | |
180 | // |=== | |
181 | pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> { | |
182 | let _p = profile::span("world_symbols").detail(|| query.query.clone()); | |
183 | ||
184 | let indices: Vec<_> = if query.libs { | |
185 | db.library_roots() | |
186 | .par_iter() | |
187 | .map_with(Snap::new(db), |snap, &root| snap.library_symbols(root)) | |
188 | .collect() | |
189 | } else { | |
190 | let mut modules = Vec::new(); | |
191 | ||
192 | for &root in db.local_roots().iter() { | |
193 | let crates = db.source_root_crates(root); | |
194 | for &krate in crates.iter() { | |
195 | modules.extend(Crate::from(krate).modules(db)); | |
196 | } | |
197 | } | |
198 | ||
199 | modules | |
200 | .par_iter() | |
201 | .map_with(Snap::new(db), |snap, &module| snap.module_symbols(module)) | |
202 | .collect() | |
203 | }; | |
204 | ||
205 | query.search(&indices) | |
206 | } | |
207 | ||
208 | pub fn crate_symbols(db: &RootDatabase, krate: Crate, query: Query) -> Vec<FileSymbol> { | |
9c376795 | 209 | let _p = profile::span("crate_symbols").detail(|| format!("{query:?}")); |
064997fb FG |
210 | |
211 | let modules = krate.modules(db); | |
212 | let indices: Vec<_> = modules | |
213 | .par_iter() | |
214 | .map_with(Snap::new(db), |snap, &module| snap.module_symbols(module)) | |
215 | .collect(); | |
216 | ||
217 | query.search(&indices) | |
218 | } | |
219 | ||
220 | #[derive(Default)] | |
221 | pub struct SymbolIndex { | |
222 | symbols: Vec<FileSymbol>, | |
223 | map: fst::Map<Vec<u8>>, | |
224 | } | |
225 | ||
226 | impl fmt::Debug for SymbolIndex { | |
227 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
228 | f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish() | |
229 | } | |
230 | } | |
231 | ||
232 | impl PartialEq for SymbolIndex { | |
233 | fn eq(&self, other: &SymbolIndex) -> bool { | |
234 | self.symbols == other.symbols | |
235 | } | |
236 | } | |
237 | ||
238 | impl Eq for SymbolIndex {} | |
239 | ||
240 | impl Hash for SymbolIndex { | |
241 | fn hash<H: Hasher>(&self, hasher: &mut H) { | |
242 | self.symbols.hash(hasher) | |
243 | } | |
244 | } | |
245 | ||
246 | impl SymbolIndex { | |
247 | fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex { | |
248 | fn cmp(lhs: &FileSymbol, rhs: &FileSymbol) -> Ordering { | |
249 | let lhs_chars = lhs.name.chars().map(|c| c.to_ascii_lowercase()); | |
250 | let rhs_chars = rhs.name.chars().map(|c| c.to_ascii_lowercase()); | |
251 | lhs_chars.cmp(rhs_chars) | |
252 | } | |
253 | ||
254 | symbols.par_sort_by(cmp); | |
255 | ||
256 | let mut builder = fst::MapBuilder::memory(); | |
257 | ||
258 | let mut last_batch_start = 0; | |
259 | ||
260 | for idx in 0..symbols.len() { | |
261 | if let Some(next_symbol) = symbols.get(idx + 1) { | |
262 | if cmp(&symbols[last_batch_start], next_symbol) == Ordering::Equal { | |
263 | continue; | |
264 | } | |
265 | } | |
266 | ||
267 | let start = last_batch_start; | |
268 | let end = idx + 1; | |
269 | last_batch_start = end; | |
270 | ||
271 | let key = symbols[start].name.as_str().to_ascii_lowercase(); | |
272 | let value = SymbolIndex::range_to_map_value(start, end); | |
273 | ||
274 | builder.insert(key, value).unwrap(); | |
275 | } | |
276 | ||
277 | let map = fst::Map::new(builder.into_inner().unwrap()).unwrap(); | |
278 | SymbolIndex { symbols, map } | |
279 | } | |
280 | ||
281 | pub fn len(&self) -> usize { | |
282 | self.symbols.len() | |
283 | } | |
284 | ||
285 | pub fn memory_size(&self) -> usize { | |
286 | self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>() | |
287 | } | |
288 | ||
289 | fn range_to_map_value(start: usize, end: usize) -> u64 { | |
290 | debug_assert![start <= (std::u32::MAX as usize)]; | |
291 | debug_assert![end <= (std::u32::MAX as usize)]; | |
292 | ||
293 | ((start as u64) << 32) | end as u64 | |
294 | } | |
295 | ||
296 | fn map_value_to_range(value: u64) -> (usize, usize) { | |
297 | let end = value as u32 as usize; | |
298 | let start = (value >> 32) as usize; | |
299 | (start, end) | |
300 | } | |
301 | } | |
302 | ||
303 | impl Query { | |
304 | pub(crate) fn search(self, indices: &[Arc<SymbolIndex>]) -> Vec<FileSymbol> { | |
305 | let _p = profile::span("symbol_index::Query::search"); | |
306 | let mut op = fst::map::OpBuilder::new(); | |
307 | for file_symbols in indices.iter() { | |
308 | let automaton = fst::automaton::Subsequence::new(&self.lowercased); | |
309 | op = op.add(file_symbols.map.search(automaton)) | |
310 | } | |
311 | let mut stream = op.union(); | |
312 | let mut res = Vec::new(); | |
313 | while let Some((_, indexed_values)) = stream.next() { | |
314 | for indexed_value in indexed_values { | |
315 | let symbol_index = &indices[indexed_value.index]; | |
316 | let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value); | |
317 | ||
318 | for symbol in &symbol_index.symbols[start..end] { | |
319 | if self.only_types && !symbol.kind.is_type() { | |
320 | continue; | |
321 | } | |
322 | if self.exact { | |
323 | if symbol.name != self.query { | |
324 | continue; | |
325 | } | |
326 | } else if self.case_sensitive { | |
327 | if self.query.chars().any(|c| !symbol.name.contains(c)) { | |
328 | continue; | |
329 | } | |
330 | } | |
331 | ||
332 | res.push(symbol.clone()); | |
333 | if res.len() >= self.limit { | |
334 | return res; | |
335 | } | |
336 | } | |
337 | } | |
338 | } | |
339 | res | |
340 | } | |
341 | } | |
342 | ||
343 | #[cfg(test)] | |
344 | mod tests { | |
345 | ||
346 | use base_db::fixture::WithFixture; | |
347 | use expect_test::expect_file; | |
348 | use hir::symbols::SymbolCollector; | |
349 | ||
350 | use super::*; | |
351 | ||
352 | #[test] | |
353 | fn test_symbol_index_collection() { | |
354 | let (db, _) = RootDatabase::with_many_files( | |
355 | r#" | |
356 | //- /main.rs | |
357 | ||
358 | macro_rules! macro_rules_macro { | |
359 | () => {} | |
360 | }; | |
361 | ||
362 | macro_rules! define_struct { | |
363 | () => { | |
364 | struct StructFromMacro; | |
365 | } | |
366 | }; | |
367 | ||
368 | define_struct!(); | |
369 | ||
370 | macro Macro { } | |
371 | ||
372 | struct Struct; | |
373 | enum Enum { | |
374 | A, B | |
375 | } | |
376 | union Union {} | |
377 | ||
378 | impl Struct { | |
379 | fn impl_fn() {} | |
380 | } | |
381 | ||
382 | trait Trait { | |
383 | fn trait_fn(&self); | |
384 | } | |
385 | ||
386 | fn main() { | |
387 | struct StructInFn; | |
388 | } | |
389 | ||
390 | const CONST: u32 = 1; | |
391 | static STATIC: &'static str = "2"; | |
392 | type Alias = Struct; | |
393 | ||
394 | mod a_mod { | |
395 | struct StructInModA; | |
396 | } | |
397 | ||
398 | const _: () = { | |
399 | struct StructInUnnamedConst; | |
400 | ||
401 | () | |
402 | }; | |
403 | ||
404 | const CONST_WITH_INNER: () = { | |
405 | struct StructInNamedConst; | |
406 | ||
407 | () | |
408 | }; | |
409 | ||
410 | mod b_mod; | |
411 | ||
412 | //- /b_mod.rs | |
413 | struct StructInModB; | |
414 | "#, | |
415 | ); | |
416 | ||
417 | let symbols: Vec<_> = Crate::from(db.test_crate()) | |
418 | .modules(&db) | |
419 | .into_iter() | |
420 | .map(|module_id| { | |
421 | let mut symbols = SymbolCollector::collect(&db, module_id); | |
422 | symbols.sort_by_key(|it| it.name.clone()); | |
423 | (module_id, symbols) | |
424 | }) | |
425 | .collect(); | |
426 | ||
427 | expect_file!["./test_data/test_symbol_index_collection.txt"].assert_debug_eq(&symbols); | |
428 | } | |
429 | } |