]>
Commit | Line | Data |
---|---|---|
5869c6ff XL |
1 | //! Types for tracking pieces of source code within a crate. |
2 | //! | |
3 | //! The [`SourceMap`] tracks all the source code used within a single crate, mapping | |
1a4d82fc JJ |
4 | //! from integer byte positions to the original source code location. Each bit |
5 | //! of source parsed during crate parsing (typically files, in-memory strings, | |
6 | //! or various bits of macro expansion) covers a continuous range of bytes in the | |
5869c6ff XL |
7 | //! `SourceMap` and is represented by a [`SourceFile`]. Byte positions are stored in |
8 | //! [`Span`] and used pervasively in the compiler. They are absolute positions | |
e1599b0c | 9 | //! within the `SourceMap`, which upon request can be converted to line and column |
1a4d82fc | 10 | //! information, source code snippets, etc. |
223e47cc | 11 | |
dfeec247 | 12 | pub use crate::hygiene::{ExpnData, ExpnKind}; |
60c5eb7d | 13 | pub use crate::*; |
223e47cc | 14 | |
abe05a73 XL |
15 | use rustc_data_structures::fx::FxHashMap; |
16 | use rustc_data_structures::stable_hasher::StableHasher; | |
29967ef6 | 17 | use rustc_data_structures::sync::{AtomicU32, Lrc, MappedReadGuard, ReadGuard, RwLock}; |
9c376795 | 18 | use std::cmp; |
abe05a73 | 19 | use std::hash::Hash; |
9c376795 | 20 | use std::path::{self, Path, PathBuf}; |
74b04a01 | 21 | use std::sync::atomic::Ordering; |
223e47cc | 22 | |
3157f602 | 23 | use std::fs; |
0731742a | 24 | use std::io; |
9fa01778 | 25 | |
416331ca XL |
26 | #[cfg(test)] |
27 | mod tests; | |
28 | ||
9fa01778 | 29 | /// Returns the span itself if it doesn't come from a macro expansion, |
1a4d82fc | 30 | /// otherwise return the call site span up to the `enclosing_sp` by |
e1599b0c | 31 | /// following the `expn_data` chain. |
cc61c64b | 32 | pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span { |
e1599b0c XL |
33 | let expn_data1 = sp.ctxt().outer_expn_data(); |
34 | let expn_data2 = enclosing_sp.ctxt().outer_expn_data(); | |
dfeec247 XL |
35 | if expn_data1.is_root() || !expn_data2.is_root() && expn_data1.call_site == expn_data2.call_site |
36 | { | |
e1599b0c XL |
37 | sp |
38 | } else { | |
39 | original_sp(expn_data1.call_site, enclosing_sp) | |
1a4d82fc JJ |
40 | } |
41 | } | |
223e47cc | 42 | |
f035d41b XL |
43 | pub mod monotonic { |
44 | use std::ops::{Deref, DerefMut}; | |
45 | ||
46 | /// A `MonotonicVec` is a `Vec` which can only be grown. | |
47 | /// Once inserted, an element can never be removed or swapped, | |
48 | /// guaranteeing that any indices into a `MonotonicVec` are stable | |
49 | // This is declared in its own module to ensure that the private | |
50 | // field is inaccessible | |
51 | pub struct MonotonicVec<T>(Vec<T>); | |
52 | impl<T> MonotonicVec<T> { | |
53 | pub fn new(val: Vec<T>) -> MonotonicVec<T> { | |
54 | MonotonicVec(val) | |
55 | } | |
56 | ||
57 | pub fn push(&mut self, val: T) { | |
58 | self.0.push(val); | |
59 | } | |
60 | } | |
61 | ||
62 | impl<T> Default for MonotonicVec<T> { | |
63 | fn default() -> Self { | |
64 | MonotonicVec::new(vec![]) | |
65 | } | |
66 | } | |
67 | ||
68 | impl<T> Deref for MonotonicVec<T> { | |
69 | type Target = Vec<T>; | |
70 | fn deref(&self) -> &Self::Target { | |
71 | &self.0 | |
72 | } | |
73 | } | |
74 | ||
75 | impl<T> !DerefMut for MonotonicVec<T> {} | |
76 | } | |
77 | ||
/// A value of type `T` paired with a [`Span`].
#[derive(Clone, Encodable, Decodable, Debug, Copy, HashStable_Generic)]
pub struct Spanned<T> {
    /// The wrapped value.
    pub node: T,
    /// The span attached to `node`.
    pub span: Span,
}
83 | ||
3157f602 | 84 | pub fn respan<T>(sp: Span, t: T) -> Spanned<T> { |
dfeec247 | 85 | Spanned { node: t, span: sp } |
223e47cc LB |
86 | } |
87 | ||
3157f602 XL |
88 | pub fn dummy_spanned<T>(t: T) -> Spanned<T> { |
89 | respan(DUMMY_SP, t) | |
1a4d82fc JJ |
90 | } |
91 | ||
c34b1796 | 92 | // _____________________________________________________________________________ |
b7449926 | 93 | // SourceFile, MultiByteChar, FileName, FileLines |
c34b1796 AL |
94 | // |
95 | ||
62682a34 SL |
/// An abstraction over the fs operations used by the Parser.
///
/// `SourceMap` stores a boxed implementation of this trait and routes its
/// `file_exists` and `load_file` operations through it.
pub trait FileLoader {
    /// Query the existence of a file.
    fn file_exists(&self, path: &Path) -> bool;

    /// Read the contents of a UTF-8 file into memory.
    ///
    /// Failures are reported through the returned `io::Result`.
    fn read_file(&self, path: &Path) -> io::Result<String>;
}
104 | ||
105 | /// A FileLoader that uses std::fs to load real files. | |
106 | pub struct RealFileLoader; | |
107 | ||
108 | impl FileLoader for RealFileLoader { | |
109 | fn file_exists(&self, path: &Path) -> bool { | |
17df50a5 | 110 | path.exists() |
62682a34 SL |
111 | } |
112 | ||
113 | fn read_file(&self, path: &Path) -> io::Result<String> { | |
0731742a | 114 | fs::read_to_string(path) |
62682a34 SL |
115 | } |
116 | } | |
c34b1796 | 117 | |
17df50a5 XL |
/// This is a [SourceFile] identifier that is used to correlate source files between
/// subsequent compilation sessions (which is something we need to do during
/// incremental compilation).
///
/// The [StableSourceFileId] also contains the CrateNum of the crate the source
/// file was originally parsed for. This way we get two separate entries in
/// the [SourceMap] if the same file is part of both the local and an upstream
/// crate. Trying to only have one entry for both cases is problematic because
/// at the point where we discover that there's a local use of the file in
/// addition to the upstream one, we might already have made decisions based on
/// the assumption that it's an upstream file. Treating the two files as
/// different has no real downsides.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
pub struct StableSourceFileId {
    /// A hash of the source file's [`FileName`]. This is a hash so that its size
    /// is more predictable than if we included the actual [`FileName`] value.
    pub file_name_hash: u64,

    /// The [`CrateNum`] of the crate this source file was originally parsed for.
    /// We cannot include this information in the hash because at the time
    /// of hashing we don't have the context to map from the [`CrateNum`]'s numeric
    /// value to a `StableCrateId`.
    pub cnum: CrateNum,
}
abe05a73 | 142 | |
ba9703b0 XL |
143 | // FIXME: we need a more globally consistent approach to the problem solved by |
144 | // StableSourceFileId, perhaps built atop source_file.name_hash. | |
a1dfa0c6 XL |
145 | impl StableSourceFileId { |
146 | pub fn new(source_file: &SourceFile) -> StableSourceFileId { | |
17df50a5 | 147 | StableSourceFileId::new_from_name(&source_file.name, source_file.cnum) |
0731742a XL |
148 | } |
149 | ||
17df50a5 | 150 | fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId { |
abe05a73 | 151 | let mut hasher = StableHasher::new(); |
17df50a5 XL |
152 | name.hash(&mut hasher); |
153 | StableSourceFileId { file_name_hash: hasher.finish(), cnum } | |
abe05a73 XL |
154 | } |
155 | } | |
156 | ||
c34b1796 | 157 | // _____________________________________________________________________________ |
b7449926 | 158 | // SourceMap |
c34b1796 AL |
159 | // |
160 | ||
/// The set of files registered in a `SourceMap`.
#[derive(Default)]
pub(super) struct SourceMapFiles {
    /// All registered files, in registration order (append-only).
    source_files: monotonic::MonotonicVec<Lrc<SourceFile>>,
    /// The same files, keyed by their `StableSourceFileId`.
    stable_id_to_source_file: FxHashMap<StableSourceFileId, Lrc<SourceFile>>,
}
166 | ||
/// Tracks the source files of a crate and hands out their byte-position ranges.
pub struct SourceMap {
    /// The address space below this value is currently used by the files in the source map.
    used_address_space: AtomicU32,

    /// The registered source files.
    files: RwLock<SourceMapFiles>,
    /// The loader used to check for and read files.
    file_loader: Box<dyn FileLoader + Sync + Send>,
    // This is used to apply the file path remapping as specified via
    // `--remap-path-prefix` to all `SourceFile`s allocated within this `SourceMap`.
    path_mapping: FilePathMapping,

    /// The algorithm used for hashing the contents of each source file.
    hash_kind: SourceFileHashAlgorithm,
}
180 | ||
b7449926 XL |
181 | impl SourceMap { |
182 | pub fn new(path_mapping: FilePathMapping) -> SourceMap { | |
ba9703b0 XL |
183 | Self::with_file_loader_and_hash_kind( |
184 | Box::new(RealFileLoader), | |
74b04a01 | 185 | path_mapping, |
ba9703b0 XL |
186 | SourceFileHashAlgorithm::Md5, |
187 | ) | |
62682a34 SL |
188 | } |
189 | ||
ba9703b0 | 190 | pub fn with_file_loader_and_hash_kind( |
dfeec247 XL |
191 | file_loader: Box<dyn FileLoader + Sync + Send>, |
192 | path_mapping: FilePathMapping, | |
ba9703b0 | 193 | hash_kind: SourceFileHashAlgorithm, |
dfeec247 | 194 | ) -> SourceMap { |
74b04a01 XL |
195 | SourceMap { |
196 | used_address_space: AtomicU32::new(0), | |
197 | files: Default::default(), | |
198 | file_loader, | |
199 | path_mapping, | |
ba9703b0 | 200 | hash_kind, |
74b04a01 | 201 | } |
223e47cc LB |
202 | } |
203 | ||
7cac9316 XL |
/// Returns the path mapping this `SourceMap` was created with.
pub fn path_mapping(&self) -> &FilePathMapping {
    &self.path_mapping
}
207 | ||
62682a34 SL |
/// Asks the configured file loader whether `path` exists.
pub fn file_exists(&self, path: &Path) -> bool {
    self.file_loader.file_exists(path)
}
211 | ||
b7449926 | 212 | pub fn load_file(&self, path: &Path) -> io::Result<Lrc<SourceFile>> { |
54a0048b | 213 | let src = self.file_loader.read_file(path)?; |
0731742a | 214 | let filename = path.to_owned().into(); |
b7449926 | 215 | Ok(self.new_source_file(filename, src)) |
7cac9316 XL |
216 | } |
217 | ||
e1599b0c XL |
218 | /// Loads source file as a binary blob. |
219 | /// | |
220 | /// Unlike `load_file`, guarantees that no normalization like BOM-removal | |
221 | /// takes place. | |
222 | pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> { | |
223 | // Ideally, this should use `self.file_loader`, but it can't | |
224 | // deal with binary files yet. | |
225 | let bytes = fs::read(path)?; | |
226 | ||
227 | // We need to add file to the `SourceMap`, so that it is present | |
228 | // in dep-info. There's also an edge case that file might be both | |
229 | // loaded as a binary via `include_bytes!` and as proper `SourceFile` | |
230 | // via `mod`, so we try to use real file contents and not just an | |
231 | // empty string. | |
dfeec247 | 232 | let text = std::str::from_utf8(&bytes).unwrap_or("").to_string(); |
e1599b0c XL |
233 | self.new_source_file(path.to_owned().into(), text); |
234 | Ok(bytes) | |
235 | } | |
236 | ||
f035d41b XL |
/// Returns a read guard over the list of registered source files.
// By returning a `MonotonicVec`, we ensure that consumers cannot invalidate
// any existing indices pointing into `files`.
pub fn files(&self) -> MappedReadGuard<'_, monotonic::MonotonicVec<Lrc<SourceFile>>> {
    ReadGuard::map(self.files.borrow(), |files| &files.source_files)
}
242 | ||
dfeec247 XL |
243 | pub fn source_file_by_stable_id( |
244 | &self, | |
245 | stable_id: StableSourceFileId, | |
246 | ) -> Option<Lrc<SourceFile>> { | |
74b04a01 XL |
247 | self.files.borrow().stable_id_to_source_file.get(&stable_id).cloned() |
248 | } | |
249 | ||
250 | fn allocate_address_space(&self, size: usize) -> Result<usize, OffsetOverflowError> { | |
251 | let size = u32::try_from(size).map_err(|_| OffsetOverflowError)?; | |
252 | ||
253 | loop { | |
254 | let current = self.used_address_space.load(Ordering::Relaxed); | |
255 | let next = current | |
256 | .checked_add(size) | |
257 | // Add one so there is some space between files. This lets us distinguish | |
258 | // positions in the `SourceMap`, even in the presence of zero-length files. | |
259 | .and_then(|next| next.checked_add(1)) | |
260 | .ok_or(OffsetOverflowError)?; | |
261 | ||
262 | if self | |
263 | .used_address_space | |
264 | .compare_exchange(current, next, Ordering::Relaxed, Ordering::Relaxed) | |
265 | .is_ok() | |
266 | { | |
267 | return Ok(usize::try_from(current).unwrap()); | |
268 | } | |
c1a9b12d SL |
269 | } |
270 | } | |
271 | ||
e1599b0c XL |
272 | /// Creates a new `SourceFile`. |
273 | /// If a file already exists in the `SourceMap` with the same ID, that file is returned | |
274 | /// unmodified. | |
b7449926 | 275 | pub fn new_source_file(&self, filename: FileName, src: String) -> Lrc<SourceFile> { |
dfeec247 XL |
276 | self.try_new_source_file(filename, src).unwrap_or_else(|OffsetOverflowError| { |
277 | eprintln!("fatal error: rustc does not support files larger than 4GB"); | |
278 | crate::fatal_error::FatalError.raise() | |
279 | }) | |
dc9dc135 XL |
280 | } |
281 | ||
282 | fn try_new_source_file( | |
283 | &self, | |
17df50a5 | 284 | filename: FileName, |
dfeec247 | 285 | src: String, |
dc9dc135 | 286 | ) -> Result<Lrc<SourceFile>, OffsetOverflowError> { |
ea8adc8c XL |
287 | // Note that filename may not be a valid path, eg it may be `<anon>` etc, |
288 | // but this is okay because the directory determined by `path.pop()` will | |
289 | // be empty, so the working directory will be used. | |
17df50a5 | 290 | let (filename, _) = self.path_mapping.map_filename_prefix(&filename); |
223e47cc | 291 | |
17df50a5 | 292 | let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE); |
1a4d82fc | 293 | |
dc9dc135 | 294 | let lrc_sf = match self.source_file_by_stable_id(file_id) { |
0731742a XL |
295 | Some(lrc_sf) => lrc_sf, |
296 | None => { | |
74b04a01 XL |
297 | let start_pos = self.allocate_address_space(src.len())?; |
298 | ||
0731742a XL |
299 | let source_file = Lrc::new(SourceFile::new( |
300 | filename, | |
0731742a XL |
301 | src, |
302 | Pos::from_usize(start_pos), | |
ba9703b0 | 303 | self.hash_kind, |
74b04a01 | 304 | )); |
abe05a73 | 305 | |
17df50a5 XL |
306 | // Let's make sure the file_id we generated above actually matches |
307 | // the ID we generate for the SourceFile we just created. | |
308 | debug_assert_eq!(StableSourceFileId::new(&source_file), file_id); | |
309 | ||
0731742a XL |
310 | let mut files = self.files.borrow_mut(); |
311 | ||
312 | files.source_files.push(source_file.clone()); | |
313 | files.stable_id_to_source_file.insert(file_id, source_file.clone()); | |
314 | ||
315 | source_file | |
316 | } | |
dc9dc135 XL |
317 | }; |
318 | Ok(lrc_sf) | |
223e47cc LB |
319 | } |
320 | ||
e1599b0c XL |
/// Allocates a new `SourceFile` representing a source file from an external
/// crate. The source code of such an "imported `SourceFile`" is not available,
/// but we still know enough to generate accurate debuginfo location
/// information for things inlined from other crates.
///
/// The position tables passed in (`file_local_lines`, `file_local_multibyte_chars`,
/// `file_local_non_narrow_chars`, `file_local_normalized_pos`) are rebased from
/// `original_start_pos` onto the start position allocated in this `SourceMap`.
///
/// Panics if no address space can be allocated for `source_len` bytes.
pub fn new_imported_source_file(
    &self,
    filename: FileName,
    src_hash: SourceFileHash,
    name_hash: u128,
    source_len: usize,
    cnum: CrateNum,
    file_local_lines: Lock<SourceFileLines>,
    mut file_local_multibyte_chars: Vec<MultiByteChar>,
    mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
    mut file_local_normalized_pos: Vec<NormalizedPos>,
    original_start_pos: BytePos,
    metadata_index: u32,
) -> Lrc<SourceFile> {
    let start_pos = self
        .allocate_address_space(source_len)
        .expect("not enough address space for imported source file");

    let end_pos = Pos::from_usize(start_pos + source_len);
    let start_pos = Pos::from_usize(start_pos);

    // Translate these positions into the new global frame of reference,
    // now that the offset of the SourceFile is known.
    //
    // These are all unsigned values. `original_start_pos` may be larger or
    // smaller than `start_pos`, but `pos` is always larger than both.
    // Therefore, `(pos - original_start_pos) + start_pos` won't overflow
    // but `start_pos - original_start_pos` might. So we use the former
    // form rather than pre-computing the offset into a local variable. The
    // compiler backend can optimize away the repeated computations in a
    // way that won't trigger overflow checks.
    match &mut *file_local_lines.borrow_mut() {
        SourceFileLines::Lines(lines) => {
            for pos in lines {
                *pos = (*pos - original_start_pos) + start_pos;
            }
        }
        // In the `Diffs` form only `line_start` is rebased here.
        SourceFileLines::Diffs(SourceFileDiffs { line_start, .. }) => {
            *line_start = (*line_start - original_start_pos) + start_pos;
        }
    }
    for mbc in &mut file_local_multibyte_chars {
        mbc.pos = (mbc.pos - original_start_pos) + start_pos;
    }
    for swc in &mut file_local_non_narrow_chars {
        *swc = (*swc - original_start_pos) + start_pos;
    }
    for nc in &mut file_local_normalized_pos {
        nc.pos = (nc.pos - original_start_pos) + start_pos;
    }

    let source_file = Lrc::new(SourceFile {
        name: filename,
        // The source text itself is not stored for imported files.
        src: None,
        src_hash,
        external_src: Lock::new(ExternalSource::Foreign {
            kind: ExternalSourceKind::AbsentOk,
            metadata_index,
        }),
        start_pos,
        end_pos,
        lines: file_local_lines,
        multibyte_chars: file_local_multibyte_chars,
        non_narrow_chars: file_local_non_narrow_chars,
        normalized_pos: file_local_normalized_pos,
        name_hash,
        cnum,
    });

    let mut files = self.files.borrow_mut();

    // Register the file both in the ordered list and in the stable-ID index.
    files.source_files.push(source_file.clone());
    files
        .stable_id_to_source_file
        .insert(StableSourceFileId::new(&source_file), source_file.clone());

    source_file
}
403 | ||
487cf647 | 404 | /// If there is a doctest offset, applies it to the line. |
0731742a | 405 | pub fn doctest_offset_line(&self, file: &FileName, orig: usize) -> usize { |
ba9703b0 | 406 | match file { |
0731742a | 407 | FileName::DocTest(_, offset) => { |
ba9703b0 | 408 | if *offset < 0 { |
0731742a | 409 | orig - (-(*offset)) as usize |
ba9703b0 XL |
410 | } else { |
411 | orig + *offset as usize | |
412 | } | |
dfeec247 XL |
413 | } |
414 | _ => orig, | |
ba9703b0 | 415 | } |
2c00a5a8 XL |
416 | } |
417 | ||
29967ef6 XL |
418 | /// Return the SourceFile that contains the given `BytePos` |
419 | pub fn lookup_source_file(&self, pos: BytePos) -> Lrc<SourceFile> { | |
420 | let idx = self.lookup_source_file_idx(pos); | |
421 | (*self.files.borrow().source_files)[idx].clone() | |
422 | } | |
423 | ||
e1599b0c | 424 | /// Looks up source information about a `BytePos`. |
970d7e83 | 425 | pub fn lookup_char_pos(&self, pos: BytePos) -> Loc { |
29967ef6 XL |
426 | let sf = self.lookup_source_file(pos); |
427 | let (line, col, col_display) = sf.lookup_file_pos_with_col_display(pos); | |
428 | Loc { file: sf, line, col, col_display } | |
d9579d0f AL |
429 | } |
430 | ||
487cf647 | 431 | /// If the corresponding `SourceFile` is empty, does not return a line number. |
b7449926 | 432 | pub fn lookup_line(&self, pos: BytePos) -> Result<SourceFileAndLine, Lrc<SourceFile>> { |
29967ef6 | 433 | let f = self.lookup_source_file(pos); |
c1a9b12d | 434 | |
9e0c209e | 435 | match f.lookup_line(pos) { |
dc9dc135 | 436 | Some(line) => Ok(SourceFileAndLine { sf: f, line }), |
dfeec247 | 437 | None => Err(f), |
c1a9b12d | 438 | } |
223e47cc LB |
439 | } |
440 | ||
9c376795 FG |
441 | pub fn span_to_string( |
442 | &self, | |
443 | sp: Span, | |
444 | filename_display_pref: FileNameDisplayPreference, | |
445 | ) -> String { | |
17df50a5 | 446 | if self.files.borrow().source_files.is_empty() || sp.is_dummy() { |
1a4d82fc | 447 | return "no-location".to_string(); |
223e47cc LB |
448 | } |
449 | ||
48663c56 XL |
450 | let lo = self.lookup_char_pos(sp.lo()); |
451 | let hi = self.lookup_char_pos(sp.hi()); | |
dfeec247 | 452 | format!( |
9c376795 | 453 | "{}:{}:{}{}", |
94222f64 | 454 | lo.file.name.display(filename_display_pref), |
e1599b0c XL |
455 | lo.line, |
456 | lo.col.to_usize() + 1, | |
9c376795 FG |
457 | if let FileNameDisplayPreference::Short = filename_display_pref { |
458 | String::new() | |
459 | } else { | |
460 | format!(": {}:{}", hi.line, hi.col.to_usize() + 1) | |
461 | } | |
e1599b0c | 462 | ) |
223e47cc LB |
463 | } |
464 | ||
17df50a5 XL |
/// Format the span location suitable for embedding in build artifacts.
///
/// Always displays the file name with `FileNameDisplayPreference::Remapped`.
pub fn span_to_embeddable_string(&self, sp: Span) -> String {
    self.span_to_string(sp, FileNameDisplayPreference::Remapped)
}
469 | ||
064997fb FG |
470 | /// Format the span location suitable for pretty printing anotations with relative line numbers |
471 | pub fn span_to_relative_line_string(&self, sp: Span, relative_to: Span) -> String { | |
472 | if self.files.borrow().source_files.is_empty() || sp.is_dummy() || relative_to.is_dummy() { | |
473 | return "no-location".to_string(); | |
474 | } | |
475 | ||
476 | let lo = self.lookup_char_pos(sp.lo()); | |
477 | let hi = self.lookup_char_pos(sp.hi()); | |
478 | let offset = self.lookup_char_pos(relative_to.lo()); | |
479 | ||
f2b60f7d | 480 | if lo.file.name != offset.file.name || !relative_to.contains(sp) { |
064997fb FG |
481 | return self.span_to_embeddable_string(sp); |
482 | } | |
483 | ||
484 | let lo_line = lo.line.saturating_sub(offset.line); | |
485 | let hi_line = hi.line.saturating_sub(offset.line); | |
486 | ||
487 | format!( | |
488 | "{}:+{}:{}: +{}:{}", | |
489 | lo.file.name.display(FileNameDisplayPreference::Remapped), | |
490 | lo_line, | |
491 | lo.col.to_usize() + 1, | |
492 | hi_line, | |
493 | hi.col.to_usize() + 1, | |
494 | ) | |
495 | } | |
496 | ||
17df50a5 XL |
/// Format the span location to be printed in diagnostics. Must not be emitted
/// to build artifacts as this may leak local file paths. Use `span_to_embeddable_string`
/// for a string suitable for embedding.
///
/// The file name display preference is taken from the path mapping.
pub fn span_to_diagnostic_string(&self, sp: Span) -> String {
    self.span_to_string(sp, self.path_mapping.filename_display_for_diagnostics)
}
503 | ||
504 | pub fn span_to_filename(&self, sp: Span) -> FileName { | |
505 | self.lookup_char_pos(sp.lo()).file.name.clone() | |
223e47cc LB |
506 | } |
507 | ||
94222f64 XL |
/// Returns a displayable form of `filename`, using the display preference for
/// diagnostics configured in the path mapping.
pub fn filename_for_diagnostics<'a>(&self, filename: &'a FileName) -> FileNameDisplay<'a> {
    filename.display(self.path_mapping.filename_display_for_diagnostics)
}
511 | ||
ff7c6d11 | 512 | pub fn is_multiline(&self, sp: Span) -> bool { |
136023e0 XL |
513 | let lo = self.lookup_source_file_idx(sp.lo()); |
514 | let hi = self.lookup_source_file_idx(sp.hi()); | |
515 | if lo != hi { | |
516 | return true; | |
517 | } | |
518 | let f = (*self.files.borrow().source_files)[lo].clone(); | |
519 | f.lookup_line(sp.lo()) != f.lookup_line(sp.hi()) | |
ff7c6d11 XL |
520 | } |
521 | ||
c295e0f8 | 522 | #[instrument(skip(self), level = "trace")] |
60c5eb7d | 523 | pub fn is_valid_span(&self, sp: Span) -> Result<(Loc, Loc), SpanLinesError> { |
ea8adc8c | 524 | let lo = self.lookup_char_pos(sp.lo()); |
c295e0f8 | 525 | trace!(?lo); |
ea8adc8c | 526 | let hi = self.lookup_char_pos(sp.hi()); |
c295e0f8 | 527 | trace!(?hi); |
d9579d0f AL |
528 | if lo.file.start_pos != hi.file.start_pos { |
529 | return Err(SpanLinesError::DistinctSources(DistinctSources { | |
530 | begin: (lo.file.name.clone(), lo.file.start_pos), | |
531 | end: (hi.file.name.clone(), hi.file.start_pos), | |
532 | })); | |
533 | } | |
60c5eb7d XL |
534 | Ok((lo, hi)) |
535 | } | |
536 | ||
ba9703b0 XL |
537 | pub fn is_line_before_span_empty(&self, sp: Span) -> bool { |
538 | match self.span_to_prev_source(sp) { | |
6a06907d | 539 | Ok(s) => s.rsplit_once('\n').unwrap_or(("", &s)).1.trim_start().is_empty(), |
ba9703b0 XL |
540 | Err(_) => false, |
541 | } | |
542 | } | |
543 | ||
60c5eb7d XL |
544 | pub fn span_to_lines(&self, sp: Span) -> FileLinesResult { |
545 | debug!("span_to_lines(sp={:?})", sp); | |
546 | let (lo, hi) = self.is_valid_span(sp)?; | |
d9579d0f AL |
547 | assert!(hi.line >= lo.line); |
548 | ||
ba9703b0 XL |
549 | if sp.is_dummy() { |
550 | return Ok(FileLines { file: lo.file, lines: Vec::new() }); | |
551 | } | |
552 | ||
9346a6ac AL |
553 | let mut lines = Vec::with_capacity(hi.line - lo.line + 1); |
554 | ||
555 | // The span starts partway through the first line, | |
556 | // but after that it starts from offset 0. | |
557 | let mut start_col = lo.col; | |
558 | ||
559 | // For every line but the last, it extends from `start_col` | |
560 | // and to the end of the line. Be careful because the line | |
561 | // numbers in Loc are 1-based, so we subtract 1 to get 0-based | |
562 | // lines. | |
ba9703b0 XL |
563 | // |
564 | // FIXME: now that we handle DUMMY_SP up above, we should consider | |
565 | // asserting that the line numbers here are all indeed 1-based. | |
dfeec247 XL |
566 | let hi_line = hi.line.saturating_sub(1); |
567 | for line_index in lo.line.saturating_sub(1)..hi_line { | |
5869c6ff | 568 | let line_len = lo.file.get_line(line_index).map_or(0, |s| s.chars().count()); |
dfeec247 | 569 | lines.push(LineInfo { line_index, start_col, end_col: CharPos::from_usize(line_len) }); |
9346a6ac AL |
570 | start_col = CharPos::from_usize(0); |
571 | } | |
572 | ||
573 | // For the last line, it extends from `start_col` to `hi.col`: | |
dfeec247 | 574 | lines.push(LineInfo { line_index: hi_line, start_col, end_col: hi.col }); |
9346a6ac | 575 | |
dfeec247 | 576 | Ok(FileLines { file: lo.file, lines }) |
223e47cc LB |
577 | } |
578 | ||
9fa01778 | 579 | /// Extracts the source surrounding the given `Span` using the `extract_source` function. The |
0531ce1d XL |
580 | /// extract function takes three arguments: a string slice containing the source, an index in |
581 | /// the slice for the beginning of the span and an index in the slice for the end of the span. | |
5869c6ff | 582 | fn span_to_source<F, T>(&self, sp: Span, extract_source: F) -> Result<T, SpanSnippetError> |
dfeec247 | 583 | where |
5869c6ff | 584 | F: Fn(&str, usize, usize) -> Result<T, SpanSnippetError>, |
0531ce1d | 585 | { |
ea8adc8c XL |
586 | let local_begin = self.lookup_byte_offset(sp.lo()); |
587 | let local_end = self.lookup_byte_offset(sp.hi()); | |
1a4d82fc | 588 | |
a1dfa0c6 | 589 | if local_begin.sf.start_pos != local_end.sf.start_pos { |
ba9703b0 | 590 | Err(SpanSnippetError::DistinctSources(DistinctSources { |
dfeec247 XL |
591 | begin: (local_begin.sf.name.clone(), local_begin.sf.start_pos), |
592 | end: (local_end.sf.name.clone(), local_end.sf.start_pos), | |
ba9703b0 | 593 | })) |
1a4d82fc | 594 | } else { |
a1dfa0c6 | 595 | self.ensure_source_file_source_present(local_begin.sf.clone()); |
041b39d2 XL |
596 | |
597 | let start_index = local_begin.pos.to_usize(); | |
598 | let end_index = local_end.pos.to_usize(); | |
dfeec247 | 599 | let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); |
041b39d2 XL |
600 | |
601 | if start_index > end_index || end_index > source_len { | |
dfeec247 XL |
602 | return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions { |
603 | name: local_begin.sf.name.clone(), | |
604 | source_len, | |
605 | begin_pos: local_begin.pos, | |
606 | end_pos: local_end.pos, | |
607 | })); | |
041b39d2 XL |
608 | } |
609 | ||
a1dfa0c6 | 610 | if let Some(ref src) = local_begin.sf.src { |
ba9703b0 | 611 | extract_source(src, start_index, end_index) |
a1dfa0c6 | 612 | } else if let Some(src) = local_begin.sf.external_src.borrow().get_source() { |
ba9703b0 | 613 | extract_source(src, start_index, end_index) |
041b39d2 | 614 | } else { |
ba9703b0 | 615 | Err(SpanSnippetError::SourceNotAvailable { filename: local_begin.sf.name.clone() }) |
c34b1796 | 616 | } |
1a4d82fc | 617 | } |
223e47cc LB |
618 | } |
619 | ||
064997fb FG |
620 | pub fn is_span_accessible(&self, sp: Span) -> bool { |
621 | self.span_to_source(sp, |src, start_index, end_index| { | |
622 | Ok(src.get(start_index..end_index).is_some()) | |
623 | }) | |
624 | .map_or(false, |is_accessible| is_accessible) | |
94222f64 XL |
625 | } |
626 | ||
e1599b0c | 627 | /// Returns the source snippet as `String` corresponding to the given `Span`. |
0531ce1d | 628 | pub fn span_to_snippet(&self, sp: Span) -> Result<String, SpanSnippetError> { |
dfeec247 XL |
629 | self.span_to_source(sp, |src, start_index, end_index| { |
630 | src.get(start_index..end_index) | |
631 | .map(|s| s.to_string()) | |
fc512014 | 632 | .ok_or(SpanSnippetError::IllFormedSpan(sp)) |
dfeec247 | 633 | }) |
0531ce1d XL |
634 | } |
635 | ||
b7449926 | 636 | pub fn span_to_margin(&self, sp: Span) -> Option<usize> { |
3c0e092e XL |
637 | Some(self.indentation_before(sp)?.len()) |
638 | } | |
6a06907d | 639 | |
3c0e092e XL |
640 | pub fn indentation_before(&self, sp: Span) -> Option<String> { |
641 | self.span_to_source(sp, |src, start_index, _| { | |
642 | let before = &src[..start_index]; | |
643 | let last_line = before.rsplit_once('\n').map_or(before, |(_, last)| last); | |
644 | Ok(last_line | |
645 | .split_once(|c: char| !c.is_whitespace()) | |
646 | .map_or(last_line, |(indent, _)| indent) | |
647 | .to_string()) | |
648 | }) | |
649 | .ok() | |
b7449926 XL |
650 | } |
651 | ||
e1599b0c | 652 | /// Returns the source snippet as `String` before the given `Span`. |
0531ce1d | 653 | pub fn span_to_prev_source(&self, sp: Span) -> Result<String, SpanSnippetError> { |
dfeec247 | 654 | self.span_to_source(sp, |src, start_index, _| { |
fc512014 | 655 | src.get(..start_index).map(|s| s.to_string()).ok_or(SpanSnippetError::IllFormedSpan(sp)) |
dfeec247 | 656 | }) |
0531ce1d XL |
657 | } |
658 | ||
e1599b0c | 659 | /// Extends the given `Span` to just after the previous occurrence of `c`. Return the same span |
0531ce1d | 660 | /// if no character could be found or if an error occurred while retrieving the code snippet. |
5869c6ff | 661 | pub fn span_extend_to_prev_char(&self, sp: Span, c: char, accept_newlines: bool) -> Span { |
0531ce1d | 662 | if let Ok(prev_source) = self.span_to_prev_source(sp) { |
5869c6ff | 663 | let prev_source = prev_source.rsplit(c).next().unwrap_or(""); |
6a06907d | 664 | if !prev_source.is_empty() && (accept_newlines || !prev_source.contains('\n')) { |
0531ce1d XL |
665 | return sp.with_lo(BytePos(sp.lo().0 - prev_source.len() as u32)); |
666 | } | |
667 | } | |
668 | ||
669 | sp | |
670 | } | |
671 | ||
/// Extends the given `Span` backwards to just after the previous occurrence of
/// `pat` that is followed by a space, tab or newline. Returns `None` if the
/// pattern could not be found or if an error occurred while retrieving the
/// code snippet.
///
/// With `include_whitespace` set, whitespace following the pattern is skipped
/// as well. Unless `accept_newlines` is set, a match is rejected when the text
/// that would be added contains a newline after its leading whitespace.
pub fn span_extend_to_prev_str(
    &self,
    sp: Span,
    pat: &str,
    accept_newlines: bool,
    include_whitespace: bool,
) -> Option<Span> {
    // assure that the pattern is delimited, to avoid the following
    //     fn my_fn()
    //           ^^^^ returned span without the check
    //     ---------- correct span
    let prev_source = self.span_to_prev_source(sp).ok()?;
    // The delimiters are tried in this fixed order; the first acceptable match wins.
    for ws in &[" ", "\t", "\n"] {
        let pat = pat.to_owned() + ws;
        if let Some(pat_pos) = prev_source.rfind(&pat) {
            // `pat` now ends with the one-byte delimiter, so this is the index
            // of that delimiter.
            let just_after_pat_pos = pat_pos + pat.len() - 1;
            let just_after_pat_plus_ws = if include_whitespace {
                // Skip ahead to the first non-whitespace character, if any.
                just_after_pat_pos
                    + prev_source[just_after_pat_pos..]
                        .find(|c: char| !c.is_whitespace())
                        .unwrap_or(0)
            } else {
                just_after_pat_pos
            };
            // Number of bytes by which the span's start is moved back.
            let len = prev_source.len() - just_after_pat_plus_ws;
            let prev_source = &prev_source[just_after_pat_plus_ws..];
            if accept_newlines || !prev_source.trim_start().contains('\n') {
                return Some(sp.with_lo(BytePos(sp.lo().0 - len as u32)));
            }
        }
    }

    None
}
709 | ||
5869c6ff XL |
710 | /// Returns the source snippet as `String` after the given `Span`. |
711 | pub fn span_to_next_source(&self, sp: Span) -> Result<String, SpanSnippetError> { | |
712 | self.span_to_source(sp, |src, _, end_index| { | |
713 | src.get(end_index..).map(|s| s.to_string()).ok_or(SpanSnippetError::IllFormedSpan(sp)) | |
714 | }) | |
715 | } | |
716 | ||
c295e0f8 XL |
717 | /// Extends the given `Span` while the next character matches the predicate |
718 | pub fn span_extend_while( | |
719 | &self, | |
720 | span: Span, | |
721 | f: impl Fn(char) -> bool, | |
722 | ) -> Result<Span, SpanSnippetError> { | |
723 | self.span_to_source(span, |s, _start, end| { | |
724 | let n = s[end..].char_indices().find(|&(_, c)| !f(c)).map_or(s.len() - end, |(i, _)| i); | |
725 | Ok(span.with_hi(span.hi() + BytePos(n as u32))) | |
726 | }) | |
727 | } | |
728 | ||
f2b60f7d | 729 | /// Extends the given `Span` to just before the next occurrence of `c`. |
5869c6ff XL |
730 | pub fn span_extend_to_next_char(&self, sp: Span, c: char, accept_newlines: bool) -> Span { |
731 | if let Ok(next_source) = self.span_to_next_source(sp) { | |
732 | let next_source = next_source.split(c).next().unwrap_or(""); | |
6a06907d | 733 | if !next_source.is_empty() && (accept_newlines || !next_source.contains('\n')) { |
5869c6ff XL |
734 | return sp.with_hi(BytePos(sp.hi().0 + next_source.len() as u32)); |
735 | } | |
736 | } | |
737 | ||
738 | sp | |
739 | } | |
740 | ||
064997fb FG |
741 | /// Extends the given `Span` to contain the entire line it is on. |
742 | pub fn span_extend_to_line(&self, sp: Span) -> Span { | |
743 | self.span_extend_to_prev_char(self.span_extend_to_next_char(sp, '\n', true), '\n', true) | |
744 | } | |
745 | ||
e1599b0c XL |
746 | /// Given a `Span`, tries to get a shorter span ending before the first occurrence of `char` |
747 | /// `c`. | |
cc61c64b XL |
748 | pub fn span_until_char(&self, sp: Span, c: char) -> Span { |
749 | match self.span_to_snippet(sp) { | |
750 | Ok(snippet) => { | |
74b04a01 | 751 | let snippet = snippet.split(c).next().unwrap_or("").trim_end(); |
7cac9316 | 752 | if !snippet.is_empty() && !snippet.contains('\n') { |
ea8adc8c | 753 | sp.with_hi(BytePos(sp.lo().0 + snippet.len() as u32)) |
cc61c64b XL |
754 | } else { |
755 | sp | |
756 | } | |
757 | } | |
758 | _ => sp, | |
759 | } | |
760 | } | |
761 | ||
487cf647 FG |
762 | /// Given a 'Span', tries to tell if it's wrapped by "<>" or "()" |
763 | /// the algorithm searches if the next character is '>' or ')' after skipping white space | |
764 | /// then searches the previous charactoer to match '<' or '(' after skipping white space | |
765 | /// return true if wrapped by '<>' or '()' | |
766 | pub fn span_wrapped_by_angle_or_parentheses(&self, span: Span) -> bool { | |
767 | self.span_to_source(span, |src, start_index, end_index| { | |
768 | if src.get(start_index..end_index).is_none() { | |
769 | return Ok(false); | |
770 | } | |
771 | // test the right side to match '>' after skipping white space | |
772 | let end_src = &src[end_index..]; | |
773 | let mut i = 0; | |
774 | let mut found_right_parentheses = false; | |
775 | let mut found_right_angle = false; | |
776 | while let Some(cc) = end_src.chars().nth(i) { | |
777 | if cc == ' ' { | |
778 | i = i + 1; | |
779 | } else if cc == '>' { | |
780 | // found > in the right; | |
781 | found_right_angle = true; | |
782 | break; | |
783 | } else if cc == ')' { | |
784 | found_right_parentheses = true; | |
785 | break; | |
786 | } else { | |
787 | // failed to find '>' return false immediately | |
788 | return Ok(false); | |
789 | } | |
790 | } | |
791 | // test the left side to match '<' after skipping white space | |
792 | i = start_index; | |
793 | let start_src = &src[0..start_index]; | |
794 | while let Some(cc) = start_src.chars().nth(i) { | |
795 | if cc == ' ' { | |
796 | if i == 0 { | |
797 | return Ok(false); | |
798 | } | |
799 | i = i - 1; | |
800 | } else if cc == '<' { | |
801 | // found < in the left | |
802 | if !found_right_angle { | |
803 | // skip something like "(< )>" | |
804 | return Ok(false); | |
805 | } | |
806 | break; | |
807 | } else if cc == '(' { | |
808 | if !found_right_parentheses { | |
809 | // skip something like "<(>)" | |
810 | return Ok(false); | |
811 | } | |
812 | break; | |
813 | } else { | |
814 | // failed to find '<' return false immediately | |
815 | return Ok(false); | |
816 | } | |
817 | } | |
818 | return Ok(true); | |
819 | }) | |
820 | .map_or(false, |is_accessible| is_accessible) | |
821 | } | |
822 | ||
e1599b0c | 823 | /// Given a `Span`, tries to get a shorter span ending just after the first occurrence of `char` |
0531ce1d XL |
824 | /// `c`. |
825 | pub fn span_through_char(&self, sp: Span, c: char) -> Span { | |
826 | if let Ok(snippet) = self.span_to_snippet(sp) { | |
827 | if let Some(offset) = snippet.find(c) { | |
828 | return sp.with_hi(BytePos(sp.lo().0 + (offset + c.len_utf8()) as u32)); | |
829 | } | |
830 | } | |
831 | sp | |
832 | } | |
833 | ||
e1599b0c XL |
834 | /// Given a `Span`, gets a new `Span` covering the first token and all its trailing whitespace |
835 | /// or the original `Span`. | |
2c00a5a8 XL |
836 | /// |
837 | /// If `sp` points to `"let mut x"`, then a span pointing at `"let "` will be returned. | |
838 | pub fn span_until_non_whitespace(&self, sp: Span) -> Span { | |
0531ce1d XL |
839 | let mut whitespace_found = false; |
840 | ||
841 | self.span_take_while(sp, |c| { | |
842 | if !whitespace_found && c.is_whitespace() { | |
843 | whitespace_found = true; | |
2c00a5a8 | 844 | } |
0531ce1d | 845 | |
74b04a01 | 846 | !whitespace_found || c.is_whitespace() |
0531ce1d | 847 | }) |
2c00a5a8 XL |
848 | } |
849 | ||
e1599b0c XL |
850 | /// Given a `Span`, gets a new `Span` covering the first token without its trailing whitespace |
851 | /// or the original `Span` in case of error. | |
0531ce1d XL |
852 | /// |
853 | /// If `sp` points to `"let mut x"`, then a span pointing at `"let"` will be returned. | |
854 | pub fn span_until_whitespace(&self, sp: Span) -> Span { | |
855 | self.span_take_while(sp, |c| !c.is_whitespace()) | |
856 | } | |
857 | ||
e1599b0c | 858 | /// Given a `Span`, gets a shorter one until `predicate` yields `false`. |
0531ce1d | 859 | pub fn span_take_while<P>(&self, sp: Span, predicate: P) -> Span |
dfeec247 XL |
860 | where |
861 | P: for<'r> FnMut(&'r char) -> bool, | |
0531ce1d | 862 | { |
abe05a73 | 863 | if let Ok(snippet) = self.span_to_snippet(sp) { |
dfeec247 | 864 | let offset = snippet.chars().take_while(predicate).map(|c| c.len_utf8()).sum::<usize>(); |
0531ce1d XL |
865 | |
866 | sp.with_hi(BytePos(sp.lo().0 + (offset as u32))) | |
867 | } else { | |
868 | sp | |
abe05a73 | 869 | } |
abe05a73 XL |
870 | } |
871 | ||
ba9703b0 XL |
872 | /// Given a `Span`, return a span ending in the closest `{`. This is useful when you have a |
873 | /// `Span` enclosing a whole item but we need to point at only the head (usually the first | |
874 | /// line) of that item. | |
875 | /// | |
876 | /// *Only suitable for diagnostics.* | |
877 | pub fn guess_head_span(&self, sp: Span) -> Span { | |
878 | // FIXME: extend the AST items to have a head span, or replace callers with pointing at | |
879 | // the item's ident when appropriate. | |
cc61c64b XL |
880 | self.span_until_char(sp, '{') |
881 | } | |
882 | ||
6a06907d | 883 | /// Returns a new span representing just the first character of the given span. |
8faf50e0 | 884 | pub fn start_point(&self, sp: Span) -> Span { |
6a06907d XL |
885 | let width = { |
886 | let sp = sp.data(); | |
887 | let local_begin = self.lookup_byte_offset(sp.lo); | |
888 | let start_index = local_begin.pos.to_usize(); | |
889 | let src = local_begin.sf.external_src.borrow(); | |
890 | ||
891 | let snippet = if let Some(ref src) = local_begin.sf.src { | |
892 | Some(&src[start_index..]) | |
893 | } else if let Some(src) = src.get_source() { | |
894 | Some(&src[start_index..]) | |
895 | } else { | |
896 | None | |
897 | }; | |
898 | ||
899 | match snippet { | |
900 | None => 1, | |
901 | Some(snippet) => match snippet.chars().next() { | |
902 | None => 1, | |
903 | Some(c) => c.len_utf8(), | |
904 | }, | |
905 | } | |
906 | }; | |
907 | ||
908 | sp.with_hi(BytePos(sp.lo().0 + width as u32)) | |
8faf50e0 XL |
909 | } |
910 | ||
6a06907d | 911 | /// Returns a new span representing just the last character of this span. |
2c00a5a8 XL |
912 | pub fn end_point(&self, sp: Span) -> Span { |
913 | let pos = sp.hi().0; | |
914 | ||
915 | let width = self.find_width_of_character_at_span(sp, false); | |
916 | let corrected_end_position = pos.checked_sub(width).unwrap_or(pos); | |
917 | ||
918 | let end_point = BytePos(cmp::max(corrected_end_position, sp.lo().0)); | |
919 | sp.with_lo(end_point) | |
920 | } | |
921 | ||
e1599b0c | 922 | /// Returns a new span representing the next character after the end-point of this span. |
2b03887a FG |
923 | /// Special cases: |
924 | /// - if span is a dummy one, returns the same span | |
487cf647 FG |
925 | /// - if next_point reached the end of source, return a span exceeding the end of source, |
926 | /// which means sm.span_to_snippet(next_point) will get `Err` | |
2b03887a | 927 | /// - respect multi-byte characters |
2c00a5a8 | 928 | pub fn next_point(&self, sp: Span) -> Span { |
5869c6ff XL |
929 | if sp.is_dummy() { |
930 | return sp; | |
931 | } | |
2c00a5a8 XL |
932 | let start_of_next_point = sp.hi().0; |
933 | ||
2b03887a | 934 | let width = self.find_width_of_character_at_span(sp, true); |
2b03887a FG |
935 | // If the width is 1, then the next span should only contain the next char besides current ending. |
936 | // However, in the case of a multibyte character, where the width != 1, the next span should | |
2c00a5a8 | 937 | // span multiple bytes to include the whole character. |
dfeec247 | 938 | let end_of_next_point = |
2b03887a | 939 | start_of_next_point.checked_add(width).unwrap_or(start_of_next_point); |
2c00a5a8 | 940 | |
2b03887a | 941 | let end_of_next_point = BytePos(cmp::max(start_of_next_point + 1, end_of_next_point)); |
c295e0f8 | 942 | Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt(), None) |
2c00a5a8 XL |
943 | } |
944 | ||
2b03887a FG |
945 | /// Returns a new span to check next none-whitespace character or some specified expected character |
946 | /// If `expect` is none, the first span of non-whitespace character is returned. | |
947 | /// If `expect` presented, the first span of the character `expect` is returned | |
948 | /// Otherwise, the span reached to limit is returned. | |
949 | pub fn span_look_ahead(&self, span: Span, expect: Option<&str>, limit: Option<usize>) -> Span { | |
950 | let mut sp = span; | |
9c376795 | 951 | for _ in 0..limit.unwrap_or(100_usize) { |
2b03887a FG |
952 | sp = self.next_point(sp); |
953 | if let Ok(ref snippet) = self.span_to_snippet(sp) { | |
954 | if expect.map_or(false, |es| snippet == es) { | |
955 | break; | |
956 | } | |
957 | if expect.is_none() && snippet.chars().any(|c| !c.is_whitespace()) { | |
958 | break; | |
959 | } | |
960 | } | |
961 | } | |
962 | sp | |
963 | } | |
964 | ||
6a06907d XL |
965 | /// Finds the width of the character, either before or after the end of provided span, |
966 | /// depending on the `forwards` parameter. | |
9c376795 | 967 | #[instrument(skip(self, sp))] |
2c00a5a8 | 968 | fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 { |
60c5eb7d | 969 | let sp = sp.data(); |
2b03887a FG |
970 | |
971 | if sp.lo == sp.hi && !forwards { | |
9c376795 | 972 | debug!("early return empty span"); |
2c00a5a8 XL |
973 | return 1; |
974 | } | |
975 | ||
60c5eb7d XL |
976 | let local_begin = self.lookup_byte_offset(sp.lo); |
977 | let local_end = self.lookup_byte_offset(sp.hi); | |
9c376795 | 978 | debug!("local_begin=`{:?}`, local_end=`{:?}`", local_begin, local_end); |
2c00a5a8 | 979 | |
dc9dc135 | 980 | if local_begin.sf.start_pos != local_end.sf.start_pos { |
9c376795 | 981 | debug!("begin and end are in different files"); |
dc9dc135 XL |
982 | return 1; |
983 | } | |
984 | ||
2c00a5a8 XL |
985 | let start_index = local_begin.pos.to_usize(); |
986 | let end_index = local_end.pos.to_usize(); | |
9c376795 | 987 | debug!("start_index=`{:?}`, end_index=`{:?}`", start_index, end_index); |
2c00a5a8 XL |
988 | |
989 | // Disregard indexes that are at the start or end of their spans, they can't fit bigger | |
990 | // characters. | |
f035d41b | 991 | if (!forwards && end_index == usize::MIN) || (forwards && start_index == usize::MAX) { |
9c376795 | 992 | debug!("start or end of span, cannot be multibyte"); |
2c00a5a8 XL |
993 | return 1; |
994 | } | |
995 | ||
a1dfa0c6 | 996 | let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); |
9c376795 | 997 | debug!("source_len=`{:?}`", source_len); |
2c00a5a8 | 998 | // Ensure indexes are also not malformed. |
2b03887a | 999 | if start_index > end_index || end_index > source_len - 1 { |
9c376795 | 1000 | debug!("source indexes are malformed"); |
487cf647 | 1001 | return 1; |
2c00a5a8 XL |
1002 | } |
1003 | ||
a1dfa0c6 | 1004 | let src = local_begin.sf.external_src.borrow(); |
2c00a5a8 XL |
1005 | |
1006 | // We need to extend the snippet to the end of the src rather than to end_index so when | |
1007 | // searching forwards for boundaries we've got somewhere to search. | |
a1dfa0c6 | 1008 | let snippet = if let Some(ref src) = local_begin.sf.src { |
6a06907d | 1009 | &src[start_index..] |
2c00a5a8 | 1010 | } else if let Some(src) = src.get_source() { |
6a06907d | 1011 | &src[start_index..] |
2c00a5a8 XL |
1012 | } else { |
1013 | return 1; | |
1014 | }; | |
9c376795 | 1015 | debug!("snippet=`{:?}`", snippet); |
2c00a5a8 | 1016 | |
2c00a5a8 | 1017 | let mut target = if forwards { end_index + 1 } else { end_index - 1 }; |
9c376795 | 1018 | debug!("initial target=`{:?}`", target); |
2c00a5a8 | 1019 | |
0531ce1d XL |
1020 | while !snippet.is_char_boundary(target - start_index) && target < source_len { |
1021 | target = if forwards { | |
1022 | target + 1 | |
1023 | } else { | |
1024 | match target.checked_sub(1) { | |
1025 | Some(target) => target, | |
1026 | None => { | |
1027 | break; | |
1028 | } | |
1029 | } | |
1030 | }; | |
9c376795 | 1031 | debug!("target=`{:?}`", target); |
2c00a5a8 | 1032 | } |
9c376795 | 1033 | debug!("final target=`{:?}`", target); |
2c00a5a8 | 1034 | |
dfeec247 | 1035 | if forwards { (target - end_index) as u32 } else { (end_index - target) as u32 } |
2c00a5a8 XL |
1036 | } |
1037 | ||
b7449926 | 1038 | pub fn get_source_file(&self, filename: &FileName) -> Option<Lrc<SourceFile>> { |
5869c6ff XL |
1039 | // Remap filename before lookup |
1040 | let filename = self.path_mapping().map_filename_prefix(filename).0; | |
a1dfa0c6 | 1041 | for sf in self.files.borrow().source_files.iter() { |
5869c6ff | 1042 | if filename == sf.name { |
a1dfa0c6 | 1043 | return Some(sf.clone()); |
1a4d82fc JJ |
1044 | } |
1045 | } | |
3157f602 | 1046 | None |
1a4d82fc JJ |
1047 | } |
1048 | ||
e1599b0c | 1049 | /// For a global `BytePos`, computes the local offset within the containing `SourceFile`. |
b7449926 XL |
1050 | pub fn lookup_byte_offset(&self, bpos: BytePos) -> SourceFileAndBytePos { |
1051 | let idx = self.lookup_source_file_idx(bpos); | |
a1dfa0c6 XL |
1052 | let sf = (*self.files.borrow().source_files)[idx].clone(); |
1053 | let offset = bpos - sf.start_pos; | |
dfeec247 | 1054 | SourceFileAndBytePos { sf, pos: offset } |
1a4d82fc JJ |
1055 | } |
1056 | ||
487cf647 FG |
1057 | /// Returns the index of the [`SourceFile`] (in `self.files`) that contains `pos`. |
1058 | /// This index is guaranteed to be valid for the lifetime of this `SourceMap`, | |
1059 | /// since `source_files` is a `MonotonicVec` | |
b7449926 | 1060 | pub fn lookup_source_file_idx(&self, pos: BytePos) -> usize { |
dfeec247 XL |
1061 | self.files |
1062 | .borrow() | |
1063 | .source_files | |
1064 | .binary_search_by_key(&pos, |key| key.start_pos) | |
e74abb32 | 1065 | .unwrap_or_else(|p| p - 1) |
223e47cc LB |
1066 | } |
1067 | ||
92a42be0 | 1068 | pub fn count_lines(&self) -> usize { |
7cac9316 | 1069 | self.files().iter().fold(0, |a, f| a + f.count_lines()) |
92a42be0 | 1070 | } |
94b46f34 | 1071 | |
60c5eb7d | 1072 | pub fn ensure_source_file_source_present(&self, source_file: Lrc<SourceFile>) -> bool { |
94222f64 | 1073 | source_file.add_external_src(|| { |
9c376795 FG |
1074 | let FileName::Real(ref name) = source_file.name else { |
1075 | return None; | |
1076 | }; | |
1077 | ||
1078 | let local_path: Cow<'_, Path> = match name { | |
1079 | RealFileName::LocalPath(local_path) => local_path.into(), | |
1080 | RealFileName::Remapped { local_path: Some(local_path), .. } => local_path.into(), | |
1081 | RealFileName::Remapped { local_path: None, virtual_name } => { | |
1082 | // The compiler produces better error messages if the sources of dependencies | |
1083 | // are available. Attempt to undo any path mapping so we can find remapped | |
1084 | // dependencies. | |
1085 | // We can only use the heuristic because `add_external_src` checks the file | |
1086 | // content hash. | |
1087 | self.path_mapping.reverse_map_prefix_heuristically(virtual_name)?.into() | |
17df50a5 | 1088 | } |
9c376795 FG |
1089 | }; |
1090 | ||
1091 | self.file_loader.read_file(&local_path).ok() | |
dfeec247 | 1092 | }) |
9e0c209e | 1093 | } |
ba9703b0 XL |
1094 | |
1095 | pub fn is_imported(&self, sp: Span) -> bool { | |
1096 | let source_file_index = self.lookup_source_file_idx(sp.lo()); | |
1097 | let source_file = &self.files()[source_file_index]; | |
1098 | source_file.is_imported() | |
1099 | } | |
c295e0f8 XL |
1100 | |
1101 | /// Gets the span of a statement. If the statement is a macro expansion, the | |
1102 | /// span in the context of the block span is found. The trailing semicolon is included | |
1103 | /// on a best-effort basis. | |
1104 | pub fn stmt_span(&self, stmt_span: Span, block_span: Span) -> Span { | |
1105 | if !stmt_span.from_expansion() { | |
1106 | return stmt_span; | |
1107 | } | |
1108 | let mac_call = original_sp(stmt_span, block_span); | |
1109 | self.mac_call_stmt_semi_span(mac_call).map_or(mac_call, |s| mac_call.with_hi(s.hi())) | |
1110 | } | |
1111 | ||
1112 | /// Tries to find the span of the semicolon of a macro call statement. | |
1113 | /// The input must be the *call site* span of a statement from macro expansion. | |
04454e1e FG |
1114 | /// ```ignore (illustrative) |
1115 | /// // v output | |
1116 | /// mac!(); | |
1117 | /// // ^^^^^^ input | |
1118 | /// ``` | |
c295e0f8 XL |
1119 | pub fn mac_call_stmt_semi_span(&self, mac_call: Span) -> Option<Span> { |
1120 | let span = self.span_extend_while(mac_call, char::is_whitespace).ok()?; | |
1121 | let span = span.shrink_to_hi().with_hi(BytePos(span.hi().0.checked_add(1)?)); | |
1122 | if self.span_to_snippet(span).as_deref() != Ok(";") { | |
1123 | return None; | |
1124 | } | |
1125 | Some(span) | |
1126 | } | |
7cac9316 XL |
1127 | } |
1128 | ||
/// A set of path-prefix substitutions together with the preferred way of displaying file
/// names in diagnostics.
#[derive(Clone)]
pub struct FilePathMapping {
    /// `(from, to)` prefix pairs. `map_prefix` tries them from last to first, so later
    /// entries take precedence.
    mapping: Vec<(PathBuf, PathBuf)>,
    /// Set by `new`: `Local` when `mapping` is empty, `Remapped` otherwise.
    filename_display_for_diagnostics: FileNameDisplayPreference,
}
1134 | ||
1135 | impl FilePathMapping { | |
1136 | pub fn empty() -> FilePathMapping { | |
94222f64 | 1137 | FilePathMapping::new(Vec::new()) |
7cac9316 XL |
1138 | } |
1139 | ||
ff7c6d11 | 1140 | pub fn new(mapping: Vec<(PathBuf, PathBuf)>) -> FilePathMapping { |
94222f64 XL |
1141 | let filename_display_for_diagnostics = if mapping.is_empty() { |
1142 | FileNameDisplayPreference::Local | |
1143 | } else { | |
1144 | FileNameDisplayPreference::Remapped | |
1145 | }; | |
1146 | ||
1147 | FilePathMapping { mapping, filename_display_for_diagnostics } | |
7cac9316 XL |
1148 | } |
1149 | ||
1150 | /// Applies any path prefix substitution as defined by the mapping. | |
1151 | /// The return value is the remapped path and a boolean indicating whether | |
1152 | /// the path was affected by the mapping. | |
9c376795 FG |
1153 | pub fn map_prefix<'a>(&'a self, path: impl Into<Cow<'a, Path>>) -> (Cow<'a, Path>, bool) { |
1154 | let path = path.into(); | |
923072b8 FG |
1155 | if path.as_os_str().is_empty() { |
1156 | // Exit early if the path is empty and therefore there's nothing to remap. | |
1157 | // This is mostly to reduce spam for `RUSTC_LOG=[remap_path_prefix]`. | |
1158 | return (path, false); | |
1159 | } | |
04454e1e | 1160 | |
923072b8 FG |
1161 | return remap_path_prefix(&self.mapping, path); |
1162 | ||
f2b60f7d | 1163 | #[instrument(level = "debug", skip(mapping), ret)] |
9c376795 FG |
1164 | fn remap_path_prefix<'a>( |
1165 | mapping: &'a [(PathBuf, PathBuf)], | |
1166 | path: Cow<'a, Path>, | |
1167 | ) -> (Cow<'a, Path>, bool) { | |
923072b8 FG |
1168 | // NOTE: We are iterating over the mapping entries from last to first |
1169 | // because entries specified later on the command line should | |
1170 | // take precedence. | |
9c376795 | 1171 | for (from, to) in mapping.iter().rev() { |
f2b60f7d | 1172 | debug!("Trying to apply {from:?} => {to:?}"); |
923072b8 FG |
1173 | |
1174 | if let Ok(rest) = path.strip_prefix(from) { | |
1175 | let remapped = if rest.as_os_str().is_empty() { | |
1176 | // This is subtle, joining an empty path onto e.g. `foo/bar` will | |
1177 | // result in `foo/bar/`, that is, there'll be an additional directory | |
1178 | // separator at the end. This can lead to duplicated directory separators | |
1179 | // in remapped paths down the line. | |
1180 | // So, if we have an exact match, we just return that without a call | |
1181 | // to `Path::join()`. | |
9c376795 | 1182 | to.into() |
923072b8 | 1183 | } else { |
9c376795 | 1184 | to.join(rest).into() |
923072b8 | 1185 | }; |
f2b60f7d | 1186 | debug!("Match - remapped"); |
923072b8 FG |
1187 | |
1188 | return (remapped, true); | |
1189 | } else { | |
f2b60f7d | 1190 | debug!("No match - prefix {from:?} does not match"); |
923072b8 | 1191 | } |
7cac9316 | 1192 | } |
7cac9316 | 1193 | |
f2b60f7d | 1194 | debug!("not remapped"); |
923072b8 FG |
1195 | (path, false) |
1196 | } | |
7cac9316 | 1197 | } |
5869c6ff XL |
1198 | |
1199 | fn map_filename_prefix(&self, file: &FileName) -> (FileName, bool) { | |
1200 | match file { | |
94222f64 | 1201 | FileName::Real(realfile) if let RealFileName::LocalPath(local_path) = realfile => { |
9c376795 | 1202 | let (mapped_path, mapped) = self.map_prefix(local_path); |
94222f64 XL |
1203 | let realfile = if mapped { |
1204 | RealFileName::Remapped { | |
1205 | local_path: Some(local_path.clone()), | |
9c376795 | 1206 | virtual_name: mapped_path.into_owned(), |
94222f64 | 1207 | } |
17df50a5 | 1208 | } else { |
94222f64 XL |
1209 | realfile.clone() |
1210 | }; | |
1211 | (FileName::Real(realfile), mapped) | |
5869c6ff | 1212 | } |
94222f64 | 1213 | FileName::Real(_) => unreachable!("attempted to remap an already remapped filename"), |
5869c6ff XL |
1214 | other => (other.clone(), false), |
1215 | } | |
1216 | } | |
923072b8 FG |
1217 | |
1218 | /// Expand a relative path to an absolute path with remapping taken into account. | |
1219 | /// Use this when absolute paths are required (e.g. debuginfo or crate metadata). | |
1220 | /// | |
1221 | /// The resulting `RealFileName` will have its `local_path` portion erased if | |
1222 | /// possible (i.e. if there's also a remapped path). | |
1223 | pub fn to_embeddable_absolute_path( | |
1224 | &self, | |
1225 | file_path: RealFileName, | |
1226 | working_directory: &RealFileName, | |
1227 | ) -> RealFileName { | |
1228 | match file_path { | |
1229 | // Anything that's already remapped we don't modify, except for erasing | |
1230 | // the `local_path` portion. | |
1231 | RealFileName::Remapped { local_path: _, virtual_name } => { | |
1232 | RealFileName::Remapped { | |
1233 | // We do not want any local path to be exported into metadata | |
1234 | local_path: None, | |
1235 | // We use the remapped name verbatim, even if it looks like a relative | |
1236 | // path. The assumption is that the user doesn't want us to further | |
1237 | // process paths that have gone through remapping. | |
1238 | virtual_name, | |
1239 | } | |
1240 | } | |
1241 | ||
1242 | RealFileName::LocalPath(unmapped_file_path) => { | |
1243 | // If no remapping has been applied yet, try to do so | |
1244 | let (new_path, was_remapped) = self.map_prefix(unmapped_file_path); | |
1245 | if was_remapped { | |
1246 | // It was remapped, so don't modify further | |
9c376795 FG |
1247 | return RealFileName::Remapped { |
1248 | local_path: None, | |
1249 | virtual_name: new_path.into_owned(), | |
1250 | }; | |
923072b8 FG |
1251 | } |
1252 | ||
1253 | if new_path.is_absolute() { | |
1254 | // No remapping has applied to this path and it is absolute, | |
1255 | // so the working directory cannot influence it either, so | |
1256 | // we are done. | |
9c376795 | 1257 | return RealFileName::LocalPath(new_path.into_owned()); |
923072b8 FG |
1258 | } |
1259 | ||
1260 | debug_assert!(new_path.is_relative()); | |
1261 | let unmapped_file_path_rel = new_path; | |
1262 | ||
1263 | match working_directory { | |
1264 | RealFileName::LocalPath(unmapped_working_dir_abs) => { | |
1265 | let file_path_abs = unmapped_working_dir_abs.join(unmapped_file_path_rel); | |
1266 | ||
1267 | // Although neither `working_directory` nor the file name were subject | |
1268 | // to path remapping, the concatenation between the two may be. Hence | |
1269 | // we need to do a remapping here. | |
1270 | let (file_path_abs, was_remapped) = self.map_prefix(file_path_abs); | |
1271 | if was_remapped { | |
1272 | RealFileName::Remapped { | |
1273 | // Erase the actual path | |
1274 | local_path: None, | |
9c376795 | 1275 | virtual_name: file_path_abs.into_owned(), |
923072b8 FG |
1276 | } |
1277 | } else { | |
1278 | // No kind of remapping applied to this path, so | |
1279 | // we leave it as it is. | |
9c376795 | 1280 | RealFileName::LocalPath(file_path_abs.into_owned()) |
923072b8 FG |
1281 | } |
1282 | } | |
1283 | RealFileName::Remapped { | |
1284 | local_path: _, | |
1285 | virtual_name: remapped_working_dir_abs, | |
1286 | } => { | |
1287 | // If working_directory has been remapped, then we emit | |
1288 | // Remapped variant as the expanded path won't be valid | |
1289 | RealFileName::Remapped { | |
1290 | local_path: None, | |
1291 | virtual_name: Path::new(remapped_working_dir_abs) | |
1292 | .join(unmapped_file_path_rel), | |
1293 | } | |
1294 | } | |
1295 | } | |
1296 | } | |
1297 | } | |
1298 | } | |
9c376795 FG |
1299 | |
1300 | /// Attempts to (heuristically) reverse a prefix mapping. | |
1301 | /// | |
1302 | /// Returns [`Some`] if there is exactly one mapping where the "to" part is | |
1303 | /// a prefix of `path` and has at least one non-empty | |
1304 | /// [`Normal`](path::Component::Normal) component. The component | |
1305 | /// restriction exists to avoid reverse mapping overly generic paths like | |
1306 | /// `/` or `.`). | |
1307 | /// | |
1308 | /// This is a heuristic and not guaranteed to return the actual original | |
1309 | /// path! Do not rely on the result unless you have other means to verify | |
1310 | /// that the mapping is correct (e.g. by checking the file content hash). | |
1311 | #[instrument(level = "debug", skip(self), ret)] | |
1312 | fn reverse_map_prefix_heuristically(&self, path: &Path) -> Option<PathBuf> { | |
1313 | let mut found = None; | |
1314 | ||
1315 | for (from, to) in self.mapping.iter() { | |
1316 | let has_normal_component = to.components().any(|c| match c { | |
1317 | path::Component::Normal(s) => !s.is_empty(), | |
1318 | _ => false, | |
1319 | }); | |
1320 | ||
1321 | if !has_normal_component { | |
1322 | continue; | |
1323 | } | |
1324 | ||
1325 | let Ok(rest) = path.strip_prefix(to) else { | |
1326 | continue; | |
1327 | }; | |
1328 | ||
1329 | if found.is_some() { | |
1330 | return None; | |
1331 | } | |
1332 | ||
1333 | found = Some(from.join(rest)); | |
1334 | } | |
1335 | ||
1336 | found | |
1337 | } | |
85aaf69f | 1338 | } |