]> git.proxmox.com Git - rustc.git/blob - src/tools/rust-analyzer/crates/rust-analyzer/src/line_index.rs
New upstream version 1.67.1+dfsg1
[rustc.git] / src / tools / rust-analyzer / crates / rust-analyzer / src / line_index.rs
//! Enhances `ide::LineIndex` with additional info required to convert offsets
//! into lsp positions.
//!
//! We maintain the invariant that all internal strings use `\n` as the line
//! separator. This module does line ending conversion and detection (so that
//! we can convert back to `\r\n` on the way out).
7
8 use std::sync::Arc;
9
/// Text encoding associated with positions handed out by this module.
///
/// NOTE(review): presumably this selects the unit (UTF-8 bytes vs. UTF-16 code
/// units) in which column offsets of LSP positions are counted -- confirm
/// against the conversion code that consumes it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PositionEncoding {
    /// UTF-8 encoding.
    Utf8,
    /// UTF-16 encoding.
    Utf16,
}
14
15 pub(crate) struct LineIndex {
16 pub(crate) index: Arc<ide::LineIndex>,
17 pub(crate) endings: LineEndings,
18 pub(crate) encoding: PositionEncoding,
19 }
20
/// Line-ending convention of a piece of text, as reported by
/// [`LineEndings::normalize`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum LineEndings {
    /// The text contained no `\r\n` sequence.
    Unix,
    /// The text contained at least one `\r\n` sequence.
    Dos,
}

impl LineEndings {
    /// Replaces every `\r\n` in `src` with `\n`, reusing the allocation of `src`.
    ///
    /// Returns the normalized text together with the detected line endings:
    /// [`LineEndings::Dos`] if at least one `\r\n` was replaced, otherwise
    /// [`LineEndings::Unix`] and the text unchanged. A `\r` that is not
    /// immediately followed by `\n` is left in place.
    pub(crate) fn normalize(src: String) -> (String, LineEndings) {
        // Removing the ASCII byte `\r` never breaks utf-8 encoding, so the
        // replacement can be done on the raw bytes. We take ownership of the
        // bytes instead of mutating the live `String`, so no invalid `String`
        // can be observed even if a panic occurs half-way through.
        let mut buf = src.into_bytes();

        let find_crlf = |bytes: &[u8]| bytes.windows(2).position(|pair| pair == b"\r\n");

        let first_cr = match find_crlf(&buf[..]) {
            Some(idx) => idx,
            // SAFETY: `buf` is unchanged and therefore still contains utf-8 data.
            None => return (unsafe { String::from_utf8_unchecked(buf) }, LineEndings::Unix),
        };

        let len = buf.len();
        // `write` is the end of the already-compacted prefix. `read` is the
        // start of the bytes that still have to be moved; it always sits just
        // past a dropped `\r`, so `write < read` holds throughout.
        let mut write = first_cr;
        let mut read = first_cr + 1;
        loop {
            // The chunk to keep runs up to the next `\r\n`, or to the end of
            // the buffer when there is none left.
            let chunk_end = find_crlf(&buf[read..]).map_or(len, |offset| read + offset);
            buf.copy_within(read..chunk_end, write);
            write += chunk_end - read;
            if chunk_end == len {
                break;
            }
            // Skip the `\r`; the `\n` after it starts the next chunk.
            read = chunk_end + 1;
        }

        // Drop the stale bytes left behind by the compaction.
        buf.truncate(write);
        // SAFETY: `buf` held valid utf-8 and only whole `\r` bytes (ASCII, so
        // never part of a multi-byte sequence) were removed from it.
        let normalized = unsafe { String::from_utf8_unchecked(buf) };
        (normalized, LineEndings::Dos)
    }
}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// Normalizes `src` and asserts both the detected line endings and the
    /// resulting text.
    #[track_caller]
    fn check(src: &str, expected_text: &str, expected_endings: LineEndings) {
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, expected_endings);
        assert_eq!(res, expected_text);
    }

    #[test]
    fn unix() {
        let src = "a\nb\nc\n\n\n\n";
        check(src, src, LineEndings::Unix);
    }

    #[test]
    fn dos() {
        check(
            "\r\na\r\n\r\nb\r\nc\r\n\r\n\r\n\r\n",
            "\na\n\nb\nc\n\n\n\n",
            LineEndings::Dos,
        );
    }

    #[test]
    fn mixed() {
        check("a\r\nb\r\nc\r\n\n\r\n\n", "a\nb\nc\n\n\n\n", LineEndings::Dos);
    }

    #[test]
    fn none() {
        let src = "abc";
        check(src, src, LineEndings::Unix);
    }

    #[test]
    fn empty() {
        check("", "", LineEndings::Unix);
    }

    #[test]
    fn lone_carriage_return() {
        // A `\r` that is not followed by `\n` is not a line ending and stays.
        check("a\rb\r", "a\rb\r", LineEndings::Unix);
        // Only the `\r` directly in front of the `\n` is removed.
        check("a\r\r\nb", "a\r\nb", LineEndings::Dos);
    }

    #[test]
    fn non_ascii() {
        check("héllo\r\nwörld\r\n✓", "héllo\nwörld\n✓", LineEndings::Dos);
    }
}