]> git.proxmox.com Git - rustc.git/blob - src/tools/rust-analyzer/crates/ide-db/src/line_index.rs
New upstream version 1.65.0+dfsg1
[rustc.git] / src / tools / rust-analyzer / crates / ide-db / src / line_index.rs
1 //! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
2 //! representation.
3 use std::{iter, mem};
4
5 use stdx::hash::NoHashHashMap;
6 use syntax::{TextRange, TextSize};
7
8 #[derive(Clone, Debug, PartialEq, Eq)]
9 pub struct LineIndex {
10 /// Offset the the beginning of each line, zero-based
11 pub(crate) newlines: Vec<TextSize>,
12 /// List of non-ASCII characters on each line
13 pub(crate) utf16_lines: NoHashHashMap<u32, Vec<Utf16Char>>,
14 }
15
/// A line/column position whose column has been converted to UTF-16.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct LineColUtf16 {
    /// Line number, zero-based.
    pub line: u32,
    /// Column within the line, zero-based.
    pub col: u32,
}
23
/// A line/column position whose column is measured in UTF-8.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct LineCol {
    /// Line number, zero-based.
    pub line: u32,
    /// UTF-8 offset within the line, zero-based.
    pub col: u32,
}
31
32 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
33 pub(crate) struct Utf16Char {
34 /// Start offset of a character inside a line, zero-based
35 pub(crate) start: TextSize,
36 /// End offset of a character inside a line, zero-based
37 pub(crate) end: TextSize,
38 }
39
40 impl Utf16Char {
41 /// Returns the length in 8-bit UTF-8 code units.
42 fn len(&self) -> TextSize {
43 self.end - self.start
44 }
45
46 /// Returns the length in 16-bit UTF-16 code units.
47 fn len_utf16(&self) -> usize {
48 if self.len() == TextSize::from(4) {
49 2
50 } else {
51 1
52 }
53 }
54 }
55
56 impl LineIndex {
57 pub fn new(text: &str) -> LineIndex {
58 let mut utf16_lines = NoHashHashMap::default();
59 let mut utf16_chars = Vec::new();
60
61 let mut newlines = vec![0.into()];
62 let mut curr_row @ mut curr_col = 0.into();
63 let mut line = 0;
64 for c in text.chars() {
65 let c_len = TextSize::of(c);
66 curr_row += c_len;
67 if c == '\n' {
68 newlines.push(curr_row);
69
70 // Save any utf-16 characters seen in the previous line
71 if !utf16_chars.is_empty() {
72 utf16_lines.insert(line, mem::take(&mut utf16_chars));
73 }
74
75 // Prepare for processing the next line
76 curr_col = 0.into();
77 line += 1;
78 continue;
79 }
80
81 if !c.is_ascii() {
82 utf16_chars.push(Utf16Char { start: curr_col, end: curr_col + c_len });
83 }
84
85 curr_col += c_len;
86 }
87
88 // Save any utf-16 characters seen in the last line
89 if !utf16_chars.is_empty() {
90 utf16_lines.insert(line, utf16_chars);
91 }
92
93 LineIndex { newlines, utf16_lines }
94 }
95
96 pub fn line_col(&self, offset: TextSize) -> LineCol {
97 let line = self.newlines.partition_point(|&it| it <= offset) - 1;
98 let line_start_offset = self.newlines[line];
99 let col = offset - line_start_offset;
100 LineCol { line: line as u32, col: col.into() }
101 }
102
103 pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
104 self.newlines
105 .get(line_col.line as usize)
106 .map(|offset| offset + TextSize::from(line_col.col))
107 }
108
109 pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 {
110 let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
111 LineColUtf16 { line: line_col.line, col: col as u32 }
112 }
113
114 pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol {
115 let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
116 LineCol { line: line_col.line, col: col.into() }
117 }
118
119 pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
120 let lo = self.newlines.partition_point(|&it| it < range.start());
121 let hi = self.newlines.partition_point(|&it| it <= range.end());
122 let all = iter::once(range.start())
123 .chain(self.newlines[lo..hi].iter().copied())
124 .chain(iter::once(range.end()));
125
126 all.clone()
127 .zip(all.skip(1))
128 .map(|(lo, hi)| TextRange::new(lo, hi))
129 .filter(|it| !it.is_empty())
130 }
131
132 fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
133 let mut res: usize = col.into();
134 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
135 for c in utf16_chars {
136 if c.end <= col {
137 res -= usize::from(c.len()) - c.len_utf16();
138 } else {
139 // From here on, all utf16 characters come *after* the character we are mapping,
140 // so we don't need to take them into account
141 break;
142 }
143 }
144 }
145 res
146 }
147
148 fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
149 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
150 for c in utf16_chars {
151 if col > u32::from(c.start) {
152 col += u32::from(c.len()) - c.len_utf16() as u32;
153 } else {
154 // From here on, all utf16 characters come *after* the character we are mapping,
155 // so we don't need to take them into account
156 break;
157 }
158 }
159 }
160
161 col.into()
162 }
163 }
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Offsets map to the expected line/column, with and without a leading newline.
    #[test]
    fn test_line_index() {
        let text = "hello\nworld";
        // (offset, line, col)
        let table = [
            (0, 0, 0),
            (1, 0, 1),
            (5, 0, 5),
            (6, 1, 0),
            (7, 1, 1),
            (8, 1, 2),
            (10, 1, 4),
            (11, 1, 5),
            (12, 1, 6),
        ];

        let index = LineIndex::new(text);
        for &(offset, line, col) in &table {
            assert_eq!(index.line_col(offset.into()), LineCol { line, col });
        }

        let text = "\nhello\nworld";
        let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];
        let index = LineIndex::new(text);
        for &(offset, line, col) in &table {
            assert_eq!(index.line_col(offset.into()), LineCol { line, col });
        }
    }

    /// `offset` adds the column to the line start and rejects unknown lines.
    #[test]
    fn test_offset() {
        let index = LineIndex::new("hello\nworld");
        assert_eq!(index.offset(LineCol { line: 0, col: 0 }), Some(TextSize::from(0)));
        assert_eq!(index.offset(LineCol { line: 1, col: 2 }), Some(TextSize::from(8)));
        assert_eq!(index.offset(LineCol { line: 2, col: 0 }), None);
    }

    /// Sanity check of the encoding widths the conversion tests rely on.
    #[test]
    fn test_char_len() {
        assert_eq!('メ'.len_utf8(), 3);
        assert_eq!('メ'.len_utf16(), 1);
    }

    /// ASCII-only text records no per-line character data.
    #[test]
    fn test_empty_index() {
        // Escaped rather than multi-line so re-indenting the file cannot change the text.
        let col_index = LineIndex::new("\nconst C: char = 'x';\n");
        assert_eq!(col_index.utf16_lines.len(), 0);
    }

    #[test]
    fn test_single_char() {
        // Escaped rather than multi-line: the asserted columns depend on the exact text.
        let col_index = LineIndex::new("\nconst C: char = 'メ';\n");

        assert_eq!(col_index.utf16_lines.len(), 1);
        assert_eq!(col_index.utf16_lines[&1].len(), 1);
        assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });

        // UTF-8 to UTF-16, no changes
        assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);

        // UTF-8 to UTF-16
        assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);

        // UTF-16 to UTF-8, no changes
        assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));

        // UTF-16 to UTF-8
        assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21));

        // A character that needs two UTF-16 code units (four UTF-8 bytes).
        let col_index = LineIndex::new("a𐐏b");
        assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5));
    }

    #[test]
    fn test_string() {
        // Escaped rather than multi-line: the asserted columns depend on the exact text.
        let col_index = LineIndex::new("\nconst C: char = \"メ メ\";\n");

        assert_eq!(col_index.utf16_lines.len(), 1);
        assert_eq!(col_index.utf16_lines[&1].len(), 2);
        assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
        assert_eq!(col_index.utf16_lines[&1][1], Utf16Char { start: 21.into(), end: 24.into() });

        // UTF-8 to UTF-16
        assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);

        assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19);
        assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21);

        // A line without recorded characters is left untouched.
        assert_eq!(col_index.utf8_to_utf16_col(2, 15.into()), 15);

        // UTF-16 to UTF-8
        assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));

        // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
        assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
        assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
        assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24

        assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
    }

    #[test]
    fn test_splitlines() {
        fn r(lo: u32, hi: u32) -> TextRange {
            TextRange::new(lo.into(), hi.into())
        }

        let text = "a\nbb\nccc\n";
        let line_index = LineIndex::new(text);

        let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();
        let expected = vec![r(0, 2), r(2, 5), r(5, 9)];
        assert_eq!(actual, expected);

        let text = "";
        let line_index = LineIndex::new(text);

        let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();
        let expected = vec![];
        assert_eq!(actual, expected);

        let text = "\n";
        let line_index = LineIndex::new(text);

        let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();
        let expected = vec![r(0, 1)];
        assert_eq!(actual, expected);
    }
}