]> git.proxmox.com Git - rustc.git/blob - src/tools/rust-analyzer/crates/ide-db/src/line_index.rs
New upstream version 1.67.1+dfsg1
[rustc.git] / src / tools / rust-analyzer / crates / ide-db / src / line_index.rs
1 //! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
2 //! representation.
3 use std::{iter, mem};
4
5 use stdx::hash::NoHashHashMap;
6 use syntax::{TextRange, TextSize};
7
8 #[derive(Clone, Debug, PartialEq, Eq)]
9 pub struct LineIndex {
10 /// Offset the the beginning of each line, zero-based
11 pub(crate) newlines: Vec<TextSize>,
12 /// List of non-ASCII characters on each line
13 pub(crate) utf16_lines: NoHashHashMap<u32, Vec<Utf16Char>>,
14 }
15
/// A `(line, column)` position whose column is counted in UTF-16 code units.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineColUtf16 {
    /// Line number, zero-based.
    pub line: u32,
    /// Column within the line, zero-based, in UTF-16 code units.
    pub col: u32,
}
23
/// A `(line, column)` position whose column is a UTF-8 byte offset.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Line number, zero-based.
    pub line: u32,
    /// Column within the line, zero-based, as a UTF-8 byte offset from the
    /// start of the line.
    pub col: u32,
}
31
32 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
33 pub(crate) struct Utf16Char {
34 /// Start offset of a character inside a line, zero-based
35 pub(crate) start: TextSize,
36 /// End offset of a character inside a line, zero-based
37 pub(crate) end: TextSize,
38 }
39
40 impl Utf16Char {
41 /// Returns the length in 8-bit UTF-8 code units.
42 fn len(&self) -> TextSize {
43 self.end - self.start
44 }
45
46 /// Returns the length in 16-bit UTF-16 code units.
47 fn len_utf16(&self) -> usize {
48 if self.len() == TextSize::from(4) {
49 2
50 } else {
51 1
52 }
53 }
54 }
55
56 impl LineIndex {
57 pub fn new(text: &str) -> LineIndex {
58 let mut utf16_lines = NoHashHashMap::default();
59 let mut utf16_chars = Vec::new();
60
61 let mut newlines = Vec::with_capacity(16);
62 newlines.push(TextSize::from(0));
63
64 let mut curr_row = 0.into();
65 let mut curr_col = 0.into();
66 let mut line = 0;
67 for c in text.chars() {
68 let c_len = TextSize::of(c);
69 curr_row += c_len;
70 if c == '\n' {
71 newlines.push(curr_row);
72
73 // Save any utf-16 characters seen in the previous line
74 if !utf16_chars.is_empty() {
75 utf16_lines.insert(line, mem::take(&mut utf16_chars));
76 }
77
78 // Prepare for processing the next line
79 curr_col = 0.into();
80 line += 1;
81 continue;
82 }
83
84 if !c.is_ascii() {
85 utf16_chars.push(Utf16Char { start: curr_col, end: curr_col + c_len });
86 }
87
88 curr_col += c_len;
89 }
90
91 // Save any utf-16 characters seen in the last line
92 if !utf16_chars.is_empty() {
93 utf16_lines.insert(line, utf16_chars);
94 }
95
96 LineIndex { newlines, utf16_lines }
97 }
98
99 pub fn line_col(&self, offset: TextSize) -> LineCol {
100 let line = self.newlines.partition_point(|&it| it <= offset) - 1;
101 let line_start_offset = self.newlines[line];
102 let col = offset - line_start_offset;
103 LineCol { line: line as u32, col: col.into() }
104 }
105
106 pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
107 self.newlines
108 .get(line_col.line as usize)
109 .map(|offset| offset + TextSize::from(line_col.col))
110 }
111
112 pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 {
113 let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
114 LineColUtf16 { line: line_col.line, col: col as u32 }
115 }
116
117 pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol {
118 let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
119 LineCol { line: line_col.line, col: col.into() }
120 }
121
122 pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
123 let lo = self.newlines.partition_point(|&it| it < range.start());
124 let hi = self.newlines.partition_point(|&it| it <= range.end());
125 let all = iter::once(range.start())
126 .chain(self.newlines[lo..hi].iter().copied())
127 .chain(iter::once(range.end()));
128
129 all.clone()
130 .zip(all.skip(1))
131 .map(|(lo, hi)| TextRange::new(lo, hi))
132 .filter(|it| !it.is_empty())
133 }
134
135 fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
136 let mut res: usize = col.into();
137 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
138 for c in utf16_chars {
139 if c.end <= col {
140 res -= usize::from(c.len()) - c.len_utf16();
141 } else {
142 // From here on, all utf16 characters come *after* the character we are mapping,
143 // so we don't need to take them into account
144 break;
145 }
146 }
147 }
148 res
149 }
150
151 fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
152 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
153 for c in utf16_chars {
154 if col > u32::from(c.start) {
155 col += u32::from(c.len()) - c.len_utf16() as u32;
156 } else {
157 // From here on, all utf16 characters come *after* the character we are mapping,
158 // so we don't need to take them into account
159 break;
160 }
161 }
162 }
163
164 col.into()
165 }
166 }
167
#[cfg(test)]
mod tests {
    use super::*;

    /// `line_col` maps flat offsets to the expected `(line, col)` pairs.
    #[test]
    fn test_line_index() {
        let text = "hello\nworld";
        // (offset, expected line, expected col)
        let table = [
            (0, 0, 0),
            (1, 0, 1),
            (5, 0, 5),
            (6, 1, 0),
            (7, 1, 1),
            (8, 1, 2),
            (10, 1, 4),
            (11, 1, 5),
            (12, 1, 6),
        ];

        let index = LineIndex::new(text);
        for &(offset, line, col) in &table {
            assert_eq!(index.line_col(offset.into()), LineCol { line, col });
        }

        let text = "\nhello\nworld";
        let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];
        let index = LineIndex::new(text);
        for &(offset, line, col) in &table {
            assert_eq!(index.line_col(offset.into()), LineCol { line, col });
        }
    }

    /// Sanity check of the encoded widths the other tests rely on.
    #[test]
    fn test_char_len() {
        assert_eq!('メ'.len_utf8(), 3);
        assert_eq!('メ'.len_utf16(), 1);
    }

    /// ASCII-only text records no per-line character table.
    #[test]
    fn test_empty_index() {
        let col_index = LineIndex::new(
            "
const C: char = 'x';
",
        );
        assert_eq!(col_index.utf16_lines.len(), 0);
    }

    /// Column conversion around a single non-ASCII character.
    #[test]
    fn test_single_char() {
        let col_index = LineIndex::new(
            "
const C: char = 'メ';
",
        );

        assert_eq!(col_index.utf16_lines.len(), 1);
        assert_eq!(col_index.utf16_lines[&1].len(), 1);
        assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });

        // UTF-8 to UTF-16, no changes
        assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);

        // UTF-8 to UTF-16
        assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);

        // UTF-16 to UTF-8, no changes
        assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));

        // UTF-16 to UTF-8
        assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21));

        // A character that takes two UTF-16 code units (four UTF-8 bytes).
        let col_index = LineIndex::new("a𐐏b");
        assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5));
    }

    /// Column conversion with several non-ASCII characters on one line.
    #[test]
    fn test_string() {
        let col_index = LineIndex::new(
            "
const C: char = \"メ メ\";
",
        );

        assert_eq!(col_index.utf16_lines.len(), 1);
        assert_eq!(col_index.utf16_lines[&1].len(), 2);
        assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
        assert_eq!(col_index.utf16_lines[&1][1], Utf16Char { start: 21.into(), end: 24.into() });

        // UTF-8 to UTF-16
        assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);

        assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19);
        assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21);

        // A line without recorded characters is passed through unchanged.
        assert_eq!(col_index.utf8_to_utf16_col(2, 15.into()), 15);

        // UTF-16 to UTF-8
        assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));

        // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
        assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
        assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
        assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24

        assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
    }

    /// `lines` splits a range at line starts and drops empty pieces.
    #[test]
    fn test_splitlines() {
        fn r(lo: u32, hi: u32) -> TextRange {
            TextRange::new(lo.into(), hi.into())
        }

        let text = "a\nbb\nccc\n";
        let line_index = LineIndex::new(text);

        let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();
        let expected = vec![r(0, 2), r(2, 5), r(5, 9)];
        assert_eq!(actual, expected);

        let text = "";
        let line_index = LineIndex::new(text);

        let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();
        let expected = vec![];
        assert_eq!(actual, expected);

        let text = "\n";
        let line_index = LineIndex::new(text);

        let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();
        let expected = vec![r(0, 1)];
        assert_eq!(actual, expected);
    }
}
303 }