]> git.proxmox.com Git - rustc.git/blob - src/libsyntax/codemap.rs
Imported Upstream version 0.6
[rustc.git] / src / libsyntax / codemap.rs
1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
/*!

The CodeMap tracks all the source code used within a single crate, mapping
from integer byte positions to the original source code location. Each bit of
source parsed during crate parsing (typically a file, an in-memory string, or
a bit of macro expansion) covers a contiguous range of bytes in the CodeMap
and is represented by a FileMap. Byte positions are stored in `spans` and used
pervasively in the compiler. They are absolute positions within the CodeMap,
which upon request can be converted to line and column information, source
code snippets, etc.

*/
23
24 use core::prelude::*;
25
26 use core::cmp;
27 use core::str;
28 use core::to_bytes;
29 use core::uint;
30 use std::serialize::{Encodable, Decodable, Encoder, Decoder};
31
32 pub trait Pos {
33 fn from_uint(n: uint) -> Self;
34 fn to_uint(&self) -> uint;
35 }
36
37 /// A byte offset
38 #[deriving(Eq)]
39 pub struct BytePos(uint);
40 /// A character offset. Because of multibyte utf8 characters, a byte offset
41 /// is not equivalent to a character offset. The CodeMap will convert BytePos
42 /// values to CharPos values as necessary.
43 #[deriving(Eq)]
44 pub struct CharPos(uint);
45
46 // XXX: Lots of boilerplate in these impls, but so far my attempts to fix
47 // have been unsuccessful
48
49 impl Pos for BytePos {
50 fn from_uint(n: uint) -> BytePos { BytePos(n) }
51 fn to_uint(&self) -> uint { **self }
52 }
53
54 impl cmp::Ord for BytePos {
55 fn lt(&self, other: &BytePos) -> bool { **self < **other }
56 fn le(&self, other: &BytePos) -> bool { **self <= **other }
57 fn ge(&self, other: &BytePos) -> bool { **self >= **other }
58 fn gt(&self, other: &BytePos) -> bool { **self > **other }
59 }
60
61 impl Add<BytePos, BytePos> for BytePos {
62 fn add(&self, rhs: &BytePos) -> BytePos {
63 BytePos(**self + **rhs)
64 }
65 }
66
67 impl Sub<BytePos, BytePos> for BytePos {
68 fn sub(&self, rhs: &BytePos) -> BytePos {
69 BytePos(**self - **rhs)
70 }
71 }
72
73 impl to_bytes::IterBytes for BytePos {
74 fn iter_bytes(&self, +lsb0: bool, &&f: to_bytes::Cb) {
75 (**self).iter_bytes(lsb0, f)
76 }
77 }
78
79 impl Pos for CharPos {
80 fn from_uint(n: uint) -> CharPos { CharPos(n) }
81 fn to_uint(&self) -> uint { **self }
82 }
83
84 impl cmp::Ord for CharPos {
85 fn lt(&self, other: &CharPos) -> bool { **self < **other }
86 fn le(&self, other: &CharPos) -> bool { **self <= **other }
87 fn ge(&self, other: &CharPos) -> bool { **self >= **other }
88 fn gt(&self, other: &CharPos) -> bool { **self > **other }
89 }
90
91 impl to_bytes::IterBytes for CharPos {
92 fn iter_bytes(&self, +lsb0: bool, &&f: to_bytes::Cb) {
93 (**self).iter_bytes(lsb0, f)
94 }
95 }
96
97 impl Add<CharPos,CharPos> for CharPos {
98 fn add(&self, rhs: &CharPos) -> CharPos {
99 CharPos(**self + **rhs)
100 }
101 }
102
103 impl Sub<CharPos,CharPos> for CharPos {
104 fn sub(&self, rhs: &CharPos) -> CharPos {
105 CharPos(**self - **rhs)
106 }
107 }
108
109 /**
110 Spans represent a region of code, used for error reporting. Positions in spans
111 are *absolute* positions from the beginning of the codemap, not positions
112 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
113 to the original source.
114 */
115 pub struct span {
116 lo: BytePos,
117 hi: BytePos,
118 expn_info: Option<@ExpnInfo>
119 }
120
121 #[auto_encode]
122 #[auto_decode]
123 #[deriving(Eq)]
124 pub struct spanned<T> { node: T, span: span }
125
126 impl cmp::Eq for span {
127 fn eq(&self, other: &span) -> bool {
128 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
129 }
130 fn ne(&self, other: &span) -> bool { !(*self).eq(other) }
131 }
132
133 impl<S:Encoder> Encodable<S> for span {
134 /* Note #1972 -- spans are encoded but not decoded */
135 fn encode(&self, _s: &S) { _s.emit_nil() }
136 }
137
138 impl<D:Decoder> Decodable<D> for span {
139 fn decode(_d: &D) -> span {
140 dummy_sp()
141 }
142 }
143
144 pub fn spanned<T>(+lo: BytePos, +hi: BytePos, +t: T) -> spanned<T> {
145 respan(mk_sp(lo, hi), t)
146 }
147
148 pub fn respan<T>(sp: span, +t: T) -> spanned<T> {
149 spanned {node: t, span: sp}
150 }
151
152 pub fn dummy_spanned<T>(+t: T) -> spanned<T> {
153 respan(dummy_sp(), t)
154 }
155
156 /* assuming that we're not in macro expansion */
157 pub fn mk_sp(+lo: BytePos, +hi: BytePos) -> span {
158 span {lo: lo, hi: hi, expn_info: None}
159 }
160
161 // make this a const, once the compiler supports it
162 pub fn dummy_sp() -> span { return mk_sp(BytePos(0), BytePos(0)); }
163
164
165
166 /// A source code location used for error reporting
167 pub struct Loc {
168 /// Information about the original source
169 file: @FileMap,
170 /// The (1-based) line number
171 line: uint,
172 /// The (0-based) column offset
173 col: CharPos
174 }
175
176 /// A source code location used as the result of lookup_char_pos_adj
177 // Actually, *none* of the clients use the filename *or* file field;
178 // perhaps they should just be removed.
179 pub struct LocWithOpt {
180 filename: ~str,
181 line: uint,
182 col: CharPos,
183 file: Option<@FileMap>,
184 }
185
186 // used to be structural records. Better names, anyone?
187 pub struct FileMapAndLine {fm: @FileMap, line: uint}
188 pub struct FileMapAndBytePos {fm: @FileMap, pos: BytePos}
189 pub struct NameAndSpan {name: ~str, span: Option<span>}
190
191 pub struct CallInfo {
192 call_site: span,
193 callee: NameAndSpan
194 }
195
196 /// Extra information for tracking macro expansion of spans
197 pub enum ExpnInfo {
198 ExpandedFrom(CallInfo)
199 }
200
201 pub type FileName = ~str;
202
203 pub struct FileLines
204 {
205 file: @FileMap,
206 lines: ~[uint]
207 }
208
209 pub enum FileSubstr {
210 pub FssNone,
211 pub FssInternal(span),
212 }
213
214 /// Identifies an offset of a multi-byte character in a FileMap
215 pub struct MultiByteChar {
216 /// The absolute offset of the character in the CodeMap
217 pos: BytePos,
218 /// The number of bytes, >=2
219 bytes: uint,
220 }
221
222 /// A single source in the CodeMap
223 pub struct FileMap {
224 /// The name of the file that the source came from, source that doesn't
225 /// originate from files has names between angle brackets by convention,
226 /// e.g. `<anon>`
227 name: FileName,
228 /// Extra information used by qquote
229 substr: FileSubstr,
230 /// The complete source code
231 src: @~str,
232 /// The start position of this source in the CodeMap
233 start_pos: BytePos,
234 /// Locations of lines beginnings in the source code
235 lines: @mut ~[BytePos],
236 /// Locations of multi-byte characters in the source code
237 multibyte_chars: @mut ~[MultiByteChar],
238 }
239
240 pub impl FileMap {
241 // EFFECT: register a start-of-line offset in the
242 // table of line-beginnings.
243 // UNCHECKED INVARIANT: these offsets must be added in the right
244 // order and must be in the right places; there is shared knowledge
245 // about what ends a line between this file and parse.rs
246 fn next_line(&self, +pos: BytePos) {
247 // the new charpos must be > the last one (or it's the first one).
248 let lines = &mut *self.lines;
249 assert!((lines.len() == 0) || (lines[lines.len() - 1] < pos));
250 self.lines.push(pos);
251 }
252
253 // get a line from the list of pre-computed line-beginnings
254 pub fn get_line(&self, line: int) -> ~str {
255 unsafe {
256 let begin: BytePos = self.lines[line] - self.start_pos;
257 let begin = begin.to_uint();
258 let end = match str::find_char_from(*self.src, '\n', begin) {
259 Some(e) => e,
260 None => str::len(*self.src)
261 };
262 str::slice(*self.src, begin, end).to_owned()
263 }
264 }
265
266 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
267 assert!(bytes >=2 && bytes <= 4);
268 let mbc = MultiByteChar {
269 pos: pos,
270 bytes: bytes,
271 };
272 self.multibyte_chars.push(mbc);
273 }
274 }
275
276 pub struct CodeMap {
277 files: @mut ~[@FileMap]
278 }
279
280 pub impl CodeMap {
281 pub fn new() -> CodeMap {
282 CodeMap {
283 files: @mut ~[],
284 }
285 }
286
287 /// Add a new FileMap to the CodeMap and return it
288 fn new_filemap(&self, +filename: FileName, src: @~str) -> @FileMap {
289 return self.new_filemap_w_substr(filename, FssNone, src);
290 }
291
292 fn new_filemap_w_substr(
293 &self,
294 +filename: FileName,
295 +substr: FileSubstr,
296 src: @~str
297 ) -> @FileMap {
298 let files = &mut *self.files;
299 let start_pos = if files.len() == 0 {
300 0
301 } else {
302 let last_start = files.last().start_pos.to_uint();
303 let last_len = files.last().src.len();
304 last_start + last_len
305 };
306
307 let filemap = @FileMap {
308 name: filename, substr: substr, src: src,
309 start_pos: BytePos(start_pos),
310 lines: @mut ~[],
311 multibyte_chars: @mut ~[],
312 };
313
314 self.files.push(filemap);
315
316 return filemap;
317 }
318
319 pub fn mk_substr_filename(&self, sp: span) -> ~str {
320 let pos = self.lookup_char_pos(sp.lo);
321 return fmt!("<%s:%u:%u>", pos.file.name,
322 pos.line, pos.col.to_uint());
323 }
324
325 /// Lookup source information about a BytePos
326 pub fn lookup_char_pos(&self, +pos: BytePos) -> Loc {
327 return self.lookup_pos(pos);
328 }
329
330 pub fn lookup_char_pos_adj(&self, +pos: BytePos) -> LocWithOpt
331 {
332 let loc = self.lookup_char_pos(pos);
333 match (loc.file.substr) {
334 FssNone =>
335 LocWithOpt {
336 filename: /* FIXME (#2543) */ copy loc.file.name,
337 line: loc.line,
338 col: loc.col,
339 file: Some(loc.file)},
340 FssInternal(sp) =>
341 self.lookup_char_pos_adj(
342 sp.lo + (pos - loc.file.start_pos)),
343 }
344 }
345
346 pub fn adjust_span(&self, sp: span) -> span {
347 let line = self.lookup_line(sp.lo);
348 match (line.fm.substr) {
349 FssNone => sp,
350 FssInternal(s) => {
351 self.adjust_span(span {
352 lo: s.lo + (sp.lo - line.fm.start_pos),
353 hi: s.lo + (sp.hi - line.fm.start_pos),
354 expn_info: sp.expn_info
355 })
356 }
357 }
358 }
359
360 pub fn span_to_str(&self, sp: span) -> ~str {
361 let files = &mut *self.files;
362 if files.len() == 0 && sp == dummy_sp() {
363 return ~"no-location";
364 }
365
366 let lo = self.lookup_char_pos_adj(sp.lo);
367 let hi = self.lookup_char_pos_adj(sp.hi);
368 return fmt!("%s:%u:%u: %u:%u", lo.filename,
369 lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint())
370 }
371
372 pub fn span_to_filename(&self, sp: span) -> FileName {
373 let lo = self.lookup_char_pos(sp.lo);
374 return /* FIXME (#2543) */ copy lo.file.name;
375 }
376
377 pub fn span_to_lines(&self, sp: span) -> @FileLines {
378 let lo = self.lookup_char_pos(sp.lo);
379 let hi = self.lookup_char_pos(sp.hi);
380 let mut lines = ~[];
381 for uint::range(lo.line - 1u, hi.line as uint) |i| {
382 lines.push(i);
383 };
384 return @FileLines {file: lo.file, lines: lines};
385 }
386
387 pub fn span_to_snippet(&self, sp: span) -> ~str {
388 let begin = self.lookup_byte_offset(sp.lo);
389 let end = self.lookup_byte_offset(sp.hi);
390 assert!(begin.fm.start_pos == end.fm.start_pos);
391 return str::slice(*begin.fm.src,
392 begin.pos.to_uint(), end.pos.to_uint()).to_owned();
393 }
394
395 pub fn get_filemap(&self, filename: ~str) -> @FileMap {
396 for self.files.each |fm| { if fm.name == filename { return *fm; } }
397 //XXjdm the following triggers a mismatched type bug
398 // (or expected function, found _|_)
399 fail!(); // ("asking for " + filename + " which we don't know about");
400 }
401
402 }
403
404 priv impl CodeMap {
405
406 fn lookup_filemap_idx(&self, +pos: BytePos) -> uint {
407 let files = &*self.files;
408 let len = files.len();
409 let mut a = 0u;
410 let mut b = len;
411 while b - a > 1u {
412 let m = (a + b) / 2u;
413 if self.files[m].start_pos > pos {
414 b = m;
415 } else {
416 a = m;
417 }
418 }
419 if (a >= len) {
420 fail!(fmt!("position %u does not resolve to a source location",
421 pos.to_uint()))
422 }
423
424 return a;
425 }
426
427 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine
428 {
429 let idx = self.lookup_filemap_idx(pos);
430 let f = self.files[idx];
431 let mut a = 0u;
432 let lines = &*f.lines;
433 let mut b = lines.len();
434 while b - a > 1u {
435 let m = (a + b) / 2u;
436 if lines[m] > pos { b = m; } else { a = m; }
437 }
438 return FileMapAndLine {fm: f, line: a};
439 }
440
441 fn lookup_pos(&self, +pos: BytePos) -> Loc {
442 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
443 let line = a + 1u; // Line numbers start at 1
444 let chpos = self.bytepos_to_local_charpos(pos);
445 let linebpos = f.lines[a];
446 let linechpos = self.bytepos_to_local_charpos(linebpos);
447 debug!("codemap: byte pos %? is on the line at byte pos %?",
448 pos, linebpos);
449 debug!("codemap: char pos %? is on the line at char pos %?",
450 chpos, linechpos);
451 debug!("codemap: byte is on line: %?", line);
452 assert!(chpos >= linechpos);
453 return Loc {
454 file: f,
455 line: line,
456 col: chpos - linechpos
457 };
458 }
459
460 fn span_to_str_no_adj(&self, sp: span) -> ~str {
461 let lo = self.lookup_char_pos(sp.lo);
462 let hi = self.lookup_char_pos(sp.hi);
463 return fmt!("%s:%u:%u: %u:%u", lo.file.name,
464 lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint())
465 }
466
467 fn lookup_byte_offset(&self, +bpos: BytePos)
468 -> FileMapAndBytePos {
469 let idx = self.lookup_filemap_idx(bpos);
470 let fm = self.files[idx];
471 let offset = bpos - fm.start_pos;
472 return FileMapAndBytePos {fm: fm, pos: offset};
473 }
474
475 // Converts an absolute BytePos to a CharPos relative to the file it is
476 // located in
477 fn bytepos_to_local_charpos(&self, +bpos: BytePos) -> CharPos {
478 debug!("codemap: converting %? to char pos", bpos);
479 let idx = self.lookup_filemap_idx(bpos);
480 let map = self.files[idx];
481
482 // The number of extra bytes due to multibyte chars in the FileMap
483 let mut total_extra_bytes = 0;
484
485 for map.multibyte_chars.each |mbc| {
486 debug!("codemap: %?-byte char at %?", mbc.bytes, mbc.pos);
487 if mbc.pos < bpos {
488 total_extra_bytes += mbc.bytes;
489 // We should never see a byte position in the middle of a
490 // character
491 assert!(bpos == mbc.pos
492 || bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
493 } else {
494 break;
495 }
496 }
497
498 CharPos(bpos.to_uint() - total_extra_bytes)
499 }
500 }
501
502 #[cfg(test)]
503 mod test {
504 use super::*;
505
506 #[test]
507 fn t1 () {
508 let cm = CodeMap::new();
509 let fm = cm.new_filemap(~"blork.rs",@~"first line.\nsecond line");
510 fm.next_line(BytePos(0));
511 assert_eq!(&fm.get_line(0),&~"first line.");
512 // TESTING BROKEN BEHAVIOR:
513 fm.next_line(BytePos(10));
514 assert_eq!(&fm.get_line(1),&~".");
515 }
516
517 #[test]
518 #[should_fail]
519 fn t2 () {
520 let cm = CodeMap::new();
521 let fm = cm.new_filemap(~"blork.rs",@~"first line.\nsecond line");
522 // TESTING *REALLY* BROKEN BEHAVIOR:
523 fm.next_line(BytePos(0));
524 fm.next_line(BytePos(10));
525 fm.next_line(BytePos(2));
526 }
527 }
528
529
530
531 //
532 // Local Variables:
533 // mode: rust
534 // fill-column: 78;
535 // indent-tabs-mode: nil
536 // c-basic-offset: 4
537 // buffer-file-coding-system: utf-8-unix
538 // End:
539 //