1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 The CodeMap tracks all the source code used within a single crate, mapping
14 from integer byte positions to the original source code location. Each bit of
15 source parsed during crate parsing (typically files, in-memory strings, or
16 various bits of macro expansion) covers a contiguous range of bytes in the
17 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
18 and used pervasively in the compiler. They are absolute positions within the
19 CodeMap, which upon request can be converted to line and column information,
20 source code snippets, etc.
30 use std
::serialize
::{Encodable, Decodable, Encoder, Decoder}
;
/// Builds a value of the implementing type from the raw offset `n`.
33 fn from_uint(n
: uint
) -> Self;
/// Returns this value as a raw `uint`.
34 fn to_uint(&self) -> uint
;
/// A byte offset, stored as a single `uint`.
39 pub struct BytePos(uint
);
40 /// A character offset. Because of multibyte utf8 characters, a byte offset
41 /// is not equivalent to a character offset. The CodeMap will convert BytePos
42 /// values to CharPos values as necessary.
/// Like `BytePos`, this is a newtype around a single `uint`.
44 pub struct CharPos(uint
);
46 // XXX: Lots of boilerplate in these impls, but so far my attempts to fix
47 // have been unsuccessful
// `Pos` for `BytePos`: converts between the wrapper and its raw `uint`.
49 impl Pos
for BytePos
{
// Wraps `n` in a `BytePos`.
50 fn from_uint(n
: uint
) -> BytePos { BytePos(n) }
// Unwraps the inner `uint`: the first `*` goes through `&self`, the
// second through the newtype.
51 fn to_uint(&self) -> uint { **self }
// Orders `BytePos` values by comparing the wrapped `uint`s; each method
// applies the corresponding primitive comparison operator.
54 impl cmp
::Ord
for BytePos
{
55 fn lt(&self, other
: &BytePos
) -> bool { **self < **other }
56 fn le(&self, other
: &BytePos
) -> bool { **self <= **other }
57 fn ge(&self, other
: &BytePos
) -> bool { **self >= **other }
58 fn gt(&self, other
: &BytePos
) -> bool { **self > **other }
// Addition of two byte positions: sums the wrapped `uint`s and re-wraps
// the result as a `BytePos`.
61 impl Add
<BytePos
, BytePos
> for BytePos
{
62 fn add(&self, rhs
: &BytePos
) -> BytePos
{
63 BytePos(**self + **rhs
)
// Subtraction of two byte positions: subtracts the wrapped `uint` of `rhs`
// from that of `self` and re-wraps the result as a `BytePos`.
// NOTE(review): nothing here checks that `rhs <= self` before the unsigned
// subtraction -- confirm callers guarantee it.
67 impl Sub
<BytePos
, BytePos
> for BytePos
{
68 fn sub(&self, rhs
: &BytePos
) -> BytePos
{
69 BytePos(**self - **rhs
)
// Byte iteration for `BytePos` is forwarded to the wrapped `uint`, with
// `lsb0` and the callback `f` passed through unchanged.
73 impl to_bytes
::IterBytes
for BytePos
{
74 fn iter_bytes(&self, +lsb0
: bool
, &&f
: to_bytes
::Cb
) {
75 (**self).iter_bytes(lsb0
, f
)
// `Pos` for `CharPos`: converts between the wrapper and its raw `uint`.
79 impl Pos
for CharPos
{
// Wraps `n` in a `CharPos`.
80 fn from_uint(n
: uint
) -> CharPos { CharPos(n) }
// Unwraps the inner `uint`: the first `*` goes through `&self`, the
// second through the newtype.
81 fn to_uint(&self) -> uint { **self }
// Orders `CharPos` values by comparing the wrapped `uint`s; each method
// applies the corresponding primitive comparison operator.
84 impl cmp
::Ord
for CharPos
{
85 fn lt(&self, other
: &CharPos
) -> bool { **self < **other }
86 fn le(&self, other
: &CharPos
) -> bool { **self <= **other }
87 fn ge(&self, other
: &CharPos
) -> bool { **self >= **other }
88 fn gt(&self, other
: &CharPos
) -> bool { **self > **other }
// Byte iteration for `CharPos` is forwarded to the wrapped `uint`, with
// `lsb0` and the callback `f` passed through unchanged.
91 impl to_bytes
::IterBytes
for CharPos
{
92 fn iter_bytes(&self, +lsb0
: bool
, &&f
: to_bytes
::Cb
) {
93 (**self).iter_bytes(lsb0
, f
)
// Addition of two character positions: sums the wrapped `uint`s and
// re-wraps the result as a `CharPos`.
97 impl Add
<CharPos
,CharPos
> for CharPos
{
98 fn add(&self, rhs
: &CharPos
) -> CharPos
{
99 CharPos(**self + **rhs
)
// Subtraction of two character positions: subtracts the wrapped `uint` of
// `rhs` from that of `self` and re-wraps the result as a `CharPos`.
// NOTE(review): nothing here checks that `rhs <= self` before the unsigned
// subtraction -- confirm callers guarantee it.
103 impl Sub
<CharPos
,CharPos
> for CharPos
{
104 fn sub(&self, rhs
: &CharPos
) -> CharPos
{
105 CharPos(**self - **rhs
)
110 Spans represent a region of code, used for error reporting. Positions in spans
111 are *absolute* positions from the beginning of the codemap, not positions
112 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
113 to the original source.
118 expn_info
: Option
<@ExpnInfo
>
/// A value `node` of type `T` paired with the `span` attached to it.
124 pub struct spanned
<T
> { node: T, span: span }
// Span equality looks only at the `lo` and `hi` endpoints; no other field
// of the span takes part in the comparison.
126 impl cmp
::Eq
for span
{
127 fn eq(&self, other
: &span
) -> bool
{
128 return (*self).lo
== (*other
).lo
&& (*self).hi
== (*other
).hi
;
// Defined as the negation of `eq`.
130 fn ne(&self, other
: &span
) -> bool { !(*self).eq(other) }
// Encoding a span emits only a nil marker through the encoder; `lo`, `hi`
// and the span's other fields are not written out.
133 impl<S
:Encoder
> Encodable
<S
> for span
{
134 /* Note #1972 -- spans are encoded but not decoded */
135 fn encode(&self, _s
: &S
) { _s.emit_nil() }
138 impl<D
:Decoder
> Decodable
<D
> for span
{
139 fn decode(_d
: &D
) -> span
{
/// Wraps `t` in a `spanned<T>` whose span is built from `lo` and `hi`
/// via `mk_sp`.
144 pub fn spanned
<T
>(+lo
: BytePos
, +hi
: BytePos
, +t
: T
) -> spanned
<T
> {
145 respan(mk_sp(lo
, hi
), t
)
/// Pairs the value `t` with the already-built span `sp`.
148 pub fn respan
<T
>(sp
: span
, +t
: T
) -> spanned
<T
> {
149 spanned {node: t, span: sp}
/// Wraps `t` in a `spanned<T>` carrying the placeholder span returned by
/// `dummy_sp()`.
152 pub fn dummy_spanned
<T
>(+t
: T
) -> spanned
<T
> {
153 respan(dummy_sp(), t
)
/// Builds a span with the given `lo` and `hi` endpoints and no expansion
/// info (`expn_info` is set to `None`).
156 /* assuming that we're not in macro expansion */
157 pub fn mk_sp(+lo
: BytePos
, +hi
: BytePos
) -> span
{
158 span {lo: lo, hi: hi, expn_info: None}
/// Returns a placeholder span whose `lo` and `hi` are both `BytePos(0)`.
161 // make this a const, once the compiler supports it
162 pub fn dummy_sp() -> span { return mk_sp(BytePos(0), BytePos(0)); }
166 /// A source code location used for error reporting
168 /// Information about the original source
170 /// The (1-based) line number
172 /// The (0-based) column offset
176 /// A source code location used as the result of lookup_char_pos_adj
177 // Actually, *none* of the clients use the filename *or* file field;
178 // perhaps they should just be removed.
179 pub struct LocWithOpt
{
183 file
: Option
<@FileMap
>,
186 // used to be structural records. Better names, anyone?
/// A FileMap together with a line value. As produced by `lookup_line`,
/// `line` is a 0-based index (`lookup_pos` adds 1 to get the line number).
187 pub struct FileMapAndLine {fm: @FileMap, line: uint}
/// A FileMap together with a byte position. As produced by
/// `lookup_byte_offset`, `pos` is the offset from the FileMap's `start_pos`.
188 pub struct FileMapAndBytePos {fm: @FileMap, pos: BytePos}
/// A name together with an optional span.
189 pub struct NameAndSpan {name: ~str, span: Option<span>}
191 pub struct CallInfo
{
196 /// Extra information for tracking macro expansion of spans
198 ExpandedFrom(CallInfo
)
/// File names are plain owned strings.
201 pub type FileName
= ~str;
209 pub enum FileSubstr
{
211 pub FssInternal(span
),
214 /// Identifies an offset of a multi-byte character in a FileMap
215 pub struct MultiByteChar
{
216 /// The absolute offset of the character in the CodeMap
218 /// The number of bytes, >=2
222 /// A single source in the CodeMap
224 /// The name of the file that the source came from, source that doesn't
225 /// originate from files has names between angle brackets by convention,
228 /// Extra information used by qquote
230 /// The complete source code
232 /// The start position of this source in the CodeMap
234 /// Locations of line beginnings in the source code
235 lines
: @
mut ~[BytePos
],
236 /// Locations of multi-byte characters in the source code
237 multibyte_chars
: @
mut ~[MultiByteChar
],
/// Appends `pos` to `self.lines`, the table of line-start positions.
///
/// Asserts that the table is empty or that `pos` is strictly greater than
/// the most recently recorded entry.
241 // EFFECT: register a start-of-line offset in the
242 // table of line-beginnings.
243 // UNCHECKED INVARIANT: these offsets must be added in the right
244 // order and must be in the right places; there is shared knowledge
245 // about what ends a line between this file and parse.rs
246 fn next_line(&self, +pos
: BytePos
) {
247 // the new byte position must be > the last one (or it's the first one).
248 let lines
= &mut *self.lines
;
249 assert
!((lines
.len() == 0) || (lines
[lines
.len() - 1] < pos
));
250 self.lines
.push(pos
);
253 // get a line from the list of pre-computed line-beginnings
254 pub fn get_line(&self, line
: int
) -> ~str {
256 let begin
: BytePos
= self.lines
[line
] - self.start_pos
;
257 let begin
= begin
.to_uint();
258 let end
= match str::find_char_from(*self.src
, '
\n'
, begin
) {
260 None
=> str::len(*self.src
)
262 str::slice(*self.src
, begin
, end
).to_owned()
266 pub fn record_multibyte_char(&self, pos
: BytePos
, bytes
: uint
) {
267 assert
!(bytes
>=2 && bytes
<= 4);
268 let mbc
= MultiByteChar
{
272 self.multibyte_chars
.push(mbc
);
277 files
: @
mut ~[@FileMap
]
281 pub fn new() -> CodeMap
{
287 /// Add a new FileMap to the CodeMap and return it
/// Delegates to `new_filemap_w_substr`, passing `FssNone` as the substr.
288 fn new_filemap(&self, +filename
: FileName
, src
: @
~str) -> @FileMap
{
289 return self.new_filemap_w_substr(filename
, FssNone
, src
);
292 fn new_filemap_w_substr(
298 let files
= &mut *self.files
;
299 let start_pos
= if files
.len() == 0 {
302 let last_start
= files
.last().start_pos
.to_uint();
303 let last_len
= files
.last().src
.len();
304 last_start
+ last_len
307 let filemap
= @FileMap
{
308 name
: filename
, substr
: substr
, src
: src
,
309 start_pos
: BytePos(start_pos
),
311 multibyte_chars
: @
mut ~[],
314 self.files
.push(filemap
);
/// Builds a `<file:line:col>` name for the position of `sp.lo`, as
/// resolved by `lookup_char_pos`.
319 pub fn mk_substr_filename(&self, sp
: span
) -> ~str {
320 let pos
= self.lookup_char_pos(sp
.lo
);
321 return fmt
!("<%s:%u:%u>", pos
.file
.name
,
322 pos
.line
, pos
.col
.to_uint());
325 /// Lookup source information about a BytePos
/// Thin public wrapper: forwards `pos` to `lookup_pos` and returns its `Loc`.
326 pub fn lookup_char_pos(&self, +pos
: BytePos
) -> Loc
{
327 return self.lookup_pos(pos
);
330 pub fn lookup_char_pos_adj(&self, +pos
: BytePos
) -> LocWithOpt
332 let loc
= self.lookup_char_pos(pos
);
333 match (loc
.file
.substr
) {
336 filename
: /* FIXME (#2543) */ copy loc
.file
.name
,
339 file
: Some(loc
.file
)},
341 self.lookup_char_pos_adj(
342 sp
.lo
+ (pos
- loc
.file
.start_pos
)),
346 pub fn adjust_span(&self, sp
: span
) -> span
{
347 let line
= self.lookup_line(sp
.lo
);
348 match (line
.fm
.substr
) {
351 self.adjust_span(span
{
352 lo
: s
.lo
+ (sp
.lo
- line
.fm
.start_pos
),
353 hi
: s
.lo
+ (sp
.hi
- line
.fm
.start_pos
),
354 expn_info
: sp
.expn_info
/// Renders `sp` as `file:line:col: line:col`, where the first line/column
/// pair comes from `sp.lo` and the second from `sp.hi`. Both endpoints are
/// resolved with `lookup_char_pos_adj`.
///
/// Returns `"no-location"` when the CodeMap holds no files and `sp` equals
/// `dummy_sp()`.
360 pub fn span_to_str(&self, sp
: span
) -> ~str {
// NOTE(review): `files` is only read (`len()`) below; the mutable borrow
// looks unnecessary -- confirm before changing.
361 let files
= &mut *self.files
;
362 if files
.len() == 0 && sp
== dummy_sp() {
363 return ~"no-location";
366 let lo
= self.lookup_char_pos_adj(sp
.lo
);
367 let hi
= self.lookup_char_pos_adj(sp
.hi
);
368 return fmt
!("%s:%u:%u: %u:%u", lo
.filename
,
369 lo
.line
, lo
.col
.to_uint(), hi
.line
, hi
.col
.to_uint())
/// Returns a copy of the name of the file that `sp.lo` resolves to.
372 pub fn span_to_filename(&self, sp
: span
) -> FileName
{
373 let lo
= self.lookup_char_pos(sp
.lo
);
374 return /* FIXME (#2543) */ copy lo
.file
.name
;
377 pub fn span_to_lines(&self, sp
: span
) -> @FileLines
{
378 let lo
= self.lookup_char_pos(sp
.lo
);
379 let hi
= self.lookup_char_pos(sp
.hi
);
381 for uint
::range(lo
.line
- 1u, hi
.line
as uint
) |i
| {
384 return @FileLines {file: lo.file, lines: lines}
;
/// Returns the source text between `sp.lo` and `sp.hi` as an owned string.
///
/// Both endpoints are resolved with `lookup_byte_offset`; the slice is
/// taken from the source of the FileMap found for `sp.lo`.
387 pub fn span_to_snippet(&self, sp
: span
) -> ~str {
388 let begin
= self.lookup_byte_offset(sp
.lo
);
389 let end
= self.lookup_byte_offset(sp
.hi
);
// Equal `start_pos` values are presumably the check that both endpoints
// fall in the same FileMap; the function fails otherwise.
390 assert
!(begin
.fm
.start_pos
== end
.fm
.start_pos
);
391 return str::slice(*begin
.fm
.src
,
392 begin
.pos
.to_uint(), end
.pos
.to_uint()).to_owned();
/// Returns the first FileMap in `self.files` whose `name` equals
/// `filename`. Fails (via `fail!()`, with no message) when none matches.
395 pub fn get_filemap(&self, filename
: ~str) -> @FileMap
{
396 for self.files
.each
|fm
| { if fm.name == filename { return *fm; }
}
397 //XXjdm the following triggers a mismatched type bug
398 // (or expected function, found _|_)
399 fail
!(); // ("asking for " + filename + " which we don't know about");
406 fn lookup_filemap_idx(&self, +pos
: BytePos
) -> uint
{
407 let files
= &*self.files
;
408 let len
= files
.len();
412 let m
= (a
+ b
) / 2u;
413 if self.files
[m
].start_pos
> pos
{
420 fail
!(fmt
!("position %u does not resolve to a source location",
427 fn lookup_line(&self, pos
: BytePos
) -> FileMapAndLine
429 let idx
= self.lookup_filemap_idx(pos
);
430 let f
= self.files
[idx
];
432 let lines
= &*f
.lines
;
433 let mut b
= lines
.len();
435 let m
= (a
+ b
) / 2u;
436 if lines
[m
] > pos { b = m; }
else { a = m; }
438 return FileMapAndLine {fm: f, line: a}
;
441 fn lookup_pos(&self, +pos
: BytePos
) -> Loc
{
442 let FileMapAndLine {fm: f, line: a}
= self.lookup_line(pos
);
443 let line
= a
+ 1u; // Line numbers start at 1
444 let chpos
= self.bytepos_to_local_charpos(pos
);
445 let linebpos
= f
.lines
[a
];
446 let linechpos
= self.bytepos_to_local_charpos(linebpos
);
447 debug
!("codemap: byte pos %? is on the line at byte pos %?",
449 debug
!("codemap: char pos %? is on the line at char pos %?",
451 debug
!("codemap: byte is on line: %?", line
);
452 assert
!(chpos
>= linechpos
);
456 col
: chpos
- linechpos
/// Renders `sp` as `file:line:col: line:col` using `lookup_char_pos` for
/// both endpoints (no `lookup_char_pos_adj` adjustment). The file name is
/// taken from the location of `sp.lo`.
460 fn span_to_str_no_adj(&self, sp
: span
) -> ~str {
461 let lo
= self.lookup_char_pos(sp
.lo
);
462 let hi
= self.lookup_char_pos(sp
.hi
);
463 return fmt
!("%s:%u:%u: %u:%u", lo
.file
.name
,
464 lo
.line
, lo
.col
.to_uint(), hi
.line
, hi
.col
.to_uint())
/// Finds the FileMap for `bpos` via `lookup_filemap_idx` and returns it
/// together with `bpos` expressed as an offset from that FileMap's
/// `start_pos`.
467 fn lookup_byte_offset(&self, +bpos
: BytePos
)
468 -> FileMapAndBytePos
{
469 let idx
= self.lookup_filemap_idx(bpos
);
470 let fm
= self.files
[idx
];
471 let offset
= bpos
- fm
.start_pos
;
472 return FileMapAndBytePos {fm: fm, pos: offset}
;
475 // Converts an absolute BytePos to a CharPos relative to the file it is
477 fn bytepos_to_local_charpos(&self, +bpos
: BytePos
) -> CharPos
{
478 debug
!("codemap: converting %? to char pos", bpos
);
479 let idx
= self.lookup_filemap_idx(bpos
);
480 let map
= self.files
[idx
];
482 // The number of extra bytes due to multibyte chars in the FileMap
483 let mut total_extra_bytes
= 0;
485 for map
.multibyte_chars
.each
|mbc
| {
486 debug
!("codemap: %?-byte char at %?", mbc
.bytes
, mbc
.pos
);
488 total_extra_bytes
+= mbc
.bytes
;
489 // We should never see a byte position in the middle of a
491 assert
!(bpos
== mbc
.pos
492 || bpos
.to_uint() >= mbc
.pos
.to_uint() + mbc
.bytes
);
498 CharPos(bpos
.to_uint() - total_extra_bytes
)
508 let cm
= CodeMap
::new();
509 let fm
= cm
.new_filemap(~"blork.rs",@
~"first line.\nsecond line");
510 fm
.next_line(BytePos(0));
511 assert_eq
!(&fm
.get_line(0),&~"first line.");
512 // TESTING BROKEN BEHAVIOR:
513 fm
.next_line(BytePos(10));
514 assert_eq
!(&fm
.get_line(1),&~".");
520 let cm
= CodeMap
::new();
521 let fm
= cm
.new_filemap(~"blork.rs",@
~"first line.\nsecond line");
522 // TESTING *REALLY* BROKEN BEHAVIOR:
523 fm
.next_line(BytePos(0));
524 fm
.next_line(BytePos(10));
525 fm
.next_line(BytePos(2));
535 // indent-tabs-mode: nil
537 // buffer-file-coding-system: utf-8-unix