]> git.proxmox.com Git - rustc.git/blame - src/libsyntax_pos/symbol.rs
New upstream version 1.23.0+dfsg1
[rustc.git] / src / libsyntax_pos / symbol.rs
CommitLineData
476ff2be
SL
1// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An "interner" is a data structure that associates values with usize tags and
12//! allows bidirectional lookup; i.e. given a value, one can easily find the
13//! type, and vice versa.
14
cc61c64b
XL
15use hygiene::SyntaxContext;
16
476ff2be
SL
17use serialize::{Decodable, Decoder, Encodable, Encoder};
18use std::cell::RefCell;
19use std::collections::HashMap;
20use std::fmt;
21
cc61c64b
XL
22#[derive(Copy, Clone, PartialEq, Eq, Hash)]
23pub struct Ident {
24 pub name: Symbol,
25 pub ctxt: SyntaxContext,
26}
27
28impl Ident {
29 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
30 Ident { name: name, ctxt: SyntaxContext::empty() }
31 }
32
33 /// Maps a string to an identifier with an empty syntax context.
34 pub fn from_str(string: &str) -> Ident {
35 Ident::with_empty_ctxt(Symbol::intern(string))
36 }
37
7cac9316
XL
38 pub fn modern(self) -> Ident {
39 Ident { name: self.name, ctxt: self.ctxt.modern() }
cc61c64b
XL
40 }
41}
42
43impl fmt::Debug for Ident {
44 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45 write!(f, "{}{:?}", self.name, self.ctxt)
46 }
47}
48
49impl fmt::Display for Ident {
50 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51 fmt::Display::fmt(&self.name, f)
52 }
53}
54
55impl Encodable for Ident {
56 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
7cac9316
XL
57 if self.ctxt.modern() == SyntaxContext::empty() {
58 s.emit_str(&self.name.as_str())
59 } else { // FIXME(jseyfried) intercrate hygiene
60 let mut string = "#".to_owned();
61 string.push_str(&self.name.as_str());
62 s.emit_str(&string)
63 }
cc61c64b
XL
64 }
65}
66
67impl Decodable for Ident {
68 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
7cac9316
XL
69 let string = d.read_str()?;
70 Ok(if !string.starts_with('#') {
71 Ident::from_str(&string)
72 } else { // FIXME(jseyfried) intercrate hygiene
73 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
74 })
cc61c64b
XL
75 }
76}
77
476ff2be
SL
78/// A symbol is an interned or gensymed string.
79#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
80pub struct Symbol(u32);
81
82// The interner in thread-local, so `Symbol` shouldn't move between threads.
83impl !Send for Symbol { }
041b39d2 84impl !Sync for Symbol { }
476ff2be
SL
85
86impl Symbol {
87 /// Maps a string to its interned representation.
88 pub fn intern(string: &str) -> Self {
89 with_interner(|interner| interner.intern(string))
90 }
91
7cac9316
XL
92 pub fn interned(self) -> Self {
93 with_interner(|interner| interner.interned(self))
94 }
95
476ff2be
SL
96 /// gensym's a new usize, using the current interner.
97 pub fn gensym(string: &str) -> Self {
98 with_interner(|interner| interner.gensym(string))
99 }
100
7cac9316
XL
101 pub fn gensymed(self) -> Self {
102 with_interner(|interner| interner.gensymed(self))
103 }
104
476ff2be
SL
105 pub fn as_str(self) -> InternedString {
106 with_interner(|interner| unsafe {
107 InternedString {
108 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
109 }
110 })
111 }
112
113 pub fn as_u32(self) -> u32 {
114 self.0
115 }
116}
117
ea8adc8c
XL
118impl<'a> From<&'a str> for Symbol {
119 fn from(string: &'a str) -> Symbol {
120 Symbol::intern(string)
121 }
122}
123
476ff2be
SL
124impl fmt::Debug for Symbol {
125 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 write!(f, "{}({})", self, self.0)
127 }
128}
129
130impl fmt::Display for Symbol {
131 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
132 fmt::Display::fmt(&self.as_str(), f)
133 }
134}
135
136impl Encodable for Symbol {
137 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
138 s.emit_str(&self.as_str())
139 }
140}
141
142impl Decodable for Symbol {
143 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
144 Ok(Symbol::intern(&d.read_str()?))
145 }
146}
147
cc61c64b
XL
148impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
149 fn eq(&self, other: &T) -> bool {
150 self.as_str() == other.deref()
476ff2be
SL
151 }
152}
153
154#[derive(Default)]
155pub struct Interner {
156 names: HashMap<Box<str>, Symbol>,
157 strings: Vec<Box<str>>,
7cac9316 158 gensyms: Vec<Symbol>,
476ff2be
SL
159}
160
161impl Interner {
162 pub fn new() -> Self {
163 Interner::default()
164 }
165
166 fn prefill(init: &[&str]) -> Self {
167 let mut this = Interner::new();
168 for &string in init {
169 this.intern(string);
170 }
171 this
172 }
173
174 pub fn intern(&mut self, string: &str) -> Symbol {
175 if let Some(&name) = self.names.get(string) {
176 return name;
177 }
178
179 let name = Symbol(self.strings.len() as u32);
180 let string = string.to_string().into_boxed_str();
181 self.strings.push(string.clone());
182 self.names.insert(string, name);
183 name
184 }
185
7cac9316
XL
186 pub fn interned(&self, symbol: Symbol) -> Symbol {
187 if (symbol.0 as usize) < self.strings.len() {
188 symbol
189 } else {
190 self.interned(self.gensyms[(!0 - symbol.0) as usize])
191 }
192 }
193
476ff2be 194 fn gensym(&mut self, string: &str) -> Symbol {
7cac9316
XL
195 let symbol = self.intern(string);
196 self.gensymed(symbol)
476ff2be
SL
197 }
198
7cac9316
XL
199 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
200 self.gensyms.push(symbol);
201 Symbol(!0 - self.gensyms.len() as u32 + 1)
202 }
203
204 pub fn get(&self, symbol: Symbol) -> &str {
205 match self.strings.get(symbol.0 as usize) {
206 Some(ref string) => string,
207 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
208 }
476ff2be
SL
209 }
210}
211
212// In this macro, there is the requirement that the name (the number) must be monotonically
213// increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
214// except starting from the next number instead of zero.
215macro_rules! declare_keywords {(
216 $( ($index: expr, $konst: ident, $string: expr) )*
217) => {
218 pub mod keywords {
cc61c64b 219 use super::{Symbol, Ident};
476ff2be
SL
220 #[derive(Clone, Copy, PartialEq, Eq)]
221 pub struct Keyword {
cc61c64b 222 ident: Ident,
476ff2be
SL
223 }
224 impl Keyword {
cc61c64b
XL
225 #[inline] pub fn ident(self) -> Ident { self.ident }
226 #[inline] pub fn name(self) -> Symbol { self.ident.name }
476ff2be
SL
227 }
228 $(
229 #[allow(non_upper_case_globals)]
230 pub const $konst: Keyword = Keyword {
cc61c64b 231 ident: Ident::with_empty_ctxt(super::Symbol($index))
476ff2be
SL
232 };
233 )*
234 }
235
236 impl Interner {
237 fn fresh() -> Self {
238 Interner::prefill(&[$($string,)*])
239 }
240 }
241}}
242
243// NB: leaving holes in the ident table is bad! a different ident will get
244// interned with the id from the hole, but it will be between the min and max
245// of the reserved words, and thus tagged as "reserved".
041b39d2 246// After modifying this list adjust `is_special_ident`, `is_used_keyword`/`is_unused_keyword`,
476ff2be
SL
247// this should be rarely necessary though if the keywords are kept in alphabetic order.
248declare_keywords! {
041b39d2
XL
249 // Special reserved identifiers used internally for elided lifetimes,
250 // unnamed method parameters, crate root module, error recovery etc.
476ff2be 251 (0, Invalid, "")
041b39d2
XL
252 (1, CrateRoot, "{{root}}")
253 (2, DollarCrate, "$crate")
254
255 // Keywords used in the language.
256 (3, As, "as")
257 (4, Box, "box")
258 (5, Break, "break")
259 (6, Const, "const")
260 (7, Continue, "continue")
261 (8, Crate, "crate")
262 (9, Else, "else")
263 (10, Enum, "enum")
264 (11, Extern, "extern")
265 (12, False, "false")
266 (13, Fn, "fn")
267 (14, For, "for")
268 (15, If, "if")
269 (16, Impl, "impl")
270 (17, In, "in")
271 (18, Let, "let")
272 (19, Loop, "loop")
273 (20, Match, "match")
274 (21, Mod, "mod")
275 (22, Move, "move")
276 (23, Mut, "mut")
277 (24, Pub, "pub")
278 (25, Ref, "ref")
279 (26, Return, "return")
280 (27, SelfValue, "self")
281 (28, SelfType, "Self")
282 (29, Static, "static")
283 (30, Struct, "struct")
284 (31, Super, "super")
285 (32, Trait, "trait")
286 (33, True, "true")
287 (34, Type, "type")
288 (35, Unsafe, "unsafe")
289 (36, Use, "use")
290 (37, Where, "where")
291 (38, While, "while")
476ff2be
SL
292
293 // Keywords reserved for future use.
041b39d2
XL
294 (39, Abstract, "abstract")
295 (40, Alignof, "alignof")
296 (41, Become, "become")
297 (42, Do, "do")
298 (43, Final, "final")
299 (44, Macro, "macro")
300 (45, Offsetof, "offsetof")
301 (46, Override, "override")
302 (47, Priv, "priv")
303 (48, Proc, "proc")
304 (49, Pure, "pure")
305 (50, Sizeof, "sizeof")
306 (51, Typeof, "typeof")
307 (52, Unsized, "unsized")
308 (53, Virtual, "virtual")
309 (54, Yield, "yield")
476ff2be
SL
310
311 // Weak keywords, have special meaning only in specific contexts.
abe05a73
XL
312 (55, Auto, "auto")
313 (56, Catch, "catch")
314 (57, Default, "default")
315 (58, Dyn, "dyn")
316 (59, StaticLifetime, "'static")
317 (60, Union, "union")
476ff2be
SL
318}
319
320// If an interner exists in TLS, return it. Otherwise, prepare a fresh one.
321fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
322 thread_local!(static INTERNER: RefCell<Interner> = {
323 RefCell::new(Interner::fresh())
324 });
325 INTERNER.with(|interner| f(&mut *interner.borrow_mut()))
326}
327
328/// Represents a string stored in the thread-local interner. Because the
329/// interner lives for the life of the thread, this can be safely treated as an
330/// immortal string, as long as it never crosses between threads.
331///
332/// FIXME(pcwalton): You must be careful about what you do in the destructors
333/// of objects stored in TLS, because they may run after the interner is
334/// destroyed. In particular, they must not access string contents. This can
335/// be fixed in the future by just leaking all strings until thread death
336/// somehow.
3b2f2976 337#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
476ff2be
SL
338pub struct InternedString {
339 string: &'static str,
340}
341
cc61c64b
XL
342impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> {
343 fn as_ref(&self) -> &U {
344 self.string.as_ref()
345 }
346}
347
348impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString {
349 fn eq(&self, other: &T) -> bool {
350 self.string == other.deref()
351 }
352}
353
354impl ::std::cmp::PartialEq<InternedString> for str {
355 fn eq(&self, other: &InternedString) -> bool {
356 self == other.string
357 }
358}
359
360impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str {
361 fn eq(&self, other: &InternedString) -> bool {
362 *self == other.string
363 }
364}
365
366impl ::std::cmp::PartialEq<InternedString> for String {
367 fn eq(&self, other: &InternedString) -> bool {
368 self == other.string
369 }
370}
371
372impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String {
373 fn eq(&self, other: &InternedString) -> bool {
374 *self == other.string
375 }
376}
377
476ff2be
SL
378impl !Send for InternedString { }
379
380impl ::std::ops::Deref for InternedString {
381 type Target = str;
382 fn deref(&self) -> &str { self.string }
383}
384
385impl fmt::Debug for InternedString {
386 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
387 fmt::Debug::fmt(self.string, f)
388 }
389}
390
391impl fmt::Display for InternedString {
392 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
393 fmt::Display::fmt(self.string, f)
394 }
395}
396
397impl Decodable for InternedString {
398 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
399 Ok(Symbol::intern(&d.read_str()?).as_str())
400 }
401}
402
403impl Encodable for InternedString {
404 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
405 s.emit_str(self.string)
406 }
407}
408
409#[cfg(test)]
410mod tests {
411 use super::*;
476ff2be
SL
412
413 #[test]
414 fn interner_tests() {
415 let mut i: Interner = Interner::new();
416 // first one is zero:
8bb4bdeb 417 assert_eq!(i.intern("dog"), Symbol(0));
476ff2be 418 // re-use gets the same entry:
abe05a73 419 assert_eq!(i.intern("dog"), Symbol(0));
476ff2be 420 // different string gets a different #:
8bb4bdeb
XL
421 assert_eq!(i.intern("cat"), Symbol(1));
422 assert_eq!(i.intern("cat"), Symbol(1));
476ff2be 423 // dog is still at zero
8bb4bdeb 424 assert_eq!(i.intern("dog"), Symbol(0));
7cac9316 425 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
476ff2be 426 // gensym of same string gets new number :
7cac9316 427 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
476ff2be 428 // gensym of *existing* string gets new number:
7cac9316 429 assert_eq!(i.gensym("dog"), Symbol(4294967293));
476ff2be
SL
430 }
431}