]> git.proxmox.com Git - rustc.git/blame - src/libsyntax_pos/symbol.rs
New upstream version 1.21.0+dfsg1
[rustc.git] / src / libsyntax_pos / symbol.rs
CommitLineData
476ff2be
SL
1// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
//! An "interner" is a data structure that associates values with integer tags
//! and allows bidirectional lookup; i.e. given a value, one can easily find
//! its tag, and vice versa.
14
cc61c64b
XL
15use hygiene::SyntaxContext;
16
476ff2be
SL
17use serialize::{Decodable, Decoder, Encodable, Encoder};
18use std::cell::RefCell;
19use std::collections::HashMap;
20use std::fmt;
21
cc61c64b
XL
22#[derive(Copy, Clone, PartialEq, Eq, Hash)]
23pub struct Ident {
24 pub name: Symbol,
25 pub ctxt: SyntaxContext,
26}
27
28impl Ident {
29 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
30 Ident { name: name, ctxt: SyntaxContext::empty() }
31 }
32
33 /// Maps a string to an identifier with an empty syntax context.
34 pub fn from_str(string: &str) -> Ident {
35 Ident::with_empty_ctxt(Symbol::intern(string))
36 }
37
7cac9316
XL
38 pub fn modern(self) -> Ident {
39 Ident { name: self.name, ctxt: self.ctxt.modern() }
cc61c64b
XL
40 }
41}
42
43impl fmt::Debug for Ident {
44 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45 write!(f, "{}{:?}", self.name, self.ctxt)
46 }
47}
48
49impl fmt::Display for Ident {
50 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51 fmt::Display::fmt(&self.name, f)
52 }
53}
54
55impl Encodable for Ident {
56 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
7cac9316
XL
57 if self.ctxt.modern() == SyntaxContext::empty() {
58 s.emit_str(&self.name.as_str())
59 } else { // FIXME(jseyfried) intercrate hygiene
60 let mut string = "#".to_owned();
61 string.push_str(&self.name.as_str());
62 s.emit_str(&string)
63 }
cc61c64b
XL
64 }
65}
66
67impl Decodable for Ident {
68 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
7cac9316
XL
69 let string = d.read_str()?;
70 Ok(if !string.starts_with('#') {
71 Ident::from_str(&string)
72 } else { // FIXME(jseyfried) intercrate hygiene
73 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
74 })
cc61c64b
XL
75 }
76}
77
476ff2be
SL
78/// A symbol is an interned or gensymed string.
79#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
80pub struct Symbol(u32);
81
82// The interner in thread-local, so `Symbol` shouldn't move between threads.
83impl !Send for Symbol { }
041b39d2 84impl !Sync for Symbol { }
476ff2be
SL
85
86impl Symbol {
87 /// Maps a string to its interned representation.
88 pub fn intern(string: &str) -> Self {
89 with_interner(|interner| interner.intern(string))
90 }
91
7cac9316
XL
92 pub fn interned(self) -> Self {
93 with_interner(|interner| interner.interned(self))
94 }
95
476ff2be
SL
96 /// gensym's a new usize, using the current interner.
97 pub fn gensym(string: &str) -> Self {
98 with_interner(|interner| interner.gensym(string))
99 }
100
7cac9316
XL
101 pub fn gensymed(self) -> Self {
102 with_interner(|interner| interner.gensymed(self))
103 }
104
476ff2be
SL
105 pub fn as_str(self) -> InternedString {
106 with_interner(|interner| unsafe {
107 InternedString {
108 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
109 }
110 })
111 }
112
113 pub fn as_u32(self) -> u32 {
114 self.0
115 }
116}
117
118impl fmt::Debug for Symbol {
119 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
120 write!(f, "{}({})", self, self.0)
121 }
122}
123
124impl fmt::Display for Symbol {
125 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 fmt::Display::fmt(&self.as_str(), f)
127 }
128}
129
130impl Encodable for Symbol {
131 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
132 s.emit_str(&self.as_str())
133 }
134}
135
136impl Decodable for Symbol {
137 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
138 Ok(Symbol::intern(&d.read_str()?))
139 }
140}
141
cc61c64b
XL
142impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
143 fn eq(&self, other: &T) -> bool {
144 self.as_str() == other.deref()
476ff2be
SL
145 }
146}
147
148#[derive(Default)]
149pub struct Interner {
150 names: HashMap<Box<str>, Symbol>,
151 strings: Vec<Box<str>>,
7cac9316 152 gensyms: Vec<Symbol>,
476ff2be
SL
153}
154
155impl Interner {
156 pub fn new() -> Self {
157 Interner::default()
158 }
159
160 fn prefill(init: &[&str]) -> Self {
161 let mut this = Interner::new();
162 for &string in init {
163 this.intern(string);
164 }
165 this
166 }
167
168 pub fn intern(&mut self, string: &str) -> Symbol {
169 if let Some(&name) = self.names.get(string) {
170 return name;
171 }
172
173 let name = Symbol(self.strings.len() as u32);
174 let string = string.to_string().into_boxed_str();
175 self.strings.push(string.clone());
176 self.names.insert(string, name);
177 name
178 }
179
7cac9316
XL
180 pub fn interned(&self, symbol: Symbol) -> Symbol {
181 if (symbol.0 as usize) < self.strings.len() {
182 symbol
183 } else {
184 self.interned(self.gensyms[(!0 - symbol.0) as usize])
185 }
186 }
187
476ff2be 188 fn gensym(&mut self, string: &str) -> Symbol {
7cac9316
XL
189 let symbol = self.intern(string);
190 self.gensymed(symbol)
476ff2be
SL
191 }
192
7cac9316
XL
193 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
194 self.gensyms.push(symbol);
195 Symbol(!0 - self.gensyms.len() as u32 + 1)
196 }
197
198 pub fn get(&self, symbol: Symbol) -> &str {
199 match self.strings.get(symbol.0 as usize) {
200 Some(ref string) => string,
201 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
202 }
476ff2be
SL
203 }
204}
205
// In this macro, there is the requirement that the name (the number) must be
// monotonically increasing by one in the special identifiers, starting at 0;
// the same holds for the keywords, except starting from the next number
// instead of zero.
//
// Each `(index, Konst, "string")` entry produces a `keywords::Konst` constant
// wrapping `Symbol(index)`, and the strings, in order, are what
// `Interner::fresh` prefills the interner with.
macro_rules! declare_keywords {(
    $( ($index:expr, $konst:ident, $string:expr) )*
) => {
    pub mod keywords {
        use super::{Ident, Symbol};

        #[derive(Clone, Copy, PartialEq, Eq)]
        pub struct Keyword {
            ident: Ident,
        }

        impl Keyword {
            #[inline]
            pub fn ident(self) -> Ident {
                self.ident
            }

            #[inline]
            pub fn name(self) -> Symbol {
                self.ident.name
            }
        }

        $(
            #[allow(non_upper_case_globals)]
            pub const $konst: Keyword = Keyword {
                ident: Ident::with_empty_ctxt(super::Symbol($index))
            };
        )*
    }

    impl Interner {
        // Builds an interner prefilled with the declared strings, in
        // declaration order.
        fn fresh() -> Self {
            Interner::prefill(&[$($string,)*])
        }
    }
}}
236
237// NB: leaving holes in the ident table is bad! a different ident will get
238// interned with the id from the hole, but it will be between the min and max
239// of the reserved words, and thus tagged as "reserved".
041b39d2 240// After modifying this list adjust `is_special_ident`, `is_used_keyword`/`is_unused_keyword`,
476ff2be
SL
241// this should be rarely necessary though if the keywords are kept in alphabetic order.
242declare_keywords! {
041b39d2
XL
243 // Special reserved identifiers used internally for elided lifetimes,
244 // unnamed method parameters, crate root module, error recovery etc.
476ff2be 245 (0, Invalid, "")
041b39d2
XL
246 (1, CrateRoot, "{{root}}")
247 (2, DollarCrate, "$crate")
248
249 // Keywords used in the language.
250 (3, As, "as")
251 (4, Box, "box")
252 (5, Break, "break")
253 (6, Const, "const")
254 (7, Continue, "continue")
255 (8, Crate, "crate")
256 (9, Else, "else")
257 (10, Enum, "enum")
258 (11, Extern, "extern")
259 (12, False, "false")
260 (13, Fn, "fn")
261 (14, For, "for")
262 (15, If, "if")
263 (16, Impl, "impl")
264 (17, In, "in")
265 (18, Let, "let")
266 (19, Loop, "loop")
267 (20, Match, "match")
268 (21, Mod, "mod")
269 (22, Move, "move")
270 (23, Mut, "mut")
271 (24, Pub, "pub")
272 (25, Ref, "ref")
273 (26, Return, "return")
274 (27, SelfValue, "self")
275 (28, SelfType, "Self")
276 (29, Static, "static")
277 (30, Struct, "struct")
278 (31, Super, "super")
279 (32, Trait, "trait")
280 (33, True, "true")
281 (34, Type, "type")
282 (35, Unsafe, "unsafe")
283 (36, Use, "use")
284 (37, Where, "where")
285 (38, While, "while")
476ff2be
SL
286
287 // Keywords reserved for future use.
041b39d2
XL
288 (39, Abstract, "abstract")
289 (40, Alignof, "alignof")
290 (41, Become, "become")
291 (42, Do, "do")
292 (43, Final, "final")
293 (44, Macro, "macro")
294 (45, Offsetof, "offsetof")
295 (46, Override, "override")
296 (47, Priv, "priv")
297 (48, Proc, "proc")
298 (49, Pure, "pure")
299 (50, Sizeof, "sizeof")
300 (51, Typeof, "typeof")
301 (52, Unsized, "unsized")
302 (53, Virtual, "virtual")
303 (54, Yield, "yield")
476ff2be
SL
304
305 // Weak keywords, have special meaning only in specific contexts.
041b39d2
XL
306 (55, Default, "default")
307 (56, StaticLifetime, "'static")
308 (57, Union, "union")
309 (58, Catch, "catch")
476ff2be
SL
310}
311
312// If an interner exists in TLS, return it. Otherwise, prepare a fresh one.
313fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
314 thread_local!(static INTERNER: RefCell<Interner> = {
315 RefCell::new(Interner::fresh())
316 });
317 INTERNER.with(|interner| f(&mut *interner.borrow_mut()))
318}
319
320/// Represents a string stored in the thread-local interner. Because the
321/// interner lives for the life of the thread, this can be safely treated as an
322/// immortal string, as long as it never crosses between threads.
323///
324/// FIXME(pcwalton): You must be careful about what you do in the destructors
325/// of objects stored in TLS, because they may run after the interner is
326/// destroyed. In particular, they must not access string contents. This can
327/// be fixed in the future by just leaking all strings until thread death
328/// somehow.
3b2f2976 329#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
476ff2be
SL
330pub struct InternedString {
331 string: &'static str,
332}
333
cc61c64b
XL
334impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> {
335 fn as_ref(&self) -> &U {
336 self.string.as_ref()
337 }
338}
339
340impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString {
341 fn eq(&self, other: &T) -> bool {
342 self.string == other.deref()
343 }
344}
345
346impl ::std::cmp::PartialEq<InternedString> for str {
347 fn eq(&self, other: &InternedString) -> bool {
348 self == other.string
349 }
350}
351
352impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str {
353 fn eq(&self, other: &InternedString) -> bool {
354 *self == other.string
355 }
356}
357
358impl ::std::cmp::PartialEq<InternedString> for String {
359 fn eq(&self, other: &InternedString) -> bool {
360 self == other.string
361 }
362}
363
364impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String {
365 fn eq(&self, other: &InternedString) -> bool {
366 *self == other.string
367 }
368}
369
476ff2be
SL
370impl !Send for InternedString { }
371
372impl ::std::ops::Deref for InternedString {
373 type Target = str;
374 fn deref(&self) -> &str { self.string }
375}
376
377impl fmt::Debug for InternedString {
378 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
379 fmt::Debug::fmt(self.string, f)
380 }
381}
382
383impl fmt::Display for InternedString {
384 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
385 fmt::Display::fmt(self.string, f)
386 }
387}
388
389impl Decodable for InternedString {
390 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
391 Ok(Symbol::intern(&d.read_str()?).as_str())
392 }
393}
394
395impl Encodable for InternedString {
396 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
397 s.emit_str(self.string)
398 }
399}
400
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises a fresh, un-prefilled `Interner`: interned strings count up
    /// from zero, gensyms count down from `u32::MAX`.
    #[test]
    fn interner_tests() {
        let mut table = Interner::new();

        // first one is zero:
        let dog = table.intern("dog");
        assert_eq!(dog, Symbol(0));
        // re-use gets the same entry:
        assert_eq!(table.intern("dog"), Symbol(0));

        // different string gets a different #:
        let cat = table.intern("cat");
        assert_eq!(cat, Symbol(1));
        assert_eq!(table.intern("cat"), Symbol(1));

        // dog is still at zero
        assert_eq!(table.intern("dog"), Symbol(0));

        let first_gensym = table.gensym("zebra");
        assert_eq!(first_gensym, Symbol(4294967295));
        // gensym of same string gets new number:
        let second_gensym = table.gensym("zebra");
        assert_eq!(second_gensym, Symbol(4294967294));
        // gensym of *existing* string gets new number:
        let third_gensym = table.gensym("dog");
        assert_eq!(third_gensym, Symbol(4294967293));
    }
}