]> git.proxmox.com Git - rustc.git/blame - src/libsyntax_pos/symbol.rs
New upstream version 1.26.2+dfsg1
[rustc.git] / src / libsyntax_pos / symbol.rs
CommitLineData
476ff2be
SL
1// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An "interner" is a data structure that associates values with integer tags and
12//! allows bidirectional lookup; i.e. given a value, one can easily find the
13//! tag, and vice versa.
14
cc61c64b 15use hygiene::SyntaxContext;
0531ce1d 16use GLOBALS;
cc61c64b 17
476ff2be 18use serialize::{Decodable, Decoder, Encodable, Encoder};
476ff2be
SL
19use std::collections::HashMap;
20use std::fmt;
21
cc61c64b
XL
22#[derive(Copy, Clone, PartialEq, Eq, Hash)]
23pub struct Ident {
24 pub name: Symbol,
25 pub ctxt: SyntaxContext,
26}
27
28impl Ident {
29 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
30 Ident { name: name, ctxt: SyntaxContext::empty() }
31 }
32
33 /// Maps a string to an identifier with an empty syntax context.
34 pub fn from_str(string: &str) -> Ident {
35 Ident::with_empty_ctxt(Symbol::intern(string))
36 }
37
ff7c6d11
XL
38 pub fn without_first_quote(&self) -> Ident {
39 Ident { name: Symbol::from(self.name.as_str().trim_left_matches('\'')), ctxt: self.ctxt }
40 }
41
7cac9316
XL
42 pub fn modern(self) -> Ident {
43 Ident { name: self.name, ctxt: self.ctxt.modern() }
cc61c64b
XL
44 }
45}
46
47impl fmt::Debug for Ident {
48 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49 write!(f, "{}{:?}", self.name, self.ctxt)
50 }
51}
52
53impl fmt::Display for Ident {
54 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
55 fmt::Display::fmt(&self.name, f)
56 }
57}
58
59impl Encodable for Ident {
60 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
7cac9316
XL
61 if self.ctxt.modern() == SyntaxContext::empty() {
62 s.emit_str(&self.name.as_str())
63 } else { // FIXME(jseyfried) intercrate hygiene
64 let mut string = "#".to_owned();
65 string.push_str(&self.name.as_str());
66 s.emit_str(&string)
67 }
cc61c64b
XL
68 }
69}
70
71impl Decodable for Ident {
72 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
7cac9316
XL
73 let string = d.read_str()?;
74 Ok(if !string.starts_with('#') {
75 Ident::from_str(&string)
76 } else { // FIXME(jseyfried) intercrate hygiene
77 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
78 })
cc61c64b
XL
79 }
80}
81
476ff2be
SL
82/// A symbol is an interned or gensymed string.
83#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
84pub struct Symbol(u32);
85
0531ce1d
XL
86// The interner is pointed to by a thread local value which is only set on the main thread
87// with parallelization is disabled. So we don't allow Symbol to transfer between threads
88// to avoid panics and other errors, even though it would be memory safe to do so.
89#[cfg(not(parallel_queries))]
476ff2be 90impl !Send for Symbol { }
0531ce1d 91#[cfg(not(parallel_queries))]
041b39d2 92impl !Sync for Symbol { }
476ff2be
SL
93
94impl Symbol {
95 /// Maps a string to its interned representation.
96 pub fn intern(string: &str) -> Self {
97 with_interner(|interner| interner.intern(string))
98 }
99
7cac9316
XL
100 pub fn interned(self) -> Self {
101 with_interner(|interner| interner.interned(self))
102 }
103
476ff2be
SL
104 /// gensym's a new usize, using the current interner.
105 pub fn gensym(string: &str) -> Self {
106 with_interner(|interner| interner.gensym(string))
107 }
108
7cac9316
XL
109 pub fn gensymed(self) -> Self {
110 with_interner(|interner| interner.gensymed(self))
111 }
112
476ff2be
SL
113 pub fn as_str(self) -> InternedString {
114 with_interner(|interner| unsafe {
115 InternedString {
116 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
117 }
118 })
119 }
120
121 pub fn as_u32(self) -> u32 {
122 self.0
123 }
124}
125
ea8adc8c
XL
126impl<'a> From<&'a str> for Symbol {
127 fn from(string: &'a str) -> Symbol {
128 Symbol::intern(string)
129 }
130}
131
476ff2be
SL
132impl fmt::Debug for Symbol {
133 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
ff7c6d11
XL
134 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
135 if is_gensymed {
136 write!(f, "{}({})", self, self.0)
137 } else {
138 write!(f, "{}", self)
139 }
476ff2be
SL
140 }
141}
142
143impl fmt::Display for Symbol {
144 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
145 fmt::Display::fmt(&self.as_str(), f)
146 }
147}
148
149impl Encodable for Symbol {
150 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
151 s.emit_str(&self.as_str())
152 }
153}
154
155impl Decodable for Symbol {
156 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
157 Ok(Symbol::intern(&d.read_str()?))
158 }
159}
160
cc61c64b
XL
161impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
162 fn eq(&self, other: &T) -> bool {
163 self.as_str() == other.deref()
476ff2be
SL
164 }
165}
166
167#[derive(Default)]
168pub struct Interner {
169 names: HashMap<Box<str>, Symbol>,
170 strings: Vec<Box<str>>,
7cac9316 171 gensyms: Vec<Symbol>,
476ff2be
SL
172}
173
174impl Interner {
175 pub fn new() -> Self {
176 Interner::default()
177 }
178
179 fn prefill(init: &[&str]) -> Self {
180 let mut this = Interner::new();
181 for &string in init {
182 this.intern(string);
183 }
184 this
185 }
186
187 pub fn intern(&mut self, string: &str) -> Symbol {
188 if let Some(&name) = self.names.get(string) {
189 return name;
190 }
191
192 let name = Symbol(self.strings.len() as u32);
193 let string = string.to_string().into_boxed_str();
194 self.strings.push(string.clone());
195 self.names.insert(string, name);
196 name
197 }
198
7cac9316
XL
199 pub fn interned(&self, symbol: Symbol) -> Symbol {
200 if (symbol.0 as usize) < self.strings.len() {
201 symbol
202 } else {
203 self.interned(self.gensyms[(!0 - symbol.0) as usize])
204 }
205 }
206
476ff2be 207 fn gensym(&mut self, string: &str) -> Symbol {
7cac9316
XL
208 let symbol = self.intern(string);
209 self.gensymed(symbol)
476ff2be
SL
210 }
211
7cac9316
XL
212 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
213 self.gensyms.push(symbol);
214 Symbol(!0 - self.gensyms.len() as u32 + 1)
215 }
216
ff7c6d11
XL
217 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
218 symbol.0 as usize >= self.strings.len()
219 }
220
7cac9316
XL
221 pub fn get(&self, symbol: Symbol) -> &str {
222 match self.strings.get(symbol.0 as usize) {
223 Some(ref string) => string,
224 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
225 }
476ff2be
SL
226 }
227}
228
// In this macro, there is the requirement that the name (the number) must be monotonically
// increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
// except starting from the next number instead of zero.
macro_rules! declare_keywords {
    ($(($index:expr, $konst:ident, $string:expr))*) => {
        pub mod keywords {
            use super::{Ident, Symbol};

            /// A reserved identifier whose symbol index is fixed by the
            /// keyword table.
            #[derive(Clone, Copy, PartialEq, Eq)]
            pub struct Keyword {
                ident: Ident,
            }

            impl Keyword {
                /// The keyword as an identifier with the empty syntax context.
                #[inline]
                pub fn ident(self) -> Ident {
                    self.ident
                }

                /// The keyword's symbol.
                #[inline]
                pub fn name(self) -> Symbol {
                    self.ident.name
                }
            }

            $(
                #[allow(non_upper_case_globals)]
                pub const $konst: Keyword = Keyword {
                    ident: Ident::with_empty_ctxt(super::Symbol($index)),
                };
            )*
        }

        impl Interner {
            /// Builds an interner pre-filled with every declared string, in
            /// declaration order.
            pub fn fresh() -> Self {
                Interner::prefill(&[$($string,)*])
            }
        }
    };
}
259
260// NB: leaving holes in the ident table is bad! a different ident will get
261// interned with the id from the hole, but it will be between the min and max
262// of the reserved words, and thus tagged as "reserved".
041b39d2 263// After modifying this list adjust `is_special_ident`, `is_used_keyword`/`is_unused_keyword`,
476ff2be
SL
264// this should be rarely necessary though if the keywords are kept in alphabetic order.
265declare_keywords! {
041b39d2
XL
266 // Special reserved identifiers used internally for elided lifetimes,
267 // unnamed method parameters, crate root module, error recovery etc.
0531ce1d
XL
268 (0, Invalid, "")
269 (1, CrateRoot, "{{root}}")
270 (2, DollarCrate, "$crate")
271 (3, Underscore, "_")
041b39d2
XL
272
273 // Keywords used in the language.
0531ce1d
XL
274 (4, As, "as")
275 (5, Box, "box")
276 (6, Break, "break")
277 (7, Const, "const")
278 (8, Continue, "continue")
279 (9, Crate, "crate")
280 (10, Else, "else")
281 (11, Enum, "enum")
282 (12, Extern, "extern")
283 (13, False, "false")
284 (14, Fn, "fn")
285 (15, For, "for")
286 (16, If, "if")
287 (17, Impl, "impl")
288 (18, In, "in")
289 (19, Let, "let")
290 (20, Loop, "loop")
291 (21, Match, "match")
292 (22, Mod, "mod")
293 (23, Move, "move")
294 (24, Mut, "mut")
295 (25, Pub, "pub")
296 (26, Ref, "ref")
297 (27, Return, "return")
298 (28, SelfValue, "self")
299 (29, SelfType, "Self")
300 (30, Static, "static")
301 (31, Struct, "struct")
302 (32, Super, "super")
303 (33, Trait, "trait")
304 (34, True, "true")
305 (35, Type, "type")
306 (36, Unsafe, "unsafe")
307 (37, Use, "use")
308 (38, Where, "where")
309 (39, While, "while")
476ff2be
SL
310
311 // Keywords reserved for future use.
0531ce1d
XL
312 (40, Abstract, "abstract")
313 (41, Alignof, "alignof")
314 (42, Become, "become")
315 (43, Do, "do")
316 (44, Final, "final")
317 (45, Macro, "macro")
318 (46, Offsetof, "offsetof")
319 (47, Override, "override")
320 (48, Priv, "priv")
321 (49, Proc, "proc")
322 (50, Pure, "pure")
323 (51, Sizeof, "sizeof")
324 (52, Typeof, "typeof")
325 (53, Unsized, "unsized")
326 (54, Virtual, "virtual")
327 (55, Yield, "yield")
328
329 // Special lifetime names
330 (56, UnderscoreLifetime, "'_")
331 (57, StaticLifetime, "'static")
476ff2be
SL
332
333 // Weak keywords, have special meaning only in specific contexts.
0531ce1d
XL
334 (58, Auto, "auto")
335 (59, Catch, "catch")
336 (60, Default, "default")
337 (61, Dyn, "dyn")
338 (62, Union, "union")
476ff2be
SL
339}
340
0531ce1d
XL
341// If an interner exists, return it. Otherwise, prepare a fresh one.
342#[inline]
476ff2be 343fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
0531ce1d 344 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
476ff2be
SL
345}
346
347/// Represents a string stored in the thread-local interner. Because the
348/// interner lives for the life of the thread, this can be safely treated as an
349/// immortal string, as long as it never crosses between threads.
350///
351/// FIXME(pcwalton): You must be careful about what you do in the destructors
352/// of objects stored in TLS, because they may run after the interner is
353/// destroyed. In particular, they must not access string contents. This can
354/// be fixed in the future by just leaking all strings until thread death
355/// somehow.
3b2f2976 356#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
476ff2be
SL
357pub struct InternedString {
358 string: &'static str,
359}
360
cc61c64b
XL
361impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> {
362 fn as_ref(&self) -> &U {
363 self.string.as_ref()
364 }
365}
366
367impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString {
368 fn eq(&self, other: &T) -> bool {
369 self.string == other.deref()
370 }
371}
372
373impl ::std::cmp::PartialEq<InternedString> for str {
374 fn eq(&self, other: &InternedString) -> bool {
375 self == other.string
376 }
377}
378
379impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str {
380 fn eq(&self, other: &InternedString) -> bool {
381 *self == other.string
382 }
383}
384
385impl ::std::cmp::PartialEq<InternedString> for String {
386 fn eq(&self, other: &InternedString) -> bool {
387 self == other.string
388 }
389}
390
391impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String {
392 fn eq(&self, other: &InternedString) -> bool {
393 *self == other.string
394 }
395}
396
476ff2be
SL
397impl !Send for InternedString { }
398
399impl ::std::ops::Deref for InternedString {
400 type Target = str;
401 fn deref(&self) -> &str { self.string }
402}
403
404impl fmt::Debug for InternedString {
405 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
406 fmt::Debug::fmt(self.string, f)
407 }
408}
409
410impl fmt::Display for InternedString {
411 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
412 fmt::Display::fmt(self.string, f)
413 }
414}
415
416impl Decodable for InternedString {
417 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
418 Ok(Symbol::intern(&d.read_str()?).as_str())
419 }
420}
421
422impl Encodable for InternedString {
423 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
424 s.emit_str(self.string)
425 }
426}
427
#[cfg(test)]
mod tests {
    use super::*;
    use Globals;

    /// Interned symbols count up from zero; gensyms count down from the top
    /// of the `u32` range.
    #[test]
    fn interner_tests() {
        let mut interner = Interner::new();
        // first one is zero:
        assert_eq!(interner.intern("dog"), Symbol(0));
        // re-use gets the same entry:
        assert_eq!(interner.intern("dog"), Symbol(0));
        // different string gets a different #:
        assert_eq!(interner.intern("cat"), Symbol(1));
        assert_eq!(interner.intern("cat"), Symbol(1));
        // dog is still at zero
        assert_eq!(interner.intern("dog"), Symbol(0));
        assert_eq!(interner.gensym("zebra"), Symbol(!0));
        // gensym of same string gets new number :
        assert_eq!(interner.gensym("zebra"), Symbol(!0 - 1));
        // gensym of *existing* string gets new number:
        assert_eq!(interner.gensym("dog"), Symbol(!0 - 2));
    }

    /// Stripping the quote from `'break` yields the `break` keyword's symbol.
    #[test]
    fn without_first_quote_test() {
        GLOBALS.set(&Globals::new(), || {
            let ident = Ident::from_str("'break");
            assert_eq!(ident.without_first_quote().name, keywords::Break.name());
        });
    }
}