]>
Commit | Line | Data |
---|---|---|
476ff2be SL |
1 | // Copyright 2016 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! An "interner" is a data structure that associates values with integer tags and | |
12 | //! allows bidirectional lookup; i.e. given a value, one can easily find the | |
13 | //! tag, and vice versa. | |
14 | ||
cc61c64b | 15 | use hygiene::SyntaxContext; |
0531ce1d | 16 | use GLOBALS; |
cc61c64b | 17 | |
476ff2be | 18 | use serialize::{Decodable, Decoder, Encodable, Encoder}; |
476ff2be SL |
19 | use std::collections::HashMap; |
20 | use std::fmt; | |
21 | ||
cc61c64b XL |
22 | #[derive(Copy, Clone, PartialEq, Eq, Hash)] |
23 | pub struct Ident { | |
24 | pub name: Symbol, | |
25 | pub ctxt: SyntaxContext, | |
26 | } | |
27 | ||
28 | impl Ident { | |
29 | pub const fn with_empty_ctxt(name: Symbol) -> Ident { | |
30 | Ident { name: name, ctxt: SyntaxContext::empty() } | |
31 | } | |
32 | ||
33 | /// Maps a string to an identifier with an empty syntax context. | |
34 | pub fn from_str(string: &str) -> Ident { | |
35 | Ident::with_empty_ctxt(Symbol::intern(string)) | |
36 | } | |
37 | ||
ff7c6d11 XL |
38 | pub fn without_first_quote(&self) -> Ident { |
39 | Ident { name: Symbol::from(self.name.as_str().trim_left_matches('\'')), ctxt: self.ctxt } | |
40 | } | |
41 | ||
7cac9316 XL |
42 | pub fn modern(self) -> Ident { |
43 | Ident { name: self.name, ctxt: self.ctxt.modern() } | |
cc61c64b XL |
44 | } |
45 | } | |
46 | ||
47 | impl fmt::Debug for Ident { | |
48 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
49 | write!(f, "{}{:?}", self.name, self.ctxt) | |
50 | } | |
51 | } | |
52 | ||
53 | impl fmt::Display for Ident { | |
54 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
55 | fmt::Display::fmt(&self.name, f) | |
56 | } | |
57 | } | |
58 | ||
59 | impl Encodable for Ident { | |
60 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
7cac9316 XL |
61 | if self.ctxt.modern() == SyntaxContext::empty() { |
62 | s.emit_str(&self.name.as_str()) | |
63 | } else { // FIXME(jseyfried) intercrate hygiene | |
64 | let mut string = "#".to_owned(); | |
65 | string.push_str(&self.name.as_str()); | |
66 | s.emit_str(&string) | |
67 | } | |
cc61c64b XL |
68 | } |
69 | } | |
70 | ||
71 | impl Decodable for Ident { | |
72 | fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> { | |
7cac9316 XL |
73 | let string = d.read_str()?; |
74 | Ok(if !string.starts_with('#') { | |
75 | Ident::from_str(&string) | |
76 | } else { // FIXME(jseyfried) intercrate hygiene | |
77 | Ident::with_empty_ctxt(Symbol::gensym(&string[1..])) | |
78 | }) | |
cc61c64b XL |
79 | } |
80 | } | |
81 | ||
476ff2be SL |
/// A symbol is an interned or gensymed string.
///
/// The wrapped `u32` is a tag handed out by the `Interner`: interned strings are numbered
/// upwards from zero, while gensyms are numbered downwards from `u32::MAX`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol(u32);
85 | ||
0531ce1d XL |
86 | // The interner is pointed to by a thread local value which is only set on the main thread |
87 | // with parallelization is disabled. So we don't allow Symbol to transfer between threads | |
88 | // to avoid panics and other errors, even though it would be memory safe to do so. | |
89 | #[cfg(not(parallel_queries))] | |
476ff2be | 90 | impl !Send for Symbol { } |
0531ce1d | 91 | #[cfg(not(parallel_queries))] |
041b39d2 | 92 | impl !Sync for Symbol { } |
476ff2be SL |
93 | |
94 | impl Symbol { | |
95 | /// Maps a string to its interned representation. | |
96 | pub fn intern(string: &str) -> Self { | |
97 | with_interner(|interner| interner.intern(string)) | |
98 | } | |
99 | ||
7cac9316 XL |
100 | pub fn interned(self) -> Self { |
101 | with_interner(|interner| interner.interned(self)) | |
102 | } | |
103 | ||
476ff2be SL |
104 | /// gensym's a new usize, using the current interner. |
105 | pub fn gensym(string: &str) -> Self { | |
106 | with_interner(|interner| interner.gensym(string)) | |
107 | } | |
108 | ||
7cac9316 XL |
109 | pub fn gensymed(self) -> Self { |
110 | with_interner(|interner| interner.gensymed(self)) | |
111 | } | |
112 | ||
476ff2be SL |
113 | pub fn as_str(self) -> InternedString { |
114 | with_interner(|interner| unsafe { | |
115 | InternedString { | |
116 | string: ::std::mem::transmute::<&str, &str>(interner.get(self)) | |
117 | } | |
118 | }) | |
119 | } | |
120 | ||
121 | pub fn as_u32(self) -> u32 { | |
122 | self.0 | |
123 | } | |
124 | } | |
125 | ||
ea8adc8c XL |
126 | impl<'a> From<&'a str> for Symbol { |
127 | fn from(string: &'a str) -> Symbol { | |
128 | Symbol::intern(string) | |
129 | } | |
130 | } | |
131 | ||
476ff2be SL |
132 | impl fmt::Debug for Symbol { |
133 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
ff7c6d11 XL |
134 | let is_gensymed = with_interner(|interner| interner.is_gensymed(*self)); |
135 | if is_gensymed { | |
136 | write!(f, "{}({})", self, self.0) | |
137 | } else { | |
138 | write!(f, "{}", self) | |
139 | } | |
476ff2be SL |
140 | } |
141 | } | |
142 | ||
143 | impl fmt::Display for Symbol { | |
144 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
145 | fmt::Display::fmt(&self.as_str(), f) | |
146 | } | |
147 | } | |
148 | ||
149 | impl Encodable for Symbol { | |
150 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
151 | s.emit_str(&self.as_str()) | |
152 | } | |
153 | } | |
154 | ||
155 | impl Decodable for Symbol { | |
156 | fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> { | |
157 | Ok(Symbol::intern(&d.read_str()?)) | |
158 | } | |
159 | } | |
160 | ||
cc61c64b XL |
161 | impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol { |
162 | fn eq(&self, other: &T) -> bool { | |
163 | self.as_str() == other.deref() | |
476ff2be SL |
164 | } |
165 | } | |
166 | ||
167 | #[derive(Default)] | |
168 | pub struct Interner { | |
169 | names: HashMap<Box<str>, Symbol>, | |
170 | strings: Vec<Box<str>>, | |
7cac9316 | 171 | gensyms: Vec<Symbol>, |
476ff2be SL |
172 | } |
173 | ||
174 | impl Interner { | |
175 | pub fn new() -> Self { | |
176 | Interner::default() | |
177 | } | |
178 | ||
179 | fn prefill(init: &[&str]) -> Self { | |
180 | let mut this = Interner::new(); | |
181 | for &string in init { | |
182 | this.intern(string); | |
183 | } | |
184 | this | |
185 | } | |
186 | ||
187 | pub fn intern(&mut self, string: &str) -> Symbol { | |
188 | if let Some(&name) = self.names.get(string) { | |
189 | return name; | |
190 | } | |
191 | ||
192 | let name = Symbol(self.strings.len() as u32); | |
193 | let string = string.to_string().into_boxed_str(); | |
194 | self.strings.push(string.clone()); | |
195 | self.names.insert(string, name); | |
196 | name | |
197 | } | |
198 | ||
7cac9316 XL |
199 | pub fn interned(&self, symbol: Symbol) -> Symbol { |
200 | if (symbol.0 as usize) < self.strings.len() { | |
201 | symbol | |
202 | } else { | |
203 | self.interned(self.gensyms[(!0 - symbol.0) as usize]) | |
204 | } | |
205 | } | |
206 | ||
476ff2be | 207 | fn gensym(&mut self, string: &str) -> Symbol { |
7cac9316 XL |
208 | let symbol = self.intern(string); |
209 | self.gensymed(symbol) | |
476ff2be SL |
210 | } |
211 | ||
7cac9316 XL |
212 | fn gensymed(&mut self, symbol: Symbol) -> Symbol { |
213 | self.gensyms.push(symbol); | |
214 | Symbol(!0 - self.gensyms.len() as u32 + 1) | |
215 | } | |
216 | ||
ff7c6d11 XL |
217 | fn is_gensymed(&mut self, symbol: Symbol) -> bool { |
218 | symbol.0 as usize >= self.strings.len() | |
219 | } | |
220 | ||
7cac9316 XL |
221 | pub fn get(&self, symbol: Symbol) -> &str { |
222 | match self.strings.get(symbol.0 as usize) { | |
223 | Some(ref string) => string, | |
224 | None => self.get(self.gensyms[(!0 - symbol.0) as usize]), | |
225 | } | |
476ff2be SL |
226 | } |
227 | } | |
228 | ||
// `declare_keywords!` takes a list of `(index, ConstName, "string")` entries. The indices
// must start at 0 and increase by exactly one from entry to entry (special identifiers
// first, keywords continuing from the next number): `Interner::fresh` interns the strings
// in list order, and each generated constant hard-codes `Symbol(index)`, so the two only
// agree when the numbering has no gaps.
macro_rules! declare_keywords {(
    $( ($index: expr, $konst: ident, $string: expr) )*
) => {
    pub mod keywords {
        use super::{Symbol, Ident};

        /// A pre-interned identifier with an empty syntax context.
        #[derive(Clone, Copy, PartialEq, Eq)]
        pub struct Keyword {
            ident: Ident,
        }

        impl Keyword {
            /// The keyword as an identifier.
            #[inline]
            pub fn ident(self) -> Ident {
                self.ident
            }

            /// The keyword's symbol.
            #[inline]
            pub fn name(self) -> Symbol {
                self.ident.name
            }
        }

        $(
            #[allow(non_upper_case_globals)]
            pub const $konst: Keyword = Keyword {
                ident: Ident::with_empty_ctxt(Symbol($index))
            };
        )*
    }

    impl Interner {
        /// Creates an interner pre-filled with every declared string, in declaration order.
        pub fn fresh() -> Self {
            Interner::prefill(&[$($string,)*])
        }
    }
}}
259 | ||
260 | // NB: leaving holes in the ident table is bad! a different ident will get | |
261 | // interned with the id from the hole, but it will be between the min and max | |
262 | // of the reserved words, and thus tagged as "reserved". | |
041b39d2 | 263 | // After modifying this list adjust `is_special_ident`, `is_used_keyword`/`is_unused_keyword`, |
476ff2be SL |
264 | // this should be rarely necessary though if the keywords are kept in alphabetic order. |
265 | declare_keywords! { | |
041b39d2 XL |
266 | // Special reserved identifiers used internally for elided lifetimes, |
267 | // unnamed method parameters, crate root module, error recovery etc. | |
0531ce1d XL |
268 | (0, Invalid, "") |
269 | (1, CrateRoot, "{{root}}") | |
270 | (2, DollarCrate, "$crate") | |
271 | (3, Underscore, "_") | |
041b39d2 XL |
272 | |
273 | // Keywords used in the language. | |
0531ce1d XL |
274 | (4, As, "as") |
275 | (5, Box, "box") | |
276 | (6, Break, "break") | |
277 | (7, Const, "const") | |
278 | (8, Continue, "continue") | |
279 | (9, Crate, "crate") | |
280 | (10, Else, "else") | |
281 | (11, Enum, "enum") | |
282 | (12, Extern, "extern") | |
283 | (13, False, "false") | |
284 | (14, Fn, "fn") | |
285 | (15, For, "for") | |
286 | (16, If, "if") | |
287 | (17, Impl, "impl") | |
288 | (18, In, "in") | |
289 | (19, Let, "let") | |
290 | (20, Loop, "loop") | |
291 | (21, Match, "match") | |
292 | (22, Mod, "mod") | |
293 | (23, Move, "move") | |
294 | (24, Mut, "mut") | |
295 | (25, Pub, "pub") | |
296 | (26, Ref, "ref") | |
297 | (27, Return, "return") | |
298 | (28, SelfValue, "self") | |
299 | (29, SelfType, "Self") | |
300 | (30, Static, "static") | |
301 | (31, Struct, "struct") | |
302 | (32, Super, "super") | |
303 | (33, Trait, "trait") | |
304 | (34, True, "true") | |
305 | (35, Type, "type") | |
306 | (36, Unsafe, "unsafe") | |
307 | (37, Use, "use") | |
308 | (38, Where, "where") | |
309 | (39, While, "while") | |
476ff2be SL |
310 | |
311 | // Keywords reserved for future use. | |
0531ce1d XL |
312 | (40, Abstract, "abstract") |
313 | (41, Alignof, "alignof") | |
314 | (42, Become, "become") | |
315 | (43, Do, "do") | |
316 | (44, Final, "final") | |
317 | (45, Macro, "macro") | |
318 | (46, Offsetof, "offsetof") | |
319 | (47, Override, "override") | |
320 | (48, Priv, "priv") | |
321 | (49, Proc, "proc") | |
322 | (50, Pure, "pure") | |
323 | (51, Sizeof, "sizeof") | |
324 | (52, Typeof, "typeof") | |
325 | (53, Unsized, "unsized") | |
326 | (54, Virtual, "virtual") | |
327 | (55, Yield, "yield") | |
328 | ||
329 | // Special lifetime names | |
330 | (56, UnderscoreLifetime, "'_") | |
331 | (57, StaticLifetime, "'static") | |
476ff2be SL |
332 | |
333 | // Weak keywords, have special meaning only in specific contexts. | |
0531ce1d XL |
334 | (58, Auto, "auto") |
335 | (59, Catch, "catch") | |
336 | (60, Default, "default") | |
337 | (61, Dyn, "dyn") | |
338 | (62, Union, "union") | |
476ff2be SL |
339 | } |
340 | ||
0531ce1d XL |
341 | // If an interner exists, return it. Otherwise, prepare a fresh one. |
342 | #[inline] | |
476ff2be | 343 | fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T { |
0531ce1d | 344 | GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock())) |
476ff2be SL |
345 | } |
346 | ||
347 | /// Represents a string stored in the thread-local interner. Because the | |
348 | /// interner lives for the life of the thread, this can be safely treated as an | |
349 | /// immortal string, as long as it never crosses between threads. | |
350 | /// | |
351 | /// FIXME(pcwalton): You must be careful about what you do in the destructors | |
352 | /// of objects stored in TLS, because they may run after the interner is | |
353 | /// destroyed. In particular, they must not access string contents. This can | |
354 | /// be fixed in the future by just leaking all strings until thread death | |
355 | /// somehow. | |
3b2f2976 | 356 | #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)] |
476ff2be SL |
357 | pub struct InternedString { |
358 | string: &'static str, | |
359 | } | |
360 | ||
cc61c64b XL |
361 | impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> { |
362 | fn as_ref(&self) -> &U { | |
363 | self.string.as_ref() | |
364 | } | |
365 | } | |
366 | ||
367 | impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString { | |
368 | fn eq(&self, other: &T) -> bool { | |
369 | self.string == other.deref() | |
370 | } | |
371 | } | |
372 | ||
373 | impl ::std::cmp::PartialEq<InternedString> for str { | |
374 | fn eq(&self, other: &InternedString) -> bool { | |
375 | self == other.string | |
376 | } | |
377 | } | |
378 | ||
379 | impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str { | |
380 | fn eq(&self, other: &InternedString) -> bool { | |
381 | *self == other.string | |
382 | } | |
383 | } | |
384 | ||
385 | impl ::std::cmp::PartialEq<InternedString> for String { | |
386 | fn eq(&self, other: &InternedString) -> bool { | |
387 | self == other.string | |
388 | } | |
389 | } | |
390 | ||
391 | impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String { | |
392 | fn eq(&self, other: &InternedString) -> bool { | |
393 | *self == other.string | |
394 | } | |
395 | } | |
396 | ||
476ff2be SL |
397 | impl !Send for InternedString { } |
398 | ||
399 | impl ::std::ops::Deref for InternedString { | |
400 | type Target = str; | |
401 | fn deref(&self) -> &str { self.string } | |
402 | } | |
403 | ||
404 | impl fmt::Debug for InternedString { | |
405 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
406 | fmt::Debug::fmt(self.string, f) | |
407 | } | |
408 | } | |
409 | ||
410 | impl fmt::Display for InternedString { | |
411 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
412 | fmt::Display::fmt(self.string, f) | |
413 | } | |
414 | } | |
415 | ||
416 | impl Decodable for InternedString { | |
417 | fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> { | |
418 | Ok(Symbol::intern(&d.read_str()?).as_str()) | |
419 | } | |
420 | } | |
421 | ||
422 | impl Encodable for InternedString { | |
423 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
424 | s.emit_str(self.string) | |
425 | } | |
426 | } | |
427 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use Globals;

    /// Exercises tag assignment on a stand-alone interner.
    #[test]
    fn interner_tests() {
        let mut interner = Interner::new();

        // The first interned string gets tag zero, and interning it again reuses the tag.
        assert_eq!(interner.intern("dog"), Symbol(0));
        assert_eq!(interner.intern("dog"), Symbol(0));

        // A different string gets the next tag, which is likewise stable.
        assert_eq!(interner.intern("cat"), Symbol(1));
        assert_eq!(interner.intern("cat"), Symbol(1));

        // Interning other strings does not disturb earlier tags.
        assert_eq!(interner.intern("dog"), Symbol(0));

        // Gensyms count down from `u32::MAX`; every call yields a new one, even for a
        // string that was gensymed or interned before.
        assert_eq!(interner.gensym("zebra"), Symbol(!0));
        assert_eq!(interner.gensym("zebra"), Symbol(!0 - 1));
        assert_eq!(interner.gensym("dog"), Symbol(!0 - 2));
    }

    /// Stripping the quote from `'break` must yield the pre-interned `break` keyword.
    #[test]
    fn without_first_quote_test() {
        let globals = Globals::new();
        GLOBALS.set(&globals, || {
            let quoted = Ident::from_str("'break");
            let unquoted = quoted.without_first_quote();
            assert_eq!(unquoted.name, keywords::Break.name());
        });
    }
}