]>
Commit | Line | Data |
---|---|---|
476ff2be SL |
1 | // Copyright 2016 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! An "interner" is a data structure that associates values with usize tags and | |
12 | //! allows bidirectional lookup; i.e. given a value, one can easily find the | |
13 | //! type, and vice versa. | |
14 | ||
cc61c64b XL |
15 | use hygiene::SyntaxContext; |
16 | ||
476ff2be SL |
17 | use serialize::{Decodable, Decoder, Encodable, Encoder}; |
18 | use std::cell::RefCell; | |
19 | use std::collections::HashMap; | |
20 | use std::fmt; | |
21 | ||
cc61c64b XL |
22 | #[derive(Copy, Clone, PartialEq, Eq, Hash)] |
23 | pub struct Ident { | |
24 | pub name: Symbol, | |
25 | pub ctxt: SyntaxContext, | |
26 | } | |
27 | ||
28 | impl Ident { | |
29 | pub const fn with_empty_ctxt(name: Symbol) -> Ident { | |
30 | Ident { name: name, ctxt: SyntaxContext::empty() } | |
31 | } | |
32 | ||
33 | /// Maps a string to an identifier with an empty syntax context. | |
34 | pub fn from_str(string: &str) -> Ident { | |
35 | Ident::with_empty_ctxt(Symbol::intern(string)) | |
36 | } | |
37 | ||
7cac9316 XL |
38 | pub fn modern(self) -> Ident { |
39 | Ident { name: self.name, ctxt: self.ctxt.modern() } | |
cc61c64b XL |
40 | } |
41 | } | |
42 | ||
43 | impl fmt::Debug for Ident { | |
44 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
45 | write!(f, "{}{:?}", self.name, self.ctxt) | |
46 | } | |
47 | } | |
48 | ||
49 | impl fmt::Display for Ident { | |
50 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
51 | fmt::Display::fmt(&self.name, f) | |
52 | } | |
53 | } | |
54 | ||
55 | impl Encodable for Ident { | |
56 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
7cac9316 XL |
57 | if self.ctxt.modern() == SyntaxContext::empty() { |
58 | s.emit_str(&self.name.as_str()) | |
59 | } else { // FIXME(jseyfried) intercrate hygiene | |
60 | let mut string = "#".to_owned(); | |
61 | string.push_str(&self.name.as_str()); | |
62 | s.emit_str(&string) | |
63 | } | |
cc61c64b XL |
64 | } |
65 | } | |
66 | ||
67 | impl Decodable for Ident { | |
68 | fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> { | |
7cac9316 XL |
69 | let string = d.read_str()?; |
70 | Ok(if !string.starts_with('#') { | |
71 | Ident::from_str(&string) | |
72 | } else { // FIXME(jseyfried) intercrate hygiene | |
73 | Ident::with_empty_ctxt(Symbol::gensym(&string[1..])) | |
74 | }) | |
cc61c64b XL |
75 | } |
76 | } | |
77 | ||
476ff2be SL |
78 | /// A symbol is an interned or gensymed string. |
79 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | |
80 | pub struct Symbol(u32); | |
81 | ||
82 | // The interner in thread-local, so `Symbol` shouldn't move between threads. | |
83 | impl !Send for Symbol { } | |
041b39d2 | 84 | impl !Sync for Symbol { } |
476ff2be SL |
85 | |
86 | impl Symbol { | |
87 | /// Maps a string to its interned representation. | |
88 | pub fn intern(string: &str) -> Self { | |
89 | with_interner(|interner| interner.intern(string)) | |
90 | } | |
91 | ||
7cac9316 XL |
92 | pub fn interned(self) -> Self { |
93 | with_interner(|interner| interner.interned(self)) | |
94 | } | |
95 | ||
476ff2be SL |
96 | /// gensym's a new usize, using the current interner. |
97 | pub fn gensym(string: &str) -> Self { | |
98 | with_interner(|interner| interner.gensym(string)) | |
99 | } | |
100 | ||
7cac9316 XL |
101 | pub fn gensymed(self) -> Self { |
102 | with_interner(|interner| interner.gensymed(self)) | |
103 | } | |
104 | ||
476ff2be SL |
105 | pub fn as_str(self) -> InternedString { |
106 | with_interner(|interner| unsafe { | |
107 | InternedString { | |
108 | string: ::std::mem::transmute::<&str, &str>(interner.get(self)) | |
109 | } | |
110 | }) | |
111 | } | |
112 | ||
113 | pub fn as_u32(self) -> u32 { | |
114 | self.0 | |
115 | } | |
116 | } | |
117 | ||
ea8adc8c XL |
118 | impl<'a> From<&'a str> for Symbol { |
119 | fn from(string: &'a str) -> Symbol { | |
120 | Symbol::intern(string) | |
121 | } | |
122 | } | |
123 | ||
476ff2be SL |
124 | impl fmt::Debug for Symbol { |
125 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
126 | write!(f, "{}({})", self, self.0) | |
127 | } | |
128 | } | |
129 | ||
130 | impl fmt::Display for Symbol { | |
131 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
132 | fmt::Display::fmt(&self.as_str(), f) | |
133 | } | |
134 | } | |
135 | ||
136 | impl Encodable for Symbol { | |
137 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
138 | s.emit_str(&self.as_str()) | |
139 | } | |
140 | } | |
141 | ||
142 | impl Decodable for Symbol { | |
143 | fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> { | |
144 | Ok(Symbol::intern(&d.read_str()?)) | |
145 | } | |
146 | } | |
147 | ||
cc61c64b XL |
148 | impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol { |
149 | fn eq(&self, other: &T) -> bool { | |
150 | self.as_str() == other.deref() | |
476ff2be SL |
151 | } |
152 | } | |
153 | ||
154 | #[derive(Default)] | |
155 | pub struct Interner { | |
156 | names: HashMap<Box<str>, Symbol>, | |
157 | strings: Vec<Box<str>>, | |
7cac9316 | 158 | gensyms: Vec<Symbol>, |
476ff2be SL |
159 | } |
160 | ||
161 | impl Interner { | |
162 | pub fn new() -> Self { | |
163 | Interner::default() | |
164 | } | |
165 | ||
166 | fn prefill(init: &[&str]) -> Self { | |
167 | let mut this = Interner::new(); | |
168 | for &string in init { | |
169 | this.intern(string); | |
170 | } | |
171 | this | |
172 | } | |
173 | ||
174 | pub fn intern(&mut self, string: &str) -> Symbol { | |
175 | if let Some(&name) = self.names.get(string) { | |
176 | return name; | |
177 | } | |
178 | ||
179 | let name = Symbol(self.strings.len() as u32); | |
180 | let string = string.to_string().into_boxed_str(); | |
181 | self.strings.push(string.clone()); | |
182 | self.names.insert(string, name); | |
183 | name | |
184 | } | |
185 | ||
7cac9316 XL |
186 | pub fn interned(&self, symbol: Symbol) -> Symbol { |
187 | if (symbol.0 as usize) < self.strings.len() { | |
188 | symbol | |
189 | } else { | |
190 | self.interned(self.gensyms[(!0 - symbol.0) as usize]) | |
191 | } | |
192 | } | |
193 | ||
476ff2be | 194 | fn gensym(&mut self, string: &str) -> Symbol { |
7cac9316 XL |
195 | let symbol = self.intern(string); |
196 | self.gensymed(symbol) | |
476ff2be SL |
197 | } |
198 | ||
7cac9316 XL |
199 | fn gensymed(&mut self, symbol: Symbol) -> Symbol { |
200 | self.gensyms.push(symbol); | |
201 | Symbol(!0 - self.gensyms.len() as u32 + 1) | |
202 | } | |
203 | ||
204 | pub fn get(&self, symbol: Symbol) -> &str { | |
205 | match self.strings.get(symbol.0 as usize) { | |
206 | Some(ref string) => string, | |
207 | None => self.get(self.gensyms[(!0 - symbol.0) as usize]), | |
208 | } | |
476ff2be SL |
209 | } |
210 | } | |
211 | ||
212 | // In this macro, there is the requirement that the name (the number) must be monotonically | |
213 | // increasing by one in the special identifiers, starting at 0; the same holds for the keywords, | |
214 | // except starting from the next number instead of zero. | |
215 | macro_rules! declare_keywords {( | |
216 | $( ($index: expr, $konst: ident, $string: expr) )* | |
217 | ) => { | |
218 | pub mod keywords { | |
cc61c64b | 219 | use super::{Symbol, Ident}; |
476ff2be SL |
220 | #[derive(Clone, Copy, PartialEq, Eq)] |
221 | pub struct Keyword { | |
cc61c64b | 222 | ident: Ident, |
476ff2be SL |
223 | } |
224 | impl Keyword { | |
cc61c64b XL |
225 | #[inline] pub fn ident(self) -> Ident { self.ident } |
226 | #[inline] pub fn name(self) -> Symbol { self.ident.name } | |
476ff2be SL |
227 | } |
228 | $( | |
229 | #[allow(non_upper_case_globals)] | |
230 | pub const $konst: Keyword = Keyword { | |
cc61c64b | 231 | ident: Ident::with_empty_ctxt(super::Symbol($index)) |
476ff2be SL |
232 | }; |
233 | )* | |
234 | } | |
235 | ||
236 | impl Interner { | |
237 | fn fresh() -> Self { | |
238 | Interner::prefill(&[$($string,)*]) | |
239 | } | |
240 | } | |
241 | }} | |
242 | ||
243 | // NB: leaving holes in the ident table is bad! a different ident will get | |
244 | // interned with the id from the hole, but it will be between the min and max | |
245 | // of the reserved words, and thus tagged as "reserved". | |
041b39d2 | 246 | // After modifying this list adjust `is_special_ident`, `is_used_keyword`/`is_unused_keyword`, |
476ff2be SL |
247 | // this should be rarely necessary though if the keywords are kept in alphabetic order. |
248 | declare_keywords! { | |
041b39d2 XL |
249 | // Special reserved identifiers used internally for elided lifetimes, |
250 | // unnamed method parameters, crate root module, error recovery etc. | |
476ff2be | 251 | (0, Invalid, "") |
041b39d2 XL |
252 | (1, CrateRoot, "{{root}}") |
253 | (2, DollarCrate, "$crate") | |
254 | ||
255 | // Keywords used in the language. | |
256 | (3, As, "as") | |
257 | (4, Box, "box") | |
258 | (5, Break, "break") | |
259 | (6, Const, "const") | |
260 | (7, Continue, "continue") | |
261 | (8, Crate, "crate") | |
262 | (9, Else, "else") | |
263 | (10, Enum, "enum") | |
264 | (11, Extern, "extern") | |
265 | (12, False, "false") | |
266 | (13, Fn, "fn") | |
267 | (14, For, "for") | |
268 | (15, If, "if") | |
269 | (16, Impl, "impl") | |
270 | (17, In, "in") | |
271 | (18, Let, "let") | |
272 | (19, Loop, "loop") | |
273 | (20, Match, "match") | |
274 | (21, Mod, "mod") | |
275 | (22, Move, "move") | |
276 | (23, Mut, "mut") | |
277 | (24, Pub, "pub") | |
278 | (25, Ref, "ref") | |
279 | (26, Return, "return") | |
280 | (27, SelfValue, "self") | |
281 | (28, SelfType, "Self") | |
282 | (29, Static, "static") | |
283 | (30, Struct, "struct") | |
284 | (31, Super, "super") | |
285 | (32, Trait, "trait") | |
286 | (33, True, "true") | |
287 | (34, Type, "type") | |
288 | (35, Unsafe, "unsafe") | |
289 | (36, Use, "use") | |
290 | (37, Where, "where") | |
291 | (38, While, "while") | |
476ff2be SL |
292 | |
293 | // Keywords reserved for future use. | |
041b39d2 XL |
294 | (39, Abstract, "abstract") |
295 | (40, Alignof, "alignof") | |
296 | (41, Become, "become") | |
297 | (42, Do, "do") | |
298 | (43, Final, "final") | |
299 | (44, Macro, "macro") | |
300 | (45, Offsetof, "offsetof") | |
301 | (46, Override, "override") | |
302 | (47, Priv, "priv") | |
303 | (48, Proc, "proc") | |
304 | (49, Pure, "pure") | |
305 | (50, Sizeof, "sizeof") | |
306 | (51, Typeof, "typeof") | |
307 | (52, Unsized, "unsized") | |
308 | (53, Virtual, "virtual") | |
309 | (54, Yield, "yield") | |
476ff2be SL |
310 | |
311 | // Weak keywords, have special meaning only in specific contexts. | |
abe05a73 XL |
312 | (55, Auto, "auto") |
313 | (56, Catch, "catch") | |
314 | (57, Default, "default") | |
315 | (58, Dyn, "dyn") | |
316 | (59, StaticLifetime, "'static") | |
317 | (60, Union, "union") | |
476ff2be SL |
318 | } |
319 | ||
320 | // If an interner exists in TLS, return it. Otherwise, prepare a fresh one. | |
321 | fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T { | |
322 | thread_local!(static INTERNER: RefCell<Interner> = { | |
323 | RefCell::new(Interner::fresh()) | |
324 | }); | |
325 | INTERNER.with(|interner| f(&mut *interner.borrow_mut())) | |
326 | } | |
327 | ||
328 | /// Represents a string stored in the thread-local interner. Because the | |
329 | /// interner lives for the life of the thread, this can be safely treated as an | |
330 | /// immortal string, as long as it never crosses between threads. | |
331 | /// | |
332 | /// FIXME(pcwalton): You must be careful about what you do in the destructors | |
333 | /// of objects stored in TLS, because they may run after the interner is | |
334 | /// destroyed. In particular, they must not access string contents. This can | |
335 | /// be fixed in the future by just leaking all strings until thread death | |
336 | /// somehow. | |
3b2f2976 | 337 | #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)] |
476ff2be SL |
338 | pub struct InternedString { |
339 | string: &'static str, | |
340 | } | |
341 | ||
cc61c64b XL |
342 | impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> { |
343 | fn as_ref(&self) -> &U { | |
344 | self.string.as_ref() | |
345 | } | |
346 | } | |
347 | ||
348 | impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString { | |
349 | fn eq(&self, other: &T) -> bool { | |
350 | self.string == other.deref() | |
351 | } | |
352 | } | |
353 | ||
354 | impl ::std::cmp::PartialEq<InternedString> for str { | |
355 | fn eq(&self, other: &InternedString) -> bool { | |
356 | self == other.string | |
357 | } | |
358 | } | |
359 | ||
360 | impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str { | |
361 | fn eq(&self, other: &InternedString) -> bool { | |
362 | *self == other.string | |
363 | } | |
364 | } | |
365 | ||
366 | impl ::std::cmp::PartialEq<InternedString> for String { | |
367 | fn eq(&self, other: &InternedString) -> bool { | |
368 | self == other.string | |
369 | } | |
370 | } | |
371 | ||
372 | impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String { | |
373 | fn eq(&self, other: &InternedString) -> bool { | |
374 | *self == other.string | |
375 | } | |
376 | } | |
377 | ||
476ff2be SL |
378 | impl !Send for InternedString { } |
379 | ||
380 | impl ::std::ops::Deref for InternedString { | |
381 | type Target = str; | |
382 | fn deref(&self) -> &str { self.string } | |
383 | } | |
384 | ||
385 | impl fmt::Debug for InternedString { | |
386 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
387 | fmt::Debug::fmt(self.string, f) | |
388 | } | |
389 | } | |
390 | ||
391 | impl fmt::Display for InternedString { | |
392 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
393 | fmt::Display::fmt(self.string, f) | |
394 | } | |
395 | } | |
396 | ||
397 | impl Decodable for InternedString { | |
398 | fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> { | |
399 | Ok(Symbol::intern(&d.read_str()?).as_str()) | |
400 | } | |
401 | } | |
402 | ||
403 | impl Encodable for InternedString { | |
404 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
405 | s.emit_str(self.string) | |
406 | } | |
407 | } | |
408 | ||
409 | #[cfg(test)] | |
410 | mod tests { | |
411 | use super::*; | |
476ff2be SL |
412 | |
413 | #[test] | |
414 | fn interner_tests() { | |
415 | let mut i: Interner = Interner::new(); | |
416 | // first one is zero: | |
8bb4bdeb | 417 | assert_eq!(i.intern("dog"), Symbol(0)); |
476ff2be | 418 | // re-use gets the same entry: |
abe05a73 | 419 | assert_eq!(i.intern("dog"), Symbol(0)); |
476ff2be | 420 | // different string gets a different #: |
8bb4bdeb XL |
421 | assert_eq!(i.intern("cat"), Symbol(1)); |
422 | assert_eq!(i.intern("cat"), Symbol(1)); | |
476ff2be | 423 | // dog is still at zero |
8bb4bdeb | 424 | assert_eq!(i.intern("dog"), Symbol(0)); |
7cac9316 | 425 | assert_eq!(i.gensym("zebra"), Symbol(4294967295)); |
476ff2be | 426 | // gensym of same string gets new number : |
7cac9316 | 427 | assert_eq!(i.gensym("zebra"), Symbol(4294967294)); |
476ff2be | 428 | // gensym of *existing* string gets new number: |
7cac9316 | 429 | assert_eq!(i.gensym("dog"), Symbol(4294967293)); |
476ff2be SL |
430 | } |
431 | } |