]>
Commit | Line | Data |
---|---|---|
476ff2be | 1 | //! An "interner" is a data structure that associates values with usize tags and |
0731742a | 2 | //! allows bidirectional lookup; i.e., given a value, one can easily find the |
476ff2be SL |
3 | //! tag, and vice versa. |
4 | ||
94b46f34 | 5 | use arena::DroplessArena; |
0731742a XL |
6 | use rustc_data_structures::fx::FxHashMap; |
7 | use rustc_data_structures::indexed_vec::Idx; | |
476ff2be | 8 | use serialize::{Decodable, Decoder, Encodable, Encoder}; |
0731742a | 9 | |
476ff2be | 10 | use std::fmt; |
94b46f34 | 11 | use std::str; |
83c7162d XL |
12 | use std::cmp::{PartialEq, Ordering, PartialOrd, Ord}; |
13 | use std::hash::{Hash, Hasher}; | |
476ff2be | 14 | |
0731742a XL |
15 | use hygiene::SyntaxContext; |
16 | use {Span, DUMMY_SP, GLOBALS}; | |
17 | ||
83c7162d | 18 | #[derive(Copy, Clone, Eq)] |
cc61c64b XL |
19 | pub struct Ident { |
20 | pub name: Symbol, | |
83c7162d | 21 | pub span: Span, |
cc61c64b XL |
22 | } |
23 | ||
24 | impl Ident { | |
83c7162d XL |
25 | #[inline] |
26 | pub const fn new(name: Symbol, span: Span) -> Ident { | |
27 | Ident { name, span } | |
28 | } | |
0731742a | 29 | |
83c7162d | 30 | #[inline] |
cc61c64b | 31 | pub const fn with_empty_ctxt(name: Symbol) -> Ident { |
83c7162d XL |
32 | Ident::new(name, DUMMY_SP) |
33 | } | |
34 | ||
35 | /// Maps an interned string to an identifier with an empty syntax context. | |
36 | pub fn from_interned_str(string: InternedString) -> Ident { | |
37 | Ident::with_empty_ctxt(string.as_symbol()) | |
cc61c64b XL |
38 | } |
39 | ||
40 | /// Maps a string to an identifier with an empty syntax context. | |
41 | pub fn from_str(string: &str) -> Ident { | |
42 | Ident::with_empty_ctxt(Symbol::intern(string)) | |
43 | } | |
44 | ||
83c7162d XL |
45 | /// Replace `lo` and `hi` with those from `span`, but keep hygiene context. |
46 | pub fn with_span_pos(self, span: Span) -> Ident { | |
47 | Ident::new(self.name, span.with_ctxt(self.span.ctxt())) | |
48 | } | |
49 | ||
50 | pub fn without_first_quote(self) -> Ident { | |
0731742a | 51 | Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span) |
ff7c6d11 XL |
52 | } |
53 | ||
8faf50e0 XL |
54 | /// "Normalize" ident for use in comparisons using "item hygiene". |
55 | /// Identifiers with same string value become same if they came from the same "modern" macro | |
0731742a | 56 | /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from |
8faf50e0 XL |
57 | /// different "modern" macros. |
58 | /// Technically, this operation strips all non-opaque marks from ident's syntactic context. | |
7cac9316 | 59 | pub fn modern(self) -> Ident { |
83c7162d XL |
60 | Ident::new(self.name, self.span.modern()) |
61 | } | |
62 | ||
8faf50e0 XL |
63 | /// "Normalize" ident for use in comparisons using "local variable hygiene". |
64 | /// Identifiers with same string value become same if they came from the same non-transparent | |
0731742a | 65 | /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different |
8faf50e0 XL |
66 | /// non-transparent macros. |
67 | /// Technically, this operation strips all transparent marks from ident's syntactic context. | |
68 | pub fn modern_and_legacy(self) -> Ident { | |
69 | Ident::new(self.name, self.span.modern_and_legacy()) | |
70 | } | |
71 | ||
83c7162d XL |
72 | pub fn gensym(self) -> Ident { |
73 | Ident::new(self.name.gensymed(), self.span) | |
74 | } | |
94b46f34 | 75 | |
0731742a XL |
76 | pub fn gensym_if_underscore(self) -> Ident { |
77 | if self.name == keywords::Underscore.name() { self.gensym() } else { self } | |
78 | } | |
79 | ||
94b46f34 XL |
80 | pub fn as_str(self) -> LocalInternedString { |
81 | self.name.as_str() | |
82 | } | |
8faf50e0 XL |
83 | |
84 | pub fn as_interned_str(self) -> InternedString { | |
85 | self.name.as_interned_str() | |
86 | } | |
83c7162d XL |
87 | } |
88 | ||
89 | impl PartialEq for Ident { | |
90 | fn eq(&self, rhs: &Self) -> bool { | |
91 | self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt() | |
92 | } | |
93 | } | |
94 | ||
95 | impl Hash for Ident { | |
96 | fn hash<H: Hasher>(&self, state: &mut H) { | |
97 | self.name.hash(state); | |
98 | self.span.ctxt().hash(state); | |
cc61c64b XL |
99 | } |
100 | } | |
101 | ||
102 | impl fmt::Debug for Ident { | |
103 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
83c7162d | 104 | write!(f, "{}{:?}", self.name, self.span.ctxt()) |
cc61c64b XL |
105 | } |
106 | } | |
107 | ||
108 | impl fmt::Display for Ident { | |
109 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
110 | fmt::Display::fmt(&self.name, f) | |
111 | } | |
112 | } | |
113 | ||
114 | impl Encodable for Ident { | |
115 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
83c7162d | 116 | if self.span.ctxt().modern() == SyntaxContext::empty() { |
94b46f34 | 117 | s.emit_str(&self.as_str()) |
0731742a | 118 | } else { // FIXME(jseyfried): intercrate hygiene |
7cac9316 | 119 | let mut string = "#".to_owned(); |
94b46f34 | 120 | string.push_str(&self.as_str()); |
7cac9316 XL |
121 | s.emit_str(&string) |
122 | } | |
cc61c64b XL |
123 | } |
124 | } | |
125 | ||
126 | impl Decodable for Ident { | |
127 | fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> { | |
7cac9316 XL |
128 | let string = d.read_str()?; |
129 | Ok(if !string.starts_with('#') { | |
130 | Ident::from_str(&string) | |
0731742a | 131 | } else { // FIXME(jseyfried): intercrate hygiene |
7cac9316 XL |
132 | Ident::with_empty_ctxt(Symbol::gensym(&string[1..])) |
133 | }) | |
cc61c64b XL |
134 | } |
135 | } | |
136 | ||
0731742a XL |
137 | /// A symbol is an interned or gensymed string. The use of newtype_index! means |
138 | /// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves | |
139 | /// the last 256 values for tagging purposes. | |
140 | /// | |
141 | /// Note that Symbol cannot be a newtype_index! directly because it implements | |
142 | /// fmt::Debug, Encodable, and Decodable in special ways. | |
476ff2be | 143 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
0731742a XL |
144 | pub struct Symbol(SymbolIndex); |
145 | ||
146 | newtype_index! { | |
147 | pub struct SymbolIndex { .. } | |
148 | } | |
476ff2be | 149 | |
0531ce1d | 150 | // The interner is pointed to by a thread local value which is only set on the main thread |
0731742a | 151 | // when parallelization is disabled. So we don't allow `Symbol` to transfer between threads |
0531ce1d XL |
152 | // to avoid panics and other errors, even though it would be memory safe to do so. |
153 | #[cfg(not(parallel_queries))] | |
476ff2be | 154 | impl !Send for Symbol { } |
0531ce1d | 155 | #[cfg(not(parallel_queries))] |
041b39d2 | 156 | impl !Sync for Symbol { } |
476ff2be SL |
157 | |
158 | impl Symbol { | |
0731742a XL |
159 | const fn new(n: u32) -> Self { |
160 | Symbol(SymbolIndex::from_u32_const(n)) | |
161 | } | |
162 | ||
476ff2be SL |
163 | /// Maps a string to its interned representation. |
164 | pub fn intern(string: &str) -> Self { | |
165 | with_interner(|interner| interner.intern(string)) | |
166 | } | |
167 | ||
7cac9316 XL |
168 | pub fn interned(self) -> Self { |
169 | with_interner(|interner| interner.interned(self)) | |
170 | } | |
171 | ||
0731742a | 172 | /// Gensyms a new symbol for `string`, using the current interner. |
476ff2be SL |
173 | pub fn gensym(string: &str) -> Self { |
174 | with_interner(|interner| interner.gensym(string)) | |
175 | } | |
176 | ||
7cac9316 XL |
177 | pub fn gensymed(self) -> Self { |
178 | with_interner(|interner| interner.gensymed(self)) | |
179 | } | |
180 | ||
83c7162d | 181 | pub fn as_str(self) -> LocalInternedString { |
476ff2be | 182 | with_interner(|interner| unsafe { |
83c7162d | 183 | LocalInternedString { |
476ff2be SL |
184 | string: ::std::mem::transmute::<&str, &str>(interner.get(self)) |
185 | } | |
186 | }) | |
187 | } | |
188 | ||
83c7162d XL |
189 | pub fn as_interned_str(self) -> InternedString { |
190 | with_interner(|interner| InternedString { | |
191 | symbol: interner.interned(self) | |
192 | }) | |
476ff2be | 193 | } |
476ff2be | 194 | |
83c7162d | 195 | pub fn as_u32(self) -> u32 { |
0731742a | 196 | self.0.as_u32() |
ea8adc8c XL |
197 | } |
198 | } | |
199 | ||
476ff2be SL |
200 | impl fmt::Debug for Symbol { |
201 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
ff7c6d11 XL |
202 | let is_gensymed = with_interner(|interner| interner.is_gensymed(*self)); |
203 | if is_gensymed { | |
0731742a | 204 | write!(f, "{}({:?})", self, self.0) |
ff7c6d11 XL |
205 | } else { |
206 | write!(f, "{}", self) | |
207 | } | |
476ff2be SL |
208 | } |
209 | } | |
210 | ||
211 | impl fmt::Display for Symbol { | |
212 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
213 | fmt::Display::fmt(&self.as_str(), f) | |
214 | } | |
215 | } | |
216 | ||
217 | impl Encodable for Symbol { | |
218 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
219 | s.emit_str(&self.as_str()) | |
220 | } | |
221 | } | |
222 | ||
223 | impl Decodable for Symbol { | |
224 | fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> { | |
225 | Ok(Symbol::intern(&d.read_str()?)) | |
226 | } | |
227 | } | |
228 | ||
cc61c64b XL |
229 | impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol { |
230 | fn eq(&self, other: &T) -> bool { | |
231 | self.as_str() == other.deref() | |
476ff2be SL |
232 | } |
233 | } | |
234 | ||
0731742a XL |
235 | // The `&'static str`s in this type actually point into the arena. |
236 | // | |
237 | // Note that normal symbols are indexed upward from 0, and gensyms are indexed | |
238 | // downward from SymbolIndex::MAX_AS_U32. | |
0bf4aa26 | 239 | #[derive(Default)] |
476ff2be | 240 | pub struct Interner { |
94b46f34 XL |
241 | arena: DroplessArena, |
242 | names: FxHashMap<&'static str, Symbol>, | |
243 | strings: Vec<&'static str>, | |
7cac9316 | 244 | gensyms: Vec<Symbol>, |
476ff2be SL |
245 | } |
246 | ||
247 | impl Interner { | |
476ff2be | 248 | fn prefill(init: &[&str]) -> Self { |
0bf4aa26 | 249 | let mut this = Interner::default(); |
476ff2be | 250 | for &string in init { |
94b46f34 | 251 | if string == "" { |
0731742a XL |
252 | // We can't allocate empty strings in the arena, so handle this here. |
253 | let name = Symbol::new(this.strings.len() as u32); | |
94b46f34 XL |
254 | this.names.insert("", name); |
255 | this.strings.push(""); | |
256 | } else { | |
257 | this.intern(string); | |
258 | } | |
476ff2be SL |
259 | } |
260 | this | |
261 | } | |
262 | ||
263 | pub fn intern(&mut self, string: &str) -> Symbol { | |
264 | if let Some(&name) = self.names.get(string) { | |
265 | return name; | |
266 | } | |
267 | ||
0731742a | 268 | let name = Symbol::new(self.strings.len() as u32); |
94b46f34 | 269 | |
0731742a XL |
270 | // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be |
271 | // UTF-8. | |
94b46f34 XL |
272 | let string: &str = unsafe { |
273 | str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes())) | |
274 | }; | |
0731742a XL |
275 | // It is safe to extend the arena allocation to `'static` because we only access |
276 | // these while the arena is still alive. | |
94b46f34 XL |
277 | let string: &'static str = unsafe { |
278 | &*(string as *const str) | |
279 | }; | |
280 | self.strings.push(string); | |
476ff2be SL |
281 | self.names.insert(string, name); |
282 | name | |
283 | } | |
284 | ||
7cac9316 | 285 | pub fn interned(&self, symbol: Symbol) -> Symbol { |
0731742a | 286 | if (symbol.0.as_usize()) < self.strings.len() { |
7cac9316 XL |
287 | symbol |
288 | } else { | |
0731742a | 289 | self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]) |
7cac9316 XL |
290 | } |
291 | } | |
292 | ||
476ff2be | 293 | fn gensym(&mut self, string: &str) -> Symbol { |
7cac9316 XL |
294 | let symbol = self.intern(string); |
295 | self.gensymed(symbol) | |
476ff2be SL |
296 | } |
297 | ||
7cac9316 XL |
298 | fn gensymed(&mut self, symbol: Symbol) -> Symbol { |
299 | self.gensyms.push(symbol); | |
0731742a | 300 | Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1) |
7cac9316 XL |
301 | } |
302 | ||
ff7c6d11 | 303 | fn is_gensymed(&mut self, symbol: Symbol) -> bool { |
0731742a | 304 | symbol.0.as_usize() >= self.strings.len() |
ff7c6d11 XL |
305 | } |
306 | ||
7cac9316 | 307 | pub fn get(&self, symbol: Symbol) -> &str { |
0731742a | 308 | match self.strings.get(symbol.0.as_usize()) { |
94b46f34 | 309 | Some(string) => string, |
0731742a | 310 | None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]), |
7cac9316 | 311 | } |
476ff2be SL |
312 | } |
313 | } | |
314 | ||
315 | // In this macro, there is the requirement that the name (the number) must be monotonically | |
316 | // increasing by one in the special identifiers, starting at 0; the same holds for the keywords, | |
317 | // except starting from the next number instead of zero. | |
318 | macro_rules! declare_keywords {( | |
319 | $( ($index: expr, $konst: ident, $string: expr) )* | |
320 | ) => { | |
321 | pub mod keywords { | |
cc61c64b | 322 | use super::{Symbol, Ident}; |
476ff2be SL |
323 | #[derive(Clone, Copy, PartialEq, Eq)] |
324 | pub struct Keyword { | |
cc61c64b | 325 | ident: Ident, |
476ff2be SL |
326 | } |
327 | impl Keyword { | |
cc61c64b XL |
328 | #[inline] pub fn ident(self) -> Ident { self.ident } |
329 | #[inline] pub fn name(self) -> Symbol { self.ident.name } | |
476ff2be SL |
330 | } |
331 | $( | |
332 | #[allow(non_upper_case_globals)] | |
333 | pub const $konst: Keyword = Keyword { | |
0731742a | 334 | ident: Ident::with_empty_ctxt(super::Symbol::new($index)) |
476ff2be SL |
335 | }; |
336 | )* | |
94b46f34 XL |
337 | |
338 | impl ::std::str::FromStr for Keyword { | |
339 | type Err = (); | |
340 | ||
341 | fn from_str(s: &str) -> Result<Self, ()> { | |
342 | match s { | |
343 | $($string => Ok($konst),)* | |
344 | _ => Err(()), | |
345 | } | |
346 | } | |
347 | } | |
476ff2be SL |
348 | } |
349 | ||
350 | impl Interner { | |
0531ce1d | 351 | pub fn fresh() -> Self { |
476ff2be SL |
352 | Interner::prefill(&[$($string,)*]) |
353 | } | |
354 | } | |
355 | }} | |
356 | ||
0731742a | 357 | // N.B., leaving holes in the ident table is bad! A different ident will get |
476ff2be SL |
358 | // interned with the id from the hole, but it will be between the min and max |
359 | // of the reserved words, and thus tagged as "reserved". | |
94b46f34 | 360 | // After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`; |
476ff2be SL |
361 | // this should rarely be necessary, though, if the keywords are kept in alphabetical order. |
362 | declare_keywords! { | |
041b39d2 XL |
363 | // Special reserved identifiers used internally for elided lifetimes, |
364 | // unnamed method parameters, crate root module, error recovery etc. | |
0531ce1d | 365 | (0, Invalid, "") |
0731742a | 366 | (1, PathRoot, "{{root}}") |
0531ce1d XL |
367 | (2, DollarCrate, "$crate") |
368 | (3, Underscore, "_") | |
041b39d2 | 369 | |
0731742a | 370 | // Keywords that are used in stable Rust. |
0531ce1d XL |
371 | (4, As, "as") |
372 | (5, Box, "box") | |
373 | (6, Break, "break") | |
374 | (7, Const, "const") | |
375 | (8, Continue, "continue") | |
376 | (9, Crate, "crate") | |
377 | (10, Else, "else") | |
378 | (11, Enum, "enum") | |
379 | (12, Extern, "extern") | |
380 | (13, False, "false") | |
381 | (14, Fn, "fn") | |
382 | (15, For, "for") | |
383 | (16, If, "if") | |
384 | (17, Impl, "impl") | |
385 | (18, In, "in") | |
386 | (19, Let, "let") | |
387 | (20, Loop, "loop") | |
388 | (21, Match, "match") | |
389 | (22, Mod, "mod") | |
390 | (23, Move, "move") | |
391 | (24, Mut, "mut") | |
392 | (25, Pub, "pub") | |
393 | (26, Ref, "ref") | |
394 | (27, Return, "return") | |
0731742a XL |
395 | (28, SelfLower, "self") |
396 | (29, SelfUpper, "Self") | |
0531ce1d XL |
397 | (30, Static, "static") |
398 | (31, Struct, "struct") | |
399 | (32, Super, "super") | |
400 | (33, Trait, "trait") | |
401 | (34, True, "true") | |
402 | (35, Type, "type") | |
403 | (36, Unsafe, "unsafe") | |
404 | (37, Use, "use") | |
405 | (38, Where, "where") | |
406 | (39, While, "while") | |
476ff2be | 407 | |
0731742a | 408 | // Keywords that are used in unstable Rust or reserved for future use. |
0531ce1d | 409 | (40, Abstract, "abstract") |
94b46f34 XL |
410 | (41, Become, "become") |
411 | (42, Do, "do") | |
412 | (43, Final, "final") | |
413 | (44, Macro, "macro") | |
414 | (45, Override, "override") | |
415 | (46, Priv, "priv") | |
416 | (47, Typeof, "typeof") | |
417 | (48, Unsized, "unsized") | |
418 | (49, Virtual, "virtual") | |
419 | (50, Yield, "yield") | |
420 | ||
0731742a XL |
421 | // Edition-specific keywords that are used in stable Rust. |
422 | (51, Dyn, "dyn") // >= 2018 Edition only | |
423 | ||
424 | // Edition-specific keywords that are used in unstable Rust or reserved for future use. | |
425 | (52, Async, "async") // >= 2018 Edition only | |
0bf4aa26 | 426 | (53, Try, "try") // >= 2018 Edition only |
0531ce1d XL |
427 | |
428 | // Special lifetime names | |
0bf4aa26 XL |
429 | (54, UnderscoreLifetime, "'_") |
430 | (55, StaticLifetime, "'static") | |
476ff2be SL |
431 | |
432 | // Weak keywords, have special meaning only in specific contexts. | |
0bf4aa26 XL |
433 | (56, Auto, "auto") |
434 | (57, Catch, "catch") | |
435 | (58, Default, "default") | |
0731742a XL |
436 | (59, Existential, "existential") |
437 | (60, Union, "union") | |
94b46f34 XL |
438 | } |
439 | ||
440 | impl Symbol { | |
0731742a XL |
441 | fn is_used_keyword_2018(self) -> bool { |
442 | self == keywords::Dyn.name() | |
443 | } | |
444 | ||
94b46f34 | 445 | fn is_unused_keyword_2018(self) -> bool { |
0bf4aa26 | 446 | self >= keywords::Async.name() && self <= keywords::Try.name() |
94b46f34 XL |
447 | } |
448 | } | |
449 | ||
450 | impl Ident { | |
0731742a | 451 | // Returns `true` for reserved identifiers used internally for elided lifetimes, |
94b46f34 XL |
452 | // unnamed method parameters, crate root module, error recovery etc. |
453 | pub fn is_special(self) -> bool { | |
454 | self.name <= keywords::Underscore.name() | |
455 | } | |
456 | ||
457 | /// Returns `true` if the token is a keyword used in the language. | |
458 | pub fn is_used_keyword(self) -> bool { | |
0731742a XL |
459 | // Note: `span.edition()` is relatively expensive, don't call it unless necessary. |
460 | self.name >= keywords::As.name() && self.name <= keywords::While.name() || | |
461 | self.name.is_used_keyword_2018() && self.span.rust_2018() | |
94b46f34 XL |
462 | } |
463 | ||
464 | /// Returns `true` if the token is a keyword reserved for possible future use. | |
465 | pub fn is_unused_keyword(self) -> bool { | |
466 | // Note: `span.edition()` is relatively expensive, don't call it unless necessary. | |
467 | self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() || | |
13cf67c4 | 468 | self.name.is_unused_keyword_2018() && self.span.rust_2018() |
94b46f34 XL |
469 | } |
470 | ||
471 | /// Returns `true` if the token is either a special identifier or a keyword. | |
472 | pub fn is_reserved(self) -> bool { | |
473 | self.is_special() || self.is_used_keyword() || self.is_unused_keyword() | |
474 | } | |
475 | ||
476 | /// A keyword or reserved identifier that can be used as a path segment. | |
477 | pub fn is_path_segment_keyword(self) -> bool { | |
478 | self.name == keywords::Super.name() || | |
0731742a XL |
479 | self.name == keywords::SelfLower.name() || |
480 | self.name == keywords::SelfUpper.name() || | |
94b46f34 | 481 | self.name == keywords::Crate.name() || |
0731742a | 482 | self.name == keywords::PathRoot.name() || |
94b46f34 XL |
483 | self.name == keywords::DollarCrate.name() |
484 | } | |
485 | ||
486 | // We see this identifier in a normal identifier position, like variable name or a type. | |
487 | // How was it written originally? Did it use the raw form? Let's try to guess. | |
488 | pub fn is_raw_guess(self) -> bool { | |
0731742a | 489 | self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() && |
94b46f34 XL |
490 | self.is_reserved() && !self.is_path_segment_keyword() |
491 | } | |
476ff2be SL |
492 | } |
493 | ||
0531ce1d XL |
494 | // Runs `f` with mutable access to the interner held in `GLOBALS.symbol_interner` (locked for the duration of the call). |
495 | #[inline] | |
476ff2be | 496 | fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T { |
0531ce1d | 497 | GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock())) |
476ff2be SL |
498 | } |
499 | ||
83c7162d XL |
500 | /// Represents a string stored in the interner. Because the interner outlives any thread |
501 | /// which uses this type, we can safely treat `string`, which points to interner data, | |
502 | /// as an immortal string, as long as this type never crosses between threads. | |
0731742a XL |
503 | // FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`, |
504 | // by creating a new thread right after constructing the interner. | |
3b2f2976 | 505 | #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)] |
83c7162d | 506 | pub struct LocalInternedString { |
476ff2be SL |
507 | string: &'static str, |
508 | } | |
509 | ||
83c7162d XL |
510 | impl LocalInternedString { |
511 | pub fn as_interned_str(self) -> InternedString { | |
512 | InternedString { | |
513 | symbol: Symbol::intern(self.string) | |
514 | } | |
515 | } | |
a1dfa0c6 XL |
516 | |
517 | pub fn get(&self) -> &'static str { | |
518 | self.string | |
519 | } | |
83c7162d XL |
520 | } |
521 | ||
522 | impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString | |
523 | where | |
524 | str: ::std::convert::AsRef<U> | |
525 | { | |
cc61c64b XL |
526 | fn as_ref(&self) -> &U { |
527 | self.string.as_ref() | |
528 | } | |
529 | } | |
530 | ||
83c7162d | 531 | impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString { |
cc61c64b XL |
532 | fn eq(&self, other: &T) -> bool { |
533 | self.string == other.deref() | |
534 | } | |
535 | } | |
536 | ||
83c7162d XL |
537 | impl ::std::cmp::PartialEq<LocalInternedString> for str { |
538 | fn eq(&self, other: &LocalInternedString) -> bool { | |
cc61c64b XL |
539 | self == other.string |
540 | } | |
541 | } | |
542 | ||
83c7162d XL |
543 | impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str { |
544 | fn eq(&self, other: &LocalInternedString) -> bool { | |
cc61c64b XL |
545 | *self == other.string |
546 | } | |
547 | } | |
548 | ||
83c7162d XL |
549 | impl ::std::cmp::PartialEq<LocalInternedString> for String { |
550 | fn eq(&self, other: &LocalInternedString) -> bool { | |
cc61c64b XL |
551 | self == other.string |
552 | } | |
553 | } | |
554 | ||
83c7162d XL |
555 | impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String { |
556 | fn eq(&self, other: &LocalInternedString) -> bool { | |
cc61c64b XL |
557 | *self == other.string |
558 | } | |
559 | } | |
560 | ||
83c7162d XL |
561 | impl !Send for LocalInternedString {} |
562 | impl !Sync for LocalInternedString {} | |
476ff2be | 563 | |
83c7162d | 564 | impl ::std::ops::Deref for LocalInternedString { |
476ff2be SL |
565 | type Target = str; |
566 | fn deref(&self) -> &str { self.string } | |
567 | } | |
568 | ||
83c7162d | 569 | impl fmt::Debug for LocalInternedString { |
476ff2be SL |
570 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
571 | fmt::Debug::fmt(self.string, f) | |
572 | } | |
573 | } | |
574 | ||
83c7162d | 575 | impl fmt::Display for LocalInternedString { |
476ff2be SL |
576 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
577 | fmt::Display::fmt(self.string, f) | |
578 | } | |
579 | } | |
580 | ||
83c7162d XL |
581 | impl Decodable for LocalInternedString { |
582 | fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> { | |
583 | Ok(Symbol::intern(&d.read_str()?).as_str()) | |
584 | } | |
585 | } | |
586 | ||
587 | impl Encodable for LocalInternedString { | |
588 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
589 | s.emit_str(self.string) | |
590 | } | |
591 | } | |
592 | ||
0731742a | 593 | /// Represents a string stored in the string interner. |
83c7162d XL |
594 | #[derive(Clone, Copy, Eq)] |
595 | pub struct InternedString { | |
596 | symbol: Symbol, | |
597 | } | |
598 | ||
599 | impl InternedString { | |
600 | pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R { | |
601 | let str = with_interner(|interner| { | |
602 | interner.get(self.symbol) as *const str | |
603 | }); | |
604 | // This is safe because the interner keeps string alive until it is dropped. | |
605 | // We can access it because we know the interner is still alive since we use a | |
b7449926 | 606 | // scoped thread local to access it, and it was alive at the beginning of this scope |
83c7162d XL |
607 | unsafe { f(&*str) } |
608 | } | |
609 | ||
610 | pub fn as_symbol(self) -> Symbol { | |
611 | self.symbol | |
612 | } | |
613 | ||
614 | pub fn as_str(self) -> LocalInternedString { | |
615 | self.symbol.as_str() | |
616 | } | |
617 | } | |
618 | ||
619 | impl Hash for InternedString { | |
620 | fn hash<H: Hasher>(&self, state: &mut H) { | |
621 | self.with(|str| str.hash(state)) | |
622 | } | |
623 | } | |
624 | ||
625 | impl PartialOrd<InternedString> for InternedString { | |
626 | fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> { | |
627 | if self.symbol == other.symbol { | |
628 | return Some(Ordering::Equal); | |
629 | } | |
94b46f34 | 630 | self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str))) |
83c7162d XL |
631 | } |
632 | } | |
633 | ||
634 | impl Ord for InternedString { | |
635 | fn cmp(&self, other: &InternedString) -> Ordering { | |
636 | if self.symbol == other.symbol { | |
637 | return Ordering::Equal; | |
638 | } | |
639 | self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str))) | |
640 | } | |
641 | } | |
642 | ||
643 | impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString { | |
644 | fn eq(&self, other: &T) -> bool { | |
645 | self.with(|string| string == other.deref()) | |
646 | } | |
647 | } | |
648 | ||
649 | impl PartialEq<InternedString> for InternedString { | |
650 | fn eq(&self, other: &InternedString) -> bool { | |
651 | self.symbol == other.symbol | |
652 | } | |
653 | } | |
654 | ||
655 | impl PartialEq<InternedString> for str { | |
656 | fn eq(&self, other: &InternedString) -> bool { | |
657 | other.with(|string| self == string) | |
658 | } | |
659 | } | |
660 | ||
661 | impl<'a> PartialEq<InternedString> for &'a str { | |
662 | fn eq(&self, other: &InternedString) -> bool { | |
663 | other.with(|string| *self == string) | |
664 | } | |
665 | } | |
666 | ||
667 | impl PartialEq<InternedString> for String { | |
668 | fn eq(&self, other: &InternedString) -> bool { | |
669 | other.with(|string| self == string) | |
670 | } | |
671 | } | |
672 | ||
673 | impl<'a> PartialEq<InternedString> for &'a String { | |
674 | fn eq(&self, other: &InternedString) -> bool { | |
675 | other.with(|string| *self == string) | |
676 | } | |
677 | } | |
678 | ||
679 | impl ::std::convert::From<InternedString> for String { | |
680 | fn from(val: InternedString) -> String { | |
681 | val.as_symbol().to_string() | |
682 | } | |
683 | } | |
684 | ||
685 | impl fmt::Debug for InternedString { | |
686 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
687 | self.with(|str| fmt::Debug::fmt(&str, f)) | |
688 | } | |
689 | } | |
690 | ||
691 | impl fmt::Display for InternedString { | |
692 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
693 | self.with(|str| fmt::Display::fmt(&str, f)) | |
694 | } | |
695 | } | |
696 | ||
476ff2be SL |
697 | impl Decodable for InternedString { |
698 | fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> { | |
83c7162d | 699 | Ok(Symbol::intern(&d.read_str()?).as_interned_str()) |
476ff2be SL |
700 | } |
701 | } | |
702 | ||
703 | impl Encodable for InternedString { | |
704 | fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { | |
83c7162d | 705 | self.with(|string| s.emit_str(string)) |
476ff2be SL |
706 | } |
707 | } | |
708 | ||
709 | #[cfg(test)] | |
710 | mod tests { | |
711 | use super::*; | |
0531ce1d | 712 | use Globals; |
476ff2be SL |
713 | |
714 | #[test] | |
715 | fn interner_tests() { | |
0bf4aa26 | 716 | let mut i: Interner = Interner::default(); |
476ff2be | 717 | // first one is zero: |
0731742a | 718 | assert_eq!(i.intern("dog"), Symbol::new(0)); |
476ff2be | 719 | // re-use gets the same entry: |
0731742a | 720 | assert_eq!(i.intern("dog"), Symbol::new(0)); |
476ff2be | 721 | // different string gets a different #: |
0731742a XL |
722 | assert_eq!(i.intern("cat"), Symbol::new(1)); |
723 | assert_eq!(i.intern("cat"), Symbol::new(1)); | |
476ff2be | 724 | // dog is still at zero |
0731742a XL |
725 | assert_eq!(i.intern("dog"), Symbol::new(0)); |
726 | assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32)); | |
727 | // gensym of same string gets new number: | |
728 | assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1)); | |
476ff2be | 729 | // gensym of *existing* string gets new number: |
0731742a | 730 | assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2)); |
476ff2be | 731 | } |
ff7c6d11 XL |
732 | |
733 | #[test] | |
734 | fn without_first_quote_test() { | |
0531ce1d XL |
735 | GLOBALS.set(&Globals::new(), || { |
736 | let i = Ident::from_str("'break"); | |
737 | assert_eq!(i.without_first_quote().name, keywords::Break.name()); | |
738 | }); | |
ff7c6d11 | 739 | } |
476ff2be | 740 | } |