]> git.proxmox.com Git - rustc.git/blob - src/libsyntax_pos/symbol.rs
New upstream version 1.37.0+dfsg1
[rustc.git] / src / libsyntax_pos / symbol.rs
1 //! An "interner" is a data structure that associates values with usize tags and
2 //! allows bidirectional lookup; i.e., given a value, one can easily find the
3 //! type, and vice versa.
4
5 use arena::DroplessArena;
6 use rustc_data_structures::fx::FxHashMap;
7 use rustc_data_structures::indexed_vec::Idx;
8 use rustc_data_structures::newtype_index;
9 use rustc_macros::symbols;
10 use serialize::{Decodable, Decoder, Encodable, Encoder};
11
12 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
13 use std::fmt;
14 use std::hash::{Hash, Hasher};
15 use std::str;
16
17 use crate::hygiene::SyntaxContext;
18 use crate::{Span, DUMMY_SP, GLOBALS};
19
20 symbols! {
21 // After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
22 // this should rarely be necessary, though, if the keywords are kept in alphabetic order.
23 Keywords {
24 // Special reserved identifiers used internally for elided lifetimes,
25 // unnamed method parameters, crate root module, error recovery etc.
26 Invalid: "",
27 PathRoot: "{{root}}",
28 DollarCrate: "$crate",
29 Underscore: "_",
30
31 // Keywords that are used in stable Rust.
32 As: "as",
33 Break: "break",
34 Const: "const",
35 Continue: "continue",
36 Crate: "crate",
37 Else: "else",
38 Enum: "enum",
39 Extern: "extern",
40 False: "false",
41 Fn: "fn",
42 For: "for",
43 If: "if",
44 Impl: "impl",
45 In: "in",
46 Let: "let",
47 Loop: "loop",
48 Match: "match",
49 Mod: "mod",
50 Move: "move",
51 Mut: "mut",
52 Pub: "pub",
53 Ref: "ref",
54 Return: "return",
55 SelfLower: "self",
56 SelfUpper: "Self",
57 Static: "static",
58 Struct: "struct",
59 Super: "super",
60 Trait: "trait",
61 True: "true",
62 Type: "type",
63 Unsafe: "unsafe",
64 Use: "use",
65 Where: "where",
66 While: "while",
67
68 // Keywords that are used in unstable Rust or reserved for future use.
69 Abstract: "abstract",
70 Become: "become",
71 Box: "box",
72 Do: "do",
73 Final: "final",
74 Macro: "macro",
75 Override: "override",
76 Priv: "priv",
77 Typeof: "typeof",
78 Unsized: "unsized",
79 Virtual: "virtual",
80 Yield: "yield",
81
82 // Edition-specific keywords that are used in stable Rust.
83 Dyn: "dyn", // >= 2018 Edition only
84
85 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
86 Async: "async", // >= 2018 Edition only
87 Await: "await", // >= 2018 Edition only
88 Try: "try", // >= 2018 Edition only
89
90 // Special lifetime names
91 UnderscoreLifetime: "'_",
92 StaticLifetime: "'static",
93
94 // Weak keywords, have special meaning only in specific contexts.
95 Auto: "auto",
96 Catch: "catch",
97 Default: "default",
98 Existential: "existential",
99 Union: "union",
100 }
101
102 // Symbols that can be referred to with syntax_pos::sym::*. The symbol is
103 // the stringified identifier unless otherwise specified (e.g.
104 // `proc_dash_macro` represents "proc-macro").
105 //
106 // As well as the symbols listed, there are symbols for the strings
107 // "0", "1", ..., "9", which are accessible via `sym::integer`.
108 Symbols {
109 aarch64_target_feature,
110 abi,
111 abi_amdgpu_kernel,
112 abi_msp430_interrupt,
113 abi_ptx,
114 abi_sysv64,
115 abi_thiscall,
116 abi_unadjusted,
117 abi_vectorcall,
118 abi_x86_interrupt,
119 aborts,
120 advanced_slice_patterns,
121 adx_target_feature,
122 alias,
123 align,
124 alignstack,
125 all,
126 allocator,
127 allocator_internals,
128 alloc_error_handler,
129 allow,
130 allowed,
131 allow_fail,
132 allow_internal_unsafe,
133 allow_internal_unstable,
134 allow_internal_unstable_backcompat_hack,
135 always,
136 and,
137 any,
138 arbitrary_enum_discriminant,
139 arbitrary_self_types,
140 Arguments,
141 ArgumentV1,
142 arm_target_feature,
143 asm,
144 associated_consts,
145 associated_type_bounds,
146 associated_type_defaults,
147 associated_types,
148 async_await,
149 attr,
150 attributes,
151 attr_literals,
152 augmented_assignments,
153 automatically_derived,
154 avx512_target_feature,
155 await_macro,
156 begin_panic,
157 bench,
158 bin,
159 bind_by_move_pattern_guards,
160 block,
161 bool,
162 borrowck_graphviz_postflow,
163 borrowck_graphviz_preflow,
164 box_patterns,
165 box_syntax,
166 braced_empty_structs,
167 C,
168 cdylib,
169 cfg,
170 cfg_attr,
171 cfg_attr_multi,
172 cfg_target_feature,
173 cfg_target_has_atomic,
174 cfg_target_thread_local,
175 cfg_target_vendor,
176 char,
177 clone,
178 Clone,
179 clone_closures,
180 clone_from,
181 closure_to_fn_coercion,
182 cmp,
183 cmpxchg16b_target_feature,
184 cold,
185 compile_error,
186 compiler_builtins,
187 concat_idents,
188 conservative_impl_trait,
189 console,
190 const_compare_raw_pointers,
191 const_constructor,
192 const_fn,
193 const_fn_union,
194 const_generics,
195 const_indexing,
196 const_let,
197 const_panic,
198 const_raw_ptr_deref,
199 const_raw_ptr_to_usize_cast,
200 const_transmute,
201 contents,
202 context,
203 convert,
204 copy_closures,
205 core,
206 core_intrinsics,
207 crate_id,
208 crate_in_paths,
209 crate_local,
210 crate_name,
211 crate_type,
212 crate_visibility_modifier,
213 custom_attribute,
214 custom_derive,
215 custom_inner_attributes,
216 custom_test_frameworks,
217 c_variadic,
218 decl_macro,
219 Default,
220 default_lib_allocator,
221 default_type_parameter_fallback,
222 default_type_params,
223 deny,
224 deprecated,
225 deref,
226 deref_mut,
227 derive,
228 direct,
229 doc,
230 doc_alias,
231 doc_cfg,
232 doc_keyword,
233 doc_masked,
234 doc_spotlight,
235 document_private_items,
236 dotdoteq_in_patterns,
237 dotdot_in_tuple_patterns,
238 double_braced_crate: "{{crate}}",
239 double_braced_impl: "{{impl}}",
240 double_braced_misc: "{{misc}}",
241 double_braced_closure: "{{closure}}",
242 double_braced_constructor: "{{constructor}}",
243 double_braced_constant: "{{constant}}",
244 double_braced_opaque: "{{opaque}}",
245 dropck_eyepatch,
246 dropck_parametricity,
247 drop_types_in_const,
248 dylib,
249 dyn_trait,
250 eh_personality,
251 eh_unwind_resume,
252 enable,
253 eq,
254 err,
255 Err,
256 Equal,
257 except,
258 exclusive_range_pattern,
259 exhaustive_integer_patterns,
260 exhaustive_patterns,
261 existential_type,
262 expected,
263 export_name,
264 expr,
265 extern_absolute_paths,
266 external_doc,
267 extern_crate_item_prelude,
268 extern_crate_self,
269 extern_in_paths,
270 extern_prelude,
271 extern_types,
272 f16c_target_feature,
273 f32,
274 f64,
275 feature,
276 ffi_returns_twice,
277 field,
278 field_init_shorthand,
279 file,
280 fmt,
281 fmt_internals,
282 fn_must_use,
283 forbid,
284 format_args_nl,
285 from,
286 From,
287 from_desugaring,
288 from_error,
289 from_generator,
290 from_method,
291 from_ok,
292 from_usize,
293 fundamental,
294 future,
295 Future,
296 FxHashSet,
297 FxHashMap,
298 gen_future,
299 generators,
300 generic_associated_types,
301 generic_param_attrs,
302 global_allocator,
303 global_asm,
304 globs,
305 hash,
306 Hash,
307 HashSet,
308 HashMap,
309 hexagon_target_feature,
310 hidden,
311 homogeneous_aggregate,
312 html_favicon_url,
313 html_logo_url,
314 html_no_source,
315 html_playground_url,
316 html_root_url,
317 i128,
318 i128_type,
319 i16,
320 i32,
321 i64,
322 i8,
323 ident,
324 if_let,
325 if_while_or_patterns,
326 ignore,
327 impl_header_lifetime_elision,
328 impl_trait_in_bindings,
329 import_shadowing,
330 index,
331 index_mut,
332 in_band_lifetimes,
333 include,
334 inclusive_range_syntax,
335 infer_outlives_requirements,
336 infer_static_outlives_requirements,
337 inline,
338 intel,
339 into_iter,
340 IntoIterator,
341 into_result,
342 intrinsics,
343 irrefutable_let_patterns,
344 isize,
345 issue,
346 issue_5723_bootstrap,
347 issue_tracker_base_url,
348 item,
349 item_like_imports,
350 iter,
351 Iterator,
352 keyword,
353 kind,
354 label,
355 label_break_value,
356 lang,
357 lang_items,
358 let_chains,
359 lhs,
360 lib,
361 lifetime,
362 link,
363 linkage,
364 link_args,
365 link_cfg,
366 link_llvm_intrinsics,
367 link_name,
368 link_section,
369 lint_reasons,
370 literal,
371 local_inner_macros,
372 log_syntax,
373 loop_break_value,
374 macro_at_most_once_rep,
375 macro_escape,
376 macro_export,
377 macro_lifetime_matcher,
378 macro_literal_matcher,
379 macro_reexport,
380 macro_rules,
381 macros_in_extern,
382 macro_use,
383 macro_vis_matcher,
384 main,
385 managed_boxes,
386 marker,
387 marker_trait_attr,
388 masked,
389 match_beginning_vert,
390 match_default_bindings,
391 may_dangle,
392 member_constraints,
393 message,
394 meta,
395 min_const_fn,
396 min_const_unsafe_fn,
397 mips_target_feature,
398 mmx_target_feature,
399 module,
400 more_struct_aliases,
401 movbe_target_feature,
402 must_use,
403 naked,
404 naked_functions,
405 name,
406 needs_allocator,
407 needs_panic_runtime,
408 negate_unsigned,
409 never,
410 never_type,
411 new,
412 next,
413 __next,
414 nll,
415 no_builtins,
416 no_core,
417 no_crate_inject,
418 no_debug,
419 no_default_passes,
420 no_implicit_prelude,
421 no_inline,
422 no_link,
423 no_main,
424 no_mangle,
425 non_ascii_idents,
426 None,
427 non_exhaustive,
428 non_modrs_mods,
429 no_stack_check,
430 no_start,
431 no_std,
432 not,
433 note,
434 Ok,
435 omit_gdb_pretty_printer_section,
436 on,
437 on_unimplemented,
438 oom,
439 ops,
440 optimize,
441 optimize_attribute,
442 optin_builtin_traits,
443 option,
444 Option,
445 opt_out_copy,
446 or,
447 Ord,
448 Ordering,
449 Output,
450 overlapping_marker_traits,
451 packed,
452 panic,
453 panic_handler,
454 panic_impl,
455 panic_implementation,
456 panic_runtime,
457 parent_trait,
458 partial_cmp,
459 param_attrs,
460 PartialOrd,
461 passes,
462 pat,
463 path,
464 pattern_parentheses,
465 Pending,
466 pin,
467 Pin,
468 pinned,
469 platform_intrinsics,
470 plugin,
471 plugin_registrar,
472 plugins,
473 Poll,
474 poll_with_tls_context,
475 powerpc_target_feature,
476 precise_pointer_size_matching,
477 prelude,
478 prelude_import,
479 primitive,
480 proc_dash_macro: "proc-macro",
481 proc_macro,
482 proc_macro_attribute,
483 proc_macro_def_site,
484 proc_macro_derive,
485 proc_macro_expr,
486 proc_macro_gen,
487 proc_macro_hygiene,
488 proc_macro_mod,
489 proc_macro_non_items,
490 proc_macro_path_invoc,
491 profiler_runtime,
492 pub_restricted,
493 pushpop_unsafe,
494 quad_precision_float,
495 question_mark,
496 quote,
497 Range,
498 RangeFrom,
499 RangeFull,
500 RangeInclusive,
501 RangeTo,
502 RangeToInclusive,
503 raw_identifiers,
504 Ready,
505 reason,
506 recursion_limit,
507 reexport_test_harness_main,
508 reflect,
509 relaxed_adts,
510 repr,
511 repr128,
512 repr_align,
513 repr_align_enum,
514 repr_packed,
515 repr_simd,
516 repr_transparent,
517 re_rebalance_coherence,
518 result,
519 Result,
520 Return,
521 rhs,
522 rlib,
523 rt,
524 rtm_target_feature,
525 rust,
526 rust_2015_preview,
527 rust_2018_preview,
528 rust_begin_unwind,
529 rustc,
530 rustc_allocator,
531 rustc_allocator_nounwind,
532 rustc_allow_const_fn_ptr,
533 rustc_args_required_const,
534 rustc_attrs,
535 rustc_clean,
536 rustc_const_unstable,
537 rustc_conversion_suggestion,
538 rustc_copy_clone_marker,
539 rustc_def_path,
540 rustc_deprecated,
541 rustc_diagnostic_macros,
542 rustc_dirty,
543 rustc_doc_only_macro,
544 rustc_dummy,
545 rustc_dump_env_program_clauses,
546 rustc_dump_program_clauses,
547 rustc_dump_user_substs,
548 rustc_error,
549 rustc_expected_cgu_reuse,
550 rustc_if_this_changed,
551 rustc_inherit_overflow_checks,
552 rustc_layout,
553 rustc_layout_scalar_valid_range_end,
554 rustc_layout_scalar_valid_range_start,
555 rustc_mir,
556 rustc_nonnull_optimization_guaranteed,
557 rustc_object_lifetime_default,
558 rustc_on_unimplemented,
559 rustc_outlives,
560 rustc_paren_sugar,
561 rustc_partition_codegened,
562 rustc_partition_reused,
563 rustc_peek,
564 rustc_peek_definite_init,
565 rustc_peek_maybe_init,
566 rustc_peek_maybe_uninit,
567 rustc_private,
568 rustc_proc_macro_decls,
569 rustc_promotable,
570 rustc_regions,
571 rustc_stable,
572 rustc_std_internal_symbol,
573 rustc_symbol_name,
574 rustc_synthetic,
575 rustc_test_marker,
576 rustc_then_this_would_need,
577 rustc_transparent_macro,
578 rustc_variance,
579 rustdoc,
580 rust_eh_personality,
581 rust_eh_unwind_resume,
582 rust_oom,
583 __rust_unstable_column,
584 rvalue_static_promotion,
585 sanitizer_runtime,
586 _Self,
587 self_in_typedefs,
588 self_struct_ctor,
589 Send,
590 should_panic,
591 simd,
592 simd_ffi,
593 since,
594 size,
595 slice_patterns,
596 slicing_syntax,
597 Some,
598 specialization,
599 speed,
600 spotlight,
601 sse4a_target_feature,
602 stable,
603 staged_api,
604 start,
605 static_in_const,
606 staticlib,
607 static_nobundle,
608 static_recursion,
609 std,
610 str,
611 stmt,
612 stmt_expr_attributes,
613 stop_after_dataflow,
614 struct_field_attributes,
615 struct_inherit,
616 structural_match,
617 struct_variant,
618 sty,
619 suggestion,
620 target_feature,
621 target_has_atomic,
622 target_thread_local,
623 task,
624 tbm_target_feature,
625 termination_trait,
626 termination_trait_test,
627 test,
628 test_2018_feature,
629 test_accepted_feature,
630 test_case,
631 test_removed_feature,
632 test_runner,
633 then_with,
634 thread_local,
635 tool_attributes,
636 tool_lints,
637 trace_macros,
638 trait_alias,
639 transmute,
640 transparent,
641 transparent_enums,
642 transparent_unions,
643 trivial_bounds,
644 Try,
645 try_blocks,
646 try_trait,
647 tt,
648 tuple_indexing,
649 Ty,
650 ty,
651 TyCtxt,
652 TyKind,
653 type_alias_enum_variants,
654 type_ascription,
655 type_length_limit,
656 type_macros,
657 u128,
658 u16,
659 u32,
660 u64,
661 u8,
662 unboxed_closures,
663 underscore_const_names,
664 underscore_imports,
665 underscore_lifetimes,
666 uniform_paths,
667 universal_impl_trait,
668 unmarked_api,
669 unreachable_code,
670 unrestricted_attribute_tokens,
671 unsafe_destructor_blind_to_params,
672 unsafe_no_drop_flag,
673 unsized_locals,
674 unsized_tuple_coercion,
675 unstable,
676 untagged_unions,
677 unwind,
678 unwind_attributes,
679 unwrap_or,
680 used,
681 use_extern_macros,
682 use_nested_groups,
683 usize,
684 v1,
685 val,
686 vec,
687 Vec,
688 vis,
689 visible_private_types,
690 volatile,
691 warn,
692 warn_directory_ownership,
693 wasm_import_module,
694 wasm_target_feature,
695 while_let,
696 windows,
697 windows_subsystem,
698 Yield,
699 }
700 }
701
/// An identifier: a `Symbol` name paired with a `Span`.
///
/// `PartialEq` and `Hash` are implemented by hand further down in this file
/// and only look at `name` and the syntax context of `span`, which is why
/// they are not derived here.
#[derive(Copy, Clone, Eq)]
pub struct Ident {
    /// The interned (or gensymed) name of the identifier.
    pub name: Symbol,
    /// The span attached to the identifier; its syntax context takes part in
    /// equality and hashing.
    pub span: Span,
}
707
708 impl Ident {
709 #[inline]
710 /// Constructs a new identifier from a symbol and a span.
711 pub const fn new(name: Symbol, span: Span) -> Ident {
712 Ident { name, span }
713 }
714
715 /// Constructs a new identifier with an empty syntax context.
716 #[inline]
717 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
718 Ident::new(name, DUMMY_SP)
719 }
720
721 #[inline]
722 pub fn invalid() -> Ident {
723 Ident::with_empty_ctxt(kw::Invalid)
724 }
725
726 /// Maps an interned string to an identifier with an empty syntax context.
727 pub fn from_interned_str(string: InternedString) -> Ident {
728 Ident::with_empty_ctxt(string.as_symbol())
729 }
730
731 /// Maps a string to an identifier with an empty span.
732 pub fn from_str(string: &str) -> Ident {
733 Ident::with_empty_ctxt(Symbol::intern(string))
734 }
735
736 /// Maps a string and a span to an identifier.
737 pub fn from_str_and_span(string: &str, span: Span) -> Ident {
738 Ident::new(Symbol::intern(string), span)
739 }
740
741 /// Replaces `lo` and `hi` with those from `span`, but keep hygiene context.
742 pub fn with_span_pos(self, span: Span) -> Ident {
743 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
744 }
745
746 pub fn without_first_quote(self) -> Ident {
747 Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span)
748 }
749
750 /// "Normalize" ident for use in comparisons using "item hygiene".
751 /// Identifiers with same string value become same if they came from the same "modern" macro
752 /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from
753 /// different "modern" macros.
754 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
755 pub fn modern(self) -> Ident {
756 Ident::new(self.name, self.span.modern())
757 }
758
759 /// "Normalize" ident for use in comparisons using "local variable hygiene".
760 /// Identifiers with same string value become same if they came from the same non-transparent
761 /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different
762 /// non-transparent macros.
763 /// Technically, this operation strips all transparent marks from ident's syntactic context.
764 pub fn modern_and_legacy(self) -> Ident {
765 Ident::new(self.name, self.span.modern_and_legacy())
766 }
767
768 /// Transforms an identifier into one with the same name, but gensymed.
769 pub fn gensym(self) -> Ident {
770 let name = with_interner(|interner| interner.gensymed(self.name));
771 Ident::new(name, self.span)
772 }
773
774 /// Transforms an underscore identifier into one with the same name, but
775 /// gensymed. Leaves non-underscore identifiers unchanged.
776 pub fn gensym_if_underscore(self) -> Ident {
777 if self.name == kw::Underscore { self.gensym() } else { self }
778 }
779
780 // WARNING: this function is deprecated and will be removed in the future.
781 pub fn is_gensymed(self) -> bool {
782 with_interner(|interner| interner.is_gensymed(self.name))
783 }
784
785 pub fn as_str(self) -> LocalInternedString {
786 self.name.as_str()
787 }
788
789 pub fn as_interned_str(self) -> InternedString {
790 self.name.as_interned_str()
791 }
792 }
793
794 impl PartialEq for Ident {
795 fn eq(&self, rhs: &Self) -> bool {
796 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
797 }
798 }
799
800 impl Hash for Ident {
801 fn hash<H: Hasher>(&self, state: &mut H) {
802 self.name.hash(state);
803 self.span.ctxt().hash(state);
804 }
805 }
806
807 impl fmt::Debug for Ident {
808 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
809 write!(f, "{}{:?}", self.name, self.span.ctxt())
810 }
811 }
812
813 impl fmt::Display for Ident {
814 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
815 fmt::Display::fmt(&self.name, f)
816 }
817 }
818
819 impl Encodable for Ident {
820 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
821 if self.span.ctxt().modern() == SyntaxContext::empty() {
822 s.emit_str(&self.as_str())
823 } else { // FIXME(jseyfried): intercrate hygiene
824 let mut string = "#".to_owned();
825 string.push_str(&self.as_str());
826 s.emit_str(&string)
827 }
828 }
829 }
830
831 impl Decodable for Ident {
832 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
833 let string = d.read_str()?;
834 Ok(if !string.starts_with('#') {
835 Ident::from_str(&string)
836 } else { // FIXME(jseyfried): intercrate hygiene
837 Ident::from_str(&string[1..]).gensym()
838 })
839 }
840 }
841
/// A symbol is an interned or gensymed string. A gensym is a symbol that is
/// never equal to any other symbol.
///
/// Conceptually, a gensym can be thought of as a normal symbol with an
/// invisible unique suffix. Gensyms are useful when creating new identifiers
/// that must not match any existing identifiers, e.g. during macro expansion
/// and syntax desugaring. Because gensyms should always be identifiers, all
/// gensym operations are on `Ident` rather than `Symbol`. (Indeed, in the
/// future the gensym-ness may be moved from `Symbol` to hygiene data.)
///
/// Examples:
/// ```
/// assert_eq!(Ident::from_str("x"), Ident::from_str("x"));
/// assert_ne!(Ident::from_str("x").gensym(), Ident::from_str("x"));
/// assert_ne!(Ident::from_str("x").gensym(), Ident::from_str("x").gensym());
/// ```
/// Internally, a symbol is implemented as an index, and all operations
/// (including hashing, equality, and ordering) operate on that index. The use
/// of `newtype_index!` means that `Option<Symbol>` only takes up 4 bytes,
/// because `newtype_index!` reserves the last 256 values for tagging purposes.
///
/// Note that `Symbol` cannot directly be a `newtype_index!` because it
/// implements `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol(SymbolIndex);
867
// The index type wrapped by `Symbol`. It is generated by `newtype_index!`;
// this file relies on its `from_u32_const`, `as_u32`, `as_usize` and
// `MAX_AS_U32` items.
newtype_index! {
    pub struct SymbolIndex { .. }
}
871
872 impl Symbol {
873 const fn new(n: u32) -> Self {
874 Symbol(SymbolIndex::from_u32_const(n))
875 }
876
877 /// Maps a string to its interned representation.
878 pub fn intern(string: &str) -> Self {
879 with_interner(|interner| interner.intern(string))
880 }
881
882 pub fn as_str(self) -> LocalInternedString {
883 with_interner(|interner| unsafe {
884 LocalInternedString {
885 string: std::mem::transmute::<&str, &str>(interner.get(self))
886 }
887 })
888 }
889
890 pub fn as_interned_str(self) -> InternedString {
891 with_interner(|interner| InternedString {
892 symbol: interner.interned(self)
893 })
894 }
895
896 pub fn as_u32(self) -> u32 {
897 self.0.as_u32()
898 }
899 }
900
901 impl fmt::Debug for Symbol {
902 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
903 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
904 if is_gensymed {
905 write!(f, "{}({:?})", self, self.0)
906 } else {
907 write!(f, "{}", self)
908 }
909 }
910 }
911
912 impl fmt::Display for Symbol {
913 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
914 fmt::Display::fmt(&self.as_str(), f)
915 }
916 }
917
918 impl Encodable for Symbol {
919 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
920 s.emit_str(&self.as_str())
921 }
922 }
923
924 impl Decodable for Symbol {
925 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
926 Ok(Symbol::intern(&d.read_str()?))
927 }
928 }
929
// The `&'static str`s in this type actually point into the arena.
//
// Note that normal symbols are indexed upward from 0, and gensyms are indexed
// downward from SymbolIndex::MAX_AS_U32.
#[derive(Default)]
pub struct Interner {
    // Backing storage for the bytes of strings added by `intern`.
    arena: DroplessArena,
    // Reverse lookup: string contents -> the symbol interned for them.
    names: FxHashMap<&'static str, Symbol>,
    // Forward lookup: `strings[i]` is the string of the symbol with index `i`.
    strings: Vec<&'static str>,
    // `gensyms[k]` is the symbol the gensym with index `MAX_AS_U32 - k` was
    // created from.
    gensyms: Vec<Symbol>,
}
941
942 impl Interner {
943 fn prefill(init: &[&'static str]) -> Self {
944 Interner {
945 strings: init.into(),
946 names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
947 ..Default::default()
948 }
949 }
950
951 pub fn intern(&mut self, string: &str) -> Symbol {
952 if let Some(&name) = self.names.get(string) {
953 return name;
954 }
955
956 let name = Symbol::new(self.strings.len() as u32);
957
958 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
959 // UTF-8.
960 let string: &str = unsafe {
961 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
962 };
963 // It is safe to extend the arena allocation to `'static` because we only access
964 // these while the arena is still alive.
965 let string: &'static str = unsafe {
966 &*(string as *const str)
967 };
968 self.strings.push(string);
969 self.names.insert(string, name);
970 name
971 }
972
973 fn interned(&self, symbol: Symbol) -> Symbol {
974 if (symbol.0.as_usize()) < self.strings.len() {
975 symbol
976 } else {
977 self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]
978 }
979 }
980
981 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
982 self.gensyms.push(symbol);
983 Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
984 }
985
986 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
987 symbol.0.as_usize() >= self.strings.len()
988 }
989
990 // Get the symbol as a string. `Symbol::as_str()` should be used in
991 // preference to this function.
992 pub fn get(&self, symbol: Symbol) -> &str {
993 match self.strings.get(symbol.0.as_usize()) {
994 Some(string) => string,
995 None => {
996 let symbol = self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize];
997 self.strings[symbol.0.as_usize()]
998 }
999 }
1000 }
1001 }
1002
// This module has a very short name because it's used a lot.
// NOTE(review): `keywords!()` is presumably generated by the `symbols!`
// invocation above and defines the `kw::*` constants (`kw::Invalid`,
// `kw::As`, ...) used throughout this file; confirm against `rustc_macros`.
pub mod kw {
    use super::Symbol;
    keywords!();
}
1008
1009 // This module has a very short name because it's used a lot.
1010 pub mod sym {
1011 use std::convert::TryInto;
1012 use super::Symbol;
1013
1014 symbols!();
1015
1016 // Get the symbol for an integer. The first few non-negative integers each
1017 // have a static symbol and therefore are fast.
1018 pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol {
1019 if let Result::Ok(idx) = n.try_into() {
1020 if let Option::Some(&sym) = digits_array.get(idx) {
1021 return sym;
1022 }
1023 }
1024 Symbol::intern(&n.to_string())
1025 }
1026 }
1027
1028 impl Symbol {
1029 fn is_used_keyword_2018(self) -> bool {
1030 self == kw::Dyn
1031 }
1032
1033 fn is_unused_keyword_2018(self) -> bool {
1034 self >= kw::Async && self <= kw::Try
1035 }
1036
1037 /// Used for sanity checking rustdoc keyword sections.
1038 pub fn is_doc_keyword(self) -> bool {
1039 self <= kw::Union
1040 }
1041
1042 /// A keyword or reserved identifier that can be used as a path segment.
1043 pub fn is_path_segment_keyword(self) -> bool {
1044 self == kw::Super ||
1045 self == kw::SelfLower ||
1046 self == kw::SelfUpper ||
1047 self == kw::Crate ||
1048 self == kw::PathRoot ||
1049 self == kw::DollarCrate
1050 }
1051
1052 /// This symbol can be a raw identifier.
1053 pub fn can_be_raw(self) -> bool {
1054 self != kw::Invalid && self != kw::Underscore && !self.is_path_segment_keyword()
1055 }
1056 }
1057
1058 impl Ident {
1059 // Returns `true` for reserved identifiers used internally for elided lifetimes,
1060 // unnamed method parameters, crate root module, error recovery etc.
1061 pub fn is_special(self) -> bool {
1062 self.name <= kw::Underscore
1063 }
1064
1065 /// Returns `true` if the token is a keyword used in the language.
1066 pub fn is_used_keyword(self) -> bool {
1067 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
1068 self.name >= kw::As && self.name <= kw::While ||
1069 self.name.is_used_keyword_2018() && self.span.rust_2018()
1070 }
1071
1072 /// Returns `true` if the token is a keyword reserved for possible future use.
1073 pub fn is_unused_keyword(self) -> bool {
1074 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
1075 self.name >= kw::Abstract && self.name <= kw::Yield ||
1076 self.name.is_unused_keyword_2018() && self.span.rust_2018()
1077 }
1078
1079 /// Returns `true` if the token is either a special identifier or a keyword.
1080 pub fn is_reserved(self) -> bool {
1081 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
1082 }
1083
1084 /// A keyword or reserved identifier that can be used as a path segment.
1085 pub fn is_path_segment_keyword(self) -> bool {
1086 self.name.is_path_segment_keyword()
1087 }
1088
1089 /// We see this identifier in a normal identifier position, like variable name or a type.
1090 /// How was it written originally? Did it use the raw form? Let's try to guess.
1091 pub fn is_raw_guess(self) -> bool {
1092 self.name.can_be_raw() && self.is_reserved()
1093 }
1094 }
1095
1096 // If an interner exists, return it. Otherwise, prepare a fresh one.
1097 #[inline]
1098 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
1099 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
1100 }
1101
/// An alternative to `Symbol` and `InternedString`, useful when the chars
/// within the symbol need to be accessed. It is best used for temporary
/// values.
///
/// Because the interner outlives any thread which uses this type, we can
/// safely treat `string`, which points to interner data, as an immortal string,
/// as long as this type never crosses between threads.
//
// FIXME: ensure that the interner outlives any thread which uses
// `LocalInternedString`, by creating a new thread right after constructing the
// interner.
#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
pub struct LocalInternedString {
    // Points at interner data; `'static` only by the convention described above.
    string: &'static str,
}
1117
1118 impl LocalInternedString {
1119 /// Maps a string to its interned representation.
1120 pub fn intern(string: &str) -> Self {
1121 let string = with_interner(|interner| {
1122 let symbol = interner.intern(string);
1123 interner.strings[symbol.0.as_usize()]
1124 });
1125 LocalInternedString {
1126 string: unsafe { std::mem::transmute::<&str, &str>(string) }
1127 }
1128 }
1129
1130 pub fn as_interned_str(self) -> InternedString {
1131 InternedString {
1132 symbol: Symbol::intern(self.string)
1133 }
1134 }
1135
1136 #[inline]
1137 pub fn get(&self) -> &str {
1138 // This returns a valid string since we ensure that `self` outlives the interner
1139 // by creating the interner on a thread which outlives threads which can access it.
1140 // This type cannot move to a thread which outlives the interner since it does
1141 // not implement Send.
1142 self.string
1143 }
1144 }
1145
1146 impl<U: ?Sized> std::convert::AsRef<U> for LocalInternedString
1147 where
1148 str: std::convert::AsRef<U>
1149 {
1150 #[inline]
1151 fn as_ref(&self) -> &U {
1152 self.string.as_ref()
1153 }
1154 }
1155
1156 impl<T: std::ops::Deref<Target = str>> std::cmp::PartialEq<T> for LocalInternedString {
1157 fn eq(&self, other: &T) -> bool {
1158 self.string == other.deref()
1159 }
1160 }
1161
1162 impl std::cmp::PartialEq<LocalInternedString> for str {
1163 fn eq(&self, other: &LocalInternedString) -> bool {
1164 self == other.string
1165 }
1166 }
1167
1168 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a str {
1169 fn eq(&self, other: &LocalInternedString) -> bool {
1170 *self == other.string
1171 }
1172 }
1173
1174 impl std::cmp::PartialEq<LocalInternedString> for String {
1175 fn eq(&self, other: &LocalInternedString) -> bool {
1176 self == other.string
1177 }
1178 }
1179
1180 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a String {
1181 fn eq(&self, other: &LocalInternedString) -> bool {
1182 *self == other.string
1183 }
1184 }
1185
// `LocalInternedString` must never cross threads: its `string` is only valid
// while the interner it points into is alive (see the type's documentation).
impl !Send for LocalInternedString {}
impl !Sync for LocalInternedString {}
1188
1189 impl std::ops::Deref for LocalInternedString {
1190 type Target = str;
1191 #[inline]
1192 fn deref(&self) -> &str { self.string }
1193 }
1194
1195 impl fmt::Debug for LocalInternedString {
1196 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1197 fmt::Debug::fmt(self.string, f)
1198 }
1199 }
1200
1201 impl fmt::Display for LocalInternedString {
1202 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1203 fmt::Display::fmt(self.string, f)
1204 }
1205 }
1206
1207 impl Decodable for LocalInternedString {
1208 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
1209 Ok(LocalInternedString::intern(&d.read_str()?))
1210 }
1211 }
1212
1213 impl Encodable for LocalInternedString {
1214 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
1215 s.emit_str(self.string)
1216 }
1217 }
1218
/// An alternative to `Symbol` that is focused on string contents. It has two
/// main differences to `Symbol`.
///
/// First, its implementations of `Hash`, `PartialOrd` and `Ord` work with the
/// string chars rather than the symbol integer. This is useful when hash
/// stability is required across compile sessions, or a guaranteed sort
/// ordering is required.
///
/// Second, gensym-ness is irrelevant. E.g.:
/// ```
/// let a = Ident::from_str("x").gensym();
/// let b = Ident::from_str("x").gensym();
/// assert_ne!(a, b);
/// assert_eq!(a.as_interned_str(), b.as_interned_str());
/// ```
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct InternedString {
    // The symbol whose string this value stands for.
    symbol: Symbol,
}
1236
1237 impl InternedString {
1238 /// Maps a string to its interned representation.
1239 pub fn intern(string: &str) -> Self {
1240 InternedString {
1241 symbol: Symbol::intern(string)
1242 }
1243 }
1244
1245 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
1246 let str = with_interner(|interner| {
1247 interner.get(self.symbol) as *const str
1248 });
1249 // This is safe because the interner keeps string alive until it is dropped.
1250 // We can access it because we know the interner is still alive since we use a
1251 // scoped thread local to access it, and it was alive at the beginning of this scope
1252 unsafe { f(&*str) }
1253 }
1254
1255 fn with2<F: FnOnce(&str, &str) -> R, R>(self, other: &InternedString, f: F) -> R {
1256 let (self_str, other_str) = with_interner(|interner| {
1257 (interner.get(self.symbol) as *const str,
1258 interner.get(other.symbol) as *const str)
1259 });
1260 // This is safe for the same reason that `with` is safe.
1261 unsafe { f(&*self_str, &*other_str) }
1262 }
1263
1264 pub fn as_symbol(self) -> Symbol {
1265 self.symbol
1266 }
1267
1268 pub fn as_str(self) -> LocalInternedString {
1269 self.symbol.as_str()
1270 }
1271 }
1272
1273 impl Hash for InternedString {
1274 fn hash<H: Hasher>(&self, state: &mut H) {
1275 self.with(|str| str.hash(state))
1276 }
1277 }
1278
1279 impl PartialOrd<InternedString> for InternedString {
1280 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
1281 if self.symbol == other.symbol {
1282 return Some(Ordering::Equal);
1283 }
1284 self.with2(other, |self_str, other_str| self_str.partial_cmp(other_str))
1285 }
1286 }
1287
1288 impl Ord for InternedString {
1289 fn cmp(&self, other: &InternedString) -> Ordering {
1290 if self.symbol == other.symbol {
1291 return Ordering::Equal;
1292 }
1293 self.with2(other, |self_str, other_str| self_str.cmp(other_str))
1294 }
1295 }
1296
1297 impl fmt::Debug for InternedString {
1298 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1299 self.with(|str| fmt::Debug::fmt(&str, f))
1300 }
1301 }
1302
1303 impl fmt::Display for InternedString {
1304 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1305 self.with(|str| fmt::Display::fmt(&str, f))
1306 }
1307 }
1308
1309 impl Decodable for InternedString {
1310 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
1311 Ok(InternedString::intern(&d.read_str()?))
1312 }
1313 }
1314
1315 impl Encodable for InternedString {
1316 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
1317 self.with(|string| s.emit_str(string))
1318 }
1319 }
1320
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Globals;
    use crate::edition;

    /// Checks index assignment for interned strings (upward from 0) and for
    /// gensyms (downward from `SymbolIndex::MAX_AS_U32`).
    #[test]
    fn interner_tests() {
        let mut interner: Interner = Interner::default();
        let top = SymbolIndex::MAX_AS_U32;

        // first one is zero:
        assert_eq!(interner.intern("dog"), Symbol::new(0));
        // re-use gets the same entry:
        assert_eq!(interner.intern("dog"), Symbol::new(0));
        // different string gets a different #:
        assert_eq!(interner.intern("cat"), Symbol::new(1));
        assert_eq!(interner.intern("cat"), Symbol::new(1));
        // dog is still at zero
        assert_eq!(interner.intern("dog"), Symbol::new(0));

        let zebra = interner.intern("zebra");
        assert_eq!(interner.gensymed(zebra), Symbol::new(top));
        // gensym of same string gets new number:
        assert_eq!(interner.gensymed(zebra), Symbol::new(top - 1));
        // gensym of *existing* string gets new number:
        let dog = interner.intern("dog");
        assert_eq!(interner.gensymed(dog), Symbol::new(top - 2));
    }

    /// Stripping the leading quote from `'break` yields the `break` keyword symbol.
    #[test]
    fn without_first_quote_test() {
        GLOBALS.set(&Globals::new(edition::DEFAULT_EDITION), || {
            let quoted = Ident::from_str("'break");
            let unquoted = quoted.without_first_quote();
            assert_eq!(unquoted.name, kw::Break);
        });
    }
}