]>
Commit | Line | Data |
---|---|---|
54a0048b SL |
1 | //! The Rust Linkage Model and Symbol Names |
2 | //! ======================================= | |
3 | //! | |
4 | //! The semantic model of Rust linkage is, broadly, that "there's no global | |
5 | //! namespace" between crates. Our aim is to preserve the illusion of this | |
6 | //! model despite the fact that it's not *quite* possible to implement on | |
7 | //! modern linkers. We initially didn't use system linkers at all, but have | |
8 | //! been convinced of their utility. | |
9 | //! | |
10 | //! There are a few issues to handle: | |
11 | //! | |
12 | //! - Linkers operate on a flat namespace, so we have to flatten names. | |
13 | //! We do this using the C++ namespace-mangling technique. Foo::bar | |
14 | //! symbols and such. | |
15 | //! | |
16 | //! - Symbols for distinct items with the same *name* need to get different | |
17 | //! linkage-names. Examples of this are monomorphizations of functions or | |
18 | //! items within anonymous scopes that end up having the same path. | |
19 | //! | |
20 | //! - Symbols in different crates but with the same names "within" the crate need | |
21 | //! to get different linkage-names. | |
22 | //! | |
23 | //! - Symbol names should be deterministic: Two consecutive runs of the | |
24 | //! compiler over the same code base should produce the same symbol names for | |
25 | //! the same items. | |
26 | //! | |
27 | //! - Symbol names should not depend on any global properties of the code base, | |
28 | //! so that small modifications to the code base do not result in all symbols | |
29 | //! changing. In previous versions of the compiler, symbol names incorporated | |
30 | //! the SVH (Stable Version Hash) of the crate. This scheme turned out to be | |
31 | //! infeasible when used in conjunction with incremental compilation because | |
32 | //! small code changes would invalidate all symbols generated previously. | |
33 | //! | |
34 | //! - Even symbols from different versions of the same crate should be able to | |
35 | //! live next to each other without conflict. | |
36 | //! | |
37 | //! In order to fulfill the above requirements the following scheme is used by | |
38 | //! the compiler: | |
39 | //! | |
40 | //! The main tool for avoiding naming conflicts is the incorporation of a 64-bit | |
41 | //! hash value into every exported symbol name. Anything that makes a difference | |
42 | //! to the symbol being named, but does not show up in the regular path needs to | |
43 | //! be fed into this hash: | |
44 | //! | |
45 | //! - Different monomorphizations of the same item have the same path but differ | |
46 | //! in their concrete type parameters, so these parameters are part of the | |
47 | //! data being digested for the symbol hash. | |
48 | //! | |
49 | //! - Rust allows items to be defined in anonymous scopes, such as in | |
50 | //! `fn foo() { { fn bar() {} } { fn bar() {} } }`. Both `bar` functions have | |
51 | //! the path `foo::bar`, since the anonymous scopes do not contribute to the | |
52 | //! path of an item. The compiler already handles this case via so-called | |
53 | //! disambiguating `DefPaths` which use indices to distinguish items with the | |
54 | //! same name. The DefPaths of the functions above are thus `foo[0]::bar[0]` | |
55 | //! and `foo[0]::bar[1]`. In order to incorporate this disambiguation | |
56 | //! information into the symbol name too, these indices are fed into the | |
57 | //! symbol hash, so that the above two symbols would end up with different | |
58 | //! hash values. | |
59 | //! | |
60 | //! The two measures described above suffice to avoid intra-crate conflicts. In | |
61 | //! order to also avoid inter-crate conflicts two more measures are taken: | |
62 | //! | |
63 | //! - The name of the crate containing the symbol is prepended to the symbol | |
0731742a | 64 | //! name, i.e., symbols are "crate qualified". For example, a function `foo` in |
54a0048b SL |
65 | //! module `bar` in crate `baz` would get a symbol name like |
66 | //! `baz::bar::foo::{hash}` instead of just `bar::foo::{hash}`. This avoids | |
67 | //! simple conflicts between functions from different crates. | |
68 | //! | |
69 | //! - In order to be able to also use symbols from two versions of the same | |
70 | //! crate (which naturally also have the same name), a stronger measure is | |
71 | //! required: The compiler accepts an arbitrary "disambiguator" value via the | |
a1dfa0c6 | 72 | //! `-C metadata` command-line argument. This disambiguator is then fed into |
54a0048b SL |
73 | //! the symbol hash of every exported item. Consequently, the symbols in two |
74 | //! identical crates but with different disambiguators are not in conflict | |
75 | //! with each other. This facility is mainly intended to be used by build | |
76 | //! tools like Cargo. | |
77 | //! | |
78 | //! A note on symbol name stability | |
79 | //! ------------------------------- | |
80 | //! Previous versions of the compiler resorted to feeding NodeIds into the | |
81 | //! symbol hash in order to disambiguate between items with the same path. The | |
82 | //! current version of the name generation algorithm takes great care not to do | |
83 | //! that, since NodeIds are notoriously unstable: A small change to the | |
84 | //! code base will offset all NodeIds after the change and thus, much as using | |
85 | //! the SVH in the hash, invalidate an unbounded number of symbol names. This | |
86 | //! makes re-using previously compiled code for incremental compilation | |
87 | //! virtually impossible. Thus, symbol hash generation exclusively relies on | |
88 | //! DefPaths which are much more robust in the face of changes to the code base. | |
89 | ||
1b1a35ee | 90 | #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] |
ba9703b0 | 91 | #![feature(never_type)] |
ba9703b0 | 92 | #![recursion_limit = "256"] |
5e7ed085 | 93 | #![allow(rustc::potential_query_instability)] |
f2b60f7d FG |
94 | #![deny(rustc::untranslatable_diagnostic)] |
95 | #![deny(rustc::diagnostic_outside_of_impl)] | |
ba9703b0 XL |
96 | |
97 | #[macro_use] | |
98 | extern crate rustc_middle; | |
99 | ||
f2b60f7d FG |
100 | #[macro_use] |
101 | extern crate tracing; | |
102 | ||
04454e1e | 103 | use rustc_hir::def::DefKind; |
dfeec247 | 104 | use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; |
ba9703b0 | 105 | use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; |
04454e1e | 106 | use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; |
ba9703b0 XL |
107 | use rustc_middle::mir::mono::{InstantiationMode, MonoItem}; |
108 | use rustc_middle::ty::query::Providers; | |
109 | use rustc_middle::ty::subst::SubstsRef; | |
064997fb | 110 | use rustc_middle::ty::{self, Instance, TyCtxt}; |
ba9703b0 | 111 | use rustc_session::config::SymbolManglingVersion; |
54a0048b | 112 | |
dc9dc135 XL |
113 | mod legacy; |
114 | mod v0; | |
54a0048b | 115 | |
f2b60f7d | 116 | pub mod errors; |
ba9703b0 | 117 | pub mod test; |
064997fb | 118 | pub mod typeid; |
ba9703b0 | 119 | |
dfeec247 XL |
120 | /// This function computes the symbol name for the given `instance` and the |
121 | /// given instantiating crate. That is, if you know that instance X is | |
122 | /// instantiated in crate Y, this is the symbol name this instance would have. | |
a2a8927a | 123 | pub fn symbol_name_for_instance_in_crate<'tcx>( |
dfeec247 XL |
124 | tcx: TyCtxt<'tcx>, |
125 | instance: Instance<'tcx>, | |
126 | instantiating_crate: CrateNum, | |
127 | ) -> String { | |
128 | compute_symbol_name(tcx, instance, || instantiating_crate) | |
129 | } | |
130 | ||
f035d41b | 131 | pub fn provide(providers: &mut Providers) { |
dfeec247 XL |
132 | *providers = Providers { symbol_name: symbol_name_provider, ..*providers }; |
133 | } | |
ea8adc8c | 134 | |
dfeec247 XL |
135 | // The `symbol_name` query provides the symbol name for calling a given |
136 | // instance from the local crate. In particular, it will also look up the | |
137 | // correct symbol name of instances from upstream crates. | |
a2a8927a | 138 | fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> { |
dfeec247 XL |
139 | let symbol_name = compute_symbol_name(tcx, instance, || { |
140 | // This closure determines the instantiating crate for instances that | |
141 | // need an instantiating-crate-suffix for their symbol name, in order | |
142 | // to differentiate between local copies. | |
143 | if is_generic(instance.substs) { | |
144 | // For generics we might find re-usable upstream instances. If there | |
145 | // is one, we rely on the symbol being instantiated locally. | |
146 | instance.upstream_monomorphization(tcx).unwrap_or(LOCAL_CRATE) | |
147 | } else { | |
148 | // For non-generic things that need to avoid naming conflicts, we | |
149 | // always instantiate a copy in the local crate. | |
150 | LOCAL_CRATE | |
151 | } | |
152 | }); | |
153 | ||
3dfed10e | 154 | ty::SymbolName::new(tcx, &symbol_name) |
7cac9316 XL |
155 | } |
156 | ||
923072b8 FG |
157 | pub fn typeid_for_trait_ref<'tcx>( |
158 | tcx: TyCtxt<'tcx>, | |
159 | trait_ref: ty::PolyExistentialTraitRef<'tcx>, | |
160 | ) -> String { | |
161 | v0::mangle_typeid_for_trait_ref(tcx, trait_ref) | |
162 | } | |
163 | ||
dfeec247 XL |
164 | /// Computes the symbol name for the given instance. This function will call |
165 | /// `compute_instantiating_crate` if it needs to factor the instantiating crate | |
166 | /// into the symbol name. | |
a2a8927a | 167 | fn compute_symbol_name<'tcx>( |
dfeec247 XL |
168 | tcx: TyCtxt<'tcx>, |
169 | instance: Instance<'tcx>, | |
170 | compute_instantiating_crate: impl FnOnce() -> CrateNum, | |
171 | ) -> String { | |
cc61c64b XL |
172 | let def_id = instance.def_id(); |
173 | let substs = instance.substs; | |
3157f602 | 174 | |
94b46f34 | 175 | debug!("symbol_name(def_id={:?}, substs={:?})", def_id, substs); |
54a0048b | 176 | |
04454e1e | 177 | if let Some(def_id) = def_id.as_local() { |
17df50a5 | 178 | if tcx.proc_macro_decls_static(()) == Some(def_id) { |
136023e0 XL |
179 | let stable_crate_id = tcx.sess.local_stable_crate_id(); |
180 | return tcx.sess.generate_proc_macro_decls_symbol(stable_crate_id); | |
3157f602 | 181 | } |
04454e1e FG |
182 | } |
183 | ||
184 | // FIXME(eddyb) Precompute a custom symbol name based on attributes. | |
185 | let attrs = if tcx.def_kind(def_id).has_codegen_attrs() { | |
186 | tcx.codegen_fn_attrs(def_id) | |
cc61c64b | 187 | } else { |
04454e1e | 188 | CodegenFnAttrs::EMPTY |
cc61c64b | 189 | }; |
54a0048b | 190 | |
dfeec247 XL |
191 | // Foreign items by default use no mangling for their symbol name. There's a |
192 | // few exceptions to this rule though: | |
193 | // | |
194 | // * This can be overridden with the `#[link_name]` attribute | |
195 | // | |
196 | // * On the wasm32 targets there is a bug (or feature) in LLD [1] where the | |
197 | // same-named symbol when imported from different wasm modules will get | |
74b04a01 | 198 | // hooked up incorrectly. As a result foreign symbols, on the wasm target, |
dfeec247 XL |
199 | // with a wasm import module, get mangled. Additionally our codegen will |
200 | // deduplicate symbols based purely on the symbol name, but for wasm this | |
201 | // isn't quite right because the same-named symbol on wasm can come from | |
202 | // different modules. For these reasons if `#[link(wasm_import_module)]` | |
203 | // is present we mangle everything on wasm because the demangled form will | |
204 | // show up in the `wasm-import-name` custom attribute in LLVM IR. | |
205 | // | |
206 | // [1]: https://bugs.llvm.org/show_bug.cgi?id=44316 | |
04454e1e | 207 | if tcx.is_foreign_item(def_id) |
cdc7bbd5 | 208 | && (!tcx.sess.target.is_like_wasm |
29967ef6 XL |
209 | || !tcx.wasm_import_module_map(def_id.krate).contains_key(&def_id)) |
210 | { | |
211 | if let Some(name) = attrs.link_name { | |
212 | return name.to_string(); | |
3157f602 | 213 | } |
29967ef6 | 214 | return tcx.item_name(def_id).to_string(); |
cc61c64b | 215 | } |
54a0048b | 216 | |
e74abb32 | 217 | if let Some(name) = attrs.export_name { |
cc61c64b | 218 | // Use provided name |
dfeec247 | 219 | return name.to_string(); |
cc61c64b | 220 | } |
54a0048b | 221 | |
b7449926 | 222 | if attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE) { |
cc61c64b | 223 | // Don't mangle |
dfeec247 | 224 | return tcx.item_name(def_id).to_string(); |
dc9dc135 XL |
225 | } |
226 | ||
04454e1e FG |
227 | // If we're dealing with an instance of a function that's inlined from |
228 | // another crate but we're marking it as globally shared to our | |
229 | // compilation (aka we're not making an internal copy in each of our | |
230 | // codegen units) then this symbol may become an exported (but hidden | |
231 | // visibility) symbol. This means that multiple crates may do the same | |
232 | // and we want to be sure to avoid any symbol conflicts here. | |
233 | let is_globally_shared_function = matches!( | |
234 | tcx.def_kind(instance.def_id()), | |
235 | DefKind::Fn | DefKind::AssocFn | DefKind::Closure | DefKind::Generator | DefKind::Ctor(..) | |
236 | ) && matches!( | |
237 | MonoItem::Fn(instance).instantiation_mode(tcx), | |
238 | InstantiationMode::GloballyShared { may_conflict: true } | |
239 | ); | |
240 | ||
241 | // If this is an instance of a generic function, we also hash in | |
242 | // the ID of the instantiating crate. This avoids symbol conflicts | |
243 | // in case the same instances is emitted in two crates of the same | |
244 | // project. | |
245 | let avoid_cross_crate_conflicts = is_generic(substs) || is_globally_shared_function; | |
54a0048b | 246 | |
dfeec247 XL |
247 | let instantiating_crate = |
248 | if avoid_cross_crate_conflicts { Some(compute_instantiating_crate()) } else { None }; | |
9fa01778 | 249 | |
dc9dc135 XL |
250 | // Pick the crate responsible for the symbol mangling version, which has to: |
251 | // 1. be stable for each instance, whether it's being defined or imported | |
a2a8927a | 252 | // 2. obey each crate's own `-C symbol-mangling-version`, as much as possible |
dc9dc135 XL |
253 | // We solve these as follows: |
254 | // 1. because symbol names depend on both `def_id` and `instantiating_crate`, | |
255 | // both their `CrateNum`s are stable for any given instance, so we can pick | |
256 | // either and have a stable choice of symbol mangling version | |
257 | // 2. we favor `instantiating_crate` where possible (i.e. when `Some`) | |
258 | let mangling_version_crate = instantiating_crate.unwrap_or(def_id.krate); | |
259 | let mangling_version = if mangling_version_crate == LOCAL_CRATE { | |
a2a8927a | 260 | tcx.sess.opts.get_symbol_mangling_version() |
dc9dc135 XL |
261 | } else { |
262 | tcx.symbol_mangling_version(mangling_version_crate) | |
263 | }; | |
9fa01778 | 264 | |
94222f64 | 265 | let symbol = match mangling_version { |
dc9dc135 XL |
266 | SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate), |
267 | SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate), | |
94222f64 XL |
268 | }; |
269 | ||
270 | debug_assert!( | |
271 | rustc_demangle::try_demangle(&symbol).is_ok(), | |
272 | "compute_symbol_name: `{}` cannot be demangled", | |
273 | symbol | |
274 | ); | |
275 | ||
276 | symbol | |
dfeec247 | 277 | } |
9fa01778 | 278 | |
dfeec247 XL |
279 | fn is_generic(substs: SubstsRef<'_>) -> bool { |
280 | substs.non_erasable_generics().next().is_some() | |
54a0048b | 281 | } |