]>
Commit | Line | Data |
---|---|---|
54a0048b SL |
1 | //! The Rust Linkage Model and Symbol Names |
2 | //! ======================================= | |
3 | //! | |
4 | //! The semantic model of Rust linkage is, broadly, that "there's no global | |
5 | //! namespace" between crates. Our aim is to preserve the illusion of this | |
6 | //! model despite the fact that it's not *quite* possible to implement on | |
7 | //! modern linkers. We initially didn't use system linkers at all, but have | |
8 | //! been convinced of their utility. | |
9 | //! | |
10 | //! There are a few issues to handle: | |
11 | //! | |
12 | //! - Linkers operate on a flat namespace, so we have to flatten names. | |
13 | //! We do this using the C++ namespace-mangling technique. Foo::bar | |
14 | //! symbols and such. | |
15 | //! | |
16 | //! - Symbols for distinct items with the same *name* need to get different | |
17 | //! linkage-names. Examples of this are monomorphizations of functions or | |
18 | //! items within anonymous scopes that end up having the same path. | |
19 | //! | |
20 | //! - Symbols in different crates but with same names "within" the crate need | |
21 | //! to get different linkage-names. | |
22 | //! | |
23 | //! - Symbol names should be deterministic: Two consecutive runs of the | |
24 | //! compiler over the same code base should produce the same symbol names for | |
25 | //! the same items. | |
26 | //! | |
27 | //! - Symbol names should not depend on any global properties of the code base, | |
28 | //! so that small modifications to the code base do not result in all symbols | |
29 | //! changing. In previous versions of the compiler, symbol names incorporated | |
30 | //! the SVH (Stable Version Hash) of the crate. This scheme turned out to be | |
31 | //! infeasible when used in conjunction with incremental compilation because | |
32 | //! small code changes would invalidate all symbols generated previously. | |
33 | //! | |
34 | //! - Even symbols from different versions of the same crate should be able to | |
35 | //! live next to each other without conflict. | |
36 | //! | |
37 | //! In order to fulfill the above requirements the following scheme is used by | |
38 | //! the compiler: | |
39 | //! | |
40 | //! The main tool for avoiding naming conflicts is the incorporation of a 64-bit | |
41 | //! hash value into every exported symbol name. Anything that makes a difference | |
42 | //! to the symbol being named, but does not show up in the regular path needs to | |
43 | //! be fed into this hash: | |
44 | //! | |
45 | //! - Different monomorphizations of the same item have the same path but differ | |
46 | //! in their concrete type parameters, so these parameters are part of the | |
47 | //! data being digested for the symbol hash. | |
48 | //! | |
49 | //! - Rust allows items to be defined in anonymous scopes, such as in | |
50 | //! `fn foo() { { fn bar() {} } { fn bar() {} } }`. Both `bar` functions have | |
51 | //! the path `foo::bar`, since the anonymous scopes do not contribute to the | |
52 | //! path of an item. The compiler already handles this case via so-called | |
53 | //! disambiguating `DefPaths` which use indices to distinguish items with the | |
54 | //! same name. The DefPaths of the functions above are thus `foo[0]::bar[0]` | |
55 | //! and `foo[0]::bar[1]`. In order to incorporate this disambiguation | |
56 | //! information into the symbol name too, these indices are fed into the | |
57 | //! symbol hash, so that the above two symbols would end up with different | |
58 | //! hash values. | |
59 | //! | |
60 | //! The two measures described above suffice to avoid intra-crate conflicts. In | |
61 | //! order to also avoid inter-crate conflicts two more measures are taken: | |
62 | //! | |
63 | //! - The name of the crate containing the symbol is prepended to the symbol | |
0731742a | 64 | //! name, i.e., symbols are "crate qualified". For example, a function `foo` in |
54a0048b SL |
65 | //! module `bar` in crate `baz` would get a symbol name like |
66 | //! `baz::bar::foo::{hash}` instead of just `bar::foo::{hash}`. This avoids | |
67 | //! simple conflicts between functions from different crates. | |
68 | //! | |
69 | //! - In order to be able to also use symbols from two versions of the same | |
70 | //! crate (which naturally also have the same name), a stronger measure is | |
71 | //! required: The compiler accepts an arbitrary "disambiguator" value via the | |
a1dfa0c6 | 72 | //! `-C metadata` command-line argument. This disambiguator is then fed into |
54a0048b SL |
73 | //! the symbol hash of every exported item. Consequently, the symbols in two |
74 | //! identical crates but with different disambiguators are not in conflict | |
75 | //! with each other. This facility is mainly intended to be used by build | |
76 | //! tools like Cargo. | |
77 | //! | |
78 | //! A note on symbol name stability | |
79 | //! ------------------------------- | |
80 | //! Previous versions of the compiler resorted to feeding NodeIds into the | |
81 | //! symbol hash in order to disambiguate between items with the same path. The | |
82 | //! current version of the name generation algorithm takes great care not to do | |
83 | //! that, since NodeIds are notoriously unstable: A small change to the | |
84 | //! code base will offset all NodeIds after the change and thus, much as using | |
85 | //! the SVH in the hash, invalidate an unbounded number of symbol names. This | |
86 | //! makes re-using previously compiled code for incremental compilation | |
87 | //! virtually impossible. Thus, symbol hash generation exclusively relies on | |
88 | //! DefPaths which are much more robust in the face of changes to the code base. | |
89 | ||
1b1a35ee | 90 | #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] |
ba9703b0 XL |
91 | #![feature(never_type)] |
92 | #![feature(nll)] | |
ba9703b0 XL |
93 | #![recursion_limit = "256"] |
94 | ||
95 | #[macro_use] | |
96 | extern crate rustc_middle; | |
97 | ||
dfeec247 XL |
98 | use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; |
99 | use rustc_hir::Node; | |
ba9703b0 XL |
100 | use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; |
101 | use rustc_middle::mir::mono::{InstantiationMode, MonoItem}; | |
102 | use rustc_middle::ty::query::Providers; | |
103 | use rustc_middle::ty::subst::SubstsRef; | |
3c0e092e | 104 | use rustc_middle::ty::{self, Instance, Ty, TyCtxt}; |
ba9703b0 | 105 | use rustc_session::config::SymbolManglingVersion; |
3c0e092e | 106 | use rustc_target::abi::call::FnAbi; |
54a0048b | 107 | |
3dfed10e | 108 | use tracing::debug; |
9fa01778 | 109 | |
dc9dc135 XL |
110 | mod legacy; |
111 | mod v0; | |
54a0048b | 112 | |
ba9703b0 XL |
113 | pub mod test; |
114 | ||
dfeec247 XL |
115 | /// This function computes the symbol name for the given `instance` and the |
116 | /// given instantiating crate. That is, if you know that instance X is | |
117 | /// instantiated in crate Y, this is the symbol name this instance would have. | |
a2a8927a | 118 | pub fn symbol_name_for_instance_in_crate<'tcx>( |
dfeec247 XL |
119 | tcx: TyCtxt<'tcx>, |
120 | instance: Instance<'tcx>, | |
121 | instantiating_crate: CrateNum, | |
122 | ) -> String { | |
123 | compute_symbol_name(tcx, instance, || instantiating_crate) | |
124 | } | |
125 | ||
f035d41b | 126 | pub fn provide(providers: &mut Providers) { |
dfeec247 XL |
127 | *providers = Providers { symbol_name: symbol_name_provider, ..*providers }; |
128 | } | |
ea8adc8c | 129 | |
dfeec247 XL |
130 | // The `symbol_name` query provides the symbol name for calling a given |
131 | // instance from the local crate. In particular, it will also look up the | |
132 | // correct symbol name of instances from upstream crates. | |
a2a8927a | 133 | fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> { |
dfeec247 XL |
134 | let symbol_name = compute_symbol_name(tcx, instance, || { |
135 | // This closure determines the instantiating crate for instances that | |
136 | // need an instantiating-crate-suffix for their symbol name, in order | |
137 | // to differentiate between local copies. | |
138 | if is_generic(instance.substs) { | |
139 | // For generics we might find re-usable upstream instances. If there | |
140 | // is one, we rely on the symbol being instantiated locally. | |
141 | instance.upstream_monomorphization(tcx).unwrap_or(LOCAL_CRATE) | |
142 | } else { | |
143 | // For non-generic things that need to avoid naming conflicts, we | |
144 | // always instantiate a copy in the local crate. | |
145 | LOCAL_CRATE | |
146 | } | |
147 | }); | |
148 | ||
3dfed10e | 149 | ty::SymbolName::new(tcx, &symbol_name) |
7cac9316 XL |
150 | } |
151 | ||
3c0e092e | 152 | /// This function computes the typeid for the given function ABI. |
a2a8927a | 153 | pub fn typeid_for_fnabi<'tcx>(tcx: TyCtxt<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> String { |
3c0e092e XL |
154 | v0::mangle_typeid_for_fnabi(tcx, fn_abi) |
155 | } | |
156 | ||
dfeec247 XL |
157 | /// Computes the symbol name for the given instance. This function will call |
158 | /// `compute_instantiating_crate` if it needs to factor the instantiating crate | |
159 | /// into the symbol name. | |
a2a8927a | 160 | fn compute_symbol_name<'tcx>( |
dfeec247 XL |
161 | tcx: TyCtxt<'tcx>, |
162 | instance: Instance<'tcx>, | |
163 | compute_instantiating_crate: impl FnOnce() -> CrateNum, | |
164 | ) -> String { | |
cc61c64b XL |
165 | let def_id = instance.def_id(); |
166 | let substs = instance.substs; | |
3157f602 | 167 | |
94b46f34 | 168 | debug!("symbol_name(def_id={:?}, substs={:?})", def_id, substs); |
54a0048b | 169 | |
f9f354fc XL |
170 | // FIXME(eddyb) Precompute a custom symbol name based on attributes. |
171 | let is_foreign = if let Some(def_id) = def_id.as_local() { | |
17df50a5 | 172 | if tcx.proc_macro_decls_static(()) == Some(def_id) { |
136023e0 XL |
173 | let stable_crate_id = tcx.sess.local_stable_crate_id(); |
174 | return tcx.sess.generate_proc_macro_decls_symbol(stable_crate_id); | |
3157f602 | 175 | } |
3dfed10e | 176 | let hir_id = tcx.hir().local_def_id_to_hir_id(def_id); |
29967ef6 | 177 | matches!(tcx.hir().get(hir_id), Node::ForeignItem(_)) |
cc61c64b | 178 | } else { |
7cac9316 | 179 | tcx.is_foreign_item(def_id) |
cc61c64b | 180 | }; |
54a0048b | 181 | |
b7449926 | 182 | let attrs = tcx.codegen_fn_attrs(def_id); |
dfeec247 XL |
183 | |
184 | // Foreign items by default use no mangling for their symbol name. There's a | |
185 | // few exceptions to this rule though: | |
186 | // | |
187 | // * This can be overridden with the `#[link_name]` attribute | |
188 | // | |
189 | // * On the wasm32 targets there is a bug (or feature) in LLD [1] where the | |
190 | // same-named symbol when imported from different wasm modules will get | |
74b04a01 | 191 | // hooked up incorrectly. As a result foreign symbols, on the wasm target, |
dfeec247 XL |
192 | // with a wasm import module, get mangled. Additionally our codegen will |
193 | // deduplicate symbols based purely on the symbol name, but for wasm this | |
194 | // isn't quite right because the same-named symbol on wasm can come from | |
195 | // different modules. For these reasons if `#[link(wasm_import_module)]` | |
196 | // is present we mangle everything on wasm because the demangled form will | |
197 | // show up in the `wasm-import-name` custom attribute in LLVM IR. | |
198 | // | |
199 | // [1]: https://bugs.llvm.org/show_bug.cgi?id=44316 | |
29967ef6 | 200 | if is_foreign |
cdc7bbd5 | 201 | && (!tcx.sess.target.is_like_wasm |
29967ef6 XL |
202 | || !tcx.wasm_import_module_map(def_id.krate).contains_key(&def_id)) |
203 | { | |
204 | if let Some(name) = attrs.link_name { | |
205 | return name.to_string(); | |
3157f602 | 206 | } |
29967ef6 | 207 | return tcx.item_name(def_id).to_string(); |
cc61c64b | 208 | } |
54a0048b | 209 | |
e74abb32 | 210 | if let Some(name) = attrs.export_name { |
cc61c64b | 211 | // Use provided name |
dfeec247 | 212 | return name.to_string(); |
cc61c64b | 213 | } |
54a0048b | 214 | |
b7449926 | 215 | if attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE) { |
cc61c64b | 216 | // Don't mangle |
dfeec247 | 217 | return tcx.item_name(def_id).to_string(); |
dc9dc135 XL |
218 | } |
219 | ||
dc9dc135 XL |
220 | let avoid_cross_crate_conflicts = |
221 | // If this is an instance of a generic function, we also hash in | |
222 | // the ID of the instantiating crate. This avoids symbol conflicts | |
223 | // in case the same instances is emitted in two crates of the same | |
224 | // project. | |
dfeec247 | 225 | is_generic(substs) || |
dc9dc135 XL |
226 | |
227 | // If we're dealing with an instance of a function that's inlined from | |
228 | // another crate but we're marking it as globally shared to our | |
229 | // compliation (aka we're not making an internal copy in each of our | |
230 | // codegen units) then this symbol may become an exported (but hidden | |
231 | // visibility) symbol. This means that multiple crates may do the same | |
232 | // and we want to be sure to avoid any symbol conflicts here. | |
29967ef6 | 233 | matches!(MonoItem::Fn(instance).instantiation_mode(tcx), InstantiationMode::GloballyShared { may_conflict: true }); |
54a0048b | 234 | |
dfeec247 XL |
235 | let instantiating_crate = |
236 | if avoid_cross_crate_conflicts { Some(compute_instantiating_crate()) } else { None }; | |
9fa01778 | 237 | |
dc9dc135 XL |
238 | // Pick the crate responsible for the symbol mangling version, which has to: |
239 | // 1. be stable for each instance, whether it's being defined or imported | |
a2a8927a | 240 | // 2. obey each crate's own `-C symbol-mangling-version`, as much as possible |
dc9dc135 XL |
241 | // We solve these as follows: |
242 | // 1. because symbol names depend on both `def_id` and `instantiating_crate`, | |
243 | // both their `CrateNum`s are stable for any given instance, so we can pick | |
244 | // either and have a stable choice of symbol mangling version | |
245 | // 2. we favor `instantiating_crate` where possible (i.e. when `Some`) | |
246 | let mangling_version_crate = instantiating_crate.unwrap_or(def_id.krate); | |
247 | let mangling_version = if mangling_version_crate == LOCAL_CRATE { | |
a2a8927a | 248 | tcx.sess.opts.get_symbol_mangling_version() |
dc9dc135 XL |
249 | } else { |
250 | tcx.symbol_mangling_version(mangling_version_crate) | |
251 | }; | |
9fa01778 | 252 | |
94222f64 | 253 | let symbol = match mangling_version { |
dc9dc135 XL |
254 | SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate), |
255 | SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate), | |
94222f64 XL |
256 | }; |
257 | ||
258 | debug_assert!( | |
259 | rustc_demangle::try_demangle(&symbol).is_ok(), | |
260 | "compute_symbol_name: `{}` cannot be demangled", | |
261 | symbol | |
262 | ); | |
263 | ||
264 | symbol | |
dfeec247 | 265 | } |
9fa01778 | 266 | |
dfeec247 XL |
267 | fn is_generic(substs: SubstsRef<'_>) -> bool { |
268 | substs.non_erasable_generics().next().is_some() | |
54a0048b | 269 | } |