1 use crate::back
::write
::{
2 self, save_temp_bitcode
, to_llvm_opt_settings
, with_llvm_pmb
, DiagnosticHandlers
,
4 use crate::llvm
::archive_ro
::ArchiveRO
;
5 use crate::llvm
::{self, False, True}
;
6 use crate::{LlvmCodegenBackend, ModuleLlvm}
;
7 use rustc_codegen_ssa
::back
::lto
::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared}
;
8 use rustc_codegen_ssa
::back
::symbol_export
;
9 use rustc_codegen_ssa
::back
::write
::{CodegenContext, FatLTOInput, ModuleConfig}
;
10 use rustc_codegen_ssa
::traits
::*;
11 use rustc_codegen_ssa
::{looks_like_rust_object_file, ModuleCodegen, ModuleKind}
;
12 use rustc_data_structures
::fx
::{FxHashMap, FxHashSet}
;
13 use rustc_errors
::{FatalError, Handler}
;
14 use rustc_hir
::def_id
::LOCAL_CRATE
;
15 use rustc_middle
::bug
;
16 use rustc_middle
::dep_graph
::WorkProduct
;
17 use rustc_middle
::middle
::exported_symbols
::SymbolExportLevel
;
18 use rustc_session
::cgu_reuse_tracker
::CguReuse
;
19 use rustc_session
::config
::{self, CrateType, Lto}
;
20 use tracing
::{debug, info}
;
22 use std
::ffi
::{CStr, CString}
;
/// File name, relative to the incremental-compilation session directory,
/// under which the ThinLTO import maps are stored between sessions.
///
/// We keep track of past LTO imports that were used to produce the current set
/// of compiled object files that we might choose to reuse during this
/// compilation session.
pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-imports.bin";
36 pub fn crate_type_allows_lto(crate_type
: CrateType
) -> bool
{
38 CrateType
::Executable
| CrateType
::Staticlib
| CrateType
::Cdylib
=> true,
39 CrateType
::Dylib
| CrateType
::Rlib
| CrateType
::ProcMacro
=> false,
44 cgcx
: &CodegenContext
<LlvmCodegenBackend
>,
45 diag_handler
: &Handler
,
46 ) -> Result
<(Vec
<CString
>, Vec
<(SerializedModule
<ModuleBuffer
>, CString
)>), FatalError
> {
47 let export_threshold
= match cgcx
.lto
{
48 // We're just doing LTO for our one crate
49 Lto
::ThinLocal
=> SymbolExportLevel
::Rust
,
51 // We're doing LTO for the entire crate graph
52 Lto
::Fat
| Lto
::Thin
=> symbol_export
::crates_export_threshold(&cgcx
.crate_types
),
54 Lto
::No
=> panic
!("didn't request LTO but we're doing LTO"),
57 let symbol_filter
= &|&(ref name
, level
): &(String
, SymbolExportLevel
)| {
58 if level
.is_below_threshold(export_threshold
) {
59 Some(CString
::new(name
.as_str()).unwrap())
64 let exported_symbols
= cgcx
.exported_symbols
.as_ref().expect("needs exported symbols for LTO");
65 let mut symbols_below_threshold
= {
66 let _timer
= cgcx
.prof
.generic_activity("LLVM_lto_generate_symbols_below_threshold");
67 exported_symbols
[&LOCAL_CRATE
].iter().filter_map(symbol_filter
).collect
::<Vec
<CString
>>()
69 info
!("{} symbols to preserve in this crate", symbols_below_threshold
.len());
71 // If we're performing LTO for the entire crate graph, then for each of our
72 // upstream dependencies, find the corresponding rlib and load the bitcode
75 // We save off all the bytecode and LLVM module ids for later processing
76 // with either fat or thin LTO
77 let mut upstream_modules
= Vec
::new();
78 if cgcx
.lto
!= Lto
::ThinLocal
{
79 if cgcx
.opts
.cg
.prefer_dynamic
{
81 .struct_err("cannot prefer dynamic linking when performing LTO")
83 "only 'staticlib', 'bin', and 'cdylib' outputs are \
87 return Err(FatalError
);
90 // Make sure we actually can run LTO
91 for crate_type
in cgcx
.crate_types
.iter() {
92 if !crate_type_allows_lto(*crate_type
) {
93 let e
= diag_handler
.fatal(
94 "lto can only be run for executables, cdylibs and \
95 static library outputs",
101 for &(cnum
, ref path
) in cgcx
.each_linked_rlib_for_lto
.iter() {
102 let exported_symbols
=
103 cgcx
.exported_symbols
.as_ref().expect("needs exported symbols for LTO");
106 cgcx
.prof
.generic_activity("LLVM_lto_generate_symbols_below_threshold");
107 symbols_below_threshold
108 .extend(exported_symbols
[&cnum
].iter().filter_map(symbol_filter
));
111 let archive
= ArchiveRO
::open(&path
).expect("wanted an rlib");
112 let obj_files
= archive
114 .filter_map(|child
| child
.ok().and_then(|c
| c
.name().map(|name
| (name
, c
))))
115 .filter(|&(name
, _
)| looks_like_rust_object_file(name
));
116 for (name
, child
) in obj_files
{
117 info
!("adding bitcode from {}", name
);
118 match get_bitcode_slice_from_object_data(child
.data()) {
120 let module
= SerializedModule
::FromRlib(data
.to_vec());
121 upstream_modules
.push((module
, CString
::new(name
).unwrap()));
123 Err(msg
) => return Err(diag_handler
.fatal(&msg
)),
129 Ok((symbols_below_threshold
, upstream_modules
))
132 fn get_bitcode_slice_from_object_data(obj
: &[u8]) -> Result
<&[u8], String
> {
135 unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) }
;
138 let bc
= unsafe { slice::from_raw_parts(data, len) }
;
140 // `bc` must be a sub-slice of `obj`.
141 assert
!(obj
.as_ptr() <= bc
.as_ptr());
142 assert
!(bc
[bc
.len()..bc
.len()].as_ptr() <= obj
[obj
.len()..obj
.len()].as_ptr());
147 let msg
= llvm
::last_error().unwrap_or_else(|| "unknown LLVM error".to_string());
148 Err(format
!("failed to get bitcode from object file for LTO ({})", msg
))
152 /// Performs fat LTO by merging all modules into a single one and returning it
153 /// for further optimization.
154 pub(crate) fn run_fat(
155 cgcx
: &CodegenContext
<LlvmCodegenBackend
>,
156 modules
: Vec
<FatLTOInput
<LlvmCodegenBackend
>>,
157 cached_modules
: Vec
<(SerializedModule
<ModuleBuffer
>, WorkProduct
)>,
158 ) -> Result
<LtoModuleCodegen
<LlvmCodegenBackend
>, FatalError
> {
159 let diag_handler
= cgcx
.create_diag_handler();
160 let (symbols_below_threshold
, upstream_modules
) = prepare_lto(cgcx
, &diag_handler
)?
;
161 let symbols_below_threshold
=
162 symbols_below_threshold
.iter().map(|c
| c
.as_ptr()).collect
::<Vec
<_
>>();
169 &symbols_below_threshold
,
173 /// Performs thin LTO by performing necessary global analysis and returning two
174 /// lists, one of the modules that need optimization and another for modules that
175 /// can simply be copied over from the incr. comp. cache.
176 pub(crate) fn run_thin(
177 cgcx
: &CodegenContext
<LlvmCodegenBackend
>,
178 modules
: Vec
<(String
, ThinBuffer
)>,
179 cached_modules
: Vec
<(SerializedModule
<ModuleBuffer
>, WorkProduct
)>,
180 ) -> Result
<(Vec
<LtoModuleCodegen
<LlvmCodegenBackend
>>, Vec
<WorkProduct
>), FatalError
> {
181 let diag_handler
= cgcx
.create_diag_handler();
182 let (symbols_below_threshold
, upstream_modules
) = prepare_lto(cgcx
, &diag_handler
)?
;
183 let symbols_below_threshold
=
184 symbols_below_threshold
.iter().map(|c
| c
.as_ptr()).collect
::<Vec
<_
>>();
185 if cgcx
.opts
.cg
.linker_plugin_lto
.enabled() {
187 "We should never reach this case if the LTO step \
188 is deferred to the linker"
197 &symbols_below_threshold
,
201 pub(crate) fn prepare_thin(module
: ModuleCodegen
<ModuleLlvm
>) -> (String
, ThinBuffer
) {
202 let name
= module
.name
.clone();
203 let buffer
= ThinBuffer
::new(module
.module_llvm
.llmod());
208 cgcx
: &CodegenContext
<LlvmCodegenBackend
>,
209 diag_handler
: &Handler
,
210 modules
: Vec
<FatLTOInput
<LlvmCodegenBackend
>>,
211 cached_modules
: Vec
<(SerializedModule
<ModuleBuffer
>, WorkProduct
)>,
212 mut serialized_modules
: Vec
<(SerializedModule
<ModuleBuffer
>, CString
)>,
213 symbols_below_threshold
: &[*const libc
::c_char
],
214 ) -> Result
<LtoModuleCodegen
<LlvmCodegenBackend
>, FatalError
> {
215 let _timer
= cgcx
.prof
.generic_activity("LLVM_fat_lto_build_monolithic_module");
216 info
!("going for a fat lto");
218 // Sort out all our lists of incoming modules into two lists.
220 // * `serialized_modules` (also an argument to this function) contains all
221 // modules that are serialized in-memory.
222 // * `in_memory` contains modules which are already parsed and in-memory,
223 // such as from multi-CGU builds.
225 // All of `cached_modules` (cached from previous incremental builds) can
226 // immediately go onto the `serialized_modules` modules list and then we can
227 // split the `modules` array into these two lists.
228 let mut in_memory
= Vec
::new();
229 serialized_modules
.extend(cached_modules
.into_iter().map(|(buffer
, wp
)| {
230 info
!("pushing cached module {:?}", wp
.cgu_name
);
231 (buffer
, CString
::new(wp
.cgu_name
).unwrap())
233 for module
in modules
{
235 FatLTOInput
::InMemory(m
) => in_memory
.push(m
),
236 FatLTOInput
::Serialized { name, buffer }
=> {
237 info
!("pushing serialized module {:?}", name
);
238 let buffer
= SerializedModule
::Local(buffer
);
239 serialized_modules
.push((buffer
, CString
::new(name
).unwrap()));
244 // Find the "costliest" module and merge everything into that codegen unit.
245 // All the other modules will be serialized and reparsed into the new
246 // context, so this hopefully avoids serializing and parsing the largest
249 // Additionally use a regular module as the base here to ensure that various
250 // file copy operations in the backend work correctly. The only other kind
251 // of module here should be an allocator one, and if your crate is smaller
252 // than the allocator module then the size doesn't really matter anyway.
253 let costliest_module
= in_memory
256 .filter(|&(_
, module
)| module
.kind
== ModuleKind
::Regular
)
258 let cost
= unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) }
;
263 // If we found a costliest module, we're good to go. Otherwise all our
264 // inputs were serialized which could happen in the case, for example, that
265 // all our inputs were incrementally reread from the cache and we're just
266 // re-executing the LTO passes. If that's the case deserialize the first
267 // module and create a linker with it.
268 let module
: ModuleCodegen
<ModuleLlvm
> = match costliest_module
{
269 Some((_cost
, i
)) => in_memory
.remove(i
),
271 assert
!(!serialized_modules
.is_empty(), "must have at least one serialized module");
272 let (buffer
, name
) = serialized_modules
.remove(0);
273 info
!("no in-memory regular modules to choose from, parsing {:?}", name
);
275 module_llvm
: ModuleLlvm
::parse(cgcx
, &name
, buffer
.data(), diag_handler
)?
,
276 name
: name
.into_string().unwrap(),
277 kind
: ModuleKind
::Regular
,
281 let mut serialized_bitcode
= Vec
::new();
283 let (llcx
, llmod
) = {
284 let llvm
= &module
.module_llvm
;
285 (&llvm
.llcx
, llvm
.llmod())
287 info
!("using {:?} as a base module", module
.name
);
289 // The linking steps below may produce errors and diagnostics within LLVM
290 // which we'd like to handle and print, so set up our diagnostic handlers
291 // (which get unregistered when they go out of scope below).
292 let _handler
= DiagnosticHandlers
::new(cgcx
, diag_handler
, llcx
);
294 // For all other modules we codegened we'll need to link them into our own
295 // bitcode. All modules were codegened in their own LLVM context, however,
296 // and we want to move everything to the same LLVM context. Currently the
297 // way we know of to do that is to serialize them to a string and then parse
298 // them later. Not great but hey, that's why it's "fat" LTO, right?
299 for module
in in_memory
{
300 let buffer
= ModuleBuffer
::new(module
.module_llvm
.llmod());
301 let llmod_id
= CString
::new(&module
.name
[..]).unwrap();
302 serialized_modules
.push((SerializedModule
::Local(buffer
), llmod_id
));
304 // Sort the modules to ensure we produce deterministic results.
305 serialized_modules
.sort_by(|module1
, module2
| module1
.1
.cmp(&module2
.1
));
307 // For all serialized bitcode files we parse them and link them in as we did
308 // above, this is all mostly handled in C++. Like above, though, we don't
309 // know much about the memory management here so we err on the side of being
310 // safe and persist everything with the original module.
311 let mut linker
= Linker
::new(llmod
);
312 for (bc_decoded
, name
) in serialized_modules
{
315 .generic_activity_with_arg("LLVM_fat_lto_link_module", format
!("{:?}", name
));
316 info
!("linking {:?}", name
);
317 let data
= bc_decoded
.data();
318 linker
.add(&data
).map_err(|()| {
319 let msg
= format
!("failed to load bc of {:?}", name
);
320 write
::llvm_err(&diag_handler
, &msg
)
322 serialized_bitcode
.push(bc_decoded
);
325 save_temp_bitcode(&cgcx
, &module
, "lto.input");
327 // Internalize everything below threshold to help strip out more modules and such.
329 let ptr
= symbols_below_threshold
.as_ptr();
330 llvm
::LLVMRustRunRestrictionPass(
332 ptr
as *const *const libc
::c_char
,
333 symbols_below_threshold
.len() as libc
::size_t
,
335 save_temp_bitcode(&cgcx
, &module
, "lto.after-restriction");
338 if cgcx
.no_landing_pads
{
340 llvm
::LLVMRustMarkAllFunctionsNounwind(llmod
);
342 save_temp_bitcode(&cgcx
, &module
, "lto.after-nounwind");
346 Ok(LtoModuleCodegen
::Fat { module: Some(module), _serialized_bitcode: serialized_bitcode }
)
349 struct Linker
<'a
>(&'a
mut llvm
::Linker
<'a
>);
352 fn new(llmod
: &'a llvm
::Module
) -> Self {
353 unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
356 fn add(&mut self, bytecode
: &[u8]) -> Result
<(), ()> {
358 if llvm
::LLVMRustLinkerAdd(
360 bytecode
.as_ptr() as *const libc
::c_char
,
371 impl Drop
for Linker
<'a
> {
374 llvm
::LLVMRustLinkerFree(&mut *(self.0 as *mut _
));
379 /// Prepare "thin" LTO to get run on these modules.
381 /// The general structure of ThinLTO is quite different from the structure of
382 /// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
383 /// one giant LLVM module, and then we run more optimization passes over this
384 /// big module after internalizing most symbols. Thin LTO, on the other hand,
385 /// avoids this large bottleneck through more targeted optimization.
387 /// At a high level Thin LTO looks like:
389 /// 1. Prepare a "summary" of each LLVM module in question which describes
390 /// the values inside, cost of the values, etc.
391 /// 2. Merge the summaries of all modules in question into one "index"
392 /// 3. Perform some global analysis on this index
393 /// 4. For each module, use the index and analysis calculated previously to
394 /// perform local transformations on the module, for example inlining
395 /// small functions from other modules.
396 /// 5. Run thin-specific optimization passes over each module, and then code
397 /// generate everything at the end.
399 /// The summary for each module is intended to be quite cheap, and the global
400 /// index is relatively quite cheap to create as well. As a result, the goal of
401 /// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
402 /// situations. For example one cheap optimization is that we can parallelize
403 /// all codegen modules, easily making use of all the cores on a machine.
405 /// With all that in mind, the function here is designed specifically for just
406 /// calculating the *index* for ThinLTO. This index will then be shared amongst
407 /// all of the `LtoModuleCodegen` units returned below and destroyed once
408 /// they all go out of scope.
410 cgcx
: &CodegenContext
<LlvmCodegenBackend
>,
411 diag_handler
: &Handler
,
412 modules
: Vec
<(String
, ThinBuffer
)>,
413 serialized_modules
: Vec
<(SerializedModule
<ModuleBuffer
>, CString
)>,
414 cached_modules
: Vec
<(SerializedModule
<ModuleBuffer
>, WorkProduct
)>,
415 symbols_below_threshold
: &[*const libc
::c_char
],
416 ) -> Result
<(Vec
<LtoModuleCodegen
<LlvmCodegenBackend
>>, Vec
<WorkProduct
>), FatalError
> {
417 let _timer
= cgcx
.prof
.generic_activity("LLVM_thin_lto_global_analysis");
419 info
!("going for that thin, thin LTO");
421 let green_modules
: FxHashMap
<_
, _
> =
422 cached_modules
.iter().map(|&(_
, ref wp
)| (wp
.cgu_name
.clone(), wp
.clone())).collect();
424 let full_scope_len
= modules
.len() + serialized_modules
.len() + cached_modules
.len();
425 let mut thin_buffers
= Vec
::with_capacity(modules
.len());
426 let mut module_names
= Vec
::with_capacity(full_scope_len
);
427 let mut thin_modules
= Vec
::with_capacity(full_scope_len
);
429 for (i
, (name
, buffer
)) in modules
.into_iter().enumerate() {
430 info
!("local module: {} - {}", i
, name
);
431 let cname
= CString
::new(name
.clone()).unwrap();
432 thin_modules
.push(llvm
::ThinLTOModule
{
433 identifier
: cname
.as_ptr(),
434 data
: buffer
.data().as_ptr(),
435 len
: buffer
.data().len(),
437 thin_buffers
.push(buffer
);
438 module_names
.push(cname
);
441 // FIXME: All upstream crates are deserialized internally in the
442 // function below to extract their summary and modules. Note that
443 // unlike the loop above we *must* decode and/or read something
444 // here as these are all just serialized files on disk. An
445 // improvement, however, to make here would be to store the
446 // module summary separately from the actual module itself. Right
447 // now this is stored in one large bitcode file, and the entire
448 // file is deflate-compressed. We could try to bypass some of the
449 // decompression by storing the index uncompressed and only
450 // lazily decompressing the bytecode if necessary.
452 // Note that truly taking advantage of this optimization will
453 // likely be further down the road. We'd have to implement
454 // incremental ThinLTO first where we could actually avoid
455 // looking at upstream modules entirely sometimes (the contents,
456 // we must always unconditionally look at the index).
457 let mut serialized
= Vec
::with_capacity(serialized_modules
.len() + cached_modules
.len());
460 cached_modules
.into_iter().map(|(sm
, wp
)| (sm
, CString
::new(wp
.cgu_name
).unwrap()));
462 for (module
, name
) in serialized_modules
.into_iter().chain(cached_modules
) {
463 info
!("upstream or cached module {:?}", name
);
464 thin_modules
.push(llvm
::ThinLTOModule
{
465 identifier
: name
.as_ptr(),
466 data
: module
.data().as_ptr(),
467 len
: module
.data().len(),
469 serialized
.push(module
);
470 module_names
.push(name
);
474 assert_eq
!(thin_modules
.len(), module_names
.len());
476 // Delegate to the C++ bindings to create some data here. Once this is a
477 // tried-and-true interface we may wish to try to upstream some of this
478 // to LLVM itself, right now we reimplement a lot of what they do
480 let data
= llvm
::LLVMRustCreateThinLTOData(
481 thin_modules
.as_ptr(),
482 thin_modules
.len() as u32,
483 symbols_below_threshold
.as_ptr(),
484 symbols_below_threshold
.len() as u32,
486 .ok_or_else(|| write
::llvm_err(&diag_handler
, "failed to prepare thin LTO context"))?
;
488 info
!("thin LTO data created");
490 let (import_map_path
, prev_import_map
, curr_import_map
) =
491 if let Some(ref incr_comp_session_dir
) = cgcx
.incr_comp_session_dir
{
492 let path
= incr_comp_session_dir
.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME
);
493 // If previous imports have been deleted, or we get an IO error
494 // reading the file storing them, then we'll just use `None` as the
495 // prev_import_map, which will force the code to be recompiled.
496 let prev
= if path
.exists() {
497 ThinLTOImportMaps
::load_from_file(&path
).ok()
501 let curr
= ThinLTOImportMaps
::from_thin_lto_data(data
);
502 (Some(path
), prev
, curr
)
504 // If we don't compile incrementally, we don't need to load the
505 // import data from LLVM.
506 assert
!(green_modules
.is_empty());
507 let curr
= ThinLTOImportMaps
::default();
510 info
!("thin LTO import map loaded");
512 let data
= ThinData(data
);
514 // Throw our data in an `Arc` as we'll be sharing it across threads. We
515 // also put all memory referenced by the C++ data (buffers, ids, etc)
516 // into the arc as well. After this we'll create a thin module
517 // codegen per module in this data.
518 let shared
= Arc
::new(ThinShared
{
521 serialized_modules
: serialized
,
525 let mut copy_jobs
= vec
![];
526 let mut opt_jobs
= vec
![];
528 info
!("checking which modules can be-reused and which have to be re-optimized.");
529 for (module_index
, module_name
) in shared
.module_names
.iter().enumerate() {
530 let module_name
= module_name_to_str(module_name
);
532 // If (1.) the module hasn't changed, and (2.) none of the modules
533 // it imports from have changed, *and* (3.) the import and export
534 // sets themselves have not changed from the previous compile when
535 // it was last ThinLTO'ed, then we can re-use the post-ThinLTO
536 // version of the module. Otherwise, freshly perform LTO
539 // (Note that globally, the export set is just the inverse of the
542 // For further justification of why the above is necessary and sufficient,
543 // see the LLVM blog post on ThinLTO:
545 // http://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html
547 // which states the following:
550 // any particular ThinLTO backend must be redone iff:
552 // 1. The corresponding (primary) module’s bitcode changed
553 // 2. The list of imports into or exports from the module changed
554 // 3. The bitcode for any module being imported from has changed
555 // 4. Any global analysis result affecting either the primary module
556 // or anything it imports has changed.
559 // This strategy means we can always save the computed imports as
560 // canon: when we reuse the post-ThinLTO version, condition (3.)
561 // ensures that the current import set is the same as the previous
562 // one. (And of course, when we don't reuse the post-ThinLTO
563 // version, the current import set *is* the correct one, since we
564 // are doing the ThinLTO in this current compilation cycle.)
566 // For more discussion, see rust-lang/rust#59535 (where the import
567 // issue was discovered) and rust-lang/rust#69798 (where the
568 // analogous export issue was discovered).
569 if let (Some(prev_import_map
), true) =
570 (prev_import_map
.as_ref(), green_modules
.contains_key(module_name
))
572 assert
!(cgcx
.incr_comp_session_dir
.is_some());
574 let prev_imports
= prev_import_map
.imports_of(module_name
);
575 let curr_imports
= curr_import_map
.imports_of(module_name
);
576 let prev_exports
= prev_import_map
.exports_of(module_name
);
577 let curr_exports
= curr_import_map
.exports_of(module_name
);
578 let imports_all_green
= curr_imports
580 .all(|imported_module
| green_modules
.contains_key(imported_module
));
582 && equivalent_as_sets(prev_imports
, curr_imports
)
583 && equivalent_as_sets(prev_exports
, curr_exports
)
585 let work_product
= green_modules
[module_name
].clone();
586 copy_jobs
.push(work_product
);
587 info
!(" - {}: re-used", module_name
);
588 assert
!(cgcx
.incr_comp_session_dir
.is_some());
589 cgcx
.cgu_reuse_tracker
.set_actual_reuse(module_name
, CguReuse
::PostLto
);
594 info
!(" - {}: re-compiled", module_name
);
595 opt_jobs
.push(LtoModuleCodegen
::Thin(ThinModule
{
596 shared
: shared
.clone(),
601 // Save the current ThinLTO import information for the next compilation
602 // session, overwriting the previous serialized imports (if any).
603 if let Some(path
) = import_map_path
{
604 if let Err(err
) = curr_import_map
.save_to_file(&path
) {
605 let msg
= format
!("Error while writing ThinLTO import data: {}", err
);
606 return Err(write
::llvm_err(&diag_handler
, &msg
));
610 Ok((opt_jobs
, copy_jobs
))
/// Given two slices, each with no repeated elements, returns `true` if and only if
/// the two slices have the same contents when considered as sets (i.e. when
/// element order is disregarded).
fn equivalent_as_sets(a: &[String], b: &[String]) -> bool {
    // cheap path: unequal lengths means cannot possibly be set equivalent.
    if a.len() != b.len() {
        return false;
    }
    // fast path: before building new things, check if inputs are equivalent as is.
    if a == b {
        return true;
    }
    // slow path: general set comparison.
    let a: FxHashSet<&str> = a.iter().map(|s| s.as_str()).collect();
    let b: FxHashSet<&str> = b.iter().map(|s| s.as_str()).collect();
    a == b
}
632 pub(crate) fn run_pass_manager(
633 cgcx
: &CodegenContext
<LlvmCodegenBackend
>,
634 module
: &ModuleCodegen
<ModuleLlvm
>,
635 config
: &ModuleConfig
,
638 let _timer
= cgcx
.prof
.extra_verbose_generic_activity("LLVM_lto_optimize", &module
.name
[..]);
640 // Now we have one massive module inside of llmod. Time to run the
641 // LTO-specific optimization passes that LLVM provides.
643 // This code is based off the code found in llvm's LTO code generator:
644 // tools/lto/LTOCodeGenerator.cpp
645 debug
!("running the pass manager");
647 if write
::should_use_new_llvm_pass_manager(config
) {
648 let opt_stage
= if thin { llvm::OptStage::ThinLTO }
else { llvm::OptStage::FatLTO }
;
649 let opt_level
= config
.opt_level
.unwrap_or(config
::OptLevel
::No
);
650 // See comment below for why this is necessary.
651 let opt_level
= if let config
::OptLevel
::No
= opt_level
{
652 config
::OptLevel
::Less
656 write
::optimize_with_new_llvm_pass_manager(cgcx
, module
, config
, opt_level
, opt_stage
);
661 let pm
= llvm
::LLVMCreatePassManager();
662 llvm
::LLVMAddAnalysisPasses(module
.module_llvm
.tm
, pm
);
664 if config
.verify_llvm_ir
{
665 let pass
= llvm
::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
666 llvm
::LLVMRustAddPass(pm
, pass
.unwrap());
669 // When optimizing for LTO we don't actually pass in `-O0`, but we force
670 // it to always happen at least with `-O1`.
672 // With ThinLTO we mess around a lot with symbol visibility in a way
673 // that will actually cause linking failures if we optimize at O0 which
674 // notably is lacking in dead code elimination. To ensure we at least
675 // get some optimizations and correctly link we forcibly switch to `-O1`
676 // to get dead code elimination.
678 // Note that in general this shouldn't matter too much as you typically
679 // only turn on ThinLTO when you're compiling with optimizations
681 let opt_level
= config
683 .map(|x
| to_llvm_opt_settings(x
).0)
684 .unwrap_or(llvm
::CodeGenOptLevel
::None
);
685 let opt_level
= match opt_level
{
686 llvm
::CodeGenOptLevel
::None
=> llvm
::CodeGenOptLevel
::Less
,
689 with_llvm_pmb(module
.module_llvm
.llmod(), config
, opt_level
, false, &mut |b
| {
691 llvm
::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b
, pm
);
693 llvm
::LLVMPassManagerBuilderPopulateLTOPassManager(
694 b
, pm
, /* Internalize = */ False
, /* RunInliner = */ True
,
699 // We always generate bitcode through ThinLTOBuffers,
700 // which do not support anonymous globals
701 if config
.bitcode_needed() {
702 let pass
= llvm
::LLVMRustFindAndCreatePass("name-anon-globals\0".as_ptr().cast());
703 llvm
::LLVMRustAddPass(pm
, pass
.unwrap());
706 if config
.verify_llvm_ir
{
707 let pass
= llvm
::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
708 llvm
::LLVMRustAddPass(pm
, pass
.unwrap());
711 llvm
::LLVMRunPassManager(pm
, module
.module_llvm
.llmod());
713 llvm
::LLVMDisposePassManager(pm
);
718 pub struct ModuleBuffer(&'
static mut llvm
::ModuleBuffer
);
720 unsafe impl Send
for ModuleBuffer {}
721 unsafe impl Sync
for ModuleBuffer {}
724 pub fn new(m
: &llvm
::Module
) -> ModuleBuffer
{
725 ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) }
)
729 impl ModuleBufferMethods
for ModuleBuffer
{
730 fn data(&self) -> &[u8] {
732 let ptr
= llvm
::LLVMRustModuleBufferPtr(self.0);
733 let len
= llvm
::LLVMRustModuleBufferLen(self.0);
734 slice
::from_raw_parts(ptr
, len
)
739 impl Drop
for ModuleBuffer
{
742 llvm
::LLVMRustModuleBufferFree(&mut *(self.0 as *mut _
));
747 pub struct ThinData(&'
static mut llvm
::ThinLTOData
);
749 unsafe impl Send
for ThinData {}
750 unsafe impl Sync
for ThinData {}
752 impl Drop
for ThinData
{
755 llvm
::LLVMRustFreeThinLTOData(&mut *(self.0 as *mut _
));
760 pub struct ThinBuffer(&'
static mut llvm
::ThinLTOBuffer
);
762 unsafe impl Send
for ThinBuffer {}
763 unsafe impl Sync
for ThinBuffer {}
766 pub fn new(m
: &llvm
::Module
) -> ThinBuffer
{
768 let buffer
= llvm
::LLVMRustThinLTOBufferCreate(m
);
774 impl ThinBufferMethods
for ThinBuffer
{
775 fn data(&self) -> &[u8] {
777 let ptr
= llvm
::LLVMRustThinLTOBufferPtr(self.0) as *const _
;
778 let len
= llvm
::LLVMRustThinLTOBufferLen(self.0);
779 slice
::from_raw_parts(ptr
, len
)
784 impl Drop
for ThinBuffer
{
787 llvm
::LLVMRustThinLTOBufferFree(&mut *(self.0 as *mut _
));
792 pub unsafe fn optimize_thin_module(
793 thin_module
: &mut ThinModule
<LlvmCodegenBackend
>,
794 cgcx
: &CodegenContext
<LlvmCodegenBackend
>,
795 ) -> Result
<ModuleCodegen
<ModuleLlvm
>, FatalError
> {
796 let diag_handler
= cgcx
.create_diag_handler();
797 let tm
= (cgcx
.tm_factory
.0)().map_err(|e
| write
::llvm_err(&diag_handler
, &e
))?
;
799 // Right now the implementation we've got only works over serialized
800 // modules, so we create a fresh new LLVM context and parse the module
801 // into that context. One day, however, we may do this for upstream
802 // crates but for locally codegened modules we may be able to reuse
803 // that LLVM Context and Module.
804 let llcx
= llvm
::LLVMRustContextCreate(cgcx
.fewer_names
);
805 let llmod_raw
= parse_module(
807 &thin_module
.shared
.module_names
[thin_module
.idx
],
811 let module
= ModuleCodegen
{
812 module_llvm
: ModuleLlvm { llmod_raw, llcx, tm }
,
813 name
: thin_module
.name().to_string(),
814 kind
: ModuleKind
::Regular
,
817 let target
= &*module
.module_llvm
.tm
;
818 let llmod
= module
.module_llvm
.llmod();
819 save_temp_bitcode(&cgcx
, &module
, "thin-lto-input");
821 // Before we do much else find the "main" `DICompileUnit` that we'll be
822 // using below. If we find more than one though then rustc has changed
823 // in a way we're not ready for, so generate an ICE by returning
825 let mut cu1
= ptr
::null_mut();
826 let mut cu2
= ptr
::null_mut();
827 llvm
::LLVMRustThinLTOGetDICompileUnit(llmod
, &mut cu1
, &mut cu2
);
829 let msg
= "multiple source DICompileUnits found";
830 return Err(write
::llvm_err(&diag_handler
, msg
));
833 // Like with "fat" LTO, get some better optimizations if landing pads
834 // are disabled by removing all landing pads.
835 if cgcx
.no_landing_pads
{
838 .generic_activity_with_arg("LLVM_thin_lto_remove_landing_pads", thin_module
.name());
839 llvm
::LLVMRustMarkAllFunctionsNounwind(llmod
);
840 save_temp_bitcode(&cgcx
, &module
, "thin-lto-after-nounwind");
843 // Up next comes the per-module local analyses that we do for Thin LTO.
844 // Each of these functions is basically copied from the LLVM
845 // implementation and then tailored to suit this implementation. Ideally
846 // each of these would be supported by upstream LLVM but that's perhaps
847 // a patch for another day!
849 // You can find some more comments about these functions in the LLVM
850 // bindings we've got (currently `PassWrapper.cpp`)
853 cgcx
.prof
.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module
.name());
854 if !llvm
::LLVMRustPrepareThinLTORename(thin_module
.shared
.data
.0, llmod
, target
) {
855 let msg
= "failed to prepare thin LTO module";
856 return Err(write
::llvm_err(&diag_handler
, msg
));
858 save_temp_bitcode(cgcx
, &module
, "thin-lto-after-rename");
864 .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module
.name());
865 if !llvm
::LLVMRustPrepareThinLTOResolveWeak(thin_module
.shared
.data
.0, llmod
) {
866 let msg
= "failed to prepare thin LTO module";
867 return Err(write
::llvm_err(&diag_handler
, msg
));
869 save_temp_bitcode(cgcx
, &module
, "thin-lto-after-resolve");
875 .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module
.name());
876 if !llvm
::LLVMRustPrepareThinLTOInternalize(thin_module
.shared
.data
.0, llmod
) {
877 let msg
= "failed to prepare thin LTO module";
878 return Err(write
::llvm_err(&diag_handler
, msg
));
880 save_temp_bitcode(cgcx
, &module
, "thin-lto-after-internalize");
885 cgcx
.prof
.generic_activity_with_arg("LLVM_thin_lto_import", thin_module
.name());
886 if !llvm
::LLVMRustPrepareThinLTOImport(thin_module
.shared
.data
.0, llmod
, target
) {
887 let msg
= "failed to prepare thin LTO module";
888 return Err(write
::llvm_err(&diag_handler
, msg
));
890 save_temp_bitcode(cgcx
, &module
, "thin-lto-after-import");
893 // Ok now this is a bit unfortunate. This is also something you won't
894 // find upstream in LLVM's ThinLTO passes! This is a hack for now to
895 // work around bugs in LLVM.
897 // First discovered in #45511 it was found that as part of ThinLTO
898 // importing passes LLVM will import `DICompileUnit` metadata
899 // information across modules. This means that we'll be working with one
900 // LLVM module that has multiple `DICompileUnit` instances in it (a
901 // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
902 // bugs in LLVM's backend which generates invalid DWARF in a situation
905 // https://bugs.llvm.org/show_bug.cgi?id=35212
906 // https://bugs.llvm.org/show_bug.cgi?id=35562
908 // While the first bug there is fixed the second ended up causing #46346
909 // which was basically a resurgence of #45511 after LLVM's bug 35212 was
912 // This function below is a huge hack around this problem. The function
913 // below is defined in `PassWrapper.cpp` and will basically "merge"
914 // all `DICompileUnit` instances in a module. Basically it'll take all
915 // the objects, rewrite all pointers of `DISubprogram` to point to the
916 // first `DICompileUnit`, and then delete all the other units.
918 // This is probably mangling to the debug info slightly (but hopefully
919 // not too much) but for now at least gets LLVM to emit valid DWARF (or
920 // so it appears). Hopefully we can remove this once upstream bugs are
925 .generic_activity_with_arg("LLVM_thin_lto_patch_debuginfo", thin_module
.name());
926 llvm
::LLVMRustThinLTOPatchDICompileUnit(llmod
, cu1
);
927 save_temp_bitcode(cgcx
, &module
, "thin-lto-after-patch");
930 // Alright now that we've done everything related to the ThinLTO
931 // analysis it's time to run some optimizations! Here we use the same
932 // `run_pass_manager` as the "fat" LTO above except that we tell it to
933 // populate a thin-specific pass manager, which presumably LLVM treats a
934 // little differently.
936 info
!("running thin lto passes over {}", module
.name
);
937 let config
= cgcx
.config(module
.kind
);
938 run_pass_manager(cgcx
, &module
, config
, true);
939 save_temp_bitcode(cgcx
, &module
, "thin-lto-after-pm");
945 /// Summarizes module import/export relationships used by LLVM's ThinLTO pass.
947 /// Note that we tend to have two such instances of `ThinLTOImportMaps` in use:
948 /// one loaded from a file that represents the relationships used during the
949 /// compilation associated with the incremetnal build artifacts we are
950 /// attempting to reuse, and another constructed via `from_thin_lto_data`, which
951 /// captures the relationships of ThinLTO in the current compilation.
952 #[derive(Debug, Default)]
953 pub struct ThinLTOImportMaps
{
954 // key = llvm name of importing module, value = list of modules it imports from
955 imports
: FxHashMap
<String
, Vec
<String
>>,
956 // key = llvm name of exporting module, value = list of modules it exports to
957 exports
: FxHashMap
<String
, Vec
<String
>>,
960 impl ThinLTOImportMaps
{
961 /// Returns modules imported by `llvm_module_name` during some ThinLTO pass.
962 fn imports_of(&self, llvm_module_name
: &str) -> &[String
] {
963 self.imports
.get(llvm_module_name
).map(|v
| &v
[..]).unwrap_or(&[])
966 /// Returns modules exported by `llvm_module_name` during some ThinLTO pass.
967 fn exports_of(&self, llvm_module_name
: &str) -> &[String
] {
968 self.exports
.get(llvm_module_name
).map(|v
| &v
[..]).unwrap_or(&[])
971 fn save_to_file(&self, path
: &Path
) -> io
::Result
<()> {
973 let file
= File
::create(path
)?
;
974 let mut writer
= io
::BufWriter
::new(file
);
975 for (importing_module_name
, imported_modules
) in &self.imports
{
976 writeln
!(writer
, "{}", importing_module_name
)?
;
977 for imported_module
in imported_modules
{
978 writeln
!(writer
, " {}", imported_module
)?
;
985 fn load_from_file(path
: &Path
) -> io
::Result
<ThinLTOImportMaps
> {
986 use std
::io
::BufRead
;
987 let mut imports
= FxHashMap
::default();
988 let mut exports
: FxHashMap
<_
, Vec
<_
>> = FxHashMap
::default();
989 let mut current_module
: Option
<String
> = None
;
990 let mut current_imports
: Vec
<String
> = vec
![];
991 let file
= File
::open(path
)?
;
992 for line
in io
::BufReader
::new(file
).lines() {
995 let importing_module
= current_module
.take().expect("Importing module not set");
996 for imported
in ¤t_imports
{
997 exports
.entry(imported
.clone()).or_default().push(importing_module
.clone());
999 imports
.insert(importing_module
, mem
::replace(&mut current_imports
, vec
![]));
1000 } else if line
.starts_with(' '
) {
1001 // Space marks an imported module
1002 assert_ne
!(current_module
, None
);
1003 current_imports
.push(line
.trim().to_string());
1005 // Otherwise, beginning of a new module (must be start or follow empty line)
1006 assert_eq
!(current_module
, None
);
1007 current_module
= Some(line
.trim().to_string());
1010 Ok(ThinLTOImportMaps { imports, exports }
)
1013 /// Loads the ThinLTO import map from ThinLTOData.
1014 unsafe fn from_thin_lto_data(data
: *const llvm
::ThinLTOData
) -> ThinLTOImportMaps
{
1015 unsafe extern "C" fn imported_module_callback(
1016 payload
: *mut libc
::c_void
,
1017 importing_module_name
: *const libc
::c_char
,
1018 imported_module_name
: *const libc
::c_char
,
1020 let map
= &mut *(payload
as *mut ThinLTOImportMaps
);
1021 let importing_module_name
= CStr
::from_ptr(importing_module_name
);
1022 let importing_module_name
= module_name_to_str(&importing_module_name
);
1023 let imported_module_name
= CStr
::from_ptr(imported_module_name
);
1024 let imported_module_name
= module_name_to_str(&imported_module_name
);
1026 if !map
.imports
.contains_key(importing_module_name
) {
1027 map
.imports
.insert(importing_module_name
.to_owned(), vec
![]);
1031 .get_mut(importing_module_name
)
1033 .push(imported_module_name
.to_owned());
1035 if !map
.exports
.contains_key(imported_module_name
) {
1036 map
.exports
.insert(imported_module_name
.to_owned(), vec
![]);
1040 .get_mut(imported_module_name
)
1042 .push(importing_module_name
.to_owned());
1045 let mut map
= ThinLTOImportMaps
::default();
1046 llvm
::LLVMRustGetThinLTOModuleImports(
1048 imported_module_callback
,
1049 &mut map
as *mut _
as *mut libc
::c_void
,
1055 fn module_name_to_str(c_str
: &CStr
) -> &str {
1056 c_str
.to_str().unwrap_or_else(|e
| {
1057 bug
!("Encountered non-utf8 LLVM module name `{}`: {}", c_str
.to_string_lossy(), e
)
1061 pub fn parse_module
<'a
>(
1062 cx
: &'a llvm
::Context
,
1065 diag_handler
: &Handler
,
1066 ) -> Result
<&'a llvm
::Module
, FatalError
> {
1068 llvm
::LLVMRustParseBitcodeForLTO(cx
, data
.as_ptr(), data
.len(), name
.as_ptr()).ok_or_else(
1070 let msg
= "failed to parse bitcode for LTO module";
1071 write
::llvm_err(&diag_handler
, msg
)