1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 use back
::bytecode
::{DecodedBytecode, RLIB_BYTECODE_EXTENSION}
;
12 use back
::symbol_export
;
13 use back
::write
::{ModuleConfig, with_llvm_pmb, CodegenContext}
;
15 use errors
::{FatalError, Handler}
;
16 use llvm
::archive_ro
::ArchiveRO
;
17 use llvm
::{ModuleRef, TargetMachineRef, True, False}
;
19 use rustc
::hir
::def_id
::LOCAL_CRATE
;
20 use rustc
::middle
::exported_symbols
::SymbolExportLevel
;
21 use rustc
::session
::config
::{self, Lto}
;
22 use rustc
::util
::common
::time_ext
;
23 use time_graph
::Timeline
;
24 use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource}
;
28 use std
::ffi
::CString
;
33 pub fn crate_type_allows_lto(crate_type
: config
::CrateType
) -> bool
{
35 config
::CrateTypeExecutable
|
36 config
::CrateTypeStaticlib
|
37 config
::CrateTypeCdylib
=> true,
39 config
::CrateTypeDylib
|
40 config
::CrateTypeRlib
|
41 config
::CrateTypeProcMacro
=> false,
45 pub(crate) enum LtoModuleCodegen
{
47 module
: Option
<ModuleCodegen
>,
48 _serialized_bitcode
: Vec
<SerializedModule
>,
54 impl LtoModuleCodegen
{
55 pub fn name(&self) -> &str {
57 LtoModuleCodegen
::Fat { .. }
=> "everything",
58 LtoModuleCodegen
::Thin(ref m
) => m
.name(),
62 /// Optimize this module within the given codegen context.
64 /// This function is unsafe as it'll return a `ModuleCodegen` still
65 /// points to LLVM data structures owned by this `LtoModuleCodegen`.
66 /// It's intended that the module returned is immediately code generated and
67 /// dropped, and then this LTO module is dropped.
68 pub(crate) unsafe fn optimize(&mut self,
69 cgcx
: &CodegenContext
,
70 timeline
: &mut Timeline
)
71 -> Result
<ModuleCodegen
, FatalError
>
74 LtoModuleCodegen
::Fat { ref mut module, .. }
=> {
75 let module
= module
.take().unwrap();
76 let config
= cgcx
.config(module
.kind
);
77 let llmod
= module
.llvm().unwrap().llmod
;
78 let tm
= module
.llvm().unwrap().tm
;
79 run_pass_manager(cgcx
, tm
, llmod
, config
, false);
80 timeline
.record("fat-done");
83 LtoModuleCodegen
::Thin(ref mut thin
) => thin
.optimize(cgcx
, timeline
),
87 /// A "gauge" of how costly it is to optimize this module, used to sort
88 /// biggest modules first.
89 pub fn cost(&self) -> u64 {
91 // Only one module with fat LTO, so the cost doesn't matter.
92 LtoModuleCodegen
::Fat { .. }
=> 0,
93 LtoModuleCodegen
::Thin(ref m
) => m
.cost(),
98 pub(crate) fn run(cgcx
: &CodegenContext
,
99 modules
: Vec
<ModuleCodegen
>,
100 timeline
: &mut Timeline
)
101 -> Result
<Vec
<LtoModuleCodegen
>, FatalError
>
103 let diag_handler
= cgcx
.create_diag_handler();
104 let export_threshold
= match cgcx
.lto
{
105 // We're just doing LTO for our one crate
106 Lto
::ThinLocal
=> SymbolExportLevel
::Rust
,
108 // We're doing LTO for the entire crate graph
109 Lto
::Yes
| Lto
::Fat
| Lto
::Thin
=> {
110 symbol_export
::crates_export_threshold(&cgcx
.crate_types
)
113 Lto
::No
=> panic
!("didn't request LTO but we're doing LTO"),
116 let symbol_filter
= &|&(ref name
, level
): &(String
, SymbolExportLevel
)| {
117 if level
.is_below_threshold(export_threshold
) {
118 let mut bytes
= Vec
::with_capacity(name
.len() + 1);
119 bytes
.extend(name
.bytes());
120 Some(CString
::new(bytes
).unwrap())
125 let exported_symbols
= cgcx
.exported_symbols
126 .as_ref().expect("needs exported symbols for LTO");
127 let mut symbol_white_list
= exported_symbols
[&LOCAL_CRATE
]
129 .filter_map(symbol_filter
)
130 .collect
::<Vec
<CString
>>();
131 timeline
.record("whitelist");
132 info
!("{} symbols to preserve in this crate", symbol_white_list
.len());
134 // If we're performing LTO for the entire crate graph, then for each of our
135 // upstream dependencies, find the corresponding rlib and load the bitcode
138 // We save off all the bytecode and LLVM module ids for later processing
139 // with either fat or thin LTO
140 let mut upstream_modules
= Vec
::new();
141 if cgcx
.lto
!= Lto
::ThinLocal
{
142 if cgcx
.opts
.cg
.prefer_dynamic
{
143 diag_handler
.struct_err("cannot prefer dynamic linking when performing LTO")
144 .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
147 return Err(FatalError
)
150 // Make sure we actually can run LTO
151 for crate_type
in cgcx
.crate_types
.iter() {
152 if !crate_type_allows_lto(*crate_type
) {
153 let e
= diag_handler
.fatal("lto can only be run for executables, cdylibs and \
154 static library outputs");
159 for &(cnum
, ref path
) in cgcx
.each_linked_rlib_for_lto
.iter() {
160 let exported_symbols
= cgcx
.exported_symbols
161 .as_ref().expect("needs exported symbols for LTO");
162 symbol_white_list
.extend(
163 exported_symbols
[&cnum
]
165 .filter_map(symbol_filter
));
167 let archive
= ArchiveRO
::open(&path
).expect("wanted an rlib");
168 let bytecodes
= archive
.iter().filter_map(|child
| {
169 child
.ok().and_then(|c
| c
.name().map(|name
| (name
, c
)))
170 }).filter(|&(name
, _
)| name
.ends_with(RLIB_BYTECODE_EXTENSION
));
171 for (name
, data
) in bytecodes
{
172 info
!("adding bytecode {}", name
);
173 let bc_encoded
= data
.data();
175 let (bc
, id
) = time_ext(cgcx
.time_passes
, None
, &format
!("decode {}", name
), || {
176 match DecodedBytecode
::new(bc_encoded
) {
177 Ok(b
) => Ok((b
.bytecode(), b
.identifier().to_string())),
178 Err(e
) => Err(diag_handler
.fatal(&e
)),
181 let bc
= SerializedModule
::FromRlib(bc
);
182 upstream_modules
.push((bc
, CString
::new(id
).unwrap()));
184 timeline
.record(&format
!("load: {}", path
.display()));
188 let arr
= symbol_white_list
.iter().map(|c
| c
.as_ptr()).collect
::<Vec
<_
>>();
190 Lto
::Yes
| // `-C lto` == fat LTO by default
192 fat_lto(cgcx
, &diag_handler
, modules
, upstream_modules
, &arr
, timeline
)
196 thin_lto(&diag_handler
, modules
, upstream_modules
, &arr
, timeline
)
198 Lto
::No
=> unreachable
!(),
202 fn fat_lto(cgcx
: &CodegenContext
,
203 diag_handler
: &Handler
,
204 mut modules
: Vec
<ModuleCodegen
>,
205 mut serialized_modules
: Vec
<(SerializedModule
, CString
)>,
206 symbol_white_list
: &[*const libc
::c_char
],
207 timeline
: &mut Timeline
)
208 -> Result
<Vec
<LtoModuleCodegen
>, FatalError
>
210 info
!("going for a fat lto");
212 // Find the "costliest" module and merge everything into that codegen unit.
213 // All the other modules will be serialized and reparsed into the new
214 // context, so this hopefully avoids serializing and parsing the largest
217 // Additionally use a regular module as the base here to ensure that various
218 // file copy operations in the backend work correctly. The only other kind
219 // of module here should be an allocator one, and if your crate is smaller
220 // than the allocator module then the size doesn't really matter anyway.
221 let (_
, costliest_module
) = modules
.iter()
223 .filter(|&(_
, module
)| module
.kind
== ModuleKind
::Regular
)
226 llvm
::LLVMRustModuleCost(module
.llvm().unwrap().llmod
)
231 .expect("must be codegen'ing at least one module");
232 let module
= modules
.remove(costliest_module
);
233 let llmod
= module
.llvm().expect("can't lto pre-codegened modules").llmod
;
234 info
!("using {:?} as a base module", module
.llmod_id
);
236 // For all other modules we codegened we'll need to link them into our own
237 // bitcode. All modules were codegened in their own LLVM context, however,
238 // and we want to move everything to the same LLVM context. Currently the
239 // way we know of to do that is to serialize them to a string and then parse
240 // them later. Not great but hey, that's why it's "fat" LTO, right?
241 for module
in modules
{
242 let llvm
= module
.llvm().expect("can't lto pre-codegened modules");
243 let buffer
= ModuleBuffer
::new(llvm
.llmod
);
244 let llmod_id
= CString
::new(&module
.llmod_id
[..]).unwrap();
245 serialized_modules
.push((SerializedModule
::Local(buffer
), llmod_id
));
248 // For all serialized bitcode files we parse them and link them in as we did
249 // above, this is all mostly handled in C++. Like above, though, we don't
250 // know much about the memory management here so we err on the side of being
251 // safe and persist everything with the original module.
252 let mut serialized_bitcode
= Vec
::new();
253 let mut linker
= Linker
::new(llmod
);
254 for (bc_decoded
, name
) in serialized_modules
{
255 info
!("linking {:?}", name
);
256 time_ext(cgcx
.time_passes
, None
, &format
!("ll link {:?}", name
), || {
257 let data
= bc_decoded
.data();
258 linker
.add(&data
).map_err(|()| {
259 let msg
= format
!("failed to load bc of {:?}", name
);
260 write
::llvm_err(&diag_handler
, msg
)
263 timeline
.record(&format
!("link {:?}", name
));
264 serialized_bitcode
.push(bc_decoded
);
267 cgcx
.save_temp_bitcode(&module
, "lto.input");
269 // Internalize everything that *isn't* in our whitelist to help strip out
270 // more modules and such
272 let ptr
= symbol_white_list
.as_ptr();
273 llvm
::LLVMRustRunRestrictionPass(llmod
,
274 ptr
as *const *const libc
::c_char
,
275 symbol_white_list
.len() as libc
::size_t
);
276 cgcx
.save_temp_bitcode(&module
, "lto.after-restriction");
279 if cgcx
.no_landing_pads
{
281 llvm
::LLVMRustMarkAllFunctionsNounwind(llmod
);
283 cgcx
.save_temp_bitcode(&module
, "lto.after-nounwind");
285 timeline
.record("passes");
287 Ok(vec
![LtoModuleCodegen
::Fat
{
288 module
: Some(module
),
289 _serialized_bitcode
: serialized_bitcode
,
293 struct Linker(llvm
::LinkerRef
);
296 fn new(llmod
: ModuleRef
) -> Linker
{
297 unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
300 fn add(&mut self, bytecode
: &[u8]) -> Result
<(), ()> {
302 if llvm
::LLVMRustLinkerAdd(self.0,
303 bytecode
.as_ptr() as *const libc
::c_char
,
313 impl Drop
for Linker
{
315 unsafe { llvm::LLVMRustLinkerFree(self.0); }
319 /// Prepare "thin" LTO to get run on these modules.
321 /// The general structure of ThinLTO is quite different from the structure of
322 /// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
323 /// one giant LLVM module, and then we run more optimization passes over this
324 /// big module after internalizing most symbols. Thin LTO, on the other hand,
325 /// avoids this large bottleneck through more targeted optimization.
327 /// At a high level Thin LTO looks like:
329 /// 1. Prepare a "summary" of each LLVM module in question which describes
330 /// the values inside, cost of the values, etc.
331 /// 2. Merge the summaries of all modules in question into one "index"
332 /// 3. Perform some global analysis on this index
333 /// 4. For each module, use the index and analysis calculated previously to
334 /// perform local transformations on the module, for example inlining
335 /// small functions from other modules.
336 /// 5. Run thin-specific optimization passes over each module, and then code
337 /// generate everything at the end.
339 /// The summary for each module is intended to be quite cheap, and the global
340 /// index is relatively quite cheap to create as well. As a result, the goal of
341 /// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
342 /// situations. For example one cheap optimization is that we can parallelize
343 /// all codegen modules, easily making use of all the cores on a machine.
345 /// With all that in mind, the function here is designed specifically for just
346 /// calculating the *index* for ThinLTO. This index will then be shared amongst
347 /// all of the `LtoModuleCodegen` units returned below and destroyed once
348 /// they all go out of scope.
349 fn thin_lto(diag_handler
: &Handler
,
350 modules
: Vec
<ModuleCodegen
>,
351 serialized_modules
: Vec
<(SerializedModule
, CString
)>,
352 symbol_white_list
: &[*const libc
::c_char
],
353 timeline
: &mut Timeline
)
354 -> Result
<Vec
<LtoModuleCodegen
>, FatalError
>
357 info
!("going for that thin, thin LTO");
359 let mut thin_buffers
= Vec
::new();
360 let mut module_names
= Vec
::new();
361 let mut thin_modules
= Vec
::new();
363 // FIXME: right now, like with fat LTO, we serialize all in-memory
364 // modules before working with them and ThinLTO. We really
365 // shouldn't do this, however, and instead figure out how to
366 // extract a summary from an in-memory module and then merge that
367 // into the global index. It turns out that this loop is by far
368 // the most expensive portion of this small bit of global
370 for (i
, module
) in modules
.iter().enumerate() {
371 info
!("local module: {} - {}", i
, module
.llmod_id
);
372 let llvm
= module
.llvm().expect("can't lto precodegened module");
373 let name
= CString
::new(module
.llmod_id
.clone()).unwrap();
374 let buffer
= ThinBuffer
::new(llvm
.llmod
);
375 thin_modules
.push(llvm
::ThinLTOModule
{
376 identifier
: name
.as_ptr(),
377 data
: buffer
.data().as_ptr(),
378 len
: buffer
.data().len(),
380 thin_buffers
.push(buffer
);
381 module_names
.push(name
);
382 timeline
.record(&module
.llmod_id
);
385 // FIXME: All upstream crates are deserialized internally in the
386 // function below to extract their summary and modules. Note that
387 // unlike the loop above we *must* decode and/or read something
388 // here as these are all just serialized files on disk. An
389 // improvement, however, to make here would be to store the
390 // module summary separately from the actual module itself. Right
391 // now this is stored in one large bitcode file, and the entire
392 // file is deflate-compressed. We could try to bypass some of the
393 // decompression by storing the index uncompressed and only
394 // lazily decompressing the bytecode if necessary.
396 // Note that truly taking advantage of this optimization will
397 // likely be further down the road. We'd have to implement
398 // incremental ThinLTO first where we could actually avoid
399 // looking at upstream modules entirely sometimes (the contents,
400 // we must always unconditionally look at the index).
401 let mut serialized
= Vec
::new();
402 for (module
, name
) in serialized_modules
{
403 info
!("foreign module {:?}", name
);
404 thin_modules
.push(llvm
::ThinLTOModule
{
405 identifier
: name
.as_ptr(),
406 data
: module
.data().as_ptr(),
407 len
: module
.data().len(),
409 serialized
.push(module
);
410 module_names
.push(name
);
413 // Delegate to the C++ bindings to create some data here. Once this is a
414 // tried-and-true interface we may wish to try to upstream some of this
415 // to LLVM itself, right now we reimplement a lot of what they do
417 let data
= llvm
::LLVMRustCreateThinLTOData(
418 thin_modules
.as_ptr(),
419 thin_modules
.len() as u32,
420 symbol_white_list
.as_ptr(),
421 symbol_white_list
.len() as u32,
424 let msg
= format
!("failed to prepare thin LTO context");
425 return Err(write
::llvm_err(&diag_handler
, msg
))
427 let data
= ThinData(data
);
428 info
!("thin LTO data created");
429 timeline
.record("data");
431 // Throw our data in an `Arc` as we'll be sharing it across threads. We
432 // also put all memory referenced by the C++ data (buffers, ids, etc)
433 // into the arc as well. After this we'll create a thin module
434 // codegen per module in this data.
435 let shared
= Arc
::new(ThinShared
{
438 serialized_modules
: serialized
,
441 Ok((0..shared
.module_names
.len()).map(|i
| {
442 LtoModuleCodegen
::Thin(ThinModule
{
443 shared
: shared
.clone(),
450 fn run_pass_manager(cgcx
: &CodegenContext
,
451 tm
: TargetMachineRef
,
453 config
: &ModuleConfig
,
455 // Now we have one massive module inside of llmod. Time to run the
456 // LTO-specific optimization passes that LLVM provides.
458 // This code is based off the code found in llvm's LTO code generator:
459 // tools/lto/LTOCodeGenerator.cpp
460 debug
!("running the pass manager");
462 let pm
= llvm
::LLVMCreatePassManager();
463 llvm
::LLVMRustAddAnalysisPasses(tm
, pm
, llmod
);
464 let pass
= llvm
::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _
);
465 assert
!(!pass
.is_null());
466 llvm
::LLVMRustAddPass(pm
, pass
);
468 // When optimizing for LTO we don't actually pass in `-O0`, but we force
469 // it to always happen at least with `-O1`.
471 // With ThinLTO we mess around a lot with symbol visibility in a way
472 // that will actually cause linking failures if we optimize at O0 which
473 // notably is lacking in dead code elimination. To ensure we at least
474 // get some optimizations and correctly link we forcibly switch to `-O1`
475 // to get dead code elimination.
477 // Note that in general this shouldn't matter too much as you typically
478 // only turn on ThinLTO when you're compiling with optimizations
480 let opt_level
= config
.opt_level
.unwrap_or(llvm
::CodeGenOptLevel
::None
);
481 let opt_level
= match opt_level
{
482 llvm
::CodeGenOptLevel
::None
=> llvm
::CodeGenOptLevel
::Less
,
485 with_llvm_pmb(llmod
, config
, opt_level
, false, &mut |b
| {
487 if !llvm
::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b
, pm
) {
488 panic
!("this version of LLVM does not support ThinLTO");
491 llvm
::LLVMPassManagerBuilderPopulateLTOPassManager(b
, pm
,
492 /* Internalize = */ False
,
493 /* RunInliner = */ True
);
497 let pass
= llvm
::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _
);
498 assert
!(!pass
.is_null());
499 llvm
::LLVMRustAddPass(pm
, pass
);
501 time_ext(cgcx
.time_passes
, None
, "LTO passes", ||
502 llvm
::LLVMRunPassManager(pm
, llmod
));
504 llvm
::LLVMDisposePassManager(pm
);
509 pub enum SerializedModule
{
514 impl SerializedModule
{
515 fn data(&self) -> &[u8] {
517 SerializedModule
::Local(ref m
) => m
.data(),
518 SerializedModule
::FromRlib(ref m
) => m
,
523 pub struct ModuleBuffer(*mut llvm
::ModuleBuffer
);
525 unsafe impl Send
for ModuleBuffer {}
526 unsafe impl Sync
for ModuleBuffer {}
529 pub fn new(m
: ModuleRef
) -> ModuleBuffer
{
530 ModuleBuffer(unsafe {
531 llvm
::LLVMRustModuleBufferCreate(m
)
535 pub fn data(&self) -> &[u8] {
537 let ptr
= llvm
::LLVMRustModuleBufferPtr(self.0);
538 let len
= llvm
::LLVMRustModuleBufferLen(self.0);
539 slice
::from_raw_parts(ptr
, len
)
544 impl Drop
for ModuleBuffer
{
546 unsafe { llvm::LLVMRustModuleBufferFree(self.0); }
550 pub struct ThinModule
{
551 shared
: Arc
<ThinShared
>,
557 thin_buffers
: Vec
<ThinBuffer
>,
558 serialized_modules
: Vec
<SerializedModule
>,
559 module_names
: Vec
<CString
>,
562 struct ThinData(*mut llvm
::ThinLTOData
);
564 unsafe impl Send
for ThinData {}
565 unsafe impl Sync
for ThinData {}
567 impl Drop
for ThinData
{
570 llvm
::LLVMRustFreeThinLTOData(self.0);
575 pub struct ThinBuffer(*mut llvm
::ThinLTOBuffer
);
577 unsafe impl Send
for ThinBuffer {}
578 unsafe impl Sync
for ThinBuffer {}
581 pub fn new(m
: ModuleRef
) -> ThinBuffer
{
583 let buffer
= llvm
::LLVMRustThinLTOBufferCreate(m
);
588 pub fn data(&self) -> &[u8] {
590 let ptr
= llvm
::LLVMRustThinLTOBufferPtr(self.0) as *const _
;
591 let len
= llvm
::LLVMRustThinLTOBufferLen(self.0);
592 slice
::from_raw_parts(ptr
, len
)
597 impl Drop
for ThinBuffer
{
600 llvm
::LLVMRustThinLTOBufferFree(self.0);
606 fn name(&self) -> &str {
607 self.shared
.module_names
[self.idx
].to_str().unwrap()
610 fn cost(&self) -> u64 {
611 // Yes, that's correct, we're using the size of the bytecode as an
612 // indicator for how costly this codegen unit is.
613 self.data().len() as u64
616 fn data(&self) -> &[u8] {
617 let a
= self.shared
.thin_buffers
.get(self.idx
).map(|b
| b
.data());
618 a
.unwrap_or_else(|| {
619 let len
= self.shared
.thin_buffers
.len();
620 self.shared
.serialized_modules
[self.idx
- len
].data()
624 unsafe fn optimize(&mut self, cgcx
: &CodegenContext
, timeline
: &mut Timeline
)
625 -> Result
<ModuleCodegen
, FatalError
>
627 let diag_handler
= cgcx
.create_diag_handler();
628 let tm
= (cgcx
.tm_factory
)().map_err(|e
| {
629 write
::llvm_err(&diag_handler
, e
)
632 // Right now the implementation we've got only works over serialized
633 // modules, so we create a fresh new LLVM context and parse the module
634 // into that context. One day, however, we may do this for upstream
635 // crates but for locally codegened modules we may be able to reuse
636 // that LLVM Context and Module.
637 let llcx
= llvm
::LLVMRustContextCreate(cgcx
.fewer_names
);
638 let llmod
= llvm
::LLVMRustParseBitcodeForThinLTO(
640 self.data().as_ptr(),
642 self.shared
.module_names
[self.idx
].as_ptr(),
645 let msg
= format
!("failed to parse bitcode for thin LTO module");
646 return Err(write
::llvm_err(&diag_handler
, msg
));
648 let module
= ModuleCodegen
{
649 source
: ModuleSource
::Codegened(ModuleLlvm
{
654 llmod_id
: self.name().to_string(),
655 name
: self.name().to_string(),
656 kind
: ModuleKind
::Regular
,
658 cgcx
.save_temp_bitcode(&module
, "thin-lto-input");
660 // Before we do much else find the "main" `DICompileUnit` that we'll be
661 // using below. If we find more than one though then rustc has changed
662 // in a way we're not ready for, so generate an ICE by returning
664 let mut cu1
= ptr
::null_mut();
665 let mut cu2
= ptr
::null_mut();
666 llvm
::LLVMRustThinLTOGetDICompileUnit(llmod
, &mut cu1
, &mut cu2
);
668 let msg
= format
!("multiple source DICompileUnits found");
669 return Err(write
::llvm_err(&diag_handler
, msg
))
672 // Like with "fat" LTO, get some better optimizations if landing pads
673 // are disabled by removing all landing pads.
674 if cgcx
.no_landing_pads
{
675 llvm
::LLVMRustMarkAllFunctionsNounwind(llmod
);
676 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-nounwind");
677 timeline
.record("nounwind");
680 // Up next comes the per-module local analyses that we do for Thin LTO.
681 // Each of these functions is basically copied from the LLVM
682 // implementation and then tailored to suit this implementation. Ideally
683 // each of these would be supported by upstream LLVM but that's perhaps
684 // a patch for another day!
686 // You can find some more comments about these functions in the LLVM
687 // bindings we've got (currently `PassWrapper.cpp`)
688 if !llvm
::LLVMRustPrepareThinLTORename(self.shared
.data
.0, llmod
) {
689 let msg
= format
!("failed to prepare thin LTO module");
690 return Err(write
::llvm_err(&diag_handler
, msg
))
692 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-rename");
693 timeline
.record("rename");
694 if !llvm
::LLVMRustPrepareThinLTOResolveWeak(self.shared
.data
.0, llmod
) {
695 let msg
= format
!("failed to prepare thin LTO module");
696 return Err(write
::llvm_err(&diag_handler
, msg
))
698 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-resolve");
699 timeline
.record("resolve");
700 if !llvm
::LLVMRustPrepareThinLTOInternalize(self.shared
.data
.0, llmod
) {
701 let msg
= format
!("failed to prepare thin LTO module");
702 return Err(write
::llvm_err(&diag_handler
, msg
))
704 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-internalize");
705 timeline
.record("internalize");
706 if !llvm
::LLVMRustPrepareThinLTOImport(self.shared
.data
.0, llmod
) {
707 let msg
= format
!("failed to prepare thin LTO module");
708 return Err(write
::llvm_err(&diag_handler
, msg
))
710 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-import");
711 timeline
.record("import");
713 // Ok now this is a bit unfortunate. This is also something you won't
714 // find upstream in LLVM's ThinLTO passes! This is a hack for now to
715 // work around bugs in LLVM.
717 // First discovered in #45511 it was found that as part of ThinLTO
718 // importing passes LLVM will import `DICompileUnit` metadata
719 // information across modules. This means that we'll be working with one
720 // LLVM module that has multiple `DICompileUnit` instances in it (a
721 // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
722 // bugs in LLVM's backend which generates invalid DWARF in a situation
725 // https://bugs.llvm.org/show_bug.cgi?id=35212
726 // https://bugs.llvm.org/show_bug.cgi?id=35562
728 // While the first bug there is fixed the second ended up causing #46346
729 // which was basically a resurgence of #45511 after LLVM's bug 35212 was
732 // This function below is a huge hack around this problem. The function
733 // below is defined in `PassWrapper.cpp` and will basically "merge"
734 // all `DICompileUnit` instances in a module. Basically it'll take all
735 // the objects, rewrite all pointers of `DISubprogram` to point to the
736 // first `DICompileUnit`, and then delete all the other units.
738 // This is probably mangling the debug info slightly (but hopefully
739 // not too much) but for now at least gets LLVM to emit valid DWARF (or
740 // so it appears). Hopefully we can remove this once upstream bugs are
742 llvm
::LLVMRustThinLTOPatchDICompileUnit(llmod
, cu1
);
743 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-patch");
744 timeline
.record("patch");
746 // Alright now that we've done everything related to the ThinLTO
747 // analysis it's time to run some optimizations! Here we use the same
748 // `run_pass_manager` as the "fat" LTO above except that we tell it to
749 // populate a thin-specific pass manager, which presumably LLVM treats a
750 // little differently.
751 info
!("running thin lto passes over {}", module
.name
);
752 let config
= cgcx
.config(module
.kind
);
753 run_pass_manager(cgcx
, tm
, llmod
, config
, true);
754 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-pm");
755 timeline
.record("thin-done");
757 // FIXME: this is a hack around a bug in LLVM right now. Discovered in
758 // #46910 it was found out that on 32-bit MSVC LLVM will hit a codegen
759 // error if there's an available_externally function in the LLVM module.
760 // Typically we don't actually use these functions but ThinLTO makes
761 // heavy use of them when inlining across modules.
763 // Tracked upstream at https://bugs.llvm.org/show_bug.cgi?id=35736 this
764 // function call (and its definition on the C++ side of things)
765 // shouldn't be necessary eventually and we can safely delete these few
767 llvm
::LLVMRustThinLTORemoveAvailableExternally(llmod
);
768 cgcx
.save_temp_bitcode(&module
, "thin-lto-after-rm-ae");
769 timeline
.record("no-ae");