]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_codegen_llvm/src/back/lto.rs
New upstream version 1.54.0+dfsg1
[rustc.git] / compiler / rustc_codegen_llvm / src / back / lto.rs
CommitLineData
dfeec247
XL
1use crate::back::write::{
2 self, save_temp_bitcode, to_llvm_opt_settings, with_llvm_pmb, DiagnosticHandlers,
3};
9fa01778 4use crate::llvm::archive_ro::ArchiveRO;
29967ef6 5use crate::llvm::{self, build_string, False, True};
dfeec247 6use crate::{LlvmCodegenBackend, ModuleLlvm};
dfeec247 7use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
a1dfa0c6 8use rustc_codegen_ssa::back::symbol_export;
fc512014
XL
9use rustc_codegen_ssa::back::write::{
10 CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryConfig,
11};
a1dfa0c6 12use rustc_codegen_ssa::traits::*;
f9f354fc 13use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
29967ef6 14use rustc_data_structures::fx::FxHashMap;
60c5eb7d 15use rustc_errors::{FatalError, Handler};
dfeec247 16use rustc_hir::def_id::LOCAL_CRATE;
ba9703b0
XL
17use rustc_middle::bug;
18use rustc_middle::dep_graph::WorkProduct;
19use rustc_middle::middle::exported_symbols::SymbolExportLevel;
60c5eb7d 20use rustc_session::cgu_reuse_tracker::CguReuse;
f9f354fc 21use rustc_session::config::{self, CrateType, Lto};
3dfed10e 22use tracing::{debug, info};
1a4d82fc 23
b7449926 24use std::ffi::{CStr, CString};
dfeec247
XL
25use std::fs::File;
26use std::io;
cdc7bbd5 27use std::iter;
dfeec247 28use std::path::Path;
ff7c6d11 29use std::ptr;
ea8adc8c
XL
30use std::slice;
31use std::sync::Arc;
1a4d82fc 32
29967ef6
XL
/// Name of the file, inside the incremental compilation session directory,
/// that holds the ThinLTO cache keys computed by a session. The keys of the
/// previous session are compared against the current ones to determine which
/// CGUs we can reuse.
pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin";
dfeec247 36
f9f354fc 37pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
476ff2be 38 match crate_type {
f9f354fc
XL
39 CrateType::Executable | CrateType::Staticlib | CrateType::Cdylib => true,
40 CrateType::Dylib | CrateType::Rlib | CrateType::ProcMacro => false,
476ff2be
SL
41 }
42}
43
dfeec247
XL
44fn prepare_lto(
45 cgcx: &CodegenContext<LlvmCodegenBackend>,
46 diag_handler: &Handler,
47) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> {
2c00a5a8
XL
48 let export_threshold = match cgcx.lto {
49 // We're just doing LTO for our one crate
50 Lto::ThinLocal => SymbolExportLevel::Rust,
51
52 // We're doing LTO for the entire crate graph
dfeec247 53 Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
2c00a5a8
XL
54
55 Lto::No => panic!("didn't request LTO but we're doing LTO"),
ea8adc8c
XL
56 };
57
0531ce1d 58 let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
ea8adc8c 59 if level.is_below_threshold(export_threshold) {
e74abb32 60 Some(CString::new(name.as_str()).unwrap())
476ff2be
SL
61 } else {
62 None
63 }
64 };
dfeec247 65 let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
f035d41b
XL
66 let mut symbols_below_threshold = {
67 let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
dfeec247 68 exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>()
e74abb32 69 };
f035d41b 70 info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
ea8adc8c
XL
71
72 // If we're performing LTO for the entire crate graph, then for each of our
73 // upstream dependencies, find the corresponding rlib and load the bitcode
74 // from the archive.
75 //
76 // We save off all the bytecode and LLVM module ids for later processing
77 // with either fat or thin LTO
78 let mut upstream_modules = Vec::new();
2c00a5a8 79 if cgcx.lto != Lto::ThinLocal {
ea8adc8c 80 if cgcx.opts.cg.prefer_dynamic {
dfeec247
XL
81 diag_handler
82 .struct_err("cannot prefer dynamic linking when performing LTO")
83 .note(
84 "only 'staticlib', 'bin', and 'cdylib' outputs are \
85 supported with LTO",
86 )
87 .emit();
88 return Err(FatalError);
ea8adc8c
XL
89 }
90
91 // Make sure we actually can run LTO
92 for crate_type in cgcx.crate_types.iter() {
93 if !crate_type_allows_lto(*crate_type) {
dfeec247
XL
94 let e = diag_handler.fatal(
95 "lto can only be run for executables, cdylibs and \
96 static library outputs",
97 );
98 return Err(e);
ea8adc8c
XL
99 }
100 }
101
102 for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
dfeec247
XL
103 let exported_symbols =
104 cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
e74abb32 105 {
f035d41b
XL
106 let _timer =
107 cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
108 symbols_below_threshold
109 .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter));
e74abb32 110 }
ea8adc8c
XL
111
112 let archive = ArchiveRO::open(&path).expect("wanted an rlib");
f9f354fc 113 let obj_files = archive
dfeec247
XL
114 .iter()
115 .filter_map(|child| child.ok().and_then(|c| c.name().map(|name| (name, c))))
f9f354fc
XL
116 .filter(|&(name, _)| looks_like_rust_object_file(name));
117 for (name, child) in obj_files {
118 info!("adding bitcode from {}", name);
119 match get_bitcode_slice_from_object_data(child.data()) {
120 Ok(data) => {
121 let module = SerializedModule::FromRlib(data.to_vec());
122 upstream_modules.push((module, CString::new(name).unwrap()));
123 }
124 Err(msg) => return Err(diag_handler.fatal(&msg)),
125 }
ea8adc8c 126 }
ea8adc8c
XL
127 }
128 }
1a4d82fc 129
f035d41b 130 Ok((symbols_below_threshold, upstream_modules))
0731742a
XL
131}
132
f9f354fc
XL
133fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], String> {
134 let mut len = 0;
135 let data =
136 unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) };
137 if !data.is_null() {
138 assert!(len != 0);
139 let bc = unsafe { slice::from_raw_parts(data, len) };
140
141 // `bc` must be a sub-slice of `obj`.
142 assert!(obj.as_ptr() <= bc.as_ptr());
143 assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr());
144
145 Ok(bc)
146 } else {
147 assert!(len == 0);
148 let msg = llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string());
149 Err(format!("failed to get bitcode from object file for LTO ({})", msg))
150 }
151}
152
0731742a
XL
153/// Performs fat LTO by merging all modules into a single one and returning it
154/// for further optimization.
dfeec247
XL
155pub(crate) fn run_fat(
156 cgcx: &CodegenContext<LlvmCodegenBackend>,
157 modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
158 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
159) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
0731742a 160 let diag_handler = cgcx.create_diag_handler();
f035d41b
XL
161 let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
162 let symbols_below_threshold =
163 symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
164 fat_lto(
165 cgcx,
166 &diag_handler,
167 modules,
168 cached_modules,
169 upstream_modules,
170 &symbols_below_threshold,
171 )
0731742a
XL
172}
173
174/// Performs thin LTO by performing necessary global analysis and returning two
175/// lists, one of the modules that need optimization and another for modules that
176/// can simply be copied over from the incr. comp. cache.
dfeec247
XL
177pub(crate) fn run_thin(
178 cgcx: &CodegenContext<LlvmCodegenBackend>,
179 modules: Vec<(String, ThinBuffer)>,
180 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
181) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
0731742a 182 let diag_handler = cgcx.create_diag_handler();
f035d41b
XL
183 let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
184 let symbols_below_threshold =
185 symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
9fa01778 186 if cgcx.opts.cg.linker_plugin_lto.enabled() {
dfeec247
XL
187 unreachable!(
188 "We should never reach this case if the LTO step \
189 is deferred to the linker"
190 );
0731742a 191 }
f035d41b
XL
192 thin_lto(
193 cgcx,
194 &diag_handler,
195 modules,
196 upstream_modules,
197 cached_modules,
198 &symbols_below_threshold,
199 )
0731742a
XL
200}
201
dfeec247 202pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
0731742a
XL
203 let name = module.name.clone();
204 let buffer = ThinBuffer::new(module.module_llvm.llmod());
0731742a 205 (name, buffer)
ea8adc8c
XL
206}
207
dfeec247
XL
208fn fat_lto(
209 cgcx: &CodegenContext<LlvmCodegenBackend>,
210 diag_handler: &Handler,
211 modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
212 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
213 mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
f035d41b 214 symbols_below_threshold: &[*const libc::c_char],
dfeec247 215) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
e74abb32 216 let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
ea8adc8c
XL
217 info!("going for a fat lto");
218
e1599b0c
XL
219 // Sort out all our lists of incoming modules into two lists.
220 //
221 // * `serialized_modules` (also and argument to this function) contains all
222 // modules that are serialized in-memory.
223 // * `in_memory` contains modules which are already parsed and in-memory,
224 // such as from multi-CGU builds.
225 //
226 // All of `cached_modules` (cached from previous incremental builds) can
227 // immediately go onto the `serialized_modules` modules list and then we can
228 // split the `modules` array into these two lists.
229 let mut in_memory = Vec::new();
230 serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
231 info!("pushing cached module {:?}", wp.cgu_name);
232 (buffer, CString::new(wp.cgu_name).unwrap())
233 }));
234 for module in modules {
235 match module {
236 FatLTOInput::InMemory(m) => in_memory.push(m),
237 FatLTOInput::Serialized { name, buffer } => {
238 info!("pushing serialized module {:?}", name);
239 let buffer = SerializedModule::Local(buffer);
240 serialized_modules.push((buffer, CString::new(name).unwrap()));
241 }
242 }
243 }
244
ea8adc8c
XL
245 // Find the "costliest" module and merge everything into that codegen unit.
246 // All the other modules will be serialized and reparsed into the new
247 // context, so this hopefully avoids serializing and parsing the largest
248 // codegen unit.
249 //
250 // Additionally use a regular module as the base here to ensure that various
251 // file copy operations in the backend work correctly. The only other kind
252 // of module here should be an allocator one, and if your crate is smaller
253 // than the allocator module then the size doesn't really matter anyway.
dfeec247
XL
254 let costliest_module = in_memory
255 .iter()
ea8adc8c
XL
256 .enumerate()
257 .filter(|&(_, module)| module.kind == ModuleKind::Regular)
258 .map(|(i, module)| {
dfeec247 259 let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
ea8adc8c
XL
260 (cost, i)
261 })
9fa01778
XL
262 .max();
263
264 // If we found a costliest module, we're good to go. Otherwise all our
265 // inputs were serialized which could happen in the case, for example, that
266 // all our inputs were incrementally reread from the cache and we're just
267 // re-executing the LTO passes. If that's the case deserialize the first
268 // module and create a linker with it.
269 let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
e1599b0c 270 Some((_cost, i)) => in_memory.remove(i),
9fa01778 271 None => {
74b04a01 272 assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
e1599b0c
XL
273 let (buffer, name) = serialized_modules.remove(0);
274 info!("no in-memory regular modules to choose from, parsing {:?}", name);
9fa01778 275 ModuleCodegen {
e1599b0c
XL
276 module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?,
277 name: name.into_string().unwrap(),
9fa01778
XL
278 kind: ModuleKind::Regular,
279 }
280 }
281 };
ea8adc8c 282 let mut serialized_bitcode = Vec::new();
b7449926
XL
283 {
284 let (llcx, llmod) = {
285 let llvm = &module.module_llvm;
286 (&llvm.llcx, llvm.llmod())
287 };
288 info!("using {:?} as a base module", module.name);
289
290 // The linking steps below may produce errors and diagnostics within LLVM
291 // which we'd like to handle and print, so set up our diagnostic handlers
292 // (which get unregistered when they go out of scope below).
293 let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
294
295 // For all other modules we codegened we'll need to link them into our own
296 // bitcode. All modules were codegened in their own LLVM context, however,
297 // and we want to move everything to the same LLVM context. Currently the
298 // way we know of to do that is to serialize them to a string and them parse
299 // them later. Not great but hey, that's why it's "fat" LTO, right?
e1599b0c
XL
300 for module in in_memory {
301 let buffer = ModuleBuffer::new(module.module_llvm.llmod());
302 let llmod_id = CString::new(&module.name[..]).unwrap();
303 serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
304 }
416331ca 305 // Sort the modules to ensure we produce deterministic results.
e1599b0c 306 serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));
1a4d82fc 307
b7449926
XL
308 // For all serialized bitcode files we parse them and link them in as we did
309 // above, this is all mostly handled in C++. Like above, though, we don't
310 // know much about the memory management here so we err on the side of being
311 // save and persist everything with the original module.
312 let mut linker = Linker::new(llmod);
313 for (bc_decoded, name) in serialized_modules {
74b04a01
XL
314 let _timer = cgcx
315 .prof
316 .generic_activity_with_arg("LLVM_fat_lto_link_module", format!("{:?}", name));
b7449926 317 info!("linking {:?}", name);
74b04a01
XL
318 let data = bc_decoded.data();
319 linker.add(&data).map_err(|()| {
320 let msg = format!("failed to load bc of {:?}", name);
321 write::llvm_err(&diag_handler, &msg)
b7449926 322 })?;
b7449926
XL
323 serialized_bitcode.push(bc_decoded);
324 }
325 drop(linker);
a1dfa0c6 326 save_temp_bitcode(&cgcx, &module, "lto.input");
1a4d82fc 327
f035d41b 328 // Internalize everything below threshold to help strip out more modules and such.
1a4d82fc 329 unsafe {
f035d41b 330 let ptr = symbols_below_threshold.as_ptr();
dfeec247
XL
331 llvm::LLVMRustRunRestrictionPass(
332 llmod,
333 ptr as *const *const libc::c_char,
f035d41b 334 symbols_below_threshold.len() as libc::size_t,
dfeec247 335 );
a1dfa0c6 336 save_temp_bitcode(&cgcx, &module, "lto.after-restriction");
b7449926
XL
337 }
338
339 if cgcx.no_landing_pads {
340 unsafe {
341 llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
342 }
a1dfa0c6 343 save_temp_bitcode(&cgcx, &module, "lto.after-nounwind");
1a4d82fc
JJ
344 }
345 }
346
dfeec247 347 Ok(LtoModuleCodegen::Fat { module: Some(module), _serialized_bitcode: serialized_bitcode })
ea8adc8c
XL
348}
349
1b1a35ee 350crate struct Linker<'a>(&'a mut llvm::Linker<'a>);
0531ce1d 351
b7449926 352impl Linker<'a> {
1b1a35ee 353 crate fn new(llmod: &'a llvm::Module) -> Self {
0531ce1d
XL
354 unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
355 }
356
1b1a35ee 357 crate fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
0531ce1d 358 unsafe {
dfeec247
XL
359 if llvm::LLVMRustLinkerAdd(
360 self.0,
361 bytecode.as_ptr() as *const libc::c_char,
362 bytecode.len(),
363 ) {
0531ce1d
XL
364 Ok(())
365 } else {
366 Err(())
367 }
368 }
369 }
370}
371
b7449926 372impl Drop for Linker<'a> {
0531ce1d 373 fn drop(&mut self) {
dfeec247
XL
374 unsafe {
375 llvm::LLVMRustLinkerFree(&mut *(self.0 as *mut _));
376 }
0531ce1d
XL
377 }
378}
379
ea8adc8c
XL
380/// Prepare "thin" LTO to get run on these modules.
381///
382/// The general structure of ThinLTO is quite different from the structure of
383/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
384/// one giant LLVM module, and then we run more optimization passes over this
385/// big module after internalizing most symbols. Thin LTO, on the other hand,
386/// avoid this large bottleneck through more targeted optimization.
387///
388/// At a high level Thin LTO looks like:
389///
390/// 1. Prepare a "summary" of each LLVM module in question which describes
391/// the values inside, cost of the values, etc.
392/// 2. Merge the summaries of all modules in question into one "index"
393/// 3. Perform some global analysis on this index
394/// 4. For each module, use the index and analysis calculated previously to
395/// perform local transformations on the module, for example inlining
396/// small functions from other modules.
397/// 5. Run thin-specific optimization passes over each module, and then code
398/// generate everything at the end.
399///
400/// The summary for each module is intended to be quite cheap, and the global
401/// index is relatively quite cheap to create as well. As a result, the goal of
402/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
403/// situations. For example one cheap optimization is that we can parallelize
404/// all codegen modules, easily making use of all the cores on a machine.
405///
406/// With all that in mind, the function here is designed at specifically just
407/// calculating the *index* for ThinLTO. This index will then be shared amongst
94b46f34 408/// all of the `LtoModuleCodegen` units returned below and destroyed once
ea8adc8c 409/// they all go out of scope.
dfeec247
XL
410fn thin_lto(
411 cgcx: &CodegenContext<LlvmCodegenBackend>,
412 diag_handler: &Handler,
413 modules: Vec<(String, ThinBuffer)>,
414 serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
415 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
f035d41b 416 symbols_below_threshold: &[*const libc::c_char],
dfeec247 417) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
e74abb32 418 let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
ea8adc8c
XL
419 unsafe {
420 info!("going for that thin, thin LTO");
421
dfeec247
XL
422 let green_modules: FxHashMap<_, _> =
423 cached_modules.iter().map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone())).collect();
b7449926 424
a1dfa0c6
XL
425 let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
426 let mut thin_buffers = Vec::with_capacity(modules.len());
427 let mut module_names = Vec::with_capacity(full_scope_len);
428 let mut thin_modules = Vec::with_capacity(full_scope_len);
ea8adc8c 429
0731742a
XL
430 for (i, (name, buffer)) in modules.into_iter().enumerate() {
431 info!("local module: {} - {}", i, name);
432 let cname = CString::new(name.clone()).unwrap();
ea8adc8c 433 thin_modules.push(llvm::ThinLTOModule {
0731742a 434 identifier: cname.as_ptr(),
ea8adc8c
XL
435 data: buffer.data().as_ptr(),
436 len: buffer.data().len(),
437 });
438 thin_buffers.push(buffer);
0731742a 439 module_names.push(cname);
b039eaaf 440 }
ea8adc8c
XL
441
442 // FIXME: All upstream crates are deserialized internally in the
443 // function below to extract their summary and modules. Note that
444 // unlike the loop above we *must* decode and/or read something
445 // here as these are all just serialized files on disk. An
446 // improvement, however, to make here would be to store the
447 // module summary separately from the actual module itself. Right
448 // now this is store in one large bitcode file, and the entire
449 // file is deflate-compressed. We could try to bypass some of the
450 // decompression by storing the index uncompressed and only
451 // lazily decompressing the bytecode if necessary.
452 //
453 // Note that truly taking advantage of this optimization will
454 // likely be further down the road. We'd have to implement
455 // incremental ThinLTO first where we could actually avoid
456 // looking at upstream modules entirely sometimes (the contents,
457 // we must always unconditionally look at the index).
a1dfa0c6 458 let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
b7449926 459
dfeec247
XL
460 let cached_modules =
461 cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));
b7449926
XL
462
463 for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
464 info!("upstream or cached module {:?}", name);
ea8adc8c
XL
465 thin_modules.push(llvm::ThinLTOModule {
466 identifier: name.as_ptr(),
467 data: module.data().as_ptr(),
468 len: module.data().len(),
469 });
470 serialized.push(module);
471 module_names.push(name);
472 }
473
b7449926
XL
474 // Sanity check
475 assert_eq!(thin_modules.len(), module_names.len());
476
ea8adc8c
XL
477 // Delegate to the C++ bindings to create some data here. Once this is a
478 // tried-and-true interface we may wish to try to upstream some of this
479 // to LLVM itself, right now we reimplement a lot of what they do
480 // upstream...
481 let data = llvm::LLVMRustCreateThinLTOData(
482 thin_modules.as_ptr(),
483 thin_modules.len() as u32,
f035d41b
XL
484 symbols_below_threshold.as_ptr(),
485 symbols_below_threshold.len() as u32,
dfeec247
XL
486 )
487 .ok_or_else(|| write::llvm_err(&diag_handler, "failed to prepare thin LTO context"))?;
b7449926 488
29967ef6 489 let data = ThinData(data);
ea8adc8c 490
29967ef6 491 info!("thin LTO data created");
b7449926 492
29967ef6
XL
493 let (key_map_path, prev_key_map, curr_key_map) = if let Some(ref incr_comp_session_dir) =
494 cgcx.incr_comp_session_dir
495 {
496 let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
497 // If the previous file was deleted, or we get an IO error
498 // reading the file, then we'll just use `None` as the
499 // prev_key_map, which will force the code to be recompiled.
500 let prev =
501 if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
502 let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
503 (Some(path), prev, curr)
504 } else {
505 // If we don't compile incrementally, we don't need to load the
506 // import data from LLVM.
507 assert!(green_modules.is_empty());
508 let curr = ThinLTOKeysMap::default();
509 (None, None, curr)
510 };
511 info!("thin LTO cache key map loaded");
512 info!("prev_key_map: {:#?}", prev_key_map);
513 info!("curr_key_map: {:#?}", curr_key_map);
b7449926 514
ea8adc8c
XL
515 // Throw our data in an `Arc` as we'll be sharing it across threads. We
516 // also put all memory referenced by the C++ data (buffers, ids, etc)
517 // into the arc as well. After this we'll create a thin module
94b46f34 518 // codegen per module in this data.
ea8adc8c
XL
519 let shared = Arc::new(ThinShared {
520 data,
521 thin_buffers,
522 serialized_modules: serialized,
523 module_names,
524 });
b7449926
XL
525
526 let mut copy_jobs = vec![];
527 let mut opt_jobs = vec![];
528
529 info!("checking which modules can be-reused and which have to be re-optimized.");
530 for (module_index, module_name) in shared.module_names.iter().enumerate() {
531 let module_name = module_name_to_str(module_name);
29967ef6
XL
532 if let (Some(prev_key_map), true) =
533 (prev_key_map.as_ref(), green_modules.contains_key(module_name))
dfeec247
XL
534 {
535 assert!(cgcx.incr_comp_session_dir.is_some());
536
29967ef6
XL
537 // If a module exists in both the current and the previous session,
538 // and has the same LTO cache key in both sessions, then we can re-use it
539 if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
b7449926
XL
540 let work_product = green_modules[module_name].clone();
541 copy_jobs.push(work_product);
542 info!(" - {}: re-used", module_name);
dfeec247
XL
543 assert!(cgcx.incr_comp_session_dir.is_some());
544 cgcx.cgu_reuse_tracker.set_actual_reuse(module_name, CguReuse::PostLto);
545 continue;
b7449926
XL
546 }
547 }
548
549 info!(" - {}: re-compiled", module_name);
550 opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
ea8adc8c 551 shared: shared.clone(),
b7449926
XL
552 idx: module_index,
553 }));
554 }
555
74b04a01 556 // Save the current ThinLTO import information for the next compilation
29967ef6
XL
557 // session, overwriting the previous serialized data (if any).
558 if let Some(path) = key_map_path {
559 if let Err(err) = curr_key_map.save_to_file(&path) {
560 let msg = format!("Error while writing ThinLTO key data: {}", err);
dfeec247
XL
561 return Err(write::llvm_err(&diag_handler, &msg));
562 }
563 }
564
b7449926 565 Ok((opt_jobs, copy_jobs))
b039eaaf 566 }
ea8adc8c 567}
b039eaaf 568
dfeec247
XL
569pub(crate) fn run_pass_manager(
570 cgcx: &CodegenContext<LlvmCodegenBackend>,
17df50a5 571 diag_handler: &Handler,
dfeec247
XL
572 module: &ModuleCodegen<ModuleLlvm>,
573 config: &ModuleConfig,
574 thin: bool,
17df50a5 575) -> Result<(), FatalError> {
74b04a01
XL
576 let _timer = cgcx.prof.extra_verbose_generic_activity("LLVM_lto_optimize", &module.name[..]);
577
1a4d82fc
JJ
578 // Now we have one massive module inside of llmod. Time to run the
579 // LTO-specific optimization passes that LLVM provides.
580 //
581 // This code is based off the code found in llvm's LTO code generator:
582 // tools/lto/LTOCodeGenerator.cpp
583 debug!("running the pass manager");
584 unsafe {
74b04a01
XL
585 if write::should_use_new_llvm_pass_manager(config) {
586 let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
587 let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
17df50a5
XL
588 write::optimize_with_new_llvm_pass_manager(
589 cgcx,
590 diag_handler,
591 module,
592 config,
593 opt_level,
594 opt_stage,
595 )?;
74b04a01 596 debug!("lto done");
17df50a5 597 return Ok(());
74b04a01
XL
598 }
599
1a4d82fc 600 let pm = llvm::LLVMCreatePassManager();
60c5eb7d 601 llvm::LLVMAddAnalysisPasses(module.module_llvm.tm, pm);
8faf50e0
XL
602
603 if config.verify_llvm_ir {
e74abb32 604 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
b7449926 605 llvm::LLVMRustAddPass(pm, pass.unwrap());
8faf50e0 606 }
1a4d82fc 607
dfeec247
XL
608 let opt_level = config
609 .opt_level
610 .map(|x| to_llvm_opt_settings(x).0)
a1dfa0c6 611 .unwrap_or(llvm::CodeGenOptLevel::None);
a1dfa0c6 612 with_llvm_pmb(module.module_llvm.llmod(), config, opt_level, false, &mut |b| {
ea8adc8c 613 if thin {
a1dfa0c6 614 llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm);
ea8adc8c 615 } else {
dfeec247
XL
616 llvm::LLVMPassManagerBuilderPopulateLTOPassManager(
617 b, pm, /* Internalize = */ False, /* RunInliner = */ True,
618 );
ea8adc8c 619 }
c1a9b12d 620 });
1a4d82fc 621
a1dfa0c6
XL
622 // We always generate bitcode through ThinLTOBuffers,
623 // which do not support anonymous globals
624 if config.bitcode_needed() {
e74abb32 625 let pass = llvm::LLVMRustFindAndCreatePass("name-anon-globals\0".as_ptr().cast());
a1dfa0c6
XL
626 llvm::LLVMRustAddPass(pm, pass.unwrap());
627 }
628
8faf50e0 629 if config.verify_llvm_ir {
e74abb32 630 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
b7449926 631 llvm::LLVMRustAddPass(pm, pass.unwrap());
8faf50e0 632 }
1a4d82fc 633
74b04a01 634 llvm::LLVMRunPassManager(pm, module.module_llvm.llmod());
1a4d82fc
JJ
635
636 llvm::LLVMDisposePassManager(pm);
637 }
638 debug!("lto done");
17df50a5 639 Ok(())
1a4d82fc
JJ
640}
641
b7449926 642pub struct ModuleBuffer(&'static mut llvm::ModuleBuffer);
ea8adc8c
XL
643
644unsafe impl Send for ModuleBuffer {}
645unsafe impl Sync for ModuleBuffer {}
646
647impl ModuleBuffer {
b7449926 648 pub fn new(m: &llvm::Module) -> ModuleBuffer {
dfeec247 649 ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
ea8adc8c 650 }
a1dfa0c6 651}
ea8adc8c 652
a1dfa0c6
XL
653impl ModuleBufferMethods for ModuleBuffer {
654 fn data(&self) -> &[u8] {
ea8adc8c
XL
655 unsafe {
656 let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
657 let len = llvm::LLVMRustModuleBufferLen(self.0);
658 slice::from_raw_parts(ptr, len)
659 }
660 }
661}
662
663impl Drop for ModuleBuffer {
664 fn drop(&mut self) {
dfeec247
XL
665 unsafe {
666 llvm::LLVMRustModuleBufferFree(&mut *(self.0 as *mut _));
667 }
ea8adc8c
XL
668 }
669}
670
a1dfa0c6 671pub struct ThinData(&'static mut llvm::ThinLTOData);
ea8adc8c
XL
672
673unsafe impl Send for ThinData {}
674unsafe impl Sync for ThinData {}
675
676impl Drop for ThinData {
677 fn drop(&mut self) {
678 unsafe {
b7449926 679 llvm::LLVMRustFreeThinLTOData(&mut *(self.0 as *mut _));
ea8adc8c
XL
680 }
681 }
682}
683
b7449926 684pub struct ThinBuffer(&'static mut llvm::ThinLTOBuffer);
ea8adc8c
XL
685
686unsafe impl Send for ThinBuffer {}
687unsafe impl Sync for ThinBuffer {}
688
689impl ThinBuffer {
b7449926 690 pub fn new(m: &llvm::Module) -> ThinBuffer {
abe05a73
XL
691 unsafe {
692 let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
693 ThinBuffer(buffer)
694 }
695 }
a1dfa0c6 696}
abe05a73 697
a1dfa0c6
XL
698impl ThinBufferMethods for ThinBuffer {
699 fn data(&self) -> &[u8] {
ea8adc8c
XL
700 unsafe {
701 let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
702 let len = llvm::LLVMRustThinLTOBufferLen(self.0);
703 slice::from_raw_parts(ptr, len)
704 }
705 }
1a4d82fc
JJ
706}
707
ea8adc8c
XL
708impl Drop for ThinBuffer {
709 fn drop(&mut self) {
710 unsafe {
b7449926 711 llvm::LLVMRustThinLTOBufferFree(&mut *(self.0 as *mut _));
ea8adc8c
XL
712 }
713 }
1a4d82fc
JJ
714}
715
a1dfa0c6
XL
716pub unsafe fn optimize_thin_module(
717 thin_module: &mut ThinModule<LlvmCodegenBackend>,
718 cgcx: &CodegenContext<LlvmCodegenBackend>,
a1dfa0c6
XL
719) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
720 let diag_handler = cgcx.create_diag_handler();
fc512014
XL
721
722 let module_name = &thin_module.shared.module_names[thin_module.idx];
5869c6ff 723 let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
fc512014
XL
724 let tm =
725 (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&diag_handler, &e))?;
a1dfa0c6
XL
726
727 // Right now the implementation we've got only works over serialized
728 // modules, so we create a fresh new LLVM context and parse the module
729 // into that context. One day, however, we may do this for upstream
730 // crates but for locally codegened modules we may be able to reuse
731 // that LLVM Context and Module.
732 let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
fc512014
XL
733 let llmod_raw =
734 parse_module(llcx, &module_name, thin_module.data(), &diag_handler)? as *const _;
a1dfa0c6 735 let module = ModuleCodegen {
dfeec247 736 module_llvm: ModuleLlvm { llmod_raw, llcx, tm },
a1dfa0c6
XL
737 name: thin_module.name().to_string(),
738 kind: ModuleKind::Regular,
739 };
740 {
f035d41b 741 let target = &*module.module_llvm.tm;
a1dfa0c6
XL
742 let llmod = module.module_llvm.llmod();
743 save_temp_bitcode(&cgcx, &module, "thin-lto-input");
744
745 // Before we do much else find the "main" `DICompileUnit` that we'll be
746 // using below. If we find more than one though then rustc has changed
747 // in a way we're not ready for, so generate an ICE by returning
748 // an error.
749 let mut cu1 = ptr::null_mut();
750 let mut cu2 = ptr::null_mut();
751 llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
752 if !cu2.is_null() {
753 let msg = "multiple source DICompileUnits found";
dfeec247 754 return Err(write::llvm_err(&diag_handler, msg));
a1dfa0c6 755 }
ff7c6d11 756
a1dfa0c6
XL
757 // Like with "fat" LTO, get some better optimizations if landing pads
758 // are disabled by removing all landing pads.
759 if cgcx.no_landing_pads {
74b04a01
XL
760 let _timer = cgcx
761 .prof
762 .generic_activity_with_arg("LLVM_thin_lto_remove_landing_pads", thin_module.name());
a1dfa0c6
XL
763 llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
764 save_temp_bitcode(&cgcx, &module, "thin-lto-after-nounwind");
a1dfa0c6 765 }
ea8adc8c 766
a1dfa0c6
XL
767 // Up next comes the per-module local analyses that we do for Thin LTO.
768 // Each of these functions is basically copied from the LLVM
769 // implementation and then tailored to suit this implementation. Ideally
770 // each of these would be supported by upstream LLVM but that's perhaps
771 // a patch for another day!
772 //
773 // You can find some more comments about these functions in the LLVM
774 // bindings we've got (currently `PassWrapper.cpp`)
e74abb32 775 {
74b04a01
XL
776 let _timer =
777 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
f035d41b 778 if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
e74abb32 779 let msg = "failed to prepare thin LTO module";
dfeec247 780 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
781 }
782 save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
a1dfa0c6 783 }
e74abb32
XL
784
785 {
74b04a01
XL
786 let _timer = cgcx
787 .prof
788 .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
e74abb32
XL
789 if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
790 let msg = "failed to prepare thin LTO module";
dfeec247 791 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
792 }
793 save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
ea8adc8c 794 }
e74abb32
XL
795
796 {
74b04a01
XL
797 let _timer = cgcx
798 .prof
799 .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
e74abb32
XL
800 if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
801 let msg = "failed to prepare thin LTO module";
dfeec247 802 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
803 }
804 save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
a1dfa0c6 805 }
e74abb32
XL
806
807 {
74b04a01
XL
808 let _timer =
809 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
f035d41b 810 if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
e74abb32 811 let msg = "failed to prepare thin LTO module";
dfeec247 812 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
813 }
814 save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
a1dfa0c6 815 }
b7449926 816
a1dfa0c6
XL
817 // Ok now this is a bit unfortunate. This is also something you won't
818 // find upstream in LLVM's ThinLTO passes! This is a hack for now to
819 // work around bugs in LLVM.
820 //
821 // First discovered in #45511 it was found that as part of ThinLTO
822 // importing passes LLVM will import `DICompileUnit` metadata
823 // information across modules. This means that we'll be working with one
824 // LLVM module that has multiple `DICompileUnit` instances in it (a
825 // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
826 // bugs in LLVM's backend which generates invalid DWARF in a situation
827 // like this:
828 //
829 // https://bugs.llvm.org/show_bug.cgi?id=35212
830 // https://bugs.llvm.org/show_bug.cgi?id=35562
831 //
832 // While the first bug there is fixed the second ended up causing #46346
833 // which was basically a resurgence of #45511 after LLVM's bug 35212 was
834 // fixed.
835 //
836 // This function below is a huge hack around this problem. The function
837 // below is defined in `PassWrapper.cpp` and will basically "merge"
838 // all `DICompileUnit` instances in a module. Basically it'll take all
839 // the objects, rewrite all pointers of `DISubprogram` to point to the
840 // first `DICompileUnit`, and then delete all the other units.
841 //
842 // This is probably mangling the debug info slightly (but hopefully
843 // not too much) but for now at least gets LLVM to emit valid DWARF (or
844 // so it appears). Hopefully we can remove this once upstream bugs are
845 // fixed in LLVM.
e74abb32 846 {
74b04a01
XL
847 let _timer = cgcx
848 .prof
849 .generic_activity_with_arg("LLVM_thin_lto_patch_debuginfo", thin_module.name());
e74abb32
XL
850 llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
851 save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
852 }
a1dfa0c6
XL
853
854 // Alright now that we've done everything related to the ThinLTO
855 // analysis it's time to run some optimizations! Here we use the same
856 // `run_pass_manager` as the "fat" LTO above except that we tell it to
857 // populate a thin-specific pass manager, which presumably LLVM treats a
858 // little differently.
e74abb32 859 {
e74abb32
XL
860 info!("running thin lto passes over {}", module.name);
861 let config = cgcx.config(module.kind);
17df50a5 862 run_pass_manager(cgcx, &diag_handler, &module, config, true)?;
e74abb32
XL
863 save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
864 }
b7449926 865 }
a1dfa0c6 866 Ok(module)
b7449926
XL
867}
868
29967ef6 869/// Maps LLVM module identifiers to their corresponding LLVM LTO cache keys
0bf4aa26 870#[derive(Debug, Default)]
29967ef6
XL
871pub struct ThinLTOKeysMap {
872 // key = llvm name of importing module, value = LLVM cache key
873 keys: FxHashMap<String, String>,
b7449926
XL
874}
875
29967ef6 876impl ThinLTOKeysMap {
dfeec247
XL
877 fn save_to_file(&self, path: &Path) -> io::Result<()> {
878 use std::io::Write;
879 let file = File::create(path)?;
880 let mut writer = io::BufWriter::new(file);
29967ef6
XL
881 for (module, key) in &self.keys {
882 writeln!(writer, "{} {}", module, key)?;
dfeec247
XL
883 }
884 Ok(())
885 }
886
29967ef6 887 fn load_from_file(path: &Path) -> io::Result<Self> {
dfeec247 888 use std::io::BufRead;
29967ef6 889 let mut keys = FxHashMap::default();
dfeec247
XL
890 let file = File::open(path)?;
891 for line in io::BufReader::new(file).lines() {
892 let line = line?;
29967ef6
XL
893 let mut split = line.split(' ');
894 let module = split.next().unwrap();
895 let key = split.next().unwrap();
896 assert_eq!(split.next(), None, "Expected two space-separated values, found {:?}", line);
897 keys.insert(module.to_string(), key.to_string());
dfeec247 898 }
29967ef6 899 Ok(Self { keys })
dfeec247
XL
900 }
901
29967ef6
XL
902 fn from_thin_lto_modules(
903 data: &ThinData,
904 modules: &[llvm::ThinLTOModule],
905 names: &[CString],
906 ) -> Self {
cdc7bbd5 907 let keys = iter::zip(modules, names)
29967ef6
XL
908 .map(|(module, name)| {
909 let key = build_string(|rust_str| unsafe {
910 llvm::LLVMRustComputeLTOCacheKey(rust_str, module.identifier, data.0);
911 })
912 .expect("Invalid ThinLTO module key");
913 (name.clone().into_string().unwrap(), key)
914 })
915 .collect();
916 Self { keys }
b7449926
XL
917 }
918}
919
920fn module_name_to_str(c_str: &CStr) -> &str {
dfeec247
XL
921 c_str.to_str().unwrap_or_else(|e| {
922 bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)
923 })
1a4d82fc 924}
9fa01778 925
e1599b0c 926pub fn parse_module<'a>(
9fa01778
XL
927 cx: &'a llvm::Context,
928 name: &CStr,
929 data: &[u8],
930 diag_handler: &Handler,
931) -> Result<&'a llvm::Module, FatalError> {
932 unsafe {
dfeec247
XL
933 llvm::LLVMRustParseBitcodeForLTO(cx, data.as_ptr(), data.len(), name.as_ptr()).ok_or_else(
934 || {
935 let msg = "failed to parse bitcode for LTO module";
936 write::llvm_err(&diag_handler, msg)
937 },
938 )
9fa01778
XL
939 }
940}