]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_codegen_llvm/src/back/lto.rs
New upstream version 1.61.0+dfsg1
[rustc.git] / compiler / rustc_codegen_llvm / src / back / lto.rs
CommitLineData
dfeec247
XL
1use crate::back::write::{
2 self, save_temp_bitcode, to_llvm_opt_settings, with_llvm_pmb, DiagnosticHandlers,
3};
9fa01778 4use crate::llvm::archive_ro::ArchiveRO;
29967ef6 5use crate::llvm::{self, build_string, False, True};
a2a8927a 6use crate::{llvm_util, LlvmCodegenBackend, ModuleLlvm};
dfeec247 7use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
a1dfa0c6 8use rustc_codegen_ssa::back::symbol_export;
fc512014
XL
9use rustc_codegen_ssa::back::write::{
10 CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryConfig,
11};
a1dfa0c6 12use rustc_codegen_ssa::traits::*;
f9f354fc 13use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
29967ef6 14use rustc_data_structures::fx::FxHashMap;
60c5eb7d 15use rustc_errors::{FatalError, Handler};
dfeec247 16use rustc_hir::def_id::LOCAL_CRATE;
ba9703b0
XL
17use rustc_middle::bug;
18use rustc_middle::dep_graph::WorkProduct;
19use rustc_middle::middle::exported_symbols::SymbolExportLevel;
60c5eb7d 20use rustc_session::cgu_reuse_tracker::CguReuse;
f9f354fc 21use rustc_session::config::{self, CrateType, Lto};
3dfed10e 22use tracing::{debug, info};
1a4d82fc 23
b7449926 24use std::ffi::{CStr, CString};
dfeec247
XL
25use std::fs::File;
26use std::io;
cdc7bbd5 27use std::iter;
dfeec247 28use std::path::Path;
ff7c6d11 29use std::ptr;
ea8adc8c
XL
30use std::slice;
31use std::sync::Arc;
1a4d82fc 32
29967ef6
XL
/// Name of the file, inside the incremental compilation session directory,
/// that stores the ThinLTO cache keys computed during a session. The keys
/// written by the previous session are read back and compared with the
/// current ones to decide which CGUs can be reused.
pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin";
dfeec247 36
f9f354fc 37pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
476ff2be 38 match crate_type {
f9f354fc
XL
39 CrateType::Executable | CrateType::Staticlib | CrateType::Cdylib => true,
40 CrateType::Dylib | CrateType::Rlib | CrateType::ProcMacro => false,
476ff2be
SL
41 }
42}
43
dfeec247
XL
44fn prepare_lto(
45 cgcx: &CodegenContext<LlvmCodegenBackend>,
46 diag_handler: &Handler,
47) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> {
2c00a5a8
XL
48 let export_threshold = match cgcx.lto {
49 // We're just doing LTO for our one crate
50 Lto::ThinLocal => SymbolExportLevel::Rust,
51
52 // We're doing LTO for the entire crate graph
dfeec247 53 Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
2c00a5a8
XL
54
55 Lto::No => panic!("didn't request LTO but we're doing LTO"),
ea8adc8c
XL
56 };
57
0531ce1d 58 let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
ea8adc8c 59 if level.is_below_threshold(export_threshold) {
e74abb32 60 Some(CString::new(name.as_str()).unwrap())
476ff2be
SL
61 } else {
62 None
63 }
64 };
dfeec247 65 let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
f035d41b
XL
66 let mut symbols_below_threshold = {
67 let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
dfeec247 68 exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>()
e74abb32 69 };
f035d41b 70 info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
ea8adc8c
XL
71
72 // If we're performing LTO for the entire crate graph, then for each of our
73 // upstream dependencies, find the corresponding rlib and load the bitcode
74 // from the archive.
75 //
76 // We save off all the bytecode and LLVM module ids for later processing
77 // with either fat or thin LTO
78 let mut upstream_modules = Vec::new();
2c00a5a8 79 if cgcx.lto != Lto::ThinLocal {
ea8adc8c 80 if cgcx.opts.cg.prefer_dynamic {
dfeec247
XL
81 diag_handler
82 .struct_err("cannot prefer dynamic linking when performing LTO")
83 .note(
84 "only 'staticlib', 'bin', and 'cdylib' outputs are \
85 supported with LTO",
86 )
87 .emit();
88 return Err(FatalError);
ea8adc8c
XL
89 }
90
91 // Make sure we actually can run LTO
92 for crate_type in cgcx.crate_types.iter() {
93 if !crate_type_allows_lto(*crate_type) {
dfeec247
XL
94 let e = diag_handler.fatal(
95 "lto can only be run for executables, cdylibs and \
96 static library outputs",
97 );
98 return Err(e);
ea8adc8c
XL
99 }
100 }
101
102 for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
dfeec247
XL
103 let exported_symbols =
104 cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
e74abb32 105 {
f035d41b
XL
106 let _timer =
107 cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
108 symbols_below_threshold
109 .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter));
e74abb32 110 }
ea8adc8c 111
c295e0f8 112 let archive = ArchiveRO::open(path).expect("wanted an rlib");
f9f354fc 113 let obj_files = archive
dfeec247
XL
114 .iter()
115 .filter_map(|child| child.ok().and_then(|c| c.name().map(|name| (name, c))))
f9f354fc
XL
116 .filter(|&(name, _)| looks_like_rust_object_file(name));
117 for (name, child) in obj_files {
118 info!("adding bitcode from {}", name);
119 match get_bitcode_slice_from_object_data(child.data()) {
120 Ok(data) => {
121 let module = SerializedModule::FromRlib(data.to_vec());
122 upstream_modules.push((module, CString::new(name).unwrap()));
123 }
124 Err(msg) => return Err(diag_handler.fatal(&msg)),
125 }
ea8adc8c 126 }
ea8adc8c
XL
127 }
128 }
1a4d82fc 129
f035d41b 130 Ok((symbols_below_threshold, upstream_modules))
0731742a
XL
131}
132
f9f354fc
XL
133fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], String> {
134 let mut len = 0;
135 let data =
136 unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) };
137 if !data.is_null() {
138 assert!(len != 0);
139 let bc = unsafe { slice::from_raw_parts(data, len) };
140
141 // `bc` must be a sub-slice of `obj`.
142 assert!(obj.as_ptr() <= bc.as_ptr());
143 assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr());
144
145 Ok(bc)
146 } else {
147 assert!(len == 0);
148 let msg = llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string());
149 Err(format!("failed to get bitcode from object file for LTO ({})", msg))
150 }
151}
152
0731742a
XL
153/// Performs fat LTO by merging all modules into a single one and returning it
154/// for further optimization.
dfeec247
XL
155pub(crate) fn run_fat(
156 cgcx: &CodegenContext<LlvmCodegenBackend>,
157 modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
158 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
159) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
0731742a 160 let diag_handler = cgcx.create_diag_handler();
f035d41b
XL
161 let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
162 let symbols_below_threshold =
163 symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
164 fat_lto(
165 cgcx,
166 &diag_handler,
167 modules,
168 cached_modules,
169 upstream_modules,
170 &symbols_below_threshold,
171 )
0731742a
XL
172}
173
174/// Performs thin LTO by performing necessary global analysis and returning two
175/// lists, one of the modules that need optimization and another for modules that
176/// can simply be copied over from the incr. comp. cache.
dfeec247
XL
177pub(crate) fn run_thin(
178 cgcx: &CodegenContext<LlvmCodegenBackend>,
179 modules: Vec<(String, ThinBuffer)>,
180 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
181) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
0731742a 182 let diag_handler = cgcx.create_diag_handler();
f035d41b
XL
183 let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
184 let symbols_below_threshold =
185 symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
9fa01778 186 if cgcx.opts.cg.linker_plugin_lto.enabled() {
dfeec247
XL
187 unreachable!(
188 "We should never reach this case if the LTO step \
189 is deferred to the linker"
190 );
0731742a 191 }
f035d41b
XL
192 thin_lto(
193 cgcx,
194 &diag_handler,
195 modules,
196 upstream_modules,
197 cached_modules,
198 &symbols_below_threshold,
199 )
0731742a
XL
200}
201
dfeec247 202pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
0731742a
XL
203 let name = module.name.clone();
204 let buffer = ThinBuffer::new(module.module_llvm.llmod());
0731742a 205 (name, buffer)
ea8adc8c
XL
206}
207
dfeec247
XL
208fn fat_lto(
209 cgcx: &CodegenContext<LlvmCodegenBackend>,
210 diag_handler: &Handler,
211 modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
212 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
213 mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
f035d41b 214 symbols_below_threshold: &[*const libc::c_char],
dfeec247 215) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
e74abb32 216 let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
ea8adc8c
XL
217 info!("going for a fat lto");
218
e1599b0c
XL
219 // Sort out all our lists of incoming modules into two lists.
220 //
221 // * `serialized_modules` (also and argument to this function) contains all
222 // modules that are serialized in-memory.
223 // * `in_memory` contains modules which are already parsed and in-memory,
224 // such as from multi-CGU builds.
225 //
226 // All of `cached_modules` (cached from previous incremental builds) can
227 // immediately go onto the `serialized_modules` modules list and then we can
228 // split the `modules` array into these two lists.
229 let mut in_memory = Vec::new();
230 serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
231 info!("pushing cached module {:?}", wp.cgu_name);
232 (buffer, CString::new(wp.cgu_name).unwrap())
233 }));
234 for module in modules {
235 match module {
236 FatLTOInput::InMemory(m) => in_memory.push(m),
237 FatLTOInput::Serialized { name, buffer } => {
238 info!("pushing serialized module {:?}", name);
239 let buffer = SerializedModule::Local(buffer);
240 serialized_modules.push((buffer, CString::new(name).unwrap()));
241 }
242 }
243 }
244
ea8adc8c
XL
245 // Find the "costliest" module and merge everything into that codegen unit.
246 // All the other modules will be serialized and reparsed into the new
247 // context, so this hopefully avoids serializing and parsing the largest
248 // codegen unit.
249 //
250 // Additionally use a regular module as the base here to ensure that various
251 // file copy operations in the backend work correctly. The only other kind
252 // of module here should be an allocator one, and if your crate is smaller
253 // than the allocator module then the size doesn't really matter anyway.
dfeec247
XL
254 let costliest_module = in_memory
255 .iter()
ea8adc8c
XL
256 .enumerate()
257 .filter(|&(_, module)| module.kind == ModuleKind::Regular)
258 .map(|(i, module)| {
dfeec247 259 let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
ea8adc8c
XL
260 (cost, i)
261 })
9fa01778
XL
262 .max();
263
264 // If we found a costliest module, we're good to go. Otherwise all our
265 // inputs were serialized which could happen in the case, for example, that
266 // all our inputs were incrementally reread from the cache and we're just
267 // re-executing the LTO passes. If that's the case deserialize the first
268 // module and create a linker with it.
269 let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
e1599b0c 270 Some((_cost, i)) => in_memory.remove(i),
9fa01778 271 None => {
74b04a01 272 assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
e1599b0c
XL
273 let (buffer, name) = serialized_modules.remove(0);
274 info!("no in-memory regular modules to choose from, parsing {:?}", name);
9fa01778 275 ModuleCodegen {
e1599b0c
XL
276 module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?,
277 name: name.into_string().unwrap(),
9fa01778
XL
278 kind: ModuleKind::Regular,
279 }
280 }
281 };
ea8adc8c 282 let mut serialized_bitcode = Vec::new();
b7449926
XL
283 {
284 let (llcx, llmod) = {
285 let llvm = &module.module_llvm;
286 (&llvm.llcx, llvm.llmod())
287 };
288 info!("using {:?} as a base module", module.name);
289
290 // The linking steps below may produce errors and diagnostics within LLVM
291 // which we'd like to handle and print, so set up our diagnostic handlers
292 // (which get unregistered when they go out of scope below).
293 let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
294
295 // For all other modules we codegened we'll need to link them into our own
296 // bitcode. All modules were codegened in their own LLVM context, however,
297 // and we want to move everything to the same LLVM context. Currently the
298 // way we know of to do that is to serialize them to a string and them parse
299 // them later. Not great but hey, that's why it's "fat" LTO, right?
e1599b0c
XL
300 for module in in_memory {
301 let buffer = ModuleBuffer::new(module.module_llvm.llmod());
302 let llmod_id = CString::new(&module.name[..]).unwrap();
303 serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
304 }
416331ca 305 // Sort the modules to ensure we produce deterministic results.
e1599b0c 306 serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));
1a4d82fc 307
b7449926
XL
308 // For all serialized bitcode files we parse them and link them in as we did
309 // above, this is all mostly handled in C++. Like above, though, we don't
310 // know much about the memory management here so we err on the side of being
311 // save and persist everything with the original module.
312 let mut linker = Linker::new(llmod);
313 for (bc_decoded, name) in serialized_modules {
74b04a01
XL
314 let _timer = cgcx
315 .prof
316 .generic_activity_with_arg("LLVM_fat_lto_link_module", format!("{:?}", name));
b7449926 317 info!("linking {:?}", name);
74b04a01 318 let data = bc_decoded.data();
c295e0f8 319 linker.add(data).map_err(|()| {
ee023bcb 320 let msg = format!("failed to load bitcode of module {:?}", name);
c295e0f8 321 write::llvm_err(diag_handler, &msg)
b7449926 322 })?;
b7449926
XL
323 serialized_bitcode.push(bc_decoded);
324 }
325 drop(linker);
c295e0f8
XL
326 save_temp_bitcode(cgcx, &module, "lto.input");
327
328 // Fat LTO also suffers from the invalid DWARF issue similar to Thin LTO.
329 // Here we rewrite all `DICompileUnit` pointers if there is only one `DICompileUnit`.
330 // This only works around the problem when codegen-units = 1.
331 // Refer to the comments in the `optimize_thin_module` function for more details.
332 let mut cu1 = ptr::null_mut();
333 let mut cu2 = ptr::null_mut();
334 unsafe { llvm::LLVMRustLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2) };
335 if !cu2.is_null() {
336 let _timer =
337 cgcx.prof.generic_activity_with_arg("LLVM_fat_lto_patch_debuginfo", &*module.name);
338 unsafe { llvm::LLVMRustLTOPatchDICompileUnit(llmod, cu1) };
339 save_temp_bitcode(cgcx, &module, "fat-lto-after-patch");
340 }
1a4d82fc 341
f035d41b 342 // Internalize everything below threshold to help strip out more modules and such.
1a4d82fc 343 unsafe {
f035d41b 344 let ptr = symbols_below_threshold.as_ptr();
dfeec247
XL
345 llvm::LLVMRustRunRestrictionPass(
346 llmod,
347 ptr as *const *const libc::c_char,
f035d41b 348 symbols_below_threshold.len() as libc::size_t,
dfeec247 349 );
c295e0f8 350 save_temp_bitcode(cgcx, &module, "lto.after-restriction");
b7449926 351 }
1a4d82fc
JJ
352 }
353
dfeec247 354 Ok(LtoModuleCodegen::Fat { module: Some(module), _serialized_bitcode: serialized_bitcode })
ea8adc8c
XL
355}
356
1b1a35ee 357crate struct Linker<'a>(&'a mut llvm::Linker<'a>);
0531ce1d 358
a2a8927a 359impl<'a> Linker<'a> {
1b1a35ee 360 crate fn new(llmod: &'a llvm::Module) -> Self {
0531ce1d
XL
361 unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
362 }
363
1b1a35ee 364 crate fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
0531ce1d 365 unsafe {
dfeec247
XL
366 if llvm::LLVMRustLinkerAdd(
367 self.0,
368 bytecode.as_ptr() as *const libc::c_char,
369 bytecode.len(),
370 ) {
0531ce1d
XL
371 Ok(())
372 } else {
373 Err(())
374 }
375 }
376 }
377}
378
a2a8927a 379impl Drop for Linker<'_> {
0531ce1d 380 fn drop(&mut self) {
dfeec247
XL
381 unsafe {
382 llvm::LLVMRustLinkerFree(&mut *(self.0 as *mut _));
383 }
0531ce1d
XL
384 }
385}
386
ea8adc8c
XL
387/// Prepare "thin" LTO to get run on these modules.
388///
389/// The general structure of ThinLTO is quite different from the structure of
390/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
391/// one giant LLVM module, and then we run more optimization passes over this
392/// big module after internalizing most symbols. Thin LTO, on the other hand,
393/// avoid this large bottleneck through more targeted optimization.
394///
395/// At a high level Thin LTO looks like:
396///
397/// 1. Prepare a "summary" of each LLVM module in question which describes
398/// the values inside, cost of the values, etc.
399/// 2. Merge the summaries of all modules in question into one "index"
400/// 3. Perform some global analysis on this index
401/// 4. For each module, use the index and analysis calculated previously to
402/// perform local transformations on the module, for example inlining
403/// small functions from other modules.
404/// 5. Run thin-specific optimization passes over each module, and then code
405/// generate everything at the end.
406///
407/// The summary for each module is intended to be quite cheap, and the global
408/// index is relatively quite cheap to create as well. As a result, the goal of
409/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
410/// situations. For example one cheap optimization is that we can parallelize
411/// all codegen modules, easily making use of all the cores on a machine.
412///
413/// With all that in mind, the function here is designed at specifically just
414/// calculating the *index* for ThinLTO. This index will then be shared amongst
94b46f34 415/// all of the `LtoModuleCodegen` units returned below and destroyed once
ea8adc8c 416/// they all go out of scope.
dfeec247
XL
417fn thin_lto(
418 cgcx: &CodegenContext<LlvmCodegenBackend>,
419 diag_handler: &Handler,
420 modules: Vec<(String, ThinBuffer)>,
421 serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
422 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
f035d41b 423 symbols_below_threshold: &[*const libc::c_char],
dfeec247 424) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
e74abb32 425 let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
ea8adc8c
XL
426 unsafe {
427 info!("going for that thin, thin LTO");
428
dfeec247
XL
429 let green_modules: FxHashMap<_, _> =
430 cached_modules.iter().map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone())).collect();
b7449926 431
a1dfa0c6
XL
432 let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
433 let mut thin_buffers = Vec::with_capacity(modules.len());
434 let mut module_names = Vec::with_capacity(full_scope_len);
435 let mut thin_modules = Vec::with_capacity(full_scope_len);
ea8adc8c 436
0731742a
XL
437 for (i, (name, buffer)) in modules.into_iter().enumerate() {
438 info!("local module: {} - {}", i, name);
439 let cname = CString::new(name.clone()).unwrap();
ea8adc8c 440 thin_modules.push(llvm::ThinLTOModule {
0731742a 441 identifier: cname.as_ptr(),
ea8adc8c
XL
442 data: buffer.data().as_ptr(),
443 len: buffer.data().len(),
444 });
445 thin_buffers.push(buffer);
0731742a 446 module_names.push(cname);
b039eaaf 447 }
ea8adc8c
XL
448
449 // FIXME: All upstream crates are deserialized internally in the
450 // function below to extract their summary and modules. Note that
451 // unlike the loop above we *must* decode and/or read something
452 // here as these are all just serialized files on disk. An
453 // improvement, however, to make here would be to store the
454 // module summary separately from the actual module itself. Right
455 // now this is store in one large bitcode file, and the entire
456 // file is deflate-compressed. We could try to bypass some of the
457 // decompression by storing the index uncompressed and only
458 // lazily decompressing the bytecode if necessary.
459 //
460 // Note that truly taking advantage of this optimization will
461 // likely be further down the road. We'd have to implement
462 // incremental ThinLTO first where we could actually avoid
463 // looking at upstream modules entirely sometimes (the contents,
464 // we must always unconditionally look at the index).
a1dfa0c6 465 let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
b7449926 466
dfeec247
XL
467 let cached_modules =
468 cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));
b7449926
XL
469
470 for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
471 info!("upstream or cached module {:?}", name);
ea8adc8c
XL
472 thin_modules.push(llvm::ThinLTOModule {
473 identifier: name.as_ptr(),
474 data: module.data().as_ptr(),
475 len: module.data().len(),
476 });
477 serialized.push(module);
478 module_names.push(name);
479 }
480
b7449926
XL
481 // Sanity check
482 assert_eq!(thin_modules.len(), module_names.len());
483
ea8adc8c
XL
484 // Delegate to the C++ bindings to create some data here. Once this is a
485 // tried-and-true interface we may wish to try to upstream some of this
486 // to LLVM itself, right now we reimplement a lot of what they do
487 // upstream...
488 let data = llvm::LLVMRustCreateThinLTOData(
489 thin_modules.as_ptr(),
490 thin_modules.len() as u32,
f035d41b
XL
491 symbols_below_threshold.as_ptr(),
492 symbols_below_threshold.len() as u32,
dfeec247 493 )
c295e0f8 494 .ok_or_else(|| write::llvm_err(diag_handler, "failed to prepare thin LTO context"))?;
b7449926 495
29967ef6 496 let data = ThinData(data);
ea8adc8c 497
29967ef6 498 info!("thin LTO data created");
b7449926 499
29967ef6
XL
500 let (key_map_path, prev_key_map, curr_key_map) = if let Some(ref incr_comp_session_dir) =
501 cgcx.incr_comp_session_dir
502 {
503 let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
504 // If the previous file was deleted, or we get an IO error
505 // reading the file, then we'll just use `None` as the
506 // prev_key_map, which will force the code to be recompiled.
507 let prev =
508 if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
509 let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
510 (Some(path), prev, curr)
511 } else {
512 // If we don't compile incrementally, we don't need to load the
513 // import data from LLVM.
514 assert!(green_modules.is_empty());
515 let curr = ThinLTOKeysMap::default();
516 (None, None, curr)
517 };
518 info!("thin LTO cache key map loaded");
519 info!("prev_key_map: {:#?}", prev_key_map);
520 info!("curr_key_map: {:#?}", curr_key_map);
b7449926 521
ea8adc8c
XL
522 // Throw our data in an `Arc` as we'll be sharing it across threads. We
523 // also put all memory referenced by the C++ data (buffers, ids, etc)
524 // into the arc as well. After this we'll create a thin module
94b46f34 525 // codegen per module in this data.
ea8adc8c
XL
526 let shared = Arc::new(ThinShared {
527 data,
528 thin_buffers,
529 serialized_modules: serialized,
530 module_names,
531 });
b7449926
XL
532
533 let mut copy_jobs = vec![];
534 let mut opt_jobs = vec![];
535
536 info!("checking which modules can be-reused and which have to be re-optimized.");
537 for (module_index, module_name) in shared.module_names.iter().enumerate() {
538 let module_name = module_name_to_str(module_name);
29967ef6
XL
539 if let (Some(prev_key_map), true) =
540 (prev_key_map.as_ref(), green_modules.contains_key(module_name))
dfeec247
XL
541 {
542 assert!(cgcx.incr_comp_session_dir.is_some());
543
29967ef6
XL
544 // If a module exists in both the current and the previous session,
545 // and has the same LTO cache key in both sessions, then we can re-use it
546 if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
b7449926
XL
547 let work_product = green_modules[module_name].clone();
548 copy_jobs.push(work_product);
549 info!(" - {}: re-used", module_name);
dfeec247
XL
550 assert!(cgcx.incr_comp_session_dir.is_some());
551 cgcx.cgu_reuse_tracker.set_actual_reuse(module_name, CguReuse::PostLto);
552 continue;
b7449926
XL
553 }
554 }
555
556 info!(" - {}: re-compiled", module_name);
557 opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
ea8adc8c 558 shared: shared.clone(),
b7449926
XL
559 idx: module_index,
560 }));
561 }
562
74b04a01 563 // Save the current ThinLTO import information for the next compilation
29967ef6
XL
564 // session, overwriting the previous serialized data (if any).
565 if let Some(path) = key_map_path {
566 if let Err(err) = curr_key_map.save_to_file(&path) {
567 let msg = format!("Error while writing ThinLTO key data: {}", err);
c295e0f8 568 return Err(write::llvm_err(diag_handler, &msg));
dfeec247
XL
569 }
570 }
571
b7449926 572 Ok((opt_jobs, copy_jobs))
b039eaaf 573 }
ea8adc8c 574}
b039eaaf 575
dfeec247
XL
576pub(crate) fn run_pass_manager(
577 cgcx: &CodegenContext<LlvmCodegenBackend>,
17df50a5 578 diag_handler: &Handler,
dfeec247
XL
579 module: &ModuleCodegen<ModuleLlvm>,
580 config: &ModuleConfig,
581 thin: bool,
17df50a5 582) -> Result<(), FatalError> {
a2a8927a 583 let _timer = cgcx.prof.extra_verbose_generic_activity("LLVM_lto_optimize", &*module.name);
74b04a01 584
1a4d82fc
JJ
585 // Now we have one massive module inside of llmod. Time to run the
586 // LTO-specific optimization passes that LLVM provides.
587 //
588 // This code is based off the code found in llvm's LTO code generator:
589 // tools/lto/LTOCodeGenerator.cpp
590 debug!("running the pass manager");
591 unsafe {
a2a8927a
XL
592 if llvm_util::should_use_new_llvm_pass_manager(
593 &config.new_llvm_pass_manager,
594 &cgcx.target_arch,
595 ) {
74b04a01
XL
596 let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
597 let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
17df50a5
XL
598 write::optimize_with_new_llvm_pass_manager(
599 cgcx,
600 diag_handler,
601 module,
602 config,
603 opt_level,
604 opt_stage,
605 )?;
74b04a01 606 debug!("lto done");
17df50a5 607 return Ok(());
74b04a01
XL
608 }
609
1a4d82fc 610 let pm = llvm::LLVMCreatePassManager();
60c5eb7d 611 llvm::LLVMAddAnalysisPasses(module.module_llvm.tm, pm);
8faf50e0
XL
612
613 if config.verify_llvm_ir {
e74abb32 614 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
b7449926 615 llvm::LLVMRustAddPass(pm, pass.unwrap());
8faf50e0 616 }
1a4d82fc 617
dfeec247
XL
618 let opt_level = config
619 .opt_level
620 .map(|x| to_llvm_opt_settings(x).0)
a1dfa0c6 621 .unwrap_or(llvm::CodeGenOptLevel::None);
a1dfa0c6 622 with_llvm_pmb(module.module_llvm.llmod(), config, opt_level, false, &mut |b| {
ea8adc8c 623 if thin {
a1dfa0c6 624 llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm);
ea8adc8c 625 } else {
dfeec247
XL
626 llvm::LLVMPassManagerBuilderPopulateLTOPassManager(
627 b, pm, /* Internalize = */ False, /* RunInliner = */ True,
628 );
ea8adc8c 629 }
c1a9b12d 630 });
1a4d82fc 631
a1dfa0c6
XL
632 // We always generate bitcode through ThinLTOBuffers,
633 // which do not support anonymous globals
634 if config.bitcode_needed() {
e74abb32 635 let pass = llvm::LLVMRustFindAndCreatePass("name-anon-globals\0".as_ptr().cast());
a1dfa0c6
XL
636 llvm::LLVMRustAddPass(pm, pass.unwrap());
637 }
638
8faf50e0 639 if config.verify_llvm_ir {
e74abb32 640 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
b7449926 641 llvm::LLVMRustAddPass(pm, pass.unwrap());
8faf50e0 642 }
1a4d82fc 643
74b04a01 644 llvm::LLVMRunPassManager(pm, module.module_llvm.llmod());
1a4d82fc
JJ
645
646 llvm::LLVMDisposePassManager(pm);
647 }
648 debug!("lto done");
17df50a5 649 Ok(())
1a4d82fc
JJ
650}
651
b7449926 652pub struct ModuleBuffer(&'static mut llvm::ModuleBuffer);
ea8adc8c
XL
653
654unsafe impl Send for ModuleBuffer {}
655unsafe impl Sync for ModuleBuffer {}
656
657impl ModuleBuffer {
b7449926 658 pub fn new(m: &llvm::Module) -> ModuleBuffer {
dfeec247 659 ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
ea8adc8c 660 }
a1dfa0c6 661}
ea8adc8c 662
a1dfa0c6
XL
663impl ModuleBufferMethods for ModuleBuffer {
664 fn data(&self) -> &[u8] {
ea8adc8c
XL
665 unsafe {
666 let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
667 let len = llvm::LLVMRustModuleBufferLen(self.0);
668 slice::from_raw_parts(ptr, len)
669 }
670 }
671}
672
673impl Drop for ModuleBuffer {
674 fn drop(&mut self) {
dfeec247
XL
675 unsafe {
676 llvm::LLVMRustModuleBufferFree(&mut *(self.0 as *mut _));
677 }
ea8adc8c
XL
678 }
679}
680
a1dfa0c6 681pub struct ThinData(&'static mut llvm::ThinLTOData);
ea8adc8c
XL
682
683unsafe impl Send for ThinData {}
684unsafe impl Sync for ThinData {}
685
686impl Drop for ThinData {
687 fn drop(&mut self) {
688 unsafe {
b7449926 689 llvm::LLVMRustFreeThinLTOData(&mut *(self.0 as *mut _));
ea8adc8c
XL
690 }
691 }
692}
693
b7449926 694pub struct ThinBuffer(&'static mut llvm::ThinLTOBuffer);
ea8adc8c
XL
695
696unsafe impl Send for ThinBuffer {}
697unsafe impl Sync for ThinBuffer {}
698
699impl ThinBuffer {
b7449926 700 pub fn new(m: &llvm::Module) -> ThinBuffer {
abe05a73
XL
701 unsafe {
702 let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
703 ThinBuffer(buffer)
704 }
705 }
a1dfa0c6 706}
abe05a73 707
a1dfa0c6
XL
708impl ThinBufferMethods for ThinBuffer {
709 fn data(&self) -> &[u8] {
ea8adc8c
XL
710 unsafe {
711 let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
712 let len = llvm::LLVMRustThinLTOBufferLen(self.0);
713 slice::from_raw_parts(ptr, len)
714 }
715 }
1a4d82fc
JJ
716}
717
ea8adc8c
XL
718impl Drop for ThinBuffer {
719 fn drop(&mut self) {
720 unsafe {
b7449926 721 llvm::LLVMRustThinLTOBufferFree(&mut *(self.0 as *mut _));
ea8adc8c
XL
722 }
723 }
1a4d82fc
JJ
724}
725
a1dfa0c6
XL
726pub unsafe fn optimize_thin_module(
727 thin_module: &mut ThinModule<LlvmCodegenBackend>,
728 cgcx: &CodegenContext<LlvmCodegenBackend>,
a1dfa0c6
XL
729) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
730 let diag_handler = cgcx.create_diag_handler();
fc512014
XL
731
732 let module_name = &thin_module.shared.module_names[thin_module.idx];
5869c6ff 733 let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
fc512014
XL
734 let tm =
735 (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&diag_handler, &e))?;
a1dfa0c6
XL
736
737 // Right now the implementation we've got only works over serialized
738 // modules, so we create a fresh new LLVM context and parse the module
739 // into that context. One day, however, we may do this for upstream
740 // crates but for locally codegened modules we may be able to reuse
741 // that LLVM Context and Module.
742 let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
c295e0f8 743 let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &diag_handler)? as *const _;
a1dfa0c6 744 let module = ModuleCodegen {
dfeec247 745 module_llvm: ModuleLlvm { llmod_raw, llcx, tm },
a1dfa0c6
XL
746 name: thin_module.name().to_string(),
747 kind: ModuleKind::Regular,
748 };
749 {
f035d41b 750 let target = &*module.module_llvm.tm;
a1dfa0c6 751 let llmod = module.module_llvm.llmod();
c295e0f8 752 save_temp_bitcode(cgcx, &module, "thin-lto-input");
a1dfa0c6
XL
753
754 // Before we do much else find the "main" `DICompileUnit` that we'll be
755 // using below. If we find more than one though then rustc has changed
756 // in a way we're not ready for, so generate an ICE by returning
757 // an error.
758 let mut cu1 = ptr::null_mut();
759 let mut cu2 = ptr::null_mut();
c295e0f8 760 llvm::LLVMRustLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
a1dfa0c6
XL
761 if !cu2.is_null() {
762 let msg = "multiple source DICompileUnits found";
dfeec247 763 return Err(write::llvm_err(&diag_handler, msg));
a1dfa0c6 764 }
ff7c6d11 765
a1dfa0c6
XL
766 // Up next comes the per-module local analyses that we do for Thin LTO.
767 // Each of these functions is basically copied from the LLVM
768 // implementation and then tailored to suit this implementation. Ideally
769 // each of these would be supported by upstream LLVM but that's perhaps
770 // a patch for another day!
771 //
772 // You can find some more comments about these functions in the LLVM
773 // bindings we've got (currently `PassWrapper.cpp`)
e74abb32 774 {
74b04a01
XL
775 let _timer =
776 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
f035d41b 777 if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
e74abb32 778 let msg = "failed to prepare thin LTO module";
dfeec247 779 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
780 }
781 save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
a1dfa0c6 782 }
e74abb32
XL
783
784 {
74b04a01
XL
785 let _timer = cgcx
786 .prof
787 .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
e74abb32
XL
788 if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
789 let msg = "failed to prepare thin LTO module";
dfeec247 790 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
791 }
792 save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
ea8adc8c 793 }
e74abb32
XL
794
795 {
74b04a01
XL
796 let _timer = cgcx
797 .prof
798 .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
e74abb32
XL
799 if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
800 let msg = "failed to prepare thin LTO module";
dfeec247 801 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
802 }
803 save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
a1dfa0c6 804 }
e74abb32
XL
805
806 {
74b04a01
XL
807 let _timer =
808 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
f035d41b 809 if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
e74abb32 810 let msg = "failed to prepare thin LTO module";
dfeec247 811 return Err(write::llvm_err(&diag_handler, msg));
e74abb32
XL
812 }
813 save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
a1dfa0c6 814 }
b7449926 815
a1dfa0c6
XL
816 // Ok now this is a bit unfortunate. This is also something you won't
817 // find upstream in LLVM's ThinLTO passes! This is a hack for now to
818 // work around bugs in LLVM.
819 //
820 // First discovered in #45511 it was found that as part of ThinLTO
821 // importing passes LLVM will import `DICompileUnit` metadata
822 // information across modules. This means that we'll be working with one
823 // LLVM module that has multiple `DICompileUnit` instances in it (a
824 // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
825 // bugs in LLVM's backend which generates invalid DWARF in a situation
826 // like this:
827 //
828 // https://bugs.llvm.org/show_bug.cgi?id=35212
829 // https://bugs.llvm.org/show_bug.cgi?id=35562
830 //
831 // While the first bug there is fixed the second ended up causing #46346
832 // which was basically a resurgence of #45511 after LLVM's bug 35212 was
833 // fixed.
834 //
835 // This function below is a huge hack around this problem. The function
836 // below is defined in `PassWrapper.cpp` and will basically "merge"
837 // all `DICompileUnit` instances in a module. Basically it'll take all
838 // the objects, rewrite all pointers of `DISubprogram` to point to the
839 // first `DICompileUnit`, and then delete all the other units.
840 //
841 // This is probably mangling to the debug info slightly (but hopefully
842 // not too much) but for now at least gets LLVM to emit valid DWARF (or
843 // so it appears). Hopefully we can remove this once upstream bugs are
844 // fixed in LLVM.
e74abb32 845 {
74b04a01
XL
846 let _timer = cgcx
847 .prof
848 .generic_activity_with_arg("LLVM_thin_lto_patch_debuginfo", thin_module.name());
c295e0f8 849 llvm::LLVMRustLTOPatchDICompileUnit(llmod, cu1);
e74abb32
XL
850 save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
851 }
a1dfa0c6
XL
852
853 // Alright now that we've done everything related to the ThinLTO
854 // analysis it's time to run some optimizations! Here we use the same
855 // `run_pass_manager` as the "fat" LTO above except that we tell it to
856 // populate a thin-specific pass manager, which presumably LLVM treats a
857 // little differently.
e74abb32 858 {
e74abb32
XL
859 info!("running thin lto passes over {}", module.name);
860 let config = cgcx.config(module.kind);
17df50a5 861 run_pass_manager(cgcx, &diag_handler, &module, config, true)?;
e74abb32
XL
862 save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
863 }
b7449926 864 }
a1dfa0c6 865 Ok(module)
b7449926
XL
866}
867
29967ef6 868/// Maps LLVM module identifiers to their corresponding LLVM LTO cache keys
0bf4aa26 869#[derive(Debug, Default)]
29967ef6
XL
870pub struct ThinLTOKeysMap {
871 // key = llvm name of importing module, value = LLVM cache key
872 keys: FxHashMap<String, String>,
b7449926
XL
873}
874
29967ef6 875impl ThinLTOKeysMap {
dfeec247
XL
876 fn save_to_file(&self, path: &Path) -> io::Result<()> {
877 use std::io::Write;
878 let file = File::create(path)?;
879 let mut writer = io::BufWriter::new(file);
29967ef6
XL
880 for (module, key) in &self.keys {
881 writeln!(writer, "{} {}", module, key)?;
dfeec247
XL
882 }
883 Ok(())
884 }
885
29967ef6 886 fn load_from_file(path: &Path) -> io::Result<Self> {
dfeec247 887 use std::io::BufRead;
29967ef6 888 let mut keys = FxHashMap::default();
dfeec247
XL
889 let file = File::open(path)?;
890 for line in io::BufReader::new(file).lines() {
891 let line = line?;
29967ef6
XL
892 let mut split = line.split(' ');
893 let module = split.next().unwrap();
894 let key = split.next().unwrap();
895 assert_eq!(split.next(), None, "Expected two space-separated values, found {:?}", line);
896 keys.insert(module.to_string(), key.to_string());
dfeec247 897 }
29967ef6 898 Ok(Self { keys })
dfeec247
XL
899 }
900
29967ef6
XL
901 fn from_thin_lto_modules(
902 data: &ThinData,
903 modules: &[llvm::ThinLTOModule],
904 names: &[CString],
905 ) -> Self {
cdc7bbd5 906 let keys = iter::zip(modules, names)
29967ef6
XL
907 .map(|(module, name)| {
908 let key = build_string(|rust_str| unsafe {
909 llvm::LLVMRustComputeLTOCacheKey(rust_str, module.identifier, data.0);
910 })
911 .expect("Invalid ThinLTO module key");
912 (name.clone().into_string().unwrap(), key)
913 })
914 .collect();
915 Self { keys }
b7449926
XL
916 }
917}
918
919fn module_name_to_str(c_str: &CStr) -> &str {
dfeec247
XL
920 c_str.to_str().unwrap_or_else(|e| {
921 bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)
922 })
1a4d82fc 923}
9fa01778 924
e1599b0c 925pub fn parse_module<'a>(
9fa01778
XL
926 cx: &'a llvm::Context,
927 name: &CStr,
928 data: &[u8],
929 diag_handler: &Handler,
930) -> Result<&'a llvm::Module, FatalError> {
931 unsafe {
dfeec247
XL
932 llvm::LLVMRustParseBitcodeForLTO(cx, data.as_ptr(), data.len(), name.as_ptr()).ok_or_else(
933 || {
934 let msg = "failed to parse bitcode for LTO module";
c295e0f8 935 write::llvm_err(diag_handler, msg)
dfeec247
XL
936 },
937 )
9fa01778
XL
938 }
939}