]> git.proxmox.com Git - rustc.git/blame - src/librustc_codegen_llvm/back/lto.rs
New upstream version 1.29.0+dfsg1
[rustc.git] / src / librustc_codegen_llvm / back / lto.rs
CommitLineData
1a4d82fc
JJ
1// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
ea8adc8c 11use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
041b39d2 12use back::symbol_export;
ea8adc8c
XL
13use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext};
14use back::write;
3b2f2976 15use errors::{FatalError, Handler};
1a4d82fc
JJ
16use llvm::archive_ro::ArchiveRO;
17use llvm::{ModuleRef, TargetMachineRef, True, False};
ea8adc8c 18use llvm;
476ff2be 19use rustc::hir::def_id::LOCAL_CRATE;
ea8adc8c 20use rustc::middle::exported_symbols::SymbolExportLevel;
2c00a5a8 21use rustc::session::config::{self, Lto};
0531ce1d 22use rustc::util::common::time_ext;
ea8adc8c 23use time_graph::Timeline;
94b46f34 24use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};
1a4d82fc
JJ
25
26use libc;
1a4d82fc
JJ
27
28use std::ffi::CString;
ff7c6d11 29use std::ptr;
ea8adc8c
XL
30use std::slice;
31use std::sync::Arc;
1a4d82fc 32
476ff2be
SL
33pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
34 match crate_type {
35 config::CrateTypeExecutable |
36 config::CrateTypeStaticlib |
37 config::CrateTypeCdylib => true,
38
39 config::CrateTypeDylib |
40 config::CrateTypeRlib |
476ff2be
SL
41 config::CrateTypeProcMacro => false,
42 }
43}
44
94b46f34 45pub(crate) enum LtoModuleCodegen {
ea8adc8c 46 Fat {
94b46f34 47 module: Option<ModuleCodegen>,
ea8adc8c
XL
48 _serialized_bitcode: Vec<SerializedModule>,
49 },
50
51 Thin(ThinModule),
52}
53
94b46f34 54impl LtoModuleCodegen {
ea8adc8c
XL
55 pub fn name(&self) -> &str {
56 match *self {
94b46f34
XL
57 LtoModuleCodegen::Fat { .. } => "everything",
58 LtoModuleCodegen::Thin(ref m) => m.name(),
ea8adc8c 59 }
1a4d82fc
JJ
60 }
61
ea8adc8c
XL
62 /// Optimize this module within the given codegen context.
63 ///
94b46f34
XL
64 /// This function is unsafe as it'll return a `ModuleCodegen` still
65 /// points to LLVM data structures owned by this `LtoModuleCodegen`.
ea8adc8c
XL
66 /// It's intended that the module returned is immediately code generated and
67 /// dropped, and then this LTO module is dropped.
2c00a5a8
XL
68 pub(crate) unsafe fn optimize(&mut self,
69 cgcx: &CodegenContext,
70 timeline: &mut Timeline)
94b46f34 71 -> Result<ModuleCodegen, FatalError>
ea8adc8c
XL
72 {
73 match *self {
94b46f34
XL
74 LtoModuleCodegen::Fat { ref mut module, .. } => {
75 let module = module.take().unwrap();
76 let config = cgcx.config(module.kind);
77 let llmod = module.llvm().unwrap().llmod;
78 let tm = module.llvm().unwrap().tm;
ea8adc8c
XL
79 run_pass_manager(cgcx, tm, llmod, config, false);
80 timeline.record("fat-done");
94b46f34 81 Ok(module)
ea8adc8c 82 }
94b46f34 83 LtoModuleCodegen::Thin(ref mut thin) => thin.optimize(cgcx, timeline),
1a4d82fc
JJ
84 }
85 }
86
0531ce1d 87 /// A "gauge" of how costly it is to optimize this module, used to sort
ea8adc8c
XL
88 /// biggest modules first.
89 pub fn cost(&self) -> u64 {
90 match *self {
91 // Only one module with fat LTO, so the cost doesn't matter.
94b46f34
XL
92 LtoModuleCodegen::Fat { .. } => 0,
93 LtoModuleCodegen::Thin(ref m) => m.cost(),
ea8adc8c
XL
94 }
95 }
96}
97
2c00a5a8 98pub(crate) fn run(cgcx: &CodegenContext,
94b46f34 99 modules: Vec<ModuleCodegen>,
2c00a5a8 100 timeline: &mut Timeline)
94b46f34 101 -> Result<Vec<LtoModuleCodegen>, FatalError>
ea8adc8c
XL
102{
103 let diag_handler = cgcx.create_diag_handler();
2c00a5a8
XL
104 let export_threshold = match cgcx.lto {
105 // We're just doing LTO for our one crate
106 Lto::ThinLocal => SymbolExportLevel::Rust,
107
108 // We're doing LTO for the entire crate graph
109 Lto::Yes | Lto::Fat | Lto::Thin => {
ea8adc8c
XL
110 symbol_export::crates_export_threshold(&cgcx.crate_types)
111 }
2c00a5a8
XL
112
113 Lto::No => panic!("didn't request LTO but we're doing LTO"),
ea8adc8c
XL
114 };
115
0531ce1d 116 let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
ea8adc8c 117 if level.is_below_threshold(export_threshold) {
476ff2be
SL
118 let mut bytes = Vec::with_capacity(name.len() + 1);
119 bytes.extend(name.bytes());
120 Some(CString::new(bytes).unwrap())
121 } else {
122 None
123 }
124 };
0531ce1d
XL
125 let exported_symbols = cgcx.exported_symbols
126 .as_ref().expect("needs exported symbols for LTO");
127 let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
476ff2be
SL
128 .iter()
129 .filter_map(symbol_filter)
ea8adc8c
XL
130 .collect::<Vec<CString>>();
131 timeline.record("whitelist");
abe05a73 132 info!("{} symbols to preserve in this crate", symbol_white_list.len());
ea8adc8c
XL
133
134 // If we're performing LTO for the entire crate graph, then for each of our
135 // upstream dependencies, find the corresponding rlib and load the bitcode
136 // from the archive.
137 //
138 // We save off all the bytecode and LLVM module ids for later processing
139 // with either fat or thin LTO
140 let mut upstream_modules = Vec::new();
2c00a5a8 141 if cgcx.lto != Lto::ThinLocal {
ea8adc8c
XL
142 if cgcx.opts.cg.prefer_dynamic {
143 diag_handler.struct_err("cannot prefer dynamic linking when performing LTO")
144 .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
145 supported with LTO")
146 .emit();
147 return Err(FatalError)
148 }
149
150 // Make sure we actually can run LTO
151 for crate_type in cgcx.crate_types.iter() {
152 if !crate_type_allows_lto(*crate_type) {
153 let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \
154 static library outputs");
155 return Err(e)
156 }
157 }
158
159 for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
0531ce1d
XL
160 let exported_symbols = cgcx.exported_symbols
161 .as_ref().expect("needs exported symbols for LTO");
ea8adc8c 162 symbol_white_list.extend(
0531ce1d 163 exported_symbols[&cnum]
ea8adc8c
XL
164 .iter()
165 .filter_map(symbol_filter));
166
167 let archive = ArchiveRO::open(&path).expect("wanted an rlib");
168 let bytecodes = archive.iter().filter_map(|child| {
169 child.ok().and_then(|c| c.name().map(|name| (name, c)))
170 }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION));
171 for (name, data) in bytecodes {
172 info!("adding bytecode {}", name);
173 let bc_encoded = data.data();
174
0531ce1d 175 let (bc, id) = time_ext(cgcx.time_passes, None, &format!("decode {}", name), || {
ea8adc8c
XL
176 match DecodedBytecode::new(bc_encoded) {
177 Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
178 Err(e) => Err(diag_handler.fatal(&e)),
1a4d82fc 179 }
ea8adc8c
XL
180 })?;
181 let bc = SerializedModule::FromRlib(bc);
182 upstream_modules.push((bc, CString::new(id).unwrap()));
183 }
184 timeline.record(&format!("load: {}", path.display()));
185 }
186 }
1a4d82fc 187
ea8adc8c 188 let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
2c00a5a8
XL
189 match cgcx.lto {
190 Lto::Yes | // `-C lto` == fat LTO by default
191 Lto::Fat => {
ea8adc8c
XL
192 fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
193 }
2c00a5a8
XL
194 Lto::Thin |
195 Lto::ThinLocal => {
ea8adc8c 196 thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
1a4d82fc 197 }
2c00a5a8 198 Lto::No => unreachable!(),
041b39d2 199 }
ea8adc8c
XL
200}
201
202fn fat_lto(cgcx: &CodegenContext,
203 diag_handler: &Handler,
94b46f34 204 mut modules: Vec<ModuleCodegen>,
ea8adc8c
XL
205 mut serialized_modules: Vec<(SerializedModule, CString)>,
206 symbol_white_list: &[*const libc::c_char],
207 timeline: &mut Timeline)
94b46f34 208 -> Result<Vec<LtoModuleCodegen>, FatalError>
ea8adc8c
XL
209{
210 info!("going for a fat lto");
211
212 // Find the "costliest" module and merge everything into that codegen unit.
213 // All the other modules will be serialized and reparsed into the new
214 // context, so this hopefully avoids serializing and parsing the largest
215 // codegen unit.
216 //
217 // Additionally use a regular module as the base here to ensure that various
218 // file copy operations in the backend work correctly. The only other kind
219 // of module here should be an allocator one, and if your crate is smaller
220 // than the allocator module then the size doesn't really matter anyway.
221 let (_, costliest_module) = modules.iter()
222 .enumerate()
223 .filter(|&(_, module)| module.kind == ModuleKind::Regular)
224 .map(|(i, module)| {
225 let cost = unsafe {
226 llvm::LLVMRustModuleCost(module.llvm().unwrap().llmod)
227 };
228 (cost, i)
229 })
230 .max()
94b46f34 231 .expect("must be codegen'ing at least one module");
ea8adc8c 232 let module = modules.remove(costliest_module);
94b46f34 233 let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod;
ea8adc8c
XL
234 info!("using {:?} as a base module", module.llmod_id);
235
94b46f34
XL
236 // For all other modules we codegened we'll need to link them into our own
237 // bitcode. All modules were codegened in their own LLVM context, however,
ea8adc8c
XL
238 // and we want to move everything to the same LLVM context. Currently the
239 // way we know of to do that is to serialize them to a string and them parse
240 // them later. Not great but hey, that's why it's "fat" LTO, right?
241 for module in modules {
94b46f34 242 let llvm = module.llvm().expect("can't lto pre-codegened modules");
ea8adc8c
XL
243 let buffer = ModuleBuffer::new(llvm.llmod);
244 let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
245 serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
246 }
247
248 // For all serialized bitcode files we parse them and link them in as we did
249 // above, this is all mostly handled in C++. Like above, though, we don't
250 // know much about the memory management here so we err on the side of being
251 // save and persist everything with the original module.
252 let mut serialized_bitcode = Vec::new();
0531ce1d 253 let mut linker = Linker::new(llmod);
ea8adc8c
XL
254 for (bc_decoded, name) in serialized_modules {
255 info!("linking {:?}", name);
0531ce1d 256 time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
ea8adc8c 257 let data = bc_decoded.data();
0531ce1d 258 linker.add(&data).map_err(|()| {
ea8adc8c 259 let msg = format!("failed to load bc of {:?}", name);
0531ce1d
XL
260 write::llvm_err(&diag_handler, msg)
261 })
ea8adc8c
XL
262 })?;
263 timeline.record(&format!("link {:?}", name));
264 serialized_bitcode.push(bc_decoded);
265 }
0531ce1d 266 drop(linker);
ea8adc8c 267 cgcx.save_temp_bitcode(&module, "lto.input");
1a4d82fc 268
ea8adc8c
XL
269 // Internalize everything that *isn't* in our whitelist to help strip out
270 // more modules and such
1a4d82fc 271 unsafe {
ea8adc8c 272 let ptr = symbol_white_list.as_ptr();
1a4d82fc
JJ
273 llvm::LLVMRustRunRestrictionPass(llmod,
274 ptr as *const *const libc::c_char,
ea8adc8c
XL
275 symbol_white_list.len() as libc::size_t);
276 cgcx.save_temp_bitcode(&module, "lto.after-restriction");
1a4d82fc
JJ
277 }
278
041b39d2 279 if cgcx.no_landing_pads {
1a4d82fc
JJ
280 unsafe {
281 llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
282 }
ea8adc8c 283 cgcx.save_temp_bitcode(&module, "lto.after-nounwind");
1a4d82fc 284 }
ea8adc8c 285 timeline.record("passes");
1a4d82fc 286
94b46f34 287 Ok(vec![LtoModuleCodegen::Fat {
ea8adc8c
XL
288 module: Some(module),
289 _serialized_bitcode: serialized_bitcode,
290 }])
291}
292
0531ce1d
XL
293struct Linker(llvm::LinkerRef);
294
295impl Linker {
296 fn new(llmod: ModuleRef) -> Linker {
297 unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
298 }
299
300 fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
301 unsafe {
302 if llvm::LLVMRustLinkerAdd(self.0,
303 bytecode.as_ptr() as *const libc::c_char,
304 bytecode.len()) {
305 Ok(())
306 } else {
307 Err(())
308 }
309 }
310 }
311}
312
313impl Drop for Linker {
314 fn drop(&mut self) {
315 unsafe { llvm::LLVMRustLinkerFree(self.0); }
316 }
317}
318
ea8adc8c
XL
319/// Prepare "thin" LTO to get run on these modules.
320///
321/// The general structure of ThinLTO is quite different from the structure of
322/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
323/// one giant LLVM module, and then we run more optimization passes over this
324/// big module after internalizing most symbols. Thin LTO, on the other hand,
325/// avoid this large bottleneck through more targeted optimization.
326///
327/// At a high level Thin LTO looks like:
328///
329/// 1. Prepare a "summary" of each LLVM module in question which describes
330/// the values inside, cost of the values, etc.
331/// 2. Merge the summaries of all modules in question into one "index"
332/// 3. Perform some global analysis on this index
333/// 4. For each module, use the index and analysis calculated previously to
334/// perform local transformations on the module, for example inlining
335/// small functions from other modules.
336/// 5. Run thin-specific optimization passes over each module, and then code
337/// generate everything at the end.
338///
339/// The summary for each module is intended to be quite cheap, and the global
340/// index is relatively quite cheap to create as well. As a result, the goal of
341/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
342/// situations. For example one cheap optimization is that we can parallelize
343/// all codegen modules, easily making use of all the cores on a machine.
344///
345/// With all that in mind, the function here is designed at specifically just
346/// calculating the *index* for ThinLTO. This index will then be shared amongst
94b46f34 347/// all of the `LtoModuleCodegen` units returned below and destroyed once
ea8adc8c
XL
348/// they all go out of scope.
349fn thin_lto(diag_handler: &Handler,
94b46f34 350 modules: Vec<ModuleCodegen>,
ea8adc8c
XL
351 serialized_modules: Vec<(SerializedModule, CString)>,
352 symbol_white_list: &[*const libc::c_char],
353 timeline: &mut Timeline)
94b46f34 354 -> Result<Vec<LtoModuleCodegen>, FatalError>
ea8adc8c
XL
355{
356 unsafe {
357 info!("going for that thin, thin LTO");
358
359 let mut thin_buffers = Vec::new();
360 let mut module_names = Vec::new();
361 let mut thin_modules = Vec::new();
362
363 // FIXME: right now, like with fat LTO, we serialize all in-memory
364 // modules before working with them and ThinLTO. We really
365 // shouldn't do this, however, and instead figure out how to
366 // extract a summary from an in-memory module and then merge that
367 // into the global index. It turns out that this loop is by far
368 // the most expensive portion of this small bit of global
369 // analysis!
370 for (i, module) in modules.iter().enumerate() {
371 info!("local module: {} - {}", i, module.llmod_id);
94b46f34 372 let llvm = module.llvm().expect("can't lto precodegened module");
ea8adc8c 373 let name = CString::new(module.llmod_id.clone()).unwrap();
abe05a73 374 let buffer = ThinBuffer::new(llvm.llmod);
ea8adc8c
XL
375 thin_modules.push(llvm::ThinLTOModule {
376 identifier: name.as_ptr(),
377 data: buffer.data().as_ptr(),
378 len: buffer.data().len(),
379 });
380 thin_buffers.push(buffer);
381 module_names.push(name);
382 timeline.record(&module.llmod_id);
b039eaaf 383 }
ea8adc8c
XL
384
385 // FIXME: All upstream crates are deserialized internally in the
386 // function below to extract their summary and modules. Note that
387 // unlike the loop above we *must* decode and/or read something
388 // here as these are all just serialized files on disk. An
389 // improvement, however, to make here would be to store the
390 // module summary separately from the actual module itself. Right
391 // now this is store in one large bitcode file, and the entire
392 // file is deflate-compressed. We could try to bypass some of the
393 // decompression by storing the index uncompressed and only
394 // lazily decompressing the bytecode if necessary.
395 //
396 // Note that truly taking advantage of this optimization will
397 // likely be further down the road. We'd have to implement
398 // incremental ThinLTO first where we could actually avoid
399 // looking at upstream modules entirely sometimes (the contents,
400 // we must always unconditionally look at the index).
401 let mut serialized = Vec::new();
402 for (module, name) in serialized_modules {
403 info!("foreign module {:?}", name);
404 thin_modules.push(llvm::ThinLTOModule {
405 identifier: name.as_ptr(),
406 data: module.data().as_ptr(),
407 len: module.data().len(),
408 });
409 serialized.push(module);
410 module_names.push(name);
411 }
412
413 // Delegate to the C++ bindings to create some data here. Once this is a
414 // tried-and-true interface we may wish to try to upstream some of this
415 // to LLVM itself, right now we reimplement a lot of what they do
416 // upstream...
417 let data = llvm::LLVMRustCreateThinLTOData(
418 thin_modules.as_ptr(),
419 thin_modules.len() as u32,
420 symbol_white_list.as_ptr(),
421 symbol_white_list.len() as u32,
422 );
423 if data.is_null() {
8faf50e0 424 let msg = "failed to prepare thin LTO context".to_string();
ea8adc8c
XL
425 return Err(write::llvm_err(&diag_handler, msg))
426 }
427 let data = ThinData(data);
428 info!("thin LTO data created");
429 timeline.record("data");
430
431 // Throw our data in an `Arc` as we'll be sharing it across threads. We
432 // also put all memory referenced by the C++ data (buffers, ids, etc)
433 // into the arc as well. After this we'll create a thin module
94b46f34 434 // codegen per module in this data.
ea8adc8c
XL
435 let shared = Arc::new(ThinShared {
436 data,
437 thin_buffers,
438 serialized_modules: serialized,
439 module_names,
440 });
441 Ok((0..shared.module_names.len()).map(|i| {
94b46f34 442 LtoModuleCodegen::Thin(ThinModule {
ea8adc8c
XL
443 shared: shared.clone(),
444 idx: i,
445 })
446 }).collect())
b039eaaf 447 }
ea8adc8c 448}
b039eaaf 449
ea8adc8c
XL
450fn run_pass_manager(cgcx: &CodegenContext,
451 tm: TargetMachineRef,
452 llmod: ModuleRef,
453 config: &ModuleConfig,
454 thin: bool) {
1a4d82fc
JJ
455 // Now we have one massive module inside of llmod. Time to run the
456 // LTO-specific optimization passes that LLVM provides.
457 //
458 // This code is based off the code found in llvm's LTO code generator:
459 // tools/lto/LTOCodeGenerator.cpp
460 debug!("running the pass manager");
461 unsafe {
462 let pm = llvm::LLVMCreatePassManager();
463 llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod);
8faf50e0
XL
464
465 if config.verify_llvm_ir {
466 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
467 assert!(!pass.is_null());
468 llvm::LLVMRustAddPass(pm, pass);
469 }
1a4d82fc 470
abe05a73
XL
471 // When optimizing for LTO we don't actually pass in `-O0`, but we force
472 // it to always happen at least with `-O1`.
473 //
474 // With ThinLTO we mess around a lot with symbol visibility in a way
475 // that will actually cause linking failures if we optimize at O0 which
476 // notable is lacking in dead code elimination. To ensure we at least
477 // get some optimizations and correctly link we forcibly switch to `-O1`
478 // to get dead code elimination.
479 //
480 // Note that in general this shouldn't matter too much as you typically
481 // only turn on ThinLTO when you're compiling with optimizations
482 // otherwise.
483 let opt_level = config.opt_level.unwrap_or(llvm::CodeGenOptLevel::None);
484 let opt_level = match opt_level {
485 llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less,
486 level => level,
487 };
94b46f34 488 with_llvm_pmb(llmod, config, opt_level, false, &mut |b| {
ea8adc8c
XL
489 if thin {
490 if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) {
491 panic!("this version of LLVM does not support ThinLTO");
492 }
493 } else {
494 llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm,
495 /* Internalize = */ False,
496 /* RunInliner = */ True);
497 }
c1a9b12d 498 });
1a4d82fc 499
8faf50e0
XL
500 if config.verify_llvm_ir {
501 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
502 assert!(!pass.is_null());
503 llvm::LLVMRustAddPass(pm, pass);
504 }
1a4d82fc 505
0531ce1d 506 time_ext(cgcx.time_passes, None, "LTO passes", ||
1a4d82fc
JJ
507 llvm::LLVMRunPassManager(pm, llmod));
508
509 llvm::LLVMDisposePassManager(pm);
510 }
511 debug!("lto done");
512}
513
ea8adc8c
XL
514pub enum SerializedModule {
515 Local(ModuleBuffer),
516 FromRlib(Vec<u8>),
517}
518
519impl SerializedModule {
520 fn data(&self) -> &[u8] {
521 match *self {
522 SerializedModule::Local(ref m) => m.data(),
523 SerializedModule::FromRlib(ref m) => m,
524 }
525 }
526}
527
528pub struct ModuleBuffer(*mut llvm::ModuleBuffer);
529
530unsafe impl Send for ModuleBuffer {}
531unsafe impl Sync for ModuleBuffer {}
532
533impl ModuleBuffer {
abe05a73 534 pub fn new(m: ModuleRef) -> ModuleBuffer {
ea8adc8c
XL
535 ModuleBuffer(unsafe {
536 llvm::LLVMRustModuleBufferCreate(m)
537 })
538 }
539
abe05a73 540 pub fn data(&self) -> &[u8] {
ea8adc8c
XL
541 unsafe {
542 let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
543 let len = llvm::LLVMRustModuleBufferLen(self.0);
544 slice::from_raw_parts(ptr, len)
545 }
546 }
547}
548
549impl Drop for ModuleBuffer {
550 fn drop(&mut self) {
551 unsafe { llvm::LLVMRustModuleBufferFree(self.0); }
552 }
553}
554
555pub struct ThinModule {
556 shared: Arc<ThinShared>,
557 idx: usize,
558}
559
560struct ThinShared {
561 data: ThinData,
562 thin_buffers: Vec<ThinBuffer>,
563 serialized_modules: Vec<SerializedModule>,
564 module_names: Vec<CString>,
565}
566
567struct ThinData(*mut llvm::ThinLTOData);
568
569unsafe impl Send for ThinData {}
570unsafe impl Sync for ThinData {}
571
572impl Drop for ThinData {
573 fn drop(&mut self) {
574 unsafe {
575 llvm::LLVMRustFreeThinLTOData(self.0);
576 }
577 }
578}
579
abe05a73 580pub struct ThinBuffer(*mut llvm::ThinLTOBuffer);
ea8adc8c
XL
581
582unsafe impl Send for ThinBuffer {}
583unsafe impl Sync for ThinBuffer {}
584
585impl ThinBuffer {
abe05a73
XL
586 pub fn new(m: ModuleRef) -> ThinBuffer {
587 unsafe {
588 let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
589 ThinBuffer(buffer)
590 }
591 }
592
593 pub fn data(&self) -> &[u8] {
ea8adc8c
XL
594 unsafe {
595 let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
596 let len = llvm::LLVMRustThinLTOBufferLen(self.0);
597 slice::from_raw_parts(ptr, len)
598 }
599 }
1a4d82fc
JJ
600}
601
ea8adc8c
XL
602impl Drop for ThinBuffer {
603 fn drop(&mut self) {
604 unsafe {
605 llvm::LLVMRustThinLTOBufferFree(self.0);
606 }
607 }
1a4d82fc
JJ
608}
609
ea8adc8c
XL
610impl ThinModule {
611 fn name(&self) -> &str {
612 self.shared.module_names[self.idx].to_str().unwrap()
613 }
614
615 fn cost(&self) -> u64 {
616 // Yes, that's correct, we're using the size of the bytecode as an
617 // indicator for how costly this codegen unit is.
618 self.data().len() as u64
619 }
620
621 fn data(&self) -> &[u8] {
622 let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data());
623 a.unwrap_or_else(|| {
624 let len = self.shared.thin_buffers.len();
625 self.shared.serialized_modules[self.idx - len].data()
626 })
627 }
628
629 unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline)
94b46f34 630 -> Result<ModuleCodegen, FatalError>
ea8adc8c
XL
631 {
632 let diag_handler = cgcx.create_diag_handler();
633 let tm = (cgcx.tm_factory)().map_err(|e| {
634 write::llvm_err(&diag_handler, e)
635 })?;
636
637 // Right now the implementation we've got only works over serialized
638 // modules, so we create a fresh new LLVM context and parse the module
639 // into that context. One day, however, we may do this for upstream
94b46f34 640 // crates but for locally codegened modules we may be able to reuse
ea8adc8c 641 // that LLVM Context and Module.
ff7c6d11 642 let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
ea8adc8c
XL
643 let llmod = llvm::LLVMRustParseBitcodeForThinLTO(
644 llcx,
645 self.data().as_ptr(),
646 self.data().len(),
647 self.shared.module_names[self.idx].as_ptr(),
648 );
ff7c6d11 649 if llmod.is_null() {
8faf50e0 650 let msg = "failed to parse bitcode for thin LTO module".to_string();
ff7c6d11
XL
651 return Err(write::llvm_err(&diag_handler, msg));
652 }
94b46f34
XL
653 let module = ModuleCodegen {
654 source: ModuleSource::Codegened(ModuleLlvm {
ea8adc8c
XL
655 llmod,
656 llcx,
657 tm,
658 }),
659 llmod_id: self.name().to_string(),
660 name: self.name().to_string(),
661 kind: ModuleKind::Regular,
662 };
94b46f34 663 cgcx.save_temp_bitcode(&module, "thin-lto-input");
ea8adc8c 664
ff7c6d11
XL
665 // Before we do much else find the "main" `DICompileUnit` that we'll be
666 // using below. If we find more than one though then rustc has changed
667 // in a way we're not ready for, so generate an ICE by returning
668 // an error.
669 let mut cu1 = ptr::null_mut();
670 let mut cu2 = ptr::null_mut();
671 llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
672 if !cu2.is_null() {
8faf50e0 673 let msg = "multiple source DICompileUnits found".to_string();
ff7c6d11
XL
674 return Err(write::llvm_err(&diag_handler, msg))
675 }
676
ea8adc8c
XL
677 // Like with "fat" LTO, get some better optimizations if landing pads
678 // are disabled by removing all landing pads.
679 if cgcx.no_landing_pads {
680 llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
94b46f34 681 cgcx.save_temp_bitcode(&module, "thin-lto-after-nounwind");
ea8adc8c
XL
682 timeline.record("nounwind");
683 }
684
685 // Up next comes the per-module local analyses that we do for Thin LTO.
686 // Each of these functions is basically copied from the LLVM
687 // implementation and then tailored to suit this implementation. Ideally
688 // each of these would be supported by upstream LLVM but that's perhaps
689 // a patch for another day!
690 //
691 // You can find some more comments about these functions in the LLVM
692 // bindings we've got (currently `PassWrapper.cpp`)
693 if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) {
8faf50e0 694 let msg = "failed to prepare thin LTO module".to_string();
ea8adc8c
XL
695 return Err(write::llvm_err(&diag_handler, msg))
696 }
94b46f34 697 cgcx.save_temp_bitcode(&module, "thin-lto-after-rename");
ea8adc8c
XL
698 timeline.record("rename");
699 if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) {
8faf50e0 700 let msg = "failed to prepare thin LTO module".to_string();
ea8adc8c
XL
701 return Err(write::llvm_err(&diag_handler, msg))
702 }
94b46f34 703 cgcx.save_temp_bitcode(&module, "thin-lto-after-resolve");
ea8adc8c
XL
704 timeline.record("resolve");
705 if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) {
8faf50e0 706 let msg = "failed to prepare thin LTO module".to_string();
ea8adc8c
XL
707 return Err(write::llvm_err(&diag_handler, msg))
708 }
94b46f34 709 cgcx.save_temp_bitcode(&module, "thin-lto-after-internalize");
ea8adc8c
XL
710 timeline.record("internalize");
711 if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) {
8faf50e0 712 let msg = "failed to prepare thin LTO module".to_string();
ea8adc8c
XL
713 return Err(write::llvm_err(&diag_handler, msg))
714 }
94b46f34 715 cgcx.save_temp_bitcode(&module, "thin-lto-after-import");
ea8adc8c
XL
716 timeline.record("import");
717
ff7c6d11
XL
718 // Ok now this is a bit unfortunate. This is also something you won't
719 // find upstream in LLVM's ThinLTO passes! This is a hack for now to
720 // work around bugs in LLVM.
721 //
722 // First discovered in #45511 it was found that as part of ThinLTO
723 // importing passes LLVM will import `DICompileUnit` metadata
724 // information across modules. This means that we'll be working with one
725 // LLVM module that has multiple `DICompileUnit` instances in it (a
726 // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
727 // bugs in LLVM's backend which generates invalid DWARF in a situation
728 // like this:
729 //
730 // https://bugs.llvm.org/show_bug.cgi?id=35212
731 // https://bugs.llvm.org/show_bug.cgi?id=35562
732 //
733 // While the first bug there is fixed the second ended up causing #46346
734 // which was basically a resurgence of #45511 after LLVM's bug 35212 was
735 // fixed.
736 //
0531ce1d 737 // This function below is a huge hack around this problem. The function
ff7c6d11
XL
738 // below is defined in `PassWrapper.cpp` and will basically "merge"
739 // all `DICompileUnit` instances in a module. Basically it'll take all
740 // the objects, rewrite all pointers of `DISubprogram` to point to the
741 // first `DICompileUnit`, and then delete all the other units.
742 //
743 // This is probably mangling to the debug info slightly (but hopefully
744 // not too much) but for now at least gets LLVM to emit valid DWARF (or
745 // so it appears). Hopefully we can remove this once upstream bugs are
746 // fixed in LLVM.
747 llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
94b46f34 748 cgcx.save_temp_bitcode(&module, "thin-lto-after-patch");
ff7c6d11
XL
749 timeline.record("patch");
750
ea8adc8c
XL
751 // Alright now that we've done everything related to the ThinLTO
752 // analysis it's time to run some optimizations! Here we use the same
753 // `run_pass_manager` as the "fat" LTO above except that we tell it to
754 // populate a thin-specific pass manager, which presumably LLVM treats a
755 // little differently.
94b46f34
XL
756 info!("running thin lto passes over {}", module.name);
757 let config = cgcx.config(module.kind);
ea8adc8c 758 run_pass_manager(cgcx, tm, llmod, config, true);
94b46f34 759 cgcx.save_temp_bitcode(&module, "thin-lto-after-pm");
ea8adc8c 760 timeline.record("thin-done");
ff7c6d11 761
94b46f34 762 Ok(module)
ea8adc8c 763 }
1a4d82fc 764}