// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
use back::symbol_export;
use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext};
use back::write;
use errors::{FatalError, Handler};
use llvm::archive_ro::ArchiveRO;
use llvm::{ModuleRef, TargetMachineRef, True, False};
use llvm;
use rustc::hir::def_id::LOCAL_CRATE;
use rustc::middle::exported_symbols::SymbolExportLevel;
use rustc::session::config::{self, Lto};
use rustc::util::common::time_ext;
use time_graph::Timeline;
use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};

use libc;

use std::ffi::CString;
use std::ptr;
use std::slice;
use std::sync::Arc;

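/// Returns whether the given crate type supports LTO: only crate types that
/// link all Rust code statically into the final artifact (executables, static
/// libraries, and cdylibs) can be LTO'd here.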
pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
    match crate_type {
        config::CrateTypeExecutable |
        config::CrateTypeStaticlib |
        config::CrateTypeCdylib => true,

        config::CrateTypeDylib |
        config::CrateTypeRlib |
        config::CrateTypeProcMacro => false,
    }
}

pub(crate) enum LtoModuleCodegen {
    Fat {
        module: Option<ModuleCodegen>,
        _serialized_bitcode: Vec<SerializedModule>,
    },

    Thin(ThinModule),
}

impl LtoModuleCodegen {
    pub fn name(&self) -> &str {
        match *self {
            LtoModuleCodegen::Fat { .. } => "everything",
            LtoModuleCodegen::Thin(ref m) => m.name(),
        }
    }

    /// Optimize this module within the given codegen context.
    ///
    /// This function is unsafe as it'll return a `ModuleCodegen` that still
    /// points to LLVM data structures owned by this `LtoModuleCodegen`.
    /// It's intended that the module returned is immediately code generated
    /// and dropped, and then this LTO module is dropped.
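    ///
    /// A minimal usage sketch of that contract (illustrative only; the real
    /// call sites live in `back::write`, and `lto_module`, `cgcx` and
    /// `timeline` are assumed local bindings rather than APIs defined here):
    ///
    /// ```ignore
    /// // Optimize, immediately codegen the result, then drop both in order.
    /// let module = unsafe { lto_module.optimize(cgcx, timeline)? };
    /// // ... hand `module` straight to codegen ...
    /// drop(module);     // the returned module must not outlive ...
    /// drop(lto_module); // ... the `LtoModuleCodegen` owning the LLVM data.
    /// ```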
    pub(crate) unsafe fn optimize(&mut self,
                                  cgcx: &CodegenContext,
                                  timeline: &mut Timeline)
        -> Result<ModuleCodegen, FatalError>
    {
        match *self {
            LtoModuleCodegen::Fat { ref mut module, .. } => {
                let module = module.take().unwrap();
                let config = cgcx.config(module.kind);
                let llmod = module.llvm().unwrap().llmod;
                let tm = module.llvm().unwrap().tm;
                run_pass_manager(cgcx, tm, llmod, config, false);
                timeline.record("fat-done");
                Ok(module)
            }
            LtoModuleCodegen::Thin(ref mut thin) => thin.optimize(cgcx, timeline),
        }
    }

    /// A "gauge" of how costly it is to optimize this module, used to sort
    /// biggest modules first.
    pub fn cost(&self) -> u64 {
        match *self {
            // Only one module with fat LTO, so the cost doesn't matter.
            LtoModuleCodegen::Fat { .. } => 0,
            LtoModuleCodegen::Thin(ref m) => m.cost(),
        }
    }
}

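/// Entry point for LTO from the rest of the backend: computes the white-list
/// of symbols to preserve, loads upstream bitcode out of rlibs when LTO'ing
/// the entire crate graph, and then dispatches to either `fat_lto` or
/// `thin_lto`.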
pub(crate) fn run(cgcx: &CodegenContext,
                  modules: Vec<ModuleCodegen>,
                  timeline: &mut Timeline)
    -> Result<Vec<LtoModuleCodegen>, FatalError>
{
    let diag_handler = cgcx.create_diag_handler();
    let export_threshold = match cgcx.lto {
        // We're just doing LTO for our one crate
        Lto::ThinLocal => SymbolExportLevel::Rust,

        // We're doing LTO for the entire crate graph
        Lto::Yes | Lto::Fat | Lto::Thin => {
            symbol_export::crates_export_threshold(&cgcx.crate_types)
        }

        Lto::No => panic!("didn't request LTO but we're doing LTO"),
    };

    let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
        if level.is_below_threshold(export_threshold) {
            let mut bytes = Vec::with_capacity(name.len() + 1);
            bytes.extend(name.bytes());
            Some(CString::new(bytes).unwrap())
        } else {
            None
        }
    };
    let exported_symbols = cgcx.exported_symbols
        .as_ref().expect("needs exported symbols for LTO");
    let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
        .iter()
        .filter_map(symbol_filter)
        .collect::<Vec<CString>>();
    timeline.record("whitelist");
    info!("{} symbols to preserve in this crate", symbol_white_list.len());

    // If we're performing LTO for the entire crate graph, then for each of our
    // upstream dependencies, find the corresponding rlib and load the bitcode
    // from the archive.
    //
    // We save off all the bytecode and LLVM module ids for later processing
    // with either fat or thin LTO
    let mut upstream_modules = Vec::new();
    if cgcx.lto != Lto::ThinLocal {
        if cgcx.opts.cg.prefer_dynamic {
            diag_handler.struct_err("cannot prefer dynamic linking when performing LTO")
                        .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
                               supported with LTO")
                        .emit();
            return Err(FatalError)
        }

        // Make sure we actually can run LTO
        for crate_type in cgcx.crate_types.iter() {
            if !crate_type_allows_lto(*crate_type) {
                let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \
                                            static library outputs");
                return Err(e)
            }
        }

        for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
            let exported_symbols = cgcx.exported_symbols
                .as_ref().expect("needs exported symbols for LTO");
            symbol_white_list.extend(
                exported_symbols[&cnum]
                    .iter()
                    .filter_map(symbol_filter));

            let archive = ArchiveRO::open(&path).expect("wanted an rlib");
            let bytecodes = archive.iter().filter_map(|child| {
                child.ok().and_then(|c| c.name().map(|name| (name, c)))
            }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION));
            for (name, data) in bytecodes {
                info!("adding bytecode {}", name);
                let bc_encoded = data.data();

                let (bc, id) = time_ext(cgcx.time_passes, None, &format!("decode {}", name), || {
                    match DecodedBytecode::new(bc_encoded) {
                        Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
                        Err(e) => Err(diag_handler.fatal(&e)),
                    }
                })?;
                let bc = SerializedModule::FromRlib(bc);
                upstream_modules.push((bc, CString::new(id).unwrap()));
            }
            timeline.record(&format!("load: {}", path.display()));
        }
    }

    let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
    match cgcx.lto {
        Lto::Yes | // `-C lto` == fat LTO by default
        Lto::Fat => {
            fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
        }
        Lto::Thin |
        Lto::ThinLocal => {
            thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
        }
        Lto::No => unreachable!(),
    }
}

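/// Performs "fat" LTO: every module's bitcode is linked into a single LLVM
/// module (using the costliest regular module as the base), symbols outside
/// the white-list are internalized, and a single `LtoModuleCodegen::Fat` unit
/// is returned for the caller to optimize and codegen.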
fn fat_lto(cgcx: &CodegenContext,
           diag_handler: &Handler,
           mut modules: Vec<ModuleCodegen>,
           mut serialized_modules: Vec<(SerializedModule, CString)>,
           symbol_white_list: &[*const libc::c_char],
           timeline: &mut Timeline)
    -> Result<Vec<LtoModuleCodegen>, FatalError>
{
    info!("going for a fat lto");

    // Find the "costliest" module and merge everything into that codegen unit.
    // All the other modules will be serialized and reparsed into the new
    // context, so this hopefully avoids serializing and parsing the largest
    // codegen unit.
    //
    // Additionally use a regular module as the base here to ensure that various
    // file copy operations in the backend work correctly. The only other kind
    // of module here should be an allocator one, and if your crate is smaller
    // than the allocator module then the size doesn't really matter anyway.
    let (_, costliest_module) = modules.iter()
        .enumerate()
        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
        .map(|(i, module)| {
            let cost = unsafe {
                llvm::LLVMRustModuleCost(module.llvm().unwrap().llmod)
            };
            (cost, i)
        })
        .max()
        .expect("must be codegen'ing at least one module");
    let module = modules.remove(costliest_module);
    let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod;
    info!("using {:?} as a base module", module.llmod_id);

    // For all other modules we codegened we'll need to link them into our own
    // bitcode. All modules were codegened in their own LLVM context, however,
    // and we want to move everything to the same LLVM context. Currently the
    // way we know of to do that is to serialize them to a string and then
    // parse them later. Not great but hey, that's why it's "fat" LTO, right?
    for module in modules {
        let llvm = module.llvm().expect("can't lto pre-codegened modules");
        let buffer = ModuleBuffer::new(llvm.llmod);
        let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
        serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
    }

    // For all serialized bitcode files we parse them and link them in as we did
    // above; this is all mostly handled in C++. Like above, though, we don't
    // know much about the memory management here so we err on the side of being
    // safe and persist everything with the original module.
    let mut serialized_bitcode = Vec::new();
    let mut linker = Linker::new(llmod);
    for (bc_decoded, name) in serialized_modules {
        info!("linking {:?}", name);
        time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
            let data = bc_decoded.data();
            linker.add(&data).map_err(|()| {
                let msg = format!("failed to load bc of {:?}", name);
                write::llvm_err(&diag_handler, msg)
            })
        })?;
        timeline.record(&format!("link {:?}", name));
        serialized_bitcode.push(bc_decoded);
    }
    drop(linker);
    cgcx.save_temp_bitcode(&module, "lto.input");

    // Internalize everything that *isn't* in our whitelist to help strip out
    // more modules and such
    unsafe {
        let ptr = symbol_white_list.as_ptr();
        llvm::LLVMRustRunRestrictionPass(llmod,
                                         ptr as *const *const libc::c_char,
                                         symbol_white_list.len() as libc::size_t);
        cgcx.save_temp_bitcode(&module, "lto.after-restriction");
    }

    if cgcx.no_landing_pads {
        unsafe {
            llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
        }
        cgcx.save_temp_bitcode(&module, "lto.after-nounwind");
    }
    timeline.record("passes");

    Ok(vec![LtoModuleCodegen::Fat {
        module: Some(module),
        _serialized_bitcode: serialized_bitcode,
    }])
}

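/// Thin wrapper around the Rust bindings to LLVM's in-memory bitcode linker
/// (`LLVMRustLinkerNew`/`LLVMRustLinkerAdd`): fat LTO uses it to splice every
/// other module's bitcode into the base module. The underlying linker is
/// freed on drop.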
struct Linker(llvm::LinkerRef);

impl Linker {
    fn new(llmod: ModuleRef) -> Linker {
        unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
    }

    fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
        unsafe {
            if llvm::LLVMRustLinkerAdd(self.0,
                                       bytecode.as_ptr() as *const libc::c_char,
                                       bytecode.len()) {
                Ok(())
            } else {
                Err(())
            }
        }
    }
}

impl Drop for Linker {
    fn drop(&mut self) {
        unsafe { llvm::LLVMRustLinkerFree(self.0); }
    }
}

/// Prepare "thin" LTO to get run on these modules.
///
/// The general structure of ThinLTO is quite different from the structure of
/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
/// one giant LLVM module, and then we run more optimization passes over this
/// big module after internalizing most symbols. Thin LTO, on the other hand,
/// avoids this large bottleneck through more targeted optimization.
///
/// At a high level Thin LTO looks like:
///
/// 1. Prepare a "summary" of each LLVM module in question which describes
///    the values inside, cost of the values, etc.
/// 2. Merge the summaries of all modules in question into one "index"
/// 3. Perform some global analysis on this index
/// 4. For each module, use the index and analysis calculated previously to
///    perform local transformations on the module, for example inlining
///    small functions from other modules.
/// 5. Run thin-specific optimization passes over each module, and then code
///    generate everything at the end.
///
/// The summary for each module is intended to be quite cheap, and the global
/// index is relatively cheap to create as well. As a result, the goal of
/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
/// situations. For example one cheap optimization is that we can codegen all
/// modules in parallel, easily making use of all the cores on a machine.
///
/// With all that in mind, the function here is designed specifically to
/// calculate the *index* for ThinLTO. This index will then be shared amongst
/// all of the `LtoModuleCodegen` units returned below and destroyed once
/// they all go out of scope.
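///
/// A rough sketch of how the returned units are meant to be consumed
/// (illustrative only; the actual scheduling lives in `back::write`, and the
/// bindings used here are assumed, not APIs defined in this function):
///
/// ```ignore
/// let mut units = thin_lto(&diag_handler, modules, upstream, &whitelist, timeline)?;
/// // Larger (costlier) modules are worth starting first when the units are
/// // optimized in parallel across the codegen threads.
/// units.sort_by_key(|u| u.cost());
/// while let Some(mut unit) = units.pop() {
///     let module = unsafe { unit.optimize(cgcx, timeline)? };
///     // ... codegen `module`, dropping it before `unit` ...
/// }
/// ```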
fn thin_lto(diag_handler: &Handler,
            modules: Vec<ModuleCodegen>,
            serialized_modules: Vec<(SerializedModule, CString)>,
            symbol_white_list: &[*const libc::c_char],
            timeline: &mut Timeline)
    -> Result<Vec<LtoModuleCodegen>, FatalError>
{
    unsafe {
        info!("going for that thin, thin LTO");

        let mut thin_buffers = Vec::new();
        let mut module_names = Vec::new();
        let mut thin_modules = Vec::new();

        // FIXME: right now, like with fat LTO, we serialize all in-memory
        //        modules before handing them to ThinLTO. We really shouldn't
        //        do this, however, and instead figure out how to extract a
        //        summary from an in-memory module and then merge that into
        //        the global index. It turns out that this loop is by far the
        //        most expensive portion of this small bit of global analysis!
        for (i, module) in modules.iter().enumerate() {
            info!("local module: {} - {}", i, module.llmod_id);
            let llvm = module.llvm().expect("can't lto precodegened module");
            let name = CString::new(module.llmod_id.clone()).unwrap();
            let buffer = ThinBuffer::new(llvm.llmod);
            thin_modules.push(llvm::ThinLTOModule {
                identifier: name.as_ptr(),
                data: buffer.data().as_ptr(),
                len: buffer.data().len(),
            });
            thin_buffers.push(buffer);
            module_names.push(name);
            timeline.record(&module.llmod_id);
        }

        // FIXME: All upstream crates are deserialized internally in the
        //        function below to extract their summary and modules. Note that
        //        unlike the loop above we *must* decode and/or read something
        //        here as these are all just serialized files on disk. An
        //        improvement to make here, however, would be to store the
        //        module summary separately from the actual module itself. Right
        //        now both are stored in one large bitcode file, and the entire
        //        file is deflate-compressed. We could try to bypass some of the
        //        decompression by storing the index uncompressed and only
        //        lazily decompressing the bytecode if necessary.
        //
        //        Note that truly taking advantage of this optimization will
        //        likely be further down the road. We'd have to implement
        //        incremental ThinLTO first, where we could sometimes avoid
        //        looking at the contents of upstream modules entirely (the
        //        index, though, we must always look at unconditionally).
        let mut serialized = Vec::new();
        for (module, name) in serialized_modules {
            info!("foreign module {:?}", name);
            thin_modules.push(llvm::ThinLTOModule {
                identifier: name.as_ptr(),
                data: module.data().as_ptr(),
                len: module.data().len(),
            });
            serialized.push(module);
            module_names.push(name);
        }

        // Delegate to the C++ bindings to create some data here. Once this is a
        // tried-and-true interface we may wish to try to upstream some of this
        // to LLVM itself; right now we reimplement a lot of what they do
        // upstream...
        let data = llvm::LLVMRustCreateThinLTOData(
            thin_modules.as_ptr(),
            thin_modules.len() as u32,
            symbol_white_list.as_ptr(),
            symbol_white_list.len() as u32,
        );
        if data.is_null() {
            let msg = format!("failed to prepare thin LTO context");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        let data = ThinData(data);
        info!("thin LTO data created");
        timeline.record("data");

        // Throw our data in an `Arc` as we'll be sharing it across threads. We
        // also put all memory referenced by the C++ data (buffers, ids, etc)
        // into the `Arc`. After this we'll create a thin module codegen
        // per module in this data.
        let shared = Arc::new(ThinShared {
            data,
            thin_buffers,
            serialized_modules: serialized,
            module_names,
        });
        Ok((0..shared.module_names.len()).map(|i| {
            LtoModuleCodegen::Thin(ThinModule {
                shared: shared.clone(),
                idx: i,
            })
        }).collect())
    }
}

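/// Runs LLVM's LTO-specific optimization pipeline over `llmod`, bracketed by
/// verifier passes. When `thin` is true the ThinLTO-specific pass manager is
/// populated instead of the full ("fat") LTO one.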
fn run_pass_manager(cgcx: &CodegenContext,
                    tm: TargetMachineRef,
                    llmod: ModuleRef,
                    config: &ModuleConfig,
                    thin: bool) {
    // Now we have one massive module inside of llmod. Time to run the
    // LTO-specific optimization passes that LLVM provides.
    //
    // This code is based on the code found in LLVM's LTO code generator:
    // tools/lto/LTOCodeGenerator.cpp
    debug!("running the pass manager");
    unsafe {
        let pm = llvm::LLVMCreatePassManager();
        llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod);
        let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
        assert!(!pass.is_null());
        llvm::LLVMRustAddPass(pm, pass);

        // When running the LTO passes we never actually use `-O0`; instead we
        // force optimization to happen at least at `-O1`.
        //
        // With ThinLTO we mess around a lot with symbol visibility in a way
        // that will actually cause linking failures if we optimize at O0,
        // which notably lacks dead code elimination. To ensure we at least
        // get some optimizations and correctly link we forcibly switch to
        // `-O1` to get dead code elimination.
        //
        // Note that in general this shouldn't matter too much as you typically
        // only turn on ThinLTO when you're compiling with optimizations
        // otherwise.
        let opt_level = config.opt_level.unwrap_or(llvm::CodeGenOptLevel::None);
        let opt_level = match opt_level {
            llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less,
            level => level,
        };
        with_llvm_pmb(llmod, config, opt_level, false, &mut |b| {
            if thin {
                if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) {
                    panic!("this version of LLVM does not support ThinLTO");
                }
            } else {
                llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm,
                                                                   /* Internalize = */ False,
                                                                   /* RunInliner = */ True);
            }
        });

        let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
        assert!(!pass.is_null());
        llvm::LLVMRustAddPass(pm, pass);

        time_ext(cgcx.time_passes, None, "LTO passes", ||
             llvm::LLVMRunPassManager(pm, llmod));

        llvm::LLVMDisposePassManager(pm);
    }
    debug!("lto done");
}

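/// Bitcode for a single module participating in LTO, either an in-memory
/// buffer from a locally codegened module or bytes decoded out of an upstream
/// rlib.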
pub enum SerializedModule {
    Local(ModuleBuffer),
    FromRlib(Vec<u8>),
}

impl SerializedModule {
    fn data(&self) -> &[u8] {
        match *self {
            SerializedModule::Local(ref m) => m.data(),
            SerializedModule::FromRlib(ref m) => m,
        }
    }
}

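/// Owned handle to an LLVM-side buffer holding a module's serialized bitcode
/// (created via `LLVMRustModuleBufferCreate`); freed on drop.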
pub struct ModuleBuffer(*mut llvm::ModuleBuffer);

unsafe impl Send for ModuleBuffer {}
unsafe impl Sync for ModuleBuffer {}

impl ModuleBuffer {
    pub fn new(m: ModuleRef) -> ModuleBuffer {
        ModuleBuffer(unsafe {
            llvm::LLVMRustModuleBufferCreate(m)
        })
    }

    pub fn data(&self) -> &[u8] {
        unsafe {
            let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
            let len = llvm::LLVMRustModuleBufferLen(self.0);
            slice::from_raw_parts(ptr, len)
        }
    }
}

impl Drop for ModuleBuffer {
    fn drop(&mut self) {
        unsafe { llvm::LLVMRustModuleBufferFree(self.0); }
    }
}

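/// One unit of ThinLTO work: an index into the shared ThinLTO data for a
/// single module that can be optimized and codegened independently of the
/// other modules.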
pub struct ThinModule {
    shared: Arc<ThinShared>,
    idx: usize,
}

struct ThinShared {
    data: ThinData,
    thin_buffers: Vec<ThinBuffer>,
    serialized_modules: Vec<SerializedModule>,
    module_names: Vec<CString>,
}

struct ThinData(*mut llvm::ThinLTOData);

unsafe impl Send for ThinData {}
unsafe impl Sync for ThinData {}

impl Drop for ThinData {
    fn drop(&mut self) {
        unsafe {
            llvm::LLVMRustFreeThinLTOData(self.0);
        }
    }
}

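/// Owned handle to an LLVM-side buffer of a module's bitcode as prepared for
/// ThinLTO (created via `LLVMRustThinLTOBufferCreate`); freed on drop.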
pub struct ThinBuffer(*mut llvm::ThinLTOBuffer);

unsafe impl Send for ThinBuffer {}
unsafe impl Sync for ThinBuffer {}

impl ThinBuffer {
    pub fn new(m: ModuleRef) -> ThinBuffer {
        unsafe {
            let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
            ThinBuffer(buffer)
        }
    }

    pub fn data(&self) -> &[u8] {
        unsafe {
            let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
            let len = llvm::LLVMRustThinLTOBufferLen(self.0);
            slice::from_raw_parts(ptr, len)
        }
    }
}

impl Drop for ThinBuffer {
    fn drop(&mut self) {
        unsafe {
            llvm::LLVMRustThinLTOBufferFree(self.0);
        }
    }
}

impl ThinModule {
    fn name(&self) -> &str {
        self.shared.module_names[self.idx].to_str().unwrap()
    }

    fn cost(&self) -> u64 {
        // Yes, that's correct, we're using the size of the bytecode as an
        // indicator for how costly this codegen unit is.
        self.data().len() as u64
    }

    fn data(&self) -> &[u8] {
        let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data());
        a.unwrap_or_else(|| {
            let len = self.shared.thin_buffers.len();
            self.shared.serialized_modules[self.idx - len].data()
        })
    }

    unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline)
        -> Result<ModuleCodegen, FatalError>
    {
        let diag_handler = cgcx.create_diag_handler();
        let tm = (cgcx.tm_factory)().map_err(|e| {
            write::llvm_err(&diag_handler, e)
        })?;

        // Right now the implementation we've got only works over serialized
        // modules, so we create a fresh new LLVM context and parse the module
        // into that context. One day we may still need to do this for upstream
        // crates, but for locally codegened modules we may be able to reuse
        // the LLVM context and module they were originally built in.
        let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
        let llmod = llvm::LLVMRustParseBitcodeForThinLTO(
            llcx,
            self.data().as_ptr(),
            self.data().len(),
            self.shared.module_names[self.idx].as_ptr(),
        );
        if llmod.is_null() {
            let msg = format!("failed to parse bitcode for thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg));
        }
        let module = ModuleCodegen {
            source: ModuleSource::Codegened(ModuleLlvm {
                llmod,
                llcx,
                tm,
            }),
            llmod_id: self.name().to_string(),
            name: self.name().to_string(),
            kind: ModuleKind::Regular,
        };
        cgcx.save_temp_bitcode(&module, "thin-lto-input");

        // Before we do much else find the "main" `DICompileUnit` that we'll be
        // using below. If we find more than one though then rustc has changed
        // in a way we're not ready for, so generate an ICE by returning
        // an error.
        let mut cu1 = ptr::null_mut();
        let mut cu2 = ptr::null_mut();
        llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
        if !cu2.is_null() {
            let msg = format!("multiple source DICompileUnits found");
            return Err(write::llvm_err(&diag_handler, msg))
        }

        // Like with "fat" LTO, get some better optimizations if landing pads
        // are disabled by removing all landing pads.
        if cgcx.no_landing_pads {
            llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
            cgcx.save_temp_bitcode(&module, "thin-lto-after-nounwind");
            timeline.record("nounwind");
        }

        // Up next come the per-module local analyses that we do for Thin LTO.
        // Each of these functions is basically copied from the LLVM
        // implementation and then tailored to suit this implementation. Ideally
        // each of these would be supported by upstream LLVM but that's perhaps
        // a patch for another day!
        //
        // You can find some more comments about these functions in the LLVM
        // bindings we've got (currently `PassWrapper.cpp`)
        if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-rename");
        timeline.record("rename");
        if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-resolve");
        timeline.record("resolve");
        if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-internalize");
        timeline.record("internalize");
        if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-import");
        timeline.record("import");

        // Ok now this is a bit unfortunate. This is also something you won't
        // find upstream in LLVM's ThinLTO passes! This is a hack for now to
        // work around bugs in LLVM.
        //
        // It was first discovered in #45511 that, as part of ThinLTO's
        // importing passes, LLVM will import `DICompileUnit` metadata
        // information across modules. This means that we'll be working with one
        // LLVM module that has multiple `DICompileUnit` instances in it (a
        // bunch of `llvm.dbg.cu` members). Unfortunately there are a number of
        // bugs in LLVM's backend which generate invalid DWARF in a situation
        // like this:
        //
        //  https://bugs.llvm.org/show_bug.cgi?id=35212
        //  https://bugs.llvm.org/show_bug.cgi?id=35562
        //
        // While the first bug there is fixed the second ended up causing #46346
        // which was basically a resurgence of #45511 after LLVM's bug 35212 was
        // fixed.
        //
        // The function called below is a huge hack around this problem. It's
        // defined in `PassWrapper.cpp` and will basically "merge" all
        // `DICompileUnit` instances in a module: it takes all the objects,
        // rewrites all pointers of `DISubprogram` to point to the first
        // `DICompileUnit`, and then deletes all the other units.
        //
        // This probably mangles the debug info slightly (but hopefully not too
        // much), but for now it at least gets LLVM to emit valid DWARF (or so
        // it appears). Hopefully we can remove this once upstream bugs are
        // fixed in LLVM.
        llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
        cgcx.save_temp_bitcode(&module, "thin-lto-after-patch");
        timeline.record("patch");

        // Alright now that we've done everything related to the ThinLTO
        // analysis it's time to run some optimizations! Here we use the same
        // `run_pass_manager` as the "fat" LTO above except that we tell it to
        // populate a thin-specific pass manager, which presumably LLVM treats a
        // little differently.
        info!("running thin lto passes over {}", module.name);
        let config = cgcx.config(module.kind);
        run_pass_manager(cgcx, tm, llmod, config, true);
        cgcx.save_temp_bitcode(&module, "thin-lto-after-pm");
        timeline.record("thin-done");

        // FIXME: this is a hack around a bug in LLVM right now. In #46910 it
        // was discovered that on 32-bit MSVC LLVM will hit a codegen error if
        // there's an available_externally function in the LLVM module.
        // Typically we don't actually use these functions but ThinLTO makes
        // heavy use of them when inlining across modules.
        //
        // Tracked upstream at https://bugs.llvm.org/show_bug.cgi?id=35736, this
        // function call (and its definition on the C++ side of things)
        // shouldn't be necessary eventually and we can safely delete these few
        // lines.
        llvm::LLVMRustThinLTORemoveAvailableExternally(llmod);
        cgcx.save_temp_bitcode(&module, "thin-lto-after-rm-ae");
        timeline.record("no-ae");

        Ok(module)
    }
}