// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
use back::symbol_export;
use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext};
use back::write;
use errors::{FatalError, Handler};
use llvm::archive_ro::ArchiveRO;
use llvm::{ModuleRef, TargetMachineRef, True, False};
use llvm;
use rustc::hir::def_id::LOCAL_CRATE;
use rustc::middle::exported_symbols::SymbolExportLevel;
use rustc::session::config::{self, Lto};
use rustc::util::common::time_ext;
use time_graph::Timeline;
use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};

use libc;

use std::ffi::CString;
use std::ptr;
use std::slice;
use std::sync::Arc;

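/// Returns whether the given crate type supports LTO: only crate types that
/// link all Rust code statically into the final artifact (executables, static
/// libraries, and cdylibs) can be LTO'd here.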
pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
    match crate_type {
        config::CrateTypeExecutable |
        config::CrateTypeStaticlib |
        config::CrateTypeCdylib => true,

        config::CrateTypeDylib |
        config::CrateTypeRlib |
        config::CrateTypeProcMacro => false,
    }
}

pub(crate) enum LtoModuleCodegen {
    Fat {
        module: Option<ModuleCodegen>,
        _serialized_bitcode: Vec<SerializedModule>,
    },

    Thin(ThinModule),
}

impl LtoModuleCodegen {
    pub fn name(&self) -> &str {
        match *self {
            LtoModuleCodegen::Fat { .. } => "everything",
            LtoModuleCodegen::Thin(ref m) => m.name(),
        }
    }

    /// Optimize this module within the given codegen context.
    ///
    /// This function is unsafe as it'll return a `ModuleCodegen` that still
    /// points to LLVM data structures owned by this `LtoModuleCodegen`.
    /// It's intended that the module returned is immediately code generated
    /// and dropped, and then this LTO module is dropped.
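    ///
    /// A minimal usage sketch of that contract (illustrative only; the real
    /// call sites live in `back::write`, and `lto_module`, `cgcx` and
    /// `timeline` are assumed local bindings rather than APIs defined here):
    ///
    /// ```ignore
    /// // Optimize, immediately codegen the result, then drop both in order.
    /// let module = unsafe { lto_module.optimize(cgcx, timeline)? };
    /// // ... hand `module` straight to codegen ...
    /// drop(module);     // the returned module must not outlive ...
    /// drop(lto_module); // ... the `LtoModuleCodegen` owning the LLVM data.
    /// ```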
    pub(crate) unsafe fn optimize(&mut self,
                                  cgcx: &CodegenContext,
                                  timeline: &mut Timeline)
        -> Result<ModuleCodegen, FatalError>
    {
        match *self {
            LtoModuleCodegen::Fat { ref mut module, .. } => {
                let module = module.take().unwrap();
                let config = cgcx.config(module.kind);
                let llmod = module.llvm().unwrap().llmod;
                let tm = module.llvm().unwrap().tm;
                run_pass_manager(cgcx, tm, llmod, config, false);
                timeline.record("fat-done");
                Ok(module)
            }
            LtoModuleCodegen::Thin(ref mut thin) => thin.optimize(cgcx, timeline),
        }
    }

    /// A "gauge" of how costly it is to optimize this module, used to sort
    /// biggest modules first.
    pub fn cost(&self) -> u64 {
        match *self {
            // Only one module with fat LTO, so the cost doesn't matter.
            LtoModuleCodegen::Fat { .. } => 0,
            LtoModuleCodegen::Thin(ref m) => m.cost(),
        }
    }
}

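/// Entry point for LTO from the rest of the backend: computes the white-list
/// of symbols to preserve, loads upstream bitcode out of rlibs when LTO'ing
/// the entire crate graph, and then dispatches to either `fat_lto` or
/// `thin_lto`.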
pub(crate) fn run(cgcx: &CodegenContext,
                  modules: Vec<ModuleCodegen>,
                  timeline: &mut Timeline)
    -> Result<Vec<LtoModuleCodegen>, FatalError>
{
    let diag_handler = cgcx.create_diag_handler();
    let export_threshold = match cgcx.lto {
        // We're just doing LTO for our one crate
        Lto::ThinLocal => SymbolExportLevel::Rust,

        // We're doing LTO for the entire crate graph
        Lto::Yes | Lto::Fat | Lto::Thin => {
            symbol_export::crates_export_threshold(&cgcx.crate_types)
        }

        Lto::No => panic!("didn't request LTO but we're doing LTO"),
    };

    let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
        if level.is_below_threshold(export_threshold) {
            let mut bytes = Vec::with_capacity(name.len() + 1);
            bytes.extend(name.bytes());
            Some(CString::new(bytes).unwrap())
        } else {
            None
        }
    };
    let exported_symbols = cgcx.exported_symbols
        .as_ref().expect("needs exported symbols for LTO");
    let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
        .iter()
        .filter_map(symbol_filter)
        .collect::<Vec<CString>>();
    timeline.record("whitelist");
    info!("{} symbols to preserve in this crate", symbol_white_list.len());

    // If we're performing LTO for the entire crate graph, then for each of our
    // upstream dependencies, find the corresponding rlib and load the bitcode
    // from the archive.
    //
    // We save off all the bytecode and LLVM module ids for later processing
    // with either fat or thin LTO
    let mut upstream_modules = Vec::new();
    if cgcx.lto != Lto::ThinLocal {
        if cgcx.opts.cg.prefer_dynamic {
            diag_handler.struct_err("cannot prefer dynamic linking when performing LTO")
                        .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
                               supported with LTO")
                        .emit();
            return Err(FatalError)
        }

        // Make sure we actually can run LTO
        for crate_type in cgcx.crate_types.iter() {
            if !crate_type_allows_lto(*crate_type) {
                let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \
                                            static library outputs");
                return Err(e)
            }
        }

        for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
            let exported_symbols = cgcx.exported_symbols
                .as_ref().expect("needs exported symbols for LTO");
            symbol_white_list.extend(
                exported_symbols[&cnum]
                    .iter()
                    .filter_map(symbol_filter));

            let archive = ArchiveRO::open(&path).expect("wanted an rlib");
            let bytecodes = archive.iter().filter_map(|child| {
                child.ok().and_then(|c| c.name().map(|name| (name, c)))
            }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION));
            for (name, data) in bytecodes {
                info!("adding bytecode {}", name);
                let bc_encoded = data.data();

                let (bc, id) = time_ext(cgcx.time_passes, None, &format!("decode {}", name), || {
                    match DecodedBytecode::new(bc_encoded) {
                        Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
                        Err(e) => Err(diag_handler.fatal(&e)),
                    }
                })?;
                let bc = SerializedModule::FromRlib(bc);
                upstream_modules.push((bc, CString::new(id).unwrap()));
            }
            timeline.record(&format!("load: {}", path.display()));
        }
    }

    let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
    match cgcx.lto {
        Lto::Yes | // `-C lto` == fat LTO by default
        Lto::Fat => {
            fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
        }
        Lto::Thin |
        Lto::ThinLocal => {
            thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
        }
        Lto::No => unreachable!(),
    }
}

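/// Performs "fat" LTO: every module's bitcode is linked into a single LLVM
/// module (using the costliest regular module as the base), symbols outside
/// the white-list are internalized, and a single `LtoModuleCodegen::Fat` unit
/// is returned for the caller to optimize and codegen.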
fn fat_lto(cgcx: &CodegenContext,
           diag_handler: &Handler,
           mut modules: Vec<ModuleCodegen>,
           mut serialized_modules: Vec<(SerializedModule, CString)>,
           symbol_white_list: &[*const libc::c_char],
           timeline: &mut Timeline)
    -> Result<Vec<LtoModuleCodegen>, FatalError>
{
    info!("going for a fat lto");

    // Find the "costliest" module and merge everything into that codegen unit.
    // All the other modules will be serialized and reparsed into the new
    // context, so this hopefully avoids serializing and parsing the largest
    // codegen unit.
    //
    // Additionally use a regular module as the base here to ensure that various
    // file copy operations in the backend work correctly. The only other kind
    // of module here should be an allocator one, and if your crate is smaller
    // than the allocator module then the size doesn't really matter anyway.
    let (_, costliest_module) = modules.iter()
        .enumerate()
        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
        .map(|(i, module)| {
            let cost = unsafe {
                llvm::LLVMRustModuleCost(module.llvm().unwrap().llmod)
            };
            (cost, i)
        })
        .max()
        .expect("must be codegen'ing at least one module");
    let module = modules.remove(costliest_module);
    let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod;
    info!("using {:?} as a base module", module.llmod_id);

    // For all other modules we codegened we'll need to link them into our own
    // bitcode. All modules were codegened in their own LLVM context, however,
    // and we want to move everything to the same LLVM context. Currently the
    // way we know of to do that is to serialize them to a string and then
    // parse them later. Not great but hey, that's why it's "fat" LTO, right?
    for module in modules {
        let llvm = module.llvm().expect("can't lto pre-codegened modules");
        let buffer = ModuleBuffer::new(llvm.llmod);
        let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
        serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
    }

    // For all serialized bitcode files we parse them and link them in as we did
    // above; this is all mostly handled in C++. Like above, though, we don't
    // know much about the memory management here so we err on the side of being
    // safe and persist everything with the original module.
    let mut serialized_bitcode = Vec::new();
    let mut linker = Linker::new(llmod);
    for (bc_decoded, name) in serialized_modules {
        info!("linking {:?}", name);
        time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
            let data = bc_decoded.data();
            linker.add(&data).map_err(|()| {
                let msg = format!("failed to load bc of {:?}", name);
                write::llvm_err(&diag_handler, msg)
            })
        })?;
        timeline.record(&format!("link {:?}", name));
        serialized_bitcode.push(bc_decoded);
    }
    drop(linker);
    cgcx.save_temp_bitcode(&module, "lto.input");

    // Internalize everything that *isn't* in our whitelist to help strip out
    // more modules and such
    unsafe {
        let ptr = symbol_white_list.as_ptr();
        llvm::LLVMRustRunRestrictionPass(llmod,
                                         ptr as *const *const libc::c_char,
                                         symbol_white_list.len() as libc::size_t);
        cgcx.save_temp_bitcode(&module, "lto.after-restriction");
    }

    if cgcx.no_landing_pads {
        unsafe {
            llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
        }
        cgcx.save_temp_bitcode(&module, "lto.after-nounwind");
    }
    timeline.record("passes");

    Ok(vec![LtoModuleCodegen::Fat {
        module: Some(module),
        _serialized_bitcode: serialized_bitcode,
    }])
}

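/// Thin wrapper around the Rust bindings to LLVM's in-memory bitcode linker
/// (`LLVMRustLinkerNew`/`LLVMRustLinkerAdd`): fat LTO uses it to splice every
/// other module's bitcode into the base module. The underlying linker is
/// freed on drop.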
struct Linker(llvm::LinkerRef);

impl Linker {
    fn new(llmod: ModuleRef) -> Linker {
        unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
    }

    fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
        unsafe {
            if llvm::LLVMRustLinkerAdd(self.0,
                                       bytecode.as_ptr() as *const libc::c_char,
                                       bytecode.len()) {
                Ok(())
            } else {
                Err(())
            }
        }
    }
}

impl Drop for Linker {
    fn drop(&mut self) {
        unsafe { llvm::LLVMRustLinkerFree(self.0); }
    }
}

/// Prepare "thin" LTO to get run on these modules.
///
/// The general structure of ThinLTO is quite different from the structure of
/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
/// one giant LLVM module, and then we run more optimization passes over this
/// big module after internalizing most symbols. Thin LTO, on the other hand,
/// avoids this large bottleneck through more targeted optimization.
///
/// At a high level Thin LTO looks like:
///
/// 1. Prepare a "summary" of each LLVM module in question which describes
///    the values inside, cost of the values, etc.
/// 2. Merge the summaries of all modules in question into one "index"
/// 3. Perform some global analysis on this index
/// 4. For each module, use the index and analysis calculated previously to
///    perform local transformations on the module, for example inlining
///    small functions from other modules.
/// 5. Run thin-specific optimization passes over each module, and then code
///    generate everything at the end.
///
/// The summary for each module is intended to be quite cheap, and the global
/// index is relatively cheap to create as well. As a result, the goal of
/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
/// situations. For example one cheap optimization is that we can codegen all
/// modules in parallel, easily making use of all the cores on a machine.
///
/// With all that in mind, the function here is designed specifically to
/// calculate the *index* for ThinLTO. This index will then be shared amongst
/// all of the `LtoModuleCodegen` units returned below and destroyed once
/// they all go out of scope.
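///
/// A rough sketch of how the returned units are meant to be consumed
/// (illustrative only; the actual scheduling lives in `back::write`, and the
/// bindings used here are assumed, not APIs defined in this function):
///
/// ```ignore
/// let mut units = thin_lto(&diag_handler, modules, upstream, &whitelist, timeline)?;
/// // Larger (costlier) modules are worth starting first when the units are
/// // optimized in parallel across the codegen threads.
/// units.sort_by_key(|u| u.cost());
/// while let Some(mut unit) = units.pop() {
///     let module = unsafe { unit.optimize(cgcx, timeline)? };
///     // ... codegen `module`, dropping it before `unit` ...
/// }
/// ```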
fn thin_lto(diag_handler: &Handler,
            modules: Vec<ModuleCodegen>,
            serialized_modules: Vec<(SerializedModule, CString)>,
            symbol_white_list: &[*const libc::c_char],
            timeline: &mut Timeline)
    -> Result<Vec<LtoModuleCodegen>, FatalError>
{
    unsafe {
        info!("going for that thin, thin LTO");

        let mut thin_buffers = Vec::new();
        let mut module_names = Vec::new();
        let mut thin_modules = Vec::new();

        // FIXME: right now, like with fat LTO, we serialize all in-memory
        //        modules before handing them to ThinLTO. We really shouldn't
        //        do this, however, and instead figure out how to extract a
        //        summary from an in-memory module and then merge that into
        //        the global index. It turns out that this loop is by far the
        //        most expensive portion of this small bit of global analysis!
        for (i, module) in modules.iter().enumerate() {
            info!("local module: {} - {}", i, module.llmod_id);
            let llvm = module.llvm().expect("can't lto precodegened module");
            let name = CString::new(module.llmod_id.clone()).unwrap();
            let buffer = ThinBuffer::new(llvm.llmod);
            thin_modules.push(llvm::ThinLTOModule {
                identifier: name.as_ptr(),
                data: buffer.data().as_ptr(),
                len: buffer.data().len(),
            });
            thin_buffers.push(buffer);
            module_names.push(name);
            timeline.record(&module.llmod_id);
        }

        // FIXME: All upstream crates are deserialized internally in the
        //        function below to extract their summary and modules. Note that
        //        unlike the loop above we *must* decode and/or read something
        //        here as these are all just serialized files on disk. An
        //        improvement to make here, however, would be to store the
        //        module summary separately from the actual module itself. Right
        //        now both are stored in one large bitcode file, and the entire
        //        file is deflate-compressed. We could try to bypass some of the
        //        decompression by storing the index uncompressed and only
        //        lazily decompressing the bytecode if necessary.
        //
        //        Note that truly taking advantage of this optimization will
        //        likely be further down the road. We'd have to implement
        //        incremental ThinLTO first, where we could sometimes avoid
        //        looking at the contents of upstream modules entirely (the
        //        index, though, we must always look at unconditionally).
        let mut serialized = Vec::new();
        for (module, name) in serialized_modules {
            info!("foreign module {:?}", name);
            thin_modules.push(llvm::ThinLTOModule {
                identifier: name.as_ptr(),
                data: module.data().as_ptr(),
                len: module.data().len(),
            });
            serialized.push(module);
            module_names.push(name);
        }

        // Delegate to the C++ bindings to create some data here. Once this is a
        // tried-and-true interface we may wish to try to upstream some of this
        // to LLVM itself; right now we reimplement a lot of what they do
        // upstream...
        let data = llvm::LLVMRustCreateThinLTOData(
            thin_modules.as_ptr(),
            thin_modules.len() as u32,
            symbol_white_list.as_ptr(),
            symbol_white_list.len() as u32,
        );
        if data.is_null() {
            let msg = format!("failed to prepare thin LTO context");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        let data = ThinData(data);
        info!("thin LTO data created");
        timeline.record("data");

        // Throw our data in an `Arc` as we'll be sharing it across threads. We
        // also put all memory referenced by the C++ data (buffers, ids, etc)
        // into the `Arc`. After this we'll create a thin module codegen
        // per module in this data.
        let shared = Arc::new(ThinShared {
            data,
            thin_buffers,
            serialized_modules: serialized,
            module_names,
        });
        Ok((0..shared.module_names.len()).map(|i| {
            LtoModuleCodegen::Thin(ThinModule {
                shared: shared.clone(),
                idx: i,
            })
        }).collect())
    }
}

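/// Runs LLVM's LTO-specific optimization pipeline over `llmod`, bracketed by
/// verifier passes. When `thin` is true the ThinLTO-specific pass manager is
/// populated instead of the full ("fat") LTO one.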
fn run_pass_manager(cgcx: &CodegenContext,
                    tm: TargetMachineRef,
                    llmod: ModuleRef,
                    config: &ModuleConfig,
                    thin: bool) {
    // Now we have one massive module inside of llmod. Time to run the
    // LTO-specific optimization passes that LLVM provides.
    //
    // This code is based on the code found in LLVM's LTO code generator:
    // tools/lto/LTOCodeGenerator.cpp
    debug!("running the pass manager");
    unsafe {
        let pm = llvm::LLVMCreatePassManager();
        llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod);
        let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
        assert!(!pass.is_null());
        llvm::LLVMRustAddPass(pm, pass);

        // When running the LTO passes we never actually use `-O0`; instead we
        // force optimization to happen at least at `-O1`.
        //
        // With ThinLTO we mess around a lot with symbol visibility in a way
        // that will actually cause linking failures if we optimize at O0,
        // which notably lacks dead code elimination. To ensure we at least
        // get some optimizations and correctly link we forcibly switch to
        // `-O1` to get dead code elimination.
        //
        // Note that in general this shouldn't matter too much as you typically
        // only turn on ThinLTO when you're compiling with optimizations
        // otherwise.
        let opt_level = config.opt_level.unwrap_or(llvm::CodeGenOptLevel::None);
        let opt_level = match opt_level {
            llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less,
            level => level,
        };
        with_llvm_pmb(llmod, config, opt_level, false, &mut |b| {
            if thin {
                if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) {
                    panic!("this version of LLVM does not support ThinLTO");
                }
            } else {
                llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm,
                                                                   /* Internalize = */ False,
                                                                   /* RunInliner = */ True);
            }
        });

        let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
        assert!(!pass.is_null());
        llvm::LLVMRustAddPass(pm, pass);

        time_ext(cgcx.time_passes, None, "LTO passes", ||
             llvm::LLVMRunPassManager(pm, llmod));

        llvm::LLVMDisposePassManager(pm);
    }
    debug!("lto done");
}

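/// Bitcode for a single module participating in LTO, either an in-memory
/// buffer from a locally codegened module or bytes decoded out of an upstream
/// rlib.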
pub enum SerializedModule {
    Local(ModuleBuffer),
    FromRlib(Vec<u8>),
}

impl SerializedModule {
    fn data(&self) -> &[u8] {
        match *self {
            SerializedModule::Local(ref m) => m.data(),
            SerializedModule::FromRlib(ref m) => m,
        }
    }
}

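/// Owned handle to an LLVM-side buffer holding a module's serialized bitcode
/// (created via `LLVMRustModuleBufferCreate`); freed on drop.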
pub struct ModuleBuffer(*mut llvm::ModuleBuffer);

unsafe impl Send for ModuleBuffer {}
unsafe impl Sync for ModuleBuffer {}

impl ModuleBuffer {
    pub fn new(m: ModuleRef) -> ModuleBuffer {
        ModuleBuffer(unsafe {
            llvm::LLVMRustModuleBufferCreate(m)
        })
    }

    pub fn data(&self) -> &[u8] {
        unsafe {
            let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
            let len = llvm::LLVMRustModuleBufferLen(self.0);
            slice::from_raw_parts(ptr, len)
        }
    }
}

impl Drop for ModuleBuffer {
    fn drop(&mut self) {
        unsafe { llvm::LLVMRustModuleBufferFree(self.0); }
    }
}

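/// One unit of ThinLTO work: an index into the shared ThinLTO data for a
/// single module that can be optimized and codegened independently of the
/// other modules.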
pub struct ThinModule {
    shared: Arc<ThinShared>,
    idx: usize,
}

struct ThinShared {
    data: ThinData,
    thin_buffers: Vec<ThinBuffer>,
    serialized_modules: Vec<SerializedModule>,
    module_names: Vec<CString>,
}

struct ThinData(*mut llvm::ThinLTOData);

unsafe impl Send for ThinData {}
unsafe impl Sync for ThinData {}

impl Drop for ThinData {
    fn drop(&mut self) {
        unsafe {
            llvm::LLVMRustFreeThinLTOData(self.0);
        }
    }
}

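/// Owned handle to an LLVM-side buffer of a module's bitcode as prepared for
/// ThinLTO (created via `LLVMRustThinLTOBufferCreate`); freed on drop.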
pub struct ThinBuffer(*mut llvm::ThinLTOBuffer);

unsafe impl Send for ThinBuffer {}
unsafe impl Sync for ThinBuffer {}

impl ThinBuffer {
    pub fn new(m: ModuleRef) -> ThinBuffer {
        unsafe {
            let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
            ThinBuffer(buffer)
        }
    }

    pub fn data(&self) -> &[u8] {
        unsafe {
            let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
            let len = llvm::LLVMRustThinLTOBufferLen(self.0);
            slice::from_raw_parts(ptr, len)
        }
    }
}

impl Drop for ThinBuffer {
    fn drop(&mut self) {
        unsafe {
            llvm::LLVMRustThinLTOBufferFree(self.0);
        }
    }
}

impl ThinModule {
    fn name(&self) -> &str {
        self.shared.module_names[self.idx].to_str().unwrap()
    }

    fn cost(&self) -> u64 {
        // Yes, that's correct, we're using the size of the bytecode as an
        // indicator for how costly this codegen unit is.
        self.data().len() as u64
    }

    fn data(&self) -> &[u8] {
        let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data());
        a.unwrap_or_else(|| {
            let len = self.shared.thin_buffers.len();
            self.shared.serialized_modules[self.idx - len].data()
        })
    }

    unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline)
        -> Result<ModuleCodegen, FatalError>
    {
        let diag_handler = cgcx.create_diag_handler();
        let tm = (cgcx.tm_factory)().map_err(|e| {
            write::llvm_err(&diag_handler, e)
        })?;

        // Right now the implementation we've got only works over serialized
        // modules, so we create a fresh new LLVM context and parse the module
        // into that context. One day we may still need to do this for upstream
        // crates, but for locally codegened modules we may be able to reuse
        // the LLVM context and module they were originally built in.
        let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
        let llmod = llvm::LLVMRustParseBitcodeForThinLTO(
            llcx,
            self.data().as_ptr(),
            self.data().len(),
            self.shared.module_names[self.idx].as_ptr(),
        );
        if llmod.is_null() {
            let msg = format!("failed to parse bitcode for thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg));
        }
        let module = ModuleCodegen {
            source: ModuleSource::Codegened(ModuleLlvm {
                llmod,
                llcx,
                tm,
            }),
            llmod_id: self.name().to_string(),
            name: self.name().to_string(),
            kind: ModuleKind::Regular,
        };
        cgcx.save_temp_bitcode(&module, "thin-lto-input");

        // Before we do much else find the "main" `DICompileUnit` that we'll be
        // using below. If we find more than one though then rustc has changed
        // in a way we're not ready for, so generate an ICE by returning
        // an error.
        let mut cu1 = ptr::null_mut();
        let mut cu2 = ptr::null_mut();
        llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
        if !cu2.is_null() {
            let msg = format!("multiple source DICompileUnits found");
            return Err(write::llvm_err(&diag_handler, msg))
        }

        // Like with "fat" LTO, get some better optimizations if landing pads
        // are disabled by removing all landing pads.
        if cgcx.no_landing_pads {
            llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
            cgcx.save_temp_bitcode(&module, "thin-lto-after-nounwind");
            timeline.record("nounwind");
        }

        // Up next come the per-module local analyses that we do for Thin LTO.
        // Each of these functions is basically copied from the LLVM
        // implementation and then tailored to suit this implementation. Ideally
        // each of these would be supported by upstream LLVM but that's perhaps
        // a patch for another day!
        //
        // You can find some more comments about these functions in the LLVM
        // bindings we've got (currently `PassWrapper.cpp`)
        if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-rename");
        timeline.record("rename");
        if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-resolve");
        timeline.record("resolve");
        if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-internalize");
        timeline.record("internalize");
        if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) {
            let msg = format!("failed to prepare thin LTO module");
            return Err(write::llvm_err(&diag_handler, msg))
        }
        cgcx.save_temp_bitcode(&module, "thin-lto-after-import");
        timeline.record("import");

        // Ok now this is a bit unfortunate. This is also something you won't
        // find upstream in LLVM's ThinLTO passes! This is a hack for now to
        // work around bugs in LLVM.
        //
        // It was first discovered in #45511 that, as part of ThinLTO's
        // importing passes, LLVM will import `DICompileUnit` metadata
        // information across modules. This means that we'll be working with one
        // LLVM module that has multiple `DICompileUnit` instances in it (a
        // bunch of `llvm.dbg.cu` members). Unfortunately there are a number of
        // bugs in LLVM's backend which generate invalid DWARF in a situation
        // like this:
        //
        //  https://bugs.llvm.org/show_bug.cgi?id=35212
        //  https://bugs.llvm.org/show_bug.cgi?id=35562
        //
        // While the first bug there is fixed the second ended up causing #46346
        // which was basically a resurgence of #45511 after LLVM's bug 35212 was
        // fixed.
        //
        // The function called below is a huge hack around this problem. It's
        // defined in `PassWrapper.cpp` and will basically "merge" all
        // `DICompileUnit` instances in a module: it takes all the objects,
        // rewrites all pointers of `DISubprogram` to point to the first
        // `DICompileUnit`, and then deletes all the other units.
        //
        // This probably mangles the debug info slightly (but hopefully not too
        // much), but for now it at least gets LLVM to emit valid DWARF (or so
        // it appears). Hopefully we can remove this once upstream bugs are
        // fixed in LLVM.
        llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
        cgcx.save_temp_bitcode(&module, "thin-lto-after-patch");
        timeline.record("patch");

        // Alright now that we've done everything related to the ThinLTO
        // analysis it's time to run some optimizations! Here we use the same
        // `run_pass_manager` as the "fat" LTO above except that we tell it to
        // populate a thin-specific pass manager, which presumably LLVM treats a
        // little differently.
        info!("running thin lto passes over {}", module.name);
        let config = cgcx.config(module.kind);
        run_pass_manager(cgcx, tm, llmod, config, true);
        cgcx.save_temp_bitcode(&module, "thin-lto-after-pm");
        timeline.record("thin-done");

        // FIXME: this is a hack around a bug in LLVM right now. In #46910 it
        // was discovered that on 32-bit MSVC LLVM will hit a codegen error if
        // there's an available_externally function in the LLVM module.
        // Typically we don't actually use these functions but ThinLTO makes
        // heavy use of them when inlining across modules.
        //
        // Tracked upstream at https://bugs.llvm.org/show_bug.cgi?id=35736, this
        // function call (and its definition on the C++ side of things)
        // shouldn't be necessary eventually and we can safely delete these few
        // lines.
        llvm::LLVMRustThinLTORemoveAvailableExternally(llmod);
        cgcx.save_temp_bitcode(&module, "thin-lto-after-rm-ae");
        timeline.record("no-ae");

        Ok(module)
    }
}