]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2013 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
ea8adc8c | 11 | use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION}; |
041b39d2 | 12 | use back::symbol_export; |
ea8adc8c XL |
13 | use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext}; |
14 | use back::write; | |
3b2f2976 | 15 | use errors::{FatalError, Handler}; |
1a4d82fc JJ |
16 | use llvm::archive_ro::ArchiveRO; |
17 | use llvm::{ModuleRef, TargetMachineRef, True, False}; | |
ea8adc8c | 18 | use llvm; |
476ff2be | 19 | use rustc::hir::def_id::LOCAL_CRATE; |
ea8adc8c | 20 | use rustc::middle::exported_symbols::SymbolExportLevel; |
2c00a5a8 | 21 | use rustc::session::config::{self, Lto}; |
0531ce1d | 22 | use rustc::util::common::time_ext; |
ea8adc8c | 23 | use time_graph::Timeline; |
94b46f34 | 24 | use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource}; |
1a4d82fc JJ |
25 | |
26 | use libc; | |
1a4d82fc JJ |
27 | |
28 | use std::ffi::CString; | |
ff7c6d11 | 29 | use std::ptr; |
ea8adc8c XL |
30 | use std::slice; |
31 | use std::sync::Arc; | |
1a4d82fc | 32 | |
476ff2be SL |
33 | pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool { |
34 | match crate_type { | |
35 | config::CrateTypeExecutable | | |
36 | config::CrateTypeStaticlib | | |
37 | config::CrateTypeCdylib => true, | |
38 | ||
39 | config::CrateTypeDylib | | |
40 | config::CrateTypeRlib | | |
476ff2be SL |
41 | config::CrateTypeProcMacro => false, |
42 | } | |
43 | } | |
44 | ||
94b46f34 | 45 | pub(crate) enum LtoModuleCodegen { |
ea8adc8c | 46 | Fat { |
94b46f34 | 47 | module: Option<ModuleCodegen>, |
ea8adc8c XL |
48 | _serialized_bitcode: Vec<SerializedModule>, |
49 | }, | |
50 | ||
51 | Thin(ThinModule), | |
52 | } | |
53 | ||
94b46f34 | 54 | impl LtoModuleCodegen { |
ea8adc8c XL |
55 | pub fn name(&self) -> &str { |
56 | match *self { | |
94b46f34 XL |
57 | LtoModuleCodegen::Fat { .. } => "everything", |
58 | LtoModuleCodegen::Thin(ref m) => m.name(), | |
ea8adc8c | 59 | } |
1a4d82fc JJ |
60 | } |
61 | ||
ea8adc8c XL |
62 | /// Optimize this module within the given codegen context. |
63 | /// | |
94b46f34 XL |
64 | /// This function is unsafe as it'll return a `ModuleCodegen` still |
65 | /// points to LLVM data structures owned by this `LtoModuleCodegen`. | |
ea8adc8c XL |
66 | /// It's intended that the module returned is immediately code generated and |
67 | /// dropped, and then this LTO module is dropped. | |
2c00a5a8 XL |
68 | pub(crate) unsafe fn optimize(&mut self, |
69 | cgcx: &CodegenContext, | |
70 | timeline: &mut Timeline) | |
94b46f34 | 71 | -> Result<ModuleCodegen, FatalError> |
ea8adc8c XL |
72 | { |
73 | match *self { | |
94b46f34 XL |
74 | LtoModuleCodegen::Fat { ref mut module, .. } => { |
75 | let module = module.take().unwrap(); | |
76 | let config = cgcx.config(module.kind); | |
77 | let llmod = module.llvm().unwrap().llmod; | |
78 | let tm = module.llvm().unwrap().tm; | |
ea8adc8c XL |
79 | run_pass_manager(cgcx, tm, llmod, config, false); |
80 | timeline.record("fat-done"); | |
94b46f34 | 81 | Ok(module) |
ea8adc8c | 82 | } |
94b46f34 | 83 | LtoModuleCodegen::Thin(ref mut thin) => thin.optimize(cgcx, timeline), |
1a4d82fc JJ |
84 | } |
85 | } | |
86 | ||
0531ce1d | 87 | /// A "gauge" of how costly it is to optimize this module, used to sort |
ea8adc8c XL |
88 | /// biggest modules first. |
89 | pub fn cost(&self) -> u64 { | |
90 | match *self { | |
91 | // Only one module with fat LTO, so the cost doesn't matter. | |
94b46f34 XL |
92 | LtoModuleCodegen::Fat { .. } => 0, |
93 | LtoModuleCodegen::Thin(ref m) => m.cost(), | |
ea8adc8c XL |
94 | } |
95 | } | |
96 | } | |
97 | ||
2c00a5a8 | 98 | pub(crate) fn run(cgcx: &CodegenContext, |
94b46f34 | 99 | modules: Vec<ModuleCodegen>, |
2c00a5a8 | 100 | timeline: &mut Timeline) |
94b46f34 | 101 | -> Result<Vec<LtoModuleCodegen>, FatalError> |
ea8adc8c XL |
102 | { |
103 | let diag_handler = cgcx.create_diag_handler(); | |
2c00a5a8 XL |
104 | let export_threshold = match cgcx.lto { |
105 | // We're just doing LTO for our one crate | |
106 | Lto::ThinLocal => SymbolExportLevel::Rust, | |
107 | ||
108 | // We're doing LTO for the entire crate graph | |
109 | Lto::Yes | Lto::Fat | Lto::Thin => { | |
ea8adc8c XL |
110 | symbol_export::crates_export_threshold(&cgcx.crate_types) |
111 | } | |
2c00a5a8 XL |
112 | |
113 | Lto::No => panic!("didn't request LTO but we're doing LTO"), | |
ea8adc8c XL |
114 | }; |
115 | ||
0531ce1d | 116 | let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| { |
ea8adc8c | 117 | if level.is_below_threshold(export_threshold) { |
476ff2be SL |
118 | let mut bytes = Vec::with_capacity(name.len() + 1); |
119 | bytes.extend(name.bytes()); | |
120 | Some(CString::new(bytes).unwrap()) | |
121 | } else { | |
122 | None | |
123 | } | |
124 | }; | |
0531ce1d XL |
125 | let exported_symbols = cgcx.exported_symbols |
126 | .as_ref().expect("needs exported symbols for LTO"); | |
127 | let mut symbol_white_list = exported_symbols[&LOCAL_CRATE] | |
476ff2be SL |
128 | .iter() |
129 | .filter_map(symbol_filter) | |
ea8adc8c XL |
130 | .collect::<Vec<CString>>(); |
131 | timeline.record("whitelist"); | |
abe05a73 | 132 | info!("{} symbols to preserve in this crate", symbol_white_list.len()); |
ea8adc8c XL |
133 | |
134 | // If we're performing LTO for the entire crate graph, then for each of our | |
135 | // upstream dependencies, find the corresponding rlib and load the bitcode | |
136 | // from the archive. | |
137 | // | |
138 | // We save off all the bytecode and LLVM module ids for later processing | |
139 | // with either fat or thin LTO | |
140 | let mut upstream_modules = Vec::new(); | |
2c00a5a8 | 141 | if cgcx.lto != Lto::ThinLocal { |
ea8adc8c XL |
142 | if cgcx.opts.cg.prefer_dynamic { |
143 | diag_handler.struct_err("cannot prefer dynamic linking when performing LTO") | |
144 | .note("only 'staticlib', 'bin', and 'cdylib' outputs are \ | |
145 | supported with LTO") | |
146 | .emit(); | |
147 | return Err(FatalError) | |
148 | } | |
149 | ||
150 | // Make sure we actually can run LTO | |
151 | for crate_type in cgcx.crate_types.iter() { | |
152 | if !crate_type_allows_lto(*crate_type) { | |
153 | let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \ | |
154 | static library outputs"); | |
155 | return Err(e) | |
156 | } | |
157 | } | |
158 | ||
159 | for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { | |
0531ce1d XL |
160 | let exported_symbols = cgcx.exported_symbols |
161 | .as_ref().expect("needs exported symbols for LTO"); | |
ea8adc8c | 162 | symbol_white_list.extend( |
0531ce1d | 163 | exported_symbols[&cnum] |
ea8adc8c XL |
164 | .iter() |
165 | .filter_map(symbol_filter)); | |
166 | ||
167 | let archive = ArchiveRO::open(&path).expect("wanted an rlib"); | |
168 | let bytecodes = archive.iter().filter_map(|child| { | |
169 | child.ok().and_then(|c| c.name().map(|name| (name, c))) | |
170 | }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION)); | |
171 | for (name, data) in bytecodes { | |
172 | info!("adding bytecode {}", name); | |
173 | let bc_encoded = data.data(); | |
174 | ||
0531ce1d | 175 | let (bc, id) = time_ext(cgcx.time_passes, None, &format!("decode {}", name), || { |
ea8adc8c XL |
176 | match DecodedBytecode::new(bc_encoded) { |
177 | Ok(b) => Ok((b.bytecode(), b.identifier().to_string())), | |
178 | Err(e) => Err(diag_handler.fatal(&e)), | |
1a4d82fc | 179 | } |
ea8adc8c XL |
180 | })?; |
181 | let bc = SerializedModule::FromRlib(bc); | |
182 | upstream_modules.push((bc, CString::new(id).unwrap())); | |
183 | } | |
184 | timeline.record(&format!("load: {}", path.display())); | |
185 | } | |
186 | } | |
1a4d82fc | 187 | |
ea8adc8c | 188 | let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>(); |
2c00a5a8 XL |
189 | match cgcx.lto { |
190 | Lto::Yes | // `-C lto` == fat LTO by default | |
191 | Lto::Fat => { | |
ea8adc8c XL |
192 | fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline) |
193 | } | |
2c00a5a8 XL |
194 | Lto::Thin | |
195 | Lto::ThinLocal => { | |
ea8adc8c | 196 | thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline) |
1a4d82fc | 197 | } |
2c00a5a8 | 198 | Lto::No => unreachable!(), |
041b39d2 | 199 | } |
ea8adc8c XL |
200 | } |
201 | ||
202 | fn fat_lto(cgcx: &CodegenContext, | |
203 | diag_handler: &Handler, | |
94b46f34 | 204 | mut modules: Vec<ModuleCodegen>, |
ea8adc8c XL |
205 | mut serialized_modules: Vec<(SerializedModule, CString)>, |
206 | symbol_white_list: &[*const libc::c_char], | |
207 | timeline: &mut Timeline) | |
94b46f34 | 208 | -> Result<Vec<LtoModuleCodegen>, FatalError> |
ea8adc8c XL |
209 | { |
210 | info!("going for a fat lto"); | |
211 | ||
212 | // Find the "costliest" module and merge everything into that codegen unit. | |
213 | // All the other modules will be serialized and reparsed into the new | |
214 | // context, so this hopefully avoids serializing and parsing the largest | |
215 | // codegen unit. | |
216 | // | |
217 | // Additionally use a regular module as the base here to ensure that various | |
218 | // file copy operations in the backend work correctly. The only other kind | |
219 | // of module here should be an allocator one, and if your crate is smaller | |
220 | // than the allocator module then the size doesn't really matter anyway. | |
221 | let (_, costliest_module) = modules.iter() | |
222 | .enumerate() | |
223 | .filter(|&(_, module)| module.kind == ModuleKind::Regular) | |
224 | .map(|(i, module)| { | |
225 | let cost = unsafe { | |
226 | llvm::LLVMRustModuleCost(module.llvm().unwrap().llmod) | |
227 | }; | |
228 | (cost, i) | |
229 | }) | |
230 | .max() | |
94b46f34 | 231 | .expect("must be codegen'ing at least one module"); |
ea8adc8c | 232 | let module = modules.remove(costliest_module); |
94b46f34 | 233 | let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod; |
ea8adc8c XL |
234 | info!("using {:?} as a base module", module.llmod_id); |
235 | ||
94b46f34 XL |
236 | // For all other modules we codegened we'll need to link them into our own |
237 | // bitcode. All modules were codegened in their own LLVM context, however, | |
ea8adc8c XL |
238 | // and we want to move everything to the same LLVM context. Currently the |
239 | // way we know of to do that is to serialize them to a string and them parse | |
240 | // them later. Not great but hey, that's why it's "fat" LTO, right? | |
241 | for module in modules { | |
94b46f34 | 242 | let llvm = module.llvm().expect("can't lto pre-codegened modules"); |
ea8adc8c XL |
243 | let buffer = ModuleBuffer::new(llvm.llmod); |
244 | let llmod_id = CString::new(&module.llmod_id[..]).unwrap(); | |
245 | serialized_modules.push((SerializedModule::Local(buffer), llmod_id)); | |
246 | } | |
247 | ||
248 | // For all serialized bitcode files we parse them and link them in as we did | |
249 | // above, this is all mostly handled in C++. Like above, though, we don't | |
250 | // know much about the memory management here so we err on the side of being | |
251 | // save and persist everything with the original module. | |
252 | let mut serialized_bitcode = Vec::new(); | |
0531ce1d | 253 | let mut linker = Linker::new(llmod); |
ea8adc8c XL |
254 | for (bc_decoded, name) in serialized_modules { |
255 | info!("linking {:?}", name); | |
0531ce1d | 256 | time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || { |
ea8adc8c | 257 | let data = bc_decoded.data(); |
0531ce1d | 258 | linker.add(&data).map_err(|()| { |
ea8adc8c | 259 | let msg = format!("failed to load bc of {:?}", name); |
0531ce1d XL |
260 | write::llvm_err(&diag_handler, msg) |
261 | }) | |
ea8adc8c XL |
262 | })?; |
263 | timeline.record(&format!("link {:?}", name)); | |
264 | serialized_bitcode.push(bc_decoded); | |
265 | } | |
0531ce1d | 266 | drop(linker); |
ea8adc8c | 267 | cgcx.save_temp_bitcode(&module, "lto.input"); |
1a4d82fc | 268 | |
ea8adc8c XL |
269 | // Internalize everything that *isn't* in our whitelist to help strip out |
270 | // more modules and such | |
1a4d82fc | 271 | unsafe { |
ea8adc8c | 272 | let ptr = symbol_white_list.as_ptr(); |
1a4d82fc JJ |
273 | llvm::LLVMRustRunRestrictionPass(llmod, |
274 | ptr as *const *const libc::c_char, | |
ea8adc8c XL |
275 | symbol_white_list.len() as libc::size_t); |
276 | cgcx.save_temp_bitcode(&module, "lto.after-restriction"); | |
1a4d82fc JJ |
277 | } |
278 | ||
041b39d2 | 279 | if cgcx.no_landing_pads { |
1a4d82fc JJ |
280 | unsafe { |
281 | llvm::LLVMRustMarkAllFunctionsNounwind(llmod); | |
282 | } | |
ea8adc8c | 283 | cgcx.save_temp_bitcode(&module, "lto.after-nounwind"); |
1a4d82fc | 284 | } |
ea8adc8c | 285 | timeline.record("passes"); |
1a4d82fc | 286 | |
94b46f34 | 287 | Ok(vec![LtoModuleCodegen::Fat { |
ea8adc8c XL |
288 | module: Some(module), |
289 | _serialized_bitcode: serialized_bitcode, | |
290 | }]) | |
291 | } | |
292 | ||
0531ce1d XL |
293 | struct Linker(llvm::LinkerRef); |
294 | ||
295 | impl Linker { | |
296 | fn new(llmod: ModuleRef) -> Linker { | |
297 | unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) } | |
298 | } | |
299 | ||
300 | fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> { | |
301 | unsafe { | |
302 | if llvm::LLVMRustLinkerAdd(self.0, | |
303 | bytecode.as_ptr() as *const libc::c_char, | |
304 | bytecode.len()) { | |
305 | Ok(()) | |
306 | } else { | |
307 | Err(()) | |
308 | } | |
309 | } | |
310 | } | |
311 | } | |
312 | ||
313 | impl Drop for Linker { | |
314 | fn drop(&mut self) { | |
315 | unsafe { llvm::LLVMRustLinkerFree(self.0); } | |
316 | } | |
317 | } | |
318 | ||
ea8adc8c XL |
319 | /// Prepare "thin" LTO to get run on these modules. |
320 | /// | |
321 | /// The general structure of ThinLTO is quite different from the structure of | |
322 | /// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into | |
323 | /// one giant LLVM module, and then we run more optimization passes over this | |
324 | /// big module after internalizing most symbols. Thin LTO, on the other hand, | |
325 | /// avoid this large bottleneck through more targeted optimization. | |
326 | /// | |
327 | /// At a high level Thin LTO looks like: | |
328 | /// | |
329 | /// 1. Prepare a "summary" of each LLVM module in question which describes | |
330 | /// the values inside, cost of the values, etc. | |
331 | /// 2. Merge the summaries of all modules in question into one "index" | |
332 | /// 3. Perform some global analysis on this index | |
333 | /// 4. For each module, use the index and analysis calculated previously to | |
334 | /// perform local transformations on the module, for example inlining | |
335 | /// small functions from other modules. | |
336 | /// 5. Run thin-specific optimization passes over each module, and then code | |
337 | /// generate everything at the end. | |
338 | /// | |
339 | /// The summary for each module is intended to be quite cheap, and the global | |
340 | /// index is relatively quite cheap to create as well. As a result, the goal of | |
341 | /// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more | |
342 | /// situations. For example one cheap optimization is that we can parallelize | |
343 | /// all codegen modules, easily making use of all the cores on a machine. | |
344 | /// | |
345 | /// With all that in mind, the function here is designed at specifically just | |
346 | /// calculating the *index* for ThinLTO. This index will then be shared amongst | |
94b46f34 | 347 | /// all of the `LtoModuleCodegen` units returned below and destroyed once |
ea8adc8c XL |
348 | /// they all go out of scope. |
349 | fn thin_lto(diag_handler: &Handler, | |
94b46f34 | 350 | modules: Vec<ModuleCodegen>, |
ea8adc8c XL |
351 | serialized_modules: Vec<(SerializedModule, CString)>, |
352 | symbol_white_list: &[*const libc::c_char], | |
353 | timeline: &mut Timeline) | |
94b46f34 | 354 | -> Result<Vec<LtoModuleCodegen>, FatalError> |
ea8adc8c XL |
355 | { |
356 | unsafe { | |
357 | info!("going for that thin, thin LTO"); | |
358 | ||
359 | let mut thin_buffers = Vec::new(); | |
360 | let mut module_names = Vec::new(); | |
361 | let mut thin_modules = Vec::new(); | |
362 | ||
363 | // FIXME: right now, like with fat LTO, we serialize all in-memory | |
364 | // modules before working with them and ThinLTO. We really | |
365 | // shouldn't do this, however, and instead figure out how to | |
366 | // extract a summary from an in-memory module and then merge that | |
367 | // into the global index. It turns out that this loop is by far | |
368 | // the most expensive portion of this small bit of global | |
369 | // analysis! | |
370 | for (i, module) in modules.iter().enumerate() { | |
371 | info!("local module: {} - {}", i, module.llmod_id); | |
94b46f34 | 372 | let llvm = module.llvm().expect("can't lto precodegened module"); |
ea8adc8c | 373 | let name = CString::new(module.llmod_id.clone()).unwrap(); |
abe05a73 | 374 | let buffer = ThinBuffer::new(llvm.llmod); |
ea8adc8c XL |
375 | thin_modules.push(llvm::ThinLTOModule { |
376 | identifier: name.as_ptr(), | |
377 | data: buffer.data().as_ptr(), | |
378 | len: buffer.data().len(), | |
379 | }); | |
380 | thin_buffers.push(buffer); | |
381 | module_names.push(name); | |
382 | timeline.record(&module.llmod_id); | |
b039eaaf | 383 | } |
ea8adc8c XL |
384 | |
385 | // FIXME: All upstream crates are deserialized internally in the | |
386 | // function below to extract their summary and modules. Note that | |
387 | // unlike the loop above we *must* decode and/or read something | |
388 | // here as these are all just serialized files on disk. An | |
389 | // improvement, however, to make here would be to store the | |
390 | // module summary separately from the actual module itself. Right | |
391 | // now this is store in one large bitcode file, and the entire | |
392 | // file is deflate-compressed. We could try to bypass some of the | |
393 | // decompression by storing the index uncompressed and only | |
394 | // lazily decompressing the bytecode if necessary. | |
395 | // | |
396 | // Note that truly taking advantage of this optimization will | |
397 | // likely be further down the road. We'd have to implement | |
398 | // incremental ThinLTO first where we could actually avoid | |
399 | // looking at upstream modules entirely sometimes (the contents, | |
400 | // we must always unconditionally look at the index). | |
401 | let mut serialized = Vec::new(); | |
402 | for (module, name) in serialized_modules { | |
403 | info!("foreign module {:?}", name); | |
404 | thin_modules.push(llvm::ThinLTOModule { | |
405 | identifier: name.as_ptr(), | |
406 | data: module.data().as_ptr(), | |
407 | len: module.data().len(), | |
408 | }); | |
409 | serialized.push(module); | |
410 | module_names.push(name); | |
411 | } | |
412 | ||
413 | // Delegate to the C++ bindings to create some data here. Once this is a | |
414 | // tried-and-true interface we may wish to try to upstream some of this | |
415 | // to LLVM itself, right now we reimplement a lot of what they do | |
416 | // upstream... | |
417 | let data = llvm::LLVMRustCreateThinLTOData( | |
418 | thin_modules.as_ptr(), | |
419 | thin_modules.len() as u32, | |
420 | symbol_white_list.as_ptr(), | |
421 | symbol_white_list.len() as u32, | |
422 | ); | |
423 | if data.is_null() { | |
8faf50e0 | 424 | let msg = "failed to prepare thin LTO context".to_string(); |
ea8adc8c XL |
425 | return Err(write::llvm_err(&diag_handler, msg)) |
426 | } | |
427 | let data = ThinData(data); | |
428 | info!("thin LTO data created"); | |
429 | timeline.record("data"); | |
430 | ||
431 | // Throw our data in an `Arc` as we'll be sharing it across threads. We | |
432 | // also put all memory referenced by the C++ data (buffers, ids, etc) | |
433 | // into the arc as well. After this we'll create a thin module | |
94b46f34 | 434 | // codegen per module in this data. |
ea8adc8c XL |
435 | let shared = Arc::new(ThinShared { |
436 | data, | |
437 | thin_buffers, | |
438 | serialized_modules: serialized, | |
439 | module_names, | |
440 | }); | |
441 | Ok((0..shared.module_names.len()).map(|i| { | |
94b46f34 | 442 | LtoModuleCodegen::Thin(ThinModule { |
ea8adc8c XL |
443 | shared: shared.clone(), |
444 | idx: i, | |
445 | }) | |
446 | }).collect()) | |
b039eaaf | 447 | } |
ea8adc8c | 448 | } |
b039eaaf | 449 | |
ea8adc8c XL |
450 | fn run_pass_manager(cgcx: &CodegenContext, |
451 | tm: TargetMachineRef, | |
452 | llmod: ModuleRef, | |
453 | config: &ModuleConfig, | |
454 | thin: bool) { | |
1a4d82fc JJ |
455 | // Now we have one massive module inside of llmod. Time to run the |
456 | // LTO-specific optimization passes that LLVM provides. | |
457 | // | |
458 | // This code is based off the code found in llvm's LTO code generator: | |
459 | // tools/lto/LTOCodeGenerator.cpp | |
460 | debug!("running the pass manager"); | |
461 | unsafe { | |
462 | let pm = llvm::LLVMCreatePassManager(); | |
463 | llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod); | |
8faf50e0 XL |
464 | |
465 | if config.verify_llvm_ir { | |
466 | let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _); | |
467 | assert!(!pass.is_null()); | |
468 | llvm::LLVMRustAddPass(pm, pass); | |
469 | } | |
1a4d82fc | 470 | |
abe05a73 XL |
471 | // When optimizing for LTO we don't actually pass in `-O0`, but we force |
472 | // it to always happen at least with `-O1`. | |
473 | // | |
474 | // With ThinLTO we mess around a lot with symbol visibility in a way | |
475 | // that will actually cause linking failures if we optimize at O0 which | |
476 | // notable is lacking in dead code elimination. To ensure we at least | |
477 | // get some optimizations and correctly link we forcibly switch to `-O1` | |
478 | // to get dead code elimination. | |
479 | // | |
480 | // Note that in general this shouldn't matter too much as you typically | |
481 | // only turn on ThinLTO when you're compiling with optimizations | |
482 | // otherwise. | |
483 | let opt_level = config.opt_level.unwrap_or(llvm::CodeGenOptLevel::None); | |
484 | let opt_level = match opt_level { | |
485 | llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less, | |
486 | level => level, | |
487 | }; | |
94b46f34 | 488 | with_llvm_pmb(llmod, config, opt_level, false, &mut |b| { |
ea8adc8c XL |
489 | if thin { |
490 | if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) { | |
491 | panic!("this version of LLVM does not support ThinLTO"); | |
492 | } | |
493 | } else { | |
494 | llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm, | |
495 | /* Internalize = */ False, | |
496 | /* RunInliner = */ True); | |
497 | } | |
c1a9b12d | 498 | }); |
1a4d82fc | 499 | |
8faf50e0 XL |
500 | if config.verify_llvm_ir { |
501 | let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _); | |
502 | assert!(!pass.is_null()); | |
503 | llvm::LLVMRustAddPass(pm, pass); | |
504 | } | |
1a4d82fc | 505 | |
0531ce1d | 506 | time_ext(cgcx.time_passes, None, "LTO passes", || |
1a4d82fc JJ |
507 | llvm::LLVMRunPassManager(pm, llmod)); |
508 | ||
509 | llvm::LLVMDisposePassManager(pm); | |
510 | } | |
511 | debug!("lto done"); | |
512 | } | |
513 | ||
ea8adc8c XL |
514 | pub enum SerializedModule { |
515 | Local(ModuleBuffer), | |
516 | FromRlib(Vec<u8>), | |
517 | } | |
518 | ||
519 | impl SerializedModule { | |
520 | fn data(&self) -> &[u8] { | |
521 | match *self { | |
522 | SerializedModule::Local(ref m) => m.data(), | |
523 | SerializedModule::FromRlib(ref m) => m, | |
524 | } | |
525 | } | |
526 | } | |
527 | ||
528 | pub struct ModuleBuffer(*mut llvm::ModuleBuffer); | |
529 | ||
530 | unsafe impl Send for ModuleBuffer {} | |
531 | unsafe impl Sync for ModuleBuffer {} | |
532 | ||
533 | impl ModuleBuffer { | |
abe05a73 | 534 | pub fn new(m: ModuleRef) -> ModuleBuffer { |
ea8adc8c XL |
535 | ModuleBuffer(unsafe { |
536 | llvm::LLVMRustModuleBufferCreate(m) | |
537 | }) | |
538 | } | |
539 | ||
abe05a73 | 540 | pub fn data(&self) -> &[u8] { |
ea8adc8c XL |
541 | unsafe { |
542 | let ptr = llvm::LLVMRustModuleBufferPtr(self.0); | |
543 | let len = llvm::LLVMRustModuleBufferLen(self.0); | |
544 | slice::from_raw_parts(ptr, len) | |
545 | } | |
546 | } | |
547 | } | |
548 | ||
549 | impl Drop for ModuleBuffer { | |
550 | fn drop(&mut self) { | |
551 | unsafe { llvm::LLVMRustModuleBufferFree(self.0); } | |
552 | } | |
553 | } | |
554 | ||
/// One module taking part in thin LTO.
///
/// It is identified by its position `idx` within the state shared by all
/// modules of the same thin LTO run.
pub struct ThinModule {
    // State shared between all thin modules of one LTO run.
    shared: Arc<ThinShared>,
    // Index into `shared.module_names`; `ThinModule::data` also uses it to
    // locate this module's bitcode.
    idx: usize,
}

/// State shared by every `ThinModule` created by one call to `thin_lto`.
struct ThinShared {
    // Thin LTO data created by `LLVMRustCreateThinLTOData`.
    data: ThinData,
    // Buffers of the local modules, in the order they were processed.
    thin_buffers: Vec<ThinBuffer>,
    // Bitcode of the remaining (upstream) modules; `ThinModule::data` indexes
    // it with `idx - thin_buffers.len()`.
    serialized_modules: Vec<SerializedModule>,
    // Names of all modules: local modules first, then the serialized ones.
    module_names: Vec<CString>,
}
566 | ||
567 | struct ThinData(*mut llvm::ThinLTOData); | |
568 | ||
569 | unsafe impl Send for ThinData {} | |
570 | unsafe impl Sync for ThinData {} | |
571 | ||
572 | impl Drop for ThinData { | |
573 | fn drop(&mut self) { | |
574 | unsafe { | |
575 | llvm::LLVMRustFreeThinLTOData(self.0); | |
576 | } | |
577 | } | |
578 | } | |
579 | ||
abe05a73 | 580 | pub struct ThinBuffer(*mut llvm::ThinLTOBuffer); |
ea8adc8c XL |
581 | |
582 | unsafe impl Send for ThinBuffer {} | |
583 | unsafe impl Sync for ThinBuffer {} | |
584 | ||
585 | impl ThinBuffer { | |
abe05a73 XL |
586 | pub fn new(m: ModuleRef) -> ThinBuffer { |
587 | unsafe { | |
588 | let buffer = llvm::LLVMRustThinLTOBufferCreate(m); | |
589 | ThinBuffer(buffer) | |
590 | } | |
591 | } | |
592 | ||
593 | pub fn data(&self) -> &[u8] { | |
ea8adc8c XL |
594 | unsafe { |
595 | let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _; | |
596 | let len = llvm::LLVMRustThinLTOBufferLen(self.0); | |
597 | slice::from_raw_parts(ptr, len) | |
598 | } | |
599 | } | |
1a4d82fc JJ |
600 | } |
601 | ||
ea8adc8c XL |
602 | impl Drop for ThinBuffer { |
603 | fn drop(&mut self) { | |
604 | unsafe { | |
605 | llvm::LLVMRustThinLTOBufferFree(self.0); | |
606 | } | |
607 | } | |
1a4d82fc JJ |
608 | } |
609 | ||
ea8adc8c XL |
610 | impl ThinModule { |
611 | fn name(&self) -> &str { | |
612 | self.shared.module_names[self.idx].to_str().unwrap() | |
613 | } | |
614 | ||
615 | fn cost(&self) -> u64 { | |
616 | // Yes, that's correct, we're using the size of the bytecode as an | |
617 | // indicator for how costly this codegen unit is. | |
618 | self.data().len() as u64 | |
619 | } | |
620 | ||
621 | fn data(&self) -> &[u8] { | |
622 | let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data()); | |
623 | a.unwrap_or_else(|| { | |
624 | let len = self.shared.thin_buffers.len(); | |
625 | self.shared.serialized_modules[self.idx - len].data() | |
626 | }) | |
627 | } | |
628 | ||
629 | unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline) | |
94b46f34 | 630 | -> Result<ModuleCodegen, FatalError> |
ea8adc8c XL |
631 | { |
632 | let diag_handler = cgcx.create_diag_handler(); | |
633 | let tm = (cgcx.tm_factory)().map_err(|e| { | |
634 | write::llvm_err(&diag_handler, e) | |
635 | })?; | |
636 | ||
637 | // Right now the implementation we've got only works over serialized | |
638 | // modules, so we create a fresh new LLVM context and parse the module | |
639 | // into that context. One day, however, we may do this for upstream | |
94b46f34 | 640 | // crates but for locally codegened modules we may be able to reuse |
ea8adc8c | 641 | // that LLVM Context and Module. |
ff7c6d11 | 642 | let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); |
ea8adc8c XL |
643 | let llmod = llvm::LLVMRustParseBitcodeForThinLTO( |
644 | llcx, | |
645 | self.data().as_ptr(), | |
646 | self.data().len(), | |
647 | self.shared.module_names[self.idx].as_ptr(), | |
648 | ); | |
ff7c6d11 | 649 | if llmod.is_null() { |
8faf50e0 | 650 | let msg = "failed to parse bitcode for thin LTO module".to_string(); |
ff7c6d11 XL |
651 | return Err(write::llvm_err(&diag_handler, msg)); |
652 | } | |
94b46f34 XL |
653 | let module = ModuleCodegen { |
654 | source: ModuleSource::Codegened(ModuleLlvm { | |
ea8adc8c XL |
655 | llmod, |
656 | llcx, | |
657 | tm, | |
658 | }), | |
659 | llmod_id: self.name().to_string(), | |
660 | name: self.name().to_string(), | |
661 | kind: ModuleKind::Regular, | |
662 | }; | |
94b46f34 | 663 | cgcx.save_temp_bitcode(&module, "thin-lto-input"); |
ea8adc8c | 664 | |
ff7c6d11 XL |
665 | // Before we do much else find the "main" `DICompileUnit` that we'll be |
666 | // using below. If we find more than one though then rustc has changed | |
667 | // in a way we're not ready for, so generate an ICE by returning | |
668 | // an error. | |
669 | let mut cu1 = ptr::null_mut(); | |
670 | let mut cu2 = ptr::null_mut(); | |
671 | llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2); | |
672 | if !cu2.is_null() { | |
8faf50e0 | 673 | let msg = "multiple source DICompileUnits found".to_string(); |
ff7c6d11 XL |
674 | return Err(write::llvm_err(&diag_handler, msg)) |
675 | } | |
676 | ||
ea8adc8c XL |
677 | // Like with "fat" LTO, get some better optimizations if landing pads |
678 | // are disabled by removing all landing pads. | |
679 | if cgcx.no_landing_pads { | |
680 | llvm::LLVMRustMarkAllFunctionsNounwind(llmod); | |
94b46f34 | 681 | cgcx.save_temp_bitcode(&module, "thin-lto-after-nounwind"); |
ea8adc8c XL |
682 | timeline.record("nounwind"); |
683 | } | |
684 | ||
685 | // Up next comes the per-module local analyses that we do for Thin LTO. | |
686 | // Each of these functions is basically copied from the LLVM | |
687 | // implementation and then tailored to suit this implementation. Ideally | |
688 | // each of these would be supported by upstream LLVM but that's perhaps | |
689 | // a patch for another day! | |
690 | // | |
691 | // You can find some more comments about these functions in the LLVM | |
692 | // bindings we've got (currently `PassWrapper.cpp`) | |
693 | if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) { | |
8faf50e0 | 694 | let msg = "failed to prepare thin LTO module".to_string(); |
ea8adc8c XL |
695 | return Err(write::llvm_err(&diag_handler, msg)) |
696 | } | |
94b46f34 | 697 | cgcx.save_temp_bitcode(&module, "thin-lto-after-rename"); |
ea8adc8c XL |
698 | timeline.record("rename"); |
699 | if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) { | |
8faf50e0 | 700 | let msg = "failed to prepare thin LTO module".to_string(); |
ea8adc8c XL |
701 | return Err(write::llvm_err(&diag_handler, msg)) |
702 | } | |
94b46f34 | 703 | cgcx.save_temp_bitcode(&module, "thin-lto-after-resolve"); |
ea8adc8c XL |
704 | timeline.record("resolve"); |
705 | if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) { | |
8faf50e0 | 706 | let msg = "failed to prepare thin LTO module".to_string(); |
ea8adc8c XL |
707 | return Err(write::llvm_err(&diag_handler, msg)) |
708 | } | |
94b46f34 | 709 | cgcx.save_temp_bitcode(&module, "thin-lto-after-internalize"); |
ea8adc8c XL |
710 | timeline.record("internalize"); |
711 | if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) { | |
8faf50e0 | 712 | let msg = "failed to prepare thin LTO module".to_string(); |
ea8adc8c XL |
713 | return Err(write::llvm_err(&diag_handler, msg)) |
714 | } | |
94b46f34 | 715 | cgcx.save_temp_bitcode(&module, "thin-lto-after-import"); |
ea8adc8c XL |
716 | timeline.record("import"); |
717 | ||
ff7c6d11 XL |
718 | // Ok now this is a bit unfortunate. This is also something you won't |
719 | // find upstream in LLVM's ThinLTO passes! This is a hack for now to | |
720 | // work around bugs in LLVM. | |
721 | // | |
722 | // First discovered in #45511 it was found that as part of ThinLTO | |
723 | // importing passes LLVM will import `DICompileUnit` metadata | |
724 | // information across modules. This means that we'll be working with one | |
725 | // LLVM module that has multiple `DICompileUnit` instances in it (a | |
726 | // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of | |
727 | // bugs in LLVM's backend which generates invalid DWARF in a situation | |
728 | // like this: | |
729 | // | |
730 | // https://bugs.llvm.org/show_bug.cgi?id=35212 | |
731 | // https://bugs.llvm.org/show_bug.cgi?id=35562 | |
732 | // | |
733 | // While the first bug there is fixed the second ended up causing #46346 | |
734 | // which was basically a resurgence of #45511 after LLVM's bug 35212 was | |
735 | // fixed. | |
736 | // | |
0531ce1d | 737 | // This function below is a huge hack around this problem. The function |
ff7c6d11 XL |
738 | // below is defined in `PassWrapper.cpp` and will basically "merge" |
739 | // all `DICompileUnit` instances in a module. Basically it'll take all | |
740 | // the objects, rewrite all pointers of `DISubprogram` to point to the | |
741 | // first `DICompileUnit`, and then delete all the other units. | |
742 | // | |
743 | // This is probably mangling to the debug info slightly (but hopefully | |
744 | // not too much) but for now at least gets LLVM to emit valid DWARF (or | |
745 | // so it appears). Hopefully we can remove this once upstream bugs are | |
746 | // fixed in LLVM. | |
747 | llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1); | |
94b46f34 | 748 | cgcx.save_temp_bitcode(&module, "thin-lto-after-patch"); |
ff7c6d11 XL |
749 | timeline.record("patch"); |
750 | ||
ea8adc8c XL |
751 | // Alright now that we've done everything related to the ThinLTO |
752 | // analysis it's time to run some optimizations! Here we use the same | |
753 | // `run_pass_manager` as the "fat" LTO above except that we tell it to | |
754 | // populate a thin-specific pass manager, which presumably LLVM treats a | |
755 | // little differently. | |
94b46f34 XL |
756 | info!("running thin lto passes over {}", module.name); |
757 | let config = cgcx.config(module.kind); | |
ea8adc8c | 758 | run_pass_manager(cgcx, tm, llmod, config, true); |
94b46f34 | 759 | cgcx.save_temp_bitcode(&module, "thin-lto-after-pm"); |
ea8adc8c | 760 | timeline.record("thin-done"); |
ff7c6d11 | 761 | |
94b46f34 | 762 | Ok(module) |
ea8adc8c | 763 | } |
1a4d82fc | 764 | } |