]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2013 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
ea8adc8c | 11 | use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION}; |
041b39d2 | 12 | use back::symbol_export; |
ea8adc8c XL |
13 | use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext}; |
14 | use back::write; | |
3b2f2976 | 15 | use errors::{FatalError, Handler}; |
1a4d82fc JJ |
16 | use llvm::archive_ro::ArchiveRO; |
17 | use llvm::{ModuleRef, TargetMachineRef, True, False}; | |
ea8adc8c | 18 | use llvm; |
476ff2be | 19 | use rustc::hir::def_id::LOCAL_CRATE; |
ea8adc8c XL |
20 | use rustc::middle::exported_symbols::SymbolExportLevel; |
21 | use rustc::session::config; | |
22 | use rustc::util::common::time; | |
23 | use time_graph::Timeline; | |
24 | use {ModuleTranslation, ModuleLlvm, ModuleKind, ModuleSource}; | |
1a4d82fc JJ |
25 | |
26 | use libc; | |
1a4d82fc JJ |
27 | |
28 | use std::ffi::CString; | |
ea8adc8c XL |
29 | use std::slice; |
30 | use std::sync::Arc; | |
1a4d82fc | 31 | |
476ff2be SL |
32 | pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool { |
33 | match crate_type { | |
34 | config::CrateTypeExecutable | | |
35 | config::CrateTypeStaticlib | | |
36 | config::CrateTypeCdylib => true, | |
37 | ||
38 | config::CrateTypeDylib | | |
39 | config::CrateTypeRlib | | |
476ff2be SL |
40 | config::CrateTypeProcMacro => false, |
41 | } | |
42 | } | |
43 | ||
ea8adc8c XL |
44 | pub enum LtoModuleTranslation { |
45 | Fat { | |
46 | module: Option<ModuleTranslation>, | |
47 | _serialized_bitcode: Vec<SerializedModule>, | |
48 | }, | |
49 | ||
50 | Thin(ThinModule), | |
51 | } | |
52 | ||
53 | impl LtoModuleTranslation { | |
54 | pub fn name(&self) -> &str { | |
55 | match *self { | |
56 | LtoModuleTranslation::Fat { .. } => "everything", | |
57 | LtoModuleTranslation::Thin(ref m) => m.name(), | |
58 | } | |
1a4d82fc JJ |
59 | } |
60 | ||
ea8adc8c XL |
61 | /// Optimize this module within the given codegen context. |
62 | /// | |
63 | /// This function is unsafe as it'll return a `ModuleTranslation` still | |
64 | /// points to LLVM data structures owned by this `LtoModuleTranslation`. | |
65 | /// It's intended that the module returned is immediately code generated and | |
66 | /// dropped, and then this LTO module is dropped. | |
67 | pub unsafe fn optimize(&mut self, | |
68 | cgcx: &CodegenContext, | |
69 | timeline: &mut Timeline) | |
70 | -> Result<ModuleTranslation, FatalError> | |
71 | { | |
72 | match *self { | |
73 | LtoModuleTranslation::Fat { ref mut module, .. } => { | |
74 | let trans = module.take().unwrap(); | |
75 | let config = cgcx.config(trans.kind); | |
76 | let llmod = trans.llvm().unwrap().llmod; | |
77 | let tm = trans.llvm().unwrap().tm; | |
78 | run_pass_manager(cgcx, tm, llmod, config, false); | |
79 | timeline.record("fat-done"); | |
80 | Ok(trans) | |
81 | } | |
82 | LtoModuleTranslation::Thin(ref mut thin) => thin.optimize(cgcx, timeline), | |
1a4d82fc JJ |
83 | } |
84 | } | |
85 | ||
ea8adc8c XL |
86 | /// A "guage" of how costly it is to optimize this module, used to sort |
87 | /// biggest modules first. | |
88 | pub fn cost(&self) -> u64 { | |
89 | match *self { | |
90 | // Only one module with fat LTO, so the cost doesn't matter. | |
91 | LtoModuleTranslation::Fat { .. } => 0, | |
92 | LtoModuleTranslation::Thin(ref m) => m.cost(), | |
93 | } | |
94 | } | |
95 | } | |
96 | ||
/// Selects how much of the crate graph takes part in LTO.
pub enum LTOMode {
    /// LTO across the current crate and the bitcode of every upstream rlib
    /// that is linked in.
    WholeCrateGraph,
    /// LTO restricted to the modules of the crate being compiled.
    JustThisCrate,
}
476ff2be | 101 | |
ea8adc8c XL |
102 | pub fn run(cgcx: &CodegenContext, |
103 | modules: Vec<ModuleTranslation>, | |
104 | mode: LTOMode, | |
105 | timeline: &mut Timeline) | |
106 | -> Result<Vec<LtoModuleTranslation>, FatalError> | |
107 | { | |
108 | let diag_handler = cgcx.create_diag_handler(); | |
109 | let export_threshold = match mode { | |
110 | LTOMode::WholeCrateGraph => { | |
111 | symbol_export::crates_export_threshold(&cgcx.crate_types) | |
112 | } | |
113 | LTOMode::JustThisCrate => { | |
114 | SymbolExportLevel::Rust | |
115 | } | |
116 | }; | |
117 | ||
118 | let symbol_filter = &|&(ref name, _, level): &(String, _, SymbolExportLevel)| { | |
119 | if level.is_below_threshold(export_threshold) { | |
476ff2be SL |
120 | let mut bytes = Vec::with_capacity(name.len() + 1); |
121 | bytes.extend(name.bytes()); | |
122 | Some(CString::new(bytes).unwrap()) | |
123 | } else { | |
124 | None | |
125 | } | |
126 | }; | |
127 | ||
ea8adc8c | 128 | let mut symbol_white_list = cgcx.exported_symbols[&LOCAL_CRATE] |
476ff2be SL |
129 | .iter() |
130 | .filter_map(symbol_filter) | |
ea8adc8c XL |
131 | .collect::<Vec<CString>>(); |
132 | timeline.record("whitelist"); | |
abe05a73 | 133 | info!("{} symbols to preserve in this crate", symbol_white_list.len()); |
ea8adc8c XL |
134 | |
135 | // If we're performing LTO for the entire crate graph, then for each of our | |
136 | // upstream dependencies, find the corresponding rlib and load the bitcode | |
137 | // from the archive. | |
138 | // | |
139 | // We save off all the bytecode and LLVM module ids for later processing | |
140 | // with either fat or thin LTO | |
141 | let mut upstream_modules = Vec::new(); | |
142 | if let LTOMode::WholeCrateGraph = mode { | |
143 | if cgcx.opts.cg.prefer_dynamic { | |
144 | diag_handler.struct_err("cannot prefer dynamic linking when performing LTO") | |
145 | .note("only 'staticlib', 'bin', and 'cdylib' outputs are \ | |
146 | supported with LTO") | |
147 | .emit(); | |
148 | return Err(FatalError) | |
149 | } | |
150 | ||
151 | // Make sure we actually can run LTO | |
152 | for crate_type in cgcx.crate_types.iter() { | |
153 | if !crate_type_allows_lto(*crate_type) { | |
154 | let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \ | |
155 | static library outputs"); | |
156 | return Err(e) | |
157 | } | |
158 | } | |
159 | ||
160 | for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { | |
161 | symbol_white_list.extend( | |
162 | cgcx.exported_symbols[&cnum] | |
163 | .iter() | |
164 | .filter_map(symbol_filter)); | |
165 | ||
166 | let archive = ArchiveRO::open(&path).expect("wanted an rlib"); | |
167 | let bytecodes = archive.iter().filter_map(|child| { | |
168 | child.ok().and_then(|c| c.name().map(|name| (name, c))) | |
169 | }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION)); | |
170 | for (name, data) in bytecodes { | |
171 | info!("adding bytecode {}", name); | |
172 | let bc_encoded = data.data(); | |
173 | ||
174 | let (bc, id) = time(cgcx.time_passes, &format!("decode {}", name), || { | |
175 | match DecodedBytecode::new(bc_encoded) { | |
176 | Ok(b) => Ok((b.bytecode(), b.identifier().to_string())), | |
177 | Err(e) => Err(diag_handler.fatal(&e)), | |
1a4d82fc | 178 | } |
ea8adc8c XL |
179 | })?; |
180 | let bc = SerializedModule::FromRlib(bc); | |
181 | upstream_modules.push((bc, CString::new(id).unwrap())); | |
182 | } | |
183 | timeline.record(&format!("load: {}", path.display())); | |
184 | } | |
185 | } | |
1a4d82fc | 186 | |
ea8adc8c XL |
187 | let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>(); |
188 | match mode { | |
189 | LTOMode::WholeCrateGraph if !cgcx.thinlto => { | |
190 | fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline) | |
191 | } | |
192 | _ => { | |
193 | thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline) | |
1a4d82fc | 194 | } |
041b39d2 | 195 | } |
ea8adc8c XL |
196 | } |
197 | ||
198 | fn fat_lto(cgcx: &CodegenContext, | |
199 | diag_handler: &Handler, | |
200 | mut modules: Vec<ModuleTranslation>, | |
201 | mut serialized_modules: Vec<(SerializedModule, CString)>, | |
202 | symbol_white_list: &[*const libc::c_char], | |
203 | timeline: &mut Timeline) | |
204 | -> Result<Vec<LtoModuleTranslation>, FatalError> | |
205 | { | |
206 | info!("going for a fat lto"); | |
207 | ||
208 | // Find the "costliest" module and merge everything into that codegen unit. | |
209 | // All the other modules will be serialized and reparsed into the new | |
210 | // context, so this hopefully avoids serializing and parsing the largest | |
211 | // codegen unit. | |
212 | // | |
213 | // Additionally use a regular module as the base here to ensure that various | |
214 | // file copy operations in the backend work correctly. The only other kind | |
215 | // of module here should be an allocator one, and if your crate is smaller | |
216 | // than the allocator module then the size doesn't really matter anyway. | |
217 | let (_, costliest_module) = modules.iter() | |
218 | .enumerate() | |
219 | .filter(|&(_, module)| module.kind == ModuleKind::Regular) | |
220 | .map(|(i, module)| { | |
221 | let cost = unsafe { | |
222 | llvm::LLVMRustModuleCost(module.llvm().unwrap().llmod) | |
223 | }; | |
224 | (cost, i) | |
225 | }) | |
226 | .max() | |
227 | .expect("must be trans'ing at least one module"); | |
228 | let module = modules.remove(costliest_module); | |
229 | let llmod = module.llvm().expect("can't lto pre-translated modules").llmod; | |
230 | info!("using {:?} as a base module", module.llmod_id); | |
231 | ||
232 | // For all other modules we translated we'll need to link them into our own | |
233 | // bitcode. All modules were translated in their own LLVM context, however, | |
234 | // and we want to move everything to the same LLVM context. Currently the | |
235 | // way we know of to do that is to serialize them to a string and them parse | |
236 | // them later. Not great but hey, that's why it's "fat" LTO, right? | |
237 | for module in modules { | |
238 | let llvm = module.llvm().expect("can't lto pre-translated modules"); | |
239 | let buffer = ModuleBuffer::new(llvm.llmod); | |
240 | let llmod_id = CString::new(&module.llmod_id[..]).unwrap(); | |
241 | serialized_modules.push((SerializedModule::Local(buffer), llmod_id)); | |
242 | } | |
243 | ||
244 | // For all serialized bitcode files we parse them and link them in as we did | |
245 | // above, this is all mostly handled in C++. Like above, though, we don't | |
246 | // know much about the memory management here so we err on the side of being | |
247 | // save and persist everything with the original module. | |
248 | let mut serialized_bitcode = Vec::new(); | |
249 | for (bc_decoded, name) in serialized_modules { | |
250 | info!("linking {:?}", name); | |
251 | time(cgcx.time_passes, &format!("ll link {:?}", name), || unsafe { | |
252 | let data = bc_decoded.data(); | |
253 | if llvm::LLVMRustLinkInExternalBitcode(llmod, | |
254 | data.as_ptr() as *const libc::c_char, | |
255 | data.len() as libc::size_t) { | |
256 | Ok(()) | |
257 | } else { | |
258 | let msg = format!("failed to load bc of {:?}", name); | |
259 | Err(write::llvm_err(&diag_handler, msg)) | |
260 | } | |
261 | })?; | |
262 | timeline.record(&format!("link {:?}", name)); | |
263 | serialized_bitcode.push(bc_decoded); | |
264 | } | |
265 | cgcx.save_temp_bitcode(&module, "lto.input"); | |
1a4d82fc | 266 | |
ea8adc8c XL |
267 | // Internalize everything that *isn't* in our whitelist to help strip out |
268 | // more modules and such | |
1a4d82fc | 269 | unsafe { |
ea8adc8c | 270 | let ptr = symbol_white_list.as_ptr(); |
1a4d82fc JJ |
271 | llvm::LLVMRustRunRestrictionPass(llmod, |
272 | ptr as *const *const libc::c_char, | |
ea8adc8c XL |
273 | symbol_white_list.len() as libc::size_t); |
274 | cgcx.save_temp_bitcode(&module, "lto.after-restriction"); | |
1a4d82fc JJ |
275 | } |
276 | ||
041b39d2 | 277 | if cgcx.no_landing_pads { |
1a4d82fc JJ |
278 | unsafe { |
279 | llvm::LLVMRustMarkAllFunctionsNounwind(llmod); | |
280 | } | |
ea8adc8c | 281 | cgcx.save_temp_bitcode(&module, "lto.after-nounwind"); |
1a4d82fc | 282 | } |
ea8adc8c | 283 | timeline.record("passes"); |
1a4d82fc | 284 | |
ea8adc8c XL |
285 | Ok(vec![LtoModuleTranslation::Fat { |
286 | module: Some(module), | |
287 | _serialized_bitcode: serialized_bitcode, | |
288 | }]) | |
289 | } | |
290 | ||
291 | /// Prepare "thin" LTO to get run on these modules. | |
292 | /// | |
293 | /// The general structure of ThinLTO is quite different from the structure of | |
294 | /// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into | |
295 | /// one giant LLVM module, and then we run more optimization passes over this | |
296 | /// big module after internalizing most symbols. Thin LTO, on the other hand, | |
297 | /// avoid this large bottleneck through more targeted optimization. | |
298 | /// | |
299 | /// At a high level Thin LTO looks like: | |
300 | /// | |
301 | /// 1. Prepare a "summary" of each LLVM module in question which describes | |
302 | /// the values inside, cost of the values, etc. | |
303 | /// 2. Merge the summaries of all modules in question into one "index" | |
304 | /// 3. Perform some global analysis on this index | |
305 | /// 4. For each module, use the index and analysis calculated previously to | |
306 | /// perform local transformations on the module, for example inlining | |
307 | /// small functions from other modules. | |
308 | /// 5. Run thin-specific optimization passes over each module, and then code | |
309 | /// generate everything at the end. | |
310 | /// | |
311 | /// The summary for each module is intended to be quite cheap, and the global | |
312 | /// index is relatively quite cheap to create as well. As a result, the goal of | |
313 | /// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more | |
314 | /// situations. For example one cheap optimization is that we can parallelize | |
315 | /// all codegen modules, easily making use of all the cores on a machine. | |
316 | /// | |
317 | /// With all that in mind, the function here is designed at specifically just | |
318 | /// calculating the *index* for ThinLTO. This index will then be shared amongst | |
319 | /// all of the `LtoModuleTranslation` units returned below and destroyed once | |
320 | /// they all go out of scope. | |
321 | fn thin_lto(diag_handler: &Handler, | |
322 | modules: Vec<ModuleTranslation>, | |
323 | serialized_modules: Vec<(SerializedModule, CString)>, | |
324 | symbol_white_list: &[*const libc::c_char], | |
325 | timeline: &mut Timeline) | |
326 | -> Result<Vec<LtoModuleTranslation>, FatalError> | |
327 | { | |
328 | unsafe { | |
329 | info!("going for that thin, thin LTO"); | |
330 | ||
331 | let mut thin_buffers = Vec::new(); | |
332 | let mut module_names = Vec::new(); | |
333 | let mut thin_modules = Vec::new(); | |
334 | ||
335 | // FIXME: right now, like with fat LTO, we serialize all in-memory | |
336 | // modules before working with them and ThinLTO. We really | |
337 | // shouldn't do this, however, and instead figure out how to | |
338 | // extract a summary from an in-memory module and then merge that | |
339 | // into the global index. It turns out that this loop is by far | |
340 | // the most expensive portion of this small bit of global | |
341 | // analysis! | |
342 | for (i, module) in modules.iter().enumerate() { | |
343 | info!("local module: {} - {}", i, module.llmod_id); | |
344 | let llvm = module.llvm().expect("can't lto pretranslated module"); | |
345 | let name = CString::new(module.llmod_id.clone()).unwrap(); | |
abe05a73 | 346 | let buffer = ThinBuffer::new(llvm.llmod); |
ea8adc8c XL |
347 | thin_modules.push(llvm::ThinLTOModule { |
348 | identifier: name.as_ptr(), | |
349 | data: buffer.data().as_ptr(), | |
350 | len: buffer.data().len(), | |
351 | }); | |
352 | thin_buffers.push(buffer); | |
353 | module_names.push(name); | |
354 | timeline.record(&module.llmod_id); | |
b039eaaf | 355 | } |
ea8adc8c XL |
356 | |
357 | // FIXME: All upstream crates are deserialized internally in the | |
358 | // function below to extract their summary and modules. Note that | |
359 | // unlike the loop above we *must* decode and/or read something | |
360 | // here as these are all just serialized files on disk. An | |
361 | // improvement, however, to make here would be to store the | |
362 | // module summary separately from the actual module itself. Right | |
363 | // now this is store in one large bitcode file, and the entire | |
364 | // file is deflate-compressed. We could try to bypass some of the | |
365 | // decompression by storing the index uncompressed and only | |
366 | // lazily decompressing the bytecode if necessary. | |
367 | // | |
368 | // Note that truly taking advantage of this optimization will | |
369 | // likely be further down the road. We'd have to implement | |
370 | // incremental ThinLTO first where we could actually avoid | |
371 | // looking at upstream modules entirely sometimes (the contents, | |
372 | // we must always unconditionally look at the index). | |
373 | let mut serialized = Vec::new(); | |
374 | for (module, name) in serialized_modules { | |
375 | info!("foreign module {:?}", name); | |
376 | thin_modules.push(llvm::ThinLTOModule { | |
377 | identifier: name.as_ptr(), | |
378 | data: module.data().as_ptr(), | |
379 | len: module.data().len(), | |
380 | }); | |
381 | serialized.push(module); | |
382 | module_names.push(name); | |
383 | } | |
384 | ||
385 | // Delegate to the C++ bindings to create some data here. Once this is a | |
386 | // tried-and-true interface we may wish to try to upstream some of this | |
387 | // to LLVM itself, right now we reimplement a lot of what they do | |
388 | // upstream... | |
389 | let data = llvm::LLVMRustCreateThinLTOData( | |
390 | thin_modules.as_ptr(), | |
391 | thin_modules.len() as u32, | |
392 | symbol_white_list.as_ptr(), | |
393 | symbol_white_list.len() as u32, | |
394 | ); | |
395 | if data.is_null() { | |
396 | let msg = format!("failed to prepare thin LTO context"); | |
397 | return Err(write::llvm_err(&diag_handler, msg)) | |
398 | } | |
399 | let data = ThinData(data); | |
400 | info!("thin LTO data created"); | |
401 | timeline.record("data"); | |
402 | ||
403 | // Throw our data in an `Arc` as we'll be sharing it across threads. We | |
404 | // also put all memory referenced by the C++ data (buffers, ids, etc) | |
405 | // into the arc as well. After this we'll create a thin module | |
406 | // translation per module in this data. | |
407 | let shared = Arc::new(ThinShared { | |
408 | data, | |
409 | thin_buffers, | |
410 | serialized_modules: serialized, | |
411 | module_names, | |
412 | }); | |
413 | Ok((0..shared.module_names.len()).map(|i| { | |
414 | LtoModuleTranslation::Thin(ThinModule { | |
415 | shared: shared.clone(), | |
416 | idx: i, | |
417 | }) | |
418 | }).collect()) | |
b039eaaf | 419 | } |
ea8adc8c | 420 | } |
b039eaaf | 421 | |
ea8adc8c XL |
422 | fn run_pass_manager(cgcx: &CodegenContext, |
423 | tm: TargetMachineRef, | |
424 | llmod: ModuleRef, | |
425 | config: &ModuleConfig, | |
426 | thin: bool) { | |
1a4d82fc JJ |
427 | // Now we have one massive module inside of llmod. Time to run the |
428 | // LTO-specific optimization passes that LLVM provides. | |
429 | // | |
430 | // This code is based off the code found in llvm's LTO code generator: | |
431 | // tools/lto/LTOCodeGenerator.cpp | |
432 | debug!("running the pass manager"); | |
433 | unsafe { | |
434 | let pm = llvm::LLVMCreatePassManager(); | |
435 | llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod); | |
7453a54e SL |
436 | let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _); |
437 | assert!(!pass.is_null()); | |
438 | llvm::LLVMRustAddPass(pm, pass); | |
1a4d82fc | 439 | |
abe05a73 XL |
440 | // When optimizing for LTO we don't actually pass in `-O0`, but we force |
441 | // it to always happen at least with `-O1`. | |
442 | // | |
443 | // With ThinLTO we mess around a lot with symbol visibility in a way | |
444 | // that will actually cause linking failures if we optimize at O0 which | |
445 | // notable is lacking in dead code elimination. To ensure we at least | |
446 | // get some optimizations and correctly link we forcibly switch to `-O1` | |
447 | // to get dead code elimination. | |
448 | // | |
449 | // Note that in general this shouldn't matter too much as you typically | |
450 | // only turn on ThinLTO when you're compiling with optimizations | |
451 | // otherwise. | |
452 | let opt_level = config.opt_level.unwrap_or(llvm::CodeGenOptLevel::None); | |
453 | let opt_level = match opt_level { | |
454 | llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less, | |
455 | level => level, | |
456 | }; | |
457 | with_llvm_pmb(llmod, config, opt_level, &mut |b| { | |
ea8adc8c XL |
458 | if thin { |
459 | if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) { | |
460 | panic!("this version of LLVM does not support ThinLTO"); | |
461 | } | |
462 | } else { | |
463 | llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm, | |
464 | /* Internalize = */ False, | |
465 | /* RunInliner = */ True); | |
466 | } | |
c1a9b12d | 467 | }); |
1a4d82fc | 468 | |
7453a54e SL |
469 | let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _); |
470 | assert!(!pass.is_null()); | |
471 | llvm::LLVMRustAddPass(pm, pass); | |
1a4d82fc | 472 | |
041b39d2 | 473 | time(cgcx.time_passes, "LTO passes", || |
1a4d82fc JJ |
474 | llvm::LLVMRunPassManager(pm, llmod)); |
475 | ||
476 | llvm::LLVMDisposePassManager(pm); | |
477 | } | |
478 | debug!("lto done"); | |
479 | } | |
480 | ||
ea8adc8c XL |
481 | pub enum SerializedModule { |
482 | Local(ModuleBuffer), | |
483 | FromRlib(Vec<u8>), | |
484 | } | |
485 | ||
486 | impl SerializedModule { | |
487 | fn data(&self) -> &[u8] { | |
488 | match *self { | |
489 | SerializedModule::Local(ref m) => m.data(), | |
490 | SerializedModule::FromRlib(ref m) => m, | |
491 | } | |
492 | } | |
493 | } | |
494 | ||
495 | pub struct ModuleBuffer(*mut llvm::ModuleBuffer); | |
496 | ||
497 | unsafe impl Send for ModuleBuffer {} | |
498 | unsafe impl Sync for ModuleBuffer {} | |
499 | ||
500 | impl ModuleBuffer { | |
abe05a73 | 501 | pub fn new(m: ModuleRef) -> ModuleBuffer { |
ea8adc8c XL |
502 | ModuleBuffer(unsafe { |
503 | llvm::LLVMRustModuleBufferCreate(m) | |
504 | }) | |
505 | } | |
506 | ||
abe05a73 | 507 | pub fn data(&self) -> &[u8] { |
ea8adc8c XL |
508 | unsafe { |
509 | let ptr = llvm::LLVMRustModuleBufferPtr(self.0); | |
510 | let len = llvm::LLVMRustModuleBufferLen(self.0); | |
511 | slice::from_raw_parts(ptr, len) | |
512 | } | |
513 | } | |
514 | } | |
515 | ||
516 | impl Drop for ModuleBuffer { | |
517 | fn drop(&mut self) { | |
518 | unsafe { llvm::LLVMRustModuleBufferFree(self.0); } | |
519 | } | |
520 | } | |
521 | ||
522 | pub struct ThinModule { | |
523 | shared: Arc<ThinShared>, | |
524 | idx: usize, | |
525 | } | |
526 | ||
527 | struct ThinShared { | |
528 | data: ThinData, | |
529 | thin_buffers: Vec<ThinBuffer>, | |
530 | serialized_modules: Vec<SerializedModule>, | |
531 | module_names: Vec<CString>, | |
532 | } | |
533 | ||
534 | struct ThinData(*mut llvm::ThinLTOData); | |
535 | ||
536 | unsafe impl Send for ThinData {} | |
537 | unsafe impl Sync for ThinData {} | |
538 | ||
539 | impl Drop for ThinData { | |
540 | fn drop(&mut self) { | |
541 | unsafe { | |
542 | llvm::LLVMRustFreeThinLTOData(self.0); | |
543 | } | |
544 | } | |
545 | } | |
546 | ||
abe05a73 | 547 | pub struct ThinBuffer(*mut llvm::ThinLTOBuffer); |
ea8adc8c XL |
548 | |
549 | unsafe impl Send for ThinBuffer {} | |
550 | unsafe impl Sync for ThinBuffer {} | |
551 | ||
552 | impl ThinBuffer { | |
abe05a73 XL |
553 | pub fn new(m: ModuleRef) -> ThinBuffer { |
554 | unsafe { | |
555 | let buffer = llvm::LLVMRustThinLTOBufferCreate(m); | |
556 | ThinBuffer(buffer) | |
557 | } | |
558 | } | |
559 | ||
560 | pub fn data(&self) -> &[u8] { | |
ea8adc8c XL |
561 | unsafe { |
562 | let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _; | |
563 | let len = llvm::LLVMRustThinLTOBufferLen(self.0); | |
564 | slice::from_raw_parts(ptr, len) | |
565 | } | |
566 | } | |
1a4d82fc JJ |
567 | } |
568 | ||
ea8adc8c XL |
569 | impl Drop for ThinBuffer { |
570 | fn drop(&mut self) { | |
571 | unsafe { | |
572 | llvm::LLVMRustThinLTOBufferFree(self.0); | |
573 | } | |
574 | } | |
1a4d82fc JJ |
575 | } |
576 | ||
ea8adc8c XL |
577 | impl ThinModule { |
578 | fn name(&self) -> &str { | |
579 | self.shared.module_names[self.idx].to_str().unwrap() | |
580 | } | |
581 | ||
582 | fn cost(&self) -> u64 { | |
583 | // Yes, that's correct, we're using the size of the bytecode as an | |
584 | // indicator for how costly this codegen unit is. | |
585 | self.data().len() as u64 | |
586 | } | |
587 | ||
588 | fn data(&self) -> &[u8] { | |
589 | let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data()); | |
590 | a.unwrap_or_else(|| { | |
591 | let len = self.shared.thin_buffers.len(); | |
592 | self.shared.serialized_modules[self.idx - len].data() | |
593 | }) | |
594 | } | |
595 | ||
596 | unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline) | |
597 | -> Result<ModuleTranslation, FatalError> | |
598 | { | |
599 | let diag_handler = cgcx.create_diag_handler(); | |
600 | let tm = (cgcx.tm_factory)().map_err(|e| { | |
601 | write::llvm_err(&diag_handler, e) | |
602 | })?; | |
603 | ||
604 | // Right now the implementation we've got only works over serialized | |
605 | // modules, so we create a fresh new LLVM context and parse the module | |
606 | // into that context. One day, however, we may do this for upstream | |
607 | // crates but for locally translated modules we may be able to reuse | |
608 | // that LLVM Context and Module. | |
609 | let llcx = llvm::LLVMContextCreate(); | |
610 | let llmod = llvm::LLVMRustParseBitcodeForThinLTO( | |
611 | llcx, | |
612 | self.data().as_ptr(), | |
613 | self.data().len(), | |
614 | self.shared.module_names[self.idx].as_ptr(), | |
615 | ); | |
616 | assert!(!llmod.is_null()); | |
617 | let mtrans = ModuleTranslation { | |
618 | source: ModuleSource::Translated(ModuleLlvm { | |
619 | llmod, | |
620 | llcx, | |
621 | tm, | |
622 | }), | |
623 | llmod_id: self.name().to_string(), | |
624 | name: self.name().to_string(), | |
625 | kind: ModuleKind::Regular, | |
626 | }; | |
627 | cgcx.save_temp_bitcode(&mtrans, "thin-lto-input"); | |
628 | ||
629 | // Like with "fat" LTO, get some better optimizations if landing pads | |
630 | // are disabled by removing all landing pads. | |
631 | if cgcx.no_landing_pads { | |
632 | llvm::LLVMRustMarkAllFunctionsNounwind(llmod); | |
633 | cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-nounwind"); | |
634 | timeline.record("nounwind"); | |
635 | } | |
636 | ||
637 | // Up next comes the per-module local analyses that we do for Thin LTO. | |
638 | // Each of these functions is basically copied from the LLVM | |
639 | // implementation and then tailored to suit this implementation. Ideally | |
640 | // each of these would be supported by upstream LLVM but that's perhaps | |
641 | // a patch for another day! | |
642 | // | |
643 | // You can find some more comments about these functions in the LLVM | |
644 | // bindings we've got (currently `PassWrapper.cpp`) | |
645 | if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) { | |
646 | let msg = format!("failed to prepare thin LTO module"); | |
647 | return Err(write::llvm_err(&diag_handler, msg)) | |
648 | } | |
649 | cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-rename"); | |
650 | timeline.record("rename"); | |
651 | if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) { | |
652 | let msg = format!("failed to prepare thin LTO module"); | |
653 | return Err(write::llvm_err(&diag_handler, msg)) | |
654 | } | |
655 | cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-resolve"); | |
656 | timeline.record("resolve"); | |
657 | if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) { | |
658 | let msg = format!("failed to prepare thin LTO module"); | |
659 | return Err(write::llvm_err(&diag_handler, msg)) | |
660 | } | |
661 | cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-internalize"); | |
662 | timeline.record("internalize"); | |
663 | if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) { | |
664 | let msg = format!("failed to prepare thin LTO module"); | |
665 | return Err(write::llvm_err(&diag_handler, msg)) | |
666 | } | |
667 | cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-import"); | |
668 | timeline.record("import"); | |
669 | ||
670 | // Alright now that we've done everything related to the ThinLTO | |
671 | // analysis it's time to run some optimizations! Here we use the same | |
672 | // `run_pass_manager` as the "fat" LTO above except that we tell it to | |
673 | // populate a thin-specific pass manager, which presumably LLVM treats a | |
674 | // little differently. | |
675 | info!("running thin lto passes over {}", mtrans.name); | |
676 | let config = cgcx.config(mtrans.kind); | |
677 | run_pass_manager(cgcx, tm, llmod, config, true); | |
678 | cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-pm"); | |
679 | timeline.record("thin-done"); | |
680 | Ok(mtrans) | |
681 | } | |
1a4d82fc | 682 | } |