1 use super::link
::{self, remove}
;
2 use super::linker
::LinkerInfo
;
3 use super::lto
::{self, SerializedModule}
;
4 use super::symbol_export
::symbol_name_for_instance_in_crate
;
7 CachedModuleCodegen
, CodegenResults
, CompiledModule
, CrateInfo
, ModuleCodegen
, ModuleKind
,
11 use jobserver
::{Acquired, Client}
;
12 use rustc_data_structures
::fx
::FxHashMap
;
13 use rustc_data_structures
::profiling
::SelfProfilerRef
;
14 use rustc_data_structures
::profiling
::TimingGuard
;
15 use rustc_data_structures
::profiling
::VerboseTimingGuard
;
16 use rustc_data_structures
::sync
::Lrc
;
17 use rustc_errors
::emitter
::Emitter
;
18 use rustc_errors
::{DiagnosticId, FatalError, Handler, Level}
;
19 use rustc_fs_util
::link_or_copy
;
20 use rustc_hir
::def_id
::{CrateNum, LOCAL_CRATE}
;
21 use rustc_incremental
::{
22 copy_cgu_workproduct_to_incr_comp_cache_dir
, in_incr_comp_dir
, in_incr_comp_dir_sess
,
24 use rustc_middle
::dep_graph
::{WorkProduct, WorkProductId}
;
25 use rustc_middle
::middle
::cstore
::EncodedMetadata
;
26 use rustc_middle
::middle
::exported_symbols
::SymbolExportLevel
;
27 use rustc_middle
::ty
::TyCtxt
;
28 use rustc_session
::cgu_reuse_tracker
::CguReuseTracker
;
29 use rustc_session
::config
::{self, CrateType, Lto, OutputFilenames, OutputType}
;
30 use rustc_session
::config
::{Passes, SanitizerSet, SwitchWithOptPath}
;
31 use rustc_session
::Session
;
32 use rustc_span
::source_map
::SourceMap
;
33 use rustc_span
::symbol
::{sym, Symbol}
;
34 use rustc_span
::{BytePos, FileName, InnerSpan, Pos, Span}
;
35 use rustc_target
::spec
::{MergeFunctions, PanicStrategy}
;
41 use std
::path
::{Path, PathBuf}
;
43 use std
::sync
::mpsc
::{channel, Receiver, Sender}
;
/// File extension for the bitcode files saved to the incremental
/// compilation session directory before the LTO passes run
/// (see `emit_pre_lto_bc` and `need_pre_lto_bitcode_for_incr_comp`).
const PRE_LTO_BC_EXT: &str = "pre-lto.bc";
49 /// What kind of object file to emit.
50 #[derive(Clone, Copy, PartialEq)]
55 // Just uncompressed llvm bitcode. Provides easy compatibility with
56 // emscripten's ecc compiler, when used as the linker.
59 // Object code, possibly augmented with a bitcode section.
60 ObjectCode(BitcodeSection
),
63 /// What kind of llvm bitcode section to embed in an object file.
64 #[derive(Clone, Copy, PartialEq)]
65 pub enum BitcodeSection
{
66 // No bitcode section.
69 // A full, uncompressed bitcode section.
73 /// Module-specific configuration for `optimize_and_codegen`.
74 pub struct ModuleConfig
{
75 /// Names of additional optimization passes to run.
76 pub passes
: Vec
<String
>,
77 /// Some(level) to optimize at a certain level, or None to run
78 /// absolutely no optimizations (used for the metadata module).
79 pub opt_level
: Option
<config
::OptLevel
>,
81 /// Some(level) to optimize binary size, or None to not affect program size.
82 pub opt_size
: Option
<config
::OptLevel
>,
84 pub pgo_gen
: SwitchWithOptPath
,
85 pub pgo_use
: Option
<PathBuf
>,
87 pub sanitizer
: SanitizerSet
,
88 pub sanitizer_recover
: SanitizerSet
,
89 pub sanitizer_memory_track_origins
: usize,
91 // Flags indicating which outputs to produce.
92 pub emit_pre_lto_bc
: bool
,
93 pub emit_no_opt_bc
: bool
,
97 pub emit_obj
: EmitObj
,
98 pub bc_cmdline
: String
,
100 // Miscellaneous flags. These are mostly copied from command-line
102 pub verify_llvm_ir
: bool
,
103 pub no_prepopulate_passes
: bool
,
104 pub no_builtins
: bool
,
105 pub time_module
: bool
,
106 pub vectorize_loop
: bool
,
107 pub vectorize_slp
: bool
,
108 pub merge_functions
: bool
,
109 pub inline_threshold
: Option
<usize>,
110 pub new_llvm_pass_manager
: bool
,
111 pub emit_lifetime_markers
: bool
,
119 is_compiler_builtins
: bool
,
121 // If it's a regular module, use `$regular`, otherwise use `$other`.
122 // `$regular` and `$other` are evaluated lazily.
123 macro_rules
! if_regular
{
124 ($regular
: expr
, $other
: expr
) => {
125 if let ModuleKind
::Regular
= kind { $regular }
else { $other }
129 let opt_level_and_size
= if_regular
!(Some(sess
.opts
.optimize
), None
);
131 let save_temps
= sess
.opts
.cg
.save_temps
;
133 let should_emit_obj
= sess
.opts
.output_types
.contains_key(&OutputType
::Exe
)
135 ModuleKind
::Regular
=> sess
.opts
.output_types
.contains_key(&OutputType
::Object
),
136 ModuleKind
::Allocator
=> false,
137 ModuleKind
::Metadata
=> sess
.opts
.output_types
.contains_key(&OutputType
::Metadata
),
140 let emit_obj
= if !should_emit_obj
{
142 } else if sess
.target
.obj_is_bitcode
143 || (sess
.opts
.cg
.linker_plugin_lto
.enabled() && !no_builtins
)
145 // This case is selected if the target uses objects as bitcode, or
146 // if linker plugin LTO is enabled. In the linker plugin LTO case
147 // the assumption is that the final link-step will read the bitcode
148 // and convert it to object code. This may be done by either the
149 // native linker or rustc itself.
151 // Note, however, that the linker-plugin-lto requested here is
152 // explicitly ignored for `#![no_builtins]` crates. These crates are
153 // specifically ignored by rustc's LTO passes and wouldn't work if
154 // loaded into the linker. These crates define symbols that LLVM
155 // lowers intrinsics to, and these symbol dependencies aren't known
156 // until after codegen. As a result any crate marked
157 // `#![no_builtins]` is assumed to not participate in LTO and
158 // instead goes on to generate object code.
160 } else if need_bitcode_in_object(sess
) {
161 EmitObj
::ObjectCode(BitcodeSection
::Full
)
163 EmitObj
::ObjectCode(BitcodeSection
::None
)
169 let mut passes
= sess
.opts
.cg
.passes
.clone();
170 // compiler_builtins overrides the codegen-units settings,
171 // which is incompatible with -Zprofile which requires that
172 // only a single codegen unit is used per crate.
173 if sess
.opts
.debugging_opts
.profile
&& !is_compiler_builtins
{
174 passes
.push("insert-gcov-profiling".to_owned());
177 // The rustc option `-Zinstrument_coverage` injects intrinsic calls to
178 // `llvm.instrprof.increment()`, which requires the LLVM `instrprof` pass.
179 if sess
.opts
.debugging_opts
.instrument_coverage
{
180 passes
.push("instrprof".to_owned());
187 opt_level
: opt_level_and_size
,
188 opt_size
: opt_level_and_size
,
190 pgo_gen
: if_regular
!(
191 sess
.opts
.cg
.profile_generate
.clone(),
192 SwitchWithOptPath
::Disabled
194 pgo_use
: if_regular
!(sess
.opts
.cg
.profile_use
.clone(), None
),
196 sanitizer
: if_regular
!(sess
.opts
.debugging_opts
.sanitizer
, SanitizerSet
::empty()),
197 sanitizer_recover
: if_regular
!(
198 sess
.opts
.debugging_opts
.sanitizer_recover
,
199 SanitizerSet
::empty()
201 sanitizer_memory_track_origins
: if_regular
!(
202 sess
.opts
.debugging_opts
.sanitizer_memory_track_origins
,
206 emit_pre_lto_bc
: if_regular
!(
207 save_temps
|| need_pre_lto_bitcode_for_incr_comp(sess
),
210 emit_no_opt_bc
: if_regular
!(save_temps
, false),
211 emit_bc
: if_regular
!(
212 save_temps
|| sess
.opts
.output_types
.contains_key(&OutputType
::Bitcode
),
215 emit_ir
: if_regular
!(
216 sess
.opts
.output_types
.contains_key(&OutputType
::LlvmAssembly
),
219 emit_asm
: if_regular
!(
220 sess
.opts
.output_types
.contains_key(&OutputType
::Assembly
),
224 bc_cmdline
: sess
.target
.bitcode_llvm_cmdline
.clone(),
226 verify_llvm_ir
: sess
.verify_llvm_ir(),
227 no_prepopulate_passes
: sess
.opts
.cg
.no_prepopulate_passes
,
228 no_builtins
: no_builtins
|| sess
.target
.no_builtins
,
230 // Exclude metadata and allocator modules from time_passes output,
231 // since they throw off the "LLVM passes" measurement.
232 time_module
: if_regular
!(true, false),
234 // Copy what clang does by turning on loop vectorization at O2 and
235 // slp vectorization at O3.
236 vectorize_loop
: !sess
.opts
.cg
.no_vectorize_loops
237 && (sess
.opts
.optimize
== config
::OptLevel
::Default
238 || sess
.opts
.optimize
== config
::OptLevel
::Aggressive
),
239 vectorize_slp
: !sess
.opts
.cg
.no_vectorize_slp
240 && sess
.opts
.optimize
== config
::OptLevel
::Aggressive
,
242 // Some targets (namely, NVPTX) interact badly with the
243 // MergeFunctions pass. This is because MergeFunctions can generate
244 // new function calls which may interfere with the target calling
245 // convention; e.g. for the NVPTX target, PTX kernels should not
246 // call other PTX kernels. MergeFunctions can also be configured to
247 // generate aliases instead, but aliases are not supported by some
248 // backends (again, NVPTX). Therefore, allow targets to opt out of
249 // the MergeFunctions pass, but otherwise keep the pass enabled (at
250 // O2 and O3) since it can be useful for reducing code size.
251 merge_functions
: match sess
255 .unwrap_or(sess
.target
.merge_functions
)
257 MergeFunctions
::Disabled
=> false,
258 MergeFunctions
::Trampolines
| MergeFunctions
::Aliases
=> {
259 sess
.opts
.optimize
== config
::OptLevel
::Default
260 || sess
.opts
.optimize
== config
::OptLevel
::Aggressive
264 inline_threshold
: sess
.opts
.cg
.inline_threshold
,
265 new_llvm_pass_manager
: sess
.opts
.debugging_opts
.new_llvm_pass_manager
,
266 emit_lifetime_markers
: sess
.emit_lifetime_markers(),
270 pub fn bitcode_needed(&self) -> bool
{
272 || self.emit_obj
== EmitObj
::Bitcode
273 || self.emit_obj
== EmitObj
::ObjectCode(BitcodeSection
::Full
)
/// Configuration passed to the function returned by the `target_machine_factory`.
pub struct TargetMachineFactoryConfig {
    /// Split DWARF is enabled in LLVM by checking that `TM.MCOptions.SplitDwarfFile`
    /// isn't empty, so the path to the dwarf object has to be provided when we
    /// create the target machine. This can be ignored by backends which do not
    /// need it for their Split DWARF support.
    pub split_dwarf_file: Option<PathBuf>,
}
285 pub type TargetMachineFactoryFn
<B
> = Arc
<
286 dyn Fn(TargetMachineFactoryConfig
) -> Result
<<B
as WriteBackendMethods
>::TargetMachine
, String
>
291 pub type ExportedSymbols
= FxHashMap
<CrateNum
, Arc
<Vec
<(String
, SymbolExportLevel
)>>>;
293 /// Additional resources used by optimize_and_codegen (not module specific)
295 pub struct CodegenContext
<B
: WriteBackendMethods
> {
296 // Resources needed when running LTO
298 pub prof
: SelfProfilerRef
,
300 pub no_landing_pads
: bool
,
301 pub save_temps
: bool
,
302 pub fewer_names
: bool
,
303 pub exported_symbols
: Option
<Arc
<ExportedSymbols
>>,
304 pub opts
: Arc
<config
::Options
>,
305 pub crate_types
: Vec
<CrateType
>,
306 pub each_linked_rlib_for_lto
: Vec
<(CrateNum
, PathBuf
)>,
307 pub output_filenames
: Arc
<OutputFilenames
>,
308 pub regular_module_config
: Arc
<ModuleConfig
>,
309 pub metadata_module_config
: Arc
<ModuleConfig
>,
310 pub allocator_module_config
: Arc
<ModuleConfig
>,
311 pub tm_factory
: TargetMachineFactoryFn
<B
>,
312 pub msvc_imps_needed
: bool
,
313 pub is_pe_coff
: bool
,
314 pub target_pointer_width
: u32,
315 pub target_arch
: String
,
316 pub debuginfo
: config
::DebugInfo
,
317 pub split_dwarf_kind
: config
::SplitDwarfKind
,
319 // Number of cgus excluding the allocator/metadata modules
320 pub total_cgus
: usize,
321 // Handler to use for diagnostics produced during codegen.
322 pub diag_emitter
: SharedEmitter
,
323 // LLVM optimizations for which we want to print remarks.
325 // Worker thread number
327 // The incremental compilation session directory, or None if we are not
328 // compiling incrementally
329 pub incr_comp_session_dir
: Option
<PathBuf
>,
330 // Used to update CGU re-use information during the thinlto phase.
331 pub cgu_reuse_tracker
: CguReuseTracker
,
332 // Channel back to the main control thread to send messages to
333 pub coordinator_send
: Sender
<Box
<dyn Any
+ Send
>>,
336 impl<B
: WriteBackendMethods
> CodegenContext
<B
> {
337 pub fn create_diag_handler(&self) -> Handler
{
338 Handler
::with_emitter(true, None
, Box
::new(self.diag_emitter
.clone()))
341 pub fn config(&self, kind
: ModuleKind
) -> &ModuleConfig
{
343 ModuleKind
::Regular
=> &self.regular_module_config
,
344 ModuleKind
::Metadata
=> &self.metadata_module_config
,
345 ModuleKind
::Allocator
=> &self.allocator_module_config
,
350 fn generate_lto_work
<B
: ExtraBackendMethods
>(
351 cgcx
: &CodegenContext
<B
>,
352 needs_fat_lto
: Vec
<FatLTOInput
<B
>>,
353 needs_thin_lto
: Vec
<(String
, B
::ThinBuffer
)>,
354 import_only_modules
: Vec
<(SerializedModule
<B
::ModuleBuffer
>, WorkProduct
)>,
355 ) -> Vec
<(WorkItem
<B
>, u64)> {
356 let _prof_timer
= cgcx
.prof
.generic_activity("codegen_generate_lto_work");
358 let (lto_modules
, copy_jobs
) = if !needs_fat_lto
.is_empty() {
359 assert
!(needs_thin_lto
.is_empty());
361 B
::run_fat_lto(cgcx
, needs_fat_lto
, import_only_modules
).unwrap_or_else(|e
| e
.raise());
362 (vec
![lto_module
], vec
![])
364 assert
!(needs_fat_lto
.is_empty());
365 B
::run_thin_lto(cgcx
, needs_thin_lto
, import_only_modules
).unwrap_or_else(|e
| e
.raise())
371 let cost
= module
.cost();
372 (WorkItem
::LTO(module
), cost
)
374 .chain(copy_jobs
.into_iter().map(|wp
| {
376 WorkItem
::CopyPostLtoArtifacts(CachedModuleCodegen
{
377 name
: wp
.cgu_name
.clone(),
386 pub struct CompiledModules
{
387 pub modules
: Vec
<CompiledModule
>,
388 pub metadata_module
: Option
<CompiledModule
>,
389 pub allocator_module
: Option
<CompiledModule
>,
392 fn need_bitcode_in_object(sess
: &Session
) -> bool
{
393 let requested_for_rlib
= sess
.opts
.cg
.embed_bitcode
394 && sess
.crate_types().contains(&CrateType
::Rlib
)
395 && sess
.opts
.output_types
.contains_key(&OutputType
::Exe
);
396 let forced_by_target
= sess
.target
.forces_embed_bitcode
;
397 requested_for_rlib
|| forced_by_target
400 fn need_pre_lto_bitcode_for_incr_comp(sess
: &Session
) -> bool
{
401 if sess
.opts
.incremental
.is_none() {
407 Lto
::Fat
| Lto
::Thin
| Lto
::ThinLocal
=> true,
411 pub fn start_async_codegen
<B
: ExtraBackendMethods
>(
414 metadata
: EncodedMetadata
,
416 ) -> OngoingCodegen
<B
> {
417 let (coordinator_send
, coordinator_receive
) = channel();
420 let crate_name
= tcx
.crate_name(LOCAL_CRATE
);
421 let no_builtins
= tcx
.sess
.contains_name(&tcx
.hir().krate().item
.attrs
, sym
::no_builtins
);
422 let is_compiler_builtins
=
423 tcx
.sess
.contains_name(&tcx
.hir().krate().item
.attrs
, sym
::compiler_builtins
);
426 .first_attr_value_str_by_name(&tcx
.hir().krate().item
.attrs
, sym
::windows_subsystem
);
427 let windows_subsystem
= subsystem
.map(|subsystem
| {
428 if subsystem
!= sym
::windows
&& subsystem
!= sym
::console
{
429 tcx
.sess
.fatal(&format
!(
430 "invalid windows subsystem `{}`, only \
431 `windows` and `console` are allowed",
435 subsystem
.to_string()
438 let linker_info
= LinkerInfo
::new(tcx
);
439 let crate_info
= CrateInfo
::new(tcx
);
442 ModuleConfig
::new(ModuleKind
::Regular
, sess
, no_builtins
, is_compiler_builtins
);
443 let metadata_config
=
444 ModuleConfig
::new(ModuleKind
::Metadata
, sess
, no_builtins
, is_compiler_builtins
);
445 let allocator_config
=
446 ModuleConfig
::new(ModuleKind
::Allocator
, sess
, no_builtins
, is_compiler_builtins
);
448 let (shared_emitter
, shared_emitter_main
) = SharedEmitter
::new();
449 let (codegen_worker_send
, codegen_worker_receive
) = channel();
451 let coordinator_thread
= start_executing_work(
459 sess
.jobserver
.clone(),
460 Arc
::new(regular_config
),
461 Arc
::new(metadata_config
),
462 Arc
::new(allocator_config
),
463 coordinator_send
.clone(),
475 codegen_worker_receive
,
477 future
: coordinator_thread
,
478 output_filenames
: tcx
.output_filenames(LOCAL_CRATE
),
482 fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
484 compiled_modules
: &CompiledModules
,
485 ) -> FxHashMap
<WorkProductId
, WorkProduct
> {
486 let mut work_products
= FxHashMap
::default();
488 if sess
.opts
.incremental
.is_none() {
489 return work_products
;
492 let _timer
= sess
.timer("copy_all_cgu_workproducts_to_incr_comp_cache_dir");
494 for module
in compiled_modules
.modules
.iter().filter(|m
| m
.kind
== ModuleKind
::Regular
) {
495 let path
= module
.object
.as_ref().cloned();
497 if let Some((id
, product
)) =
498 copy_cgu_workproduct_to_incr_comp_cache_dir(sess
, &module
.name
, &path
)
500 work_products
.insert(id
, product
);
507 fn produce_final_output_artifacts(
509 compiled_modules
: &CompiledModules
,
510 crate_output
: &OutputFilenames
,
512 let mut user_wants_bitcode
= false;
513 let mut user_wants_objects
= false;
515 // Produce final compile outputs.
516 let copy_gracefully
= |from
: &Path
, to
: &Path
| {
517 if let Err(e
) = fs
::copy(from
, to
) {
518 sess
.err(&format
!("could not copy {:?} to {:?}: {}", from
, to
, e
));
522 let copy_if_one_unit
= |output_type
: OutputType
, keep_numbered
: bool
| {
523 if compiled_modules
.modules
.len() == 1 {
524 // 1) Only one codegen unit. In this case it's no difficulty
525 // to copy `foo.0.x` to `foo.x`.
526 let module_name
= Some(&compiled_modules
.modules
[0].name
[..]);
527 let path
= crate_output
.temp_path(output_type
, module_name
);
528 copy_gracefully(&path
, &crate_output
.path(output_type
));
529 if !sess
.opts
.cg
.save_temps
&& !keep_numbered
{
530 // The user just wants `foo.x`, not `foo.#module-name#.x`.
534 let ext
= crate_output
535 .temp_path(output_type
, None
)
542 if crate_output
.outputs
.contains_key(&output_type
) {
543 // 2) Multiple codegen units, with `--emit foo=some_name`. We have
544 // no good solution for this case, so warn the user.
546 "ignoring emit path because multiple .{} files \
550 } else if crate_output
.single_output_file
.is_some() {
551 // 3) Multiple codegen units, with `-o some_name`. We have
552 // no good solution for this case, so warn the user.
554 "ignoring -o because multiple .{} files \
559 // 4) Multiple codegen units, but no explicit name. We
560 // just leave the `foo.0.x` files in place.
561 // (We don't have to do any work in this case.)
566 // Flag to indicate whether the user explicitly requested bitcode.
567 // Otherwise, we produced it only as a temporary output, and will need
569 for output_type
in crate_output
.outputs
.keys() {
571 OutputType
::Bitcode
=> {
572 user_wants_bitcode
= true;
573 // Copy to .bc, but always keep the .0.bc. There is a later
574 // check to figure out if we should delete .0.bc files, or keep
575 // them for making an rlib.
576 copy_if_one_unit(OutputType
::Bitcode
, true);
578 OutputType
::LlvmAssembly
=> {
579 copy_if_one_unit(OutputType
::LlvmAssembly
, false);
581 OutputType
::Assembly
=> {
582 copy_if_one_unit(OutputType
::Assembly
, false);
584 OutputType
::Object
=> {
585 user_wants_objects
= true;
586 copy_if_one_unit(OutputType
::Object
, true);
588 OutputType
::Mir
| OutputType
::Metadata
| OutputType
::Exe
| OutputType
::DepInfo
=> {}
592 // Clean up unwanted temporary files.
594 // We create the following files by default:
595 // - #crate#.#module-name#.bc
596 // - #crate#.#module-name#.o
597 // - #crate#.crate.metadata.bc
598 // - #crate#.crate.metadata.o
599 // - #crate#.o (linked from crate.##.o)
600 // - #crate#.bc (copied from crate.##.bc)
601 // We may create additional files if requested by the user (through
602 // `-C save-temps` or `--emit=` flags).
604 if !sess
.opts
.cg
.save_temps
{
605 // Remove the temporary .#module-name#.o objects. If the user didn't
606 // explicitly request bitcode (with --emit=bc), and the bitcode is not
607 // needed for building an rlib, then we must remove .#module-name#.bc as
610 // Specific rules for keeping .#module-name#.bc:
611 // - If the user requested bitcode (`user_wants_bitcode`), and
612 // codegen_units > 1, then keep it.
613 // - If the user requested bitcode but codegen_units == 1, then we
614 // can toss .#module-name#.bc because we copied it to .bc earlier.
615 // - If we're not building an rlib and the user didn't request
616 // bitcode, then delete .#module-name#.bc.
617 // If you change how this works, also update back::link::link_rlib,
618 // where .#module-name#.bc files are (maybe) deleted after making an
620 let needs_crate_object
= crate_output
.outputs
.contains_key(&OutputType
::Exe
);
622 let keep_numbered_bitcode
= user_wants_bitcode
&& sess
.codegen_units() > 1;
624 let keep_numbered_objects
=
625 needs_crate_object
|| (user_wants_objects
&& sess
.codegen_units() > 1);
627 for module
in compiled_modules
.modules
.iter() {
628 if let Some(ref path
) = module
.object
{
629 if !keep_numbered_objects
{
634 if let Some(ref path
) = module
.dwarf_object
{
635 if !keep_numbered_objects
{
640 if let Some(ref path
) = module
.bytecode
{
641 if !keep_numbered_bitcode
{
647 if !user_wants_bitcode
{
648 if let Some(ref metadata_module
) = compiled_modules
.metadata_module
{
649 if let Some(ref path
) = metadata_module
.bytecode
{
654 if let Some(ref allocator_module
) = compiled_modules
.allocator_module
{
655 if let Some(ref path
) = allocator_module
.bytecode
{
662 // We leave the following files around by default:
664 // - #crate#.crate.metadata.o
666 // These are used in linking steps and will be cleaned up afterward.
669 pub enum WorkItem
<B
: WriteBackendMethods
> {
670 /// Optimize a newly codegened, totally unoptimized module.
671 Optimize(ModuleCodegen
<B
::Module
>),
672 /// Copy the post-LTO artifacts from the incremental cache to the output
674 CopyPostLtoArtifacts(CachedModuleCodegen
),
675 /// Performs (Thin)LTO on the given module.
676 LTO(lto
::LtoModuleCodegen
<B
>),
679 impl<B
: WriteBackendMethods
> WorkItem
<B
> {
680 pub fn module_kind(&self) -> ModuleKind
{
682 WorkItem
::Optimize(ref m
) => m
.kind
,
683 WorkItem
::CopyPostLtoArtifacts(_
) | WorkItem
::LTO(_
) => ModuleKind
::Regular
,
687 fn start_profiling
<'a
>(&self, cgcx
: &'a CodegenContext
<B
>) -> TimingGuard
<'a
> {
689 WorkItem
::Optimize(ref m
) => {
690 cgcx
.prof
.generic_activity_with_arg("codegen_module_optimize", &m
.name
[..])
692 WorkItem
::CopyPostLtoArtifacts(ref m
) => cgcx
694 .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &m
.name
[..]),
695 WorkItem
::LTO(ref m
) => {
696 cgcx
.prof
.generic_activity_with_arg("codegen_module_perform_lto", m
.name())
702 enum WorkItemResult
<B
: WriteBackendMethods
> {
703 Compiled(CompiledModule
),
704 NeedsLink(ModuleCodegen
<B
::Module
>),
705 NeedsFatLTO(FatLTOInput
<B
>),
706 NeedsThinLTO(String
, B
::ThinBuffer
),
709 pub enum FatLTOInput
<B
: WriteBackendMethods
> {
710 Serialized { name: String, buffer: B::ModuleBuffer }
,
711 InMemory(ModuleCodegen
<B
::Module
>),
714 fn execute_work_item
<B
: ExtraBackendMethods
>(
715 cgcx
: &CodegenContext
<B
>,
716 work_item
: WorkItem
<B
>,
717 ) -> Result
<WorkItemResult
<B
>, FatalError
> {
718 let module_config
= cgcx
.config(work_item
.module_kind());
721 WorkItem
::Optimize(module
) => execute_optimize_work_item(cgcx
, module
, module_config
),
722 WorkItem
::CopyPostLtoArtifacts(module
) => {
723 execute_copy_from_cache_work_item(cgcx
, module
, module_config
)
725 WorkItem
::LTO(module
) => execute_lto_work_item(cgcx
, module
, module_config
),
729 // Actual LTO type we end up choosing based on multiple factors.
730 pub enum ComputedLtoType
{
736 pub fn compute_per_cgu_lto_type(
738 opts
: &config
::Options
,
739 sess_crate_types
: &[CrateType
],
740 module_kind
: ModuleKind
,
741 ) -> ComputedLtoType
{
742 // Metadata modules never participate in LTO regardless of the lto
744 if module_kind
== ModuleKind
::Metadata
{
745 return ComputedLtoType
::No
;
748 // If the linker does LTO, we don't have to do it. Note that we
749 // keep doing full LTO, if it is requested, as not to break the
750 // assumption that the output will be a single module.
751 let linker_does_lto
= opts
.cg
.linker_plugin_lto
.enabled();
753 // When we're automatically doing ThinLTO for multi-codegen-unit
754 // builds we don't actually want to LTO the allocator modules if
755 // it shows up. This is due to various linker shenanigans that
756 // we'll encounter later.
757 let is_allocator
= module_kind
== ModuleKind
::Allocator
;
759 // We ignore a request for full crate graph LTO if the crate type
760 // is only an rlib, as there is no full crate graph to process,
761 // that'll happen later.
763 // This use case currently comes up primarily for targets that
764 // require LTO so the request for LTO is always unconditionally
765 // passed down to the backend, but we don't actually want to do
766 // anything about it yet until we've got a final product.
767 let is_rlib
= sess_crate_types
.len() == 1 && sess_crate_types
[0] == CrateType
::Rlib
;
770 Lto
::ThinLocal
if !linker_does_lto
&& !is_allocator
=> ComputedLtoType
::Thin
,
771 Lto
::Thin
if !linker_does_lto
&& !is_rlib
=> ComputedLtoType
::Thin
,
772 Lto
::Fat
if !is_rlib
=> ComputedLtoType
::Fat
,
773 _
=> ComputedLtoType
::No
,
777 fn execute_optimize_work_item
<B
: ExtraBackendMethods
>(
778 cgcx
: &CodegenContext
<B
>,
779 module
: ModuleCodegen
<B
::Module
>,
780 module_config
: &ModuleConfig
,
781 ) -> Result
<WorkItemResult
<B
>, FatalError
> {
782 let diag_handler
= cgcx
.create_diag_handler();
785 B
::optimize(cgcx
, &diag_handler
, &module
, module_config
)?
;
788 // After we've done the initial round of optimizations we need to
789 // decide whether to synchronously codegen this module or ship it
790 // back to the coordinator thread for further LTO processing (which
791 // has to wait for all the initial modules to be optimized).
793 let lto_type
= compute_per_cgu_lto_type(&cgcx
.lto
, &cgcx
.opts
, &cgcx
.crate_types
, module
.kind
);
795 // If we're doing some form of incremental LTO then we need to be sure to
796 // save our module to disk first.
797 let bitcode
= if cgcx
.config(module
.kind
).emit_pre_lto_bc
{
798 let filename
= pre_lto_bitcode_filename(&module
.name
);
799 cgcx
.incr_comp_session_dir
.as_ref().map(|path
| path
.join(&filename
))
805 ComputedLtoType
::No
=> finish_intra_module_work(cgcx
, module
, module_config
),
806 ComputedLtoType
::Thin
=> {
807 let (name
, thin_buffer
) = B
::prepare_thin(module
);
808 if let Some(path
) = bitcode
{
809 fs
::write(&path
, thin_buffer
.data()).unwrap_or_else(|e
| {
810 panic
!("Error writing pre-lto-bitcode file `{}`: {}", path
.display(), e
);
813 Ok(WorkItemResult
::NeedsThinLTO(name
, thin_buffer
))
815 ComputedLtoType
::Fat
=> match bitcode
{
817 let (name
, buffer
) = B
::serialize_module(module
);
818 fs
::write(&path
, buffer
.data()).unwrap_or_else(|e
| {
819 panic
!("Error writing pre-lto-bitcode file `{}`: {}", path
.display(), e
);
821 Ok(WorkItemResult
::NeedsFatLTO(FatLTOInput
::Serialized { name, buffer }
))
823 None
=> Ok(WorkItemResult
::NeedsFatLTO(FatLTOInput
::InMemory(module
))),
828 fn execute_copy_from_cache_work_item
<B
: ExtraBackendMethods
>(
829 cgcx
: &CodegenContext
<B
>,
830 module
: CachedModuleCodegen
,
831 module_config
: &ModuleConfig
,
832 ) -> Result
<WorkItemResult
<B
>, FatalError
> {
833 let incr_comp_session_dir
= cgcx
.incr_comp_session_dir
.as_ref().unwrap();
834 let mut object
= None
;
835 if let Some(saved_file
) = module
.source
.saved_file
{
836 let obj_out
= cgcx
.output_filenames
.temp_path(OutputType
::Object
, Some(&module
.name
));
837 object
= Some(obj_out
.clone());
838 let source_file
= in_incr_comp_dir(&incr_comp_session_dir
, &saved_file
);
840 "copying pre-existing module `{}` from {:?} to {}",
845 if let Err(err
) = link_or_copy(&source_file
, &obj_out
) {
846 let diag_handler
= cgcx
.create_diag_handler();
847 diag_handler
.err(&format
!(
848 "unable to copy {} to {}: {}",
849 source_file
.display(),
856 assert_eq
!(object
.is_some(), module_config
.emit_obj
!= EmitObj
::None
);
858 Ok(WorkItemResult
::Compiled(CompiledModule
{
860 kind
: ModuleKind
::Regular
,
867 fn execute_lto_work_item
<B
: ExtraBackendMethods
>(
868 cgcx
: &CodegenContext
<B
>,
869 mut module
: lto
::LtoModuleCodegen
<B
>,
870 module_config
: &ModuleConfig
,
871 ) -> Result
<WorkItemResult
<B
>, FatalError
> {
872 let module
= unsafe { module.optimize(cgcx)? }
;
873 finish_intra_module_work(cgcx
, module
, module_config
)
876 fn finish_intra_module_work
<B
: ExtraBackendMethods
>(
877 cgcx
: &CodegenContext
<B
>,
878 module
: ModuleCodegen
<B
::Module
>,
879 module_config
: &ModuleConfig
,
880 ) -> Result
<WorkItemResult
<B
>, FatalError
> {
881 let diag_handler
= cgcx
.create_diag_handler();
883 if !cgcx
.opts
.debugging_opts
.combine_cgu
884 || module
.kind
== ModuleKind
::Metadata
885 || module
.kind
== ModuleKind
::Allocator
887 let module
= unsafe { B::codegen(cgcx, &diag_handler, module, module_config)? }
;
888 Ok(WorkItemResult
::Compiled(module
))
890 Ok(WorkItemResult
::NeedsLink(module
))
894 pub enum Message
<B
: WriteBackendMethods
> {
895 Token(io
::Result
<Acquired
>),
897 result
: FatLTOInput
<B
>,
902 thin_buffer
: B
::ThinBuffer
,
906 module
: ModuleCodegen
<B
::Module
>,
910 result
: Result
<CompiledModule
, Option
<WorkerFatalError
>>,
914 llvm_work_item
: WorkItem
<B
>,
917 AddImportOnlyModule
{
918 module_data
: SerializedModule
<B
::ModuleBuffer
>,
919 work_product
: WorkProduct
,
928 code
: Option
<DiagnosticId
>,
932 #[derive(PartialEq, Clone, Copy, Debug)]
933 enum MainThreadWorkerState
{
939 fn start_executing_work
<B
: ExtraBackendMethods
>(
942 crate_info
: &CrateInfo
,
943 shared_emitter
: SharedEmitter
,
944 codegen_worker_send
: Sender
<Message
<B
>>,
945 coordinator_receive
: Receiver
<Box
<dyn Any
+ Send
>>,
948 regular_config
: Arc
<ModuleConfig
>,
949 metadata_config
: Arc
<ModuleConfig
>,
950 allocator_config
: Arc
<ModuleConfig
>,
951 tx_to_llvm_workers
: Sender
<Box
<dyn Any
+ Send
>>,
952 ) -> thread
::JoinHandle
<Result
<CompiledModules
, ()>> {
953 let coordinator_send
= tx_to_llvm_workers
;
956 // Compute the set of symbols we need to retain when doing LTO (if we need to)
957 let exported_symbols
= {
958 let mut exported_symbols
= FxHashMap
::default();
960 let copy_symbols
= |cnum
| {
962 .exported_symbols(cnum
)
964 .map(|&(s
, lvl
)| (symbol_name_for_instance_in_crate(tcx
, s
, cnum
), lvl
))
972 exported_symbols
.insert(LOCAL_CRATE
, copy_symbols(LOCAL_CRATE
));
973 Some(Arc
::new(exported_symbols
))
975 Lto
::Fat
| Lto
::Thin
=> {
976 exported_symbols
.insert(LOCAL_CRATE
, copy_symbols(LOCAL_CRATE
));
977 for &cnum
in tcx
.crates().iter() {
978 exported_symbols
.insert(cnum
, copy_symbols(cnum
));
980 Some(Arc
::new(exported_symbols
))
985 // First up, convert our jobserver into a helper thread so we can use normal
986 // mpsc channels to manage our messages and such.
987 // After we've requested tokens then we'll, when we can,
988 // get tokens on `coordinator_receive` which will
989 // get managed in the main loop below.
990 let coordinator_send2
= coordinator_send
.clone();
991 let helper
= jobserver
992 .into_helper_thread(move |token
| {
993 drop(coordinator_send2
.send(Box
::new(Message
::Token
::<B
>(token
))));
995 .expect("failed to spawn helper thread");
997 let mut each_linked_rlib_for_lto
= Vec
::new();
998 drop(link
::each_linked_rlib(crate_info
, &mut |cnum
, path
| {
999 if link
::ignored_for_lto(sess
, crate_info
, cnum
) {
1002 each_linked_rlib_for_lto
.push((cnum
, path
.to_path_buf()));
1005 let ol
= if tcx
.sess
.opts
.debugging_opts
.no_codegen
1006 || !tcx
.sess
.opts
.output_types
.should_codegen()
1008 // If we know that we won’t be doing codegen, create target machines without optimisation.
1009 config
::OptLevel
::No
1011 tcx
.backend_optimization_level(LOCAL_CRATE
)
1013 let cgcx
= CodegenContext
::<B
> {
1014 backend
: backend
.clone(),
1015 crate_types
: sess
.crate_types().to_vec(),
1016 each_linked_rlib_for_lto
,
1018 no_landing_pads
: sess
.panic_strategy() == PanicStrategy
::Abort
,
1019 fewer_names
: sess
.fewer_names(),
1020 save_temps
: sess
.opts
.cg
.save_temps
,
1021 opts
: Arc
::new(sess
.opts
.clone()),
1022 prof
: sess
.prof
.clone(),
1024 remark
: sess
.opts
.cg
.remark
.clone(),
1026 incr_comp_session_dir
: sess
.incr_comp_session_dir_opt().map(|r
| r
.clone()),
1027 cgu_reuse_tracker
: sess
.cgu_reuse_tracker
.clone(),
1029 diag_emitter
: shared_emitter
.clone(),
1030 output_filenames
: tcx
.output_filenames(LOCAL_CRATE
),
1031 regular_module_config
: regular_config
,
1032 metadata_module_config
: metadata_config
,
1033 allocator_module_config
: allocator_config
,
1034 tm_factory
: backend
.target_machine_factory(tcx
.sess
, ol
),
1036 msvc_imps_needed
: msvc_imps_needed(tcx
),
1037 is_pe_coff
: tcx
.sess
.target
.is_like_windows
,
1038 target_pointer_width
: tcx
.sess
.target
.pointer_width
,
1039 target_arch
: tcx
.sess
.target
.arch
.clone(),
1040 debuginfo
: tcx
.sess
.opts
.debuginfo
,
1041 split_dwarf_kind
: tcx
.sess
.opts
.debugging_opts
.split_dwarf
,
1044 // This is the "main loop" of parallel work happening for parallel codegen.
1045 // It's here that we manage parallelism, schedule work, and work with
1046 // messages coming from clients.
1048 // There are a few environmental pre-conditions that shape how the system
1051 // - Error reporting only can happen on the main thread because that's the
1052 // only place where we have access to the compiler `Session`.
1053 // - LLVM work can be done on any thread.
1054 // - Codegen can only happen on the main thread.
// - Each thread doing substantial work must be in possession of a `Token`
1056 // from the `Jobserver`.
1057 // - The compiler process always holds one `Token`. Any additional `Tokens`
1058 // have to be requested from the `Jobserver`.
1062 // The error reporting restriction is handled separately from the rest: We
1063 // set up a `SharedEmitter` the holds an open channel to the main thread.
1064 // When an error occurs on any thread, the shared emitter will send the
1065 // error message to the receiver main thread (`SharedEmitterMain`). The
1066 // main thread will periodically query this error message queue and emit
1067 // any error messages it has received. It might even abort compilation if
1068 // has received a fatal error. In this case we rely on all other threads
1069 // being torn down automatically with the main thread.
1070 // Since the main thread will often be busy doing codegen work, error
1071 // reporting will be somewhat delayed, since the message queue can only be
1072 // checked in between to work packages.
1074 // Work Processing Infrastructure
1075 // ==============================
1076 // The work processing infrastructure knows three major actors:
1078 // - the coordinator thread,
1079 // - the main thread, and
1080 // - LLVM worker threads
1082 // The coordinator thread is running a message loop. It instructs the main
1083 // thread about what work to do when, and it will spawn off LLVM worker
1084 // threads as open LLVM WorkItems become available.
1086 // The job of the main thread is to codegen CGUs into LLVM work package
1087 // (since the main thread is the only thread that can do this). The main
1088 // thread will block until it receives a message from the coordinator, upon
1089 // which it will codegen one CGU, send it to the coordinator and block
1090 // again. This way the coordinator can control what the main thread is
1093 // The coordinator keeps a queue of LLVM WorkItems, and when a `Token` is
1094 // available, it will spawn off a new LLVM worker thread and let it process
1095 // that a WorkItem. When a LLVM worker thread is done with its WorkItem,
1096 // it will just shut down, which also frees all resources associated with
// the given LLVM module, and sends a message to the coordinator that the
// WorkItem has been completed.
1102 // The scheduler's goal is to minimize the time it takes to complete all
1103 // work there is, however, we also want to keep memory consumption low
1104 // if possible. These two goals are at odds with each other: If memory
1105 // consumption were not an issue, we could just let the main thread produce
1106 // LLVM WorkItems at full speed, assuring maximal utilization of
// Tokens/LLVM worker threads. However, since codegen is usually faster
1108 // than LLVM processing, the queue of LLVM WorkItems would fill up and each
1109 // WorkItem potentially holds on to a substantial amount of memory.
1111 // So the actual goal is to always produce just enough LLVM WorkItems as
1112 // not to starve our LLVM worker threads. That means, once we have enough
1113 // WorkItems in our queue, we can block the main thread, so it does not
1114 // produce more until we need them.
1116 // Doing LLVM Work on the Main Thread
1117 // ----------------------------------
1118 // Since the main thread owns the compiler processes implicit `Token`, it is
1119 // wasteful to keep it blocked without doing any work. Therefore, what we do
1120 // in this case is: We spawn off an additional LLVM worker thread that helps
1121 // reduce the queue. The work it is doing corresponds to the implicit
1122 // `Token`. The coordinator will mark the main thread as being busy with
1123 // LLVM work. (The actual work happens on another OS thread but we just care
1124 // about `Tokens`, not actual threads).
1126 // When any LLVM worker thread finishes while the main thread is marked as
1127 // "busy with LLVM work", we can do a little switcheroo: We give the Token
1128 // of the just finished thread to the LLVM worker thread that is working on
1129 // behalf of the main thread's implicit Token, thus freeing up the main
1130 // thread again. The coordinator can then again decide what the main thread
1131 // should do. This allows the coordinator to make decisions at more points
1134 // Striking a Balance between Throughput and Memory Consumption
1135 // ------------------------------------------------------------
1136 // Since our two goals, (1) use as many Tokens as possible and (2) keep
1137 // memory consumption as low as possible, are in conflict with each other,
1138 // we have to find a trade off between them. Right now, the goal is to keep
1139 // all workers busy, which means that no worker should find the queue empty
1140 // when it is ready to start.
// How do we achieve this? Good question :) We actually never know how
1142 // many `Tokens` are potentially available so it's hard to say how much to
1143 // fill up the queue before switching the main thread to LLVM work. Also we
1144 // currently don't have a means to estimate how long a running LLVM worker
1145 // will still be busy with it's current WorkItem. However, we know the
1146 // maximal count of available Tokens that makes sense (=the number of CPU
1147 // cores), so we can take a conservative guess. The heuristic we use here
1148 // is implemented in the `queue_full_enough()` function.
1150 // Some Background on Jobservers
1151 // -----------------------------
1152 // It's worth also touching on the management of parallelism here. We don't
1153 // want to just spawn a thread per work item because while that's optimal
1154 // parallelism it may overload a system with too many threads or violate our
1155 // configuration for the maximum amount of cpu to use for this process. To
1156 // manage this we use the `jobserver` crate.
1158 // Job servers are an artifact of GNU make and are used to manage
1159 // parallelism between processes. A jobserver is a glorified IPC semaphore
1160 // basically. Whenever we want to run some work we acquire the semaphore,
1161 // and whenever we're done with that work we release the semaphore. In this
1162 // manner we can ensure that the maximum number of parallel workers is
1163 // capped at any one point in time.
1165 // LTO and the coordinator thread
1166 // ------------------------------
1168 // The final job the coordinator thread is responsible for is managing LTO
// and how that works. When LTO is requested what we'll do is collect all
1170 // optimized LLVM modules into a local vector on the coordinator. Once all
1171 // modules have been codegened and optimized we hand this to the `lto`
1172 // module for further optimization. The `lto` module will return back a list
1173 // of more modules to work on, which the coordinator will continue to spawn
1176 // Each LLVM module is automatically sent back to the coordinator for LTO if
1177 // necessary. There's already optimizations in place to avoid sending work
1178 // back to the coordinator if LTO isn't requested.
1179 return thread
::spawn(move || {
1180 let max_workers
= num_cpus
::get();
1181 let mut worker_id_counter
= 0;
1182 let mut free_worker_ids
= Vec
::new();
1183 let mut get_worker_id
= |free_worker_ids
: &mut Vec
<usize>| {
1184 if let Some(id
) = free_worker_ids
.pop() {
1187 let id
= worker_id_counter
;
1188 worker_id_counter
+= 1;
1193 // This is where we collect codegen units that have gone all the way
1194 // through codegen and LLVM.
1195 let mut compiled_modules
= vec
![];
1196 let mut compiled_metadata_module
= None
;
1197 let mut compiled_allocator_module
= None
;
1198 let mut needs_link
= Vec
::new();
1199 let mut needs_fat_lto
= Vec
::new();
1200 let mut needs_thin_lto
= Vec
::new();
1201 let mut lto_import_only_modules
= Vec
::new();
1202 let mut started_lto
= false;
1203 let mut codegen_aborted
= false;
1205 // This flag tracks whether all items have gone through codegens
1206 let mut codegen_done
= false;
1208 // This is the queue of LLVM work items that still need processing.
1209 let mut work_items
= Vec
::<(WorkItem
<B
>, u64)>::new();
1211 // This are the Jobserver Tokens we currently hold. Does not include
1212 // the implicit Token the compiler process owns no matter what.
1213 let mut tokens
= Vec
::new();
1215 let mut main_thread_worker_state
= MainThreadWorkerState
::Idle
;
1216 let mut running
= 0;
1218 let prof
= &cgcx
.prof
;
1219 let mut llvm_start_time
: Option
<VerboseTimingGuard
<'_
>> = None
;
1221 // Run the message loop while there's still anything that needs message
1222 // processing. Note that as soon as codegen is aborted we simply want to
1223 // wait for all existing work to finish, so many of the conditions here
1224 // only apply if codegen hasn't been aborted as they represent pending
1228 || (!codegen_aborted
1229 && !(work_items
.is_empty()
1230 && needs_fat_lto
.is_empty()
1231 && needs_thin_lto
.is_empty()
1232 && lto_import_only_modules
.is_empty()
1233 && main_thread_worker_state
== MainThreadWorkerState
::Idle
))
1235 // While there are still CGUs to be codegened, the coordinator has
1236 // to decide how to utilize the compiler processes implicit Token:
1237 // For codegenning more CGU or for running them through LLVM.
1239 if main_thread_worker_state
== MainThreadWorkerState
::Idle
{
1240 if !queue_full_enough(work_items
.len(), running
, max_workers
) {
1241 // The queue is not full enough, codegen more items:
1242 if codegen_worker_send
.send(Message
::CodegenItem
).is_err() {
1243 panic
!("Could not send Message::CodegenItem to main thread")
1245 main_thread_worker_state
= MainThreadWorkerState
::Codegenning
;
1247 // The queue is full enough to not let the worker
1248 // threads starve. Use the implicit Token to do some
1251 work_items
.pop().expect("queue empty - queue_full_enough() broken?");
1252 let cgcx
= CodegenContext
{
1253 worker
: get_worker_id(&mut free_worker_ids
),
1256 maybe_start_llvm_timer(
1258 cgcx
.config(item
.module_kind()),
1259 &mut llvm_start_time
,
1261 main_thread_worker_state
= MainThreadWorkerState
::LLVMing
;
1262 spawn_work(cgcx
, item
);
1265 } else if codegen_aborted
{
1266 // don't queue up any more work if codegen was aborted, we're
1267 // just waiting for our existing children to finish
1269 // If we've finished everything related to normal codegen
1270 // then it must be the case that we've got some LTO work to do.
1271 // Perform the serial work here of figuring out what we're
1272 // going to LTO and then push a bunch of work items onto our
1274 if work_items
.is_empty()
1276 && main_thread_worker_state
== MainThreadWorkerState
::Idle
1278 assert
!(!started_lto
);
1281 let needs_fat_lto
= mem
::take(&mut needs_fat_lto
);
1282 let needs_thin_lto
= mem
::take(&mut needs_thin_lto
);
1283 let import_only_modules
= mem
::take(&mut lto_import_only_modules
);
1286 generate_lto_work(&cgcx
, needs_fat_lto
, needs_thin_lto
, import_only_modules
)
1288 let insertion_index
= work_items
1289 .binary_search_by_key(&cost
, |&(_
, cost
)| cost
)
1290 .unwrap_or_else(|e
| e
);
1291 work_items
.insert(insertion_index
, (work
, cost
));
1292 if !cgcx
.opts
.debugging_opts
.no_parallel_llvm
{
1293 helper
.request_token();
1298 // In this branch, we know that everything has been codegened,
1299 // so it's just a matter of determining whether the implicit
1300 // Token is free to use for LLVM work.
1301 match main_thread_worker_state
{
1302 MainThreadWorkerState
::Idle
=> {
1303 if let Some((item
, _
)) = work_items
.pop() {
1304 let cgcx
= CodegenContext
{
1305 worker
: get_worker_id(&mut free_worker_ids
),
1308 maybe_start_llvm_timer(
1310 cgcx
.config(item
.module_kind()),
1311 &mut llvm_start_time
,
1313 main_thread_worker_state
= MainThreadWorkerState
::LLVMing
;
1314 spawn_work(cgcx
, item
);
1316 // There is no unstarted work, so let the main thread
1317 // take over for a running worker. Otherwise the
1318 // implicit token would just go to waste.
1319 // We reduce the `running` counter by one. The
1320 // `tokens.truncate()` below will take care of
1321 // giving the Token back.
1322 debug_assert
!(running
> 0);
1324 main_thread_worker_state
= MainThreadWorkerState
::LLVMing
;
1327 MainThreadWorkerState
::Codegenning
=> bug
!(
1328 "codegen worker should not be codegenning after \
1329 codegen was already completed"
1331 MainThreadWorkerState
::LLVMing
=> {
1332 // Already making good use of that token
1337 // Spin up what work we can, only doing this while we've got available
1338 // parallelism slots and work left to spawn.
1339 while !codegen_aborted
&& !work_items
.is_empty() && running
< tokens
.len() {
1340 let (item
, _
) = work_items
.pop().unwrap();
1342 maybe_start_llvm_timer(prof
, cgcx
.config(item
.module_kind()), &mut llvm_start_time
);
1345 CodegenContext { worker: get_worker_id(&mut free_worker_ids), ..cgcx.clone() }
;
1347 spawn_work(cgcx
, item
);
1351 // Relinquish accidentally acquired extra tokens
1352 tokens
.truncate(running
);
1354 // If a thread exits successfully then we drop a token associated
1355 // with that worker and update our `running` count. We may later
1356 // re-acquire a token to continue running more work. We may also not
1357 // actually drop a token here if the worker was running with an
1358 // "ephemeral token"
1359 let mut free_worker
= |worker_id
| {
1360 if main_thread_worker_state
== MainThreadWorkerState
::LLVMing
{
1361 main_thread_worker_state
= MainThreadWorkerState
::Idle
;
1366 free_worker_ids
.push(worker_id
);
1369 let msg
= coordinator_receive
.recv().unwrap();
1370 match *msg
.downcast
::<Message
<B
>>().ok().unwrap() {
1371 // Save the token locally and the next turn of the loop will use
1372 // this to spawn a new unit of work, or it may get dropped
1373 // immediately if we have no more work to spawn.
1374 Message
::Token(token
) => {
1379 if main_thread_worker_state
== MainThreadWorkerState
::LLVMing
{
1380 // If the main thread token is used for LLVM work
1381 // at the moment, we turn that thread into a regular
1382 // LLVM worker thread, so the main thread is free
1383 // to react to codegen demand.
1384 main_thread_worker_state
= MainThreadWorkerState
::Idle
;
1389 let msg
= &format
!("failed to acquire jobserver token: {}", e
);
1390 shared_emitter
.fatal(msg
);
1391 // Exit the coordinator thread
1397 Message
::CodegenDone { llvm_work_item, cost }
=> {
1398 // We keep the queue sorted by estimated processing cost,
1399 // so that more expensive items are processed earlier. This
1400 // is good for throughput as it gives the main thread more
1401 // time to fill up the queue and it avoids scheduling
1402 // expensive items to the end.
1403 // Note, however, that this is not ideal for memory
1404 // consumption, as LLVM module sizes are not evenly
1406 let insertion_index
= work_items
.binary_search_by_key(&cost
, |&(_
, cost
)| cost
);
1407 let insertion_index
= match insertion_index
{
1408 Ok(idx
) | Err(idx
) => idx
,
1410 work_items
.insert(insertion_index
, (llvm_work_item
, cost
));
1412 if !cgcx
.opts
.debugging_opts
.no_parallel_llvm
{
1413 helper
.request_token();
1415 assert
!(!codegen_aborted
);
1416 assert_eq
!(main_thread_worker_state
, MainThreadWorkerState
::Codegenning
);
1417 main_thread_worker_state
= MainThreadWorkerState
::Idle
;
1420 Message
::CodegenComplete
=> {
1421 codegen_done
= true;
1422 assert
!(!codegen_aborted
);
1423 assert_eq
!(main_thread_worker_state
, MainThreadWorkerState
::Codegenning
);
1424 main_thread_worker_state
= MainThreadWorkerState
::Idle
;
1427 // If codegen is aborted that means translation was aborted due
1428 // to some normal-ish compiler error. In this situation we want
1429 // to exit as soon as possible, but we want to make sure all
1430 // existing work has finished. Flag codegen as being done, and
1431 // then conditions above will ensure no more work is spawned but
1432 // we'll keep executing this loop until `running` hits 0.
1433 Message
::CodegenAborted
=> {
1434 assert
!(!codegen_aborted
);
1435 codegen_done
= true;
1436 codegen_aborted
= true;
1437 assert_eq
!(main_thread_worker_state
, MainThreadWorkerState
::Codegenning
);
1439 Message
::Done { result: Ok(compiled_module), worker_id }
=> {
1440 free_worker(worker_id
);
1441 match compiled_module
.kind
{
1442 ModuleKind
::Regular
=> {
1443 compiled_modules
.push(compiled_module
);
1445 ModuleKind
::Metadata
=> {
1446 assert
!(compiled_metadata_module
.is_none());
1447 compiled_metadata_module
= Some(compiled_module
);
1449 ModuleKind
::Allocator
=> {
1450 assert
!(compiled_allocator_module
.is_none());
1451 compiled_allocator_module
= Some(compiled_module
);
1455 Message
::NeedsLink { module, worker_id }
=> {
1456 free_worker(worker_id
);
1457 needs_link
.push(module
);
1459 Message
::NeedsFatLTO { result, worker_id }
=> {
1460 assert
!(!started_lto
);
1461 free_worker(worker_id
);
1462 needs_fat_lto
.push(result
);
1464 Message
::NeedsThinLTO { name, thin_buffer, worker_id }
=> {
1465 assert
!(!started_lto
);
1466 free_worker(worker_id
);
1467 needs_thin_lto
.push((name
, thin_buffer
));
1469 Message
::AddImportOnlyModule { module_data, work_product }
=> {
1470 assert
!(!started_lto
);
1471 assert
!(!codegen_done
);
1472 assert_eq
!(main_thread_worker_state
, MainThreadWorkerState
::Codegenning
);
1473 lto_import_only_modules
.push((module_data
, work_product
));
1474 main_thread_worker_state
= MainThreadWorkerState
::Idle
;
1476 // If the thread failed that means it panicked, so we abort immediately.
1477 Message
::Done { result: Err(None), worker_id: _ }
=> {
1478 bug
!("worker thread panicked");
1480 Message
::Done { result: Err(Some(WorkerFatalError)), worker_id: _ }
=> {
1483 Message
::CodegenItem
=> bug
!("the coordinator should not receive codegen requests"),
1487 let needs_link
= mem
::take(&mut needs_link
);
1488 if !needs_link
.is_empty() {
1489 assert
!(compiled_modules
.is_empty());
1490 let diag_handler
= cgcx
.create_diag_handler();
1491 let module
= B
::run_link(&cgcx
, &diag_handler
, needs_link
).map_err(|_
| ())?
;
1492 let module
= unsafe {
1493 B
::codegen(&cgcx
, &diag_handler
, module
, cgcx
.config(ModuleKind
::Regular
))
1496 compiled_modules
.push(module
);
1499 // Drop to print timings
1500 drop(llvm_start_time
);
1502 // Regardless of what order these modules completed in, report them to
1503 // the backend in the same order every time to ensure that we're handing
1504 // out deterministic results.
1505 compiled_modules
.sort_by(|a
, b
| a
.name
.cmp(&b
.name
));
1507 Ok(CompiledModules
{
1508 modules
: compiled_modules
,
1509 metadata_module
: compiled_metadata_module
,
1510 allocator_module
: compiled_allocator_module
,
// A heuristic that determines if we have enough LLVM WorkItems in the
// queue so that the main thread can do LLVM work instead of codegen
//
// We never know exactly how many jobserver tokens we will be handed, so this
// is a conservative guess: demand fewer queued items the more workers are
// already busy, capped by the maximal sensible worker count.
fn queue_full_enough(items_in_queue: usize, workers_running: usize, max_workers: usize) -> bool {
    // An empty queue can never be "full enough".
    if items_in_queue == 0 {
        return false;
    }
    let threshold = max_workers.saturating_sub(workers_running / 2);
    items_in_queue >= threshold
}
1525 fn maybe_start_llvm_timer
<'a
>(
1526 prof
: &'a SelfProfilerRef
,
1527 config
: &ModuleConfig
,
1528 llvm_start_time
: &mut Option
<VerboseTimingGuard
<'a
>>,
1530 if config
.time_module
&& llvm_start_time
.is_none() {
1531 *llvm_start_time
= Some(prof
.extra_verbose_generic_activity("LLVM_passes", "crate"));
/// Marker carried in `Message::Done { result: Err(Some(_)), .. }` to tell the
/// coordinator that a worker thread died from a fatal error whose diagnostic
/// was already reported through the shared emitter.
///
/// `FatalError` is explicitly not `Send`.
pub struct WorkerFatalError;
1540 fn spawn_work
<B
: ExtraBackendMethods
>(cgcx
: CodegenContext
<B
>, work
: WorkItem
<B
>) {
1541 thread
::spawn(move || {
1542 // Set up a destructor which will fire off a message that we're done as
1544 struct Bomb
<B
: ExtraBackendMethods
> {
1545 coordinator_send
: Sender
<Box
<dyn Any
+ Send
>>,
1546 result
: Option
<Result
<WorkItemResult
<B
>, FatalError
>>,
1549 impl<B
: ExtraBackendMethods
> Drop
for Bomb
<B
> {
1550 fn drop(&mut self) {
1551 let worker_id
= self.worker_id
;
1552 let msg
= match self.result
.take() {
1553 Some(Ok(WorkItemResult
::Compiled(m
))) => {
1554 Message
::Done
::<B
> { result: Ok(m), worker_id }
1556 Some(Ok(WorkItemResult
::NeedsLink(m
))) => {
1557 Message
::NeedsLink
::<B
> { module: m, worker_id }
1559 Some(Ok(WorkItemResult
::NeedsFatLTO(m
))) => {
1560 Message
::NeedsFatLTO
::<B
> { result: m, worker_id }
1562 Some(Ok(WorkItemResult
::NeedsThinLTO(name
, thin_buffer
))) => {
1563 Message
::NeedsThinLTO
::<B
> { name, thin_buffer, worker_id }
1565 Some(Err(FatalError
)) => {
1566 Message
::Done
::<B
> { result: Err(Some(WorkerFatalError)), worker_id }
1568 None
=> Message
::Done
::<B
> { result: Err(None), worker_id }
,
1570 drop(self.coordinator_send
.send(Box
::new(msg
)));
1574 let mut bomb
= Bomb
::<B
> {
1575 coordinator_send
: cgcx
.coordinator_send
.clone(),
1577 worker_id
: cgcx
.worker
,
1580 // Execute the work itself, and if it finishes successfully then flag
1581 // ourselves as a success as well.
1583 // Note that we ignore any `FatalError` coming out of `execute_work_item`,
1584 // as a diagnostic was already sent off to the main thread - just
1585 // surface that there was an error in this worker.
1587 let _prof_timer
= work
.start_profiling(&cgcx
);
1588 Some(execute_work_item(&cgcx
, work
))
1593 enum SharedEmitterMessage
{
1594 Diagnostic(Diagnostic
),
1595 InlineAsmError(u32, String
, Level
, Option
<(String
, Vec
<InnerSpan
>)>),
1601 pub struct SharedEmitter
{
1602 sender
: Sender
<SharedEmitterMessage
>,
1605 pub struct SharedEmitterMain
{
1606 receiver
: Receiver
<SharedEmitterMessage
>,
1609 impl SharedEmitter
{
1610 pub fn new() -> (SharedEmitter
, SharedEmitterMain
) {
1611 let (sender
, receiver
) = channel();
1613 (SharedEmitter { sender }
, SharedEmitterMain { receiver }
)
1616 pub fn inline_asm_error(
1621 source
: Option
<(String
, Vec
<InnerSpan
>)>,
1623 drop(self.sender
.send(SharedEmitterMessage
::InlineAsmError(cookie
, msg
, level
, source
)));
1626 pub fn fatal(&self, msg
: &str) {
1627 drop(self.sender
.send(SharedEmitterMessage
::Fatal(msg
.to_string())));
1631 impl Emitter
for SharedEmitter
{
1632 fn emit_diagnostic(&mut self, diag
: &rustc_errors
::Diagnostic
) {
1633 drop(self.sender
.send(SharedEmitterMessage
::Diagnostic(Diagnostic
{
1634 msg
: diag
.message(),
1635 code
: diag
.code
.clone(),
1638 for child
in &diag
.children
{
1639 drop(self.sender
.send(SharedEmitterMessage
::Diagnostic(Diagnostic
{
1640 msg
: child
.message(),
1645 drop(self.sender
.send(SharedEmitterMessage
::AbortIfErrors
));
1647 fn source_map(&self) -> Option
<&Lrc
<SourceMap
>> {
1652 impl SharedEmitterMain
{
1653 pub fn check(&self, sess
: &Session
, blocking
: bool
) {
1655 let message
= if blocking
{
1656 match self.receiver
.recv() {
1657 Ok(message
) => Ok(message
),
1661 match self.receiver
.try_recv() {
1662 Ok(message
) => Ok(message
),
1668 Ok(SharedEmitterMessage
::Diagnostic(diag
)) => {
1669 let handler
= sess
.diagnostic();
1670 let mut d
= rustc_errors
::Diagnostic
::new(diag
.lvl
, &diag
.msg
);
1671 if let Some(code
) = diag
.code
{
1674 handler
.emit_diagnostic(&d
);
1676 Ok(SharedEmitterMessage
::InlineAsmError(cookie
, msg
, level
, source
)) => {
1677 let msg
= msg
.strip_prefix("error: ").unwrap_or(&msg
);
1679 let mut err
= match level
{
1680 Level
::Error
=> sess
.struct_err(&msg
),
1681 Level
::Warning
=> sess
.struct_warn(&msg
),
1682 Level
::Note
=> sess
.struct_note_without_error(&msg
),
1683 _
=> bug
!("Invalid inline asm diagnostic level"),
1686 // If the cookie is 0 then we don't have span information.
1688 let pos
= BytePos
::from_u32(cookie
);
1689 let span
= Span
::with_root_ctxt(pos
, pos
);
1693 // Point to the generated assembly if it is available.
1694 if let Some((buffer
, spans
)) = source
{
1697 .new_source_file(FileName
::inline_asm_source_code(&buffer
), buffer
);
1698 let source_span
= Span
::with_root_ctxt(source
.start_pos
, source
.end_pos
);
1700 spans
.iter().map(|sp
| source_span
.from_inner(*sp
)).collect();
1701 err
.span_note(spans
, "instantiated into assembly here");
1706 Ok(SharedEmitterMessage
::AbortIfErrors
) => {
1707 sess
.abort_if_errors();
1709 Ok(SharedEmitterMessage
::Fatal(msg
)) => {
1720 pub struct OngoingCodegen
<B
: ExtraBackendMethods
> {
1722 pub crate_name
: Symbol
,
1723 pub metadata
: EncodedMetadata
,
1724 pub windows_subsystem
: Option
<String
>,
1725 pub linker_info
: LinkerInfo
,
1726 pub crate_info
: CrateInfo
,
1727 pub coordinator_send
: Sender
<Box
<dyn Any
+ Send
>>,
1728 pub codegen_worker_receive
: Receiver
<Message
<B
>>,
1729 pub shared_emitter_main
: SharedEmitterMain
,
1730 pub future
: thread
::JoinHandle
<Result
<CompiledModules
, ()>>,
1731 pub output_filenames
: Arc
<OutputFilenames
>,
1734 impl<B
: ExtraBackendMethods
> OngoingCodegen
<B
> {
1735 pub fn join(self, sess
: &Session
) -> (CodegenResults
, FxHashMap
<WorkProductId
, WorkProduct
>) {
1736 let _timer
= sess
.timer("finish_ongoing_codegen");
1738 self.shared_emitter_main
.check(sess
, true);
1739 let future
= self.future
;
1740 let compiled_modules
= sess
.time("join_worker_thread", || match future
.join() {
1741 Ok(Ok(compiled_modules
)) => compiled_modules
,
1743 sess
.abort_if_errors();
1744 panic
!("expected abort due to worker thread errors")
1747 bug
!("panic during codegen/LLVM phase");
1751 sess
.cgu_reuse_tracker
.check_expected_reuse(sess
.diagnostic());
1753 sess
.abort_if_errors();
1756 copy_all_cgu_workproducts_to_incr_comp_cache_dir(sess
, &compiled_modules
);
1757 produce_final_output_artifacts(sess
, &compiled_modules
, &self.output_filenames
);
1759 // FIXME: time_llvm_passes support - does this use a global context or
1761 if sess
.codegen_units() == 1 && sess
.time_llvm_passes() {
1762 self.backend
.print_pass_timings()
1767 crate_name
: self.crate_name
,
1768 metadata
: self.metadata
,
1769 windows_subsystem
: self.windows_subsystem
,
1770 linker_info
: self.linker_info
,
1771 crate_info
: self.crate_info
,
1773 modules
: compiled_modules
.modules
,
1774 allocator_module
: compiled_modules
.allocator_module
,
1775 metadata_module
: compiled_modules
.metadata_module
,
1781 pub fn submit_pre_codegened_module_to_llvm(
1784 module
: ModuleCodegen
<B
::Module
>,
1786 self.wait_for_signal_to_codegen_item();
1787 self.check_for_errors(tcx
.sess
);
1789 // These are generally cheap and won't throw off scheduling.
1791 submit_codegened_module_to_llvm(&self.backend
, &self.coordinator_send
, module
, cost
);
1794 pub fn codegen_finished(&self, tcx
: TyCtxt
<'_
>) {
1795 self.wait_for_signal_to_codegen_item();
1796 self.check_for_errors(tcx
.sess
);
1797 drop(self.coordinator_send
.send(Box
::new(Message
::CodegenComplete
::<B
>)));
1800 /// Consumes this context indicating that codegen was entirely aborted, and
1801 /// we need to exit as quickly as possible.
1803 /// This method blocks the current thread until all worker threads have
1804 /// finished, and all worker threads should have exited or be real close to
1805 /// exiting at this point.
1806 pub fn codegen_aborted(self) {
1807 // Signal to the coordinator it should spawn no more work and start
1809 drop(self.coordinator_send
.send(Box
::new(Message
::CodegenAborted
::<B
>)));
1810 drop(self.future
.join());
1813 pub fn check_for_errors(&self, sess
: &Session
) {
1814 self.shared_emitter_main
.check(sess
, false);
1817 pub fn wait_for_signal_to_codegen_item(&self) {
1818 match self.codegen_worker_receive
.recv() {
1819 Ok(Message
::CodegenItem
) => {
1822 Ok(_
) => panic
!("unexpected message"),
1824 // One of the LLVM threads must have panicked, fall through so
1825 // error handling can be reached.
1831 pub fn submit_codegened_module_to_llvm
<B
: ExtraBackendMethods
>(
1833 tx_to_llvm_workers
: &Sender
<Box
<dyn Any
+ Send
>>,
1834 module
: ModuleCodegen
<B
::Module
>,
1837 let llvm_work_item
= WorkItem
::Optimize(module
);
1838 drop(tx_to_llvm_workers
.send(Box
::new(Message
::CodegenDone
::<B
> { llvm_work_item, cost }
)));
1841 pub fn submit_post_lto_module_to_llvm
<B
: ExtraBackendMethods
>(
1843 tx_to_llvm_workers
: &Sender
<Box
<dyn Any
+ Send
>>,
1844 module
: CachedModuleCodegen
,
1846 let llvm_work_item
= WorkItem
::CopyPostLtoArtifacts(module
);
1847 drop(tx_to_llvm_workers
.send(Box
::new(Message
::CodegenDone
::<B
> { llvm_work_item, cost: 0 }
)));
1850 pub fn submit_pre_lto_module_to_llvm
<B
: ExtraBackendMethods
>(
1853 tx_to_llvm_workers
: &Sender
<Box
<dyn Any
+ Send
>>,
1854 module
: CachedModuleCodegen
,
1856 let filename
= pre_lto_bitcode_filename(&module
.name
);
1857 let bc_path
= in_incr_comp_dir_sess(tcx
.sess
, &filename
);
1858 let file
= fs
::File
::open(&bc_path
)
1859 .unwrap_or_else(|e
| panic
!("failed to open bitcode file `{}`: {}", bc_path
.display(), e
));
1862 memmap
::Mmap
::map(&file
).unwrap_or_else(|e
| {
1863 panic
!("failed to mmap bitcode file `{}`: {}", bc_path
.display(), e
)
1866 // Schedule the module to be loaded
1867 drop(tx_to_llvm_workers
.send(Box
::new(Message
::AddImportOnlyModule
::<B
> {
1868 module_data
: SerializedModule
::FromUncompressedFile(mmap
),
1869 work_product
: module
.source
,
1873 pub fn pre_lto_bitcode_filename(module_name
: &str) -> String
{
1874 format
!("{}.{}", module_name
, PRE_LTO_BC_EXT
)
1877 fn msvc_imps_needed(tcx
: TyCtxt
<'_
>) -> bool
{
1878 // This should never be true (because it's not supported). If it is true,
1879 // something is wrong with commandline arg validation.
1881 !(tcx
.sess
.opts
.cg
.linker_plugin_lto
.enabled()
1882 && tcx
.sess
.target
.is_like_windows
1883 && tcx
.sess
.opts
.cg
.prefer_dynamic
)
1886 tcx
.sess
.target
.is_like_windows
&&
1887 tcx
.sess
.crate_types().iter().any(|ct
| *ct
== CrateType
::Rlib
) &&
1888 // ThinLTO can't handle this workaround in all cases, so we don't
1889 // emit the `__imp_` symbols. Instead we make them unnecessary by disallowing
1890 // dynamic linking when linker plugin LTO is enabled.
1891 !tcx
.sess
.opts
.cg
.linker_plugin_lto
.enabled()