//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible.
//!
//! # Undefined behaviour
//!
//! This optimization is based on the assumption that stack slots which don't have their address
//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the
//! function which has the stack slots. This optimization also assumes that stack slot accesses
//! are never out of bounds. If these assumptions do not hold, then this optimization may remove
//! a `stack_store` instruction incorrectly, or incorrectly use a previously stored value as the
//! value being loaded by a `stack_load`.
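//!
//! # Example
//!
//! A rough sketch of the intended effect; the CLIF below is illustrative, not verbatim compiler
//! output:
//!
//! ```text
//! ss0 = explicit_slot 4
//! ...
//! stack_store v0, ss0
//! v1 = stack_load.i32 ss0
//! ```
//!
//! Assuming the address of `ss0` never escapes, the load is replaced by an alias of `v0`, the
//! now-unobserved `stack_store` is removed, and `ss0` is shrunk to size 0.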
use std::collections::BTreeMap;
use std::fmt;
use std::ops::Not;

use rustc_data_structures::fx::FxHashSet;

use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::immediates::Offset32;
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};

use crate::prelude::*;
/// Workaround for `StackSlot` not implementing `Ord`.
#[derive(Copy, Clone, PartialEq, Eq)]
struct OrdStackSlot(StackSlot);

impl fmt::Debug for OrdStackSlot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self.0)
    }
}
impl PartialOrd for OrdStackSlot {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        self.0.as_u32().partial_cmp(&rhs.0.as_u32())
    }
}
impl Ord for OrdStackSlot {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.0.as_u32().cmp(&rhs.0.as_u32())
    }
}
#[derive(Debug, Default)]
struct StackSlotUsage {
    stack_addr: FxHashSet<Inst>,
    stack_load: FxHashSet<Inst>,
    stack_store: FxHashSet<Inst>,
}

impl StackSlotUsage {
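    /// Collect every `stack_store` that might have written the value observed by `load`,
    /// based on spatial overlap and (conservative) ordering information.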
    fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec<Inst> {
        self.stack_store
            .iter()
            .cloned()
            .filter(|&store| {
                match spatial_overlap(&ctx.func, store, load) {
                    SpatialOverlap::No => false, // Can never be the source of the loaded value.
                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
                }
            })
            .filter(|&store| {
                match temporal_order(ctx, store, load) {
                    TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value.
                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
                }
            })
            .collect::<Vec<Inst>>()
    }
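    /// Collect every `stack_load` that might observe the value written by `store`.
    /// A store with no such loads is dead and can be removed.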
    fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec<Inst> {
        self.stack_load
            .iter()
            .cloned()
            .filter(|&load| {
                match spatial_overlap(&ctx.func, store, load) {
                    SpatialOverlap::No => false, // Can never load the stored value.
                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
                }
            })
            .filter(|&load| {
                match temporal_order(ctx, store, load) {
                    TemporalOrder::NeverBefore => false, // Can never load the stored value.
                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
                }
            })
            .collect::<Vec<Inst>>()
    }
    fn remove_unused_stack_addr(func: &mut Function, inst: Inst) {
        func.dfg.detach_results(inst);
        func.dfg.replace(inst).nop();
    }
    fn remove_unused_load(func: &mut Function, load: Inst) {
        func.dfg.detach_results(load);
        func.dfg.replace(load).nop();
    }
    fn remove_dead_store(&mut self, func: &mut Function, store: Inst) {
        func.dfg.replace(store).nop();
        self.stack_store.remove(&store);
    }
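    /// Replace `load` with an alias of `value` when the types match; otherwise keep a
    /// `bitcast` in place of the load so the result type stays unchanged.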
    fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) {
        let loaded_value = func.dfg.inst_results(load)[0];
        let loaded_type = func.dfg.value_type(loaded_value);

        if func.dfg.value_type(value) == loaded_type {
            func.dfg.detach_results(load);
            func.dfg.replace(load).nop();
            func.dfg.change_to_alias(loaded_value, value);
        } else {
            func.dfg.replace(load).bitcast(loaded_type, value);
        }

        self.stack_load.remove(&load);
    }
}
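/// Pass-wide state: the Cranelift `Context` plus the per-slot usage sets collected by
/// `for_context`. Keying the map by `OrdStackSlot` (a `BTreeMap`) keeps iteration order
/// deterministic across runs.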
struct OptimizeContext<'a> {
    ctx: &'a mut Context,
    stack_slot_usage_map: BTreeMap<OrdStackSlot, StackSlotUsage>,
}
impl<'a> OptimizeContext<'a> {
    fn for_context(ctx: &'a mut Context) -> Self {
        ctx.flowgraph(); // Compute cfg and domtree.

        // Record all stack_addr, stack_load and stack_store instructions.
        let mut stack_slot_usage_map = BTreeMap::<OrdStackSlot, StackSlotUsage>::new();

        let mut cursor = FuncCursor::new(&mut ctx.func);
        while let Some(_block) = cursor.next_block() {
            while let Some(inst) = cursor.next_inst() {
                match cursor.func.dfg[inst] {
                    InstructionData::StackLoad {
                        opcode: Opcode::StackAddr,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_addr
                            .insert(inst);
                    }
                    InstructionData::StackLoad {
                        opcode: Opcode::StackLoad,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_load
                            .insert(inst);
                    }
                    InstructionData::StackStore {
                        opcode: Opcode::StackStore,
                        arg: _,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_store
                            .insert(inst);
                    }
                    _ => {}
                }
            }
        }

        OptimizeContext { ctx, stack_slot_usage_map }
    }
}
pub(super) fn optimize_function(
    ctx: &mut Context,
    #[cfg_attr(not(debug_assertions), allow(unused_variables))]
    clif_comments: &mut crate::pretty_clif::CommentWriter,
) {
    combine_stack_addr_with_load_store(&mut ctx.func);

    let mut opt_ctx = OptimizeContext::for_context(ctx);
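    // From here on: first drop stack_addr/stack_load instructions whose results are never used,
    // then, per slot, forward stores to loads and remove stores that are never observed.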
    // FIXME Repeat following instructions until fixpoint.

    remove_unused_stack_addr_and_stack_load(&mut opt_ctx);

    #[cfg(debug_assertions)]
    {
        for (&OrdStackSlot(stack_slot), usage) in &opt_ctx.stack_slot_usage_map {
            clif_comments.add_comment(stack_slot, format!("used by: {:?}", usage));
        }
    }
    for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() {
        if users.stack_addr.is_empty().not() {
            // Stack addr leaked; there may be unknown loads and stores.
            // FIXME use stacked borrows to optimize
            continue;
        }
        for load in users.stack_load.clone().into_iter() {
            let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load);
            #[cfg(debug_assertions)]
            for &store in &potential_stores {
                clif_comments.add_comment(
                    load,
                    format!(
                        "Potential store -> load forwarding {} -> {} ({:?}, {:?})",
                        opt_ctx.ctx.func.dfg.display_inst(store, None),
                        opt_ctx.ctx.func.dfg.display_inst(load, None),
                        spatial_overlap(&opt_ctx.ctx.func, store, load),
                        temporal_order(&opt_ctx.ctx, store, load),
                    ),
                );
            }
            match *potential_stores {
                [] => {
                    #[cfg(debug_assertions)]
                    clif_comments
                        .add_comment(load, "[BUG?] Reading uninitialized memory".to_string());
                }
                [store]
                    if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full
                        && temporal_order(&opt_ctx.ctx, store, load)
                            == TemporalOrder::DefinitivelyBefore =>
                {
                    // Only one store could have been the origin of the value.
                    let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0];

                    #[cfg(debug_assertions)]
                    clif_comments
                        .add_comment(load, format!("Store to load forward {} -> {}", store, load));

                    users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value);
                }
                _ => {} // FIXME implement this
            }
        }
        for store in users.stack_store.clone().into_iter() {
            let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store);
            #[cfg(debug_assertions)]
            for &load in &potential_loads {
                clif_comments.add_comment(
                    store,
                    format!(
                        "Potential load from store {} <- {} ({:?}, {:?})",
                        opt_ctx.ctx.func.dfg.display_inst(load, None),
                        opt_ctx.ctx.func.dfg.display_inst(store, None),
                        spatial_overlap(&opt_ctx.ctx.func, store, load),
                        temporal_order(&opt_ctx.ctx, store, load),
                    ),
                );
            }
            if potential_loads.is_empty() {
                // Never loaded; can safely remove all stores and the stack slot.
                // FIXME also remove stores when there is always a next store before a load.

                #[cfg(debug_assertions)]
                clif_comments.add_comment(
                    store,
                    format!(
                        "Remove dead stack store {} of {}",
                        opt_ctx.ctx.func.dfg.display_inst(store, None),
                        stack_slot.0,
                    ),
                );

                users.remove_dead_store(&mut opt_ctx.ctx.func, store);
            }
        }
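        // If no loads or stores of the slot remain, shrink it to zero bytes so it no longer
        // takes up stack space.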
        if users.stack_store.is_empty() && users.stack_load.is_empty() {
            opt_ctx.ctx.func.stack_slots[stack_slot.0].size = 0;
        }
    }
}
fn combine_stack_addr_with_load_store(func: &mut Function) {
    // Turn load and store into stack_load and stack_store when possible.
    let mut cursor = FuncCursor::new(func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            match cursor.func.dfg[inst] {
                InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
                    {
                        continue; // WORKAROUND: stack_load.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) =
                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
                    {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
                        {
                            let ty = cursor.func.dfg.ctrl_typevar(inst);
                            cursor.func.dfg.replace(inst).stack_load(
                                ty,
                                stack_slot,
                                combined_offset,
                            );
                        }
                    }
                }
                InstructionData::Store {
                    opcode: Opcode::Store,
                    args: [value, addr],
                    flags: _,
                    offset,
                } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
                    {
                        continue; // WORKAROUND: stack_store.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) =
                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
                    {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
                        {
                            cursor.func.dfg.replace(inst).stack_store(
                                value,
                                stack_slot,
                                combined_offset,
                            );
                        }
                    }
                }
                _ => {}
            }
        }
    }
}
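/// Replace every `stack_addr` and `stack_load` whose result has no users with a `nop` and drop
/// it from the usage map, so the per-slot analysis only considers accesses that matter.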
fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext<'_>) {
    // FIXME incrementally rebuild on each call?
    let mut stack_addr_load_insts_users = FxHashMap::<Inst, FxHashSet<Inst>>::default();
    let mut cursor = FuncCursor::new(&mut opt_ctx.ctx.func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            for &arg in cursor.func.dfg.inst_args(inst) {
                if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) {
                    match cursor.func.dfg[arg_origin].opcode() {
                        Opcode::StackAddr | Opcode::StackLoad => {
                            stack_addr_load_insts_users
                                .entry(arg_origin)
                                .or_insert_with(FxHashSet::default)
                                .insert(inst);
                        }
                        _ => {}
                    }
                }
            }
        }
    }
    #[cfg(debug_assertions)]
    for inst in stack_addr_load_insts_users.keys() {
        let mut is_recorded_stack_addr_or_stack_load = false;
        for stack_slot_users in opt_ctx.stack_slot_usage_map.values() {
            is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst)
                || stack_slot_users.stack_load.contains(inst);
        }
        assert!(is_recorded_stack_addr_or_stack_load);
    }
    // Replace all unused stack_addr and stack_load instructions with nop.
    let mut func = &mut opt_ctx.ctx.func;

    for stack_slot_users in opt_ctx.stack_slot_usage_map.values_mut() {
        stack_slot_users
            .stack_addr
            .drain_filter(|inst| {
                stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)
            })
            .for_each(|inst| StackSlotUsage::remove_unused_stack_addr(&mut func, inst));
        stack_slot_users
            .stack_load
            .drain_filter(|inst| {
                stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)
            })
            .for_each(|inst| StackSlotUsage::remove_unused_load(&mut func, inst));
    }
}
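/// If `addr` is the result of a `stack_addr` instruction, return the referenced stack slot and
/// the offset into it; otherwise return `None`.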
fn try_get_stack_slot_and_offset_for_addr(
    func: &Function,
    addr: Value,
) -> Option<(StackSlot, Offset32)> {
    if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) {
        if let InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset } =
            func.dfg[addr_inst]
        {
            return Some((stack_slot, offset));
        }
    }
    None
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SpatialOverlap {
    No,
    Partial,
    Full,
}
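/// Conservatively determine whether the byte ranges accessed by `src` and `dest` overlap.
/// For example, 4-byte accesses at offsets 0 and 2 of the same slot overlap partially, while
/// accesses to different slots never overlap.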
fn spatial_overlap(func: &Function, src: Inst, dest: Inst) -> SpatialOverlap {
    fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) {
        match func.dfg[inst] {
            InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset }
            | InstructionData::StackLoad { opcode: Opcode::StackLoad, stack_slot, offset }
            | InstructionData::StackStore {
                opcode: Opcode::StackStore,
                arg: _,
                stack_slot,
                offset,
            } => (stack_slot, offset, func.dfg.ctrl_typevar(inst).bytes()),
            _ => unreachable!("{:?}", func.dfg[inst]),
        }
    }
    debug_assert_ne!(src, dest);

    let (src_ss, src_offset, src_size) = inst_info(func, src);
    let (dest_ss, dest_offset, dest_size) = inst_info(func, dest);

    if src_ss != dest_ss {
        return SpatialOverlap::No;
    }

    if src_offset == dest_offset && src_size == dest_size {
        return SpatialOverlap::Full;
    }

    let src_end: i64 = src_offset.try_add_i64(i64::from(src_size)).unwrap().into();
    let dest_end: i64 = dest_offset.try_add_i64(i64::from(dest_size)).unwrap().into();
    if src_end <= dest_offset.into() || dest_end <= src_offset.into() {
        return SpatialOverlap::No;
    }

    SpatialOverlap::Partial
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TemporalOrder {
    /// `src` will never be executed before `dest`.
    NeverBefore,

    /// `src` may be executed before `dest`.
    MaybeBefore,

    /// `src` will always be executed before `dest`.
    /// There may still be other instructions in between.
    DefinitivelyBefore,
}
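/// Approximate the execution order of `src` and `dest` using the dominator tree: if `src`
/// dominates `dest`, every execution of `dest` is preceded by one of `src`, giving
/// `DefinitivelyBefore`. Note that the second branch below repeats the same dominance check, so
/// `NeverBefore` is currently never returned and everything else falls back to the conservative
/// `MaybeBefore`.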
fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder {
    debug_assert_ne!(src, dest);

    if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        TemporalOrder::DefinitivelyBefore
    } else if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        TemporalOrder::NeverBefore
    } else {
        TemporalOrder::MaybeBefore
    }
}