//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible.
//!
//! # Undefined behaviour
//!
//! This optimization is based on the assumption that stack slots which don't have their address
//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the
//! function which has the stack slots. This optimization also assumes that stack slot accesses
//! are never out of bounds. If these assumptions are not correct, then this optimization may remove
//! `stack_store` instructions incorrectly, or incorrectly use a previously stored value as the value
//! being loaded by a `stack_load`.
use std::collections::BTreeMap;
use std::fmt;
use std::ops::Not;

use rustc_data_structures::fx::{FxHashMap, FxHashSet};

use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::immediates::Offset32;
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};

use crate::prelude::*;
24 /// Workaround for `StackSlot` not implementing `Ord`.
25 #[derive(Copy, Clone, PartialEq, Eq)]
26 struct OrdStackSlot(StackSlot
);
28 impl fmt
::Debug
for OrdStackSlot
{
29 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
30 write
!(f
, "{:?}", self.0)
34 impl PartialOrd
for OrdStackSlot
{
35 fn partial_cmp(&self, rhs
: &Self) -> Option
<std
::cmp
::Ordering
> {
36 self.0.as_u32().partial_cmp(&rhs
.0.as_u32())
40 impl Ord
for OrdStackSlot
{
41 fn cmp(&self, rhs
: &Self) -> std
::cmp
::Ordering
{
42 self.0.as_u32().cmp(&rhs
.0.as_u32())
46 #[derive(Debug, Default)]
47 struct StackSlotUsage
{
48 stack_addr
: FxHashSet
<Inst
>,
49 stack_load
: FxHashSet
<Inst
>,
50 stack_store
: FxHashSet
<Inst
>,
54 fn potential_stores_for_load(&self, ctx
: &Context
, load
: Inst
) -> Vec
<Inst
> {
59 match spatial_overlap(&ctx
.func
, store
, load
) {
60 SpatialOverlap
::No
=> false, // Can never be the source of the loaded value.
61 SpatialOverlap
::Partial
| SpatialOverlap
::Full
=> true,
65 match temporal_order(ctx
, store
, load
) {
66 TemporalOrder
::NeverBefore
=> false, // Can never be the source of the loaded value.
67 TemporalOrder
::MaybeBefore
| TemporalOrder
::DefinitivelyBefore
=> true,
70 .collect
::<Vec
<Inst
>>()
73 fn potential_loads_of_store(&self, ctx
: &Context
, store
: Inst
) -> Vec
<Inst
> {
78 match spatial_overlap(&ctx
.func
, store
, load
) {
79 SpatialOverlap
::No
=> false, // Can never be the source of the loaded value.
80 SpatialOverlap
::Partial
| SpatialOverlap
::Full
=> true,
84 match temporal_order(ctx
, store
, load
) {
85 TemporalOrder
::NeverBefore
=> false, // Can never be the source of the loaded value.
86 TemporalOrder
::MaybeBefore
| TemporalOrder
::DefinitivelyBefore
=> true,
89 .collect
::<Vec
<Inst
>>()
92 fn remove_unused_stack_addr(func
: &mut Function
, inst
: Inst
) {
93 func
.dfg
.detach_results(inst
);
94 func
.dfg
.replace(inst
).nop();
97 fn remove_unused_load(func
: &mut Function
, load
: Inst
) {
98 func
.dfg
.detach_results(load
);
99 func
.dfg
.replace(load
).nop();
102 fn remove_dead_store(&mut self, func
: &mut Function
, store
: Inst
) {
103 func
.dfg
.replace(store
).nop();
104 self.stack_store
.remove(&store
);
107 fn change_load_to_alias(&mut self, func
: &mut Function
, load
: Inst
, value
: Value
) {
108 let loaded_value
= func
.dfg
.inst_results(load
)[0];
109 let loaded_type
= func
.dfg
.value_type(loaded_value
);
111 if func
.dfg
.value_type(value
) == loaded_type
{
112 func
.dfg
.detach_results(load
);
113 func
.dfg
.replace(load
).nop();
114 func
.dfg
.change_to_alias(loaded_value
, value
);
116 func
.dfg
.replace(load
).bitcast(loaded_type
, value
);
119 self.stack_load
.remove(&load
);
123 struct OptimizeContext
<'a
> {
124 ctx
: &'a
mut Context
,
125 stack_slot_usage_map
: BTreeMap
<OrdStackSlot
, StackSlotUsage
>,
128 impl<'a
> OptimizeContext
<'a
> {
129 fn for_context(ctx
: &'a
mut Context
) -> Self {
130 ctx
.flowgraph(); // Compute cfg and domtree.
132 // Record all stack_addr, stack_load and stack_store instructions.
133 let mut stack_slot_usage_map
= BTreeMap
::<OrdStackSlot
, StackSlotUsage
>::new();
135 let mut cursor
= FuncCursor
::new(&mut ctx
.func
);
136 while let Some(_block
) = cursor
.next_block() {
137 while let Some(inst
) = cursor
.next_inst() {
138 match cursor
.func
.dfg
[inst
] {
139 InstructionData
::StackLoad
{
140 opcode
: Opcode
::StackAddr
,
145 .entry(OrdStackSlot(stack_slot
))
146 .or_insert_with(StackSlotUsage
::default)
150 InstructionData
::StackLoad
{
151 opcode
: Opcode
::StackLoad
,
156 .entry(OrdStackSlot(stack_slot
))
157 .or_insert_with(StackSlotUsage
::default)
161 InstructionData
::StackStore
{
162 opcode
: Opcode
::StackStore
,
168 .entry(OrdStackSlot(stack_slot
))
169 .or_insert_with(StackSlotUsage
::default)
178 OptimizeContext { ctx, stack_slot_usage_map }
182 pub(super) fn optimize_function(
184 clif_comments
: &mut crate::pretty_clif
::CommentWriter
,
186 combine_stack_addr_with_load_store(&mut ctx
.func
);
188 let mut opt_ctx
= OptimizeContext
::for_context(ctx
);
190 // FIXME Repeat following instructions until fixpoint.
192 remove_unused_stack_addr_and_stack_load(&mut opt_ctx
);
194 if clif_comments
.enabled() {
195 for (&OrdStackSlot(stack_slot
), usage
) in &opt_ctx
.stack_slot_usage_map
{
196 clif_comments
.add_comment(stack_slot
, format
!("used by: {:?}", usage
));
200 for (stack_slot
, users
) in opt_ctx
.stack_slot_usage_map
.iter_mut() {
201 if users
.stack_addr
.is_empty().not() {
202 // Stack addr leaked; there may be unknown loads and stores.
203 // FIXME use stacked borrows to optimize
207 for load
in users
.stack_load
.clone().into_iter() {
208 let potential_stores
= users
.potential_stores_for_load(&opt_ctx
.ctx
, load
);
210 if clif_comments
.enabled() {
211 for &store
in &potential_stores
{
212 clif_comments
.add_comment(
215 "Potential store -> load forwarding {} -> {} ({:?}, {:?})",
216 opt_ctx
.ctx
.func
.dfg
.display_inst(store
, None
),
217 opt_ctx
.ctx
.func
.dfg
.display_inst(load
, None
),
218 spatial_overlap(&opt_ctx
.ctx
.func
, store
, load
),
219 temporal_order(&opt_ctx
.ctx
, store
, load
),
225 match *potential_stores
{
227 if clif_comments
.enabled() {
229 .add_comment(load
, "[BUG?] Reading uninitialized memory".to_string());
233 if spatial_overlap(&opt_ctx
.ctx
.func
, store
, load
) == SpatialOverlap
::Full
234 && temporal_order(&opt_ctx
.ctx
, store
, load
)
235 == TemporalOrder
::DefinitivelyBefore
=>
237 // Only one store could have been the origin of the value.
238 let stored_value
= opt_ctx
.ctx
.func
.dfg
.inst_args(store
)[0];
240 if clif_comments
.enabled() {
241 clif_comments
.add_comment(
243 format
!("Store to load forward {} -> {}", store
, load
),
247 users
.change_load_to_alias(&mut opt_ctx
.ctx
.func
, load
, stored_value
);
249 _
=> {}
// FIXME implement this
253 for store
in users
.stack_store
.clone().into_iter() {
254 let potential_loads
= users
.potential_loads_of_store(&opt_ctx
.ctx
, store
);
256 if clif_comments
.enabled() {
257 for &load
in &potential_loads
{
258 clif_comments
.add_comment(
261 "Potential load from store {} <- {} ({:?}, {:?})",
262 opt_ctx
.ctx
.func
.dfg
.display_inst(load
, None
),
263 opt_ctx
.ctx
.func
.dfg
.display_inst(store
, None
),
264 spatial_overlap(&opt_ctx
.ctx
.func
, store
, load
),
265 temporal_order(&opt_ctx
.ctx
, store
, load
),
271 if potential_loads
.is_empty() {
272 // Never loaded; can safely remove all stores and the stack slot.
273 // FIXME also remove stores when there is always a next store before a load.
275 if clif_comments
.enabled() {
276 clif_comments
.add_comment(
279 "Remove dead stack store {} of {}",
280 opt_ctx
.ctx
.func
.dfg
.display_inst(store
, None
),
286 users
.remove_dead_store(&mut opt_ctx
.ctx
.func
, store
);
290 if users
.stack_store
.is_empty() && users
.stack_load
.is_empty() {
291 opt_ctx
.ctx
.func
.stack_slots
[stack_slot
.0].size
= 0;
296 fn combine_stack_addr_with_load_store(func
: &mut Function
) {
297 // Turn load and store into stack_load and stack_store when possible.
298 let mut cursor
= FuncCursor
::new(func
);
299 while let Some(_block
) = cursor
.next_block() {
300 while let Some(inst
) = cursor
.next_inst() {
301 match cursor
.func
.dfg
[inst
] {
302 InstructionData
::Load { opcode: Opcode::Load, arg: addr, flags: _, offset }
=> {
303 if cursor
.func
.dfg
.ctrl_typevar(inst
) == types
::I128
304 || cursor
.func
.dfg
.ctrl_typevar(inst
).is_vector()
306 continue; // WORKAROUD: stack_load.i128 not yet implemented
308 if let Some((stack_slot
, stack_addr_offset
)) =
309 try_get_stack_slot_and_offset_for_addr(cursor
.func
, addr
)
311 if let Some(combined_offset
) = offset
.try_add_i64(stack_addr_offset
.into())
313 let ty
= cursor
.func
.dfg
.ctrl_typevar(inst
);
314 cursor
.func
.dfg
.replace(inst
).stack_load(
322 InstructionData
::Store
{
323 opcode
: Opcode
::Store
,
328 if cursor
.func
.dfg
.ctrl_typevar(inst
) == types
::I128
329 || cursor
.func
.dfg
.ctrl_typevar(inst
).is_vector()
331 continue; // WORKAROUND: stack_store.i128 not yet implemented
333 if let Some((stack_slot
, stack_addr_offset
)) =
334 try_get_stack_slot_and_offset_for_addr(cursor
.func
, addr
)
336 if let Some(combined_offset
) = offset
.try_add_i64(stack_addr_offset
.into())
338 cursor
.func
.dfg
.replace(inst
).stack_store(
352 fn remove_unused_stack_addr_and_stack_load(opt_ctx
: &mut OptimizeContext
<'_
>) {
353 // FIXME incrementally rebuild on each call?
354 let mut stack_addr_load_insts_users
= FxHashMap
::<Inst
, FxHashSet
<Inst
>>::default();
356 let mut cursor
= FuncCursor
::new(&mut opt_ctx
.ctx
.func
);
357 while let Some(_block
) = cursor
.next_block() {
358 while let Some(inst
) = cursor
.next_inst() {
359 for &arg
in cursor
.func
.dfg
.inst_args(inst
) {
360 if let ValueDef
::Result(arg_origin
, 0) = cursor
.func
.dfg
.value_def(arg
) {
361 match cursor
.func
.dfg
[arg_origin
].opcode() {
362 Opcode
::StackAddr
| Opcode
::StackLoad
=> {
363 stack_addr_load_insts_users
365 .or_insert_with(FxHashSet
::default)
375 #[cfg(debug_assertions)]
376 for inst
in stack_addr_load_insts_users
.keys() {
377 let mut is_recorded_stack_addr_or_stack_load
= false;
378 for stack_slot_users
in opt_ctx
.stack_slot_usage_map
.values() {
379 is_recorded_stack_addr_or_stack_load
|= stack_slot_users
.stack_addr
.contains(inst
)
380 || stack_slot_users
.stack_load
.contains(inst
);
382 assert
!(is_recorded_stack_addr_or_stack_load
);
385 // Replace all unused stack_addr and stack_load instructions with nop.
386 let mut func
= &mut opt_ctx
.ctx
.func
;
388 for stack_slot_users
in opt_ctx
.stack_slot_usage_map
.values_mut() {
391 .drain_filter(|inst
| {
392 stack_addr_load_insts_users
.get(inst
).map(|users
| users
.is_empty()).unwrap_or(true)
394 .for_each(|inst
| StackSlotUsage
::remove_unused_stack_addr(&mut func
, inst
));
398 .drain_filter(|inst
| {
399 stack_addr_load_insts_users
.get(inst
).map(|users
| users
.is_empty()).unwrap_or(true)
401 .for_each(|inst
| StackSlotUsage
::remove_unused_load(&mut func
, inst
));
405 fn try_get_stack_slot_and_offset_for_addr(
408 ) -> Option
<(StackSlot
, Offset32
)> {
409 if let ValueDef
::Result(addr_inst
, 0) = func
.dfg
.value_def(addr
) {
410 if let InstructionData
::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset }
=
413 return Some((stack_slot
, offset
));
/// How much the byte ranges touched by two stack accesses overlap.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SpatialOverlap {
    /// Disjoint ranges (or different slots entirely).
    No,
    /// Ranges overlap but are not identical.
    Partial,
    /// Same slot, same offset, same size.
    Full,
}
426 fn spatial_overlap(func
: &Function
, src
: Inst
, dest
: Inst
) -> SpatialOverlap
{
427 fn inst_info(func
: &Function
, inst
: Inst
) -> (StackSlot
, Offset32
, u32) {
428 match func
.dfg
[inst
] {
429 InstructionData
::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset }
430 | InstructionData
::StackLoad { opcode: Opcode::StackLoad, stack_slot, offset }
431 | InstructionData
::StackStore
{
432 opcode
: Opcode
::StackStore
,
436 } => (stack_slot
, offset
, func
.dfg
.ctrl_typevar(inst
).bytes()),
437 _
=> unreachable
!("{:?}", func
.dfg
[inst
]),
441 debug_assert_ne
!(src
, dest
);
443 let (src_ss
, src_offset
, src_size
) = inst_info(func
, src
);
444 let (dest_ss
, dest_offset
, dest_size
) = inst_info(func
, dest
);
446 if src_ss
!= dest_ss
{
447 return SpatialOverlap
::No
;
450 if src_offset
== dest_offset
&& src_size
== dest_size
{
451 return SpatialOverlap
::Full
;
454 let src_end
: i64 = src_offset
.try_add_i64(i64::from(src_size
)).unwrap().into();
455 let dest_end
: i64 = dest_offset
.try_add_i64(i64::from(dest_size
)).unwrap().into();
456 if src_end
<= dest_offset
.into() || dest_end
<= src_offset
.into() {
457 return SpatialOverlap
::No
;
460 SpatialOverlap
::Partial
/// Execution-order relation between two instructions `src` and `dest`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TemporalOrder {
    /// `src` will never be executed before `dest`.
    NeverBefore,
    /// `src` may be executed before `dest`.
    MaybeBefore,
    /// `src` will always be executed before `dest`.
    /// There may still be other instructions in between.
    DefinitivelyBefore,
}
476 fn temporal_order(ctx
: &Context
, src
: Inst
, dest
: Inst
) -> TemporalOrder
{
477 debug_assert_ne
!(src
, dest
);
479 if ctx
.domtree
.dominates(src
, dest
, &ctx
.func
.layout
) {
480 TemporalOrder
::DefinitivelyBefore
481 } else if ctx
.domtree
.dominates(src
, dest
, &ctx
.func
.layout
) {
482 TemporalOrder
::NeverBefore
484 TemporalOrder
::MaybeBefore