//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible.
//!
//! # Undefined behaviour
//!
//! This optimization is based on the assumption that stack slots which don't have their address
//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the
//! function which has the stack slots. This optimization also assumes that stack slot accesses
//! are never out of bounds. If these assumptions do not hold, this optimization may incorrectly
//! remove a `stack_store` instruction, or incorrectly use a previously stored value as the value
//! being loaded by a `stack_load`.
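//!
//! Illustratively (the value and slot names are made up): for a slot whose address never escapes,
//! a `stack_store v0, ss0` that fully covers and dominates a later `v1 = stack_load.i32 ss0`
//! becomes a plain value alias `v1 -> v0`; if nothing else can load it, the store is then removed
//! as dead, and a slot left with no loads or stores is shrunk to zero size.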

use std::collections::BTreeMap;
use std::fmt;
use std::ops::Not;

use rustc_data_structures::fx::FxHashSet;

use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::immediates::Offset32;
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};

use crate::prelude::*;

/// Workaround for `StackSlot` not implementing `Ord`.
#[derive(Copy, Clone, PartialEq, Eq)]
struct OrdStackSlot(StackSlot);

impl fmt::Debug for OrdStackSlot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self.0)
    }
}

impl PartialOrd for OrdStackSlot {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        self.0.as_u32().partial_cmp(&rhs.0.as_u32())
    }
}

impl Ord for OrdStackSlot {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.0.as_u32().cmp(&rhs.0.as_u32())
    }
}

#[derive(Debug, Default)]
struct StackSlotUsage {
    stack_addr: FxHashSet<Inst>,
    stack_load: FxHashSet<Inst>,
    stack_store: FxHashSet<Inst>,
}

impl StackSlotUsage {
    fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec<Inst> {
        self.stack_store
            .iter()
            .cloned()
            .filter(|&store| {
                match spatial_overlap(&ctx.func, store, load) {
                    SpatialOverlap::No => false, // Can never be the source of the loaded value.
                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
                }
            })
            .filter(|&store| {
                match temporal_order(ctx, store, load) {
                    TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value.
                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
                }
            })
            .collect::<Vec<Inst>>()
    }

    fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec<Inst> {
        self.stack_load
            .iter()
            .cloned()
            .filter(|&load| {
                match spatial_overlap(&ctx.func, store, load) {
                    SpatialOverlap::No => false, // Can never observe the stored value.
                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
                }
            })
            .filter(|&load| {
                match temporal_order(ctx, store, load) {
                    TemporalOrder::NeverBefore => false, // Can never observe the stored value.
                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
                }
            })
            .collect::<Vec<Inst>>()
    }

    fn remove_unused_stack_addr(func: &mut Function, inst: Inst) {
        func.dfg.detach_results(inst);
        func.dfg.replace(inst).nop();
    }

    fn remove_unused_load(func: &mut Function, load: Inst) {
        func.dfg.detach_results(load);
        func.dfg.replace(load).nop();
    }

    fn remove_dead_store(&mut self, func: &mut Function, store: Inst) {
        func.dfg.replace(store).nop();
        self.stack_store.remove(&store);
    }

    fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) {
        let loaded_value = func.dfg.inst_results(load)[0];
        let loaded_type = func.dfg.value_type(loaded_value);

        if func.dfg.value_type(value) == loaded_type {
            func.dfg.detach_results(load);
            func.dfg.replace(load).nop();
            func.dfg.change_to_alias(loaded_value, value);
        } else {
            func.dfg.replace(load).bitcast(loaded_type, value);
        }

        self.stack_load.remove(&load);
    }
}

struct OptimizeContext<'a> {
    ctx: &'a mut Context,
    stack_slot_usage_map: BTreeMap<OrdStackSlot, StackSlotUsage>,
}

impl<'a> OptimizeContext<'a> {
    fn for_context(ctx: &'a mut Context) -> Self {
        ctx.flowgraph(); // Compute cfg and domtree.

        // Record all stack_addr, stack_load and stack_store instructions.
        let mut stack_slot_usage_map = BTreeMap::<OrdStackSlot, StackSlotUsage>::new();

        let mut cursor = FuncCursor::new(&mut ctx.func);
        while let Some(_block) = cursor.next_block() {
            while let Some(inst) = cursor.next_inst() {
                match cursor.func.dfg[inst] {
                    InstructionData::StackLoad {
                        opcode: Opcode::StackAddr,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_addr
                            .insert(inst);
                    }
                    InstructionData::StackLoad {
                        opcode: Opcode::StackLoad,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_load
                            .insert(inst);
                    }
                    InstructionData::StackStore {
                        opcode: Opcode::StackStore,
                        arg: _,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_store
                            .insert(inst);
                    }
                    _ => {}
                }
            }
        }

        OptimizeContext { ctx, stack_slot_usage_map }
    }
}

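/// Driver of the pass: folds `stack_addr` into adjacent loads and stores, drops unused
/// `stack_addr`/`stack_load` instructions, and then, for every stack slot whose address does not
/// escape, forwards dominating stores to loads, removes stores that are never loaded, and shrinks
/// slots left without any access to zero size.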
pub(super) fn optimize_function(
    ctx: &mut Context,
    clif_comments: &mut crate::pretty_clif::CommentWriter,
) {
    combine_stack_addr_with_load_store(&mut ctx.func);

    let mut opt_ctx = OptimizeContext::for_context(ctx);

    // FIXME Repeat following instructions until fixpoint.

    remove_unused_stack_addr_and_stack_load(&mut opt_ctx);

    if clif_comments.enabled() {
        for (&OrdStackSlot(stack_slot), usage) in &opt_ctx.stack_slot_usage_map {
            clif_comments.add_comment(stack_slot, format!("used by: {:?}", usage));
        }
    }

    for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() {
        if users.stack_addr.is_empty().not() {
            // Stack addr leaked; there may be unknown loads and stores.
            // FIXME use stacked borrows to optimize
            continue;
        }

        for load in users.stack_load.clone().into_iter() {
            let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load);

            if clif_comments.enabled() {
                for &store in &potential_stores {
                    clif_comments.add_comment(
                        load,
                        format!(
                            "Potential store -> load forwarding {} -> {} ({:?}, {:?})",
                            opt_ctx.ctx.func.dfg.display_inst(store, None),
                            opt_ctx.ctx.func.dfg.display_inst(load, None),
                            spatial_overlap(&opt_ctx.ctx.func, store, load),
                            temporal_order(&opt_ctx.ctx, store, load),
                        ),
                    );
                }
            }

            match *potential_stores {
                [] => {
                    if clif_comments.enabled() {
                        clif_comments
                            .add_comment(load, "[BUG?] Reading uninitialized memory".to_string());
                    }
                }
                [store]
                    if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full
                        && temporal_order(&opt_ctx.ctx, store, load)
                            == TemporalOrder::DefinitivelyBefore =>
                {
                    // Only one store could have been the origin of the value.
                    let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0];

                    if clif_comments.enabled() {
                        clif_comments.add_comment(
                            load,
                            format!("Store to load forward {} -> {}", store, load),
                        );
                    }

                    users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value);
                }
                _ => {} // FIXME implement this
            }
        }

        for store in users.stack_store.clone().into_iter() {
            let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store);

            if clif_comments.enabled() {
                for &load in &potential_loads {
                    clif_comments.add_comment(
                        store,
                        format!(
                            "Potential load from store {} <- {} ({:?}, {:?})",
                            opt_ctx.ctx.func.dfg.display_inst(load, None),
                            opt_ctx.ctx.func.dfg.display_inst(store, None),
                            spatial_overlap(&opt_ctx.ctx.func, store, load),
                            temporal_order(&opt_ctx.ctx, store, load),
                        ),
                    );
                }
            }

            if potential_loads.is_empty() {
                // Never loaded; can safely remove all stores and the stack slot.
                // FIXME also remove stores when there is always a next store before a load.

                if clif_comments.enabled() {
                    clif_comments.add_comment(
                        store,
                        format!(
                            "Remove dead stack store {} of {}",
                            opt_ctx.ctx.func.dfg.display_inst(store, None),
                            stack_slot.0
                        ),
                    );
                }

                users.remove_dead_store(&mut opt_ctx.ctx.func, store);
            }
        }

        if users.stack_store.is_empty() && users.stack_load.is_empty() {
            opt_ctx.ctx.func.stack_slots[stack_slot.0].size = 0;
        }
    }
}

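/// Rewrites `load`/`store` instructions whose address is the result of a `stack_addr` into
/// `stack_load`/`stack_store` with the two offsets folded together, so the rest of the pass can
/// treat them as direct slot accesses. `i128` and vector accesses are skipped (see the workaround
/// notes in the body).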
fn combine_stack_addr_with_load_store(func: &mut Function) {
    // Turn load and store into stack_load and stack_store when possible.
    let mut cursor = FuncCursor::new(func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            match cursor.func.dfg[inst] {
                InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
                    {
                        continue; // WORKAROUND: stack_load.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) =
                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
                    {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
                        {
                            let ty = cursor.func.dfg.ctrl_typevar(inst);
                            cursor.func.dfg.replace(inst).stack_load(
                                ty,
                                stack_slot,
                                combined_offset,
                            );
                        }
                    }
                }
                InstructionData::Store {
                    opcode: Opcode::Store,
                    args: [value, addr],
                    flags: _,
                    offset,
                } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
                    {
                        continue; // WORKAROUND: stack_store.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) =
                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
                    {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
                        {
                            cursor.func.dfg.replace(inst).stack_store(
                                value,
                                stack_slot,
                                combined_offset,
                            );
                        }
                    }
                }
                _ => {}
            }
        }
    }
}

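/// Replaces `stack_addr` and `stack_load` instructions whose results have no users with `nop`s,
/// and drops them from the recorded per-slot usage sets.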
fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext<'_>) {
    // FIXME incrementally rebuild on each call?
    let mut stack_addr_load_insts_users = FxHashMap::<Inst, FxHashSet<Inst>>::default();

    let mut cursor = FuncCursor::new(&mut opt_ctx.ctx.func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            for &arg in cursor.func.dfg.inst_args(inst) {
                if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) {
                    match cursor.func.dfg[arg_origin].opcode() {
                        Opcode::StackAddr | Opcode::StackLoad => {
                            stack_addr_load_insts_users
                                .entry(arg_origin)
                                .or_insert_with(FxHashSet::default)
                                .insert(inst);
                        }
                        _ => {}
                    }
                }
            }
        }
    }

    #[cfg(debug_assertions)]
    for inst in stack_addr_load_insts_users.keys() {
        let mut is_recorded_stack_addr_or_stack_load = false;
        for stack_slot_users in opt_ctx.stack_slot_usage_map.values() {
            is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst)
                || stack_slot_users.stack_load.contains(inst);
        }
        assert!(is_recorded_stack_addr_or_stack_load);
    }

    // Replace all unused stack_addr and stack_load instructions with nop.
    let mut func = &mut opt_ctx.ctx.func;

    for stack_slot_users in opt_ctx.stack_slot_usage_map.values_mut() {
        stack_slot_users
            .stack_addr
            .drain_filter(|inst| {
                stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)
            })
            .for_each(|inst| StackSlotUsage::remove_unused_stack_addr(&mut func, inst));

        stack_slot_users
            .stack_load
            .drain_filter(|inst| {
                stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)
            })
            .for_each(|inst| StackSlotUsage::remove_unused_load(&mut func, inst));
    }
}

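/// If `addr` is the result of a `stack_addr` instruction, returns the referenced stack slot and
/// the offset encoded in that instruction.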
fn try_get_stack_slot_and_offset_for_addr(
    func: &Function,
    addr: Value,
) -> Option<(StackSlot, Offset32)> {
    if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) {
        if let InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset } =
            func.dfg[addr_inst]
        {
            return Some((stack_slot, offset));
        }
    }
    None
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SpatialOverlap {
    No,
    Partial,
    Full,
}

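/// Determines how the byte ranges touched by two stack accesses relate. For example, two accesses
/// to the same slot covering bytes 0..4 and 2..6 overlap partially, accesses with the same offset
/// and size overlap fully, and accesses to different slots do not overlap at all.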
fn spatial_overlap(func: &Function, src: Inst, dest: Inst) -> SpatialOverlap {
    fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) {
        match func.dfg[inst] {
            InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset }
            | InstructionData::StackLoad { opcode: Opcode::StackLoad, stack_slot, offset }
            | InstructionData::StackStore {
                opcode: Opcode::StackStore,
                stack_slot,
                offset,
                arg: _,
            } => (stack_slot, offset, func.dfg.ctrl_typevar(inst).bytes()),
            _ => unreachable!("{:?}", func.dfg[inst]),
        }
    }

    debug_assert_ne!(src, dest);

    let (src_ss, src_offset, src_size) = inst_info(func, src);
    let (dest_ss, dest_offset, dest_size) = inst_info(func, dest);

    if src_ss != dest_ss {
        return SpatialOverlap::No;
    }

    if src_offset == dest_offset && src_size == dest_size {
        return SpatialOverlap::Full;
    }

    let src_end: i64 = src_offset.try_add_i64(i64::from(src_size)).unwrap().into();
    let dest_end: i64 = dest_offset.try_add_i64(i64::from(dest_size)).unwrap().into();
    if src_end <= dest_offset.into() || dest_end <= src_offset.into() {
        return SpatialOverlap::No;
    }

    SpatialOverlap::Partial
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TemporalOrder {
    /// `src` will never be executed before `dest`.
    NeverBefore,

    /// `src` may be executed before `dest`.
    MaybeBefore,

    /// `src` will always be executed before `dest`.
    /// There may still be other instructions in between.
    DefinitivelyBefore,
}

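/// Uses the dominator tree to decide whether `src` is guaranteed to execute before `dest`.
/// Anything that cannot be proven to dominate is conservatively reported as `MaybeBefore`.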
fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder {
    debug_assert_ne!(src, dest);

    if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        TemporalOrder::DefinitivelyBefore
    } else if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        // Note: this condition is identical to the one above, so this branch is unreachable
        // and the conservative `MaybeBefore` below is returned instead.
        TemporalOrder::NeverBefore
    } else {
        TemporalOrder::MaybeBefore
    }
}