1 //===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
9 // For best-case performance on Cortex-A57, we should try to use a balanced
10 // mix of odd and even D-registers when performing a critical sequence of
11 // independent, non-quadword FP/ASIMD floating-point multiply or
12 // multiply-accumulate operations.
14 // This pass attempts to detect situations where the register allocation may
15 // adversely affect this load balancing and to change the registers used so as
16 // to better utilize the CPU.
18 // Ideally we'd just take each multiply or multiply-accumulate in turn and
19 // allocate it alternating even or odd registers. However, multiply-accumulates
20 // are most efficiently performed in the same functional unit as their
21 // accumulation operand. Therefore this pass tries to find maximal sequences
22 // ("Chains") of multiply-accumulates linked via their accumulation operand,
23 // and assign them all the same "color" (oddness/evenness).
// This optimization affects S-register and D-register floating point
// multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
// FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
// not affected.
29 //===----------------------------------------------------------------------===//
32 #include "AArch64InstrInfo.h"
33 #include "AArch64Subtarget.h"
34 #include "llvm/ADT/BitVector.h"
35 #include "llvm/ADT/EquivalenceClasses.h"
36 #include "llvm/CodeGen/MachineFunction.h"
37 #include "llvm/CodeGen/MachineFunctionPass.h"
38 #include "llvm/CodeGen/MachineInstr.h"
39 #include "llvm/CodeGen/MachineInstrBuilder.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/RegisterClassInfo.h"
42 #include "llvm/CodeGen/RegisterScavenging.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/raw_ostream.h"
49 #define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
51 // Enforce the algorithm to use the scavenged register even when the original
52 // destination register is the correct color. Used for testing.
54 TransformAll("aarch64-a57-fp-load-balancing-force-all",
55 cl::desc("Always modify dest registers regardless of color"),
56 cl::init(false), cl::Hidden
);
58 // Never use the balance information obtained from chains - return a specific
59 // color always. Used for testing.
60 static cl::opt
<unsigned>
61 OverrideBalance("aarch64-a57-fp-load-balancing-override",
62 cl::desc("Ignore balance information, always return "
63 "(1: Even, 2: Odd)."),
64 cl::init(0), cl::Hidden
);
66 //===----------------------------------------------------------------------===//
69 // Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
70 static bool isMul(MachineInstr
*MI
) {
71 switch (MI
->getOpcode()) {
72 case AArch64::FMULSrr
:
73 case AArch64::FNMULSrr
:
74 case AArch64::FMULDrr
:
75 case AArch64::FNMULDrr
:
82 // Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
83 static bool isMla(MachineInstr
*MI
) {
84 switch (MI
->getOpcode()) {
85 case AArch64::FMSUBSrrr
:
86 case AArch64::FMADDSrrr
:
87 case AArch64::FNMSUBSrrr
:
88 case AArch64::FNMADDSrrr
:
89 case AArch64::FMSUBDrrr
:
90 case AArch64::FMADDDrrr
:
91 case AArch64::FNMSUBDrrr
:
92 case AArch64::FNMADDDrrr
:
99 //===----------------------------------------------------------------------===//
/// A "color", which is either even or odd. Yes, these aren't really colors
/// but the algorithm is conceptually doing two-color graph coloring.
enum class Color { Even, Odd };
// Printable names for Color, indexed by the enumerator's integer value.
// The table is only read from DEBUG() trace statements, so it is compiled out
// together with them to avoid an unused-variable warning in NDEBUG builds.
#ifndef NDEBUG
static const char *ColorNames[2] = { "Even", "Odd" };
#endif
111 class AArch64A57FPLoadBalancing
: public MachineFunctionPass
{
112 const AArch64InstrInfo
*TII
;
113 MachineRegisterInfo
*MRI
;
114 const TargetRegisterInfo
*TRI
;
115 RegisterClassInfo RCI
;
119 explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID
) {}
121 bool runOnMachineFunction(MachineFunction
&F
) override
;
123 const char *getPassName() const override
{
124 return "A57 FP Anti-dependency breaker";
127 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
128 AU
.setPreservesCFG();
129 MachineFunctionPass::getAnalysisUsage(AU
);
133 bool runOnBasicBlock(MachineBasicBlock
&MBB
);
134 bool colorChainSet(std::vector
<Chain
*> GV
, MachineBasicBlock
&MBB
,
136 bool colorChain(Chain
*G
, Color C
, MachineBasicBlock
&MBB
);
137 int scavengeRegister(Chain
*G
, Color C
, MachineBasicBlock
&MBB
);
138 void scanInstruction(MachineInstr
*MI
, unsigned Idx
,
139 std::map
<unsigned, Chain
*> &Active
,
140 std::set
<std::unique_ptr
<Chain
>> &AllChains
);
141 void maybeKillChain(MachineOperand
&MO
, unsigned Idx
,
142 std::map
<unsigned, Chain
*> &RegChains
);
143 Color
getColor(unsigned Register
);
144 Chain
*getAndEraseNext(Color PreferredColor
, std::vector
<Chain
*> &L
);
146 char AArch64A57FPLoadBalancing::ID
= 0;
148 /// A Chain is a sequence of instructions that are linked together by
149 /// an accumulation operand. For example:
152 /// fmla d1<def>, ?, ?, d0<kill>
153 /// fmla d2<def>, ?, ?, d1<kill>
155 /// There may be other instructions interleaved in the sequence that
156 /// do not belong to the chain. These other instructions must not use
157 /// the "chain" register at any point.
159 /// We currently only support chains where the "chain" operand is killed
160 /// at each link in the chain for simplicity.
161 /// A chain has three important instructions - Start, Last and Kill.
162 /// * The start instruction is the first instruction in the chain.
163 /// * Last is the final instruction in the chain.
164 /// * Kill may or may not be defined. If defined, Kill is the instruction
165 /// where the outgoing value of the Last instruction is killed.
166 /// This information is important as if we know the outgoing value is
167 /// killed with no intervening uses, we can safely change its register.
169 /// Without a kill instruction, we must assume the outgoing value escapes
170 /// beyond our model and either must not change its register or must
171 /// create a fixup FMOV to keep the old register value consistent.
175 /// The important (marker) instructions.
176 MachineInstr
*StartInst
, *LastInst
, *KillInst
;
177 /// The index, from the start of the basic block, that each marker
178 /// appears. These are stored so we can do quick interval tests.
179 unsigned StartInstIdx
, LastInstIdx
, KillInstIdx
;
180 /// All instructions in the chain.
181 std::set
<MachineInstr
*> Insts
;
182 /// True if KillInst cannot be modified. If this is true,
183 /// we cannot change LastInst's outgoing register.
184 /// This will be true for tied values and regmasks.
185 bool KillIsImmutable
;
186 /// The "color" of LastInst. This will be the preferred chain color,
187 /// as changing intermediate nodes is easy but changing the last
188 /// instruction can be more tricky.
191 Chain(MachineInstr
*MI
, unsigned Idx
, Color C
)
192 : StartInst(MI
), LastInst(MI
), KillInst(nullptr),
193 StartInstIdx(Idx
), LastInstIdx(Idx
), KillInstIdx(0),
198 /// Add a new instruction into the chain. The instruction's dest operand
199 /// has the given color.
200 void add(MachineInstr
*MI
, unsigned Idx
, Color C
) {
204 assert((KillInstIdx
== 0 || LastInstIdx
< KillInstIdx
) &&
205 "Chain: broken invariant. A Chain can only be killed after its last "
211 /// Return true if MI is a member of the chain.
212 bool contains(MachineInstr
*MI
) { return Insts
.count(MI
) > 0; }
214 /// Return the number of instructions in the chain.
215 unsigned size() const {
219 /// Inform the chain that its last active register (the dest register of
220 /// LastInst) is killed by MI with no intervening uses or defs.
221 void setKill(MachineInstr
*MI
, unsigned Idx
, bool Immutable
) {
224 KillIsImmutable
= Immutable
;
225 assert((KillInstIdx
== 0 || LastInstIdx
< KillInstIdx
) &&
226 "Chain: broken invariant. A Chain can only be killed after its last "
230 /// Return the first instruction in the chain.
231 MachineInstr
*getStart() const { return StartInst
; }
232 /// Return the last instruction in the chain.
233 MachineInstr
*getLast() const { return LastInst
; }
234 /// Return the "kill" instruction (as set with setKill()) or NULL.
235 MachineInstr
*getKill() const { return KillInst
; }
236 /// Return an instruction that can be used as an iterator for the end
237 /// of the chain. This is the maximum of KillInst (if set) and LastInst.
238 MachineBasicBlock::iterator
getEnd() const {
239 return ++MachineBasicBlock::iterator(KillInst
? KillInst
: LastInst
);
242 /// Can the Kill instruction (assuming one exists) be modified?
243 bool isKillImmutable() const { return KillIsImmutable
; }
245 /// Return the preferred color of this chain.
246 Color
getPreferredColor() {
247 if (OverrideBalance
!= 0)
248 return OverrideBalance
== 1 ? Color::Even
: Color::Odd
;
252 /// Return true if this chain (StartInst..KillInst) overlaps with Other.
253 bool rangeOverlapsWith(const Chain
&Other
) const {
254 unsigned End
= KillInst
? KillInstIdx
: LastInstIdx
;
255 unsigned OtherEnd
= Other
.KillInst
?
256 Other
.KillInstIdx
: Other
.LastInstIdx
;
258 return StartInstIdx
<= OtherEnd
&& Other
.StartInstIdx
<= End
;
261 /// Return true if this chain starts before Other.
262 bool startsBefore(Chain
*Other
) {
263 return StartInstIdx
< Other
->StartInstIdx
;
266 /// Return true if the group will require a fixup MOV at the end.
267 bool requiresFixup() const {
268 return (getKill() && isKillImmutable()) || !getKill();
271 /// Return a simple string representation of the chain.
272 std::string
str() const {
274 raw_string_ostream
OS(S
);
277 StartInst
->print(OS
, NULL
, true);
279 LastInst
->print(OS
, NULL
, true);
282 KillInst
->print(OS
, NULL
, true);
292 } // end anonymous namespace
294 //===----------------------------------------------------------------------===//
296 bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction
&F
) {
297 bool Changed
= false;
298 DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
300 const TargetMachine
&TM
= F
.getTarget();
301 MRI
= &F
.getRegInfo();
302 TRI
= F
.getRegInfo().getTargetRegisterInfo();
303 TII
= TM
.getSubtarget
<AArch64Subtarget
>().getInstrInfo();
304 RCI
.runOnMachineFunction(F
);
306 for (auto &MBB
: F
) {
307 Changed
|= runOnBasicBlock(MBB
);
313 bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock
&MBB
) {
314 bool Changed
= false;
315 DEBUG(dbgs() << "Running on MBB: " << MBB
<< " - scanning instructions...\n");
317 // First, scan the basic block producing a set of chains.
319 // The currently "active" chains - chains that can be added to and haven't
320 // been killed yet. This is keyed by register - all chains can only have one
321 // "link" register between each inst in the chain.
322 std::map
<unsigned, Chain
*> ActiveChains
;
323 std::set
<std::unique_ptr
<Chain
>> AllChains
;
326 scanInstruction(&MI
, Idx
++, ActiveChains
, AllChains
);
328 DEBUG(dbgs() << "Scan complete, "<< AllChains
.size() << " chains created.\n");
330 // Group the chains into disjoint sets based on their liveness range. This is
331 // a poor-man's version of graph coloring. Ideally we'd create an interference
332 // graph and perform full-on graph coloring on that, but;
333 // (a) That's rather heavyweight for only two colors.
334 // (b) We expect multiple disjoint interference regions - in practice the live
335 // range of chains is quite small and they are clustered between loads
337 EquivalenceClasses
<Chain
*> EC
;
338 for (auto &I
: AllChains
)
341 for (auto &I
: AllChains
)
342 for (auto &J
: AllChains
)
343 if (I
!= J
&& I
->rangeOverlapsWith(*J
))
344 EC
.unionSets(I
.get(), J
.get());
345 DEBUG(dbgs() << "Created " << EC
.getNumClasses() << " disjoint sets.\n");
347 // Now we assume that every member of an equivalence class interferes
348 // with every other member of that class, and with no members of other classes.
350 // Convert the EquivalenceClasses to a simpler set of sets.
351 std::vector
<std::vector
<Chain
*> > V
;
352 for (auto I
= EC
.begin(), E
= EC
.end(); I
!= E
; ++I
) {
353 std::vector
<Chain
*> Cs(EC
.member_begin(I
), EC
.member_end());
354 if (Cs
.empty()) continue;
355 V
.push_back(std::move(Cs
));
358 // Now we have a set of sets, order them by start address so
359 // we can iterate over them sequentially.
360 std::sort(V
.begin(), V
.end(),
361 [](const std::vector
<Chain
*> &A
,
362 const std::vector
<Chain
*> &B
) {
363 return A
.front()->startsBefore(B
.front());
366 // As we only have two colors, we can track the global (BB-level) balance of
367 // odds versus evens. We aim to keep this near zero to keep both execution
369 // Positive means we're even-heavy, negative we're odd-heavy.
371 // FIXME: If chains have interdependencies, for example:
374 // We do not model this and may color each one differently, assuming we'll
375 // get ILP when we obviously can't. This hasn't been seen to be a problem
376 // in practice so far, so we simplify the algorithm by ignoring it.
380 Changed
|= colorChainSet(std::move(I
), MBB
, Parity
);
385 Chain
*AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor
,
386 std::vector
<Chain
*> &L
) {
390 // We try and get the best candidate from L to color next, given that our
391 // preferred color is "PreferredColor". L is ordered from larger to smaller
392 // chains. It is beneficial to color the large chains before the small chains,
393 // but if we can't find a chain of the maximum length with the preferred color,
394 // we fuzz the size and look for slightly smaller chains before giving up and
395 // returning a chain that must be recolored.
397 // FIXME: Does this need to be configurable?
398 const unsigned SizeFuzz
= 1;
399 unsigned MinSize
= L
.front()->size() - SizeFuzz
;
400 for (auto I
= L
.begin(), E
= L
.end(); I
!= E
; ++I
) {
401 if ((*I
)->size() <= MinSize
) {
402 // We've gone past the size limit. Return the previous item.
408 if ((*I
)->getPreferredColor() == PreferredColor
) {
415 // Bailout case - just return the first item.
416 Chain
*Ch
= L
.front();
421 bool AArch64A57FPLoadBalancing::colorChainSet(std::vector
<Chain
*> GV
,
422 MachineBasicBlock
&MBB
,
424 bool Changed
= false;
425 DEBUG(dbgs() << "colorChainSet(): #sets=" << GV
.size() << "\n");
427 // Sort by descending size order so that we allocate the most important
429 // Tie-break equivalent sizes by sorting chains requiring fixups before
430 // those without fixups. The logic here is that we should look at the
431 // chains that we cannot change before we look at those we can,
432 // so the parity counter is updated and we know what color we should
434 std::sort(GV
.begin(), GV
.end(), [](const Chain
*G1
, const Chain
*G2
) {
435 if (G1
->size() != G2
->size())
436 return G1
->size() > G2
->size();
437 return G1
->requiresFixup() > G2
->requiresFixup();
440 Color PreferredColor
= Parity
< 0 ? Color::Even
: Color::Odd
;
441 while (Chain
*G
= getAndEraseNext(PreferredColor
, GV
)) {
442 // Start off by assuming we'll color to our own preferred color.
443 Color C
= PreferredColor
;
445 // But if we really don't care, use the chain's preferred color.
446 C
= G
->getPreferredColor();
448 DEBUG(dbgs() << " - Parity=" << Parity
<< ", Color="
449 << ColorNames
[(int)C
] << "\n");
451 // If we'll need a fixup FMOV, don't bother. Testing has shown that this
452 // happens infrequently and when it does it has at least a 50% chance of
453 // slowing code down instead of speeding it up.
454 if (G
->requiresFixup() && C
!= G
->getPreferredColor()) {
455 C
= G
->getPreferredColor();
456 DEBUG(dbgs() << " - " << G
->str() << " - not worthwhile changing; "
457 "color remains " << ColorNames
[(int)C
] << "\n");
460 Changed
|= colorChain(G
, C
, MBB
);
462 Parity
+= (C
== Color::Even
) ? G
->size() : -G
->size();
463 PreferredColor
= Parity
< 0 ? Color::Even
: Color::Odd
;
469 int AArch64A57FPLoadBalancing::scavengeRegister(Chain
*G
, Color C
,
470 MachineBasicBlock
&MBB
) {
472 RS
.enterBasicBlock(&MBB
);
473 RS
.forward(MachineBasicBlock::iterator(G
->getStart()));
475 // Can we find an appropriate register that is available throughout the life
477 unsigned RegClassID
= G
->getStart()->getDesc().OpInfo
[0].RegClass
;
478 BitVector AvailableRegs
= RS
.getRegsAvailable(TRI
->getRegClass(RegClassID
));
479 for (MachineBasicBlock::iterator I
= G
->getStart(), E
= G
->getEnd();
482 AvailableRegs
&= RS
.getRegsAvailable(TRI
->getRegClass(RegClassID
));
484 // Remove any registers clobbered by a regmask or any def register that is
486 for (auto J
: I
->operands()) {
488 AvailableRegs
.clearBitsNotInMask(J
.getRegMask());
490 if (J
.isReg() && J
.isDef() && AvailableRegs
[J
.getReg()]) {
491 assert(J
.isDead() && "Non-dead def should have been removed by now!");
492 AvailableRegs
.reset(J
.getReg());
497 // Make sure we allocate in-order, to get the cheapest registers first.
498 auto Ord
= RCI
.getOrder(TRI
->getRegClass(RegClassID
));
499 for (auto Reg
: Ord
) {
500 if (!AvailableRegs
[Reg
])
502 if ((C
== Color::Even
&& (Reg
% 2) == 0) ||
503 (C
== Color::Odd
&& (Reg
% 2) == 1))
510 bool AArch64A57FPLoadBalancing::colorChain(Chain
*G
, Color C
,
511 MachineBasicBlock
&MBB
) {
512 bool Changed
= false;
513 DEBUG(dbgs() << " - colorChain(" << G
->str() << ", "
514 << ColorNames
[(int)C
] << ")\n");
516 // Try and obtain a free register of the right class. Without a register
517 // to play with we cannot continue.
518 int Reg
= scavengeRegister(G
, C
, MBB
);
520 DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
523 DEBUG(dbgs() << " - Scavenged register: " << TRI
->getName(Reg
) << "\n");
525 std::map
<unsigned, unsigned> Substs
;
526 for (MachineBasicBlock::iterator I
= G
->getStart(), E
= G
->getEnd();
528 if (!G
->contains(I
) &&
529 (&*I
!= G
->getKill() || G
->isKillImmutable()))
532 // I is a member of G, or I is a mutable instruction that kills G.
534 std::vector
<unsigned> ToErase
;
535 for (auto &U
: I
->operands()) {
536 if (U
.isReg() && U
.isUse() && Substs
.find(U
.getReg()) != Substs
.end()) {
537 unsigned OrigReg
= U
.getReg();
538 U
.setReg(Substs
[OrigReg
]);
540 // Don't erase straight away, because there may be other operands
541 // that also reference this substitution!
542 ToErase
.push_back(OrigReg
);
543 } else if (U
.isRegMask()) {
544 for (auto J
: Substs
) {
545 if (U
.clobbersPhysReg(J
.first
))
546 ToErase
.push_back(J
.first
);
550 // Now it's safe to remove the substs identified earlier.
551 for (auto J
: ToErase
)
554 // Only change the def if this isn't the last instruction.
555 if (&*I
!= G
->getKill()) {
556 MachineOperand
&MO
= I
->getOperand(0);
558 bool Change
= TransformAll
|| getColor(MO
.getReg()) != C
;
559 if (G
->requiresFixup() && &*I
== G
->getLast())
563 Substs
[MO
.getReg()] = Reg
;
565 MRI
->setPhysRegUsed(Reg
);
571 assert(Substs
.size() == 0 && "No substitutions should be left active!");
574 DEBUG(dbgs() << " - Kill instruction seen.\n");
576 // We didn't have a kill instruction, but we didn't seem to need to change
577 // the destination register anyway.
578 DEBUG(dbgs() << " - Destination register not changed.\n");
583 void AArch64A57FPLoadBalancing::
584 scanInstruction(MachineInstr
*MI
, unsigned Idx
,
585 std::map
<unsigned, Chain
*> &ActiveChains
,
586 std::set
<std::unique_ptr
<Chain
>> &AllChains
) {
587 // Inspect "MI", updating ActiveChains and AllChains.
591 for (auto &I
: MI
->uses())
592 maybeKillChain(I
, Idx
, ActiveChains
);
593 for (auto &I
: MI
->defs())
594 maybeKillChain(I
, Idx
, ActiveChains
);
596 // Create a new chain. Multiplies don't require forwarding so can go on any
598 unsigned DestReg
= MI
->getOperand(0).getReg();
600 DEBUG(dbgs() << "New chain started for register "
601 << TRI
->getName(DestReg
) << " at " << *MI
);
603 auto G
= llvm::make_unique
<Chain
>(MI
, Idx
, getColor(DestReg
));
604 ActiveChains
[DestReg
] = G
.get();
605 AllChains
.insert(std::move(G
));
607 } else if (isMla(MI
)) {
609 // It is beneficial to keep MLAs on the same functional unit as their
610 // accumulator operand.
611 unsigned DestReg
= MI
->getOperand(0).getReg();
612 unsigned AccumReg
= MI
->getOperand(3).getReg();
614 maybeKillChain(MI
->getOperand(1), Idx
, ActiveChains
);
615 maybeKillChain(MI
->getOperand(2), Idx
, ActiveChains
);
616 if (DestReg
!= AccumReg
)
617 maybeKillChain(MI
->getOperand(0), Idx
, ActiveChains
);
619 if (ActiveChains
.find(AccumReg
) != ActiveChains
.end()) {
620 DEBUG(dbgs() << "Chain found for accumulator register "
621 << TRI
->getName(AccumReg
) << " in MI " << *MI
);
623 // For simplicity we only chain together sequences of MULs/MLAs where the
624 // accumulator register is killed on each instruction. This means we don't
625 // need to track other uses of the registers we want to rewrite.
627 // FIXME: We could extend to handle the non-kill cases for more coverage.
628 if (MI
->getOperand(3).isKill()) {
630 DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
631 ActiveChains
[AccumReg
]->add(MI
, Idx
, getColor(DestReg
));
632 // Handle cases where the destination is not the same as the accumulator.
633 if (DestReg
!= AccumReg
) {
634 ActiveChains
[DestReg
] = ActiveChains
[AccumReg
];
635 ActiveChains
.erase(AccumReg
);
640 DEBUG(dbgs() << "Cannot add to chain because accumulator operand wasn't "
641 << "marked <kill>!\n");
642 maybeKillChain(MI
->getOperand(3), Idx
, ActiveChains
);
645 DEBUG(dbgs() << "Creating new chain for dest register "
646 << TRI
->getName(DestReg
) << "\n");
647 auto G
= llvm::make_unique
<Chain
>(MI
, Idx
, getColor(DestReg
));
648 ActiveChains
[DestReg
] = G
.get();
649 AllChains
.insert(std::move(G
));
653 // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
655 for (auto &I
: MI
->uses())
656 maybeKillChain(I
, Idx
, ActiveChains
);
657 for (auto &I
: MI
->defs())
658 maybeKillChain(I
, Idx
, ActiveChains
);
663 void AArch64A57FPLoadBalancing::
664 maybeKillChain(MachineOperand
&MO
, unsigned Idx
,
665 std::map
<unsigned, Chain
*> &ActiveChains
) {
666 // Given an operand and the set of active chains (keyed by register),
667 // determine if a chain should be ended and remove from ActiveChains.
668 MachineInstr
*MI
= MO
.getParent();
672 // If this is a KILL of a current chain, record it.
673 if (MO
.isKill() && ActiveChains
.find(MO
.getReg()) != ActiveChains
.end()) {
674 DEBUG(dbgs() << "Kill seen for chain " << TRI
->getName(MO
.getReg())
676 ActiveChains
[MO
.getReg()]->setKill(MI
, Idx
, /*Immutable=*/MO
.isTied());
678 ActiveChains
.erase(MO
.getReg());
680 } else if (MO
.isRegMask()) {
682 for (auto I
= ActiveChains
.begin(), E
= ActiveChains
.end();
684 if (MO
.clobbersPhysReg(I
->first
)) {
685 DEBUG(dbgs() << "Kill (regmask) seen for chain "
686 << TRI
->getName(I
->first
) << "\n");
687 I
->second
->setKill(MI
, Idx
, /*Immutable=*/true);
688 ActiveChains
.erase(I
++);
696 Color
AArch64A57FPLoadBalancing::getColor(unsigned Reg
) {
697 if ((TRI
->getEncodingValue(Reg
) % 2) == 0)
703 // Factory function used by AArch64TargetMachine to add the pass to the passmanager.
704 FunctionPass
*llvm::createAArch64A57FPLoadBalancing() {
705 return new AArch64A57FPLoadBalancing();